A parser for extracting uploaded files and parameters from the body of a multipart/form-data request.
require 'strelka/multipartmimeparser' parser = Strelka::MultipartMimeParser.new files, params = parser.parse( io, '---boundary' )
Michael Granger <ged@FaerieMUD.org>
Mahlon E. Smith <mahlon@martini.nu>
A class for parsing multipart mime documents from a stream.
Pattern for matching a blank line
Configurability API – configuration defaults
Line-ending regexp. Supports UNIX line-endings for testing.
Line-ending for RFC5322 header fields; EOL not followed by a WSP char
The current buffer for unparsed data
Parsed form fields
Configurability API – configure the parser with the 'mimeparser' section of the config:
the size of the buffer (in bytes) to use when reading the document. Larger sizes use more heap, but are faster.
the directory to spool file upload parts to.
# File lib/strelka/multipartparser.rb, line 77
def self::configure( options=nil )
if options
self.log.debug "Configuring the %p: %p" % [ self, options ]
self.bufsize = Integer( options[:bufsize] ) || CONFIG_DEFAULTS[:bufsize]
self.spooldir = Pathname( options[:spooldir] ) || CONFIG_DEFAULTS[:spooldir]
else
self.log.debug "Configuring %p with defaults: %p" % [ self, CONFIG_DEFAULTS ]
self.bufsize = CONFIG_DEFAULTS[:bufsize]
self.spooldir = CONFIG_DEFAULTS[:spooldir]
end
end
Create a new Strelka::MultipartMimeParser
# File lib/strelka/multipartparser.rb, line 91
def initialize( io, boundary )
io = StringIO.new( io ) unless io.respond_to?( :read )
boundary = '--' + boundary # unless boundary.start_with?( '--' )
@bufsize = self.class.bufsize || CONFIG_DEFAULTS[:bufsize]
@spooldir = self.class.spooldir || CONFIG_DEFAULTS[:spooldir]
@io = io
@boundary = boundary
@fields = {}
@buffer = ''
# Ensure that the buffer can contain at least a whole boundary,
# otherwise we can't scan for it.
@bufsize = @boundary.bytesize * 1.5 if @bufsize < @boundary.bytesize * 1.5
@spooldir.mkpath
end
The configured buffer size to use when parsing
# File lib/strelka/multipartparser.rb, line 64
singleton_attr_accessor :bufsize
Parse the form data from the IO and return it as a Hash.
# File lib/strelka/multipartparser.rb, line 121
def parse
self.log.debug "Starting parse: %p" % [ self ]
# Strip off the initial boundary
self.strip_boundary or
raise Strelka::ParseError, "No initial boundary"
# Now scan until we see the ending boundary (the one with the trailing '--')
self.scan_part until @buffer.start_with?( '--' )
self.log.debug "Finished parse. %d fields" % [ self.fields.length ]
return self.fields
end
The configured spool directory for storing attachments
# File lib/strelka/multipartparser.rb, line 68
singleton_attr_accessor :spooldir
Read at least bytecount bytes from the io, appending the data
onto the buffer.
# File lib/strelka/multipartparser.rb, line 321
def read_at_least( bytecount )
# self.log.debug "Reading at least %d bytes from %p." % [ bytecount, @io ]
if @io.eof?
# self.log.debug " input stream at EOF. Returning."
return false
end
self.read_some_more until
@buffer.bytesize >= bytecount || @io.eof?
return true
end
Read data from the state's IO until the buffer contains at least the number of bytes in the chunksize, or the IO is at EOF.
# File lib/strelka/multipartparser.rb, line 312
def read_chunk
# self.log.debug "Reading a new chunk."
self.read_at_least( @bufsize )
# self.log.debug " buffer is now: %p" % [ @buffer ]
end
Try to read another chunk of data into the buffer of the given
state, returning true unless the state's IO is at eof.
# File lib/strelka/multipartparser.rb, line 338
def read_some_more
# self.log.debug "Reading more data from %p..." % [ @io ]
return false if @io.eof?
startsize = @buffer.bytesize
@buffer << @io.read( @bufsize )
# self.log.debug " after reading, buffer has %d bytes." % [ @buffer.bytesize ]
until @buffer.bytesize > startsize
return false if @io.eof?
Thread.pass
@buffer << @io.read( @bufsize )
end
return true
end
Scan the body of the current document part, spooling the data to a tempfile on disk and returning the resulting filehandle.
# File lib/strelka/multipartparser.rb, line 234
def scan_file_field( filename, headers )
self.log.info "Parsing file '%s'" % [ filename ]
io, size = self.spool_file_upload
io.extend( FileInputField )
io.filename = filename
io.content_type = headers['content-type']
io.content_length = size
self.log.debug "Scanned file %p to: %s (%d bytes)" % [ io.filename, io.path, size ]
return io
end
Scan the buffer for MIME headers and return them as a Hash.
# File lib/strelka/multipartparser.rb, line 179
def scan_headers
headerlines = ''
@buffer.slice!( /^#{CRLF_REGEXP}/ )
# Find the headers
while headerlines.empty?
if pos = @buffer.index( BLANK_LINE_REGEXP )
headerlines = @buffer.slice!( 0, pos )
else
self.log.debug "Couldn't find a blank line in the first %d bytes (%p)" %
[ @buffer.bytesize, @buffer[0..100] ]
self.read_at_least( @bufsize ) or
raise Strelka::ParseError, "EOF while searching for headers"
end
end
# put headers into a hash
headers = headerlines.strip.split( HEADER_FIELD_EOL ).inject({}) {|hash, line|
line.gsub!( CRLF_REGEXP, '' ) # Un-fold long headers
key, val = line.split( /:\s*/, 2 )
hash[ key.downcase ] = val
hash
}
self.log.debug "Scanned headers: %p" % [headers]
# remove headers from parse buffer
@buffer.slice!( /^#{BLANK_LINE_REGEXP}/ )
return headers
end
Scan a part from the buffer.
# File lib/strelka/multipartparser.rb, line 142
def scan_part
headers = self.scan_headers
disposition = headers['content-disposition']
raise UnimplementedError, "don't know what to do with %p parts" % [ disposition ] unless
disposition.start_with?( 'form-data' )
key = disposition[ /\bname="(\S+)"/i, 1 ] or
raise Strelka::ParseError, "no field name: %p" % [ disposition ]
val = nil
# :TODO: Support for content-type and content-transfer-encoding headers for parts.
# If it's a file, spool it out to a tempfile
if disposition =~ /\bfilename=/i
file = disposition[ /\bfilename="(?:.*\)?(.+?)"/, 1 ] or return nil
self.log.debug "Parsing an uploaded file %p (%p)" % [ key, file ]
val = self.scan_file_field( file, headers )
# otherwise just read it as a regular parameter
else
self.log.debug "Parsing a form parameter (%p)" % [ key ]
val = self.scan_regular_field( key )
end
# Convert the value to an Array if there are more than one
if @fields.key?( key )
@fields[ key ] = [ @fields[key] ] unless @fields[ key ].is_a?( Array )
@fields[ key ] << val
else
@fields[ key ] = val
end
self.strip_boundary
end
Scan the value after the scan pointer for the specified metadata
parameter.
# File lib/strelka/multipartparser.rb, line 214
def scan_regular_field( key )
param = ''
self.log.debug "Scanning form parameter: %p" % [key]
while param.empty?
if start = @buffer.index( @boundary )
self.log.debug "Found the end of the parameter."
param = @buffer.slice!( 0, start )
else
self.read_some_more or raise Strelka::ParseError,
"EOF while scanning a form parameter"
end
end
return param.chomp
end
Scan the file data and metadata in the given scannner,
spooling the file data into a temporary file. Returns the tempfile object
and a hash of metadata.
# File lib/strelka/multipartparser.rb, line 252
def spool_file_upload
self.log.debug "Spooling file from upload"
tmpfile = Tempfile.open( 'filedata', @spooldir.to_s, encoding: 'ascii-8bit' )
size = 0
# :TODO: Use mmap(2) to map the resulting IOs from mongrel's spool file
# rather than writing them all out to disk a second time.
until tmpfile.closed?
# look for end, store everything until boundary
if start = @buffer.index( @boundary )
self.log.debug "Found the end of the file"
leavings = @buffer.slice!( 0, start )
leavings.slice!( -2, 2 ) # trailing CRLF
tmpfile.write( leavings )
size += leavings.length
tmpfile.close
# not at the end yet, buffer this chunker to disk
elsif @buffer.bytesize >= @bufsize
# make sure we're never writing a portion of the boundary
# out while we're buffering
buf = @buffer.slice!( 0, @buffer.bytesize - @bufsize )
# self.log.debug " writing %d bytes" % [ buf.bytesize ]
tmpfile.print( buf )
size += buf.bytesize
end
# put some more data into the buffer
unless tmpfile.closed?
self.read_some_more or
raise Strelka::ParseError, "EOF while spooling file upload"
end
end
return tmpfile, size
end
Strip data from the head of the buffer that matches pat,
returning it if successful, or returning nil if not. The
matched data should fit within the parser's chunk size.
# File lib/strelka/multipartparser.rb, line 294
def strip( pat )
self.read_chunk
return nil unless @buffer.index( pat ) == 0
@buffer.slice!( pat )
end
Strip the boundary that's at the front of the buffer, reading more data
into it as necessary. Returns the boundary if successful, or
nil if there wasn't a boundary in the buffer.
# File lib/strelka/multipartparser.rb, line 304
def strip_boundary
self.log.debug "Stripping boundary:\n%p at:\n%p" % [ @boundary, @buffer[0,40] ]
self.strip( @boundary )
end