# ================================================================== # Gossamer Threads Module Library - http://gossamer-threads.com/ # # GT::CGI::MultiPart # CVS Info : 087,071,086,086,085 # $Id: MultiPart.pm,v 1.12 2008/07/14 23:40:31 brewt Exp $ # # Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved. # ================================================================== # # Description: # Multipart form handling for GT::CGI objects. # # This is taken almost entirely from CGI.pm, and is loaded on demand. # package GT::CGI::MultiPart; # ============================================================================== use strict 'vars', 'subs'; use GT::CGI; use GT::Base; use GT::TempFile(); use vars qw/$DEBUG $ERRORS @ISA $ATTRIBS $CRLF/; @ISA = qw/GT::Base/; use constants BLOCK_SIZE => 4096, MAX_READS => 2000; $CRLF = "\015\012"; $ATTRIBS = { fh => undef, # web request on stdin buffer => '', # buffer to hold tmp data length => 0, # length of file to parse boundary => undef, # mime boundary to look for fillunit => BLOCK_SIZE, # amount to read per chunk safety => 0 # safety counter }; $ERRORS = { NOBOUNDARY => "Unable to find a MIME boundary in environment. Content-type looks like: %s", CLIENTABORT => "Unable to read data from server. Still have %s bytes to read, but got 0. Data in buffer is: %s", BADMULTIPART => "Invalid multipart message. Nothing left to read, and can't find closing boundary. Data in buffer is: %s" }; sub parse { # ------------------------------------------------------------------- # Parses a multipart form to handle file uploads. # my ($class, $cgi, $callback) = @_; # We override any fatal handlers as our handlers typically create a CGI object # avoiding a nasty loop. local $SIG{__DIE__} = 'DEFAULT'; # We only load the multipart parser if we have multipart code. my $parser = $class->new or return; my ($header, $name, $value, $filename); until ($parser->eof) { $header = $parser->read_header or return die "BADREQUEST"; if ($header->{'Content-Disposition'} =~ m/ name=(?:"([^"]*)"|((?!")[^;]*))/) { $name = length $1 ? $1 : $2; } $filename = ''; if ($header->{'Content-Disposition'} =~ m/ filename=(?:"([^"]*)"|((?!")[^;]*))/) { $filename = length $1 ? $1 : $2; # Strip off any paths from the filename (IE sends the full path to the file). $filename =~ s|^.*[/\\]|| if $filename; } $name .= $GT::CGI::TAINTED; $filename .= $GT::CGI::TAINTED; # Not a file, just regular form data. if (! defined $filename or $filename eq '') { $value = $parser->read_body; # Netscape 6 does some fun things with line feeds in multipart form data $value =~ s/\r\r/\r/g; # What it does on unix $value =~ s/\r\n/\n/g if $^O eq 'MSWin32'; unless ($cgi->{params}->{$name}) { push @{$cgi->{param_order}}, $name; } unshift @{$cgi->{params}->{$name}}, $value; next; } # Print out the data to a temp file. local $\; my $tmp_file = new GT::TempFile; require GT::CGI::Fh; my $fh = GT::CGI::Fh->new($filename, $$tmp_file, 0); binmode $fh; my $data; my $bytes_read = 0; while (defined($data = $parser->read)) { if (defined $callback and (ref $callback eq 'CODE')) { $bytes_read += length $data; $callback->($filename, \$data, $bytes_read); } print $fh $data; } seek $fh, 0, 0; unless ($cgi->{params}->{$name}) { push @{$cgi->{param_order}}, $name; } unshift @{$cgi->{params}->{$name}}, $fh; } } sub init { # ------------------------------------------------------------------- # Initilize our object. # $DEBUG = $GT::CGI::DEBUG; my $self = shift; # Get the boundary marker. my $boundary; if (defined $ENV{CONTENT_TYPE} and $ENV{CONTENT_TYPE} =~ /boundary=\"?([^\";,]+)\"?/) { $boundary = $1 . $GT::CGI::TAINTED; } else { return $self->error("NOBOUNDARY", "FATAL", $ENV{CONTENT_TYPE}); } $self->{boundary} = "--$boundary"; # Get our filehandle. binmode(STDIN); # And if the boundary is > the BLOCK_SIZE, adjust. if (length $boundary > $self->{fillunit}) { $self->{fillunit} = length $boundary; } # Set the content-length. $self->{length} = $ENV{CONTENT_LENGTH} || 0; # Read the preamble and the topmost (boundary) line plus the CRLF. while ($self->read) { } } sub fill_buffer { # ------------------------------------------------------------------- # Fill buffer. # my ($self, $bytes) = @_; return unless $self->{length}; my $boundary_length = length $self->{boundary}; my $buffer_length = length $self->{buffer}; my $bytes_to_read = $bytes - $buffer_length + $boundary_length + 2; $bytes_to_read = $self->{length} if $self->{length} < $bytes_to_read; my $bytes_read = read(STDIN, $self->{buffer}, $bytes_to_read, $buffer_length); if (! defined $self->{buffer}) { $self->{buffer} = ''; } if ($bytes_read == 0) { if ($self->{safety}++ > MAX_READS) { return $self->error(CLIENTABORT => FATAL => $self->{length}, $self->{buffer}); } } else { $self->{safety} = 0; } $self->{length} -= $bytes_read; } sub read { # ------------------------------------------------------------------- # Read some input. # my $self = shift; my $bytes = $self->{fillunit}; # Load up self->{buffer} with data. $self->fill_buffer($bytes); # find the boundary (if exists). my $start = index($self->{buffer}, $self->{boundary}); # Make sure the post was formed properly. unless (($start >= 0) or ($self->{length} > 0)) { return $self->error(BADMULTIPART => FATAL => $self->{buffer}); } if ($start == 0) { # Quit if we found the last boundary at the beginning. if (index($self->{buffer},"$self->{boundary}--") == 0) { $self->{buffer} = ''; $self->{length} = 0; return; } # Otherwise remove the boundary (+2 to remove line feeds). substr($self->{buffer}, 0, length ($self->{boundary}) + 2) = ''; return; } my $bytes_to_return; if ($start > 0) { $bytes_to_return = $start > $bytes ? $bytes : $start; } else { $bytes_to_return = $bytes - length($self->{boundary}) + 1; } my $return = substr($self->{buffer}, 0, $bytes_to_return); substr($self->{buffer}, 0, $bytes_to_return) = ''; return $start > 0 ? substr($return, 0, -2) : $return; } sub read_header { # ------------------------------------------------------------------- # Reads the header. # my $self = shift; my ($ok, $bad, $end, $safety) = (0, 0); until ($ok or $bad) { $self->fill_buffer($self->{fillunit}); $ok++ if ($end = index($self->{buffer}, "$CRLF$CRLF")) >= 0; $ok++ if $self->{buffer} eq ''; $bad++ if !$ok and $self->{length} <= 0; return if $safety++ >= 10; } return if $bad; my $header = substr($self->{buffer}, 0, $end + 2); substr($self->{buffer}, 0, $end + 4) = ''; my %header; my $token = '[-\w!\#$%&\'*+.^_\`|{}~]'; $header =~ s/$CRLF\s+/ /og; while ($header =~ /($token+):\s+([^$CRLF]*)/go) { my ($field_name, $field_value) = ($1 . $GT::CGI::TAINTED, $2 . $GT::CGI::TAINTED); $field_name =~ s/\b(\w)/\u$1/g; $header{$field_name} = $field_value; } return \%header; } sub read_body { # ------------------------------------------------------------------- # Reads a body and returns as a single scalar value. # my $self = shift; my $data = ''; my $return = ''; while (defined($data = $self->read)) { $return .= $data; } return $return; } sub eof { # ------------------------------------------------------------------- # Return true when we've finished reading. # my $self = shift; return 1 if length $self->{buffer} == 0 and $self->{length} <= 0; } 1;