discourse-legacysite-perl/site/slowtwitch.com/cgi-bin/articles/GT/CGI/MultiPart.pm
2024-06-17 21:49:12 +10:00

271 lines
8.1 KiB
Perl

# ==================================================================
# Gossamer Threads Module Library - http://gossamer-threads.com/
#
# GT::CGI::MultiPart
# CVS Info : 087,071,086,086,085
# $Id: MultiPart.pm,v 1.12 2008/07/14 23:40:31 brewt Exp $
#
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
# ==================================================================
#
# Description:
# Multipart form handling for GT::CGI objects.
#
# This is taken almost entirely from CGI.pm, and is loaded on demand.
#
package GT::CGI::MultiPart;
# ==============================================================================
use strict 'vars', 'subs';
use GT::CGI;
use GT::Base;
use GT::TempFile();
use vars qw/$DEBUG $ERRORS @ISA $ATTRIBS $CRLF/;
@ISA = qw/GT::Base/;
use constants
BLOCK_SIZE => 4096,
MAX_READS => 2000;
$CRLF = "\015\012";
$ATTRIBS = {
fh => undef, # web request on stdin
buffer => '', # buffer to hold tmp data
length => 0, # length of file to parse
boundary => undef, # mime boundary to look for
fillunit => BLOCK_SIZE, # amount to read per chunk
safety => 0 # safety counter
};
$ERRORS = {
NOBOUNDARY => "Unable to find a MIME boundary in environment. Content-type looks like: %s",
CLIENTABORT => "Unable to read data from server. Still have %s bytes to read, but got 0. Data in buffer is: %s",
BADMULTIPART => "Invalid multipart message. Nothing left to read, and can't find closing boundary. Data in buffer is: %s"
};
sub parse {
# -------------------------------------------------------------------
# Parses a multipart form to handle file uploads.
#
my ($class, $cgi, $callback) = @_;
# We override any fatal handlers as our handlers typically create a CGI object
# avoiding a nasty loop.
local $SIG{__DIE__} = 'DEFAULT';
# We only load the multipart parser if we have multipart code.
my $parser = $class->new or return;
my ($header, $name, $value, $filename);
until ($parser->eof) {
$header = $parser->read_header or return die "BADREQUEST";
if ($header->{'Content-Disposition'} =~ m/ name=(?:"([^"]*)"|((?!")[^;]*))/) {
$name = length $1 ? $1 : $2;
}
$filename = '';
if ($header->{'Content-Disposition'} =~ m/ filename=(?:"([^"]*)"|((?!")[^;]*))/) {
$filename = length $1 ? $1 : $2;
# Strip off any paths from the filename (IE sends the full path to the file).
$filename =~ s|^.*[/\\]|| if $filename;
}
$name .= $GT::CGI::TAINTED;
$filename .= $GT::CGI::TAINTED;
# Not a file, just regular form data.
if (! defined $filename or $filename eq '') {
$value = $parser->read_body;
# Netscape 6 does some fun things with line feeds in multipart form data
$value =~ s/\r\r/\r/g; # What it does on unix
$value =~ s/\r\n/\n/g if $^O eq 'MSWin32';
unless ($cgi->{params}->{$name}) {
push @{$cgi->{param_order}}, $name;
}
unshift @{$cgi->{params}->{$name}}, $value;
next;
}
# Print out the data to a temp file.
local $\;
my $tmp_file = new GT::TempFile;
require GT::CGI::Fh;
my $fh = GT::CGI::Fh->new($filename, $$tmp_file, 0);
binmode $fh;
my $data;
my $bytes_read = 0;
while (defined($data = $parser->read)) {
if (defined $callback and (ref $callback eq 'CODE')) {
$bytes_read += length $data;
$callback->($filename, \$data, $bytes_read);
}
print $fh $data;
}
seek $fh, 0, 0;
unless ($cgi->{params}->{$name}) {
push @{$cgi->{param_order}}, $name;
}
unshift @{$cgi->{params}->{$name}}, $fh;
}
}
sub init {
# -------------------------------------------------------------------
# Initilize our object.
#
$DEBUG = $GT::CGI::DEBUG;
my $self = shift;
# Get the boundary marker.
my $boundary;
if (defined $ENV{CONTENT_TYPE} and $ENV{CONTENT_TYPE} =~ /boundary=\"?([^\";,]+)\"?/) {
$boundary = $1 . $GT::CGI::TAINTED;
}
else {
return $self->error("NOBOUNDARY", "FATAL", $ENV{CONTENT_TYPE});
}
$self->{boundary} = "--$boundary";
# Get our filehandle.
binmode(STDIN);
# And if the boundary is > the BLOCK_SIZE, adjust.
if (length $boundary > $self->{fillunit}) {
$self->{fillunit} = length $boundary;
}
# Set the content-length.
$self->{length} = $ENV{CONTENT_LENGTH} || 0;
# Read the preamble and the topmost (boundary) line plus the CRLF.
while ($self->read) { }
}
sub fill_buffer {
# -------------------------------------------------------------------
# Fill buffer.
#
my ($self, $bytes) = @_;
return unless $self->{length};
my $boundary_length = length $self->{boundary};
my $buffer_length = length $self->{buffer};
my $bytes_to_read = $bytes - $buffer_length + $boundary_length + 2;
$bytes_to_read = $self->{length} if $self->{length} < $bytes_to_read;
my $bytes_read = read(STDIN, $self->{buffer}, $bytes_to_read, $buffer_length);
if (! defined $self->{buffer}) {
$self->{buffer} = '';
}
if ($bytes_read == 0) {
if ($self->{safety}++ > MAX_READS) {
return $self->error(CLIENTABORT => FATAL => $self->{length}, $self->{buffer});
}
}
else {
$self->{safety} = 0;
}
$self->{length} -= $bytes_read;
}
sub read {
# -------------------------------------------------------------------
# Read some input.
#
my $self = shift;
my $bytes = $self->{fillunit};
# Load up self->{buffer} with data.
$self->fill_buffer($bytes);
# find the boundary (if exists).
my $start = index($self->{buffer}, $self->{boundary});
# Make sure the post was formed properly.
unless (($start >= 0) or ($self->{length} > 0)) {
return $self->error(BADMULTIPART => FATAL => $self->{buffer});
}
if ($start == 0) {
# Quit if we found the last boundary at the beginning.
if (index($self->{buffer},"$self->{boundary}--") == 0) {
$self->{buffer} = '';
$self->{length} = 0;
return;
}
# Otherwise remove the boundary (+2 to remove line feeds).
substr($self->{buffer}, 0, length ($self->{boundary}) + 2) = '';
return;
}
my $bytes_to_return;
if ($start > 0) {
$bytes_to_return = $start > $bytes ? $bytes : $start;
}
else {
$bytes_to_return = $bytes - length($self->{boundary}) + 1;
}
my $return = substr($self->{buffer}, 0, $bytes_to_return);
substr($self->{buffer}, 0, $bytes_to_return) = '';
return $start > 0 ? substr($return, 0, -2) : $return;
}
sub read_header {
# -------------------------------------------------------------------
# Reads the header.
#
my $self = shift;
my ($ok, $bad, $end, $safety) = (0, 0);
until ($ok or $bad) {
$self->fill_buffer($self->{fillunit});
$ok++ if ($end = index($self->{buffer}, "$CRLF$CRLF")) >= 0;
$ok++ if $self->{buffer} eq '';
$bad++ if !$ok and $self->{length} <= 0;
return if $safety++ >= 10;
}
return if $bad;
my $header = substr($self->{buffer}, 0, $end + 2);
substr($self->{buffer}, 0, $end + 4) = '';
my %header;
my $token = '[-\w!\#$%&\'*+.^_\`|{}~]';
$header =~ s/$CRLF\s+/ /og;
while ($header =~ /($token+):\s+([^$CRLF]*)/go) {
my ($field_name, $field_value) = ($1 . $GT::CGI::TAINTED, $2 . $GT::CGI::TAINTED);
$field_name =~ s/\b(\w)/\u$1/g;
$header{$field_name} = $field_value;
}
return \%header;
}
sub read_body {
# -------------------------------------------------------------------
# Reads a body and returns as a single scalar value.
#
my $self = shift;
my $data = '';
my $return = '';
while (defined($data = $self->read)) {
$return .= $data;
}
return $return;
}
sub eof {
# -------------------------------------------------------------------
# Return true when we've finished reading.
#
my $self = shift;
return 1 if length $self->{buffer} == 0 and $self->{length} <= 0;
}
1;