# ================================================================== # Gossamer Threads Module Library - http://gossamer-threads.com/ # # GT::Mail::Encoder # Author : Scott Beck # CVS Info : 087,071,086,086,085 # $Id: Encoder.pm,v 1.44 2008/10/29 23:31:51 brewt Exp $ # # Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved. # ================================================================== # # Description: A general purpose perl interface for encoding data. # package GT::Mail::Encoder; # ================================================================== # If MIME::Base64 is installed use it - must eval before hand or 5.004_04 # wipes our ISA. my $have_b64 = eval { local $SIG{__DIE__}; require MIME::Base64; import MIME::Base64; if ($] < 5.005) { local $^W; encode_base64('brok'); } 1; }; $have_b64 or *encode_base64 = \>_old_encode_base64; my $use_encode_qp; if ($have_b64 and $MIME::Base64::VERSION ge 2.16 and defined &MIME::QuotedPrint::encode_qp and ( not defined &MIME::QuotedPrint::old_encode_qp or \&MIME::QuotedPrint::encode_qp != \&MIME::QuotedPrint::old_encode_qp ) ) { $use_encode_qp = 1; } # Pragmas use strict; use vars qw($VERSION $DEBUG @ISA %EncodeFor $CRLF); $VERSION = sprintf "%d.%03d", q$Revision: 1.44 $ =~ /(\d+)\.(\d+)/; $CRLF = "\015\012"; $DEBUG = 0; @ISA = qw(GT::Base); my %EncoderFor = ( # Standard... '7bit' => sub { NBit('7bit', @_) }, '8bit' => sub { NBit('8bit', @_) }, 'base64' => \&Base64, 'binary' => \&Binary, 'none' => \&Binary, 'quoted-printable' => \&QuotedPrint, # Non-standard... 'x-uu' => \&UU, 'x-uuencode' => \&UU, ); sub new { # -------------------------------------------------------------------------- my $this = shift; my $class = ref $this || $this; my $self = bless {}, $class; $self->init(@_); my $encoding = lc($self->{encoding} || ''); defined $EncoderFor{$encoding} or return or return $self->error("NOENCODING", "FATAL"); $self->debug("Set encoding to $encoding") if ($self->{_debug}); $self->{encoding} = $EncoderFor{$encoding}; return $self; } sub init { # -------------------------------------------------------------------------- # $obj->init (%opts); # ------------------- # Sets the options for the current object. # my $self = shift; my $opt = {}; if (@_ == 1 and ref $_[0] eq 'HASH') { $opt = shift } elsif (defined $_[0] and not @_ % 2) { $opt = {@_} } else { return $self->error("BADARGS", "FATAL", "init") } $self->{_debug} = exists($opt->{debug}) ? $opt->{debug} : $DEBUG; for my $m (qw(encoding in out)) { $self->{$m} = $opt->{$m} if defined $opt->{$m}; } return $self; } sub gt_encode { # -------------------------------------------------------------------------- my $self = shift; if (!ref $self or ref $self ne 'GT::Mail::Encoder') { $self = GT::Mail::Encoder->new(@_) or return; } $self->{encoding} or return $self->error("NOENCODING", "FATAL");; return $self->{encoding}->($self->{in}, $self->{out}); } sub supported { return exists $EncoderFor{pop()} } sub Base64 { # -------------------------------------------------------------------------- my ($in, $out) = @_; my $encoded; my $nread; my $buf = ''; # Reading multiples of 57 bytes is recommended by MIME::Base64 as it comes out # to a line of exactly 76 characters (the max). We use 2299*57 (131043 bytes) # because it comes out to about 128KB (131072 bytes). Admittedly, this number # is fairly arbitrary, but should work well for both large and small files, and # shouldn't be too memory intensive. my $read_size = 2299 * 57; if (not ref $in) { while (1) { last unless length $in; $buf = substr($in, 0, $read_size); substr($in, 0, $read_size) = ''; $encoded = encode_base64($buf, $CRLF); # Encoding to send over SMTP $encoded .= $CRLF unless $encoded =~ /$CRLF\Z/; # ensure newline! $out->($encoded); } } elsif (defined fileno $in) { while ($nread = read($in, $buf, $read_size)) { $encoded = encode_base64($buf, $CRLF); $encoded .= $CRLF unless $encoded =~ /$CRLF\Z/; # ensure newline! $out->($encoded); } } elsif (ref $in eq 'GLOB') { die "Glob reference passed in is not an open filehandle"; } else { die "Bad arguments passed to Base64, first argument must be a scalar or a filehandle"; } 1; } sub Binary { # -------------------------------------------------------------------------- my ($in, $out) = @_; if (not ref $in) { $in =~ s/\015?\012/$CRLF/g; $out->($in); } elsif (defined fileno $in) { my ($buf, $nread) = ('', 0); while ($nread = read($in, $buf, 4096)) { $buf =~ s/\015?\012/$CRLF/g; $out->($buf); } defined ($nread) or return; # check for error } elsif (ref $in eq 'GLOB') { die "Glob reference passed in is not an open filehandle"; } else { die "Bad arguments passed to Binary, first argument must be a scalar or a filehandle"; } 1; } sub UU { # -------------------------------------------------------------------------- my ($in, $out, $file) = @_; my $buf = ''; my $fname = ($file || ''); $out->("begin 644 $fname\n"); if (not ref $in) { while (1) { last unless length $in; $buf = substr($in, 0, 45); substr($in, 0, 45) = ''; $out->(pack('u', $buf)); } } elsif (defined fileno $in) { while (read($in, $buf, 45)) { $buf =~ s/\015?\012/$CRLF/g; $out->(pack('u', $buf)) } } elsif (ref $in eq 'GLOB') { die "Glob reference passed in is not an open filehandle"; } else { die "Bad arguments passed to UU, first argument must be a scalar or a filehandle"; } $out->("end\n"); 1; } sub NBit { # -------------------------------------------------------------------------- my ($enc, $in, $out) = @_; if (not ref $in) { $in =~ s/\015?\012/$CRLF/g; $out->($in); } elsif (defined fileno $in) { while (<$in>) { s/\015?\012/$CRLF/g; $out->($_); } } elsif (ref $in eq 'GLOB') { die "Glob reference passed in is not an open filehandle"; } else { die "Bad arguments passed to NBit, first argument must be a scalar or a filehandle"; } 1; } sub QuotedPrint { # -------------------------------------------------------------------------- my ($in, $out) = @_; local $_; my $ref = ref $in; if ($ref and not defined fileno($in)) { if ($ref eq 'GLOB') { die "Glob reference passed in is not an open filehandle"; } else { die "Bad arguments passed to QuotedPrint, first argument must be a scalar or a filehandle"; } } $in =~ s/\015?\012/\n/g unless $ref; while () { local $_; if ($ref) { # Try to get around 32KB at once. This could end up being much larger than # 32KB if there is a very very long line - up to the length of the line + 32700 # bytes. $_ = <$in>; while (my $line = <$in>) { $_ .= $line; last if length > 32_700; # Not exactly 32KB, but close enough. } last unless defined; } else { # Grab up to just shy of 32KB of the string, plus the following line. As # above, this could be much longer than 32KB if there is one or more very long # lines involved. $in =~ s/^(.{0,32700}.*?(?:\n|\Z))//ms; # Encode somewhere around 32KB at a time $_ = $1; last unless defined and length; } if ($use_encode_qp) { $_ = MIME::QuotedPrint::encode_qp($_, $CRLF); } else { s/([^ \t\n!-<>-~])/sprintf("=%02X", ord($1))/eg; # rule #2,#3 s/([ \t]+)$/ join('', map { sprintf("=%02X", ord($_)) } split('', $1) )/egm; # rule #3 (encode whitespace at eol) # rule #5 (lines must be shorter than 76 chars, but we are not allowed # to break =XX escapes. This makes things complicated :-( ) my $brokenlines = ""; $brokenlines .= "$1=\n" while s/(.*?^[^\n]{73} (?: [^=\n]{2} (?! [^=\n]{0,1} $) # 75 not followed by .?\n |[^=\n] (?! [^=\n]{0,2} $) # 74 not followed by .?.?\n | (?! [^=\n]{0,3} $) # 73 not followed by .?.?.?\n ))//xsm; $_ = "$brokenlines$_"; s/\015?\012/$CRLF/g; } # Escape 'From ' at the beginning of the line. This is fairly easy - if the # line is currently 73 or fewer characters, we simply change the F to =46, # making the line 75 characters long (the max). If the line is longer than 73, # we escape the F, follow it with "=$CRLF", and put the 'rom ' and the rest of # the line on the next line - meaning one line of 4 characters, and one of 73 # or 74. s/^From (.*)/ length($1) <= 68 ? "=46rom $1" : "=46=${CRLF}rom $1" /emg; # Escape 'From' at the beginning of a line # The '.' at the beginning of the line is more difficult. The easy case is # when the line is 73 or fewer characters - just escape the initial . and we're # done. If the line is longer, the fun starts. First, we escape the initial . # to =2E. Then we look for the first = in the line; if it is found within the # first 3 characters, we split two characters after it (to catch the "12" in # "=12") otherwise we split after the third character. We then add "=$CRLF" to # the current line, and look at the next line; if it starts with 'From ' or a # ., we escape it - and since the second line will always be less than 73 # characters long (since we remove at least three for the first line), we can # just escape it without worrying about splitting the line up again. s/^\.([^$CRLF]*)/ if (length($1) <= 72) { "=2E$1" } else { my $ret = "=2E"; my $match = $1; my $index = index($match, '='); my $len = $index >= 2 ? 2 : $index + 3; $ret .= substr($match, 0, $len); substr($match, 0, $len) = ''; $ret .= "=$CRLF"; substr($match, 0, 1) = "=46" if substr($match, 0, 5) eq 'From '; substr($match, 0, 1) = "=2E" if substr($match, 0, 1) eq '.'; $ret .= $match; $ret } /emg; $out->($_); last unless $ref or length $in; } return 1; } sub gt_old_encode_base64 { # -------------------------------------------------------------------------- my $eol = $_[1]; $eol = "\n" unless defined $eol; my $res = pack("u", $_[0]); $res =~ s/^.//mg; # Remove first character of each line $res =~ tr/\n//d; # Remove newlines $res =~ tr|` -_|AA-Za-z0-9+/|; # Fix padding at the end my $padding = (3 - length($_[0]) % 3) % 3; $res =~ s/.{$padding}$/'=' x $padding/e if $padding; # Break encoded string into lines of no more than 76 characters each if (length $eol) { $res =~ s/(.{1,76})/$1$eol/g; } $res; } 1; __END__ =head1 NAME GT::Mail::Encoder - MIME Encoder =head1 SYNOPSIS open IN, 'decoded.txt' or die $!; open OUT, '>encoded.txt' or die $!; if (GT::Mail::Encoder->supported ('7bit')) { GT::Mail::Encoder->decode ( debug => 1, encoding => '7bit', in => \*IN, out => sub { print OUT $_[0] } ) or die $GT::Mail::Encoder::error; } else { die "Unsupported encoding"; } close IN; close OUT; =head1 DESCRIPTION GT::Mail::Encoder is a MIME Encoder implemented in perl. It will try to use the C extension for encoding Base64. If the extension is not there it will do it in perl (slow!). =head2 Encoding a stream The new() constructor and the supported() class method are the only methods that are public in the interface. The new() constructor takes a hash of params. The supported() method takes a single string, the name of the encoding you want to encode and returns true if the encoding is supported and false otherwise. =over 4 =item debug Set debugging level. 1 or 0. =item encoding Sets the encoding used to encode. =item in Set to a file handle or IO handle. =item out Set to a code reference, the decoded stream will be passed in at the first argument for each chunk encoded. =back =head1 COPYRIGHT Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved. http://www.gossamer-threads.com/ =head1 VERSION Revision: $Id: Encoder.pm,v 1.44 2008/10/29 23:31:51 brewt Exp $