# ==================================================================
# Gossamer Threads Module Library - http://gossamer-threads.com/
#
#   GT::Mail::Encoder
#   Author  : Scott Beck
#   CVS Info :                          
#   $Id: Encoder.pm,v 1.40 2004/01/13 01:35:17 jagerman Exp $
#
# Copyright (c) 2004 Gossamer Threads Inc.  All Rights Reserved.
# ==================================================================
#
# Description: A general purpose perl interface for encoding data.
#

package GT::Mail::Encoder;
# ==================================================================
# If MIME::Base64 is installed use it - must eval before hand or 5.004_04
# wipes our ISA.
my $have_b64 = eval {
    local $SIG{__DIE__};
    require MIME::Base64;
    import MIME::Base64;
    if ($] < 5.005) { local $^W; encode_base64('brok'); }
    1;
};
$have_b64 or *encode_base64 = \&gt_old_encode_base64;
my $use_encode_qp;
if ($have_b64 and
    $MIME::Base64::VERSION >= 2.16 and
    defined &MIME::QuotedPrint::encode_qp and (
        not defined &MIME::QuotedPrint::old_encode_qp or
        \&MIME::QuotedPrint::encode_qp != \&MIME::QuotedPrint::old_encode_qp
    )
) {
    $use_encode_qp = 1;
}

# Pragmas
use strict;
use vars qw($VERSION $DEBUG @ISA %EncodeFor $CRLF);

$VERSION = sprintf "%d.%03d", q$Revision: 1.40 $ =~ /(\d+)\.(\d+)/;
$CRLF    = "\015\012";
$DEBUG   = 0;
@ISA     = qw(GT::Base);

my %EncoderFor = (
    # Standard...
    '7bit'       => sub { NBit('7bit', @_) },
    '8bit'       => sub { NBit('8bit', @_) },
    'base64'     => \&Base64,
    'binary'     => \&Binary,
    'none'       => \&Binary,
    'quoted-printable' => \&QuotedPrint,

    # Non-standard...
    'x-uu'       => \&UU,
    'x-uuencode' => \&UU,
);

sub new {
# --------------------------------------------------------------------------
    my $this = shift;
    my $class = ref $this || $this;
    my $self = bless {}, $class;
    $self->init(@_);
    my $encoding = lc($self->{encoding} || '');
    defined $EncoderFor{$encoding} or return or return $self->error("NOENCODING", "FATAL");
    $self->debug("Set encoding to $encoding") if ($self->{_debug});
    $self->{encoding} = $EncoderFor{$encoding};
    return $self;
}

sub init { 
# --------------------------------------------------------------------------
# $obj->init (%opts);
# -------------------
#   Sets the options for the current object.
#
    my $self = shift;
    my $opt = {};
    if (@_ == 1 and ref $_[0] eq 'HASH') { $opt = shift }
    elsif (defined $_[0] and not @_ % 2) { $opt = {@_} }
    else { return $self->error("BADARGS", "FATAL", "init") }
    
    $self->{_debug} = exists($opt->{debug}) ? $opt->{debug} : $DEBUG;
    for my $m (qw(encoding in out)) {
        $self->{$m} = $opt->{$m} if defined $opt->{$m};
    }
    return $self;
}

sub gt_encode {
# --------------------------------------------------------------------------
    my $self = shift;
    if (!ref $self or ref $self ne 'GT::Mail::Encoder') {
        $self = GT::Mail::Encoder->new(@_) or return;
    }
    $self->{encoding} or return $self->error("NOENCODING", "FATAL");;
    return $self->{encoding}->($self->{in}, $self->{out});
}

sub supported { return exists $EncoderFor{pop()} }


sub Base64 {
# --------------------------------------------------------------------------
    my ($in, $out) = @_;
    my $encoded;

    my $nread;
    my $buf = '';

# Reading multiples of 57 bytes is recommended by MIME::Base64 as it comes out
# to a line of exactly 76 characters (the max).  We use 2299*57 (131043 bytes)
# because it comes out to about 128KB (131072 bytes).  Admittedly, this number
# is fairly arbitrary, but should work well for both large and small files, and
# shouldn't be too memory intensive.
    my $read_size = 2299 * 57;

    if (not ref $in) {
        while (1) {
            last unless length $in;
            $buf = substr($in, 0, $read_size);
            substr($in, 0, $read_size) = '';

            $encoded = encode_base64($buf, $CRLF);

# Encoding to send over SMTP
            $encoded .= $CRLF unless $encoded =~ /$CRLF\Z/; # ensure newline!
            $out->($encoded);
        }
    }
    elsif (fileno $in) {
        while ($nread = read($in, $buf, $read_size)) {
            $encoded = encode_base64($buf, $CRLF);

            $encoded .= $CRLF unless $encoded =~ /$CRLF\Z/; # ensure newline!
            $out->($encoded);
        }
    }
    elsif (ref $in eq 'GLOB') {
        die "Glob reference passed in is not an open filehandle";
    }
    else {
        die "Bad arguments passed to Base64, first argument must be a scalar or a filehandle";
    }
    1;
}

sub Binary {
# --------------------------------------------------------------------------
    my ($in, $out) = @_;

    if (not ref $in) {
        $in =~ s/\015?\012/$CRLF/g;
        $out->($in);
    }
    elsif (fileno $in) {
        my ($buf, $nread) = ('', 0);
        while ($nread = read($in, $buf, 4096)) {
            $buf =~ s/\015?\012/$CRLF/g;
            $out->($buf);
        }
        defined ($nread) or return;      # check for error
    }
    elsif (ref $in eq 'GLOB') {
        die "Glob reference passed in is not an open filehandle";
    }
    else {
        die "Bad arguments passed to Binary, first argument must be a scalar or a filehandle";
    }
    1;
}

sub UU {
# --------------------------------------------------------------------------
    my ($in, $out, $file) = @_;

    my $buf = '';
    my $fname = ($file || '');
    $out->("begin 644 $fname\n");
    if (not ref $in) {
        while (1) {
            last unless length $in;
            $buf = substr($in, 0, 45);
            substr($in, 0, 45) = '';
            $out->(pack('u', $buf));
        }
    }
    elsif (fileno $in) {
        while (read($in, $buf, 45)) {
            $buf =~ s/\015?\012/$CRLF/g;
            $out->(pack('u', $buf)) 
        }
    }
    elsif (ref $in eq 'GLOB') {
        die "Glob reference passed in is not an open filehandle";
    }
    else {
        die "Bad arguments passed to UU, first argument must be a scalar or a filehandle";
    }
    $out->("end\n");
    1;
}

sub NBit {
# --------------------------------------------------------------------------
    my ($enc, $in, $out) = @_;

    if (not ref $in) {
        $in =~ s/\015?\012/$CRLF/g;
        $out->($in);
    }
    elsif (fileno $in) {
        while (<$in>) {
            s/\015?\012/$CRLF/g;
            $out->($_);
        }
    }
    elsif (ref $in eq 'GLOB') {
        die "Glob reference passed in is not an open filehandle";
    }
    else {
        die "Bad arguments passed to NBit, first argument must be a scalar or a filehandle";
    }
    1;
}

sub QuotedPrint {
# --------------------------------------------------------------------------
    my ($in, $out) = @_;

    local $_;
    my $ref = ref $in;
    if ($ref and !fileno($in)) {
        if ($ref eq 'GLOB') {
            die "Glob reference passed in is not an open filehandle";
        }
        else {
            die "Bad arguments passed to QuotedPrint, first argument must be a scalar or a filehandle";
        }
    }
    $in =~ s/\015?\012/\n/g unless $ref;

    while () {
        local $_;
        if ($ref) {
# Try to get around 32KB at once.  This could end up being much larger than
# 32KB if there is a very very long line - up to the length of the line + 32700
# bytes.
            $_ = <$in>;
            while (my $line = <$in>) {
                $_ .= $line;
                last if length > 32_700; # Not exactly 32KB, but close enough.
            }
            last unless defined;
        }
        else {
# Grab up to just shy of 32KB of the string, plus the following line.  As
# above, this could be much longer than 32KB if there is one or more very long
# lines involved.
            $in =~ s/^(.{0,32700}.*?(?:\n|\Z))//ms; # Encode somewhere around 32KB at a time
            $_ = $1;
            last unless defined and length;
        }

        if ($use_encode_qp) {
            $_ = MIME::QuotedPrint::encode_qp($_, $CRLF);
        }
        else {
            s/([^ \t\n!-<>-~])/sprintf("=%02X", ord($1))/eg;  # rule #2,#3
            s/([ \t]+)$/
              join('', map { sprintf("=%02X", ord($_)) }
                   split('', $1)
              )/egm;                        # rule #3 (encode whitespace at eol)

            # rule #5 (lines must be shorter than 76 chars, but we are not allowed
            # to break =XX escapes.  This makes things complicated :-( )
            my $brokenlines = "";
            $brokenlines .= "$1=\n"
                while s/(.*?^[^\n]{73} (?:
                     [^=\n]{2} (?! [^=\n]{0,1} $) # 75 not followed by .?\n
                    |[^=\n]    (?! [^=\n]{0,2} $) # 74 not followed by .?.?\n
                    |          (?! [^=\n]{0,3} $) # 73 not followed by .?.?.?\n
                ))//xsm;

            $_ = "$brokenlines$_";

            s/\015?\012/$CRLF/g;
        }

# Escape 'From ' at the beginning of the line.  This is fairly easy - if the
# line is currently 73 or fewer characters, we simply change the F to =46,
# making the line 75 characters long (the max).  If the line is longer than 73,
# we escape the F, follow it with "=$CRLF", and put the 'rom ' and the rest of
# the line on the next line - meaning one line of 4 characters, and one of 73
# or 74.
        s/^From (.*)/
            length($1) <= 68 ? "=46rom $1" : "=46=${CRLF}rom $1"
        /emg; # Escape 'From' at the beginning of a line
# The '.' at the beginning of the line is more difficult.  The easy case is
# when the line is 73 or fewer characters - just escape the initial . and we're
# done.  If the line is longer, the fun starts.  First, we escape the initial .
# to =2E.  Then we look for the first = in the line; if it is found within the
# first 3 characters, we split two characters after it (to catch the "12" in
# "=12") otherwise we split after the third character.  We then add "=$CRLF" to
# the current line, and look at the next line; if it starts with 'From ' or a
# ., we escape it - and since the second line will always be less than 73
# characters long (since we remove at least three for the first line), we can
# just escape it without worrying about splitting the line up again.
        s/^\.([^$CRLF]*)/
            if (length($1) <= 72) {
                "=2E$1"
            }
            else {
                my $ret = "=2E";
                my $match = $1;
                my $index = index($match, '=');
                my $len = $index >= 2 ? 2 : $index + 3;
                $ret .= substr($match, 0, $len);
                substr($match, 0, $len) = '';
                $ret .= "=$CRLF";
                substr($match, 0, 1) = "=46" if substr($match, 0, 5) eq 'From ';
                substr($match, 0, 1) = "=2E" if substr($match, 0, 1) eq '.';
                $ret .= $match;
                $ret
            }
        /emg;

        $out->($_);

        last unless $ref or length $in;
    }
    return 1;
}

sub gt_old_encode_base64 {
# --------------------------------------------------------------------------
    my $eol = $_[1];
    $eol = "\n" unless defined $eol;

    my $res = pack("u", $_[0]);
    $res =~ s/^.//mg; # Remove first character of each line
    $res =~ tr/\n//d; # Remove newlines

    $res =~ tr|` -_|AA-Za-z0-9+/|;

    # Fix padding at the end
    my $padding = (3 - length($_[0]) % 3) % 3;
    $res =~ s/.{$padding}$/'=' x $padding/e if $padding;

    # Break encoded string into lines of no more than 76 characters each
    if (length $eol) {
        $res =~ s/(.{1,76})/$1$eol/g;
    }
    $res;
}

1;

__END__

=head1 NAME

GT::Mail::Encoder - MIME Encoder

=head1 SYNOPSIS

        open IN, 'decoded.txt' or die $!;
        open OUT, '>encoded.txt' or die $!;
        if (GT::Mail::Encoder->supported ('7bit')) {
            GT::Mail::Encoder->decode (
                                    debug    => 1,
                                    encoding => '7bit',
                                    in       => \*IN,
                                    out      => sub { print OUT $_[0] }
                                ) or die $GT::Mail::Encoder::error;
        }
        else {
            die "Unsupported encoding";
        }
        close IN;
        close OUT;

=head1 DESCRIPTION

GT::Mail::Encoder is a MIME Encoder implemented in perl. It will try to use
the C extension for encoding Base64. If the extension is not there 
it will do it in perl (slow!).

=head2 Encoding a stream

The new() constructor and the supported() class method are the only methods that 
are public in the interface. The new() constructor takes a hash of params.
The supported() method takes a single string, the name of the encoding you want
to encode and returns true if the encoding is supported and false otherwise.

=over 4

=item debug

Set debugging level. 1 or 0.

=item encoding

Sets the encoding used to encode.

=item in

Set to a file handle or IO handle.

=item out

Set to a code reference, the decoded stream will be passed in at the first
argument for each chunk encoded.

=back

=head1 COPYRIGHT

Copyright (c) 2004 Gossamer Threads Inc.  All Rights Reserved.
http://www.gossamer-threads.com/

=head1 VERSION

Revision: $Id: Encoder.pm,v 1.40 2004/01/13 01:35:17 jagerman Exp $