430 lines
13 KiB
Perl
430 lines
13 KiB
Perl
# ==================================================================
|
|
# Gossamer Threads Module Library - http://gossamer-threads.com/
|
|
#
|
|
# GT::Mail::Encoder
|
|
# Author : Scott Beck
|
|
# CVS Info :
|
|
# $Id: Encoder.pm,v 1.40 2004/01/13 01:35:17 jagerman Exp $
|
|
#
|
|
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
|
|
# ==================================================================
|
|
#
|
|
# Description: A general purpose perl interface for encoding data.
|
|
#
|
|
|
|
package GT::Mail::Encoder;
|
|
# ==================================================================
|
|
# If MIME::Base64 is installed use it - must eval before hand or 5.004_04
|
|
# wipes our ISA.
|
|
my $have_b64 = eval {
|
|
local $SIG{__DIE__};
|
|
require MIME::Base64;
|
|
import MIME::Base64;
|
|
if ($] < 5.005) { local $^W; encode_base64('brok'); }
|
|
1;
|
|
};
|
|
$have_b64 or *encode_base64 = \>_old_encode_base64;
|
|
my $use_encode_qp;
|
|
if ($have_b64 and
|
|
$MIME::Base64::VERSION >= 2.16 and
|
|
defined &MIME::QuotedPrint::encode_qp and (
|
|
not defined &MIME::QuotedPrint::old_encode_qp or
|
|
\&MIME::QuotedPrint::encode_qp != \&MIME::QuotedPrint::old_encode_qp
|
|
)
|
|
) {
|
|
$use_encode_qp = 1;
|
|
}
|
|
|
|
# Pragmas
|
|
use strict;
|
|
use vars qw($VERSION $DEBUG @ISA %EncodeFor $CRLF);
|
|
|
|
$VERSION = sprintf "%d.%03d", q$Revision: 1.40 $ =~ /(\d+)\.(\d+)/;
|
|
$CRLF = "\015\012";
|
|
$DEBUG = 0;
|
|
@ISA = qw(GT::Base);
|
|
|
|
my %EncoderFor = (
|
|
# Standard...
|
|
'7bit' => sub { NBit('7bit', @_) },
|
|
'8bit' => sub { NBit('8bit', @_) },
|
|
'base64' => \&Base64,
|
|
'binary' => \&Binary,
|
|
'none' => \&Binary,
|
|
'quoted-printable' => \&QuotedPrint,
|
|
|
|
# Non-standard...
|
|
'x-uu' => \&UU,
|
|
'x-uuencode' => \&UU,
|
|
);
|
|
|
|
sub new {
|
|
# --------------------------------------------------------------------------
|
|
my $this = shift;
|
|
my $class = ref $this || $this;
|
|
my $self = bless {}, $class;
|
|
$self->init(@_);
|
|
my $encoding = lc($self->{encoding} || '');
|
|
defined $EncoderFor{$encoding} or return or return $self->error("NOENCODING", "FATAL");
|
|
$self->debug("Set encoding to $encoding") if ($self->{_debug});
|
|
$self->{encoding} = $EncoderFor{$encoding};
|
|
return $self;
|
|
}
|
|
|
|
sub init {
|
|
# --------------------------------------------------------------------------
|
|
# $obj->init (%opts);
|
|
# -------------------
|
|
# Sets the options for the current object.
|
|
#
|
|
my $self = shift;
|
|
my $opt = {};
|
|
if (@_ == 1 and ref $_[0] eq 'HASH') { $opt = shift }
|
|
elsif (defined $_[0] and not @_ % 2) { $opt = {@_} }
|
|
else { return $self->error("BADARGS", "FATAL", "init") }
|
|
|
|
$self->{_debug} = exists($opt->{debug}) ? $opt->{debug} : $DEBUG;
|
|
for my $m (qw(encoding in out)) {
|
|
$self->{$m} = $opt->{$m} if defined $opt->{$m};
|
|
}
|
|
return $self;
|
|
}
|
|
|
|
sub gt_encode {
|
|
# --------------------------------------------------------------------------
|
|
my $self = shift;
|
|
if (!ref $self or ref $self ne 'GT::Mail::Encoder') {
|
|
$self = GT::Mail::Encoder->new(@_) or return;
|
|
}
|
|
$self->{encoding} or return $self->error("NOENCODING", "FATAL");;
|
|
return $self->{encoding}->($self->{in}, $self->{out});
|
|
}
|
|
|
|
sub supported { return exists $EncoderFor{pop()} }
|
|
|
|
|
|
sub Base64 {
|
|
# --------------------------------------------------------------------------
|
|
my ($in, $out) = @_;
|
|
my $encoded;
|
|
|
|
my $nread;
|
|
my $buf = '';
|
|
|
|
# Reading multiples of 57 bytes is recommended by MIME::Base64 as it comes out
|
|
# to a line of exactly 76 characters (the max). We use 2299*57 (131043 bytes)
|
|
# because it comes out to about 128KB (131072 bytes). Admittedly, this number
|
|
# is fairly arbitrary, but should work well for both large and small files, and
|
|
# shouldn't be too memory intensive.
|
|
my $read_size = 2299 * 57;
|
|
|
|
if (not ref $in) {
|
|
while (1) {
|
|
last unless length $in;
|
|
$buf = substr($in, 0, $read_size);
|
|
substr($in, 0, $read_size) = '';
|
|
|
|
$encoded = encode_base64($buf, $CRLF);
|
|
|
|
# Encoding to send over SMTP
|
|
$encoded .= $CRLF unless $encoded =~ /$CRLF\Z/; # ensure newline!
|
|
$out->($encoded);
|
|
}
|
|
}
|
|
elsif (fileno $in) {
|
|
while ($nread = read($in, $buf, $read_size)) {
|
|
$encoded = encode_base64($buf, $CRLF);
|
|
|
|
$encoded .= $CRLF unless $encoded =~ /$CRLF\Z/; # ensure newline!
|
|
$out->($encoded);
|
|
}
|
|
}
|
|
elsif (ref $in eq 'GLOB') {
|
|
die "Glob reference passed in is not an open filehandle";
|
|
}
|
|
else {
|
|
die "Bad arguments passed to Base64, first argument must be a scalar or a filehandle";
|
|
}
|
|
1;
|
|
}
|
|
|
|
sub Binary {
|
|
# --------------------------------------------------------------------------
|
|
my ($in, $out) = @_;
|
|
|
|
if (not ref $in) {
|
|
$in =~ s/\015?\012/$CRLF/g;
|
|
$out->($in);
|
|
}
|
|
elsif (fileno $in) {
|
|
my ($buf, $nread) = ('', 0);
|
|
while ($nread = read($in, $buf, 4096)) {
|
|
$buf =~ s/\015?\012/$CRLF/g;
|
|
$out->($buf);
|
|
}
|
|
defined ($nread) or return; # check for error
|
|
}
|
|
elsif (ref $in eq 'GLOB') {
|
|
die "Glob reference passed in is not an open filehandle";
|
|
}
|
|
else {
|
|
die "Bad arguments passed to Binary, first argument must be a scalar or a filehandle";
|
|
}
|
|
1;
|
|
}
|
|
|
|
sub UU {
|
|
# --------------------------------------------------------------------------
|
|
my ($in, $out, $file) = @_;
|
|
|
|
my $buf = '';
|
|
my $fname = ($file || '');
|
|
$out->("begin 644 $fname\n");
|
|
if (not ref $in) {
|
|
while (1) {
|
|
last unless length $in;
|
|
$buf = substr($in, 0, 45);
|
|
substr($in, 0, 45) = '';
|
|
$out->(pack('u', $buf));
|
|
}
|
|
}
|
|
elsif (fileno $in) {
|
|
while (read($in, $buf, 45)) {
|
|
$buf =~ s/\015?\012/$CRLF/g;
|
|
$out->(pack('u', $buf))
|
|
}
|
|
}
|
|
elsif (ref $in eq 'GLOB') {
|
|
die "Glob reference passed in is not an open filehandle";
|
|
}
|
|
else {
|
|
die "Bad arguments passed to UU, first argument must be a scalar or a filehandle";
|
|
}
|
|
$out->("end\n");
|
|
1;
|
|
}
|
|
|
|
sub NBit {
|
|
# --------------------------------------------------------------------------
|
|
my ($enc, $in, $out) = @_;
|
|
|
|
if (not ref $in) {
|
|
$in =~ s/\015?\012/$CRLF/g;
|
|
$out->($in);
|
|
}
|
|
elsif (fileno $in) {
|
|
while (<$in>) {
|
|
s/\015?\012/$CRLF/g;
|
|
$out->($_);
|
|
}
|
|
}
|
|
elsif (ref $in eq 'GLOB') {
|
|
die "Glob reference passed in is not an open filehandle";
|
|
}
|
|
else {
|
|
die "Bad arguments passed to NBit, first argument must be a scalar or a filehandle";
|
|
}
|
|
1;
|
|
}
|
|
|
|
sub QuotedPrint {
|
|
# --------------------------------------------------------------------------
|
|
my ($in, $out) = @_;
|
|
|
|
local $_;
|
|
my $ref = ref $in;
|
|
if ($ref and !fileno($in)) {
|
|
if ($ref eq 'GLOB') {
|
|
die "Glob reference passed in is not an open filehandle";
|
|
}
|
|
else {
|
|
die "Bad arguments passed to QuotedPrint, first argument must be a scalar or a filehandle";
|
|
}
|
|
}
|
|
$in =~ s/\015?\012/\n/g unless $ref;
|
|
|
|
while () {
|
|
local $_;
|
|
if ($ref) {
|
|
# Try to get around 32KB at once. This could end up being much larger than
|
|
# 32KB if there is a very very long line - up to the length of the line + 32700
|
|
# bytes.
|
|
$_ = <$in>;
|
|
while (my $line = <$in>) {
|
|
$_ .= $line;
|
|
last if length > 32_700; # Not exactly 32KB, but close enough.
|
|
}
|
|
last unless defined;
|
|
}
|
|
else {
|
|
# Grab up to just shy of 32KB of the string, plus the following line. As
|
|
# above, this could be much longer than 32KB if there is one or more very long
|
|
# lines involved.
|
|
$in =~ s/^(.{0,32700}.*?(?:\n|\Z))//ms; # Encode somewhere around 32KB at a time
|
|
$_ = $1;
|
|
last unless defined and length;
|
|
}
|
|
|
|
if ($use_encode_qp) {
|
|
$_ = MIME::QuotedPrint::encode_qp($_, $CRLF);
|
|
}
|
|
else {
|
|
s/([^ \t\n!-<>-~])/sprintf("=%02X", ord($1))/eg; # rule #2,#3
|
|
s/([ \t]+)$/
|
|
join('', map { sprintf("=%02X", ord($_)) }
|
|
split('', $1)
|
|
)/egm; # rule #3 (encode whitespace at eol)
|
|
|
|
# rule #5 (lines must be shorter than 76 chars, but we are not allowed
|
|
# to break =XX escapes. This makes things complicated :-( )
|
|
my $brokenlines = "";
|
|
$brokenlines .= "$1=\n"
|
|
while s/(.*?^[^\n]{73} (?:
|
|
[^=\n]{2} (?! [^=\n]{0,1} $) # 75 not followed by .?\n
|
|
|[^=\n] (?! [^=\n]{0,2} $) # 74 not followed by .?.?\n
|
|
| (?! [^=\n]{0,3} $) # 73 not followed by .?.?.?\n
|
|
))//xsm;
|
|
|
|
$_ = "$brokenlines$_";
|
|
|
|
s/\015?\012/$CRLF/g;
|
|
}
|
|
|
|
# Escape 'From ' at the beginning of the line. This is fairly easy - if the
|
|
# line is currently 73 or fewer characters, we simply change the F to =46,
|
|
# making the line 75 characters long (the max). If the line is longer than 73,
|
|
# we escape the F, follow it with "=$CRLF", and put the 'rom ' and the rest of
|
|
# the line on the next line - meaning one line of 4 characters, and one of 73
|
|
# or 74.
|
|
s/^From (.*)/
|
|
length($1) <= 68 ? "=46rom $1" : "=46=${CRLF}rom $1"
|
|
/emg; # Escape 'From' at the beginning of a line
|
|
# The '.' at the beginning of the line is more difficult. The easy case is
|
|
# when the line is 73 or fewer characters - just escape the initial . and we're
|
|
# done. If the line is longer, the fun starts. First, we escape the initial .
|
|
# to =2E. Then we look for the first = in the line; if it is found within the
|
|
# first 3 characters, we split two characters after it (to catch the "12" in
|
|
# "=12") otherwise we split after the third character. We then add "=$CRLF" to
|
|
# the current line, and look at the next line; if it starts with 'From ' or a
|
|
# ., we escape it - and since the second line will always be less than 73
|
|
# characters long (since we remove at least three for the first line), we can
|
|
# just escape it without worrying about splitting the line up again.
|
|
s/^\.([^$CRLF]*)/
|
|
if (length($1) <= 72) {
|
|
"=2E$1"
|
|
}
|
|
else {
|
|
my $ret = "=2E";
|
|
my $match = $1;
|
|
my $index = index($match, '=');
|
|
my $len = $index >= 2 ? 2 : $index + 3;
|
|
$ret .= substr($match, 0, $len);
|
|
substr($match, 0, $len) = '';
|
|
$ret .= "=$CRLF";
|
|
substr($match, 0, 1) = "=46" if substr($match, 0, 5) eq 'From ';
|
|
substr($match, 0, 1) = "=2E" if substr($match, 0, 1) eq '.';
|
|
$ret .= $match;
|
|
$ret
|
|
}
|
|
/emg;
|
|
|
|
$out->($_);
|
|
|
|
last unless $ref or length $in;
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
sub gt_old_encode_base64 {
|
|
# --------------------------------------------------------------------------
|
|
my $eol = $_[1];
|
|
$eol = "\n" unless defined $eol;
|
|
|
|
my $res = pack("u", $_[0]);
|
|
$res =~ s/^.//mg; # Remove first character of each line
|
|
$res =~ tr/\n//d; # Remove newlines
|
|
|
|
$res =~ tr|` -_|AA-Za-z0-9+/|;
|
|
|
|
# Fix padding at the end
|
|
my $padding = (3 - length($_[0]) % 3) % 3;
|
|
$res =~ s/.{$padding}$/'=' x $padding/e if $padding;
|
|
|
|
# Break encoded string into lines of no more than 76 characters each
|
|
if (length $eol) {
|
|
$res =~ s/(.{1,76})/$1$eol/g;
|
|
}
|
|
$res;
|
|
}
|
|
|
|
1;
|
|
|
|
__END__
|
|
|
|
=head1 NAME
|
|
|
|
GT::Mail::Encoder - MIME Encoder
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
open IN, 'decoded.txt' or die $!;
|
|
open OUT, '>encoded.txt' or die $!;
|
|
if (GT::Mail::Encoder->supported ('7bit')) {
|
|
GT::Mail::Encoder->decode (
|
|
debug => 1,
|
|
encoding => '7bit',
|
|
in => \*IN,
|
|
out => sub { print OUT $_[0] }
|
|
) or die $GT::Mail::Encoder::error;
|
|
}
|
|
else {
|
|
die "Unsupported encoding";
|
|
}
|
|
close IN;
|
|
close OUT;
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
GT::Mail::Encoder is a MIME Encoder implemented in perl. It will try to use
|
|
the C extension for encoding Base64. If the extension is not there
|
|
it will do it in perl (slow!).
|
|
|
|
=head2 Encoding a stream
|
|
|
|
The new() constructor and the supported() class method are the only methods that
|
|
are public in the interface. The new() constructor takes a hash of params.
|
|
The supported() method takes a single string, the name of the encoding you want
|
|
to encode and returns true if the encoding is supported and false otherwise.
|
|
|
|
=over 4
|
|
|
|
=item debug
|
|
|
|
Set debugging level. 1 or 0.
|
|
|
|
=item encoding
|
|
|
|
Sets the encoding used to encode.
|
|
|
|
=item in
|
|
|
|
Set to a file handle or IO handle.
|
|
|
|
=item out
|
|
|
|
Set to a code reference, the decoded stream will be passed in at the first
|
|
argument for each chunk encoded.
|
|
|
|
=back
|
|
|
|
=head1 COPYRIGHT
|
|
|
|
Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
|
|
http://www.gossamer-threads.com/
|
|
|
|
=head1 VERSION
|
|
|
|
Revision: $Id: Encoder.pm,v 1.40 2004/01/13 01:35:17 jagerman Exp $
|
|
|
|
|