discourse-legacysite-perl/site/slowtwitch.com/cgi-bin/articles/GT/Mail/Encoder.pm
2024-06-17 21:49:12 +10:00

430 lines
13 KiB
Perl

# ==================================================================
# Gossamer Threads Module Library - http://gossamer-threads.com/
#
# GT::Mail::Encoder
# Author : Scott Beck
# CVS Info : 087,071,086,086,085
# $Id: Encoder.pm,v 1.44 2008/10/29 23:31:51 brewt Exp $
#
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
# ==================================================================
#
# Description: A general purpose perl interface for encoding data.
#
package GT::Mail::Encoder;
# ==================================================================
# If MIME::Base64 is installed use it - must eval before hand or 5.004_04
# wipes our ISA.
my $have_b64 = eval {
local $SIG{__DIE__};
require MIME::Base64;
import MIME::Base64;
if ($] < 5.005) { local $^W; encode_base64('brok'); }
1;
};
$have_b64 or *encode_base64 = \&gt_old_encode_base64;
my $use_encode_qp;
if ($have_b64 and
$MIME::Base64::VERSION ge 2.16 and
defined &MIME::QuotedPrint::encode_qp and (
not defined &MIME::QuotedPrint::old_encode_qp or
\&MIME::QuotedPrint::encode_qp != \&MIME::QuotedPrint::old_encode_qp
)
) {
$use_encode_qp = 1;
}
# Pragmas
use strict;
use vars qw($VERSION $DEBUG @ISA %EncodeFor $CRLF);
$VERSION = sprintf "%d.%03d", q$Revision: 1.44 $ =~ /(\d+)\.(\d+)/;
$CRLF = "\015\012";
$DEBUG = 0;
@ISA = qw(GT::Base);
my %EncoderFor = (
# Standard...
'7bit' => sub { NBit('7bit', @_) },
'8bit' => sub { NBit('8bit', @_) },
'base64' => \&Base64,
'binary' => \&Binary,
'none' => \&Binary,
'quoted-printable' => \&QuotedPrint,
# Non-standard...
'x-uu' => \&UU,
'x-uuencode' => \&UU,
);
sub new {
# --------------------------------------------------------------------------
my $this = shift;
my $class = ref $this || $this;
my $self = bless {}, $class;
$self->init(@_);
my $encoding = lc($self->{encoding} || '');
defined $EncoderFor{$encoding} or return or return $self->error("NOENCODING", "FATAL");
$self->debug("Set encoding to $encoding") if ($self->{_debug});
$self->{encoding} = $EncoderFor{$encoding};
return $self;
}
sub init {
# --------------------------------------------------------------------------
# $obj->init (%opts);
# -------------------
# Sets the options for the current object.
#
my $self = shift;
my $opt = {};
if (@_ == 1 and ref $_[0] eq 'HASH') { $opt = shift }
elsif (defined $_[0] and not @_ % 2) { $opt = {@_} }
else { return $self->error("BADARGS", "FATAL", "init") }
$self->{_debug} = exists($opt->{debug}) ? $opt->{debug} : $DEBUG;
for my $m (qw(encoding in out)) {
$self->{$m} = $opt->{$m} if defined $opt->{$m};
}
return $self;
}
sub gt_encode {
# --------------------------------------------------------------------------
my $self = shift;
if (!ref $self or ref $self ne 'GT::Mail::Encoder') {
$self = GT::Mail::Encoder->new(@_) or return;
}
$self->{encoding} or return $self->error("NOENCODING", "FATAL");;
return $self->{encoding}->($self->{in}, $self->{out});
}
sub supported { return exists $EncoderFor{pop()} }
sub Base64 {
# --------------------------------------------------------------------------
my ($in, $out) = @_;
my $encoded;
my $nread;
my $buf = '';
# Reading multiples of 57 bytes is recommended by MIME::Base64 as it comes out
# to a line of exactly 76 characters (the max). We use 2299*57 (131043 bytes)
# because it comes out to about 128KB (131072 bytes). Admittedly, this number
# is fairly arbitrary, but should work well for both large and small files, and
# shouldn't be too memory intensive.
my $read_size = 2299 * 57;
if (not ref $in) {
while (1) {
last unless length $in;
$buf = substr($in, 0, $read_size);
substr($in, 0, $read_size) = '';
$encoded = encode_base64($buf, $CRLF);
# Encoding to send over SMTP
$encoded .= $CRLF unless $encoded =~ /$CRLF\Z/; # ensure newline!
$out->($encoded);
}
}
elsif (defined fileno $in) {
while ($nread = read($in, $buf, $read_size)) {
$encoded = encode_base64($buf, $CRLF);
$encoded .= $CRLF unless $encoded =~ /$CRLF\Z/; # ensure newline!
$out->($encoded);
}
}
elsif (ref $in eq 'GLOB') {
die "Glob reference passed in is not an open filehandle";
}
else {
die "Bad arguments passed to Base64, first argument must be a scalar or a filehandle";
}
1;
}
sub Binary {
# --------------------------------------------------------------------------
my ($in, $out) = @_;
if (not ref $in) {
$in =~ s/\015?\012/$CRLF/g;
$out->($in);
}
elsif (defined fileno $in) {
my ($buf, $nread) = ('', 0);
while ($nread = read($in, $buf, 4096)) {
$buf =~ s/\015?\012/$CRLF/g;
$out->($buf);
}
defined ($nread) or return; # check for error
}
elsif (ref $in eq 'GLOB') {
die "Glob reference passed in is not an open filehandle";
}
else {
die "Bad arguments passed to Binary, first argument must be a scalar or a filehandle";
}
1;
}
sub UU {
# --------------------------------------------------------------------------
my ($in, $out, $file) = @_;
my $buf = '';
my $fname = ($file || '');
$out->("begin 644 $fname\n");
if (not ref $in) {
while (1) {
last unless length $in;
$buf = substr($in, 0, 45);
substr($in, 0, 45) = '';
$out->(pack('u', $buf));
}
}
elsif (defined fileno $in) {
while (read($in, $buf, 45)) {
$buf =~ s/\015?\012/$CRLF/g;
$out->(pack('u', $buf))
}
}
elsif (ref $in eq 'GLOB') {
die "Glob reference passed in is not an open filehandle";
}
else {
die "Bad arguments passed to UU, first argument must be a scalar or a filehandle";
}
$out->("end\n");
1;
}
sub NBit {
# --------------------------------------------------------------------------
my ($enc, $in, $out) = @_;
if (not ref $in) {
$in =~ s/\015?\012/$CRLF/g;
$out->($in);
}
elsif (defined fileno $in) {
while (<$in>) {
s/\015?\012/$CRLF/g;
$out->($_);
}
}
elsif (ref $in eq 'GLOB') {
die "Glob reference passed in is not an open filehandle";
}
else {
die "Bad arguments passed to NBit, first argument must be a scalar or a filehandle";
}
1;
}
sub QuotedPrint {
# --------------------------------------------------------------------------
my ($in, $out) = @_;
local $_;
my $ref = ref $in;
if ($ref and not defined fileno($in)) {
if ($ref eq 'GLOB') {
die "Glob reference passed in is not an open filehandle";
}
else {
die "Bad arguments passed to QuotedPrint, first argument must be a scalar or a filehandle";
}
}
$in =~ s/\015?\012/\n/g unless $ref;
while () {
local $_;
if ($ref) {
# Try to get around 32KB at once. This could end up being much larger than
# 32KB if there is a very very long line - up to the length of the line + 32700
# bytes.
$_ = <$in>;
while (my $line = <$in>) {
$_ .= $line;
last if length > 32_700; # Not exactly 32KB, but close enough.
}
last unless defined;
}
else {
# Grab up to just shy of 32KB of the string, plus the following line. As
# above, this could be much longer than 32KB if there is one or more very long
# lines involved.
$in =~ s/^(.{0,32700}.*?(?:\n|\Z))//ms; # Encode somewhere around 32KB at a time
$_ = $1;
last unless defined and length;
}
if ($use_encode_qp) {
$_ = MIME::QuotedPrint::encode_qp($_, $CRLF);
}
else {
s/([^ \t\n!-<>-~])/sprintf("=%02X", ord($1))/eg; # rule #2,#3
s/([ \t]+)$/
join('', map { sprintf("=%02X", ord($_)) }
split('', $1)
)/egm; # rule #3 (encode whitespace at eol)
# rule #5 (lines must be shorter than 76 chars, but we are not allowed
# to break =XX escapes. This makes things complicated :-( )
my $brokenlines = "";
$brokenlines .= "$1=\n"
while s/(.*?^[^\n]{73} (?:
[^=\n]{2} (?! [^=\n]{0,1} $) # 75 not followed by .?\n
|[^=\n] (?! [^=\n]{0,2} $) # 74 not followed by .?.?\n
| (?! [^=\n]{0,3} $) # 73 not followed by .?.?.?\n
))//xsm;
$_ = "$brokenlines$_";
s/\015?\012/$CRLF/g;
}
# Escape 'From ' at the beginning of the line. This is fairly easy - if the
# line is currently 73 or fewer characters, we simply change the F to =46,
# making the line 75 characters long (the max). If the line is longer than 73,
# we escape the F, follow it with "=$CRLF", and put the 'rom ' and the rest of
# the line on the next line - meaning one line of 4 characters, and one of 73
# or 74.
s/^From (.*)/
length($1) <= 68 ? "=46rom $1" : "=46=${CRLF}rom $1"
/emg; # Escape 'From' at the beginning of a line
# The '.' at the beginning of the line is more difficult. The easy case is
# when the line is 73 or fewer characters - just escape the initial . and we're
# done. If the line is longer, the fun starts. First, we escape the initial .
# to =2E. Then we look for the first = in the line; if it is found within the
# first 3 characters, we split two characters after it (to catch the "12" in
# "=12") otherwise we split after the third character. We then add "=$CRLF" to
# the current line, and look at the next line; if it starts with 'From ' or a
# ., we escape it - and since the second line will always be less than 73
# characters long (since we remove at least three for the first line), we can
# just escape it without worrying about splitting the line up again.
s/^\.([^$CRLF]*)/
if (length($1) <= 72) {
"=2E$1"
}
else {
my $ret = "=2E";
my $match = $1;
my $index = index($match, '=');
my $len = $index >= 2 ? 2 : $index + 3;
$ret .= substr($match, 0, $len);
substr($match, 0, $len) = '';
$ret .= "=$CRLF";
substr($match, 0, 1) = "=46" if substr($match, 0, 5) eq 'From ';
substr($match, 0, 1) = "=2E" if substr($match, 0, 1) eq '.';
$ret .= $match;
$ret
}
/emg;
$out->($_);
last unless $ref or length $in;
}
return 1;
}
sub gt_old_encode_base64 {
# --------------------------------------------------------------------------
my $eol = $_[1];
$eol = "\n" unless defined $eol;
my $res = pack("u", $_[0]);
$res =~ s/^.//mg; # Remove first character of each line
$res =~ tr/\n//d; # Remove newlines
$res =~ tr|` -_|AA-Za-z0-9+/|;
# Fix padding at the end
my $padding = (3 - length($_[0]) % 3) % 3;
$res =~ s/.{$padding}$/'=' x $padding/e if $padding;
# Break encoded string into lines of no more than 76 characters each
if (length $eol) {
$res =~ s/(.{1,76})/$1$eol/g;
}
$res;
}
1;
__END__
=head1 NAME
GT::Mail::Encoder - MIME Encoder
=head1 SYNOPSIS
open IN, 'decoded.txt' or die $!;
open OUT, '>encoded.txt' or die $!;
if (GT::Mail::Encoder->supported ('7bit')) {
GT::Mail::Encoder->decode (
debug => 1,
encoding => '7bit',
in => \*IN,
out => sub { print OUT $_[0] }
) or die $GT::Mail::Encoder::error;
}
else {
die "Unsupported encoding";
}
close IN;
close OUT;
=head1 DESCRIPTION
GT::Mail::Encoder is a MIME Encoder implemented in perl. It will try to use
the C extension for encoding Base64. If the extension is not there
it will do it in perl (slow!).
=head2 Encoding a stream
The new() constructor and the supported() class method are the only methods that
are public in the interface. The new() constructor takes a hash of params.
The supported() method takes a single string, the name of the encoding you want
to encode and returns true if the encoding is supported and false otherwise.
=over 4
=item debug
Set debugging level. 1 or 0.
=item encoding
Sets the encoding used to encode.
=item in
Set to a file handle or IO handle.
=item out
Set to a code reference, the decoded stream will be passed in at the first
argument for each chunk encoded.
=back
=head1 COPYRIGHT
Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
http://www.gossamer-threads.com/
=head1 VERSION
Revision: $Id: Encoder.pm,v 1.44 2008/10/29 23:31:51 brewt Exp $