832 lines
26 KiB
Perl
832 lines
26 KiB
Perl
# ==================================================================
|
|
# Gossamer Threads Module Library - http://gossamer-threads.com/
|
|
#
|
|
# GT::Mail::Parse
|
|
# Author : Scott Beck
|
|
# CVS Info : 087,071,086,086,085
|
|
# $Id: Parse.pm,v 1.90 2008/10/29 23:32:07 brewt Exp $
|
|
#
|
|
# Copyright (c) 2005 Gossamer Threads Inc. All Rights Reserved.
|
|
# ==================================================================
|
|
|
|
package GT::Mail::Parse;
|
|
# =============================================================================
|
|
# If MIME::Base64 is installed use it - must eval before hand or 5.004_04 wipes
|
|
# our ISA.
|
|
my $have_b64 = eval {
|
|
local $SIG{__DIE__};
|
|
require MIME::Base64;
|
|
import MIME::Base64;
|
|
if ($] < 5.005) { local $^W; decode_base64('brok'); }
|
|
1;
|
|
};
|
|
$have_b64 or *decode_base64 = \>_old_decode_base64;
|
|
my $use_decode_qp;
|
|
if ($have_b64 and
|
|
$MIME::Base64::VERSION ge 2.16 and # Prior versions had decoding bugs
|
|
defined &MIME::QuotedPrint::decode_qp and (
|
|
not defined &MIME::QuotedPrint::old_decode_qp or
|
|
\&MIME::QuotedPrint::decode_qp != \&MIME::QuotedPrint::old_decode_qp
|
|
)
|
|
) {
|
|
$use_decode_qp = 1;
|
|
}
|
|
|
|
# Pragmas
|
|
use strict;
|
|
use vars qw($VERSION $DEBUG $ERRORS @ISA);
|
|
|
|
# System modules
|
|
use Fcntl;
|
|
|
|
# Internal modules
|
|
use GT::Mail::Parts;
|
|
use GT::Base;
|
|
|
|
# Inherent from GT::Base for errors and debug
|
|
@ISA = qw(GT::Base);
|
|
|
|
# Debugging mode
|
|
$DEBUG = 0;
|
|
|
|
$VERSION = sprintf "%d.%03d", q$Revision: 1.90 $ =~ /(\d+)\.(\d+)/;
|
|
|
|
# Error messages
|
|
$ERRORS = {
|
|
PARSE => "An error occurred while parsing: %s",
|
|
DECODE => "An error occurred while decoding: %s",
|
|
NOPARTS => "Email has no parts!",
|
|
DEEPPARTS => "Deep recursion dected, email appears to have more than 50 parts!",
|
|
MALFORMED => "Found (%s) before finding the start of the boundary. Message malformed"
|
|
};
|
|
|
|
my %DecoderFor = (
|
|
# Standard...
|
|
'7bit' => 'NBit',
|
|
'8bit' => 'NBit',
|
|
'base64' => 'Base64',
|
|
'binary' => 'Binary',
|
|
'none' => 'Binary',
|
|
'quoted-printable' => 'QuotedPrint',
|
|
|
|
# Non-standard...
|
|
'x-uu' => 'UU',
|
|
'x-uuencode' => 'UU',
|
|
);
|
|
|
|
sub new {
|
|
# --------------------------------------------------------------------------
|
|
# CLASS->new (
|
|
# naming => \&naming,
|
|
# in_file => '/path/to/file/to/parse',
|
|
# handle => \*FH
|
|
# );
|
|
# ----------------------------------------------
|
|
# Class method to get a new object. Calles init if there are any additional
|
|
# argument. To set the arguments that are passed to naming call naming
|
|
# directly.
|
|
#
|
|
my $this = shift;
|
|
my $class = ref $this || $this;
|
|
my $self = bless {
|
|
file_handle => undef,
|
|
parts => [],
|
|
head_part => undef,
|
|
headers_intact => 1,
|
|
_debug => $DEBUG,
|
|
eol => "\012"
|
|
}, $class;
|
|
$self->init(@_) if @_;
|
|
$self->debug("Created new object ($self).") if $self->{_debug} > 1;
|
|
return $self;
|
|
}
|
|
|
|
sub init {
|
|
# --------------------------------------------------------------------------
|
|
# $obj->init (%opts);
|
|
# -------------------
|
|
# Sets the options for the current object.
|
|
#
|
|
my $self = shift;
|
|
my $opt = {};
|
|
if (@_ == 1 and ref $_[0] eq 'HASH') { $opt = shift }
|
|
elsif (defined $_[0] and not @_ % 2) { $opt = {@_} }
|
|
else { return $self->error("BADARGS", "FATAL", "init") }
|
|
|
|
$self->{_debug} = exists($opt->{debug}) ? $opt->{debug} : $DEBUG;
|
|
$self->{headers_intact} = exists($opt->{headers_intact}) ? $opt->{headers_intact} : 1;
|
|
for my $m (qw(crlf in_file in_handle in_string attach_rfc822)) {
|
|
$self->$m($opt->{$m}) if defined $opt->{$m};
|
|
}
|
|
}
|
|
|
|
sub attach_rfc822 {
|
|
# --------------------------------------------------------------------------
|
|
my $self = shift;
|
|
if (@_) {
|
|
$self->{attach_rfc822} = shift;
|
|
}
|
|
return $self->{attach_rfc822};
|
|
}
|
|
|
|
sub crlf {
|
|
# -----------------------------------------------------------------------------
|
|
# Sets the end-of-line character sequence to use when parsing. This defaults
|
|
# to \012 (\n); you'll likely want to use \015\012 at times (for example, when
|
|
# parsing mail downloaded from a POP3 server). This is set on a per-parser
|
|
# basis (it used to be global, but that was significantly broken).
|
|
#
|
|
my ($self, $eol) = @_;
|
|
$self->{eol} = $eol;
|
|
}
|
|
|
|
sub parse {
|
|
# --------------------------------------------------------------------------
|
|
# my $top = $obj->parse;
|
|
# ----------------------
|
|
# Parses the email set in new or init. Also calls init if there are any
|
|
# arguments passed in.
|
|
# Returns the top level part object.
|
|
#
|
|
my ($self, @opts) = @_;
|
|
|
|
# Any additional arguments goto init
|
|
$self->init(@opts) if @opts;
|
|
|
|
($self->{string} and ref($self->{string}) eq 'SCALAR')
|
|
or return $self->error('BADARGS', 'FATAL', "No input was given to parse before parse() was called");
|
|
|
|
# Recursive function to parse
|
|
$self->_parse_part(undef, $self->{string}); # parse!
|
|
|
|
# Return top part
|
|
return $self->{head_part};
|
|
}
|
|
|
|
sub parse_head {
|
|
# --------------------------------------------------------------------------
|
|
# my $head = $obj->parse_head;
|
|
# ----------------------------
|
|
# Passes any additional arguments to init. Parses only the top level header.
|
|
# This saves some overhead if for example all you need to do it find out who
|
|
# an email is to on a POP3 server.
|
|
#
|
|
my ($self, $in, @opts) = @_;
|
|
|
|
unless (ref $self) {
|
|
$self = $self->new(@opts);
|
|
}
|
|
|
|
$in ||= $self->{string};
|
|
$in || return $self->error("BADARGS", "FATAL", "No string to parse set!");
|
|
|
|
# Parse the head
|
|
return $self->_parse_head($in);
|
|
}
|
|
|
|
#--------------------------------------------
|
|
# Access
|
|
#--------------------------------------------
|
|
|
|
|
|
sub in_handle {
|
|
# --------------------------------------------------------------------------
|
|
# $obj->in_handle (\*FH);
|
|
# --------------------
|
|
# Pass in a file handle to parse from when parse is called.
|
|
#
|
|
my ($self, $value) = @_;
|
|
if (@_ > 1 and ref $value and defined fileno $value) {
|
|
read $value, ${$self->{string}}, -s $value;
|
|
}
|
|
return $self->{string};
|
|
}
|
|
|
|
sub in_file {
|
|
# --------------------------------------------------------------------------
|
|
# $obj->in_file ('/path/to/file');
|
|
# --------------------------------
|
|
# Pass in the path to a file to parse when parse is called
|
|
#
|
|
my $self = shift;
|
|
my $file = shift;
|
|
my $io = \do { local *FH; *FH };
|
|
open $io, "<$file" or return $self->error("READOPEN", "FATAL", $file, $!);
|
|
return $self->in_handle($io);
|
|
}
|
|
|
|
sub in_string {
|
|
# --------------------------------------------------------------------------
|
|
my ($self, $string) = @_;
|
|
return $self->{string} unless (@_ > 1);
|
|
if (ref($string) eq 'SCALAR') {
|
|
$self->{string} = $string;
|
|
}
|
|
else {
|
|
$self->{string} = \$string;
|
|
}
|
|
return $self->{string};
|
|
}
|
|
|
|
sub size {
|
|
# --------------------------------------------------------------------------
|
|
# my $email_size = $obj->size;
|
|
# ----------------------------
|
|
# Returns the total size of an email. Call this method after the email has
|
|
# been parsed.
|
|
#
|
|
my $self = shift;
|
|
(@{$self->{parts}} > 0) or return $self->error("NOPARTS", "WARN");
|
|
my $size = 0;
|
|
foreach (@{$self->{parts}}) {
|
|
$size += $_->size;
|
|
}
|
|
return $size;
|
|
}
|
|
|
|
sub all_parts {
|
|
# --------------------------------------------------------------------------
|
|
# my @parts = $obj->all_parts;
|
|
# ----------------------------
|
|
# Returns a list of all the part object for the current parsed email. If the
|
|
# email is not multipart this will be just the header part.
|
|
#
|
|
return @{shift()->{parts}}
|
|
}
|
|
|
|
sub top_part {
|
|
# --------------------------------------------------------------------------
|
|
return ${shift()->{parts}}[0];
|
|
}
|
|
|
|
#---------------------------------------------
|
|
# Internal Methods
|
|
#---------------------------------------------
|
|
|
|
sub _parse_head {
|
|
# --------------------------------------------------------------------------
|
|
# Internal Method
|
|
# ---------------
|
|
# Parse just the head. Returns the part object.
|
|
#
|
|
my ($self, $in) = @_;
|
|
|
|
# Get a new part object
|
|
my $part = GT::Mail::Parts->new(headers_intact => $self->{headers_intact});
|
|
|
|
if (ref $in eq 'ARRAY') {
|
|
$part->extract($in) or return $self->error("PARSE", "WARN", "Couldn't parse head!");
|
|
return $part;
|
|
}
|
|
$part->extract([map "$_$self->{eol}", split /\Q$self->{eol}/, $$in]) or return $self->error($GT::Mail::Parts::error, 'WARN');
|
|
return $part;
|
|
}
|
|
|
|
sub _parse_part {
|
|
# --------------------------------------------------------------------------
|
|
# Internal Method
|
|
# ---------------
|
|
# Parses all the parts of an email and stores them in there parts object.
|
|
# This function is recursive.
|
|
#
|
|
my ($self, $outer_bound, $in, $part) = @_;
|
|
my $state = 'OK';
|
|
|
|
# First part is going to be the top level part
|
|
if (!$part) {
|
|
$part = GT::Mail::Parts->new(headers_intact => $self->{headers_intact});
|
|
$self->{head_part} = $part;
|
|
}
|
|
push @{$self->{parts}}, $part;
|
|
|
|
# Get the header for this part
|
|
=for comment
|
|
According to rfc2045 and rfc2046, the MIME part headers are optional, so for
|
|
parsing out the headers, we have the following cases:
|
|
|
|
1) no headers, no body
|
|
EOL--boundary
|
|
|
|
2) no headers, body
|
|
EOLbodyEOL--boundary
|
|
|
|
3) headers, no body
|
|
headers[EOL]EOL--boundary
|
|
|
|
4) headers, body
|
|
headersEOLbodyEOL--boundary
|
|
|
|
_parse_to_bound parses everything after the header to EOL--boundary, so this
|
|
header parsing must be careful not to remove the EOL before the --boundary
|
|
(cases 1 and 3), or _parse_to_bound will parse more than it should.
|
|
=cut
|
|
my $eol_len = length $self->{eol};
|
|
if (defined $outer_bound and substr($$in, 0, length "$self->{eol}--$outer_bound") eq "$self->{eol}--$outer_bound") {
|
|
# do nothing
|
|
}
|
|
elsif (substr($$in, 0, $eol_len) eq $self->{eol}) {
|
|
substr($$in, 0, $eol_len) = '';
|
|
}
|
|
else {
|
|
my $indx = index($$in, $self->{eol} x 2);
|
|
if ($indx == -1) {
|
|
$self->debug('Message has no body.') if $self->{_debug};
|
|
$indx = length($$in);
|
|
}
|
|
$part->extract([map "$_$self->{eol}", split /\Q$self->{eol}/, substr $$in, 0, $indx]) or return $self->warn($GT::Mail::Parts::error);
|
|
|
|
my $trim_len = $eol_len * 2;
|
|
if (defined $outer_bound) {
|
|
my $next_bound = "$self->{eol}$self->{eol}--$outer_bound";
|
|
if (substr($$in, $indx, length $next_bound) eq $next_bound) {
|
|
$trim_len = $eol_len;
|
|
}
|
|
}
|
|
substr($$in, 0, $indx + $trim_len) = '';
|
|
}
|
|
|
|
# Get the mime type
|
|
my ($type, $subtype) = split m{/}, $part->mime_type;
|
|
$type ||= 'text';
|
|
$subtype ||= 'plain';
|
|
if ($self->{_debug}) {
|
|
my $name = $part->recommended_filename || '[unnamed]';
|
|
$self->debug("Type is '$type/$subtype' ($name)");
|
|
}
|
|
|
|
# Deal with the multipart type with some recursion
|
|
if ($type eq 'multipart') {
|
|
my $retype = (($subtype eq 'digest') ? 'message/rfc822' : '');
|
|
|
|
# Find the multipart boundary
|
|
my $inner_bound = $part->multipart_boundary;
|
|
$self->debug("Boundary is $inner_bound") if $self->{_debug} > 1;
|
|
defined $inner_bound or return $self->error("PARSE", "WARN", "No multipart boundary in multipart message.");
|
|
index($inner_bound, $self->{eol}) == -1 or return $self->error("PARSE", "WARN", "End-of-line character in multipart boundary.");
|
|
|
|
# Parse the Preamble
|
|
$self->debug("Parsing preamble.") if $self->{_debug} > 1;
|
|
$state = $self->_parse_preamble($inner_bound, $in, $part) or return;
|
|
chomp($part->preamble->[-1]) if @{$part->preamble};
|
|
|
|
# Get all the parts of the multipart message
|
|
my $partno = 0;
|
|
my $parts;
|
|
while (1) {
|
|
++$partno < 200 or return $self->error('DEEPPARTS', 'WARN');
|
|
$self->debug("Parsing part $partno.") if $self->{_debug};
|
|
|
|
($parts, $state) = $self->_parse_part($inner_bound, $in, GT::Mail::Parts->new(headers_intact => $self->{headers_intact})) or return;
|
|
|
|
$parts->mime_type($retype) if $retype;
|
|
push(@{$part->{parts}}, $parts);
|
|
|
|
if ($state eq 'EOF') {
|
|
$self->warn(PARSE => 'Unexpected EOF before close.');
|
|
return ($part, 'EOF');
|
|
}
|
|
|
|
last if $state eq 'CLOSE';
|
|
}
|
|
|
|
# Parse the epilogue
|
|
$self->debug("Parsing epilogue.") if $self->{_debug} > 1;
|
|
$state = $self->_parse_epilogue($outer_bound, $in, $part) or return;
|
|
chomp($part->epilogue->[-1]) if @{$part->epilogue} and $state ne 'EOF';
|
|
}
|
|
|
|
# We are on a single part
|
|
else {
|
|
$self->debug("Decoding single part.") if $self->{_debug} > 1;
|
|
|
|
# Find the encoding for the body of the part
|
|
my $encoding = $part->mime_encoding || 'binary';
|
|
if (!exists($DecoderFor{lc($encoding)})) {
|
|
$self->debug("Unsupported encoding '$encoding': using 'binary'... \n" .
|
|
"The entity will have an effective MIME type of \n" .
|
|
"application/octet-stream, as per RFC-2045.")
|
|
if $self->{_debug};
|
|
$part->effective_type('application/octet-stream');
|
|
$encoding = 'binary';
|
|
}
|
|
my $reparse;
|
|
$reparse = ("$type/$subtype" eq "message/rfc822") unless $self->{attach_rfc822};
|
|
my $encoded = "";
|
|
|
|
# If we have boundaries we parse the body to the boundary
|
|
if (defined $outer_bound) {
|
|
$self->debug("Parsing to boundary.") if $self->{_debug} > 1;
|
|
$state = $self->_parse_to_bound($outer_bound, $in, \$encoded) or return;
|
|
}
|
|
# Else we would parse the rest of the input stream as the rest of the message
|
|
else {
|
|
$self->debug("No Boundries.") if $self->{_debug} > 1;
|
|
$encoded = $$in;
|
|
$state = 'EOF';
|
|
}
|
|
|
|
# Normal part so we get the body and decode it.
|
|
if (!$reparse) {
|
|
$self->debug("Not reparsing.") if $self->{_debug} > 1;
|
|
$part->{body_in} = 'MEMORY';
|
|
|
|
my $decoder = $DecoderFor{lc($encoding)};
|
|
$self->debug("Decoding part using: " . lc($encoding)) if $self->{_debug};
|
|
$part->{data} = '';
|
|
my $out = '';
|
|
my $res = $self->$decoder(\$encoded, \$out);
|
|
undef $encoded;
|
|
$res or return;
|
|
$part->{data} = $out;
|
|
undef $out;
|
|
}
|
|
else {
|
|
# If have an embeded email we reparse it.
|
|
$self->debug("Reparsing enclosed message.") if $self->{_debug};
|
|
my $out = '';
|
|
|
|
my $decoder = $DecoderFor{lc($encoding)};
|
|
$self->debug("Decoding " . lc($encoding)) if $self->{_debug};
|
|
my $res = $self->$decoder(\$encoded, \$out);
|
|
undef $encoded;
|
|
$res or return;
|
|
my $p = GT::Mail::Parts->new(headers_intact => $self->{headers_intact});
|
|
push @{$part->{parts}}, $p;
|
|
$self->_parse_part(undef, \$out, $p) or return;
|
|
}
|
|
}
|
|
return ($part, $state);
|
|
}
|
|
|
|
sub _parse_to_bound {
|
|
# --------------------------------------------------------------------------
|
|
# This method takes a boundary ($bound), an input string ref ($in), and an
|
|
# output string ref ($out). It will place into $$out the data contained by
|
|
# $bound, and remove the entire region (including boundary) from $$in.
|
|
#
|
|
my ($self, $bound, $in, $out) = @_;
|
|
|
|
# Set up strings for faster checking:
|
|
$self->debug("Parsing bounds. Skip until\n\tdelim (--$bound)\n\tclose (--$bound--)") if $self->{_debug} > 1;
|
|
my $ret;
|
|
|
|
# Various shortcut variables - 'e' is eol, 'd' is delimiter, 'c' is closing delimiter:
|
|
my ($ede, $de, $ece, $ec, $ce) = (
|
|
"$self->{eol}--$bound$self->{eol}",
|
|
"--$bound$self->{eol}",
|
|
"$self->{eol}--$bound--$self->{eol}",
|
|
"$self->{eol}--$bound--",
|
|
"--$bound--$self->{eol}"
|
|
);
|
|
|
|
# Place our part in $$out.
|
|
$$out = undef;
|
|
# eoldelimeol found anywhere:
|
|
if ((my $pos = index $$in, $ede) >= 0) {
|
|
$$out = substr($$in, 0, $pos);
|
|
substr($$in, 0, $pos + length $ede) = '';
|
|
$ret = 'DELIM';
|
|
}
|
|
# delimeol at beginning of string:
|
|
elsif (substr($$in, 0, length $de) eq $de) {
|
|
substr($$in, 0, length $de) = '';
|
|
$$out = '';
|
|
$ret = 'DELIM';
|
|
}
|
|
# eolcloseeol found anywhere:
|
|
elsif (($pos = index($$in, $ece)) >= 0) {
|
|
# This code could be much more clearly written as:
|
|
#
|
|
#$$out = substr($$in, 0, $pos);
|
|
#substr($$in, 0, $pos + length $ece) = '';
|
|
#
|
|
# However, that can cause excessive memory usage in some cases (changed in revision 1.59).
|
|
|
|
$$out = $$in;
|
|
substr($$out, -(length($$out) - $pos)) = '';
|
|
my $len = $pos + length($ece) - length($$in);
|
|
$$in = $len == 0 ? '' : substr($$in, $len);
|
|
$ret = 'CLOSE';
|
|
}
|
|
# The first eolclose occurs at the end of the string:
|
|
elsif (index($$in, $ec) == (length($$in) - length($ec))) {
|
|
$$out = substr($$in, 0, -length($ec));
|
|
$$in = '';
|
|
$ret = 'CLOSE';
|
|
}
|
|
# closeeol at beginning of string:
|
|
elsif (substr($$in, 0, length $ce) eq $ce) {
|
|
$$out = '';
|
|
substr($$in, 0, length $ce) = '';
|
|
$ret = 'CLOSE';
|
|
}
|
|
# The only thing in the string is the closing boundary:
|
|
elsif ($$in eq "--$bound--") {
|
|
$$out = '';
|
|
$$in = '';
|
|
$ret = 'CLOSE';
|
|
}
|
|
|
|
if (defined $$out) {
|
|
return $ret;
|
|
}
|
|
else {
|
|
# Broken e-mail - we hit the end of the message without finding a boundary.
|
|
# Assume that everything left is the part body.
|
|
$$out = $$in;
|
|
$$in = '';
|
|
return 'EOF';
|
|
}
|
|
}
|
|
|
|
sub _parse_preamble {
|
|
# --------------------------------------------------------------------------
|
|
# Internal Method
|
|
# ---------------
|
|
# Parses preamble and sets it in part.
|
|
#
|
|
my ($self, $inner_bound, $in, $part) = @_;
|
|
|
|
my $delim = "--$inner_bound";
|
|
|
|
$self->debug("Parsing preamble. Skip until delim ($delim)") if $self->{_debug} > 1;
|
|
my @saved;
|
|
$part->preamble(\@saved);
|
|
|
|
my $data;
|
|
if (substr($$in, 0, length "$delim$self->{eol}") eq "$delim$self->{eol}") {
|
|
$data = '';
|
|
substr($$in, 0, length "$delim$self->{eol}") = '';
|
|
}
|
|
else {
|
|
if ((my $pos = index($$in, "$self->{eol}$delim$self->{eol}")) >= 0) {
|
|
$data = substr($$in, 0, $pos);
|
|
substr($$in, 0, $pos + length("$self->{eol}$delim$self->{eol}")) = '';
|
|
}
|
|
else {
|
|
return $self->warn(PARSE => "Unable to find opening boundary: $delim\nMessage is probably corrupt.");
|
|
}
|
|
}
|
|
push @saved, split /\Q$self->{eol}/, $data;
|
|
undef $data;
|
|
return 'DELIM';
|
|
}
|
|
|
|
sub _parse_epilogue {
|
|
# --------------------------------------------------------------------------
|
|
# Internal Method
|
|
# ---------------
|
|
# Parses epilogue and sets it in part.
|
|
#
|
|
my ($self, $outer_bound, $in, $part) = @_;
|
|
|
|
$self->debug("Parsing epilogue.") if $self->{_debug} > 1;
|
|
$part->epilogue(\my @saved);
|
|
|
|
if (defined $outer_bound) {
|
|
my ($delim, $close) = ("--$outer_bound", "--$outer_bound--");
|
|
|
|
$self->debug("Skip until\n\tdelim ($delim)\n\tclose($close)") if $self->{_debug} > 1;
|
|
|
|
if ($$in =~ s/(.*?)(?:\A|\Q$self->{eol}\E)\Q$delim$self->{eol}//s) {
|
|
push @saved, split /\Q$self->{eol}/, $1;
|
|
$self->debug("Found delim($delim)") if $self->{_debug};
|
|
return 'DELIM'
|
|
}
|
|
elsif ($$in =~ s/(.*?)(?:\A|\Q$self->{eol}\E)\Q$close\E(?:\Z|\Q$self->{eol}\E)//s) {
|
|
push @saved, split /\Q$self->{eol}/, $1;
|
|
$self->debug("Found close($close)") if $self->{_debug};
|
|
return 'CLOSE'
|
|
}
|
|
}
|
|
push @saved, split /\Q$self->{eol}/, $$in;
|
|
$$in = '';
|
|
$self->debug("EOF: epilogue is " . length(join '', @saved) . " bytes") if $self->{_debug};
|
|
return 'EOF';
|
|
}
|
|
|
|
|
|
sub Base64 {
|
|
# --------------------------------------------------------------------------
|
|
my ($self, $in, $out) = @_;
|
|
|
|
# Remove any non base64 characters.
|
|
$$in =~ tr{A-Za-z0-9+/}{}cd;
|
|
|
|
# Must pass multiple of 4 to decode_base64. Store any remainder in $rem_str and
|
|
# pad it with trailing equal signs.
|
|
my $rem = length($$in) % 4;
|
|
my ($rem_str);
|
|
if ($rem) {
|
|
my $pad = '=' x (4 - $rem);
|
|
$rem_str = substr($$in, length($$in) - $rem);
|
|
$rem_str .= $pad;
|
|
substr($$in, $rem * -1) = '';
|
|
}
|
|
|
|
$$out = decode_base64($$in);
|
|
if ($rem) {
|
|
$$out .= decode_base64($rem_str);
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
sub Binary {
|
|
# --------------------------------------------------------------------------
|
|
my ($self, $in, $out) = @_;
|
|
$$out = $$in;
|
|
return 1;
|
|
}
|
|
|
|
sub NBit {
|
|
# --------------------------------------------------------------------------
|
|
my ($self, $in, $out) = @_;
|
|
$$out = $$in;
|
|
return 1;
|
|
}
|
|
|
|
sub QuotedPrint {
|
|
# --------------------------------------------------------------------------
|
|
my ($self, $in, $out) = @_;
|
|
if ($use_decode_qp) {
|
|
$$out = MIME::QuotedPrint::decode_qp($$in);
|
|
}
|
|
else {
|
|
$$out = $$in;
|
|
$$out =~ s/\r\n/\n/g; # normalize newlines
|
|
$$out =~ s/[ \t]+\n/\n/g; # rule #3 (trailing whitespace must be deleted)
|
|
$$out =~ s/=\n//g; # rule #5 (soft line breaks)
|
|
$$out =~ s/=([\da-fA-F]{2})/chr hex $1/ge;
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
sub UU {
|
|
# --------------------------------------------------------------------------
|
|
my ($self, $in, $out) = @_;
|
|
my ($mode, $file);
|
|
|
|
# Find beginning...
|
|
while ($$in =~ s/^(.+\Q$self->{eol}\E)//) {
|
|
local $_ = $1;
|
|
last if ($mode, $file) = /^begin\s*(\d*)\s*(\S*)/;
|
|
}
|
|
return $self->warn("uu decoding: no begin found") if not defined $file;
|
|
|
|
# Decode:
|
|
while ($$in =~ s/^(.+\Q$self->{eol}\E)//) {
|
|
local $_ = $1;
|
|
last if /^end/;
|
|
next if /[a-z]/;
|
|
next unless int((((ord() - 32) & 077) + 2) / 3) == int(length($_) / 4);
|
|
$$out .= unpack('u', $_);
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
sub gt_old_decode_base64 {
|
|
# --------------------------------------------------------------------------
|
|
my $str = shift;
|
|
my $res = "";
|
|
|
|
$str =~ tr|A-Za-z0-9+=/||cd;
|
|
|
|
$str =~ s/=+$//;
|
|
$str =~ tr|A-Za-z0-9+/| -_|;
|
|
return "" unless length $str;
|
|
|
|
my $uustr = '';
|
|
my ($i, $l);
|
|
$l = length($str) - 60;
|
|
for ($i = 0; $i <= $l; $i += 60) {
|
|
$uustr .= "M" . substr($str, $i, 60);
|
|
}
|
|
$str = substr($str, $i);
|
|
# and any leftover chars
|
|
if ($str ne "") {
|
|
$uustr .= chr(32 + length($str)*3/4) . $str;
|
|
}
|
|
return unpack("u", $uustr);
|
|
}
|
|
|
|
1;
|
|
|
|
__END__
|
|
|
|
=head1 NAME
|
|
|
|
GT::Mail::Parse - MIME Parse
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
use GT::Mail::Parse
|
|
|
|
my $parser = new GT::Mail::Parse (
|
|
naming => \&name_files,
|
|
in_file => '/path/to/file.eml',
|
|
debug => 1
|
|
);
|
|
|
|
my $top = $parser->parse or die $GT::Mail::Parse::error;
|
|
|
|
- or -
|
|
|
|
my $parser = new GT::Mail::Parse;
|
|
|
|
open FH, '/path/to/file.eml' or die $!;
|
|
my $top = $parser->parse (
|
|
naming => \&name_files,
|
|
handle => \*FH,
|
|
debug => 1
|
|
) or die $GT::Mail::Parse::error;
|
|
close FH;
|
|
|
|
- or -
|
|
|
|
my $parser = new GT::Mail::Parse;
|
|
|
|
my $top_head = $parser->parse_head (
|
|
naming => \&name_files,
|
|
in_file => '/path/to/file.eml',
|
|
debug => 1
|
|
) or die $GT::Mail::Parse::error;
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
GT::Mail::Parse is a 100% rfc822 email MIME parser that supports unlimited
|
|
nested levels of MIME. Emails are parsed into L<GT::Mail::Parts> objects. Each
|
|
part knows where it's body is and each part contains it's sub parts. See
|
|
L<GT::Mail::Parts> for details on parts methods.
|
|
|
|
=head2 new - Constructor method
|
|
|
|
This is the constructor method to get a GT::Mail::Parse object, which you
|
|
need to access all the methods (there are no Class methods). new() takes
|
|
a hash or hash ref as it's arguments. Each key has an accessor method by the
|
|
same name except debug, which can only be set by passing debug to new(), parse()
|
|
or parse_head().
|
|
|
|
=over 4
|
|
|
|
=item debug
|
|
|
|
Sets the debug level for this insance of the class.
|
|
|
|
=item naming
|
|
|
|
Specify a code reference to use as a naming convention for each part of the
|
|
email being parsed. This is useful to keep file IO down when you want the emails
|
|
seperated into each part as a file. If this is not specified GT::Mail::Parse
|
|
uses a default naming, which is to start at one and incriment that number for each
|
|
attachment. The attachments would go in the current working directory.
|
|
|
|
=item in_file
|
|
|
|
Specify the path to the file that contains the email to be parsed. One of in_file
|
|
and handle must be specified.
|
|
|
|
=item handle
|
|
|
|
Specify the file handle or IO stream that contains the email to be parsed.
|
|
|
|
=item attach_rfc822
|
|
|
|
By default, the parser will decode any embeded emails, and flatten out all the
|
|
parts. If you prefer to leave embeded emails unparsed, pass in 1 to this option
|
|
and the parser will treat it as an attachment.
|
|
|
|
=back
|
|
|
|
=head2 parse - Parse an email
|
|
|
|
Instance method. Parses the email specified by either in_file or handle. Returns
|
|
the top level L<GT::Mail::Parts> object. Any additional parameters passed in are
|
|
treated the same as if they were passed to the constuctor.
|
|
|
|
=head2 parse_head - Parse just the header of the email
|
|
|
|
Instance method. This method is exactly the same as parse except only the top
|
|
level header is parsed and it's part object returned. This is useful to keep
|
|
overhead down if you only need to know about the header of the email.
|
|
|
|
=head2 size - Get the size
|
|
|
|
Instance method. Returns the total size in bytes of the parsed unencoded email. This
|
|
method will return undef if no email has been parsed.
|
|
|
|
=head2 all_parts - Get all parts
|
|
|
|
Instance method. Returns all the parts in the parsed email. This is a flatened
|
|
list of the objects. Somewhat similar to what MIME::Tools does. All the parts
|
|
still contain their sub parts.
|
|
|
|
=head1 COPYRIGHT
|
|
|
|
Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
|
|
http://www.gossamer-threads.com/
|
|
|
|
=head1 VERSION
|
|
|
|
Revision: $Id: Parse.pm,v 1.90 2008/10/29 23:32:07 brewt Exp $
|
|
|