First pass at adding key files
This commit is contained in:
831
site/slowtwitch.com/cgi-bin/articles/admin/GT/Mail/Parse.pm
Normal file
831
site/slowtwitch.com/cgi-bin/articles/admin/GT/Mail/Parse.pm
Normal file
@ -0,0 +1,831 @@
|
||||
# ==================================================================
|
||||
# Gossamer Threads Module Library - http://gossamer-threads.com/
|
||||
#
|
||||
# GT::Mail::Parse
|
||||
# Author : Scott Beck
|
||||
# CVS Info : 087,071,086,086,085
|
||||
# $Id: Parse.pm,v 1.90 2008/10/29 23:32:07 brewt Exp $
|
||||
#
|
||||
# Copyright (c) 2005 Gossamer Threads Inc. All Rights Reserved.
|
||||
# ==================================================================
|
||||
|
||||
package GT::Mail::Parse;
|
||||
# =============================================================================
|
||||
# If MIME::Base64 is installed use it - must eval before hand or 5.004_04 wipes
|
||||
# our ISA.
|
||||
my $have_b64 = eval {
|
||||
local $SIG{__DIE__};
|
||||
require MIME::Base64;
|
||||
import MIME::Base64;
|
||||
if ($] < 5.005) { local $^W; decode_base64('brok'); }
|
||||
1;
|
||||
};
|
||||
$have_b64 or *decode_base64 = \>_old_decode_base64;
|
||||
my $use_decode_qp;
|
||||
if ($have_b64 and
|
||||
$MIME::Base64::VERSION ge 2.16 and # Prior versions had decoding bugs
|
||||
defined &MIME::QuotedPrint::decode_qp and (
|
||||
not defined &MIME::QuotedPrint::old_decode_qp or
|
||||
\&MIME::QuotedPrint::decode_qp != \&MIME::QuotedPrint::old_decode_qp
|
||||
)
|
||||
) {
|
||||
$use_decode_qp = 1;
|
||||
}
|
||||
|
||||
# Pragmas
|
||||
use strict;
|
||||
use vars qw($VERSION $DEBUG $ERRORS @ISA);
|
||||
|
||||
# System modules
|
||||
use Fcntl;
|
||||
|
||||
# Internal modules
|
||||
use GT::Mail::Parts;
|
||||
use GT::Base;
|
||||
|
||||
# Inherent from GT::Base for errors and debug
|
||||
@ISA = qw(GT::Base);
|
||||
|
||||
# Debugging mode
|
||||
$DEBUG = 0;
|
||||
|
||||
$VERSION = sprintf "%d.%03d", q$Revision: 1.90 $ =~ /(\d+)\.(\d+)/;
|
||||
|
||||
# Error messages
|
||||
$ERRORS = {
|
||||
PARSE => "An error occurred while parsing: %s",
|
||||
DECODE => "An error occurred while decoding: %s",
|
||||
NOPARTS => "Email has no parts!",
|
||||
DEEPPARTS => "Deep recursion dected, email appears to have more than 50 parts!",
|
||||
MALFORMED => "Found (%s) before finding the start of the boundary. Message malformed"
|
||||
};
|
||||
|
||||
my %DecoderFor = (
|
||||
# Standard...
|
||||
'7bit' => 'NBit',
|
||||
'8bit' => 'NBit',
|
||||
'base64' => 'Base64',
|
||||
'binary' => 'Binary',
|
||||
'none' => 'Binary',
|
||||
'quoted-printable' => 'QuotedPrint',
|
||||
|
||||
# Non-standard...
|
||||
'x-uu' => 'UU',
|
||||
'x-uuencode' => 'UU',
|
||||
);
|
||||
|
||||
sub new {
|
||||
# --------------------------------------------------------------------------
|
||||
# CLASS->new (
|
||||
# naming => \&naming,
|
||||
# in_file => '/path/to/file/to/parse',
|
||||
# handle => \*FH
|
||||
# );
|
||||
# ----------------------------------------------
|
||||
# Class method to get a new object. Calles init if there are any additional
|
||||
# argument. To set the arguments that are passed to naming call naming
|
||||
# directly.
|
||||
#
|
||||
my $this = shift;
|
||||
my $class = ref $this || $this;
|
||||
my $self = bless {
|
||||
file_handle => undef,
|
||||
parts => [],
|
||||
head_part => undef,
|
||||
headers_intact => 1,
|
||||
_debug => $DEBUG,
|
||||
eol => "\012"
|
||||
}, $class;
|
||||
$self->init(@_) if @_;
|
||||
$self->debug("Created new object ($self).") if $self->{_debug} > 1;
|
||||
return $self;
|
||||
}
|
||||
|
||||
sub init {
|
||||
# --------------------------------------------------------------------------
|
||||
# $obj->init (%opts);
|
||||
# -------------------
|
||||
# Sets the options for the current object.
|
||||
#
|
||||
my $self = shift;
|
||||
my $opt = {};
|
||||
if (@_ == 1 and ref $_[0] eq 'HASH') { $opt = shift }
|
||||
elsif (defined $_[0] and not @_ % 2) { $opt = {@_} }
|
||||
else { return $self->error("BADARGS", "FATAL", "init") }
|
||||
|
||||
$self->{_debug} = exists($opt->{debug}) ? $opt->{debug} : $DEBUG;
|
||||
$self->{headers_intact} = exists($opt->{headers_intact}) ? $opt->{headers_intact} : 1;
|
||||
for my $m (qw(crlf in_file in_handle in_string attach_rfc822)) {
|
||||
$self->$m($opt->{$m}) if defined $opt->{$m};
|
||||
}
|
||||
}
|
||||
|
||||
sub attach_rfc822 {
|
||||
# --------------------------------------------------------------------------
|
||||
my $self = shift;
|
||||
if (@_) {
|
||||
$self->{attach_rfc822} = shift;
|
||||
}
|
||||
return $self->{attach_rfc822};
|
||||
}
|
||||
|
||||
sub crlf {
|
||||
# -----------------------------------------------------------------------------
|
||||
# Sets the end-of-line character sequence to use when parsing. This defaults
|
||||
# to \012 (\n); you'll likely want to use \015\012 at times (for example, when
|
||||
# parsing mail downloaded from a POP3 server). This is set on a per-parser
|
||||
# basis (it used to be global, but that was significantly broken).
|
||||
#
|
||||
my ($self, $eol) = @_;
|
||||
$self->{eol} = $eol;
|
||||
}
|
||||
|
||||
sub parse {
|
||||
# --------------------------------------------------------------------------
|
||||
# my $top = $obj->parse;
|
||||
# ----------------------
|
||||
# Parses the email set in new or init. Also calls init if there are any
|
||||
# arguments passed in.
|
||||
# Returns the top level part object.
|
||||
#
|
||||
my ($self, @opts) = @_;
|
||||
|
||||
# Any additional arguments goto init
|
||||
$self->init(@opts) if @opts;
|
||||
|
||||
($self->{string} and ref($self->{string}) eq 'SCALAR')
|
||||
or return $self->error('BADARGS', 'FATAL', "No input was given to parse before parse() was called");
|
||||
|
||||
# Recursive function to parse
|
||||
$self->_parse_part(undef, $self->{string}); # parse!
|
||||
|
||||
# Return top part
|
||||
return $self->{head_part};
|
||||
}
|
||||
|
||||
sub parse_head {
|
||||
# --------------------------------------------------------------------------
|
||||
# my $head = $obj->parse_head;
|
||||
# ----------------------------
|
||||
# Passes any additional arguments to init. Parses only the top level header.
|
||||
# This saves some overhead if for example all you need to do it find out who
|
||||
# an email is to on a POP3 server.
|
||||
#
|
||||
my ($self, $in, @opts) = @_;
|
||||
|
||||
unless (ref $self) {
|
||||
$self = $self->new(@opts);
|
||||
}
|
||||
|
||||
$in ||= $self->{string};
|
||||
$in || return $self->error("BADARGS", "FATAL", "No string to parse set!");
|
||||
|
||||
# Parse the head
|
||||
return $self->_parse_head($in);
|
||||
}
|
||||
|
||||
#--------------------------------------------
|
||||
# Access
|
||||
#--------------------------------------------
|
||||
|
||||
|
||||
sub in_handle {
|
||||
# --------------------------------------------------------------------------
|
||||
# $obj->in_handle (\*FH);
|
||||
# --------------------
|
||||
# Pass in a file handle to parse from when parse is called.
|
||||
#
|
||||
my ($self, $value) = @_;
|
||||
if (@_ > 1 and ref $value and defined fileno $value) {
|
||||
read $value, ${$self->{string}}, -s $value;
|
||||
}
|
||||
return $self->{string};
|
||||
}
|
||||
|
||||
sub in_file {
|
||||
# --------------------------------------------------------------------------
|
||||
# $obj->in_file ('/path/to/file');
|
||||
# --------------------------------
|
||||
# Pass in the path to a file to parse when parse is called
|
||||
#
|
||||
my $self = shift;
|
||||
my $file = shift;
|
||||
my $io = \do { local *FH; *FH };
|
||||
open $io, "<$file" or return $self->error("READOPEN", "FATAL", $file, $!);
|
||||
return $self->in_handle($io);
|
||||
}
|
||||
|
||||
sub in_string {
|
||||
# --------------------------------------------------------------------------
|
||||
my ($self, $string) = @_;
|
||||
return $self->{string} unless (@_ > 1);
|
||||
if (ref($string) eq 'SCALAR') {
|
||||
$self->{string} = $string;
|
||||
}
|
||||
else {
|
||||
$self->{string} = \$string;
|
||||
}
|
||||
return $self->{string};
|
||||
}
|
||||
|
||||
sub size {
|
||||
# --------------------------------------------------------------------------
|
||||
# my $email_size = $obj->size;
|
||||
# ----------------------------
|
||||
# Returns the total size of an email. Call this method after the email has
|
||||
# been parsed.
|
||||
#
|
||||
my $self = shift;
|
||||
(@{$self->{parts}} > 0) or return $self->error("NOPARTS", "WARN");
|
||||
my $size = 0;
|
||||
foreach (@{$self->{parts}}) {
|
||||
$size += $_->size;
|
||||
}
|
||||
return $size;
|
||||
}
|
||||
|
||||
sub all_parts {
|
||||
# --------------------------------------------------------------------------
|
||||
# my @parts = $obj->all_parts;
|
||||
# ----------------------------
|
||||
# Returns a list of all the part object for the current parsed email. If the
|
||||
# email is not multipart this will be just the header part.
|
||||
#
|
||||
return @{shift()->{parts}}
|
||||
}
|
||||
|
||||
sub top_part {
|
||||
# --------------------------------------------------------------------------
|
||||
return ${shift()->{parts}}[0];
|
||||
}
|
||||
|
||||
#---------------------------------------------
|
||||
# Internal Methods
|
||||
#---------------------------------------------
|
||||
|
||||
sub _parse_head {
|
||||
# --------------------------------------------------------------------------
|
||||
# Internal Method
|
||||
# ---------------
|
||||
# Parse just the head. Returns the part object.
|
||||
#
|
||||
my ($self, $in) = @_;
|
||||
|
||||
# Get a new part object
|
||||
my $part = GT::Mail::Parts->new(headers_intact => $self->{headers_intact});
|
||||
|
||||
if (ref $in eq 'ARRAY') {
|
||||
$part->extract($in) or return $self->error("PARSE", "WARN", "Couldn't parse head!");
|
||||
return $part;
|
||||
}
|
||||
$part->extract([map "$_$self->{eol}", split /\Q$self->{eol}/, $$in]) or return $self->error($GT::Mail::Parts::error, 'WARN');
|
||||
return $part;
|
||||
}
|
||||
|
||||
sub _parse_part {
|
||||
# --------------------------------------------------------------------------
|
||||
# Internal Method
|
||||
# ---------------
|
||||
# Parses all the parts of an email and stores them in there parts object.
|
||||
# This function is recursive.
|
||||
#
|
||||
my ($self, $outer_bound, $in, $part) = @_;
|
||||
my $state = 'OK';
|
||||
|
||||
# First part is going to be the top level part
|
||||
if (!$part) {
|
||||
$part = GT::Mail::Parts->new(headers_intact => $self->{headers_intact});
|
||||
$self->{head_part} = $part;
|
||||
}
|
||||
push @{$self->{parts}}, $part;
|
||||
|
||||
# Get the header for this part
|
||||
=for comment
|
||||
According to rfc2045 and rfc2046, the MIME part headers are optional, so for
|
||||
parsing out the headers, we have the following cases:
|
||||
|
||||
1) no headers, no body
|
||||
EOL--boundary
|
||||
|
||||
2) no headers, body
|
||||
EOLbodyEOL--boundary
|
||||
|
||||
3) headers, no body
|
||||
headers[EOL]EOL--boundary
|
||||
|
||||
4) headers, body
|
||||
headersEOLbodyEOL--boundary
|
||||
|
||||
_parse_to_bound parses everything after the header to EOL--boundary, so this
|
||||
header parsing must be careful not to remove the EOL before the --boundary
|
||||
(cases 1 and 3), or _parse_to_bound will parse more than it should.
|
||||
=cut
|
||||
my $eol_len = length $self->{eol};
|
||||
if (defined $outer_bound and substr($$in, 0, length "$self->{eol}--$outer_bound") eq "$self->{eol}--$outer_bound") {
|
||||
# do nothing
|
||||
}
|
||||
elsif (substr($$in, 0, $eol_len) eq $self->{eol}) {
|
||||
substr($$in, 0, $eol_len) = '';
|
||||
}
|
||||
else {
|
||||
my $indx = index($$in, $self->{eol} x 2);
|
||||
if ($indx == -1) {
|
||||
$self->debug('Message has no body.') if $self->{_debug};
|
||||
$indx = length($$in);
|
||||
}
|
||||
$part->extract([map "$_$self->{eol}", split /\Q$self->{eol}/, substr $$in, 0, $indx]) or return $self->warn($GT::Mail::Parts::error);
|
||||
|
||||
my $trim_len = $eol_len * 2;
|
||||
if (defined $outer_bound) {
|
||||
my $next_bound = "$self->{eol}$self->{eol}--$outer_bound";
|
||||
if (substr($$in, $indx, length $next_bound) eq $next_bound) {
|
||||
$trim_len = $eol_len;
|
||||
}
|
||||
}
|
||||
substr($$in, 0, $indx + $trim_len) = '';
|
||||
}
|
||||
|
||||
# Get the mime type
|
||||
my ($type, $subtype) = split m{/}, $part->mime_type;
|
||||
$type ||= 'text';
|
||||
$subtype ||= 'plain';
|
||||
if ($self->{_debug}) {
|
||||
my $name = $part->recommended_filename || '[unnamed]';
|
||||
$self->debug("Type is '$type/$subtype' ($name)");
|
||||
}
|
||||
|
||||
# Deal with the multipart type with some recursion
|
||||
if ($type eq 'multipart') {
|
||||
my $retype = (($subtype eq 'digest') ? 'message/rfc822' : '');
|
||||
|
||||
# Find the multipart boundary
|
||||
my $inner_bound = $part->multipart_boundary;
|
||||
$self->debug("Boundary is $inner_bound") if $self->{_debug} > 1;
|
||||
defined $inner_bound or return $self->error("PARSE", "WARN", "No multipart boundary in multipart message.");
|
||||
index($inner_bound, $self->{eol}) == -1 or return $self->error("PARSE", "WARN", "End-of-line character in multipart boundary.");
|
||||
|
||||
# Parse the Preamble
|
||||
$self->debug("Parsing preamble.") if $self->{_debug} > 1;
|
||||
$state = $self->_parse_preamble($inner_bound, $in, $part) or return;
|
||||
chomp($part->preamble->[-1]) if @{$part->preamble};
|
||||
|
||||
# Get all the parts of the multipart message
|
||||
my $partno = 0;
|
||||
my $parts;
|
||||
while (1) {
|
||||
++$partno < 200 or return $self->error('DEEPPARTS', 'WARN');
|
||||
$self->debug("Parsing part $partno.") if $self->{_debug};
|
||||
|
||||
($parts, $state) = $self->_parse_part($inner_bound, $in, GT::Mail::Parts->new(headers_intact => $self->{headers_intact})) or return;
|
||||
|
||||
$parts->mime_type($retype) if $retype;
|
||||
push(@{$part->{parts}}, $parts);
|
||||
|
||||
if ($state eq 'EOF') {
|
||||
$self->warn(PARSE => 'Unexpected EOF before close.');
|
||||
return ($part, 'EOF');
|
||||
}
|
||||
|
||||
last if $state eq 'CLOSE';
|
||||
}
|
||||
|
||||
# Parse the epilogue
|
||||
$self->debug("Parsing epilogue.") if $self->{_debug} > 1;
|
||||
$state = $self->_parse_epilogue($outer_bound, $in, $part) or return;
|
||||
chomp($part->epilogue->[-1]) if @{$part->epilogue} and $state ne 'EOF';
|
||||
}
|
||||
|
||||
# We are on a single part
|
||||
else {
|
||||
$self->debug("Decoding single part.") if $self->{_debug} > 1;
|
||||
|
||||
# Find the encoding for the body of the part
|
||||
my $encoding = $part->mime_encoding || 'binary';
|
||||
if (!exists($DecoderFor{lc($encoding)})) {
|
||||
$self->debug("Unsupported encoding '$encoding': using 'binary'... \n" .
|
||||
"The entity will have an effective MIME type of \n" .
|
||||
"application/octet-stream, as per RFC-2045.")
|
||||
if $self->{_debug};
|
||||
$part->effective_type('application/octet-stream');
|
||||
$encoding = 'binary';
|
||||
}
|
||||
my $reparse;
|
||||
$reparse = ("$type/$subtype" eq "message/rfc822") unless $self->{attach_rfc822};
|
||||
my $encoded = "";
|
||||
|
||||
# If we have boundaries we parse the body to the boundary
|
||||
if (defined $outer_bound) {
|
||||
$self->debug("Parsing to boundary.") if $self->{_debug} > 1;
|
||||
$state = $self->_parse_to_bound($outer_bound, $in, \$encoded) or return;
|
||||
}
|
||||
# Else we would parse the rest of the input stream as the rest of the message
|
||||
else {
|
||||
$self->debug("No Boundries.") if $self->{_debug} > 1;
|
||||
$encoded = $$in;
|
||||
$state = 'EOF';
|
||||
}
|
||||
|
||||
# Normal part so we get the body and decode it.
|
||||
if (!$reparse) {
|
||||
$self->debug("Not reparsing.") if $self->{_debug} > 1;
|
||||
$part->{body_in} = 'MEMORY';
|
||||
|
||||
my $decoder = $DecoderFor{lc($encoding)};
|
||||
$self->debug("Decoding part using: " . lc($encoding)) if $self->{_debug};
|
||||
$part->{data} = '';
|
||||
my $out = '';
|
||||
my $res = $self->$decoder(\$encoded, \$out);
|
||||
undef $encoded;
|
||||
$res or return;
|
||||
$part->{data} = $out;
|
||||
undef $out;
|
||||
}
|
||||
else {
|
||||
# If have an embeded email we reparse it.
|
||||
$self->debug("Reparsing enclosed message.") if $self->{_debug};
|
||||
my $out = '';
|
||||
|
||||
my $decoder = $DecoderFor{lc($encoding)};
|
||||
$self->debug("Decoding " . lc($encoding)) if $self->{_debug};
|
||||
my $res = $self->$decoder(\$encoded, \$out);
|
||||
undef $encoded;
|
||||
$res or return;
|
||||
my $p = GT::Mail::Parts->new(headers_intact => $self->{headers_intact});
|
||||
push @{$part->{parts}}, $p;
|
||||
$self->_parse_part(undef, \$out, $p) or return;
|
||||
}
|
||||
}
|
||||
return ($part, $state);
|
||||
}
|
||||
|
||||
sub _parse_to_bound {
|
||||
# --------------------------------------------------------------------------
|
||||
# This method takes a boundary ($bound), an input string ref ($in), and an
|
||||
# output string ref ($out). It will place into $$out the data contained by
|
||||
# $bound, and remove the entire region (including boundary) from $$in.
|
||||
#
|
||||
my ($self, $bound, $in, $out) = @_;
|
||||
|
||||
# Set up strings for faster checking:
|
||||
$self->debug("Parsing bounds. Skip until\n\tdelim (--$bound)\n\tclose (--$bound--)") if $self->{_debug} > 1;
|
||||
my $ret;
|
||||
|
||||
# Various shortcut variables - 'e' is eol, 'd' is delimiter, 'c' is closing delimiter:
|
||||
my ($ede, $de, $ece, $ec, $ce) = (
|
||||
"$self->{eol}--$bound$self->{eol}",
|
||||
"--$bound$self->{eol}",
|
||||
"$self->{eol}--$bound--$self->{eol}",
|
||||
"$self->{eol}--$bound--",
|
||||
"--$bound--$self->{eol}"
|
||||
);
|
||||
|
||||
# Place our part in $$out.
|
||||
$$out = undef;
|
||||
# eoldelimeol found anywhere:
|
||||
if ((my $pos = index $$in, $ede) >= 0) {
|
||||
$$out = substr($$in, 0, $pos);
|
||||
substr($$in, 0, $pos + length $ede) = '';
|
||||
$ret = 'DELIM';
|
||||
}
|
||||
# delimeol at beginning of string:
|
||||
elsif (substr($$in, 0, length $de) eq $de) {
|
||||
substr($$in, 0, length $de) = '';
|
||||
$$out = '';
|
||||
$ret = 'DELIM';
|
||||
}
|
||||
# eolcloseeol found anywhere:
|
||||
elsif (($pos = index($$in, $ece)) >= 0) {
|
||||
# This code could be much more clearly written as:
|
||||
#
|
||||
#$$out = substr($$in, 0, $pos);
|
||||
#substr($$in, 0, $pos + length $ece) = '';
|
||||
#
|
||||
# However, that can cause excessive memory usage in some cases (changed in revision 1.59).
|
||||
|
||||
$$out = $$in;
|
||||
substr($$out, -(length($$out) - $pos)) = '';
|
||||
my $len = $pos + length($ece) - length($$in);
|
||||
$$in = $len == 0 ? '' : substr($$in, $len);
|
||||
$ret = 'CLOSE';
|
||||
}
|
||||
# The first eolclose occurs at the end of the string:
|
||||
elsif (index($$in, $ec) == (length($$in) - length($ec))) {
|
||||
$$out = substr($$in, 0, -length($ec));
|
||||
$$in = '';
|
||||
$ret = 'CLOSE';
|
||||
}
|
||||
# closeeol at beginning of string:
|
||||
elsif (substr($$in, 0, length $ce) eq $ce) {
|
||||
$$out = '';
|
||||
substr($$in, 0, length $ce) = '';
|
||||
$ret = 'CLOSE';
|
||||
}
|
||||
# The only thing in the string is the closing boundary:
|
||||
elsif ($$in eq "--$bound--") {
|
||||
$$out = '';
|
||||
$$in = '';
|
||||
$ret = 'CLOSE';
|
||||
}
|
||||
|
||||
if (defined $$out) {
|
||||
return $ret;
|
||||
}
|
||||
else {
|
||||
# Broken e-mail - we hit the end of the message without finding a boundary.
|
||||
# Assume that everything left is the part body.
|
||||
$$out = $$in;
|
||||
$$in = '';
|
||||
return 'EOF';
|
||||
}
|
||||
}
|
||||
|
||||
sub _parse_preamble {
|
||||
# --------------------------------------------------------------------------
|
||||
# Internal Method
|
||||
# ---------------
|
||||
# Parses preamble and sets it in part.
|
||||
#
|
||||
my ($self, $inner_bound, $in, $part) = @_;
|
||||
|
||||
my $delim = "--$inner_bound";
|
||||
|
||||
$self->debug("Parsing preamble. Skip until delim ($delim)") if $self->{_debug} > 1;
|
||||
my @saved;
|
||||
$part->preamble(\@saved);
|
||||
|
||||
my $data;
|
||||
if (substr($$in, 0, length "$delim$self->{eol}") eq "$delim$self->{eol}") {
|
||||
$data = '';
|
||||
substr($$in, 0, length "$delim$self->{eol}") = '';
|
||||
}
|
||||
else {
|
||||
if ((my $pos = index($$in, "$self->{eol}$delim$self->{eol}")) >= 0) {
|
||||
$data = substr($$in, 0, $pos);
|
||||
substr($$in, 0, $pos + length("$self->{eol}$delim$self->{eol}")) = '';
|
||||
}
|
||||
else {
|
||||
return $self->warn(PARSE => "Unable to find opening boundary: $delim\nMessage is probably corrupt.");
|
||||
}
|
||||
}
|
||||
push @saved, split /\Q$self->{eol}/, $data;
|
||||
undef $data;
|
||||
return 'DELIM';
|
||||
}
|
||||
|
||||
sub _parse_epilogue {
|
||||
# --------------------------------------------------------------------------
|
||||
# Internal Method
|
||||
# ---------------
|
||||
# Parses epilogue and sets it in part.
|
||||
#
|
||||
my ($self, $outer_bound, $in, $part) = @_;
|
||||
|
||||
$self->debug("Parsing epilogue.") if $self->{_debug} > 1;
|
||||
$part->epilogue(\my @saved);
|
||||
|
||||
if (defined $outer_bound) {
|
||||
my ($delim, $close) = ("--$outer_bound", "--$outer_bound--");
|
||||
|
||||
$self->debug("Skip until\n\tdelim ($delim)\n\tclose($close)") if $self->{_debug} > 1;
|
||||
|
||||
if ($$in =~ s/(.*?)(?:\A|\Q$self->{eol}\E)\Q$delim$self->{eol}//s) {
|
||||
push @saved, split /\Q$self->{eol}/, $1;
|
||||
$self->debug("Found delim($delim)") if $self->{_debug};
|
||||
return 'DELIM'
|
||||
}
|
||||
elsif ($$in =~ s/(.*?)(?:\A|\Q$self->{eol}\E)\Q$close\E(?:\Z|\Q$self->{eol}\E)//s) {
|
||||
push @saved, split /\Q$self->{eol}/, $1;
|
||||
$self->debug("Found close($close)") if $self->{_debug};
|
||||
return 'CLOSE'
|
||||
}
|
||||
}
|
||||
push @saved, split /\Q$self->{eol}/, $$in;
|
||||
$$in = '';
|
||||
$self->debug("EOF: epilogue is " . length(join '', @saved) . " bytes") if $self->{_debug};
|
||||
return 'EOF';
|
||||
}
|
||||
|
||||
|
||||
sub Base64 {
|
||||
# --------------------------------------------------------------------------
|
||||
my ($self, $in, $out) = @_;
|
||||
|
||||
# Remove any non base64 characters.
|
||||
$$in =~ tr{A-Za-z0-9+/}{}cd;
|
||||
|
||||
# Must pass multiple of 4 to decode_base64. Store any remainder in $rem_str and
|
||||
# pad it with trailing equal signs.
|
||||
my $rem = length($$in) % 4;
|
||||
my ($rem_str);
|
||||
if ($rem) {
|
||||
my $pad = '=' x (4 - $rem);
|
||||
$rem_str = substr($$in, length($$in) - $rem);
|
||||
$rem_str .= $pad;
|
||||
substr($$in, $rem * -1) = '';
|
||||
}
|
||||
|
||||
$$out = decode_base64($$in);
|
||||
if ($rem) {
|
||||
$$out .= decode_base64($rem_str);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
sub Binary {
|
||||
# --------------------------------------------------------------------------
|
||||
my ($self, $in, $out) = @_;
|
||||
$$out = $$in;
|
||||
return 1;
|
||||
}
|
||||
|
||||
sub NBit {
|
||||
# --------------------------------------------------------------------------
|
||||
my ($self, $in, $out) = @_;
|
||||
$$out = $$in;
|
||||
return 1;
|
||||
}
|
||||
|
||||
sub QuotedPrint {
|
||||
# --------------------------------------------------------------------------
|
||||
my ($self, $in, $out) = @_;
|
||||
if ($use_decode_qp) {
|
||||
$$out = MIME::QuotedPrint::decode_qp($$in);
|
||||
}
|
||||
else {
|
||||
$$out = $$in;
|
||||
$$out =~ s/\r\n/\n/g; # normalize newlines
|
||||
$$out =~ s/[ \t]+\n/\n/g; # rule #3 (trailing whitespace must be deleted)
|
||||
$$out =~ s/=\n//g; # rule #5 (soft line breaks)
|
||||
$$out =~ s/=([\da-fA-F]{2})/chr hex $1/ge;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
sub UU {
|
||||
# --------------------------------------------------------------------------
|
||||
my ($self, $in, $out) = @_;
|
||||
my ($mode, $file);
|
||||
|
||||
# Find beginning...
|
||||
while ($$in =~ s/^(.+\Q$self->{eol}\E)//) {
|
||||
local $_ = $1;
|
||||
last if ($mode, $file) = /^begin\s*(\d*)\s*(\S*)/;
|
||||
}
|
||||
return $self->warn("uu decoding: no begin found") if not defined $file;
|
||||
|
||||
# Decode:
|
||||
while ($$in =~ s/^(.+\Q$self->{eol}\E)//) {
|
||||
local $_ = $1;
|
||||
last if /^end/;
|
||||
next if /[a-z]/;
|
||||
next unless int((((ord() - 32) & 077) + 2) / 3) == int(length($_) / 4);
|
||||
$$out .= unpack('u', $_);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
sub gt_old_decode_base64 {
|
||||
# --------------------------------------------------------------------------
|
||||
my $str = shift;
|
||||
my $res = "";
|
||||
|
||||
$str =~ tr|A-Za-z0-9+=/||cd;
|
||||
|
||||
$str =~ s/=+$//;
|
||||
$str =~ tr|A-Za-z0-9+/| -_|;
|
||||
return "" unless length $str;
|
||||
|
||||
my $uustr = '';
|
||||
my ($i, $l);
|
||||
$l = length($str) - 60;
|
||||
for ($i = 0; $i <= $l; $i += 60) {
|
||||
$uustr .= "M" . substr($str, $i, 60);
|
||||
}
|
||||
$str = substr($str, $i);
|
||||
# and any leftover chars
|
||||
if ($str ne "") {
|
||||
$uustr .= chr(32 + length($str)*3/4) . $str;
|
||||
}
|
||||
return unpack("u", $uustr);
|
||||
}
|
||||
|
||||
1;
|
||||
|
||||
__END__
|
||||
|
||||
=head1 NAME
|
||||
|
||||
GT::Mail::Parse - MIME Parse
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
use GT::Mail::Parse
|
||||
|
||||
my $parser = new GT::Mail::Parse (
|
||||
naming => \&name_files,
|
||||
in_file => '/path/to/file.eml',
|
||||
debug => 1
|
||||
);
|
||||
|
||||
my $top = $parser->parse or die $GT::Mail::Parse::error;
|
||||
|
||||
- or -
|
||||
|
||||
my $parser = new GT::Mail::Parse;
|
||||
|
||||
open FH, '/path/to/file.eml' or die $!;
|
||||
my $top = $parser->parse (
|
||||
naming => \&name_files,
|
||||
handle => \*FH,
|
||||
debug => 1
|
||||
) or die $GT::Mail::Parse::error;
|
||||
close FH;
|
||||
|
||||
- or -
|
||||
|
||||
my $parser = new GT::Mail::Parse;
|
||||
|
||||
my $top_head = $parser->parse_head (
|
||||
naming => \&name_files,
|
||||
in_file => '/path/to/file.eml',
|
||||
debug => 1
|
||||
) or die $GT::Mail::Parse::error;
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
GT::Mail::Parse is a 100% rfc822 email MIME parser that supports unlimited
|
||||
nested levels of MIME. Emails are parsed into L<GT::Mail::Parts> objects. Each
|
||||
part knows where it's body is and each part contains it's sub parts. See
|
||||
L<GT::Mail::Parts> for details on parts methods.
|
||||
|
||||
=head2 new - Constructor method
|
||||
|
||||
This is the constructor method to get a GT::Mail::Parse object, which you
|
||||
need to access all the methods (there are no Class methods). new() takes
|
||||
a hash or hash ref as it's arguments. Each key has an accessor method by the
|
||||
same name except debug, which can only be set by passing debug to new(), parse()
|
||||
or parse_head().
|
||||
|
||||
=over 4
|
||||
|
||||
=item debug
|
||||
|
||||
Sets the debug level for this insance of the class.
|
||||
|
||||
=item naming
|
||||
|
||||
Specify a code reference to use as a naming convention for each part of the
|
||||
email being parsed. This is useful to keep file IO down when you want the emails
|
||||
seperated into each part as a file. If this is not specified GT::Mail::Parse
|
||||
uses a default naming, which is to start at one and incriment that number for each
|
||||
attachment. The attachments would go in the current working directory.
|
||||
|
||||
=item in_file
|
||||
|
||||
Specify the path to the file that contains the email to be parsed. One of in_file
|
||||
and handle must be specified.
|
||||
|
||||
=item handle
|
||||
|
||||
Specify the file handle or IO stream that contains the email to be parsed.
|
||||
|
||||
=item attach_rfc822
|
||||
|
||||
By default, the parser will decode any embeded emails, and flatten out all the
|
||||
parts. If you prefer to leave embeded emails unparsed, pass in 1 to this option
|
||||
and the parser will treat it as an attachment.
|
||||
|
||||
=back
|
||||
|
||||
=head2 parse - Parse an email
|
||||
|
||||
Instance method. Parses the email specified by either in_file or handle. Returns
|
||||
the top level L<GT::Mail::Parts> object. Any additional parameters passed in are
|
||||
treated the same as if they were passed to the constuctor.
|
||||
|
||||
=head2 parse_head - Parse just the header of the email
|
||||
|
||||
Instance method. This method is exactly the same as parse except only the top
|
||||
level header is parsed and it's part object returned. This is useful to keep
|
||||
overhead down if you only need to know about the header of the email.
|
||||
|
||||
=head2 size - Get the size
|
||||
|
||||
Instance method. Returns the total size in bytes of the parsed unencoded email. This
|
||||
method will return undef if no email has been parsed.
|
||||
|
||||
=head2 all_parts - Get all parts
|
||||
|
||||
Instance method. Returns all the parts in the parsed email. This is a flatened
|
||||
list of the objects. Somewhat similar to what MIME::Tools does. All the parts
|
||||
still contain their sub parts.
|
||||
|
||||
=head1 COPYRIGHT
|
||||
|
||||
Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
|
||||
http://www.gossamer-threads.com/
|
||||
|
||||
=head1 VERSION
|
||||
|
||||
Revision: $Id: Parse.pm,v 1.90 2008/10/29 23:32:07 brewt Exp $
|
||||
|
Reference in New Issue
Block a user