1226 lines
39 KiB
Perl
1226 lines
39 KiB
Perl
# ====================================================================
|
|
# Gossamer Threads Module Library - http://gossamer-threads.com/
|
|
#
|
|
# GT::Mail::Parts
|
|
# Author : Scott Beck
|
|
# CVS Info :
|
|
# $Id: Parts.pm,v 1.77 2005/03/18 00:35:54 alex Exp $
|
|
#
|
|
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
|
|
# ====================================================================
|
|
#
|
|
|
|
package GT::Mail::Parts;
|
|
# ===================================================================
|
|
|
|
# Pragmas
|
|
use strict;
|
|
use vars qw($VERSION @ISA $FIELD_NAME $PARAMNAME $FIRST $TSPECIAL $TOKEN $SPCZ $CRLF @HEADER $ERRORS $DEBUG);
|
|
use GT::Base;
|
|
use GT::Text::Tools;
|
|
|
|
$VERSION = sprintf "%d.%03d", q$Revision: 1.77 $ =~ /(\d+)\.(\d+)/;
|
|
@ISA = qw(GT::Base);
|
|
$DEBUG = 0;
|
|
$CRLF = "\015\012";
|
|
@HEADER = qw/Return-Path Received Date From Subject Sender To Cc Bcc Content-Type Content-Transfer-Encoding Content-Disposition X-Mailer Message-Id/;
|
|
|
|
$ERRORS = {};
|
|
|
|
# Pattern to match a RFC822 Field name ( Extract from RFC #822)
|
|
my $FIELD_NAME = '[^\x00-\x1f\x7f-\xff :]+:';
|
|
|
|
# Pattern to match parameter names (like fieldnames, but = not allowed):
|
|
my $PARAMNAME = '[^\x00-\x1f\x7f-\xff :=]+';
|
|
|
|
# Pattern to match the first value on the line:
|
|
my $FIRST = '[^\s\;\x00-\x1f\x7f-\xff]+';
|
|
|
|
# Pattern to match an RFC-1521 token:
|
|
#
|
|
# token = 1*<any (ASCII) CHAR except SPACE, CTLs, or tspecials> #
|
|
my $TSPECIAL = '()<>@,;:\</[]?="';
|
|
my $TOKEN = '[^ \x00-\x1f\x7f-\xff' . "\Q$TSPECIAL\E" . ']+';
|
|
|
|
# Pattern to match spaces or comments:
|
|
my $SPCZ = '(?:\s|\([^\)]*\))*';
|
|
|
|
sub new {
|
|
# --------------------------------------------------------------------------
|
|
# my $obj = $class->new;
|
|
# ----------------------
|
|
# Generic constructor.
|
|
#
|
|
my ($self, %opts) = @_;
|
|
my $type = ref($self) || $self;
|
|
$self = bless {
|
|
header_lines => {},
|
|
header_order => [],
|
|
headers_intact => $opts{headers_intact} || 0,
|
|
parts => [],
|
|
_debug => $opts{debug},
|
|
header_charset => $opts{header_charset} || 'ISO-8859-1',
|
|
}, $type;
|
|
$self;
|
|
}
|
|
|
|
##################################################
|
|
## Header Methods ##
|
|
##################################################
|
|
|
|
sub extract {
|
|
# --------------------------------------------------------------------------
|
|
# $obj->extract(\@headerline);
|
|
# ----------------------------
|
|
# Takes an array reference containing the header lines and parses it into
|
|
# an internal data structure accessible with methods in this class.
|
|
#
|
|
my ($self, $headlines) = @_;
|
|
my $lines;
|
|
|
|
# Remove any starting header lines which are really broken
|
|
shift @$headlines while @$headlines > 0 and $headlines->[0] !~ /^(?:$FIELD_NAME|From )/o;
|
|
|
|
while (@$headlines > 0 and $headlines->[0] =~ /^($FIELD_NAME|From )/o) {
|
|
my $tag = $1;
|
|
|
|
$lines = shift @$headlines;
|
|
$lines .= shift @$headlines while @$headlines > 0 and $headlines->[0] =~ /^[ \t]+/;
|
|
|
|
# Remove any following header lines which are really broken
|
|
shift @$headlines while @$headlines > 0 and $headlines->[0] !~ /^$FIELD_NAME/o;
|
|
|
|
($tag, $lines) = $self->_fmt_line($tag, $lines) or return;
|
|
$self->_insert($tag, $lines) if defined $lines;
|
|
}
|
|
shift @$headlines if @$headlines > 0 and $headlines->[0] =~ /^\s*$/;
|
|
|
|
return $self;
|
|
}
|
|
|
|
sub parse_params {
|
|
# --------------------------------------------------------------------------
|
|
# my $params = $obj->parse_params($raw);
|
|
# --------------------------------------
|
|
# Takes a raw line from the header and returns a hash ref of param name to
|
|
# it's value. The initial value is stored with key _.
|
|
#
|
|
my ($self, $raw) = @_;
|
|
my $params = {};
|
|
my $param;
|
|
|
|
# Get raw field, and unfold it:
|
|
defined $raw or $raw = '';
|
|
$raw =~ tr/\n//d;
|
|
|
|
# Extract special first parameter:
|
|
$raw =~ m/\A$SPCZ($FIRST)$SPCZ/og or return {}; # nada!
|
|
$params->{'_'} = $1;
|
|
|
|
# Extract subsequent parameters.
|
|
# No, we can't just "split" on semicolons: they're legal in quoted strings!
|
|
while (1) { # keep chopping away until done...
|
|
$raw =~ m/$SPCZ\;$SPCZ/og or last; # skip leading separator
|
|
$raw =~ m/($PARAMNAME)\s*=\s*/og or last; # give up if not a param
|
|
$param = lc($1);
|
|
$raw =~ m/"((?:[^\\"]+|\\.)+)"|($TOKEN)/g or last; # give up if no value
|
|
my ($quoted, $unquoted) = ($1, $2);
|
|
$quoted =~ s/\\(.)/$1/g if $quoted;
|
|
$params->{$param} = $quoted || $unquoted;
|
|
}
|
|
|
|
return $params;
|
|
}
|
|
|
|
sub effective_type {
|
|
# --------------------------------------------------------------------------
|
|
# $obj->effective_type;
|
|
# ---------------------
|
|
# $obj->effective_type('text/plain');
|
|
# -----------------------------------
|
|
# This is a set or get method for the effective type. If there is no
|
|
# effective type this method returns the mime type.
|
|
#
|
|
my $self = shift;
|
|
$self->{efftype} = shift if @_;
|
|
return ($self->{efftype} ? lc($self->{efftype}) : $self->mime_type);
|
|
}
|
|
|
|
sub get {
|
|
# --------------------------------------------------------------------------
|
|
# my @to = $obj->get('to');
|
|
# -------------------------
|
|
# my $from = $obj->get('from');
|
|
# -----------------------------
|
|
# my $recieved = $obj->get('recieved', 1);
|
|
# ----------------------------------------
|
|
# This provides a method to access the header tags. In list context this
|
|
# always returns an array of all the specified tags.
|
|
# In scalar context returns the tag's value specified by position (second
|
|
# argument). If no position is specified returns the first accurence.
|
|
#
|
|
my ($self, $tag, $pos) = @_;
|
|
$pos ||= 0;
|
|
unless (defined $tag) {
|
|
return keys %{$self->{header_lines}};
|
|
}
|
|
$tag = lc $tag;
|
|
exists $self->{header_lines}->{$tag} or return;
|
|
my @name = @{$self->{header_lines}->{$tag}};
|
|
return wantarray ? @name : $name[$pos];
|
|
}
|
|
|
|
sub set {
|
|
# --------------------------------------------------------------------------
|
|
# $obj->set( to => 'scott@gossamer-threads.com', $pos );
|
|
# ------------------------------------------------
|
|
# This method allows you to set tags in the header. If you do not specify a
|
|
# position (third argument) all the tags of that name are deleted and what
|
|
# you specify for that tag is put in its place.
|
|
#
|
|
my ($self, $tag, $val, $pos) = @_;
|
|
defined $tag or return $self->error("BADARGS", "FATAL", '$obj->set($tag, $value, $pos); $pos is optional but $tag and $value is not');
|
|
defined $val or return $self->error("BADARGS", "FATAL", '$obj->set($tag, $value, $pos); $pos is optional but $tag and $value is not');
|
|
$tag = lc($tag);
|
|
unless (defined $pos) {
|
|
@{$self->{header_lines}->{$tag}} = ($val);
|
|
for (my $i = 0; $i < @{$self->{header_order}}; $i++) {
|
|
if (lc $self->{header_order}->[$i]->[0] eq $tag) {
|
|
splice @{$self->{header_order}}, $i, 1;
|
|
$i--;
|
|
}
|
|
}
|
|
push @{$self->{header_order}}, [$self->_tag_case($tag), 0];
|
|
}
|
|
else {
|
|
$self->{header_lines}->{$tag}->[$pos] = $val;
|
|
my $found;
|
|
for (@{$self->{header_order}}) {
|
|
if (lc $_->[0] eq $tag and $_->[1] == $pos) {
|
|
$found++;
|
|
last;
|
|
}
|
|
}
|
|
push @{$self->{header_order}}, [$self->_tag_case($tag), $pos] unless $found;
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
sub delete {
|
|
# --------------------------------------------------------------------------
|
|
# $obj->delete('to');
|
|
# -------------------
|
|
# This method allows you to delete tags in the header of the message. If
|
|
# there are more than one of the specified tag they are all deleted. e.g.
|
|
# there is often more than one Received tag in a header. If you specify that
|
|
# tag all of them are deleted.
|
|
#
|
|
# $obj->delete('received', $pos);
|
|
# -------------------------------
|
|
# This allows you to delete tages at a certain position. e.g. If you want to
|
|
# delete the second Received tag you would do:
|
|
# $obj->delete('received', 1);
|
|
# Note that position is zero based.
|
|
# Returns whatever was deleted or spliced.
|
|
#
|
|
my $self = shift;
|
|
defined $_[0] or return $self->error("BADARGS", "FATAL", '$obj->delete($tag)');
|
|
my $tag = lc($_[0]);
|
|
if (defined $_[1] and exists $self->{header_lines}->{$tag}) {
|
|
my $del = splice(@{$self->{header_lines}->{$tag}}, $_[1], 1);
|
|
return wantarray ? @$del : $del->[0];
|
|
}
|
|
elsif (exists $self->{header_lines}->{$tag} and not defined $_[1]) {
|
|
my $del = delete $self->{header_lines}->{$tag};
|
|
return wantarray ? @$del : $del->[0];
|
|
}
|
|
else {
|
|
return;
|
|
}
|
|
}
|
|
|
|
sub size {
|
|
# --------------------------------------------------------------------------
|
|
# my $part_size = $obj->size;
|
|
# ---------------------------
|
|
# Returns the total size of the part including the body.
|
|
#
|
|
my $self = shift;
|
|
my $size = 0;
|
|
if ($self->{body_in}) {
|
|
if ($self->{body_in} eq 'MEMORY') {
|
|
$size = length $self->{data};
|
|
}
|
|
elsif ($self->{body_in} eq 'HANDLE') {
|
|
$size = -s $self->{io};
|
|
}
|
|
elsif ($self->{body_in} eq 'FILE') {
|
|
$size = -s $self->{path};
|
|
}
|
|
}
|
|
$size += length $self->header_as_string;
|
|
return $size;
|
|
}
|
|
|
|
sub preamble {
|
|
# --------------------------------------------------------------------------
|
|
# my $preamble = $obj->preamble;
|
|
# ------------------------------
|
|
# $obj->preamble("My cool preamble");
|
|
# -----------------------------------
|
|
# This is a set or get method for the preamble of this part.
|
|
#
|
|
my ($self, $lines) = @_;
|
|
$self->{preamble} = $lines if @_ > 1;
|
|
return $self->{preamble};
|
|
}
|
|
|
|
sub epilogue {
|
|
# --------------------------------------------------------------------------
|
|
# my $epilogue = $obj->epilogue;
|
|
# ------------------------------
|
|
# $obj->epilogue("My cool epilogue");
|
|
# -----------------------------------
|
|
# This is a set or get method for the parts epilogue.
|
|
#
|
|
my ($self, $lines) = @_;
|
|
$self->{epilogue} = $lines if @_ > 1;
|
|
return $self->{epilogue};
|
|
}
|
|
|
|
sub header_charset {
|
|
# -----------------------------------------------------------------------------
|
|
# my $charset = $obj->header_charset;
|
|
# -----------------------------------
|
|
# $obj->header_charset('windows-1256');
|
|
# -------------------------------------
|
|
# This sets or retrieves the charset that will be used to encode the header.
|
|
# The default encoding is ISO-8859-1.
|
|
#
|
|
my $self = shift;
|
|
$self->{header_charset} = shift if @_;
|
|
return $self->{header_charset};
|
|
}
|
|
|
|
sub binmode {
|
|
# --------------------------------------------------------------------------
|
|
# $obj->binmode(1);
|
|
# -----------------
|
|
# Sets the binmode flag for all opens done on the body of the message.
|
|
# If this flag is set binmode will be used on all files.
|
|
#
|
|
my $self = shift;
|
|
my $mode = shift;
|
|
$self->{mode} = $mode if defined $mode;
|
|
return $self->{mode};
|
|
}
|
|
|
|
sub mime_type {
|
|
# --------------------------------------------------------------------------
|
|
# my $type = $obj->mime_type;
|
|
# ---------------------------
|
|
# $obj->mime_type($default_type);
|
|
# -------------------------------
|
|
# Returns the content-type of this message. If there is no content type
|
|
# and you passed in a default, returns that.
|
|
#
|
|
my ($self, $default) = @_;
|
|
$self->{mime_type} = $default if @_ > 1;
|
|
return lc($self->mime_attr('content-type') || $self->{mime_type} || $self->get('content-type'));
|
|
}
|
|
|
|
sub is_multipart {
|
|
# --------------------------------------------------------------------------
|
|
# $obj->is_multipart;
|
|
# -------------------
|
|
# Returns true if this part is a multipart part. Returns false otherwise.
|
|
#
|
|
my $self = shift;
|
|
my ($type, $subtype) = split('/', $self->mime_type);
|
|
$type ||= 'text';
|
|
$subtype ||= 'plain';
|
|
return (($type eq 'multipart') ? 1 : 0);
|
|
}
|
|
|
|
sub parts {
|
|
# --------------------------------------------------------------------------
|
|
# my @parts = $obj->parts;
|
|
# ------------------------
|
|
# Returns an array of part objects if there are any parts to this part.
|
|
# Returns false if there are no parts.
|
|
#
|
|
my $self = shift;
|
|
$self->{parts} ||= [];
|
|
if (@_ > 0) {
|
|
push @{$self->{parts}}, @_;
|
|
unless ($self->multipart_boundary) {
|
|
$self->multipart_boundary("---------=_" . scalar(time) . "-$$-" . int(rand(time)/2));
|
|
}
|
|
}
|
|
return wantarray ? @{$self->{parts}} : $self->{parts};
|
|
}
|
|
|
|
sub mime_encoding {
|
|
# --------------------------------------------------------------------------
|
|
# my $encoding = $obj->mime_encoding;
|
|
# -----------------------------------
|
|
# Returns the content-transfer-encoding for the current part. This method
|
|
# returns 7bit if there is none.
|
|
#
|
|
my $self = shift;
|
|
return lc($self->mime_attr('content-transfer-encoding') || '7bit');
|
|
}
|
|
|
|
sub multipart_boundary {
|
|
# --------------------------------------------------------------------------
|
|
# my $boundary = $obj->multipart_boundary;
|
|
# ----------------------------------------
|
|
# Set get method for the multipart boundary to use if this part is a
|
|
# multipart message.
|
|
#
|
|
my $self = shift;
|
|
my $boundary = shift;
|
|
if (defined $boundary) {
|
|
$self->{boundary} = $boundary;
|
|
}
|
|
elsif ($self->mime_attr('content-type.boundary')) {
|
|
$self->{boundary} = $self->mime_attr('content-type.boundary');
|
|
}
|
|
return $self->{boundary};
|
|
}
|
|
|
|
sub mime_attr {
|
|
# --------------------------------------------------------------------------
|
|
# $obj->mime_attr('type.subtype');
|
|
# --------------------------------
|
|
# Given a string that consists of type.sybtype returns the value of
|
|
# subtype. If only type is sent returns the value of type. This is useful
|
|
# when dealing with header fields such as Content-Type where it can have
|
|
# a subtype like:
|
|
# Content-Type: text/plain; name="foo.js"
|
|
#
|
|
|
|
my ($self, $attr) = @_;
|
|
|
|
# Break attribute name up:
|
|
my ($tag, $subtag) = split /\./, $attr;
|
|
$subtag ||= '_';
|
|
defined($tag) or return $self->error("BADARGS", "FATAL", '$obj->mime_attr ("type.subtype")');
|
|
|
|
$tag = lc($tag);
|
|
|
|
# There should only be one of these.
|
|
defined($self->{header_lines}->{$tag}->[-1]) or return;
|
|
my $field = $self->parse_params($self->{header_lines}->{$tag}->[-1]);
|
|
return $field->{$subtag};
|
|
}
|
|
|
|
sub decode_mimewords {
|
|
# --------------------------------------------------------------------------
|
|
# my $line = decode_mimewords($line);
|
|
# -----------------------------------
|
|
# Checks $line for mime encodings and decodes them if found. The string
|
|
# passwed in should be a line in the header.
|
|
#
|
|
my $encstr = shift;
|
|
my @tokens;
|
|
$@ = ''; # error-return
|
|
|
|
$encstr =~ s{(\?\=)\r?\n[ \t](\=\?)}{$1$2}gs;
|
|
pos($encstr) = 0;
|
|
|
|
# Decode:
|
|
my ($charset, $encoding, $enc, $dec);
|
|
while (1) {
|
|
last if (pos($encstr) >= length($encstr));
|
|
my $pos = pos($encstr); # save it
|
|
|
|
# Case 1: are we looking at "=?..?..?="?
|
|
if ($encstr =~ m{\G # from where we left off..
|
|
=\?([^?]*) # "=?" + charset +
|
|
\?([bq]) # "?" + encoding +
|
|
\?([^?]+) # "?" + data maybe with spcs +
|
|
\?= # "?="
|
|
}xgi) {
|
|
($charset, $encoding, $enc) = ($1, lc($2), $3);
|
|
$dec = (($encoding eq 'q') ? _decode_q($enc) : _decode_b($enc));
|
|
push @tokens, [$dec, $charset];
|
|
next;
|
|
}
|
|
|
|
# Case 2: are we looking at a bad "=?..." prefix?
|
|
# We need this to detect problems for case 3, which stops at "=?":
|
|
pos($encstr) = $pos; # reset the pointer.
|
|
if ($encstr =~ m{\G=\?}xg) {
|
|
$@ .= qq|unterminated "=?..?..?=" in "$encstr" (pos $pos)\n|;
|
|
push @tokens, ['=?'];
|
|
next;
|
|
}
|
|
|
|
# Case 3: are we looking at ordinary text?
|
|
pos($encstr) = $pos; # reset the pointer.
|
|
if ($encstr =~ m{\G # from where we left off...
|
|
([\x00-\xFF]*? # shortest possible string,
|
|
\n*) # followed by 0 or more NLs,
|
|
(?=(\Z|=\?)) # terminated by "=?" or EOS
|
|
}xg) {
|
|
length($1) or die "GT::Mail::Parts: internal logic err: empty token.";
|
|
push @tokens, [$1];
|
|
next;
|
|
}
|
|
|
|
# Case 4: bug!
|
|
die "GT:Mail::Parts: unexpected case: ($encstr) pos $pos Please alert developer.";
|
|
}
|
|
return (wantarray ? @tokens : join('', map { $_->[0] } @tokens));
|
|
}
|
|
|
|
sub encode_mimewords {
|
|
# --------------------------------------------------------------------------
|
|
# my $line = decode_mimewords($line);
|
|
# -----------------------------------
|
|
# Checks $line for non-ansii and encodes them if found. The string
|
|
# passed in should be a line in the header.
|
|
#
|
|
my $words = shift;
|
|
return $words if $words =~ /\A[\x09\x0A\x0D\x20-\x7E]*\Z/;
|
|
my $encoding = uc(shift || 'Q');
|
|
my $charset = uc(shift || 'ISO-8859-1');
|
|
$words =~ s{([a-zA-Z0-9\x7F-\xFF]{1,18})}{
|
|
my $word = $1;
|
|
my $ret;
|
|
if ($word =~ /\A[\x09\x0A\x0D\x20-\x7E]*\Z/o) {
|
|
$ret = $word
|
|
}
|
|
else {
|
|
$ret = "=?$charset?$encoding?" . ($encoding eq 'Q' ? _encode_q($word) : _encode_b($word)) . '?=';
|
|
}
|
|
|
|
# As per rfc2047 section 2, we can not have lines more then 75 characters long
|
|
# we must seperate long encodings with a CRLF and space.
|
|
if (length $word == 18 and substr($words, pos($words) + 18, 1) =~ /\S/) {
|
|
$ret .= $CRLF . " ";
|
|
}
|
|
$ret;
|
|
}oeg;
|
|
return $words;
|
|
}
|
|
|
|
sub fold {
|
|
# --------------------------------------------------------------------------
|
|
my ($tag, $line) = @_;
|
|
|
|
# Remove any newlines that shouldn't be in the header
|
|
$line =~ s/(?:\r?\n)+$//;
|
|
$line =~ s/(?:\r?\n)(?=[^\t ])/ /g;
|
|
|
|
# Don't fold if we don't need to
|
|
my $fold = 0;
|
|
for (split /$CRLF/, $line) {
|
|
if (length $_ > 72) {
|
|
$fold = 1;
|
|
last;
|
|
}
|
|
}
|
|
return $line unless $fold;
|
|
|
|
# First we unfold
|
|
$line =~ s/[\t ]*[$CRLF][\t ]*/ /g;
|
|
my $key = lc $tag;
|
|
|
|
# Special case for TO fields
|
|
if ($key eq 'to' or $key eq 'cc' or $key eq 'bcc') {
|
|
$line =~ s/,[\t ]+/,$CRLF /g if length $line > 72;
|
|
return $line;
|
|
}
|
|
|
|
# We have nothing to wrap on :(
|
|
if ($line !~ /\s/) {
|
|
return $line;
|
|
}
|
|
|
|
# Wrap the line
|
|
$line = GT::Text::Tools->linewrap($line, 72, { eol => $CRLF, nowrap => [ q{=\?([^?]*)\?([bq])\?([^?]+)\?=} ] });
|
|
|
|
# Outlook has a problem with unfolding subject lines with more then one leading
|
|
# space. It ends up putting that into the actual subject.
|
|
$line =~ s/^/ /mg;
|
|
$line =~ s/^\s+//;
|
|
$line =~ s/$CRLF$//;
|
|
return $line;
|
|
}
|
|
|
|
sub header_as_string {
|
|
# --------------------------------------------------------------------------
|
|
# my $header = $obj->header_as_string;
|
|
# ------------------------------------
|
|
# Returns the header for this part as a string. This actually creates the
|
|
# header when called so it has a bit of overhead.
|
|
#
|
|
my ($self) = @_;
|
|
my $ret = '';
|
|
my %head = %{$self->{header_lines}};
|
|
|
|
if ($self->{headers_intact}) {
|
|
for (@{$self->{header_order}}) {
|
|
my ($tag, $j) = @{$_};
|
|
my $ltag = lc $tag;
|
|
if (exists $head{$ltag} and defined $head{$ltag}->[$j]) {
|
|
$ret .= "$tag: " . encode_mimewords($head{$ltag}->[$j], undef, $self->{header_charset}) . $CRLF;
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
foreach my $tag (@HEADER) {
|
|
my $key = lc $tag;
|
|
$tag = $self->_tag_case($tag);
|
|
if ($key eq 'content-type') {
|
|
if ($self->get($tag)) {
|
|
my $val = $self->get($key);
|
|
$ret .= "$tag: " . fold($tag, encode_mimewords($val, undef, $self->{header_charset})) . $CRLF;
|
|
}
|
|
elsif ($self->mime_type) {
|
|
my $val = $self->mime_type;
|
|
$ret .= "$tag: " . fold($tag, encode_mimewords($val, undef, $self->{header_charset})) . $CRLF;
|
|
}
|
|
else {
|
|
$ret .= "$tag: text/plain$CRLF";
|
|
}
|
|
delete $head{$key};
|
|
next;
|
|
}
|
|
next unless exists $self->{header_lines}{$key};
|
|
foreach (@{$self->{header_lines}->{$key}}) {
|
|
next unless defined $_;
|
|
$ret .= "$tag: " . fold($tag, $key eq 'received' ? $_ : encode_mimewords($_, undef, $self->{header_charset})) . $CRLF; # Can not encode recieved lines
|
|
}
|
|
delete $head{$key};
|
|
}
|
|
for my $key (sort keys %head) {
|
|
next unless defined $key;
|
|
my $tag = $self->_tag_case($key);
|
|
foreach (@{$self->{header_lines}{$key}}) {
|
|
next unless defined $_;
|
|
$ret .= "$tag: " . fold($tag, encode_mimewords($_, undef, $self->{header_charset})) . $CRLF;
|
|
}
|
|
}
|
|
}
|
|
return $ret;
|
|
}
|
|
|
|
sub split_field {
|
|
# --------------------------------------------------------------------------
|
|
# my @to = $obj->split_field('to');
|
|
# ---------------------------------
|
|
# Takes the tag you wish to split on and returns the parts split on
|
|
# \s*,\s*. This is usfull only for fields that have comma seperated
|
|
# values such as emails. The split know about quotes and will not split
|
|
# inside quotes.
|
|
#
|
|
my ($self, $tag) = @_;
|
|
defined($tag) or $tag = 'to';
|
|
$tag = lc($tag);
|
|
exists($self->{header_lines}->{$tag}) or return;
|
|
(@{$self->{header_lines}->{$tag}} > 0) or return;
|
|
my @ret;
|
|
for my $val (@{$self->{header_lines}->{$tag}}) {
|
|
push @ret, GT::Mail::Parts->split_line('\s*,\s*', $val);
|
|
}
|
|
return @ret;
|
|
}
|
|
|
|
sub split_line {
|
|
# --------------------------------------------------------------------------
|
|
# Class->split_line('\s*,\s*', $line);
|
|
# -----------------------------
|
|
# Splits a line given a delimitor regex and the line. Returns an array
|
|
# of the peices
|
|
#
|
|
# We will be testing undef strings
|
|
my $class = shift;
|
|
local $^W;
|
|
|
|
my ($delimiter, $line) = @_;
|
|
$delimiter =~ s/(\s)/\\$1/g;
|
|
|
|
my ($quote, $quoted, $unquoted, $delim, $word, @pieces);
|
|
|
|
@pieces = $line =~ m{
|
|
\G
|
|
(
|
|
(?:
|
|
" (?:\\.|[^\\"]+(?=[\\"])) * "
|
|
|
|
|
\s ' (?:\\.|[^\\']+(?=[\\'])) * ' # Hack to get around email addresses with ' in them
|
|
|
|
|
\\.
|
|
|
|
|
.
|
|
)+? # May segfault on very long lines (around 8000+ iterations of this group)
|
|
)
|
|
(?: $ | (?:$delimiter)+ )
|
|
}gsx;
|
|
return @pieces;
|
|
}
|
|
|
|
########################################################
|
|
## Body Methods ##
|
|
########################################################
|
|
|
|
sub suggest_encoding {
|
|
# --------------------------------------------------------------------------
|
|
# my $encoding = $obj->suggest_encoding;
|
|
# --------------------------------------
|
|
# This method attempts to guess an encoding for this part. It does this
|
|
# by looking at the mime type and possibly even scaning the body of the
|
|
# part.
|
|
#
|
|
my $self = shift;
|
|
my $eff_type = $self->effective_type || '';
|
|
my ($type) = split('/', $eff_type);
|
|
$type ||= 'text';
|
|
$self->{body_in} ||= '';
|
|
if ((($type eq 'text') || ($type eq 'message'))) { # scan message body
|
|
if ($self->{body_in} eq 'MEMORY') {
|
|
return 'quoted-printable';
|
|
}
|
|
my ($IO, $unclean);
|
|
if ($self->{body_in} eq 'FILE') {
|
|
$IO = $self->open("r");
|
|
}
|
|
elsif ($self->{body_in} eq 'HANDLE') {
|
|
$IO = $self->{io};
|
|
}
|
|
else {
|
|
return 'binary';
|
|
}
|
|
|
|
# Scan message for 7bit-cleanliness:
|
|
while (<$IO>) {
|
|
last if ($unclean = ((length($_) > 999) or /[\200-\377]/));
|
|
}
|
|
# Reset our handle back to the beginning.
|
|
seek $IO, 0, 0;
|
|
|
|
# Return '7bit' if clean; try and encode if not...
|
|
# Note that encodings are not permitted for messages!
|
|
return (($type eq 'message') ? 'binary' : 'quoted-printable');
|
|
}
|
|
else {
|
|
return ($type eq 'multipart') ? 'binary' : 'base64';
|
|
}
|
|
}
|
|
|
|
sub recommended_filename {
|
|
# --------------------------------------------------------------------------
|
|
# my $name = $obj->recommended_filename;
|
|
# --------------------------------------
|
|
# This method attempts to guess a filename for this part. The file name
|
|
# returned is checked for any illegal characters. Any found are stripted
|
|
# and what is left is returned. This method will return empty string
|
|
# on failure.
|
|
#
|
|
my $self = shift;
|
|
my $value;
|
|
|
|
# Start by trying to get 'filename' from the 'content-disposition':
|
|
$value = $self->mime_attr('content-disposition.filename');
|
|
return $self->_evil_file($value) if (defined($value) and $value ne '');
|
|
|
|
# No? Okay, try to get 'name' from the 'content-type':
|
|
$value = $self->mime_attr('content-type.name');
|
|
return $self->_evil_file($value) if (defined($value) and $value ne '');
|
|
return '';
|
|
}
|
|
|
|
sub body_as_string {
|
|
# --------------------------------------------------------------------------
|
|
# my $body = $obj->body_as_string;
|
|
# --------------------------------
|
|
# This method returns the entire body as a string. You should probably
|
|
# not be calling this unless you are sure the body is small enough to fit
|
|
# into memory.
|
|
#
|
|
my $self = shift;
|
|
my ($ret, $fh);
|
|
defined($self->{body_in}) or return;
|
|
if ($self->{body_in} eq 'FILE') {
|
|
$fh = $self->open('r');
|
|
}
|
|
elsif ($self->{body_in} eq 'HANDLE') {
|
|
$fh = $self->{io};
|
|
}
|
|
else {
|
|
return $self->{data};
|
|
}
|
|
local $/;
|
|
$ret = <$fh>;
|
|
seek($fh, 0, 0);
|
|
return $ret;
|
|
}
|
|
|
|
sub open {
|
|
# --------------------------------------------------------------------------
|
|
# my $fh = $obj->open($flag);
|
|
# ---------------------------
|
|
# If the mody is stored in a path this method opens it given the flag r or
|
|
# w. r is to open to read and w is to open to write. If you open to write
|
|
# the file is trucated. If the file is not stored in file this method will
|
|
# return undef.
|
|
#
|
|
my ($self, $mode) = @_;
|
|
my $io = \do { local *FH; *FH };
|
|
$self->{body_in} eq 'FILE'
|
|
or return $self->error('BADARGS', 'FATAL', 'The body must be stored in file for this method to be called');
|
|
my $path = $self->body_path;
|
|
$self->debug("Opening $path") if $self->{_debug};
|
|
if ($mode eq 'w') {
|
|
open $io, ">$path" or return $self->error("WRITEOPEN", "FATAL", $path, $!);
|
|
}
|
|
elsif ($mode eq 'r') {
|
|
open $io, "<$path" or return $self->error("READOPEN", "FATAL", $path, $!);
|
|
}
|
|
else {
|
|
return $self->error("BADARGS", "FATAL", '$obj->open ("r"); -or- $obj->open ("w");');
|
|
}
|
|
CORE::binmode($io);
|
|
return $io;
|
|
}
|
|
|
|
sub body_in {
|
|
# --------------------------------------------------------------------------
|
|
# my $in = $obj->body_in;
|
|
# -----------------------
|
|
# This method returns a flag that says where the body for this part is
|
|
# stored. It will be one of 4 thing. 'MEMORY', 'HANDLE', 'FILE' or undef.
|
|
# If this method returns undef it means this part does not have a body.
|
|
#
|
|
my $self = shift;
|
|
$self->{body_in} = shift if (@_ > 0);
|
|
return $self->{body_in};
|
|
}
|
|
|
|
sub body_data {
|
|
# --------------------------------------------------------------------------
|
|
# my $body = $obj->body_data;
|
|
# ---------------------------
|
|
# $obj->body_data($body);
|
|
# -----------------------
|
|
# This is a set or get method for the body if you are storing the body
|
|
# in memory or the body is stored in memory. This method sets the body_in
|
|
# flag to MEMORY if called with the body of the message.
|
|
#
|
|
my ($self, $data) = @_;
|
|
if (defined $data) {
|
|
$self->{body_in} = 'MEMORY';
|
|
if (ref $data) {
|
|
return $self->error('BADARGS', 'FATAL', "body_data: Body in memory can not be a referece");
|
|
}
|
|
$self->{data} = $data;
|
|
}
|
|
return $self->{data};
|
|
}
|
|
|
|
sub body_handle {
|
|
# --------------------------------------------------------------------------
|
|
# my $fh = $obj->body_handle;
|
|
# ---------------------------
|
|
# $obj->body_handle(\*FH);
|
|
# ------------------------
|
|
# This is a set or get method for the body handle. This sets the body_in
|
|
# flag to HANDLE if a file handle is passed in.
|
|
#
|
|
my ($self, $fh) = @_;
|
|
if ($fh and ref $fh eq "GLOB") {
|
|
$self->{io} = $fh;
|
|
$self->{body_in} = 'HANDLE';
|
|
}
|
|
return $self->{io};
|
|
}
|
|
|
|
sub body_path {
|
|
# --------------------------------------------------------------------------
|
|
# my $path = $obj->body_path;
|
|
# ---------------------------
|
|
# $obj->body_path('/path/to/file');
|
|
# ---------------------------------
|
|
# This is a set or get method for the body path. This sets the body_in
|
|
# flag to FILE if called to set the path.
|
|
#
|
|
my $self = shift;
|
|
my $path = shift;
|
|
if (defined $path) {
|
|
$self->{path} = $path;
|
|
$self->{body_in} = 'FILE';
|
|
}
|
|
return $self->{path};
|
|
}
|
|
|
|
|
|
#####################################################
|
|
## Private Methods ##
|
|
#####################################################
|
|
|
|
sub _insert {
|
|
# --------------------------------------------------------------------------
|
|
my ($self, $tag, $val) = @_;
|
|
return unless defined $val and defined $tag;
|
|
push @{$self->{header_lines}->{lc $tag}}, $val;
|
|
push @{$self->{header_order}}, [$tag, $#{$self->{header_lines}->{lc $tag}}];
|
|
}
|
|
|
|
sub _fmt_line {
|
|
# --------------------------------------------------------------------------
|
|
my ($self, $tag, $line) = @_;
|
|
|
|
($tag) = $line =~ /^($FIELD_NAME|From )/oi unless defined $tag;
|
|
|
|
defined($tag) and $tag =~ /^($FIELD_NAME|From )/oi or return $self->error("BADTAG", "WARN", $tag);
|
|
$tag =~ s/^([^ :]+):/$1/;
|
|
|
|
# Ensure the line starts with tag
|
|
($line =~ /[^ :]+\s*:\s?(.*)$/s) and $line = $1;
|
|
$line =~ s/\r?\n$//;
|
|
$line = decode_mimewords($line);
|
|
return ($tag, $line);
|
|
}
|
|
|
|
sub _tag_case {
|
|
# --------------------------------------------------------------------------
|
|
my ($self, $tag) = @_;
|
|
defined $tag or return $self->error("BADARGS", "FATAL", '$self->_tag_case ($tag)');
|
|
|
|
$tag =~ s/:$//;
|
|
$tag =~ s/^\s*//;
|
|
$tag =~ s/\s*$//;
|
|
|
|
# Change the case of the tag
|
|
# eq Message-Id
|
|
$tag =~ s/\b([a-z]+)/\u$1/g;
|
|
|
|
return $tag;
|
|
}
|
|
|
|
sub _decode_b {
|
|
# --------------------------------------------------------------------------
|
|
my $str = shift;
|
|
require GT::Mail::Parse;
|
|
return GT::Mail::Parse::decode_base64($str);
|
|
}
|
|
|
|
sub _decode_q {
|
|
# --------------------------------------------------------------------------
|
|
my $str = shift;
|
|
$str =~ s/=([\da-fA-F]{2})/pack("C", hex($1))/ge; # RFC-1522, Q rule 1
|
|
$str =~ s/_/\x20/g; # RFC-1522, Q rule 2
|
|
return $str;
|
|
}
|
|
|
|
sub _encode_q {
|
|
# --------------------------------------------------------------------------
|
|
my ($word) = @_;
|
|
$word =~ s/([_?=\x00-\x1F\x7F-\xFF ])/sprintf("=%02X", ord($1))/eog;
|
|
return $word;
|
|
}
|
|
|
|
sub _encode_b {
|
|
# --------------------------------------------------------------------------
|
|
my ($word) = @_;
|
|
my $ret;
|
|
local $GT::Mail::Encoder::CRLF = $CRLF;
|
|
GT::Mail::Encoder->gt_encode(encoding => 'base64', in => $word, out => sub { $ret .= $_[0] });
|
|
return $ret;
|
|
}
|
|
|
|
sub _evil_file {
|
|
# --------------------------------------------------------------------------
|
|
my $self = shift;
|
|
my $file = shift;
|
|
my $l;
|
|
$self->debug("$file being changed") if $self->{_debug};
|
|
|
|
# Windows case
|
|
if ($file =~ m|\\|) {
|
|
$file = substr($file, rindex($file,'\\') + 1);
|
|
}
|
|
|
|
# Unix case
|
|
elsif ($file =~ m|/|) {
|
|
$file = substr($file, rindex($file,'/') + 1);
|
|
}
|
|
|
|
# Mac case
|
|
elsif ($file =~ m|:|) {
|
|
$file = substr($file, rindex($file,':') + 1);
|
|
}
|
|
$file =~ s{[^a-z_A-Z0-9\-\.]+}{}g;
|
|
return length($file) < 40 ? $file : substr($file,0,40);
|
|
}
|
|
|
|
1;
|
|
|
|
__END__
|
|
|
|
=head1 NAME
|
|
|
|
GT::Mail::Parts - Data storage class for MIME parts
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
use GT::Mail;
|
|
|
|
my $mail = new GT::Mail;
|
|
|
|
my $top_part = $mail->parse('/path/to/email');
|
|
|
|
# Access the emails as an array
|
|
my @to = $top_part->split_field('to');
|
|
my @from = $top_part->split_field('from');
|
|
|
|
# Access to the header fields
|
|
my $mailer = $top_part->get('X-Mailer');
|
|
my $subject = $top_part->get('Subject');
|
|
|
|
# Access to this parts sub part
|
|
if ($top_part->is_multipart) {
|
|
my @parts = $top_parts->parts;
|
|
for my $part (@parts) {
|
|
|
|
# Access parts of the header
|
|
print "Filename: ", $part->recommended_filename, "\n";
|
|
print "Part is multi-part\n" if $part->is_multipart;
|
|
|
|
# Get the body as a string
|
|
my $body = $part->body_as_string;
|
|
}
|
|
}
|
|
|
|
# Change who it is to
|
|
$top_part->set('to', 'scott@gossamer-threads.com');
|
|
|
|
# Remove the bcc line
|
|
$top_part->delete('bcc');
|
|
|
|
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
GT::Mail::Parts is a class to provide methods to change and
|
|
access a MIME messages. The object for this class is meant to
|
|
be istansiated from L<GT::Mail>.
|
|
|
|
=head2 effective_type - Access the effective MIME type
|
|
|
|
my $type = $obj->effective_type;
|
|
|
|
if ($type eq 'application/octet-stream') {
|
|
...
|
|
}
|
|
|
|
This method returns the effective MIME Type of this objects part.
|
|
|
|
=head2 get - Access header tags.
|
|
|
|
my $subj = $obj->get('Subject');
|
|
|
|
# or if there is more than one
|
|
my @recv = $obj->get('Received');
|
|
|
|
Used to access any of the tags in the header of the MIME part. If the
|
|
tag requested is not present returns false. The first argument to this
|
|
method is the name of he tag you want to extract. This is case insensitive.
|
|
|
|
=head2 set - Set a header tag.
|
|
|
|
# Change who the email is to
|
|
$obj->set('to', 'scott@gossamer-threads.com');
|
|
|
|
# Change the second Received tag
|
|
$obj->set('Received', 'from unknown', 1);
|
|
|
|
Set any of the tags in the header. If the tag does not exist this will create
|
|
it. This method takes three arguments. The first is the name of the tag to
|
|
change or add, this is case insensitive. The second argument is the value for
|
|
the tag. The third zero based optional argument is the position. The position
|
|
will default to zero if it is not specified.
|
|
|
|
=head2 delete - Remove a header tag.
|
|
|
|
# Delete who the message is from
|
|
$obj->delete('from');
|
|
|
|
This method deletes the tag specified by the first argument from this MIME
|
|
part.
|
|
|
|
=head2 size - Access the total size.
|
|
|
|
my $size = $obj->size;
|
|
|
|
This method returns the total size of this part. This includes the header and
|
|
the body.
|
|
|
|
=head2 preamble - Set or get the preamble.
|
|
|
|
# Retrieve the preamble
|
|
my $pre = $obj->preamble;
|
|
|
|
# Set the preamble
|
|
$obj->preamble('This is a multi-part message in MIME format.');
|
|
|
|
This is a set get method for the preamble. The preamble is the part after the
|
|
head but before the first MIME boundary. This method makes no since if this
|
|
is not a multi-part part.
|
|
|
|
=head2 epilogue - Set or get the epilogue.
|
|
|
|
# Retrieve the epilogue
|
|
my $ep = $obj->epilogue;
|
|
|
|
# Set the epilogue
|
|
$obj->epilogue('This is my cool epilogue');
|
|
|
|
This is a set get method for the epilogue. The epilogue is the part of the
|
|
MIME part after the MIME boundary and before the next head. This method makes
|
|
no since if this is not a multi-part part.
|
|
|
|
=head2 mime_type - Set or get the MIME type.
|
|
|
|
my ($type, $subtype) = split('/', $obj->mime_type);
|
|
|
|
This method returns the MIME type of this part. You can pass in an argument
|
|
to change the MIME type as well. So you could do
|
|
|
|
$obj->mime_type('text/plain');
|
|
|
|
This is probably not such a good idea unless you are constructing the email from
|
|
scratch.
|
|
|
|
=head2 is_multipart - See if you have a multi-part part.
|
|
|
|
if ($obj->is_multipart) {
|
|
# do some multi-part stuff
|
|
}
|
|
|
|
Returns true is this part is a multi-part MIME part.
|
|
|
|
=head2 parts - Access sub parts.
|
|
|
|
my @parts = $obj->parts;
|
|
|
|
Returns the parts object this part contains. Returns false if this part does
|
|
no have any sub parts. The parts objects that returns are from this same class.
|
|
Any parts that are milti-part should contain parts.
|
|
|
|
=head2 multipart_boundary - Set or get the multi-part boundary.
|
|
|
|
my $boundary = $obj->multipart_boundary;
|
|
|
|
This returns the multi-part boundary for this part. Setting this is never needed
|
|
and may be removed in the future. This method only makes since if you are working
|
|
with a multi-part pert.
|
|
|
|
=head2 header_as_string - Access the whole header.
|
|
|
|
my $head = $obj->header_as_string;
|
|
|
|
This method creates and returns the header for this part. The returned header should
|
|
be fully rfc822 compliant. Avoid calling this method more than once, as it will build
|
|
the header from an internal data structure each time.
|
|
|
|
=head2 split_field - Retrieve the emails split up into an array.
|
|
|
|
my @to = $obj->split_field; # Defaults to 'to'
|
|
my @bcc = $obj->split_field('bcc');
|
|
|
|
This is mostly a utility method. It takes an option argument as to the field you want
|
|
the email address from (default is to), it then splits the emails on '\s*,\s*' that is
|
|
not inside quotes. Returns an array of the split up string.
|
|
|
|
=head2 suggest_encoding - Get a suggestion for encoding.
|
|
|
|
my $encode = $obj->suggest_encoding;
|
|
|
|
Returns a suggested encoding for the body of this message. This is useful to decide
|
|
what encoding you should use for the body when building an email. This is used in
|
|
L<GT::Mail::Parse> to decide how to encode the message body.
|
|
|
|
=head2 recommended_filename - Figure out the file name.
|
|
|
|
my $file = $obj->recommended_filename;
|
|
if ($file) {
|
|
...
|
|
}
|
|
|
|
This method tries to figure out the file name of this part. This does not make much
|
|
since if this part is not an attachment of some kind. Returns an empty string on
|
|
failure.
|
|
|
|
=head2 body_as_string - Get the body as a string.
|
|
|
|
This method returns the entire body of the MIME message as a string. You should not
|
|
use this method if the body could be large.
|
|
|
|
=head2 body_in - Find the body.
|
|
|
|
my $in = $obj->body_in;
|
|
my $body;
|
|
|
|
if ($in eq 'MEMORY') {
|
|
$body = $obj->body_data;
|
|
}
|
|
elsif ($in eq 'HANDLE') {
|
|
$body = $obj->body_handle;
|
|
}
|
|
elsif ($in eq 'FILE') {
|
|
$body = $obj->body_path;
|
|
}
|
|
|
|
This method returns the location of the body. The location can be one of three things:
|
|
|
|
=over 4
|
|
|
|
=item MEMORY
|
|
|
|
The body is in a string.
|
|
|
|
=item HANDLE
|
|
|
|
The body is in an IO handle.
|
|
|
|
=item FILE
|
|
|
|
The body is in a file.
|
|
|
|
=back
|
|
|
|
You would use this to decide what method to use to access the body. If the MIME message
|
|
was parsed into GT::Mail::Parts using L<GT::Mail::Parser> the body will always be in
|
|
a FILE.
|
|
|
|
|
|
=head2 body_data - Get the in memory body.
|
|
|
|
This method returns the body if it is stored in memory. Returns undefined if the body is
|
|
not in memory.
|
|
|
|
=head2 body_handle - Get an IO handle to the body.
|
|
|
|
This method returns a handle to the body if the body is stored in a handle for this part.
|
|
Returns undefined if not.
|
|
|
|
=head2 body_path - Get the location of the file the body is in.
|
|
|
|
This method returns the file path to the file the body is located in if the body for this
|
|
part is stored in a file. Returns undefined if not.
|
|
|
|
=head1 COPYRIGHT
|
|
|
|
Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
|
|
http://www.gossamer-threads.com/
|
|
|
|
=head1 VERSION
|
|
|
|
$Revision: 1.77 $
|
|
|
|
=cut
|