discourse-legacysite-perl/site/slowtwitch.com/cgi-bin/articles/GT/Mail/Parts.pm
2024-06-17 21:49:12 +10:00

1275 lines
41 KiB
Perl

# ====================================================================
# Gossamer Threads Module Library - http://gossamer-threads.com/
#
# GT::Mail::Parts
# Author : Scott Beck
# CVS Info : 087,071,086,086,085
# $Id: Parts.pm,v 1.84 2006/11/04 19:51:54 brewt Exp $
#
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
# ====================================================================
#
package GT::Mail::Parts;
# ===================================================================
# Pragmas
use strict;
use vars qw($VERSION @ISA $FIELD_NAME $PARAMNAME $FIRST $TSPECIAL $TOKEN $SPCZ $CRLF @HEADER $ERRORS $DEBUG);
use GT::Base;
use GT::Text::Tools;
$VERSION = sprintf "%d.%03d", q$Revision: 1.84 $ =~ /(\d+)\.(\d+)/;
@ISA = qw(GT::Base);
$DEBUG = 0;
$CRLF = "\015\012";
@HEADER = qw/Return-Path Received Date From Subject Sender To Cc Bcc Content-Type Content-Transfer-Encoding Content-Disposition X-Mailer Message-Id/;
$ERRORS = {};
# Pattern to match a RFC822 Field name ( Extract from RFC #822)
my $FIELD_NAME = '[^\x00-\x1f\x7f-\xff :]+:';
# Pattern to match parameter names (like fieldnames, but = not allowed):
my $PARAMNAME = '[^\x00-\x1f\x7f-\xff :=]+';
# Pattern to match the first value on the line:
my $FIRST = '[^\s\;\x00-\x1f\x7f-\xff]+';
# Pattern to match an RFC-1521 token:
#
# token = 1*<any (ASCII) CHAR except SPACE, CTLs, or tspecials> #
my $TSPECIAL = '()<>@,;:\</[]?="';
my $TOKEN = '[^ \x00-\x1f\x7f-\xff' . "\Q$TSPECIAL\E" . ']+';
# Pattern to match spaces or comments:
my $SPCZ = '(?:\s|\([^\)]*\))*';
sub new {
# --------------------------------------------------------------------------
# my $obj = $class->new;
# ----------------------
# Generic constructor.
#
my ($self, %opts) = @_;
my $type = ref($self) || $self;
$self = bless {
header_lines => {},
header_order => [],
headers_intact => $opts{headers_intact} || 0,
parts => [],
_debug => $opts{debug},
header_charset => $opts{header_charset} || 'ISO-8859-1',
}, $type;
$self;
}
##################################################
## Header Methods ##
##################################################
sub extract {
# --------------------------------------------------------------------------
# $obj->extract(\@headerline);
# ----------------------------
# Takes an array reference containing the header lines and parses it into
# an internal data structure accessible with methods in this class.
#
my ($self, $headlines) = @_;
my $lines;
# Remove any starting header lines which are really broken
shift @$headlines while @$headlines > 0 and $headlines->[0] !~ /^(?:$FIELD_NAME|From )/o;
while (@$headlines > 0 and $headlines->[0] =~ /^($FIELD_NAME|From )/o) {
my $tag = $1;
$lines = shift @$headlines;
$lines .= shift @$headlines while @$headlines > 0 and $headlines->[0] =~ /^[ \t]+/;
# Remove any following header lines which are really broken
shift @$headlines while @$headlines > 0 and $headlines->[0] !~ /^$FIELD_NAME/o;
($tag, $lines) = $self->_fmt_line($tag, $lines) or return;
$self->_insert($tag, $lines) if defined $lines;
}
shift @$headlines if @$headlines > 0 and $headlines->[0] =~ /^\s*$/;
return $self;
}
sub parse_params {
# --------------------------------------------------------------------------
# my $params = $obj->parse_params($raw);
# --------------------------------------
# Takes a raw line from the header and returns a hash ref of param name to
# it's value. The initial value is stored with key _.
#
my ($self, $raw) = @_;
my $params = {};
my $param;
# Get raw field, and unfold it:
defined $raw or $raw = '';
$raw =~ tr/\n//d;
# Extract special first parameter:
$raw =~ m/\A$SPCZ($FIRST)$SPCZ/og or return {}; # nada!
$params->{'_'} = $1;
# Extract subsequent parameters.
# No, we can't just "split" on semicolons: they're legal in quoted strings!
while (1) { # keep chopping away until done...
$raw =~ m/$SPCZ\;$SPCZ/og or last; # skip leading separator
$raw =~ m/($PARAMNAME)\s*=\s*/og or last; # give up if not a param
$param = lc($1);
$raw =~ m/"((?:[^\\"]+|\\.)+)"|($TOKEN)/g or last; # give up if no value
my ($quoted, $unquoted) = ($1, $2);
$quoted =~ s/\\(.)/$1/g if $quoted;
$params->{$param} = $quoted || $unquoted;
}
return $params;
}
sub effective_type {
# --------------------------------------------------------------------------
# $obj->effective_type;
# ---------------------
# $obj->effective_type('text/plain');
# -----------------------------------
# This is a set or get method for the effective type. If there is no
# effective type this method returns the mime type.
#
my $self = shift;
$self->{efftype} = shift if @_;
return ($self->{efftype} ? lc($self->{efftype}) : $self->mime_type);
}
sub get {
# --------------------------------------------------------------------------
# my @to = $obj->get('to');
# -------------------------
# my $from = $obj->get('from');
# -----------------------------
# my $recieved = $obj->get('recieved', 1);
# ----------------------------------------
# This provides a method to access the header tags. In list context this
# always returns an array of all the specified tags.
# In scalar context returns the tag's value specified by position (second
# argument). If no position is specified returns the first accurence.
#
my ($self, $tag, $pos) = @_;
$pos ||= 0;
unless (defined $tag) {
return keys %{$self->{header_lines}};
}
$tag = lc $tag;
exists $self->{header_lines}->{$tag} or return;
my @name = @{$self->{header_lines}->{$tag}};
return wantarray ? @name : $name[$pos];
}
sub set {
# --------------------------------------------------------------------------
# $obj->set( to => 'scott@gossamer-threads.com', $pos );
# ------------------------------------------------
# This method allows you to set tags in the header. If you do not specify a
# position (third argument) all the tags of that name are deleted and what
# you specify for that tag is put in its place.
#
my ($self, $tag, $val, $pos) = @_;
defined $tag or return $self->error("BADARGS", "FATAL", '$obj->set($tag, $value, $pos); $pos is optional but $tag and $value is not');
defined $val or return $self->error("BADARGS", "FATAL", '$obj->set($tag, $value, $pos); $pos is optional but $tag and $value is not');
$tag = lc($tag);
unless (defined $pos) {
@{$self->{header_lines}->{$tag}} = ($val);
for (my $i = 0; $i < @{$self->{header_order}}; $i++) {
if (lc $self->{header_order}->[$i]->[0] eq $tag) {
splice @{$self->{header_order}}, $i, 1;
$i--;
}
}
push @{$self->{header_order}}, [$self->_tag_case($tag), 0];
}
else {
$self->{header_lines}->{$tag}->[$pos] = $val;
my $found;
for (@{$self->{header_order}}) {
if (lc $_->[0] eq $tag and $_->[1] == $pos) {
$found++;
last;
}
}
push @{$self->{header_order}}, [$self->_tag_case($tag), $pos] unless $found;
}
return 1;
}
sub add {
# --------------------------------------------------------------------------
# $obj->add(To => 'scott@gossamer-threads.com', $prepend);
# ----------------------------------------------
# This method allows you to add a line to the headers. Unlike set(), this
# will not delete any existing headers with the same name and does not
# require a position. If you pass in a true value for $prepend, then the
# header will be prepended to the top of the headers.
#
my ($self, $tag, $val, $prepend) = @_;
defined $tag or return $self->error("BADARGS", "FATAL", '$obj->add($tag, $value)');
defined $val or return $self->error("BADARGS", "FATAL", '$obj->add($tag, $value)');
$tag = lc($tag);
if (exists $self->{header_lines}->{$tag}) {
if ($prepend) {
unshift @{$self->{header_lines}->{$tag}}, $val;
}
else {
push @{$self->{header_lines}->{$tag}}, $val;
}
}
else {
@{$self->{header_lines}->{$tag}} = ($val);
}
if ($prepend) {
for (@{$self->{header_order}}) {
if (lc $_->[0] eq $tag) {
$_->[1]++;
}
}
unshift @{$self->{header_order}}, [$self->_tag_case($tag), 0];
}
else {
push @{$self->{header_order}}, [$self->_tag_case($tag), $#{$self->{header_lines}->{$tag}}];
}
return 1;
}
sub delete {
# --------------------------------------------------------------------------
# $obj->delete('to');
# -------------------
# This method allows you to delete tags in the header of the message. If
# there are more than one of the specified tag they are all deleted. e.g.
# there is often more than one Received tag in a header. If you specify that
# tag all of them are deleted.
#
# $obj->delete('received', $pos);
# -------------------------------
# This allows you to delete tages at a certain position. e.g. If you want to
# delete the second Received tag you would do:
# $obj->delete('received', 1);
# Note that position is zero based.
# Returns whatever was deleted or spliced.
#
my $self = shift;
defined $_[0] or return $self->error("BADARGS", "FATAL", '$obj->delete($tag)');
my $tag = lc($_[0]);
if (defined $_[1] and exists $self->{header_lines}->{$tag}) {
my $del = splice(@{$self->{header_lines}->{$tag}}, $_[1], 1);
return wantarray ? @$del : $del->[0];
}
elsif (exists $self->{header_lines}->{$tag} and not defined $_[1]) {
my $del = delete $self->{header_lines}->{$tag};
return wantarray ? @$del : $del->[0];
}
else {
return;
}
}
sub size {
# --------------------------------------------------------------------------
# my $part_size = $obj->size;
# ---------------------------
# Returns the total size of the part including the body.
#
my $self = shift;
my $size = 0;
if ($self->{body_in}) {
if ($self->{body_in} eq 'MEMORY') {
$size = length $self->{data};
}
elsif ($self->{body_in} eq 'HANDLE') {
$size = -s $self->{io};
}
elsif ($self->{body_in} eq 'FILE') {
$size = -s $self->{path};
}
}
$size += length $self->header_as_string;
return $size;
}
sub preamble {
# --------------------------------------------------------------------------
# my $preamble = $obj->preamble;
# ------------------------------
# $obj->preamble("My cool preamble");
# -----------------------------------
# This is a set or get method for the preamble of this part.
#
my ($self, $lines) = @_;
$self->{preamble} = $lines if @_ > 1;
return $self->{preamble};
}
sub epilogue {
# --------------------------------------------------------------------------
# my $epilogue = $obj->epilogue;
# ------------------------------
# $obj->epilogue("My cool epilogue");
# -----------------------------------
# This is a set or get method for the parts epilogue.
#
my ($self, $lines) = @_;
$self->{epilogue} = $lines if @_ > 1;
return $self->{epilogue};
}
sub header_charset {
# -----------------------------------------------------------------------------
# my $charset = $obj->header_charset;
# -----------------------------------
# $obj->header_charset('windows-1256');
# -------------------------------------
# This sets or retrieves the charset that will be used to encode the header.
# The default encoding is ISO-8859-1.
#
my $self = shift;
$self->{header_charset} = shift if @_;
return $self->{header_charset};
}
sub binmode {
# --------------------------------------------------------------------------
# $obj->binmode(1);
# -----------------
# Sets the binmode flag for all opens done on the body of the message.
# If this flag is set binmode will be used on all files.
#
my $self = shift;
my $mode = shift;
$self->{mode} = $mode if defined $mode;
return $self->{mode};
}
sub mime_type {
# --------------------------------------------------------------------------
# my $type = $obj->mime_type;
# ---------------------------
# $obj->mime_type($default_type);
# -------------------------------
# Returns the content-type of this message. If there is no content type
# and you passed in a default, returns that.
#
my ($self, $default) = @_;
$self->{mime_type} = $default if @_ > 1;
return lc($self->mime_attr('content-type') || $self->{mime_type} || $self->get('content-type'));
}
sub is_multipart {
# --------------------------------------------------------------------------
# $obj->is_multipart;
# -------------------
# Returns true if this part is a multipart part. Returns false otherwise.
#
my $self = shift;
my ($type, $subtype) = split('/', $self->mime_type);
$type ||= 'text';
$subtype ||= 'plain';
return (($type eq 'multipart') ? 1 : 0);
}
sub parts {
# --------------------------------------------------------------------------
# my @parts = $obj->parts;
# ------------------------
# Returns an array of part objects if there are any parts to this part.
# Returns false if there are no parts.
#
my $self = shift;
$self->{parts} ||= [];
if (@_ > 0) {
push @{$self->{parts}}, @_;
unless ($self->multipart_boundary) {
$self->multipart_boundary("---------=_" . scalar(time) . "-$$-" . int(rand(time)/2));
}
}
return wantarray ? @{$self->{parts}} : $self->{parts};
}
sub mime_encoding {
# --------------------------------------------------------------------------
# my $encoding = $obj->mime_encoding;
# -----------------------------------
# Returns the content-transfer-encoding for the current part. This method
# returns 7bit if there is none.
#
my $self = shift;
return lc($self->mime_attr('content-transfer-encoding') || '7bit');
}
sub multipart_boundary {
# --------------------------------------------------------------------------
# my $boundary = $obj->multipart_boundary;
# ----------------------------------------
# Set get method for the multipart boundary to use if this part is a
# multipart message.
#
my $self = shift;
my $boundary = shift;
if (defined $boundary) {
$self->{boundary} = $boundary;
}
elsif ($self->mime_attr('content-type.boundary')) {
$self->{boundary} = $self->mime_attr('content-type.boundary');
}
return $self->{boundary};
}
sub mime_attr {
# --------------------------------------------------------------------------
# $obj->mime_attr('type.subtype');
# --------------------------------
# Given a string that consists of type.sybtype returns the value of
# subtype. If only type is sent returns the value of type. This is useful
# when dealing with header fields such as Content-Type where it can have
# a subtype like:
# Content-Type: text/plain; name="foo.js"
#
my ($self, $attr) = @_;
# Break attribute name up:
my ($tag, $subtag) = split /\./, $attr;
$subtag ||= '_';
defined($tag) or return $self->error("BADARGS", "FATAL", '$obj->mime_attr ("type.subtype")');
$tag = lc($tag);
# There should only be one of these.
defined($self->{header_lines}->{$tag}->[-1]) or return;
my $field = $self->parse_params($self->{header_lines}->{$tag}->[-1]);
return $field->{$subtag};
}
sub decode_mimewords {
# --------------------------------------------------------------------------
# my $line = decode_mimewords($line);
# -----------------------------------
# Checks $line for mime encodings and decodes them if found. The string
# passwed in should be a line in the header.
#
my $encstr = shift;
my @tokens;
$@ = ''; # error-return
$encstr =~ s{(\?\=)\r?\n[ \t](\=\?)}{$1$2}gs;
pos($encstr) = 0;
# Decode:
my ($charset, $encoding, $enc, $dec);
while (1) {
last if (pos($encstr) >= length($encstr));
my $pos = pos($encstr); # save it
# Case 1: are we looking at "=?..?..?="?
if ($encstr =~ m{\G # from where we left off..
=\?([^?]*) # "=?" + charset +
\?([bq]) # "?" + encoding +
\?([^?]+) # "?" + data maybe with spcs +
\?= # "?="
}xgi) {
($charset, $encoding, $enc) = ($1, lc($2), $3);
$dec = (($encoding eq 'q') ? _decode_q($enc) : _decode_b($enc));
push @tokens, [$dec, $charset];
next;
}
# Case 2: are we looking at a bad "=?..." prefix?
# We need this to detect problems for case 3, which stops at "=?":
pos($encstr) = $pos; # reset the pointer.
if ($encstr =~ m{\G=\?}xg) {
$@ .= qq|unterminated "=?..?..?=" in "$encstr" (pos $pos)\n|;
push @tokens, ['=?'];
next;
}
# Case 3: are we looking at ordinary text?
pos($encstr) = $pos; # reset the pointer.
if ($encstr =~ m{\G # from where we left off...
([\x00-\xFF]*? # shortest possible string,
\n*) # followed by 0 or more NLs,
(?=(\Z|=\?)) # terminated by "=?" or EOS
}xg) {
length($1) or die "GT::Mail::Parts: internal logic err: empty token.";
push @tokens, [$1];
next;
}
# Case 4: bug!
die "GT:Mail::Parts: unexpected case: ($encstr) pos $pos Please alert developer.";
}
return (wantarray ? @tokens : join('', map { $_->[0] } @tokens));
}
sub encode_mimewords {
# --------------------------------------------------------------------------
# my $line = decode_mimewords($line);
# -----------------------------------
# Checks $line for non-ansii and encodes them if found. The string
# passed in should be a line in the header.
#
my $words = shift;
return $words if $words =~ /\A[\x09\x0A\x0D\x20-\x7E]*\Z/;
my $encoding = uc(shift || 'Q');
my $charset = uc(shift || 'ISO-8859-1');
$words =~ s{([a-zA-Z0-9\x7F-\xFF]{1,18})}{
my $word = $1;
my $ret;
if ($word =~ /\A[\x09\x0A\x0D\x20-\x7E]*\Z/o) {
$ret = $word;
}
else {
$ret = "=?$charset?$encoding?" . ($encoding eq 'Q' ? _encode_q($word) : _encode_b($word)) . '?=';
}
# As per rfc2047 section 2, we can not have lines more then 75 characters long
# we must seperate long encodings with a CRLF and space.
if (length $word == 18 and substr($words, pos($words) + 18, 1) =~ /\S/) {
$ret .= $CRLF . " ";
}
$ret;
}eg;
# Spaces between encoded words need to be encoded as well, or they won't show up
$words =~ s/\?=( +)=\?/"?= =?$charset?Q?" . ("=20" x length $1) . "?= =?"/eg;
return $words;
}
sub fold {
# --------------------------------------------------------------------------
my ($tag, $line) = @_;
# Remove any newlines that shouldn't be in the header
$line =~ s/(?:\r?\n)+$//;
$line =~ s/(?:\r?\n)(?=[^\t ])/ /g;
# Don't fold if we don't need to
my $fold = 0;
for (split /$CRLF/, $line) {
if (length $_ > 72) {
$fold = 1;
last;
}
}
return $line unless $fold;
# First we unfold
$line =~ s/[\t ]*[$CRLF][\t ]*/ /g;
my $key = lc $tag;
# Special case for TO fields
if ($key eq 'to' or $key eq 'cc' or $key eq 'bcc') {
$line =~ s/,[\t ]+/,$CRLF /g if length $line > 72;
return $line;
}
# We have nothing to wrap on :(
if ($line !~ /\s/) {
return $line;
}
# Wrap the line
$line = GT::Text::Tools->linewrap($line, 72, { eol => $CRLF, nowrap => [ q{=\?([^?]*)\?([bq])\?([^?]+)\?=} ] });
# Outlook has a problem with unfolding subject lines with more then one leading
# space. It ends up putting that into the actual subject.
$line =~ s/^/ /mg;
$line =~ s/^\s+//;
$line =~ s/$CRLF$//;
return $line;
}
sub header_as_string {
# --------------------------------------------------------------------------
# my $header = $obj->header_as_string;
# ------------------------------------
# Returns the header for this part as a string. This actually creates the
# header when called so it has a bit of overhead.
#
my ($self) = @_;
my $ret = '';
my %head = %{$self->{header_lines}};
if ($self->{headers_intact}) {
for (@{$self->{header_order}}) {
my ($tag, $j) = @{$_};
my $ltag = lc $tag;
if (exists $head{$ltag} and defined $head{$ltag}->[$j]) {
$ret .= "$tag: " . encode_mimewords($head{$ltag}->[$j], undef, $self->{header_charset}) . $CRLF;
}
}
}
else {
foreach my $tag (@HEADER) {
my $key = lc $tag;
$tag = $self->_tag_case($tag);
if ($key eq 'content-type') {
if ($self->get($tag)) {
my $val = $self->get($key);
$ret .= "$tag: " . fold($tag, encode_mimewords($val, undef, $self->{header_charset})) . $CRLF;
}
elsif ($self->mime_type) {
my $val = $self->mime_type;
$ret .= "$tag: " . fold($tag, encode_mimewords($val, undef, $self->{header_charset})) . $CRLF;
}
else {
$ret .= "$tag: text/plain$CRLF";
}
delete $head{$key};
next;
}
next unless exists $self->{header_lines}{$key};
foreach (@{$self->{header_lines}->{$key}}) {
next unless defined $_;
$ret .= "$tag: " . fold($tag, $key eq 'received' ? $_ : encode_mimewords($_, undef, $self->{header_charset})) . $CRLF; # Can not encode recieved lines
}
delete $head{$key};
}
for my $key (sort keys %head) {
next unless defined $key;
my $tag = $self->_tag_case($key);
foreach (@{$self->{header_lines}{$key}}) {
next unless defined $_;
$ret .= "$tag: " . fold($tag, encode_mimewords($_, undef, $self->{header_charset})) . $CRLF;
}
}
}
return $ret;
}
sub split_field {
# --------------------------------------------------------------------------
# my @to = $obj->split_field('to');
# ---------------------------------
# Takes the tag you wish to split on and returns the parts split on
# \s*,\s*. This is usfull only for fields that have comma seperated
# values such as emails. The split know about quotes and will not split
# inside quotes. Returns the number of parts in scalar context, the
# list of parts in list context.
#
my ($self, $tag) = @_;
defined($tag) or $tag = 'to';
$tag = lc($tag);
exists($self->{header_lines}->{$tag}) or return wantarray ? () : 0;
(@{$self->{header_lines}->{$tag}} > 0) or return wantarray ? () : 0;
my @ret;
for my $val (@{$self->{header_lines}->{$tag}}) {
push @ret, GT::Mail::Parts->split_line('\s*,\s*', $val);
}
return @ret;
}
sub split_line {
# --------------------------------------------------------------------------
# Class->split_line('\s*,\s*', $line);
# -----------------------------
# Splits a line given a delimitor regex and the line. Returns an array
# of the peices
#
# We will be testing undef strings
my $class = shift;
local $^W;
my ($delimiter, $line) = @_;
$delimiter =~ s/(\s)/\\$1/g;
my ($quote, $quoted, $unquoted, $delim, $word, @pieces);
@pieces = $line =~ m{
\G
(
# GIANT HACK WARNING!
# To keep Perl from segfaulting, simply skip over any chunks of
# 8000 characters without a delimiter, up to the next delimiter.
# See segfault warning below. This isn't perfect, but such an
# e-mail is probably bogus anyway.
(?: . (?! $delimiter ) ){8000,}
|
(?:
" (?:\\.|[^\\"]+(?=[\\"])) * "
|
\s ' (?:\\.|[^\\']+(?=[\\'])) * ' # Hack to get around email addresses with ' in them
|
\\.
|
.
)+? # May segfault on very long lines (around 8000+ iterations of this group)
)
(?: $ | (?:$delimiter)+ )
}gsx;
return @pieces;
}
########################################################
## Body Methods ##
########################################################
sub suggest_encoding {
# --------------------------------------------------------------------------
# my $encoding = $obj->suggest_encoding;
# --------------------------------------
# This method attempts to guess an encoding for this part. It does this
# by looking at the mime type and possibly even scaning the body of the
# part.
#
my $self = shift;
my $eff_type = $self->effective_type || '';
my ($type) = split('/', $eff_type);
$type ||= 'text';
$self->{body_in} ||= '';
if ((($type eq 'text') || ($type eq 'message'))) { # scan message body
if ($self->{body_in} eq 'MEMORY') {
return 'quoted-printable';
}
my ($IO, $unclean);
if ($self->{body_in} eq 'FILE') {
$IO = $self->open("r");
}
elsif ($self->{body_in} eq 'HANDLE') {
$IO = $self->{io};
}
else {
return 'binary';
}
# Scan message for 7bit-cleanliness:
while (<$IO>) {
last if ($unclean = ((length($_) > 999) or /[\200-\377]/));
}
# Reset our handle back to the beginning.
seek $IO, 0, 0;
# Return '7bit' if clean; try and encode if not...
# Note that encodings are not permitted for messages!
return (($type eq 'message') ? 'binary' : 'quoted-printable');
}
else {
return ($type eq 'multipart') ? 'binary' : 'base64';
}
}
sub recommended_filename {
# --------------------------------------------------------------------------
# my $name = $obj->recommended_filename;
# --------------------------------------
# This method attempts to guess a filename for this part. The file name
# returned is checked for any illegal characters. Any found are stripted
# and what is left is returned. This method will return empty string
# on failure.
#
my $self = shift;
my $value;
# Start by trying to get 'filename' from the 'content-disposition':
$value = $self->mime_attr('content-disposition.filename');
return $self->_evil_file($value) if (defined($value) and $value ne '');
# No? Okay, try to get 'name' from the 'content-type':
$value = $self->mime_attr('content-type.name');
return $self->_evil_file($value) if (defined($value) and $value ne '');
return '';
}
sub body_as_string {
# --------------------------------------------------------------------------
# my $body = $obj->body_as_string;
# --------------------------------
# This method returns the entire body as a string. You should probably
# not be calling this unless you are sure the body is small enough to fit
# into memory.
#
my $self = shift;
my ($ret, $fh);
defined($self->{body_in}) or return;
if ($self->{body_in} eq 'FILE') {
$fh = $self->open('r');
}
elsif ($self->{body_in} eq 'HANDLE') {
$fh = $self->{io};
}
else {
return $self->{data};
}
local $/;
$ret = <$fh>;
seek($fh, 0, 0);
return $ret;
}
sub open {
# --------------------------------------------------------------------------
# my $fh = $obj->open($flag);
# ---------------------------
# If the mody is stored in a path this method opens it given the flag r or
# w. r is to open to read and w is to open to write. If you open to write
# the file is trucated. If the file is not stored in file this method will
# return undef.
#
my ($self, $mode) = @_;
my $io = \do { local *FH; *FH };
$self->{body_in} eq 'FILE'
or return $self->error('BADARGS', 'FATAL', 'The body must be stored in file for this method to be called');
my $path = $self->body_path;
$self->debug("Opening $path") if $self->{_debug};
if ($mode eq 'w') {
open $io, ">$path" or return $self->error("WRITEOPEN", "FATAL", $path, $!);
}
elsif ($mode eq 'r') {
open $io, "<$path" or return $self->error("READOPEN", "FATAL", $path, $!);
}
else {
return $self->error("BADARGS", "FATAL", '$obj->open ("r"); -or- $obj->open ("w");');
}
CORE::binmode($io);
return $io;
}
sub body_in {
# --------------------------------------------------------------------------
# my $in = $obj->body_in;
# -----------------------
# This method returns a flag that says where the body for this part is
# stored. It will be one of 4 thing. 'MEMORY', 'HANDLE', 'FILE' or undef.
# If this method returns undef it means this part does not have a body.
#
my $self = shift;
$self->{body_in} = shift if (@_ > 0);
return $self->{body_in};
}
sub body_data {
# --------------------------------------------------------------------------
# my $body = $obj->body_data;
# ---------------------------
# $obj->body_data($body);
# -----------------------
# This is a set or get method for the body if you are storing the body
# in memory or the body is stored in memory. This method sets the body_in
# flag to MEMORY if called with the body of the message.
#
my ($self, $data) = @_;
if (defined $data) {
$self->{body_in} = 'MEMORY';
if (ref $data) {
return $self->error('BADARGS', 'FATAL', "body_data: Body in memory can not be a referece");
}
$self->{data} = $data;
}
return $self->{data};
}
sub body_handle {
# --------------------------------------------------------------------------
# my $fh = $obj->body_handle;
# ---------------------------
# $obj->body_handle(\*FH);
# ------------------------
# This is a set or get method for the body handle. This sets the body_in
# flag to HANDLE if a file handle is passed in.
#
my ($self, $fh) = @_;
if ($fh and ref $fh eq "GLOB") {
$self->{io} = $fh;
$self->{body_in} = 'HANDLE';
}
return $self->{io};
}
sub body_path {
# --------------------------------------------------------------------------
# my $path = $obj->body_path;
# ---------------------------
# $obj->body_path('/path/to/file');
# ---------------------------------
# This is a set or get method for the body path. This sets the body_in
# flag to FILE if called to set the path.
#
my $self = shift;
my $path = shift;
if (defined $path) {
$self->{path} = $path;
$self->{body_in} = 'FILE';
}
return $self->{path};
}
#####################################################
## Private Methods ##
#####################################################
sub _insert {
# --------------------------------------------------------------------------
my ($self, $tag, $val) = @_;
return unless defined $val and defined $tag;
push @{$self->{header_lines}->{lc $tag}}, $val;
push @{$self->{header_order}}, [$tag, $#{$self->{header_lines}->{lc $tag}}];
}
sub _fmt_line {
# --------------------------------------------------------------------------
my ($self, $tag, $line) = @_;
($tag) = $line =~ /^($FIELD_NAME|From )/oi unless defined $tag;
defined($tag) and $tag =~ /^($FIELD_NAME|From )/oi or return $self->error("BADTAG", "WARN", $tag);
$tag =~ s/^([^ :]+):/$1/;
# Ensure the line starts with tag
($line =~ /[^ :]+\s*:\s?(.*)$/s) and $line = $1;
$line =~ s/\r?\n$//;
$line = decode_mimewords($line);
return ($tag, $line);
}
sub _tag_case {
# --------------------------------------------------------------------------
my ($self, $tag) = @_;
defined $tag or return $self->error("BADARGS", "FATAL", '$self->_tag_case ($tag)');
$tag =~ s/:$//;
$tag =~ s/^\s*//;
$tag =~ s/\s*$//;
# Change the case of the tag
# eq Message-Id
$tag =~ s/\b([a-z]+)/\u$1/g;
return $tag;
}
sub _decode_b {
# --------------------------------------------------------------------------
my $str = shift;
require GT::Mail::Parse;
return GT::Mail::Parse::decode_base64($str);
}
sub _decode_q {
# --------------------------------------------------------------------------
my $str = shift;
$str =~ s/=([\da-fA-F]{2})/pack("C", hex($1))/ge; # RFC-1522, Q rule 1
$str =~ s/_/\x20/g; # RFC-1522, Q rule 2
return $str;
}
sub _encode_q {
# --------------------------------------------------------------------------
my ($word) = @_;
$word =~ s/([_?=\x00-\x1F\x7F-\xFF ])/sprintf("=%02X", ord($1))/eog;
return $word;
}
sub _encode_b {
# --------------------------------------------------------------------------
my ($word) = @_;
my $ret;
local $GT::Mail::Encoder::CRLF = $CRLF;
GT::Mail::Encoder->gt_encode(encoding => 'base64', in => $word, out => sub { $ret .= $_[0] });
return $ret;
}
sub _evil_file {
# --------------------------------------------------------------------------
my $self = shift;
my $file = shift;
my $l;
$self->debug("$file being changed") if $self->{_debug};
# Windows case
if ($file =~ m|\\|) {
$file = substr($file, rindex($file,'\\') + 1);
}
# Unix case
elsif ($file =~ m|/|) {
$file = substr($file, rindex($file,'/') + 1);
}
# Mac case
elsif ($file =~ m|:|) {
$file = substr($file, rindex($file,':') + 1);
}
$file =~ s{[^a-z_A-Z0-9\-\.]+}{}g;
return length($file) < 40 ? $file : substr($file,0,40);
}
1;
__END__
=head1 NAME
GT::Mail::Parts - Data storage class for MIME parts
=head1 SYNOPSIS
use GT::Mail;
my $mail = new GT::Mail;
my $top_part = $mail->parse('/path/to/email');
# Access the emails as an array
my @to = $top_part->split_field('to');
my @from = $top_part->split_field('from');
# Access to the header fields
my $mailer = $top_part->get('X-Mailer');
my $subject = $top_part->get('Subject');
# Access to this parts sub part
if ($top_part->is_multipart) {
my @parts = $top_parts->parts;
for my $part (@parts) {
# Access parts of the header
print "Filename: ", $part->recommended_filename, "\n";
print "Part is multi-part\n" if $part->is_multipart;
# Get the body as a string
my $body = $part->body_as_string;
}
}
# Change who it is to
$top_part->set('to', 'scott@gossamer-threads.com');
# Remove the bcc line
$top_part->delete('bcc');
=head1 DESCRIPTION
GT::Mail::Parts is a class to provide methods to change and
access a MIME messages. The object for this class is meant to
be istansiated from L<GT::Mail>.
=head2 effective_type - Access the effective MIME type
my $type = $obj->effective_type;
if ($type eq 'application/octet-stream') {
...
}
This method returns the effective MIME Type of this objects part.
=head2 get - Access header tags.
my $subj = $obj->get('Subject');
# or if there is more than one
my @recv = $obj->get('Received');
Used to access any of the tags in the header of the MIME part. If the
tag requested is not present returns false. The first argument to this
method is the name of he tag you want to extract. This is case insensitive.
=head2 set - Set a header tag.
# Change who the email is to
$obj->set('to', 'scott@gossamer-threads.com');
# Change the second Received tag
$obj->set('Received', 'from unknown', 1);
Set any of the tags in the header. If the tag does not exist this will create
it. This method takes three arguments. The first is the name of the tag to
change or add, this is case insensitive. The second argument is the value for
the tag. The third zero based optional argument is the position. The position
will default to zero if it is not specified.
=head2 delete - Remove a header tag.
# Delete who the message is from
$obj->delete('from');
This method deletes the tag specified by the first argument from this MIME
part.
=head2 size - Access the total size.
my $size = $obj->size;
This method returns the total size of this part. This includes the header and
the body.
=head2 preamble - Set or get the preamble.
# Retrieve the preamble
my $pre = $obj->preamble;
# Set the preamble
$obj->preamble('This is a multi-part message in MIME format.');
This is a set get method for the preamble. The preamble is the part after the
head but before the first MIME boundary. This method makes no since if this
is not a multi-part part.
=head2 epilogue - Set or get the epilogue.
# Retrieve the epilogue
my $ep = $obj->epilogue;
# Set the epilogue
$obj->epilogue('This is my cool epilogue');
This is a set get method for the epilogue. The epilogue is the part of the
MIME part after the MIME boundary and before the next head. This method makes
no since if this is not a multi-part part.
=head2 mime_type - Set or get the MIME type.
my ($type, $subtype) = split('/', $obj->mime_type);
This method returns the MIME type of this part. You can pass in an argument
to change the MIME type as well. So you could do
$obj->mime_type('text/plain');
This is probably not such a good idea unless you are constructing the email from
scratch.
=head2 is_multipart - See if you have a multi-part part.
if ($obj->is_multipart) {
# do some multi-part stuff
}
Returns true is this part is a multi-part MIME part.
=head2 parts - Access sub parts.
my @parts = $obj->parts;
Returns the parts object this part contains. Returns false if this part does
no have any sub parts. The parts objects that returns are from this same class.
Any parts that are milti-part should contain parts.
=head2 multipart_boundary - Set or get the multi-part boundary.
my $boundary = $obj->multipart_boundary;
This returns the multi-part boundary for this part. Setting this is never needed
and may be removed in the future. This method only makes since if you are working
with a multi-part pert.
=head2 header_as_string - Access the whole header.
my $head = $obj->header_as_string;
This method creates and returns the header for this part. The returned header should
be fully rfc822 compliant. Avoid calling this method more than once, as it will build
the header from an internal data structure each time.
=head2 split_field - Retrieve the emails split up into an array.
my @to = $obj->split_field; # Defaults to 'to'
my @bcc = $obj->split_field('bcc');
This is mostly a utility method. It takes an option argument as to the field you want
the email address from (default is to), it then splits the emails on '\s*,\s*' that is
not inside quotes. Returns an array of the split up string.
=head2 suggest_encoding - Get a suggestion for encoding.
my $encode = $obj->suggest_encoding;
Returns a suggested encoding for the body of this message. This is useful to decide
what encoding you should use for the body when building an email. This is used in
L<GT::Mail::Parse> to decide how to encode the message body.
=head2 recommended_filename - Figure out the file name.
my $file = $obj->recommended_filename;
if ($file) {
...
}
This method tries to figure out the file name of this part. This does not make much
since if this part is not an attachment of some kind. Returns an empty string on
failure.
=head2 body_as_string - Get the body as a string.
This method returns the entire body of the MIME message as a string. You should not
use this method if the body could be large.
=head2 body_in - Find the body.
my $in = $obj->body_in;
my $body;
if ($in eq 'MEMORY') {
$body = $obj->body_data;
}
elsif ($in eq 'HANDLE') {
$body = $obj->body_handle;
}
elsif ($in eq 'FILE') {
$body = $obj->body_path;
}
This method returns the location of the body. The location can be one of three things:
=over 4
=item MEMORY
The body is in a string.
=item HANDLE
The body is in an IO handle.
=item FILE
The body is in a file.
=back
You would use this to decide what method to use to access the body. If the MIME message
was parsed into GT::Mail::Parts using L<GT::Mail::Parser> the body will always be in
a FILE.
=head2 body_data - Get the in memory body.
This method returns the body if it is stored in memory. Returns undefined if the body is
not in memory.
=head2 body_handle - Get an IO handle to the body.
This method returns a handle to the body if the body is stored in a handle for this part.
Returns undefined if not.
=head2 body_path - Get the location of the file the body is in.
This method returns the file path to the file the body is located in if the body for this
part is stored in a file. Returns undefined if not.
=head1 COPYRIGHT
Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
http://www.gossamer-threads.com/
=head1 VERSION
$Revision: 1.84 $
=cut