From 73fadd13c8f13c28e1f47f1cd170f56c2c2dc45c Mon Sep 17 00:00:00 2001 From: Ilia Ross Date: Mon, 20 Apr 2026 19:42:22 +0200 Subject: [PATCH] Fix to decode Outlook winmail.dat with Convert::TNEF safely * Note: Replace external tnef/opentnef shelling with Convert::TNEF for application/ms-tnef attachments. For root-run Webmin mailbox parsing, decode TNEF as the mailbox owner instead of root by carrying open_user on mail objects and switching to that user for the parser temp-file work. https://github.com/webmin/usermin/issues/132 --- mailboxes/boxes-lib.pl | 163 ++++-- mailboxes/folders-lib.pl | 7 + vendor_perl/Convert/TNEF.pm | 735 ++++++++++++++++++++++++ vendor_perl/MIME/Body.pm | 672 ++++++++++++++++++++++ vendor_perl/MIME/Tools.pm | 1043 +++++++++++++++++++++++++++++++++++ 5 files changed, 2576 insertions(+), 44 deletions(-) create mode 100644 vendor_perl/Convert/TNEF.pm create mode 100644 vendor_perl/MIME/Body.pm create mode 100644 vendor_perl/MIME/Tools.pm diff --git a/mailboxes/boxes-lib.pl b/mailboxes/boxes-lib.pl index db960595a..ac413b75e 100755 --- a/mailboxes/boxes-lib.pl +++ b/mailboxes/boxes-lib.pl @@ -15,6 +15,111 @@ use Time::Local; $dbm_index_min = 1000000; $dbm_index_version = 3; +# supports_tnef_module() +# Returns 1 if the Convert::TNEF module is available for decoding winmail.dat +my $supports_tnef_module; +sub supports_tnef_module +{ +if (!defined($supports_tnef_module)) { + $supports_tnef_module = eval { require Convert::TNEF; 1; } ? 1 : 0; + } +return $supports_tnef_module; +} + +# decode_tnef_attachment(&attach) +# Expands a TNEF attachment into a list of normal attachments, or returns +# an error string if decoding is not possible +sub decode_tnef_attachment +{ +my ($attach) = @_; +my @rv; +return (undef, "Convert::TNEF module not installed") + if (!&supports_tnef_module()); + +my $tempdir = &transname(); +mkdir($tempdir, 0700) || return (undef, $!); + +# Only try to de-privilege the parser when we are currently fully root; +# In Usermin or already-switched contexts, just decode as the current user +my $parse_user = !$< && !$> ? &get_mail_parse_user($attach) : undef; +my $tnef; +if ($parse_user) { + # Convert::TNEF may create temp files under output_dir, so hand it a + # private directory owned by the mailbox user and run the parser as + # that user instead of root + local $main::mail_open_user = $parse_user; + my @uinfo = &get_switch_user_info(); + @uinfo || return (undef, "Mail open user $parse_user does not exist"); + &set_ownership_permissions($uinfo[2], $uinfo[3], 0700, $tempdir); + my $switched = &switch_to_mail_user(); + my $io; + my $ok = eval { + open($io, "<", \$attach->{'data'}) || die "$!"; + binmode($io); + $tnef = Convert::TNEF->read($io, + { 'output_dir' => $tempdir, + 'buffer_size' => &get_buffer_size() }); + close($io) || die "$!"; + 1; + }; + if ($switched) { + # Restore root after decoding so the rest of the mailbox code + # keeps running with its original privileges + $) = 0; + $> = 0; + } + return (undef, $@) if (!$ok); + } +else { + my $io; + if (!open($io, "<", \$attach->{'data'})) { + return (undef, $!); + } + binmode($io); + $tnef = eval { + Convert::TNEF->read($io, + { 'output_dir' => $tempdir, + 'buffer_size' => &get_buffer_size() }) + }; + close($io); + } +if ($@) { + return (undef, $@); + } +if (!$tnef) { + return (undef, $Convert::TNEF::errstr || + "failed to decode winmail.dat"); + } +foreach my $ta ($tnef->attachments) { + my $fn = $ta->longname() || $ta->name(); + $fn =~ s/\x00+$// if (defined($fn)); + my $data = $ta->data(); + next if (!defined($data)); + my $ct = $fn ? &guess_mime_type($fn) + : "application/octet-stream"; + push(@rv, + { 'type' => $ct, + 'header' => { 'content-type' => $ct }, + 'headers' => [ [ 'Content-Type', $ct ] ], + 'filename' => $fn, + 'data' => $data }); + } +$tnef->purge(); +return (\@rv, undef); +} + +# get_mail_parse_user(&mail-or-attach) +# Returns the Unix user, if any, that should be used for unsafe mail parsing +sub get_mail_parse_user +{ +my ($obj) = @_; +while ($obj) { + return $obj->{'open_user'} if ($obj->{'open_user'}); + $obj = $obj->{'parent'}; + } +return undef; +} + # list_mails(user|file, [start], [end]) # Returns a subset of mail from a mbox format file sub list_mails @@ -540,49 +645,18 @@ if ($ct =~ /boundary="([^"]+)"/i || $ct =~ /boundary=([^;\s]+)/i) { elsif (lc($attach->{'type'}) eq 'application/ms-tnef') { # This attachment is a winmail.dat file, which may # contain multiple other attachments! - local ($opentnef, $tnef); - if (!($opentnef = &has_command("opentnef")) && - !($tnef = &has_command("tnef"))) { - $attach->{'error'} = "tnef command not installed"; + local ($tattach, $terror) = + &decode_tnef_attachment($attach); + if ($tattach && @{$tattach}) { + pop(@attach); # lose winmail.dat + foreach my $ta (@{$tattach}) { + $ta->{'idx'} = scalar(@attach); + push(@attach, $ta); + } } else { - # Can actually decode - local $tempfile = &transname(); - open(TEMPFILE, ">$tempfile"); - print TEMPFILE $attach->{'data'}; - close(TEMPFILE); - local $tempdir = &transname(); - mkdir($tempdir, 0700); - if ($opentnef) { - system("$opentnef -d $tempdir -i $tempfile >/dev/null 2>&1"); - } - else { - system("$tnef -C $tempdir -f $tempfile >/dev/null 2>&1"); - } - pop(@attach); # lose winmail.dat - opendir(DIR, $tempdir); - while($f = readdir(DIR)) { - next if ($f eq '.' || $f eq '..'); - local $data; - open(FILE, "<$tempdir/$f"); - while() { - $data .= $_; - } - close(FILE); - local $ct = &guess_mime_type($f); - push(@attach, - { 'type' => $ct, - 'idx' => scalar(@attach), - 'header' => - { 'content-type' => $ct }, - 'headers' => - [ [ 'Content-Type', $ct ] ], - 'filename' => $f, - 'data' => $data }); - } - closedir(DIR); - unlink(glob("$tempdir/*"), $tempfile); - rmdir($tempdir); + $attach->{'error'} = $terror || + "failed to decode winmail.dat"; } } last if ($l >= $max || $lines[$l] eq "$bound--"); @@ -3194,12 +3268,13 @@ return 0; # Returns the getpw* function array for the user to switch to sub get_switch_user_info { -if ($main::mail_open_user =~ /^\d+$/) { +my $user = @_ ? $_[0] : $main::mail_open_user; +if ($user =~ /^\d+$/) { # Could be by UID .. but fall back to by name if there is no such UID - my @rv = getpwuid($main::mail_open_user); + my @rv = getpwuid($user); return @rv if (@rv > 0); } -return getpwnam($main::mail_open_user); +return getpwnam($user); } # is_ascii() diff --git a/mailboxes/folders-lib.pl b/mailboxes/folders-lib.pl index b735d5e06..a699ce1fa 100755 --- a/mailboxes/folders-lib.pl +++ b/mailboxes/folders-lib.pl @@ -310,6 +310,10 @@ elsif ($_[2]->{'type'} == 7) { print DEBUG "listing MBX $_[2]->{'file'}\n"; @mail = &list_mbxfile($_[2]->{'file'}, $_[0], $_[1]); } +foreach my $mail (@mail) { + $mail->{'open_user'} ||= $_[2]->{'user'} + if ($mail && $_[2]->{'user'}); + } &switch_from_folder_user($_[2]); return @mail; } @@ -559,6 +563,9 @@ elsif ($folder->{'type'} == 7) { # MBX folder @mail = &select_mbxfile($folder->{'file'}, $ids, $headersonly); } +foreach my $mail (@mail) { + $mail->{'open_user'} ||= $folder->{'user'} if ($mail && $folder->{'user'}); + } &switch_from_folder_user($_[0]); return @mail; } diff --git a/vendor_perl/Convert/TNEF.pm b/vendor_perl/Convert/TNEF.pm new file mode 100644 index 000000000..5a0e36b04 --- /dev/null +++ b/vendor_perl/Convert/TNEF.pm @@ -0,0 +1,735 @@ +# Convert::TNEF.pm +# +# Copyright (c) 1999 Douglas Wilson . All rights reserved. +# This program is free software; you can redistribute it and/or +# modify it under the same terms as Perl itself. + +package Convert::TNEF; + +use strict; +use integer; +use vars qw( + $VERSION + $TNEF_SIGNATURE + $TNEF_PURE + $LVL_MESSAGE + $LVL_ATTACHMENT + $errstr + $g_file_cnt + %dflts + %atp + %att + %att_name +); + +use Carp; +use IO::Wrap; +use File::Spec; +use MIME::Body; + +$VERSION = '0.18'; + +# Set some TNEF constants. Everything turned +# out to be in little endian order, so I just added +# 'reverse' everywhere that I needed to +# instead of reversing the hex codes. +$TNEF_SIGNATURE = reverse pack( 'H*', '223E9F78' ); +$TNEF_PURE = reverse pack( 'H*', '00010000' ); + +$LVL_MESSAGE = pack( 'H*', '01' ); +$LVL_ATTACHMENT = pack( 'H*', '02' ); + +%atp = ( + Triples => pack( 'H*', '0000' ), + String => pack( 'H*', '0001' ), + Text => pack( 'H*', '0002' ), + Date => pack( 'H*', '0003' ), + Short => pack( 'H*', '0004' ), + Long => pack( 'H*', '0005' ), + Byte => pack( 'H*', '0006' ), + Word => pack( 'H*', '0007' ), + Dword => pack( 'H*', '0008' ), + Max => pack( 'H*', '0009' ), +); + +for ( keys %atp ) { + $atp{$_} = reverse $atp{$_}; +} + +sub _ATT { + my ( $att, $id ) = @_; + return reverse($id) . $att; +} + +# The side comments are 'MAPI' equivalents +%att = ( + Null => _ATT( pack( 'H*', '0000' ), pack( 'H4', '0000' ) ), + # PR_ORIGINATOR_RETURN_ADDRESS + From => _ATT( $atp{Triples}, pack( 'H*', '8000' ) ), + # PR_SUBJECT + Subject => _ATT( $atp{String}, pack( 'H*', '8004' ) ), + # PR_CLIENT_SUBMIT_TIME + DateSent => _ATT( $atp{Date}, pack( 'H*', '8005' ) ), + # PR_MESSAGE_DELIVERY_TIME + DateRecd => _ATT( $atp{Date}, pack( 'H*', '8006' ) ), + # PR_MESSAGE_FLAGS + MessageStatus => _ATT( $atp{Byte}, pack( 'H*', '8007' ) ), + # PR_MESSAGE_CLASS + MessageClass => _ATT( $atp{Word}, pack( 'H*', '8008' ) ), + # PR_MESSAGE_ID + MessageID => _ATT( $atp{String}, pack( 'H*', '8009' ) ), + # PR_PARENT_ID + ParentID => _ATT( $atp{String}, pack( 'H*', '800A' ) ), + # PR_CONVERSATION_ID + ConversationID => _ATT( $atp{String}, pack( 'H*', '800B' ) ), + Body => _ATT( $atp{Text}, pack( 'H*', '800C' ) ), # PR_BODY + # PR_IMPORTANCE + Priority => _ATT( $atp{Short}, pack( 'H*', '800D' ) ), + # PR_ATTACH_DATA_xxx + AttachData => _ATT( $atp{Byte}, pack( 'H*', '800F' ) ), + # PR_ATTACH_FILENAME + AttachTitle => _ATT( $atp{String}, pack( 'H*', '8010' ) ), + # PR_ATTACH_RENDERING + AttachMetaFile => _ATT( $atp{Byte}, pack( 'H*', '8011' ) ), + # PR_CREATION_TIME + AttachCreateDate => _ATT( $atp{Date}, pack( 'H*', '8012' ) ), + # PR_LAST_MODIFICATION_TIME + AttachModifyDate => _ATT( $atp{Date}, pack( 'H*', '8013' ) ), + # PR_LAST_MODIFICATION_TIME + DateModified => _ATT( $atp{Date}, pack( 'H*', '8020' ) ), + #PR_ATTACH_TRANSPORT_NAME + AttachTransportFilename => _ATT( $atp{Byte}, pack( 'H*', '9001' ) ), + AttachRenddata => _ATT( $atp{Byte}, pack( 'H*', '9002' ) ), + MAPIProps => _ATT( $atp{Byte}, pack( 'H*', '9003' ) ), + # PR_MESSAGE_RECIPIENTS + RecipTable => _ATT( $atp{Byte}, pack( 'H*', '9004' ) ), + Attachment => _ATT( $atp{Byte}, pack( 'H*', '9005' ) ), + TnefVersion => _ATT( $atp{Dword}, pack( 'H*', '9006' ) ), + OemCodepage => _ATT( $atp{Byte}, pack( 'H*', '9007' ) ), + # PR_ORIG_MESSAGE_CLASS + OriginalMessageClass => _ATT( $atp{Word}, pack( 'H*', '0006' ) ), + + # PR_RCVD_REPRESENTING_xxx or PR_SENT_REPRESENTING_xxx + Owner => _ATT( $atp{Byte}, pack( 'H*', '0000' ) ), + # PR_SENT_REPRESENTING_xxx + SentFor => _ATT( $atp{Byte}, pack( 'H*', '0001' ) ), + # PR_RCVD_REPRESENTING_xxx + Delegate => _ATT( $atp{Byte}, pack( 'H*', '0002' ) ), + # PR_DATE_START + DateStart => _ATT( $atp{Date}, pack( 'H*', '0006' ) ), + DateEnd => _ATT( $atp{Date}, pack( 'H*', '0007' ) ), # PR_DATE_END + # PR_OWNER_APPT_ID + AidOwner => _ATT( $atp{Long}, pack( 'H*', '0008' ) ), + # PR_RESPONSE_REQUESTED + RequestRes => _ATT( $atp{Short}, pack( 'H*', '0009' ) ), +); + +# Create reverse lookup table +%att_name = reverse %att; + +# Global counter for creating file names +$g_file_cnt = 0; + +# Set some package global defaults for new objects +# which can be overridden for any individual object. +%dflts = ( + debug => 0, + debug_max_display => 1024, + debug_max_line_size => 64, + ignore_checksum => 0, + display_after_err => 32, + output_to_core => 4096, + output_dir => File::Spec->curdir, + output_prefix => "tnef", + buffer_size => 1024, +); + +# Make a file name +sub _mk_fname { + my $parms = shift; + File::Spec->catfile( $parms->{output_dir}, + $parms->{output_prefix} . "-" . $$ . "-" + . ++$g_file_cnt . ".doc" ); +} + +sub _rtn_err { + my ( $errmsg, $fh, $parms ) = @_; + $errstr = $errmsg; + if ( $parms->{debug} ) { + my $read_size = $parms->{display_after_err} || 32; + my $data; + $fh->read( $data, $read_size ); + print "Error: $errstr\n"; + print "Data:\n"; + print $1, "\n" while $data =~ + /([^\r\n]{0,$parms->{debug_max_line_size}})\r?\n?/g; + print "HData:\n"; + my $hdata = unpack( "H*", $data ); + print $1, "\n" + while $hdata =~ /(.{0,$parms->{debug_max_line_size}})/g; + } + return undef; +} + +sub _read_err { + my ( $bytes, $fh, $errmsg ) = @_; + $errstr = + ( defined $bytes ) ? "Premature EOF" : "Read Error:" . $errmsg; + return undef; +} + +sub read_ent { + croak "Usage: Convert::TNEF->read_ent(entity, parameters) " + unless @_ == 2 or @_ == 3; + my $self = shift; + my ( $ent, $parms ) = @_; + my $io = $ent->open("r") or do { + $errstr = "Can't open entity: $!"; + return undef; + }; + my $tnef = $self->read( $io, $parms ); + $io->close or do { + $errstr = "Error closing handle: $!"; + return undef; + }; + return $tnef; +} + +sub read_in { + croak "Usage: Convert::TNEF->read_in(filename, parameters) " + unless @_ == 2 or @_ == 3; + my $self = shift; + my ( $fname, $parms ) = @_; + open( INFILE, "<$fname" ) or do { + $errstr = "Can't open $fname: $!"; + return undef; + }; + binmode INFILE; + my $tnef = $self->read( \*INFILE, $parms ); + close INFILE or do { + $errstr = "Error closing $fname: $!"; + return undef; + }; + return $tnef; +} + +sub read { + croak "Usage: Convert::TNEF->read(fh, parameters) " + unless @_ == 2 or @_ == 3; + my $self = shift; + my $class = ref($self) || $self; + $self = {}; + bless $self, $class; + my ( $fd, $parms ) = @_; + $fd = wraphandle($fd); + + my %parms = %dflts; + @parms{ keys %$parms } = values %$parms if defined $parms; + $parms = \%parms; + my $debug = $parms{debug}; + my $ignore_checksum = $parms{ignore_checksum}; + + # Start of TNEF stream + my $data; + my $num_bytes = $fd->read( $data, 4 ); + return _read_err( $num_bytes, $fd, $! ) unless $num_bytes == 4; + print "TNEF start: ", unpack( "H*", $data ), "\n" if $debug; + return _rtn_err( "Not TNEF-encapsulated", $fd, $parms ) + unless $data eq $TNEF_SIGNATURE; + + # Key + $num_bytes = $fd->read( $data, 2 ); + return _read_err( $num_bytes, $fd, $! ) unless $num_bytes == 2; + print "TNEF key: ", unpack( "H*", $data ), "\n" if $debug; + + # Start of First Object + $num_bytes = $fd->read( $data, 1 ); + return _read_err( $num_bytes, $fd, $! ) unless $num_bytes == 1; + + my $msg_att = ""; + + my $is_msg = ( $data eq $LVL_MESSAGE ); + my $is_att = ( $data eq $LVL_ATTACHMENT ); + print "TNEF object start: ", unpack( "H*", $data ), "\n" if $debug; + return _rtn_err( "Neither a message nor an attachment", $fd, + $parms ) + unless $is_msg or $is_att; + + my $msg = Convert::TNEF::Data->new; + my @atts; + + # Current message or attachment in loop + my $ent = $msg; + + # Read message and attachments + LOOP: { + my $type = $is_msg ? 'message' : 'attachment'; + print "Reading $type attribute\n" if $debug; + $num_bytes = $fd->read( $data, 4 ); + return _read_err( $num_bytes, $fd, $! ) unless $num_bytes == 4; + my $att_id = $data; + my $att_name = $att_name{$att_id}; + + print "TNEF $type attribute: ", unpack( "H*", $data ), "\n" + if $debug; + return _rtn_err( "Bad Attribute found in $type", $fd, $parms ) + unless $att_name{$att_id}; + if ( $att_id eq $att{TnefVersion} ) { + return _rtn_err( "Version attribute found in attachment", $fd, + $parms ) + if $is_att; + } elsif ( $att_id eq $att{MessageClass} ) { + return _rtn_err( "MessageClass attribute found in attachment", + $fd, $parms ) + if $is_att; + } elsif ( $att_id eq $att{AttachRenddata} ) { + return _rtn_err( "AttachRenddata attribute found in message", + $fd, $parms ) + if $is_msg; + push @atts, ( $ent = Convert::TNEF::Data->new ); + } else { + return _rtn_err( "AttachRenddata must be first attribute", $fd, + $parms ) + if $is_att + and !@atts + and $att_name ne "AttachRenddata"; + } + print "Got attribute:$att_name{$att_id}\n" if $debug; + + $num_bytes = $fd->read( $data, 4 ); + return _read_err( $num_bytes, $fd, $! ) unless $num_bytes == 4; + + print "HLength:", unpack( "H8", $data ), "\n" if $debug; + my $length = unpack( "V", $data ); + print "Length: $length\n" if $debug; + + # Get the attribute data (returns an object since data may + # actually end up in a file) + my $calc_chksum; + $data = _build_data( $fd, $length, \$calc_chksum, $parms ) + or return undef; + _debug_print( $length, $att_id, $data, $parms ) if $debug; + $ent->datahandle( $att_name, $data, $length ); + + $num_bytes = $fd->read( $data, 2 ); + return _read_err( $num_bytes, $fd, $! ) unless $num_bytes == 2; + my $file_chksum = $data; + if ($debug) { + print "Calc Chksum:", unpack( "H*", $calc_chksum ), "\n"; + print "File Chksum:", unpack( "H*", $file_chksum ), "\n"; + } + return _rtn_err( "Bad Checksum", $fd, $parms ) + unless $calc_chksum eq $file_chksum + or $ignore_checksum; + + my $num_bytes = $fd->read( $data, 1 ); + + # EOF (0 bytes) is ok + return _read_err( $num_bytes, $fd, $! ) unless defined $num_bytes; + last LOOP if $num_bytes < 1; + print "Next token:", unpack( "H2", $data ), "\n" if $debug; + $is_msg = ( $data eq $LVL_MESSAGE ); + return _rtn_err( "Found message data in attachment", $fd, $parms ) + if $is_msg and $is_att; + $is_att = ( $data eq $LVL_ATTACHMENT ); + redo LOOP if $is_msg or $is_att; + return _rtn_err( "Not a TNEF $type", $fd, $parms ); + } + + print "EOF\n" if $debug; + + $self->{TN_Message} = $msg; + $self->{TN_Attachments} = \@atts; + return $self; +} + +sub _debug_print { + my ( $length, $att_id, $data, $parms ) = @_; + if ( $length < $parms->{debug_max_display} ) { + $data = $data->data; + if ( $att_id eq $att{TnefVersion} ) { + $data = unpack( "L", $data ); + print "Version: $data\n"; + } elsif ( substr( $att_id, 2 ) eq $atp{Date} and $length == 14 ) { + my ( $yr, $mo, $day, $hr, $min, $sec, $dow ) = + unpack( "vvvvvvv", $data ); + my $date = join ":", $yr, $mo, $day, $hr, $min, $sec, $dow; + print "Date: $date\n"; + print "HDate:", unpack( "H*", $data ), "\n"; + } elsif ( $att_id eq $att{AttachRenddata} and $length == 14 ) { + my ( $atyp, $ulPosition, $dxWidth, $dyHeight, $dwFlags ) = + unpack( "vVvvV", $data ); + $data = join ":", $atyp, $ulPosition, $dxWidth, $dyHeight, + $dwFlags; + print "AttachRendData: $data\n"; + } else { + print "Data:\n"; + print $1, "\n" while $data =~ + /([^\r\n]{0,$parms->{debug_max_line_size}})\r?\n?/g; + print "HData:\n"; + my $hdata = unpack( "H*", $data ); + print $1, "\n" + while $hdata =~ /(.{0,$parms->{debug_max_line_size}})/g; + } + } else { + my $io = $data->open("r") + or croak "Error opening attachment data handle: $!"; + my $buffer; + $io->read( $buffer, $parms->{debug_max_display} ); + $io->close or croak "Error closing attachment data handle: $!"; + print "Data:\n"; + print $1, "\n" while $buffer =~ + /([^\r\n]{0,$parms->{debug_max_line_size}})\r?\n?/sg; + print "HData:\n"; + my $hdata = unpack( "H*", $buffer ); + print $1, "\n" + while $hdata =~ /(.{0,$parms->{debug_max_line_size}})/g; + } +} + +sub _build_data { + my ( $fd, $length, $chksumref, $parms ) = @_; + my $cutoff = $parms->{output_to_core}; + my $incore = do { + if ( $cutoff eq 'NONE' ) { 0 } #Everything to files + elsif ( $cutoff eq 'ALL' ) { 1 } #Everything in memory + elsif ( $cutoff < $length ) { 0 } #Large items in files + else { 1 } #Everything else in memory + }; + + # Just borrow some other objects for the attachment attribute data + my $body = + ($incore) + ? new MIME::Body::Scalar + : new MIME::Body::File _mk_fname($parms); + $body->binmode(1); + my $io = $body->open("w"); + my $bufsiz = $parms->{buffer_size}; + $bufsiz = $length if $length < $bufsiz; + my $buffer; + my $chksum = 0; + + while ( $length > 0 ) { + my $num_bytes = $fd->read( $buffer, $bufsiz ); + return _read_err( $num_bytes, $fd, $! ) + unless $num_bytes == $bufsiz; + $io->print($buffer); + $chksum += unpack( "%16C*", $buffer ); + $chksum %= 65536; + $length -= $bufsiz; + $bufsiz = $length if $length < $bufsiz; + } + $$chksumref = pack( "v", $chksum ); + $io->close; + return $body; +} + +sub purge { + my $self = shift; + my $msg = $self->{TN_Message}; + my @atts = $self->attachments; + for ( keys %$msg ) { + $msg->{$_}->purge if exists $att{$_}; + } + for my $attch (@atts) { + for ( keys %$attch ) { + $attch->{$_}->purge if exists $att{$_}; + } + } +} + +sub message { + my $self = shift; + $self->{TN_Message}; +} + +sub attachments { + my $self = shift; + return @{ $self->{TN_Attachments} } if wantarray; + $self->{TN_Attachments}; +} + +# This is for Messages or Attachments +# since they are essentially the same thing except +# for the leading attribute code +package Convert::TNEF::Data; + +sub new { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = {}; + $self->{TN_Size} = {}; + bless $self, $class; +} + +sub data { + my $self = shift; + my $attr = shift || 'AttachData'; + return $self->{$attr} && $self->{$attr}->as_string; +} + +sub name { + my $self = shift; + my $attr = shift || 'AttachTitle'; + my $name = $self->{$attr} && $self->{$attr}->data; + $name =~ s/\x00+$// if $name; + return $name; +} + +# Try to get the long filename out of the +# 'Attachment' attribute. +sub longname { + my $self = shift; + + my $data = $self->data("Attachment"); + return unless $data; + my $pos = index( $data, pack( "H*", "1e00013001" ) ); + $pos = index( $data, pack( "H*", "1e00073701" ) ) if ($pos < 0); + return $self->name unless $pos >= 0; + my $len = unpack( "V", substr( $data, $pos + 8, 4 ) ); + my $longname = substr( $data, $pos + 12, $len ); + $longname =~ s/\x00+$// if $longname; + return $longname || $self->name; +} + +sub datahandle { + my $self = shift; + my $attr = shift || 'AttachData'; + $self->{$attr} = shift if @_; + $self->size( $attr, shift ) if @_; + return $self->{$attr}; +} + +sub size { + my $self = shift; + my $attr = shift || 'AttachData'; + $self->{TN_Size}->{$attr} = shift if @_; + return $self->{TN_Size}->{$attr}; +} + +# Autoload methods go after =cut, and are processed by the autosplit program. + +1; +__END__ + + +=head1 NAME + + Convert::TNEF - Perl module to read TNEF files + +=head1 SYNOPSIS + + use Convert::TNEF; + + $tnef = Convert::TNEF->read($iohandle, \%parms) + or die Convert::TNEF::errstr; + + $tnef = Convert::TNEF->read_in($filename, \%parms) + or die Convert::TNEF::errstr; + + $tnef = Convert::TNEF->read_ent($mime_entity, \%parms) + or die Convert::TNEF::errstr; + + $tnef->purge; + + $message = $tnef->message; + + @attachments = $tnef->attachments; + + $attribute_value = $attachments[$i]->data($att_attribute_name); + $attribute_value_size = $attachments[$i]->size($att_attribute_name); + $attachment_name = $attachments[$i]->name; + $long_attachment_name = $attachments[$i]->longname; + + $datahandle = $attachments[$i]->datahandle($att_attribute_name); + +=head1 DESCRIPTION + + TNEF stands for Transport Neutral Encapsulation Format, and if you've + ever been unfortunate enough to receive one of these files as an email + attachment, you may want to use this module. + + read() takes as its first argument any file handle open + for reading. The optional second argument is a hash reference + which contains one or more of the following keys: + +=head2 + + output_dir - Path for storing TNEF attribute data kept in files + (default: current directory). + + output_prefix - File prefix for TNEF attribute data kept in files + (default: 'tnef'). + + output_to_core - TNEF attribute data will be saved in core memory unless + it is greater than this many bytes (default: 4096). May also be set to + 'NONE' to keep all data in files, or 'ALL' to keep all data in core. + + buffer_size - Buffer size for reading in the TNEF file (default: 1024). + + debug - If true, outputs all sorts of info about what the read() function + is reading, including the raw ascii data along with the data converted + to hex (default: false). + + display_after_err - If debug is true and an error is encountered, + reads and displays this many bytes of data following the error + (default: 32). + + debug_max_display - If debug is true then read and display at most + this many bytes of data for each TNEF attribute (default: 1024). + + debug_max_line_size - If debug is true then at most this many bytes of + data will be displayed on each line for each TNEF attribute + (default: 64). + + ignore_checksum - If true, will ignore checksum errors while parsing + data (default: false). + + read() returns an object containing the TNEF 'attributes' read from the + file and the data for those attributes. If all you want are the + attachments, then this is mostly garbage, but if you're interested then + you can see all the garbage by turning on debugging. If the garbage + proves useful to you, then let me know how I can maybe make it more + useful. + + If an error is encountered, an undefined value is returned and the + package variable $errstr is set to some helpful message. + + read_in() is a convienient front end for read() which takes a filename + instead of a handle. + + read_ent() is another convient front end for read() which can take a + MIME::Entity object (or any object with like methods, specifically + open("r"), read($buff,$num_bytes), and close ). + + purge() deletes any on-disk data that may be in the attachments of + the TNEF object. + + message() returns the message portion of the tnef object, if any. + The thing it returns is like an attachment, but its not an attachment. + For instance, it more than likely does not have a name or any + attachment data. + + attachments() returns a list of the attachments that the given TNEF + object contains. Returns a list ref if not called in array context. + + data() takes a TNEF attribute name, and returns a string value for that + attribute for that attachment. Its your own problem if the string is too + big for memory. If no argument is given, then the 'AttachData' attribute + is assumed, which is probably the attachment data you're looking for. + + name() is the same as data(), except the attribute 'AttachTitle' is + the default, which returns the 8 character + 3 character extension name + of the attachment. + + longname() returns the long filename and extension of an attachment. This + is embedded within a MAPI property of the 'Attachment' attribute data, so + we attempt to extract the name out of that. + + size() takes an TNEF attribute name, and returns the size in bytes for + the data for that attachment attribute. + + datahandle() is a method for attachments which takes a TNEF attribute + name, and returns the data for that attribute as a handle which is + the same as a MIME::Body handle. See MIME::Body for all the applicable + methods. If no argument is given, then 'AttachData' is assumed. + + +=head1 EXAMPLES + + # Here's a rather long example where mail is retrieved + # from a POP3 server based on header information, then + # it is MIME parsed, and then the TNEF contents + # are extracted and converted. + + use strict; + use Net::POP3; + use MIME::Parser; + use Convert::TNEF; + + my $mail_dir = "mailout"; + my $mail_prefix = "mail"; + + my $pop = new Net::POP3 ( "pop3server_name" ); + my $num_msgs = $pop->login("user_name","password"); + die "Can't login: $!" unless defined $num_msgs; + + # Get mail by sender and subject + my $mail_out_idx = 0; + MESSAGE: for ( my $i=1; $i<= $num_msgs; $i++ ) { + my $header = join "", @{$pop->top($i)}; + + for ($header) { + next MESSAGE unless + /^from:.*someone\@somewhere.net/im && + /^subject:\s*important stuff/im + } + + my $fname = $mail_prefix."-".$$.++$mail_out_idx.".doc"; + open (MAILOUT, ">$mail_dir/$fname") + or die "Can't open $mail_dir/$fname: $!"; + # If the get() complains, you need the new libnet bundle + $pop->get($i, \*MAILOUT) or die "Can't read mail"; + close MAILOUT or die "Error closing $mail_dir/$fname"; + # If you want to delete the mail on the server + # $pop->delete($i); + } + + close MAILOUT; + $pop->quit(); + + # Parse the mail message into separate mime entities + my $parser=new MIME::Parser; + $parser->output_dir("mimemail"); + + opendir(DIR, $mail_dir) or die "Can't open directory $mail_dir: $!"; + my @files = map { $mail_dir."/".$_ } sort + grep { -f "$mail_dir/$_" and /$mail_prefix-$$-/o } readdir DIR; + closedir DIR; + + for my $file ( @files ) { + my $entity=$parser->parse_in($file) or die "Couldn't parse mail"; + print_tnef_parts($entity); + # If you want to delete the working files + # $entity->purge; + } + + sub print_tnef_parts { + my $ent = shift; + + if ( $ent->parts ) { + for my $sub_ent ( $ent->parts ) { + print_tnef_parts($sub_ent); + } + } elsif ( $ent->mime_type =~ /ms-tnef/i ) { + + # Create a tnef object + my $tnef = Convert::TNEF->read_ent($ent,{output_dir=>"tnefmail"}) + or die $Convert::TNEF::errstr; + for ($tnef->attachments) { + print "Title:",$_->name,"\n"; + print "Data:\n",$_->data,"\n"; + } + + # If you want to delete the working files + # $tnef->purge; + } + } + +=head1 SEE ALSO + +perl(1), IO::Wrap(3), MIME::Parser(3), MIME::Entity(3), MIME::Body(3) + +=head1 CAVEATS + + The parsing may depend on the endianness (see perlport) and width of + integers on the system where the TNEF file was created. If this proves + to be the case (check the debug output), I'll see what I can do + about it. + +=head1 AUTHOR + + Douglas Wilson, dougw@cpan.org + +=cut + diff --git a/vendor_perl/MIME/Body.pm b/vendor_perl/MIME/Body.pm new file mode 100644 index 000000000..c4a84a91d --- /dev/null +++ b/vendor_perl/MIME/Body.pm @@ -0,0 +1,672 @@ +package MIME::Body; + +=head1 NAME + +MIME::Body - the body of a MIME message + + +=head1 SYNOPSIS + +Before reading further, you should see L to make sure that +you understand where this module fits into the grand scheme of things. +Go on, do it now. I'll wait. + +Ready? Ok... + + +=head2 Obtaining bodies + + ### Get the bodyhandle of a MIME::Entity object: + $body = $entity->bodyhandle; + + ### Create a body which stores data in a disk file: + $body = new MIME::Body::File "/path/to/file"; + + ### Create a body which stores data in an in-core array: + $body = new MIME::Body::InCore \@strings; + + +=head2 Opening, closing, and using IO handles + + ### Write data to the body: + $IO = $body->open("w") || die "open body: $!"; + $IO->print($message); + $IO->close || die "close I/O handle: $!"; + + ### Read data from the body (in this case, line by line): + $IO = $body->open("r") || die "open body: $!"; + while (defined($_ = $IO->getline)) { + ### do stuff + } + $IO->close || die "close I/O handle: $!"; + + +=head2 Other I/O + + ### Dump the ENCODED body data to a filehandle: + $body->print(\*STDOUT); + + ### Slurp all the UNENCODED data in, and put it in a scalar: + $string = $body->as_string; + + ### Slurp all the UNENCODED data in, and put it in an array of lines: + @lines = $body->as_lines; + + +=head2 Working directly with paths to underlying files + + ### Where's the data? + if (defined($body->path)) { ### data is on disk: + print "data is stored externally, in ", $body->path; + } + else { ### data is in core: + print "data is already in core, and is...\n", $body->as_string; + } + + ### Get rid of anything on disk: + $body->purge; + + +=head1 DESCRIPTION + +MIME messages can be very long (e.g., tar files, MPEGs, etc.) or very +short (short textual notes, as in ordinary mail). Long messages +are best stored in files, while short ones are perhaps best stored +in core. + +This class is an attempt to define a common interface for objects +which contain message data, regardless of how the data is +physically stored. The lifespan of a "body" object +usually looks like this: + +=over 4 + +=item 1. + +B +It's at this point that the actual MIME::Body subclass is chosen, +and new() is invoked. (For example: if the body data is going to +a file, then it is at this point that the class MIME::Body::File, +and the filename, is chosen). + +=item 2. + +B (usually by the MIME parser) like this: +The body is opened for writing, via C. This will trash any +previous contents, and return an "I/O handle" opened for writing. +Data is written to this I/O handle, via print(). +Then the I/O handle is closed, via close(). + +=item 3. + +B (usually by the user application) like this: +The body is opened for reading by a user application, via C. +This will return an "I/O handle" opened for reading. +Data is read from the I/O handle, via read(), getline(), or getlines(). +Then the I/O handle is closed, via close(). + +=item 4. + +B + +=back + +You can write your own subclasses, as long as they follow the +interface described below. Implementers of subclasses should assume +that steps 2 and 3 may be repeated any number of times, and in +different orders (e.g., 1-2-2-3-2-3-3-3-3-3-2-4). + +In any case, once a MIME::Body has been created, you ask to open it +for reading or writing, which gets you an "i/o handle": you then use +the same mechanisms for reading from or writing to that handle, no matter +what class it is. + +Beware: unless you know for certain what kind of body you have, you +should I assume that the body has an underlying filehandle. + + +=head1 PUBLIC INTERFACE + +=over 4 + +=cut + + +### Pragmas: +use strict; +use vars qw($VERSION); + +### System modules: +use Carp; +use IO::File; + +### The package version, both in 1.23 style *and* usable by MakeMaker: +$VERSION = "5.517"; + + +#------------------------------ + +=item new ARGS... + +I +Create a new body. Any ARGS are sent to init(). + +=cut + +sub new { + my $self = bless {}, shift; + $self->init(@_); + $self; +} + +#------------------------------ + +=item init ARGS... + +I +This is called automatically by C, with the arguments given +to C. The arguments are optional, and entirely up to the +subclass. The default method does nothing, + +=cut + +sub init { 1 } + +#------------------------------ + +=item as_lines + +I +Return the contents of the body as an array of lines (each terminated +by a newline, with the possible exception of the final one). +Returns empty on failure (NB: indistinguishable from an empty body!). + +Note: the default method gets the data via +repeated getline() calls; your subclass might wish to override this. + +=cut + +sub as_lines { + my $self = shift; + my @lines; + my $io = $self->open("r") || return (); + local $_; + push @lines, $_ while (defined($_ = $io->getline())); + $io->close; + @lines; +} + +#------------------------------ + +=item as_string + +I +Return the body data as a string (slurping it into core if necessary). +Best not to do this unless you're I that the body is reasonably small! +Returns empty string for an empty body, and undef on failure. + +Note: the default method uses print(), which gets the data via +repeated read() calls; your subclass might wish to override this. + +=cut + +sub as_string { + my $self = shift; + my $str = ''; + my $fh = IO::File->new(\$str, '>:') or croak("Cannot open in-memory file: $!"); + $self->print($fh); + close($fh); + return $str; +} +*data = \&as_string; ### silently invoke preferred usage + + +#------------------------------ + +=item binmode [ONOFF] + +I +With argument, flags whether or not open() should return an I/O handle +which has binmode() activated. With no argument, just returns the +current value. + +=cut + +sub binmode { + my ($self, $onoff) = @_; + $self->{MB_Binmode} = $onoff if (@_ > 1); + $self->{MB_Binmode}; +} + +#------------------------------ + +=item is_encoded [ONOFF] + +I +If set to yes, no decoding is applied on output. This flag is set +by MIME::Parser, if the parser runs in decode_bodies(0) mode, so the +content is handled unmodified. + +=cut + +sub is_encoded { + my ($self, $yesno) = @_; + $self->{MB_IsEncoded} = $yesno if (@_ > 1); + $self->{MB_IsEncoded}; +} + +#------------------------------ + +=item dup + +I +Duplicate the bodyhandle. + +I external data in bodyhandles is I copied to new files! +Changing the data in one body's data file, or purging that body, +I affect its duplicate. Bodies with in-core data probably need +not worry. + +=cut + +sub dup { + my $self = shift; + bless { %$self }, ref($self); ### shallow copy ok for ::File and ::Scalar +} + +#------------------------------ + +=item open READWRITE + +I +This should do whatever is necessary to open the body for either +writing (if READWRITE is "w") or reading (if mode is "r"). + +This method is expected to return an "I/O handle" object on success, +and undef on error. An I/O handle can be any object that supports a +small set of standard methods for reading/writing data. +See the IO::Handle class for an example. + +=cut + +sub open { + undef; +} + +#------------------------------ + +=item path [PATH] + +I +If you're storing the body data externally (e.g., in a disk file), you'll +want to give applications the ability to get at that data, for cleanup. +This method should return the path to the data, or undef if there is none. + +Where appropriate, the path I be a simple string, like a filename. +With argument, sets the PATH, which should be undef if there is none. + +=cut + +sub path { + my $self = shift; + $self->{MB_Path} = shift if @_; + $self->{MB_Path}; +} + +#------------------------------ + +=item print FILEHANDLE + +I +Output the body data to the given filehandle, or to the currently-selected +one if none is given. + +=cut + +sub print { + my ($self, $fh) = @_; + my $nread; + + ### Get output filehandle, and ensure that it's a printable object: + $fh ||= select; + + ### Write it: + my $buf = ''; + my $io = $self->open("r") || return undef; + $fh->print($buf) while ($nread = $io->read($buf, 8192)); + $io->close; + return defined($nread); ### how'd we do? +} + +#------------------------------ + +=item purge + +I +Remove any data which resides external to the program (e.g., in disk files). +Immediately after a purge(), the path() should return undef to indicate +that the external data is no longer available. + +=cut + +sub purge { + 1; +} + + + +=back + +=head1 SUBCLASSES + +The following built-in classes are provided: + + Body Stores body When open()ed, + class: data in: returns: + -------------------------------------------------------- + MIME::Body::File disk file IO::Handle + MIME::Body::Scalar scalar IO::Handle + MIME::Body::InCore scalar array IO::Handle + +=cut + + +#------------------------------------------------------------ +package MIME::Body::File; +#------------------------------------------------------------ + +=head2 MIME::Body::File + +A body class that stores the data in a disk file. Invoke the +constructor as: + + $body = new MIME::Body::File "/path/to/file"; + +In this case, the C method would return the given path, +so you I say: + + if (defined($body->path)) { + open BODY, $body->path or die "open: $!"; + while () { + ### do stuff + } + close BODY; + } + +But you're best off not doing this. + +=cut + + +### Pragmas: +use vars qw(@ISA); +use strict; + +### System modules: +use IO::File; + +### Kit modules: +use MIME::Tools qw(whine); + +@ISA = qw(MIME::Body); + + +#------------------------------ +# init PATH +#------------------------------ +sub init { + my ($self, $path) = @_; + $self->path($path); ### use it as-is + $self; +} + +#------------------------------ +# open READWRITE +#------------------------------ +sub open { + my ($self, $mode) = @_; + + my $path = $self->path; + + if( $mode ne 'r' && $mode ne 'w' ) { + die "bad mode: '$mode'"; + } + + my $IO = IO::File->new($path, $mode) || die "MIME::Body::File->open $path: $!"; + + $IO->binmode() if $self->binmode; + + return $IO; +} + +#------------------------------ +# purge +#------------------------------ +# Unlink the path (and undefine it). +# +sub purge { + my $self = shift; + if (defined($self->path)) { + unlink $self->path or whine "couldn't unlink ".$self->path.": $!"; + $self->path(undef); + } + 1; +} + + + + +#------------------------------------------------------------ +package MIME::Body::Scalar; +#------------------------------------------------------------ + +=head2 MIME::Body::Scalar + +A body class that stores the data in-core, in a simple scalar. +Invoke the constructor as: + + $body = new MIME::Body::Scalar \$string; + +A single scalar argument sets the body to that value, exactly as though +you'd opened for the body for writing, written the value, +and closed the body again: + + $body = new MIME::Body::Scalar "Line 1\nLine 2\nLine 3"; + +A single array reference sets the body to the result of joining all the +elements of that array together: + + $body = new MIME::Body::Scalar ["Line 1\n", + "Line 2\n", + "Line 3"]; + +=cut + +use vars qw(@ISA); +use strict; + +use Carp; + +@ISA = qw(MIME::Body); + + +#------------------------------ +# init DATA +#------------------------------ +sub init { + my ($self, $data) = @_; + $data = join('', @$data) if (ref($data) && (ref($data) eq 'ARRAY')); + $self->{MBS_Data} = (defined($data) ? $data : ''); + $self; +} + +#------------------------------ +# as_string +#------------------------------ +sub as_string { + shift->{MBS_Data}; +} + +#------------------------------ +# open READWRITE +#------------------------------ +sub open { + my ($self, $mode) = @_; + $self->{MBS_Data} = '' if ($mode eq 'w'); ### writing + + if ($mode eq 'w') { + $mode = '>:'; + } elsif ($mode eq 'r') { + $mode = '<:'; + } else { + die "bad mode: $mode"; + } + + return IO::File->new(\ $self->{MBS_Data}, $mode); +} + + + + + +#------------------------------------------------------------ +package MIME::Body::InCore; +#------------------------------------------------------------ + +=head2 MIME::Body::InCore + +A body class that stores the data in-core. +Invoke the constructor as: + + $body = new MIME::Body::InCore \$string; + $body = new MIME::Body::InCore $string; + $body = new MIME::Body::InCore \@stringarray + +A simple scalar argument sets the body to that value, exactly as though +you'd opened for the body for writing, written the value, +and closed the body again: + + $body = new MIME::Body::InCore "Line 1\nLine 2\nLine 3"; + +A single array reference sets the body to the concatenation of all +scalars that it holds: + + $body = new MIME::Body::InCore ["Line 1\n", + "Line 2\n", + "Line 3"]; + +=cut + +use vars qw(@ISA); +use strict; + +use Carp; + +@ISA = qw(MIME::Body::Scalar); + + +#------------------------------ +# init DATA +#------------------------------ +sub init { + my ($self, $data) = @_; + if (!defined($data)) { ### nothing + $self->{MBS_Data} = ''; + } + elsif (!ref($data)) { ### simple scalar + $self->{MBS_Data} = $data; + } + elsif (ref($data) eq 'SCALAR') { + $self->{MBS_Data} = $$data; + } + elsif (ref($data) eq 'ARRAY') { + $self->{MBS_Data} = join('', @$data); + } + else { + croak "I can't handle DATA which is a ".ref($data)."\n"; + } + $self; +} + +1; +__END__ + + +#------------------------------ + +=head2 Defining your own subclasses + +So you're not happy with files and scalar-arrays? +No problem: just define your own MIME::Body subclass, and make a subclass +of MIME::Parser or MIME::ParserBase which returns an instance of your +body class whenever appropriate in the C method. + +Your "body" class must inherit from MIME::Body (or some subclass of it), +and it must either provide (or inherit the default for) the following +methods... + +The default inherited method I for all these: + + new + binmode [ONOFF] + path + +The default inherited method I for these, but perhaps +there's a better implementation for your subclass. + + init ARGS... + as_lines + as_string + dup + print + purge + +The default inherited method I for these: + + open + + + +=head1 NOTES + +One reason I didn't just use IO::Handle objects for message bodies was +that I wanted a "body" object to be a form of completely encapsulated +program-persistent storage; that is, I wanted users to be able to write +code like this... + + ### Get body handle from this MIME message, and read its data: + $body = $entity->bodyhandle; + $IO = $body->open("r"); + while (defined($_ = $IO->getline)) { + print STDOUT $_; + } + $IO->close; + +...without requiring that they know anything more about how the +$body object is actually storing its data (disk file, scalar variable, +array variable, or whatever). + +Storing the body of each MIME message in a persistently-open +IO::Handle was a possibility, but it seemed like a bad idea, +considering that a single multipart MIME message could easily suck up +all the available file descriptors on some systems. This risk increases +if the user application is processing more than one MIME entity at a time. + +=head1 SEE ALSO + +L + +=head1 AUTHOR + +Eryq (F), ZeeGee Software Inc (F). +Dianne Skoll (F) + +All rights reserved. This program is free software; you can redistribute +it and/or modify it under the same terms as Perl itself. + +Thanks to Achim Bohnet for suggesting that MIME::Parser not be restricted +to the use of FileHandles. + +#------------------------------ +1; + diff --git a/vendor_perl/MIME/Tools.pm b/vendor_perl/MIME/Tools.pm new file mode 100644 index 000000000..e250a18a9 --- /dev/null +++ b/vendor_perl/MIME/Tools.pm @@ -0,0 +1,1043 @@ +package MIME::Tools; + +#------------------------------ +# Because the POD documentation is pretty extensive, it follows +# the __END__ statement below... +#------------------------------ + +use strict; +use vars (qw(@ISA %CONFIG @EXPORT_OK %EXPORT_TAGS $VERSION $ME + $M_DEBUG $M_WARNING $M_ERROR )); + +require Exporter; +use IO::File; +use File::Temp 0.18 (); +use Carp; + +$ME = "MIME-tools"; + +@ISA = qw(Exporter); + +# Exporting (importing should only be done by modules in this toolkit!): +%EXPORT_TAGS = ( + 'config' => [qw(%CONFIG)], + 'msgs' => [qw(usage debug whine error)], + 'msgtypes'=> [qw($M_DEBUG $M_WARNING $M_ERROR)], + 'utils' => [qw(textual_type tmpopen )], + ); +Exporter::export_ok_tags('config', 'msgs', 'msgtypes', 'utils'); + +# The TOOLKIT version, both in 1.23 style *and* usable by MakeMaker: +$VERSION = "5.517"; + +# Configuration (do NOT alter this directly)... +# All legal CONFIG vars *must* be in here, even if only to be set to undef: +%CONFIG = + ( + DEBUGGING => 0, + QUIET => 1, + ); + +# Message-logging constants: +$M_DEBUG = 'debug'; +$M_WARNING = 'warning'; +$M_ERROR = 'error'; + + + +#------------------------------ +# +# CONFIGURATION... (see below) +# +#------------------------------ + +sub config { + my $class = shift; + usage("config() is obsolete"); + + # No args? Just return list: + @_ or return keys %CONFIG; + my $method = lc(shift); + return $class->$method(@_); +} + +sub debugging { + my ($class, $value) = @_; + $CONFIG{'DEBUGGING'} = $value if (@_ > 1); + return $CONFIG{'DEBUGGING'}; +} + +sub quiet { + my ($class, $value) = @_; + $CONFIG{'QUIET'} = $value if (@_ > 1); + return $CONFIG{'QUIET'}; +} + +sub version { + my ($class, $value) = @_; + return $VERSION; +} + + + +#------------------------------ +# +# MESSAGES... +# +#------------------------------ + +#------------------------------ +# +# debug MESSAGE... +# +# Function, private. +# Output a debug message. +# +sub debug { + print STDERR "$ME: $M_DEBUG: ", @_, "\n" if $CONFIG{DEBUGGING}; +} + +#------------------------------ +# +# whine MESSAGE... +# +# Function, private. +# Something doesn't look right: issue a warning. +# Only output if $^W (-w) is true, and we're not being QUIET. +# +sub whine { + my $msg = "$ME: $M_WARNING: ".join('', @_)."\n"; + warn $msg if ($^W && !$CONFIG{QUIET}); + return (wantarray ? () : undef); +} + +#------------------------------ +# +# error MESSAGE... +# +# Function, private. +# Something failed, but not so badly that we want to throw an +# exception. Just report our general unhappiness. +# Only output if $^W (-w) is true, and we're not being QUIET. +# +sub error { + my $msg = "$ME: $M_ERROR: ".join('', @_)."\n"; + warn $msg if ($^W && !$CONFIG{QUIET}); + return (wantarray ? () : undef); +} + +#------------------------------ +# +# usage MESSAGE... +# +# Register unhappiness about usage. +# +sub usage { + my ( $p, $f, $l, $s) = caller(1); + my ($cp, $cf, $cl, $cs) = caller(2); + my $msg = join('', (($s =~ /::/) ? "$s() " : "${p}::$s() "), @_, "\n"); + my $loc = ($cf ? "\tin code called from $cf l.$cl" : ''); + + warn "$msg$loc\n" if ($^W && !$CONFIG{QUIET}); + return (wantarray ? () : undef); +} + + + +#------------------------------ +# +# UTILS... +# +#------------------------------ + +#------------------------------ +# +# textual_type MIMETYPE +# +# Function. Does the given MIME type indicate a textlike document? +# +sub textual_type { + ($_[0] =~ m{^(text|message)(/|\Z)}i); +} + +#------------------------------ +# +# tmpopen +# +# +sub tmpopen +{ + my ($args) = @_; + $args ||= {}; + return File::Temp->new( %{$args} ); +} + +#------------------------------ +1; +__END__ + + +=head1 NAME + +MIME-tools - modules for parsing (and creating!) MIME entities + + +=head1 SYNOPSIS + +Here's some pretty basic code for B and outputting +its decoded components to a given directory: + + use MIME::Parser; + + ### Create parser, and set some parsing options: + my $parser = new MIME::Parser; + $parser->output_under("$ENV{HOME}/mimemail"); + + ### Parse input: + $entity = $parser->parse(\*STDIN) or die "parse failed\n"; + + ### Take a look at the top-level entity (and any parts it has): + $entity->dump_skeleton; + + +Here's some code which B containing +three parts: a text file, an attached GIF, and some more text: + + use MIME::Entity; + + ### Create the top-level, and set up the mail headers: + $top = MIME::Entity->build(Type =>"multipart/mixed", + From => "me\@myhost.com", + To => "you\@yourhost.com", + Subject => "Hello, nurse!"); + + ### Part #1: a simple text document: + $top->attach(Path=>"./testin/short.txt"); + + ### Part #2: a GIF file: + $top->attach(Path => "./docs/mime-sm.gif", + Type => "image/gif", + Encoding => "base64"); + + ### Part #3: some literal text: + $top->attach(Data=>$message); + + ### Send it: + open MAIL, "| /usr/lib/sendmail -t -oi -oem" or die "open: $!"; + $top->print(\*MAIL); + close MAIL; + + +For more examples, look at the scripts in the B directory +of the MIME-tools distribution. + + + +=head1 DESCRIPTION + +MIME-tools is a collection of Perl5 MIME:: modules for parsing, decoding, +I single- or multipart (even nested multipart) MIME +messages. (Yes, kids, that means you can send messages with attached +GIF files). + + +=head1 REQUIREMENTS + +You will need the following installed on your system: + + File::Path + File::Spec + IPC::Open2 (optional) + MIME::Base64 + MIME::QuotedPrint + Net::SMTP + Mail::Internet, ... from the MailTools distribution. + +See the Makefile.PL in your distribution for the most-comprehensive +list of prerequisite modules and their version numbers. + + +=head1 A QUICK TOUR + +=head2 Overview of the classes + +Here are the classes you'll generally be dealing with directly: + + + (START HERE) results() .-----------------. + \ .-------->| MIME:: | + .-----------. / | Parser::Results | + | MIME:: |--' `-----------------' + | Parser |--. .-----------------. + `-----------' \ filer() | MIME:: | + | parse() `-------->| Parser::Filer | + | gives you `-----------------' + | a... | output_path() + | | determines + | | path() of... + | head() .--------. | + | returns... | MIME:: | get() | + V .-------->| Head | etc... | + .--------./ `--------' | + .---> | MIME:: | | + `-----| Entity | .--------. | + parts() `--------'\ | MIME:: | / + returns `-------->| Body |<---------' + sub-entities bodyhandle() `--------' + (if any) returns... | open() + | returns... + | + V + .--------. read() + | IO:: | getline() + | Handle | print() + `--------' etc... + + +To illustrate, parsing works this way: + +=over 4 + +=item * + +B +A parser is an instance of C. +You hand it an input stream (like a filehandle) to parse a message from: +if the parse is successful, the result is an "entity". + +=item * + +B +An entity is an instance of C (a subclass of C). +If the message had "parts" (e.g., attachments), then those parts +are "entities" as well, contained inside the top-level entity. +Each entity has a "head" and a "body". + +=item * + +B +A "head" is an instance of C (a subclass of C). +It contains information from the message header: content type, +sender, subject line, etc. + +=item * + +B +You can ask to "open" this data source for I or I, +and you will get back an "I/O handle". + +=item * + +B +This handle is an object that is basically like an IO::Handle... it +can be any class, so long as it supports a small, standard set of +methods for reading from or writing to the underlying data source. + +=back + +A typical multipart message containing two parts -- a textual greeting +and an "attached" GIF file -- would be a tree of MIME::Entity objects, +each of which would have its own MIME::Head. Like this: + + .--------. + | MIME:: | Content-type: multipart/mixed + | Entity | Subject: Happy Samhaine! + `--------' + | + `----. + parts | + | .--------. + |---| MIME:: | Content-type: text/plain; charset=us-ascii + | | Entity | Content-transfer-encoding: 7bit + | `--------' + | .--------. + |---| MIME:: | Content-type: image/gif + | Entity | Content-transfer-encoding: base64 + `--------' Content-disposition: inline; + filename="hs.gif" + + + +=head2 Parsing messages + +You usually start by creating an instance of B +and setting up certain parsing parameters: what directory to save +extracted files to, how to name the files, etc. + +You then give that instance a readable filehandle on which waits a +MIME message. If all goes well, you will get back a B +object (a subclass of B), which consists of... + +=over 4 + +=item * + +A B (a subclass of B) which holds the MIME +header data. + +=item * + +A B, which is a object that knows where the body data is. +You ask this object to "open" itself for reading, and it +will hand you back an "I/O handle" for reading the data: this could be +of any class, so long as it conforms to a subset of the B +interface. + +=back + +If the original message was a multipart document, the MIME::Entity +object will have a non-empty list of "parts", each of which is in +turn a MIME::Entity (which might also be a multipart entity, etc, +etc...). + +Internally, the parser (in MIME::Parser) asks for instances +of B whenever it needs to decode an encoded file. +MIME::Decoder has a mapping from supported encodings (e.g., 'base64') +to classes whose instances can decode them. You can add to this mapping +to try out new/experiment encodings. You can also use +MIME::Decoder by itself. + + +=head2 Composing messages + +All message composition is done via the B class. +For single-part messages, you can use the B +constructor to create MIME entities very easily. + +For multipart messages, you can start by creating a top-level +C entity with B, and then use +the similar B method to attach parts to +that message. I what most people think of as +"a text message with an attached GIF file" is I a multipart +message with 2 parts: the first being the text message, and the +second being the GIF file. + +When building MIME a entity, you'll have to provide two very important +pieces of information: the I and the +I. The type is usually easy, as it is directly +determined by the file format; e.g., an HTML file is C. +The encoding, however, is trickier... for example, some HTML files are +C<7bit>-compliant, but others might have very long lines and would need to be +sent C for reliability. + +See the section on encoding/decoding for more details, as well as +L<"A MIME PRIMER"> below. + + +=head2 Sending email + +Since MIME::Entity inherits directly from Mail::Internet, +you can use the normal Mail::Internet mechanisms to send +email. For example, + + $entity->smtpsend; + + + +=head2 Encoding/decoding support + +The B class can be used to I as well; this is done +when printing MIME entities. All the standard encodings are supported +(see L<"A MIME PRIMER"> below for details): + + Encoding: | Normally used when message contents are: + ------------------------------------------------------------------- + 7bit | 7-bit data with under 1000 chars/line, or multipart. + 8bit | 8-bit data with under 1000 chars/line. + binary | 8-bit data with some long lines (or no line breaks). + quoted-printable | Text files with some 8-bit chars (e.g., Latin-1 text). + base64 | Binary files. + +Which encoding you choose for a given document depends largely on +(1) what you know about the document's contents (text vs binary), and +(2) whether you need the resulting message to have a reliable encoding +for 7-bit Internet email transport. + +In general, only C and C guarantee reliable +transport of all data; the other three "no-encoding" encodings simply +pass the data through, and are only reliable if that data is 7bit ASCII +with under 1000 characters per line, and has no conflicts with the +multipart boundaries. + +I've considered making it so that the content-type and encoding +can be automatically inferred from the file's path, but that seems +to be asking for trouble... or at least, for Mail::Cap... + + + +=head2 Message-logging + +MIME-tools is a large and complex toolkit which tries to deal with +a wide variety of external input. It's sometimes helpful to see +what's really going on behind the scenes. +There are several kinds of messages logged by the toolkit itself: + +=over 4 + +=item Debug messages + +These are printed directly to the STDERR, with a prefix of +C<"MIME-tools: debug">. + +Debug message are only logged if you have turned +L on in the MIME::Tools configuration. + + +=item Warning messages + +These are logged by the standard Perl warn() mechanism +to indicate an unusual situation. +They all have a prefix of C<"MIME-tools: warning">. + +Warning messages are only logged if C<$^W> is set true +and MIME::Tools is not configured to be L. + + +=item Error messages + +These are logged by the standard Perl warn() mechanism +to indicate that something actually failed. +They all have a prefix of C<"MIME-tools: error">. + +Error messages are only logged if C<$^W> is set true +and MIME::Tools is not configured to be L. + + +=item Usage messages + +Unlike "typical" warnings above, which warn about problems processing +data, usage-warnings are for alerting developers of deprecated methods +and suspicious invocations. + +Usage messages are currently only logged if C<$^W> is set true +and MIME::Tools is not configured to be L. + +=back + +When a MIME::Parser (or one of its internal helper classes) +wants to report a message, it generally does so by recording +the message to the B object +immediately before invoking the appropriate function above. +That means each parsing run has its own trace-log which +can be examined for problems. + + +=head2 Configuring the toolkit + +If you want to tweak the way this toolkit works (for example, to +turn on debugging), use the routines in the B module. + +=over + +=item debugging + +Turn debugging on or off. +Default is false (off). + + MIME::Tools->debugging(1); + + +=item quiet + +Turn the reporting of warning/error messages on or off. +Default is true, meaning that these message are silenced. + + MIME::Tools->quiet(1); + + +=item version + +Return the toolkit version. + + print MIME::Tools->version, "\n"; + +=back + + + + + + + + +=head1 THINGS YOU SHOULD DO + + +=head2 Take a look at the examples + +The MIME-Tools distribution comes with an "examples" directory. +The scripts in there are basically just tossed-together, but +they'll give you some ideas of how to use the parser. + + +=head2 Run with warnings enabled + +I run your Perl script with C<-w>. +If you see a warning about a deprecated method, change your +code ASAP. This will ease upgrades tremendously. + + +=head2 Avoid non-standard encodings + +Don't try to MIME-encode using the non-standard MIME encodings. +It's just not a good practice if you want people to be able to +read your messages. + + +=head2 Plan for thrown exceptions + +For example, if your mail-handling code absolutely must not die, +then perform mail parsing like this: + + $entity = eval { $parser->parse(\*INPUT) }; + +Parsing is a complex process, and some components may throw exceptions +if seriously-bad things happen. Since "seriously-bad" is in the +eye of the beholder, you're better off I possible exceptions +instead of asking me to propagate C up the stack. Use of exceptions in +reusable modules is one of those religious issues we're never all +going to agree upon; thankfully, that's what C is good for. + + +=head2 Check the parser results for warnings/errors + +As of 5.3xx, the parser tries extremely hard to give you a +MIME::Entity. If there were any problems, it logs warnings/errors +to the underlying "results" object (see L). +Look at that object after each parse. +Print out the warnings and errors, I if messages don't +parse the way you thought they would. + + +=head2 Don't plan on printing exactly what you parsed! + +I +Because of things like ambiguities in base64-encoding, the following +is I going to spit out its input unchanged in all cases: + + $entity = $parser->parse(\*STDIN); + $entity->print(\*STDOUT); + +If you're using MIME::Tools to process email, remember to save +the data you parse if you want to send it on unchanged. +This is vital for things like PGP-signed email. + + +=head2 Understand how international characters are represented + +The MIME standard allows for text strings in headers to contain +characters from any character set, by using special sequences +which look like this: + + =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= + +To be consistent with the existing Mail::Field classes, MIME::Tools +does I automatically unencode these strings, since doing so would +lose the character-set information and interfere with the parsing +of fields (see L for a full explanation). +That means you should be prepared to deal with these encoded strings. + +The most common question then is, B +The answer depends on what you want to decode them I: +ASCII, Latin1, UTF-8, etc. Be aware that your "target" representation +may not support all possible character sets you might encounter; +for example, Latin1 (ISO-8859-1) has no way of representing Big5 +(Chinese) characters. A common practice is to represent "untranslateable" +characters as "?"s, or to ignore them completely. + +To unencode the strings into some of the more-popular Western byte +representations (e.g., Latin1, Latin2, etc.), you can use the decoders +in MIME::WordDecoder (see L). +The simplest way is by using C, a function wrapped +around your "default" decoder, as follows: + + use MIME::WordDecoder; + ... + $subject = unmime $entity->head->get('subject'); + +One place this I done automatically is in extracting the recommended +filename for a part while parsing. That's why you should start by +setting up the best "default" decoder if the default target of Latin1 +isn't to your liking. + + + +=head1 THINGS I DO THAT YOU SHOULD KNOW ABOUT + + +=head2 Fuzzing of CRLF and newline on input + +RFC 2045 dictates that MIME streams have lines terminated by CRLF +(C<"\r\n">). However, it is extremely likely that folks will want to +parse MIME streams where each line ends in the local newline +character C<"\n"> instead. + +An attempt has been made to allow the parser to handle both CRLF +and newline-terminated input. + + +=head2 Fuzzing of CRLF and newline when decoding + +The C<"7bit"> and C<"8bit"> decoders will decode both +a C<"\n"> and a C<"\r\n"> end-of-line sequence into a C<"\n">. + +The C<"binary"> decoder (default if no encoding specified) +still outputs stuff verbatim... so a MIME message with CRLFs +and no explicit encoding will be output as a text file +that, on many systems, will have an annoying ^M at the end of +each line... I. + + +=head2 Fuzzing of CRLF and newline when encoding/composing + +TODO FIXME +All encoders currently output the end-of-line sequence as a C<"\n">, +with the assumption that the local mail agent will perform +the conversion from newline to CRLF when sending the mail. +However, there probably should be an option to output CRLF as per RFC 2045 + + +=head2 Inability to handle multipart boundaries with embedded newlines + +Let's get something straight: this is an evil, EVIL practice. +If your mailer creates multipart boundary strings that contain +newlines, give it two weeks notice and find another one. If your +mail robot receives MIME mail like this, regard it as syntactically +incorrect, which it is. + + +=head2 Ignoring non-header headers + +People like to hand the parser raw messages straight from +POP3 or from a mailbox. There is often predictable non-header +information in front of the real headers; e.g., the initial +"From" line in the following message: + + From - Wed Mar 22 02:13:18 2000 + Return-Path: + Subject: Hello + +The parser simply ignores such stuff quietly. Perhaps it +shouldn't, but most people seem to want that behavior. + + +=head2 Fuzzing of empty multipart preambles + +Please note that there is currently an ambiguity in the way +preambles are parsed in. The following message fragments I +are regarded as having an empty preamble (where C<\n> indicates a +newline character): + + Content-type: multipart/mixed; boundary="xyz"\n + Subject: This message (#1) has an empty preamble\n + \n + --xyz\n + ... + + Content-type: multipart/mixed; boundary="xyz"\n + Subject: This message (#2) also has an empty preamble\n + \n + \n + --xyz\n + ... + +In both cases, the I completely-empty line (after the "Subject") +marks the end of the header. + +But we should clearly ignore the I empty line in message #2, +since it fills the role of I<"the newline which is only there to make +sure that the boundary is at the beginning of a line">. +Such newlines are I part of the content preceding the boundary; +thus, there is no preamble "content" in message #2. + +However, it seems clear that message #1 I has no preamble +"content", and is in fact merely a compact representation of an +empty preamble. + + +=head2 Use of a temp file during parsing + +I +Although the amount of core available on even a modest home +system continues to grow, the size of attachments continues +to grow with it. I wanted to make sure that even users with small +systems could deal with decoding multi-megabyte sounds and movie files. +That means not being core-bound. + +As of the released 5.3xx, MIME::Parser gets by with only +one temp file open per parser. This temp file provides +a sort of infinite scratch space for dealing with the current +message part. It's fast and lightweight, but you should know +about it anyway. + + +=head2 Why do I assume that MIME objects are email objects? + +Achim Bohnet once pointed out that MIME headers do nothing more than +store a collection of attributes, and thus could be represented as +objects which don't inherit from Mail::Header. + +I agree in principle, but RFC 2045 says otherwise. +RFC 2045 [MIME] headers are a syntactic subset of RFC-822 [email] headers. +Perhaps a better name for these modules would have been RFC1521:: +instead of MIME::, but we're a little beyond that stage now. + +When I originally wrote these modules for the CPAN, I agonized for a long +time about whether or not they really should subclass from B +(then at version 1.17). Thanks to Graham Barr, who graciously evolved +MailTools 1.06 to be more MIME-friendly, unification was achieved +at MIME-tools release 2.0. +The benefits in reuse alone have been substantial. + + + + +=head1 A MIME PRIMER + +So you need to parse (or create) MIME, but you're not quite up on +the specifics? No problem... + + + +=head2 Glossary + +Here are some definitions adapted from RFC 1521 (predecessor of the +current RFC 204[56789] defining MIME) explaining the terminology we +use; each is accompanied by the equivalent in MIME:: module terms... + +=over 4 + +=item attachment + +An "attachment" is common slang for any part of a multipart message -- +except, perhaps, for the first part, which normally carries a user +message describing the attachments that follow (e.g.: "Hey dude, here's +that GIF file I promised you."). + +In our system, an attachment is just a B under the +top-level entity, probably one of its L. + +=item body + +The "body" of an L is that portion of the entity +which follows the L and which contains the real message +content. For example, if your MIME message has a GIF file attachment, +then the body of that attachment is the base64-encoded GIF file itself. + +A body is represented by an instance of B. You get the +body of an entity by sending it a L +message. + +=item body part + +One of the parts of the body of a multipart B. +A body part has a B and a B, so it makes sense to +speak about the body of a body part. + +Since a body part is just a kind of entity, it's represented by +an instance of B. + +=item entity + +An "entity" means either a B or a B. +All entities have a B and a B. + +An entity is represented by an instance of B. +There are instance methods for recovering the +L (a B) and the +L (a B). + +=item header + +This is the top portion of the MIME message, which contains the +"Content-type", "Content-transfer-encoding", etc. Every MIME entity has +a header, represented by an instance of B. You get the +header of an entity by sending it a head() message. + +=item message + +A "message" generally means the complete (or "top-level") message being +transferred on a network. + +There currently is no explicit package for "messages"; under MIME::, +messages are streams of data which may be read in from files or +filehandles. You can think of the B returned by the +B as representing the full message. + + +=back + + +=head2 Content types + +This indicates what kind of data is in the MIME message, usually +as I. The standard major types are shown below. +A more-comprehensive listing may be found in RFC-2046. + +=over 4 + +=item application + +Data which does not fit in any of the other categories, particularly +data to be processed by some type of application program. +C, C, C... + +=item audio + +Audio data. +C