##---------------------------------------------------------------------------##
## File:
## $Id: mhtxtplain.pl,v 2.19 2002/06/21 22:14:15 ehood Exp $
## Author:
## Earl Hood mhonarc@mhonarc.org
## Description:
## Library defines routine to filter text/plain body parts to HTML
## for MHonArc.
## Filter routine can be registered with the following:
##
'; $EndFlowedQuote = ""; ##---------------------------------------------------------------------------## ## Text/plain filter for mhonarc. The following filter arguments ## are recognized ($args): ## ## asis=set1:set2:... ## Colon separated lists of charsets to leave as-is. ## Only HTML special characters will be converted into ## entities. The default value is "us-ascii:iso-8859-1". ## ## attachcheck Honor attachment disposition. By default, ## all text/plain data is displayed inline on ## the message page. If attachcheck is specified ## and Content-Disposition specifies the data as ## an attachment, the data is saved to a file ## with a link to it from the message page. ## ## default=set Default charset to use if not set. ## ## inlineexts="ext1,ext2,..." ## A comma separated list of message specified filename ## extensions to treat as inline data. ## Applicable only when uudecode options specified. ## ## htmlcheck Check if message is actually an HTML message ## (to get around abhorrent MUAs). The message ## is treated as HTML if the first non-whitespace ## data looks like the start of an HTML document. ## ## keepspace Preserve whitespace if nonfixed ## ## nourl Do hyperlink URLs ## ## nonfixed Use normal typeface ## ## maxwidth=# Set the maximum width of lines. Lines exceeding ## the maxwidth will be broken up across multiple lines. ## ## quote Italicize quoted message text ## ## target=name Set TARGET attribute for links if converting URLs ## to links. Defaults to _top. ## ## usename Use filename specified in uuencoded data when ## converting uuencoded data. This option is only ## applicable of uudecode is specified. ## ## uudecode Decoded any embedded uuencoded data. ## ## All arguments should be separated by at least one space ## sub filter { my($fields, $data, $isdecode, $args) = @_; local($_); ## Parse arguments $args = "" unless defined($args); ## Check if content-disposition should be checked if ($args =~ /\battachcheck\b/i) { my($disp, $nameparm) = readmail::MAILhead_get_disposition($fields); if ($disp =~ /\battachment\b/i) { require 'mhexternal.pl'; return (m2h_external::filter( $fields, $data, $isdecode, readmail::get_filter_args('m2h_external::filter'))); } } ## Check if decoding uuencoded data. The implementation chosen here ## for decoding uuencoded data was done so when uudecode is not ## specified, there is no extra overhead (besides the $args check for ## uudecode). However, when uudecode is specified, more overhead may ## exist over other potential implementations. ## I.e. We only try to penalize performance when uudecode is specified. if ($args =~ s/\buudecode\b//ig) { # $args has uudecode stripped out for recursive calls # Make sure we have needed routines my $decoder = readmail::load_decoder("uuencode"); if (!defined($decoder) || !defined(&$decoder)) { require 'base64.pl'; $decoder = \&base64::uudecode; } require 'mhmimetypes.pl'; # Grab any filename extensions that imply inlining my $inlineexts = ''; if ($args =~ /\binlineexts=(\S+)/) { $inlineexts = ',' . lc($1) . ','; $inlineexts =~ s/['"]//g; } my $usename = $args =~ /\busename\b/; my($pdata); # have to use local() since typeglobs used my($inext, $uddata, $file, $urlfile); my @files = ( ); my $ret = ""; my $i = 0; #
' . &iso_2022_jp::jp2022_to_html($$data, $nourl). ''); } # Registered in CHARSETCONVERTERS } elsif (defined($charcnv) && defined(&$charcnv)) { $$data = &$charcnv($$data, $charset); # Other } else { warn qq/\n/, qq/Warning: Unrecognized character set: $charset\n/, qq/ Message-Id: <$mhonarc::MHAmsgid>\n/, qq/ Message Number: $mhonarc::MHAmsgnum\n/; esc_chars_inplace($data); } } else { esc_chars_inplace($data); } if ($textformat eq 'flowed') { # Initial code for format=flowed contributed by Ken Hirsch (May 2002). # text/plain; format=flowed defined in RFC2646 my $currdepth = 0; my $ret=''; s!^?x-flowed>\r?\n>!!mg; # we don't know why Eudora puts these in while (length($$data)) { $$data =~ /^((?:>)*)/; my $qd = $1; if ($$data =~ s/^(.*(?:(?:\n|\r\n?)$qd(?!>).*)*\n?)//) { # divide message into chunks by "quote-depth", # which is the number of leading > signs my $chunk = $1; $chunk =~ s/^$qd ?//mg; # N.B. also takes care of # space-stuffing $chunk =~ s/^-- $/--/mg; # special case for '-- ' if ($chunk =~ / \r?\n/) { # Treat this chunk as format=flowed # Lines that end with spaces are # considered to have soft line breaks. # Lines that end with no spaces are # considered to have hard line breaks. # XXX: Negative look-behind assertion not supported # on older versions of Perl 5 (<5.6) #$chunk =~ s/(?$1/g; $chunk =~ s/(^|[^ ])(\r?\n|\Z)/$1
" . $chunk . "\n"; } } my $newdepth = length($qd)/length('>'); if ($currdepth < $newdepth) { $chunk = $StartFlowedQuote x ($newdepth - $currdepth) . $chunk; } elsif ($currdepth > $newdepth) { $chunk = $EndFlowedQuote x ($currdepth - $newdepth) . $chunk; } $currdepth = $newdepth; $ret .= $chunk; } else { # The above regex should always match, but just in case... warn qq/\n/, qq/Warning: Dequoting problem with format=flowed data\n/, qq/ Message-Id: <$MHAmsgid>\n/, qq/ Message Number: $MHAmsgnum\n/; $ret .= $$data; last; } } if ($currdepth > 0) { $ret .= $EndFlowedQuote x $currdepth; } ## Post-processing cleanup: makes things look nicer $ret =~ s/
" . $$data . "\n"; } } ## Convert URLs to hyperlinks $$data =~ s@($HUrlExp)@$1@gio unless $nourl; ($$data); } ##---------------------------------------------------------------------------## sub esc_chars_inplace { my($foo) = shift; $$foo =~ s/&/&/g; $$foo =~ s/</g; $$foo =~ s/>/>/g; $$foo =~ s/"/"/g; 1; } ##---------------------------------------------------------------------------## sub preserve_space { my($str) = shift; 1 while $str =~ s/^([^\t]*)(\t+)/$1 . ' ' x (length($2) * 8 - length($1) % 8)/e; $str =~ s/ /\ /g; $str; } ##---------------------------------------------------------------------------## sub break_line { my($str) = shift; my($width) = shift; my($q, $new) = ('', ''); my($try, $trywidth, $len); ## Translate tabs to spaces 1 while $str =~ s/^([^\t]*)(\t+)/$1 . ' ' x (length($2) * 8 - length($1) % 8)/e; ## Do nothing if str <= width return $str if length($str) <= $width; ## See if str begins with a quote char if ($str =~ s/^( ?$QuoteChars)//o) { $q = $1; --$width; } ## Create new string by breaking up str while ($str ne "") { # If $str less than width, break out if (length($str) <= $width) { $new .= $q . $str; last; } # handle case where no-whitespace line larger than width if (($str =~ /^(\S+)/) && (($len = length($1)) >= $width)) { $new .= $q . $1; substr($str, 0, $len) = ""; next; } # Break string at whitespace $try = ''; $trywidth = $width; $try = substr($str, 0, $trywidth); if ($try =~ /(\S+)$/) { $trywidth -= length($1); $new .= $q . substr($str, 0, $trywidth); } else { $new .= $q . $try; } substr($str, 0, $trywidth) = ''; } continue { $new .= "\n" if $str; } $new; } ##---------------------------------------------------------------------------## 1;