Update of /cvsroot/mhonarc/mhonarc/MHonArc/lib
In directory subversions:/tmp/cvs-serv26846
Modified Files:
mhtxtenrich.pl mhtxthtml.pl mhtxtplain.pl
Log Message:
* Text filters now use the x-mha-charset field to get the charset of
the body.
* mhtxtenrich.pl and mhtxthtml.pl now use CHARSETCONVERTERS to
process character data in the same manner as mhtxtplain.pl. This
helps normalize character data to ASCII plus entity references.
Index: mhtxtenrich.pl
===================================================================
RCS file: /cvsroot/mhonarc/mhonarc/MHonArc/lib/mhtxtenrich.pl,v
retrieving revision 2.6
retrieving revision 2.7
diff -C2 -r2.6 -r2.7
*** mhtxtenrich.pl 27 Oct 2002 03:08:17 -0000 2.6
--- mhtxtenrich.pl 18 Dec 2002 05:47:46 -0000 2.7
***************
*** 37,40 ****
--- 37,45 ----
package m2h_text_enriched;
+ my %special_to_char = (
+ 'lt' => '<',
+ 'gt' => '>',
+ );
+
##---------------------------------------------------------------------------
## Filter routine.
***************
*** 44,62 ****
sub filter {
my($fields, $data, $isdecode, $args) = @_;
! my($innofill, $chunk, $ret, $charset);
! $ret = "";
! $args = "" unless defined($args);
! $charset = "";
!
! ## Grab charset parameter (if defined)
! if ((defined($fields->{'content-type'}[0])) &&
! ($fields->{'content-type'}[0] =~ /\bcharset\s*=\s*([^\s;]+)/i) ) {
! $charset = lc $1;
! $charset =~ s/['";\s]//g;
}
## Convert specials
! $$data =~ s|&|\&amp;|gi;
! $$data =~ s|<<|\&lt;|gi;
## Translate text/enriched commands
--- 49,76 ----
sub filter {
my($fields, $data, $isdecode, $args) = @_;
! my($innofill, $chunk);
! my $charset = $fields->{'x-mha-charset'};
! my($charcnv, $real_charset_name) =
! readmail::MAILload_charset_converter($charset);
! my $ret = "";
! $args = "" unless defined($args);
!
! if (defined($charcnv) && defined(&$charcnv)) {
! $$data = &$charcnv($$data, $real_charset_name);
! } else {
! warn qq/\n/,
! qq/Warning: Unrecognized character set: $charset\n/,
! qq/ Message-Id: <$mhonarc::MHAmsgid>\n/,
! qq/ Message Number: $mhonarc::MHAmsgnum\n/;
}
+ ## Fixup any EOL mess
+ $$data =~ s/\r?\n/\n/g;
+ $$data =~ s/\r/\n/g;
+
+ # translate back <>'s for tag processing
+ $$data =~ s/&([lg]t);/$special_to_char{$1}/g;
## Convert specials
! $$data =~ s/<</\&lt;/g;
## Translate text/enriched commands
***************
*** 75,90 ****
convert_tags(\$chunk);
if (!$innofill) {
! $chunk =~ s|(\r?\n\s*)|&nl_seq_to_brs($1)|gie;
}
$ret .= $chunk;
}
-
- ## Translate 8-bit characters to entity refs based on charset
- ## (we already did '<' and '&' characters)
- if ($charset =~ /iso-8859-([2-9]|10)/i) {
- require 'iso8859.pl';
- $ret = iso_8859::str2sgml($ret, $charset, 1);
- }
-
$ret;
}
--- 89,96 ----
convert_tags(\$chunk);
if (!$innofill) {
! $chunk =~ s/(\n\s*)/&nl_seq_to_brs($1)/ge;
}
$ret .= $chunk;
}
$ret;
}
***************
*** 122,127 ****
# Not supported commands
! $$str =~ s|<lang>\s*<param>([^<]*)</param>||gi;
! $$str =~ s|</lang>||gi;
}
--- 128,133 ----
# Not supported commands
! $$str =~ s|<lang>\s*<param>([^<]*)</param>|<div lang="$1">|gi;
! $$str =~ s|</lang>|</div>|gi;
}
Index: mhtxthtml.pl
===================================================================
RCS file: /cvsroot/mhonarc/mhonarc/MHonArc/lib/mhtxthtml.pl,v
retrieving revision 2.27
retrieving revision 2.28
diff -C2 -r2.27 -r2.28
*** mhtxthtml.pl 13 Dec 2002 07:19:01 -0000 2.27
--- mhtxthtml.pl 18 Dec 2002 05:47:46 -0000 2.28
***************
*** 50,53 ****
--- 50,60 ----
q/dynsrc|for|href|longdesc|profile|src|url|usemap)\b/;
+ my %special_to_char = (
+ 'lt' => '<',
+ 'gt' => '>',
+ 'amp' => '&',
+ 'quot' => '"',
+ );
+
##---------------------------------------------------------------------------
## The filter must modify HTML content parts for merging into the
***************
*** 122,125 ****
--- 129,146 ----
my $atdir = $subdir ? $mhonarc::MsgPrefix.$mhonarc::MHAmsgnum : "";
my $tmp;
+
+ my $charset = $fields->{'x-mha-charset'};
+ my($charcnv, $real_charset_name) =
+ readmail::MAILload_charset_converter($charset);
+ if (defined($charcnv) && defined(&$charcnv)) {
+ $$data = &$charcnv($$data, $real_charset_name);
+ } else {
+ warn qq/\n/,
+ qq/Warning: Unrecognized character set: $charset\n/,
+ qq/ Message-Id: <$mhonarc::MHAmsgid>\n/,
+ qq/ Message Number: $mhonarc::MHAmsgnum\n/;
+ }
+ # translate back HTML specials back
+ $$data =~ s/&([lg]t|amp|quot);/$special_to_char{$1}/g;
## Check comment declarations: may screw-up mhonarc processing
Index: mhtxtplain.pl
===================================================================
RCS file: /cvsroot/mhonarc/mhonarc/MHonArc/lib/mhtxtplain.pl,v
retrieving revision 2.33
retrieving revision 2.34
diff -C2 -r2.33 -r2.34
*** mhtxtplain.pl 9 Dec 2002 23:21:52 -0000 2.33
--- mhtxtplain.pl 18 Dec 2002 05:47:46 -0000 2.34
***************
*** 72,77 ****
## with a link to it from the message page.
##
- ## default=set Default charset to use if not set.
- ##
## disableflowed
## Ignore flowed formatting for message text
--- 72,75 ----
***************
*** 243,247 ****
my($charset, $nourl, $igncharset, $nonfixed,
! $keepspace, $maxwidth, $target, $defset, $xhtml);
my(%asis) = ( );
--- 241,245 ----
my($charset, $nourl, $igncharset, $nonfixed,
! $keepspace, $maxwidth, $target, $xhtml);
my(%asis) = ( );
***************
*** 251,256 ****
if ($args =~ /\bmaxwidth=(\d+)/i) { $maxwidth = $1; }
else { $maxwidth = 0; }
- if ($args =~ /\bdefault=(\S+)/i) { $defset = lc $1; }
- else { $defset = 'us-ascii'; }
$target = "";
if ($args =~ /\btarget="([^"]+)"/i) { $target = $1; }
--- 249,252 ----
***************
*** 260,273 ****
$target = qq/target="$target"/;
}
- $defset =~ s/['"\s]//g;
! ## Grab charset parameter (if defined)
! if ( (defined($fields->{'content-type'}[0])) &&
! ($fields->{'content-type'}[0] =~ /\bcharset\s*=\s*([^\s;]+)/i) ) {
! $charset = lc $1;
! $charset =~ s/['";\s]//g;
! } else {
! $charset = $defset;
! }
## Grab format parameter (if defined)
my $textformat = 'fixed';
--- 256,263 ----
$target = qq/target="$target"/;
}
! ## Grab charset parameter
! $charset = $fields->{'x-mha-charset'};
!
## Grab format parameter (if defined)
my $textformat = 'fixed';
---------------------------------------------------------------------
To sign-off this list, send email to majordomo(_at_)mhonarc(_dot_)org with the
message text UNSUBSCRIBE MHONARC-DEV