mhonarc-commits
[Top] [All Lists]

CVS: mhonarc/MHonArc/lib mhtxtenrich.pl,2.6,2.7 mhtxthtml.pl,2.27,2.28 mhtxtplain.pl,2.33,2.34

2002-12-17 22:47:53
Update of /cvsroot/mhonarc/mhonarc/MHonArc/lib
In directory subversions:/tmp/cvs-serv26846

Modified Files:
	mhtxtenrich.pl mhtxthtml.pl mhtxtplain.pl 
Log Message:
* Text filters changed to use the x-mha-charset field to get the
  charset of the body.
* mhtxtenrich.pl and mhtxthtml.pl now use CHARSETCONVERTERS to
  process character data in the same manner as mhtxtplain.pl.  This
  helps normalize character data to ASCII plus entity references.


Index: mhtxtenrich.pl
===================================================================
RCS file: /cvsroot/mhonarc/mhonarc/MHonArc/lib/mhtxtenrich.pl,v
retrieving revision 2.6
retrieving revision 2.7
diff -C2 -r2.6 -r2.7
*** mhtxtenrich.pl	27 Oct 2002 03:08:17 -0000	2.6
--- mhtxtenrich.pl	18 Dec 2002 05:47:46 -0000	2.7
***************
*** 37,40 ****
--- 37,45 ----
  package m2h_text_enriched;
  
+ my %special_to_char = (
+     'lt'  => '<',
+     'gt'  => '>',
+ );
+ 
  ##---------------------------------------------------------------------------
  ##	Filter routine.
***************
*** 44,62 ****
  sub filter {
      my($fields, $data, $isdecode, $args) = @_;
!     my($innofill, $chunk, $ret, $charset);
!     $ret  = "";
!     $args = ""  unless defined($args);
!     $charset = "";
! 
!     ## Grab charset parameter (if defined)
!     if ((defined($fields->{'content-type'}[0])) &&
! 	($fields->{'content-type'}[0] =~ /\bcharset\s*=\s*([^\s;]+)/i) ) {
! 	$charset = lc $1;
! 	$charset =~ s/['";\s]//g;
      }
  
      ## Convert specials
!     $$data =~ s|&|\&amp;|gi;
!     $$data =~ s|<<|\&lt;|gi;
  
      ## Translate text/enriched commands
--- 49,76 ----
  sub filter {
      my($fields, $data, $isdecode, $args) = @_;
!     my($innofill, $chunk);
!     my $charset = $fields->{'x-mha-charset'};
!     my($charcnv, $real_charset_name) =
! 	    readmail::MAILload_charset_converter($charset);
!     my $ret = "";
!     $args   = ""  unless defined($args);
! 
!     if (defined($charcnv) && defined(&$charcnv)) {
! 	$$data = &$charcnv($$data, $real_charset_name);
!     } else {
! 	warn qq/\n/,
! 	     qq/Warning: Unrecognized character set: $charset\n/,
! 	     qq/         Message-Id: <$mhonarc::MHAmsgid>\n/,
! 	     qq/         Message Number: $mhonarc::MHAmsgnum\n/;
      }
+     ## Fixup any EOL mess
+     $$data =~ s/\r?\n/\n/g;
+     $$data =~ s/\r/\n/g;
+ 
+     # translate back <>'s for tag processing
+     $$data =~ s/&([lg]t);/$special_to_char{$1}/g;
  
      ## Convert specials
!     $$data =~ s/<</\&lt;/g;
  
      ## Translate text/enriched commands
***************
*** 75,90 ****
  	convert_tags(\$chunk);
  	if (!$innofill) {
! 	    $chunk =~ s|(\r?\n\s*)|&nl_seq_to_brs($1)|gie;
  	}
  	$ret .= $chunk;
      }
- 
-     ## Translate 8-bit characters to entity refs based on charset
-     ## 		(we already did '<' and '&' characters)
-     if ($charset =~ /iso-8859-([2-9]|10)/i) {
- 	require 'iso8859.pl';
- 	$ret = iso_8859::str2sgml($ret, $charset, 1);
-     }
- 
      $ret;
  }
--- 89,96 ----
  	convert_tags(\$chunk);
  	if (!$innofill) {
! 	    $chunk =~ s/(\n\s*)/&nl_seq_to_brs($1)/ge;
  	}
  	$ret .= $chunk;
      }
      $ret;
  }
***************
*** 122,127 ****
  
      # Not supported commands
!     $$str =~ s|<lang>\s*<param>([^<]*)</param>||gi;
!     $$str =~ s|</lang>||gi;
  }
  
--- 128,133 ----
  
      # Not supported commands
!     $$str =~ s|<lang>\s*<param>([^<]*)</param>|<div lang="$1">|gi;
!     $$str =~ s|</lang>|</div>|gi;
  }
  

Index: mhtxthtml.pl
===================================================================
RCS file: /cvsroot/mhonarc/mhonarc/MHonArc/lib/mhtxthtml.pl,v
retrieving revision 2.27
retrieving revision 2.28
diff -C2 -r2.27 -r2.28
*** mhtxthtml.pl	13 Dec 2002 07:19:01 -0000	2.27
--- mhtxthtml.pl	18 Dec 2002 05:47:46 -0000	2.28
***************
*** 50,53 ****
--- 50,60 ----
  	         q/dynsrc|for|href|longdesc|profile|src|url|usemap)\b/;
  
+ my %special_to_char = (
+     'lt'    => '<',
+     'gt'    => '>',
+     'amp'   => '&',
+     'quot'  => '"',
+ );
+ 
  ##---------------------------------------------------------------------------
  ##	The filter must modify HTML content parts for merging into the
***************
*** 122,125 ****
--- 129,146 ----
      my $atdir    = $subdir ? $mhonarc::MsgPrefix.$mhonarc::MHAmsgnum : "";
      my $tmp;
+ 
+     my $charset = $fields->{'x-mha-charset'};
+     my($charcnv, $real_charset_name) =
+ 	    readmail::MAILload_charset_converter($charset);
+     if (defined($charcnv) && defined(&$charcnv)) {
+ 	$$data = &$charcnv($$data, $real_charset_name);
+     } else {
+ 	warn qq/\n/,
+ 	     qq/Warning: Unrecognized character set: $charset\n/,
+ 	     qq/         Message-Id: <$mhonarc::MHAmsgid>\n/,
+ 	     qq/         Message Number: $mhonarc::MHAmsgnum\n/;
+     }
+     # translate back HTML specials back
+     $$data =~ s/&([lg]t|amp|quot);/$special_to_char{$1}/g;
  
      ## Check comment declarations: may screw-up mhonarc processing

Index: mhtxtplain.pl
===================================================================
RCS file: /cvsroot/mhonarc/mhonarc/MHonArc/lib/mhtxtplain.pl,v
retrieving revision 2.33
retrieving revision 2.34
diff -C2 -r2.33 -r2.34
*** mhtxtplain.pl	9 Dec 2002 23:21:52 -0000	2.33
--- mhtxtplain.pl	18 Dec 2002 05:47:46 -0000	2.34
***************
*** 72,77 ****
  ##			with a link to it from the message page.
  ##
- ##	default=set 	Default charset to use if not set.
- ##
  ##	disableflowed
  ##			Ignore flowed formatting for message text
--- 72,75 ----
***************
*** 243,247 ****
  
      my($charset, $nourl, $igncharset, $nonfixed,
!        $keepspace, $maxwidth, $target, $defset, $xhtml);
      my(%asis) = ( );
  
--- 241,245 ----
  
      my($charset, $nourl, $igncharset, $nonfixed,
!        $keepspace, $maxwidth, $target, $xhtml);
      my(%asis) = ( );
  
***************
*** 251,256 ****
      if ($args =~ /\bmaxwidth=(\d+)/i) { $maxwidth = $1; }
  	else { $maxwidth = 0; }
-     if ($args =~ /\bdefault=(\S+)/i) { $defset = lc $1; }
- 	else { $defset = 'us-ascii'; }
      $target = "";
      if ($args =~ /\btarget="([^"]+)"/i) { $target = $1; }
--- 249,252 ----
***************
*** 260,273 ****
  	$target = qq/target="$target"/;
      }
-     $defset =~ s/['"\s]//g;
  
!     ## Grab charset parameter (if defined)
!     if ( (defined($fields->{'content-type'}[0])) &&
! 	 ($fields->{'content-type'}[0] =~ /\bcharset\s*=\s*([^\s;]+)/i) ) {
! 	$charset = lc $1;
! 	$charset =~ s/['";\s]//g;
!     } else {
! 	$charset = $defset;
!     }
      ## Grab format parameter (if defined)
      my $textformat = 'fixed';
--- 256,263 ----
  	$target = qq/target="$target"/;
      }
  
!     ## Grab charset parameter
!     $charset = $fields->{'x-mha-charset'};
! 
      ## Grab format parameter (if defined)
      my $textformat = 'fixed';

---------------------------------------------------------------------
To sign-off this list, send email to majordomo(_at_)mhonarc(_dot_)org with the
message text UNSUBSCRIBE MHONARC-DEV