mhonarc-commits
[Top] [All Lists]

CVS: mhonarc/MHonArc/lib readmail.pl,2.24,2.25

2002-12-17 22:43:54
Update of /cvsroot/mhonarc/mhonarc/MHonArc/lib
In directory subversions:/tmp/cvs-serv26472

Modified Files:
	readmail.pl 
Log Message:
* Added hooks for text entity character encoding support.
* Define special x-mha-charset entity header field for text entities.
  Text-based MIMEFILTERS should now use it to determine the charset
  of the entity body.


Index: readmail.pl
===================================================================
RCS file: /cvsroot/mhonarc/mhonarc/MHonArc/lib/readmail.pl,v
retrieving revision 2.24
retrieving revision 2.25
diff -C2 -r2.24 -r2.25
*** readmail.pl	17 Nov 2002 03:38:52 -0000	2.24
--- readmail.pl	18 Dec 2002 05:43:48 -0000	2.25
***************
*** 209,212 ****
--- 209,223 ----
  
  ##---------------------------------------------------------------------------
+ 
+ $TextEncode = undef  # charset that text entity bodies are converted to
+     unless defined($TextEncode);
+ 
+ $TextEncoderFunc = undef  # routine invoked to perform that conversion
+     unless defined($TextEncoderFunc);
+ 
+ $TextDefCharset = 'us-ascii'  # charset assumed when an entity names none
+     unless defined($TextDefCharset);
+ 
+ ##---------------------------------------------------------------------------
  ##	Variables holding functions for generating processed output
  ##	for MAILread_body().  The default functions generate HTML.
***************
*** 442,445 ****
--- 453,468 ----
      $uribase =~ s|(.*/).*|$1|  if $uribase;
  
+     ## Convert text encoding
+     if ($type eq 'text') {
+ 	my $charset = extract_charset($content, $subtype, $body);
+ 	$fields->{'x-mha-charset'} = $charset;
+ 	if ($TextEncode &&
+ 		defined($TextEncoderFunc) &&
+ 		defined(&$TextEncoderFunc) &&
+ 		&$TextEncoderFunc($body, $charset, $TextEncode)) {
+ 	    $fields->{'x-mha-charset'} = $TextEncode;
+ 	}
+     }
+ 
      ## Load content-type filter
      if ( (!defined($filter = &load_filter($ctype)) || !defined(&$filter)) &&
***************
*** 1142,1145 ****
--- 1165,1193 ----
      }
      $ret;
+ }
+ ##---------------------------------------------------------------------------##
+ 
+ ## Return the lowercased charset of a text entity.  $content is the
+ ## content-type string, $subtype the text sub-type, $body a ref to the text.
+ sub extract_charset {
+     my($content, $subtype, $body) = @_;
+     my $charset = $TextDefCharset;	# fallback if nothing is declared
+ 
+     if ($content =~ /\bcharset\s*=\s*([^\s;]+)/i) {
+ 	$charset =  lc $1;
+ 	$charset =~ s/['";\s]//g;
+     }
+ 
+     # If HTML, check <meta http-equiv=content-type> tag since it can differ
+     # from the entity header.  $body is a reference: match against $$body.
+     if (($subtype eq 'html' || $subtype eq 'x-html') &&
+ 	($$body =~ m/(<meta\s+http-equiv\s*=\s*['"]?
+ 		     content-type\b[^>]*>)/xi)) {
+ 	my $meta = $1;
+ 	if ($meta =~ m/\bcharset\s*=\s*['"]?([\w\.\-]+)/i) {
+ 	    $charset = lc $1;
+ 	}
+     }
+     $charset;
+ }
  

---------------------------------------------------------------------
To sign-off this list, send email to majordomo(_at_)mhonarc(_dot_)org with the
message text UNSUBSCRIBE MHONARC-DEV