Update of /cvsroot/mhonarc/mhonarc/MHonArc/lib
In directory subversions:/tmp/cvs-serv26472
Modified Files:
readmail.pl
Log Message:
* Added hooks for text entity character encoding support.
* Define special x-mha-charset entity header field for text entities.
Text-based MIMEFILTERS should now use it to determine the charset
of the entity body.
Index: readmail.pl
===================================================================
RCS file: /cvsroot/mhonarc/mhonarc/MHonArc/lib/readmail.pl,v
retrieving revision 2.24
retrieving revision 2.25
diff -C2 -r2.24 -r2.25
*** readmail.pl 17 Nov 2002 03:38:52 -0000 2.24
--- readmail.pl 18 Dec 2002 05:43:48 -0000 2.25
***************
*** 209,212 ****
--- 209,223 ----
##---------------------------------------------------------------------------
+
+ ## Charset that text entities should be converted to.  Conversion is
+ ## only attempted when this is set to a true value.
+ $TextEncode = undef
+     unless defined($TextEncode);
+
+ ## Routine invoked as &$TextEncoderFunc($body, $charset, $TextEncode)
+ ## to perform the conversion.  The guard must test this same variable
+ ## so that a value assigned before this point is preserved.
+ $TextEncoderFunc = undef
+     unless defined($TextEncoderFunc);
+
+ ## Charset assumed for text entities that do not specify one.
+ $TextDefCharset = 'us-ascii'
+     unless defined($TextDefCharset);
+
+ ##---------------------------------------------------------------------------
## Variables holding functions for generating processed output
## for MAILread_body(). The default functions generate HTML.
***************
*** 442,445 ****
--- 453,468 ----
$uribase =~ s|(.*/).*|$1| if $uribase;
+ ## Convert text encoding
+ if ($type eq 'text') {
+ my $charset = extract_charset($content, $subtype, $body);
+ $fields->{'x-mha-charset'} = $charset;
+ if ($TextEncode &&
+ defined($TextEncoderFunc) &&
+ defined(&$TextEncoderFunc) &&
+ &$TextEncoderFunc($body, $charset, $TextEncode)) {
+ $fields->{'x-mha-charset'} = $TextEncode;
+ }
+ }
+
## Load content-type filter
if ( (!defined($filter = &load_filter($ctype)) || !defined(&$filter)) &&
***************
*** 1142,1145 ****
--- 1165,1193 ----
}
$ret;
+ }
+ ##---------------------------------------------------------------------------##
+
+ ##	extract_charset determines the charset of a text entity.
+ ##
+ ##	Arguments:
+ ##	    $content	Content-type string of the entity.
+ ##	    $subtype	Text sub-type (e.g. 'plain', 'html').
+ ##	    $body	Reference to the entity text.  A plain string is
+ ##			also accepted.
+ ##
+ ##	Returns the charset name.  A charset found in the content-type
+ ##	string or in an HTML <meta> tag is lowercased; $TextDefCharset is
+ ##	returned unchanged when neither specifies one.
+ ##
+ sub extract_charset {
+     my $content = shift;	# Content-type string of entity
+     my $subtype = shift;	# Text sub-type
+     my $body    = shift;	# Reference to entity text
+     my $charset = $TextDefCharset;
+
+     if ($content =~ /\bcharset\s*=\s*([^\s;]+)/i) {
+         $charset = lc $1;
+         # The captured value may still carry quote characters.
+         $charset =~ s/['";\s]//g;
+     }
+
+     # Match against the text itself, not the reference: a reference
+     # stringifies to "SCALAR(0x...)" and would never contain the tag.
+     # Wrapping a plain string in a reference avoids copying the body.
+     my $text_ref = ref($body) ? $body : \$body;
+
+     # If HTML, check <meta http-equiv=content-type> tag since it
+     # can be different than what is specified in the entity header.
+     if (($subtype eq 'html' || $subtype eq 'x-html') &&
+         ($$text_ref =~ m/(<meta\s+http-equiv\s*=\s*['"]?
+                          content-type\b[^>]*>)/xi)) {
+         my $meta = $1;
+         if ($meta =~ m/\bcharset\s*=\s*['"]?([\w\.\-]+)/i) {
+             $charset = lc $1;
+         }
+     }
+     $charset;
}
---------------------------------------------------------------------
To sign-off this list, send email to majordomo(_at_)mhonarc(_dot_)org with the
message text UNSUBSCRIBE MHONARC-DEV