Update of /cvsroot/mhonarc/mhonarc/MHonArc/lib
In directory subversions:/tmp/cvs-serv3849
Modified Files:
mhdb.pl mhopt.pl mhrcfile.pl mhutil.pl readmail.pl
Log Message:
* Added implementation of TEXTENCODE resource.
Index: mhdb.pl
===================================================================
RCS file: /cvsroot/mhonarc/mhonarc/MHonArc/lib/mhdb.pl,v
retrieving revision 2.25
retrieving revision 2.26
diff -C2 -r2.25 -r2.26
*** mhdb.pl 24 Nov 2002 06:22:24 -0000 2.25
--- mhdb.pl 20 Dec 2002 08:01:11 -0000 2.26
***************
*** 94,97 ****
--- 94,109 ----
\%readmail::MIMECharSetConvertersSrc);
}
+ unless ($readmail::TextDefCharset eq 'us-ascii') {
+ print_var($db,'readmail::TextDefCharset',
+ \$readmail::TextDefCharset);
+ }
+ ## Write the TEXTENCODE-related readmail variables to $db, but only when
+ ## the resource is not at its default.  Each name passed to print_var()
+ ## is paired with a reference to the variable of that same name.
+ unless ($IsDefault{'TEXTENCODE'}) {
+     print_var($db,'readmail::TextEncode',
+               \$readmail::TextEncode);
+     print_var($db,'readmail::TextEncoderFunc',
+               \$readmail::TextEncoderFunc);
+     print_var($db,'readmail::TextEncoderSrc',
+               \$readmail::TextEncoderSrc);
+ }
unless ($IsDefault{'MIMEDECODERS'}) {
print_var($db,'readmail::MIMEDecoders',
Index: mhopt.pl
===================================================================
RCS file: /cvsroot/mhonarc/mhonarc/MHonArc/lib/mhopt.pl,v
retrieving revision 2.44
retrieving revision 2.45
diff -C2 -r2.44 -r2.45
*** mhopt.pl 18 Dec 2002 05:41:40 -0000 2.44
--- mhopt.pl 20 Dec 2002 08:01:11 -0000 2.45
***************
*** 428,436 ****
}
## Check if extension for HTML files defined on the command-line
$HtmlExt = $opt{'htmlext'} if defined($opt{'htmlext'});
- $RFC1522 = 1; # Always True
-
## Other indexes resource files
if (defined($opt{'otherindex'})) {
--- 428,442 ----
}
+ ## Re-check readmail settings.  Skipped entirely when $SCAN is set.
+ unless ($SCAN) {
+     # When a text encoder can be loaded, message headers are converted
+     # with htmlize_enc_head instead of the current $MHeadCnvFunc.
+     $MHeadCnvFunc = \&htmlize_enc_head
+         if defined(readmail::load_textencoder());
+ }
+
## Check if extension for HTML files defined on the command-line
$HtmlExt = $opt{'htmlext'} if defined($opt{'htmlext'});
## Other indexes resource files
if (defined($opt{'otherindex'})) {
***************
*** 1166,1169 ****
--- 1172,1180 ----
unless (@MIMEAltPrefs) {
$IsDefault{'MIMEALTPREFS'} = 1;
+ }
+
+ ## Text encoding
+ unless ($readmail::TextEncode) {
+ $IsDefault{'TEXTENCODE'} = 1;
}
}
Index: mhrcfile.pl
===================================================================
RCS file: /cvsroot/mhonarc/mhonarc/MHonArc/lib/mhrcfile.pl,v
retrieving revision 2.29
retrieving revision 2.30
diff -C2 -r2.29 -r2.30
*** mhrcfile.pl 4 Dec 2002 05:17:43 -0000 2.29
--- mhrcfile.pl 20 Dec 2002 08:01:11 -0000 2.30
***************
*** 853,856 ****
--- 853,881 ----
}
}
+ if ($elem eq 'defcharset') { # Default charset
+     ## The value is the lowercased result of get_elem_last_line().
+     $readmail::TextDefCharset = lc get_elem_last_line($handle, $elem);
+     ## Remove all whitespace.  The binding operator (=~) is required:
+     ## a plain assignment would run the substitution against $_ and
+     ## store its return value in place of the charset name.
+     $readmail::TextDefCharset =~ s/\s//g;
+     ## An empty value falls back to us-ascii.
+     $readmail::TextDefCharset = 'us-ascii'
+         if $readmail::TextDefCharset eq '';
+     ## Leave the element switch, as the textencode branch does.
+     last FMTSW;
+ }
+ if ($elem eq 'textencode') { # Text encoder
+     ## Clear any earlier encoder settings before reading the element.
+     $readmail::TextEncode =
+     $readmail::TextEncoderFunc =
+     $readmail::TextEncoderSrc = undef;
+
+     ## Read lines up to the closing </textencode> tag.  Each non-blank
+     ## line is split on ';' into at most three fields:
+     ##     type; routine; source-file
+     ## If several lines are given, later ones overwrite earlier ones.
+     while (defined($line = <$handle>)) {
+         if ($line =~ /^\s*<\/textencode\s*>/i) {
+             last;
+         }
+         if ($line !~ /\S/) {
+             next;
+         }
+         ($type, $routine, $plfile) = split(/;/, $line, 3);
+
+         ## The file name keeps interior whitespace; only the ends are
+         ## trimmed.  Type and routine lose all whitespace.
+         $plfile =~ s/^\s+//;
+         $plfile =~ s/\s+\z//g;
+         $type =~ s/\s//g;
+         $routine =~ s/\s//g;
+
+         $readmail::TextEncode = lc $type;
+         $readmail::TextEncoderFunc = $routine;
+         if (defined($plfile) and $plfile =~ /\S/) {
+             $readmail::TextEncoderSrc = $plfile;
+         }
+         $IsDefault{'TEXTENCODE'} = 0;
+     }
+     last FMTSW;
+ }
if ($elem eq 'tfirstpglink') { # First thread page link
$TFIRSTPGLINK = &get_elem_content($handle, $elem, $chop);
Index: mhutil.pl
===================================================================
RCS file: /cvsroot/mhonarc/mhonarc/MHonArc/lib/mhutil.pl,v
retrieving revision 2.22
retrieving revision 2.23
diff -C2 -r2.22 -r2.23
*** mhutil.pl 19 Dec 2002 05:07:20 -0000 2.22
--- mhutil.pl 20 Dec 2002 08:01:11 -0000 2.23
***************
*** 64,67 ****
--- 64,78 ----
);
+ ##---------------------------------------------------------------------------
+ ## Convert a message header string ($_[0]) to HTML using the character
+ ## set converter that readmail::MAILload_charset_converter() returns
+ ## for $readmail::TextEncode.
+ ##
+ ## When the converter is one of the special values '-decode-' or
+ ## '-ignore-', the string is passed to htmlize() instead.
+ ##
+ sub htmlize_enc_head {
+     my($converter, $real_charset) =
+         readmail::MAILload_charset_converter($readmail::TextEncode);
+     if ($converter eq '-decode-' || $converter eq '-ignore-') {
+         return htmlize($_[0]);
+     }
+     return &$converter($_[0], $real_charset);
+ }
##---------------------------------------------------------------------------
Index: readmail.pl
===================================================================
RCS file: /cvsroot/mhonarc/mhonarc/MHonArc/lib/readmail.pl,v
retrieving revision 2.25
retrieving revision 2.26
diff -C2 -r2.25 -r2.26
*** readmail.pl 18 Dec 2002 05:43:48 -0000 2.25
--- readmail.pl 20 Dec 2002 08:01:11 -0000 2.26
***************
*** 64,67 ****
--- 64,68 ----
sub JUST_DECODE() { 1; }
sub DECODE_ALL() { 2; }
+ sub TEXT_ENCODE() { 3; }
##---------------------------------------------------------------------------##
***************
*** 209,221 ****
##---------------------------------------------------------------------------
$TextEncode = undef
unless defined($TextEncode);
!
$TextEncoderFunc = undef
unless defined($TextEncodingFunc);
!
! $TextDefCharset = 'us-ascii'
! unless defined($TextDefCharset);
##---------------------------------------------------------------------------
--- 210,229 ----
##---------------------------------------------------------------------------
+ ## Text entity-related variables
+ ##
+
+ ## Default character set if none specified.
+ $TextDefCharset = 'us-ascii'
+ unless defined($TextDefCharset);
+ ## Destination character encoding for text entities.
$TextEncode = undef
unless defined($TextEncode);
! ## Text encoding function.  A value set before this point is kept.
$TextEncoderFunc = undef
unless defined($TextEncoderFunc);
! ## Text encoding function source file.  A value set before this point
! ## is kept.
! $TextEncoderSrc = undef
! unless defined($TextEncoderSrc);
##---------------------------------------------------------------------------
***************
*** 289,303 ****
## Usage:
##
! ## $ret_data = &MAILdecode_1522_str($str, $decoding_flag);
##
! ## If $decoding_flag is JUST_DECODE, $str will be decoded for only
## the charsets specified as "-decode-". If it is equal to
## DECODE_ALL, all encoded data is decoded without any conversion.
##
sub MAILdecode_1522_str {
! my($str) = shift;
! my($decoding_flag) = shift || 0;
my($charset,
$encoding,
$dec,
$charcnv,
--- 297,316 ----
## Usage:
##
! ## $ret_data = &MAILdecode_1522_str($str, $dec_flag);
##
! ## If $dec_flag is JUST_DECODE, $str will be decoded for only
## the charsets specified as "-decode-". If it is equal to
## DECODE_ALL, all encoded data is decoded without any conversion.
+ ## If $dec_flag is TEXT_ENCODE, then all data will be converted
+ ## and encoded according to $readmail::TextEncode and
+ ## $readmail::TextEncoderFunc.
##
sub MAILdecode_1522_str {
! my $str = shift;
! my $dec_flag = shift || 0;
! my $ret = ('');
my($charset,
$encoding,
+ $pos,
$dec,
$charcnv,
***************
*** 307,311 ****
$strtxt,
$str_before);
! my($ret) = ('');
# Get plain converter
--- 320,333 ----
$strtxt,
$str_before);
!
! # Get text encoder
! my $encfunc = undef;
! if ($dec_flag == TEXT_ENCODE) {
! $encfunc = load_textencoder();
! if (!defined($encfunc)) {
! $encfunc = undef unless defined($encfunc);
! $dec_flag = 0;
! }
! }
# Get plain converter
***************
*** 314,325 ****
# Decode string
! while ($str =~ /=\?([^?]+)\?(.)\?([^?]*)\?=/) {
!
# Grab components
! ($charset, $encoding) = ($1, $2);
! $strtxt = $3; $str_before = $`; $str = $';
# Check encoding method and grab proper decoder
! if ($encoding =~ /b/i) {
$dec = &load_decoder('base64');
} else {
--- 336,349 ----
# Decode string
! while ($str =~ /(=\?([^?]+)\?(.)\?([^?]*)\?=)/g) {
# Grab components
! $pos = pos($str);
! ($charset, $encoding, $strtxt) = (lc($2), lc($3), $4);
! $str_before = substr($str, 0, $pos-length($1));
! substr($str, 0, $pos) = '';
! pos($str) = 0;
# Check encoding method and grab proper decoder
! if ($encoding eq 'b') {
$dec = &load_decoder('base64');
} else {
***************
*** 328,332 ****
# Convert before (unencoded) text
! if ($decoding_flag) { # ignore if just decode
$ret .= $str_before;
} elsif (defined(&$plaincnv)) { # decode and convert
--- 352,359 ----
# Convert before (unencoded) text
! if (defined($encfunc)) { # encoding
! &$encfunc(\$str_before, $plain_real_charset, $TextEncode);
! $ret .= $str_before;
! } elsif ($dec_flag) { # ignore if just decode
$ret .= $str_before;
} elsif (defined(&$plaincnv)) { # decode and convert
***************
*** 336,368 ****
}
! # Convert encoded text
! if ($decoding_flag == DECODE_ALL) {
! $charcnv = '-decode-';
! } else {
! ($charcnv, $real_charset) = MAILload_charset_converter($charset);
! }
!
! # Decode only
! if ($charcnv eq '-decode-') {
! $strtxt =~ s/_/ /g;
! $ret .= &$dec($strtxt);
!
! # Ignore if just decoding
! } elsif ($decoding_flag) {
! $ret .= "=?$charset?$encoding?$strtxt?=";
!
! # Decode and convert
! } elsif (defined(&$charcnv)) {
$strtxt =~ s/_/ /g;
! $ret .= &$charcnv(&$dec($strtxt), $real_charset);
! # Fallback is to ignore
} else {
! $ret .= "=?$charset?$encoding?$strtxt?=";
}
}
# Convert left-over unencoded text
! if ($decoding_flag) { # ignore if just decode
$ret .= $str;
} elsif (defined(&$plaincnv)) { # decode and convert
--- 363,409 ----
}
! # Encoding text
! if (defined($encfunc)) {
! $real_charset = $MIMECharsetAliases{$charset}
! ? $MIMECharsetAliases{$charset} : $charset;
$strtxt =~ s/_/ /g;
! $strtxt = &$dec($strtxt);
! &$encfunc(\$strtxt, $charset, $TextEncode);
! $ret .= $strtxt;
! # Regular conversion
} else {
! if ($dec_flag == DECODE_ALL) {
! $charcnv = '-decode-';
! } else {
! ($charcnv, $real_charset) =
! MAILload_charset_converter($charset);
! }
! # Decode only
! if ($charcnv eq '-decode-') {
! $strtxt =~ s/_/ /g;
! $ret .= &$dec($strtxt);
!
! # Ignore if just decoding
! } elsif ($dec_flag) {
! $ret .= "=?$charset?$encoding?$strtxt?=";
!
! # Decode and convert
! } elsif (defined(&$charcnv)) {
! $strtxt =~ s/_/ /g;
! $ret .= &$charcnv(&$dec($strtxt), $real_charset);
!
! # Fallback is to ignore
! } else {
! $ret .= "=?$charset?$encoding?$strtxt?=";
! }
}
}
# Convert left-over unencoded text
! if (defined($encfunc)) { # encoding
! &$encfunc(\$str, $plain_real_charset, $TextEncode);
! $ret .= $str;
! } elsif ($dec_flag) { # ignore if just decode
$ret .= $str;
} elsif (defined(&$plaincnv)) { # decode and convert
***************
*** 453,468 ****
$uribase =~ s|(.*/).*|$1| if $uribase;
- ## Convert text encoding
- if ($type eq 'text') {
- my $charset = extract_charset($content, $subtype, $body);
- $fields->{'x-mha-charset'} = $charset;
- if ($TextEncode &&
- defined($TextEncoderFunc) &&
- defined(&$TextEncoderFunc) &&
- &$TextEncoderFunc($body, $charset, $TextEncode)) {
- $fields->{'x-mha-charset'} = $TextEncode;
- }
- }
-
## Load content-type filter
if ( (!defined($filter = &load_filter($ctype)) || !defined(&$filter)) &&
--- 494,497 ----
***************
*** 489,508 ****
$decodefunc = undef;
}
! ## A filter is defined for given content-type
! if ($filter && defined(&$filter)) {
! ## decode data
! if (defined($decodefunc)) {
! if (defined(&$decodefunc)) {
! $decoded = &$decodefunc($$body);
! @array = &$filter($fields, \$decoded, 1, $args);
! } else {
! @array = &$filter($fields, $body,
! $decodefunc =~ /as-is/i, $args);
! }
! } else {
! @array = &$filter($fields, $body, 0, $args);
}
## Setup return variables
$ret = shift @array; # Return string
--- 518,543 ----
$decodefunc = undef;
}
+ my $decoded = 0;
+ if (defined($decodefunc) && defined(&$decodefunc)) {
+ $$body = &$decodefunc($$body);
+ $decoded = 1;
+ } elsif ($decodefunc =~ /as-is/i) {
+ $decoded = 1;
+ }
! ## Convert text encoding
! if ($type eq 'text') {
! my $charset = extract_charset($content, $subtype, $body);
! $fields->{'x-mha-charset'} = $charset;
! my $textfunc = load_textencoder();
! if (defined($textfunc)) {
! $fields->{'x-mha-charset'} = $TextEncode
! if defined(&$textfunc($body, $charset, $TextEncode));
}
+ }
+ ## A filter is defined for given content-type
+ if ($filter && defined(&$filter)) {
+ @array = &$filter($fields, $body, $decoded, $args);
## Setup return variables
$ret = shift @array; # Return string
***************
*** 743,747 ****
##
sub MAILread_header {
! my($mesg) = shift;
my $fields = { };
--- 778,782 ----
##
sub MAILread_header {
! my $mesg = shift;
my $fields = { };
***************
*** 750,753 ****
--- 785,790 ----
my($value, $tmp, $pos);
+ my $encfunc = load_textencoder();
+
## Read a line at a time.
for ($pos=0; $pos >= 0; ) {
***************
*** 767,771 ****
## Decode text if requested
! $tmp = &MAILdecode_1522_str($tmp,JUST_DECODE) if $DecodeHeader;
## Check for continuation of a field
--- 804,812 ----
## Decode text if requested
! if (defined($encfunc)) {
! $tmp = &MAILdecode_1522_str($tmp,TEXT_ENCODE);
! } elsif ($DecodeHeader) {
! $tmp = &MAILdecode_1522_str($tmp,JUST_DECODE);
! }
## Check for continuation of a field
***************
*** 796,800 ****
##
sub MAILread_file_header {
! my($handle) = @_;
my $label = '';
my $header = '';
--- 837,845 ----
##
sub MAILread_file_header {
! my $handle = shift;
! my $encode = shift;
!
! my $encfunc = load_textencoder();
!
my $label = '';
my $header = '';
***************
*** 811,815 ****
## Decode text if requested
! $tmp = &MAILdecode_1522_str($tmp,JUST_DECODE) if $DecodeHeader;
## Check for continuation of a field
--- 856,864 ----
## Decode text if requested
! if (defined($encfunc)) {
! $tmp = &MAILdecode_1522_str($tmp,TEXT_ENCODE);
! } elsif ($DecodeHeader) {
! $tmp = &MAILdecode_1522_str($tmp,JUST_DECODE);
! }
## Check for continuation of a field
***************
*** 1113,1116 ****
--- 1162,1187 ----
$args;
}
+ ## Return $TextEncoderFunc if the text encoding function it names is
+ ## defined, loading $TextEncoderSrc first when it is not.
+ ##
+ ## Returns undef when $TextEncode is not set.  When the function cannot
+ ## be made available, a warning is printed, $TextEncode,
+ ## $TextEncoderFunc and $TextEncoderSrc are reset to undef (so later
+ ## calls return immediately), and undef is returned.
+ ##
+ ## "return undef" is used rather than a bare return so the result is
+ ## the same single undef value in any calling context.
+ ##
+ sub load_textencoder {
+     return undef unless $TextEncode;
+     TRY: {
+         last TRY unless defined($TextEncoderFunc);
+         return $TextEncoderFunc if defined(&$TextEncoderFunc);
+         last TRY unless defined($TextEncoderSrc);
+
+         # A source file that cannot be loaded is reported and then
+         # handled like every other failure below, instead of aborting.
+         unless (eval { require $TextEncoderSrc; 1 }) {
+             warn qq/Warning: $@/;
+             last TRY;
+         }
+         return $TextEncoderFunc if defined(&$TextEncoderFunc);
+     }
+     warn qq/Warning: Unable to load text encode for "$TextEncode"\n/;
+     $TextEncode = undef;
+     $TextEncoderFunc = undef;
+     $TextEncoderSrc = undef;
+     return undef;
+ }
##---------------------------------------------------------------------------##
***************
*** 1188,1191 ****
--- 1259,1269 ----
$charset = lc $1;
}
+ }
+ $charset = $MIMECharsetAliases{$charset}
+ if $MIMECharsetAliases{$charset};
+
+ # If us-ascii, but 8-bit chars in body, we change to iso-8859-1
+ if ($charset eq 'us-ascii') {
+ $charset = 'iso-8859-1' if $$body =~ /[\x80-\xFF]/;
}
$charset;
---------------------------------------------------------------------
To sign-off this list, send email to majordomo(_at_)mhonarc(_dot_)org with the
message text UNSUBSCRIBE MHONARC-DEV