Update of /cvsroot/mhonarc/mhonarc/MHonArc/examples
In directory subversions:/tmp/cvs-serv368/examples
Modified Files:
def-mime.mrc
Log Message:
* MHonArc::CharEnt:
+ Added support for Chinese charsets.
+ Added support for converting UTF-8.
* Optimized the conversion code to use s/// operation for conversion.
Some simple testing shows it is faster than the loop/substr()
method (mainly because character iteration is now done in the
perl internals).
- Removed $8bitonly option to str2sgml(). It is not very applicable
now with all the newer charsets added and it complicates the
conversion code.
* Some additional charset aliases added.
Index: def-mime.mrc
===================================================================
RCS file: /cvsroot/mhonarc/mhonarc/MHonArc/examples/def-mime.mrc,v
retrieving revision 1.15
retrieving revision 1.16
diff -C2 -r1.15 -r1.16
*** def-mime.mrc 17 Nov 2002 03:52:34 -0000 1.15
--- def-mime.mrc 28 Nov 2002 08:57:19 -0000 1.16
***************
*** 10,54 ****
<!-- ================================================================== -->
! <CharsetAliases>
! us-ascii; ascii, iso646, iso646-us, cp367, csascii, iso-ir-6, us
! iso-8859-1; latin1, l1, iso_8859_1, iso_8859-1:1987, iso8859-1, iso8859_1, 8859-1, 8859_1, cp819, ibm819, iso-ir-100
! iso-8859-2; latin2, l2, iso_8859_2, iso_8859-2:1987, iso8859-2, iso8859_2, 8859-2, 8859_2, iso-ir-101
! iso-8859-3; latin3, l3, iso_8859_3, iso_8859-3:1988, iso8859-3, iso8859_3, 8859-3, 8859_3, iso-ir-109
! iso-8859-4; latin4, l4, iso_8859_4, iso_8859-4:1988, iso8859-4, iso8859_4, 8859-4, 8859_4, iso-ir-110
! iso-8859-5; iso_8859-5:1988, cyrillic, iso-ir-144
! iso-8859-6; iso_8859-6:1987, arabic, asmo-708, ecma-114, iso-ir-127
! iso-8859-7; iso_8859-7:1987, greek, greek8, ecma-118, elot_928, iso-ir-126
! iso-8859-8; iso-8859-8-i, iso_8859-8:1988, hebrew, iso-ir-138
! iso-8859-9; latin5, l5, iso_8859_9, iso-8859_9:1989, iso8859-9, iso8859_9, 8859-9, 8859_9, iso-ir-148
! iso-8859-10; latin6, l6, iso_8859_10, iso_8859-10:1993, iso8859-10, iso8859_10, 8859-10, 8859_10, iso-ir-157
! iso-8859-13; latin7 ,l7
! iso-8859-14; latin8 ,l8
! iso-8859-15; latin9, latin0, l9, l0, iso_8859_15, iso8859-15, iso8859_15, 8859-15, 8859_15
! cp1250; windows-1250
! cp1251; windows-1251
! cp1252; windows-1252
! cp1253; windows-1253
! cp1254; windows-1254
! cp1255; windows-1255
! cp1256; windows-1256
! cp1257; windows-1257
! cp1258; windows-1258
! koi-0; gost-13052
! koi8-e; iso-ir-111, ecma-113:1986
! koi8-r; cp878
! gost-19768-87; ecma-cyrillic, ecma-113, ecma-113:1988
! apple-arabic; x-mac-arabic
! apple-centeuro; x-mac-centeuro
! apple-croatian; x-mac-croatian
! apple-cyrillic; x-mac-cyrillic
! apple-greek; x-mac-greek
! apple-hebrew; x-mac-hebrew
! apple-iceland; x-mac-iceland
! apple-romanian; x-mac-romanian
! apple-roman; x-mac-roman
! apple-thai; x-mac-thai
! apple-turkish; x-mac-turkish
! </CharsetAliases>
<CharsetConverters>
plain; mhonarc::htmlize;
--- 10,78 ----
<!-- ================================================================== -->
! <!-- MIMEFILTERS maps content-types to converter functions.
! -->
! <MIMEFilters>
! application/ms-tnef; m2h_null::filter; mhnull.pl
! application/octet-stream; m2h_external::filter; mhexternal.pl
! application/*; m2h_external::filter; mhexternal.pl
! application/x-patch; m2h_text_plain::filter; mhtxtplain.pl
! audio/*; m2h_external::filter; mhexternal.pl
! chemical/*; m2h_external::filter; mhexternal.pl
! model/*; m2h_external::filter; mhexternal.pl
! image/*; m2h_external::filter; mhexternal.pl
! message/delivery-status; m2h_text_plain::filter; mhtxtplain.pl
! message/external-body; m2h_msg_extbody::filter; mhmsgextbody.pl
! message/partial; m2h_text_plain::filter; mhtxtplain.pl
! text/*; m2h_text_plain::filter; mhtxtplain.pl
! text/enriched; m2h_text_enriched::filter; mhtxtenrich.pl
! text/html; m2h_text_html::filter; mhtxthtml.pl
! text/plain; m2h_text_plain::filter; mhtxtplain.pl
! text/richtext; m2h_text_enriched::filter; mhtxtenrich.pl
! text/setext; m2h_text_setext::filter; mhtxtsetext.pl
! text/tab-separated-values; m2h_text_tsv::filter; mhtxttsv.pl
! text/x-html; m2h_text_html::filter; mhtxthtml.pl
! text/x-setext; m2h_text_setext::filter; mhtxtsetext.pl
! video/*; m2h_external::filter; mhexternal.pl
! x-sun-attachment; m2h_text_plain::filter; mhtxtplain.pl
! </MIMEFilters>
!
! <!-- MIMEARGS defines arguments to pass to filters registered via
! MIMEFILTERS.
! -->
! <MIMEArgs>
! m2h_external::filter; inline
! </MIMEArgs>
!
! <!-- MIMEALTPREFS defines content-type preferences for
! multipart/alternative messages.
! -->
! <MIMEAltPrefs>
! </MIMEAltPrefs>
!
! <!-- MIMEDECODERS defines functions that decode data according
! to an entitie's Content-Transfer-Encoding.
! -->
! <MIMEDecoders>
! 7bit; as-is;
! 8bit; as-is;
! binary; as-is;
! base64; base64::b64decode; base64.pl
! quoted-printable; quoted_printable::qprdecode; qprint.pl
! x-uuencode; base64::uudecode; base64.pl
! xuue; base64::uudecode; base64.pl
! uuencode; base64::uudecode; base64.pl
! </MIMEDecoders>
!
! <!-- CHARSETCONVERTERS defines functions that convert character
! data encoded in a character set to HTML. These functions are
! used for non-ASCII encoded message header field data and used
! by m2h_text_plain::filter.
+ Since many charsets are known by a variety of names,
+ CHARSETCONVERTERS is used to map a function to one of the
+ names for a charset, and the CHARSETALIASES resource (shown
+ later) is used to define all alternate names for a given
+ charset.
+ -->
<CharsetConverters>
plain; mhonarc::htmlize;
***************
*** 70,74 ****
--- 94,101 ----
iso-8859-16; MHonArc::CharEnt::str2sgml; MHonArc/CharEnt.pm
iso-2022-jp; iso_2022_jp::str2html; iso2022jp.pl
+ utf-8; MHonArc::CharEnt::str2sgml; MHonArc/CharEnt.pm
cp866; MHonArc::CharEnt::str2sgml; MHonArc/CharEnt.pm
+ cp936; MHonArc::CharEnt::str2sgml; MHonArc/CharEnt.pm
+ cp950; MHonArc::CharEnt::str2sgml; MHonArc/CharEnt.pm
cp1250; MHonArc::CharEnt::str2sgml; MHonArc/CharEnt.pm
cp1251; MHonArc::CharEnt::str2sgml; MHonArc/CharEnt.pm
***************
*** 90,93 ****
--- 117,122 ----
gost-19768-87; MHonArc::CharEnt::str2sgml; MHonArc/CharEnt.pm
viscii; MHonArc::CharEnt::str2sgml; MHonArc/CharEnt.pm
+ big5-hkscs; MHonArc::CharEnt::str2sgml; MHonArc/CharEnt.pm
+ gb2312; MHonArc::CharEnt::str2sgml; MHonArc/CharEnt.pm
apple-arabic; MHonArc::CharEnt::str2sgml; MHonArc/CharEnt.pm
apple-centeuro; MHonArc::CharEnt::str2sgml; MHonArc/CharEnt.pm
***************
*** 104,147 ****
</CharsetConverters>
! <MIMEFilters>
! application/ms-tnef; m2h_null::filter; mhnull.pl
! application/octet-stream; m2h_external::filter; mhexternal.pl
! application/*; m2h_external::filter; mhexternal.pl
! application/x-patch; m2h_text_plain::filter; mhtxtplain.pl
! audio/*; m2h_external::filter; mhexternal.pl
! chemical/*; m2h_external::filter; mhexternal.pl
! model/*; m2h_external::filter; mhexternal.pl
! image/*; m2h_external::filter; mhexternal.pl
! message/delivery-status; m2h_text_plain::filter; mhtxtplain.pl
! message/external-body; m2h_msg_extbody::filter; mhmsgextbody.pl
! message/partial; m2h_text_plain::filter; mhtxtplain.pl
! text/*; m2h_text_plain::filter; mhtxtplain.pl
! text/enriched; m2h_text_enriched::filter; mhtxtenrich.pl
! text/html; m2h_text_html::filter; mhtxthtml.pl
! text/plain; m2h_text_plain::filter; mhtxtplain.pl
! text/richtext; m2h_text_enriched::filter; mhtxtenrich.pl
! text/setext; m2h_text_setext::filter; mhtxtsetext.pl
! text/tab-separated-values; m2h_text_tsv::filter; mhtxttsv.pl
! text/x-html; m2h_text_html::filter; mhtxthtml.pl
! text/x-setext; m2h_text_setext::filter; mhtxtsetext.pl
! video/*; m2h_external::filter; mhexternal.pl
! x-sun-attachment; m2h_text_plain::filter; mhtxtplain.pl
! </MIMEFilters>
!
! <MIMEArgs>
! m2h_external::filter; inline
! </MIMEArgs>
!
! <MIMEAltPrefs>
! </MIMEAltPrefs>
!
! <MIMEDecoders>
! 7bit; as-is;
! 8bit; as-is;
! binary; as-is;
! base64; base64::b64decode; base64.pl
! quoted-printable; quoted_printable::qprdecode; qprint.pl
! x-uuencode; base64::uudecode; base64.pl
! xuue; base64::uudecode; base64.pl
! uuencode; base64::uudecode; base64.pl
! </MIMEDecoders>
--- 133,274 ----
</CharsetConverters>
! <!-- CHARSETALIASES defines alternate names (aliases) for charsets
! listed in CHARSETCONVERTERS.
! -->
! <CharsetAliases>
! us-ascii; ascii
! us-ascii; ansi_x3.4-1986
! us-ascii; iso646
! us-ascii; iso646-us
! us-ascii; iso646.irv:1991
! us-ascii; cp367
! us-ascii; ibm367
! us-ascii; csascii
! us-ascii; iso-ir-6
! us-ascii; us
! iso-8859-1; latin1
! iso-8859-1; l1
! iso-8859-1; iso_8859_1
! iso-8859-1; iso_8859-1:1987
! iso-8859-1; iso8859-1
! iso-8859-1; iso8859_1
! iso-8859-1; 8859-1
! iso-8859-1; 8859_1
! iso-8859-1; cp819
! iso-8859-1; ibm819
! iso-8859-1; iso-ir-100
! iso-8859-2; latin2
! iso-8859-2; l2
! iso-8859-2; iso_8859_2
! iso-8859-2; iso_8859-2:1987
! iso-8859-2; iso8859-2
! iso-8859-2; iso8859_2
! iso-8859-2; 8859-2
! iso-8859-2; 8859_2
! iso-8859-2; iso-ir-101
! iso-8859-3; latin3
! iso-8859-3; l3
! iso-8859-3; iso_8859_3
! iso-8859-3; iso_8859-3:1988
! iso-8859-3; iso8859-3
! iso-8859-3; iso8859_3
! iso-8859-3; 8859-3
! iso-8859-3; 8859_3
! iso-8859-3; iso-ir-109
! iso-8859-4; latin4
! iso-8859-4; l4
! iso-8859-4; iso_8859_4
! iso-8859-4; iso_8859-4:1988
! iso-8859-4; iso8859-4
! iso-8859-4; iso8859_4
! iso-8859-4; 8859-4
! iso-8859-4; 8859_4
! iso-8859-4; iso-ir-110
! iso-8859-5; iso_8859-5:1988
! iso-8859-5; cyrillic
! iso-8859-5; iso-ir-144
! iso-8859-6; iso_8859-6:1987
! iso-8859-6; arabic
! iso-8859-6; asmo-708
! iso-8859-6; ecma-114
! iso-8859-6; iso-ir-127
! iso-8859-7; iso_8859-7:1987
! iso-8859-7; greek
! iso-8859-7; greek8
! iso-8859-7; ecma-118
! iso-8859-7; elot_928
! iso-8859-7; iso-ir-126
! iso-8859-8; iso-8859-8-i
! iso-8859-8; iso_8859-8:1988
! iso-8859-8; hebrew
! iso-8859-8; iso-ir-138
! iso-8859-9; latin5
! iso-8859-9; l5
! iso-8859-9; iso_8859_9
! iso-8859-9; iso-8859_9:1989
! iso-8859-9; iso8859-9
! iso-8859-9; iso8859_9
! iso-8859-9; 8859-9
! iso-8859-9; 8859_9
! iso-8859-9; iso-ir-148
! iso-8859-10; latin6
! iso-8859-10; l6
! iso-8859-10; iso_8859_10
! iso-8859-10; iso_8859-10:1993
! iso-8859-10; iso8859-10
! iso-8859-10; iso8859_10
! iso-8859-10; 8859-10
! iso-8859-10; 8859_10
! iso-8859-10; iso-ir-157
! iso-8859-13; latin7 ,l7
! iso-8859-14; latin8 ,l8
! iso-8859-15; latin9
! iso-8859-15; latin0
! iso-8859-15; l9
! iso-8859-15; l0
! iso-8859-15; iso_8859_15
! iso-8859-15; iso8859-15
! iso-8859-15; iso8859_15
! iso-8859-15; 8859-15
! iso-8859-15; 8859_15
! utf-8; utf8
! cp936; gbk
! cp936; ms936
! cp936; windows-936
! cp950; big5
! cp950; csbig5
! cp1250; windows-1250
! cp1251; windows-1251
! cp1252; windows-1252
! cp1253; windows-1253
! cp1254; windows-1254
! cp1255; windows-1255
! cp1256; windows-1256
! cp1257; windows-1257
! cp1258; windows-1258
! koi-0; gost-13052
! koi8-e; iso-ir-111
! koi8-e; ecma-113:1986
! koi8-r; cp878
! gost-19768-87; ecma-cyrillic
! gost-19768-87; ecma-113
! gost-19768-87; ecma-113:1988
! big5-hkscs; big5hkscs
! gb2312; gb_2312-80
! gb2312; csgb2312
! gb2312; hz-gb-2312
! gb2312; iso-ir-58
! gb2312; chinese
! gb2312; csiso58gb231280
! apple-arabic; x-mac-arabic
! apple-centeuro; x-mac-centeuro
! apple-croatian; x-mac-croatian
! apple-cyrillic; x-mac-cyrillic
! apple-greek; x-mac-greek
! apple-hebrew; x-mac-hebrew
! apple-iceland; x-mac-iceland
! apple-romanian; x-mac-romanian
! apple-roman; x-mac-roman
! apple-thai; x-mac-thai
! apple-turkish; x-mac-turkish
! </CharsetAliases>
---------------------------------------------------------------------
To sign-off this list, send email to majordomo(_at_)mhonarc(_dot_)org with the
message text UNSUBSCRIBE MHONARC-DEV