mhonarc-commits
[Top] [All Lists]

CVS: mhonarc/MHonArc/examples def-mime.mrc,1.15,1.16

2002-11-28 01:57:38
Update of /cvsroot/mhonarc/mhonarc/MHonArc/examples
In directory subversions:/tmp/cvs-serv368/examples

Modified Files:
	def-mime.mrc 
Log Message:
* MHonArc::CharEnt:
  + Added support for Chinese charsets.
  + Added support for converting UTF-8.
  * Optimized the conversion code to use s/// operation for conversion.
    Some simple testing shows it is faster than the loop/substr()
    method (mainly because character iteration is now done in the
    perl internals).
  - Removed $8bitonly option to str2sgml().  It is not very applicable
    now with all the newer charsets added and it complicates the
    conversion code.
* Some additional charset aliases added.


Index: def-mime.mrc
===================================================================
RCS file: /cvsroot/mhonarc/mhonarc/MHonArc/examples/def-mime.mrc,v
retrieving revision 1.15
retrieving revision 1.16
diff -C2 -r1.15 -r1.16
*** def-mime.mrc	17 Nov 2002 03:52:34 -0000	1.15
--- def-mime.mrc	28 Nov 2002 08:57:19 -0000	1.16
***************
*** 10,54 ****
  <!-- ================================================================== -->
  
! <CharsetAliases>
! us-ascii; ascii, iso646, iso646-us, cp367, csascii, iso-ir-6, us
! iso-8859-1; latin1, l1, iso_8859_1, iso_8859-1:1987, iso8859-1, iso8859_1, 8859-1, 8859_1, cp819, ibm819, iso-ir-100
! iso-8859-2; latin2, l2, iso_8859_2, iso_8859-2:1987, iso8859-2, iso8859_2, 8859-2, 8859_2, iso-ir-101
! iso-8859-3; latin3, l3, iso_8859_3, iso_8859-3:1988, iso8859-3, iso8859_3, 8859-3, 8859_3, iso-ir-109
! iso-8859-4; latin4, l4, iso_8859_4, iso_8859-4:1988, iso8859-4, iso8859_4, 8859-4, 8859_4, iso-ir-110
! iso-8859-5; iso_8859-5:1988, cyrillic, iso-ir-144
! iso-8859-6; iso_8859-6:1987, arabic, asmo-708, ecma-114, iso-ir-127
! iso-8859-7; iso_8859-7:1987, greek, greek8, ecma-118, elot_928, iso-ir-126
! iso-8859-8; iso-8859-8-i, iso_8859-8:1988, hebrew, iso-ir-138
! iso-8859-9; latin5, l5, iso_8859_9, iso-8859_9:1989, iso8859-9, iso8859_9, 8859-9, 8859_9, iso-ir-148
! iso-8859-10; latin6, l6, iso_8859_10, iso_8859-10:1993, iso8859-10, iso8859_10, 8859-10, 8859_10, iso-ir-157
! iso-8859-13; latin7 ,l7
! iso-8859-14; latin8 ,l8
! iso-8859-15; latin9, latin0, l9, l0, iso_8859_15, iso8859-15, iso8859_15, 8859-15, 8859_15
! cp1250; windows-1250
! cp1251; windows-1251
! cp1252; windows-1252
! cp1253; windows-1253
! cp1254; windows-1254
! cp1255; windows-1255
! cp1256; windows-1256
! cp1257; windows-1257
! cp1258; windows-1258
! koi-0; gost-13052
! koi8-e; iso-ir-111, ecma-113:1986
! koi8-r; cp878
! gost-19768-87; ecma-cyrillic, ecma-113, ecma-113:1988
! apple-arabic; x-mac-arabic
! apple-centeuro; x-mac-centeuro
! apple-croatian; x-mac-croatian
! apple-cyrillic; x-mac-cyrillic
! apple-greek; x-mac-greek
! apple-hebrew; x-mac-hebrew
! apple-iceland; x-mac-iceland
! apple-romanian; x-mac-romanian
! apple-roman; x-mac-roman
! apple-thai; x-mac-thai
! apple-turkish; x-mac-turkish
! </CharsetAliases>
  
  <CharsetConverters>
  plain;          mhonarc::htmlize;
--- 10,78 ----
  <!-- ================================================================== -->
  
! <!-- MIMEFILTERS maps content-types to converter functions.
!   -->
! <MIMEFilters>
! application/ms-tnef;       m2h_null::filter;            mhnull.pl
! application/octet-stream;  m2h_external::filter;        mhexternal.pl
! application/*;             m2h_external::filter;        mhexternal.pl
! application/x-patch;       m2h_text_plain::filter;      mhtxtplain.pl
! audio/*;                   m2h_external::filter;        mhexternal.pl
! chemical/*;                m2h_external::filter;        mhexternal.pl
! model/*;                   m2h_external::filter;        mhexternal.pl
! image/*;                   m2h_external::filter;        mhexternal.pl
! message/delivery-status;   m2h_text_plain::filter;      mhtxtplain.pl
! message/external-body;     m2h_msg_extbody::filter;     mhmsgextbody.pl
! message/partial;           m2h_text_plain::filter;      mhtxtplain.pl
! text/*;                    m2h_text_plain::filter;      mhtxtplain.pl
! text/enriched;             m2h_text_enriched::filter;   mhtxtenrich.pl
! text/html;                 m2h_text_html::filter;       mhtxthtml.pl
! text/plain;                m2h_text_plain::filter;      mhtxtplain.pl
! text/richtext;             m2h_text_enriched::filter;   mhtxtenrich.pl
! text/setext;               m2h_text_setext::filter;     mhtxtsetext.pl
! text/tab-separated-values; m2h_text_tsv::filter;        mhtxttsv.pl
! text/x-html;               m2h_text_html::filter;       mhtxthtml.pl
! text/x-setext;             m2h_text_setext::filter;     mhtxtsetext.pl
! video/*;                   m2h_external::filter;        mhexternal.pl
! x-sun-attachment;          m2h_text_plain::filter;      mhtxtplain.pl
! </MIMEFilters>
! 
! <!-- MIMEARGS defines arguments to pass to filters registered via
!      MIMEFILTERS.
!   -->
! <MIMEArgs>
! m2h_external::filter; inline
! </MIMEArgs>
! 
! <!-- MIMEALTPREFS defines content-type preferences for
!      multipart/alternative messages.
!   -->
! <MIMEAltPrefs>
! </MIMEAltPrefs>
! 
! <!-- MIMEDECODERS defines functions that decode data according
!      to an entity's Content-Transfer-Encoding.
!   -->
! <MIMEDecoders>
! 7bit;   	  as-is;
! 8bit;   	  as-is;
! binary;   	  as-is;
! base64;   	  base64::b64decode;		base64.pl
! quoted-printable; quoted_printable::qprdecode;	qprint.pl
! x-uuencode;   	  base64::uudecode;		base64.pl
! xuue;   	  base64::uudecode;		base64.pl
! uuencode;   	  base64::uudecode;		base64.pl
! </MIMEDecoders>
! 
! <!-- CHARSETCONVERTERS defines functions that convert character
!      data encoded in a character set to HTML.  These functions are
!      used for non-ASCII encoded message header field data and used
!      by m2h_text_plain::filter.
  
+      Since many charsets are known by a variety of names,
+      CHARSETCONVERTERS is used to map a function to one of the
+      names for a charset, and the CHARSETALIASES resource (shown
+      later) is used to define all alternate names for a given
+      charset.
+   -->
  <CharsetConverters>
  plain;          mhonarc::htmlize;
***************
*** 70,74 ****
--- 94,101 ----
  iso-8859-16;    MHonArc::CharEnt::str2sgml;     MHonArc/CharEnt.pm
  iso-2022-jp;    iso_2022_jp::str2html;          iso2022jp.pl
+ utf-8;          MHonArc::CharEnt::str2sgml;     MHonArc/CharEnt.pm
  cp866;          MHonArc::CharEnt::str2sgml;     MHonArc/CharEnt.pm
+ cp936;          MHonArc::CharEnt::str2sgml;     MHonArc/CharEnt.pm
+ cp950;          MHonArc::CharEnt::str2sgml;     MHonArc/CharEnt.pm
  cp1250;         MHonArc::CharEnt::str2sgml;     MHonArc/CharEnt.pm
  cp1251;         MHonArc::CharEnt::str2sgml;     MHonArc/CharEnt.pm
***************
*** 90,93 ****
--- 117,122 ----
  gost-19768-87;  MHonArc::CharEnt::str2sgml;     MHonArc/CharEnt.pm
  viscii;         MHonArc::CharEnt::str2sgml;     MHonArc/CharEnt.pm
+ big5-hkscs;     MHonArc::CharEnt::str2sgml;     MHonArc/CharEnt.pm
+ gb2312;         MHonArc::CharEnt::str2sgml;     MHonArc/CharEnt.pm
  apple-arabic;   MHonArc::CharEnt::str2sgml;     MHonArc/CharEnt.pm
  apple-centeuro; MHonArc::CharEnt::str2sgml;     MHonArc/CharEnt.pm
***************
*** 104,147 ****
  </CharsetConverters>
  
! <MIMEFilters>
! application/ms-tnef;       m2h_null::filter;            mhnull.pl
! application/octet-stream;  m2h_external::filter;        mhexternal.pl
! application/*;             m2h_external::filter;        mhexternal.pl
! application/x-patch;       m2h_text_plain::filter;      mhtxtplain.pl
! audio/*;                   m2h_external::filter;        mhexternal.pl
! chemical/*;                m2h_external::filter;        mhexternal.pl
! model/*;                   m2h_external::filter;        mhexternal.pl
! image/*;                   m2h_external::filter;        mhexternal.pl
! message/delivery-status;   m2h_text_plain::filter;      mhtxtplain.pl
! message/external-body;     m2h_msg_extbody::filter;     mhmsgextbody.pl
! message/partial;           m2h_text_plain::filter;      mhtxtplain.pl
! text/*;                    m2h_text_plain::filter;      mhtxtplain.pl
! text/enriched;             m2h_text_enriched::filter;   mhtxtenrich.pl
! text/html;                 m2h_text_html::filter;       mhtxthtml.pl
! text/plain;                m2h_text_plain::filter;      mhtxtplain.pl
! text/richtext;             m2h_text_enriched::filter;   mhtxtenrich.pl
! text/setext;               m2h_text_setext::filter;     mhtxtsetext.pl
! text/tab-separated-values; m2h_text_tsv::filter;        mhtxttsv.pl
! text/x-html;               m2h_text_html::filter;       mhtxthtml.pl
! text/x-setext;             m2h_text_setext::filter;     mhtxtsetext.pl
! video/*;                   m2h_external::filter;        mhexternal.pl
! x-sun-attachment;          m2h_text_plain::filter;      mhtxtplain.pl
! </MIMEFilters>
! 
! <MIMEArgs>
! m2h_external::filter; inline
! </MIMEArgs>
! 
! <MIMEAltPrefs>
! </MIMEAltPrefs>
! 
! <MIMEDecoders>
! 7bit;   	  as-is;
! 8bit;   	  as-is;
! binary;   	  as-is;
! base64;   	  base64::b64decode;		base64.pl
! quoted-printable; quoted_printable::qprdecode;	qprint.pl
! x-uuencode;   	  base64::uudecode;		base64.pl
! xuue;   	  base64::uudecode;		base64.pl
! uuencode;   	  base64::uudecode;		base64.pl
! </MIMEDecoders>
--- 133,274 ----
  </CharsetConverters>
  
! <!-- CHARSETALIASES defines alternate names (aliases) for charsets
!      listed in CHARSETCONVERTERS.
!   -->
! <CharsetAliases>
! us-ascii; ascii
! us-ascii; ansi_x3.4-1986
! us-ascii; iso646
! us-ascii; iso646-us
! us-ascii; iso646.irv:1991
! us-ascii; cp367
! us-ascii; ibm367
! us-ascii; csascii
! us-ascii; iso-ir-6
! us-ascii; us
! iso-8859-1; latin1
! iso-8859-1; l1
! iso-8859-1; iso_8859_1
! iso-8859-1; iso_8859-1:1987
! iso-8859-1; iso8859-1
! iso-8859-1; iso8859_1
! iso-8859-1; 8859-1
! iso-8859-1; 8859_1
! iso-8859-1; cp819
! iso-8859-1; ibm819
! iso-8859-1; iso-ir-100
! iso-8859-2; latin2
! iso-8859-2; l2
! iso-8859-2; iso_8859_2
! iso-8859-2; iso_8859-2:1987
! iso-8859-2; iso8859-2
! iso-8859-2; iso8859_2
! iso-8859-2; 8859-2
! iso-8859-2; 8859_2
! iso-8859-2; iso-ir-101
! iso-8859-3; latin3
! iso-8859-3; l3
! iso-8859-3; iso_8859_3
! iso-8859-3; iso_8859-3:1988
! iso-8859-3; iso8859-3
! iso-8859-3; iso8859_3
! iso-8859-3; 8859-3
! iso-8859-3; 8859_3
! iso-8859-3; iso-ir-109
! iso-8859-4; latin4
! iso-8859-4; l4
! iso-8859-4; iso_8859_4
! iso-8859-4; iso_8859-4:1988
! iso-8859-4; iso8859-4
! iso-8859-4; iso8859_4
! iso-8859-4; 8859-4
! iso-8859-4; 8859_4
! iso-8859-4; iso-ir-110
! iso-8859-5; iso_8859-5:1988
! iso-8859-5; cyrillic
! iso-8859-5; iso-ir-144
! iso-8859-6; iso_8859-6:1987
! iso-8859-6; arabic
! iso-8859-6; asmo-708
! iso-8859-6; ecma-114
! iso-8859-6; iso-ir-127
! iso-8859-7; iso_8859-7:1987
! iso-8859-7; greek
! iso-8859-7; greek8
! iso-8859-7; ecma-118
! iso-8859-7; elot_928
! iso-8859-7; iso-ir-126
! iso-8859-8; iso-8859-8-i
! iso-8859-8; iso_8859-8:1988
! iso-8859-8; hebrew
! iso-8859-8; iso-ir-138
! iso-8859-9; latin5
! iso-8859-9; l5
! iso-8859-9; iso_8859_9
! iso-8859-9; iso-8859_9:1989
! iso-8859-9; iso8859-9
! iso-8859-9; iso8859_9
! iso-8859-9; 8859-9
! iso-8859-9; 8859_9
! iso-8859-9; iso-ir-148
! iso-8859-10; latin6
! iso-8859-10; l6
! iso-8859-10; iso_8859_10
! iso-8859-10; iso_8859-10:1993
! iso-8859-10; iso8859-10
! iso-8859-10; iso8859_10
! iso-8859-10; 8859-10
! iso-8859-10; 8859_10
! iso-8859-10; iso-ir-157
! iso-8859-13; latin7 ,l7
! iso-8859-14; latin8 ,l8
! iso-8859-15; latin9
! iso-8859-15; latin0
! iso-8859-15; l9
! iso-8859-15; l0
! iso-8859-15; iso_8859_15
! iso-8859-15; iso8859-15
! iso-8859-15; iso8859_15
! iso-8859-15; 8859-15
! iso-8859-15; 8859_15
! utf-8; utf8
! cp936; gbk
! cp936; ms936
! cp936; windows-936
! cp950; big5
! cp950; csbig5
! cp1250; windows-1250
! cp1251; windows-1251
! cp1252; windows-1252
! cp1253; windows-1253
! cp1254; windows-1254
! cp1255; windows-1255
! cp1256; windows-1256
! cp1257; windows-1257
! cp1258; windows-1258
! koi-0; gost-13052
! koi8-e; iso-ir-111
! koi8-e; ecma-113:1986
! koi8-r; cp878
! gost-19768-87; ecma-cyrillic
! gost-19768-87; ecma-113
! gost-19768-87; ecma-113:1988
! big5-hkscs; big5hkscs
! gb2312; gb_2312-80
! gb2312; csgb2312
! gb2312; hz-gb-2312
! gb2312; iso-ir-58
! gb2312; chinese
! gb2312; csiso58gb231280
! apple-arabic; x-mac-arabic
! apple-centeuro; x-mac-centeuro
! apple-croatian; x-mac-croatian
! apple-cyrillic; x-mac-cyrillic
! apple-greek; x-mac-greek
! apple-hebrew; x-mac-hebrew
! apple-iceland; x-mac-iceland
! apple-romanian; x-mac-romanian
! apple-roman; x-mac-roman
! apple-thai; x-mac-thai
! apple-turkish; x-mac-turkish
! </CharsetAliases>

---------------------------------------------------------------------
To sign-off this list, send email to majordomo(_at_)mhonarc(_dot_)org with the
message text UNSUBSCRIBE MHONARC-DEV