mhonarc-commits
[Top] [All Lists]

CVS: mhonarc/MHonArc/doc/resources charsetaliases.html,1.3,1.4 charsetconve...

2002-11-28 01:57:45
Update of /cvsroot/mhonarc/mhonarc/MHonArc/doc/resources
In directory subversions:/tmp/cvs-serv368/doc/resources

Modified Files:
	charsetaliases.html charsetconverters.html 
Log Message:
* MHonArc::CharEnt:
  + Added support for Chinese charsets.
  + Added support for converting UTF-8.
  * Optimized the conversion code to use s/// operation for conversion.
    Some simple testing shows it is faster than the loop/substr()
    method (mainly because character iteration is now done in the
    perl internals).
  - Removed $8bitonly option to str2sgml().  It is not very applicable
    now with all the newer charsets added and it complicates the
    conversion code.
* Some additional charset aliases added.


Index: charsetaliases.html
===================================================================
RCS file: /cvsroot/mhonarc/mhonarc/MHonArc/doc/resources/charsetaliases.html,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -r1.3 -r1.4
*** charsetaliases.html	24 Nov 2002 04:18:36 -0000	1.3
--- charsetaliases.html	28 Nov 2002 08:57:19 -0000	1.4
***************
*** 110,127 ****
  <pre class="code">
  <b>&lt;CharsetAliases&gt;</b>
! us-ascii; ascii, iso646, iso646-us, cp367, csascii, iso-ir-6, us
! iso-8859-1; latin1, l1, iso_8859_1, iso_8859-1:1987, iso8859-1, iso8859_1, 8859-1, 8859_1, cp819, ibm819, iso-ir-100
! iso-8859-2; latin2, l2, iso_8859_2, iso_8859-2:1987, iso8859-2, iso8859_2, 8859-2, 8859_2, iso-ir-101
! iso-8859-3; latin3, l3, iso_8859_3, iso_8859-3:1988, iso8859-3, iso8859_3, 8859-3, 8859_3, iso-ir-109
! iso-8859-4; latin4, l4, iso_8859_4, iso_8859-4:1988, iso8859-4, iso8859_4, 8859-4, 8859_4, iso-ir-110
! iso-8859-5; iso_8859-5:1988, cyrillic, iso-ir-144
! iso-8859-6; iso_8859-6:1987, arabic, asmo-708, ecma-114, iso-ir-127
! iso-8859-7; iso_8859-7:1987, greek, greek8, ecma-118, elot_928, iso-ir-126
! iso-8859-8; iso-8859-8-i, iso_8859-8:1988, hebrew, iso-ir-138
! iso-8859-9; latin5, l5, iso_8859_9, iso-8859_9:1989, iso8859-9, iso8859_9, 8859-9, 8859_9, iso-ir-148
! iso-8859-10; latin6, l6, iso_8859_10, iso_8859-10:1993, iso8859-10, iso8859_10, 8859-10, 8859_10, iso-ir-157
  iso-8859-13; latin7 ,l7
  iso-8859-14; latin8 ,l8
! iso-8859-15; latin9, latin0, l9, l0, iso_8859_15, iso8859-15, iso8859_15, 8859-15, 8859_15
  cp1250; windows-1250
  cp1251; windows-1251
--- 110,214 ----
  <pre class="code">
  <b>&lt;CharsetAliases&gt;</b>
! us-ascii; ascii
! us-ascii; ansi_x3.4-1986
! us-ascii; iso646
! us-ascii; iso646-us
! us-ascii; iso646.irv:1991
! us-ascii; cp367
! us-ascii; ibm367
! us-ascii; csascii
! us-ascii; iso-ir-6
! us-ascii; us
! iso-8859-1; latin1
! iso-8859-1; l1
! iso-8859-1; iso_8859_1
! iso-8859-1; iso_8859-1:1987
! iso-8859-1; iso8859-1
! iso-8859-1; iso8859_1
! iso-8859-1; 8859-1
! iso-8859-1; 8859_1
! iso-8859-1; cp819
! iso-8859-1; ibm819
! iso-8859-1; iso-ir-100
! iso-8859-2; latin2
! iso-8859-2; l2
! iso-8859-2; iso_8859_2
! iso-8859-2; iso_8859-2:1987
! iso-8859-2; iso8859-2
! iso-8859-2; iso8859_2
! iso-8859-2; 8859-2
! iso-8859-2; 8859_2
! iso-8859-2; iso-ir-101
! iso-8859-3; latin3
! iso-8859-3; l3
! iso-8859-3; iso_8859_3
! iso-8859-3; iso_8859-3:1988
! iso-8859-3; iso8859-3
! iso-8859-3; iso8859_3
! iso-8859-3; 8859-3
! iso-8859-3; 8859_3
! iso-8859-3; iso-ir-109
! iso-8859-4; latin4
! iso-8859-4; l4
! iso-8859-4; iso_8859_4
! iso-8859-4; iso_8859-4:1988
! iso-8859-4; iso8859-4
! iso-8859-4; iso8859_4
! iso-8859-4; 8859-4
! iso-8859-4; 8859_4
! iso-8859-4; iso-ir-110
! iso-8859-5; iso_8859-5:1988
! iso-8859-5; cyrillic
! iso-8859-5; iso-ir-144
! iso-8859-6; iso_8859-6:1987
! iso-8859-6; arabic
! iso-8859-6; asmo-708
! iso-8859-6; ecma-114
! iso-8859-6; iso-ir-127
! iso-8859-7; iso_8859-7:1987
! iso-8859-7; greek
! iso-8859-7; greek8
! iso-8859-7; ecma-118
! iso-8859-7; elot_928
! iso-8859-7; iso-ir-126
! iso-8859-8; iso-8859-8-i
! iso-8859-8; iso_8859-8:1988
! iso-8859-8; hebrew
! iso-8859-8; iso-ir-138
! iso-8859-9; latin5
! iso-8859-9; l5
! iso-8859-9; iso_8859_9
! iso-8859-9; iso-8859_9:1989
! iso-8859-9; iso8859-9
! iso-8859-9; iso8859_9
! iso-8859-9; 8859-9
! iso-8859-9; 8859_9
! iso-8859-9; iso-ir-148
! iso-8859-10; latin6
! iso-8859-10; l6
! iso-8859-10; iso_8859_10
! iso-8859-10; iso_8859-10:1993
! iso-8859-10; iso8859-10
! iso-8859-10; iso8859_10
! iso-8859-10; 8859-10
! iso-8859-10; 8859_10
! iso-8859-10; iso-ir-157
  iso-8859-13; latin7 ,l7
  iso-8859-14; latin8 ,l8
! iso-8859-15; latin9
! iso-8859-15; latin0
! iso-8859-15; l9
! iso-8859-15; l0
! iso-8859-15; iso_8859_15
! iso-8859-15; iso8859-15
! iso-8859-15; iso8859_15
! iso-8859-15; 8859-15
! iso-8859-15; 8859_15
! utf-8; utf8
! cp936; gbk
! cp936; ms936
! cp936; windows-936
! cp950; big5
! cp950; csbig5
  cp1250; windows-1250
  cp1251; windows-1251
***************
*** 134,140 ****
  cp1258; windows-1258
  koi-0; gost-13052
! koi8-e; iso-ir-111, ecma-113:1986
  koi8-r; cp878
! gost-19768-87; ecma-cyrillic, ecma-113, ecma-113:1988
  apple-arabic; x-mac-arabic
  apple-centeuro; x-mac-centeuro
--- 221,237 ----
  cp1258; windows-1258
  koi-0; gost-13052
! koi8-e; iso-ir-111
! koi8-e; ecma-113:1986
  koi8-r; cp878
! gost-19768-87; ecma-cyrillic
! gost-19768-87; ecma-113
! gost-19768-87; ecma-113:1988
! big5-hkscs; big5hkscs
! gb2312; gb_2312-80
! gb2312; csgb2312
! gb2312; hz-gb-2312
! gb2312; iso-ir-58
! gb2312; chinese
! gb2312; csiso58gb231280
  apple-arabic; x-mac-arabic
  apple-centeuro; x-mac-centeuro

Index: charsetconverters.html
===================================================================
RCS file: /cvsroot/mhonarc/mhonarc/MHonArc/doc/resources/charsetconverters.html,v
retrieving revision 1.14
retrieving revision 1.15
diff -C2 -r1.14 -r1.15
*** charsetconverters.html	24 Nov 2002 04:18:36 -0000	1.14
--- charsetconverters.html	28 Nov 2002 08:57:19 -0000	1.15
***************
*** 152,159 ****
  <h2>Default Setting</h2>
  
! <table border=0 cellpadding=4>
  <tr valign=top>
! <td><strong>NOTE</strong></td>
! <td><p>As of MHonArc v2.6.0, filters should only be defined for
  official charsets.  The
  <a href="charsetaliases.html">CHARSETALIASES</a> resource can
--- 152,159 ----
  <h2>Default Setting</h2>
  
! <table class="note" width="100%">
  <tr valign=top>
! <td><strong>NOTE:</strong></td>
! <td width="100%"><p>As of MHonArc v2.6.0, filters should only be defined for
  official charsets.  The
  <a href="charsetaliases.html">CHARSETALIASES</a> resource can
***************
*** 185,189 ****
--- 185,192 ----
  iso-8859-16;    MHonArc::CharEnt::str2sgml;     MHonArc/CharEnt.pm
  iso-2022-jp;    iso_2022_jp::str2html;          iso2022jp.pl
+ utf-8;          MHonArc::CharEnt::str2sgml;     MHonArc/CharEnt.pm
  cp866;          MHonArc::CharEnt::str2sgml;     MHonArc/CharEnt.pm
+ cp936;          MHonArc::CharEnt::str2sgml;     MHonArc/CharEnt.pm
+ cp950;          MHonArc::CharEnt::str2sgml;     MHonArc/CharEnt.pm
  cp1250;         MHonArc::CharEnt::str2sgml;     MHonArc/CharEnt.pm
  cp1251;         MHonArc::CharEnt::str2sgml;     MHonArc/CharEnt.pm
***************
*** 205,208 ****
--- 208,213 ----
  gost-19768-87;  MHonArc::CharEnt::str2sgml;     MHonArc/CharEnt.pm
  viscii;         MHonArc::CharEnt::str2sgml;     MHonArc/CharEnt.pm
+ big5-hkscs;     MHonArc::CharEnt::str2sgml;     MHonArc/CharEnt.pm
+ gb2312;         MHonArc::CharEnt::str2sgml;     MHonArc/CharEnt.pm
  apple-arabic;   MHonArc::CharEnt::str2sgml;     MHonArc/CharEnt.pm
  apple-centeuro; MHonArc::CharEnt::str2sgml;     MHonArc/CharEnt.pm
***************
*** 243,249 ****
  </pre>
  
! <p>See the
  <a href="../rcfileexs/utf-8.mrc.html"><tt>utf-8.mrc</tt></a> example
! resource file on how convert data to Unicode UTF-8.
  </p>
  
--- 248,259 ----
  </pre>
  
! <p>MHonArc's <tt>MHonArc::CharEnt</tt> module supports the
! conversion of various character sets, including UTF-8 data,
! into standard HTML character entity references (e.g.
! <tt class="icode">&amp;AElig;</tt>) and numeric Unicode character references
! (e.g. <tt class="icode">&amp;#x203E;</tt>).  However, if you want
! archive pages to be in native UTF-8, see the
  <a href="../rcfileexs/utf-8.mrc.html"><tt>utf-8.mrc</tt></a> example
! for a possible solution.
  </p>
  

---------------------------------------------------------------------
To sign-off this list, send email to majordomo(_at_)mhonarc(_dot_)org with the
message text UNSUBSCRIBE MHONARC-DEV