Update of /cvsroot/mhonarc/mhonarc/MHonArc/lib/MHonArc/CharEnt
In directory subversions:/tmp/cvs-serv368/lib/MHonArc/CharEnt
Added Files:
BIG5_HKSCS.pm CP936.pm CP950.pm GB2312.pm
Log Message:
* MHonArc::CharEnt:
+ Added support for Chinese charsets.
+ Added support for converting UTF-8.
* Optimized the conversion code to use s/// operation for conversion.
Some simple testing shows it is faster than the loop/substr()
method (mainly because character iteration is now done in the
perl internals).
- Removed $8bitonly option to str2sgml(). It is not very applicable
now with all the newer charsets added and it complicates the
conversion code.
* Some additional charset aliases added.
--- NEW FILE ---
package MHonArc::CharEnt::BIG5_HKSCS;
# Hong Kong Chinese (BIG5-HKSCS):
# Obtained from
# <ftp://xcin.linux.org.tw/pub/xcin/i18n/charset/BIG5HKSCS.gz>
+{
#--------------------------------------------------------------------------
# Hex Code Entity Ref # ISO external entity and description
#--------------------------------------------------------------------------
0x8840 => '#xF303', # CJK UNIFIED IDEOGRAPH
0x8841 => '#xF304', # CJK UNIFIED IDEOGRAPH
0x8842 => '#xF305', # CJK UNIFIED IDEOGRAPH
0x8843 => '#xF306', # CJK UNIFIED IDEOGRAPH
0x8844 => '#xF307', # CJK UNIFIED IDEOGRAPH
0x8845 => '#xF308', # CJK UNIFIED IDEOGRAPH
0x8846 => '#xF309', # CJK UNIFIED IDEOGRAPH
0x8847 => '#xF30A', # CJK UNIFIED IDEOGRAPH
0x8848 => '#xF30B', # CJK UNIFIED IDEOGRAPH
[...18128 lines suppressed...]
0xFEEC => '#x8884', # CJK UNIFIED IDEOGRAPH
0xFEED => '#xE2FF', # CJK UNIFIED IDEOGRAPH
0xFEEE => '#xE300', # CJK UNIFIED IDEOGRAPH
0xFEEF => '#xE301', # CJK UNIFIED IDEOGRAPH
0xFEF0 => '#x7986', # CJK UNIFIED IDEOGRAPH
0xFEF1 => '#x8900', # CJK UNIFIED IDEOGRAPH
0xFEF2 => '#x6902', # CJK UNIFIED IDEOGRAPH
0xFEF3 => '#x7980', # CJK UNIFIED IDEOGRAPH
0xFEF4 => '#xE306', # CJK UNIFIED IDEOGRAPH
0xFEF5 => '#x799D', # CJK UNIFIED IDEOGRAPH
0xFEF6 => '#xE308', # CJK UNIFIED IDEOGRAPH
0xFEF7 => '#x793C', # CJK UNIFIED IDEOGRAPH
0xFEF8 => '#x79A9', # CJK UNIFIED IDEOGRAPH
0xFEF9 => '#x6E2A', # CJK UNIFIED IDEOGRAPH
0xFEFA => '#xE30C', # CJK UNIFIED IDEOGRAPH
0xFEFB => '#x3EA8', # CJK UNIFIED IDEOGRAPH
0xFEFC => '#x79C6', # CJK UNIFIED IDEOGRAPH
0xFEFD => '#xE30F', # CJK UNIFIED IDEOGRAPH
0xFEFE => '#x79D4', # CJK UNIFIED IDEOGRAPH
};
--- NEW FILE ---
package MHonArc::CharEnt::CP936;
# Chinese cp936 (GBK)
+{
#--------------------------------------------------------------------------
# Hex Code Entity Ref # ISO external entity and description
#--------------------------------------------------------------------------
0x80 => '#x20AC', # EURO SIGN
0x8140 => '#x4E02', # CJK UNIFIED IDEOGRAPH
0x8141 => '#x4E04', # CJK UNIFIED IDEOGRAPH
0x8142 => '#x4E05', # CJK UNIFIED IDEOGRAPH
0x8143 => '#x4E06', # CJK UNIFIED IDEOGRAPH
0x8144 => '#x4E0F', # CJK UNIFIED IDEOGRAPH
0x8145 => '#x4E12', # CJK UNIFIED IDEOGRAPH
0x8146 => '#x4E17', # CJK UNIFIED IDEOGRAPH
0x8147 => '#x4E1F', # CJK UNIFIED IDEOGRAPH
0x8148 => '#x4E20', # CJK UNIFIED IDEOGRAPH
0x8149 => '#x4E21', # CJK UNIFIED IDEOGRAPH
[...21762 lines suppressed...]
0xFD9E => '#xF995', # CJK COMPATIBILITY IDEOGRAPH
0xFD9F => '#xF9E7', # CJK COMPATIBILITY IDEOGRAPH
0xFDA0 => '#xF9F1', # CJK COMPATIBILITY IDEOGRAPH
0xFE40 => '#xFA0C', # CJK COMPATIBILITY IDEOGRAPH
0xFE41 => '#xFA0D', # CJK COMPATIBILITY IDEOGRAPH
0xFE42 => '#xFA0E', # CJK COMPATIBILITY IDEOGRAPH
0xFE43 => '#xFA0F', # CJK COMPATIBILITY IDEOGRAPH
0xFE44 => '#xFA11', # CJK COMPATIBILITY IDEOGRAPH
0xFE45 => '#xFA13', # CJK COMPATIBILITY IDEOGRAPH
0xFE46 => '#xFA14', # CJK COMPATIBILITY IDEOGRAPH
0xFE47 => '#xFA18', # CJK COMPATIBILITY IDEOGRAPH
0xFE48 => '#xFA1F', # CJK COMPATIBILITY IDEOGRAPH
0xFE49 => '#xFA20', # CJK COMPATIBILITY IDEOGRAPH
0xFE4A => '#xFA21', # CJK COMPATIBILITY IDEOGRAPH
0xFE4B => '#xFA23', # CJK COMPATIBILITY IDEOGRAPH
0xFE4C => '#xFA24', # CJK COMPATIBILITY IDEOGRAPH
0xFE4D => '#xFA27', # CJK COMPATIBILITY IDEOGRAPH
0xFE4E => '#xFA28', # CJK COMPATIBILITY IDEOGRAPH
0xFE4F => '#xFA29', # CJK COMPATIBILITY IDEOGRAPH
};
--- NEW FILE ---
package MHonArc::CharEnt::CP950;
# Chinese cp950 (BIG5):
# Derived from cp950.txt from unicode.org and
# <ftp://xcin.linux.org.tw/pub/xcin/i18n/charset/BIG5.gz>
+{
#--------------------------------------------------------------------------
# Hex Code Entity Ref # ISO external entity and description
#--------------------------------------------------------------------------
0xA140 => '#x3000', # IDEOGRAPHIC SPACE
0xA141 => '#xFF0C', # FULLWIDTH COMMA
0xA142 => '#x3001', # IDEOGRAPHIC COMMA
0xA143 => '#x3002', # IDEOGRAPHIC FULL STOP
0xA144 => '#xFF0E', # FULLWIDTH FULL STOP
0xA145 => '#x2027', # HYPHENATION POINT
0xA146 => '#xFF1B', # FULLWIDTH SEMICOLON
0xA147 => '#xFF1A', # FULLWIDTH COLON
0xA148 => '#xFF1F', # FULLWIDTH QUESTION MARK
[...13883 lines suppressed...]
0xF9EC => '#x2558', # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
0xF9ED => '#x2567', # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
0xF9EE => '#x255B', # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
0xF9EF => '#x2553', # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
0xF9F0 => '#x2565', # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
0xF9F1 => '#x2556', # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
0xF9F2 => '#x255F', # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
0xF9F3 => '#x256B', # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
0xF9F4 => '#x2562', # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
0xF9F5 => '#x2559', # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
0xF9F6 => '#x2568', # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
0xF9F7 => '#x255C', # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
0xF9F8 => '#x2551', # BOX DRAWINGS DOUBLE VERTICAL
0xF9F9 => '#x2550', # BOX DRAWINGS DOUBLE HORIZONTAL
0xF9FA => '#x256D', # BOX DRAWINGS LIGHT ARC DOWN AND RIGHT
0xF9FB => '#x256E', # BOX DRAWINGS LIGHT ARC DOWN AND LEFT
0xF9FC => '#x2570', # BOX DRAWINGS LIGHT ARC UP AND RIGHT
0xF9FD => '#x256F', # BOX DRAWINGS LIGHT ARC UP AND LEFT
0xF9FE => '#x2593', # DARK SHADE
};
--- NEW FILE ---
package MHonArc::CharEnt::GB2312;
# Chinese GB2312
# Derived from <ftp://xcin.linux.org.tw/pub/xcin/i18n/charset/GB2312.gz>
+{
#--------------------------------------------------------------------------
# Hex Code Entity Ref # ISO external entity and description
#--------------------------------------------------------------------------
0xA1A1 => '#x3000', # IDEOGRAPHIC SPACE
0xA1A2 => '#x3001', # IDEOGRAPHIC COMMA
0xA1A3 => '#x3002', # IDEOGRAPHIC FULL STOP
0xA1A4 => '#x30FB', # KATAKANA MIDDLE DOT
0xA1A5 => '#x02C9', # MODIFIER LETTER MACRON (Mandarin Chinese first tone)
0xA1A6 => '#x02C7', # CARON (Mandarin Chinese third tone)
0xA1A7 => '#x00A8', # DIAERESIS
0xA1A8 => '#x3003', # DITTO MARK
0xA1A9 => '#x3005', # IDEOGRAPHIC ITERATION MARK
0xA1AA => '#x2015', # HORIZONTAL BAR
[...7416 lines suppressed...]
0xF7EC => '#x9EDB', # CJK UNIFIED IDEOGRAPH
0xF7ED => '#x9EDC', # CJK UNIFIED IDEOGRAPH
0xF7EE => '#x9EDD', # CJK UNIFIED IDEOGRAPH
0xF7EF => '#x9EE0', # CJK UNIFIED IDEOGRAPH
0xF7F0 => '#x9EDF', # CJK UNIFIED IDEOGRAPH
0xF7F1 => '#x9EE2', # CJK UNIFIED IDEOGRAPH
0xF7F2 => '#x9EE9', # CJK UNIFIED IDEOGRAPH
0xF7F3 => '#x9EE7', # CJK UNIFIED IDEOGRAPH
0xF7F4 => '#x9EE5', # CJK UNIFIED IDEOGRAPH
0xF7F5 => '#x9EEA', # CJK UNIFIED IDEOGRAPH
0xF7F6 => '#x9EEF', # CJK UNIFIED IDEOGRAPH
0xF7F7 => '#x9F22', # CJK UNIFIED IDEOGRAPH
0xF7F8 => '#x9F2C', # CJK UNIFIED IDEOGRAPH
0xF7F9 => '#x9F2F', # CJK UNIFIED IDEOGRAPH
0xF7FA => '#x9F39', # CJK UNIFIED IDEOGRAPH
0xF7FB => '#x9F37', # CJK UNIFIED IDEOGRAPH
0xF7FC => '#x9F3D', # CJK UNIFIED IDEOGRAPH
0xF7FD => '#x9F3E', # CJK UNIFIED IDEOGRAPH
0xF7FE => '#x9F44', # CJK UNIFIED IDEOGRAPH
};
---------------------------------------------------------------------
To sign-off this list, send email to majordomo(_at_)mhonarc(_dot_)org with the
message text UNSUBSCRIBE MHONARC-DEV