mhonarc-commits
[Top] [All Lists]

CVS: mhonarc/MHonArc/lib/MHonArc/CharEnt BIG5_HKSCS.pm,NONE,1.1 CP936.pm,NO...

2002-11-28 01:57:37
Update of /cvsroot/mhonarc/mhonarc/MHonArc/lib/MHonArc/CharEnt
In directory subversions:/tmp/cvs-serv368/lib/MHonArc/CharEnt

Added Files:
	BIG5_HKSCS.pm CP936.pm CP950.pm GB2312.pm 
Log Message:
* MHonArc::CharEnt:
  + Added support for Chinese charsets.
  + Added support for converting UTF-8.
  * Optimized the conversion code to use s/// operation for conversion.
    Some simple testing shows it is faster than the loop/substr()
    method (mainly because character iteration is now done in the
    perl internals).
  - Removed $8bitonly option to str2sgml().  It is not very applicable
    now with all the newer charsets added and it complicates the
    conversion code.
* Some additional charset aliases added.


--- NEW FILE ---

package MHonArc::CharEnt::BIG5_HKSCS;

# Hong Kong Chinese (BIG5-HKSCS):
#	Obtained from
#	<ftp://xcin.linux.org.tw/pub/xcin/i18n/charset/BIG5HKSCS.gz>
+{
  #--------------------------------------------------------------------------
  # Hex Code    Entity Ref      # ISO external entity and description
  #--------------------------------------------------------------------------
    0x8840 =>	'#xF303',	# CJK UNIFIED IDEOGRAPH
    0x8841 =>	'#xF304',	# CJK UNIFIED IDEOGRAPH
    0x8842 =>	'#xF305',	# CJK UNIFIED IDEOGRAPH
    0x8843 =>	'#xF306',	# CJK UNIFIED IDEOGRAPH
    0x8844 =>	'#xF307',	# CJK UNIFIED IDEOGRAPH
    0x8845 =>	'#xF308',	# CJK UNIFIED IDEOGRAPH
    0x8846 =>	'#xF309',	# CJK UNIFIED IDEOGRAPH
    0x8847 =>	'#xF30A',	# CJK UNIFIED IDEOGRAPH
    0x8848 =>	'#xF30B',	# CJK UNIFIED IDEOGRAPH
[...18128 lines suppressed...]
    0xFEEC =>	'#x8884',	# CJK UNIFIED IDEOGRAPH
    0xFEED =>	'#xE2FF',	# CJK UNIFIED IDEOGRAPH
    0xFEEE =>	'#xE300',	# CJK UNIFIED IDEOGRAPH
    0xFEEF =>	'#xE301',	# CJK UNIFIED IDEOGRAPH
    0xFEF0 =>	'#x7986',	# CJK UNIFIED IDEOGRAPH
    0xFEF1 =>	'#x8900',	# CJK UNIFIED IDEOGRAPH
    0xFEF2 =>	'#x6902',	# CJK UNIFIED IDEOGRAPH
    0xFEF3 =>	'#x7980',	# CJK UNIFIED IDEOGRAPH
    0xFEF4 =>	'#xE306',	# CJK UNIFIED IDEOGRAPH
    0xFEF5 =>	'#x799D',	# CJK UNIFIED IDEOGRAPH
    0xFEF6 =>	'#xE308',	# CJK UNIFIED IDEOGRAPH
    0xFEF7 =>	'#x793C',	# CJK UNIFIED IDEOGRAPH
    0xFEF8 =>	'#x79A9',	# CJK UNIFIED IDEOGRAPH
    0xFEF9 =>	'#x6E2A',	# CJK UNIFIED IDEOGRAPH
    0xFEFA =>	'#xE30C',	# CJK UNIFIED IDEOGRAPH
    0xFEFB =>	'#x3EA8',	# CJK UNIFIED IDEOGRAPH
    0xFEFC =>	'#x79C6',	# CJK UNIFIED IDEOGRAPH
    0xFEFD =>	'#xE30F',	# CJK UNIFIED IDEOGRAPH
    0xFEFE =>	'#x79D4',	# CJK UNIFIED IDEOGRAPH
};

--- NEW FILE ---

package MHonArc::CharEnt::CP936;

# Chinese cp936 (GBK)
+{
  #--------------------------------------------------------------------------
  # Hex Code    Entity Ref      # ISO external entity and description
  #--------------------------------------------------------------------------
    0x80 =>	'#x20AC',	# EURO SIGN
    0x8140 =>	'#x4E02',	# CJK UNIFIED IDEOGRAPH
    0x8141 =>	'#x4E04',	# CJK UNIFIED IDEOGRAPH
    0x8142 =>	'#x4E05',	# CJK UNIFIED IDEOGRAPH
    0x8143 =>	'#x4E06',	# CJK UNIFIED IDEOGRAPH
    0x8144 =>	'#x4E0F',	# CJK UNIFIED IDEOGRAPH
    0x8145 =>	'#x4E12',	# CJK UNIFIED IDEOGRAPH
    0x8146 =>	'#x4E17',	# CJK UNIFIED IDEOGRAPH
    0x8147 =>	'#x4E1F',	# CJK UNIFIED IDEOGRAPH
    0x8148 =>	'#x4E20',	# CJK UNIFIED IDEOGRAPH
    0x8149 =>	'#x4E21',	# CJK UNIFIED IDEOGRAPH
[...21762 lines suppressed...]
    0xFD9E =>	'#xF995',	# CJK COMPATIBILITY IDEOGRAPH
    0xFD9F =>	'#xF9E7',	# CJK COMPATIBILITY IDEOGRAPH
    0xFDA0 =>	'#xF9F1',	# CJK COMPATIBILITY IDEOGRAPH
    0xFE40 =>	'#xFA0C',	# CJK COMPATIBILITY IDEOGRAPH
    0xFE41 =>	'#xFA0D',	# CJK COMPATIBILITY IDEOGRAPH
    0xFE42 =>	'#xFA0E',	# CJK COMPATIBILITY IDEOGRAPH
    0xFE43 =>	'#xFA0F',	# CJK COMPATIBILITY IDEOGRAPH
    0xFE44 =>	'#xFA11',	# CJK COMPATIBILITY IDEOGRAPH
    0xFE45 =>	'#xFA13',	# CJK COMPATIBILITY IDEOGRAPH
    0xFE46 =>	'#xFA14',	# CJK COMPATIBILITY IDEOGRAPH
    0xFE47 =>	'#xFA18',	# CJK COMPATIBILITY IDEOGRAPH
    0xFE48 =>	'#xFA1F',	# CJK COMPATIBILITY IDEOGRAPH
    0xFE49 =>	'#xFA20',	# CJK COMPATIBILITY IDEOGRAPH
    0xFE4A =>	'#xFA21',	# CJK COMPATIBILITY IDEOGRAPH
    0xFE4B =>	'#xFA23',	# CJK COMPATIBILITY IDEOGRAPH
    0xFE4C =>	'#xFA24',	# CJK COMPATIBILITY IDEOGRAPH
    0xFE4D =>	'#xFA27',	# CJK COMPATIBILITY IDEOGRAPH
    0xFE4E =>	'#xFA28',	# CJK COMPATIBILITY IDEOGRAPH
    0xFE4F =>	'#xFA29',	# CJK COMPATIBILITY IDEOGRAPH
};

--- NEW FILE ---

package MHonArc::CharEnt::CP950;

# Chinese cp950 (BIG5):
#	Derived from cp950.txt from unicode.org and
#	<ftp://xcin.linux.org.tw/pub/xcin/i18n/charset/BIG5.gz>
+{
  #--------------------------------------------------------------------------
  # Hex Code    Entity Ref      # ISO external entity and description
  #--------------------------------------------------------------------------
    0xA140 =>	'#x3000',	# IDEOGRAPHIC SPACE
    0xA141 =>	'#xFF0C',	# FULLWIDTH COMMA
    0xA142 =>	'#x3001',	# IDEOGRAPHIC COMMA
    0xA143 =>	'#x3002',	# IDEOGRAPHIC FULL STOP
    0xA144 =>	'#xFF0E',	# FULLWIDTH FULL STOP
    0xA145 =>	'#x2027',	# HYPHENATION POINT
    0xA146 =>	'#xFF1B',	# FULLWIDTH SEMICOLON
    0xA147 =>	'#xFF1A',	# FULLWIDTH COLON
    0xA148 =>	'#xFF1F',	# FULLWIDTH QUESTION MARK
[...13883 lines suppressed...]
    0xF9EC =>	'#x2558',	# BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
    0xF9ED =>	'#x2567',	# BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
    0xF9EE =>	'#x255B',	# BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
    0xF9EF =>	'#x2553',	# BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
    0xF9F0 =>	'#x2565',	# BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
    0xF9F1 =>	'#x2556',	# BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
    0xF9F2 =>	'#x255F',	# BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
    0xF9F3 =>	'#x256B',	# BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
    0xF9F4 =>	'#x2562',	# BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
    0xF9F5 =>	'#x2559',	# BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
    0xF9F6 =>	'#x2568',	# BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
    0xF9F7 =>	'#x255C',	# BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
    0xF9F8 =>	'#x2551',	# BOX DRAWINGS DOUBLE VERTICAL
    0xF9F9 =>	'#x2550',	# BOX DRAWINGS DOUBLE HORIZONTAL
    0xF9FA =>	'#x256D',	# BOX DRAWINGS LIGHT ARC DOWN AND RIGHT
    0xF9FB =>	'#x256E',	# BOX DRAWINGS LIGHT ARC DOWN AND LEFT
    0xF9FC =>	'#x2570',	# BOX DRAWINGS LIGHT ARC UP AND RIGHT
    0xF9FD =>	'#x256F',	# BOX DRAWINGS LIGHT ARC UP AND LEFT
    0xF9FE =>	'#x2593',	# DARK SHADE
};

--- NEW FILE ---

package MHonArc::CharEnt::GB2312;

# Chinese GB2312
#	Derived from <ftp://xcin.linux.org.tw/pub/xcin/i18n/charset/GB2312.gz>
+{
  #--------------------------------------------------------------------------
  # Hex Code    Entity Ref      # ISO external entity and description
  #--------------------------------------------------------------------------
    0xA1A1 =>	'#x3000',	# IDEOGRAPHIC SPACE
    0xA1A2 =>	'#x3001',	# IDEOGRAPHIC COMMA
    0xA1A3 =>	'#x3002',	# IDEOGRAPHIC FULL STOP
    0xA1A4 =>	'#x30FB',	# KATAKANA MIDDLE DOT
    0xA1A5 =>	'#x02C9',	# MODIFIER LETTER MACRON (Mandarin Chinese first tone)
    0xA1A6 =>	'#x02C7',	# CARON (Mandarin Chinese third tone)
    0xA1A7 =>	'#x00A8',	# DIAERESIS
    0xA1A8 =>	'#x3003',	# DITTO MARK
    0xA1A9 =>	'#x3005',	# IDEOGRAPHIC ITERATION MARK
    0xA1AA =>	'#x2015',	# HORIZONTAL BAR
[...7416 lines suppressed...]
    0xF7EC =>	'#x9EDB',	# CJK UNIFIED IDEOGRAPH
    0xF7ED =>	'#x9EDC',	# CJK UNIFIED IDEOGRAPH
    0xF7EE =>	'#x9EDD',	# CJK UNIFIED IDEOGRAPH
    0xF7EF =>	'#x9EE0',	# CJK UNIFIED IDEOGRAPH
    0xF7F0 =>	'#x9EDF',	# CJK UNIFIED IDEOGRAPH
    0xF7F1 =>	'#x9EE2',	# CJK UNIFIED IDEOGRAPH
    0xF7F2 =>	'#x9EE9',	# CJK UNIFIED IDEOGRAPH
    0xF7F3 =>	'#x9EE7',	# CJK UNIFIED IDEOGRAPH
    0xF7F4 =>	'#x9EE5',	# CJK UNIFIED IDEOGRAPH
    0xF7F5 =>	'#x9EEA',	# CJK UNIFIED IDEOGRAPH
    0xF7F6 =>	'#x9EEF',	# CJK UNIFIED IDEOGRAPH
    0xF7F7 =>	'#x9F22',	# CJK UNIFIED IDEOGRAPH
    0xF7F8 =>	'#x9F2C',	# CJK UNIFIED IDEOGRAPH
    0xF7F9 =>	'#x9F2F',	# CJK UNIFIED IDEOGRAPH
    0xF7FA =>	'#x9F39',	# CJK UNIFIED IDEOGRAPH
    0xF7FB =>	'#x9F37',	# CJK UNIFIED IDEOGRAPH
    0xF7FC =>	'#x9F3D',	# CJK UNIFIED IDEOGRAPH
    0xF7FD =>	'#x9F3E',	# CJK UNIFIED IDEOGRAPH
    0xF7FE =>	'#x9F44',	# CJK UNIFIED IDEOGRAPH
};

---------------------------------------------------------------------
To sign-off this list, send email to majordomo(_at_)mhonarc(_dot_)org with the
message text UNSUBSCRIBE MHONARC-DEV