perl-unicode

Re: jisx0212 support in Encode::JP is close

2002-03-19 08:31:23


Dan Kogai <dankogai(_at_)dan(_dot_)co(_dot_)jp> wrote:
Perl Encode Hackers,

   Since Encode/JP/JIS.pm and Encode/JP/ISO_2022_JP are already coded to 
handle jisx0212 (if euc-jp supports it), this automagically adds 
jisx0212 support to the other encodings as well.
   I need to fix the pod and t/JP.t so that it tests the 0212 part, but I 
will upload a new Encode package within 24 hours.
   Thank you, Nick, for making compile this smart!

Dan the Man with a New Encoding

Excellent!

Here is a patch to remove Encode::Tcl::Extended.
(some corrections to Encode::Tcl::* are included)
and please remove the following files, too.

  -Encode/lib/Encode/Tcl/Extended.pm
  -Encode/Encode/euc-jp-0212.enc

IMO, Tcl's euc-jp.enc, which lacks JIS X 0212, deserves to be removed;
or is it worth retaining under a different name?
(say, Java seems to have EUC_JP_LINUX for EUC-JP w/o JIS X 0212.)

  http://java.sun.com/j2se/1.3/docs/guide/intl/encoding.doc.html

Nevertheless, it would be better for the euc-jp test
to be gone from Tcl.t.



### Patch starts from here
diff -urN Encode~/lib/Encode/Tcl/Escape.pm Encode/lib/Encode/Tcl/Escape.pm
--- Encode~/lib/Encode/Tcl/Escape.pm    Tue Mar 12 04:56:38 2002
+++ Encode/lib/Encode/Tcl/Escape.pm     Tue Mar 19 23:20:26 2002
@@ -31,7 +31,7 @@
                    $val =~ /[\x30-\x3F]$/ ? 2 : # (only 2 is supported)
                        $val =~ /[\x40-\x5F]$/ ? 2 : # double byte
                            $val =~ /[\x60-\x6F]$/ ? 3 : # triple byte
-                               $val =~ /[\x70-\x7F]$/ ? 4 :
+                               $val =~ /[\x70-\x7E]$/ ? 4 :
                                  # 4 or more (only 4 is supported)
                                    croak("odd sequence is defined");
 
@@ -96,8 +96,8 @@
            }
            else
            {
-               # strictly, ([\x20-\x2F]*[\x30-\x7E]). '?' for chopped.
-               $str =~ s/^([\x20-\x2F]*[\x30-\x7E]?)//;
+               # strictly, ([\x21-\x2F]*[\x30-\x7E]). '?' for chopped.
+               $str =~ s/^([\x21-\x2F]*[\x30-\x7E]?)//;
                if ($chk && ! length $str)
                {
                    $str = "\e$1"; # split sequence
@@ -215,7 +215,7 @@
   SINGLE SHIFT TWO (SS2)             ESC 04/14
   SINGLE SHIFT THREE (SS3)           ESC 04/15
 
-Designation of control character sets are not supported.
+Designation of control character sets is not supported.
 
 =head1 SEE ALSO
 
diff -urN Encode~/lib/Encode/Tcl/Table.pm Encode/lib/Encode/Tcl/Table.pm
--- Encode~/lib/Encode/Tcl/Table.pm     Tue Mar 12 04:56:38 2002
+++ Encode/lib/Encode/Tcl/Table.pm      Tue Mar 19 23:10:16 2002
@@ -152,7 +152,7 @@
 and handles types S, D, and M of Tcl encodings.
 
 Implementation for type M is restricted to encodings
-in which bytes per a character is up to 2.
+in which the number of bytes per a character is up to 2.
 
 =head1 SEE ALSO
 
diff -urN Encode~/t/Tcl.t Encode/t/Tcl.t
--- Encode~/t/Tcl.t     Tue Mar 12 13:57:34 2002
+++ Encode/t/Tcl.t      Tue Mar 19 23:59:48 2002
@@ -15,12 +15,11 @@
 use Encode qw(encode decode);
 use Encode::Tcl;
 
-my @encodings = qw(euc-cn euc-jp euc-kr big5 shiftjis); # CJK
+my @encodings = qw(euc-cn euc-kr big5 shiftjis); # CJK
 my $n = 2;
 
 my %greek = (
   'euc-cn'   => [0xA6A1..0xA6B8,0xA6C1..0xA6D8],
-  'euc-jp'   => [0xA6A1..0xA6B8,0xA6C1..0xA6D8],
   'euc-kr'   => [0xA5C1..0xA5D8,0xA5E1..0xA5F8],
   'big5'     => [0xA344..0xA35B,0xA35C..0xA373],
   'shiftjis' => [0x839F..0x83B6,0x83BF..0x83D6],
@@ -37,7 +36,6 @@
 
 my %ideodigit = ( # cjk ideograph 'one' to 'ten'
   'euc-cn'   => [qw(d2bb b6fe c8fd cbc4 cee5 c1f9 c6df b0cb bec5 caae)],
-  'euc-jp'   => [qw(b0ec c6f3 bbb0 bbcd b8de cfbb bcb7 c8ac b6e5 bdbd)],
   'euc-kr'   => [qw(ece9 eca3 dfb2 decc e7e9 d7bf f6d2 f8a2 cefa e4a8)],
   'big5'     => [qw(a440 a447 a454 a57c a4ad a4bb a443 a44b a445 a451)],
   'shiftjis' => [qw(88ea 93f1 8e4f 8e6c 8cdc 985a 8eb5 94aa 8be3 8f5c)],
@@ -45,23 +43,9 @@
 );
 my @ideodigit = qw(one two three four five six seven eight nine ten);
 
-my $jis = '7bit-jis';
 my $kr  = '2022-kr';
 my %esc_str;
 
-$esc_str{$jis} = {qw(
-  1b24422422242424262428242a1b2842
-  3042304430463048304a
-  1b284931323334355d1b2842
-  ff71ff72ff73ff74ff75ff9d
-  1b2442467c4b5c1b2842
-  65e5672c
-  3132331b244234413b7a1b28425065726c
-  0031003200336f225b57005000650072006c
-  546573740a1b24422546253925481b28420a
-  0054006500730074000a30c630b930c8000a
-)};
-
 $esc_str{$kr} = {qw(
   1b2429430e2a22213e0f410d0a
   304200b10041000d000a
@@ -84,24 +68,15 @@
 my @ary_buff = (  # [ encoding, decoded, encoded ]
 # type-M
   ["euc-cn",      hiragana, "\xA4\xA2\xA4\xA4\xA4\xA6\xA4\xA8\xA4\xAA" ],
-  ["euc-jp",      hiragana, "\xA4\xA2\xA4\xA4\xA4\xA6\xA4\xA8\xA4\xAA" ],
-  ["euc-jp",      han_kana, "\x8E\xB1\x8E\xB2\x8E\xB3\x8E\xB4\x8E\xB5" ],
   ["euc-kr",      hiragana, "\xAA\xA2\xAA\xA4\xAA\xA6\xAA\xA8\xAA\xAA" ],
   ["shiftjis",    hiragana, "\x82\xA0\x82\xA2\x82\xA4\x82\xA6\x82\xA8" ],
   ["shiftjis",    han_kana, "\xB1\xB2\xB3\xB4\xB5" ],
 # type-E
   ["2022-cn",     hiragana, "\e\$)A\cN". '$"$$$&$($*' . "\cO" ],
-  ["2022-jp",     hiragana, "\e\$B".'$"$$$&$($*'."\e(B" ],
   ["2022-kr",     hiragana, "\e\$)C\cN". '*"*$*&*(**' . "\cO" ],
-#  [ $jis,         han_kana, "\e\(I".'12345'."\e(B" ],
   ["2022-jp1", macron, "\e\$(D\x2A\x27\x2A\x37\x2A\x45\x2A\x57\x2A\x69\e(B"],
   ["2022-jp2", "\x{C0}" . macron . "\x{C1}", 
        "\e\$(D\e.A\eN\x40\x2A\x27\x2A\x37\x2A\x45\x2A\x57\x2A\x69\e(B\eN\x41"],
-# type-X
-  ["euc-jp-0212", hiragana, "\xA4\xA2\xA4\xA4\xA4\xA6\xA4\xA8\xA4\xAA" ],
-  ["euc-jp-0212", han_kana, "\x8E\xB1\x8E\xB2\x8E\xB3\x8E\xB4\x8E\xB5" ],
-  ["euc-jp-0212", macron, 
-     "\x8F\xAA\xA7\x8F\xAA\xB7\x8F\xAA\xC5\x8F\xAA\xD7\x8F\xAA\xE9" ],
 );
 
 plan test => $n*@encodings + $n*@encodings*@greek

#End of Patch

sincerely,
SADAHIRO Tomoyuki

<Prev in Thread] Current Thread [Next in Thread>