perl-unicode

[Encode 1.30] Patch to correct BOM value for 32LE

2002-04-08 07:22:34
jhi,

The following patch corrects the incorrect BOM value for 32LE. The first part is essentially identical to Anton's patch. The second part fixes t/Unicode.t so that it is more independent of Encode::Unicode (that is, should there be an error there, t/Unicode.t will find it -- currently the errors cancel one another out).

Dan the Encode Maintainer

-----
diff -dbBu -r1.26 lib/Encode/Unicode.pm
--- lib/Encode/Unicode.pm       2002/04/08 02:35:48     1.26
+++ lib/Encode/Unicode.pm       2002/04/08 14:17:01
@@ -3,7 +3,7 @@
 use strict;
 use warnings;

-our $VERSION = do { my @r = (q$Revision: 1.26 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.27 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };

 #
 # Aux. subs & constants
@@ -12,7 +12,7 @@
 sub FBCHAR(){ 0xFFFd }
 sub BOM_BE(){ 0xFeFF }
 sub BOM16LE(){ 0xFFFe }
-sub BOM32LE(){ 0xFeFF0000 }
+sub BOM32LE(){ 0xFFFe0000 }

 sub valid_ucs2($){
     if ($_[0] < 0xD800){
@@ -345,7 +345,7 @@
             16         32 bits/char
 -------------------------
 BE     0xFeFF 0x0000FeFF
-LE      0xFFeF 0xFeFF0000
+LE      0xFFeF 0xFFFe0000
 -------------------------

 =back
@@ -377,6 +377,7 @@
 C<UCS-2> is an exception.  Unlike others this is an alias of UCS-2BE.
 UCS-2 is already registered by IANA and others that way.

+=back

 =head1 The Surrogate Pair

@@ -418,6 +419,8 @@

 =head1 SEE ALSO

-L<Encode>, L<http://www.unicode.org/glossary/>
+L<Encode>, L<http://www.unicode.org/glossary/>,

-=back
+RFC 2781 L<http://rfc.net/rfc2781.html>,
+
+L<http://www.unicode.org/unicode/faq/utf_bom.html>

diff -dbBu -r1.4 t/Unicode.t
--- t/Unicode.t 2002/04/08 02:35:48     1.4
+++ t/Unicode.t 2002/04/08 14:17:21
@@ -1,5 +1,5 @@
 #
-# $Id: Unicode.t,v 1.4 2002/04/08 02:35:48 dankogai Exp $
+# $Id: Unicode.t,v 1.5 2002/04/08 14:17:19 dankogai Exp dankogai $
 #
 # This script is written entirely in ASCII, even though quoted literals
 # do include non-BMP unicode characters -- Are you happy, jhi?
@@ -28,7 +28,6 @@
 #use Test::More 'no_plan';
 use Test::More tests => 22;
 use Encode qw(encode decode);
-use Encode::Unicode; # to load BOM defs

 #
 # see
@@ -54,10 +53,10 @@
 my $n_32le =
     pack("C*", map {hex($_)} qw<4D 00 00 00 61 00 00 00 cd ab 01 00>);

-my $n_16bb = pack('n', Encode::Unicode::BOM_BE)  . $n_16be;
-my $n_16lb = pack('n', Encode::Unicode::BOM16LE) . $n_16le;
-my $n_32bb = pack('N', Encode::Unicode::BOM_BE ) . $n_32be;
-my $n_32lb = pack('N', Encode::Unicode::BOM32LE) . $n_32le;
+my $n_16bb = pack('n', 0xFeFF) . $n_16be;
+my $n_16lb = pack('v', 0xFeFF) . $n_16le;
+my $n_32bb = pack('N', 0xFeFF) . $n_32be;
+my $n_32lb = pack('V', 0xFeFF) . $n_32le;

 is($n_16be, encode('UTF-16BE', $nasty),  qq{encode UTF-16BE});
 is($n_16le, encode('UTF-16LE', $nasty),  qq{encode UTF-16LE});
@@ -91,7 +90,3 @@

 1;
 __END__
-
-use Devel::Peek;
-my $foo = decode('UTF-16BE', $n_16be);
-Dump $n_16be; Dump $foo;