jhi,
The following patch corrects the incorrect BOM value for UTF-32LE.
The first part is essentially identical to Anton's patch. The second
fixes t/Unicode.t so that it is more independent of Encode::Unicode (that
is, should there be an error there, t/Unicode.t will find it -- currently
the errors cancel one another out).
Dan the Encode Maintainer
-----
diff -dbBu -r1.26 lib/Encode/Unicode.pm
--- lib/Encode/Unicode.pm 2002/04/08 02:35:48 1.26
+++ lib/Encode/Unicode.pm 2002/04/08 14:17:01
@@ -3,7 +3,7 @@
use strict;
use warnings;
-our $VERSION = do { my @r = (q$Revision: 1.26 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.27 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
#
# Aux. subs & constants
@@ -12,7 +12,7 @@
sub FBCHAR(){ 0xFFFd }
sub BOM_BE(){ 0xFeFF }
sub BOM16LE(){ 0xFFFe }
-sub BOM32LE(){ 0xFeFF0000 }
+sub BOM32LE(){ 0xFFFe0000 }
sub valid_ucs2($){
if ($_[0] < 0xD800){
@@ -345,7 +345,7 @@
16 32 bits/char
-------------------------
BE 0xFeFF 0x0000FeFF
-LE 0xFFeF 0xFeFF0000
+LE 0xFFFe 0xFFFe0000
-------------------------
=back
@@ -377,6 +377,7 @@
C<UCS-2> is an exception. Unlike others this is an alias of UCS-2BE.
UCS-2 is already registered by IANA and others that way.
+=back
=head1 The Surrogate Pair
@@ -418,6 +419,8 @@
=head1 SEE ALSO
-L<Encode>, L<http://www.unicode.org/glossary/>
+L<Encode>, L<http://www.unicode.org/glossary/>,
-=back
+RFC 2781 L<http://rfc.net/rfc2781.html>,
+
+L<http://www.unicode.org/unicode/faq/utf_bom.html>
diff -dbBu -r1.4 t/Unicode.t
--- t/Unicode.t 2002/04/08 02:35:48 1.4
+++ t/Unicode.t 2002/04/08 14:17:21
@@ -1,5 +1,5 @@
#
-# $Id: Unicode.t,v 1.4 2002/04/08 02:35:48 dankogai Exp $
+# $Id: Unicode.t,v 1.5 2002/04/08 14:17:19 dankogai Exp dankogai $
#
# This script is written entirely in ASCII, even though quoted literals
# do include non-BMP unicode characters -- Are you happy, jhi?
@@ -28,7 +28,6 @@
#use Test::More 'no_plan';
use Test::More tests => 22;
use Encode qw(encode decode);
-use Encode::Unicode; # to load BOM defs
#
# see
@@ -54,10 +53,10 @@
my $n_32le =
pack("C*", map {hex($_)} qw<4D 00 00 00 61 00 00 00 cd ab 01 00>);
-my $n_16bb = pack('n', Encode::Unicode::BOM_BE) . $n_16be;
-my $n_16lb = pack('n', Encode::Unicode::BOM16LE) . $n_16le;
-my $n_32bb = pack('N', Encode::Unicode::BOM_BE ) . $n_32be;
-my $n_32lb = pack('N', Encode::Unicode::BOM32LE) . $n_32le;
+my $n_16bb = pack('n', 0xFeFF) . $n_16be;
+my $n_16lb = pack('v', 0xFeFF) . $n_16le;
+my $n_32bb = pack('N', 0xFeFF) . $n_32be;
+my $n_32lb = pack('V', 0xFeFF) . $n_32le;
is($n_16be, encode('UTF-16BE', $nasty), qq{encode UTF-16BE});
is($n_16le, encode('UTF-16LE', $nasty), qq{encode UTF-16LE});
@@ -91,7 +90,3 @@
1;
__END__
-
-use Devel::Peek;
-my $foo = decode('UTF-16BE', $n_16be);
-Dump $n_16be; Dump $foo;