Gisle Aas <gisle(_at_)ActiveState(_dot_)com> writes:
When using ':encoding(utf8)' all data after a bad byte is simply lost.
This seems like a serious perl-5.8.6 regression to me.
This is a fix:
Index: perl/ext/Encode/Encode.pm
--- perl/ext/Encode/Encode.pm.~1~ Fri Dec 3 15:04:36 2004
+++ perl/ext/Encode/Encode.pm Fri Dec 3 15:04:36 2004
@@ -3,7 +3,7 @@
#
package Encode;
use strict;
-our $VERSION = do { my @r = (q$Revision: 2.8 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 2.8.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
sub DEBUG () { 0 }
use XSLoader ();
XSLoader::load(__PACKAGE__, $VERSION);
Index: perl/ext/Encode/Encode.xs
--- perl/ext/Encode/Encode.xs.~1~ Fri Dec 3 15:04:36 2004
+++ perl/ext/Encode/Encode.xs Fri Dec 3 15:04:36 2004
@@ -279,7 +279,6 @@
#if 0
fprintf(stderr, "renewed == %d\n", renewed);
#endif
- if (renewed){ check |= ENCODE_RETURN_ON_ERR; }
}
FREETMPS; LEAVE;
/* end PerlIO check */
@@ -302,6 +301,8 @@
U8 skip = UTF8SKIP(s);
if ((s + skip) > e) {
/* Partial character - done */
+ if (renewed)
+ break;
goto decode_utf8_fallback;
}
else if (is_utf8_char(s)) {
Index: perl/ext/PerlIO/t/encoding.t
--- perl/ext/PerlIO/t/encoding.t.~1~ Fri Dec 3 15:04:36 2004
+++ perl/ext/PerlIO/t/encoding.t Fri Dec 3 15:04:36 2004
@@ -16,7 +16,7 @@
}
}
-print "1..14\n";
+print "1..15\n";
my $grk = "grk$$";
my $utf = "utf$$";
@@ -150,6 +150,18 @@
print "not " unless ($dstr eq $str);
print "ok 14\n";
+# Try decoding some bad stuff
+open(F,'>:raw',$threebyte) || die "Cannot open $threebyte:$!";
+print F "foo\xF0\x80\x80\x80bar\n\x80foo\n";
+close(F);
+
+open(F,'<:encoding(utf-8)',$threebyte) || die "Cannot open $threebyte:$!";
+$dstr = join(":", <F>);
+close(F);
+print "not " unless $dstr eq "foo\\xF0\\x80\\x80\\x80bar\n:\\x80foo\n";
+print "ok 15\n";
+
+
END {
1 while unlink($grk, $utf, $fail1, $fail2, $russki, $threebyte);
}
End of Patch.