I just checked in these changes to ext/Encode/... as change 16022
on perlio branch.
- switch to XSLoader
- spelling & trailing whitespace removal.
- remove a "use loop" (Encode loaded PerlIO::encoding, loaded Encode)
it never loops, but such things cause problems for imports.
- Changed how LEAVE_SRC was tested
x & ~y is not same as !(x & y)
- Moved Unicode.xs towards supporting same check values.
- Set @Encode::XS::ISA to Encode::Encoding
- added ->needs_lines method with my best guess at which ones do.
I still cannot get TODO tests in t/perlio.t despite some work
on PerlIO::encoding to honour ->needs_lines. I need to study it some more.
What I really want to do is get have PerlIO::encoding use fallback
schemes. Which ENCODE_FB_XXX flag bit(s) give me fallback characters but still
remove translated stuff from the src buffer?
Perhaps "update src" should be an active rather than a passive bit?
--
Nick Ing-Simmons
http://www.ni-s.u-net.com/
==== //depot/perlio/ext/Encode/Encode.pm#64 -
/home/p4work/perl/perlio/ext/Encode/Encode.pm ====
Index: perlio/ext/Encode/Encode.pm
--- perlio/ext/Encode/Encode.pm.~1~ Sat Apr 20 20:36:47 2002
+++ perlio/ext/Encode/Encode.pm Sat Apr 20 20:36:47 2002
@@ -2,12 +2,12 @@
use strict;
our $VERSION = do { my @r = (q$Revision: 1.50 $ =~ /\d+/g); sprintf
"%d."."%02d" x $#r, @r };
our $DEBUG = 0;
+use XSLoader ();
+XSLoader::load 'Encode';
-require DynaLoader;
require Exporter;
+our @ISA = qw(Exporter);
-our @ISA = qw(Exporter DynaLoader);
-
# Public, encouraged API is exported by default
our @EXPORT = qw(
@@ -19,7 +19,7 @@
our @FB_CONSTS = qw(FB_DEFAULT FB_QUIET FB_WARN FB_PERLQQ FB_CROAK);
our @EXPORT_OK =
- (
+ (
qw(
_utf8_off _utf8_on define_encoding from_to is_16bit is_8bit
is_utf8 perlio_ok resolve_alias utf8_downgrade utf8_upgrade
@@ -27,16 +27,13 @@
@FB_FLAGS, @FB_CONSTS,
);
-our %EXPORT_TAGS =
+our %EXPORT_TAGS =
(
all => [ @EXPORT, @EXPORT_OK ],
fallbacks => [ @FB_CONSTS ],
fallback_all => [ @FB_CONSTS, @FB_FLAGS ],
);
-
-bootstrap Encode ();
-
# Documentation moved after __END__ for speed - NI-S
use Carp;
@@ -57,7 +54,7 @@
my @modules = (@_ and $_[0] eq ":all") ? values %ExtModule : @_;
for my $mod (@modules){
$mod =~ s,::,/,g or $mod = "Encode/$mod";
- $mod .= '.pm';
+ $mod .= '.pm';
$DEBUG and warn "about to require $mod;";
eval { require $mod; };
}
@@ -193,7 +190,7 @@
# This is to restore %Encoding if really needed;
#
sub predefine_encodings{
- if ($ON_EBCDIC) {
+ if ($ON_EBCDIC) {
# was in Encode::UTF_EBCDIC
package Encode::UTF_EBCDIC;
*name = sub{ shift->{'Name'} };
@@ -202,7 +199,7 @@
my ($obj,$str,$chk) = @_;
my $res = '';
for (my $i = 0; $i < length($str); $i++) {
- $res .=
+ $res .=
chr(utf8::unicode_to_native(ord(substr($str,$i,1))));
}
$_[1] = '' if $chk;
@@ -212,15 +209,15 @@
my ($obj,$str,$chk) = @_;
my $res = '';
for (my $i = 0; $i < length($str); $i++) {
- $res .=
+ $res .=
chr(utf8::native_to_unicode(ord(substr($str,$i,1))));
}
$_[1] = '' if $chk;
return $res;
};
- $Encode::Encoding{Unicode} =
+ $Encode::Encoding{Unicode} =
bless {Name => "UTF_EBCDIC"} => "Encode::UTF_EBCDIC";
- } else {
+ } else {
# was in Encode::UTF_EBCDIC
package Encode::Internal;
*name = sub{ shift->{'Name'} };
@@ -232,7 +229,7 @@
return $str;
};
*encode = \&decode;
- $Encode::Encoding{Unicode} =
+ $Encode::Encoding{Unicode} =
bless {Name => "Internal"} => "Encode::Internal";
}
@@ -256,15 +253,14 @@
$_[1] = '' if $chk;
return $octets;
};
- $Encode::Encoding{utf8} =
+ $Encode::Encoding{utf8} =
bless {Name => "utf8"} => "Encode::utf8";
}
}
require Encode::Encoding;
+(_at_)Encode::XS::ISA = qw(Encode::Encoding);
-eval qq{ use PerlIO::encoding 0.02 };
-# warn $@ if $@;
1;
@@ -281,14 +277,14 @@
=head2 Table of Contents
-Encode consists of a collection of modules which details are too big
+Encode consists of a collection of modules which details are too big
to fit in one document. This POD itself explains the top-level APIs
-and general topics at a glance. For other topics and more details,
+and general topics at a glance. For other topics and more details,
see the PODs below;
Name Description
--------------------------------------------------------
- Encode::Alias Alias defintions to encodings
+ Encode::Alias Alias definitions to encodings
Encode::Encoding Encode Implementation Base Class
Encode::Supported List of Supported Encodings
Encode::CN Simplified Chinese Encodings
@@ -359,7 +355,7 @@
For CHECK see L</"Handling Malformed Data">.
For example to convert (internally UTF-8 encoded) Unicode string to
-iso-8859-1 (also known as Latin1),
+iso-8859-1 (also known as Latin1),
$octets = encode("iso-8859-1", $unicode);
@@ -439,7 +435,7 @@
@ebcdic = Encode->encodings("EBCDIC");
-To find which encodings are supported by this package in details,
+To find which encodings are supported by this package in details,
see L<Encode::Supported>.
=head2 Defining Aliases
@@ -462,7 +458,7 @@
Encode::resolve_alias("iso-8859-12") # false; nonexistent
Encode::resolve_alias($name) eq $name # true if $name is canonical
-This resolve_alias() does not need C<use Encode::Alias> and is
+This resolve_alias() does not need C<use Encode::Alias> and is
exported via C<use encode qw(resolve_alias)>.
See L<Encode::Alias> on details.
@@ -481,7 +477,7 @@
# via from_to
open my $in, $infile or die;
open my $out, $outfile or die;
- while(<>){
+ while(<>){
from_to($_, "shiftjis", "euc", 1);
}
@@ -508,7 +504,7 @@
place of the malformed character. for UCM-based encodings,
E<lt>subcharE<gt> will be used. For Unicode, \xFFFD is used. If the
data is supposed to be UTF-8, an optional lexical warning (category
-utf8) is given.
+utf8) is given.
=item I<CHECK> = Encode::DIE_ON_ERROR (== 1)
@@ -519,10 +515,10 @@
=item I<CHECK> = Encode::FB_QUIET
If I<CHECK> is set to Encode::FB_QUIET, (en|de)code will immediately
-return proccessed part on error, with data passed via argument
-overwritten with unproccessed part. This is handy when have to
+return processed part on error, with data passed via argument
+overwritten with unprocessed part. This is handy when have to
repeatedly call because the source data is chopped in the middle for
-some reasons, such as fixed-width buffer. Here is a sample code that
+some reasons, such as fixed-width buffer. Here is a sample code that
just does this.
my $data = '';
@@ -547,7 +543,7 @@
representation of the octet that could not be decoded to utf8. And
when you encode, '\x{I<xxxx>}' will be placed where I<xxxx> is the
Unicode ID of the character that cannot be found in the character
-repartoire of the encoding.
+repertoire of the encoding.
=item The bitmask
@@ -616,12 +612,12 @@
L<Encode::Encoding>,
L<Encode::Supported>,
-L<Encode::PerlIO>,
+L<Encode::PerlIO>,
L<encoding>,
-L<perlebcdic>,
-L<perlfunc/open>,
-L<perlunicode>,
-L<utf8>,
+L<perlebcdic>,
+L<perlfunc/open>,
+L<perlunicode>,
+L<utf8>,
the Perl Unicode Mailing List E<lt>perl-unicode(_at_)perl(_dot_)orgE<gt>
=head1 MAINTAINER
==== //depot/perlio/ext/Encode/Encode.xs#65 -
/home/p4work/perl/perlio/ext/Encode/Encode.xs ====
Index: perlio/ext/Encode/Encode.xs
--- perlio/ext/Encode/Encode.xs.~1~ Sat Apr 20 20:36:47 2002
+++ perlio/ext/Encode/Encode.xs Sat Apr 20 20:36:47 2002
@@ -193,8 +193,8 @@
}
}
ENCODE_SET_SRC:
- if (check & ~ENCODE_LEAVE_SRC){
- sdone = SvCUR(src) - (slen+sdone);
+ if (check && !(check & ENCODE_LEAVE_SRC)){
+ sdone = SvCUR(src) - (slen+sdone);
if (sdone) {
sv_setpvn(src, (char*)s+slen, sdone);
}
==== //depot/perlio/ext/Encode/Unicode/Unicode.xs#1 -
/home/p4work/perl/perlio/ext/Encode/Unicode/Unicode.xs ====
Index: perlio/ext/Encode/Unicode/Unicode.xs
--- perlio/ext/Encode/Unicode/Unicode.xs.~1~ Sat Apr 20 20:36:47 2002
+++ perlio/ext/Encode/Unicode/Unicode.xs Sat Apr 20 20:36:47 2002
@@ -6,6 +6,8 @@
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
+#define U8 U8
+#include "../Encode/encode.h"
#define FBCHAR 0xFFFd
#define BOM_BE 0xFeFF
@@ -80,11 +82,13 @@
MODULE = Encode::Unicode PACKAGE = Encode::Unicode
+PROTOTYPES: DISABLE
+
void
-decode_xs(obj, str, chk = &PL_sv_undef)
+decode_xs(obj, str, check = 0)
SV * obj
SV * str
-SV * chk
+IV check
CODE:
{
int size = SvIV(*hv_fetch((HV *)SvRV(obj),"size",4,0));
@@ -124,14 +128,14 @@
U8 *d;
if (size != 4 && invalid_ucs2(ord)) {
if (ucs2) {
- if (SvTRUE(chk)) {
+ if (check) {
croak("%s:no surrogates allowed %"UVxf,
SvPV_nolen(*hv_fetch((HV *)SvRV(obj),"Name",4,0)),
ord);
}
if (s+size <= e) {
/* skip the next one as well */
- enc_unpack(aTHX_ &s,e,size,endian);
+ enc_unpack(aTHX_ &s,e,size,endian);
}
ord = FBCHAR;
}
@@ -160,10 +164,12 @@
d = uvuni_to_utf8_flags(d+SvCUR(result), ord, 0);
SvCUR_set(result,d - (U8 *)SvPVX(result));
}
- if (SvTRUE(chk)) {
- if (s < e) {
+ if (s < e) {
Perl_warner(aTHX_ packWARN(WARN_UTF8),"%s:Partial character",
SvPV_nolen(*hv_fetch((HV *)SvRV(obj),"Name",4,0)));
+ }
+ if (check && !(check & ENCODE_LEAVE_SRC)){
+ if (s < e) {
Move(s,SvPVX(str),e-s,U8);
SvCUR_set(str,(e-s));
}
@@ -176,10 +182,10 @@
}
void
-encode_xs(obj, utf8, chk = &PL_sv_undef)
- SV * obj
+encode_xs(obj, utf8, check = 0)
+SV * obj
SV * utf8
-SV * chk
+IV check
CODE:
{
int size = SvIV(*hv_fetch((HV *)SvRV(obj),"size",4,0));
@@ -205,7 +211,7 @@
if (size != 4 && invalid_ucs2(ord)) {
if (!issurrogate(ord)){
if (ucs2) {
- if (SvTRUE(chk)) {
+ if (check) {
croak("%s:code point \"\\x{"UVxf"}\" too high",
SvPV_nolen(
*hv_fetch((HV *)SvRV(obj),"Name",4,0))
@@ -228,10 +234,12 @@
enc_pack(aTHX_ result,size,endian,ord);
}
}
- if (SvTRUE(chk)) {
+ if (s < e) {
+ Perl_warner(aTHX_ packWARN(WARN_UTF8),"%s:Partial character",
+ SvPV_nolen(*hv_fetch((HV *)SvRV(obj),"Name",4,0)));
+ }
+ if (check && !(check & ENCODE_LEAVE_SRC)){
if (s < e) {
- Perl_warner(aTHX_ packWARN(WARN_UTF8),"%s:Partial character",
- SvPV_nolen(*hv_fetch((HV *)SvRV(obj),"Name",4,0)));
Move(s,SvPVX(utf8),e-s,U8);
SvCUR_set(utf8,(e-s));
}
==== //depot/perlio/ext/Encode/lib/Encode/Encoding.pm#8 -
/home/p4work/perl/perlio/ext/Encode/lib/Encode/Encoding.pm ====
Index: perlio/ext/Encode/lib/Encode/Encoding.pm
--- perlio/ext/Encode/lib/Encode/Encoding.pm.~1~ Sat Apr 20 20:36:47 2002
+++ perlio/ext/Encode/lib/Encode/Encoding.pm Sat Apr 20 20:36:47 2002
@@ -20,6 +20,8 @@
sub new_sequence { return $_[0] }
+sub needs_lines { 0 }
+
sub DESTROY {}
1;
==== //depot/perlio/ext/Encode/lib/Encode/JP/JIS7.pm#2 -
/home/p4work/perl/perlio/ext/Encode/lib/Encode/JP/JIS7.pm ====
Index: perlio/ext/Encode/lib/Encode/JP/JIS7.pm
--- perlio/ext/Encode/lib/Encode/JP/JIS7.pm.~1~ Sat Apr 20 20:36:47 2002
+++ perlio/ext/Encode/lib/Encode/JP/JIS7.pm Sat Apr 20 20:36:47 2002
@@ -7,8 +7,8 @@
for my $name ('7bit-jis', 'iso-2022-jp', 'iso-2022-jp-1'){
my $h2z = ($name eq '7bit-jis') ? 0 : 1;
my $jis0212 = ($name eq 'iso-2022-jp') ? 0 : 1;
-
- $Encode::Encoding{$name} =
+
+ $Encode::Encoding{$name} =
bless {
Name => $name,
h2z => $h2z,
@@ -17,7 +17,10 @@
}
sub name { shift->{'Name'} }
-sub new_sequence { $_[0] };
+
+sub new_sequence { $_[0] }
+
+sub needs_lines { 1 }
use Encode::CJKConstants qw(:all);
@@ -87,7 +90,7 @@
((?:$RE{EUC_C})+|(?:$RE{EUC_KANA})+|(?:$RE{EUC_0212})+)
}{
my $chunk = $1;
- my $esc =
+ my $esc =
( $chunk =~ tr/\x8E//d ) ? $ESC{KANA} :
( $chunk =~ tr/\x8F//d ) ? $ESC{JIS_0212} :
$ESC{JIS_0208};
==== //depot/perlio/ext/Encode/lib/Encode/KR/2022_KR.pm#3 -
/home/p4work/perl/perlio/ext/Encode/lib/Encode/KR/2022_KR.pm ====
Index: perlio/ext/Encode/lib/Encode/KR/2022_KR.pm
--- perlio/ext/Encode/lib/Encode/KR/2022_KR.pm.~1~ Sat Apr 20 20:36:47 2002
+++ perlio/ext/Encode/lib/Encode/KR/2022_KR.pm Sat Apr 20 20:36:47 2002
@@ -13,6 +13,8 @@
sub name { return $_[0]->{name}; }
+sub needs_lines { 1 }
+
sub decode
{
my ($obj,$str,$chk) = @_;
@@ -35,14 +37,14 @@
sub iso_euc{
my $r_str = shift;
- $$r_str =~ s/$RE{'2022_KR'}//gox; # remove the designator
+ $$r_str =~ s/$RE{'2022_KR'}//gox; # remove the designator
$$r_str =~ s{ # replace chars. in GL
\x0e # between SO(\x0e) and SI(\x0f)
([^\x0f]*) # with chars. in GR
\x0f
}
{
- my $out= $1;
+ my $out= $1;
$out =~ tr/\x21-\x7e/\xa1-\xfe/;
$out;
}geox;
@@ -51,7 +53,7 @@
sub euc_iso{
my $r_str = shift;
- substr($$r_str,0,0)=$ESC{'2022_KR'}; # put the designator at the beg.
+ substr($$r_str,0,0)=$ESC{'2022_KR'}; # put the designator at the beg.
$$r_str =~ s{ # move KS X 1001 chars. in GR to GL
($RE{EUC_C}+) # and enclose them with SO and SI
}{
==== //depot/perlio/ext/Encode/t/perlio.t#1 -
/home/p4work/perl/perlio/ext/Encode/t/perlio.t ====
Index: perlio/ext/Encode/t/perlio.t
--- perlio/ext/Encode/t/perlio.t.~1~ Sat Apr 20 20:36:47 2002
+++ perlio/ext/Encode/t/perlio.t Sat Apr 20 20:36:47 2002
@@ -13,7 +13,8 @@
exit 0;
}
require Encode;
- unless ($INC{"PerlIO/encoding.pm"}
+ eval { require PerlIO::encoding };
+ unless ($INC{"PerlIO/encoding.pm"}
and PerlIO::encoding->VERSION >= 0.02
){
print "1..0 # Skip:: PerlIO::encoding 0.02 or better required\n";
@@ -95,7 +96,7 @@
}
close $fh;
ok($utext eq $dtext, "<:encoding($e); line-by-line");
- }
+ }
$DEBUG or unlink ($sfile, $pfile);
}
End of Patch.