Hello Dan!
I think
DK> if (ref($alias) eq 'Regexp' && $k =~ $alias)
DK> {
DK> $DEBUG and warn $k;
DK> delete $Alias{$k};
This line is ok ^^^^^^^^^
DK> }
DK> elsif (ref($alias) eq 'CODE')
DK> {
DK> delete $Alias{$alias->($name)};
While this is wrong ^^^^^^^^^^
DK> }
DK> }
DK> }else{
DK> delete $Alias{$alias};
And so is ^^^^^^^^^^^^^^^^^
DK> }
I propose a patch, of course this can be fixed 10 ways at least,
mine is only one of the ways.
I also propose a patch to Aliases.t that demonstrates how
the current code is failing with alias being a simple
line of text. The patched code does not fail it.
I do not propose a patch that tests the case with 'CODE',
but just from the looking at the current and patched
sources we may concluded that there was being done
something wrong.
I have also added a test that tests if utf8 is mapped
to UTF-8.
BTW I feel greatly surprised by seeing
define_alias( qr/^utf8$/i => 'utf-8' );
and yet getting UTF-8. How does it work?
And was it designed to accept KOI8-R as well as koi8-r?
My best regards - Anton
P.S. It's a bad, bad, bad habit not to sleep at nights!
We should get rid of it ;-)
--- ext/Encode/lib/Encode/Alias.pm.orig Sat Mar 23 01:51:30 2002
+++ ext/Encode/lib/Encode/Alias.pm Sun Mar 24 08:24:48 2002
@@ -18,6 +18,38 @@
our @Alias; # ordered matching list
our %Alias; # cached known aliases
+#implementation sub
+sub hammer_alias
+{
+ my ($key,$alias,$val)=(shift,shift,shift);
+ my $new;
+
+ if (ref($alias) eq 'Regexp' && $key =~ $alias)
+ {
+ $new = eval $val;
+ # $@ and warn "$val, $@";
+ }
+ elsif (ref($alias) eq 'CODE')
+ {
+ $new = $alias->($val);
+ }
+ elsif (lc($key) eq lc($alias))
+ {
+ $new = $val;
+ }
+ # ne avoids (direct) recursion on bugs
+ if (defined($new) && $new ne $key )
+ {
+ my $enc = (ref($new)) ? $new : Encode::find_encoding($new);
+ if ($enc)
+ {
+ $Alias{$key} = $enc;
+ return 1;
+ }
+ }
+ return undef;
+}
+
sub find_alias
{
my $class = shift;
@@ -26,32 +58,7 @@
{
for (my $i=0; $i < @Alias; $i += 2)
{
- my $alias = $Alias[$i];
- my $val = $Alias[$i+1];
- my $new;
- if (ref($alias) eq 'Regexp' && $_ =~ $alias)
- {
- $new = eval $val;
- # $@ and warn "$val, $@";
- }
- elsif (ref($alias) eq 'CODE')
- {
- $new = $alias->($val);
- }
- elsif (lc($_) eq lc($alias))
- {
- $new = $val;
- }
- if (defined($new))
- {
- next if $new eq $_; # avoid (direct) recursion on bugs
- my $enc = (ref($new)) ? $new : Encode::find_encoding($new);
- if ($enc)
- {
- $Alias{$_} = $enc;
- last;
- }
- }
+ last if hammer_alias( $_, $Alias[$i], $Alias[$i+1] );
}
}
return $Alias{$_};
@@ -64,21 +71,31 @@
my ($alias,$name) = splice(@_,0,2);
unshift(@Alias, $alias => $name); # newer one has precedence
# clear %Alias cache to allow overrides
- if (ref($alias)){
- my @a = keys %Alias;
+ my @a = keys %Alias;
+ if (ref($alias) eq 'CODE')
+ {
for my $k (@a){
- if (ref($alias) eq 'Regexp' && $k =~ $alias)
+ hammer_alias( $k, $alias, $name );
+ }
+ }
+ elsif (ref($alias) eq 'Regexp')
+ {
+ for my $k (@a){
+ if ($k =~ $alias)
{
$DEBUG and warn $k;
delete $Alias{$k};
}
- elsif (ref($alias) eq 'CODE')
+ }
+ }
+ else{
+ for my $k (@a){
+ if (lc($k) eq lc($alias))
{
- delete $Alias{$alias->($name)};
+ $DEBUG and warn $k;
+ delete $Alias{$k};
}
}
- }else{
- delete $Alias{$alias};
}
}
}
-------------------------------------end of patch 1--------
--- ext/Encode/t/Aliases.t.orig Sat Mar 23 21:22:32 2002
+++ ext/Encode/t/Aliases.t Sun Mar 24 08:41:12 2002
@@ -20,6 +20,7 @@
'arabic' => 'iso-8859-6',
'greek' => 'iso-8859-7',
'hebrew' => 'iso-8859-8',
+ 'Hebrew' => 'iso-8859-8',
'thai' => 'iso-8859-11',
'tis620' => 'iso-8859-11',
'ja_JP.euc' => 'euc-jp',
@@ -44,6 +45,7 @@
'WinArabic' => 'cp1256',
'WinBaltic' => 'cp1257',
'WinVietnamese' => 'cp1258',
+ 'utf8' => 'UTF-8',
);
for my $i (1..11,13..16){
@@ -75,6 +77,10 @@
define_alias( qr/sjis$/i => '"cp932"' );
@a2c{qw(Shift_JIS x-sjis)} = qw(macjapan cp932);
+
+define_alias( 'hebrew' => 'UTF-8' );
+
+(_at_)a2c{qw(hebrew Hebrew)} = qw(UTF-8 UTF-8);
print "# alias test with alias overrides\n";