perl-unicode

[PATCH] again! (was: Encode alias implementation fixed!)

2002-03-23 22:44:44
Hello Dan!

I think
DK>                  if (ref($alias) eq 'Regexp' && $k =~ $alias)
DK>                  {
DK>                      $DEBUG and warn $k;
DK>                      delete $Alias{$k};
              This line is ok ^^^^^^^^^
DK>                  }
DK>                  elsif (ref($alias) eq 'CODE')
DK>                  {
DK>                      delete $Alias{$alias->($name)};
              While this is wrong ^^^^^^^^^^
DK>                  }
DK>              }
DK>          }else{
DK>              delete $Alias{$alias};
              And so is ^^^^^^^^^^^^^^^^^
DK>          }

I propose a patch, of course this can be fixed 10 ways at least,
mine is only one of the ways.

I also propose a patch to Aliases.t that demonstrates how
the current code is failing with alias being a simple
line of text. The patched code does not fail it.

I do not propose a patch that tests the case with 'CODE',
but just from the looking at the current and patched
sources we may concluded that there was being done
something wrong.

I have also added a test that tests if utf8 is mapped
to UTF-8.

BTW I feel greatly surprised by seeing

    define_alias( qr/^utf8$/i  => 'utf-8' );

and yet getting UTF-8. How does it work?

And was it designed to accept KOI8-R as well as koi8-r?

My best regards - Anton

P.S. It's a bad, bad, bad habit not to sleep at nights!
We should get rid of it ;-)


--- ext/Encode/lib/Encode/Alias.pm.orig Sat Mar 23 01:51:30 2002
+++ ext/Encode/lib/Encode/Alias.pm      Sun Mar 24 08:24:48 2002
@@ -18,6 +18,38 @@
 our @Alias;  # ordered matching list
 our %Alias;  # cached known aliases
 
+#implementation sub
+sub hammer_alias
+{
+    my ($key,$alias,$val)=(shift,shift,shift);
+    my $new;
+
+    if (ref($alias) eq 'Regexp' && $key =~ $alias)
+    {
+       $new = eval $val;
+       # $@ and warn "$val, $@";
+    }
+    elsif (ref($alias) eq 'CODE')
+    {
+       $new = $alias->($val);
+    }
+    elsif (lc($key) eq lc($alias))
+    {
+       $new = $val;
+    }
+    # ne avoids (direct) recursion on bugs
+    if (defined($new) && $new ne $key )
+    {                    
+       my $enc = (ref($new)) ? $new : Encode::find_encoding($new);
+       if ($enc)
+       {
+           $Alias{$key} = $enc;
+           return 1;
+       }
+    }
+    return undef;
+}
+
 sub find_alias
 {
     my $class = shift;
@@ -26,32 +58,7 @@
     {
        for (my $i=0; $i < @Alias; $i += 2)
        {
-           my $alias = $Alias[$i];
-           my $val   = $Alias[$i+1];
-           my $new;
-           if (ref($alias) eq 'Regexp' && $_ =~ $alias)
-           {
-               $new = eval $val;
-               # $@ and warn "$val, $@";
-           }
-           elsif (ref($alias) eq 'CODE')
-           {
-               $new = $alias->($val);
-           }
-           elsif (lc($_) eq lc($alias))
-           {
-               $new = $val;
-           }
-           if (defined($new))
-           {
-               next if $new eq $_; # avoid (direct) recursion on bugs
-               my $enc = (ref($new)) ? $new : Encode::find_encoding($new);
-               if ($enc)
-               {
-                   $Alias{$_} = $enc;
-                   last;
-               }
-           }
+            last if hammer_alias( $_, $Alias[$i], $Alias[$i+1] );
        }
     }
     return $Alias{$_};
@@ -64,21 +71,31 @@
        my ($alias,$name) = splice(@_,0,2);
        unshift(@Alias, $alias => $name);   # newer one has precedence
        # clear %Alias cache to allow overrides
-       if (ref($alias)){
-           my @a = keys %Alias;
+       my @a = keys %Alias;
+        if (ref($alias) eq 'CODE')
+        {
            for my $k (@a){
-               if (ref($alias) eq 'Regexp' && $k =~ $alias)
+               hammer_alias( $k, $alias, $name );
+           }
+        }
+        elsif (ref($alias) eq 'Regexp')
+        {
+           for my $k (@a){
+               if ($k =~ $alias)
                {
                    $DEBUG and warn $k;
                    delete $Alias{$k};
                }
-               elsif (ref($alias) eq 'CODE')
+           }
+       }
+        else{
+           for my $k (@a){
+               if (lc($k) eq lc($alias))
                {
-                   delete $Alias{$alias->($name)};
+                   $DEBUG and warn $k;
+                   delete $Alias{$k};
                }
            }
-       }else{
-           delete $Alias{$alias};
        }
     }
 }


-------------------------------------end of patch 1--------

--- ext/Encode/t/Aliases.t.orig Sat Mar 23 21:22:32 2002
+++ ext/Encode/t/Aliases.t      Sun Mar 24 08:41:12 2002
@@ -20,6 +20,7 @@
                'arabic'   => 'iso-8859-6',
                'greek'    => 'iso-8859-7',
                'hebrew'   => 'iso-8859-8',
+               'Hebrew'   => 'iso-8859-8',
                'thai'     => 'iso-8859-11',
                'tis620'   => 'iso-8859-11',
                'ja_JP.euc'     => 'euc-jp',
@@ -44,6 +45,7 @@
                'WinArabic'     => 'cp1256',
                'WinBaltic'     => 'cp1257',
                'WinVietnamese' => 'cp1258',
+                'utf8'          => 'UTF-8',
                );
 
        for my $i (1..11,13..16){
@@ -75,6 +77,10 @@
 define_alias( qr/sjis$/i        => '"cp932"' );
 
 @a2c{qw(Shift_JIS x-sjis)} = qw(macjapan cp932);
+
+define_alias( 'hebrew' => 'UTF-8' );
+
+(_at_)a2c{qw(hebrew Hebrew)} = qw(UTF-8 UTF-8);
 
 print "# alias test with alias overrides\n";


<Prev in Thread] Current Thread [Next in Thread>