perl-unicode

Re: binary compare of scalars

2000-05-03 08:48:18
On Wed, 03 May 2000 10:19:47 +0200, "Stefan Eissing" wrote:
Background: I have patched DBD::Oracle to recognize utf8 locale
and return utf8 scalars for Perl 5.6.0. It works. In one of the
standard tests however, a string with utf8 chars is inserted
into a BLOB, correctly read back again, but not eq to the original
string. 

Perl 5.6.0 has known bugs in Unicode support (which is why it is
marked "experimental").  eq not knowing about SvUTF8 is one of them.

The attached patch should help.


Sarathy
gsar(_at_)ActiveState(_dot_)com
-----------------------------------8<-----------------------------------
Change 5921 by gsar(_at_)auger on 2000/04/24 06:58:26

        make eq unicode-aware (from Gisle Aas); fix bogus tests revealed
        by fix

Affected files ...

... //depot/perl/sv.c#226 edit
... //depot/perl/t/lib/charnames.t#9 edit
... //depot/perl/t/pragma/utf8.t#6 edit

Differences ...

==== //depot/perl/sv.c#226 (text) ====
Index: perl/sv.c
--- perl/sv.c.~1~       Wed May  3 08:41:47 2000
+++ perl/sv.c   Wed May  3 08:41:47 2000
@@ -3920,10 +3920,19 @@
     else
        pv1 = SvPV(str1, cur1);
 
-    if (!str2)
-       return !cur1;
-    else
-       pv2 = SvPV(str2, cur2);
+    if (cur1) {
+       if (!str2)
+           return 0;
+       if (SvUTF8(str1) != SvUTF8(str2)) {
+           if (SvUTF8(str1)) {
+               sv_utf8_upgrade(str2);
+           }
+           else {
+               sv_utf8_upgrade(str1);
+           }
+       }
+    }
+    pv2 = SvPV(str2, cur2);
 
     if (cur1 != cur2)
        return 0;

==== //depot/perl/t/lib/charnames.t#9 (text) ====
Index: perl/t/lib/charnames.t
--- perl/t/lib/charnames.t.~1~  Wed May  3 08:41:47 2000
+++ perl/t/lib/charnames.t      Wed May  3 08:41:47 2000
@@ -42,15 +42,21 @@
 $encoded_be = "\320\261";
 $encoded_alpha = "\316\261";
 $encoded_bet = "\327\221";
+
+sub to_bytes {
+    use bytes;
+    my $bytes = shift;
+}
+
 {
   use charnames ':full';
 
-  print "not " unless "\N{CYRILLIC SMALL LETTER BE}" eq $encoded_be;
+  print "not " unless to_bytes("\N{CYRILLIC SMALL LETTER BE}") eq $encoded_be;
   print "ok 4\n";
 
   use charnames qw(cyrillic greek :short);
 
-  print "not " unless "\N{be},\N{alpha},\N{hebrew:bet}" 
+  print "not " unless to_bytes("\N{be},\N{alpha},\N{hebrew:bet}")
     eq "$encoded_be,$encoded_alpha,$encoded_bet";
   print "ok 5\n";
 }

==== //depot/perl/t/pragma/utf8.t#6 (xtext) ====
Index: perl/t/pragma/utf8.t
--- perl/t/pragma/utf8.t.~1~    Wed May  3 08:41:47 2000
+++ perl/t/pragma/utf8.t        Wed May  3 08:41:47 2000
@@ -25,64 +25,64 @@
     $_ = ">\x{263A}<"; 
     s/([\x{80}-\x{10ffff}])/"&#".ord($1).";"/eg; 
     ok $_, '>&#9786;<';
-    $test++;
+    $test++;                           # 1
 
     $_ = ">\x{263A}<"; 
     my $rx = "\x{80}-\x{10ffff}";
     s/([$rx])/"&#".ord($1).";"/eg; 
     ok $_, '>&#9786;<';
-    $test++;
+    $test++;                           # 2
 
     $_ = ">\x{263A}<"; 
     my $rx = "\\x{80}-\\x{10ffff}";
     s/([$rx])/"&#".ord($1).";"/eg; 
     ok $_, '>&#9786;<';
-    $test++;
+    $test++;                           # 3
 
     $_ = "alpha,numeric"; 
     m/([[:alpha:]]+)/; 
     ok $1, 'alpha';
-    $test++;
+    $test++;                           # 4
 
     $_ = "alphaNUMERICstring";
     m/([[:^lower:]]+)/; 
     ok $1, 'NUMERIC';
-    $test++;
+    $test++;                           # 5
 
     $_ = "alphaNUMERICstring";
     m/(\p{Ll}+)/; 
     ok $1, 'alpha';
-    $test++;
+    $test++;                           # 6
 
     $_ = "alphaNUMERICstring"; 
     m/(\p{Lu}+)/; 
     ok $1, 'NUMERIC';
-    $test++;
+    $test++;                           # 7
 
     $_ = "alpha,numeric"; 
     m/([\p{IsAlpha}]+)/; 
     ok $1, 'alpha';
-    $test++;
+    $test++;                           # 8
 
     $_ = "alphaNUMERICstring";
     m/([^\p{IsLower}]+)/; 
     ok $1, 'NUMERIC';
-    $test++;
+    $test++;                           # 9
 
     $_ = "alpha123numeric456"; 
     m/([\p{IsDigit}]+)/; 
     ok $1, '123';
-    $test++;
+    $test++;                           # 10
 
     $_ = "alpha123numeric456"; 
     m/([^\p{IsDigit}]+)/; 
     ok $1, 'alpha';
-    $test++;
+    $test++;                           # 11
 
     $_ = ",123alpha,456numeric"; 
     m/([\p{IsAlnum}]+)/; 
     ok $1, '123alpha';
-    $test++;
+    $test++;                           # 12
 }
 {
     use utf8;
@@ -90,80 +90,88 @@
     $_ = "\x{263A}>\x{263A}\x{263A}"; 
 
     ok length, 4;
-    $test++;
+    $test++;                           # 13
 
     ok length((m/>(.)/)[0]), 1;
-    $test++;
+    $test++;                           # 14
 
     ok length($&), 2;
-    $test++;
+    $test++;                           # 15
 
     ok length($'), 1;
-    $test++;
+    $test++;                           # 16
 
     ok length($`), 1;
-    $test++;
+    $test++;                           # 17
 
     ok length($1), 1;
-    $test++;
+    $test++;                           # 18
 
     ok length($tmp=$&), 2;
-    $test++;
+    $test++;                           # 19
 
     ok length($tmp=$'), 1;
-    $test++;
+    $test++;                           # 20
 
     ok length($tmp=$`), 1;
-    $test++;
+    $test++;                           # 21
 
     ok length($tmp=$1), 1;
-    $test++;
+    $test++;                           # 22
+
+    {
+        use bytes;
 
-    ok $&, pack("C*", ord(">"), 0342, 0230, 0272);
-    $test++;
+        my $tmp = $&;
+       ok $tmp, pack("C*", ord(">"), 0342, 0230, 0272);
+       $test++;                                # 23
 
-    ok $', pack("C*", 0342, 0230, 0272);
-    $test++;
+       $tmp = $';
+       ok $tmp, pack("C*", 0342, 0230, 0272);
+       $test++;                                # 24
 
-    ok $`, pack("C*", 0342, 0230, 0272);
-    $test++;
+       $tmp = $`;
+       ok $tmp, pack("C*", 0342, 0230, 0272);
+       $test++;                                # 25
 
-    ok $1, pack("C*", 0342, 0230, 0272);
-    $test++;
+       $tmp = $1;
+       ok $tmp, pack("C*", 0342, 0230, 0272);
+       $test++;                                # 26
+    }
 
     {
        use bytes;
        no utf8;
 
        ok length, 10;
-       $test++;
+       $test++;                                # 27
 
        ok length((m/>(.)/)[0]), 1;
-       $test++;
+       $test++;                                # 28
 
        ok length($&), 2;
-       $test++;
+       $test++;                                # 29
 
        ok length($'), 5;
-       $test++;
+       $test++;                                # 30
 
        ok length($`), 3;
-       $test++;
+       $test++;                                # 31
 
        ok length($1), 1;
-       $test++;
+       $test++;                                # 32
 
        ok $&, pack("C*", ord(">"), 0342);
-       $test++;
+       $test++;                                # 33
 
        ok $', pack("C*", 0230, 0272, 0342, 0230, 0272);
-       $test++;
+       $test++;                                # 34
 
        ok $`, pack("C*", 0342, 0230, 0272);
-       $test++;
+       $test++;                                # 35
 
        ok $1, pack("C*", 0342);
-       $test++;
+       $test++;                                # 36
 
     }
 
@@ -174,80 +182,87 @@
     }
 
     ok length, 10;
-    $test++;
+    $test++;                           # 37
 
     ok length((m/>(.)/)[0]), 1;
-    $test++;
+    $test++;                           # 38
 
     ok length($&), 2;
-    $test++;
+    $test++;                           # 39
 
     ok length($'), 1;
-    $test++;
+    $test++;                           # 40
 
     ok length($`), 1;
-    $test++;
+    $test++;                           # 41
 
     ok length($1), 1;
-    $test++;
+    $test++;                           # 42
 
     ok length($tmp=$&), 2;
-    $test++;
+    $test++;                           # 43
 
     ok length($tmp=$'), 1;
-    $test++;
+    $test++;                           # 44
 
     ok length($tmp=$`), 1;
-    $test++;
+    $test++;                           # 45
 
     ok length($tmp=$1), 1;
-    $test++;
+    $test++;                           # 46
 
-    ok $&, pack("C*", ord(">"), 0342, 0230, 0272);
-    $test++;
+    {
+       use bytes;
 
-    ok $', pack("C*", 0342, 0230, 0272);
-    $test++;
+        my $tmp = $&;
+       ok $tmp, pack("C*", ord(">"), 0342, 0230, 0272);
+       $test++;                                # 47
 
-    ok $`, pack("C*", 0342, 0230, 0272);
-    $test++;
+        $tmp = $';
+       ok $tmp, pack("C*", 0342, 0230, 0272);
+       $test++;                                # 48
 
-    ok $1, pack("C*", 0342, 0230, 0272);
-    $test++;
+        $tmp = $`;
+       ok $tmp, pack("C*", 0342, 0230, 0272);
+       $test++;                                # 49
 
+        $tmp = $1;
+       ok $tmp, pack("C*", 0342, 0230, 0272);
+       $test++;                                # 50
+    }
     {
        use bytes;
        no utf8;
 
        ok length, 10;
-       $test++;
+       $test++;                                # 51
 
        ok length((m/>(.)/)[0]), 1;
-       $test++;
+       $test++;                                # 52
 
        ok length($&), 2;
-       $test++;
+       $test++;                                # 53
 
        ok length($'), 5;
-       $test++;
+       $test++;                                # 54
 
        ok length($`), 3;
-       $test++;
+       $test++;                                # 55
 
        ok length($1), 1;
-       $test++;
+       $test++;                                # 56
 
        ok $&, pack("C*", ord(">"), 0342);
-       $test++;
+       $test++;                                # 57
 
        ok $', pack("C*", 0230, 0272, 0342, 0230, 0272);
-       $test++;
+       $test++;                                # 58
 
        ok $`, pack("C*", 0342, 0230, 0272);
-       $test++;
+       $test++;                                # 59
 
        ok $1, pack("C*", 0342);
-       $test++;
+       $test++;                                # 60
 
     }
 }
End of Patch.

<Prev in Thread] Current Thread [Next in Thread>