On Wed, 03 May 2000 10:19:47 +0200, "Stefan Eissing" wrote:
Background: I have patched DBD::Oracle to recognize utf8 locale
and return utf8 scalars for Perl 5.6.0. It works. In one of the
standard tests however, a string with utf8 chars is inserted
into a BLOB, correctly read back again, but not eq to the original
string.
Perl 5.6.0 has known bugs in Unicode support (which is why it is
marked "experimental"). eq not knowing about SvUTF8 is one of them.
The attached patch should help.
Sarathy
gsar(_at_)ActiveState(_dot_)com
-----------------------------------8<-----------------------------------
Change 5921 by gsar(_at_)auger on 2000/04/24 06:58:26
make eq unicode-aware (from Gisle Aas); fix bogus tests revealed
by fix
Affected files ...
... //depot/perl/sv.c#226 edit
... //depot/perl/t/lib/charnames.t#9 edit
... //depot/perl/t/pragma/utf8.t#6 edit
Differences ...
==== //depot/perl/sv.c#226 (text) ====
Index: perl/sv.c
--- perl/sv.c.~1~ Wed May 3 08:41:47 2000
+++ perl/sv.c Wed May 3 08:41:47 2000
@@ -3920,10 +3920,19 @@
else
pv1 = SvPV(str1, cur1);
- if (!str2)
- return !cur1;
- else
- pv2 = SvPV(str2, cur2);
+ if (cur1) {
+ if (!str2)
+ return 0;
+ if (SvUTF8(str1) != SvUTF8(str2)) {
+ if (SvUTF8(str1)) {
+ sv_utf8_upgrade(str2);
+ }
+ else {
+ sv_utf8_upgrade(str1);
+ }
+ }
+ }
+ pv2 = SvPV(str2, cur2);
if (cur1 != cur2)
return 0;
==== //depot/perl/t/lib/charnames.t#9 (text) ====
Index: perl/t/lib/charnames.t
--- perl/t/lib/charnames.t.~1~ Wed May 3 08:41:47 2000
+++ perl/t/lib/charnames.t Wed May 3 08:41:47 2000
@@ -42,15 +42,21 @@
$encoded_be = "\320\261";
$encoded_alpha = "\316\261";
$encoded_bet = "\327\221";
+
+sub to_bytes {
+ use bytes;
+ my $bytes = shift;
+}
+
{
use charnames ':full';
- print "not " unless "\N{CYRILLIC SMALL LETTER BE}" eq $encoded_be;
+ print "not " unless to_bytes("\N{CYRILLIC SMALL LETTER BE}") eq $encoded_be;
print "ok 4\n";
use charnames qw(cyrillic greek :short);
- print "not " unless "\N{be},\N{alpha},\N{hebrew:bet}"
+ print "not " unless to_bytes("\N{be},\N{alpha},\N{hebrew:bet}")
eq "$encoded_be,$encoded_alpha,$encoded_bet";
print "ok 5\n";
}
==== //depot/perl/t/pragma/utf8.t#6 (xtext) ====
Index: perl/t/pragma/utf8.t
--- perl/t/pragma/utf8.t.~1~ Wed May 3 08:41:47 2000
+++ perl/t/pragma/utf8.t Wed May 3 08:41:47 2000
@@ -25,64 +25,64 @@
$_ = ">\x{263A}<";
s/([\x{80}-\x{10ffff}])/"&#".ord($1).";"/eg;
ok $_, '>☺<';
- $test++;
+ $test++; # 1
$_ = ">\x{263A}<";
my $rx = "\x{80}-\x{10ffff}";
s/([$rx])/"&#".ord($1).";"/eg;
ok $_, '>☺<';
- $test++;
+ $test++; # 2
$_ = ">\x{263A}<";
my $rx = "\\x{80}-\\x{10ffff}";
s/([$rx])/"&#".ord($1).";"/eg;
ok $_, '>☺<';
- $test++;
+ $test++; # 3
$_ = "alpha,numeric";
m/([[:alpha:]]+)/;
ok $1, 'alpha';
- $test++;
+ $test++; # 4
$_ = "alphaNUMERICstring";
m/([[:^lower:]]+)/;
ok $1, 'NUMERIC';
- $test++;
+ $test++; # 5
$_ = "alphaNUMERICstring";
m/(\p{Ll}+)/;
ok $1, 'alpha';
- $test++;
+ $test++; # 6
$_ = "alphaNUMERICstring";
m/(\p{Lu}+)/;
ok $1, 'NUMERIC';
- $test++;
+ $test++; # 7
$_ = "alpha,numeric";
m/([\p{IsAlpha}]+)/;
ok $1, 'alpha';
- $test++;
+ $test++; # 8
$_ = "alphaNUMERICstring";
m/([^\p{IsLower}]+)/;
ok $1, 'NUMERIC';
- $test++;
+ $test++; # 9
$_ = "alpha123numeric456";
m/([\p{IsDigit}]+)/;
ok $1, '123';
- $test++;
+ $test++; # 10
$_ = "alpha123numeric456";
m/([^\p{IsDigit}]+)/;
ok $1, 'alpha';
- $test++;
+ $test++; # 11
$_ = ",123alpha,456numeric";
m/([\p{IsAlnum}]+)/;
ok $1, '123alpha';
- $test++;
+ $test++; # 12
}
{
use utf8;
@@ -90,80 +90,88 @@
$_ = "\x{263A}>\x{263A}\x{263A}";
ok length, 4;
- $test++;
+ $test++; # 13
ok length((m/>(.)/)[0]), 1;
- $test++;
+ $test++; # 14
ok length($&), 2;
- $test++;
+ $test++; # 15
ok length($'), 1;
- $test++;
+ $test++; # 16
ok length($`), 1;
- $test++;
+ $test++; # 17
ok length($1), 1;
- $test++;
+ $test++; # 18
ok length($tmp=$&), 2;
- $test++;
+ $test++; # 19
ok length($tmp=$'), 1;
- $test++;
+ $test++; # 20
ok length($tmp=$`), 1;
- $test++;
+ $test++; # 21
ok length($tmp=$1), 1;
- $test++;
+ $test++; # 22
+
+ {
+ use bytes;
- ok $&, pack("C*", ord(">"), 0342, 0230, 0272);
- $test++;
+ my $tmp = $&;
+ ok $tmp, pack("C*", ord(">"), 0342, 0230, 0272);
+ $test++; # 23
- ok $', pack("C*", 0342, 0230, 0272);
- $test++;
+ $tmp = $';
+ ok $tmp, pack("C*", 0342, 0230, 0272);
+ $test++; # 24
- ok $`, pack("C*", 0342, 0230, 0272);
- $test++;
+ $tmp = $`;
+ ok $tmp, pack("C*", 0342, 0230, 0272);
+ $test++; # 25
- ok $1, pack("C*", 0342, 0230, 0272);
- $test++;
+ $tmp = $1;
+ ok $tmp, pack("C*", 0342, 0230, 0272);
+ $test++; # 26
+ }
{
use bytes;
no utf8;
ok length, 10;
- $test++;
+ $test++; # 27
ok length((m/>(.)/)[0]), 1;
- $test++;
+ $test++; # 28
ok length($&), 2;
- $test++;
+ $test++; # 29
ok length($'), 5;
- $test++;
+ $test++; # 30
ok length($`), 3;
- $test++;
+ $test++; # 31
ok length($1), 1;
- $test++;
+ $test++; # 32
ok $&, pack("C*", ord(">"), 0342);
- $test++;
+ $test++; # 33
ok $', pack("C*", 0230, 0272, 0342, 0230, 0272);
- $test++;
+ $test++; # 34
ok $`, pack("C*", 0342, 0230, 0272);
- $test++;
+ $test++; # 35
ok $1, pack("C*", 0342);
- $test++;
+ $test++; # 36
}
@@ -174,80 +182,87 @@
}
ok length, 10;
- $test++;
+ $test++; # 37
ok length((m/>(.)/)[0]), 1;
- $test++;
+ $test++; # 38
ok length($&), 2;
- $test++;
+ $test++; # 39
ok length($'), 1;
- $test++;
+ $test++; # 40
ok length($`), 1;
- $test++;
+ $test++; # 41
ok length($1), 1;
- $test++;
+ $test++; # 42
ok length($tmp=$&), 2;
- $test++;
+ $test++; # 43
ok length($tmp=$'), 1;
- $test++;
+ $test++; # 44
ok length($tmp=$`), 1;
- $test++;
+ $test++; # 45
ok length($tmp=$1), 1;
- $test++;
+ $test++; # 46
- ok $&, pack("C*", ord(">"), 0342, 0230, 0272);
- $test++;
+ {
+ use bytes;
- ok $', pack("C*", 0342, 0230, 0272);
- $test++;
+ my $tmp = $&;
+ ok $tmp, pack("C*", ord(">"), 0342, 0230, 0272);
+ $test++; # 47
- ok $`, pack("C*", 0342, 0230, 0272);
- $test++;
+ $tmp = $';
+ ok $tmp, pack("C*", 0342, 0230, 0272);
+ $test++; # 48
- ok $1, pack("C*", 0342, 0230, 0272);
- $test++;
+ $tmp = $`;
+ ok $tmp, pack("C*", 0342, 0230, 0272);
+ $test++; # 49
+ $tmp = $1;
+ ok $tmp, pack("C*", 0342, 0230, 0272);
+ $test++; # 50
+ }
{
use bytes;
no utf8;
ok length, 10;
- $test++;
+ $test++; # 51
ok length((m/>(.)/)[0]), 1;
- $test++;
+ $test++; # 52
ok length($&), 2;
- $test++;
+ $test++; # 53
ok length($'), 5;
- $test++;
+ $test++; # 54
ok length($`), 3;
- $test++;
+ $test++; # 55
ok length($1), 1;
- $test++;
+ $test++; # 56
ok $&, pack("C*", ord(">"), 0342);
- $test++;
+ $test++; # 57
ok $', pack("C*", 0230, 0272, 0342, 0230, 0272);
- $test++;
+ $test++; # 58
ok $`, pack("C*", 0342, 0230, 0272);
- $test++;
+ $test++; # 59
ok $1, pack("C*", 0342);
- $test++;
+ $test++; # 60
}
}
End of Patch.