On June 16, 2005 at 00:10, Earl Hood wrote:
> Here is a new patch for DTD.pm that includes an additional
> fix: Parsing of attlist does not take into account the use
> of ',' as a value separator.
[snip]
Here is yet another updated patch containing one more bug fix. It
fixes the tokenizer inadvertently scarfing up parameter entity
references. For example, if you have
%x;%y;%z;
in an ATTLIST declaration, and x is defined as:
<!ENTITY % x "attr CDATA #IMPLIED">
you will end up with
#IMPLIED%y;%z;
as the attribute type.
The following is a patch against DTD.pm with the CVS ID:
$Id: DTD.pm,v 1.3 2001/08/01 14:45:40 nwalsh Exp $
Fixing the following bugs:
. Tag minimization look-ahead bug with empty parm entity reference.
. Setting of debug level from command-line.
. ATTLIST parsing does not support ',' as enumeration list separator.
. Tokenizer scarfing up parm entity references.
--- DTD.pm.org 2005-05-27 13:12:37.562500000 -0500
+++ DTD.pm 2005-06-16 03:26:59.109375000 -0500
@@ -491,7 +491,7 @@
$self->{'ATTR'} = {};
$self->{'NOTN'} = {};
$self->{'VERBOSE'} = $param{'Verbose'} || $param{'Debug'};
- $self->{'DEBUG'} = $param{'Debug'};
+ $self->debug($param{'Debug'});
$self->{'TITLE'} = $param{'Title'};
$self->{'UNEXPANDED_CONTENT'}
= $param{'UnexpandedContent'} ? 1 : 0;
@@ -618,10 +618,14 @@
sub debug {
my $self = shift;
my $val = shift;
- my $dbg = $self->{'DEBUG'};
-
- $self->{'DEBUG'} = $val if defined($val);
+ my $dbg = $debug;
+ if (defined($val)) {
+ $debug = $val;
+ if (ref($self)) {
+ $self->{'DEBUG'} = $debug;
+ }
+ }
return $dbg;
}
@@ -900,7 +904,7 @@
$dtd = $rest . $dtd;
($etagm, $dtd) = $self->next_token($dtd);
} else {
- $dtd = $tok . $dtd;
+ $dtd = $tok . $dtd if $expand =~ /\S/;
}
} elsif ($tok =~ /^[\-\o]/is) {
$stagm = $tok;
@@ -1028,7 +1032,7 @@
($pre, $enum, $dtd) = $mc->match($values . $dtd);
($ntok, $rest) = $self->next_token($enum);
while ($ntok) {
- if ($ntok =~ /[\|\(\)]/) {
+ if ($ntok =~ /[,\|\(\)]/) {
# nop
} else {
push (@enum, $ntok);
@@ -1188,26 +1192,26 @@
if ($dtd =~ /^<!\[/s) {
# beginning of a marked section
- print "TOK: [$&]\n" if $debug > 3;
+ print "TOK(ms): [$&]\n" if $debug > 3;
return ($&, $');
}
if ($dtd =~ /^[\(\)\-\+\|\&\,\>]/) {
# beginning of a model group, or incl., or excl., or end decl
- print "TOK: [$&]\n" if $debug > 3;
+ print "TOK(group): [$&]\n" if $debug > 3;
return ($&, $');
}
if ($dtd =~ /^[\"\']/) {
# quoted string
$dtd =~ /^(([\"\'])(.*?)\2)/s;
- print "TOK: [$1]\n" if $debug > 3;
+ print "TOK(quoted-string): [$1]\n" if $debug > 3;
return ($&, $');
}
if ($dtd =~ /^\%([a-zA-Z0-9\_\-\.]+);?/) {
# peref
- print "TOK: [$1]\n" if $debug > 3;
+ print "TOK(peref): [$1]\n" if $debug > 3;
if ($return_peref) {
return ("%$1;", $');
} else {
@@ -1217,9 +1221,15 @@
}
}
- if ($dtd =~ /^\s*([^\s\|\&\,\(\)\[\]\>]+)/s) {
+ if ($dtd =~ /^([^\s\|\&\,\(\)\[\]\>\%]+)/s) {
# next non-space sequence
- print "TOK: [$1]\n" if $debug > 3;
+ print "TOK(non-space): [$1]\n" if $debug > 3;
+ return ($1, $');
+ }
+
+ if ($dtd =~ /^(\%)/s) {
+ # lone % (for param entity declarations)
+ print "TOK(%): [$1]\n" if $debug > 3;
return ($1, $');
}