perl-unicode

Re: Program with utf8 identifiers fails to compile

1998-10-23 11:47:01
Gisle Aas writes:
: The included program will not compile.  Perl5.005_52 says:
: 
:   Unrecognized character \xE5 at ident.t line 8.
: 
: even though utf8.pm told me:
: 
:   Identifiers within the Perl script may contain Unicode alphanumeric
:   characters, including ideographs.  (You are currently on your own
:   when it comes to using the canonical forms of characters--Perl
:   doesn't (yet) attempt to canonicalize variable names for you.)
: 
: (There might be other errors in this program, as I had a hard time
: reading it myself :-)
: 
: Regards,
: Gisle
: 
: 
: begin 664 utf8/ident.t
: M<')I;G0@(C$N+C%<;B(["@HC(%1E<W0@:68@=71F."!C:&%R86-T97)S(&EN
: M(&ED96YT:69I97)S('-U8V@@87,@<')O8V5D=7)E(&YA;65S(&%N9`HC('9A
: M<FEA8FQE<R!W;W)K"@IU<V4@=71F.#L*"G-U8B#ENZ`*>PH@("!P<FEN="`B
: M;F]T("(@=6YL97-S('-H:69T(#T](#,S.PH@("!P<FEN="`B;VL@,5QN(CL*
: M?0H*<W5B('-NP[AVPZ9R('L*("`@;7D@)'-NP[@@/2`S,SL*("`@;7D@),.E
: J(#T@-#4["B`@(.6[H"@D<V[#N"P@),.E*3L*?0H*<V[#N';#IG(H*3L*
: `
: end
: [Bulk--not acked]

Here's a patch for this (and some related problems).

[Sarathy, this is already in the repository.]

Larry

diff -c -r perl5.005_52/doop.c perl5.005_52pat/doop.c
*** perl5.005_52/doop.c Thu Sep 24 23:03:34 1998
--- perl5.005_52pat/doop.c      Wed Oct 21 16:51:14 1998
***************
*** 215,224 ****
      send = s + len;
  
      while (s < send) {
!       if ((uv = swash_fetch(rv, s)) < none) {
!           s += UTF8SKIP(s);
            matches++;
!       }
      }
  
      return matches;
--- 215,223 ----
      send = s + len;
  
      while (s < send) {
!       if ((uv = swash_fetch(rv, s)) < none)
            matches++;
!       s += UTF8SKIP(s);
      }
  
      return matches;
diff -c -r perl5.005_52/gv.c perl5.005_52pat/gv.c
*** perl5.005_52/gv.c   Tue Sep 22 19:45:28 1998
--- perl5.005_52pat/gv.c        Wed Oct 21 11:47:39 1998
***************
*** 499,505 ****
      /* No stash in name, so see how we can default */
  
      if (!stash) {
!       if (isIDFIRST(*name)) {
            bool global = FALSE;
  
            if (isUPPER(*name)) {
--- 499,505 ----
      /* No stash in name, so see how we can default */
  
      if (!stash) {
!       if (isIDFIRST(*name) || (IN_UTF8 && ((*name & 0xc0) == 0xc0) && isIDFIRST_utf8(name))) {
            bool global = FALSE;
  
            if (isUPPER(*name)) {
diff -c -r perl5.005_52/op.c perl5.005_52pat/op.c
*** perl5.005_52/op.c   Thu Sep 24 03:16:53 1998
--- perl5.005_52pat/op.c        Wed Oct 21 15:42:27 1998
***************
*** 118,124 ****
      PADOFFSET off;
      SV *sv;
  
!     if (!(isALPHA(name[1]) || name[1] == '_' && (int)strlen(name) > 2)) {
        if (!isPRINT(name[1])) {
            name[3] = '\0';
            name[2] = toCTRL(name[1]);
--- 118,128 ----
      PADOFFSET off;
      SV *sv;
  
!     if (!(
!       isALPHA(name[1]) ||
!       (PL_hints & HINT_UTF8 && (name[1] & 0xc0) == 0xc0) ||
!       name[1] == '_' && (int)strlen(name) > 2))
!     {
        if (!isPRINT(name[1])) {
            name[3] = '\0';
            name[2] = toCTRL(name[1]);
***************
*** 1662,1668 ****
        dTHR;
        if (ckWARN(WARN_PARENTHESIS) && PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == ',') {
            char *s;
!           for (s = PL_bufptr; *s && (isALNUM(*s) || strchr("@$%, ",*s)); s++) ;
            if (*s == ';' || *s == '=')
                warner(WARN_PARENTHESIS, "Parens missing around \"%s\" list",
                                lex ? "my" : "local");
--- 1666,1672 ----
        dTHR;
        if (ckWARN(WARN_PARENTHESIS) && PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == ',') {
            char *s;
!           for (s = PL_bufptr; *s && (isALNUM(*s) || (*s & 0x80) || strchr("@$%, ",*s)); s++) ;
            if (*s == ';' || *s == '=')
                warner(WARN_PARENTHESIS, "Parens missing around \"%s\" list",
                                lex ? "my" : "local");
diff -c -r perl5.005_52/pp.c perl5.005_52pat/pp.c
*** perl5.005_52/pp.c   Wed Sep 23 03:29:38 1998
--- perl5.005_52pat/pp.c        Wed Oct 21 11:55:55 1998
***************
*** 1316,1321 ****
--- 1316,1325 ----
                sv_setsv(TARG, sv);
                *SvPV_force(TARG, len) = *s == '-' ? '+' : '-';
            }
+           else if (IN_UTF8 && *(U8*)s >= 0xc0 && isIDFIRST_utf8(s)) {
+               sv_setpvn(TARG, "-", 1);
+               sv_catsv(TARG, sv);
+           }
            else
                sv_setnv(TARG, -SvNV(sv));
            SETTARG;
diff -c -r perl5.005_52/pp_hot.c perl5.005_52pat/pp_hot.c
*** perl5.005_52/pp_hot.c       Sun Aug  9 23:36:13 1998
--- perl5.005_52pat/pp_hot.c    Wed Oct 21 12:03:06 1998
***************
*** 2502,2511 ****
            !(iogv = gv_fetchpv(packname, FALSE, SVt_PVIO)) ||
            !(ob=(SV*)GvIO(iogv)))
        {
!           if (!packname || !isIDFIRST(*packname))
                DIE("Can't call method \"%s\" %s", name,
                    SvOK(sv)? "without a package or object reference"
                            : "on an undefined value");
            stash = gv_stashpvn(packname, packlen, TRUE);
            goto fetch;
        }
--- 2502,2517 ----
            !(iogv = gv_fetchpv(packname, FALSE, SVt_PVIO)) ||
            !(ob=(SV*)GvIO(iogv)))
        {
!           if (!packname || 
!               ((*(U8*)packname >= 0xc0 && IN_UTF8)
!                   ? !isIDFIRST_utf8(packname)
!                   : !isIDFIRST(*packname)
!               ))
!           {
                DIE("Can't call method \"%s\" %s", name,
                    SvOK(sv)? "without a package or object reference"
                            : "on an undefined value");
+           }
            stash = gv_stashpvn(packname, packlen, TRUE);
            goto fetch;
        }
diff -c -r perl5.005_52/toke.c perl5.005_52pat/toke.c
*** perl5.005_52/toke.c Thu Sep 24 01:56:07 1998
--- perl5.005_52pat/toke.c      Fri Oct 23 10:50:05 1998
***************
*** 61,66 ****
--- 61,78 ----
  static char ident_too_long[] = "Identifier too long";
  
  #define UTF (PL_hints & HINT_UTF8)
+ /*
+  * Note: we try to be careful never to call the isXXX_utf8() functions
+  * unless we're pretty sure we've seen the beginning of a UTF-8 character
+  * (that is, the two high bits are set).  Otherwise we risk loading in the
+  * heavy-duty SWASHINIT and SWASHGET routines unnecessarily.
+  */
+ #define isIDFIRST_lazy(p) ((!UTF || (*((U8*)p) < 0xc0)) \
+                               ? isIDFIRST(*(p)) \
+                               : isIDFIRST_utf8((U8*)p))
+ #define isALNUM_lazy(p) ((!UTF || (*((U8*)p) < 0xc0)) \
+                               ? isALNUM(*(p)) \
+                               : isALNUM_utf8((U8*)p))
  
  /* The following are arranged oddly so that the guard on the switch statement
   * can get by with a single comparison (if the compiler is smart enough).
***************
*** 167,175 ****
      yywarn(form("%s found where operator expected", what));
      if (is_first)
        warn("\t(Missing semicolon on previous line?)\n");
!     else if (PL_oldoldbufptr && isIDFIRST(*PL_oldoldbufptr)) {
        char *t;
!       for (t = PL_oldoldbufptr; *t && (isALNUM(*t) || *t == ':'); t++) ;
        if (t < PL_bufptr && isSPACE(*t))
            warn("\t(Do you need to predeclare %.*s?)\n",
                t - PL_oldoldbufptr, PL_oldoldbufptr);
--- 179,187 ----
      yywarn(form("%s found where operator expected", what));
      if (is_first)
        warn("\t(Missing semicolon on previous line?)\n");
!     else if (PL_oldoldbufptr && isIDFIRST_lazy(PL_oldoldbufptr)) {
        char *t;
!       for (t = PL_oldoldbufptr; *t && (isALNUM_lazy(t) || *t == ':'); t++) ;
        if (t < PL_bufptr && isSPACE(*t))
            warn("\t(Do you need to predeclare %.*s?)\n",
                t - PL_oldoldbufptr, PL_oldoldbufptr);
***************
*** 476,482 ****
        return;
      while (isSPACE(*PL_last_uni))
        PL_last_uni++;
!     for (s = PL_last_uni; isALNUM(*s) || *s == '-'; s++) ;
      if ((t = strchr(s, '(')) && t < PL_bufptr)
        return;
      ch = *s;
--- 488,494 ----
        return;
      while (isSPACE(*PL_last_uni))
        PL_last_uni++;
!     for (s = PL_last_uni; isALNUM_lazy(s) || *s == '-'; s++) ;
      if ((t = strchr(s, '(')) && t < PL_bufptr)
        return;
      ch = *s;
***************
*** 552,558 ****
      
      start = skipspace(start);
      s = start;
!     if (isIDFIRST(*s) ||
        (allow_pack && *s == ':') ||
        (allow_initial_tick && *s == '\'') )
      {
--- 564,570 ----
      
      start = skipspace(start);
      s = start;
!     if (isIDFIRST_lazy(s) ||
        (allow_pack && *s == ':') ||
        (allow_initial_tick && *s == '\'') )
      {
***************
*** 993,999 ****
        }
  
        /* check for embedded arrays (@foo, @:foo, @'foo, @{foo}, @$foo) */
!       else if (*s == '@' && s[1] && (isALNUM(s[1]) || strchr(":'{$", s[1])))
            break;
  
        /* check for embedded scalars.  only stop if we're sure it's a
--- 1005,1011 ----
        }
  
        /* check for embedded arrays (@foo, @:foo, @'foo, @{foo}, @$foo) */
!       else if (*s == '@' && s[1] && (isALNUM_lazy(s+1) || strchr(":'{$", s[1])))
            break;
  
        /* check for embedded scalars.  only stop if we're sure it's a
***************
*** 1247,1253 ****
            case '&':
            case '$':
                weight -= seen[un_char] * 10;
!               if (isALNUM(s[1])) {
                    scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
                    if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
                        weight -= 100;
--- 1259,1265 ----
            case '&':
            case '$':
                weight -= seen[un_char] * 10;
!               if (isALNUM_lazy(s+1)) {
                    scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
                    if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
                        weight -= 100;
***************
*** 1856,1871 ****
    retry:
      switch (*s) {
      default:
!       /*
!        * Note: we try to be careful never to call the isXXX_utf8() functions unless we're
!        * pretty sure we've seen the beginning of a UTF-8 character (that is, the two high
!        * bits are set).  Otherwise we risk loading in the heavy-duty SWASHINIT and SWASHGET
!        * routines unnecessarily.  You will see this not just here but throughout this file.
!        */
!       if (UTF && (*s & 0xc0) == 0x80) {
!           if (isIDFIRST_utf8((U8*)s))
!               goto keylookup;
!       }
        croak("Unrecognized character \\x%02X", *s & 255);
      case 4:
      case 26:
--- 1868,1875 ----
    retry:
      switch (*s) {
      default:
!       if (isIDFIRST_lazy(s))
!           goto keylookup;
        croak("Unrecognized character \\x%02X", *s & 255);
      case 4:
      case 26:
***************
*** 2216,2222 ****
        else if (*s == '>') {
            s++;
            s = skipspace(s);
!           if (isIDFIRST(*s)) {
                s = force_word(s,METHOD,FALSE,TRUE,FALSE);
                TOKEN(ARROW);
            }
--- 2220,2226 ----
        else if (*s == '>') {
            s++;
            s = skipspace(s);
!           if (isIDFIRST_lazy(s)) {
                s = force_word(s,METHOD,FALSE,TRUE,FALSE);
                TOKEN(ARROW);
            }
***************
*** 2361,2367 ****
                while (d < PL_bufend && (*d == ' ' || *d == '\t'))
                    d++;
            }
!           if (d < PL_bufend && isIDFIRST(*d)) {
                d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
                              FALSE, &len);
                while (d < PL_bufend && (*d == ' ' || *d == '\t'))
--- 2365,2371 ----
                while (d < PL_bufend && (*d == ' ' || *d == '\t'))
                    d++;
            }
!           if (d < PL_bufend && isIDFIRST_lazy(d)) {
                d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
                              FALSE, &len);
                while (d < PL_bufend && (*d == ' ' || *d == '\t'))
***************
*** 2449,2456 ****
                    }
                    t++;
                }
!               else if (isALPHA(*s)) {
!                   for (t++; t < PL_bufend && isALNUM(*t); t++) ;
                }
                while (t < PL_bufend && isSPACE(*t))
                    t++;
--- 2453,2460 ----
                    }
                    t++;
                }
!               else if (isIDFIRST_lazy(s)) {
!                   for (t++; t < PL_bufend && isALNUM_lazy(t); t++) ;
                }
                while (t < PL_bufend && isSPACE(*t))
                    t++;
***************
*** 2460,2466 ****
                                   || (*t == '=' && t[1] == '>')))
                    OPERATOR(HASHBRACK);
                if (PL_expect == XREF)
!                   PL_expect = XTERM;
                else {
                    PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
                    PL_expect = XSTATE;
--- 2464,2470 ----
                                   || (*t == '=' && t[1] == '>')))
                    OPERATOR(HASHBRACK);
                if (PL_expect == XREF)
!                   PL_expect = XSTATE; /* was XTERM, trying XSTATE */
                else {
                    PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
                    PL_expect = XSTATE;
***************
*** 2508,2514 ****
            AOPERATOR(ANDAND);
        s--;
        if (PL_expect == XOPERATOR) {
!           if (ckWARN(WARN_SEMICOLON) && isALPHA(*s) && PL_bufptr == PL_linestart) {
                PL_curcop->cop_line--;
                warner(WARN_SEMICOLON, warn_nosemi);
                PL_curcop->cop_line++;
--- 2512,2518 ----
            AOPERATOR(ANDAND);
        s--;
        if (PL_expect == XOPERATOR) {
!           if (ckWARN(WARN_SEMICOLON) && isIDFIRST_lazy(s) && PL_bufptr == PL_linestart) {
                PL_curcop->cop_line--;
                warner(WARN_SEMICOLON, warn_nosemi);
                PL_curcop->cop_line++;
***************
*** 2638,2644 ****
            }
        }
  
!       if (s[1] == '#' && (isALPHA(s[2]) || strchr("_{$:+-", s[2]))) {
            if (PL_expect == XOPERATOR)
                no_op("Array length", PL_bufptr);
            PL_tokenbuf[0] = '@';
--- 2642,2648 ----
            }
        }
  
!       if (s[1] == '#' && (isIDFIRST_lazy(s+2) || strchr("{$:+-", s[2]))) {
            if (PL_expect == XOPERATOR)
                no_op("Array length", PL_bufptr);
            PL_tokenbuf[0] = '@';
***************
*** 2679,2685 ****
                PL_tokenbuf[0] = '@';
                if (ckWARN(WARN_SYNTAX)) {
                    for(t = s + 1;
!                       isSPACE(*t) || isALNUM(*t) || *t == '$';
                        t++) ;
                    if (*t++ == ',') {
                        PL_bufptr = skipspace(PL_bufptr);
--- 2683,2689 ----
                PL_tokenbuf[0] = '@';
                if (ckWARN(WARN_SYNTAX)) {
                    for(t = s + 1;
!                       isSPACE(*t) || isALNUM_lazy(t) || *t == '$';
                        t++) ;
                    if (*t++ == ',') {
                        PL_bufptr = skipspace(PL_bufptr);
***************
*** 2699,2705 ****
                    char tmpbuf[sizeof PL_tokenbuf];
                    STRLEN len;
                    for (t++; isSPACE(*t); t++) ;
!                   if (isIDFIRST(*t)) {
                        t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
                        if (*t != '(' && perl_get_cv(tmpbuf, FALSE))
                            warner(WARN_SYNTAX,
--- 2703,2709 ----
                    char tmpbuf[sizeof PL_tokenbuf];
                    STRLEN len;
                    for (t++; isSPACE(*t); t++) ;
!                   if (isIDFIRST_lazy(t)) {
                        t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
                        if (*t != '(' && perl_get_cv(tmpbuf, FALSE))
                            warner(WARN_SYNTAX,
***************
*** 2716,2724 ****
                PL_expect = XOPERATOR;
            else if (strchr("$@\"'`q", *s))
                PL_expect = XTERM;              /* e.g. print $fh "foo" */
!           else if (strchr("&*<%", *s) && isIDFIRST(s[1]))
                PL_expect = XTERM;              /* e.g. print $fh &sub */
!           else if (isIDFIRST(*s)) {
                char tmpbuf[sizeof PL_tokenbuf];
                scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
                if (tmp = keyword(tmpbuf, len)) {
--- 2720,2728 ----
                PL_expect = XOPERATOR;
            else if (strchr("$@\"'`q", *s))
                PL_expect = XTERM;              /* e.g. print $fh "foo" */
!           else if (strchr("&*<%", *s) && isIDFIRST_lazy(s+1))
                PL_expect = XTERM;              /* e.g. print $fh &sub */
!           else if (isIDFIRST_lazy(s)) {
                char tmpbuf[sizeof PL_tokenbuf];
                scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
                if (tmp = keyword(tmpbuf, len)) {
***************
*** 2776,2782 ****
            if (ckWARN(WARN_SYNTAX)) {
                if (*s == '[' || *s == '{') {
                    char *t = s + 1;
!                   while (*t && (isALNUM(*t) || strchr(" \t$#+-'\"", *t)))
                        t++;
                    if (*t == '}' || *t == ']') {
                        t++;
--- 2780,2786 ----
            if (ckWARN(WARN_SYNTAX)) {
                if (*s == '[' || *s == '{') {
                    char *t = s + 1;
!                   while (*t && (isALNUM_lazy(t) || strchr(" \t$#+-'\"", *t)))
                        t++;
                    if (*t == '}' || *t == ']') {
                        t++;
***************
*** 2797,2803 ****
            /* Disable warning on "study /blah/" */
            if (PL_oldoldbufptr == PL_last_uni 
                && (*PL_last_uni != 's' || s - PL_last_uni < 5 
!                   || memNE(PL_last_uni, "study", 5) || isALNUM(PL_last_uni[5])))
                check_uni();
            s = scan_pat(s,OP_MATCH);
            TERM(sublex_start());
--- 2801,2807 ----
            /* Disable warning on "study /blah/" */
            if (PL_oldoldbufptr == PL_last_uni 
                && (*PL_last_uni != 's' || s - PL_last_uni < 5 
!                   || memNE(PL_last_uni, "study", 5) || isALNUM_lazy(PL_last_uni+5)))
                check_uni();
            s = scan_pat(s,OP_MATCH);
            TERM(sublex_start());
***************
*** 3110,3116 ****
  
                    /* Two barewords in a row may indicate method call. */
  
!                   if ((isALPHA(*s) || *s == '$') && (tmp=intuit_method(s,gv)))
                        return tmp;
  
                    /* If not a declared subroutine, it's an indirect object. */
--- 3114,3120 ----
  
                    /* Two barewords in a row may indicate method call. */
  
!                   if ((isIDFIRST_lazy(s) || *s == '$') && (tmp=intuit_method(s,gv)))
                        return tmp;
  
                    /* If not a declared subroutine, it's an indirect object. */
***************
*** 3154,3160 ****
  
                /* If followed by a bareword, see if it looks like indir obj. */
  
!               if ((isALPHA(*s) || *s == '$') && (tmp = intuit_method(s,gv)))
                    return tmp;
  
                /* Not a method, so call it a subroutine (if defined) */
--- 3158,3164 ----
  
                /* If followed by a bareword, see if it looks like indir obj. */
  
!               if ((isIDFIRST_lazy(s) || *s == '$') && (tmp = intuit_method(s,gv)))
                    return tmp;
  
                /* Not a method, so call it a subroutine (if defined) */
***************
*** 3468,3480 ****
        case KEY_foreach:
            yylval.ival = PL_curcop->cop_line;
            s = skipspace(s);
!           if (PL_expect == XSTATE && isIDFIRST(*s)) {
                char *p = s;
                if ((PL_bufend - p) >= 3 &&
                    strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
                    p += 2;
                p = skipspace(p);
!               if (isIDFIRST(*p))
                    croak("Missing $ on loop variable");
            }
            OPERATOR(FOR);
--- 3472,3484 ----
        case KEY_foreach:
            yylval.ival = PL_curcop->cop_line;
            s = skipspace(s);
!           if (PL_expect == XSTATE && isIDFIRST_lazy(s)) {
                char *p = s;
                if ((PL_bufend - p) >= 3 &&
                    strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
                    p += 2;
                p = skipspace(p);
!               if (isIDFIRST_lazy(p))
                    croak("Missing $ on loop variable");
            }
            OPERATOR(FOR);
***************
*** 3662,3668 ****
            TERM(sublex_start());
  
        case KEY_map:
!           LOP(OP_MAPSTART,XREF);
            
        case KEY_mkdir:
            LOP(OP_MKDIR,XTERM);
--- 3666,3672 ----
            TERM(sublex_start());
  
        case KEY_map:
!           LOP(OP_MAPSTART, XREF);
            
        case KEY_mkdir:
            LOP(OP_MKDIR,XTERM);
***************
*** 3682,3688 ****
        case KEY_my:
            PL_in_my = TRUE;
            s = skipspace(s);
!           if (isIDFIRST(*s)) {
                s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
                PL_in_my_stash = gv_stashpv(PL_tokenbuf, FALSE);
                if (!PL_in_my_stash) {
--- 3686,3692 ----
        case KEY_my:
            PL_in_my = TRUE;
            s = skipspace(s);
!           if (isIDFIRST_lazy(s)) {
                s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
                PL_in_my_stash = gv_stashpv(PL_tokenbuf, FALSE);
                if (!PL_in_my_stash) {
***************
*** 3714,3722 ****
  
        case KEY_open:
            s = skipspace(s);
!           if (isIDFIRST(*s)) {
                char *t;
!               for (d = s; isALNUM(*d); d++) ;
                t = skipspace(d);
                if (strchr("|&*+-=!?:.", *t))
                    warn("Precedence problem: open %.*s should be open(%.*s)",
--- 3718,3726 ----
  
        case KEY_open:
            s = skipspace(s);
!           if (isIDFIRST_lazy(s)) {
                char *t;
!               for (d = s; isALNUM_lazy(d); d++) ;
                t = skipspace(d);
                if (strchr("|&*+-=!?:.", *t))
                    warn("Precedence problem: open %.*s should be open(%.*s)",
***************
*** 3839,3845 ****
        case KEY_require:
            *PL_tokenbuf = '\0';
            s = force_word(s,WORD,TRUE,TRUE,FALSE);
!           if (isIDFIRST(*PL_tokenbuf))
                gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
            else if (*s == '<')
                yyerror("<> should be quotes");
--- 3843,3849 ----
        case KEY_require:
            *PL_tokenbuf = '\0';
            s = force_word(s,WORD,TRUE,TRUE,FALSE);
!           if (isIDFIRST_lazy(PL_tokenbuf))
                gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
            else if (*s == '<')
                yyerror("<> should be quotes");
***************
*** 4023,4029 ****
          really_sub:
            s = skipspace(s);
  
!           if (isIDFIRST(*s) || *s == '\'' || *s == ':') {
                char tmpbuf[sizeof PL_tokenbuf];
                PL_expect = XBLOCK;
                d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
--- 4027,4033 ----
          really_sub:
            s = skipspace(s);
  
!           if (isIDFIRST_lazy(s) || *s == '\'' || *s == ':') {
                char tmpbuf[sizeof PL_tokenbuf];
                PL_expect = XBLOCK;
                d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
***************
*** 4895,4903 ****
        s++;
      while (s < PL_bufend && isSPACE(*s))
        s++;
!     if (isIDFIRST(*s)) {
        w = s++;
!       while (isALNUM(*s))
            s++;
        while (s < PL_bufend && isSPACE(*s))
            s++;
--- 4899,4907 ----
        s++;
      while (s < PL_bufend && isSPACE(*s))
        s++;
!     if (isIDFIRST_lazy(s)) {
        w = s++;
!       while (isALNUM_lazy(s))
            s++;
        while (s < PL_bufend && isSPACE(*s))
            s++;
***************
*** 4990,4998 ****
      for (;;) {
        if (d >= e)
            croak(ident_too_long);
!       if (isALNUM(*s))
            *d++ = *s++;
!       else if (*s == '\'' && allow_package && isIDFIRST(s[1])) {
            *d++ = ':';
            *d++ = ':';
            s++;
--- 4994,5002 ----
      for (;;) {
        if (d >= e)
            croak(ident_too_long);
!       if (isALNUM(*s))        /* UTF handled below */
            *d++ = *s++;
!       else if (*s == '\'' && allow_package && isIDFIRST_lazy(s+1)) {
            *d++ = ':';
            *d++ = ':';
            s++;
***************
*** 5001,5007 ****
            *d++ = *s++;
            *d++ = *s++;
        }
!       else if (UTF && (*s & 0xc0) == 0x80 && isALNUM_utf8((U8*)s)) {
            char *t = s + UTF8SKIP(s);
            while (*t & 0x80 && is_utf8_mark((U8*)t))
                t += UTF8SKIP(t);
--- 5005,5011 ----
            *d++ = *s++;
            *d++ = *s++;
        }
!       else if (UTF && *(U8*)s >= 0xc0 && isALNUM_utf8((U8*)s)) {
            char *t = s + UTF8SKIP(s);
            while (*t & 0x80 && is_utf8_mark((U8*)t))
                t += UTF8SKIP(t);
***************
*** 5044,5052 ****
        for (;;) {
            if (d >= e)
                croak(ident_too_long);
!           if (isALNUM(*s))
                *d++ = *s++;
!           else if (*s == '\'' && isIDFIRST(s[1])) {
                *d++ = ':';
                *d++ = ':';
                s++;
--- 5048,5056 ----
        for (;;) {
            if (d >= e)
                croak(ident_too_long);
!           if (isALNUM(*s))    /* UTF handled below */
                *d++ = *s++;
!           else if (*s == '\'' && isIDFIRST_lazy(s+1)) {
                *d++ = ':';
                *d++ = ':';
                s++;
***************
*** 5055,5061 ****
                *d++ = *s++;
                *d++ = *s++;
            }
!           else if (UTF && (*s & 0xc0) == 0x80 && isALNUM_utf8((U8*)s)) {
                char *t = s + UTF8SKIP(s);
                while (*t & 0x80 && is_utf8_mark((U8*)t))
                    t += UTF8SKIP(t);
--- 5059,5065 ----
                *d++ = *s++;
                *d++ = *s++;
            }
!           else if (UTF && *(U8*)s >= 0xc0 && isALNUM_utf8((U8*)s)) {
                char *t = s + UTF8SKIP(s);
                while (*t & 0x80 && is_utf8_mark((U8*)t))
                    t += UTF8SKIP(t);
***************
*** 5077,5083 ****
        return s;
      }
      if (*s == '$' && s[1] &&
!       (isALNUM(s[1]) || strchr("${", s[1]) || strnEQ(s+1,"::",2)) )
      {
        return s;
      }
--- 5081,5087 ----
        return s;
      }
      if (*s == '$' && s[1] &&
!       (isALNUM_lazy(s+1) || strchr("${", s[1]) || strnEQ(s+1,"::",2)) )
      {
        return s;
      }
***************
*** 5104,5114 ****
                }
            }
        }
!       if (isIDFIRST(*d) || (UTF && (*d & 0xc0) == 0x80 && isIDFIRST_utf8((U8*)d))) {
            d++;
            if (UTF) {
                e = s;
!               while (e < send && (isALNUM(*e) || ((*e & 0xc0) == 0x80 && isALNUM_utf8((U8*)e)) || *e == ':')) {
                    e += UTF8SKIP(e);
                    while (e < send && *e & 0x80 && is_utf8_mark((U8*)e))
                        e += UTF8SKIP(e);
--- 5108,5118 ----
                }
            }
        }
!       if (isIDFIRST_lazy(d)) {
            d++;
            if (UTF) {
                e = s;
!               while (e < send && isALNUM_lazy(e) || *e == ':') {
                    e += UTF8SKIP(e);
                    while (e < send && *e & 0x80 && is_utf8_mark((U8*)e))
                        e += UTF8SKIP(e);
***************
*** 5394,5402 ****
            s++, term = '\'';
        else
            term = '"';
!       if (!isALNUM(*s))
            deprecate("bare << to mean <<\"\"");
!       for (; isALNUM(*s); s++) {
            if (d < e)
                *d++ = *s;
        }
--- 5398,5406 ----
            s++, term = '\'';
        else
            term = '"';
!       if (!isALNUM_lazy(s))
            deprecate("bare << to mean <<\"\"");
!       for (; isALNUM_lazy(s); s++) {
            if (d < e)
                *d++ = *s;
        }
***************
*** 5577,5583 ****
      if (*d == '$' && d[1]) d++;
  
      /* allow <Pkg'VALUE> or <Pkg::VALUE> */
!     while (*d && (isALNUM(*d) || *d == '\'' || *d == ':'))
        d++;
  
      /* If we've tried to read what we allow filehandles to look like, and
--- 5581,5587 ----
      if (*d == '$' && d[1]) d++;
  
      /* allow <Pkg'VALUE> or <Pkg::VALUE> */
!     while (*d && (isALNUM_lazy(d) || *d == '\'' || *d == ':'))
        d++;
  
      /* If we've tried to read what we allow filehandles to look like, and

<Prev in Thread] Current Thread [Next in Thread>