mhonarc-dev

Re: New iso2022jp.pl

2002-07-29 22:52:21
From: "Takashi P.KATOH" 
<p-katoh(_at_)shiratori(_dot_)riec(_dot_)tohoku(_dot_)ac(_dot_)jp>
Subject: Re: New iso2022jp.pl
Date: Mon, 29 Jul 2002 18:07:38 +0900 (JST)
                                                One question I have
with respect to iso-2022-jp is if the clip function you implemented
can be expanded to handle the $has_tags flag as shown in the above
function.
I think I can.
Please wait for a while.

I've finished it.
The patch is attached to this mail.

-- 
Takashi P.KATOH

Index: iso2022jp.pl
===================================================================
RCS file: /cvsroot/mhonarc/mhonarc/MHonArc/lib/iso2022jp.pl,v
retrieving revision 1.7
diff -u -r1.7 iso2022jp.pl
--- iso2022jp.pl        28 Jul 2002 23:21:53 -0000      1.7
+++ iso2022jp.pl        30 Jul 2002 05:35:47 -0000
@@ -122,17 +122,20 @@
 
 
 ##---------------------------------------------------------------------------##
-##     clip($str, $length, $html): Clip an iso-2022-jp string.
+##     clip($str, $length, $is_html, $has_tags): Clip an iso-2022-jp string.
 ##
-##   The last argument $html specifies '&' should be treated
+##   The third argument $is_html specifies whether '&' should be treated
 ##   as HTML character or not.
-##   (i.e., the length of '&amp;' will be 1 if $html).
+##   (i.e., the length of '&amp;' will be 1 if $is_html).
 ##
-sub clip {     # &clip($str, 10, 1);
+sub clip {     # &clip($str, 10, 1, 1);
+
+print STDERR "iso2022jp::clip is called\n";
+
     my($str) = shift;
     my($length) = shift;
-    my($html) = shift;
-    my($tags) = shift; # Not implemented, yet
+    my($is_html) = shift;
+    my($has_tags) = shift;
     my($ret, $inascii);
     local($_) = $str;
 
@@ -147,17 +150,22 @@
            $ret .= $1;
            while(1) {
                if (s/^([^\033])//) {      # ASCII plain text
-                   if ($html) {
-                       if ($1 eq '&') {
-                           s/^([^\;]*\;)//;
-                           $ret .= "&$1";
+                   if ($is_html) {
+                       if (($1 eq '<') && $has_tags) {
+                           s/^[^>\033]*>//;
                        } else {
-                           $ret .= $1;
+                           if ($1 eq '&') {
+                               s/^([^\;]*\;)//;
+                               $ret .= "&$1";
+                           } else {
+                               $ret .= $1;
+                           }
+                           $length--;
                        }
                    } else {
                        $ret .= $1;
+                       $length--;
                    }
-                   $length--;
                } elsif (s/(\033\.[A-F])//) { # G2 Designate Sequence
                    $ret .= $1;
                } elsif (s/(\033N[ -])//) { # Single Shift Sequence
@@ -174,6 +182,8 @@
            while (1) {
                if (s/^([!-~][!-~])//) { # Double Char plain text
                    $ret .= $1;
+                   # The length of a double-byte-char is assumed 2.
+                   # If we consider compatibility with UTF-8, it should be 1.
                    $length -= 2;
                } elsif (s/(\033\.[A-F])//) { # G2 Designate Sequence
                    $ret .= $1;
<Prev in Thread] Current Thread [Next in Thread>