From: "Takashi P.KATOH"
<p-katoh(_at_)shiratori(_dot_)riec(_dot_)tohoku(_dot_)ac(_dot_)jp>
Subject: Re: New iso2022jp.pl
Date: Mon, 29 Jul 2002 18:07:38 +0900 (JST)
One question I have
with respect to iso-2022-jp is if the clip function you implemented
can be expanded to handle the $has_tags flag as shown in the above
function.
I think I can.
Please wait for a while.
I've done it.
The patch is attached to this mail.
--
Takashi P.KATOH
Index: iso2022jp.pl
===================================================================
RCS file: /cvsroot/mhonarc/mhonarc/MHonArc/lib/iso2022jp.pl,v
retrieving revision 1.7
diff -u -r1.7 iso2022jp.pl
--- iso2022jp.pl 28 Jul 2002 23:21:53 -0000 1.7
+++ iso2022jp.pl 30 Jul 2002 05:35:47 -0000
@@ -122,17 +122,20 @@
##---------------------------------------------------------------------------##
-## clip($str, $length, $html): Clip an iso-2022-jp string.
+## clip($str, $length, $is_html, $has_tags): Clip an iso-2022-jp string.
##
-## The last argument $html specifies '&' should be treated
+## The third argument $is_html specifies whether '&' should be treated
## as HTML character or not.
-## (i.e., the length of '&' will be 1 if $html).
+## (i.e., the length of '&' will be 1 if $is_html).
##
-sub clip { # &clip($str, 10, 1);
+sub clip { # &clip($str, 10, 1, 1);
+
+print STDERR "iso2022jp::clip is called\n";
+
my($str) = shift;
my($length) = shift;
- my($html) = shift;
- my($tags) = shift; # Not implemented, yet
+ my($is_html) = shift;
+ my($has_tags) = shift;
my($ret, $inascii);
local($_) = $str;
@@ -147,17 +150,22 @@
$ret .= $1;
while(1) {
if (s/^([^\033])//) { # ASCII plain text
- if ($html) {
- if ($1 eq '&') {
- s/^([^\;]*\;)//;
- $ret .= "&$1";
+ if ($is_html) {
+ if (($1 eq '<') && $has_tags) {
+ s/^[^>\033]*>//;
} else {
- $ret .= $1;
+ if ($1 eq '&') {
+ s/^([^\;]*\;)//;
+ $ret .= "&$1";
+ } else {
+ $ret .= $1;
+ }
+ $length--;
}
} else {
$ret .= $1;
+ $length--;
}
- $length--;
} elsif (s/(\033\.[A-F])//) { # G2 Designate Sequence
$ret .= $1;
} elsif (s/(\033N[ -])//) { # Single Shift Sequence
@@ -174,6 +182,8 @@
while (1) {
if (s/^([!-~][!-~])//) { # Double Char plain text
$ret .= $1;
+ # The length of a double-byte char is assumed to be 2.
+ # For compatibility with UTF-8 character counts, it should be 1.
$length -= 2;
} elsif (s/(\033\.[A-F])//) { # G2 Designate Sequence
$ret .= $1;