$B$+$H$&(B $B$G$9!#(B
(B
(BFrom: $BEOJU>!0l(B <Shoichi(_dot_)Watanabe(_at_)furuno(_dot_)co(_dot_)jp>
(BSubject: Re: mail$B$N9TF,J8;z2=$1(B [ESC(B]
(BDate: Thu, 11 Sep 2008 08:15:18 +0900
(B> $B$+$H$&$5$s$NEj9F$N;d$N0zMQItJ,$K$b9TF,ItJ,$Ke:No$K(BESC(B ?$B$J%3!<%I$,(B
(B> $BIU$$$F$$$^$9$M!#(B
(B> html$B%U%!%$%k$KJQ49$9$k$H!$$3$N$h$&$KI=<($5$l$^$9!#(B
(B> http://www.mhonarc.org/archive/html/mhonarc-users-jp/2008-09/msg00000.html
(B
$B$&!";d$N4D6-$(_a$(D????$BHI=<($5$l$J$$$N$G!(B?$B$IU$$$F$^$;$s$G$7$?!(B\xA3
(B
(Biso2022jp.pl $B$N%P%0$N$h$&$J5$$,$7$F$-$^$7$?(B...
(B
(Blib/iso2022jp.pl $B$rE:IU$N$b$N$HCV$-49$($F;n$7$F$_$F$$$?$(_a$(D????$B1$(B\xCA
$B$$$G$7$g$&$+(B?
(B
(B# $B$3$l$C$F!"1F6AHO0O$,$+$J$j9-$$$+$b(B...
(B
(B--
$B$+$H$Z(B / $B2CF#(B $B5.;J(B
##---------------------------------------------------------------------------##
## File:
## $Id: iso2022jp.pl,v 1.9 2002/12/04 20:00:39 ehood Exp $
## Author(s):
## Earl Hood mhonarc(_at_)mhonarc(_dot_)org
## NIIBE Yutaka gniibe(_at_)mri(_dot_)co(_dot_)jp
## Takashi P.KATOH
p-katoh(_at_)shiratori(_dot_)riec(_dot_)tohoku(_dot_)ac(_dot_)jp
## Description:
## Library defines routine to process iso-2022-jp data.
##---------------------------------------------------------------------------##
## Copyright (C) 1995-2002
## Earl Hood, mhonarc(_at_)mhonarc(_dot_)org
## NIIBE Yutaka, gniibe(_at_)mri(_dot_)co(_dot_)jp
## Takashi P.KATOH,
p-katoh(_at_)shiratori(_dot_)riec(_dot_)tohoku(_dot_)ac(_dot_)jp
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
## 02111-1307, USA
##---------------------------------------------------------------------------##
package iso_2022_jp;
$Url = '(http://|https://|ftp://|afs://|wais://|telnet://|ldap://' .
'|gopher://|news:|nntp:|mid:|cid:|mailto:|prospero:)';
$UrlExp = $Url . q%[^\s\(\)\|<>"']*[^\.?!;,"'\|\[\]\(\)\s<>]%;
$HUrlExp = $Url . q%[^\s\(\)\|<>"'\&]*[^\.?!;,"'\|\[\]\(\)\s<>\&]%;
##---------------------------------------------------------------------------##
## str2html(): Convert an iso-2022-jp string into HTML. Function
## interface similiar as iso8859.pl function.
##
sub str2html { jp2022_to_html($_[0], 1); }
##---------------------------------------------------------------------------##
## Function to convert ISO-2022-JP data into HTML. Function is based
## on the following RFCs:
##
## RFC-1468 I
## J. Murai, M. Crispin, E. van der Poel, "Japanese Character
## Encoding for Internet Messages", 06/04/1993. (Pages=6)
##
## RFC-1554 I
## M. Ohta, K. Handa, "ISO-2022-JP-2: Multilingual Extension of
## ISO-2022-JP", 12/23/1993. (Pages=6)
##
sub jp2022_to_html {
my($body) = shift;
my($nourl) = shift;
my(@lines) = split(/\r?\n/,$body);
my($ret, $ascii_text);
local($_);
$ret = "";
my $cnt = scalar(@lines);
my $i = 0;
foreach (@lines) {
my $line = "";
# a trick to process preceding ASCII text
$_ = "\033(B" . $_ unless /^\033/;
# Process Each Segment
while(1) {
if (s/^(\033\([BJ])//) { # Single Byte Segment
$line .= $1;
while(1) {
if (s/^([^\033]+)//) { # ASCII plain text
$ascii_text = $1;
# Replace meta characters in ASCII plain text
$ascii_text =~ s%\&%\&%g;
$ascii_text =~ s%<%\<%g;
$ascii_text =~ s%>%\>%g;
## Convert URLs to hyperlinks
$ascii_text =~ s%($HUrlExp)%<a href="$1">$1</a>%gio
unless $nourl;
$line .= $ascii_text;
} elsif (s/(\033\.[A-F])//) { # G2 Designate Sequence
$line .= $1;
} elsif (s/(\033N[ -])//) { # Single Shift Sequence
$line .= $1;
} else {
last;
}
}
} elsif (s/^(\033\$[\(_at_)AB]|\033\$\([CD])//) { # Double Byte
Segment
$line .= $1;
while (1) {
if (s/^([!-~][!-~]+)//) { # Double Char plain text
$line .= $1;
} elsif (s/(\033\.[A-F])//) { # G2 Designate Sequence
$line .= $1;
} elsif (s/(\033N[ -])//) { # Single Shift Sequence
$line .= $1;
} else {
last;
}
}
} else {
# Something wrong in text
$line .= $_;
last;
}
}
# remove a `trick'
$line =~ s/^\033\(B//;
$ret .= $line;
# add back eol
$ret .= "\n" unless (++$i >= $cnt);
}
($ret);
}
##---------------------------------------------------------------------------##
## clip($str, $length, $is_html, $has_tags): Clip an iso-2022-jp string.
##
## The last argument $is_html specifies '&' should be treated
## as HTML character or not.
## (i.e., the length of '&' will be 1 if $is_html).
##
sub clip { # &clip($str, 10, 1, 1);
my($str) = shift;
my($length) = shift;
my($is_html) = shift;
my($has_tags) = shift;
my($ret, $inascii);
local($_) = $str;
$ret = "";
# a trick to process preceding ASCII text
$_ = "\033(B" . $_ unless /^\033/;
# Process Each Segment
CLIP: while(1) {
if (s/^(\033\([BJ])//) { # Single Byte Segment
$inascii = 1;
$ret .= $1;
while(1) {
if (s/^([^\033])//) { # ASCII plain text
if ($is_html) {
if (($1 eq '<') && $has_tags) {
s/^[^>\033]*>//;
} else {
if ($1 eq '&') {
s/^([^\;]*\;)//;
$ret .= "&$1";
} else {
$ret .= $1;
}
$length--;
}
} else {
$ret .= $1;
$length--;
}
} elsif (s/(\033\.[A-F])//) { # G2 Designate Sequence
$ret .= $1;
} elsif (s/(\033N[ -])//) { # Single Shift Sequence
$ret .= $1;
$length--;
} else {
last;
}
last CLIP if ($length <= 0);
}
} elsif (s/^(\033\$[\(_at_)AB]|\033\$\([CD])//) { # Double Byte Segment
$inascii = 0;
$ret .= $1;
while (1) {
if (s/^([!-~][!-~])//) { # Double Char plain text
$ret .= $1;
# The length of a double-byte-char is assumed 2.
# If we consider compatibility with UTF-8, it should be 1.
$length -= 2;
} elsif (s/(\033\.[A-F])//) { # G2 Designate Sequence
$ret .= $1;
} elsif (s/(\033N[ -])//) { # Single Shift Sequence
$ret .= $1;
$length--;
} else {
last;
}
last CLIP if ($length <= 0);
}
} else {
# Something wrong in text
$ret .= $_;
last;
}
}
# remove a `trick'
$ret =~ s/^\033\(B//;
# Shuold we check the last \033\([BJ] sequence?
# (I believe it is too paranoid).
$ret .= "\033(B" unless $inascii;
($ret);
}
##---------------------------------------------------------------------------##
1;