##---------------------------------------------------------------------------##
##  File:
##      iso2022.pl 99/03/24
##  Author:
##      Takashi P.KATOH
##  Description:
##      Routines to process data encoded in iso2022 character sets.
##---------------------------------------------------------------------------##
##    Copyright (C) 1998,1999 Takashi P.KATOH, p-katoh(_at_)shiratori(_dot_)riec(_dot_)tohoku(_dot_)ac(_dot_)jp
##
##    This program is free software; you can redistribute it and/or modify
##    it under the terms of the GNU General Public License as published by
##    the Free Software Foundation; either version 2 of the License, or
##    (at your option) any later version.
##
##    This program is distributed in the hope that it will be useful,
##    but WITHOUT ANY WARRANTY; without even the implied warranty of
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##    GNU General Public License for more details.
##
##    You should have received a copy of the GNU General Public License
##    along with this program; if not, write to the Free Software
##    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
##    02111-1307, USA
##---------------------------------------------------------------------------##

package iso_2022;

###############################################################################
##      Routines
###############################################################################

##---------------------------------------------------------------------------##
##      str2sgml converts a string encoded by $charset to an sgml
##      string where special characters are converted to entity
##      references.
##
##      $return_data = iso_2022'str2sgml($data, $charset, $only8bit);
##
##      Notice:
##          $only8bit is not used.
##          (It is only for compatibility with iso_8859'str2sgml).
##
##      Almost all of this code is taken from m2h_text_plain'jp2022
##      (in mhtxtplain.pl) by NIIBE Yutaka.
##
##      NOTE: the package statement is repeated here on purpose.  It is
##      harmless when an identical one is already in effect, and it
##      guarantees that the routines below are compiled into iso_2022,
##      the package through which str2sgml is called.
package iso_2022;

use strict;

##---------------------------------------------------------------------------##
##      str2sgml converts ISO-2022 encoded text into a string that can be
##      embedded in SGML/HTML.
##
##      Usage:
##          $return_data = iso_2022::str2sgml($data, $charset, $only8bit);
##
##      Arguments:
##          $data       Text to convert.  undef is treated as ''.
##          $charset    Accepted but never consulted by this routine.
##          $only8bit   Accepted but never consulted by this routine.
##
##      Return:
##          The converted string.
##
##      Processing:
##          o Text before the first segment designator, and text that
##            follows ESC ( B or ESC ( J, is handled as single-byte
##            text: "&", "<" and ">" become entity references and, unless
##            $mhonarc::NOURL is true, URLs matching
##            $iso_2022::HUrlExp are wrapped in anchors.
##          o Text that follows ESC $ @, ESC $ A, ESC $ B, ESC $ ( C or
##            ESC $ ( D is handled as double-byte text and copied through
##            unchanged: its bytes are halves of characters, not markup.
##          o G2 designations (ESC . A-F) and single shifts (ESC N + one
##            byte) are copied through unchanged in either kind of segment.
##          o As soon as something is found that fits none of the above,
##            the remainder of the data is appended UNCONVERTED and
##            processing stops (see the note in the code).
##
sub str2sgml {
    my ($data, $charset, $only8bit) = @_;
    my $rest = defined($data) ? $data : '';

    # Data starts out in the single-byte state, so whatever precedes the
    # first segment designator is ordinary text.
    my $ret = _single_byte_run(\$rest);

    # Process each segment
    while (1) {
        if ($rest =~ s/^(\033\([BJ])//) {
            # Single byte segment
            $ret .= $1;
            $ret .= _single_byte_run(\$rest);

        } elsif ($rest =~ s/^(\033\$[\@AB]|\033\$\([CD])//) {
            # Double byte segment
            $ret .= $1;
            $ret .= _double_byte_run(\$rest);

        } else {
            # End of data, or something this routine does not understand.
            # NOTE(review): the remainder is emitted verbatim because its
            # shift state is unknown and escaping could corrupt double-byte
            # characters.  It may therefore contain raw markup characters.
            $ret .= $rest;
            last;
        }
    }

    return $ret;
}

##---------------------------------------------------------------------------##
##      _single_byte_run removes a run of single-byte text and shift
##      sequences from the front of the string referenced by $rest_ref and
##      returns it in converted form.  The referenced string is modified.
##
sub _single_byte_run {
    my ($rest_ref) = @_;
    my $out = '';

    while (1) {
        if ($$rest_ref =~ s/^([^\033]+)//) {
            # Plain text up to the next escape sequence
            my $chunk = $1;
            $out .= _ascii2sgml($chunk);

        } elsif ($$rest_ref =~ s/^(\033\.[A-F])//) {
            # G2 designate sequence.  Anchored: only a sequence at the
            # current position may be consumed, otherwise one found later
            # in the data would be moved in front of the text preceding it.
            $out .= $1;

        } elsif ($$rest_ref =~ s/^(\033N[\040-\177])//) {
            # Single shift sequence: ESC N plus the one byte it applies to.
            # That byte stands for a G2 character, so it is not escaped.
            $out .= $1;

        } else {
            last;
        }
    }

    return $out;
}

##---------------------------------------------------------------------------##
##      _double_byte_run removes a run of double-byte text and shift
##      sequences from the front of the string referenced by $rest_ref and
##      returns it unchanged.  The referenced string is modified.
##
sub _double_byte_run {
    my ($rest_ref) = @_;
    my $out = '';

    while (1) {
        if ($$rest_ref =~ s/^([!-~][!-~]+)//) {
            # Double-byte characters; no entity conversion here
            $out .= $1;

        } elsif ($$rest_ref =~ s/^(\033\.[A-F])//) {
            # G2 designate sequence (anchored, see _single_byte_run)
            $out .= $1;

        } elsif ($$rest_ref =~ s/^(\033N[\040-\177])//) {
            # Single shift sequence
            $out .= $1;

        } else {
            last;
        }
    }

    return $out;
}

##---------------------------------------------------------------------------##
##      _ascii2sgml returns a copy of $text with SGML meta characters
##      replaced by entity references and, when enabled, URLs converted
##      to hyperlinks.
##
sub _ascii2sgml {
    my ($text) = @_;

    # "&" must be handled first so that the ampersands introduced by the
    # other two replacements are not escaped a second time.
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;

    # Convert URLs to hyperlinks.  Skipped when no URL expression has been
    # set: an empty expression would match at every position in the text.
    if (!$mhonarc::NOURL
        && defined($iso_2022::HUrlExp)
        && $iso_2022::HUrlExp ne '') {
        $text =~ s%($iso_2022::HUrlExp)%<A HREF="$1">$1</A>%gio;
    }

    return $text;
}

##---------------------------------------------------------------------------##
1;