On Sun, 07 Dec 2003 16:33:50 +0900
Tatsuhiko Miyagawa <miyagawa(_at_)edge(_dot_)co(_dot_)jp> wrote:
Maybe a way to globally force UTF8 (or some other encoding) to be used
on b=>u promotion is a good idea, but AFAIK it does not yet exist.
Can it be implemented as a pragma? If it can be, it'd be very
convenient.
Maybe what Japanese people want is something like JISAutoDetect?
### ^
package Encode::JP::Auto;

# Decode-only Encode encoding, registered under the name 'JISAutoDetect'.
# It makes a rough guess at which Japanese encoding a byte string uses
# (7bit-jis, shift-jis or euc-jp) and delegates decoding to that encoding.

use strict;
use warnings;
use Carp;
our $VERSION = '0.00';
use Encode qw();
use base qw(Encode::Encoding);

__PACKAGE__->Define('JISAutoDetect');

# Encode::Encoding hook; per the Encode::Encoding documentation a true value
# requests line-buffered input when the encoding is used as a PerlIO layer.
sub needs_lines { return 1 }

# Encode::Encoding hook reporting PerlIO support.
# NOTE(review): this reports "not PerlIO-capable", yet the usage example that
# accompanies this module opens a handle with :encoding(JISAutoDetect) --
# confirm which value is intended.
sub perlio_ok { return 0 }

# Delegate decoders, keyed by the short names returned by _getcode().
my %Jobj = (
    euc  => Encode::find_encoding('euc-jp'),
    jis  => Encode::find_encoding('7bit-jis'),
    sjis => Encode::find_encoding('shift-jis'),
);

# Guess the encoding of the byte string $s and return a key of %Jobj.
# Deliberately very rough; a [jJ]code::getcode-style detector may do better.
sub _getcode {
    my $s = shift;

    # Any ESC byte is taken as a 7bit-jis escape sequence.
    return 'jis' if $s =~ /\e/;

    # Bytes 0x81-0x8D and 0x90-0xA0 are taken as evidence of shift-jis.
    return 'sjis' if $s =~ /[\201-\215\220-\240]/;

    # Everything else, including pure ASCII, is treated as euc-jp.
    return 'euc';
}

# decode($obj, $str, $chk)
#   $str - byte string to decode
#   $chk - optional Encode CHECK value (bitmask or coderef); forwarded to the
#          delegate decoder so that malformed input is handled as requested
# Returns the decoded character string, or undef when $str is undef.
# When $chk is true, the caller's buffer ($_[1]) is replaced with whatever the
# delegate decoder left unprocessed.
#
# No prototype: prototypes are ignored on method calls.
sub decode {
    my ($obj, $str, $chk) = @_;

    # Return a scalar undef (not an empty list) so the result stays a single
    # value even when called in list context.
    return undef unless defined $str;

    # Normalise so the delegate never receives an undefined CHECK.
    $chk ||= 0;

    my $code = _getcode($str);

    # $str is our private copy; the delegate may trim it down to the
    # unprocessed remainder, depending on $chk.
    my $ret = $Jobj{$code}->decode($str, $chk);

    # Hand the remainder back through the aliased argument.
    $_[1] = $str if $chk;
    return $ret;
}

# Encoding is not supported by this module; always croaks.
sub encode {
    croak "encode() isn't implemented";
}

1;
__END__
### $
# Example: read each file named on the command line through the
# JISAutoDetect encoding layer, convert katakana to hiragana, and print
# the result.
use strict;
use warnings;
use Encode::JP::Auto;

# Give STDOUT an explicit encoding so every line is written as UTF-8;
# without a layer, decoded text containing wide characters triggers
# "Wide character" warnings and lines may be emitted in differing byte forms.
binmode STDOUT, ':encoding(UTF-8)'
    or die "Cannot set encoding on STDOUT: $!";

for my $f (@ARGV) {
    # Lexical handle with three-argument open; report the file and reason
    # on failure.
    open my $fh, "<:encoding(JISAutoDetect)", $f
        or die "Cannot open '$f': $!";
    while (my $line = <$fh>) {
        $line =~ tr/\x{30A1}-\x{30F3}/\x{3041}-\x{3093}/;    # katakana to hiragana
        print $line;
    }
    close $fh
        or die "Cannot close '$f': $!";
}
Regards,
SADAHIRO Tomoyuki