perl-unicode

[FYI] use encoding 'non-utf8-encoding'; use CGI;

2002-10-02 02:30:04
I am currently writing yet another CGI book. That is for the Japanese market and written in Japanese. So it is inevitable that you have to face the labyrinth of character encoding.

Before perl 5.8.0, most book teaches how to handle Japanese in CGI goes as follows;

* stick with EUC-JP.  it does not poison perl like Shift_JIS.
* use jcode.pl or Jcode.pm when you have to convert encoding.
* you can use jcode::tr or Jcode->tr when you have to convert between Hiragana and Katakana

fine, so far.  But....

* totally forget regex unless you are happy with a very counter-intuitive measure illustrated in 6.18 of the Cookbook
* if you are desperate in Kanji regex, use jperl instead.

That has now changed with 'use encoding'. But when it comes to CGI, 'use encoding' alone will not cut it. But CGI.pm can handle multipart/form-data . Together you can use regex safely and intuitively without resorting to convert your CGI script to UTF-8.

The 120-line script right after my signature illustrates that. Sorry, it contains some Japanese (or my point gets blurred).

As you see, tr/// is not subject to the magic of 'use encoding'. jhi, have we made it so deliberately ? I am begging to think tr/// is happier to enbrace the power thereof.

Still, it can be overcome by simple eval qq{} as illustrated. This much idiom would not hurt much, at least not as much as the Cookbook sample....

Dan the Transcoded Man

#!/usr/local/bin/perl
#
# Save me in EUC-JP!

use 5.008;
use strict;
use CGI;
use CGI::Carp qw(fatalsToBrowser);
our $Method  = 'POST';
#our $Method  = 'GET';
our $Enctype = 'multipart/form-data';
#our $Enctype = 'application/x-www-form-urlencoded';
our $Charset = 'euc-jp';
use encoding 'euc-jp';

my $cgi = CGI->new();

my %Label =
    (
     name    => '名前',
     kana    => 'フリガナ',
     mailto  => '電子メール',
     mailto2 => '電子メール(確認)',
     tel     => '電話',
     fax     => 'ファックス',
     zip     => '〒',
     address => '住所',
     comment => 'ご意見',
     );


unless ($cgi->param()){
    print_input($cgi);
}else{
    my $kana = $cgi->param('kana');
    $kana =~ s/[\s ]+//g; # beware of zenkaku space!
    eval qq{ \$kana =~ tr/ぁ-ん/ァ-ン/ };
    # $kana =~ tr/ぁ-ん/ァ-ン/; # will not work but do you know why?
    $cgi->param(kana => $kana);
    print_output($cgi);
}

sub print_input{
    my $c = shift;
    print_html(
               $c,
               title =>    "Form:入力",
               name    => $c->textfield(-name => 'name'),
               kana    => $c->textfield(-name => 'kana'),
               mailto  => $c->textfield(-name => 'mailto'),
               mailto2 => $c->textfield(-name => 'mailto2'),
               tel     => $c->textfield(-name => 'tel'),
               fax     => $c->textfield(-name => 'fax'),
               zip     => $c->textfield(-name => 'zip'),
               address => $c->textfield(-name => 'address'),
               comment => $c->textarea(-name => 'comment'),
               );
}

sub print_output{
    my $c = shift;
    print_html(
               $c,
               title   => "Form:出力",
               name    => $c->param('name'),
               kana    => $c->param('kana'),
               mailto  => $c->param('mailto'),
               mailto2 => $c->param('mailto2'),
               tel     => $c->param('tel'),
               fax     => $c->param('fax'),
               zip     => $c->param('zip'),
               address => $c->param('address'),
               comment => $c->param('comment'),
               );
};

sub print_html{
    my $c = shift;
    my %arg = @_;
    print
        $c->header(-charset   => $Charset),
        $c->start_html(-title => $arg{title}),
        $c->h1($arg{title});
    $c->param() or print
        $c->start_form(-method => $Method, -enctype => $Enctype);
    print
        $c->start_table({border => 1}),
        $c->Tr([
                $c->td([ $Label{name}    => $arg{name} ]),
                $c->td([ $Label{kana}    => $arg{kana} ]),
                $c->td([ $Label{mailto}  => $arg{mailto} ]),
                $c->td([ $Label{mailto2} => $arg{mailto2} ]),
                $c->td([ $Label{tel}     => $arg{tel} ]),
                $c->td([ $Label{fax}     => $arg{fax} ]),
                $c->td([ $Label{zip}     => $arg{zip} ]),
                $c->td([ $Label{address} => $arg{address} ]),
                $c->td([ $Label{comment} => $arg{comment} ]),
                ]);
    if ($c->param()){
        print
            $c->td($c->a({href=>$ENV{SCRIPT_TEXT}}, "Retry"));
    }else{
        print
            $c->td([$c->reset(), $c->submit()]),
        };
    print $c->end_form() unless $c->param();
    print
        $c->end_table(),
        $c->end_html();
}
__END__