#!/usr/bin/perl # $Id: map2hash,v 1.1 2003/05/19 09:37:22 ehood Exp $ use bytes; use Getopt::Long; require 'shellwords.pl'; my %opt = ( 'pkg' => 'XXXXXX', ); my $is_glib; my $is_ucm; my $incharmap; my $init = 1; while () { if ($init) { if (s/^[#%]!ucm2hash\s*//) { chomp; unshift(@ARGV, shellwords($_)); } GetOptions(\%opt, 'glibc!', 'pkg=s', 'utf8!', 'unimap!', ); $is_glib = $opt{'glibc'}; $is_ucm = !$opt{'unimap'} || $is_glib; $incharmap = !$is_ucm; if ($opt{'utf8'}) { print 'package MHonArc::UTF8::', $opt{'pkg'}, ";\n"; } else { print 'package MHonArc::CharEnt::', $opt{'pkg'}, ";\n"; } print '+{', "\n"; $init = 0; } if (/^CHARMAP/) { $incharmap = 1; next; } next unless $incharmap; s/\%IRREVERSIBLE\%//; next if /^[#%]/; next unless /\S/; last if /^END CHARMAP/; chomp; if (/^=/ || /^0x/) { # Unicode mapping table format: We assume 3 column format, but there # are some tables that have different formats. ($raw, $uni, $name) = split("\t", $_, 3); next unless $uni =~ /\S/; $raw =~ s/^0x//; $raw =~ s/=//; $uni =~ s/<[LR][RL]>\+//; $uni =~ s/^0x//; $uni =~ s/^U\+//; @uni = map { uc $_ } split(/(?:\+0x|U?\+)/, $uni); $name =~ s/^#\s*//; } else { # UCM if ($is_glib) { ($uni, $raw, $name) = split(' ', $_, 3); } else { ($uni, $raw, $num, $name) = split(' ', $_, 4); } $uni =~ s/^$//; @uni = map { uc $_ } split(/>