#!/usr/bin/perl
# Make diff of 3 files
# GNU (C) Gaspar Sinai
# Tokyo 2002-04-10

# Official JIS X 0213 mapping tables (plane 1 and plane 2) read by makeMap.
$PLANE1="JISX0213-1.TXT";
$PLANE2="JISX0213-2.TXT";
$VERSION="Version 1.1";

#
# SJIS is KEY
#
%JISMAP=();
%KUTENMAP=();
%UMAP=();
%ALLJ=();
%ALLU=();

makeMap ($PLANE1);
makeMap ($PLANE2);

#
# NAME FILE URL DELIMITER JIS SJIS UNICODE
# (JIS, SJIS and UNICODE are column indexes into each line of FILE.)
#
@FILE1=("WAKABA", "jisx0213code-csv32.txt", "http://www.ksky.ne.jp/~smile4me/charcode/jisx0213code-csv_u32.lzh", ",", 4, 6, 7);

# No JIS MAP.
@FILE2=("IBM_GLY", "IBM1394toUCS4-GLY.txt", "http://www.cse.cuhk.edu.hk/~irg/irg/N807_TablesX0123-UCS.zip", " ", -1, 0, 1);

# No JIS MAP.
@FILE3=("IBM_IRV", "IBM1394toUCS4-IRV.txt", "http://www.cse.cuhk.edu.hk/~irg/irg/N807_TablesX0123-UCS.zip", " ", -1, 0, 1,);

makeCMP (@FILE1);
makeCMP (@FILE2);
# Turned out to be the same.
# &makeCMP (@FILE3);

# keys needs the hash itself (%JISMAP); a bareword is not accepted.
@KEYS = sort (keys (%JISMAP));

$name1 = $FILE1[0];
$name2 = $FILE2[0];
$name3 = $FILE3[0];

# NOTE(review): the statement below is truncated in this copy of the file -
# the heredoc opener and the beginning of its text are missing.
# TODO: restore it from the original source before running.
print < and Gaspar Sinai . These two maps are totally unofficial, and they will not solve any problems mentioned by Tomohiro KUBOTA http://www.debian.or.jp/~kubota/unicode-symbols.html. It should also be noted that tone letters that are used in this mapping are mentioned in Section 7.8 (Modifier Letters). The rendering engine must render at least the following glyphs: U+02E9 U+02E5 # RISING (声調記号上昇調) in JIS X 0213 U+02E5 U+02E9 # FALLING (声調記号下降調) in JIS X 0213 ------------------------------------------------------------------ Further Unresolved Issues ------------------------------------------------------------------ 1) 0x83F6 0x2678 0x31F7 0x309A # 1-6-88 This character is a 'small' variant of 0x30D7. It will be the task of the display engine to position the small circle at the right position. 
Differences between unofficial mappings:
EOD
printf ("%-7.7s %-4.4s %-8.8s %-17.17s %-26.26s\n", "m-k-t", "SJIS", $name1, $name2,"LINUX");
# NOTE(review): the statement below is truncated in this copy of the file -
# its heredoc opener and whatever code preceded it are missing.
# TODO: restore it from the original source before running.
print < Tokyo, $date EOD
exit (0);

#
# makeCMP (NAME, FILE, URL, DELIMITER, JIS, SJIS, UNICODE)
#
# Read one third-party mapping table from FILE and record it under NAME in
# the global hashes %ALLJ (JIS code as 4 hex digits) and %ALLU (Unicode
# value), both keyed by the numeric Shift-JIS code.  JIS, SJIS and UNICODE
# are column indexes into each DELIMITER-separated line.  URL is accepted
# but not used here.  Dies if FILE cannot be opened.
#
sub makeCMP {
    my ($name, $file, $url, $delim, $jis, $sjis, $uni) = @_;

    open (my $in, "<", $file) || die "Can not open $file: $!";
    while (my $line = <$in>) {
        chomp ($line);
        my @col = split ($delim, $line);

        # Lines whose SJIS column does not parse to a non-zero hex number
        # are skipped.
        my $sj = hex ($col[$sjis]);
        next if ($sj==0);

        # NOTE(review): callers pass JIS = -1 for tables without a JIS
        # column; a negative index selects the last column of the line.
        # Confirm that this is what the consumer of %ALLJ expects.
        my $j = hex ($col[$jis]);
        my $u = $col[$uni];
        $ALLJ{$name}{$sj} = sprintf ("%04X", $j);

        if ($u =~ /........\+......../) {
            # Sequence of 8-character values joined by a literal "+":
            # keep the field as it is.
            $ALLU{$name}{$sj} = $u;
        }
        else {
            # Single value: normalise to 8 upper-case hex digits.
            $ALLU{$name}{$sj} = sprintf ("%08X", hex ($u));
        }
    }
    close ($in);
}

#
# makeMap (FILE)
#
# Read a mapping table whose lines look like
#     0xSJIS 0xJIS [0xUCS [0xUCS [0xUCS]]] text
# and fill the global hashes, all keyed by the numeric Shift-JIS code:
#     %JISMAP   - JIS code as 4 hex digits
#     %UMAP     - zero to three Unicode values, each 8 hex digits, joined
#                 by "+" (empty string when the line has none)
#     %KUTENMAP - the first "d-d-d" group found in the trailing text, or
#                 the whole trailing text when there is none
# Lines that do not have this shape are ignored.  Dies if FILE cannot be
# opened.
#
sub makeMap {
    my ($file) = @_;

    open (my $in, "<", $file) || die "Can not open $file: $!";
    while (my $line = <$in>) {
        chomp ($line);
        next if ($line =~ /UNASSIGNED .* JIS /);

        # The third group greedily takes up to three 0x columns but always
        # leaves whitespace plus the trailing text for the last group.
        next unless ($line =~ /^0x([0-9A-F]+)[ \t]+0x([0-9A-F]+)((?:[ \t]+0x[0-9A-F]+){0,3})[ \t]+(.*)/);
        my ($sj, $jis, $ucs, $kt) = (hex ($1), hex ($2), $3, $4);

        $JISMAP{$sj} = sprintf ("%04X", $jis);
        $UMAP{$sj} = join ("+", map { sprintf ("%08X", hex ($_)) } ($ucs =~ /0x([0-9A-F]+)/g));

        $kt = $1 if ($kt =~ /(\d+-\d+-\d+)/);
        $KUTENMAP{$sj} = $kt;
    }
    close ($in);
}