#!/usr/bin/perl
# parsegba.pl - parse gba input, write mys output
# GNU (C) Gaspar Sinai
# Tokyo 2002-03-26
#
# Reads lines of the form "<4 hex digits> <1-8 hex digits>" (key, value) from
# the files named on the command line, or from standard input, and prints a
# fixed header followed by "KEY -> VALUE" records.  A record is printed only
# when its value is not the direct successor of the previous line's value, so
# a run of consecutive values is represented by its first record alone.

use strict;
use warnings;

#
# Print the fixed header that precedes the generated records.
#
# NOTE(review): the reviewed copy of this script had lost the text between
# "print <" and " Tokyo 2002-03-26" (the rest of the here-document opener and
# whatever header text preceded the date), and its line breaks and spacing
# were collapsed.  The opener, the "COMM=" prefix of the first line and the
# line breaks are reconstructed; restore the missing header lines from the
# upstream file before regenerating any table with this script.
#
# NOTE(review): items 1a/1b of the emitted text look inconsistent with main(),
# which applies the four-byte arithmetic to values above 0xffff.  The text is
# program output, so it is emitted verbatim here - confirm before changing it.
#
sub print_header {
    print <<EOD;
COMM= Tokyo 2002-03-26
COMM=
COMM=Here is the algorithm to get GB 18030 codes from this
COMM=garbage:
COMM=If input unicode characters is less than 0x80 emit that code as single byte
COMM=Search for closest lowest key that matches the unicode character:
COMM=
COMM=1) BMP
COMM= 1a If value is greater than 0xffff output will be two bytes:
COMM= value + (unicode_vle - closest_key)
COMM= As always higher byte is emitted first.
COMM= 1b If value is less than 0x8000 output will be four bytes:
COMM= num = linear(value) + (unicode_vle - closest_key);
COMM= num = nonlinear(num);
COMM=
COMM= function nonlinear (num)
COMM= k3 = (num % 10)+0x30; num = num / 10;
COMM= k2 = (num % 126)+0x81; num = num / 126;
COMM= k1 = (num % 10)+0x30; num = num / 10;
COMM= k0 = (num % 126)+0x81;
COMM= return ((k0 << 24) + (k1 << 16) + (k2<<8) + k3);
COMM=
COMM= function linear(value):
COMM= k0 = (value >> 24) & 0xff; // 0x81..0xfe
COMM= k1 = (value >> 16) & 0xff; // 0x30..0x39
COMM= k2 = (value >> 8) & 0xff; // 0x81..0xfe
COMM= k3 = (value >> 0) & 0xff; // 0x30..0x39
COMM= num = (k0-0x81); num = num * 10;
COMM= num += (k1-0x30); num = num * 126;
COMM= num += (k2-0x81); num = num * 10;
COMM= num += (k3-0x30);
COMM= return (num);
COMM=
COMM=2) NON-BMP (unicode_value between 0x10000..0x10FFFF)
COMM= num = unicode_value - 0x10000 + 0x2E248;
COMM= nonlinear (num);
COMM= - 0x10000 should producce 0x90308130
COMM= - 0x10FFFF should be 0xE3329A35
#------------------------------------------------------
TYPE=0
SECTION=encode
ENCODE=1
#
# key 1 for 16 bit (16-bitunicode)
# value 2 for 32 bit (4-byte-gb) values
#
KEY_WIDTH=1
VALUE_WIDTH=2
KEY_LENGTH=0
VALUE_LENGTH=0
#
EOD
    return;
}

#
# Print one "KEY -> VALUE" record (4 and 8 upper-case hex digits).
#
sub emit_record {
    my ($key, $vle) = @_;
    printf("%04X -> %08X\n", $key, $vle);
    return;
}

#
# Filter the input table into run-start records.
# Returns 0 (the script's exit status).
#
sub main {
    print_header();

    # Value seen on the previous accepted line; a line whose value is the
    # successor of this one continues the current run and prints nothing.
    my $lastvle = 0;

    while (my $line = <>) {
        chomp $line;
        next unless $line =~ /^([0-9A-F]{4})\s+([0-9A-F]{1,8})/;
        my $key = hex($1);
        my $vle = hex($2);

        # Key 0 is always printed and does not take part in run tracking.
        if ($key == 0) {
            emit_record($key, $vle);
            next;
        }

        # Lines with a zero value or a key below 0x80 are skipped.
        next if $vle == 0 || $key < 0x80;

        # Values above 0xffff advance with the mixed-radix incGB step,
        # all other values advance by plain +1.
        my $expected = $vle > 0xffff ? incGB($lastvle) : $lastvle + 1;
        emit_record($key, $vle) if $vle != $expected;
        $lastvle = $vle;
    }

    emit_record(0xFFFF, 0x8431A439);

    # Can not fit - can be checked
    #printf ("%04X -> %08X\n", 0x10000, 0x90308130);
    #printf ("%04X -> %08X\n", 0x10FFFF, 0xE3329A35);
    return 0;
}

#
# increment a GB code
#
# Takes a packed four-byte code and returns the packed code whose linear
# number is one greater.
#
sub incGB {
    my ($code) = @_;
    return toGB(fromGB($code) + 1);
}

#
# Convert linear code to GB
#
# Splits the number into mixed-radix digits (10, 126, 10, 126 from the least
# significant end), offsets them by 0x30 / 0x81, and packs the four resulting
# bytes into one integer, first byte in bits 24..31.
#
sub toGB {
    # Work on a copy: elements of @_ alias the caller's arguments, so
    # assigning to $_[0] would clobber the caller's variable and die when
    # the argument is a literal.
    my ($num) = @_;

    my $k3 = ($num % 10) + 0x30;
    $num = int($num / 10);
    my $k2 = ($num % 126) + 0x81;
    $num = int($num / 126);
    my $k1 = ($num % 10) + 0x30;
    $num = int($num / 10);
    my $k0 = ($num % 126) + 0x81;

    return ($k0 << 24) + ($k1 << 16) + ($k2 << 8) + $k3;
}

#
# Convert GB to linear code
#
# Inverse of toGB: unpacks the four bytes of the code, removes the
# 0x81 / 0x30 offsets and combines the digits with radices 10, 126, 10.
#
sub fromGB {
    my ($code) = @_;

    my $k0 = ($code >> 24) & 0xff;
    my $k1 = ($code >> 16) & 0xff;
    my $k2 = ($code >> 8) & 0xff;
    my $k3 = $code & 0xff;

    my $num = $k0 - 0x81;
    $num = $num * 10 + ($k1 - 0x30);
    $num = $num * 126 + ($k2 - 0x81);
    $num = $num * 10 + ($k3 - 0x30);
    return $num;
}

# Run the filter only when executed as a script, so the helper subs can be
# loaded with require without consuming input.  Falling off the end of the
# script exits with status 0, as the explicit exit did.
main() unless caller;

1;