#!/usr/bin/perl
#  Yudit Unicode Editor Source File
#
#  Copyright (C) 2000  Gaspar Sinai
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# This script makes a compressed BiDi category map
# (code point -> numeric BiDi category).
# Compressed means: for a range only the first one will be
# encoded.
# In order to run the script you need to get
# ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
# as an input.
# Maps every Unicode code point to a numeric BiDi category code and prints
# the result in run-length compressed form: one "CODEPOINT -> CATEGORY" line
# is written at the start of each run of consecutive code points that share
# the same category; the entry stays in effect until the next line.
#
# Usage: <script> VERSION [UnicodeData.txt ...]
# (with no file arguments the data is read from standard input)

use strict;
use warnings;

# Numeric codes emitted for each BiDi class abbreviation found in field 4
# of a UnicodeData.txt record.  Any class that is not listed here is
# emitted as 0x00 (Unknown).
my %BiDiCategory = (
    "XX"  => 0x00,    # Unknown
    "L"   => 0x01,    # Left-to-Right
    "LRE" => 0x02,    # Left-to-Right Embedding
    "LRO" => 0x03,    # Left-to-Right Override
    "R"   => 0x04,    # Right-to-Left
    "AL"  => 0x05,    # Right-to-Left Arabic
    "RLE" => 0x06,    # Right-to-Left Embedding
    "RLO" => 0x07,    # Right-to-Left Override

    # weak
    "PDF" => 0x08,    # Pop Directional Format
    "EN"  => 0x09,    # European Number
    "ES"  => 0x0A,    # European Number Separator
    "ET"  => 0x0B,    # European Number Terminator
    "AN"  => 0x0C,    # Arabic Number
    "CS"  => 0x0D,    # Common Number Separator
    "NSM" => 0x0E,    # Non-Spacing Mark
    "BN"  => 0x0F,    # Boundary Neutral

    # neutral
    "B"   => 0x10,    # Paragraph Separator
    "S"   => 0x11,    # Segment Separator
    "WS"  => 0x12,    # Whitespace
    "ON"  => 0x13,    # Other Neutrals
);

# bidi_code($class)
#   Returns the numeric code for a BiDi class abbreviation, or 0 (Unknown)
#   when the abbreviation is undefined or not present in %BiDiCategory.
sub bidi_code {
    my ($class) = @_;
    return 0 unless defined $class && defined $BiDiCategory{$class};
    return $BiDiCategory{$class};
}

# compress_bidi_map($next_line)
#   $next_line - code reference returning the next input record on each
#                call and undef once the input is exhausted.
#   Returns the list of "XXXXXXXX -> YY" entries (no line terminator).
#
#   Records with fewer than 7 ';'-separated fields are ignored.  A gap in
#   the code point sequence produces an explicit "-> 00" entry so that the
#   preceding category does not leak into unassigned code points; the gap
#   in front of a "..., Last>" record is the interior of a range and is
#   therefore not reset.  A final "-> 00" entry closes the last run.
sub compress_bidi_map {
    my ($next_line) = @_;

    my @entries;
    # Start just below U+0000 with a category no record can have, so the
    # first record is always emitted and a leading gap starts at 00000000.
    my $last       = -1;
    my $last_value = -1;

    while (defined(my $line = $next_line->())) {
        my @fields = split /;/, $line;
        next if @fields < 7;

        my $code  = hex $fields[0];
        my $value = bidi_code($fields[4]);

        # Same category as the directly preceding code point: extend the run.
        if ($last + 1 == $code && $last_value == $value) {
            $last = $code;
            next;
        }

        # Hole in the code point sequence (but not the inside of a range).
        if ($last + 1 < $code && $fields[1] !~ /Last>/) {
            push @entries, sprintf("%08X -> %02X", $last + 1, 0);
        }

        push @entries, sprintf("%08X -> %02X", $code, $value);
        $last       = $code;
        $last_value = $value;
    }

    # Everything after the last record is unknown.
    push @entries, sprintf("%08X -> %02X", $last + 1, 0);
    return @entries;
}

# main()
#   Script entry point: takes the version from the first command line
#   argument, prints a header, the compressed map and the "#END" trailer.
#   Dies when no version argument is given.
sub main {
    die "No version specified." if (@ARGV == 0);
    my $VERSION = shift @ARGV;

    my @full_date = localtime(time);
    my $date = sprintf("%4d-%02d-%02d",
        $full_date[5] + 1900, $full_date[4] + 1, $full_date[3]);

    # NOTE(review): this value looks like a leftover from the precompose
    # script; it is kept unchanged - confirm the intended map name.
    my $name = "precompose.my";

    # NOTE(review): the here-document that originally printed the file
    # header was lost from this source.  The lines below are a minimal
    # placeholder built from the values that were prepared for it; restore
    # the real header text from the upstream script before relying on the
    # output format.
    print "# $name\n";
    print "# Version: $VERSION\n";
    print "# Generated: $date\n";

    # Remaining command line arguments (or standard input) are the data.
    print "$_\n" for compress_bidi_map(sub { return scalar(<>); });

    print "\n#END\n";
    return;
}

# Run only when executed as a script, not when loaded with require/do.
main() unless caller;

1;