#!/usr/bin/perl -w
#
# A script to convert tamil character clusters to
# their presentation format and assign them internal Yudit ids.
#
# Tamil clusters are formed by
#    vowels     = 12
#    consonants = 22 (including grantha)
#    modifiers  = pulli (VIRAMA), i, I, u, U, e, E, ai, o, O and au
#    and the aytham symbol
#
# In general, we need to output new glyphs for clusters
# formed by modifiers (i I u U and pulli). We assign unique ids
# internal to Yudit for the output glyphs, following the Pango convention
# outlined in http://www.wholehog.fsnet.co.uk/robert/indic/fonts.htm
#
# Two outputs are produced:
#    * the keymap entries, written to $kmap_path
#    * the cluster table ("<input> -> <glyph sequence> <id>"), on STDOUT
#
# Thuraippah Vaseeharan
# Thu Dec 27 22:49:33 CST 2001
#
# CHANGES:
#    merged Tamil.kmap.pl
#    Mon Dec 31 19:18:06 CST 2001

use strict;
use warnings;

# ---------------------------------------------------------------------
# Tables
# ---------------------------------------------------------------------

my $kmap_path = "../../mytool/kmap/Tamil.kmap";

# Key sequences for the consonants; index-parallel to @consonants.
my @consonant_keys = qw(k ng c j nj d N t q n p m y r R l L z v sh s h);

# Key typed after a consonant key to select each modifier (vowel sign).
my %modifier_keys = (
    '0bbe' => 'A',
    '0bcd' => 'f',
    '0bbf' => 'i',
    '0bc0' => 'I',
    '0bc1' => 'u',
    '0bc2' => 'U',
    '0bc6' => 'e',
    '0bc7' => 'E',
    '0bc8' => 'G',
    '0bca' => 'o',
    '0bcb' => 'O',
    '0bcc' => 'H',
);

# The notation for vowels modifiers ai(G)/au(H)
# is not very obvious :-(
# They have been 'renamed' as G, H to prevent Yudit 'waiting'
# for additional input, when typing the "a" vowel
my @vowel_keys = qw(a A i I u U e E G o O H);    # index-parallel to @vowels

my $aytham = "0b83";
my $pulli  = "0bcd";
my $aravu  = "0bbe";

my @vowels = qw(0b85 0b86 0b87 0b88 0b89 0b8a 0b8e 0b8f 0b90 0b92 0b93 0b94);

my @consonants = qw(
    0b95 0b99 0b9a 0b9c 0b9e 0b9f 0ba3 0ba4 0ba8 0ba9 0baa
    0bae 0baf 0bb0 0bb1 0bb2 0bb3 0bb4 0bb5 0bb7 0bb8 0bb9
);

my @modifiers_0 = qw(0bbf 0bc0 0bc1 0bc2);    # i I u U
my @modifiers_1 = qw(0bc6 0bc7 0bc8);         # e E ai
my @modifiers_2 = qw(0bca 0bcb 0bcc);         # o O au

# The two-part modifiers (o O au) are emitted as a piece placed before
# the consonant (%first_part) and a piece placed after it (%last_part).
my %first_part = (
    '0bca' => '0bc6',
    '0bcb' => '0bc7',
    '0bcc' => '0bc6',
);
my %last_part = (
    '0bca' => '0bbe',
    '0bcb' => '0bbe',
    '0bcc' => '0bd7',
);

# Starting glyph ids; each counter is post-incremented per emitted cluster.
my $independent = 0x80060000;    # vowels, aytham, consonants, consonant+aa
my $uyirmeys    = 0x8006d000;    # consonant + (i I u U) ligatures
my $meys        = 0x8006f000;    # consonant + pulli
my $reordered   = 0x8006e000;    # consonant + (e E ai o O au)

# The key tables are indexed in step with the code point tables; catch a
# mismatch before the output file is truncated.
die "vowel key table does not match vowel table\n"
    unless @vowel_keys == @vowels;
die "consonant key table does not match consonant table\n"
    unless @consonant_keys == @consonants;

# ---------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------

# Write one keymap entry of the form  "<keys>=0x<cp> 0x<cp> ...",
# to the keymap file handle.
sub emit_kmap_entry {
    my ($fh, $keys, @codepoints) = @_;
    printf {$fh} "\"%s=%s\",\n", $keys, join(' ', map { "0x$_" } @codepoints);
    return;
}

# Print one cluster line on STDOUT:
#    <input code points> -> <0000-prefixed glyph code points> <glyph id hex>
# $input and $output are array references of 4-digit hex strings.
sub emit_cluster {
    my ($input, $output, $glyph_id) = @_;
    printf "%s -> %s %x\n",
        join(' ', @{$input}),
        join(' ', map { "0000$_" } @{$output}),
        $glyph_id;
    return;
}

# ---------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------

open(my $kmap_fh, '>', $kmap_path)
    or die "unable to open kmap file '$kmap_path' for writing: $!";
print {$kmap_fh} "//autogenerated by Tamil.cluster.pl\n";

print "COMM=begin tamil clusters \n";
print "COMM=autogenerated by Tamil.cluster.pl\n";

# uyirs
for my $idx (0 .. $#vowels) {
    my $vowel = $vowels[$idx];
    emit_kmap_entry($kmap_fh, $vowel_keys[$idx], $vowel);
    emit_cluster([$vowel], [$vowel], $independent++);
}

# aytham
emit_kmap_entry($kmap_fh, "Q", $aytham);
emit_cluster([$aytham], [$aytham], $independent++);

# akara meys (base consonant)
for my $idx (0 .. $#consonants) {
    my $cons = $consonants[$idx];
    emit_kmap_entry($kmap_fh, $consonant_keys[$idx] . "a", $cons);
    emit_cluster([$cons], [$cons], $independent++);
}

# Akaaram meys (long form of base consonant)
for my $idx (0 .. $#consonants) {
    my $cons = $consonants[$idx];
    emit_kmap_entry($kmap_fh, $consonant_keys[$idx] . $modifier_keys{$aravu},
        $cons, $aravu);
    emit_cluster([$cons, $aravu], [$cons, $aravu], $independent++);
}

# meys (base consonant + pulli) and other uyir-meys
# (including ligatures and re-ordered forms)
for my $idx (0 .. $#consonants) {
    my $cons = $consonants[$idx];
    my $key  = $consonant_keys[$idx];

    # Tamil Meys: ligatures with pulli (virama)
    emit_kmap_entry($kmap_fh, $key . $modifier_keys{$pulli}, $cons, $pulli);
    emit_cluster([$cons, $pulli], [$cons, $pulli], $meys++);

    # Tamil Uyir-Meys:
    # ligatures with i, I, u, U
    for my $mod (@modifiers_0) {
        emit_kmap_entry($kmap_fh, $key . $modifier_keys{$mod}, $cons, $mod);
        emit_cluster([$cons, $mod], [$cons, $mod], $uyirmeys++);
    }

    # re-ordered forms
    # for (e E ai), just flip: the modifier is output before the consonant
    for my $mod (@modifiers_1) {
        emit_kmap_entry($kmap_fh, $key . $modifier_keys{$mod}, $cons, $mod);
        emit_cluster([$cons, $mod], [$mod, $cons], $reordered++);
    }

    # for (o O au), the modifier "splits" into two parts,
    # that surround the base consonant
    for my $mod (@modifiers_2) {
        emit_kmap_entry($kmap_fh, $key . $modifier_keys{$mod}, $cons, $mod);
        emit_cluster(
            [$cons, $mod],
            [$first_part{$mod}, $cons, $last_part{$mod}],
            $reordered++,
        );
    }
}

print "COMM=end tamil\n";

# Buffered write errors only surface at close, so check it.
close($kmap_fh)
    or die "unable to close kmap file '$kmap_path': $!";