#!/usr/local/bin/perl
# ucoverage - 1998-05-13 - Roman czyborra@cs.tu-berlin.de
# lists the coverage of an *-iso10646-1 BDF font or a Unicode mapping file
#
# I use this script with the following make rules:
#
# coverage.%: %.bdf /usr/bin/ucoverage
#         ucoverage $< > $@
# %.bdf: %.ttf
#         ttf2bdf -v -r 75 -p 10 $< > $@ || :
#
# Input is read from the files named on the command line (or STDIN).
# Output is one line per script block of the table after __END__:
#   <count><TAB>U+<first>-U+<last>:<name>

use strict;
use warnings;

# First study the script block boundaries and names.
# Each table row is "FIRST; LAST; Name" with FIRST/LAST as hex code points.
my @blocks;
while (my $row = <DATA>) {
    chomp $row;
    next unless $row =~ /\S/;    # tolerate blank lines in the table

    my ($first, $last, $name) = split /;\s*/, $row, 3;
    next unless defined $last;   # skip rows that are not "FIRST; LAST; ..."
    $name = '' unless defined $name;

    push @blocks, {
        first => $first,         # kept as text for the report
        last  => $last,
        name  => $name,
        lo    => hex $first,     # numeric bounds for the range test
        hi    => hex $last,
        count => 0,
    };
}

# Then go counting the characters present in each block.
my $is_bdf = 0;
while (my $line = <>) {
    my $code;

    # Once a STARTFONT line has been seen, only ENCODING lines are trusted.
    $is_bdf = 1 if $line =~ /^STARTFONT/;

    # BDF glyph: "ENCODING <decimal>" (a negative encoding does not match).
    $code = $1 if $line =~ /^ENCODING\s+(\d+)\D/;

    # Mapping file: four uppercase hex digits at line start, or after
    # "U", "U+" or "0x", followed by a non-word character.
    $code = hex $1
        if !$is_bdf && $line =~ /(?:^|U[+]?|0x)([0-9A-F]{4})\W/;

    next unless defined $code;

    # warn sprintf("%04X\n", $code);
    # next if $seen{$code}++;    # (would need "my %seen;") count each once

    # Compare numerically: comparing "%04X" strings with le misfiles
    # code points above U+FFFF (e.g. "10000" sorts inside 0FC0..109F).
    for my $block (@blocks) {
        ++$block->{count} if $block->{lo} <= $code && $code <= $block->{hi};
    }
}

# Lastly print what you've found.
for my $block (@blocks) {
    print "$block->{count}\tU+$block->{first}-U+$block->{last}:$block->{name}\n";
}

# The following table is derived from unicode/2.0/unidata/blocks.txt
# and http://www.unicode.org/unicode/alloc/Pipeline.html (1998-03-09)
# Every line after __END__ is data; do not add comments or blank
# trailing text there.  The unnamed gap after "KangXi radicals" starts
# at 2FD6 so that U+2FD5 is not counted in two blocks.
__END__
0000; 007F; Basic Latin
0080; 00FF; Latin-1 Supplement
0100; 017F; Latin Extended-A
0180; 024F; Latin Extended-B
0250; 02AF; IPA Extensions
02B0; 02FF; Spacing Modifier Letters
0300; 036F; Combining Diacritical Marks
0370; 03FF; Greek
0400; 04FF; Cyrillic
0500; 052F; ???
0530; 058F; Armenian
0590; 05FF; Hebrew
0600; 06FF; Arabic
0700; 074D; Syriac
074E; 077F; ???
0780; 07B1; Thaana
07B2; 08FF; ???
0900; 097F; Devanagari
0980; 09FF; Bengali
0A00; 0A7F; Gurmukhi
0A80; 0AFF; Gujarati
0B00; 0B7F; Oriya
0B80; 0BFF; Tamil
0C00; 0C7F; Telugu
0C80; 0CFF; Kannada
0D00; 0D7F; Malayalam
0D80; 0DFF; Sinhalese
0E00; 0E7F; Thai
0E80; 0EFF; Lao
0F00; 0FBF; Tibetan
0FC0; 109F; ???
10A0; 10FF; Georgian
1100; 11FF; Hangul Jamo
1200; 137F; Ethiopic
1380; 139F; ???
13A0; 13FF; Cherokee
1400; 167F; Canadian Syllabics
1680; 169F; Ogham
16A0; 16FF; Runic
1700; 1759; Burmese
175A; 177F; ???
1780; 17E9; Khmer
17EA; 1DFF; ???
1E00; 1EFF; Latin Extended Additional
1F00; 1FFF; Greek Extended
2000; 206F; General Punctuation
2070; 209F; Superscripts and Subscripts
20A0; 20CF; Currency Symbols
20D0; 20FF; Combining Marks for Symbols
2100; 214F; Letterlike Symbols
2150; 218F; Number Forms
2190; 21FF; Arrows
2200; 22FF; Mathematical Operators
2300; 23FF; Miscellaneous Technical
2400; 243F; Control Pictures
2440; 245F; Optical Character Recognition
2460; 24FF; Enclosed Alphanumerics
2500; 257F; Box Drawing
2580; 259F; Block Elements
25A0; 25FF; Geometric Shapes
2600; 26FF; Miscellaneous Symbols
2700; 27BF; Dingbats
27C0; 27FF; ???
2800; 28FF; Braille Pattern Symbols
2900; 2EFF; ???
2F00; 2FD5; KangXi radicals
2FD6; 2FFF; ???
3000; 303F; CJK Symbols and Punctuation
3040; 309F; Hiragana
30A0; 30FF; Katakana
3100; 312F; Bopomofo
3130; 318F; Hangul Compatibility Jamo
3190; 319F; Kanbun
31A0; 31FF; ???
3200; 32FF; Enclosed CJK Letters and Months
3300; 33FF; CJK Compatibility
3400; 4DFF; CJK Unified Ideographs, Extension A
4E00; 9FFF; CJK Unified Ideographs
A000; A4C8; Yi
A4C9; ABFF; ???
AC00; D7A3; Hangul Syllables
D7A4; D7FF; ???
D800; DB7F; High Surrogates
DB80; DBFF; High Private Use Surrogates
DC00; DFFF; Low Surrogates
E000; F8FF; Private Use
F900; FAFF; CJK Compatibility Ideographs
FB00; FB4F; Alphabetic Presentation Forms
FB50; FDFF; Arabic Presentation Forms-A
FE00; FE1F; ???
FE20; FE2F; Combining Half Marks
FE30; FE4F; CJK Compatibility Forms
FE50; FE6F; Small Form Variants
FE70; FEFF; Arabic Presentation Forms-B
FF00; FFEF; Halfwidth and Fullwidth Forms
FFF0; FFFF; Specials