#!/usr/local/bin/perl -p
# http://czyborra.com/translit/java 1998-07-13 roman@czyborra.com
#
# Filter that rewrites UTF-8 encoded text as ASCII using Java's \uXXXX
# Unicode escapes.  It runs under -p, so each input line arrives in $_ and
# is printed after the script body has modified it.  The result is fairly
# easy to comprehend for anybody who knows how to look up Unicode
# characters by hexadecimal number, but it is not very legible.
#
# Example: the bytes 0xC4 0xB2 (U+0132, the IJ ligature) followed by
# "sselmeer" are transformed to \u0132sselmeer
#
# Backslashes already present in the input are deliberately passed through
# unchanged; no protection like s/\\/\\\\/g or s/\\/\\u005C/g is applied.

use strict;
use warnings;

# Return a copy of $text in which every 2-, 3- and 4-byte UTF-8 sequence is
# replaced by its Java escape.  $text is treated as a string of bytes.
# Bytes that do not form one of the matched sequences are left as they are.
sub java_escape {
    my ($text) = @_;

    # Two-byte sequences: 110xxxxx 10yyyyyy -> 11-bit code point.
    $text =~ s{([\xC0-\xDF])([\x80-\xBF])}{
        sprintf("\\u%04X", (ord($1) & 0x1F) << 6 | (ord($2) & 0x3F))
    }ge;

    # Three-byte sequences: 1110xxxx 10yyyyyy 10zzzzzz -> 16-bit code point.
    $text =~ s{([\xE0-\xEF])([\x80-\xBF])([\x80-\xBF])}{
        sprintf("\\u%04X",
            (ord($1) & 0x0F) << 12 | (ord($2) & 0x3F) << 6 | (ord($3) & 0x3F))
    }ge;

    # Four-byte sequences encode code points above U+FFFF, which do not fit
    # into a single \uXXXX escape; Java represents them as a UTF-16
    # surrogate pair.  Overlong or out-of-range sequences are not valid
    # code points and are left untouched rather than turned into bogus
    # surrogates.
    $text =~ s{([\xF0-\xF4])([\x80-\xBF])([\x80-\xBF])([\x80-\xBF])}{
        my $code_point = (ord($1) & 0x07) << 18 | (ord($2) & 0x3F) << 12
                       | (ord($3) & 0x3F) << 6  | (ord($4) & 0x3F);
        $code_point >= 0x10000 && $code_point <= 0x10FFFF
            ? sprintf("\\u%04X\\u%04X",
                  0xD800 + (($code_point - 0x10000) >> 10),
                  0xDC00 + (($code_point - 0x10000) & 0x3FF))
            : "$1$2$3$4";
    }ge;

    return $text;
}

$_ = java_escape($_);