#!/usr/local/bin/perl -p

# http://czyborra.com/translit/java 1998-07-13 roman@czyborra.com

# Java's Unicode escaping as performed by this two-liner gives an
# ASCII representation for UTF-8 encoded Unicode texts that is fairly
# easy to comprehend for anybody who knows how to look up Unicode
# characters by hexadecimal numbers but it will not be very legible.
# Example: Ĳsselmeer (IJsselmeer) is transformed to \u0132sselmeer

# I need no backslash protection like s/\\/\\\\/g or s/\\/\\u005C/g;

use strict;
use warnings;

# _java_escape($bytes)
#
# Returns a copy of the byte string $bytes in which every multi-byte UTF-8
# sequence has been replaced by Java Unicode escapes.  ASCII bytes, and
# bytes that do not form one of the sequences below, are passed through
# unchanged.  An undefined argument is returned as-is.
#
#   2 bytes (lead C0-DF): \uXXXX built from 5 + 6 payload bits
#   3 bytes (lead E0-EF): \uXXXX built from 4 + 6 + 6 payload bits
#   4 bytes (lead F0-F4): code points above U+FFFF have no single \uXXXX
#                         form, so they are written as a UTF-16 surrogate
#                         pair \uD8xx\uDCxx
#
# The lead-byte ranges are disjoint and the replacements are pure ASCII,
# so the three substitutions cannot interfere with one another.
sub _java_escape {
    my ($text) = @_;
    return $text unless defined $text;

    # ord() gives the unsigned byte value, so each payload can be masked
    # out before shifting instead of shifting a signed value and masking
    # the result afterwards.
    $text =~ s{([\xC0-\xDF])([\x80-\xBF])}{
        sprintf("\\u%04X", ((ord($1) & 0x1F) << 6) | (ord($2) & 0x3F))
    }ge;

    $text =~ s{([\xE0-\xEF])([\x80-\xBF])([\x80-\xBF])}{
        sprintf("\\u%04X",
            ((ord($1) & 0x0F) << 12)
          | ((ord($2) & 0x3F) << 6)
          |  (ord($3) & 0x3F))
    }ge;

    $text =~ s{([\xF0-\xF4])([\x80-\xBF])([\x80-\xBF])([\x80-\xBF])}{
        my $code_point = ((ord($1) & 0x07) << 18)
                       | ((ord($2) & 0x3F) << 12)
                       | ((ord($3) & 0x3F) << 6)
                       |  (ord($4) & 0x3F);

        # Only U+10000..U+10FFFF can be expressed as a surrogate pair;
        # overlong or out-of-range sequences are left untouched.
        ($code_point >= 0x10000 && $code_point <= 0x10FFFF)
            ? sprintf("\\u%04X\\u%04X",
                  0xD800 | (($code_point - 0x10000) >> 10),
                  0xDC00 | (($code_point - 0x10000) & 0x03FF))
            : "$1$2$3$4";
    }ge;

    return $text;
}

# Under -p, $_ holds the current input line and is printed after this runs.
$_ = _java_escape($_);
