#!/usr/bin/perl
#
# Filter: replace every multi-byte UTF-8 sequence in the input with the
# equivalent decimal numeric character reference ("&#NNNN;"). ASCII bytes
# and bytes that do not form a recognised sequence pass through untouched.
#
# Input is taken from the files named on the command line, or from STDIN
# when none are given; the result is written to STDOUT.
#
# Handles 2-, 3- and 4-byte sequences, i.e. the Basic Multilingual Plane
# (the old UCS-2 range) as well as the supplementary planes.
#
use strict;
use warnings;

# Run the filter only when executed as a script, so that cn() can be loaded
# and exercised on its own (e.g. via require) without consuming STDIN.
main() unless caller;

# Read the input line by line, convert it and print it.
sub main {
    while (my $line = <>) {
        chomp $line;

        # One pass, longest form first. The three alternatives cannot
        # overlap because each starts with a distinct lead-byte range.
        # 4-byte leads are limited to F0..F4 (RFC 3629); F5..F7 would
        # encode values far beyond U+10FFFF.
        $line =~ s{
            ( [\xf0-\xf4] [\x80-\xbf]{3}
            | [\xe0-\xef] [\x80-\xbf]{2}
            | [\xc0-\xdf] [\x80-\xbf]
            )
        }{ cn(split //, $1) }gex;

        # chomp removed the record separator (if any); always put one back.
        print $line . $/;
    }
    return;
}

# cn(LEAD, CONT, ...)
#
# Decode one UTF-8 sequence into a numeric character reference.
#
# Arguments: the bytes of the sequence, one byte per argument -- the lead
#            byte first, followed by its continuation bytes.
# Returns:   the string "&#N;" where N is the decoded code point in decimal.
#
# No validation is performed (overlong forms, surrogates, ...); callers are
# expected to pass a structurally well-formed sequence.
sub cn {
    my ($lead, @continuation) = map { unpack 'C', $_ } @_;

    # Strip the length-marker bits from the lead byte. The number of
    # payload bits depends on the sequence length:
    #   110xxxxx -> 5 bits, 1110xxxx -> 4 bits, 11110xxx -> 3 bits.
    my $mask = $lead >= 0xf0 ? 0x07
             : $lead >= 0xe0 ? 0x0f
             :                 0x1f;
    my $code_point = $lead & $mask;

    # Every continuation byte (10xxxxxx) contributes six more bits.
    for my $byte (@continuation) {
        $code_point = ($code_point << 6) + ($byte & 0x3f);
    }

    return sprintf '&#%u;', $code_point;
}

# True value so the file can also be loaded with require.
1;