/* http://czyborra.com/scsu/scsu.c written by Roman Czyborra@dds.nl
 * on Andrea's balcony in North Amsterdam on 1998-08-04
 * Thanks to Richard Verhoeven <rcb5@win.tue.nl> for his suggestion
 * to correct the haphazard "if" after UQU to "else if" on 1998-10-01
 * 
 * This is a deflator to UTF-8 output for input compressed in SCSU,
 * the (Reuters) Standard Compression Scheme for Unicode as described
 * in http://www.unicode.org/unicode/reports/tr6.html
 *
 * Simply compile it with make scsu or cc -o scsu scsu.c and add
 *
 * text/plain; scsu < %s | xviewer yudit; \
 *   test=case %{charset} in [Ss][Cc][Ss][Uu])\;\; *)[ ]\; esac
 *
 * to your mailcap.
 *
 * This is freeware as long as you properly attribute my contribution.  */

#include <stdio.h>
#include <stdlib.h>

/* Write one UTF-16 code unit or Unicode code point to stdout as UTF-8.
 *
 * A high surrogate (0xD800..0xDBFF) is not written; its low ten bits are
 * remembered.  A low surrogate (0xDC00..0xDFFF) is combined with the
 * remembered bits into a code point at or above 0x10000.  No check is made
 * that the two halves actually arrived as a pair.
 *
 * Values of 0x200000 and above produce no output at all.
 */
static void output (int c)
{
  /* low ten bits of the most recently seen high surrogate */
  static int pending_high;

  int lead;   /* marker bits of the first byte of a multibyte sequence */
  int trail;  /* number of continuation bytes that follow the first byte */

  if (c >= 0xD800 && c <= 0xDBFF)
    {
      pending_high = c & 0x3FF;
      return;
    }

  if (c >= 0xDC00 && c <= 0xDFFF)
    {
      /* 0x2400 == 0x10000 - 0xDC00 */
      c += 0x2400 + pending_high * 0x400;
    }

  if (c < 0x80)
    {
      /* single byte, written unchanged */
      putchar (c);
      return;
    }

  if (c < 0x800)
    {
      lead = 0xC0;
      trail = 1;
    }
  else if (c < 0x10000)
    {
      lead = 0xE0;
      trail = 2;
    }
  else if (c < 0x200000)
    {
      lead = 0xF0;
      trail = 3;
    }
  else
    {
      return;
    }

  /* first byte carries the topmost bits, then six bits per continuation */
  putchar (lead | (c >> (6 * trail)));
  while (trail-- > 0)
    {
      putchar (0x80 | ((c >> (6 * trail)) & 0x3F));
    }
}

/* Read one byte from stdin and return it as a value in 0..255.
 *
 * When getchar() reports EOF (end of input or a read error) the whole
 * program terminates via exit (0); this function then never returns.
 * Callers rely on this as the decoder's only way to stop.
 */
static int nextchar (void)
{
  int c = getchar ();

  if (c == EOF)
    {
      exit (0);
    }

  return c;
}

/* SCSU uses the following variables and default values: */

/* active: index (0..7) into slide[] of the currently selected window.
   mode:   nonzero while main() is inside its Unicode-mode loop. */
static char active = 0, mode = 0;

/* scratch variables for the tag and argument bytes being decoded */
static int c, d;

/* start[n]: base added to a quoted byte below 0x80 (SQn). */
static const int start[8] =
  {0x0000,0x0080,0x0100,0x0300,0x2000,0x2080,0x2100,0x3000};

/* slide[n]: base of window n, added to bytes 0x80..0xFF; the values below
   are the defaults and are overwritten by the window-defining tags. */
static int slide[8] =
  {0x0080,0x00C0,0x0400,0x0600,0x0900,0x3040,0x30A0,0xFF00};

/* win[x]: window base selected by offset byte x in the SDn and UDn tags.
     0x01..0x67  ->  x * 0x80
     0x68..0xA7  ->  x * 0x80 + 0xAC00
     0xF9..0xFF  ->  seven fixed positions
   All remaining entries are 0x0000. */
static const int win[256] = {
      0x0000, 0x0080, 0x0100, 0x0180, 0x0200, 0x0280, 0x0300, 0x0380,
      0x0400, 0x0480, 0x0500, 0x0580, 0x0600, 0x0680, 0x0700, 0x0780,
      0x0800, 0x0880, 0x0900, 0x0980, 0x0A00, 0x0A80, 0x0B00, 0x0B80,
      0x0C00, 0x0C80, 0x0D00, 0x0D80, 0x0E00, 0x0E80, 0x0F00, 0x0F80,
      0x1000, 0x1080, 0x1100, 0x1180, 0x1200, 0x1280, 0x1300, 0x1380,
      0x1400, 0x1480, 0x1500, 0x1580, 0x1600, 0x1680, 0x1700, 0x1780,
      0x1800, 0x1880, 0x1900, 0x1980, 0x1A00, 0x1A80, 0x1B00, 0x1B80,
      0x1C00, 0x1C80, 0x1D00, 0x1D80, 0x1E00, 0x1E80, 0x1F00, 0x1F80,
      0x2000, 0x2080, 0x2100, 0x2180, 0x2200, 0x2280, 0x2300, 0x2380,
      0x2400, 0x2480, 0x2500, 0x2580, 0x2600, 0x2680, 0x2700, 0x2780,
      0x2800, 0x2880, 0x2900, 0x2980, 0x2A00, 0x2A80, 0x2B00, 0x2B80,
      0x2C00, 0x2C80, 0x2D00, 0x2D80, 0x2E00, 0x2E80, 0x2F00, 0x2F80,
      /* last entry of this row is index 0x67: 0x67 * 0x80 == 0x3380 */
      0x3000, 0x3080, 0x3100, 0x3180, 0x3200, 0x3280, 0x3300, 0x3380,
      0xE000, 0xE080, 0xE100, 0xE180, 0xE200, 0xE280, 0xE300, 0xE380,
      0xE400, 0xE480, 0xE500, 0xE580, 0xE600, 0xE680, 0xE700, 0xE780,
      0xE800, 0xE880, 0xE900, 0xE980, 0xEA00, 0xEA80, 0xEB00, 0xEB80,
      0xEC00, 0xEC80, 0xED00, 0xED80, 0xEE00, 0xEE80, 0xEF00, 0xEF80,
      0xF000, 0xF080, 0xF100, 0xF180, 0xF200, 0xF280, 0xF300, 0xF380,
      0xF400, 0xF480, 0xF500, 0xF580, 0xF600, 0xF680, 0xF700, 0xF780,
      0xF800, 0xF880, 0xF900, 0xF980, 0xFA00, 0xFA80, 0xFB00, 0xFB80,
      0xFC00, 0xFC80, 0xFD00, 0xFD80, 0xFE00, 0xFE80, 0xFF00, 0xFF80,
      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
      0x0000, 0x00C0, 0x0250, 0x0370, 0x0530, 0x3040, 0x30A0, 0xFF60};

/* deflation algorithm */

/* Decode SCSU read from stdin and write the result as UTF-8 to stdout.
 *
 * The loop has no exit of its own: nextchar() terminates the program when
 * the input is exhausted, possibly in the middle of a multi-byte tag.
 *
 * Single-byte mode is handled by the outer if-chain; the SCU tag (0x0F)
 * enters the nested Unicode-mode loop, which runs until a UCn, UDn or UDX
 * tag clears `mode` again.
 */
int main (void)
{
  for (;;)
    {
      c = nextchar ();

      if (c >= 0x80)
        {
          /* byte in the active window */
          output (c - 0x80 + slide[active]);
        }
      else if (c >= 0x20 && c <= 0x7F)
        {
          /* passed through unchanged */
          output (c);
        }
      else if (c == 0x0 || c == 0x9 || c == 0xA || c == 0xC || c == 0xD)
        {
          /* these control bytes are passed through unchanged */
          output (c);
        }
      else if (c >= 0x1 && c <= 0x8) /* SQn */
        {
          /* single quote: one byte taken from window n without making that
             window active; bytes below 0x80 use start[], others slide[] */
          d = nextchar ();

          output (d < 0x80 ? d + start[c - 0x1]
                           : d - 0x80 + slide[c - 0x1]);
        }
      else if (c >= 0x10 && c <= 0x17) /* SCn */
        {
          /* change window */
          active = c - 0x10;
        }
      else if (c >= 0x18 && c <= 0x1F) /* SDn */
        {
          /* define window: select window n and reposition it according to
             the offset byte that follows */
          active = c - 0x18;
          slide[active] = win[nextchar ()];
        }
      else if (c == 0xB) /* SDX */
        {
          /* two argument bytes: the top three bits of the first select the
             window, the remaining 13 bits, times 0x80, are added to
             0x10000 to give the new window base */
          c = nextchar ();
          d = nextchar ();
          slide[active = c >> 5] = 0x10000 + (((c & 0x1F) << 8 | d) << 7);
        }
      else if (c == 0xE) /* SQU */
        {
          /* one 16-bit unit follows, high byte first */
          c = nextchar ();
          output (c << 8 | nextchar ());
        }
      else if (c == 0xF) /* SCU */
        {
          /* change to Unicode mode */
          mode = 1;

          while (mode)
            {
              c = nextchar ();

              if (c <= 0xDF || c >= 0xF3)
                {
                  /* not a tag: c is the high byte of a 16-bit unit */
                  output (c << 8 | nextchar ());
                }
              else if (c == 0xF0) /* UQU */
                {
                  /* quoted 16-bit unit, high byte first */
                  c = nextchar ();
                  output (c << 8 | nextchar ());
                }
              else if (c >= 0xE0 && c <= 0xE7) /* UCn */
                {
                  /* select window n and leave Unicode mode */
                  active = c - 0xE0;
                  mode = 0;
                }
              else if (c >= 0xE8 && c <= 0xEF) /* UDn */
                {
                  /* define window n, select it and leave Unicode mode */
                  slide[active = c - 0xE8] = win[nextchar ()];
                  mode = 0;
                }
              else if (c == 0xF1) /* UDX */
                {
                  /* same argument layout as SDX; leaves Unicode mode */
                  c = nextchar ();
                  d = nextchar ();
                  slide[active = c >> 5] =
                    0x10000 + (((c & 0x1F) << 8 | d) << 7);
                  mode = 0;
                }
              /* 0xF2 matches no branch and is skipped without output */
            }
        }
    }
}

