/* 
 * Copyright 1994 Chris Smith
 *
 * Permission to use, copy, modify, and distribute this software and its
 * documentation for any purpose and without fee is hereby granted,
 * provided that the above copyright notice appears in all copies and that
 * both that copyright notice and this permission notice appears in
 * supporting documentation.  I make no representations about the
 * suitability of this software for any purpose.  It is provided "as is"
 * without express or implied warranty.
 */

#include <assert.h>
#include <stdio.h>
#include <string.h>

#include "bj200dev.h"

/* This is where the action is -- typically 50-75% of the run time is
   spent in union_cols and send_band, missing the data cache.
   union_cols must look at each word in the page buffer, and send_band
   looks at all of them with any bits set.

   union_cols is an attempt to get cute that did not work out so well.
   send_band looks across rows, a bad order for the cache, but it is
   likely to find some bits it can send, and we then only need to
   fault the data in once.

   Probably the only way to really speed it up is with a band buffer --
   a buffer 8 bytes high and 2880 dots wide would fit in 23K, leaving
   some breathing room.  (And requiring a sorted display list, and
   the drawing of little sections of chars and rules.)

   [ Actually, you know, that's not true.  Just use the display list,
   and render by rows, and send each band as it's completed.  The
   magic of VM will do the rest, perhaps.  Try to never touch a word
   of the page bitmap unless it's under the print head, basically. ]

   But this way does a little better than 1 page / sec on my 486/33,
   and that is good enough for me.  It's sure fast enough to keep
   the printer fed with a very low CPU load. */

/* Row-occupancy summary built by union_cols: the bitwise OR of every dot
   column of page[] from X0 up to (not including) XN.  A set bit means at
   least one of those columns has a dot in that row; send_page scans this
   to skip blank rows. */

static unsigned long anyrow[NWCOL];

/* Current device position as tracked by this file.  ypos is advanced by
   vskip; xpos is updated by hskip and send_6 and reset to X0 by vskip. */

static int ypos, xpos;

/* Buffer for graphics data: a 6-byte header (filled in by flush_buf),
   then room for 8 inches wide by 48 dots high, i.e. 8 * 360 dot columns
   of 6 bytes each.  bufp points at the next free byte of buf. */

static unsigned char buf[6 + 8 * 360 * 6];
static unsigned char *bufp;

/* Forward declarations of the file-local helpers */

static void union_cols (void);
static void send_page (void);
static void vskip (int y);
static void hskip (int x);
static void send_band (int y);
static void send_6 (unsigned long hi, unsigned long lo);
static void init_buf (void);
static void flush_buf (void);
static void mask_last_row (int y);


/* Job prologue: write the two printer setup sequences to stdout.
   Called once, at the start of the job. */

void bj_init ()
{
  /* basic init string, everything off except \n -> crlf */
  static const char init_seq[] = "\033[K\004\000\001\044\020\000";

  /* set vertical spacing unit to 1/360 in */
  static const char spacing_seq[] = "\033[\\\004\000\000\000\150\001";

  /* sizeof counts the implicit trailing NUL, which must not be sent */
  fwrite (init_seq, sizeof init_seq - 1, 1, stdout);
  fwrite (spacing_seq, sizeof spacing_seq - 1, 1, stdout);
}

/* Job epilogue: write the printer reset sequence to stdout.
   Called once, at the end of the job. */

void bj_eof ()
{
  /* reset to DIP switch defaults */
  static const char reset_seq[] = "\033[K\004\000\001\044\200\200";

  /* sizeof counts the implicit trailing NUL, which must not be sent */
  fwrite (reset_seq, sizeof reset_seq - 1, 1, stdout);
}

/* Send the page held in page[] to the printer.  First summarises which
   rows contain any dots (union_cols), then transmits the nonblank bands
   and ends the page (send_page). */

void bj_printpage ()
{
  union_cols ();	/* rebuild anyrow[] from page[] */
  send_page ();		/* emit the bands, then form feed and flush stdout */
}

/* Build anyrow[]: OR together all the dot columns X0 .. XN-1 of page[],
   word by word, so that send_page can spot all-white rows.  Each column
   occupies NWCOL consecutive words of page[]. */

static void union_cols (void)
{
  int x, n;
  unsigned long *pg;

  /* memset comes from <string.h>, which this file includes.  The legacy
     bzero used previously is declared in <strings.h>, which is not
     included here. */
  memset (anyrow, 0, sizeof anyrow);

  pg = &page[X0 * NWCOL];

  for (x = X0; x < XN; x++)
    {
      /* A plain loop works for any NWCOL; the earlier hand-unrolled
         version required NWCOL to be a multiple of 5. */
      for (n = 0; n < NWCOL; n++)
        anyrow[n] |= pg[n];

      /* step to the first word of the next column */
      pg += NWCOL;
    }
}

/* Actually transmit the page.  anyrow[] tells which rows are nonblank.

   Scans anyrow[] downward from row Y0.  Each time a nonblank row is found,
   the 48-row band starting there is sent (vskip + send_band) and the scan
   resumes 48 rows further down.  Stops at row YN, then emits a form feed
   and flushes stdout.

   NOTE(review): the bit arithmetic below treats each word of anyrow[] as
   holding 32 rows, with 0x80000000 standing for the first row of a word. */

static void send_page ()
{
  unsigned y;			/* row currently being examined */
  unsigned long *yq, yr;	/* yq: word of anyrow[] holding row y's bit;
				   yr: single-bit mask for row y in *yq */

  /* initialize device pos to where print head actually starts on page */
  xpos = X0;
  ypos = Y0;

  /* set up graphics data buffer */
  init_buf ();

  /* scan from the top printable line */
  y = Y0;
  yq = anyrow + y / 32;
  yr = 0x80000000U >> (y % 32);
  
  for (;;)
    {
      /* find the next nonblank row */
      while ((*yq & yr) == 0)
	{
	  /* step down one row; give up when the last row has been passed */
	  y++;
	  if (y == YN) goto break1;
	  if ((yr >>= 1) == 0)
	    {
	      /* the mask ran off the end of this word: start on the next
		 word, skipping entirely blank words 32 rows at a time */
	      yq++, yr = 0x80000000U;
	      while (*yq == 0)
		{
		  y += 32;
		  if (y >= YN) goto break1;
		  yq++;
		}
	    }
	}

      /* Do not let the bottom of the band pass the bottom printable row */
      if (y > YN - 48)
	{
	  /* Back up to the last full band.  mask_last_row first clears the
	     rows above y so they are not printed twice.  yq and yr are not
	     adjusted here; after this band y + 48 == YN, so the loop exits
	     before they are used again. */
	  mask_last_row (y);
	  y = YN - 48;
	}

      /* Send the 48-dot-high band starting at that row */
      vskip (y);
      send_band (y);

      /* advance 48 */
      y += 48;
      if (y == YN)
	break;
      /* 48 rows is one word plus 16 bits.  If the mask is in the upper half
	 of the word, move one word on and 16 bits down; otherwise the bit
	 carries into the word after that, 16 bits further up. */
      if (yr & 0xffff0000)
	yq++, yr >>= 16;
      else
	yq += 2, yr <<= 16;
    }
 break1:

  /* end page, then wait for it to print so if we run mf there isn't
     a half-printed page sitting there looking stupid */
  putchar ('\f');
  fflush (stdout);
}

/* Move the device down to row y by emitting ESC J <n> sequences, each
   covering at most 255 units, and update ypos accordingly.  Also resets
   xpos to X0. */

static void vskip (int y)
{
  int step;

  /* motion is downward only (or none at all, at the top of the page) */
  assert (ypos < y || y == Y0);

  while ((step = y - ypos) != 0)
    {
      /* the distance is sent as a single byte */
      if (step > 255)
        step = 255;

      putchar ('\033');
      putchar ('J');
      putchar (step);

      ypos += step;
    }

  /* esc-J did a \r (we have set the printer to newline mode) */
  xpos = X0;
}

/* Output the 48 rows with top row y.

   Walks the dot columns X0 .. XN-1.  For each column the 48 band bits are
   gathered into (hi, lo): hi holds the first 32 rows and the upper 16 bits
   of lo the remaining 16.  Columns with no dot inside the band are
   skipped; the others are positioned with hskip and queued with send_6.
   The graphics buffer is flushed at the end.

   NOTE(review): the shift arithmetic assumes the words of page[] hold
   32 bits. */

static void send_band (int y)
{
  /* split y into a word index and a bit offset within that word */
  const int lsh = y & 31;
  const int rsh = 32 - lsh;
  const int q = y >> 5;
  unsigned long *p = page + X0 * NWCOL + q;
  int x;

  for (x = X0; x < XN; x++, p += NWCOL)
    {
      unsigned long hi, lo;

      if (lsh == 0)
        {
          /* Word aligned.  Handled separately because rsh would be 32,
             and shifting a 32-bit word by its full width is undefined. */
          hi = p[0];
          lo = p[1];
        }
      else
        {
          hi = p[0] << lsh | p[1] >> rsh;
          lo = p[1] << lsh;

          /* Only when the band starts more than 16 bits into the word
             do its last rows spill into a third word.  At lsh == 16 the
             band ends exactly at the end of p[1], so p[2] is not read. */
          if (lsh > 16)
            lo |= p[2] >> rsh;
        }

      /* send_6 transmits only the upper 16 bits of lo.  Drop the rest so
         that dots below the band do not cause an all-zero column to be
         sent. */
      lo &= 0xffff0000UL;

      if (hi || lo)
        {
          hskip (x);
          send_6 (hi, lo);
        }
    }

  flush_buf ();
}

/* Move the device right from xpos to column x.  Distances of three or
   more columns are covered with an ESC d sequence (after flushing any
   pending graphics data); what is left over, at most two columns, is
   covered by sending all-zero graphics columns. */

static void hskip (int x)
{
  int dist = x - xpos;
  int pad;

  /* the distance must be non-negative and fit in 16 bits */
  assert ((dist & 0xffff0000) == 0);

  /* hskip unit is 3 columns */
  if (dist >= 3)
    {
      int units = dist / 3;

      /* move 1/120ths with an escape sequence */
      flush_buf ();
      putchar ('\033');
      putchar ('d');
      putchar (units & 255);
      putchar (units >> 8);
    }

  /* move 1/360ths by all-zero columns of graphics */
  for (pad = dist % 3; pad > 0; pad--)
    send_6 (0, 0);

  xpos = x;
}

/* Queue one graphics dot column: the 48 high bits of (hi,lo), i.e. the
   four bytes of hi followed by the top two bytes of lo, most significant
   byte first.  Appends six bytes at bufp and advances xpos by one dot. */

static void send_6 (unsigned long hi, unsigned long lo)
{
  unsigned char *out = bufp;

  /* bytes of hi, high to low */
  *out++ = hi >> 24;
  *out++ = hi >> 16;
  *out++ = hi >> 8;
  *out++ = hi;

  /* top two bytes of lo */
  *out++ = lo >> 24;
  *out++ = lo >> 16;

  bufp = out;

  /* the column just queued moves the head one dot to the right */
  xpos++;
}

/* Reset the graphics data buffer to empty: the first six bytes of buf are
   reserved for the header written by flush_buf, so data starts after
   them. */

static void init_buf ()
{
  bufp = &buf[6];
}

/* Send graphics data buffer.  If any dot columns are queued, fills in the
   6-byte header (ESC [ g, 16-bit little-endian length, mode byte), writes
   header and data to stdout, and resets the buffer to empty.  Does nothing
   when the buffer holds no data. */

static void flush_buf (void)
{
  int total, payload;

  /* bufp points at the next free byte, so it may legitimately sit one
     past the end of buf: a band in which every column is queued fills
     the buffer exactly.  Only going beyond that is an overflow. */
  assert (bufp <= buf + sizeof buf);

  total = bufp - buf;

  /* nothing but the reserved header space: no data to send */
  if (total == 6)
    return;

  /* the length field counts the mode byte plus the data bytes */
  payload = total - 5;

  buf[0] = '\033';
  buf[1] = '[';
  buf[2] = 'g';
  buf[3] = payload & 255;
  buf[4] = payload >> 8;
  buf[5] = 16;		/* 360x360 */
  fwrite (buf, total, 1, stdout);

  bufp = buf + 6;
}

/* The final band is always sent from row YN-48, which can overlap rows
   that an earlier band already printed.  The caller was about to start a
   band at row y and is backing up to YN-48; to keep anything from being
   printed twice, wipe the page bits above row y in every column from X0
   up to XN: the two words before the one containing y are zeroed, and in
   the word containing y only the bits from y downward are kept. */

static void mask_last_row (int y)
{
  const int word = y >> 5;
  const int bit = y & 31;
  const unsigned long keep = -1U >> bit;
  unsigned long *col = page + X0 * NWCOL + word;
  int x;

  for (x = X0; x < XN; x++)
    {
      col[-2] = 0;
      col[-1] = 0;
      col[0] &= keep;

      /* on to the same word of the next column */
      col += NWCOL;
    }
}
