/******************************************************************************
  File:     $Id: hpdjprn.c,v 1.12 1998-04-26 16:47:59+02 mjl Rel $
  Contents: Ghostscript device driver "hpdj":
	    The 'print_page' routine for the 'prn' device
  Author:   Martin Lottermoser, Metzgerfeldweg 9, 85737 Ismaning, Germany

*******************************************************************************
*									      *
*	Copyright (C) 1996, 1997, 1998 by Martin Lottermoser		      *
*	All rights reserved						      *
*									      *
*******************************************************************************

  Preprocessor defines:

    HPDJ_PRINT_BLANK_ROWS
	If defined, the driver will not use the PCL command Relative Vertical
	Pixel Movement within a page. Try defining this if you think your
	printer shows excessive head movement on pages having vertical white
	space.

    HPDJ_NO_PAGECOUNTFILE
	If this symbol is defined, the driver will not write to a page count
	file.

******************************************************************************/

/* Configuration management identification.
   This string embeds the revision-control "$Id$" keyword for this file in the
   compiled object. The leading "@(#)" is presumably there so that tools
   searching binaries for that marker (such as what(1)) can locate it.
   The definition is omitted when 'lint' is defined. */
#ifndef lint
static const char
  cm_id[] = "@(#)$Id: hpdjprn.c,v 1.12 1998-04-26 16:47:59+02 mjl Rel $";
#endif

/*****************************************************************************/

#ifndef _XOPEN_SOURCE
#define _XOPEN_SOURCE	500
#endif

/* Special Aladdin header, must be included before <sys/types.h> on some
   platforms (e.g., FreeBSD). Here apparently needed because of <stdio.h>. */
#include "std.h"

/* Standard headers */
#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#ifdef HPDJ_TIMING
#include <time.h>
#endif

/* Internal headers (we need also gdevprn.h included from hpdj.h) */
#include "hpdj.h"
#include "pclcomp.h"

/*****************************************************************************/

#ifndef lint
/* Permit deduction of compilation options from the binary.
   The adjacent string literals below are concatenated by the compiler. Each
   "not " fragment is included only if the corresponding preprocessor symbol
   is undefined at compile time, so that the resulting text reads
   "<symbol> is defined." or "<symbol> is not defined." for each of
   HPDJ_PRINT_BLANK_ROWS and HPDJ_NO_PAGECOUNTFILE. */
static const char info_string[] = "@(#)HPDJ_PRINT_BLANK_ROWS is "
#ifndef HPDJ_PRINT_BLANK_ROWS
  "not "
#endif
  "defined.\n"
  "@(#)HPDJ_NO_PAGECOUNTFILE is "
#ifndef HPDJ_NO_PAGECOUNTFILE
  "not "
#endif
  "defined.";
#endif	/* lint */

/******************************************************************************

  Function: init

  This function writes initialization information for the printer on 'out'.

  This is information which one does not have to repeat on every page but
  which must always be sent before the first page of each job. There is no
  harm in repeating this between pages, although that is superfluous.

******************************************************************************/

static void init(hpdj_device *dev, FILE *out)
{
  int is_500_series;	/* non-zero for models using the older quality commands */
  int planes;		/* number of colourant components: 1, 3 or 4 */
  int xres, yres;	/* resolutions in ppi, rounded to the nearest integer */
  int coarser, finer;	/* smaller and larger of 'xres' and 'yres' */
  int end_char;		/* final character of the End Raster Graphics command */

  /* Note in the device structure that the job prologue has been written */
  dev->initialized = true;

  /* Reset the printer and establish the page layout */
  assert(dev->ps_code != pcl_ps_none);
  fprintf(out,
    "\033E"		/* Printer Reset */
    "\033&l%da"		/* Page Size */
    "0o"		/* Page Orientation: portrait */
    "0L",		/* Perforation Skip Mode: off. This also sets the PCL
			   top margin to zero. */
    dev->ps_code);

  /* Dry time is only sent if a non-negative value has been set */
  if (dev->dry_time >= 0)
    fprintf(out, "\033&b%dT", dev->dry_time);

  /* Print quality: DeskJet 500 series models get the older commands */
  is_500_series =
    dev->model == hpdj500  || dev->model == hpdj500c ||
    dev->model == hpdj510  || dev->model == hpdj520  ||
    dev->model == hpdj550c || dev->model == hpdj560c;

  if (!is_500_series) {
    fprintf(out,
      "\033&l%dM"	/* Media Type */
      "\033*o%dM",	/* Print Quality */
      dev->media_type, dev->print_quality);
  }
  else {
    /* Translate media type and print quality into depletion, quality and
       shingling values (table on page 6-34 of the TRG). Any print quality
       other than draft (-1) or presentation (1) is treated as normal. */
    int depletion, quality, shingling;
    int media_2_to_4 = (2 <= dev->media_type && dev->media_type <= 4);

    if (dev->print_quality == -1) {
      /* draft */
      depletion = 3;					/* 50 % */
      quality = 1;					/* draft */
      shingling = (dev->media_type == 4? 1: 0);	/* 2 passes or none */
    }
    else if (dev->print_quality == 1) {
      /* presentation */
      if (media_2_to_4)
	depletion = 1;		/* no depletion */
      else if (dev->colour_mode == cmy)
	depletion = 2;		/* 25 % */
      else
	depletion = 5;		/* 50 % with gamma correction */
      quality = 2;		/* high */
      shingling = 2;		/* 4 passes */
    }
    else {
      /* normal or an illegal value */
      depletion = 2;		/* 25 % */
      quality = 0;		/* use current control panel setting */
      if (!media_2_to_4)
	shingling = (dev->colour_mode == mono? 0: 1); /* none or 2 passes */
      else if (dev->media_type == 4 && dev->colour_mode == cmy)
	shingling = 1;		/* 2 passes */
      else
	shingling = 2;		/* 4 passes */
    }

    fprintf(out,
      "\033*r%dQ"	/* Raster Graphics Quality */
      "\033*o%dQ",	/* Set Raster Graphics Shingling */
      quality, shingling);

    /* Set Raster Graphics Depletion (not sent in monochrome mode) */
    if (dev->colour_mode != mono)
      fprintf(out, "\033*o%dD", depletion);
  }

  /* End Raster Graphics (this provides a known graphics state). The hpdj500
     model gets the 'B' form of the command, all others the 'C' form. */
  if (dev->model == hpdj500) end_char = 'B';
  else end_char = 'C';
  fprintf(out, "\033*r%c", end_char);

  /* Raster Graphics Setup */
  if (dev->colour_mode == mono) planes = 1;
  else if (dev->colour_mode == cmy) planes = 3;
  else planes = 4;

  xres = dev->HWResolution[0] + 0.5;
  yres = dev->HWResolution[1] + 0.5;
  if (xres < yres) { coarser = xres; finer = yres; }
  else { coarser = yres; finer = xres; }

  /* Set Raster Graphics Resolution (pixels per inch).
     Sending the larger of the two values prevents printing beyond the sheet
     if unequal resolutions were demanded but the printer does not support
     the combination---provided the printer accepts this resolution. */
  fprintf(out, "\033*t%dR", finer);

  /* Undocumented command, sent only if 'undoc1' is non-zero: a positive
     value is passed through, a negative one selects the smaller resolution.
     The original author reports a slight quality improvement on a DeskJet
     850C at 600 ppi and suspects the command concerns the smallest
     resolution used for any component. */
  if (dev->undoc1 != 0)
    fprintf(out, "\033&u%dD", dev->undoc1 > 0? dev->undoc1: coarser);

  /* Set Number of Planes per Row for colour-capable models; the negative
     value selects the (K)(CMY) palette. */
  if (hpdj_model[dev->model].colour_capability > mono)
    fprintf(out, "\033*r%dU", -planes);

  /* Configure Raster Data (experimental): sent only for unequal resolutions
     or more than 2 intensity levels. It is assumed that a model for which the
     driver accepted such settings understands the command and that it
     overrides conflicting settings like Set Number of Planes per Row. */
  if (xres != yres || dev->black_levels > 2 || dev->cmy_levels > 2) {
    int
      x_hi = xres/256, x_lo = xres%256,
      y_hi = yres/256, y_lo = yres%256;

    /* Header: byte count, the octet 2, and the number of components */
    fprintf(out, "\033*g%dW"	/* Configure Raster Data */
      "\002%c",
      2 + 6*planes, planes);

    /* One 6-octet record for black unless the mode is CMY */
    if (planes != 3)
      fprintf(out, "%c%c%c%c%c%c", x_hi, x_lo, y_hi, y_lo,
	dev->black_levels/256, dev->black_levels%256);

    /* Three identical 6-octet records for the CMY components unless the
       mode is monochrome */
    if (planes != 1) {
      int levels = dev->cmy_levels;
      int lv_hi = levels/256, lv_lo = levels%256;
      int remaining = 3;

      while (remaining-- > 0)
	fprintf(out, "%c%c%c%c%c%c", x_hi, x_lo, y_hi, y_lo, lv_hi, lv_lo);
    }
  }

  /* Media Source: manual load (envelope feed) (BPD02926: manual feed).
     This is sent if the user requested manual feed, or, in automatic mode,
     for models whose name starts with '5' when printing on envelopes.
     Nothing is sent if 'manualfeed' is 'declined'. */
  if (dev->manualfeed == requested ||
      (dev->manualfeed == automatic &&
       hpdj_model[dev->model].name[0] == '5' && is_envelope(dev->ps_code)))
    fputs("\033&l2H", out);
}

/******************************************************************************

  Function: send_plane

  This function sends a bit plane to the printer. It returns zero on success
  and a non-zero value otherwise. In the latter case, an error message will
  have been issued on stderr.

  'final' indicates whether this is the last plane of the row or not.
  'method_demanded' contains the PCL compression method desired, '*method_used'
  the one actually used in the last transmission. This last variable will be
  reset to the method used in this invocation.
  'in' points to the octet string to be sent as plane data, 'prev' points to
  the string previously sent (for this value of the plane pointer) and may be
  NULL if 'method_demanded' refers to a purely "horizontal" method.
  'out' is the file to which the plane should be written.
  'out_bf1' and 'out_bf2' are pointers to storage areas of at least length
  'in->length'+2 which can be used as scratch areas by this function.
  'out_bf2' need only be non-NULL if 'method_demanded' is 'pcl_cm_delta'.

******************************************************************************/

static int send_plane(bool final,
  pcl_compression method_demanded, pcl_compression *method_used,
  const pcl_octet_string *in, const pcl_octet_string *prev, FILE *out,
  pcl_octet *out_bf1, pcl_octet *out_bf2)
{
  int
    rc = 0;		/* Return code from commands */
  pcl_compression
    choice;		/* Method chosen */
  pcl_octet_string
    out1,
    out2,
    send;		/* Octets to be sent to the printer */

  /* Initialize 'out1' (no dynamic initializers for structs in ISO/ANSI C).
     The length passed in limits what the compression routine may produce:
     a compressed result is only of interest if it is not longer than sending
     the data uncompressed. */
  out1.str = out_bf1;
  out1.length = in->length + (*method_used == pcl_cm_none? 0: 2);
    /* 2 is the cost of switching to 'pcl_cm_none'. */

  /* Set 'send' to a compressed row to be sent and 'choice' to the compression
     method employed. */
  if (method_demanded == pcl_cm_delta) {
    /*  Method 3 (delta row compression) has a widely varying effectiveness,
	depending on the structure of the input. Hence it is best combined
	with a non-delta method like method 2, as is done here on a per-plane
	basis, or method 1, as inherent in method 9.
	The procedure here is simple: try both methods, and then take the one
	giving the shortest output.
    */
    int c1, c2;	/* cost in octets, negative if the method failed */

    /* Try delta row compression */
    rc = pcl_compress(pcl_cm_delta, in, prev, &out1);
    if (rc == 0) c1 = out1.length; else c1 = -1;
    if (*method_used != pcl_cm_delta && c1 >= 0) c1 += 2;
      /* cost of switching methods */

    /* Try TIFF compression, unless delta row compression is already free */
    if (0 == c1) c2 = -1;
    else {
      int bound = in->length + (*method_used == pcl_cm_none? 0: 2);
      if (c1 >= 0 && c1 < bound) {
	/* We're interested in TIFF compression only if it results in an octet
	   string shorter than the one produced by delta row compression. */
	bound = c1;
	if (*method_used != pcl_cm_tiff && bound >= 2) bound -= 2;
      }
      out2.str = out_bf2; out2.length = bound;
      rc = pcl_compress(pcl_cm_tiff, in, NULL, &out2);
      if (rc == 0) c2 = out2.length; else c2 = -1;
      if (*method_used != pcl_cm_tiff && c2 >= 0) c2 += 2;
    }

    /* Select the better of the two, or no compression */
    if (c1 < 0) {
      if (c2 < 0) choice = pcl_cm_none;
      else choice = pcl_cm_tiff;
    }
    else {
      if (c2 < 0 || c1 <= c2) choice = pcl_cm_delta;
      else choice = pcl_cm_tiff;
    }
    switch (choice) {
    case pcl_cm_tiff:
      send = out2; break;
    case pcl_cm_delta:
      send = out1; break;
    default:
      send = *in;
    }
  }
  else {
    if (method_demanded != pcl_cm_none &&
	pcl_compress(method_demanded, in, prev, &out1) == 0) {
      /* Send compressed data */
      send = out1;
      choice = method_demanded;
    }
    else {
      /* Send uncompressed data */
      send = *in;
      choice = pcl_cm_none;
    }
  }

  /* Switch compression methods, if needed */
  if (*method_used != choice) {
    /* Raster Graphics Compression Method */
    if (fprintf(out, "%dm", choice) < 0) {
      fprintf(stderr, ERRPREF "Error from fprintf(), errno == %d.\n", errno);
      return -1;
    }
    *method_used = choice;
  }

  /* Transfer plane to the printer */
  if (send.length == 0) {
    /* Transfer Raster Graphics Data by Row ('w') or by Plane ('v'): 0 octets.
       Failure is detected through fputc()'s return value: testing 'errno'
       after clearing it is unreliable because a successful call may modify
       'errno' and a failing one need not set it. */
    if (fputc(final? 'w': 'v', out) == EOF) {
      fprintf(stderr, ERRPREF "Error from fputc(), errno == %d.\n", errno);
      return -1;
    }
  }
  else {
    /* Transfer Raster Graphics Data by Row/Plane */
    if (fprintf(out, "%d%c", send.length, final? 'w': 'v') < 0) {
      fprintf(stderr, ERRPREF "Error from fprintf(), errno == %d.\n", errno);
      return -1;
    }
    if (fwrite(send.str, sizeof(pcl_octet), send.length, out) !=
	(size_t)send.length) {
      fprintf(stderr, ERRPREF "Error in fwrite(), errno == %d.\n", errno);
      return -1;
    }
  }

  return 0;
}

/******************************************************************************

  Function: send_row_flex

  This is a flexible but slow send_row() implementation.

  Restrictions:
  - The pixmap depth is either a multiple or a divisor of 8.
  - The pixmap depth is at most 32.
  - There may be at most 8 bits per component.

******************************************************************************/

static int send_row_flex(hpdj_device *dev, pcl_compression *method_used,
  const pcl_octet_string *row, pcl_octet_string *plane,
  pcl_octet_string *prev_plane, FILE *out, pcl_octet *out_bf1,
  pcl_octet *out_bf2)
{
  gx_color_index
    pixel;
  int
    black_planes,	/* number of planes to send for black */
    cmy_planes,		/* number of planes to send for each of C, M and Y */
    j,
    k,
    length = row->length,
    pixels,		/* number of pixels transferred to bit planes */
    planes,		/* number of planes to send */
    rc;
  pcl_octet
    comp_mask = 0,	/* bits_per_component 1s in the lowest part */
    component[4],
    pixel_mask = 0,	/* if depth < 8, depth 1s in the lowest part */
    *ptr[32];		/* pointers into planes (next octet to write to) */

  /* Determine the number of planes to send */
  switch (dev->colour_mode) {
  case mono:
    black_planes = dev->bits_per_component;
    cmy_planes = 0;
    break;
  case cmy:
    black_planes = 0;
    cmy_planes = dev->bits_per_component;
    break;
  default:
    {
      int n;
      /* Select minimum number of bits to send: the smallest count of bits
	 able to represent the number of levels, but at least 1 */
      for (n = 2, black_planes = 1; n < dev->black_levels; n *= 2)
	black_planes++;
      for (n = 2, cmy_planes = 1; n < dev->cmy_levels; n *= 2)
	cmy_planes++;
    }
  }
  planes = black_planes + 3*cmy_planes;

  /* Initialize the bit plane pointers */
  for (j = 0; j < planes; j++) ptr[j] = plane[j].str;

  /* Determine some bit masks */
  if (dev->color_info.depth < 8) {
    for (j = 0; j < dev->color_info.depth; j++)
      pixel_mask = (pixel_mask << 1) | 1;
  }
  for (j = 0; j < dev->bits_per_component; j++)
    comp_mask = (comp_mask << 1) | 1;

  /* Copy from 'row' to 'plane[]', converting Z format to XY format */
  pixels = 0;
  k = 0; /* Next octet index in the input row */
  while (k < length) {
    int l, m;

    /* Initialize plane storage if it's a new octet */
    if (pixels % 8 == 0) for (j = 0; j < planes; j++) *ptr[j] = 0;

    if (dev->color_info.depth < 8) {
      int p;

      /* Loop over pixels within the input octet, starting at the leftmost
	 pixel (highest-order bits) */
      p = 8/dev->color_info.depth - 1;
      do {
	/* Extract pixel */
	pixel = (row->str[k] >> p*dev->color_info.depth) & pixel_mask;

	/* Split into components and distribute their bits over the planes,
	   lowest bit first. 'j' runs on from the black planes into the
	   planes for the remaining components. */
	for (j = 0; j < 4; j++)
	  component[j] = (pixel >> j*dev->bits_per_component) & comp_mask;
	for (j = 0; j < black_planes; j++) {
	  *ptr[j] = (*ptr[j] << 1) | (component[BLACK_INDEX] & 1);
	  component[BLACK_INDEX] >>= 1;
	}
	for (l = 1; l < 4; l++)
	  for (m = 0; m < cmy_planes; m++, j++) {
	    *ptr[j] = (*ptr[j] << 1) | (component[l] & 1);
	    component[l] >>= 1;
	  }

	pixels++;
	p--;
      } while (p >= 0);
      k++;
    }
    else {
      /* Reconstruct pixel from several octets (depth/8 of them), the first
	 octet ending up in the highest-order position */
      j = 0;
      pixel = row->str[k];
      do {
	j++; k++;
	if (j >= dev->color_info.depth/8) break;
	pixel = (pixel << 8) | row->str[k];
      } while (1);

      /* Split and distribute over planes, as above */
      for (j = 0; j < 4; j++)
        component[j] = (pixel >> j*dev->bits_per_component) & comp_mask;
      for (j = 0; j < black_planes; j++) {
	*ptr[j] = (*ptr[j] << 1) | (component[BLACK_INDEX] & 1);
	component[BLACK_INDEX] >>= 1;
      }
      for (l = 1; l < 4; l++)
	for (m = 0; m < cmy_planes; m++, j++) {
	  *ptr[j] = (*ptr[j] << 1) | (component[l] & 1);
	  component[l] >>= 1;
	}

      pixels++;
    }

    /* Increase plane pointers if an octet boundary has been reached */
    if (pixels % 8 == 0) for (j = 0; j < planes; j++) ptr[j]++;
  }

  /* Execute remaining left shifts in the last octet of the output planes when
     the number of pixels is not a multiple of 8.
     The shift and the pointer increment are separate statements: modifying
     'ptr[j]' and reading it in the same expression without an intervening
     sequence point is undefined behaviour in C. */
  if (pixels % 8 != 0) {
    int shift = 8 - pixels % 8;
    for (j = 0; j < planes; j++) {
      *ptr[j] = *ptr[j] << shift;
      ptr[j]++;
    }
  }

  /* Determine the lengths of the bit plane strings */
  for (j = 0; j < planes; j++) {
    if (length == 0) plane[j].length = 0;
    else {
      pcl_octet *end = ptr[j] - 1;	/* last octet written */

      /* Try to reduce the length before setting it: trailing zero octets
	 are not counted */
      while (plane[j].str < end && *end == 0) end--;
      if (*end == 0) plane[j].length = 0;
      else plane[j].length = end - plane[j].str + 1;
    }
  }

  /* Send the bit planes in turn. The order must be (K)(CMY), with the lowest
     bit first in each component. Sending stops at the first error. */
  rc = 0;
  for (j = 0; rc == 0 && j < planes; j++)
    rc = send_plane(j == planes-1, dev->compression_method,
      method_used, plane + j, prev_plane + j, out, out_bf1, out_bf2);

  /* Switch plane pointers so that the planes just built become the
     "previous" planes for the next call */
  for (j = 0; j < planes; j++) {
    pcl_octet_string tmp;
    tmp = prev_plane[j]; prev_plane[j] = plane[j]; plane[j] = tmp;
  }

  return rc;
}

/******************************************************************************

  Function: send_row_3or4x1

  This function is a send_row() implementation for a non-monochrome colour mode
  (3 or 4 components) with 1 bit per component.

  On a Linux Pentium system, this function is about 4-5 times as fast as
  send_row_flex() for the same values.

******************************************************************************/

static int send_row_3or4x1(hpdj_device *dev, pcl_compression *method_used,
  const pcl_octet_string *row, pcl_octet_string *plane,
  pcl_octet_string *prev_plane, FILE *out, pcl_octet *out_bf1,
  pcl_octet *out_bf2)
{
  /* In CMY mode plane 0 is neither filled nor sent, otherwise all four
     planes are used. */
  const int first = (dev->colour_mode == cmy? 1: 0);
  const int n_in = row->length;	/* number of octets in the input row */
  pcl_octet *next[4];		/* next octet to write in each plane */
  int consumed;			/* number of input octets processed */
  int p;			/* plane index */
  int status;			/* return code of send_plane() */

  for (p = first; p < 4; p++) next[p] = plane[p].str;

/* Append one pixel, given as the four bits in 'nibble', to the four
   accumulators, one bit per component. */
#define split_quartet()						\
	do {							\
	  acc[BLACK_INDEX] <<= 1;				\
	  acc[CYAN_INDEX] <<= 1;				\
	  acc[MAGENTA_INDEX] <<= 1;				\
	  acc[YELLOW_INDEX] <<= 1;				\
	  if (nibble & BLACK_MASK)   acc[BLACK_INDEX]   |= 1;	\
	  if (nibble & CYAN_MASK)    acc[CYAN_INDEX]    |= 1;	\
	  if (nibble & MAGENTA_MASK) acc[MAGENTA_INDEX] |= 1;	\
	  if (nibble & YELLOW_MASK)  acc[YELLOW_INDEX]  |= 1;	\
	} while (0)

  /* Convert from Z format to XY format. Each input octet holds two pixels of
     four bits each, hence up to four input octets are collected into one
     output octet per plane. This loop runs very often, so it is kept free of
     expensive operations. */
  consumed = 0;
  while (consumed < n_in) {
    pcl_octet acc[4] = {0, 0, 0, 0};
    int taken = 0;	/* input octets used for this group */

    do {
      pcl_octet nibble;

      /* Left pixel: upper four bits */
      nibble = (row->str[consumed] >> 4) & 0x0F;
      split_quartet();

      /* Right pixel: lower four bits */
      nibble = row->str[consumed] & 0x0F;
      split_quartet();

      taken++;
      consumed++;
    } while (taken < 4 && consumed < n_in);

    /* Left-align an incomplete final group, then store the result */
    for (p = first; p < 4; p++) {
      if (taken < 4) acc[p] <<= 8 - 2*taken;
      *next[p] = acc[p];
      next[p]++;
    }
  }
#undef split_quartet

  /* Set the plane lengths, not counting trailing zero octets */
  for (p = first; p < 4; p++) {
    pcl_octet *last;

    plane[p].length = 0;
    if (n_in == 0) continue;

    last = next[p] - 1;
    while (last > plane[p].str && *last == 0) last--;
    if (*last != 0) plane[p].length = last - plane[p].str + 1;
  }

  /* Transmit the planes in the order (K)CMY, giving up on the first error */
  status = 0;
  for (p = first; p < 4; p++) {
    status = send_plane(p == 3, dev->compression_method,
      method_used, &plane[p], &prev_plane[p], out, out_bf1, out_bf2);
    if (status != 0) break;
  }

  /* Exchange current and previous planes, whether sending succeeded or not */
  for (p = first; p < 4; p++) {
    pcl_octet_string saved = prev_plane[p];

    prev_plane[p] = plane[p];
    plane[p] = saved;
  }

  return status;
}

/******************************************************************************

  Function: send_row_4x2

  This is a send_row() implementation for 4 colour components with 3 or 4 levels
  each (2 bit planes to send for each component).

  On a Linux Pentium system, this routine is about 3-4 times as fast as
  send_row_flex().

******************************************************************************/

static int send_row_4x2(hpdj_device *dev, pcl_compression *method_used,
  const pcl_octet_string *row, pcl_octet_string *plane,
  pcl_octet_string *prev_plane, FILE *out, pcl_octet *out_bf1,
  pcl_octet *out_bf2)
{
  gx_color_index
    pixel;
  int
    j,
    k,
    length = row->length,	/* identical to the number of pixels */
    rc;
  pcl_octet
    *ptr[8];		/* pointers into planes (next octet to write to) */

  /* Initialize the bit plane pointers */
  for (j = 0; j < 8; j++) ptr[j] = plane[j].str;

  /* Copy from 'row' to 'plane[]', converting Z format to XY format */
  for (k = 0; k < length; k++) {
    /* k is the index of the next octet in the input row and the number of
       pixels processed so far. */

    /* Initialize plane storage if it's a new octet */
    if (k % 8 == 0) for (j = 0; j < 8; j++) *ptr[j] = 0;

    /* Fetch pixel */
    pixel = row->str[k];

    /* Split and distribute over planes: bit 'index' of the pixel is appended
       to plane 'index' */
    *ptr[0] = (*ptr[0] << 1) | (pixel & 0x01);
#define assign_bit(index, mask) \
	*ptr[index] = (*ptr[index] << 1) | ((pixel & mask) >> index)
    assign_bit(1, 0x02);
    assign_bit(2, 0x04);
    assign_bit(3, 0x08);
    assign_bit(4, 0x10);
    assign_bit(5, 0x20);
    assign_bit(6, 0x40);
    assign_bit(7, 0x80);
#undef assign_bit

    /* Increase plane pointers if an octet boundary has been reached */
    if (k % 8 == 7) for (j = 0; j < 8; j++) ptr[j]++;
  }

  /* Execute remaining left shifts in the last octet of the output planes when
     the number of pixels is not a multiple of 8.
     The shift and the pointer increment are separate statements: modifying
     'ptr[j]' and reading it in the same expression without an intervening
     sequence point is undefined behaviour in C. */
  if (length % 8 != 0) {
    int shift = 8 - length % 8;
    for (j = 0; j < 8; j++) {
      *ptr[j] = *ptr[j] << shift;
      ptr[j]++;
    }
  }

  /* Determine the lengths of the bit plane strings */
  for (j = 0; j < 8; j++) {
    if (length == 0) plane[j].length = 0;
    else {
      pcl_octet *end = ptr[j] - 1;	/* last octet written */

      /* Try to reduce the length before setting it: trailing zero octets
	 are not counted */
      while (plane[j].str < end && *end == 0) end--;
      if (*end == 0) plane[j].length = 0;
      else plane[j].length = end - plane[j].str + 1;
    }
  }

  /* Send the bit planes in turn. The order must be KCMY, with the lowest
     bit first in each component. Sending stops at the first error. */
  rc = 0;
  for (j = 0; rc == 0 && j < 8; j++)
    rc = send_plane(j == 7, dev->compression_method,
      method_used, plane + j, prev_plane + j, out, out_bf1, out_bf2);

  /* Switch plane pointers so that the planes just built become the
     "previous" planes for the next call */
  for (j = 0; j < 8; j++) {
    pcl_octet_string tmp;
    tmp = prev_plane[j]; prev_plane[j] = plane[j]; plane[j] = tmp;
  }

  return rc;
}

/******************************************************************************

  Function: send_row

  This function sends a row consisting of several bit planes to the printer.

  It distributes the call to subroutines, based on various device parameters.
  All these subroutines have the same signature as this function and are
  therefore called "send_row() implementations".

  The meaning of the parameters is:
  'dev' is the device pointer,
  '*method_used' must be the last-used compression method and will be updated
    by the function,
  'row' must point to the row to be sent, given in Z format as generated by
    ghostscript,
  'plane' must point to an array of 'dev->color_info.depth' octet strings which
    can be used for storing the bit planes extracted from 'row',
  'prev_plane' points to the corresponding set of bit planes for the row
    previously sent,
  'out' is of course the output file, and
  'out_bf1' and 'out_bf2' point to storage areas of at least
    '(row->length+dev->color_info.depth-1)/dev->color_info.depth' + 2 which can
    be used as scratch areas.

  'out_bf2' must be non-NULL only if 'dev->compression_method' is
  'pcl_cm_delta'.

  The function will exchange the contents of 'plane[]' and 'prev_plane[]'.

******************************************************************************/

static int send_row(hpdj_device *dev, pcl_compression *method_used,
  const pcl_octet_string *row,
  pcl_octet_string *plane, pcl_octet_string *prev_plane,
  FILE *out, pcl_octet *out_bf1, pcl_octet *out_bf2)
{
  /* Monochrome rows are always handled by the generic implementation */
  if (dev->colour_mode == mono)
    return send_row_flex(dev, method_used, row, plane, prev_plane, out,
      out_bf1, out_bf2);

  /* Colour: pick a specialized implementation where one applies */
  switch (dev->bits_per_component) {
  case 1:
    /* 3 or 4 components with 1 bit each */
    return send_row_3or4x1(dev, method_used, row, plane, prev_plane, out,
      out_bf1, out_bf2);
  case 2:
    /* 2 bits per component, but only if both black and CMY use more than
       2 levels */
    if (dev->black_levels > 2 && dev->cmy_levels > 2)
      return send_row_4x2(dev, method_used, row, plane, prev_plane, out,
	out_bf1, out_bf2);
    break;
  default:
    break;
  }

  /* Everything else goes to the generic implementation */
  return send_row_flex(dev, method_used, row, plane, prev_plane, out, out_bf1,
    out_bf2);
}

/******************************************************************************

  Function: write_page

  This function fetches a page from memory and transfers it to the output file,
  encoded in PCL.

******************************************************************************/

static int write_page(hpdj_device *dev, FILE *out)
{
#ifdef HPDJ_TIMING
  clock_t start_time = clock();
#endif
  int
    blank_rows,		/* number of blank rows read */
    j,			/* local loop index for various purposes */
    plane_length,	/* length of planes in octets */
    rc;			/* return code from commands */
  pcl_compression
    method;		/* current compression method */
  pcl_octet
    *comp_bf1,		/* compressed row buffer 1 */
    *comp_bf2 = NULL;	/* compressed row buffer 2 */
  pcl_octet_string
    /* Bit planes to be sent and last sent */
    *plane = NULL,
    *prev_plane = NULL,
    prev_row,		/* previous row sent */
    row;		/* row to be sent now */
  uint
    line_length,	/* length of ghostscript's scanlines in bytes */
    rowno;		/* number of row, starting at zero and increasing
			   downwards on the page */

#ifdef HPDJ_TRACE
  fputs("! Entering write_page()...\n", stderr);
#endif

  /*  Things are a bit complicated here. HP DeskJet printers want to have their
      scanlines (called rows) in what X Windows calls "XY format" (bit plane
      after bit plane), whereas ghostscript generates scanlines in "Z format"
      (all bits for a particular pixel are contiguous). These are not identical
      except for a pixel depth of 1. In order to avoid having to copy scanlines
      even in that case, I'm using two sets of variables: 'row' and 'prev_row'
      for black and white, and 'row', 'plane[]' and 'prev_plane[]' for colour.
      Hence allocations become a bit messy.
  */

  /* Length values and other numbers */
  line_length = gdev_prn_raster((gx_device_printer *)dev);
  plane_length =
    (line_length + dev->color_info.depth - 1)/dev->color_info.depth;
    /* 1 bit per pixel in each plane */

  /* Initialization of input buffers */
  row.str = (byte *) malloc(line_length*sizeof(byte));
  prev_row.str = (byte *) malloc(line_length*sizeof(byte));

  /* Initialization of output buffers */
  if (dev->color_info.depth > 1) {
    plane = (pcl_octet_string *)
      malloc(dev->color_info.depth*sizeof(pcl_octet_string));
    prev_plane = (pcl_octet_string *)
      malloc(dev->color_info.depth*sizeof(pcl_octet_string));
    if (plane != NULL && prev_plane != NULL) {
      for (j = 0; j < dev->color_info.depth; j++) {
	plane[j].str = (pcl_octet *)malloc(plane_length*sizeof(pcl_octet));
	prev_plane[j].str = (pcl_octet *)malloc(plane_length*sizeof(pcl_octet));
      }
    }
  }
  /* For the compression buffers, allocating 2 octets more than 'plane_length'
     permits to delay switching compression methods, see send_plane(). */
  comp_bf1 = (pcl_octet *) malloc((plane_length+2)*sizeof(pcl_octet));
  if (dev->compression_method == pcl_cm_delta)
    comp_bf2 = (pcl_octet *) malloc((plane_length+2)*sizeof(pcl_octet));

  /* Check for success of allocations */
  if (dev->color_info.depth == 1) j = dev->color_info.depth;
  else {
    j = 0;
    if (plane != NULL && prev_plane != NULL)
      while (j < dev->color_info.depth &&
	plane[j].str != NULL && prev_plane[j].str != NULL) j++;
  }
  if (j < dev->color_info.depth ||
      row.str == NULL || prev_row.str == NULL || comp_bf1 == NULL ||
      dev->compression_method == pcl_cm_delta && comp_bf2 == NULL) {
    fputs(ERRPREF "Memory allocation failure in write_page().\n", stderr);
    /* Remember: free(NULL) is legal. */
    free(row.str); free(prev_row.str); free(comp_bf1); free(comp_bf2);
    if (plane != NULL && prev_plane != NULL)
      for (j = 0; j < dev->color_info.depth; j++) {
	free(plane[j].str);
	free(prev_plane[j].str);
      }
    free(plane); free(prev_plane);
    return_error(gs_error_VMerror);
  }

  /*  Start the page. */

  fprintf(out,
    "\033*p0Y"	/* Vertical Cursor Positioning by Dots: set CAP to first line */
    "\033*r%ds"	/* Set Raster Graphics Width (number of pixels). This is reset
		   by End Raster Graphics. */
    "0A"	/* Start Raster Graphics: at leftmost printable position */
    , 8*plane_length
  );

  /* The seed row consists of all zeroes after Start Raster Graphics. */
  prev_row.length = 0;
  if (dev->color_info.depth > 1) for (j = 0; j < dev->color_info.depth; j++)
    prev_plane[j].length = 0;

  /* Loop over scanlines, one at a time.
     The 'prn' device interface returns scanlines such that the device space
     y coordinate is constant within a line ('rowno' below is the y coordinate)
     and that the device space x coordinate increases within each line, both
     starting at zero and going to positive values.
     This code also assumes that the (0, 0) pixel coincides with the origin of
     the PCL logical page (CAP position (0, 0)). See hpdj_set_page_layout() and
     hpdj_get_initial_matrix().
  */
  blank_rows = 0;
  method = pcl_cm_none;	/* This is the default after Start Raster Graphics */
  fputs("\033*b", out);
    /* We use combined escape sequences, all with this prefix. */
  for (rowno = 0;
      (rc = gdev_prn_copy_scan_lines((gx_device_printer *)dev, rowno, row.str,
							    line_length)) == 1;
      /* Use gdev_prn_get_bits() directly ??? */
      rowno++) {

    /* Clip empty bytes at the end of the row, but only complete pixels */
    pcl_octet *end = row.str + line_length - 1;
    /* In contrast to the get_bits procedure, gdev_prn_get_bits() (which is
       called by gdev_prn_copy_scan_lines()) ensures that there are no non-zero
       bits in the padding at the end. */
    while (*end == 0 && row.str < end) end--;
    if (*end == 0) row.length = 0;
    else row.length = end - row.str + 1;
    if (dev->color_info.depth > 8) {
      int octets = dev->color_info.depth/8;	/* octets per pixel */
      row.length = ((row.length + octets - 1)/octets)*octets;
      assert(row.length <= line_length);
    }

    if (row.length == 0) blank_rows++;
    else {
      /* We have to print something */

      /* Reposition if this line was preceded by blank rows */
      if (blank_rows > 0) {
	/*  A DeskJet printer prints several rows with one pass of its
	    print head, i.e., it prints strips. Should one now print the
            blank rows as blank rows or reposition the paper via the
	    Relative Vertical Pixel Movement command? (Unfortunately, the TRG
	    mentions but does not document this command.) The latter is
	    obviously faster for "large" amounts of vertical white space.
	    If it terminates the strip for small amounts, though, the print
	    head might be moved unnecessarily.

	    My impression is that the PCL interpreter should have all the
	    information necessary to make an optimal decision, regardless of
	    which command is sent. However, I can neither be sure that I am
	    right nor that all the interpreters have been implemented in this
	    manner. In addition, the correct solution probably depends on the
	    number of ink nozzles in the print head and other information which
	    is not available to me.
	    If you therefore have the impression that your printer's head
	    movements are excessive for pages containing blank rows, try
	    compiling with HPDJ_PRINT_BLANK_ROWS defined.
        */
#ifdef HPDJ_PRINT_BLANK_ROWS
	if (blank_rows == rowno) {
	  /* Blank rows preceding page contents */
	  fprintf(out, "%dy", blank_rows);
	    /* Relative Vertical Pixel Movement */
	}
	else {
	  /* Blank rows within a page */
	  int j;
	  pcl_octet_string zero_row = {NULL, 0};

	  /* First send the difference to the last non-blank row. */
	  if (dev->color_info.depth == 1)
	    rc = send_plane(true, dev->compression_method, &method, &zero_row,
	      &prev_row, out, comp_bf1, comp_bf2);
	  else
	    rc = send_row(dev, &method, &zero_row, plane, prev_plane, out,
	      comp_bf1, comp_bf2);
	  if (rc != 0) {
	    fputs(ERRPREF "Aborting.\n", stderr);
	    break;
	  }

	  /* Now send the remaining blank lines */
	  for (j = 1; j < blank_rows; j++) {
	    if (dev->color_info.depth == 1)
	      rc = send_plane(true, dev->compression_method, &method, &zero_row,
		&zero_row, out, comp_bf1, comp_bf2);
	    else
	      rc = send_row(dev, &method, &zero_row, plane, prev_plane,
		out, comp_bf1, comp_bf2);
	    if (rc != 0) {
	      fputs(ERRPREF "Aborting.\n", stderr);
	      break;
	    }
	  }
	}
#else
	fprintf(out, "%dy", blank_rows); /* Relative Vertical Pixel Movement */
#endif
	prev_row.length = 0;
	if (dev->color_info.depth > 1)
	  for (j = 0; j < dev->color_info.depth; j++)
	    prev_plane[j].length = 0;
	blank_rows = 0;
      }

      /* Send the row to the printer */
      if (dev->color_info.depth == 1)
	/* Transmit the row as a single bit plane */
	rc = send_plane(true, dev->compression_method, &method, &row, &prev_row,
	  out, comp_bf1, comp_bf2);
      else
	/* Transmit the row as several bit planes */
	rc = send_row(dev, &method, &row, plane, prev_plane, out,
	  comp_bf1, comp_bf2);
      if (rc != 0) {
	/* This must be a serious error in writing */
	fputs(ERRPREF "Aborting.\n", stderr);
	break;
      }

      /* Switch the row buffers */
      {
	pcl_octet *tmp;

	tmp = prev_row.str; prev_row = row; row.str = tmp;
	/* Plane buffers are switched in send_row() */
      }
    }
  }
  fputs("0Y", out);
    /* Relative Vertical Pixel Movement: 0. This is a slightly dirty but simple
       way to terminate the combined escape sequence started at the beginning
       of the page. */

  /* Terminate the page */
  fprintf(out, "\033*r%c\f",	/* End Raster Graphics, eject page */
    dev->model == hpdj500? 'B': 'C');
  if (rc == 0 && ferror(out)) {
    fputs(ERRPREF "Unknown system error in writing the output file.\n", stderr);
    rc = -1;
  }

  /* Release buffers */
  free(row.str); free(prev_row.str); free(comp_bf1); free(comp_bf2);
  if (dev->color_info.depth > 1) {
    for (j = 0; j < dev->color_info.depth; j++) {
      free(plane[j].str); free(prev_plane[j].str);
    }
    free(plane); free(prev_plane);
  }

#ifdef HPDJ_TIMING
  fprintf(stderr, "! Time used in write_page(): %f s.\n",
    ((float)(clock() - start_time))/CLOCKS_PER_SEC);
#endif

#ifdef HPDJ_TRACE
  fputs("! ... leaving write_page().\n", stderr);
#endif

  if (rc != 0) return_error(gs_error_ioerror);

#ifndef HPDJ_NO_PAGECOUNTFILE
  if (dev->pagecount_file != NULL) {
    if (pcf_inccount(dev->pagecount_file, 1) != 0) {
      /* pcf_inccount() has issued an error message. */
      fputs(
        "  No further attempts will be made to access the page count file.\n",
	stderr);
      gs_free(dev->pagecount_file, strlen(dev->pagecount_file) + 1,
	sizeof(char), "write_page");
      dev->pagecount_file = NULL;
    }
  }
#endif

  return 0;
}

/******************************************************************************

  Function: hpdj_print_page

  This is the implementation of prn's print_page() method for this device.

  It initializes the printer if necessary and prints the page.

******************************************************************************/

int hpdj_print_page(gx_device_printer *device, FILE *out)
{
  hpdj_device *hpdj = (hpdj_device *)device;
  int must_init;

  /* Refuse to print when the device is flagged as invalid
     (see hpdj_put_params()). */
  if (!hpdj->is_valid) {
    fputs(ERRPREF "Aborting because of previous error(s).\n", stderr);
    return_error(gs_error_unknownerror);
  }

  /* The printer is initialized first whenever the output file is new or the
     device is not marked as initialized. */
  must_init = gdev_prn_file_is_new(device) || !hpdj->initialized;
  if (must_init) init(hpdj, out);

  /*
    Design note on resetting the printer:

    We are told when a page is the first one of a file, but not when it is
    the last. Knowing the latter would make it possible to send "Printer
    Reset" after the final page and thereby leave the printer in its default
    state, as recommended by HP. The 'close_device' device method cannot be
    used for this purpose: at least for devices derived from 'prn' its
    semantics differ from what its name suggests, just as for 'open_device'
    (which is why gdev_prn_file_is_new() is needed in addition).

    Resetting the printer at the end of every page would be possible, but the
    initialization would then have to be repeated at the beginning of every
    page. I regard this as logically inappropriate. The present behaviour is
    a problem only for the following print job, and only if that job does not
    start by resetting the printer as recommended in the TRG (every job
    generated with this driver does), so this seems not worth the effort.

    There is anyway a better way around the problem, provided you have a
    spooler with a configurable back end: let *it* send the reset command at
    the end of *every* print job. This covers not only the files produced by
    this driver but those of every other PCL-generating program as well.
  */

  /* Print the page; its return code is the result of this call. */
  return write_page(hpdj, out);
}
