/* -*- Mode:Text -*- */
#ifndef lint
static char Rcs_Id[] =
	"$Id: makedent.c,v 1.28 91/08/10 14:09:40 geoff Exp $";
#endif

/*
 * Copyright 1988, 1989, by Geoff Kuenning, Manhattan Beach, CA
 * Permission for non-profit use is hereby granted.
 * All other rights reserved.
 * See "version.h" for a more complete copyright notice.
 */

/*
 * $Log:	makedent.c,v $
 * Revision 1.28  91/08/10  14:09:40  geoff
 * Fix stringcharlen to return zero for regular characters, so that
 * single-character stringchars can be supported.
 * 
 * Revision 1.27  91/07/15  19:27:09  geoff
 * Add comments mentioning that the input to makedent and chupcase must
 * be canonical.  Add a "canonical" parameter to strtoichar, strtosichar,
 * stringcharlen, and all calls to those routines and to l1_isstringch.
 * 
 * Revision 1.26  91/07/11  19:52:19  geoff
 * Remove the include of stdio.h, since ispell.h now does this.
 * 
 * Revision 1.25  91/07/05  20:48:59  geoff
 * Fix a typo in the previous code to obey onlykeep.
 * 
 * Revision 1.24  91/07/05  20:31:57  geoff
 * Fix some more lint complaints.  Fix toutent so it obeys "onlykeep"
 * regardless of the setting of the CAPITALIZATION option.
 * 
 * Revision 1.23  91/07/05  20:15:50  geoff
 * Don't conditionally compile ibuf;  it's always needed
 * 
 * Revision 1.22  91/07/05  19:51:49  geoff
 * Fix some lint complaints, including removing the unused "hasaffixes"
 * function.
 * 
 * Revision 1.21  91/07/04  00:04:17  geoff
 * Pick up findfiletype() from ispell.c.  Change stringcharlen to insist
 * on a match with the current string character type;  this allows
 * ambiguities between different notations to be resolved.
 * 
 * Revision 1.20  91/07/03  18:20:59  geoff
 * Replace all conversions of chars to ichar_t's with a macro call which
 * (a) does the conversion correctly and (b) makes future changes easy in
 * case (a) is false.
 * 
 * Revision 1.19  91/06/23  22:08:57  geoff
 * When casting to ichar_t, cast to unsigned first to avoid sign-extension
 * problems.
 * 
 * Revision 1.18  91/06/12  19:15:17  geoff
 * Range-check the bit number before setting mask bits.
 * 
 * Revision 1.17  91/06/11  02:26:02  geoff
 * Before allocating space for a word when making a dictionary entry,
 * canonicalize it by converting to an ichar_t and back.  Fix chupcase to
 * return the result in canonical form.  Add a warning to the header
 * comment for ichartostr, letting the user know that the length may change.
 * 
 * Revision 1.16  91/05/27  21:48:05  geoff
 * Change the string-character support to get the preferred string
 * character from a global flag based on the file type, rather than from
 * the last string processed (which may be internally-generated and thus
 * wrong).  Move a misplaced break statement.
 * 
 * Revision 1.15  90/12/31  00:59:41  geoff
 * Reformat to follow a consistent convention throughout ispell
 * 
 * Revision 1.14  90/04/26  22:44:14  geoff
 * Add support for alternate string characters.  This primarily involves changes
 * to the ichartostr and strtoichar routines.
 * 
 * Revision 1.13  90/04/17  15:34:19  geoff
 * Fix two incorrect declarations caught by gcc
 * 
 * Revision 1.12  89/12/27  03:18:22  geoff
 * Move all messages to msgs.h so they can be reconfigured
 * 
 * Revision 1.11  89/06/09  15:55:31  geoff
 * Add support for the internal "character" type, ichar_t.  This includes
 * providing the new routines chupcase, strtoichar, ichartostr, strtosichar,
 * ichartosstr, printichar, icharcpy, icharcmp, icharncmp, and icharlen.
 * Also add a little "lint library" for the various macros so that lint
 * can check macro calls for correctness too.
 * 
 * Revision 1.10  89/04/27  23:32:48  geoff
 * Add setfindlast et al, to speed up repetitive findlastchar calls.
 * 
 * Revision 1.9  89/04/03  01:57:25  geoff
 * Fix a bunch of lint complaints.  Add support for the selectable flag
 * marker character.  Add support for string characters.
 * 
 * Revision 1.8  89/02/20  22:11:40  geoff
 * Many minor upgrades to make it compile and run correctly when
 * capitalization support is turned off.  Most of these involve adding
 * more ifdef's.
 * 
 * Revision 1.7  88/12/26  02:30:46  geoff
 * Update the copyright notice.
 * 
 * Revision 1.6  88/11/16  02:19:46  geoff
 * Fix the ifdefs in combineaffixes to properly handle all sizes of MASKBITS.
 * 
 * Revision 1.5  88/04/30  22:14:46  geoff
 * Fix some lint complaints.
 * 
 * Revision 1.4  88/03/27  01:02:30  geoff
 * Add a copyright message.  Fix a curly-brace error.  Compile "combineaffixes"
 * regardless of the setting of CAPITALIZATION.
 * 
 * Revision 1.3  88/03/12  02:44:31  geoff
 * Put newlines before makedent's error messages, so they show up more
 * nicely on ispell's screen.
 * 
 * Revision 1.2  88/02/28  23:17:18  geoff
 * Combine affixes when the case and keep flag both match exactly.
 * 
 * Revision 1.1  88/02/24  00:18:23  geoff
 * Initial revision
 * 
 */

#include "config.h"
#include "ispell.h"
#include "msgs.h"

/* String routines used below, declared in old-style (K&R) form */
extern char *	index ();
extern char *	strcpy ();

/* Allocation routines used for dictionary entries and their words */
extern char *	mymalloc ();
extern void	myfree ();
#ifdef CAPITALIZATION
long		whatcap ();	/* Defined later in this file */
#endif
void		upcase ();	/* Defined later in this file */
void		lowcase ();	/* Defined later in this file */

/* File-local helpers used when writing dictionary entries */
static void	toutword ();
static void	flagout ();

/*
 * Cleared by toutword at the start of each word, and set by flagout once
 * the flag marker character has been written for that word.
 */
static int  	hasmarker;

/*
 * Fill in a dictionary entry, including setting the capitalization flags, and
 * allocate and initialize memory for the d->word field.  The input word
 * must be in canonical form.
 *
 * lbuf holds the word, optionally followed by the flag marker character
 * and a list of affix flags, and optionally ended by a newline.  It is
 * modified in place:  the newline and the flag marker are overwritten
 * with NULs, and the word is rewritten in canonical form.
 *
 * Returns 0 on success, or -1 if there was trouble (the word is too long,
 * or no memory could be allocated for it).  On a -1 return, d has been
 * partly initialized but d->word must not be used.
 */

makedent (lbuf, d)
    char *		lbuf;
    struct dent *	d;
    {
    ichar_t		ibuf[INPUTWORDLEN + MAXAFFIXLEN];
    char *		p;
    int			bit;
    int			len;

    /*
    ** Strip off any trailing newline.  The length is tested first so
    ** that an empty line doesn't cause a reference to lbuf[-1].
    */
    len = strlen (lbuf);
    if (len > 0  &&  lbuf[len - 1] == '\n')
	lbuf[len - 1] = '\0';

    d->next = NULL;
    /* WARNING:  flagfield might be the same as mask! See ispell.h. */
    d->flagfield = 0;
    (void) bzero ((char *) d->mask, sizeof (d->mask));
    d->flagfield |= USED;
    d->flagfield &= ~KEEP;

    /* Split the word from its flags (if any) at the flag marker */
    p = index (lbuf, hashheader.flagmarker);
    if (p != NULL)
	*p = 0;

    /*
    ** Refuse words that could overrun ibuf.  Every ichar_t produced by
    ** strtoichar consumes at least one input byte, so the byte length
    ** is a safe upper bound on the converted length.  Any word this
    ** long is presumably over the INPUTWORDLEN limit tested below
    ** anyway, so no acceptable word should be rejected here.
    */
    len = strlen (lbuf);
    if (len >= (int) (sizeof (ibuf) / sizeof (ibuf[0])))
	{
	(void) fprintf (stderr, MAKEDENT_C_TOO_BIG, lbuf);
	return (-1);
	}

    /*
    ** Convert the word to an ichar_t and back;  this makes sure that
    ** it is in canonical form and thus that the length is correct.
    ** NOTE(review):  ichartostr warns that its output may be longer
    ** than the original string;  the size of the caller's lbuf is not
    ** visible here, so that case is not guarded against.
    */
    strtoichar (ibuf, lbuf, 1);
    ichartostr (lbuf, ibuf, 1);
    len = strlen (lbuf);
#ifdef CAPITALIZATION
    /*
    ** Figure out the capitalization rules from the capitalization of
    ** the sample entry.
    */
    d->flagfield |= whatcap (ibuf);
#endif

    if (len > INPUTWORDLEN - 1)
	{
	(void) fprintf (stderr, MAKEDENT_C_TOO_BIG, lbuf);
	return (-1);
	}

    d->word = mymalloc (len + 1);
    if (d->word == NULL)
	{
	(void) fprintf (stderr, MAKEDENT_C_NO_WORD_SPACE, lbuf);
	return (-1);
	}

    /* Only followcase words are stored with their original case */
    (void) strcpy (d->word, lbuf);
#ifdef CAPITALIZATION
    if (captype (d->flagfield) != FOLLOWCASE)
	chupcase (d->word);
#else /* CAPITALIZATION */
    chupcase (d->word);
#endif /* CAPITALIZATION */
    if (p == NULL)
	return (0);		/* No flags were given */

    /* Translate each flag character into a bit in the mask */
    p++;
    while (*p != '\0'  &&  *p != '\n')
	{
#if MASKBITS <= 32
	bit = CHARTOBIT (mytoupper (chartoichar (*p)));
#else
	bit = CHARTOBIT ((unsigned char) *p);
#endif
	/*
	** NOTE(review):  this accepts bit == LARGESTFLAG, but toutword
	** only writes out bits below LARGESTFLAG.  One of the two bounds
	** is presumably off by one -- confirm against ispell.h.
	*/
	if (bit >= 0  &&  bit <= LARGESTFLAG)
	    SETMASKBIT (d->mask, bit);
	p++;
	if (*p == hashheader.flagmarker)
	    p++;		/* Handle old-format dictionaries too */
	}
    return (0);
    }

#ifdef CAPITALIZATION
/*
** Work out the capitalization class of a sample word.  The result is
** one of the capitalization codes ALLCAPS, CAPITALIZED, FOLLOWCASE, or
** ANYCASE:
**
**	ALLCAPS		the word contains no lowercase letters.
**	FOLLOWCASE	an uppercase letter follows a lowercase one, or
**			the word starts with an uppercase letter and has
**			another uppercase letter later on.
**	CAPITALIZED	only the first character is uppercase.
**	ANYCASE		anything else (the first character is not
**			uppercase, and nothing uppercase follows the
**			first lowercase letter).
*/
long whatcap (word)
    register ichar_t *	word;
    {
    register ichar_t *	scan;

    /* Find the first lowercase letter, if there is one */
    scan = word;
    while (*scan != '\0'  &&  !mylower (*scan))
	scan++;
    if (*scan == '\0')
	return ALLCAPS;

    /* See whether any uppercase letter comes after it */
    while (*scan != '\0'  &&  !myupper (*scan))
	scan++;
    if (*scan != '\0')
	return FOLLOWCASE;	/* .../lower/upper */

    /*
    ** No uppercase letters follow the lowercase ones, so the answer
    ** depends on how the word begins.
    */
    if (!myupper (word[0]))
	return ANYCASE;

    for (scan = word + 1;  *scan != '\0';  scan++)
	{
	if (myupper (*scan))
	    return FOLLOWCASE;	/* More than one leading capital */
	}
    return CAPITALIZED;
    }


/*
** Give a word a variant-capitalization header.  The existing entry, dp,
** is turned into the (all-caps) header, and a newly allocated copy of
** the original entry is linked in behind it as the first variant.  This
** may be called even for a followcase word that has no header yet.
**
** Returns 0 on success, or -1 if memory could not be allocated (in
** which case dp is left untouched).
*/
addvheader (dp)
    register struct dent *	dp;	/* Entry to update */
    {
    register struct dent *	variant; /* Copy that keeps the real case */

    variant = (struct dent *) mymalloc (sizeof (struct dent));
    if (variant == NULL)
	{
	(void) fprintf (stderr, MAKEDENT_C_NO_WORD_SPACE, dp->word);
	return -1;
	}

    /* Start from an exact copy;  only the word pointer needs fixing */
    *variant = *dp;
    if (captype (variant->flagfield) == FOLLOWCASE)
	{
	/* A followcase variant must carry its own spelling */
	variant->word = mymalloc (strlen (dp->word) + 1);
	if (variant->word == NULL)
	    {
	    (void) fprintf (stderr, MAKEDENT_C_NO_WORD_SPACE, dp->word);
	    myfree ((char *) variant);
	    return -1;
	    }
	(void) strcpy (variant->word, dp->word);
	}
    else
	variant->word = NULL;

    /* Now convert dp itself into the dummy header entry */
    chupcase (dp->word);
    dp->next = variant;
    dp->flagfield &= ~CAPTYPEMASK;
    dp->flagfield |= (ALLCAPS | MOREVARIANTS);
    return 0;
    }
#endif /* CAPITALIZATION */

/*
** Combine and resolve the entries describing two capitalizations of the same
** word.  This may require allocating yet more entries.
**
** Hdrp is a pointer into a hash table.  If the word covered by hdrp has
** variations, hdrp must point to the header.  Newp is a pointer to temporary
** storage, and space is malloc'ed if newp is to be kept.  The newp->word
** field must have been allocated with mymalloc, so that this routine may free
** the space if it keeps newp but not the word.
**
** Return value:  0 if the word was added, 1 if the word was combined
** with an existing entry, and -1 if trouble occurred (e.g., malloc).
** If 1 is returned, newp->word may have been freed using myfree.
**
** Life is made much more difficult by the KEEP flag's possibilities.  We
** must ensure that a !KEEP word doesn't find its way into the personal
** dictionary as a result of this routine's actions.  However, a !KEEP
** word that has affixes must have come from the main dictionary, so it
** is acceptable to combine entries in that case (got that?).
**
** The net result of all this is a set of rules that is a bloody pain
** to figure out.  Basically, we want to choose one of the following actions:
**
**	(1) Add newp's affixes and KEEP flag to oldp, and discard newp.
**	(2) Add oldp's affixes and KEEP flag to newp, replace oldp with
**	    newp, and discard newp.
#ifdef CAPITALIZATION
**	(3) Insert newp as a new entry in the variants list.  If there is
**	    currently no variant header, this requires adding one.  Adding a
**	    header splits into two sub-cases:
**
**	    (3a) If oldp is ALLCAPS and the KEEP flags match, just turn it
**		into the header.
**	    (3b) Otherwise, add a new entry to serve as the header.
**		To ease list linking, this is done by copying oldp into
**		the new entry, and then performing (3a).
**
**	    After newp has been added as a variant, its affixes and KEEP
**	    flag are OR-ed into the variant header.
#endif
**
** So how to choose which?  The default is always case (3), which adds newp
** as a new entry in the variants list.  Cases (1) and (2) are symmetrical
** except for which entry is discarded.  We can use case (1) or (2) whenever
** one entry "covers" the other.  "Covering" is defined as follows:
**
**	(4) For entries with matching capitalization types, A covers B
**	    if:
**
**	    (4a) B's affix flags are a subset of A's, or the KEEP flags
**		 match, and
**	    (4b) either the KEEP flags match, or A's KEEP flag is set.
**		(Since A has more suffixes, combining B with it won't
**		cause any extra suffixes to be added to the dictionary.)
**	    (4c) If the words are FOLLOWCASE, the capitalizations match
**		exactly.
**
#ifdef CAPITALIZATION
**	(5) For entries with mismatched capitalization types, A covers B
**	    if (4a) and (4b) are true, and:
**
**	    (5a) B is ALLCAPS, or
**	    (5b) A is ANYCASE, and B is CAPITALIZED.
#endif
**
** For any "hdrp" without variants, oldp is the same as hdrp.  Otherwise,
** the above tests are applied using each variant in turn for oldp.
*/
int combinecaps (hdrp, newp)
    struct dent *	hdrp;	/* Header of entry currently in dictionary */
    register struct dent *
			newp;	/* Entry to add */
    {
    register struct dent *
			curp;	/* Existing entry being tried against newp */
#ifdef CAPITALIZATION
    register struct dent *
			addp;	/* New variant entry for the dictionary */
#endif /* CAPITALIZATION */
    register int	combined; /* Result from combine_two_entries */

    /*
    ** Try cases 1 and 2 first:  combine_two_entries returns nonzero
    ** if it merged newp with an existing entry, and zero if it did
    ** nothing.
    */
    curp = hdrp;
#ifdef CAPITALIZATION
    if ((hdrp->flagfield & (CAPTYPEMASK | MOREVARIANTS))
      != (ALLCAPS | MOREVARIANTS))
	combined = combine_two_entries (hdrp, curp, newp);
    else
	{
	/* hdrp is a variant header;  try each variant in turn */
	do
	    {
	    curp = curp->next;
	    combined = combine_two_entries (hdrp, curp, newp);
	    }
	while (combined == 0  &&  (curp->flagfield & MOREVARIANTS));
	}
    if (combined != 0)
	return combined;	/* newp was merged;  nothing more to do */

    /*
    ** No merge was possible, so newp becomes a new variant (case 3).
    ** It is linked in directly after the header, which is simpler
    ** than appending it to the end of the list.
    */
    forcevheader (hdrp, curp, newp);
    addp = (struct dent *) mymalloc (sizeof (struct dent));
    if (addp == NULL)
	{
	(void) fprintf (stderr, MAKEDENT_C_NO_WORD_SPACE, newp->word);
	return -1;
	}
    *addp = *newp;
    addp->next = hdrp->next;
    hdrp->next = addp;
    /* The new variant inherits the header's old "more follow" state */
    addp->flagfield |= (hdrp->flagfield & MOREVARIANTS);
    hdrp->flagfield |= MOREVARIANTS;
    /* The header accumulates the affixes and KEEP flag of all variants */
    combineaffixes (hdrp, newp);
    hdrp->flagfield |= (newp->flagfield & KEEP);
    if (captype (newp->flagfield) != FOLLOWCASE)
	{
	addp->word = NULL;
	myfree (newp->word);		/* newp->word isn't needed */
	}
    else
	addp->word = newp->word;	/* Variant takes over the string */
#else /* CAPITALIZATION */
    combined = combine_two_entries (hdrp, curp, newp);
#endif /* CAPITALIZATION */
    return combined;
    }

#ifdef CAPITALIZATION
/*
** The following routine implements steps 3a and 3b in the commentary
** for "combinecaps".  It makes sure that hdrp can serve as a variant
** header before the caller links a new variant in behind it.
**
** Returns 0 if hdrp needed no change or a header was added successfully,
** and -1 if addvheader could not allocate the memory it needed.
*/
int forcevheader (hdrp, oldp, newp)
    register struct dent *	hdrp;
    struct dent *		oldp;
    struct dent *		newp;
    {

    /* (3a) A lone ALLCAPS entry with matching KEEP flags is the header */
    if ((hdrp->flagfield & (CAPTYPEMASK | MOREVARIANTS)) == ALLCAPS
      &&  ((oldp->flagfield ^ newp->flagfield) & KEEP) == 0)
	return 0;		/* Caller will set MOREVARIANTS */

    /* (3b) Anything that isn't already a header needs one added */
    if ((hdrp->flagfield & (CAPTYPEMASK | MOREVARIANTS))
      != (ALLCAPS | MOREVARIANTS))
	return addvheader (hdrp);

    return 0;			/* hdrp is already a variant header */
    }
#endif /* CAPITALIZATION */

/*
** This routine implements steps 4 and 5 of the commentary for "combinecaps":
** it merges newp with oldp if either entry covers the other.
**
** Returns 1 if the entries were merged (newp->word has then been freed
** and newp can be discarded), or 0 if nothing was done.
*/
int combine_two_entries (hdrp, oldp, newp)
    struct dent *	hdrp;	/* (Possible) header of variant chain */
    register struct dent *
			oldp;	/* Pre-existing dictionary entry */
    register struct dent *
			newp;	/* Entry to possibly combine */
    {

    if (acoversb (oldp, newp))
	{
	/* oldp covers newp:  fold newp's affixes and KEEP flag into it */
	combineaffixes (oldp, newp);
	oldp->flagfield |= (newp->flagfield & KEEP);
	hdrp->flagfield |= (newp->flagfield & KEEP);
	myfree (newp->word);
	return 1;
	}

    if (!acoversb (newp, oldp))
	return 0;		/* Neither entry covers the other */

    /*
    ** newp covers oldp, so newp's contents replace oldp's, keeping the
    ** affixes, the keep flag, and oldp's place in the chain.
    */
    combineaffixes (newp, oldp);
#ifdef CAPITALIZATION
    newp->flagfield |= (oldp->flagfield & (KEEP | MOREVARIANTS));
#else /* CAPITALIZATION */
    newp->flagfield |= (oldp->flagfield & KEEP);
#endif /* CAPITALIZATION */
    hdrp->flagfield |= (newp->flagfield & KEEP);
    newp->next = oldp->next;

    /*
    ** oldp->word can't be freed, because it might live in "hashstrings".
    ** Instead, newp's spelling is copied over oldp's (relying on both
    ** words having the same length) and newp->word is the one freed.
    */
    if (oldp->word != NULL)
	(void) strcpy (oldp->word, newp->word);
    myfree (newp->word);	/* No longer needed */
    newp->word = oldp->word;
    *oldp = *newp;
#ifdef CAPITALIZATION
    /* A followcase replacement may now require a variant header */
    if (captype (newp->flagfield) == FOLLOWCASE
      &&  (hdrp->flagfield & (CAPTYPEMASK | MOREVARIANTS))
	!= (ALLCAPS | MOREVARIANTS))
	(void) addvheader (hdrp);
#endif /* CAPITALIZATION */
    return 1;
    }

/*
** Decide whether enta covers entb, according to the rules in steps 4
** and 5 of the commentary for "combinecaps".  Returns 1 if it does, and
** 0 if it does not.
*/
int acoversb (enta, entb)
    register struct dent *	enta;	/* "A" in the rules */
    register struct dent *	entb;	/* "B" in the rules */
    {
    int				subset;	/* NZ if entb is a subset of enta */
    int				keepdiffers; /* NZ if KEEP flags differ */

    subset = issubset (entb, enta);
    keepdiffers = ((enta->flagfield ^ entb->flagfield) & KEEP) != 0;

    /*
    ** Rules (4a) and (4b):  differing KEEP flags are tolerated only when
    ** entb's affixes are a subset of enta's and enta is the one with
    ** KEEP set.
    */
    if (keepdiffers
      &&  (subset == 0  ||  (enta->flagfield & KEEP) == 0))
	return 0;

    /* Rules (4a) and (4b) are satisfied;  now look at capitalization */
#ifdef CAPITALIZATION
    if (((enta->flagfield ^ entb->flagfield) & CAPTYPEMASK) == 0)
	{
	/* Same type;  followcase words must also match exactly (4c) */
	return captype (enta->flagfield) != FOLLOWCASE
	  ||  strcmp (enta->word, entb->word) == 0;
	}

    /* Mismatched types (rule 5) are accepted only with a flag subset */
    if (subset == 0)
	return 0;
    if (captype (entb->flagfield) == ALLCAPS)
	return 1;				/* Rule (5a) */
    return captype (enta->flagfield) == ANYCASE
      &&  captype (entb->flagfield) == CAPITALIZED;	/* Rule (5b) */
#else /* CAPITALIZATION */
#ifdef lint
    return subset;				/* Just so it gets used */
#else /* lint */
    return 1;					/* All words match */
#endif /* lint */
#endif /* CAPITALIZATION */
    }

/*
 * Convert an ichar_t string to uppercase, in place.
 */
void upcase (s)
    register ichar_t *	s;
    {

    for (  ;  *s != 0;  s++)
	*s = mytoupper (*s);
    }

/*
 * Convert an ichar_t string to lowercase, in place.
 */
void lowcase (s)
    register ichar_t *	s;
    {

    for (  ;  *s != 0;  s++)
	*s = mytolower (*s);
    }

/*
 * Uppercase an ordinary (external) string in place.  The string is
 * converted to ichar_t form, uppercased there, and converted back, so
 * this is a good deal slower than upcase.  The input must be in
 * canonical form, and the result is stored in canonical form.
 */
void chupcase (s)
    char *	s;
    {
    ichar_t *	internal;	/* The string in ichar_t form */

    internal = strtosichar (s, 1);
    upcase (internal);
    ichartostr (s, internal, 1);
    }

/*
** Test whether the affix flags of ent1 are a subset of those of ent2.
** Returns NZ if every affix flag set in ent1 is also set in ent2, and
** zero otherwise.  The KEEP flag is not taken into consideration.
*/
int issubset (ent1, ent2)
    register struct dent *	ent1;
    register struct dent *	ent2;
    {
/* The following is really testing for MASKSIZE > 1, but cpp can't do that */
#if MASKBITS > 32
    register int		word;	/* Index of mask word being tested */

#ifdef FULLMASKSET
#define MASKMAX	MASKSIZE
#else
#define MASKMAX	MASKSIZE - 1
#endif /* FULLMASKSET */
    /* Check the mask words that hold nothing but affix flags */
    word = MASKMAX;
    while (--word >= 0)
	{
	if ((ent1->mask[word] & ent2->mask[word]) != ent1->mask[word])
	    return 0;		/* ent1 has a flag that ent2 lacks */
	}
#endif /* MASKBITS > 32 */
#ifdef FULLMASKSET
    if ((ent1->mask[MASKSIZE - 1] & ent2->mask[MASKSIZE - 1])
      != ent1->mask[MASKSIZE - 1])
	return 0;
    return 1;
#else
    /* Ignore the bits named by ALLFLAGS when testing the last word */
    return (((ent1->mask[MASKSIZE - 1] & ent2->mask[MASKSIZE - 1])
      ^ ent1->mask[MASKSIZE - 1]) & ~ALLFLAGS) == 0;
#endif /* FULLMASKSET */
    }

/*
** Merge ent2's affix flags into ent1 by OR-ing the mask words together.
** In the last mask word, the bits named by ALLFLAGS are left alone.
*/
combineaffixes (ent1, ent2)
    register struct dent *	ent1;
    register struct dent *	ent2;
    {
/* The following is really testing for MASKSIZE > 1, but cpp can't do that */
#if MASKBITS > 32
    register int		word;	/* Index of mask word being merged */

    /* Merging an entry with itself can't change anything */
    if (ent1 == ent2)
	return;
    /* MASKMAX is defined in issubset, just above */
    word = MASKMAX;
    while (--word >= 0)
	ent1->mask[word] |= ent2->mask[word];
#endif /* MASKBITS > 32 */
#ifndef FULLMASKSET
    ent1->mask[MASKSIZE - 1] |= ent2->mask[MASKSIZE - 1] & ~ALLFLAGS;
#endif
    }

/*
** Write a dictionary entry to outfile, one line per capitalization
** variant.  If onlykeep is nonzero, variants that do not have KEEP set
** are skipped.  A variant header (an ALLCAPS entry at the head of the
** chain with MOREVARIANTS set) is itself never written.
*/
toutent (outfile, hent, onlykeep)
    register FILE *	outfile;
    struct dent *	hent;
    register int	onlykeep;
    {
#ifdef CAPITALIZATION
    register struct dent * varp;	/* Variant currently being written */
    ichar_t		ibuf[INPUTWORDLEN + MAXAFFIXLEN];

    /* All variants except followcase ones are re-cased from this copy */
    strtoichar (ibuf, hent->word, 1);
    for (varp = hent;  ;  varp = varp->next)
	{
	if (!onlykeep  ||  (varp->flagfield & KEEP) != 0)
	    {
	    if (captype (varp->flagfield) == ANYCASE)
		{
		lowcase (ibuf);
		toutword (outfile, ichartosstr (ibuf, 1), varp);
		}
	    else if (captype (varp->flagfield) == ALLCAPS)
		{
		/* Skip the entry if it is only the variant header */
		if (varp != hent
		  ||  (varp->flagfield & MOREVARIANTS) == 0)
		    {
		    upcase (ibuf);
		    toutword (outfile, ichartosstr (ibuf, 1), varp);
		    }
		}
	    else if (captype (varp->flagfield) == CAPITALIZED)
		{
		lowcase (ibuf);
		ibuf[0] = mytoupper (ibuf[0]);
		toutword (outfile, ichartosstr (ibuf, 1), varp);
		}
	    else if (captype (varp->flagfield) == FOLLOWCASE)
		toutword (outfile, varp->word, varp);
	    }
	if ((varp->flagfield & MOREVARIANTS) == 0)
	    break;		/* That was the last variant */
	}
#else
    if (!onlykeep  ||  (hent->flagfield & KEEP))
	toutword (outfile, hent->word, hent);
#endif
    }
		
/*
 * Write one word to outfile, followed by its affix flags (if any) and a
 * newline.  The flags are taken from cent->mask.
 */
static void toutword (outfile, word, cent)
    register FILE *	outfile;
    char *		word;
    register struct dent * cent;
    {
    register int	flagbit;

    hasmarker = 0;		/* No flag marker written for this word yet */
    (void) fprintf (outfile, "%s", word);
    /*
    ** NOTE(review):  bits up to but not including LARGESTFLAG are
    ** written, while makedent accepts bit == LARGESTFLAG as well.
    ** Presumably one of the two bounds is off by one -- confirm
    ** against ispell.h.
    */
    flagbit = 0;
    while (flagbit < LARGESTFLAG)
	{
	if (TSTMASKBIT (cent->mask, flagbit))
	    flagout (outfile, BITTOCHAR (flagbit));
	flagbit++;
	}
    (void) fprintf (outfile, "\n");
    }

/*
 * Write a single flag character to outfile.  The first flag written for
 * a word is preceded by the flag marker character.
 */
static void flagout (outfile, flag)
    register FILE *	outfile;
    int			flag;
    {
    if (hasmarker == 0)
	{
	(void) putc (hashheader.flagmarker, outfile);
	hasmarker = 1;
	}
    (void) putc (flag, outfile);
    }

/*
 * If the string under the given pointer begins with a string character,
 * return the length of that "character".  If not, return 0.
 * May be called any time, but it's best if "isstrstart" is first
 * used to filter out unnecessary calls.
 *
 * As a side effect, "laststringch" is set to the number of the string
 * found, or to -1 if none was found.  This can be useful for such things
 * as case conversion.
 *
 * The lookup is a binary search of hashheader.stringchars.  A table entry
 * matches only if its text is a prefix of bufp and its dupnos value is
 * the one wanted:  0 for canonical input, otherwise defdupchar.
 * NOTE(review):  this presumably relies on the table being sorted by
 * string and then by dupnos -- confirm against the code that builds it.
 */
int stringcharlen (bufp, canonical)
    char *		bufp;
    int			canonical;	/* NZ if input is in canonical form */
    {
#ifdef SLOWMULTIPLY
    /* Pointers to each table row, filled in on the first call */
    static char *	sp[MAXSTRINGCHARS];
    static int		inited = 0;
#endif /* SLOWMULTIPLY */
    register char *	bufcur;		/* Current position in bufp */
    register char *	stringcur;	/* Current position in table entry */
    register int	stringno;	/* Table entry being examined */
    register int	lowstringno;	/* Low end of the search range */
    register int	highstringno;	/* High end of the search range */
    int			dupwanted;	/* dupnos value that must match */

#ifdef SLOWMULTIPLY
    if (!inited)
	{
	inited = 1;
	for (stringno = 0;  stringno < MAXSTRINGCHARS;  stringno++)
	    sp[stringno] = &hashheader.stringchars[stringno][0];
	}
#endif /* SLOWMULTIPLY */
    lowstringno = 0;
    highstringno = hashheader.nstrchars - 1;
    dupwanted = canonical ? 0 : defdupchar;
    while (lowstringno <= highstringno)
	{
	/* Probe the middle entry of the remaining range */
	stringno = (lowstringno + highstringno) >> 1;
#ifdef SLOWMULTIPLY
	stringcur = sp[stringno];
#else /* SLOWMULTIPLY */
	stringcur = &hashheader.stringchars[stringno][0];
#endif /* SLOWMULTIPLY */
	bufcur = bufp;
	/*
	** Compare the table entry with the input.  On a mismatch,
	** bufcur has already moved past the differing character but
	** stringcur still points at it.
	*/
	while (*stringcur)
	    {
#ifdef NO8BIT
	    /* Only the low seven bits take part in the comparison */
	    if (((*bufcur++ ^ *stringcur) & 0x7F) != 0)
#else /* NO8BIT */
	    if (*bufcur++ != *stringcur)
#endif /* NO8BIT */
		break;
	    /*
	    ** We can't use autoincrement above because of the
	    ** test below.
	    */
	    stringcur++;
	    }
	if (*stringcur == '\0')
	    {
	    /* The whole table entry matched the start of the input */
	    if (hashheader.dupnos[stringno] == dupwanted)
		{
		/* We have a match */
		laststringch = hashheader.stringdups[stringno];
#ifdef SLOWMULTIPLY
		return stringcur - sp[stringno];
#else /* SLOWMULTIPLY */
		return stringcur - &hashheader.stringchars[stringno][0];
#endif /* SLOWMULTIPLY */
		}
	    else
		/*
		** Right text, wrong dupnos.  Back up to the last
		** matched character so that the character tests below
		** compare equal and the choice falls through to the
		** dupnos comparison.
		*/
		--stringcur;
	    }
	/* No match - choose which side to search on */
#ifdef NO8BIT
	if ((*--bufcur & 0x7F) < (*stringcur & 0x7F))
	    highstringno = stringno - 1;
	else if ((*bufcur & 0x7F) > (*stringcur & 0x7F))
	    lowstringno = stringno + 1;
#else /* NO8BIT */
	if (*--bufcur < *stringcur)
	    highstringno = stringno - 1;
	else if (*bufcur > *stringcur)
	    lowstringno = stringno + 1;
#endif /* NO8BIT */
	else if (dupwanted < hashheader.dupnos[stringno])
	    highstringno = stringno - 1;
	else
	    lowstringno = stringno + 1;
	}
    laststringch = -1;
    return 0;			/* Not a string character */
    }

/*
 * Translate an external string into an ichar_t string.  A string
 * character is stored as SET_SIZE plus its string number;  any other
 * character is stored after masking with NOPARITY, which strips the
 * parity bit if necessary.  The output is terminated with a zero.
 * No check is made on the size of the output buffer.
 */
strtoichar (out, in, canonical)
    register ichar_t *	out;		/* Where to put result */
    register char *	in;		/* String to convert */
    int			canonical;	/* NZ if input is in canonical form */
    {
    register int	step;		/* Input bytes used by this char */

    while (*in != '\0')
	{
	/* l1_isstringch fills in "step" as well as testing the input */
	if (l1_isstringch (in, step, canonical))
	    *out++ = SET_SIZE + laststringch;
	else
	    *out++ = *in & NOPARITY;
	in += step;
	}
    *out = 0;
    }

/*
 * Translate an ichar_t string into an external string.  Values below
 * SET_SIZE are copied as single characters;  larger values are expanded
 * into the corresponding entry of hashheader.stringchars.  When the
 * output is not canonical, the entry used is the one whose dupnos value
 * is defdupchar, if there is one.
 *
 * WARNING:  the result may be longer than the original string.  Even
 * the sequence strtoichar->ichartostr may produce a result longer than
 * its input, because the output form may use a different string type
 * set than the original input form.  No check is made on the size of
 * the output buffer.
 */
ichartostr (out, in, canonical)
    register char *	out;		/* Where to put result */
    register ichar_t *	in;		/* String to convert */
    int			canonical;	/* NZ for canonical form */
    {
    register int	ch;		/* Next character to store */
    register int	i;		/* Index into duplicates list */
    register char *	scharp;		/* Pointer into a string char */

    for (ch = *in++;  ch != 0;  ch = *in++)
	{
	if (ch < SET_SIZE)
	    {
	    *out++ = ch;		/* Ordinary single character */
	    continue;
	    }

	/* A string character:  work out which table entry to emit */
	ch -= SET_SIZE;
	if (!canonical)
	    {
	    /* Search from the top of the table for the preferred form */
	    i = hashheader.nstrchars - 1;
	    while (i >= 0)
		{
		if (hashheader.dupnos[i] == defdupchar
		  &&  hashheader.stringdups[i] == ch)
		    {
		    ch = i;
		    break;
		    }
		i--;
		}
	    }
	for (scharp = hashheader.stringchars[(unsigned) ch];
	  *scharp != '\0';
	  scharp++)
	    *out++ = *scharp;
	}
    *out = '\0';
    }

/*
 * Translate a string into ichar_t form using strtoichar, and return a
 * pointer to the result.  The result lives in a static buffer, so it is
 * overwritten by the next call.
 */
ichar_t * strtosichar (in, canonical)
    char *		in;		/* String to convert */
    int			canonical;	/* NZ if input is in canonical form */
    {
    static ichar_t	result[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4];

    strtoichar (result, in, canonical);
    return result;
    }

/*
 * Translate an ichar_t string into external form using ichartostr, and
 * return a pointer to the result.  The result lives in a static buffer,
 * so it is overwritten by the next call.
 */
char * ichartosstr (in, canonical)
    ichar_t *		in;		/* Internal string to convert */
    int			canonical;	/* NZ for canonical conversion */
    {
    static char		result[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4];

    ichartostr (result, in, canonical);
    return result;
    }

/*
 * Produce a printable string for a single ichar_t.  The result lives in
 * a static buffer, so it is overwritten by the next call.
 */
char * printichar (in)
    ichar_t		in;
    {
    static char		text[MAXSTRINGCHARLEN + 1];

    if (in >= SET_SIZE)
	{
	/* A string character:  copy its text from the table */
	(void) strcpy (text, hashheader.stringchars[(unsigned) in - SET_SIZE]);
	return text;
	}
    text[0] = in;
    text[1] = '\0';
    return text;
    }

#ifndef ICHAR_IS_CHAR
/*
 * Copy a zero-terminated ichar_t string, including its terminator.
 */
icharcpy (out, in)
    register ichar_t *	in;		/* Source */
    register ichar_t *	out;		/* Destination */
    {

    do
	*out = *in++;
    while (*out++ != 0);
    }

/*
 * Count the ichar_t's in a string, not including the terminating zero.
 */
icharlen (in)
    register ichar_t *	in;		/* String to count */
    {
    register int	count;		/* Characters seen so far */

    count = 0;
    while (in[count] != 0)
	count++;
    return count;
    }

/*
 * Compare two ichar_t strings.  Returns zero if they are equal;
 * otherwise returns the difference between the first pair of
 * characters that differ (s1's character minus s2's).
 */
icharcmp (s1, s2)
    register ichar_t *	s1;
    register ichar_t *	s2;
    {

    /* Advance while the strings agree and s1 has not ended */
    while (*s1 != 0  &&  *s1 == *s2)
	{
	s1++;
	s2++;
	}
    return *s1 - *s2;
    }

/*
 * Compare at most n characters of two ichar_t strings, in the manner
 * of strncmp.  Returns zero if the strings agree over that span;
 * otherwise returns the difference between the first pair of
 * characters that differ (s1's character minus s2's).
 */
icharncmp (s1, s2, n)
    register ichar_t *	s1;
    register ichar_t *	s2;
    register int	n;
    {

    for (  ;  n > 0;  n--, s1++, s2++)
	{
	/* Stop at the end of s1 or at the first difference */
	if (*s1 == 0  ||  *s1 != *s2)
	    return *s1 - *s2;
	}
    return 0;			/* Ran out of count without a difference */
    }

#endif /* ICHAR_IS_CHAR */

/*
 * Look a name up in the string-character type table.  If searchnames is
 * nonzero, "name" is compared with the name of each type.  Otherwise,
 * each type's suffix list is searched for a suffix that "name" ends
 * with;  the list is a series of NUL-terminated strings, ended by an
 * empty string.
 *
 * Returns the index of the first matching type, or -1 if none matches.
 */
findfiletype (name, searchnames)
    char *		name;		/* Name to look up in suffix table */
    int			searchnames;	/* NZ to search name field of table */
    {
    char *		suffix;		/* Current entry in suffix list */
    int			suffixlen;	/* Length of current suffix */
    register int	type;		/* Index into type table */
    int			namelen;	/* Length of the name */

    if (searchnames)
	{
	for (type = 0;  type < hashheader.nstrchartype;  type++)
	    {
	    if (strcmp (name, chartypes[type].name) == 0)
		return type;
	    }
	return -1;
	}

    namelen = strlen (name);
    for (type = 0;  type < hashheader.nstrchartype;  type++)
	{
	suffix = chartypes[type].suffixes;
	while (*suffix != '\0')
	    {
	    suffixlen = strlen (suffix);
	    if (namelen >= suffixlen
	      &&  strcmp (&name[namelen - suffixlen], suffix) == 0)
		return type;
	    suffix += suffixlen + 1;	/* Step over the suffix and its NUL */
	    }
	}
    return -1;
    }

/*
 * The following routines are all dummies for the benefit of lint.
 * They are compiled only when "lint" is defined, and form a little
 * "lint library" so that lint can check calls to the various macros
 * for correctness.  NOTE(review):  presumably each of these names is
 * otherwise a macro supplied by ispell.h -- confirm.
 *
 * The bodies do not perform the real operations;  they merely use their
 * arguments and return a value of the appropriate type.
 */
#ifdef lint
TSTMASKBIT (mask, bit) MASKTYPE *mask; { return bit + (int) *mask; }
CLRMASKBIT (mask, bit) MASKTYPE *mask; { bit += (int) *mask; }
SETMASKBIT (mask, bit) MASKTYPE *mask; { bit += (int) *mask; }
BITTOCHAR (bit) { return bit; }
CHARTOBIT (ch) unsigned char ch; { return ch; }
myupper (ch) ichar_t ch; { return (int) ch; }
mylower (ch) ichar_t ch; { return (int) ch; }
myspace (ch) ichar_t ch; { return (int) ch; }
iswordch (ch) ichar_t ch; { return (int) ch; }
isboundarych (ch) ichar_t ch; { return (int) ch; }
isstringstart (ch) int ch; { return ch; }
ichar_t mytolower (ch) ichar_t ch; { return (int) ch; }
ichar_t mytoupper (ch) ichar_t ch; { return (int) ch; }
#endif /* lint */
