/*
 *	cook - file construction tool
 *	Copyright (C) 1991, 1992, 1993, 1994 Peter Miller.
 *	All rights reserved.
 *
 *	This program is free software; you can redistribute it and/or modify
 *	it under the terms of the GNU General Public License as published by
 *	the Free Software Foundation; either version 2 of the License, or
 *	(at your option) any later version.
 *
 *	This program is distributed in the hope that it will be useful,
 *	but WITHOUT ANY WARRANTY; without even the implied warranty of
 *	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *	GNU General Public License for more details.
 *
 *	You should have received a copy of the GNU General Public License
 *	along with this program; if not, write to the Free Software
 *	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * MANIFEST: functions to perform recipe pattern matching
 *
 * This is in the inner loop, so it must perform well.
 * A free list of match structures is maintained to avoid malloc calls;
 * malloc is only called when this free list is empty.
 *
 * The tough part about designing a pattern matcher for something like cook is
 * that the patterns must be reversible.  That is, it must be possible to use
 * the same string both as a pattern to be matched against and as a template
 * for building a string once a pattern has matched.  Rather like the
 * difference between the left and right sides of an editor search-and-replace
 * command using the same description for both the search pattern and the
 * replace template.  This is why classic regular expressions have not been
 * used.  They tend to be slow to match, too.
 *
 * This matcher has eleven match "fields", referenced as % and %0 to %9.
 * The % character can be escaped as %%.  The % and %1 to %9 forms match any
 * character except '/'.  The %0 form matches all characters, but must be
 * either empty, or have whole path components, including the trailing '/' on
 * each component.  A few examples will make this clearer:
 *	"%.c" matches "fred.c" with %="fred"
 *	"%.c" fails to match "snot/fred.c"
 *	"%1/%2.c" matches "snot/fred.c" with %1="snot" and %2="fred"
 *	"%1/%2.c" fails to match "etc/boo/fred.c"
 *	"%0%5.c" matches "fred.c" with %0="" and %5="fred"
 *	"%0%6.c" matches "snot/fred.c" with %0="snot/" and %6="fred"
 *	"%0%7.c" matches "etc/boo/fred.c" with %0="etc/boo/" and %7="fred"
 *	"/usr/%1/%1%2/%3.%2%4" matches "/usr/man/man1/fred.1x" with %1="man",
 *		%2="1", %3="fred" and %4="x".
 * The %0 behaviour is designed to allow patterns to range over subtrees in a
 * controlled manner.  Note that the use of this sort of pattern in a recipe
 * will result in deeper searches than the naive recipe designer would expect.
 */

#include <ac/stddef.h>
#include <ac/string.h>
#include <ac/stdlib.h>

#include <main.h>
#include <str.h>
#include <match.h>
#include <error.h>
#include <mem.h>
#include <trace.h>


/*
 * Sentinel stored in fill[0] of the place-holder structure which match_push
 * puts on the stack when it is handed a NULL pointer.  It is an integer
 * cast to a pointer, not the address of a string; match_top and match_pop
 * compare against it to recognise the place-holder.
 */
#define ABSENT_MARKER ((string_ty *)-1)

/* head of the list of recycled match structures, chained through "next" */
static match_ty *match_free_list;
/* top of the match stack (see match_push, match_top and match_pop) */
static match_ty *stack;


/*
 * NAME
 *	illegal_pattern - complain
 *
 * SYNOPSIS
 *	void illegal_pattern(char *s, int why);
 *
 * DESCRIPTION
 *	The illegal_pattern function is used to complain about errors in
 *	pattern specifications.  The text of the offending pattern is given
 *	by the s argument, and the why argument selects the complaint:
 *		why < 0		"%0" appears in an illegal position
 *		why >= 10	the un-numbered "%" field is misused
 *		otherwise	the numbered field "%<why>" is misused
 *
 * RETURNS
 *	void
 *
 * CAVEAT
 *	The tests are not chained with "else", so only one message is
 *	produced provided fatal() does not return (presumably it exits;
 *	confirm against error.h).
 */

static void illegal_pattern _((char *, int));

static void
illegal_pattern(s, why)
	char		*s;
	int		why;
{
	/* a negative reason means a misplaced %0 */
	if (why < 0)
		fatal("illegal position of '%c0' in \"%s\" pattern", MATCH_CHAR, s);

	/* field number 10 is the plain, un-numbered field */
	if (why >= 10)
		fatal("illegal use of '%c' in \"%s\" pattern", MATCH_CHAR, s);

	/* anything else names one of the numbered fields */
	fatal("illegal use of '%c%d' in \"%s\" pattern", MATCH_CHAR, why, s);
}


/*
 * NAME
 *	match_alloc - allocate match structure
 *
 * SYNOPSIS
 *	match_ty *match_alloc(void);
 *
 * DESCRIPTION
 *	The match_alloc function is used to obtain a match structure.
 *	A structure is taken from the free list when one is available;
 *	mem_alloc is only called when the free list is empty.
 *	Every fill field and the next pointer of the returned structure
 *	are zero.
 *
 * RETURNS
 *	match_ty * - a pointer to the match structure in dynamic memory
 *
 * CAVEAT
 *	When finished with, the structure should be disposed of by calling
 *	the match_free function.
 */

static match_ty *match_alloc _((void));

static match_ty *
match_alloc()
{
	match_ty	*mp;
	int		slot;

	/* recycle a structure when possible, otherwise make a new one */
	mp = match_free_list;
	if (mp)
		match_free_list = mp->next;
	else
		mp = mem_alloc(sizeof(match_ty));

	/* the caller is promised empty fields and no successor */
	for (slot = SIZEOF(mp->fill); slot > 0; )
		mp->fill[--slot] = 0;
	mp->next = 0;
	return mp;
}


/*
 * NAME
 *	match_free - dispose of match structure
 *
 * SYNOPSIS
 *	void match_free(match_ty *field);
 *
 * DESCRIPTION
 *	The match_free function is used to dispose of a match structure
 *	allocated by the match_alloc function.  Each field string which is
 *	set is released with str_free, and the structure itself is placed
 *	on the free list for re-use by match_alloc.
 *
 * RETURNS
 *	void
 *
 * CAVEAT
 *	The field argument is dereferenced without being checked;
 *	it must not be NULL.
 */

void
match_free(field)
	match_ty	*field;
{
	int		n;

	trace(("match_free(field = %08X)\n{\n"/*}*/, field));
	for (n = 0; n < SIZEOF(field->fill); ++n)
	{
		/* nothing to release for fields which were never set */
		if (!field->fill[n])
			continue;
		str_free(field->fill[n]);
		field->fill[n] = 0;
	}

	/* push the emptied structure onto the free list */
	field->next = match_free_list;
	match_free_list = field;
	trace((/*{*/"}\n"));
}


/*
 * NAME
 *	matcher - match pattern to string
 *
 * SYNOPSIS
 *	int matcher(char *original_patn, char *patn, char *str, match_ty *field);
 *
 * DESCRIPTION
 *	The matcher function is used to match up a pattern with a string,
 *	filling in the fields as it goes.
 *
 *	The original_patn argument is the start of the whole pattern; it is
 *	used to check the position of "%0" and to report illegal patterns.
 *	The patn and str arguments are the portions of the pattern and the
 *	string which remain to be matched.
 *
 *	Literal characters must match exactly, and "%%" matches a single
 *	MATCH_CHAR.  A field which is already set must be followed, in the
 *	string, by its current value.  A field which is not yet set is given
 *	successively shorter candidate values, and the remainder of the
 *	pattern is matched recursively for each candidate:
 *	    % and %1 to %9
 *		start with everything up to (not including) the next '/',
 *		which must not be empty, then shrink one character at a time
 *		down to the empty string.
 *	    %0
 *		start with everything up to and including the last '/'
 *		(or empty if there is no '/', or the string starts with '/'),
 *		then drop one trailing path component at a time, finishing
 *		with the empty string.
 *
 * RETURNS
 *	int: zero if does not match, nonzero if does match.
 *
 * CAVEAT
 *	The field structure is not allocated here.
 *	Fields set by a candidate which fails are released again before the
 *	next candidate is tried; fields set by a successful match are left
 *	in the field structure.
 *	It is a fatal error (see illegal_pattern) for "%0" to appear anywhere
 *	but at the start of the pattern or after a '/', or for it to be
 *	followed by a '/' or the end of the pattern.
 */

static int matcher _((char *, char *, char *, match_ty *));

static int
matcher(op, p, s, field)
	char		*op;	/* original pattern */
	char		*p;
	char		*s;
	match_ty	*field;
{
	size_t		index;
	string_ty	*sp;
	int		result;

	trace(("matcher(op = %08lX, p = %08lX, s = %08lX, field = %08X)\n{\n"/*}*/, op, p, s, field));
	trace_string(op);
	trace_string(p);
	trace_string(s);
	for (;;)
	{
		switch (*p)
		{
		case 0:
			/* end of pattern: match only if the string ended too */
			result = !*s;
			goto ret;

		case MATCH_CHAR:
			/* a doubled match character stands for itself */
			if (p[1] == MATCH_CHAR)
			{
				if (*s++ != MATCH_CHAR)
				{
					result = 0;
					goto ret;
				}
				p += 2;
				break;
			}

			/*
			 * Work out which field is being referenced;
			 * the un-numbered field is kept at index 10.
			 */
			if (p[1] >= '0' && p[1] <= '9')
			{
				index = p[1] - '0';
				p += 2;
			}
			else
			{
				index = 10;
				++p;
			}

			/*
			 * %0 must be at the start of the pattern or follow a
			 * '/', and must be followed by something other than
			 * a '/' or the end of the pattern.
			 * (Here p is already past the "%0", so p - 2 is the
			 * match character and p[-3] the character before it.)
			 */
			if
			(
				!index
			&&
				(
					(p - 2 != op && p[-3] != '/')
				||
					(!*p || *p == '/')
				)
			)
				illegal_pattern(op, -1);
			sp = field->fill[index];
			if (sp)
			{
				char	*q;

				/*
				 * The field was set earlier in the pattern:
				 * the string must continue with its value.
				 */
				q = sp->str_text;
				while (*q)
				{
					if (*q++ != *s++)
					{
						result = 0;
						goto ret;
					}
				}
			}
			else
			{
				char *q;

				/*
				 * The field is not set yet.
				 * Find the end of the longest candidate value.
				 */
				if (index)
				{
					q = strchr(s, '/');
					if (!q)
						q = s + strlen(s);
					if (q == s)
					{
						result = 0;
						goto ret;
					}
				}
				else
				{
					q = strrchr(s, '/');
					if (!q || *s == '/')
						q = s;
					else
						q++;
				}

				/*
				 * Try each candidate, longest first.
				 * The loop finishes once the empty candidate
				 * (q == s) has been tried, so q never needs to
				 * move to before the start of the string.
				 */
				for (;;)
				{
					field->fill[index] = str_n_from_c(s, q - s);
					trace_long_unsigned(index);
					trace_string(field->fill[index]->str_text);
					if (matcher(op, p, q, field))
					{
						result = 1;
						goto ret;
					}
					str_free(field->fill[index]);
					field->fill[index] = 0;

					if (q == s)
						break;
					--q;
					if (!index)
					{
						/*
						 * %0 holds whole path components:
						 * back up to just after the
						 * previous '/', or to the start
						 * of the string so that the
						 * empty value is tried as well.
						 */
						while (q > s && q[-1] != '/')
							--q;
					}
				}
				result = 0;
				goto ret;
			}
			break;

		default:
			/* literal pattern character */
			if (*p++ != *s++)
			{
				result = 0;
				goto ret;
			}
			break;
		}
	}
ret:
	trace(("return %d;\n", result));
	trace((/*{*/"}\n"));
	return result;
}


/*
 * NAME
 *	match - attempt to match a pattern against a string
 *
 * SYNOPSIS
 *	match_ty *match(string_ty *pattern, string_ty *string);
 *
 * DESCRIPTION
 *	The match function is used to match a pattern with a string.
 *	The matching fields are filled in in the returned structure.
 *
 * RETURNS
 *	match_ty *: a pointer to a match structure in dynamic memory with the
 *	match fields set as appropriate.  A NULL pointer is returned if the
 *	string does not match the pattern.
 *
 * CAVEAT
 *	The match structure should be released by calling match_free.
 */

match_ty *
match(pattern, string)
	string_ty	*pattern;
	string_ty	*string;
{
	match_ty	*field;

	trace(("match(pattern = %08lX, string = %08lX)\n{\n"/*}*/, pattern, string));
	trace_string(pattern->str_text);
	trace_string(string->str_text);
	field = match_alloc();

	/*
	 * The pattern text is passed twice: once as the original pattern
	 * and once as the portion which remains to be matched.
	 */
	if (!matcher(pattern->str_text, pattern->str_text, string->str_text, field))
	{
		/* no match: recycle the structure and report failure */
		match_free(field);
		field = 0;
	}
	trace(("return %08lX;\n", field));
	trace((/*{*/"}\n"));
	return field;
}


/*
 * NAME
 *	reconstruct - make string from pattern
 *
 * SYNOPSIS
 *	string_ty *reconstruct(string_ty *pattern, match_ty *field);
 *
 * DESCRIPTION
 *	The reconstruct function is used to rebuild a string from a replacement
 *	pattern and the match field values.  Each field reference in the
 *	pattern is replaced by the value of that field, "%%" is replaced by a
 *	single match character, and all other characters are copied as-is.
 *
 *	The work is done in two passes over the pattern: the first pass
 *	checks the field references and adds up the length of the result,
 *	the second pass builds the result in a scratch buffer which is kept
 *	(and grown when necessary) between calls.
 *
 * RETURNS
 *	string_ty * - the string made by str_n_from_c from the scratch buffer
 *
 * CAVEAT
 *	It is a fatal error for the pattern to reference fields not set by the
 *	pattern match which created the fields match structure.
 */

string_ty *
reconstruct(pattern, field)
	string_ty	*pattern;
	match_ty	*field;
{
	static char	*buffer;
	static size_t	buffer_max;
	size_t		length;
	char		*cp;
	char		*out;
	string_ty	*value;
	int		which;

	trace(("reconstruct(pattern = %08lX, field = %08X)\n{\n"/*}*/, pattern, field));
	trace_string(pattern->str_text);

	/*
	 * first pass: validate the field references and measure the result
	 */
	length = 0;
	cp = pattern->str_text;
	while (*cp)
	{
		if (*cp != MATCH_CHAR)
		{
			++length;
			++cp;
			continue;
		}
		if (cp[1] == MATCH_CHAR)
		{
			/* the escaped form yields one character */
			++length;
			cp += 2;
			continue;
		}
		if (cp[1] >= '0' && cp[1] <= '9')
		{
			which = cp[1] - '0';
			cp += 2;
		}
		else
		{
			/* the un-numbered field is kept at index 10 */
			which = 10;
			++cp;
		}
		value = field->fill[which];
		if (!value)
			illegal_pattern(pattern->str_text, which);
		length += value->str_length;
	}

	/*
	 * make sure the scratch buffer is big enough
	 * (it starts out with room for at least 16 characters)
	 */
	if (!buffer)
	{
		buffer_max = (length < 16 ? 16 : length);
		buffer = mem_alloc(buffer_max);
	}
	else if (buffer_max < length)
	{
		buffer_max = length;
		buffer = mem_change_size(buffer, buffer_max);
	}

	/*
	 * second pass: build the result
	 * (every field referenced was seen to be set by the first pass)
	 */
	out = buffer;
	cp = pattern->str_text;
	while (*cp)
	{
		if (*cp != MATCH_CHAR)
		{
			*out++ = *cp++;
			continue;
		}
		if (cp[1] == MATCH_CHAR)
		{
			*out++ = MATCH_CHAR;
			cp += 2;
			continue;
		}
		if (cp[1] >= '0' && cp[1] <= '9')
		{
			which = cp[1] - '0';
			cp += 2;
		}
		else
		{
			which = 10;
			++cp;
		}
		value = field->fill[which];
		memcpy(out, value->str_text, value->str_length);
		out += value->str_length;
	}

	value = str_n_from_c(buffer, length);
	trace_string(value->str_text);
	trace(("return %08lX;\n", value));
	trace((/*{*/"}\n"));
	return value;
}


/*
 * NAME
 *	match_push - push match fields
 *
 * SYNOPSIS
 *	void match_push(match_ty *field);
 *
 * DESCRIPTION
 *	The match_push function is used to push a match structure onto the
 *	stack of match fields.  A NULL pointer may be pushed; it is
 *	represented on the stack by a freshly allocated structure with
 *	ABSENT_MARKER in its first field.  This mechanism is used by
 *	the chef (cook.c) to indicate implicit and explicit recipe replacements.
 *
 * RETURNS
 *	void
 */

void
match_push(field)
	match_ty	*field;
{
	match_ty	*entry;

	trace(("match_push(field = %08X)\n{\n"/*}*/, field));
	entry = field;
	if (!entry)
	{
		/* stand-in for the NULL, so the stack can still be chained */
		entry = match_alloc();
		entry->fill[0] = ABSENT_MARKER;
	}
	entry->next = stack;
	stack = entry;
	trace((/*{*/"}\n"));
}


/*
 * NAME
 *	match_top - top of match stack
 *
 * SYNOPSIS
 *	match_ty *match_top(void);
 *
 * DESCRIPTION
 *	The match_top function is used to indicate the top of the match stack.
 *	The stack is not altered.
 *
 * RETURNS
 *	match_ty * - a pointer to a match structure, or NULL if the stack is
 *	empty, or a NULL was pushed to mark an explicit recipe.
 */

match_ty *
match_top()
{
	/* nothing on the stack at all */
	if (!stack)
		return 0;

	/* the top entry is the stand-in for a pushed NULL */
	if (stack->fill[0] == ABSENT_MARKER)
		return 0;

	return stack;
}


/*
 * NAME
 *	match_pop - shorten stack
 *
 * SYNOPSIS
 *	match_ty *match_pop(void);
 *
 * DESCRIPTION
 *	The match_pop function is used to pop a match structure from the match
 *	stack.  If the entry popped is the stand-in which match_push created
 *	for a NULL pointer, the stand-in is recycled and NULL is returned.
 *
 * RETURNS
 *	match_ty * - a pointer to a match structure, or NULL if a NULL was
 *	pushed to mark an explicit recipe.
 *
 * CAVEAT
 *	It is an error for the stack to be empty.
 */

match_ty *
match_pop()
{
	match_ty	*field;

	trace(("match_pop()\n{\n"/*}*/));
	assert(stack);
	field = stack;
	stack = stack->next;
	if (field->fill[0] == ABSENT_MARKER)
	{
		/*
		 * The stand-in came from match_alloc, so hand it back the
		 * same way rather than to free().  The marker is not a
		 * string, so it is cleared first to stop match_free from
		 * passing it to str_free.
		 */
		field->fill[0] = 0;
		match_free(field);
		field = 0;
	}
	trace(("return %08X;\n", field));
	trace((/*{*/"}\n"));
	return field;
}


/*
 * NAME
 *	wl_match - find a pattern in a word list
 *
 * SYNOPSIS
 *	match_ty *wl_match(wlist *pattern, string_ty *target);
 *
 * DESCRIPTION
 *	Wl_match is used to determine whether the target matches any one
 *	of the words in the word list, each word being used as a pattern.
 *	The words are tried in order and the search stops at the first
 *	one which matches.
 *
 * RETURNS
 *	A zero is returned if the target matches none of the words;
 *	otherwise a pointer to a "match structure" is returned,
 *	in a similar fashion to match().
 *
 * CAVEAT
 *	The information returned resides in dynamic memory.
 *	It is the responsibility of the
 *	caller to ensure that it is freed when it is finished with,
 *	by a call to match_free();
 */

match_ty *
wl_match(pattern, target)
	wlist		*pattern;
	string_ty	*target;
{
	int		n;
	match_ty	*found;

	found = 0;
	for (n = 0; !found && n < pattern->wl_nwords; ++n)
		found = match(pattern->wl_word[n], target);
	return found;
}


/*
 * NAME
 *	wl_reconstruct - reconstruct a word list
 *
 * SYNOPSIS
 *	void wl_reconstruct(wlist *to, wlist *from, match_ty *field)
 *
 * DESCRIPTION
 *	Wl_reconstruct is used to reconstruct an entire word list,
 *	sort of the converse of wl_match().  Each word of 'from' is used
 *	as a pattern for reconstruct(), and the results are appended to
 *	'to' in the same order.
 *
 * RETURNS
 *	'To' is a word list of reconstructed strings.
 *
 * CAVEAT
 *	'To' is reset with wl_zero() before anything is appended to it.
 *	It is the responsibility of the caller to ensure that the
 *	reconstructed word list in 'to' is freed when finished with,
 *	by a call to wl_free().
 */

void
wl_reconstruct(to, from, field)
	wlist		*to;
	wlist		*from;
	match_ty	*field;
{
	int		n;
	string_ty	*built;

	wl_zero(to);
	n = 0;
	while (n < from->wl_nwords)
	{
		built = reconstruct(from->wl_word[n], field);
		wl_append(to, built);
		/* give up the local reference now the word has been appended */
		str_free(built);
		++n;
	}
}
