/* lex.c -- C-tools lexical analyzer.
   Copyright (C) 1995 Sandro Sigala  */

/* $Id: lex.c,v 1.11 1995/08/08 12:29:23 sandro Exp $ */

/* This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */


#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "lex.h"
#include "misc.h"

/* Stream the lexer reads characters from; defined outside this file.  */
extern FILE *input_file;

/* Line counter.  Starts at 1 and is advanced by gettoken ().  */
int lex_lineno = 1;

/* Text of the token most recently scanned by gettoken ().  Allocated by
   init_lex () and released by done_lex ().  gettoken () resets it to the
   empty string at the start of every call.  */
char *lex_token_buffer;

/* When set to 1, gettoken () returns white-space characters to the
   caller instead of skipping them.  */
int lex_return_white_spaces = 0;

/* When nonzero, gettoken () returns a whole '#' line as one DIRECTIVE
   token; when zero it returns the '#' character alone.  */
int lex_return_directives = 1;

/* Capacity of the character pushback stack below.  */
#define MAX_LEXER_BUFFER 64

/* max allowed token size: the number of characters the token buffer
   takes before extend_token_buffer () has to grow it */
static int maxtoken;

/* Character pushback stack.  The elements are `int', not `char', so
   that every value getc () can hand out -- including EOF and bytes
   above 0x7f -- comes back from lexgetc () unchanged.  With a `char'
   array a pushed-back EOF turns into 255 where plain char is unsigned,
   and a pushed-back 0xff byte turns into EOF where it is signed.  */
static int lexbuf[MAX_LEXER_BUFFER];

/* Number of characters currently held in lexbuf.  */
static int lexbufidx = 0;

/* Read the next character: the most recently pushed-back one if there
   is any, otherwise the next character of input_file.  */
#define lexgetc() \
    (lexbufidx ? lexbuf[--lexbufidx] : getc (input_file))

/* Push C back so that the next lexgetc () returns it.
   NOTE: the depth is not checked against MAX_LEXER_BUFFER.  */
#define lexungetc(c) \
    (lexbuf[lexbufidx++] = (c))

void init_lex (void);
void done_lex (void);
int gettoken (void);
void ungettoken (void);

static char *extend_token_buffer (char *);
static int is_reserved_word (char *);

/* Prepare the lexer for use: set the initial token capacity and
   allocate lex_token_buffer.  Call this before the first gettoken ().  */
void
init_lex (void)
{
    maxtoken = 40;

    /* Keep two spare bytes beyond maxtoken, the same slack that
       extend_token_buffer () keeps when it grows the buffer.  gettoken ()
       may store one more character plus the terminating NUL after its
       last capacity check, so `maxtoken + 1' is one byte short.  */
    lex_token_buffer = (char *) xmalloc (maxtoken + 2);
}

/* Release the token buffer allocated by init_lex ().  The pointer is
   reset afterwards so that calling done_lex () twice is harmless
   (free (NULL) is a no-op) and no dangling pointer is left behind in
   the global.  */
void
done_lex (void)
{
    free (lex_token_buffer);
    lex_token_buffer = NULL;
}

/* Grow the token buffer and return the position inside the new buffer
   that corresponds to P in the old one.  The capacity mark maxtoken
   becomes twice its previous value plus 10; the allocation is two
   bytes larger than that mark.  */
static char *
extend_token_buffer (char *p)
{
    int used = p - lex_token_buffer;
    int grown = 2 * maxtoken + 10;
    char *bigger = (char *) xrealloc (lex_token_buffer, grown + 2);

    maxtoken = grown;
    lex_token_buffer = bigger;

    return &bigger[used];
}

/* Look WORD up in the table of C keywords.  Return the keyword's token
   code when WORD matches an entry exactly, or 0 when it is not a
   keyword.  The table is searched linearly and ends with a sentinel
   entry whose token code is 0.  */
static int
is_reserved_word (char *word)
{
    static const struct keyword
    {
        const char *name;
        int token;
    }
    keywords[] =
    {
        {"auto", KW_AUTO},
        {"break", KW_BREAK},
        {"case", KW_CASE},
        {"char", KW_CHAR},
        {"const", KW_CONST},
        {"continue", KW_CONTINUE},
        {"default", KW_DEFAULT},
        {"do", KW_DO},
        {"double", KW_DOUBLE},
        {"else", KW_ELSE},
        {"enum", KW_ENUM},
        {"extern", KW_EXTERN},
        {"float", KW_FLOAT},
        {"for", KW_FOR},
        {"goto", KW_GOTO},
        {"if", KW_IF},
        {"int", KW_INT},
        {"long", KW_LONG},
        {"register", KW_REGISTER},
        {"return", KW_RETURN},
        {"short", KW_SHORT},
        {"signed", KW_SIGNED},
        {"sizeof", KW_SIZEOF},
        {"static", KW_STATIC},
        {"struct", KW_STRUCT},
        {"switch", KW_SWITCH},
        {"typedef", KW_TYPEDEF},
        {"union", KW_UNION},
        {"unsigned", KW_UNSIGNED},
        {"void", KW_VOID},
        {"volatile", KW_VOLATILE},
        {"while", KW_WHILE},
        {0, 0}
    };
    const struct keyword *kw;

    /* The token code is tested before the name, so the sentinel's null
       name is never handed to strcmp ().  */
    for (kw = keywords; kw->token != 0; kw++)
    {
        if (strcmp (kw->name, word) == 0)
            return kw->token;
    }

    return 0;
}

/* Store character C at P in the token buffer and return the advanced
   write position.  The buffer is grown first whenever P has reached
   the maxtoken mark.  Because this check precedes every single store,
   P never moves past lex_token_buffer + maxtoken, which leaves the
   slot for the terminating NUL inside the allocation.  */
static char *
tokbuf_put (char *p, int c)
{
    if (p >= lex_token_buffer + maxtoken)
        p = extend_token_buffer (p);

    *p++ = (char) c;

    return p;
}

/* Push C back so that the next lexgetc () returns it again.  EOF is
   not a character and is never pushed back; dropping it is harmless
   because the next read from input_file reports end of file again.  */
static void
push_back_char (int c)
{
    if (c != EOF)
        lexungetc (c);
}

/* Return nonzero when C is one of the characters gettoken () treats as
   white space: blank, horizontal tab, vertical tab, form feed,
   carriage return or newline.  */
static int
is_white_space (int c)
{
    return c == ' ' || c == '\011' || c == '\013' || c == '\014'
        || c == '\015' || c == '\n';
}

/* Scan the rest of a string or character literal whose opening QUOTE
   has already been read, leaving the literal including both quotes in
   lex_token_buffer.  A backslash and the character following it are
   copied verbatim, so an escaped quote does not end the literal.  If
   the input ends before the closing quote, the closing quote is
   supplied anyway.  Newlines inside the literal advance lex_lineno.  */
static void
scan_quoted_literal (int quote)
{
    char *p = tokbuf_put (lex_token_buffer, quote);
    int ch;

    while ((ch = lexgetc ()) != EOF && ch != quote)
    {
        p = tokbuf_put (p, ch);

        if (ch == '\\')
        {
            /* Never copy EOF into the token text.  */
            if ((ch = lexgetc ()) == EOF)
                break;
            p = tokbuf_put (p, ch);
        }

        if (ch == '\n')
            lex_lineno++;
    }

    p = tokbuf_put (p, quote);
    *p = '\0';
}

/* Read the next token from input_file and return its code.

   Returns EOF at end of input.  White space is skipped, unless
   lex_return_white_spaces is 1, in which case each white-space
   character is returned as itself.  Tokens that carry text leave it,
   NUL-terminated, in lex_token_buffer:

     DIRECTIVE   a '#' line up to and including its newline (only when
                 lex_return_directives is nonzero, else '#' is returned)
     IDENTIFIER  or the keyword code found by is_reserved_word ()
     NUMBER      a run of decimal digits, or 0x/0X followed by hex
                 digits; suffixes and floating-point forms are not
                 recognized here
     COMMENT     a complete slash-star comment
     STRING      a double-quoted literal
     CHARACTER   a single-quoted literal

   Multi-character operators return their TK_* code.  Every other
   character is returned as itself.

   lex_lineno is advanced for every newline consumed, including those
   inside directives, comments and literals.  A comment that is still
   open at end of input yields '/' with the partial comment text left
   in lex_token_buffer.  */
int
gettoken (void)
{
    int c, c1, c2, i;
    char *p;

    lex_token_buffer[0] = '\0';

    /* White space is skipped with a loop rather than by recursion, so
       long runs of blanks or empty lines cannot exhaust the stack.  */
    do
    {
        c = lexgetc ();

        if (c == EOF)
            return EOF;

        if (c == '\n')
            lex_lineno++;
    }
    while (is_white_space (c) && lex_return_white_spaces != 1);

    if (is_white_space (c))
        return c;

    switch (c)
    {
    case '#':
        if (!lex_return_directives)
            return '#';

        p = tokbuf_put (lex_token_buffer, c);

        while ((c1 = lexgetc ()) != EOF && c1 != '\n')
            p = tokbuf_put (p, c1);

        /* The newline belongs to the directive text; EOF does not.  */
        if (c1 == '\n')
        {
            p = tokbuf_put (p, c1);
            lex_lineno++;
        }
        *p = '\0';

        return DIRECTIVE;

    case 'A': case 'B': case 'C': case 'D': case 'E':
    case 'F': case 'G': case 'H': case 'I': case 'J':
    case 'K': case 'L': case 'M': case 'N': case 'O':
    case 'P': case 'Q': case 'R': case 'S': case 'T':
    case 'U': case 'V': case 'W': case 'X': case 'Y':
    case 'Z':
    case 'a': case 'b': case 'c': case 'd': case 'e':
    case 'f': case 'g': case 'h': case 'i': case 'j':
    case 'k': case 'l': case 'm': case 'n': case 'o':
    case 'p': case 'q': case 'r': case 's': case 't':
    case 'u': case 'v': case 'w': case 'x': case 'y':
    case 'z':
    case '_':
        p = lex_token_buffer;

        /* The <ctype.h> functions require an unsigned char value or
           EOF, hence the explicit EOF test and the cast.  */
        do
        {
            p = tokbuf_put (p, c);
            c = lexgetc ();
        }
        while (c != EOF && (isalnum ((unsigned char) c) || c == '_'));

        *p = '\0';
        push_back_char (c);

        if ((i = is_reserved_word (lex_token_buffer)) != 0)
            return i;

        return IDENTIFIER;

    case '0':
    case '1': case '2': case '3':
    case '4': case '5': case '6':
    case '7': case '8': case '9':
        p = tokbuf_put (lex_token_buffer, c);
        c1 = lexgetc ();

        if (c == '0' && (c1 == 'x' || c1 == 'X'))
        {
            /* Hexadecimal constant: keep the prefix, then take every
               hex digit that follows.  */
            p = tokbuf_put (p, c1);
            while ((c1 = lexgetc ()) != EOF && isxdigit ((unsigned char) c1))
                p = tokbuf_put (p, c1);
        }
        else
        {
            while (c1 != EOF && isdigit ((unsigned char) c1))
            {
                p = tokbuf_put (p, c1);
                c1 = lexgetc ();
            }
        }

        *p = '\0';
        push_back_char (c1);

        return NUMBER;

    case '+':
        c1 = lexgetc ();
        if (c1 == '+')
            return TK_INCREMENT;
        if (c1 == '=')
            return TK_ADD_ASSIGN;
        push_back_char (c1);
        return '+';

    case '-':
        c1 = lexgetc ();
        if (c1 == '-')
            return TK_DECREMENT;
        if (c1 == '=')
            return TK_SUB_ASSIGN;
        if (c1 == '>')
            return TK_PTR_OP;
        push_back_char (c1);
        return '-';

    case '*':
        c1 = lexgetc ();
        if (c1 == '=')
            return TK_MUL_ASSIGN;
        push_back_char (c1);
        return '*';

    case '/':
        c1 = lexgetc ();
        if (c1 == '=')
            return TK_DIV_ASSIGN;

        if (c1 == '*')
        {
            /* Remember the previous character instead of peeking past
               each '*'.  That way a run of stars before the slash
               still closes the comment, and nothing has to be pushed
               back.  `prev' starts at 0 so that the '*' of the opening
               delimiter cannot pair with an immediately following
               slash.  */
            int prev = 0;

            p = tokbuf_put (lex_token_buffer, '/');
            p = tokbuf_put (p, '*');

            while ((c1 = lexgetc ()) != EOF)
            {
                p = tokbuf_put (p, c1);

                if (c1 == '\n')
                    lex_lineno++;

                if (prev == '*' && c1 == '/')
                {
                    *p = '\0';
                    return COMMENT;
                }
                prev = c1;
            }

            /* Input ended inside the comment.  The partial text stays
               in lex_token_buffer and '/' is returned; the next call
               reports EOF.  */
            *p = '\0';
            return '/';
        }

        push_back_char (c1);
        return '/';

    case '%':
        c1 = lexgetc ();
        if (c1 == '=')
            return TK_MOD_ASSIGN;
        push_back_char (c1);
        return '%';

    case '!':
        c1 = lexgetc ();
        if (c1 == '=')
            return TK_NE_OP;
        push_back_char (c1);
        return '!';

    case '=':
        c1 = lexgetc ();
        if (c1 == '=')
            return TK_EQ_OP;
        push_back_char (c1);
        return '=';

    case '&':
        c1 = lexgetc ();
        if (c1 == '=')
            return TK_AND_ASSIGN;
        if (c1 == '&')
            return TK_AND_OP;
        push_back_char (c1);
        return '&';

    case '|':
        c1 = lexgetc ();
        if (c1 == '=')
            return TK_OR_ASSIGN;
        if (c1 == '|')
            return TK_OR_OP;
        push_back_char (c1);
        return '|';

    case '^':
        c1 = lexgetc ();
        if (c1 == '=')
            return TK_XOR_ASSIGN;
        push_back_char (c1);
        return '^';

    case '>':
        c1 = lexgetc ();
        if (c1 == '=')
            return TK_GE_OP;
        if (c1 == '>')
        {
            /* ">>" is the right shift, so it maps to the RIGHT codes.
               NOTE(review): this assumes the TK_LEFT and TK_RIGHT names
               in lex.h carry their usual meaning -- confirm.  */
            c2 = lexgetc ();
            if (c2 == '=')
                return TK_RIGHT_ASSIGN;
            push_back_char (c2);
            return TK_RIGHT_OP;
        }
        push_back_char (c1);
        return '>';

    case '<':
        c1 = lexgetc ();
        if (c1 == '=')
            return TK_LE_OP;
        if (c1 == '<')
        {
            /* "<<" is the left shift, so it maps to the LEFT codes.  */
            c2 = lexgetc ();
            if (c2 == '=')
                return TK_LEFT_ASSIGN;
            push_back_char (c2);
            return TK_LEFT_OP;
        }
        push_back_char (c1);
        return '<';

    case '.':
        c1 = lexgetc ();
        if (c1 == '.')
        {
            c2 = lexgetc ();
            if (c2 == '.')
                return TK_ELLIPSIS;

            /* Only two dots: give both looked-ahead characters back in
               reverse order, so they are read again in their original
               order.  */
            push_back_char (c2);
            push_back_char (c1);
            return '.';
        }
        push_back_char (c1);
        return '.';

    case '"':
        scan_quoted_literal ('"');
        return STRING;

    case '\'':
        scan_quoted_literal ('\'');
        return CHARACTER;

    default:
        return c;
    }
}

/* lex.c ends here */
