/*****************************************************************************/
/*                                                                           */
/*  Compiler - a Parser Generator Program, Version 2.0                       */
/*  Copyright (c) 2000, 2003  Charles M. Fayle  All Rights Reserved.         */
/*                                                                           */
/*  This software is distributed under the terms of the GNU General Public   */
/*  License as specified in the file gpl.txt included with the distribution. */
/*                                                                           */
/*****************************************************************************/
//
//  $Id$
//

#include "lexicalDriver.h"

// Out-of-class definition of the static class constant.
// NOTE(review): presumably the default size of the read buffer passed as
// b_size to the SetInput* functions -- confirm against lexicalDriver.h.
const int	CLexicalDriver::default_block_size = 4096;

//
//  Construct the driver base.  'est' becomes empty_symbol_token, while
//  lexical_error_code and filter_terminal get the fixed values -1 and -2.
//
//  No input source is attached yet: the input and close callbacks are the
//  null handlers, and the buffer state and source handles are given defined
//  "empty" values so that reading or closing before any SetInput* call never
//  examines indeterminate members.
//
CLexicalDriver::CLexicalDriver(int est)
	:	empty_symbol_token(est),
		lexical_error_code(-1),
		filter_terminal(-2)
{
	input_function	= &CLexicalDriver::GetInputNull;
	close_function	= &CLexicalDriver::CloseNull;

	// Empty buffer: nothing to read until a source is attached.
	input_buffer			= 0;
	input_buffer_size		= 0;
	input_buffer_offset		= 0;
	characters_remaining	= 0;

	// No stream, FILE or descriptor is associated with the driver.
	input_ios	= 0;
	input_std	= 0;
	input_fd	= -1;
}

//
//  Destructor: closes whatever input source is still attached.  The status
//  returned by CloseInput() is deliberately dropped because a destructor has
//  no way to report it.
//
CLexicalDriver::~CLexicalDriver()
{
	static_cast<void>(CloseInput());
}

//
//  Take input from a caller-supplied memory block of i_size bytes.
//
//  Any source attached earlier is closed first; the status of that close is
//  the return value.  Only the pointer is stored (the bytes are not copied),
//  so the block has to remain valid for as long as the driver reads from it.
//
CLexicalDriver::InputError
	CLexicalDriver::SetInputBuffer(const unsigned char *i_buffer, int i_size)
{
	const int	close_status = CloseInput();

	// The whole input is already in memory, so the null refill and close
	// handlers are installed.
	close_function	= &CLexicalDriver::CloseNull;
	input_function	= &CLexicalDriver::GetInputNull;

	// Every byte of the block is immediately available for reading.
	characters_remaining	= i_size;
	input_buffer_offset		= 0;
	input_buffer_size		= i_size;
	input_buffer			= const_cast<unsigned char *>(i_buffer);

	return InputError(close_status);
}

//
//  Take input from the characters of i_string by forwarding its data pointer
//  and length to SetInputBuffer().  The string is not copied, so it must
//  outlive the scan and must not be modified while it is being read.
//
CLexicalDriver::InputError
	CLexicalDriver::SetInputString(const string &i_string)
{
	const unsigned char	*bytes =
		reinterpret_cast<const unsigned char *>(i_string.data());
	const int			byte_count = i_string.length();

	return SetInputBuffer(bytes, byte_count);
}

void	CLexicalDriver::AllocateBuffer(int b_size)
{
	input_buffer			= new unsigned char[b_size];
	input_buffer_size		= b_size;
	characters_remaining	= 0;
}

//
//  Take input from an already-open istream, read through an internal buffer
//  of b_size bytes.
//
//  Any source attached earlier is closed first and the status of that close
//  is returned.  The close handler installed here is CloseIoStream.
//
CLexicalDriver::InputError
	CLexicalDriver::SetInputIoStream(istream *i_stream, int b_size)
{
	const int	close_status = CloseInput();

	input_ios = i_stream;

	close_function	= &CLexicalDriver::CloseIoStream;
	input_function	= &CLexicalDriver::GetInputIoStreamData;

	AllocateBuffer(b_size);

	return InputError(close_status);
}

//
//  Open the file f_name as a binary ifstream owned by the driver and read it
//  through an internal buffer of b_size bytes.
//
//  Any source attached earlier is closed first.  Returns the status of that
//  close, with OpenError or-ed in when the file cannot be opened; in that
//  case no source is attached and input_ios is reset to null.
//
CLexicalDriver::InputError
	CLexicalDriver::SetInputFileIoStream(const string &f_name, int b_size)
{
	int		r = CloseInput();

	input_ios = new ifstream(f_name.c_str(), ios::in | ios::binary);

	if (input_ios->fail())
	{
		// Do not leave the member pointing at the destroyed stream object.
		delete input_ios;
		input_ios = 0;

		return InputError(r | OpenError);
	}

	input_function	= &CLexicalDriver::GetInputIoStreamData;
	close_function	= &CLexicalDriver::CloseFileIoStream;

	AllocateBuffer(b_size);

	return InputError(r);
}

//
//  Take input from an already-open C stdio stream, read through an internal
//  buffer of b_size bytes.
//
//  Any source attached earlier is closed first and the status of that close
//  is returned.  The close handler installed here is CloseStdStream.
//
CLexicalDriver::InputError
	CLexicalDriver::SetInputStdStream(FILE *f, int b_size)
{
	const int	close_status = CloseInput();

	input_std = f;

	close_function	= &CLexicalDriver::CloseStdStream;
	input_function	= &CLexicalDriver::GetInputStdStreamData;

	AllocateBuffer(b_size);

	return InputError(close_status);
}

//
//  Open the file f_name with fopen() in "r" mode and read it through an
//  internal buffer of b_size bytes.
//
//  Any source attached earlier is closed first.  Returns the status of that
//  close, with OpenError or-ed in when fopen() fails (no source is attached
//  in that case).
//
CLexicalDriver::InputError
	CLexicalDriver::SetInputFileStdStream(const string &f_name, int b_size)
{
	const int	close_status = CloseInput();

	input_std = fopen(f_name.c_str(), "r");

	if (input_std)
	{
		input_function	= &CLexicalDriver::GetInputStdStreamData;
		close_function	= &CLexicalDriver::CloseFileStdStream;

		AllocateBuffer(b_size);

		return InputError(close_status);
	}

	return InputError(close_status | OpenError);
}

//
//  Take input from an already-open file descriptor, read through an internal
//  buffer of b_size bytes.
//
//  Any source attached earlier is closed first and the status of that close
//  is returned.  The close handler installed here is CloseDescriptor.
//
CLexicalDriver::InputError
	CLexicalDriver::SetInputDescriptor(int i_d, int b_size)
{
	const int	close_status = CloseInput();

	input_fd = i_d;

	close_function	= &CLexicalDriver::CloseDescriptor;
	input_function	= &CLexicalDriver::GetInputDescriptorData;

	AllocateBuffer(b_size);

	return InputError(close_status);
}

//
//  Open the file f_name read-only with open() and read it through an
//  internal buffer of b_size bytes.
//
//  Any source attached earlier is closed first.  Returns the status of that
//  close, with OpenError or-ed in when open() returns -1 (no source is
//  attached in that case).
//
CLexicalDriver::InputError
	CLexicalDriver::SetInputFileDescriptor(const string &f_name, int b_size)
{
	const int	close_status = CloseInput();

	input_fd = open(f_name.c_str(), O_RDONLY);

	if (input_fd != -1)
	{
		input_function	= &CLexicalDriver::GetInputDescriptorData;
		close_function	= &CLexicalDriver::CloseFileDescriptor;

		AllocateBuffer(b_size);

		return InputError(close_status);
	}

	return InputError(close_status | OpenError);
}

//
//  Detach the current input source by running its close handler, then
//  install the null input and close handlers.  Returns the status produced
//  by the handler that was run.
//
CLexicalDriver::InputError		CLexicalDriver::CloseInput()
{
	const int	handler_status = (this->*close_function)();

	close_function	= &CLexicalDriver::CloseNull;
	input_function	= &CLexicalDriver::GetInputNull;

	return InputError(handler_status);
}

//
//  Close handler for a stream attached with SetInputIoStream(): releases the
//  read buffer.  The stream itself is neither closed nor deleted here.
//  Always returns NoError.
//
CLexicalDriver::InputError		CLexicalDriver::CloseIoStream()
{
	delete [] input_buffer;

	// Mark the buffer as gone so a later read cannot touch freed memory and
	// a repeated close cannot free it twice.
	input_buffer			= 0;
	input_buffer_size		= 0;
	characters_remaining	= 0;

	return NoError;
}

//
//  Close handler for a file opened by SetInputFileIoStream(): releases the
//  read buffer, closes the ifstream and deletes it.
//
//  Returns CloseError when the stream was already in the bad state or when
//  close() itself fails, NoError otherwise.
//
CLexicalDriver::InputError		CLexicalDriver::CloseFileIoStream()
{
	delete [] input_buffer;

	// Mark the buffer as gone so a later read cannot touch freed memory and
	// a repeated close cannot free it twice.
	input_buffer			= 0;
	input_buffer_size		= 0;
	characters_remaining	= 0;

	// Remember an I/O error raised while reading before the state is reset.
	const bool	was_bad = input_ios->bad();

	// Reading up to end of file leaves eofbit/failbit set.  Clear them so
	// that a failbit seen after close() can only have come from close(),
	// which reports failure through failbit rather than badbit.
	input_ios->clear();

	ifstream	*file_stream = dynamic_cast<ifstream *>(input_ios);

	if (file_stream)
		file_stream->close();

	const bool	close_failed = input_ios->fail();

	delete input_ios;
	input_ios = 0;

	return (was_bad || close_failed) ? CloseError : NoError;
}

//
//  Close handler for a stream attached with SetInputStdStream(): releases
//  the read buffer.  The FILE itself is not closed here.  Always returns
//  NoError.
//
CLexicalDriver::InputError		CLexicalDriver::CloseStdStream()
{
	delete [] input_buffer;

	// Mark the buffer as gone so a later read cannot touch freed memory and
	// a repeated close cannot free it twice.
	input_buffer			= 0;
	input_buffer_size		= 0;
	characters_remaining	= 0;

	return NoError;
}

//
//  Close handler for a file opened by SetInputFileStdStream(): releases the
//  read buffer and closes the FILE with fclose().
//
//  Returns CloseError when fclose() reports failure, NoError otherwise.
//
CLexicalDriver::InputError		CLexicalDriver::CloseFileStdStream()
{
	delete [] input_buffer;

	// Mark the buffer as gone so a later read cannot touch freed memory and
	// a repeated close cannot free it twice.
	input_buffer			= 0;
	input_buffer_size		= 0;
	characters_remaining	= 0;

	const int	close_status = fclose(input_std);

	// The FILE object may not be used after fclose(), whether or not the
	// call succeeded, so drop the stale pointer.
	input_std = 0;

	return close_status ? CloseError : NoError;
}

//
//  Close handler for a descriptor attached with SetInputDescriptor():
//  releases the read buffer.  The descriptor itself is not closed here.
//  Always returns NoError.
//
CLexicalDriver::InputError		CLexicalDriver::CloseDescriptor()
{
	delete [] input_buffer;

	// Mark the buffer as gone so a later read cannot touch freed memory and
	// a repeated close cannot free it twice.
	input_buffer			= 0;
	input_buffer_size		= 0;
	characters_remaining	= 0;

	return NoError;
}

//
//  Close handler for a file opened by SetInputFileDescriptor(): releases the
//  read buffer and closes the descriptor with close().
//
//  Returns CloseError when close() returns -1, NoError otherwise.
//
CLexicalDriver::InputError		CLexicalDriver::CloseFileDescriptor()
{
	delete [] input_buffer;

	// Mark the buffer as gone so a later read cannot touch freed memory and
	// a repeated close cannot free it twice.
	input_buffer			= 0;
	input_buffer_size		= 0;
	characters_remaining	= 0;

	const int	close_status = close(input_fd);

	// Forget the descriptor number so it cannot be closed a second time
	// after the system has handed it out again.
	input_fd = -1;

	return (close_status == -1) ? CloseError : NoError;
}

//
//  Refill the read buffer from input_ios.  Requests input_buffer_size
//  characters and returns the number actually extracted, as reported by
//  gcount().
//
int		CLexicalDriver::GetInputIoStreamData()
{
	char	*destination = reinterpret_cast<char *>(input_buffer);

	input_ios->read(destination, input_buffer_size);

	const int	extracted = input_ios->gcount();

	return extracted;
}

//
//  Refill the read buffer from input_std with fread().  Requests
//  input_buffer_size single-byte elements and returns the number fread()
//  reports as read.
//
int		CLexicalDriver::GetInputStdStreamData()
{
	const size_t	element_size = sizeof(unsigned char);

	const size_t	elements_read =
		fread(input_buffer, element_size, input_buffer_size, input_std);

	return elements_read;
}

//
//  Refill the read buffer from input_fd with read().  Returns the number of
//  bytes read; a negative result from read() is reported as 0, i.e. the
//  same as "no more data".
//
int		CLexicalDriver::GetInputDescriptorData()
{
	const ssize_t	byte_count = read(input_fd, input_buffer, input_buffer_size);

	return (byte_count < 0) ? 0 : byte_count;
}

//
//  Format the current scan position as
//      "character = C, line = L, column = N"
//  into s and return a reference to s.  With one_based set, 1 is added to
//  each of the three counters before it is printed.
//
string	&CLexicalDriver::GetLexicalStatusString(string &s, bool one_based)
{
	ostrstream	os;

	if (one_based)
	{
		os << "character = " << current_char_number + 1 << ", ";
		os << "line = " << current_line_number + 1 << ", ";
		os << "column = " << current_column_number + 1;
	}
	else
	{
		os << "character = " << current_char_number << ", ";
		os << "line = " << current_line_number << ", ";
		os << "column = " << current_column_number;
	}

	// ostrstream does not terminate its buffer; str() returns a plain char *.
	os << '\0';

	s = os.str();

	// str() freezes the dynamically allocated buffer, which makes the caller
	// responsible for it.  Unfreeze it so the ostrstream destructor releases
	// the memory instead of leaking it on every call.
	os.freeze(0);

	return s;
}

//
//  Reset the scanning state before a new run.
//
//  ptf is stored in process_terminal_flag, which GetToken() consults before
//  calling ProcessFilter().  t_size is not used by this driver.  The
//  character and column counters start at -1 and the line counter at 0.
//
void	CLexicalSingleCharDriver::Initialize(int t_size, bool ptf)
{
	single_char_array_buffer[0] = '\0';

	current_column_number	= -1;
	current_line_number		= 0;
	current_char_number		= -1;
	current_char			= 0;

	process_terminal_flag	= ptf;
	push_token_back_flag	= false;
}

//
//  Build the character-to-token map from a null-terminated array of token
//  names.
//
//  The position of a name in the array is its token number.  A name of one
//  character maps exactly that character.  Longer names are class names:
//  "space", "digit", "hexdigit" (the letters A-F and a-f only), "uppercase",
//  "lowercase", "letter" and "other" (a fixed punctuation set); any other
//  multi-character name maps nothing.  Names are applied in array order, so
//  a later name overrides an earlier one for the characters they share.
//  Characters claimed by no name map to filter_terminal.
//
//  If 'est' is zero the empty symbol token becomes the number of names.
//
CLexicalSingleCharDriver::CLexicalSingleCharDriver(
							const char * const lex_char_token_names[], int est)
	: CLexicalDriver(est), map_size(256)
{
	character_map	= new int[map_size];

	int		i;
	int		j;

	for (i=0; i<map_size; i++)
		character_map[i] = filter_terminal;

	token_count = 0;

	while (lex_char_token_names[token_count])
		token_count++;

	if (!empty_symbol_token)
		empty_symbol_token = token_count;

	for (i=0; i<token_count; i++)
	{
		const char	*name = lex_char_token_names[i];

		// Test name[0] first so that name[1] is never read past the
		// terminator of an empty name.
		if (name[0] && name[1])
		{
			if (!strcmp(name, "space"))
			{
				character_map[' '] = i;
			}
			else if (!strcmp(name, "digit"))
			{
				for (j=0; j<10; j++)
					character_map['0' + j] = i;
			}
			else if (!strcmp(name, "hexdigit"))
			{
				for (j=0; j<6; j++)
					character_map['A' + j] = i;

				for (j=0; j<6; j++)
					character_map['a' + j] = i;
			}
			else if (!strcmp(name, "uppercase"))
			{
				for (j=0; j<26; j++)
					character_map['A' + j] = i;
			}
			else if (!strcmp(name, "lowercase"))
			{
				for (j=0; j<26; j++)
					character_map['a' + j] = i;
			}
			else if (!strcmp(name, "letter"))
			{
				for (j=0; j<26; j++)
					character_map['A' + j] = i;

				for (j=0; j<26; j++)
					character_map['a' + j] = i;
			}
			else if (!strcmp(name, "other"))
			{
				character_map['!'] = i;
				character_map['#'] = i;
				character_map['%'] = i;
				character_map['&'] = i;
				character_map['\''] = i;
				character_map[':'] = i;
				character_map[';'] = i;
				character_map['='] = i;
				character_map['@'] = i;
				character_map['_'] = i;
				character_map['`'] = i;
				character_map['~'] = i;
			}
		}
		else
		{
			// Plain char may be signed: index through unsigned char so that
			// bytes 0x80..0xFF select entries 128..255 instead of writing
			// in front of the array.
			character_map[static_cast<unsigned char>(name[0])] = i;
		}
	}

	single_char_array_buffer[1] = '\0';

	filter_count	= 0;
	terminal_count	= token_count;
}

//
//  Destructor: releases the character-to-token map allocated by the
//  constructor.
//
CLexicalSingleCharDriver::~CLexicalSingleCharDriver()
{
	delete [] character_map;
}

//
//  Fetch the next terminal (filter or token) and also expose its text.
//
//  terminal_ptr is set to the driver's one-character buffer and
//  terminal_size to 1, or to 0 when the terminal is the empty symbol token.
//  Returns current_token.
//
int		CLexicalSingleCharDriver::GetFilterOrToken(
									unsigned char *&terminal_ptr,
									int &terminal_size)
{
	GetFilterOrToken();

	terminal_size	= (current_token == empty_symbol_token) ? 0 : 1;
	terminal_ptr	= single_char_array_buffer;

	return current_token;
}

//
//  Fetch the next terminal without skipping filters.  When a terminal has
//  been pushed back, the flag is cleared and the current one is returned
//  again instead of reading a new one.
//
int		CLexicalSingleCharDriver::GetFilterOrToken()
{
	if (push_token_back_flag)
	{
		push_token_back_flag = false;

		return current_token;
	}

	GetTerminal();

	return current_token;
}

//
//  Fetch the next token (filters skipped) and also expose its text.
//
//  token_ptr is set to the driver's one-character buffer and token_size to
//  1, or to 0 when the token is the empty symbol token.  Returns
//  current_token.
//
int		CLexicalSingleCharDriver::GetToken(	unsigned char *&token_ptr,
											int &token_size)
{
	GetToken();

	token_size	= (current_token == empty_symbol_token) ? 0 : 1;
	token_ptr	= single_char_array_buffer;

	return current_token;
}

//
//  Fetch the next token, skipping filter terminals.  When a token has been
//  pushed back, the flag is cleared and the current one is returned again.
//  Each skipped filter is handed to ProcessFilter() if process_terminal_flag
//  is set.
//
int		CLexicalSingleCharDriver::GetToken()
{
	if (push_token_back_flag)
	{
		push_token_back_flag = false;

		return current_token;
	}

	GetTerminal();

	while (current_token == filter_terminal)
	{
		if (process_terminal_flag)
			ProcessFilter();

		GetTerminal();
	}

	return current_token;
}

//
//  Read one character from the input and classify it through character_map.
//
//  The buffer is refilled through GetInputData() when it is exhausted; if
//  the refill yields nothing, current_token becomes empty_symbol_token and
//  the one-character buffer is emptied.  Otherwise the position counters
//  are advanced: a new line starts with the character that follows a '\n'.
//
void	CLexicalSingleCharDriver::GetTerminal()
{
	if (characters_remaining == 0)
	{
		characters_remaining = GetInputData();

		if (characters_remaining == 0)
		{
			// End of input.
			single_char_array_buffer[0]	= '\0';
			single_char_array_size		= 0;

			current_token	= empty_symbol_token;

			return;
		}

		input_buffer_offset = 0;
	}

	previous_char	= current_char;
	current_char	= input_buffer[input_buffer_offset++];
	current_token	= character_map[current_char];

	--characters_remaining;
	++current_char_number;

	if (previous_char != '\n')
		++current_column_number;
	else
	{
		++current_line_number;
		current_column_number = 0;
	}

	single_char_array_buffer[0]	= current_char;
	single_char_array_size		= 1;
}

//
//  Describe the current terminal in a parser stack element: the element is
//  typed StackSingleChar and receives the current token together with the
//  character that produced it.
//
void	CLexicalSingleCharDriver::LoadStackElement(CStackElement *se)
{
	se->scalar.scalar_unsigned_char	= current_char;
	se->token						= current_token;
	se->type						= StackSingleChar;
}

//
//  Construct a table-driven regular-expression driver.
//
//  The counts and table pointers are stored as given (the tables are not
//  copied), the inverse token and filter maps are built from them, and the
//  terminal text buffer is left unallocated until Initialize() is called.
//
CLexicalRegexpDriver::CLexicalRegexpDriver(	int	tkc, int ftc, int tmc,
											int est, int ir, int fsmc,
											const int *f_table,
											const int *rm_table,
											const int *pm_table,
											const int *tm_table,
											const int *sa_table,
											const int *tc_table,
											const int *t_map_table,
											const int *f_map_table,
											const int *lt_table,
											int sctm)
	: CLexicalDriver(est)
{
	// Counts and dimensions.
	terminal_count		= tmc;
	filter_count		= ftc;
	token_count			= tkc;
	fsm_count			= fsmc;
	input_range			= ir;

	// Tables supplied by the caller.
	literal_terminal	= lt_table;
	filter_map			= f_map_table;
	token_map			= t_map_table;
	trailing_context	= tc_table;
	start_assignment	= sa_table;
	token_match			= tm_table;
	prefix_match		= pm_table;
	regexp_match		= rm_table;
	fsm_table			= f_table;

	single_column_token_match = sctm;

	// No text buffer yet; Initialize() allocates it.
	terminal_char_array_buffer = 0;

	// Both helpers read the counts and maps stored above.
	SetupInverseTokenMap();
	SetupInverseFilterMap();
}

//
//  Build inverse_token_map, which gives for each token number the index of
//  a terminal that maps to it in token_map.  When several terminals map to
//  the same token the highest terminal index wins.  Tokens that no terminal
//  maps to are set to -1 instead of being left indeterminate.
//
void	CLexicalRegexpDriver::SetupInverseTokenMap()
{
	inverse_token_map = new int[token_count];

	int		i;

	for (i=0; i<token_count; i++)
		inverse_token_map[i] = -1;

	for (i=0; i<terminal_count; i++)
	{
		const int	token = token_map[i];

		// Entries outside 0..token_count-1 do not name a token.
		if (0 <= token && token < token_count)
			inverse_token_map[token] = i;
	}
}

void	CLexicalRegexpDriver::SetupInverseFilterMap()
{
	inverse_filter_map = new int[filter_count];

	int		i;

	for (i=0; i<terminal_count; i++)
		if (0 <= filter_map[i] && filter_map[i] < filter_count)
			inverse_filter_map[filter_map[i]] = i;
}

//
//  Destructor: releases the terminal text buffer (null if Initialize() was
//  never called, which delete [] accepts) and the two inverse maps built by
//  the constructor.
//
CLexicalRegexpDriver::~CLexicalRegexpDriver()
{
	delete [] terminal_char_array_buffer;
	delete [] inverse_filter_map;
	delete [] inverse_token_map;
}

//
//  Reset the scanning state before a new run and (re)allocate the terminal
//  text buffer.
//
//  t_size is the initial capacity of the text buffer in characters, not
//  counting the terminating '\0'; GetTerminal() grows the buffer on demand.
//  Values below 1 are raised to 1 because GetTerminal() stores a
//  one-character error token plus terminator without checking the capacity.
//  ptf is stored in process_terminal_flag, which GetToken() consults before
//  calling ProcessFilter().
//
void	CLexicalRegexpDriver::Initialize(int t_size, bool ptf)
{
	if (t_size < 1)
		t_size = 1;

	// Allocate before releasing the old buffer: if the allocation throws,
	// the member still points at valid memory and is not deleted twice.
	unsigned char	*fresh_buffer = new unsigned char[t_size + 1];

	delete [] terminal_char_array_buffer;

	terminal_char_array_buffer	= fresh_buffer;
	terminal_char_array_maximum	= t_size;

	push_token_back_flag	= false;
	process_terminal_flag	= ptf;

	current_char			= 0;
	current_char_number		= -1;
	current_line_number		= 0;
	current_column_number	= -1;

	current_start_condition = terminal_count;

	terminal_match_index	= 0;
	terminal_state_index	= 0;

	terminal_char_list.Clear();
}

//
//  Fetch the next terminal (filter or token) and also expose its text:
//  terminal_ptr receives the driver's text buffer and terminal_size the
//  number of characters in it.  Returns current_token.
//
int		CLexicalRegexpDriver::GetFilterOrToken(
									unsigned char *&terminal_ptr,
									int &terminal_size)
{
	GetFilterOrToken();

	terminal_size	= terminal_char_array_size;
	terminal_ptr	= terminal_char_array_buffer;

	return current_token;
}

//
//  Fetch the next terminal without skipping filters.  When a terminal has
//  been pushed back, the flag is cleared and the current one is returned
//  again instead of scanning a new one.
//
int		CLexicalRegexpDriver::GetFilterOrToken()
{
	if (push_token_back_flag)
	{
		push_token_back_flag = false;

		return current_token;
	}

	GetTerminal();

	return current_token;
}

//
//  Fetch the next token (filters skipped) and also expose its text:
//  token_ptr receives the driver's text buffer and token_size the number of
//  characters in it.  Returns current_token.
//
int		CLexicalRegexpDriver::GetToken(	unsigned char *&token_ptr,
										int &token_size)
{
	GetToken();

	token_size	= terminal_char_array_size;
	token_ptr	= terminal_char_array_buffer;

	return current_token;
}

//
//  Fetch the next token, skipping filter terminals.  When a token has been
//  pushed back, the flag is cleared and the current one is returned again.
//  For each skipped filter current_filter is looked up in filter_map, and
//  the filter is handed to ProcessFilter() if process_terminal_flag is set.
//
int		CLexicalRegexpDriver::GetToken()
{
	if (push_token_back_flag)
	{
		push_token_back_flag = false;

		return current_token;
	}

	GetTerminal();

	while (current_token == filter_terminal)
	{
		current_filter = filter_map[terminal_match_index];

		if (process_terminal_flag)
			ProcessFilter();

		GetTerminal();
	}

	return current_token;
}

//
//  Scan the next terminal and leave it in current_token with its text in
//  terminal_char_array_buffer / terminal_char_array_size.
//
//  GetTerminalCharList() first runs the state machine as far as it will go.
//  This function then walks backwards from the last scanned character to
//  the nearest one whose match_register is not -1, copies the characters up
//  to that point into the text buffer and removes them from the list.
//
//  Outcomes:
//    - no characters in the list: current_token = empty_symbol_token with
//      empty text;
//    - a match is found: current_token = token_map[match];
//    - no position matches (or trailing-context backup consumes everything):
//      current_token = lexical_error_code and exactly one character is
//      taken as the text.
//
void	CLexicalRegexpDriver::GetTerminal()
{
	GetTerminalCharList();

	if (!terminal_char_list.Size())
	{
		current_token	= empty_symbol_token;

		terminal_char_array_size		= 0;
		terminal_char_array_buffer[0]	= '\0';

		return;
	}
	else
	{
		// Back up from the last scanned character until a match is found.
		while (terminal_char_listptr)
		{
			STerminalChar	&tc1 = terminal_char_list.Data(
										terminal_char_listptr);

			terminal_state_index = tc1.state_register;

			int		match = tc1.match_register;

			if (match != -1)
			{
				terminal_match_index = match;

				// A non-zero start_assignment entry makes this match the
				// new start condition.
				if (start_assignment[match])
					current_start_condition = match;

				current_token = token_map[match];

				if (trailing_context[match])
				{
					// Shorten the text further, one character at a time,
					// until prefix_match is non-zero for the state recorded
					// at the current position.
					while (terminal_char_listptr)
					{
						STerminalChar	&tc2 =
							terminal_char_list.Data(terminal_char_listptr);

						if (prefix_match[tc2.state_register * terminal_count +
											match])
							break;

						terminal_char_list.PreviousListPtr(
													terminal_char_listptr);
						terminal_char_array_size--;
					}

					// Nothing left after backing up: report an error and
					// consume a single character.
					if (!terminal_char_array_size)
					{
						current_token				= lexical_error_code;
						terminal_char_array_size	= 1;
					}
				}

				// Grow the text buffer to the larger of the required size
				// and twice the current capacity.  The old contents are not
				// kept; the buffer is rewritten below.
				if (terminal_char_array_size > terminal_char_array_maximum)
				{
					terminal_char_array_maximum =
						terminal_char_array_size >
							2 * terminal_char_array_maximum ?
						terminal_char_array_size :
						2 * terminal_char_array_maximum;

					delete [] terminal_char_array_buffer;
					terminal_char_array_buffer =
						new unsigned char[terminal_char_array_maximum + 1];
				}

				// Copy the accepted characters from the front of the list.
				terminal_char_listptr = terminal_char_list.FirstListPtr();

				int	i;
				for (i=0; i<terminal_char_array_size; i++)
				{
					STerminalChar	&tc3 =
						terminal_char_list.NextData(terminal_char_listptr);

					terminal_char_array_buffer[i] = tc3.terminal_char;
				}

				terminal_char_array_buffer[i] = '\0';

				// Remove the accepted characters; the rest stay in the list
				// for the next call.
				terminal_char_list.PopFront(terminal_char_array_size);

				return;
			}

			terminal_char_list.PreviousListPtr(terminal_char_listptr);
			terminal_char_array_size--;
		}

		// No position matched: report an error and consume one character.
		// NOTE(review): writes buffer[1] without a capacity check, so this
		// relies on the buffer holding at least two bytes.
		current_token	= lexical_error_code;

		terminal_char_array_size		= 1;
		terminal_char_array_buffer[0]	=
			terminal_char_list.FirstData().terminal_char;
		terminal_char_array_buffer[1]	= '\0';

		terminal_char_list.PopFront();

		return;
	}
}

//
//  Run the state machine from state 0 over the pending characters and, if
//  they do not stop it, over further input, recording in every character
//  the state reached and the match found there.
//
//  On return terminal_char_array_size is the number of characters that
//  received a valid state and terminal_char_listptr refers to the last of
//  them.  Scanning stops when fsm_table yields -1 for a character or when
//  the input is exhausted.
//
void	CLexicalRegexpDriver::GetTerminalCharList()
{
	int		current_state = 0;

	terminal_char_array_size	= 0;
	terminal_char_listptr		= terminal_char_list.FirstListPtr();

	// Phase 1: re-scan characters already in the list (those left over
	// after the previous terminal was removed).
	while (terminal_char_listptr)
	{
		STerminalChar	&tc = terminal_char_list.Data(terminal_char_listptr);

		current_state = fsm_table[
							current_state * input_range + tc.terminal_char];

		if (current_state == -1)
		{
			// No transition: step back to the last character that has a
			// valid state and stop.
			terminal_char_list.PreviousListPtr(terminal_char_listptr);
			return;
		}

		tc.state_register = current_state;

		// token_match is indexed by state alone, or by state and start
		// condition, depending on single_column_token_match.
		if (single_column_token_match)
			tc.match_register = token_match[current_state];
		else
			tc.match_register = token_match[
				current_state * (terminal_count + 1) + current_start_condition];

		terminal_char_list.NextListPtr(terminal_char_listptr);
		terminal_char_array_size++;
	}

	terminal_char_listptr = terminal_char_list.LastListPtr();

	// Phase 2: the pending characters did not stop the machine, so keep
	// reading input one character at a time.
	while (1)
	{
		if (!characters_remaining)
		{
			characters_remaining = GetInputData();

			// End of input: return with what has been scanned so far.
			if (!characters_remaining)
				return;

			input_buffer_offset = 0;
		}

		previous_char	= current_char;
		current_char	= input_buffer[input_buffer_offset];

		// The position counters are stored with the character before they
		// are advanced below.
		terminal_char_listptr = terminal_char_list.PushBack(
									STerminalChar(	current_char,
													current_char_number,
													current_line_number,
													current_column_number));

		input_buffer_offset++;
		characters_remaining--;
		current_char_number++;

		// A new line starts with the character that follows a '\n'.
		if (previous_char == '\n')
		{
			current_line_number++;
			current_column_number = 0;
		}
		else
			current_column_number++;

		STerminalChar	&tc = terminal_char_list.Data(terminal_char_listptr);

		current_state = fsm_table[current_state * input_range + current_char];

		if (current_state == -1)
		{
			// The character just read stays in the list for the next scan;
			// step back to the last character that has a valid state.
			terminal_char_list.PreviousListPtr(terminal_char_listptr);
			return;
		}

		tc.state_register = current_state;

		if (single_column_token_match)
			tc.match_register = token_match[current_state];
		else
			tc.match_register = token_match[
				current_state * (terminal_count + 1) + current_start_condition];

		terminal_char_array_size++;
	}
}

//
//  Character number of the current scan position: the number stored with
//  the first pending character if the list is not empty, otherwise the
//  driver's running counter.
//
int		CLexicalRegexpDriver::GetCurrentCharNumber()
{
	if (!terminal_char_list.Size())
		return current_char_number;

	return terminal_char_list.FirstData().char_number;
}

//
//  Line number of the current scan position: the number stored with the
//  first pending character if the list is not empty, otherwise the driver's
//  running counter.
//
int		CLexicalRegexpDriver::GetCurrentLineNumber()
{
	if (!terminal_char_list.Size())
		return current_line_number;

	return terminal_char_list.FirstData().line_number;
}

//
//  Column number of the current scan position: the number stored with the
//  first pending character if the list is not empty, otherwise the driver's
//  running counter.
//
int		CLexicalRegexpDriver::GetCurrentColumnNumber()
{
	if (!terminal_char_list.Size())
		return current_column_number;

	return terminal_char_list.FirstData().column_number;
}

//
//  Describe the current terminal in a parser stack element: the element is
//  typed StackLiteralToken and receives the current token.
//
void	CLexicalRegexpDriver::LoadStackElement(CStackElement *se)
{
	se->token	= current_token;
	se->type	= StackLiteralToken;
}
