diff lwbasic/attic/parser.c @ 185:cca933d32298

Clean up some mess in lwbasic directory
author lost@l-w.ca
date Thu, 22 Dec 2011 18:03:38 -0700
parents lwbasic/parser.c@5325b640424d
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lwbasic/attic/parser.c	Thu Dec 22 18:03:38 2011 -0700
@@ -0,0 +1,469 @@
+/*
+parser.c
+
+Copyright © 2011 William Astle
+
+This file is part of LWTOOLS.
+
+LWTOOLS is free software: you can redistribute it and/or modify it under the
+terms of the GNU General Public License as published by the Free Software
+Foundation, either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+more details.
+
+You should have received a copy of the GNU General Public License along with
+this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+This is the parser; it pulls tokens from the lexer, maintains the symbol
+tables, and drives code generation
+*/
+
+#include <stdio.h>
+
+#include <lw_alloc.h>
+#include <lw_string.h>
+
+#include "lwbasic.h"
+#include "symtab.h"
+
+/*
+Check that the current token is tt and report a mismatch through
+lwb_error(). lexer() is then called to fetch the following token.
+*/
+static void expect(cstate *state, int tt)
+{
+	int matched = (state->lexer_token == tt);
+
+	if (!matched)
+	{
+		lwb_error("Expecting %s, got %s\n", lexer_token_name(tt), lexer_return_token(state));
+	}
+
+	lexer(state);
+}
+
+
+/*
+Storage size, in bytes, of a value of the given type.
+
+The type argument is currently ignored: everything is treated as an "int",
+which occupies 2 bytes.
+*/
+static int sizeof_type(int type)
+{
+	const int int_size = 2;
+
+	(void)type;
+	return int_size;
+}
+
+/*
+Parse a type specification starting at the current token.
+
+Returns 1 for INTEGER. Any other token is reported through lwb_error() and
+yields -1. In both cases lexer() is called afterwards so that callers always
+find the token following the type as the current token.
+
+TODO: look for an "unsigned" modifier on integer types.
+*/
+static int parse_type(cstate *state)
+{
+	int pt;
+
+	if (state->lexer_token == token_kw_integer)
+	{
+		pt = 1;
+	}
+	else
+	{
+		pt = -1;
+		lwb_error("Invalid type specification");
+	}
+
+	lexer(state);
+	return pt;
+}
+
+/* defined further down; needed here by parse_expression() */
+static void parse_term(cstate *state);
+static void parse_expr(cstate *state, int prec);
+
+/*
+Parse a complete expression starting at the current token.
+
+state -> expression is set to 1 while the expression is being parsed and
+reset to 0 afterwards. The return value is always 1.
+*/
+static int parse_expression(cstate *state)
+{
+	const int lowest_prec = 0;
+
+	state->expression = 1;
+	parse_expr(state, lowest_prec);
+	state->expression = 0;
+
+	return 1;
+}
+
+/*
+Parse the local declarations at the start of a sub/function body.
+
+Accepted lines are "DIM name AS type" and blank lines. The first token that
+starts neither of those is taken to begin the statement block and ends the
+declaration section, leaving that token current.
+
+Each declared variable grows state -> framesize by the size of its type and
+is registered in the local symbol table with the negated frame size as its
+value.
+*/
+static void parse_decls(cstate *state)
+{
+	symtab_entry_t *existing;
+	char *name;
+	int type;
+
+	while (state->lexer_token == token_kw_dim || state->lexer_token == token_eol)
+	{
+		if (state->lexer_token == token_kw_dim)
+		{
+			/* DIM <identifier> */
+			lexer(state);
+			if (state->lexer_token != token_identifier)
+				lwb_error("Expecting identifier, got %s\n", lexer_return_token(state));
+			name = lw_strdup(state->lexer_token_string);
+
+			/* AS <type> */
+			lexer(state);
+			if (state->lexer_token != token_kw_as)
+				lwb_error("Expecting AS, got %s\n", lexer_return_token(state));
+			lexer(state);
+			type = parse_type(state);
+
+			/* a local name may only be declared once */
+			existing = symtab_find(state->local_syms, name);
+			if (existing)
+				lwb_error("Multiply defined local variable %s", name);
+
+			state->framesize += sizeof_type(type);
+			symtab_register(state->local_syms, name, -(state->framesize), symtype_var, NULL);
+
+			lw_free(name);
+		}
+
+		/* every declaration (and every blank line) must end at EOL */
+		if (state->lexer_token != token_eol)
+			lwb_error("Expecting end of line; got %s\n", lexer_return_token(state));
+		lexer(state);
+	}
+}
+
+/*
+Parse the statement block of a sub/function.
+
+Blank lines and assignments ("name = expression") are recognized. Any other
+leading token ends the block and is left as the current token for the caller.
+
+For an assignment the target is looked up in the local symbol table first and
+the global one second. Type checking and the assignment itself are not
+implemented yet.
+*/
+static void parse_statements(cstate *state)
+{
+	symtab_entry_t *target;
+	int exprtype;
+
+	for (;;)
+	{
+		if (state->lexer_token == token_identifier)
+		{
+			/* locals shadow globals */
+			target = symtab_find(state->local_syms, state->lexer_token_string);
+			if (target == NULL)
+				target = symtab_find(state->global_syms, state->lexer_token_string);
+			if (target == NULL)
+				lwb_error("Unknown variable %s\n", state->lexer_token_string);
+			lexer(state);
+
+			/* set before expect() fetches the first token of the expression */
+			/* so that it is parsed correctly */
+			state->expression = 1;
+			expect(state, token_op_assignment);
+
+			exprtype = parse_expression(state);
+			(void)exprtype;
+
+			/* TODO: check type compatibility */
+			/* TODO: actually do the assignment */
+		}
+		else if (state->lexer_token != token_eol)
+		{
+			/* not a statement we recognize */
+			return;
+		}
+
+		/* statements and blank lines both end at EOL */
+		if (state->lexer_token != token_eol)
+			lwb_error("Expecting end of line; got %s\n", lexer_return_token(state));
+		lexer(state);
+	}
+}
+
+
+/*
+Parse one SUB or FUNCTION definition. The SUB/FUNCTION keyword is the
+current token on entry; the closing ENDSUB/ENDFUNCTION is the current token
+on return (it is checked but not consumed here).
+
+issub non-zero: this is a SUB; a RETURNS clause is rejected
+issub zero:     this is a FUNCTION; a RETURNS clause is required
+
+Header syntax handled:
+	name [PUBLIC|PRIVATE] [PARAMS] [pname AS type {, pname AS type}] [RETURNS type]
+
+A fresh local symbol table is created for the body and destroyed before
+returning. Parameters are registered in it at offsets 0, 2, 4, ... (running
+total of parameter sizes). The sub/function name is registered in the global
+symbol table.
+*/
+static void parse_subfunc(cstate *state, int issub)
+{
+	symtab_entry_t *clash;
+	char *subname;
+	char *parmname;
+	int parmtype;
+	int parmoffset = 0;
+	int rettype = -1;
+	int ispublic = 0;
+	int moreparms;
+
+	state->local_syms = symtab_init();
+	state->framesize = 0;
+
+	/* name of the sub/function */
+	lexer(state);
+	if (state->lexer_token != token_identifier)
+		lwb_error("Invalid sub name '%s'", state->lexer_token_string);
+	subname = lw_strdup(state->lexer_token_string);
+	lexer(state);
+
+	/* optional visibility keyword */
+	if (state->lexer_token == token_kw_public || state->lexer_token == token_kw_private)
+	{
+		ispublic = (state->lexer_token == token_kw_public);
+		lexer(state);
+	}
+
+	/* the "PARAMS" keyword is optional noise */
+	if (state->lexer_token == token_kw_params)
+		lexer(state);
+
+	/* comma separated parameter list; absent if we are already at EOL or RETURNS */
+	moreparms = !(state->lexer_token == token_eol || state->lexer_token == token_kw_returns);
+	while (moreparms)
+	{
+		if (state->lexer_token != token_identifier)
+			lwb_error("Parameter name expected, got %s\n", lexer_return_token(state));
+		parmname = lw_strdup(state->lexer_token_string);
+		lexer(state);
+
+		if (state->lexer_token != token_kw_as)
+			lwb_error("Expecting AS\n");
+		lexer(state);
+
+		parmtype = parse_type(state);
+
+		clash = symtab_find(state->local_syms, parmname);
+		if (clash)
+			lwb_error("Duplicate parameter name %s\n", parmname);
+		symtab_register(state->local_syms, parmname, parmoffset, symtype_param, NULL);
+		parmoffset += sizeof_type(parmtype);
+		lw_free(parmname);
+
+		moreparms = (state->lexer_token == token_char && state->lexer_token_string[0] == ',');
+		if (moreparms)
+			lexer(state);
+	}
+
+	/* return type */
+	if (issub)
+	{
+		if (state->lexer_token == token_kw_returns)
+			lwb_error("SUB cannot specify RETURNS\n");
+	}
+	else
+	{
+		if (state->lexer_token != token_kw_returns)
+			lwb_error("FUNCTION must have RETURNS\n");
+		lexer(state);
+		/* NOTE: a disabled draft accepted an optional "name AS" in front */
+		/* of the return type here; only a bare type is parsed */
+		rettype = parse_type(state);
+	}
+
+	if (state->lexer_token != token_eol)
+		lwb_error("EOL expected; found %s\n", lexer_return_token(state));
+
+	/* the name must be new in the global symbol table */
+	clash = symtab_find(state->global_syms, subname);
+	if (clash)
+		lwb_error("Multiply defined symbol %s\n", subname);
+	symtab_register(state->global_syms, subname, -1, issub ? symtype_sub : symtype_func, NULL);
+
+	/* state -> currentsub takes over the subname allocation; freed below */
+	state->currentsub = subname;
+	state->returntype = rettype;
+
+	/* consume the EOL that ends the header */
+	lexer(state);
+
+	/* body: declarations, then the prolog is emitted, then statements */
+	parse_decls(state);
+	emit_prolog(state, ispublic);
+	parse_statements(state);
+
+	/* the body must be closed by the matching END keyword */
+	if (issub)
+	{
+		if (state->lexer_token != token_kw_endsub)
+			lwb_error("Expecting ENDSUB, got %s\n", lexer_return_token(state));
+	}
+	else
+	{
+		if (state->lexer_token != token_kw_endfunction)
+			lwb_error("Expecting ENDFUNCTION, got %s\n", lexer_return_token(state));
+	}
+
+	emit_epilog(state);
+
+	/* drop per-sub state */
+	lw_free(state->currentsub);
+	state->currentsub = NULL;
+	symtab_destroy(state->local_syms);
+	state->local_syms = NULL;
+}
+
+/*
+Top level of the parser.
+
+Creates the global symbol table, then reads tokens at global scope until end
+of file. FUNCTION and SUB start a definition that is handed to
+parse_subfunc(); blank lines are skipped; anything else is reported through
+lwb_error().
+*/
+void parser(cstate *state)
+{
+	int tok;
+
+	state->lexer_curchar = -1;
+	state->global_syms = symtab_init();
+
+	for (;;)
+	{
+		/* every pass starts in global state and fetches a new token */
+		state->parser_state = parser_state_global;
+		lexer(state);
+		tok = state->lexer_token;
+
+		/* EOF is allowed - end of parsing */
+		if (tok == token_eof)
+			return;
+
+		/* blank lines are allowed */
+		if (tok == token_eol)
+			continue;
+
+		if (tok == token_kw_function)
+		{
+			printf("Function\n");
+			parse_subfunc(state, 0);
+		}
+		else if (tok == token_kw_sub)
+		{
+			printf("Sub\n");
+			parse_subfunc(state, 1);
+		}
+		else
+		{
+			lwb_error("Invalid token '%s' in global state\n", lexer_return_token(state));
+		}
+	}
+}
+
+/*
+Parse a (sub)expression: one term followed by any number of
+"binary-operator operand" pairs.
+
+prec is the precedence of the operator to the left of this subexpression.
+Only operators with a strictly higher precedence are consumed here; an
+operator of equal or lower precedence, or a token that is not a binary
+operator at all, ends the subexpression and is left as the current token.
+
+Code generation for the operators is not implemented yet.
+*/
+static void parse_expr(cstate *state, int prec)
+{
+	/* binary operators and their precedence; terminated by { -1, -1 } */
+	static const struct operinfo {
+		int opernum;
+		int operprec;
+	} operators[] =
+	{
+		{ token_op_plus, 100 },
+		{ token_op_minus, 100 },
+		{ token_op_times, 150 },
+		{ token_op_divide, 150 },
+		{ token_op_modulus, 150 },
+		{ token_op_and, 25 },
+		{ token_op_or, 20 },
+		{ token_op_xor, 20 },
+		{ token_op_band, 50 },
+		{ token_op_bor, 45 },
+		{ token_op_bxor, 45 },
+		{ -1, -1 }
+	};
+	const struct operinfo *op;
+
+	parse_term(state);
+
+	for (;;)
+	{
+		/* look the current token up in the operator table */
+		op = operators;
+		while (op->opernum != -1 && op->opernum != state->lexer_token)
+			op++;
+
+		/* not an operator, or one that belongs to an enclosing level */
+		if (op->opernum == -1 || op->operprec <= prec)
+			return;
+
+		/* consume the operator and parse its right hand operand */
+		lexer(state);
+		parse_expr(state, op->operprec);
+
+		/* TODO: push operator */
+	}
+}
+
+/*
+Parse a single term of an expression, starting at the current token.
+
+A term is one of:
+	( expression )
+	+term                  unary plus, ignored
+	-operand               unary minus, operand parsed at precedence 200
+	NOT operand, BNOT operand
+	integer or unsigned integer literal
+	identifier             variable, or the start of a function call
+
+On return the current token is the first token after the term. Anything
+else is reported through lwb_error().
+
+Every successfully parsed form returns as soon as it is complete, so a
+finished term never reaches the error report at the bottom and never
+swallows a following token. Integer literals are consumed like identifiers.
+
+Code generation for terms is not implemented yet.
+*/
+static void parse_term(cstate *state)
+{
+eval_next:
+	/* parens */
+	if (state -> lexer_token == token_op_oparen)
+	{
+		lexer(state);
+		parse_expr(state, 0);
+		expect(state, token_op_cparen);
+		return;
+	}
+	
+	/* unary plus; ignore it */
+	if (state -> lexer_token == token_op_plus)
+	{
+		lexer(state);
+		goto eval_next;
+	}
+	
+	/* unary minus, precedence 200 */
+	if (state -> lexer_token == token_op_minus)
+	{
+		lexer(state);
+		parse_expr(state, 200);
+		
+		/* push unary negation */
+		return;
+	}
+	
+	/* BNOT, NOT; same precedence as unary minus */
+	if (state -> lexer_token == token_op_not || state -> lexer_token == token_op_bnot)
+	{
+		lexer(state);
+		parse_expr(state, 200);
+		
+		/* push unary operator */
+		return;
+	}
+	
+	/* integer or unsigned integer literal */
+	if (state -> lexer_token == token_int || state -> lexer_token == token_uint)
+	{
+		/* push constant */
+		
+		/* consume the literal so the caller sees what follows it */
+		lexer(state);
+		return;
+	}
+	
+	/* variable or function call */
+	if (state -> lexer_token == token_identifier)
+	{
+		lexer(state);
+		if (state -> lexer_token == token_op_oparen)
+		{
+			/* function call */
+			/* NOTE(review): the argument list is not parsed yet; the */
+			/* opening paren is left as the current token */
+			return;
+		}
+		/* variable */
+		return;
+	}
+	
+	lwb_error("Invalid input in expression; got %s\n", lexer_return_token(state));
+}