view src/lwasm.c @ 0:57495da01900

Initial checking of LWASM
author lost
date Fri, 03 Oct 2008 02:44:20 +0000
parents
children 34568fab6058
line wrap: on
line source

/*
 * lwasm.c
 *
 * main code for lwasm
 */

#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define __lwasm_c_seen__
#include "instab.h"
#include "lwasm.h"

// forward declaration of the source reader defined later in this file
void lwasm_read_file(asmstate_t *as, char *fname);
// macro support routines; declared extern here and not defined in this
// part of the file
extern int add_macro_line(asmstate_t *as, sourceline_t *cl, char *optr);
extern void expand_macro(asmstate_t *as, sourceline_t *cl, char **optr);

// debug(fmt, ...): when as -> debug is nonzero, write "DEBUG: " followed by
// the printf-style message to stderr.  The expansion refers to a variable
// named "as", so the macro can only be used where one is in scope.  The
// ", ## __VA_ARGS__" form is a GNU extension that drops the comma when no
// arguments follow the format string.
#define debug(mess, ...)	do { if (as->debug) { fprintf(stderr, "DEBUG: "); fprintf(stderr, (mess), ## __VA_ARGS__); } } while (0)

// Record error code errcode against source line cl and bump the
// assembler-wide error count in as.  The new entry is pushed onto the
// front of cl -> errors, so the list ends up in reverse order of
// registration.  Terminates the program if the entry cannot be allocated.
void register_error(asmstate_t *as, sourceline_t *cl, int errcode)
{
	errortab_t *e;
	
	e = malloc(sizeof *e);
	if (!e)
	{
		perror("Malloc");
		exit(1);
	}
	
	e -> errnum = errcode;
	e -> line = cl;
	e -> next = cl -> errors;
	cl -> errors = e;
	
	as -> errorcount++;
}

// forward declaration: eval_term() calls back into eval_expr() for
// parenthesized sub-expressions
int eval_expr(asmstate_t *as, sourceline_t *cl, char **optr, int *val);

// return the smallest of the four arguments
int eval_min(int v1, int v2, int v3, int v4)
{
	int lowest = v1;

	lowest = (v2 < lowest) ? v2 : lowest;
	lowest = (v3 < lowest) ? v3 : lowest;
	lowest = (v4 < lowest) ? v4 : lowest;

	return lowest;
}

// return the largest of the four arguments
int eval_max(int v1, int v2, int v3, int v4)
{
	const int others[3] = { v2, v3, v4 };
	int highest = v1;
	int i;

	for (i = 0; i < 3; i++)
	{
		if (others[i] > highest)
			highest = others[i];
	}
	return highest;
}

// Look up the register name at *str in rlist, a table of upper case names
// packed as consecutive three character entries; names shorter than three
// characters are padded on the right with spaces.  rlist's length is
// expected to be a multiple of three.
//
// Matching is case insensitive and the first matching entry wins.  On a
// match *str is advanced past the name (1, 2 or 3 characters) and the
// zero-based entry index is returned.  Returns -1 and leaves *str alone
// when nothing matches.
int lookupreg3(const char *rlist, char **str)
{
	const char *s = *str;
	const char *entry;
	int index = 0;

	for (entry = rlist; *entry; entry += 3, index++)
	{
		int namelen;

		// ctype functions need an unsigned char value; a plain char may be
		// negative.  s[1] and s[2] are only read after the preceding
		// character matched a non-NUL entry character, so the reads never
		// run past the string terminator.
		if (toupper((unsigned char)s[0]) != entry[0])
			continue;

		if (entry[1] == ' ')
			namelen = 1;
		else if (toupper((unsigned char)s[1]) != entry[1])
			continue;
		else if (entry[2] == ' ')
			namelen = 2;
		else if (toupper((unsigned char)s[2]) != entry[2])
			continue;
		else
			namelen = 3;

		*str += namelen;
		return index;
	}

	return -1;
}


// Look up the register name at *str in reglist, a table of upper case
// names packed as consecutive two character entries; one character names
// are padded with a trailing space.  reglist's length is expected to be a
// multiple of two.
//
// Matching is case insensitive.  A one character entry only matches when
// the following input character is not a letter.  On a match *str is
// advanced past the name and the zero-based entry index is returned.
// Returns -1 and leaves *str alone when nothing matches.
int lookupreg(const char *reglist, char **str)
{
	const char *s = *str;
	int index;

	for (index = 0; *reglist; reglist += 2, index++)
	{
		// ctype functions need an unsigned char value; a plain char may be
		// negative.  s[1] is only read once s[0] matched a non-NUL entry
		// character, so it is at worst the string terminator.
		if (toupper((unsigned char)s[0]) != reglist[0])
			continue;

		if ((reglist[1] == ' ' && !isalpha((unsigned char)s[1]))
			|| toupper((unsigned char)s[1]) == reglist[1])
		{
			*str += (reglist[1] == ' ') ? 1 : 2;
			return index;
		}
	}

	return -1;
}

// Account for one byte of generated code on line cl.
//
// cl -> len is always incremented so that addresses advance on every
// pass.  The low eight bits of cb are stored in cl -> codebytes only on
// pass 2; the buffer grows in 32 byte steps as needed.  Terminates the
// program if the buffer cannot be grown.
void addcodebyte(asmstate_t *as, sourceline_t *cl, int cb)
{
	enum { CODE_GROW_STEP = 32 };

	cl -> len += 1;
	if (as -> passnum != 2)
		return;

	if (cl -> numcodebytes >= cl -> codesize)
	{
		// go through a temporary so the old buffer is not lost on failure
		void *grown = realloc(cl -> codebytes, cl -> codesize + CODE_GROW_STEP);

		if (!grown)
		{
			perror("Malloc");
			exit(1);
		}
		cl -> codebytes = grown;
		cl -> codesize += CODE_GROW_STEP;
	}
	debug("EMIT: %02x\n", cb & 0xff);
	cl -> codebytes[cl -> numcodebytes++] = cb & 0xFF;
}

// Parse a symbol out of the line at *ptr.
//
// The first character must be one of SYMCHAR_START; the symbol then
// extends over every following non-whitespace character found in SYMCHAR.
// On success *ptr is advanced past the symbol and a pointer to a NUL
// terminated copy is returned.  The copy lives in a single static buffer
// that is reused (and possibly moved) by the next call, so callers must
// duplicate it if they need to keep it.
//
// Returns NULL, leaving *ptr untouched, if *ptr is at the end of the
// string or does not start with a valid symbol character.  Terminates the
// program if the buffer cannot be grown.  The as argument is not used.
char *parse_symbol(asmstate_t *as, char **ptr)
{
	static char *symptr = NULL;
	const char *start = *ptr;
	size_t len = 0;
	char *grown;

	// strchr() also matches the terminating NUL of the set it searches, so
	// the end of the input has to be rejected explicitly
	if (*start == '\0' || !strchr(SYMCHAR_START, *start))
		return NULL;

	while (start[len]
		&& !isspace((unsigned char)start[len])
		&& strchr(SYMCHAR, start[len]))
	{
		len++;
	}

	grown = realloc(symptr, len + 1);
	if (!grown)
	{
		perror("Malloc");
		exit(1);
	}
	symptr = grown;

	memcpy(symptr, start, len);
	symptr[len] = '\0';
	*ptr += len;

	return symptr;
}

// Resolve one source line.
//
// On pass 1 the text in cl -> line is split into an optional label
// (which must start in the first column), an opcode and an operand, and
// the results are cached in cl.  Later passes reuse cl -> opcode and
// cl -> operstr instead of parsing again.
//
// On every pass the line is then either skipped (inside a false
// conditional) or handed to the opcode's handler from instab, after which
// the label, if any, is registered and as -> addr is advanced by the
// number of bytes the line occupies.
void resolve_insn(asmstate_t *as, sourceline_t *cl)
{
	char *optr;
	char opbuf[MAX_OP_LEN + 1];
	char *symbol = NULL;
	int c;
	
	// default location for a label on this line; a handler may change it
	cl -> code_symloc = as -> addr;
	
	cl -> addrset = 0;
	cl -> isequ = 0;
	cl -> len = 0;
	cl -> undef = 0;
	
	// only parse line on first pass
	if (as -> passnum == 1)
	{
		optr = cl -> line;
		// empty lines and whole-line comments carry no instruction
		if (!*optr || *optr == '*' || *optr == ';')
		{
			cl -> opcode = -1;
			cl -> remainder = cl -> line;
			return;
		}
	
		// anything starting in the first column is a label
		if (!isspace((unsigned char)*optr))
		{
			symbol = parse_symbol(as, &optr);
			// the label must be followed by whitespace or end of line;
			// not enforced while collecting a macro body
			if (*optr && !isspace((unsigned char)*optr) && !(as -> inmacro))
			{
				errorp1(ERR_BADSYM);
				while (*optr && !isspace((unsigned char)*optr))
					optr++;
			}
			if (symbol)
			{
				// parse_symbol() returns a reused static buffer, so copy it
				cl -> symstr = strdup(symbol);
				cl -> hassym = 1;
			}
		}

		while (isspace((unsigned char)*optr))
			optr++;	
	
		// parse opcode
		if (*optr && *optr != ';')
		{
			c = 0;
			while (c < MAX_OP_LEN && *optr && !isspace((unsigned char)*optr))
			{
				opbuf[c++] = *optr++;
			}
			opbuf[c] = '\0';
			// still inside the word: the opcode is longer than MAX_OP_LEN
			if (*optr && !isspace((unsigned char)*optr) && !(as -> inmacro))
			{
				errorp1(ERR_BADOP);
				cl -> opcode = -1;
			}
			else
			{
				cl -> opcstr = strdup(opbuf);
				// instab ends with an entry whose opcode is NULL; c is left
				// at that entry when the name is not found
				for (c = 0; instab[c].opcode; c++)
				{
					if (!strcasecmp(opbuf, instab[c].opcode))
						break;
				}
				if (!instab[c].opcode && opbuf[0] == '*')
				{
					// a '*' in the opcode field starts a comment
					cl -> opcode = -1;
				}
				else if (!instab[c].opcode && !(as -> inmacro))
				{
					cl -> opcode = -1;
					
					// look up macro (names are compared case sensitively)
					if (as -> macros)
					{
						macrotab_t *m;
						
						for (m = as -> macros; m; m = m -> next)
						{
							if (!strcmp(m -> name, opbuf))
								break;
						}
						if (m)
						{
							// we have a macro here
							cl -> macro = m;
							while (*optr && isspace((unsigned char)*optr))
								optr++;
							expand_macro(as, cl, &optr);
							return;
						}
						else
						{
							errorp1(ERR_BADOP);
						}
					}
					else
					{
						errorp1(ERR_BADOP);
					}
				}
				else
					// known opcode, or any word at all while a macro body is
					// being collected (c may then index the terminating entry)
					cl -> opcode = c;
			}
		}
		else
			cl -> opcode = -1;
	
		// while a macro is being defined, every line except the one that
		// ends the definition is stored in the macro instead of assembled
		if (as -> inmacro && cl -> opcode >= 0 && instab[cl -> opcode].specialnum != SPECIAL_ENDM)
		{
			add_macro_line(as, cl, cl -> line);
			cl -> opcode = -1;
			cl -> remainder = cl -> line;
			// NOTE(review): the strdup'd opcstr/symstr are dropped here
			// without being freed; confirm add_macro_line() does not keep
			// them before adding a free()
			cl -> opcstr = NULL;
			cl -> operstr = NULL;
			cl -> symstr = NULL;
			cl -> hassym = 0;
			cl -> macrodef = 1;
			return;
		}
		// parse operand
		while (*optr && isspace((unsigned char)*optr))
			optr++;

		cl -> operstr = optr;
	}
	else
		optr = cl -> operstr;

	if (as -> skipcond)
	{
		// if skipping a condition, need to skip a macro
		if (cl -> opcode >= 0)
		{
			if (instab[cl -> opcode].specialnum == SPECIAL_MACRO)
			{
				as -> skipmacro = 1;
			}
			else if (instab[cl -> opcode].specialnum == SPECIAL_ENDM)
			{
				as -> skipmacro = 0;
			}
			else if (instab[cl -> opcode].specialnum == SPECIAL_COND && !(as -> skipmacro))
			{
				// nested conditional inside the skipped region
				as -> skipcount++;
			}
			else if (instab[cl -> opcode].specialnum == SPECIAL_ENDC && !(as -> skipmacro))
			{
				as -> skipcount--;
				if (as -> skipcount <= 0)
				{
					as -> skipcond = 0;
					as -> noelse = 0;
				}
			}
			else if (instab[cl -> opcode].specialnum == SPECIAL_ELSE && !(as -> skipmacro))
			{
				// only an ELSE at the outermost skipped level resumes assembly
				if (as -> skipcount == 1)
				{	
					as -> skipcount = 0;
					as -> skipcond = 0;
					as -> noelse = 1;
					return;
				}
			}
		}
		if (as -> skipcond)
			cl -> skipped = 1;
		return;
	}
		
	// do the code thing
	// on pass 1, no code is generated
	// on pass 2, code is generated using the "emit()" macro
	if (cl -> opcode >= 0)
	{
		if (instab[cl -> opcode].opfn)
		{
			// the handler advances optr past whatever operand text it used
			(*(instab[cl -> opcode].opfn))(as, cl, &optr);
			if (as -> passnum == 1)
			{
				// when the handler stopped before the end of the line, keep
				// a private copy of the operand text for the later passes
				if (*optr)
					cl -> operstr = strdup(cl -> operstr);
				// whatever follows the operand is the trailing comment
				cl -> remainder = optr;
			}
		}
		else
		{
			errorp1(ERR_BADOP);
			cl -> opcode = -1;
		}
	}
	// address of the symbol may have been changed by a pseudo op
	// so we couldn't register it above
	// that means it may turn out to be a "forward ref" in pass 1
	if (cl -> hassym)
	{
		register_symbol(as, cl, cl -> symstr, cl -> code_symloc, cl -> isset ? SYMFLAG_SET : SYMFLAG_NONE);
	}

	as -> addr += cl -> len;
}

// Run pass 2 over the source already loaded into as: reset the address
// and direct page value, then resolve every line again in list order.
void generate_code(asmstate_t *as)
{
	sourceline_t *line;

	as -> passnum = 2;
	as -> dpval = 0;
	as -> addr = 0;

	line = as -> source_head;
	while (line)
	{
		resolve_insn(as, line);
		line = line -> next;
	}
}

// Read source file fname and run pass 1 over it.
//
// Each line is stripped of its line ending, wrapped in a new
// sourceline_t appended to the list in as, and passed to resolve_insn().
// Reading stops at end of file or after a line holding the END pseudo op.
//
// If the file cannot be opened a message is printed and the function
// returns without adding any lines.  A read error or an out of memory
// condition terminates the program.
//
// fname is stored by pointer in every line record, so it must outlive
// the source list.
void lwasm_read_file(asmstate_t *as, char *fname)
{
	FILE *f;
	int cline = 0;
	sourceline_t *cl;
	size_t bufflen = 0;
	char *buff = NULL;
	int retval;
	
	as -> passnum = 1;
	
	f = fopen(fname, "r");
	if (!f)
	{
		fprintf(stderr, "Cannot open input file %s: %s\n", fname, strerror(errno));
		return;
	}
	
	for (;;)
	{
		retval = getline(&buff, &bufflen, f);
		if (retval < 0)
		{
			if (feof(f))
				break;
			fprintf(stderr, "Error reading '%s': %s\n", fname, strerror(errno));
			exit(1);
		}
		// only look at the buffer once the read is known to have succeeded
		debug(" read line (%s:%d): %s\n", fname, cline, buff);

		// cut the line at the first CR or LF
		buff[strcspn(buff, "\r\n")] = '\0';

		cl = calloc(1, sizeof *cl);
		if (!cl)
		{
			perror("Malloc");
			exit(1);
		}
		
		cl -> lineno = cline++;
		cl -> sourcefile = fname;
		cl -> opcode = -1;
		cl -> addrmode = -1;
		cl -> addr = as -> addr;
		cl -> dpval = as -> dpval;

		// append to the doubly linked list of source lines
		cl -> prev = as -> source_tail;
		if (as -> source_tail)
			as -> source_tail -> next = cl;
		as -> source_tail = cl;
		if (as -> source_head == NULL)
			as -> source_head = cl;

		cl -> line = strdup(buff);
		if (!cl -> line)
		{
			perror("Malloc");
			exit(1);
		}

		resolve_insn(as, cl);

		// nothing after an END pseudo op is read
		if (cl -> opcode >= 0 && instab[cl -> opcode].instype == INSTYPE_PSEUDO && instab[cl -> opcode].specialnum == SPECIAL_END)
			break;
	}

	free(buff);
	fclose(f);
}

/*
below this point is the expression evaluation package

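Supported binary operators: + - / * %
Supported unary operators: - (a leading + is also accepted and ignored)

Binary operators are applied strictly left to right; there is no operator
precedence, so use parentheses to group.

<infix>: + | - | * | / | %
<unary>: - | +
<expr>: <term>
<expr>: <expr> <infix> <term>
<term>: <unary> <term>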
<term>: ( <expr> )
<term>: <symbol>
<term>: ' <char>
<term>: " <char> <char>
<term>: *
<term>: <number>

<number>: <dec>
<number>: & <dec>

<number>: $ <hex>
<number>: <hex> H
<number>: @ <oct>
<number>: <oct> O
<number>: <oct> Q

<number>: % <bin>
<number>: <bin> B

<bin>: 0 | 1
<oct>: <bin> | 2 | 3 | 4 | 5 | 6 | 7
<dec>: <oct> | 8 | 9
<hex>: <dec> | A | B | C | D | E | F

NOTE: hex values which start with a non-digit will need to be prefixed
by $ or have a 0 as the leading digit; hence: $CC or 0CCH otherwise the
assembler cannot tell the difference between CCH as a symbol or CCH as
the value $CC

*/

// Evaluate one term of an expression starting at *optr.
//
// The value is stored in *tval (0 if there is a problem) and *optr is
// advanced past the text that was consumed.
//
// Returns 0 on success and a negative value on a parse error: -2 for a
// truncated character constant, -1 for everything else, or whatever a
// nested eval_expr() reported.  A symbol for which lookup_symbol()
// returns -1 is treated as undefined; that is not a parse error:
// cl -> undef is set, *tval is 0 and 0 is returned.
//
// A term that does not start with any recognised character evaluates to
// 0 without consuming anything.
int eval_term(asmstate_t *as, sourceline_t *cl, char **optr, int *tval)
{
	char tc;
	int rval;
	int binval;
	int octval;
	int decval;
	int hexval;
	int valtype;
	int digval;
	int bindone = 0;
	
	*tval = 0;

	// unary +, ignored for symmetry
	while (**optr == '+')
		(*optr)++;
	tc = **optr;

	// end of string
	if (tc == '\0')
	{
		// error if at EOS as we are looking for a term.  This has to be
		// tested before any strchr() on tc below, because strchr() also
		// matches the terminating NUL of the set it searches.
		errorp1(ERR_BADEXPR);
		return -1;
	}

	if (tc == '(')
	{
		// eval_expr() clears cl -> undef on entry; remember whether an
		// earlier term of the enclosing expression had already set it
		int outer_undef = cl -> undef;

		(*optr)++;
		rval = eval_expr(as, cl, optr, tval);
		if (outer_undef)
			cl -> undef = 1;
		if (rval < 0)
			return rval;
		if (**optr != ')')
		{
			errorp1(ERR_BADEXPR);
			return -1;
		}
		(*optr)++;
		return 0;
	}

	if (tc == '-')
	{
		(*optr)++;
		rval = eval_term(as, cl, optr, tval);
		if (rval < 0)
			return rval;
		*tval = -*tval;
		return 0;
	}
	
	// current address (of current instruction, not PC)
	if (tc == '*')
	{
		*tval = cl -> addr;
		(*optr)++;
		return 0;
	}
	
	if (strchr("abcdefghijklmnopqrstuvwxyz_", tolower((unsigned char)tc)))
	{
		// evaluate a symbol
		char *symbuf;

		symbuf = parse_symbol(as, optr);
		if (!symbuf)
		{
			errorp1(ERR_BADSYM);
			*tval = 0;
			return -1;
		}

		debug(" looking up symbol: %s\n", symbuf);
		*tval = lookup_symbol(as, symbuf);
		
		// if not found, flag forward ref
		if (*tval == -1)
		{
			errorp2(ERR_UNDEF);
			cl -> undef = 1;
			*tval = 0;
			return 0;
		}
		return 0;
	}
	
	if (tc == '%')
	{
		// binary number
		int v1 = 0;
		(*optr)++;
		while (**optr == '0' || **optr == '1')
		{
			v1 = v1 << 1 | ((*(*optr)++) - '0');
		}
		*tval = v1;
		return 0;
	}
	if (tc == '$')
	{
		// hex number, either case
		int v1 = 0;
		(*optr)++;
		debug("HEX CONST: %s\n", *optr);
		while (isxdigit((unsigned char)**optr))
		{
			tc = *(*optr)++;
			debug("HEX 2: %02x\n", tc);
			if (isdigit((unsigned char)tc))
				v1 = v1 << 4 | (tc - '0');
			else
				v1 = v1 << 4 | (toupper((unsigned char)tc) - 'A' + 10);
		}
		*tval = v1;
		return 0;
	}
	if (tc == '@')
	{
		// octal number
		int v1 = 0;
		(*optr)++;
		while (**optr >= '0' && **optr <= '7')
		{
			v1 = v1 << 3 | ((*(*optr)++) - '0');
		}
		*tval = v1;
		return 0;
	}
	if (tc == '&')
	{
		// decimal number
		int v1 = 0;
		(*optr)++;
		while (isdigit((unsigned char)**optr))
		{
			v1 = v1 * 10 + ((*(*optr)++) - '0');
		}
		*tval = v1;
		return 0;
	}
	if (tc == '\'')
	{
		// single character constant
		(*optr)++;
		if (!**optr)
		{
			errorp1(ERR_BADEXPR);
			return -2;
		}
		*tval = *(*optr)++; 
		return 0;
	}
	if (tc == '"')
	{
		// two character constant; the first character is the high byte
		unsigned char hi;
		unsigned char lo;

		(*optr)++;
		if (!**optr || !*(*optr + 1))
		{
			errorp1(ERR_BADEXPR);
			return -2;
		}
		// read the two characters in separate statements so the order of
		// the increments is defined
		hi = (unsigned char)*(*optr)++;
		lo = (unsigned char)*(*optr)++;
		*tval = hi << 8 | lo;
		return 0;
	}
	
	// we have a generic number here which may be decimal, hex, binary, or octal
	// based on a suffix

	// valtype is a bit set of the bases the digits seen so far still allow:
	// binary (1), octal (2), decimal (4), hex (8)
	valtype = 15;
	hexval = octval = decval = binval = 0;
	for (;;)
	{
		if (!**optr || !strchr("ABCDEFabcdefqhoQHO0123456789", **optr))
		{
			// end of the number: either a binary constant whose B suffix
			// was the last character read, or a plain decimal
			if (bindone)
			{
				*tval = binval;
				return 0;
			}
			if (valtype & 4)
			{
				*tval = decval;
				return 0;
			}
			errorp1(ERR_BADEXPR);
			return -1;
		}
		tc = toupper((unsigned char)*(*optr)++);
		
		if (tc == 'H')
		{
			if (valtype & 8)
			{
				*tval = hexval;
				return 0;
			}
			// syntax error
			errorp1(ERR_BADEXPR);
			return -1;
		}
		
		if (tc == 'Q' || tc == 'O')
		{
			if (valtype & 2)
			{
				*tval = octval;
				return 0;
			}
			errorp1(ERR_BADEXPR);
			return -1;
		}
		
		// tc is now one of 0-9 or A-F
		digval = tc - '0';
		if (digval > 9)
			digval -= 7;
		
		// a B only ends a binary constant if every digit before it was
		// binary; any character after the B resets this, in which case the
		// B was a hex digit
		bindone = (tc == 'B' && (valtype & 1));

		// rule out the bases this digit does not fit in
		if (digval > 1)
			valtype &= 14;
		if (digval > 7)
			valtype &= 13;
		if (digval > 9)
			valtype &= 11;
		
		if (valtype & 8)
		{
			hexval = (hexval << 4) | digval;
		}
		if (valtype & 4)
		{
			decval = decval * 10 + digval;
		}
		if (valtype & 2)
		{
			octval = (octval << 3) | digval;
		}
		if (valtype & 1)
		{
			binval = (binval << 1) | digval;
		}
	}	
	// can't get here from there
}

// Evaluate the expression starting at *optr and store the result in *val.
//
// An expression is a term followed by any number of <operator> <term>
// pairs using + - * / %.  Operators are applied strictly left to right;
// there is no precedence.  Evaluation stops, without error, at the first
// character that is not one of those operators, and *optr is left
// pointing at it.
//
// Returns 0 on success or the negative code from eval_term() if a term
// cannot be parsed; *val is 0 in that case.  An undefined symbol is not a
// parse error: it evaluates as 0, cl -> undef is set and 0 is returned.
//
// Division or modulo by zero registers ERR_BADEXPR and returns -1, except
// when the expression contains an undefined symbol (whose placeholder
// value is 0), where the result is quietly taken as 0.
int eval_expr(asmstate_t *as, sourceline_t *cl, char **optr, int *val)
{
	int left;
	int right;
	char oper;
	int rval;
	
	// by default, return 0 in val
	*val = 0;
	cl -> undef = 0;

	rval = eval_term(as, cl, optr, &left);
	if (rval < 0)
		return rval;

	for (;;)
	{
		oper = **optr;

		// end of string or anything that is not a binary operator ends
		// the expression
		if (oper == '\0' || !strchr("+-*/%", oper))
			break;

		(*optr)++;

		rval = eval_term(as, cl, optr, &right);
		// propagate error
		if (rval < 0)
			return rval;

		if ((oper == '/' || oper == '%') && right == 0)
		{
			// dividing by zero is undefined in C; never execute it
			if (!cl -> undef)
			{
				// registered directly so it is reported on whichever
				// pass it occurs
				register_error(as, cl, ERR_BADEXPR);
				return -1;
			}
			// the zero may just be the placeholder for an undefined
			// symbol, so carry on with a placeholder result
			left = 0;
			continue;
		}

		// do the operation and put it in "left"
		switch (oper)
		{
		case '+':
			left += right;
			break;

		case '-':
			left -= right;
			break;
	
		case '*':
			left *= right;
			break;
	
		case '/':
			left /= right;
			break;
		
		case '%':
			left %= right;
			break;
		}
	}

	*val = left;
	return 0;
}