Mercurial > hg-old > index.cgi
diff src/lwasm.c @ 0:57495da01900
Initial checking of LWASM
author | lost |
---|---|
date | Fri, 03 Oct 2008 02:44:20 +0000 |
parents | |
children | 34568fab6058 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/lwasm.c Fri Oct 03 02:44:20 2008 +0000 @@ -0,0 +1,879 @@ +/* + * lwasm.c + * + * main code for lwasm + */ + +#include <ctype.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#define __lwasm_c_seen__ +#include "instab.h" +#include "lwasm.h" + +void lwasm_read_file(asmstate_t *as, char *fname); +extern int add_macro_line(asmstate_t *as, sourceline_t *cl, char *optr); +extern void expand_macro(asmstate_t *as, sourceline_t *cl, char **optr); + +#define debug(mess, ...) do { if (as->debug) { fprintf(stderr, "DEBUG: "); fprintf(stderr, (mess), ## __VA_ARGS__); } } while (0) + +void register_error(asmstate_t *as, sourceline_t *cl, int errcode) +{ + errortab_t *e; + + e = malloc(sizeof(errortab_t)); + + e -> errnum = errcode; + e -> line = cl; + e -> next = cl -> errors; + cl -> errors = e; + + as -> errorcount++; +} + +int eval_expr(asmstate_t *as, sourceline_t *cl, char **optr, int *val); + +int eval_min(int v1, int v2, int v3, int v4) +{ + if (v2 < v1) + v1 = v2; + if (v3 < v1) + v1 = v3; + if (v4 < v1) + v1 = v4; + return v1; +} + +int eval_max(int v1, int v2, int v3, int v4) +{ + if (v2 > v1) + v1 = v2; + if (v3 > v1) + v1 = v3; + if (v4 > v1) + v1 = v4; + return v1; +} + +int lookupreg3(const char *rlist, char **str) +{ + int rval = 0; + int f = 0; + const char *reglist = rlist; + + while (*reglist) + { + if (toupper(**str) == *reglist) + { + // first char matches + if (reglist[1] == ' ') + { + f = 1; + break; + } + if (toupper(*(*str + 1)) == reglist[1]) + { + // second char matches + if (reglist[2] == ' ') + { + f = 1; + break; + } + if (toupper(*(*str + 2)) == reglist[2]) + { + f = 1; + break; + } + } + } + reglist += 3; + rval++; + } + if (f == 0) + return -1; + + + reglist = rval * 3 + rlist; + if (reglist[1] == ' ') + (*str) += 1; + else if (reglist[2] == ' ') + (*str) += 2; + else + (*str)+=3; + return rval; +} + + +int lookupreg(const char *reglist, char **str) +{ + int rval = 0; + while (*reglist) + { + if (toupper(**str) == *reglist) + { + // first char matches + if (reglist[1] == ' ' && !isalpha(*(*str + 1))) + break; + if (toupper(*(*str + 1)) == reglist[1]) + break; + } + reglist += 2; + rval++; + } + if (!*reglist) + return -1; + if (reglist[1] == ' ') + (*str)++; + else + (*str)+=2; + return rval; +} + +void addcodebyte(asmstate_t *as, sourceline_t *cl, int cb) +{ + cl -> len += 1; + if (as -> passnum != 2) + return; + + if (cl -> numcodebytes >= cl -> codesize) + { + cl -> codebytes = realloc(cl -> codebytes, cl -> codesize + 32); + cl -> codesize += 32; + } + debug("EMIT: %02x\n", cb & 0xff); + cl -> codebytes[cl -> numcodebytes++] = cb & 0xFF; +} + +// parse a symble out of the line and return a pointer +// to a static pointer +// return NULL if not a symbol or a bad symbol +char *parse_symbol(asmstate_t *as, char **ptr) +{ + static char *symptr = NULL; + char *tptr = *ptr; + int sl = 0; + + // symbol can start with _,a-z,A-Z + + if (!strchr(SYMCHAR_START, **ptr)) + return NULL; + + while (*tptr && !isspace(*tptr) && strchr(SYMCHAR, *tptr)) + { + tptr++; + sl++; + } + + symptr = realloc(symptr, sl + 1); + tptr = symptr; + while (sl) + { + *tptr++ = *(*ptr)++; + sl--; + } + *tptr = '\0'; + return symptr; +} + +// resolve an instruction +void resolve_insn(asmstate_t *as, sourceline_t *cl) +{ + char *optr; + char opbuf[MAX_OP_LEN + 1]; + char *symbol = NULL; + int c; + + cl -> code_symloc = as -> addr; + + cl -> addrset = 0; + cl -> isequ = 0; + cl -> len = 0; + cl -> undef = 0; + + // only parse line on first pass + if (as -> passnum == 1) + { + optr = cl -> line; + if (!*optr || *optr == '*' || *optr == ';') + { + cl -> opcode = -1; + cl -> remainder = cl -> line; + return; + } + + if (!isspace(*optr)) + { + symbol = parse_symbol(as, &optr); + if (*optr && !isspace(*optr) && !(as -> inmacro)) + { + errorp1(ERR_BADSYM); + while (*optr && !isspace(*optr)) + optr++; + } + if (symbol) + { + cl -> symstr = strdup(symbol); + cl -> hassym = 1; + } + } + + while (isspace(*optr)) + optr++; + + // parse opcode + if (*optr && *optr != ';') + { + c = 0; + while (c < MAX_OP_LEN && *optr && !isspace(*optr)) + { + opbuf[c++] = *optr++; + } + opbuf[c] = '\0'; + if (*optr && !isspace(*optr) && !(as -> inmacro)) + { + errorp1(ERR_BADOP); + cl -> opcode = -1; + } + else + { + cl -> opcstr = strdup(opbuf); + for (c = 0; instab[c].opcode; c++) + { + if (!strcasecmp(opbuf, instab[c].opcode)) + break; + } + if (!instab[c].opcode && opbuf[0] == '*') + { + cl -> opcode = -1; + } + else if (!instab[c].opcode && !(as -> inmacro)) + { + cl -> opcode = -1; + + // look up macro + if (as -> macros) + { + macrotab_t *m; + + for (m = as -> macros; m; m = m -> next) + { + if (!strcmp(m -> name, opbuf)) + break; + } + if (m) + { + // we have a macro here + cl -> macro = m; + while (*optr && isspace(*optr)) + optr++; + expand_macro(as, cl, &optr); + return; + } + else + { + errorp1(ERR_BADOP); + } + } + else + { + errorp1(ERR_BADOP); + } + } + else + cl -> opcode = c; + } + } + else + cl -> opcode = -1; + + if (as -> inmacro && cl -> opcode >= 0 && instab[cl -> opcode].specialnum != SPECIAL_ENDM) + { + add_macro_line(as, cl, cl -> line); + cl -> opcode = -1; + cl -> remainder = cl -> line; + cl -> opcstr = NULL; + cl -> operstr = NULL; + cl -> symstr = NULL; + cl -> hassym = 0; + cl -> macrodef = 1; + return; + } + // parse operand + while (*optr && isspace(*optr)) + optr++; + + cl -> operstr = optr; + } + else + optr = cl -> operstr; + + if (as -> skipcond) + { + // if skipping a condition, need to skip a macro + if (cl -> opcode >= 0) + { + if (instab[cl -> opcode].specialnum == SPECIAL_MACRO) + { + as -> skipmacro = 1; + } + else if (instab[cl -> opcode].specialnum == SPECIAL_ENDM) + { + as -> skipmacro = 0; + } + else if (instab[cl -> opcode].specialnum == SPECIAL_COND && !(as -> skipmacro)) + { + as -> skipcount++; + } + else if (instab[cl -> opcode].specialnum == SPECIAL_ENDC && !(as -> skipmacro)) + { + as -> skipcount--; + if (as -> skipcount <= 0) + { + as -> skipcond = 0; + as -> noelse = 0; + } + } + else if (instab[cl -> opcode].specialnum == SPECIAL_ELSE && !(as -> skipmacro)) + { + if (as -> skipcount == 1) + { + as -> skipcount = 0; + as -> skipcond = 0; + as -> noelse = 1; + return; + } + } + } + if (as -> skipcond) + cl -> skipped = 1; + return; + } + + // do the code thing + // on pass 1, no code is generated + // on pass 2, code is generated using the "emit()" macro + if (cl -> opcode >= 0) + { + if (instab[cl -> opcode].opfn) + { + (*(instab[cl -> opcode].opfn))(as, cl, &optr); + if (as -> passnum == 1) + { + if (*optr) + { + char *t = optr; + char t2; + + t2 = *optr; + cl -> operstr = strdup(cl -> operstr); + *optr = t2; + while (*t && isspace(*t)) + t++; + cl -> remainder = strdup(t); + + } + cl -> remainder = optr; + } + } + else + { + errorp1(ERR_BADOP); + cl -> opcode = -1; + } + } + // address of the symbol may have been changed by a pseudo op + // so we couldn't register it above + // that means it may turn out to be a "forward ref" in pass 1 + if (cl -> hassym) + { + register_symbol(as, cl, cl -> symstr, cl -> code_symloc, cl -> isset ? SYMFLAG_SET : SYMFLAG_NONE); + } + + as -> addr += cl -> len; +} + +void generate_code(asmstate_t *as) +{ + sourceline_t *cl; + + as -> addr = 0; + as -> dpval = 0; + as -> passnum = 2; + for (cl = as -> source_head; cl; cl = cl -> next) + { + resolve_insn(as, cl); + } +} + +void lwasm_read_file(asmstate_t *as, char *fname) +{ + FILE *f; + int cline = 0; + sourceline_t *cl; + size_t bufflen; + char *buff = NULL; + int retval; + + as -> passnum = 1; + + f = fopen(fname, "r"); + if (!f) + { + fprintf(stderr, "Cannot open input file %s: %s\n", fname, strerror(errno)); + return; + } + + while (!feof(f)) + { + retval = getline(&buff, &bufflen, f); + debug(" read line (%s:%d): %s\n", fname, cline, buff); + if (retval < 0) + { + if (feof(f)) + break; + fprintf(stderr, "Error reading '%s': %s\n", fname, strerror(errno)); + exit(1); + } + if (strchr(buff, '\n')) + *strchr(buff, '\n') = '\0'; + if (strchr(buff, '\r')) + *strchr(buff, '\r') = '\0'; + cl = calloc(sizeof(sourceline_t), 1); + if (!cl) + { + perror("Malloc"); + exit(1); + } + + cl -> lineno = cline++; + cl -> sourcefile = fname; + cl -> opcode = -1; + cl -> addrmode = -1; + cl -> addr = as -> addr; + cl -> dpval = as -> dpval; + cl -> prev = as -> source_tail; + if (as -> source_tail) + as -> source_tail -> next = cl; + as -> source_tail = cl; + if (as -> source_head == NULL) + as -> source_head = cl; + cl -> line = strdup(buff); + + resolve_insn(as, cl); + + if (cl -> opcode >= 0 && instab[cl -> opcode].instype == INSTYPE_PSEUDO && instab[cl -> opcode].specialnum == SPECIAL_END) + break; + + *buff = '\0'; + + } + if (buff) + free(buff); + + fclose(f); + + return; +} + +/* +below this point is the expression evaluation package + +Supported binary operators: + - / * % +Supported unary operators: - + +<infix>: + | - | * | / | % +<unary>: - +<expr>: <term> <infix> <term> +<term>: <unary> <term> +<term>: ( <expr> ) +<term>: <symbol> +<term>: ' <char> +<term>: " <char> <char> +<term>: * +<term>: <number> + +<number>: <dec> +<number>: & <dec> + +<number>: $ <hex> +<number>: <hex> H +<number>: @ <oct> +<number>: <oct> O +<number>: <oct> Q + +<number>: % <bin> +<number>: <bin> B + +<bin>: 0 | 1 +<oct>: <bin> | 2 | 3 | 4 | 5 | 6 | 7 +<dec>: <oct> | 8 | 9 +<hex>: <dec> | A | B | C | D | E | F + +NOTE: hex values which start with a non-digit will need to be prefixed +by $ or have a 0 as the leading digit; hence: $CC or 0CCH otherwise the +assembler cannot tell the difference between CCH as a symbol or CCH as +the value $CC + +*/ + +// will throw an error and return 0 in tval if there's a problem +// -1 is problem; cl -> undef set is undefined symbol +int eval_term(asmstate_t *as, sourceline_t *cl, char **optr, int *tval) +{ + char tc; + int rval; + int binval; + int octval; + int decval; + int hexval; + int valtype; + int digval; + int bindone = 0; + + *tval = 0; + +beginagain: + tc = **optr; + if (tc == '+') + { + // unary +, ignored for symetry + (*optr)++; + goto beginagain; + } + + if (tc == '(') + { + (*optr)++; + rval = eval_expr(as, cl, optr, tval); + if (rval < 0) + return rval; + if (**optr != ')') + { + errorp1(ERR_BADEXPR); + return -1; + } + (*optr)++; + return 0; + } + + if (tc == '-') + { + (*optr)++; + rval = eval_term(as, cl, optr, tval); + if (rval < 0) + return rval; + *tval = -*tval; + return 0; + } + + // current address (of current instruction, not PC) + if (tc == '*') + { + *tval = cl -> addr; + (*optr)++; + return 0; + } + + if (strchr("abcdefghijklmnopqrstuvwxyz_", tolower(tc))) + { + // evaluate a symbol + char *symbuf; + + symbuf = parse_symbol(as, optr); + if (!symbuf) + { + errorp1(ERR_BADSYM); + *tval = 0; + return -1; + } + + debug(" looking up symbol: %s\n", symbuf); + *tval = lookup_symbol(as, symbuf); + + // if not found, flag forward ref + if (*tval == -1) + { + errorp2(ERR_UNDEF); + cl -> undef = 1; + *tval = 0; + return 0; + } + return 0; + } + + if (tc == '%') + { + // binary number + int v1 = 0; + (*optr)++; + while (strchr("01", **optr)) + { + v1 = v1 << 1 | ((*(*optr)++) - '0'); + } + *tval = v1; + return 0; + } + if (tc == '$') + { + // hex number + int v1 = 0; + (*optr)++; + debug("HEX CONST: %s\n", *optr); + while (**optr && strchr("01234567890ABCDEF", toupper(tc = **optr))) + { + debug("HEX 2: %02x\n", tc); + if (**optr >= 'A') + { + v1 = v1 << 4 | (toupper((*(*optr)++)) - 'A' + 10); + } + else + { + v1 = v1 << 4 | ((*(*optr)++) - '0'); + } + } + *tval = v1; + return 0; + } + if (tc == '@') + { + // octal number + int v1 = 0; + (*optr)++; + while (strchr("01234567", **optr)) + { + v1 = v1 << 3 | ((*(*optr)++) - '0'); + } + *tval = v1; + return 0; + } + if (tc == '&') + { + // decimal number + int v1 = 0; + (*optr)++; + while (strchr("0123456789", **optr)) + { + v1 = v1 * 10 + ((*(*optr)++) - '0'); + } + *tval = v1; + return 0; + } + if (tc == '\'') + { + (*optr)++; + if (!**optr) + { + errorp1(ERR_BADEXPR); + return -2; + } + *tval = *(*optr)++; + return 0; + } + if (tc == '"') + { + (*optr)++; + if (!**optr || !*(*optr + 1)) + { + errorp1(ERR_BADEXPR); + return -2; + } + *tval = *(*optr)++ << 8 | *(*optr)++; + return 0; + } + // end of string + if (tc == '\0') + { + // error if at EOS as we are looking for a term + errorp1(ERR_BADEXPR); + return -1; + } + + // we have a generic number here which may be decimal, hex, binary, or octal + // based on a suffix + + // possible data types are binary (1), octal (2), decimal(4), hex (8) + valtype = 15; + hexval = octval = decval = binval = 0; + while (1) + { + +// printf(" %c\n", **optr); + if (!**optr || !strchr("ABCDEFabcdefqhoQHO0123456789", **optr)) + { + // end of string, must be decimal or the end of a bin + if (bindone == 1) + { + *tval = binval; + return 0; + } + if (valtype & 4) + { + *tval = decval; + return 0; + } + else + { + errorp1(ERR_BADEXPR); + return -1; + } + } + tc = toupper(*(*optr)++); + + if (tc == 'H') + { + if (valtype & 8) + { + *tval = hexval; + return 0; + } + else + { + // syntax error + errorp1(ERR_BADEXPR); + return -1; + } + } + + if (tc == 'Q' || tc == 'O') + { + if (valtype && 2) + { + *tval = octval; + return 0; + } + else + { + errorp1(ERR_BADEXPR); + return -1; + } + } + + digval = tc - '0'; + if (digval > 9) + digval -= 7; + + // if it's not in the range of a hex digit, error out + if (tc < '0' || (tc > '9' && tc < 'A') || tc > 'F') + { + (*optr)--; + if (valtype & 4) + { + *tval = decval; + return 0; + } + // if we're in hex/bin mode and run to the end of the number + // we must have a binary constant or an error + // if the previous character is B, then we have binary + // else we have error since hex would require a terminating H + // which would be caught above + if (valtype == 8 && toupper(*(*optr)) == 'B') + { + *tval = binval; + return 0; + } + errorp1(ERR_BADEXPR); + return -1; + } + + // if we have any characters past the end of the B, it's not binary + if (bindone == 1) + bindone = 0; + if (tc == 'B') + bindone = 1; + if (digval > 1) + valtype &= 14; + else if (digval > 7) + valtype &= 13; + else if (digval > 9) + valtype &= 11; + + if (valtype & 8) + { + hexval = (hexval << 4) | digval; + } + if (valtype & 4) + { + decval = decval * 10 + digval; + } + if (valtype & 2) + { + octval = (octval << 3) | digval; + } + if (valtype & 1 && !bindone) + { + binval = (binval << 1) | digval; + } + + } + // can't get here from there +} + +// returns -1 if the expression cannot be parsed +// and returns -2 if there is an undefined symbol reference +// resulting value will be in *val; undefined symbols are parsed as +// value 0 but cl -> undef will be set. +int eval_expr(asmstate_t *as, sourceline_t *cl, char **optr, int *val) +{ + int left; + int right; + char oper; + int rval; + + // by default, return 0 in val + *val = 0; + cl -> undef = 0; + + rval = eval_term(as, cl, optr, &left); + if (rval < 0) + return rval; + +nextop: + oper = **optr; + + // end of expr + if (isspace(oper) || oper == ',' || oper == '\0' || oper == ']' || oper == ')') + goto retleft; + + // unrecognized chars + if (!strchr("+-*/%", oper)) + goto retleft; + + (*optr)++; + + rval = eval_term(as, cl, optr, &right); + // propagate error + if (rval < 0) + return rval; + + // do the operation and put it in "left" + switch (oper) + { + case '+': + left += right; + break; + + case '-': + left -= right; + break; + + case '*': + left *= right; + break; + + case '/': + left /= right; + break; + + case '%': + left %= right; + break; + } + + goto nextop; + +retleft: + *val = left; + return 0; +}