Mercurial > hg-old > index.cgi
view src/lwasm.c @ 0:57495da01900
Initial checking of LWASM
author | lost |
---|---|
date | Fri, 03 Oct 2008 02:44:20 +0000 |
parents | |
children | 34568fab6058 |
line wrap: on
line source
/* * lwasm.c * * main code for lwasm */ #include <ctype.h> #include <errno.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #define __lwasm_c_seen__ #include "instab.h" #include "lwasm.h" void lwasm_read_file(asmstate_t *as, char *fname); extern int add_macro_line(asmstate_t *as, sourceline_t *cl, char *optr); extern void expand_macro(asmstate_t *as, sourceline_t *cl, char **optr); #define debug(mess, ...) do { if (as->debug) { fprintf(stderr, "DEBUG: "); fprintf(stderr, (mess), ## __VA_ARGS__); } } while (0) void register_error(asmstate_t *as, sourceline_t *cl, int errcode) { errortab_t *e; e = malloc(sizeof(errortab_t)); e -> errnum = errcode; e -> line = cl; e -> next = cl -> errors; cl -> errors = e; as -> errorcount++; } int eval_expr(asmstate_t *as, sourceline_t *cl, char **optr, int *val); int eval_min(int v1, int v2, int v3, int v4) { if (v2 < v1) v1 = v2; if (v3 < v1) v1 = v3; if (v4 < v1) v1 = v4; return v1; } int eval_max(int v1, int v2, int v3, int v4) { if (v2 > v1) v1 = v2; if (v3 > v1) v1 = v3; if (v4 > v1) v1 = v4; return v1; } int lookupreg3(const char *rlist, char **str) { int rval = 0; int f = 0; const char *reglist = rlist; while (*reglist) { if (toupper(**str) == *reglist) { // first char matches if (reglist[1] == ' ') { f = 1; break; } if (toupper(*(*str + 1)) == reglist[1]) { // second char matches if (reglist[2] == ' ') { f = 1; break; } if (toupper(*(*str + 2)) == reglist[2]) { f = 1; break; } } } reglist += 3; rval++; } if (f == 0) return -1; reglist = rval * 3 + rlist; if (reglist[1] == ' ') (*str) += 1; else if (reglist[2] == ' ') (*str) += 2; else (*str)+=3; return rval; } int lookupreg(const char *reglist, char **str) { int rval = 0; while (*reglist) { if (toupper(**str) == *reglist) { // first char matches if (reglist[1] == ' ' && !isalpha(*(*str + 1))) break; if (toupper(*(*str + 1)) == reglist[1]) break; } reglist += 2; rval++; } if (!*reglist) return -1; if (reglist[1] == ' ') (*str)++; else (*str)+=2; return rval; } 
// account for one byte of object code on source line "cl"
//
// the line length is bumped on every pass; the byte itself (low 8 bits of
// cb) is only stored on pass 2, growing the line's code buffer in 32 byte
// steps as needed
//
// exits the program if the code buffer cannot be grown
void addcodebyte(asmstate_t *as, sourceline_t *cl, int cb)
{
	cl -> len += 1;
	if (as -> passnum != 2)
		return;

	if (cl -> numcodebytes >= cl -> codesize)
	{
		// go through a temporary so the old buffer is not lost on failure
		void *grown = realloc(cl -> codebytes, cl -> codesize + 32);

		if (!grown)
		{
			perror("Malloc");
			exit(1);
		}
		cl -> codebytes = grown;
		cl -> codesize += 32;
	}
	debug("EMIT: %02x\n", cb & 0xff);
	cl -> codebytes[cl -> numcodebytes++] = cb & 0xFF;
}

// parse a symbol out of the line and return a pointer
// to a static buffer holding a copy of it; *ptr is advanced past the symbol
// the buffer is reused (and may move) on the next call
// return NULL if not a symbol or a bad symbol
//
// NOTE: strchr() also matches the terminating NUL, so when *ptr is at the
// end of the string the start check passes and an empty symbol is returned
// rather than NULL
char *parse_symbol(asmstate_t *as, char **ptr)
{
	static char *symptr = NULL;
	char *tptr = *ptr;
	char *newbuf;
	int sl = 0;

	// the first character must be one of SYMCHAR_START
	if (!strchr(SYMCHAR_START, **ptr))
		return NULL;

	// measure the run of valid symbol characters
	while (*tptr && !isspace((unsigned char)*tptr) && strchr(SYMCHAR, *tptr))
	{
		tptr++;
		sl++;
	}

	newbuf = realloc(symptr, sl + 1);
	if (!newbuf)
	{
		perror("Malloc");
		exit(1);
	}
	symptr = newbuf;

	// copy the symbol out, consuming it from the input
	tptr = symptr;
	while (sl)
	{
		*tptr++ = *(*ptr)++;
		sl--;
	}
	*tptr = '\0';
	return symptr;
}

// resolve an instruction
//
// on pass 1 the source line is split into symbol, opcode and operand, the
// opcode is looked up in instab (falling back to the macro table), and
// lines inside a macro definition are handed to add_macro_line()
// on both passes conditional skipping is tracked, the opcode's handler is
// called, any symbol on the line is registered and the assembly address
// is advanced by the length of the line
void resolve_insn(asmstate_t *as, sourceline_t *cl)
{
	char *optr;
	char opbuf[MAX_OP_LEN + 1];
	char *symbol = NULL;
	int c;

	cl -> code_symloc = as -> addr;
	cl -> addrset = 0;
	cl -> isequ = 0;
	cl -> len = 0;
	cl -> undef = 0;

	// only parse line on first pass
	if (as -> passnum == 1)
	{
		optr = cl -> line;

		// blank lines and comment lines carry no opcode
		if (!*optr || *optr == '*' || *optr == ';')
		{
			cl -> opcode = -1;
			cl -> remainder = cl -> line;
			return;
		}

		// anything in the first column is a symbol
		if (!isspace((unsigned char)*optr))
		{
			symbol = parse_symbol(as, &optr);
			if (*optr && !isspace((unsigned char)*optr) && !(as -> inmacro))
			{
				errorp1(ERR_BADSYM);
				while (*optr && !isspace((unsigned char)*optr))
					optr++;
			}
			if (symbol)
			{
				cl -> symstr = strdup(symbol);
				cl -> hassym = 1;
			}
		}

		while (isspace((unsigned char)*optr))
			optr++;

		// parse opcode
		if (*optr && *optr != ';')
		{
			c = 0;
			while (c < MAX_OP_LEN && *optr && !isspace((unsigned char)*optr))
			{
				opbuf[c++] = *optr++;
			}
			opbuf[c] = '\0';

			if (*optr && !isspace((unsigned char)*optr) && !(as -> inmacro))
			{
				// opcode is longer than MAX_OP_LEN
				errorp1(ERR_BADOP);
				cl -> opcode = -1;
			}
			else
			{
				cl -> opcstr = strdup(opbuf);
				for (c = 0; instab[c].opcode; c++)
				{
					if (!strcasecmp(opbuf, instab[c].opcode))
						break;
				}

				if (!instab[c].opcode && opbuf[0] == '*')
				{
					// a comment where the opcode should be
					cl -> opcode = -1;
				}
				else if (!instab[c].opcode && !(as -> inmacro))
				{
					cl -> opcode = -1;

					// look up macro
					if (as -> macros)
					{
						macrotab_t *m;

						for (m = as -> macros; m; m = m -> next)
						{
							if (!strcmp(m -> name, opbuf))
								break;
						}
						if (m)
						{
							// we have a macro here
							cl -> macro = m;
							while (*optr && isspace((unsigned char)*optr))
								optr++;
							expand_macro(as, cl, &optr);
							return;
						}
						else
						{
							errorp1(ERR_BADOP);
						}
					}
					else
					{
						errorp1(ERR_BADOP);
					}
				}
				else
					cl -> opcode = c;
			}
		}
		else
			cl -> opcode = -1;

		// inside a macro definition every line except the one that ends
		// the macro is stored in the macro instead of being assembled
		if (as -> inmacro && cl -> opcode >= 0 && instab[cl -> opcode].specialnum != SPECIAL_ENDM)
		{
			add_macro_line(as, cl, cl -> line);
			cl -> opcode = -1;
			cl -> remainder = cl -> line;
			cl -> opcstr = NULL;
			cl -> operstr = NULL;
			cl -> symstr = NULL;
			cl -> hassym = 0;
			cl -> macrodef = 1;
			return;
		}

		// parse operand
		while (*optr && isspace((unsigned char)*optr))
			optr++;

		cl -> operstr = optr;
	}
	else
		optr = cl -> operstr;

	if (as -> skipcond)
	{
		// if skipping a condition, need to skip a macro
		if (cl -> opcode >= 0)
		{
			if (instab[cl -> opcode].specialnum == SPECIAL_MACRO)
			{
				as -> skipmacro = 1;
			}
			else if (instab[cl -> opcode].specialnum == SPECIAL_ENDM)
			{
				as -> skipmacro = 0;
			}
			else if (instab[cl -> opcode].specialnum == SPECIAL_COND && !(as -> skipmacro))
			{
				// nested conditional inside the skipped region
				as -> skipcount++;
			}
			else if (instab[cl -> opcode].specialnum == SPECIAL_ENDC && !(as -> skipmacro))
			{
				as -> skipcount--;
				if (as -> skipcount <= 0)
				{
					as -> skipcond = 0;
					as -> noelse = 0;
				}
			}
			else if (instab[cl -> opcode].specialnum == SPECIAL_ELSE && !(as -> skipmacro))
			{
				// only the else of the outermost skipped conditional
				// turns assembly back on
				if (as -> skipcount == 1)
				{
					as -> skipcount = 0;
					as -> skipcond = 0;
					as -> noelse = 1;
					return;
				}
			}
		}
		if (as -> skipcond)
			cl -> skipped = 1;
		return;
	}

	// do the code thing
	// on pass 1, no code is generated
	// on pass 2, code is generated using the "emit()" macro
	if (cl -> opcode >= 0)
	{
		if (instab[cl -> opcode].opfn)
		{
			(*(instab[cl -> opcode].opfn))(as, cl, &optr);
			if (as -> passnum == 1)
			{
				if (*optr)
				{
					// text follows the operand: keep a private copy of
					// the operand string
					cl -> operstr = strdup(cl -> operstr);
				}
				// whatever the handler did not consume is the remainder
				// (the old code also strdup()ed a second copy here and
				// immediately overwrote the pointer, leaking it)
				cl -> remainder = optr;
			}
		}
		else
		{
			errorp1(ERR_BADOP);
			cl -> opcode = -1;
		}
	}

	// address of the symbol may have been changed by a pseudo op
	// so we couldn't register it above
	// that means it may turn out to be a "forward ref" in pass 1
	if (cl -> hassym)
	{
		register_symbol(as, cl, cl -> symstr, cl -> code_symloc, cl -> isset ? SYMFLAG_SET : SYMFLAG_NONE);
	}

	as -> addr += cl -> len;
}

// run pass 2: reset the address and direct page value, then resolve every
// source line again in order
void generate_code(asmstate_t *as)
{
	sourceline_t *cl;

	as -> addr = 0;
	as -> dpval = 0;
	as -> passnum = 2;
	for (cl = as -> source_head; cl; cl = cl -> next)
	{
		resolve_insn(as, cl);
	}
}

// run pass 1 over a source file: read it line by line, append a
// sourceline_t for each line to the assembler's source list and resolve it
//
// reading stops at end of file or after a line whose opcode is the END
// pseudo op; a file that cannot be opened is reported and skipped, while
// read and allocation failures exit the program
// each line keeps the caller's fname pointer as its source file name
void lwasm_read_file(asmstate_t *as, char *fname)
{
	FILE *f;
	int cline = 0;
	sourceline_t *cl;
	size_t bufflen = 0;
	char *buff = NULL;
	int retval;

	as -> passnum = 1;

	f = fopen(fname, "r");
	if (!f)
	{
		fprintf(stderr, "Cannot open input file %s: %s\n", fname, strerror(errno));
		return;
	}

	for (;;)
	{
		retval = getline(&buff, &bufflen, f);
		if (retval < 0)
		{
			if (feof(f))
				break;
			fprintf(stderr, "Error reading '%s': %s\n", fname, strerror(errno));
			exit(1);
		}
		// only look at the buffer once we know the read succeeded
		debug(" read line (%s:%d): %s\n", fname, cline, buff);

		// cut the line at the first CR or LF
		buff[strcspn(buff, "\r\n")] = '\0';

		cl = calloc(1, sizeof *cl);
		if (!cl)
		{
			perror("Malloc");
			exit(1);
		}

		cl -> lineno = cline++;
		cl -> sourcefile = fname;
		cl -> opcode = -1;
		cl -> addrmode = -1;
		cl -> addr = as -> addr;
		cl -> dpval = as -> dpval;

		// link the line onto the tail of the source list
		cl -> prev = as -> source_tail;
		if (as -> source_tail)
			as -> source_tail -> next = cl;
		as -> source_tail = cl;
		if (as -> source_head == NULL)
			as -> source_head = cl;

		cl -> line = strdup(buff);
		if (!cl -> line)
		{
			perror("Malloc");
			exit(1);
		}

		resolve_insn(as, cl);

		if (cl -> opcode >= 0 && instab[cl -> opcode].instype == INSTYPE_PSEUDO && instab[cl -> opcode].specialnum == SPECIAL_END)
			break;
	}

	free(buff);
	fclose(f);

	return;
}

/*
below this point is the expression evaluation package

Supported binary operators: + - / * %
Supported unary operators: -

<infix>: + | - | * | / | %
<unary>: -
<expr>: <term> <infix> <term>
<term>: <unary> <term>
<term>: ( <expr> )
<term>: <symbol>
<term>: ' <char>
<term>: " <char> <char>
<term>: *
<term>: <number>

<number>: <dec>
<number>: & <dec>
<number>: $ <hex>
<number>: <hex> H
<number>: @ <oct>
<number>: <oct> O
<number>: <oct> Q
<number>: % <bin>
<number>: <bin> B

<bin>: 0 | 1
<oct>: <bin> | 2 | 3 | 4 | 5 | 6 | 7
<dec>: <oct> | 8 | 9
<hex>: <dec> | A | B | C | D | E | F

NOTE: hex values which start with a non-digit will need to be prefixed
by $ or have a 0 as the leading digit; hence: $CC or 0CCH. Otherwise the
assembler cannot tell the difference between CCH as a symbol and CCH as
the value $CC.
*/

// evaluates a chain of terms and operators without touching cl -> undef;
// shared by eval_expr() and by parenthesized sub-expressions in eval_term()
static int eval_subexpr(asmstate_t *as, sourceline_t *cl, char **optr, int *val);

// parse a single term at *optr, advancing *optr past it
//
// returns 0 with the value in *tval on success
// will throw an error and return 0 in tval if there's a problem:
// -1 for a malformed term, -2 for a truncated character constant
// an undefined symbol is not a failure: it evaluates to 0, returns 0 and
// sets cl -> undef
//
// NOTE: lookup_symbol() returning -1 is taken to mean "not found"
// NOTE: a term starting with a character that matches none of the forms
// below (for example ',') evaluates to 0 without consuming anything
int eval_term(asmstate_t *as, sourceline_t *cl, char **optr, int *tval)
{
	char tc;
	int rval;
	int binval;
	int octval;
	int decval;
	int hexval;
	int valtype;
	int digval;
	int bindone = 0;

	*tval = 0;

beginagain:
	tc = **optr;
	if (tc == '+')
	{
		// unary +, ignored for symmetry
		(*optr)++;
		goto beginagain;
	}

	if (tc == '(')
	{
		// parenthesized sub-expression; must not reset cl -> undef, which
		// an earlier term of the enclosing expression may have set
		(*optr)++;
		rval = eval_subexpr(as, cl, optr, tval);
		if (rval < 0)
			return rval;
		if (**optr != ')')
		{
			errorp1(ERR_BADEXPR);
			return -1;
		}
		(*optr)++;
		return 0;
	}

	if (tc == '-')
	{
		// unary minus
		(*optr)++;
		rval = eval_term(as, cl, optr, tval);
		if (rval < 0)
			return rval;
		*tval = -*tval;
		return 0;
	}

	// current address (of current instruction, not PC)
	if (tc == '*')
	{
		*tval = cl -> addr;
		(*optr)++;
		return 0;
	}

	// strchr() matches the terminating NUL too, so rule out end of string
	// explicitly or it would be looked up as an empty symbol
	if (tc != '\0' && strchr("abcdefghijklmnopqrstuvwxyz_", tolower((unsigned char)tc)))
	{
		// evaluate a symbol
		char *symbuf;

		symbuf = parse_symbol(as, optr);
		if (!symbuf)
		{
			errorp1(ERR_BADSYM);
			*tval = 0;
			return -1;
		}

		debug(" looking up symbol: %s\n", symbuf);
		*tval = lookup_symbol(as, symbuf);

		// if not found, flag forward ref
		if (*tval == -1)
		{
			errorp2(ERR_UNDEF);
			cl -> undef = 1;
			*tval = 0;
			return 0;
		}
		return 0;
	}

	if (tc == '%')
	{
		// binary number
		int v1 = 0;

		(*optr)++;
		while (**optr && strchr("01", **optr))
		{
			v1 = v1 << 1 | (**optr - '0');
			(*optr)++;
		}
		*tval = v1;
		return 0;
	}

	if (tc == '$')
	{
		// hex number
		int v1 = 0;

		(*optr)++;
		debug("HEX CONST: %s\n", *optr);
		for (;;)
		{
			tc = **optr;
			if (!tc || !strchr("0123456789ABCDEF", toupper((unsigned char)tc)))
				break;
			debug("HEX 2: %02x\n", tc);
			(*optr)++;
			if (tc >= 'A')
				v1 = v1 << 4 | (toupper((unsigned char)tc) - 'A' + 10);
			else
				v1 = v1 << 4 | (tc - '0');
		}
		*tval = v1;
		return 0;
	}

	if (tc == '@')
	{
		// octal number
		int v1 = 0;

		(*optr)++;
		while (**optr && strchr("01234567", **optr))
		{
			v1 = v1 << 3 | (**optr - '0');
			(*optr)++;
		}
		*tval = v1;
		return 0;
	}

	if (tc == '&')
	{
		// decimal number
		int v1 = 0;

		(*optr)++;
		while (**optr && strchr("0123456789", **optr))
		{
			v1 = v1 * 10 + (**optr - '0');
			(*optr)++;
		}
		*tval = v1;
		return 0;
	}

	if (tc == '\'')
	{
		// single character constant
		(*optr)++;
		if (!**optr)
		{
			errorp1(ERR_BADEXPR);
			return -2;
		}
		*tval = (unsigned char)**optr;
		(*optr)++;
		return 0;
	}

	if (tc == '"')
	{
		// two character constant, first character in the high byte
		(*optr)++;
		if (!**optr || !*(*optr + 1))
		{
			errorp1(ERR_BADEXPR);
			return -2;
		}
		// read both characters before advancing; two ++ operations on
		// the same pointer in one expression are unsequenced
		*tval = ((unsigned char)(*optr)[0] << 8) | (unsigned char)(*optr)[1];
		*optr += 2;
		return 0;
	}

	// end of string
	if (tc == '\0')
	{
		// error if at EOS as we are looking for a term
		errorp1(ERR_BADEXPR);
		return -1;
	}

	// we have a generic number here which may be decimal, hex, binary, or octal
	// based on a suffix
	// possible data types are binary (1), octal (2), decimal (4), hex (8)
	// valtype holds the set of bases still consistent with the digits seen
	valtype = 15;
	hexval = octval = decval = binval = 0;
	while (1)
	{
		if (!**optr || !strchr("ABCDEFabcdefqhoQHO0123456789", **optr))
		{
			// end of string, must be decimal or the end of a bin
			if (bindone == 1)
			{
				*tval = binval;
				return 0;
			}
			if (valtype & 4)
			{
				*tval = decval;
				return 0;
			}
			else
			{
				errorp1(ERR_BADEXPR);
				return -1;
			}
		}

		tc = toupper((unsigned char)**optr);
		(*optr)++;

		if (tc == 'H')
		{
			if (valtype & 8)
			{
				*tval = hexval;
				return 0;
			}
			else
			{
				// syntax error
				errorp1(ERR_BADEXPR);
				return -1;
			}
		}

		if (tc == 'Q' || tc == 'O')
		{
			if (valtype & 2)
			{
				*tval = octval;
				return 0;
			}
			else
			{
				errorp1(ERR_BADEXPR);
				return -1;
			}
		}

		// only 0-9 and A-F can get here: the check at the top of the loop
		// lets through nothing else, and H, Q and O were handled above
		digval = tc - '0';
		if (digval > 9)
			digval -= 7;

		// a B ends a binary constant only if every digit before it was a
		// binary digit; any character after the B cancels that again
		bindone = (tc == 'B' && (valtype & 1)) ? 1 : 0;

		// rule out every base this digit is too large for
		if (digval > 1)
			valtype &= 14;
		if (digval > 7)
			valtype &= 13;
		if (digval > 9)
			valtype &= 11;

		if (valtype & 8)
		{
			hexval = (hexval << 4) | digval;
		}
		if (valtype & 4)
		{
			decval = decval * 10 + digval;
		}
		if (valtype & 2)
		{
			octval = (octval << 3) | digval;
		}
		if (valtype & 1 && !bindone)
		{
			binval = (binval << 1) | digval;
		}
	}
	// can't get here from there
}

// evaluate <term> { <infix> <term> } strictly left to right (there is no
// operator precedence) and store the result in *val
//
// stops, without consuming it, at the end of the string or at the first
// character that is not one of + - * / %
// returns 0 on success or the negative code from eval_term() on failure,
// in which case *val is 0
// division or modulo by zero is an error, except when an undefined symbol
// has been seen, where the result is meaningless anyway and is forced to 0
static int eval_subexpr(asmstate_t *as, sourceline_t *cl, char **optr, int *val)
{
	int left;
	int right;
	char oper;
	int rval;

	// by default, return 0 in val
	*val = 0;

	rval = eval_term(as, cl, optr, &left);
	if (rval < 0)
		return rval;

	for (;;)
	{
		oper = **optr;

		// end of expr or unrecognized chars
		if (oper == '\0' || !strchr("+-*/%", oper))
			break;

		(*optr)++;

		rval = eval_term(as, cl, optr, &right);
		// propagate error
		if (rval < 0)
			return rval;

		// do the operation and put it in "left"
		switch (oper)
		{
		case '+':
			left += right;
			break;

		case '-':
			left -= right;
			break;

		case '*':
			left *= right;
			break;

		case '/':
		case '%':
			if (right == 0)
			{
				if (!cl -> undef)
				{
					errorp1(ERR_BADEXPR);
					return -1;
				}
				left = 0;
			}
			else if (oper == '/')
				left /= right;
			else
				left %= right;
			break;
		}
	}

	*val = left;
	return 0;
}

// evaluate the expression at *optr, advancing *optr past it
//
// returns 0 on success and a negative value if the expression cannot be
// parsed
// resulting value will be in *val; undefined symbols are parsed as
// value 0 (still returning 0) but cl -> undef will be set
int eval_expr(asmstate_t *as, sourceline_t *cl, char **optr, int *val)
{
	// start each top level expression with a clean undefined flag
	cl -> undef = 0;

	return eval_subexpr(as, cl, optr, val);
}