Mercurial > hg > index.cgi
diff lwasm/lwasm.c @ 0:2c24602be78f
Initial import from lwtools 3.0.1 version, with new hand built build system and file reorganization
author | lost@l-w.ca |
---|---|
date | Wed, 19 Jan 2011 22:27:17 -0700 |
parents | |
children | 7317fbe024af |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lwasm/lwasm.c Wed Jan 19 22:27:17 2011 -0700 @@ -0,0 +1,866 @@ +/* +lwasm.c + +Copyright © 2010 William Astle + +This file is part of LWTOOLS. + +LWTOOLS is free software: you can redistribute it and/or modify it under the +terms of the GNU General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define ___lwasm_c_seen___ + +#include <stdio.h> +#include <stdarg.h> +#include <string.h> + +#include <lw_expr.h> +#include <lw_alloc.h> +#include <lw_string.h> + +#include "lwasm.h" + +void lwasm_register_error(asmstate_t *as, line_t *l, const char *msg, ...); + +int lwasm_expr_exportable(asmstate_t *as, lw_expr_t expr) +{ + return 0; +} + +int lwasm_expr_exportval(asmstate_t *as, lw_expr_t expr) +{ + return 0; +} + +lw_expr_t lwasm_evaluate_var(char *var, void *priv) +{ + asmstate_t *as = (asmstate_t *)priv; + lw_expr_t e; + importlist_t *im; + struct symtabe *s; + + s = lookup_symbol(as, as -> cl, var); + if (s) + { + e = lw_expr_build(lw_expr_type_special, lwasm_expr_syment, s); + return e; + } + + // undefined here is undefied unless output is object + if (as -> output_format != OUTPUT_OBJ) + goto nomatch; + + // check for import + for (im = as -> importlist; im; im = im -> next) + { + if (!strcmp(im -> symbol, var)) + break; + } + + // check for "undefined" to import automatically + if (!im && CURPRAGMA(as -> cl, PRAGMA_UNDEFEXTERN)) + { + im = lw_alloc(sizeof(importlist_t)); + im -> symbol = lw_strdup(var); + im -> next = as -> importlist; + as -> importlist = im; + } + + if (!im) + goto nomatch; + + e = lw_expr_build(lw_expr_type_special, lwasm_expr_import, im); + return e; + +nomatch: + if (as -> badsymerr) + { + lwasm_register_error(as, as -> cl, "Undefined symbol %s", var); + } + return NULL; +} + +lw_expr_t lwasm_evaluate_special(int t, void *ptr, void *priv) +{ + switch (t) + { + case lwasm_expr_secbase: + { +// sectiontab_t *s = priv; + asmstate_t *as = priv; + if (as -> exportcheck && ptr == as -> csect) + return lw_expr_build(lw_expr_type_int, 0); + return NULL; + } + + case lwasm_expr_linelen: + { + line_t *cl = ptr; + if (cl -> len == -1) + return NULL; + return lw_expr_build(lw_expr_type_int, cl -> len); + } + break; + + case lwasm_expr_lineaddr: + { + line_t *cl = ptr; + if (cl -> addr) + return lw_expr_copy(cl -> addr); + else + return NULL; + } + + case lwasm_expr_syment: + { + struct symtabe *sym = ptr; + return lw_expr_copy(sym -> value); + } + + case lwasm_expr_import: + { + return NULL; + } + + case lwasm_expr_nextbp: + { + line_t *cl = ptr; + for (cl = cl -> next; cl; cl = cl -> next) + { + if (cl -> isbrpt) + break; + } + if (cl) + { + return lw_expr_copy(cl -> addr); + } + return NULL; + } + + case lwasm_expr_prevbp: + { + line_t *cl = ptr; + for (cl = cl -> prev; cl; cl = cl -> prev) + { + if (cl -> isbrpt) + break; + } + if (cl) + { + return lw_expr_copy(cl -> addr); + } + return NULL; + } + } + return NULL; +} + +void lwasm_register_error(asmstate_t *as, line_t *l, const char *msg, ...) +{ + lwasm_error_t *e; + va_list args; + char errbuff[1024]; + int r; + + if (!l) + return; + + va_start(args, msg); + + e = lw_alloc(sizeof(lwasm_error_t)); + + e -> next = l -> err; + l -> err = e; + + as -> errorcount++; + + r = vsnprintf(errbuff, 1024, msg, args); + e -> mess = lw_strdup(errbuff); + + va_end(args); +} + +void lwasm_register_warning(asmstate_t *as, line_t *l, const char *msg, ...) +{ + lwasm_error_t *e; + va_list args; + char errbuff[1024]; + int r; + + if (!l) + return; + + va_start(args, msg); + + e = lw_alloc(sizeof(lwasm_error_t)); + + e -> next = l -> err; + l -> err = e; + + as -> errorcount++; + + r = vsnprintf(errbuff, 1024, msg, args); + e -> mess = lw_strdup(errbuff); + + va_end(args); +} + +int lwasm_next_context(asmstate_t *as) +{ + int r; + r = as -> nextcontext; + as -> nextcontext++; + return r; +} + +void lwasm_emit(line_t *cl, int byte) +{ + if (cl -> outputl < 0) + cl -> outputl = 0; + + if (cl -> outputl == cl -> outputbl) + { + cl -> output = lw_realloc(cl -> output, cl -> outputbl + 8); + cl -> outputbl += 8; + } + cl -> output[cl -> outputl++] = byte & 0xff; + + if (cl -> inmod) + { + asmstate_t *as = cl -> as; + // update module CRC + // this is a direct transliteration from the nitros9 asm source + // to C; it can, no doubt, be optimized for 32 bit processing + byte &= 0xff; + + byte ^= (as -> crc)[0]; + (as -> crc)[0] = (as -> crc)[1]; + (as -> crc)[1] = (as -> crc)[2]; + (as -> crc)[1] ^= (byte >> 7); + (as -> crc)[2] = (byte << 1); + (as -> crc)[1] ^= (byte >> 2); + (as -> crc)[2] ^= (byte << 6); + byte ^= (byte << 1); + byte ^= (byte << 2); + byte ^= (byte << 4); + if (byte & 0x80) + { + (as -> crc)[0] ^= 0x80; + (as -> crc)[2] ^= 0x21; + } + } +} + +void lwasm_emitop(line_t *cl, int opc) +{ + if (opc > 0x100) + lwasm_emit(cl, opc >> 8); + lwasm_emit(cl, opc); +} + +lw_expr_t lwasm_parse_term(char **p, void *priv) +{ + asmstate_t *as = priv; + int val; + + if (!**p) + return NULL; + + if (**p == '*' || ( + **p == '.' + && !((*p)[1] >= 'A' && (*p)[1] <= 'Z') + && !((*p)[1] >= 'a' && (*p)[1] <= 'z') + && !((*p)[1] >= '0' && (*p)[1] <= '9') + )) + { + // special "symbol" for current line addr (*, .) + (*p)++; + return lw_expr_build(lw_expr_type_special, lwasm_expr_lineaddr, as -> cl); + } + + // branch points + if (**p == '<') + { + (*p)++; + return lw_expr_build(lw_expr_type_special, lwasm_expr_prevbp, as -> cl); + } + if (**p == '>') + { + (*p)++; + return lw_expr_build(lw_expr_type_special, lwasm_expr_nextbp, as -> cl); + } + + // double ascii constant + if (**p == '"') + { + int v; + (*p)++; + if (!**p) + return NULL; + if (!*((*p)+1)) + return NULL; + v = (unsigned char)**p << 8 | (unsigned char)*((*p)+1); + (*p) += 2; + return lw_expr_build(lw_expr_type_int, v); + } + + if (**p == '\'') + { + int v; + + (*p)++; + if (!**p) + return NULL; + + v = (unsigned char)**p; + (*p)++; + return lw_expr_build(lw_expr_type_int, v); + } + + if (**p == '&') + { + // decimal constant + int v = 0; + (*p)++; + + if (!strchr("0123456789", **p)) + return NULL; + + while (**p && strchr("0123456789", **p)) + { + val = val * 10 + (**p - '0'); + (*p)++; + } + return lw_expr_build(lw_expr_type_int, v); + } + + if (**p == '%') + { + // binary constant + int v = 0; + (*p)++; + + if (**p != '0' && **p != '1') + return NULL; + + while (**p && (**p == '0' || **p == '1')) + { + val = val * 2 + (**p - '0'); + (*p)++; + } + return lw_expr_build(lw_expr_type_int, v); + } + + if (**p == '$') + { + // hexadecimal constant + int v = 0, v2; + (*p)++; + if (!strchr("0123456789abcdefABCDEF", **p)) + return NULL; + + while (**p && strchr("0123456789abcdefABCDEF", **p)) + { + v2 = toupper(**p) - '0'; + if (v2 > 9) + v2 -= 7; + v = v * 16 + v2; + (*p)++; + } + return lw_expr_build(lw_expr_type_int, v); + } + + if (**p == '0' && (*((*p)+1) == 'x' || *((*p)+1) == 'X')) + { + // hexadecimal constant, C style + int v = 0, v2; + (*p)+=2; + + if (!strchr("0123456789abcdefABCDEF", **p)) + return NULL; + + while (**p && strchr("0123456789abcdefABCDEF", **p)) + { + v2 = toupper(**p) - '0'; + if (v2 > 9) + v2 -= 7; + v = v * 16 + v2; + (*p)++; + } + return lw_expr_build(lw_expr_type_int, v); + } + + if (**p == '@' && (*((*p)+1) >= '0' && *((*p)+1) <= '7')) + { + // octal constant + int v = 0; + (*p)++; + + if (!strchr("01234567", **p)) + return NULL; + + while (**p && strchr("01234567", **p)) + { + v = v * 8 + (**p - '0'); + (*p)++; + } + return lw_expr_build(lw_expr_type_int, v); + } + + + // symbol or bare decimal or suffix constant here + do + { + int havedol = 0; + int l = 0; + + while ((*p)[l] && strchr(SYMCHARS, (*p)[l])) + { + if ((*p)[l] == '$') + havedol = 1; + l++; + } + if (l == 0) + return NULL; + + if ((*p)[l] == '{') + { + while ((*p)[l] && (*p)[l] != '}') + l++; + l++; + } + + if (havedol || **p < '0' || **p > '9') + { + // have a symbol here + char *sym; + lw_expr_t term; + + sym = lw_strndup(*p, l); + (*p) += l; + term = lw_expr_build(lw_expr_type_var, sym); + lw_free(sym); + return term; + } + } while (0); + + if (!**p) + return NULL; + + // we have a numeric constant here, either decimal or postfix base notation + { + int decval = 0, binval = 0, hexval = 0, octval = 0; + int valtype = 15; // 1 = bin, 2 = oct, 4 = dec, 8 = hex + int bindone = 0; + int val; + int dval; + + while (1) + { + if (!**p || !strchr("0123456789ABCDEFabcdefqhoQHO", **p)) + { + // we can legally be bin or decimal here + if (bindone) + { + // just finished a binary value + val = binval; + break; + } + else if (valtype & 4) + { + val = decval; + break; + } + else + { + // bad value + return NULL; + } + } + + dval = toupper(**p); + (*p)++; + + if (bindone) + { + // any characters past "B" means it is not binary + bindone = 0; + valtype &= 14; + } + + switch (dval) + { + case 'Q': + case 'O': + if (valtype & 2) + { + val = octval; + valtype = -1; + break; + } + else + { + return NULL; + } + /* can't get here */ + + case 'H': + if (valtype & 8) + { + val = hexval; + valtype = -1; + break; + } + else + { + return NULL; + } + /* can't get here */ + + case 'B': + // this is a bit of a sticky one since B may be a + // hex number instead of the end of a binary number + // so it falls through to the digit case + if (valtype & 1) + { + // could still be binary of hex + bindone = 1; + valtype = 9; + } + /* fall through intented */ + + default: + // digit + dval -= '0'; + if (dval > 9) + dval -= 7; + if (valtype & 8) + hexval = hexval * 16 + dval; + if (valtype & 4) + { + if (dval > 9) + valtype &= 11; + else + decval = decval * 10 + dval; + } + if (valtype & 2) + { + if (dval > 7) + valtype &= 13; + else + octval = octval * 8 + dval; + } + if (valtype & 1) + { + if (dval > 1) + valtype &= 14; + else + binval = binval * 2 + dval; + } + } + if (valtype == -1) + break; + + // return if no more valid types + if (valtype == 0) + return NULL; + + val = decval; // in case we fall through + } + + // get here if we have a value + return lw_expr_build(lw_expr_type_int, val); + } + // can't get here +} + +lw_expr_t lwasm_parse_expr(asmstate_t *as, char **p) +{ + lw_expr_t e; + + e = lw_expr_parse(p, as); + + return e; +} + +int lwasm_reduce_expr(asmstate_t *as, lw_expr_t expr) +{ + lw_expr_simplify(expr, as); +} + +void lwasm_save_expr(line_t *cl, int id, lw_expr_t expr) +{ + struct line_expr_s *e; + + for (e = cl -> exprs; e; e = e -> next) + { + if (e -> id == id) + { + lw_expr_destroy(e -> expr); + e -> expr = expr; + return; + } + } + + e = lw_alloc(sizeof(struct line_expr_s)); + e -> expr = expr; + e -> id = id; + e -> next = cl -> exprs; + cl -> exprs = e; +} + +lw_expr_t lwasm_fetch_expr(line_t *cl, int id) +{ + struct line_expr_s *e; + + for (e = cl -> exprs; e; e = e -> next) + { + if (e -> id == id) + { + return e -> expr; + } + } + return NULL; +} + +void skip_operand(char **p) +{ + for (; **p && !isspace(**p); (*p)++) + /* do nothing */ ; +} + +int lwasm_emitexpr(line_t *l, lw_expr_t expr, int size) +{ + int v = 0; + int ol; + + ol = l -> outputl; + if (ol == -1) + ol = 0; + + if (lw_expr_istype(expr, lw_expr_type_int)) + { + v = lw_expr_intval(expr); + } + // handle external/cross-section/incomplete references here + else + { + if (l -> as -> output_format == OUTPUT_OBJ) + { + reloctab_t *re; + lw_expr_t te; + + if (size == 4) + { + // create a two part reference because lwlink doesn't + // support 32 bit references + lw_expr_t te2; + te = lw_expr_build(lw_expr_type_int, 0x10000); + te2 = lw_expr_build(lw_expr_type_oper, lw_expr_oper_divide, expr, te); + lw_expr_destroy(te); + + re = lw_alloc(sizeof(reloctab_t)); + re -> next = l -> csect -> reloctab; + l -> csect -> reloctab = re; + te = lw_expr_build(lw_expr_type_int, ol); + re -> offset = lw_expr_build(lw_expr_type_oper, lw_expr_oper_plus, l -> addr, te); + lw_expr_destroy(te); + lwasm_reduce_expr(l -> as, re -> offset); + re -> expr = te2; + re -> size = 2; + + te = lw_expr_build(lw_expr_type_int, 0xFFFF); + te2 = lw_expr_build(lw_expr_type_oper, lw_expr_oper_bwand, expr, te); + lw_expr_destroy(te); + + re = lw_alloc(sizeof(reloctab_t)); + re -> next = l -> csect -> reloctab; + l -> csect -> reloctab = re; + te = lw_expr_build(lw_expr_type_int, ol + 2); + re -> offset = lw_expr_build(lw_expr_type_oper, lw_expr_oper_plus, l -> addr, te); + lw_expr_destroy(te); + lwasm_reduce_expr(l -> as, re -> offset); + re -> expr = te2; + re -> size = 2; + } + else + { + // add "expression" record to section table + re = lw_alloc(sizeof(reloctab_t)); + re -> next = l -> csect -> reloctab; + l -> csect -> reloctab = re; + te = lw_expr_build(lw_expr_type_int, ol); + re -> offset = lw_expr_build(lw_expr_type_oper, lw_expr_oper_plus, l -> addr, te); + lw_expr_destroy(te); + lwasm_reduce_expr(l -> as, re -> offset); + re -> size = size; + re -> expr = lw_expr_copy(expr); + } + for (v = 0; v < size; v++) + lwasm_emit(l, 0); + return 0; + } + lwasm_register_error(l -> as, l, "Expression not fully resolved"); + return -1; + } + + switch (size) + { + case 4: + lwasm_emit(l, v >> 24); + lwasm_emit(l, v >> 16); + /* fallthrough intended */ + + case 2: + lwasm_emit(l, v >> 8); + /* fallthrough intended */ + + case 1: + lwasm_emit(l, v); + } + + return 0; +} + +int lwasm_lookupreg2(const char *regs, char **p) +{ + int rval = 0; + + while (*regs) + { + if (toupper(**p) == *regs) + { + if (regs[1] == ' ' && !isalpha(*(*p + 1))) + break; + if (toupper(*(*p + 1)) == regs[1]) + break; + } + regs += 2; + rval++; + } + if (!*regs) + return -1; + if (regs[1] == ' ') + (*p)++; + else + (*p) += 2; + return rval; +} + +int lwasm_lookupreg3(const char *regs, char **p) +{ + int rval = 0; + + while (*regs) + { + if (toupper(**p) == *regs) + { + if (regs[1] == ' ' && !isalpha(*(*p + 1))) + break; + if (toupper(*(*p + 1)) == regs[1]) + { + if (regs[2] == ' ' && !isalpha(*(*p + 2))) + break; + if (toupper(*(*p + 2)) == regs[2]) + break; + } + } + regs += 3; + rval++; + } + if (!*regs) + return -1; + if (regs[1] == ' ') + (*p)++; + else if (regs[2] == ' ') + (*p) += 2; + else + (*p) += 3; + return rval; +} + +void lwasm_show_errors(asmstate_t *as) +{ + line_t *cl; + lwasm_error_t *e; + + for (cl = as -> line_head; cl; cl = cl -> next) + { + if (!(cl -> err) && !(cl -> warn)) + continue; + for (e = cl -> err; e; e = e -> next) + { + fprintf(stderr, "ERROR: %s\n", e -> mess); + } + for (e = cl -> warn; e; e = e -> next) + { + fprintf(stderr, "WARNING: %s\n", e -> mess); + } + fprintf(stderr, "%s:%05d %s\n\n", cl -> linespec, cl -> lineno, cl -> ltext); + } +} + +/* +this does any passes and other gymnastics that might be useful +to see if an expression reduces early +*/ +extern void do_pass3(asmstate_t *as); +extern void do_pass4_aux(asmstate_t *as, int force); + +void lwasm_interim_reduce(asmstate_t *as) +{ + do_pass3(as); +// do_pass4_aux(as, 0); +} + +lw_expr_t lwasm_parse_cond(asmstate_t *as, char **p) +{ + lw_expr_t e; + + debug_message(as, 250, "Parsing condition"); + e = lwasm_parse_expr(as, p); + debug_message(as, 250, "COND EXPR: %s", lw_expr_print(e)); + + if (!e) + { + lwasm_register_error(as, as -> cl, "Bad expression"); + return NULL; + } + + /* we need to simplify the expression here */ + debug_message(as, 250, "Doing interim reductions"); + lwasm_interim_reduce(as); + debug_message(as, 250, "COND EXPR: %s", lw_expr_print(e)); + debug_message(as, 250, "Reducing expression"); + lwasm_reduce_expr(as, e); + debug_message(as, 250, "COND EXPR: %s", lw_expr_print(e)); +/* lwasm_reduce_expr(as, e); + debug_message(as, 250, "COND EXPR: %s", lw_expr_print(e)); + lwasm_reduce_expr(as, e); + debug_message(as, 250, "COND EXPR: %s", lw_expr_print(e)); + lwasm_reduce_expr(as, e); + debug_message(as, 250, "COND EXPR: %s", lw_expr_print(e)); +*/ + + lwasm_save_expr(as -> cl, 4242, e); + + if (!lw_expr_istype(e, lw_expr_type_int)) + { + debug_message(as, 250, "Non-constant expression"); + lwasm_register_error(as, as -> cl, "Conditions must be constant on pass 1"); + return NULL; + } + debug_message(as, 250, "Returning expression"); + return e; +}