Mercurial > hg > index.cgi
changeset 402:b20f14edda5a
Completed initial conversion to new parser allowing spaces in operands
Converted the remaining addressing modes. This required a complete rewrite
of a large portion of the indexed addressing parser. Now the entire indexed
parsing system is programmatic without cheating with a lookup table.
This update also fixes forcing a "0,r" offset by writing a literal 0,r, which is
*supposed* to work.
There will likely be some pseudo ops that need tweaking for space handling,
especially those that take multiple operands of some description which are
not expressions. (The expression parser call eats the spaces both before and
after the expression, if appropriate.)
author | William Astle <lost@l-w.ca> |
---|---|
date | Wed, 14 Oct 2015 20:49:41 -0600 |
parents | bbe5401a9bf3 |
children | f5a88f147fae |
files | lwasm/insn_bitbit.c lwasm/insn_gen.c lwasm/insn_indexed.c lwasm/insn_logicmem.c lwasm/insn_rlist.c lwasm/insn_rtor.c lwasm/insn_tfm.c |
diffstat | 7 files changed, 274 insertions(+), 138 deletions(-) [+] |
line wrap: on
line diff
--- a/lwasm/insn_bitbit.c Wed Oct 14 19:15:41 2015 -0600 +++ b/lwasm/insn_bitbit.c Wed Oct 14 20:49:41 2015 -0600 @@ -49,7 +49,7 @@ lwasm_register_error(as, l, E_REGISTER_BAD); return; } - + lwasm_skip_to_next_token(l, p); if (*(*p)++ != ',') { lwasm_register_error(as, l, E_OPERAND_BAD); @@ -81,7 +81,7 @@ lwasm_register_error(as, l, E_OPERAND_BAD); return; } - + lwasm_skip_to_next_token(l, p); // ignore base page address modifier if (**p == '<') (*p)++;
--- a/lwasm/insn_gen.c Wed Oct 14 19:15:41 2015 -0600 +++ b/lwasm/insn_gen.c Wed Oct 14 20:49:41 2015 -0600 @@ -37,36 +37,35 @@ // "extra" is required due to the way OIM, EIM, TIM, and AIM work void insn_parse_gen_aux(asmstate_t *as, line_t *l, char **p, int elen) { - const char *optr2; + char *optr2; int v1, tv; lw_expr_t s; - + if (!**p) { lwasm_register_error(as, l, E_OPERAND_BAD); return; } - optr2 = *p; - while (*optr2 && !isspace(*optr2) && *optr2 != ',') optr2++ - /* do nothing */ ; - - if (*optr2 == ',' || **p == '[') + /* this is the easy case - start it [ or , means indexed */ + if (**p == ',' || **p == '[') { +indexed: l -> lint = -1; - l -> lint2 = 1; + l -> lint2 = 1; insn_parse_indexed_aux(as, l, p); l -> minlen = OPLEN(instab[l -> insn].ops[1]) + 1 + elen; l -> maxlen = OPLEN(instab[l -> insn].ops[1]) + 3 + elen; goto out; } + /* we have to parse the first expression to find if we have a comma after it */ + optr2 = *p; if (**p == '<') { (*p)++; l -> lint2 = 0; } - // for compatibility with asxxxx // * followed by a digit, alpha, or _, or ., or ?, or another * is "f8" else if (**p == '*') @@ -87,10 +86,17 @@ { l -> lint2 = -1; } - - l -> minlen = OPLEN(instab[l -> insn].ops[0]) + 1 + elen; - l -> maxlen = OPLEN(instab[l -> insn].ops[2]) + 2 + elen; + lwasm_skip_to_next_token(l, p); + s = lwasm_parse_expr(as, p); + + if (**p == ',') + { + /* we have an indexed mode here - reset and transfer control to indexing mode */ + lw_expr_destroy(s); + *p = optr2; + goto indexed; + } if (!s) { lwasm_register_error(as, l, E_OPERAND_BAD); @@ -99,6 +105,8 @@ lwasm_save_expr(l, 0, s); + l -> minlen = OPLEN(instab[l -> insn].ops[0]) + 1 + elen; + l -> maxlen = OPLEN(instab[l -> insn].ops[2]) + 2 + elen; if (as -> output_format == OUTPUT_OBJ && l -> lint2 == -1) { l -> lint2 = 2;
--- a/lwasm/insn_indexed.c Wed Oct 14 19:15:41 2015 -0600 +++ b/lwasm/insn_indexed.c Wed Oct 14 20:49:41 2015 -0600 @@ -38,147 +38,250 @@ */ void insn_parse_indexed_aux(asmstate_t *as, line_t *l, char **p) { - struct opvals { char *opstr; int pb; }; - - static const char *regs = "X Y U S W PCRPC "; - static const struct opvals simpleindex[] = - { - {",x", 0x84}, {",y", 0xa4}, {",u", 0xc4}, {",s", 0xe4}, - {",x+", 0x80}, {",y+", 0xa0}, {",u+", 0xc0}, {",s+", 0xe0}, - {",x++", 0x81}, {",y++", 0xa1}, {",u++", 0xc1}, {",s++", 0xe1}, - {",-x", 0x82}, {",-y", 0xa2}, {",-u", 0xc2}, {",-s", 0xe2}, - {",--x", 0x83}, {",--y", 0xa3}, {",--u", 0xc3}, {",--s", 0xe3}, - {"a,x", 0x86}, {"a,y", 0xa6}, {"a,u", 0xc6}, {"a,s", 0xe6}, - {"b,x", 0x85}, {"b,y", 0xa5}, {"b,u", 0xc5}, {"b,s", 0xe5}, - {"e,x", 0x87}, {"e,y", 0xa7}, {"e,u", 0xc7}, {"e,s", 0xe7}, - {"f,x", 0x8a}, {"f,y", 0xaa}, {"f,u", 0xca}, {"f,s", 0xea}, - {"d,x", 0x8b}, {"d,y", 0xab}, {"d,u", 0xcb}, {"d,s", 0xeb}, - {"w,x", 0x8e}, {"w,y", 0xae}, {"w,u", 0xce}, {"w,s", 0xee}, - {",w", 0x8f}, {",w++", 0xcf}, {",--w", 0xef}, - - {"[,x]", 0x94}, {"[,y]", 0xb4}, {"[,u]", 0xd4}, {"[,s]", 0xf4}, - {"[,x++]", 0x91}, {"[,y++]", 0xb1}, {"[,u++]", 0xd1}, {"[,s++]", 0xf1}, - {"[,--x]", 0x93}, {"[,--y]", 0xb3}, {"[,--u]", 0xd3}, {"[,--s]", 0xf3}, - {"[a,x]", 0x96}, {"[a,y]", 0xb6}, {"[a,u]", 0xd6}, {"[a,s]", 0xf6}, - {"[b,x]", 0x95}, {"[b,y]", 0xb5}, {"[b,u]", 0xd5}, {"[b,s]", 0xf5}, - {"[e,x]", 0x97}, {"[e,y]", 0xb7}, {"[e,u]", 0xd7}, {"[e,s]", 0xf7}, - {"[f,x]", 0x9a}, {"[f,y]", 0xba}, {"[f,u]", 0xda}, {"[f,s]", 0xfa}, - {"[d,x]", 0x9b}, {"[d,y]", 0xbb}, {"[d,u]", 0xdb}, {"[d,s]", 0xfb}, - {"[w,x]", 0x9e}, {"[w,y]", 0xbe}, {"[w,u]", 0xde}, {"[w,s]", 0xfe}, - {"[,w]", 0x90}, {"[,w++]", 0xd0}, {"[,--w]", 0xf0}, - - { "", -1 } - }; - static const char *regs9 = "X Y U S PCRPC "; - static const struct opvals simpleindex9[] = - { - {",x", 0x84}, {",y", 0xa4}, {",u", 0xc4}, {",s", 0xe4}, - {",x+", 0x80}, {",y+", 0xa0}, {",u+", 0xc0}, 
{",s+", 0xe0}, - {",x++", 0x81}, {",y++", 0xa1}, {",u++", 0xc1}, {",s++", 0xe1}, - {",-x", 0x82}, {",-y", 0xa2}, {",-u", 0xc2}, {",-s", 0xe2}, - {",--x", 0x83}, {",--y", 0xa3}, {",--u", 0xc3}, {",--s", 0xe3}, - {"a,x", 0x86}, {"a,y", 0xa6}, {"a,u", 0xc6}, {"a,s", 0xe6}, - {"b,x", 0x85}, {"b,y", 0xa5}, {"b,u", 0xc5}, {"b,s", 0xe5}, - {"d,x", 0x8b}, {"d,y", 0xab}, {"d,u", 0xcb}, {"d,s", 0xeb}, - - {"[,x]", 0x94}, {"[,y]", 0xb4}, {"[,u]", 0xd4}, {"[,s]", 0xf4}, - {"[,x++]", 0x91}, {"[,y++]", 0xb1}, {"[,u++]", 0xd1}, {"[,s++]", 0xf1}, - {"[,--x]", 0x93}, {"[,--y]", 0xb3}, {"[,--u]", 0xd3}, {"[,--s]", 0xf3}, - {"[a,x]", 0x96}, {"[a,y]", 0xb6}, {"[a,u]", 0xd6}, {"[a,s]", 0xf6}, - {"[b,x]", 0x95}, {"[b,y]", 0xb5}, {"[b,u]", 0xd5}, {"[b,s]", 0xf5}, - {"[d,x]", 0x9b}, {"[d,y]", 0xbb}, {"[d,u]", 0xdb}, {"[d,s]", 0xfb}, - - { "", -1 } - }; - char stbuf[25]; - int i, j, rn; + static const char *regs = "X Y U S W PCRPC "; + int i, rn; int indir = 0; - int f0 = 1; - const struct opvals *simples; + int f0 = 0; const char *reglist; lw_expr_t e; - + char *tstr; + + if (CURPRAGMA(l, PRAGMA_6809)) { - simples = simpleindex9; reglist = regs9; } else { - simples = simpleindex; reglist = regs; } - - // fetch out operand for lookup - for (i = 0; i < 24; i++) - { - if (*((*p) + i) && !isspace(*((*p) + i))) - stbuf[i] = *((*p) + i); - else - break; - } - stbuf[i] = '\0'; - - // now look up operand in "simple" table - if (!*((*p) + i) || isspace(*((*p) + i))) - { - // do simple lookup - for (j = 0; simples[j].opstr[0]; j++) - { - if (!strcasecmp(stbuf, simples[j].opstr)) - break; - } - if (simples[j].opstr[0]) - { - l -> pb = simples[j].pb; - l -> lint = 0; - (*p) += i; - return; - } - } - - // now do the "hard" ones - // is it indirect? 
if (**p == '[') { indir = 1; (*p)++; } - - // look for a "," - all indexed modes have a "," except extended indir - rn = 0; - for (i = 0; (*p)[i] && !isspace((*p)[i]); i++) + lwasm_skip_to_next_token(l, p); + if (**p == ',') { - if ((*p)[i] == ',') + int incdec = 0; + /* we have a pre-dec, post-inc, or no offset mode here */ + (*p)++; + lwasm_skip_to_next_token(l, p); + if (**p == '-') { + incdec = -1; + (*p)++; + if (**p == '-') + { + incdec = -2; + (*p)++; + } + lwasm_skip_to_next_token(l, p); + } + /* allowed registers: X, Y, U, S, or W (6309) */ + switch (**p) + { + case 'x': + case 'X': + rn = 0; + break; + + case 'y': + case 'Y': rn = 1; break; - } - } - - // if no "," and indirect, do extended indir - if (!rn && indir) - { - // eat the extended addressing indicator if present - if (**p == '>') - (*p)++; - // extended indir - l -> pb = 0x9f; - e = lwasm_parse_expr(as, p); - if (!e || **p != ']') - { + + case 'u': + case 'U': + rn = 2; + break; + + case 's': + case 'S': + rn = 3; + break; + + case 'w': + case 'W': + if (CURPRAGMA(l, PRAGMA_6809)) + { + lwasm_register_error(as, l, E_OPERAND_BAD); + return; + } + rn = 4; + break; + + default: lwasm_register_error(as, l, E_OPERAND_BAD); return; } - lwasm_save_expr(l, 0, e); - (*p)++; - l -> lint = 2; + lwasm_skip_to_next_token(l, p); + if (**p == '+') + { + if (incdec != 0) + { + lwasm_register_error(as, l, E_OPERAND_BAD); + return; + } + incdec = 1; + (*p)++; + if (**p == '+') + { + incdec = 2; + (*p)++; + } + lwasm_skip_to_next_token(l, p); + } + if (indir) + { + if (**p != ']') + { + lwasm_register_error(as, l, E_OPERAND_BAD); + return; + } + (*p)++; + } + if (indir || rn == 4) + { + if (incdec == 1 || incdec == -1) + { + lwasm_register_error(as, l, E_OPERAND_BAD); + return; + } + } + if (rn == 4) + { + if (indir) + { + if (incdec == 0) + i = 0x90; + else if (incdec == -2) + i = 0xF0; + else + i = 0xD0; + } + else + { + if (incdec == 0) + i = 0x8F; + else if (incdec == -2) + i = 0xEF; + else + i = 0xCF; + } + 
} + else + { + switch (incdec) + { + case 0: + i = 0x84; + break; + case 1: + i = 0x80; + break; + case 2: + i = 0x81; + break; + case -1: + i = 0x82; + break; + case -2: + i = 0x83; + break; + } + i = (rn << 5) | i | (indir << 4); + } + l -> pb = i; + l -> lint = 0; return; } - + i = toupper(**p); + if ( + (i == 'A' || i == 'B' || i == 'D') || + (!CURPRAGMA(l, PRAGMA_6809) && (i == 'E' || i == 'F' || i == 'W')) + ) + { + tstr = *p + 1; + lwasm_skip_to_next_token(l, &tstr); + if (*tstr == ',') + { + *p = tstr + 1; + lwasm_skip_to_next_token(l, p); + switch (**p) + { + case 'x': + case 'X': + rn = 0; + break; + + case 'y': + case 'Y': + rn = 1; + break; + + case 'u': + case 'U': + rn = 2; + break; + + case 's': + case 'S': + rn = 3; + break; + + default: + lwasm_register_error(as, l, E_OPERAND_BAD); + return; + } + (*p)++; + lwasm_skip_to_next_token(l, p); + if (indir) + { + if (**p != ']') + { + lwasm_register_error(as, l, E_OPERAND_BAD); + return; + } + (*p)++; + } + + switch (i) + { + case 'A': + i = 0x86; + break; + + case 'B': + i = 0x85; + break; + + case 'D': + i = 0x8B; + break; + + case 'E': + i = 0x87; + break; + + case 'F': + i = 0x8A; + break; + + case 'W': + i = 0x8E; + break; + } + l -> pb = i | (indir << 4) | (rn << 5); + l -> lint = 0; + return; + } + } + + /* we have the "expression" types now */ if (**p == '<') { l -> lint = 1; @@ -189,12 +292,17 @@ l -> lint = 2; (*p)++; } - - if (**p == '0' && *((*p)+1) == ',') + lwasm_skip_to_next_token(l, p); + if (**p == '0') { - f0 = 1; + tstr = *p + 1; + lwasm_skip_to_next_token(l, &tstr); + if (*tstr == ',') + { + f0 = 1; + } } - + // now we have to evaluate the expression e = lwasm_parse_expr(as, p); if (!e) @@ -203,14 +311,22 @@ return; } lwasm_save_expr(l, 0, e); - - // now look for a comma; if not present, explode - if (*(*p)++ != ',') + + if (**p != ',') { - lwasm_register_error(as, l, E_OPERAND_BAD); + /* if no comma, we have extended indirect */ + if (l -> lint == 1 || **p != ']') + { + 
lwasm_register_error(as, l, E_OPERAND_BAD); + return; + } + (*p)++; + l -> lint = 2; + l -> pb = 0x9F; return; } - + (*p)++; + lwasm_skip_to_next_token(l, p); // now get the register rn = lwasm_lookupreg3(reglist, p); if (rn < 0) @@ -462,7 +578,8 @@ { // we know how big it is v = lw_expr_intval(e); - if (v == 0 && !CURPRAGMA(l, PRAGMA_NOINDEX0TONONE) && (l -> pb & 0x07) <= 4) + + if (v == 0 && !CURPRAGMA(l, PRAGMA_NOINDEX0TONONE) && (l -> pb & 0x07) <= 4 && ((l -> pb & 0x40) == 0)) { if ((l -> pb & 0x07) < 4) {
--- a/lwasm/insn_logicmem.c Wed Oct 14 19:15:41 2015 -0600 +++ b/lwasm/insn_logicmem.c Wed Oct 14 20:49:41 2015 -0600 @@ -50,6 +50,7 @@ } lwasm_save_expr(l, 100, s); + lwasm_skip_to_next_token(l, p); if (**p != ',' && **p != ';') { lwasm_register_error(as, l, E_OPERAND_BAD); @@ -57,7 +58,7 @@ } (*p)++; - + lwasm_skip_to_next_token(l, p); // now we have a general addressing mode - call for it insn_parse_gen_aux(as, l, p, 1); }
--- a/lwasm/insn_rlist.c Wed Oct 14 19:15:41 2015 -0600 +++ b/lwasm/insn_rlist.c Wed Oct 14 20:49:41 2015 -0600 @@ -41,12 +41,16 @@ lwasm_register_error2(as, l, E_REGISTER_BAD, "'%s'", *p); return; } + lwasm_skip_to_next_token(l, p); if (**p && **p != ',' && !isspace(**p)) { lwasm_register_error(as, l, E_OPERAND_BAD); } if (**p == ',') + { (*p)++; + lwasm_skip_to_next_token(l, p); + } if ((instab[l -> insn].ops[0]) & 2) { // pshu/pulu
--- a/lwasm/insn_rtor.c Wed Oct 14 19:15:41 2015 -0600 +++ b/lwasm/insn_rtor.c Wed Oct 14 20:49:41 2015 -0600 @@ -35,6 +35,7 @@ // A,B,CC,DP,0,0,E,F r0 = lwasm_lookupreg2(!CURPRAGMA(l, PRAGMA_6809) ? regs : regs9, p); + lwasm_skip_to_next_token(l, p); if (r0 < 0 || *(*p)++ != ',') { lwasm_register_error(as, l, E_OPERAND_BAD); @@ -42,6 +43,7 @@ } else { + lwasm_skip_to_next_token(l, p); r1 = lwasm_lookupreg2(!CURPRAGMA(l, PRAGMA_6809) ? regs : regs9, p); if (r1 < 0) {
--- a/lwasm/insn_tfm.c Wed Oct 14 19:15:41 2015 -0600 +++ b/lwasm/insn_tfm.c Wed Oct 14 20:49:41 2015 -0600 @@ -48,11 +48,13 @@ (*p)++; tfm = 2; } + lwasm_skip_to_next_token(l, p); if (*(*p)++ != ',') { lwasm_register_error(as, l, E_UNKNOWN_OPERATION); return; } + lwasm_skip_to_next_token(l, p); c = strchr(reglist, toupper(*(*p)++)); if (!c) { @@ -131,6 +133,7 @@ // D,X,Y,U,S,PC,W,V // A,B,CC,DP,0,0,E,F r0 = lwasm_lookupreg2(regs, p); + lwasm_skip_to_next_token(l, p); if (r0 < 0 || *(*p)++ != ',') { lwasm_register_error(as, l, E_OPERAND_BAD); @@ -138,6 +141,7 @@ } else { + lwasm_skip_to_next_token(l, p); r1 = lwasm_lookupreg2(regs, p); if (r1 < 0) {