comparison src/lwasm.c @ 26:d2e86babd958

Added error tracking infrastructure
author lost
date Fri, 02 Jan 2009 02:38:02 +0000
parents 05d4115b4860
children f736579569b4
comparison
equal deleted inserted replaced
25:3b818f05dc2a 26:d2e86babd958
15 more details. 15 more details.
16 16
17 You should have received a copy of the GNU General Public License along with 17 You should have received a copy of the GNU General Public License along with
18 this program. If not, see <http://www.gnu.org/licenses/>. 18 this program. If not, see <http://www.gnu.org/licenses/>.
19 19
20 Contains the main code for lwasm 20
21 Contains random functions used by the assembler
21 */ 22 */
22 23
23 #include <ctype.h> 24 #define __lwasm_c_seen__
24 #include <errno.h> 25
26 #include <stdarg.h>
27 #include <stdlib.h>
25 #include <stdio.h> 28 #include <stdio.h>
26 #include <stdlib.h> 29
27 #include <string.h>
28 #define __lwasm_c_seen__
29 #include "instab.h"
30 #include "lwasm.h" 30 #include "lwasm.h"
31 #include "util.h"
31 32
32 void lwasm_read_file(asmstate_t *as, char *fname); 33 int register_error(asmstate_t *as, lwasm_line_t *l, int pass, const char *fmt, ...)
33 extern int add_macro_line(asmstate_t *as, sourceline_t *cl, char *optr);
34 extern void expand_macro(asmstate_t *as, sourceline_t *cl, char **optr);
35
36 #define debug(mess, ...) do { if (as->debug) { fprintf(stderr, "DEBUG: "); fprintf(stderr, (mess), ## __VA_ARGS__); } } while (0)
37
38 void register_error(asmstate_t *as, sourceline_t *cl, int errcode)
39 { 34 {
40 errortab_t *e; 35 lwasm_error_t *e;
36 va_list args;
37 char errbuff[1024];
38 int r;
41 39
42 e = malloc(sizeof(errortab_t)); 40 if (as -> passnum != pass)
41 return;
43 42
44 e -> errnum = errcode; 43 va_start(args, fmt);
45 e -> line = cl; 44
46 e -> next = cl -> errors; 45 e = lwasm_alloc(sizeof(lwasm_error_t));
47 cl -> errors = e; 46
47 e -> next = l -> err;
48 l -> err = e;
48 49
49 as -> errorcount++; 50 as -> errorcount++;
51
52 r = vsnprintf(errbuff, 1024, fmt, args);
53 e -> mess = lwasm_strdup(errbuff);
54
55 va_end(args);
56
57 return r;
50 } 58 }
51
52 int eval_expr(asmstate_t *as, sourceline_t *cl, char **optr, int *val);
53
/*
 * Return the smallest of the four integer arguments.
 */
int eval_min(int v1, int v2, int v3, int v4)
{
	int lowest = v1;

	lowest = (v2 < lowest) ? v2 : lowest;
	lowest = (v3 < lowest) ? v3 : lowest;
	lowest = (v4 < lowest) ? v4 : lowest;

	return lowest;
}
64
/*
 * Return the largest of the four integer arguments.
 */
int eval_max(int v1, int v2, int v3, int v4)
{
	int highest = v1;

	highest = (v2 > highest) ? v2 : highest;
	highest = (v3 > highest) ? v3 : highest;
	highest = (v4 > highest) ? v4 : highest;

	return highest;
}
75
/*
 * Look up a one- to three-character register name at the start of *str.
 *
 * rlist is a table of fixed-width, three-character entries; a name shorter
 * than three characters is padded on the right with spaces ("D  ", "PC ").
 * The input is upper-cased before comparing, so the table entries must be
 * upper case.  Only as many input characters as the entry has non-space
 * characters are compared, so a one-character entry matches on the first
 * character alone regardless of what follows it.
 *
 * Returns the zero-based index of the first matching entry and advances
 * *str past the matched name.  Returns -1 and leaves *str untouched when
 * nothing matches.
 */
int lookupreg3(const char *rlist, char **str)
{
	/* <ctype.h> functions are only defined for unsigned char values */
	const unsigned char *in = (const unsigned char *)*str;
	const char *entry;
	int idx = 0;

	for (entry = rlist; *entry; entry += 3, idx++)
	{
		int namelen;

		/* a truncated final entry would make the stride of 3 run off the table */
		if (entry[1] == '\0' || entry[2] == '\0')
			break;

		if (toupper(in[0]) != (unsigned char)entry[0])
			continue;

		/*
		 * in[1] and in[2] are only read after the preceding input
		 * character matched a non-NUL table character, so the reads
		 * never go past the input's terminator.
		 */
		if (entry[1] == ' ')
			namelen = 1;
		else if (toupper(in[1]) != (unsigned char)entry[1])
			continue;
		else if (entry[2] == ' ')
			namelen = 2;
		else if (toupper(in[2]) != (unsigned char)entry[2])
			continue;
		else
			namelen = 3;

		*str += namelen;
		return idx;
	}

	return -1;
}
123
124
/*
 * Look up a one- or two-character register name at the start of *str.
 *
 * reglist is a table of fixed-width, two-character entries; a one-character
 * name is padded with a trailing space ("D ", "PC").  The input is
 * upper-cased before comparing, so the table entries must be upper case.
 * A one-character entry only matches when the following input character is
 * not alphabetic, so "DP" is not mistaken for "D".
 *
 * Returns the zero-based index of the first matching entry and advances
 * *str past the matched name.  Returns -1 and leaves *str untouched when
 * nothing matches.
 */
int lookupreg(const char *reglist, char **str)
{
	/* <ctype.h> functions are only defined for unsigned char values */
	const unsigned char *in = (const unsigned char *)*str;
	int idx = 0;

	for (; *reglist; reglist += 2, idx++)
	{
		/* a truncated final entry would make the stride of 2 run off the table */
		if (reglist[1] == '\0')
			break;

		if (toupper(in[0]) != (unsigned char)reglist[0])
			continue;

		/* in[0] matched a non-NUL table character, so in[1] is readable */
		if (reglist[1] == ' ' && !isalpha(in[1]))
		{
			*str += 1;
			return idx;
		}
		if (toupper(in[1]) == (unsigned char)reglist[1])
		{
			*str += 2;
			return idx;
		}
	}

	return -1;
}
149
150 void addcodebyte(asmstate_t *as, sourceline_t *cl, int cb)
151 {
152 cl -> len += 1;
153 if (as -> passnum != 2)
154 return;
155
156 if (cl -> numcodebytes >= cl -> codesize)
157 {
158 cl -> codebytes = realloc(cl -> codebytes, cl -> codesize + 32);
159 cl -> codesize += 32;
160 }
161 debug("EMIT: %02x\n", cb & 0xff);
162 cl -> codebytes[cl -> numcodebytes++] = cb & 0xFF;
163 }
164
165 // parse a symble out of the line and return a pointer
166 // to a static pointer
167 // return NULL if not a symbol or a bad symbol
168 char *parse_symbol(asmstate_t *as, char **ptr)
169 {
170 static char *symptr = NULL;
171 char *tptr = *ptr;
172 int sl = 0;
173
174 // symbol can start with _,a-z,A-Z
175
176 if (!strchr(SYMCHAR_START, **ptr))
177 return NULL;
178
179 while (*tptr && !isspace(*tptr) && strchr(SYMCHAR, *tptr))
180 {
181 tptr++;
182 sl++;
183 }
184
185 symptr = realloc(symptr, sl + 1);
186 tptr = symptr;
187 while (sl)
188 {
189 *tptr++ = *(*ptr)++;
190 sl--;
191 }
192 *tptr = '\0';
193 return symptr;
194 }
195
196 // resolve an instruction
197 void resolve_insn(asmstate_t *as, sourceline_t *cl)
198 {
199 char *optr;
200 char opbuf[MAX_OP_LEN + 1];
201 char *symbol = NULL;
202 int c;
203
204 cl -> code_symloc = as -> addr;
205
206 cl -> addrset = 0;
207 cl -> isequ = 0;
208 cl -> len = 0;
209 cl -> undef = 0;
210
211 // only parse line on first pass
212 if (as -> passnum == 1)
213 {
214 optr = cl -> line;
215 if (!*optr || *optr == '*' || *optr == ';')
216 {
217 cl -> opcode = -1;
218 cl -> remainder = cl -> line;
219 return;
220 }
221
222 if (!isspace(*optr))
223 {
224 symbol = parse_symbol(as, &optr);
225 if (*optr && !isspace(*optr) && !(as -> inmacro))
226 {
227 errorp1(ERR_BADSYM);
228 while (*optr && !isspace(*optr))
229 optr++;
230 }
231 if (symbol)
232 {
233 cl -> symstr = strdup(symbol);
234 cl -> hassym = 1;
235 }
236 }
237
238 while (isspace(*optr))
239 optr++;
240
241 // parse opcode
242 if (*optr && *optr != ';')
243 {
244 c = 0;
245 while (c < MAX_OP_LEN && *optr && !isspace(*optr))
246 {
247 opbuf[c++] = *optr++;
248 }
249 opbuf[c] = '\0';
250 if (*optr && !isspace(*optr) && !(as -> inmacro))
251 {
252 errorp1(ERR_BADOP);
253 cl -> opcode = -1;
254 }
255 else
256 {
257 cl -> opcstr = strdup(opbuf);
258 for (c = 0; instab[c].opcode; c++)
259 {
260 if (!strcasecmp(opbuf, instab[c].opcode))
261 break;
262 }
263 if (!instab[c].opcode && opbuf[0] == '*')
264 {
265 cl -> opcode = -1;
266 }
267 else if (!instab[c].opcode && !(as -> inmacro))
268 {
269 cl -> opcode = -1;
270
271 // look up macro
272 if (as -> macros)
273 {
274 macrotab_t *m;
275
276 for (m = as -> macros; m; m = m -> next)
277 {
278 if (!strcmp(m -> name, opbuf))
279 break;
280 }
281 if (m)
282 {
283 // we have a macro here
284 cl -> macro = m;
285 while (*optr && isspace(*optr))
286 optr++;
287 expand_macro(as, cl, &optr);
288 return;
289 }
290 else
291 {
292 errorp1(ERR_BADOP);
293 }
294 }
295 else
296 {
297 errorp1(ERR_BADOP);
298 }
299 }
300 else
301 cl -> opcode = c;
302 }
303 }
304 else
305 cl -> opcode = -1;
306
307 if (as -> inmacro && cl -> opcode >= 0 && instab[cl -> opcode].specialnum != SPECIAL_ENDM)
308 {
309 add_macro_line(as, cl, cl -> line);
310 cl -> opcode = -1;
311 cl -> remainder = cl -> line;
312 cl -> opcstr = NULL;
313 cl -> operstr = NULL;
314 cl -> symstr = NULL;
315 cl -> hassym = 0;
316 cl -> macrodef = 1;
317 return;
318 }
319 // parse operand
320 while (*optr && isspace(*optr))
321 optr++;
322
323 cl -> operstr = optr;
324 }
325 else
326 optr = cl -> operstr;
327
328 if (as -> skipcond)
329 {
330 // if skipping a condition, need to skip a macro
331 if (cl -> opcode >= 0)
332 {
333 if (instab[cl -> opcode].specialnum == SPECIAL_MACRO)
334 {
335 as -> skipmacro = 1;
336 }
337 else if (instab[cl -> opcode].specialnum == SPECIAL_ENDM)
338 {
339 as -> skipmacro = 0;
340 }
341 else if (instab[cl -> opcode].specialnum == SPECIAL_COND && !(as -> skipmacro))
342 {
343 as -> skipcount++;
344 }
345 else if (instab[cl -> opcode].specialnum == SPECIAL_ENDC && !(as -> skipmacro))
346 {
347 as -> skipcount--;
348 if (as -> skipcount <= 0)
349 {
350 as -> skipcond = 0;
351 as -> noelse = 0;
352 }
353 }
354 else if (instab[cl -> opcode].specialnum == SPECIAL_ELSE && !(as -> skipmacro))
355 {
356 if (as -> skipcount == 1)
357 {
358 as -> skipcount = 0;
359 as -> skipcond = 0;
360 as -> noelse = 1;
361 return;
362 }
363 }
364 }
365 if (as -> skipcond)
366 cl -> skipped = 1;
367 return;
368 }
369
370 // do the code thing
371 // on pass 1, no code is generated
372 // on pass 2, code is generated using the "emit()" macro
373 if (cl -> opcode >= 0)
374 {
375 if (instab[cl -> opcode].opfn)
376 {
377 (*(instab[cl -> opcode].opfn))(as, cl, &optr);
378 if (as -> passnum == 1)
379 {
380 if (*optr)
381 {
382 char *t = optr;
383 char t2;
384
385 t2 = *optr;
386 cl -> operstr = strdup(cl -> operstr);
387 *optr = t2;
388 while (*t && isspace(*t))
389 t++;
390 cl -> remainder = strdup(t);
391
392 }
393 cl -> remainder = optr;
394 }
395 }
396 else
397 {
398 errorp1(ERR_BADOP);
399 cl -> opcode = -1;
400 }
401 }
402 // address of the symbol may have been changed by a pseudo op
403 // so we couldn't register it above
404 // that means it may turn out to be a "forward ref" in pass 1
405 if (cl -> hassym)
406 {
407 register_symbol(as, cl, cl -> symstr, cl -> code_symloc, cl -> isset ? SYMFLAG_SET : SYMFLAG_NONE);
408 }
409
410 as -> addr += cl -> len;
411 }
412
413 void generate_code(asmstate_t *as)
414 {
415 sourceline_t *cl;
416
417 as -> addr = 0;
418 as -> dpval = 0;
419 as -> passnum = 2;
420 for (cl = as -> source_head; cl; cl = cl -> next)
421 {
422 resolve_insn(as, cl);
423 }
424 }
425
426
427 /*
428 below this point is the expression evaluation package
429
430 Supported binary operators: + - / * %
431 Supported unary operators: -
432
433 <infix>: + | - | * | / | %
434 <unary>: -
435 <expr>: <term> <infix> <term>
436 <term>: <unary> <term>
437 <term>: ( <expr> )
438 <term>: <symbol>
439 <term>: ' <char>
440 <term>: " <char> <char>
441 <term>: *
442 <term>: <number>
443
444 <number>: <dec>
445 <number>: & <dec>
446
447 <number>: $ <hex>
448 <number>: <hex> H
449 <number>: @ <oct>
450 <number>: <oct> O
451 <number>: <oct> Q
452
453 <number>: % <bin>
454 <number>: <bin> B
455
456 <bin>: 0 | 1
457 <oct>: <bin> | 2 | 3 | 4 | 5 | 6 | 7
458 <dec>: <oct> | 8 | 9
459 <hex>: <dec> | A | B | C | D | E | F
460
461 NOTE: hex values which start with a non-digit will need to be prefixed
462 by $ or have a 0 as the leading digit; hence: $CC or 0CCH otherwise the
463 assembler cannot tell the difference between CCH as a symbol or CCH as
464 the value $CC
465
466 */
467
468 // will throw an error and return 0 in tval if there's a problem
469 // -1 is problem; cl -> undef set is undefined symbol
470 int eval_term(asmstate_t *as, sourceline_t *cl, char **optr, int *tval)
471 {
472 char tc;
473 int rval;
474 int binval;
475 int octval;
476 int decval;
477 int hexval;
478 int valtype;
479 int digval;
480 int bindone = 0;
481
482 *tval = 0;
483
484 beginagain:
485 tc = **optr;
486 if (tc == '+')
487 {
488 // unary +, ignored for symetry
489 (*optr)++;
490 goto beginagain;
491 }
492
493 if (tc == '(')
494 {
495 (*optr)++;
496 rval = eval_expr(as, cl, optr, tval);
497 if (rval < 0)
498 return rval;
499 if (**optr != ')')
500 {
501 errorp1(ERR_BADEXPR);
502 return -1;
503 }
504 (*optr)++;
505 return 0;
506 }
507
508 if (tc == '-')
509 {
510 (*optr)++;
511 rval = eval_term(as, cl, optr, tval);
512 if (rval < 0)
513 return rval;
514 *tval = -*tval;
515 return 0;
516 }
517
518 // current address (of current instruction, not PC)
519 if (tc == '*')
520 {
521 *tval = cl -> addr;
522 (*optr)++;
523 return 0;
524 }
525
526 if (strchr("abcdefghijklmnopqrstuvwxyz_", tolower(tc)))
527 {
528 // evaluate a symbol
529 char *symbuf;
530
531 symbuf = parse_symbol(as, optr);
532 if (!symbuf)
533 {
534 errorp1(ERR_BADSYM);
535 *tval = 0;
536 return -1;
537 }
538
539 debug(" looking up symbol: %s\n", symbuf);
540 *tval = lookup_symbol(as, symbuf);
541
542 // if not found, flag forward ref
543 if (*tval == -1)
544 {
545 errorp2(ERR_UNDEF);
546 cl -> undef = 1;
547 *tval = 0;
548 return 0;
549 }
550 return 0;
551 }
552
553 if (tc == '%')
554 {
555 // binary number
556 int v1 = 0;
557 (*optr)++;
558 while (strchr("01", **optr))
559 {
560 v1 = v1 << 1 | ((*(*optr)++) - '0');
561 }
562 *tval = v1;
563 return 0;
564 }
565 if (tc == '$')
566 {
567 // hex number
568 int v1 = 0;
569 (*optr)++;
570 debug("HEX CONST: %s\n", *optr);
571 while (**optr && strchr("01234567890ABCDEF", toupper(tc = **optr)))
572 {
573 debug("HEX 2: %02x\n", tc);
574 if (**optr >= 'A')
575 {
576 v1 = v1 << 4 | (toupper((*(*optr)++)) - 'A' + 10);
577 }
578 else
579 {
580 v1 = v1 << 4 | ((*(*optr)++) - '0');
581 }
582 }
583 *tval = v1;
584 return 0;
585 }
586 if (tc == '@')
587 {
588 // octal number
589 int v1 = 0;
590 (*optr)++;
591 while (strchr("01234567", **optr))
592 {
593 v1 = v1 << 3 | ((*(*optr)++) - '0');
594 }
595 *tval = v1;
596 return 0;
597 }
598 if (tc == '&')
599 {
600 // decimal number
601 int v1 = 0;
602 (*optr)++;
603 while (strchr("0123456789", **optr))
604 {
605 v1 = v1 * 10 + ((*(*optr)++) - '0');
606 }
607 *tval = v1;
608 return 0;
609 }
610 if (tc == '\'')
611 {
612 (*optr)++;
613 if (!**optr)
614 {
615 errorp1(ERR_BADEXPR);
616 return -2;
617 }
618 *tval = *(*optr)++;
619 return 0;
620 }
621 if (tc == '"')
622 {
623 (*optr)++;
624 if (!**optr || !*(*optr + 1))
625 {
626 errorp1(ERR_BADEXPR);
627 return -2;
628 }
629 *tval = *(*optr)++ << 8 | *(*optr)++;
630 return 0;
631 }
632 // end of string
633 if (tc == '\0')
634 {
635 // error if at EOS as we are looking for a term
636 errorp1(ERR_BADEXPR);
637 return -1;
638 }
639
640 // we have a generic number here which may be decimal, hex, binary, or octal
641 // based on a suffix
642
643 // possible data types are binary (1), octal (2), decimal(4), hex (8)
644 valtype = 15;
645 hexval = octval = decval = binval = 0;
646 while (1)
647 {
648
649 // printf(" %c\n", **optr);
650 if (!**optr || !strchr("ABCDEFabcdefqhoQHO0123456789", **optr))
651 {
652 // end of string, must be decimal or the end of a bin
653 if (bindone == 1)
654 {
655 *tval = binval;
656 return 0;
657 }
658 if (valtype & 4)
659 {
660 *tval = decval;
661 return 0;
662 }
663 else
664 {
665 errorp1(ERR_BADEXPR);
666 return -1;
667 }
668 }
669 tc = toupper(*(*optr)++);
670
671 if (tc == 'H')
672 {
673 if (valtype & 8)
674 {
675 *tval = hexval;
676 return 0;
677 }
678 else
679 {
680 // syntax error
681 errorp1(ERR_BADEXPR);
682 return -1;
683 }
684 }
685
686 if (tc == 'Q' || tc == 'O')
687 {
688 if (valtype && 2)
689 {
690 *tval = octval;
691 return 0;
692 }
693 else
694 {
695 errorp1(ERR_BADEXPR);
696 return -1;
697 }
698 }
699
700 digval = tc - '0';
701 if (digval > 9)
702 digval -= 7;
703
704 // if it's not in the range of a hex digit, error out
705 if (tc < '0' || (tc > '9' && tc < 'A') || tc > 'F')
706 {
707 (*optr)--;
708 if (valtype & 4)
709 {
710 *tval = decval;
711 return 0;
712 }
713 // if we're in hex/bin mode and run to the end of the number
714 // we must have a binary constant or an error
715 // if the previous character is B, then we have binary
716 // else we have error since hex would require a terminating H
717 // which would be caught above
718 if (valtype == 8 && toupper(*(*optr)) == 'B')
719 {
720 *tval = binval;
721 return 0;
722 }
723 errorp1(ERR_BADEXPR);
724 return -1;
725 }
726
727 // if we have any characters past the end of the B, it's not binary
728 if (bindone == 1)
729 bindone = 0;
730 if (tc == 'B')
731 bindone = 1;
732 if (digval > 1)
733 valtype &= 14;
734 else if (digval > 7)
735 valtype &= 13;
736 else if (digval > 9)
737 valtype &= 11;
738
739 if (valtype & 8)
740 {
741 hexval = (hexval << 4) | digval;
742 }
743 if (valtype & 4)
744 {
745 decval = decval * 10 + digval;
746 }
747 if (valtype & 2)
748 {
749 octval = (octval << 3) | digval;
750 }
751 if (valtype & 1 && !bindone)
752 {
753 binval = (binval << 1) | digval;
754 }
755
756 }
757 // can't get here from there
758 }
759
760 // returns -1 if the expression cannot be parsed
761 // and returns -2 if there is an undefined symbol reference
762 // resulting value will be in *val; undefined symbols are parsed as
763 // value 0 but cl -> undef will be set.
764 int eval_expr(asmstate_t *as, sourceline_t *cl, char **optr, int *val)
765 {
766 int left;
767 int right;
768 char oper;
769 int rval;
770
771 // by default, return 0 in val
772 *val = 0;
773 cl -> undef = 0;
774
775 rval = eval_term(as, cl, optr, &left);
776 if (rval < 0)
777 return rval;
778
779 nextop:
780 oper = **optr;
781
782 // end of expr
783 if (isspace(oper) || oper == ',' || oper == '\0' || oper == ']' || oper == ')')
784 goto retleft;
785
786 // unrecognized chars
787 if (!strchr("+-*/%", oper))
788 goto retleft;
789
790 (*optr)++;
791
792 rval = eval_term(as, cl, optr, &right);
793 // propagate error
794 if (rval < 0)
795 return rval;
796
797 // do the operation and put it in "left"
798 switch (oper)
799 {
800 case '+':
801 left += right;
802 break;
803
804 case '-':
805 left -= right;
806 break;
807
808 case '*':
809 left *= right;
810 break;
811
812 case '/':
813 left /= right;
814 break;
815
816 case '%':
817 left %= right;
818 break;
819 }
820
821 goto nextop;
822
823 retleft:
824 *val = left;
825 return 0;
826 }