comparison lwasm/lwasm.c @ 0:2c24602be78f

Initial import from lwtools 3.0.1 version, with new hand built build system and file reorganization
author lost@l-w.ca
date Wed, 19 Jan 2011 22:27:17 -0700
parents
children 7317fbe024af
comparison
equal deleted inserted replaced
-1:000000000000 0:2c24602be78f
1 /*
2 lwasm.c
3
4 Copyright © 2010 William Astle
5
6 This file is part of LWTOOLS.
7
8 LWTOOLS is free software: you can redistribute it and/or modify it under the
9 terms of the GNU General Public License as published by the Free Software
10 Foundation, either version 3 of the License, or (at your option) any later
11 version.
12
13 This program is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 more details.
17
18 You should have received a copy of the GNU General Public License along with
19 this program. If not, see <http://www.gnu.org/licenses/>.
20 */
21
22 #define ___lwasm_c_seen___
23
#include <ctype.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
27
28 #include <lw_expr.h>
29 #include <lw_alloc.h>
30 #include <lw_string.h>
31
32 #include "lwasm.h"
33
34 void lwasm_register_error(asmstate_t *as, line_t *l, const char *msg, ...);
35
36 int lwasm_expr_exportable(asmstate_t *as, lw_expr_t expr)
37 {
38 return 0;
39 }
40
41 int lwasm_expr_exportval(asmstate_t *as, lw_expr_t expr)
42 {
43 return 0;
44 }
45
46 lw_expr_t lwasm_evaluate_var(char *var, void *priv)
47 {
48 asmstate_t *as = (asmstate_t *)priv;
49 lw_expr_t e;
50 importlist_t *im;
51 struct symtabe *s;
52
53 s = lookup_symbol(as, as -> cl, var);
54 if (s)
55 {
56 e = lw_expr_build(lw_expr_type_special, lwasm_expr_syment, s);
57 return e;
58 }
59
60 // undefined here is undefied unless output is object
61 if (as -> output_format != OUTPUT_OBJ)
62 goto nomatch;
63
64 // check for import
65 for (im = as -> importlist; im; im = im -> next)
66 {
67 if (!strcmp(im -> symbol, var))
68 break;
69 }
70
71 // check for "undefined" to import automatically
72 if (!im && CURPRAGMA(as -> cl, PRAGMA_UNDEFEXTERN))
73 {
74 im = lw_alloc(sizeof(importlist_t));
75 im -> symbol = lw_strdup(var);
76 im -> next = as -> importlist;
77 as -> importlist = im;
78 }
79
80 if (!im)
81 goto nomatch;
82
83 e = lw_expr_build(lw_expr_type_special, lwasm_expr_import, im);
84 return e;
85
86 nomatch:
87 if (as -> badsymerr)
88 {
89 lwasm_register_error(as, as -> cl, "Undefined symbol %s", var);
90 }
91 return NULL;
92 }
93
94 lw_expr_t lwasm_evaluate_special(int t, void *ptr, void *priv)
95 {
96 switch (t)
97 {
98 case lwasm_expr_secbase:
99 {
100 // sectiontab_t *s = priv;
101 asmstate_t *as = priv;
102 if (as -> exportcheck && ptr == as -> csect)
103 return lw_expr_build(lw_expr_type_int, 0);
104 return NULL;
105 }
106
107 case lwasm_expr_linelen:
108 {
109 line_t *cl = ptr;
110 if (cl -> len == -1)
111 return NULL;
112 return lw_expr_build(lw_expr_type_int, cl -> len);
113 }
114 break;
115
116 case lwasm_expr_lineaddr:
117 {
118 line_t *cl = ptr;
119 if (cl -> addr)
120 return lw_expr_copy(cl -> addr);
121 else
122 return NULL;
123 }
124
125 case lwasm_expr_syment:
126 {
127 struct symtabe *sym = ptr;
128 return lw_expr_copy(sym -> value);
129 }
130
131 case lwasm_expr_import:
132 {
133 return NULL;
134 }
135
136 case lwasm_expr_nextbp:
137 {
138 line_t *cl = ptr;
139 for (cl = cl -> next; cl; cl = cl -> next)
140 {
141 if (cl -> isbrpt)
142 break;
143 }
144 if (cl)
145 {
146 return lw_expr_copy(cl -> addr);
147 }
148 return NULL;
149 }
150
151 case lwasm_expr_prevbp:
152 {
153 line_t *cl = ptr;
154 for (cl = cl -> prev; cl; cl = cl -> prev)
155 {
156 if (cl -> isbrpt)
157 break;
158 }
159 if (cl)
160 {
161 return lw_expr_copy(cl -> addr);
162 }
163 return NULL;
164 }
165 }
166 return NULL;
167 }
168
169 void lwasm_register_error(asmstate_t *as, line_t *l, const char *msg, ...)
170 {
171 lwasm_error_t *e;
172 va_list args;
173 char errbuff[1024];
174 int r;
175
176 if (!l)
177 return;
178
179 va_start(args, msg);
180
181 e = lw_alloc(sizeof(lwasm_error_t));
182
183 e -> next = l -> err;
184 l -> err = e;
185
186 as -> errorcount++;
187
188 r = vsnprintf(errbuff, 1024, msg, args);
189 e -> mess = lw_strdup(errbuff);
190
191 va_end(args);
192 }
193
194 void lwasm_register_warning(asmstate_t *as, line_t *l, const char *msg, ...)
195 {
196 lwasm_error_t *e;
197 va_list args;
198 char errbuff[1024];
199 int r;
200
201 if (!l)
202 return;
203
204 va_start(args, msg);
205
206 e = lw_alloc(sizeof(lwasm_error_t));
207
208 e -> next = l -> err;
209 l -> err = e;
210
211 as -> errorcount++;
212
213 r = vsnprintf(errbuff, 1024, msg, args);
214 e -> mess = lw_strdup(errbuff);
215
216 va_end(args);
217 }
218
219 int lwasm_next_context(asmstate_t *as)
220 {
221 int r;
222 r = as -> nextcontext;
223 as -> nextcontext++;
224 return r;
225 }
226
227 void lwasm_emit(line_t *cl, int byte)
228 {
229 if (cl -> outputl < 0)
230 cl -> outputl = 0;
231
232 if (cl -> outputl == cl -> outputbl)
233 {
234 cl -> output = lw_realloc(cl -> output, cl -> outputbl + 8);
235 cl -> outputbl += 8;
236 }
237 cl -> output[cl -> outputl++] = byte & 0xff;
238
239 if (cl -> inmod)
240 {
241 asmstate_t *as = cl -> as;
242 // update module CRC
243 // this is a direct transliteration from the nitros9 asm source
244 // to C; it can, no doubt, be optimized for 32 bit processing
245 byte &= 0xff;
246
247 byte ^= (as -> crc)[0];
248 (as -> crc)[0] = (as -> crc)[1];
249 (as -> crc)[1] = (as -> crc)[2];
250 (as -> crc)[1] ^= (byte >> 7);
251 (as -> crc)[2] = (byte << 1);
252 (as -> crc)[1] ^= (byte >> 2);
253 (as -> crc)[2] ^= (byte << 6);
254 byte ^= (byte << 1);
255 byte ^= (byte << 2);
256 byte ^= (byte << 4);
257 if (byte & 0x80)
258 {
259 (as -> crc)[0] ^= 0x80;
260 (as -> crc)[2] ^= 0x21;
261 }
262 }
263 }
264
265 void lwasm_emitop(line_t *cl, int opc)
266 {
267 if (opc > 0x100)
268 lwasm_emit(cl, opc >> 8);
269 lwasm_emit(cl, opc);
270 }
271
272 lw_expr_t lwasm_parse_term(char **p, void *priv)
273 {
274 asmstate_t *as = priv;
275 int val;
276
277 if (!**p)
278 return NULL;
279
280 if (**p == '*' || (
281 **p == '.'
282 && !((*p)[1] >= 'A' && (*p)[1] <= 'Z')
283 && !((*p)[1] >= 'a' && (*p)[1] <= 'z')
284 && !((*p)[1] >= '0' && (*p)[1] <= '9')
285 ))
286 {
287 // special "symbol" for current line addr (*, .)
288 (*p)++;
289 return lw_expr_build(lw_expr_type_special, lwasm_expr_lineaddr, as -> cl);
290 }
291
292 // branch points
293 if (**p == '<')
294 {
295 (*p)++;
296 return lw_expr_build(lw_expr_type_special, lwasm_expr_prevbp, as -> cl);
297 }
298 if (**p == '>')
299 {
300 (*p)++;
301 return lw_expr_build(lw_expr_type_special, lwasm_expr_nextbp, as -> cl);
302 }
303
304 // double ascii constant
305 if (**p == '"')
306 {
307 int v;
308 (*p)++;
309 if (!**p)
310 return NULL;
311 if (!*((*p)+1))
312 return NULL;
313 v = (unsigned char)**p << 8 | (unsigned char)*((*p)+1);
314 (*p) += 2;
315 return lw_expr_build(lw_expr_type_int, v);
316 }
317
318 if (**p == '\'')
319 {
320 int v;
321
322 (*p)++;
323 if (!**p)
324 return NULL;
325
326 v = (unsigned char)**p;
327 (*p)++;
328 return lw_expr_build(lw_expr_type_int, v);
329 }
330
331 if (**p == '&')
332 {
333 // decimal constant
334 int v = 0;
335 (*p)++;
336
337 if (!strchr("0123456789", **p))
338 return NULL;
339
340 while (**p && strchr("0123456789", **p))
341 {
342 val = val * 10 + (**p - '0');
343 (*p)++;
344 }
345 return lw_expr_build(lw_expr_type_int, v);
346 }
347
348 if (**p == '%')
349 {
350 // binary constant
351 int v = 0;
352 (*p)++;
353
354 if (**p != '0' && **p != '1')
355 return NULL;
356
357 while (**p && (**p == '0' || **p == '1'))
358 {
359 val = val * 2 + (**p - '0');
360 (*p)++;
361 }
362 return lw_expr_build(lw_expr_type_int, v);
363 }
364
365 if (**p == '$')
366 {
367 // hexadecimal constant
368 int v = 0, v2;
369 (*p)++;
370 if (!strchr("0123456789abcdefABCDEF", **p))
371 return NULL;
372
373 while (**p && strchr("0123456789abcdefABCDEF", **p))
374 {
375 v2 = toupper(**p) - '0';
376 if (v2 > 9)
377 v2 -= 7;
378 v = v * 16 + v2;
379 (*p)++;
380 }
381 return lw_expr_build(lw_expr_type_int, v);
382 }
383
384 if (**p == '0' && (*((*p)+1) == 'x' || *((*p)+1) == 'X'))
385 {
386 // hexadecimal constant, C style
387 int v = 0, v2;
388 (*p)+=2;
389
390 if (!strchr("0123456789abcdefABCDEF", **p))
391 return NULL;
392
393 while (**p && strchr("0123456789abcdefABCDEF", **p))
394 {
395 v2 = toupper(**p) - '0';
396 if (v2 > 9)
397 v2 -= 7;
398 v = v * 16 + v2;
399 (*p)++;
400 }
401 return lw_expr_build(lw_expr_type_int, v);
402 }
403
404 if (**p == '@' && (*((*p)+1) >= '0' && *((*p)+1) <= '7'))
405 {
406 // octal constant
407 int v = 0;
408 (*p)++;
409
410 if (!strchr("01234567", **p))
411 return NULL;
412
413 while (**p && strchr("01234567", **p))
414 {
415 v = v * 8 + (**p - '0');
416 (*p)++;
417 }
418 return lw_expr_build(lw_expr_type_int, v);
419 }
420
421
422 // symbol or bare decimal or suffix constant here
423 do
424 {
425 int havedol = 0;
426 int l = 0;
427
428 while ((*p)[l] && strchr(SYMCHARS, (*p)[l]))
429 {
430 if ((*p)[l] == '$')
431 havedol = 1;
432 l++;
433 }
434 if (l == 0)
435 return NULL;
436
437 if ((*p)[l] == '{')
438 {
439 while ((*p)[l] && (*p)[l] != '}')
440 l++;
441 l++;
442 }
443
444 if (havedol || **p < '0' || **p > '9')
445 {
446 // have a symbol here
447 char *sym;
448 lw_expr_t term;
449
450 sym = lw_strndup(*p, l);
451 (*p) += l;
452 term = lw_expr_build(lw_expr_type_var, sym);
453 lw_free(sym);
454 return term;
455 }
456 } while (0);
457
458 if (!**p)
459 return NULL;
460
461 // we have a numeric constant here, either decimal or postfix base notation
462 {
463 int decval = 0, binval = 0, hexval = 0, octval = 0;
464 int valtype = 15; // 1 = bin, 2 = oct, 4 = dec, 8 = hex
465 int bindone = 0;
466 int val;
467 int dval;
468
469 while (1)
470 {
471 if (!**p || !strchr("0123456789ABCDEFabcdefqhoQHO", **p))
472 {
473 // we can legally be bin or decimal here
474 if (bindone)
475 {
476 // just finished a binary value
477 val = binval;
478 break;
479 }
480 else if (valtype & 4)
481 {
482 val = decval;
483 break;
484 }
485 else
486 {
487 // bad value
488 return NULL;
489 }
490 }
491
492 dval = toupper(**p);
493 (*p)++;
494
495 if (bindone)
496 {
497 // any characters past "B" means it is not binary
498 bindone = 0;
499 valtype &= 14;
500 }
501
502 switch (dval)
503 {
504 case 'Q':
505 case 'O':
506 if (valtype & 2)
507 {
508 val = octval;
509 valtype = -1;
510 break;
511 }
512 else
513 {
514 return NULL;
515 }
516 /* can't get here */
517
518 case 'H':
519 if (valtype & 8)
520 {
521 val = hexval;
522 valtype = -1;
523 break;
524 }
525 else
526 {
527 return NULL;
528 }
529 /* can't get here */
530
531 case 'B':
532 // this is a bit of a sticky one since B may be a
533 // hex number instead of the end of a binary number
534 // so it falls through to the digit case
535 if (valtype & 1)
536 {
537 // could still be binary of hex
538 bindone = 1;
539 valtype = 9;
540 }
541 /* fall through intented */
542
543 default:
544 // digit
545 dval -= '0';
546 if (dval > 9)
547 dval -= 7;
548 if (valtype & 8)
549 hexval = hexval * 16 + dval;
550 if (valtype & 4)
551 {
552 if (dval > 9)
553 valtype &= 11;
554 else
555 decval = decval * 10 + dval;
556 }
557 if (valtype & 2)
558 {
559 if (dval > 7)
560 valtype &= 13;
561 else
562 octval = octval * 8 + dval;
563 }
564 if (valtype & 1)
565 {
566 if (dval > 1)
567 valtype &= 14;
568 else
569 binval = binval * 2 + dval;
570 }
571 }
572 if (valtype == -1)
573 break;
574
575 // return if no more valid types
576 if (valtype == 0)
577 return NULL;
578
579 val = decval; // in case we fall through
580 }
581
582 // get here if we have a value
583 return lw_expr_build(lw_expr_type_int, val);
584 }
585 // can't get here
586 }
587
588 lw_expr_t lwasm_parse_expr(asmstate_t *as, char **p)
589 {
590 lw_expr_t e;
591
592 e = lw_expr_parse(p, as);
593
594 return e;
595 }
596
597 int lwasm_reduce_expr(asmstate_t *as, lw_expr_t expr)
598 {
599 lw_expr_simplify(expr, as);
600 }
601
602 void lwasm_save_expr(line_t *cl, int id, lw_expr_t expr)
603 {
604 struct line_expr_s *e;
605
606 for (e = cl -> exprs; e; e = e -> next)
607 {
608 if (e -> id == id)
609 {
610 lw_expr_destroy(e -> expr);
611 e -> expr = expr;
612 return;
613 }
614 }
615
616 e = lw_alloc(sizeof(struct line_expr_s));
617 e -> expr = expr;
618 e -> id = id;
619 e -> next = cl -> exprs;
620 cl -> exprs = e;
621 }
622
623 lw_expr_t lwasm_fetch_expr(line_t *cl, int id)
624 {
625 struct line_expr_s *e;
626
627 for (e = cl -> exprs; e; e = e -> next)
628 {
629 if (e -> id == id)
630 {
631 return e -> expr;
632 }
633 }
634 return NULL;
635 }
636
/*
Advance *p to the next whitespace character or to the end of the string,
whichever comes first.

The character is converted to unsigned char before being handed to
isspace(); passing a negative plain char is undefined behavior.
*/
void skip_operand(char **p)
{
	while (**p && !isspace((unsigned char)**p))
		(*p)++;
}
642
643 int lwasm_emitexpr(line_t *l, lw_expr_t expr, int size)
644 {
645 int v = 0;
646 int ol;
647
648 ol = l -> outputl;
649 if (ol == -1)
650 ol = 0;
651
652 if (lw_expr_istype(expr, lw_expr_type_int))
653 {
654 v = lw_expr_intval(expr);
655 }
656 // handle external/cross-section/incomplete references here
657 else
658 {
659 if (l -> as -> output_format == OUTPUT_OBJ)
660 {
661 reloctab_t *re;
662 lw_expr_t te;
663
664 if (size == 4)
665 {
666 // create a two part reference because lwlink doesn't
667 // support 32 bit references
668 lw_expr_t te2;
669 te = lw_expr_build(lw_expr_type_int, 0x10000);
670 te2 = lw_expr_build(lw_expr_type_oper, lw_expr_oper_divide, expr, te);
671 lw_expr_destroy(te);
672
673 re = lw_alloc(sizeof(reloctab_t));
674 re -> next = l -> csect -> reloctab;
675 l -> csect -> reloctab = re;
676 te = lw_expr_build(lw_expr_type_int, ol);
677 re -> offset = lw_expr_build(lw_expr_type_oper, lw_expr_oper_plus, l -> addr, te);
678 lw_expr_destroy(te);
679 lwasm_reduce_expr(l -> as, re -> offset);
680 re -> expr = te2;
681 re -> size = 2;
682
683 te = lw_expr_build(lw_expr_type_int, 0xFFFF);
684 te2 = lw_expr_build(lw_expr_type_oper, lw_expr_oper_bwand, expr, te);
685 lw_expr_destroy(te);
686
687 re = lw_alloc(sizeof(reloctab_t));
688 re -> next = l -> csect -> reloctab;
689 l -> csect -> reloctab = re;
690 te = lw_expr_build(lw_expr_type_int, ol + 2);
691 re -> offset = lw_expr_build(lw_expr_type_oper, lw_expr_oper_plus, l -> addr, te);
692 lw_expr_destroy(te);
693 lwasm_reduce_expr(l -> as, re -> offset);
694 re -> expr = te2;
695 re -> size = 2;
696 }
697 else
698 {
699 // add "expression" record to section table
700 re = lw_alloc(sizeof(reloctab_t));
701 re -> next = l -> csect -> reloctab;
702 l -> csect -> reloctab = re;
703 te = lw_expr_build(lw_expr_type_int, ol);
704 re -> offset = lw_expr_build(lw_expr_type_oper, lw_expr_oper_plus, l -> addr, te);
705 lw_expr_destroy(te);
706 lwasm_reduce_expr(l -> as, re -> offset);
707 re -> size = size;
708 re -> expr = lw_expr_copy(expr);
709 }
710 for (v = 0; v < size; v++)
711 lwasm_emit(l, 0);
712 return 0;
713 }
714 lwasm_register_error(l -> as, l, "Expression not fully resolved");
715 return -1;
716 }
717
718 switch (size)
719 {
720 case 4:
721 lwasm_emit(l, v >> 24);
722 lwasm_emit(l, v >> 16);
723 /* fallthrough intended */
724
725 case 2:
726 lwasm_emit(l, v >> 8);
727 /* fallthrough intended */
728
729 case 1:
730 lwasm_emit(l, v);
731 }
732
733 return 0;
734 }
735
/*
Match the text at *p against a table of register names, case
insensitively.

regs is a string of fixed width two character entries in upper case; a
one character name is padded with a trailing space. A one character
entry matches only when the following input character is not a letter.

Returns the zero based index of the matching entry and advances *p past
the name (one or two characters), or returns -1 and leaves *p alone when
nothing matches.

Input characters are converted to unsigned char before being passed to
toupper()/isalpha(); passing a negative plain char is undefined behavior.
*/
int lwasm_lookupreg2(const char *regs, char **p)
{
	int c0 = toupper((unsigned char)(*p)[0]);
	int idx;

	for (idx = 0; *regs; regs += 2, idx++)
	{
		int c1;

		if (c0 != *regs)
			continue;

		// only look at the second input character once the first one
		// matched, so we never read past the string terminator
		c1 = (unsigned char)(*p)[1];
		if (regs[1] == ' ' && !isalpha(c1))
			break;
		if (toupper(c1) == regs[1])
			break;
	}

	if (!*regs)
		return -1;

	*p += (regs[1] == ' ') ? 1 : 2;
	return idx;
}
760
/*
Match the text at *p against a table of register names, case
insensitively.

regs is a string of fixed width three character entries in upper case;
shorter names are padded with trailing spaces. A padded entry matches
only when the input character following the name is not a letter.

Returns the zero based index of the matching entry and advances *p past
the name (one to three characters), or returns -1 and leaves *p alone
when nothing matches.

Input characters are converted to unsigned char before being passed to
toupper()/isalpha(); passing a negative plain char is undefined behavior.
*/
int lwasm_lookupreg3(const char *regs, char **p)
{
	int c0 = toupper((unsigned char)(*p)[0]);
	int idx;

	for (idx = 0; *regs; regs += 3, idx++)
	{
		int c1, c2;

		if (c0 != *regs)
			continue;

		// later input characters are only read after the earlier ones
		// matched, so we never read past the string terminator
		c1 = (unsigned char)(*p)[1];
		if (regs[1] == ' ' && !isalpha(c1))
			break;
		if (toupper(c1) != regs[1])
			continue;

		c2 = (unsigned char)(*p)[2];
		if (regs[2] == ' ' && !isalpha(c2))
			break;
		if (toupper(c2) == regs[2])
			break;
	}

	if (!*regs)
		return -1;

	if (regs[1] == ' ')
		*p += 1;
	else if (regs[2] == ' ')
		*p += 2;
	else
		*p += 3;
	return idx;
}
792
793 void lwasm_show_errors(asmstate_t *as)
794 {
795 line_t *cl;
796 lwasm_error_t *e;
797
798 for (cl = as -> line_head; cl; cl = cl -> next)
799 {
800 if (!(cl -> err) && !(cl -> warn))
801 continue;
802 for (e = cl -> err; e; e = e -> next)
803 {
804 fprintf(stderr, "ERROR: %s\n", e -> mess);
805 }
806 for (e = cl -> warn; e; e = e -> next)
807 {
808 fprintf(stderr, "WARNING: %s\n", e -> mess);
809 }
810 fprintf(stderr, "%s:%05d %s\n\n", cl -> linespec, cl -> lineno, cl -> ltext);
811 }
812 }
813
814 /*
815 this does any passes and other gymnastics that might be useful
816 to see if an expression reduces early
817 */
818 extern void do_pass3(asmstate_t *as);
819 extern void do_pass4_aux(asmstate_t *as, int force);
820
821 void lwasm_interim_reduce(asmstate_t *as)
822 {
823 do_pass3(as);
824 // do_pass4_aux(as, 0);
825 }
826
827 lw_expr_t lwasm_parse_cond(asmstate_t *as, char **p)
828 {
829 lw_expr_t e;
830
831 debug_message(as, 250, "Parsing condition");
832 e = lwasm_parse_expr(as, p);
833 debug_message(as, 250, "COND EXPR: %s", lw_expr_print(e));
834
835 if (!e)
836 {
837 lwasm_register_error(as, as -> cl, "Bad expression");
838 return NULL;
839 }
840
841 /* we need to simplify the expression here */
842 debug_message(as, 250, "Doing interim reductions");
843 lwasm_interim_reduce(as);
844 debug_message(as, 250, "COND EXPR: %s", lw_expr_print(e));
845 debug_message(as, 250, "Reducing expression");
846 lwasm_reduce_expr(as, e);
847 debug_message(as, 250, "COND EXPR: %s", lw_expr_print(e));
848 /* lwasm_reduce_expr(as, e);
849 debug_message(as, 250, "COND EXPR: %s", lw_expr_print(e));
850 lwasm_reduce_expr(as, e);
851 debug_message(as, 250, "COND EXPR: %s", lw_expr_print(e));
852 lwasm_reduce_expr(as, e);
853 debug_message(as, 250, "COND EXPR: %s", lw_expr_print(e));
854 */
855
856 lwasm_save_expr(as -> cl, 4242, e);
857
858 if (!lw_expr_istype(e, lw_expr_type_int))
859 {
860 debug_message(as, 250, "Non-constant expression");
861 lwasm_register_error(as, as -> cl, "Conditions must be constant on pass 1");
862 return NULL;
863 }
864 debug_message(as, 250, "Returning expression");
865 return e;
866 }