comparison src/expr.c @ 18:218aabbc3b1a

First pass at expression evaluator complete
author lost
date Thu, 01 Jan 2009 23:55:18 +0000
parents df0c4a46af8f
children ec0bf61a5502
comparison
equal deleted inserted replaced
17:df0c4a46af8f 18:218aabbc3b1a
17 You should have received a copy of the GNU General Public License along with 17 You should have received a copy of the GNU General Public License along with
18 this program. If not, see <http://www.gnu.org/licenses/>. 18 this program. If not, see <http://www.gnu.org/licenses/>.
19 */ 19 */
20 20
21 /* 21 /*
22 This file contains the actual expression evaluator which uses the LWVAL 22 This file contains the actual expression evaluator
23 mechanism to store results.
24 */ 23 */
25 24
26 #define __expr_c_seen__ 25 #define __expr_c_seen__
27 26
27 #include <ctype.h>
28 #include <stdio.h> 28 #include <stdio.h>
29 #include <stdlib.h> 29 #include <stdlib.h>
30 #include <string.h>
30 31
31 #include "expr.h" 32 #include "expr.h"
32 #include "util.h" 33 #include "util.h"
33 34
34 lwasm_expr_stack_t *lwasm_expr_stack_create(void) 35 lwasm_expr_stack_t *lwasm_expr_stack_create(void)
160 161
161 lwasm_free(n); 162 lwasm_free(n);
162 163
163 return t; 164 return t;
164 } 165 }
166
167 // the following two functions are co-routines which actually parse
168 // an infix expression onto the expression stack, each returns -1
169 // if an error is encountered
170
171 /*
172 parse a term and push it onto the stack
173
174 this function handles unary prefix operators (-, +, .not., .com.)
175 as well as ()
176 */
177 int lwasm_expr_parse_term(lwasm_expr_stack_t *s, const char **p)
178 {
179 lwasm_expr_term_t *t;
180
181 eval_next:
182 if (**p == '(')
183 {
184 (*p)++;
185 lwasm_expr_parse_expr(s, p, 0);
186 if (**p != ')')
187 return -1;
188 (*p)++;
189 return 0;
190 }
191
192 if (**p == '+')
193 {
194 (*p)++;
195 goto eval_next;
196 }
197
198 if (**p == '-')
199 {
200 // parse expression following "-"
201 (*p)++;
202 if (lwasm_expr_parse_expr(s, p, 200) < 0)
203 return -1;
204 t = lwasm_expr_term_create_oper(LWASM_OPER_NEG);
205 lwasm_expr_stack_push(s, t);
206 lwasm_expr_term_free(t);
207 return 0;
208 }
209
210 /*
211 we have an actual term here so evaluate it
212
213 it could be one of the following:
214
215 1. a decimal constant
216 2. a hexadecimal constant
217 3. an octal constant
218 4. a binary constant
219 5. a symbol reference
220 6. the "current" instruction address (*)
221 7. the "current" data address (.)
222 8. a "back reference" (<)
223 9. a "forward reference" (>)
224
225 items 6 through 9 are stored as symbol references
226
227 (a . followed by a . or a alpha char or number is a symbol)
228 */
229 if (**p == '*'
230 || (
231 **p == '.'
232 && (*p)[1] != '.'
233 && !((*p)[1] >= 'A' && (*p)[1] <= 'Z')
234 && !((*p)[1] >= 'a' && (*p)[1] <= 'z')
235 && !((*p)[1] >= '0' && (*p)[1] <= '9')
236 )
237 || **p == '<'
238 || **p == '>')
239 {
240 char tstr[2];
241 tstr[0] = **p;
242 tstr[1] = '\0';
243 t = lwasm_expr_term_create_sym(tstr);
244 lwasm_expr_stack_push(s, t);
245 lwasm_expr_term_free(t);
246 (*p)++;
247 return 0;
248 }
249
250 /*
251 - a symbol will be a string of characters introduced by a letter, ".",
252 "_" but NOT a number
253 - a decimal constant will consist of only digits, optionally prefixed
254 with "&"
255 - a binary constant will consist of only 0s and 1s either prefixed with %
256 or suffixed with "B"
257 - a hex constant will consist of 0-9A-F either prefixed with $ or
258 suffixed with "H"; a hex number starting with A-F must be prefixed
259 with $ or start with 0 and end with H
260 - an octal constant will consist of 0-7 either prefixed with @ or
261 suffixed with "O" or "Q"
262 - an ascii constant will be a single character prefixed with a '
263 - a double ascii constant will be two characters prefixed with a "
264
265 */
266 if (**p == '"')
267 {
268 // double ascii constant
269 int val;
270 (*p)++;
271 if (!**p)
272 return -1;
273 if (!*((*p)+1))
274 return -1;
275 val = **p << 8 | *((*p) + 1);
276 (*p) += 2;
277 t = lwasm_expr_term_create_int(val);
278 lwasm_expr_stack_push(s, t);
279 lwasm_expr_term_free(t);
280 return 0;
281 }
282 else if (**p == '\'')
283 {
284 // single ascii constant
285 int val;
286 (*p)++;
287 if (!**p)
288 return -1;
289 val = **p;
290 (*p)++;
291 t = lwasm_expr_term_create_int(val);
292 lwasm_expr_stack_push(s, t);
293 lwasm_expr_term_free(t);
294 }
295 else if (**p == '&')
296 {
297 // decimal constant
298 int val = 0;
299
300 (*p)++;
301 while (strchr("0123456789", **p))
302 {
303 val = val * 10 + (**p - '0');
304 (*p)++;
305 }
306 t = lwasm_expr_term_create_int(val);
307 lwasm_expr_stack_push(s, t);
308 lwasm_expr_term_free(t);
309 return 0;
310 }
311 else if (**p == '%')
312 {
313 // binary constant
314 int val = 0;
315
316 (*p)++;
317 while (**p == '0' || **p == '1')
318 {
319 val = val * 2 + (**p - '0');
320 (*p)++;
321 }
322 t = lwasm_expr_term_create_int(val);
323 lwasm_expr_stack_push(s, t);
324 lwasm_expr_term_free(t);
325 return 0;
326 }
327 else if (**p == '$')
328 {
329 // hexadecimal constant
330 int val = 0, val2;
331
332 (*p)++;
333 while (strchr("0123456789ABCDEFabcdef", **p))
334 {
335 val2 = toupper(**p) - '0';
336 if (val2 > 9)
337 val2 -= 7;
338 val = val * 16 + val2;
339 (*p)++;
340 }
341 t = lwasm_expr_term_create_int(val);
342 lwasm_expr_stack_push(s, t);
343 lwasm_expr_term_free(t);
344 return 0;
345 }
346 else if (**p == '@')
347 {
348 // octal constant
349 int val = 0;
350
351 (*p)++;
352 while (strchr("01234567", **p))
353 {
354 val = val * 8 + (**p - '0');
355 (*p)++;
356 }
357 t = lwasm_expr_term_create_int(val);
358 lwasm_expr_stack_push(s, t);
359 lwasm_expr_term_free(t);
360 return 0;
361 }
362
363 // symbol or bare decimal or suffix identified constant here
364 // all numbers will start with a digit at this point
365 if (**p < '0' || **p > '9')
366 {
367 int l = 0;
368 char *sb;
369
370 // evaluate a symbol here
371 static const char *symchars = "_.$@\\abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
372 while (strchr(symchars, (*p)[l]))
373 l++;
374
375 if (l == 0)
376 return -1;
377
378 sb = lwasm_alloc(l + 1);
379 sb[l] = '\0';
380 memcpy(sb, *p, l);
381 t = lwasm_expr_term_create_sym(sb);
382 lwasm_expr_stack_push(s, t);
383 lwasm_expr_term_free(t);
384 lwasm_free(sb);
385 (*p) += l;
386 return 0;
387 }
388
389 if (!**p)
390 return -1;
391
392 // evaluate a suffix based constant
393 {
394 int decval = 0, binval = 0, hexval = 0, octval = 0;
395 int valtype = 15; // 1 = bin, 2 = oct, 4 = dec, 8 = hex
396 int bindone = 0;
397 int val;
398 int dval;
399
400 while (1)
401 {
402 if (!**p || !strchr("0123456789ABCDEFabcdefqhoQHO", **p))
403 {
404 // we can legally have bin or decimal here
405 if (bindone)
406 {
407 // we just finished a binary value
408 val = binval;
409 break;
410 }
411 else if (valtype & 4)
412 {
413 // otherwise we must be decimal (if we're still allowed one)
414 val = decval;
415 break;
416 }
417 else
418 {
419 // bad value
420 return -1;
421 }
422 }
423
424 dval = toupper(**p);
425 (*p)++;
426
427 if (bindone)
428 {
429 // any characters past "B" means it is not binary
430 bindone = 0;
431 valtype &= 14;
432 }
433
434 switch (dval)
435 {
436 case 'Q':
437 case 'O':
438 if (valtype & 2)
439 {
440 val = octval;
441 valtype = -1;
442 break;
443 }
444 else
445 {
446 // not a valid octal value
447 return -1;
448 }
449 /* can't get here */
450
451 case 'H':
452 if (valtype & 8)
453 {
454 val = hexval;
455 valtype = -1;
456 break;
457 }
458 else
459 {
460 // not a valid hex number
461 return -1;
462 }
463 /* can't get here */
464
465 case 'B':
466 // this is a bit of a sticky one since B is a legit hex
467 // digit so this may or may not be the end of the number
468 // so we fall through to the digit case
469
470 if (valtype & 1)
471 {
472 // could still be binary
473 bindone = 1;
474 valtype = 9; // hex and binary
475 }
476 /* fall through intentional */
477
478 default:
479 // digit
480 dval -= '0';
481 if (dval > 9)
482 dval -= 7;
483
484 if (valtype & 8)
485 {
486 hexval = hexval * 16 + dval;
487 }
488 if (valtype & 4)
489 {
490 if (dval > 9)
491 valtype &= 11;
492 else
493 decval = decval * 10 + dval;
494 }
495 if (valtype & 2)
496 {
497 if (dval > 7)
498 valtype &= 13;
499 else
500 octval = octval * 8 + dval;
501 }
502 if (valtype & 1)
503 {
504 if (dval > 1)
505 valtype &= 14;
506 else
507 binval = binval * 2 + dval;
508 }
509 }
510 // break out if we have a return value
511 if (valtype = -1)
512 break;
513 // return if no more valid possibilities!
514 if (valtype == 0)
515 return -1;
516 }
517
518 // we get here when we have a value to return
519 t = lwasm_expr_term_create_int(val);
520 lwasm_expr_stack_push(s, t);
521 lwasm_expr_term_free(t);
522 return 0;
523 }
524 /* can't get here */
525 }
526
527 // parse an expression and push the result onto the stack
528 // if an operator of lower precedence than the value of "prec" is found,
529 int lwasm_expr_parse_expr(lwasm_expr_stack_t *s, const char **p, int prec)
530 {
531 static const struct operinfo
532 {
533 int opernum;
534 char *operstr;
535 int operprec;
536 } operators[] =
537 {
538 { LWASM_OPER_PLUS, "+", 100 },
539 { LWASM_OPER_MINUS, "-", 100 },
540 { LWASM_OPER_TIMES, "*", 150 },
541 { LWASM_OPER_DIVIDE, "/", 150 },
542 { LWASM_OPER_MOD, "%", 150 },
543 { LWASM_OPER_INTDIV, "\\", 150 },
544
545 { LWASM_OPER_NONE, "", 0 }
546 };
547 int opern, i;
548 lwasm_expr_term_t *operterm;
549
550 // return if we are at the end of the expression or a subexpression
551 if (!**p || isspace(**p) || **p == ')')
552 return 0;
553
554 eval_next:
555 if (lwasm_expr_parse_term(s, p) < 0)
556 return -1;
557
558 // expecting an operator here
559 for (opern = 0; operators[opern].opernum != LWASM_OPER_NONE; opern++)
560 {
561 for (i = 0; (*p)[i] && operators[opern].operstr[i] && (*p[i] == operators[opern].operstr[i]); i++)
562 /* do nothing */ ;
563 if (operators[opern].operstr[i] == '\0')
564 break;
565 }
566 if (operators[opern].opernum == LWASM_OPER_NONE)
567 {
568 // unrecognized operator
569 return -1;
570 }
571
572 // the operator number in question is in opern; i is the length of the
573 // operator string
574
575 // logic:
576 // if the precedence of this operation is <= to the "prec" flag,
577 // we simply return without advancing the input pointer; the operator
578 // will be evaluated again in the enclosing function call
579 if (operators[opern].operprec <= prec)
580 return 0;
581
582 // logic:
583 // we have a higher precedence operator here so we will advance the
584 // input pointer to the next term and let the expression evaluator
585 // loose on it after which time we will push our operator onto the
586 // stack and then go on with the expression evaluation
587 (*p) += i; // advance input pointer
588
589 // evaluate next expression(s) of higher precedence
590 if (lwasm_expr_parse_expr(s, p, operators[opern].operprec) < 0)
591 return -1;
592
593 operterm = lwasm_expr_term_create_oper(operators[opern].opernum);
594 lwasm_expr_stack_push(s, operterm);
595 lwasm_expr_term_free(operterm);
596
597 // return if we are at the end of the expression or a subexpression
598 if (!**p || isspace(**p) || **p == ')')
599 return 0;
600
601 // continue evaluating
602 goto eval_next;
603 }
604
605 /*
606 actually evaluate an expression
607
608 This happens in two stages. The first stage merely parses the expression into
609 a lwasm_expr_stack_t * which is then evaluated as much as possible before the
610 result is returned.
611
612 Returns NULL on a parse error or otherwise invalid expression. *outp will
613 contain the pointer to the next character after the expression if and only
614 if there is no error. In the case of an error, *outp is undefined.
615 */
616 lwasm_expr_stack_t *lwasm_expr_eval(const char *inp, const char **outp)
617 {
618 lwasm_expr_stack_t *s;
619 const char *p;
620
621 // actually parse the expression
622 p = inp;
623 s = lwasm_expr_stack_create();
624 if (lwasm_expr_parse_expr(s, &p, 0) < 0)
625 goto cleanup_error;
626
627 // save end of expression
628 if (outp)
629 (*outp) = p;
630
631 // return potentially partial expression
632 if (lwasm_expr_reval(s) < 0)
633 goto cleanup_error;
634
635 return s;
636
637 cleanup_error:
638 lwasm_expr_stack_free(s);
639 return NULL;
640 }
641
642 /*
643 take an expression stack s and scan for operations that can be completed
644
645 return -1 on error, 0 on no error
646
647 possible errors are: division by zero or unknown operator
648
649 theory of operation:
650
651 scan the stack for an operator which has two constants preceding it (binary)
652 or 1 constant preceding it (unary) and if found, perform the calculation
653 and replace the operator and its operands with the result
654
655 repeat the scan until no futher simplications are found or if there are no
656 further operators or only a single term remains
657
658 */
659 int lwasm_expr_reval(lwasm_expr_stack_t *s)
660 {
661 lwasm_expr_stack_node_t *n;
662
663 next_iter:
664 // a single term
665 if (s -> head == s -> tail)
666 return 0;
667
668 // search for an operator
669 for (n = s -> head; n; n = n -> next)
670 {
671 if (n -> term -> term_type == LWASM_TERM_OPER)
672 {
673 if (n -> term -> value == LWASM_OPER_NEG
674 || n -> term -> value == LWASM_OPER_COM
675 )
676 {
677 // unary operator
678 if (n -> prev && n -> prev -> term -> term_type == LWASM_TERM_INT)
679 {
680 // a unary operator we can resolve
681 // we do the op then remove the term "n" is pointing at
682 if (n -> term -> value == LWASM_OPER_NEG)
683 {
684 n -> prev -> term -> value = -(n -> prev -> term -> value);
685 }
686 else if (n -> term -> value == LWASM_OPER_COM)
687 {
688 n -> prev -> term -> value = ~(n -> prev -> term -> value);
689 }
690 n -> prev -> next = n -> next;
691 if (n -> next)
692 n -> next -> prev = n -> prev;
693 else
694 s -> tail = n -> prev;
695
696 lwasm_expr_term_free(n -> term);
697 lwasm_free(n);
698 break;
699 }
700 }
701 else
702 {
703 // binary operator
704 if (n -> prev && n -> prev -> prev && n -> prev -> term -> term_type == LWASM_TERM_INT && n -> prev -> prev -> term -> term_type == LWASM_TERM_INT)
705 {
706 // a binary operator we can resolve
707 switch (n -> term -> value)
708 {
709 case LWASM_OPER_PLUS:
710 n -> prev -> prev -> term -> value += n -> prev -> term -> value;
711 break;
712
713 case LWASM_OPER_MINUS:
714 n -> prev -> prev -> term -> value -= n -> prev -> term -> value;
715 break;
716
717 case LWASM_OPER_TIMES:
718 n -> prev -> prev -> term -> value *= n -> prev -> term -> value;
719 break;
720
721 case LWASM_OPER_DIVIDE:
722 if (n -> prev -> term -> value == 0)
723 return -1;
724 n -> prev -> prev -> term -> value /= n -> prev -> term -> value;
725 break;
726
727 case LWASM_OPER_MOD:
728 if (n -> prev -> term -> value == 0)
729 return -1;
730 n -> prev -> prev -> term -> value %= n -> prev -> term -> value;
731 break;
732
733 case LWASM_OPER_INTDIV:
734 if (n -> prev -> term -> value == 0)
735 return -1;
736 n -> prev -> prev -> term -> value /= n -> prev -> term -> value;
737 break;
738
739 case LWASM_OPER_BWAND:
740 n -> prev -> prev -> term -> value &= n -> prev -> term -> value;
741 break;
742
743 case LWASM_OPER_BWOR:
744 n -> prev -> prev -> term -> value |= n -> prev -> term -> value;
745 break;
746
747 case LWASM_OPER_BWXOR:
748 n -> prev -> prev -> term -> value ^= n -> prev -> term -> value;
749 break;
750
751 case LWASM_OPER_AND:
752 n -> prev -> prev -> term -> value = (n -> prev -> term -> value && n -> prev -> prev -> term -> value) ? 1 : 0;
753 break;
754
755 case LWASM_OPER_OR:
756 n -> prev -> prev -> term -> value = (n -> prev -> term -> value || n -> prev -> prev -> term -> value) ? 1 : 0;
757 break;
758
759 default:
760 // return error if unknown operator!
761 return -1;
762 }
763
764 // now remove the two unneeded entries from the stack
765 n -> prev -> prev -> next = n -> next;
766 if (n -> next)
767 n -> next -> prev = n -> prev -> prev;
768 else
769 s -> tail = n -> prev -> prev;
770
771 lwasm_expr_term_free(n -> term);
772 lwasm_expr_term_free(n -> prev -> term);
773 lwasm_free(n -> prev);
774 lwasm_free(n);
775 break;
776 }
777 }
778 }
779 }
780 // note for the terminally confused about dynamic memory and pointers:
781 // n will not be NULL even after the lwasm_free calls above so
782 // this test will still work (n will be a dangling pointer)
783 // (n will only be NULL if we didn't find any operators to simplify)
784 if (n)
785 goto next_iter;
786
787 return 0;
788 }