Mercurial > hg > index.cgi
diff lwcc/preproc.c @ 296:83fcc1ed6ad6 ccdev
Checkpoint lwcc development
Initial untested version of the preprocessor with macro expansion but
without file inclusion.
author | William Astle <lost@l-w.ca> |
---|---|
date | Sat, 14 Sep 2013 20:04:38 -0600 |
parents | |
children | 310df72c641d |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lwcc/preproc.c Sat Sep 14 20:04:38 2013 -0600 @@ -0,0 +1,983 @@ +/* +lwcc/preproc.c + +Copyright © 2013 William Astle + +This file is part of LWTOOLS. + +LWTOOLS is free software: you can redistribute it and/or modify it under the +terms of the GNU General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <string.h> + +#include <lw_alloc.h> +#include <lw_string.h> + +#include "cpp.h" +#include "strbuf.h" +#include "symbol.h" +#include "token.h" + +static int expand_macro(struct preproc_info *, char *); +static void process_directive(struct preproc_info *); +static long eval_expr(struct preproc_info *); + +struct token *preproc_next_processed_token(struct preproc_info *pp) +{ + struct token *ct; + +again: + ct = preproc_next_token(pp); + if (ct -> ttype == TOK_EOF) + return ct; + if (ct -> ttype == TOK_EOL) + pp -> ppeolseen = 1; + + if (ct -> ttype == TOK_HASH && pp -> eolseen == 1) + { + // preprocessor directive + process_directive(pp); + } + // if we're in a false section, don't return the token; keep scanning + if (pp -> skip_level) + goto again; + + if (ct -> ttype != TOK_WSPACE) + pp -> ppeolseen = 0; + + if (ct -> ttype == TOK_IDENT) + { + // possible macro expansion + if (expand_macro(pp, ct -> strval)) + goto again; + } + + return ct; +} + +static struct token *preproc_next_processed_token_nws(struct preproc_info *pp) +{ + struct token *t; + + do + { + t = preproc_next_processed_token(pp); + } while (t -> ttype == TOK_WSPACE); + return t; +} + +static struct token *preproc_next_token_nws(struct preproc_info *pp) +{ + struct token *t; + + do + { + t = preproc_next_token(pp); + } while (t -> ttype == TOK_WSPACE); + return t; +} + +static void skip_eol(struct preproc_info *pp) +{ + struct token *t; + + if (pp -> curtok && pp -> curtok -> ttype == TOK_EOL) + return; + do + { + t = preproc_next_token(pp); + } while (t -> ttype != TOK_EOL); +} + +static void check_eol(struct preproc_info *pp) +{ + struct token *t; + + t = preproc_next_token(pp); + if (t -> ttype != TOK_EOL) + preproc_throw_warning(pp, "Extra text after preprocessor directive"); + skip_eol(pp); +} + +static void dir_ifdef(struct preproc_info *pp) +{ + struct token *ct; + + if (pp -> skip_level) + { + pp -> skip_level++; + skip_eol(pp); + return; + } + + do + { + ct = preproc_next_token(pp); + } while (ct -> ttype == TOK_WSPACE); + + if (ct -> ttype != TOK_IDENT) + { + preproc_throw_error(pp, "Bad #ifdef"); + skip_eol(pp); + } + + if (symtab_find(pp, ct -> strval) == NULL) + { + pp -> skip_level++; + } + else + { + pp -> found_level++; + } + check_eol(pp); +} + +static void dir_ifndef(struct preproc_info *pp) +{ + struct token *ct; + + if (pp -> skip_level) + { + pp -> skip_level++; + skip_eol(pp); + return; + } + + do + { + ct = preproc_next_token(pp); + } while (ct -> ttype == TOK_WSPACE); + + if (ct -> ttype != TOK_IDENT) + { + preproc_throw_error(pp, "Bad #ifdef"); + skip_eol(pp); + } + + if (symtab_find(pp, ct -> strval) != NULL) + { + pp -> skip_level++; + } + else + { + pp -> found_level++; + } + check_eol(pp); +} + +static void dir_if(struct preproc_info *pp) +{ + if (pp -> skip_level || !eval_expr(pp)) + pp -> skip_level++; + else + pp -> found_level++; +} + +static void dir_elif(struct preproc_info *pp) +{ + if (pp -> skip_level == 0) + pp -> else_skip_level = pp -> found_level; + if (pp -> skip_level) + { + if (pp -> else_skip_level > pp -> found_level) + ; + else if (--(pp -> skip_level) != 0) + pp -> skip_level++; + else if (eval_expr(pp)) + pp -> found_level++; + else + pp -> skip_level++; + } + else if (pp -> found_level) + { + pp -> skip_level++; + pp -> found_level--; + } + else + preproc_throw_error(pp, "#elif in non-conditional section"); +} + +static void dir_else(struct preproc_info *pp) +{ + if (pp -> skip_level) + { + if (pp -> else_skip_level > pp -> found_level) + ; + else if (--(pp -> skip_level) != 0) + pp -> skip_level++; + else + pp -> found_level++; + } + else if (pp -> found_level) + { + pp -> skip_level++; + pp -> found_level--; + } + else + { + preproc_throw_error(pp, "#else in non-conditional section"); + } + if (pp -> else_level == pp -> found_level + pp -> skip_level) + { + preproc_throw_error(pp, "Too many #else"); + } + pp -> else_level = pp -> found_level + pp -> skip_level; + check_eol(pp); +} + +static void dir_endif(struct preproc_info *pp) +{ + if (pp -> skip_level) + pp -> skip_level--; + else if (pp -> found_level) + pp -> found_level--; + else + preproc_throw_error(pp, "#endif in non-conditional section"); + if (pp -> skip_level == 0) + pp -> else_skip_level = 0; + pp -> else_level = 0; + check_eol(pp); +} + +static void dir_define(struct preproc_info *pp) +{ + struct token *tl = NULL; + struct token *ttl; + struct token *ct; + int nargs = -1; + int vargs = 0; + char *mname = NULL; + + char **arglist = NULL; + + if (pp -> skip_level) + { + skip_eol(pp); + return; + } + + ct = preproc_next_token_nws(pp); + if (ct -> ttype != TOK_IDENT) + goto baddefine; + + mname = lw_strdup(ct -> strval); + ct = preproc_next_token(pp); + + if (ct -> ttype == TOK_WSPACE) + { + /* object like macro */ + } + else if (ct -> ttype == TOK_EOL) + { + /* object like macro - empty value */ + goto out; + } + else if (ct -> ttype == TOK_OPAREN) + { + /* function like macro - parse args */ + nargs = 0; + vargs = 0; + for (;;) + { + ct = preproc_next_token_nws(pp); + if (ct -> ttype == TOK_EOL) + { + goto baddefine; + } + if (ct -> ttype == TOK_CPAREN) + break; + + if (ct -> ttype == TOK_IDENT) + { + /* parameter name */ + nargs++; + /* record argument name */ + arglist = lw_realloc(arglist, sizeof(char *) * nargs); + arglist[nargs - 1] = lw_strdup(ct -> strval); + + /* check for end of args or comma */ + ct = preproc_next_token_nws(pp); + if (ct -> ttype == TOK_CPAREN) + break; + else if (ct -> ttype == TOK_COMMA) + continue; + else + goto baddefine; + } + else if (ct -> ttype == TOK_ELLIPSIS) + { + /* variadic macro */ + vargs = 1; + ct = preproc_next_token_nws(pp); + if (ct -> ttype != TOK_CPAREN) + goto baddefine; + break; + } + else + goto baddefine; + } + } + else + { +baddefine: + preproc_throw_error(pp, "bad #define"); +baddefine2: + skip_eol(pp); + lw_free(mname); + while (nargs > 0) + lw_free(arglist[--nargs]); + lw_free(arglist); + return; + } + + for (;;) + { + ct = preproc_next_token(pp); + if (ct -> ttype == TOK_EOL) + break; + if (!tl) + tl = ct; + else + ttl -> next = ct; + ttl = ct; + pp -> curtok = NULL; // tell *_next_token* not to clobber token + } +out: + if (strcmp(mname, "defined") == 0) + { + preproc_throw_warning(pp, "attempt to define 'defined' as a macro not allowed"); + goto baddefine2; + } + else if (symtab_find(pp, mname) != NULL) + { + /* need to do a token compare between the old value and the new value + to decide whether to complain */ + preproc_throw_warning(pp, "%s previous defined", mname); + symtab_undef(pp, mname); + } + symtab_define(pp, mname, tl, nargs, arglist, vargs); + lw_free(mname); + while (nargs > 0) + lw_free(arglist[--nargs]); + lw_free(arglist); + /* no need to check for EOL here */ +} + +static void dir_undef(struct preproc_info *pp) +{ + struct token *ct; + if (pp -> skip_level) + { + skip_eol(pp); + return; + } + + do + { + ct = preproc_next_token(pp); + } while (ct -> ttype == TOK_WSPACE); + + if (ct -> ttype != TOK_IDENT) + { + preproc_throw_error(pp, "Bad #undef"); + skip_eol(pp); + } + + symtab_undef(pp, ct -> strval); + check_eol(pp); +} + +char *streol(struct preproc_info *pp) +{ + struct strbuf *s; + struct token *ct; + int i; + + s = strbuf_new(); + do + { + ct = preproc_next_token(pp); + } while (ct -> ttype == TOK_WSPACE); + + while (ct -> ttype != TOK_EOL) + { + for (i = 0; ct -> strval[i]; i++) + strbuf_add(s, ct -> strval[i]); + ct = preproc_next_token(pp); + } + return strbuf_end(s); +} + +static void dir_error(struct preproc_info *pp) +{ + char *s; + + if (pp -> skip_level) + { + skip_eol(pp); + return; + } + + s = streol(pp); + preproc_throw_error(pp, "%s", s); + lw_free(s); +} + +static void dir_warning(struct preproc_info *pp) +{ + char *s; + + if (pp -> skip_level) + { + skip_eol(pp); + return; + } + + s = streol(pp); + preproc_throw_warning(pp, "%s", s); + lw_free(s); +} + +static void dir_include(struct preproc_info *pp) +{ +} + +static void dir_line(struct preproc_info *pp) +{ +} + +static void dir_pragma(struct preproc_info *pp) +{ + if (pp -> skip_level) + { + skip_eol(pp); + return; + } + + preproc_throw_warning(pp, "Unsupported #pragma"); + skip_eol(pp); +} + +struct { char *name; void (*fn)(struct preproc_info *); } dirlist[] = +{ + { "ifdef", dir_ifdef }, + { "ifndef", dir_ifndef }, + { "if", dir_if }, + { "else", dir_else }, + { "elif", dir_elif }, + { "endif", dir_endif }, + { "define", dir_define }, + { "undef", dir_undef }, + { "include", dir_include }, + { "error", dir_error }, + { "warning", dir_warning }, + { "line", dir_line }, + { "pragma", dir_pragma }, + { NULL, NULL } +}; + +static void process_directive(struct preproc_info *pp) +{ + struct token *ct; + int i; + + do + { + ct = preproc_next_token(pp); + } while (ct -> ttype == TOK_WSPACE); + + // NULL directive + if (ct -> ttype == TOK_EOL) + return; + + if (ct -> ttype != TOK_IDENT) + goto baddir; + + for (i = 0; dirlist[i].name; i++) + { + if (strcmp(dirlist[i].name, ct -> strval) == 0) + { + (*(dirlist[i].fn))(pp); + return; + } + } +baddir: + preproc_throw_error(pp, "Bad preprocessor directive"); + while (ct -> ttype != TOK_EOL) + ct = preproc_next_token(pp); + return; +} + +/* +Evaluate a preprocessor expression +*/ + +/* same as skip_eol() but the EOL token is not consumed */ +static void skip_eoe(struct preproc_info *pp) +{ + skip_eol(pp); + preproc_unget_token(pp, pp -> curtok); +} + +static long eval_expr_real(struct preproc_info *, int); +static long preproc_numval(struct token *); + +static long eval_term_real(struct preproc_info *pp) +{ + long tval = 0; + struct token *ct; + +eval_next: + ct = preproc_next_processed_token_nws(pp); + if (ct -> ttype == TOK_EOL) + { + preproc_throw_error(pp, "Bad expression"); + return 0; + } + + switch (ct -> ttype) + { + case TOK_OPAREN: + tval = eval_expr_real(pp, 0); + ct = preproc_next_processed_token_nws(pp); + if (ct -> ttype != ')') + { + preproc_throw_error(pp, "Unbalanced () in expression"); + skip_eoe(pp); + return 0; + } + return tval; + + case TOK_ADD: // unary + + goto eval_next; + + case TOK_SUB: // unary - + tval = eval_expr_real(pp, 200); + return -tval; + + /* NOTE: we should only get "TOK_IDENT" from an undefined macro */ + case TOK_IDENT: // some sort of function, symbol, etc. + if (strcmp(ct -> strval, "defined")) + { + /* the defined operator */ + /* any number in the "defined" bit will be + treated as a defined symbol, even zero */ + ct = preproc_next_token_nws(pp); + if (ct -> ttype == TOK_OPAREN) + { + ct = preproc_next_token_nws(pp); + if (ct -> ttype != TOK_IDENT) + { + preproc_throw_error(pp, "Bad expression"); + skip_eoe(pp); + return 0; + } + if (symtab_find(pp, ct -> strval) == NULL) + tval = 0; + else + tval = 1; + ct = preproc_next_token_nws(pp); + if (ct -> ttype != TOK_CPAREN) + { + preproc_throw_error(pp, "Bad expression"); + skip_eoe(pp); + return 0; + } + return tval; + } + else if (ct -> ttype == TOK_IDENT) + { + return (symtab_find(pp, ct -> strval) != NULL) ? 1 : 0; + } + preproc_throw_error(pp, "Bad expression"); + skip_eoe(pp); + return 0; + } + /* unknown identifier - it's zero */ + return 0; + + /* numbers */ + case TOK_NUMBER: + return preproc_numval(ct); + + default: + preproc_throw_error(pp, "Bad expression"); + skip_eoe(pp); + return 0; + } + return 0; +} + +static long eval_expr_real(struct preproc_info *pp, int p) +{ + static const struct operinfo + { + int tok; + int prec; + } operators[] = + { + { TOK_ADD, 100 }, + { TOK_SUB, 100 }, + { TOK_STAR, 150 }, + { TOK_DIV, 150 }, + { TOK_MOD, 150 }, + { TOK_LT, 75 }, + { TOK_LE, 75 }, + { TOK_GT, 75 }, + { TOK_GE, 75 }, + { TOK_EQ, 70 }, + { TOK_NE, 70 }, + { TOK_BAND, 30 }, + { TOK_BOR, 25 }, + { TOK_NONE, 0 } + }; + + int op; + long term1, term2, term3; + struct token *ct; + + term1 = eval_term_real(pp); +eval_next: + ct = preproc_next_processed_token_nws(pp); + for (op = 0; operators[op].tok != TOK_NONE; op++) + { + if (operators[op].tok == ct -> ttype) + break; + } + /* if it isn't a recognized operator, assume end of expression */ + if (operators[op].tok == TOK_NONE) + { + preproc_unget_token(pp, ct); + return term1; + } + + /* if new operation is not higher than the current precedence, let the previous op finish */ + if (operators[op].prec <= p) + return term1; + + /* get the second term */ + term2 = eval_expr_real(pp, operators[op].prec); + + switch (operators[op].tok) + { + case TOK_ADD: + term3 = term1 + term2; + break; + + case TOK_SUB: + term3 = term1 - term2; + break; + + case TOK_STAR: + term3 = term1 * term2; + break; + + case TOK_DIV: + if (!term2) + { + preproc_throw_warning(pp, "Division by zero"); + term3 = 0; + break; + } + term3 = term1 / term2; + break; + + case TOK_MOD: + if (!term2) + { + preproc_throw_warning(pp, "Division by zero"); + term3 = 0; + break; + } + term3 = term1 % term2; + break; + + case TOK_BAND: + term3 = (term1 && term2); + break; + + case TOK_BOR: + term3 = (term1 || term2); + break; + + case TOK_EQ: + term3 = (term1 == term2); + break; + + case TOK_NE: + term3 = (term1 != term2); + break; + + case TOK_GT: + term3 = (term1 > term2); + break; + + case TOK_GE: + term3 = (term1 >= term2); + break; + + case TOK_LT: + term3 = (term1 < term2); + break; + + case TOK_LE: + term3 = (term1 <= term2); + break; + + default: + term3 = 0; + break; + } + term1 = term3; + goto eval_next; +} + +static long eval_expr(struct preproc_info *pp) +{ + long rv; + + rv = eval_expr_real(pp, 0); + if (pp -> curtok -> ttype != TOK_EOL) + { + preproc_throw_error(pp, "Bad expression"); + skip_eol(pp); + } + return rv; +} + +/* convert a numeric string to a number */ +long preproc_numval(struct token *t) +{ + return 0; +} + +/* +Below here is the logic for expanding a macro +*/ +static int expand_macro(struct preproc_info *pp, char *mname) +{ + struct symtab_e *s; + struct token *t, *t2, *t3; + struct token **arglist = NULL; + int nargs = 0; + struct expand_e *e; + struct token **exparglist = NULL; + int i; + + s = symtab_find(pp, mname); + if (!s) + return 0; + + for (e = pp -> expand_list; e; e = e -> next) + { + /* don't expand if we're already expanding the same macro */ + if (e -> s == s) + return 0; + } + + if (s -> nargs == -1) + { + /* short circuit NULL expansion */ + if (s -> tl == NULL) + return 1; + + goto expandmacro; + } + + // look for opening paren after optional whitespace + t2 = NULL; + t = NULL; + for (;;) + { + t = preproc_next_token(pp); + if (t -> ttype != TOK_WSPACE && t -> ttype != TOK_EOL) + break; + t -> next = t2; + t2 = t2; + } + if (t -> ttype != TOK_OPAREN) + { + // not a function-like invocation + while (t2) + { + t = t2 -> next; + preproc_unget_token(pp, t2); + t2 = t; + } + return 0; + } + + // parse parameters here + t = preproc_next_token_nws(pp); + nargs = 1; + arglist = lw_alloc(sizeof(struct token *)); + arglist[0] = NULL; + t2 = NULL; + + while (t -> ttype != TOK_CPAREN) + { + if (t -> ttype == TOK_EOF) + { + preproc_throw_error(pp, "Unexpected EOF in macro call"); + break; + } + if (t -> ttype == TOK_EOL) + continue; + if (t -> ttype == TOK_COMMA) + { + if (!(s -> vargs) || (nargs > s -> nargs)) + { + nargs++; + arglist = lw_realloc(arglist, sizeof(struct token *) * nargs); + arglist[nargs - 1] = NULL; + t2 = NULL; + continue; + } + } + if (t2) + { + t2 -> next = token_dup(t); + t2 = t2 -> next; + } + else + { + t2 = token_dup(t); + arglist[nargs - 1] = t2; + } + } + + if (s -> vargs) + { + if (nargs <= s -> nargs) + { + preproc_throw_error(pp, "Wrong number of arguments (%d) for variadic macro %s which takes %d arguments", nargs, mname, s -> nargs); + } + } + else + { + if (s -> nargs != nargs && !(s -> nargs == 0 && nargs == 1 && arglist[nargs - 1])) + { + preproc_throw_error(pp, "Wrong number of arguments (%d) for macro %s which takes %d arguments", nargs, mname, s -> nargs); + } + } + + /* now calculate the pre-expansions of the arguments */ + exparglist = lw_alloc(nargs); + for (i = 0; i < nargs; i++) + { + t2 = NULL; + exparglist[i] = NULL; + // NOTE: do nothing if empty argument + if (arglist[i] == NULL) + continue; + pp -> sourcelist = arglist[i]; + for (;;) + { + t = preproc_next_processed_token(pp); + if (t -> ttype == TOK_EOF) + break; + if (t2) + { + t2 -> next = token_dup(t); + t2 = t2 -> next; + } + else + { + t2 = token_dup(t); + exparglist[i] = t2; + } + } + } + +expandmacro: + t2 = NULL; + t3 = NULL; + + for (t = s -> tl; t; t = t -> next) + { + if (t -> ttype == TOK_IDENT) + { + /* identifiers might need expansion to arguments */ + if (strcmp(t -> strval, "__VA_ARGS__") == 0) + { + i = s -> nargs; + } + else + { + for (i = 0; i < nargs; i++) + { + if (strcmp(t -> strval, s -> params[i]) == 0) + break; + } + } + if ((i == s -> nargs) && !(s -> vargs)) + { + struct token *te; + // expand argument + // FIXME: handle # and ## + for (te = exparglist[i]; te; te = te -> next) + { + if (t2) + { + t2 -> next = token_dup(te); + t2 = t2 -> next; + } + else + { + t3 = token_dup(te); + t2 = t2; + } + } + continue; + } + } + if (t2) + { + t2 -> next = token_dup(t); + t2 = t2 -> next; + } + else + { + t3 = token_dup(t); + t2 = t3; + } + } + + /* put the new expansion in front of the input, if relevant; if we + expanded to nothing, no need to create an expansion record or + put anything into the input queue */ + if (t3) + { + t2 -> next = token_create(TOK_ENDEXPAND, "", -1, -1, ""); + t2 -> next -> next = pp -> tokqueue; + pp -> tokqueue = t3; + + /* set up expansion record */ + e = lw_alloc(sizeof(struct expand_e)); + e -> next = pp -> expand_list; + pp -> expand_list = e; + e -> s = s; + } + + /* now clean up */ + for (i = 0; i < nargs; i++) + { + lw_free(arglist[i]); + lw_free(exparglist[i]); + } + lw_free(arglist); + lw_free(exparglist); + + return 1; +}