Mercurial > hg > index.cgi
diff lwcc/cc-parse.c @ 498:1bd2d590d734
Rejig parser to eliminate lemon
No longer use lemon for building the parser. It adds too much complexity,
really, and a hand written recursive descent parser is far more flexible.
The current iteration can handle exactly one statement: "return <int>".
author | William Astle <lost@l-w.ca> |
---|---|
date | Thu, 08 Aug 2019 23:32:23 -0600 |
parents | |
children | f3e9732973f1 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lwcc/cc-parse.c Thu Aug 08 23:32:23 2019 -0600 @@ -0,0 +1,278 @@ +/* +lwcc/cc-parse.c + +Copyright © 2019 William Astle + +This file is part of LWTOOLS. + +LWTOOLS is free software: you can redistribute it and/or modify it under the +terms of the GNU General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <string.h> + +#include <lw_alloc.h> +#include <lw_string.h> + +#include "cpp.h" +#include "tree.h" + +#define TOK_KW_IF -1 +#define TOK_KW_ELSE -2 +#define TOK_KW_WHILE -3 +#define TOK_KW_DO -4 +#define TOK_KW_FOR -5 +#define TOK_KW_VOID -6 +#define TOK_KW_INT -7 +#define TOK_KW_CHAR -8 +#define TOK_KW_SHORT -9 +#define TOK_KW_LONG -10 +#define TOK_KW_UNSIGNED -11 +#define TOK_KW_SIGNED -12 +#define TOK_KW_FLOAT -13 +#define TOK_KW_DOUBLE -14 +#define TOK_KW_STRUCT -15 +#define TOK_KW_UNION -16 +#define TOK_KW_TYPEDEF -17 +#define TOK_KW_STATIC -18 +#define TOK_KW_SWITCH -19 +#define TOK_KW_CASE -20 +#define TOK_KW_DEFAULT -21 +#define TOK_KW_BREAK -22 +#define TOK_KW_CONTINUE -23 +#define TOK_KW_CONST -24 +#define TOK_KW_AUTO -25 +#define TOK_KW_ENUM -26 +#define TOK_KW_REGISTER -27 +#define TOK_KW_SIZEOF -28 +#define TOK_KW_VOLATILE -29 +#define TOK_KW_RETURN -30 +#define TOK_KW_EXTERN -31 +#define TOK_KW_GOTO -32 +#define TOK_TYPENAME -100 +#define TOK_CONST_INT -150 + +static struct { int tok; char *word; } keyword_list[] = { + { TOK_KW_IF, "if" }, + { TOK_KW_ELSE, "else" }, + { TOK_KW_WHILE, "while" }, + { TOK_KW_DO, "do" }, + { TOK_KW_FOR, "for" }, + { TOK_KW_VOID, "void" }, + { TOK_KW_INT, "int" }, + { TOK_KW_CHAR, "char" }, + { TOK_KW_SHORT, "short" }, + { TOK_KW_LONG, "long" }, + { TOK_KW_UNSIGNED, "unsigned" }, + { TOK_KW_SIGNED, "signed" }, + { TOK_KW_FLOAT, "float" }, + { TOK_KW_DOUBLE, "double" }, + { TOK_KW_STRUCT, "struct" }, + { TOK_KW_UNION, "union" }, + { TOK_KW_TYPEDEF, "typedef" }, + { TOK_KW_STATIC, "static" }, + { TOK_KW_SWITCH, "switch" }, + { TOK_KW_CASE, "case" }, + { TOK_KW_DEFAULT, "default" }, + { TOK_KW_BREAK, "break" }, + { TOK_KW_CONTINUE, "continue" }, + { TOK_KW_CONST, "const" }, + { TOK_KW_AUTO, "auto" }, + { TOK_KW_ENUM, "enum" }, + { TOK_KW_REGISTER, "register" }, + { TOK_KW_SIZEOF, "sizeof" }, + { TOK_KW_VOLATILE, "volatile" }, + { TOK_KW_RETURN, "return" }, + { TOK_KW_EXTERN, "extern" }, + { TOK_KW_GOTO, "goto" }, + { TOK_NONE, "" } +}; + + +struct parser_state +{ + struct preproc_info *pp; // preprocessor data + struct token *curtok; // the current token +}; + + +struct token *parse_next(struct parser_state *ps) +{ + struct token *tok; + int i; + + for (;;) + { + tok = preproc_next(ps -> pp); + if (tok -> ttype == TOK_WSPACE) + continue; + if (tok -> ttype == TOK_EOL) + continue; + if (tok -> ttype == TOK_CHAR) + { + // random character + fprintf(stderr, "Random character %02x\n", tok -> strval[0]); + if (tok -> strval[0] < 32 || tok -> strval[0] > 126) + continue; + } + break; + } + if (tok -> ttype == TOK_IDENT) + { + // convert identifier tokens to their respective meanings + for (i = 0; keyword_list[i].tok != TOK_NONE; i++) + { + if (strcmp(keyword_list[i].word, tok -> strval) == 0) + { + tok -> ttype = keyword_list[i].tok; + goto out; + } + } + // check for registered types here + } + else if (tok -> ttype == TOK_NUMBER) + { + // look for anything that isn't 0-9 + for (i = 0; tok -> strval[i]; i++) + { + if (tok -> strval[i] < '0' || tok -> strval[i] > '9') + break; + } + if (tok -> strval[i] == 0) + tok -> ttype = TOK_CONST_INT; + } +out: + fprintf(stderr, "Lexed: "); + token_print(tok, stderr); + fprintf(stderr, " (%d)\n", tok -> ttype); + if (ps -> curtok) + token_free(ps -> curtok); + ps -> curtok = tok; + return tok; +} + +void parse_generr(struct parser_state *ps, char *tag) +{ + fprintf(stderr, "(%s) Unexpected token (%d): ", tag, ps -> curtok -> ttype); + token_print(ps -> curtok, stderr); + fprintf(stderr, "\n"); + +} + +node_t *parse_expr(struct parser_state *ps) +{ + node_t *rv; + if (ps -> curtok -> ttype == TOK_CONST_INT) + { + rv = node_create(NODE_CONST_INT, ps -> curtok -> strval); + parse_next(ps); + return rv; + } + parse_generr(ps, "expr"); + return NULL; +} + +node_t *parse_statement(struct parser_state *ps) +{ + node_t *rv; + node_t *n; + + switch (ps -> curtok -> ttype) + { + case TOK_KW_RETURN: + parse_next(ps); + n = parse_expr(ps); + if (!n) + { + parse_generr(ps, "statement"); + return NULL; + } + rv = node_create(NODE_STMT_RETURN); + node_addchild(rv, n); + break; + + default: + return NULL; + } + + if (ps -> curtok -> ttype != TOK_EOS) + parse_generr(ps, "statement"); + else + parse_next(ps); + return rv; +} + +node_t *parse_globaldecl(struct parser_state *ps) +{ + node_t *rv = NULL; + node_t *stmt; + char *fnname = NULL; + if (ps -> curtok -> ttype == TOK_KW_INT) + { + // variable name + parse_next(ps); + if (ps -> curtok -> ttype != TOK_IDENT) + goto error; + fnname = lw_strdup(ps -> curtok -> strval); + parse_next(ps); + if (ps -> curtok -> ttype != TOK_OPAREN) + goto error; + parse_next(ps); + if (ps -> curtok -> ttype != TOK_CPAREN) + goto error; + parse_next(ps); + if (ps -> curtok -> ttype != TOK_OBRACE) + goto error; + parse_next(ps); + stmt = parse_statement(ps); + if (!stmt) + goto error; + rv = node_create(NODE_FUNDEF, node_create(NODE_TYPE_INT), node_create(NODE_IDENT, fnname), node_create(NODE_FUNARGS), stmt); + if (ps -> curtok -> ttype != TOK_CBRACE) + goto error; + parse_next(ps); + lw_free(fnname); + return rv; + } + + +error: + if (fnname) + lw_free(fnname); + parse_generr(ps, "globaldecl"); + return rv; +} + +node_t *parse_program(struct preproc_info *pp) +{ + node_t *rv; + node_t *node; + struct parser_state ps; + + ps.pp = pp; + ps.curtok = NULL; + + rv = node_create(NODE_PROGRAM); + + // prime the parser + parse_next(&ps); + while (ps.curtok -> ttype != TOK_EOF) + { + node = parse_globaldecl(&ps); + if (!node) + break; + node_addchild(rv, node); + } + + return rv; +}