Mercurial > hg > index.cgi
view lwcc/cc-parse.c @ 498:1bd2d590d734
Rejig parser to eliminate lemon
No longer use lemon for building the parser. It adds too much complexity,
really, and a hand written recursive descent parser is far more flexible.
The current iteration can handle exactly one statement: "return <int>".
author | William Astle <lost@l-w.ca> |
---|---|
date | Thu, 08 Aug 2019 23:32:23 -0600 |
parents | |
children | f3e9732973f1 |
line wrap: on
line source
/* lwcc/cc-parse.c Copyright © 2019 William Astle This file is part of LWTOOLS. LWTOOLS is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <string.h> #include <lw_alloc.h> #include <lw_string.h> #include "cpp.h" #include "tree.h" #define TOK_KW_IF -1 #define TOK_KW_ELSE -2 #define TOK_KW_WHILE -3 #define TOK_KW_DO -4 #define TOK_KW_FOR -5 #define TOK_KW_VOID -6 #define TOK_KW_INT -7 #define TOK_KW_CHAR -8 #define TOK_KW_SHORT -9 #define TOK_KW_LONG -10 #define TOK_KW_UNSIGNED -11 #define TOK_KW_SIGNED -12 #define TOK_KW_FLOAT -13 #define TOK_KW_DOUBLE -14 #define TOK_KW_STRUCT -15 #define TOK_KW_UNION -16 #define TOK_KW_TYPEDEF -17 #define TOK_KW_STATIC -18 #define TOK_KW_SWITCH -19 #define TOK_KW_CASE -20 #define TOK_KW_DEFAULT -21 #define TOK_KW_BREAK -22 #define TOK_KW_CONTINUE -23 #define TOK_KW_CONST -24 #define TOK_KW_AUTO -25 #define TOK_KW_ENUM -26 #define TOK_KW_REGISTER -27 #define TOK_KW_SIZEOF -28 #define TOK_KW_VOLATILE -29 #define TOK_KW_RETURN -30 #define TOK_KW_EXTERN -31 #define TOK_KW_GOTO -32 #define TOK_TYPENAME -100 #define TOK_CONST_INT -150 static struct { int tok; char *word; } keyword_list[] = { { TOK_KW_IF, "if" }, { TOK_KW_ELSE, "else" }, { TOK_KW_WHILE, "while" }, { TOK_KW_DO, "do" }, { TOK_KW_FOR, "for" }, { TOK_KW_VOID, "void" }, { TOK_KW_INT, "int" }, { TOK_KW_CHAR, "char" }, { TOK_KW_SHORT, "short" }, { TOK_KW_LONG, "long" }, { TOK_KW_UNSIGNED, "unsigned" }, { TOK_KW_SIGNED, "signed" }, { TOK_KW_FLOAT, "float" }, { TOK_KW_DOUBLE, "double" }, { TOK_KW_STRUCT, "struct" }, { TOK_KW_UNION, "union" }, { TOK_KW_TYPEDEF, "typedef" }, { TOK_KW_STATIC, "static" }, { TOK_KW_SWITCH, "switch" }, { TOK_KW_CASE, "case" }, { TOK_KW_DEFAULT, "default" }, { TOK_KW_BREAK, "break" }, { TOK_KW_CONTINUE, "continue" }, { TOK_KW_CONST, "const" }, { TOK_KW_AUTO, "auto" }, { TOK_KW_ENUM, "enum" }, { TOK_KW_REGISTER, "register" }, { TOK_KW_SIZEOF, "sizeof" }, { TOK_KW_VOLATILE, "volatile" }, { TOK_KW_RETURN, "return" }, { TOK_KW_EXTERN, "extern" }, { TOK_KW_GOTO, "goto" }, { TOK_NONE, "" } }; struct parser_state { struct preproc_info *pp; // preprocessor data struct token *curtok; // the current token }; struct token *parse_next(struct parser_state *ps) { struct token *tok; int i; for (;;) { tok = preproc_next(ps -> pp); if (tok -> ttype == TOK_WSPACE) continue; if (tok -> ttype == TOK_EOL) continue; if (tok -> ttype == TOK_CHAR) { // random character fprintf(stderr, "Random character %02x\n", tok -> strval[0]); if (tok -> strval[0] < 32 || tok -> strval[0] > 126) continue; } break; } if (tok -> ttype == TOK_IDENT) { // convert identifier tokens to their respective meanings for (i = 0; keyword_list[i].tok != TOK_NONE; i++) { if (strcmp(keyword_list[i].word, tok -> strval) == 0) { tok -> ttype = keyword_list[i].tok; goto out; } } // check for registered types here } else if (tok -> ttype == TOK_NUMBER) { // look for anything that isn't 0-9 for (i = 0; tok -> strval[i]; i++) { if (tok -> strval[i] < '0' || tok -> strval[i] > '9') break; } if (tok -> strval[i] == 0) tok -> ttype = TOK_CONST_INT; } out: fprintf(stderr, "Lexed: "); token_print(tok, stderr); fprintf(stderr, " (%d)\n", tok -> ttype); if (ps -> curtok) token_free(ps -> curtok); ps -> curtok = tok; return tok; } void parse_generr(struct parser_state *ps, char *tag) { fprintf(stderr, "(%s) Unexpected token (%d): ", tag, ps -> curtok -> ttype); token_print(ps -> curtok, stderr); fprintf(stderr, "\n"); } node_t *parse_expr(struct parser_state *ps) { node_t *rv; if (ps -> curtok -> ttype == TOK_CONST_INT) { rv = node_create(NODE_CONST_INT, ps -> curtok -> strval); parse_next(ps); return rv; } parse_generr(ps, "expr"); return NULL; } node_t *parse_statement(struct parser_state *ps) { node_t *rv; node_t *n; switch (ps -> curtok -> ttype) { case TOK_KW_RETURN: parse_next(ps); n = parse_expr(ps); if (!n) { parse_generr(ps, "statement"); return NULL; } rv = node_create(NODE_STMT_RETURN); node_addchild(rv, n); break; default: return NULL; } if (ps -> curtok -> ttype != TOK_EOS) parse_generr(ps, "statement"); else parse_next(ps); return rv; } node_t *parse_globaldecl(struct parser_state *ps) { node_t *rv = NULL; node_t *stmt; char *fnname = NULL; if (ps -> curtok -> ttype == TOK_KW_INT) { // variable name parse_next(ps); if (ps -> curtok -> ttype != TOK_IDENT) goto error; fnname = lw_strdup(ps -> curtok -> strval); parse_next(ps); if (ps -> curtok -> ttype != TOK_OPAREN) goto error; parse_next(ps); if (ps -> curtok -> ttype != TOK_CPAREN) goto error; parse_next(ps); if (ps -> curtok -> ttype != TOK_OBRACE) goto error; parse_next(ps); stmt = parse_statement(ps); if (!stmt) goto error; rv = node_create(NODE_FUNDEF, node_create(NODE_TYPE_INT), node_create(NODE_IDENT, fnname), node_create(NODE_FUNARGS), stmt); if (ps -> curtok -> ttype != TOK_CBRACE) goto error; parse_next(ps); lw_free(fnname); return rv; } error: if (fnname) lw_free(fnname); parse_generr(ps, "globaldecl"); return rv; } node_t *parse_program(struct preproc_info *pp) { node_t *rv; node_t *node; struct parser_state ps; ps.pp = pp; ps.curtok = NULL; rv = node_create(NODE_PROGRAM); // prime the parser parse_next(&ps); while (ps.curtok -> ttype != TOK_EOF) { node = parse_globaldecl(&ps); if (!node) break; node_addchild(rv, node); } return rv; }