Mercurial > hg > index.cgi
comparison lwcc/cc-parse.c @ 498:1bd2d590d734
Rejig parser to eliminate lemon
No longer use lemon for building the parser. It adds too much complexity,
really, and a hand written recursive descent parser is far more flexible.
The current iteration can handle exactly one statement: "return <int>".
author | William Astle <lost@l-w.ca> |
---|---|
date | Thu, 08 Aug 2019 23:32:23 -0600 |
parents | |
children | f3e9732973f1 |
comparison
equal
deleted
inserted
replaced
497:4b865c9d4371 | 498:1bd2d590d734 |
---|---|
1 /* | |
2 lwcc/cc-parse.c | |
3 | |
4 Copyright © 2019 William Astle | |
5 | |
6 This file is part of LWTOOLS. | |
7 | |
8 LWTOOLS is free software: you can redistribute it and/or modify it under the | |
9 terms of the GNU General Public License as published by the Free Software | |
10 Foundation, either version 3 of the License, or (at your option) any later | |
11 version. | |
12 | |
13 This program is distributed in the hope that it will be useful, but WITHOUT | |
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
16 more details. | |
17 | |
18 You should have received a copy of the GNU General Public License along with | |
19 this program. If not, see <http://www.gnu.org/licenses/>. | |
20 */ | |
21 | |
22 #include <string.h> | |
23 | |
24 #include <lw_alloc.h> | |
25 #include <lw_string.h> | |
26 | |
27 #include "cpp.h" | |
28 #include "tree.h" | |
29 | |
30 #define TOK_KW_IF -1 | |
31 #define TOK_KW_ELSE -2 | |
32 #define TOK_KW_WHILE -3 | |
33 #define TOK_KW_DO -4 | |
34 #define TOK_KW_FOR -5 | |
35 #define TOK_KW_VOID -6 | |
36 #define TOK_KW_INT -7 | |
37 #define TOK_KW_CHAR -8 | |
38 #define TOK_KW_SHORT -9 | |
39 #define TOK_KW_LONG -10 | |
40 #define TOK_KW_UNSIGNED -11 | |
41 #define TOK_KW_SIGNED -12 | |
42 #define TOK_KW_FLOAT -13 | |
43 #define TOK_KW_DOUBLE -14 | |
44 #define TOK_KW_STRUCT -15 | |
45 #define TOK_KW_UNION -16 | |
46 #define TOK_KW_TYPEDEF -17 | |
47 #define TOK_KW_STATIC -18 | |
48 #define TOK_KW_SWITCH -19 | |
49 #define TOK_KW_CASE -20 | |
50 #define TOK_KW_DEFAULT -21 | |
51 #define TOK_KW_BREAK -22 | |
52 #define TOK_KW_CONTINUE -23 | |
53 #define TOK_KW_CONST -24 | |
54 #define TOK_KW_AUTO -25 | |
55 #define TOK_KW_ENUM -26 | |
56 #define TOK_KW_REGISTER -27 | |
57 #define TOK_KW_SIZEOF -28 | |
58 #define TOK_KW_VOLATILE -29 | |
59 #define TOK_KW_RETURN -30 | |
60 #define TOK_KW_EXTERN -31 | |
61 #define TOK_KW_GOTO -32 | |
62 #define TOK_TYPENAME -100 | |
63 #define TOK_CONST_INT -150 | |
64 | |
65 static struct { int tok; char *word; } keyword_list[] = { | |
66 { TOK_KW_IF, "if" }, | |
67 { TOK_KW_ELSE, "else" }, | |
68 { TOK_KW_WHILE, "while" }, | |
69 { TOK_KW_DO, "do" }, | |
70 { TOK_KW_FOR, "for" }, | |
71 { TOK_KW_VOID, "void" }, | |
72 { TOK_KW_INT, "int" }, | |
73 { TOK_KW_CHAR, "char" }, | |
74 { TOK_KW_SHORT, "short" }, | |
75 { TOK_KW_LONG, "long" }, | |
76 { TOK_KW_UNSIGNED, "unsigned" }, | |
77 { TOK_KW_SIGNED, "signed" }, | |
78 { TOK_KW_FLOAT, "float" }, | |
79 { TOK_KW_DOUBLE, "double" }, | |
80 { TOK_KW_STRUCT, "struct" }, | |
81 { TOK_KW_UNION, "union" }, | |
82 { TOK_KW_TYPEDEF, "typedef" }, | |
83 { TOK_KW_STATIC, "static" }, | |
84 { TOK_KW_SWITCH, "switch" }, | |
85 { TOK_KW_CASE, "case" }, | |
86 { TOK_KW_DEFAULT, "default" }, | |
87 { TOK_KW_BREAK, "break" }, | |
88 { TOK_KW_CONTINUE, "continue" }, | |
89 { TOK_KW_CONST, "const" }, | |
90 { TOK_KW_AUTO, "auto" }, | |
91 { TOK_KW_ENUM, "enum" }, | |
92 { TOK_KW_REGISTER, "register" }, | |
93 { TOK_KW_SIZEOF, "sizeof" }, | |
94 { TOK_KW_VOLATILE, "volatile" }, | |
95 { TOK_KW_RETURN, "return" }, | |
96 { TOK_KW_EXTERN, "extern" }, | |
97 { TOK_KW_GOTO, "goto" }, | |
98 { TOK_NONE, "" } | |
99 }; | |
100 | |
101 | |
102 struct parser_state | |
103 { | |
104 struct preproc_info *pp; // preprocessor data | |
105 struct token *curtok; // the current token | |
106 }; | |
107 | |
108 | |
109 struct token *parse_next(struct parser_state *ps) | |
110 { | |
111 struct token *tok; | |
112 int i; | |
113 | |
114 for (;;) | |
115 { | |
116 tok = preproc_next(ps -> pp); | |
117 if (tok -> ttype == TOK_WSPACE) | |
118 continue; | |
119 if (tok -> ttype == TOK_EOL) | |
120 continue; | |
121 if (tok -> ttype == TOK_CHAR) | |
122 { | |
123 // random character | |
124 fprintf(stderr, "Random character %02x\n", tok -> strval[0]); | |
125 if (tok -> strval[0] < 32 || tok -> strval[0] > 126) | |
126 continue; | |
127 } | |
128 break; | |
129 } | |
130 if (tok -> ttype == TOK_IDENT) | |
131 { | |
132 // convert identifier tokens to their respective meanings | |
133 for (i = 0; keyword_list[i].tok != TOK_NONE; i++) | |
134 { | |
135 if (strcmp(keyword_list[i].word, tok -> strval) == 0) | |
136 { | |
137 tok -> ttype = keyword_list[i].tok; | |
138 goto out; | |
139 } | |
140 } | |
141 // check for registered types here | |
142 } | |
143 else if (tok -> ttype == TOK_NUMBER) | |
144 { | |
145 // look for anything that isn't 0-9 | |
146 for (i = 0; tok -> strval[i]; i++) | |
147 { | |
148 if (tok -> strval[i] < '0' || tok -> strval[i] > '9') | |
149 break; | |
150 } | |
151 if (tok -> strval[i] == 0) | |
152 tok -> ttype = TOK_CONST_INT; | |
153 } | |
154 out: | |
155 fprintf(stderr, "Lexed: "); | |
156 token_print(tok, stderr); | |
157 fprintf(stderr, " (%d)\n", tok -> ttype); | |
158 if (ps -> curtok) | |
159 token_free(ps -> curtok); | |
160 ps -> curtok = tok; | |
161 return tok; | |
162 } | |
163 | |
164 void parse_generr(struct parser_state *ps, char *tag) | |
165 { | |
166 fprintf(stderr, "(%s) Unexpected token (%d): ", tag, ps -> curtok -> ttype); | |
167 token_print(ps -> curtok, stderr); | |
168 fprintf(stderr, "\n"); | |
169 | |
170 } | |
171 | |
172 node_t *parse_expr(struct parser_state *ps) | |
173 { | |
174 node_t *rv; | |
175 if (ps -> curtok -> ttype == TOK_CONST_INT) | |
176 { | |
177 rv = node_create(NODE_CONST_INT, ps -> curtok -> strval); | |
178 parse_next(ps); | |
179 return rv; | |
180 } | |
181 parse_generr(ps, "expr"); | |
182 return NULL; | |
183 } | |
184 | |
185 node_t *parse_statement(struct parser_state *ps) | |
186 { | |
187 node_t *rv; | |
188 node_t *n; | |
189 | |
190 switch (ps -> curtok -> ttype) | |
191 { | |
192 case TOK_KW_RETURN: | |
193 parse_next(ps); | |
194 n = parse_expr(ps); | |
195 if (!n) | |
196 { | |
197 parse_generr(ps, "statement"); | |
198 return NULL; | |
199 } | |
200 rv = node_create(NODE_STMT_RETURN); | |
201 node_addchild(rv, n); | |
202 break; | |
203 | |
204 default: | |
205 return NULL; | |
206 } | |
207 | |
208 if (ps -> curtok -> ttype != TOK_EOS) | |
209 parse_generr(ps, "statement"); | |
210 else | |
211 parse_next(ps); | |
212 return rv; | |
213 } | |
214 | |
215 node_t *parse_globaldecl(struct parser_state *ps) | |
216 { | |
217 node_t *rv = NULL; | |
218 node_t *stmt; | |
219 char *fnname = NULL; | |
220 if (ps -> curtok -> ttype == TOK_KW_INT) | |
221 { | |
222 // variable name | |
223 parse_next(ps); | |
224 if (ps -> curtok -> ttype != TOK_IDENT) | |
225 goto error; | |
226 fnname = lw_strdup(ps -> curtok -> strval); | |
227 parse_next(ps); | |
228 if (ps -> curtok -> ttype != TOK_OPAREN) | |
229 goto error; | |
230 parse_next(ps); | |
231 if (ps -> curtok -> ttype != TOK_CPAREN) | |
232 goto error; | |
233 parse_next(ps); | |
234 if (ps -> curtok -> ttype != TOK_OBRACE) | |
235 goto error; | |
236 parse_next(ps); | |
237 stmt = parse_statement(ps); | |
238 if (!stmt) | |
239 goto error; | |
240 rv = node_create(NODE_FUNDEF, node_create(NODE_TYPE_INT), node_create(NODE_IDENT, fnname), node_create(NODE_FUNARGS), stmt); | |
241 if (ps -> curtok -> ttype != TOK_CBRACE) | |
242 goto error; | |
243 parse_next(ps); | |
244 lw_free(fnname); | |
245 return rv; | |
246 } | |
247 | |
248 | |
249 error: | |
250 if (fnname) | |
251 lw_free(fnname); | |
252 parse_generr(ps, "globaldecl"); | |
253 return rv; | |
254 } | |
255 | |
256 node_t *parse_program(struct preproc_info *pp) | |
257 { | |
258 node_t *rv; | |
259 node_t *node; | |
260 struct parser_state ps; | |
261 | |
262 ps.pp = pp; | |
263 ps.curtok = NULL; | |
264 | |
265 rv = node_create(NODE_PROGRAM); | |
266 | |
267 // prime the parser | |
268 parse_next(&ps); | |
269 while (ps.curtok -> ttype != TOK_EOF) | |
270 { | |
271 node = parse_globaldecl(&ps); | |
272 if (!node) | |
273 break; | |
274 node_addchild(rv, node); | |
275 } | |
276 | |
277 return rv; | |
278 } |