Mercurial > hg > index.cgi
annotate lwcc/cc-parse.c @ 501:f3e9732973f1
Add basic integer operations to lwcc
Add +, -, *, and / to lwcc parser and code generator. Multiplication and
division require helper functions in a yet to be created support library.
These operations are integer only for the moment.
author | William Astle <lost@l-w.ca> |
---|---|
date | Tue, 24 Sep 2019 22:07:56 -0600 |
parents | 1bd2d590d734 |
children | 14a40f8bb4eb |
rev | line source |
---|---|
498 | 1 /* |
2 lwcc/cc-parse.c | |
3 | |
4 Copyright © 2019 William Astle | |
5 | |
6 This file is part of LWTOOLS. | |
7 | |
8 LWTOOLS is free software: you can redistribute it and/or modify it under the | |
9 terms of the GNU General Public License as published by the Free Software | |
10 Foundation, either version 3 of the License, or (at your option) any later | |
11 version. | |
12 | |
13 This program is distributed in the hope that it will be useful, but WITHOUT | |
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
16 more details. | |
17 | |
18 You should have received a copy of the GNU General Public License along with | |
19 this program. If not, see <http://www.gnu.org/licenses/>. | |
20 */ | |
21 | |
22 #include <string.h> | |
23 | |
24 #include <lw_alloc.h> | |
25 #include <lw_string.h> | |
26 | |
27 #include "cpp.h" | |
28 #include "tree.h" | |
29 | |
30 #define TOK_KW_IF -1 | |
31 #define TOK_KW_ELSE -2 | |
32 #define TOK_KW_WHILE -3 | |
33 #define TOK_KW_DO -4 | |
34 #define TOK_KW_FOR -5 | |
35 #define TOK_KW_VOID -6 | |
36 #define TOK_KW_INT -7 | |
37 #define TOK_KW_CHAR -8 | |
38 #define TOK_KW_SHORT -9 | |
39 #define TOK_KW_LONG -10 | |
40 #define TOK_KW_UNSIGNED -11 | |
41 #define TOK_KW_SIGNED -12 | |
42 #define TOK_KW_FLOAT -13 | |
43 #define TOK_KW_DOUBLE -14 | |
44 #define TOK_KW_STRUCT -15 | |
45 #define TOK_KW_UNION -16 | |
46 #define TOK_KW_TYPEDEF -17 | |
47 #define TOK_KW_STATIC -18 | |
48 #define TOK_KW_SWITCH -19 | |
49 #define TOK_KW_CASE -20 | |
50 #define TOK_KW_DEFAULT -21 | |
51 #define TOK_KW_BREAK -22 | |
52 #define TOK_KW_CONTINUE -23 | |
53 #define TOK_KW_CONST -24 | |
54 #define TOK_KW_AUTO -25 | |
55 #define TOK_KW_ENUM -26 | |
56 #define TOK_KW_REGISTER -27 | |
57 #define TOK_KW_SIZEOF -28 | |
58 #define TOK_KW_VOLATILE -29 | |
59 #define TOK_KW_RETURN -30 | |
60 #define TOK_KW_EXTERN -31 | |
61 #define TOK_KW_GOTO -32 | |
62 #define TOK_TYPENAME -100 | |
63 #define TOK_CONST_INT -150 | |
64 | |
65 static struct { int tok; char *word; } keyword_list[] = { | |
66 { TOK_KW_IF, "if" }, | |
67 { TOK_KW_ELSE, "else" }, | |
68 { TOK_KW_WHILE, "while" }, | |
69 { TOK_KW_DO, "do" }, | |
70 { TOK_KW_FOR, "for" }, | |
71 { TOK_KW_VOID, "void" }, | |
72 { TOK_KW_INT, "int" }, | |
73 { TOK_KW_CHAR, "char" }, | |
74 { TOK_KW_SHORT, "short" }, | |
75 { TOK_KW_LONG, "long" }, | |
76 { TOK_KW_UNSIGNED, "unsigned" }, | |
77 { TOK_KW_SIGNED, "signed" }, | |
78 { TOK_KW_FLOAT, "float" }, | |
79 { TOK_KW_DOUBLE, "double" }, | |
80 { TOK_KW_STRUCT, "struct" }, | |
81 { TOK_KW_UNION, "union" }, | |
82 { TOK_KW_TYPEDEF, "typedef" }, | |
83 { TOK_KW_STATIC, "static" }, | |
84 { TOK_KW_SWITCH, "switch" }, | |
85 { TOK_KW_CASE, "case" }, | |
86 { TOK_KW_DEFAULT, "default" }, | |
87 { TOK_KW_BREAK, "break" }, | |
88 { TOK_KW_CONTINUE, "continue" }, | |
89 { TOK_KW_CONST, "const" }, | |
90 { TOK_KW_AUTO, "auto" }, | |
91 { TOK_KW_ENUM, "enum" }, | |
92 { TOK_KW_REGISTER, "register" }, | |
93 { TOK_KW_SIZEOF, "sizeof" }, | |
94 { TOK_KW_VOLATILE, "volatile" }, | |
95 { TOK_KW_RETURN, "return" }, | |
96 { TOK_KW_EXTERN, "extern" }, | |
97 { TOK_KW_GOTO, "goto" }, | |
98 { TOK_NONE, "" } | |
99 }; | |
100 | |
101 | |
102 struct parser_state | |
103 { | |
104 struct preproc_info *pp; // preprocessor data | |
105 struct token *curtok; // the current token | |
106 }; | |
107 | |
108 | |
109 struct token *parse_next(struct parser_state *ps) | |
110 { | |
111 struct token *tok; | |
112 int i; | |
113 | |
114 for (;;) | |
115 { | |
116 tok = preproc_next(ps -> pp); | |
117 if (tok -> ttype == TOK_WSPACE) | |
118 continue; | |
119 if (tok -> ttype == TOK_EOL) | |
120 continue; | |
121 if (tok -> ttype == TOK_CHAR) | |
122 { | |
123 // random character | |
124 fprintf(stderr, "Random character %02x\n", tok -> strval[0]); | |
125 if (tok -> strval[0] < 32 || tok -> strval[0] > 126) | |
126 continue; | |
127 } | |
128 break; | |
129 } | |
130 if (tok -> ttype == TOK_IDENT) | |
131 { | |
132 // convert identifier tokens to their respective meanings | |
133 for (i = 0; keyword_list[i].tok != TOK_NONE; i++) | |
134 { | |
135 if (strcmp(keyword_list[i].word, tok -> strval) == 0) | |
136 { | |
137 tok -> ttype = keyword_list[i].tok; | |
138 goto out; | |
139 } | |
140 } | |
141 // check for registered types here | |
142 } | |
143 else if (tok -> ttype == TOK_NUMBER) | |
144 { | |
145 // look for anything that isn't 0-9 | |
146 for (i = 0; tok -> strval[i]; i++) | |
147 { | |
148 if (tok -> strval[i] < '0' || tok -> strval[i] > '9') | |
149 break; | |
150 } | |
151 if (tok -> strval[i] == 0) | |
152 tok -> ttype = TOK_CONST_INT; | |
153 } | |
154 out: | |
155 fprintf(stderr, "Lexed: "); | |
156 token_print(tok, stderr); | |
157 fprintf(stderr, " (%d)\n", tok -> ttype); | |
158 if (ps -> curtok) | |
159 token_free(ps -> curtok); | |
160 ps -> curtok = tok; | |
161 return tok; | |
162 } | |
163 | |
164 void parse_generr(struct parser_state *ps, char *tag) | |
165 { | |
166 fprintf(stderr, "(%s) Unexpected token (%d): ", tag, ps -> curtok -> ttype); | |
167 token_print(ps -> curtok, stderr); | |
168 fprintf(stderr, "\n"); | |
169 | |
170 } | |
171 | |
501
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
172 node_t *parse_term_real(struct parser_state *ps) |
498 | 173 { |
174 node_t *rv; | |
501
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
175 |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
176 switch (ps -> curtok -> ttype) |
498 | 177 { |
501
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
178 case TOK_CONST_INT: |
498 | 179 rv = node_create(NODE_CONST_INT, ps -> curtok -> strval); |
180 parse_next(ps); | |
181 return rv; | |
182 } | |
501
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
183 |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
184 parse_generr(ps, "term"); |
498 | 185 return NULL; |
186 } | |
187 | |
501
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
188 node_t *parse_expr_real(struct parser_state *ps, int prec) |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
189 { |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
190 static struct { int tok; int nodetype; int prec; } operlist[] = { |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
191 { TOK_STAR, NODE_OPER_TIMES, 150 }, |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
192 { TOK_DIV, NODE_OPER_DIVIDE, 150 }, |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
193 { TOK_ADD, NODE_OPER_PLUS, 100 }, |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
194 { TOK_SUB, NODE_OPER_MINUS, 100 }, |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
195 { 0, 0, 0 } |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
196 }; |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
197 node_t *term1, *term2; |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
198 int i; |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
199 |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
200 term1 = parse_term_real(ps); |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
201 if (!term1) |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
202 return NULL; |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
203 |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
204 nextoper: |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
205 for (i = 0; operlist[i].tok; i++) |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
206 if (operlist[i].tok == ps -> curtok -> ttype) |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
207 break; |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
208 fprintf(stderr, "Matched operator: %d, %d\n", operlist[i].tok, operlist[i].prec); |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
209 // if we hit the end of the expression, return |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
210 if (operlist[i].tok == 0) |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
211 return term1; |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
212 |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
213 // is the next operator less or same precedence? |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
214 if (operlist[i].prec <= prec) |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
215 return term1; |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
216 |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
217 parse_next(ps); |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
218 term2 = parse_expr_real(ps, operlist[i].prec); |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
219 if (!term2) |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
220 { |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
221 parse_generr(ps, "expr"); |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
222 node_destroy(term2); |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
223 } |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
224 |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
225 term1 = node_create(operlist[i].nodetype, term1, term2); |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
226 term2 = NULL; |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
227 goto nextoper; |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
228 } |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
229 |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
230 node_t *parse_expr(struct parser_state *ps) |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
231 { |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
232 return parse_expr_real(ps, 0); |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
233 } |
f3e9732973f1
Add basic integer operations to lwcc
William Astle <lost@l-w.ca>
parents:
498
diff
changeset
|
234 |
498 | 235 node_t *parse_statement(struct parser_state *ps) |
236 { | |
237 node_t *rv; | |
238 node_t *n; | |
239 | |
240 switch (ps -> curtok -> ttype) | |
241 { | |
242 case TOK_KW_RETURN: | |
243 parse_next(ps); | |
244 n = parse_expr(ps); | |
245 if (!n) | |
246 { | |
247 parse_generr(ps, "statement"); | |
248 return NULL; | |
249 } | |
250 rv = node_create(NODE_STMT_RETURN); | |
251 node_addchild(rv, n); | |
252 break; | |
253 | |
254 default: | |
255 return NULL; | |
256 } | |
257 | |
258 if (ps -> curtok -> ttype != TOK_EOS) | |
259 parse_generr(ps, "statement"); | |
260 else | |
261 parse_next(ps); | |
262 return rv; | |
263 } | |
264 | |
265 node_t *parse_globaldecl(struct parser_state *ps) | |
266 { | |
267 node_t *rv = NULL; | |
268 node_t *stmt; | |
269 char *fnname = NULL; | |
270 if (ps -> curtok -> ttype == TOK_KW_INT) | |
271 { | |
272 // variable name | |
273 parse_next(ps); | |
274 if (ps -> curtok -> ttype != TOK_IDENT) | |
275 goto error; | |
276 fnname = lw_strdup(ps -> curtok -> strval); | |
277 parse_next(ps); | |
278 if (ps -> curtok -> ttype != TOK_OPAREN) | |
279 goto error; | |
280 parse_next(ps); | |
281 if (ps -> curtok -> ttype != TOK_CPAREN) | |
282 goto error; | |
283 parse_next(ps); | |
284 if (ps -> curtok -> ttype != TOK_OBRACE) | |
285 goto error; | |
286 parse_next(ps); | |
287 stmt = parse_statement(ps); | |
288 if (!stmt) | |
289 goto error; | |
290 rv = node_create(NODE_FUNDEF, node_create(NODE_TYPE_INT), node_create(NODE_IDENT, fnname), node_create(NODE_FUNARGS), stmt); | |
291 if (ps -> curtok -> ttype != TOK_CBRACE) | |
292 goto error; | |
293 parse_next(ps); | |
294 lw_free(fnname); | |
295 return rv; | |
296 } | |
297 | |
298 | |
299 error: | |
300 if (fnname) | |
301 lw_free(fnname); | |
302 parse_generr(ps, "globaldecl"); | |
303 return rv; | |
304 } | |
305 | |
306 node_t *parse_program(struct preproc_info *pp) | |
307 { | |
308 node_t *rv; | |
309 node_t *node; | |
310 struct parser_state ps; | |
311 | |
312 ps.pp = pp; | |
313 ps.curtok = NULL; | |
314 | |
315 rv = node_create(NODE_PROGRAM); | |
316 | |
317 // prime the parser | |
318 parse_next(&ps); | |
319 while (ps.curtok -> ttype != TOK_EOF) | |
320 { | |
321 node = parse_globaldecl(&ps); | |
322 if (!node) | |
323 break; | |
324 node_addchild(rv, node); | |
325 } | |
326 | |
327 return rv; | |
328 } |