Mercurial > hg > index.cgi
comparison lwbasic/attic/parser.c @ 185:cca933d32298
Clean up some mess in lwbasic directory
author | lost@l-w.ca |
---|---|
date | Thu, 22 Dec 2011 18:03:38 -0700 |
parents | lwbasic/parser.c@5325b640424d |
children |
comparison
equal
deleted
inserted
replaced
184:6433cb024174 | 185:cca933d32298 |
---|---|
1 /* | |
2 parser.c | |
3 | |
4 Copyright © 2011 William Astle | |
5 | |
6 This file is part of LWTOOLS. | |
7 | |
8 LWTOOLS is free software: you can redistribute it and/or modify it under the | |
9 terms of the GNU General Public License as published by the Free Software | |
10 Foundation, either version 3 of the License, or (at your option) any later | |
11 version. | |
12 | |
13 This program is distributed in the hope that it will be useful, but WITHOUT | |
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
16 more details. | |
17 | |
18 You should have received a copy of the GNU General Public License along with | |
19 this program. If not, see <http://www.gnu.org/licenses/>. | |
20 */ | |
21 | |
22 /* | |
23 This is the actual compiler bit; it drives the parser and code generation | |
24 */ | |
25 | |
26 #include <stdio.h> | |
27 | |
28 #include <lw_alloc.h> | |
29 #include <lw_string.h> | |
30 | |
31 #include "lwbasic.h" | |
32 #include "symtab.h" | |
33 | |
34 static void expect(cstate *state, int tt) | |
35 { | |
36 if (state -> lexer_token != tt) | |
37 lwb_error("Expecting %s, got %s\n", lexer_token_name(tt), lexer_return_token(state)); | |
38 lexer(state); | |
39 } | |
40 | |
41 | |
/*
 * Storage size, in bytes, of a value of the given type.
 *
 * Every type is currently treated as a 2-byte "int", so the argument is
 * ignored and the result is always 2.
 */
static int sizeof_type(int type)
{
    enum { INT_TYPE_BYTES = 2 };

    (void)type; /* no per-type sizes yet */
    return INT_TYPE_BYTES;
}
48 | |
49 /* parse a type; the next token will be acquired as a result */ | |
50 /* the token advancement is to provide consistency */ | |
51 static int parse_type(cstate *state) | |
52 { | |
53 int pt = -1; | |
54 | |
55 switch (state -> lexer_token) | |
56 { | |
57 case token_kw_integer: | |
58 pt = 1; | |
59 break; | |
60 | |
61 default: | |
62 lwb_error("Invalid type specification"); | |
63 } | |
64 lexer(state); | |
65 /* look for "unsigned" modifier for integer types */ | |
66 return pt; | |
67 } | |
68 | |
69 static void parse_expr(cstate *state, int prec); | |
70 static void parse_term(cstate *state); | |
71 static int parse_expression(cstate *state) | |
72 { | |
73 state -> expression = 1; | |
74 | |
75 parse_expr(state, 0); | |
76 | |
77 state -> expression = 0; | |
78 return 1; | |
79 } | |
80 | |
81 static void parse_decls(cstate *state) | |
82 { | |
83 /* declarations */ | |
84 /* the first thing that doesn't look like a declaration is assumed */ | |
85 /* to be a statement and will trigger a bailout */ | |
86 int vt; | |
87 char *vn; | |
88 symtab_entry_t *se; | |
89 | |
90 for (;;) | |
91 { | |
92 switch (state -> lexer_token) | |
93 { | |
94 /* DIM keyword */ | |
95 case token_kw_dim: | |
96 lexer(state); | |
97 if (state -> lexer_token != token_identifier) | |
98 { | |
99 lwb_error("Expecting identifier, got %s\n", lexer_return_token(state)); | |
100 } | |
101 vn = lw_strdup(state -> lexer_token_string); | |
102 lexer(state); | |
103 if (state -> lexer_token != token_kw_as) | |
104 { | |
105 lwb_error("Expecting AS, got %s\n", lexer_return_token(state)); | |
106 } | |
107 lexer(state); | |
108 vt = parse_type(state); | |
109 | |
110 se = symtab_find(state -> local_syms, vn); | |
111 if (se) | |
112 { | |
113 lwb_error("Multiply defined local variable %s", vn); | |
114 } | |
115 state -> framesize += sizeof_type(vt); | |
116 symtab_register(state -> local_syms, vn, -(state -> framesize), symtype_var, NULL); | |
117 | |
118 lw_free(vn); | |
119 break; | |
120 | |
121 /* blank lines allowed */ | |
122 case token_eol: | |
123 break; | |
124 | |
125 default: | |
126 return; | |
127 } | |
128 if (state -> lexer_token != token_eol) | |
129 lwb_error("Expecting end of line; got %s\n", lexer_return_token(state)); | |
130 lexer(state); | |
131 } | |
132 } | |
133 | |
134 static void parse_statements(cstate *state) | |
135 { | |
136 symtab_entry_t *se; | |
137 int et; | |
138 | |
139 for (;;) | |
140 { | |
141 switch (state -> lexer_token) | |
142 { | |
143 /* blank lines allowed */ | |
144 case token_eol: | |
145 break; | |
146 | |
147 /* variable assignment */ | |
148 case token_identifier: | |
149 se = symtab_find(state -> local_syms, state -> lexer_token_string); | |
150 if (!se) | |
151 { | |
152 se = symtab_find(state -> global_syms, state -> lexer_token_string); | |
153 } | |
154 if (!se) | |
155 lwb_error("Unknown variable %s\n", state -> lexer_token_string); | |
156 lexer(state); | |
157 /* ensure the first token of the expression will be parsed correctly */ | |
158 state -> expression = 1; | |
159 expect(state, token_op_assignment); | |
160 | |
161 /* parse the expression */ | |
162 et = parse_expression(state); | |
163 | |
164 /* check type compatibility */ | |
165 | |
166 /* actually do the assignment */ | |
167 | |
168 break; | |
169 | |
170 /* anything we don't recognize as a statement token breaks out */ | |
171 default: | |
172 return; | |
173 } | |
174 if (state -> lexer_token != token_eol) | |
175 lwb_error("Expecting end of line; got %s\n", lexer_return_token(state)); | |
176 lexer(state); | |
177 } | |
178 } | |
179 | |
180 | |
181 /* issub means RETURNS is not allowed; !issub means RETURNS is required */ | |
182 | |
183 static void parse_subfunc(cstate *state, int issub) | |
184 { | |
185 int pt, rt; | |
186 char *subname, *pn; | |
187 int vis = 0; | |
188 symtab_entry_t *se; | |
189 int paramsize = 0; | |
190 | |
191 state -> local_syms = symtab_init(); | |
192 state -> framesize = 0; | |
193 | |
194 lexer(state); | |
195 if (state -> lexer_token != token_identifier) | |
196 { | |
197 lwb_error("Invalid sub name '%s'", state -> lexer_token_string); | |
198 } | |
199 | |
200 subname = lw_strdup(state -> lexer_token_string); | |
201 | |
202 lexer(state); | |
203 if (state -> lexer_token == token_kw_public || state -> lexer_token == token_kw_private) | |
204 { | |
205 if (state -> lexer_token == token_kw_public) | |
206 vis = 1; | |
207 lexer(state); | |
208 } | |
209 | |
210 /* ignore the "PARAMS" keyword if present */ | |
211 if (state -> lexer_token == token_kw_params) | |
212 lexer(state); | |
213 | |
214 if (state -> lexer_token == token_eol || state -> lexer_token == token_kw_returns) | |
215 goto noparms; | |
216 | |
217 paramagain: | |
218 if (state -> lexer_token != token_identifier) | |
219 { | |
220 lwb_error("Parameter name expected, got %s\n", lexer_return_token(state)); | |
221 } | |
222 pn = lw_strdup(state -> lexer_token_string); | |
223 lexer(state); | |
224 | |
225 if (state -> lexer_token != token_kw_as) | |
226 lwb_error("Expecting AS\n"); | |
227 lexer(state); | |
228 | |
229 pt = parse_type(state); | |
230 | |
231 se = symtab_find(state -> local_syms, pn); | |
232 if (se) | |
233 { | |
234 lwb_error("Duplicate parameter name %s\n", pn); | |
235 } | |
236 symtab_register(state -> local_syms, pn, paramsize, symtype_param, NULL); | |
237 paramsize += sizeof_type(pt); | |
238 lw_free(pn); | |
239 | |
240 if (state -> lexer_token == token_char && state -> lexer_token_string[0] == ',') | |
241 { | |
242 lexer(state); | |
243 goto paramagain; | |
244 } | |
245 | |
246 noparms: | |
247 rt = -1; | |
248 if (!issub) | |
249 { | |
250 if (state -> lexer_token != token_kw_returns) | |
251 { | |
252 lwb_error("FUNCTION must have RETURNS\n"); | |
253 } | |
254 lexer(state); | |
255 /* if (state -> lexer_token == token_identifier) | |
256 { | |
257 printf("Return value named: %s\n", state -> lexer_token_string); | |
258 | |
259 lexer(state); | |
260 if (state -> lexer_token != token_kw_as) | |
261 lwb_error("Execting AS after RETURNS"); | |
262 lexer(state); | |
263 } | |
264 */ | |
265 rt = parse_type(state); | |
266 } | |
267 else | |
268 { | |
269 if (state -> lexer_token == token_kw_returns) | |
270 { | |
271 lwb_error("SUB cannot specify RETURNS\n"); | |
272 } | |
273 } | |
274 | |
275 | |
276 if (state -> lexer_token != token_eol) | |
277 { | |
278 lwb_error("EOL expected; found %s\n", lexer_return_token(state)); | |
279 } | |
280 | |
281 | |
282 se = symtab_find(state -> global_syms, subname); | |
283 if (se) | |
284 { | |
285 lwb_error("Multiply defined symbol %s\n", subname); | |
286 } | |
287 | |
288 symtab_register(state -> global_syms, subname, -1, issub ? symtype_sub : symtype_func, NULL); | |
289 | |
290 state -> currentsub = subname; | |
291 state -> returntype = rt; | |
292 /* consume EOL */ | |
293 lexer(state); | |
294 | |
295 /* variable declarations */ | |
296 parse_decls(state); | |
297 | |
298 /* output function/sub prolog */ | |
299 emit_prolog(state, vis); | |
300 | |
301 /* parse statement block */ | |
302 parse_statements(state); | |
303 | |
304 if (issub) | |
305 { | |
306 if (state -> lexer_token != token_kw_endsub) | |
307 { | |
308 lwb_error("Expecting ENDSUB, got %s\n", lexer_return_token(state)); | |
309 } | |
310 } | |
311 else | |
312 { | |
313 if (state -> lexer_token != token_kw_endfunction) | |
314 { | |
315 lwb_error("Expecting ENDFUNCTION, got %s\n", lexer_return_token(state)); | |
316 } | |
317 } | |
318 /* output function/sub epilog */ | |
319 emit_epilog(state); | |
320 | |
321 lw_free(state -> currentsub); | |
322 state -> currentsub = NULL; | |
323 symtab_destroy(state -> local_syms); | |
324 state -> local_syms = NULL; | |
325 } | |
326 | |
327 void parser(cstate *state) | |
328 { | |
329 state -> lexer_curchar = -1; | |
330 state -> global_syms = symtab_init(); | |
331 | |
332 /* now look for a global declaration */ | |
333 for (;;) | |
334 { | |
335 state -> parser_state = parser_state_global; | |
336 lexer(state); | |
337 switch (state -> lexer_token) | |
338 { | |
339 case token_kw_function: | |
340 printf("Function\n"); | |
341 parse_subfunc(state, 0); | |
342 break; | |
343 | |
344 case token_kw_sub: | |
345 printf("Sub\n"); | |
346 parse_subfunc(state, 1); | |
347 break; | |
348 | |
349 /* blank lines are allowed */ | |
350 case token_eol: | |
351 continue; | |
352 | |
353 /* EOF is allowed - end of parsing */ | |
354 case token_eof: | |
355 return; | |
356 | |
357 default: | |
358 lwb_error("Invalid token '%s' in global state\n", lexer_return_token(state)); | |
359 } | |
360 } | |
361 } | |
362 | |
363 static void parse_expr(cstate *state, int prec) | |
364 { | |
365 static const struct operinfo { | |
366 int opernum; | |
367 int operprec; | |
368 } operators[] = | |
369 { | |
370 { token_op_plus, 100 }, | |
371 { token_op_minus, 100 }, | |
372 { token_op_times, 150 }, | |
373 { token_op_divide, 150 }, | |
374 { token_op_modulus, 150 }, | |
375 { token_op_and, 25 }, | |
376 { token_op_or, 20 }, | |
377 { token_op_xor, 20 }, | |
378 { token_op_band, 50 }, | |
379 { token_op_bor, 45 }, | |
380 { token_op_bxor, 45 }, | |
381 { -1, -1 } | |
382 }; | |
383 int opern; | |
384 | |
385 parse_term(state); | |
386 | |
387 eval_next: | |
388 for (opern = 0; operators[opern].opernum != -1; opern++) | |
389 { | |
390 if (operators[opern].opernum == state -> lexer_token) | |
391 break; | |
392 } | |
393 if (operators[opern].opernum == -1) | |
394 return; | |
395 | |
396 if (operators[opern].operprec <= prec) | |
397 return; | |
398 | |
399 lexer(state); | |
400 | |
401 parse_expr(state, operators[opern].operprec); | |
402 | |
403 /* push operator */ | |
404 | |
405 goto eval_next; | |
406 } | |
407 | |
408 static void parse_term(cstate *state) | |
409 { | |
410 eval_next: | |
411 /* parens */ | |
412 if (state -> lexer_token == token_op_oparen) | |
413 { | |
414 lexer(state); | |
415 parse_expr(state, 0); | |
416 expect(state, token_op_cparen); | |
417 return; | |
418 } | |
419 | |
420 /* unary plus; ignore it */ | |
421 if (state -> lexer_token == token_op_plus) | |
422 { | |
423 lexer(state); | |
424 goto eval_next; | |
425 } | |
426 | |
427 /* unary minus, precision 200 */ | |
428 if (state -> lexer_token == token_op_minus) | |
429 { | |
430 lexer(state); | |
431 parse_expr(state, 200); | |
432 | |
433 /* push unary negation */ | |
434 } | |
435 | |
436 /* BNOT, NOT */ | |
437 if (state -> lexer_token == token_op_not || state -> lexer_token == token_op_bnot) | |
438 { | |
439 lexer(state); | |
440 parse_expr(state, 200); | |
441 | |
442 /* push unary operator */ | |
443 } | |
444 | |
445 /* integer */ | |
446 if (state -> lexer_token == token_int) | |
447 { | |
448 } | |
449 | |
450 /* unsigned integer */ | |
451 if (state -> lexer_token == token_uint) | |
452 { | |
453 } | |
454 | |
455 /* variable or function call */ | |
456 if (state -> lexer_token == token_identifier) | |
457 { | |
458 lexer(state); | |
459 if (state -> lexer_token == token_op_oparen) | |
460 { | |
461 /* function call */ | |
462 return; | |
463 } | |
464 /* variable */ | |
465 return; | |
466 } | |
467 | |
468 lwb_error("Invalid input in expression; got %s\n", lexer_return_token(state)); | |
469 } |