Mercurial > hg > index.cgi
annotate lwcc/lex.c @ 560:dba08c7dff96
Fix off by one handling MOD directive optional arguments
Fixes an off by one in handling the optional arguments to the MOD directive
as of changeset 928c033c0cd0.
Thanks to Alex Evans <varmfskii@gmail.com> for reporting the problem and a
suggested fix. This fix is different but should be more stable should the
rest of the parsing code be refactored in the future.
author | William Astle <lost@l-w.ca> |
---|---|
date | Fri, 22 Sep 2023 12:15:09 -0600 |
parents | ee3e52ab2288 |
children |
rev | line source |
---|---|
295 | 1 /* |
2 lwcc/lex.c | |
3 | |
4 Copyright © 2013 William Astle | |
5 | |
6 This file is part of LWTOOLS. | |
7 | |
8 LWTOOLS is free software: you can redistribute it and/or modify it under the | |
9 terms of the GNU General Public License as published by the Free Software | |
10 Foundation, either version 3 of the License, or (at your option) any later | |
11 version. | |
12 | |
13 This program is distributed in the hope that it will be useful, but WITHOUT | |
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
16 more details. | |
17 | |
18 You should have received a copy of the GNU General Public License along with | |
19 this program. If not, see <http://www.gnu.org/licenses/>. | |
20 */ | |
21 | |
22 #include <ctype.h> | |
23 #include <stdio.h> | |
24 | |
25 #include <lw_alloc.h> | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
26 #include <lw_strbuf.h> |
295 | 27 |
28 #include "cpp.h" | |
29 #include "token.h" | |
30 | |
31 /* fetch a raw input byte from the current file. Will return CPP_EOF if | |
32 EOF is encountered and CPP_EOL if an end of line sequence is encountered. | |
33 End of line is defined as either CR, CRLF, LF, or LFCR. CPP_EOL is | |
34 returned on the first CR or LF encountered. The complementary CR or LF | |
35 is munched, if present, when the *next* character is read. This always | |
36 operates on file_stack. | |
37 | |
38 This function also accounts for line numbers in input files and also | |
39 character columns. | |
40 */ | |
41 static int fetch_byte_ll(struct preproc_info *pp) | |
42 { | |
43 int c; | |
44 | |
45 if (pp -> eolstate != 0) | |
46 { | |
47 pp -> lineno++; | |
48 pp -> column = 0; | |
49 } | |
50 c = getc(pp -> fp); | |
51 pp -> column++; | |
52 if (pp -> eolstate == 1) | |
53 { | |
54 // just saw CR, munch LF | |
55 if (c == 10) | |
56 c = getc(pp -> fp); | |
57 pp -> eolstate = 0; | |
58 } | |
59 else if (pp -> eolstate == 2) | |
60 { | |
61 // just saw LF, much CR | |
62 if (c == 13) | |
63 c = getc(pp -> fp); | |
64 pp -> eolstate = 0; | |
65 } | |
66 | |
67 if (c == 10) | |
68 { | |
69 // we have LF - end of line, flag to munch CR | |
70 pp -> eolstate = 2; | |
71 c = CPP_EOL; | |
72 } | |
73 else if (c == 13) | |
74 { | |
75 // we have CR - end of line, flag to munch LF | |
76 pp -> eolstate = 1; | |
77 c = CPP_EOL; | |
78 } | |
79 else if (c == EOF) | |
80 { | |
81 c = CPP_EOF; | |
82 } | |
83 return c; | |
84 } | |
85 | |
86 /* This function takes a sequence of bytes from the _ll function above | |
87 and does trigraph interpretation on it, but only if the global | |
88 trigraphs is nonzero. */ | |
89 static int fetch_byte_tg(struct preproc_info *pp) | |
90 { | |
91 int c; | |
92 | |
93 if (!pp -> trigraphs) | |
94 { | |
95 c = fetch_byte_ll(pp); | |
96 } | |
97 else | |
98 { | |
99 /* we have to do the trigraph shit here */ | |
100 if (pp -> ra != CPP_NOUNG) | |
101 { | |
102 if (pp -> qseen > 0) | |
103 { | |
104 c = '?'; | |
105 pp -> qseen -= 1; | |
106 return c; | |
107 } | |
108 else | |
109 { | |
110 c = pp -> ra; | |
111 pp -> ra = CPP_NOUNG; | |
112 return c; | |
113 } | |
114 } | |
115 | |
116 c = fetch_byte_ll(pp); | |
117 while (c == '?') | |
118 { | |
119 pp -> qseen++; | |
120 c = fetch_byte_ll(pp); | |
121 } | |
122 | |
123 if (pp -> qseen >= 2) | |
124 { | |
125 // we have a trigraph | |
126 switch (c) | |
127 { | |
128 case '=': | |
129 c = '#'; | |
130 pp -> qseen -= 2; | |
131 break; | |
132 | |
133 case '/': | |
134 c = '\\'; | |
135 pp -> qseen -= 2; | |
136 break; | |
137 | |
138 case '\'': | |
139 c = '^'; | |
140 pp -> qseen -= 2; | |
141 break; | |
142 | |
143 case '(': | |
144 c = '['; | |
145 pp -> qseen -= 2; | |
146 break; | |
147 | |
148 case ')': | |
149 c = ']'; | |
150 pp -> qseen -= 2; | |
151 break; | |
152 | |
153 case '!': | |
154 c = '|'; | |
155 pp -> qseen -= 2; | |
156 break; | |
157 | |
158 case '<': | |
159 c = '{'; | |
160 pp -> qseen -= 2; | |
161 break; | |
162 | |
163 case '>': | |
164 c = '}'; | |
165 pp -> qseen -= 2; | |
166 break; | |
167 | |
168 case '-': | |
169 c = '~'; | |
170 pp -> qseen -= 2; | |
171 break; | |
172 } | |
173 if (pp -> qseen > 0) | |
174 { | |
175 pp -> ra = c; | |
176 c = '?'; | |
177 pp -> qseen--; | |
178 } | |
179 } | |
180 else if (pp -> qseen > 0) | |
181 { | |
182 pp -> ra = c; | |
183 c = '?'; | |
184 pp -> qseen--; | |
185 } | |
186 } | |
187 return c; | |
188 } | |
189 | |
190 /* This function puts a byte back onto the front of the input stream used | |
191 by fetch_byte(). Theoretically, an unlimited number of characters can | |
192 be unfetched. Line and column counting may be incorrect if unfetched | |
193 characters cross a token boundary. */ | |
305
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
194 void preproc_lex_unfetch_byte(struct preproc_info *pp, int c) |
295 | 195 { |
306
b08787e5b9f3
Add include search paths and command line macro definitions
William Astle <lost@l-w.ca>
parents:
305
diff
changeset
|
196 if (pp -> lexstr) |
b08787e5b9f3
Add include search paths and command line macro definitions
William Astle <lost@l-w.ca>
parents:
305
diff
changeset
|
197 { |
b08787e5b9f3
Add include search paths and command line macro definitions
William Astle <lost@l-w.ca>
parents:
305
diff
changeset
|
198 if (c == CPP_EOL) |
b08787e5b9f3
Add include search paths and command line macro definitions
William Astle <lost@l-w.ca>
parents:
305
diff
changeset
|
199 return; |
b08787e5b9f3
Add include search paths and command line macro definitions
William Astle <lost@l-w.ca>
parents:
305
diff
changeset
|
200 if (pp -> lexstrloc > 0) |
b08787e5b9f3
Add include search paths and command line macro definitions
William Astle <lost@l-w.ca>
parents:
305
diff
changeset
|
201 { |
b08787e5b9f3
Add include search paths and command line macro definitions
William Astle <lost@l-w.ca>
parents:
305
diff
changeset
|
202 pp -> lexstrloc--; |
b08787e5b9f3
Add include search paths and command line macro definitions
William Astle <lost@l-w.ca>
parents:
305
diff
changeset
|
203 return; |
b08787e5b9f3
Add include search paths and command line macro definitions
William Astle <lost@l-w.ca>
parents:
305
diff
changeset
|
204 } |
b08787e5b9f3
Add include search paths and command line macro definitions
William Astle <lost@l-w.ca>
parents:
305
diff
changeset
|
205 } |
b08787e5b9f3
Add include search paths and command line macro definitions
William Astle <lost@l-w.ca>
parents:
305
diff
changeset
|
206 |
295 | 207 if (pp -> ungetbufl >= pp -> ungetbufs) |
208 { | |
209 pp -> ungetbufs += 100; | |
210 pp -> ungetbuf = lw_realloc(pp -> ungetbuf, pp -> ungetbufs); | |
211 } | |
212 pp -> ungetbuf[pp -> ungetbufl++] = c; | |
213 } | |
214 | |
215 /* This function retrieves a byte from the input stream. It performs | |
216 backslash-newline splicing on the returned bytes. Any character | |
217 retrieved from the unfetch buffer is presumed to have already passed | |
218 the backslash-newline filter. */ | |
219 static int fetch_byte(struct preproc_info *pp) | |
220 { | |
221 int c; | |
222 | |
298
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
223 if (pp -> lexstr) |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
224 { |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
225 if (pp -> lexstr[pp -> lexstrloc]) |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
226 return pp -> lexstr[pp -> lexstrloc++]; |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
227 else |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
228 return CPP_EOL; |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
229 } |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
230 |
295 | 231 if (pp -> ungetbufl > 0) |
232 { | |
233 pp -> ungetbufl--; | |
234 c = pp -> ungetbuf[pp -> ungetbufl]; | |
235 if (pp -> ungetbufl == 0) | |
236 { | |
237 lw_free(pp -> ungetbuf); | |
238 pp -> ungetbuf = NULL; | |
239 pp -> ungetbufs = 0; | |
240 } | |
241 return c; | |
242 } | |
243 | |
244 again: | |
245 if (pp -> unget != CPP_NOUNG) | |
246 { | |
247 c = pp -> unget; | |
248 pp -> unget = CPP_NOUNG; | |
249 } | |
250 else | |
251 { | |
252 c = fetch_byte_tg(pp); | |
253 } | |
254 if (c == '\\') | |
255 { | |
256 int c2; | |
257 c2 = fetch_byte_tg(pp); | |
258 if (c2 == CPP_EOL) | |
259 goto again; | |
260 else | |
261 pp -> unget = c2; | |
262 } | |
263 return c; | |
264 } | |
265 | |
266 | |
267 | |
268 /* | |
269 Lex a token off the current input file. | |
270 | |
271 Returned tokens are as follows: | |
272 | |
273 * all words starting with [a-zA-Z_] are returned as TOK_IDENT | |
274 * numbers are returned as their appropriate type | |
275 * all whitespace in a sequence, including comments, is returned as | |
276 a single instance of TOK_WSPACE | |
277 * TOK_EOL is returned in the case of the end of a line | |
278 * TOK_EOF is returned when the end of the file is reached | |
279 * If no TOK_EOL appears before TOK_EOF, a TOK_EOL will be synthesised | |
280 * Any symbolic operator, etc., recognized by C will be returned as such | |
281 a token | |
282 * TOK_HASH will be returned for a # | |
283 * trigraphs will be interpreted | |
284 * backslash-newline will be interpreted | |
285 * any instance of CR, LF, CRLF, or LFCR will be interpreted as TOK_EOL | |
286 */ | |
287 | |
288 | |
305
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
289 int preproc_lex_fetch_byte(struct preproc_info *pp) |
295 | 290 { |
291 int c; | |
292 c = fetch_byte(pp); | |
293 if (c == CPP_EOF && pp -> eolseen == 0) | |
294 { | |
295 preproc_throw_warning(pp, "No newline at end of file"); | |
296 pp -> eolseen = 1; | |
297 return CPP_EOL; | |
298 } | |
299 | |
300 if (c == CPP_EOL) | |
301 { | |
302 pp -> eolseen = 1; | |
303 return c; | |
304 } | |
300 | 305 |
295 | 306 pp -> eolseen = 0; |
307 | |
308 /* convert comments to a single space here */ | |
309 if (c == '/') | |
310 { | |
311 int c2; | |
312 c2 = fetch_byte(pp); | |
313 if (c2 == '/') | |
314 { | |
315 /* single line comment */ | |
316 c = ' '; | |
317 for (;;) | |
318 { | |
319 c2 = fetch_byte(pp); | |
320 if (c2 == CPP_EOF || c2 == CPP_EOL) | |
321 break; | |
322 } | |
323 preproc_lex_unfetch_byte(pp, c2); | |
324 } | |
325 else if (c2 == '*') | |
326 { | |
327 /* block comment */ | |
328 c = ' '; | |
329 for (;;) | |
330 { | |
331 c2 = fetch_byte(pp); | |
305
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
332 if (c2 == CPP_EOF) |
295 | 333 { |
334 preproc_lex_unfetch_byte(pp, c); | |
335 break; | |
336 } | |
337 if (c2 == '*') | |
338 { | |
339 /* maybe end of comment */ | |
340 c2 = preproc_lex_fetch_byte(pp); | |
341 if (c2 == '/') | |
342 break; | |
343 } | |
344 } | |
345 } | |
346 else | |
347 { | |
348 /* not a comment - restore lookahead character */ | |
349 preproc_lex_unfetch_byte(pp, c2); | |
350 } | |
351 } | |
352 return c; | |
353 } | |
354 | |
355 struct token *preproc_lex_next_token(struct preproc_info *pp) | |
356 { | |
357 int sline = pp -> lineno; | |
358 int scol = pp -> column; | |
359 char *strval = NULL; | |
360 int ttype = TOK_NONE; | |
361 int c, c2; | |
362 int cl; | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
363 struct lw_strbuf *strbuf; |
304
d85d173ba120
Checkpoint lwcc development - preprocessor is runnable but nonfunctional
William Astle <lost@l-w.ca>
parents:
300
diff
changeset
|
364 struct token *t = NULL; |
300 | 365 struct preproc_info *fs; |
366 | |
367 fileagain: | |
295 | 368 c = preproc_lex_fetch_byte(pp); |
369 if (c == CPP_EOF) | |
370 { | |
371 if (pp -> nlseen == 0) | |
372 { | |
373 c = CPP_EOL; | |
374 } | |
375 } | |
305
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
376 |
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
377 if (pp -> lineno != sline) |
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
378 { |
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
379 sline = pp -> lineno; |
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
380 scol = pp -> column; |
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
381 } |
295 | 382 |
383 if (c == CPP_EOF) | |
384 { | |
300 | 385 /* check if we fell off the end of an include file */ |
386 if (pp -> filestack) | |
387 { | |
388 if (pp -> skip_level || pp -> found_level) | |
389 { | |
390 preproc_throw_error(pp, "Unbalanced conditionals in include file"); | |
391 } | |
392 fclose(pp -> fp); | |
393 fs = pp -> filestack; | |
394 *pp = *fs; | |
395 pp -> filestack = fs -> n; | |
396 goto fileagain; | |
397 } | |
398 else | |
399 { | |
400 ttype = TOK_EOF; | |
401 goto out; | |
402 } | |
295 | 403 } |
404 if (c == CPP_EOL) | |
405 { | |
406 pp -> nlseen = 1; | |
407 ttype = TOK_EOL; | |
408 goto out; | |
409 } | |
410 | |
411 pp -> nlseen = 0; | |
412 if (isspace(c)) | |
413 { | |
414 while (isspace(c)) | |
415 c = preproc_lex_fetch_byte(pp); | |
416 preproc_lex_unfetch_byte(pp, c); | |
417 ttype = TOK_WSPACE; | |
418 goto out; | |
419 } | |
420 | |
421 switch (c) | |
422 { | |
423 case '?': | |
424 ttype = TOK_QMARK; | |
425 goto out; | |
426 | |
427 case ':': | |
428 ttype = TOK_COLON; | |
429 goto out; | |
430 | |
431 case ',': | |
432 ttype = TOK_COMMA; | |
433 goto out; | |
434 | |
435 case '(': | |
436 ttype = TOK_OPAREN; | |
437 goto out; | |
438 | |
439 case ')': | |
440 ttype = TOK_CPAREN; | |
441 goto out; | |
442 | |
443 case '{': | |
444 ttype = TOK_OBRACE; | |
445 goto out; | |
446 | |
447 case '}': | |
448 ttype = TOK_CBRACE; | |
449 goto out; | |
450 | |
451 case '[': | |
452 ttype = TOK_OSQUARE; | |
453 goto out; | |
454 | |
455 case ']': | |
456 ttype = TOK_CSQUARE; | |
457 goto out; | |
458 | |
459 case '~': | |
460 ttype = TOK_COM; | |
461 goto out; | |
462 | |
463 case ';': | |
464 ttype = TOK_EOS; | |
465 goto out; | |
466 | |
467 /* and now for the possible multi character tokens */ | |
468 case '#': | |
469 ttype = TOK_HASH; | |
470 c = preproc_lex_fetch_byte(pp); | |
471 if (c == '#') | |
472 ttype = TOK_DBLHASH; | |
473 else | |
474 preproc_lex_unfetch_byte(pp, c); | |
475 goto out; | |
476 | |
477 case '^': | |
478 ttype = TOK_XOR; | |
479 c = preproc_lex_fetch_byte(pp); | |
480 if (c == '=') | |
481 ttype = TOK_XORASS; | |
482 else | |
483 preproc_lex_unfetch_byte(pp, c); | |
484 goto out; | |
485 | |
486 case '!': | |
487 ttype = TOK_BNOT; | |
488 c = preproc_lex_fetch_byte(pp); | |
489 if (c == '=') | |
490 ttype = TOK_NE; | |
491 else | |
492 preproc_lex_unfetch_byte(pp, c); | |
493 goto out; | |
494 | |
495 case '*': | |
496 ttype = TOK_STAR; | |
497 c = preproc_lex_fetch_byte(pp); | |
498 if (c == '=') | |
499 ttype = TOK_MULASS; | |
500 else | |
501 preproc_lex_unfetch_byte(pp, c); | |
502 goto out; | |
503 | |
504 case '/': | |
505 ttype = TOK_DIV; | |
506 c = preproc_lex_fetch_byte(pp); | |
507 if (c == '=') | |
508 ttype = TOK_DIVASS; | |
509 else | |
510 preproc_lex_unfetch_byte(pp, c); | |
511 goto out; | |
512 | |
513 case '=': | |
514 ttype = TOK_ASS; | |
515 c = preproc_lex_fetch_byte(pp); | |
516 if (c == '=') | |
517 ttype = TOK_EQ; | |
518 else | |
519 preproc_lex_unfetch_byte(pp, c); | |
520 goto out; | |
521 | |
522 case '%': | |
523 ttype = TOK_MOD; | |
524 c = preproc_lex_fetch_byte(pp); | |
525 if (c == '=') | |
526 ttype = TOK_MODASS; | |
527 else | |
528 preproc_lex_unfetch_byte(pp, c); | |
529 goto out; | |
530 | |
531 case '-': | |
532 ttype = TOK_SUB; | |
533 c = preproc_lex_fetch_byte(pp); | |
534 if (c == '=') | |
535 ttype = TOK_SUBASS; | |
536 else if (c == '-') | |
537 ttype = TOK_DBLSUB; | |
538 else if (c == '>') | |
539 ttype = TOK_ARROW; | |
540 else | |
541 preproc_lex_unfetch_byte(pp, c); | |
542 goto out; | |
543 | |
544 case '+': | |
545 ttype = TOK_ADD; | |
546 c = preproc_lex_fetch_byte(pp); | |
547 if (c == '=') | |
548 ttype = TOK_ADDASS; | |
549 else if (c == '+') | |
550 ttype = TOK_DBLADD; | |
551 else | |
552 preproc_lex_unfetch_byte(pp, c); | |
553 goto out; | |
554 | |
555 | |
556 case '&': | |
557 ttype = TOK_BWAND; | |
558 c = preproc_lex_fetch_byte(pp); | |
559 if (c == '=') | |
560 ttype = TOK_BWANDASS; | |
561 else if (c == '&') | |
562 ttype = TOK_BAND; | |
563 else | |
564 preproc_lex_unfetch_byte(pp, c); | |
565 goto out; | |
566 | |
567 case '|': | |
568 ttype = TOK_BWOR; | |
569 c = preproc_lex_fetch_byte(pp); | |
570 if (c == '=') | |
571 ttype = TOK_BWORASS; | |
572 else if (c == '|') | |
573 ttype = TOK_BOR; | |
574 else | |
575 preproc_lex_unfetch_byte(pp, c); | |
576 goto out; | |
577 | |
578 case '<': | |
579 ttype = TOK_LT; | |
580 c = preproc_lex_fetch_byte(pp); | |
581 if (c == '=') | |
582 ttype = TOK_LE; | |
583 else if (c == '<') | |
584 { | |
585 ttype = TOK_LSH; | |
586 c = preproc_lex_fetch_byte(pp); | |
587 if (c == '=') | |
588 ttype = TOK_LSHASS; | |
589 else | |
590 preproc_lex_unfetch_byte(pp, c); | |
591 } | |
592 else | |
593 preproc_lex_unfetch_byte(pp, c); | |
594 goto out; | |
595 | |
596 | |
597 case '>': | |
598 ttype = TOK_GT; | |
599 c = preproc_lex_fetch_byte(pp); | |
600 if (c == '=') | |
601 ttype = TOK_GE; | |
602 else if (c == '>') | |
603 { | |
604 ttype = TOK_RSH; | |
605 c = preproc_lex_fetch_byte(pp); | |
606 if (c == '=') | |
607 ttype = TOK_RSHASS; | |
608 else | |
609 preproc_lex_unfetch_byte(pp, c); | |
610 } | |
611 else | |
612 preproc_lex_unfetch_byte(pp, c); | |
613 goto out; | |
614 | |
615 case '\'': | |
616 /* character constant - turns into a uint */ | |
617 chrlit: | |
618 cl = 0; | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
619 strbuf = lw_strbuf_new(); |
295 | 620 for (;;) |
621 { | |
622 c = preproc_lex_fetch_byte(pp); | |
623 if (c == CPP_EOF || c == CPP_EOL || c == '\'') | |
624 break; | |
625 cl++; | |
626 if (c == '\\') | |
627 { | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
628 lw_strbuf_add(strbuf, '\\'); |
295 | 629 c = preproc_lex_fetch_byte(pp); |
630 if (c == CPP_EOF || c == CPP_EOL) | |
631 { | |
298
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
632 if (!pp -> lexstr) |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
633 preproc_throw_error(pp, "Invalid character constant"); |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
634 ttype = TOK_ERROR; |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
635 strval = lw_strbuf_end(strbuf); |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
636 goto out; |
295 | 637 } |
638 cl++; | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
639 lw_strbuf_add(strbuf, c); |
295 | 640 continue; |
641 } | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
642 lw_strbuf_add(strbuf, c); |
295 | 643 } |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
644 strval = lw_strbuf_end(strbuf); |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
645 if (cl == 0) |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
646 { |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
647 ttype = TOK_ERROR; |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
648 if (!pp -> lexstr) |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
649 preproc_throw_error(pp, "Invalid character constant"); |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
650 } |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
651 else |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
652 ttype = TOK_CHR_LIT; |
295 | 653 goto out; |
654 | |
655 case '"': | |
656 strlit: | |
657 /* string literal */ | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
658 strbuf = lw_strbuf_new(); |
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
659 lw_strbuf_add(strbuf, '"'); |
295 | 660 for (;;) |
661 { | |
662 c = preproc_lex_fetch_byte(pp); | |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
663 if (c == CPP_EOF || c == CPP_EOL) |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
664 { |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
665 ttype = TOK_ERROR; |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
666 strval = lw_strbuf_end(strbuf); |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
667 if (!pp -> lexstr) |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
668 preproc_throw_error(pp, "Invalid string constant"); |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
669 goto out; |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
670 } |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
671 if (c == '"') |
295 | 672 break; |
673 if (c == '\\') | |
674 { | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
675 lw_strbuf_add(strbuf, '\\'); |
295 | 676 c = preproc_lex_fetch_byte(pp); |
677 if (c == CPP_EOF || c == CPP_EOL) | |
678 { | |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
679 ttype = TOK_ERROR; |
298
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
680 if (!pp -> lexstr) |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
681 preproc_throw_error(pp, "Invalid string constant"); |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
682 strval = lw_strbuf_end(strbuf); |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
683 goto out; |
295 | 684 } |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
685 lw_strbuf_add(strbuf, c); |
295 | 686 continue; |
687 } | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
688 lw_strbuf_add(strbuf, c); |
295 | 689 } |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
690 lw_strbuf_add(strbuf, '"'); |
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
691 strval = lw_strbuf_end(strbuf); |
295 | 692 ttype = TOK_STR_LIT; |
693 goto out; | |
694 | |
695 case 'L': | |
696 /* check for wide string or wide char const */ | |
697 c2 = preproc_lex_fetch_byte(pp); | |
698 if (c2 == '\'') | |
699 { | |
700 goto chrlit; | |
701 } | |
702 else if (c2 == '"') | |
703 { | |
704 goto strlit; | |
705 } | |
706 preproc_lex_unfetch_byte(pp, c2); | |
707 /* fall through for identifier */ | |
708 case '_': | |
709 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': | |
710 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': | |
711 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': | |
712 case 's': case 't': case 'u': case 'v': case 'w': case 'x': | |
713 case 'y': case 'z': | |
714 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': | |
715 case 'G': case 'H': case 'I': case 'J': case 'K': | |
716 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': | |
717 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': | |
718 case 'Y': case 'Z': | |
719 /* we have an identifier here */ | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
720 strbuf = lw_strbuf_new(); |
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
721 lw_strbuf_add(strbuf, c); |
295 | 722 for (;;) |
723 { | |
724 c = preproc_lex_fetch_byte(pp); | |
725 if ((c == '_') || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) | |
726 { | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
727 lw_strbuf_add(strbuf, c); |
295 | 728 continue; |
729 } | |
730 else | |
731 { | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
732 lw_strbuf_add(strbuf, 0); |
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
733 strval = lw_strbuf_end(strbuf); |
295 | 734 break; |
735 } | |
736 } | |
737 preproc_lex_unfetch_byte(pp, c); | |
738 ttype = TOK_IDENT; | |
739 goto out; | |
740 | |
741 case '.': | |
742 c = preproc_lex_fetch_byte(pp); | |
743 if (c >= '0' && c <= '9') | |
744 { | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
745 strbuf = lw_strbuf_new(); |
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
746 lw_strbuf_add(strbuf, '.'); |
295 | 747 goto numlit; |
748 } | |
749 else if (c == '.') | |
750 { | |
751 c = preproc_lex_fetch_byte(pp); | |
752 if (c == '.') | |
753 { | |
754 ttype = TOK_ELLIPSIS; | |
755 goto out; | |
756 } | |
757 preproc_lex_unfetch_byte(pp, c); | |
758 } | |
759 preproc_lex_unfetch_byte(pp, c); | |
760 ttype = TOK_DOT; | |
761 goto out; | |
762 | |
763 case '0': case '1': case '2': case '3': case '4': | |
764 case '5': case '6': case '7': case '8': case '9': | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
765 strbuf = lw_strbuf_new(); |
295 | 766 numlit: |
296 | 767 ttype = TOK_NUMBER; |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
768 lw_strbuf_add(strbuf, c); |
295 | 769 for (;;) |
770 { | |
771 c = preproc_lex_fetch_byte(pp); | |
772 if (!((c == '_') || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))) | |
773 break; | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
774 lw_strbuf_add(strbuf, c); |
295 | 775 if (c == 'e' || c == 'E' || c == 'p' || c == 'P') |
776 { | |
777 c = preproc_lex_fetch_byte(pp); | |
778 if (c == '+' || c == '-') | |
779 { | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
780 lw_strbuf_add(strbuf, c); |
295 | 781 continue; |
782 } | |
783 preproc_lex_unfetch_byte(pp, c); | |
784 } | |
785 } | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
786 strval = lw_strbuf_end(strbuf); |
295 | 787 preproc_lex_unfetch_byte(pp, c); |
788 goto out; | |
789 | |
790 default: | |
791 ttype = TOK_CHAR; | |
792 strval = lw_alloc(2); | |
793 strval[0] = c; | |
794 strval[1] = 0; | |
795 break; | |
796 } | |
797 out: | |
798 t = token_create(ttype, strval, sline, scol, pp -> fn); | |
799 lw_free(strval); | |
800 return t; | |
801 } |