Mercurial > hg > index.cgi
annotate lwcc/lex.c @ 305:54f213c8fb81 ccdev
Various bugfixes and output tuning
Tuned output of preprocessor to include line markers similar to the ones
added by the gcc preprocessor.
Also, many fixes for various bits of dumbosity leading to misbehaviour and
crashing.
author | William Astle <lost@l-w.ca> |
---|---|
date | Wed, 18 Sep 2013 19:17:52 -0600 |
parents | d85d173ba120 |
children | b08787e5b9f3 |
rev | line source |
---|---|
295 | 1 /* |
2 lwcc/lex.c | |
3 | |
4 Copyright © 2013 William Astle | |
5 | |
6 This file is part of LWTOOLS. | |
7 | |
8 LWTOOLS is free software: you can redistribute it and/or modify it under the | |
9 terms of the GNU General Public License as published by the Free Software | |
10 Foundation, either version 3 of the License, or (at your option) any later | |
11 version. | |
12 | |
13 This program is distributed in the hope that it will be useful, but WITHOUT | |
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
16 more details. | |
17 | |
18 You should have received a copy of the GNU General Public License along with | |
19 this program. If not, see <http://www.gnu.org/licenses/>. | |
20 */ | |
21 | |
22 #include <ctype.h> | |
23 #include <stdio.h> | |
24 | |
25 #include <lw_alloc.h> | |
26 | |
27 #include "cpp.h" | |
28 #include "strbuf.h" | |
29 #include "token.h" | |
30 | |
31 /* fetch a raw input byte from the current file. Will return CPP_EOF if | |
32 EOF is encountered and CPP_EOL if an end of line sequence is encountered. | |
33 End of line is defined as either CR, CRLF, LF, or LFCR. CPP_EOL is | |
34 returned on the first CR or LF encountered. The complementary CR or LF | |
35 is munched, if present, when the *next* character is read. This always | |
36 operates on file_stack. | |
37 | |
38 This function also accounts for line numbers in input files and also | |
39 character columns. | |
40 */ | |
41 static int fetch_byte_ll(struct preproc_info *pp) | |
42 { | |
43 int c; | |
44 | |
45 if (pp -> eolstate != 0) | |
46 { | |
47 pp -> lineno++; | |
48 pp -> column = 0; | |
49 } | |
50 c = getc(pp -> fp); | |
51 pp -> column++; | |
52 if (pp -> eolstate == 1) | |
53 { | |
54 // just saw CR, munch LF | |
55 if (c == 10) | |
56 c = getc(pp -> fp); | |
57 pp -> eolstate = 0; | |
58 } | |
59 else if (pp -> eolstate == 2) | |
60 { | |
61 // just saw LF, much CR | |
62 if (c == 13) | |
63 c = getc(pp -> fp); | |
64 pp -> eolstate = 0; | |
65 } | |
66 | |
67 if (c == 10) | |
68 { | |
69 // we have LF - end of line, flag to munch CR | |
70 pp -> eolstate = 2; | |
71 c = CPP_EOL; | |
72 } | |
73 else if (c == 13) | |
74 { | |
75 // we have CR - end of line, flag to munch LF | |
76 pp -> eolstate = 1; | |
77 c = CPP_EOL; | |
78 } | |
79 else if (c == EOF) | |
80 { | |
81 c = CPP_EOF; | |
82 } | |
83 return c; | |
84 } | |
85 | |
86 /* This function takes a sequence of bytes from the _ll function above | |
87 and does trigraph interpretation on it, but only if the global | |
88 trigraphs is nonzero. */ | |
89 static int fetch_byte_tg(struct preproc_info *pp) | |
90 { | |
91 int c; | |
92 | |
93 if (!pp -> trigraphs) | |
94 { | |
95 c = fetch_byte_ll(pp); | |
96 } | |
97 else | |
98 { | |
99 /* we have to do the trigraph shit here */ | |
100 if (pp -> ra != CPP_NOUNG) | |
101 { | |
102 if (pp -> qseen > 0) | |
103 { | |
104 c = '?'; | |
105 pp -> qseen -= 1; | |
106 return c; | |
107 } | |
108 else | |
109 { | |
110 c = pp -> ra; | |
111 pp -> ra = CPP_NOUNG; | |
112 return c; | |
113 } | |
114 } | |
115 | |
116 c = fetch_byte_ll(pp); | |
117 while (c == '?') | |
118 { | |
119 pp -> qseen++; | |
120 c = fetch_byte_ll(pp); | |
121 } | |
122 | |
123 if (pp -> qseen >= 2) | |
124 { | |
125 // we have a trigraph | |
126 switch (c) | |
127 { | |
128 case '=': | |
129 c = '#'; | |
130 pp -> qseen -= 2; | |
131 break; | |
132 | |
133 case '/': | |
134 c = '\\'; | |
135 pp -> qseen -= 2; | |
136 break; | |
137 | |
138 case '\'': | |
139 c = '^'; | |
140 pp -> qseen -= 2; | |
141 break; | |
142 | |
143 case '(': | |
144 c = '['; | |
145 pp -> qseen -= 2; | |
146 break; | |
147 | |
148 case ')': | |
149 c = ']'; | |
150 pp -> qseen -= 2; | |
151 break; | |
152 | |
153 case '!': | |
154 c = '|'; | |
155 pp -> qseen -= 2; | |
156 break; | |
157 | |
158 case '<': | |
159 c = '{'; | |
160 pp -> qseen -= 2; | |
161 break; | |
162 | |
163 case '>': | |
164 c = '}'; | |
165 pp -> qseen -= 2; | |
166 break; | |
167 | |
168 case '-': | |
169 c = '~'; | |
170 pp -> qseen -= 2; | |
171 break; | |
172 } | |
173 if (pp -> qseen > 0) | |
174 { | |
175 pp -> ra = c; | |
176 c = '?'; | |
177 pp -> qseen--; | |
178 } | |
179 } | |
180 else if (pp -> qseen > 0) | |
181 { | |
182 pp -> ra = c; | |
183 c = '?'; | |
184 pp -> qseen--; | |
185 } | |
186 } | |
187 return c; | |
188 } | |
189 | |
190 /* This function puts a byte back onto the front of the input stream used | |
191 by fetch_byte(). Theoretically, an unlimited number of characters can | |
192 be unfetched. Line and column counting may be incorrect if unfetched | |
193 characters cross a token boundary. */ | |
305
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
194 void preproc_lex_unfetch_byte(struct preproc_info *pp, int c) |
295 | 195 { |
196 if (pp -> ungetbufl >= pp -> ungetbufs) | |
197 { | |
198 pp -> ungetbufs += 100; | |
199 pp -> ungetbuf = lw_realloc(pp -> ungetbuf, pp -> ungetbufs); | |
200 } | |
201 pp -> ungetbuf[pp -> ungetbufl++] = c; | |
202 } | |
203 | |
204 /* This function retrieves a byte from the input stream. It performs | |
205 backslash-newline splicing on the returned bytes. Any character | |
206 retrieved from the unfetch buffer is presumed to have already passed | |
207 the backslash-newline filter. */ | |
208 static int fetch_byte(struct preproc_info *pp) | |
209 { | |
210 int c; | |
211 | |
298
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
212 if (pp -> lexstr) |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
213 { |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
214 if (pp -> lexstr[pp -> lexstrloc]) |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
215 return pp -> lexstr[pp -> lexstrloc++]; |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
216 else |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
217 return CPP_EOL; |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
218 } |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
219 |
295 | 220 if (pp -> ungetbufl > 0) |
221 { | |
222 pp -> ungetbufl--; | |
223 c = pp -> ungetbuf[pp -> ungetbufl]; | |
224 if (pp -> ungetbufl == 0) | |
225 { | |
226 lw_free(pp -> ungetbuf); | |
227 pp -> ungetbuf = NULL; | |
228 pp -> ungetbufs = 0; | |
229 } | |
230 return c; | |
231 } | |
232 | |
233 again: | |
234 if (pp -> unget != CPP_NOUNG) | |
235 { | |
236 c = pp -> unget; | |
237 pp -> unget = CPP_NOUNG; | |
238 } | |
239 else | |
240 { | |
241 c = fetch_byte_tg(pp); | |
242 } | |
243 if (c == '\\') | |
244 { | |
245 int c2; | |
246 c2 = fetch_byte_tg(pp); | |
247 if (c2 == CPP_EOL) | |
248 goto again; | |
249 else | |
250 pp -> unget = c2; | |
251 } | |
252 return c; | |
253 } | |
254 | |
255 | |
256 | |
257 /* | |
258 Lex a token off the current input file. | |
259 | |
260 Returned tokens are as follows: | |
261 | |
262 * all words starting with [a-zA-Z_] are returned as TOK_IDENT | |
263 * numbers are returned as their appropriate type | |
264 * all whitespace in a sequence, including comments, is returned as | |
265 a single instance of TOK_WSPACE | |
266 * TOK_EOL is returned in the case of the end of a line | |
267 * TOK_EOF is returned when the end of the file is reached | |
268 * If no TOK_EOL appears before TOK_EOF, a TOK_EOL will be synthesised | |
269 * Any symbolic operator, etc., recognized by C will be returned as such | |
270 a token | |
271 * TOK_HASH will be returned for a # | |
272 * trigraphs will be interpreted | |
273 * backslash-newline will be interpreted | |
274 * any instance of CR, LF, CRLF, or LFCR will be interpreted as TOK_EOL | |
275 */ | |
276 | |
277 | |
305
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
278 int preproc_lex_fetch_byte(struct preproc_info *pp) |
295 | 279 { |
280 int c; | |
281 c = fetch_byte(pp); | |
282 if (c == CPP_EOF && pp -> eolseen == 0) | |
283 { | |
284 preproc_throw_warning(pp, "No newline at end of file"); | |
285 pp -> eolseen = 1; | |
286 return CPP_EOL; | |
287 } | |
288 | |
289 if (c == CPP_EOL) | |
290 { | |
291 pp -> eolseen = 1; | |
292 return c; | |
293 } | |
300 | 294 |
295 | 295 pp -> eolseen = 0; |
296 | |
297 /* convert comments to a single space here */ | |
298 if (c == '/') | |
299 { | |
300 int c2; | |
301 c2 = fetch_byte(pp); | |
302 if (c2 == '/') | |
303 { | |
304 /* single line comment */ | |
305 c = ' '; | |
306 for (;;) | |
307 { | |
308 c2 = fetch_byte(pp); | |
309 if (c2 == CPP_EOF || c2 == CPP_EOL) | |
310 break; | |
311 } | |
312 preproc_lex_unfetch_byte(pp, c2); | |
313 } | |
314 else if (c2 == '*') | |
315 { | |
316 /* block comment */ | |
317 c = ' '; | |
318 for (;;) | |
319 { | |
320 c2 = fetch_byte(pp); | |
305
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
321 if (c2 == CPP_EOF) |
295 | 322 { |
323 preproc_lex_unfetch_byte(pp, c); | |
324 break; | |
325 } | |
326 if (c2 == '*') | |
327 { | |
328 /* maybe end of comment */ | |
329 c2 = preproc_lex_fetch_byte(pp); | |
330 if (c2 == '/') | |
331 break; | |
332 } | |
333 } | |
334 } | |
335 else | |
336 { | |
337 /* not a comment - restore lookahead character */ | |
338 preproc_lex_unfetch_byte(pp, c2); | |
339 } | |
340 } | |
341 return c; | |
342 } | |
343 | |
344 struct token *preproc_lex_next_token(struct preproc_info *pp) | |
345 { | |
346 int sline = pp -> lineno; | |
347 int scol = pp -> column; | |
348 char *strval = NULL; | |
349 int ttype = TOK_NONE; | |
350 int c, c2; | |
351 int cl; | |
352 struct strbuf *strbuf; | |
304
d85d173ba120
Checkpoint lwcc development - preprocessor is runnable but nonfunctional
William Astle <lost@l-w.ca>
parents:
300
diff
changeset
|
353 struct token *t = NULL; |
300 | 354 struct preproc_info *fs; |
355 | |
356 fileagain: | |
295 | 357 c = preproc_lex_fetch_byte(pp); |
358 if (c == CPP_EOF) | |
359 { | |
360 if (pp -> nlseen == 0) | |
361 { | |
362 c = CPP_EOL; | |
363 } | |
364 } | |
305
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
365 |
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
366 if (pp -> lineno != sline) |
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
367 { |
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
368 sline = pp -> lineno; |
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
369 scol = pp -> column; |
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
370 } |
295 | 371 |
372 if (c == CPP_EOF) | |
373 { | |
300 | 374 /* check if we fell off the end of an include file */ |
375 if (pp -> filestack) | |
376 { | |
377 if (pp -> skip_level || pp -> found_level) | |
378 { | |
379 preproc_throw_error(pp, "Unbalanced conditionals in include file"); | |
380 } | |
381 fclose(pp -> fp); | |
382 fs = pp -> filestack; | |
383 *pp = *fs; | |
384 pp -> filestack = fs -> n; | |
385 goto fileagain; | |
386 } | |
387 else | |
388 { | |
389 ttype = TOK_EOF; | |
390 goto out; | |
391 } | |
295 | 392 } |
393 if (c == CPP_EOL) | |
394 { | |
395 pp -> nlseen = 1; | |
396 ttype = TOK_EOL; | |
397 goto out; | |
398 } | |
399 | |
400 pp -> nlseen = 0; | |
401 if (isspace(c)) | |
402 { | |
403 while (isspace(c)) | |
404 c = preproc_lex_fetch_byte(pp); | |
405 preproc_lex_unfetch_byte(pp, c); | |
406 ttype = TOK_WSPACE; | |
407 goto out; | |
408 } | |
409 | |
410 switch (c) | |
411 { | |
412 case '?': | |
413 ttype = TOK_QMARK; | |
414 goto out; | |
415 | |
416 case ':': | |
417 ttype = TOK_COLON; | |
418 goto out; | |
419 | |
420 case ',': | |
421 ttype = TOK_COMMA; | |
422 goto out; | |
423 | |
424 case '(': | |
425 ttype = TOK_OPAREN; | |
426 goto out; | |
427 | |
428 case ')': | |
429 ttype = TOK_CPAREN; | |
430 goto out; | |
431 | |
432 case '{': | |
433 ttype = TOK_OBRACE; | |
434 goto out; | |
435 | |
436 case '}': | |
437 ttype = TOK_CBRACE; | |
438 goto out; | |
439 | |
440 case '[': | |
441 ttype = TOK_OSQUARE; | |
442 goto out; | |
443 | |
444 case ']': | |
445 ttype = TOK_CSQUARE; | |
446 goto out; | |
447 | |
448 case '~': | |
449 ttype = TOK_COM; | |
450 goto out; | |
451 | |
452 case ';': | |
453 ttype = TOK_EOS; | |
454 goto out; | |
455 | |
456 /* and now for the possible multi character tokens */ | |
457 case '#': | |
458 ttype = TOK_HASH; | |
459 c = preproc_lex_fetch_byte(pp); | |
460 if (c == '#') | |
461 ttype = TOK_DBLHASH; | |
462 else | |
463 preproc_lex_unfetch_byte(pp, c); | |
464 goto out; | |
465 | |
466 case '^': | |
467 ttype = TOK_XOR; | |
468 c = preproc_lex_fetch_byte(pp); | |
469 if (c == '=') | |
470 ttype = TOK_XORASS; | |
471 else | |
472 preproc_lex_unfetch_byte(pp, c); | |
473 goto out; | |
474 | |
475 case '!': | |
476 ttype = TOK_BNOT; | |
477 c = preproc_lex_fetch_byte(pp); | |
478 if (c == '=') | |
479 ttype = TOK_NE; | |
480 else | |
481 preproc_lex_unfetch_byte(pp, c); | |
482 goto out; | |
483 | |
484 case '*': | |
485 ttype = TOK_STAR; | |
486 c = preproc_lex_fetch_byte(pp); | |
487 if (c == '=') | |
488 ttype = TOK_MULASS; | |
489 else | |
490 preproc_lex_unfetch_byte(pp, c); | |
491 goto out; | |
492 | |
493 case '/': | |
494 ttype = TOK_DIV; | |
495 c = preproc_lex_fetch_byte(pp); | |
496 if (c == '=') | |
497 ttype = TOK_DIVASS; | |
498 else | |
499 preproc_lex_unfetch_byte(pp, c); | |
500 goto out; | |
501 | |
502 case '=': | |
503 ttype = TOK_ASS; | |
504 c = preproc_lex_fetch_byte(pp); | |
505 if (c == '=') | |
506 ttype = TOK_EQ; | |
507 else | |
508 preproc_lex_unfetch_byte(pp, c); | |
509 goto out; | |
510 | |
511 case '%': | |
512 ttype = TOK_MOD; | |
513 c = preproc_lex_fetch_byte(pp); | |
514 if (c == '=') | |
515 ttype = TOK_MODASS; | |
516 else | |
517 preproc_lex_unfetch_byte(pp, c); | |
518 goto out; | |
519 | |
520 case '-': | |
521 ttype = TOK_SUB; | |
522 c = preproc_lex_fetch_byte(pp); | |
523 if (c == '=') | |
524 ttype = TOK_SUBASS; | |
525 else if (c == '-') | |
526 ttype = TOK_DBLSUB; | |
527 else if (c == '>') | |
528 ttype = TOK_ARROW; | |
529 else | |
530 preproc_lex_unfetch_byte(pp, c); | |
531 goto out; | |
532 | |
533 case '+': | |
534 ttype = TOK_ADD; | |
535 c = preproc_lex_fetch_byte(pp); | |
536 if (c == '=') | |
537 ttype = TOK_ADDASS; | |
538 else if (c == '+') | |
539 ttype = TOK_DBLADD; | |
540 else | |
541 preproc_lex_unfetch_byte(pp, c); | |
542 goto out; | |
543 | |
544 | |
545 case '&': | |
546 ttype = TOK_BWAND; | |
547 c = preproc_lex_fetch_byte(pp); | |
548 if (c == '=') | |
549 ttype = TOK_BWANDASS; | |
550 else if (c == '&') | |
551 ttype = TOK_BAND; | |
552 else | |
553 preproc_lex_unfetch_byte(pp, c); | |
554 goto out; | |
555 | |
556 case '|': | |
557 ttype = TOK_BWOR; | |
558 c = preproc_lex_fetch_byte(pp); | |
559 if (c == '=') | |
560 ttype = TOK_BWORASS; | |
561 else if (c == '|') | |
562 ttype = TOK_BOR; | |
563 else | |
564 preproc_lex_unfetch_byte(pp, c); | |
565 goto out; | |
566 | |
567 case '<': | |
568 ttype = TOK_LT; | |
569 c = preproc_lex_fetch_byte(pp); | |
570 if (c == '=') | |
571 ttype = TOK_LE; | |
572 else if (c == '<') | |
573 { | |
574 ttype = TOK_LSH; | |
575 c = preproc_lex_fetch_byte(pp); | |
576 if (c == '=') | |
577 ttype = TOK_LSHASS; | |
578 else | |
579 preproc_lex_unfetch_byte(pp, c); | |
580 } | |
581 else | |
582 preproc_lex_unfetch_byte(pp, c); | |
583 goto out; | |
584 | |
585 | |
586 case '>': | |
587 ttype = TOK_GT; | |
588 c = preproc_lex_fetch_byte(pp); | |
589 if (c == '=') | |
590 ttype = TOK_GE; | |
591 else if (c == '>') | |
592 { | |
593 ttype = TOK_RSH; | |
594 c = preproc_lex_fetch_byte(pp); | |
595 if (c == '=') | |
596 ttype = TOK_RSHASS; | |
597 else | |
598 preproc_lex_unfetch_byte(pp, c); | |
599 } | |
600 else | |
601 preproc_lex_unfetch_byte(pp, c); | |
602 goto out; | |
603 | |
604 case '\'': | |
605 /* character constant - turns into a uint */ | |
606 chrlit: | |
607 cl = 0; | |
608 strbuf = strbuf_new(); | |
609 for (;;) | |
610 { | |
611 c = preproc_lex_fetch_byte(pp); | |
612 if (c == CPP_EOF || c == CPP_EOL || c == '\'') | |
613 break; | |
614 cl++; | |
615 if (c == '\\') | |
616 { | |
617 strbuf_add(strbuf, '\\'); | |
618 c = preproc_lex_fetch_byte(pp); | |
619 if (c == CPP_EOF || c == CPP_EOL) | |
620 { | |
298
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
621 if (!pp -> lexstr) |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
622 preproc_throw_error(pp, "Invalid character constant"); |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
623 ttype = TOK_ERROR; |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
624 strval = strbuf_end(strbuf); |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
625 goto out; |
295 | 626 } |
627 cl++; | |
628 strbuf_add(strbuf, c); | |
629 continue; | |
630 } | |
631 strbuf_add(strbuf, c); | |
632 } | |
633 strval = strbuf_end(strbuf); | |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
634 if (cl == 0) |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
635 { |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
636 ttype = TOK_ERROR; |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
637 if (!pp -> lexstr) |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
638 preproc_throw_error(pp, "Invalid character constant"); |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
639 } |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
640 else |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
641 ttype = TOK_CHR_LIT; |
295 | 642 goto out; |
643 | |
644 case '"': | |
645 strlit: | |
646 /* string literal */ | |
647 strbuf = strbuf_new(); | |
305
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
648 strbuf_add(strbuf, '"'); |
295 | 649 for (;;) |
650 { | |
651 c = preproc_lex_fetch_byte(pp); | |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
652 if (c == CPP_EOF || c == CPP_EOL) |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
653 { |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
654 ttype = TOK_ERROR; |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
655 strval = strbuf_end(strbuf); |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
656 if (!pp -> lexstr) |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
657 preproc_throw_error(pp, "Invalid string constant"); |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
658 goto out; |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
659 } |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
660 if (c == '"') |
295 | 661 break; |
662 if (c == '\\') | |
663 { | |
664 strbuf_add(strbuf, '\\'); | |
665 c = preproc_lex_fetch_byte(pp); | |
666 if (c == CPP_EOF || c == CPP_EOL) | |
667 { | |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
668 ttype = TOK_ERROR; |
298
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
669 if (!pp -> lexstr) |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
670 preproc_throw_error(pp, "Invalid string constant"); |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
671 strval = strbuf_end(strbuf); |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
672 goto out; |
295 | 673 } |
674 cl++; | |
675 strbuf_add(strbuf, c); | |
676 continue; | |
677 } | |
678 strbuf_add(strbuf, c); | |
679 } | |
305
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
680 strbuf_add(strbuf, '"'); |
295 | 681 strval = strbuf_end(strbuf); |
682 ttype = TOK_STR_LIT; | |
683 goto out; | |
684 | |
685 case 'L': | |
686 /* check for wide string or wide char const */ | |
687 c2 = preproc_lex_fetch_byte(pp); | |
688 if (c2 == '\'') | |
689 { | |
690 goto chrlit; | |
691 } | |
692 else if (c2 == '"') | |
693 { | |
694 goto strlit; | |
695 } | |
696 preproc_lex_unfetch_byte(pp, c2); | |
697 /* fall through for identifier */ | |
698 case '_': | |
699 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': | |
700 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': | |
701 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': | |
702 case 's': case 't': case 'u': case 'v': case 'w': case 'x': | |
703 case 'y': case 'z': | |
704 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': | |
705 case 'G': case 'H': case 'I': case 'J': case 'K': | |
706 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': | |
707 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': | |
708 case 'Y': case 'Z': | |
709 /* we have an identifier here */ | |
710 strbuf = strbuf_new(); | |
711 strbuf_add(strbuf, c); | |
712 for (;;) | |
713 { | |
714 c = preproc_lex_fetch_byte(pp); | |
715 if ((c == '_') || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) | |
716 { | |
717 strbuf_add(strbuf, c); | |
718 continue; | |
719 } | |
720 else | |
721 { | |
722 strbuf_add(strbuf, 0); | |
723 strval = strbuf_end(strbuf); | |
724 break; | |
725 } | |
726 } | |
727 preproc_lex_unfetch_byte(pp, c); | |
728 ttype = TOK_IDENT; | |
729 goto out; | |
730 | |
731 case '.': | |
732 c = preproc_lex_fetch_byte(pp); | |
733 if (c >= '0' && c <= '9') | |
734 { | |
735 strbuf = strbuf_new(); | |
736 strbuf_add(strbuf, '.'); | |
737 goto numlit; | |
738 } | |
739 else if (c == '.') | |
740 { | |
741 c = preproc_lex_fetch_byte(pp); | |
742 if (c == '.') | |
743 { | |
744 ttype = TOK_ELLIPSIS; | |
745 goto out; | |
746 } | |
747 preproc_lex_unfetch_byte(pp, c); | |
748 } | |
749 preproc_lex_unfetch_byte(pp, c); | |
750 ttype = TOK_DOT; | |
751 goto out; | |
752 | |
753 case '0': case '1': case '2': case '3': case '4': | |
754 case '5': case '6': case '7': case '8': case '9': | |
755 strbuf = strbuf_new(); | |
756 numlit: | |
296 | 757 ttype = TOK_NUMBER; |
295 | 758 strbuf_add(strbuf, c); |
759 for (;;) | |
760 { | |
761 c = preproc_lex_fetch_byte(pp); | |
762 if (!((c == '_') || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))) | |
763 break; | |
764 strbuf_add(strbuf, c); | |
765 if (c == 'e' || c == 'E' || c == 'p' || c == 'P') | |
766 { | |
767 c = preproc_lex_fetch_byte(pp); | |
768 if (c == '+' || c == '-') | |
769 { | |
770 strbuf_add(strbuf, c); | |
771 continue; | |
772 } | |
773 preproc_lex_unfetch_byte(pp, c); | |
774 } | |
775 } | |
776 strval = strbuf_end(strbuf); | |
777 preproc_lex_unfetch_byte(pp, c); | |
778 goto out; | |
779 | |
780 default: | |
781 ttype = TOK_CHAR; | |
782 strval = lw_alloc(2); | |
783 strval[0] = c; | |
784 strval[1] = 0; | |
785 break; | |
786 } | |
787 out: | |
788 t = token_create(ttype, strval, sline, scol, pp -> fn); | |
789 lw_free(strval); | |
790 return t; | |
791 } |