Mercurial > hg > index.cgi
annotate lwcc/lex.c @ 298:6112c67728ba ccdev
Add stringification and token concatenation
Add support for # and ## in macro expansion by the preprocessor
(stringification and token concatenation). Totally untested.
author | William Astle <lost@l-w.ca> |
---|---|
date | Sat, 14 Sep 2013 22:42:53 -0600 |
parents | 83fcc1ed6ad6 |
children | 856caf91ffaa |
rev | line source |
---|---|
295 | 1 /* |
2 lwcc/lex.c | |
3 | |
4 Copyright © 2013 William Astle | |
5 | |
6 This file is part of LWTOOLS. | |
7 | |
8 LWTOOLS is free software: you can redistribute it and/or modify it under the | |
9 terms of the GNU General Public License as published by the Free Software | |
10 Foundation, either version 3 of the License, or (at your option) any later | |
11 version. | |
12 | |
13 This program is distributed in the hope that it will be useful, but WITHOUT | |
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
16 more details. | |
17 | |
18 You should have received a copy of the GNU General Public License along with | |
19 this program. If not, see <http://www.gnu.org/licenses/>. | |
20 */ | |
21 | |
22 #include <ctype.h> | |
23 #include <stdio.h> | |
24 | |
25 #include <lw_alloc.h> | |
26 | |
27 #include "cpp.h" | |
28 #include "strbuf.h" | |
29 #include "token.h" | |
30 | |
31 /* fetch a raw input byte from the current file. Will return CPP_EOF if | |
32 EOF is encountered and CPP_EOL if an end of line sequence is encountered. | |
33 End of line is defined as either CR, CRLF, LF, or LFCR. CPP_EOL is | |
34 returned on the first CR or LF encountered. The complementary CR or LF | |
35 is munched, if present, when the *next* character is read. This always | |
36 operates on file_stack. | |
37 | |
38 This function also accounts for line numbers in input files and also | |
39 character columns. | |
40 */ | |
41 static int fetch_byte_ll(struct preproc_info *pp) | |
42 { | |
43 int c; | |
44 | |
45 if (pp -> eolstate != 0) | |
46 { | |
47 pp -> lineno++; | |
48 pp -> column = 0; | |
49 } | |
50 c = getc(pp -> fp); | |
51 pp -> column++; | |
52 if (pp -> eolstate == 1) | |
53 { | |
54 // just saw CR, munch LF | |
55 if (c == 10) | |
56 c = getc(pp -> fp); | |
57 pp -> eolstate = 0; | |
58 } | |
59 else if (pp -> eolstate == 2) | |
60 { | |
61 // just saw LF, much CR | |
62 if (c == 13) | |
63 c = getc(pp -> fp); | |
64 pp -> eolstate = 0; | |
65 } | |
66 | |
67 if (c == 10) | |
68 { | |
69 // we have LF - end of line, flag to munch CR | |
70 pp -> eolstate = 2; | |
71 c = CPP_EOL; | |
72 } | |
73 else if (c == 13) | |
74 { | |
75 // we have CR - end of line, flag to munch LF | |
76 pp -> eolstate = 1; | |
77 c = CPP_EOL; | |
78 } | |
79 else if (c == EOF) | |
80 { | |
81 c = CPP_EOF; | |
82 } | |
83 return c; | |
84 } | |
85 | |
86 /* This function takes a sequence of bytes from the _ll function above | |
87 and does trigraph interpretation on it, but only if the global | |
88 trigraphs is nonzero. */ | |
89 static int fetch_byte_tg(struct preproc_info *pp) | |
90 { | |
91 int c; | |
92 | |
93 if (!pp -> trigraphs) | |
94 { | |
95 c = fetch_byte_ll(pp); | |
96 } | |
97 else | |
98 { | |
99 /* we have to do the trigraph shit here */ | |
100 if (pp -> ra != CPP_NOUNG) | |
101 { | |
102 if (pp -> qseen > 0) | |
103 { | |
104 c = '?'; | |
105 pp -> qseen -= 1; | |
106 return c; | |
107 } | |
108 else | |
109 { | |
110 c = pp -> ra; | |
111 pp -> ra = CPP_NOUNG; | |
112 return c; | |
113 } | |
114 } | |
115 | |
116 c = fetch_byte_ll(pp); | |
117 while (c == '?') | |
118 { | |
119 pp -> qseen++; | |
120 c = fetch_byte_ll(pp); | |
121 } | |
122 | |
123 if (pp -> qseen >= 2) | |
124 { | |
125 // we have a trigraph | |
126 switch (c) | |
127 { | |
128 case '=': | |
129 c = '#'; | |
130 pp -> qseen -= 2; | |
131 break; | |
132 | |
133 case '/': | |
134 c = '\\'; | |
135 pp -> qseen -= 2; | |
136 break; | |
137 | |
138 case '\'': | |
139 c = '^'; | |
140 pp -> qseen -= 2; | |
141 break; | |
142 | |
143 case '(': | |
144 c = '['; | |
145 pp -> qseen -= 2; | |
146 break; | |
147 | |
148 case ')': | |
149 c = ']'; | |
150 pp -> qseen -= 2; | |
151 break; | |
152 | |
153 case '!': | |
154 c = '|'; | |
155 pp -> qseen -= 2; | |
156 break; | |
157 | |
158 case '<': | |
159 c = '{'; | |
160 pp -> qseen -= 2; | |
161 break; | |
162 | |
163 case '>': | |
164 c = '}'; | |
165 pp -> qseen -= 2; | |
166 break; | |
167 | |
168 case '-': | |
169 c = '~'; | |
170 pp -> qseen -= 2; | |
171 break; | |
172 } | |
173 if (pp -> qseen > 0) | |
174 { | |
175 pp -> ra = c; | |
176 c = '?'; | |
177 pp -> qseen--; | |
178 } | |
179 } | |
180 else if (pp -> qseen > 0) | |
181 { | |
182 pp -> ra = c; | |
183 c = '?'; | |
184 pp -> qseen--; | |
185 } | |
186 } | |
187 return c; | |
188 } | |
189 | |
190 /* This function puts a byte back onto the front of the input stream used | |
191 by fetch_byte(). Theoretically, an unlimited number of characters can | |
192 be unfetched. Line and column counting may be incorrect if unfetched | |
193 characters cross a token boundary. */ | |
194 static void preproc_lex_unfetch_byte(struct preproc_info *pp, int c) | |
195 { | |
196 if (pp -> ungetbufl >= pp -> ungetbufs) | |
197 { | |
198 pp -> ungetbufs += 100; | |
199 pp -> ungetbuf = lw_realloc(pp -> ungetbuf, pp -> ungetbufs); | |
200 } | |
201 pp -> ungetbuf[pp -> ungetbufl++] = c; | |
202 } | |
203 | |
204 /* This function retrieves a byte from the input stream. It performs | |
205 backslash-newline splicing on the returned bytes. Any character | |
206 retrieved from the unfetch buffer is presumed to have already passed | |
207 the backslash-newline filter. */ | |
208 static int fetch_byte(struct preproc_info *pp) | |
209 { | |
210 int c; | |
211 | |
298
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
212 if (pp -> lexstr) |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
213 { |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
214 if (pp -> lexstr[pp -> lexstrloc]) |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
215 return pp -> lexstr[pp -> lexstrloc++]; |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
216 else |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
217 return CPP_EOL; |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
218 } |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
219 |
295 | 220 if (pp -> ungetbufl > 0) |
221 { | |
222 pp -> ungetbufl--; | |
223 c = pp -> ungetbuf[pp -> ungetbufl]; | |
224 if (pp -> ungetbufl == 0) | |
225 { | |
226 lw_free(pp -> ungetbuf); | |
227 pp -> ungetbuf = NULL; | |
228 pp -> ungetbufs = 0; | |
229 } | |
230 return c; | |
231 } | |
232 | |
233 again: | |
234 if (pp -> unget != CPP_NOUNG) | |
235 { | |
236 c = pp -> unget; | |
237 pp -> unget = CPP_NOUNG; | |
238 } | |
239 else | |
240 { | |
241 c = fetch_byte_tg(pp); | |
242 } | |
243 if (c == '\\') | |
244 { | |
245 int c2; | |
246 c2 = fetch_byte_tg(pp); | |
247 if (c2 == CPP_EOL) | |
248 goto again; | |
249 else | |
250 pp -> unget = c2; | |
251 } | |
252 return c; | |
253 } | |
254 | |
255 | |
256 | |
257 /* | |
258 Lex a token off the current input file. | |
259 | |
260 Returned tokens are as follows: | |
261 | |
262 * all words starting with [a-zA-Z_] are returned as TOK_IDENT | |
263 * numbers are returned as their appropriate type | |
264 * all whitespace in a sequence, including comments, is returned as | |
265 a single instance of TOK_WSPACE | |
266 * TOK_EOL is returned in the case of the end of a line | |
267 * TOK_EOF is returned when the end of the file is reached | |
268 * If no TOK_EOL appears before TOK_EOF, a TOK_EOL will be synthesised | |
269 * Any symbolic operator, etc., recognized by C will be returned as such | |
270 a token | |
271 * TOK_HASH will be returned for a # | |
272 * trigraphs will be interpreted | |
273 * backslash-newline will be interpreted | |
274 * any instance of CR, LF, CRLF, or LFCR will be interpreted as TOK_EOL | |
275 */ | |
276 | |
277 | |
278 static int preproc_lex_fetch_byte(struct preproc_info *pp) | |
279 { | |
280 int c; | |
281 c = fetch_byte(pp); | |
282 if (c == CPP_EOF && pp -> eolseen == 0) | |
283 { | |
284 preproc_throw_warning(pp, "No newline at end of file"); | |
285 pp -> eolseen = 1; | |
286 return CPP_EOL; | |
287 } | |
288 | |
289 if (c == CPP_EOL) | |
290 { | |
291 pp -> eolseen = 1; | |
292 return c; | |
293 } | |
294 | |
295 pp -> eolseen = 0; | |
296 | |
297 /* convert comments to a single space here */ | |
298 if (c == '/') | |
299 { | |
300 int c2; | |
301 c2 = fetch_byte(pp); | |
302 if (c2 == '/') | |
303 { | |
304 /* single line comment */ | |
305 c = ' '; | |
306 for (;;) | |
307 { | |
308 c2 = fetch_byte(pp); | |
309 if (c2 == CPP_EOF || c2 == CPP_EOL) | |
310 break; | |
311 } | |
312 preproc_lex_unfetch_byte(pp, c2); | |
313 } | |
314 else if (c2 == '*') | |
315 { | |
316 /* block comment */ | |
317 c = ' '; | |
318 for (;;) | |
319 { | |
320 c2 = fetch_byte(pp); | |
321 if (c2 == CPP_EOL || c2 == CPP_EOF) | |
322 { | |
323 preproc_lex_unfetch_byte(pp, c); | |
324 break; | |
325 } | |
326 if (c2 == '*') | |
327 { | |
328 /* maybe end of comment */ | |
329 c2 = preproc_lex_fetch_byte(pp); | |
330 if (c2 == '/') | |
331 break; | |
332 } | |
333 } | |
334 } | |
335 else | |
336 { | |
337 /* not a comment - restore lookahead character */ | |
338 preproc_lex_unfetch_byte(pp, c2); | |
339 } | |
340 } | |
341 return c; | |
342 } | |
343 | |
344 struct token *preproc_lex_next_token(struct preproc_info *pp) | |
345 { | |
346 int sline = pp -> lineno; | |
347 int scol = pp -> column; | |
348 char *strval = NULL; | |
349 int ttype = TOK_NONE; | |
350 int c, c2; | |
351 int cl; | |
352 struct strbuf *strbuf; | |
353 struct token *t; | |
354 | |
355 c = preproc_lex_fetch_byte(pp); | |
356 if (c == CPP_EOF) | |
357 { | |
358 if (pp -> nlseen == 0) | |
359 { | |
360 c = CPP_EOL; | |
361 } | |
362 } | |
363 | |
364 if (c == CPP_EOF) | |
365 { | |
366 ttype = TOK_EOF; | |
367 goto out; | |
368 } | |
369 if (c == CPP_EOL) | |
370 { | |
371 pp -> nlseen = 1; | |
372 ttype = TOK_EOL; | |
373 goto out; | |
374 } | |
375 | |
376 pp -> nlseen = 0; | |
377 if (isspace(c)) | |
378 { | |
379 while (isspace(c)) | |
380 c = preproc_lex_fetch_byte(pp); | |
381 preproc_lex_unfetch_byte(pp, c); | |
382 ttype = TOK_WSPACE; | |
383 goto out; | |
384 } | |
385 | |
386 switch (c) | |
387 { | |
388 case '?': | |
389 ttype = TOK_QMARK; | |
390 goto out; | |
391 | |
392 case ':': | |
393 ttype = TOK_COLON; | |
394 goto out; | |
395 | |
396 case ',': | |
397 ttype = TOK_COMMA; | |
398 goto out; | |
399 | |
400 case '(': | |
401 ttype = TOK_OPAREN; | |
402 goto out; | |
403 | |
404 case ')': | |
405 ttype = TOK_CPAREN; | |
406 goto out; | |
407 | |
408 case '{': | |
409 ttype = TOK_OBRACE; | |
410 goto out; | |
411 | |
412 case '}': | |
413 ttype = TOK_CBRACE; | |
414 goto out; | |
415 | |
416 case '[': | |
417 ttype = TOK_OSQUARE; | |
418 goto out; | |
419 | |
420 case ']': | |
421 ttype = TOK_CSQUARE; | |
422 goto out; | |
423 | |
424 case '~': | |
425 ttype = TOK_COM; | |
426 goto out; | |
427 | |
428 case ';': | |
429 ttype = TOK_EOS; | |
430 goto out; | |
431 | |
432 /* and now for the possible multi character tokens */ | |
433 case '#': | |
434 ttype = TOK_HASH; | |
435 c = preproc_lex_fetch_byte(pp); | |
436 if (c == '#') | |
437 ttype = TOK_DBLHASH; | |
438 else | |
439 preproc_lex_unfetch_byte(pp, c); | |
440 goto out; | |
441 | |
442 case '^': | |
443 ttype = TOK_XOR; | |
444 c = preproc_lex_fetch_byte(pp); | |
445 if (c == '=') | |
446 ttype = TOK_XORASS; | |
447 else | |
448 preproc_lex_unfetch_byte(pp, c); | |
449 goto out; | |
450 | |
451 case '!': | |
452 ttype = TOK_BNOT; | |
453 c = preproc_lex_fetch_byte(pp); | |
454 if (c == '=') | |
455 ttype = TOK_NE; | |
456 else | |
457 preproc_lex_unfetch_byte(pp, c); | |
458 goto out; | |
459 | |
460 case '*': | |
461 ttype = TOK_STAR; | |
462 c = preproc_lex_fetch_byte(pp); | |
463 if (c == '=') | |
464 ttype = TOK_MULASS; | |
465 else | |
466 preproc_lex_unfetch_byte(pp, c); | |
467 goto out; | |
468 | |
469 case '/': | |
470 ttype = TOK_DIV; | |
471 c = preproc_lex_fetch_byte(pp); | |
472 if (c == '=') | |
473 ttype = TOK_DIVASS; | |
474 else | |
475 preproc_lex_unfetch_byte(pp, c); | |
476 goto out; | |
477 | |
478 case '=': | |
479 ttype = TOK_ASS; | |
480 c = preproc_lex_fetch_byte(pp); | |
481 if (c == '=') | |
482 ttype = TOK_EQ; | |
483 else | |
484 preproc_lex_unfetch_byte(pp, c); | |
485 goto out; | |
486 | |
487 case '%': | |
488 ttype = TOK_MOD; | |
489 c = preproc_lex_fetch_byte(pp); | |
490 if (c == '=') | |
491 ttype = TOK_MODASS; | |
492 else | |
493 preproc_lex_unfetch_byte(pp, c); | |
494 goto out; | |
495 | |
496 case '-': | |
497 ttype = TOK_SUB; | |
498 c = preproc_lex_fetch_byte(pp); | |
499 if (c == '=') | |
500 ttype = TOK_SUBASS; | |
501 else if (c == '-') | |
502 ttype = TOK_DBLSUB; | |
503 else if (c == '>') | |
504 ttype = TOK_ARROW; | |
505 else | |
506 preproc_lex_unfetch_byte(pp, c); | |
507 goto out; | |
508 | |
509 case '+': | |
510 ttype = TOK_ADD; | |
511 c = preproc_lex_fetch_byte(pp); | |
512 if (c == '=') | |
513 ttype = TOK_ADDASS; | |
514 else if (c == '+') | |
515 ttype = TOK_DBLADD; | |
516 else | |
517 preproc_lex_unfetch_byte(pp, c); | |
518 goto out; | |
519 | |
520 | |
521 case '&': | |
522 ttype = TOK_BWAND; | |
523 c = preproc_lex_fetch_byte(pp); | |
524 if (c == '=') | |
525 ttype = TOK_BWANDASS; | |
526 else if (c == '&') | |
527 ttype = TOK_BAND; | |
528 else | |
529 preproc_lex_unfetch_byte(pp, c); | |
530 goto out; | |
531 | |
532 case '|': | |
533 ttype = TOK_BWOR; | |
534 c = preproc_lex_fetch_byte(pp); | |
535 if (c == '=') | |
536 ttype = TOK_BWORASS; | |
537 else if (c == '|') | |
538 ttype = TOK_BOR; | |
539 else | |
540 preproc_lex_unfetch_byte(pp, c); | |
541 goto out; | |
542 | |
543 case '<': | |
544 ttype = TOK_LT; | |
545 c = preproc_lex_fetch_byte(pp); | |
546 if (c == '=') | |
547 ttype = TOK_LE; | |
548 else if (c == '<') | |
549 { | |
550 ttype = TOK_LSH; | |
551 c = preproc_lex_fetch_byte(pp); | |
552 if (c == '=') | |
553 ttype = TOK_LSHASS; | |
554 else | |
555 preproc_lex_unfetch_byte(pp, c); | |
556 } | |
557 else | |
558 preproc_lex_unfetch_byte(pp, c); | |
559 goto out; | |
560 | |
561 | |
562 case '>': | |
563 ttype = TOK_GT; | |
564 c = preproc_lex_fetch_byte(pp); | |
565 if (c == '=') | |
566 ttype = TOK_GE; | |
567 else if (c == '>') | |
568 { | |
569 ttype = TOK_RSH; | |
570 c = preproc_lex_fetch_byte(pp); | |
571 if (c == '=') | |
572 ttype = TOK_RSHASS; | |
573 else | |
574 preproc_lex_unfetch_byte(pp, c); | |
575 } | |
576 else | |
577 preproc_lex_unfetch_byte(pp, c); | |
578 goto out; | |
579 | |
580 case '\'': | |
581 /* character constant - turns into a uint */ | |
582 chrlit: | |
583 cl = 0; | |
584 strbuf = strbuf_new(); | |
585 for (;;) | |
586 { | |
587 c = preproc_lex_fetch_byte(pp); | |
588 if (c == CPP_EOF || c == CPP_EOL || c == '\'') | |
589 break; | |
590 cl++; | |
591 if (c == '\\') | |
592 { | |
593 strbuf_add(strbuf, '\\'); | |
594 c = preproc_lex_fetch_byte(pp); | |
595 if (c == CPP_EOF || c == CPP_EOL) | |
596 { | |
298
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
597 if (!pp -> lexstr) |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
598 preproc_throw_error(pp, "Invalid character constant"); |
295 | 599 break; |
600 } | |
601 cl++; | |
602 strbuf_add(strbuf, c); | |
603 continue; | |
604 } | |
605 strbuf_add(strbuf, c); | |
606 } | |
298
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
607 if (cl == 0 && !pp -> lexstr) |
295 | 608 preproc_throw_error(pp, "Invalid character constant"); |
609 strval = strbuf_end(strbuf); | |
610 ttype = TOK_CHR_LIT; | |
611 goto out; | |
612 | |
613 case '"': | |
614 strlit: | |
615 /* string literal */ | |
616 strbuf = strbuf_new(); | |
617 for (;;) | |
618 { | |
619 c = preproc_lex_fetch_byte(pp); | |
620 if (c == CPP_EOF || c == CPP_EOL || c == '"') | |
621 break; | |
622 if (c == '\\') | |
623 { | |
624 strbuf_add(strbuf, '\\'); | |
625 c = preproc_lex_fetch_byte(pp); | |
626 if (c == CPP_EOF || c == CPP_EOL) | |
627 { | |
298
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
628 if (!pp -> lexstr) |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
629 preproc_throw_error(pp, "Invalid string constant"); |
295 | 630 break; |
631 } | |
632 cl++; | |
633 strbuf_add(strbuf, c); | |
634 continue; | |
635 } | |
636 strbuf_add(strbuf, c); | |
637 } | |
638 strval = strbuf_end(strbuf); | |
639 ttype = TOK_STR_LIT; | |
640 goto out; | |
641 | |
642 case 'L': | |
643 /* check for wide string or wide char const */ | |
644 c2 = preproc_lex_fetch_byte(pp); | |
645 if (c2 == '\'') | |
646 { | |
647 goto chrlit; | |
648 } | |
649 else if (c2 == '"') | |
650 { | |
651 goto strlit; | |
652 } | |
653 preproc_lex_unfetch_byte(pp, c2); | |
654 /* fall through for identifier */ | |
655 case '_': | |
656 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': | |
657 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': | |
658 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': | |
659 case 's': case 't': case 'u': case 'v': case 'w': case 'x': | |
660 case 'y': case 'z': | |
661 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': | |
662 case 'G': case 'H': case 'I': case 'J': case 'K': | |
663 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': | |
664 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': | |
665 case 'Y': case 'Z': | |
666 /* we have an identifier here */ | |
667 strbuf = strbuf_new(); | |
668 strbuf_add(strbuf, c); | |
669 for (;;) | |
670 { | |
671 c = preproc_lex_fetch_byte(pp); | |
672 if ((c == '_') || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) | |
673 { | |
674 strbuf_add(strbuf, c); | |
675 continue; | |
676 } | |
677 else | |
678 { | |
679 strbuf_add(strbuf, 0); | |
680 strval = strbuf_end(strbuf); | |
681 break; | |
682 } | |
683 } | |
684 preproc_lex_unfetch_byte(pp, c); | |
685 ttype = TOK_IDENT; | |
686 goto out; | |
687 | |
688 case '.': | |
689 c = preproc_lex_fetch_byte(pp); | |
690 if (c >= '0' && c <= '9') | |
691 { | |
692 strbuf = strbuf_new(); | |
693 strbuf_add(strbuf, '.'); | |
694 goto numlit; | |
695 } | |
696 else if (c == '.') | |
697 { | |
698 c = preproc_lex_fetch_byte(pp); | |
699 if (c == '.') | |
700 { | |
701 ttype = TOK_ELLIPSIS; | |
702 goto out; | |
703 } | |
704 preproc_lex_unfetch_byte(pp, c); | |
705 } | |
706 preproc_lex_unfetch_byte(pp, c); | |
707 ttype = TOK_DOT; | |
708 goto out; | |
709 | |
710 case '0': case '1': case '2': case '3': case '4': | |
711 case '5': case '6': case '7': case '8': case '9': | |
712 strbuf = strbuf_new(); | |
713 numlit: | |
296 | 714 ttype = TOK_NUMBER; |
295 | 715 strbuf_add(strbuf, c); |
716 for (;;) | |
717 { | |
718 c = preproc_lex_fetch_byte(pp); | |
719 if (!((c == '_') || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))) | |
720 break; | |
721 strbuf_add(strbuf, c); | |
722 if (c == 'e' || c == 'E' || c == 'p' || c == 'P') | |
723 { | |
724 c = preproc_lex_fetch_byte(pp); | |
725 if (c == '+' || c == '-') | |
726 { | |
727 strbuf_add(strbuf, c); | |
728 continue; | |
729 } | |
730 preproc_lex_unfetch_byte(pp, c); | |
731 } | |
732 } | |
733 strval = strbuf_end(strbuf); | |
734 preproc_lex_unfetch_byte(pp, c); | |
735 goto out; | |
736 | |
737 default: | |
738 ttype = TOK_CHAR; | |
739 strval = lw_alloc(2); | |
740 strval[0] = c; | |
741 strval[1] = 0; | |
742 break; | |
743 } | |
744 out: | |
745 t = token_create(ttype, strval, sline, scol, pp -> fn); | |
746 lw_free(strval); | |
747 return t; | |
748 } |