295
|
1 /*
|
|
2 lwcc/lex.c
|
|
3
|
|
4 Copyright © 2013 William Astle
|
|
5
|
|
6 This file is part of LWTOOLS.
|
|
7
|
|
8 LWTOOLS is free software: you can redistribute it and/or modify it under the
|
|
9 terms of the GNU General Public License as published by the Free Software
|
|
10 Foundation, either version 3 of the License, or (at your option) any later
|
|
11 version.
|
|
12
|
|
13 This program is distributed in the hope that it will be useful, but WITHOUT
|
|
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
|
16 more details.
|
|
17
|
|
18 You should have received a copy of the GNU General Public License along with
|
|
19 this program. If not, see <http://www.gnu.org/licenses/>.
|
|
20 */
|
|
21
|
|
22 #include <ctype.h>
|
|
23 #include <stdio.h>
|
|
24
|
|
25 #include <lw_alloc.h>
|
|
26
|
|
27 #include "cpp.h"
|
|
28 #include "strbuf.h"
|
|
29 #include "token.h"
|
|
30
|
|
31 /* fetch a raw input byte from the current file. Will return CPP_EOF if
|
|
32 EOF is encountered and CPP_EOL if an end of line sequence is encountered.
|
|
33 End of line is defined as either CR, CRLF, LF, or LFCR. CPP_EOL is
|
|
34 returned on the first CR or LF encountered. The complementary CR or LF
|
|
35 is munched, if present, when the *next* character is read. This always
|
|
36 operates on file_stack.
|
|
37
|
|
38 This function also accounts for line numbers in input files and also
|
|
39 character columns.
|
|
40 */
|
|
41 static int fetch_byte_ll(struct preproc_info *pp)
|
|
42 {
|
|
43 int c;
|
|
44
|
|
45 if (pp -> eolstate != 0)
|
|
46 {
|
|
47 pp -> lineno++;
|
|
48 pp -> column = 0;
|
|
49 }
|
|
50 c = getc(pp -> fp);
|
|
51 pp -> column++;
|
|
52 if (pp -> eolstate == 1)
|
|
53 {
|
|
54 // just saw CR, munch LF
|
|
55 if (c == 10)
|
|
56 c = getc(pp -> fp);
|
|
57 pp -> eolstate = 0;
|
|
58 }
|
|
59 else if (pp -> eolstate == 2)
|
|
60 {
|
|
61 // just saw LF, much CR
|
|
62 if (c == 13)
|
|
63 c = getc(pp -> fp);
|
|
64 pp -> eolstate = 0;
|
|
65 }
|
|
66
|
|
67 if (c == 10)
|
|
68 {
|
|
69 // we have LF - end of line, flag to munch CR
|
|
70 pp -> eolstate = 2;
|
|
71 c = CPP_EOL;
|
|
72 }
|
|
73 else if (c == 13)
|
|
74 {
|
|
75 // we have CR - end of line, flag to munch LF
|
|
76 pp -> eolstate = 1;
|
|
77 c = CPP_EOL;
|
|
78 }
|
|
79 else if (c == EOF)
|
|
80 {
|
|
81 c = CPP_EOF;
|
|
82 }
|
|
83 return c;
|
|
84 }
|
|
85
|
|
86 /* This function takes a sequence of bytes from the _ll function above
|
|
87 and does trigraph interpretation on it, but only if the global
|
|
88 trigraphs is nonzero. */
|
|
89 static int fetch_byte_tg(struct preproc_info *pp)
|
|
90 {
|
|
91 int c;
|
|
92
|
|
93 if (!pp -> trigraphs)
|
|
94 {
|
|
95 c = fetch_byte_ll(pp);
|
|
96 }
|
|
97 else
|
|
98 {
|
|
99 /* we have to do the trigraph shit here */
|
|
100 if (pp -> ra != CPP_NOUNG)
|
|
101 {
|
|
102 if (pp -> qseen > 0)
|
|
103 {
|
|
104 c = '?';
|
|
105 pp -> qseen -= 1;
|
|
106 return c;
|
|
107 }
|
|
108 else
|
|
109 {
|
|
110 c = pp -> ra;
|
|
111 pp -> ra = CPP_NOUNG;
|
|
112 return c;
|
|
113 }
|
|
114 }
|
|
115
|
|
116 c = fetch_byte_ll(pp);
|
|
117 while (c == '?')
|
|
118 {
|
|
119 pp -> qseen++;
|
|
120 c = fetch_byte_ll(pp);
|
|
121 }
|
|
122
|
|
123 if (pp -> qseen >= 2)
|
|
124 {
|
|
125 // we have a trigraph
|
|
126 switch (c)
|
|
127 {
|
|
128 case '=':
|
|
129 c = '#';
|
|
130 pp -> qseen -= 2;
|
|
131 break;
|
|
132
|
|
133 case '/':
|
|
134 c = '\\';
|
|
135 pp -> qseen -= 2;
|
|
136 break;
|
|
137
|
|
138 case '\'':
|
|
139 c = '^';
|
|
140 pp -> qseen -= 2;
|
|
141 break;
|
|
142
|
|
143 case '(':
|
|
144 c = '[';
|
|
145 pp -> qseen -= 2;
|
|
146 break;
|
|
147
|
|
148 case ')':
|
|
149 c = ']';
|
|
150 pp -> qseen -= 2;
|
|
151 break;
|
|
152
|
|
153 case '!':
|
|
154 c = '|';
|
|
155 pp -> qseen -= 2;
|
|
156 break;
|
|
157
|
|
158 case '<':
|
|
159 c = '{';
|
|
160 pp -> qseen -= 2;
|
|
161 break;
|
|
162
|
|
163 case '>':
|
|
164 c = '}';
|
|
165 pp -> qseen -= 2;
|
|
166 break;
|
|
167
|
|
168 case '-':
|
|
169 c = '~';
|
|
170 pp -> qseen -= 2;
|
|
171 break;
|
|
172 }
|
|
173 if (pp -> qseen > 0)
|
|
174 {
|
|
175 pp -> ra = c;
|
|
176 c = '?';
|
|
177 pp -> qseen--;
|
|
178 }
|
|
179 }
|
|
180 else if (pp -> qseen > 0)
|
|
181 {
|
|
182 pp -> ra = c;
|
|
183 c = '?';
|
|
184 pp -> qseen--;
|
|
185 }
|
|
186 }
|
|
187 return c;
|
|
188 }
|
|
189
|
|
190 /* This function puts a byte back onto the front of the input stream used
|
|
191 by fetch_byte(). Theoretically, an unlimited number of characters can
|
|
192 be unfetched. Line and column counting may be incorrect if unfetched
|
|
193 characters cross a token boundary. */
|
|
194 static void preproc_lex_unfetch_byte(struct preproc_info *pp, int c)
|
|
195 {
|
|
196 if (pp -> ungetbufl >= pp -> ungetbufs)
|
|
197 {
|
|
198 pp -> ungetbufs += 100;
|
|
199 pp -> ungetbuf = lw_realloc(pp -> ungetbuf, pp -> ungetbufs);
|
|
200 }
|
|
201 pp -> ungetbuf[pp -> ungetbufl++] = c;
|
|
202 }
|
|
203
|
|
204 /* This function retrieves a byte from the input stream. It performs
|
|
205 backslash-newline splicing on the returned bytes. Any character
|
|
206 retrieved from the unfetch buffer is presumed to have already passed
|
|
207 the backslash-newline filter. */
|
|
208 static int fetch_byte(struct preproc_info *pp)
|
|
209 {
|
|
210 int c;
|
|
211
|
|
212 if (pp -> ungetbufl > 0)
|
|
213 {
|
|
214 pp -> ungetbufl--;
|
|
215 c = pp -> ungetbuf[pp -> ungetbufl];
|
|
216 if (pp -> ungetbufl == 0)
|
|
217 {
|
|
218 lw_free(pp -> ungetbuf);
|
|
219 pp -> ungetbuf = NULL;
|
|
220 pp -> ungetbufs = 0;
|
|
221 }
|
|
222 return c;
|
|
223 }
|
|
224
|
|
225 again:
|
|
226 if (pp -> unget != CPP_NOUNG)
|
|
227 {
|
|
228 c = pp -> unget;
|
|
229 pp -> unget = CPP_NOUNG;
|
|
230 }
|
|
231 else
|
|
232 {
|
|
233 c = fetch_byte_tg(pp);
|
|
234 }
|
|
235 if (c == '\\')
|
|
236 {
|
|
237 int c2;
|
|
238 c2 = fetch_byte_tg(pp);
|
|
239 if (c2 == CPP_EOL)
|
|
240 goto again;
|
|
241 else
|
|
242 pp -> unget = c2;
|
|
243 }
|
|
244 return c;
|
|
245 }
|
|
246
|
|
247
|
|
248
|
|
249 /*
|
|
250 Lex a token off the current input file.
|
|
251
|
|
252 Returned tokens are as follows:
|
|
253
|
|
254 * all words starting with [a-zA-Z_] are returned as TOK_IDENT
|
|
255 * numbers are returned as their appropriate type
|
|
256 * all whitespace in a sequence, including comments, is returned as
|
|
257 a single instance of TOK_WSPACE
|
|
258 * TOK_EOL is returned in the case of the end of a line
|
|
259 * TOK_EOF is returned when the end of the file is reached
|
|
260 * If no TOK_EOL appears before TOK_EOF, a TOK_EOL will be synthesised
|
|
261 * Any symbolic operator, etc., recognized by C will be returned as such
|
|
262 a token
|
|
263 * TOK_HASH will be returned for a #
|
|
264 * trigraphs will be interpreted
|
|
265 * backslash-newline will be interpreted
|
|
266 * any instance of CR, LF, CRLF, or LFCR will be interpreted as TOK_EOL
|
|
267 */
|
|
268
|
|
269
|
|
270 static int preproc_lex_fetch_byte(struct preproc_info *pp)
|
|
271 {
|
|
272 int c;
|
|
273 c = fetch_byte(pp);
|
|
274 if (c == CPP_EOF && pp -> eolseen == 0)
|
|
275 {
|
|
276 preproc_throw_warning(pp, "No newline at end of file");
|
|
277 pp -> eolseen = 1;
|
|
278 return CPP_EOL;
|
|
279 }
|
|
280
|
|
281 if (c == CPP_EOL)
|
|
282 {
|
|
283 pp -> eolseen = 1;
|
|
284 return c;
|
|
285 }
|
|
286
|
|
287 pp -> eolseen = 0;
|
|
288
|
|
289 /* convert comments to a single space here */
|
|
290 if (c == '/')
|
|
291 {
|
|
292 int c2;
|
|
293 c2 = fetch_byte(pp);
|
|
294 if (c2 == '/')
|
|
295 {
|
|
296 /* single line comment */
|
|
297 c = ' ';
|
|
298 for (;;)
|
|
299 {
|
|
300 c2 = fetch_byte(pp);
|
|
301 if (c2 == CPP_EOF || c2 == CPP_EOL)
|
|
302 break;
|
|
303 }
|
|
304 preproc_lex_unfetch_byte(pp, c2);
|
|
305 }
|
|
306 else if (c2 == '*')
|
|
307 {
|
|
308 /* block comment */
|
|
309 c = ' ';
|
|
310 for (;;)
|
|
311 {
|
|
312 c2 = fetch_byte(pp);
|
|
313 if (c2 == CPP_EOL || c2 == CPP_EOF)
|
|
314 {
|
|
315 preproc_lex_unfetch_byte(pp, c);
|
|
316 break;
|
|
317 }
|
|
318 if (c2 == '*')
|
|
319 {
|
|
320 /* maybe end of comment */
|
|
321 c2 = preproc_lex_fetch_byte(pp);
|
|
322 if (c2 == '/')
|
|
323 break;
|
|
324 }
|
|
325 }
|
|
326 }
|
|
327 else
|
|
328 {
|
|
329 /* not a comment - restore lookahead character */
|
|
330 preproc_lex_unfetch_byte(pp, c2);
|
|
331 }
|
|
332 }
|
|
333 return c;
|
|
334 }
|
|
335
|
|
336 struct token *preproc_lex_next_token(struct preproc_info *pp)
|
|
337 {
|
|
338 int sline = pp -> lineno;
|
|
339 int scol = pp -> column;
|
|
340 char *strval = NULL;
|
|
341 int ttype = TOK_NONE;
|
|
342 int c, c2;
|
|
343 int cl;
|
|
344 struct strbuf *strbuf;
|
|
345 struct token *t;
|
|
346
|
|
347 c = preproc_lex_fetch_byte(pp);
|
|
348 if (c == CPP_EOF)
|
|
349 {
|
|
350 if (pp -> nlseen == 0)
|
|
351 {
|
|
352 c = CPP_EOL;
|
|
353 }
|
|
354 }
|
|
355
|
|
356 if (c == CPP_EOF)
|
|
357 {
|
|
358 ttype = TOK_EOF;
|
|
359 goto out;
|
|
360 }
|
|
361 if (c == CPP_EOL)
|
|
362 {
|
|
363 pp -> nlseen = 1;
|
|
364 ttype = TOK_EOL;
|
|
365 goto out;
|
|
366 }
|
|
367
|
|
368 pp -> nlseen = 0;
|
|
369 if (isspace(c))
|
|
370 {
|
|
371 while (isspace(c))
|
|
372 c = preproc_lex_fetch_byte(pp);
|
|
373 preproc_lex_unfetch_byte(pp, c);
|
|
374 ttype = TOK_WSPACE;
|
|
375 goto out;
|
|
376 }
|
|
377
|
|
378 switch (c)
|
|
379 {
|
|
380 case '?':
|
|
381 ttype = TOK_QMARK;
|
|
382 goto out;
|
|
383
|
|
384 case ':':
|
|
385 ttype = TOK_COLON;
|
|
386 goto out;
|
|
387
|
|
388 case ',':
|
|
389 ttype = TOK_COMMA;
|
|
390 goto out;
|
|
391
|
|
392 case '(':
|
|
393 ttype = TOK_OPAREN;
|
|
394 goto out;
|
|
395
|
|
396 case ')':
|
|
397 ttype = TOK_CPAREN;
|
|
398 goto out;
|
|
399
|
|
400 case '{':
|
|
401 ttype = TOK_OBRACE;
|
|
402 goto out;
|
|
403
|
|
404 case '}':
|
|
405 ttype = TOK_CBRACE;
|
|
406 goto out;
|
|
407
|
|
408 case '[':
|
|
409 ttype = TOK_OSQUARE;
|
|
410 goto out;
|
|
411
|
|
412 case ']':
|
|
413 ttype = TOK_CSQUARE;
|
|
414 goto out;
|
|
415
|
|
416 case '~':
|
|
417 ttype = TOK_COM;
|
|
418 goto out;
|
|
419
|
|
420 case ';':
|
|
421 ttype = TOK_EOS;
|
|
422 goto out;
|
|
423
|
|
424 /* and now for the possible multi character tokens */
|
|
425 case '#':
|
|
426 ttype = TOK_HASH;
|
|
427 c = preproc_lex_fetch_byte(pp);
|
|
428 if (c == '#')
|
|
429 ttype = TOK_DBLHASH;
|
|
430 else
|
|
431 preproc_lex_unfetch_byte(pp, c);
|
|
432 goto out;
|
|
433
|
|
434 case '^':
|
|
435 ttype = TOK_XOR;
|
|
436 c = preproc_lex_fetch_byte(pp);
|
|
437 if (c == '=')
|
|
438 ttype = TOK_XORASS;
|
|
439 else
|
|
440 preproc_lex_unfetch_byte(pp, c);
|
|
441 goto out;
|
|
442
|
|
443 case '!':
|
|
444 ttype = TOK_BNOT;
|
|
445 c = preproc_lex_fetch_byte(pp);
|
|
446 if (c == '=')
|
|
447 ttype = TOK_NE;
|
|
448 else
|
|
449 preproc_lex_unfetch_byte(pp, c);
|
|
450 goto out;
|
|
451
|
|
452 case '*':
|
|
453 ttype = TOK_STAR;
|
|
454 c = preproc_lex_fetch_byte(pp);
|
|
455 if (c == '=')
|
|
456 ttype = TOK_MULASS;
|
|
457 else
|
|
458 preproc_lex_unfetch_byte(pp, c);
|
|
459 goto out;
|
|
460
|
|
461 case '/':
|
|
462 ttype = TOK_DIV;
|
|
463 c = preproc_lex_fetch_byte(pp);
|
|
464 if (c == '=')
|
|
465 ttype = TOK_DIVASS;
|
|
466 else
|
|
467 preproc_lex_unfetch_byte(pp, c);
|
|
468 goto out;
|
|
469
|
|
470 case '=':
|
|
471 ttype = TOK_ASS;
|
|
472 c = preproc_lex_fetch_byte(pp);
|
|
473 if (c == '=')
|
|
474 ttype = TOK_EQ;
|
|
475 else
|
|
476 preproc_lex_unfetch_byte(pp, c);
|
|
477 goto out;
|
|
478
|
|
479 case '%':
|
|
480 ttype = TOK_MOD;
|
|
481 c = preproc_lex_fetch_byte(pp);
|
|
482 if (c == '=')
|
|
483 ttype = TOK_MODASS;
|
|
484 else
|
|
485 preproc_lex_unfetch_byte(pp, c);
|
|
486 goto out;
|
|
487
|
|
488 case '-':
|
|
489 ttype = TOK_SUB;
|
|
490 c = preproc_lex_fetch_byte(pp);
|
|
491 if (c == '=')
|
|
492 ttype = TOK_SUBASS;
|
|
493 else if (c == '-')
|
|
494 ttype = TOK_DBLSUB;
|
|
495 else if (c == '>')
|
|
496 ttype = TOK_ARROW;
|
|
497 else
|
|
498 preproc_lex_unfetch_byte(pp, c);
|
|
499 goto out;
|
|
500
|
|
501 case '+':
|
|
502 ttype = TOK_ADD;
|
|
503 c = preproc_lex_fetch_byte(pp);
|
|
504 if (c == '=')
|
|
505 ttype = TOK_ADDASS;
|
|
506 else if (c == '+')
|
|
507 ttype = TOK_DBLADD;
|
|
508 else
|
|
509 preproc_lex_unfetch_byte(pp, c);
|
|
510 goto out;
|
|
511
|
|
512
|
|
513 case '&':
|
|
514 ttype = TOK_BWAND;
|
|
515 c = preproc_lex_fetch_byte(pp);
|
|
516 if (c == '=')
|
|
517 ttype = TOK_BWANDASS;
|
|
518 else if (c == '&')
|
|
519 ttype = TOK_BAND;
|
|
520 else
|
|
521 preproc_lex_unfetch_byte(pp, c);
|
|
522 goto out;
|
|
523
|
|
524 case '|':
|
|
525 ttype = TOK_BWOR;
|
|
526 c = preproc_lex_fetch_byte(pp);
|
|
527 if (c == '=')
|
|
528 ttype = TOK_BWORASS;
|
|
529 else if (c == '|')
|
|
530 ttype = TOK_BOR;
|
|
531 else
|
|
532 preproc_lex_unfetch_byte(pp, c);
|
|
533 goto out;
|
|
534
|
|
535 case '<':
|
|
536 ttype = TOK_LT;
|
|
537 c = preproc_lex_fetch_byte(pp);
|
|
538 if (c == '=')
|
|
539 ttype = TOK_LE;
|
|
540 else if (c == '<')
|
|
541 {
|
|
542 ttype = TOK_LSH;
|
|
543 c = preproc_lex_fetch_byte(pp);
|
|
544 if (c == '=')
|
|
545 ttype = TOK_LSHASS;
|
|
546 else
|
|
547 preproc_lex_unfetch_byte(pp, c);
|
|
548 }
|
|
549 else
|
|
550 preproc_lex_unfetch_byte(pp, c);
|
|
551 goto out;
|
|
552
|
|
553
|
|
554 case '>':
|
|
555 ttype = TOK_GT;
|
|
556 c = preproc_lex_fetch_byte(pp);
|
|
557 if (c == '=')
|
|
558 ttype = TOK_GE;
|
|
559 else if (c == '>')
|
|
560 {
|
|
561 ttype = TOK_RSH;
|
|
562 c = preproc_lex_fetch_byte(pp);
|
|
563 if (c == '=')
|
|
564 ttype = TOK_RSHASS;
|
|
565 else
|
|
566 preproc_lex_unfetch_byte(pp, c);
|
|
567 }
|
|
568 else
|
|
569 preproc_lex_unfetch_byte(pp, c);
|
|
570 goto out;
|
|
571
|
|
572 case '\'':
|
|
573 /* character constant - turns into a uint */
|
|
574 chrlit:
|
|
575 cl = 0;
|
|
576 strbuf = strbuf_new();
|
|
577 for (;;)
|
|
578 {
|
|
579 c = preproc_lex_fetch_byte(pp);
|
|
580 if (c == CPP_EOF || c == CPP_EOL || c == '\'')
|
|
581 break;
|
|
582 cl++;
|
|
583 if (c == '\\')
|
|
584 {
|
|
585 strbuf_add(strbuf, '\\');
|
|
586 c = preproc_lex_fetch_byte(pp);
|
|
587 if (c == CPP_EOF || c == CPP_EOL)
|
|
588 {
|
|
589 preproc_throw_error(pp, "Invalid character constant");
|
|
590 break;
|
|
591 }
|
|
592 cl++;
|
|
593 strbuf_add(strbuf, c);
|
|
594 continue;
|
|
595 }
|
|
596 strbuf_add(strbuf, c);
|
|
597 }
|
|
598 if (cl == 0)
|
|
599 preproc_throw_error(pp, "Invalid character constant");
|
|
600 strval = strbuf_end(strbuf);
|
|
601 ttype = TOK_CHR_LIT;
|
|
602 goto out;
|
|
603
|
|
604 case '"':
|
|
605 strlit:
|
|
606 /* string literal */
|
|
607 strbuf = strbuf_new();
|
|
608 for (;;)
|
|
609 {
|
|
610 c = preproc_lex_fetch_byte(pp);
|
|
611 if (c == CPP_EOF || c == CPP_EOL || c == '"')
|
|
612 break;
|
|
613 if (c == '\\')
|
|
614 {
|
|
615 strbuf_add(strbuf, '\\');
|
|
616 c = preproc_lex_fetch_byte(pp);
|
|
617 if (c == CPP_EOF || c == CPP_EOL)
|
|
618 {
|
|
619 preproc_throw_error(pp, "Invalid string constant");
|
|
620 break;
|
|
621 }
|
|
622 cl++;
|
|
623 strbuf_add(strbuf, c);
|
|
624 continue;
|
|
625 }
|
|
626 strbuf_add(strbuf, c);
|
|
627 }
|
|
628 strval = strbuf_end(strbuf);
|
|
629 ttype = TOK_STR_LIT;
|
|
630 goto out;
|
|
631
|
|
632 case 'L':
|
|
633 /* check for wide string or wide char const */
|
|
634 c2 = preproc_lex_fetch_byte(pp);
|
|
635 if (c2 == '\'')
|
|
636 {
|
|
637 goto chrlit;
|
|
638 }
|
|
639 else if (c2 == '"')
|
|
640 {
|
|
641 goto strlit;
|
|
642 }
|
|
643 preproc_lex_unfetch_byte(pp, c2);
|
|
644 /* fall through for identifier */
|
|
645 case '_':
|
|
646 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
|
|
647 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
|
|
648 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
|
|
649 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
|
|
650 case 'y': case 'z':
|
|
651 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
|
|
652 case 'G': case 'H': case 'I': case 'J': case 'K':
|
|
653 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
|
|
654 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
|
|
655 case 'Y': case 'Z':
|
|
656 /* we have an identifier here */
|
|
657 strbuf = strbuf_new();
|
|
658 strbuf_add(strbuf, c);
|
|
659 for (;;)
|
|
660 {
|
|
661 c = preproc_lex_fetch_byte(pp);
|
|
662 if ((c == '_') || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
|
|
663 {
|
|
664 strbuf_add(strbuf, c);
|
|
665 continue;
|
|
666 }
|
|
667 else
|
|
668 {
|
|
669 strbuf_add(strbuf, 0);
|
|
670 strval = strbuf_end(strbuf);
|
|
671 break;
|
|
672 }
|
|
673 }
|
|
674 preproc_lex_unfetch_byte(pp, c);
|
|
675 ttype = TOK_IDENT;
|
|
676 goto out;
|
|
677
|
|
678 case '.':
|
|
679 c = preproc_lex_fetch_byte(pp);
|
|
680 if (c >= '0' && c <= '9')
|
|
681 {
|
|
682 strbuf = strbuf_new();
|
|
683 strbuf_add(strbuf, '.');
|
|
684 goto numlit;
|
|
685 }
|
|
686 else if (c == '.')
|
|
687 {
|
|
688 c = preproc_lex_fetch_byte(pp);
|
|
689 if (c == '.')
|
|
690 {
|
|
691 ttype = TOK_ELLIPSIS;
|
|
692 goto out;
|
|
693 }
|
|
694 preproc_lex_unfetch_byte(pp, c);
|
|
695 }
|
|
696 preproc_lex_unfetch_byte(pp, c);
|
|
697 ttype = TOK_DOT;
|
|
698 goto out;
|
|
699
|
|
700 case '0': case '1': case '2': case '3': case '4':
|
|
701 case '5': case '6': case '7': case '8': case '9':
|
|
702 strbuf = strbuf_new();
|
|
703 numlit:
|
296
|
704 ttype = TOK_NUMBER;
|
295
|
705 strbuf_add(strbuf, c);
|
|
706 for (;;)
|
|
707 {
|
|
708 c = preproc_lex_fetch_byte(pp);
|
|
709 if (!((c == '_') || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')))
|
|
710 break;
|
|
711 strbuf_add(strbuf, c);
|
|
712 if (c == 'e' || c == 'E' || c == 'p' || c == 'P')
|
|
713 {
|
|
714 c = preproc_lex_fetch_byte(pp);
|
|
715 if (c == '+' || c == '-')
|
|
716 {
|
|
717 strbuf_add(strbuf, c);
|
|
718 continue;
|
|
719 }
|
|
720 preproc_lex_unfetch_byte(pp, c);
|
|
721 }
|
|
722 }
|
|
723 strval = strbuf_end(strbuf);
|
|
724 preproc_lex_unfetch_byte(pp, c);
|
|
725 goto out;
|
|
726
|
|
727 default:
|
|
728 ttype = TOK_CHAR;
|
|
729 strval = lw_alloc(2);
|
|
730 strval[0] = c;
|
|
731 strval[1] = 0;
|
|
732 break;
|
|
733 }
|
|
734 out:
|
|
735 t = token_create(ttype, strval, sline, scol, pp -> fn);
|
|
736 lw_free(strval);
|
|
737 return t;
|
|
738 }
|