Mercurial > hg > index.cgi
annotate lwcc/lex.c @ 300:8d6c47395653 ccdev
Implemented #include and #line
Theoretically, directives are now implemented.
author | William Astle <lost@l-w.ca> |
---|---|
date | Sun, 15 Sep 2013 13:49:00 -0600 |
parents | 856caf91ffaa |
children | d85d173ba120 |
rev | line source |
---|---|
295 | 1 /* |
2 lwcc/lex.c | |
3 | |
4 Copyright © 2013 William Astle | |
5 | |
6 This file is part of LWTOOLS. | |
7 | |
8 LWTOOLS is free software: you can redistribute it and/or modify it under the | |
9 terms of the GNU General Public License as published by the Free Software | |
10 Foundation, either version 3 of the License, or (at your option) any later | |
11 version. | |
12 | |
13 This program is distributed in the hope that it will be useful, but WITHOUT | |
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
16 more details. | |
17 | |
18 You should have received a copy of the GNU General Public License along with | |
19 this program. If not, see <http://www.gnu.org/licenses/>. | |
20 */ | |
21 | |
22 #include <ctype.h> | |
23 #include <stdio.h> | |
24 | |
25 #include <lw_alloc.h> | |
26 | |
27 #include "cpp.h" | |
28 #include "strbuf.h" | |
29 #include "token.h" | |
30 | |
31 /* fetch a raw input byte from the current file. Will return CPP_EOF if | |
32 EOF is encountered and CPP_EOL if an end of line sequence is encountered. | |
33 End of line is defined as either CR, CRLF, LF, or LFCR. CPP_EOL is | |
34 returned on the first CR or LF encountered. The complementary CR or LF | |
35 is munched, if present, when the *next* character is read. This always | |
36 operates on file_stack. | |
37 | |
38 This function also accounts for line numbers in input files and also | |
39 character columns. | |
40 */ | |
41 static int fetch_byte_ll(struct preproc_info *pp) | |
42 { | |
43 int c; | |
44 | |
45 if (pp -> eolstate != 0) | |
46 { | |
47 pp -> lineno++; | |
48 pp -> column = 0; | |
49 } | |
50 c = getc(pp -> fp); | |
51 pp -> column++; | |
52 if (pp -> eolstate == 1) | |
53 { | |
54 // just saw CR, munch LF | |
55 if (c == 10) | |
56 c = getc(pp -> fp); | |
57 pp -> eolstate = 0; | |
58 } | |
59 else if (pp -> eolstate == 2) | |
60 { | |
61 // just saw LF, much CR | |
62 if (c == 13) | |
63 c = getc(pp -> fp); | |
64 pp -> eolstate = 0; | |
65 } | |
66 | |
67 if (c == 10) | |
68 { | |
69 // we have LF - end of line, flag to munch CR | |
70 pp -> eolstate = 2; | |
71 c = CPP_EOL; | |
72 } | |
73 else if (c == 13) | |
74 { | |
75 // we have CR - end of line, flag to munch LF | |
76 pp -> eolstate = 1; | |
77 c = CPP_EOL; | |
78 } | |
79 else if (c == EOF) | |
80 { | |
81 c = CPP_EOF; | |
82 } | |
83 return c; | |
84 } | |
85 | |
86 /* This function takes a sequence of bytes from the _ll function above | |
87 and does trigraph interpretation on it, but only if the global | |
88 trigraphs is nonzero. */ | |
89 static int fetch_byte_tg(struct preproc_info *pp) | |
90 { | |
91 int c; | |
92 | |
93 if (!pp -> trigraphs) | |
94 { | |
95 c = fetch_byte_ll(pp); | |
96 } | |
97 else | |
98 { | |
99 /* we have to do the trigraph shit here */ | |
100 if (pp -> ra != CPP_NOUNG) | |
101 { | |
102 if (pp -> qseen > 0) | |
103 { | |
104 c = '?'; | |
105 pp -> qseen -= 1; | |
106 return c; | |
107 } | |
108 else | |
109 { | |
110 c = pp -> ra; | |
111 pp -> ra = CPP_NOUNG; | |
112 return c; | |
113 } | |
114 } | |
115 | |
116 c = fetch_byte_ll(pp); | |
117 while (c == '?') | |
118 { | |
119 pp -> qseen++; | |
120 c = fetch_byte_ll(pp); | |
121 } | |
122 | |
123 if (pp -> qseen >= 2) | |
124 { | |
125 // we have a trigraph | |
126 switch (c) | |
127 { | |
128 case '=': | |
129 c = '#'; | |
130 pp -> qseen -= 2; | |
131 break; | |
132 | |
133 case '/': | |
134 c = '\\'; | |
135 pp -> qseen -= 2; | |
136 break; | |
137 | |
138 case '\'': | |
139 c = '^'; | |
140 pp -> qseen -= 2; | |
141 break; | |
142 | |
143 case '(': | |
144 c = '['; | |
145 pp -> qseen -= 2; | |
146 break; | |
147 | |
148 case ')': | |
149 c = ']'; | |
150 pp -> qseen -= 2; | |
151 break; | |
152 | |
153 case '!': | |
154 c = '|'; | |
155 pp -> qseen -= 2; | |
156 break; | |
157 | |
158 case '<': | |
159 c = '{'; | |
160 pp -> qseen -= 2; | |
161 break; | |
162 | |
163 case '>': | |
164 c = '}'; | |
165 pp -> qseen -= 2; | |
166 break; | |
167 | |
168 case '-': | |
169 c = '~'; | |
170 pp -> qseen -= 2; | |
171 break; | |
172 } | |
173 if (pp -> qseen > 0) | |
174 { | |
175 pp -> ra = c; | |
176 c = '?'; | |
177 pp -> qseen--; | |
178 } | |
179 } | |
180 else if (pp -> qseen > 0) | |
181 { | |
182 pp -> ra = c; | |
183 c = '?'; | |
184 pp -> qseen--; | |
185 } | |
186 } | |
187 return c; | |
188 } | |
189 | |
190 /* This function puts a byte back onto the front of the input stream used | |
191 by fetch_byte(). Theoretically, an unlimited number of characters can | |
192 be unfetched. Line and column counting may be incorrect if unfetched | |
193 characters cross a token boundary. */ | |
194 static void preproc_lex_unfetch_byte(struct preproc_info *pp, int c) | |
195 { | |
196 if (pp -> ungetbufl >= pp -> ungetbufs) | |
197 { | |
198 pp -> ungetbufs += 100; | |
199 pp -> ungetbuf = lw_realloc(pp -> ungetbuf, pp -> ungetbufs); | |
200 } | |
201 pp -> ungetbuf[pp -> ungetbufl++] = c; | |
202 } | |
203 | |
204 /* This function retrieves a byte from the input stream. It performs | |
205 backslash-newline splicing on the returned bytes. Any character | |
206 retrieved from the unfetch buffer is presumed to have already passed | |
207 the backslash-newline filter. */ | |
208 static int fetch_byte(struct preproc_info *pp) | |
209 { | |
210 int c; | |
211 | |
298
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
212 if (pp -> lexstr) |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
213 { |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
214 if (pp -> lexstr[pp -> lexstrloc]) |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
215 return pp -> lexstr[pp -> lexstrloc++]; |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
216 else |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
217 return CPP_EOL; |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
218 } |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
219 |
295 | 220 if (pp -> ungetbufl > 0) |
221 { | |
222 pp -> ungetbufl--; | |
223 c = pp -> ungetbuf[pp -> ungetbufl]; | |
224 if (pp -> ungetbufl == 0) | |
225 { | |
226 lw_free(pp -> ungetbuf); | |
227 pp -> ungetbuf = NULL; | |
228 pp -> ungetbufs = 0; | |
229 } | |
230 return c; | |
231 } | |
232 | |
233 again: | |
234 if (pp -> unget != CPP_NOUNG) | |
235 { | |
236 c = pp -> unget; | |
237 pp -> unget = CPP_NOUNG; | |
238 } | |
239 else | |
240 { | |
241 c = fetch_byte_tg(pp); | |
242 } | |
243 if (c == '\\') | |
244 { | |
245 int c2; | |
246 c2 = fetch_byte_tg(pp); | |
247 if (c2 == CPP_EOL) | |
248 goto again; | |
249 else | |
250 pp -> unget = c2; | |
251 } | |
252 return c; | |
253 } | |
254 | |
255 | |
256 | |
257 /* | |
258 Lex a token off the current input file. | |
259 | |
260 Returned tokens are as follows: | |
261 | |
262 * all words starting with [a-zA-Z_] are returned as TOK_IDENT | |
263 * numbers are returned as their appropriate type | |
264 * all whitespace in a sequence, including comments, is returned as | |
265 a single instance of TOK_WSPACE | |
266 * TOK_EOL is returned in the case of the end of a line | |
267 * TOK_EOF is returned when the end of the file is reached | |
268 * If no TOK_EOL appears before TOK_EOF, a TOK_EOL will be synthesised | |
269 * Any symbolic operator, etc., recognized by C will be returned as such | |
270 a token | |
271 * TOK_HASH will be returned for a # | |
272 * trigraphs will be interpreted | |
273 * backslash-newline will be interpreted | |
274 * any instance of CR, LF, CRLF, or LFCR will be interpreted as TOK_EOL | |
275 */ | |
276 | |
277 | |
278 static int preproc_lex_fetch_byte(struct preproc_info *pp) | |
279 { | |
280 int c; | |
281 c = fetch_byte(pp); | |
282 if (c == CPP_EOF && pp -> eolseen == 0) | |
283 { | |
284 preproc_throw_warning(pp, "No newline at end of file"); | |
285 pp -> eolseen = 1; | |
286 return CPP_EOL; | |
287 } | |
288 | |
289 if (c == CPP_EOL) | |
290 { | |
291 pp -> eolseen = 1; | |
292 return c; | |
293 } | |
300 | 294 |
295 | 295 pp -> eolseen = 0; |
296 | |
297 /* convert comments to a single space here */ | |
298 if (c == '/') | |
299 { | |
300 int c2; | |
301 c2 = fetch_byte(pp); | |
302 if (c2 == '/') | |
303 { | |
304 /* single line comment */ | |
305 c = ' '; | |
306 for (;;) | |
307 { | |
308 c2 = fetch_byte(pp); | |
309 if (c2 == CPP_EOF || c2 == CPP_EOL) | |
310 break; | |
311 } | |
312 preproc_lex_unfetch_byte(pp, c2); | |
313 } | |
314 else if (c2 == '*') | |
315 { | |
316 /* block comment */ | |
317 c = ' '; | |
318 for (;;) | |
319 { | |
320 c2 = fetch_byte(pp); | |
321 if (c2 == CPP_EOL || c2 == CPP_EOF) | |
322 { | |
323 preproc_lex_unfetch_byte(pp, c); | |
324 break; | |
325 } | |
326 if (c2 == '*') | |
327 { | |
328 /* maybe end of comment */ | |
329 c2 = preproc_lex_fetch_byte(pp); | |
330 if (c2 == '/') | |
331 break; | |
332 } | |
333 } | |
334 } | |
335 else | |
336 { | |
337 /* not a comment - restore lookahead character */ | |
338 preproc_lex_unfetch_byte(pp, c2); | |
339 } | |
340 } | |
341 return c; | |
342 } | |
343 | |
344 struct token *preproc_lex_next_token(struct preproc_info *pp) | |
345 { | |
346 int sline = pp -> lineno; | |
347 int scol = pp -> column; | |
348 char *strval = NULL; | |
349 int ttype = TOK_NONE; | |
350 int c, c2; | |
351 int cl; | |
352 struct strbuf *strbuf; | |
353 struct token *t; | |
300 | 354 struct preproc_info *fs; |
355 | |
356 fileagain: | |
295 | 357 c = preproc_lex_fetch_byte(pp); |
358 if (c == CPP_EOF) | |
359 { | |
360 if (pp -> nlseen == 0) | |
361 { | |
362 c = CPP_EOL; | |
363 } | |
364 } | |
365 | |
366 if (c == CPP_EOF) | |
367 { | |
300 | 368 /* check if we fell off the end of an include file */ |
369 if (pp -> filestack) | |
370 { | |
371 if (pp -> skip_level || pp -> found_level) | |
372 { | |
373 preproc_throw_error(pp, "Unbalanced conditionals in include file"); | |
374 } | |
375 fclose(pp -> fp); | |
376 fs = pp -> filestack; | |
377 *pp = *fs; | |
378 pp -> filestack = fs -> n; | |
379 goto fileagain; | |
380 } | |
381 else | |
382 { | |
383 ttype = TOK_EOF; | |
384 goto out; | |
385 } | |
295 | 386 } |
387 if (c == CPP_EOL) | |
388 { | |
389 pp -> nlseen = 1; | |
390 ttype = TOK_EOL; | |
391 goto out; | |
392 } | |
393 | |
394 pp -> nlseen = 0; | |
395 if (isspace(c)) | |
396 { | |
397 while (isspace(c)) | |
398 c = preproc_lex_fetch_byte(pp); | |
399 preproc_lex_unfetch_byte(pp, c); | |
400 ttype = TOK_WSPACE; | |
401 goto out; | |
402 } | |
403 | |
404 switch (c) | |
405 { | |
406 case '?': | |
407 ttype = TOK_QMARK; | |
408 goto out; | |
409 | |
410 case ':': | |
411 ttype = TOK_COLON; | |
412 goto out; | |
413 | |
414 case ',': | |
415 ttype = TOK_COMMA; | |
416 goto out; | |
417 | |
418 case '(': | |
419 ttype = TOK_OPAREN; | |
420 goto out; | |
421 | |
422 case ')': | |
423 ttype = TOK_CPAREN; | |
424 goto out; | |
425 | |
426 case '{': | |
427 ttype = TOK_OBRACE; | |
428 goto out; | |
429 | |
430 case '}': | |
431 ttype = TOK_CBRACE; | |
432 goto out; | |
433 | |
434 case '[': | |
435 ttype = TOK_OSQUARE; | |
436 goto out; | |
437 | |
438 case ']': | |
439 ttype = TOK_CSQUARE; | |
440 goto out; | |
441 | |
442 case '~': | |
443 ttype = TOK_COM; | |
444 goto out; | |
445 | |
446 case ';': | |
447 ttype = TOK_EOS; | |
448 goto out; | |
449 | |
450 /* and now for the possible multi character tokens */ | |
451 case '#': | |
452 ttype = TOK_HASH; | |
453 c = preproc_lex_fetch_byte(pp); | |
454 if (c == '#') | |
455 ttype = TOK_DBLHASH; | |
456 else | |
457 preproc_lex_unfetch_byte(pp, c); | |
458 goto out; | |
459 | |
460 case '^': | |
461 ttype = TOK_XOR; | |
462 c = preproc_lex_fetch_byte(pp); | |
463 if (c == '=') | |
464 ttype = TOK_XORASS; | |
465 else | |
466 preproc_lex_unfetch_byte(pp, c); | |
467 goto out; | |
468 | |
469 case '!': | |
470 ttype = TOK_BNOT; | |
471 c = preproc_lex_fetch_byte(pp); | |
472 if (c == '=') | |
473 ttype = TOK_NE; | |
474 else | |
475 preproc_lex_unfetch_byte(pp, c); | |
476 goto out; | |
477 | |
478 case '*': | |
479 ttype = TOK_STAR; | |
480 c = preproc_lex_fetch_byte(pp); | |
481 if (c == '=') | |
482 ttype = TOK_MULASS; | |
483 else | |
484 preproc_lex_unfetch_byte(pp, c); | |
485 goto out; | |
486 | |
487 case '/': | |
488 ttype = TOK_DIV; | |
489 c = preproc_lex_fetch_byte(pp); | |
490 if (c == '=') | |
491 ttype = TOK_DIVASS; | |
492 else | |
493 preproc_lex_unfetch_byte(pp, c); | |
494 goto out; | |
495 | |
496 case '=': | |
497 ttype = TOK_ASS; | |
498 c = preproc_lex_fetch_byte(pp); | |
499 if (c == '=') | |
500 ttype = TOK_EQ; | |
501 else | |
502 preproc_lex_unfetch_byte(pp, c); | |
503 goto out; | |
504 | |
505 case '%': | |
506 ttype = TOK_MOD; | |
507 c = preproc_lex_fetch_byte(pp); | |
508 if (c == '=') | |
509 ttype = TOK_MODASS; | |
510 else | |
511 preproc_lex_unfetch_byte(pp, c); | |
512 goto out; | |
513 | |
514 case '-': | |
515 ttype = TOK_SUB; | |
516 c = preproc_lex_fetch_byte(pp); | |
517 if (c == '=') | |
518 ttype = TOK_SUBASS; | |
519 else if (c == '-') | |
520 ttype = TOK_DBLSUB; | |
521 else if (c == '>') | |
522 ttype = TOK_ARROW; | |
523 else | |
524 preproc_lex_unfetch_byte(pp, c); | |
525 goto out; | |
526 | |
527 case '+': | |
528 ttype = TOK_ADD; | |
529 c = preproc_lex_fetch_byte(pp); | |
530 if (c == '=') | |
531 ttype = TOK_ADDASS; | |
532 else if (c == '+') | |
533 ttype = TOK_DBLADD; | |
534 else | |
535 preproc_lex_unfetch_byte(pp, c); | |
536 goto out; | |
537 | |
538 | |
539 case '&': | |
540 ttype = TOK_BWAND; | |
541 c = preproc_lex_fetch_byte(pp); | |
542 if (c == '=') | |
543 ttype = TOK_BWANDASS; | |
544 else if (c == '&') | |
545 ttype = TOK_BAND; | |
546 else | |
547 preproc_lex_unfetch_byte(pp, c); | |
548 goto out; | |
549 | |
550 case '|': | |
551 ttype = TOK_BWOR; | |
552 c = preproc_lex_fetch_byte(pp); | |
553 if (c == '=') | |
554 ttype = TOK_BWORASS; | |
555 else if (c == '|') | |
556 ttype = TOK_BOR; | |
557 else | |
558 preproc_lex_unfetch_byte(pp, c); | |
559 goto out; | |
560 | |
561 case '<': | |
562 ttype = TOK_LT; | |
563 c = preproc_lex_fetch_byte(pp); | |
564 if (c == '=') | |
565 ttype = TOK_LE; | |
566 else if (c == '<') | |
567 { | |
568 ttype = TOK_LSH; | |
569 c = preproc_lex_fetch_byte(pp); | |
570 if (c == '=') | |
571 ttype = TOK_LSHASS; | |
572 else | |
573 preproc_lex_unfetch_byte(pp, c); | |
574 } | |
575 else | |
576 preproc_lex_unfetch_byte(pp, c); | |
577 goto out; | |
578 | |
579 | |
580 case '>': | |
581 ttype = TOK_GT; | |
582 c = preproc_lex_fetch_byte(pp); | |
583 if (c == '=') | |
584 ttype = TOK_GE; | |
585 else if (c == '>') | |
586 { | |
587 ttype = TOK_RSH; | |
588 c = preproc_lex_fetch_byte(pp); | |
589 if (c == '=') | |
590 ttype = TOK_RSHASS; | |
591 else | |
592 preproc_lex_unfetch_byte(pp, c); | |
593 } | |
594 else | |
595 preproc_lex_unfetch_byte(pp, c); | |
596 goto out; | |
597 | |
598 case '\'': | |
599 /* character constant - turns into a uint */ | |
600 chrlit: | |
601 cl = 0; | |
602 strbuf = strbuf_new(); | |
603 for (;;) | |
604 { | |
605 c = preproc_lex_fetch_byte(pp); | |
606 if (c == CPP_EOF || c == CPP_EOL || c == '\'') | |
607 break; | |
608 cl++; | |
609 if (c == '\\') | |
610 { | |
611 strbuf_add(strbuf, '\\'); | |
612 c = preproc_lex_fetch_byte(pp); | |
613 if (c == CPP_EOF || c == CPP_EOL) | |
614 { | |
298
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
615 if (!pp -> lexstr) |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
616 preproc_throw_error(pp, "Invalid character constant"); |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
617 ttype = TOK_ERROR; |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
618 strval = strbuf_end(strbuf); |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
619 goto out; |
295 | 620 } |
621 cl++; | |
622 strbuf_add(strbuf, c); | |
623 continue; | |
624 } | |
625 strbuf_add(strbuf, c); | |
626 } | |
627 strval = strbuf_end(strbuf); | |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
628 if (cl == 0) |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
629 { |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
630 ttype = TOK_ERROR; |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
631 if (!pp -> lexstr) |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
632 preproc_throw_error(pp, "Invalid character constant"); |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
633 } |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
634 else |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
635 ttype = TOK_CHR_LIT; |
295 | 636 goto out; |
637 | |
638 case '"': | |
639 strlit: | |
640 /* string literal */ | |
641 strbuf = strbuf_new(); | |
642 for (;;) | |
643 { | |
644 c = preproc_lex_fetch_byte(pp); | |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
645 if (c == CPP_EOF || c == CPP_EOL) |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
646 { |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
647 ttype = TOK_ERROR; |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
648 strval = strbuf_end(strbuf); |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
649 if (!pp -> lexstr) |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
650 preproc_throw_error(pp, "Invalid string constant"); |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
651 goto out; |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
652 } |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
653 if (c == '"') |
295 | 654 break; |
655 if (c == '\\') | |
656 { | |
657 strbuf_add(strbuf, '\\'); | |
658 c = preproc_lex_fetch_byte(pp); | |
659 if (c == CPP_EOF || c == CPP_EOL) | |
660 { | |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
661 ttype = TOK_ERROR; |
298
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
662 if (!pp -> lexstr) |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
663 preproc_throw_error(pp, "Invalid string constant"); |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
664 strval = strbuf_end(strbuf); |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
665 goto out; |
295 | 666 } |
667 cl++; | |
668 strbuf_add(strbuf, c); | |
669 continue; | |
670 } | |
671 strbuf_add(strbuf, c); | |
672 } | |
673 strval = strbuf_end(strbuf); | |
674 ttype = TOK_STR_LIT; | |
675 goto out; | |
676 | |
677 case 'L': | |
678 /* check for wide string or wide char const */ | |
679 c2 = preproc_lex_fetch_byte(pp); | |
680 if (c2 == '\'') | |
681 { | |
682 goto chrlit; | |
683 } | |
684 else if (c2 == '"') | |
685 { | |
686 goto strlit; | |
687 } | |
688 preproc_lex_unfetch_byte(pp, c2); | |
689 /* fall through for identifier */ | |
690 case '_': | |
691 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': | |
692 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': | |
693 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': | |
694 case 's': case 't': case 'u': case 'v': case 'w': case 'x': | |
695 case 'y': case 'z': | |
696 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': | |
697 case 'G': case 'H': case 'I': case 'J': case 'K': | |
698 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': | |
699 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': | |
700 case 'Y': case 'Z': | |
701 /* we have an identifier here */ | |
702 strbuf = strbuf_new(); | |
703 strbuf_add(strbuf, c); | |
704 for (;;) | |
705 { | |
706 c = preproc_lex_fetch_byte(pp); | |
707 if ((c == '_') || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) | |
708 { | |
709 strbuf_add(strbuf, c); | |
710 continue; | |
711 } | |
712 else | |
713 { | |
714 strbuf_add(strbuf, 0); | |
715 strval = strbuf_end(strbuf); | |
716 break; | |
717 } | |
718 } | |
719 preproc_lex_unfetch_byte(pp, c); | |
720 ttype = TOK_IDENT; | |
721 goto out; | |
722 | |
723 case '.': | |
724 c = preproc_lex_fetch_byte(pp); | |
725 if (c >= '0' && c <= '9') | |
726 { | |
727 strbuf = strbuf_new(); | |
728 strbuf_add(strbuf, '.'); | |
729 goto numlit; | |
730 } | |
731 else if (c == '.') | |
732 { | |
733 c = preproc_lex_fetch_byte(pp); | |
734 if (c == '.') | |
735 { | |
736 ttype = TOK_ELLIPSIS; | |
737 goto out; | |
738 } | |
739 preproc_lex_unfetch_byte(pp, c); | |
740 } | |
741 preproc_lex_unfetch_byte(pp, c); | |
742 ttype = TOK_DOT; | |
743 goto out; | |
744 | |
745 case '0': case '1': case '2': case '3': case '4': | |
746 case '5': case '6': case '7': case '8': case '9': | |
747 strbuf = strbuf_new(); | |
748 numlit: | |
296 | 749 ttype = TOK_NUMBER; |
295 | 750 strbuf_add(strbuf, c); |
751 for (;;) | |
752 { | |
753 c = preproc_lex_fetch_byte(pp); | |
754 if (!((c == '_') || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))) | |
755 break; | |
756 strbuf_add(strbuf, c); | |
757 if (c == 'e' || c == 'E' || c == 'p' || c == 'P') | |
758 { | |
759 c = preproc_lex_fetch_byte(pp); | |
760 if (c == '+' || c == '-') | |
761 { | |
762 strbuf_add(strbuf, c); | |
763 continue; | |
764 } | |
765 preproc_lex_unfetch_byte(pp, c); | |
766 } | |
767 } | |
768 strval = strbuf_end(strbuf); | |
769 preproc_lex_unfetch_byte(pp, c); | |
770 goto out; | |
771 | |
772 default: | |
773 ttype = TOK_CHAR; | |
774 strval = lw_alloc(2); | |
775 strval[0] = c; | |
776 strval[1] = 0; | |
777 break; | |
778 } | |
779 out: | |
780 t = token_create(ttype, strval, sline, scol, pp -> fn); | |
781 lw_free(strval); | |
782 return t; | |
783 } |