Mercurial > hg > index.cgi
annotate lwcc/lex.c @ 301:6f7fe78bb868 ccdev
Add string -> number conversion for preproc expression evaluator
Q&D conversion from string to signed number. It should be noted that this
really should be done during tokenization and the type of number be set by
the tokenizer, including parsing floating point values. Then the
preprocessor can decide what to do with non-integer numbers.
author | William Astle <lost@l-w.ca> |
---|---|
date | Sun, 15 Sep 2013 14:22:10 -0600 |
parents | 8d6c47395653 |
children | d85d173ba120 |
rev | line source |
---|---|
295 | 1 /* |
2 lwcc/lex.c | |
3 | |
4 Copyright © 2013 William Astle | |
5 | |
6 This file is part of LWTOOLS. | |
7 | |
8 LWTOOLS is free software: you can redistribute it and/or modify it under the | |
9 terms of the GNU General Public License as published by the Free Software | |
10 Foundation, either version 3 of the License, or (at your option) any later | |
11 version. | |
12 | |
13 This program is distributed in the hope that it will be useful, but WITHOUT | |
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
16 more details. | |
17 | |
18 You should have received a copy of the GNU General Public License along with | |
19 this program. If not, see <http://www.gnu.org/licenses/>. | |
20 */ | |
21 | |
22 #include <ctype.h> | |
23 #include <stdio.h> | |
24 | |
25 #include <lw_alloc.h> | |
26 | |
27 #include "cpp.h" | |
28 #include "strbuf.h" | |
29 #include "token.h" | |
30 | |
31 /* fetch a raw input byte from the current file. Will return CPP_EOF if | |
32 EOF is encountered and CPP_EOL if an end of line sequence is encountered. | |
33 End of line is defined as either CR, CRLF, LF, or LFCR. CPP_EOL is | |
34 returned on the first CR or LF encountered. The complementary CR or LF | |
35 is munched, if present, when the *next* character is read. This always | |
36 operates on file_stack. | |
37 | |
38 This function also accounts for line numbers in input files and also | |
39 character columns. | |
40 */ | |
41 static int fetch_byte_ll(struct preproc_info *pp) | |
42 { | |
43 int c; | |
44 | |
45 if (pp -> eolstate != 0) | |
46 { | |
47 pp -> lineno++; | |
48 pp -> column = 0; | |
49 } | |
50 c = getc(pp -> fp); | |
51 pp -> column++; | |
52 if (pp -> eolstate == 1) | |
53 { | |
54 // just saw CR, munch LF | |
55 if (c == 10) | |
56 c = getc(pp -> fp); | |
57 pp -> eolstate = 0; | |
58 } | |
59 else if (pp -> eolstate == 2) | |
60 { | |
61 // just saw LF, much CR | |
62 if (c == 13) | |
63 c = getc(pp -> fp); | |
64 pp -> eolstate = 0; | |
65 } | |
66 | |
67 if (c == 10) | |
68 { | |
69 // we have LF - end of line, flag to munch CR | |
70 pp -> eolstate = 2; | |
71 c = CPP_EOL; | |
72 } | |
73 else if (c == 13) | |
74 { | |
75 // we have CR - end of line, flag to munch LF | |
76 pp -> eolstate = 1; | |
77 c = CPP_EOL; | |
78 } | |
79 else if (c == EOF) | |
80 { | |
81 c = CPP_EOF; | |
82 } | |
83 return c; | |
84 } | |
85 | |
86 /* This function takes a sequence of bytes from the _ll function above | |
87 and does trigraph interpretation on it, but only if the global | |
88 trigraphs is nonzero. */ | |
89 static int fetch_byte_tg(struct preproc_info *pp) | |
90 { | |
91 int c; | |
92 | |
93 if (!pp -> trigraphs) | |
94 { | |
95 c = fetch_byte_ll(pp); | |
96 } | |
97 else | |
98 { | |
99 /* we have to do the trigraph shit here */ | |
100 if (pp -> ra != CPP_NOUNG) | |
101 { | |
102 if (pp -> qseen > 0) | |
103 { | |
104 c = '?'; | |
105 pp -> qseen -= 1; | |
106 return c; | |
107 } | |
108 else | |
109 { | |
110 c = pp -> ra; | |
111 pp -> ra = CPP_NOUNG; | |
112 return c; | |
113 } | |
114 } | |
115 | |
116 c = fetch_byte_ll(pp); | |
117 while (c == '?') | |
118 { | |
119 pp -> qseen++; | |
120 c = fetch_byte_ll(pp); | |
121 } | |
122 | |
123 if (pp -> qseen >= 2) | |
124 { | |
125 // we have a trigraph | |
126 switch (c) | |
127 { | |
128 case '=': | |
129 c = '#'; | |
130 pp -> qseen -= 2; | |
131 break; | |
132 | |
133 case '/': | |
134 c = '\\'; | |
135 pp -> qseen -= 2; | |
136 break; | |
137 | |
138 case '\'': | |
139 c = '^'; | |
140 pp -> qseen -= 2; | |
141 break; | |
142 | |
143 case '(': | |
144 c = '['; | |
145 pp -> qseen -= 2; | |
146 break; | |
147 | |
148 case ')': | |
149 c = ']'; | |
150 pp -> qseen -= 2; | |
151 break; | |
152 | |
153 case '!': | |
154 c = '|'; | |
155 pp -> qseen -= 2; | |
156 break; | |
157 | |
158 case '<': | |
159 c = '{'; | |
160 pp -> qseen -= 2; | |
161 break; | |
162 | |
163 case '>': | |
164 c = '}'; | |
165 pp -> qseen -= 2; | |
166 break; | |
167 | |
168 case '-': | |
169 c = '~'; | |
170 pp -> qseen -= 2; | |
171 break; | |
172 } | |
173 if (pp -> qseen > 0) | |
174 { | |
175 pp -> ra = c; | |
176 c = '?'; | |
177 pp -> qseen--; | |
178 } | |
179 } | |
180 else if (pp -> qseen > 0) | |
181 { | |
182 pp -> ra = c; | |
183 c = '?'; | |
184 pp -> qseen--; | |
185 } | |
186 } | |
187 return c; | |
188 } | |
189 | |
190 /* This function puts a byte back onto the front of the input stream used | |
191 by fetch_byte(). Theoretically, an unlimited number of characters can | |
192 be unfetched. Line and column counting may be incorrect if unfetched | |
193 characters cross a token boundary. */ | |
194 static void preproc_lex_unfetch_byte(struct preproc_info *pp, int c) | |
195 { | |
196 if (pp -> ungetbufl >= pp -> ungetbufs) | |
197 { | |
198 pp -> ungetbufs += 100; | |
199 pp -> ungetbuf = lw_realloc(pp -> ungetbuf, pp -> ungetbufs); | |
200 } | |
201 pp -> ungetbuf[pp -> ungetbufl++] = c; | |
202 } | |
203 | |
204 /* This function retrieves a byte from the input stream. It performs | |
205 backslash-newline splicing on the returned bytes. Any character | |
206 retrieved from the unfetch buffer is presumed to have already passed | |
207 the backslash-newline filter. */ | |
208 static int fetch_byte(struct preproc_info *pp) | |
209 { | |
210 int c; | |
211 | |
298
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
212 if (pp -> lexstr) |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
213 { |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
214 if (pp -> lexstr[pp -> lexstrloc]) |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
215 return pp -> lexstr[pp -> lexstrloc++]; |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
216 else |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
217 return CPP_EOL; |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
218 } |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
219 |
295 | 220 if (pp -> ungetbufl > 0) |
221 { | |
222 pp -> ungetbufl--; | |
223 c = pp -> ungetbuf[pp -> ungetbufl]; | |
224 if (pp -> ungetbufl == 0) | |
225 { | |
226 lw_free(pp -> ungetbuf); | |
227 pp -> ungetbuf = NULL; | |
228 pp -> ungetbufs = 0; | |
229 } | |
230 return c; | |
231 } | |
232 | |
233 again: | |
234 if (pp -> unget != CPP_NOUNG) | |
235 { | |
236 c = pp -> unget; | |
237 pp -> unget = CPP_NOUNG; | |
238 } | |
239 else | |
240 { | |
241 c = fetch_byte_tg(pp); | |
242 } | |
243 if (c == '\\') | |
244 { | |
245 int c2; | |
246 c2 = fetch_byte_tg(pp); | |
247 if (c2 == CPP_EOL) | |
248 goto again; | |
249 else | |
250 pp -> unget = c2; | |
251 } | |
252 return c; | |
253 } | |
254 | |
255 | |
256 | |
257 /* | |
258 Lex a token off the current input file. | |
259 | |
260 Returned tokens are as follows: | |
261 | |
262 * all words starting with [a-zA-Z_] are returned as TOK_IDENT | |
263 * numbers are returned as their appropriate type | |
264 * all whitespace in a sequence, including comments, is returned as | |
265 a single instance of TOK_WSPACE | |
266 * TOK_EOL is returned in the case of the end of a line | |
267 * TOK_EOF is returned when the end of the file is reached | |
268 * If no TOK_EOL appears before TOK_EOF, a TOK_EOL will be synthesised | |
269 * Any symbolic operator, etc., recognized by C will be returned as such | |
270 a token | |
271 * TOK_HASH will be returned for a # | |
272 * trigraphs will be interpreted | |
273 * backslash-newline will be interpreted | |
274 * any instance of CR, LF, CRLF, or LFCR will be interpreted as TOK_EOL | |
275 */ | |
276 | |
277 | |
278 static int preproc_lex_fetch_byte(struct preproc_info *pp) | |
279 { | |
280 int c; | |
281 c = fetch_byte(pp); | |
282 if (c == CPP_EOF && pp -> eolseen == 0) | |
283 { | |
284 preproc_throw_warning(pp, "No newline at end of file"); | |
285 pp -> eolseen = 1; | |
286 return CPP_EOL; | |
287 } | |
288 | |
289 if (c == CPP_EOL) | |
290 { | |
291 pp -> eolseen = 1; | |
292 return c; | |
293 } | |
300 | 294 |
295 | 295 pp -> eolseen = 0; |
296 | |
297 /* convert comments to a single space here */ | |
298 if (c == '/') | |
299 { | |
300 int c2; | |
301 c2 = fetch_byte(pp); | |
302 if (c2 == '/') | |
303 { | |
304 /* single line comment */ | |
305 c = ' '; | |
306 for (;;) | |
307 { | |
308 c2 = fetch_byte(pp); | |
309 if (c2 == CPP_EOF || c2 == CPP_EOL) | |
310 break; | |
311 } | |
312 preproc_lex_unfetch_byte(pp, c2); | |
313 } | |
314 else if (c2 == '*') | |
315 { | |
316 /* block comment */ | |
317 c = ' '; | |
318 for (;;) | |
319 { | |
320 c2 = fetch_byte(pp); | |
321 if (c2 == CPP_EOL || c2 == CPP_EOF) | |
322 { | |
323 preproc_lex_unfetch_byte(pp, c); | |
324 break; | |
325 } | |
326 if (c2 == '*') | |
327 { | |
328 /* maybe end of comment */ | |
329 c2 = preproc_lex_fetch_byte(pp); | |
330 if (c2 == '/') | |
331 break; | |
332 } | |
333 } | |
334 } | |
335 else | |
336 { | |
337 /* not a comment - restore lookahead character */ | |
338 preproc_lex_unfetch_byte(pp, c2); | |
339 } | |
340 } | |
341 return c; | |
342 } | |
343 | |
344 struct token *preproc_lex_next_token(struct preproc_info *pp) | |
345 { | |
346 int sline = pp -> lineno; | |
347 int scol = pp -> column; | |
348 char *strval = NULL; | |
349 int ttype = TOK_NONE; | |
350 int c, c2; | |
351 int cl; | |
352 struct strbuf *strbuf; | |
353 struct token *t; | |
300 | 354 struct preproc_info *fs; |
355 | |
356 fileagain: | |
295 | 357 c = preproc_lex_fetch_byte(pp); |
358 if (c == CPP_EOF) | |
359 { | |
360 if (pp -> nlseen == 0) | |
361 { | |
362 c = CPP_EOL; | |
363 } | |
364 } | |
365 | |
366 if (c == CPP_EOF) | |
367 { | |
300 | 368 /* check if we fell off the end of an include file */ |
369 if (pp -> filestack) | |
370 { | |
371 if (pp -> skip_level || pp -> found_level) | |
372 { | |
373 preproc_throw_error(pp, "Unbalanced conditionals in include file"); | |
374 } | |
375 fclose(pp -> fp); | |
376 fs = pp -> filestack; | |
377 *pp = *fs; | |
378 pp -> filestack = fs -> n; | |
379 goto fileagain; | |
380 } | |
381 else | |
382 { | |
383 ttype = TOK_EOF; | |
384 goto out; | |
385 } | |
295 | 386 } |
387 if (c == CPP_EOL) | |
388 { | |
389 pp -> nlseen = 1; | |
390 ttype = TOK_EOL; | |
391 goto out; | |
392 } | |
393 | |
394 pp -> nlseen = 0; | |
395 if (isspace(c)) | |
396 { | |
397 while (isspace(c)) | |
398 c = preproc_lex_fetch_byte(pp); | |
399 preproc_lex_unfetch_byte(pp, c); | |
400 ttype = TOK_WSPACE; | |
401 goto out; | |
402 } | |
403 | |
404 switch (c) | |
405 { | |
406 case '?': | |
407 ttype = TOK_QMARK; | |
408 goto out; | |
409 | |
410 case ':': | |
411 ttype = TOK_COLON; | |
412 goto out; | |
413 | |
414 case ',': | |
415 ttype = TOK_COMMA; | |
416 goto out; | |
417 | |
418 case '(': | |
419 ttype = TOK_OPAREN; | |
420 goto out; | |
421 | |
422 case ')': | |
423 ttype = TOK_CPAREN; | |
424 goto out; | |
425 | |
426 case '{': | |
427 ttype = TOK_OBRACE; | |
428 goto out; | |
429 | |
430 case '}': | |
431 ttype = TOK_CBRACE; | |
432 goto out; | |
433 | |
434 case '[': | |
435 ttype = TOK_OSQUARE; | |
436 goto out; | |
437 | |
438 case ']': | |
439 ttype = TOK_CSQUARE; | |
440 goto out; | |
441 | |
442 case '~': | |
443 ttype = TOK_COM; | |
444 goto out; | |
445 | |
446 case ';': | |
447 ttype = TOK_EOS; | |
448 goto out; | |
449 | |
450 /* and now for the possible multi character tokens */ | |
451 case '#': | |
452 ttype = TOK_HASH; | |
453 c = preproc_lex_fetch_byte(pp); | |
454 if (c == '#') | |
455 ttype = TOK_DBLHASH; | |
456 else | |
457 preproc_lex_unfetch_byte(pp, c); | |
458 goto out; | |
459 | |
460 case '^': | |
461 ttype = TOK_XOR; | |
462 c = preproc_lex_fetch_byte(pp); | |
463 if (c == '=') | |
464 ttype = TOK_XORASS; | |
465 else | |
466 preproc_lex_unfetch_byte(pp, c); | |
467 goto out; | |
468 | |
469 case '!': | |
470 ttype = TOK_BNOT; | |
471 c = preproc_lex_fetch_byte(pp); | |
472 if (c == '=') | |
473 ttype = TOK_NE; | |
474 else | |
475 preproc_lex_unfetch_byte(pp, c); | |
476 goto out; | |
477 | |
478 case '*': | |
479 ttype = TOK_STAR; | |
480 c = preproc_lex_fetch_byte(pp); | |
481 if (c == '=') | |
482 ttype = TOK_MULASS; | |
483 else | |
484 preproc_lex_unfetch_byte(pp, c); | |
485 goto out; | |
486 | |
487 case '/': | |
488 ttype = TOK_DIV; | |
489 c = preproc_lex_fetch_byte(pp); | |
490 if (c == '=') | |
491 ttype = TOK_DIVASS; | |
492 else | |
493 preproc_lex_unfetch_byte(pp, c); | |
494 goto out; | |
495 | |
496 case '=': | |
497 ttype = TOK_ASS; | |
498 c = preproc_lex_fetch_byte(pp); | |
499 if (c == '=') | |
500 ttype = TOK_EQ; | |
501 else | |
502 preproc_lex_unfetch_byte(pp, c); | |
503 goto out; | |
504 | |
505 case '%': | |
506 ttype = TOK_MOD; | |
507 c = preproc_lex_fetch_byte(pp); | |
508 if (c == '=') | |
509 ttype = TOK_MODASS; | |
510 else | |
511 preproc_lex_unfetch_byte(pp, c); | |
512 goto out; | |
513 | |
514 case '-': | |
515 ttype = TOK_SUB; | |
516 c = preproc_lex_fetch_byte(pp); | |
517 if (c == '=') | |
518 ttype = TOK_SUBASS; | |
519 else if (c == '-') | |
520 ttype = TOK_DBLSUB; | |
521 else if (c == '>') | |
522 ttype = TOK_ARROW; | |
523 else | |
524 preproc_lex_unfetch_byte(pp, c); | |
525 goto out; | |
526 | |
527 case '+': | |
528 ttype = TOK_ADD; | |
529 c = preproc_lex_fetch_byte(pp); | |
530 if (c == '=') | |
531 ttype = TOK_ADDASS; | |
532 else if (c == '+') | |
533 ttype = TOK_DBLADD; | |
534 else | |
535 preproc_lex_unfetch_byte(pp, c); | |
536 goto out; | |
537 | |
538 | |
539 case '&': | |
540 ttype = TOK_BWAND; | |
541 c = preproc_lex_fetch_byte(pp); | |
542 if (c == '=') | |
543 ttype = TOK_BWANDASS; | |
544 else if (c == '&') | |
545 ttype = TOK_BAND; | |
546 else | |
547 preproc_lex_unfetch_byte(pp, c); | |
548 goto out; | |
549 | |
550 case '|': | |
551 ttype = TOK_BWOR; | |
552 c = preproc_lex_fetch_byte(pp); | |
553 if (c == '=') | |
554 ttype = TOK_BWORASS; | |
555 else if (c == '|') | |
556 ttype = TOK_BOR; | |
557 else | |
558 preproc_lex_unfetch_byte(pp, c); | |
559 goto out; | |
560 | |
561 case '<': | |
562 ttype = TOK_LT; | |
563 c = preproc_lex_fetch_byte(pp); | |
564 if (c == '=') | |
565 ttype = TOK_LE; | |
566 else if (c == '<') | |
567 { | |
568 ttype = TOK_LSH; | |
569 c = preproc_lex_fetch_byte(pp); | |
570 if (c == '=') | |
571 ttype = TOK_LSHASS; | |
572 else | |
573 preproc_lex_unfetch_byte(pp, c); | |
574 } | |
575 else | |
576 preproc_lex_unfetch_byte(pp, c); | |
577 goto out; | |
578 | |
579 | |
580 case '>': | |
581 ttype = TOK_GT; | |
582 c = preproc_lex_fetch_byte(pp); | |
583 if (c == '=') | |
584 ttype = TOK_GE; | |
585 else if (c == '>') | |
586 { | |
587 ttype = TOK_RSH; | |
588 c = preproc_lex_fetch_byte(pp); | |
589 if (c == '=') | |
590 ttype = TOK_RSHASS; | |
591 else | |
592 preproc_lex_unfetch_byte(pp, c); | |
593 } | |
594 else | |
595 preproc_lex_unfetch_byte(pp, c); | |
596 goto out; | |
597 | |
598 case '\'': | |
599 /* character constant - turns into a uint */ | |
600 chrlit: | |
601 cl = 0; | |
602 strbuf = strbuf_new(); | |
603 for (;;) | |
604 { | |
605 c = preproc_lex_fetch_byte(pp); | |
606 if (c == CPP_EOF || c == CPP_EOL || c == '\'') | |
607 break; | |
608 cl++; | |
609 if (c == '\\') | |
610 { | |
611 strbuf_add(strbuf, '\\'); | |
612 c = preproc_lex_fetch_byte(pp); | |
613 if (c == CPP_EOF || c == CPP_EOL) | |
614 { | |
298
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
615 if (!pp -> lexstr) |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
616 preproc_throw_error(pp, "Invalid character constant"); |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
617 ttype = TOK_ERROR; |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
618 strval = strbuf_end(strbuf); |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
619 goto out; |
295 | 620 } |
621 cl++; | |
622 strbuf_add(strbuf, c); | |
623 continue; | |
624 } | |
625 strbuf_add(strbuf, c); | |
626 } | |
627 strval = strbuf_end(strbuf); | |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
628 if (cl == 0) |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
629 { |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
630 ttype = TOK_ERROR; |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
631 if (!pp -> lexstr) |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
632 preproc_throw_error(pp, "Invalid character constant"); |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
633 } |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
634 else |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
635 ttype = TOK_CHR_LIT; |
295 | 636 goto out; |
637 | |
638 case '"': | |
639 strlit: | |
640 /* string literal */ | |
641 strbuf = strbuf_new(); | |
642 for (;;) | |
643 { | |
644 c = preproc_lex_fetch_byte(pp); | |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
645 if (c == CPP_EOF || c == CPP_EOL) |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
646 { |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
647 ttype = TOK_ERROR; |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
648 strval = strbuf_end(strbuf); |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
649 if (!pp -> lexstr) |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
650 preproc_throw_error(pp, "Invalid string constant"); |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
651 goto out; |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
652 } |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
653 if (c == '"') |
295 | 654 break; |
655 if (c == '\\') | |
656 { | |
657 strbuf_add(strbuf, '\\'); | |
658 c = preproc_lex_fetch_byte(pp); | |
659 if (c == CPP_EOF || c == CPP_EOL) | |
660 { | |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
661 ttype = TOK_ERROR; |
298
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
662 if (!pp -> lexstr) |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
663 preproc_throw_error(pp, "Invalid string constant"); |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
664 strval = strbuf_end(strbuf); |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
665 goto out; |
295 | 666 } |
667 cl++; | |
668 strbuf_add(strbuf, c); | |
669 continue; | |
670 } | |
671 strbuf_add(strbuf, c); | |
672 } | |
673 strval = strbuf_end(strbuf); | |
674 ttype = TOK_STR_LIT; | |
675 goto out; | |
676 | |
677 case 'L': | |
678 /* check for wide string or wide char const */ | |
679 c2 = preproc_lex_fetch_byte(pp); | |
680 if (c2 == '\'') | |
681 { | |
682 goto chrlit; | |
683 } | |
684 else if (c2 == '"') | |
685 { | |
686 goto strlit; | |
687 } | |
688 preproc_lex_unfetch_byte(pp, c2); | |
689 /* fall through for identifier */ | |
690 case '_': | |
691 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': | |
692 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': | |
693 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': | |
694 case 's': case 't': case 'u': case 'v': case 'w': case 'x': | |
695 case 'y': case 'z': | |
696 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': | |
697 case 'G': case 'H': case 'I': case 'J': case 'K': | |
698 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': | |
699 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': | |
700 case 'Y': case 'Z': | |
701 /* we have an identifier here */ | |
702 strbuf = strbuf_new(); | |
703 strbuf_add(strbuf, c); | |
704 for (;;) | |
705 { | |
706 c = preproc_lex_fetch_byte(pp); | |
707 if ((c == '_') || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) | |
708 { | |
709 strbuf_add(strbuf, c); | |
710 continue; | |
711 } | |
712 else | |
713 { | |
714 strbuf_add(strbuf, 0); | |
715 strval = strbuf_end(strbuf); | |
716 break; | |
717 } | |
718 } | |
719 preproc_lex_unfetch_byte(pp, c); | |
720 ttype = TOK_IDENT; | |
721 goto out; | |
722 | |
723 case '.': | |
724 c = preproc_lex_fetch_byte(pp); | |
725 if (c >= '0' && c <= '9') | |
726 { | |
727 strbuf = strbuf_new(); | |
728 strbuf_add(strbuf, '.'); | |
729 goto numlit; | |
730 } | |
731 else if (c == '.') | |
732 { | |
733 c = preproc_lex_fetch_byte(pp); | |
734 if (c == '.') | |
735 { | |
736 ttype = TOK_ELLIPSIS; | |
737 goto out; | |
738 } | |
739 preproc_lex_unfetch_byte(pp, c); | |
740 } | |
741 preproc_lex_unfetch_byte(pp, c); | |
742 ttype = TOK_DOT; | |
743 goto out; | |
744 | |
745 case '0': case '1': case '2': case '3': case '4': | |
746 case '5': case '6': case '7': case '8': case '9': | |
747 strbuf = strbuf_new(); | |
748 numlit: | |
296 | 749 ttype = TOK_NUMBER; |
295 | 750 strbuf_add(strbuf, c); |
751 for (;;) | |
752 { | |
753 c = preproc_lex_fetch_byte(pp); | |
754 if (!((c == '_') || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))) | |
755 break; | |
756 strbuf_add(strbuf, c); | |
757 if (c == 'e' || c == 'E' || c == 'p' || c == 'P') | |
758 { | |
759 c = preproc_lex_fetch_byte(pp); | |
760 if (c == '+' || c == '-') | |
761 { | |
762 strbuf_add(strbuf, c); | |
763 continue; | |
764 } | |
765 preproc_lex_unfetch_byte(pp, c); | |
766 } | |
767 } | |
768 strval = strbuf_end(strbuf); | |
769 preproc_lex_unfetch_byte(pp, c); | |
770 goto out; | |
771 | |
772 default: | |
773 ttype = TOK_CHAR; | |
774 strval = lw_alloc(2); | |
775 strval[0] = c; | |
776 strval[1] = 0; | |
777 break; | |
778 } | |
779 out: | |
780 t = token_create(ttype, strval, sline, scol, pp -> fn); | |
781 lw_free(strval); | |
782 return t; | |
783 } |