Mercurial > hg > index.cgi
annotate lwcc/lex.c @ 514:1c26076891e3 lwtools-4.18
Bump version in preparation for release
author | William Astle <lost@l-w.ca> |
---|---|
date | Tue, 19 Jan 2021 19:06:20 -0700 |
parents | 670ea8f90212 |
children | ee3e52ab2288 |
rev | line source |
---|---|
295 | 1 /* |
2 lwcc/lex.c | |
3 | |
4 Copyright © 2013 William Astle | |
5 | |
6 This file is part of LWTOOLS. | |
7 | |
8 LWTOOLS is free software: you can redistribute it and/or modify it under the | |
9 terms of the GNU General Public License as published by the Free Software | |
10 Foundation, either version 3 of the License, or (at your option) any later | |
11 version. | |
12 | |
13 This program is distributed in the hope that it will be useful, but WITHOUT | |
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
16 more details. | |
17 | |
18 You should have received a copy of the GNU General Public License along with | |
19 this program. If not, see <http://www.gnu.org/licenses/>. | |
20 */ | |
21 | |
22 #include <ctype.h> | |
23 #include <stdio.h> | |
24 | |
25 #include <lw_alloc.h> | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
26 #include <lw_strbuf.h> |
295 | 27 |
28 #include "cpp.h" | |
29 #include "token.h" | |
30 | |
31 /* fetch a raw input byte from the current file. Will return CPP_EOF if | |
32 EOF is encountered and CPP_EOL if an end of line sequence is encountered. | |
33 End of line is defined as either CR, CRLF, LF, or LFCR. CPP_EOL is | |
34 returned on the first CR or LF encountered. The complementary CR or LF | |
35 is munched, if present, when the *next* character is read. This always | |
36 operates on file_stack. | |
37 | |
38 This function also accounts for line numbers in input files and also | |
39 character columns. | |
40 */ | |
41 static int fetch_byte_ll(struct preproc_info *pp) | |
42 { | |
43 int c; | |
44 | |
45 if (pp -> eolstate != 0) | |
46 { | |
47 pp -> lineno++; | |
48 pp -> column = 0; | |
49 } | |
50 c = getc(pp -> fp); | |
51 pp -> column++; | |
52 if (pp -> eolstate == 1) | |
53 { | |
54 // just saw CR, munch LF | |
55 if (c == 10) | |
56 c = getc(pp -> fp); | |
57 pp -> eolstate = 0; | |
58 } | |
59 else if (pp -> eolstate == 2) | |
60 { | |
61 // just saw LF, much CR | |
62 if (c == 13) | |
63 c = getc(pp -> fp); | |
64 pp -> eolstate = 0; | |
65 } | |
66 | |
67 if (c == 10) | |
68 { | |
69 // we have LF - end of line, flag to munch CR | |
70 pp -> eolstate = 2; | |
71 c = CPP_EOL; | |
72 } | |
73 else if (c == 13) | |
74 { | |
75 // we have CR - end of line, flag to munch LF | |
76 pp -> eolstate = 1; | |
77 c = CPP_EOL; | |
78 } | |
79 else if (c == EOF) | |
80 { | |
81 c = CPP_EOF; | |
82 } | |
83 return c; | |
84 } | |
85 | |
86 /* This function takes a sequence of bytes from the _ll function above | |
87 and does trigraph interpretation on it, but only if the global | |
88 trigraphs is nonzero. */ | |
89 static int fetch_byte_tg(struct preproc_info *pp) | |
90 { | |
91 int c; | |
92 | |
93 if (!pp -> trigraphs) | |
94 { | |
95 c = fetch_byte_ll(pp); | |
96 } | |
97 else | |
98 { | |
99 /* we have to do the trigraph shit here */ | |
100 if (pp -> ra != CPP_NOUNG) | |
101 { | |
102 if (pp -> qseen > 0) | |
103 { | |
104 c = '?'; | |
105 pp -> qseen -= 1; | |
106 return c; | |
107 } | |
108 else | |
109 { | |
110 c = pp -> ra; | |
111 pp -> ra = CPP_NOUNG; | |
112 return c; | |
113 } | |
114 } | |
115 | |
116 c = fetch_byte_ll(pp); | |
117 while (c == '?') | |
118 { | |
119 pp -> qseen++; | |
120 c = fetch_byte_ll(pp); | |
121 } | |
122 | |
123 if (pp -> qseen >= 2) | |
124 { | |
125 // we have a trigraph | |
126 switch (c) | |
127 { | |
128 case '=': | |
129 c = '#'; | |
130 pp -> qseen -= 2; | |
131 break; | |
132 | |
133 case '/': | |
134 c = '\\'; | |
135 pp -> qseen -= 2; | |
136 break; | |
137 | |
138 case '\'': | |
139 c = '^'; | |
140 pp -> qseen -= 2; | |
141 break; | |
142 | |
143 case '(': | |
144 c = '['; | |
145 pp -> qseen -= 2; | |
146 break; | |
147 | |
148 case ')': | |
149 c = ']'; | |
150 pp -> qseen -= 2; | |
151 break; | |
152 | |
153 case '!': | |
154 c = '|'; | |
155 pp -> qseen -= 2; | |
156 break; | |
157 | |
158 case '<': | |
159 c = '{'; | |
160 pp -> qseen -= 2; | |
161 break; | |
162 | |
163 case '>': | |
164 c = '}'; | |
165 pp -> qseen -= 2; | |
166 break; | |
167 | |
168 case '-': | |
169 c = '~'; | |
170 pp -> qseen -= 2; | |
171 break; | |
172 } | |
173 if (pp -> qseen > 0) | |
174 { | |
175 pp -> ra = c; | |
176 c = '?'; | |
177 pp -> qseen--; | |
178 } | |
179 } | |
180 else if (pp -> qseen > 0) | |
181 { | |
182 pp -> ra = c; | |
183 c = '?'; | |
184 pp -> qseen--; | |
185 } | |
186 } | |
187 return c; | |
188 } | |
189 | |
190 /* This function puts a byte back onto the front of the input stream used | |
191 by fetch_byte(). Theoretically, an unlimited number of characters can | |
192 be unfetched. Line and column counting may be incorrect if unfetched | |
193 characters cross a token boundary. */ | |
305
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
194 void preproc_lex_unfetch_byte(struct preproc_info *pp, int c) |
295 | 195 { |
306
b08787e5b9f3
Add include search paths and command line macro definitions
William Astle <lost@l-w.ca>
parents:
305
diff
changeset
|
196 if (pp -> lexstr) |
b08787e5b9f3
Add include search paths and command line macro definitions
William Astle <lost@l-w.ca>
parents:
305
diff
changeset
|
197 { |
b08787e5b9f3
Add include search paths and command line macro definitions
William Astle <lost@l-w.ca>
parents:
305
diff
changeset
|
198 if (c == CPP_EOL) |
b08787e5b9f3
Add include search paths and command line macro definitions
William Astle <lost@l-w.ca>
parents:
305
diff
changeset
|
199 return; |
b08787e5b9f3
Add include search paths and command line macro definitions
William Astle <lost@l-w.ca>
parents:
305
diff
changeset
|
200 if (pp -> lexstrloc > 0) |
b08787e5b9f3
Add include search paths and command line macro definitions
William Astle <lost@l-w.ca>
parents:
305
diff
changeset
|
201 { |
b08787e5b9f3
Add include search paths and command line macro definitions
William Astle <lost@l-w.ca>
parents:
305
diff
changeset
|
202 pp -> lexstrloc--; |
b08787e5b9f3
Add include search paths and command line macro definitions
William Astle <lost@l-w.ca>
parents:
305
diff
changeset
|
203 return; |
b08787e5b9f3
Add include search paths and command line macro definitions
William Astle <lost@l-w.ca>
parents:
305
diff
changeset
|
204 } |
b08787e5b9f3
Add include search paths and command line macro definitions
William Astle <lost@l-w.ca>
parents:
305
diff
changeset
|
205 } |
b08787e5b9f3
Add include search paths and command line macro definitions
William Astle <lost@l-w.ca>
parents:
305
diff
changeset
|
206 |
295 | 207 if (pp -> ungetbufl >= pp -> ungetbufs) |
208 { | |
209 pp -> ungetbufs += 100; | |
210 pp -> ungetbuf = lw_realloc(pp -> ungetbuf, pp -> ungetbufs); | |
211 } | |
212 pp -> ungetbuf[pp -> ungetbufl++] = c; | |
213 } | |
214 | |
215 /* This function retrieves a byte from the input stream. It performs | |
216 backslash-newline splicing on the returned bytes. Any character | |
217 retrieved from the unfetch buffer is presumed to have already passed | |
218 the backslash-newline filter. */ | |
219 static int fetch_byte(struct preproc_info *pp) | |
220 { | |
221 int c; | |
222 | |
298
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
223 if (pp -> lexstr) |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
224 { |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
225 if (pp -> lexstr[pp -> lexstrloc]) |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
226 return pp -> lexstr[pp -> lexstrloc++]; |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
227 else |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
228 return CPP_EOL; |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
229 } |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
230 |
295 | 231 if (pp -> ungetbufl > 0) |
232 { | |
233 pp -> ungetbufl--; | |
234 c = pp -> ungetbuf[pp -> ungetbufl]; | |
235 if (pp -> ungetbufl == 0) | |
236 { | |
237 lw_free(pp -> ungetbuf); | |
238 pp -> ungetbuf = NULL; | |
239 pp -> ungetbufs = 0; | |
240 } | |
241 return c; | |
242 } | |
243 | |
244 again: | |
245 if (pp -> unget != CPP_NOUNG) | |
246 { | |
247 c = pp -> unget; | |
248 pp -> unget = CPP_NOUNG; | |
249 } | |
250 else | |
251 { | |
252 c = fetch_byte_tg(pp); | |
253 } | |
254 if (c == '\\') | |
255 { | |
256 int c2; | |
257 c2 = fetch_byte_tg(pp); | |
258 if (c2 == CPP_EOL) | |
259 goto again; | |
260 else | |
261 pp -> unget = c2; | |
262 } | |
263 return c; | |
264 } | |
265 | |
266 | |
267 | |
268 /* | |
269 Lex a token off the current input file. | |
270 | |
271 Returned tokens are as follows: | |
272 | |
273 * all words starting with [a-zA-Z_] are returned as TOK_IDENT | |
274 * numbers are returned as their appropriate type | |
275 * all whitespace in a sequence, including comments, is returned as | |
276 a single instance of TOK_WSPACE | |
277 * TOK_EOL is returned in the case of the end of a line | |
278 * TOK_EOF is returned when the end of the file is reached | |
279 * If no TOK_EOL appears before TOK_EOF, a TOK_EOL will be synthesised | |
280 * Any symbolic operator, etc., recognized by C will be returned as such | |
281 a token | |
282 * TOK_HASH will be returned for a # | |
283 * trigraphs will be interpreted | |
284 * backslash-newline will be interpreted | |
285 * any instance of CR, LF, CRLF, or LFCR will be interpreted as TOK_EOL | |
286 */ | |
287 | |
288 | |
305
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
289 int preproc_lex_fetch_byte(struct preproc_info *pp) |
295 | 290 { |
291 int c; | |
292 c = fetch_byte(pp); | |
293 if (c == CPP_EOF && pp -> eolseen == 0) | |
294 { | |
295 preproc_throw_warning(pp, "No newline at end of file"); | |
296 pp -> eolseen = 1; | |
297 return CPP_EOL; | |
298 } | |
299 | |
300 if (c == CPP_EOL) | |
301 { | |
302 pp -> eolseen = 1; | |
303 return c; | |
304 } | |
300 | 305 |
295 | 306 pp -> eolseen = 0; |
307 | |
308 /* convert comments to a single space here */ | |
309 if (c == '/') | |
310 { | |
311 int c2; | |
312 c2 = fetch_byte(pp); | |
313 if (c2 == '/') | |
314 { | |
315 /* single line comment */ | |
316 c = ' '; | |
317 for (;;) | |
318 { | |
319 c2 = fetch_byte(pp); | |
320 if (c2 == CPP_EOF || c2 == CPP_EOL) | |
321 break; | |
322 } | |
323 preproc_lex_unfetch_byte(pp, c2); | |
324 } | |
325 else if (c2 == '*') | |
326 { | |
327 /* block comment */ | |
328 c = ' '; | |
329 for (;;) | |
330 { | |
331 c2 = fetch_byte(pp); | |
305
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
332 if (c2 == CPP_EOF) |
295 | 333 { |
334 preproc_lex_unfetch_byte(pp, c); | |
335 break; | |
336 } | |
337 if (c2 == '*') | |
338 { | |
339 /* maybe end of comment */ | |
340 c2 = preproc_lex_fetch_byte(pp); | |
341 if (c2 == '/') | |
342 break; | |
343 } | |
344 } | |
345 } | |
346 else | |
347 { | |
348 /* not a comment - restore lookahead character */ | |
349 preproc_lex_unfetch_byte(pp, c2); | |
350 } | |
351 } | |
352 return c; | |
353 } | |
354 | |
355 struct token *preproc_lex_next_token(struct preproc_info *pp) | |
356 { | |
357 int sline = pp -> lineno; | |
358 int scol = pp -> column; | |
359 char *strval = NULL; | |
360 int ttype = TOK_NONE; | |
361 int c, c2; | |
362 int cl; | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
363 struct lw_strbuf *strbuf; |
304
d85d173ba120
Checkpoint lwcc development - preprocessor is runnable but nonfunctional
William Astle <lost@l-w.ca>
parents:
300
diff
changeset
|
364 struct token *t = NULL; |
300 | 365 struct preproc_info *fs; |
366 | |
367 fileagain: | |
295 | 368 c = preproc_lex_fetch_byte(pp); |
369 if (c == CPP_EOF) | |
370 { | |
371 if (pp -> nlseen == 0) | |
372 { | |
373 c = CPP_EOL; | |
374 } | |
375 } | |
305
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
376 |
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
377 if (pp -> lineno != sline) |
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
378 { |
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
379 sline = pp -> lineno; |
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
380 scol = pp -> column; |
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
381 } |
295 | 382 |
383 if (c == CPP_EOF) | |
384 { | |
300 | 385 /* check if we fell off the end of an include file */ |
386 if (pp -> filestack) | |
387 { | |
388 if (pp -> skip_level || pp -> found_level) | |
389 { | |
390 preproc_throw_error(pp, "Unbalanced conditionals in include file"); | |
391 } | |
392 fclose(pp -> fp); | |
393 fs = pp -> filestack; | |
394 *pp = *fs; | |
395 pp -> filestack = fs -> n; | |
396 goto fileagain; | |
397 } | |
398 else | |
399 { | |
400 ttype = TOK_EOF; | |
401 goto out; | |
402 } | |
295 | 403 } |
404 if (c == CPP_EOL) | |
405 { | |
406 pp -> nlseen = 1; | |
407 ttype = TOK_EOL; | |
408 goto out; | |
409 } | |
410 | |
411 pp -> nlseen = 0; | |
412 if (isspace(c)) | |
413 { | |
414 while (isspace(c)) | |
415 c = preproc_lex_fetch_byte(pp); | |
416 preproc_lex_unfetch_byte(pp, c); | |
417 ttype = TOK_WSPACE; | |
418 goto out; | |
419 } | |
420 | |
421 switch (c) | |
422 { | |
423 case '?': | |
424 ttype = TOK_QMARK; | |
425 goto out; | |
426 | |
427 case ':': | |
428 ttype = TOK_COLON; | |
429 goto out; | |
430 | |
431 case ',': | |
432 ttype = TOK_COMMA; | |
433 goto out; | |
434 | |
435 case '(': | |
436 ttype = TOK_OPAREN; | |
437 goto out; | |
438 | |
439 case ')': | |
440 ttype = TOK_CPAREN; | |
441 goto out; | |
442 | |
443 case '{': | |
444 ttype = TOK_OBRACE; | |
445 goto out; | |
446 | |
447 case '}': | |
448 ttype = TOK_CBRACE; | |
449 goto out; | |
450 | |
451 case '[': | |
452 ttype = TOK_OSQUARE; | |
453 goto out; | |
454 | |
455 case ']': | |
456 ttype = TOK_CSQUARE; | |
457 goto out; | |
458 | |
459 case '~': | |
460 ttype = TOK_COM; | |
461 goto out; | |
462 | |
463 case ';': | |
464 ttype = TOK_EOS; | |
465 goto out; | |
466 | |
467 /* and now for the possible multi character tokens */ | |
468 case '#': | |
469 ttype = TOK_HASH; | |
470 c = preproc_lex_fetch_byte(pp); | |
471 if (c == '#') | |
472 ttype = TOK_DBLHASH; | |
473 else | |
474 preproc_lex_unfetch_byte(pp, c); | |
475 goto out; | |
476 | |
477 case '^': | |
478 ttype = TOK_XOR; | |
479 c = preproc_lex_fetch_byte(pp); | |
480 if (c == '=') | |
481 ttype = TOK_XORASS; | |
482 else | |
483 preproc_lex_unfetch_byte(pp, c); | |
484 goto out; | |
485 | |
486 case '!': | |
487 ttype = TOK_BNOT; | |
488 c = preproc_lex_fetch_byte(pp); | |
489 if (c == '=') | |
490 ttype = TOK_NE; | |
491 else | |
492 preproc_lex_unfetch_byte(pp, c); | |
493 goto out; | |
494 | |
495 case '*': | |
496 ttype = TOK_STAR; | |
497 c = preproc_lex_fetch_byte(pp); | |
498 if (c == '=') | |
499 ttype = TOK_MULASS; | |
500 else | |
501 preproc_lex_unfetch_byte(pp, c); | |
502 goto out; | |
503 | |
504 case '/': | |
505 ttype = TOK_DIV; | |
506 c = preproc_lex_fetch_byte(pp); | |
507 if (c == '=') | |
508 ttype = TOK_DIVASS; | |
509 else | |
510 preproc_lex_unfetch_byte(pp, c); | |
511 goto out; | |
512 | |
513 case '=': | |
514 ttype = TOK_ASS; | |
515 c = preproc_lex_fetch_byte(pp); | |
516 if (c == '=') | |
517 ttype = TOK_EQ; | |
518 else | |
519 preproc_lex_unfetch_byte(pp, c); | |
520 goto out; | |
521 | |
522 case '%': | |
523 ttype = TOK_MOD; | |
524 c = preproc_lex_fetch_byte(pp); | |
525 if (c == '=') | |
526 ttype = TOK_MODASS; | |
527 else | |
528 preproc_lex_unfetch_byte(pp, c); | |
529 goto out; | |
530 | |
531 case '-': | |
532 ttype = TOK_SUB; | |
533 c = preproc_lex_fetch_byte(pp); | |
534 if (c == '=') | |
535 ttype = TOK_SUBASS; | |
536 else if (c == '-') | |
537 ttype = TOK_DBLSUB; | |
538 else if (c == '>') | |
539 ttype = TOK_ARROW; | |
540 else | |
541 preproc_lex_unfetch_byte(pp, c); | |
542 goto out; | |
543 | |
544 case '+': | |
545 ttype = TOK_ADD; | |
546 c = preproc_lex_fetch_byte(pp); | |
547 if (c == '=') | |
548 ttype = TOK_ADDASS; | |
549 else if (c == '+') | |
550 ttype = TOK_DBLADD; | |
551 else | |
552 preproc_lex_unfetch_byte(pp, c); | |
553 goto out; | |
554 | |
555 | |
556 case '&': | |
557 ttype = TOK_BWAND; | |
558 c = preproc_lex_fetch_byte(pp); | |
559 if (c == '=') | |
560 ttype = TOK_BWANDASS; | |
561 else if (c == '&') | |
562 ttype = TOK_BAND; | |
563 else | |
564 preproc_lex_unfetch_byte(pp, c); | |
565 goto out; | |
566 | |
567 case '|': | |
568 ttype = TOK_BWOR; | |
569 c = preproc_lex_fetch_byte(pp); | |
570 if (c == '=') | |
571 ttype = TOK_BWORASS; | |
572 else if (c == '|') | |
573 ttype = TOK_BOR; | |
574 else | |
575 preproc_lex_unfetch_byte(pp, c); | |
576 goto out; | |
577 | |
578 case '<': | |
579 ttype = TOK_LT; | |
580 c = preproc_lex_fetch_byte(pp); | |
581 if (c == '=') | |
582 ttype = TOK_LE; | |
583 else if (c == '<') | |
584 { | |
585 ttype = TOK_LSH; | |
586 c = preproc_lex_fetch_byte(pp); | |
587 if (c == '=') | |
588 ttype = TOK_LSHASS; | |
589 else | |
590 preproc_lex_unfetch_byte(pp, c); | |
591 } | |
592 else | |
593 preproc_lex_unfetch_byte(pp, c); | |
594 goto out; | |
595 | |
596 | |
597 case '>': | |
598 ttype = TOK_GT; | |
599 c = preproc_lex_fetch_byte(pp); | |
600 if (c == '=') | |
601 ttype = TOK_GE; | |
602 else if (c == '>') | |
603 { | |
604 ttype = TOK_RSH; | |
605 c = preproc_lex_fetch_byte(pp); | |
606 if (c == '=') | |
607 ttype = TOK_RSHASS; | |
608 else | |
609 preproc_lex_unfetch_byte(pp, c); | |
610 } | |
611 else | |
612 preproc_lex_unfetch_byte(pp, c); | |
613 goto out; | |
614 | |
615 case '\'': | |
616 /* character constant - turns into a uint */ | |
617 chrlit: | |
618 cl = 0; | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
619 strbuf = lw_strbuf_new(); |
295 | 620 for (;;) |
621 { | |
622 c = preproc_lex_fetch_byte(pp); | |
623 if (c == CPP_EOF || c == CPP_EOL || c == '\'') | |
624 break; | |
625 cl++; | |
626 if (c == '\\') | |
627 { | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
628 lw_strbuf_add(strbuf, '\\'); |
295 | 629 c = preproc_lex_fetch_byte(pp); |
630 if (c == CPP_EOF || c == CPP_EOL) | |
631 { | |
298
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
632 if (!pp -> lexstr) |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
633 preproc_throw_error(pp, "Invalid character constant"); |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
634 ttype = TOK_ERROR; |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
635 strval = lw_strbuf_end(strbuf); |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
636 goto out; |
295 | 637 } |
638 cl++; | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
639 lw_strbuf_add(strbuf, c); |
295 | 640 continue; |
641 } | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
642 lw_strbuf_add(strbuf, c); |
295 | 643 } |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
644 strval = lw_strbuf_end(strbuf); |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
645 if (cl == 0) |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
646 { |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
647 ttype = TOK_ERROR; |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
648 if (!pp -> lexstr) |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
649 preproc_throw_error(pp, "Invalid character constant"); |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
650 } |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
651 else |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
652 ttype = TOK_CHR_LIT; |
295 | 653 goto out; |
654 | |
655 case '"': | |
656 strlit: | |
657 /* string literal */ | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
658 strbuf = lw_strbuf_new(); |
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
659 lw_strbuf_add(strbuf, '"'); |
295 | 660 for (;;) |
661 { | |
662 c = preproc_lex_fetch_byte(pp); | |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
663 if (c == CPP_EOF || c == CPP_EOL) |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
664 { |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
665 ttype = TOK_ERROR; |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
666 strval = lw_strbuf_end(strbuf); |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
667 if (!pp -> lexstr) |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
668 preproc_throw_error(pp, "Invalid string constant"); |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
669 goto out; |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
670 } |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
671 if (c == '"') |
295 | 672 break; |
673 if (c == '\\') | |
674 { | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
675 lw_strbuf_add(strbuf, '\\'); |
295 | 676 c = preproc_lex_fetch_byte(pp); |
677 if (c == CPP_EOF || c == CPP_EOL) | |
678 { | |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
679 ttype = TOK_ERROR; |
298
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
680 if (!pp -> lexstr) |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
681 preproc_throw_error(pp, "Invalid string constant"); |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
682 strval = lw_strbuf_end(strbuf); |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
683 goto out; |
295 | 684 } |
685 cl++; | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
686 lw_strbuf_add(strbuf, c); |
295 | 687 continue; |
688 } | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
689 lw_strbuf_add(strbuf, c); |
295 | 690 } |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
691 lw_strbuf_add(strbuf, '"'); |
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
692 strval = lw_strbuf_end(strbuf); |
295 | 693 ttype = TOK_STR_LIT; |
694 goto out; | |
695 | |
696 case 'L': | |
697 /* check for wide string or wide char const */ | |
698 c2 = preproc_lex_fetch_byte(pp); | |
699 if (c2 == '\'') | |
700 { | |
701 goto chrlit; | |
702 } | |
703 else if (c2 == '"') | |
704 { | |
705 goto strlit; | |
706 } | |
707 preproc_lex_unfetch_byte(pp, c2); | |
708 /* fall through for identifier */ | |
709 case '_': | |
710 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': | |
711 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': | |
712 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': | |
713 case 's': case 't': case 'u': case 'v': case 'w': case 'x': | |
714 case 'y': case 'z': | |
715 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': | |
716 case 'G': case 'H': case 'I': case 'J': case 'K': | |
717 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': | |
718 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': | |
719 case 'Y': case 'Z': | |
720 /* we have an identifier here */ | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
721 strbuf = lw_strbuf_new(); |
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
722 lw_strbuf_add(strbuf, c); |
295 | 723 for (;;) |
724 { | |
725 c = preproc_lex_fetch_byte(pp); | |
726 if ((c == '_') || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) | |
727 { | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
728 lw_strbuf_add(strbuf, c); |
295 | 729 continue; |
730 } | |
731 else | |
732 { | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
733 lw_strbuf_add(strbuf, 0); |
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
734 strval = lw_strbuf_end(strbuf); |
295 | 735 break; |
736 } | |
737 } | |
738 preproc_lex_unfetch_byte(pp, c); | |
739 ttype = TOK_IDENT; | |
740 goto out; | |
741 | |
742 case '.': | |
743 c = preproc_lex_fetch_byte(pp); | |
744 if (c >= '0' && c <= '9') | |
745 { | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
746 strbuf = lw_strbuf_new(); |
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
747 lw_strbuf_add(strbuf, '.'); |
295 | 748 goto numlit; |
749 } | |
750 else if (c == '.') | |
751 { | |
752 c = preproc_lex_fetch_byte(pp); | |
753 if (c == '.') | |
754 { | |
755 ttype = TOK_ELLIPSIS; | |
756 goto out; | |
757 } | |
758 preproc_lex_unfetch_byte(pp, c); | |
759 } | |
760 preproc_lex_unfetch_byte(pp, c); | |
761 ttype = TOK_DOT; | |
762 goto out; | |
763 | |
764 case '0': case '1': case '2': case '3': case '4': | |
765 case '5': case '6': case '7': case '8': case '9': | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
766 strbuf = lw_strbuf_new(); |
295 | 767 numlit: |
296 | 768 ttype = TOK_NUMBER; |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
769 lw_strbuf_add(strbuf, c); |
295 | 770 for (;;) |
771 { | |
772 c = preproc_lex_fetch_byte(pp); | |
773 if (!((c == '_') || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))) | |
774 break; | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
775 lw_strbuf_add(strbuf, c); |
295 | 776 if (c == 'e' || c == 'E' || c == 'p' || c == 'P') |
777 { | |
778 c = preproc_lex_fetch_byte(pp); | |
779 if (c == '+' || c == '-') | |
780 { | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
781 lw_strbuf_add(strbuf, c); |
295 | 782 continue; |
783 } | |
784 preproc_lex_unfetch_byte(pp, c); | |
785 } | |
786 } | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
787 strval = lw_strbuf_end(strbuf); |
295 | 788 preproc_lex_unfetch_byte(pp, c); |
789 goto out; | |
790 | |
791 default: | |
792 ttype = TOK_CHAR; | |
793 strval = lw_alloc(2); | |
794 strval[0] = c; | |
795 strval[1] = 0; | |
796 break; | |
797 } | |
798 out: | |
799 t = token_create(ttype, strval, sline, scol, pp -> fn); | |
800 lw_free(strval); | |
801 return t; | |
802 } |