Mercurial > hg > index.cgi
annotate lwcc/lex.c @ 536:33a59e232a5b
Fix basic output target to keep lines below 249 characters
The line length limiter in the basic output was not properly moving to the
next line before 249 characters, which is the limit Color Basic can read in an
ASCII basic program. Changed the line limiter to 240 from 247 to account
for a possible 5 digit number plus a comma and just a bit of extra breathing
space.
author | William Astle <lost@l-w.ca> |
---|---|
date | Thu, 16 Jun 2022 13:55:34 -0600 |
parents | 670ea8f90212 |
children | ee3e52ab2288 |
rev | line source |
---|---|
295 | 1 /* |
2 lwcc/lex.c | |
3 | |
4 Copyright © 2013 William Astle | |
5 | |
6 This file is part of LWTOOLS. | |
7 | |
8 LWTOOLS is free software: you can redistribute it and/or modify it under the | |
9 terms of the GNU General Public License as published by the Free Software | |
10 Foundation, either version 3 of the License, or (at your option) any later | |
11 version. | |
12 | |
13 This program is distributed in the hope that it will be useful, but WITHOUT | |
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
16 more details. | |
17 | |
18 You should have received a copy of the GNU General Public License along with | |
19 this program. If not, see <http://www.gnu.org/licenses/>. | |
20 */ | |
21 | |
22 #include <ctype.h> | |
23 #include <stdio.h> | |
24 | |
25 #include <lw_alloc.h> | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
26 #include <lw_strbuf.h> |
295 | 27 |
28 #include "cpp.h" | |
29 #include "token.h" | |
30 | |
31 /* fetch a raw input byte from the current file. Will return CPP_EOF if | |
32 EOF is encountered and CPP_EOL if an end of line sequence is encountered. | |
33 End of line is defined as either CR, CRLF, LF, or LFCR. CPP_EOL is | |
34 returned on the first CR or LF encountered. The complementary CR or LF | |
35 is munched, if present, when the *next* character is read. This always | |
36 operates on file_stack. | |
37 | |
38 This function also accounts for line numbers in input files and also | |
39 character columns. | |
40 */ | |
41 static int fetch_byte_ll(struct preproc_info *pp) | |
42 { | |
43 int c; | |
44 | |
45 if (pp -> eolstate != 0) | |
46 { | |
47 pp -> lineno++; | |
48 pp -> column = 0; | |
49 } | |
50 c = getc(pp -> fp); | |
51 pp -> column++; | |
52 if (pp -> eolstate == 1) | |
53 { | |
54 // just saw CR, munch LF | |
55 if (c == 10) | |
56 c = getc(pp -> fp); | |
57 pp -> eolstate = 0; | |
58 } | |
59 else if (pp -> eolstate == 2) | |
60 { | |
61 // just saw LF, much CR | |
62 if (c == 13) | |
63 c = getc(pp -> fp); | |
64 pp -> eolstate = 0; | |
65 } | |
66 | |
67 if (c == 10) | |
68 { | |
69 // we have LF - end of line, flag to munch CR | |
70 pp -> eolstate = 2; | |
71 c = CPP_EOL; | |
72 } | |
73 else if (c == 13) | |
74 { | |
75 // we have CR - end of line, flag to munch LF | |
76 pp -> eolstate = 1; | |
77 c = CPP_EOL; | |
78 } | |
79 else if (c == EOF) | |
80 { | |
81 c = CPP_EOF; | |
82 } | |
83 return c; | |
84 } | |
85 | |
86 /* This function takes a sequence of bytes from the _ll function above | |
87 and does trigraph interpretation on it, but only if the global | |
88 trigraphs is nonzero. */ | |
89 static int fetch_byte_tg(struct preproc_info *pp) | |
90 { | |
91 int c; | |
92 | |
93 if (!pp -> trigraphs) | |
94 { | |
95 c = fetch_byte_ll(pp); | |
96 } | |
97 else | |
98 { | |
99 /* we have to do the trigraph shit here */ | |
100 if (pp -> ra != CPP_NOUNG) | |
101 { | |
102 if (pp -> qseen > 0) | |
103 { | |
104 c = '?'; | |
105 pp -> qseen -= 1; | |
106 return c; | |
107 } | |
108 else | |
109 { | |
110 c = pp -> ra; | |
111 pp -> ra = CPP_NOUNG; | |
112 return c; | |
113 } | |
114 } | |
115 | |
116 c = fetch_byte_ll(pp); | |
117 while (c == '?') | |
118 { | |
119 pp -> qseen++; | |
120 c = fetch_byte_ll(pp); | |
121 } | |
122 | |
123 if (pp -> qseen >= 2) | |
124 { | |
125 // we have a trigraph | |
126 switch (c) | |
127 { | |
128 case '=': | |
129 c = '#'; | |
130 pp -> qseen -= 2; | |
131 break; | |
132 | |
133 case '/': | |
134 c = '\\'; | |
135 pp -> qseen -= 2; | |
136 break; | |
137 | |
138 case '\'': | |
139 c = '^'; | |
140 pp -> qseen -= 2; | |
141 break; | |
142 | |
143 case '(': | |
144 c = '['; | |
145 pp -> qseen -= 2; | |
146 break; | |
147 | |
148 case ')': | |
149 c = ']'; | |
150 pp -> qseen -= 2; | |
151 break; | |
152 | |
153 case '!': | |
154 c = '|'; | |
155 pp -> qseen -= 2; | |
156 break; | |
157 | |
158 case '<': | |
159 c = '{'; | |
160 pp -> qseen -= 2; | |
161 break; | |
162 | |
163 case '>': | |
164 c = '}'; | |
165 pp -> qseen -= 2; | |
166 break; | |
167 | |
168 case '-': | |
169 c = '~'; | |
170 pp -> qseen -= 2; | |
171 break; | |
172 } | |
173 if (pp -> qseen > 0) | |
174 { | |
175 pp -> ra = c; | |
176 c = '?'; | |
177 pp -> qseen--; | |
178 } | |
179 } | |
180 else if (pp -> qseen > 0) | |
181 { | |
182 pp -> ra = c; | |
183 c = '?'; | |
184 pp -> qseen--; | |
185 } | |
186 } | |
187 return c; | |
188 } | |
189 | |
190 /* This function puts a byte back onto the front of the input stream used | |
191 by fetch_byte(). Theoretically, an unlimited number of characters can | |
192 be unfetched. Line and column counting may be incorrect if unfetched | |
193 characters cross a token boundary. */ | |
305
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
194 void preproc_lex_unfetch_byte(struct preproc_info *pp, int c) |
295 | 195 { |
306
b08787e5b9f3
Add include search paths and command line macro definitions
William Astle <lost@l-w.ca>
parents:
305
diff
changeset
|
196 if (pp -> lexstr) |
b08787e5b9f3
Add include search paths and command line macro definitions
William Astle <lost@l-w.ca>
parents:
305
diff
changeset
|
197 { |
b08787e5b9f3
Add include search paths and command line macro definitions
William Astle <lost@l-w.ca>
parents:
305
diff
changeset
|
198 if (c == CPP_EOL) |
b08787e5b9f3
Add include search paths and command line macro definitions
William Astle <lost@l-w.ca>
parents:
305
diff
changeset
|
199 return; |
b08787e5b9f3
Add include search paths and command line macro definitions
William Astle <lost@l-w.ca>
parents:
305
diff
changeset
|
200 if (pp -> lexstrloc > 0) |
b08787e5b9f3
Add include search paths and command line macro definitions
William Astle <lost@l-w.ca>
parents:
305
diff
changeset
|
201 { |
b08787e5b9f3
Add include search paths and command line macro definitions
William Astle <lost@l-w.ca>
parents:
305
diff
changeset
|
202 pp -> lexstrloc--; |
b08787e5b9f3
Add include search paths and command line macro definitions
William Astle <lost@l-w.ca>
parents:
305
diff
changeset
|
203 return; |
b08787e5b9f3
Add include search paths and command line macro definitions
William Astle <lost@l-w.ca>
parents:
305
diff
changeset
|
204 } |
b08787e5b9f3
Add include search paths and command line macro definitions
William Astle <lost@l-w.ca>
parents:
305
diff
changeset
|
205 } |
b08787e5b9f3
Add include search paths and command line macro definitions
William Astle <lost@l-w.ca>
parents:
305
diff
changeset
|
206 |
295 | 207 if (pp -> ungetbufl >= pp -> ungetbufs) |
208 { | |
209 pp -> ungetbufs += 100; | |
210 pp -> ungetbuf = lw_realloc(pp -> ungetbuf, pp -> ungetbufs); | |
211 } | |
212 pp -> ungetbuf[pp -> ungetbufl++] = c; | |
213 } | |
214 | |
215 /* This function retrieves a byte from the input stream. It performs | |
216 backslash-newline splicing on the returned bytes. Any character | |
217 retrieved from the unfetch buffer is presumed to have already passed | |
218 the backslash-newline filter. */ | |
219 static int fetch_byte(struct preproc_info *pp) | |
220 { | |
221 int c; | |
222 | |
298
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
223 if (pp -> lexstr) |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
224 { |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
225 if (pp -> lexstr[pp -> lexstrloc]) |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
226 return pp -> lexstr[pp -> lexstrloc++]; |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
227 else |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
228 return CPP_EOL; |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
229 } |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
230 |
295 | 231 if (pp -> ungetbufl > 0) |
232 { | |
233 pp -> ungetbufl--; | |
234 c = pp -> ungetbuf[pp -> ungetbufl]; | |
235 if (pp -> ungetbufl == 0) | |
236 { | |
237 lw_free(pp -> ungetbuf); | |
238 pp -> ungetbuf = NULL; | |
239 pp -> ungetbufs = 0; | |
240 } | |
241 return c; | |
242 } | |
243 | |
244 again: | |
245 if (pp -> unget != CPP_NOUNG) | |
246 { | |
247 c = pp -> unget; | |
248 pp -> unget = CPP_NOUNG; | |
249 } | |
250 else | |
251 { | |
252 c = fetch_byte_tg(pp); | |
253 } | |
254 if (c == '\\') | |
255 { | |
256 int c2; | |
257 c2 = fetch_byte_tg(pp); | |
258 if (c2 == CPP_EOL) | |
259 goto again; | |
260 else | |
261 pp -> unget = c2; | |
262 } | |
263 return c; | |
264 } | |
265 | |
266 | |
267 | |
268 /* | |
269 Lex a token off the current input file. | |
270 | |
271 Returned tokens are as follows: | |
272 | |
273 * all words starting with [a-zA-Z_] are returned as TOK_IDENT | |
274 * numbers are returned as their appropriate type | |
275 * all whitespace in a sequence, including comments, is returned as | |
276 a single instance of TOK_WSPACE | |
277 * TOK_EOL is returned in the case of the end of a line | |
278 * TOK_EOF is returned when the end of the file is reached | |
279 * If no TOK_EOL appears before TOK_EOF, a TOK_EOL will be synthesised | |
280 * Any symbolic operator, etc., recognized by C will be returned as such | |
281 a token | |
282 * TOK_HASH will be returned for a # | |
283 * trigraphs will be interpreted | |
284 * backslash-newline will be interpreted | |
285 * any instance of CR, LF, CRLF, or LFCR will be interpreted as TOK_EOL | |
286 */ | |
287 | |
288 | |
305
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
289 int preproc_lex_fetch_byte(struct preproc_info *pp) |
295 | 290 { |
291 int c; | |
292 c = fetch_byte(pp); | |
293 if (c == CPP_EOF && pp -> eolseen == 0) | |
294 { | |
295 preproc_throw_warning(pp, "No newline at end of file"); | |
296 pp -> eolseen = 1; | |
297 return CPP_EOL; | |
298 } | |
299 | |
300 if (c == CPP_EOL) | |
301 { | |
302 pp -> eolseen = 1; | |
303 return c; | |
304 } | |
300 | 305 |
295 | 306 pp -> eolseen = 0; |
307 | |
308 /* convert comments to a single space here */ | |
309 if (c == '/') | |
310 { | |
311 int c2; | |
312 c2 = fetch_byte(pp); | |
313 if (c2 == '/') | |
314 { | |
315 /* single line comment */ | |
316 c = ' '; | |
317 for (;;) | |
318 { | |
319 c2 = fetch_byte(pp); | |
320 if (c2 == CPP_EOF || c2 == CPP_EOL) | |
321 break; | |
322 } | |
323 preproc_lex_unfetch_byte(pp, c2); | |
324 } | |
325 else if (c2 == '*') | |
326 { | |
327 /* block comment */ | |
328 c = ' '; | |
329 for (;;) | |
330 { | |
331 c2 = fetch_byte(pp); | |
305
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
332 if (c2 == CPP_EOF) |
295 | 333 { |
334 preproc_lex_unfetch_byte(pp, c); | |
335 break; | |
336 } | |
337 if (c2 == '*') | |
338 { | |
339 /* maybe end of comment */ | |
340 c2 = preproc_lex_fetch_byte(pp); | |
341 if (c2 == '/') | |
342 break; | |
343 } | |
344 } | |
345 } | |
346 else | |
347 { | |
348 /* not a comment - restore lookahead character */ | |
349 preproc_lex_unfetch_byte(pp, c2); | |
350 } | |
351 } | |
352 return c; | |
353 } | |
354 | |
355 struct token *preproc_lex_next_token(struct preproc_info *pp) | |
356 { | |
357 int sline = pp -> lineno; | |
358 int scol = pp -> column; | |
359 char *strval = NULL; | |
360 int ttype = TOK_NONE; | |
361 int c, c2; | |
362 int cl; | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
363 struct lw_strbuf *strbuf; |
304
d85d173ba120
Checkpoint lwcc development - preprocessor is runnable but nonfunctional
William Astle <lost@l-w.ca>
parents:
300
diff
changeset
|
364 struct token *t = NULL; |
300 | 365 struct preproc_info *fs; |
366 | |
367 fileagain: | |
295 | 368 c = preproc_lex_fetch_byte(pp); |
369 if (c == CPP_EOF) | |
370 { | |
371 if (pp -> nlseen == 0) | |
372 { | |
373 c = CPP_EOL; | |
374 } | |
375 } | |
305
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
376 |
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
377 if (pp -> lineno != sline) |
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
378 { |
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
379 sline = pp -> lineno; |
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
380 scol = pp -> column; |
54f213c8fb81
Various bugfixes and output tuning
William Astle <lost@l-w.ca>
parents:
304
diff
changeset
|
381 } |
295 | 382 |
383 if (c == CPP_EOF) | |
384 { | |
300 | 385 /* check if we fell off the end of an include file */ |
386 if (pp -> filestack) | |
387 { | |
388 if (pp -> skip_level || pp -> found_level) | |
389 { | |
390 preproc_throw_error(pp, "Unbalanced conditionals in include file"); | |
391 } | |
392 fclose(pp -> fp); | |
393 fs = pp -> filestack; | |
394 *pp = *fs; | |
395 pp -> filestack = fs -> n; | |
396 goto fileagain; | |
397 } | |
398 else | |
399 { | |
400 ttype = TOK_EOF; | |
401 goto out; | |
402 } | |
295 | 403 } |
404 if (c == CPP_EOL) | |
405 { | |
406 pp -> nlseen = 1; | |
407 ttype = TOK_EOL; | |
408 goto out; | |
409 } | |
410 | |
411 pp -> nlseen = 0; | |
412 if (isspace(c)) | |
413 { | |
414 while (isspace(c)) | |
415 c = preproc_lex_fetch_byte(pp); | |
416 preproc_lex_unfetch_byte(pp, c); | |
417 ttype = TOK_WSPACE; | |
418 goto out; | |
419 } | |
420 | |
421 switch (c) | |
422 { | |
423 case '?': | |
424 ttype = TOK_QMARK; | |
425 goto out; | |
426 | |
427 case ':': | |
428 ttype = TOK_COLON; | |
429 goto out; | |
430 | |
431 case ',': | |
432 ttype = TOK_COMMA; | |
433 goto out; | |
434 | |
435 case '(': | |
436 ttype = TOK_OPAREN; | |
437 goto out; | |
438 | |
439 case ')': | |
440 ttype = TOK_CPAREN; | |
441 goto out; | |
442 | |
443 case '{': | |
444 ttype = TOK_OBRACE; | |
445 goto out; | |
446 | |
447 case '}': | |
448 ttype = TOK_CBRACE; | |
449 goto out; | |
450 | |
451 case '[': | |
452 ttype = TOK_OSQUARE; | |
453 goto out; | |
454 | |
455 case ']': | |
456 ttype = TOK_CSQUARE; | |
457 goto out; | |
458 | |
459 case '~': | |
460 ttype = TOK_COM; | |
461 goto out; | |
462 | |
463 case ';': | |
464 ttype = TOK_EOS; | |
465 goto out; | |
466 | |
467 /* and now for the possible multi character tokens */ | |
468 case '#': | |
469 ttype = TOK_HASH; | |
470 c = preproc_lex_fetch_byte(pp); | |
471 if (c == '#') | |
472 ttype = TOK_DBLHASH; | |
473 else | |
474 preproc_lex_unfetch_byte(pp, c); | |
475 goto out; | |
476 | |
477 case '^': | |
478 ttype = TOK_XOR; | |
479 c = preproc_lex_fetch_byte(pp); | |
480 if (c == '=') | |
481 ttype = TOK_XORASS; | |
482 else | |
483 preproc_lex_unfetch_byte(pp, c); | |
484 goto out; | |
485 | |
486 case '!': | |
487 ttype = TOK_BNOT; | |
488 c = preproc_lex_fetch_byte(pp); | |
489 if (c == '=') | |
490 ttype = TOK_NE; | |
491 else | |
492 preproc_lex_unfetch_byte(pp, c); | |
493 goto out; | |
494 | |
495 case '*': | |
496 ttype = TOK_STAR; | |
497 c = preproc_lex_fetch_byte(pp); | |
498 if (c == '=') | |
499 ttype = TOK_MULASS; | |
500 else | |
501 preproc_lex_unfetch_byte(pp, c); | |
502 goto out; | |
503 | |
504 case '/': | |
505 ttype = TOK_DIV; | |
506 c = preproc_lex_fetch_byte(pp); | |
507 if (c == '=') | |
508 ttype = TOK_DIVASS; | |
509 else | |
510 preproc_lex_unfetch_byte(pp, c); | |
511 goto out; | |
512 | |
513 case '=': | |
514 ttype = TOK_ASS; | |
515 c = preproc_lex_fetch_byte(pp); | |
516 if (c == '=') | |
517 ttype = TOK_EQ; | |
518 else | |
519 preproc_lex_unfetch_byte(pp, c); | |
520 goto out; | |
521 | |
522 case '%': | |
523 ttype = TOK_MOD; | |
524 c = preproc_lex_fetch_byte(pp); | |
525 if (c == '=') | |
526 ttype = TOK_MODASS; | |
527 else | |
528 preproc_lex_unfetch_byte(pp, c); | |
529 goto out; | |
530 | |
531 case '-': | |
532 ttype = TOK_SUB; | |
533 c = preproc_lex_fetch_byte(pp); | |
534 if (c == '=') | |
535 ttype = TOK_SUBASS; | |
536 else if (c == '-') | |
537 ttype = TOK_DBLSUB; | |
538 else if (c == '>') | |
539 ttype = TOK_ARROW; | |
540 else | |
541 preproc_lex_unfetch_byte(pp, c); | |
542 goto out; | |
543 | |
544 case '+': | |
545 ttype = TOK_ADD; | |
546 c = preproc_lex_fetch_byte(pp); | |
547 if (c == '=') | |
548 ttype = TOK_ADDASS; | |
549 else if (c == '+') | |
550 ttype = TOK_DBLADD; | |
551 else | |
552 preproc_lex_unfetch_byte(pp, c); | |
553 goto out; | |
554 | |
555 | |
556 case '&': | |
557 ttype = TOK_BWAND; | |
558 c = preproc_lex_fetch_byte(pp); | |
559 if (c == '=') | |
560 ttype = TOK_BWANDASS; | |
561 else if (c == '&') | |
562 ttype = TOK_BAND; | |
563 else | |
564 preproc_lex_unfetch_byte(pp, c); | |
565 goto out; | |
566 | |
567 case '|': | |
568 ttype = TOK_BWOR; | |
569 c = preproc_lex_fetch_byte(pp); | |
570 if (c == '=') | |
571 ttype = TOK_BWORASS; | |
572 else if (c == '|') | |
573 ttype = TOK_BOR; | |
574 else | |
575 preproc_lex_unfetch_byte(pp, c); | |
576 goto out; | |
577 | |
578 case '<': | |
579 ttype = TOK_LT; | |
580 c = preproc_lex_fetch_byte(pp); | |
581 if (c == '=') | |
582 ttype = TOK_LE; | |
583 else if (c == '<') | |
584 { | |
585 ttype = TOK_LSH; | |
586 c = preproc_lex_fetch_byte(pp); | |
587 if (c == '=') | |
588 ttype = TOK_LSHASS; | |
589 else | |
590 preproc_lex_unfetch_byte(pp, c); | |
591 } | |
592 else | |
593 preproc_lex_unfetch_byte(pp, c); | |
594 goto out; | |
595 | |
596 | |
597 case '>': | |
598 ttype = TOK_GT; | |
599 c = preproc_lex_fetch_byte(pp); | |
600 if (c == '=') | |
601 ttype = TOK_GE; | |
602 else if (c == '>') | |
603 { | |
604 ttype = TOK_RSH; | |
605 c = preproc_lex_fetch_byte(pp); | |
606 if (c == '=') | |
607 ttype = TOK_RSHASS; | |
608 else | |
609 preproc_lex_unfetch_byte(pp, c); | |
610 } | |
611 else | |
612 preproc_lex_unfetch_byte(pp, c); | |
613 goto out; | |
614 | |
615 case '\'': | |
616 /* character constant - turns into a uint */ | |
617 chrlit: | |
618 cl = 0; | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
619 strbuf = lw_strbuf_new(); |
295 | 620 for (;;) |
621 { | |
622 c = preproc_lex_fetch_byte(pp); | |
623 if (c == CPP_EOF || c == CPP_EOL || c == '\'') | |
624 break; | |
625 cl++; | |
626 if (c == '\\') | |
627 { | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
628 lw_strbuf_add(strbuf, '\\'); |
295 | 629 c = preproc_lex_fetch_byte(pp); |
630 if (c == CPP_EOF || c == CPP_EOL) | |
631 { | |
298
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
632 if (!pp -> lexstr) |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
633 preproc_throw_error(pp, "Invalid character constant"); |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
634 ttype = TOK_ERROR; |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
635 strval = lw_strbuf_end(strbuf); |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
636 goto out; |
295 | 637 } |
638 cl++; | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
639 lw_strbuf_add(strbuf, c); |
295 | 640 continue; |
641 } | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
642 lw_strbuf_add(strbuf, c); |
295 | 643 } |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
644 strval = lw_strbuf_end(strbuf); |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
645 if (cl == 0) |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
646 { |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
647 ttype = TOK_ERROR; |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
648 if (!pp -> lexstr) |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
649 preproc_throw_error(pp, "Invalid character constant"); |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
650 } |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
651 else |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
652 ttype = TOK_CHR_LIT; |
295 | 653 goto out; |
654 | |
655 case '"': | |
656 strlit: | |
657 /* string literal */ | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
658 strbuf = lw_strbuf_new(); |
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
659 lw_strbuf_add(strbuf, '"'); |
295 | 660 for (;;) |
661 { | |
662 c = preproc_lex_fetch_byte(pp); | |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
663 if (c == CPP_EOF || c == CPP_EOL) |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
664 { |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
665 ttype = TOK_ERROR; |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
666 strval = lw_strbuf_end(strbuf); |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
667 if (!pp -> lexstr) |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
668 preproc_throw_error(pp, "Invalid string constant"); |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
669 goto out; |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
670 } |
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
671 if (c == '"') |
295 | 672 break; |
673 if (c == '\\') | |
674 { | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
675 lw_strbuf_add(strbuf, '\\'); |
295 | 676 c = preproc_lex_fetch_byte(pp); |
677 if (c == CPP_EOF || c == CPP_EOL) | |
678 { | |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
679 ttype = TOK_ERROR; |
298
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
680 if (!pp -> lexstr) |
6112c67728ba
Add stringification and token concatenation
William Astle <lost@l-w.ca>
parents:
296
diff
changeset
|
681 preproc_throw_error(pp, "Invalid string constant"); |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
682 strval = lw_strbuf_end(strbuf); |
299
856caf91ffaa
Added token list structure and switched some stuff to use it
William Astle <lost@l-w.ca>
parents:
298
diff
changeset
|
683 goto out; |
295 | 684 } |
685 cl++; | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
686 lw_strbuf_add(strbuf, c); |
295 | 687 continue; |
688 } | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
689 lw_strbuf_add(strbuf, c); |
295 | 690 } |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
691 lw_strbuf_add(strbuf, '"'); |
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
692 strval = lw_strbuf_end(strbuf); |
295 | 693 ttype = TOK_STR_LIT; |
694 goto out; | |
695 | |
696 case 'L': | |
697 /* check for wide string or wide char const */ | |
698 c2 = preproc_lex_fetch_byte(pp); | |
699 if (c2 == '\'') | |
700 { | |
701 goto chrlit; | |
702 } | |
703 else if (c2 == '"') | |
704 { | |
705 goto strlit; | |
706 } | |
707 preproc_lex_unfetch_byte(pp, c2); | |
708 /* fall through for identifier */ | |
709 case '_': | |
710 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': | |
711 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': | |
712 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': | |
713 case 's': case 't': case 'u': case 'v': case 'w': case 'x': | |
714 case 'y': case 'z': | |
715 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': | |
716 case 'G': case 'H': case 'I': case 'J': case 'K': | |
717 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': | |
718 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': | |
719 case 'Y': case 'Z': | |
720 /* we have an identifier here */ | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
721 strbuf = lw_strbuf_new(); |
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
722 lw_strbuf_add(strbuf, c); |
295 | 723 for (;;) |
724 { | |
725 c = preproc_lex_fetch_byte(pp); | |
726 if ((c == '_') || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) | |
727 { | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
728 lw_strbuf_add(strbuf, c); |
295 | 729 continue; |
730 } | |
731 else | |
732 { | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
733 lw_strbuf_add(strbuf, 0); |
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
734 strval = lw_strbuf_end(strbuf); |
295 | 735 break; |
736 } | |
737 } | |
738 preproc_lex_unfetch_byte(pp, c); | |
739 ttype = TOK_IDENT; | |
740 goto out; | |
741 | |
742 case '.': | |
743 c = preproc_lex_fetch_byte(pp); | |
744 if (c >= '0' && c <= '9') | |
745 { | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
746 strbuf = lw_strbuf_new(); |
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
747 lw_strbuf_add(strbuf, '.'); |
295 | 748 goto numlit; |
749 } | |
750 else if (c == '.') | |
751 { | |
752 c = preproc_lex_fetch_byte(pp); | |
753 if (c == '.') | |
754 { | |
755 ttype = TOK_ELLIPSIS; | |
756 goto out; | |
757 } | |
758 preproc_lex_unfetch_byte(pp, c); | |
759 } | |
760 preproc_lex_unfetch_byte(pp, c); | |
761 ttype = TOK_DOT; | |
762 goto out; | |
763 | |
764 case '0': case '1': case '2': case '3': case '4': | |
765 case '5': case '6': case '7': case '8': case '9': | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
766 strbuf = lw_strbuf_new(); |
295 | 767 numlit: |
296 | 768 ttype = TOK_NUMBER; |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
769 lw_strbuf_add(strbuf, c); |
295 | 770 for (;;) |
771 { | |
772 c = preproc_lex_fetch_byte(pp); | |
773 if (!((c == '_') || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))) | |
774 break; | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
775 lw_strbuf_add(strbuf, c); |
295 | 776 if (c == 'e' || c == 'E' || c == 'p' || c == 'P') |
777 { | |
778 c = preproc_lex_fetch_byte(pp); | |
779 if (c == '+' || c == '-') | |
780 { | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
781 lw_strbuf_add(strbuf, c); |
295 | 782 continue; |
783 } | |
784 preproc_lex_unfetch_byte(pp, c); | |
785 } | |
786 } | |
308
670ea8f90212
Converted preproc logic to library and moved some utility stuff to lwlib
William Astle <lost@l-w.ca>
parents:
306
diff
changeset
|
787 strval = lw_strbuf_end(strbuf); |
295 | 788 preproc_lex_unfetch_byte(pp, c); |
789 goto out; | |
790 | |
791 default: | |
792 ttype = TOK_CHAR; | |
793 strval = lw_alloc(2); | |
794 strval[0] = c; | |
795 strval[1] = 0; | |
796 break; | |
797 } | |
798 out: | |
799 t = token_create(ttype, strval, sline, scol, pp -> fn); | |
800 lw_free(strval); | |
801 return t; | |
802 } |