Mercurial > hg > index.cgi
comparison lwcc/cpp/preproc.c @ 293:c419b3b3d43f ccdev
Checkpoint on lwcc-cpp development
This is a checkpoint with some substantial code cleanups on what is so far
implemented. This should avoid substantial code duplication later.
author | William Astle <lost@l-w.ca> |
---|---|
date | Mon, 09 Sep 2013 23:07:19 -0600 |
parents | |
children | 048adfee2933 |
comparison
equal
deleted
inserted
replaced
292:40ecbd5da481 | 293:c419b3b3d43f |
---|---|
1 /* | |
2 lwcc/cpp/preproc.c | |
3 | |
4 Copyright © 2013 William Astle | |
5 | |
6 This file is part of LWTOOLS. | |
7 | |
8 LWTOOLS is free software: you can redistribute it and/or modify it under the | |
9 terms of the GNU General Public License as published by the Free Software | |
10 Foundation, either version 3 of the License, or (at your option) any later | |
11 version. | |
12 | |
13 This program is distributed in the hope that it will be useful, but WITHOUT | |
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
16 more details. | |
17 | |
18 You should have received a copy of the GNU General Public License along with | |
19 this program. If not, see <http://www.gnu.org/licenses/>. | |
20 */ | |
21 | |
22 #include <stdio.h> | |
23 #include <stdlib.h> | |
24 | |
25 #include <lw_alloc.h> | |
26 | |
27 #include "cpp.h" | |
28 | |
29 | |
/* Forward declarations for the scanning helpers defined later in this file. */
int munch_comment(void);
char *parse_str_lit(void);
char *parse_chr_lit(void);
char *parse_num_lit(int c);
void preprocess_identifier(int c);
void preprocess_directive(void);


/* NOTE(review): not referenced in the visible part of this file; presumably
   tracks nesting of skipped conditional blocks - confirm against its users. */
int skip_level;
39 | |
40 /* | |
41 Notes: | |
42 | |
43 Rather than tokenize the entire file, we run through it interpreting | |
44 things only as much as we need to in order to identify the following: | |
45 | |
46 preprocessing directives (#...) | |
47 identifiers which might need to be replaced with macros | |
48 | |
49 We have to interpret strings, character constants, and numbers to prevent | |
50 false positives in those situations. | |
51 | |
52 When we find a preprocessing directive, it is handled with a more | |
53 aggressive tokenization process and then intepreted accordingly. | |
54 | |
55 nlws is used to record the fact that only whitespace has occurred at the | |
56 start of a line. Whitespace is defined as comments or isspace(c). It gets | |
57 reset to 1 after each EOL character. If a non-whitespace character is | |
58 encountered, it is set to -1. If the character processing decides it really | |
59 is a whitespace character, it will set nlws back to 1 (block comment). | |
60 Elsewise, it will get set to 0 if it is still -1 when the loop starts again. | |
61 | |
62 This is needed so we can identify whitespace interposed before a | |
63 preprocessor directive. This is the only case where it matters for | |
64 the preprocessor. | |
65 | |
66 */ | |
67 void preprocess_file() | |
68 { | |
69 int c; | |
70 int nlws = 1; | |
71 | |
72 preprocess_output_location(1); | |
73 for (;;) | |
74 { | |
75 c = fetch_byte(); | |
76 // if we had non-whitespace that wasn't munched (comment), set flag correctly | |
77 if (nlws == -1) | |
78 nlws = 0; | |
79 if (c == CPP_EOF) | |
80 { | |
81 // end of input - make sure newline is present | |
82 outchr('\n'); | |
83 return; | |
84 } | |
85 if (c == CPP_EOL) | |
86 { | |
87 // flag that we just hit the start of a new line | |
88 nlws = 1; | |
89 outchr(CPP_EOL); | |
90 continue; | |
91 } | |
92 | |
93 /* if we have a non-whitespace character, flag it as such */ | |
94 if (!is_whitespace(c)) | |
95 nlws = -1; | |
96 | |
97 if (c == '#' && nlws) | |
98 { | |
99 // we have a preprocessor directive here - this call will do | |
100 // everything including outputting the blank line, if appropriate | |
101 preprocess_directive(); | |
102 continue; | |
103 } | |
104 else if (c == '\'') | |
105 { | |
106 // we have a character constant here | |
107 outstr(parse_chr_lit()); | |
108 continue; | |
109 } | |
110 else if (c == '"') | |
111 { | |
112 // we have a string constant here | |
113 outstr(parse_str_lit()); | |
114 continue; | |
115 } | |
116 else if (c == '.') | |
117 { | |
118 // we might have a number here | |
119 outchr('.'); | |
120 c = fetch_byte(); | |
121 if (is_dec(c)) | |
122 outstr(parse_num_lit(c)); | |
123 continue; | |
124 } | |
125 else if (is_dec(c)) | |
126 { | |
127 // we have a number here | |
128 outstr(parse_num_lit(c)); | |
129 } | |
130 else if (c == '/') | |
131 { | |
132 // we might have a comment here | |
133 c = munch_comment(); | |
134 if (c < 0) | |
135 { | |
136 outchr('/'); | |
137 continue; | |
138 } | |
139 // comments are white space - count them as such at start of line | |
140 if (nlws == -1) | |
141 nlws = 0; | |
142 /* c is the number of EOL characters the comment spanned */ | |
143 while (c--) | |
144 outchr(CPP_EOL); | |
145 continue; | |
146 } | |
147 else if (c == 'L') | |
148 { | |
149 // wide character string or wide character constant, or identifier | |
150 c = fetch_byte(); | |
151 if (c == '"') | |
152 { | |
153 outchr('L'); | |
154 outstr(parse_str_lit()); | |
155 continue; | |
156 } | |
157 else if (c == '\'') | |
158 { | |
159 outchr('L'); | |
160 outstr(parse_chr_lit()); | |
161 continue; | |
162 } | |
163 unfetch_byte(c); | |
164 preprocess_identifier('L'); | |
165 continue; | |
166 } | |
167 else if (is_sidchr(c)) | |
168 { | |
169 // identifier of some kind | |
170 preprocess_identifier(c); | |
171 continue; | |
172 } | |
173 else | |
174 { | |
175 // random character - pass through | |
176 outchr(c); | |
177 } | |
178 } | |
179 } | |
180 | |
/*
Collect an identifier whose first character is c, then write it to the
output. Characters are fetched for as long as is_idchr() accepts them; the
first character that does not belong to the identifier is pushed back with
unfetch_byte() so the caller sees it again.

The identifier is held in a temporary buffer which is released before
returning.
*/
void preprocess_identifier(int c)
{
	char *ident = NULL;
	int idlen = 0;
	int idbufl = 0;

	do
	{
		/* always keep one spare byte so the terminating NUL written after
		   the loop can never land outside the buffer */
		if (idlen + 1 >= idbufl)
		{
			idbufl += 50;
			ident = lw_realloc(ident, idbufl);
		}
		ident[idlen++] = c;
		c = fetch_byte();
	} while (is_idchr(c));

	ident[idlen] = 0;
	unfetch_byte(c);

	/* do something with the identifier here - macros, etc. */
	outstr(ident);
	lw_free(ident);
}
205 | |
/*
Append one character to the growable buffer of the enclosing function.

The macro expects three variables to be in scope where it is used:
ident (char *, the buffer), idlen (int, bytes used) and idbufl (int, bytes
allocated). The buffer is grown in steps of 100 bytes when it is full.
*/
#define to_buf(c) do { \
		if (idlen >= idbufl) \
		{ \
			idbufl += 100; \
			ident = lw_realloc(ident, idbufl); \
		} \
		ident[idlen++] = (c); \
	} while (0)

/*
Collect a numeric literal whose first character is c.

Digits and '.' are accepted, as is an exponent marker (is_ep()) optionally
followed by a sign. The first character that does not belong to the number
is pushed back with unfetch_byte() so that it is not lost.

Returns a NUL terminated string held in a static buffer that is reused (and
therefore overwritten) by the next call; the caller must not free it.
*/
char *parse_num_lit(int c)
{
	static char *ident = NULL;
	int idlen = 0;
	static int idbufl = 0;

	do
	{
		to_buf(c);
		c = fetch_byte();
		if (is_ep(c))
		{
			to_buf(c);
			c = fetch_byte();
			if (c == '-' || c == '+')
			{
				to_buf(c);
				c = fetch_byte();
			}
		}
	} while ((is_dec(c)) || (c == '.'));

	/* c is the look-ahead that ended the number - hand it back */
	unfetch_byte(c);
	to_buf(0);

	return ident;
}
232 | |
233 char *parse_chr_lit(void) | |
234 { | |
235 static char *ident = NULL; | |
236 int idlen = 0; | |
237 static int idbufl = 0; | |
238 int c; | |
239 | |
240 to_buf('\''); | |
241 while ((c = fetch_byte()) != '\'') | |
242 { | |
243 if (c == CPP_EOL || c == CPP_EOF) | |
244 { | |
245 unfetch_byte(c); | |
246 to_buf(0); | |
247 do_warning("Unterminated character constant"); | |
248 return ident; | |
249 } | |
250 if (c == '\\') | |
251 { | |
252 to_buf(c); | |
253 c = fetch_byte(); | |
254 if (c == CPP_EOL || c == CPP_EOF) | |
255 { | |
256 unfetch_byte(c); | |
257 to_buf(0); | |
258 do_warning("Unterminated character constant"); | |
259 return ident; | |
260 } | |
261 } | |
262 to_buf(c); | |
263 } | |
264 to_buf(c); | |
265 to_buf(0); | |
266 return ident; | |
267 } | |
268 | |
269 char *parse_str_lit(void) | |
270 { | |
271 static char *ident = NULL; | |
272 int idlen = 0; | |
273 static int idbufl = 0; | |
274 int c; | |
275 | |
276 to_buf('"'); | |
277 while ((c = fetch_byte()) != '"') | |
278 { | |
279 if (c == CPP_EOL || c == CPP_EOF) | |
280 { | |
281 unfetch_byte(c); | |
282 to_buf(0); | |
283 do_warning("Unterminated string literal"); | |
284 return ident; | |
285 } | |
286 if (c == '\\') | |
287 { | |
288 to_buf(c); | |
289 c = fetch_byte(); | |
290 if (c == CPP_EOL || c == CPP_EOF) | |
291 { | |
292 unfetch_byte(c); | |
293 to_buf(0); | |
294 do_warning("Unterminated string literal"); | |
295 return ident; | |
296 } | |
297 } | |
298 to_buf(c); | |
299 } | |
300 to_buf(c); | |
301 to_buf(0); | |
302 return ident; | |
303 } | |
304 | |
305 int munch_comment(void) | |
306 { | |
307 int nlc = 0; | |
308 int c; | |
309 | |
310 c = fetch_byte(); | |
311 if (c == '/') | |
312 { | |
313 // single line comment | |
314 for (;;) | |
315 { | |
316 c = fetch_byte(); | |
317 if (c == CPP_EOL) | |
318 nlc = 1; | |
319 if (c == CPP_EOL || c == CPP_EOF) | |
320 return nlc; | |
321 } | |
322 } | |
323 else if (c == '*') | |
324 { | |
325 // block comment | |
326 for (;;) | |
327 { | |
328 c = fetch_byte(); | |
329 if (c == CPP_EOL) | |
330 nlc++; | |
331 if (c == CPP_EOF) | |
332 return nlc; | |
333 if (c == '*') | |
334 { | |
335 c = fetch_byte(); | |
336 if (c == '/' || c == CPP_EOF) | |
337 return nlc; | |
338 if (c == CPP_EOL) | |
339 nlc++; | |
340 } | |
341 } | |
342 return nlc; | |
343 } | |
344 else | |
345 { | |
346 unfetch_byte(c); | |
347 return -1; | |
348 } | |
349 | |
350 return nlc; | |
351 } | |
352 | |
353 /* Output a location directive to synchronize the compiler with the correct | |
354 input line number and file. This is of the form: | |
355 | |
356 # <linenum> <filename> <flag> | |
357 | |
358 where <linenum> is the line number inside the file, <filename> is the | |
359 filename (as a C string), and <flag> is the specified flag argument which | |
360 should be 1 for the start of a new file or 2 for returning to the file from | |
361 another file. <linenum> is the line number the following line came from. | |
362 */ | |
363 void preprocess_output_location(int flag) | |
364 { | |
365 fprintf(output_fp, "# %d \"%s\" %d\n", file_stack -> line, file_stack -> fn, flag); | |
366 } | |
367 | |
/* Process a preprocessor directive.

The leading '#' has already been consumed by the caller. At this point the
function is only a placeholder: it writes the two characters '>' and '#' to
the output and consumes no further input. */
void preprocess_directive(void)
{
	static const char marker[] = { '>', '#' };
	unsigned int i;

	for (i = 0; i < sizeof(marker) / sizeof(marker[0]); i++)
		outchr(marker[i]);
}