293
|
1 /*
|
|
2 lwcc/cpp/preproc.c
|
|
3
|
|
4 Copyright © 2013 William Astle
|
|
5
|
|
6 This file is part of LWTOOLS.
|
|
7
|
|
8 LWTOOLS is free software: you can redistribute it and/or modify it under the
|
|
9 terms of the GNU General Public License as published by the Free Software
|
|
10 Foundation, either version 3 of the License, or (at your option) any later
|
|
11 version.
|
|
12
|
|
13 This program is distributed in the hope that it will be useful, but WITHOUT
|
|
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
|
16 more details.
|
|
17
|
|
18 You should have received a copy of the GNU General Public License along with
|
|
19 this program. If not, see <http://www.gnu.org/licenses/>.
|
|
20 */
|
|
21
|
|
22 #include <stdio.h>
|
|
23 #include <stdlib.h>
|
|
24
|
|
25 #include <lw_alloc.h>
|
|
26
|
|
27 #include "cpp.h"
|
|
28
|
|
29
|
|
30 int munch_comment(void);
|
|
31 char *parse_str_lit(void);
|
|
32 char *parse_chr_lit(void);
|
|
33 char *parse_num_lit(int);
|
|
34 void preprocess_identifier(int);
|
|
35 void preprocess_directive(void);
|
|
36
|
|
37
|
|
38 int skip_level;
|
|
39
|
|
40 /*
|
|
41 Notes:
|
|
42
|
|
43 Rather than tokenize the entire file, we run through it interpreting
|
|
44 things only as much as we need to in order to identify the following:
|
|
45
|
|
46 preprocessing directives (#...)
|
|
47 identifiers which might need to be replaced with macros
|
|
48
|
|
49 We have to interpret strings, character constants, and numbers to prevent
|
|
50 false positives in those situations.
|
|
51
|
|
52 When we find a preprocessing directive, it is handled with a more
|
|
53 aggressive tokenization process and then intepreted accordingly.
|
|
54
|
|
55 nlws is used to record the fact that only whitespace has occurred at the
|
|
56 start of a line. Whitespace is defined as comments or isspace(c). It gets
|
|
57 reset to 1 after each EOL character. If a non-whitespace character is
|
|
58 encountered, it is set to -1. If the character processing decides it really
|
|
59 is a whitespace character, it will set nlws back to 1 (block comment).
|
|
60 Elsewise, it will get set to 0 if it is still -1 when the loop starts again.
|
|
61
|
|
62 This is needed so we can identify whitespace interposed before a
|
|
63 preprocessor directive. This is the only case where it matters for
|
|
64 the preprocessor.
|
|
65
|
|
66 */
|
|
67 void preprocess_file()
|
|
68 {
|
|
69 int c;
|
|
70 int nlws = 1;
|
|
71
|
|
72 preprocess_output_location(1);
|
|
73 for (;;)
|
|
74 {
|
|
75 c = fetch_byte();
|
|
76 // if we had non-whitespace that wasn't munched (comment), set flag correctly
|
|
77 if (nlws == -1)
|
|
78 nlws = 0;
|
|
79 if (c == CPP_EOF)
|
|
80 {
|
|
81 // end of input - make sure newline is present
|
|
82 outchr('\n');
|
|
83 return;
|
|
84 }
|
|
85 if (c == CPP_EOL)
|
|
86 {
|
|
87 // flag that we just hit the start of a new line
|
|
88 nlws = 1;
|
|
89 outchr(CPP_EOL);
|
|
90 continue;
|
|
91 }
|
|
92
|
|
93 /* if we have a non-whitespace character, flag it as such */
|
|
94 if (!is_whitespace(c))
|
|
95 nlws = -1;
|
|
96
|
|
97 if (c == '#' && nlws)
|
|
98 {
|
|
99 // we have a preprocessor directive here - this call will do
|
|
100 // everything including outputting the blank line, if appropriate
|
|
101 preprocess_directive();
|
|
102 continue;
|
|
103 }
|
|
104 else if (c == '\'')
|
|
105 {
|
|
106 // we have a character constant here
|
|
107 outstr(parse_chr_lit());
|
|
108 continue;
|
|
109 }
|
|
110 else if (c == '"')
|
|
111 {
|
|
112 // we have a string constant here
|
|
113 outstr(parse_str_lit());
|
|
114 continue;
|
|
115 }
|
|
116 else if (c == '.')
|
|
117 {
|
|
118 // we might have a number here
|
|
119 outchr('.');
|
|
120 c = fetch_byte();
|
|
121 if (is_dec(c))
|
|
122 outstr(parse_num_lit(c));
|
|
123 continue;
|
|
124 }
|
|
125 else if (is_dec(c))
|
|
126 {
|
|
127 // we have a number here
|
|
128 outstr(parse_num_lit(c));
|
|
129 }
|
|
130 else if (c == '/')
|
|
131 {
|
|
132 // we might have a comment here
|
|
133 c = munch_comment();
|
|
134 if (c < 0)
|
|
135 {
|
|
136 outchr('/');
|
|
137 continue;
|
|
138 }
|
|
139 // comments are white space - count them as such at start of line
|
|
140 if (nlws == -1)
|
|
141 nlws = 0;
|
|
142 /* c is the number of EOL characters the comment spanned */
|
|
143 while (c--)
|
|
144 outchr(CPP_EOL);
|
|
145 continue;
|
|
146 }
|
|
147 else if (c == 'L')
|
|
148 {
|
|
149 // wide character string or wide character constant, or identifier
|
|
150 c = fetch_byte();
|
|
151 if (c == '"')
|
|
152 {
|
|
153 outchr('L');
|
|
154 outstr(parse_str_lit());
|
|
155 continue;
|
|
156 }
|
|
157 else if (c == '\'')
|
|
158 {
|
|
159 outchr('L');
|
|
160 outstr(parse_chr_lit());
|
|
161 continue;
|
|
162 }
|
|
163 unfetch_byte(c);
|
|
164 preprocess_identifier('L');
|
|
165 continue;
|
|
166 }
|
|
167 else if (is_sidchr(c))
|
|
168 {
|
|
169 // identifier of some kind
|
|
170 preprocess_identifier(c);
|
|
171 continue;
|
|
172 }
|
|
173 else
|
|
174 {
|
|
175 // random character - pass through
|
|
176 outchr(c);
|
|
177 }
|
|
178 }
|
|
179 }
|
|
180
|
|
/*
Collect a complete identifier from the input and write it to the output.

c is the first character of the identifier (already fetched by the caller).
Further characters are read with fetch_byte() for as long as is_idchr()
accepts them; the first character that is not part of the identifier is
pushed back with unfetch_byte() so the caller sees it next.

The identifier is currently passed through unchanged; macro handling is
meant to hook in where the identifier is output.
*/
void preprocess_identifier(int c)
{
	char *ident = NULL;
	int idlen = 0;
	int idbufl = 0;

	do
	{
		/*
		grow while fewer than two free bytes remain: one for this character
		and one for the terminating NUL, so the terminator written after
		the loop can never land one past the end of the buffer
		*/
		if (idlen + 1 >= idbufl)
		{
			idbufl += 50;
			ident = lw_realloc(ident, idbufl);
		}
		ident[idlen++] = c;
		c = fetch_byte();
	} while (is_idchr(c));

	ident[idlen] = 0;
	unfetch_byte(c);

	/* do something with the identifier here - macros, etc. */
	outstr(ident);
	lw_free(ident);
}
|
|
205
|
|
/*
Append one byte to the growable buffer of the enclosing function.

The enclosing function must have "ident" (char *), "idlen" (int, bytes
used) and "idbufl" (int, bytes allocated) in scope. The buffer is grown by
100 bytes through lw_realloc() whenever it is full.
*/
#define to_buf(c) do { if (idlen >= idbufl) { idbufl += 100; ident = lw_realloc(ident, idbufl); } ident[idlen++] = (c); } while (0)

/*
Collect a numeric literal from the input.

c is the first character of the number (already fetched by the caller).
Digits and '.' continue the number; a character accepted by is_ep() is
taken together with an optional following '+' or '-' sign.

The character that terminates the number is pushed back with
unfetch_byte() so that it is not lost from the input stream.

Returns a NUL terminated string held in a static buffer that is reused by
the next call; the caller must not free it.

NOTE(review): letters other than those accepted by is_ep() (hex digits,
the 'x' of a hex prefix, type suffixes) end the number here and are then
seen by the caller as the start of an identifier - confirm whether that is
intended.
*/
char *parse_num_lit(int c)
{
	static char *ident = NULL;
	int idlen = 0;
	static int idbufl = 0;

	do
	{
		to_buf(c);
		c = fetch_byte();
		if (is_ep(c))
		{
			to_buf(c);
			c = fetch_byte();
			if (c == '-' || c == '+')
			{
				to_buf(c);
				c = fetch_byte();
			}
		}
	} while ((is_dec(c)) || (c == '.'));
	to_buf(0);

	/* c is the first byte after the number - hand it back to the input */
	unfetch_byte(c);

	return ident;
}
|
|
232
|
|
233 char *parse_chr_lit(void)
|
|
234 {
|
|
235 static char *ident = NULL;
|
|
236 int idlen = 0;
|
|
237 static int idbufl = 0;
|
|
238 int c;
|
|
239
|
|
240 to_buf('\'');
|
|
241 while ((c = fetch_byte()) != '\'')
|
|
242 {
|
|
243 if (c == CPP_EOL || c == CPP_EOF)
|
|
244 {
|
|
245 unfetch_byte(c);
|
|
246 to_buf(0);
|
|
247 do_warning("Unterminated character constant");
|
|
248 return ident;
|
|
249 }
|
|
250 if (c == '\\')
|
|
251 {
|
|
252 to_buf(c);
|
|
253 c = fetch_byte();
|
|
254 if (c == CPP_EOL || c == CPP_EOF)
|
|
255 {
|
|
256 unfetch_byte(c);
|
|
257 to_buf(0);
|
|
258 do_warning("Unterminated character constant");
|
|
259 return ident;
|
|
260 }
|
|
261 }
|
|
262 to_buf(c);
|
|
263 }
|
|
264 to_buf(c);
|
|
265 to_buf(0);
|
|
266 return ident;
|
|
267 }
|
|
268
|
|
269 char *parse_str_lit(void)
|
|
270 {
|
|
271 static char *ident = NULL;
|
|
272 int idlen = 0;
|
|
273 static int idbufl = 0;
|
|
274 int c;
|
|
275
|
|
276 to_buf('"');
|
|
277 while ((c = fetch_byte()) != '"')
|
|
278 {
|
|
279 if (c == CPP_EOL || c == CPP_EOF)
|
|
280 {
|
|
281 unfetch_byte(c);
|
|
282 to_buf(0);
|
|
283 do_warning("Unterminated string literal");
|
|
284 return ident;
|
|
285 }
|
|
286 if (c == '\\')
|
|
287 {
|
|
288 to_buf(c);
|
|
289 c = fetch_byte();
|
|
290 if (c == CPP_EOL || c == CPP_EOF)
|
|
291 {
|
|
292 unfetch_byte(c);
|
|
293 to_buf(0);
|
|
294 do_warning("Unterminated string literal");
|
|
295 return ident;
|
|
296 }
|
|
297 }
|
|
298 to_buf(c);
|
|
299 }
|
|
300 to_buf(c);
|
|
301 to_buf(0);
|
|
302 return ident;
|
|
303 }
|
|
304
|
|
305 int munch_comment(void)
|
|
306 {
|
|
307 int nlc = 0;
|
|
308 int c;
|
|
309
|
|
310 c = fetch_byte();
|
|
311 if (c == '/')
|
|
312 {
|
|
313 // single line comment
|
|
314 for (;;)
|
|
315 {
|
|
316 c = fetch_byte();
|
|
317 if (c == CPP_EOL)
|
|
318 nlc = 1;
|
|
319 if (c == CPP_EOL || c == CPP_EOF)
|
|
320 return nlc;
|
|
321 }
|
|
322 }
|
|
323 else if (c == '*')
|
|
324 {
|
|
325 // block comment
|
|
326 for (;;)
|
|
327 {
|
|
328 c = fetch_byte();
|
|
329 if (c == CPP_EOL)
|
|
330 nlc++;
|
|
331 if (c == CPP_EOF)
|
|
332 return nlc;
|
|
333 if (c == '*')
|
|
334 {
|
|
335 c = fetch_byte();
|
|
336 if (c == '/' || c == CPP_EOF)
|
|
337 return nlc;
|
|
338 if (c == CPP_EOL)
|
|
339 nlc++;
|
|
340 }
|
|
341 }
|
|
342 return nlc;
|
|
343 }
|
|
344 else
|
|
345 {
|
|
346 unfetch_byte(c);
|
|
347 return -1;
|
|
348 }
|
|
349
|
|
350 return nlc;
|
|
351 }
|
|
352
|
|
353 /* Output a location directive to synchronize the compiler with the correct
|
|
354 input line number and file. This is of the form:
|
|
355
|
|
356 # <linenum> <filename> <flag>
|
|
357
|
|
358 where <linenum> is the line number inside the file, <filename> is the
|
|
359 filename (as a C string), and <flag> is the specified flag argument which
|
|
360 should be 1 for the start of a new file or 2 for returning to the file from
|
|
361 another file. <linenum> is the line number the following line came from.
|
|
362 */
|
|
363 void preprocess_output_location(int flag)
|
|
364 {
|
|
365 fprintf(output_fp, "# %d \"%s\" %d\n", file_stack -> line, file_stack -> fn, flag);
|
|
366 }
|
|
367
|
|
/*
Process a preprocessor directive.

Called by preprocess_file() after the introducing '#' has been read. At
present this only writes the two characters '>' and '#' to the output and
reads nothing further from the input.
*/
void preprocess_directive(void)
{
	static const char marker[] = { '>', '#' };
	unsigned int i;

	for (i = 0; i < sizeof(marker) / sizeof(marker[0]); i++)
		outchr(marker[i]);
}
|