comparison lwcc/cpp/preproc.c @ 293:c419b3b3d43f ccdev

Checkpoint on lwcc-cpp development. This is a checkpoint with some substantial code cleanups on what is so far implemented. This should avoid substantial code duplication later.
author William Astle <lost@l-w.ca>
date Mon, 09 Sep 2013 23:07:19 -0600
parents
children 048adfee2933
comparison
equal deleted inserted replaced
292:40ecbd5da481 293:c419b3b3d43f
1 /*
2 lwcc/cpp/preproc.c
3
4 Copyright © 2013 William Astle
5
6 This file is part of LWTOOLS.
7
8 LWTOOLS is free software: you can redistribute it and/or modify it under the
9 terms of the GNU General Public License as published by the Free Software
10 Foundation, either version 3 of the License, or (at your option) any later
11 version.
12
13 This program is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 more details.
17
18 You should have received a copy of the GNU General Public License along with
19 this program. If not, see <http://www.gnu.org/licenses/>.
20 */
21
22 #include <stdio.h>
23 #include <stdlib.h>
24
25 #include <lw_alloc.h>
26
27 #include "cpp.h"
28
29
30 int munch_comment(void);
31 char *parse_str_lit(void);
32 char *parse_chr_lit(void);
33 char *parse_num_lit(int);
34 void preprocess_identifier(int);
35 void preprocess_directive(void);
36
37
38 int skip_level;
39
40 /*
41 Notes:
42
43 Rather than tokenize the entire file, we run through it interpreting
44 things only as much as we need to in order to identify the following:
45
46 preprocessing directives (#...)
47 identifiers which might need to be replaced with macros
48
49 We have to interpret strings, character constants, and numbers to prevent
50 false positives in those situations.
51
52 When we find a preprocessing directive, it is handled with a more
53 aggressive tokenization process and then intepreted accordingly.
54
55 nlws is used to record the fact that only whitespace has occurred at the
56 start of a line. Whitespace is defined as comments or isspace(c). It gets
57 reset to 1 after each EOL character. If a non-whitespace character is
58 encountered, it is set to -1. If the character processing decides it really
59 is a whitespace character, it will set nlws back to 1 (block comment).
60 Elsewise, it will get set to 0 if it is still -1 when the loop starts again.
61
62 This is needed so we can identify whitespace interposed before a
63 preprocessor directive. This is the only case where it matters for
64 the preprocessor.
65
66 */
67 void preprocess_file()
68 {
69 int c;
70 int nlws = 1;
71
72 preprocess_output_location(1);
73 for (;;)
74 {
75 c = fetch_byte();
76 // if we had non-whitespace that wasn't munched (comment), set flag correctly
77 if (nlws == -1)
78 nlws = 0;
79 if (c == CPP_EOF)
80 {
81 // end of input - make sure newline is present
82 outchr('\n');
83 return;
84 }
85 if (c == CPP_EOL)
86 {
87 // flag that we just hit the start of a new line
88 nlws = 1;
89 outchr(CPP_EOL);
90 continue;
91 }
92
93 /* if we have a non-whitespace character, flag it as such */
94 if (!is_whitespace(c))
95 nlws = -1;
96
97 if (c == '#' && nlws)
98 {
99 // we have a preprocessor directive here - this call will do
100 // everything including outputting the blank line, if appropriate
101 preprocess_directive();
102 continue;
103 }
104 else if (c == '\'')
105 {
106 // we have a character constant here
107 outstr(parse_chr_lit());
108 continue;
109 }
110 else if (c == '"')
111 {
112 // we have a string constant here
113 outstr(parse_str_lit());
114 continue;
115 }
116 else if (c == '.')
117 {
118 // we might have a number here
119 outchr('.');
120 c = fetch_byte();
121 if (is_dec(c))
122 outstr(parse_num_lit(c));
123 continue;
124 }
125 else if (is_dec(c))
126 {
127 // we have a number here
128 outstr(parse_num_lit(c));
129 }
130 else if (c == '/')
131 {
132 // we might have a comment here
133 c = munch_comment();
134 if (c < 0)
135 {
136 outchr('/');
137 continue;
138 }
139 // comments are white space - count them as such at start of line
140 if (nlws == -1)
141 nlws = 0;
142 /* c is the number of EOL characters the comment spanned */
143 while (c--)
144 outchr(CPP_EOL);
145 continue;
146 }
147 else if (c == 'L')
148 {
149 // wide character string or wide character constant, or identifier
150 c = fetch_byte();
151 if (c == '"')
152 {
153 outchr('L');
154 outstr(parse_str_lit());
155 continue;
156 }
157 else if (c == '\'')
158 {
159 outchr('L');
160 outstr(parse_chr_lit());
161 continue;
162 }
163 unfetch_byte(c);
164 preprocess_identifier('L');
165 continue;
166 }
167 else if (is_sidchr(c))
168 {
169 // identifier of some kind
170 preprocess_identifier(c);
171 continue;
172 }
173 else
174 {
175 // random character - pass through
176 outchr(c);
177 }
178 }
179 }
180
/*
Collect an identifier whose first character is c, reading further bytes
with fetch_byte() for as long as is_idchr() accepts them. The byte that
ends the identifier is handed back with unfetch_byte(). The identifier is
then written to the output unchanged.

The buffer is grown in steps of 50 bytes and is released before returning.
*/
void preprocess_identifier(int c)
{
	char *ident = NULL;
	int idlen = 0;
	int idbufl = 0;

	do
	{
		/*
		Grow while there is not room for this character AND the terminating
		NUL. Checking only idlen >= idbufl would let the NUL land one byte
		past the end when the identifier exactly fills the buffer.
		*/
		if (idlen + 1 >= idbufl)
		{
			idbufl += 50;
			ident = lw_realloc(ident, idbufl);
		}
		ident[idlen++] = c;
		c = fetch_byte();
	} while (is_idchr(c));

	ident[idlen] = 0;
	unfetch_byte(c);

	/* do something with the identifier here - macros, etc. */
	outstr(ident);
	lw_free(ident);
}
205
/*
Append one byte to the buffer described by the variables ident, idlen and
idbufl, which must be in scope at the point of use. The buffer is grown in
steps of 100 bytes whenever it is full.
*/
#define to_buf(c) do { if (idlen >= idbufl) { idbufl += 100; ident = lw_realloc(ident, idbufl); } ident[idlen++] = (c); } while (0)

/*
Collect a numeric literal whose first character is c.

Digits and '.' are accumulated; a character accepted by is_ep() may be
followed by a single '+' or '-', which is accumulated as well. The first
byte that does not belong to the number is handed back with unfetch_byte()
so the caller sees it on its next fetch.

Returns a NUL terminated string held in a static buffer; the contents are
overwritten by the next call to this function.

NOTE(review): other identifier characters (for example the 'x' of a hex
constant or a type suffix) end the literal here and are then scanned as a
separate identifier by the caller - confirm whether that is intended.
*/
char *parse_num_lit(int c)
{
	static char *ident = NULL;
	int idlen = 0;
	static int idbufl = 0;

	do
	{
		to_buf(c);
		c = fetch_byte();
		if (is_ep(c))
		{
			to_buf(c);
			c = fetch_byte();
			if (c == '-' || c == '+')
			{
				to_buf(c);
				c = fetch_byte();
			}
		}
	} while ((is_dec(c)) || (c == '.'));

	/* c was read ahead but is not part of the number: give it back */
	unfetch_byte(c);
	to_buf(0);

	return ident;
}
232
233 char *parse_chr_lit(void)
234 {
235 static char *ident = NULL;
236 int idlen = 0;
237 static int idbufl = 0;
238 int c;
239
240 to_buf('\'');
241 while ((c = fetch_byte()) != '\'')
242 {
243 if (c == CPP_EOL || c == CPP_EOF)
244 {
245 unfetch_byte(c);
246 to_buf(0);
247 do_warning("Unterminated character constant");
248 return ident;
249 }
250 if (c == '\\')
251 {
252 to_buf(c);
253 c = fetch_byte();
254 if (c == CPP_EOL || c == CPP_EOF)
255 {
256 unfetch_byte(c);
257 to_buf(0);
258 do_warning("Unterminated character constant");
259 return ident;
260 }
261 }
262 to_buf(c);
263 }
264 to_buf(c);
265 to_buf(0);
266 return ident;
267 }
268
269 char *parse_str_lit(void)
270 {
271 static char *ident = NULL;
272 int idlen = 0;
273 static int idbufl = 0;
274 int c;
275
276 to_buf('"');
277 while ((c = fetch_byte()) != '"')
278 {
279 if (c == CPP_EOL || c == CPP_EOF)
280 {
281 unfetch_byte(c);
282 to_buf(0);
283 do_warning("Unterminated string literal");
284 return ident;
285 }
286 if (c == '\\')
287 {
288 to_buf(c);
289 c = fetch_byte();
290 if (c == CPP_EOL || c == CPP_EOF)
291 {
292 unfetch_byte(c);
293 to_buf(0);
294 do_warning("Unterminated string literal");
295 return ident;
296 }
297 }
298 to_buf(c);
299 }
300 to_buf(c);
301 to_buf(0);
302 return ident;
303 }
304
305 int munch_comment(void)
306 {
307 int nlc = 0;
308 int c;
309
310 c = fetch_byte();
311 if (c == '/')
312 {
313 // single line comment
314 for (;;)
315 {
316 c = fetch_byte();
317 if (c == CPP_EOL)
318 nlc = 1;
319 if (c == CPP_EOL || c == CPP_EOF)
320 return nlc;
321 }
322 }
323 else if (c == '*')
324 {
325 // block comment
326 for (;;)
327 {
328 c = fetch_byte();
329 if (c == CPP_EOL)
330 nlc++;
331 if (c == CPP_EOF)
332 return nlc;
333 if (c == '*')
334 {
335 c = fetch_byte();
336 if (c == '/' || c == CPP_EOF)
337 return nlc;
338 if (c == CPP_EOL)
339 nlc++;
340 }
341 }
342 return nlc;
343 }
344 else
345 {
346 unfetch_byte(c);
347 return -1;
348 }
349
350 return nlc;
351 }
352
353 /* Output a location directive to synchronize the compiler with the correct
354 input line number and file. This is of the form:
355
356 # <linenum> <filename> <flag>
357
358 where <linenum> is the line number inside the file, <filename> is the
359 filename (as a C string), and <flag> is the specified flag argument which
360 should be 1 for the start of a new file or 2 for returning to the file from
361 another file. <linenum> is the line number the following line came from.
362 */
363 void preprocess_output_location(int flag)
364 {
365 fprintf(output_fp, "# %d \"%s\" %d\n", file_stack -> line, file_stack -> fn, flag);
366 }
367
/*
Process a preprocessor directive.

At this checkpoint no directive is actually interpreted: the two characters
'>' and '#' are written to the output, in that order, and nothing is read
from the input.
*/
void preprocess_directive(void)
{
	static const char marker[] = { '>', '#' };
	unsigned int i;

	for (i = 0; i < sizeof marker; i++)
		outchr(marker[i]);
}