diff lwcc/cpp/preproc.c @ 293:c419b3b3d43f ccdev

Checkpoint on lwcc-cpp development This is a checkpoint with some substantial code cleanups on what is so far implemented. This should avoid substantial code duplication later.
author William Astle <lost@l-w.ca>
date Mon, 09 Sep 2013 23:07:19 -0600
parents
children 048adfee2933
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lwcc/cpp/preproc.c	Mon Sep 09 23:07:19 2013 -0600
@@ -0,0 +1,373 @@
+/*
+lwcc/cpp/preproc.c
+
+Copyright © 2013 William Astle
+
+This file is part of LWTOOLS.
+
+LWTOOLS is free software: you can redistribute it and/or modify it under the
+terms of the GNU General Public License as published by the Free Software
+Foundation, either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+more details.
+
+You should have received a copy of the GNU General Public License along with
+this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <lw_alloc.h>
+
+#include "cpp.h"
+
+
+int munch_comment(void); /* called after a '/': returns -1 if no comment follows, else the number of EOLs the comment consumed */
+char *parse_str_lit(void); /* called after the opening '"': returns the literal text in a buffer reused by the next call */
+char *parse_chr_lit(void); /* called after the opening '\'': returns the literal text in a buffer reused by the next call */
+char *parse_num_lit(int); /* argument is the first digit already read: returns the number text in a buffer reused by the next call */
+void preprocess_identifier(int); /* argument is the first identifier character already read: copies the identifier to the output */
+void preprocess_directive(void); /* called after a '#' that starts a directive */
+
+
+int skip_level; /* NOTE(review): not referenced anywhere in this file; presumably a conditional-compilation skip depth - confirm against its users */
+
+/*
+Notes:
+
+Rather than tokenize the entire file, we run through it interpreting
+things only as much as we need to in order to identify the following:
+
+preprocessing directives (#...)
+identifiers which might need to be replaced with macros
+
+We have to interpret strings, character constants, and numbers to prevent
+false positives in those situations.
+
+When we find a preprocessing directive, it is handled with a more
+aggressive tokenization process and then interpreted accordingly.
+
+nlws records whether only whitespace has occurred so far on the current
+line. Whitespace is defined as comments or is_whitespace(c). It is set to 1
+after each EOL character. When the first non-whitespace character of a line
+is encountered, it is set to -1 (tentatively "not whitespace"). If the
+character processing decides it really was whitespace (a comment), it sets
+nlws back to 1. Otherwise, it is set to 0 when the loop starts again and it
+stays 0 until the next EOL.
+
+This is needed so we can identify whitespace interposed before a
+preprocessor directive. This is the only case where it matters for
+the preprocessor.
+
+*/
+void preprocess_file()
+{
+	int c;
+	int nlws = 1;
+	int linecmt;
+	
+	preprocess_output_location(1);
+	for (;;)
+	{
+		c = fetch_byte();
+		// the previous character was tentatively flagged as the first
+		// non-whitespace on its line and nothing reclaimed it as whitespace
+		if (nlws == -1)
+			nlws = 0;
+		if (c == CPP_EOF)
+		{
+			// end of input - make sure newline is present
+			outchr('\n');
+			return;
+		}
+		if (c == CPP_EOL)
+		{
+			// flag that we just hit the start of a new line
+			nlws = 1;
+			outchr(CPP_EOL);
+			continue;
+		}
+		
+		/* only the first non-whitespace character on a line gets the
+		   tentative flag; once nlws is 0 it must stay 0 until EOL, otherwise
+		   every '#' in the file would look like the start of a directive */
+		if (nlws == 1 && !is_whitespace(c))
+			nlws = -1;
+		
+		if (c == '#' && nlws)
+		{
+			// '#' is the first non-whitespace character on the line, so this
+			// is a preprocessor directive - this call will do everything
+			// including outputting the blank line, if appropriate
+			// NOTE(review): once preprocess_directive() consumes the line's
+			// EOL itself, nlws has to be reset to 1 here - confirm when the
+			// stub is filled in
+			preprocess_directive();
+			continue;
+		}
+		else if (c == '\'')
+		{
+			// we have a character constant here
+			outstr(parse_chr_lit());
+			continue;
+		}
+		else if (c == '"')
+		{
+			// we have a string constant here
+			outstr(parse_str_lit());
+			continue;
+		}
+		else if (c == '.')
+		{
+			// we might have a number here
+			outchr('.');
+			c = fetch_byte();
+			if (is_dec(c))
+				outstr(parse_num_lit(c));
+			else
+				unfetch_byte(c);	// not a number - do not lose the byte after the '.'
+			continue;
+		}
+		else if (is_dec(c))
+		{
+			// we have a number here
+			outstr(parse_num_lit(c));
+			continue;
+		}
+		else if (c == '/')
+		{
+			// we might have a comment here; peek at the next byte so a
+			// "//" comment (which swallows the EOL that ends the line) can be
+			// told apart from a block comment that merely spans lines
+			c = fetch_byte();
+			linecmt = (c == '/');
+			unfetch_byte(c);
+			
+			c = munch_comment();
+			if (c < 0)
+			{
+				// just a lone '/'
+				outchr('/');
+				continue;
+			}
+			// comments are white space - a comment that was the first thing
+			// on the line leaves the line "whitespace only", and a "//"
+			// comment that consumed its EOL puts us at the start of a new line
+			if (nlws == -1 || (linecmt && c > 0))
+				nlws = 1;
+			/* c is the number of EOL characters the comment spanned */
+			while (c--)
+				outchr(CPP_EOL);
+			continue;
+		}
+		else if (c == 'L')
+		{
+			// wide character string or wide character constant, or identifier
+			c = fetch_byte();
+			if (c == '"')
+			{
+				outchr('L');
+				outstr(parse_str_lit());
+				continue;
+			}
+			else if (c == '\'')
+			{
+				outchr('L');
+				outstr(parse_chr_lit());
+				continue;
+			}
+			unfetch_byte(c);
+			preprocess_identifier('L');
+			continue;
+		}
+		else if (is_sidchr(c))
+		{
+			// identifier of some kind
+			preprocess_identifier(c);
+			continue;
+		}
+		else
+		{
+			// random character - pass through
+			outchr(c);
+		}
+	}	
+}
+
+/*
+Collect an identifier from the input and copy it to the output.
+
+c is the first character of the identifier, which the caller has already
+fetched. Further characters are read for as long as is_idchr() accepts them;
+the first character that is not part of the identifier is pushed back with
+unfetch_byte() so the main loop sees it again.
+*/
+void preprocess_identifier(int c)
+{
+	char *ident = NULL;
+	int idlen = 0;
+	int idbufl = 0;
+
+	do
+	{
+		/* always keep one spare byte beyond the character about to be
+		   stored so the terminating NUL below can never land outside the
+		   buffer (it used to when the length was an exact multiple of 50) */
+		if (idlen + 1 >= idbufl)
+		{
+			idbufl += 50;
+			ident = lw_realloc(ident, idbufl);
+		}
+		ident[idlen++] = c;
+		c = fetch_byte();
+	} while (is_idchr(c));
+
+	ident[idlen] = 0;
+	unfetch_byte(c);
+	
+	/* do something with the identifier here  - macros, etc. */
+	outstr(ident);
+	lw_free(ident);
+}
+
+/*
+Append one character to the literal buffer of the enclosing function. It
+relies on that function declaring "ident" (buffer pointer), "idlen" (bytes
+used so far) and "idbufl" (bytes allocated); the buffer grows by 100 bytes
+whenever it is full.
+*/
+#define to_buf(c) do { if (idlen >= idbufl) { idbufl += 100; ident = lw_realloc(ident, idbufl); } ident[idlen++] = (c); } while (0)
+
+/*
+Collect a numeric literal from the input.
+
+c is the first digit, which the caller has already fetched. Digits and '.'
+are accepted, as is an exponent marker (whatever is_ep() accepts) together
+with an optional '+' or '-' sign directly after it.
+
+The character that ends the number is pushed back with unfetch_byte() so it
+is not lost. The result is a NUL terminated string in a static buffer which
+is overwritten by the next call; the caller must not free it.
+
+NOTE(review): letters other than the exponent marker end the literal, so
+forms such as 0x1F or 10UL are presumably returned only up to the first
+letter and the rest is scanned as an identifier - confirm that is intended.
+*/
+char *parse_num_lit(int c)
+{
+	static char *ident = NULL;
+	int idlen = 0;
+	static int idbufl = 0;
+	
+	do
+	{
+		to_buf(c);
+		c = fetch_byte();
+		if (is_ep(c))
+		{
+			to_buf(c);
+			c = fetch_byte();
+			/* a sign is only part of the number directly after the
+			   exponent marker */
+			if (c == '-' || c == '+')
+			{
+				to_buf(c);
+				c = fetch_byte();
+			}
+		}
+	} while ((is_dec(c)) || (c == '.'));
+	
+	/* c is the first character after the number; hand it back so the
+	   caller's next fetch_byte() sees it */
+	unfetch_byte(c);
+	to_buf(0);
+	
+	return ident;
+}
+
+/*
+Collect a character constant. The opening quote has already been consumed
+by the caller; it is put back at the front of the returned text.
+
+A backslash is copied together with the character following it, so an
+escaped quote does not end the constant. If EOL or EOF arrives before the
+closing quote, that character is pushed back, a warning is issued and the
+text gathered so far is returned.
+
+The result lives in a static buffer that the next call overwrites.
+*/
+char *parse_chr_lit(void)
+{
+	static char *ident = NULL;
+	static int idbufl = 0;
+	int idlen = 0;
+	int ch;
+
+	to_buf('\'');
+	for (;;)
+	{
+		ch = fetch_byte();
+		if (ch == '\'')
+			break;
+		if (ch == CPP_EOL || ch == CPP_EOF)
+			goto unterminated;
+		if (ch == '\\')
+		{
+			/* keep the backslash and take the escaped character verbatim */
+			to_buf(ch);
+			ch = fetch_byte();
+			if (ch == CPP_EOL || ch == CPP_EOF)
+				goto unterminated;
+		}
+		to_buf(ch);
+	}
+
+	/* closing quote */
+	to_buf(ch);
+	to_buf(0);
+	return ident;
+
+unterminated:
+	/* leave the EOL/EOF for the caller to deal with */
+	unfetch_byte(ch);
+	to_buf(0);
+	do_warning("Unterminated character constant");
+	return ident;
+}
+
+/*
+Collect a string literal. The opening double quote has already been
+consumed by the caller; it is put back at the front of the returned text.
+
+A backslash is copied together with the character following it, so an
+escaped double quote does not end the literal. If EOL or EOF arrives before
+the closing quote, that character is pushed back, a warning is issued and
+the text gathered so far is returned.
+
+The result lives in a static buffer that the next call overwrites.
+*/
+char *parse_str_lit(void)
+{
+	static char *ident = NULL;
+	static int idbufl = 0;
+	int idlen = 0;
+	int ch;
+
+	to_buf('"');
+	for (;;)
+	{
+		ch = fetch_byte();
+		if (ch == '"')
+			break;
+		if (ch == CPP_EOL || ch == CPP_EOF)
+			goto unterminated;
+		if (ch == '\\')
+		{
+			/* keep the backslash and take the escaped character verbatim */
+			to_buf(ch);
+			ch = fetch_byte();
+			if (ch == CPP_EOL || ch == CPP_EOF)
+				goto unterminated;
+		}
+		to_buf(ch);
+	}
+
+	/* closing quote */
+	to_buf(ch);
+	to_buf(0);
+	return ident;
+
+unterminated:
+	/* leave the EOL/EOF for the caller to deal with */
+	unfetch_byte(ch);
+	to_buf(0);
+	do_warning("Unterminated string literal");
+	return ident;
+}
+
+/*
+Consume a comment, if there is one. The caller has already read a '/'.
+
+Returns -1 if the next character starts neither a line comment nor a block
+comment; in that case the character is pushed back and nothing else is
+consumed. Otherwise the whole comment is consumed and the number of EOL
+characters swallowed along with it is returned:
+
+line comment  - 1 if it was ended by EOL (the EOL is consumed), 0 if it was
+                ended by EOF
+block comment - the number of EOLs inside the comment; an EOF before the
+                closing star-slash simply ends the comment
+*/
+int munch_comment(void)
+{
+	int nlc = 0;
+	int c;
+	
+	c = fetch_byte();
+	if (c == '/')
+	{
+		// single line comment
+		for (;;)
+		{
+			c = fetch_byte();
+			if (c == CPP_EOL)
+				return 1;
+			if (c == CPP_EOF)
+				return 0;
+		}
+	}
+	
+	if (c == '*')
+	{
+		// block comment
+		for (;;)
+		{
+			c = fetch_byte();
+			// every '*' in a run of stars may be the one that closes the
+			// comment, so keep testing for as long as stars keep coming
+			// (a close written with two stars used to be missed)
+			while (c == '*')
+			{
+				c = fetch_byte();
+				if (c == '/')
+					return nlc;
+			}
+			if (c == CPP_EOF)
+				return nlc;
+			if (c == CPP_EOL)
+				nlc++;
+		}
+	}
+	
+	// not a comment at all - give the character back
+	unfetch_byte(c);
+	return -1;
+}
+
+/* Write a location marker to the output so the compiler can keep its idea
+   of the current file and line in step with the original input. The marker
+   is a single line of the form:
+
+# <linenum> <filename> <flag>
+
+<linenum> is the line number, within the file, that the following output
+line came from. <filename> is the file name written as a C string. <flag> is
+the flag argument, which should be 1 when a new file is being started or 2
+when returning to a file from another file.
+ */
+void preprocess_output_location(int flag)
+{
+	fprintf(
+		output_fp,
+		"# %d \"%s\" %d\n",
+		file_stack->line,
+		file_stack->fn,
+		flag
+	);
+}
+
+/* Handle a preprocessor directive. For now this is only a placeholder: it
+   writes the two characters '>' and '#' to the output and consumes nothing
+   further from the input. */
+void preprocess_directive(void)
+{
+	static const char marker[] = ">#";
+	const char *p;
+
+	for (p = marker; *p; p++)
+		outchr(*p);
+}