Mercurial > hg > index.cgi
changeset 295:4b17780f2777 ccdev
Checkpoint lwcc development
Changed tactics with the preprocessor. Instead of getting clever and trying
to do things the "fast" way, just tokenize the whole input and
process it that way. Also, set things up so the preprocessor and compiler can be
integrated instead of requiring a specifically correct output for the
preprocessed file.
Also removed the subdirectories in the lwcc directory. It made things more
complicated than they needed to be.
author | William Astle <lost@l-w.ca> |
---|---|
date | Thu, 12 Sep 2013 22:06:26 -0600 |
parents | 048adfee2933 |
children | 83fcc1ed6ad6 |
files | Makefile lwcc/cpp-main.c lwcc/cpp.c lwcc/cpp.h lwcc/cpp/char_p.c lwcc/cpp/cpp.h lwcc/cpp/error.c lwcc/cpp/file.c lwcc/cpp/main.c lwcc/cpp/preproc.c lwcc/cpp/symbol.c lwcc/driver-main.c lwcc/driver/main.c lwcc/lex.c lwcc/strbuf.c lwcc/strbuf.h lwcc/token.c lwcc/token.h |
diffstat | 18 files changed, 2533 insertions(+), 2793 deletions(-) [+] |
line wrap: on
line diff
--- a/Makefile Tue Sep 10 19:56:05 2013 -0600 +++ b/Makefile Thu Sep 12 22:06:26 2013 -0600 @@ -55,10 +55,10 @@ lwlink/lwlink$(PROGSUFFIX) \ lwar/lwar$(PROGSUFFIX) \ lwlink/lwobjdump$(PROGSUFFIX) \ - lwcc/driver/lwcc$(PROGSUFFIX) \ - lwcc/cpp/lwcc-cpp$(PROGSUFFIX) + lwcc/lwcc$(PROGSUFFIX) \ + lwcc/lwcc-cpp$(PROGSUFFIX) -LWCC_LIBBIN_FILES = lwcc/cpp/lwcc-cpp$(PROGSUFFIX) +LWCC_LIBBIN_FILES = lwcc/lwcc-cpp$(PROGSUFFIX) LWCC_LIBLIB_FILES = LWCC_LIBINC_FILES = @@ -96,23 +96,25 @@ lwlib_deps := $(lwlib_srcs:.c=.d) lwobjdump_deps := $(lwobjdump_srcs:.c=.d) -lwcc_driver_srcs := main.c -lwcc_driver_srcs := $(addprefix lwcc/driver/,$(lwcc_driver_srcs)) +lwcc_driver_srcs := driver-main.c +lwcc_driver_srcs := $(addprefix lwcc/,$(lwcc_driver_srcs)) lwcc_driver_objs := $(lwcc_driver_srcs:.c=.o) lwcc_driver_deps := $(lwcc_driver_srcs:.c=.d) -lwcc_cpp_srcs := main.c error.c file.c preproc.c char_p.c symbol.c -lwcc_cpp_srcs := $(addprefix lwcc/cpp/,$(lwcc_cpp_srcs)) +lwcc_cpp_srcs := cpp-main.c cpp.c lex.c strbuf.c token.c +lwcc_cpp_srcs := $(addprefix lwcc/,$(lwcc_cpp_srcs)) lwcc_cpp_objs := $(lwcc_cpp_srcs:.c=.o) lwcc_cpp_deps := $(lwcc_cpp_srcs:.c=.d) +lwcc_deps := $(lwcc_cpp_deps) $(lwcc_driver_deps) + .PHONY: lwlink lwasm lwar lwobjdump lwcc lwlink: lwlink/lwlink$(PROGSUFFIX) lwasm: lwasm/lwasm$(PROGSUFFIX) lwar: lwar/lwar$(PROGSUFFIX) lwobjdump: lwlink/lwobjdump$(PROGSUFFIX) -lwcc: lwcc/driver/lwcc$(PROGSUFFIX) -lwcc-cpp: lwcc/cpp/lwcc-cpp$(PROGSUFFIX) +lwcc: lwcc/lwcc$(PROGSUFFIX) +lwcc-cpp: lwcc/lwcc-cpp$(PROGSUFFIX) lwasm/lwasm$(PROGSUFFIX): $(lwasm_objs) lwlib @echo Linking $@ @@ -130,11 +132,11 @@ @echo Linking $@ @$(CC) -o $@ $(lwar_objs) $(LDFLAGS) -lwcc/driver/lwcc$(PROGSUFFIX): $(lwcc_driver_objs) lwlib +lwcc/lwcc$(PROGSUFFIX): $(lwcc_driver_objs) lwlib @echo Linking $@ @$(CC) -o $@ $(lwcc_driver_objs) $(LDFLAGS) -lwcc/cpp/lwcc-cpp$(PROGSUFFIX): $(lwcc_cpp_objs) lwlib +lwcc/lwcc-cpp$(PROGSUFFIX): $(lwcc_cpp_objs) lwlib @echo Linking $@ @$(CC) -o $@ $(lwcc_cpp_objs) 
$(LDFLAGS) @@ -168,7 +170,7 @@ clean: $(cleantargs) @echo "Cleaning up" @rm -f lwlib/liblw.a lwasm/lwasm$(PROGSUFFIX) lwlink/lwlink$(PROGSUFFIX) lwlink/lwobjdump$(PROGSUFFIX) lwar/lwar$(PROGSUFFIX) - @rm -f lwcc/driver/lwcc$(PROGSUFFIX) lwcc/cpp/lwcc-cpp$(PROGSUFFIX) + @rm -f lwcc/lwcc$(PROGSUFFIX) lwcc/lwcc-cpp$(PROGSUFFIX) @rm -f $(lwcc_driver_ojbs) $(lwcc_cpp_objs) @rm -f $(lwasm_objs) $(lwlink_objs) $(lwar_objs) $(lwlib_objs) $(lwobjdump_objs) @rm -f $(extra_clean)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lwcc/cpp-main.c Thu Sep 12 22:06:26 2013 -0600 @@ -0,0 +1,171 @@ +/* +lwcc/cpp-main.c + +Copyright © 2013 William Astle + +This file is part of LWTOOLS. + +LWTOOLS is free software: you can redistribute it and/or modify it under the +terms of the GNU General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <errno.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <lw_stringlist.h> +#include <lw_cmdline.h> + +#include "cpp.h" + +int process_file(const char *); +static void do_error(const char *f, ...); + +/* command line option handling */ +#define PROGVER "lwcc-cpp from " PACKAGE_STRING +char *program_name; + +/* input files */ +lw_stringlist_t input_files; + +/* various flags */ +int trigraphs = 0; +char *output_file = NULL; +FILE *output_fp = NULL; + +static struct lw_cmdline_options options[] = +{ + { "output", 'o', "FILE", 0, "Output to FILE"}, + { "include", 'i', "FILE", 0, "Pre-include FILE" }, + { "includedir", 'I', "PATH", 0, "Add entry to the user include path" }, + { "sincludedir", 'S', "PATH", 0, "Add entry to the system include path" }, + { "define", 'D', "SYM[=VAL]",0, "Automatically define SYM to be VAL (or 1)"}, + { "trigraphs", 0x100, NULL, 0, "Enable interpretation of trigraphs" }, + { 0 } +}; + +static int parse_opts(int key, char *arg, void *state) +{ + switch (key) + { + case 'o': + if (output_file) + do_error("Output file specified more than once."); + output_file = arg; + 
break; + + case 0x100: + trigraphs = 1; + break; + + case lw_cmdline_key_end: + break; + + case lw_cmdline_key_arg: + lw_stringlist_addstring(input_files, arg); + break; + + default: + return lw_cmdline_err_unknown; + } + return 0; +} + +static struct lw_cmdline_parser cmdline_parser = +{ + options, + parse_opts, + "INPUTFILE", + "lwcc-cpp - C preprocessor for lwcc", + PROGVER +}; + +int main(int argc, char **argv) +{ + program_name = argv[0]; + int retval = 0; + + input_files = lw_stringlist_create(); + + /* parse command line arguments */ + lw_cmdline_parse(&cmdline_parser, argc, argv, 0, 0, NULL); + + /* set up output file */ + if (output_file == NULL || strcmp(output_file, "-") == 0) + { + output_fp = stdout; + } + else + { + output_fp = fopen(output_file, "wb"); + if (output_fp == NULL) + { + do_error("Failed to create output file %s: %s", output_file, strerror(errno)); + } + } + + if (lw_stringlist_nstrings(input_files) == 0) + { + /* if no input files, work on stdin */ + retval = process_file("-"); + retval = 1; + } + else + { + char *s; + lw_stringlist_reset(input_files); + for (s = lw_stringlist_current(input_files); s; s = lw_stringlist_next(input_files)) + { + retval = process_file(s); + if (retval != 0) + break; + } + } + lw_stringlist_destroy(input_files); + +// symbol_dump(); + exit(retval); +} + +int process_file(const char *fn) +{ + struct preproc_info *pp; + struct token *tok; + + pp = preproc_init(fn); + if (!pp) + return -1; + + for (;;) + { + tok = preproc_next_token(pp); + if (tok -> ttype == TOK_EOF) + break; + token_print(tok, output_fp); + } + preproc_finish(pp); + return 0; +} + +static void do_error(const char *f, ...) +{ + va_list args; + va_start(args, f); + fprintf(stderr, "ERROR: "); + vfprintf(stderr, f, args); + va_end(args); + fprintf(stderr, "\n"); + exit(1); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lwcc/cpp.c Thu Sep 12 22:06:26 2013 -0600 @@ -0,0 +1,129 @@ +/* +lwcc/cpp.c + +Copyright © 2013 William Astle + +This file is part of LWTOOLS. + +LWTOOLS is free software: you can redistribute it and/or modify it under the +terms of the GNU General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <lw_alloc.h> +#include <lw_string.h> + +#include "cpp.h" + +struct token *preproc_lex_next_token(struct preproc_info *); + +struct preproc_info *preproc_init(const char *fn) +{ + FILE *fp; + struct preproc_info *pp; + + if (!fn || (fn[0] == '-' && fn[1] == '0')) + { + fp = stdin; + } + else + { + fp = fopen(fn, "rb"); + } + if (!fp) + return NULL; + + pp = lw_alloc(sizeof(struct preproc_info)); + memset(pp, 0, sizeof(struct preproc_info)); + pp -> fn = lw_strdup(fn); + pp -> fp = fp; + pp -> ra = CPP_NOUNG; + return pp; +} + +struct token *preproc_next_token(struct preproc_info *pp) +{ + struct token *t; + + if (pp -> tokqueue) + { + t = pp -> tokqueue; + pp -> tokqueue = t -> next; + if (pp -> tokqueue) + pp -> tokqueue -> prev = NULL; + t -> next = NULL; + t -> prev = NULL; + return t; + } + return(preproc_lex_next_token(pp)); +} + +void preproc_finish(struct preproc_info *pp) +{ + lw_free((void *)(pp -> fn)); + fclose(pp -> fp); + lw_free(pp); +} + +void preproc_register_error_callback(struct preproc_info *pp, void (*cb)(const char *)) +{ + pp -> errorcb = cb; +} + 
+void preproc_register_warning_callback(struct preproc_info *pp, void (*cb)(const char *)) +{ + pp -> warningcb = cb; +} + +static void preproc_throw_error_default(const char *m) +{ + fprintf(stderr, "ERROR: %s\n", m); +} + +static void preproc_throw_warning_default(const char *m) +{ + fprintf(stderr, "WARNING: %s\n", m); +} + +static void preproc_throw_message(void (*cb)(const char *), const char *m, va_list args) +{ + int s; + char *b; + + s = vsnprintf(NULL, 0, m, args); + b = lw_alloc(s + 1); + vsnprintf(b, s + 1, m, args); + (*cb)(b); + lw_free(b); +} + +void preproc_throw_error(struct preproc_info *pp, const char *m, ...) +{ + va_list args; + va_start(args, m); + preproc_throw_message(pp -> errorcb ? pp -> errorcb : preproc_throw_error_default, m, args); + va_end(args); + exit(1); +} + +void preproc_throw_warning(struct preproc_info *pp, const char *m, ...) +{ + va_list args; + va_start(args, m); + preproc_throw_message(pp -> warningcb ? pp -> warningcb : preproc_throw_warning_default, m, args); + va_end(args); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lwcc/cpp.h Thu Sep 12 22:06:26 2013 -0600 @@ -0,0 +1,61 @@ +/* +lwcc/cpp.h + +Copyright © 2013 William Astle + +This file is part of LWTOOLS. + +LWTOOLS is free software: you can redistribute it and/or modify it under the +terms of the GNU General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef cpp_h_seen___ +#define cpp_h_seen___ + +#include <stdio.h> + +#include "token.h" + +#define TOKBUFSIZE 32 + +struct preproc_info +{ + const char *fn; + FILE *fp; + struct token *tokbuf[TOKBUFSIZE]; + struct token *tokqueue; + int tokbuf_ptr; + void (*errorcb)(const char *); + void (*warningcb)(const char *); + int eolstate; + int lineno; + int column; + int trigraphs; + int ra; + int qseen; + int ungetbufl; + int ungetbufs; + int *ungetbuf; + int unget; + int eolseen; + int nlseen; +}; + +extern struct preproc_info *preproc_init(const char *); +extern struct token *preproc_next_token(struct preproc_info *); +extern void preproc_finish(struct preproc_info *); +extern void preproc_register_error_callback(struct preproc_info *, void (*)(const char *)); +extern void preproc_register_warning_callback(struct preproc_info *, void (*)(const char *)); +extern void preproc_throw_error(struct preproc_info *, const char *, ...); +extern void preproc_throw_warning(struct preproc_info *, const char *, ...); +#endif // cpp_h_seen___
--- a/lwcc/cpp/char_p.c Tue Sep 10 19:56:05 2013 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,52 +0,0 @@ -int is_whitespace(int c) -{ - switch (c) - { - case ' ': - case '\t': - case '\r': - case '\n': - return 1; - } - return 0; -} - -int is_sidchr(c) -{ - if (c == '_' || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) - return 1; - return 0; -} - -int is_idchr(int c) -{ - if (c >= '0' && c <= '9') - return 1; - return is_sidchr(c); -} - -int is_ep(int c) -{ - if (c == 'e' || c == 'E' || c == 'p' || c == 'P') - return 1; - return 0; -} - -int is_hex(int c) -{ - if (c >= 'a' && c <= 'f') - return 1; - if (c >= 'A' && c <= 'F') - return 1; - if (c >= '0' && c <= '9') - return 1; - return 0; -} - -int is_dec(int c) -{ - if (c >= '0' && c <= '9') - return 1; - return 0; -} -
--- a/lwcc/cpp/cpp.h Tue Sep 10 19:56:05 2013 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,124 +0,0 @@ -/* -lwcc/cpp/cpp.h - -Copyright © 2013 William Astle - -This file is part of LWTOOLS. - -LWTOOLS is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software -Foundation, either version 3 of the License, or (at your option) any later -version. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. - -You should have received a copy of the GNU General Public License along with -this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#ifndef cpp_h_seen___ -#define cpp_h_seen___ - -#include <stdio.h> - -enum -{ - CPP_NOUNG = -3, - CPP_EOL = -2, - CPP_EOF = -1, -}; - -enum -{ - TOK_NONE = 0, - TOK_EOF, - TOK_EOL, - TOK_WSPACE, - TOK_IDENT, - TOK_NUMBER, - TOK_STRING, - TOK_CHAR, - TOK_DIV, - TOK_MUL, - TOK_ADD, - TOK_SUB, - TOK_OPAREN, - TOK_CPAREN, - TOK_NE, - TOK_EQ, - TOK_LE, - TOK_LT, - TOK_GE, - TOK_GT, - TOK_BAND, - TOK_BOR, - TOK_BNOT, - TOK_MAX -}; - -struct token -{ - int ttype; // token type - char *strval; // string value of token - the text it matched -}; - -struct file_stack_e -{ - const char *fn; - FILE *fp; - struct file_stack_e *next; - int line; - int col; - int eolstate; // end of line state for interpreting \r\n \n\r \n \r - int ra; // read ahead byte for trigraph scan - int qseen; // number of ? 
seen during trigraph scan - int unget; // character that has been "ungot" - int curc; // the most recent character retrieved - int *ungetbuf; // buffer for "unfetch" - int ungetbufl; // length offset in unget buffer - int ungetbufs; // size of unget buffer -}; - -struct symtab_e -{ - char *name; // the symbol identifier - struct symtab_e *next; // next symbol in table - char *strval; // the actual value of the macro - int nargs; // number of fixed args; -1 for basic, >= 0 for function like - int vargs; // set if macro is varargs -}; - -extern struct symtab_e *symbol_find(const char *); -extern void symbol_undef(const char *); -extern struct symtab_e *symbol_add(const char *, const char *, int, int); - -extern FILE *output_fp; -extern int trigraphs; -extern struct file_stack_e *file_stack; - -extern int process_file(const char *); -extern void preprocess_file(void); -extern void preprocess_output_location(int); - -extern void do_error(const char *, ...); -extern void do_warning(const char *, ...); - -extern int fetch_byte(void); -extern void unfetch_byte(int); -extern void outchr(int); -extern void outstr(char *); -extern void skip_eol(void); - -extern int is_whitespace(int); -extern int is_ep(int); -extern int is_sidchr(int); -extern int is_idchr(int); -extern int is_dec(int); -extern int is_hex(int); - -extern int skip_level; - -#endif // cpp_h_seen___
--- a/lwcc/cpp/error.c Tue Sep 10 19:56:05 2013 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,59 +0,0 @@ -/* -lwcc/cpp/error.c - -Copyright © 2013 William Astle - -This file is part of LWTOOLS. - -LWTOOLS is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software -Foundation, either version 3 of the License, or (at your option) any later -version. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. - -You should have received a copy of the GNU General Public License along with -this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#include <stdarg.h> -#include <stdio.h> -#include <stdlib.h> - -#include "cpp.h" - -static void show_file_pos(void) -{ - if (file_stack == NULL) - return; - - fprintf(stderr, "(%s:%d): ", file_stack -> fn, file_stack -> line); -} - -void do_error(const char *f, ...) -{ - va_list arg; - - va_start(arg, f); - fprintf(stderr, "ERROR: "); - show_file_pos(); - vfprintf(stderr, f, arg); - fprintf(stderr, "\n"); - va_end(arg); - exit(1); -} - -void do_warning(const char *f, ...) -{ - va_list arg; - - va_start(arg, f); - fprintf(stderr, "WARNING: "); - show_file_pos(); - vfprintf(stderr, f, arg); - fprintf(stderr, "\n"); - va_end(arg); -}
--- a/lwcc/cpp/file.c Tue Sep 10 19:56:05 2013 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,337 +0,0 @@ -/* -lwcc/cpp/file.c - -Copyright © 2013 William Astle - -This file is part of LWTOOLS. - -LWTOOLS is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software -Foundation, either version 3 of the License, or (at your option) any later -version. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. - -You should have received a copy of the GNU General Public License along with -this program. If not, see <http://www.gnu.org/licenses/>. - -*/ - -#include <errno.h> -#include <stdio.h> -#include <string.h> - -#include <lw_alloc.h> - -#include "cpp.h" - -struct file_stack_e *file_stack = NULL; - -/* output a byte to the current output stream as long as we aren't in the - middle of a false conditional. CPP_EOL will be converted to '\n' - on output. */ -void outchr(int c) -{ - if (skip_level) - return; - if (c == CPP_EOL) - c = '\n'; - fputc(c, output_fp); -} - -/* output a string to the current output stream as long as we aren't in the - middle of a false conditional */ -void outstr(char *s) -{ - if (skip_level) - return; - while (*s) - outchr(*s++); -} - -/* fetch a raw input byte from the current file. Will return CPP_EOF if - EOF is encountered and CPP_EOL if an end of line sequence is encountered. - End of line is defined as either CR, CRLF, LF, or LFCR. CPP_EOL is - returned on the first CR or LF encountered. The complementary CR or LF - is munched, if present, when the *next* character is read. This always - operates on file_stack. - - This function also accounts for line numbers in input files and also - character columns. 
-*/ -int fetch_byte_ll(void) -{ - int c; - - if (file_stack -> eolstate != 0) - { - file_stack -> line++; - file_stack -> col = 0; - } - c = getc(file_stack -> fp); - file_stack -> col++; - if (file_stack -> eolstate == 1) - { - // just saw CR, munch LF - if (c == 10) - c = getc(file_stack -> fp); - file_stack -> eolstate = 0; - } - else if (file_stack -> eolstate == 2) - { - // just saw LF, much CR - if (c == 13) - c = getc(file_stack -> fp); - file_stack -> eolstate = 0; - } - - if (c == 10) - { - // we have LF - end of line, flag to munch CR - file_stack -> eolstate = 2; - c = CPP_EOL; - } - else if (c == 13) - { - // we have CR - end of line, flag to munch LF - file_stack -> eolstate = 1; - c = CPP_EOL; - } - else if (c == EOF) - { - c = CPP_EOF; - } - return c; -} - -/* This function takes a sequence of bytes from the _ll function above - and does trigraph interpretation on it, but only if the global - trigraphs is nonzero. */ -int fetch_byte_tg(void) -{ - int c; - - if (!trigraphs) - { - c = fetch_byte_ll(); - } - else - { - /* we have to do the trigraph shit here */ - if (file_stack -> ra != CPP_NOUNG) - { - if (file_stack -> qseen > 0) - { - c = '?'; - file_stack -> qseen -= 1; - return c; - } - else - { - c = file_stack -> ra; - file_stack -> ra = CPP_NOUNG; - return c; - } - } - - c = fetch_byte_ll(); - while (c == '?') - { - file_stack -> qseen++; - c = fetch_byte_ll(); - } - - if (file_stack -> qseen >= 2) - { - // we have a trigraph - switch (c) - { - case '=': - c = '#'; - file_stack -> qseen -= 2; - break; - - case '/': - c = '\\'; - file_stack -> qseen -= 2; - break; - - case '\'': - c = '^'; - file_stack -> qseen -= 2; - break; - - case '(': - c = '['; - file_stack -> qseen -= 2; - break; - - case ')': - c = ']'; - file_stack -> qseen -= 2; - break; - - case '!': - c = '|'; - file_stack -> qseen -= 2; - break; - - case '<': - c = '{'; - file_stack -> qseen -= 2; - break; - - case '>': - c = '}'; - file_stack -> qseen -= 2; - break; - - case '~': - 
c = '~'; - file_stack -> qseen -= 2; - break; - } - if (file_stack -> qseen > 0) - { - file_stack -> ra = c; - c = '?'; - file_stack -> qseen--; - } - } - else if (file_stack -> qseen > 0) - { - file_stack -> ra = c; - c = '?'; - file_stack -> qseen--; - } - } - return c; -} - -/* This function puts a byte back onto the front of the input stream used - by fetch_byte(). Theoretically, an unlimited number of characters can - be unfetched. Line and column counting may be incorrect if unfetched - characters cross a token boundary. */ -void unfetch_byte(int c) -{ - if (file_stack -> ungetbufl >= file_stack -> ungetbufs) - { - file_stack -> ungetbufs += 100; - file_stack -> ungetbuf = lw_realloc(file_stack -> ungetbuf, file_stack -> ungetbufs); - } - file_stack -> ungetbuf[file_stack -> ungetbufl++] = c; -} - -/* This function retrieves a byte from the input stream. It performs - backslash-newline splicing on the returned bytes. Any character - retrieved from the unfetch buffer is presumed to have already passed - the backslash-newline filter. 
*/ -int fetch_byte(void) -{ - int c; - - if (file_stack -> ungetbufl > 0) - { - file_stack -> ungetbufl--; - c = file_stack -> ungetbuf[file_stack -> ungetbufl]; - if (file_stack -> ungetbufl == 0) - { - lw_free(file_stack -> ungetbuf); - file_stack -> ungetbuf = NULL; - file_stack -> ungetbufs = 0; - } - return c; - } - -again: - if (file_stack -> unget != CPP_NOUNG) - { - c = file_stack -> unget; - file_stack -> unget = CPP_NOUNG; - } - else - { - c = fetch_byte_tg(); - } - if (c == '\\') - { - int c2; - c2 = fetch_byte_tg(); - if (c2 == CPP_EOL) - goto again; - else - file_stack -> unget = c2; - } - file_stack -> curc = c; - return c; -} - -void skip_eol(void) -{ - int c; - for (;;) - { - c = fetch_byte(); - if (c == CPP_EOF || c == CPP_EOL) - { - unfetch_byte(c); - return; - } - if (c == '/') - { - c = munch_comment(); - if (c > 0) - { - while (c--) - outchr(CPP_EOL); - } - } - } -} - - -/* This function opens (if not stdin) the file f and pushes it onto the - top of the input file stack. It then proceeds to process the file - and return. Nonzero return means the file could not be opened. */ -int process_file(const char *f) -{ - struct file_stack_e nf; - FILE *fp; - - fprintf(stderr, "Processing %s\n", f); - - if (strcmp(f, "-") == 0) - fp = stdin; - else - fp = fopen(f, "rb"); - if (fp == NULL) - { - do_warning("Cannot open %s: %s", f, strerror(errno)); - return -1; - } - - /* push the file onto the file stack */ - nf.fn = f; - nf.fp = fp; - nf.next = file_stack; - nf.line = 1; - nf.col = 0; - nf.qseen = 0; - nf.ra = CPP_NOUNG; - nf.unget = CPP_NOUNG; - file_stack = &nf; - nf.ungetbuf = NULL; - nf.ungetbufs = 0; - nf.ungetbufl = 0; - - /* go preprocess the file */ - preprocess_file(); - - if (nf.fp != stdin) - fclose(nf.fp); - file_stack = nf.next; - return 0; -}
--- a/lwcc/cpp/main.c Tue Sep 10 19:56:05 2013 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,134 +0,0 @@ -/* -lwcc/cpp/main.c - -Copyright © 2013 William Astle - -This file is part of LWTOOLS. - -LWTOOLS is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software -Foundation, either version 3 of the License, or (at your option) any later -version. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. - -You should have received a copy of the GNU General Public License along with -this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#include <errno.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include <lw_stringlist.h> -#include <lw_cmdline.h> - -#include "cpp.h" - -/* command line option handling */ -#define PROGVER "lwcc-cpp from " PACKAGE_STRING -char *program_name; - -/* input files */ -lw_stringlist_t input_files; - -/* various flags */ -int trigraphs = 0; -char *output_file = NULL; -FILE *output_fp = NULL; - -static struct lw_cmdline_options options[] = -{ - { "output", 'o', "FILE", 0, "Output to FILE"}, - { "include", 'i', "FILE", 0, "Pre-include FILE" }, - { "includedir", 'I', "PATH", 0, "Add entry to the user include path" }, - { "sincludedir", 'S', "PATH", 0, "Add entry to the system include path" }, - { "define", 'D', "SYM[=VAL]",0, "Automatically define SYM to be VAL (or 1)"}, - { "trigraphs", 0x100, NULL, 0, "Enable interpretation of trigraphs" }, - { 0 } -}; - - -static int parse_opts(int key, char *arg, void *state) -{ - switch (key) - { - case 'o': - if (output_file) - do_error("Output file specified more than once."); - output_file = arg; - break; - - case 0x100: - trigraphs = 1; - break; - - case lw_cmdline_key_end: - break; - - case 
lw_cmdline_key_arg: - lw_stringlist_addstring(input_files, arg); - break; - - default: - return lw_cmdline_err_unknown; - } - return 0; -} - -static struct lw_cmdline_parser cmdline_parser = -{ - options, - parse_opts, - "INPUTFILE", - "lwcc-cpp - C preprocessor for lwcc", - PROGVER -}; - -int main(int argc, char **argv) -{ - program_name = argv[0]; - int retval = 0; - - input_files = lw_stringlist_create(); - - /* parse command line arguments */ - lw_cmdline_parse(&cmdline_parser, argc, argv, 0, 0, NULL); - - /* set up output file */ - if (output_file == NULL || strcmp(output_file, "-") == 0) - { - output_fp = stdout; - } - else - { - output_fp = fopen(output_file, "wb"); - if (output_fp == NULL) - { - do_error("Failed to create output file %s: %s", output_file, strerror(errno)); - } - } - - if (lw_stringlist_nstrings(input_files) == 0) - { - /* if no input files, work on stdin */ - retval = process_file("-"); - } - else - { - char *s; - lw_stringlist_reset(input_files); - for (s = lw_stringlist_current(input_files); s; s = lw_stringlist_next(input_files)) - { - retval = process_file(s); - if (retval != 0) - break; - } - } - lw_stringlist_destroy(input_files); - exit(retval); -}
--- a/lwcc/cpp/preproc.c Tue Sep 10 19:56:05 2013 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,924 +0,0 @@ -/* -lwcc/cpp/preproc.c - -Copyright © 2013 William Astle - -This file is part of LWTOOLS. - -LWTOOLS is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software -Foundation, either version 3 of the License, or (at your option) any later -version. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. - -You should have received a copy of the GNU General Public License along with -this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include <lw_alloc.h> -#include <lw_string.h> - -#include "cpp.h" - - -int munch_comment(void); -char *parse_str_lit(void); -char *parse_chr_lit(void); -char *parse_num_lit(int); -char *parse_identifier(int); -void preprocess_identifier(char *); -void preprocess_directive(void); -void next_token(void); -void next_token_nws(void); -int eval_expr(void); - -int skip_level = 0; -int found_level = 0; -int else_level = 0; -int else_skip_level = 0; - -struct token curtok = { .ttype = TOK_NONE, .strval = NULL }; - -/* -Notes: - -Rather than tokenize the entire file, we run through it interpreting -things only as much as we need to in order to identify the following: - -preprocessing directives (#...) -identifiers which might need to be replaced with macros - -We have to interpret strings, character constants, and numbers to prevent -false positives in those situations. - -When we find a preprocessing directive, it is handled with a more -aggressive tokenization process and then intepreted accordingly. 
- -nlws is used to record the fact that only whitespace has occurred at the -start of a line. Whitespace is defined as comments or isspace(c). It gets -reset to 1 after each EOL character. If a non-whitespace character is -encountered, it is set to -1. If the character processing decides it really -is a whitespace character, it will set nlws back to 1 (block comment). -Elsewise, it will get set to 0 if it is still -1 when the loop starts again. - -This is needed so we can identify whitespace interposed before a -preprocessor directive. This is the only case where it matters for -the preprocessor. - -*/ -void preprocess_file() -{ - int c; - int nlws = 1; - - preprocess_output_location(1); - for (;;) - { - c = fetch_byte(); - // if we had non-whitespace that wasn't munched (comment), set flag correctly - if (nlws == -1) - nlws = 0; - if (c == CPP_EOF) - { - // end of input - make sure newline is present - outchr('\n'); - return; - } - if (c == CPP_EOL) - { - // flag that we just hit the start of a new line - nlws = 1; - outchr(CPP_EOL); - continue; - } - - /* if we have a non-whitespace character, flag it as such */ - if (!is_whitespace(c)) - nlws = -1; - - if (c == '#' && nlws) - { - // we have a preprocessor directive here - this call will do - // everything including outputting the blank line, if appropriate - preprocess_directive(); - continue; - } - else if (c == '\'') - { - // we have a character constant here - outstr(parse_chr_lit()); - continue; - } - else if (c == '"') - { - // we have a string constant here - outstr(parse_str_lit()); - continue; - } - else if (c == '.') - { - // we might have a number here - outchr('.'); - c = fetch_byte(); - if (is_dec(c)) - outstr(parse_num_lit(c)); - continue; - } - else if (is_dec(c)) - { - // we have a number here - outstr(parse_num_lit(c)); - } - else if (c == '/') - { - // we might have a comment here - c = munch_comment(); - if (c < 0) - { - outchr('/'); - continue; - } - // comments are white space - count them as 
such at start of line - if (nlws == -1) - nlws = 0; - /* c is the number of EOL characters the comment spanned */ - while (c--) - outchr(CPP_EOL); - continue; - } - else if (c == 'L') - { - // wide character string or wide character constant, or identifier - c = fetch_byte(); - if (c == '"') - { - outchr('L'); - outstr(parse_str_lit()); - continue; - } - else if (c == '\'') - { - outchr('L'); - outstr(parse_chr_lit()); - continue; - } - unfetch_byte(c); - preprocess_identifier(parse_identifier('L')); - continue; - } - else if (is_sidchr(c)) - { - // identifier of some kind - char *s; - s = parse_identifier(c); - preprocess_identifier(s); - continue; - } - else - { - // random character - pass through - outchr(c); - } - } -} - -char *parse_identifier(int c) -{ - static char *ident = NULL; - int idlen = 0; - static int idbufl = 0; - - do - { - if (idlen >= idbufl) - { - idbufl += 50; - ident = lw_realloc(ident, idbufl); - } - ident[idlen++] = c; - c = fetch_byte(); - } while (is_idchr(c)); - - ident[idlen++] = 0; - unfetch_byte(c); - - return ident; -} - -void preprocess_identifier(char *s) -{ - /* do something with the identifier here - macros, etc. 
*/ - outstr(s); -} - -#define to_buf(c) do { if (idlen >= idbufl) { idbufl += 100; ident = lw_realloc(ident, idbufl); } ident[idlen++] = (c); } while (0) -char *parse_num_lit(int c) -{ - static char *ident = NULL; - int idlen = 0; - static int idbufl = 0; - - do - { - to_buf(c); - c = fetch_byte(); - if (is_ep(c)) - { - to_buf(c); - c = fetch_byte(); - if (c == '-' || c == '+') - { - to_buf(c); - c = fetch_byte(); - } - } - } while ((is_idchr(c)) || (c == '.')); - to_buf(0); - - return ident; -} - -char *parse_chr_lit(void) -{ - static char *ident = NULL; - int idlen = 0; - static int idbufl = 0; - int c; - - to_buf('\''); - while ((c = fetch_byte()) != '\'') - { - if (c == CPP_EOL || c == CPP_EOF) - { - unfetch_byte(c); - to_buf(0); - do_warning("Unterminated character constant"); - return ident; - } - if (c == '\\') - { - to_buf(c); - c = fetch_byte(); - if (c == CPP_EOL || c == CPP_EOF) - { - unfetch_byte(c); - to_buf(0); - do_warning("Unterminated character constant"); - return ident; - } - } - to_buf(c); - } - to_buf(c); - to_buf(0); - return ident; -} - -char *parse_str_lit(void) -{ - static char *ident = NULL; - int idlen = 0; - static int idbufl = 0; - int c; - - to_buf('"'); - while ((c = fetch_byte()) != '"') - { - if (c == CPP_EOL || c == CPP_EOF) - { - unfetch_byte(c); - to_buf(0); - do_warning("Unterminated string literal"); - return ident; - } - if (c == '\\') - { - to_buf(c); - c = fetch_byte(); - if (c == CPP_EOL || c == CPP_EOF) - { - unfetch_byte(c); - to_buf(0); - do_warning("Unterminated string literal"); - return ident; - } - } - to_buf(c); - } - to_buf(c); - to_buf(0); - return ident; -} - -int munch_comment(void) -{ - int nlc = 0; - int c; - - c = fetch_byte(); - if (c == '/') - { - // single line comment - for (;;) - { - c = fetch_byte(); - if (c == CPP_EOL) - nlc = 1; - if (c == CPP_EOL || c == CPP_EOF) - return nlc; - } - } - else if (c == '*') - { - // block comment - for (;;) - { - c = fetch_byte(); - if (c == CPP_EOL) - nlc++; - if (c 
== CPP_EOF) - return nlc; - if (c == '*') - { - c = fetch_byte(); - if (c == '/' || c == CPP_EOF) - return nlc; - if (c == CPP_EOL) - nlc++; - } - } - return nlc; - } - else - { - unfetch_byte(c); - return -1; - } - - return nlc; -} - -/* Output a location directive to synchronize the compiler with the correct - input line number and file. This is of the form: - -# <linenum> <filename> <flag> - -where <linenum> is the line number inside the file, <filename> is the -filename (as a C string), and <flag> is the specified flag argument which -should be 1 for the start of a new file or 2 for returning to the file from -another file. <linenum> is the line number the following line came from. - */ -void preprocess_output_location(int flag) -{ - fprintf(output_fp, "# %d \"%s\" %d\n", file_stack -> line, file_stack -> fn, flag); -} - -void preproc_ifndef(void); -void preproc_ifdef(void); -void preproc_if(void); -void preproc_include(void); -void preproc_else(void); -void preproc_endif(void); -void preproc_error(void); -void preproc_warning(void); -void preproc_define(void); -void preproc_undef(void); -void preproc_line(void); -void preproc_pragma(void); -void preproc_elif(void); - -struct { char *name; void (*fn)(void); } directive_list[] = { - { "ifndef", preproc_ifndef }, - { "ifdef", preproc_ifdef }, - { "if", preproc_if }, - { "include", preproc_include }, - { "else", preproc_else }, - { "endif", preproc_endif }, - { "error", preproc_error }, - { "warning", preproc_warning }, - { "define", preproc_define }, - { "undef", preproc_undef }, - { "line", preproc_line }, - { "pragma", preproc_pragma }, - { "elif", preproc_elif }, - { NULL, NULL } -}; - -/* process a preprocessor directive */ -#define DIRBUFLEN 20 -void preprocess_directive(void) -{ - static char dirbuf[DIRBUFLEN+1]; - int c; - int dl = 0; - - for (;;) - { - c = fetch_byte(); - if (is_whitespace(c)) - continue; - if (c == '/') - { - c = munch_comment(); - if (c < 0) - goto baddir; - if (c > 0) - { - while (c--) 
- outchr(CPP_EOL); - } - continue; - } - if (c == CPP_EOL) - { - // NULL directive - do nothing - outchr(CPP_EOL); - return; - } - break; - } - - - dl = 0; - while (((c >= 'a' && c <= 'z') || c == '_') && dl < DIRBUFLEN) - { - dirbuf[dl++] = c; - c = fetch_byte(); - } - dirbuf[dl] = 0; - -commagain: - if (c == '/') - { - c = munch_comment(); - if (c < 0) - c = '/'; - else - { - while (c--) - { - outchr(CPP_EOL); - } - c = fetch_byte(); - goto commagain; - } - } - - if (!is_whitespace(c) && c != CPP_EOL && c != CPP_EOF) - goto baddir; - - for (dl = 0; directive_list[dl].name; dl++) - { - if (strcmp(directive_list[dl].name, dirbuf) == 0) - { - (*(directive_list[dl].fn))(); - outchr(CPP_EOL); - return; - } - } - -baddir: - dirbuf[dl] = 0; - if (skip_level == 0) - do_error("Bad preprocessor directive %s", dirbuf); - outchr(CPP_EOL); -} - -void check_eol(void) -{ - next_token_nws(); - if (curtok.ttype == TOK_EOL) - return; - if (curtok.ttype == TOK_EOF) - return; - do_warning("Extra text after preprocessor directive"); - skip_eol(); -} - -void preproc_ifndef(void) -{ - if (skip_level) - { - skip_level++; - skip_eol(); - return; - } - next_token_nws(); - if (curtok.ttype != TOK_IDENT) - { - do_error("Bad #ifndef"); - skip_eol(); - } - - if (symbol_find(curtok.strval)) - { - skip_level++; - } - else - { - found_level++; - } - check_eol(); -} - -void preproc_ifdef(void) -{ - if (skip_level) - { - skip_level++; - skip_eol(); - return; - } - next_token_nws(); - if (curtok.ttype != TOK_IDENT) - { - do_error("Bad #ifdef"); - skip_eol(); - } - - if (symbol_find(curtok.strval) == NULL) - { - skip_level++; - } - else - { - found_level++; - } - check_eol(); -} - -void preproc_if(void) -{ - skip_eol(); -} - -void preproc_include(void) -{ - skip_eol(); -} - -void preproc_else(void) -{ - if (skip_level) - { - if (else_skip_level > found_level) - ; - else if (--skip_level != 0) - skip_level++; - else - found_level++; - } - else if (found_level) - { - skip_level++; - found_level--; - } 
- else - { - do_error("#else in non-conditional section"); - } - if (else_level == found_level + skip_level) - { - do_error("Too many #else"); - } - else_level = found_level + skip_level; - check_eol(); -} - -void preproc_endif(void) -{ - if (skip_level) - skip_level--; - else if (found_level) - found_level--; - else - do_error("#endif in non-conditional section"); - if (skip_level == 0) - else_skip_level = 0; - else_level = 0; - check_eol(); -} - -void preproc_error(void) -{ - skip_eol(); -} - -void preproc_warning(void) -{ - skip_eol(); -} - -void preproc_define(void) -{ - skip_eol(); -} - -void preproc_undef(void) -{ - if (skip_level) - { - skip_eol(); - return; - } - - next_token_nws(); - if (curtok.ttype != TOK_IDENT) - { - do_error("Bad #undef"); - symbol_undef(curtok.strval); - } - check_eol(); -} - -void preproc_line(void) -{ - skip_eol(); -} - -void preproc_pragma(void) -{ - if (skip_level || !eval_expr()) - skip_level++; - else - found_level++; -} - -void preproc_elif(void) -{ - if (skip_level == 0) - else_skip_level = found_level; - if (skip_level) - { - if (else_skip_level > found_level) - ; - else if (--skip_level != 0) - skip_level++; - else if (eval_expr()) - found_level++; - else - skip_level++; - } - else if (found_level) - { - skip_level++; - found_level--; - } - else - do_error("#elif in non-conditional section"); -} - - - -/* tokenizing stuff here */ -#undef to_buf -#define to_buf(c) do { if (strlen >= strbufl) { strbufl += 100; strbuf = lw_realloc(strbuf, strbufl); } strbuf[strlen++] = (c); strbuf[strlen] = 0; } while (0) -void next_token(void) -{ - int strbufl = 0; - int strlen = 0; - char *strbuf = NULL; - int c; - int ttype; - - lw_free(curtok.strval); - curtok.strval = NULL; - curtok.ttype = TOK_NONE; - - c = fetch_byte(); - if (c == CPP_EOL) - { - curtok.ttype = TOK_EOL; - return; - } - - if (c == CPP_EOF) - { - curtok.ttype = TOK_EOF; - return; - } - - if (is_whitespace(c)) - { - do - { - to_buf(c); - c = fetch_byte(); - } while 
(is_whitespace(c)); - unfetch_byte(c); - ttype = TOK_WSPACE; - goto out; - } - if (c == '/') - { - c = munch_comment(); - if (c >= 0) - { - to_buf(' '); - while (c--) - outchr(CPP_EOL); - ttype = TOK_WSPACE; - goto out; - } - c = '/'; - } - - if (c == '\'') - { - // we have a character constant here - ttype = TOK_NUMBER; - strbuf = lw_strdup(parse_chr_lit()); - goto out; - } - else if (c == '"') - { - // we have a string constant here - ttype = TOK_STRING; - strbuf = lw_strdup(parse_str_lit()); - goto out; - } - else if (c == '.') - { - // we might have a number here - c = fetch_byte(); - if (is_dec(c)) - { - unfetch_byte(c); - ttype = TOK_NUMBER; - strbuf = lw_strdup(parse_num_lit('.')); - goto out; - } - else - { - goto ttypegen; - } - } - else if (is_dec(c)) - { - // we have a number here - ttype = TOK_NUMBER; - strbuf = lw_strdup(parse_num_lit(c)); - } - else if (c == 'L') - { - // wide character string or wide character constant, or identifier - c = fetch_byte(); - if (c == '"') - { - char *s; - to_buf('L'); - s = parse_str_lit(); - while (*s) - to_buf(*s++); - ttype = TOK_STRING; - goto out; - } - else if (c == '\'') - { - char *s; - to_buf('L'); - s = parse_chr_lit(); - while (*s) - to_buf(*s++); - ttype = TOK_NUMBER; - goto out; - } - unfetch_byte(c); - ttype = TOK_IDENT; - strbuf = lw_strdup(parse_identifier('L')); - goto out; - } - else if (is_sidchr(c)) - { - // identifier of some kind - strbuf = lw_strdup(parse_identifier(c)); - ttype = TOK_IDENT; - } - else - { -ttypegen: - ttype = TOK_CHAR; - to_buf(c); - - switch (c) - { - case '/': - ttype = TOK_DIV; - break; - - case '*': - ttype = TOK_MUL; - break; - - case '+': - ttype = TOK_ADD; - break; - - case '-': - ttype = TOK_SUB; - break; - - case '<': - c = fetch_byte(); - if (c == '=') - ttype = TOK_LE; - else - { - ttype = TOK_LT; - unfetch_byte(c); - } - break; - - case '>': - c = fetch_byte(); - if (c == '=') - ttype = TOK_GE; - else - { - ttype = TOK_GT; - unfetch_byte(c); - } - break; - - case '=': 
- c = fetch_byte(); - if (c == '=') - ttype = TOK_EQ; - else - unfetch_byte(c); - break; - - case '!': - c = fetch_byte(); - if (c == '=') - ttype = TOK_NE; - else - { - ttype = TOK_BNOT; - unfetch_byte(c); - } - break; - - case '&': - c = fetch_byte(); - if (c == '&') - ttype = TOK_BAND; - else - unfetch_byte(c); - break; - - case '|': - c = fetch_byte(); - if (c == '|') - ttype = TOK_BOR; - else - unfetch_byte(c); - break; - - case '(': - ttype = TOK_OPAREN; - break; - - case ')': - ttype = TOK_CPAREN; - break; - - } - goto out; - } - -out: - curtok.ttype = ttype; - curtok.strval = strbuf; -} - -void next_token_nws(void) -{ - do - { - next_token(); - } while (curtok.ttype == TOK_WSPACE); -} - - -/* -evaluate an expression. Return true if expression is true, false if it -is false. Expression ends at the end of the line. Enter at eval_expr(). - -eval_term_real() evaluates a term in the expression. eval_expr_real() is -the main expression evaluator. -*/ - -int eval_expr(void) -{ - skip_eol(); - return 0; -}
--- a/lwcc/cpp/symbol.c Tue Sep 10 19:56:05 2013 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,79 +0,0 @@ -/* -lwcc/cpp/symbol.c - -Copyright © 2013 William Astle - -This file is part of LWTOOLS. - -LWTOOLS is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software -Foundation, either version 3 of the License, or (at your option) any later -version. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. - -You should have received a copy of the GNU General Public License along with -this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#include <stdlib.h> -#include <string.h> - -#include <lw_alloc.h> -#include <lw_string.h> - -#include "cpp.h" - -struct symtab_e *symtab_head = NULL; - -struct symtab_e *symbol_find(const char *s) -{ - struct symtab_e *r; - - for (r = symtab_head; r; r = r -> next) - if (strcmp(r -> name, s) == 0) - return r; - return NULL; -} - -void symbol_free(struct symtab_e *r) -{ - lw_free(r -> name); - lw_free(r -> strval); - lw_free(r); -} - -void symbol_undef(const char *s) -{ - struct symtab_e *r, **p; - - p = &symtab_head; - for (r = symtab_head; r; r = r -> next) - { - if (strcmp(r -> name, s) == 0) - { - *p = r -> next; - symbol_free(r); - return; - } - p = &(r -> next); - } -} - -struct symtab_e *symbol_add(const char *s, const char *str, int nargs, int vargs) -{ - struct symtab_e *r; - - r = lw_alloc(sizeof (struct symtab_e)); - *r = (struct symtab_e){ - .name = lw_strdup(s), - .strval = lw_strdup(str), - .nargs = nargs, - .vargs = vargs, - .next = symtab_head }; - symtab_head = r; - return r; -}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lwcc/driver-main.c Thu Sep 12 22:06:26 2013 -0600 @@ -0,0 +1,1072 @@ +/* +lwcc/driver/main.c + +Copyright © 2013 William Astle + +This file is part of LWTOOLS. + +LWTOOLS is free software: you can redistribute it and/or modify it under the +terms of the GNU General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <errno.h> +#include <signal.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include <lw_alloc.h> +#include <lw_string.h> +#include <lw_stringlist.h> + +#define VERSTRING "lwcc from " PACKAGE_STRING +#define S(x) S2(x) +#define S2(x) #x + +#define BASEDIR S(LWCC_LIBDIR) + +/* list of compilation phases */ +enum phase_t { + PHASE_DEFAULT = 0, + PHASE_PREPROCESS, + PHASE_COMPILE, + PHASE_ASSEMBLE, + PHASE_LINK +}; + +/* these are the names of various programs the compiler calls */ +const char *linker_program_name = "lwlink"; +const char *compiler_program_name = "lwcc1"; +const char *assembler_program_name = "lwasm"; +const char *preprocessor_program_name = "lwcpp"; + +/* this will be set to the directory where temporary files get created */ +const char *temp_directory = NULL; + +/* these are for book keeping if we get interrupted - the volatile and atomic + types are needed because they are accessed in a signal handler */ +static volatile sig_atomic_t sigterm_received = 0; +static volatile sig_atomic_t child_pid = 0; + +/* path 
specified with --sysroot */ +const char *sysroot = ""; +/* path specified with -isysroot */ +const char *isysroot = NULL; + +/* record which phase to stop after for -c, -E, and -S */ +/* default is to stop after PHASE_LINK */ +static int stop_after = PHASE_DEFAULT; + +int nostdinc = 0; // set if -nostdinc is specified +int nostartfiles = 0; // set if -nostartfiles is specified +int nostdlib = 0; // set if -nostdlib is specified +int verbose_mode = 0; // set to number of --verbose arguments +int save_temps = 0; // set if -save-temps is specified +int debug_mode = 0; // set if -g specified +int pic_mode = 0; // set to 1 if -fpic, 2 if -fPIC; last one specified wins +const char *output_file; // set to the value of the -o option (output file) + +/* compiler base directory - from -B */ +const char *basedir = BASEDIR; + +/* used to ensure a unique temporary file at every stage */ +static int file_counter = 0; + +/* these are various string lists used to keep track of things, mostly + command line arguments. 
*/
+
+lw_stringlist_t input_files; // input files from command line
+lw_stringlist_t runtime_dirs; // directories to search for runtime files
+lw_stringlist_t lib_dirs; // directories to search for library files
+lw_stringlist_t program_dirs; // directories to search for compiler program components
+lw_stringlist_t preproc_args; // recorded arguments to pass through to the preprocessor
+lw_stringlist_t include_dirs; // include paths specified with -I
+lw_stringlist_t includes; // include paths specified with -include
+lw_stringlist_t user_sysincdirs; // include paths specified with -isystem
+lw_stringlist_t asm_args; // recorded arguments to pass through to the assembler
+lw_stringlist_t linker_args; // recorded arguments to pass through to the linker
+lw_stringlist_t sysincdirs; // the standard system include directories
+lw_stringlist_t tempfiles; // a list of temporary files created which need to be cleaned up
+lw_stringlist_t compiler_args; // recorded arguments to pass through to the compiler
+lw_stringlist_t priv_sysincdirs; // system include directories for lwcc itself
+
+/* forward declarations */
+static void parse_command_line(int, char **);
+
+/* signal handler for SIGTERM - all it does is record the fact that
+   SIGTERM happened and propagate the signal to whatever child process
+   might currently be running. The sig argument is not examined; SIGTERM
+   is always what gets forwarded to the child. */
+static void exit_on_signal(int sig)
+{
+	sigterm_received = 1;
+	if (child_pid)
+		kill(child_pid, SIGTERM);
+}
+
+/* utility function to carp about an error condition and bail. Takes a
+   printf style format and arguments, writes "ERROR: <message>" and a
+   newline to stderr, then calls exit(1) so it never returns. */
+void do_error(const char *f, ...)
+{
+	va_list arg;
+	va_start(arg, f);
+	fprintf(stderr, "ERROR: ");
+	vfprintf(stderr, f, arg);
+	putc('\n', stderr);
+	va_end(arg);
+	exit(1);
+}
+
+/* utility function to carp about some condition; do not bail. Same
+   calling convention as do_error() but the prefix is "WARNING: " and
+   control returns to the caller. */
+void do_warning(const char *f, ...)
+{
+	va_list arg;
+	va_start(arg, f);
+	fprintf(stderr, "WARNING: ");
+	vfprintf(stderr, f, arg);
+	putc('\n', stderr);
+	va_end(arg);
+}
+
+/* utility function to print out an array of strings - stops at the first
+   NULL string pointer. Entries are written to stdout separated by a
+   single space; no trailing newline is written. */
+static void print_array(char **arr)
+{
+	int c = 0;
+	while (*arr)
+	{
+		if (c)
+			printf(" ");
+		printf("%s", *arr);
+		arr++;
+		c = 1;
+	}
+}
+
+/* expand any search path entries to reflect the sysroot and
+   isysroot settings. Note that it does NOT apply to the compiler
+   program search path. An entry is rewritten only when its first
+   character is '='; that character is replaced by the matching
+   sysroot string. */
+static void expand_sysroot(void)
+{
+	/* list of path lists to process for replacements of = */
+	lw_stringlist_t *lists[] = { &sysincdirs, &include_dirs, &user_sysincdirs, &lib_dirs, NULL };
+	/* list of replacement strings for = in the same order */
+	const char *sysroots[] = { isysroot, isysroot, isysroot, sysroot, NULL };
+	size_t i, sysroot_len, value_len;
+	char *path;
+	lw_stringlist_t newlist;
+	lw_stringlist_t working;
+	char *s;
+	
+	/* for each list, run through entry by entry, do any needed replacement
+	   and add the entry to a new list. Then replace the old list with the
+	   new one. */
+	for (i = 0; lists[i] != NULL; i++)
+	{
+		working = *lists[i];
+		newlist = lw_stringlist_create();
+		
+		lw_stringlist_reset(working);
+		for (s = lw_stringlist_current(working); s; s = lw_stringlist_next(working))
+		{
+			if (s[0] == '=')
+			{
+				sysroot_len = strlen(sysroots[i]);
+				value_len = strlen(s);
+				/* note that the skipped = will make up for the trailing NUL */
+				path = lw_alloc(sysroot_len + value_len);
+				memcpy(path, sysroots[i], sysroot_len);
+				/* s + 1 skips the '='; copying value_len bytes from there
+				   takes the remaining characters plus the trailing NUL */
+				memcpy(path + sysroot_len, s + 1, value_len);
+				lw_stringlist_addstring(newlist, path);
+				/* path is released right after being added, so the list
+				   presumably keeps its own copy - confirm in lw_stringlist */
+				lw_free(path);
+			}
+			else
+			{
+				lw_stringlist_addstring(newlist, s);
+			}
+		}
+		lw_stringlist_destroy(working);
+		*lists[i] = newlist;
+	}
+}
+
+/* look for file fn in path list p which is okay for access mode mode.
+   Return a string allocated by lw_alloc.
*/
+/* Each directory in p is joined to fn with exactly one '/' between them
+   and the first candidate for which access() succeeds is returned. If no
+   candidate matches, a copy of the bare fn is returned, so the result is
+   never NULL and must always be released with lw_free(). */
+static char *find_file(const char *fn, lw_stringlist_t p, int mode)
+{
+	char *s;
+	char *f;
+	size_t lf, lp;
+	int need_slash;
+	
+	lf = strlen(fn);
+	lw_stringlist_reset(p);
+	for (s = lw_stringlist_current(p); s; s = lw_stringlist_next(p))
+	{
+		lp = strlen(s);
+		/* a separator is needed only when the directory is non-empty and
+		   does not already end in one */
+		need_slash = 0;
+		if (lp && s[lp - 1] != '/')
+			need_slash = 1;
+		f = lw_alloc(lp + lf + need_slash + 1);
+		memcpy(f, s, lp);
+		if (need_slash)
+			f[lp] = '/';
+		/* +1 gets the NUL */
+		memcpy(f + lp + need_slash, fn, lf + 1);
+		if (access(f, mode) == 0)
+			return f;
+		lw_free(f);
+	}
+	/* if not found anywhere, try the bare filename - it might work */
+	return lw_strdup(fn);
+}
+
+/* take a string list which contains an argv and execute the specified
+   program. The first string is the program to run (looked up by execvp()).
+   Returns nonzero if SIGTERM was received before or while the child ran,
+   zero otherwise. A child that exits with a nonzero status, or a failed
+   fork(), is reported through do_error() which does not return. */
+static int execute_program(lw_stringlist_t args)
+{
+	int argc;
+	char **argv;
+	int result;
+	char *s;
+	
+	/* build a NULL terminated pointer array; the strings themselves stay
+	   owned by args, only the array is allocated here */
+	argc = lw_stringlist_nstrings(args);
+	argv = lw_alloc(sizeof(char *) * (argc + 1));
+	lw_stringlist_reset(args);
+	for (result = 0, s = lw_stringlist_current(args); s; s = lw_stringlist_next(args))
+	{
+		/* advance the slot for every argument so each one is kept */
+		argv[result++] = s;
+	}
+	argv[result] = NULL;
+
+	if (verbose_mode)
+	{
+		printf("Executing ");
+		print_array(argv);
+		printf("\n");
+	}
+	
+	/* bail now if a signal happened */
+	if (sigterm_received)
+	{
+		lw_free(argv);
+		return 1;
+	}
+
+	/* make sure stdio has flushed everything so that output from the
+	   child process doesn't get intermingled */
+	fflush(NULL);
+	
+	/* now make the child process */
+	child_pid = fork();
+	if (child_pid == 0)
+	{
+		/* child process */
+		/* try executing program */
+		execvp(argv[0], argv);
+		/* only way to get here is if execvp() failed so carp about it and exit */
+		fprintf(stderr, "Exec of %s failed: %s", argv[0], strerror(errno));
+		/* exit with failure but don't call any atexit(), etc., functions */
+		_exit(127);
+	}
+	else if (child_pid == -1)
+	{
+		/* failure to make child process */
+		do_error("Failed to execute program %s: %s", argv[0], strerror(errno));
+	}
+	
+	/* parent process - wait for child to exit */
+	while (waitpid(child_pid, &result, 0) == -1 && errno == EINTR)
+		/* do nothing */;
+	/* fetch actual return status */
+	result = WEXITSTATUS(result);
+	if (result)
+	{
+		/* carp about non-zero return status; argv must still be valid
+		   here because the message names argv[0] */
+		do_error("%s terminated with status %d", argv[0], result);
+	}
+	/* clean up argv - only after its last use above */
+	lw_free(argv);
+	/* return nonzero if signalled to exit */
+	return sigterm_received;
+}
+
+/*
+construct an output file name as follows:
+
+1. if it is the last phase of compilation and an output file name is
+   specified, use that
+2. otherwise, if it is the last phase or we are saving temporary files,
+   any suffix on f is removed and replaced with nsuffix
+3. otherwise, a temporary file is created. If necessary, a temporary
+   directory is created to hold the temporary file. The name of the temporary
+   file is recorded in the tempfiles string list for later cleanup. The name
+   of the temporary directory is recorded in temp_directory for later cleanup.
+
+The returned string is allocated (lw_strdup/lw_alloc) and belongs to the
+caller. file_counter is advanced on every call, whichever rule applies.
+*/
+static char *output_name(const char *f, const char *nsuffix, int last)
+{
+	const char *osuffix;
+	char *name;
+	size_t lf, ls, len;
+	int counter_len;
+	
+	/* get a new file counter */
+	file_counter++;
+	
+	/* if the output was specified, use it */
+	if (last && output_file)
+	{
+		return lw_strdup(output_file);
+	}
+
+	/* find the start of the old suffix; a '.' that is followed by a '/'
+	   belongs to a directory name, not to the file, so it is ignored */
+	osuffix = strrchr(f, '.');
+	if (osuffix != NULL && strchr(osuffix, '/') != NULL)
+		osuffix = NULL;
+	if (osuffix == NULL)
+		osuffix = f + strlen(f);
+	
+	ls = strlen(nsuffix);
+	
+	/* if this is the last stage or we're saving temps, use a name derived
+	   from the original file name by replacing the suffix with nsuffix */
+	if (save_temps || last)
+	{
+		lf = osuffix - f;
+		name = lw_alloc(lf + ls + 1);
+		memcpy(name, f, lf);
+		/* note that the +1 will copy the trailing NUL */
+		memcpy(name + lf, nsuffix, ls + 1);
+		return name;
+	}
+
+	/* finally, use a temporary file */
+	if (temp_directory == NULL)
+	{
+		/* if we haven't already made a temporary directory, do so */
+		const char *dirtempl;
+		char *path;
+		size_t dirtempl_len;
+		int need_slash;
+		
+		/* look for a TMPDIR environment variable and use that if present
+		   but use /tmp as a fallback */
+		dirtempl = getenv("TMPDIR");
+		if (dirtempl == NULL)
+			dirtempl = "/tmp";
+		dirtempl_len = strlen(dirtempl);
+		/* work out if we need to add a slash on the end of the directory */
+		if (dirtempl_len && dirtempl[dirtempl_len - 1] == '/')
+			need_slash = 0;
+		else
+			need_slash = 1;
+		/* make a string of the form <tempdir>/lwcc-XXXXXX; 11 is the
+		   length of "lwcc-XXXXXX" and the extra 1 is for the NUL */
+		path = lw_alloc(dirtempl_len + need_slash + 11 + 1);
+		memcpy(path, dirtempl, dirtempl_len);
+		if (need_slash)
+			path[dirtempl_len] = '/';
+		memcpy(path + dirtempl_len + need_slash, "lwcc-XXXXXX", 12);
+		/* now make a temporary directory */
+		if (mkdtemp(path) == NULL)
+			do_error("mkdtemp failed: %s", strerror(errno));
+		/* record the temporary directory name */
+		temp_directory = path;
+	}
+	/* now create a file name in the temporary directory. The strategy here
+	   uses a counter that is passed along and is guaranteed to be unique for
+	   every file requested. */
+	lf = strlen(temp_directory);
+	/* this gets the length of the counter as a string but doesn't actually
+	   allocate anything so we can make a string long enough */
+	counter_len = snprintf(NULL, 0, "%d", file_counter);
+	if (counter_len < 1)
+		do_error("snprintf failure: %s", strerror(errno));
+	len = lf + 1 + (size_t)counter_len + ls + 1;
+	name = lw_alloc(len);
+	/* it should be impossible for this snprintf call to fail */
+	snprintf(name, len, "%s/%d%s", temp_directory, file_counter, nsuffix);
+	
+	/* record the temporary file name for later */
+	lw_stringlist_addstring(tempfiles, name);
+	return name;
+}
+
+/* this calls the actual compiler, passing the contents of compiler_args
+   as arguments. It also adds the input file and output file.
*/
+/* file is the name the output name is derived from, input is the file
+   actually handed to the compiler, and *output receives the newly
+   allocated output file name. When *output and input are the same pointer
+   on entry, input is freed here before *output is overwritten. suffix is
+   the suffix given to output_name(). Returns what execute_program()
+   returns. */
+static int compile_file(const char *file, char *input, char **output, const char *suffix)
+{
+	lw_stringlist_t args;
+	char *out;
+	int retval;
+	char *s;
+	
+	args = lw_stringlist_create();
+	
+	/* find the compiler executable and make that argv[0] */
+	s = find_file(compiler_program_name, program_dirs, X_OK);
+	lw_stringlist_addstring(args, s);
+	lw_free(s);
+	
+	/* add all the saved compiler arguments to argv */
+	lw_stringlist_reset(compiler_args);
+	for (s = lw_stringlist_current(compiler_args); s; s = lw_stringlist_next(compiler_args))
+	{
+		lw_stringlist_addstring(args, s);
+	}
+	/* work out the output file name and add that to argv; the name is
+	   treated as final when compilation is the last requested phase */
+	out = output_name(file, suffix, stop_after == PHASE_COMPILE);
+	lw_stringlist_addstring(args, "-o");
+	lw_stringlist_addstring(args, out);
+	/* add the input file to argv */
+	lw_stringlist_addstring(args, input);
+	/* if the input file name and the output file name pointers are the same
+	   free the input one */
+	if (*output == input)
+		lw_free(input);
+	/* tell the caller what the output name is */
+	*output = out;
+	/* actually run the compiler */
+	retval = execute_program(args);
+	
+	lw_stringlist_destroy(args);
+	return retval;
+}
+
+/* this calls the actual assembler, passing the contents of asm_args
+   as arguments. It also adds the input file and output file.
*/
+/* file is the name the output name is derived from, input is the file
+   actually handed to the assembler, and *output receives the newly
+   allocated output file name. When *output and input are the same pointer
+   on entry, input is freed here before *output is overwritten. The suffix
+   parameter is not referenced in the body; the output suffix is always
+   ".o". Returns what execute_program() returns. */
+static int assemble_file(const char *file, char *input, char **output, const char *suffix)
+{
+	lw_stringlist_t args;
+	char *out;
+	int retval;
+	char *s;
+	
+	args = lw_stringlist_create();
+	
+	/* find the assembler binary and add that as argv[0] */
+	s = find_file(assembler_program_name, program_dirs, X_OK);
+	lw_stringlist_addstring(args, s);
+	lw_free(s);
+	
+	/* add asm_args to argv */
+	lw_stringlist_reset(asm_args);
+	for (s = lw_stringlist_current(asm_args); s; s = lw_stringlist_next(asm_args))
+	{
+		lw_stringlist_addstring(args, s);
+	}
+	/* get an output file name and add that to argv; the name is treated
+	   as final when assembly is the last requested phase */
+	out = output_name(file, ".o", stop_after == PHASE_ASSEMBLE);
+	lw_stringlist_addstring(args, "-o");
+	lw_stringlist_addstring(args, out);
+	/* finally, add the input file */
+	lw_stringlist_addstring(args, input);
+	/* clean up input file name if same as output pointer */
+	if (*output == input)
+		lw_free(input);
+	/* tell caller what file we made */
+	*output = out;
+	/* actually run the assembler */
+	retval = execute_program(args);
+	
+	lw_stringlist_destroy(args);
+	return retval;
+}
+
+/* run the preprocessor.
Pass along preproc_args and appropriate options
+   for all the include directories. file is the name the output name is
+   derived from, input is the file handed to the preprocessor, and *output
+   receives the newly allocated output name ("-" when the result goes to
+   stdout). When *output and input are the same pointer on entry, input is
+   freed here. Returns what execute_program() returns. */
+static int preprocess_file(const char *file, char *input, char **output, const char *suffix)
+{
+	lw_stringlist_t args;
+	char *s;
+	char *out;
+	int retval;
+	
+	args = lw_stringlist_create();
+
+	/* find the preprocessor binary and make that argv[0] */
+	s = find_file(preprocessor_program_name, program_dirs, X_OK);
+	lw_stringlist_addstring(args, s);
+	lw_free(s);
+	
+	/* add preproc_args to argv */
+	lw_stringlist_reset(preproc_args);
+	for (s = lw_stringlist_current(preproc_args); s; s = lw_stringlist_next(preproc_args))
+	{
+		lw_stringlist_addstring(args, s);
+	}
+
+	/* add every entry of the includes list, each passed as -i */
+	lw_stringlist_reset(includes);
+	for (s = lw_stringlist_current(includes); s; s = lw_stringlist_next(includes))
+	{
+		lw_stringlist_addstring(args, "-i");
+		lw_stringlist_addstring(args, s);
+	}
+
+	/* add the include directories specified by -I */
+	lw_stringlist_reset(include_dirs);
+	for (s = lw_stringlist_current(include_dirs); s; s = lw_stringlist_next(include_dirs))
+	{
+		lw_stringlist_addstring(args, "-I");
+		lw_stringlist_addstring(args, s);
+	}
+
+	/* add the user specified system include directories (-isystem);
+	   these are handed to the preprocessor as -S */
+	lw_stringlist_reset(user_sysincdirs);
+	for (s = lw_stringlist_current(user_sysincdirs); s; s = lw_stringlist_next(user_sysincdirs))
+	{
+		lw_stringlist_addstring(args, "-S");
+		lw_stringlist_addstring(args, s);
+	}
+
+	/* and, if not -nostdinc, the standard system include directories;
+	   lwcc's private directories go first, then the system ones */
+	if (!nostdinc)
+	{
+		lw_stringlist_reset(priv_sysincdirs);
+		for (s = lw_stringlist_current(priv_sysincdirs); s; s = lw_stringlist_next(priv_sysincdirs))
+		{
+			lw_stringlist_addstring(args, "-S");
+			lw_stringlist_addstring(args, s);
+		}
+		lw_stringlist_reset(sysincdirs);
+		for (s = lw_stringlist_current(sysincdirs); s; s = lw_stringlist_next(sysincdirs))
+		{
+			lw_stringlist_addstring(args, "-S");
+			lw_stringlist_addstring(args, s);
+		}
+	}
+	
+	/* if we stop after preprocessing, output to stdout if no output file */
+	if (stop_after == PHASE_PREPROCESS && output_file == NULL)
+	{
+		out = lw_strdup("-");
+	}
+	else
+	{
+		/* otherwise, make an output file */
+		out = output_name(file, suffix, stop_after == PHASE_PREPROCESS);
+	}
+	/* if not stdout, add the output file to argv */
+	if (strcmp(out, "-") != 0)
+	{
+		lw_stringlist_addstring(args, "-o");
+		lw_stringlist_addstring(args, out);
+	}
+	/* add the input file name to argv */
+	lw_stringlist_addstring(args, input);
+
+	/* if input and output pointers are same, clean up input */
+	if (*output == input)
+		lw_free(input);
+	/* tell caller what our output file is */
+	*output = out;
+	/* finally, actually run the preprocessor */
+	retval = execute_program(args);
+	
+	lw_stringlist_destroy(args);
+	return retval;
+}
+
+/*
+handle an input file through the various stages of compilation. If any
+stage decides to handle an input file, that fact is recorded. If control
+reaches the end of the function without a file being handled, that
+fact is mentioned to the user. Unknown files are passed to the linker
+if nothing handles them and linking is to be done. It's possible the linker
+will actually know what to do with them.
+*/
+/* f is the file name as given on the command line ("-" is treated as C
+   source). Returns zero on success or the nonzero result of the first
+   stage that failed. handled and retval start out as zero because the
+   done: label is reachable without any stage having run (unknown suffix,
+   an object file, or a stop before the matching stage). */
+static int handle_input_file(const char *f)
+{
+	const char *suffix;
+	char *src;
+	int handled = 0, retval = 0;
+
+	/* note: this needs to handle -x but for now, assume c for stdin */
+	if (strcmp(f, "-") == 0)
+	{
+		suffix = ".c";
+	}
+	else
+	{
+		/* work out the suffix on the file; a '.' that is followed by a
+		   '/' belongs to a directory name and does not count */
+		suffix = strrchr(f, '.');
+		if (suffix != NULL && strchr(suffix, '/') != NULL)
+			suffix = NULL;
+		if (suffix == NULL)
+			suffix = "";
+	}
+	
+	/* make a copy of the file name; each stage below replaces src with
+	   the name of the file it produced */
+	src = lw_strdup(f);
+	
+	/* preprocess if appropriate */
+	if (strcmp(suffix, ".c") == 0)
+	{
+		/* preprocessed c input source goes to .i */
+		suffix = ".i";
+		retval = preprocess_file(f, src, &src, suffix);
+		if (retval)
+			goto done;
+		handled = 1;
+	}
+	else if (strcmp(suffix, ".S") == 0)
+	{
+		/* preprocessed asm source goes to .s */
+		suffix = ".s";
+		retval = preprocess_file(f, src, &src, suffix);
+		if (retval)
+			goto done;
+		handled = 1;
+	}
+	/* if we're only preprocessing, bail */
+	if (stop_after == PHASE_PREPROCESS)
+		goto done;
+	
+	/* now on to compile if appropriate */
+	if (strcmp(suffix, ".i") == 0)
+	{
+		/* preprocessed c source goes to .s after compiling */
+		suffix = ".s";
+		retval = compile_file(f, src, &src, suffix);
+		if (retval)
+			goto done;
+		handled = 1;
+	}
+	/* bail if we're only compiling, not assembling */
+	if (stop_after == PHASE_COMPILE)
+		goto done;
+	
+	/* assemble if appropriate */
+	if (strcmp(suffix, ".s") == 0)
+	{
+		/* assembler output is an object file */
+		suffix = ".o";
+		retval = assemble_file(f, src, &src, suffix);
+		if (retval)
+			goto done;
+		handled = 1;
+	}
+	/* bail if we're not linking */
+	if (stop_after == PHASE_ASSEMBLE)
+		goto done;
+
+	/* if we get here with a .o unhandled, pretend it is handled */
+	if (strcmp(suffix, ".o") == 0)
+		handled = 1;
+	
+	/* add the final file name to the linker args */
+	lw_stringlist_addstring(linker_args, src);
+done:
+	if (!handled && !retval)
+	{
+		/* carp about unhandled files if there is no error */
+		if (stop_after == PHASE_LINK)
+		{
+			do_warning("unknown suffix %s; passing file down to linker", suffix);
+		}
+		else
+		{
+			do_warning("unknown suffix %s; skipped", suffix);
+		}
+	}
+	/* clean up the file name */
+	lw_free(src);
+	
+	return retval;
+}
+
+/*
+This actually runs the linker. Along the way, all the files the linker
+is supposed to handle will have been added to linker_args. Returns what
+execute_program() returns. The -nostdlib and -nostartfiles branches are
+still empty placeholders, so no libraries or startup files are added yet.
+*/
+static int handle_linking(void)
+{
+	lw_stringlist_t linker_flags;
+	char *s;
+	int retval;
+	
+	linker_flags = lw_stringlist_create();
+	
+	/* find the linker binary and make that argv[0] */
+	s = find_file(linker_program_name, program_dirs, X_OK);
+	lw_stringlist_addstring(linker_flags, s);
+	lw_free(s);
+	
+	/* tell the linker about the output file name, if specified */
+	if (output_file)
+	{
+		lw_stringlist_addstring(linker_flags, "-o");
+		lw_stringlist_addstring(linker_flags, (char *)output_file);
+	}
+	
+	/* add the standard library options if not -nostdlib */
+	if (!nostdlib)
+	{
+	}
+	
+	/* add the standard startup files if not -nostartfiles */
+	if (!nostartfiles)
+	{
+	}
+	
+	/* pass along the various input files, etc., to the linker */
+	lw_stringlist_reset(linker_args);
+	for (s = lw_stringlist_current(linker_args); s; s = lw_stringlist_next(linker_args))
+	{
+		lw_stringlist_addstring(linker_flags, s);
+	}
+	
+	/* actually run the linker */
+	retval = execute_program(linker_flags);
+	
+	lw_stringlist_destroy(linker_flags);
+	return retval;
+}
+
+/*
+Do various setup tasks, process the command line, handle the input files,
+and clean up.
+*/ +int main(int argc, char **argv) +{ + char *ap; + int retval; + + input_files = lw_stringlist_create(); + runtime_dirs = lw_stringlist_create(); + lib_dirs = lw_stringlist_create(); + program_dirs = lw_stringlist_create(); + preproc_args = lw_stringlist_create(); + include_dirs = lw_stringlist_create(); + user_sysincdirs = lw_stringlist_create(); + asm_args = lw_stringlist_create(); + linker_args = lw_stringlist_create(); + sysincdirs = lw_stringlist_create(); + includes = lw_stringlist_create(); + tempfiles = lw_stringlist_create(); + compiler_args = lw_stringlist_create(); + priv_sysincdirs = lw_stringlist_create(); + + parse_command_line(argc, argv); + if (stop_after == PHASE_DEFAULT) + stop_after = PHASE_LINK; + + if (verbose_mode) + printf("%s\n", VERSTRING); + + if (isysroot == NULL) + isysroot = sysroot; + expand_sysroot(); + + if (stop_after != PHASE_LINK && output_file && lw_stringlist_nstrings(input_files) > 1) + { + do_error("-o cannot be specified with multiple inputs unless linking"); + } + + // default to stdout for preprocessing + if (stop_after == PHASE_PREPROCESS && output_file == NULL) + output_file = "-"; + + if (lw_stringlist_nstrings(input_files) == 0) + do_error("No input files specified"); + + /* handle -B here */ + ap = lw_alloc(strlen(basedir) + 10); + strcpy(ap, basedir); + strcat(ap, "/bin"); + lw_stringlist_addstring(program_dirs, ap); + strcpy(ap, basedir); + strcat(ap, "/lib"); + lw_stringlist_addstring(runtime_dirs, ap); + strcpy(ap, basedir); + strcat(ap, "/include"); + lw_stringlist_addstring(priv_sysincdirs, ap); + lw_free(ap); + + retval = 0; + /* make sure we exit if interrupted */ + signal(SIGTERM, exit_on_signal); + + /* handle input files */ + lw_stringlist_reset(input_files); + for (ap = lw_stringlist_current(input_files); ap; ap = lw_stringlist_next(input_files)) + { + if (handle_input_file(ap)) + retval = 1; + } + + if (!retval && stop_after >= PHASE_LINK) + { + retval = handle_linking(); + } + + /* if a signal nixed 
us, mention the fact */ + if (sigterm_received) + do_warning("Terminating on signal"); + + /* clean up temporary files */ + if (!save_temps) + { + lw_stringlist_reset(tempfiles); + for (ap = lw_stringlist_current(tempfiles); ap; ap = lw_stringlist_next(tempfiles)) + { + if (unlink(ap) == -1) + { + do_warning("Removal of %s failed: %s", ap, strerror(errno)); + } + } + if (temp_directory) + { + if (rmdir(temp_directory) == -1) + { + do_warning("Removal of temporary directory %s failed: %s", temp_directory, strerror(errno)); + } + } + } + + /* be polite and clean up all the string lists */ + lw_stringlist_destroy(input_files); + lw_stringlist_destroy(runtime_dirs); + lw_stringlist_destroy(lib_dirs); + lw_stringlist_destroy(program_dirs); + lw_stringlist_destroy(preproc_args); + lw_stringlist_destroy(include_dirs); + lw_stringlist_destroy(user_sysincdirs); + lw_stringlist_destroy(asm_args); + lw_stringlist_destroy(linker_args); + lw_stringlist_destroy(sysincdirs); + lw_stringlist_destroy(includes); + lw_stringlist_destroy(tempfiles); + lw_stringlist_destroy(compiler_args); + lw_stringlist_destroy(priv_sysincdirs); + return retval; +} + +struct option_e +{ + char *optbase; // base name of option, with - + int needarg; // nonzero if option needs argument + int noextra; // nonzero if there must not be anything after optbase + int optcode; // option code (passed to fn) + void *optptr; // pointer for opt (passed to fn) + int (*fn)(char *, char *, int, void *); // function to handle argument, NULL to ignore it +}; + +enum CMD_MISC { + CMD_MISC_VERSION, + CMD_MISC_OPTIMIZE, +}; + +enum OPT_ARG { + OPT_ARG_OPT = 0, // argument is optional + OPT_ARG_SEP = 1, // argument may be separate + OPT_ARG_INC = 2, // argument must not be separate +}; + +/* set an integer at *optptr to optcode */ +static int cmdline_set_int(char *opt, char *optarg, int optcode, void *optptr) +{ + *((int *)optptr) = optcode; + return 0; +} + +/* set a string at *optptr to optarg */ +static int 
cmdline_set_string(char *opt, char *optarg, int optcode, void *optptr) +{ + char **s = (char **)optptr; + *s = optarg; + + return 0; +} + +/* set a string at *optptr to optarg */ +static int cmdline_set_stringifnull(char *opt, char *optarg, int optcode, void *optptr) +{ + char **s = (char **)optptr; + + if (*s) + do_error("Multiple %.*s options specified", optcode ? optcode : strlen(opt), opt); + *s = optarg; + + return 0; +} + +/* split arg on commas and add the results to string list *optptr */ +static int cmdline_argsplit(char *opt, char *arg, int optcode, void *optptr) +{ + lw_stringlist_t l = *(lw_stringlist_t *)optptr; + char *next; + + for (; arg != NULL; arg = next) + { + next = strchr(arg, ','); + if (next != NULL) + *next++ = '\0'; + lw_stringlist_addstring(l, arg); + } + return 0; +} + +/* add opt to string list *optptr */ +static int cmdline_arglist(char *opt, char *arg, int optcode, void *optptr) +{ + lw_stringlist_t l = *(lw_stringlist_t *)optptr; + + lw_stringlist_addstring(l, opt); + return 0; +} + +/* add optarg to string list *optptr */ +static int cmdline_optarglist(char *opt, char *optarg, int optcode, void *optptr) +{ + lw_stringlist_t l = *(lw_stringlist_t *)optptr; + + lw_stringlist_addstring(l, optarg); + return 0; +} + +static int cmdline_misc(char *opt, char *optarg, int optcode, void *optptr) +{ + switch (optcode) + { + case CMD_MISC_VERSION: + printf("%s\n", VERSTRING); + exit(0); + + case CMD_MISC_OPTIMIZE: + if (!optarg) + return 0; + switch (*optarg) + { + case '0': + case '1': + case '2': + case '3': + case 's': + return 0; + } + return -1; + + default: + return -1; + } + return 0; +} + +static int cmdline_set_intifzero(char *opt, char *optarg, int optcode, void *optptr) +{ + int *iv = (int *)optptr; + + if (*iv && *iv != optcode) + { + do_error("conflicting compiler option specified: %s", opt); + } + *iv = optcode; + return 0; +} + +struct option_e optionlist[] = +{ + { "--version", OPT_ARG_OPT, 1, CMD_MISC_VERSION, NULL, 
cmdline_misc }, + { "--sysroot=", OPT_ARG_INC, 0, 0, &sysroot, cmdline_set_string }, + { "-B", OPT_ARG_INC, 0, 0, &basedir, cmdline_set_string }, + { "-C", OPT_ARG_OPT, 1, 0, &preproc_args, cmdline_arglist }, + { "-c", OPT_ARG_OPT, 1, PHASE_COMPILE, &stop_after, cmdline_set_intifzero }, + { "-D", OPT_ARG_INC, 0, 0, &preproc_args, cmdline_arglist }, + { "-E", OPT_ARG_OPT, 1, PHASE_PREPROCESS, &stop_after, cmdline_set_intifzero }, + { "-fPIC", OPT_ARG_OPT, 1, 2, &pic_mode, cmdline_set_int }, + { "-fpic", OPT_ARG_OPT, 1, 1, &pic_mode, cmdline_set_int }, + { "-g", OPT_ARG_OPT, 1, 1, &debug_mode, cmdline_set_int }, + { "-I", OPT_ARG_SEP, 0, 0, &include_dirs, cmdline_optarglist }, + { "-include", OPT_ARG_SEP, 1, 0, &includes, cmdline_optarglist }, + { "-isysroot", OPT_ARG_SEP, 1, 0, &isysroot, cmdline_set_string }, + { "-isystem", OPT_ARG_SEP, 1, 0, &user_sysincdirs, cmdline_optarglist }, + { "-M", OPT_ARG_OPT, 1, 0, &preproc_args, cmdline_arglist }, + { "-nostartfiles", OPT_ARG_OPT, 1, 1, &nostartfiles, cmdline_set_int }, + { "-nostdinc", OPT_ARG_OPT, 1, 1, &nostdinc, cmdline_set_int }, + { "-nostdlib", OPT_ARG_OPT, 1, 1, &nostdlib, cmdline_set_int }, + { "-O", OPT_ARG_OPT, 0, CMD_MISC_OPTIMIZE, NULL, cmdline_misc }, + { "-o", OPT_ARG_SEP, 0, 2, &output_file, cmdline_set_stringifnull }, + { "-S", OPT_ARG_OPT, 1, PHASE_ASSEMBLE, &stop_after, cmdline_set_intifzero }, + { "-save-temps", OPT_ARG_OPT, 1, 1, &save_temps, cmdline_set_int }, + { "-trigraphs", OPT_ARG_OPT, 1, 0, &preproc_args, cmdline_arglist }, + { "-U", OPT_ARG_INC, 0, 0, &preproc_args, cmdline_arglist }, + { "-v", OPT_ARG_OPT, 1, 1, &verbose_mode, cmdline_set_int }, + { "-Wp,", OPT_ARG_INC, 0, 0, &preproc_args, cmdline_argsplit }, + { "-Wa,", OPT_ARG_INC, 0, 0, &asm_args, cmdline_argsplit }, + { "-Wl,", OPT_ARG_INC, 0, 0, &linker_args, cmdline_argsplit }, + { "-W", OPT_ARG_INC, 0, 0, NULL, NULL }, /* warning options */ + { "-x", OPT_ARG_SEP, 1, 0, NULL, NULL }, /* language options */ + { NULL, 0, 0 } +}; + 
+static void parse_command_line(int argc, char **argv) +{ + int i, j, olen, ilen; + char *optarg; + + for (i = 1; i < argc; i++) + { + if (argv[i][0] != '-' || argv[i][1] == '\0') + { + /* we have a non-option argument */ + lw_stringlist_addstring(input_files, argv[i]); + continue; + } + olen = strlen(argv[i]); + for (j = 0; optionlist[j].optbase; j++) + { + ilen = strlen(optionlist[j].optbase); + /* if length of optbase is longer than argv[i], it can't match */ + if (ilen > olen) + continue; + /* does the base match? */ + if (strncmp(optionlist[j].optbase, argv[i], ilen) == 0) + break; + } + if (optionlist[j].optbase == NULL) + { + do_error("Unsupported option %s", argv[i]); + } + /* is the option supposed to be exact? */ + if (optionlist[j].noextra && argv[i][ilen] != '\0') + { + do_error("Unsupported option %s", argv[i]); + } + /* is there an argument? */ + optarg = NULL; + if (argv[i][ilen]) + optarg = argv[i] + ilen; + if (!optarg && optionlist[j].needarg == 1) + { + if (i == argc) + { + do_error("Option %s requires an argument", argv[i]); + } + optarg = argv[++i]; + } + if (!optarg && optionlist[j].needarg == 2) + { + do_error("Option %s requires an argument", argv[i]); + } + /* handle the option */ + if (optionlist[j].fn) + { + if ((*(optionlist[j].fn))(argv[i], optarg, optionlist[j].optcode, optionlist[j].optptr) != 0) + do_error("Unsupported option %s %s", argv[i], optarg ? optarg : ""); + } + } +}
--- a/lwcc/driver/main.c Tue Sep 10 19:56:05 2013 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1072 +0,0 @@ -/* -lwcc/driver/main.c - -Copyright © 2013 William Astle - -This file is part of LWTOOLS. - -LWTOOLS is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software -Foundation, either version 3 of the License, or (at your option) any later -version. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. - -You should have received a copy of the GNU General Public License along with -this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#include <errno.h> -#include <signal.h> -#include <stdarg.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/types.h> -#include <sys/wait.h> -#include <unistd.h> - -#include <lw_alloc.h> -#include <lw_string.h> -#include <lw_stringlist.h> - -#define VERSTRING "lwcc from " PACKAGE_STRING -#define S(x) S2(x) -#define S2(x) #x - -#define BASEDIR S(LWCC_LIBDIR) - -/* list of compilation phases */ -enum phase_t { - PHASE_DEFAULT = 0, - PHASE_PREPROCESS, - PHASE_COMPILE, - PHASE_ASSEMBLE, - PHASE_LINK -}; - -/* these are the names of various programs the compiler calls */ -const char *linker_program_name = "lwlink"; -const char *compiler_program_name = "lwcc1"; -const char *assembler_program_name = "lwasm"; -const char *preprocessor_program_name = "lwcpp"; - -/* this will be set to the directory where temporary files get created */ -const char *temp_directory = NULL; - -/* these are for book keeping if we get interrupted - the volatile and atomic - types are needed because they are accessed in a signal handler */ -static volatile sig_atomic_t sigterm_received = 0; -static volatile sig_atomic_t child_pid = 0; - -/* path 
specified with --sysroot */ -const char *sysroot = ""; -/* path specified with -isysroot */ -const char *isysroot = NULL; - -/* record which phase to stop after for -c, -E, and -S */ -/* default is to stop after PHASE_LINK */ -static int stop_after = PHASE_DEFAULT; - -int nostdinc = 0; // set if -nostdinc is specified -int nostartfiles = 0; // set if -nostartfiles is specified -int nostdlib = 0; // set if -nostdlib is specified -int verbose_mode = 0; // set to number of --verbose arguments -int save_temps = 0; // set if -save-temps is specified -int debug_mode = 0; // set if -g specified -int pic_mode = 0; // set to 1 if -fpic, 2 if -fPIC; last one specified wins -const char *output_file; // set to the value of the -o option (output file) - -/* compiler base directory - from -B */ -const char *basedir = BASEDIR; - -/* used to ensure a unique temporary file at every stage */ -static int file_counter = 0; - -/* these are various string lists used to keep track of things, mostly - command line arguments. 
*/ - -lw_stringlist_t input_files; // input files from command line -lw_stringlist_t runtime_dirs; // directories to search for runtime files -lw_stringlist_t lib_dirs; // directories to search for library files -lw_stringlist_t program_dirs; // directories to search for compiler program components -lw_stringlist_t preproc_args; // recorded arguments to pass through to the preprocessor -lw_stringlist_t include_dirs; // include paths specified with -I -lw_stringlist_t includes; // include paths specified with -include -lw_stringlist_t user_sysincdirs; // include paths specified with -isystem -lw_stringlist_t asm_args; // recorded arguments to pass through to the assembler -lw_stringlist_t linker_args; // recorded arguments to pass through to the linker -lw_stringlist_t sysincdirs; // the standard system include directories -lw_stringlist_t tempfiles; // a list of temporary files created which need to be cleaned up -lw_stringlist_t compiler_args; // recorded arguments to pass through to the compiler -lw_stringlist_t priv_sysincdirs; // system include directories for lwcc itself - -/* forward delcarations */ -static void parse_command_line(int, char **); - -/* signal handler for SIGTERM - all it does is record the fact that - SIGTERM happened and propagate the signal to whatever child process - might currently be running */ -static void exit_on_signal(int sig) -{ - sigterm_received = 1; - if (child_pid) - kill(child_pid, SIGTERM); -} - -/* utility function to carp about an error condition and bail */ -void do_error(const char *f, ...) -{ - va_list arg; - va_start(arg, f); - fprintf(stderr, "ERROR: "); - vfprintf(stderr, f, arg); - putc('\n', stderr); - va_end(arg); - exit(1); -} - -/* utility function to carp about some condition; do not bail */ -void do_warning(const char *f, ...) 
-{ - va_list arg; - va_start(arg, f); - fprintf(stderr, "WARNING: "); - vfprintf(stderr, f, arg); - putc('\n', stderr); - va_end(arg); -} - -/* utility function to print out an array of strings - stops at the first - NULL string pointer. */ -static void print_array(char **arr) -{ - int c = 0; - while (*arr) - { - if (c) - printf(" "); - printf("%s", *arr); - arr++; - c = 1; - } -} - -/* expand any search path entries to reflect the sysroot and - isysroot settings. Note that it does NOT apply to the compiler - program search path */ -static void expand_sysroot(void) -{ - /* list of path lists to process for replacements of = */ - lw_stringlist_t *lists[] = { &sysincdirs, &include_dirs, &user_sysincdirs, &lib_dirs, NULL }; - /* list of replacement strings for = in the same order */ - const char *sysroots[] = { isysroot, isysroot, isysroot, sysroot, NULL }; - size_t i, sysroot_len, value_len; - char *path; - lw_stringlist_t newlist; - lw_stringlist_t working; - char *s; - - /* for each list, run through entry by entry, do any needed replacement - and add the entry to a new list. Then replace the old list with the - new one. */ - for (i = 0; lists[i] != NULL; i++) - { - working = *lists[i]; - newlist = lw_stringlist_create(); - - lw_stringlist_reset(working); - for (s = lw_stringlist_current(working); s; s = lw_stringlist_next(working)) - { - if (s[0] == '=') - { - sysroot_len = strlen(sysroots[i]); - value_len = strlen(s); - /* note that the skipped = will make up for the trailing NUL */ - path = lw_alloc(sysroot_len + value_len); - memcpy(path, sysroots[i], sysroot_len); - /* the +1 here will copy the trailing NUL */ - memcpy(path + sysroot_len, s + 1, value_len); - lw_stringlist_addstring(newlist, path); - lw_free(path); - } - else - { - lw_stringlist_addstring(newlist, s); - } - } - lw_stringlist_destroy(working); - *lists[i] = newlist; - } -} - -/* look for file fn in path list p which is okay for access mode mode. - Return a string allocated by lw_alloc. 
*/ -static char *find_file(const char *fn, lw_stringlist_t p, int mode) -{ - char *s; - char *f; - size_t lf, lp; - int need_slash; - - lf = strlen(fn); - lw_stringlist_reset(p); - for (s = lw_stringlist_current(p); s; s = lw_stringlist_next(p)) - { - lp = strlen(s); - need_slash = 0; - if (lp && s[lp - 1] == '/') - need_slash = 1; - f = lw_alloc(lp + lf + need_slash + 1); - memcpy(f, s, lp); - if (need_slash) - f[lp] = '/'; - /* +1 gets the NUL */ - memcpy(f + lp + need_slash, fn, lf + 1); - if (access(f, mode) == 0) - return f; - lw_free(f); - } - /* if not found anywhere, try the bare filename - it might work */ - return lw_strdup(fn); -} - -/* take a string list which contains an argv and execute the specified - program */ -static int execute_program(lw_stringlist_t args) -{ - int argc; - char **argv; - int result; - char *s; - - argc = lw_stringlist_nstrings(args); - argv = lw_alloc(sizeof(char *) * (argc + 1)); - lw_stringlist_reset(args); - for (result = 0, s = lw_stringlist_current(args); s; s = lw_stringlist_next(args)) - { - argv[result] = s; - } - argv[result] = NULL; - - if (verbose_mode) - { - printf("Executing "); - print_array(argv); - printf("\n"); - } - - /* bail now if a signal happened */ - if (sigterm_received) - { - lw_free(argv); - return 1; - } - - /* make sure stdio has flushed everything so that output from the - child process doesn't get intermingled */ - fflush(NULL); - - /* now make the child process */ - child_pid = fork(); - if (child_pid == 0) - { - /* child process */ - /* try executing program */ - execvp(argv[0], argv); - /* only way to get here is if execvp() failed so carp about it and exit */ - fprintf(stderr, "Exec of %s failed: %s", argv[0], strerror(errno)); - /* exit with failure but don't call any atexit(), etc., functions */ - _exit(127); - } - else if (child_pid == -1) - { - /* failure to make child process */ - do_error("Failed to execute program %s: %s", argv[0], strerror(errno)); - } - /* clean up argv */ - 
lw_free(argv); - - /* parent process - wait for child to exit */ - while (waitpid(child_pid, &result, 0) == -1 && errno == EINTR) - /* do nothing */; - /* fetch actual return status */ - result = WEXITSTATUS(result); - if (result) - { - /* carp about non-zero return status */ - do_error("%s terminated with status %d", argv[0], result); - } - /* return nonzero if signalled to exit */ - return sigterm_received; -} - -/* -construct an output file name as follows: - -1. if it is the last phase of compilation and an output file name is - specified, use that if not specified -2. if it is the last phase or we are saving temporary files, any suffix - on f is removed and replaced with nsuffix -3. otherwise, a temporary file is created. If necessary, a temporary - directory is created to hold the temporary file. The name of the temporary - file is recorded in the tempfiles string list for later cleanup. The name - of the temporary directory is recorded in temp_directory for later cleanup. -*/ -static char *output_name(const char *f, const char *nsuffix, int last) -{ - const char *osuffix; - char *name; - size_t lf, ls, len; - int counter_len; - - /* get a new file counter */ - file_counter++; - - /* if the output was specified, use it */ - if (last && output_file) - { - return lw_strdup(output_file); - } - - /* find the start of the old suffix */ - osuffix = strrchr(f, '.'); - if (osuffix != NULL && strchr(osuffix, '/') != NULL) - osuffix = NULL; - if (osuffix == NULL) - osuffix = f + strlen(f); - - ls = strlen(nsuffix); - - /* if this is the last stage or we're saving temps, use a name derived - from the original file name by replacing the suffix with nsuffix */ - if (save_temps || last) - { - lf = osuffix - f; - name = lw_alloc(lf + ls + 1); - memcpy(name, f, lf); - /* note that the +1 will copy the trailing NUL */ - memcpy(name + lf, nsuffix, ls + 1); - return name; - } - - /* finally, use a temporary file */ - if (temp_directory == NULL) - { - /* if we haven't already 
made a temporary directory, do so */ - const char *dirtempl; - char *path; - size_t dirtempl_len; - int need_slash; - - /* look for a TMPFIR environment variable and use that if present - but use /tmp as a fallback */ - dirtempl = getenv("TMPDIR"); - if (dirtempl == NULL) - dirtempl = "/tmp"; - dirtempl_len = strlen(dirtempl); - /* work out if we need to add a slash on the end of the directory */ - if (dirtempl_len && dirtempl[dirtempl_len - 1] == '/') - need_slash = 0; - else - need_slash = 1; - /* make a string of the form <tempdir>/lwcc-XXXXXX */ - path = lw_alloc(dirtempl_len + need_slash + 11 + 1); - memcpy(path, dirtempl, dirtempl_len); - if (need_slash) - path[dirtempl_len] = '/'; - memcpy(path + dirtempl_len + need_slash, "lwcc-XXXXXX", 12); - /* now make a temporary directory */ - if (mkdtemp(path) == NULL) - do_error("mkdtemp failed: %s", strerror(errno)); - /* record the temporary directory name */ - temp_directory = path; - } - /* now create a file name in the temporary directory. The strategy here - uses a counter that is passed along and is guaranteed to be unique for - every file requested. */ - lf = strlen(temp_directory); - /* this gets the length of the counter as a string but doesn't actually - allocate anything so we can make a string long enough */ - counter_len = snprintf(NULL, 0, "%d", file_counter); - if (counter_len < 1) - do_error("snprintf failure: %s", strerror(errno)); - len = lf + 1 + (size_t)counter_len + ls + 1; - name = lw_alloc(len); - /* it should be impossible for ths snprintf call to fail */ - snprintf(name, len, "%s/%d%s", temp_directory, file_counter, nsuffix); - - /* record the temporary file name for later */ - lw_stringlist_addstring(tempfiles, name); - return name; -} - -/* this calls the actual compiler, passing the contents of compiler_args - as arguments. It also adds the input file and output file. 
*/
static int compile_file(const char *file, char *input, char **output, const char *suffix)
{
	lw_stringlist_t cmd = lw_stringlist_create();
	char *entry;
	char *result_name;
	int status;

	/* argv[0]: the compiler executable as located in program_dirs */
	entry = find_file(compiler_program_name, program_dirs, X_OK);
	lw_stringlist_addstring(cmd, entry);
	lw_free(entry);

	/* then everything recorded in compiler_args, in order */
	lw_stringlist_reset(compiler_args);
	entry = lw_stringlist_current(compiler_args);
	while (entry != NULL)
	{
		lw_stringlist_addstring(cmd, entry);
		entry = lw_stringlist_next(compiler_args);
	}

	/* "-o <name>": compiling is the last stage when stop_after is
	   PHASE_COMPILE */
	result_name = output_name(file, suffix, stop_after == PHASE_COMPILE);
	lw_stringlist_addstring(cmd, "-o");
	lw_stringlist_addstring(cmd, result_name);

	/* and finally the file to compile */
	lw_stringlist_addstring(cmd, input);

	/* when the caller's output pointer still refers to the input name,
	   that name is released here before being replaced */
	if (*output == input)
		lw_free(input);
	*output = result_name;

	/* run the compiler */
	status = execute_program(cmd);
	lw_stringlist_destroy(cmd);

	return status;
}

/* this calls the actual assembler, passing the contents of asm_args
   as arguments. It also adds the input file and output file.
*/
static int assemble_file(const char *file, char *input, char **output, const char *suffix)
{
	lw_stringlist_t args;
	char *out;
	int retval;
	char *s;

	args = lw_stringlist_create();

	/* find the assembler binary and add that as argv[0] */
	s = find_file(assembler_program_name, program_dirs, X_OK);
	lw_stringlist_addstring(args, s);
	lw_free(s);

	/* add asm_args to argv */
	lw_stringlist_reset(asm_args);
	for (s = lw_stringlist_current(asm_args); s; s = lw_stringlist_next(asm_args))
	{
		lw_stringlist_addstring(args, s);
	}
	/* get an output file name and add that to argv; honour the suffix the
	   caller asked for, the same way the compile and preprocess stages do,
	   instead of silently ignoring the parameter */
	out = output_name(file, suffix, stop_after == PHASE_ASSEMBLE);
	lw_stringlist_addstring(args, "-o");
	lw_stringlist_addstring(args, out);
	/* finally, add the input file */
	lw_stringlist_addstring(args, input);
	/* clean up input file name if same as output pointer */
	if (*output == input)
		lw_free(input);
	/* tell caller what file we made */
	*output = out;
	/* actually run the assembler */
	retval = execute_program(args);

	lw_stringlist_destroy(args);
	return retval;
}

/* run the preprocessor.
Pass along preproc_args and appropriate options
   for all the include directories */
static int preprocess_file(const char *file, char *input, char **output, const char *suffix)
{
	/* the argument groups in the order they are emitted; a NULL flag
	   means the entries are passed through as they are, otherwise the
	   flag precedes every entry of the list */
	struct
	{
		char *flag;
		lw_stringlist_t list;
	} groups[6];
	int ngroups = 0;
	int g;
	lw_stringlist_t cmd;
	char *entry;
	char *result_name;
	int status;

	cmd = lw_stringlist_create();

	/* argv[0]: the preprocessor executable as located in program_dirs */
	entry = find_file(preprocessor_program_name, program_dirs, X_OK);
	lw_stringlist_addstring(cmd, entry);
	lw_free(entry);

	/* recorded preprocessor arguments */
	groups[ngroups].flag = NULL;
	groups[ngroups++].list = preproc_args;
	/* files named with -include */
	groups[ngroups].flag = "-i";
	groups[ngroups++].list = includes;
	/* directories named with -I */
	groups[ngroups].flag = "-I";
	groups[ngroups++].list = include_dirs;
	/* directories named with -isystem */
	groups[ngroups].flag = "-S";
	groups[ngroups++].list = user_sysincdirs;
	/* unless -nostdinc: lwcc's own and the standard system directories */
	if (!nostdinc)
	{
		groups[ngroups].flag = "-S";
		groups[ngroups++].list = priv_sysincdirs;
		groups[ngroups].flag = "-S";
		groups[ngroups++].list = sysincdirs;
	}

	for (g = 0; g < ngroups; g++)
	{
		lw_stringlist_reset(groups[g].list);
		entry = lw_stringlist_current(groups[g].list);
		while (entry != NULL)
		{
			if (groups[g].flag != NULL)
				lw_stringlist_addstring(cmd, groups[g].flag);
			lw_stringlist_addstring(cmd, entry);
			entry = lw_stringlist_next(groups[g].list);
		}
	}

	/* preprocess-only runs without an output file write to stdout, which
	   is spelled "-"; everything else gets a real output name */
	if (stop_after != PHASE_PREPROCESS || output_file != NULL)
		result_name = output_name(file, suffix, stop_after == PHASE_PREPROCESS);
	else
		result_name = lw_strdup("-");

	/* "-o" is only passed when the output is not stdout */
	if (strcmp(result_name, "-") != 0)
	{
		lw_stringlist_addstring(cmd, "-o");
		lw_stringlist_addstring(cmd, result_name);
	}

	/* the file to preprocess comes last */
	lw_stringlist_addstring(cmd, input);

	/* when the caller's output pointer still refers to the input name,
	   that name is released here before being replaced */
	if (*output == input)
		lw_free(input);
	*output = result_name;

	/* run the preprocessor */
	status = execute_program(cmd);
	lw_stringlist_destroy(cmd);

	return status;
}

/*
handle an input file through the various stages of compilation. If any
stage decides to handle an input file, that fact is recorded. If control
reaches the end of the function without a file being handled, that
fact is mentioned to the user. Unknown files are passed to the linker
if nothing handles them and linking is to be done. It's possible the linker
will actually know what to do with them.
-*/ -static int handle_input_file(const char *f) -{ - const char *suffix; - char *src; - int handled, retval; - - /* note: this needs to handle -x but for now, assume c for stdin */ - if (strcmp(f, "-") == 0) - { - suffix = ".c"; - } - else - { - /* work out the suffix on the file */ - suffix = strrchr(f, '.'); - if (suffix != NULL && strchr(suffix, '/') != NULL) - suffix = NULL; - if (suffix == NULL) - suffix = ""; - } - - /* make a copy of the file */ - src = lw_strdup(f); - - /* preprocess if appropriate */ - if (strcmp(suffix, ".c") == 0) - { - /* preprocessed c input source goes to .i */ - suffix = ".i"; - retval = preprocess_file(f, src, &src, suffix); - if (retval) - goto done; - handled = 1; - } - else if (strcmp(suffix, ".S") == 0) - { - /* preprocessed asm source goes to .s */ - suffix = ".s"; - retval = preprocess_file(f, src, &src, suffix); - if (retval) - goto done; - handled = 1; - } - /* if we're only preprocessing, bail */ - if (stop_after == PHASE_PREPROCESS) - goto done; - - /* now on to compile if appropriate */ - if (strcmp(suffix, ".i") == 0) - { - /* preprocessed c source goes to .s after compiling */ - suffix = ".s"; - retval = compile_file(f, src, &src, suffix); - if (retval) - goto done; - handled = 1; - } - /* bail if we're only compiling, not assembling */ - if (stop_after == PHASE_COMPILE) - goto done; - - /* assemble if appropriate */ - if (strcmp(suffix, ".s") == 0) - { - /* assembler output is an object file */ - suffix = ".o"; - retval = assemble_file(f, src, &src, suffix); - if (retval) - goto done; - handled = 1; - } - /* bail if we're not linking */ - if (stop_after == PHASE_ASSEMBLE) - goto done; - - /* if we get here with a .o unhandled, pretend it is handled */ - if (strcmp(suffix, ".o") == 0) - handled = 1; - - /* add the final file name to the linker args */ - lw_stringlist_addstring(linker_args, src); -done: - if (!handled && !retval) - { - /* carp about unhandled files if there is no error */ - if (stop_after == PHASE_LINK) 
- { - do_warning("unknown suffix %s; passing file down to linker", suffix); - } - else - { - do_warning("unknown suffix %s; skipped", suffix); - } - } - /* clean up the file name */ - lw_free(src); - - return retval; -} - -/* -This actually runs the linker. Along the way, all the files the linker -is supposed to handle will have been added to linker_args. -*/ -static int handle_linking(void) -{ - lw_stringlist_t linker_flags; - char *s; - int retval; - - linker_flags = lw_stringlist_create(); - - /* find the linker binary and make that argv[0] */ - s = find_file(linker_program_name, program_dirs, X_OK); - lw_stringlist_addstring(linker_flags, s); - lw_free(s); - - /* tell the linker about the output file name, if specified */ - if (output_file) - { - lw_stringlist_addstring(linker_flags, "-o"); - lw_stringlist_addstring(linker_flags, (char *)output_file); - } - - /* add the standard library options if not -nostdlib */ - if (!nostdlib) - { - } - - /* add the standard startup files if not -nostartfiles */ - if (!nostartfiles) - { - } - - /* pass along the various input files, etc., to the linker */ - lw_stringlist_reset(linker_args); - for (s = lw_stringlist_current(linker_args); s; s = lw_stringlist_next(linker_args)) - { - lw_stringlist_addstring(linker_flags, s); - } - - /* actually run the linker */ - retval = execute_program(linker_flags); - - lw_stringlist_destroy(linker_flags); - return retval; -} - -/* -Do various setup tasks, process the command line, handle the input files, -and clean up. 
-*/ -int main(int argc, char **argv) -{ - char *ap; - int retval; - - input_files = lw_stringlist_create(); - runtime_dirs = lw_stringlist_create(); - lib_dirs = lw_stringlist_create(); - program_dirs = lw_stringlist_create(); - preproc_args = lw_stringlist_create(); - include_dirs = lw_stringlist_create(); - user_sysincdirs = lw_stringlist_create(); - asm_args = lw_stringlist_create(); - linker_args = lw_stringlist_create(); - sysincdirs = lw_stringlist_create(); - includes = lw_stringlist_create(); - tempfiles = lw_stringlist_create(); - compiler_args = lw_stringlist_create(); - priv_sysincdirs = lw_stringlist_create(); - - parse_command_line(argc, argv); - if (stop_after == PHASE_DEFAULT) - stop_after = PHASE_LINK; - - if (verbose_mode) - printf("%s\n", VERSTRING); - - if (isysroot == NULL) - isysroot = sysroot; - expand_sysroot(); - - if (stop_after != PHASE_LINK && output_file && lw_stringlist_nstrings(input_files) > 1) - { - do_error("-o cannot be specified with multiple inputs unless linking"); - } - - // default to stdout for preprocessing - if (stop_after == PHASE_PREPROCESS && output_file == NULL) - output_file = "-"; - - if (lw_stringlist_nstrings(input_files) == 0) - do_error("No input files specified"); - - /* handle -B here */ - ap = lw_alloc(strlen(basedir) + 10); - strcpy(ap, basedir); - strcat(ap, "/bin"); - lw_stringlist_addstring(program_dirs, ap); - strcpy(ap, basedir); - strcat(ap, "/lib"); - lw_stringlist_addstring(runtime_dirs, ap); - strcpy(ap, basedir); - strcat(ap, "/include"); - lw_stringlist_addstring(priv_sysincdirs, ap); - lw_free(ap); - - retval = 0; - /* make sure we exit if interrupted */ - signal(SIGTERM, exit_on_signal); - - /* handle input files */ - lw_stringlist_reset(input_files); - for (ap = lw_stringlist_current(input_files); ap; ap = lw_stringlist_next(input_files)) - { - if (handle_input_file(ap)) - retval = 1; - } - - if (!retval && stop_after >= PHASE_LINK) - { - retval = handle_linking(); - } - - /* if a signal nixed 
us, mention the fact */ - if (sigterm_received) - do_warning("Terminating on signal"); - - /* clean up temporary files */ - if (!save_temps) - { - lw_stringlist_reset(tempfiles); - for (ap = lw_stringlist_current(tempfiles); ap; ap = lw_stringlist_next(tempfiles)) - { - if (unlink(ap) == -1) - { - do_warning("Removal of %s failed: %s", ap, strerror(errno)); - } - } - if (temp_directory) - { - if (rmdir(temp_directory) == -1) - { - do_warning("Removal of temporary directory %s failed: %s", temp_directory, strerror(errno)); - } - } - } - - /* be polite and clean up all the string lists */ - lw_stringlist_destroy(input_files); - lw_stringlist_destroy(runtime_dirs); - lw_stringlist_destroy(lib_dirs); - lw_stringlist_destroy(program_dirs); - lw_stringlist_destroy(preproc_args); - lw_stringlist_destroy(include_dirs); - lw_stringlist_destroy(user_sysincdirs); - lw_stringlist_destroy(asm_args); - lw_stringlist_destroy(linker_args); - lw_stringlist_destroy(sysincdirs); - lw_stringlist_destroy(includes); - lw_stringlist_destroy(tempfiles); - lw_stringlist_destroy(compiler_args); - lw_stringlist_destroy(priv_sysincdirs); - return retval; -} - -struct option_e -{ - char *optbase; // base name of option, with - - int needarg; // nonzero if option needs argument - int noextra; // nonzero if there must not be anything after optbase - int optcode; // option code (passed to fn) - void *optptr; // pointer for opt (passed to fn) - int (*fn)(char *, char *, int, void *); // function to handle argument, NULL to ignore it -}; - -enum CMD_MISC { - CMD_MISC_VERSION, - CMD_MISC_OPTIMIZE, -}; - -enum OPT_ARG { - OPT_ARG_OPT = 0, // argument is optional - OPT_ARG_SEP = 1, // argument may be separate - OPT_ARG_INC = 2, // argument must not be separate -}; - -/* set an integer at *optptr to optcode */ -static int cmdline_set_int(char *opt, char *optarg, int optcode, void *optptr) -{ - *((int *)optptr) = optcode; - return 0; -} - -/* set a string at *optptr to optarg */ -static int 
cmdline_set_string(char *opt, char *optarg, int optcode, void *optptr) -{ - char **s = (char **)optptr; - *s = optarg; - - return 0; -} - -/* set a string at *optptr to optarg */ -static int cmdline_set_stringifnull(char *opt, char *optarg, int optcode, void *optptr) -{ - char **s = (char **)optptr; - - if (*s) - do_error("Multiple %.*s options specified", optcode ? optcode : strlen(opt), opt); - *s = optarg; - - return 0; -} - -/* split arg on commas and add the results to string list *optptr */ -static int cmdline_argsplit(char *opt, char *arg, int optcode, void *optptr) -{ - lw_stringlist_t l = *(lw_stringlist_t *)optptr; - char *next; - - for (; arg != NULL; arg = next) - { - next = strchr(arg, ','); - if (next != NULL) - *next++ = '\0'; - lw_stringlist_addstring(l, arg); - } - return 0; -} - -/* add opt to string list *optptr */ -static int cmdline_arglist(char *opt, char *arg, int optcode, void *optptr) -{ - lw_stringlist_t l = *(lw_stringlist_t *)optptr; - - lw_stringlist_addstring(l, opt); - return 0; -} - -/* add optarg to string list *optptr */ -static int cmdline_optarglist(char *opt, char *optarg, int optcode, void *optptr) -{ - lw_stringlist_t l = *(lw_stringlist_t *)optptr; - - lw_stringlist_addstring(l, optarg); - return 0; -} - -static int cmdline_misc(char *opt, char *optarg, int optcode, void *optptr) -{ - switch (optcode) - { - case CMD_MISC_VERSION: - printf("%s\n", VERSTRING); - exit(0); - - case CMD_MISC_OPTIMIZE: - if (!optarg) - return 0; - switch (*optarg) - { - case '0': - case '1': - case '2': - case '3': - case 's': - return 0; - } - return -1; - - default: - return -1; - } - return 0; -} - -static int cmdline_set_intifzero(char *opt, char *optarg, int optcode, void *optptr) -{ - int *iv = (int *)optptr; - - if (*iv && *iv != optcode) - { - do_error("conflicting compiler option specified: %s", opt); - } - *iv = optcode; - return 0; -} - -struct option_e optionlist[] = -{ - { "--version", OPT_ARG_OPT, 1, CMD_MISC_VERSION, NULL, 
cmdline_misc }, - { "--sysroot=", OPT_ARG_INC, 0, 0, &sysroot, cmdline_set_string }, - { "-B", OPT_ARG_INC, 0, 0, &basedir, cmdline_set_string }, - { "-C", OPT_ARG_OPT, 1, 0, &preproc_args, cmdline_arglist }, - { "-c", OPT_ARG_OPT, 1, PHASE_COMPILE, &stop_after, cmdline_set_intifzero }, - { "-D", OPT_ARG_INC, 0, 0, &preproc_args, cmdline_arglist }, - { "-E", OPT_ARG_OPT, 1, PHASE_PREPROCESS, &stop_after, cmdline_set_intifzero }, - { "-fPIC", OPT_ARG_OPT, 1, 2, &pic_mode, cmdline_set_int }, - { "-fpic", OPT_ARG_OPT, 1, 1, &pic_mode, cmdline_set_int }, - { "-g", OPT_ARG_OPT, 1, 1, &debug_mode, cmdline_set_int }, - { "-I", OPT_ARG_SEP, 0, 0, &include_dirs, cmdline_optarglist }, - { "-include", OPT_ARG_SEP, 1, 0, &includes, cmdline_optarglist }, - { "-isysroot", OPT_ARG_SEP, 1, 0, &isysroot, cmdline_set_string }, - { "-isystem", OPT_ARG_SEP, 1, 0, &user_sysincdirs, cmdline_optarglist }, - { "-M", OPT_ARG_OPT, 1, 0, &preproc_args, cmdline_arglist }, - { "-nostartfiles", OPT_ARG_OPT, 1, 1, &nostartfiles, cmdline_set_int }, - { "-nostdinc", OPT_ARG_OPT, 1, 1, &nostdinc, cmdline_set_int }, - { "-nostdlib", OPT_ARG_OPT, 1, 1, &nostdlib, cmdline_set_int }, - { "-O", OPT_ARG_OPT, 0, CMD_MISC_OPTIMIZE, NULL, cmdline_misc }, - { "-o", OPT_ARG_SEP, 0, 2, &output_file, cmdline_set_stringifnull }, - { "-S", OPT_ARG_OPT, 1, PHASE_ASSEMBLE, &stop_after, cmdline_set_intifzero }, - { "-save-temps", OPT_ARG_OPT, 1, 1, &save_temps, cmdline_set_int }, - { "-trigraphs", OPT_ARG_OPT, 1, 0, &preproc_args, cmdline_arglist }, - { "-U", OPT_ARG_INC, 0, 0, &preproc_args, cmdline_arglist }, - { "-v", OPT_ARG_OPT, 1, 1, &verbose_mode, cmdline_set_int }, - { "-Wp,", OPT_ARG_INC, 0, 0, &preproc_args, cmdline_argsplit }, - { "-Wa,", OPT_ARG_INC, 0, 0, &asm_args, cmdline_argsplit }, - { "-Wl,", OPT_ARG_INC, 0, 0, &linker_args, cmdline_argsplit }, - { "-W", OPT_ARG_INC, 0, 0, NULL, NULL }, /* warning options */ - { "-x", OPT_ARG_SEP, 1, 0, NULL, NULL }, /* language options */ - { NULL, 0, 0 } -}; - 
-static void parse_command_line(int argc, char **argv) -{ - int i, j, olen, ilen; - char *optarg; - - for (i = 1; i < argc; i++) - { - if (argv[i][0] != '-' || argv[i][1] == '\0') - { - /* we have a non-option argument */ - lw_stringlist_addstring(input_files, argv[i]); - continue; - } - olen = strlen(argv[i]); - for (j = 0; optionlist[j].optbase; j++) - { - ilen = strlen(optionlist[j].optbase); - /* if length of optbase is longer than argv[i], it can't match */ - if (ilen > olen) - continue; - /* does the base match? */ - if (strncmp(optionlist[j].optbase, argv[i], ilen) == 0) - break; - } - if (optionlist[j].optbase == NULL) - { - do_error("Unsupported option %s", argv[i]); - } - /* is the option supposed to be exact? */ - if (optionlist[j].noextra && argv[i][ilen] != '\0') - { - do_error("Unsupported option %s", argv[i]); - } - /* is there an argument? */ - optarg = NULL; - if (argv[i][ilen]) - optarg = argv[i] + ilen; - if (!optarg && optionlist[j].needarg == 1) - { - if (i == argc) - { - do_error("Option %s requires an argument", argv[i]); - } - optarg = argv[++i]; - } - if (!optarg && optionlist[j].needarg == 2) - { - do_error("Option %s requires an argument", argv[i]); - } - /* handle the option */ - if (optionlist[j].fn) - { - if ((*(optionlist[j].fn))(argv[i], optarg, optionlist[j].optcode, optionlist[j].optptr) != 0) - do_error("Unsupported option %s %s", argv[i], optarg ? optarg : ""); - } - } -}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lwcc/lex.c Thu Sep 12 22:06:26 2013 -0600 @@ -0,0 +1,737 @@ +/* +lwcc/lex.c + +Copyright © 2013 William Astle + +This file is part of LWTOOLS. + +LWTOOLS is free software: you can redistribute it and/or modify it under the +terms of the GNU General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <ctype.h> +#include <stdio.h> + +#include <lw_alloc.h> + +#include "cpp.h" +#include "strbuf.h" +#include "token.h" + +/* fetch a raw input byte from the current file. Will return CPP_EOF if + EOF is encountered and CPP_EOL if an end of line sequence is encountered. + End of line is defined as either CR, CRLF, LF, or LFCR. CPP_EOL is + returned on the first CR or LF encountered. The complementary CR or LF + is munched, if present, when the *next* character is read. This always + operates on file_stack. + + This function also accounts for line numbers in input files and also + character columns. 
+*/ +static int fetch_byte_ll(struct preproc_info *pp) +{ + int c; + + if (pp -> eolstate != 0) + { + pp -> lineno++; + pp -> column = 0; + } + c = getc(pp -> fp); + pp -> column++; + if (pp -> eolstate == 1) + { + // just saw CR, munch LF + if (c == 10) + c = getc(pp -> fp); + pp -> eolstate = 0; + } + else if (pp -> eolstate == 2) + { + // just saw LF, much CR + if (c == 13) + c = getc(pp -> fp); + pp -> eolstate = 0; + } + + if (c == 10) + { + // we have LF - end of line, flag to munch CR + pp -> eolstate = 2; + c = CPP_EOL; + } + else if (c == 13) + { + // we have CR - end of line, flag to munch LF + pp -> eolstate = 1; + c = CPP_EOL; + } + else if (c == EOF) + { + c = CPP_EOF; + } + return c; +} + +/* This function takes a sequence of bytes from the _ll function above + and does trigraph interpretation on it, but only if the global + trigraphs is nonzero. */ +static int fetch_byte_tg(struct preproc_info *pp) +{ + int c; + + if (!pp -> trigraphs) + { + c = fetch_byte_ll(pp); + } + else + { + /* we have to do the trigraph shit here */ + if (pp -> ra != CPP_NOUNG) + { + if (pp -> qseen > 0) + { + c = '?'; + pp -> qseen -= 1; + return c; + } + else + { + c = pp -> ra; + pp -> ra = CPP_NOUNG; + return c; + } + } + + c = fetch_byte_ll(pp); + while (c == '?') + { + pp -> qseen++; + c = fetch_byte_ll(pp); + } + + if (pp -> qseen >= 2) + { + // we have a trigraph + switch (c) + { + case '=': + c = '#'; + pp -> qseen -= 2; + break; + + case '/': + c = '\\'; + pp -> qseen -= 2; + break; + + case '\'': + c = '^'; + pp -> qseen -= 2; + break; + + case '(': + c = '['; + pp -> qseen -= 2; + break; + + case ')': + c = ']'; + pp -> qseen -= 2; + break; + + case '!': + c = '|'; + pp -> qseen -= 2; + break; + + case '<': + c = '{'; + pp -> qseen -= 2; + break; + + case '>': + c = '}'; + pp -> qseen -= 2; + break; + + case '-': + c = '~'; + pp -> qseen -= 2; + break; + } + if (pp -> qseen > 0) + { + pp -> ra = c; + c = '?'; + pp -> qseen--; + } + } + else if (pp -> qseen > 0) + { 
+ pp -> ra = c; + c = '?'; + pp -> qseen--; + } + } + return c; +} + +/* This function puts a byte back onto the front of the input stream used + by fetch_byte(). Theoretically, an unlimited number of characters can + be unfetched. Line and column counting may be incorrect if unfetched + characters cross a token boundary. */ +static void preproc_lex_unfetch_byte(struct preproc_info *pp, int c) +{ + if (pp -> ungetbufl >= pp -> ungetbufs) + { + pp -> ungetbufs += 100; + pp -> ungetbuf = lw_realloc(pp -> ungetbuf, pp -> ungetbufs); + } + pp -> ungetbuf[pp -> ungetbufl++] = c; +} + +/* This function retrieves a byte from the input stream. It performs + backslash-newline splicing on the returned bytes. Any character + retrieved from the unfetch buffer is presumed to have already passed + the backslash-newline filter. */ +static int fetch_byte(struct preproc_info *pp) +{ + int c; + + if (pp -> ungetbufl > 0) + { + pp -> ungetbufl--; + c = pp -> ungetbuf[pp -> ungetbufl]; + if (pp -> ungetbufl == 0) + { + lw_free(pp -> ungetbuf); + pp -> ungetbuf = NULL; + pp -> ungetbufs = 0; + } + return c; + } + +again: + if (pp -> unget != CPP_NOUNG) + { + c = pp -> unget; + pp -> unget = CPP_NOUNG; + } + else + { + c = fetch_byte_tg(pp); + } + if (c == '\\') + { + int c2; + c2 = fetch_byte_tg(pp); + if (c2 == CPP_EOL) + goto again; + else + pp -> unget = c2; + } + return c; +} + + + +/* +Lex a token off the current input file. 
+ +Returned tokens are as follows: + +* all words starting with [a-zA-Z_] are returned as TOK_IDENT +* numbers are returned as their appropriate type +* all whitespace in a sequence, including comments, is returned as + a single instance of TOK_WSPACE +* TOK_EOL is returned in the case of the end of a line +* TOK_EOF is returned when the end of the file is reached +* If no TOK_EOL appears before TOK_EOF, a TOK_EOL will be synthesised +* Any symbolic operator, etc., recognized by C will be returned as such + a token +* TOK_HASH will be returned for a # +* trigraphs will be interpreted +* backslash-newline will be interpreted +* any instance of CR, LF, CRLF, or LFCR will be interpreted as TOK_EOL +*/ + + +static int preproc_lex_fetch_byte(struct preproc_info *pp) +{ + int c; + c = fetch_byte(pp); + if (c == CPP_EOF && pp -> eolseen == 0) + { + preproc_throw_warning(pp, "No newline at end of file"); + pp -> eolseen = 1; + return CPP_EOL; + } + + if (c == CPP_EOL) + { + pp -> eolseen = 1; + return c; + } + + pp -> eolseen = 0; + + /* convert comments to a single space here */ + if (c == '/') + { + int c2; + c2 = fetch_byte(pp); + if (c2 == '/') + { + /* single line comment */ + c = ' '; + for (;;) + { + c2 = fetch_byte(pp); + if (c2 == CPP_EOF || c2 == CPP_EOL) + break; + } + preproc_lex_unfetch_byte(pp, c2); + } + else if (c2 == '*') + { + /* block comment */ + c = ' '; + for (;;) + { + c2 = fetch_byte(pp); + if (c2 == CPP_EOL || c2 == CPP_EOF) + { + preproc_lex_unfetch_byte(pp, c); + break; + } + if (c2 == '*') + { + /* maybe end of comment */ + c2 = preproc_lex_fetch_byte(pp); + if (c2 == '/') + break; + } + } + } + else + { + /* not a comment - restore lookahead character */ + preproc_lex_unfetch_byte(pp, c2); + } + } + return c; +} + +struct token *preproc_lex_next_token(struct preproc_info *pp) +{ + int sline = pp -> lineno; + int scol = pp -> column; + char *strval = NULL; + int ttype = TOK_NONE; + int c, c2; + int cl; + struct strbuf *strbuf; + struct token *t; 
+ + c = preproc_lex_fetch_byte(pp); + if (c == CPP_EOF) + { + if (pp -> nlseen == 0) + { + c = CPP_EOL; + } + } + + if (c == CPP_EOF) + { + ttype = TOK_EOF; + goto out; + } + if (c == CPP_EOL) + { + pp -> nlseen = 1; + ttype = TOK_EOL; + goto out; + } + + pp -> nlseen = 0; + if (isspace(c)) + { + while (isspace(c)) + c = preproc_lex_fetch_byte(pp); + preproc_lex_unfetch_byte(pp, c); + ttype = TOK_WSPACE; + goto out; + } + + switch (c) + { + case '?': + ttype = TOK_QMARK; + goto out; + + case ':': + ttype = TOK_COLON; + goto out; + + case ',': + ttype = TOK_COMMA; + goto out; + + case '(': + ttype = TOK_OPAREN; + goto out; + + case ')': + ttype = TOK_CPAREN; + goto out; + + case '{': + ttype = TOK_OBRACE; + goto out; + + case '}': + ttype = TOK_CBRACE; + goto out; + + case '[': + ttype = TOK_OSQUARE; + goto out; + + case ']': + ttype = TOK_CSQUARE; + goto out; + + case '~': + ttype = TOK_COM; + goto out; + + case ';': + ttype = TOK_EOS; + goto out; + + /* and now for the possible multi character tokens */ + case '#': + ttype = TOK_HASH; + c = preproc_lex_fetch_byte(pp); + if (c == '#') + ttype = TOK_DBLHASH; + else + preproc_lex_unfetch_byte(pp, c); + goto out; + + case '^': + ttype = TOK_XOR; + c = preproc_lex_fetch_byte(pp); + if (c == '=') + ttype = TOK_XORASS; + else + preproc_lex_unfetch_byte(pp, c); + goto out; + + case '!': + ttype = TOK_BNOT; + c = preproc_lex_fetch_byte(pp); + if (c == '=') + ttype = TOK_NE; + else + preproc_lex_unfetch_byte(pp, c); + goto out; + + case '*': + ttype = TOK_STAR; + c = preproc_lex_fetch_byte(pp); + if (c == '=') + ttype = TOK_MULASS; + else + preproc_lex_unfetch_byte(pp, c); + goto out; + + case '/': + ttype = TOK_DIV; + c = preproc_lex_fetch_byte(pp); + if (c == '=') + ttype = TOK_DIVASS; + else + preproc_lex_unfetch_byte(pp, c); + goto out; + + case '=': + ttype = TOK_ASS; + c = preproc_lex_fetch_byte(pp); + if (c == '=') + ttype = TOK_EQ; + else + preproc_lex_unfetch_byte(pp, c); + goto out; + + case '%': + ttype = 
TOK_MOD; + c = preproc_lex_fetch_byte(pp); + if (c == '=') + ttype = TOK_MODASS; + else + preproc_lex_unfetch_byte(pp, c); + goto out; + + case '-': + ttype = TOK_SUB; + c = preproc_lex_fetch_byte(pp); + if (c == '=') + ttype = TOK_SUBASS; + else if (c == '-') + ttype = TOK_DBLSUB; + else if (c == '>') + ttype = TOK_ARROW; + else + preproc_lex_unfetch_byte(pp, c); + goto out; + + case '+': + ttype = TOK_ADD; + c = preproc_lex_fetch_byte(pp); + if (c == '=') + ttype = TOK_ADDASS; + else if (c == '+') + ttype = TOK_DBLADD; + else + preproc_lex_unfetch_byte(pp, c); + goto out; + + + case '&': + ttype = TOK_BWAND; + c = preproc_lex_fetch_byte(pp); + if (c == '=') + ttype = TOK_BWANDASS; + else if (c == '&') + ttype = TOK_BAND; + else + preproc_lex_unfetch_byte(pp, c); + goto out; + + case '|': + ttype = TOK_BWOR; + c = preproc_lex_fetch_byte(pp); + if (c == '=') + ttype = TOK_BWORASS; + else if (c == '|') + ttype = TOK_BOR; + else + preproc_lex_unfetch_byte(pp, c); + goto out; + + case '<': + ttype = TOK_LT; + c = preproc_lex_fetch_byte(pp); + if (c == '=') + ttype = TOK_LE; + else if (c == '<') + { + ttype = TOK_LSH; + c = preproc_lex_fetch_byte(pp); + if (c == '=') + ttype = TOK_LSHASS; + else + preproc_lex_unfetch_byte(pp, c); + } + else + preproc_lex_unfetch_byte(pp, c); + goto out; + + + case '>': + ttype = TOK_GT; + c = preproc_lex_fetch_byte(pp); + if (c == '=') + ttype = TOK_GE; + else if (c == '>') + { + ttype = TOK_RSH; + c = preproc_lex_fetch_byte(pp); + if (c == '=') + ttype = TOK_RSHASS; + else + preproc_lex_unfetch_byte(pp, c); + } + else + preproc_lex_unfetch_byte(pp, c); + goto out; + + case '\'': + /* character constant - turns into a uint */ +chrlit: + cl = 0; + strbuf = strbuf_new(); + for (;;) + { + c = preproc_lex_fetch_byte(pp); + if (c == CPP_EOF || c == CPP_EOL || c == '\'') + break; + cl++; + if (c == '\\') + { + strbuf_add(strbuf, '\\'); + c = preproc_lex_fetch_byte(pp); + if (c == CPP_EOF || c == CPP_EOL) + { + preproc_throw_error(pp, 
"Invalid character constant"); + break; + } + cl++; + strbuf_add(strbuf, c); + continue; + } + strbuf_add(strbuf, c); + } + if (cl == 0) + preproc_throw_error(pp, "Invalid character constant"); + strval = strbuf_end(strbuf); + ttype = TOK_CHR_LIT; + goto out; + + case '"': +strlit: + /* string literal */ + strbuf = strbuf_new(); + for (;;) + { + c = preproc_lex_fetch_byte(pp); + if (c == CPP_EOF || c == CPP_EOL || c == '"') + break; + if (c == '\\') + { + strbuf_add(strbuf, '\\'); + c = preproc_lex_fetch_byte(pp); + if (c == CPP_EOF || c == CPP_EOL) + { + preproc_throw_error(pp, "Invalid string constant"); + break; + } + cl++; + strbuf_add(strbuf, c); + continue; + } + strbuf_add(strbuf, c); + } + strval = strbuf_end(strbuf); + ttype = TOK_STR_LIT; + goto out; + + case 'L': + /* check for wide string or wide char const */ + c2 = preproc_lex_fetch_byte(pp); + if (c2 == '\'') + { + goto chrlit; + } + else if (c2 == '"') + { + goto strlit; + } + preproc_lex_unfetch_byte(pp, c2); + /* fall through for identifier */ + case '_': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + /* we have an identifier here */ + strbuf = strbuf_new(); + strbuf_add(strbuf, c); + for (;;) + { + c = preproc_lex_fetch_byte(pp); + if ((c == '_') || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) + { + strbuf_add(strbuf, c); + continue; + } + else + { + strbuf_add(strbuf, 0); + strval = strbuf_end(strbuf); + break; + } + } + preproc_lex_unfetch_byte(pp, c); + ttype = TOK_IDENT; + goto out; + + 
case '.': + c = preproc_lex_fetch_byte(pp); + if (c >= '0' && c <= '9') + { + strbuf = strbuf_new(); + strbuf_add(strbuf, '.'); + goto numlit; + } + else if (c == '.') + { + c = preproc_lex_fetch_byte(pp); + if (c == '.') + { + ttype = TOK_ELLIPSIS; + goto out; + } + preproc_lex_unfetch_byte(pp, c); + } + preproc_lex_unfetch_byte(pp, c); + ttype = TOK_DOT; + goto out; + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + strbuf = strbuf_new(); +numlit: + strbuf_add(strbuf, c); + for (;;) + { + c = preproc_lex_fetch_byte(pp); + if (!((c == '_') || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))) + break; + strbuf_add(strbuf, c); + if (c == 'e' || c == 'E' || c == 'p' || c == 'P') + { + c = preproc_lex_fetch_byte(pp); + if (c == '+' || c == '-') + { + strbuf_add(strbuf, c); + continue; + } + preproc_lex_unfetch_byte(pp, c); + } + } + strval = strbuf_end(strbuf); + preproc_lex_unfetch_byte(pp, c); + goto out; + + default: + ttype = TOK_CHAR; + strval = lw_alloc(2); + strval[0] = c; + strval[1] = 0; + break; + } +out: + t = token_create(ttype, strval, sline, scol, pp -> fn); + lw_free(strval); + return t; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lwcc/strbuf.c Thu Sep 12 22:06:26 2013 -0600 @@ -0,0 +1,57 @@ +/* +lwcc/strbuf.c + +Copyright © 2013 William Astle + +This file is part of LWTOOLS. + +LWTOOLS is free software: you can redistribute it and/or modify it under the +terms of the GNU General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <stdlib.h> + +#include <lw_alloc.h> + +#include "strbuf.h" + +struct strbuf *strbuf_new(void) +{ + struct strbuf *strbuf; + + strbuf = lw_alloc(sizeof(struct strbuf)); + strbuf -> str = NULL; + strbuf -> bo = 0; + strbuf -> bl = 0; + return strbuf; +} + +void strbuf_add(struct strbuf *strbuf, int c) +{ + if (strbuf -> bo >= strbuf -> bl) + { + strbuf -> bl += 100; + strbuf -> str = lw_realloc(strbuf -> str, strbuf -> bl); + } + strbuf -> str[strbuf -> bo++] = c; +} + +char *strbuf_end(struct strbuf *strbuf) +{ + char *rv; + + strbuf_add(strbuf, 0); + rv = strbuf -> str; + lw_free(strbuf); + return rv; +}
/*
lwcc/strbuf.h

Copyright © 2013 William Astle

This file is part of LWTOOLS.

LWTOOLS is free software: you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free Software
Foundation, either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.

You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
*/

#ifndef strbuf_h_seen___
#define strbuf_h_seen___

/* A growable byte buffer used to accumulate a string one byte at a time. */
struct strbuf
{
	char *str;	// the bytes collected so far (NULL until the first add)
	int bl;		// number of bytes currently allocated for str
	int bo;		// offset in str at which the next byte is stored
};

/* create a new, empty buffer */
extern struct strbuf *strbuf_new(void);
/* append one byte to the buffer, growing it as needed */
extern void strbuf_add(struct strbuf *, int);
/* NUL terminate, free the strbuf itself and return the collected string */
extern char *strbuf_end(struct strbuf *);

#endif // strbuf_h_seen___
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lwcc/token.c Thu Sep 12 22:06:26 2013 -0600 @@ -0,0 +1,134 @@ +/* +lwcc/token.c + +Copyright © 2013 William Astle + +This file is part of LWTOOLS. + +LWTOOLS is free software: you can redistribute it and/or modify it under the +terms of the GNU General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <stdlib.h> + +#include <lw_alloc.h> +#include <lw_string.h> + +#include "token.h" + +struct token *token_create(int ttype, char *strval, int row, int col, const char *fn) +{ + struct token *t; + + t = lw_alloc(sizeof(struct token)); + t -> ttype = ttype; + if (strval) + t -> strval = lw_strdup(strval); + else + strval = NULL; + t -> lineno = row; + t -> column = col; + t -> fn = fn; + t -> next = NULL; + t -> prev = NULL; + return t; +} + +void token_free(struct token *t) +{ + lw_free(t -> strval); + lw_free(t); +} + +struct token *token_dup(struct token *t) +{ + struct token *t2; + + t2 = lw_alloc(sizeof(struct token)); + (*t2) = (*t); + t2 -> next = NULL; + t2 -> prev = NULL; + if (t -> strval) + t2 -> strval = lw_strdup(t -> strval); + return t2; +} + +static struct { int ttype; char *tstr; } tok_strs[] = +{ + { TOK_WSPACE, " " }, + { TOK_EOL, "\n" }, + { TOK_DIV, "/" }, + { TOK_ADD, "+" }, + { TOK_SUB, "-" }, + { TOK_OPAREN, "(" }, + { TOK_CPAREN, ")" }, + { TOK_NE, "!=" }, + { TOK_EQ, "==" }, + { TOK_LE, "<=" }, + { TOK_LT, "<" }, + { TOK_GE, ">=" }, + { TOK_GT, ">" }, + { TOK_BAND, "&&" }, + { TOK_BOR, "||" }, + { TOK_BNOT, "!" 
}, + { TOK_MOD, "%"}, + { TOK_COMMA, "," }, + { TOK_ELLIPSIS, "..." }, + { TOK_QMARK, "?" }, + { TOK_COLON, ":" }, + { TOK_OBRACE, "{" }, + { TOK_CBRACE, "}" }, + { TOK_OSQUARE, "[" }, + { TOK_CSQUARE, "]" }, + { TOK_COM, "~" }, + { TOK_EOS, ";" }, + { TOK_HASH, "#" }, + { TOK_DBLHASH, "##" }, + { TOK_XOR, "^" }, + { TOK_XORASS, "^=" }, + { TOK_STAR, "*" }, + { TOK_MULASS, "*=" }, + { TOK_DIVASS, "/=" }, + { TOK_ASS, "=" }, + { TOK_MODASS, "%=" }, + { TOK_SUBASS, "-=" }, + { TOK_DBLSUB, "--" }, + { TOK_ADDASS, "+=" }, + { TOK_DBLADD, "++" }, + { TOK_BWAND, "&" }, + { TOK_BWANDASS, "&=" }, + { TOK_BWOR, "|" }, + { TOK_BWORASS, "|=" }, + { TOK_LSH, "<<" }, + { TOK_LSHASS, "<<=" }, + { TOK_RSH, ">>" }, + { TOK_RSHASS, ">>=" }, + { TOK_DOT, "." }, + { TOK_ARROW, "->" }, + { TOK_NONE, "" } +}; + +void token_print(struct token *t, FILE *f) +{ + int i; + for (i = 0; tok_strs[i].ttype != TOK_NONE; i++) + { + if (tok_strs[i].ttype == t -> ttype) + { + fprintf(f, "%s", tok_strs[i].tstr); + break; + } + } + if (t -> strval) + fprintf(f, "%s", t -> strval); +}
/*
lwcc/token.h

Copyright © 2013 William Astle

This file is part of LWTOOLS.

LWTOOLS is free software: you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free Software
Foundation, either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.

You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
*/

#ifndef token_h_seen___
#define token_h_seen___

#include <stdio.h>

/* Out of band values used by the lexer's byte fetching layer.  All are
   negative so they cannot collide with a byte read from a file. */
enum
{
	CPP_NOUNG = -3,		// "nothing held": marks an empty pushback slot
	CPP_EOL = -2,		// end of line (CR, LF, CRLF or LFCR)
	CPP_EOF = -1,		// end of file
};

/* Token types.  The operator and punctuator types have a fixed spelling
   (see the table in token.c); the others carry their text in strval. */
enum
{
	TOK_NONE = 0,		// no type; also terminates the spelling table
	TOK_EOF,		// end of file
	TOK_EOL,		// end of line
	TOK_WSPACE,		// a run of whitespace and/or comments
	TOK_IDENT,		// identifier
	TOK_NUMBER,
	TOK_STRING,
	TOK_CHAR,		// any single character not otherwise recognized
	TOK_DIV,		// /
	TOK_ADD,		// +
	TOK_SUB,		// -
	TOK_OPAREN,		// (
	TOK_CPAREN,		// )
	TOK_NE,			// !=
	TOK_EQ,			// ==
	TOK_LE,			// <=
	TOK_LT,			// <
	TOK_GE,			// >=
	TOK_GT,			// >
	TOK_BAND,		// &&
	TOK_BOR,		// ||
	TOK_BNOT,		// !
	TOK_MOD,		// %
	TOK_COMMA,		// ,
	TOK_ELLIPSIS,		// ...
	TOK_QMARK,		// ?
	TOK_COLON,		// :
	TOK_OBRACE,		// {
	TOK_CBRACE,		// }
	TOK_OSQUARE,		// [
	TOK_CSQUARE,		// ]
	TOK_COM,		// ~
	TOK_EOS,		// ;
	TOK_HASH,		// #
	TOK_DBLHASH,		// ##
	TOK_XOR,		// ^
	TOK_XORASS,		// ^=
	TOK_STAR,		// *
	TOK_MULASS,		// *=
	TOK_DIVASS,		// /=
	TOK_ASS,		// =
	TOK_MODASS,		// %=
	TOK_SUBASS,		// -=
	TOK_DBLSUB,		// --
	TOK_ADDASS,		// +=
	TOK_DBLADD,		// ++
	TOK_BWAND,		// &
	TOK_BWANDASS,		// &=
	TOK_BWOR,		// |
	TOK_BWORASS,		// |=
	TOK_LSH,		// <<
	TOK_LSHASS,		// <<=
	TOK_RSH,		// >>
	TOK_RSHASS,		// >>=
	TOK_DOT,		// .
	TOK_CHR_LIT,		// character constant; strval holds the text between the quotes
	TOK_STR_LIT,		// string literal; strval holds the text between the quotes
	TOK_ARROW,		// ->
	TOK_MAX			// one past the last token type
};

/* One lexed token.  Tokens can be chained into a doubly linked list
   through prev/next. */
struct token
{
	int ttype;				// token type
	char *strval;			// the token value if relevant
	struct token *prev;		// previous token in a list
	struct token *next;		// next token in a list
	int lineno;				// line number token came from
	int column;				// character column token came from
	const char *fn;			// file name token came from
};

/* release a token and its string value */
extern void token_free(struct token *);
/* create an unlinked token: type, string value (copied), line, column, file name */
extern struct token *token_create(int, char *strval, int, int, const char *);
/* make an unlinked copy of a token */
extern struct token *token_dup(struct token *);
/* add a token to the end of a list */
extern void token_append(struct token *, struct token *);
/* add a token to the start of a list */
extern void token_prepend(struct token *, struct token *);
/* remove individual token from whatever list it is on */
extern void token_remove(struct token *);
/* replace token with list of tokens specified */
extern void token_replace(struct token *, struct token *);
/* print a token out */
extern void token_print(struct token *, FILE *);

#endif // token_h_seen___