Mercurial > hg > index.cgi
changeset 185:cca933d32298
Clean up some mess in lwbasic directory
author | lost@l-w.ca |
---|---|
date | Thu, 22 Dec 2011 18:03:38 -0700 |
parents | 6433cb024174 |
children | 1824cabf25ce |
files | lwbasic/attic/emit.c lwbasic/attic/input.c lwbasic/attic/lexer.c lwbasic/attic/lwbasic.h lwbasic/attic/main.c lwbasic/attic/parser.c lwbasic/attic/rules.make lwbasic/attic/symtab.c lwbasic/emit.c lwbasic/input.c lwbasic/lexer.c lwbasic/lwbasic.h lwbasic/main.c lwbasic/parser.c lwbasic/rules.make lwbasic/symtab.c |
diffstat | 16 files changed, 1420 insertions(+), 1420 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lwbasic/attic/emit.c Thu Dec 22 18:03:38 2011 -0700 @@ -0,0 +1,51 @@ +/* +emit.c + +Copyright © 2011 William Astle + +This file is part of LWTOOLS. + +LWTOOLS is free software: you can redistribute it and/or modify it under the +terms of the GNU General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +This is the actual compiler bit; it drives the parser and code generation +*/ + +#include <stdio.h> + +#define __emit_c_seen__ +#include "lwbasic.h" + +void emit_prolog(cstate *state, int vis) +{ + if (vis) + { + printf("\texport _%s\n", state -> currentsub); + } + printf("_%s\n", state -> currentsub); + if (state -> framesize > 0) + { + printf("\tleas %d,s\n", -(state -> framesize)); + } +} + +void emit_epilog(cstate *state) +{ + if (state -> framesize > 0) + { + printf("\tleas %d,s\n", state -> framesize); + } + printf("\trts\n"); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lwbasic/attic/input.c Thu Dec 22 18:03:38 2011 -0700 @@ -0,0 +1,85 @@ +/* +input.c + +Copyright © 2011 William Astle + +This file is part of LWTOOLS. + +LWTOOLS is free software: you can redistribute it and/or modify it under the +terms of the GNU General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +handle reading input for the rest of the system +*/ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include <lw_alloc.h> +#include <lw_error.h> + +#define __input_c_seen__ +#include "lwbasic.h" + +struct input_state +{ + FILE *fp; + int error; +}; + +static void input_init(cstate *state) +{ + struct input_state *sp; + + sp = lw_alloc(sizeof(struct input_state)); + sp -> error = 0; + + if (!(state -> input_file) || strcmp(state -> input_file, "-")) + { + sp -> fp = stdin; + } + else + { + sp -> fp = fopen(state -> input_file, "rb"); + if (!(sp -> fp)) + { + lwb_error("Cannot open input file\n"); + } + } + + state -> input_state = sp; +} + +int input_getchar(cstate *state) +{ + int r; + struct input_state *sp; + + if (!(state -> input_state)) + input_init(state); + sp = state -> input_state; + + + if (sp -> error) + return -2; + + if (feof(sp -> fp)) + return -1; + + r = fgetc(sp -> fp); + if (r == EOF) + return -1; + return r; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lwbasic/attic/lexer.c Thu Dec 22 18:03:38 2011 -0700 @@ -0,0 +1,440 @@ +/* +lexer.c + +Copyright © 2011 William Astle + +This file is part of LWTOOLS. + +LWTOOLS is free software: you can redistribute it and/or modify it under the +terms of the GNU General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +This handles the gritty details of parsing tokens +*/ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include <lw_alloc.h> +#include <lw_string.h> + +#define __lexer_c_seen__ +#include "lwbasic.h" + +/* +A token idenfier is returned by lexer(). The actual string value +is found in state->lexer_lexer_token_string; if the token as an integer value, +it will be found in state->lexer_token_number in the appropriate "value" +slot. +*/ + +struct token_list +{ + char *string; + int token; +}; + +/* keywords that appear as part of normal expressions */ +static struct token_list lexer_global_tokens[] = +{ + { "function", token_kw_function }, + { "sub", token_kw_sub }, + { "public", token_kw_public }, + { "private", token_kw_private }, + { "as", token_kw_as }, + { "params", token_kw_params }, + { "returns", token_kw_returns }, + { "integer", token_kw_integer }, + { "endsub", token_kw_endsub }, + { "endfunction", token_kw_endfunction }, + { "dim", token_kw_dim }, + { NULL } +}; + +/* contains "built in" function names */ +static struct token_list lexer_expr_tokens[] = +{ + { "and", token_op_and }, + { "or", token_op_or }, + { "band", token_op_band }, + { "bor", token_op_bor }, + { "bxor", token_op_bxor }, + { "xor", token_op_xor }, + { "not", token_op_not }, + { "bnot", token_op_bnot }, + { NULL } +}; + +static char *lexer_token_names[] = +{ + "SUB", + "FUNCTION", + "AS", + "PUBLIC", + "PRIVATE", + "PARAMS", + "RETURNS", + "INTEGER", + "ENDSUB", + "ENDFUNCTION", + "DIM", + "<assignment>", + "<equality>", + "<greater>", + "<less>", + "<greaterequal>", + "<lessequal>", + "<notequal>", + "<and>", + "<or>", + "<xor>", + "<bitwiseand>", + "<bitwiseor>", + "<bitwisexor>", + "<plus>", + "<minus>", + "<times>", + "<divide>", + "<modulus>", + "<openparen>", + "<closeparen>", + "<not>", + "<bitwisenot>", + "<identifier>", + "<char>", + "<uint>", + "<int>", + "<eol>", + "<eof>" +}; + +char *lexer_token_name(int token) +{ + if (token > token_eol) + return "???"; + return lexer_token_names[token]; +} + +static int lexer_getchar(cstate *state) +{ + int c; + c = input_getchar(state); + if (c == -2) + { + lwb_error("Error reading input stream."); + } + return c; +} + +static void lexer_nextchar(cstate *state) +{ + state -> lexer_curchar = lexer_getchar(state); + if (state -> lexer_curchar == state -> lexer_ignorechar) + state -> lexer_curchar = lexer_getchar(state); + state -> lexer_ignorechar = 0; +} + +static int lexer_curchar(cstate *state) +{ + if (state -> lexer_curchar == -1) + { + lexer_nextchar(state); + } + + return state -> lexer_curchar; +} + +static void lexer_skip_white(cstate *state) +{ + int c; + + for (;;) + { + c = lexer_curchar(state); + if (!(c == 0 || c == ' ' || c == '\t')) + return; + lexer_nextchar(state); + } +} + +/* must not be called unless the word will be non-zero length */ +static void lexer_word(cstate *state) +{ + int wordlen = 0; + int wordpos = 0; + char *word = NULL; + int c; + struct token_list *tok = NULL; + + for (;;) { + c = lexer_curchar(state); + if (c == '_' || (c >= '0' && c <= '9' ) || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c >= 0x80) + { + /* character is part of word */ + if (wordpos >= wordlen) + { + word = lw_realloc(word, wordlen + 32); + wordlen += 32; + } + word[wordpos++] = c; + } + else + break; + + lexer_nextchar(state); + } + + word[wordpos] = 0; + lw_free(state -> lexer_token_string); + state -> lexer_token_string = lw_strdup(word); + + switch (state -> parser_state) + { + default: + tok = lexer_global_tokens; + } + + if (state -> expression) + { + tok = lexer_expr_tokens; + } + + /* check for tokens if appropriate */ + /* force uppercase */ + if (tok) + { + for (c = 0; word[c]; c++) + if (word[c] >= 'A' && word[c] <= 'Z') + word[c] = word[c] + 0x20; + + while (tok -> string) + { + if (strcmp(tok -> string, word) == 0) + break; + tok++; + } + } + + lw_free(word); + if (tok && tok -> string) + state -> lexer_token = tok -> token; + else + state -> lexer_token = token_identifier; +} + +static void lexer_parse_number(cstate *state, int neg) +{ + unsigned long tint = 0; + int c; + + for (;;) + { + c = lexer_curchar(state); + if (c >= '0' && c <= '9') + { + tint *= 10 + (c - '0'); + } + else + { + /* end of the number here */ + if (neg) + { + if (tint > 0x80000000) + lwb_error("Integer overflow\n"); + state -> lexer_token_number.integer = -tint; + state -> lexer_token = token_int; + } + else + { + state -> lexer_token = token_uint; + state -> lexer_token_number.uinteger = tint; + } + return; + } + lexer_nextchar(state); + } +} + +static void lexer_empty_token(cstate *state) +{ + lw_free(state -> lexer_token_string); + state -> lexer_token_string = NULL; +} + +void lexer(cstate *state) +{ + int c; + + lexer_skip_white(state); + + lexer_empty_token(state); + + c = lexer_curchar(state); + if (c == -1) + { + state -> lexer_token = token_eof; + return; + } + + if (c == '\n') + { + /* LF */ + lexer_nextchar(state); + state -> lexer_ignorechar = '\r'; + state -> lexer_token = token_eol; + return; + } + + if (c == '\r') + { + /* CR */ + lexer_nextchar(state); + state -> lexer_ignorechar = '\n'; + state -> lexer_token = token_eol; + return; + } + + if (c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c >= 0x80) + { + /* we have a word here; identifier, keyword, etc. */ + lexer_word(state); + return; + } + + if (state -> expression && c >= '0' && c <= '9') + { + /* we have a number */ + lexer_parse_number(state, 0); + return; + } + + lexer_nextchar(state); + if (state -> expression) + { + if (c == '-' && lexer_curchar(state) >= '0' && lexer_curchar(state) <= '9') + { + /* we have a negative number here */ + lexer_parse_number(state, 1); + return; + } + if (c == '=') + { + state -> lexer_token = token_op_equality; + return; + } + if (c == '<') + { + if (lexer_curchar(state) == '=') + { + lexer_nextchar(state); + state -> lexer_token = token_op_lessequal; + return; + } + if (lexer_curchar(state) == '>') + { + lexer_nextchar(state); + state -> lexer_token = token_op_notequal; + return; + } + state -> lexer_token = token_op_less; + return; + } + if (c == '>') + { + if (lexer_curchar(state) == '>') + { + lexer_nextchar(state); + state -> lexer_token = token_op_greaterequal; + return; + } + if (lexer_curchar(state) == '<') + { + state -> lexer_token = token_op_notequal; + lexer_nextchar(state); + return; + } + state -> lexer_token = token_op_greater; + return; + } + switch(c) + { + case '+': + state -> lexer_token = token_op_plus; + return; + + case '-': + state -> lexer_token = token_op_minus; + return; + + case '/': + state -> lexer_token = token_op_divide; + return; + + case '*': + state -> lexer_token = token_op_times; + return; + + case '%': + state -> lexer_token = token_op_modulus; + return; + + case '(': + state -> lexer_token = token_op_oparen; + return; + + case ')': + state -> lexer_token = token_op_cparen; + return; + + } + } + else + { + if (c == '=') + { + state -> lexer_token = token_op_assignment; + return; + } + } + + /* return the character if all else fails */ + state -> lexer_token = token_char; + state -> lexer_token_string = lw_realloc(state -> lexer_token_string, 2); + state -> lexer_token_string[0] = c; + state -> lexer_token_string[1] = 0; + return; +} + +char *lexer_return_token(cstate *state) +{ + static char *buffer = NULL; + static int buflen = 0; + int l; + + if (buflen == 0) + { + buffer = lw_alloc(128); + buflen = 128; + } + + l = snprintf(buffer, buflen, "%s (%s)", state -> lexer_token_string, lexer_token_name(state -> lexer_token)); + if (l >= buflen) + { + buffer = lw_realloc(buffer, l + 1); + buflen = l + 1; + snprintf(buffer, buflen, "%s (%s)", state -> lexer_token_string, lexer_token_name(state -> lexer_token)); + } + return buffer; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lwbasic/attic/lwbasic.h Thu Dec 22 18:03:38 2011 -0700 @@ -0,0 +1,148 @@ +/* +lwbasic.h + +Copyright © 2011 William Astle + +This file is part of LWTOOLS. + +LWTOOLS is free software: you can redistribute it and/or modify it under the +terms of the GNU General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +definitions used throughout lwbasic +*/ + +#ifndef __lwbasic_h_seen__ +#define __lwbasic_h_seen__ + +#include <stdint.h> + +#include "symtab.h" + +/* note: integer and uinteger will be the same for positive values from 0 +through 0x7FFFFFFF; the unsigned type should be used for doing ascii +conversions and then if a negative value was discovered, it should be +negated IFF it is in range. */ + +union lexer_numbers +{ + uint32_t uinteger; + int32_t integer; +}; + +typedef struct +{ + char *output_file; + char *input_file; + + int debug_level; + + char *lexer_token_string; + union lexer_numbers lexer_token_number; + int lexer_token; + int lexer_curchar; + int lexer_ignorechar; + int expression; + int parser_state; + + void *input_state; + + char *currentsub; + symtab_t *global_syms; + symtab_t *local_syms; + int returntype; + int framesize; +} cstate; + +/* parser states */ +enum +{ + parser_state_global = 0, /* only global decls allowed */ + parser_state_error +}; + +/* token types */ +enum +{ + token_kw_sub, /* SUB keyword */ + token_kw_function, /* FUNCTION keyword */ + token_kw_as, /* AS keyword */ + token_kw_public, /* PUBLIC keyword */ + token_kw_private, /* PRIVATE keyword */ + token_kw_params, /* PARAMS keyword */ + token_kw_returns, /* RETURNS keyword */ + token_kw_integer, /* INTEGER keyword */ + token_kw_endsub, /* ENDSUB keyword */ + token_kw_endfunction, /* ENDFUNCTION keyword */ + token_kw_dim, /* DIM keyword */ + token_op_assignment, /* assignment operator */ + token_op_equality, /* equality test */ + token_op_greater, /* greater than */ + token_op_less, /* less than */ + token_op_greaterequal, /* greater or equal */ + token_op_lessequal, /* less or equal */ + token_op_notequal, /* not equal */ + token_op_and, /* boolean and */ + token_op_or, /* boolean or */ + token_op_xor, /* boolean exlusive or */ + token_op_band, /* bitwise and */ + token_op_bor, /* bitwise or */ + token_op_bxor, /* bitwise xor */ + token_op_plus, /* plus */ + token_op_minus, /* minus */ + token_op_times, /* times */ + token_op_divide, /* divide */ + token_op_modulus, /* modulus */ + token_op_oparen, /* open paren */ + token_op_cparen, /* close paren */ + token_op_not, /* boolean not */ + token_op_bnot, /* bitwise not */ + token_identifier, /* an identifier (variable, function, etc. */ + token_char, /* single character; fallback */ + token_uint, /* unsigned integer up to 32 bits */ + token_int, /* signed integer up to 32 bits */ + token_eol, /* end of line */ + token_eof /* end of file */ +}; + +/* symbol types */ +enum +{ + symtype_sub, /* "sub" (void function) */ + symtype_func, /* function (nonvoid) */ + symtype_param, /* function parameter */ + symtype_var /* variable */ +}; + +#ifndef __input_c_seen__ +extern int input_getchar(cstate *state); +#endif + +#ifndef __main_c_seen__ +extern void lwb_error(const char *fmt, ...); +#endif + +#ifndef __lexer_c_seen__ +extern void lexer(cstate *state); +extern char *lexer_return_token(cstate *state); +extern char *lexer_token_name(int token); +#endif + +#ifndef __emit_c_seen__ +extern void emit_prolog(cstate *state, int vis); +extern void emit_epilog(cstate *state); +#endif + + +#endif /* __lwbasic_h_seen__ */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lwbasic/attic/main.c Thu Dec 22 18:03:38 2011 -0700 @@ -0,0 +1,117 @@ +/* +main.c + +Copyright © 2011 William Astle + +This file is part of LWTOOLS. + +LWTOOLS is free software: you can redistribute it and/or modify it under the +terms of the GNU General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +main program startup handling for lwbasic +*/ + +#include <stdlib.h> +#include <stdio.h> +#include <stdarg.h> + +#include <lw_cmdline.h> +#include <lw_string.h> +#include <lw_alloc.h> + +#define __main_c_seen__ +#include "lwbasic.h" + +#define PROGVER "lwbasic from " PACKAGE_STRING + +static struct lw_cmdline_options options[] = +{ + { "output", 'o', "FILE", 0, "Output to FILE"}, + { "debug", 'd', "LEVEL", lw_cmdline_opt_optional, "Set debug mode"}, + { 0 } +}; + +static int parse_opts(int key, char *arg, void *data) +{ + cstate *state = data; + + switch (key) + { + case 'o': + if (state -> output_file) + lw_free(state -> output_file); + state -> output_file = lw_strdup(arg); + break; + + case 'd': + if (!arg) + state -> debug_level = 50; + else + state -> debug_level = atoi(arg); + break; + + case lw_cmdline_key_end: + return 0; + + case lw_cmdline_key_arg: + if (state -> input_file) + { + fprintf(stderr, "Already have an input file; ignoring %s\n", arg); + } + else + { + state -> input_file = lw_strdup(arg); + } + break; + + default: + return lw_cmdline_err_unknown; + } + + return 0; +} + +static struct lw_cmdline_parser cmdline_parser = +{ + options, + parse_opts, + "INPUTFILE", + "lwbasic, a compiler for a dialect of Basic\vPlease report bugs to lost@l-w.ca.", + PROGVER +}; + +extern void parser(cstate *state); + +int main(int argc, char **argv) +{ + cstate state = { 0 }; + + lw_cmdline_parse(&cmdline_parser, argc, argv, 0, 0, &state); + + parser(&state); + + exit(0); +} + +void lwb_error(const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + + exit(1); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lwbasic/attic/parser.c Thu Dec 22 18:03:38 2011 -0700 @@ -0,0 +1,469 @@ +/* +compiler.c + +Copyright © 2011 William Astle + +This file is part of LWTOOLS. + +LWTOOLS is free software: you can redistribute it and/or modify it under the +terms of the GNU General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +This is the actual compiler bit; it drives the parser and code generation +*/ + +#include <stdio.h> + +#include <lw_alloc.h> +#include <lw_string.h> + +#include "lwbasic.h" +#include "symtab.h" + +static void expect(cstate *state, int tt) +{ + if (state -> lexer_token != tt) + lwb_error("Expecting %s, got %s\n", lexer_token_name(tt), lexer_return_token(state)); + lexer(state); +} + + +/* size of a type */ +static int sizeof_type(int type) +{ + /* everything is an "int" right now; 2 bytes */ + return 2; +} + +/* parse a type; the next token will be acquired as a result */ +/* the token advancement is to provide consistency */ +static int parse_type(cstate *state) +{ + int pt = -1; + + switch (state -> lexer_token) + { + case token_kw_integer: + pt = 1; + break; + + default: + lwb_error("Invalid type specification"); + } + lexer(state); + /* look for "unsigned" modifier for integer types */ + return pt; +} + +static void parse_expr(cstate *state, int prec); +static void parse_term(cstate *state); +static int parse_expression(cstate *state) +{ + state -> expression = 1; + + parse_expr(state, 0); + + state -> expression = 0; + return 1; +} + +static void parse_decls(cstate *state) +{ + /* declarations */ + /* the first thing that doesn't look like a declaration is assumed */ + /* to be a statement and will trigger a bailout */ + int vt; + char *vn; + symtab_entry_t *se; + + for (;;) + { + switch (state -> lexer_token) + { + /* DIM keyword */ + case token_kw_dim: + lexer(state); + if (state -> lexer_token != token_identifier) + { + lwb_error("Expecting identifier, got %s\n", lexer_return_token(state)); + } + vn = lw_strdup(state -> lexer_token_string); + lexer(state); + if (state -> lexer_token != token_kw_as) + { + lwb_error("Expecting AS, got %s\n", lexer_return_token(state)); + } + lexer(state); + vt = parse_type(state); + + se = symtab_find(state -> local_syms, vn); + if (se) + { + lwb_error("Multiply defined local variable %s", vn); + } + state -> framesize += sizeof_type(vt); + symtab_register(state -> local_syms, vn, -(state -> framesize), symtype_var, NULL); + + lw_free(vn); + break; + + /* blank lines allowed */ + case token_eol: + break; + + default: + return; + } + if (state -> lexer_token != token_eol) + lwb_error("Expecting end of line; got %s\n", lexer_return_token(state)); + lexer(state); + } +} + +static void parse_statements(cstate *state) +{ + symtab_entry_t *se; + int et; + + for (;;) + { + switch (state -> lexer_token) + { + /* blank lines allowed */ + case token_eol: + break; + + /* variable assignment */ + case token_identifier: + se = symtab_find(state -> local_syms, state -> lexer_token_string); + if (!se) + { + se = symtab_find(state -> global_syms, state -> lexer_token_string); + } + if (!se) + lwb_error("Unknown variable %s\n", state -> lexer_token_string); + lexer(state); + /* ensure the first token of the expression will be parsed correctly */ + state -> expression = 1; + expect(state, token_op_assignment); + + /* parse the expression */ + et = parse_expression(state); + + /* check type compatibility */ + + /* actually do the assignment */ + + break; + + /* anything we don't recognize as a statement token breaks out */ + default: + return; + } + if (state -> lexer_token != token_eol) + lwb_error("Expecting end of line; got %s\n", lexer_return_token(state)); + lexer(state); + } +} + + +/* issub means RETURNS is not allowed; !issub means RETURNS is required */ + +static void parse_subfunc(cstate *state, int issub) +{ + int pt, rt; + char *subname, *pn; + int vis = 0; + symtab_entry_t *se; + int paramsize = 0; + + state -> local_syms = symtab_init(); + state -> framesize = 0; + + lexer(state); + if (state -> lexer_token != token_identifier) + { + lwb_error("Invalid sub name '%s'", state -> lexer_token_string); + } + + subname = lw_strdup(state -> lexer_token_string); + + lexer(state); + if (state -> lexer_token == token_kw_public || state -> lexer_token == token_kw_private) + { + if (state -> lexer_token == token_kw_public) + vis = 1; + lexer(state); + } + + /* ignore the "PARAMS" keyword if present */ + if (state -> lexer_token == token_kw_params) + lexer(state); + + if (state -> lexer_token == token_eol || state -> lexer_token == token_kw_returns) + goto noparms; + +paramagain: + if (state -> lexer_token != token_identifier) + { + lwb_error("Parameter name expected, got %s\n", lexer_return_token(state)); + } + pn = lw_strdup(state -> lexer_token_string); + lexer(state); + + if (state -> lexer_token != token_kw_as) + lwb_error("Expecting AS\n"); + lexer(state); + + pt = parse_type(state); + + se = symtab_find(state -> local_syms, pn); + if (se) + { + lwb_error("Duplicate parameter name %s\n", pn); + } + symtab_register(state -> local_syms, pn, paramsize, symtype_param, NULL); + paramsize += sizeof_type(pt); + lw_free(pn); + + if (state -> lexer_token == token_char && state -> lexer_token_string[0] == ',') + { + lexer(state); + goto paramagain; + } + +noparms: + rt = -1; + if (!issub) + { + if (state -> lexer_token != token_kw_returns) + { + lwb_error("FUNCTION must have RETURNS\n"); + } + lexer(state); +/* if (state -> lexer_token == token_identifier) + { + printf("Return value named: %s\n", state -> lexer_token_string); + + lexer(state); + if (state -> lexer_token != token_kw_as) + lwb_error("Execting AS after RETURNS"); + lexer(state); + } +*/ + rt = parse_type(state); + } + else + { + if (state -> lexer_token == token_kw_returns) + { + lwb_error("SUB cannot specify RETURNS\n"); + } + } + + + if (state -> lexer_token != token_eol) + { + lwb_error("EOL expected; found %s\n", lexer_return_token(state)); + } + + + se = symtab_find(state -> global_syms, subname); + if (se) + { + lwb_error("Multiply defined symbol %s\n", subname); + } + + symtab_register(state -> global_syms, subname, -1, issub ? symtype_sub : symtype_func, NULL); + + state -> currentsub = subname; + state -> returntype = rt; + /* consume EOL */ + lexer(state); + + /* variable declarations */ + parse_decls(state); + + /* output function/sub prolog */ + emit_prolog(state, vis); + + /* parse statement block */ + parse_statements(state); + + if (issub) + { + if (state -> lexer_token != token_kw_endsub) + { + lwb_error("Expecting ENDSUB, got %s\n", lexer_return_token(state)); + } + } + else + { + if (state -> lexer_token != token_kw_endfunction) + { + lwb_error("Expecting ENDFUNCTION, got %s\n", lexer_return_token(state)); + } + } + /* output function/sub epilog */ + emit_epilog(state); + + lw_free(state -> currentsub); + state -> currentsub = NULL; + symtab_destroy(state -> local_syms); + state -> local_syms = NULL; +} + +void parser(cstate *state) +{ + state -> lexer_curchar = -1; + state -> global_syms = symtab_init(); + + /* now look for a global declaration */ + for (;;) + { + state -> parser_state = parser_state_global; + lexer(state); + switch (state -> lexer_token) + { + case token_kw_function: + printf("Function\n"); + parse_subfunc(state, 0); + break; + + case token_kw_sub: + printf("Sub\n"); + parse_subfunc(state, 1); + break; + + /* blank lines are allowed */ + case token_eol: + continue; + + /* EOF is allowed - end of parsing */ + case token_eof: + return; + + default: + lwb_error("Invalid token '%s' in global state\n", lexer_return_token(state)); + } + } +} + +static void parse_expr(cstate *state, int prec) +{ + static const struct operinfo { + int opernum; + int operprec; + } operators[] = + { + { token_op_plus, 100 }, + { token_op_minus, 100 }, + { token_op_times, 150 }, + { token_op_divide, 150 }, + { token_op_modulus, 150 }, + { token_op_and, 25 }, + { token_op_or, 20 }, + { token_op_xor, 20 }, + { token_op_band, 50 }, + { token_op_bor, 45 }, + { token_op_bxor, 45 }, + { -1, -1 } + }; + int opern; + + parse_term(state); + +eval_next: + for (opern = 0; operators[opern].opernum != -1; opern++) + { + if (operators[opern].opernum == state -> lexer_token) + break; + } + if (operators[opern].opernum == -1) + return; + + if (operators[opern].operprec <= prec) + return; + + lexer(state); + + parse_expr(state, operators[opern].operprec); + + /* push operator */ + + goto eval_next; +} + +static void parse_term(cstate *state) +{ +eval_next: + /* parens */ + if (state -> lexer_token == token_op_oparen) + { + lexer(state); + parse_expr(state, 0); + expect(state, token_op_cparen); + return; + } + + /* unary plus; ignore it */ + if (state -> lexer_token == token_op_plus) + { + lexer(state); + goto eval_next; + } + + /* unary minus, precision 200 */ + if (state -> lexer_token == token_op_minus) + { + lexer(state); + parse_expr(state, 200); + + /* push unary negation */ + } + + /* BNOT, NOT */ + if (state -> lexer_token == token_op_not || state -> lexer_token == token_op_bnot) + { + lexer(state); + parse_expr(state, 200); + + /* push unary operator */ + } + + /* integer */ + if (state -> lexer_token == token_int) + { + } + + /* unsigned integer */ + if (state -> lexer_token == token_uint) + { + } + + /* variable or function call */ + if (state -> lexer_token == token_identifier) + { + lexer(state); + if (state -> lexer_token == token_op_oparen) + { + /* function call */ + return; + } + /* variable */ + return; + } + + lwb_error("Invalid input in expression; got %s\n", lexer_return_token(state)); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lwbasic/attic/rules.make Thu Dec 22 18:03:38 2011 -0700 @@ -0,0 +1,28 @@ +dirname := $(dir $(lastword $(MAKEFILE_LIST))) +lwbasic_dir := $(dirname) + +lwbasic_lsrcs := main.c input.c parser.c lexer.c emit.c symtab.c + +lwbasic_srcs := $(addprefix $(dirname),$(lwbasic_lsrcs)) +lwbasic_objs := $(lwbasic_srcs:.c=.o) +lwbasic_deps := $(lwbasic_srcs:.c=.d) + + + +$(lwbasic_dir)lwbasic$(PROGSUFFIX): $(lwbasic_objs) lwlib $(lwbasic_dir)rules.make + @echo "Linking $@" + @$(CC) -o $@ $(lwbasic_objs) $(LDFLAGS) + +cleantargs := $(cleantargs) lwbasicclean +realcleantargs := $(realcleantargs) lwbasicrealclean + +.PHONY: lwbasicclean lwbasicrealclean +lwbasicrealclean: + @echo "Really cleaning up lwbasic" + @cd $(lwbasic_dir) && rm -f *.d + +lwbasicclean: + @echo "Cleaning up lwbasic" + @cd $(lwbasic_dir) && rm -f *.o *.exe lwbasic + +-include $(lwbasic_deps)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lwbasic/attic/symtab.c Thu Dec 22 18:03:38 2011 -0700 @@ -0,0 +1,82 @@ +/* +symtab.c + +Copyright © 2011 William Astle + +This file is part of LWTOOLS. + +LWTOOLS is free software: you can redistribute it and/or modify it under the +terms of the GNU General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +Symbol table handling +*/ + +#include <stdlib.h> +#include <string.h> + +#include <lw_alloc.h> +#include <lw_string.h> + +#define __symtab_c_seen__ +#include "symtab.h" + +symtab_t *symtab_init(void) +{ + symtab_t *st; + + st = lw_alloc(sizeof(symtab_t)); + st -> head = NULL; + return st; +} + +void symtab_destroy(symtab_t *st) +{ + symtab_entry_t *se; + + while (st -> head) + { + se = st -> head; + st -> head = se -> next; + lw_free(se -> name); + lw_free(se -> privdata); + lw_free(se); + } + lw_free(st); +} + +symtab_entry_t *symtab_find(symtab_t *st, char *name) +{ + symtab_entry_t *se; + + for (se = st -> head; se; se = se -> next) + { + if (strcmp(se -> name, name) == 0) + return se; + } + return NULL; +} + +void symtab_register(symtab_t *st, char *name, int addr, int symtype, void *privdata) +{ + symtab_entry_t *se; + + se = lw_alloc(sizeof(symtab_entry_t)); + se -> name = lw_strdup(name); + se -> addr = addr; + se -> symtype = symtype; + se -> privdata = privdata; + se -> next = st -> head; + st -> head = se; +}
--- a/lwbasic/emit.c Thu Dec 22 18:03:04 2011 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,51 +0,0 @@ -/* -emit.c - -Copyright © 2011 William Astle - -This file is part of LWTOOLS. - -LWTOOLS is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software -Foundation, either version 3 of the License, or (at your option) any later -version. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. - -You should have received a copy of the GNU General Public License along with -this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -/* -This is the actual compiler bit; it drives the parser and code generation -*/ - -#include <stdio.h> - -#define __emit_c_seen__ -#include "lwbasic.h" - -void emit_prolog(cstate *state, int vis) -{ - if (vis) - { - printf("\texport _%s\n", state -> currentsub); - } - printf("_%s\n", state -> currentsub); - if (state -> framesize > 0) - { - printf("\tleas %d,s\n", -(state -> framesize)); - } -} - -void emit_epilog(cstate *state) -{ - if (state -> framesize > 0) - { - printf("\tleas %d,s\n", state -> framesize); - } - printf("\trts\n"); -}
--- a/lwbasic/input.c Thu Dec 22 18:03:04 2011 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,85 +0,0 @@ -/* -input.c - -Copyright © 2011 William Astle - -This file is part of LWTOOLS. - -LWTOOLS is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software -Foundation, either version 3 of the License, or (at your option) any later -version. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. - -You should have received a copy of the GNU General Public License along with -this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -/* -handle reading input for the rest of the system -*/ - -#include <stdlib.h> -#include <stdio.h> -#include <string.h> - -#include <lw_alloc.h> -#include <lw_error.h> - -#define __input_c_seen__ -#include "lwbasic.h" - -struct input_state -{ - FILE *fp; - int error; -}; - -static void input_init(cstate *state) -{ - struct input_state *sp; - - sp = lw_alloc(sizeof(struct input_state)); - sp -> error = 0; - - if (!(state -> input_file) || strcmp(state -> input_file, "-")) - { - sp -> fp = stdin; - } - else - { - sp -> fp = fopen(state -> input_file, "rb"); - if (!(sp -> fp)) - { - lwb_error("Cannot open input file\n"); - } - } - - state -> input_state = sp; -} - -int input_getchar(cstate *state) -{ - int r; - struct input_state *sp; - - if (!(state -> input_state)) - input_init(state); - sp = state -> input_state; - - - if (sp -> error) - return -2; - - if (feof(sp -> fp)) - return -1; - - r = fgetc(sp -> fp); - if (r == EOF) - return -1; - return r; -}
--- a/lwbasic/lexer.c Thu Dec 22 18:03:04 2011 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,440 +0,0 @@ -/* -lexer.c - -Copyright © 2011 William Astle - -This file is part of LWTOOLS. - -LWTOOLS is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software -Foundation, either version 3 of the License, or (at your option) any later -version. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. - -You should have received a copy of the GNU General Public License along with -this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -/* -This handles the gritty details of parsing tokens -*/ - -#include <stdlib.h> -#include <stdio.h> -#include <string.h> - -#include <lw_alloc.h> -#include <lw_string.h> - -#define __lexer_c_seen__ -#include "lwbasic.h" - -/* -A token idenfier is returned by lexer(). The actual string value -is found in state->lexer_lexer_token_string; if the token as an integer value, -it will be found in state->lexer_token_number in the appropriate "value" -slot. -*/ - -struct token_list -{ - char *string; - int token; -}; - -/* keywords that appear as part of normal expressions */ -static struct token_list lexer_global_tokens[] = -{ - { "function", token_kw_function }, - { "sub", token_kw_sub }, - { "public", token_kw_public }, - { "private", token_kw_private }, - { "as", token_kw_as }, - { "params", token_kw_params }, - { "returns", token_kw_returns }, - { "integer", token_kw_integer }, - { "endsub", token_kw_endsub }, - { "endfunction", token_kw_endfunction }, - { "dim", token_kw_dim }, - { NULL } -}; - -/* contains "built in" function names */ -static struct token_list lexer_expr_tokens[] = -{ - { "and", token_op_and }, - { "or", token_op_or }, - { "band", token_op_band }, - { "bor", token_op_bor }, - { "bxor", token_op_bxor }, - { "xor", token_op_xor }, - { "not", token_op_not }, - { "bnot", token_op_bnot }, - { NULL } -}; - -static char *lexer_token_names[] = -{ - "SUB", - "FUNCTION", - "AS", - "PUBLIC", - "PRIVATE", - "PARAMS", - "RETURNS", - "INTEGER", - "ENDSUB", - "ENDFUNCTION", - "DIM", - "<assignment>", - "<equality>", - "<greater>", - "<less>", - "<greaterequal>", - "<lessequal>", - "<notequal>", - "<and>", - "<or>", - "<xor>", - "<bitwiseand>", - "<bitwiseor>", - "<bitwisexor>", - "<plus>", - "<minus>", - "<times>", - "<divide>", - "<modulus>", - "<openparen>", - "<closeparen>", - "<not>", - "<bitwisenot>", - "<identifier>", - "<char>", - "<uint>", - "<int>", - "<eol>", - "<eof>" -}; - -char *lexer_token_name(int token) -{ - if (token > token_eol) - return "???"; - return lexer_token_names[token]; -} - -static int lexer_getchar(cstate *state) -{ - int c; - c = input_getchar(state); - if (c == -2) - { - lwb_error("Error reading input stream."); - } - return c; -} - -static void lexer_nextchar(cstate *state) -{ - state -> lexer_curchar = lexer_getchar(state); - if (state -> lexer_curchar == state -> lexer_ignorechar) - state -> lexer_curchar = lexer_getchar(state); - state -> lexer_ignorechar = 0; -} - -static int lexer_curchar(cstate *state) -{ - if (state -> lexer_curchar == -1) - { - lexer_nextchar(state); - } - - return state -> lexer_curchar; -} - -static void lexer_skip_white(cstate *state) -{ - int c; - - for (;;) - { - c = lexer_curchar(state); - if (!(c == 0 || c == ' ' || c == '\t')) - return; - lexer_nextchar(state); - } -} - -/* must not be called unless the word will be non-zero length */ -static void lexer_word(cstate *state) -{ - int wordlen = 0; - int wordpos = 0; - char *word = NULL; - int c; - struct token_list *tok = NULL; - - for (;;) { - c = lexer_curchar(state); - if (c == '_' || (c >= '0' && c <= '9' ) || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c >= 0x80) - { - /* character is part of word */ - if (wordpos >= wordlen) - { - word = lw_realloc(word, wordlen + 32); - wordlen += 32; - } - word[wordpos++] = c; - } - else - break; - - lexer_nextchar(state); - } - - word[wordpos] = 0; - lw_free(state -> lexer_token_string); - state -> lexer_token_string = lw_strdup(word); - - switch (state -> parser_state) - { - default: - tok = lexer_global_tokens; - } - - if (state -> expression) - { - tok = lexer_expr_tokens; - } - - /* check for tokens if appropriate */ - /* force uppercase */ - if (tok) - { - for (c = 0; word[c]; c++) - if (word[c] >= 'A' && word[c] <= 'Z') - word[c] = word[c] + 0x20; - - while (tok -> string) - { - if (strcmp(tok -> string, word) == 0) - break; - tok++; - } - } - - lw_free(word); - if (tok && tok -> string) - state -> lexer_token = tok -> token; - else - state -> lexer_token = token_identifier; -} - -static void lexer_parse_number(cstate *state, int neg) -{ - unsigned long tint = 0; - int c; - - for (;;) - { - c = lexer_curchar(state); - if (c >= '0' && c <= '9') - { - tint *= 10 + (c - '0'); - } - else - { - /* end of the number here */ - if (neg) - { - if (tint > 0x80000000) - lwb_error("Integer overflow\n"); - state -> lexer_token_number.integer = -tint; - state -> lexer_token = token_int; - } - else - { - state -> lexer_token = token_uint; - state -> lexer_token_number.uinteger = tint; - } - return; - } - lexer_nextchar(state); - } -} - -static void lexer_empty_token(cstate *state) -{ - lw_free(state -> lexer_token_string); - state -> lexer_token_string = NULL; -} - -void lexer(cstate *state) -{ - int c; - - lexer_skip_white(state); - - lexer_empty_token(state); - - c = lexer_curchar(state); - if (c == -1) - { - state -> lexer_token = token_eof; - return; - } - - if (c == '\n') - { - /* LF */ - lexer_nextchar(state); - state -> lexer_ignorechar = '\r'; - state -> lexer_token = token_eol; - return; - } - - if (c == '\r') - { - /* CR */ - lexer_nextchar(state); - state -> lexer_ignorechar = '\n'; - state -> lexer_token = token_eol; - return; - } - - if (c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c >= 0x80) - { - /* we have a word here; identifier, keyword, etc. */ - lexer_word(state); - return; - } - - if (state -> expression && c >= '0' && c <= '9') - { - /* we have a number */ - lexer_parse_number(state, 0); - return; - } - - lexer_nextchar(state); - if (state -> expression) - { - if (c == '-' && lexer_curchar(state) >= '0' && lexer_curchar(state) <= '9') - { - /* we have a negative number here */ - lexer_parse_number(state, 1); - return; - } - if (c == '=') - { - state -> lexer_token = token_op_equality; - return; - } - if (c == '<') - { - if (lexer_curchar(state) == '=') - { - lexer_nextchar(state); - state -> lexer_token = token_op_lessequal; - return; - } - if (lexer_curchar(state) == '>') - { - lexer_nextchar(state); - state -> lexer_token = token_op_notequal; - return; - } - state -> lexer_token = token_op_less; - return; - } - if (c == '>') - { - if (lexer_curchar(state) == '>') - { - lexer_nextchar(state); - state -> lexer_token = token_op_greaterequal; - return; - } - if (lexer_curchar(state) == '<') - { - state -> lexer_token = token_op_notequal; - lexer_nextchar(state); - return; - } - state -> lexer_token = token_op_greater; - return; - } - switch(c) - { - case '+': - state -> lexer_token = token_op_plus; - return; - - case '-': - state -> lexer_token = token_op_minus; - return; - - case '/': - state -> lexer_token = token_op_divide; - return; - - case '*': - state -> lexer_token = token_op_times; - return; - - case '%': - state -> lexer_token = token_op_modulus; - return; - - case '(': - state -> lexer_token = token_op_oparen; - return; - - case ')': - state -> lexer_token = token_op_cparen; - return; - - } - } - else - { - if (c == '=') - { - state -> lexer_token = token_op_assignment; - return; - } - } - - /* return the character if all else fails */ - state -> lexer_token = token_char; - state -> lexer_token_string = lw_realloc(state -> lexer_token_string, 2); - state -> lexer_token_string[0] = c; - state -> lexer_token_string[1] = 0; - return; -} - -char *lexer_return_token(cstate *state) -{ - static char *buffer = NULL; - static int buflen = 0; - int l; - - if (buflen == 0) - { - buffer = lw_alloc(128); - buflen = 128; - } - - l = snprintf(buffer, buflen, "%s (%s)", state -> lexer_token_string, lexer_token_name(state -> lexer_token)); - if (l >= buflen) - { - buffer = lw_realloc(buffer, l + 1); - buflen = l + 1; - snprintf(buffer, buflen, "%s (%s)", state -> lexer_token_string, lexer_token_name(state -> lexer_token)); - } - return buffer; -}
--- a/lwbasic/lwbasic.h Thu Dec 22 18:03:04 2011 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,148 +0,0 @@ -/* -lwbasic.h - -Copyright © 2011 William Astle - -This file is part of LWTOOLS. - -LWTOOLS is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software -Foundation, either version 3 of the License, or (at your option) any later -version. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. - -You should have received a copy of the GNU General Public License along with -this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -/* -definitions used throughout lwbasic -*/ - -#ifndef __lwbasic_h_seen__ -#define __lwbasic_h_seen__ - -#include <stdint.h> - -#include "symtab.h" - -/* note: integer and uinteger will be the same for positive values from 0 -through 0x7FFFFFFF; the unsigned type should be used for doing ascii -conversions and then if a negative value was discovered, it should be -negated IFF it is in range. */ - -union lexer_numbers -{ - uint32_t uinteger; - int32_t integer; -}; - -typedef struct -{ - char *output_file; - char *input_file; - - int debug_level; - - char *lexer_token_string; - union lexer_numbers lexer_token_number; - int lexer_token; - int lexer_curchar; - int lexer_ignorechar; - int expression; - int parser_state; - - void *input_state; - - char *currentsub; - symtab_t *global_syms; - symtab_t *local_syms; - int returntype; - int framesize; -} cstate; - -/* parser states */ -enum -{ - parser_state_global = 0, /* only global decls allowed */ - parser_state_error -}; - -/* token types */ -enum -{ - token_kw_sub, /* SUB keyword */ - token_kw_function, /* FUNCTION keyword */ - token_kw_as, /* AS keyword */ - token_kw_public, /* PUBLIC keyword */ - token_kw_private, /* PRIVATE keyword */ - token_kw_params, /* PARAMS keyword */ - token_kw_returns, /* RETURNS keyword */ - token_kw_integer, /* INTEGER keyword */ - token_kw_endsub, /* ENDSUB keyword */ - token_kw_endfunction, /* ENDFUNCTION keyword */ - token_kw_dim, /* DIM keyword */ - token_op_assignment, /* assignment operator */ - token_op_equality, /* equality test */ - token_op_greater, /* greater than */ - token_op_less, /* less than */ - token_op_greaterequal, /* greater or equal */ - token_op_lessequal, /* less or equal */ - token_op_notequal, /* not equal */ - token_op_and, /* boolean and */ - token_op_or, /* boolean or */ - token_op_xor, /* boolean exlusive or */ - token_op_band, /* bitwise and */ - token_op_bor, /* bitwise or */ - token_op_bxor, /* bitwise xor */ - token_op_plus, /* plus */ - token_op_minus, /* minus */ - token_op_times, /* times */ - token_op_divide, /* divide */ - token_op_modulus, /* modulus */ - token_op_oparen, /* open paren */ - token_op_cparen, /* close paren */ - token_op_not, /* boolean not */ - token_op_bnot, /* bitwise not */ - token_identifier, /* an identifier (variable, function, etc. */ - token_char, /* single character; fallback */ - token_uint, /* unsigned integer up to 32 bits */ - token_int, /* signed integer up to 32 bits */ - token_eol, /* end of line */ - token_eof /* end of file */ -}; - -/* symbol types */ -enum -{ - symtype_sub, /* "sub" (void function) */ - symtype_func, /* function (nonvoid) */ - symtype_param, /* function parameter */ - symtype_var /* variable */ -}; - -#ifndef __input_c_seen__ -extern int input_getchar(cstate *state); -#endif - -#ifndef __main_c_seen__ -extern void lwb_error(const char *fmt, ...); -#endif - -#ifndef __lexer_c_seen__ -extern void lexer(cstate *state); -extern char *lexer_return_token(cstate *state); -extern char *lexer_token_name(int token); -#endif - -#ifndef __emit_c_seen__ -extern void emit_prolog(cstate *state, int vis); -extern void emit_epilog(cstate *state); -#endif - - -#endif /* __lwbasic_h_seen__ */
--- a/lwbasic/main.c Thu Dec 22 18:03:04 2011 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,117 +0,0 @@ -/* -main.c - -Copyright © 2011 William Astle - -This file is part of LWTOOLS. - -LWTOOLS is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software -Foundation, either version 3 of the License, or (at your option) any later -version. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. - -You should have received a copy of the GNU General Public License along with -this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -/* -main program startup handling for lwbasic -*/ - -#include <stdlib.h> -#include <stdio.h> -#include <stdarg.h> - -#include <lw_cmdline.h> -#include <lw_string.h> -#include <lw_alloc.h> - -#define __main_c_seen__ -#include "lwbasic.h" - -#define PROGVER "lwbasic from " PACKAGE_STRING - -static struct lw_cmdline_options options[] = -{ - { "output", 'o', "FILE", 0, "Output to FILE"}, - { "debug", 'd', "LEVEL", lw_cmdline_opt_optional, "Set debug mode"}, - { 0 } -}; - -static int parse_opts(int key, char *arg, void *data) -{ - cstate *state = data; - - switch (key) - { - case 'o': - if (state -> output_file) - lw_free(state -> output_file); - state -> output_file = lw_strdup(arg); - break; - - case 'd': - if (!arg) - state -> debug_level = 50; - else - state -> debug_level = atoi(arg); - break; - - case lw_cmdline_key_end: - return 0; - - case lw_cmdline_key_arg: - if (state -> input_file) - { - fprintf(stderr, "Already have an input file; ignoring %s\n", arg); - } - else - { - state -> input_file = lw_strdup(arg); - } - break; - - default: - return lw_cmdline_err_unknown; - } - - return 0; -} - -static struct lw_cmdline_parser cmdline_parser = -{ - options, - parse_opts, - "INPUTFILE", - "lwbasic, a compiler for a dialect of Basic\vPlease report bugs to lost@l-w.ca.", - PROGVER -}; - -extern void parser(cstate *state); - -int main(int argc, char **argv) -{ - cstate state = { 0 }; - - lw_cmdline_parse(&cmdline_parser, argc, argv, 0, 0, &state); - - parser(&state); - - exit(0); -} - -void lwb_error(const char *fmt, ...) -{ - va_list args; - - va_start(args, fmt); - vfprintf(stderr, fmt, args); - va_end(args); - - exit(1); -}
--- a/lwbasic/parser.c Thu Dec 22 18:03:04 2011 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,469 +0,0 @@ -/* -compiler.c - -Copyright © 2011 William Astle - -This file is part of LWTOOLS. - -LWTOOLS is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software -Foundation, either version 3 of the License, or (at your option) any later -version. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. - -You should have received a copy of the GNU General Public License along with -this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -/* -This is the actual compiler bit; it drives the parser and code generation -*/ - -#include <stdio.h> - -#include <lw_alloc.h> -#include <lw_string.h> - -#include "lwbasic.h" -#include "symtab.h" - -static void expect(cstate *state, int tt) -{ - if (state -> lexer_token != tt) - lwb_error("Expecting %s, got %s\n", lexer_token_name(tt), lexer_return_token(state)); - lexer(state); -} - - -/* size of a type */ -static int sizeof_type(int type) -{ - /* everything is an "int" right now; 2 bytes */ - return 2; -} - -/* parse a type; the next token will be acquired as a result */ -/* the token advancement is to provide consistency */ -static int parse_type(cstate *state) -{ - int pt = -1; - - switch (state -> lexer_token) - { - case token_kw_integer: - pt = 1; - break; - - default: - lwb_error("Invalid type specification"); - } - lexer(state); - /* look for "unsigned" modifier for integer types */ - return pt; -} - -static void parse_expr(cstate *state, int prec); -static void parse_term(cstate *state); -static int parse_expression(cstate *state) -{ - state -> expression = 1; - - parse_expr(state, 0); - - state -> expression = 0; - return 1; -} - -static void parse_decls(cstate *state) -{ - /* declarations */ - /* the first thing that doesn't look like a declaration is assumed */ - /* to be a statement and will trigger a bailout */ - int vt; - char *vn; - symtab_entry_t *se; - - for (;;) - { - switch (state -> lexer_token) - { - /* DIM keyword */ - case token_kw_dim: - lexer(state); - if (state -> lexer_token != token_identifier) - { - lwb_error("Expecting identifier, got %s\n", lexer_return_token(state)); - } - vn = lw_strdup(state -> lexer_token_string); - lexer(state); - if (state -> lexer_token != token_kw_as) - { - lwb_error("Expecting AS, got %s\n", lexer_return_token(state)); - } - lexer(state); - vt = parse_type(state); - - se = symtab_find(state -> local_syms, vn); - if (se) - { - lwb_error("Multiply defined local variable %s", vn); - } - state -> framesize += sizeof_type(vt); - symtab_register(state -> local_syms, vn, -(state -> framesize), symtype_var, NULL); - - lw_free(vn); - break; - - /* blank lines allowed */ - case token_eol: - break; - - default: - return; - } - if (state -> lexer_token != token_eol) - lwb_error("Expecting end of line; got %s\n", lexer_return_token(state)); - lexer(state); - } -} - -static void parse_statements(cstate *state) -{ - symtab_entry_t *se; - int et; - - for (;;) - { - switch (state -> lexer_token) - { - /* blank lines allowed */ - case token_eol: - break; - - /* variable assignment */ - case token_identifier: - se = symtab_find(state -> local_syms, state -> lexer_token_string); - if (!se) - { - se = symtab_find(state -> global_syms, state -> lexer_token_string); - } - if (!se) - lwb_error("Unknown variable %s\n", state -> lexer_token_string); - lexer(state); - /* ensure the first token of the expression will be parsed correctly */ - state -> expression = 1; - expect(state, token_op_assignment); - - /* parse the expression */ - et = parse_expression(state); - - /* check type compatibility */ - - /* actually do the assignment */ - - break; - - /* anything we don't recognize as a statement token breaks out */ - default: - return; - } - if (state -> lexer_token != token_eol) - lwb_error("Expecting end of line; got %s\n", lexer_return_token(state)); - lexer(state); - } -} - - -/* issub means RETURNS is not allowed; !issub means RETURNS is required */ - -static void parse_subfunc(cstate *state, int issub) -{ - int pt, rt; - char *subname, *pn; - int vis = 0; - symtab_entry_t *se; - int paramsize = 0; - - state -> local_syms = symtab_init(); - state -> framesize = 0; - - lexer(state); - if (state -> lexer_token != token_identifier) - { - lwb_error("Invalid sub name '%s'", state -> lexer_token_string); - } - - subname = lw_strdup(state -> lexer_token_string); - - lexer(state); - if (state -> lexer_token == token_kw_public || state -> lexer_token == token_kw_private) - { - if (state -> lexer_token == token_kw_public) - vis = 1; - lexer(state); - } - - /* ignore the "PARAMS" keyword if present */ - if (state -> lexer_token == token_kw_params) - lexer(state); - - if (state -> lexer_token == token_eol || state -> lexer_token == token_kw_returns) - goto noparms; - -paramagain: - if (state -> lexer_token != token_identifier) - { - lwb_error("Parameter name expected, got %s\n", lexer_return_token(state)); - } - pn = lw_strdup(state -> lexer_token_string); - lexer(state); - - if (state -> lexer_token != token_kw_as) - lwb_error("Expecting AS\n"); - lexer(state); - - pt = parse_type(state); - - se = symtab_find(state -> local_syms, pn); - if (se) - { - lwb_error("Duplicate parameter name %s\n", pn); - } - symtab_register(state -> local_syms, pn, paramsize, symtype_param, NULL); - paramsize += sizeof_type(pt); - lw_free(pn); - - if (state -> lexer_token == token_char && state -> lexer_token_string[0] == ',') - { - lexer(state); - goto paramagain; - } - -noparms: - rt = -1; - if (!issub) - { - if (state -> lexer_token != token_kw_returns) - { - lwb_error("FUNCTION must have RETURNS\n"); - } - lexer(state); -/* if (state -> lexer_token == token_identifier) - { - printf("Return value named: %s\n", state -> lexer_token_string); - - lexer(state); - if (state -> lexer_token != token_kw_as) - lwb_error("Execting AS after RETURNS"); - lexer(state); - } -*/ - rt = parse_type(state); - } - else - { - if (state -> lexer_token == token_kw_returns) - { - lwb_error("SUB cannot specify RETURNS\n"); - } - } - - - if (state -> lexer_token != token_eol) - { - lwb_error("EOL expected; found %s\n", lexer_return_token(state)); - } - - - se = symtab_find(state -> global_syms, subname); - if (se) - { - lwb_error("Multiply defined symbol %s\n", subname); - } - - symtab_register(state -> global_syms, subname, -1, issub ? symtype_sub : symtype_func, NULL); - - state -> currentsub = subname; - state -> returntype = rt; - /* consume EOL */ - lexer(state); - - /* variable declarations */ - parse_decls(state); - - /* output function/sub prolog */ - emit_prolog(state, vis); - - /* parse statement block */ - parse_statements(state); - - if (issub) - { - if (state -> lexer_token != token_kw_endsub) - { - lwb_error("Expecting ENDSUB, got %s\n", lexer_return_token(state)); - } - } - else - { - if (state -> lexer_token != token_kw_endfunction) - { - lwb_error("Expecting ENDFUNCTION, got %s\n", lexer_return_token(state)); - } - } - /* output function/sub epilog */ - emit_epilog(state); - - lw_free(state -> currentsub); - state -> currentsub = NULL; - symtab_destroy(state -> local_syms); - state -> local_syms = NULL; -} - -void parser(cstate *state) -{ - state -> lexer_curchar = -1; - state -> global_syms = symtab_init(); - - /* now look for a global declaration */ - for (;;) - { - state -> parser_state = parser_state_global; - lexer(state); - switch (state -> lexer_token) - { - case token_kw_function: - printf("Function\n"); - parse_subfunc(state, 0); - break; - - case token_kw_sub: - printf("Sub\n"); - parse_subfunc(state, 1); - break; - - /* blank lines are allowed */ - case token_eol: - continue; - - /* EOF is allowed - end of parsing */ - case token_eof: - return; - - default: - lwb_error("Invalid token '%s' in global state\n", lexer_return_token(state)); - } - } -} - -static void parse_expr(cstate *state, int prec) -{ - static const struct operinfo { - int opernum; - int operprec; - } operators[] = - { - { token_op_plus, 100 }, - { token_op_minus, 100 }, - { token_op_times, 150 }, - { token_op_divide, 150 }, - { token_op_modulus, 150 }, - { token_op_and, 25 }, - { token_op_or, 20 }, - { token_op_xor, 20 }, - { token_op_band, 50 }, - { token_op_bor, 45 }, - { token_op_bxor, 45 }, - { -1, -1 } - }; - int opern; - - parse_term(state); - -eval_next: - for (opern = 0; operators[opern].opernum != -1; opern++) - { - if (operators[opern].opernum == state -> lexer_token) - break; - } - if (operators[opern].opernum == -1) - return; - - if (operators[opern].operprec <= prec) - return; - - lexer(state); - - parse_expr(state, operators[opern].operprec); - - /* push operator */ - - goto eval_next; -} - -static void parse_term(cstate *state) -{ -eval_next: - /* parens */ - if (state -> lexer_token == token_op_oparen) - { - lexer(state); - parse_expr(state, 0); - expect(state, token_op_cparen); - return; - } - - /* unary plus; ignore it */ - if (state -> lexer_token == token_op_plus) - { - lexer(state); - goto eval_next; - } - - /* unary minus, precision 200 */ - if (state -> lexer_token == token_op_minus) - { - lexer(state); - parse_expr(state, 200); - - /* push unary negation */ - } - - /* BNOT, NOT */ - if (state -> lexer_token == token_op_not || state -> lexer_token == token_op_bnot) - { - lexer(state); - parse_expr(state, 200); - - /* push unary operator */ - } - - /* integer */ - if (state -> lexer_token == token_int) - { - } - - /* unsigned integer */ - if (state -> lexer_token == token_uint) - { - } - - /* variable or function call */ - if (state -> lexer_token == token_identifier) - { - lexer(state); - if (state -> lexer_token == token_op_oparen) - { - /* function call */ - return; - } - /* variable */ - return; - } - - lwb_error("Invalid input in expression; got %s\n", lexer_return_token(state)); -}
--- a/lwbasic/rules.make Thu Dec 22 18:03:04 2011 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ -dirname := $(dir $(lastword $(MAKEFILE_LIST))) -lwbasic_dir := $(dirname) - -lwbasic_lsrcs := main.c input.c parser.c lexer.c emit.c symtab.c - -lwbasic_srcs := $(addprefix $(dirname),$(lwbasic_lsrcs)) -lwbasic_objs := $(lwbasic_srcs:.c=.o) -lwbasic_deps := $(lwbasic_srcs:.c=.d) - - - -$(lwbasic_dir)lwbasic$(PROGSUFFIX): $(lwbasic_objs) lwlib $(lwbasic_dir)rules.make - @echo "Linking $@" - @$(CC) -o $@ $(lwbasic_objs) $(LDFLAGS) - -cleantargs := $(cleantargs) lwbasicclean -realcleantargs := $(realcleantargs) lwbasicrealclean - -.PHONY: lwbasicclean lwbasicrealclean -lwbasicrealclean: - @echo "Really cleaning up lwbasic" - @cd $(lwbasic_dir) && rm -f *.d - -lwbasicclean: - @echo "Cleaning up lwbasic" - @cd $(lwbasic_dir) && rm -f *.o *.exe lwbasic - --include $(lwbasic_deps)
--- a/lwbasic/symtab.c Thu Dec 22 18:03:04 2011 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,82 +0,0 @@ -/* -symtab.c - -Copyright © 2011 William Astle - -This file is part of LWTOOLS. - -LWTOOLS is free software: you can redistribute it and/or modify it under the -terms of the GNU General Public License as published by the Free Software -Foundation, either version 3 of the License, or (at your option) any later -version. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. - -You should have received a copy of the GNU General Public License along with -this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -/* -Symbol table handling -*/ - -#include <stdlib.h> -#include <string.h> - -#include <lw_alloc.h> -#include <lw_string.h> - -#define __symtab_c_seen__ -#include "symtab.h" - -symtab_t *symtab_init(void) -{ - symtab_t *st; - - st = lw_alloc(sizeof(symtab_t)); - st -> head = NULL; - return st; -} - -void symtab_destroy(symtab_t *st) -{ - symtab_entry_t *se; - - while (st -> head) - { - se = st -> head; - st -> head = se -> next; - lw_free(se -> name); - lw_free(se -> privdata); - lw_free(se); - } - lw_free(st); -} - -symtab_entry_t *symtab_find(symtab_t *st, char *name) -{ - symtab_entry_t *se; - - for (se = st -> head; se; se = se -> next) - { - if (strcmp(se -> name, name) == 0) - return se; - } - return NULL; -} - -void symtab_register(symtab_t *st, char *name, int addr, int symtype, void *privdata) -{ - symtab_entry_t *se; - - se = lw_alloc(sizeof(symtab_entry_t)); - se -> name = lw_strdup(name); - se -> addr = addr; - se -> symtype = symtype; - se -> privdata = privdata; - se -> next = st -> head; - st -> head = se; -}