Mercurial > hg > index.cgi
changeset 25:87590f43e76d
Started lwbasic parser; checkpoint
author | lost@l-w.ca |
---|---|
date | Mon, 24 Jan 2011 20:08:09 -0700 |
parents | 421d7ceb4d86 |
children | 26aa76da75ad |
files | Makefile lwbasic/compiler.c lwbasic/input.c lwbasic/lexer.c lwbasic/lwbasic.h lwbasic/main.c lwbasic/rules.make |
diffstat | 7 files changed, 460 insertions(+), 3 deletions(-) [+] |
line wrap: on
line diff
--- a/Makefile Mon Jan 24 18:31:07 2011 -0700 +++ b/Makefile Mon Jan 24 20:08:09 2011 -0700 @@ -19,6 +19,7 @@ CPPFLAGS += -I lwlib -DPACKAGE_STRING='"lwtools 4.0-pre"' LDFLAGS += -L$(PWD)/lwlib -llw +CFLAGS ?= -g -Wall MAIN_TARGETS := lwasm/lwasm$(PROGSUFFIX) \ lwlink/lwlink$(PROGSUFFIX) \
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lwbasic/compiler.c Mon Jan 24 20:08:09 2011 -0700 @@ -0,0 +1,168 @@ +/* +compiler.c + +Copyright © 2011 William Astle + +This file is part of LWTOOLS. + +LWTOOLS is free software: you can redistribute it and/or modify it under the +terms of the GNU General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +This is the actual compiler bit; it drives the parser and code generation +*/ + +#include <stdio.h> + +#include "lwbasic.h" + +/* parse a type; the next token will be acquired as a result */ +/* the token advancement is to provide consistency */ +static int parse_type(cstate *state) +{ + int pt = -1; + + switch (state -> lexer_token) + { + case token_kw_integer: + pt = 1; + break; + + default: + lwb_error("Invalid type specification"); + } + lexer(state); + /* look for "unsigned" modifier for integer types */ + return pt; +} + + +/* issub means RETURNS is not allowed; !issub means RETURNS is required */ +static void parse_subfunc(cstate *state, int issub) +{ + int pt; + + lexer(state); + if (state -> lexer_token != token_identifier) + { + lwb_error("Invalid sub name '%s'", state -> lexer_token_string); + } + + printf("<name> = %s\n", state -> lexer_token_string); + + lexer(state); + if (state -> lexer_token == token_kw_public || state -> lexer_token == token_kw_private) + { + printf("<type> = %s\n", state -> lexer_token_string); + lexer(state); + } + + /* ignore the "PARAMS" keyword if present */ + if (state -> lexer_token == token_kw_params) + lexer(state); + + if (state -> lexer_token == token_eol) + goto noparms; + +paramagain: + if (state -> lexer_token != token_identifier) + { + lwb_error("Parameter name expected, get %d, %s\n", state -> lexer_token, state -> lexer_token_string); + } + printf("Got <param> = %s\n", state -> lexer_token_string); + lexer(state); + + if (state -> lexer_token != token_kw_as) + lwb_error("Expecting AS\n"); + lexer(state); + + pt = parse_type(state); + printf("Got <type> = %d\n", pt); + + if (state -> lexer_token == token_char && state -> lexer_token_string[0] == ',') + { + lexer(state); + goto paramagain; + } + +noparms: + if (!issub) + { + int rt; + + if (state -> lexer_token != token_kw_returns) + { + lwb_error("FUNCTION must have RETURNS\n"); + } + lexer(state); + if (state -> lexer_token == token_identifier) + { + printf("Return value named: %s\n", state -> lexer_token_string); + lexer(state); + if (state -> lexer_token != token_kw_as) + lwb_error("Execting AS after RETURNS"); + lexer(state); + } + rt = parse_type(state); + printf("Return type: %d\n", rt); + } + else + { + if (state -> lexer_token == token_kw_returns) + { + lwb_error("SUB cannot specify RETURNS\n"); + } + } + + + if (state -> lexer_token != token_eol) + { + lwb_error("EOL expected; found %d, %s\n", state -> lexer_token, state -> lexer_token_string); + } +} + +void compiler(cstate *state) +{ + state -> lexer_curchar = -1; + + /* now look for a global declaration */ + for (;;) + { + state -> parser_state = parser_state_global; + lexer(state); + switch (state -> lexer_token) + { + case token_kw_function: + printf("Function\n"); + parse_subfunc(state, 0); + break; + + case token_kw_sub: + printf("Sub\n"); + parse_subfunc(state, 1); + break; + + /* blank lines are allowed */ + case token_eol: + continue; + + /* EOF is allowed - end of parsing */ + case token_eof: + return; + + default: + lwb_error("Invalid token %d, %s in global state\n", state -> lexer_token, state -> lexer_token_string); + } + } +}
--- a/lwbasic/input.c Mon Jan 24 18:31:07 2011 -0700 +++ b/lwbasic/input.c Mon Jan 24 20:08:09 2011 -0700 @@ -28,6 +28,7 @@ #include <string.h> #include <lw_alloc.h> +#include <lw_error.h> #define __input_c_seen__ #include "lwbasic.h" @@ -54,8 +55,7 @@ sp -> fp = fopen(state -> input_file, "rb"); if (!(sp -> fp)) { - fprintf(stderr, "Cannot open input file\n"); - exit(1); + lwb_error("Cannot open input file\n"); } }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lwbasic/lexer.c Mon Jan 24 20:08:09 2011 -0700 @@ -0,0 +1,216 @@ +/* +lexer.c + +Copyright © 2011 William Astle + +This file is part of LWTOOLS. + +LWTOOLS is free software: you can redistribute it and/or modify it under the +terms of the GNU General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +This handles the gritty details of parsing tokens +*/ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include <lw_alloc.h> +#include <lw_string.h> + +#define __lexer_c_seen__ +#include "lwbasic.h" + +/* +A token idenfier is returned by lexer(). The actual string value +is found in state->lexer_lexer_token_string; if the token as an integer value, +it will be found in state->lexer_token_number in the appropriate "value" +slot. +*/ + +struct token_list +{ + char *string; + int token; +}; + +static struct token_list lexer_global_tokens[] = +{ + { "function", token_kw_function }, + { "sub", token_kw_sub }, + { "public", token_kw_public }, + { "private", token_kw_private }, + { "as", token_kw_as }, + { "params", token_kw_params }, + { "returns", token_kw_returns }, + { NULL } +}; + +static int lexer_getchar(cstate *state) +{ + int c; + c = input_getchar(state); + if (c == -2) + { + lwb_error("Error reading input stream."); + } + return c; +} + +static void lexer_nextchar(cstate *state) +{ + state -> lexer_curchar = lexer_getchar(state); + if (state -> lexer_curchar == state -> lexer_ignorechar) + state -> lexer_curchar = lexer_getchar(state); + state -> lexer_ignorechar = 0; +} + +static int lexer_curchar(cstate *state) +{ + if (state -> lexer_curchar == -1) + { + lexer_nextchar(state); + } + + return state -> lexer_curchar; +} + +static void lexer_skip_white(cstate *state) +{ + int c; + + for (;;) + { + c = lexer_curchar(state); + if (!(c == 0 || c == ' ' || c == '\t')) + return; + lexer_nextchar(state); + } +} + +/* must not be called unless the word will be non-zero length */ +static void lexer_word(cstate *state) +{ + int wordlen = 0; + int wordpos = 0; + char *word = NULL; + int c; + struct token_list *tok = NULL; + + for (;;) { + c = lexer_curchar(state); + if (c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c >= 0x80) + { + /* character is part of word */ + if (wordpos >= wordlen) + { + word = lw_realloc(word, wordlen + 32); + wordlen += 32; + } + word[wordpos++] = c; + } + else + break; + + lexer_nextchar(state); + } + + word[wordpos] = 0; + lw_free(state -> lexer_token_string); + state -> lexer_token_string = lw_strdup(word); + + switch (state -> parser_state) + { + default: + tok = lexer_global_tokens; + } + + /* check for tokens if appropriate */ + /* force uppercase */ + if (tok) + { + for (c = 0; word[c]; c++) + if (word[c] >= 'A' && word[c] <= 'Z') + word[c] = word[c] + 0x20; + + while (tok -> string) + { + if (strcmp(tok -> string, word) == 0) + break; + tok++; + } + } + + lw_free(word); + if (tok && tok -> string) + state -> lexer_token = tok -> token; + else + state -> lexer_token = token_identifier; +} + +static void lexer_empty_token(cstate *state) +{ + lw_free(state -> lexer_token_string); + state -> lexer_token_string = NULL; +} + +void lexer(cstate *state) +{ + int c; + + lexer_skip_white(state); + + lexer_empty_token(state); + + c = lexer_curchar(state); + if (c == -1) + { + state -> lexer_token = token_eof; + return; + } + + if (c == '\n') + { + /* LF */ + lexer_nextchar(state); + state -> lexer_ignorechar = '\r'; + state -> lexer_token = token_eol; + return; + } + + if (c == '\r') + { + /* CR */ + lexer_nextchar(state); + state -> lexer_ignorechar = '\n'; + state -> lexer_token = token_eol; + return; + } + + if (c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c >= 0x80) + { + /* we have a word here; identifier, keyword, etc. */ + lexer_word(state); + return; + } + + /* return the character if all else fails */ + state -> lexer_token_string = lw_realloc(state -> lexer_token_string, 2); + state -> lexer_token_string[0] = c; + state -> lexer_token_string[1] = 0; + lexer_nextchar(state); + state -> lexer_token = token_char; + return; +}
--- a/lwbasic/lwbasic.h Mon Jan 24 18:31:07 2011 -0700 +++ b/lwbasic/lwbasic.h Mon Jan 24 20:08:09 2011 -0700 @@ -26,18 +26,73 @@ #ifndef __lwbasic_h_seen__ #define __lwbasic_h_seen__ +#include <stdint.h> + +/* note: integer and uinteger will be the same for positive values from 0 +through 0x7FFFFFFF; the unsigned type should be used for doing ascii +conversions and then if a negative value was discovered, it should be +negated IFF it is in range. */ + +union lexer_numbers +{ + uint32_t uinteger; + int32_t integer; +}; + typedef struct { char *output_file; char *input_file; int debug_level; + + char *lexer_token_string; + union lexer_numbers lexer_token_number; + int lexer_token; + int lexer_curchar; + int lexer_ignorechar; + + int parser_state; void *input_state; } cstate; +/* parser states */ +enum +{ + parser_state_global = 0, /* only global decls allowed */ + parser_state_error +}; + +/* token types */ +enum +{ + token_kw_sub, /* SUB keyword */ + token_kw_function, /* FUNCTION keyword */ + token_kw_as, /* AS keyword */ + token_kw_public, /* PUBLIC keyword */ + token_kw_private, /* PRIVATE keyword */ + token_kw_params, /* PARAMS keyword */ + token_kw_returns, /* RETURNS keyword */ + token_kw_integer, /* INTEGER keyword */ + token_identifier, /* an identifier (variable, function, etc. */ + token_char, /* single character; fallback */ + token_uint, /* unsigned integer up to 32 bits */ + token_int, /* signed integer up to 32 bits */ + token_eol, /* end of line */ + token_eof /* end of file */ +}; + #ifndef __input_c_seen__ extern int input_getchar(cstate *state); #endif +#ifndef __main_c_seen__ +extern void lwb_error(const char *fmt, ...); +#endif + +#ifndef __lexer_c_seen__ +extern void lexer(cstate *state); +#endif + #endif /* __lwbasic_h_seen__ */
--- a/lwbasic/main.c Mon Jan 24 18:31:07 2011 -0700 +++ b/lwbasic/main.c Mon Jan 24 20:08:09 2011 -0700 @@ -25,11 +25,13 @@ #include <stdlib.h> #include <stdio.h> +#include <stdarg.h> #include <lw_cmdline.h> #include <lw_string.h> #include <lw_alloc.h> +#define __main_c_seen__ #include "lwbasic.h" #define PROGVER "lwbasic from " PACKAGE_STRING @@ -90,11 +92,26 @@ PROGVER }; +extern void compiler(cstate *state); + int main(int argc, char **argv) { cstate state = { 0 }; lw_cmdline_parse(&cmdline_parser, argc, argv, 0, 0, &state); + compiler(&state); + exit(0); } + +void lwb_error(const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + + exit(1); +}
--- a/lwbasic/rules.make Mon Jan 24 18:31:07 2011 -0700 +++ b/lwbasic/rules.make Mon Jan 24 20:08:09 2011 -0700 @@ -1,7 +1,7 @@ dirname := $(dir $(lastword $(MAKEFILE_LIST))) lwbasic_dir := $(dirname) -lwbasic_lsrcs := main.c input.c +lwbasic_lsrcs := main.c input.c compiler.c lexer.c lwbasic_srcs := $(addprefix $(dirname),$(lwbasic_lsrcs)) lwbasic_objs := $(lwbasic_srcs:.c=.o)