annotate lwbasic/lexer.c @ 34:bfea77812e64

Start of assignment code
author Lost Wizard (lost@starbug3)
date Fri, 04 Feb 2011 21:27:03 -0700
parents 890a8f688889
children cdb0175e1063
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
25
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
1 /*
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
2 lexer.c
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
3
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
4 Copyright © 2011 William Astle
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
5
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
6 This file is part of LWTOOLS.
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
7
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
8 LWTOOLS is free software: you can redistribute it and/or modify it under the
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
9 terms of the GNU General Public License as published by the Free Software
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
10 Foundation, either version 3 of the License, or (at your option) any later
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
11 version.
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
12
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
13 This program is distributed in the hope that it will be useful, but WITHOUT
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
16 more details.
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
17
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
18 You should have received a copy of the GNU General Public License along with
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
19 this program. If not, see <http://www.gnu.org/licenses/>.
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
20 */
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
21
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
22 /*
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
23 This handles the gritty details of parsing tokens
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
24 */
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
25
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
26 #include <stdlib.h>
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
27 #include <stdio.h>
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
28 #include <string.h>
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
29
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
30 #include <lw_alloc.h>
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
31 #include <lw_string.h>
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
32
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
33 #define __lexer_c_seen__
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
34 #include "lwbasic.h"
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
35
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
36 /*
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
37 A token idenfier is returned by lexer(). The actual string value
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
38 is found in state->lexer_lexer_token_string; if the token as an integer value,
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
39 it will be found in state->lexer_token_number in the appropriate "value"
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
40 slot.
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
41 */
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
42
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
43 struct token_list
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
44 {
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
45 char *string;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
46 int token;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
47 };
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
48
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
49 static struct token_list lexer_global_tokens[] =
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
50 {
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
51 { "function", token_kw_function },
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
52 { "sub", token_kw_sub },
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
53 { "public", token_kw_public },
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
54 { "private", token_kw_private },
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
55 { "as", token_kw_as },
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
56 { "params", token_kw_params },
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
57 { "returns", token_kw_returns },
26
26aa76da75ad Additional parsing in function/sub; emission of prolog/epilog code
lost@l-w.ca
parents: 25
diff changeset
58 { "integer", token_kw_integer },
26aa76da75ad Additional parsing in function/sub; emission of prolog/epilog code
lost@l-w.ca
parents: 25
diff changeset
59 { "endsub", token_kw_endsub },
26aa76da75ad Additional parsing in function/sub; emission of prolog/epilog code
lost@l-w.ca
parents: 25
diff changeset
60 { "endfunction", token_kw_endfunction },
33
890a8f688889 Basic parsing of local variable decls
lost@l-w.ca
parents: 31
diff changeset
61 { "dim", token_kw_dim },
34
bfea77812e64 Start of assignment code
Lost Wizard (lost@starbug3)
parents: 33
diff changeset
62 { "=", token_op_assignment },
25
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
63 { NULL }
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
64 };
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
65
34
bfea77812e64 Start of assignment code
Lost Wizard (lost@starbug3)
parents: 33
diff changeset
66 static char *lexer_token_names[] =
bfea77812e64 Start of assignment code
Lost Wizard (lost@starbug3)
parents: 33
diff changeset
67 {
bfea77812e64 Start of assignment code
Lost Wizard (lost@starbug3)
parents: 33
diff changeset
68 "SUB",
bfea77812e64 Start of assignment code
Lost Wizard (lost@starbug3)
parents: 33
diff changeset
69 "FUNCTION",
bfea77812e64 Start of assignment code
Lost Wizard (lost@starbug3)
parents: 33
diff changeset
70 "AS",
bfea77812e64 Start of assignment code
Lost Wizard (lost@starbug3)
parents: 33
diff changeset
71 "PUBLIC",
bfea77812e64 Start of assignment code
Lost Wizard (lost@starbug3)
parents: 33
diff changeset
72 "PRIVATE",
bfea77812e64 Start of assignment code
Lost Wizard (lost@starbug3)
parents: 33
diff changeset
73 "PARAMS",
bfea77812e64 Start of assignment code
Lost Wizard (lost@starbug3)
parents: 33
diff changeset
74 "RETURNS",
bfea77812e64 Start of assignment code
Lost Wizard (lost@starbug3)
parents: 33
diff changeset
75 "INTEGER",
bfea77812e64 Start of assignment code
Lost Wizard (lost@starbug3)
parents: 33
diff changeset
76 "ENDSUB",
bfea77812e64 Start of assignment code
Lost Wizard (lost@starbug3)
parents: 33
diff changeset
77 "ENDFUNCTION",
bfea77812e64 Start of assignment code
Lost Wizard (lost@starbug3)
parents: 33
diff changeset
78 "DIM",
bfea77812e64 Start of assignment code
Lost Wizard (lost@starbug3)
parents: 33
diff changeset
79 "<assignment>",
bfea77812e64 Start of assignment code
Lost Wizard (lost@starbug3)
parents: 33
diff changeset
80 "<identifier>",
bfea77812e64 Start of assignment code
Lost Wizard (lost@starbug3)
parents: 33
diff changeset
81 "<char>",
bfea77812e64 Start of assignment code
Lost Wizard (lost@starbug3)
parents: 33
diff changeset
82 "<uint>",
bfea77812e64 Start of assignment code
Lost Wizard (lost@starbug3)
parents: 33
diff changeset
83 "<int>",
bfea77812e64 Start of assignment code
Lost Wizard (lost@starbug3)
parents: 33
diff changeset
84 "<eol>",
bfea77812e64 Start of assignment code
Lost Wizard (lost@starbug3)
parents: 33
diff changeset
85 "<eof>"
bfea77812e64 Start of assignment code
Lost Wizard (lost@starbug3)
parents: 33
diff changeset
86 };
bfea77812e64 Start of assignment code
Lost Wizard (lost@starbug3)
parents: 33
diff changeset
87
bfea77812e64 Start of assignment code
Lost Wizard (lost@starbug3)
parents: 33
diff changeset
88 char *lexer_token_name(int token)
bfea77812e64 Start of assignment code
Lost Wizard (lost@starbug3)
parents: 33
diff changeset
89 {
bfea77812e64 Start of assignment code
Lost Wizard (lost@starbug3)
parents: 33
diff changeset
90 if (token > token_eol)
bfea77812e64 Start of assignment code
Lost Wizard (lost@starbug3)
parents: 33
diff changeset
91 return "???";
bfea77812e64 Start of assignment code
Lost Wizard (lost@starbug3)
parents: 33
diff changeset
92 return lexer_token_names[token];
bfea77812e64 Start of assignment code
Lost Wizard (lost@starbug3)
parents: 33
diff changeset
93 }
bfea77812e64 Start of assignment code
Lost Wizard (lost@starbug3)
parents: 33
diff changeset
94
25
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
95 static int lexer_getchar(cstate *state)
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
96 {
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
97 int c;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
98 c = input_getchar(state);
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
99 if (c == -2)
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
100 {
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
101 lwb_error("Error reading input stream.");
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
102 }
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
103 return c;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
104 }
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
105
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
106 static void lexer_nextchar(cstate *state)
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
107 {
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
108 state -> lexer_curchar = lexer_getchar(state);
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
109 if (state -> lexer_curchar == state -> lexer_ignorechar)
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
110 state -> lexer_curchar = lexer_getchar(state);
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
111 state -> lexer_ignorechar = 0;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
112 }
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
113
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
114 static int lexer_curchar(cstate *state)
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
115 {
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
116 if (state -> lexer_curchar == -1)
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
117 {
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
118 lexer_nextchar(state);
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
119 }
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
120
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
121 return state -> lexer_curchar;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
122 }
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
123
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
124 static void lexer_skip_white(cstate *state)
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
125 {
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
126 int c;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
127
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
128 for (;;)
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
129 {
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
130 c = lexer_curchar(state);
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
131 if (!(c == 0 || c == ' ' || c == '\t'))
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
132 return;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
133 lexer_nextchar(state);
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
134 }
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
135 }
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
136
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
137 /* must not be called unless the word will be non-zero length */
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
138 static void lexer_word(cstate *state)
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
139 {
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
140 int wordlen = 0;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
141 int wordpos = 0;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
142 char *word = NULL;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
143 int c;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
144 struct token_list *tok = NULL;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
145
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
146 for (;;) {
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
147 c = lexer_curchar(state);
26
26aa76da75ad Additional parsing in function/sub; emission of prolog/epilog code
lost@l-w.ca
parents: 25
diff changeset
148 if (c == '_' || (c >= '0' && c <= '9' ) || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c >= 0x80)
25
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
149 {
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
150 /* character is part of word */
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
151 if (wordpos >= wordlen)
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
152 {
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
153 word = lw_realloc(word, wordlen + 32);
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
154 wordlen += 32;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
155 }
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
156 word[wordpos++] = c;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
157 }
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
158 else
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
159 break;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
160
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
161 lexer_nextchar(state);
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
162 }
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
163
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
164 word[wordpos] = 0;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
165 lw_free(state -> lexer_token_string);
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
166 state -> lexer_token_string = lw_strdup(word);
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
167
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
168 switch (state -> parser_state)
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
169 {
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
170 default:
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
171 tok = lexer_global_tokens;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
172 }
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
173
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
174 /* check for tokens if appropriate */
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
175 /* force uppercase */
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
176 if (tok)
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
177 {
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
178 for (c = 0; word[c]; c++)
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
179 if (word[c] >= 'A' && word[c] <= 'Z')
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
180 word[c] = word[c] + 0x20;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
181
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
182 while (tok -> string)
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
183 {
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
184 if (strcmp(tok -> string, word) == 0)
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
185 break;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
186 tok++;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
187 }
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
188 }
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
189
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
190 lw_free(word);
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
191 if (tok && tok -> string)
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
192 state -> lexer_token = tok -> token;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
193 else
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
194 state -> lexer_token = token_identifier;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
195 }
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
196
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
197 static void lexer_empty_token(cstate *state)
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
198 {
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
199 lw_free(state -> lexer_token_string);
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
200 state -> lexer_token_string = NULL;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
201 }
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
202
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
203 void lexer(cstate *state)
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
204 {
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
205 int c;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
206
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
207 lexer_skip_white(state);
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
208
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
209 lexer_empty_token(state);
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
210
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
211 c = lexer_curchar(state);
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
212 if (c == -1)
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
213 {
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
214 state -> lexer_token = token_eof;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
215 return;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
216 }
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
217
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
218 if (c == '\n')
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
219 {
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
220 /* LF */
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
221 lexer_nextchar(state);
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
222 state -> lexer_ignorechar = '\r';
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
223 state -> lexer_token = token_eol;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
224 return;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
225 }
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
226
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
227 if (c == '\r')
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
228 {
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
229 /* CR */
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
230 lexer_nextchar(state);
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
231 state -> lexer_ignorechar = '\n';
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
232 state -> lexer_token = token_eol;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
233 return;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
234 }
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
235
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
236 if (c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c >= 0x80)
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
237 {
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
238 /* we have a word here; identifier, keyword, etc. */
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
239 lexer_word(state);
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
240 return;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
241 }
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
242
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
243 /* return the character if all else fails */
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
244 state -> lexer_token_string = lw_realloc(state -> lexer_token_string, 2);
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
245 state -> lexer_token_string[0] = c;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
246 state -> lexer_token_string[1] = 0;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
247 lexer_nextchar(state);
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
248 state -> lexer_token = token_char;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
249 return;
87590f43e76d Started lwbasic parser; checkpoint
lost@l-w.ca
parents:
diff changeset
250 }
31
574931d87abd Created a function to prettyprint the current lexer token
lost@l-w.ca
parents: 26
diff changeset
251
574931d87abd Created a function to prettyprint the current lexer token
lost@l-w.ca
parents: 26
diff changeset
252 char *lexer_return_token(cstate *state)
574931d87abd Created a function to prettyprint the current lexer token
lost@l-w.ca
parents: 26
diff changeset
253 {
574931d87abd Created a function to prettyprint the current lexer token
lost@l-w.ca
parents: 26
diff changeset
254 static char *buffer = NULL;
574931d87abd Created a function to prettyprint the current lexer token
lost@l-w.ca
parents: 26
diff changeset
255 static int buflen = 0;
574931d87abd Created a function to prettyprint the current lexer token
lost@l-w.ca
parents: 26
diff changeset
256 int l;
574931d87abd Created a function to prettyprint the current lexer token
lost@l-w.ca
parents: 26
diff changeset
257
574931d87abd Created a function to prettyprint the current lexer token
lost@l-w.ca
parents: 26
diff changeset
258 if (buflen == 0)
574931d87abd Created a function to prettyprint the current lexer token
lost@l-w.ca
parents: 26
diff changeset
259 {
574931d87abd Created a function to prettyprint the current lexer token
lost@l-w.ca
parents: 26
diff changeset
260 buffer = lw_alloc(128);
574931d87abd Created a function to prettyprint the current lexer token
lost@l-w.ca
parents: 26
diff changeset
261 buflen = 128;
574931d87abd Created a function to prettyprint the current lexer token
lost@l-w.ca
parents: 26
diff changeset
262 }
574931d87abd Created a function to prettyprint the current lexer token
lost@l-w.ca
parents: 26
diff changeset
263
34
bfea77812e64 Start of assignment code
Lost Wizard (lost@starbug3)
parents: 33
diff changeset
264 l = snprintf(buffer, buflen, "%s (%s)", state -> lexer_token_string, lexer_token_name(state -> lexer_token));
31
574931d87abd Created a function to prettyprint the current lexer token
lost@l-w.ca
parents: 26
diff changeset
265 if (l >= buflen)
574931d87abd Created a function to prettyprint the current lexer token
lost@l-w.ca
parents: 26
diff changeset
266 {
574931d87abd Created a function to prettyprint the current lexer token
lost@l-w.ca
parents: 26
diff changeset
267 buffer = lw_realloc(buffer, l + 1);
574931d87abd Created a function to prettyprint the current lexer token
lost@l-w.ca
parents: 26
diff changeset
268 buflen = l + 1;
34
bfea77812e64 Start of assignment code
Lost Wizard (lost@starbug3)
parents: 33
diff changeset
269 snprintf(buffer, buflen, "%s (%s)", state -> lexer_token_string, lexer_token_name(state -> lexer_token));
31
574931d87abd Created a function to prettyprint the current lexer token
lost@l-w.ca
parents: 26
diff changeset
270 }
574931d87abd Created a function to prettyprint the current lexer token
lost@l-w.ca
parents: 26
diff changeset
271 return buffer;
574931d87abd Created a function to prettyprint the current lexer token
lost@l-w.ca
parents: 26
diff changeset
272 }