Mercurial > hg > index.cgi
annotate lwbasic/lexer.c @ 34:bfea77812e64
Start of assignment code
author | Lost Wizard (lost@starbug3) |
---|---|
date | Fri, 04 Feb 2011 21:27:03 -0700 |
parents | 890a8f688889 |
children | cdb0175e1063 |
rev | line source |
---|---|
25 | 1 /* |
2 lexer.c | |
3 | |
4 Copyright © 2011 William Astle | |
5 | |
6 This file is part of LWTOOLS. | |
7 | |
8 LWTOOLS is free software: you can redistribute it and/or modify it under the | |
9 terms of the GNU General Public License as published by the Free Software | |
10 Foundation, either version 3 of the License, or (at your option) any later | |
11 version. | |
12 | |
13 This program is distributed in the hope that it will be useful, but WITHOUT | |
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
16 more details. | |
17 | |
18 You should have received a copy of the GNU General Public License along with | |
19 this program. If not, see <http://www.gnu.org/licenses/>. | |
20 */ | |
21 | |
22 /* | |
23 This handles the gritty details of parsing tokens | |
24 */ | |
25 | |
26 #include <stdlib.h> | |
27 #include <stdio.h> | |
28 #include <string.h> | |
29 | |
30 #include <lw_alloc.h> | |
31 #include <lw_string.h> | |
32 | |
33 #define __lexer_c_seen__ | |
34 #include "lwbasic.h" | |
35 | |
/*
A token identifier is stored in state->lexer_token by lexer(). The actual
string value is found in state->lexer_token_string; if the token has an
integer value, it will be found in state->lexer_token_number in the
appropriate "value" slot.
*/
42 | |
/*
One entry of a keyword lookup table: the spelling of a keyword and the token
identifier reported when a scanned word matches it.

A table is an array of these entries terminated by one whose string is NULL.
*/
struct token_list
{
	const char *string;	/* keyword text (never modified); NULL marks the end of a table */
	int token;		/* token identifier reported for this keyword */
};
48 | |
49 static struct token_list lexer_global_tokens[] = | |
50 { | |
51 { "function", token_kw_function }, | |
52 { "sub", token_kw_sub }, | |
53 { "public", token_kw_public }, | |
54 { "private", token_kw_private }, | |
55 { "as", token_kw_as }, | |
56 { "params", token_kw_params }, | |
57 { "returns", token_kw_returns }, | |
26
26aa76da75ad
Additional parsing in function/sub; emission of prolog/epilog code
lost@l-w.ca
parents:
25
diff
changeset
|
58 { "integer", token_kw_integer }, |
26aa76da75ad
Additional parsing in function/sub; emission of prolog/epilog code
lost@l-w.ca
parents:
25
diff
changeset
|
59 { "endsub", token_kw_endsub }, |
26aa76da75ad
Additional parsing in function/sub; emission of prolog/epilog code
lost@l-w.ca
parents:
25
diff
changeset
|
60 { "endfunction", token_kw_endfunction }, |
33 | 61 { "dim", token_kw_dim }, |
34 | 62 { "=", token_op_assignment }, |
25 | 63 { NULL } |
64 }; | |
65 | |
/*
Printable names for tokens, indexed by token identifier.

NOTE(review): the order here has to match the numbering of the token
identifiers, which are declared outside this file (presumably in lwbasic.h);
confirm whenever either list changes.
*/
static char *lexer_token_names[] =
{
	"SUB",
	"FUNCTION",
	"AS",
	"PUBLIC",
	"PRIVATE",
	"PARAMS",
	"RETURNS",
	"INTEGER",
	"ENDSUB",
	"ENDFUNCTION",
	"DIM",
	"<assignment>",
	"<identifier>",
	"<char>",
	"<uint>",
	"<int>",
	"<eol>",
	"<eof>"
};

/*
Return a printable name for a token identifier.

token: index into lexer_token_names.

Returns the matching table entry, or "???" when token is negative or lies
beyond the end of the table. The returned string must not be modified or
freed by the caller.
*/
char *lexer_token_name(int token)
{
	/*
	The limit comes from the table itself, so every entry (including the
	last one) is reachable and no index can fall outside the array.
	*/
	int count = (int)(sizeof(lexer_token_names) / sizeof(lexer_token_names[0]));

	if (token < 0 || token >= count)
		return "???";
	return lexer_token_names[token];
}
94 | |
25 | 95 static int lexer_getchar(cstate *state) |
96 { | |
97 int c; | |
98 c = input_getchar(state); | |
99 if (c == -2) | |
100 { | |
101 lwb_error("Error reading input stream."); | |
102 } | |
103 return c; | |
104 } | |
105 | |
106 static void lexer_nextchar(cstate *state) | |
107 { | |
108 state -> lexer_curchar = lexer_getchar(state); | |
109 if (state -> lexer_curchar == state -> lexer_ignorechar) | |
110 state -> lexer_curchar = lexer_getchar(state); | |
111 state -> lexer_ignorechar = 0; | |
112 } | |
113 | |
114 static int lexer_curchar(cstate *state) | |
115 { | |
116 if (state -> lexer_curchar == -1) | |
117 { | |
118 lexer_nextchar(state); | |
119 } | |
120 | |
121 return state -> lexer_curchar; | |
122 } | |
123 | |
124 static void lexer_skip_white(cstate *state) | |
125 { | |
126 int c; | |
127 | |
128 for (;;) | |
129 { | |
130 c = lexer_curchar(state); | |
131 if (!(c == 0 || c == ' ' || c == '\t')) | |
132 return; | |
133 lexer_nextchar(state); | |
134 } | |
135 } | |
136 | |
137 /* must not be called unless the word will be non-zero length */ | |
138 static void lexer_word(cstate *state) | |
139 { | |
140 int wordlen = 0; | |
141 int wordpos = 0; | |
142 char *word = NULL; | |
143 int c; | |
144 struct token_list *tok = NULL; | |
145 | |
146 for (;;) { | |
147 c = lexer_curchar(state); | |
26
26aa76da75ad
Additional parsing in function/sub; emission of prolog/epilog code
lost@l-w.ca
parents:
25
diff
changeset
|
148 if (c == '_' || (c >= '0' && c <= '9' ) || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c >= 0x80) |
25 | 149 { |
150 /* character is part of word */ | |
151 if (wordpos >= wordlen) | |
152 { | |
153 word = lw_realloc(word, wordlen + 32); | |
154 wordlen += 32; | |
155 } | |
156 word[wordpos++] = c; | |
157 } | |
158 else | |
159 break; | |
160 | |
161 lexer_nextchar(state); | |
162 } | |
163 | |
164 word[wordpos] = 0; | |
165 lw_free(state -> lexer_token_string); | |
166 state -> lexer_token_string = lw_strdup(word); | |
167 | |
168 switch (state -> parser_state) | |
169 { | |
170 default: | |
171 tok = lexer_global_tokens; | |
172 } | |
173 | |
174 /* check for tokens if appropriate */ | |
175 /* force uppercase */ | |
176 if (tok) | |
177 { | |
178 for (c = 0; word[c]; c++) | |
179 if (word[c] >= 'A' && word[c] <= 'Z') | |
180 word[c] = word[c] + 0x20; | |
181 | |
182 while (tok -> string) | |
183 { | |
184 if (strcmp(tok -> string, word) == 0) | |
185 break; | |
186 tok++; | |
187 } | |
188 } | |
189 | |
190 lw_free(word); | |
191 if (tok && tok -> string) | |
192 state -> lexer_token = tok -> token; | |
193 else | |
194 state -> lexer_token = token_identifier; | |
195 } | |
196 | |
197 static void lexer_empty_token(cstate *state) | |
198 { | |
199 lw_free(state -> lexer_token_string); | |
200 state -> lexer_token_string = NULL; | |
201 } | |
202 | |
203 void lexer(cstate *state) | |
204 { | |
205 int c; | |
206 | |
207 lexer_skip_white(state); | |
208 | |
209 lexer_empty_token(state); | |
210 | |
211 c = lexer_curchar(state); | |
212 if (c == -1) | |
213 { | |
214 state -> lexer_token = token_eof; | |
215 return; | |
216 } | |
217 | |
218 if (c == '\n') | |
219 { | |
220 /* LF */ | |
221 lexer_nextchar(state); | |
222 state -> lexer_ignorechar = '\r'; | |
223 state -> lexer_token = token_eol; | |
224 return; | |
225 } | |
226 | |
227 if (c == '\r') | |
228 { | |
229 /* CR */ | |
230 lexer_nextchar(state); | |
231 state -> lexer_ignorechar = '\n'; | |
232 state -> lexer_token = token_eol; | |
233 return; | |
234 } | |
235 | |
236 if (c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c >= 0x80) | |
237 { | |
238 /* we have a word here; identifier, keyword, etc. */ | |
239 lexer_word(state); | |
240 return; | |
241 } | |
242 | |
243 /* return the character if all else fails */ | |
244 state -> lexer_token_string = lw_realloc(state -> lexer_token_string, 2); | |
245 state -> lexer_token_string[0] = c; | |
246 state -> lexer_token_string[1] = 0; | |
247 lexer_nextchar(state); | |
248 state -> lexer_token = token_char; | |
249 return; | |
250 } | |
31
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
251 |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
252 char *lexer_return_token(cstate *state) |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
253 { |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
254 static char *buffer = NULL; |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
255 static int buflen = 0; |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
256 int l; |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
257 |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
258 if (buflen == 0) |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
259 { |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
260 buffer = lw_alloc(128); |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
261 buflen = 128; |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
262 } |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
263 |
34 | 264 l = snprintf(buffer, buflen, "%s (%s)", state -> lexer_token_string, lexer_token_name(state -> lexer_token)); |
31
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
265 if (l >= buflen) |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
266 { |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
267 buffer = lw_realloc(buffer, l + 1); |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
268 buflen = l + 1; |
34 | 269 snprintf(buffer, buflen, "%s (%s)", state -> lexer_token_string, lexer_token_name(state -> lexer_token)); |
31
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
270 } |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
271 return buffer; |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
272 } |