Mercurial > hg > index.cgi
annotate lwbasic/lexer.c @ 33:890a8f688889
Basic parsing of local variable decls
author | lost@l-w.ca |
---|---|
date | Thu, 03 Feb 2011 22:15:57 -0700 |
parents | 574931d87abd |
children | bfea77812e64 |
rev | line source |
---|---|
25 | 1 /* |
2 lexer.c | |
3 | |
4 Copyright © 2011 William Astle | |
5 | |
6 This file is part of LWTOOLS. | |
7 | |
8 LWTOOLS is free software: you can redistribute it and/or modify it under the | |
9 terms of the GNU General Public License as published by the Free Software | |
10 Foundation, either version 3 of the License, or (at your option) any later | |
11 version. | |
12 | |
13 This program is distributed in the hope that it will be useful, but WITHOUT | |
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
16 more details. | |
17 | |
18 You should have received a copy of the GNU General Public License along with | |
19 this program. If not, see <http://www.gnu.org/licenses/>. | |
20 */ | |
21 | |
22 /* | |
23 This handles the gritty details of parsing tokens | |
24 */ | |
25 | |
26 #include <stdlib.h> | |
27 #include <stdio.h> | |
28 #include <string.h> | |
29 | |
30 #include <lw_alloc.h> | |
31 #include <lw_string.h> | |
32 | |
33 #define __lexer_c_seen__ | |
34 #include "lwbasic.h" | |
35 | |
36 /* | |
37 A token identifier is stored in state->lexer_token by lexer(). The actual | |
38 string value is found in state->lexer_token_string; if the token has an | |
39 integer value, it will be found in state->lexer_token_number in the | |
40 appropriate "value" slot. | |
41 */ | |
42 | |
/*
One entry of a keyword table: the keyword spelling and the token
identifier it maps to. The spelling is only ever read (it points at
string literals in the tables below), so it is const-qualified.
*/
struct token_list
{
	const char *string;	/* keyword text; NULL marks the end of a table */
	int token;		/* token identifier reported for this keyword */
};
48 | |
49 static struct token_list lexer_global_tokens[] = | |
50 { | |
51 { "function", token_kw_function }, | |
52 { "sub", token_kw_sub }, | |
53 { "public", token_kw_public }, | |
54 { "private", token_kw_private }, | |
55 { "as", token_kw_as }, | |
56 { "params", token_kw_params }, | |
57 { "returns", token_kw_returns }, | |
26
26aa76da75ad
Additional parsing in function/sub; emission of prolog/epilog code
lost@l-w.ca
parents:
25
diff
changeset
|
58 { "integer", token_kw_integer }, |
26aa76da75ad
Additional parsing in function/sub; emission of prolog/epilog code
lost@l-w.ca
parents:
25
diff
changeset
|
59 { "endsub", token_kw_endsub }, |
26aa76da75ad
Additional parsing in function/sub; emission of prolog/epilog code
lost@l-w.ca
parents:
25
diff
changeset
|
60 { "endfunction", token_kw_endfunction }, |
33 | 61 { "dim", token_kw_dim }, |
25 | 62 { NULL } |
63 }; | |
64 | |
65 static int lexer_getchar(cstate *state) | |
66 { | |
67 int c; | |
68 c = input_getchar(state); | |
69 if (c == -2) | |
70 { | |
71 lwb_error("Error reading input stream."); | |
72 } | |
73 return c; | |
74 } | |
75 | |
76 static void lexer_nextchar(cstate *state) | |
77 { | |
78 state -> lexer_curchar = lexer_getchar(state); | |
79 if (state -> lexer_curchar == state -> lexer_ignorechar) | |
80 state -> lexer_curchar = lexer_getchar(state); | |
81 state -> lexer_ignorechar = 0; | |
82 } | |
83 | |
84 static int lexer_curchar(cstate *state) | |
85 { | |
86 if (state -> lexer_curchar == -1) | |
87 { | |
88 lexer_nextchar(state); | |
89 } | |
90 | |
91 return state -> lexer_curchar; | |
92 } | |
93 | |
94 static void lexer_skip_white(cstate *state) | |
95 { | |
96 int c; | |
97 | |
98 for (;;) | |
99 { | |
100 c = lexer_curchar(state); | |
101 if (!(c == 0 || c == ' ' || c == '\t')) | |
102 return; | |
103 lexer_nextchar(state); | |
104 } | |
105 } | |
106 | |
107 /* must not be called unless the word will be non-zero length */ | |
108 static void lexer_word(cstate *state) | |
109 { | |
110 int wordlen = 0; | |
111 int wordpos = 0; | |
112 char *word = NULL; | |
113 int c; | |
114 struct token_list *tok = NULL; | |
115 | |
116 for (;;) { | |
117 c = lexer_curchar(state); | |
26
26aa76da75ad
Additional parsing in function/sub; emission of prolog/epilog code
lost@l-w.ca
parents:
25
diff
changeset
|
118 if (c == '_' || (c >= '0' && c <= '9' ) || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c >= 0x80) |
25 | 119 { |
120 /* character is part of word */ | |
121 if (wordpos >= wordlen) | |
122 { | |
123 word = lw_realloc(word, wordlen + 32); | |
124 wordlen += 32; | |
125 } | |
126 word[wordpos++] = c; | |
127 } | |
128 else | |
129 break; | |
130 | |
131 lexer_nextchar(state); | |
132 } | |
133 | |
134 word[wordpos] = 0; | |
135 lw_free(state -> lexer_token_string); | |
136 state -> lexer_token_string = lw_strdup(word); | |
137 | |
138 switch (state -> parser_state) | |
139 { | |
140 default: | |
141 tok = lexer_global_tokens; | |
142 } | |
143 | |
144 /* check for tokens if appropriate */ | |
145 /* force uppercase */ | |
146 if (tok) | |
147 { | |
148 for (c = 0; word[c]; c++) | |
149 if (word[c] >= 'A' && word[c] <= 'Z') | |
150 word[c] = word[c] + 0x20; | |
151 | |
152 while (tok -> string) | |
153 { | |
154 if (strcmp(tok -> string, word) == 0) | |
155 break; | |
156 tok++; | |
157 } | |
158 } | |
159 | |
160 lw_free(word); | |
161 if (tok && tok -> string) | |
162 state -> lexer_token = tok -> token; | |
163 else | |
164 state -> lexer_token = token_identifier; | |
165 } | |
166 | |
167 static void lexer_empty_token(cstate *state) | |
168 { | |
169 lw_free(state -> lexer_token_string); | |
170 state -> lexer_token_string = NULL; | |
171 } | |
172 | |
173 void lexer(cstate *state) | |
174 { | |
175 int c; | |
176 | |
177 lexer_skip_white(state); | |
178 | |
179 lexer_empty_token(state); | |
180 | |
181 c = lexer_curchar(state); | |
182 if (c == -1) | |
183 { | |
184 state -> lexer_token = token_eof; | |
185 return; | |
186 } | |
187 | |
188 if (c == '\n') | |
189 { | |
190 /* LF */ | |
191 lexer_nextchar(state); | |
192 state -> lexer_ignorechar = '\r'; | |
193 state -> lexer_token = token_eol; | |
194 return; | |
195 } | |
196 | |
197 if (c == '\r') | |
198 { | |
199 /* CR */ | |
200 lexer_nextchar(state); | |
201 state -> lexer_ignorechar = '\n'; | |
202 state -> lexer_token = token_eol; | |
203 return; | |
204 } | |
205 | |
206 if (c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c >= 0x80) | |
207 { | |
208 /* we have a word here; identifier, keyword, etc. */ | |
209 lexer_word(state); | |
210 return; | |
211 } | |
212 | |
213 /* return the character if all else fails */ | |
214 state -> lexer_token_string = lw_realloc(state -> lexer_token_string, 2); | |
215 state -> lexer_token_string[0] = c; | |
216 state -> lexer_token_string[1] = 0; | |
217 lexer_nextchar(state); | |
218 state -> lexer_token = token_char; | |
219 return; | |
220 } | |
31
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
221 |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
222 char *lexer_return_token(cstate *state) |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
223 { |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
224 static char *buffer = NULL; |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
225 static int buflen = 0; |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
226 int l; |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
227 |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
228 if (buflen == 0) |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
229 { |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
230 buffer = lw_alloc(128); |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
231 buflen = 128; |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
232 } |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
233 |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
234 l = snprintf(buffer, buflen, "%s (%d)", state -> lexer_token_string, state -> lexer_token); |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
235 if (l >= buflen) |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
236 { |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
237 buffer = lw_realloc(buffer, l + 1); |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
238 buflen = l + 1; |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
239 snprintf(buffer, buflen, "%s (%d)", state -> lexer_token_string, state -> lexer_token); |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
240 } |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
241 return buffer; |
574931d87abd
Created a function to prettyprint the current lexer token
lost@l-w.ca
parents:
26
diff
changeset
|
242 } |