Mercurial > hg > index.cgi
comparison lwbasic/lexer.c @ 35:cdb0175e1063
More work on expressions
author | Lost Wizard (lost@starbug3) |
---|---|
date | Sat, 05 Feb 2011 14:22:54 -0700 |
parents | bfea77812e64 |
children | 5325b640424d |
comparison
equal
deleted
inserted
replaced
34:bfea77812e64 | 35:cdb0175e1063 |
---|---|
44 { | 44 { |
45 char *string; | 45 char *string; |
46 int token; | 46 int token; |
47 }; | 47 }; |
48 | 48 |
49 /* keywords looked up for words when the lexer is not in expression mode */ | |
49 static struct token_list lexer_global_tokens[] = | 50 static struct token_list lexer_global_tokens[] = |
50 { | 51 { |
51 { "function", token_kw_function }, | 52 { "function", token_kw_function }, |
52 { "sub", token_kw_sub }, | 53 { "sub", token_kw_sub }, |
53 { "public", token_kw_public }, | 54 { "public", token_kw_public }, |
57 { "returns", token_kw_returns }, | 58 { "returns", token_kw_returns }, |
58 { "integer", token_kw_integer }, | 59 { "integer", token_kw_integer }, |
59 { "endsub", token_kw_endsub }, | 60 { "endsub", token_kw_endsub }, |
60 { "endfunction", token_kw_endfunction }, | 61 { "endfunction", token_kw_endfunction }, |
61 { "dim", token_kw_dim }, | 62 { "dim", token_kw_dim }, |
62 { "=", token_op_assignment }, | 63 { NULL } |
64 }; | |
65 | |
66 /* contains "built in" function names */ | |
67 static struct token_list lexer_expr_tokens[] = | |
68 { | |
69 { "and", token_op_and }, | |
70 { "or", token_op_or }, | |
71 { "band", token_op_band }, | |
72 { "bor", token_op_bor }, | |
73 { "bxor", token_op_bxor }, | |
74 { "xor", token_op_xor }, | |
63 { NULL } | 75 { NULL } |
64 }; | 76 }; |
65 | 77 |
66 static char *lexer_token_names[] = | 78 static char *lexer_token_names[] = |
67 { | 79 { |
75 "INTEGER", | 87 "INTEGER", |
76 "ENDSUB", | 88 "ENDSUB", |
77 "ENDFUNCTION", | 89 "ENDFUNCTION", |
78 "DIM", | 90 "DIM", |
79 "<assignment>", | 91 "<assignment>", |
92 "<equality>", | |
93 "<greater>", | |
94 "<less>", | |
95 "<greaterequal>", | |
96 "<lessequal>", | |
97 "<notequal>", | |
98 "<and>", | |
99 "<or>", | |
100 "<xor>", | |
101 "<bitwiseand>", | |
102 "<bitwiseor>", | |
103 "<bitwisexor>", | |
104 "<plus>", | |
105 "<minus>", | |
106 "<times>", | |
107 "<divide>", | |
108 "<modulus>", | |
80 "<identifier>", | 109 "<identifier>", |
81 "<char>", | 110 "<char>", |
82 "<uint>", | 111 "<uint>", |
83 "<int>", | 112 "<int>", |
84 "<eol>", | 113 "<eol>", |
169 { | 198 { |
170 default: | 199 default: |
171 tok = lexer_global_tokens; | 200 tok = lexer_global_tokens; |
172 } | 201 } |
173 | 202 |
203 if (state -> expression) | |
204 { | |
205 tok = lexer_expr_tokens; | |
206 } | |
207 | |
174 /* check for tokens if appropriate */ | 208 /* check for tokens if appropriate */ |
175 /* force uppercase */ | 209 /* force uppercase */ |
176 if (tok) | 210 if (tok) |
177 { | 211 { |
178 for (c = 0; word[c]; c++) | 212 for (c = 0; word[c]; c++) |
192 state -> lexer_token = tok -> token; | 226 state -> lexer_token = tok -> token; |
193 else | 227 else |
194 state -> lexer_token = token_identifier; | 228 state -> lexer_token = token_identifier; |
195 } | 229 } |
196 | 230 |
231 static void lexer_parse_number(cstate *state, int neg) | |
232 { | |
233 unsigned long tint = 0; | |
234 int c; | |
235 | |
236 for (;;) | |
237 { | |
238 c = lexer_curchar(state); | |
239 if (c >= '0' && c <= '9') | |
240 { | |
241 tint *= 10 + (c - '0'); | |
242 } | |
243 else | |
244 { | |
245 /* end of the number here */ | |
246 if (neg) | |
247 { | |
248 if (tint > 0x80000000) | |
249 lwb_error("Integer overflow\n"); | |
250 state -> lexer_token_number.integer = -tint; | |
251 state -> lexer_token = token_int; | |
252 } | |
253 else | |
254 { | |
255 state -> lexer_token = token_uint; | |
256 state -> lexer_token_number.uinteger = tint; | |
257 } | |
258 return; | |
259 } | |
260 lexer_nextchar(state); | |
261 } | |
262 } | |
263 | |
197 static void lexer_empty_token(cstate *state) | 264 static void lexer_empty_token(cstate *state) |
198 { | 265 { |
199 lw_free(state -> lexer_token_string); | 266 lw_free(state -> lexer_token_string); |
200 state -> lexer_token_string = NULL; | 267 state -> lexer_token_string = NULL; |
201 } | 268 } |
237 { | 304 { |
238 /* we have a word here; identifier, keyword, etc. */ | 305 /* we have a word here; identifier, keyword, etc. */ |
239 lexer_word(state); | 306 lexer_word(state); |
240 return; | 307 return; |
241 } | 308 } |
309 | |
310 if (state -> expression && c >= '0' && c <= '9') | |
311 { | |
312 /* we have a number */ | |
313 lexer_parse_number(state, 0); | |
314 return; | |
315 } | |
316 | |
317 lexer_nextchar(state); | |
318 if (state -> expression) | |
319 { | |
320 if (c == '-' && lexer_curchar(state) >= '0' && lexer_curchar(state) <= '9') | |
321 { | |
322 /* we have a negative number here */ | |
323 lexer_parse_number(state, 1); | |
324 return; | |
325 } | |
326 if (c == '=') | |
327 { | |
328 state -> lexer_token = token_op_equality; | |
329 return; | |
330 } | |
331 if (c == '<') | |
332 { | |
333 if (lexer_curchar(state) == '=') | |
334 { | |
335 lexer_nextchar(state); | |
336 state -> lexer_token = token_op_lessequal; | |
337 return; | |
338 } | |
339 if (lexer_curchar(state) == '>') | |
340 { | |
341 lexer_nextchar(state); | |
342 state -> lexer_token = token_op_notequal; | |
343 return; | |
344 } | |
345 state -> lexer_token = token_op_less; | |
346 return; | |
347 } | |
348 if (c == '>') | |
349 { | |
350 if (lexer_curchar(state) == '>') | |
351 { | |
352 lexer_nextchar(state); | |
353 state -> lexer_token = token_op_greaterequal; | |
354 return; | |
355 } | |
356 if (lexer_curchar(state) == '<') | |
357 { | |
358 state -> lexer_token = token_op_notequal; | |
359 lexer_nextchar(state); | |
360 return; | |
361 } | |
362 state -> lexer_token = token_op_greater; | |
363 return; | |
364 } | |
365 switch(c) | |
366 { | |
367 case '+': | |
368 state -> lexer_token = token_op_plus; | |
369 return; | |
370 | |
371 case '-': | |
372 state -> lexer_token = token_op_minus; | |
373 return; | |
374 | |
375 case '/': | |
376 state -> lexer_token = token_op_divide; | |
377 return; | |
378 | |
379 case '*': | |
380 state -> lexer_token = token_op_times; | |
381 return; | |
382 | |
383 case '%': | |
384 state -> lexer_token = token_op_modulus; | |
385 return; | |
386 | |
387 | |
388 } | |
389 } | |
390 else | |
391 { | |
392 if (c == '=') | |
393 { | |
394 state -> lexer_token = token_op_assignment; | |
395 return; | |
396 } | |
397 } | |
242 | 398 |
243 /* return the character if all else fails */ | 399 /* return the character if all else fails */ |
400 state -> lexer_token = token_char; | |
244 state -> lexer_token_string = lw_realloc(state -> lexer_token_string, 2); | 401 state -> lexer_token_string = lw_realloc(state -> lexer_token_string, 2); |
245 state -> lexer_token_string[0] = c; | 402 state -> lexer_token_string[0] = c; |
246 state -> lexer_token_string[1] = 0; | 403 state -> lexer_token_string[1] = 0; |
247 lexer_nextchar(state); | |
248 state -> lexer_token = token_char; | |
249 return; | 404 return; |
250 } | 405 } |
251 | 406 |
252 char *lexer_return_token(cstate *state) | 407 char *lexer_return_token(cstate *state) |
253 { | 408 { |