comparison lwbasic/lexer.c @ 35:cdb0175e1063

More work on expressions
author Lost Wizard (lost@starbug3)
date Sat, 05 Feb 2011 14:22:54 -0700
parents bfea77812e64
children 5325b640424d
comparison
equal deleted inserted replaced
34:bfea77812e64 35:cdb0175e1063
44 { 44 {
45 char *string; 45 char *string;
46 int token; 46 int token;
47 }; 47 };
48 48
49 /* keywords recognized by default, i.e. when the lexer is not inside an expression */
49 static struct token_list lexer_global_tokens[] = 50 static struct token_list lexer_global_tokens[] =
50 { 51 {
51 { "function", token_kw_function }, 52 { "function", token_kw_function },
52 { "sub", token_kw_sub }, 53 { "sub", token_kw_sub },
53 { "public", token_kw_public }, 54 { "public", token_kw_public },
57 { "returns", token_kw_returns }, 58 { "returns", token_kw_returns },
58 { "integer", token_kw_integer }, 59 { "integer", token_kw_integer },
59 { "endsub", token_kw_endsub }, 60 { "endsub", token_kw_endsub },
60 { "endfunction", token_kw_endfunction }, 61 { "endfunction", token_kw_endfunction },
61 { "dim", token_kw_dim }, 62 { "dim", token_kw_dim },
62 { "=", token_op_assignment }, 63 { NULL }
64 };
65
66 /* contains "built in" function names */
67 static struct token_list lexer_expr_tokens[] =
68 {
69 { "and", token_op_and },
70 { "or", token_op_or },
71 { "band", token_op_band },
72 { "bor", token_op_bor },
73 { "bxor", token_op_bxor },
74 { "xor", token_op_xor },
63 { NULL } 75 { NULL }
64 }; 76 };
65 77
66 static char *lexer_token_names[] = 78 static char *lexer_token_names[] =
67 { 79 {
75 "INTEGER", 87 "INTEGER",
76 "ENDSUB", 88 "ENDSUB",
77 "ENDFUNCTION", 89 "ENDFUNCTION",
78 "DIM", 90 "DIM",
79 "<assignment>", 91 "<assignment>",
92 "<equality>",
93 "<greater>",
94 "<less>",
95 "<greaterequal>",
96 "<lessequal>",
97 "<notequal>",
98 "<and>",
99 "<or>",
100 "<xor>",
101 "<bitwiseand>",
102 "<bitwiseor>",
103 "<bitwisexor>",
104 "<plus>",
105 "<minus>",
106 "<times>",
107 "<divide>",
108 "<modulus>",
80 "<identifier>", 109 "<identifier>",
81 "<char>", 110 "<char>",
82 "<uint>", 111 "<uint>",
83 "<int>", 112 "<int>",
84 "<eol>", 113 "<eol>",
169 { 198 {
170 default: 199 default:
171 tok = lexer_global_tokens; 200 tok = lexer_global_tokens;
172 } 201 }
173 202
203 if (state -> expression)
204 {
205 tok = lexer_expr_tokens;
206 }
207
174 /* check for tokens if appropriate */ 208 /* check for tokens if appropriate */
175 /* force uppercase */ 209 /* force uppercase */
176 if (tok) 210 if (tok)
177 { 211 {
178 for (c = 0; word[c]; c++) 212 for (c = 0; word[c]; c++)
192 state -> lexer_token = tok -> token; 226 state -> lexer_token = tok -> token;
193 else 227 else
194 state -> lexer_token = token_identifier; 228 state -> lexer_token = token_identifier;
195 } 229 }
196 230
231 static void lexer_parse_number(cstate *state, int neg)
232 {
233 unsigned long tint = 0;
234 int c;
235
236 for (;;)
237 {
238 c = lexer_curchar(state);
239 if (c >= '0' && c <= '9')
240 {
241 tint *= 10 + (c - '0');
242 }
243 else
244 {
245 /* end of the number here */
246 if (neg)
247 {
248 if (tint > 0x80000000)
249 lwb_error("Integer overflow\n");
250 state -> lexer_token_number.integer = -tint;
251 state -> lexer_token = token_int;
252 }
253 else
254 {
255 state -> lexer_token = token_uint;
256 state -> lexer_token_number.uinteger = tint;
257 }
258 return;
259 }
260 lexer_nextchar(state);
261 }
262 }
263
197 static void lexer_empty_token(cstate *state) 264 static void lexer_empty_token(cstate *state)
198 { 265 {
199 lw_free(state -> lexer_token_string); 266 lw_free(state -> lexer_token_string);
200 state -> lexer_token_string = NULL; 267 state -> lexer_token_string = NULL;
201 } 268 }
237 { 304 {
238 /* we have a word here; identifier, keyword, etc. */ 305 /* we have a word here; identifier, keyword, etc. */
239 lexer_word(state); 306 lexer_word(state);
240 return; 307 return;
241 } 308 }
309
310 if (state -> expression && c >= '0' && c <= '9')
311 {
312 /* we have a number */
313 lexer_parse_number(state, 0);
314 return;
315 }
316
317 lexer_nextchar(state);
318 if (state -> expression)
319 {
320 if (c == '-' && lexer_curchar(state) >= '0' && lexer_curchar(state) <= '9')
321 {
322 /* we have a negative number here */
323 lexer_parse_number(state, 1);
324 return;
325 }
326 if (c == '=')
327 {
328 state -> lexer_token = token_op_equality;
329 return;
330 }
331 if (c == '<')
332 {
333 if (lexer_curchar(state) == '=')
334 {
335 lexer_nextchar(state);
336 state -> lexer_token = token_op_lessequal;
337 return;
338 }
339 if (lexer_curchar(state) == '>')
340 {
341 lexer_nextchar(state);
342 state -> lexer_token = token_op_notequal;
343 return;
344 }
345 state -> lexer_token = token_op_less;
346 return;
347 }
348 if (c == '>')
349 {
350 if (lexer_curchar(state) == '>')
351 {
352 lexer_nextchar(state);
353 state -> lexer_token = token_op_greaterequal;
354 return;
355 }
356 if (lexer_curchar(state) == '<')
357 {
358 state -> lexer_token = token_op_notequal;
359 lexer_nextchar(state);
360 return;
361 }
362 state -> lexer_token = token_op_greater;
363 return;
364 }
365 switch(c)
366 {
367 case '+':
368 state -> lexer_token = token_op_plus;
369 return;
370
371 case '-':
372 state -> lexer_token = token_op_minus;
373 return;
374
375 case '/':
376 state -> lexer_token = token_op_divide;
377 return;
378
379 case '*':
380 state -> lexer_token = token_op_times;
381 return;
382
383 case '%':
384 state -> lexer_token = token_op_modulus;
385 return;
386
387
388 }
389 }
390 else
391 {
392 if (c == '=')
393 {
394 state -> lexer_token = token_op_assignment;
395 return;
396 }
397 }
242 398
243 /* return the character if all else fails */ 399 /* return the character if all else fails */
400 state -> lexer_token = token_char;
244 state -> lexer_token_string = lw_realloc(state -> lexer_token_string, 2); 401 state -> lexer_token_string = lw_realloc(state -> lexer_token_string, 2);
245 state -> lexer_token_string[0] = c; 402 state -> lexer_token_string[0] = c;
246 state -> lexer_token_string[1] = 0; 403 state -> lexer_token_string[1] = 0;
247 lexer_nextchar(state);
248 state -> lexer_token = token_char;
249 return; 404 return;
250 } 405 }
251 406
252 char *lexer_return_token(cstate *state) 407 char *lexer_return_token(cstate *state)
253 { 408 {