Mercurial > hg > index.cgi
comparison lwcc/preproc.c @ 298:6112c67728ba ccdev
Add stringification and token concatenation
Add support for # and ## in macro expansion by the preprocessor
(stringification and token concatenation). Totally untested.
author | William Astle <lost@l-w.ca> |
---|---|
date | Sat, 14 Sep 2013 22:42:53 -0600 |
parents | 310df72c641d |
children | 856caf91ffaa |
comparison
equal
deleted
inserted
replaced
297:310df72c641d | 298:6112c67728ba |
---|---|
30 #include "token.h" | 30 #include "token.h" |
31 | 31 |
32 static int expand_macro(struct preproc_info *, char *); | 32 static int expand_macro(struct preproc_info *, char *); |
33 static void process_directive(struct preproc_info *); | 33 static void process_directive(struct preproc_info *); |
34 static long eval_expr(struct preproc_info *); | 34 static long eval_expr(struct preproc_info *); |
35 extern struct token *preproc_lex_next_token(struct preproc_info *); | |
36 | |
35 | 37 |
36 struct token *preproc_next_processed_token(struct preproc_info *pp) | 38 struct token *preproc_next_processed_token(struct preproc_info *pp) |
37 { | 39 { |
38 struct token *ct; | 40 struct token *ct; |
39 | 41 |
763 } | 765 } |
764 | 766 |
765 /* | 767 /* |
766 Below here is the logic for expanding a macro | 768 Below here is the logic for expanding a macro |
767 */ | 769 */ |
770 static char *stringify(struct token *tl) | |
771 { | |
772 struct strbuf *s; | |
773 int ws = 0; | |
774 | |
775 s = strbuf_new(); | |
776 strbuf_add(s, '"'); | |
777 | |
778 while (tl && tl -> ttype == TOK_WSPACE) | |
779 tl = tl -> next; | |
780 | |
781 for (; tl; tl = tl -> next) | |
782 { | |
783 if (tl -> ttype == TOK_WSPACE) | |
784 { | |
785 ws = 1; | |
786 continue; | |
787 } | |
788 if (ws) | |
789 { | |
790 strbuf_add(s, ' '); | |
791 } | |
792 for (ws = 0; tl -> strval[ws]; ws++) | |
793 { | |
794 if (tl -> ttype == TOK_STRING || tl -> ttype == TOK_CHR_LIT) | |
795 { | |
796 if (tl -> strval[ws] == '"' || tl -> strval[ws] == '\\') | |
797 strbuf_add(s, '\\'); | |
798 } | |
799 } | |
800 ws = 0; | |
801 } | |
802 | |
803 strbuf_add(s, '"'); | |
804 return strbuf_end(s); | |
805 } | |
806 | |
807 /* return list to tokens as a result of ## expansion */ | |
808 static struct token *paste_tokens(struct preproc_info *pp, struct symtab_e *s, struct token **arglist, struct token *t1, struct token *t2) | |
809 { | |
810 struct token *rl = NULL, *rlt; | |
811 struct token *s1, *s2; | |
812 struct token *ws; | |
813 int i; | |
814 char *tstr; | |
815 | |
816 if (t1 -> ttype == TOK_IDENT) | |
817 { | |
818 if (strcmp(t1 -> strval, "__VA_ARGS__") == 0) | |
819 { | |
820 i = s -> nargs; | |
821 } | |
822 else | |
823 { | |
824 for (i = 0; i < s -> nargs; i++) | |
825 { | |
826 if (strcmp(s -> params[i], t1 -> strval) == 0) | |
827 break; | |
828 } | |
829 } | |
830 if ((i == s -> nargs) && !(s -> vargs)) | |
831 { | |
832 s1 = token_dup(t1); | |
833 } | |
834 else | |
835 { | |
836 /* find last non-whitespace token */ | |
837 ws = NULL; | |
838 for (t1 = s -> tl; t1; t1 = t1 -> next) | |
839 { | |
840 if (t1 -> ttype != TOK_WSPACE) | |
841 ws = t1; | |
842 } | |
843 if (!ws) | |
844 { | |
845 s1 = NULL; | |
846 } | |
847 else | |
848 { | |
849 if (ws != s -> tl) | |
850 { | |
851 /* output extra tokens */ | |
852 for (t1 = s -> tl; t1 -> next != ws; t1 = t1 -> next) | |
853 { | |
854 if (!rl) | |
855 { | |
856 rl = token_dup(t1); | |
857 rlt = rl; | |
858 } | |
859 else | |
860 { | |
861 rlt -> next = token_dup(t1); | |
862 rlt = rlt -> next; | |
863 } | |
864 } | |
865 } | |
866 s1 = token_dup(ws); | |
867 } | |
868 } | |
869 } | |
870 else | |
871 { | |
872 s1 = token_dup(t1); | |
873 } | |
874 if (t2 -> ttype == TOK_IDENT) | |
875 { | |
876 if (strcmp(t1 -> strval, "__VA_ARGS__") == 0) | |
877 { | |
878 i = s -> nargs; | |
879 } | |
880 else | |
881 { | |
882 for (i = 0; i < s -> nargs; i++) | |
883 { | |
884 if (strcmp(s -> params[i], t1 -> strval) == 0) | |
885 break; | |
886 } | |
887 } | |
888 if ((i == s -> nargs) && !(s -> vargs)) | |
889 { | |
890 s2 = token_dup(t2); | |
891 t2 = NULL; | |
892 } | |
893 else | |
894 { | |
895 /* find last non-whitespace token */ | |
896 ws = NULL; | |
897 for (t2 = s -> tl; t2; t2 = t2 -> next) | |
898 { | |
899 if (t2 -> ttype != TOK_WSPACE) | |
900 { | |
901 ws = t2; | |
902 t2 = t2 -> next; | |
903 break; | |
904 } | |
905 } | |
906 if (!ws) | |
907 { | |
908 s2 = NULL; | |
909 } | |
910 else | |
911 { | |
912 s2 = token_dup(ws); | |
913 } | |
914 } | |
915 } | |
916 else | |
917 { | |
918 s2 = token_dup(t2); | |
919 } | |
920 | |
921 /* here, s1 is NULL if no left operand or a duplicated token for the actual left operand */ | |
922 /* here, s2 is NULL if no right operand or a duplicated token for the actual right operand */ | |
923 /* here, t2 points to a possibly empty list of extra tokens to output after the concatenated tokens */ | |
924 /* here, rl,rlt is a possibly non-empty list of tokens preceding the concatenation */ | |
925 | |
926 /* tokens combine if the combination exactly matches "combinelist", in which case the string values are | |
927 concatenated and the new token type is used to create a new token. If the tokens do not combine, | |
928 s1 and s2 are returned in sequence. */ | |
929 | |
930 if (!s1 && s2) | |
931 { | |
932 if (!rl) | |
933 rl = s2; | |
934 else | |
935 rlt -> next = s2; | |
936 rlt = s2; | |
937 } | |
938 else if (s1 && !s2) | |
939 { | |
940 if (!rl) | |
941 rl = s1; | |
942 else | |
943 rlt -> next = s1; | |
944 rlt = s1; | |
945 } | |
946 else if (s1 && s2) | |
947 { | |
948 tstr = lw_alloc(strlen(s1 -> strval) + strlen(s2 -> strval) + 1); | |
949 strcpy(tstr, s1 -> strval); | |
950 strcat(tstr, s2 -> strval); | |
951 /* now try to lex the string */ | |
952 pp -> lexstr = tstr; | |
953 pp -> lexstrloc = 0; | |
954 t1 = preproc_lex_next_token(pp); | |
955 if (pp -> lexstr[pp -> lexstrloc]) | |
956 { | |
957 // doesn't make a new token - pass through the original two | |
958 if (!rl) | |
959 rl = s1; | |
960 else | |
961 rlt -> next = s1; | |
962 s1 -> next = s2; | |
963 rlt = s2; | |
964 } | |
965 else | |
966 { | |
967 // does make a new token | |
968 t1 -> fn = s1 -> fn; | |
969 t1 -> column = s1 -> column; | |
970 t1 -> lineno = s1 -> lineno; | |
971 if (!rl) | |
972 rl = t1; | |
973 else | |
974 rlt -> next = t1; | |
975 rlt = t1; | |
976 } | |
977 lw_free(tstr); | |
978 pp -> lexstr = NULL; | |
979 } | |
980 | |
981 /* add in any extra tokens */ | |
982 while (t2) | |
983 { | |
984 if (!rl) | |
985 { | |
986 rl = token_dup(t2); | |
987 rlt = rl; | |
988 } | |
989 else | |
990 { | |
991 rlt -> next = token_dup(t2); | |
992 rlt = rlt -> next; | |
993 } | |
994 t2 = t2 -> next; | |
995 } | |
996 | |
997 return rl; | |
998 } | |
999 | |
1000 | |
768 static int expand_macro(struct preproc_info *pp, char *mname) | 1001 static int expand_macro(struct preproc_info *pp, char *mname) |
769 { | 1002 { |
770 struct symtab_e *s; | 1003 struct symtab_e *s; |
771 struct token *t, *t2, *t3; | 1004 struct token *t, *t2, *t3; |
772 struct token **arglist = NULL; | 1005 struct token **arglist = NULL; |
773 int nargs = 0; | 1006 int nargs = 0; |
774 struct expand_e *e; | 1007 struct expand_e *e; |
775 struct token **exparglist = NULL; | 1008 struct token **exparglist = NULL; |
776 int i; | 1009 int i; |
777 int pcount; | 1010 int pcount; |
778 | 1011 char *tstr; |
1012 | |
779 s = symtab_find(pp, mname); | 1013 s = symtab_find(pp, mname); |
780 if (!s) | 1014 if (!s) |
781 return 0; | 1015 return 0; |
782 | 1016 |
783 for (e = pp -> expand_list; e; e = e -> next) | 1017 for (e = pp -> expand_list; e; e = e -> next) |
910 t2 = NULL; | 1144 t2 = NULL; |
911 t3 = NULL; | 1145 t3 = NULL; |
912 | 1146 |
913 for (t = s -> tl; t; t = t -> next) | 1147 for (t = s -> tl; t; t = t -> next) |
914 { | 1148 { |
1149 again: | |
1150 if (t -> ttype != TOK_WSPACE && t -> next) | |
1151 { | |
1152 struct token *ct1, *ct2; | |
1153 | |
1154 for (ct1 = t -> next; ct1 && ct1 -> ttype == TOK_WSPACE; ct1 = ct1 -> next) | |
1155 { | |
1156 if (ct1 -> ttype == TOK_DBLHASH) | |
1157 { | |
1158 // possible concatenation here | |
1159 for (ct2 = ct1 -> next; ct2 && ct2 -> ttype == TOK_WSPACE; ct2 = ct2 -> next) | |
1160 /* do nothing */ ; | |
1161 if (ct2) | |
1162 { | |
1163 // we have concatenation here so we paste str1 and str2 together and see what we get | |
1164 // if we get NULL, the paste didn't make a valid token | |
1165 ct1 = paste_tokens(pp, s, arglist, t, ct2); | |
1166 if (ct1) | |
1167 { | |
1168 if (t2) | |
1169 { | |
1170 t2 -> next = ct1; | |
1171 } | |
1172 else | |
1173 { | |
1174 t3 = ct1; | |
1175 } | |
1176 for (t2 = ct1; t2 -> next; t2 = t2 -> next) | |
1177 /* do nothing */ ; | |
1178 | |
1179 /* because of the level of control structures, move to next token and restart loop */ | |
1180 t = ct2 -> next; | |
1181 goto again; | |
1182 } | |
1183 goto nopaste; | |
1184 } | |
1185 } | |
1186 } | |
1187 } | |
1188 | |
1189 nopaste: | |
1190 if (t -> ttype == TOK_HASH) | |
1191 { | |
1192 if (t -> next && t -> next -> ttype == TOK_IDENT) | |
1193 { | |
1194 if (strcmp(t -> next -> strval, "__VA_ARGS__") == 0) | |
1195 { | |
1196 i = nargs; | |
1197 } | |
1198 else | |
1199 { | |
1200 for (i = 0; i < nargs; i++) | |
1201 { | |
1202 if (strcmp(t -> next -> strval, s -> params[i]) == 0) | |
1203 break; | |
1204 } | |
1205 } | |
1206 if (!((i == s -> nargs) && !(s -> vargs))) | |
1207 { | |
1208 // we have a stringification here | |
1209 t = t -> next; | |
1210 tstr = stringify(arglist[i]); | |
1211 if (t2) | |
1212 { | |
1213 t2 = token_create(TOK_STRING, tstr, t -> lineno, t -> column, t -> fn); | |
1214 t2 = t2 -> next; | |
1215 } | |
1216 else | |
1217 { | |
1218 t3 = token_create(TOK_STRING, tstr, t -> lineno, t -> column, t -> fn); | |
1219 t2 = t3; | |
1220 } | |
1221 lw_free(tstr); | |
1222 continue; | |
1223 } | |
1224 } | |
1225 } | |
915 if (t -> ttype == TOK_IDENT) | 1226 if (t -> ttype == TOK_IDENT) |
916 { | 1227 { |
917 /* identifiers might need expansion to arguments */ | 1228 /* identifiers might need expansion to arguments */ |
918 if (strcmp(t -> strval, "__VA_ARGS__") == 0) | 1229 if (strcmp(t -> strval, "__VA_ARGS__") == 0) |
919 { | 1230 { |
929 } | 1240 } |
930 if ((i == s -> nargs) && !(s -> vargs)) | 1241 if ((i == s -> nargs) && !(s -> vargs)) |
931 { | 1242 { |
932 struct token *te; | 1243 struct token *te; |
933 // expand argument | 1244 // expand argument |
934 // FIXME: handle # and ## | |
935 for (te = exparglist[i]; te; te = te -> next) | 1245 for (te = exparglist[i]; te; te = te -> next) |
936 { | 1246 { |
937 if (t2) | 1247 if (t2) |
938 { | 1248 { |
939 t2 -> next = token_dup(te); | 1249 t2 -> next = token_dup(te); |