#include <stdio.h> #include <stdlib.h> #include <string.h> #include <errno.h> #define ishex(ch) (int)strchr("0123456789ABCDEFabcdef", ch) int hextobin(char x) { // Too crude, just a quick hack. Seldom called. if (('0' <= x) && (x <= '9')) { return x-'0'; } else if (('A' <= x) && (x <= 'F')) { return x-'A'+10; } else if (('a' <= x) && (x <= 'f')) { return x-'a'+10; } else { return 0; // ASSERT: INTERNAL ERROR! } } void indent(int depth, FILE *f) { int i; for (i = 0; i < depth; i++) fprintf(f, " "); } #define DEBUG_PARSER #include "teeny.h" // GENERATED GRAMMAR FROM takeon.c and teeny.g #ifndef FALSE #define FALSE (0!=0) #define TRUE (0==0) #endif char *progname; int debug_parser = FALSE; // Built-in phrase codes. Must match with grammar file. #define TYPE_EOF 0 #define TYPE_TAG 1 #define TYPE_STRING 2 #define TYPE_CHARCONST 3 #define TYPE_CHAR 4 #define TYPE_INT 5 #define TYPE_HEXINT 6 #define TYPE_KEYWORD 7 typedef struct sourceinfo { // ATOMS for processed input stream char *s; // string contents int l; // lineno int col; // column int t; // type - tag, "string", 'charconst', or char, so far char *f; // source or includefile name } sourceinfo; int bestparse = -1; // for error reporting. char *looking_for = "<UNKNOWN>"; // 'while looking for <PHRASENAME>' (or literal) ... // C is the source character token stream static sourceinfo *c = NULL; int nextfree = 0, arraysize = 0; char *onecharstr; // A is the Analysis record. Contents point to a C[] structure when the item is a BIP, // otherwise the format is [phraseno] [altno] [no-of-subphrases] [subphrases and/or BIPs...] // eg A[25] = 10 means that A[25] is C[10]. static int *A = NULL; int next_free_a = 0, a_size = 0; static void *makespace_(void *c, int nextfree, int *arraysize, int objsize) { if (nextfree >= *arraysize) { *arraysize = (*arraysize * 2) + 1; c = (void *)realloc(c, (*arraysize+1) * objsize); // 0:arraysize, inclusive. eg 0:15, 0:127 etc if (c == NULL) {fprintf(stderr, "tacctwo: %s\n", strerror(errno)); exit(errno);} } return c; } #define makespace(c, nextfree, arraysize) c = (typeof(c))makespace_(c, nextfree, &arraysize, sizeof(c[0])) void stores(char *s, int lineno, int col, int type, char *fname) { makespace(c, nextfree, arraysize); c[nextfree].s = s; c[nextfree].l = lineno; c[nextfree].col = col; c[nextfree].f = fname; c[nextfree].t = type; nextfree++; } void storec(int ch, int lineno, int col, int type, char *fname) { onecharstr[ch*2] = ch; onecharstr[ch*2+1] = '\0'; // convert char to 1-char string before saving. stores(&onecharstr[ch*2], lineno, col, type, fname); } int iskeyword(char *s) { int i; for (i = 0; i < MAX_KEYWORD; i++) if (strcmp(s, keyword[i]) == 0) return TRUE; return FALSE; } //-------------------------------------------------------------------------- FILE *sourcefile; char *curfile; int startline = TRUE, whitespace = TRUE, lineno = 1, col = 0, ch, peek; void line_reconstruction(void) // PRE-PROCESS INPUT READY FOR PARSING { for (;;) { ch = fgetc(sourcefile); if (ch == EOF) break; ch &= 255; // int, positive. peek = fgetc(sourcefile); ungetc(peek, sourcefile); if (isalpha(ch)) { int nextfree = 0, strsize = 0, startcol = col; char *token = NULL; whitespace = FALSE; for (;;) { makespace(token, nextfree, strsize); if (isalpha(ch) || isdigit(ch) || (ch == '_')) { // digits and '_' allowed after 1st char. col++; token[nextfree++] = ch; } else { token[nextfree] = '\0'; ungetc(ch, sourcefile); break; } ch = fgetc(sourcefile); } stores(strdup(token), lineno, startcol, iskeyword(token) ? TYPE_KEYWORD : TYPE_TAG, curfile); free(token); } else if (isdigit(ch)) { int nextfree = 0, numsize = 0; char *number = NULL; // Store as a string... whitespace = FALSE; for (;;) { makespace(number, nextfree, numsize); if (isdigit(ch)) { col++; number[nextfree++] = ch; } else { number[nextfree] = '\0'; ungetc(ch, sourcefile); break; } ch = fgetc(sourcefile); } stores(strdup(number), lineno, col, TYPE_INT, curfile); free(number); } else switch (ch) { case '$': // Hex constant \$[0-9a-fA-F]+ // Q: store the '$' in the string or not? whitespace = FALSE; col++; if (ishex(peek)) { int nextfree = 0, numsize = 0; char *number = NULL; for (;;) { makespace(number, nextfree, numsize); ch = fgetc(sourcefile); if (ishex(ch)) { col++; number[nextfree++] = ch; } else { number[nextfree] = '\0'; ungetc(ch, sourcefile); break; } } stores(strdup(number), lineno, col, TYPE_HEXINT, curfile); free(number); } else { // Warn: probably an error... should not be any naked '$' symbols. // If the error to be prined would have been a generic syntax // error at the same location, then maybe give a more informative // error message such as "Unexpected character '$' near: ..." // On the other hand the generic mechanism probably reports this // almsost as accurately. storec(ch, lineno, col++, TYPE_CHAR, curfile); } break; case '\'': // Handle 'c' character constants case '"': // Handle "string" { int nextfree = 0, strsize = 0, quotech = ch; char *string = NULL; whitespace = FALSE; col++; for (;;) { ch = fgetc(sourcefile); // Newlines are allowed col++; makespace(string, nextfree, strsize); if (ch == '\\') { ch = fgetc(sourcefile); col++; if (ch == '\\') { string[nextfree++] = ch; } else if (ch == '\'') { string[nextfree++] = '\''; } else if (ch == '"') { string[nextfree++] = '"'; } else if (ch == 'n') { string[nextfree++] = '\n'; } else if (ch == 'r') { string[nextfree++] = '\r'; } else if (ch == 't') { string[nextfree++] = '\t'; } else if (ch == 'x') { int x, x1, x2; x1 = fgetc(sourcefile); col++; if (!ishex(x1)) { // WARN: Bad format continue; } x2 = fgetc(sourcefile); col++; if (!ishex(x2)) { // WARN: Bad format continue; } x = (hextobin(x1)<<4) | hextobin(x2); if (x == 0) { // WARN: embedded NUL in a string is asking for trouble... } string[nextfree++] = x; } else { // Warn of unknown (to me) \x escape. Probably an error. string[nextfree++] = '\\'; string[nextfree++] = ch; } } else if (ch != quotech) { string[nextfree++] = ch; } else { string[nextfree] = '\0'; break; } } if (quotech == '\'') { if (strlen(string) == 1) { } else if (strlen(string) <= 4) { // Warn that 'xx' as a 32-bit int is a non-standard extension } else { // Warn that this is probably a string with the wrong type of quote. } } stores(strdup(string), lineno, col, (quotech == '\'' ? TYPE_CHARCONST : TYPE_STRING), curfile); free(string); } break; case '/': // COMMENTS (or just a divide symbol) col++; whitespace = FALSE; if (peek == '/') { // Handle line comment do {ch = fgetc(sourcefile);} while (ch != '\n'); lineno++; col = 0; whitespace = TRUE; } else if (peek == '*') { /* Handle potential multi-line comment */ ch = fgetc(sourcefile); // Now we have read '/*' for (;;) { col++; ch = fgetc(sourcefile); peek = fgetc(sourcefile); if ((ch == '*') && (peek == '/')) break; ungetc(peek, sourcefile); } col += 2; (void)fgetc(sourcefile); // Remove '/' // QUESTION: How does this affect # directives? } else { storec(ch, lineno, col, TYPE_CHAR, curfile); } break; // WHITESPACE case '\n': lineno++; case '\r': startline = TRUE; col = 0; whitespace = TRUE; break; case '\t': case ' ': col++; // Does not affect whitespace break; // DIRECTIVES case '#': // If we interpret any #-directives while lexing, we don't want to // do an expensive test on every token, so what we can do is set // a countdown timer on the introductory token (either this '#' // or the actual keyword such as 'ifdef') and then test that the // *previous* tokens match when the timer hits 0, eg // C[cp-3] == '#' && C[cp-2] == 'include' ... etc if (!whitespace) { // WARN: probably an error... should not be any '#' symbols in the // middle of a line. (This language uses "!=" or "<>" for not-equal) } // Drop through default: whitespace = FALSE; storec(ch, lineno, col++, TYPE_CHAR, curfile); } } // set up a dummy at the end because we sometimes look ahead by 1 // in the parsing code and don't want to hit uninitialised data. c[nextfree].t = TYPE_EOF; c[nextfree].s = "<EOF>"; c[nextfree].l = lineno; c[nextfree].col = col; } //-------------------------------------------------------------------------- int cp = 0; // code pointer. Has to be global state. int ap = 0; // Analysis record pointer. int parse(int pp, int depth) // depth is only for indentation in diags { int saved_cp, saved_ap, i, gp, alts, match; gp = phrase_start[pp-512-MAX_BIP]; alts = gram[gp]; if (debug_parser) { fprintf(stdout, "\n"); indent(depth, stdout); fprintf(stdout, "Phrase %s/%d (%d alternatives) = ", phrasename[pp-512], pp, alts); fflush(stdout); } gp++; // gp now points to first element (length) of first alt saved_cp = cp; saved_ap = ap; for (i = 0; i < alts; i++) { int each, phrases = gram[gp++], phrase_count, gap = 0; cp = saved_cp; ap = saved_ap; if (ap+3 > next_free_a) next_free_a = ap+3; makespace(A, next_free_a, a_size); A[ap++] = pp; // record which phrase (could be done outside loop) A[ap++] = i; // and which alt. // Count slots needed. *Could* be precalculated and stored // in the grammar, either embedded (after the ALT) or as a // separate table for (each = 0; each < phrases; each++) if (gram[gp+each] >= 512) gap++; A[ap++] = gap; // Count of alts (gap) // ap+gap now points to the slot after the space required, which // is where the first subphrase will be stored. ap = ap+gap; // recursive subphrases are stored after this phrase. // ap is left updated if successful. // successfully parsed phrases are stored in A[saved_ap+3+n] if (saved_ap+3+gap > next_free_a) next_free_a = saved_ap+3+gap; makespace(A, next_free_a, a_size); // this loop is only for diagnostics if (debug_parser) { fprintf(stdout, "\n"); indent(depth, stdout); fprintf(stdout, "Alternative %d: (%d phrases) ", i+1, phrases); for (each = 0; each < phrases; each++) { int phrase = gram[gp+each]; if (phrase < 256) { fprintf(stdout, " '%c'", phrase); } else if (phrase < 512) { fprintf(stdout, " \"%s\"/%d", keyword[phrase-256], phrase-256); } else if (phrase < 512+MAX_BIP) { fprintf(stdout, " {%s/BIP%d}", phrasename[phrase-512], BIP[phrase-512]); } else { fprintf(stdout, " <%s/%d>", phrasename[phrase-512], phrase); } } fprintf(stdout, "\n"); fflush(stdout); } match = TRUE; // stays true if all subphrases match phrase_count = 0; // only phrases which make it into the A record, // i.e. excluding literals and keywords for (each = 0; each < phrases; each++) { int phrase = gram[gp+each]; if (debug_parser) { indent(depth, stdout); fprintf(stdout, "Input token stream = '%s' '%s' '%s' ...\n", (cp < nextfree ? c[cp].s : "EOF"), (cp+1 < nextfree ? c[cp+1].s : "EOF"), (cp+2 < nextfree ? c[cp+2].s : "EOF")); } if (cp > bestparse) { static char s[128]; if (phrase < 256) { sprintf(s, "'%c'", phrase); } else if (phrase < 512) { sprintf(s, "\"%s\"", keyword[phrase-256]); } else if (phrase < 512+MAX_BIP) { sprintf(s, "{%s}", phrasename[phrase-512]); } else { sprintf(s, "<%s>", phrasename[phrase-512]); } looking_for = s; bestparse = cp; } if (debug_parser) indent(depth, stdout); if (phrase < 256) { if (debug_parser) fprintf(stdout, "'%c'", phrase); if ((c[cp].t != TYPE_CHAR) || (c[cp].s[0] != phrase)) match = FALSE; else cp++; // Don't record literals } else if (phrase < 512) { if (debug_parser) fprintf(stdout, "\"%s\"/%d", keyword[phrase-256], phrase-256); if (strcmp(keyword[phrase-256], c[cp].s) != 0) match = FALSE; else cp++; // Don't record keywords } else if (phrase < 512+MAX_BIP) { int where = ap; // next phrase to be parsed will be stored at current 'ap'. if (debug_parser) fprintf(stdout, "{%s/BIP%d}", phrasename[phrase-512], BIP[phrase-512]); if (c[cp].t != BIP[phrase-512]) match = FALSE; else { A[ap++] = phrase; A[ap++] = 1; A[ap++] = 1; A[ap++] = cp++; A[saved_ap+3+phrase_count++] = where; // Record BIP } } else { int where = ap; // next phrase to be parsed will be stored at current 'ap'. if (debug_parser) fprintf(stdout, "<%s/%d>", phrasename[phrase-512], phrase); if (!parse(phrase, depth+1)) match = FALSE; else { A[saved_ap+3+phrase_count++] = where; } } if (debug_parser) { fprintf(stdout, "\n"); indent(depth, stdout); fprintf(stdout, "Tried alternative %d: result was %s\n", each+1, (match ? "TRUE" : "FALSE")); fflush(stdout); } if (!match) break; } gp += phrases; // move over all phrases, to next alt if (match) break; else if (debug_parser) { indent(depth, stdout); fprintf(stdout, "** Alternative %d FAILED.\n", i+1); } // gp now points to first element (length) of next alt, or start of next phrase } return(match); } //-------------------------------------------------------------------------- typedef int TRIP; //------------------------------------------------------------------------------------ /* In previous compilers, I had to write custom code for every tree-based optimisation, in order to walk down the tree to the right place to find the leaves to be optimised. In this one, I have a generic tree-walking procedure which can walk the entire program, but it can be customised so that it takes action only on specific phrases. This is possible in this design only because each set of subphrases stores the count of subphrases befoe it - thus allowing a generic tree-walking procedure that doesn't have to know what each node consists of until it happens across a node of the type it is looking for. */ void walk(int ap, int depth, int wanted(int phraseno), void perform(int ap, int before, int depth)) { int i; if (wanted(A[ap])) perform(ap, TRUE, depth); for (i = 3; i < A[ap+2]+3; i++) { if (A[A[ap+i]] >= 512+MAX_BIP) walk(A[ap+i], depth+1, wanted, perform); } if (wanted(A[ap])) perform(ap, FALSE, depth); } int want_all(int phraseno) { return TRUE; } void print_all(int ap, int before, int depth) { int saved_ap = ap; int phrase = A[ap++]; int alt = A[ap++]; int phrases = A[ap++]; // defined subphrases int i; indent(depth, stderr); fprintf(stderr, "<%s%s/%d> ", (before ? "" : "/"), phrasename[phrase-512], alt); for (i = 0; i < (3+phrases); i++) { fprintf(stderr, "A[%d] = %d, ", saved_ap+i, A[saved_ap+i]); } fprintf(stderr, "\n"); } TRIP compile(int ap, int depth) { int saved_ap = ap; int phrase = A[ap++]; // A[ap] is the phrase number. A[ap+1] is the alt. int alt = A[ap++]; // For consistency, in BIPs, the Alt should always be 1 // although that may not be the case at the moment :-( int phrases = A[ap++]; // defined subphrases int i; // The following ecce command executed on this file will generate teeny.g: // ecce -c "(v.//\\.s..(v/ /e)?m,k)0;%c" teeny.c teeny.g // May later tweak takeon.c to read from teeny.c rather than teeny.g // thus simplifying build process and becoming more like yacc. switch (phrase) { //\\ # BIPS (Built-in Phrases)are linked to the type-code returned //\\ # by the line-reconstruction code (aka lexer) //\\ //\\ # These *must* come first. // See TYPE_* in first page for the values to use. //\\ //\\ B<IDENT>=1; case P_IDENT: fprintf(stdout, "%s", c[A[ap]].s); break; //\\ B<NUM>=5; case P_NUM: fprintf(stdout, "%s", c[A[ap]].s); break; //\\ B<HEXNUM>=6; case P_HEXNUM: fprintf(stdout, "0x%s", c[A[ap]].s); break; //\\ B<CHARLIT>=3; case P_CHARLIT: fprintf(stdout, "'%s'", c[A[ap]].s); // ADD ESCAPES break; //\\ B<STRING>=2; case P_STRING: fprintf(stdout, "\"%s\"", c[A[ap]].s); // ADD ESCAPES break; //\\ //\\ # Phrase definitions. PROGRAM is the main entry point. //\\ //\\ # This is the first draft of a grammar, based on the one //\\ # collectively determined at the wiki site. At the moment //\\ # this has not been debugged extensively. case P_PROGRAM: //\\ P<PROGRAM> = <OPTGLOBAL> <OPTPROC> "program" <MAINBLOCK>; // A[ap] A[ap+1] A[ap+2] Note that literals are discarded. fprintf(stdout, "#include <stdio.h>\n"); fprintf(stdout, "void put(int c) {\n"); fprintf(stdout, " fputc(c, stdout);\n"); fprintf(stdout, "}\n"); compile(A[ap], depth+1); compile(A[ap+1], depth+1); fprintf(stdout, "int main(int argc, char **argv)\n"); compile(A[ap+2], depth+1); // Would be nice if we could force an 'exit(0)' at the end of // the mail program block. break; case P_OPTGLOBAL: //\\ //\\ P<OPTGLOBAL> = <GLOBAL> <OPTGLOBAL>, //\\ ; if (alt == 1) break; compile(A[ap], depth+1); compile(A[ap+1], depth+1); break; case P_OPTPROC: //\\ //\\ P<OPTPROC> = <PROC> <OPTPROC>, //\\ ; if (alt == 1) break; compile(A[ap], depth+1); compile(A[ap+1], depth+1); break; case P_MAINBLOCK: case P_BLOCK: //\\ //\\ P<MAINBLOCK> = "begin" <OPTLOCAL> <SSLIST> "end"; //\\ P<BLOCK> = "begin" <OPTLOCAL> <SSLIST> "end"; fprintf(stdout, "{\n"); compile(A[ap], depth+1); compile(A[ap+1], depth+1); if (phrase == P_MAINBLOCK) fprintf(stdout, " exit(0); return(0);\n"); fprintf(stdout, "}\n"); break; case P_SSLIST: //\\ //\\ P<SSLIST> = <STATEMENT> <SSLIST>, //\\ ; if (alt == 1) break; compile(A[ap], depth+1); compile(A[ap+1], depth+1); break; case P_OPTLOCAL: //\\ //\\ P<OPTLOCAL> = <LOCAL>, //\\ ; if (alt == 0) compile(A[ap], depth+1); break; case P_RESTOFDECL: //\\ //\\ P<RESTOFDECL> = ',' <DECL> <RESTOFDECL>, //\\ ; if (alt == 1) break; fprintf(stdout, ", "); compile(A[ap], depth+1); compile(A[ap+1], depth+1); break; case P_DIRECTIVE: //\\ //\\ P<DIRECTIVE> = '#' <COMMAND>; compile(A[ap], depth+1); break; case P_COMMAND: //\\ //\\ P<COMMAND> = "define" <IDENT> <DEFN>, //\\ "ifdef" <IDENT>, //\\ "endif"; // See comment next to '#' in the line reconstruction for one way // to handle these directives. fprintf(stdout, "\n#%s", (alt == 0 ? "define" : (alt == 1 ? "ifdef" : "endif"))); if (alt < 2) { fprintf(stdout, " "); compile(A[ap], depth+1); } if (alt == 0) { fprintf(stdout, " "); compile(A[ap+1], depth+1); } fprintf(stdout, "\n"); break; case P_DEFN: //\\ //\\ P<DEFN> = <IDENT>, <NUMBER>, <HEXNUM>, <CHARLIT>, <STRING>; compile(A[ap], depth+1); break; case P_GLOBAL: //\\ //\\ P<GLOBAL> = '#' "include" <STRING>, //\\ <LOCAL>; if (alt == 0) { fprintf(stdout, "\n#%s"); compile(A[ap], depth+1); fprintf(stdout, "\n"); } else { compile(A[ap], depth+1); } break; case P_LOCAL: //\\ //\\ P<LOCAL> = <DIRECTIVE>, //\\ <TYPE> <DECL> <RESTOFDECL> <OPTSEMI>; if (alt == 0) { compile(A[ap], depth+1); } else { fprintf(stdout, " "); compile(A[ap], depth+1); compile(A[ap+1], depth+1); compile(A[ap+2], depth+1); fprintf(stdout, ";\n"); } break; case P_TYPE: //\\ //\\ P<TYPE> = <OPTUNSIGNED> <SIZE>; compile(A[ap], depth+1); compile(A[ap+1], depth+1); break; case P_OPTUNSIGNED: //\\ //\\ P<OPTUNSIGNED> = "unsigned", //\\ ; if (alt == 0) fprintf(stdout, "unsigned "); break; case P_SIZE: //\\ //\\ P<SIZE> = "char", //\\ "int"; fprintf(stdout, (alt == 0 ? "char " : "int ")); break; case P_DECL: //\\ //\\ P<DECL> = '*' <IDENT>, //\\ <IDENT> <OPTINDEX>, //\\ <IDENT>; if (alt==0) fprintf(stdout, "*"); compile(A[ap], depth+1); if (alt==1) compile(A[ap+1], depth+1); break; case P_OPTINDEX: //\\ //\\ P<OPTINDEX> = '[' <NUMBER> ']', //\\ ; if (alt == 1) break; fprintf(stdout, "["); compile(A[ap], depth+1); fprintf(stdout, "]"); break; case P_PROC: //\\ //\\ P<PROC> = "procedure" <IDENT> '(' ')' <OPTBLOCK>, //\\ "procedure" <IDENT> '(' <FORMALLIST> ')' <OPTBLOCK>; if (alt == 0) { fprintf(stdout, "void ");compile(A[ap],depth+1);fprintf(stdout, "(void)\n"); compile(A[ap+1],depth+1); } else { fprintf(stdout, "void ");compile(A[ap],depth+1);fprintf(stdout, "(");compile(A[ap+1],depth+1);fprintf(stdout, ")\n"); compile(A[ap+2],depth+1); } break; case P_OPTBLOCK: //\\ //\\ P<OPTBLOCK> = <BLOCK>, //\\ ; if (alt == 0) { compile(A[ap], depth+1); } else fprintf(stdout, ";"); break; case P_FORMALLIST: //\\ //\\ P<FORMALLIST> = <OPTFORMAL> <RESTOFFORMAL>, //\\ ; if (alt == 1) { fprintf(stdout, "void"); break; } compile(A[ap], depth+1); compile(A[ap+1], depth+1); break; case P_RESTOFFORMAL: //\\ //\\ P<RESTOFFORMAL> = ',' <FORMAL> <RESTOFFORMAL>, //\\ ; if (alt == 1) break; fprintf(stdout, ", "); compile(A[ap], depth+1); compile(A[ap+1], depth+1); break; case P_FORMAL: // \\ // \\ P<FORMAL> = <TYPE> '*' <IDENT>, // \\ <TYPE> '*', // \\ <TYPE> <IDENT>, // \\ <IDENT>; // <IDENT> on its own is prsumably for declarations like: // unsigned int M, N - in which case the type is that of // the preceding item. But what if the IDENT comes first, // i.e. procedure fred(jim); Would that default to an int // or is it simply an error? // compile(A[ap], depth+1); // if (alt < 2) fprintf(stdout, " *"); // if ((alt&1) == 0) compile(A[ap+1], depth+1); // break; // Trying a different grammar for FORMAL: //\\ //\\ P<FORMAL> = <TYPE> '*' <IDENTLIST>, //\\ <TYPE> '*', //\\ <TYPE> <IDENTLIST>, //\\ <TYPE>; // *** I think we forgot naked type in the otiginal grammar if ((alt&1) == 1) { compile(A[ap], depth+1); if (alt == 1) fprintf(stdout, " *"); } else { int namelist = A[ap+1]; // Need to look down into structure for namelist in order to // explicitly type each variable, i.e. "int i, int j" rather // than "int i, j" // namelist points to a IDENTLIST for (;;) { int name = A[namelist+3]; compile(A[ap], depth+1); if (alt == 0) fprintf(stdout, " *"); compile(name, depth+1); namelist = A[namelist+4]; // namelist now points to a RESTOFIDENTLIST if (A[namelist+1] == 0) { fprintf(stdout, ", "); } else { break; } } // // Can we do this elegantly with the 'walk' procedure? } break; //\\ P<IDENTLIST> = <IDENT> <RESTOFIDENTLIST>; //\\ P<RESTOFIDENTLIST> = ',' <IDENT> <RESTOFIDENTLIST>, //\\ ; case P_OPTFORMAL: //\\ //\\ P<OPTFORMAL> = <FORMAL>, //\\ ; if (alt == 1) return; compile(A[ap], depth+1); break; case P_STATEMENT: //\\ //\\ P<STATEMENT> = <SS> <OPTSEMI>; compile(A[ap], depth+1); break; case P_SS: switch (alt) { //\\ case 0: //\\ P<SS> = <IF>, compile(A[ap], depth+1); break; case 1: //\\ <WHILE>, compile(A[ap], depth+1); break; case 2: //\\ <FOR>, compile(A[ap], depth+1); break; case 3: //\\ '#' "inline" <STRING>, break; case 4: //\\ <DIRECTIVE>, break; case 5: //\\ <SIMPLE>; compile(A[ap], depth+1); break; } break; case P_IF: //\\ //\\ P<IF> = "if" '(' <BOOLEXPR> ')' <SSLIST> <OPTELSE> "endif"; fprintf(stdout, " if ("); compile(A[ap], depth+1); fprintf(stdout, ") {\n"); compile(A[ap+1], depth+1); compile(A[ap+2], depth+1); fprintf(stdout, " }\n"); break; case P_OPTELSE: //\\ //\\ P<OPTELSE> = "else" <SSLIST>, //\\ ; if (alt == 1) break; fprintf(stdout, " } else {\n"); compile(A[ap], depth+1); break; case P_WHILE: //\\ //\\ P<WHILE> = "while" '(' <BOOLEXPR> ')' <SSLIST> "endwhile"; fprintf(stdout, " while ("); compile(A[ap], depth+1); fprintf(stdout, ") {\n"); compile(A[ap+1], depth+1); fprintf(stdout, " }\n"); break; case P_FOR: //\\ //\\ P<FOR> = "for" '(' <SIMPLE> <OPTSEMI> <BOOLEXPR> <OPTSEMI> <SIMPLE> <OPTSEMI> ')' <SSLIST> "endfor"; fprintf(stdout, " for ("); compile(A[ap], depth+1); fprintf(stdout, "; "); compile(A[ap+2], depth+1); fprintf(stdout, "; "); compile(A[ap+4], depth+1); fprintf(stdout, ") {\n"); compile(A[ap+6], depth+1); fprintf(stdout, " }\n"); break; case P_SIMPLE: //\\ //\\ P<SIMPLE> = <IDENT> '(' <PARAMLIST> ')', //\\ <IDENT> '(' ')', //\\ <VARIABLE> '=' <BOOLEXPR>; fprintf(stdout, " "); compile(A[ap], depth+1); if (alt < 2) { fprintf(stdout, "("); if (alt == 0) compile(A[ap+1], depth+1); fprintf(stdout, ")"); } else { fprintf(stdout, " = "); compile(A[ap+1], depth+1); } fprintf(stdout, ";\n"); break; case P_PARAMLIST: //\\ //\\ P<PARAMLIST> = <PARAM> <RESTOFPARAMLIST> //\\ ; if (alt == 1) break; compile(A[ap], depth+1); compile(A[ap+1], depth+1); break; case P_RESTOFPARAMLIST: //\\ P<RESTOFPARAMLIST> = <OPTCOMMA> <PARAMLIST>, //\\ ; if (alt == 1) break; fprintf(stdout, ", "); compile(A[ap+1], depth+1); break; case P_OPTCOMMA: //\\ //\\ P<OPTCOMMA> = ',', //\\ ; break; case P_VARIABLE: //\\ //\\ P<VARIABLE> = '*' <ADDR>, //\\ <IDENT> '[' <EXPR> ']', //\\ <IDENT>; if (alt == 0) fprintf(stdout, "*"); compile(A[ap], depth+1); if (alt == 1) { fprintf(stdout, "["); compile(A[ap+1], depth+1); fprintf(stdout, "]"); } break; case P_ADDR: //\\ //\\ P<ADDR> = <HEXNUM>, //\\ <IDENT>; compile(A[ap], depth+1); break; case P_PARAM: //\\ //\\ P<PARAM> = <STRING>, //\\ <EXPR>; compile(A[ap], depth+1); break; case P_BOOLEXPR: //\\ //\\ P<BOOLEXPR> = <BOOLTERM> <RESTOFBOOLTERM>; compile(A[ap], depth+1); compile(A[ap+1], depth+1); break; case P_RESTOFBOOLTERM: //\\ //\\ P<RESTOFBOOLTERM> = <OROP> <BOOLTERM> <RESTOFBOOLTERM>, //\\ ; if (alt == 1) break; compile(A[ap], depth+1); compile(A[ap+1], depth+1); compile(A[ap+2], depth+1); break; case P_BOOLTERM: //\\ //\\ P<BOOLTERM> = <BOOLFACTOR> <RESTOFBOOLFACTOR>; compile(A[ap], depth+1); compile(A[ap+1], depth+1); break; case P_RESTOFBOOLFACTOR: //\\ //\\ P<RESTOFBOOLFACTOR> = '&' <BOOLFACTOR> <RESTOFBOOLFACTOR>, //\\ ; if (alt == 1) break; fprintf(stdout, " && "); compile(A[ap], depth+1); compile(A[ap+1], depth+1); break; case P_BOOLFACTOR: //\\ //\\ P<BOOLFACTOR> = <OPTNOT> <RELATION>; compile(A[ap], depth+1); compile(A[ap+1], depth+1); break; case P_OPTNOT: //\\ //\\ P<OPTNOT> = '!', //\\ ; if (alt == 0) fprintf(stdout, "!"); break; case P_OPTSEMI: //\\ //\\ P<OPTSEMI> = ';', //\\ ; break; case P_RELATION: //\\ //\\ P<RELATION> = <EXPR> <RESTOFRELATION>; compile(A[ap], depth+1); compile(A[ap+1], depth+1); break; case P_RESTOFRELATION: //\\ //\\ P<RESTOFRELATION> = <RELOP> <EXPR>, //\\ ; // implicitly "<expr> != 0", if no relop given. if (alt == 1) break; compile(A[ap], depth+1); compile(A[ap+1], depth+1); break; case P_EXPR: //\\ //\\ P<EXPR> = <SUM> <RESTOFEXPR>; compile(A[ap], depth+1); compile(A[ap+1], depth+1); break; case P_RESTOFEXPR: //\\ //\\ P<RESTOFEXPR> = <SHIFTOP> <SUM> <RESTOFEXPR>, //\\ ; if (alt == 1) break; compile(A[ap], depth+1); compile(A[ap+1], depth+1); compile(A[ap+2], depth+1); break; case P_SUM: //\\ //\\ P<SUM> = <OPTADDOP> <TERM> <RESTOFSUM>; compile(A[ap], depth+1); compile(A[ap+1], depth+1); compile(A[ap+2], depth+1); break; case P_OPTADDOP: //\\ //\\ P<OPTADDOP> = <ADDOP>, //\\ ; if (alt == 1) break; compile(A[ap], depth+1); break; case P_RESTOFSUM: //\\ //\\ P<RESTOFSUM> = <ADDOP> <TERM> <RESTOFSUM>, //\\ ; if (alt == 1) break; compile(A[ap], depth+1); compile(A[ap+1], depth+1); compile(A[ap+2], depth+1); break; case P_TERM: //\\ //\\ P<TERM> = <FACTOR> <RESTOFTERM>; compile(A[ap], depth+1); compile(A[ap+1], depth+1); break; case P_RESTOFTERM: //\\ //\\ P<RESTOFTERM> = <MULOP> <FACTOR> <RESTOFTERM>, //\\ ; if (alt == 1) break; compile(A[ap], depth+1); compile(A[ap+1], depth+1); compile(A[ap+2], depth+1); break; case P_OROP: //\\ //\\ P<OROP> = '|', '~'; fprintf(stdout, (alt == 0 ? " | " : " ^ ")); break; case P_RELOP: //\\ //\\ P<RELOP> = '<>', '<=', '<', '>=', '>', '!=', '='; switch (alt) { case 0: case 5: fprintf(stdout, " != "); break; case 1: fprintf(stdout, " <= "); break; case 2: fprintf(stdout, " < "); break; case 3: fprintf(stdout, " >= "); break; case 4: fprintf(stdout, " > "); break; case 6: fprintf(stdout, " == "); break; } break; case P_SHIFTOP: //\\ //\\ P<SHIFTOP> = '<<', '>>'; fprintf(stdout, (alt == 0 ? " << " : " >> ")); break; case P_ADDOP: //\\ //\\ P<ADDOP> = '+', '-'; fprintf(stdout, (alt == 0 ? " + " : " - ")); break; case P_MULOP: //\\ //\\ P<MULOP> = '*', '/'; fprintf(stdout, (alt == 0 ? " * " : " / ")); break; case P_FACTOR: //\\ //\\ P<FACTOR> = '(' <BOOLEXPR> ')', if (alt == 0) { // when this is in the switch, it is not executed. Compiler bug? fprintf(stdout, " ("); compile(A[ap], depth+1); fprintf(stdout, ") "); break; } else switch (alt) { case 1: //\\ '&' <IDENT> '[' <EXPR> ']', case 2: //\\ '&' <IDENT>, fprintf(stdout, " &"); compile(A[ap], depth+1); if (alt == 1) { fprintf(stdout, "["); compile(A[ap+1], depth+1); fprintf(stdout, "]"); } break; case 3: //\\ <VARIABLE>, case 4: //\\ <NUMBER>, case 5: //\\ <HEXNUM>; compile(A[ap], depth+1); } break; //\\ //\\ # Shouldn't HEXNUM be here with NUM and CHARLIT? //\\ # The apparent restriction is that a HEXNUM cannot //\\ # be used as the size of an array declaration - but //\\ # a character constant can??? case P_NUMBER: //\\ //\\ P<NUMBER> = <NUM>, //\\ <CHARLIT>; compile(A[ap], depth+1); break; //\\ //\\ E //\\ # 'E' is end of grammar. Everything after this is ignored. default: fprintf(stderr, "*** Internal error at line %d. ap=%d phrase=%d\n", __LINE__, ap, phrase); exit(2); } return(-1); // DUMMY TRIP, NOTHING TO RETURN } //-------------------------------------------------------------------------- int main(int argc, char **argv) { char *s; // Get clean version of executable name. Should work on most existing systems (2006) progname = argv[0]; if ((s = strrchr(progname, '/')) != NULL) progname = s+1; // Unix if ((s = strrchr(progname, '\\')) != NULL) progname = s+1; // M$ if ((s = strrchr(progname, ']')) != NULL) progname = s+1; // Dec if ((s = strrchr(progname, ';')) != NULL) *s = '\0'; // Version no's if (((s = strrchr(progname, '.')) != NULL) && (strcasecmp(s, ".exe") == 0)) *s = '\0'; if (((s = strrchr(progname, '.')) != NULL) && (strcasecmp(s, ".com") == 0)) *s = '\0'; if ((argc == 3) && strcmp(argv[1], "-d") == 0) { argv++; argc--; debug_parser = TRUE; } if (argc != 2) { fprintf(stderr, "syntax: %s [-d] filename\n", progname); exit(1); } sourcefile = fopen(argv[1], "r"); if (sourcefile == NULL) { fprintf(stderr, "%s: %s - %s\n", progname, strerror(errno), argv[1]); exit(errno); } curfile = argv[1]; startline = TRUE; whitespace = TRUE; onecharstr = (char *)malloc(512); line_reconstruction(); if (debug_parser) { int i; // DEBUG ONLY for (i = 0; i < nextfree; i++) { fprintf(stdout, "%s, line %d, col %d: [%0d] %s\n", c[i].f, c[i].l, c[i].col, c[i].t, c[i].s); } } if (!parse(PHRASE_BASE, 0)) { if (bestparse == nextfree) { fprintf(stderr, "\"%s\", Line %d, Col %d: Premature end of file while looking for %s\n", argv[1], c[bestparse].l, c[bestparse].col+1, looking_for); } else { int i; fprintf(stderr, "\"%s\", Line %d, Col %d: Syntax error while looking for %s near ", argv[1], c[bestparse].l, c[bestparse].col+1, looking_for); for (i = bestparse; i < bestparse+3; i++) { if (i == nextfree) { fprintf(stderr, "<End of file>"); break; } switch (c[i].t) { case TYPE_HEXINT: fprintf(stderr, "$"); // *OR* ... We could put the '$' back in front of the string /* drop through */ // and probably save much code whenever printing. Use str+1 case TYPE_TAG: case TYPE_CHAR: case TYPE_INT: case TYPE_KEYWORD: fprintf(stderr, "%s", c[i].s); break; case TYPE_STRING: fprintf(stderr, "\"%s\"", c[i].s); break; case TYPE_CHARCONST: fprintf(stderr, "'%s'", c[i].s); break; } fprintf(stderr, (i == (bestparse+2) ? " ..." : " ")); } fprintf(stderr, "\n"); } exit(1); } fprintf(stderr, "Parsed OK, now compiling.\n"); if (debug_parser) walk(0, 0, want_all, print_all); // Diags: print final parse tree // The structure at A[] is a concrete syntax tree. We could // generate code directly from this tree, however for the // sake of cleaner design, we will first convert the concrete // syntax tree into an Abstract Syntax Tree (AST) {int program = compile(0, 0); // Now generate the output code from the AST. // codegen(program, -1, -1, -1) } exit(0); return(1); }