#include <stdio.h> #include <string.h> #include <errno.h> #include <stdlib.h> #ifndef FALSE #define FALSE (0!=0) #define TRUE (0==0) #endif /* Takeon generates a grammar in gram[]. Phrases are numbered sequentially from 512 upwards, with a lookup table from this sequential numbering into gram indexes stored in phrase[]. We *could* make the phrase numbers sparse, and have only the gram table without the phrase table - the only reason to do it this way is so that the phrasename[] table is compact. If we didn't do the indirection (thus saving the phrase[] table) then the phrasename table would be as big as the gram table, which is much worse. Of course, the downside is that we only really need the phrasename table for diagnostics, so in a production compiler it's true that we do have a slight unnecessary overhead. */ static FILE *grammar; #define MAX_GRAMMAR (1024*16) #define MAX_PHRASES 1024 static int gram[MAX_GRAMMAR]; static int phrase[MAX_PHRASES]; static int lineno; void fatal_(int line) { fprintf(stderr, "* Syntax error at line %d (detected in %s, line %d)\n", lineno, __FILE__, line); exit(1); } #define fatal() fatal_(__LINE__) char *upto(int ends) { char temp[128], *s = temp; int count = 0; for (;;) { int c = fgetc(grammar); if ((c == EOF) || ferror(grammar) || (count == 127)) fatal(); if (c == ends) return(strdup(temp)); *s++ = c; *s = '\0'; count++; } } int nonspace(void) { for (;;) { int c = fgetc(grammar); if ((c == EOF) || ferror(grammar)) fatal(); if (c == '\n') lineno++; if (!isspace(c)) return(c); } } #define MAX_BIPS 16 /* Surely more than enough? [undoubtedly I'll curse that later ;-)] */ static int nextbip = 0; static int BIP[MAX_BIPS]; static char *phrasename[MAX_PHRASES+512]; static char *key[128]; static int nextfreekey = 0; static int next_gram = 0; static int pnp = 512; void dump_tables(void) { int i; int nums_per_line = 16; int keys_per_line = 4; fprintf(stdout, "#define MAX_GRAMMAR %d\n", next_gram); fprintf(stdout, "#define PHRASE_BASE %d\n", nextbip+512); fprintf(stdout, "int gram[MAX_GRAMMAR] = {\n"); for (i = 0; i < next_gram; i++) { fprintf(stdout, "%5d, ", gram[i]); if ((i+1) % nums_per_line == 0) fprintf(stdout, "\n"); } if ((next_gram % nums_per_line) != 0) fprintf(stdout, "\n"); fprintf(stdout, "};\n\n"); fprintf(stdout, "#define MAX_KEYWORD %d\n", nextfreekey); fprintf(stdout, "char *keyword[MAX_KEYWORD] = { // Keywords are based at 256\n "); for (i = 0; i < nextfreekey; i++) { fprintf(stdout, "\"%s\", ", key[i]); if ((i+1) % keys_per_line == 0) fprintf(stdout, "\n "); } if ((nextfreekey % keys_per_line) != 0) fprintf(stdout, "\n"); fprintf(stdout, "};\n\n"); fprintf(stdout, "#define MAX_BIP %d\n", nextbip); fprintf(stdout, "int BIP[MAX_BIP] = { // BIPs precede PHRASEs at 512 upwards\n"); for (i = 0; i < nextbip; i++) { fprintf(stdout, "%2d, ", BIP[i]); if ((i+1) % nums_per_line == 0) fprintf(stdout, "\n"); } if ((nextbip % nums_per_line) != 0) fprintf(stdout, "\n"); fprintf(stdout, "};\n"); fprintf(stdout, "\n#define MAX_PHRASE %d\n", pnp-512); fprintf(stdout, "#ifdef DEBUG_PARSER\n// FOR DEBUGGING ONLY\n"); fprintf(stdout, "char *phrasename[MAX_PHRASE] = { // Based at 512 upwards\n "); for (i = 512; i < pnp; i++) { fprintf(stdout, "\"%s\", ", phrasename[i]); if ((i+1) % keys_per_line == 0) fprintf(stdout, "\n "); } if ((pnp % keys_per_line) != 0) fprintf(stdout, "\n"); fprintf(stdout, "};\n#endif /* DEBUG_PARSER */\n"); fprintf(stdout, "\nint phrase_start[MAX_PHRASE-MAX_BIP] = {\n"); for (i = 512+nextbip; i < pnp; i++) { fprintf(stdout, "%5d, ", phrase[i-512]); if ((i+1) % nums_per_line == 0) fprintf(stdout, "\n"); } if ((pnp % nums_per_line) != 0) fprintf(stdout, "\n"); fprintf(stdout, "};\n\n"); for (i = 512; i < pnp; i++) { fprintf(stdout, "#define P_%s %d\n", phrasename[i], i); } } int keyword_code(char *keyword) { int i; key[nextfreekey] = keyword; // POOR IMPLEMENTATION for now. Quick & Dirty to get something working. for (i = 0; i <= nextfreekey; i++) { if (strcmp(keyword, key[i]) == 0) break; } if (i == nextfreekey) { key[i] = strdup(key[i]); nextfreekey++; } return i+256; // 256..511 are for keywords } void takeon(int finalpass) { int gp = 0; // grammar pointer int sym, lastsym = ';'; char *def_name = NULL, *name = NULL, *string = NULL, *keyword = NULL; int def_bip = FALSE, def_phrase = FALSE; int alt_count = 1, alt_count_index = 0; int phrase_count = 0, phrase_count_index = 0; int this_phrase_start = 0; int indent_len = 0; static int max_phrase = 0; lineno = 1; // (re)init globals too. next_gram = 0; pnp = 512; // phrase name pointer nextbip = pnp; // First BIPS, then phrases. for (;;) { switch (sym = nonspace()) { case 'B': // BIP DEFINITION def_bip = TRUE; break; case 'P': // PHRASE DEFINITION def_phrase = TRUE; break; case 'E': // END OF GRAMMAR if (finalpass) dump_tables(); max_phrase = pnp; return; case '=': // start of a phrase alternative or a BIP defn // name should be valid at this point if (def_bip) { // expect a number and a ';'. int digit = nonspace(); if (!isdigit(digit)) fatal(); // For now, single digit... // DO SOMETHING HERE WITH "def_name" and "digit" TO DEFINE BIP BIP[pnp-512] = digit-'0'; nextbip = pnp-512+1; phrase[pnp-512] = 0; phrasename[pnp++] = strdup(def_name); if (nonspace() != ';') fatal(); def_bip = FALSE; } else if (def_phrase) { // expect a phrase definition. phrase[pnp-512] = next_gram; phrasename[pnp++] = strdup(def_name); def_phrase = FALSE; alt_count_index = this_phrase_start = next_gram++; // Hole for number of alternatives phrase_count_index = next_gram++; // hole for number of items in first alt. alt_count = 1; phrase_count = 0; } else { fatal(); // missing definition } break; case '<': // phrase def *or* instance within an alt. name = upto('>'); if (def_bip || def_phrase) { def_name = name; } else { int i; for (i = 512; i < max_phrase; i++) {if (strcmp(phrasename[i], name)==0) break;} if (finalpass && (i >= max_phrase)) {fflush(stderr); fprintf(stderr, "\n* UNDEFINED P<%s>\n", name); fatal();} phrase_count++; gram[next_gram++] = i; } break; case '\'': // string literal string = upto('\''); phrase_count += strlen(string); // Each char counts as a phrase, albeit a short one... { char *s = string; while (*s != '\0') gram[next_gram++] = *s++; } break; case '"': // keyword literal keyword = upto('"'); phrase_count++; gram[next_gram++] = keyword_code(keyword); break; case ',': // next alternative gram[phrase_count_index] = phrase_count; phrase_count = 0; phrase_count_index = next_gram++; // hole for number of items in first alt. alt_count++; break; case ';': // end of alternatives gram[phrase_count_index] = phrase_count; // Can tell is last alt was null by checking lastsym == ',', should we ever need to know gram[this_phrase_start] = alt_count; gram[phrase_count_index] = phrase_count; phrase_count = 0; def_phrase = FALSE; free(def_name); def_name = NULL; break; case '#': // comment to end of line {int c; for (;;) {c = fgetc(grammar); if ((c == EOF) || ferror(grammar)) fatal(); if (c == '\n') break;}} lineno++; break; default: fatal(); } lastsym = sym; } } int main(int argc, char **argv) { int pass; if (argc != 2) { fprintf(stderr, "syntax: takeon teeny.g\n"); exit(1); } for (pass = 0; pass <= 1; pass++) { grammar = fopen(argv[1], "r"); if (grammar == NULL) { fprintf(stderr, "takeon: %s - %s\n", strerror(errno), argv[1]); exit(errno); } takeon(pass); // build tables fclose(grammar); } exit(0); return(1); }