#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Boolean constants spelled as comparisons, so each has the value the compiler
   itself produces for a false/true expression.  Both are skipped when FALSE is
   already defined. */
#ifndef FALSE
#define FALSE (0!=0)
#define TRUE (0==0)
#endif

/*
     Takeon generates a grammar in gram[].  Phrases are numbered
     sequentially from 512 upwards, with a lookup table from this
     sequential numbering into gram indexes stored in phrase[].

     We *could* make the phrase numbers sparse, and have only the
     gram table without the phrase table - the only reason to do it
     this way is so that the phrasename[] table is compact.  If
     we didn't do the indirection (thus saving the phrase[] table)
     then the phrasename table would be as big as the gram table,
     which is much worse.  Of course, the downside is that we only
     really need the phrasename table for diagnostics, so in a
     production compiler it's true that we do have a slight
     unnecessary overhead.
 */

/* Input stream holding the grammar source; main() (re)opens it before each
   call to takeon(), and upto()/nonspace() read from it. */
static FILE *grammar;

/* Capacities, in elements, of gram[] and phrase[] respectively. */
#define MAX_GRAMMAR (1024*16)
#define MAX_PHRASES 1024
/* The flattened grammar built by takeon(): alternative counts, item counts
   and items, appended in the order they are read. */
static int gram[MAX_GRAMMAR];
/* phrase[n-512] is the index in gram[] where phrase number n starts
   (takeon() stores 0 here for BIPs). */
static int phrase[MAX_PHRASES];

/* Current line number in the grammar file, used only for diagnostics. */
static int lineno;
/*
 * Report a syntax error and terminate the program with exit status 1.
 *
 *   line - line number within this C source file where the problem was
 *          detected (normally supplied by the fatal() macro below).
 *
 * The message also carries lineno, the current line of the grammar file.
 */
void fatal_(int line) {
  const char *this_source = __FILE__;

  fprintf(stderr,
          "* Syntax error at line %d (detected in %s, line %d)\n",
          lineno, this_source, line);
  exit(1);
}
/* Convenience wrapper: abort, recording the C source line of the call site. */
#define fatal() fatal_(__LINE__)

char *upto(int ends) {
  char temp[128], *s = temp;
  int count = 0;
  for (;;) {
    int c = fgetc(grammar);
    if ((c == EOF) || ferror(grammar) || (count == 127)) fatal();
    if (c == ends) return(strdup(temp));
    *s++ = c; *s = '\0'; count++;
  }
}

/*
 * Skip whitespace in the grammar stream and return the next character that
 * is not whitespace.  Every newline passed over bumps lineno.
 *
 * Aborts via fatal() on end of file or on a read error, so the value
 * returned is never EOF.
 */
int nonspace(void) {
  int ch;

  do {
    ch = fgetc(grammar);
    if (ch == EOF || ferror(grammar)) {
      fatal();
    }
    if (ch == '\n') {
      lineno++;
    }
  } while (isspace(ch));

  return ch;
}

#define MAX_BIPS 16  /* Surely more than enough? [undoubtedly I'll curse that later ;-)] */
/* One past the index of the most recently defined BIP; takeon() sets it to
   pnp-512+1 each time a BIP definition is read. */
static int nextbip = 0;
/* BIP[n-512] is the single-digit value given in the definition of BIP number n. */
static int BIP[MAX_BIPS];
/* Names of BIPs and phrases, indexed directly by phrase number (512 upwards),
   which is why the table is 512 entries larger than MAX_PHRASES. */
static char *phrasename[MAX_PHRASES+512];
/* Distinct keyword strings seen so far; the code for key[i] is i+256. */
static char *key[128];
/* Number of entries of key[] in use. */
static int nextfreekey = 0;
/* Index of the next unused cell in gram[]. */
static int next_gram = 0;
/* "Phrase name pointer": the number the next BIP or phrase will receive. */
static int pnp = 512;

/*
 * Write the tables built by takeon() to stdout as C source text:
 *
 *   - gram[]          the flattened grammar (next_gram entries)
 *   - keyword[]       the keyword strings (nextfreekey entries)
 *   - BIP[]           the BIP values (nextbip entries)
 *   - phrasename[]    names of all BIPs and phrases, wrapped in
 *                     #ifdef DEBUG_PARSER
 *   - phrase_start[]  index into gram[] for every phrase after the BIPs
 *   - one "#define P_<name> <number>" per BIP/phrase
 *
 * Numeric tables are laid out 16 values per row and string tables 4 per row.
 */
void dump_tables(void) {
  enum { INTS_PER_ROW = 16, NAMES_PER_ROW = 4, PHRASE_ORIGIN = 512 };
  int n;

  /* The grammar itself. */
  printf("#define MAX_GRAMMAR %d\n", next_gram);
  printf("#define PHRASE_BASE %d\n", PHRASE_ORIGIN + nextbip);
  printf("int gram[MAX_GRAMMAR] = {\n");
  n = 0;
  while (n < next_gram) {
    printf("%5d, ", gram[n]);
    n++;
    if (n % INTS_PER_ROW == 0) printf("\n");
  }
  if (next_gram % INTS_PER_ROW != 0) printf("\n");
  printf("};\n\n");

  /* Keyword strings. */
  printf("#define MAX_KEYWORD %d\n", nextfreekey);
  printf("char *keyword[MAX_KEYWORD] = { // Keywords are based at 256\n  ");
  n = 0;
  while (n < nextfreekey) {
    printf("\"%s\", ", key[n]);
    n++;
    if (n % NAMES_PER_ROW == 0) printf("\n  ");
  }
  if (nextfreekey % NAMES_PER_ROW != 0) printf("\n");
  printf("};\n\n");

  /* BIP values. */
  printf("#define MAX_BIP %d\n", nextbip);
  printf("int BIP[MAX_BIP] = { // BIPs precede PHRASEs at 512 upwards\n");
  n = 0;
  while (n < nextbip) {
    printf("%2d, ", BIP[n]);
    n++;
    if (n % INTS_PER_ROW == 0) printf("\n");
  }
  if (nextbip % INTS_PER_ROW != 0) printf("\n");
  printf("};\n");

  /* Names of every BIP and phrase, for parser debugging only. */
  printf("\n#define MAX_PHRASE %d\n", pnp - PHRASE_ORIGIN);
  printf("#ifdef DEBUG_PARSER\n// FOR DEBUGGING ONLY\n");
  printf("char *phrasename[MAX_PHRASE] = { // Based at 512 upwards\n  ");
  n = PHRASE_ORIGIN;
  while (n < pnp) {
    printf("\"%s\", ", phrasename[n]);
    n++;
    if (n % NAMES_PER_ROW == 0) printf("\n  ");
  }
  if (pnp % NAMES_PER_ROW != 0) printf("\n");
  printf("};\n#endif /* DEBUG_PARSER */\n");

  /* Start index in gram[] of each phrase; BIPs are skipped.  Row breaks
     follow the absolute phrase number, not the position in this table. */
  printf("\nint phrase_start[MAX_PHRASE-MAX_BIP] = {\n");
  n = PHRASE_ORIGIN + nextbip;
  while (n < pnp) {
    printf("%5d, ", phrase[n - PHRASE_ORIGIN]);
    n++;
    if (n % INTS_PER_ROW == 0) printf("\n");
  }
  if (pnp % INTS_PER_ROW != 0) printf("\n");
  printf("};\n\n");

  /* Symbolic name for each BIP/phrase number. */
  n = PHRASE_ORIGIN;
  while (n < pnp) {
    printf("#define P_%s %d\n", phrasename[n], n);
    n++;
  }

}

/*
 * Return the code for a keyword, registering it in key[] on first sight.
 *
 *   keyword - NUL-terminated keyword text.  The string is copied if it has
 *             to be stored, so the caller keeps ownership of its buffer.
 *
 * Returns index+256, where index is the keyword's slot in key[]
 * (256..511 are for keywords).
 *
 * Aborts with a diagnostic if key[] is already full or the copy cannot be
 * allocated.
 */
int keyword_code(char *keyword)
{
  const int capacity = (int)(sizeof key / sizeof key[0]);
  char *copy;
  int i;

  // POOR IMPLEMENTATION for now.  Quick & Dirty to get something working.
  for (i = 0; i < nextfreekey; i++) {
    if (strcmp(keyword, key[i]) == 0) return i+256;
  }

  /* Not seen before.  Check capacity before storing: the table has no
     spare slot beyond its last element. */
  if (nextfreekey >= capacity) {
    fprintf(stderr, "* Too many keywords (limit %d)\n", capacity);
    exit(1);
  }
  copy = strdup(keyword);
  if (copy == NULL) {
    fprintf(stderr, "* Out of memory storing keyword \"%s\"\n", keyword);
    exit(1);
  }
  key[nextfreekey] = copy;
  return (nextfreekey++) + 256; // 256..511 are for keywords
}

void takeon(int finalpass) {
  int gp = 0; // grammar pointer
  int sym, lastsym = ';';
  char *def_name = NULL, *name = NULL, *string = NULL, *keyword = NULL;
  int def_bip = FALSE, def_phrase = FALSE;
  int alt_count = 1, alt_count_index = 0;
  int phrase_count = 0, phrase_count_index = 0;
  int this_phrase_start = 0;
  int indent_len = 0;
  static int max_phrase = 0;

  lineno = 1; // (re)init globals too.
  next_gram = 0;
  pnp = 512; // phrase name pointer
  nextbip = pnp; // First BIPS, then phrases.

  for (;;) {
    switch (sym = nonspace()) {
    case 'B': // BIP DEFINITION
      def_bip = TRUE; break;

    case 'P': // PHRASE DEFINITION
      def_phrase = TRUE; break;

    case 'E': // END OF GRAMMAR
      if (finalpass) dump_tables();
      max_phrase = pnp;
      return;

    case '=': // start of a phrase alternative or a BIP defn
      // name should be valid at this point
      if (def_bip) {
        // expect a number and a ';'.
        int digit = nonspace();
        if (!isdigit(digit)) fatal();
        // For now, single digit...
        // DO SOMETHING HERE WITH "def_name" and "digit" TO DEFINE BIP
        BIP[pnp-512] = digit-'0';
        nextbip = pnp-512+1;
        phrase[pnp-512] = 0;
        phrasename[pnp++] = strdup(def_name);
        if (nonspace() != ';') fatal();
        def_bip = FALSE;
      } else if (def_phrase) {
        // expect a phrase definition.
        phrase[pnp-512] = next_gram;
        phrasename[pnp++] = strdup(def_name);
        def_phrase = FALSE;
        alt_count_index = this_phrase_start = next_gram++; // Hole for number of alternatives
        phrase_count_index = next_gram++; // hole for number of items in first alt.
        alt_count = 1; phrase_count = 0;
      } else {
        fatal(); // missing definition
      }
      break;

    case '<': // phrase def *or* instance within an alt.
      name = upto('>');
      if (def_bip || def_phrase) {
        def_name = name;
      } else {
        int i; for (i = 512; i < max_phrase; i++) {if (strcmp(phrasename[i], name)==0) break;}
        if (finalpass && (i >= max_phrase)) {fflush(stderr); fprintf(stderr, "\n* UNDEFINED P<%s>\n", name); fatal();}
        phrase_count++; gram[next_gram++] = i;
      }
      break;

    case '\'': // string literal
      string = upto('\'');
      phrase_count += strlen(string); // Each char counts as a phrase, albeit a short one...
      { char *s = string; while (*s != '\0') gram[next_gram++] = *s++; }
      break;

    case '"': // keyword literal
      keyword = upto('"');
      phrase_count++;
      gram[next_gram++] = keyword_code(keyword);
      break;

    case ',': // next alternative
      gram[phrase_count_index] = phrase_count; phrase_count = 0;
      phrase_count_index = next_gram++; // hole for number of items in first alt.
      alt_count++;
      break;

    case ';': // end of alternatives
      gram[phrase_count_index] = phrase_count;
      // Can tell is last alt was null by checking lastsym == ',', should we ever need to know
      gram[this_phrase_start] = alt_count;
      gram[phrase_count_index] = phrase_count; phrase_count = 0;
      def_phrase = FALSE;
      free(def_name); def_name = NULL;
      break;

    case '#': // comment to end of line
      {int c; for (;;) {c = fgetc(grammar); if ((c == EOF) || ferror(grammar)) fatal(); if (c == '\n') break;}}
      lineno++;
      break;

    default:
      fatal();
    }
    lastsym = sym;
  }
}

/*
 * Entry point: takeon <grammar-file>
 *
 * Runs takeon() over the grammar file twice, reopening the file for each
 * pass.  Pass 0 only gathers phrase names; pass 1 resolves references and
 * writes the generated tables to stdout.
 *
 * Exit status: 0 on success; 1 on a usage error or if stdout could not be
 * written; the errno value from fopen() if the grammar file cannot be opened.
 */
int main(int argc, char **argv) {
  int pass;
  if (argc != 2) {
    fprintf(stderr, "syntax: takeon teeny.g\n");
    exit(1);
  }
  for (pass = 0; pass <= 1; pass++) {
    grammar = fopen(argv[1], "r");
    if (grammar == NULL) {
      int err = errno; /* capture before fprintf() has a chance to change it */
      fprintf(stderr, "takeon: %s - %s\n", strerror(err), argv[1]);
      exit(err != 0 ? err : 1);
    }
    takeon(pass); // build tables
    fclose(grammar);
  }
  /* The tables are the whole point of the run: do not report success if
     they could not be written out (full disk, closed pipe, ...). */
  if (fflush(stdout) != 0 || ferror(stdout)) {
    fprintf(stderr, "takeon: error writing tables to stdout\n");
    exit(1);
  }
  return 0;
}