// Based on earlier imp77 to html code, this replaces comments in Imp source files
// with space characters.  The output file should be the same number of characters
// as the input file and have the same line breaks.

// This is a pre-processor so that imp sources can be filtered by 'gtcpp' to do
// c-style macro replacement.  Since it was a utility intended for use with C
// programs it was failing when it hit unbalanced quotes in an Imp comment.

// Note that gtcpp does *not* handle Imp identifiers with spaces in the name, so
// this code would have to be modified to remove spaces from identifiers, which
// is quite a complex modification (not unlike the original keyword handling
// code, which was removed).

// "%comment" comments are not handled.  Fortunately I doubt they exist much
// even in old Imp77 code.

// HOWEVER... the uparse/imp77 parser is having trouble with unmatched quote
// symbols in comments, and I need to strip those comments out cleanly.  This
// is unfortunately quite problematic as we need to recognise "!" comments
// everywhere, not just at the start of statements ... in particular, we need
// to detect them after labels - both simple labels and switch labels, which
// can be quite complex since any const expression can occur in a switch label.

// Some nasty cases will be added to the regression tests for this utility.
// Eventually the code will have to be migrated to the line reconstruction
// code of the imp77 parser.  We're lucky that we have original era Imp77
// compilers to compare against.

// Note that "!" comments in Imp77 are *not* parsed comments, but are removed
// on the fly by the parser - they are accepted after any statement terminator,
// and the ';' statement terminator is not a real statement terminator: it is
// a synonym for 'end of line' and is therefore accepted in places where a newline
// character is not a statement terminator, such as the abortion shown in the
// block comment just before the #include lines below.

// ***NOTE*** although this code now does a fairly respectable job of handling
// '!' and '{' comments, it does not handle '%comment' ones, and those can still
// contain unbalanced quotes, which will be problematic for the uparse imp77 parser.


/*
        %constintegerarray fred(0:2) = ;! A comment here???!
          1,2,3
 */

#include <stdio.h>
#include <stdlib.h>

/*
 * Read the next character from 'in', silently discarding carriage returns.
 * Returns the character read, or EOF at end of input / on a read error.
 */
static int next_char(FILE *in)
{
  int c;

  do {
    c = fgetc(in);
  } while (c == '\r');
  return c;
}

/*
 * Blank out a '!' comment.  The '!' itself has already been consumed.
 * Every comment character up to (not including) the newline is replaced by a
 * space; the newline is copied through.  A ';' does NOT end the comment.
 * Returns the last character read: '\n' or EOF.
 */
static int blank_line_comment(FILE *in, FILE *out)
{
  int c;

  fputc(' ', out);                      /* stands in for the '!' */
  while ((c = next_char(in)) != EOF && c != '\n') {
    fputc(' ', out);
  }
  if (c == '\n') {
    fputc('\n', out);
  }
  return c;
}

/*
 * Blank out a '{' comment.  The '{' itself has already been consumed.
 * The comment ends at the matching '}' or at the end of the line, whichever
 * comes first.  Comment text and both braces become spaces; a terminating
 * newline is copied through.
 * Returns the last character read: '}', '\n' or EOF.
 */
static int blank_brace_comment(FILE *in, FILE *out)
{
  int c;

  fputc(' ', out);                      /* stands in for the '{' */
  while ((c = next_char(in)) != EOF && c != '}' && c != '\n') {
    fputc(' ', out);
  }
  if (c == '}') {
    fputc(' ', out);
  } else if (c == '\n') {
    fputc('\n', out);
  }
  return c;
}

/*
 * Copy a quoted literal through unchanged.  'quote' is the opening delimiter
 * (either '"' or '\''), which has already been consumed.  The literal may
 * span line breaks.  The closing delimiter is written only if it was actually
 * present in the input, so an unterminated literal at end of file does not
 * cause an extra character to be emitted.
 * Returns the last character read: the closing delimiter or EOF.
 */
static int copy_literal(int quote, FILE *in, FILE *out)
{
  int c;

  fputc(quote, out);
  while ((c = next_char(in)) != EOF && c != quote) {
    fputc(c, out);
  }
  if (c == quote) {
    fputc(quote, out);
  }
  return c;
}

/*
 * Copy the remainder of a statement whose first character 'first' has already
 * been read (and is not EOF).  '{' comments are blanked, quoted literals are
 * copied verbatim, and the statement ends at a newline, a ';' or a ':' (the
 * ':' is assumed to close a label, so that a '!' comment following the label
 * is recognised by the caller).
 * Returns the character that ended the statement, or EOF if input ran out.
 */
static int copy_statement(int first, FILE *in, FILE *out)
{
  int c = first;

  for (;;) {
    if (c == '{') {
      c = blank_brace_comment(in, out);
      if (c == '\n') {
        return c;                       /* comment ran to end of line */
      }
    } else if (c == '"' || c == '\'') {
      c = copy_literal(c, in, out);
    } else if (c == ':') {
      fputc(c, out);                    /* probably a label: new statement */
      return c;
    } else {
      fputc(c, out);
    }

    if (c == EOF) {
      return EOF;                       /* don't read past end of input */
    }
    c = next_char(in);
    if (c == EOF) {
      return EOF;
    }
    if (c == '\n' || c == ';') {
      fputc(c, out);
      return c;
    }
  }
}

/*
 * Filter Imp source from 'in' to 'out', replacing '!' and '{' comments with
 * spaces while preserving line breaks.  Carriage returns are dropped.
 * '%comment' comments are not recognised.
 */
static void strip_comments(FILE *in, FILE *out)
{
  int c;

  /* Each iteration begins at the start of a (possibly empty) statement. */
  while ((c = next_char(in)) != EOF) {
    if (c == ' ' || c == '\t' || c == '\n' || c == ';') {
      /* Leading indentation, or an empty statement: copy and stay at the
         start-of-statement position so a following '!' is still a comment. */
      fputc(c, out);
    } else if (c == '!') {
      if (blank_line_comment(in, out) == EOF) {
        break;
      }
    } else {
      if (copy_statement(c, in, out) == EOF) {
        break;
      }
    }
  }
}

/*
 * Entry point: filters standard input to standard output.  Command-line
 * arguments are ignored.  Exits with EXIT_FAILURE if reading the input or
 * writing the output failed, EXIT_SUCCESS otherwise.
 */
int main(int argc, char **argv)
{
  (void)argc;
  (void)argv;

  strip_comments(stdin, stdout);

  if (fflush(stdout) == EOF || ferror(stdout) || ferror(stdin)) {
    fputs("error: failed to read input or write output\n", stderr);
    return EXIT_FAILURE;
  }
  return EXIT_SUCCESS;
}
