# for now, invoke with:
#   grep -v "^#include"  c-testsuite/tests/single-exec/00164.c |cc -E -| grep -v "^#" | tee zzz.c | ./c /dev/stdin
#
# NOTE: Since writing this I have written an 'exclude' utility at
#
#              /home/gtoal/src/mkptypes/exclude.c
#
# which expands include files but only local ones using "filename.h" rather
# than system <file.h>...

# to avoid problems with system headers try using the header files from dietlibc with -I ./include and the --nostdinc option to cpp
# I may or may not have a problem with types "int16_t" etc.  Need to look into that.

# Also, if it's needed, there's a version of the C preprocessor (with binaries
# in ~gtoal/bin/gtcpp and ~gtoal/bin/gtcpp_FILE) with the sources being in
# /home/gtoal/src/unicode/mcpp/* - this is the hacked pre-processor that
# uses '$define' instead of '#define' and supports redefinition of C array
# syntax using [] rather than ().  E.g. $define x[a][b] instead of #define x(a,b)

# (the choice of plain or _FILE variant is to allow the choice of expanding __FILE__
#  either in gtcpp or leaving it for cpp to handle in the processed output, which is
#  usually preferred).

# This is a somewhat crude attempt to parse C (somewhere around the C99 level)
# that does successfully parse just about all of my own code.  I wrote it for
# two reasons: 1) to create a context-sensitive filter for C code (to be used
# in my static binary translators and also in my Imp to C language translator)
# and 2) just to see if C could be parsed with my 'uparse' (Edinburgh-style)
# parser without using a lexer.

# There are a couple of areas that it still has some minor problems with -
#   a) some of the more complex typedefs
# and
#   b) struct initialisation
# also
#   c) some later extensions that are now in common use such as "for (int i=0; i<nsteps; i++) ..."

# I may try to fix those but it may not be a priority as my code doesn't use
# the particular obscure cases it has problems with, and it does already parse
# all the C that I need it for.

# There are a couple of places where C is ambiguous (mostly examples using typedefs),
# with the ambiguity only being resolvable by mixing the boundary between syntax and
# semantics - so because this is for a source-to-source filter, we don't resolve those
# here - we just accept one of the two parsing options arbitrarily.  This may be
# improved upon later, if one of the ambiguous phrases needs to be manipulated,
# but as long as it only occurs in some code that the filter is not modifying,
# the text can be passed through with the ambiguity intact to be sorted out later
# by the real C compiler.

{
  // Printable names for the 32 possible values of a type tag, indexed by tag
  // number.  Decode() below indexes this table with the type field of a
  // tagged value.  Entries 1..13 name the CST tags and 16..19 the AST tags;
  // every other slot holds an "*ERROR(n)*" placeholder so that an unexpected
  // tag number still prints something recognisable.
  static char *TypeName[32] = {
    // CST
    "*ERROR(0)*", "CST:BIP_TYPE", "CST:PHRASE_TYPE", "CST:SEMANTIC_TYPE", "CST:KEYWORD_TYPE",
    "CST:CHAR_TYPE", "CST:UTF32CHAR_TYPE", "CST:STRING_TYPE", "CST:UTF32STRING_TYPE",
    "CST:REGEXP_TYPE", "CST:OPTION_TYPE", "CST:COUNT_OF_ALTS", "CST:COUNT_OF_PHRASES",
    "CST:ALT_NUMBER", "*ERROR(14)*", "*ERROR(15)*",
    // AST
    "AST:BIP", "AST:PHRASE", "AST:ATOMLIT", "AST:POOLLIT",
    "*ERROR(20)*", "*ERROR(21)*", "*ERROR(22)*", "*ERROR(23)*",
    "*ERROR(24)*", "*ERROR(25)*", "*ERROR(26)*", "*ERROR(27)*",
    "*ERROR(28)*", "*ERROR(29)*", "*ERROR(30)*", "*ERROR(31)*",
  };

  wchar_t *Decode(int Ph) { // Until Diagnose is padded out a bit...
    static wchar_t tmp[512];
    swprintf(tmp, 511, L"%s", TypeName[(Ph>>AST_type_shift)&AST_type_mask]);
    return tmp;
  }

  // Convert a tagged string-pool index into a pointer to its wide-character
  // text in Stringpool.
  //
  //   p     tagged index; expected to carry the AST_POOL_LIT tag.
  //   file  caller's source file, used only in diagnostics.
  //   line  caller's source line, used only in diagnostics.
  //
  // Anything other than a properly tagged AST_POOL_LIT is reported on stderr,
  // but the lookup is still performed with the tag bits masked off.
  wchar_t *pooltowstr_inner(StrpoolIDX p, const char *file, const int line) {
    const int well_formed = (p != -1) && (P_AST_type(p) == AST_POOL_LIT);

    if (!well_formed) {
      // Report the most specific complaint that applies; the order matters
      // because -1 has to be caught before its tag bits are examined.
      if (p == -1) {
        fprintf(stderr, "* Error: pooltowstr passed -1 (uninitialised string) from %s, line %d\n", file, line);
      } else if (P_AST_type(p) == 0) {
        fprintf(stderr, "* Error: pooltowstr passed an untagged index from %s, line %d\n", file, line);
      } else if (P_AST_type(p) == STRING_TYPE) {
        fprintf(stderr, "* Error: pooltowstr passed a STRING_TYPE rather than an AST_POOL_LIT from %s, line %d\n", file, line);
      } else if (P_AST_type(p) == AST_ATOM_LIT) {
        fprintf(stderr, "* Error: pooltowstr passed a AST_ATOM_LIT rather than an AST_POOL_LIT from %s, line %d\n", file, line);
      } else {
        fprintf(stderr, "* Error: pooltowstr passed an unexpected tag type %d:", P_AST_type(p) >> AST_type_shift);
        fprintf(stderr, " %ls from %s, line %d\n", Decode(p), file, line);
      }
    }

    // Strip the tag to leave the bare pool index.
    p &= AST_idx_mask;
    return &Stringpool(p);
  }
}

# The project that this came from used built-in routines fairly heavily.
# This version based on the new Unicode parser should be able to replace
# these with C<> code or regular expressions in many cases.

# It's not any particular standard C version, but it's close enough to
# the C that I write that it parses all my code which was all I wanted
# it for (actually it was originally for my static binary translator
# project where the output of the translator had to be massaged a little,
# but now it's for my Imp to C project for similar reasons!)

# Note that all I will be using this for will be relatively simple
# almost macro-like transformations to make the generated C more readable,
# for instance replacing A[(5) - (0) + (1)] with A[6] ...

# Built-in phrases referenced from the grammar (<EOF> ends the <SS> rule).
# NOTE(review): the numbers are presumably the codes by which uparse selects
# the corresponding built-in routine - confirm against uparse itself.
B<EOF> = 0;
B<NL> = 2;

C<whitespace> = {
#ifdef IN_PARSER
  // Step TP over any run of spaces, newlines, tabs and form feeds.
  // The rule itself always succeeds, even when nothing was skipped.
  for (;;) {
    switch (source(TP).ch) {
      case ' ':
      case '\n':
      case '\t':
      case '\f':
        TP += 1;
        continue;   // look at the next character
      default:
        break;      // not white space: leave the switch ...
    }
    break;          // ... and then the loop
  }
#endif
  return TRUE;
};

C<C_line_reconstruction> = {
#ifdef IN_PARSER
  // Line reconstruction for C: copies source() into the scratch array SYM,
  // then writes it back into source() with comments removed.  White space,
  // string literals and character literals are copied through verbatim.
  // Each slot written back records in .start/.end the span of original input
  // it stands for.  On completion TP is reset to 0 and the rule returns TRUE;
  // the only exit from the main loop is the DONE() macro below.

  int debug_stropping = 0;

  // The 'C' line reconstruction was thrown together in a couple of minutes and without
  // any consideration to the subtleties of C's lexing.  So expect major problems in this
  // area until it is looked at properly and actually designed.  Current testing
  // environment strips out all lines starting with '#', and all comments too. So
  // the comment stripping here hasn't even been tested yet.

  // Note that line reconstruction in C is unlike in Imp and even unlike lexing in
  // a yacc/lex C compiler.  It is *only* here to remove multiple leading spaces and
  // comments before non-space characters (not *tokens*), however care must be taken
  // in parsing to skip spaces where spaces must be skipped, while checking that there
  // are no spaces in multi-character tokens whose characters must not have spaces
  // between them.  This will require some assistance from the grammar, it cannot
  // all be done in the line reconstruction phase.

  // So far the only concession to C's lexing conventions are that we check that keywords
  // are not followed by an extension of the keyword, so "intx" is not confused with "int x".
  // Otherwise, spaces are pretty much skipped.  I suspect that something like "constint"
  // would currently be accepted for "const int".  I either have to modify uparse.c to
  // handle whitespace differently, or add explicit whitespace to the grammar.


  // The source file has already been read trivially into source().

  // We will copy from source() into temp(), then perform line reconstruction
  // on temp(), writing back to source().  The parser will then parse source()
  // into atoms according to the grammar.  Initially it will only store the
  // reconstructed characters into the atoms, but once it is working, I will
  // modify it to also store the unreconstructed source for use in source-to-source
  // translations, where whitespace, embedded comments, and indentation is
  // desired in the translation, in order to mirror the original file.

  // All arrays are flex and the upper bound is a limit, not a minimum.

  // SYM is released with FREE_FLEX(SYM) inside DONE().
#define MAX_SYM 128000000
  DECLARE(SYM, reconstructed, MAX_SYM);  // was 600000
#define _SYM(x) WRITE(x,SYM,reconstructed)
#define  SYM(x)  READ(x,SYM,reconstructed)

  // Pass 1: copy source() into SYM, one slot per input character, where
  // slot P covers the original span [P, P+1).
  int LASTP, P = 0;
  _SYM(0).start = 0;
  while (source(P).ch != 0 /* WEOF */) {
    _SYM(P).ch = source(P).ch;
    if (P > 0) _SYM(P).start = SYM(P-1).end;
    _SYM(P).end = P+1;
    P += 1;
  }
  _SYM(P).ch = 0 /* WEOF */;
  if (P > 0) _SYM(P).start = SYM(P-1).end;
  _SYM(P).end = P; // no chars for EOF
  LASTP = P;

  if (debug_stropping) {
    int I;
    fprintf(stderr, "source() moved to SYM(0:%d) = \"", LASTP);
    for (I = 0; I < LASTP; I++) {
      fprintf(stderr, "%lc", SYM(I).ch);
    }
    if (SYM(LASTP).ch != 0) fprintf(stderr, "[%d]", SYM(LASTP).ch);
    fprintf(stderr, "\";\n");
  }

  int FP = 0, PP = 0; // Fetch Pointer, Put Pointer.

  // DONE(): write the terminating 0 into source(PP), optionally dump the
  // result, free SYM, reset TP and return TRUE from the rule.  The start of
  // the terminator slot is taken from source(PP-1).end, as at every other
  // site; PP is not a valid index into SYM once anything has been removed.
#define DONE() \
        do {                                                                        \
            FP -= 1; /* the terminating 0*/                                         \
            _source(PP).ch = 0;                                                     \
            if (PP > 0) _source(PP).start = source(PP-1).end;                       \
            _source(PP).end = SYM(FP).end;                                          \
            if (debug_stropping) {                                                  \
              int I;                                                                \
              fprintf(stderr, "SYM(0:%d) moved back to source(0:%d) = \"", FP, PP); \
              for (I = 0; I < PP; I++) {                                            \
                fprintf(stderr, "%lc", source(I).ch);                               \
              }                                                                     \
              if (source(PP).ch != 0) fprintf(stderr, "[%d]", source(PP).ch);       \
              fprintf(stderr, "\";\n");                                             \
            }                                                                       \
            TP = 0; FREE_FLEX(SYM); return TRUE;                                    \
        } while (0)

  wint_t WC, Peek;

  // uparse.c had been modified so that its implicit whitespace skipping no longer skipped '\n'.
  // (The algol60 parser in contrast treats all \n's the same as spaces, as we are now doing here)

  // CHECK_EOF(x): finish via DONE() if x is the terminating 0; otherwise
  // refresh the span recorded for the slot currently being written.
#define CHECK_EOF(x) do if ((x) == 0) DONE(); else { if (PP > 0) _source(PP).start = source(PP-1).end; if (FP > 0) _source(PP).end = SYM(FP-1).end; } while (0)

  // PP is the 'current' slot we are writing into.
  _source(PP).start = SYM(FP).start;

  // Pass 2: every path through the loop body ends in 'continue'; the loop is
  // left only through DONE(), reached from CHECK_EOF at the terminating 0.
  for (;;) {

    if (PP > 0) _source(PP).start = source(PP-1).end; // Keep updated.
    _source(PP).end = SYM(FP).end; // Keep updated.
    WC = SYM(FP++).ch; CHECK_EOF(WC); Peek = SYM(FP).ch; //CHECK_EOF(Peek);

    if ((WC == '/') && (Peek == '*')) {

      // TODO: fold multiple spaces and comments into one.
      //       Instead of using the saved WC to provide spacing and newlines in the output, we
      //       should use the saved comment text, while saving only a ' ' rather than \n \t \f as ch.

      // TODO: remember that fred/*...*/jim generates two tokens, so a comment
      //       that is not surrounded by spaces still must be treated as a space.
      //       Matters for C such as (when b = 2, c = 3):

      //          b- --c    = 0
      //          b-- -c    = -1
      //          b---c     = -1
      //          b-/**/--c = 0
      //          b--/**/-c = -1

      // (and this is before we get into token pasting in the preprocessor!)

      // Step over the '*' of the opener first, so that it cannot double as
      // the '*' of the closer: "/*/" only opens a comment, it does not close it.
      FP++;

      for (;;) {
        WC = SYM(FP++).ch; CHECK_EOF(WC); Peek = SYM(FP).ch; //CHECK_EOF(Peek);
        if ((WC == '*') && (Peek == '/')) {
          WC = SYM(FP++).ch; CHECK_EOF(WC); Peek = SYM(FP).ch; //CHECK_EOF(Peek);
          break; // but still looking.
        }
      }
      continue;
    }

    else if ((WC == '/') && (Peek == '/')) {

      // Note: C's '//' comments are not quite the same as Imp's '!' comments,
      // as the Imp ones are a statement that must come at the start of a boundary
      // whereas the C ones can be appended to any line.  They are more like
      // the Imp '{' but without a matching '}' before the end of the line.
      // Whereas C's /* ... */ comments are *not* like Imp { ... } comments
      // because those are single-line only.  Translating comments from Imp to C
      // is going to be ugly.  Fortunately this program doesn't care :-)

      // Discard up to, but not including, the newline.  The newline is then
      // copied through by the whitespace branch on the next iteration, so
      // the line boundary after the comment is preserved and the text on
      // either side of "//...\n" is not glued together.
      for (;;) {
        if (SYM(FP).ch == '\n') {
          break; // but still looking.
        }
        WC = SYM(FP++).ch; CHECK_EOF(WC);
      }
      continue;
    }

    else if (WC == '\'' || WC == '"') {
      // Character and string literals are copied through untouched up to
      // the matching closing quote; the other kind of quote is ordinary
      // text inside the literal.
      const wint_t Quote = WC;

      _source(PP++).ch = WC;
      for (;;) {
        WC = SYM(FP++).ch; CHECK_EOF(WC); Peek = SYM(FP).ch; //CHECK_EOF(Peek);
        if (WC == Quote) {
          _source(PP).ch = WC;
          if (PP > 0) _source(PP).start = source(PP-1).end; // Leave Peek for later.
          if (FP > 0) _source(PP).end = SYM(FP-1).end; // Leave Peek for later.
          PP++;
          break;
        } else if (WC == '\\') {
          // Copy the backslash and the character it escapes as a pair, so an
          // escaped quote does not end the literal.  If the input ends right
          // after the backslash, leave the terminating 0 in place for
          // CHECK_EOF to find instead of copying it and running off the end.
          _source(PP++).ch = WC;
          if (Peek != 0) {
            _source(PP++).ch = Peek;
            FP++;
          }
        } else {
          _source(PP++).ch = WC;
        }
      }
      continue;
    }

    else if (WC == ' ' || WC == '\n' || WC == '\t' || WC == '\f') {  // use iswblank(WC) instead?
      // TODO: fold multiple spaces and comments into one.
      //       Instead of using the saved WC to provide spacing and newlines in the output, we
      //       should use the saved comment text, while saving only a ' ' rather than \n \t \f as ch.
      _source(PP++).ch = WC;
      continue;
    }

    else {
      // everything else just returns one significant non-space character.

      _source(PP++).ch = WC;
      continue;
    }

  }

  // Not reached: the loop above only exits through DONE(), which frees SYM,
  // resets TP and returns.  (The per-character dump and second FREE_FLEX
  // that used to follow here could never execute and have been removed.)

#undef DONE
#endif

  return TRUE;
};

# for the initial implementation, preprocessor directives (and anything
# starting with a '#' at the start of a line such as pragmas) will not
# be handled.  So they have been stripped from the inputs first!

# I'm considering a mechanism to allow spaces before rules by default but
# not allowed if the rule is explicitly prefixed by a <nosp> test which
# checks there are no spaces present.  This ought to be possible solely
# as a C<...> rule, with no code changes needed to the parser.


# I believe these are the regular expression types that this regex package supports:

# ^string    (start of text)

# string$    (end of text or end of line???)

# [ABC]
# [ABC]?
# [ABC]+

# [^ABC]
# [^ABC]?
# [^ABC]+

# [A-Za-z0-9]

# .      (any)
# abc*   (i.e ab abc abcc abccd etc)
# abc+   (i.e    abc abcc abccd etc)

# (abc|def)
# (abc|def)?
# (abc|def)+

# ... and combinations (sequences) of the above

# \< and \> are for unimplemented word boundary functionality.
#           where \< must match [A-Za-z0-9_] and \> must match [^A-Za-z0-9_]

# Probably <string-constant> should be in here too:
# A literal constant: floating, integer, character or enumeration constant,
# each optionally preceded by one or more casts (<cast_opt>).  The floating
# alternative is listed first so that the integer alternative cannot claim
# just the leading digits of a floating constant.
P<constant> = <cast_opt><floating-constant>,  # 1.1 must be parsed before 1
              <cast_opt><integer-constant>,
              <cast_opt><character-constant>,
              <cast_opt><enumeration-constant>;

# Not sure if allowing spaces anywhere in any constant is valid, but I'll accept it for now.
# Integer-constant suffixes.  <U_opt> and <L_opt> are each one optional
# unsigned / long marker, in either case.
P<U_opt> = 'U', 'u', ;
P<L_opt> = 'L', 'l', ;

# <UL_opt> is the whole optional suffix: an unsigned marker followed by up to
# two long markers, or a long marker followed by an optional second long
# marker and an optional unsigned marker.  The leading marker is accepted in
# lower case as well as upper case, so that "10u", "1l" and "0xFFul" parse.
# The lower-case alternatives come after the upper-case ones so that the
# existing two alternatives keep their positions.
P<UL_opt> = 'U' <L_opt> <L_opt>,
            'L' <L_opt> <U_opt>,
            'u' <L_opt> <L_opt>,
            'l' <L_opt> <U_opt>,
            ;

# An integer constant in any base, with its optional suffix.  The prefixed
# forms (0x..., 0b...) are tried before octal, which would otherwise match
# just their leading "0".
P<integer-constant> = <hexadecimal-constant><UL_opt>,
                      <binary-constant><UL_opt>,
                      <octal-constant><UL_opt>,
                      <decimal-constant><UL_opt>;

# The digits of an integer constant in each base (suffix handled by the
# caller, <integer-constant>).

# Decimal: a non-zero digit followed by any number of digits.
P<decimal-constant> = «[1-9][0-9]*»;

# Octal: a leading 0 followed by any number of octal digits (so a lone "0" is octal).
P<octal-constant> = «0[0-7]*»;

# Hexadecimal (0x / 0X prefix) and binary (0b / 0B prefix); both need at least one digit.
P<hexadecimal-constant> = «0[xX][0-9a-fA-F]+»;
P<binary-constant> = «0[bB][0-1]+»;

# A floating constant in decimal, hexadecimal or binary notation.
P<floating-constant> = <decimal-floating-constant>,
                       <hexadecimal-floating-constant>,
                       <binary-floating-constant>;   # octal too?

##P<fractional-constant> = «([0-9]*\\\\.)?[0-9]+»;
# The mantissa of a decimal floating constant: "1.5", ".5" (first
# alternative) or "1." (second alternative).
P<fractional-constant> = <digit-sequence_opt> '.' <digit-sequence>,
                         <digit-sequence> '.';

# Decimal exponent: e or E, an optional sign, then at least one digit.
# (The digits are mandatory, as they already are in <binary-exponent-part>;
# a bare "e" after a number is not an exponent.)
P<exponent-part> = «[eE][+-]?[0-9]+»;
P<exponent-part_opt> = <exponent-part>, ;


##P<decimal-floating-constant> = «([0-9]*\\\\.)?[0-9]+([eE][+-]?[0-9]*)?[flFL]?»,
##                               «[0-9]+[eE][+-]?[0-9]*[flFL]?»;

# Optional f/F/l/L suffix of a floating constant (the regexp itself may match nothing).
P<floating-suffix_opt> = «[flFL]?»;

# One or more decimal digits, and the optional form of the same.
P<digit-sequence> = «[0-9]+»;
P<digit-sequence_opt> = <digit-sequence>, ;
# Decimal floating constant: either a mantissa containing a '.' with an
# optional exponent, or plain digits with a mandatory exponent.
P<decimal-floating-constant> = <fractional-constant> <exponent-part_opt> <floating-suffix_opt>,
                               <digit-sequence> <exponent-part> <floating-suffix_opt>;

# Hexadecimal floating constant: 0x prefix, a mantissa with or without a '.',
# a mandatory binary exponent, and an optional f/F/l/L suffix.
P<hexadecimal-floating-constant> = «0[xX]» <hexadecimal-fractional-constant> <binary-exponent-part> «[flFL]?»,
                                   «0[xX]» <hexadecimal-digit-sequence> <binary-exponent-part> «[flFL]?»;


# Binary floating constant: the same shape as the hexadecimal form but with
# a 0b prefix and binary digits in the mantissa.
P<binary-floating-constant> = «0[bB]» <binary-fractional-constant> <binary-exponent-part> «[flFL]?»,
                              «0[bB]» <binary-digit-sequence> <binary-exponent-part> «[flFL]?»;


# One or more hexadecimal digits.
P<hexadecimal-digit-sequence> = «[A-Fa-f0-9]+»;

# Hexadecimal mantissa containing a '.': digits are optional before the point
# when there are digits after it, and required before it otherwise.
P<hexadecimal-fractional-constant> = «[A-Fa-f0-9]*» '.' «[A-Fa-f0-9]+»,
                                     «[A-Fa-f0-9]+» '.';

# One or more binary digits.
P<binary-digit-sequence> = «[0-1]+»;

# Binary mantissa containing a '.', with the same two shapes as the hexadecimal one.
P<binary-fractional-constant> = «[0-1]*» '.' «[0-1]+»,
                                «[0-1]+» '.';

# Binary exponent: p or P, an optional sign, then at least one decimal digit.
P<binary-exponent-part> = «[pP][+-]?[0-9]+»;

# An enumeration constant is syntactically just an identifier.
P<enumeration-constant> = <identifier>;

# WARNING: Looks like \ handling is not yet done properly in regexps?
#          - what is entered in the .g file should look just like what
#            would be entered in a .c file ...

##P<character-constant> = «[L]?'(\\\\[\\\\'\\"?abfnrtv]|[~'])*'»;
# Character constant: optional L prefix, then any number of characters
# between single quotes.
P<character-constant> = <longchartype_opt> «'» <sqchars> «'»;
# Zero or more characters of a character constant (right-recursive list).
P<sqchars> = <sqchar> <sqchars>, ;
# One character of a character constant: either an escape sequence, or a
# character matched by the class in the first alternative.
P<sqchar> = «[^'\\\\]», <escaped-char>;
# A backslash followed by one character from the accepted escape set.
# Only the first character after the backslash is matched here.
# NOTE(review): presumably the remaining digits of an octal or \x escape are
# picked up as further <sqchar>s - confirm.
P<escaped-char> = '\\' «[\\\\\\"'?abfnrtvx0-9]»;

# Optional wide-literal prefix, shared by character and string constants.
P<longchartype_opt> = 'L', ;

##This will replace <dqstring>
##P<string-constant> = «[L]?\\"(\\\\[\\"'?abfnrtv]|[~\\"])*\\"»;

# The raw spelling of an identifier, with no keyword or context checks.
# The <!sq> guard rejects a name that is immediately followed by a single
# quote, so that the L of L'ch' is not taken as an identifier.
P<unchecked-identifier> = «[A-Za-z_][A-Za-z0-9_]*» <!sq>;   # «[ ]*[A-Za-z][A-Za-z ]*»

P<sq>                = «'»;    # temporarily needed for disambiguating L'ch' in <unchecked-identifier>.
##These have been replaced by <character-constant>:
##P<sqchar>            = '\\' <sq>, <!sq> «.»;
##P<sqstringchars>     = <sqchar> <sqstringchars>, ;
##P<_sqstring>         = <sq> <sqstringchars> <sq>;
##P<sqstring>          = <w> <_sqstring>;

##Replaced by regexps based on https://www.open-std.org/jtc1/sc22/wg14/www/docs/n1124.pdf
##though I still wonder if Ll or lL are allowed as well as ll and LL.
##(personally I think lower-case L should *never* be allowed in integer constants!)
##P<const-type_opt> = <len_opt> <unsign_opt>;
##P<len_opt> = 'LL', 'L', ;
##P<unsign_opt> = 'U', ;


# String literals.
P<dq>                = '"';
# One character of a string literal: an escaped double quote, an escaped
# backslash, or any character that is not the closing double quote.
# The escaped-backslash alternative is needed so that a backslash pair is
# consumed as a unit: without it, the second backslash of "\\" followed by the
# closing quote was read as an escaped quote and the string never terminated.
# It must come before the catch-all alternative, which would otherwise take
# the first backslash on its own.
P<dqchar>            = '\\' <dq>, '\\' '\\', <!dq> «.»;
# Zero or more string characters (right-recursive list).
P<dqstringchars>     = <dqchar> <dqstringchars>, ;
# A string literal with optional L prefix, optionally followed by further
# adjacent literals.
P<dqstring>          = <longchartype_opt><dq> <dqstringchars> <dq> <dqstring_opt>;
P<dqstring_opt>      = <dqstring>, ;  # implicit concatenation

# The uparse parser now supports creating an automatic list of keywords from
# any strings enclosed in ""s.  Not so for ''s.  We still have to work out how
# to access that list from here as a predicate though!
C<preceded-by-alpha> = {
#ifdef IN_PARSER
  // Succeeds (returns 1) only when no white space separates the current
  // position from the previous character and that character is a letter,
  // digit or underscore.  Returns 0 otherwise, including at the very start
  // of the input.
  const int entry_TP = TP;

  parse_whitespace();

  // If white space was skipped the answer is 0, and TP stays wherever
  // parse_whitespace() left it.  Otherwise TP is unchanged from entry.
  if (TP == entry_TP && TP != 0) {
    if (   (source(TP-1).ch == '_')
        || (source(TP-1).ch >= '0' && source(TP-1).ch <= '9')
        || (source(TP-1).ch >= 'A' && source(TP-1).ch <= 'Z')
        || (source(TP-1).ch >= 'a' && source(TP-1).ch <= 'z')) {
      return 1;
    }
  }
#endif
  return 0;
};

# Guard used by <identifier>: the text here must not directly continue a
# preceding name or number, and must not be a keyword.
P<not-preceded-by-alpha_and_is-not-keyword> = <!preceded-by-alpha> <!keyword>;  # indirection caused by bug with skipped spaces not being backtracked on error... (TODO)
# BUG: <!keyword works but has skipped spaces, <not-a-keyword> causes a runtime error. WHY????
###P<identifier> =
###   <!preceded-by-alpha> <!keyword> <unchecked-identifier>;
##P<identifier> =
##   <!preceded-by-alpha> <unchecked-identifier>;

# <tag> and <new-identifier> are syntactically plain identifiers; the separate
# names record the role of the identifier at the point of use (a struct/union
# member name after '.' or "->", and a name being declared, respectively).
P<tag> = <identifier>;
P<new-identifier> = <identifier>;
# An identifier: the guard above, followed by the raw identifier spelling.
P<identifier> =
   <?not-preceded-by-alpha_and_is-not-keyword> <unchecked-identifier>;

#P<identifier> =
#   <?not-preceded-by-alpha_and_is-not-keyword> <unchecked-identifier> {
#  fprintf(stderr, "Calling C code to compile identifier.\n");
#  t[1] = -1;
#  t[2] = compile(P(2), depth+1 /* P_unchecked_identifier */);
#  return t[0] = P_mktuple(P_identifier, alt, 2/*phrases*/, t); /* (note t[], not T[]) */
#};

C<followed-by-alpha> = {
#ifdef IN_PARSER
  // Is a boolean return what is needed here?
  // Returns 1 when the character at TP is a letter, digit or underscore,
  // i.e. when it would extend a name ending just before TP; 0 otherwise.
  // TP itself is not moved.
  if (source(TP).ch == '_') return 1;
  if ((source(TP).ch >= '0') && (source(TP).ch <= '9')) return 1;
  if ((source(TP).ch >= 'A') && (source(TP).ch <= 'Z')) return 1;
  if ((source(TP).ch >= 'a') && (source(TP).ch <= 'z')) return 1;
  return 0;
#else
  return 0;
#endif
};

## C<keyword-not-allowed-in-context-of-an-identifier> = {
##    return is_in_array(last_internal_identifier_seen, keyword, NUM_KEYWORDS);
## };

# I've added a mechanism that allows uparse to build a list of keywords from strings in "quotes"
# (which is triggered by C<keyword> being defined) but for now I'll just use a hard-coded list...

# A keyword is one of the reserved words below, provided it is not
# immediately followed by a letter, digit or underscore (so "intx" is an
# identifier, not "int" followed by "x").
P<keyword> = <actual-keyword> <!followed-by-alpha>;
# The hard-coded list of reserved words.  Note that "double" is listed before
# "do", which is a prefix of it.
# NOTE(review): presumably alternatives are tried in order and the first
# match wins, which would make that ordering significant - confirm.
P<actual-keyword> =
   "if",
   "const",
   "struct",
   "union",
   "sizeof",
   "typeof",
   "double",
   "long",
   "char",
   "float",
   "void",
   "enum",
   "short",
   "int",
   "signed",
   "unsigned",
   "volatile",
   "auto",
   "register",
   "static",
   "extern",
   "goto",
   "continue",
   "break",
   "return",
   "while",
   "do",
   "for",
   "switch",
   "else",
   "case",
   "default",
   "typedef";

# Top-level rule: run the line reconstruction over the whole input first,
# then parse zero or more external declarations up to end of file.
P<SS> =
   <C_line_reconstruction> <external-declaration-list_opt> <EOF>;

# One or more external declarations (right-recursive list), and the
# possibly-empty form of the same.
P<external-declaration-list> = <external-declaration> <external-declaration-list_opt>;
P<external-declaration-list_opt> =
   <external-declaration-list>,
  ;

C<pp_directive> = {
#ifdef IN_PARSER
  // Recognises a preprocessor-style line: optional white space, a '#', and
  // everything after it up to (but not including) the end of the line.
  // Returns 1 with TP at the line end when a '#' is found, 0 otherwise.
  int WP = TP;  // Pick up start of white space
  parse_whitespace();
  if (source(TP).ch == '#') {
  
    // Although we currently read from a '#' at the start of a *statement*,
    // C requires us to accept preprocessor directives only from the start
    // of a *line* (modulo leading spaces) so (TODO) we need to keep a
    // 'start of line' flag as well, and test that when checking for a
    // preprocessor directive.

    // Actually even this scheme isn't proper C because a preprocessor
    // directive can be at the start of a line yet in the middle of
    // a statement.  I think this means it *must* be handled during
    // line reconstruction, which means (if we are going to handle
    // #define as well as #pragma not to mention #include), that all
    // of cpp needs to be built in to the line reconstruction and
    // a special mechanism (hack) has to be added to pass #pragma
    // instructions through to the compiler part.

    _source(TP).start = WP;
    TP += 1;
    // Skip the rest of the line.  Also stop at the terminating 0, otherwise
    // a directive on a final line with no newline would run off the end of
    // the input.
    while (source(TP).ch != '\n' && source(TP).ch != 0) TP += 1 /* skip */;
    // maybe should include the \n for a # directive?
    if (TP > 0) _source(TP).start = source(TP-1).end;
    _source(TP).end = TP;
    return 1;
  } else
#endif
  return 0;
};

# One item at file scope: a preprocessor line, a typedef, a procedure or
# function declaration/definition, a variable declaration, a bare enum or
# struct/union specifier, or an empty statement.
# (<typedef-declaration>, <proc-fn-decl> and <enum-specifier> are defined elsewhere.)
P<external-declaration> =
   <pp_directive>,
   <typedef-declaration> ';',
   <proc-fn-decl>,
   <possibly-initialised-scalar-or-array-decl> ';',
   <enum-specifier>,                               # do these need a ';' ?
   <struct-or-union-specifier>,                    # "    "     "   "    ?
   ';'; # null statement

# Oops - these can be initialised.  Not yet done so.
#
# A struct or union specifier: named with a body, anonymous with a body, or
# just a reference to a named struct/union.  The forms with a body are listed
# before the bare reference, which is a prefix of the first form.
P<struct-or-union-specifier> =
   <struct-or-union> <identifier> '{' <struct-field-declarations> '}',
   <struct-or-union> '{' <struct-field-declarations> '}',
   <struct-or-union> <identifier>;   # semicolon needed???

P<struct-or-union> =
   "struct",
   "union";

# One or more field declarations (at least one is required).
P<struct-field-declarations> =
   <struct-field-declaration> <rest-of-struct-field-declarations>;

P<rest-of-struct-field-declarations> =
   <struct-field-declaration> <rest-of-struct-field-declarations>,
  ;

# A single field: an ordinary declaration, or a nested struct/union specifier.
P<struct-field-declaration> =
   <possibly-initialised-scalar-or-array-decl> ';',
   <struct-or-union-specifier> ';';  # added to support "union { int a; int b; };" without a name (i.e. "union { int a; int b; } fred;")

# I originally thought you could only initialise an array after a declaration
# that had '[]' in it.  But not the case - a typedef'd declaration may have
# the array part in the typedef rather than the instance.  Ugly language.
#
# Also we can have initialised structs :-/
#

# These can be the fields of a struct??? that doesn't make sense.
# - it's the top-level struct declaration that should be initialisable, not the fields.
# should allow:
#   struct { int a; int b; int c; } s = {1, 2, 3};
# or
#   struct S {int a; int b;};
#   struct S s = { .b = 2, .a = 1};

# A variable declaration: optional storage class, optional const/volatile
# qualifier, a type, then one or more comma-separated declarators.
# (<auto-reg-static-ext_opt>, <const-or-volatile-type-qualifier_opt> and
# <type> are defined elsewhere.)
P<possibly-initialised-scalar-or-array-decl> =
   <auto-reg-static-ext_opt> <const-or-volatile-type-qualifier_opt> <type> <rest-of-scalar-or-array-decl>;


# The <scalar-init_opt> part of function pointers is usually going to be "= &procname"... any other type of scalar is dubious...

# One declarator and whatever follows it.  The three alternatives are:
#   1) an array:             *name[bounds] = {init}
#   2) a function pointer:   *(*name)(params) = init
#   3) a scalar:             *name = init
# each with optional indirection and an optional further ", declarator".
P<rest-of-scalar-or-array-decl> =
   <indirection-decl_opt> <new-identifier> <array-bounds> <array-init_opt> <rest-of-scalar-or-array-decl_opt>,
   <indirection-decl_opt> '(' <indirection-decl_opt> <new-identifier> ')' '(' <param-list_opt> ')' <scalar-init_opt> <rest-of-scalar-or-array-decl_opt>,
   <indirection-decl_opt> <new-identifier> <scalar-init_opt> <rest-of-scalar-or-array-decl_opt>;

# Optional continuation of a declarator list after a comma.
P<rest-of-scalar-or-array-decl_opt> =
   ',' <rest-of-scalar-or-array-decl>,
  ;

##P<scalar-init_opt> =
##   '=' <assignment-expression>,
##  ;

# The original code failed to handle "int b = a = 123;" or "int b = a[123] = 123;" or "int b = fred + (a = 123);"
# and my quick hack fix failed as well - I think it's
# another example of the 'maximal munch' problem same as
# with a series of casts before a bracketed variable.
# No - wait - it's more likely that <unary-rvalue-expression> does not allow assignment statements?

##P<more-assignments> = <new-identifier> '=' <more-assignments>, ;
# Optional "= value" initialiser of a scalar declarator.
P<scalar-init_opt> =
   '=' <assignment-expression>,
   ##'=' <constant-initializer>,
   ##'=' <unary-rvalue-expression>,
  ;

# An assignment expression: an optional leading assignment target
# (<lvalue-assign_opt>, defined elsewhere), a conditional expression, and any
# number of further "<assignment-operator> <conditional-expression>" pairs.
P<assignment-expression> =
   <lvalue-assign_opt> <conditional-expression> <rest-of-assignment-expression_opt>;

P<rest-of-assignment-expression_opt> =
   <assignment-operator> <conditional-expression> <rest-of-assignment-expression_opt>,
  ;

# The ?: operator: a logical-or expression optionally followed by
# "? expression : conditional-expression".
P<conditional-expression> =
   <logical-or-expression> <rest-of-conditional-expression>;

P<rest-of-conditional-expression> =
   '?' <expression> ':' <conditional-expression>,
  ;

# A full expression is a comma-separated sequence of assignment expressions.
P<expression> =
   <comma-statement>;

P<comma-statement> =
   <assignment-expression> <rest-of-comma-statement>;

P<rest-of-comma-statement> =
   ',' <assignment-expression> <rest-of-comma-statement>,
  ;

# The binary-operator precedence ladder, loosest binding first.  Every level
# has the same shape: one operand from the next tighter level, followed by a
# right-recursive "rest-of-..." rule that matches zero or more
# "operator operand" pairs (the empty alternative ends the chain).
# <eqop>, <relop>, <shiftop>, <plusminus> and <mulop> are defined elsewhere.

# ||
P<logical-or-expression> =
   <logical-and-expression> <rest-of-logical-or-expression>;

P<rest-of-logical-or-expression> =
   "||" <logical-and-expression> <rest-of-logical-or-expression>,
  ;

# &&
P<logical-and-expression> =
   <inclusive-or-expression> <rest-of-logical-and-expression>;

P<rest-of-logical-and-expression> =
   "&&" <inclusive-or-expression> <rest-of-logical-and-expression>,
  ;

# |
P<inclusive-or-expression> =
   <exclusive-or-expression> <rest-of-inclusive-or-expression>;

P<rest-of-inclusive-or-expression> =
   '|' <exclusive-or-expression> <rest-of-inclusive-or-expression>,
  ;

# ^
P<exclusive-or-expression> =
   <bitwise-and-expression> <rest-of-exclusive-or-expression>;

P<rest-of-exclusive-or-expression> =
   '^' <bitwise-and-expression> <rest-of-exclusive-or-expression>,
  ;

# &
P<bitwise-and-expression> =
   <equality-expression> <rest-of-bitwise-and-expression>;

P<rest-of-bitwise-and-expression> =
   '&' <equality-expression> <rest-of-bitwise-and-expression>,
  ;

# equality operators (<eqop>)
P<equality-expression> =
   <relational-expression> <rest-of-equality-expression>;

P<rest-of-equality-expression> =
   <eqop> <relational-expression> <rest-of-equality-expression>,
  ;

# relational operators (<relop>)
P<relational-expression> =
   <shift-expression> <rest-of-relational-expression>;

P<rest-of-relational-expression> =
   <relop> <shift-expression> <rest-of-relational-expression>,
  ;

# shift operators (<shiftop>)
P<shift-expression> =
   <additive-expression> <rest-of-shift-expression>;

P<rest-of-shift-expression> =
   <shiftop> <additive-expression> <rest-of-shift-expression>,
  ;

# additive operators (<plusminus>)
P<additive-expression> =
   <multiplicative-expression> <rest-of-additive-expression>;

P<rest-of-additive-expression> =
   <plusminus> <multiplicative-expression> <rest-of-additive-expression>,
  ;

# multiplicative operators (<mulop>); operands are unary expressions
P<multiplicative-expression> =
   <unary-rvalue-expression> <rest-of-multiplicative-expression>;

P<rest-of-multiplicative-expression> =
   <mulop> <unary-rvalue-expression> <rest-of-multiplicative-expression>,
  ;

# my reading of the spec is that "sizeof" <type-specifier> is valid but "sizeof" '(' <type-specifier> ')' is not,
# however the latter appears to be accepted by gcc.
# Should we merge <type-specifier> with <indirection-unary-ops_opt>? Is <type-specifier> ever used without an optional '*' following?

# A unary expression used for its value: a prefix operator (optionally
# preceded by casts) applied to a further unary expression, one of the
# forms of sizeof, or a postfix expression.
# (<arithmetic-unary-op>, <boolean-unary-ops>, <bitwise-unary-ops>,
# <indirection-unary-ops> and <type-specifier> are defined elsewhere.)
P<unary-rvalue-expression> =
   <cast_opt><arithmetic-unary-op> <unary-rvalue-expression>,
   <cast_opt><boolean-unary-ops> <unary-rvalue-expression>,
   <cast_opt><bitwise-unary-ops> <unary-rvalue-expression>,
             <indirection-unary-ops> <unary-address-expression>,
   <cast_opt>"sizeof" <unary-rvalue-expression>,
   <cast_opt>"sizeof" <unary-lvalue-expression>,
   <cast_opt>"sizeof" <type-specifier> <indirection-unary-ops_opt>,
   <cast_opt>"sizeof" '(' <type-specifier> <indirection-unary-ops_opt> ')',
   <postfix-rvalue-expression>;

# A primary expression or an lvalue expression followed by any number of
# postfix operations.
P<postfix-rvalue-expression> =
   <primary-expression> <rest-of-postfix-rvalue-expression>,
   <unary-lvalue-expression> <rest-of-postfix-rvalue-expression>;

# Zero or more postfix operations: subscript, call, member selection with
# '.' or "->", and post-increment/decrement.
# (<HACK1> and <HACK2> are defined elsewhere.)
P<rest-of-postfix-rvalue-expression> =
   '[' <HACK1> <expression> <HACK2> ']' <rest-of-postfix-rvalue-expression>,
   '(' <actual-param-list> ')' <rest-of-postfix-rvalue-expression>,
   '.' <tag> <rest-of-postfix-rvalue-expression>,
   "->" <tag> <rest-of-postfix-rvalue-expression>,
   <post-increment-op> <rest-of-postfix-rvalue-expression>,
  ;

# Unfortunately with this parser, '...' strings are really '.' '.' '.' sequences, and
# each unit is allowed to have whitespace separating it.  Using "..." will force no
# spaces between the characters, but will have the side-effect of adding "..." into
# an internal table that was intended for use in checking keywords.  But we can live
# with that.
# Postfix ++ and --, written as "..." strings so that no white space is
# allowed between the two characters.
P<post-increment-op> =
   "++",
   "--";

# The arguments of a call: empty, or one or more comma-separated assignment
# expressions.
P<actual-param-list> =
   <assignment-expression> <rest-of-param-list>,
  ;

P<rest-of-param-list> =
   ',' <assignment-expression> <rest-of-param-list>,
  ;

P<unary-lvalue-expression> =
   <cast_opt><pre-increment-op> <unary-lvalue-expression>,
             <indirection-unary-ops> <unary-lvalue-expression>,
   <unary-address-expression>,
   <postfix-lvalue-expression>;

P<postfix-lvalue-expression> =
   <primary-expression> <rest-of-postfix-expression>;

# Zero or more postfix operations in an lvalue context.  Same shape as
# <rest-of-postfix-rvalue-expression>, except that a call '(' ... ')' must be
# followed by at least one further postfix operation
# (<non-empty-rest-of-postfix-expression>) -- presumably because the bare
# result of a call is not an lvalue.
P<rest-of-postfix-expression> =
   '[' <HACK1> <expression> <HACK2> ']' <rest-of-postfix-expression>,
   '(' <actual-param-list> ')' <non-empty-rest-of-postfix-expression>,
   '.' <tag> <rest-of-postfix-expression>,
   "->" <tag> <rest-of-postfix-expression>,
   <post-increment-op> <rest-of-postfix-expression>,
  ;

# Same alternatives as <rest-of-postfix-expression> but without the empty
# one, so at least one postfix operation must be present.
P<non-empty-rest-of-postfix-expression> =
   '[' <HACK1> <expression> <HACK2> ']' <rest-of-postfix-expression>,
   '(' <actual-param-list> ')' <non-empty-rest-of-postfix-expression>,
   '.' <tag> <rest-of-postfix-expression>,
   "->" <tag> <rest-of-postfix-expression>,
   <post-increment-op> <rest-of-postfix-expression>;

# this does not recognise '(unsigned) *p' in "int jim = 123; int *p = &jim; unsigned int fred = (unsigned) *p;"

# An address operator applied to an lvalue expression, optionally preceded
# by casts.
P<unary-address-expression> =
   <cast_opt><address-operator> <unary-lvalue-expression>;

#
# There is a problem parsing (cast)(expr), because multiple casts are allowed,
# as in (unsigned long)(long)(variable)
# so the <cast> rule gobbles up the (variable) and what follows then causes a
# syntax error.  Or worse, doesn't ... as in (long) (short) (fred)*(jim)
# which parses as a cast of "*(jim)" rather than a cast of "(fred)*(jim)".
#
# There doesn't seem to be a way to fix this in this sort of parser without
# mixing the syntactic and semantic levels which unfortunately rules out
# program-at-a-time parsing.  Meanwhile working around it with this hack
# which limits the number of cascading casts!
#

# An identifier, constant or string literal (each optionally preceded by any
# number of casts), or a parenthesised expression preceded by zero to four
# casts.  The explicit <cast> repetitions are the hack that limits the number
# of cascading casts in front of a bracketed expression.
P<primary-expression> =
   <cast_opt><identifier>,
   <cast_opt><constant>,
   <cast_opt><dqstring>,
   '(' <expression> ')',
   # before a bracketed expression is the only place where the maximal-munch of <cast_opt> causes problems:
   <cast>'(' <expression> ')',
   <cast><cast>'(' <expression> ')',
   <cast><cast><cast>'(' <expression> ')',
   <cast><cast><cast><cast>'(' <expression> ')';

# Zero or more consecutive casts.
P<cast_opt> = <cast> <cast_opt>, ;
# A single parenthesised cast: a type specifier with optional '*'s, or a
# "typeof" applied either to an lvalue expression or to a type.
P<cast> =
   '(' <type-specifier> <indirection-unary-ops_opt> ')',
   '(' "typeof" '(' <unary-lvalue-expression> ')' ')',
   '(' "typeof" '(' <type-specifier> <indirection-unary-ops_opt> ')' ')';

# <indirection-decl_opt> looks just like <indirection-unary-ops_opt> except that it only
# happens in a declaration, not a data access.  The significant difference is that the '*'
# can be preceded by a "const" (or "volatile") keyword, e.g. "int const *fred" which
# is a pointer to a constant int (a constant pointer would be "int * const fred").
#

# Zero or more '*'s in a declaration, each optionally preceded by a
# "const" or "volatile" qualifier.
P<indirection-decl_opt> =
   <const-or-volatile-type-qualifier_opt> '*' <indirection-decl_opt>,
  ;

# One or more '*' dereference operators; the first may be preceded by casts.
P<indirection-unary-ops> =
   <cast_opt>'*' <indirection-unary-ops_opt>;

# Zero or more '*'s.
P<indirection-unary-ops_opt> =
   '*' <indirection-unary-ops_opt>,
  ;

# A type name: either a basic type specifier or a typedef'd name.
P<type-specifier> =
   <basic-type-specifier>,
   <typedef-name>;

# TO DO: This has to interact with the back-end in order to distinguish typedef'd names from anything else!
# unfortunately this completely breaks the separation of syntax from semantics.  C is badly designed as it
# assumes (and indeed almost forces) a particular organisation of compiler structure.
# As written, any identifier is accepted as a typedef name.
P<typedef-name> =
   <identifier>;

# I think struct or union or enum specifier below can be an actual declaration using '{}'s
# rather than just a struct or enum name.
#

# The built-in types ("void", the floating types, "char" and the integer
# types with optional signedness) plus struct/union and enum specifiers.
P<basic-type-specifier> =
   "void",
   "double",
   "long" "double",
   "float",
   <signed_opt> "char",
   <signed-inttype>,
   <struct-or-union-specifier>,
   <enum-specifier>;

# An enum: anonymous with a body, named with a body, or a bare reference
# to a named enum.
P<enum-specifier> =
   "enum" '{' <enumerator-list> '}',
   "enum" <identifier> '{' <enumerator-list> '}',
   "enum" <identifier>;                               # do these need a ';' ?

# One or more comma-separated enumerators.
P<enumerator-list> =
   <enumerator> <rest-of-enumerator-list>;

# Tail of the enumerator list; a lone trailing ',' is also accepted.
P<rest-of-enumerator-list> =
   ',' <enumerator> <rest-of-enumerator-list>,
   ',',
  ;

# An enumerator name, optionally given an explicit '=' constant value.
P<enumerator> =
   <identifier> '=' <constant-expression>,
   <identifier>;

# Syntactically a constant expression is just a conditional expression;
# this rule adds no further restriction.
P<constant-expression> =
   <conditional-expression>;

# Checked: "signed int" / "unsigned int" are not omitted - they are covered by the <signed_opt> "int" alternative below.

# Integer types: "short", "long long" and "long" (each with an optional
# trailing "int"), "int", and a bare "signed" or "unsigned".  All but the
# bare forms take an optional signedness prefix.  "long" "long" is listed
# before "long" -- presumably so the longer form is tried first.
P<signed-inttype> =
   <signed_opt> "short" <int_opt>,
   <signed_opt> "long" "long" <int_opt>,
   <signed_opt> "long" <int_opt>,
   <signed_opt> "int",
   "signed",
   "unsigned";

# Optional "int" keyword.
P<int_opt> =
   "int",
  ;

# Optional signedness keyword.
P<signed_opt> =
   "signed",
   "unsigned",
  ;

# Address-of operator.  "&&" is listed ahead of '&' -- presumably so the
# two-character form is tried first.
P<address-operator> =
   "&&",
   '&';

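# && is the gcc "labels as values" extension: &&label takes the address of a label (typically used to build arrays of labels for computed gotos).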

# Prefix increment / decrement operator.
P<pre-increment-op> =
   "++",
   "--";

# Precedence is built into this grammar.  It's easier than handling it
# with operators of equal precedence and a reverse-Polish stack (the
# shunting-yard algorithm).

# One or more '~' (bitwise complement) operators.
P<bitwise-unary-ops> =
   '~' <rest-of-bitwise-unary-ops>;

# Zero or more further '~' operators.
P<rest-of-bitwise-unary-ops> =
   '~' <rest-of-bitwise-unary-ops>,
  ;

# One or more '!' (logical not) operators.
P<boolean-unary-ops> =
   '!' <rest-of-boolean-unary-ops>;

# Zero or more further '!' operators.
P<rest-of-boolean-unary-ops> =
   '!' <rest-of-boolean-unary-ops>,
  ;

# Unary plus or minus.
P<arithmetic-unary-op> =
   '+',
   '-';

# Multiplicative operators.
P<mulop> =
   '*',
   '/',
   '%';

# Additive operators.
P<plusminus> =
   '+',
   '-';

# Shift operators.
P<shiftop> =
   "<<",
   ">>";

# Relational operators.  The two-character forms are listed ahead of their
# one-character prefixes.
P<relop> =
   "<=",
   '<',
   ">=",
   '>';

# Equality operators.
P<eqop> =
   "==",
   "!=";

# A single '='; referenced by the <!eq> guard in <assignment-operator>.
P<eq> = '=';
# Plain and compound assignment operators.  Plain '=' carries the guard
# <!eq>, which (as far as can be told from its use here) rejects a '='
# that is immediately followed by another '='.
P<assignment-operator> =
   '=' <!eq>,  # a guard, just in case there's a comparison also valid with the same input up to this point...
   "*=",
   "/=",
   "%=",
   "+=",
   "-=",
   "<<=",
   ">>=",
   "&=",
   "^=",
   "|=";

# Optional "lvalue assignment-operator" prefix of an assignment.
P<lvalue-assign_opt> =
   <unary-lvalue-expression> <assignment-operator>,
  ;

# Optional array initialiser: '=' followed by a braced initialiser list
# or by a string literal.
P<array-init_opt> =
   '=' '{' <constant-initializer-list> '}',
   '=' <dqstring>,
  ;

# One or more comma-separated initialisers.
P<constant-initializer-list> =
   <constant-initializer> <rest-of-constant-initializer-list>;

# Tail of the initialiser list; a lone trailing ',' is also accepted.
P<rest-of-constant-initializer-list> =
   ',' <constant-initializer> <rest-of-constant-initializer-list>,
   ',',
  ;

# A single initialiser: a constant expression or a nested braced list.
P<constant-initializer> =
   <constant-expression>,
   '{' <constant-initializer-list> '}';

# Marker rule placed just after '[' in subscripts and array bounds.  Its
# phrase is the empty string ''; the attached action body is currently
# commented out.
P<HACK1> = '' {
   //fprintf(stdout, "/*hack1*/");
};

# Marker rule placed just before ']'; otherwise identical to <HACK1>.
P<HACK2> = '' {
   //fprintf(stdout, "/*hack2*/");
};

# One or more '[' bound ']' groups; each bound may be omitted.
P<array-bounds> =
   '[' <HACK1> <constant-expression_opt> <HACK2> ']' <array-bounds_opt>;

# Zero or more further '[' bound ']' groups.
P<array-bounds_opt> =
   '[' <HACK1> <constant-expression_opt> <HACK2> ']' <array-bounds_opt>,
  ;

# Optional constant expression.
P<constant-expression_opt> =
   <constant-expression>,
  ;

# The type part of a declaration: a type specifier, an enum or struct/union
# specifier, a "typeof" of an lvalue expression or of a type, or "void".
# NOTE(review): <type-specifier> already reaches the enum, struct/union and
# "void" cases via <basic-type-specifier>, so those later alternatives look
# redundant -- confirm before removing anything.
P<type> =
   <type-specifier>,  # (hmmm.... includes 'void')
   <enum-specifier>,
   <struct-or-union-specifier>,
   "typeof" '(' <unary-lvalue-expression> ')',
   "typeof" '(' <type-specifier> <indirection-unary-ops_opt> ')',
   "void";

# Optional type qualifier.
P<const-or-volatile-type-qualifier_opt> =
   <const-or-volatile-type-qualifier>,
  ;

# A single "const" or "volatile" keyword.
P<const-or-volatile-type-qualifier> =
   "const",
   "volatile";

# The C standard suggests we could put "typedef" in here and use
# variable declaration syntax in place of typedef declarations...

# note that "register fred" should be equivalent to "register int fred"
# but does not parse because <type> does not have a null option.

# rules that parse a non-null <auto-reg-static-ext_opt> in phrases
# that include '<auto-reg-static-ext_opt> <type>' should allow a null
# <type> and default the declaration to "int".  This code does not
# implement that option and rejects those declarations.  It can be
# done by using a <C> rule to detect the presence of one of these
# keywords, and a <?...> guard in P<type> before the null phrase.

# Optional storage-class keyword.
P<auto-reg-static-ext_opt> =
   "auto",
   "register",
   "static",
   "extern",
  ;

# One or more comma-separated new identifiers.
P<identifier-list> =
   <new-identifier> <rest-of-identifier-list>;

# Optional ',' followed by the remainder of the identifier list.
P<rest-of-identifier-list> =
   ',' <identifier-list>,
  ;

# What follows a function header: either zero or more further
# comma-separated prototypes terminated by ';', or a function body.
P<more-forward-decls-or-actual-body> =
   <extern-proc-name-and-params-list_opt> ';',
   <compound-statement>;

# Leading part of a function declaration: optional storage class, optional
# const/volatile qualifier and an optional return type.
P<extern-proc-spec> =
   <auto-reg-static-ext_opt> <const-or-volatile-type-qualifier_opt> <basic-type-specifier-or-typedef-name_opt>;

# A typedef name, or an optional basic type specifier (the latter may
# match nothing, which is what makes this rule optional).
P<basic-type-specifier-or-typedef-name_opt> =
   <typedef-name>,
   <basic-type-specifier_opt>;

# The declared name: either '(' '*' name ')' (function-pointer form) or a
# plain new identifier.
P<identifier-or-function-pointer> =
   '(' '*' <new-identifier> ')',
   <new-identifier>;

# A function declarator: optional '*'s, the name (or function-pointer form)
# and a parenthesised, possibly empty, parameter list.
P<extern-proc-name-and-params> =
   <indirection-decl_opt> <identifier-or-function-pointer> '(' <param-list_opt> ')';

# Zero or more further ',' <extern-proc-name-and-params> declarators.
P<extern-proc-name-and-params-list_opt> =
   ',' <extern-proc-name-and-params> <extern-proc-name-and-params-list_opt>,
  ;

# A function prototype (or several) or a function definition.  The second
# alternative is the old-style definition: a parenthesised identifier list,
# then optional parameter declarations, a ';' and the body.
P<proc-fn-decl> =
   <extern-proc-spec> <extern-proc-name-and-params> <more-forward-decls-or-actual-body>,
   <extern-proc-spec> <indirection-decl_opt> <new-identifier> '(' <identifier-list> ')' <oldstyle-param-list_opt> ';' <compound-statement>;

# A brace-delimited block, either empty or holding a statement list.  The
# action body attached at the end contains only a commented-out fprintf.
P<compound-statement> =
   '{' '}',
   '{' <statement-list> '}'
{
   //fprintf(stdout, "\n");
};

# Actually I don't think declarations inside a block are allowed
# to be preceded by labels.
# So the three lines below (which are in P<statement>) probably should
# be separated out and put in <statement-list> below separately from the
# options which can have <labels_opt> in front of them.
#
#   <extern-proc-spec> <extern-proc-name-and-params> <more-forward-decls-or-actual-body>,
#   <in-proc-data-declaration>,
#   <external-declaration-list_opt>;

# Also still to add are label declarations (inside blocks):
# P<label-declaration> =
#    "__label__" <identifier-list> ';';

# (I'm not sure if "label" as a keyword is valid too, or does it have to be #define'd as __label__ ?)


# One or more statements, each optionally preceded by labels.
P<statement-list> =
   <labels_opt> <statement> <rest-of-statement-list>;

# Zero or more further (optionally labelled) statements.
P<rest-of-statement-list> =
   <labels_opt> <statement> <rest-of-statement-list>,
  ;

# added as a test: <extern-proc-spec> <extern-proc-name-and-params> <more-forward-decls-or-actual-body>,
# A single statement: empty statement, block, selection, iteration or jump
# statement, expression statement, or one of three declaration forms
# (function prototype/definition, in-procedure data declaration, external
# declaration list).
P<statement> =
   ';',
   <compound-statement>,
   <selection-statement>,
   <iteration-statement>,
   <jump-statement>,
   <expression> ';',
   <extern-proc-spec> <extern-proc-name-and-params> <more-forward-decls-or-actual-body>,
   <in-proc-data-declaration>,
   <external-declaration-list>;
   # WAS: <external-declaration-list_opt>;

# A data declaration inside a procedure: optional storage class and
# qualifier, a type, an optional second qualifier and a list of declarators;
# or a struct declaration.  No terminating ';' is part of this rule.
P<in-proc-data-declaration> =
   <auto-reg-static-ext_opt> <const-or-volatile-type-qualifier_opt> <type> <const-or-volatile-type-qualifier_opt> <decl-list>,
   <struct-decl>;

# A named struct with a braced member list.
P<struct-decl> =
   "struct" <new-structname> '{' <struct-member-list> '}';

# Zero or more member declarations.
P<struct-member-list> =    # what about the ';'s between members?
   <struct-member-declaration> <struct-member-list>,
  ;

# A member: a nested struct declaration, or a scalar or array declaration.
# The scalar and array rules each end in ';'; the nested <struct-decl>
# alternative has no ';' of its own.
P<struct-member-declaration> =
   <struct-decl>,
   <possibly-initialised-scalar-decl>,
   <possibly-initialised-array-decl>;

# A single array declaration terminated by ';': optional storage class and
# qualifiers, type, optional '*'s, name, one or more bounds and an optional
# initialiser.
P<possibly-initialised-array-decl> =
   <auto-reg-static-ext_opt> <const-or-volatile-type-qualifier_opt> <type> <const-or-volatile-type-qualifier_opt> <indirection-decl_opt> <new-identifier> <array-bounds> <array-init_opt> ';';

# A scalar declaration: optional storage class and qualifier, a type, then
# one or more declarators terminated by ';'.
P<possibly-initialised-scalar-decl> =
   <auto-reg-static-ext_opt> <const-or-volatile-type-qualifier_opt> <type> <rest-of-scalar-decl>;

# First declarator (optional '*'s, name, optional initialiser), any further
# declarators, and the closing ';'.
P<rest-of-scalar-decl> =
   <indirection-decl_opt> <new-identifier> <scalar-init_opt> <rest-of-scalar-decl_opt> ';';

# Zero or more further ','-separated declarators.
P<rest-of-scalar-decl_opt> =
   ',' <indirection-decl_opt> <new-identifier> <scalar-init_opt> <rest-of-scalar-decl_opt>,
  ;

# The tag of a struct being declared; syntactically just a new identifier.
P<new-structname> =
   <new-identifier>;

# One or more comma-separated declarators.
P<decl-list> =
   <decl> <rest-of-decl-list>;

# Optional ',' followed by the remainder of the declarator list.
P<rest-of-decl-list> =
   ',' <decl-list>,
  ;

# A single declarator: optional '*'s and a name, followed either by array
# bounds with an optional array initialiser, or by an optional scalar
# initialiser.
P<decl> =
   <indirection-decl_opt> <new-identifier> <possibly-empty-array-bounds-list> <array-init_opt>,
   <indirection-decl_opt> <new-identifier> <scalar-init_opt>;

# 'HACK' is a test of marking contents of [] expressions so they can be re-written to
# use a different base given in a macro definition, to support Imp's multi-dimensional arrays.
# One or more '[' bound ']' groups; each bound may be omitted.
P<possibly-empty-array-bounds-list> =
   '[' <HACK1> <constant-expression_opt> <HACK2> ']' <optional-possibly-empty-array-bounds-list>;

# Zero or more '[' bound ']' groups.
P<optional-possibly-empty-array-bounds-list> =
   '[' <HACK1> <constant-expression_opt> <HACK2> ']' <optional-possibly-empty-array-bounds-list>,
  ;

# goto, continue, break and return statements, each terminated by ';'.
# "return" takes an optional expression and is followed by the
# <!followed-by-alpha> guard (defined outside this section).
P<jump-statement> =
   "goto" <label> ';',
   "continue" ';',
   "break" ';',
   "return" <!followed-by-alpha> <expression_opt> ';';   # need a C<> tweak here to inhibit "return 0" being seen as "return0"...

# Optional expression.
P<expression_opt> =
   <expression>,
  ;

# A label name; syntactically just an identifier.
P<label> =
   <identifier>;

# while, do-while and for loops.  Each of the three "for" clauses is an
# optional expression, so a declaration in the first clause
# ("for (int i=0; ...)") is not accepted by this rule.
P<iteration-statement> =
   "while" '(' <expression> ')' <statement>,
   "do" <statement> "while" '(' <expression> ')' ';',
   "for" '(' <expression_opt> ';' <expression_opt> ';' <expression_opt> ')' <statement>;

# if (with optional else) and switch statements.
P<selection-statement> =
   "if" '(' <expression> ')' <statement> <else_opt>,
   "switch" '(' <expression> ')' <statement>;

# Optional "else" branch.
P<else_opt> =
   "else" <statement>,
  ;

# guard to give better error on "label: }" which should be "label: ; }"
P<missing-semicolon-after-label-at-end-of-block> =
   '}';

# Zero or more labels in front of a statement: "name:", "case <value>:" or
# "default:".  Every non-empty alternative ends with the guard
# <!missing-semicolon-after-label-at-end-of-block>, i.e. it checks what
# follows the labels against the '}' rule defined just before this one.
P<labels_opt> =
   <label> ':' <labels_opt> <!missing-semicolon-after-label-at-end-of-block>,
   "case" <constant-expression> ':' <labels_opt> <!missing-semicolon-after-label-at-end-of-block>,
   "default" ':' <labels_opt> <!missing-semicolon-after-label-at-end-of-block>,
  ;

# formal parameter list can also be just a list of types (but can still end in "...")
# normal current style however is to have named parameters.
#
# Contents of a parameter list's parentheses: one or more formal
# parameters, the single keyword "void", or nothing.
P<param-list_opt> =
   <formal-param> <rest-of-param-list_opt>,
   "void",
  ;

# Optional old-style parameter declarations (the ones written between the
# ')' of the identifier list and the function body).
P<oldstyle-param-list_opt> =
   <oldstyle-param-list>,
  ;

# One or more ';'-separated old-style parameter declarations, or "void".
P<oldstyle-param-list> =
   <oldstyle-formal-param> <rest-of-oldstyle-param-list_opt>,
   "void";

# Optional ';' followed by the remainder of the old-style list.
P<rest-of-oldstyle-param-list_opt> =
   ';' <oldstyle-param-list>,
  ;

# One old-style parameter declaration: optional qualifier, a type, and a
# comma-separated list of declarators.
P<oldstyle-formal-param> =
   <const-or-volatile-type-qualifier_opt> <type> <oldstyle-parameter-list>;

# One or more comma-separated declarators.
P<oldstyle-parameter-list> =
   <whatever> <rest-of-oldstyle-parameter-list>;

# Optional ',' followed by the remainder of the declarator list.
P<rest-of-oldstyle-parameter-list> =
   ',' <oldstyle-parameter-list>,
  ;

# A declarator: optional '*'s, optional name, optional array bounds.  All
# three parts are optional, so this rule can match nothing at all.
P<whatever> =
   <indirection-decl_opt> <identifier_opt> <optional-possibly-empty-array-bounds-list>;

# Tail of a formal parameter list: a closing ',' "..." (variadic), a
# further ',' <formal-param> and more tail, or nothing.
P<rest-of-param-list_opt> =
   ',' "...",
   ',' <formal-param> <rest-of-param-list_opt>,
  ;

# One formal parameter: a procedure passed as a parameter, or an optional
# qualifier, a type, optional '*'s, an optional name and optional array
# bounds (the name being optional allows type-only prototypes).
P<formal-param> =
   <procedure-as-parameter>,
   <const-or-volatile-type-qualifier_opt> <type> <indirection-decl_opt> <identifier_opt> <optional-possibly-empty-array-bounds-list>;

# Optional identifier.
P<identifier_opt> =
   <identifier>,
  ;

# A function-pointer style parameter: optional storage class and qualifier,
# a return type (an optional basic type specifier, or a typedef name), then
# '(' optional-'*'s name ')' and a parenthesised parameter list.
P<procedure-as-parameter> =
   <auto-reg-static-ext_opt> <const-or-volatile-type-qualifier_opt> <basic-type-specifier_opt> '(' <indirection-decl_opt> <new-identifier> ')' '(' <param-list_opt> ')',
   <auto-reg-static-ext_opt> <const-or-volatile-type-qualifier_opt> <typedef-name> '(' <indirection-decl_opt> <new-identifier> ')' '(' <param-list_opt> ')';

# Optional basic type specifier.
P<basic-type-specifier_opt> =
   <basic-type-specifier>,
  ;

# Wonder if this would do it? <indirection-decl_opt> '(' <indirection-decl_opt> <identifier> ')' '(' <param-list_opt> ')' <scalar-init_opt> <rest-of-scalar-or-array-decl_opt>,
# A typedef, in one of four forms: a function-pointer typedef; a (possibly
# qualified) type specifier with a declarator list; a struct declaration
# followed by a list of new type names; or a type specifier followed by a
# list of new type names.  No terminating ';' is part of this rule.
P<typedef-declaration> =
   "typedef" <type-specifier> <indirection-decl_opt> '(' <indirection-decl_opt> <new-identifier> ')' '(' <param-list_opt> ')',
   "typedef" <const-or-volatile-type-qualifier_opt> <type-specifier> <const-or-volatile-type-qualifier_opt> <decl-list>,
   "typedef" <struct-decl> <maybe-indirect-typedef-name-list>,
   "typedef" <type-specifier> <maybe-indirect-typedef-name-list>;

# One or more comma-separated type names, each optionally preceded by '*'s.
P<maybe-indirect-typedef-name-list> =
   <indirection-decl_opt> <typedef-name> <rest-of-maybe-indirect-typedef-name-list>;

# Optional ',' followed by the remainder of the name list.
P<rest-of-maybe-indirect-typedef-name-list> =
   ',' <maybe-indirect-typedef-name-list>,
  ;

E