# This is an extended grammar which now accepts constructs from multiple versions of Imp
# - it was primarily based on a correct Imp80 grammar, then modified to add Imp77 constructs
# followed by features from Pdp15 Imp, old Emas "Imp9", and Hamish's 68000 extensions, etc
# The original base grammar was:
#   https://history.dcs.ed.ac.uk/archive/os/emas/emas2/compilers/imp80/imp80ps04.txt
#
# Since this accepts multiple dialects, I'm tempted to call it "PImp", short for "Polyglot Imp"...
#
# There is good info re the phrase structure grammar to be found at
# http://www.ancientgeek.org.uk/EMAS/EMAS_Manuals/IMP/A_Syntactic_and_Semantic_Definition_of_the_IMP_Language.pdf
# To add:
#tests/imp2021/stdperm.imp %const    %record(*)          %name   nil == 0
#                                                                       ^
#  "/tmp/13563.imp", Line 336, Col 44: Syntax error while looking for <ASSOP-EXPR> <LVALUE>
#
#  tests/imp68k/rectest.imp %OWNRECORD(F1)%NAME rnn==0
#                                                    ^
#  "/tmp/16989.imp", Line 226, Col 25: Syntax error while looking for <ASSOP-EXPR> <LVALUE>
#
# Built-in Phrases used by the line reconstruction phase:
#
# stropped letter (%a %b etc)
B<stropped> = 0;
#
# single-quoted string
B<sqstring> = 1;
#
# double-quoted string
B<dqstring> = 2;
#
B<NL> = 3;
#
B<char> = 4;
#
# All parsers need an <EOF> phrase.
#
B<EOF> = 5;
C<immediate> = {
#define INCLUDE_AST_DEBUG "show_ast.c"
int kw(int AST_op, int idx, int ap)
{
  int data[4];

  int i;

  char *k = keyword(idx);

  data[1] =  idx;

  data[2] = A[ap+3]; // index of c[] array element
  data[3] = strlen(k);

  return mktuple(AST_op, 1, 3, data);

}
// variables used in line_reconstruction() moved here so they are available to semantic code in grammar.
#define STROPPED_KEYWORDS 1  /* alters behaviour of parse() */
int at_startofstatement = TRUE;

int saved = TRUE;

int at_startofline = TRUE;

int in_const_initialiser_list = FALSE;

int in_keyword = FALSE;

int in_alphanumeric = FALSE;

};
# There are currently two places in the code where a parse-time decision has to be made.  This is one of them:
#
# OK, this is slightly trickier than you might expect... you can't place the <COLON> immediate code after the ':'
# because the parser pre-fetches the next token in order to compare against <COLON> even though <COLON> doesn't
# actually examine any text.  So... we put the test *before* the ':', and *UNDO* its actions in the following
# alternative if the ':' was not matched.
#
# Anyway, the outcome is that if a <COLON> is followed by a '!' comment, that comment is handled by the
# line reconstruction code and is *not* passed back to the parser a character at a time as it was previously doing!
#
# This in turn solves the problem of line reconstruction finding a ' or a " in the comment and gobbling up characters
# - possibly over multiple line - until it hits a matching quote.  PHEW.  It took a long time to get here!
#
# (The other place that requires parse-time immediate code is in handling const initialisers. Look for "immediate:" far below)
P<LABEL-COLON> = 
   <immediate:saved = at_startofstatement; at_startofstatement = TRUE; return TRUE;> ':',
   <immediate:at_startofstatement = saved; return FALSE;>;
##P<LABEL-COLON> =
##   ':';
# SS is the primary entry point.
#
P<SS> = 
   <EXTERNALS> <LAST-ITEMS-IN-FILE>;
#
# I'm taking the long way to handle alphanumerics and variable names, for now
# At some point (soon) I hope to add regular expression matching, to shorten
# these rules and speed up parsing.  But this works, quite reliably.
#
## P<letter> = "[A-Z]";
#
P<letter> = 
   'A',
   'B',
   'C',
   'D',
   'E',
   'F',
   'G',
   'H',
   'I',
   'J',
   'K',
   'L',
   'M',
   'N',
   'O',
   'P',
   'Q',
   'R',
   'S',
   'T',
   'U',
   'V',
   'W',
   'X',
   'Y',
   'Z';
## P<digit> = "[0-9]";
#
P<digit> = 
   '0',
   '1',
   '2',
   '3',
   '4',
   '5',
   '6',
   '7',
   '8',
   '9';
P<alphanumeric> = 
   <letter>,
   <digit>;
P<opt-letter-or-digit-seq> = 
   <letter> <opt-letter-or-digit-seq>,
   <digit> <opt-letter-or-digit-seq>,
  ;
#More than one newline (or semicolon)
P<BLANKS> = 
   <S> <BLANKS>,
  ;
# The last items in a file.
# the final EOF below is only for begin/endofprogram which *can* be followed by code but shouldn't
P<LAST-ITEMS-IN-FILE> = 
   "end" "of" "file" <BLANKS> <EOF>,
   <BLANKS> <EOF>;
P<program> = 
   "programme",
   "program";
# I haven't specified any phrases to be external-only.  There might be some,
# but I'll leave it to compile() to reject them.
P<EXTERNALS> = 
   "end" "of" "perm" <S> <EXTERNALS>,
   "end" "of" "prim" <S> <EXTERNALS>,
   <EXTERNAL-OR-INTERNAL> <EXTERNALS>,
  ;
# I cleaned up label handling, we can afford the overhead of backtracking over one <NAME>...
# - this isn't the 70's any more.  Labels are not valid outside blocks.
P<INTERNALS> = 
   <OPT-LABEL-DEFINITION> <INTERNAL> <INTERNALS>,
   <OPT-LABEL-DEFINITION>;
# top-level begin/end block can either be %begin/%endofprogram,
# or %begin/%end/%endoffile.  However in some cases things can
# be placed between the %end and the %endoffile.
#
# I've also seen %routine xxx/%endofprogram in FMACS/BOOT.imp
#
# There's a %begin/%end without an %endoffile in FMACS/FIRST4.imp
P<outer-level-end> = 
   "end" "of" <program>,
   "end";
# these are allowed at the external level and also inside procedures/main program.
# The phrases that are internal-only include these as the final alternative,
# hence why the empty statement can only occur as the last item in this list.
#
# If using this grammar as a 'SOAP' replacement, you'll want to allow syntactically
# incorrect statements also, and if so, those should be handled here after the <S> rule.
#
P<EXTERNAL-OR-INTERNAL> = 
   "begin" <S> <INTERNALS> <outer-level-end> <S>,
   <Percent-SEX> <RT> <spec> <NAME> <Opt-ALIAS> <FPP> <S>,
   <Percent-SEX> <RT> <NAME> <Opt-ALIAS> <FPP> <S> <INTERNALS> <outer-level-end> <S>,
   <OPT-68K> <Percent-SEX> <RT> <NAME> <Opt-ALIAS> <FPP> <S>,
   <DECLARATION-S>,
   "record" "format" <RECFMT-spec-OR-BODY> <S>,
   "switch" <ONE-SWITCH-DECL> <REST-OF-SWLIST> <S>,
   "include" <STR-CONST>,
   "option" <STR-CONST>,
   "from" <NAME> "include" <NAME> <Opt-NAME-LIST>,
   "end" "of" "list" <S>,
   "list" <S>,
   "end" "of" "mcode" <S>,
   "mcode" <S>,
   "trusted" "program" <S>,
   "main" "ep" <NAME> <S>,
   "control" <INT-CONST> <S>,
   "diagnose" <INT-CONST> <S>,
   "reals" "long" <S>,
   "reals" "normal" <S>,
   <COMMENT>,
   '?' <EXPR> <S>,
   '?' '?' <CEXPR> <S>,
   '?' <LVALUE> '=' <EXPR> <S>,
   '?' <LVALUE> '=' '=' <EXPR> <S>,
   <IF-S>,
   <S>;
# These are all extensions used by 68000 Imp.
#
# %begin
# %recordformat f(%byte ldte,lsap,rdte,rsap)
# @#ldte %record(f)cur
# ( I have no idea what "@#ldte " means. )
#
# @400(a5) %record(*)%namearray in(0:7)
# @16_3F00-192 %routine         closeinput
# @724(a5) %integername heapfront
#
# '@' <68K-code-address> before a routine name is an external spec in 68000 Imp, and
# <Opt-68K-code-address> after an external %spec is a perm routine spec in 68000 Imp.
# as in: %integerfnspec(16_1124) REM(%integer a,b)  { found in APM-gdmr/I/GGPERM.imp }
#
P<OPT-68K> = 
   '@' <68K-stuff>,
  ;
P<68K-stuff> = 
   '-' <INT-CONST> '(' <NAME> ')',
   <INT-CONST> '(' <NAME> ')',
   <INT-CONST> '-' <INT-CONST>,
   <INT-CONST>,
   '#' <NAME>;
# this is for 68000 Imp:
#
# %externalrealfnspecalias READ %alias "readreal"
# %externalintegerfnspecalias READ
# I have no idea what a "specalias" is :-(
#
# %integerarrayspec(16_C00000) frame(0:32767)
# %integerfnspec(16_1124) REM(%integer a,b)
# %bytespec(16_7fffc)status
#
# I think these extensions are also covered with a *different* extension in the same compiler :-(
# i.e.
#   @16_7fffc %byte status  ???
#
P<spec> = 
   "specalias",
   "spec" <Opt-68K-code-address>;
P<Opt-68K-code-address> = 
   '(' <INT-CONST> ')',
  ;
P<OPT-spec> = 
   <spec>,
  ;
# All labels come through here.  Because of an interaction with comments,
# we use some parse-time code in phrase <LABEL-COLON> to modify how line reconstruction
# handles the comments.  btw Numeric labels are a historical form...
# I'm not sure why I'm not using <CEXPR-OR-STAR> here, but even so it's not a significant overhead to reparse the label.
P<OPT-LABEL-DEFINITION> = 
   <NAME> '(' <CEXPR> ')' <LABEL-COLON> <OPT-LABEL-DEFINITION>,
   <NAME> '(' '*' ')' <LABEL-COLON> <OPT-LABEL-DEFINITION>,
   <NAME> <LABEL-COLON> <OPT-LABEL-DEFINITION>,
   <DIGIT-SEQ> <LABEL-COLON> <OPT-LABEL-DEFINITION>,
  ;
# fault statements can only appear in the outer block of a program;
# they cannot appear in external routines.  This should be checked
# by the compile() procedure.
P<FAULT-S> = 
   "fault" <FAULT-ACTION> <MORE-FAULTS> <S>;
P<FAULT-LIST> = 
   <DIGIT-SEQ> <MORE-NUMBERS>;
P<MORE-NUMBERS> = 
   ',' <DIGIT-SEQ> <MORE-NUMBERS>,
  ;
P<FAULT-ACTION> = 
   <FAULT-LIST> '-' '>' <LABEL>;
P<MORE-FAULTS> = 
   ',' <FAULT-ACTION> <MORE-FAULTS>,
  ;
# The "spec" form below is for old EMAS Imp procedure parameters:
#
# %routine integrate(%realname y, %real a,b, %integer n, %c
#                    %realfn f)
#   %spec f(%real x)
#
# the fully-qualified form (%realfnspec f(%real x)) is also valid but comes in via the <EXTERNAL> route.
#
# There are other types of %spec related to parameters to procedures as well -
# %recordspec for sure.  Not so sure if there is an arrayspec.
# They are only allowed in the body of a procedure.
#
# (Btw in the older compilers, all declarations in a block had to precede any code.)
#
# strictly speaking the <UI> <REST-OF-SS1> form, <UI> "while" <SC>, should only
# allow a single <UI>, not the <UI> <AUI> form. (EMAS Imp)
# (page 4.4 of https://history.dcs.ed.ac.uk/archive/docs/Edinburgh_IMP_Language_Manual.pdf )
P<INTERNAL> = 
   <NESTED-BLOCK>,
   <UI> <REST-OF-SS1>,
   '*' <UCI>,
   <CYCLE-S>,
   <FAULT-S>,
   "on" <OPT-event> <EVENT-LIST> <REST-OF-EVENT-LIST> "start" <S> <INTERNALS> "finish" <S>,
   "spec" <NAME> <FPP> <S>,
   "short" "routine",
   <EXTERNAL-OR-INTERNAL>;
# unlike with old-style parsers, the whole of the block content ends up in an AST item.
#
# Unfortunately there is a problem caused by 68000 Imp: some files contain only
# begin/end with no endoffile.  Some are begin/end;endoffile instead of begin/endofprogram
# A few are routine blah/endofprogram !
# One had begin/endofprogram followed by more routines.
# (and see elsewhere for 68000's external routine specs which are of the form
#   @address %routine blah
#  - with no %spec keyword, but no routine body either..)
#
P<NESTED-BLOCK> = 
   "begin" <S> <INTERNALS> "end" <S>;
# compound statements - blah %and blah %and blah - a short form of startfinish block
P<AUI> = 
   "and" <UI>,
  ;
# unconditonal string resolution should *not* be under <OPT-ASSIGN>
# because the LHS is the value and the RHS is the assignee.  ***FIX***
# unconditional string resolution should be moved up to <UI> or thereabouts
P<OPT-ASSIGN> = 
   '-' '>' <STRING-RESOLUTION-EXPR>,
   <ASSOP-EXPR>,
  ;
P<BASIC-UI> = 
   <LVALUE> <OPT-ASSIGN>;
# the compound statement form ends in UIs which change control flow, ie
# you cannot say: %if whatever %then blah %and ->lab %and this will never happen
# tempted to add %caption here ;-)  But that may be a step too far.
# "%monitor 9" would be trapped by "%fault 9 -> lab"
P<UI> = 
   <BASIC-UI> <AUI>,
   "print" "text" <sqstring> <AUI>,
   '-' '>' <LABEL>,
   "return",
   "true",
   "false",
   "result" <ASSOP-EXPR>,
   "monitorstop",
   "monitor" <DIGIT-SEQ> <AUI>,
   "monitor" <AUI>,
   "stop",
   "signal" <OPT-event> <CEXPR> <OPEXPR2>,
   "exit",
   "continue";
P<LABEL> = 
   <NAME> '(' <EXPR> ')',
   <NAME>,
   <DIGIT-SEQ>;
# I added an old record declaration syntax for examples out of ai2logo.i
#
# %OWNRECORDARRAY INDEX42( 0:1022)           (PIC DIR)
#
# %SYSTEMROUTINESPEC FINFO(%STRING(15) S,%INTEGER LEV,%C
#    %RECORDNAME R, %INTEGERNAME FLAG)
#
# %RECORD R(F)
#
# I'm not sure where/if this is handled:
#
# %record(f)%array portinfo(1:31)
#
# I need to get the old Imp manuals out and check a couple of things, in particular
#   %recordformatspec and %arrayformatspec ...
#
# The grammar below allows some initialisations that the compilers don't support
# but it's cleaner to reject them in compile()
#
# I had to add initialisations with pointers for these examples from
# 68000 Imp: %bytename b == length(s)
# %ownrecord(i tc f)%namearray tc refs(1:maxtc) == nil(*)
#
# These are 68000 syntax which is rejected by Imp77:
# %conststring(1) %array(1:max params) parameters="B","W","L"
# %string(255) %array(0:maxfiles) list
#
# The equivalent Imp77 syntax would be
#
# %conststring(1) %array parameters(1:max params)="B","W","L"
# %string(255) %array list(0:maxfiles)
#
# Finally here's a one-off hack for a construct I've only seen *once*
# in APM-gdmr/I/PRIM22.imp ...
# %constchar                    snl=char<10>
#
# The "register" below is for ICL/4-75 Imp9: %REGISTER SC(6)
#
P<DECLARATION-S> = 
   <OPT-68K> <XOWN> <XTYPE> <OWNDEC> <S>,
   <OPT-68K> <XTYPE> <DECLN> <S>,
   <OPT-68K> <OPT-XOWN> "record" "array" <RADEC> <REST-OF-RADEC> <S>,
   <OPT-68K> <OPT-XOWN> "record" <RDEC> <REST-OF-RDEC> <S>,
   "const" "char" <NAME> '=' 'C' 'H' 'A' 'R' '<' <INT-CONST> '>' <S>,
   "register" <NAME> '(' <INT-CONST> ')' <S>;
P<OPT-XOWN> = 
   <XOWN>,
  ;
# old-style record array declaration
P<RADEC> = 
   <NAME> <Opt-NAME-LIST> <BPAIR> '(' <RECFMT-REF> ')';
P<REST-OF-RADEC> = 
   ',' <RADEC>,
  ;
# old-style record declaration
P<RDEC> = 
   <NAME> <Opt-NAME-LIST> '(' <RECFMT-REF> ')';
P<REST-OF-RDEC> = 
   ',' <RDEC> <REST-OF-RDEC>,
  ;
# %on %event * %start or %on %event 1,2,3,etc %start
P<EVENT-LIST> = 
   '*',
   <CEXPR>;
# Added old-style while/until for: %UNTIL NEXTINSYM=NL %THEN %CYCLE
# and %WHILE NEXTINSYM#NL %THEN SKIPINSYM
P<CYCLE-S> = 
   "cycle" <Opt-CYCPARM> <S> <INTERNALS> "repeat" <Opt-UNTIL> <S>,
   <Percent-WUF> "cycle" <S> <INTERNALS> "repeat" <Opt-UNTIL> <S>,
   <Percent-WU> "then" "cycle" <S> <INTERNALS> "repeat" <Opt-UNTIL> <S>,
   <Percent-WU> "then" <UI>;
P<IF-S> = 
   <Percent-IU> <TOP-LEVEL-CONDITION> <THEN-S>;
# /home/gtoal/gtoal.com/athome/edinburgh/APM-gdmr/I/LIB.imp
# contains:
#          %elseif 'A'<=sym<='Z' %start
#
# which is a form of Hamish's that is equivalent to
#
#  %finish %else %if 'A'<=sym<='Z' %start
#
# or the more concise
#
#  %else %if 'A'<=sym<='Z'
#
# An identical issue is: %elsestart
# in APM-gdmr/ETHER/PROFILE.imp
#
P<FINISH-S> = 
   "else" <Percent-IU> <TOP-LEVEL-CONDITION> <Opt-start> <S> <INTERNALS> <FINISH-S>,
   "else" <Opt-start> <S> <INTERNALS> "finish" <S>,
   "finish" <Opt-ELSE-S>;
P<Opt-start> = 
   "start",
  ;
# see the comments on <FINISH-S> above.
P<Opt-ELSE-S> = 
   "else" "start" <S> <INTERNALS> "finish" <S>,
   "else" <IF-S>,
   "else" <UI> <S>,
   <S>;
P<THEN-S> = 
   <OPT-then> "start" <S> <INTERNALS> <FINISH-S>,
   "then" <UI> <Opt-ELSE-S>;
P<OPT-then> = 
   "then",
  ;
P<S> = 
   <NL>,
   ';';
# Most comments are handled in the line reconstruction phase.
# Comments starting with %COMMENT are an exception.
# Some older compilers treated '!' comments like %COMMENT,
# with "%c" at the end of a comment causing continuation!
# This phrase does not allow ';' in a comment to terminate
# the comment, but some versions of Imp (I think PDP15)
# did, so that will have to be handled with a command-line
# option and a parse-time phrase if it ever is needed.
#
P<COMMENT> = 
   "comment" <Parsed-OldStyle-Comment>,
   '!' <Parsed-OldStyle-Comment>;
P<Parsed-OldStyle-Comment> = 
   <char> <Parsed-OldStyle-Comment>,
   <dqstring> <Parsed-OldStyle-Comment>,
   <sqstring> <Parsed-OldStyle-Comment>,
   <letter> <Parsed-OldStyle-Comment>,
   <digit> <Parsed-OldStyle-Comment>,
   <stropped> <Parsed-OldStyle-Comment>,
   <NL>;
P<NL-or-SEMI> = 
   <NL>,
   <semi>;
P<semi> = 
   ';';
# <DIGIT-SEQ> '_' <DIGIT-SEQ>, and  <DIGIT-SEQ> '_' <NAME>, added by gt
# <NAME> <sqstring> is really X'....' but type checking stops me matching 'X'
# VERY hacky stopgap just to get stuff to parse. Needs proper syntax
# which needs the ability to recognise individual digits and letters
# rather than the current typed 'alphanumeric' hack in line_reconstruction ...
# Doesn't handle %real well, and indeed not at all if containing an '@' sign.
# E"ebcdic" etc also not handled.  Hamish's 40_XXX comes in here.
#
## P<basedconst> =  "[0-9][0-9]*_[0-9A-Z][0-9A-Z]*"
#
P<INT-CONST> = 
   <SAVED-INT-CONST>;
# almost forgot 'A' as an integer constant...
P<SAVED-INT-CONST> = 
   <DIGIT-SEQ> '_' <alphanumeric-SEQ>,
   <DIGIT-SEQ>,
   <OLDSTYLE-BASE>,
   <sqstring>;
P<alphanumeric-SEQ> = 
   <alphanumeric> <Opt-alphanumeric-SEQ>;
P<Opt-alphanumeric-SEQ> = 
   <alphanumeric> <Opt-alphanumeric-SEQ>,
  ;
P<STR-CONST> = 
   <SAVED-STR-CONST>;
P<SAVED-STR-CONST> = 
   <OPT-DQ-LETTER> <dqstring>,
   <sqstring>;
# current line reconstruction doesn't support this. Letters come through as part of <alphanumeric>s.
P<OPT-DQ-LETTER> = 
   'E',
  ;
P<NUMERIC-CONST> = 
   <INT-CONST>,
   <REAL-CONST>;
# <CONST> is somewhat generic and should be replaced with the more specific version when possible
P<CONST> = 
   <INT-CONST>,
   <REAL-CONST>,
   <STR-CONST>;
# <sqstring> has to be checked for length and content depending on prefix.
#
## P<lettercharconst> = <squote> <schar> <squote> |
##   "[RHX]" <squote> <hexchars> <squote> |
##   "B" <squote> <binchars> <squote> |
##   "K" <squote> <octchars> <squote> |
##   "[CMD]" <squote> <mchars> <squote>;
#
P<OLDSTYLE-BASE> = 
   <letter> <sqstring>;
# Note: a <DIGIT-SEQ> can be both an integer constant and a real constant.
# Imp77 supports constants like this: %constlongreal PI = 16_3.243F6A89
# which are not handled here yet.
#
## P<realconst> = "[0-9][0-9]*\.[0-9][0-9]*E[0-9][0-9]*" |
##                "[0-9][0-9]*\.[0-9][0-9]*" |
##                "[0-9][0-9]*@[0-9][0-9]*"
#
P<REAL-CONST> = 
   <SAVED-REAL-CONST>;
# removed ability to match integer
P<DOT> = 
   '.';
P<OPT-DOT> = 
   <DOT>,
  ;
P<SAVED-REAL-CONST> = 
   <DIGIT-SEQ> <OPT-DOT> <EXPONENT>,
   <rest-of-number> <FRACPT> <OPT-EXPONENT>,
   <DIGIT-SEQ> '.' <OPT-EXPONENT>;
P<FRACPT> = 
   '.' <DIGIT-SEQ>;
P<OPT-FRACPT> = 
   <FRACPT>,
  ;
P<OPT-Plus-Minus> = 
   '+',
   '-',
  ;
P<EXPONENT> = 
   '@' <OPT-Plus-Minus> <DIGIT-SEQ>;
P<OPT-EXPONENT> = 
   <EXPONENT>,
  ;
P<DIGIT-SEQ> = 
   <digit> <rest-of-number>;
P<rest-of-number> = 
   <DIGIT-SEQ>,
  ;
P<NAME> = 
   <letter> <!SQUO-STRING> <opt-letter-or-digit-seq>;
## P<NAME> =
##    <alphanumeric> <!SQUO-STRING> <rest-of-name>;
## P<rest-of-name> =
##    <NAME>,
##   ;
P<SQUO-STRING> = 
   <sqstring>;
# A UCI is a machine code instruction.
# (Hamish's PDP15 imp machine code syntax (which is not introduced by '*') is definitely not supported.)
#
# Machine-specific machine-code removed. May cause problems with ';' and quoted chars in machine code
#
P<UCI> = 
   <NL-or-SEMI>,
   <char> <UCI>,
   <dqstring> <UCI>,
   <sqstring> <UCI>,
   <stropped> <UCI>;
# Imp does not let you mix and/or without explicit brackets. They do not have a precedence relationship.
# The implied continuation after %and and %or in some versions of Imp are handled hackily
# in line reconstruction, not here (and will eventually be made a command-line option).
P<REST-OF-COND> = 
   "and" <SC> <REST-OF-ANDC>,
   "or" <SC> <REST-OF-ORC>,
  ;
P<REST-OF-ANDC> = 
   "and" <SC> <REST-OF-ANDC>,
  ;
P<REST-OF-ORC> = 
   "or" <SC> <REST-OF-ORC>,
  ;
P<CONDITION> = 
   <SC> <REST-OF-COND>;
# <NAME> <OPT-ACTUAL-PARAMETERS> added by gt for predicates
# RESTOFSC handles double-sided conditions such as '0' <= ch <= '9'
P<SC> = 
   <STRING-RESOLUTION>,
   <EXPR> <COMP> <EXPR> <OPT-DOUBLE-SIDED>,
   <NAME> <OPT-ACTUAL-PARAMETERS>,
   '(' <CONDITION> ')',
   "not" <SC>;
P<OPT-DOUBLE-SIDED> = 
   <COMP> <EXPR>,
  ;
# LHS of -> is *not* a string expression according to Imp77 (but no reason why it should not be)
P<STRING-RESOLUTION> = 
   <LVALUE> '-' '>' <STRING-RESOLUTION-EXPR>;
# Current Imp77 restricts string resolution to -> a.("test").b  with a and b being optional.
# Older Emas implementations allowed multiple parts I'm fairly sure, eg -> a.("match1").b.("match2").c
#
# Also, behaviour changed re initial ("match") with no assignment variable before it -
# used to be that "match" had to start the string, but now there can be text before
# it, and a null match has to be coded as -> a.("match").b %and a = ""
#
# So if we need to distinguish between these variations, do it in compile()
#
P<STRING-RESOLUTION-EXPR> = 
   <STR-MATCH> <OPT-STR-ASSIGN>,
   <STR-ASSIGN> <OPT-STR-MATCH>;
P<STRING-LVALUE> = 
   <LVALUE>;
P<STRING-RVALUE> = 
   <STRING-LVALUE>,
   <STR-CONST>,
   '(' <STRING-EXPR> ')';
# No bracketed expressions, only '.' operator allowed, although once again,
# Hamish has an idiosyncratic extension where strings in a printstring are
# separated by semi-colons - I think it is some approximation to C's printf?
#
P<STRING-EXPR> = 
   <STRING-RVALUE> <REST-OF-STRING-EXPR>;
P<DOTTED-STRING-EXPR> = 
   <STRING-RVALUE> <?DOT> <REST-OF-STRING-EXPR>;
P<REST-OF-STRING-EXPR> = 
   '.' <STRING-RVALUE> <REST-OF-STRING-EXPR>,
  ;
P<STR-MATCH> = 
   '(' <STRING-EXPR> ')';
P<STR-ASSIGN> = 
   <STRING-LVALUE>;
# ping-pong.  Can't allow -> (a).b.c.(d) or -> a.(b).(c).d
P<OPT-STR-ASSIGN> = 
   '.' <STR-ASSIGN> <OPT-STR-MATCH>,
  ;
P<OPT-STR-MATCH> = 
   '.' <STR-MATCH> <OPT-STR-ASSIGN>,
  ;
# Ordering is important, eg '##' must come before '#'
#
# I really pity people who use the sort of grammar that requires
# eliminating shift-reduce conflicts before they can parse anything :-)
#
P<COMP> = 
   '=' '=',
   '=',
   <NOT-EQUALS-ADDRESS>,
   <NOT-EQUALS>,
   '<' '=',
   '<',
   '>' '=',
   '>';
# '<>' is a really obscure form but it definitely used to be used.  I'll
# post a reference as soon as I locate one.
#
P<NOT-EQUALS> = 
   '#',
   '\\' '=',
   '<' '>';
P<NOT-EQUALS-ADDRESS> = 
   '#' '#',
   '\\' '=' '=';
# Procedure application, i.e. actual parameters to routine or function call:
# We *only* want to accept literals and const scalar variables in these lists -
# array elements, record fields, function calls ... all disallowed.
#
# The parse-time code eliminates most of the illegal objects, and a type check
# at the code generation stage will catch the remaining few.  This was necessary
# because something like:  err(4)
# was previously being recognised as possibly an array access or a procedure call
# when it fact the (4) was really a repetition count.
#
# The phrase: <immediate:return !in_const_initialiser_list;>
# causes the parse to fail at that point if we are in a const initialiser list.
# (actually if we are in *any* <CEXPR> to be honest).
P<OPT-ACTUAL-PARAMETERS> = 
   <immediate:return !in_const_initialiser_list;> '(' <EXPR> <REST-OF-ACTUAL-PARAMETERS> ')',
  ;
P<REST-OF-ACTUAL-PARAMETERS> = 
   ',' <EXPR> <REST-OF-ACTUAL-PARAMETERS>,
  ;
# handling precedence with the grammar rather than Edinburgh-style in the AST. Tradeoff of speed for simplicity.
# We don't create rules to parse constant expressions as something different from an expression composed
# of variables - instead it is a wrapper around a regular <EXPR> which tests to see if the EXPR can be
# evaluated at compile time, and if so it is folded and returned as a signed constant decimal integer.
# It should really have been named <CONST-INT-EXPR> ...
P<CEXPR> = 
   <immediate:in_const_initialiser_list = TRUE; return TRUE;> <EXPR> <immediate:in_const_initialiser_list = FALSE; return TRUE;>,
   <immediate:in_const_initialiser_list = FALSE; return FALSE;>;
# <DOTTED-STRING-EXPR> here restricted to expressions containing "." concatenation
# a simple string variable or function etc comes through on the second option
P<EXPR> = 
   <DOTTED-STRING-EXPR>,
   <MEDIUMPREC-EXPR> <REST-OF-EXPR>;
P<REST-OF-EXPR> = 
   <OP-LOW> <MEDIUMPREC-EXPR> <REST-OF-EXPR>,
  ;
P<MEDIUMPREC-EXPR> = 
   <HIGHPREC-EXPR> <REST-OF-MEDIUMPREC-EXPR>;
P<REST-OF-MEDIUMPREC-EXPR> = 
   <OP-MED> <HIGHPREC-EXPR> <REST-OF-MEDIUMPREC-EXPR>,
  ;
P<HIGHPREC-EXPR> = 
   <RIGHT-ASSOC-EXPR> <REST-OF-HIGHPREC-EXPR>;
P<REST-OF-HIGHPREC-EXPR> = 
   <OP-HIGH-LEFTASSOC> <RIGHT-ASSOC-EXPR> <REST-OF-HIGHPREC-EXPR>,
  ;
P<RIGHT-ASSOC-EXPR> = 
   <REST-OF-RIGHTASSOC-EXPR> <UNARY-EXPR>;
P<REST-OF-RIGHTASSOC-EXPR> = 
   <RIGHTASSOC-EXPR-with-OP> <REST-OF-RIGHTASSOC-EXPR>,
  ;
P<RIGHTASSOC-EXPR-with-OP> = 
   <UNARY-EXPR> <OP-HIGH-RIGHTASSOC>;
P<UNARY-EXPR> = 
   <Opt-UNARY-OP> <OPERAND>;
# These pointer arithmetic elements are for Hamish's PDP9/15 Imp (old): INDP == INDEX0++P
# and for Hamish's 68000 Imp:     %if tp[1]_mode # litmode %then lo = minint %else lo = tp[1]_val
# I hope this grammar definition causes the right precedence level...
#
# They can also be used in LVALUEs.
P<Opt-PARAMETERS-OR-POINTERINDEX> = 
   <immediate:return !in_const_initialiser_list;> '[' <EXPR> ']',
   <immediate:return !in_const_initialiser_list;> '+' '+' <EXPR>,
   <immediate:return !in_const_initialiser_list;> '-' '-' <EXPR>,
   <OPT-ACTUAL-PARAMETERS>;
P<Opt-SUBFIELD> = 
   <immediate:return !in_const_initialiser_list;> '_' <SUBFIELD>,
  ;
P<Opt-RECORDFIELD> = 
   <immediate:return !in_const_initialiser_list;> '_' <RECORDFIELD>,
  ;
# The old form of modulus (!X!) is for really ancient compilers such as Hamish's Pdp15 Imp.
# Unfortunately there are obscure cases where an ambiguous parse results:
#
#  putact(condop!!polarity!!1+polarity<<7,item,0)
#
# Does that mean
#   putact(condop !!  polarity   !!  1 + polarity<<7, item, 0)
# or
#   putact(condop !  !polarity!  !   1 + polarity<<7, item, 0)
#
# I need to check but I *hope* that the grammar here causes the first of the above to be selected.
# If not, we'll need a command-line option here to force the version of Imp being used.
#
# I just hope that there's no version of Imp that ever supported factorials with a postfix '!'
#
P<OPERAND> = 
   <REAL-CONST>,
   <INT-CONST>,
   <STR-CONST>,
   <LVALUE>,
   '(' <EXPR> ')',
   '|' <EXPR> '|',
   '!' <EXPR> '!';
# <OP> is only used in <CEXPR>s now and will be removed entirely
# once CEXPRs are recoded to use the same grammar as EXPRs.
#
# *However* I am considering doing away with CEXPRs altogether,
# and just using <EXPR> everywhere, in conjunction with a parse-time
# test to check whether any of the terms were in fact *not* constants.
#
# Also... it's not obvious if we could in fact use EXPRs everywhere *without*
# the overhead of having to organise the code/name tables so that they
# can be checked for being constants at parse time, and merely reject
# them from the compile() procedure instead?  This would be possible
# as long as there are no parse ambiguities caused by checking after
# parsing. I know that's a problem in C but Imp's simpler syntax
# might let us get away with it!
#
# Note that none of these options eliminates the possibility of implementing
# conditional compilation the EMAS Imp80 way using
#
# %if <CEXPR> %start
#   ! conditional code
# %finish
#
# (Though I've always had questions about that on EMAS if there's a
#  label inside the block - the ability to use that construct for
#  conditional compilation depends on whether that label is ever
#  jumped to from code outside the block! - doing it properly requires
#  full flow control analysis and a transitive closure on the flow graph!)
#
# highest precedence, left-associative
P<OP-HIGH-LEFTASSOC> = 
   '<' '<',
   '>' '>';
# highest precedence, right-associative (only exponentiation)
#
# I found this expression the 68000 Imp compiler:
#
#           y = |r|+0.5/10.0^m;  !modulus, rounded
#
# which I parse as:
#
#           y = |r|+( 0.5 / (10.0^m));  !modulus, rounded
#
# I presume this is a clever way to round the last digit of a decimal,
# but damn, how it works is beyond me.
# Typical Hamish code! Brilliant (I presume) but obscure and uncommented.
#
P<OP-HIGH-RIGHTASSOC> = 
   <INTEGER-EXP>,
   <REAL-EXP>;
P<INTEGER-EXP> = 
   '*' '*' '*' '*',
   '\\' '\\',
   '^' '^';
# eq is used in '\\' <!eq> to stop it being accepted when the input should be matching '\\' '=' or '\\' '=' '='
# (matching '\\' '\\' is not an issue as <INTEGER-EXP> is tested before <REAL-EXP>)
P<eq> = 
   '=';
P<REAL-EXP> = 
   '*' '*',
   '\\' <!eq>,
   '^';
# middle-precedence operators.  All left-associative.
P<OP-MED> = 
   '*',
   '/' '/',
   '/',
   '&';
# lowest precedence operators.  All left-associative.
P<OP-LOW> = 
   '+',
   '-',
   '!' '!',
   '!';
# You would think that unary operators always have the highest precedence,
# but not so - here's a tricky one...
#
# I = -1 >> 1
#
# This always has the value 0, regardless of integer size, because a unary minus
# in front of a constant is *NOT* interpreted as a negative constant, but rather
# the positive constant preceded by a unary negation operator.
#
# It gets worse... unary negation is implemented as binary subtraction with
# an inserted '0' left operand, i.e.
#
# 0 - 1 >> 1
#
# Shifts have higher precedence than subtraction, so this becomes:
#
# 0 - (1 >> 1)
#
# which evaluates to 0.
#
# The 68000 Imp compiler includes this statement which I hope is correct! dict(firstpos)_val = -cad>>1
#
# Some versions of Imp (I think Imp80) forbid a unary '+' before constants,
# so A + +3 would be illegal (although A + -3 would not)
#
# and none of this applies to binary-not (\) so \3 really is higher
# priority than '>>' and \3 >> 1 does what you would expect.  I hope.
#
# https://gtoal.com/imp77/reference-manual/A_Syntactic_and_Semantic_Definition_of_the_IMP_Language.pdf
# 9.  An initial minus sign is treated exactly as '0-'
# 10. An initial 'not' has the highest precedence has the effect of 'exclusive
# or' with X'FFFFFFFF'.  Thus \A**B is exactly equivalent to (X'FFFFFFFF'!!A)**B
P<Opt-UNARY-OP> = 
   '+',
   '-',
   <UNARY-NOT>,
  ;
# fortunately unary backslash is unambiguous with ERCC style exponentiation binary operator
P<UNARY-NOT> = 
   '\\',
   '~';
P<Percent-IU> = 
   "if",
   "unless";
P<ONE-SWITCH-DECL> = 
   <NAME> <Opt-NAME-LIST> '(' <CEXPR> ':' <CEXPR> ')';
# rest of %switch list
# This reminds me, isn't there an optional %label declaration in Imp?  Haven't seen an example yet,
# I need to check the manuals to make sure I'm not confusing this with C...
P<REST-OF-SWLIST> = 
   ',' <ONE-SWITCH-DECL> <REST-OF-SWLIST>,
  ;
P<Opt-NAME-LIST> = 
   ',' <NAME> <Opt-NAME-LIST>,
  ;
P<REST-OF-EVENT-LIST> = 
   ',' <CEXPR> <REST-OF-EVENT-LIST>,
  ;
# Still to handle operator precedence etc for <CEXPR>s the same way as for <EXPR>s.
# However it is such a major duplication of effort that it might be much easier to
# just use <EXPR>s and have a semantic test for constantcy.
# The problem is the ambiguity of <NAMES>.  And you don't want any of the decorated
# forms of a name (records, function calls, pointers, maybe even rejecting const
# array elements?) - just simple literals and const integer operands.
## P<REMOVED-CEXPR> =
##    <Opt-UNARY-OP> <COPERAND> <REST-OF-CEXPR>;
## P<REST-OF-CEXPR> =
##    <OP> <COPERAND> <REST-OF-CEXPR>,
##   ;
P<OPT-event> = 
   "event",
  ;
P<FPP> = 
   '(' <FORMAL-PARAMETER-DECLARATION> <REST-OF-FORMAL-PARAMETER-LIST> ')',
  ;
P<REST-OF-FORMAL-PARAMETER-LIST> = 
   <Opt-Comma> <FORMAL-PARAMETER-DECLARATION> <REST-OF-FORMAL-PARAMETER-LIST>,
  ;
# %routine y (%integer %name %array (1) %name b)
# %routine r10(%integer%array%name a(1:10),%integer k)
P<FORMAL-PARAMETER-DECLARATION> = 
   <XTYPE> <OPT-arrayname> <NAME> <OPT-68K-Bounds> <Opt-NAME-LIST>,
   <RT> <OPT-name> <NAME> <Opt-NAME-LIST> <FPP>,
   "name" <Opt-68K-reg> <NAME> <Opt-NAME-LIST>;
# More Hamish idiosyncracy!:
#
# @16_1114%record(*)%map ZNEW(%name(d0) v)
#  - APM-gdmr/I/VPERM19.imp
#      d0 being a register
#
P<Opt-68K-reg> = 
   '(' <NAME> ')',
  ;
P<OPT-name> = 
   "name",
  ;
# added predicate - gt
P<RT> = 
   "routine",
   "predicate",
   <XTYPE> <FN-MAP>;
# note map is equivalent to "name function"
P<FN-MAP> = 
   <FN>,
   "map";
P<FN> = 
   "fn",
   "function";
# Confirmed that label is used.  Found one in 68000 code for labels used in embeddded machine code:
# %label f1,f2,f3,f4,f5,f6,f7,b0,b1,b2,b3,b4,b5,b6,b7,end
#
# "long long" would be added here if we supported 64 bit ints or 128bit reals...
# Plain "long" added here by making <BTYPE> optional.
# (BTYPE being integer/real/longreal)
#
P<longlong> = 
   "long" "long",
   "long";
P<BASE-XTYPE> = 
   "integer",
   "real",
   "label",
   <longlong> <Opt-BTYPE>,
   "byte" <Opt-INTEGER>,
   "mite" <Opt-INTEGER>,
   "short" <Opt-INTEGER>,
   "half" <Opt-INTEGER>,
   "string" <Opt-STRLEN>,
   "record" <Opt-RFREF>;
# does %record(i tc f)%name match <XTYPE>?
#
# Courtesy of Hamish, two more language extensions:
#
# Two <OPT-68K-qualifier>s to allow one or both of %writeonly %volatile %byte %integer device
#
P<XTYPE> = 
   <OPT-68K-qualifier> <BASE-XTYPE> <OPT-name>;
P<68K-memptr> = 
   "read" "only" <68K-memptr>,
   "write" "only" <68K-memptr>,
   "volatile" <68K-memptr>,
  ;
P<OPT-68K-qualifier> = 
   <68K-register>,
   <68K-memptr>,
  ;
# C-like register qualifiers (Hamish, again)
# %register(a2)%integer r1,r2
# %register(a2)%byte%name bn
# %register(a3)%short%name sn
P<68K-register> = 
   "register" '(' <NAME> ')';
# '*' added by gt for recordformat(*)
# () moved from point of call to here to support old style "%recordname fred" without format. (Uses a formatspec instead)
P<Opt-RFREF> = 
   '(' <RECFMT-REF> ')',
   '(' '*' ')',
  ;
# RF in the rules below is short for Record Format
P<RECFMT-REF> = 
   <NAME>,
   <RECFMT-DEC> <REST-OF-RECFMT-DEC> <ALT-RECFMT-DEC>;
# Imp variant record. NOT the same as C's which is per field.  This is per record.
# Still to check: a ( ... %or ... ) subgroup in a record definition, where the ...
# represents several declarations, with the whole ( ... ) group being a part
# of an enclosing record.
#
P<ALT-RECFMT-DEC> = 
   "or" <RECFMT-DEC> <REST-OF-RECFMT-DEC> <ALT-RECFMT-DEC>,
  ;
P<REST-OF-RECFMT-DEC> = 
   ',' <RECFMT-DEC> <REST-OF-RECFMT-DEC>,
  ;
P<RECFMT-DEC> = 
   <XTYPE> <RECFMT-ELMNT>,
   '(' <RECFMT-DEC> <REST-OF-RECFMT-DEC> <ALT-RECFMT-DEC> ')';
# Now "name" has been removed from <OPT-arrayname-or-name>, check at the point
# of invocation, that "name" still works, now it as been added to <XTYPE>...
#
P<RECFMT-ELMNT> = 
   <OPT-arrayname> <NAME> <Opt-NAME-LIST>,
   "array" <ADECLN-in-record>;
# Array declaration
P<ADECLN-in-record> = 
   <NAME> <Opt-NAME-LIST> <BPAIR> <REST-OF-ARLIST-in-record>;
P<REST-OF-ARLIST-in-record> = 
   ',' <ADECLN-in-record>,
  ;
P<LOWERBOUND> = 
   <EXPR>,
   <BASE-XTYPE> <NAME>;
# Another Hamishism... %integer %array %name fred(1:*)
# which in Imp77 would be %integer %array (1) fred
# (reject the '*' in other contexts)
#
#  In the declaration of an array name variable, as in
#
#  %integer %array %name a(lo1:hi1,...,loN:hiN)
#
#  each of the lower and upper bounds may be:
#
#  (a) a literal or literal expression, for example:
#
#  %integer %array %name a(1:1000)
#  %real    %array %name delta(10:20,1:30)
#
#  In this case, the corresponding bound of every actual parameter
#  must have the identical literal value.
#
#  (b) a variable declaration, for example:
#
#  %byte%array%name used(1:%integer xdim,1:%integer ydim)
#  %short%array%name count(%byte lo:%byte hi)
#
#  In this case, the corresponding bound of the actual parameter is
#  assigned to the stated variable at the time of procedure entry.
#  This case, which is similar to the Pascal conformant array concept,
#  is confined to %array %names as procedure parameters.  It cannot
#  be used for %array %names declared as ordinary variables.
#
#  (c) an asterisk, for example:
#
#  %integer%array%name cases(1:*)
#
#  The use of an asterisk should be confined to the UPPER bound of
#  the FIRST (outer) dimension only, and when it appears, all other
#  bounds should be literal.  It implies an unspecified number of
#  elements in the array (and the absence of bound-checking even
#  with checks on).
#
#  For the time being, the Vax/VMS form ... %array(n)%name ... is also
#  accepted, but note that the '(n)' part is obligatory even when n is one.
P<UPPERBOUND> = 
   '*',
   <EXPR>,
   <BASE-XTYPE> <NAME>;
# List of lower/upper bound pairs, for array declarations.
P<BPAIR> = 
   '(' <LOWERBOUND> ':' <UPPERBOUND> <REST-OF-BPLIST> ')';
P<BPAIR68K> = 
   '(' <LOWERBOUND> ':' <UPPERBOUND> <REST-OF-BPLIST68K> ')';
# Multi-dimensional arrays
P<REST-OF-BPLIST> = 
   ',' <EXPR> ':' <EXPR> <REST-OF-BPLIST>,
  ;
P<REST-OF-BPLIST68K> = 
   ',' <LOWERBOUND> ':' <UPPERBOUND> <REST-OF-BPLIST68K>,
  ;
# <Opt-DIMENSIONS> added to handle the Vax Imp extension:
#
# %routine y (%integer %name %array (1) %name b)
#
# and <OPT-68K-Bounds> added to handle:
#
# %routine extract params(%string(255) %name in,out,
#    %integer %array(1:max params) %name params)
#
P<OPT-arrayname> = 
   "array" <Opt-DIMENSIONS> "name",
   "array" <OPT-68K-Bounds> "name",
  ;
# for the example from 68000 Imp by way of Vax Imp,
# %routine y (<whatever type> %array (1) %name b)
# mentioned in https://gtoal.com/imp77/reference-manual/Imp-M68K-V2.lay
P<Opt-DIMENSIONS> = 
   '(' <INT-CONST> ')',
  ;
P<Opt-INTEGER> = 
   "integer",
  ;
# "42(2)" is equivalent to "42, 42".  (*) means 'as many more are needed to fill the array' so must come last.
# However it looks like <REPFACT> is also used as the string length indicator?!
P<Opt-STRLEN> = 
   '(' <Opt-STAROREXPR> ')',
  ;
P<REPEATS> = 
   '(' <Opt-STAROREXPR> ')',
  ;
P<Opt-STAROREXPR> = 
   <STAROREXPR>,
  ;
# Used in switch label.  (*) marks a catch-all. (like default: in C)
# Or at least SHOULD be used in switch label.  Looks like it isn't... just used in <REPFACT>?
P<STAROREXPR> = 
   <CEXPR>,
   '*';
# Opt- added here to allow plain "%long" as a declaration at <XTYPE>
P<Opt-BTYPE> = 
   "real",
   "integer",
  ;
P<Opt-Comma> = 
   ',',
  ;
P<Opt-ALIAS> = 
   "alias" <TEXTTEXT>,
  ;
P<TEXTTEXT> = 
   <dqstring>;
# if we add a "C" type (to reverse the order of parameter evaluation) it would be done here.
# I don't think you would write %permroutine since they are built in to the compiler, but
# depending on how the compiler is implemented, you might, and if so they would be added here.
# I do have a vague recollection of perm routines being matched up with internal code using
# a file or declarations containing magic numbers, eg something like:
# %perm (5) %routine write(%integer n, sp)
# - but that may be a manufactured memory!
#
P<Percent-SEX> = 
   "system",
   "external",
   "dynamic",
   "prim",
   "perm",
  ;
P<RECFMT-spec-OR-BODY> = 
   "spec" <NAME>,
   <NAME> '(' <RECFMT-DEC> <REST-OF-RECFMT-DEC> <ALT-RECFMT-DEC> ')';
# 68000 Imp has a non-standard syntax for some array declarations, eg
#
# %conststring(1) %array(1:max params) parameters="B","W","L"
# %string(255) %array(0:maxfiles) list
#
# The equivalent Imp77 syntax would be
#
# %conststring(1) %array parameters(1:max params)="B","W","L"
# %string(255) %array list(0:maxfiles)
#
# Putting bounds after a %name is also from the 68000:
#
#    %integer%array%name an10(1:10)
#
# which is likely the same as
#
#    %integer%array(1)%name an10
#
#  in Imp77.  We just haven't been told the lower bound...
#
# meanwhile this broke: %string (64) %array %format sform1(0 : 1022)
#
# I'm not sure if there is any significant difference nowadays between own declarations and non-own declarations...
# the used to be distinguished by allowing initialisations... maybe worth merging the definitions now?
P<DECLN> = 
   <OPT-arrayname> <OPT-spec> <NAME-or-STAR-68K> <OPT-68K-Bounds> <Opt-Init-assign> <Opt-Assign-NAME-LIST>,
   "array" <OPT-spec> <OPT-format> <NAME> <Opt-NAME-LIST> <BPAIR> <REST-OF-ARLIST>,
   "array" <BPAIR> <OPT-spec> <OPT-format> <NAME> <Opt-NAME-LIST> <REST-OF-ARLIST-68K>;
P<OWNDEC> = 
   <OPT-arrayname> <OPT-spec> <SINGLE-OWNDEC> <REST-OF-OWNDEC>,
   "array" <OPT-format> <OPT-spec> <BPAIR> <NAME> <Opt-Init-assign-array>,
   "array" <OPT-format> <OPT-spec> <NAME> <BPAIR> <Opt-Init-assign-array>;
P<REST-OF-ARLIST> = 
   ',' <NAME> <Opt-NAME-LIST> <BPAIR> <REST-OF-ARLIST>,
  ;
P<REST-OF-ARLIST-68K> = 
   ',' <NAME> <Opt-NAME-LIST> <REST-OF-ARLIST-68K>,
  ;
P<OPT-68K-Bounds> = 
   <BPAIR68K>,
  ;
P<OPT-format> = 
   "format",
  ;
#
# This is the same ugly hack as <Opt-Assign-NAME-LIST> below
# The AST handling has to check for and reject the '*' in all contexts except for
# 68000-style declarations such as "@16_4000c0 %byte *,acia s,*,acia d"
#
# Below is a note from inside the 68K compiler that may be relevant:
#
#! %record(*) or %string(*) needs %name (or @)
#! %string alone allowed only for %const%string
#
P<NAME-or-STAR-68K> = 
   '*',
   <NAME>;
# <Opt-Init-assign> added by gt
# This hideous hack with '*' for an anonymous entry is yet another of Hamish's idiosyncrasies:
#
# @16_7fffc %byte status,data,*,control
#                            ^
#  "/home/gtoal/gtoal.com/athome/edinburgh/APM-gdmr/ETHER/SPY.imp"
#
# @16_4000c0 %byte *,acia s,*,acia d
#
# and would appear to just be a lazy way to skip over an address when assigning multiple
# individual scalars to addresses...
#
# Obviously it should not be allowed in any other context - some semantic checking needed here...
#
P<Opt-Assign-NAME-LIST> = 
   ',' '*' <Opt-Assign-NAME-LIST>,
   ',' <NAME> <Opt-Init-assign> <Opt-Assign-NAME-LIST>,
  ;
# Need <EXPR> instead of <CEXPR> to catch = tostring(i) because cexpr only got tostring
# Need '== <EXPR>' instead of '== <LVALUE>' to catch '== length(s)', i.e. a %map although it looks like a fn call.
# - the latter being more of a failing of LVALUE that here.
# Removed:
#   '==' <LVALUE>,
#   '=' <CEXPR>,
# These will be improved once we have semantic checks...
#
P<Opt-Init-assign> = 
   <ASSOP-EXPR>,
  ;
# removed string resolution from assop - too much type checking
P<ASSOP> = 
   '=' '=',
   <SCALAR-ASSIGN>;
P<ASSOP-EXPR> = 
   '=' '=' <LVALUE>,
   <SCALAR-ASSIGN> <EXPR>;
P<SUBFIELD> = 
   <NAME> <Opt-PARAMETERS-OR-POINTERINDEX> <Opt-SUBFIELD>;
P<RECORDFIELD> = 
   <NAME> <Opt-PARAMETERS-OR-POINTERINDEX> <Opt-RECORDFIELD>;
P<LVALUE> = 
   <NAME> <Opt-PARAMETERS-OR-POINTERINDEX> <Opt-RECORDFIELD>;
# An initialised array declaration does not need a %C following the '=' before the list of initial values.
# this is a stupid hack to get around comments in the middle of array declarations :-(
# note that it gobbles the newline
# This has to handle: "== nil(*)" from %ownrecord(i tc f)%namearray tc refs(1:maxtc) == nil(*)
# Turns out <SCALAR-ASSIGN> didn't include '==' so switched to <ASSOP> instead
P<Opt-Init-assign-array> = 
   <ASSOP> <stupid> <ARRAY-INITIALISER> <ROCL>,
  ;
# the version without "for" is to support an old-style statement, %WHILE <cond> %THEN <ui>
P<Percent-WU> = 
   "while" <TOP-LEVEL-CONDITION>,
   "until" <TOP-LEVEL-CONDITION>;
P<Percent-WUF> = 
   "while" <TOP-LEVEL-CONDITION>,
   "until" <TOP-LEVEL-CONDITION>,
   "for" <NAME> <SCALAR-ASSIGN> <EXPR> ',' <EXPR> ',' <EXPR>;
P<SCALAR-ASSIGN> = 
   '=',
   '<' '-';
P<TOP-LEVEL-CONDITION> = 
   <CONDITION>;
P<Opt-UNTIL> = 
   "until" <TOP-LEVEL-CONDITION>,
  ;
P<Opt-CYCPARM> = 
   <NAME> <SCALAR-ASSIGN> <EXPR> ',' <EXPR> ',' <EXPR>,
  ;
# gt changed signal to allow 2 exprs.  Mouses Imp77 adds an optional string!
# 68000: %signal 5,,holeend,"HEAPPUT: heap corrupt" %if holeend>lhb_limit
P<OPEXPR2> = 
   ',' <OPT-EXPR> <OPEXPR>,
  ;
P<OPT-EXPR> = 
   <EXPR>,
  ;
# single optional ", expr" for use with %signal event (, subevent)?
P<OPEXPR> = 
   ',' <OPT-EXPR> <OPSTRING>,
  ;
# %signal 3,4,position,"Set input fails" %unless position=0
P<OPSTRING> = 
   ',' <STRING-EXPR>,
  ;
# reversed conditions and 1-line cycles
P<REST-OF-SS1> = 
   <Percent-IU> <TOP-LEVEL-CONDITION> <S>,
   <Percent-WUF> <S>,
   <S>,
  ;
P<REST-OF-OWNDEC> = 
   ',' <SINGLE-OWNDEC> <REST-OF-OWNDEC>,
  ;
P<SINGLE-OWNDEC> = 
   <NAME> <Opt-ALIAS> <Opt-Init-assign>;
P<ROCL> = 
   ',' <stupid> <ARRAY-INITIALISER> <ROCL>,
  ;
P<ARRAY-INITIALISER> = 
   <CEXPR> <REPEATS>;
P<stupid> = 
   <COMMENT>,
  ;
# extrinsic is an old form meaning roughly "external spec"
# These apply to data as opposed to <Percent-SEX> which apply to procedures.
P<XOWN> = 
   "own",
   "external",
   "extrinsic",
   "constant",
   "const";
# Constant conditions are not used to optimise code, but rather as a substitute
# for C's 'ifdef' pre-processor.
P<CONST-IF-S> = 
   <Percent-IU> <CONST-CONDITION> <CONST-THEN-S>;
P<CONST-THEN-S> = 
   <OPT-then> "start" <S> <EXTERNALS-OR-INTERNALS> <CONST-FINISH-S>,
   "then" <UI> <CONST-Opt-ELSE-S>;
P<CONST-FINISH-S> = 
   "else" <Percent-IU> <CONST-CONDITION> <Opt-start> <S> <EXTERNALS-OR-INTERNALS> <CONST-FINISH-S>,
   "else" <Opt-start> <S> <EXTERNALS-OR-INTERNALS> "finish" <S>,
   "finish" <CONST-Opt-ELSE-S>;
P<CONST-Opt-ELSE-S> = 
   "else" "start" <S> <EXTERNALS-OR-INTERNALS> "finish" <S>,
   "else" <CONST-IF-S>,
   "else" <UI> <S>,
   <S>;
P<CONST-CONDITION> = 
   <CONST-SC> <CONST-REST-OF-COND>;
#  <STRING-RESOLUTION>,
#  <NAME> <OPT-ACTUAL-PARAMETERS>,
P<CONST-SC> = 
   <CEXPR> <COMP> <CEXPR> <CONST-OPT-DOUBLE-SIDED>,
   '(' <CONST-CONDITION> ')',
   "not" <CONST-SC>;
P<CONST-OPT-DOUBLE-SIDED> = 
   <COMP> <CEXPR>,
  ;
P<CONST-REST-OF-COND> = 
   "and" <CONST-SC> <CONST-REST-OF-ANDC>,
   "or" <CONST-SC> <CONST-REST-OF-ORC>,
  ;
P<CONST-REST-OF-ANDC> = 
   "and" <CONST-SC> <CONST-REST-OF-ANDC>,
  ;
P<CONST-REST-OF-ORC> = 
   "or" <CONST-SC> <CONST-REST-OF-ORC>,
  ;
P<EXTERNALS-OR-INTERNALS> = 
   <EXTERNAL-OR-INTERNAL> <EXTERNALS-OR-INTERNALS>,
  ;
E