# This is an extended grammar which now accepts constructs from multiple versions of Imp
# - it was primarily based on a correct Imp80 grammar, then modified to add Imp77 constructs
# followed by features from Pdp15 Imp, old Emas "Imp9", and Hamish's 68000 extensions, etc
# The original base grammar was:
# https://history.dcs.ed.ac.uk/archive/os/emas/emas2/compilers/imp80/imp80ps04.txt
#
# Since this accepts multiple dialects, I'm tempted to call it "PImp", short for "Polyglot Imp"...
#
# There is good info re the phrase structure grammar to be found at
# http://www.ancientgeek.org.uk/EMAS/EMAS_Manuals/IMP/A_Syntactic_and_Semantic_Definition_of_the_IMP_Language.pdf
# To add:
#tests/imp2021/stdperm.imp %const %record(*) %name nil == 0
# ^
# "/tmp/13563.imp", Line 336, Col 44: Syntax error while looking for <ASSOP-EXPR> <LVALUE>
#
# tests/imp68k/rectest.imp %OWNRECORD(F1)%NAME rnn==0
# ^
# "/tmp/16989.imp", Line 226, Col 25: Syntax error while looking for <ASSOP-EXPR> <LVALUE>
#
# Built-in Phrases used by the line reconstruction phase:
#
# stropped letter (%a %b etc)
B<stropped> = 0;
#
# single-quoted string
B<sqstring> = 1;
#
# double-quoted string
B<dqstring> = 2;
#
B<NL> = 3;
#
B<char> = 4;
#
# All parsers need an <EOF> phrase.
#
B<EOF> = 5;
C<immediate> = {
#define INCLUDE_AST_DEBUG "show_ast.c"
int kw(int AST_op, int idx, int ap)
{
int data[4];
int i;
char *k = keyword(idx);
data[1] = idx;
data[2] = A[ap+3]; // index of c[] array element
data[3] = strlen(k);
return mktuple(AST_op, 1, 3, data);
}
// variables used in line_reconstruction() moved here so they are available to semantic code in grammar.
#define STROPPED_KEYWORDS 1 /* alters behaviour of parse() */
int at_startofstatement = TRUE;
int saved = TRUE;
int at_startofline = TRUE;
int in_const_initialiser_list = FALSE;
int in_keyword = FALSE;
int in_alphanumeric = FALSE;
};
# There are currently two places in the code where a parse-time decision has to be made. This is one of them:
#
# OK, this is slightly trickier than you might expect... you can't place the <COLON> immediate code after the ':'
# because the parser pre-fetches the next token in order to compare against <COLON> even though <COLON> doesn't
# actually examine any text. So... we put the test *before* the ':', and *UNDO* its actions in the following
# alternative if the ':' was not matched.
#
# Anyway, the outcome is that if a <COLON> is followed by a '!' comment, that comment is handled by the
# line reconstruction code and is *not* passed back to the parser a character at a time as it was previously doing!
#
# This in turn solves the problem of line reconstruction finding a ' or a " in the comment and gobbling up characters
# - possibly over multiple line - until it hits a matching quote. PHEW. It took a long time to get here!
#
# (The other place that requires parse-time immediate code is in handling const initialisers. Look for "immediate:" far below)
P<LABEL-COLON> =
<immediate:saved = at_startofstatement; at_startofstatement = TRUE; return TRUE;> ':',
<immediate:at_startofstatement = saved; return FALSE;>;
##P<LABEL-COLON> =
## ':';
# SS is the primary entry point.
#
P<SS> =
<EXTERNALS> <LAST-ITEMS-IN-FILE>;
#
# I'm taking the long way to handle alphanumerics and variable names, for now
# At some point (soon) I hope to add regular expression matching, to shorten
# these rules and speed up parsing. But this works, quite reliably.
#
## P<letter> = "[A-Z]";
#
P<letter> =
'A',
'B',
'C',
'D',
'E',
'F',
'G',
'H',
'I',
'J',
'K',
'L',
'M',
'N',
'O',
'P',
'Q',
'R',
'S',
'T',
'U',
'V',
'W',
'X',
'Y',
'Z';
## P<digit> = "[0-9]";
#
P<digit> =
'0',
'1',
'2',
'3',
'4',
'5',
'6',
'7',
'8',
'9';
P<alphanumeric> =
<letter>,
<digit>;
P<opt-letter-or-digit-seq> =
<letter> <opt-letter-or-digit-seq>,
<digit> <opt-letter-or-digit-seq>,
;
#More than one newline (or semicolon)
P<BLANKS> =
<S> <BLANKS>,
;
# The last items in a file.
# the final EOF below is only for begin/endofprogram which *can* be followed by code but shouldn't
P<LAST-ITEMS-IN-FILE> =
"end" "of" "file" <BLANKS> <EOF>,
<BLANKS> <EOF>;
P<program> =
"programme",
"program";
# I haven't specified any phrases to be external-only. There might be some,
# but I'll leave it to compile() to reject them.
P<EXTERNALS> =
"end" "of" "perm" <S> <EXTERNALS>,
"end" "of" "prim" <S> <EXTERNALS>,
<EXTERNAL-OR-INTERNAL> <EXTERNALS>,
;
# I cleaned up label handling, we can afford the overhead of backtracking over one <NAME>...
# - this isn't the 70's any more. Labels are not valid outside blocks.
P<INTERNALS> =
<OPT-LABEL-DEFINITION> <INTERNAL> <INTERNALS>,
<OPT-LABEL-DEFINITION>;
# top-level begin/end block can either be %begin/%endofprogram,
# or %begin/%end/%endoffile. However in some cases things can
# be placed between the %end and the %endoffile.
#
# I've also seen %routine xxx/%endofprogram in FMACS/BOOT.imp
#
# There's a %begin/%end without an %endoffile in FMACS/FIRST4.imp
P<outer-level-end> =
"end" "of" <program>,
"end";
# these are allowed at the external level and also inside procedures/main program.
# The phrases that are internal-only include these as the final alternative,
# hence why the empty statement can only occur as the last item in this list.
#
# If using this grammar as a 'SOAP' replacement, you'll want to allow syntactically
# incorrect statements also, and if so, those should be handled here after the <S> rule.
#
P<EXTERNAL-OR-INTERNAL> =
"begin" <S> <INTERNALS> <outer-level-end> <S>,
<Percent-SEX> <RT> <spec> <NAME> <Opt-ALIAS> <FPP> <S>,
<Percent-SEX> <RT> <NAME> <Opt-ALIAS> <FPP> <S> <INTERNALS> <outer-level-end> <S>,
<OPT-68K> <Percent-SEX> <RT> <NAME> <Opt-ALIAS> <FPP> <S>,
<DECLARATION-S>,
"record" "format" <RECFMT-spec-OR-BODY> <S>,
"switch" <ONE-SWITCH-DECL> <REST-OF-SWLIST> <S>,
"include" <STR-CONST>,
"option" <STR-CONST>,
"from" <NAME> "include" <NAME> <Opt-NAME-LIST>,
"end" "of" "list" <S>,
"list" <S>,
"end" "of" "mcode" <S>,
"mcode" <S>,
"trusted" "program" <S>,
"main" "ep" <NAME> <S>,
"control" <INT-CONST> <S>,
"diagnose" <INT-CONST> <S>,
"reals" "long" <S>,
"reals" "normal" <S>,
<COMMENT>,
'?' <EXPR> <S>,
'?' '?' <CEXPR> <S>,
'?' <LVALUE> '=' <EXPR> <S>,
'?' <LVALUE> '=' '=' <EXPR> <S>,
<IF-S>,
<S>;
# These are all extensions used by 68000 Imp.
#
# %begin
# %recordformat f(%byte ldte,lsap,rdte,rsap)
# @#ldte %record(f)cur
# ( I have no idea what "@#ldte " means. )
#
# @400(a5) %record(*)%namearray in(0:7)
# @16_3F00-192 %routine closeinput
# @724(a5) %integername heapfront
#
# '@' <68K-code-address> before a routine name is an external spec in 68000 Imp, and
# <Opt-68K-code-address> after an external %spec is a perm routine spec in 68000 Imp.
# as in: %integerfnspec(16_1124) REM(%integer a,b) { found in APM-gdmr/I/GGPERM.imp }
#
P<OPT-68K> =
'@' <68K-stuff>,
;
P<68K-stuff> =
'-' <INT-CONST> '(' <NAME> ')',
<INT-CONST> '(' <NAME> ')',
<INT-CONST> '-' <INT-CONST>,
<INT-CONST>,
'#' <NAME>;
# this is for 68000 Imp:
#
# %externalrealfnspecalias READ %alias "readreal"
# %externalintegerfnspecalias READ
# I have no idea what a "specalias" is :-(
#
# %integerarrayspec(16_C00000) frame(0:32767)
# %integerfnspec(16_1124) REM(%integer a,b)
# %bytespec(16_7fffc)status
#
# I think these extensions are also covered with a *different* extension in the same compiler :-(
# i.e.
# @16_7fffc %byte status ???
#
P<spec> =
"specalias",
"spec" <Opt-68K-code-address>;
P<Opt-68K-code-address> =
'(' <INT-CONST> ')',
;
P<OPT-spec> =
<spec>,
;
# All labels come through here. Because of an interaction with comments,
# we use some parse-time code in phrase <LABEL-COLON> to modify how line reconstruction
# handles the comments. btw Numeric labels are a historical form...
# I'm not sure why I'm not using <CEXPR-OR-STAR> here, but even so it's not a significant overhead to reparse the label.
P<OPT-LABEL-DEFINITION> =
<NAME> '(' <CEXPR> ')' <LABEL-COLON> <OPT-LABEL-DEFINITION>,
<NAME> '(' '*' ')' <LABEL-COLON> <OPT-LABEL-DEFINITION>,
<NAME> <LABEL-COLON> <OPT-LABEL-DEFINITION>,
<DIGIT-SEQ> <LABEL-COLON> <OPT-LABEL-DEFINITION>,
;
# fault statements can only appear in the outer block of a program;
# they cannot appear in external routines. This should be checked
# by the compile() procedure.
P<FAULT-S> =
"fault" <FAULT-ACTION> <MORE-FAULTS> <S>;
P<FAULT-LIST> =
<DIGIT-SEQ> <MORE-NUMBERS>;
P<MORE-NUMBERS> =
',' <DIGIT-SEQ> <MORE-NUMBERS>,
;
P<FAULT-ACTION> =
<FAULT-LIST> '-' '>' <LABEL>;
P<MORE-FAULTS> =
',' <FAULT-ACTION> <MORE-FAULTS>,
;
# The "spec" form below is for old EMAS Imp procedure parameters:
#
# %routine integrate(%realname y, %real a,b, %integer n, %c
# %realfn f)
# %spec f(%real x)
#
# the fully-qualified form (%realfnspec f(%real x)) is also valid but comes in via the <EXTERNAL> route.
#
# There are other types of %spec related to parameters to procedures as well -
# %recordspec for sure. Not so sure if there is an arrayspec.
# They are only allowed in the body of a procedure.
#
# (Btw in the older compilers, all declarations in a block had to precede any code.)
#
# strictly speaking the <UI> <REST-OF-SS1> form, <UI> "while" <SC>, should only
# allow a single <UI>, not the <UI> <AUI> form. (EMAS Imp)
# (page 4.4 of https://history.dcs.ed.ac.uk/archive/docs/Edinburgh_IMP_Language_Manual.pdf )
P<INTERNAL> =
<NESTED-BLOCK>,
<UI> <REST-OF-SS1>,
'*' <UCI>,
<CYCLE-S>,
<FAULT-S>,
"on" <OPT-event> <EVENT-LIST> <REST-OF-EVENT-LIST> "start" <S> <INTERNALS> "finish" <S>,
"spec" <NAME> <FPP> <S>,
"short" "routine",
<EXTERNAL-OR-INTERNAL>;
# unlike with old-style parsers, the whole of the block content ends up in an AST item.
#
# Unfortunately there is a problem caused by 68000 Imp: some files contain only
# begin/end with no endoffile. Some are begin/end;endoffile instead of begin/endofprogram
# A few are routine blah/endofprogram !
# One had begin/endofprogram followed by more routines.
# (and see elsewhere for 68000's external routine specs which are of the form
# @address %routine blah
# - with no %spec keyword, but no routine body either..)
#
P<NESTED-BLOCK> =
"begin" <S> <INTERNALS> "end" <S>;
# compound statements - blah %and blah %and blah - a short form of startfinish block
P<AUI> =
"and" <UI>,
;
# unconditonal string resolution should *not* be under <OPT-ASSIGN>
# because the LHS is the value and the RHS is the assignee. ***FIX***
# unconditional string resolution should be moved up to <UI> or thereabouts
P<OPT-ASSIGN> =
'-' '>' <STRING-RESOLUTION-EXPR>,
<ASSOP-EXPR>,
;
P<BASIC-UI> =
<LVALUE> <OPT-ASSIGN>;
# the compound statement form ends in UIs which change control flow, ie
# you cannot say: %if whatever %then blah %and ->lab %and this will never happen
# tempted to add %caption here ;-) But that may be a step too far.
# "%monitor 9" would be trapped by "%fault 9 -> lab"
P<UI> =
<BASIC-UI> <AUI>,
"print" "text" <sqstring> <AUI>,
'-' '>' <LABEL>,
"return",
"true",
"false",
"result" <ASSOP-EXPR>,
"monitorstop",
"monitor" <DIGIT-SEQ> <AUI>,
"monitor" <AUI>,
"stop",
"signal" <OPT-event> <CEXPR> <OPEXPR2>,
"exit",
"continue";
P<LABEL> =
<NAME> '(' <EXPR> ')',
<NAME>,
<DIGIT-SEQ>;
# I added an old record declaration syntax for examples out of ai2logo.i
#
# %OWNRECORDARRAY INDEX42( 0:1022) (PIC DIR)
#
# %SYSTEMROUTINESPEC FINFO(%STRING(15) S,%INTEGER LEV,%C
# %RECORDNAME R, %INTEGERNAME FLAG)
#
# %RECORD R(F)
#
# I'm not sure where/if this is handled:
#
# %record(f)%array portinfo(1:31)
#
# I need to get the old Imp manuals out and check a couple of things, in particular
# %recordformatspec and %arrayformatspec ...
#
# The grammar below allows some initialisations that the compilers don't support
# but it's cleaner to reject them in compile()
#
# I had to add initialisations with pointers for these examples from
# 68000 Imp: %bytename b == length(s)
# %ownrecord(i tc f)%namearray tc refs(1:maxtc) == nil(*)
#
# These are 68000 syntax which is rejected by Imp77:
# %conststring(1) %array(1:max params) parameters="B","W","L"
# %string(255) %array(0:maxfiles) list
#
# The equivalent Imp77 syntax would be
#
# %conststring(1) %array parameters(1:max params)="B","W","L"
# %string(255) %array list(0:maxfiles)
#
# Finally here's a one-off hack for a construct I've only seen *once*
# in APM-gdmr/I/PRIM22.imp ...
# %constchar snl=char<10>
#
# The "register" below is for ICL/4-75 Imp9: %REGISTER SC(6)
#
P<DECLARATION-S> =
<OPT-68K> <XOWN> <XTYPE> <OWNDEC> <S>,
<OPT-68K> <XTYPE> <DECLN> <S>,
<OPT-68K> <OPT-XOWN> "record" "array" <RADEC> <REST-OF-RADEC> <S>,
<OPT-68K> <OPT-XOWN> "record" <RDEC> <REST-OF-RDEC> <S>,
"const" "char" <NAME> '=' 'C' 'H' 'A' 'R' '<' <INT-CONST> '>' <S>,
"register" <NAME> '(' <INT-CONST> ')' <S>;
P<OPT-XOWN> =
<XOWN>,
;
# old-style record array declaration
P<RADEC> =
<NAME> <Opt-NAME-LIST> <BPAIR> '(' <RECFMT-REF> ')';
P<REST-OF-RADEC> =
',' <RADEC>,
;
# old-style record declaration
P<RDEC> =
<NAME> <Opt-NAME-LIST> '(' <RECFMT-REF> ')';
P<REST-OF-RDEC> =
',' <RDEC> <REST-OF-RDEC>,
;
# %on %event * %start or %on %event 1,2,3,etc %start
P<EVENT-LIST> =
'*',
<CEXPR>;
# Added old-style while/until for: %UNTIL NEXTINSYM=NL %THEN %CYCLE
# and %WHILE NEXTINSYM#NL %THEN SKIPINSYM
P<CYCLE-S> =
"cycle" <Opt-CYCPARM> <S> <INTERNALS> "repeat" <Opt-UNTIL> <S>,
<Percent-WUF> "cycle" <S> <INTERNALS> "repeat" <Opt-UNTIL> <S>,
<Percent-WU> "then" "cycle" <S> <INTERNALS> "repeat" <Opt-UNTIL> <S>,
<Percent-WU> "then" <UI>;
P<IF-S> =
<Percent-IU> <TOP-LEVEL-CONDITION> <THEN-S>;
# /home/gtoal/gtoal.com/athome/edinburgh/APM-gdmr/I/LIB.imp
# contains:
# %elseif 'A'<=sym<='Z' %start
#
# which is a form of Hamish's that is equivalent to
#
# %finish %else %if 'A'<=sym<='Z' %start
#
# or the more concise
#
# %else %if 'A'<=sym<='Z'
#
# An identical issue is: %elsestart
# in APM-gdmr/ETHER/PROFILE.imp
#
P<FINISH-S> =
"else" <Percent-IU> <TOP-LEVEL-CONDITION> <Opt-start> <S> <INTERNALS> <FINISH-S>,
"else" <Opt-start> <S> <INTERNALS> "finish" <S>,
"finish" <Opt-ELSE-S>;
P<Opt-start> =
"start",
;
# see the comments on <FINISH-S> above.
P<Opt-ELSE-S> =
"else" "start" <S> <INTERNALS> "finish" <S>,
"else" <IF-S>,
"else" <UI> <S>,
<S>;
P<THEN-S> =
<OPT-then> "start" <S> <INTERNALS> <FINISH-S>,
"then" <UI> <Opt-ELSE-S>;
P<OPT-then> =
"then",
;
P<S> =
<NL>,
';';
# Most comments are handled in the line reconstruction phase.
# Comments starting with %COMMENT are an exception.
# Some older compilers treated '!' comments like %COMMENT,
# with "%c" at the end of a comment causing continuation!
# This phrase does not allow ';' in a comment to terminate
# the comment, but some versions of Imp (I think PDP15)
# did, so that will have to be handled with a command-line
# option and a parse-time phrase if it ever is needed.
#
P<COMMENT> =
"comment" <Parsed-OldStyle-Comment>,
'!' <Parsed-OldStyle-Comment>;
P<Parsed-OldStyle-Comment> =
<char> <Parsed-OldStyle-Comment>,
<dqstring> <Parsed-OldStyle-Comment>,
<sqstring> <Parsed-OldStyle-Comment>,
<letter> <Parsed-OldStyle-Comment>,
<digit> <Parsed-OldStyle-Comment>,
<stropped> <Parsed-OldStyle-Comment>,
<NL>;
P<NL-or-SEMI> =
<NL>,
<semi>;
P<semi> =
';';
# <DIGIT-SEQ> '_' <DIGIT-SEQ>, and <DIGIT-SEQ> '_' <NAME>, added by gt
# <NAME> <sqstring> is really X'....' but type checking stops me matching 'X'
# VERY hacky stopgap just to get stuff to parse. Needs proper syntax
# which needs the ability to recognise individual digits and letters
# rather than the current typed 'alphanumeric' hack in line_reconstruction ...
# Doesn't handle %real well, and indeed not at all if containing an '@' sign.
# E"ebcdic" etc also not handled. Hamish's 40_XXX comes in here.
#
## P<basedconst> = "[0-9][0-9]*_[0-9A-Z][0-9A-Z]*"
#
P<INT-CONST> =
<SAVED-INT-CONST>;
# almost forgot 'A' as an integer constant...
P<SAVED-INT-CONST> =
<DIGIT-SEQ> '_' <alphanumeric-SEQ>,
<DIGIT-SEQ>,
<OLDSTYLE-BASE>,
<sqstring>;
P<alphanumeric-SEQ> =
<alphanumeric> <Opt-alphanumeric-SEQ>;
P<Opt-alphanumeric-SEQ> =
<alphanumeric> <Opt-alphanumeric-SEQ>,
;
P<STR-CONST> =
<SAVED-STR-CONST>;
P<SAVED-STR-CONST> =
<OPT-DQ-LETTER> <dqstring>,
<sqstring>;
# current line reconstruction doesn't support this. Letters come through as part of <alphanumeric>s.
P<OPT-DQ-LETTER> =
'E',
;
P<NUMERIC-CONST> =
<INT-CONST>,
<REAL-CONST>;
# <CONST> is somewhat generic and should be replaced with the more specific version when possible
P<CONST> =
<INT-CONST>,
<REAL-CONST>,
<STR-CONST>;
# <sqstring> has to be checked for length and content depending on prefix.
#
## P<lettercharconst> = <squote> <schar> <squote> |
## "[RHX]" <squote> <hexchars> <squote> |
## "B" <squote> <binchars> <squote> |
## "K" <squote> <octchars> <squote> |
## "[CMD]" <squote> <mchars> <squote>;
#
P<OLDSTYLE-BASE> =
<letter> <sqstring>;
# Note: a <DIGIT-SEQ> can be both an integer constant and a real constant.
# Imp77 supports constants like this: %constlongreal PI = 16_3.243F6A89
# which are not handled here yet.
#
## P<realconst> = "[0-9][0-9]*\.[0-9][0-9]*E[0-9][0-9]*" |
## "[0-9][0-9]*\.[0-9][0-9]*" |
## "[0-9][0-9]*@[0-9][0-9]*"
#
P<REAL-CONST> =
<SAVED-REAL-CONST>;
# removed ability to match integer
P<DOT> =
'.';
P<OPT-DOT> =
<DOT>,
;
P<SAVED-REAL-CONST> =
<DIGIT-SEQ> <OPT-DOT> <EXPONENT>,
<rest-of-number> <FRACPT> <OPT-EXPONENT>,
<DIGIT-SEQ> '.' <OPT-EXPONENT>;
P<FRACPT> =
'.' <DIGIT-SEQ>;
P<OPT-FRACPT> =
<FRACPT>,
;
P<OPT-Plus-Minus> =
'+',
'-',
;
P<EXPONENT> =
'@' <OPT-Plus-Minus> <DIGIT-SEQ>;
P<OPT-EXPONENT> =
<EXPONENT>,
;
P<DIGIT-SEQ> =
<digit> <rest-of-number>;
P<rest-of-number> =
<DIGIT-SEQ>,
;
P<NAME> =
<letter> <!SQUO-STRING> <opt-letter-or-digit-seq>;
## P<NAME> =
## <alphanumeric> <!SQUO-STRING> <rest-of-name>;
## P<rest-of-name> =
## <NAME>,
## ;
P<SQUO-STRING> =
<sqstring>;
# A UCI is a machine code instruction.
# (Hamish's PDP15 imp machine code syntax (which is not introduced by '*') is definitely not supported.)
#
# Machine-specific machine-code removed. May cause problems with ';' and quoted chars in machine code
#
P<UCI> =
<NL-or-SEMI>,
<char> <UCI>,
<dqstring> <UCI>,
<sqstring> <UCI>,
<stropped> <UCI>;
# Imp does not let you mix and/or without explicit brackets. They do not have a precedence relationship.
# The implied continuation after %and and %or in some versions of Imp are handled hackily
# in line reconstruction, not here (and will eventually be made a command-line option).
P<REST-OF-COND> =
"and" <SC> <REST-OF-ANDC>,
"or" <SC> <REST-OF-ORC>,
;
P<REST-OF-ANDC> =
"and" <SC> <REST-OF-ANDC>,
;
P<REST-OF-ORC> =
"or" <SC> <REST-OF-ORC>,
;
P<CONDITION> =
<SC> <REST-OF-COND>;
# <NAME> <OPT-ACTUAL-PARAMETERS> added by gt for predicates
# RESTOFSC handles double-sided conditions such as '0' <= ch <= '9'
P<SC> =
<STRING-RESOLUTION>,
<EXPR> <COMP> <EXPR> <OPT-DOUBLE-SIDED>,
<NAME> <OPT-ACTUAL-PARAMETERS>,
'(' <CONDITION> ')',
"not" <SC>;
P<OPT-DOUBLE-SIDED> =
<COMP> <EXPR>,
;
# LHS of -> is *not* a string expression according to Imp77 (but no reason why it should not be)
P<STRING-RESOLUTION> =
<LVALUE> '-' '>' <STRING-RESOLUTION-EXPR>;
# Current Imp77 restricts string resolution to -> a.("test").b with a and b being optional.
# Older Emas implementations allowed multiple parts I'm fairly sure, eg -> a.("match1").b.("match2").c
#
# Also, behaviour changed re initial ("match") with no assignment variable before it -
# used to be that "match" had to start the string, but now there can be text before
# it, and a null match has to be coded as -> a.("match").b %and a = ""
#
# So if we need to distinguish between these variations, do it in compile()
#
P<STRING-RESOLUTION-EXPR> =
<STR-MATCH> <OPT-STR-ASSIGN>,
<STR-ASSIGN> <OPT-STR-MATCH>;
P<STRING-LVALUE> =
<LVALUE>;
P<STRING-RVALUE> =
<STRING-LVALUE>,
<STR-CONST>,
'(' <STRING-EXPR> ')';
# No bracketed expressions, only '.' operator allowed, although once again,
# Hamish has an idiosyncratic extension where strings in a printstring are
# separated by semi-colons - I think it is some approximation to C's printf?
#
P<STRING-EXPR> =
<STRING-RVALUE> <REST-OF-STRING-EXPR>;
P<DOTTED-STRING-EXPR> =
<STRING-RVALUE> <?DOT> <REST-OF-STRING-EXPR>;
P<REST-OF-STRING-EXPR> =
'.' <STRING-RVALUE> <REST-OF-STRING-EXPR>,
;
P<STR-MATCH> =
'(' <STRING-EXPR> ')';
P<STR-ASSIGN> =
<STRING-LVALUE>;
# ping-pong. Can't allow -> (a).b.c.(d) or -> a.(b).(c).d
P<OPT-STR-ASSIGN> =
'.' <STR-ASSIGN> <OPT-STR-MATCH>,
;
P<OPT-STR-MATCH> =
'.' <STR-MATCH> <OPT-STR-ASSIGN>,
;
# Ordering is important, eg '##' must come before '#'
#
# I really pity people who use the sort of grammar that requires
# eliminating shift-reduce conflicts before they can parse anything :-)
#
P<COMP> =
'=' '=',
'=',
<NOT-EQUALS-ADDRESS>,
<NOT-EQUALS>,
'<' '=',
'<',
'>' '=',
'>';
# '<>' is a really obscure form but it definitely used to be used. I'll
# post a reference as soon as I locate one.
#
P<NOT-EQUALS> =
'#',
'\\' '=',
'<' '>';
P<NOT-EQUALS-ADDRESS> =
'#' '#',
'\\' '=' '=';
# Procedure application, i.e. actual parameters to routine or function call:
# We *only* want to accept literals and const scalar variables in these lists -
# array elements, record fields, function calls ... all disallowed.
#
# The parse-time code eliminates most of the illegal objects, and a type check
# at the code generation stage will catch the remaining few. This was necessary
# because something like: err(4)
# was previously being recognised as possibly an array access or a procedure call
# when it fact the (4) was really a repetition count.
#
# The phrase: <immediate:return !in_const_initialiser_list;>
# causes the parse to fail at that point if we are in a const initialiser list.
# (actually if we are in *any* <CEXPR> to be honest).
P<OPT-ACTUAL-PARAMETERS> =
<immediate:return !in_const_initialiser_list;> '(' <EXPR> <REST-OF-ACTUAL-PARAMETERS> ')',
;
P<REST-OF-ACTUAL-PARAMETERS> =
',' <EXPR> <REST-OF-ACTUAL-PARAMETERS>,
;
# handling precedence with the grammar rather than Edinburgh-style in the AST. Tradeoff of speed for simplicity.
# We don't create rules to parse constant expressions as something different from an expression composed
# of variables - instead it is a wrapper around a regular <EXPR> which tests to see if the EXPR can be
# evaluated at compile time, and if so it is folded and returned as a signed constant decimal integer.
# It should really have been named <CONST-INT-EXPR> ...
P<CEXPR> =
<immediate:in_const_initialiser_list = TRUE; return TRUE;> <EXPR> <immediate:in_const_initialiser_list = FALSE; return TRUE;>,
<immediate:in_const_initialiser_list = FALSE; return FALSE;>;
# <DOTTED-STRING-EXPR> here restricted to expressions containing "." concatenation
# a simple string variable or function etc comes through on the second option
P<EXPR> =
<DOTTED-STRING-EXPR>,
<MEDIUMPREC-EXPR> <REST-OF-EXPR>;
P<REST-OF-EXPR> =
<OP-LOW> <MEDIUMPREC-EXPR> <REST-OF-EXPR>,
;
P<MEDIUMPREC-EXPR> =
<HIGHPREC-EXPR> <REST-OF-MEDIUMPREC-EXPR>;
P<REST-OF-MEDIUMPREC-EXPR> =
<OP-MED> <HIGHPREC-EXPR> <REST-OF-MEDIUMPREC-EXPR>,
;
P<HIGHPREC-EXPR> =
<RIGHT-ASSOC-EXPR> <REST-OF-HIGHPREC-EXPR>;
P<REST-OF-HIGHPREC-EXPR> =
<OP-HIGH-LEFTASSOC> <RIGHT-ASSOC-EXPR> <REST-OF-HIGHPREC-EXPR>,
;
P<RIGHT-ASSOC-EXPR> =
<REST-OF-RIGHTASSOC-EXPR> <UNARY-EXPR>;
P<REST-OF-RIGHTASSOC-EXPR> =
<RIGHTASSOC-EXPR-with-OP> <REST-OF-RIGHTASSOC-EXPR>,
;
P<RIGHTASSOC-EXPR-with-OP> =
<UNARY-EXPR> <OP-HIGH-RIGHTASSOC>;
P<UNARY-EXPR> =
<Opt-UNARY-OP> <OPERAND>;
# These pointer arithmetic elements are for Hamish's PDP9/15 Imp (old): INDP == INDEX0++P
# and for Hamish's 68000 Imp: %if tp[1]_mode # litmode %then lo = minint %else lo = tp[1]_val
# I hope this grammar definition causes the right precedence level...
#
# They can also be used in LVALUEs.
P<Opt-PARAMETERS-OR-POINTERINDEX> =
<immediate:return !in_const_initialiser_list;> '[' <EXPR> ']',
<immediate:return !in_const_initialiser_list;> '+' '+' <EXPR>,
<immediate:return !in_const_initialiser_list;> '-' '-' <EXPR>,
<OPT-ACTUAL-PARAMETERS>;
P<Opt-SUBFIELD> =
<immediate:return !in_const_initialiser_list;> '_' <SUBFIELD>,
;
P<Opt-RECORDFIELD> =
<immediate:return !in_const_initialiser_list;> '_' <RECORDFIELD>,
;
# The old form of modulus (!X!) is for really ancient compilers such as Hamish's Pdp15 Imp.
# Unfortunately there are obscure cases where an ambiguous parse results:
#
# putact(condop!!polarity!!1+polarity<<7,item,0)
#
# Does that mean
# putact(condop !! polarity !! 1 + polarity<<7, item, 0)
# or
# putact(condop ! !polarity! ! 1 + polarity<<7, item, 0)
#
# I need to check but I *hope* that the grammar here causes the first of the above to be selected.
# If not, we'll need a command-line option here to force the version of Imp being used.
#
# I just hope that there's no version of Imp that ever supported factorials with a postfix '!'
#
P<OPERAND> =
<REAL-CONST>,
<INT-CONST>,
<STR-CONST>,
<LVALUE>,
'(' <EXPR> ')',
'|' <EXPR> '|',
'!' <EXPR> '!';
# <OP> is only used in <CEXPR>s now and will be removed entirely
# once CEXPRs are recoded to use the same grammar as EXPRs.
#
# *However* I am considering doing away with CEXPRs altogether,
# and just using <EXPR> everywhere, in conjunction with a parse-time
# test to check whether any of the terms were in fact *not* constants.
#
# Also... it's not obvious if we could in fact use EXPRs everywhere *without*
# the overhead of having to organise the code/name tables so that they
# can be checked for being constants at parse time, and merely reject
# them from the compile() procedure instead? This would be possible
# as long as there are no parse ambiguities caused by checking after
# parsing. I know that's a problem in C but Imp's simpler syntax
# might let us get away with it!
#
# Note that none of these options eliminates the possibility of implementing
# conditional compilation the EMAS Imp80 way using
#
# %if <CEXPR> %start
# ! conditional code
# %finish
#
# (Though I've always had questions about that on EMAS if there's a
# label inside the block - the ability to use that construct for
# conditional compilation depends on whether that label is ever
# jumped to from code outside the block! - doing it properly requires
# full flow control analysis and a transitive closure on the flow graph!)
#
# highest precedence, left-associative
P<OP-HIGH-LEFTASSOC> =
'<' '<',
'>' '>';
# highest precedence, right-associative (only exponentiation)
#
# I found this expression the 68000 Imp compiler:
#
# y = |r|+0.5/10.0^m; !modulus, rounded
#
# which I parse as:
#
# y = |r|+( 0.5 / (10.0^m)); !modulus, rounded
#
# I presume this is a clever way to round the last digit of a decimal,
# but damn, how it works is beyond me.
# Typical Hamish code! Brilliant (I presume) but obscure and uncommented.
#
P<OP-HIGH-RIGHTASSOC> =
<INTEGER-EXP>,
<REAL-EXP>;
P<INTEGER-EXP> =
'*' '*' '*' '*',
'\\' '\\',
'^' '^';
# eq is used in '\\' <!eq> to stop it being accepted when the input should be matching '\\' '=' or '\\' '=' '='
# (matching '\\' '\\' is not an issue as <INTEGER-EXP> is tested before <REAL-EXP>)
P<eq> =
'=';
P<REAL-EXP> =
'*' '*',
'\\' <!eq>,
'^';
# middle-precedence operators. All left-associative.
P<OP-MED> =
'*',
'/' '/',
'/',
'&';
# lowest precedence operators. All left-associative.
P<OP-LOW> =
'+',
'-',
'!' '!',
'!';
# You would think that unary operators always have the highest precedence,
# but not so - here's a tricky one...
#
# I = -1 >> 1
#
# This always has the value 0, regardless of integer size, because a unary minus
# in front of a constant is *NOT* interpreted as a negative constant, but rather
# the positive constant preceded by a unary negation operator.
#
# It gets worse... unary negation is implemented as binary subtraction with
# an inserted '0' left operand, i.e.
#
# 0 - 1 >> 1
#
# Shifts have higher precedence than subtraction, so this becomes:
#
# 0 - (1 >> 1)
#
# which evaluates to 0.
#
# The 68000 Imp compiler includes this statement which I hope is correct! dict(firstpos)_val = -cad>>1
#
# Some versions of Imp (I think Imp80) forbid a unary '+' before constants,
# so A + +3 would be illegal (although A + -3 would not)
#
# and none of this applies to binary-not (\) so \3 really is higher
# priority than '>>' and \3 >> 1 does what you would expect. I hope.
#
# https://gtoal.com/imp77/reference-manual/A_Syntactic_and_Semantic_Definition_of_the_IMP_Language.pdf
# 9. An initial minus sign is treated exactly as '0-'
# 10. An initial 'not' has the highest precedence has the effect of 'exclusive
# or' with X'FFFFFFFF'. Thus \A**B is exactly equivalent to (X'FFFFFFFF'!!A)**B
P<Opt-UNARY-OP> =
'+',
'-',
<UNARY-NOT>,
;
# fortunately unary backslash is unambiguous with ERCC style exponentiation binary operator
P<UNARY-NOT> =
'\\',
'~';
P<Percent-IU> =
"if",
"unless";
P<ONE-SWITCH-DECL> =
<NAME> <Opt-NAME-LIST> '(' <CEXPR> ':' <CEXPR> ')';
# rest of %switch list
# This reminds me, isn't there an optional %label declaration in Imp? Haven't seen an example yet,
# I need to check the manuals to make sure I'm not confusing this with C...
P<REST-OF-SWLIST> =
',' <ONE-SWITCH-DECL> <REST-OF-SWLIST>,
;
P<Opt-NAME-LIST> =
',' <NAME> <Opt-NAME-LIST>,
;
P<REST-OF-EVENT-LIST> =
',' <CEXPR> <REST-OF-EVENT-LIST>,
;
# Still to handle operator precedence etc for <CEXPR>s the same way as for <EXPR>s.
# However it is such a major duplication of effort that it might be much easier to
# just use <EXPR>s and have a semantic test for constantcy.
# The problem is the ambiguity of <NAMES>. And you don't want any of the decorated
# forms of a name (records, function calls, pointers, maybe even rejecting const
# array elements?) - just simple literals and const integer operands.
## P<REMOVED-CEXPR> =
## <Opt-UNARY-OP> <COPERAND> <REST-OF-CEXPR>;
## P<REST-OF-CEXPR> =
## <OP> <COPERAND> <REST-OF-CEXPR>,
## ;
P<OPT-event> =
"event",
;
P<FPP> =
'(' <FORMAL-PARAMETER-DECLARATION> <REST-OF-FORMAL-PARAMETER-LIST> ')',
;
P<REST-OF-FORMAL-PARAMETER-LIST> =
<Opt-Comma> <FORMAL-PARAMETER-DECLARATION> <REST-OF-FORMAL-PARAMETER-LIST>,
;
# %routine y (%integer %name %array (1) %name b)
# %routine r10(%integer%array%name a(1:10),%integer k)
P<FORMAL-PARAMETER-DECLARATION> =
<XTYPE> <OPT-arrayname> <NAME> <OPT-68K-Bounds> <Opt-NAME-LIST>,
<RT> <OPT-name> <NAME> <Opt-NAME-LIST> <FPP>,
"name" <Opt-68K-reg> <NAME> <Opt-NAME-LIST>;
# More Hamish idiosyncracy!:
#
# @16_1114%record(*)%map ZNEW(%name(d0) v)
# - APM-gdmr/I/VPERM19.imp
# d0 being a register
#
P<Opt-68K-reg> =
'(' <NAME> ')',
;
P<OPT-name> =
"name",
;
# added predicate - gt
P<RT> =
"routine",
"predicate",
<XTYPE> <FN-MAP>;
# note map is equivalent to "name function"
P<FN-MAP> =
<FN>,
"map";
P<FN> =
"fn",
"function";
# Confirmed that label is used. Found one in 68000 code for labels used in embeddded machine code:
# %label f1,f2,f3,f4,f5,f6,f7,b0,b1,b2,b3,b4,b5,b6,b7,end
#
# "long long" would be added here if we supported 64 bit ints or 128bit reals...
# Plain "long" added here by making <BTYPE> optional.
# (BTYPE being integer/real/longreal)
#
P<longlong> =
"long" "long",
"long";
P<BASE-XTYPE> =
"integer",
"real",
"label",
<longlong> <Opt-BTYPE>,
"byte" <Opt-INTEGER>,
"mite" <Opt-INTEGER>,
"short" <Opt-INTEGER>,
"half" <Opt-INTEGER>,
"string" <Opt-STRLEN>,
"record" <Opt-RFREF>;
# does %record(i tc f)%name match <XTYPE>?
#
# Courtesy of Hamish, two more language extensions:
#
# Two <OPT-68K-qualifier>s to allow one or both of %writeonly %volatile %byte %integer device
#
P<XTYPE> =
<OPT-68K-qualifier> <BASE-XTYPE> <OPT-name>;
P<68K-memptr> =
"read" "only" <68K-memptr>,
"write" "only" <68K-memptr>,
"volatile" <68K-memptr>,
;
P<OPT-68K-qualifier> =
<68K-register>,
<68K-memptr>,
;
# C-like register qualifiers (Hamish, again)
# %register(a2)%integer r1,r2
# %register(a2)%byte%name bn
# %register(a3)%short%name sn
P<68K-register> =
"register" '(' <NAME> ')';
# '*' added by gt for recordformat(*)
# () moved from point of call to here to support old style "%recordname fred" without format. (Uses a formatspec instead)
P<Opt-RFREF> =
'(' <RECFMT-REF> ')',
'(' '*' ')',
;
# RF in the rules below is short for Record Format
P<RECFMT-REF> =
<NAME>,
<RECFMT-DEC> <REST-OF-RECFMT-DEC> <ALT-RECFMT-DEC>;
# Imp variant record. NOT the same as C's which is per field. This is per record.
# Still to check: a ( ... %or ... ) subgroup in a record definition, where the ...
# represents several declarations, with the whole ( ... ) group being a part
# of an enclosing record.
#
P<ALT-RECFMT-DEC> =
"or" <RECFMT-DEC> <REST-OF-RECFMT-DEC> <ALT-RECFMT-DEC>,
;
P<REST-OF-RECFMT-DEC> =
',' <RECFMT-DEC> <REST-OF-RECFMT-DEC>,
;
P<RECFMT-DEC> =
<XTYPE> <RECFMT-ELMNT>,
'(' <RECFMT-DEC> <REST-OF-RECFMT-DEC> <ALT-RECFMT-DEC> ')';
# Now "name" has been removed from <OPT-arrayname-or-name>, check at the point
# of invocation, that "name" still works, now it as been added to <XTYPE>...
#
P<RECFMT-ELMNT> =
<OPT-arrayname> <NAME> <Opt-NAME-LIST>,
"array" <ADECLN-in-record>;
# Array declaration
P<ADECLN-in-record> =
<NAME> <Opt-NAME-LIST> <BPAIR> <REST-OF-ARLIST-in-record>;
P<REST-OF-ARLIST-in-record> =
',' <ADECLN-in-record>,
;
P<LOWERBOUND> =
<EXPR>,
<BASE-XTYPE> <NAME>;
# Another Hamishism... %integer %array %name fred(1:*)
# which in Imp77 would be %integer %array (1) fred
# (reject the '*' in other contexts)
#
# In the declaration of an array name variable, as in
#
# %integer %array %name a(lo1:hi1,...,loN:hiN)
#
# each of the lower and upper bounds may be:
#
# (a) a literal or literal expression, for example:
#
# %integer %array %name a(1:1000)
# %real %array %name delta(10:20,1:30)
#
# In this case, the corresponding bound of every actual parameter
# must have the identical literal value.
#
# (b) a variable declaration, for example:
#
# %byte%array%name used(1:%integer xdim,1:%integer ydim)
# %short%array%name count(%byte lo:%byte hi)
#
# In this case, the corresponding bound of the actual parameter is
# assigned to the stated variable at the time of procedure entry.
# This case, which is similar to the Pascal conformant array concept,
# is confined to %array %names as procedure parameters. It cannot
# be used for %array %names declared as ordinary variables.
#
# (c) an asterisk, for example:
#
# %integer%array%name cases(1:*)
#
# The use of an asterisk should be confined to the UPPER bound of
# the FIRST (outer) dimension only, and when it appears, all other
# bounds should be literal. It implies an unspecified number of
# elements in the array (and the absence of bound-checking even
# with checks on).
#
# For the time being, the Vax/VMS form ... %array(n)%name ... is also
# accepted, but note that the '(n)' part is obligatory even when n is one.
P<UPPERBOUND> =
'*',
<EXPR>,
<BASE-XTYPE> <NAME>;
# List of lower/upper bound pairs, for array declarations.
P<BPAIR> =
'(' <LOWERBOUND> ':' <UPPERBOUND> <REST-OF-BPLIST> ')';
P<BPAIR68K> =
'(' <LOWERBOUND> ':' <UPPERBOUND> <REST-OF-BPLIST68K> ')';
# Multi-dimensional arrays
P<REST-OF-BPLIST> =
',' <EXPR> ':' <EXPR> <REST-OF-BPLIST>,
;
P<REST-OF-BPLIST68K> =
',' <LOWERBOUND> ':' <UPPERBOUND> <REST-OF-BPLIST68K>,
;
# <Opt-DIMENSIONS> added to handle the Vax Imp extension:
#
# %routine y (%integer %name %array (1) %name b)
#
# and <OPT-68K-Bounds> added to handle:
#
# %routine extract params(%string(255) %name in,out,
# %integer %array(1:max params) %name params)
#
P<OPT-arrayname> =
"array" <Opt-DIMENSIONS> "name",
"array" <OPT-68K-Bounds> "name",
;
# for the example from 68000 Imp by way of Vax Imp,
# %routine y (<whatever type> %array (1) %name b)
# mentioned in https://gtoal.com/imp77/reference-manual/Imp-M68K-V2.lay
P<Opt-DIMENSIONS> =
'(' <INT-CONST> ')',
;
P<Opt-INTEGER> =
"integer",
;
# "42(2)" is equivalent to "42, 42". (*) means 'as many more are needed to fill the array' so must come last.
# However it looks like <REPFACT> is also used as the string length indicator?!
P<Opt-STRLEN> =
'(' <Opt-STAROREXPR> ')',
;
P<REPEATS> =
'(' <Opt-STAROREXPR> ')',
;
P<Opt-STAROREXPR> =
<STAROREXPR>,
;
# Used in switch label. (*) marks a catch-all. (like default: in C)
# Or at least SHOULD be used in switch label. Looks like it isn't... just used in <REPFACT>?
P<STAROREXPR> =
<CEXPR>,
'*';
# Opt- added here to allow plain "%long" as a declaration at <XTYPE>
P<Opt-BTYPE> =
"real",
"integer",
;
P<Opt-Comma> =
',',
;
P<Opt-ALIAS> =
"alias" <TEXTTEXT>,
;
P<TEXTTEXT> =
<dqstring>;
# if we add a "C" type (to reverse the order of parameter evaluation) it would be done here.
# I don't think you would write %permroutine since they are built in to the compiler, but
# depending on how the compiler is implemented, you might, and if so they would be added here.
# I do have a vague recollection of perm routines being matched up with internal code using
# a file or declarations containing magic numbers, eg something like:
# %perm (5) %routine write(%integer n, sp)
# - but that may be a manufactured memory!
#
P<Percent-SEX> =
"system",
"external",
"dynamic",
"prim",
"perm",
;
P<RECFMT-spec-OR-BODY> =
"spec" <NAME>,
<NAME> '(' <RECFMT-DEC> <REST-OF-RECFMT-DEC> <ALT-RECFMT-DEC> ')';
# 68000 Imp has a non-standard syntax for some array declarations, eg
#
# %conststring(1) %array(1:max params) parameters="B","W","L"
# %string(255) %array(0:maxfiles) list
#
# The equivalent Imp77 syntax would be
#
# %conststring(1) %array parameters(1:max params)="B","W","L"
# %string(255) %array list(0:maxfiles)
#
# Putting bounds after a %name is also from the 68000:
#
# %integer%array%name an10(1:10)
#
# which is likely the same as
#
# %integer%array(1)%name an10
#
# in Imp77. We just haven't been told the lower bound...
#
# meanwhile this broke: %string (64) %array %format sform1(0 : 1022)
#
# I'm not sure if there is any significant difference nowadays between own declarations and non-own declarations...
# the used to be distinguished by allowing initialisations... maybe worth merging the definitions now?
P<DECLN> =
<OPT-arrayname> <OPT-spec> <NAME-or-STAR-68K> <OPT-68K-Bounds> <Opt-Init-assign> <Opt-Assign-NAME-LIST>,
"array" <OPT-spec> <OPT-format> <NAME> <Opt-NAME-LIST> <BPAIR> <REST-OF-ARLIST>,
"array" <BPAIR> <OPT-spec> <OPT-format> <NAME> <Opt-NAME-LIST> <REST-OF-ARLIST-68K>;
P<OWNDEC> =
<OPT-arrayname> <OPT-spec> <SINGLE-OWNDEC> <REST-OF-OWNDEC>,
"array" <OPT-format> <OPT-spec> <BPAIR> <NAME> <Opt-Init-assign-array>,
"array" <OPT-format> <OPT-spec> <NAME> <BPAIR> <Opt-Init-assign-array>;
P<REST-OF-ARLIST> =
',' <NAME> <Opt-NAME-LIST> <BPAIR> <REST-OF-ARLIST>,
;
P<REST-OF-ARLIST-68K> =
',' <NAME> <Opt-NAME-LIST> <REST-OF-ARLIST-68K>,
;
P<OPT-68K-Bounds> =
<BPAIR68K>,
;
P<OPT-format> =
"format",
;
#
# This is the same ugly hack as <Opt-Assign-NAME-LIST> below
# The AST handling has to check for and reject the '*' in all contexts except for
# 68000-style declarations such as "@16_4000c0 %byte *,acia s,*,acia d"
#
# Below is a note from inside the 68K compiler that may be relevant:
#
#! %record(*) or %string(*) needs %name (or @)
#! %string alone allowed only for %const%string
#
P<NAME-or-STAR-68K> =
'*',
<NAME>;
# <Opt-Init-assign> added by gt
# This hideous hack with '*' for an anonymous entry is yet another of Hamish's idiosyncrasies:
#
# @16_7fffc %byte status,data,*,control
# ^
# "/home/gtoal/gtoal.com/athome/edinburgh/APM-gdmr/ETHER/SPY.imp"
#
# @16_4000c0 %byte *,acia s,*,acia d
#
# and would appear to just be a lazy way to skip over an address when assigning multiple
# individual scalars to addresses...
#
# Obviously it should not be allowed in any other context - some semantic checking needed here...
#
P<Opt-Assign-NAME-LIST> =
',' '*' <Opt-Assign-NAME-LIST>,
',' <NAME> <Opt-Init-assign> <Opt-Assign-NAME-LIST>,
;
# Need <EXPR> instead of <CEXPR> to catch = tostring(i) because cexpr only got tostring
# Need '== <EXPR>' instead of '== <LVALUE>' to catch '== length(s)', i.e. a %map although it looks like a fn call.
# - the latter being more of a failing of LVALUE that here.
# Removed:
# '==' <LVALUE>,
# '=' <CEXPR>,
# These will be improved once we have semantic checks...
#
P<Opt-Init-assign> =
<ASSOP-EXPR>,
;
# removed string resolution from assop - too much type checking
P<ASSOP> =
'=' '=',
<SCALAR-ASSIGN>;
P<ASSOP-EXPR> =
'=' '=' <LVALUE>,
<SCALAR-ASSIGN> <EXPR>;
P<SUBFIELD> =
<NAME> <Opt-PARAMETERS-OR-POINTERINDEX> <Opt-SUBFIELD>;
P<RECORDFIELD> =
<NAME> <Opt-PARAMETERS-OR-POINTERINDEX> <Opt-RECORDFIELD>;
P<LVALUE> =
<NAME> <Opt-PARAMETERS-OR-POINTERINDEX> <Opt-RECORDFIELD>;
# An initialised array declaration does not need a %C following the '=' before the list of initial values.
# this is a stupid hack to get around comments in the middle of array declarations :-(
# note that it gobbles the newline
# This has to handle: "== nil(*)" from %ownrecord(i tc f)%namearray tc refs(1:maxtc) == nil(*)
# Turns out <SCALAR-ASSIGN> didn't include '==' so switched to <ASSOP> instead
P<Opt-Init-assign-array> =
<ASSOP> <stupid> <ARRAY-INITIALISER> <ROCL>,
;
# the version without "for" is to support an old-style statement, %WHILE <cond> %THEN <ui>
P<Percent-WU> =
"while" <TOP-LEVEL-CONDITION>,
"until" <TOP-LEVEL-CONDITION>;
P<Percent-WUF> =
"while" <TOP-LEVEL-CONDITION>,
"until" <TOP-LEVEL-CONDITION>,
"for" <NAME> <SCALAR-ASSIGN> <EXPR> ',' <EXPR> ',' <EXPR>;
P<SCALAR-ASSIGN> =
'=',
'<' '-';
P<TOP-LEVEL-CONDITION> =
<CONDITION>;
P<Opt-UNTIL> =
"until" <TOP-LEVEL-CONDITION>,
;
P<Opt-CYCPARM> =
<NAME> <SCALAR-ASSIGN> <EXPR> ',' <EXPR> ',' <EXPR>,
;
# gt changed signal to allow 2 exprs. Mouses Imp77 adds an optional string!
# 68000: %signal 5,,holeend,"HEAPPUT: heap corrupt" %if holeend>lhb_limit
P<OPEXPR2> =
',' <OPT-EXPR> <OPEXPR>,
;
P<OPT-EXPR> =
<EXPR>,
;
# single optional ", expr" for use with %signal event (, subevent)?
P<OPEXPR> =
',' <OPT-EXPR> <OPSTRING>,
;
# %signal 3,4,position,"Set input fails" %unless position=0
P<OPSTRING> =
',' <STRING-EXPR>,
;
# reversed conditions and 1-line cycles
P<REST-OF-SS1> =
<Percent-IU> <TOP-LEVEL-CONDITION> <S>,
<Percent-WUF> <S>,
<S>,
;
P<REST-OF-OWNDEC> =
',' <SINGLE-OWNDEC> <REST-OF-OWNDEC>,
;
P<SINGLE-OWNDEC> =
<NAME> <Opt-ALIAS> <Opt-Init-assign>;
P<ROCL> =
',' <stupid> <ARRAY-INITIALISER> <ROCL>,
;
P<ARRAY-INITIALISER> =
<CEXPR> <REPEATS>;
P<stupid> =
<COMMENT>,
;
# extrinsic is an old form meaning roughly "external spec"
# These apply to data as opposed to <Percent-SEX> which apply to procedures.
P<XOWN> =
"own",
"external",
"extrinsic",
"constant",
"const";
# Constant conditions are not used to optimise code, but rather as a substitute
# for C's 'ifdef' pre-processor.
P<CONST-IF-S> =
<Percent-IU> <CONST-CONDITION> <CONST-THEN-S>;
P<CONST-THEN-S> =
<OPT-then> "start" <S> <EXTERNALS-OR-INTERNALS> <CONST-FINISH-S>,
"then" <UI> <CONST-Opt-ELSE-S>;
P<CONST-FINISH-S> =
"else" <Percent-IU> <CONST-CONDITION> <Opt-start> <S> <EXTERNALS-OR-INTERNALS> <CONST-FINISH-S>,
"else" <Opt-start> <S> <EXTERNALS-OR-INTERNALS> "finish" <S>,
"finish" <CONST-Opt-ELSE-S>;
P<CONST-Opt-ELSE-S> =
"else" "start" <S> <EXTERNALS-OR-INTERNALS> "finish" <S>,
"else" <CONST-IF-S>,
"else" <UI> <S>,
<S>;
P<CONST-CONDITION> =
<CONST-SC> <CONST-REST-OF-COND>;
# <STRING-RESOLUTION>,
# <NAME> <OPT-ACTUAL-PARAMETERS>,
P<CONST-SC> =
<CEXPR> <COMP> <CEXPR> <CONST-OPT-DOUBLE-SIDED>,
'(' <CONST-CONDITION> ')',
"not" <CONST-SC>;
P<CONST-OPT-DOUBLE-SIDED> =
<COMP> <CEXPR>,
;
P<CONST-REST-OF-COND> =
"and" <CONST-SC> <CONST-REST-OF-ANDC>,
"or" <CONST-SC> <CONST-REST-OF-ORC>,
;
P<CONST-REST-OF-ANDC> =
"and" <CONST-SC> <CONST-REST-OF-ANDC>,
;
P<CONST-REST-OF-ORC> =
"or" <CONST-SC> <CONST-REST-OF-ORC>,
;
P<EXTERNALS-OR-INTERNALS> =
<EXTERNAL-OR-INTERNAL> <EXTERNALS-OR-INTERNALS>,
;
E