//#define ORIG 1
#define STUPID_COMP_SHORTCUT
// macro-process pseudo stack-based assembly language into actual x86 code, and compile with nasm
/*
http://en.wikipedia.org/wiki/X86_calling_conventions
http://unixwiz.net/techtips/win32-callconv-asm.html
http://www.codeproject.com/KB/cpp/calling_conventions_demystified.aspx
http://www.agner.org/optimize/calling_conventions.pdf - see table 4, register usage
eax, ecx and edx may be corrupted inside a function.
edx is used for long long results, which I'm not returning, so don't care.
I push left->right, not right->left, so incompatible with CDECL at the moment, unless you
call in reverse from the program (actually, same as Imp77 on intel does)
another pending optimisation:
call putchar ;; CALL putchar , 1 , 0
add esp, 4
mov esp, ebp ;; RET 0
pop ebp
ret
i think any assignment to esp is redundant before a return.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
static char *Progname;
FILE *infile, *outfile;
#define TRUE (0==0)
#define FALSE (0!=0)
#define DEBUG_PARSER 1
#define DEBUG_LEXER 1
//#define DEBUG 1
int debug_parser = FALSE;
int debug_lexer = FALSE;
int opt_optimiser = FALSE;
#define sourcefile infile
#define curfile "infile"
#define MAXLINE 1024
#include "assemble.h"
/* C[] is the source character token stream */
typedef struct sourceinfo { // ATOMS for processed input stream
char *s; // string contents
int l; // lineno
int col; // column
int t; // type - tag, "string", 'charconst', or char, so far
char *f; // source or includefile name
} sourceinfo;
static sourceinfo c[16*1024];
int nextfree, arraysize;
char onecharstr[1024];
int A[16*1024];
int next_free_a, a_size;
int cp; // code pointer. Has to be global state.
int ap; // Analysis record pointer.
// Built-in phrase codes. Must match with grammar file.
#define TYPE_EOF 0
#define TYPE_TAG 1
#define TYPE_STRING 2
#define TYPE_CHARCONST 3
#define TYPE_CHAR 4
#define TYPE_INT 5
//#define TYPE_HEXINT 6
#define TYPE_KEYWORD 7
#define TYPE_COMMENT 8
#define TYPE_NEWLINE 9
int lineno = 1, col = 0;
int startline = TRUE;
#define MAXPOOL (1024*32)
char stringpool[MAXPOOL];
int nextstring = 0;
static char *itoaf(char *fmt, int i) // unsafe, can't call twice in one statement
{
static char s[128];
sprintf(s, fmt, i);
return s;
}
static char *f(char *fmt, char *p) // unsafe, can't call twice in one statement
{
static char s[512];
sprintf(s, fmt, p);
return s;
}
static int next_free_entry = 0;
#define MAX_ENTRIES 10000
typedef struct line {
char *psect;
char *label;
char *opcode;
char *opd1;
char *opd2;
char *comment;
} line;
line program[MAX_ENTRIES];
static char xline[1024];
static int xi = 0, xcur_line = 1;
static void xungetc(int c, FILE *f)
{
if (xi > 0) {
xi -= 1;
} else if (c == '\n') {
xcur_line -= 1;
xi = strlen(xline);
}
ungetc(c, f);
}
static int xfgetc(FILE *f)
{
int c, ch;
c = fgetc(f);
if (c == EOF) return EOF;
ch = c&255;
if (ch == '\n') {
xline[xi] = '\0'; xi = 0;
// (void)make_binary_tuple(LINE, xcur_line++, (int)strdup(xline));
} else xline[xi++] = ch;
if (xi == 1023) xi = 1022;
xline[xi] = '\0';
return c;
}
int str_to_pool(char *s)
{
int tag;
for (tag = 0; tag <= nextstring; tag++) {
if (strcmp(stringpool+tag, s) == 0) {
return tag; /* found, one way or another */
}
}
}
void stores(char *s, int lineno, int col, int type, char *fname) {
int tag;
if (nextstring + strlen(s) + 1 >= MAXPOOL) exit(1); // TODO: add message
strcpy(stringpool+nextstring, s); /* Create a backstop for when not found */
tag = str_to_pool(s);
if (tag == nextstring) nextstring += strlen(s)+1; /* Not found, add it */
// makespace(c, nextfree, arraysize);
c[nextfree].s = stringpool+tag; c[nextfree].l = lineno; c[nextfree].col = col;
c[nextfree].f = fname; c[nextfree].t = type;
nextfree++;
}
void storec(int ch, int lineno, int col, int type, char *fname) {
onecharstr[ch*2] = ch; onecharstr[ch*2+1] = '\0'; // convert char to 1-char string before saving.
stores(&onecharstr[ch*2], lineno, col, type, fname);
}
/*>*/
int iskeyword(char *s) {
int i;
for (i = 0; i < MAX_KEYWORD; i++) if (strcmp(s, keyword[i]) == 0) return TRUE;
return FALSE;
}
void line_reconstruction(void)
{
static int whitespace;
static int peek;
int ch;
/* Pre-process input ready for parsing. Tokens are stored in array C[] */
whitespace = FALSE;
for (;;) {
ch = xfgetc(sourcefile); if (ch == EOF) break;
ch &= 255; // int, positive.
peek = xfgetc(sourcefile); xungetc(peek, sourcefile);
if (isalpha(ch) || (ch == '_')) {
/*< token or keyword */
int nextfree = 0, strsize = 0, startcol = col;
char token[1024];
whitespace = FALSE;
for (;;) {
// makespace(token, nextfree, strsize);
if (isalpha(ch) || isdigit(ch) || (ch == '_')) {
// digits allowed after 1st char.
col++;
token[nextfree++] = ch;
} else {
token[nextfree] = '\0'; xungetc(ch, sourcefile);
break;
}
ch = xfgetc(sourcefile);
}
stores(token, lineno, startcol, iskeyword(token) ? TYPE_KEYWORD : TYPE_TAG, curfile);
//free(token);
/*>*/
} else if (isdigit(ch)) {
/*< Number */
int nextfree = 0, numsize = 0;
char number[1024];
// Store as a string...
whitespace = FALSE;
for (;;) {
// makespace(number, nextfree, numsize);
if (isdigit(ch)) {
col++;
number[nextfree++] = ch;
} else {
number[nextfree] = '\0'; xungetc(ch, sourcefile);
break;
}
ch = xfgetc(sourcefile);
}
stores(number, lineno, col, TYPE_INT, curfile);
//free(number);
/*>*/
} else switch (ch) {
case '\'': // Handle 'c' character constants
case '"': // Handle "string"
/*< literals */
{
int nextfree = 0, strsize = 0, quotech = ch;
char string[1024];
whitespace = FALSE;
col++;
for (;;) {
ch = xfgetc(sourcefile); // Newlines are allowed
col++;
// makespace(string, nextfree, strsize);
if (ch == '\\') {
ch = xfgetc(sourcefile); col++;
if (ch == '\\') { string[nextfree++] = ch;
} else if (ch == '\'') { string[nextfree++] = '\'';
} else if (ch == '"') { string[nextfree++] = '"';
} else if (ch == 'n') { string[nextfree++] = '\n';
} else if (ch == 'r') { string[nextfree++] = '\r';
} else if (ch == 't') { string[nextfree++] = '\t';
} else {
// Warn of unknown (to me) \x escape. Probably an error.
string[nextfree++] = '\\'; string[nextfree++] = ch;
}
} else if (ch != quotech) { string[nextfree++] = ch;
} else {
string[nextfree] = '\0';
break;
}
}
if (quotech == '\'') {
if (strlen(string) == 1) {
} else if (strlen(string) <= 4) {
// Warn that 'xx' as a 32-bit int is a non-standard extension
} else {
// Warn that this is probably a string with the wrong type of quote.
}
}
stores(string, lineno, col, (quotech == '\'' ? TYPE_CHARCONST : TYPE_STRING), curfile);
//free(string);
}
break;
/*>*/
// this parser is line-at-a-time. If it was whole-file, we wouldn't return here
case '\n': lineno++; startline = TRUE; col = 0; whitespace = TRUE;
storec(ch, lineno, col++, TYPE_NEWLINE, curfile);
return;
case '\t':
case ' ': col++; // Does not affect whitespace
break;
default:
whitespace = FALSE;
storec(ch, lineno, col++, TYPE_CHAR, curfile);
}
}
// set up a dummy at the end because we sometimes look ahead by 1
// in the parsing code and don't want to hit uninitialised data.
c[nextfree].t = TYPE_EOF;
c[nextfree].s = "<EOF>";
c[nextfree].l = lineno;
c[nextfree].col = col;
/*>*/
}
// ============================== code generation section ============================
int pos(int i)
{
if (i < 0) return 0;
return i;
}
static int access = TRUE;
static char *hack = NULL; // source
static char cur_psect[128] = { '\0' };
char *strdup_or_null(const char *s)
{
char *t;
if (s == NULL) return(NULL);
t = strdup(s);
if (t == NULL) {
fprintf(stderr, "Won't\n");
exit(2);
}
return t;
}
int eq(const char *s, const char *t)
{
if (s == t) return(TRUE); // includes if both NULL
if ((s == NULL) || (t == NULL)) return(FALSE);
return (strcmp(s, t) == 0);
}
int isnull(const char *s)
{
if (s == NULL) return(TRUE);
if (*s == '\0') return(TRUE);
return(FALSE);
}
// "CMPGE", "CMPLT", "CMPLE", "CMPGT", "CMPEQ", "CMPNE";
static char *cond[6] = { "ge", "l", "le", "g", "e", "ne" };
static char *jcond[6] = { "jge", "jl", "jle", "jg", "je", "jne" };
char *inverse_jump(char *jump)
{
int i;
for (i = 0; i < 7; i++) {
if (strcmp(jcond[i], jump) == 0) return(jcond[i^1]);
}
fprintf(stderr, "Coding error. Is there a new opcode beginning with 'j' ???\n");
}
int iscondjump(char *jump)
{
int i;
for (i = 0; i < 7; i++) {
if (strcmp(jcond[i], jump) == 0) return TRUE;
}
return FALSE;
}
static char *spacestr = " ";
static char *spaces = NULL;
void plant(const char *psect, const char *label, const char *opcode, const char *opd1, const char *opd2, char *comment)
{
int elide = FALSE;
// using strings, and especially multiple heap strings, is scandalously inefficient.
if (next_free_entry == MAX_ENTRIES) {
fprintf(stderr, "Won't\n");
exit(1);
}
if (spaces == NULL) spaces = spacestr + strlen(spacestr); // points to \0
if (hack != NULL) {
static char c2[256];
if (comment == NULL) {
sprintf(c2, "; %s", hack);
} else {
sprintf(c2, "; %s%s ; %s", hack, spaces-pos(11-strlen(hack)), comment);
}
comment = c2;
hack = NULL;
}
if (eq(psect, "text")) {
if (isnull(label)) {
if (!access) elide = TRUE;
} else {
access = TRUE;
}
if (eq(opcode, "jmp") || eq(opcode, "ret")) access = FALSE; // for *following* code, not this line.
} // other psects don't affect access
if (!elide) { // previously we commented these out; now we just drop them silently
program[next_free_entry].psect = strdup_or_null(psect);
program[next_free_entry].label = strdup_or_null(label);
program[next_free_entry].opcode = strdup_or_null(opcode);
program[next_free_entry].opd1 = strdup_or_null(opd1);
program[next_free_entry].opd2 = strdup_or_null(opd2);
program[next_free_entry].comment = strdup_or_null(comment);
next_free_entry += 1;
}
}
void print_comment(char *t)
{
char *s;
for (;;) {
s = strstr(t, ";;");
if (s == NULL) break;
*s = '\0'; s += 2;
fprintf(outfile, "%s\n ;;", t);
t = s;
}
fprintf(outfile, "%s", t);
}
void reap(const char *psect, const char *label, const char *opcode, const char *opd1, const char *opd2, char *comment)
{
char opc[128];
int i;
*opc = '\0';
if (opcode != NULL) sprintf(opc, "%s", opcode);
if (spaces == NULL) spaces = spacestr + strlen(spacestr); // points to \0
if (strcmp(psect, cur_psect) != 0) {
strcpy(cur_psect, psect);
fprintf(outfile, "%sSECTION .%s\n", spaces-12, psect);
}
if ((label != NULL) && (*label != '\0')) {
i = 10-strlen(label); fprintf(outfile, "%s:%s ", label, spaces-pos(i));
} else {
fprintf(outfile, "%s", spaces-12);
}
// if (elide) fprintf(outfile, ";");
if (isalpha(opc[0]) && isupper(opc[0])) opc[0] = tolower(opc[0]);
i = 6-strlen(opc); fprintf(outfile, "%s %s", opc, spaces-pos(i));
fprintf(outfile, "%s", opd1);
i = strlen(opd1);
if (opd2 != NULL) {
i += 2+strlen(opd2);
fprintf(outfile, ", %s", opd2);
}
i = 30-i;
fprintf(outfile, "%s ", spaces-pos(i+1));
if ((comment != NULL) && (*comment != '\0')) {
if (*comment == ';') {
fprintf(outfile, ";");
} else {
fprintf(outfile, " ; ");
}
print_comment(comment);
//fprintf(outfile, "%s", comment);
}
fprintf(outfile, "\n");
}
void backcomment(int source, int target)
{
static char comment[1024];
if (program[target].comment == NULL) {
program[target].comment = program[source].comment;
return;
}
if (program[source].comment == NULL) return;
sprintf(comment, "%s ;%s", program[target].comment, program[source].comment);
program[target].comment = strdup(comment);
program[source].comment = NULL; // for wipeout
}
void wipeout(int line, int count)
{
int i = line + count;
for (i = line; i < line+count; i++) {
if (!isnull(program[i].opcode)) {
fprintf(stderr, "Won't: '%s'\n", program[i-count].opcode);
exit(3); // trying to wipe out a non-empty line.
}
}
while (i < next_free_entry) {
program[i-count] = program[i]; // gcc, copy entire record contents
i += 1;
}
next_free_entry -= count;
}
void optimise(void)
{
int i;
static char tmp[128];
// some of these optimisations are overly precise in which patterns they match; it's safer
// to start narrow and then widen an existing rule by adding more options (eg adding "and"
// to a test for "add") than it is to assume from the start that any arithmetic op is valid
// at that point... for example, you could include "add", and "sub", forgetting that sub
// is not commutative. It doesn't matter if you omit a tiny number of optimisation
// opportunities, but it matters a lot if you get even one wrong.
// this style of optimising make several assumptions about the style of code being
// generated, for instance that registers are loaded once and then used once, but not
// remembered. Once we start doing register remembering, some of these optimisations
// could break, so we need to structure our optimisations into sequential passes, with
// known assumptions for each pass.
i = 0;
for (;;) {
if (i >= next_free_entry-4) break; // (there are some fenceposting dummies on the end of the file)
// LABELS NECESSARY:
// sequences of 2 instructions + label following:
if (eq(program[i].psect, "text") &&
eq(program[i+1].psect, "text") && isnull(program[i+1].label) &&
eq(program[i+2].psect, "text") && (!isnull(program[i+2].label))
) {
if (
(program[i].opcode[0] == 'j' ) && (!eq(program[i].opcode, "jmp")) && (strncmp(program[i].opd1, "F_", 2) == 0)
&& eq(program[i+1].opcode, "jmp")
&& eq(program[i].opd1, program[i+2].label)
) { // sequences of 4 instructions:
/*
je F_1118 ;; BF F_1118
jmp nq ;; B nq
F_1118: ;; F_1118 :
=>
jne nq
*/
program[i].opcode = inverse_jump(program[i].opcode);
program[i].opd1 = program[i+1].opd1;
program[i+1].opcode = "";
program[i+1].opd1 = "";
program[i+2].label = ""; // safe assumptions as F_ labels only ever used once
backcomment(i+1, i);
wipeout(i+1, 1);
if (isnull(program[i+1].opcode) && isnull(program[i+1].opd1) && isnull(program[i+1].opd2)) {
backcomment(i+1, i); wipeout(i+1, 1);
}
i -= 10; if (i < 0) i = 0; continue;
}
}
// UNBROKEN BY LABELS:
// sequences of 5 instructions:
if (eq(program[i].psect, "text") &&
eq(program[i+1].psect, "text") && isnull(program[i+1].label) &&
eq(program[i+2].psect, "text") && isnull(program[i+2].label) &&
eq(program[i+3].psect, "text") && isnull(program[i+3].label) &&
eq(program[i+4].psect, "text") && isnull(program[i+4].label)
) {
if ( eq(program[i].opcode, "push") &&
eq(program[i+1].opcode, "mov") && eq(program[i+1].opd1, "eax") &&
eq(program[i+2].opcode, "add") && eq(program[i+2].opd1, "eax") &&
eq(program[i+3].opcode, "mov") && eq(program[i+3].opd1, "ecx") &&
eq(program[i+4].opcode, "pop") && eq(program[i+4].opd1, "edx")
) {
/*
push dword [ci] ;; PUSH ci ;; PUSH & c ; note param is rel to ebp in procedures
mov eax, dword [text] ;; PUSH text
add eax, dword 1 ;; PUSH # 1 ;; ADD ;; POPI 4
mov ecx, c
pop edx
=>
mov edx, dword [ci] ;; PUSH ci ;; PUSH & c ; note param is rel to ebp in procedures
mov eax, dword [text] ;; PUSH text
add eax, dword 1 ;; PUSH # 1 ;; ADD ;; POPI 4
mov ecx, c
*/
program[i].opcode = "mov"; // convert pop to a move
program[i].opd2 = program[i].opd1;
program[i].opd1 = "edx";
program[i+4].opcode = ""; // remove the push
program[i+4].opd1 = "";
program[i+4].opd2 = "";
backcomment(i+4, i+3);
wipeout(i+4, 1);
i -= 10; if (i < 0) i = 0; continue;
}
}
// sequences of 4 instructions:
if (eq(program[i].psect, "text") &&
eq(program[i+1].psect, "text") && isnull(program[i+1].label) &&
eq(program[i+2].psect, "text") && isnull(program[i+2].label) &&
eq(program[i+3].psect, "text") && isnull(program[i+3].label)
) {
if ((eq(program[i].opcode, "push") ) &&
(eq(program[i+1].opcode, "push") ) &&
(eq(program[i+2].opcode, "pop") ) &&
((eq(program[i+2].opd1, "eax") ) || (eq(program[i+2].opd1, "ecx") )) &&
(eq(program[i+3].opcode, "pop") ) &&
((eq(program[i+3].opd1, "eax") ) || (eq(program[i+3].opd1, "ecx") ))
) {
/*
push A
push B
pop eax
pop ecx
=>
mov eax, A
mov ecx, B
*/
program[i].opcode = "mov";
program[i].opd2 = program[i].opd1;
program[i].opd1 = program[i+3].opd1;
program[i+1].opcode = "mov";
program[i+1].opd2 = program[i+1].opd1;
program[i+1].opd1 = program[i+2].opd1;
program[i+2].opcode = ""; program[i+2].opd1 = "";
backcomment(i+2, i+1);
program[i+3].opcode = ""; program[i+3].opd1 = "";
backcomment(i+3, i+1);
wipeout(i+2, 2);
i -= 10; if (i < 0) i = 0; continue;
}
if ((eq(program[i].opcode, "push") ) &&
(eq(program[i+1].opcode, "mov") ) && (!eq(program[i+1].opd1, "ecx")) &&
(eq(program[i+2].opcode, "add") || eq(program[i+2].opcode, "sub") || eq(program[i+2].opcode, "and"))
&& (!eq(program[i+1].opd1, "ecx") ) &&
(eq(program[i+3].opcode, "pop") && (eq(program[i+3].opd1, "ecx")))
) {
/*
push c ;; PUSH & c ; note param is rel to ebp in procedures
mov eax, dword [ci] ;; PUSH ci
add eax, dword 2 ;; PUSH # 2 ;; ADD ;; PUSHI 4
pop ecx
=>
;; PUSH & c ; note param is rel to ebp in procedures
mov eax, dword [ci] ;; PUSH ci
add eax, dword 2 ;; PUSH # 2 ;; ADD ;; PUSHI 4
mov ecx, c
*/
program[i+3].opcode = "mov"; // convert pop to a move
program[i+3].opd2 = program[i].opd1;
program[i].opcode = ""; // remove the push
program[i].opd1 = "";
program[i].opd2 = "";
if (i > 0) backcomment(i, i-1);
wipeout(i, 1);
i -= 10; if (i < 0) i = 0; continue;
}
if ( eq(program[i].opcode, "push") && /*eq(program[i].opd1, "dword [ecx+eax*4]") &&*/
eq(program[i+1].opcode, "mov") && eq(program[i+1].opd1, "ecx") &&
eq(program[i+2].opcode, "mov") && eq(program[i+2].opd1, "eax") &&
eq(program[i+3].opcode, "pop") && eq(program[i+3].opd1, "edx")
) {
/*
push dword [ecx+eax*4]
mov ecx, a ;; PUSH & a ; note param is rel to ebp in procedures
mov eax, dword [pp] ;; PUSH pp ;; POPI 4
pop edx
=>
mov edx, dword [ecx+eax*4]
mov ecx, a ;; PUSH & a ; note param is rel to ebp in procedures
mov eax, dword [pp] ;; PUSH pp ;; POPI 4
similarly for
push dword [v] ;; PUSH v
mov ecx, c ;; PUSH & c ; note param is rel to ebp in procedures
mov eax, dword [ci] ;; PUSH ci ;; POPI 4
pop edx
*/
program[i].opcode = "mov"; // convert pop to a move
program[i].opd2 = program[i].opd1;
program[i].opd1 = "edx";
program[i+3].opcode = ""; // remove the push
program[i+3].opd1 = "";
program[i+3].opd2 = "";
backcomment(i+3, i+2);
wipeout(i+3, 1);
i -= 10; if (i < 0) i = 0; continue;
}
}
// sequences of 3 instructions:
if (eq(program[i].psect, "text") &&
eq(program[i+1].psect, "text") && isnull(program[i+1].label) &&
eq(program[i+2].psect, "text") && isnull(program[i+2].label)
) {
if (eq(program[i].opcode, "mov") && eq(program[i].opd1, "ecx") &&
(eq(program[i+1].opcode, "sub") || eq(program[i+1].opcode, "add")) && eq(program[i+1].opd1, "ecx") &&
eq(program[i+2].opcode, "mov") && eq(program[i+2].opd1, "eax") && eq(program[i+2].opd2, "ecx")
) {
/*
mov ecx, dword [ci] ;; PUSH ci
sub ecx, dword 3 ;; PUSH # 3 ;; SUB
mov eax, ecx ;; PUSHI 4
->
mov eax, dword [ci] ;; PUSH ci
sub eax, dword 3 ;; PUSH # 3 ;; SUB
;; PUSHI 4
*/
program[i].opd1 = "eax";
program[i+1].opd1 = "eax";
program[i+2].opcode = ""; program[i+2].opd1 = ""; program[i+2].opd2 = "";
backcomment(i+2, i+1); wipeout(i+2, 1);
i -= 10; if (i < 0) i = 0; continue;
}
if ( eq(program[i].opcode, "mov") && eq(program[i].opd1, "ecx") &&
(eq(program[i+1].opcode, "sub") || eq(program[i+1].opcode, "add")) && eq(program[i+1].opd1, "ecx") &&
(strncmp(program[i+1].opd2, "dword ", 6) == 0) && (program[i+1].opd2[6] != '[') && // isconst()?
eq(program[i+2].opcode, "mov") && eq(program[i+2].opd1, program[i].opd2) && eq(program[i+2].opd2, "ecx")
) {
/*
mov ecx, dword [fp] ;; PUSH fp
add ecx, dword 1 ;; PUSH # 1 (or any constant)
;; ADD
mov dword [fp], ecx ;; POP fp
=>
add dword [fp], dword 1
*/
// *POSSIBLY* decide not to do this optimisation if program[i].opd2 is referred to anywhere
// in the opd2 fields of the next 4 or 5 instructions, ie avoid a redundant memory access
// if the value can stay in a register until needed again shortly. [TODO]
// THIS BREAKS if a subsequent instruction has already been optimised to use the cached version of
// ecx which this originally left behind. It's really the other optimisation that's wrong.
int next;
for (next = i+3; next < i+8; next++) {
if (!isnull(program[next].label)) break;
if (eq(program[i].opd2, program[next].opd2) || eq(program[i].opd2, program[next].opd1)) {
goto skip;
}
}
program[i+2].opcode = program[i+1].opcode; program[i+2].opd2 = program[i+1].opd2;
program[i].opcode = ""; program[i].opd1 = ""; program[i].opd2 = NULL;
program[i+1].opcode = ""; program[i+1].opd1 = ""; program[i+1].opd2 = NULL;
backcomment(i, i-1);
backcomment(i+1, i-1);
wipeout(i, 2);
i -= 10; if (i < 0) i = 0; continue;
skip:;
// PROTECT:
program[i].opcode = "Mov";
//program[i+2].opcode = "Mov";
}
if (eq(program[i].opcode, "mov") && (strncmp(program[i].opd1, "dword [", 7) == 0) && eq(program[i].opd2, "ecx") &&
eq(program[i+1].opcode, "mov") && eq(program[i+1].opd1, "ecx") && (!eq(program[i+1].opd2, "eax")) &&
(!eq(program[i+1].opd2, "ecx")) &&
eq(program[i+2].opcode, "mov") && eq(program[i+2].opd1, "eax") && eq(program[i].opd1, program[i+2].opd2)
) {
/*
knock on effect of avoiding store-to-store increment when memory address reused soon after:
mov dword [pp], ecx ;; POP pp
mov ecx, a ;; PUSH & a
mov eax, dword [pp] ;; PUSH pp
=>
mov dword [pp], ecx ;; POP pp
mov eax, ecx ;; PUSH & a
mov ecx, a ;; PUSH pp
*/
char *temp = program[i+1].opd2;
program[i].opcode = "Mov"; // stop this opcode being absorbed by the previous two opcodes
// and becoming part of an: add dword[loc], dword 1
program[i+1].opd2 = program[i+1].opd1;
program[i+1].opd1 = program[i+2].opd1;
program[i+2].opd1 = program[i+1].opd2;
program[i+2].opd2 = temp;
// i -= 10; if (i < 0) i = 0; continue;
}
if (eq(program[i].opcode, "mov") && eq(program[i].opd1, "ecx") && (strncmp(program[i].opd2, "dword ", 6) != 0) &&
eq(program[i+1].opcode, "mov") && eq(program[i+1].opd1, "eax") &&
eq(program[i+2].opcode, "mov") && ((eq(program[i+2].opd1, "[ecx+eax*4]") && eq(program[i+2].opd2, "edx")) ||
(eq(program[i+2].opd1, "edx") && eq(program[i+2].opd2, "dword [ecx+eax*4]")))
) {
/*
Start this optimisation with a simple reordering:
mov ecx, stored ;; PUSH & stored
mov eax, dword [i] ;; PUSH i
;; POPI 4
mov [ecx+eax*4], edx
=>
mov eax, dword [i] ;; PUSH i
;; POPI 4
mov ecx, stored ;; PUSH & stored
mov [ecx+eax*4], edx
Also works for:
mov ecx, a ;; PUSH & a
mov eax, dword [fp] ;; PUSH fp
;; PUSHI 4
mov edx, dword [ecx+eax*4]
*/
char *temp;
// swap opd1;
temp = program[i].opd1;
program[i].opd1 = program[i+1].opd1;
program[i+1].opd1 = temp;
// swap opd2:
temp = program[i].opd2;
program[i].opd2 = program[i+1].opd2;
program[i+1].opd2 = temp;
i -= 10; if (i < 0) i = 0; continue;
}
if (eq(program[i].opcode, "mov") && eq(program[i].opd1, "edx") && (eq(program[i].opd2, "ecx") || (strncmp(program[i].opd2, "dword ", 6) == 0)) &&
(!eq(program[i].opd2 + strlen(program[i].opd2) - strlen("+eax*4]"), "+eax*4]")) &&
eq(program[i+1].opcode, "mov") && eq(program[i+1].opd1, "eax") && (!eq(program[i+1].opd1, "edx")) &&
eq(program[i+2].opcode, "mov") &&
eq(program[i+2].opd1 + strlen(program[i+2].opd1) - strlen("+eax*4]"), "+eax*4]") &&
eq(program[i+2].opd2, "edx")
) {
/*
Add a second simple reordering:
mov edx, dword [nl] ;; PUSH nl
mov eax, dword [fp] ;; PUSH & a
;; PUSH fp
;; POPI 4
mov [a+eax*4], edx
=>
mov eax, dword [fp] ;; PUSH & a
;; PUSH fp
;; POPI 4
mov edx, dword [nl] ;; PUSH nl
mov [a+eax*4], edx
*/
char *temp;
// swap opd1;
temp = program[i].opd1;
program[i].opd1 = program[i+1].opd1;
program[i+1].opd1 = temp;
// swap opd2:
temp = program[i].opd2;
program[i].opd2 = program[i+1].opd2;
program[i+1].opd2 = temp;
i -= 10; if (i < 0) i = 0; continue;
}
if (eq(program[i].opcode, "mov") && eq(program[i].opd1, "eax") &&
(eq(program[i+1].opcode, "add") || eq(program[i+1].opcode, "sub")) && eq(program[i+1].opd1, "eax") &&
(strncmp(program[i+1].opd2, "dword ", 6) == 0) && (program[i+1].opd2[6] != '[') && // isconst()?
eq(program[i+2].opcode, "mov") &&
eq(program[i+2].opd2 + strlen(program[i+2].opd2) - strlen("+eax*4]"), "+eax*4]")
) { // quite a nice one, given this isn't being done at the AST level...
/*
mov eax, dword [text] ;; PUSH text
add/sub eax, dword 1 ;; PUSH # 1
mov eax, dword [c+eax*4]
=>
mov eax, dword [text] ;; PUSH text
mov eax, dword [c+eax*4+1*4]
*/
char *temp;
sprintf(tmp, "%s", program[i+2].opd2);
temp = strrchr(tmp, ']');
sprintf(temp, "%c%s*4]", (eq(program[i+1].opcode, "add") ? '+' : '-'), program[i+1].opd2+6); // NO - REMOVE "dword " FIRST, AND GET "4" FACTOR FROM ORIG STRING.
program[i+2].opd2 = strdup(tmp);
program[i+1].opcode = "";
program[i+1].opd1 = "";
program[i+1].opd2 = "";
backcomment(i+1, i);
wipeout(i+1, 1);
i -= 10; if (i < 0) i = 0; continue;
}
if (eq(program[i].opcode, "mov") && eq(program[i].opd1, "eax") &&
(eq(program[i+1].opcode, "add") || eq(program[i+1].opcode, "sub")) && eq(program[i+1].opd1, "eax") &&
(strncmp(program[i+1].opd2, "dword ", 6) == 0) && (program[i+1].opd2[6] != '[') && // isconst()?
eq(program[i+2].opcode, "mov") && eq(program[i+2].opd1 + strlen(program[i+2].opd1) - strlen("+eax*4]"), "+eax*4]")
) { // same as above, but 3rd instr operands swapped
/*
mov eax, dword [text] ;; PUSH text
add eax, dword 1 ;; PUSH # 1
;; ADD
;; POPI 4
mov [c+eax*4], edx
*/
char *temp;
sprintf(tmp, "%s", program[i+2].opd1);
temp = strrchr(tmp, ']');
sprintf(temp, "%c%s*4]", (eq(program[i+1].opcode, "add") ? '+' : '-'), program[i+1].opd2+6); // NO - REMOVE "dword " FIRST, AND GET "4" FACTOR FROM ORIG STRING.
program[i+2].opd1 = strdup(tmp);
program[i+1].opcode = "";
program[i+1].opd1 = "";
program[i+1].opd2 = "";
backcomment(i+1, i);
wipeout(i+1, 1);
i -= 10; if (i < 0) i = 0; continue;
}
if (eq(program[i].opcode, "call") &&
eq(program[i+1].opcode, "mov") && eq(program[i+1].opd1, "[esp]") && eq(program[i+1].opd2, "eax") &&
eq(program[i+2].opcode, "pop") && eq(program[i+2].opd1, "eax")
) {
/*
call rfib1 ;; CALL rfib1 , 1 , 1
mov [esp], eax
pop eax ;; ADD
=>
call rfib1 ;; CALL rfib1 , 1 , 1
add esp, 4
*/
program[i+1].opcode = "add"; program[i+1].opd1 = "esp"; program[i+1].opd2 = "4";
program[i+2].opcode = ""; program[i+2].opd1 = ""; program[i+2].opd2 = "";
backcomment(i+2, i+1); wipeout(i+2, 1);
i -= 10; if (i < 0) i = 0; continue;
}
}
// sequences of 2 instructions:
if ((eq(program[i].psect, "text") ) &&
(eq(program[i+1].psect, "text") ) && isnull(program[i+1].label)
) {
if ((eq(program[i].opcode, "push") ) &&
((eq(program[i].opd1, "eax") ) || (eq(program[i].opd1, "ecx") )) &&
(eq(program[i+1].opcode, "pop") )
) { // sequences of 2 instructions:
/*
push eax; pop fred => mov eax, fred
*/
program[i].opcode = "mov";
program[i].opd2 = program[i].opd1;
program[i].opd1 = program[i+1].opd1;
program[i+1].opcode = ""; program[i+1].opd1 = "";
backcomment(i+1, i);
wipeout(i+1, 1);
i -= 10; if (i < 0) i = 0; continue;
}
if ( (eq(program[i].opcode, "push") && (strncmp(program[i].opd1, "dword [", 7) == 0)) &&
(eq(program[i+1].opcode, "pop") && (strncmp(program[i+1].opd1, "dword [", 7) == 0))
) {
// stack-based design of code generator ensures that push followed by pop is never
// done anywhere that a register holds any significant content
program[i].opcode = "mov";
program[i].opd2 = program[i].opd1;
program[i].opd1 = "eax"; // ANY REGISTER WILL DO!
program[i+1].opcode = "mov";
program[i+1].opd2 = program[i].opd1;
i -= 10; if (i < 0) i = 0; continue;
}
if ( eq(program[i].opcode, "push") && (eq(program[i].opd1, "eax") || eq(program[i].opd1, "ecx") || eq(program[i].opd1, "edx")) &&
eq(program[i+1].opcode, "pop") && (strncmp(program[i].opd1, "dword [", 7) == 0)
) {
/*
push edx
pop dword [xrem] ;; POP xrem
=>
mov dword [xrem], edx
*/
program[i].opcode = "mov";
program[i].opd2 = program[i].opd1;
program[i].opd1 = program[i+1].opd1;
program[i+1].opcode = "";
program[i+1].opd2 = "";
program[i+1].opd2 = "";
backcomment(i+1, i);
wipeout(i+1, 1);
i -= 10; if (i < 0) i = 0; continue;
}
if ( eq(program[i].opcode, "push")
&& (strncmp(program[i].opd1, "dword ", 6) == 0) && (program[i].opd1[6] != '[') &&
eq(program[i+1].opcode, "pop")
) {
// push dword 0 ;; PUSH # 0
// pop eax ;; RET 1 ; pass result back in register eax
// stack-based design of code generator ensures that push followed by pop is never
// done anywhere that a register holds any significant content
program[i].opcode = "mov";
program[i].opd2 = program[i].opd1;
program[i].opd1 = program[i+1].opd1;
#ifdef NEVER
{// use this snippet when testing changes
static char x[1024];
if (program[i].comment == NULL) {
sprintf(x, "CHECK HERE");
} else {
sprintf(x, "%s ;; CHECK HERE", program[i].comment);
}
program[i].comment = strdup(x);
}
#endif
program[i+1].opcode = "";
program[i+1].opd1 = "";
backcomment(i+1, i);
wipeout(i+1, 1);
i -= 10; if (i < 0) i = 0; continue;
}
if ( eq(program[i].opcode, "push") &&
eq(program[i+1].opcode, "pop") &&
(eq(program[i+1].opd1, "eax") || eq(program[i+1].opd1, "ecx") || eq(program[i+1].opd1, "edx"))
) {
// push dword 0 ;; PUSH # 0
// pop eax ;; RET 1 ; pass result back in register eax
/*
push dword [fp] ;; PUSH fp
pop eax ;; RET 1 ; pass result back in register eax
*/
// stack-based design of code generator ensures that push followed by pop is never
// done anywhere that a register holds any significant content
program[i].opcode = "mov";
program[i].opd2 = program[i].opd1;
program[i].opd1 = program[i+1].opd1;
program[i+1].opcode = "";
program[i+1].opd1 = "";
backcomment(i+1, i);
wipeout(i+1, 1);
i -= 10; if (i < 0) i = 0; continue;
}
if ( eq(program[i].opcode, "add") && eq(program[i].opd1, "esp") && eq(program[i].opd2, "4") &&
eq(program[i+1].opcode, "push") && (strncmp(program[i+1].opd1, "dword [", 7) != 0)
) { // successive procedure calls ( > 1 arg is a logical extension ...)
/*
NOTE: due to addressing limitations, the followingt *cannot* be optimised:
push dword [code] ;; PUSH code
call stack ;; CALL stack , 1 , 0
add esp, 4
push dword [text] ;; PUSH text
call stack ;; CALL stack , 1 , 0
add esp, 4
push dword [num] ;; PUSH num
call stack ;; CALL stack , 1 , 0
add esp, 4
mov ecx, dword [ci] ;; PUSH ci
*/
program[i].opcode = "mov";
program[i].opd1 = "[esp]";
program[i].opd2 = program[i+1].opd1;
program[i+1].opcode = "";
program[i+1].opd1 = "";
backcomment(i+1, i);
wipeout(i+1, 1);
i -= 10; if (i < 0) i = 0; continue;
}
if (eq(program[i].opcode, "add") && eq(program[i].opd1, "ecx") && eq(program[i].opd2, "eax") &&
eq(program[i+1].opcode, "mov") && eq(program[i+1].opd1, "eax") && eq(program[i+1].opd2, "ecx")
) { // sequences of 2 instructions:
/*
add ecx, eax
mov eax, ecx
-> add ecx, eax
*/
char *swap = program[i].opd2;
program[i].opd2 = program[i].opd1;
program[i].opd1 = swap;
program[i+1].opcode = ""; program[i+1].opd1 = ""; program[i+1].opd2 = NULL;
backcomment(i+1, i);
wipeout(i+1, 1);
i -= 10; if (i < 0) i = 0; continue;
}
if (eq(program[i].opcode, "mov") && eq(program[i].opd1, "eax") &&
(eq(program[i+1].opcode, "cmp") || eq(program[i+1].opcode, "add") || eq(program[i+1].opcode, "sub"))
&& eq(program[i+1].opd1, "ecx") && eq(program[i+1].opd2, "eax")
) {
/*
mov eax, dword 'z' ;; PUSH # 'z' ;; CMPGT
cmp ecx, eax
-> cmp ecx, dword 'z'
*/
/*
mov ecx, eax ; transfer fn result to the stack
cmp ecx, dword 0 ;; PUSH # 0 ;; CMPLT
*/
program[i].opcode = program[i+1].opcode;
program[i].opd1 = program[i+1].opd1;
program[i+1].opcode = ""; program[i+1].opd1 = ""; program[i+1].opd2 = NULL;
backcomment(i+1, i);
wipeout(i+1, 1);
i -= 10; if (i < 0) i = 0; continue;
}
if (eq(program[i].opcode, "mov") && eq(program[i].opd1, "ecx") &&
(eq(program[i+1].opcode, "and") || eq(program[i+1].opcode, "add") || eq(program[i+1].opcode, "sub"))
&& eq(program[i+1].opd1, "eax") && eq(program[i+1].opd2, "ecx")
) {
/*
mov ecx, dword 95 ;; PUSH # 95 ;; BAND
and eax, ecx
-> and eax, dword 95
*/
program[i].opcode = program[i+1].opcode;
program[i].opd1 = program[i+1].opd1;
program[i+1].opcode = ""; program[i+1].opd1 = ""; program[i+1].opd2 = NULL;
backcomment(i+1, i);
wipeout(i+1, 1);
i -= 10; if (i < 0) i = 0; continue;
}
if (eq(program[i].opcode, "mov") && eq(program[i].opd1, "ecx") && eq(program[i].opd2, "eax") &&
eq(program[i+1].opcode, "cmp") && eq(program[i+1].opd1, "ecx")
) {
/*
mov ecx, eax ; transfer fn result to the stack
cmp ecx, dword 0 ;; PUSH # 0 ;; CMPLT
-> cmp eax, dword 0
*/
program[i].opcode = program[i+1].opcode;
program[i].opd1 = program[i].opd2;
program[i].opd2 = program[i+1].opd2;
program[i+1].opcode = ""; program[i+1].opd1 = ""; program[i+1].opd2 = NULL;
backcomment(i+1, i);
wipeout(i+1, 1);
i -= 10; if (i < 0) i = 0; continue;
}
if (eq(program[i].opcode, "mov") && eq(program[i].opd2, "eax") &&
eq(program[i+1].opcode, "mov") && eq(program[i+1].opd1, "ecx") &&
eq(program[i].opd1, program[i+1].opd2)
) {
/*
mov dword [i], eax ;; POP i
mov ecx, dword [i] ;; PUSH i
=>
mov dword [i], eax ;; POP i
mov ecx, eax
if i.opd1 == i+1.opd2 - trivial case of register remembering
*/
program[i].opcode = "Mov"; // PROTECT IT FROM BEING MERGED BY AN "inc"
program[i+1].opd2 = program[i].opd2; // trivial case of register remembering
if (eq(program[i+1].opd1, program[i+1].opd2)) { // although this may be worth its own entry below!
program[i+1].opcode = "";
program[i+1].opd1 = "";
program[i+1].opd2 = "";
backcomment(i+1, i);
wipeout(i+1, 1);
}
i -= 10; if (i < 0) i = 0; continue;
}
if (eq(program[i].opcode, "mov") && (eq(program[i].opd2, "eax") || eq(program[i].opd2, "ecx")) &&
eq(program[i+1].opcode, "mov") && (eq(program[i+1].opd1, "eax") || eq(program[i+1].opd1, "ecx")) &&
eq(program[i].opd2, program[i+1].opd1) &&
eq(program[i].opd1, program[i+1].opd2)
) {
/*
mov dword [i], eax ;; POP i
mov eax, dword [i] ;; PUSH i
=>
mov dword [i], eax
if i.opd1 == i+1.opd2 - trivial case of register remembering
*/
program[i+1].opcode = "";
program[i+1].opd1 = "";
program[i+1].opd2 = "";
backcomment(i+1, i);
wipeout(i+1, 1);
i -= 10; if (i < 0) i = 0; continue;
}
if ( eq(program[i].opcode, "mov") && eq(program[i].opd1, "ecx") &&
(strncmp(program[i].opd2, "dword ", 6) == 0) && (program[i].opd2[6] != '[') && (program[i].opd2[6] != '-') &&
(eq(program[i+1].opcode, "shr") || eq(program[i+1].opcode, "shl")) && eq(program[i+1].opd1, "eax") && eq(program[i+1].opd2, "cl")
) {
/*
mov ecx, dword 7 ;; PUSH # 7
;; RSH
shr eax, cl
=>
shr eax, dword 7
*/
program[i].opcode = program[i+1].opcode;
program[i].opd1 = "eax";
sprintf(tmp, "byte %s", program[i].opd2+6);
program[i].opd2 = strdup(tmp);
program[i+1].opcode = "";
program[i+1].opd1 = "";
program[i+1].opd2 = NULL;
backcomment(i+1, i);
wipeout(i+1, 1);
i -= 10; if (i < 0) i = 0; continue;
}
if (eq(program[i].opcode, "mov") && eq(program[i].opd1, "ecx") && (strncmp(program[i].opd2, "dword ", 6) != 0) &&
eq(program[i+1].opcode, "mov") && eq(program[i+1].opd1, "[ecx+eax*4]") && eq(program[i+1].opd2, "edx")
) {
/*
mov ecx, c ;; PUSH ci
;; POPI 4
mov [ecx+eax*4], edx ;; PUSH ci
=>
mov [c+eax*4], edx
*/
sprintf(tmp, "[%s+eax*4]", program[i].opd2);
program[i+1].opd1 = strdup(tmp);
program[i].opcode = "";
program[i].opd1 = "";
program[i].opd2 = "";
backcomment(i, i-1);
wipeout(i, 1);
i -= 10; if (i < 0) i = 0; continue;
}
if (eq(program[i].opcode, "mov") && eq(program[i].opd1, "ecx") && (strncmp(program[i].opd2, "dword ", 6) != 0) &&
eq(program[i+1].opcode, "mov") && eq(program[i+1].opd2, "dword [ecx+eax*4]")
) { // now the reverse version
/*
mov ecx, a
mov eax, dword [ecx+eax*4]
=>
mov eax, [a+eax*4]
*/
sprintf(tmp, "dword [%s+eax*4]", program[i].opd2);
program[i+1].opd2 = strdup(tmp);
program[i].opcode = "";
program[i].opd1 = "";
program[i].opd2 = "";
backcomment(i, i-1);
wipeout(i, 1);
i -= 10; if (i < 0) i = 0; continue;
}
if (eq(program[i].opcode, "mov") && eq(program[i].opd1, "edx") && (strncmp(program[i].opd2, "dword [", 7) != 0) &&
// try it, and see what combinations are rejected?
eq(program[i+1].opcode, "mov") &&
(eq(program[i+1].opd1 + strlen(program[i+1].opd1) - strlen("+eax*4]"), "+eax*4]")) && eq(program[i+1].opd2, "edx")
) {
/*
mov edx, ecx ;; PUSH & a
;; PUSH fp
;; POPI 4
mov [a+eax*4], edx
=>
mov [a+eax*4], ecx
*/
program[i+1].opd2 = program[i].opd2;
program[i].opcode = "";
program[i].opd1 = "";
program[i].opd2 = "";
backcomment(i, i-1);
wipeout(i, 1);
i -= 10; if (i < 0) i = 0; continue;
}
}
// single instructions:
if (eq(program[i].opcode, "mov") && eq(program[i].opd1, program[i].opd2)) {
program[i].opcode = ""; program[i].opd1 = ""; program[i].opd2 = NULL;
backcomment(i, i-1);
wipeout(i, 1);
i -= 10; if (i < 0) i = 0; continue;
}
i += 1;
}
// now we do optimisations that break the default assumptions relied on above
// When these are done, you have to be very careful what you peephole afterwards
i = 0;
for (;;) {
if (i >= next_free_entry-4) break; // (there are some fenceposting dummies on the end of the file)
if (eq(program[i].opcode, "mov") &&
eq(program[i+1].opcode, "cmp") && isnull(program[i+1].label) &&
iscondjump(program[i+2].opcode) && isnull(program[i+2].label) &&
eq(program[i].opd1, program[i+1].opd1)
) {
int next = i+3, optimised = FALSE;
while (eq(program[next].opcode, "mov") && isnull(program[next].label) && eq(program[next].psect, "text") &&
eq(program[next+1].opcode, "cmp") && isnull(program[next+1].label) &&
iscondjump(program[next+2].opcode) && isnull(program[next+2].label) &&
eq(program[i].opd1, program[next].opd1) && eq(program[next].opd1, program[next+1].opd1) &&
eq(program[i].opd2, program[next].opd2)
) {
program[next].opcode = ""; program[next].opd1 = ""; program[next].opd2 = NULL;
backcomment(next, next-1);
wipeout(next, 1);
next = next + 2;
optimised = TRUE;
}
if (optimised) {
program[i].opcode = "Mov";
// i -= 10; if (i < 0) i = 0; continue;
}
}
i += 1;
}
i = 0;
for (;;) {
if (i >= next_free_entry-4) break; // (there are some fenceposting dummies on the end of the file)
if ( eq(program[i].opcode, "mov") && eq(program[i].opd1, "ecx") &&
eq(program[i+1].opcode, "cmp") && eq(program[i+1].opd1, "ecx") &&
(strncmp(program[i+1].opd2, "dword ", 6) == 0) && (program[i+1].opd2[6] != '[') && (program[i+1].opd2[6] != '-')
) {
/*
mov ecx, dword [code] ;; PUSH code
cmp ecx, dword 'F' ;; PUSH # 'F'
=>
cmp dword [code], dword 'F'
This optimisation should be done *after* the skip chains
*/
program[i+1].opd1 = program[i].opd2;
program[i].opcode = "";
program[i].opd1 = "";
program[i].opd2 = NULL;
backcomment(i, i-1);
wipeout(i, 1);
i -= 10; if (i < 0) i = 0; continue;
}
i += 1;
}
}
void dump(void)
{
int i;
plant("text", "__end_of_program", "", "", NULL, "End of program");
if (opt_optimiser) {
plant("text", "", "", "", NULL, "(a few dummies for the optimizer's fenceposting)");
plant("text", "", "", "", NULL, "...");
plant("text", "", "", "", NULL, "...");
plant("text", "", "", "", NULL, "...");
plant("text", "", "", "", NULL, "...");
optimise();
}
// Should print all .bss and .data entries first, leaving only .text
// - this will allow optimising over instruction sequences which were broken
// by inline data. Remember to wipeout the deleted entries.
for (i = 0; i < next_free_entry; i++) {
if (eq(program[i].label, "__end_of_program")) break;
if (eq(program[i].psect, "data")) reap(
program[i].psect,
program[i].label,
program[i].opcode,
program[i].opd1,
program[i].opd2,
program[i].comment
);
}
for (i = 0; i < next_free_entry; i++) {
if (eq(program[i].label, "__end_of_program")) break;
if (eq(program[i].psect, "bss")) reap(
program[i].psect,
program[i].label,
program[i].opcode,
program[i].opd1,
program[i].opd2,
program[i].comment
);
}
for (i = 0; i < next_free_entry; i++) {
if (eq(program[i].label, "__end_of_program")) break;
if (eq(program[i].psect, "text")) reap(
program[i].psect,
program[i].label,
program[i].opcode,
program[i].opd1,
program[i].opd2,
program[i].comment
);
}
}
// ===================================================================================
int bestparse; // for error reporting.
char *looking_for; // 'while looking for <PHRASENAME>' (or literal) ...
void indent(int depth, FILE *f)
{
int i;
for (i = 0; i < depth; i++) fprintf(f, " ");
}
int startparse(int pp) // depth is only for indentation in diags
{
bestparse = -1; // for error reporting.
looking_for = "<UNKNOWN>"; // 'while looking for <PHRASENAME>' (or literal) ...
parse(pp, 0);
}
static int nexttmp = 1;
int parse(int pp, int depth) // depth is only for indentation in diags
{
/* Main parsing procedure. This is a table-driven parser, with the tables
being generated from the grammar rules embedded in the 'compile' procedure
below. The result of the parse is a tree structure, and the values of the
nodes in the tree structure are used to drive a large 'case' statement
which selects the actions to be performed after a successful parse.
There is no grammatical structure embedded in this procedure. If you're
looking for the grammar definition, see the procedure called 'compile' instead.
*/
int saved_cp, saved_ap, i, gp, alts, match;
char saved_desc[256];
/* Initialisation */
gp = phrase_start[pp-512-MAX_BIP];
alts = gram[gp];
/* Debugging */
#ifdef DEBUG_PARSER
if (debug_parser) {
fprintf(outfile, "\n");
indent(depth, outfile);
fprintf(outfile, "Phrase %s/%d (%d alternatives) = ", phrasename[pp-512], pp, alts);
fflush(outfile);
}
#endif
gp++; // gp now points to first element (length) of first alt
saved_cp = cp;
saved_ap = ap;
for (i = 0; i < alts; i++) {
/* Starting with the root phrase, recursively examine each alternative */
int each, phrases = gram[gp++], phrase_count, gap = 0;
cp = saved_cp;
ap = saved_ap;
if (ap+3 > next_free_a) next_free_a = ap+3;
// makespace(A, next_free_a, a_size);
A[ap++] = pp; // record which phrase (could be done outside loop)
A[ap++] = i; // and which alt.
// Count slots needed. *Could* be precalculated and stored
// in the grammar, either embedded (after the ALT) or as a
// separate table
for (each = 0; each < phrases; each++) if (gram[gp+each] >= 512) gap++;
A[ap++] = gap; // Count of alts (gap)
// ap+gap now points to the slot after the space required, which
// is where the first subphrase will be stored.
ap = ap+gap; // recursive subphrases are stored after this phrase.
// ap is left updated if successful.
// successfully parsed phrases are stored in A[saved_ap+3+n]
if (saved_ap+3+gap > next_free_a) next_free_a = saved_ap+3+gap;
// makespace(A, next_free_a, a_size);
/* Debug */
// this loop is only for diagnostics
#ifdef DEBUG_PARSER
if (debug_parser) {
char *saved_descp;
fprintf(outfile, "\n");
indent(depth, outfile);
fprintf(outfile, "Alternative %d: (%d phrases) ", i+1, phrases);
saved_descp = saved_desc; *saved_descp = '\0';
for (each = 0; each < phrases; each++) {
int phrase = gram[gp+each];
if (phrase < 256) {
saved_descp += sprintf(saved_descp, " '%c'", phrase);
} else if (phrase < 512) {
saved_descp += sprintf(saved_descp, " \"%s\"/%d", keyword[phrase-256], phrase-256);
} else if (phrase < 512+MAX_BIP) {
saved_descp += sprintf(saved_descp, " {%s/BIP%d}", phrasename[phrase-512], BIP[phrase-512]);
} else {
saved_descp += sprintf(saved_descp, " <%s/%d>", phrasename[phrase-512], phrase);
}
}
fprintf(outfile, "%s\n", saved_desc);
fflush(outfile);
}
#endif
match = TRUE; // stays true if all subphrases match
phrase_count = 0; // only phrases which make it into the A record,
// i.e. excluding literals and keywords
for (each = 0; each < phrases; each++) {
/* Within a single grammar rule (alternative), ensure that each subphrase is present */
int phrase = gram[gp+each];
/* Debug */
#ifdef DEBUG_PARSER
if (debug_parser) {
indent(depth, outfile);
fprintf(outfile, "Input token stream = '%s' '%s' '%s' ...\n",
(cp < nextfree ? c[cp].s : "EOF"),
(cp+1 < nextfree ? c[cp+1].s : "EOF"),
(cp+2 < nextfree ? c[cp+2].s : "EOF"));
}
#endif
if (cp > bestparse) {
static char s[128];
#ifdef DEBUG_PARSER
if (phrase < 256) {
sprintf(s, "'%c'", phrase);
} else if (phrase < 512) {
sprintf(s, "\"%s\"", keyword[phrase-256]);
} else if (phrase < 512+MAX_BIP) {
sprintf(s, "{%s}", phrasename[phrase-512]);
} else {
sprintf(s, "<%s>", phrasename[phrase-512]);
}
#endif
looking_for = s;
bestparse = cp;
}
#ifdef DEBUG_PARSER
if (debug_parser) indent(depth, outfile);
#endif
if (phrase < 256) {
/* Literal */
#ifdef DEBUG_PARSER
if (debug_parser) fprintf(outfile, "'%c'", phrase);
#endif
if ((c[cp].t != TYPE_CHAR) || (c[cp].s[0] != phrase)) match = FALSE; else cp++;
// Don't record literals
} else if (phrase < 512) {
/* Keyword */
#ifdef DEBUG_PARSER
if (debug_parser) fprintf(outfile, "\"%s\"/%d", keyword[phrase-256], phrase-256);
#endif
if (strcmp(keyword[phrase-256], c[cp].s) != 0) match = FALSE; else cp++;
// Don't record keywords
} else if (phrase < 512+MAX_BIP) {
/* Built-in phrase */
int where = ap; // next phrase to be parsed will be stored at current 'ap'.
#ifdef DEBUG_PARSER
if (debug_parser) fprintf(outfile, "{%s/BIP%d}", phrasename[phrase-512], BIP[phrase-512]);
#endif
if (c[cp].t != BIP[phrase-512]) match = FALSE; else {
A[ap++] = phrase;
A[ap++] = 1;
A[ap++] = 1;
A[ap++] = cp++;
A[saved_ap+3+phrase_count++] = where; // Record BIP
}
} else {
/* Recursive call to parser for a subphrase */
int where = ap; // next phrase to be parsed will be stored at current 'ap'.
#ifdef DEBUG_PARSER
if (debug_parser) fprintf(outfile, "<%s/%d>", phrasename[phrase-512], phrase);
#endif
if (!parse(phrase, depth+1)) match = FALSE; else {
A[saved_ap+3+phrase_count++] = where;
}
}
/* debug */
#ifdef DEBUG_PARSER
if (debug_parser) {
fprintf(outfile, "\n");
indent(depth, outfile);
fprintf(outfile, "Tried alternative %d: %s - result was %s\n", each+1, saved_desc, (match ? "TRUE" : "FALSE"));
fflush(outfile);
}
#endif
if (!match) break;
}
gp += phrases; // move over all phrases, to next alt
if (match) break;
#ifdef DEBUG_PARSER
else if (debug_parser) {
indent(depth, outfile);
fprintf(outfile, "** Alternative %d FAILED.\n", i+1);
}
#endif
// gp now points to first element (length) of next alt, or start of next phrase
}
return(match);
}
static char debugstr[1024];
static char *debugs;
static int paramoffset = 1;
static int next_string_const = 1000;
char *compile(int ap)
{
static char arg[128];
static int comparison_alt_cache;
int saved_ap;
int phrase; // A[ap] is the phrase number. A[ap+1] is the alt.
int alt; // For consistency, in BIPs, the Alt should always be 1
// although that may not be the case at the moment :-(
int phrases; // defined subphrases
int i;
int condno;
// The following ecce command executed on this file will generate varcalc.g:
// ecce -c "(v.//\\.s..(v/ /e)?m,k)0;%c" varcalc.c varcalc.g
// May later tweak takeon.c to read from varcalc.c rather than varcalc.g
// thus simplifying build process and becoming more like yacc.
saved_ap = ap;
phrase = A[ap++];
alt = A[ap++];
phrases = A[ap++];
#ifdef DEBUG
fprintf(outfile, "compile(A[%d]) phrase=%s\n", saved_ap, phrasename[phrase-512]);
#endif
switch (phrase) {
//\\ B<EOF> = 0;
case P_EOF:
fprintf(stderr, "EOF!\n");
dump();
exit(0);
//\\ B<TAG> = 1;
case P_TAG:
return NULL;
//\\ B<STRING> = 2;
case P_STRING:
//\\ B<CHARCONST> = 3;
case P_CHARCONST:
//\\ B<INT> = 5;
case P_INT:
//\\ B<COMMENT> = 8;
case P_COMMENT:
break;
//\\ B<NEWLINE> = 9;
case P_NEWLINE:
//\\ P<SS> = <EOF>, <LABEL> <NEWLINE>, <INSTR> <NEWLINE>, <dotPROC> <NEWLINE>, <dotEND> <NEWLINE>, <dotWORD> <NEWLINE>, <dotCONST> <NEWLINE>, <dotARRAY> <NEWLINE>, <OPTCOMMENT> <NEWLINE>, <SYNTAXERROR>;
case P_SS:
if (alt == 0) {
dump();
exit(0); // or would 'break' be cleaner here?
}
if (alt < 8) {
int cx;
#ifdef NEVER
debugs = debugstr;
debugs += sprintf(debugs, " SECTION .data\n");
debugs += sprintf(debugs, "line%d: db \"", lineno);
hack = debugs;
for (cx = 0; cx < cp-1; cx++) {
if (c[cx].t == TYPE_CHARCONST) {
if (*c[cx].s == '"') {
debugs += sprintf(debugs, "'\", %d, \"", *c[cx].s);
} else {
debugs += sprintf(debugs, "'%s' ", c[cx].s);
}
} else {
debugs += sprintf(debugs, "%s ", c[cx].s);
}
}
fprintf(outfile, " ;; %s\n", hack);
debugs += sprintf(debugs, "\", 10, 0\n");
debugs += sprintf(debugs, " SECTION .text\n");
debugs += sprintf(debugs, " pop eax\n");
debugs += sprintf(debugs, " push eax\n");
debugs += sprintf(debugs, " push eax\n");
debugs += sprintf(debugs, " push dword fmt_tos ; address of %%s format string\n");
debugs += sprintf(debugs, " call printf ; Call C function\n");
debugs += sprintf(debugs, " add esp, 8 ; pop 2 params\n");
debugs += sprintf(debugs, " push dword line%d ; address of string\n", lineno);
debugs += sprintf(debugs, " push dword fmt_str ; address of %%s format string\n");
debugs += sprintf(debugs, " call printf ; Call C function\n");
debugs += sprintf(debugs, " add esp, 8 ; pop 2 params\n");
#else
hack = debugs = debugstr;
for (cx = 0; cx < cp-1; cx++) {
if (c[cx].t == TYPE_CHARCONST) {
if (*c[cx].s == '"') {
debugs += sprintf(debugs, "'\", %d, \"", *c[cx].s);
} else {
debugs += sprintf(debugs, "'%s' ", c[cx].s);
}
} else {
debugs += sprintf(debugs, "%s ", c[cx].s);
}
}
#endif
compile(A[ap]);
}
return NULL;
//\\ P<SYNTAXERROR> = ;
case P_SYNTAXERROR:
fprintf(stderr, "Parse failed looking for %s\n", looking_for);
return NULL;
//\\ P<CALL> = "CALL" <TAG> ',' <INT> ',' <INT>;
case P_CALL:
if (strcmp(c[A[A[ap]+3]].s, "psym") == 0) { // special for 'print "xxx"'
plant("text", "", "push", "dword fmt_chr", NULL, "address of format string");
plant("text", "", "call", "printf", NULL, "Call C function");
plant("text", "", "add", "esp", "8", "pop 2 params");
} else {
plant("text", NULL, "call", c[A[A[ap]+3]].s, NULL, NULL);
if (atoi(c[A[A[ap+1]+3]].s) != 0) {
plant("text", "", "add", "esp", itoaf("%d", atoi(c[A[A[ap+1]+3]].s)*4), NULL);
}
if (atoi(c[A[A[ap+2]+3]].s) != 0) {
plant("text", "", "push", "eax", NULL, NULL);
}
}
return NULL;
//\\ P<dotPROC> = <LABEL> '.' "PROC";
case P_dotPROC:
paramoffset = 1; // somewhat hacky, used with .PARAM
plant("text", c[A[A[A[ap]]+3]].s, "push", "ebp", NULL, NULL);
plant("text", "", "mov", "ebp", "esp", NULL);
return NULL;
//\\ P<dotEND> = '.' "END";
case P_dotEND:
plant("text", "", "mov", "esp", "ebp", NULL);
plant("text", "", "pop", "ebp", NULL, NULL);
// fprintf(outfile, " mov eax,0 ; normal, no error, return value\n"); // drop-through case in void procedure.
// ... if it was a function with a result, this is wrong anyway...
plant("text", "", "ret", "", NULL, NULL);
dump();
exit(0); // might be nicer to find a cleaner way to break out of main parse loop...
//\\ P<dotWORD> = <LABEL> '.' "WORD" <INT>;
case P_dotWORD:
#ifdef ORIG
plant("bss",c[A[A[A[ap]]+3]].s, "resd", c[A[A[ap+1]+3]].s, NULL, NULL);
#else
if (atoi(c[A[A[ap+1]+3]].s) == 1) {
plant("data",c[A[A[A[ap]]+3]].s, "dd", "$80808080", NULL, NULL);
} else {
plant("data",c[A[A[A[ap]]+3]].s, itoaf("times %d dd", atoi(c[A[A[ap+1]+3]].s)), "$80808080", NULL, NULL);
}
#endif
return NULL;
//\\ P<dotARRAY> = <LABEL> '.' "ARRAY";
case P_dotARRAY:
plant("data", c[A[A[A[ap]]+3]].s, "", "", NULL, NULL);
return NULL;
//\\ P<dotCONST> = '.' "CONST" <INT>;
case P_dotCONST:
plant("data", "", "dd", c[A[A[ap]+3]].s, NULL, NULL);
return NULL;
//\\ P<EXIT> = "EXIT";
case P_EXIT: // does this require a RET first? End of program?
//\\ P<RET> = "RET" <INT>;
case P_RET:
if (atoi(c[A[A[ap]+3]].s) != 0) {
plant("text", "", "pop", "eax", NULL, NULL);
}
plant("text", "", "mov", "esp", "ebp", NULL);
plant("text", "", "pop", "ebp", NULL, NULL);
if ((phrase == P_EXIT) /* || (atoi(c[A[A[ap]+3]].s) == 0) */ ) {
// Do we really need to return 0 status code from void function? Let's omit it and see...
// OK... can omit for internal functions, but MAIN is called as an int function
plant("text", "", "mov", "eax", "0", NULL);
}
plant("text", "", "ret", "", NULL, NULL);
if (phrase == P_EXIT) {
dump();
exit(0);
}
return NULL;
//\\ P<INSTR> = <OPTLABEL> <ASSEM> <OPTCOMMENT>;
case P_INSTR:
compile(A[ap]);
compile(A[ap+1]);
return NULL;
//\\ P<ASSEM> = <ADD>, <SUB>, <MUL>, <DIV>, <MOD>, <NEG>, <NOT>, <BAND>, <BOR>, <LSH>, <RSH>,
//\\ <CALL>, <RET>, <PUSHSTR>, <PUSHLIT>, <PUSHIND>, <PUSHADDR>, <PUSHVAR>, <POPIND>, <POPVAR>,
//\\ <PARAM>, <CMP>, <BT>, <BF>, <B>, <PRINT>, <EXIT>, <UNKNOWN>;
case P_ASSEM:
compile(A[ap]);
return NULL;
//\\ P<CMP> = <COND>;
case P_CMP:
condno = A[A[ap]+1];
plant("text", "", "pop", "eax", NULL, NULL);
plant("text", "", "pop", "ecx", NULL, NULL);
plant("text", "", "cmp", "ecx", "eax", NULL);
#ifdef STUPID_COMP_SHORTCUT
comparison_alt_cache = condno;
#else
// See http://faydoc.tripod.com/cpu/setge.htm for SETGE instruction as an alternative?
#ifdef branching
fprintf(outfile, " j"); compile(A[ap]); fprintf(outfile, " T_%d\n", nexttmp);
fprintf(outfile, " push dword 1 ; false\n");
fprintf(outfile, " jmp T_%d\n", nexttmp+1);
fprintf(outfile, "T_%d: push dword 0 ; true\n", nexttmp);
fprintf(outfile, "T_%d:", nexttmp+1);
nexttmp += 2;
#else
// fprintf(outfile, " set"); compile(A[ap]); fprintf(outfile, " al\n");
// fprintf(outfile, " xor ah,ah\n");
// fprintf(outfile, " push word 0\n");
// fprintf(outfile, " push ax");
plant("text", "", "mov", "eax", "0", NULL);
// fprintf(outfile, " mov ah,0\n");
// fprintf(outfile, " cwde\n");
sprintf(arg, "set%s", cond[condno]); // <COND>
plant("text", "", arg, "al", NULL, NULL);
//fprintf(outfile, " set"); compile(A[ap]); fprintf(outfile, " al\n");
plant("text", "", "push", "eax", NULL, NULL);
/*
Rainer suggests the much improved:
cmp <whatever>
setl al
movzx eax,al
push eax
*/
#endif
#endif
return NULL;
//\\ P<COND> = "CMPGE", "CMPLT", "CMPLE", "CMPGT", "CMPEQ", "CMPNE";
case P_COND:
// fprintf(outfile, "%s", cond[alt]); handle a level up now
return NULL;
//\\ P<BF> = "BF" <TAG>;
case P_BF:
#ifdef STUPID_COMP_SHORTCUT
plant("text", "", jcond[comparison_alt_cache ^ 1], c[A[A[ap]+3]].s, NULL, NULL);
//fprintf(outfile, " j%s %s", cond[comparison_alt_cache ^ 1], c[A[A[ap]+3]].s);
#else
fprintf(outfile, " pop eax\n");
// fprintf(outfile, " cmp eax, 0\n");
fprintf(outfile, " or eax, eax\n");
fprintf(outfile, " je %s", c[A[A[ap]+3]].s);
#endif
return NULL;
//\\ P<BT> = "BT" <TAG>;
case P_BT:
#ifdef STUPID_COMP_SHORTCUT
plant("text", "", jcond[comparison_alt_cache], c[A[A[ap]+3]].s, NULL, NULL);
// fprintf(outfile, " j%s %s", cond[comparison_alt_cache], c[A[A[ap]+3]].s);
#else
fprintf(outfile, " pop eax\n");
// fprintf(outfile, " cmp eax, 0\n");
fprintf(outfile, " or eax, eax\n");
fprintf(outfile, " jne %s", c[A[A[ap]+3]].s);
#endif
return NULL;
//\\ P<B> = "B" <TAG>;
case P_B:
plant("text", "", "jmp", c[A[A[ap]+3]].s, NULL, NULL);
return NULL;
//\\ P<MUL> = "MUL";
case P_MUL:
plant("text", "", "pop", "eax", NULL, NULL);
plant("text", "", "pop", "ecx", NULL, NULL);
plant("text", "", "imul", "ecx", NULL, "(edx:eax = eax*ecx)");
plant("text", "", "push", "eax", NULL, NULL);
return NULL;
//\\ P<DIV> = "DIV";
case P_DIV:
plant("text", "", "pop", "ecx", NULL, NULL);
plant("text", "", "pop", "eax", NULL, NULL);
plant("text", "", "mov", "edx", "0", NULL);
plant("text", "", "idiv", "ecx", NULL, "(div -> eax, remainder -> edx)");
plant("text", "", "push", "eax", NULL, NULL);
return NULL;
//\\ P<MOD> = "MOD";
case P_MOD:
plant("text", "", "pop", "ecx", NULL, NULL);
plant("text", "", "pop", "eax", NULL, NULL);
plant("text", "", "mov", "edx", "0", NULL);
plant("text", "", "idiv", "ecx", NULL, "(div -> eax, remainder -> edx)");
plant("text", "", "push", "edx", NULL, NULL);
return NULL;
//\\ P<ADD> = "ADD";
case P_ADD:
plant("text", "", "pop", "eax", NULL, NULL);
plant("text", "", "pop", "ecx", NULL, NULL);
plant("text", "", "add", "ecx", "eax", NULL);
plant("text", "", "push", "ecx", NULL, NULL);
//fprintf(outfile, " pop eax\n");
//fprintf(outfile, " add [esp],eax\n");
return NULL;
//\\ P<SUB> = "SUB";
case P_SUB:
plant("text", "", "pop", "eax", NULL, NULL);
plant("text", "", "pop", "ecx", NULL, NULL);
plant("text", "", "sub", "ecx", "eax", NULL);
plant("text", "", "push", "ecx", NULL, NULL);
//fprintf(outfile, " pop eax\n");
//fprintf(outfile, " sub [esp],eax\n");
return NULL;
// <NEG>, <NOT>, <LSH>, <RSH>,
//\\ P<NEG> = "NEG";
case P_NEG:
// fprintf(outfile, " pop eax\n");
// fprintf(outfile, " neg eax\n");
// fprintf(outfile, " push eax");
plant("text", "", "neg", "dword [esp]", NULL, NULL);
return NULL;
//\\ P<NOT> = "NOT";
case P_NOT:
// fprintf(outfile, " pop eax\n");
// fprintf(outfile, " not eax\n");
// fprintf(outfile, " push eax");
plant("text", "", "not", "dword [esp]", NULL, NULL);
return NULL;
//\\ P<LSH> = "LSH";
case P_LSH:
plant("text", "", "pop", "ecx", NULL, NULL);
plant("text", "", "pop", "eax", NULL, NULL);
plant("text", "", "shl", "eax", "cl", NULL);
plant("text", "", "push", "eax", NULL, NULL);
return NULL;
//\\ P<RSH> = "RSH";
case P_RSH:
plant("text", "", "pop", "ecx", NULL, NULL);
plant("text", "", "pop", "eax", NULL, NULL);
plant("text", "", "shr", "eax", "cl", NULL);
plant("text", "", "push", "eax", NULL, NULL);
return NULL;
//\\ P<BAND> = "BAND", "LAND";
case P_BAND: // cheap hack for now
plant("text", "", "pop", "ecx", NULL, NULL);
plant("text", "", "pop", "eax", NULL, NULL);
plant("text", "", "and", "eax", "ecx", NULL);
plant("text", "", "push", "eax", NULL, NULL);
return NULL;
//\\ P<BOR> = "BOR", "LOR";
case P_BOR: // ditto
plant("text", "", "pop", "ecx", NULL, NULL);
plant("text", "", "pop", "eax", NULL, NULL);
plant("text", "", "or", "eax", "ecx", NULL);
plant("text", "", "push", "eax", NULL, NULL);
return NULL;
//\\ P<PRINT> = "PRINT";
case P_PRINT:
plant("text", "", "push", "dword fmt_str", NULL, "address of %s format string");
plant("text", "", "call", "printf", NULL, "external call to C");
plant("text", "", "add", "esp", "8", NULL);
return NULL;
//\\ P<PUSHLIT> = "PUSH" '#' <NUM>;
case P_PUSHLIT: // changed from OPD to NUM for the moment
sprintf(arg, "dword %s", compile(A[ap]));
plant("text", "", "push", arg, NULL, NULL);
//fprintf(outfile, " push dword "); compile(A[ap]);
return NULL;
//\\ P<PUSHSTR> = "PUSHS" <STRING>;
case P_PUSHSTR:
//fprintf(outfile, " SECTION .data\n");
sprintf(arg, "\"%s\"", c[A[A[ap]+3]].s); // TO DO! String const needs escaping?
plant("data", itoaf("string%d", ++next_string_const), "db", arg, "0", NULL);
//fprintf(outfile, "string%d: db \"%s\", 0\n", ++next_string_const, c[A[A[ap]+3]].s);
//fprintf(outfile, " SECTION .text\n");
plant("text", "", "push", itoaf("dword string%d", next_string_const), NULL, NULL);
return NULL;
//\\ P<PUSHIND> = "PUSHI" <NUM>;
case P_PUSHIND:
plant("text", "", "pop", "eax", NULL, NULL);
plant("text", "", "pop", "ecx", NULL, NULL);
plant("text", "", "push", "dword [ecx+eax*4]", NULL, NULL);
return NULL;
//\\ P<PUSHVAR> = "PUSH" <TAG>;
case P_PUSHVAR:
sprintf(arg, "dword [%s]", c[A[A[ap]+3]].s);
plant("text", "", "push", arg, NULL, NULL);
return NULL;
//\\ P<PUSHADDR> = "PUSH" '&' <TAG>;
case P_PUSHADDR:
plant("text", "", "push", c[A[A[ap]+3]].s, NULL, NULL);// if this is a procedure param it is different from a static
return NULL;
//\\ P<POPIND> = "POPI" <NUM>;
case P_POPIND:
// fprintf(outfile, " pop ecx\n");
// fprintf(outfile, " pop dword [ecx]");
plant("text", "", "pop", "eax", NULL, NULL);
plant("text", "", "pop", "ecx", NULL, NULL);
plant("text", "", "pop", "edx", NULL, NULL);
plant("text", "", "mov", "[ecx+eax*4]", "edx", NULL);
return NULL;
//\\ P<POPVAR> = "POP" <TAG>;
case P_POPVAR:
sprintf(arg, "dword [%s]", c[A[A[ap]+3]].s);
plant("text", "", "pop", arg, NULL, NULL);
return NULL;
//\\ P<PARAM> = '.' "PARAM" <TAG>;
case P_PARAM:
sprintf(arg, "%s ebp+%0d", c[A[A[ap]+3]].s, 4*++paramoffset);
plant("text", "", "%define", arg, NULL, NULL);
//fprintf(outfile, "%%define %s ebp+%0d\n", c[A[A[ap]+3]].s, 4*++paramoffset);
plant("text", "", "sub", "esp", "4", NULL);
return NULL;
//\\ P<UNKNOWN> = <TAG> <OPTOPD>;
case P_UNKNOWN:
fprintf(outfile, " %s; UNKNOWN OPCODE", c[A[A[ap]+3]].s);
compile(A[ap+1]);
return NULL;
//\\ P<OPD> = <NUM>, <TAG>, ;
case P_OPD:
if (alt == 0) fprintf(outfile, "%s", compile(A[ap]));
else if (alt == 1) fprintf(outfile, "%s", c[A[A[ap]+3]].s);
return NULL;
//\\ P<NUM> = <CHARCONST>, <INT>;
case P_NUM:
{static char last_num[128], *p;
p = last_num;
if (alt == 0) {
p += sprintf(p, "'");
if (*c[A[A[ap]+3]].s == '\n') {
p += sprintf(p, "\\n");
} else {
p += sprintf(p, "%s", c[A[A[ap]+3]].s);
}
if (*c[A[A[ap]+3]].s == '\'') p += sprintf(p, "'");
p += sprintf(p, "'");
} else {
p += sprintf(p, "%s", c[A[A[ap]+3]].s);
}
//fprintf(outfile, "%s", last_num);
return last_num;
}
//\\ P<LABEL> = <TAG> ':';
case P_LABEL:
plant("text", c[A[A[A[ap]]+3]].s, "", "", NULL, NULL);
// now handled up a level except for OPTLABELs in front of regular code (ie jump dests)
if (strcmp(c[A[A[A[ap]]+3]].s, "__start") == 0) {
plant("text", "main", "push", "ebp", NULL, "the program label for the entry point");
plant("text", "", "mov", "ebp", "esp", "set up incoming call stack frame");
/*
To make ecce work, we need some sort of initialisation like this:
if (argc != 3)
{
fprintf (stderr, "syntax: %s infile outfile\n", argv[0]);
exit (1);
}
if (strcmp (argv[1], argv[2]) == 0)
{
fprintf (stderr, "%s: output file cannot overwrite input file\n",
argv[0]);
exit (1);
}
infile = fopen (argv[1], "r");
if (infile == NULL)
{
fprintf (stderr, "%s: cannot read file '%s'\n", argv[0], argv[1]);
exit (1);
}
outfile = fopen (argv[2], "w");
if (outfile == NULL)
{
fprintf (stderr, "%s: cannot write file '%s'\n", argv[0], argv[2]);
exit (1);
}
Note that the unix calling convention has argc at [esp] and the args following.
*/
}
return NULL;
//\\ P<OPTLABEL> = <LABEL>, ;
case P_OPTLABEL:
if (alt == 0) compile(A[ap]);
return NULL;
//\\ P<OPTOPD> = <OPD>, ;
case P_OPTOPD:
if (alt == 0) compile(A[ap]);
return NULL;
//\\ P<OPTCOMMENT> = <COMMENT>, ;
case P_OPTCOMMENT:
//\\ E
break;
}
}
int main(int argc, char **argv)
{
char outname[256], *s;
Progname = argv[0];
if ((argc >= 2) && (strcmp(argv[1], "-d") == 0)) {
debug_parser = TRUE; argc -= 1; argv += 1;
}
if ((argc >= 2) && (strcmp(argv[1], "-O") == 0)) {
opt_optimiser = TRUE; argc -= 1; argv += 1;
}
if (argc != 2) {
fprintf(stderr, "syntax: %s [-d] file.asm\n", Progname); exit(1);
}
sprintf(outname, "%s", argv[1]);
s = strrchr(outname, '.');
if (s == NULL) s = outname+strlen(outname);
sprintf(s, "%s", ".S");
infile = fopen(argv[1], "r");
if (infile == NULL) {
fprintf(stderr, "%s: cannot open input %s - %s\n", Progname, argv[1], strerror(errno)); exit(2);
}
outfile = fopen(outname, "w");
if (outfile == NULL) {
fprintf(stderr, "%s: cannot open output %s - %s\n", Progname, outname, strerror(errno)); exit(3);
}
fprintf(outfile, "; Compile with: nasm -O1 -f elf %s\n", outname);
plant("text", "", "global", "main", NULL, "the standard gcc entry point");
plant("text", "", "extern", "printf", NULL, NULL);
plant("text", "", "extern", "getchar", NULL, NULL);
plant("text", "", "extern", "putchar", NULL, NULL);
plant("text", "", "extern", "fgetc", NULL, NULL);
plant("text", "", "extern", "fputc", NULL, NULL);
plant("text", "", "extern", "fflush", NULL, NULL);
plant("text", "", "extern", "exit", NULL, NULL);
plant("text", "", "extern", "stdin", NULL, NULL);
plant("text", "", "extern", "stdout", NULL, NULL);
plant("data", "fmt_str", "db", "\"%s\"", "0", NULL);
plant("data", "fmt_chr", "db", "\"%c\"", "0", NULL);
plant("data", "fmt_tos", "db", "\"TOS: %08x\"", "10, 0", NULL);
for (;;) {
nextfree = 0; arraysize = 0;
next_free_a = 0; a_size = 0;
cp = 0; // code pointer. Has to be global state.
ap = 0; // Analysis record pointer.
// parse on a line-by-line basis
line_reconstruction();
#ifdef DEBUG_LEXER
if (debug_lexer) {
int i; // DEBUG ONLY
fprintf(stderr, "\nLexical token stream:\n\n");
for (i = 0; i < nextfree; i++) {
fprintf(stderr, "C[%d] => %s, line %d, col %d: [%0d] %s\n",
i, c[i].f, c[i].l, c[i].col, c[i].t, c[i].s);
}
}
#endif
if (startparse(PHRASE_BASE)) {
compile(0);
} else {
fprintf(stderr, "Can't\n");
exit(1);
}
}
dump();
exit(0);
return(0);
}