maketable.c

/*
    This program takes a boolean expression, which can be bracketed and using any of
    the logic operators (not, and, or, nand, nor, xor, nxor) and builds a truth table
    representing the output of the expression for all possible inputs,

    for example:

    a = b + c.d

    b c d => a
    0 0 0    0
    0 0 1    0
    0 1 0    0
    0 1 1    1
    1 0 0    1
    1 0 1    1
    1 1 0    1
    1 1 1    1

    The table is then used to rephrase the equation in terms of Conjunctive Normal Form
    (sum-of-products) which is in turn used to convert to any other form such as NAND or
    NOR and then reduced to minimal gate form.

    b c d => a
    0 0 0    0  ~b.~c.~d +
    0 0 1    0  ~b.~c. d +
    0 1 0    0  ~b .c.~d +
    0 1 1    1  ~b. c. d +
    1 0 0    1   b.~c.~d +
    1 0 1    1   b.~c. d +
    1 1 0    1   b. c.~d +
    1 1 1    1   b. c. d

    a = b+(c.d) == a =    0     +   0     +    0      + ~b.c.d + b.~c.~d + b.~c.d + b.c.~d + b.c.d
                     =                                  ~b.c.d + b.(~c.~d + ~c.d + c.~d + c.d)
                     =                                  ~b.c.d + b.(~c.(~d + d) + c.(~d + d))
                     =                                  ~b.c.d + b.(~c + c)
                     =                                  ~b.c.d + b
                     =                                  c.d + b
                     =                                  ~( ~(c.d) . ~b )
                     =                                  ~( ~(c.d) . ~(b.b) )
                     =                                  NAND(NAND(c,d), NAND(b,b))

    This program only handles single output boolean equations.  It does *not*
    optimise common terms for a set of multiple equations where there are
    common inputs.  That would require a program like REDUCE and/or a technique
    like Karnaugh maps.

    I previously wrote a Pascal program with similar goals, which worked by
    forming expression trees like a compiler.  This program attempts a simpler
    solution, by fully evaluating the boolean function to produce a truth table,
    then reducing the truth table first back to conjunctive normal form, then
    to a bracketed form using heuristics that attempt to produce the simplest
    bracketed representation of the equation.

    Addendum: now I'm looking to programming PALs or PLAs, I might turn this
    code into something that outputs an array for programmimng one of these.

 */

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <error.h>
#include <errno.h>
#include <assert.h>

static inline int IEXP(int base, int exp) {
  int res = base;
  if (exp < 1) return 0;
  while (exp-- > 1) res *= base;
  return res;
}

#define TRUE (0==0)
#define FALSE (0!=0)
int debug=FALSE;

#define max 127
int o[max],  // operator (ascii code).  0 means a leaf (implies t[] is 'C' or 'V').  >= 1024 means a function.
 corv[max],  // value (constant) or variable (input pin or partial expression)
    t[max],  // type ('C' or 'V' or 'E' (expr))
    l[max],  // left or only operand
    r[max];  // right operand

char text[max];

int sym;
int textp = 0;
int next = 1;

// system functions are entered into stringpool before user variables.
int OPNOT, OPNAND, OPAND, OPNOR, OPOR, OPNXOR, OPNXOR2, OPXOR, OPXOR2 /*, OPT, OPF*/;
int VariableBase = 0;

#define nil 0

void fail(char *s)
{
  fprintf(stderr, "eval fails: %s\n", s);
  exit(1);
}

/*
Well, basically each operator has three attributes: a precedence value
and two flags. One flag says whether it's suffix unary or not, the other
tells us whether (if binary) it right-associates. These three things are
packed into a single number, equal to four times the precedence,
plus 2 if right-assoc, plus 1 if unary-suffix. These attributes are
returned by a straightforward lookup function which is also capable
of being used to answer the simple yes-no question of whether a
symbol is an operator or not, which is useful in the simpler versions
of the calculator (brackets-only, or not even that).
 */

int prec(int sym) {
#define ASSIGN 1
#define LOGOR  2
#define LOGAND 3
#define LOGNOT 6
#define rightassoc 2
#define unarysuffix 1
#define precnot (6<<2)
  if (debug) fprintf(stderr, "prec(%c)\n", sym==0 ? '?' : sym);
  if ((sym=='+') || (sym=='|')) return LOGOR<<2;
  if ((sym=='.') || (sym=='&')) return LOGAND<<2;
  if  (sym=='=')                return ASSIGN<<2;
  if ((sym=='~') || (sym=='!')) return LOGNOT<<2;
  if  (sym=='\'')               return (LOGNOT<<2)|unarysuffix;
  return 0; // not an operator
}

// Arrays instead of structs for expression tree nodes.
// Leaf nodes have O[E]==0 with the data value in CorV[E].
// Other nodes have the operator in O[E] with CorV[E] unused.
// L[E] and R[E] point to (are indices of) subnodes.

char stringpool[4096];
char *str[128];
int strindex = 0;
int strpool(char *s) {
  int idx, seq;
  strcpy(stringpool+strindex, s);
  seq = idx = 0;
  while (idx <= strindex) {
    if (strcmp(stringpool+idx, s) == 0) {
      if (idx == strindex) {
        str[seq] = stringpool+strindex;
        strindex = strindex + strlen(s) + 1;
      }
      if (debug) fprintf(stderr, "Map \"%s\" -> %d ('%c')\n", str[seq], seq, seq-VariableBase+'A');
      return seq;
    }
    seq += 1; idx += strlen(stringpool+idx) + 1;
  }
}

int newnode(int type, int op, int val, int left, int right) {
  if (next>=max) fail("Arrays full");
  o[next] = op; corv[next]=val; t[next] = type; l[next] = left; r[next] = right;
  return next++;
}

static inline int newconstnode(int op, int val) {
  if (debug) fprintf(stderr, "newconstnode(op='%c' val=%d) -> %d\n", op,val,next);
  return newnode('C', op, val, nil, nil);
}
static inline int newvarnode(int val) {
  if (debug) fprintf(stderr, "newvarnode(val=%s) -> %d\n", str[val],next);
  return newnode('V', 0, val, nil, nil);
}
static inline int newexprnode(int op, int left, int right) {
  if (debug) {
    if (op >= 1024) {
      fprintf(stderr, "newexprnode(op=\"%s\" left=%d right=%d) -> %d\n", str[op-1024],left,right,next);
    } else {
      fprintf(stderr, "newexprnode(op='%c' left=%d right=%d) -> %d\n", op,left,right,next);
    }
  }
  return newnode('E', op, 0, left, right);
}

// The result of EXPR is the index to the top level node describing
// the (sub-)expression it has built.

int skip(int ch) {
  if (sym == ch) {
    sym = text[++textp]; assert(sym != ' ');
    if (debug) fprintf(stderr, "skip(%c) -> T, {%c}\n", ch, sym==0 ? '?' : sym);
    return (TRUE);
  } else {
    if (debug) fprintf(stderr, "skip(%c) -> F, {%c}\n", ch, sym==0 ? '?' : sym);
    return (FALSE);
  }
}

int expr (int refprec) {
int op,p,f,L,r;
  if (debug) fprintf(stderr, "expr(prec=%d,rightassoc=%d,unarysuffix=%d)\n", refprec>>2,refprec&rightassoc,refprec&unarysuffix);
  if (skip('~') || skip('!')) {       // unary prefix (only NOT)
    r = expr(precnot);                // get operand
    if (o[r]==0) {                    // that was a constant:
      if (t[r]=='C')
        corv[r] = ~corv[r];
      else
        r = newexprnode('~',nil,r);
      L = r;      // negate it directly
    } else L = newexprnode('~',nil,r);
  } else if (skip('(')) {             // bracketed subexpression
    L = expr(0); if (!skip(')')) fail("Close bracket missing");
  } else if ((sym|1)=='1') {  // bare operand, 0 or 1.
    L = 0; do {L = L*10-'0'+sym; skip(sym);} while (sym>='0' && sym<='9');
    L = newconstnode(0,L);
  } else if ((sym>='a' && sym<='z') || (sym>='A' && sym<='Z') || sym=='_') {  // TO DO!!!  variable or function call
    char name[256];
    L = 0; do {name[L++]=sym; skip(sym);} while ((sym>='a' && sym<='z') || (sym>='A' && sym<='Z') || sym=='_' || (sym>='0' && sym<='9'));
    name[L] = '\0';
    L = strpool(name);
    // and a variable followed by (...) is a function call, eg NAND, XOR etc.
    if (sym=='(') {
      skip(sym);
      op=L;
      r = nil;
      L = expr(0); // Rainer would do some clever trick here with prec(',') but we'll insist all functions are built-in and take 2 args
      if (op != 0 /*NOT()*/) {
        if (!skip(',')) fail("Second parameter to function missing");
        r = expr(0);
      }
      if (!skip(')')) {
        if (skip(',')) fail("Too many parameters"); else fail("Close bracket missing");
      }
      L = newexprnode(op+1024,L,r);
    } else L = newvarnode(L);

  } else fail("Operand expected");
  while (1) {
    op = sym;
    f = prec(op); p = f>>2<<2;  // postfix 'not' should be picked up here.
    if ((p<=refprec) && (p<refprec || (f&rightassoc)==0)) return L;  // 2: right associative
    skip(op); r = nil; if ((f&unarysuffix)==0) r = expr(p);          // 1: unary suffix
    L = newexprnode(op,L,r);
  }
}

/*
Notice how there is no longer (a need for) a separate operand getting
function, this task being absorbed into EXPR itself.

Next thing we do is write an expression evaluator, to which the result
of the top level call to EXPR can be passed.  Like this:
*/

int calculate (int a, int op, int b) {
  int c, i;
       if ((op=='+') || (op=='|')) {fprintf(stderr, "Or          \n"); c =  a|b;}
  else if  (op=='~')               {fprintf(stderr, "Not         \n"); c =   ~b;}
  else if  (op=='\'')              {fprintf(stderr, "Not         \n"); c = ~a  ;}
  else if ((op=='.') || (op=='&')) {fprintf(stderr, "And         \n"); c =  a&b;}
  else if (op >= 1024) {
    op -= 1024; c = 0;
    fprintf(stderr, "Call %s\n", str[op]);
    // Could substitute the actual logic here. Might even open up
    // the ability for the user to define their own functions.
  } else fail("Unrecognised operator in 'calculate'");
  //printf("       (%d %c %d = %d)\n",a,op,b,c); return c;
  return c;
}

int eval (int top, int e) {
int a,op,c;
  if (debug) fprintf(stderr, "eval(%d)\n", e);
  if (e==nil) return 0; // fail("assert: e!=nil in eval");
  op = o[e];
  if (op==0) {    // leaf node
    a = corv[e];
    // If the term is not an input pin, we could substitute the definition of the term instead
    // so that all evaluations work directly on input pin values.
    if (t[e] == 'V') fprintf(stderr, "Push %s  ; '%c'\n",str[a], a-VariableBase+'A'); else fprintf(stderr, "Push #%d\n",a);
    return a; // return the contents of the variable or the const value
  } else if (op=='=') {
    a = l[e];
    c = eval(0,r[e]);
    fprintf(stderr, "Pop %s\n", str[corv[a]]); // *Or* Save this definition under this name to be inserted in-line later?
    if (!top) fprintf(stderr, "Push %s  ; '%c'\n",str[corv[a]], corv[a]-VariableBase+'A');
  } else {
    a = eval(0,l[e]);
    c = eval(0,r[e]);
    return calculate(a,op,c);
  }
}

char **varname = NULL;
int *tt = NULL;

int *creatett(int terms) {
  int row, rows = IEXP(2, terms);
  int *tt;
  fprintf(stderr, "Creating truth table of %d entries\n", rows);
  tt = calloc(rows, sizeof(int));
  return tt;
}

void pt(int *tt, int terms) {
  int col, row, rows;
  rows = IEXP(2, terms);
  fprintf(stdout, "   ");
  for (col = 0; col < terms; col++) {
    fprintf(stdout, "%s ", varname[col]);
  }
  fprintf(stdout, "   A\n");
  for (row = 0; row < rows; row++) {
    fprintf(stdout, "   ");
    col = terms;
    while (--col >= 0) {
      fprintf(stdout, "%c ", ((row & (1<<col)) ? '1' : '0'));
    }
    fprintf(stdout, "   %c\n", tt[row]+'0');
  }
}

int main(int argc, char **argv) 
{
  int i, bitpos, standaloneZero, standaloneOne, parameters, term, terms, Ones, Zeroes, termfound;
  int node;
#ifdef NEVER
#endif

  if ((argc >= 2) && (strcmp(argv[1], "-d") == 0)) {
    debug = TRUE;
    argc--; argv++;
  }
  
  if (argc == 1) {
    srand(time(NULL));
    parameters = 3;
    terms = IEXP(2, parameters);
    varname = calloc(parameters, sizeof(char *));
    for (i = 0; i < parameters; i++) {
      char tempname[2] = { 0, 0 };
      tempname[0] = i+'B';
      varname[i] = strdup(tempname);
    }
    tt = creatett(parameters); // 3 parameters, 2^3 rows, all init to 0.
    //for (i = (terms/2 - 1); i < terms; i++) tt[i] = 1; // b.c + d <- 1
    for (i = 0; i < terms; i++) tt[i] = random()&1;
    // We now have our truth-table:
   restart:
    pt(tt,parameters);
    bitpos = parameters;
    termfound = FALSE;
    while (--bitpos >= 0) {
      Ones = 0; Zeroes = 0;
      standaloneZero = TRUE;
      standaloneOne = TRUE;
      for (term = 0; term < terms; term++) {
        if (term & (1<<bitpos)) {
          if (tt[term]) {
            // good so far
            Ones += 1;
          } else {
            standaloneOne = FALSE;
            //fprintf(stderr, "bitpos %d  term %d\n", bitpos, term);
          }
        } else {
          if (tt[term]) {
            standaloneZero = FALSE;
            //fprintf(stderr, "bitpos %d  term %d\n", bitpos, term);
          } else {
            // good so far
            Zeroes += 1;
          }
        }
      }
      if (standaloneOne) { // can do similar for ~term and standaloneZero
        int newterm = 0;
        termfound = TRUE;
        fprintf(stderr, "Expression: %s +\n", varname[bitpos]);
        int *tt_reduced = calloc(terms, sizeof(int));
        // eliminate bitpos!
        for (term = 0; term < terms; term++) {
          if (term & (1<<bitpos)) {
            // eliminate
          } else {
            tt_reduced[newterm++] = tt[term];
          }
        }
        if (parameters == 1) {
          // say something
          exit(0);
        }
        parameters -= 1;
        goto restart;
      } else {
        fprintf(stderr, "%s: bitpos %d  Zeroes = %d, Ones = %d\n", varname[bitpos], bitpos, Zeroes, Ones);
      }
    }
    if (!termfound) {
      // No simple +term available in this set. Need to split and recurse.
      // Use the variable that has the most even balance of matching Zeroes and Ones
      // (or the highest count?) 
      // (lhs) + (rhs)
    }
    // end of demo
  } else if (argc == 2) {
    // Take a file as input.
    FILE *equations = fopen(argv[1], "r");
    if (equations == NULL) {
      fprintf(stderr, "%s: %s - %s\n", argv[0], argv[1], strerror(errno));
      exit(EXIT_FAILURE);
    }
    // predeclare common boolean functions
    OPNOT=1024+strpool("not"); // not is the only fn with a single param and has to be handled differently
    // Should I add T and F?  0 and 1 are accepted.  0 (low) is false and 1 (high) is true.
    OPNAND =1024+strpool("nand");
    OPAND  =1024+strpool("and");
    OPNOR  =1024+strpool("nor");
    OPOR   =1024+strpool("or");
    OPNXOR =1024+strpool("nxor"); OPNXOR2 =1024+strpool("eq"); // eq ('equiv') is the same as not(exor())  No support yet for '=='
    OPXOR  =1024+strpool("xor");  OPXOR2  =1024+strpool("ne");
    VariableBase = OPXOR2-1024+1; // ne ('nequiv') is xor.  No support yet for '!=' or '#' or '<>'
    
    /*
       The main thing remaining to be done is to separate inputs from outputs so that
       we can build the two-dimensional PLA table with inputs across the top and
       outputs down the left hand column.  To convert an arbitrary expression into
       sum-of-products form, we need to loop over all the inputs, so we need to know
       which terms are inputs.

       Thinks... if var appears only on RHS, it's an input pin.
       If it appears only on LHS it's an output pin.
       If it appears on both LHS and RHS of different equations,
       then it's an internal variable and does not have a pin.

       Note that this current draft of equation handling does not take into
       account "don't care" terms.  I *think* they happen easily just by initialising
       the input components all to X and then setting each component to 0 or 1 as currently
       done.  So shouldn't be a big mod to add.
     */
    do {
      // fprintf(stderr, "Expr: "); fflush(stderr); // prompt not needed if reading from a file.
      do {
        textp = 0;
        do {
          sym = fgetc(equations);
          if (sym == EOF) {
            if (textp) { // EOF *should* be at the start of a line.
              text[textp]='\0'; fprintf(stderr, "Newline missing at end of file: %s<EOF>", text);
            }
            fprintf(stderr, "\n");
            exit(0);
          }
          if (sym != ' ') text[textp++] = sym;
        } while (sym != '\n');
        text[--textp] = '\0';
      } while (text[0] == '#' || text[0] == '\0');
      fprintf(stderr, "Eval(\"%s\")\n", text);
      sym = text[textp = 0];
      node = expr(0);
      if (node == 0 || sym != '\0') {
        fprintf(stderr, "%%RMS-E-PRSFAIL, \"%s\"\n", text);
        exit(1);
      }
      eval(1,node);
      fprintf(stderr, "----------------------------------------\n");
    } while (node);
    fprintf(stderr, "\n");
    fclose(equations);
    exit(EXIT_SUCCESS);
  }
  /*
Creating truth table of 8 entries
   B C D    A
   0 0 0    0
   1 0 0    1
   0 1 0    0
   1 1 0    1
   0 0 1    0
   1 0 1    1
   0 1 1    0
   1 1 1    1

bitpos 2  Zeroes = 2, Ones = 2
bitpos 1  Zeroes = 2, Ones = 2

+B is a term

Eliminating B leaves:
   C D    A
   0 0    0
   0 0    1
   1 0    0
   1 0    1
   0 1    0
   0 1    1
   1 1    0
   1 1    1

two different results for any pair of digits means that term is not included in the result,
eg C.D - need a consistent and correct way of identifying these...

Should we eliminate the contribution of B to the results as well? I.e.:
   C D    A.~B
   0 0    0
   0 0    0
   1 0    0
   1 0    0
   0 1    0
   0 1    0
   1 1    0
   1 1    0

If so how do we use this information?

Next...

Creating truth table of 8 entries
   B C D    A
   0 0 0    1
   1 0 0    1
   0 1 0    0
   1 1 0    0
   0 0 1    1
   1 0 1    1
   0 1 1    1
   1 1 1    0

bitpos 2  Zeroes = 2, Ones = 3
bitpos 1  Zeroes = 0, Ones = 1
bitpos 0  Zeroes = 1, Ones = 2

This fails to detect the almost perfect match of ~C (excepting 1 term)

C: Zeroes = 0, Ones = 1   implies   NotZeroes = 4, NotOnes = 3 ...

Perhaps at this point we should invert C and redefine it as ~C, and re-run this stage?

   */
  
  exit(0);
  return(0);
}
/*
    16 tables of 2-input functions

   B C  F
   0 0  0
   0 1  0
   1 0  0
   1 1  0

        B.C
   0 0  0
   0 1  0
   1 0  0
   1 1  1

        B.~C
   0 0  0
   0 1  0
   1 0  1
   1 1  0

        B
   0 0  0
   0 1  0
   1 0  1
   1 1  1

        ~B.C
   0 0  0
   0 1  1
   1 0  0
   1 1  0

        C
   0 0  0
   0 1  1
   1 0  0
   1 1  1

        B EXOR C
   0 0  0
   0 1  1
   1 0  1
   1 1  0

        B+C
   0 0  0
   0 1  1
   1 0  1
   1 1  1

        ~(B+C)   (NOR)
   0 0  1
   0 1  0
   1 0  0
   1 1  0

        EQUIV (or ~EXOR)
   0 0  1
   0 1  0
   1 0  0
   1 1  1

        ~C
   0 0  1
   0 1  0
   1 0  1
   1 1  0

        ~(~B.C) or B+~C
   0 0  1
   0 1  0
   1 0  1
   1 1  1

        ~B
   0 0  1
   0 1  1
   1 0  0
   1 1  0

        B.~A
   0 0  1
   0 1  1
   1 0  0
   1 1  1

        ~(B.C)
   0 0  1
   0 1  1
   1 0  1
   1 1  0

        T
   0 0  1
   0 1  1
   1 0  1
   1 1  1


How about this one?

Creating truth table of 8 entries
   B C D    A
   0 0 0    0
   0 0 1    1
   0 1 0    1
   0 1 1    0
   1 0 0    0
   1 0 1    1
   1 1 0    1
   1 1 1    0
D: bitpos 2  Zeroes = 2, Ones = 2
C: bitpos 1  Zeroes = 2, Ones = 2
B: bitpos 0  Zeroes = 2, Ones = 2

All choices look the same, but A == C EXOR D so eliminating B is essential.

 */