/*
   Generic transitive closure module supporting multiple relationships.
   Can be used for anything from a compiler to a social network analyzer.
   However relationships are Boolean. There are no weights in this code.

   Note that the things being examined are represented by integers in the
   initial draft - these could be indexes into arrays of more complex
   objects perhaps.  The integers do not have to be contiguous or zero-based
   however, unlike C array indexes - we do our own mapping of the sparse
   integers into a dense 0-based range internally (as this is necessary
   for an efficient implementation of Warshall's algorithm using matrices)

   This may change when/if I make this module more generic.
 */

#include <stdio.h>

/*
     The following is a draft interface for making this module more generic
     so that it can be used in other contexts.

     PROCEDURE = define_object_type("procedure");

     HAS_SIDE_EFFECTS = define_property(PROCEDURE, "has side effects");
     USES_REGISTER_X  = define_property(PROCEDURE, "uses register X");
     USES_REGISTER_Y  = define_property(PROCEDURE, "uses register Y");

     CALLS               = define_transitive_relationship(PROCEDURE, "calls");
     IS_CALLED_BY        = define_inverse_relationship(PROCEDURE, "is called by", CALLS);
     SHARES_A_PSECT_WITH = define_symmetrical_relationship(PROCEDURE, "shares a psect with");
     
     SQRT = create_object_instance(PROCEDURE, "sqrt");
     EUCLID = create_object_instance(PROCEDURE, "_euclid");
     HYPOTENUSE = create_object_instance(PROCEDURE, "hyp");

     assign_property(PROCEDURE, EUCLID, USES_REGISTER_X);

     assign_relationship(PROCEDURE, SQRT, CALLS, EUCLID);
     assign_relationship(PROCEDURE, HYPOTENUSE, CALLS, SQRT);

     show_objects(PROCEDURE, USES_REGISTER_X);

     show_properties(PROCEDURE, HYPOTENUSE);

     if (has_property(PROCEDURE, HYPOTENUSE, USES_REGISTER_X)) {
        codegen(" PSHS X");
        codegen(" LBSR hyp");
        codegen(" PULS X");
     }     

     // wipe space used?
 */

/*
     MODULE = define_object_type("module");

     HAS_INIT_CODE    = define_property(PROCEDURE, "has init code");

     IMPORTS             = define_transitive_relationship(MODULE, "imports");
     IS_IMPORTED_BY      = define_inverse_relationship(MODULE, "is imported by", CALLS);
     
     INT  = create_object_instance(MODULE, "IntegerBuiltins");
     FPU  = create_object_instance(MODULE, "FloatingBuiltins");
     MATH = create_object_instance(MODULE, "Math");
     GEOM = create_object_instance(MODULE, "Geometry");

     assign_property(MODULE, MATH, HAS_INIT_CODE);

     assign_relationship(MODULE, MATH, IMPORTS, INT);
     assign_relationship(MODULE, GEOM, IMPORTS, INT);
     assign_relationship(MODULE, GEOM, IMPORTS, FPU);

     show_objects(MODULE, IMPORTS);

     show_properties(MODULE, HAS_INIT_CODE);

     // total ordering is a topological sort of the graph, eg by Khan's algorithm.
     // Results returned via callbacks in this example.
     // A topological sorting program can be found at https://git.suckless.org/sbase/file/tsort.c.html

     if (!generate_total_ordering(MODULE, IMPORTS, &output_init_code, &init_state)) {
       printf("Cannot initialise modules due to a cyclic dependency\n"); exit(1);
     }

     //if (has_property(MODULE, GEOM, HAS_INIT_CODE)) {
        // ...
     //}

     // wipe space used?
 */

// Below is my initial implementation for a specific relationship, used in imptoc.
// It needs to be considerably expanded to make it into a useful generic library,
// while keeping the interface as simple as possible:

#define MAX_PROCS 128
static int procno[MAX_PROCS]; // Maximum possible number of procedures
                              // in a compilation unit (file).  We *could*
                              // make this a re-sizable dynamic array off
                              // the heap, but that's overkill for now.

int next_free_procno = 0;     // Number of vertices in the graph

unsigned char callgraph[MAX_PROCS][MAX_PROCS];
unsigned char side_effect[MAX_PROCS];

int map(int sparse) { // alternatively the user's object could be a 'void *' pointer rather than an unconstrained integer.
  int mapped;
  procno[next_free_procno] = sparse;
  for (mapped = 0; mapped <= next_free_procno; mapped++) {
    if (procno[mapped] == sparse) {
      if (mapped == next_free_procno) next_free_procno++;
      return mapped;
    }
  }
}

void call(int caller, int callee) {
  int mcr, mce;
  mcr = map(caller);
  mce = map(callee);
  callgraph[mce][mcr] = 1; // relationship is transitive
  callgraph[mcr][mcr] = 1; // all procedures are considered to call themselves
  callgraph[mce][mce] = 1; // i.e. relationship is reflexive
  // to do: note reverse relationship unless anti-symmetric
  // (see https://medium.com/@WindUpDurb/on-partial-ordering-total-ordering-and-the-topological-sort-9f9c0d0d812f )
  printf("proc %0d calls proc %0d\n", caller, callee);
}

void has_side_effects(int proc) {
  side_effect[map(proc)] = 1;
  printf("proc %0d is known to have side effects\n", proc);
}

void show_procs_with_side_effects(void) {
  int i, j;
  printf("\nThe following matrix is the transitive closure of the call graph\n\n          ");
  for (i = 0; i < next_free_procno; i++) printf ("%0d ", procno[i]);
  printf ("\n");
  for (i = 0; i < next_free_procno; i++) {
    if (side_effect[i]) printf("BAD:  %0d: ",procno[i]); else printf("GOOD: %0d: ",procno[i]);
    for (j = 0; j < next_free_procno; j++) {
      if (side_effect[i]) side_effect[j] |= callgraph[i][j];
      printf ("%2d ", callgraph[i][j]);
    }
    printf("\n");
  }
  printf("\nThe following procedures could all cause side effects, whether directly or indirectly:\n\n");
  for (int i = 0; i < next_free_procno; i++)
    if (side_effect[i]) printf("%d\n", procno[i]);
}

int Toposort(void) {
  /*
    From https://stackoverflow.com/questions/59965812/topological-sort-based-on-a-comparator-rather-than-a-graph

    Let S be the set of elements you want to sort, and there is a partial order
    in S. Let used be the dictionary that maps each element from S to a boolean
    value (false by default) that will be true when we visit a 'node' with this
    element. Let stack be the stack of elements from S (empty by default).

    Define a method dfs(x) (x is an element of S) that does the following:

    Set used[x] to true

    For each element y in S:

      If used[y] is false and x is less than or equal to y (process only edges
       that go from x to y and do not process cases where edge goes from y to x
       or where there is no edge at all - this algorithm handles only 'less than
       or equal to' relations.):

      Call dfs(y)
      Add x to stack

      Then:

      For each element x in S:

      If used[x] is false:

      Call dfs(x)

    After this loop, elements in stack will be ordered (first item to be popped
    from stack is minimal one (not necessarily minimum), last item is maximal
    one (not necessarily maximum)).
   */

  int stack[next_free_procno];
  int used[next_free_procno];
  int x, next_free_stackp = 0, warned = 0;

  void dfs(int x) {
    int y;
    used[x] = 1;
    for (y = 0; y < next_free_procno; y++) {  // For each element y in S:
      if ((!used[y]) && /*x is less than or equal to y*/ (x!=y) && callgraph[x][y]) dfs(y);
    }
    stack[next_free_stackp++] = x; //       Add x to stack
  }

  printf("\n");
  {int i,j;
    for (i = 0; i < next_free_procno; i++)
      for (j = 0; j < next_free_procno; j++)
        if ((i < j) && ((callgraph[i][j] & callgraph[j][i]) != 0)) {
          fprintf(stderr, "Warning: loop detected between proc %d and proc %d\n", procno[i], procno[j]);
          warned++;
        }
    if (warned) printf("\n");
  }

  for (x = 0; x < next_free_procno; x++) used[x] = 0; // Init 'used[]'
  for (x = 0; x < next_free_procno; x++) { //   For each element x in S:
    if (!used[x]) dfs(x);
  }

  if (!warned) {
    printf("One possible total ordering: ");
    do printf(" %d", procno[stack[--next_free_stackp]]); while (next_free_stackp);
    printf("\n");
  }

  // REMINDER: check what happens when there is a loop...
  return warned==0;
}

void Warshall(void) {
  int i, j, k;
  for (k = 0; k < next_free_procno; k++)
    for (i = 0; i < next_free_procno; i++)
      for (j = 0; j < next_free_procno; j++)
        callgraph[i][j] |= (callgraph[i][k] & callgraph[k][j]);
}

int main(int argc, char **argv) {
  // The internal ID numbers used for procedures in the compiler are
  // just the indexes of the tuples for the procedure definition,
  // which will be quite sparse in practice.  They could equally be
  // the 16-bit addresses of the procedures when compiled for the 6809!

  call(10,13); // Call this every time a call to another procedure
  call(10,11); // is detected while compiling a procedure.
  call(11,12);
  call(12,13);
  // call(12,10); // add a loop and see what happens...
  
  has_side_effects(12); // Call this every time (or at least once) while
                        // compiling a procedure if a direct side-effect
                        // is detected. Note that we exclude procedure calls
                        // from this requirement.

  Warshall();                      // Discover every procedure that calls procedures
                                   // with side-effects, transitively.  Hence why we
                                   // need a transitive closure.
  show_procs_with_side_effects();  // Print the topologically sorted graph

  // This is not relevant to side effects.  It's something you would use when
  // working out the order to call module initialisation code, for example...
  if (!Toposort()) { // can be called before or after Warshall()
    printf("- cannot produce a total ordering of procs.\n");
    // Note that in real life, if you were determinining an order of
    // initialisation for a language with modules (such as ModulaII)
    // then the 'called by' relation is actually only relevant for
    // calls *in the initialisation code* - not elsewhere in the
    // module.
  }
  return 0;
}