/* stable release traditional-1.0.4 */ /* (c) Graham Toal, 06June1999 */ /* See the file README in this directory for a description of the problem. */ /* This version is orders of magnitudes faster than ws-stable.c because it generates only 'traditional' squares, ie it cannot generate double squares. */ /* This is a very short description of my wordsquare program. To understand the implementation details as opposed to the big picture you will also need to read the document on DAWGs (Directed Acyclic Word Graphs) which I'll be writing next. The high-level part of the algorithm is actually very obvious, basic and simple: take an empty square then put down a word in the top row. Place a word underneath it, and if the downward columns look like they can also form valid words (checked by looking for the starts of the word in the dictionary), then continue to place words on subsequent rows until the square is full. If at any time some partially formed word in a downward column is nonsense, remove the last word you placed and try another one. Sometimes you may backtrack more than one row before you find another word that can be placed which allows sensible partially formed vertical words. That's the basic algorithm, but in practice it is refined considerably because as it stands it would take forever to finish. To make the description easier, let's assume we've placed 5 full rows successfully, and are looking at placing the 6th row. We could simply try each word in the dictionary sequentially, and as we place each word, check the 6-letter downward stems, to confirm that we have not blocked placement of a 7th row by some nonsensical downward stem, such as "peacez...." But... by placing whole words at a time, we might find ourselves repeating the first 7 or 8 vertical checks every time, eg if we place "superhuman" on row 6 and the vertical check fails on the 9th column, then we place 'supersonic" on row 6, we end up checking the same 5 vertical words again, that intersect with the prefix 'super'. So, rather than place whole words at a time, and backtracking whole words when one doesn't allow further placements, we actually place the horizontal words one letter at a time, and when a letter fails, we backtrack just one letter. So, after placing "supersoni" and realising that the "i" blocked a downward placement in column 9, we would then look for something else beginning with "superson..". Not finding any, we backtrach to "superso..." etc until we eventually get to a position "super....." which allows another letter to be placed, in this case "superh...." which is the start of "superhuman". Next, the big optimisation comes from never actually checking the vertical prefixes twice. We use the 'dawg' structure to perform an incremental spelling check down the vertical columns, BUT as we build the square up row by row, we store the intermediate stages of each vertical cross-check in another square array the same size as the wordsquare, and so when we add one more row to the square, for each column we only have to check that one more letter can be added to the partial word at that stage. Let's say we had already formed the partial word 'peace' on one of the downward columns, then if the horizontal letter being placed in the 6th row at that column is say 'm', we know from our partially checked word 'peace' that it can be followed at that point by 'maker' and so 'm' is valid. Finally, having this mechanism in place means that we know that immediately below any partially formed vertical word, only a small number of letters are valid - eg with 'peace.....' we may have only "peacemaker" or "peacetimes" so only 'm' or 't' are valid at that point. This means we no longer have to try every word in the dictionary in turn - because we can limit the letters in words we are placing horizontally to only those that allow vertical words to be formed. This cuts down the iterations tremendously. The only extra trick needed here is simply to mark the first row as allowing any letter from 'a' to 'z' so that all words are valid in the first row, so that after the first row is placed, only horizontally placed letters that allow valid downward words to be formed can be placed. This whole strategy of trimming what is placed horizontally by the partially-formed vertical words works only because the data structure that we use to represent the word list is a tree, and for any position in any word, we have instant access to the suffixes that can follow that prefix, for the cost of a single memory operation rather than any complex database lookup. The next iteration of this document will include some examples and partially formed squares, and a link to the description of the dawg data structure. Graham Toal Texas, 15Aug99 */ #include <stdio.h> #include <stdlib.h> #include "splib.h" #ifndef TRUE #define TRUE (0==0) #define FALSE (!TRUE) #endif /* The dictionary itself is global, as these variables are seldom referenced, and I don't want to pass them in as parameters to every procedure just so I can access them occassionaly */ NODE *rootdawg; INDEX nedges; #define MAX 11 /* Biggest we're willing to tackle will eventually be 10 */ static int sq[/MAX* row */][/MAX* col */]; /* letters */ static NODE *fork_point[MAX+/1* row */][/MAX* col */]; /* downward word branch choices from this row */ static char check[MAX+/1* row */][/MAX* col */][256+/1* alphabet */]; /* string of letters which are valid in each position in the row below */ /* This one is *NOT* row, col */ static char vertical_word[MAX /* col */][MAX+1 /* row/wordlen */]; /* This is probably redundant - it's just a printable horizontal word also used for communication. Can probably use sq[][] instead. */ char word[MAX /* row */][MAX+1 /* len */]; /* Doubles as producing answers and debug routine */ void print_square(int row, int col, int max, char *s) { int i, j; printf(s); for (j = 0; j < row; j++) { for (i = 0; i < max; i++) putchar(sq[j][i]); putchar('\n'); } for (j = row; j < max; j++) { for (i = 0; i < max; i++) putchar('.'); putchar('\n'); } putchar('\n'); fflush(stdout); } /* parameter col is the length is of the horizontal word and should always be invoked externally with len=0. hnode should be dawg+1 for the start of every word in the full wordlist. */ /* Iterate a letter at a time across each row; recurse at the end of a row to go to the next row. Backtrack when the square is blocked. */ void next_letter(NODE *hnode, int row, int max, int col) { NODE *nextnode; int ch; int i; NODE *savednode; char savedcheck[256]; if (row == max || col == max) return; /* safety - I'd prefer this were done at the point of the recursive call */ /* It's because of things like that that some of the arrays have to be +1 */ for (;;) { /* loop across horizontal letter placement */ ch = (char)(((*hnode) >> V_LETTER) & M_LETTER); word[row][col] = ch; word[row][col+1] = '\0'; sq[row][col] = ch; vertical_word[col][row] = ch; vertical_word[col][row+1] = '\0'; nextnode = rootdawg+((*hnode) & M_NODE_POINTER); /* Test to see if this edge letter is allowed in the vertical crosscheck list */ /* ***BUT*** if generating single squares, throw away the generated cross-check, and force the downward word to match the relevant across word: col 012345 FREDDY row 0 ROGERS row <n> EGBERT row (just placed) DEE row+1 (next to look at) DRR YST */ if ( ( (col < row) && (ch == sq[col][row]) && (strchr(check[row][col], ch) != NULL) ) || ( (col >= row) && (strchr(check[row][col], ch) != NULL) ) ) { int nextfree; int found; NODE *vnode; /* letter is placed. Before we go any further, did this placement generate a solution? */ /* letter is placed in the square, now see if it is valid. We place next horizontal letter recursively if this one was accepted OK. It may be rejected due to vertical crosscheck and it may be rejected because it blocks off the vertical word from being expanded further downwards. Of course we shouldn't care about the latter if this is the final row. */ /* INLINE: fork_point[row+1][col] = follow_edge(fork_point[row][col], ch); */ vnode = fork_point[row][col]; found = FALSE; for (;;) { /* placed a letter, look for next set of options */ if (ch == (((*vnode) >> V_LETTER) & M_LETTER)) { vnode = rootdawg+(*vnode & M_NODE_POINTER); found = TRUE; break; } if ((*vnode & M_END_OF_NODE) != 0) break; vnode++; } if (found) { savednode = fork_point[row+1][col]; strcpy(savedcheck, check[row+1][col]); fork_point[row+1][col] = vnode; nextfree = 0; for (;;) { if (vnode == NULL) { fprintf(stdout, "Don't know why vnode is NULL\n"); exit(1); } check[row+1][col][nextfree++] = ((*vnode) >> V_LETTER) & M_LETTER; if ((*vnode & M_END_OF_NODE) != 0) break; vnode++; } check[row+1][col][nextfree++] = '\0'; if (col+1 == max) { /* good placement on this row. recurse to next row. reset the dictionary to start of line */ if (row+1 == max) { /* As the word would not have been placed unless it formed a vertical word also, we must have a solution. Print it and return */ print_square(max,max,max,"Solution:\n"); } /* good placement. recurse to next row. reset the dictionary */ next_letter(rootdawg+1, row+1, max, 0); } else { /* place next letter horizontally */ next_letter( nextnode, row, max, col+1); } /* what I worry about here is when we backtrack, do all the things like check[] and fork_point[] work properly? They *should* do, because ones we backtrack over should never be looked at, but I have this niggling doubt, at least as long as the program isn't working... */ fork_point[row+1][col] = savednode; strcpy(check[row+1][col], savedcheck); } else { char tmp[256]; /* Cannot make this play because it blocks of progress for a vertical word. Go back up a row. */; /* (or maybe backtrack a letter and try another word on this row?) */ } } /* now see if there is another letter at this position. */ if ((*hnode & M_END_OF_NODE) != 0) break; /* end of node */ hnode++; } /* end loop across horizontal letter placement */ } int main(int argc, char **argv) { char fname[256]; int row, col; int max; if (argc == 1) { max = 4; } else if (argc == 2) { max = *argv[1]; if (isdigit(max)) max = max - '0'; else max = 4; } else { fprintf(stderr, "syntax: ws 4\n (or any number 2-9)\n"); exit(1); } sprintf(fname, "dawg.%0d", max); if (!dawg_init(fname, &rootdawg, &nedges)) { fprintf(stdout, "Cannot open %s\n", fname); exit(1); } /* Have to initialise first row specially. *all* words are valid at this point. */ row = 0; for (row = 0; row < max; row++) { /* wrong for all but 0th row, but we don't care as it'll be filled in in time anyway. */ for (col = 0; col < max; col++) { sq[row][col] = '_'; /* This would be better done by extraction from the dawg if I weren't so lazy, but the 'strchr()' overhead is low */ strcpy(check[row][col], "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"); fork_point[row][col] = rootdawg+1; strcpy(vertical_word[col], ""); } } next_letter(rootdawg+1, 0, max, /0**/len); print_square(max,max,max,"Final state (non solution):\n"); exit(0); }