#include <stdio.h> #include <stdlib.h> #include <string.h> #include <errno.h> #include <ctype.h> #ifndef FALSE #define TRUE (0==0) #define FALSE (0!=0) #endif #define debug_input 1 #define debug_sizes 2 #define debug_strcmp 4 #define debug_lookup 8 #define debug_letterbanks 16 #define debug_letterswaps 32 static int debug = 0 /*|debug_letterswaps|debug_letterbanks|debug_lookup|debug_strcmp|debug_sizes|debug_input*/; char **string = NULL; int *lb = NULL; // points to a cycle of letterbank equivalents int *ls = NULL; // points to a list of letterswap targets #define LB_MAX 2000000 #define LS_MAX 1000000 static int lbpool[LB_MAX]; static int lspool[LS_MAX]; int next_lb = 0; /* index into lbpool */ int next_ls = 0; /* index into lspool */ int next_word = 0; int MAX_WORDS = 0; int lookup(char *word) { // Return index of the word. Binary chop for now. int low = 1, high = MAX_WORDS; // inclusive at each end while (low <= high) { int middle = (low+high)/2; int comparison = strcmp(string[middle], word); if (debug&debug_strcmp) fprintf(stderr, "[%s:%d %s:%d %s:%d] %d\n", string[low], low, string[middle], middle, string[high], high, comparison); if (comparison < 0) { low = middle+1; } else if (comparison > 0) { high = middle-1; } else return middle; } return 0; // not found } #ifndef MAX_LINE #define MAX_LINE (8*1024) #endif void first_pass(FILE *in) { char line[MAX_LINE+1]; int lp; for (;;) { // read all words lp = 0; for (;;) { // read letters within a word int c = fgetc(in); if (c == EOF) { if (lp == 0) { if (debug&debug_sizes) fprintf(stderr, "#D MAX_WORDS = %d\n", MAX_WORDS); return; } line[lp] = '\0'; fprintf(stderr, "Unexpected end of file at \"%s\"\n", line); exit(EXIT_FAILURE); } else if (c == '\n') { break; } else if (c == '\r') { // in case of M$ continue; } else { line[lp++] = c; if (lp == MAX_LINE) { fprintf(stderr, "Please recompile with MAX_LINE = %d\n", MAX_LINE*2); exit(EXIT_FAILURE); } } } line[lp] = '\0'; MAX_WORDS += 1; if (debug&debug_input) fprintf(stderr, "#D P0: %s\n", line); } /* NOT REACHED */ } void second_pass(FILE *in) { char line[MAX_LINE+1]; int lp; for (;;) { // read all words lp = 0; for (;;) { // read letters within a word int c = fgetc(in); if (c == EOF) { if (lp == 0) { if (debug&debug_sizes) fprintf(stderr, "#D MAX_WORDS = %d, next_word = %d\n", MAX_WORDS, next_word); if (next_word != MAX_WORDS+1) { fprintf(stderr, "Second pass inconsistency - MAX_WORDS = %d, next_word = %d\n", MAX_WORDS, next_word); exit(EXIT_FAILURE); } return; } exit(EXIT_FAILURE); } else if (c == '\n') { break; } else if (c == '\r') { // in case of M$ continue; } else { line[lp++] = c; if (lp == MAX_LINE) exit(EXIT_FAILURE); } } line[lp] = '\0'; string[next_word] = strdup(line); // Matbe change to a stringpool? lb[next_word] = 0; // none generated yet ls[next_word] = 0; // ditto if (debug&debug_input) fprintf(stderr, "#D P1: %s\n", line); next_word += 1; // So next_word will equal 1 for the first word, "aa" } /* NOT REACHED */ } void add_letterbank_link(char *from_word, char *to_word) { if (strcmp(from_word, to_word) == 0) return; // Don't link to self if (debug&debug_letterbanks) fprintf(stderr, "Add link \"%s\" (word[%d].lb (%d) ) : \"%s\" (%d)\n", from_word, lookup(from_word), next_lb, to_word, lookup(to_word)); // And do it... lbpool[next_lb++] = lookup(to_word); if (next_lb >= LB_MAX) { fprintf(stderr, "Please recompile with LB_MAX = %d\n", LB_MAX*2); exit(EXIT_FAILURE); } } void add_letterbank(char *from_word, char *group) { /* Loop over each word in group, add a letterbank link from word -> each */ char *word, *end; char copy[MAX_LINE+1]; // modify copy, leave 'group' untouched. DON'T USE HEAP. lb[lookup(from_word)] = next_lb; if (debug&debug_letterbanks) fprintf(stderr, "Add \"%s\": \"%s\"\n", from_word, group); strcpy(copy, group); word = copy; for (;;) { end = strchr(word, ' '); if (end != NULL) *end++ = '\0'; add_letterbank_link(from_word, word); if ((word = end) == NULL) { // last word // end of this group lbpool[next_lb++] = 0; if (next_lb >= LB_MAX) { fprintf(stderr, "Please recompile with LB_MAX = %d\n", LB_MAX*2); exit(EXIT_FAILURE); } return; } } } void add_letterbank_group(char *group) { char *word, *end; char copy[MAX_LINE+1]; // modify copy, leave 'group' untouched. DON'T USE HEAP. strcpy(copy, group); word = copy; for (;;) { end = strchr(word, ' '); if (end != NULL) *end++ = '\0'; add_letterbank(word, group); word = end; // last word if (end == NULL) return; } } void create_letterbanks(FILE *in) { char line[MAX_LINE+1]; int lp; for (;;) { // read all letterbank lines lp = 0; for (;;) { // read letters within a letterbank line int c = fgetc(in); if (c == EOF) { if (lp == 0) return; // EOF should be at the start of a line. line[lp] = '\0'; fprintf(stderr, "Unexpected end of file at \"%s\"\n", line); exit(EXIT_FAILURE); } else if (c == '\n') { break; } else if (c == '\r') { // in case of M$ continue; } else { line[lp++] = c; if (lp == MAX_LINE) { fprintf(stderr, "Please recompile with MAX_LINE = %d\n", MAX_LINE*2); exit(EXIT_FAILURE); } } } line[lp] = '\0'; if (debug&debug_letterbanks) fprintf(stderr, "#D LB: %s\n", line); add_letterbank_group(line); } } void insert_letterswap(int from_word, int to_word) { static int last_from_word = 0; if (last_from_word != from_word) { lspool[++next_ls] = 0; if (next_ls >= LS_MAX) { fprintf(stderr, "Please recompile with LS_MAX = %d\n", LS_MAX*2); exit(EXIT_FAILURE); } ls[from_word] = next_ls; last_from_word = from_word; } lspool[next_ls] = to_word; lspool[++next_ls] = 0; if (next_ls >= LS_MAX) { fprintf(stderr, "Please recompile with LS_MAX = %d\n", LS_MAX*2); exit(EXIT_FAILURE); } } void add_one_letterswap(char *from_word, char *to_word) { int old_word, new_word; new_word = lookup(to_word); if (new_word != 0) { old_word = lookup(from_word); if (debug&debug_letterswaps) fprintf(stderr, "SWAP: %s (%d) -> %s (%d)\n", from_word, old_word, to_word, new_word); insert_letterswap(old_word, new_word); } } void add_letterswaps(char *from_word) { int i, letpos; char saved_let; char to_word[MAX_LINE+1]; strcpy(to_word, from_word); for (letpos = 0; letpos < strlen(from_word); letpos++) { // try all letters at position letpos (except saved_let) saved_let = from_word[letpos]; // try all letters of the same case at this position // case hack reduces the need to know the exact details // of the wordlist file. if (isalpha(saved_let)) { if (isupper(saved_let)) { for (i = 32; i < 256; i++) { if (isalpha(i) && isupper(i) && (i != saved_let)) { to_word[letpos] = (char)i; add_one_letterswap(from_word, to_word); to_word[letpos] = saved_let; } } } else if (islower(saved_let)) { for (i = 32; i < 256; i++) { if (isalpha(i) && islower(i) && (i != saved_let)) { to_word[letpos] = (char)i; add_one_letterswap(from_word, to_word); to_word[letpos] = saved_let; } } } } } } void create_letterswaps(void) { int wordno; for (wordno = 1; wordno <= MAX_WORDS; wordno++) { add_letterswaps(string[wordno]); } } int main(int argc, char **argv) { FILE *dict, *bank; char *wordlist, *banklist; if (argc == 1) { wordlist = "wordlist.txt"; banklist = "letterbanks.txt"; } else if (argc == 2) { wordlist = argv[1]; banklist = "letterbanks.txt"; } else if (argc == 3) { wordlist = argv[1]; banklist = argv[2]; } else { fprintf(stderr, "syntax: gentables [wordlist.txt [letterbanks.txt]]\n"); exit(EXIT_FAILURE); } dict = fopen(wordlist, "r"); if (dict == NULL) { fprintf(stderr, "Cannot convert word list \"%s\" - %s\n\n", wordlist, strerror(errno)); exit(EXIT_FAILURE); } bank = fopen(banklist, "r"); if (bank == NULL) { fprintf(stderr, "Cannot convert letterbank \"%s\" - %s\n\n", banklist, strerror(errno)); exit(EXIT_FAILURE); } fprintf(stderr, "Converting word list \"%s\"\n", wordlist); fprintf(stderr, " and letter bank \"%s\"\n\n", banklist); first_pass(dict); // determine MAX_WORDS if (MAX_WORDS == 0) { fprintf(stderr, "No valid words found in %s\n\n", wordlist); exit(EXIT_FAILURE); } fseek(dict, SEEK_SET, 0L); // Re-read word list next_word = 1; string = malloc((MAX_WORDS+1) * sizeof(char *)); string[0] = strdup(""); /* lb and ls point to a flat array where the links are stored. lb and ls are indexed by lookup("word"), ie another layer of indirection is needed to get to all the links for a particular word */ lb = malloc((MAX_WORDS+1) * sizeof(int)); lb[0] = 0; lbpool[0] = 0; next_lb = 1; ls = malloc((MAX_WORDS+1) * sizeof(int)); ls[0] = 0; lspool[0] = 0; next_ls = 0; fprintf(stdout, "\n"); fprintf(stdout, "// Array of all words. word[1].string returns \"aa\" etc.\n"); fprintf(stdout, "\n"); fprintf(stdout, "\n"); fprintf(stdout, "typedef struct transformation {\n"); fprintf(stdout, " char *string;\n"); fprintf(stdout, " int lb; // points to a list of letterbank equivalents\n"); fprintf(stdout, " int ls; // points to a list of letterswap targets\n"); fprintf(stdout, "} transformation;\n"); fprintf(stdout, "\n"); second_pass(dict); create_letterbanks(bank); create_letterswaps(); fprintf(stdout, "/* Generated tables: */\n"); fprintf(stdout, "\n"); fprintf(stdout, "const int lb[] = { /* Values are indexes of equivalent words, 0 terminates */\n"); { int idx = 0; for (;;) { if ((idx != 0) && (lbpool[idx] == 0)) break; fprintf(stdout, "/* %6d: */ ", idx); for (;;) { int to_word = lbpool[idx++]; fprintf(stdout, " %d,", to_word); if (to_word == 0) break; } fprintf(stdout, "\n"); } } fprintf(stdout, "};\n"); fprintf(stdout, "\n"); fprintf(stdout, "const int ls[] = {\n"); { int idx = 0; for (;;) { fprintf(stdout, "/* %6d: */ ", idx); for (;;) { int to_word = lspool[idx++]; fprintf(stdout, " %d,", to_word); if (to_word == 0) break; } fprintf(stdout, "\n"); if (idx >= next_ls) break; } } fprintf(stdout, "};\n"); fprintf(stdout, "\n"); fprintf(stdout, "#define MAX_WORDS %d\n", MAX_WORDS); fprintf(stdout, "\n"); fprintf(stdout, "transformation word[MAX_WORDS+1] = {\n"); for (next_word = 0; next_word <= MAX_WORDS; next_word++) { fprintf(stdout, "/* %6d: */ { \"%s\", %d, %d },\n", next_word, string[next_word], lb[next_word], ls[next_word]); } fprintf(stdout, "};\n"); if (debug&debug_lookup) { fprintf(stderr, "Lookup a -> %d\n", lookup("a")); fprintf(stderr, "Lookup aa -> %d\n", lookup("aa")); fprintf(stderr, "Lookup aargh -> %d\n", lookup("aargh")); fprintf(stderr, "Lookup pitstop -> %d\n", lookup("pitstop")); fprintf(stderr, "Lookup pitsaw -> %d\n", lookup("pitsaw")); fprintf(stderr, "Lookup zyzzyvas -> %d\n", lookup("zyzzyvas")); fprintf(stderr, "Lookup zzzzzz -> %d\n", lookup("zzzzzz")); } exit(EXIT_SUCCESS); return (0); }