#include <stdio.h> #include <stdlib.h> #include <string.h> #include <errno.h> #include <ctype.h> #ifndef FALSE #define TRUE (0==0) #define FALSE (0!=0) #endif char **string = NULL; int *lb = NULL; // each entry points to a list of letterbank equivalents int *ls = NULL; // each entry points to a list of letterswap targets char *stringpool = NULL; // consecutive \0-terminated strings #define LB_MAX 2000000 #define LS_MAX 1000000 static int lbpool[LB_MAX]; static int lspool[LS_MAX]; int next_lb = 0; /* index into lbpool */ int next_ls = 0; /* index into lspool */ int next_word = 0; int MAX_WORDS = 0; int MAX_STRINGPOOL = 1; int next_stringpool = 0; int lookup(char *word) { // Return index of the word. Binary chop for now. int low = 1, high = MAX_WORDS; // inclusive at each end while (low <= high) { int middle = (low+high)/2; int comparison = strcmp(string[middle], word); if (comparison < 0) { low = middle+1; } else if (comparison > 0) { high = middle-1; } else return middle; } return 0; // not found } #ifndef MAX_LINE #define MAX_LINE (8*1024) #endif // determine number of words in dictionary and size of string pool // updates MAX_WORDS and MAX_STRINGPOOL void first_pass(FILE *in) { char line[MAX_LINE+1]; int lp; int c; for (;;) { // read all lines lp = 0; for (;;) { // read letters within a word if ((c = fgetc(in)) == EOF) { if (lp == 0) return; line[lp] = '\0'; fprintf(stderr, "Unexpected end of file at \"%s\"\n", line); exit(EXIT_FAILURE); } else if (c == '\n') break; else if (c == '\r') continue; else { line[lp++] = c; if (lp == MAX_LINE) { fprintf(stderr, "Please recompile with MAX_LINE = %d\n", MAX_LINE*2); exit(EXIT_FAILURE); } MAX_STRINGPOOL += 1; } } line[lp] = '\0'; MAX_STRINGPOOL += 1; MAX_WORDS += 1; } } void second_pass(FILE *in) { char line[MAX_LINE+1]; int lp; int c; for (;;) { // read all words lp = 0; string[next_word] = &stringpool[next_stringpool]; for (;;) { // read letters within a word if ((c = fgetc(in)) == EOF) { if (lp == 0) return; exit(EXIT_FAILURE); } else if (c == '\n') break; else if (c == '\r') continue; else { line[lp++] = c; if (lp == MAX_LINE) exit(EXIT_FAILURE); stringpool[next_stringpool++] = c; } } line[lp] = '\0'; lb[next_word] = 0; // none generated yet ls[next_word] = 0; // ditto stringpool[next_stringpool++] = '\0'; next_word += 1; // So next_word will equal 1 for the first word, "aa" } /* NOT REACHED */ } void create_letterbanks(FILE *in) { char group[MAX_LINE+1]; char from_group_copy[MAX_LINE+1]; // modify copy, leave 'group' untouched. DON'T USE HEAP. char *from_word, *from_word_end; char copy[MAX_LINE+1]; char *word, *end; int lp; for (;;) { // read all letterbank lines lp = 0; for (;;) { // read letters within a letterbank line int c = fgetc(in); if (c == EOF) { if (lp == 0) return; // EOF should be at the start of a line. group[lp] = '\0'; fprintf(stderr, "Unexpected end of file at \"%s\"\n", group); exit(EXIT_FAILURE); } else if (c == '\n') { break; } else if (c == '\r') { // in case of M$ continue; } else { group[lp++] = c; if (lp == MAX_LINE) { fprintf(stderr, "Please recompile with MAX_LINE = %d\n", MAX_LINE*2); exit(EXIT_FAILURE); } } } group[lp] = '\0'; strcpy(from_group_copy, group); from_word = from_group_copy; for (;;) { from_word_end = strchr(from_word, ' '); if (from_word_end != NULL) *from_word_end++ = '\0'; lb[lookup(from_word)] = next_lb; strcpy(copy, group); word = copy; for (;;) { end = strchr(word, ' '); if (end != NULL) *end++ = '\0'; if (strcmp(from_word, word) != 0) { // Don't link to self lbpool[next_lb++] = lookup(word); if (next_lb >= LB_MAX) { fprintf(stderr, "Please recompile with LB_MAX = %d\n", LB_MAX*2); exit(EXIT_FAILURE); } } if ((word = end) == NULL) { // last word // end of this group lbpool[next_lb++] = 0; if (next_lb >= LB_MAX) { fprintf(stderr, "Please recompile with LB_MAX = %d\n", LB_MAX*2); exit(EXIT_FAILURE); } break; } } from_word = from_word_end; // last word if (from_word_end == NULL) break; } } } void add_one_letterswap(char *from_word, char *to_word) { static int last_old_word = 0; int old_word, new_word; new_word = lookup(to_word); if (new_word != 0) { old_word = lookup(from_word); if (last_old_word != old_word) { lspool[++next_ls] = 0; if (next_ls >= LS_MAX) { fprintf(stderr, "Please recompile with LS_MAX = %d\n", LS_MAX*2); exit(EXIT_FAILURE); } ls[old_word] = next_ls; last_old_word = old_word; } lspool[next_ls] = new_word; lspool[++next_ls] = 0; if (next_ls >= LS_MAX) { fprintf(stderr, "Please recompile with LS_MAX = %d\n", LS_MAX*2); exit(EXIT_FAILURE); } } } void create_letterswaps(void) { char *from_word; int wordno; int i, letpos; char saved_let; char to_word[MAX_LINE+1]; for (wordno = 1; wordno <= MAX_WORDS; wordno++) { from_word = string[wordno]; strcpy(to_word, from_word); for (letpos = 0; letpos < strlen(from_word); letpos++) { // try all letters at position letpos (except saved_let) saved_let = from_word[letpos]; // try all letters of the same case at this position // case hack reduces the need to know the exact details // of the wordlist file. if (isalpha(saved_let)) { if (isupper(saved_let)) { for (i = 32; i < 256; i++) { if (isalpha(i) && isupper(i) && (i != saved_let)) { to_word[letpos] = (char)i; add_one_letterswap(from_word, to_word); to_word[letpos] = saved_let; } } } else if (islower(saved_let)) { for (i = 32; i < 256; i++) { if (isalpha(i) && islower(i) && (i != saved_let)) { to_word[letpos] = (char)i; add_one_letterswap(from_word, to_word); to_word[letpos] = saved_let; } } } } } } } int main(int argc, char **argv) { FILE *dict, *bank; char *wordlist, *banklist; if (argc == 1) { wordlist = "wordlist.txt"; banklist = "letterbanks.txt"; } else if (argc == 2) { wordlist = argv[1]; banklist = "letterbanks.txt"; } else if (argc == 3) { wordlist = argv[1]; banklist = argv[2]; } else { fprintf(stderr, "syntax: gentables [wordlist.txt [letterbanks.txt]]\n"); exit(EXIT_FAILURE); } dict = fopen(wordlist, "r"); if (dict == NULL) { fprintf(stderr, "Cannot convert word list \"%s\" - %s\n\n", wordlist, strerror(errno)); exit(EXIT_FAILURE); } bank = fopen(banklist, "r"); if (bank == NULL) { fprintf(stderr, "Cannot convert letterbank \"%s\" - %s\n\n", banklist, strerror(errno)); exit(EXIT_FAILURE); } fprintf(stderr, "Converting word list \"%s\"\n", wordlist); fprintf(stderr, " and letter bank \"%s\"\n\n", banklist); first_pass(dict); // determine MAX_WORDS and MAX_STRINGPOOL if (MAX_WORDS == 0) { fprintf(stderr, "No valid words found in %s\n\n", wordlist); exit(EXIT_FAILURE); } fseek(dict, SEEK_SET, 0L); // Re-read word list fprintf(stdout, "\n"); fprintf(stdout, "// Array of all words. word[1].string returns \"aa\" etc.\n"); fprintf(stdout, "\n"); fprintf(stdout, "\n"); fprintf(stdout, "typedef struct transformation {\n"); fprintf(stdout, " char *string;\n"); fprintf(stdout, " int lb; // points to a list of letterbank equivalents\n"); fprintf(stdout, " int ls; // points to a list of letterswap targets\n"); fprintf(stdout, "} transformation;\n"); fprintf(stdout, "\n"); stringpool = malloc((MAX_STRINGPOOL+1) * sizeof(char)); stringpool[0] = '\0'; next_stringpool = 1; string = malloc((MAX_WORDS+1) * sizeof(char *)); string[0] = stringpool; next_word = 1; lb = malloc((MAX_WORDS+1) * sizeof(int)); lb[0] = 0; lbpool[0] = 0; next_lb = 1; ls = malloc((MAX_WORDS+1) * sizeof(int)); ls[0] = 0; lspool[0] = 0; next_ls = 0; if ((!string) || (!lb) || (!ls) || (!stringpool)) { fprintf(stderr, "Malloc failed.\n\n"); exit(EXIT_FAILURE); } second_pass(dict); create_letterbanks(bank); create_letterswaps(); fprintf(stdout, "/* Generated tables: */\n"); fprintf(stdout, "\n"); fprintf(stdout, "const int lb[] = { /* Values are indexes of equivalent words, 0 terminates */\n"); { int idx = 0; for (;;) { if ((idx != 0) && (lbpool[idx] == 0)) break; fprintf(stdout, "/* %6d: */ ", idx); for (;;) { int to_word = lbpool[idx++]; fprintf(stdout, " %d,", to_word); if (to_word == 0) break; } fprintf(stdout, "\n"); } } fprintf(stdout, "};\n"); fprintf(stdout, "\n"); fprintf(stdout, "const int ls[] = {\n"); { int idx = 0; for (;;) { fprintf(stdout, "/* %6d: */ ", idx); for (;;) { int to_word = lspool[idx++]; fprintf(stdout, " %d,", to_word); if (to_word == 0) break; } fprintf(stdout, "\n"); if (idx >= next_ls) break; } } fprintf(stdout, "};\n"); fprintf(stdout, "\n"); // NOTE: If modifying this code to produce tables for some other language, // you may find it easier to output the strings, the lb links, and the ls links // as three separate scalar arrays rather than a single array of 3-element records. fprintf(stdout, "#define MAX_WORDS %d\n", MAX_WORDS); fprintf(stdout, "\n"); fprintf(stdout, "transformation word[MAX_WORDS+1] = {\n"); for (next_word = 0; next_word <= MAX_WORDS; next_word++) { fprintf(stdout, "/* %6d: */ { \"%s\", %d, %d },\n", next_word, string[next_word], lb[next_word], ls[next_word]); } fprintf(stdout, "};\n"); free(string); free(lb); free(ls); free(stringpool); exit(EXIT_SUCCESS); return (0); }