#include <stdio.h> /* wordprob.c - calculate the probabilities of drawing words at random out * of a bag of letter tiles. * * Copyright (C) 1993 by John J. Chew, III <jjchew@math.utoronto.ca>. * All Rights Reserved. * * <title>wordprob.c</title> * * $Header: wordprob.c,v 1.1 93/07/28 09:11:05 jjchew Locked $ */ #undef DEBUG #define MAX_BINGO_LENGTH 15 /* configurable */ unsigned char *gBag = "aaaaaaaaabbccddddeeeeeeeeeeeeffggghhiiiiiiiiijkllllmm\ nnnnnnooooooooppqrrrrrrssssttttttuuuuvvwwxyyz__"; unsigned char gBlank = '_'; /* calculated */ int gBagSize; int gBlankCount; int gFrequency[256]; double gChooseSmall[MAX_BINGO_LENGTH+1][MAX_BINGO_LENGTH+1]; double gChooseBag[MAX_BINGO_LENGTH+1]; /* general external variables */ char *gProgramName; enum { none, reciprocal, ppb, numerator } gResultMode = none; /* functions in this file */ int main(); void init(); void process(); int main(argc, argv) int argc; char **argv; { int ch; int errflg = 0; extern int optind; extern char *optarg; while ((ch = getopt(argc, argv, "B:bnr")) != EOF) switch(ch) { case 'B': gBag = optarg; break; case 'b': if (gResultMode != ppb && gResultMode != none) errflg++; else gResultMode = ppb; break; case 'n': if (gResultMode != numerator && gResultMode != none) errflg++; else gResultMode = numerator; break; case 'r': if (gResultMode != reciprocal && gResultMode != none) errflg++; else gResultMode = reciprocal; break; case '?': default: errflg++; break; } if (errflg) { fputs("Usage: wordprob [-b|-n|-r] [file...]\n", stderr); fputs(" -B bag set non-standard bag (blank=_)\n", stderr); fputs(" -b display parts per billion\n", stderr); fputs(" -n display numerator only\n", stderr); fputs(" -r display reciprocal of probability\n", stderr); exit(2); } if (gResultMode == none) gResultMode = reciprocal; init(&argc, &argv); if (optind == argc) process(stdin, "(stdin)"); else for (; optind < argc; optind++) process(fopen(argv[optind], "r"), argv[optind]); return 0; } void init(argcp, argvp) int *argcp; char ***argvp; { double a, r; int i, j; char *cp; /* command-line argument processing */ gProgramName = **argvp; /* bag analysis */ gBagSize = strlen(gBag); for (cp=gBag; *cp; ) gFrequency[*cp++]++; gBlankCount = gFrequency[gBlank]; if (gBlankCount != 2) { fprintf(stderr, "%s: current code supports only two-blank bags\n", gProgramName); exit(1); } /* combinatorial precalculation */ gChooseSmall[0][0] = gChooseBag[0] = a = 1.0; for (r=i=1; i<=MAX_BINGO_LENGTH; i++, r++) { gChooseBag[i] = a *= (gBagSize + 1.0 - r)/r; gChooseSmall[i][0] = gChooseSmall[i][i] = 1.0; for (j=1; j<MAX_BINGO_LENGTH; j++) gChooseSmall[i][j] = gChooseSmall[i-1][j-1]+gChooseSmall[i-1][j]; } } void process(aFilePointer, aFileName) FILE *aFilePointer; char *aFileName; { int ch; int charNumber = 0; int frequency; int i, j; int lineNumber = 1; unsigned char blankChars[MAX_BINGO_LENGTH]; unsigned char rackChars[MAX_BINGO_LENGTH]; double *rackChoices[MAX_BINGO_LENGTH]; int rackCounts[MAX_BINGO_LENGTH]; int rackLength = 0; enum { in, out } state = in; char word[MAX_BINGO_LENGTH+1]; if (NULL == aFilePointer) { fprintf(stderr, "%s: cannot open %s for input.\n", gProgramName, aFileName) ; perror(gProgramName); exit(1); } while (EOF != (ch = getc(aFilePointer))) { charNumber++; if (state == out) { if (ch == '\n') { state = in; rackLength = 0; charNumber = 0; lineNumber++; } putchar(ch); } else if (0 == (frequency = gFrequency[(unsigned char)ch]) || ch == gBlank) { int blanksDrawn; double thisChoice; double totalChoices = 0.0; /* no blanks */ thisChoice = 1.0; for (i=0; i<rackLength; i++) thisChoice*=rackChoices[i][rackCounts[i]]; totalChoices += thisChoice; for (blanksDrawn=1; blanksDrawn<gBlankCount; blanksDrawn++) { } #ifdef DEBUG putchar('\n'); #endif /* one blank */ for (i=0; i<rackLength; i++) { int j; rackCounts[i]--; thisChoice = gChooseSmall[gBlankCount][1]; for (j=0;j<rackLength;j++) thisChoice*=rackChoices[j][rackCounts[j]]; totalChoices += thisChoice; #ifdef DEBUG printf(" with blank for '%c': %.0f\n", rackChars[i], thisChoice); #endif rackCounts[i]++; } /* two blanks */ for (i=0; i<rackLength; i++) { int j; rackCounts[i]--; for (j=i; j<rackLength; j++) if (rackCounts[j]) { int k; rackCounts[j]--; thisChoice = gChooseSmall[gBlankCount][2]; for (k=0;k<rackLength;k++) thisChoice*=rackChoices[k][rackCounts[k]]; totalChoices += thisChoice; rackCounts[j]++; #ifdef DEBUG printf(" with blank for '%c' and '%c': %.0f\n", rackChars[i], rackChars[j], thisChoice); #endif } rackCounts[i]++; } charNumber--; word[charNumber] = '\0'; printf("%.0f\t%s", gResultMode == ppb ? (1e9*totalChoices)/gChooseBag[charNumber] : gResultMode == numerator ? totalChoices : gChooseBag[charNumber]/totalChoices, word); /* printf("\t%.0f\t%.0f",totalChoices,gChooseBag[charNumber]/totalChoices); */ #ifdef DEBUG printf(" %f/%f = 1/%f\n", totalChoices, gChooseBag[charNumber], gChooseBag[charNumber]/totalChoices); for (i=0; i<rackLength; i++) { int j; printf("%d) '%c'*%d: ", i, rackChars[i], rackCounts[i]); for (j=0; j<=rackCounts[i]; j++) printf("%4.0f", rackChoices[i][j]); putchar('\n'); } #endif state = out; ungetc(ch, aFilePointer); } else if (charNumber > MAX_BINGO_LENGTH) { putchar('\n'); fflush(stdout); fprintf(stderr, "%s: word too long in line %d of %s.\n", gProgramName, lineNumber, aFileName); exit(1); } else { word[charNumber-1] = ch; for (i=0; i<rackLength; i++) if (ch == rackChars[i]) { ++rackCounts[i]; break; } if (i == rackLength) { rackLength++; rackChars[i] = ch; rackChoices[i] = &gChooseSmall[frequency][0]; rackCounts[i] = 1; } } } }