#include <stdio.h>
/* wordprob.c - calculate the probabilities of drawing words at random out
* of a bag of letter tiles.
*
* Copyright (C) 1993 by John J. Chew, III <jjchew@math.utoronto.ca>.
* All Rights Reserved.
*
* <title>wordprob.c</title>
*
* $Header: wordprob.c,v 1.1 93/07/28 09:11:05 jjchew Locked $
*/
#undef DEBUG
#define MAX_BINGO_LENGTH 15
/* configurable */
unsigned char *gBag = "aaaaaaaaabbccddddeeeeeeeeeeeeffggghhiiiiiiiiijkllllmm\
nnnnnnooooooooppqrrrrrrssssttttttuuuuvvwwxyyz__";
unsigned char gBlank = '_';
/* calculated */
int gBagSize;
int gBlankCount;
int gFrequency[256];
double gChooseSmall[MAX_BINGO_LENGTH+1][MAX_BINGO_LENGTH+1];
double gChooseBag[MAX_BINGO_LENGTH+1];
/* general external variables */
char *gProgramName;
enum { none, reciprocal, ppb, numerator } gResultMode = none;
/* functions in this file */
int main();
void init();
void process();
int main(argc, argv)
int argc;
char **argv;
{
int ch;
int errflg = 0;
extern int optind;
extern char *optarg;
while ((ch = getopt(argc, argv, "B:bnr")) != EOF) switch(ch) {
case 'B':
gBag = optarg;
break;
case 'b':
if (gResultMode != ppb && gResultMode != none) errflg++;
else gResultMode = ppb;
break;
case 'n':
if (gResultMode != numerator && gResultMode != none) errflg++;
else gResultMode = numerator;
break;
case 'r':
if (gResultMode != reciprocal && gResultMode != none) errflg++;
else gResultMode = reciprocal;
break;
case '?':
default:
errflg++;
break;
}
if (errflg) {
fputs("Usage: wordprob [-b|-n|-r] [file...]\n", stderr);
fputs(" -B bag set non-standard bag (blank=_)\n", stderr);
fputs(" -b display parts per billion\n", stderr);
fputs(" -n display numerator only\n", stderr);
fputs(" -r display reciprocal of probability\n", stderr);
exit(2);
}
if (gResultMode == none) gResultMode = reciprocal;
init(&argc, &argv);
if (optind == argc) process(stdin, "(stdin)");
else for (; optind < argc; optind++)
process(fopen(argv[optind], "r"), argv[optind]);
return 0;
}
void init(argcp, argvp)
int *argcp;
char ***argvp;
{
double a, r;
int i, j;
char *cp;
/* command-line argument processing */
gProgramName = **argvp;
/* bag analysis */
gBagSize = strlen(gBag);
for (cp=gBag; *cp; ) gFrequency[*cp++]++;
gBlankCount = gFrequency[gBlank];
if (gBlankCount != 2)
{
fprintf(stderr, "%s: current code supports only two-blank bags\n",
gProgramName);
exit(1);
}
/* combinatorial precalculation */
gChooseSmall[0][0] = gChooseBag[0] = a = 1.0;
for (r=i=1; i<=MAX_BINGO_LENGTH; i++, r++) {
gChooseBag[i] = a *= (gBagSize + 1.0 - r)/r;
gChooseSmall[i][0] = gChooseSmall[i][i] = 1.0;
for (j=1; j<MAX_BINGO_LENGTH; j++)
gChooseSmall[i][j] = gChooseSmall[i-1][j-1]+gChooseSmall[i-1][j];
}
}
void process(aFilePointer, aFileName)
FILE *aFilePointer;
char *aFileName;
{
int ch;
int charNumber = 0;
int frequency;
int i, j;
int lineNumber = 1;
unsigned char blankChars[MAX_BINGO_LENGTH];
unsigned char rackChars[MAX_BINGO_LENGTH];
double *rackChoices[MAX_BINGO_LENGTH];
int rackCounts[MAX_BINGO_LENGTH];
int rackLength = 0;
enum { in, out } state = in;
char word[MAX_BINGO_LENGTH+1];
if (NULL == aFilePointer) {
fprintf(stderr, "%s: cannot open %s for input.\n", gProgramName, aFileName)
;
perror(gProgramName);
exit(1);
}
while (EOF != (ch = getc(aFilePointer))) {
charNumber++;
if (state == out) {
if (ch == '\n') {
state = in;
rackLength = 0;
charNumber = 0;
lineNumber++;
}
putchar(ch);
}
else if (0 == (frequency = gFrequency[(unsigned char)ch]) || ch == gBlank)
{
int blanksDrawn;
double thisChoice;
double totalChoices = 0.0;
/* no blanks */
thisChoice = 1.0;
for (i=0; i<rackLength; i++) thisChoice*=rackChoices[i][rackCounts[i]];
totalChoices += thisChoice;
for (blanksDrawn=1; blanksDrawn<gBlankCount; blanksDrawn++) {
}
#ifdef DEBUG
putchar('\n');
#endif
/* one blank */
for (i=0; i<rackLength; i++) {
int j;
rackCounts[i]--;
thisChoice = gChooseSmall[gBlankCount][1];
for (j=0;j<rackLength;j++) thisChoice*=rackChoices[j][rackCounts[j]];
totalChoices += thisChoice;
#ifdef DEBUG
printf(" with blank for '%c': %.0f\n", rackChars[i], thisChoice);
#endif
rackCounts[i]++;
}
/* two blanks */
for (i=0; i<rackLength; i++) {
int j;
rackCounts[i]--;
for (j=i; j<rackLength; j++) if (rackCounts[j]) {
int k;
rackCounts[j]--;
thisChoice = gChooseSmall[gBlankCount][2];
for (k=0;k<rackLength;k++) thisChoice*=rackChoices[k][rackCounts[k]];
totalChoices += thisChoice;
rackCounts[j]++;
#ifdef DEBUG
printf(" with blank for '%c' and '%c': %.0f\n",
rackChars[i], rackChars[j], thisChoice);
#endif
}
rackCounts[i]++;
}
charNumber--;
word[charNumber] = '\0';
printf("%.0f\t%s",
gResultMode == ppb
? (1e9*totalChoices)/gChooseBag[charNumber]
: gResultMode == numerator
? totalChoices
: gChooseBag[charNumber]/totalChoices,
word);
/* printf("\t%.0f\t%.0f",totalChoices,gChooseBag[charNumber]/totalChoices); */
#ifdef DEBUG
printf(" %f/%f = 1/%f\n", totalChoices, gChooseBag[charNumber],
gChooseBag[charNumber]/totalChoices);
for (i=0; i<rackLength; i++)
{
int j;
printf("%d) '%c'*%d: ", i, rackChars[i], rackCounts[i]);
for (j=0; j<=rackCounts[i]; j++)
printf("%4.0f", rackChoices[i][j]);
putchar('\n');
}
#endif
state = out;
ungetc(ch, aFilePointer);
}
else if (charNumber > MAX_BINGO_LENGTH) {
putchar('\n'); fflush(stdout);
fprintf(stderr, "%s: word too long in line %d of %s.\n",
gProgramName, lineNumber, aFileName);
exit(1);
}
else {
word[charNumber-1] = ch;
for (i=0; i<rackLength; i++)
if (ch == rackChars[i]) { ++rackCounts[i]; break; }
if (i == rackLength) {
rackLength++;
rackChars[i] = ch;
rackChoices[i] = &gChooseSmall[frequency][0];
rackCounts[i] = 1;
}
}
}
}