#include <stdio.h> #include <ctype.h> #include <stdlib.h> #include <string.h> #include "english.h" #define FALSE (0) #define TRUE (!0) /* ** English to Phoneme translation. ** ** Rules are made up of four parts: ** ** The left context. ** The text to match. ** The right context. ** The phonemes to substitute for the matched text. ** ** Procedure: ** ** Seperate each block of letters (apostrophes included) ** and add a space on each side. For each unmatched ** letter in the word, look through the rules where the ** text to match starts with the letter in the word. If ** the text to match is found and the right and left ** context patterns also match, output the phonemes for ** that rule and skip to the next unmatched letter. ** ** ** Special Context Symbols: ** ** # One or more vowels ** : Zero or more consonants ** ^ One consonant. ** . One of B, D, V, G, J, L, M, N, R, W or Z (voiced ** consonants) ** % One of ER, E, ES, ED, ING, ELY (a suffix) ** (Right context only) ** + One of E, I or Y (a "front" vowel) */ #ifndef ORIGINAL char *copystr(s) char *s; { char *p; p = malloc(strlen(s)+1); strcpy(p, s); return(p); } int read_one_rule(rfile, left, mid, right, rep) FILE *rfile; char *left; char *mid; char *right; char *rep; { char line[128], *s, *p; for (;;) { if (fgets(line, 128, rfile) == NULL) return(FALSE); if (*line != '*') break; } s = line; p = s; while (*s != '|') { if (isalpha(*s)) *s = toupper(*s); if (*s == '<') *s = ' '; /* nothing */ s++; } *s++ = '\0'; strcpy(left, p); p = s; while (*s != '|') { if (isalpha(*s)) *s = toupper(*s); s++; } *s++ = '\0'; strcpy(mid, p); p = s; while (*s != '=') { if (*s == '>') *s = ' '; /* nothing */ s++; } *s++ = '\0'; strcpy(right, p); p = s; if (*s >= ' ') { s += 1; while (*s > ' ') s++; } *s++ = '\0'; strcpy(rep, p); return(TRUE); } void Init_Rules() { #define max_rules 100 /* for now */ FILE *rulefile; char left[128], mid[128], right[128], rep[128]; int lastrule[27] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; int i; rulefile = fopen("rules-navy", "r"); if (rulefile == NULL) { fprintf(stderr, "Cannot open rule file 'rules-navy'\n"); exit(1); } Rules = malloc(27*sizeof(Rule *)); Rules[0] = punct_rules = malloc(max_rules * sizeof(Rule)); Rules[1] = A_rules = malloc(max_rules * sizeof(Rule)); Rules[2] = B_rules = malloc(max_rules * sizeof(Rule)); Rules[3] = C_rules = malloc(max_rules * sizeof(Rule)); Rules[4] = D_rules = malloc(max_rules * sizeof(Rule)); Rules[5] = E_rules = malloc(max_rules * sizeof(Rule)); Rules[6] = F_rules = malloc(max_rules * sizeof(Rule)); Rules[7] = G_rules = malloc(max_rules * sizeof(Rule)); Rules[8] = H_rules = malloc(max_rules * sizeof(Rule)); Rules[9] = I_rules = malloc(max_rules * sizeof(Rule)); Rules[10] = J_rules = malloc(max_rules * sizeof(Rule)); Rules[11] = K_rules = malloc(max_rules * sizeof(Rule)); Rules[12] = L_rules = malloc(max_rules * sizeof(Rule)); Rules[13] = M_rules = malloc(max_rules * sizeof(Rule)); Rules[14] = N_rules = malloc(max_rules * sizeof(Rule)); Rules[15] = O_rules = malloc(max_rules * sizeof(Rule)); Rules[16] = P_rules = malloc(max_rules * sizeof(Rule)); Rules[17] = Q_rules = malloc(max_rules * sizeof(Rule)); Rules[18] = R_rules = malloc(max_rules * sizeof(Rule)); Rules[19] = S_rules = malloc(max_rules * sizeof(Rule)); Rules[20] = T_rules = malloc(max_rules * sizeof(Rule)); Rules[21] = U_rules = malloc(max_rules * sizeof(Rule)); Rules[22] = V_rules = malloc(max_rules * sizeof(Rule)); Rules[23] = W_rules = malloc(max_rules * sizeof(Rule)); Rules[24] = X_rules = malloc(max_rules * sizeof(Rule)); Rules[25] = Y_rules = malloc(max_rules * sizeof(Rule)); Rules[26] = Z_rules = malloc(max_rules * sizeof(Rule)); while (read_one_rule(rulefile, left, mid, right, rep)) { int thisrule; if (isalpha(*mid) && isupper(*mid)) { thisrule = *mid - 'A' + 1; } else { thisrule = 0; /* punct */ } /* fprintf(stderr, "Added rule[%d][%d] = {left=%s mid=%s right=%s rep=%s}\n", thisrule, lastrule[thisrule], left, mid, right, rep); */ Rules[thisrule][lastrule[thisrule]][0] = copystr(left); Rules[thisrule][lastrule[thisrule]][1] = copystr(mid); Rules[thisrule][lastrule[thisrule]][2] = copystr(right); Rules[thisrule][lastrule[thisrule]][3] = copystr(rep); lastrule[thisrule] += 1; } /* fprintf(stderr, "Rules read\n"); */ for (i = 0; i < 27; i++) { Rules[i][lastrule[i]][0] = Anything; Rules[i][lastrule[i]][1] = NULL; Rules[i][lastrule[i]][2] = Anything; Rules[i][lastrule[i]][3] = Silent; } } #endif int isvowel(chr) char chr; { return (chr == 'A' || chr == 'E' || chr == 'I' || chr == 'O' || chr == 'U'); } int isconsonant(chr) char chr; { return (isupper(chr) && !isvowel(chr)); } // #ifndef ORIGINAL -- debugging -DORIGINAL xlate_word(word) char word[]; { int index; /* Current position in word */ int type; /* First letter of match part */ index = 1; /* Skip the initial blank */ do { if (isupper(word[index])) type = word[index] - 'A' + 1; else type = 0; /* fprintf(stderr, "find rule Rules[%d] at %p -> %p\n", type, &Rules[type], Rules[type]); */ index = find_rule(word, index, Rules[type]); } while (word[index] != '\0'); } find_rule(word, index, rules) char word[]; int index; Rule *rules; { Rule *rule; char *left, *match, *right, *output; int remainder; for (;;) /* Search for the rule */ { rule = rules++; match = (*rule)[1]; if (match == 0) /* bad symbol! */ { fprintf(stderr, "Error: Can't find rule for: '%c' in \"%s\"\n", word[index], word); return index+1; /* Skip it! */ } /* fprintf(stderr, "rule = %p, (*rule) = %p left=%s mid=%s right=%s rep=%s\n", rule, (*rule), (*rule)[0], (*rule)[1], (*rule)[2], (*rule)[3]); */ for (remainder = index; *match != '\0'; match++, remainder++) { if (*match != word[remainder]) break; } if (*match != '\0') /* found missmatch */ continue; /* printf("\nWord: \"%s\", Index:%4d, Trying: \"%s/%s/%s\" = \"%s\"\n", word, index, (*rule)[0], (*rule)[1], (*rule)[2], (*rule)[3]); */ left = (*rule)[0]; right = (*rule)[2]; if (!leftmatch(left, &word[index-1])) continue; /* printf("leftmatch(\"%s\",\"...%c\") succeded!\n", left, word[index-1]); */ if (!rightmatch(right, &word[remainder])) continue; /* printf("rightmatch(\"%s\",\"%s\") succeded!\n", right, &word[remainder]); */ output = (*rule)[3]; /* printf("Success: "); */ outstring(output); return remainder; } } // #endif -- DEBUGGING -DORIGINAL leftmatch(pattern, context) char *pattern; /* first char of pattern to match in text */ char *context; /* last char of text to be matched */ { char *pat; char *text; int count; if (*pattern == '\0') /* null string matches any context */ { return TRUE; } /* point to last character in pattern string */ count = strlen(pattern); pat = pattern + (count - 1); text = context; for (; count > 0; pat--, count--) { /* First check for simple text or space */ if (isalpha(*pat) || *pat == '\'' || *pat == ' ') { if (*pat != *text) { return FALSE; } else { text--; continue; } } switch (*pat) { case '#': /* One or more vowels */ if (!isvowel(*text)) return FALSE; text--; while (isvowel(*text)) text--; break; case ':': /* Zero or more consonants */ while (isconsonant(*text)) text--; break; case '^': /* One consonant */ if (!isconsonant(*text)) return FALSE; text--; break; case '.': /* B, D, V, G, J, L, M, N, R, W, Z */ if (*text != 'B' && *text != 'D' && *text != 'V' && *text != 'G' && *text != 'J' && *text != 'L' && *text != 'M' && *text != 'N' && *text != 'R' && *text != 'W' && *text != 'Z') return FALSE; text--; break; case '+': /* E, I or Y (front vowel) */ if (*text != 'E' && *text != 'I' && *text != 'Y') return FALSE; text--; break; case '%': default: fprintf(stderr, "Bad char in left rule: '%c'\n", *pat); return FALSE; } } return TRUE; } rightmatch(pattern, context) char *pattern; /* first char of pattern to match in text */ char *context; /* last char of text to be matched */ { char *pat; char *text; if (*pattern == '\0') /* null string matches any context */ return TRUE; pat = pattern; text = context; for (pat = pattern; *pat != '\0'; pat++) { /* First check for simple text or space */ if (isalpha(*pat) || *pat == '\'' || *pat == ' ') { if (*pat != *text) { return FALSE; } else { text++; continue; } } switch (*pat) { case '#': /* One or more vowels */ if (!isvowel(*text)) return FALSE; text++; while (isvowel(*text)) text++; break; case ':': /* Zero or more consonants */ while (isconsonant(*text)) text++; break; case '^': /* One consonant */ if (!isconsonant(*text)) return FALSE; text++; break; case '.': /* B, D, V, G, J, L, M, N, R, W, Z */ if (*text != 'B' && *text != 'D' && *text != 'V' && *text != 'G' && *text != 'J' && *text != 'L' && *text != 'M' && *text != 'N' && *text != 'R' && *text != 'W' && *text != 'Z') return FALSE; text++; break; case '+': /* E, I or Y (front vowel) */ if (*text != 'E' && *text != 'I' && *text != 'Y') return FALSE; text++; break; case '%': /* ER, E, ES, ED, ING, ELY (a suffix) */ if (*text == 'E') { text++; if (*text == 'L') { text++; if (*text == 'Y') { text++; break; } else { text--; /* Don't gobble L */ break; } } else if (*text == 'R' || *text == 'S' || *text == 'D') text++; break; } else if (*text == 'I') { text++; if (*text == 'N') { text++; if (*text == 'G') { text++; break; } } return FALSE; } else return FALSE; default: fprintf(stderr, "Bad char in right rule:'%c'\n", *pat); return FALSE; } } return TRUE; }