#include <iostream> #include <fstream> #include <iterator> #include <string> #include <vector> #include <thread> #include <chrono> #include <unordered_map> #include <cstdio> #include <pulse/simple.h> #include <pulse/error.h> #include <espeak/speak_lib.h> using std::cout; using std::cin; using std::endl; using std::string; using std::unordered_map; using std::vector; //using std::iterator; using std::ifstream; using std::istreambuf_iterator; const int BUF_SIZE = 128; const string audio_dir = "../samples/"; const string audio_format = ".wav"; /*phonememode: bits0-3: 0= just phonemes. 1= include ties (U+361) for phoneme names of more than one letter. 2= include zero-width-joiner for phoneme names of more than one letter. 3= separate phonemes with underscore characters. bits 4-7: 0= eSpeak's ascii phoneme names. 1= International Phonetic Alphabet (as UTF-8 characters).*/ const int phoneme_mode = 3 | (1 << 4); const vector<string> phoneme_names = { "EH3", "EH2", "EH1", "A2", "A1", "ZH", "AH2", "I3", "I2", "I1", "M", "N", "B", "V", "CH", "SH", "Z", "AW1", "NG", "AH1", "OO1", "OO", "L", "K", "J", "H", "G", "F", "D", "S", "A", "AY", "Y1", "UH3", "AH", "P", "O", "I", "U", "Y", "T", "R", "E", "W", "AE", "AE1", "AW2", "UH2", "UH1", "UH", "O2", "O1", "IU", "U1", "THV", "TH", "ER", "EH", "E1", "AW" }; pa_simple *s = NULL; vector<vector<char>> buffers; //phoneme name to buffer unordered_map<string, vector<char>*> name_to_buffer; //ipa to votrax phoneme unordered_map<string, string> ipa_to_votrax; inline void sleep(int millis){ std::this_thread::sleep_for(std::chrono::milliseconds(millis)); } void init(){ //ipa_to_votrax["I"] = "EH3"; ipa_to_votrax["ˈɛ"] = "EH2"; //ipa_to_votrax["ˈɛ"] = "EH1"; ipa_to_votrax["ˈeɪ"] = "A2"; //ipa_to_votrax["ˈeɪ"] = "A1"; ipa_to_votrax["ʒ"] = "ZH"; ipa_to_votrax["ˈɒ"] = "AH2"; //ipa_to_votrax["I"] = "I3"; ipa_to_votrax["ˈɪ"] = "I2"; ipa_to_votrax["ˌi"] = "I3"; ipa_to_votrax["I"] = "I1"; ipa_to_votrax["m"] = "M"; ipa_to_votrax["n"] = "N"; ipa_to_votrax["b"] = "B"; ipa_to_votrax["v"] = "V"; ipa_to_votrax["tʃ"] = "CH"; ipa_to_votrax["ʃ"] = "SH"; ipa_to_votrax["z"] = "Z"; ipa_to_votrax["ˈɔː"] = "AW1"; ipa_to_votrax["ŋ"] = "NG"; ipa_to_votrax["ˈɑː"] = "AH1"; ipa_to_votrax["ˈʊ"] = "OO1"; ipa_to_votrax["ˈʊ"] = "OO"; ipa_to_votrax["l"] = "L"; ipa_to_votrax["k"] = "K"; ipa_to_votrax["dʒ"] = "J"; ipa_to_votrax["h"] = "H"; ipa_to_votrax["ɡ"] = "G"; ipa_to_votrax["f"] = "F"; ipa_to_votrax["d"] = "D"; ipa_to_votrax["s"] = "S"; //ipa_to_votrax["ˈeɪ"] = "A"; ipa_to_votrax["ˈeɪ"] = "AY"; ipa_to_votrax["j"] = "Y1"; ipa_to_votrax["ə"] = "UH3"; ipa_to_votrax["a"] = "AH"; ipa_to_votrax["p"] = "P"; ipa_to_votrax["ˈəʊ"] = "O"; ipa_to_votrax["ˈɪ"] = "I"; ipa_to_votrax["ɪ"] = "I"; ipa_to_votrax["ˈuː"] = "U"; ipa_to_votrax["i"] = "Y"; ipa_to_votrax["t"] = "T"; ipa_to_votrax["ɹ"] = "R"; ipa_to_votrax["ˈiː"] = "E"; ipa_to_votrax["w"] = "W"; //ipa_to_votrax["ˈa"] = "AE"; ipa_to_votrax["ˈa"] = "AE1"; ipa_to_votrax["ˈɒ"] = "AW2"; ipa_to_votrax["ɐ"] = "UH2"; //ipa_to_votrax["ˈʌ"] = "UH1"; ipa_to_votrax["ˈʌ"] = "UH"; ipa_to_votrax["ʌ"] = "UH"; ipa_to_votrax["ɔː"] = "O2"; ipa_to_votrax["ˈɔː"] = "O1"; //ipa_to_votrax["uː"] = "IU"; ipa_to_votrax["uː"] = "U1"; ipa_to_votrax["ð"] = "THV"; ipa_to_votrax["θ"] = "TH"; ipa_to_votrax["ˈɜː"] = "ER"; ipa_to_votrax["ˈɛ"] = "EH"; ipa_to_votrax["ˈiː"] = "E1"; ipa_to_votrax["ˈɔː"] = "AW"; static const pa_sample_spec ss = { .format = PA_SAMPLE_S16LE, .rate = 44100, .channels = 1 }; int error; if(!(s = pa_simple_new(NULL, "phoneme", PA_STREAM_PLAYBACK, NULL, "playback", &ss, NULL, NULL, &error))) { cout << "pa_simple_new() failed: " << pa_strerror(error) << endl; } buffers.reserve(phoneme_names.size()); for(unsigned i = 0; i < phoneme_names.size(); ++i){ string filename = audio_dir + phoneme_names[i] + audio_format; ////cout << "opening: " << filename << endl; ifstream input(filename, std::ios::binary); if(!input.is_open()){ cout << "error opening file" << endl; } buffers.emplace(buffers.begin() + i, vector<char>(istreambuf_iterator<char>(input), {}));; //cout << "size: " << buffers.back().size() << endl; name_to_buffer[phoneme_names[i]] = &(buffers[i]); } espeak_Initialize(espeak_AUDIO_OUTPUT(), 0, NULL, 0); espeak_SetVoiceByName("en"); } void play_sound(vector<char> buffer){ int error; //WAV header is 44 bytes //removing 256 at the end because there's extra data or something? if(pa_simple_write(s, &(buffer[44]), buffer.size() - 256, &error) < 0){ cout << "pa_simple_write() failed: " << pa_strerror(error) << endl; } if(pa_simple_drain(s, &error) < 0){ cout << "pa_simple_drain() failed: " << pa_strerror(error) << endl; } } void play(string &phoneme){ string votrax = ipa_to_votrax[phoneme]; cout << "playing " << phoneme << " aka " << votrax << endl; if(votrax != string("")){ play_sound(*(name_to_buffer[votrax])); //sleep(100); } } string getPhonemes(string &&input){ const char *output = espeak_TextToPhonemes((const void**)&(input), espeakCHARS_UTF8, phoneme_mode); string out(output); free((void*)output); return out; } void parse(string &&str){ str.erase(str.begin()); //remove first space cout << "phonemes: " << str << endl; vector<string> phonemes; const string delimiter = "_"; size_t pos; while((pos = str.find(delimiter)) != string::npos || (pos = str.find(" ")) != string::npos){ string token = str.substr(0, pos); phonemes.push_back(token); str.erase(0, pos + 1); } phonemes.push_back(str); for(string &s : phonemes){ play(s); } } int main(int argc, char **argv){ if(argc < 2){ cout << "Usage: ./ipa2chip \"phonemes\"" << endl; return 1; } init(); //string input; //while(std::getline(cin, input)){ // parse(getPhonemes(input)); //} for(int i = 1; i < argc; ++i){ parse(getPhonemes(argv[i])); sleep(50); } pa_simple_free(s); }