#include <iostream>
#include <fstream>
#include <iterator>
#include <string>
#include <vector>
#include <thread>
#include <chrono>
#include <unordered_map>

#include <cstdio>

#include <pulse/simple.h>
#include <pulse/error.h>

#include <espeak/speak_lib.h>

using std::cout;
using std::cin;
using std::endl;
using std::string;
using std::unordered_map;
using std::vector;
//using std::iterator;
using std::ifstream;
using std::istreambuf_iterator;

// Size constant; nothing in the visible part of this file refers to it.
const int BUF_SIZE{128};

// A sample file path is built as audio_dir + <phoneme name> + audio_format.
const string audio_dir{"../samples/"};
const string audio_format{".wav"};

/* Value handed to espeak_TextToPhonemes() as its phonememode argument.
   Layout of that argument:
     bits 0-3:
       0 = just phonemes.
       1 = include ties (U+361) for phoneme names of more than one letter.
       2 = include zero-width-joiner for phoneme names of more than one letter.
       3 = separate phonemes with underscore characters.
     bits 4-7:
       0 = eSpeak's ascii phoneme names.
       1 = International Phonetic Alphabet (as UTF-8 characters).
   Chosen here: IPA output (1 in bits 4-7) with underscore separators (3 in bits 0-3). */
const int phoneme_mode = (1 << 4) | 3;

// Names of the phoneme samples. Each name doubles as the base name of a sample
// file (audio_dir + name + audio_format) and as the key used to look the
// loaded sample up again.
const vector<string> phoneme_names = {
    "EH3", "EH2", "EH1", "A2",  "A1",  "ZH",  "AH2", "I3",  "I2",  "I1",
    "M",   "N",   "B",   "V",   "CH",  "SH",  "Z",   "AW1", "NG",  "AH1",
    "OO1", "OO",  "L",   "K",   "J",   "H",   "G",   "F",   "D",   "S",
    "A",   "AY",  "Y1",  "UH3", "AH",  "P",   "O",   "I",   "U",   "Y",
    "T",   "R",   "E",   "W",   "AE",  "AE1", "AW2", "UH2", "UH1", "UH",
    "O2",  "O1",  "IU",  "U1",  "THV", "TH",  "ER",  "EH",  "E1",  "AW"
};

// PulseAudio playback stream; stays null until init() opens it.
pa_simple *s{nullptr};

// Raw contents of the sample files, one entry per phoneme name.
vector<vector<char>> buffers{};

// phoneme name -> pointer to that phoneme's entry in `buffers`
unordered_map<string, vector<char>*> name_to_buffer{};

// IPA symbol -> votrax phoneme name
unordered_map<string, string> ipa_to_votrax{};

// Suspends the calling thread for `millis` milliseconds.
inline void sleep(int millis){
    const std::chrono::milliseconds pause(millis);
    std::this_thread::sleep_for(pause);
}

void init(){
    //ipa_to_votrax["I"] = "EH3";
    ipa_to_votrax["ˈɛ"] = "EH2";
    //ipa_to_votrax["ˈɛ"] = "EH1";
    ipa_to_votrax["ˈeɪ"] = "A2";
    //ipa_to_votrax["ˈeɪ"] = "A1";
    ipa_to_votrax["ʒ"]  = "ZH";
    ipa_to_votrax["ˈɒ"] = "AH2";
    //ipa_to_votrax["I"] = "I3";
    ipa_to_votrax["ˈɪ"]  = "I2";
    ipa_to_votrax["ˌi"]  = "I3";
    ipa_to_votrax["I"]   = "I1";
    ipa_to_votrax["m"]   = "M";
    ipa_to_votrax["n"]   = "N";
    ipa_to_votrax["b"]   = "B";
    ipa_to_votrax["v"]   = "V";
    ipa_to_votrax["tʃ"]  = "CH";
    ipa_to_votrax["ʃ"]   = "SH";
    ipa_to_votrax["z"]   = "Z";
    ipa_to_votrax["ˈɔː"] = "AW1";
    ipa_to_votrax["ŋ"]   = "NG";
    ipa_to_votrax["ˈɑː"] = "AH1";
    ipa_to_votrax["ˈʊ"]  = "OO1";
    ipa_to_votrax["ˈʊ"]  = "OO";
    ipa_to_votrax["l"]   = "L";
    ipa_to_votrax["k"]   = "K";
    ipa_to_votrax["dʒ"]  = "J";
    ipa_to_votrax["h"]   = "H";
    ipa_to_votrax["ɡ"]   = "G";
    ipa_to_votrax["f"]   = "F";
    ipa_to_votrax["d"]   = "D";
    ipa_to_votrax["s"]   = "S";
    //ipa_to_votrax["ˈeɪ"] = "A";
    ipa_to_votrax["ˈeɪ"] = "AY";
    ipa_to_votrax["j"]   = "Y1";
    ipa_to_votrax["ə"]   = "UH3";
    ipa_to_votrax["a"]   = "AH";
    ipa_to_votrax["p"]   = "P";
    ipa_to_votrax["ˈəʊ"] = "O";
    ipa_to_votrax["ˈɪ"]  = "I";
    ipa_to_votrax["ɪ"]   = "I";
    ipa_to_votrax["ˈuː"] = "U";
    ipa_to_votrax["i"]   = "Y";
    ipa_to_votrax["t"]   = "T";
    ipa_to_votrax["ɹ"]   = "R";
    ipa_to_votrax["ˈiː"] = "E";
    ipa_to_votrax["w"]   = "W";
    //ipa_to_votrax["ˈa"]  = "AE";
    ipa_to_votrax["ˈa"]  = "AE1";
    ipa_to_votrax["ˈɒ"]  = "AW2";
    ipa_to_votrax["ɐ"]   = "UH2";
    //ipa_to_votrax["ˈʌ"]  = "UH1";
    ipa_to_votrax["ˈʌ"]  = "UH";
    ipa_to_votrax["ʌ"]  = "UH";
    ipa_to_votrax["ɔː"]  = "O2";
    ipa_to_votrax["ˈɔː"] = "O1";
    //ipa_to_votrax["uː"] = "IU";
    ipa_to_votrax["uː"]  = "U1";
    ipa_to_votrax["ð"]   = "THV";
    ipa_to_votrax["θ"]   = "TH";
    ipa_to_votrax["ˈɜː"] = "ER";
    ipa_to_votrax["ˈɛ"]  = "EH";
    ipa_to_votrax["ˈiː"] = "E1";
    ipa_to_votrax["ˈɔː"] = "AW";

    static const pa_sample_spec ss = {
        .format = PA_SAMPLE_S16LE,
        .rate = 44100,
        .channels = 1
    };

    int error;
    if(!(s = pa_simple_new(NULL, "phoneme", PA_STREAM_PLAYBACK, NULL, "playback", &ss, NULL, NULL, &error))) {
        cout << "pa_simple_new() failed: " << pa_strerror(error) << endl;
    }

    buffers.reserve(phoneme_names.size());

    for(unsigned i = 0; i < phoneme_names.size(); ++i){
        string filename = audio_dir + phoneme_names[i] + audio_format;
        ////cout << "opening: " << filename << endl;

        ifstream input(filename, std::ios::binary);
        if(!input.is_open()){ cout << "error opening file" << endl; }

        buffers.emplace(buffers.begin() + i, vector<char>(istreambuf_iterator<char>(input), {}));;
        //cout << "size: " << buffers.back().size() << endl;
        name_to_buffer[phoneme_names[i]] = &(buffers[i]);
    }

    espeak_Initialize(espeak_AUDIO_OUTPUT(), 0, NULL, 0);
    espeak_SetVoiceByName("en");
}

void play_sound(vector<char> buffer){
    int error;
    //WAV header is 44 bytes
    //removing 256 at the end because there's extra data or something?
    if(pa_simple_write(s, &(buffer[44]), buffer.size() - 256, &error) < 0){
        cout << "pa_simple_write() failed: " << pa_strerror(error) << endl;
    }
    if(pa_simple_drain(s, &error) < 0){
        cout << "pa_simple_drain() failed: " << pa_strerror(error) << endl;
    }
}


// Looks up the votrax phoneme for one IPA symbol and plays its sample.
//
// phoneme: a single IPA token. Symbols without a mapping are logged with an
//          empty votrax name and otherwise ignored.
//
// Both lookups use find() so that unknown symbols neither add entries to the
// maps nor lead to a null buffer pointer being dereferenced.
void play(const string &phoneme){
    const auto mapping = ipa_to_votrax.find(phoneme);
    const string votrax = (mapping != ipa_to_votrax.end()) ? mapping->second : string();
    cout << "playing " << phoneme << " aka " << votrax << endl;
    if(votrax.empty()){
        return;
    }

    const auto sample = name_to_buffer.find(votrax);
    if(sample == name_to_buffer.end() || sample->second == nullptr){
        cout << "no sample loaded for " << votrax << endl;
        return;
    }
    play_sound(*(sample->second));
    //sleep(100);
}

// Translates UTF-8 text into a phoneme string using eSpeak, formatted
// according to phoneme_mode.
//
// input:   the text to translate.
// returns: the phonemes of every clause of the text, concatenated in order;
//          empty if eSpeak produced nothing.
string getPhonemes(string &&input){
    string phonemes;

    // eSpeak wants the address of a pointer to the text. It translates one
    // clause per call and advances the pointer, setting it to NULL once the
    // end of the text has been reached.
    const void *text = input.c_str();
    while(text != nullptr){
        const char *clause = espeak_TextToPhonemes(&text, espeakCHARS_UTF8, phoneme_mode);
        // The returned buffer belongs to eSpeak: copy it right away (the next
        // call may reuse it) and never free it.
        if(clause != nullptr){
            phonemes += clause;
        }
    }
    return phonemes;
}

// Splits a phoneme string into tokens at every '_' and ' '.
// Empty tokens (leading, trailing or doubled delimiters) are dropped.
static vector<string> split_phonemes(const string &str){
    vector<string> tokens;
    string::size_type start = 0;
    while(start < str.size()){
        // Cut at whichever delimiter comes first, so that a word boundary
        // never ends up inside a token.
        string::size_type end = str.find_first_of("_ ", start);
        if(end == string::npos){
            end = str.size();
        }
        if(end > start){
            tokens.push_back(str.substr(start, end - start));
        }
        start = end + 1;
    }
    return tokens;
}

// Prints a phoneme string and plays each phoneme in it, in order.
//
// str: phonemes separated by '_' (between phonemes) and ' ' (between words).
//      A single leading space, if present, is removed before printing.
void parse(string &&str){
    if(!str.empty() && str.front() == ' '){
        str.erase(str.begin()); //remove first space
    }
    cout << "phonemes: " << str << endl;

    vector<string> phonemes = split_phonemes(str);
    for(string &phoneme : phonemes){
        play(phoneme);
    }
}

// Entry point: every command-line argument is converted to phonemes and
// played, with a 50 ms pause after each argument.
// Returns 1 when no argument was given, 0 otherwise.
int main(int argc, char **argv){
    if(argc < 2){
        cout << "Usage: ./ipa2chip \"phonemes\"" << endl;
        return 1;
    }
    init();
    //string input;
    //while(std::getline(cin, input)){
    //    parse(getPhonemes(input));
    //}
    for(int i = 1; i < argc; ++i){
        parse(getPhonemes(argv[i]));
        sleep(50);
    }
    // `s` is still null if the stream could not be opened, and
    // pa_simple_free() must not be given a null pointer.
    if(s != nullptr){
        pa_simple_free(s);
        s = nullptr;
    }
    return 0;
}