Jobanpreet committed on
Commit
445eb21
·
1 Parent(s): cfd96d2

Upload 4 files

Browse files
app.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import os
3
+ # Replace this path with the location of your HiFi-GAN checkout so that Generator can be imported from its models.py
4
+ sys.path.append("hifigan")
5
+ import argparse
6
+ import torch
7
+ from espnet2.bin.tts_inference import Text2Speech
8
+ from models import Generator
9
+ from scipy.io.wavfile import write
10
+ from meldataset import MAX_WAV_VALUE
11
+ from env import AttrDict
12
+ import json
13
+ import yaml
14
+ import numpy as np
15
+ from text_preprocess_for_inference import TTSDurAlignPreprocessor, CharTextPreprocessor, TTSPreprocessor
16
+
17
+ SAMPLING_RATE = 22050
18
+
19
def load_hifigan_vocoder(language, gender, device):
    """Load and prepare the HiFi-GAN vocoder generator for inference.

    NOTE(review): ``language`` is currently unused -- the checkpoint path is
    hard-coded to the "aryan" vocoder family. Confirm whether per-language
    vocoders are intended before generalizing the path.

    Args:
        language: TTS language (unused, see note above).
        gender: "male" or "female"; selects the vocoder checkpoint directory.
        device: Torch device string ("cpu" or "cuda").

    Returns:
        A HiFi-GAN ``Generator`` in eval mode with weight norm removed,
        placed on ``device``.
    """
    vocoder_config = f"vocoder/{gender}/aryan/hifigan/config.json"
    vocoder_generator = f"vocoder/{gender}/aryan/hifigan/generator"
    # Parse the vocoder hyper-parameter config straight from the file.
    with open(vocoder_config, 'r') as f:
        h = AttrDict(json.load(f))
    torch.manual_seed(h.seed)
    # Build the generator on the requested device and restore trained weights.
    device = torch.device(device)
    generator = Generator(h).to(device)
    state_dict_g = torch.load(vocoder_generator, map_location=device)
    generator.load_state_dict(state_dict_g['generator'])
    generator.eval()
    # Weight norm is a training-time construct; fold it away for inference.
    generator.remove_weight_norm()

    return generator
39
+
40
+
41
def load_fastspeech2_model(language, gender, device):
    """Load the FastSpeech2 acoustic model for ``language``/``gender``.

    Rewrites the model's config.yaml in place so that the normalization
    statistics files (feats/pitch/energy) point at absolute paths under the
    current working directory, then constructs an ESPnet ``Text2Speech``
    instance from the patched config.

    Fix: the original hard-coded ``"punjabi"`` in the config/model paths while
    building the stats paths from the ``language`` parameter; the paths now use
    ``language`` consistently (identical behavior for this app, which only
    passes "punjabi").

    Args:
        language: Language directory name, e.g. "punjabi".
        gender: "male" or "female"; selects the model directory.
        device: Torch device string ("cpu" or "cuda").

    Returns:
        An ESPnet ``Text2Speech`` inference object.
    """
    # Update the config.yaml file based on language and gender.
    config_path = f"{language}/{gender}/model/config.yaml"
    with open(config_path, "r") as file:
        config = yaml.safe_load(file)

    current_working_directory = os.getcwd()
    # Absolute paths to the normalization statistics shipped with the model.
    feat_path = os.path.join(current_working_directory, language, gender, "model/feats_stats.npz")
    pitch_path = os.path.join(current_working_directory, language, gender, "model/pitch_stats.npz")
    energy_path = os.path.join(current_working_directory, language, gender, "model/energy_stats.npz")

    config["normalize_conf"]["stats_file"] = feat_path
    config["pitch_normalize_conf"]["stats_file"] = pitch_path
    config["energy_normalize_conf"]["stats_file"] = energy_path

    # Persist the patched config so Text2Speech reads the absolute paths.
    with open(config_path, "w") as file:
        yaml.dump(config, file)

    tts_model = f"{language}/{gender}/model/model.pth"

    return Text2Speech(train_config=config_path, model_file=tts_model, device=device)
69
+
70
def text_synthesis(language, gender, sample_text, vocoder, MAX_WAV_VALUE, device):
    """Run the full TTS pipeline: text -> mel-spectrogram -> int16 waveform.

    Args:
        language: Language directory name used to locate the acoustic model.
        gender: "male" or "female".
        sample_text: Preprocessed (phonemized) input text.
        vocoder: A loaded HiFi-GAN generator.
        MAX_WAV_VALUE: Scale factor mapping [-1, 1] audio to int16 range.
        device: Torch device string.

    Returns:
        A 1-D numpy int16 array containing the synthesized waveform.
    """
    # Inference only -- no gradients needed anywhere in the pipeline.
    with torch.no_grad():
        # Acoustic model (FastSpeech2) for the requested language/gender.
        acoustic_model = load_fastspeech2_model(language, gender, device)

        # Text -> mel-spectrogram.
        out = acoustic_model(sample_text, decode_conf={"alpha": 1})
        print("TTS Done")
        # Factor 2.3262 -- presumably compensates for feature normalization
        # scaling before vocoding; confirm against training statistics.
        mel = out["feat_gen_denorm"].T.unsqueeze(0) * 2.3262
        mel = mel.to(device)

        # Mel-spectrogram -> waveform via the HiFi-GAN vocoder.
        waveform = vocoder(mel).squeeze()
        waveform = waveform * MAX_WAV_VALUE
        return waveform.cpu().numpy().astype('int16')
88
+
89
+
90
def perform_text_synthesis(text_input, language, gender):
    """Preprocess raw text and synthesize audio.

    Relies on the module-level ``preprocessor``, ``vocoder`` and ``device``
    set up by the Streamlit section below.

    Returns:
        The synthesized waveform as a numpy int16 array.
    """
    phrases, _ = preprocessor.preprocess(text_input, language, gender)
    return text_synthesis(language, gender, " ".join(phrases), vocoder, MAX_WAV_VALUE, device)
95
+
96
+
97
# --- Streamlit front-end -------------------------------------------------
# Fix: the original re-imported streamlit/torch/numpy/write/
# CharTextPreprocessor here (and streamlit a second time further down) even
# though the top-of-file imports already provide them; only the names
# genuinely new to this section are imported now.
import streamlit as st
from io import BytesIO

language = "punjabi"
device = "cuda" if torch.cuda.is_available() else "cpu"

# Character-level preprocessor used for Punjabi text.
preprocessor = CharTextPreprocessor()

st.title("Text to Speech Punjabi Language")

text_input = st.text_area("Enter text")

# Gender selection drives which vocoder/model checkpoints are loaded.
gender = st.radio("Select Gender", ("male", "female"))
vocoder = load_hifigan_vocoder(language, gender, device)

if st.button("Convert to Speech"):
    audio = perform_text_synthesis(text_input, language, gender.lower())

    # Serialize the int16 numpy waveform into an in-memory WAV container.
    audio_bytes = BytesIO()
    write(audio_bytes, SAMPLING_RATE, audio)

    # Play the result directly in the browser.
    st.audio(audio_bytes, format="audio/wav")

# Streamlit footer (optional)
st.text("Powered by Sabudh Interns")
135
+
136
+
137
+
138
+
139
+
140
+
141
+
get_phone_mapped_python.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class TextReplacer:
    """Rewrites multi-character phone codes into single placeholder symbols.

    The mapping sends ASCII phone digraphs/trigraphs (e.g. ``'aa'``,
    ``'dxhq'``) to single characters (capital Latin letters or Indic script
    code points) so that downstream processing sees one symbol per phone.
    """

    def __init__(self):
        # Insertion order is significant: longer codes (e.g. 'axx', 'dxhq')
        # precede their prefixes ('ax', 'dxh', 'dx') so the sequential
        # replacement below consumes the longest match first.
        self.replacements = {
            'aa': 'A',
            'ae': 'ऍ',
            'ag': 'ऽ',
            'ai': 'ऐ',
            'au': 'औ',
            'axx': 'अ',
            'ax': 'ऑ',
            'bh': 'B',
            'ch': 'C',
            'dh': 'ध',
            'dxhq': 'T',
            'dxh': 'ढ',
            'dxq': 'D',
            'dx': 'ड',
            'ee': 'E',
            'ei': 'ऐ',
            'eu': 'உ',
            'gh': 'घ',
            'gq': 'G',
            'hq': 'H',
            'ii': 'I',
            'jh': 'J',
            'khq': 'K',
            'kh': 'ख',
            'kq': 'क',
            'ln': 'ൾ',
            'lw': 'ൽ',
            'lx': 'ള',
            'mq': 'M',
            'nd': 'ऩ',
            'ng': 'ङ',
            'nj': 'ञ',
            'nk': 'Y',
            'nn': 'N',
            'nw': 'ൺ',
            'nx': 'ण',
            'oo': 'O',
            'ou': 'औ',
            'ph': 'P',
            'rqw': 'ॠ',
            'rq': 'R',
            'rw': 'ർ',
            'rx': 'ऱ',
            'sh': 'श',
            'sx': 'ष',
            'txh': 'ठ',
            'th': 'थ',
            'tx': 'ट',
            'uu': 'U',
            'wv': 'W',
            'zh': 'Z',
            # ... Add more replacements as needed
        }

    def apply_replacements(self, text):
        """Apply every phone-code substitution to *text*, stripping spaces.

        Substitutions run sequentially in dictionary order; spaces are
        removed on each pass (preserved from the original implementation,
        where stripping interleaves with the code replacements).
        """
        for code, symbol in self.replacements.items():
            text = text.replace(code, symbol)
            text = text.replace(" ", "")
        return text
67
+
multilingualcharmap.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"assamese_male": {"a": "a", "\u0911": "A", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "E", "E": "E", "\u0910": "\u0910", "o": "o", "O": "o", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "\u0919", "c": "c", "C": "C", "j": "j", "J": "j", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "l", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "j", "D": "D", "T": "\u0922", "f": "P", "\u0930": "r", "M": "M", "q": "q", "H": "h", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "l", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "assamese_female": {"a": "a", "\u0911": "A", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "E", "E": "E", "\u0910": "\u0910", "o": "o", "O": "o", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "\u0919", "c": "c", "C": "C", "j": "j", "J": "j", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "l", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "j", "D": "D", "T": "\u0922", "f": "P", "\u0930": "r", "M": "M", "q": "q", "H": "h", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "l", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "bengali_male": {"a": "a", "\u0911": "A", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", 
"\u0b89": "u", "U": "U", "R": "R", "e": "E", "E": "E", "\u0910": "\u0910", "o": "o", "O": "o", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "\u0919", "c": "c", "C": "C", "j": "j", "J": "J", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "l", "w": "b", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "j", "D": "D", "T": "\u0922", "f": "P", "\u0930": "r", "M": "M", "q": "q", "H": "h", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "l", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "bengali_female": {"a": "a", "\u0911": "A", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "E", "E": "E", "\u0910": "\u0910", "o": "o", "O": "o", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "\u0919", "c": "c", "C": "C", "j": "j", "J": "J", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "l", "w": "b", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "j", "D": "D", "T": "\u0922", "f": "P", "\u0930": "r", "M": "M", "q": "q", "H": "h", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "l", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "bodo_female": {"a": "a", "\u0911": "A", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "E", "E": "E", "\u0910": "\u0910", "o": "o", "O": "o", 
"\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "\u0919", "c": "c", "C": "C", "j": "j", "J": "J", "\u091e": "y", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0921", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "l", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "j", "D": "D", "T": "\u0921", "f": "P", "\u0930": "r", "M": "n", "q": "q", "H": "H", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "l", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "gujarati_male": {"a": "a", "\u0911": "\u0911", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "E", "E": "E", "\u0910": "\u0910", "o": "o", "O": "o", "\u090d": "\u090d", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "n", "c": "c", "C": "C", "j": "j", "J": "J", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "j", "D": "\u0921", "T": "\u0922", "f": "P", "\u0930": "r", "M": "M", "q": "q", "H": "H", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "\u0d33", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "gujarati_female": {"a": "a", "\u0911": "\u0911", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "E", "E": "E", "\u0910": "\u0910", "o": "o", "O": "o", "\u090d": "\u090d", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", 
"\u0918": "\u0918", "\u0919": "n", "c": "c", "C": "C", "j": "j", "J": "J", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "j", "D": "\u0921", "T": "\u0922", "f": "P", "\u0930": "r", "M": "M", "q": "q", "H": "H", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "\u0d33", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "hindi_male": {"a": "a", "\u0911": "\u0911", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "E", "E": "E", "\u0910": "\u0910", "o": "o", "O": "o", "\u090d": "\u090d", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "\u0919", "c": "c", "C": "C", "j": "j", "J": "J", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "\u0915", "K": "K", "G": "G", "z": "z", "D": "D", "T": "T", "f": "f", "\u0930": "r", "M": "M", "q": "q", "H": "H", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "\u0d33", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "hindi_female": {"a": "a", "\u0911": "\u0911", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "E", "E": "E", "\u0910": "\u0910", "o": "o", "O": "o", "\u090d": "\u090d", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "\u0919", "c": "c", "C": "C", "j": "j", 
"J": "J", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "\u0915", "K": "K", "G": "G", "z": "z", "D": "D", "T": "T", "f": "f", "\u0930": "r", "M": "M", "q": "q", "H": "H", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "\u0d33", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "kannada_male": {"a": "a", "\u0911": "A", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "e", "E": "E", "\u0910": "\u0910", "o": "o", "O": "O", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "n", "c": "c", "C": "C", "j": "j", "J": "J", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "j", "D": "\u0921", "T": "\u0922", "f": "P", "\u0930": "r", "M": "n", "q": "q", "H": "H", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "\u0d33", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "kannada_female": {"a": "a", "\u0911": "A", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "e", "E": "E", "\u0910": "\u0910", "o": "o", "O": "O", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "n", "c": "c", "C": "C", "j": "j", "J": "J", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", 
"\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "j", "D": "\u0921", "T": "\u0922", "f": "P", "\u0930": "r", "M": "n", "q": "q", "H": "H", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "\u0d33", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "malayalam_male": {"a": "a", "\u0911": "A", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "e", "E": "E", "\u0910": "\u0910", "o": "o", "O": "O", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "\u0919", "c": "c", "C": "C", "j": "j", "J": "j", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "j", "D": "\u0921", "T": "\u0922", "f": "P", "\u0930": "\u0930", "M": "n", "q": "q", "H": "H", "Z": "Z", "\u0928": "n", "N": "N", "\u0d7e": "\u0d7e", "\u0d7d": "\u0d7d", "\u0d7a": "\u0d7a", "\u0d7c": "\u0d7c", "\u0960": "R"}, "malayalam_female": {"a": "a", "\u0911": "A", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "e", "E": "E", "\u0910": "\u0910", "o": "o", "O": "O", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "\u0919", "c": "c", "C": "C", "j": "j", "J": "j", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", 
"\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "j", "D": "\u0921", "T": "\u0922", "f": "P", "\u0930": "\u0930", "M": "n", "q": "q", "H": "H", "Z": "Z", "\u0928": "n", "N": "N", "\u0d7e": "\u0d7e", "\u0d7d": "\u0d7d", "\u0d7a": "\u0d7a", "\u0d7c": "\u0d7c", "\u0960": "R"}, "manipuri_male": {"a": "a", "\u0911": "A", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "r", "e": "E", "E": "E", "\u0910": "\u0910", "o": "o", "O": "o", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "g", "\u0919": "\u0919", "c": "c", "C": "c", "j": "j", "J": "j", "\u091e": "y", "\u091f": "\u091f", "\u0920": "\u091f", "\u0921": "\u091f", "\u0922": "\u091f", "\u0923": "n", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "d", "n": "n", "p": "p", "P": "P", "b": "b", "B": "b", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "l", "w": "w", "\u0936": "\u0936", "\u0937": "\u0936", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "j", "D": "\u091f", "T": "\u091f", "f": "P", "\u0930": "r", "M": "n", "q": "q", "H": "h", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "l", "\u0d7d": "l", "\u0d7a": "n", "\u0d7c": "r", "\u0960": "r"}, "manipuri_female": {"a": "a", "\u0911": "A", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "r", "e": "E", "E": "E", "\u0910": "\u0910", "o": "o", "O": "o", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "g", "\u0919": "\u0919", "c": "c", "C": "c", "j": "j", "J": "j", "\u091e": "y", "\u091f": "\u091f", "\u0920": "\u091f", "\u0921": "\u091f", "\u0922": "\u091f", "\u0923": "n", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "d", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", 
"y": "y", "r": "r", "l": "l", "\u0d33": "l", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "j", "D": "\u091f", "T": "\u091f", "f": "P", "\u0930": "r", "M": "n", "q": "q", "H": "h", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "l", "\u0d7d": "l", "\u0d7a": "n", "\u0d7c": "r", "\u0960": "r"}, "marathi_male": {"a": "a", "\u0911": "\u0911", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "E", "E": "E", "\u0910": "\u0910", "o": "o", "O": "o", "\u090d": "\u090d", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "n", "c": "c", "C": "C", "j": "j", "J": "J", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "j", "D": "\u0921", "T": "\u0922", "f": "f", "\u0930": "\u0930", "M": "M", "q": "q", "H": "H", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "\u0d33", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "marathi_female": {"a": "a", "\u0911": "\u0911", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "E", "E": "E", "\u0910": "\u0910", "o": "o", "O": "o", "\u090d": "\u090d", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "\u0919", "c": "c", "C": "C", "j": "j", "J": "J", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", 
"\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "z", "D": "\u0921", "T": "\u0922", "f": "f", "\u0930": "\u0930", "M": "M", "q": "q", "H": "H", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "\u0d33", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "odia_male": {"a": "a", "\u0911": "A", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "E", "E": "E", "\u0910": "\u0910", "o": "o", "O": "o", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "\u0919", "c": "c", "C": "C", "j": "j", "J": "J", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "j", "D": "D", "T": "T", "f": "P", "\u0930": "r", "M": "M", "q": "q", "H": "H", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "\u0d33", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "odia_female": {"a": "a", "\u0911": "A", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "E", "E": "E", "\u0910": "E", "o": "o", "O": "o", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "\u0919", "c": "c", "C": "C", "j": "j", "J": "J", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": 
"\u0916", "G": "g", "z": "j", "D": "D", "T": "T", "f": "P", "\u0930": "r", "M": "M", "q": "q", "H": "H", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "\u0d33", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "rajasthani_male": {"a": "a", "\u0911": "\u0911", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "E", "E": "E", "\u0910": "\u0910", "o": "o", "O": "o", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "n", "c": "c", "C": "C", "j": "j", "J": "J", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "z", "D": "D", "T": "T", "f": "f", "\u0930": "r", "M": "M", "q": "q", "H": "H", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "\u0d33", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "rajasthani_female": {"a": "a", "\u0911": "\u0911", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "E", "E": "E", "\u0910": "\u0910", "o": "o", "O": "o", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "n", "c": "c", "C": "C", "j": "j", "J": "J", "\u091e": "y", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "z", "D": "D", "T": "\u0922", "f": "f", "\u0930": "r", "M": "n", 
"q": "q", "H": "h", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "\u0d33", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "tamil_male": {"a": "a", "\u0911": "A", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "\u0b89", "U": "U", "R": "r", "e": "e", "E": "E", "\u0910": "\u0910", "o": "o", "O": "O", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "k", "g": "g", "\u0918": "g", "\u0919": "\u0919", "c": "c", "C": "c", "j": "j", "J": "j", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u091f", "\u0921": "\u0921", "\u0922": "\u0921", "\u0923": "\u0923", "t": "t", "\u0925": "t", "d": "d", "\u0927": "d", "n": "n", "p": "p", "P": "p", "b": "b", "B": "b", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", "\u0936": "\u0937", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "k", "G": "g", "z": "j", "D": "\u0921", "T": "\u0921", "f": "f", "\u0930": "\u0930", "M": "n", "q": "n", "H": "h", "Z": "Z", "\u0928": "\u0928", "N": "n", "\u0d7e": "\u0d33", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "r"}, "tamil_female": {"a": "a", "\u0911": "A", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "\u0b89", "U": "U", "R": "r", "e": "e", "E": "E", "\u0910": "\u0910", "o": "o", "O": "O", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "k", "g": "g", "\u0918": "g", "\u0919": "\u0919", "c": "c", "C": "c", "j": "j", "J": "j", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u091f", "\u0921": "\u0921", "\u0922": "\u0921", "\u0923": "\u0923", "t": "t", "\u0925": "t", "d": "d", "\u0927": "d", "n": "n", "p": "p", "P": "p", "b": "b", "B": "b", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", "\u0936": "\u0937", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "k", "G": "g", "z": "j", "D": "\u0921", "T": "\u0921", "f": "f", "\u0930": "\u0930", "M": "n", "q": "n", "H": "h", "Z": "Z", "\u0928": "\u0928", "N": "n", "\u0d7e": "\u0d33", "\u0d7d": "l", 
"\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "r"}, "telugu_male": {"a": "a", "\u0911": "A", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "e", "E": "E", "\u0910": "\u0910", "o": "o", "O": "O", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "n", "c": "c", "C": "C", "j": "j", "J": "j", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "j", "D": "\u0921", "T": "\u0922", "f": "P", "\u0930": "\u0930", "M": "n", "q": "q", "H": "H", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "\u0d33", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}, "telugu_female": {"a": "a", "\u0911": "A", "A": "A", "\u0905": "A", "i": "i", "I": "I", "u": "u", "\u0b89": "u", "U": "U", "R": "R", "e": "e", "E": "E", "\u0910": "\u0910", "o": "o", "O": "O", "\u090d": "E", "\u0914": "\u0914", "k": "k", "\u0916": "\u0916", "g": "g", "\u0918": "\u0918", "\u0919": "n", "c": "c", "C": "C", "j": "j", "J": "j", "\u091e": "\u091e", "\u091f": "\u091f", "\u0920": "\u0920", "\u0921": "\u0921", "\u0922": "\u0922", "\u0923": "\u0923", "t": "t", "\u0925": "\u0925", "d": "d", "\u0927": "\u0927", "n": "n", "p": "p", "P": "P", "b": "b", "B": "B", "m": "m", "y": "y", "r": "r", "l": "l", "\u0d33": "\u0d33", "w": "w", "\u0936": "\u0936", "\u0937": "\u0937", "s": "s", "h": "h", "\u0915": "k", "K": "\u0916", "G": "g", "z": "j", "D": "\u0921", "T": "\u0922", "f": "P", "\u0930": "\u0930", "M": "n", "q": "q", "H": "H", "Z": "y", "\u0928": "n", "N": "n", "\u0d7e": "\u0d33", "\u0d7d": "l", "\u0d7a": "\u0923", "\u0d7c": "r", "\u0960": "R"}}
text_preprocess_for_inference.py ADDED
@@ -0,0 +1,887 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+ TTS Preprocessing
3
+ Developed by Arun Kumar A(CS20S013) - November 2022
4
+ Code Changes by Utkarsh - 2023
5
+ '''
6
+ import os
7
+ import re
8
+ import json
9
+ import pandas as pd
10
+ import string
11
+ from collections import defaultdict
12
+ import time
13
+ import subprocess
14
+ import shutil
15
+ from multiprocessing import Process
16
+ import traceback
17
+
18
+ #imports of dependencies from environment.yml
19
+ from num_to_words import num_to_word
20
+ from g2p_en import G2p
21
+
22
+ def add_to_dictionary(dict_to_add, dict_file):
23
+ append_string = ""
24
+ for key, value in dict_to_add.items():
25
+ append_string += (str(key) + " " + str(value) + "\n")
26
+
27
+ if os.path.isfile(dict_file):
28
+ # make a copy of the dictionary
29
+ source_dir = os.path.dirname(dict_file)
30
+ dict_file_name = os.path.basename(dict_file)
31
+ temp_file_name = "." + dict_file_name + ".temp"
32
+ temp_dict_file = os.path.join(source_dir, temp_file_name)
33
+ shutil.copy(dict_file, temp_dict_file)
34
+ # append the new words in the dictionary to the temp file
35
+ with open(temp_dict_file, "a") as f:
36
+ f.write(append_string)
37
+ # check if the write is successful and then replace the temp file as the dict file
38
+ try:
39
+ df_orig = pd.read_csv(dict_file, delimiter=" ", header=None, dtype=str)
40
+ df_temp = pd.read_csv(temp_dict_file, delimiter=" ", header=None, dtype=str)
41
+ if len(df_temp) > len(df_orig):
42
+ os.rename(temp_dict_file, dict_file)
43
+ print(f"{len(dict_to_add)} new words appended to Dictionary: {dict_file}")
44
+ except:
45
+ print(traceback.format_exc())
46
+ else:
47
+ # create a new dictionary
48
+ with open(dict_file, "a") as f:
49
+ f.write(append_string)
50
+ print(f"New Dictionary: {dict_file} created with {len(dict_to_add)} words")
51
+
52
+
53
+ class TextCleaner:
54
+ def __init__(self):
55
+ # this is a static set of cleaning rules to be applied
56
+ self.cleaning_rules = {
57
+ " +" : " ",
58
+ "^ +" : "",
59
+ " +$" : "",
60
+ "#" : "",
61
+ "[.,;।!](\r\n)*" : "# ",
62
+ "[.,;।!](\n)*" : "# ",
63
+ "(\r\n)+" : "# ",
64
+ "(\n)+" : "# ",
65
+ "(\r)+" : "# ",
66
+ """[?;:)(!|&’‘,।\."]""": "",
67
+ "[/']" : "",
68
+ "[-–]" : " ",
69
+ }
70
+
71
+ def clean(self, text):
72
+ for key, replacement in self.cleaning_rules.items():
73
+ text = re.sub(key, replacement, text)
74
+ return text
75
+
76
+ def clean_list(self, text):
77
+ # input is supposed to be a list of strings
78
+ output_text = []
79
+ for line in text:
80
+ line = line.strip()
81
+ for key, replacement in self.cleaning_rules.items():
82
+ line = re.sub(key, replacement, line)
83
+ output_text.append(line)
84
+ return output_text
85
+
86
+
87
+ class Phonifier:
88
+ def __init__(self, dict_location=None):
89
+ if dict_location is None:
90
+ dict_location = "phone_dict"
91
+ self.dict_location = dict_location
92
+
93
+ self.phone_dictionary = {}
94
+ # load dictionary for all the available languages
95
+ for dict_file in os.listdir(dict_location):
96
+ try:
97
+ if dict_file.startswith("."):
98
+ # ignore hidden files
99
+ continue
100
+ language = dict_file
101
+ dict_file_path = os.path.join(dict_location, dict_file)
102
+ df = pd.read_csv(dict_file_path, delimiter=" ", header=None, dtype=str)
103
+ self.phone_dictionary[language] = df.set_index(0).to_dict('dict')[1]
104
+ except Exception as e:
105
+ print(traceback.format_exc())
106
+
107
+ print("Phone dictionary loaded for the following languages:", list(self.phone_dictionary.keys()))
108
+
109
+ self.g2p = G2p()
110
+ print('Loading G2P model... Done!')
111
+ # Mapping between the cmu phones and the iitm cls
112
+ self.cmu_2_cls_map = {
113
+ "AA" : "aa",
114
+ "AA0" : "aa",
115
+ "AA1" : "aa",
116
+ "AA2" : "aa",
117
+ "AE" : "axx",
118
+ "AE0" : "axx",
119
+ "AE1" : "axx",
120
+ "AE2" : "axx",
121
+ "AH" : "a",
122
+ "AH0" : "a",
123
+ "AH1" : "a",
124
+ "AH2" : "a",
125
+ "AO" : "ax",
126
+ "AO0" : "ax",
127
+ "AO1" : "ax",
128
+ "AO2" : "ax",
129
+ "AW" : "ou",
130
+ "AW0" : "ou",
131
+ "AW1" : "ou",
132
+ "AW2" : "ou",
133
+ "AX" : "a",
134
+ "AY" : "ei",
135
+ "AY0" : "ei",
136
+ "AY1" : "ei",
137
+ "AY2" : "ei",
138
+ "B" : "b",
139
+ "CH" : "c",
140
+ "D" : "dx",
141
+ "DH" : "d",
142
+ "EH" : "ee",
143
+ "EH0" : "ee",
144
+ "EH1" : "ee",
145
+ "EH2" : "ee",
146
+ "ER" : "a r",
147
+ "ER0" : "a r",
148
+ "ER1" : "a r",
149
+ "ER2" : "a r",
150
+ "EY" : "ee",
151
+ "EY0" : "ee",
152
+ "EY1" : "ee",
153
+ "EY2" : "ee",
154
+ "F" : "f",
155
+ "G" : "g",
156
+ "HH" : "h",
157
+ "IH" : "i",
158
+ "IH0" : "i",
159
+ "IH1" : "i",
160
+ "IH2" : "i",
161
+ "IY" : "ii",
162
+ "IY0" : "ii",
163
+ "IY1" : "ii",
164
+ "IY2" : "ii",
165
+ "JH" : "j",
166
+ "K" : "k",
167
+ "L" : "l",
168
+ "M" : "m",
169
+ "N" : "n",
170
+ "NG" : "ng",
171
+ "OW" : "o",
172
+ "OW0" : "o",
173
+ "OW1" : "o",
174
+ "OW2" : "o",
175
+ "OY" : "ei",
176
+ "OY0" : "ei",
177
+ "OY1" : "ei",
178
+ "OY2" : "ei",
179
+ "P" : "p",
180
+ "R" : "r",
181
+ "S" : "s",
182
+ "SH" : "sh",
183
+ "T" : "tx",
184
+ "TH" : "t",
185
+ "UH" : "u",
186
+ "UH0" : "u",
187
+ "UH1" : "u",
188
+ "UH2" : "u",
189
+ "UW" : "uu",
190
+ "UW0" : "uu",
191
+ "UW1" : "uu",
192
+ "UW2" : "uu",
193
+ "V" : "w",
194
+ "W" : "w",
195
+ "Y" : "y",
196
+ "Z" : "z",
197
+ "ZH" : "sh",
198
+ }
199
+
200
+ # Mapping between the iitm cls and iitm char
201
+ self.cls_2_chr_map = {
202
+ "aa" : "A",
203
+ "ii" : "I",
204
+ "uu" : "U",
205
+ "ee" : "E",
206
+ "oo" : "O",
207
+ "nn" : "N",
208
+ "ae" : "ऍ",
209
+ "ag" : "ऽ",
210
+ "au" : "औ",
211
+ "axx" : "अ",
212
+ "ax" : "ऑ",
213
+ "bh" : "B",
214
+ "ch" : "C",
215
+ "dh" : "ध",
216
+ "dx" : "ड",
217
+ "dxh" : "ढ",
218
+ "dxhq" : "T",
219
+ "dxq" : "D",
220
+ "ei" : "ऐ",
221
+ "ai" : "ऐ",
222
+ "eu" : "உ",
223
+ "gh" : "घ",
224
+ "gq" : "G",
225
+ "hq" : "H",
226
+ "jh" : "J",
227
+ "kh" : "ख",
228
+ "khq" : "K",
229
+ "kq" : "क",
230
+ "ln" : "ൾ",
231
+ "lw" : "ൽ",
232
+ "lx" : "ള",
233
+ "mq" : "M",
234
+ "nd" : "न",
235
+ "ng" : "ङ",
236
+ "nj" : "ञ",
237
+ "nk" : "Y",
238
+ "nw" : "ൺ",
239
+ "nx" : "ण",
240
+ "ou" : "औ",
241
+ "ph" : "P",
242
+ "rq" : "R",
243
+ "rqw" : "ॠ",
244
+ "rw" : "ർ",
245
+ "rx" : "र",
246
+ "sh" : "श",
247
+ "sx" : "ष",
248
+ "th" : "थ",
249
+ "tx" : "ट",
250
+ "txh" : "ठ",
251
+ "wv" : "W",
252
+ "zh" : "Z",
253
+ }
254
+
255
+ # Multilingual support for OOV characters
256
+ oov_map_json_file = 'multilingualcharmap.json'
257
+ with open(oov_map_json_file, 'r') as oov_file:
258
+ self.oov_map = json.load(oov_file)
259
+
260
+ def __is_float(self, word):
261
+ parts = word.split('.')
262
+ if len(parts) != 2:
263
+ return False
264
+ return parts[0].isdecimal() and parts[1].isdecimal()
265
+
266
+ def en_g2p(self, word):
267
+ phn_out = self.g2p(word)
268
+ # print(f"phn_out: {phn_out}")
269
+ # iterate over the string list and replace each word with the corresponding value from the dictionary
270
+ for i, phn in enumerate(phn_out):
271
+ if phn in self.cmu_2_cls_map.keys():
272
+ phn_out[i] = self.cmu_2_cls_map[phn]
273
+ # cls_out = self.cmu_2_cls_map[phn]
274
+ if phn_out[i] in self.cls_2_chr_map.keys():
275
+ phn_out[i] = self.cls_2_chr_map[phn_out[i]]
276
+ else:
277
+ pass
278
+ else:
279
+ pass # ignore words that are not in the dictionary
280
+ # print(f"i: {i}, phn: {phn}, cls_out: {cls_out}, phn_out: {phn_out[i]}")
281
+ return ("".join(phn_out)).strip().replace(" ", "")
282
+
283
+ def __post_phonify(self, text, language, gender):
284
+ language_gender_id = language+'_'+gender
285
+ if language_gender_id in self.oov_map.keys():
286
+ output_string = ''
287
+ for char in text:
288
+ if char in self.oov_map[language_gender_id].keys():
289
+ output_string += self.oov_map[language_gender_id][char]
290
+ else:
291
+ output_string += char
292
+ # output_string += self.oov_map['language_gender_id']['char']
293
+ return output_string
294
+ else:
295
+ return text
296
+
297
+ def __is_english_word(self, word):
298
+ maxchar = max(word)
299
+ if u'\u0000' <= maxchar <= u'\u007f':
300
+ return True
301
+ return False
302
+
303
+ def __phonify(self, text, language, gender):
304
+ # text is expected to be a list of strings
305
+ words = set((" ".join(text)).split(" "))
306
+ #print(f"words test: {words}")
307
+ non_dict_words = []
308
+
309
+
310
+ if language in self.phone_dictionary:
311
+ for word in words:
312
+ # print(f"word: {word}")
313
+ if word not in self.phone_dictionary[language] and (language == "english" or (not self.__is_english_word(word))):
314
+ non_dict_words.append(word)
315
+ #print('INSIDE IF CONDITION OF ADDING WORDS')
316
+ else:
317
+ non_dict_words = words
318
+ print(f"word not in dict: {non_dict_words}")
319
+
320
+ if len(non_dict_words) > 0:
321
+ # unified parser has to be run for the non dictionary words
322
+ os.makedirs("tmp", exist_ok=True)
323
+ timestamp = str(time.time())
324
+ non_dict_words_file = os.path.abspath("tmp/non_dict_words_" + timestamp)
325
+ out_dict_file = os.path.abspath("tmp/out_dict_" + timestamp)
326
+ with open(non_dict_words_file, "w") as f:
327
+ f.write("\n".join(non_dict_words))
328
+
329
+ if(language == 'tamil'):
330
+ tamil_parser_cmd = "tamil_parser.sh"
331
+ subprocess.run(["bash", tamil_parser_cmd, non_dict_words_file, out_dict_file, timestamp, "ssn_parser"])
332
+ elif(language == 'english'):
333
+ phn_out_dict = {}
334
+ for i in range(0,len(non_dict_words)):
335
+ phn_out_dict[non_dict_words[i]] = self.en_g2p(non_dict_words[i])
336
+ # Create a string representation of the dictionary
337
+ data_str = "\n".join([f"{key}\t{value}" for key, value in phn_out_dict.items()])
338
+ print(f"data_str: {data_str}")
339
+ with open(out_dict_file, "w") as f:
340
+ f.write(data_str)
341
+ else:
342
+
343
+ out_dict_file = os.path.abspath("tmp/out_dict_" + timestamp)
344
+ from get_phone_mapped_python import TextReplacer
345
+
346
+ from indic_unified_parser.uparser import wordparse
347
+
348
+ text_replacer=TextReplacer()
349
+ # def write_output_to_file(output_text, file_path):
350
+ # with open(file_path, 'w') as f:
351
+ # f.write(output_text)
352
+ parsed_output_list = []
353
+ for word in non_dict_words:
354
+ parsed_word = wordparse(word, 0, 0, 1)
355
+ parsed_output_list.append(parsed_word)
356
+ replaced_output_list = [text_replacer.apply_replacements(parsed_word) for parsed_word in parsed_output_list]
357
+ with open(out_dict_file, 'w', encoding='utf-8') as file:
358
+ for original_word, formatted_word in zip(non_dict_words, replaced_output_list):
359
+ line = f"{original_word}\t{formatted_word}\n"
360
+ file.write(line)
361
+ print(line, end='')
362
+
363
+
364
+ try:
365
+
366
+ df = pd.read_csv(out_dict_file, delimiter="\t", header=None, dtype=str)
367
+ #print('DATAFRAME OUTPUT FILE', df.head())
368
+ new_dict = df.dropna().set_index(0).to_dict('dict')[1]
369
+ #print("new dict",new_dict)
370
+ if language not in self.phone_dictionary:
371
+ self.phone_dictionary[language] = new_dict
372
+ else:
373
+ self.phone_dictionary[language].update(new_dict)
374
+ # run a non-blocking child process to update the dictionary file
375
+ #print("phone_dict", self.phone_dictionary)
376
+ p = Process(target=add_to_dictionary, args=(new_dict, os.path.join(self.dict_location, language)))
377
+ p.start()
378
+ except Exception as err:
379
+ print(f"Error: While loading {out_dict_file}")
380
+ traceback.print_exc()
381
+
382
+ # phonify text with dictionary
383
+ text_phonified = []
384
+ for phrase in text:
385
+ phrase_phonified = []
386
+ for word in phrase.split(" "):
387
+ if self.__is_english_word(word):
388
+ if word in self.phone_dictionary["english"]:
389
+ phrase_phonified.append(str(self.phone_dictionary["english"][word]))
390
+ else:
391
+ phrase_phonified.append(str(self.en_g2p(word)))
392
+ elif word in self.phone_dictionary[language]:
393
+ # if a word could not be parsed, skip it
394
+ phrase_phonified.append(str(self.phone_dictionary[language][word]))
395
+ # text_phonified.append(self.__post_phonify(" ".join(phrase_phonified),language, gender))
396
+ text_phonified.append(" ".join(phrase_phonified))
397
+ return text_phonified
398
+
399
+ def __merge_lists(self, lists):
400
+ merged_string = ""
401
+ for list in lists:
402
+ for word in list:
403
+ merged_string += word + " "
404
+ return merged_string.strip()
405
+
406
+ def __phonify_list(self, text, language, gender):
407
+ # text is expected to be a list of list of strings
408
+ words = set(self.__merge_lists(text).split(" "))
409
+ non_dict_words = []
410
+ if language in self.phone_dictionary:
411
+ for word in words:
412
+ if word not in self.phone_dictionary[language] and (language == "english" or (not self.__is_english_word(word))):
413
+ non_dict_words.append(word)
414
+ else:
415
+ non_dict_words = words
416
+
417
+ if len(non_dict_words) > 0:
418
+ print(len(non_dict_words))
419
+ print(non_dict_words)
420
+ # unified parser has to be run for the non dictionary words
421
+ os.makedirs("tmp", exist_ok=True)
422
+ timestamp = str(time.time())
423
+ non_dict_words_file = os.path.abspath("tmp/non_dict_words_" + timestamp)
424
+ out_dict_file = os.path.abspath("tmp/out_dict_" + timestamp)
425
+ with open(non_dict_words_file, "w") as f:
426
+ f.write("\n".join(non_dict_words))
427
+
428
+ if(language == 'tamil'):
429
+ tamil_parser_cmd = "tamil_parser.sh"
430
+ subprocess.run(["bash", tamil_parser_cmd, non_dict_words_file, out_dict_file, timestamp, "ssn_parser"])
431
+ elif(language == 'english'):
432
+ phn_out_dict = {}
433
+ for i in range(0,len(non_dict_words)):
434
+ phn_out_dict[non_dict_words[i]] = self.en_g2p(non_dict_words[i])
435
+ # Create a string representation of the dictionary
436
+ data_str = "\n".join([f"{key}\t{value}" for key, value in phn_out_dict.items()])
437
+ print(f"data_str: {data_str}")
438
+ with open(out_dict_file, "w") as f:
439
+ f.write(data_str)
440
+ else:
441
+ out_dict_file = os.path.abspath("tmp/out_dict_" + timestamp)
442
+ from get_phone_mapped_python import TextReplacer
443
+
444
+ from indic_unified_parser.uparser import wordparse
445
+
446
+ text_replacer=TextReplacer()
447
+
448
+ parsed_output_list = []
449
+ for word in non_dict_words:
450
+ parsed_word = wordparse(word, 0, 0, 1)
451
+ parsed_output_list.append(parsed_word)
452
+ replaced_output_list = [text_replacer.apply_replacements(parsed_word) for parsed_word in parsed_output_list]
453
+ with open(out_dict_file, 'w', encoding='utf-8') as file:
454
+ for original_word, formatted_word in zip(non_dict_words, replaced_output_list):
455
+ line = f"{original_word}\t{formatted_word}\n"
456
+ file.write(line)
457
+ print(line, end='')
458
+
459
+ try:
460
+ df = pd.read_csv(out_dict_file, delimiter="\t", header=None, dtype=str)
461
+ new_dict = df.dropna().set_index(0).to_dict('dict')[1]
462
+ print(new_dict)
463
+ if language not in self.phone_dictionary:
464
+ self.phone_dictionary[language] = new_dict
465
+ else:
466
+ self.phone_dictionary[language].update(new_dict)
467
+ # run a non-blocking child process to update the dictionary file
468
+ p = Process(target=add_to_dictionary, args=(new_dict, os.path.join(self.dict_location, language)))
469
+ p.start()
470
+ except Exception as err:
471
+ traceback.print_exc()
472
+
473
+ # phonify text with dictionary
474
+ text_phonified = []
475
+ for line in text:
476
+ line_phonified = []
477
+ for phrase in line:
478
+ phrase_phonified = []
479
+ for word in phrase.split(" "):
480
+ if self.__is_english_word(word):
481
+ if word in self.phone_dictionary["english"]:
482
+ phrase_phonified.append(str(self.phone_dictionary["english"][word]))
483
+ else:
484
+ phrase_phonified.append(str(self.en_g2p(word)))
485
+ elif word in self.phone_dictionary[language]:
486
+ # if a word could not be parsed, skip it
487
+ phrase_phonified.append(str(self.phone_dictionary[language][word]))
488
+ # line_phonified.append(self.__post_phonify(" ".join(phrase_phonified), language, gender))
489
+ line_phonified.append(" ".join(phrase_phonified))
490
+ text_phonified.append(line_phonified)
491
+ return text_phonified
492
+
493
+ def phonify(self, text, language, gender):
494
+ if not isinstance(text, list):
495
+ out = self.__phonify([text], language, gender)
496
+ return out[0]
497
+ return self.__phonify(text, language, gender)
498
+
499
+ def phonify_list(self, text, language, gender):
500
+ if isinstance(text, list):
501
+ return self.__phonify_list(text, language, gender)
502
+ else:
503
+ print("Error!! Expected to have a list as input.")
504
+
505
+
506
+ class TextNormalizer:
507
+ def __init__(self, char_map_location=None, phonifier = Phonifier()):
508
+ self.phonifier = phonifier
509
+ if char_map_location is None:
510
+ char_map_location = "charmap"
511
+
512
+ # this is a static set of cleaning rules to be applied
513
+ self.cleaning_rules = {
514
+ " +" : " ",
515
+ "^ +" : "",
516
+ " +$" : "",
517
+ "#$" : "",
518
+ "# +$" : "",
519
+ }
520
+
521
+ # this is the list of languages supported by num_to_words
522
+ self.keydict = {"english" : "en",
523
+ "hindi" : "hi",
524
+ "gujarati" : "gu",
525
+ "marathi" : "mr",
526
+ "bengali" : "bn",
527
+ "telugu" : "te",
528
+ "tamil" : "ta",
529
+ "kannada" : "kn",
530
+ "odia" : "or",
531
+ "punjabi" : "pa"
532
+ }
533
+
534
+ self.g2p = G2p()
535
+ print('Loading G2P model... Done!')
536
+
537
+ def __post_cleaning(self, text):
538
+ for key, replacement in self.cleaning_rules.items():
539
+ text = re.sub(key, replacement, text)
540
+ return text
541
+
542
+ def __post_cleaning_list(self, text):
543
+ # input is supposed to be a list of strings
544
+ output_text = []
545
+ for line in text:
546
+ for key, replacement in self.cleaning_rules.items():
547
+ line = re.sub(key, replacement, line)
548
+ output_text.append(line)
549
+ return output_text
550
+
551
+ def __check_char_type(self, str_c):
552
+ # Determine the type of the character
553
+ if str_c.isnumeric():
554
+ char_type = "number"
555
+ elif str_c in string.punctuation:
556
+ char_type = "punctuation"
557
+ elif str_c in string.whitespace:
558
+ char_type = "whitespace"
559
+ elif str_c.isalpha() and str_c.isascii():
560
+ char_type = "ascii"
561
+ else:
562
+ char_type = "non-ascii"
563
+ return char_type
564
+
565
+ def insert_space(self, text):
566
+ '''
567
+ Check if the text contains numbers and English words and if they are without space inserts space between them.
568
+ '''
569
+ # Initialize variables to track the previous character type and whether a space should be inserted
570
+ prev_char_type = None
571
+ next_char_type = None
572
+ insert_space = False
573
+
574
+ # Output string
575
+ output_string = ""
576
+
577
+ # Iterate through each character in the text
578
+ for i, c in enumerate(text):
579
+ # Determine the type of the character
580
+ char_type = self.__check_char_type(c)
581
+ if i == (len(text) - 1):
582
+ next_char_type = None
583
+ else:
584
+ next_char_type = self.__check_char_type(text[i+1])
585
+ # print(f"{i}: {c} is a {char_type} character and next character is a {next_char_type}")
586
+
587
+ # If the character type has changed from the previous character, check if a space should be inserted
588
+ if (char_type != prev_char_type and prev_char_type != None and char_type != "punctuation" and char_type != "whitespace"):
589
+ if next_char_type != "punctuation" or next_char_type != "whitespace":
590
+ insert_space = True
591
+
592
+ # Insert a space if needed
593
+ if insert_space:
594
+ output_string += " "+c
595
+ insert_space = False
596
+ else:
597
+ output_string += c
598
+
599
+ # Update the previous character type
600
+ prev_char_type = char_type
601
+
602
+ # Print the modified text
603
+ output_string = re.sub(r' +', ' ', output_string)
604
+ return output_string
605
+
606
+ def insert_space_list(self, text):
607
+ '''
608
+ Expect the input to be in form of list of string.
609
+ Check if the text contains numbers and English words and if they are without space inserts space between them.
610
+ '''
611
+ # Output string list
612
+ output_list = []
613
+
614
+ for line in text:
615
+ # Initialize variables to track the previous character type and whether a space should be inserted
616
+ prev_char_type = None
617
+ next_char_type = None
618
+ insert_space = False
619
+ # Output string
620
+ output_string = ""
621
+ # Iterate through each character in the line
622
+ for i, c in enumerate(line):
623
+ # Determine the type of the character
624
+ char_type = self.__check_char_type(c)
625
+ if i == (len(line) - 1):
626
+ next_char_type = None
627
+ else:
628
+ next_char_type = self.__check_char_type(line[i+1])
629
+ # print(f"{i}: {c} is a {char_type} character and next character is a {next_char_type}")
630
+
631
+ # If the character type has changed from the previous character, check if a space should be inserted
632
+ if (char_type != prev_char_type and prev_char_type != None and char_type != "punctuation" and char_type != "whitespace"):
633
+ if next_char_type != "punctuation" or next_char_type != "whitespace":
634
+ insert_space = True
635
+
636
+ # Insert a space if needed
637
+ if insert_space:
638
+ output_string += " "+c
639
+ insert_space = False
640
+ else:
641
+ output_string += c
642
+
643
+ # Update the previous character type
644
+ prev_char_type = char_type
645
+
646
+ # Print the modified line
647
+ output_string = re.sub(r' +', ' ', output_string)
648
+ output_list.append(output_string)
649
+ return output_list
650
+
651
+ def num2text(self, text, language):
652
+ if language in self.keydict.keys():
653
+ digits = sorted(list(map(int, re.findall(r'\d+', text))),reverse=True)
654
+ if digits:
655
+ for digit in digits:
656
+ text = re.sub(str(digit), ' '+num_to_word(digit, self.keydict[language])+' ', text)
657
+ return self.__post_cleaning(text)
658
+ else:
659
+ print(f"No num-to-char for the given language {language}.")
660
+ return self.__post_cleaning(text)
661
+
662
+ def num2text_list(self, text, language):
663
+ # input is supposed to be a list of strings
664
+ if language in self.keydict.keys():
665
+ output_text = []
666
+ for line in text:
667
+ digits = sorted(list(map(int, re.findall(r'\d+', line))),reverse=True)
668
+ if digits:
669
+ for digit in digits:
670
+ line = re.sub(str(digit), ' '+num_to_word(digit, self.keydict[language])+' ', line)
671
+ output_text.append(line)
672
+ return self.__post_cleaning_list(output_text)
673
+ else:
674
+ print(f"No num-to-char for the given language {language}.")
675
+ return self.__post_cleaning_list(text)
676
+
677
+ def normalize(self, text, language):
678
+ return self.__post_cleaning(text)
679
+
680
+ def normalize_list(self, text, language):
681
+ # input is supposed to be a list of strings
682
+ return self.__post_cleaning_list(text)
683
+
684
+
685
+ class TextPhrasifier:
686
+ @classmethod
687
+ def phrasify(cls, text):
688
+ phrase_list = []
689
+ for phrase in text.split("#"):
690
+ phrase = phrase.strip()
691
+ if phrase != "":
692
+ phrase_list.append(phrase)
693
+ return phrase_list
694
+
695
+ class TextPhrasifier_List:
696
+ @classmethod
697
+ def phrasify(cls, text):
698
+ # input is supposed to be a list of strings
699
+ # output is list of list of strings
700
+ output_list = []
701
+ for line in text:
702
+ phrase_list = []
703
+ for phrase in line.split("#"):
704
+ phrase = phrase.strip()
705
+ if phrase != "":
706
+ phrase_list.append(phrase)
707
+ output_list.append(phrase_list)
708
+ return output_list
709
+
710
+ class DurAlignTextProcessor:
711
+ def __init__(self):
712
+ # this is a static set of cleaning rules to be applied
713
+ self.cleaning_rules = {
714
+ " +" : "",
715
+ "^" : "$",
716
+ "$" : ".",
717
+ }
718
+ self.cleaning_rules_English = {
719
+ " +" : "",
720
+ "$" : ".",
721
+ }
722
+ def textProcesor(self, text):
723
+ for key, replacement in self.cleaning_rules.items():
724
+ for idx in range(0,len(text)):
725
+ text[idx] = re.sub(key, replacement, text[idx])
726
+
727
+ return text
728
+
729
+ def textProcesorForEnglish(self, text):
730
+ for key, replacement in self.cleaning_rules_English.items():
731
+ for idx in range(0,len(text)):
732
+ text[idx] = re.sub(key, replacement, text[idx])
733
+
734
+ return text
735
+
736
+ def textProcesor_list(self, text):
737
+ # input expected in 'list of list of string' format
738
+ output_text = []
739
+ for line in text:
740
+ for key, replacement in self.cleaning_rules.items():
741
+ for idx in range(0,len(line)):
742
+ line[idx] = re.sub(key, replacement, line[idx])
743
+ output_text.append(line)
744
+
745
+ return output_text
746
+
747
+
748
+ class TTSDurAlignPreprocessor:
749
+ def __init__(self,
750
+ text_cleaner = TextCleaner(),
751
+ text_normalizer=TextNormalizer(),
752
+ phonifier = Phonifier(),
753
+ post_processor = DurAlignTextProcessor()):
754
+ self.text_cleaner = text_cleaner
755
+ self.text_normalizer = text_normalizer
756
+ self.phonifier = phonifier
757
+ self.post_processor = post_processor
758
+
759
+ def preprocess(self, text, language, gender):
760
+ # text = text.strip()
761
+ print(text)
762
+ text = self.text_cleaner.clean(text)
763
+ print("cleaned text", text)
764
+ # text = self.text_normalizer.insert_space(text)
765
+ text = self.text_normalizer.num2text(text, language)
766
+ # print(text)
767
+ text = self.text_normalizer.normalize(text, language)
768
+ # print(text)
769
+ phrasified_text = TextPhrasifier.phrasify(text)
770
+ #print("phrased",phrasified_text)
771
+ phonified_text = self.phonifier.phonify(phrasified_text, language, gender)
772
+ print("phonetext",phonified_text)
773
+ phonified_text = self.post_processor.textProcesor(phonified_text)
774
+ print(phonified_text)
775
+ return phonified_text, phrasified_text
776
+
777
+ class TTSDurAlignPreprocessor_VTT:
778
+ def __init__(self,
779
+ text_cleaner = TextCleaner(),
780
+ text_normalizer=TextNormalizer(),
781
+ phonifier = Phonifier(),
782
+ post_processor = DurAlignTextProcessor()):
783
+ self.text_cleaner = text_cleaner
784
+ self.text_normalizer = text_normalizer
785
+ self.phonifier = phonifier
786
+ self.post_processor = post_processor
787
+
788
+ def preprocess(self, text, language, gender):
789
+ # text = text.strip()
790
+ text = self.text_cleaner.clean_list(text)
791
+ # text = self.text_normalizer.insert_space_list(text)
792
+ text = self.text_normalizer.num2text_list(text, language)
793
+ text = self.text_normalizer.normalize_list(text, language)
794
+ phrasified_text = TextPhrasifier_List.phrasify(text)
795
+ phonified_text = self.phonifier.phonify_list(phrasified_text, language, gender)
796
+ phonified_text = self.post_processor.textProcesor_list(phonified_text)
797
+ return phonified_text, phrasified_text
798
+
799
+
800
+ class CharTextPreprocessor:
801
+ def __init__(self,
802
+ text_cleaner = TextCleaner(),
803
+ text_normalizer=TextNormalizer()):
804
+ self.text_cleaner = text_cleaner
805
+ self.text_normalizer = text_normalizer
806
+
807
+ def preprocess(self, text, language, gender=None):
808
+ text = text.strip()
809
+ text = self.text_cleaner.clean(text)
810
+ # text = self.text_normalizer.insert_space(text)
811
+ text = self.text_normalizer.num2text(text, language)
812
+ text = self.text_normalizer.normalize(text, language)
813
+ phrasified_text = TextPhrasifier.phrasify(text)
814
+ phonified_text = phrasified_text # No phonification for character TTS models
815
+ return phonified_text, phrasified_text
816
+
817
+ class CharTextPreprocessor_VTT:
818
+ def __init__(self,
819
+ text_cleaner = TextCleaner(),
820
+ text_normalizer=TextNormalizer()
821
+ ):
822
+ self.text_cleaner = text_cleaner
823
+ self.text_normalizer = text_normalizer
824
+
825
+ def preprocess(self, text, language, gender=None):
826
+ # text = text.strip()
827
+ text = self.text_cleaner.clean_list(text)
828
+ # text = self.text_normalizer.insert_space_list(text)
829
+ text = self.text_normalizer.num2text_list(text, language)
830
+ text = self.text_normalizer.normalize_list(text, language)
831
+ phrasified_text = TextPhrasifier_List.phrasify(text)
832
+ phonified_text = phrasified_text # No phonification for character TTS models
833
+ return phonified_text, phrasified_text
834
+
835
+
836
+ class TTSPreprocessor:
837
+ def __init__(self,
838
+ text_cleaner = TextCleaner(),
839
+ text_normalizer=TextNormalizer(),
840
+ phonifier = Phonifier(),
841
+ text_phrasefier = TextPhrasifier(),
842
+ post_processor = DurAlignTextProcessor()):
843
+ self.text_cleaner = text_cleaner
844
+ self.text_normalizer = text_normalizer
845
+ self.phonifier = phonifier
846
+ self.text_phrasefier = text_phrasefier
847
+ self.post_processor = post_processor
848
+
849
+ def preprocess(self, text, language, gender):
850
+ text = text.strip()
851
+ text = self.text_cleaner.clean(text)
852
+ # text = self.text_normalizer.insert_space(text)
853
+ text = self.text_normalizer.num2text(text, language)
854
+ text = self.text_normalizer.normalize(text, language)
855
+ phrasified_text = TextPhrasifier.phrasify(text)
856
+ phonified_text = self.phonifier.phonify(phrasified_text, language, gender)
857
+ print(phonified_text)
858
+ phonified_text = self.post_processor.textProcesorForEnglish(phonified_text)
859
+ print(phonified_text)
860
+ return phonified_text, phrasified_text
861
+
862
+ class TTSPreprocessor_VTT:
863
+ def __init__(self,
864
+ text_cleaner = TextCleaner(),
865
+ text_normalizer=TextNormalizer(),
866
+ phonifier = Phonifier(),
867
+ text_phrasefier = TextPhrasifier_List()):
868
+ self.text_cleaner = text_cleaner
869
+ self.text_normalizer = text_normalizer
870
+ self.phonifier = phonifier
871
+ self.text_phrasefier = text_phrasefier
872
+
873
+ def preprocess(self, text, language, gender):
874
+ # print(f"Original text: {text}")
875
+ text = self.text_cleaner.clean_list(text)
876
+ # print(f"After text cleaner: {text}")
877
+ # text = self.text_normalizer.insert_space_list(text)
878
+ # print(f"After insert space: {text}")
879
+ text = self.text_normalizer.num2text_list(text, language)
880
+ # print(f"After num2text: {text}")
881
+ text = self.text_normalizer.normalize_list(text, language)
882
+ # print(f"After text normalizer: {text}")
883
+ phrasified_text = TextPhrasifier_List.phrasify(text)
884
+ # print(f"phrasified_text: {phrasified_text}")
885
+ phonified_text = self.phonifier.phonify_list(phrasified_text, language, gender)
886
+ # print(f"phonified_text: {phonified_text}")
887
+ return phonified_text, phrasified_text