Upload folder using huggingface_hub

Browse files

Files changed (18) hide show

__pycache__/NumberToText.cpython-310.pyc +0 -0
__pycache__/text_preprocess_for_inference.cpython-310.pyc +0 -0
__pycache__/utilities.cpython-310.pyc +0 -0
hifigan/__pycache__/__init__.cpython-310.pyc +0 -0
hifigan/__pycache__/env.cpython-310.pyc +0 -0
hifigan/__pycache__/meldataset.cpython-310.pyc +0 -0
hifigan/__pycache__/models.cpython-310.pyc +0 -0
hifigan/__pycache__/utils.cpython-310.pyc +0 -0
hindi_latest/female/model/config.yaml +3 -3
hindi_latest/male/model/model.pth +3 -0
main_ov.py +346 -0
server.py +272 -0
start_server.bat +6 -0
test_tts.py +36 -0
text_preprocess_for_inference.py +971 -949
tmp/non_dict_words_1776242609.109296 +0 -0
tmp/non_dict_words_1776242740.8554978 +0 -0
utilities.py +75 -0

__pycache__/NumberToText.cpython-310.pyc ADDED Viewed

Binary file (3.04 kB). View file

__pycache__/text_preprocess_for_inference.cpython-310.pyc ADDED Viewed

Binary file (22.3 kB). View file

__pycache__/utilities.cpython-310.pyc ADDED Viewed

Binary file (8.17 kB). View file

hifigan/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (199 Bytes). View file

hifigan/__pycache__/env.cpython-310.pyc ADDED Viewed

Binary file (864 Bytes). View file

hifigan/__pycache__/meldataset.cpython-310.pyc ADDED Viewed

Binary file (5.43 kB). View file

hifigan/__pycache__/models.cpython-310.pyc ADDED Viewed

Binary file (8.71 kB). View file

hifigan/__pycache__/utils.cpython-310.pyc ADDED Viewed

Binary file (1.99 kB). View file

hindi_latest/female/model/config.yaml CHANGED Viewed

@@ -51,7 +51,7 @@ energy_extract_conf:
   win_length: 4096
 energy_normalize: global_mvn
 energy_normalize_conf:
-  stats_file: /home/speech/Fastspeech2_latest models/Fastspeech2_HS/hindi_latest/female/model/energy_stats.npz
 exclude_weight_decay: false
 exclude_weight_decay_conf: {}
 feats_extract: fbank
@@ -90,7 +90,7 @@ no_forward_run: false
 non_linguistic_symbols: null
 normalize: global_mvn
 normalize_conf:
-  stats_file: /home/speech/Fastspeech2_latest models/Fastspeech2_HS/hindi_latest/female/model/feats_stats.npz
 num_att_plot: 3
 num_cache_chunks: 1024
 num_iters_per_epoch: 800
@@ -111,7 +111,7 @@ pitch_extract_conf:
   reduction_factor: 1
 pitch_normalize: global_mvn
 pitch_normalize_conf:
-  stats_file: /home/speech/Fastspeech2_latest models/Fastspeech2_HS/hindi_latest/female/model/pitch_stats.npz
 pretrain_path: null
 print_config: false
 required:

   win_length: 4096
 energy_normalize: global_mvn
 energy_normalize_conf:
+  stats_file: C:\Users\PEGPVINUser01\Desktop\Realtime-Translation-AIPC\backend\TTS\FastSpeech2_HS_HF\hindi_latest\female\model/energy_stats.npz
 exclude_weight_decay: false
 exclude_weight_decay_conf: {}
 feats_extract: fbank
 non_linguistic_symbols: null
 normalize: global_mvn
 normalize_conf:
+  stats_file: C:\Users\PEGPVINUser01\Desktop\Realtime-Translation-AIPC\backend\TTS\FastSpeech2_HS_HF\hindi_latest\female\model/feats_stats.npz
 num_att_plot: 3
 num_cache_chunks: 1024
 num_iters_per_epoch: 800
   reduction_factor: 1
 pitch_normalize: global_mvn
 pitch_normalize_conf:
+  stats_file: C:\Users\PEGPVINUser01\Desktop\Realtime-Translation-AIPC\backend\TTS\FastSpeech2_HS_HF\hindi_latest\female\model/pitch_stats.npz
 pretrain_path: null
 print_config: false
 required:

hindi_latest/male/model/model.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:81790566fd4660ca46c4692f6fbdb30d8e5c9cd657084a0eb40c804e6ec2b9ab
+size 152128410

main_ov.py ADDED Viewed

	@@ -0,0 +1,346 @@

+from text_preprocess_for_inference import TTSDurAlignPreprocessor, CharTextPreprocessor, TTSPreprocessor
+from espnet2.bin.tts_inference import Text2Speech
+from scipy.io.wavfile import write
+import json
+import torch
+import yaml
+import sys
+from utilities import SAMPLING_RATE, WARMUP_PARAGRAPHS
+from datetime import datetime
+import os
+import time
+import numpy as np
+import openvino as ov
+sys.path.append(os.getenv("HIFIGAN_PATH", f"hifigan"))
+from hifigan.env import AttrDict
+from hifigan.models import Generator
+from hifigan.meldataset import MAX_WAV_VALUE
+import torch.nn.functional as F
+import nltk
+nltk.download('averaged_perceptron_tagger_eng')
+device = "cuda" if torch.cuda.is_available() else "cpu"
+MAX_DEFAULT_VALUE = 600
+def load_hifigan_vocoder(language: str, gender: str, device: str, dtype: str = "float32"):
+    """
+    Build the HiFi-GAN generator for one language/gender pair.
+
+    Reads ``vocoder/<gender>/<language>/config.json`` and the ``generator``
+    checkpoint stored next to it, seeds torch from the config, loads the
+    weights, switches the network to eval mode and strips weight norm.
+
+    Args:
+        language: Directory name of the language under ``vocoder/<gender>/``.
+        gender: Voice directory name under ``vocoder/``.
+        device: Torch device string the generator is placed on.
+        dtype: When equal to ``"bfloat16"`` the generator is cast to bfloat16;
+            any other value leaves the weights untouched.
+
+    Returns:
+        The ready-to-run generator module.
+
+    Raises:
+        FileNotFoundError: If the config or the checkpoint file is missing.
+    """
+    base_dir = f"vocoder/{gender}/{language}"
+    config_file = f"{base_dir}/config.json"
+    checkpoint_file = f"{base_dir}/generator"
+    if not (os.path.exists(config_file) and os.path.exists(checkpoint_file)):
+        raise FileNotFoundError(
+            f"Vocoder files not found. Expected config: {config_file}, generator: {checkpoint_file}")
+    with open(config_file, 'r') as config_handle:
+        hparams = AttrDict(json.load(config_handle))
+    torch.manual_seed(hparams.seed)
+    target = torch.device(device)
+    vocoder = Generator(hparams).to(target)
+    checkpoint = torch.load(checkpoint_file, map_location=target)
+    vocoder.load_state_dict(checkpoint['generator'])
+    vocoder.eval()
+    vocoder.remove_weight_norm()
+    return vocoder.to(torch.bfloat16) if dtype == "bfloat16" else vocoder
+def load_fastspeech2_model(language: str, gender: str, device: str, dtype: str = "float32"):
+    """
+    Loads the FastSpeech2 model with its statistics files resolved to absolute paths.
+
+    The stored ``config.yaml`` references the feature/pitch/energy statistics
+    by absolute path, which is only valid on the machine that wrote it. The
+    three ``stats_file`` entries are therefore re-pointed at the files inside
+    ``<cwd>/<language>/<gender>/model`` before the model is built. The patched
+    configuration goes to a throw-away temporary file, so the ``config.yaml``
+    on disk is never rewritten with machine-specific paths.
+
+    Args:
+        language: Model directory name (relative to the working directory).
+        gender: Voice sub-directory inside ``language``.
+        device: Device string forwarded to ``Text2Speech``.
+        dtype: When equal to ``"bfloat16"`` the acoustic model is cast to
+            bfloat16; any other value leaves the weights untouched.
+
+    Returns:
+        A ``Text2Speech`` instance whose built-in vocoder is disabled.
+
+    Raises:
+        FileNotFoundError: If ``config.yaml`` or ``model.pth`` is missing.
+    """
+    import tempfile
+
+    config_path = f"{language}/{gender}/model/config.yaml"
+    tts_model_path = f"{language}/{gender}/model/model.pth"
+    if not os.path.exists(config_path) or not os.path.exists(tts_model_path):
+        raise FileNotFoundError(
+            f"FastSpeech2 model files not found. Expected config: {config_path}, model: {tts_model_path}")
+    with open(config_path, "r", encoding="utf-8") as file:
+        config = yaml.safe_load(file)
+    # Resolve the statistics files against the current working directory.
+    stats_dir = os.path.abspath(os.path.join(language, gender, "model"))
+    config["normalize_conf"]["stats_file"] = os.path.join(stats_dir, "feats_stats.npz")
+    config["pitch_normalize_conf"]["stats_file"] = os.path.join(stats_dir, "pitch_stats.npz")
+    config["energy_normalize_conf"]["stats_file"] = os.path.join(stats_dir, "energy_stats.npz")
+    # mkstemp + explicit removal (rather than NamedTemporaryFile) so the file
+    # can be reopened by name while Text2Speech reads it, including on Windows.
+    fd, patched_config_path = tempfile.mkstemp(prefix="fastspeech2_config_", suffix=".yaml")
+    try:
+        with os.fdopen(fd, "w", encoding="utf-8") as file:
+            yaml.dump(config, file)
+        model = Text2Speech(train_config=patched_config_path, model_file=tts_model_path,
+                            device=device, vocoder_config=None, vocoder_file=None)
+    finally:
+        os.remove(patched_config_path)
+    # Waveform generation is handled by the separately loaded HiFi-GAN vocoder.
+    model.vocoder = None
+    if dtype == "bfloat16":
+        model.model = model.model.to(torch.bfloat16)
+    return model
+def split_into_chunks(text: str, words_per_chunk: int = 100):
+    """Break text into consecutive pieces of at most words_per_chunk whitespace-separated words."""
+    tokens = text.split()
+    pieces = []
+    for start in range(0, len(tokens), words_per_chunk):
+        pieces.append(' '.join(tokens[start:start + words_per_chunk]))
+    return pieces
+class Text2SpeechApp:
+    """Text-to-speech pipeline for one language (FastSpeech2 + HiFi-GAN).
+
+    For the "male" and "female" voices the constructor loads a FastSpeech2
+    acoustic model and a HiFi-GAN generator; the generator is converted to
+    OpenVINO and compiled for the CPU device. A voice that fails to load is
+    reported on stdout and left out of ``supported_genders``.
+    """
+    # Audio samples produced per mel frame. The original inline note lists
+    # [8, 8, 8, 2] for this product -- presumably the HiFi-GAN upsample rates;
+    # TODO confirm against the vocoder config.
+    _SAMPLES_PER_FRAME = 8 * 8 * 8 * 2
+    def __init__(self, language: str, batch_size: int = 1, alpha: float = 1, dtype: str = "bfloat16"):
+        """Load every available voice for ``language`` and run a warmup pass.
+
+        Args:
+            language: Language name; models are looked up under
+                ``<language>_latest``.
+            batch_size: Stored on the instance; not used by the methods of
+                this class.
+            alpha: Passed to FastSpeech2 as ``decode_conf["alpha"]``.
+            dtype: Forwarded to the model loaders (``"bfloat16"`` casts the
+                weights).
+        """
+        self.alpha = alpha
+        self.lang = language
+        self.batch_size = batch_size
+        self.dtype = dtype
+        self.vocoder_model = {}
+        self.fastspeech2_model = {}
+        self.supported_genders = []
+        self.preprocessor = TTSDurAlignPreprocessor()
+        genders = ["male", "female"]
+        for gender in genders:
+            try:
+                vocoder = load_hifigan_vocoder(
+                    f"{language}_latest", gender, device, self.dtype)
+                with torch.no_grad():
+                    vocoder = ov.convert_model(vocoder, example_input=torch.ones([1, 160, MAX_DEFAULT_VALUE]))
+                vocoder = ov.compile_model(vocoder, device_name="CPU")
+                print(
+                    f"Loaded HiFi-GAN vocoder for {language}-{gender}")
+                acoustic_model = load_fastspeech2_model(
+                    f"{language}_latest", gender, device, self.dtype)
+                print(
+                    f"Loaded FastSpeech2 model for {language}-{gender}")
+                # Register the voice only once both halves loaded, so a failed
+                # voice never leaves a vocoder without its acoustic model.
+                self.vocoder_model[gender] = vocoder
+                self.fastspeech2_model[gender] = acoustic_model
+                self.supported_genders.append(gender)
+            except FileNotFoundError as e:
+                print(
+                    f"Error loading model for {language}-{gender}: {e}. This model key will not be available.")
+            except Exception as e:
+                print(
+                    f"An unexpected error occurred while loading model for {language}-{gender}: {e}. This model key will not be available.")
+        self.warmup()
+    def pre_print(self, print_str: str):
+        """Print ``print_str`` framed by two separator lines."""
+        print("=================================================")
+        print(print_str)
+        print("=================================================")
+    def warmup(self):
+        """Synthesise the language's warmup paragraph twice per loaded voice.
+
+        Output goes to ``./warmup_outputs``. A failure for one voice is
+        printed and does not stop the warmup of the other.
+        """
+        self.pre_print("TTS Warming up!")
+        lang = self.lang.lower()
+        text = WARMUP_PARAGRAPHS.get(lang)
+        if not text:
+            print(f"No warmup paragraph available for language: {lang}")
+            return
+        # Ensure warmup output directory exists
+        output_dir = "./warmup_outputs"
+        os.makedirs(output_dir, exist_ok=True)
+        print(f"Running warmup for language: {lang}")
+        print(f"Warmup text length: {len(text.split())} words")
+        total_start_time = time.time()
+        for gender in ["male", "female"]:
+            if gender not in self.fastspeech2_model:
+                print(f"Skipping warmup for {gender} - model not loaded.")
+                continue
+            print(f"Starting warmup for {lang}-{gender}")
+            try:
+                gender_start_time = time.time()
+                for i in range(2):  # Run twice; adjust as needed
+                    print(f"Warmup iteration {i + 1} for {gender}")
+                    time_taken, _ = self.convert_and_save(
+                        text=text,
+                        speaker_gender=gender,
+                        output_file_dir=output_dir
+                    )
+                    print(f"Iteration {i + 1} for {gender} completed in {time_taken:.2f} seconds")
+                gender_total_time = time.time() - gender_start_time
+                print(f"Total warmup time for {gender}: {gender_total_time:.2f} seconds")
+            except Exception as e:
+                print(f"Warmup failed for {lang}-{gender}: {e}")
+        total_time = time.time() - total_start_time
+        print(f"Total TTS warmup completed in {total_time:.2f} seconds")
+        self.pre_print("TTS Warming finished!")
+    def save_to_file(self, audio_arr, file_path):
+        """Write ``audio_arr`` to ``file_path`` as a WAV file at ``SAMPLING_RATE``."""
+        write(file_path, SAMPLING_RATE, audio_arr)
+        print(f"Audio saved to {file_path}")
+    def convert_and_save(self, text: str, speaker_gender="male", output_file_dir: str = "./outputs"):
+        """Synthesise ``text`` chunk by chunk and save the result as one WAV file.
+
+        Each chunk goes through ``generate_audio_bytes`` so that the OpenVINO
+        vocoder is driven the same way as for single sentences (the compiled
+        model returns an output mapping, not a torch tensor).
+
+        Args:
+            text: Text to synthesise; split into 100-word chunks.
+            speaker_gender: Voice key, "male" or "female".
+            output_file_dir: Target directory; created when missing.
+
+        Returns:
+            ``(seconds_taken, output_file_path)``.
+
+        Raises:
+            ValueError: If ``text`` contains no words.
+        """
+        timestamp = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
+        output_file = f"{output_file_dir}/{self.lang}_{speaker_gender}_{timestamp}.wav"
+        start = time.time()
+        result_chunks = split_into_chunks(text)
+        if not result_chunks:
+            raise ValueError("Cannot synthesise empty text")
+        os.makedirs(output_file_dir, exist_ok=True)
+        audio_arr = []
+        for chunk_text in result_chunks:
+            samples = np.asarray(self.generate_audio_bytes(chunk_text, speaker_gender))
+            # Clip before the cast: out-of-range floats would wrap around in int16.
+            audio_arr.append(np.clip(samples, -32768, 32767).astype('int16'))
+        result_array = np.concatenate(audio_arr, axis=0)
+        self.save_to_file(audio_arr=result_array, file_path=output_file)
+        time_taken = time.time() - start
+        return time_taken, output_file
+    def generate_audio_bytes(self, text: str, speaker_gender="male", save_file: bool = False):
+        """Synthesise ``text`` and return the waveform scaled by ``MAX_WAV_VALUE``.
+
+        The mel-spectrogram is padded to ``MAX_DEFAULT_VALUE`` frames before it
+        is handed to the vocoder and the output is trimmed back to the real
+        length. Spectrograms longer than that limit are cropped; a warning is
+        printed when this happens.
+
+        Args:
+            text: Text to synthesise.
+            speaker_gender: Voice key, "male" or "female".
+            save_file: Unused; kept for interface compatibility.
+
+        Returns:
+            The un-quantised samples taken from the vocoder output; callers
+            cast to int16 themselves.
+        """
+        preprocessed_text, _ = self.preprocessor.preprocess(
+            text, self.lang, speaker_gender)
+        preprocessed_text = " ".join(preprocessed_text)
+        with torch.no_grad():
+            # Generate mel-spectrograms
+            out = self.fastspeech2_model[speaker_gender](preprocessed_text,
+                                         decode_conf={"alpha": self.alpha})
+            # NOTE(review): 2.3262 is an unexplained scale factor carried over
+            # unchanged -- confirm its origin.
+            x = out["feat_gen_denorm"].T.unsqueeze(0) * 2.3262
+            n_frames = x.shape[-1]
+            if n_frames > MAX_DEFAULT_VALUE:
+                print(
+                    f"Warning: input produced {n_frames} mel frames; audio is truncated to {MAX_DEFAULT_VALUE} frames.")
+            trim_length = self._SAMPLES_PER_FRAME * min(n_frames, MAX_DEFAULT_VALUE)
+            # A negative pad amount crops, so x always ends up MAX_DEFAULT_VALUE frames long.
+            x = F.pad(x, (0, MAX_DEFAULT_VALUE - n_frames), value=-12)
+            # Convert mel-spectrograms to raw audio waveforms
+            y_g_hat = self.vocoder_model[speaker_gender](x)
+            audio = y_g_hat[0][0][0][:trim_length]
+            audio = audio * MAX_WAV_VALUE
+        return audio
+    def evaluate_performance(self, input_sentences: list, save_file: bool = False):
+        """Time the synthesis of each sentence, optionally saving the audio.
+
+        Args:
+            input_sentences: Sentences to synthesise one by one.
+            save_file: When true, every result is stored as ``.npy`` and
+                ``.wav`` under ``audios_<dtype>/``.
+
+        Returns:
+            Total synthesis time in seconds over all sentences (``0.0`` for an
+            empty list).
+        """
+        total_sentences = len(input_sentences)
+        print(f"\nTotal T2S to be done: {total_sentences}\n")
+        if save_file:
+            os.makedirs(f"audios_{self.dtype}/numpy_files", exist_ok=True)
+            os.makedirs(f"audios_{self.dtype}/audio_files", exist_ok=True)
+        total_time = 0.0
+        for i, sentence in enumerate(input_sentences):
+            start_time = time.perf_counter()
+            audio = self.generate_audio_bytes(text=sentence)
+            time_taken = time.perf_counter() - start_time
+            total_time += time_taken
+            print("=================================================")
+            print(f"Sentence {i + 1}/{total_sentences}:{sentence} processed in {time_taken:.2f} seconds")
+            if save_file:
+                output_file = f"audios_{self.dtype}/numpy_files/file_{i}.npy"
+                # Clip before the cast: out-of-range floats would wrap around in int16.
+                audio = np.clip(np.asarray(audio), -32768, 32767).astype('int16')
+                np.save(output_file, audio)
+                audio_file_path = f"audios_{self.dtype}/audio_files/file_{i}.wav"
+                with open(audio_file_path, "wb") as f:
+                    write(f, SAMPLING_RATE, audio)
+                print(f"Audio saved to {audio_file_path}")
+        return total_time
+    def save_to_files(self, byte_ios, file_prefix: str) -> list[str]:
+        """Dump each readable buffer in ``byte_ios`` to ``<file_prefix>_<timestamp>_<n>.wav``.
+
+        Returns:
+            The paths written, in input order.
+        """
+        file_paths = []
+        for index, byte_io in enumerate(byte_ios, start=1):
+            timestamp = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
+            file_path = f"{file_prefix}_{timestamp}_{index}.wav"
+            file_paths.append(file_path)
+            with open(file_path, "wb") as f:
+                f.write(byte_io.read())
+            print(f"Audio saved to {file_path}")
+        return file_paths
+    def batch_convert_and_save(self, input_sentences: list[str], speaker_gender="male", output_file_dir: str = "./outputs"):
+        """Synthesise all sentences as one paragraph and save a single WAV file.
+
+        Returns:
+            ``(seconds_taken, [output_file_path])``.
+        """
+        start_time = time.time()
+        output_file_paths = []
+        total_sentences = len(input_sentences)
+        os.makedirs(output_file_dir, exist_ok=True)
+        print(f"Total T2S to be done: {total_sentences}\n")
+        # Join with a space so the last word of one sentence is not fused with
+        # the first word of the next.
+        combined_para = ' '.join(input_sentences)
+        paragraph_time, output_path = self.convert_and_save(
+            combined_para, speaker_gender=speaker_gender, output_file_dir=output_file_dir)
+        print(f"Paragraph Time: {paragraph_time}\n")
+        output_file_paths.append(output_path)
+        time_taken = time.time() - start_time
+        return time_taken, output_file_paths
+if __name__ == "__main__":
+    # Benchmark entry point: loads the models for one language and times the
+    # synthesis of a fixed set of sample sentences.
+    import argparse
+    parser = argparse.ArgumentParser(description="Text to Speech benchmarking")
+    parser.add_argument("--batch_size", type=int, default=1, help="Batch size for TTS inference")
+    parser.add_argument("--language", type=str, default="hindi", help="Language for TTS")
+    parser.add_argument("--alpha", type=float, default=1.0, help="Alpha value for FastSpeech2 decoding")
+    parser.add_argument("--dtype", type=str, default="float32", help="Data type for model inference")
+    args = parser.parse_args()
+    # Use the parsed options; their defaults match the values that used to be
+    # hard-coded here, so a plain invocation behaves as before.
+    tts = Text2SpeechApp(batch_size=args.batch_size, alpha=args.alpha, language=args.language, dtype=args.dtype)
+    st = time.perf_counter()
+    # NOTE: the sample sentences are Hindi regardless of --language.
+    texts = [
+        "जीवन में सफलता पाने के लिए केवल सपने देखना ही नहीं, बल्कि उन्हें पूरा करने के लिए निरंतर प्रयास और आत्मविश्वास भी ज़रूरी होता है।",
+        "कठिन परिस्थितियाँ हमें तोड़ने नहीं आतीं, बल्कि हमें मज़बूत बनाकर जीवन के असली अर्थ से परिचित कराती हैं।",
+        "सकारात्मक सोच और सही दृष्टिकोण के साथ किया गया हर छोटा प्रयास भी एक दिन बड़ी उपलब्धि में बदल जाता है।",
+        "जब हम निस्वार्थ भाव से दूसरों की मदद करते हैं, तब हमारे अपने जीवन में भी शांति और संतुलन अपने आप आ जाता है।"
+    ]
+    tts.evaluate_performance(texts, save_file=True)
+    et = time.perf_counter()
+    print(f"Total time for evaluating {len(texts)} sentences: {et - st:.2f} seconds")
+    print(f"Average time per sentence: {(et - st)/len(texts):.2f} seconds")

server.py ADDED Viewed

	@@ -0,0 +1,272 @@

+import asyncio
+import base64
+import io
+import logging
+import os
+import time
+import numpy as np
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+from scipy.io.wavfile import write as wav_write
+from main_ov import Text2SpeechApp
+from utilities import SAMPLING_RATE, SUPPORTED_OUTPUT_LANGS
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# --- Language code mapping (Bhashini 2-letter <-> full name) ---
+LANG_CODE_TO_NAME = {
+    "hi": "hindi",
+    "ta": "tamil",
+    "te": "telugu",
+    "kn": "kannada",
+    "ml": "malayalam",
+    "pa": "punjabi",
+    "bn": "bengali",
+}
+LANG_NAME_TO_CODE = {v: k for k, v in LANG_CODE_TO_NAME.items()}
+# --- Pydantic models for Bhashini pipeline request/response ---
+# Language selection for a task. sourceLanguage carries the language code the
+# endpoints resolve through LANG_CODE_TO_NAME (e.g. "hi"). The visible handlers
+# do not read sourceScriptCode or targetLanguage.
+class LanguageConfig(BaseModel):
+    sourceLanguage: str
+    sourceScriptCode: str | None = None
+    targetLanguage: str | None = None
+# Per-task settings: language, voice gender and the sampling rate the caller
+# wants back. serviceId is accepted but not read by the visible handlers.
+class TaskConfig(BaseModel):
+    language: LanguageConfig
+    serviceId: str | None = None
+    gender: str = "female"
+    samplingRate: int = 48000
+# One pipeline step; the pipeline endpoint only accepts taskType == "tts".
+class PipelineTask(BaseModel):
+    taskType: str
+    config: TaskConfig
+# A single text to synthesise.
+class InputItem(BaseModel):
+    source: str
+# Wrapper around the list of texts; a missing or empty list is rejected with
+# HTTP 400 by the pipeline endpoint.
+class InputData(BaseModel):
+    input: list[InputItem] | None = None
+# Request body of POST /services/inference/pipeline.
+class PipelineRequest(BaseModel):
+    pipelineTasks: list[PipelineTask]
+    inputData: InputData
+# One synthesised clip; the pipeline endpoint fills audioContent with a
+# base64-encoded WAV and leaves audioUri as None.
+class AudioItem(BaseModel):
+    audioContent: str | None = None
+    audioUri: str | None = None
+# Describes the format of the audio returned in a pipeline response.
+class ResponseConfig(BaseModel):
+    audioFormat: str = "wav"
+    language: LanguageConfig
+    encoding: str = "base64"
+    samplingRate: int = 48000
+# Result of one pipeline task: the audio clips plus timing metrics.
+class PipelineResponseItem(BaseModel):
+    taskType: str
+    config: ResponseConfig
+    output: list | None = None
+    audio: list[AudioItem] | None = None
+    metrics: dict | None = None
+# Response body of POST /services/inference/pipeline.
+class PipelineResponse(BaseModel):
+    pipelineResponse: list[PipelineResponseItem]
+# Request body of the simplified POST /tts endpoint; language is a code
+# looked up in LANG_CODE_TO_NAME.
+class SimpleTtsRequest(BaseModel):
+    text: str
+    language: str = "hi"
+    gender: str = "female"
+    samplingRate: int = 48000
+# --- App setup ---
+# FastAPI application object; CORS is opened up for every origin, method and header.
+# NOTE(review): wildcard origins are combined with allow_credentials=True here.
+# FastAPI's CORS documentation advises listing origins explicitly when
+# credentials are allowed -- confirm whether credentials are needed at all.
+app = FastAPI(title="FastSpeech2 TTS API (Bhashini-compatible)")
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Dict of language_name -> Text2SpeechApp instance
+tts_engines: dict[str, Text2SpeechApp] = {}
+@app.on_event("startup")
+def load_models():
+    """Load TTS models for all configured languages at startup.
+
+    Builds one ``Text2SpeechApp`` per recognised entry of
+    ``SUPPORTED_OUTPUT_LANGS`` and registers it in ``tts_engines``. A language
+    is logged and skipped, without affecting the others, when its engine
+    raises during construction or ends up with no usable voice.
+    """
+    logger.info(f"SUPPORTED_OUTPUT_LANGS: {SUPPORTED_OUTPUT_LANGS}")
+    logger.info(f"LANG_CODE_TO_NAME: {LANG_CODE_TO_NAME}")
+    dtype = os.getenv("TTS_DTYPE", "float32")
+    for lang_name in SUPPORTED_OUTPUT_LANGS:
+        lang_name = lang_name.strip().lower()
+        if lang_name not in LANG_NAME_TO_CODE:
+            logger.warning(f"Unknown language '{lang_name}' in LANGUAGES env var, skipping.")
+            continue
+        logger.info(f"Loading TTS models for '{lang_name}'...")
+        try:
+            engine = Text2SpeechApp(language=lang_name, dtype=dtype)
+        except Exception:
+            # logger.exception records the message and the traceback in one
+            # entry, so a separate logger.error call is not needed.
+            logger.exception(f"✗ Failed to load models for '{lang_name}'")
+            continue
+        if not engine.supported_genders:
+            # No voice loaded for this language: registering the engine would
+            # advertise a language that can never serve a request.
+            logger.warning(f"No voices could be loaded for '{lang_name}', skipping.")
+            continue
+        tts_engines[lang_name] = engine
+        logger.info(f"✓ Successfully loaded '{lang_name}' with genders: {engine.supported_genders}")
+    logger.info(f"Final loaded languages: {list(tts_engines.keys())}")
+def _synthesize(tts_app: Text2SpeechApp, text: str, gender: str, requested_sr: int) -> tuple[str, float]:
+    """Run TTS inference and return base64-encoded WAV string and audio duration in seconds.
+
+    Args:
+        tts_app: Engine used for synthesis.
+        text: Text to synthesise.
+        gender: Voice key understood by ``tts_app``.
+        requested_sr: Sampling rate of the returned WAV; the audio is
+            resampled when it differs from ``SAMPLING_RATE``.
+
+    Returns:
+        ``(base64_wav, duration_seconds)``.
+    """
+    audio_tensor = tts_app.generate_audio_bytes(text=text, speaker_gender=gender)
+    # Stay in float until the very end so the signal is quantised only once.
+    if hasattr(audio_tensor, "numpy"):
+        audio_float = audio_tensor.numpy().astype(np.float32)
+    else:
+        audio_float = np.asarray(audio_tensor, dtype=np.float32)
+    # Resample if requested rate differs from native rate
+    output_sr = SAMPLING_RATE
+    if requested_sr != SAMPLING_RATE:
+        import librosa
+        audio_float = librosa.resample(
+            audio_float / 32768.0, orig_sr=SAMPLING_RATE, target_sr=requested_sr) * 32768.0
+        output_sr = requested_sr
+    # Clip to the int16 range before casting: a full-scale sample (or resampler
+    # overshoot) would otherwise wrap around and produce a loud click.
+    audio_np = np.clip(audio_float, -32768, 32767).astype(np.int16)
+    # Write WAV to in-memory buffer
+    buf = io.BytesIO()
+    wav_write(buf, output_sr, audio_np)
+    wav_bytes = buf.getvalue()
+    audio_duration_s = float(len(audio_np) / output_sr) if output_sr > 0 else 0.0
+    return base64.b64encode(wav_bytes).decode("ascii"), audio_duration_s
+def _resolve_tts_engine(lang_code: str, gender: str) -> tuple[str, Text2SpeechApp, str]:
+    """Map a language code and gender to a loaded engine.
+
+    Returns:
+        ``(language_name, engine, lower-cased gender)``.
+
+    Raises:
+        HTTPException: Status 400 when the code is unknown, the language is
+            not loaded, or the gender is not available for that language.
+    """
+    code = lang_code.lower()
+    name = LANG_CODE_TO_NAME.get(code)
+    if not name:
+        raise HTTPException(status_code=400, detail=f"Unsupported language code: '{code}'")
+    engine = tts_engines.get(name)
+    if engine is None:
+        raise HTTPException(status_code=400, detail=f"Language '{name}' not loaded. Available: {list(tts_engines.keys())}")
+    voice = gender.lower()
+    if voice in engine.supported_genders:
+        return name, engine, voice
+    raise HTTPException(
+        status_code=400,
+        detail=f"Gender '{voice}' not available for '{name}'. Available: {engine.supported_genders}"
+    )
+@app.post("/services/inference/pipeline", response_model=PipelineResponse)
+async def inference_pipeline(request: PipelineRequest):
+    """Bhashini-style pipeline endpoint: synthesise every input text.
+
+    Only the first pipeline task is considered and it must be of type "tts".
+    Each input text is synthesised in a worker thread; the response carries
+    one base64 WAV per input plus latency, audio duration and real-time factor.
+    """
+    started = time.perf_counter()
+    tasks = request.pipelineTasks
+    if not tasks:
+        raise HTTPException(status_code=400, detail="pipelineTasks is empty")
+    first_task = tasks[0]
+    if first_task.taskType != "tts":
+        raise HTTPException(status_code=400, detail=f"Unsupported taskType: '{first_task.taskType}'. Only 'tts' is supported.")
+    # Resolve language and voice
+    task_config = first_task.config
+    lang_code = task_config.language.sourceLanguage
+    _, tts_app, gender = _resolve_tts_engine(lang_code, task_config.gender)
+    requested_sr = task_config.samplingRate
+    # Validate input
+    sources = request.inputData.input
+    if not sources:
+        raise HTTPException(status_code=400, detail="inputData.input is empty")
+    # Synthesise the texts one after another, off the event loop
+    clips = []
+    total_audio_duration_s = 0.0
+    for entry in sources:
+        encoded, seconds = await asyncio.to_thread(_synthesize, tts_app, entry.source, gender, requested_sr)
+        total_audio_duration_s += seconds
+        clips.append(AudioItem(audioContent=encoded, audioUri=None))
+    latency_ms = round((time.perf_counter() - started) * 1000, 2)
+    if total_audio_duration_s > 0:
+        rtf = round((latency_ms / 1000) / total_audio_duration_s, 4)
+    else:
+        rtf = 0.0
+    response_config = ResponseConfig(
+        audioFormat="wav",
+        language=LanguageConfig(sourceLanguage=lang_code, sourceScriptCode=""),
+        encoding="base64",
+        samplingRate=requested_sr,
+    )
+    metrics = {
+        "latency_ms": latency_ms,
+        "audio_duration_s": round(total_audio_duration_s, 3),
+        "rtf": rtf,
+    }
+    result_item = PipelineResponseItem(
+        taskType="tts",
+        config=response_config,
+        output=None,
+        audio=clips,
+        metrics=metrics,
+    )
+    return PipelineResponse(pipelineResponse=[result_item])
+@app.post("/tts")
+async def tts_compat(request: SimpleTtsRequest):
+    """Compatibility endpoint for clients calling /tts on port 5000."""
+    sentence = request.text.strip()
+    if not sentence:
+        raise HTTPException(status_code=400, detail="text is empty")
+    _, tts_app, gender = _resolve_tts_engine(request.language, request.gender)
+    started = time.perf_counter()
+    # Inference is blocking, so it runs in a worker thread.
+    encoded, seconds = await asyncio.to_thread(_synthesize, tts_app, sentence, gender, request.samplingRate)
+    latency_ms = round((time.perf_counter() - started) * 1000, 2)
+    if seconds > 0:
+        rtf = round((latency_ms / 1000) / seconds, 4)
+    else:
+        rtf = 0.0
+    metrics = {
+        "latency_ms": latency_ms,
+        "audio_duration_s": round(seconds, 3),
+        "rtf": rtf,
+    }
+    return {
+        "audioContent": encoded,
+        "audioFormat": "wav",
+        "encoding": "base64",
+        "samplingRate": request.samplingRate,
+        "metrics": metrics,
+    }
+@app.get("/health")
+def health():
+    """Report service status, the voices loaded per language and all known languages."""
+    loaded_langs = {}
+    for lang in tts_engines:
+        loaded_langs[lang] = tts_engines[lang].supported_genders
+    available = list(LANG_CODE_TO_NAME.values())
+    return {"status": "ok", "loadedLanguages": loaded_langs, "availableLanguages": available}

start_server.bat ADDED Viewed

	@@ -0,0 +1,6 @@

+@echo off
+rem Starts the TTS API (server.py) with uvicorn on all interfaces, port 5000.
+rem Enable Python's UTF-8 mode.
+set PYTHONUTF8=1
+rem server.py reads TTS_DTYPE and passes it to Text2SpeechApp as the model dtype.
+set TTS_DTYPE=float32
+rem Languages to load. Presumably read by utilities.SUPPORTED_OUTPUT_LANGS - confirm.
+set LANGUAGES=hindi,punjabi,tamil
+uvicorn server:app --host 0.0.0.0 --port 5000
+rem Keep the console window open after the server stops.
+pause

test_tts.py ADDED Viewed

	@@ -0,0 +1,36 @@

+# Manual smoke test: posts one Hindi sentence to the pipeline endpoint of a
+# running TTS server and stores the returned audio as test_output.wav.
+import requests
+import json
+import base64
+import os
+# start_server.bat serves on port 5000; set TTS_SERVER_URL to target a server
+# started on another host or port.
+base_url = os.getenv("TTS_SERVER_URL", "http://localhost:5000")
+url = f"{base_url.rstrip('/')}/services/inference/pipeline"
+payload = {
+    "pipelineTasks": [
+        {
+            "taskType": "tts",
+            "config": {
+                "language": {"sourceLanguage": "hi"},
+                "gender": "female",
+                "samplingRate": 48000,
+            },
+        }
+    ],
+    "inputData": {
+        "input": [
+            {"source": "मेरा नाम कशा है"}
+        ]
+    },
+}
+# requests has no default timeout; bound the wait so a hung server does not
+# block this script forever.
+response = requests.post(url, json=payload, timeout=120)
+print(f"Status: {response.status_code}")
+if response.status_code == 200:
+    data = response.json()
+    audio_b64 = data["pipelineResponse"][0]["audio"][0]["audioContent"]
+    audio_bytes = base64.b64decode(audio_b64)
+    with open("test_output.wav", "wb") as f:
+        f.write(audio_bytes)
+    print(f"Audio saved to test_output.wav ({len(audio_bytes)} bytes)")
+else:
+    print(f"Error: {response.text}")

text_preprocess_for_inference.py CHANGED Viewed

@@ -1,949 +1,971 @@
-'''
-TTS Preprocessing
-Developed by Arun Kumar A(CS20S013) - November 2022
-Code Changes by Utkarsh - 2023
-'''
-import os
-import re
-import json
-import pandas as pd
-import string
-from collections import defaultdict
-import time
-import subprocess
-import shutil
-from multiprocessing import Process
-import traceback
-#imports of dependencies from environment.yml
-from num_to_words import num_to_word
-from g2p_en import G2p
-def add_to_dictionary(dict_to_add, dict_file):
-    append_string = ""
-    for key, value in dict_to_add.items():
-        append_string += (str(key) + " " + str(value) + "\n")
-    if os.path.isfile(dict_file):
-        # make a copy of the dictionary
-        source_dir = os.path.dirname(dict_file)
-        dict_file_name = os.path.basename(dict_file)
-        temp_file_name = "." + dict_file_name + ".temp"
-        temp_dict_file = os.path.join(source_dir, temp_file_name)
-        shutil.copy(dict_file, temp_dict_file)
-        # append the new words in the dictionary to the temp file
-        with open(temp_dict_file, "a") as f:
-            f.write(append_string)
-        # check if the write is successful and then replace the temp file as the dict file
-        try:
-            df_orig = pd.read_csv(dict_file, delimiter=" ", header=None, dtype=str)
-            df_temp = pd.read_csv(temp_dict_file, delimiter=" ", header=None, dtype=str)
-            if len(df_temp) > len(df_orig):
-                os.rename(temp_dict_file, dict_file)
-                print(f"{len(dict_to_add)} new words appended to Dictionary: {dict_file}")
-        except:
-            print(traceback.format_exc())
-    else:
-        # create a new dictionary
-        with open(dict_file, "a") as f:
-            f.write(append_string)
-        print(f"New Dictionary: {dict_file} created with {len(dict_to_add)} words")
-class TextCleaner:
-    def __init__(self):
-        # this is a static set of cleaning rules to be applied
-        self.cleaning_rules = {
-            " +" : " ",
-            "^ +" : "",
-            " +$" : "",
-            "#" : "",
-            "[.,;।!](\r\n)*" : "# ",
-            "[.,;।!](\n)*" : "# ",
-            "(\r\n)+" : "# ",
-            "(\n)+" : "# ",
-            "(\r)+" : "# ",
-            """[?;:)(!|&’‘,।\."]""": "",
-            "[/']" : "",
-            "[-–]" : " ",
-        }
-    def clean(self, text):
-        for key, replacement in self.cleaning_rules.items():
-            text = re.sub(key, replacement, text)
-        return text
-    def clean_list(self, text):
-        # input is supposed to be a list of strings
-        output_text = []
-        for line in text:
-            line = line.strip()
-            for key, replacement in self.cleaning_rules.items():
-                line = re.sub(key, replacement, line)
-            output_text.append(line)
-        return output_text
-class Phonifier:
-    def __init__(self, dict_location=None):
-        if dict_location is None:
-            dict_location = "phone_dict"
-        self.dict_location = dict_location
-        # self.phone_dictionary = {}
-        # # load dictionary for all the available languages
-        # for dict_file in os.listdir(dict_location):
-        #     try:
-        #         if dict_file.startswith("."):
-        #             # ignore hidden files
-        #             continue
-        #         language = dict_file
-        #         dict_file_path = os.path.join(dict_location, dict_file)
-        #         df = pd.read_csv(dict_file_path, delimiter=" ", header=None, dtype=str)
-        #         self.phone_dictionary[language] = df.set_index(0).to_dict('dict')[1]
-        #     except Exception as e:
-        #         print(traceback.format_exc())
-        # print("Phone dictionary loaded for the following languages:", list(self.phone_dictionary.keys()))
-        self.g2p = G2p()
-        print('Loading G2P model... Done!')
-        # Mapping between the cmu phones and the iitm cls
-        self.cmu_2_cls_map = {
-            "AA" : "aa",
-            "AA0" : "aa",
-            "AA1" : "aa",
-            "AA2" : "aa",
-            "AE" : "axx",
-            "AE0" : "axx",
-            "AE1" : "axx",
-            "AE2" : "axx",
-            "AH" : "a",
-            "AH0" : "a",
-            "AH1" : "a",
-            "AH2" : "a",
-            "AO" : "ax",
-            "AO0" : "ax",
-            "AO1" : "ax",
-            "AO2" : "ax",
-            "AW" : "ou",
-            "AW0" : "ou",
-            "AW1" : "ou",
-            "AW2" : "ou",
-            "AX" : "a",
-            "AY" : "ei",
-            "AY0" : "ei",
-            "AY1" : "ei",
-            "AY2" : "ei",
-            "B" : "b",
-            "CH" : "c",
-            "D" : "dx",
-            "DH" : "d",
-            "EH" : "ee",
-            "EH0" : "ee",
-            "EH1" : "ee",
-            "EH2" : "ee",
-            "ER" : "a r",
-            "ER0" : "a r",
-            "ER1" : "a r",
-            "ER2" : "a r",
-            "EY" : "ee",
-            "EY0" : "ee",
-            "EY1" : "ee",
-            "EY2" : "ee",
-            "F" : "f",
-            "G" : "g",
-            "HH" : "h",
-            "IH" : "i",
-            "IH0" : "i",
-            "IH1" : "i",
-            "IH2" : "i",
-            "IY" : "ii",
-            "IY0" : "ii",
-            "IY1" : "ii",
-            "IY2" : "ii",
-            "JH" : "j",
-            "K" : "k",
-            "L" : "l",
-            "M" : "m",
-            "N" : "n",
-            "NG" : "ng",
-            "OW" : "o",
-            "OW0" : "o",
-            "OW1" : "o",
-            "OW2" : "o",
-            "OY" : "ei",
-            "OY0" : "ei",
-            "OY1" : "ei",
-            "OY2" : "ei",
-            "P" : "p",
-            "R" : "r",
-            "S" : "s",
-            "SH" : "sh",
-            "T" : "tx",
-            "TH" : "t",
-            "UH" : "u",
-            "UH0" : "u",
-            "UH1" : "u",
-            "UH2" : "u",
-            "UW" : "uu",
-            "UW0" : "uu",
-            "UW1" : "uu",
-            "UW2" : "uu",
-            "V" : "w",
-            "W" : "w",
-            "Y" : "y",
-            "Z" : "z",
-            "ZH" : "sh",
-        }
-        # Mapping between the iitm cls and iitm char
-        self.cls_2_chr_map = {
-            "aa" : "A",
-            "ii" : "I",
-            "uu" : "U",
-            "ee" : "E",
-            "oo" : "O",
-            "nn" : "N",
-            "ae" : "ऍ",
-            "ag" : "ऽ",
-            "au" : "औ",
-            "axx" : "अ",
-            "ax" : "ऑ",
-            "bh" : "B",
-            "ch" : "C",
-            "dh" : "ध",
-            "dx" : "ड",
-            "dxh" : "ढ",
-            "dxhq" : "T",
-            "dxq" : "D",
-            "ei" : "ऐ",
-            "ai" : "ऐ",
-            "eu" : "உ",
-            "gh" : "घ",
-            "gq" : "G",
-            "hq" : "H",
-            "jh" : "J",
-            "kh" : "ख",
-            "khq" : "K",
-            "kq" : "क",
-            "ln" : "ൾ",
-            "lw" : "ൽ",
-            "lx" : "ള",
-            "mq" : "M",
-            "nd" : "न",
-            "ng" : "ङ",
-            "nj" : "ञ",
-            "nk" : "Y",
-            "nw" : "ൺ",
-            "nx" : "ण",
-            "ou" : "औ",
-            "ph" : "P",
-            "rq" : "R",
-            "rqw" : "ॠ",
-            "rw" : "ർ",
-            "rx" : "र",
-            "sh" : "श",
-            "sx" : "ष",
-            "th" : "थ",
-            "tx" : "ट",
-            "txh" : "ठ",
-            "wv" : "W",
-            "zh" : "Z",
-        }
-        # Multilingual support for OOV characters
-        oov_map_json_file = 'multilingualcharmap.json'
-        with open(oov_map_json_file, 'r') as oov_file:
-            self.oov_map = json.load(oov_file)
-    def load_lang_dict(self, language, phone_dictionary):
-        # load dictionary for requested language
-        try:
-            dict_file = language
-            print("language", language)
-            dict_file_path = os.path.join(self.dict_location, dict_file)
-            print("dict_file_path", dict_file_path)
-            df = pd.read_csv(dict_file_path, delimiter=" ", header=None, dtype=str)
-            phone_dictionary[language] = df.set_index(0).to_dict('dict')[1]
-            dict_file = 'english'
-            dict_file_path = os.path.join(self.dict_location, dict_file)
-            df = pd.read_csv(dict_file_path, delimiter=" ", header=None, dtype=str)
-            phone_dictionary['english'] = df.set_index(0).to_dict('dict')[1]
-        except Exception as e:
-            print(traceback.format_exc())
-        return phone_dictionary
-    def __is_float(self, word):
-        parts = word.split('.')
-        if len(parts) != 2:
-            return False
-        return parts[0].isdecimal() and parts[1].isdecimal()
-    def en_g2p(self, word):
-        phn_out = self.g2p(word)
-        # print(f"phn_out: {phn_out}")
-        # iterate over the string list and replace each word with the corresponding value from the dictionary
-        for i, phn in enumerate(phn_out):
-            if phn in self.cmu_2_cls_map.keys():
-                phn_out[i] = self.cmu_2_cls_map[phn]
-                # cls_out = self.cmu_2_cls_map[phn]
-                if phn_out[i] in self.cls_2_chr_map.keys():
-                    phn_out[i] = self.cls_2_chr_map[phn_out[i]]
-                else:
-                    pass
-            else:
-                pass  # ignore words that are not in the dictionary
-            # print(f"i: {i}, phn: {phn}, cls_out: {cls_out}, phn_out: {phn_out[i]}")
-        return ("".join(phn_out)).strip().replace(" ", "")
-    def __post_phonify(self, text, language, gender):
-        language_gender_id = language+'_'+gender
-        if language_gender_id in self.oov_map.keys():
-            output_string = ''
-            for char in text:
-                if char in self.oov_map[language_gender_id].keys():
-                    output_string += self.oov_map[language_gender_id][char]
-                else:
-                    output_string += char
-                # output_string += self.oov_map['language_gender_id']['char']
-            return output_string
-        else:
-            return text
-    def __is_english_word(self, word):
-        maxchar = max(word)
-        if u'\u0000' <= maxchar <= u'\u007f':
-            return True
-        return False
-    def __phonify(self, text, language, gender, phone_dictionary):
-        # text is expected to be a list of strings
-        words = set((" ".join(text)).split(" "))
-        #print(f"words test: {words}")
-        non_dict_words = []
-        if language in phone_dictionary:
-            for word in words:
-                # print(f"word: {word}")
-                if word not in phone_dictionary[language] and (language == "english" or (not self.__is_english_word(word))):
-                    non_dict_words.append(word)
-                    #print('INSIDE IF CONDITION OF ADDING WORDS')
-        else:
-            non_dict_words = words
-        print(f"word not in dict: {non_dict_words}")
-        if len(non_dict_words) > 0:
-            # unified parser has to be run for the non dictionary words
-            os.makedirs("tmp", exist_ok=True)
-            timestamp = str(time.time())
-            non_dict_words_file = os.path.abspath("tmp/non_dict_words_" + timestamp)
-            out_dict_file = os.path.abspath("tmp/out_dict_" + timestamp)
-            with open(non_dict_words_file, "w") as f:
-                f.write("\n".join(non_dict_words))
-            if(language == 'tamil'):
-                current_directory = os.getcwd()
-                #tamil_parser_cmd = "tamil_parser.sh"
-                tamil_parser_cmd = f"{current_directory}/ssn_parser_new/tamil_parser.py"
-                #subprocess.run(["bash", tamil_parser_cmd, non_dict_words_file, out_dict_file, timestamp, "ssn_parser"])
-                subprocess.run(["python", tamil_parser_cmd, non_dict_words_file, out_dict_file, timestamp, f"{current_directory}/ssn_parser_new"])
-            elif(language == 'english'):
-                phn_out_dict = {}
-                for i in range(0,len(non_dict_words)):
-                    phn_out_dict[non_dict_words[i]] = self.en_g2p(non_dict_words[i])
-                # Create a string representation of the dictionary
-                data_str = "\n".join([f"{key}\t{value}" for key, value in phn_out_dict.items()])
-                print(f"data_str: {data_str}")
-                with open(out_dict_file, "w") as f:
-                    f.write(data_str)
-            else:
-                out_dict_file = os.path.abspath("tmp/out_dict_" + timestamp)
-                from get_phone_mapped_python import TextReplacer
-                from indic_unified_parser.uparser import wordparse
-                text_replacer=TextReplacer()
-                # def write_output_to_file(output_text, file_path):
-                #     with open(file_path, 'w') as f:
-                #         f.write(output_text)
-                parsed_output_list = []
-                for word in non_dict_words:
-                    parsed_word = wordparse(word, 0, 0, 1)
-                    parsed_output_list.append(parsed_word)
-                replaced_output_list = [text_replacer.apply_replacements(parsed_word) for parsed_word in parsed_output_list]
-                with open(out_dict_file, 'w', encoding='utf-8') as file:
-                    for original_word, formatted_word in zip(non_dict_words, replaced_output_list):
-                        line = f"{original_word}\t{formatted_word}\n"
-                        file.write(line)
-                        print(line, end='')
-            try:
-                df = pd.read_csv(out_dict_file, delimiter="\t", header=None, dtype=str)
-                #print('DATAFRAME OUTPUT FILE', df.head())
-                new_dict = df.dropna().set_index(0).to_dict('dict')[1]
-                #print("new dict",new_dict)
-                if language not in phone_dictionary:
-                    phone_dictionary[language] = new_dict
-                else:
-                    phone_dictionary[language].update(new_dict)
-                # run a non-blocking child process to update the dictionary file
-                #print("phone_dict", self.phone_dictionary)
-                p = Process(target=add_to_dictionary, args=(new_dict, os.path.join(self.dict_location, language)))
-                p.start()
-            except Exception as err:
-                print(f"Error: While loading {out_dict_file}")
-                traceback.print_exc()
-        # phonify text with dictionary
-        text_phonified = []
-        for phrase in text:
-            phrase_phonified = []
-            for word in phrase.split(" "):
-                if self.__is_english_word(word):
-                    if word in phone_dictionary["english"]:
-                        phrase_phonified.append(str(phone_dictionary["english"][word]))
-                    else:
-                        phrase_phonified.append(str(self.en_g2p(word)))
-                elif word in phone_dictionary[language]:
-                    # if a word could not be parsed, skip it
-                    phrase_phonified.append(str(phone_dictionary[language][word]))
-            # text_phonified.append(self.__post_phonify(" ".join(phrase_phonified),language, gender))
-            text_phonified.append(" ".join(phrase_phonified))
-        return text_phonified
-    def __merge_lists(self, lists):
-        merged_string = ""
-        for list in lists:
-            for word in list:
-                merged_string += word + " "
-        return merged_string.strip()
-    def __phonify_list(self, text, language, gender, phone_dictionary):
-        # text is expected to be a list of list of strings
-        words = set(self.__merge_lists(text).split(" "))
-        non_dict_words = []
-        if language in phone_dictionary:
-            for word in words:
-                if word not in phone_dictionary[language] and (language == "english" or (not self.__is_english_word(word))):
-                    non_dict_words.append(word)
-        else:
-            non_dict_words = words
-        if len(non_dict_words) > 0:
-            print(len(non_dict_words))
-            print(non_dict_words)
-            # unified parser has to be run for the non dictionary words
-            os.makedirs("tmp", exist_ok=True)
-            timestamp = str(time.time())
-            non_dict_words_file = os.path.abspath("tmp/non_dict_words_" + timestamp)
-            out_dict_file = os.path.abspath("tmp/out_dict_" + timestamp)
-            with open(non_dict_words_file, "w") as f:
-                f.write("\n".join(non_dict_words))
-            if(language == 'tamil'):
-                current_directory = os.getcwd()
-                #tamil_parser_cmd = "tamil_parser.sh"
-                tamil_parser_cmd = f"{current_directory}/ssn_parser_new/tamil_parser.py"
-                #subprocess.run(["bash", tamil_parser_cmd, non_dict_words_file, out_dict_file, timestamp, "ssn_parser"])
-                subprocess.run(["python", tamil_parser_cmd, non_dict_words_file, out_dict_file, timestamp, f"{current_directory}/ssn_parser_new"])
-            elif(language == 'english'):
-                phn_out_dict = {}
-                for i in range(0,len(non_dict_words)):
-                    phn_out_dict[non_dict_words[i]] = self.en_g2p(non_dict_words[i])
-                # Create a string representation of the dictionary
-                data_str = "\n".join([f"{key}\t{value}" for key, value in phn_out_dict.items()])
-                print(f"data_str: {data_str}")
-                with open(out_dict_file, "w") as f:
-                    f.write(data_str)
-            else:
-                out_dict_file = os.path.abspath("tmp/out_dict_" + timestamp)
-                from get_phone_mapped_python import TextReplacer
-                from indic_unified_parser.uparser import wordparse
-                text_replacer=TextReplacer()
-                parsed_output_list = []
-                for word in non_dict_words:
-                    parsed_word = wordparse(word, 0, 0, 1)
-                    parsed_output_list.append(parsed_word)
-                replaced_output_list = [text_replacer.apply_replacements(parsed_word) for parsed_word in parsed_output_list]
-                with open(out_dict_file, 'w', encoding='utf-8') as file:
-                    for original_word, formatted_word in zip(non_dict_words, replaced_output_list):
-                        line = f"{original_word}\t{formatted_word}\n"
-                        file.write(line)
-                        print(line, end='')
-            try:
-                df = pd.read_csv(out_dict_file, delimiter="\t", header=None, dtype=str)
-                new_dict = df.dropna().set_index(0).to_dict('dict')[1]
-                print(new_dict)
-                if language not in phone_dictionary:
-                    phone_dictionary[language] = new_dict
-                else:
-                    phone_dictionary[language].update(new_dict)
-                # run a non-blocking child process to update the dictionary file
-                p = Process(target=add_to_dictionary, args=(new_dict, os.path.join(self.dict_location, language)))
-                p.start()
-            except Exception as err:
-                traceback.print_exc()
-        # phonify text with dictionary
-        text_phonified = []
-        for line in text:
-            line_phonified = []
-            for phrase in line:
-                phrase_phonified = []
-                for word in phrase.split(" "):
-                    if self.__is_english_word(word):
-                        if word in phone_dictionary["english"]:
-                            phrase_phonified.append(str(phone_dictionary["english"][word]))
-                        else:
-                            phrase_phonified.append(str(self.en_g2p(word)))
-                    elif word in phone_dictionary[language]:
-                        # if a word could not be parsed, skip it
-                        phrase_phonified.append(str(phone_dictionary[language][word]))
-                # line_phonified.append(self.__post_phonify(" ".join(phrase_phonified), language, gender))
-                line_phonified.append(" ".join(phrase_phonified))
-            text_phonified.append(line_phonified)
-        return text_phonified
-    def phonify(self, text, language, gender, phone_dictionary):
-        if not isinstance(text, list):
-            out = self.__phonify([text], language, gender)
-            return out[0]
-        return self.__phonify(text, language, gender, phone_dictionary)
-    def phonify_list(self, text, language, gender, phone_dictionary):
-        if isinstance(text, list):
-            return self.__phonify_list(text, language, gender, phone_dictionary)
-        else:
-            print("Error!! Expected to have a list as input.")
-class TextNormalizer:
-    def __init__(self, char_map_location=None):
-        # self.phonifier = phonifier
-        if char_map_location is None:
-            char_map_location = "charmap"
-        # this is a static set of cleaning rules to be applied
-        self.cleaning_rules = {
-            " +" : " ",
-            "^ +" : "",
-            " +$" : "",
-            "#$" : "",
-            "# +$" : "",
-        }
-        # this is the list of languages supported by num_to_words
-        self.keydict = {"english" : "en",
-            "hindi" : "hi",
-            "gujarati" : "gu",
-            "marathi" : "mr",
-            "bengali" : "bn",
-            "telugu" : "te",
-            "tamil" : "ta",
-            "kannada" : "kn",
-            "odia" : "or",
-            "punjabi" : "pa"
-        }
-        # self.g2p = G2p()
-        # print('Loading G2P model... Done!')
-    def __post_cleaning(self, text):
-        for key, replacement in self.cleaning_rules.items():
-            text = re.sub(key, replacement, text)
-        return text
-    def __post_cleaning_list(self, text):
-        # input is supposed to be a list of strings
-        output_text = []
-        for line in text:
-            for key, replacement in self.cleaning_rules.items():
-                line = re.sub(key, replacement, line)
-            output_text.append(line)
-        return output_text
-    def __check_char_type(self, str_c):
-        # Determine the type of the character
-        if str_c.isnumeric():
-            char_type = "number"
-        elif str_c in string.punctuation:
-            char_type = "punctuation"
-        elif str_c in string.whitespace:
-            char_type = "whitespace"
-        elif str_c.isalpha() and str_c.isascii():
-            char_type = "ascii"
-        else:
-            char_type = "non-ascii"
-        return char_type
-    def insert_space(self, text):
-        '''
-        Check if the text contains numbers and English words and if they are without space inserts space between them.
-        '''
-        # Initialize variables to track the previous character type and whether a space should be inserted
-        prev_char_type = None
-        next_char_type = None
-        insert_space = False
-        # Output string
-        output_string = ""
-        # Iterate through each character in the text
-        for i, c in enumerate(text):
-            # Determine the type of the character
-            char_type = self.__check_char_type(c)
-            if i == (len(text) - 1):
-                next_char_type = None
-            else:
-                next_char_type = self.__check_char_type(text[i+1])
-            # print(f"{i}: {c} is a {char_type} character and next character is a {next_char_type}")
-            # If the character type has changed from the previous character, check if a space should be inserted
-            if (char_type != prev_char_type and prev_char_type != None and char_type != "punctuation" and char_type != "whitespace"):
-                if next_char_type != "punctuation" or next_char_type != "whitespace":
-                    insert_space = True
-            # Insert a space if needed
-            if insert_space:
-                output_string += " "+c
-                insert_space = False
-            else:
-                output_string += c
-            # Update the previous character type
-            prev_char_type = char_type
-        # Print the modified text
-        output_string = re.sub(r' +', ' ', output_string)
-        return output_string
-    def insert_space_list(self, text):
-        '''
-        Expect the input to be in form of list of string.
-        Check if the text contains numbers and English words and if they are without space inserts space between them.
-        '''
-        # Output string list
-        output_list = []
-        for line in text:
-            # Initialize variables to track the previous character type and whether a space should be inserted
-            prev_char_type = None
-            next_char_type = None
-            insert_space = False
-            # Output string
-            output_string = ""
-            # Iterate through each character in the line
-            for i, c in enumerate(line):
-                # Determine the type of the character
-                char_type = self.__check_char_type(c)
-                if i == (len(line) - 1):
-                    next_char_type = None
-                else:
-                    next_char_type = self.__check_char_type(line[i+1])
-                # print(f"{i}: {c} is a {char_type} character and next character is a {next_char_type}")
-                # If the character type has changed from the previous character, check if a space should be inserted
-                if (char_type != prev_char_type and prev_char_type != None and char_type != "punctuation" and char_type != "whitespace"):
-                    if next_char_type != "punctuation" or next_char_type != "whitespace":
-                        insert_space = True
-                # Insert a space if needed
-                if insert_space:
-                    output_string += " "+c
-                    insert_space = False
-                else:
-                    output_string += c
-                # Update the previous character type
-                prev_char_type = char_type
-            # Print the modified line
-            output_string = re.sub(r' +', ' ', output_string)
-            output_list.append(output_string)
-        return output_list
-    def num2text(self, text, language):
-        if language in self.keydict.keys():
-            digits = sorted(list(map(int, re.findall(r'\d+', text))),reverse=True)
-            if digits:
-                for digit in digits:
-                    text = re.sub(str(digit), ' '+num_to_word(digit, self.keydict[language])+' ', text)
-            return self.__post_cleaning(text)
-        else:
-            print(f"No num-to-char for the given language {language}.")
-            return self.__post_cleaning(text)
-    def num2text_list(self, text, language):
-        # input is supposed to be a list of strings
-        if language in self.keydict.keys():
-            output_text = []
-            for line in text:
-                digits = sorted(list(map(int, re.findall(r'\d+', line))),reverse=True)
-                if digits:
-                    for digit in digits:
-                        line = re.sub(str(digit), ' '+num_to_word(digit, self.keydict[language])+' ', line)
-                output_text.append(line)
-            return self.__post_cleaning_list(output_text)
-        else:
-            print(f"No num-to-char for the given language {language}.")
-            return self.__post_cleaning_list(text)
-    def numberToTextConverter(self, text, language):
-        if language in self.keydict.keys():
-            matches = re.findall(r'\d+\.\d+|\d+', text)
-            digits = sorted([int(match) if match.isdigit() else match if re.match(r'^\d+(\.\d+)?$', match) else str(match) for match in matches], key=lambda x: float(x) if isinstance(x, str) and '.' in x else x, reverse=True)
-            if digits:
-                for digit in digits:
-                    if isinstance(digit, int):
-                        text = re.sub(str(digit), ' '+num_to_word(digit, self.keydict[language]).replace(",", "")+' ', text)
-                    else:
-                        parts = str(digit).split('.')
-                        integer_part = int(parts[0])
-                        data1 = num_to_word(integer_part, self.keydict[language]).replace(",", "")
-                        decimal_part = str(parts[1])
-                        data2 = ''
-                        for i in decimal_part:
-                            data2 = data2+' '+num_to_word(i, self.keydict[language])
-                        if language == 'hindi':
-                            final_data = f'{data1} दशमलव {data2}'
-                        elif language == 'tamil':
-                            final_data = f'{data1} புள்ளி {data2}'
-                        else:
-                            final_data = f'{data1} point {data2}'
-                        text = re.sub(str(digit), ' '+final_data+' ', text)
-            return self.__post_cleaning(text)
-        else:
-            words = {
-                '0': 'zero', '1': 'one', '2': 'two', '3': 'three', '4': 'four',
-                '5': 'five', '6': 'six', '7': 'seven', '8': 'eight', '9': 'nine'
-            }
-            # Use regular expression to find and replace decimal points in numbers
-            text = re.sub(r'(?<=\d)\.(?=\d)', ' point ', text)
-            # Find all occurrences of numbers with decimal points and convert them to words
-            matches = re.findall(r'point (\d+)', text)
-            for match in matches:
-                replacement = ' '.join(words[digit] for digit in match)
-                text = text.replace(f'point {match}', f'point {replacement}', 1)
-            return self.__post_cleaning(text)
-    def normalize(self, text, language):
-        return self.__post_cleaning(text)
-    def normalize_list(self, text, language):
-        # input is supposed to be a list of strings
-        return self.__post_cleaning_list(text)
-class TextPhrasifier:
-    @classmethod
-    def phrasify(cls, text):
-        phrase_list = []
-        for phrase in text.split("#"):
-            phrase = phrase.strip()
-            if phrase != "":
-                phrase_list.append(phrase)
-        return phrase_list
-class TextPhrasifier_List:
-    @classmethod
-    def phrasify(cls, text):
-        # input is supposed to be a list of strings
-        # output is list of list of strings
-        output_list = []
-        for line in text:
-            phrase_list = []
-            for phrase in line.split("#"):
-                phrase = phrase.strip()
-                if phrase != "":
-                    phrase_list.append(phrase)
-            output_list.append(phrase_list)
-        return output_list
-class DurAlignTextProcessor:
-    def __init__(self):
-        # this is a static set of cleaning rules to be applied
-        self.cleaning_rules = {
-            " +" : " ",
-           "^" : "$",
-            "$" : ".",
-        }
-        self.cleaning_rules_English = {
-            " +" : " ",
-            "$" : ".",
-        }
-    def textProcesor(self, text):
-        for key, replacement in self.cleaning_rules.items():
-            for idx in range(0,len(text)):
-                text[idx] = re.sub(key, replacement, text[idx])
-        return text
-    def textProcesorForEnglish(self, text):
-        for key, replacement in self.cleaning_rules_English.items():
-            for idx in range(0,len(text)):
-                text[idx] = re.sub(key, replacement, text[idx])
-        return text
-    def textProcesor_list(self, text):
-        # input expected in 'list of list of string' format
-        output_text = []
-        for line in text:
-            for key, replacement in self.cleaning_rules.items():
-                for idx in range(0,len(line)):
-                    line[idx] = re.sub(key, replacement, line[idx])
-            output_text.append(line)
-        return output_text
-class SharedInit:
-    def __init__(self,
-                text_cleaner = TextCleaner(),
-                text_normalizer=TextNormalizer(),
-                phonifier = Phonifier(),
-                text_phrasefier = TextPhrasifier(),
-                post_processor = DurAlignTextProcessor()):
-        self.text_cleaner = text_cleaner
-        self.text_normalizer = text_normalizer
-        self.phonifier = phonifier
-        self.text_phrasefier = text_phrasefier
-        self.post_processor = post_processor
-class TTSDurAlignPreprocessor(SharedInit):
-    def preprocess(self, text, language, gender, phone_dictionary):
-        # text = text.strip()
-        #print(text)
-        text = self.text_normalizer.numberToTextConverter(text, language)
-        text = self.text_cleaner.clean(text)
-        #print("cleaned text", text)
-        # text = self.text_normalizer.insert_space(text)
-        #text = self.text_normalizer.num2text(text, language)
-        # print(text)
-        text = self.text_normalizer.normalize(text, language)
-        # print(text)
-        phrasified_text = TextPhrasifier.phrasify(text)
-        #print("phrased",phrasified_text)
-        if language not in list(phone_dictionary.keys()):
-            phone_dictionary = self.phonifier.load_lang_dict(language, phone_dictionary)
-        #print(phone_dictionary.keys())
-        phonified_text = self.phonifier.phonify(phrasified_text, language, gender, phone_dictionary)
-        #print("phonetext",phonified_text)
-        phonified_text = self.post_processor.textProcesor(phonified_text)
-        #print(phonified_text)
-        return phonified_text, phrasified_text
-class TTSDurAlignPreprocessor_VTT(SharedInit):
-    def preprocess(self, text, language, gender):
-        # text = text.strip()
-        text = self.text_cleaner.clean_list(text)
-        # text = self.text_normalizer.insert_space_list(text)
-        text = self.text_normalizer.num2text_list(text, language)
-        text = self.text_normalizer.normalize_list(text, language)
-        phrasified_text = TextPhrasifier_List.phrasify(text)
-        phonified_text = self.phonifier.phonify_list(phrasified_text, language, gender)
-        phonified_text = self.post_processor.textProcesor_list(phonified_text)
-        return phonified_text, phrasified_text
-class CharTextPreprocessor(SharedInit):
-    def preprocess(self, text, language, gender=None, phone_dictionary=None):
-        text = text.strip()
-        text = self.text_normalizer.numberToTextConverter(text, language)
-        text = self.text_cleaner.clean(text)
-        # text = self.text_normalizer.insert_space(text)
-        #text = self.text_normalizer.num2text(text, language)
-        text = self.text_normalizer.normalize(text, language)
-        phrasified_text = TextPhrasifier.phrasify(text)
-        phonified_text = phrasified_text # No phonification for character TTS models
-        return phonified_text, phrasified_text
-class CharTextPreprocessor_VTT(SharedInit):
-    def preprocess(self, text, language, gender=None):
-        # text = text.strip()
-        text = self.text_cleaner.clean_list(text)
-        # text = self.text_normalizer.insert_space_list(text)
-        text = self.text_normalizer.num2text_list(text, language)
-        text = self.text_normalizer.normalize_list(text, language)
-        phrasified_text = TextPhrasifier_List.phrasify(text)
-        phonified_text = phrasified_text # No phonification for character TTS models
-        return phonified_text, phrasified_text
-class TTSPreprocessor(SharedInit):
-    def preprocess(self, text, language, gender, phone_dictionary):
-        text = text.strip()
-        text = self.text_normalizer.numberToTextConverter(text, language)
-        text = self.text_cleaner.clean(text)
-        # text = self.text_normalizer.insert_space(text)
-        #text = self.text_normalizer.num2text(text, language)
-        text = self.text_normalizer.normalize(text, language)
-        phrasified_text = TextPhrasifier.phrasify(text)
-        if language not in list(phone_dictionary.keys()):
-            phone_dictionary = self.phonifier.load_lang_dict(language, phone_dictionary)
-        phonified_text = self.phonifier.phonify(phrasified_text, language, gender, phone_dictionary)
-        #print(phonified_text)
-        phonified_text = self.post_processor.textProcesorForEnglish(phonified_text)
-        #print(phonified_text)
-        return phonified_text, phrasified_text
-class TTSPreprocessor_VTT(SharedInit):
-    def preprocess(self, text, language, gender):
-        # print(f"Original text: {text}")
-        text = self.text_cleaner.clean_list(text)
-        # print(f"After text cleaner: {text}")
-        # text = self.text_normalizer.insert_space_list(text)
-        # print(f"After insert space: {text}")
-        text = self.text_normalizer.num2text_list(text, language)
-        # print(f"After num2text: {text}")
-        text = self.text_normalizer.normalize_list(text, language)
-        # print(f"After text normalizer: {text}")
-        phrasified_text = TextPhrasifier_List.phrasify(text)
-        # print(f"phrasified_text: {phrasified_text}")
-        phonified_text = self.phonifier.phonify_list(phrasified_text, language, gender)
-        # print(f"phonified_text: {phonified_text}")
-        return phonified_text, phrasified_text

+'''
+TTS Preprocessing
+Developed by Arun Kumar A(CS20S013) - November 2022
+Updated by Utkarsh Pathak (DA24S011) - September 2023
+'''
+import os
+import re
+import sys
+import json
+import pandas as pd
+import string
+from collections import defaultdict
+import time
+import subprocess
+import shutil
+from multiprocessing import Process
+import traceback
+#imports of dependencies from environment.yml
+# sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__))))
+from num_to_words import num_to_word
+from g2p_en import G2p
+from NumberToText import NumberDictionary
+def add_to_dictionary(dict_to_add, dict_file):
+    append_string = ""
+    for key, value in dict_to_add.items():
+        append_string += (str(key) + " " + str(value) + "\n")
+    if os.path.isfile(dict_file):
+        # make a copy of the dictionary
+        source_dir = os.path.dirname(dict_file)
+        dict_file_name = os.path.basename(dict_file)
+        temp_file_name = "." + dict_file_name + ".temp"
+        temp_dict_file = os.path.join(source_dir, temp_file_name)
+        shutil.copy(dict_file, temp_dict_file)
+        # append the new words in the dictionary to the temp file
+        with open(temp_dict_file, "a") as f:
+            f.write(append_string)
+        # check if the write is successful and then replace the temp file as the dict file
+        try:
+            df_orig = pd.read_csv(dict_file, delimiter=" ", header=None, dtype=str)
+            df_temp = pd.read_csv(temp_dict_file, delimiter=" ", header=None, dtype=str)
+            if len(df_temp) > len(df_orig):
+                os.replace(temp_dict_file, dict_file)
+                print(f"{len(dict_to_add)} new words appended to Dictionary: {dict_file}")
+        except:
+            print(traceback.format_exc())
+    else:
+        # create a new dictionary
+        with open(dict_file, "a") as f:
+            f.write(append_string)
+        print(f"New Dictionary: {dict_file} created with {len(dict_to_add)} words")
+class TextCleaner:
+    def __init__(self):
+        # this is a static set of cleaning rules to be applied
+        self.cleaning_rules = {
+            " +" : " ",
+            "^ +" : "",
+            " +$" : "",
+            "#" : "",
+            "[.,;।!](\r\n)*" : "# ",
+            "[.,;।!](\n)*" : "# ",
+            "(\r\n)+" : "# ",
+            "(\n)+" : "# ",
+            "(\r)+" : "# ",
+            """[?;:)(!|&’‘,।\."]""": "",
+            "[/']" : "",
+            "[-–]" : " ",
+        }
+    def clean(self, text):
+        for key, replacement in self.cleaning_rules.items():
+            text = re.sub(key, replacement, text)
+        return text
+    def clean_list(self, text):
+        # input is supposed to be a list of strings
+        output_text = []
+        for line in text:
+            line = line.strip()
+            for key, replacement in self.cleaning_rules.items():
+                line = re.sub(key, replacement, line)
+            output_text.append(line)
+        return output_text
+class Phonifier:
+    def __init__(self, dict_location=None):
+        if dict_location is None:
+            dict_location = "phone_dict"
+        self.dict_location = dict_location
+        self.devanagari_langs = {
+            "bodo",
+            "hindi",
+            "kashmiri",
+            "marathi",
+            "nepali",
+            "santali",
+            "rajasthani",
+            "sindhi",
+            "dogri",
+            "sanskrit",
+            "maithili",
+            "konkani",
+        }
+        self.phone_dictionary = {}
+        # load dictionary for all the available languages
+        for dict_file in os.listdir(dict_location):
+            try:
+                if dict_file.startswith("."):
+                    # ignore hidden files
+                    continue
+                language = dict_file
+                dict_file_path = os.path.join(dict_location, dict_file)
+                df = pd.read_csv(dict_file_path, delimiter=" ", header=None, dtype=str)
+                self.phone_dictionary[language] = df.set_index(0).to_dict('dict')[1]
+            except Exception as e:
+                #print(traceback.format_exc())
+                print("")
+        print("Phone dictionary loaded for the following languages:", list(self.phone_dictionary.keys()))
+        self.g2p = G2p()
+        print('Loading G2P model... Done!')
+        # Mapping between the cmu phones and the iitm cls
+        self.cmu_2_cls_map = {
+            "AA" : "aa",
+            "AA0" : "aa",
+            "AA1" : "aa",
+            "AA2" : "aa",
+            "AE" : "axx",
+            "AE0" : "axx",
+            "AE1" : "axx",
+            "AE2" : "axx",
+            "AH" : "a",
+            "AH0" : "a",
+            "AH1" : "a",
+            "AH2" : "a",
+            "AO" : "ax",
+            "AO0" : "ax",
+            "AO1" : "ax",
+            "AO2" : "ax",
+            "AW" : "ou",
+            "AW0" : "ou",
+            "AW1" : "ou",
+            "AW2" : "ou",
+            "AX" : "a",
+            "AY" : "ei",
+            "AY0" : "ei",
+            "AY1" : "ei",
+            "AY2" : "ei",
+            "B" : "b",
+            "CH" : "c",
+            "D" : "dx",
+            "DH" : "d",
+            "EH" : "ee",
+            "EH0" : "ee",
+            "EH1" : "ee",
+            "EH2" : "ee",
+            "ER" : "a r",
+            "ER0" : "a r",
+            "ER1" : "a r",
+            "ER2" : "a r",
+            "EY" : "ee",
+            "EY0" : "ee",
+            "EY1" : "ee",
+            "EY2" : "ee",
+            "F" : "f",
+            "G" : "g",
+            "HH" : "h",
+            "IH" : "i",
+            "IH0" : "i",
+            "IH1" : "i",
+            "IH2" : "i",
+            "IY" : "ii",
+            "IY0" : "ii",
+            "IY1" : "ii",
+            "IY2" : "ii",
+            "JH" : "j",
+            "K" : "k",
+            "L" : "l",
+            "M" : "m",
+            "N" : "n",
+            "NG" : "ng",
+            "OW" : "o",
+            "OW0" : "o",
+            "OW1" : "o",
+            "OW2" : "o",
+            "OY" : "ei",
+            "OY0" : "ei",
+            "OY1" : "ei",
+            "OY2" : "ei",
+            "P" : "p",
+            "R" : "r",
+            "S" : "s",
+            "SH" : "sh",
+            "T" : "tx",
+            "TH" : "t",
+            "UH" : "u",
+            "UH0" : "u",
+            "UH1" : "u",
+            "UH2" : "u",
+            "UW" : "uu",
+            "UW0" : "uu",
+            "UW1" : "uu",
+            "UW2" : "uu",
+            "V" : "w",
+            "W" : "w",
+            "Y" : "y",
+            "Z" : "z",
+            "ZH" : "sh",
+        }
+        # Mapping between the iitm cls and iitm char
+        self.cls_2_chr_map = {
+            "aa" : "A",
+            "ii" : "I",
+            "uu" : "U",
+            "ee" : "E",
+            "oo" : "O",
+            "nn" : "N",
+            "ae" : "ऍ",
+            "ag" : "ऽ",
+            "au" : "औ",
+            "axx" : "अ",
+            "ax" : "ऑ",
+            "bh" : "B",
+            "ch" : "C",
+            "dh" : "ध",
+            "dx" : "ड",
+            "dxh" : "ढ",
+            "dxhq" : "T",
+            "dxq" : "D",
+            "ei" : "ऐ",
+            "ai" : "ऐ",
+            "eu" : "உ",
+            "gh" : "घ",
+            "gq" : "G",
+            "hq" : "H",
+            "jh" : "J",
+            "kh" : "ख",
+            "khq" : "K",
+            "kq" : "क",
+            "ln" : "ൾ",
+            "lw" : "ൽ",
+            "lx" : "ള",
+            "mq" : "M",
+            "nd" : "न",
+            "ng" : "ङ",
+            "nj" : "ञ",
+            "nk" : "Y",
+            "nw" : "ൺ",
+            "nx" : "ण",
+            "ou" : "औ",
+            "ph" : "P",
+            "rq" : "R",
+            "rqw" : "ॠ",
+            "rw" : "ർ",
+            "rx" : "र",
+            "sh" : "श",
+            "sx" : "ष",
+            "th" : "थ",
+            "tx" : "ट",
+            "txh" : "ठ",
+            "wv" : "W",
+            "zh" : "Z",
+        }
+        # Multilingual support for OOV characters
+        oov_map_json_file = 'multilingualcharmap.json'
+        with open(oov_map_json_file, 'r') as oov_file:
+            self.oov_map = json.load(oov_file)
+    def __is_float(self, word):
+        parts = word.split('.')
+        if len(parts) != 2:
+            return False
+        return parts[0].isdecimal() and parts[1].isdecimal()
+    def en_g2p(self, word):
+        phn_out = self.g2p(word)
+        # print(f"phn_out: {phn_out}")
+        # iterate over the string list and replace each word with the corresponding value from the dictionary
+        for i, phn in enumerate(phn_out):
+            if phn in self.cmu_2_cls_map.keys():
+                phn_out[i] = self.cmu_2_cls_map[phn]
+                # cls_out = self.cmu_2_cls_map[phn]
+                if phn_out[i] in self.cls_2_chr_map.keys():
+                    phn_out[i] = self.cls_2_chr_map[phn_out[i]]
+                else:
+                    pass
+            else:
+                pass  # ignore words that are not in the dictionary
+            # print(f"i: {i}, phn: {phn}, cls_out: {cls_out}, phn_out: {phn_out[i]}")
+        return ("".join(phn_out)).strip().replace(" ", "")
+    def __post_phonify(self, text, language, gender):
+        language_gender_id = language+'_'+gender
+        if language_gender_id in self.oov_map.keys():
+            output_string = ''
+            for char in text:
+                if char in self.oov_map[language_gender_id].keys():
+                    output_string += self.oov_map[language_gender_id][char]
+                else:
+                    output_string += char
+                # output_string += self.oov_map['language_gender_id']['char']
+            return output_string
+        else:
+            return text
+    def __is_english_word(self, word):
+        maxchar = max(word)
+        if u'\u0000' <= maxchar <= u'\u007f':
+            return True
+        return False
+    def __normalize_phones(self, phone_str):
+        """
+        Apply phone-level replacements after phonification
+        """
+        replace_map = {
+            "P": "f",
+            "ष": "श",
+            "ग़": "ग",
+            "T": "ढ",
+            "ऱ": "r",
+            "jञ": "gy",
+            "क़":"क",
+            "ख़":"ख",
+            "ड़":"ड",
+            "फ़":"फ",
+            "य़":"य",
+            "ऱ":"r",
+            "Y":"",
+            "G":"g",
+            "क":"k",
+            "ay":"E",
+            "kH":"ख",
+            "ऩ":"n"
+        }
+        for src, tgt in replace_map.items():
+            phone_str = phone_str.replace(src, tgt)
+        return phone_str
+    def __phonify(self, text, language, gender):
+        # text is expected to be a list of strings
+        words = set((" ".join(text)).split(" "))
+        #print(f"words test: {words}")
+        non_dict_words = []
+        if language in self.phone_dictionary:
+            for word in words:
+                # print(f"word: {word}")
+                if word not in self.phone_dictionary[language] and (language == "english" or (not self.__is_english_word(word))):
+                    non_dict_words.append(word)
+                    #print('INSIDE IF CONDITION OF ADDING WORDS')
+        else:
+            non_dict_words = words
+        print(f"word not in dict: {non_dict_words}")
+        if len(non_dict_words) > 0:
+            # unified parser has to be run for the non dictionary words
+            os.makedirs("tmp", exist_ok=True)
+            timestamp = str(time.time())
+            non_dict_words_file = os.path.abspath("tmp/non_dict_words_" + timestamp)
+            out_dict_file = os.path.abspath("tmp/out_dict_" + timestamp)
+            with open(non_dict_words_file, "w") as f:
+                f.write("\n".join(non_dict_words))
+            if(language == 'tamil'):
+                current_directory = os.getcwd()
+                tamil_parser_cmd = f"{current_directory}/ssn_parser/Tamil_Parser/ssn_parser/tamil_parser.sh"
+                subprocess.run(["bash", tamil_parser_cmd, non_dict_words_file, out_dict_file, timestamp, f"{current_directory}/ssn_parser/Tamil_Parser/ssn_parser"])
+            elif(language == 'english'):
+                phn_out_dict = {}
+                for i in range(0,len(non_dict_words)):
+                    phn_out_dict[non_dict_words[i]] = self.en_g2p(non_dict_words[i])
+                # Create a string representation of the dictionary
+                data_str = "\n".join([f"{key}\t{value}" for key, value in phn_out_dict.items()])
+                print(f"data_str: {data_str}")
+                with open(out_dict_file, "w") as f:
+                    f.write(data_str)
+            else:
+                # unified_parser_cmd = "phonify_wrapper.sh"
+                # subprocess.run(["bash", unified_parser_cmd, non_dict_words_file, out_dict_file, timestamp, "/speech/arun/tts/tts_api/text2phone/"])
+                out_dict_file = os.path.abspath("tmp/out_dict_" + timestamp)
+                from get_phone_mapped_python import TextReplacer
+                from indic_unified_parser.uparser import wordparse
+                text_replacer=TextReplacer()
+                # def write_output_to_file(output_text, file_path):
+                #     with open(file_path, 'w') as f:
+                #         f.write(output_text)
+                parsed_output_list = []
+                for word in non_dict_words:
+                    parsed_word = wordparse(word, 0, 0, 1)
+                    parsed_output_list.append(parsed_word)
+                replaced_output_list = [text_replacer.apply_replacements(parsed_word) for parsed_word in parsed_output_list]
+                with open(out_dict_file, 'w', encoding='utf-8') as file:
+                    for original_word, formatted_word in zip(non_dict_words, replaced_output_list):
+                        line = f"{original_word}\t{formatted_word}\n"
+                        file.write(line)
+                        print(line, end='')
+            try:
+                df = pd.read_csv(out_dict_file, delimiter="\t", header=None, dtype=str)
+                #print('DATAFRAME OUTPUT FILE', df.head())
+                new_dict = df.dropna().set_index(0).to_dict('dict')[1]
+                #print("new dict",new_dict)
+                if language not in self.phone_dictionary:
+                    self.phone_dictionary[language] = new_dict
+                else:
+                    self.phone_dictionary[language].update(new_dict)
+                # run a non-blocking child process to update the dictionary file
+                #print("phone_dict", self.phone_dictionary)
+                p = Process(target=add_to_dictionary, args=(new_dict, os.path.join(self.dict_location, language)))
+                p.start()
+            except Exception as err:
+                print(f"Error: While loading {out_dict_file}")
+                #traceback.print_exc()
+        # phonify text with dictionary
+        text_phonified = []
+        for phrase in text:
+            phrase_phonified = []
+            for word in phrase.split(" "):
+                if self.__is_english_word(word):
+                    if word in self.phone_dictionary["english"]:
+                        phrase_phonified.append(str(self.phone_dictionary["english"][word]))
+                    else:
+                        phrase_phonified.append(str(self.en_g2p(word)))
+                elif word in self.phone_dictionary[language]:
+                    # if a word could not be parsed, skip it
+                    phrase_phonified.append(str(self.phone_dictionary[language][word]))
+            # text_phonified.append(self.__post_phonify(" ".join(phrase_phonified),language, gender))
+            phone_line = " ".join(phrase_phonified)
+            if language in self.devanagari_langs:
+                phone_line = self.__normalize_phones(phone_line)
+            text_phonified.append(phone_line)
+        return text_phonified
+    def __merge_lists(self, lists):
+        merged_string = ""
+        for list in lists:
+            for word in list:
+                merged_string += word + " "
+        return merged_string.strip()
+    def __phonify_list(self, text, language, gender):
+        # text is expected to be a list of list of strings
+        words = set(self.__merge_lists(text).split(" "))
+        non_dict_words = []
+        if language in self.phone_dictionary:
+            for word in words:
+                if word not in self.phone_dictionary[language] and (language == "english" or (not self.__is_english_word(word))):
+                    non_dict_words.append(word)
+        else:
+            non_dict_words = words
+        if len(non_dict_words) > 0:
+            print(len(non_dict_words))
+            print(non_dict_words)
+            # unified parser has to be run for the non dictionary words
+            os.makedirs("tmp", exist_ok=True)
+            timestamp = str(time.time())
+            non_dict_words_file = os.path.abspath("tmp/non_dict_words_" + timestamp)
+            out_dict_file = os.path.abspath("tmp/out_dict_" + timestamp)
+            with open(non_dict_words_file, "w") as f:
+                f.write("\n".join(non_dict_words))
+            if(language == 'tamil'):
+                tamil_parser_cmd = "tamil_parser.sh"
+                subprocess.run(["bash", tamil_parser_cmd, non_dict_words_file, out_dict_file, timestamp, "ssn_parser/"])
+            elif(language == 'english'):
+                phn_out_dict = {}
+                for i in range(0,len(non_dict_words)):
+                    phn_out_dict[non_dict_words[i]] = self.en_g2p(non_dict_words[i])
+                # Create a string representation of the dictionary
+                data_str = "\n".join([f"{key}\t{value}" for key, value in phn_out_dict.items()])
+                print(f"data_str: {data_str}")
+                with open(out_dict_file, "w") as f:
+                    f.write(data_str)
+            else:
+                unified_parser_cmd = "phonify_wrapper.sh"
+                subprocess.run(["bash", unified_parser_cmd, non_dict_words_file, out_dict_file, timestamp, "ssn_parser/"])
+            # unified_parser_cmd = "phonify_wrapper.sh"
+            # subprocess.run(["bash", unified_parser_cmd, non_dict_words_file, out_dict_file, timestamp, "/var/www/html/IITM_TTS/E2E_TTS_FS2/text_proc/text2phone/"])
+            try:
+                df = pd.read_csv(out_dict_file, delimiter="\t", header=None, dtype=str)
+                new_dict = df.dropna().set_index(0).to_dict('dict')[1]
+                print(new_dict)
+                if language not in self.phone_dictionary:
+                    self.phone_dictionary[language] = new_dict
+                else:
+                    self.phone_dictionary[language].update(new_dict)
+                # run a non-blocking child process to update the dictionary file
+                p = Process(target=add_to_dictionary, args=(new_dict, os.path.join(self.dict_location, language)))
+                p.start()
+            except Exception as err:
+                traceback.print_exc()
+        # phonify text with dictionary
+        text_phonified = []
+        for line in text:
+            line_phonified = []
+            for phrase in line:
+                phrase_phonified = []
+                for word in phrase.split(" "):
+                    if self.__is_english_word(word):
+                        if word in self.phone_dictionary["english"]:
+                            phrase_phonified.append(str(self.phone_dictionary["english"][word]))
+                        else:
+                            phrase_phonified.append(str(self.en_g2p(word)))
+                    elif word in self.phone_dictionary[language]:
+                        # if a word could not be parsed, skip it
+                        phrase_phonified.append(str(self.phone_dictionary[language][word]))
+                # line_phonified.append(self.__post_phonify(" ".join(phrase_phonified), language, gender))
+                line_phonified.append(" ".join(phrase_phonified))
+            text_phonified.append(line_phonified)
+        return text_phonified
+    def phonify(self, text, language, gender):
+        if not isinstance(text, list):
+            out = self.__phonify([text], language, gender)
+            return out[0]
+        return self.__phonify(text, language, gender)
+    def phonify_list(self, text, language, gender):
+        if isinstance(text, list):
+            return self.__phonify_list(text, language, gender)
+        else:
+            print("Error!! Expected to have a list as input.")
+class TextNormalizer:
+    def __init__(self, char_map_location=None, phonifier = Phonifier()):
+        self.phonifier = phonifier
+        if char_map_location is None:
+            char_map_location = "charmap"
+        # this is a static set of cleaning rules to be applied
+        self.cleaning_rules = {
+            " +" : " ",
+            "^ +" : "",
+            " +$" : "",
+            "#$" : "",
+            "# +$" : "",
+        }
+        # this is the list of languages supported by num_to_words
+        self.keydict = {"english" : "en",
+            "hindi" : "hi",
+            "gujarati" : "gu",
+            "marathi" : "mr",
+            "bengali" : "bn",
+            "telugu" : "te",
+            "tamil" : "ta",
+            "kannada" : "kn",
+            "odia" : "or",
+            "punjabi" : "pa"
+        }
+        self.g2p = G2p()
+        #print('Loading G2P model... Done!')
+    def __post_cleaning(self, text):
+        for key, replacement in self.cleaning_rules.items():
+            text = re.sub(key, replacement, text)
+        return text
+    def __post_cleaning_list(self, text):
+        # input is supposed to be a list of strings
+        output_text = []
+        for line in text:
+            for key, replacement in self.cleaning_rules.items():
+                line = re.sub(key, replacement, line)
+            output_text.append(line)
+        return output_text
+    def __check_char_type(self, str_c):
+        # Determine the type of the character
+        if str_c.isnumeric():
+            char_type = "number"
+        elif str_c in string.punctuation:
+            char_type = "punctuation"
+        elif str_c in string.whitespace:
+            char_type = "whitespace"
+        elif str_c.isalpha() and str_c.isascii():
+            char_type = "ascii"
+        else:
+            char_type = "non-ascii"
+        return char_type
+    def insert_space(self, text):
+        '''
+        Check if the text contains numbers and English words and if they are without space inserts space between them.
+        '''
+        # Initialize variables to track the previous character type and whether a space should be inserted
+        prev_char_type = None
+        next_char_type = None
+        insert_space = False
+        # Output string
+        output_string = ""
+        # Iterate through each character in the text
+        for i, c in enumerate(text):
+            # Determine the type of the character
+            char_type = self.__check_char_type(c)
+            if i == (len(text) - 1):
+                next_char_type = None
+            else:
+                next_char_type = self.__check_char_type(text[i+1])
+            # print(f"{i}: {c} is a {char_type} character and next character is a {next_char_type}")
+            # If the character type has changed from the previous character, check if a space should be inserted
+            if (char_type != prev_char_type and prev_char_type != None and char_type != "punctuation" and char_type != "whitespace"):
+                if next_char_type != "punctuation" or next_char_type != "whitespace":
+                    insert_space = True
+            # Insert a space if needed
+            if insert_space:
+                output_string += " "+c
+                insert_space = False
+            else:
+                output_string += c
+            # Update the previous character type
+            prev_char_type = char_type
+        # Print the modified text
+        output_string = re.sub(r' +', ' ', output_string)
+        return output_string
+    def insert_space_list(self, text):
+        '''
+        Expect the input to be in form of list of string.
+        Check if the text contains numbers and English words and if they are without space inserts space between them.
+        '''
+        # Output string list
+        output_list = []
+        for line in text:
+            # Initialize variables to track the previous character type and whether a space should be inserted
+            prev_char_type = None
+            next_char_type = None
+            insert_space = False
+            # Output string
+            output_string = ""
+            # Iterate through each character in the line
+            for i, c in enumerate(line):
+                # Determine the type of the character
+                char_type = self.__check_char_type(c)
+                if i == (len(line) - 1):
+                    next_char_type = None
+                else:
+                    next_char_type = self.__check_char_type(line[i+1])
+                # print(f"{i}: {c} is a {char_type} character and next character is a {next_char_type}")
+                # If the character type has changed from the previous character, check if a space should be inserted
+                if (char_type != prev_char_type and prev_char_type != None and char_type != "punctuation" and char_type != "whitespace"):
+                    if next_char_type != "punctuation" or next_char_type != "whitespace":
+                        insert_space = True
+                # Insert a space if needed
+                if insert_space:
+                    output_string += " "+c
+                    insert_space = False
+                else:
+                    output_string += c
+                # Update the previous character type
+                prev_char_type = char_type
+            # Print the modified line
+            output_string = re.sub(r' +', ' ', output_string)
+            output_list.append(output_string)
+        return output_list
+    # def num2text(self, text, language):
+    #     if language in self.keydict.keys():
+    #         digits = sorted(list(map(int, re.findall(r'\d+', text))),reverse=True)
+    #         if digits:
+    #             for digit in digits:
+    #                 text = re.sub(str(digit), ' '+num_to_word(digit, self.keydict[language])+' ', text)
+    #         return self.__post_cleaning(text)
+    #     else:
+    #         print(f"No num-to-char for the given language {language}.")
+    #         return self.__post_cleaning(text)
+    #====================================================
+    def num2text(self, text, language):
+        # print("utkqdd")
+        pattern = r'\d+\.\d+|\d+'
+        numbers = re.findall(pattern, text)
+        numbers = [num for num in numbers]
+        temp = NumberDictionary()
+        for data in numbers:
+            num_to_text = ' ' + temp.num2text(data, language)
+            # print(num_to_text)
+            pattern = r'(?<!\d)' + re.escape(data) + r'(?!\d)'
+            text = re.sub(pattern, num_to_text, text)
+        # print(text)
+        return text
+    def num2text_list(self, text, language):
+        # input is supposed to be a list of strings
+        # if language in self.keydict.keys():
+            output_text = []
+            # print("utk")
+            # print("text", text)
+            temp = NumberDictionary()
+            for line in text:
+                # print("lines",line)
+                pattern = r'\d+\.\d+|\d+'
+                numbers = re.findall(pattern, line)
+                numbers = [num for num in numbers]
+                for data in numbers:
+                    num_to_text = ' ' + temp.num2text(data, language)
+                    pattern = r'(?<!\d)' + re.escape(data) + r'(?!\d)'
+                    line = re.sub(pattern, num_to_text, line)
+                output_text.append(line)
+            # print(output_text)
+            return output_text
+    #====================================================
+    # def num2text_list(self, text, language):
+    #     # input is supposed to be a list of strings
+    #     if language in self.keydict.keys():
+    #         output_text = []
+    #         for line in text:
+    #             digits = sorted(list(map(int, re.findall(r'\d+', line))),reverse=True)
+    #             if digits:
+    #                 for digit in digits:
+    #                     line = re.sub(str(digit), ' '+num_to_word(digit, self.keydict[language])+' ', line)
+    #             output_text.append(line)
+    #         return self.__post_cleaning_list(output_text)
+    #     else:
+    #         print(f"No num-to-char for the given language {language}.")
+    #         return self.__post_cleaning_list(text)
+    def normalize(self, text, language):
+        '''
+        Return `text` after the class's private `__post_cleaning` step.
+
+        `language` is accepted for interface symmetry but is not used by this method.
+        '''
+        return self.__post_cleaning(text)
+    def normalize_list(self, text, language):
+        '''
+        Return `text` after the class's private `__post_cleaning_list` step.
+
+        `language` is accepted for interface symmetry but is not used by this method.
+        '''
+        # input is supposed to be a list of strings
+        return self.__post_cleaning_list(text)
+class TextPhrasifier:
+    @classmethod
+    def phrasify(cls, text):
+        phrase_list = []
+        for phrase in text.split("#"):
+            phrase = phrase.strip()
+            if phrase != "":
+                phrase_list.append(phrase)
+        return phrase_list
+class TextPhrasifier_List:
+    @classmethod
+    def phrasify(cls, text):
+        # input is supposed to be a list of strings
+        # output is list of list of strings
+        output_list = []
+        for line in text:
+            phrase_list = []
+            for phrase in line.split("#"):
+                phrase = phrase.strip()
+                if phrase != "":
+                    phrase_list.append(phrase)
+            output_list.append(phrase_list)
+        return output_list
+class DurAlignTextProcessor:
+    def __init__(self):
+        # this is a static set of cleaning rules to be applied
+        self.cleaning_rules = {
+            r"\s{2,}": " ",   # collapse multiple spaces into one
+            r"^": "$",
+            r"$": ".",
+        }
+        self.cleaning_rules_English = {
+            " +" : "",
+            "$" : ".",
+        }
+    def textProcesorForEnglish(self, text):
+        for key, replacement in self.cleaning_rules_English.items():
+            for idx in range(0,len(text)):
+                text[idx] = re.sub(key, replacement, text[idx])
+        return text
+    def textProcesor(self, text):
+        for key, replacement in self.cleaning_rules.items():
+            for idx in range(0,len(text)):
+                text[idx] = re.sub(key, replacement, text[idx])
+        return text
+    def textProcesor_list(self, text):
+        # input expected in 'list of list of string' format
+        output_text = []
+        for line in text:
+            for key, replacement in self.cleaning_rules.items():
+                for idx in range(0,len(line)):
+                    line[idx] = re.sub(key, replacement, line[idx])
+            output_text.append(line)
+        return output_text
+class TTSDurAlignPreprocessor:
+    def __init__(self,
+                text_cleaner = TextCleaner(),
+                text_normalizer=TextNormalizer(),
+                phonifier = Phonifier(),
+                post_processor = DurAlignTextProcessor()):
+        self.text_cleaner = text_cleaner
+        self.text_normalizer = text_normalizer
+        self.phonifier = phonifier
+        self.post_processor = post_processor
+    def preprocess(self, text, language, gender):
+        # text = text.strip()
+        print(text)
+        text = self.text_cleaner.clean(text)
+        print("cleaned text", text)
+        # text = self.text_normalizer.insert_space(text)
+        text = self.text_normalizer.num2text(text, language)
+        # print(text)
+        text = self.text_normalizer.normalize(text, language)
+        # print(text)
+        phrasified_text = TextPhrasifier.phrasify(text)
+        #print("phrased",phrasified_text)
+        phonified_text = self.phonifier.phonify(phrasified_text, language, gender)
+        #print("phonetext",phonified_text)
+        phonified_text = self.post_processor.textProcesor(phonified_text)
+        print(phonified_text)
+        return phonified_text, phrasified_text
+class TTSDurAlignPreprocessor_VTT:
+    def __init__(self,
+                text_cleaner = TextCleaner(),
+                text_normalizer=TextNormalizer(),
+                phonifier = Phonifier(),
+                post_processor = DurAlignTextProcessor()):
+        self.text_cleaner = text_cleaner
+        self.text_normalizer = text_normalizer
+        self.phonifier = phonifier
+        self.post_processor = post_processor
+    def preprocess(self, text, language, gender):
+        # text = text.strip()
+        text = self.text_cleaner.clean_list(text)
+        # text = self.text_normalizer.insert_space_list(text)
+        text = self.text_normalizer.num2text_list(text, language)
+        text = self.text_normalizer.normalize_list(text, language)
+        phrasified_text = TextPhrasifier_List.phrasify(text)
+        phonified_text = self.phonifier.phonify_list(phrasified_text, language, gender)
+        phonified_text = self.post_processor.textProcesor_list(phonified_text)
+        return phonified_text, phrasified_text
+class CharTextPreprocessor:
+    def __init__(self,
+                text_cleaner = TextCleaner(),
+                text_normalizer=TextNormalizer()):
+        self.text_cleaner = text_cleaner
+        self.text_normalizer = text_normalizer
+    def preprocess(self, text, language, gender=None):
+        text = text.strip()
+        text = self.text_cleaner.clean(text)
+        # text = self.text_normalizer.insert_space(text)
+        text = self.text_normalizer.num2text(text, language)
+        text = self.text_normalizer.normalize(text, language)
+        phrasified_text = TextPhrasifier.phrasify(text)
+        phonified_text = phrasified_text # No phonification for character TTS models
+        return phonified_text, phrasified_text
+class CharTextPreprocessor_VTT:
+    def __init__(self,
+                text_cleaner = TextCleaner(),
+                text_normalizer=TextNormalizer()):
+        self.text_cleaner = text_cleaner
+        self.text_normalizer = text_normalizer
+    def preprocess(self, text, language, gender=None):
+        # text = text.strip()
+        text = self.text_cleaner.clean_list(text)
+        # text = self.text_normalizer.insert_space_list(text)
+        text = self.text_normalizer.num2text_list(text, language)
+        text = self.text_normalizer.normalize_list(text, language)
+        phrasified_text = TextPhrasifier_List.phrasify(text)
+        phonified_text = phrasified_text # No phonification for character TTS models
+        return phonified_text, phrasified_text
+class TTSPreprocessor:
+    def __init__(
+        self,
+        text_cleaner = TextCleaner(),
+        text_normalizer=TextNormalizer(),
+        phonifier = Phonifier(),
+        text_phrasefier = TextPhrasifier(),
+        post_processor = DurAlignTextProcessor()
+    ):
+        self.text_cleaner = text_cleaner
+        self.text_normalizer = text_normalizer
+        self.phonifier = phonifier
+        self.text_phrasefier = text_phrasefier
+        self.post_processor = post_processor
+    def preprocess(self, text, language, gender):
+        text = text.strip()
+        text = self.text_cleaner.clean(text)
+        # text = self.text_normalizer.insert_space(text)
+        text = self.text_normalizer.num2text(text, language)
+        text = self.text_normalizer.normalize(text, language)
+        phrasified_text = TextPhrasifier.phrasify(text)
+        phonified_text = self.phonifier.phonify(phrasified_text, language, gender)
+        return phonified_text, phrasified_text
+class TTSPreprocessor_VTT:
+    def __init__(self,
+                text_cleaner = TextCleaner(),
+                text_normalizer=TextNormalizer(),
+                phonifier = Phonifier(),
+                text_phrasefier = TextPhrasifier_List()):
+        self.text_cleaner = text_cleaner
+        self.text_normalizer = text_normalizer
+        self.phonifier = phonifier
+        self.text_phrasefier = text_phrasefier
+    def preprocess(self, text, language, gender):
+        # print(f"Original text: {text}")
+        text = self.text_cleaner.clean_list(text)
+        # print(f"After text cleaner: {text}")
+        # text = self.text_normalizer.insert_space_list(text)
+        # print(f"After insert space: {text}")
+        text = self.text_normalizer.num2text_list(text, language)
+        # print(f"After num2text: {text}")
+        text = self.text_normalizer.normalize_list(text, language)
+        # print(f"After text normalizer: {text}")
+        phrasified_text = TextPhrasifier_List.phrasify(text)
+        # print(f"phrasified_text: {phrasified_text}")
+        phonified_text = self.phonifier.phonify_list(phrasified_text, language, gender)
+        # print(f"phonified_text: {phonified_text}")
+        return phonified_text, phrasified_text

tmp/non_dict_words_1776242609.109296 ADDED Viewed

File without changes

tmp/non_dict_words_1776242740.8554978 ADDED Viewed

File without changes

utilities.py ADDED Viewed

	@@ -0,0 +1,75 @@

+import time
+import random
+from datetime import datetime
+import base64
+from enum import Enum
+import os
+import librosa
+import numpy as np
+import soundfile as sf
+# Sampling-rate setting, read from the DEFAULT_SAMPLING_RATE environment variable
+# (falls back to 48000 when the variable is unset).
+SAMPLING_RATE = int(os.getenv("DEFAULT_SAMPLING_RATE", "48000"))
+# Output directory, read from OUTPUT_DIR; defaults to an "outputs" folder next to this file.
+output_audio_dir = os.getenv("OUTPUT_DIR", f"{os.path.dirname(os.path.realpath(__file__))}/outputs")
+# The directory is created as a side effect of importing this module.
+os.makedirs(output_audio_dir, exist_ok=True)
+# Per-language tempo values.
+# NOTE(review): presumably speed factors applied to generated audio; the consumer is
+# not visible in this part of the file, so confirm before relying on it.
+LANG_TEMPOS = {
+    "malayalam": 1.2,
+    "telugu": 1.3,
+    "hindi": 0.9,
+    "punjabi": 0.9
+}
+SUPPORTED_OUTPUT_LANGS = os.getenv("LANGUAGES", "hindi,bengali,tamil").split(",")
+def isOutputLangSupported(lang: str):
+    if(lang not in SUPPORTED_OUTPUT_LANGS):
+        raise ValueError(f"Unsupported output language: {lang}. Supported languages are: {SUPPORTED_OUTPUT_LANGS}")
+def truncate_string(s, max_length=100):
+    return s[:max_length] + ('...' if len(s) > max_length else '')
+WARMUP_PARAGRAPHS = {
+    "hindi": (
+        "भारत एक विशाल देश है जहाँ विभिन्न भाषाएं, धर्म और संस्कृतियाँ एक साथ अस्तित्व में हैं। "
+        "यहाँ की विविधता में एकता इसकी सबसे बड़ी विशेषता है। हर राज्य की अपनी एक परंपरा होती है और "
+        "खानपान भी अलग होता है। भारतीय समाज में परिवार को अत्यंत महत्त्व दिया जाता है और लोग एक-दूसरे "
+        "की मदद के लिए हमेशा तैयार रहते हैं। पर्व-त्योहार जैसे दीपावली, होली, ईद और क्रिसमस यहाँ बड़े "
+        "हर्षोल्लास के साथ मनाए जाते हैं। यहाँ की कला, संगीत, और साहित्य ने दुनिया भर में अपनी पहचान बनाई है।"
+    ),
+    "kannada": (
+        "ಭಾರತವು ವಿಭಿನ್ನ ಭಾಷೆಗಳು, ಧರ್ಮಗಳು ಮತ್ತು ಸಂಸ್ಕೃತಿಗಳ ನಾಡಾಗಿದೆ. ಇದು ಒಂದೇ ದೇಶದಲ್ಲಿ ಅನೇಕ ಭಿನ್ನತೆಗಳನ್ನು "
+        "ಒಳಗೊಂಡಿದೆ. ಪ್ರತಿಯೊಂದು ರಾಜ್ಯಕ್ಕೂ ತನ್ನದೇ ಆದ ಪರಂಪರೆ, ಸಂಸ್ಕೃತಿ ಮತ್ತು ಆಹಾರದ ಶೈಲಿ ಇರುತ್ತದೆ. "
+        "ಇಲ್ಲಿ ಕುಟುಂಬ ಮತ್ತು ಸಮುದಾಯಕ್ಕೆ ಹೆಚ್ಚಿನ ಮಹತ್ವ ನೀಡಲಾಗುತ್ತದೆ. ಹಬ್ಬಗಳು ಮತ್ತು ಆಚರಣೆಗಳು ಜನರ ನಡುವಿನ "
+        "ಸಾಮರಸ್ಯವನ್ನು ಬಲಪಡಿಸುತ್ತವೆ. ಭಾರತದಲ್ಲಿ ಸಂಗೀತ, ನೃತ್ಯ ಮತ್ತು ಸಾಹಿತ್ಯ ಕ್ಷೇತ್ರಗಳಲ್ಲಿ ಸಮೃದ್ಧ ಪರಂಪರೆ ಇದೆ."
+    ),
+    "malayalam": (
+        "ഇന്ത്യ ഒരു വൈവിധ്യങ്ങളാൽ സമ്പന്നമായ ദേശമാണ്. വിവിധ ഭാഷകളും മതങ്ങളും സംസ്‌കാരങ്ങളുമുള്ള ഈ രാജ്യത്ത് "
+        "ഏറ്റവും വലിയ പ്രത്യേകത ഏകതയിലാണ്. ഓരോ സംസ്ഥാനത്തിനും തനതായ ചരിത്രവും സംസ്കാരവുമുണ്ട്. "
+        "കുടുംബ ബന്ധങ്ങൾക്കും ഒത്തുചേരലിനും വലിയ വിലയാണ് ഇന്ത്യയിലെ ജനങ്ങൾ നൽകുന്നത്. "
+        "ഓണവും വിഷുവും പോലുള്ള ഉത്സവങ്ങൾ ഒരേ സമയം ആത്മീയതയും സന്തോഷവും പങ്കുവെക്കാൻ സഹായിക്കുന്നു. "
+        "ഇവിടെ സംഗീതം, നാടകം, ചലച്ചിത്രം തുടങ്ങിയ കലാരൂപങ്ങൾ ലോകപ്രശസ്തമാണ്."
+    ),
+    "punjabi": (
+        "ਭਾਰਤ ਇੱਕ ਐਸਾ ਦੇਸ਼ ਹੈ ਜਿੱਥੇ ਬਹੁਤ ਸਾਰੀਆਂ ਭਾਸ਼ਾਵਾਂ, ਧਰਮਾਂ ਅਤੇ ਰਿਵਾਇਤਾਂ ਦਾ ਮਿਲਾਪ ਹੈ। "
+        "ਇਸ ਦੀ ਸਭ ਤੋਂ ਵੱਡੀ ਖਾਸੀਅਤ ਇਹ ਹੈ ਕਿ ਇੱਥੇ ਵੱਖ-ਵੱਖਤਾ ਵਿੱਚ ਵੀ ਏਕਤਾ ਹੈ। ਹਰ ਰਾਜ ਦੀ ਆਪਣੀ ਪਛਾਣ ਹੁੰਦੀ ਹੈ "
+        "ਅਤੇ ਇੱਥੇ ਦੇ ਲੋਕ ਆਪਣੇ ਪਰਿਵਾਰ ਅ��ੇ ਸੰਸਕਾਰਾਂ ਨਾਲ ਜੁੜੇ ਰਹਿੰਦੇ ਹਨ। ਪੰਜਾਬੀ ਲੋਕਾਂ ਦੀ ਬੋਲੀ, ਭੰਗੜਾ, ਗਿੱਧਾ ਅਤੇ "
+        "ਖਾਣ-ਪੀਣ ਦੀ ਰੀਤ ਸਾਰੀ ਦੁਨੀਆ ਵਿੱਚ ਮਸ਼ਹੂਰ ਹੈ। ਤਿਉਹਾਰ, ਖੇਡਾਂ ਅਤੇ ਲੋਕ ਸੱਭਿਆਚਾਰ ਇੱਥੇ ਦੀ ਰੋਹਕਤਾ ਵਧਾਉਂਦੇ ਹਨ।"
+    ),
+    "tamil": (
+        "இந்தியா என்பது பல்வேறு கலாசாரங்களும் மொழிகளும் கொண்ட நாடாகும். ஒவ்வொரு மாநிலத்திற்கும் தனித்துவமான பாரம்பரியம் "
+        "மற்றும் சுவையான உணவுகள் உள்ளன. தமிழ்நாடு இவற்றில் முக்கியமான ஒரு பகுதி. தமிழ் மொழியும் கலாசாரமும் உலகெங்கும் "
+        "புகழ்பெற்றவை. குடும்பங்கள் ஒன்றிணைந்து வாழும் பாசத்திற்கும் எப்போதும் முக்கியத்துவம் அளிக்கப்படுகிறது. "
+        "தீபாவளி, பொங்கல் போன்ற பண்டிகைகள் மகிழ்ச்சியுடனும் ஒருமைப்பாட்டுடனும் கொண்டாடப்படுகின்றன. "
+        "இங்கே உள்ள பாரம்பரிய இசை, நடனம், இலக்கியம் ஆகியவை பாராட்டத்தக்கவை."
+    ),
+    "telugu": (
+        "భారతదేశం అనేక భాషలు, సంస్కృతులు మరియు సంప్రదాయాలతో కూడిన దేశం. ప్రతి రాష్ట్రానికీ తనదైన ప్రత్యేకత ఉంది. "
+        "తెలుగు రాష్ట్రాలు సంస్కృతిగా గొప్పదిగా పరిగణించబడతాయి. ఇక్కడ కుటుంబం, గౌరవం, సహాయం వంటి విలువలకు ఎక్కువ ప్రాధాన్యం "
+        "ఇస్తారు. సంక్రాంతి, ఉగాది వంటి పండుగలు కుటుంబ సభ్యులందరినీ కలిపి ఉంచుతాయి. నాట్యం, సంగీతం మరియు సాహిత్యం వంటి కళలు "
+        "తెలుగువారిలో భాగం. భోజన పరంపరలూ అద్భుతమైనవి. వీటన్నింటి సమ్మేళనమే భారతదేశాన్ని ప్రత్యేకంగా నిలబెడుతుంది."
+    )
+}