Spaces:

E-motionAssistant
/

Space2

Running

App Files Files Community

amasha03 committed on 18 days ago

Commit

433a1d8

verified ·

1 Parent(s): 69b1020

Update app.py

Browse files

Files changed (1) hide show

app.py +69 -15

app.py CHANGED Viewed

@@ -1,27 +1,81 @@
 import gradio as gr
 from TTS.api import TTS
 from huggingface_hub import hf_hub_download
-def load_eng_model():
-    repo_id = "E-motionAssistant/text-to-speech-VITS-english"
     model_path = hf_hub_download(repo_id=repo_id, filename="best_model.pth")
     config_path = hf_hub_download(repo_id=repo_id, filename="config.json")
-    return TTS(model_path=model_path, config_path=config_path, gpu=False)
-# Initialize
-print("Loading English TTS...")
-tts = load_eng_model()
-def speak(text):
-    output_path = "en_output.wav"
-    tts.tts_to_file(text=text, file_path=output_path)
-    return output_path
 demo = gr.Interface(
-    fn=speak,
-    inputs=gr.Textbox(label="Enter English Text"),
-    outputs=gr.Audio(label="Synthesized Audio"),
-    title="English VITS TTS"
 )
-demo.launch()

 import gradio as gr
 from TTS.api import TTS
+from TTS.tts.configs.vits_config import VitsConfig
 from huggingface_hub import hf_hub_download
+import os
+import gc
+from romanizer import sinhala_to_roman
+def load_model_safely(repo_id):
+    print(f"--- Downloading: {repo_id} ---")
     model_path = hf_hub_download(repo_id=repo_id, filename="best_model.pth")
     config_path = hf_hub_download(repo_id=repo_id, filename="config.json")
+    # 1. Load the config object first
+    config = VitsConfig()
+    config.load_json(config_path)
+    # 2. Fix the Character Mismatch BEFORE the model is built
+    if "english" in repo_id.lower():
+        print("Applying 137 character fix for English...")
+        # This fixes the 'RuntimeError: size mismatch'
+        config.model_args.num_chars = 137
+    # 3. Initialize the TTS engine shell WITHOUT loading a model yet
+    # We leave it empty so it doesn't crash on init
+    tts = TTS(gpu=False)
+    # 4. Now manually load the model into the shell using our fixed config
+    # We use positional arguments to be safe across library versions
+    tts.load_tts_model_by_path(model_path, config)
+    gc.collect()
+    return tts
+# --- Initialization ---
+print("Starting Startup Sequence...")
+try:
+    print("Loading Sinhala...")
+    sin_tts = load_model_safely("E-motionAssistant/text-to-speech-VITS-sinhala")
+    print("Loading Tamil...")
+    tam_tts = load_model_safely("E-motionAssistant/text-to-speech-VITS-tamil")
+    print("Loading English...")
+    eng_tts = load_model_safely("E-motionAssistant/text-to-speech-VITS-english")
+    print("--- ALL MODELS READY ---")
+except Exception as e:
+    print(f"CRITICAL ERROR: {e}")
+    # This will print the exact reason if it still fails
+def generate_voice(text, language):
+    try:
+        if language == "Sinhala":
+            engine = sin_tts
+            text = sinhala_to_roman(text)
+        elif language == "English":
+            engine = eng_tts
+        else:
+            engine = tam_tts
+        output_path = "output.wav"
+        engine.tts_to_file(text=str(text), file_path=output_path)
+        return output_path
+    except Exception as e:
+        print(f"Generation Error: {e}")
+        return None
+# Gradio Interface
 demo = gr.Interface(
+    fn=generate_voice,
+    inputs=[
+        gr.Textbox(label="Input Text"),
+        gr.Dropdown(["English", "Sinhala", "Tamil"], label="Select Language")
+    ],
+    outputs=gr.Audio(label="Synthesized Speech", type="filepath"),
+    title="Multilingual VITS TTS"
 )
+if __name__ == "__main__":
+    demo.launch()