Spaces:

E-motionAssistant
/

Space3

Sleeping

App Files Files Community

amasha03 committed 27 days ago

Commit

16efb96

verified ·

1 Parent(s): d25e80a

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -68

app.py CHANGED Viewed

@@ -1,96 +1,52 @@
 import gradio as gr
 from TTS.api import TTS
-from TTS.tts.configs.vits_config import VitsConfig
 from huggingface_hub import hf_hub_download
 import os
-import gc
 # --- IMPORTING YOUR SEPARATE ROMANIZER ---
-try:
-    from romanizer import sinhala_to_roman
-except ImportError:
-    print("Warning: romanizer.py not found. Sinhala might not work correctly.")
-    def sinhala_to_roman(text): return text
-def load_model_safely(repo_id):
-    print(f"--- Downloading: {repo_id} ---")
     model_path = hf_hub_download(repo_id=repo_id, filename="best_model.pth")
     config_path = hf_hub_download(repo_id=repo_id, filename="config.json")
-    # Load the config
-    config = VitsConfig()
-    config.load_json(config_path)
-    # THE ENGLISH "MAGIC NUMBER" FIX
-    # This prevents the 'num_chars' mismatch error for the English model
-    if "english" in repo_id.lower():
-        print("Applying 137 character fix for English...")
-        if hasattr(config, "model_args"):
-            config.model_args.num_chars = 137
     # Initialize TTS
-    tts = TTS(gpu=False)
-    # Load model positionally to avoid keyword argument errors across versions
-    tts.load_tts_model_by_path(model_path, config)
-    # Clean up RAM after loading
-    gc.collect()
-    return tts
-# --- Global Model Initialization ---
-print("Initializing all models (this may take a moment)...")
-try:
-    sin_tts = load_model_safely("E-motionAssistant/text-to-speech-VITS-sinhala")
-    tam_tts = load_model_safely("E-motionAssistant/text-to-speech-VITS-tamil")
-    eng_tts = load_model_safely("E-motionAssistant/text-to-speech-VITS-english")
-    print("SUCCESS: All 3 models loaded and ready.")
-except Exception as e:
-    print(f"CRITICAL STARTUP ERROR: {e}")
 def generate_voice(text, language):
-    print(f"User Request -> Language: {language} | Text: {text}")
     try:
-        # Select the correct engine and process text
         if language == "Sinhala":
-            engine = sin_tts
             processed_text = sinhala_to_roman(text)
-            print(f"Romanized Sinhala: {processed_text}")
-        elif language == "English":
-            engine = eng_tts
-            processed_text = text
-        else: # Tamil
-            engine = tam_tts
             processed_text = text
-        if engine is None:
-            return None
-        # Define output filename
-        output_path = f"output_{language.lower()}.wav"
-        # Generate Audio
-        engine.tts_to_file(text=str(processed_text), file_path=output_path)
-        if os.path.exists(output_path):
-            return output_path
-        return None
     except Exception as e:
-        print(f"GENERATION ERROR: {e}")
         return None
-# --- Gradio Interface ---
 demo = gr.Interface(
     fn=generate_voice,
     inputs=[
-        gr.Textbox(label="Input Text", placeholder="Type your text here..."),
-        gr.Dropdown(["English", "Sinhala", "Tamil"], label="Select Language", value="English")
     ],
     outputs=gr.Audio(label="Synthesized Speech", type="filepath"),
-    title="Multilingual VITS TTS System",
-    description="A high-quality Text-to-Speech system for English, Sinhala, and Tamil. Note: Sinhala uses an automated romanizer."
 )
-if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
 from TTS.api import TTS
 from huggingface_hub import hf_hub_download
 import os
 # --- IMPORTING YOUR SEPARATE ROMANIZER ---
+from romanizer import sinhala_to_roman
def load_my_model(repo_id):
    """Download a model checkpoint and its config, then build a CPU TTS engine.

    Args:
        repo_id: Hugging Face Hub repository id that contains
            ``best_model.pth`` and ``config.json``.

    Returns:
        A ``TTS`` instance constructed from the two downloaded files with
        ``gpu=False``.
    """
    print(f"Downloading {repo_id}...")
    # Fetch the weights first, then the config, both from the same repository.
    checkpoint_file, settings_file = [
        hf_hub_download(repo_id=repo_id, filename=artifact)
        for artifact in ("best_model.pth", "config.json")
    ]
    return TTS(model_path=checkpoint_file, config_path=settings_file, gpu=False)
# Load Models
# Each engine is built once while the module loads; generate_voice reads
# sin_tts / tam_tts on every request instead of reloading them.
SINHALA_REPO_ID = "E-motionAssistant/text-to-speech-VITS-sinhala"
TAMIL_REPO_ID = "E-motionAssistant/text-to-speech-VITS-tamil"

print("Initializing Sinhala...")
sin_tts = load_my_model(SINHALA_REPO_ID)

print("Initializing Tamil...")
tam_tts = load_my_model(TAMIL_REPO_ID)
 def generate_voice(text, language):
     try:
         if language == "Sinhala":
+            # Use your separate function
             processed_text = sinhala_to_roman(text)
+            print(f"Input: {text} -> Romanized: {processed_text}")
+            engine = sin_tts
+        else:
             processed_text = text
+            engine = tam_tts
+        output_path = "output.wav"
+        engine.tts_to_file(text=processed_text, file_path=output_path)
+        return output_path
     except Exception as e:
+        print(f"Error: {e}")
         return None
# Gradio Interface
# Inputs: free text plus a language picker. Output: one audio player that is
# fed the file path returned by generate_voice.
text_box = gr.Textbox(label="Input Text")
language_picker = gr.Dropdown(["Sinhala", "Tamil"], label="Select Language")
audio_player = gr.Audio(label="Synthesized Speech", type="filepath")

demo = gr.Interface(
    fn=generate_voice,
    inputs=[text_box, language_picker],
    outputs=audio_player,
    title="Multilingual VITS TTS",
)

demo.launch()