Spaces:

E-motionAssistant
/

Space3

Sleeping

App Files Files Community

amasha03 committed 26 days ago

Commit

0d91b65

verified ·

1 Parent(s): b75b6a5

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -30

app.py CHANGED Viewed

@@ -1,59 +1,59 @@
 import gradio as gr
 from TTS.api import TTS
 from huggingface_hub import hf_hub_download
 import os
 import json
-from TTS.tts.configs.vits_config import VitsConfig
-# --- IMPORTING YOUR SEPARATE ROMANIZER ---
-from romanizer import sinhala_to_roman
 def load_my_model(repo_id):
     print(f"Downloading {repo_id}...")
     model_path = hf_hub_download(repo_id=repo_id, filename="best_model.pth")
     config_path = hf_hub_download(repo_id=repo_id, filename="config.json")
-# Load the config into a Python object
     config = VitsConfig()
     config.load_json(config_path)
-    # FORCE the character count to 137 if it's the English model
     if "english" in repo_id.lower():
-        print(f"--- FORCING 137 CHARACTERS FOR ENGLISH ---")
-        config.model_args.num_chars = 137
-    # Initialize the TTS using the manual config object instead of the file path
-    return TTS(items_per_group=1).load_tts_model_by_path(
-        config,
-        model_path=model_path,
-        gpu=False
     )
-    # Initialize TTS
-    return TTS(model_path=model_path, config_path=config_path, gpu=False)
-# Load Models
-print("Initializing English...")
-eng_tts = load_my_model("E-motionAssistant/text-to-speech-VITS-english")
-print("Initializing Sinhala...")
-sin_tts = load_my_model("E-motionAssistant/text-to-speech-VITS-sinhala")
-print("Initializing Tamil...")
-tam_tts = load_my_model("E-motionAssistant/text-to-speech-VITS-tamil")
 def generate_voice(text, language):
     try:
         if language == "English":
             engine = eng_tts
         elif language == "Sinhala":
-            # Use your separate function
-            processed_text = sinhala_to_roman(text)
-            print(f"Input: {text} -> Romanized: {processed_text}")
             engine = sin_tts
         else:
-            processed_text = text
             engine = tam_tts
         output_path = "output.wav"
         engine.tts_to_file(text=processed_text, file_path=output_path)
@@ -67,10 +67,10 @@ demo = gr.Interface(
     fn=generate_voice,
     inputs=[
         gr.Textbox(label="Input Text"),
-        gr.Dropdown(["Sinhala", "Tamil"], label="Select Language")
     ],
     outputs=gr.Audio(label="Synthesized Speech", type="filepath"),
     title="Multilingual VITS TTS"
 )
-demo.launch()

 import gradio as gr
 from TTS.api import TTS
+from TTS.tts.configs.vits_config import VitsConfig
 from huggingface_hub import hf_hub_download
 import os
 import json
+import gc
+from romanizer import sinhala_to_roman
 def load_my_model(repo_id):
     """Download a VITS checkpoint from the Hugging Face Hub and load it on CPU.

     Fetches ``best_model.pth`` and ``config.json`` from ``repo_id``. When the
     repo id contains "english" (case-insensitive) and the config exposes
     ``model_args``, ``model_args.num_chars`` is overridden to 137 before the
     model is loaded.

     Args:
         repo_id: Hugging Face Hub repository id holding the two files above.

     Returns:
         A ``TTS`` instance with the checkpoint loaded (``gpu=False``).

     Raises:
         Whatever ``hf_hub_download``, the config loader, or ``TTS`` raise;
         nothing is caught here.
     """
     import tempfile  # local import: only needed for the patched-config path
     print(f"Downloading {repo_id}...")
     model_path = hf_hub_download(repo_id=repo_id, filename="best_model.pth")
     config_path = hf_hub_download(repo_id=repo_id, filename="config.json")
     if "english" in repo_id.lower():
         config = VitsConfig()
         config.load_json(config_path)
         if hasattr(config, "model_args"):
             # NOTE(review): 137 is presumably the character-set size the
             # English checkpoint was trained with -- confirm. Also confirm the
             # override survives model construction; the library may recompute
             # num_chars from the tokenizer.
             print("--- SUCCESS: FORCING 137 CHARACTERS FOR ENGLISH ---")
             config.model_args.num_chars = 137
             # The TTS loader takes a config *file path*, not a config object,
             # so persist the patched config and point the loader at the copy.
             # A fresh temp dir is used so the Hub cache file is never modified.
             patched_dir = tempfile.mkdtemp(prefix="tts_config_")
             config_path = os.path.join(patched_dir, "config.json")
             config.save_json(config_path)
     # The loader's keyword is `model_path` (there is no `checkpoint_path`);
     # passing both paths to the constructor loads the model in one step.
     tts = TTS(model_path=model_path, config_path=config_path, gpu=False)
     gc.collect()
     return tts
+# Load All Models
+print("Initializing Models...")
+try:
+    eng_tts = load_my_model("E-motionAssistant/text-to-speech-VITS-english")
+    sin_tts = load_my_model("E-motionAssistant/text-to-speech-VITS-sinhala")
+    tam_tts = load_my_model("E-motionAssistant/text-to-speech-VITS-tamil")
+except Exception as e:
+    print(f"CRITICAL ERROR DURING LOADING: {e}")
 def generate_voice(text, language):
     try:
         if language == "English":
             engine = eng_tts
+            processed_text = text
         elif language == "Sinhala":
             engine = sin_tts
+            # Using your romanizer logic
+            processed_text = sinhala_to_roman(text)
         else:
             engine = tam_tts
+            processed_text = text
         output_path = "output.wav"
         engine.tts_to_file(text=processed_text, file_path=output_path)
     fn=generate_voice,
     inputs=[
         gr.Textbox(label="Input Text"),
+        gr.Dropdown(["English", "Sinhala", "Tamil"], label="Select Language")
     ],
     outputs=gr.Audio(label="Synthesized Speech", type="filepath"),
     title="Multilingual VITS TTS"
 )
+demo.launch()