multilingual-tts-v3

Running

App Files Files Community

Timemaster committed on Oct 26, 2025

Commit

ab1f44a

verified ·

1 Parent(s): fbdd80a

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -13

app.py CHANGED Viewed

@@ -2,20 +2,25 @@ import gradio as gr
 from TTS.api import TTS
 import tempfile
 import os
 # --- Model Loading (Runs only once at startup) ---
-# NOTE: This model is chosen for its balance of quality and CPU compatibility.
-# You can try other models from the Coqui TTS documentation if you need more languages.
 try:
-    # Initialize TTS with the chosen model (VITS model for CPU efficiency)
-    tts_model = TTS(model_name="tts_models/en/ljspeech/vits", progress_bar=False).to("cpu")
-    VOICES = ["ljspeech"] # VITS models often have a single trained speaker
     DEFAULT_VOICE = VOICES[0]
 except Exception as e:
     tts_model = None
     VOICES = ["Model Load Error"]
     DEFAULT_VOICE = VOICES[0]
-    print(f"Error loading TTS model: {e}")
 # --- Core TTS Function (Synchronous) ---
@@ -32,7 +37,6 @@ def text_to_speech_coqui(text, speaker_name):
             tmp_path = tmp_file.name
         # Generate audio file (Coqui TTS method)
-        # We pass the speaker_name even though this VITS model only has one.
         tts_model.tts_to_file(
             text=text,
             speaker=speaker_name,
@@ -42,6 +46,7 @@ def text_to_speech_coqui(text, speaker_name):
         return "Speech synthesis complete: {}".format(text), tmp_path
     except Exception as e:
         return f"ERROR: Failed to generate audio. Details: {str(e)}", None
@@ -50,7 +55,6 @@ input_text = gr.Textbox(lines=5, label="Input Text")
 output_text = gr.Textbox(label="Output Text")
 output_audio = gr.Audio(type="filepath", label="Generated Audio File")
-# Dropdown uses the detected voices (or the error message)
 language = gr.Dropdown(
     choices=VOICES,
     value=DEFAULT_VOICE,
@@ -58,16 +62,14 @@ language = gr.Dropdown(
 )
-# --- Gradio Interface Definition ---
 interface = gr.Interface(
     fn=text_to_speech_coqui,
     inputs=[input_text, language],
     outputs=[output_text, output_audio],
-    title="Coqui TTS (CPU Optimized)",
-    description="Customizable, high-quality Text-to-Speech running on CPU."
 )
-# --- Standard Launch Command ---
 if __name__ == "__main__":
     interface.launch()

 from TTS.api import TTS
 import tempfile
 import os
+import sys
 # --- Model Loading (Runs only once at startup) ---
+# NOTE: Switched to a Tacotron2/LJSPEECH model, which typically uses the
+# Python dependency 'gruut' instead of the system package 'espeak-ng'.
 try:
+    # Initialize TTS with the chosen model.
+    # This model is known to be stable and CPU-compatible.
+    # The '.to("cpu")' ensures it runs on the free hardware tier.
+    tts_model = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False).to("cpu")
+    # LJSpeech is a single-speaker dataset, so its name is the only voice offered in the dropdown
+    VOICES = ["ljspeech"]
     DEFAULT_VOICE = VOICES[0]
 except Exception as e:
     tts_model = None
     VOICES = ["Model Load Error"]
     DEFAULT_VOICE = VOICES[0]
+    # Print the error to the logs, but continue with the Gradio interface
+    print(f"Error loading TTS model (likely gruut failure or missing dependency): {e}", file=sys.stderr)
 # --- Core TTS Function (Synchronous) ---
             tmp_path = tmp_file.name
         # Generate audio file (Coqui TTS method)
         tts_model.tts_to_file(
             text=text,
             speaker=speaker_name,
         return "Speech synthesis complete: {}".format(text), tmp_path
     except Exception as e:
+        # Handle all generation errors
         return f"ERROR: Failed to generate audio. Details: {str(e)}", None
 output_text = gr.Textbox(label="Output Text")
 output_audio = gr.Audio(type="filepath", label="Generated Audio File")
 language = gr.Dropdown(
     choices=VOICES,
     value=DEFAULT_VOICE,
 )
+# --- Gradio Interface Definition and Launch ---
 interface = gr.Interface(
     fn=text_to_speech_coqui,
     inputs=[input_text, language],
     outputs=[output_text, output_audio],
+    title="Coqui TTS (Tacotron2/CPU Optimized)",
+    description="Customizable, high-quality Text-to-Speech using a model that avoids system dependencies."
 )
 if __name__ == "__main__":
     interface.launch()