multilingual-tts-v3

Running

App Files Files Community

Timemaster committed on Oct 26, 2025

Commit

5981e07

verified ·

1 Parent(s): d383dd8

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -38

app.py CHANGED Viewed

@@ -1,72 +1,73 @@
 import gradio as gr
-from gtts import gTTS
 import tempfile
 import os
-# Define a simple dictionary of available languages for gTTS
-# We can't use the massive list from tts_voice.py because gTTS uses simple language codes.
-gtts_languages = {
-    "English": "en",
-    "Mandarin Chinese": "zh-cn",
-    "French": "fr",
-    "German": "de",
-    "Spanish": "es",
-    "Japanese": "ja",
-    "Korean": "ko",
-    "Russian": "ru"
-}
-# The main function is now SYNCHRONOUS (no 'async')
-def text_to_speech_gtts(text, language_name):
     # 1. Input Validation
     if not text or not text.strip():
         return "ERROR: Input text cannot be empty.", None
     try:
-        # Get the language code from the name
-        lang_code = gtts_languages[language_name]
         # Create a temporary file path
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
             tmp_path = tmp_file.name
-        # Initialize gTTS object
-        tts = gTTS(text=text, lang=lang_code, slow=False)
-        # Save the audio file (synchronous operation)
-        tts.save(tmp_path)
         return "Speech synthesis complete: {}".format(text), tmp_path
     except Exception as e:
-        # Handle all gTTS-related errors (e.g., language not supported, network failure)
         return f"ERROR: Failed to generate audio. Details: {str(e)}", None
-# --- Modern Gradio Component Syntax (Translated UI) ---
 input_text = gr.Textbox(lines=5, label="Input Text")
 output_text = gr.Textbox(label="Output Text")
 output_audio = gr.Audio(type="filepath", label="Generated Audio File")
-default_language = "English" # Use English as the default language
 language = gr.Dropdown(
-    choices=list(gtts_languages.keys()),
-    value=default_language,
-    label="Language" # Dropped 'Voice' because gTTS has no specific voice selection
 )
 # --- Gradio Interface Definition ---
 interface = gr.Interface(
-    fn=text_to_speech_gtts, # Use the new synchronous function
-    inputs=[input_text, language],
-    outputs=[output_text, output_audio],
-    title="Google TTS Text-to-Speech (Robust Version)",
-    description="Convert text into audio using the reliable Google Text-to-Speech service. (Max 100 characters for optimal stability)"
 )
-# --- Standard Synchronous Launch Command ---
 if __name__ == "__main__":
-    # Gradio runs synchronous functions reliably without extra configuration.
     interface.launch()

 import gradio as gr
+from TTS.api import TTS
 import tempfile
 import os
+# --- Model Loading (runs once, when the module is loaded) ---
+# NOTE: The `en/ljspeech` VITS model was chosen for its balance of quality and CPU compatibility.
+# Other models from the Coqui TTS documentation can be substituted if more languages are needed.
+try:
+    # Load the chosen VITS model and place it on the CPU.
+    tts_model = TTS(model_name="tts_models/en/ljspeech/vits", progress_bar=False).to("cpu")
+    VOICES = ["ljspeech"] # Single hard-coded entry offered as the dropdown's choices
+    DEFAULT_VOICE = VOICES[0]
+except Exception as e:
+    tts_model = None
+    VOICES = ["Model Load Error"]
+    DEFAULT_VOICE = VOICES[0]
+    print(f"Error loading TTS model: {e}")
+# --- Core TTS Function (Synchronous) ---
+def text_to_speech_coqui(text, speaker_name):
+    """Synthesize ``text`` into a temporary WAV file with the loaded Coqui model.
+
+    Args:
+        text: The text to speak; must contain at least one non-whitespace
+            character.
+        speaker_name: The voice picked in the UI. It is forwarded to the model
+            only when the model reports itself as multi-speaker.
+
+    Returns:
+        A ``(status_message, audio_path)`` tuple. ``audio_path`` is the path of
+        the generated ``.wav`` file on success and ``None`` on any failure.
+    """
     # 1. Input Validation
+    if tts_model is None:
+        return "ERROR: TTS Model failed to load at startup. Check Space logs.", None
     if not text or not text.strip():
         return "ERROR: Input text cannot be empty.", None
+    # Coqui's TTS API raises ValueError when a speaker is supplied to a model
+    # that is not multi-speaker, so only forward the selection when it applies.
+    speaker = speaker_name if getattr(tts_model, "is_multi_speaker", False) else None
+    tmp_path = None
     try:
         # Create a temporary file path
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
             tmp_path = tmp_file.name
+        # Generate audio file (Coqui TTS method)
+        tts_model.tts_to_file(
+            text=text,
+            speaker=speaker,
+            file_path=tmp_path
+        )
         return "Speech synthesis complete: {}".format(text), tmp_path
     except Exception as e:
+        # The file was created with delete=False, so remove it here on failure;
+        # otherwise every failed request leaves an orphaned temp file behind.
+        if tmp_path and os.path.exists(tmp_path):
+            try:
+                os.remove(tmp_path)
+            except OSError:
+                pass  # Best-effort cleanup; still report the original error.
         return f"ERROR: Failed to generate audio. Details: {str(e)}", None
+# --- Gradio UI Components ---
 input_text = gr.Textbox(lines=5, label="Input Text")
 output_text = gr.Textbox(label="Output Text")
 output_audio = gr.Audio(type="filepath", label="Generated Audio File")
+# Voice selector fed from VOICES; after a failed model load its only entry is the "Model Load Error" placeholder.
 language = gr.Dropdown(
+    choices=VOICES,
+    value=DEFAULT_VOICE,
+    label="Speaker/Voice"
 )
 # --- Gradio Interface Definition ---
 interface = gr.Interface(
+    fn=text_to_speech_coqui,
+    inputs=[input_text, language],
+    outputs=[output_text, output_audio],
+    title="Coqui TTS (CPU Optimized)",
+    description="Customizable, high-quality Text-to-Speech running on CPU."
 )
+# --- Launch the app only when this file is run as a script ---
 if __name__ == "__main__":
     interface.launch()