Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,20 +2,25 @@ import gradio as gr
|
|
| 2 |
from TTS.api import TTS
|
| 3 |
import tempfile
|
| 4 |
import os
|
|
|
|
| 5 |
|
| 6 |
# --- Model Loading (Runs only once at startup) ---
|
| 7 |
-
# NOTE:
|
| 8 |
-
#
|
| 9 |
try:
|
| 10 |
-
# Initialize TTS with the chosen model
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
| 13 |
DEFAULT_VOICE = VOICES[0]
|
| 14 |
except Exception as e:
|
| 15 |
tts_model = None
|
| 16 |
VOICES = ["Model Load Error"]
|
| 17 |
DEFAULT_VOICE = VOICES[0]
|
| 18 |
-
|
|
|
|
| 19 |
|
| 20 |
|
| 21 |
# --- Core TTS Function (Synchronous) ---
|
|
@@ -32,7 +37,6 @@ def text_to_speech_coqui(text, speaker_name):
|
|
| 32 |
tmp_path = tmp_file.name
|
| 33 |
|
| 34 |
# Generate audio file (Coqui TTS method)
|
| 35 |
-
# We pass the speaker_name even though this VITS model only has one.
|
| 36 |
tts_model.tts_to_file(
|
| 37 |
text=text,
|
| 38 |
speaker=speaker_name,
|
|
@@ -42,6 +46,7 @@ def text_to_speech_coqui(text, speaker_name):
|
|
| 42 |
return "Speech synthesis complete: {}".format(text), tmp_path
|
| 43 |
|
| 44 |
except Exception as e:
|
|
|
|
| 45 |
return f"ERROR: Failed to generate audio. Details: {str(e)}", None
|
| 46 |
|
| 47 |
|
|
@@ -50,7 +55,6 @@ input_text = gr.Textbox(lines=5, label="Input Text")
|
|
| 50 |
output_text = gr.Textbox(label="Output Text")
|
| 51 |
output_audio = gr.Audio(type="filepath", label="Generated Audio File")
|
| 52 |
|
| 53 |
-
# Dropdown uses the detected voices (or the error message)
|
| 54 |
language = gr.Dropdown(
|
| 55 |
choices=VOICES,
|
| 56 |
value=DEFAULT_VOICE,
|
|
@@ -58,16 +62,14 @@ language = gr.Dropdown(
|
|
| 58 |
)
|
| 59 |
|
| 60 |
|
| 61 |
-
# --- Gradio Interface Definition ---
|
| 62 |
interface = gr.Interface(
|
| 63 |
fn=text_to_speech_coqui,
|
| 64 |
inputs=[input_text, language],
|
| 65 |
outputs=[output_text, output_audio],
|
| 66 |
-
title="Coqui TTS (CPU Optimized)",
|
| 67 |
-
description="Customizable, high-quality Text-to-Speech
|
| 68 |
)
|
| 69 |
|
| 70 |
-
|
| 71 |
-
# --- Standard Launch Command ---
|
| 72 |
if __name__ == "__main__":
|
| 73 |
interface.launch()
|
|
|
|
| 2 |
from TTS.api import TTS
|
| 3 |
import tempfile
|
| 4 |
import os
|
| 5 |
+
import sys
|
| 6 |
|
| 7 |
# --- Model Loading (Runs only once at startup) ---
|
| 8 |
+
# NOTE: Switched to a Tacotron2/LJSPEECH model, which typically uses the
|
| 9 |
+
# Python dependency 'gruut' instead of the system package 'espeak-ng'.
|
| 10 |
try:
|
| 11 |
+
# Initialize TTS with the chosen model.
|
| 12 |
+
# This model is known to be stable and CPU-compatible.
|
| 13 |
+
# The '.to("cpu")' ensures it runs on the free hardware tier.
|
| 14 |
+
tts_model = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False).to("cpu")
|
| 15 |
+
# NOTE(review): this LJSpeech model appears to be single-speaker, so "ljspeech" is only a dropdown label - confirm tts_to_file() accepts a `speaker` argument for it.
|
| 16 |
+
VOICES = ["ljspeech"]
|
| 17 |
DEFAULT_VOICE = VOICES[0]
|
| 18 |
except Exception as e:
|
| 19 |
tts_model = None
|
| 20 |
VOICES = ["Model Load Error"]
|
| 21 |
DEFAULT_VOICE = VOICES[0]
|
| 22 |
+
# Print the error to the logs, but continue with the Gradio interface
|
| 23 |
+
print(f"Error loading TTS model (likely gruut failure or missing dependency): {e}", file=sys.stderr)
|
| 24 |
|
| 25 |
|
| 26 |
# --- Core TTS Function (Synchronous) ---
|
|
|
|
| 37 |
tmp_path = tmp_file.name
|
| 38 |
|
| 39 |
# Generate audio file (Coqui TTS method)
|
|
|
|
| 40 |
tts_model.tts_to_file(
|
| 41 |
text=text,
|
| 42 |
speaker=speaker_name,
|
|
|
|
| 46 |
return "Speech synthesis complete: {}".format(text), tmp_path
|
| 47 |
|
| 48 |
except Exception as e:
|
| 49 |
+
# Report any generation failure as an error message with no audio file instead of raising.
|
| 50 |
return f"ERROR: Failed to generate audio. Details: {str(e)}", None
|
| 51 |
|
| 52 |
|
|
|
|
| 55 |
output_text = gr.Textbox(label="Output Text")
|
| 56 |
output_audio = gr.Audio(type="filepath", label="Generated Audio File")
|
| 57 |
|
|
|
|
| 58 |
language = gr.Dropdown(
|
| 59 |
choices=VOICES,
|
| 60 |
value=DEFAULT_VOICE,
|
|
|
|
| 62 |
)
|
| 63 |
|
| 64 |
|
| 65 |
+
# --- Gradio Interface Definition and Launch ---
|
| 66 |
interface = gr.Interface(
|
| 67 |
fn=text_to_speech_coqui,
|
| 68 |
inputs=[input_text, language],
|
| 69 |
outputs=[output_text, output_audio],
|
| 70 |
+
title="Coqui TTS (Tacotron2/CPU Optimized)",
|
| 71 |
+
description="Customizable, high-quality Text-to-Speech using a model that avoids system dependencies."
|
| 72 |
)
|
| 73 |
|
|
|
|
|
|
|
| 74 |
if __name__ == "__main__":
|
| 75 |
interface.launch()
|