Timemaster committed on
Commit
ab1f44a
·
verified ·
1 Parent(s): fbdd80a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -13
app.py CHANGED
@@ -2,20 +2,25 @@ import gradio as gr
2
  from TTS.api import TTS
3
  import tempfile
4
  import os
 
5
 
6
  # --- Model Loading (Runs only once at startup) ---
7
- # NOTE: This model is chosen for its balance of quality and CPU compatibility.
8
- # You can try other models from the Coqui TTS documentation if you need more languages.
9
  try:
10
- # Initialize TTS with the chosen model (VITS model for CPU efficiency)
11
- tts_model = TTS(model_name="tts_models/en/ljspeech/vits", progress_bar=False).to("cpu")
12
- VOICES = ["ljspeech"] # VITS models often have a single trained speaker
 
 
 
13
  DEFAULT_VOICE = VOICES[0]
14
  except Exception as e:
15
  tts_model = None
16
  VOICES = ["Model Load Error"]
17
  DEFAULT_VOICE = VOICES[0]
18
- print(f"Error loading TTS model: {e}")
 
19
 
20
 
21
  # --- Core TTS Function (Synchronous) ---
@@ -32,7 +37,6 @@ def text_to_speech_coqui(text, speaker_name):
32
  tmp_path = tmp_file.name
33
 
34
  # Generate audio file (Coqui TTS method)
35
- # We pass the speaker_name even though this VITS model only has one.
36
  tts_model.tts_to_file(
37
  text=text,
38
  speaker=speaker_name,
@@ -42,6 +46,7 @@ def text_to_speech_coqui(text, speaker_name):
42
  return "Speech synthesis complete: {}".format(text), tmp_path
43
 
44
  except Exception as e:
 
45
  return f"ERROR: Failed to generate audio. Details: {str(e)}", None
46
 
47
 
@@ -50,7 +55,6 @@ input_text = gr.Textbox(lines=5, label="Input Text")
50
  output_text = gr.Textbox(label="Output Text")
51
  output_audio = gr.Audio(type="filepath", label="Generated Audio File")
52
 
53
- # Dropdown uses the detected voices (or the error message)
54
  language = gr.Dropdown(
55
  choices=VOICES,
56
  value=DEFAULT_VOICE,
@@ -58,16 +62,14 @@ language = gr.Dropdown(
58
  )
59
 
60
 
61
- # --- Gradio Interface Definition ---
62
  interface = gr.Interface(
63
  fn=text_to_speech_coqui,
64
  inputs=[input_text, language],
65
  outputs=[output_text, output_audio],
66
- title="Coqui TTS (CPU Optimized)",
67
- description="Customizable, high-quality Text-to-Speech running on CPU."
68
  )
69
 
70
-
71
- # --- Standard Launch Command ---
72
  if __name__ == "__main__":
73
  interface.launch()
 
2
  from TTS.api import TTS
3
  import tempfile
4
  import os
5
+ import sys
6
 
7
  # --- Model Loading (Runs only once at startup) ---
8
+ # NOTE: Switched to a Tacotron2/LJSPEECH model, which typically uses the
9
+ # Python dependency 'gruut' instead of the system package 'espeak-ng'.
10
  try:
11
+ # Initialize TTS with the chosen model.
12
+ # This model is known to be stable and CPU-compatible.
13
+ # The '.to("cpu")' ensures it runs on the free hardware tier.
14
+ tts_model = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False).to("cpu")
15
+ # For Tacotron2, we typically use the name of the dataset as the speaker
16
+ VOICES = ["ljspeech"]
17
  DEFAULT_VOICE = VOICES[0]
18
  except Exception as e:
19
  tts_model = None
20
  VOICES = ["Model Load Error"]
21
  DEFAULT_VOICE = VOICES[0]
22
+ # Print the error to the logs, but continue with the Gradio interface
23
+ print(f"Error loading TTS model (likely gruut failure or missing dependency): {e}", file=sys.stderr)
24
 
25
 
26
  # --- Core TTS Function (Synchronous) ---
 
37
  tmp_path = tmp_file.name
38
 
39
  # Generate audio file (Coqui TTS method)
 
40
  tts_model.tts_to_file(
41
  text=text,
42
  speaker=speaker_name,
 
46
  return "Speech synthesis complete: {}".format(text), tmp_path
47
 
48
  except Exception as e:
49
+ # Handle all generation errors
50
  return f"ERROR: Failed to generate audio. Details: {str(e)}", None
51
 
52
 
 
55
  output_text = gr.Textbox(label="Output Text")
56
  output_audio = gr.Audio(type="filepath", label="Generated Audio File")
57
 
 
58
  language = gr.Dropdown(
59
  choices=VOICES,
60
  value=DEFAULT_VOICE,
 
62
  )
63
 
64
 
65
+ # --- Gradio Interface Definition and Launch ---
66
  interface = gr.Interface(
67
  fn=text_to_speech_coqui,
68
  inputs=[input_text, language],
69
  outputs=[output_text, output_audio],
70
+ title="Coqui TTS (Tacotron2/CPU Optimized)",
71
+ description="Customizable, high-quality Text-to-Speech using a model that avoids system dependencies."
72
  )
73
 
 
 
74
  if __name__ == "__main__":
75
  interface.launch()