toshuu committed on
Commit
30cf8cd
·
verified ·
1 Parent(s): de64ba8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -38
app.py CHANGED
@@ -44,38 +44,54 @@ def _call_apply_tts(text, speaker=DEFAULT_SPEAKER, sample_rate=DEFAULT_SAMPLE_RA
44
  print(f"Warning: Invalid speaker '{speaker}', using default '{DEFAULT_SPEAKER}'")
45
  speaker = DEFAULT_SPEAKER
46
 
47
- kw = {
48
- 'text': text,
49
- 'speaker': speaker,
50
- 'sample_rate': sample_rate
51
- }
52
 
53
- print(f"Runtime error while calling apply_tts with {kw}")
54
- last_exc = None
55
 
56
- # Try different parameter combinations
57
- for attempt_kw in [kw, {'text': text, 'speaker': speaker}]:
 
 
 
 
 
 
 
 
 
 
58
  try:
59
- res = m.apply_tts(**attempt_kw)
60
-
61
- # Handle different return types
62
- if isinstance(res, tuple):
63
- audio = res[0]
64
- else:
65
- audio = res
66
-
67
- return audio
68
-
69
- except TypeError as e:
70
- last_exc = e
71
- print(f"Attempt failed with {attempt_kw}: {e}")
72
- continue
73
- except Exception as e:
74
- last_exc = e
75
- print(f"Error with {attempt_kw}: {e}")
76
- raise
77
-
78
- raise RuntimeError(f"apply_tts call failed for all known signatures. last error: {last_exc}")
 
 
 
 
 
 
 
79
 
80
 
81
  def synthesize_text_to_wavfile(text, speaker=DEFAULT_SPEAKER, sample_rate=DEFAULT_SAMPLE_RATE):
@@ -116,23 +132,34 @@ def tts_gradio_fn(text, speaker, sample_rate):
116
  Path to generated audio file
117
  """
118
  if not text or not text.strip():
119
- raise ValueError("Please enter some text to synthesize")
120
 
121
- path = synthesize_text_to_wavfile(text, speaker, sample_rate)
122
- return path
 
 
 
 
 
 
 
 
 
123
 
124
 
125
  # Create Gradio interface
126
  with gr.Blocks(title="Silero v4 Indic TTS") as demo:
127
  gr.Markdown("# Silero v4 Indic Text-to-Speech")
128
  gr.Markdown("Convert text to speech in multiple Indian languages")
 
129
 
130
  with gr.Row():
131
  with gr.Column():
132
  text_input = gr.Textbox(
133
  label="Enter Text",
134
- placeholder="नमस्ते, यह टेस्ट है। (Enter text in Hindi, Bengali, Tamil, Telugu, etc.)",
135
- lines=5
 
136
  )
137
 
138
  speaker_dropdown = gr.Dropdown(
@@ -158,10 +185,13 @@ with gr.Blocks(title="Silero v4 Indic TTS") as demo:
158
  # Examples
159
  gr.Examples(
160
  examples=[
161
- ["नमस्ते, यह टेस्ट है।", "hindi_female", 48000],
162
- ["হ্যালো, এটি একটি পরীক্ষা।", "bengali_female", 48000],
163
- ["வணக்கம், இது ஒரு சோதனை.", "tamil_female", 48000],
164
- ["హలో, ఇది ఒక పరీక్ష.", "telugu_female", 48000],
 
 
 
165
  ],
166
  inputs=[text_input, speaker_dropdown, sample_rate_dropdown],
167
  outputs=audio_output,
 
44
  print(f"Warning: Invalid speaker '{speaker}', using default '{DEFAULT_SPEAKER}'")
45
  speaker = DEFAULT_SPEAKER
46
 
47
+ # Clean and validate text
48
+ text = text.strip()
49
+ if not text:
50
+ raise ValueError("Text cannot be empty")
 
51
 
52
+ # Remove zero-width characters and normalize
53
+ text = text.replace('\u200d', '').replace('\u200c', '')
54
 
55
+ print(f"Calling apply_tts with text: '{text}', speaker: '{speaker}', sample_rate: {sample_rate}")
56
+
57
+ try:
58
+ # Try with ssml_text parameter (some models prefer this)
59
+ res = m.apply_tts(
60
+ ssml_text=text,
61
+ speaker=speaker,
62
+ sample_rate=sample_rate
63
+ )
64
+ print("Success with ssml_text parameter")
65
+ except Exception as e1:
66
+ print(f"ssml_text attempt failed: {e1}")
67
  try:
68
+ # Try with text parameter
69
+ res = m.apply_tts(
70
+ text=text,
71
+ speaker=speaker,
72
+ sample_rate=sample_rate
73
+ )
74
+ print("Success with text parameter")
75
+ except Exception as e2:
76
+ print(f"text attempt failed: {e2}")
77
+ try:
78
+ # Try minimal parameters
79
+ res = m.apply_tts(
80
+ text=text,
81
+ speaker=speaker
82
+ )
83
+ print("Success with minimal parameters")
84
+ except Exception as e3:
85
+ print(f"All attempts failed. Last error: {e3}")
86
+ raise ValueError(f"Text processing failed. The model may not support this text. Error: {e3}")
87
+
88
+ # Handle different return types
89
+ if isinstance(res, tuple):
90
+ audio = res[0]
91
+ else:
92
+ audio = res
93
+
94
+ return audio
95
 
96
 
97
  def synthesize_text_to_wavfile(text, speaker=DEFAULT_SPEAKER, sample_rate=DEFAULT_SAMPLE_RATE):
 
132
  Path to generated audio file
133
  """
134
  if not text or not text.strip():
135
+ raise gr.Error("Please enter some text to synthesize")
136
 
137
+ # Warn if text is too long
138
+ if len(text) > 200:
139
+ raise gr.Error("Text is too long. Please use shorter text (under 200 characters)")
140
+
141
+ try:
142
+ path = synthesize_text_to_wavfile(text, speaker, sample_rate)
143
+ return path
144
+ except ValueError as e:
145
+ raise gr.Error(f"Text processing failed: {str(e)}. Try simpler text or a different language.")
146
+ except Exception as e:
147
+ raise gr.Error(f"Speech generation failed: {str(e)}")
148
 
149
 
150
  # Create Gradio interface
151
  with gr.Blocks(title="Silero v4 Indic TTS") as demo:
152
  gr.Markdown("# Silero v4 Indic Text-to-Speech")
153
  gr.Markdown("Convert text to speech in multiple Indian languages")
154
+ gr.Markdown("⚠️ **Note:** Use simple, short phrases for best results. Complex sentences may fail.")
155
 
156
  with gr.Row():
157
  with gr.Column():
158
  text_input = gr.Textbox(
159
  label="Enter Text",
160
+ placeholder="नमस्ते (Enter short text in Hindi, Bengali, Tamil, etc.)",
161
+ lines=3,
162
+ info="Keep text short and simple for best results"
163
  )
164
 
165
  speaker_dropdown = gr.Dropdown(
 
185
  # Examples
186
  gr.Examples(
187
  examples=[
188
+ ["नमस्ते", "hindi_female", 48000],
189
+ ["आप कैसे हैं", "hindi_male", 48000],
190
+ ["হ্যালো", "bengali_female", 48000],
191
+ ["வணக்கம்", "tamil_female", 48000],
192
+ ["హలో", "telugu_female", 48000],
193
+ ["ಹಲೋ", "kannada_female", 48000],
194
+ ["હેલો", "gujarati_female", 48000],
195
  ],
196
  inputs=[text_input, speaker_dropdown, sample_rate_dropdown],
197
  outputs=audio_output,