Spaces:

toshuu
/

speak

Runtime error

App Files Files Community

toshuu committed on Dec 1, 2025

Commit

30cf8cd

verified ·

1 Parent(s): de64ba8

Update app.py

Browse files

Files changed (1) hide show

app.py +68 -38

app.py CHANGED Viewed

@@ -44,38 +44,54 @@ def _call_apply_tts(text, speaker=DEFAULT_SPEAKER, sample_rate=DEFAULT_SAMPLE_RA
         print(f"Warning: Invalid speaker '{speaker}', using default '{DEFAULT_SPEAKER}'")
         speaker = DEFAULT_SPEAKER
-    kw = {
-        'text': text,
-        'speaker': speaker,
-        'sample_rate': sample_rate
-    }
-    print(f"Runtime error while calling apply_tts with {kw}")
-    last_exc = None
-    # Try different parameter combinations
-    for attempt_kw in [kw, {'text': text, 'speaker': speaker}]:
         try:
-            res = m.apply_tts(**attempt_kw)
-            # Handle different return types
-            if isinstance(res, tuple):
-                audio = res[0]
-            else:
-                audio = res
-            return audio
-        except TypeError as e:
-            last_exc = e
-            print(f"Attempt failed with {attempt_kw}: {e}")
-            continue
-        except Exception as e:
-            last_exc = e
-            print(f"Error with {attempt_kw}: {e}")
-            raise
-    raise RuntimeError(f"apply_tts call failed for all known signatures. last error: {last_exc}")
 def synthesize_text_to_wavfile(text, speaker=DEFAULT_SPEAKER, sample_rate=DEFAULT_SAMPLE_RATE):
@@ -116,23 +132,34 @@ def tts_gradio_fn(text, speaker, sample_rate):
         Path to generated audio file
     """
     if not text or not text.strip():
-        raise ValueError("Please enter some text to synthesize")
-    path = synthesize_text_to_wavfile(text, speaker, sample_rate)
-    return path
 # Create Gradio interface
 with gr.Blocks(title="Silero v4 Indic TTS") as demo:
     gr.Markdown("# Silero v4 Indic Text-to-Speech")
     gr.Markdown("Convert text to speech in multiple Indian languages")
     with gr.Row():
         with gr.Column():
             text_input = gr.Textbox(
                 label="Enter Text",
-                placeholder="नमस्ते, यह टेस्ट है। (Enter text in Hindi, Bengali, Tamil, Telugu, etc.)",
-                lines=5
             )
             speaker_dropdown = gr.Dropdown(
@@ -158,10 +185,13 @@ with gr.Blocks(title="Silero v4 Indic TTS") as demo:
     # Examples
     gr.Examples(
         examples=[
-            ["नमस्ते, यह टेस्ट है।", "hindi_female", 48000],
-            ["হ্যালো, এটি একটি পরীক্ষা।", "bengali_female", 48000],
-            ["வணக்கம், இது ஒரு சோதனை.", "tamil_female", 48000],
-            ["హలో, ఇది ఒక పరీక్ష.", "telugu_female", 48000],
         ],
         inputs=[text_input, speaker_dropdown, sample_rate_dropdown],
         outputs=audio_output,

         print(f"Warning: Invalid speaker '{speaker}', using default '{DEFAULT_SPEAKER}'")
         speaker = DEFAULT_SPEAKER
+    # Clean and validate text
+    text = text.strip()
+    if not text:
+        raise ValueError("Text cannot be empty")
+    # Remove zero-width characters and normalize
+    text = text.replace('\u200d', '').replace('\u200c', '')
+    print(f"Calling apply_tts with text: '{text}', speaker: '{speaker}', sample_rate: {sample_rate}")
+    try:
+        # Try with ssml_text parameter (some models prefer this)
+        res = m.apply_tts(
+            ssml_text=text,
+            speaker=speaker,
+            sample_rate=sample_rate
+        )
+        print("Success with ssml_text parameter")
+    except Exception as e1:
+        print(f"ssml_text attempt failed: {e1}")
         try:
+            # Try with text parameter
+            res = m.apply_tts(
+                text=text,
+                speaker=speaker,
+                sample_rate=sample_rate
+            )
+            print("Success with text parameter")
+        except Exception as e2:
+            print(f"text attempt failed: {e2}")
+            try:
+                # Try minimal parameters
+                res = m.apply_tts(
+                    text=text,
+                    speaker=speaker
+                )
+                print("Success with minimal parameters")
+            except Exception as e3:
+                print(f"All attempts failed. Last error: {e3}")
+                raise ValueError(f"Text processing failed. The model may not support this text. Error: {e3}")
+    # Handle different return types
+    if isinstance(res, tuple):
+        audio = res[0]
+    else:
+        audio = res
+    return audio
 def synthesize_text_to_wavfile(text, speaker=DEFAULT_SPEAKER, sample_rate=DEFAULT_SAMPLE_RATE):
         Path to generated audio file
     """
     if not text or not text.strip():
+        raise gr.Error("Please enter some text to synthesize")
+    # Warn if text is too long
+    if len(text) > 200:
+        raise gr.Error("Text is too long. Please use shorter text (under 200 characters)")
+    try:
+        path = synthesize_text_to_wavfile(text, speaker, sample_rate)
+        return path
+    except ValueError as e:
+        raise gr.Error(f"Text processing failed: {str(e)}. Try simpler text or a different language.")
+    except Exception as e:
+        raise gr.Error(f"Speech generation failed: {str(e)}")
 # Create Gradio interface
 with gr.Blocks(title="Silero v4 Indic TTS") as demo:
     gr.Markdown("# Silero v4 Indic Text-to-Speech")
     gr.Markdown("Convert text to speech in multiple Indian languages")
+    gr.Markdown("⚠️ **Note:** Use simple, short phrases for best results. Complex sentences may fail.")
     with gr.Row():
         with gr.Column():
             text_input = gr.Textbox(
                 label="Enter Text",
+                placeholder="नमस्ते (Enter short text in Hindi, Bengali, Tamil, etc.)",
+                lines=3,
+                info="Keep text short and simple for best results"
             )
             speaker_dropdown = gr.Dropdown(
     # Examples
     gr.Examples(
         examples=[
+            ["नमस्ते", "hindi_female", 48000],
+            ["आप कैसे हैं", "hindi_male", 48000],
+            ["হ্যালো", "bengali_female", 48000],
+            ["வணக்கம்", "tamil_female", 48000],
+            ["హలో", "telugu_female", 48000],
+            ["ಹಲೋ", "kannada_female", 48000],
+            ["હેલો", "gujarati_female", 48000],
         ],
         inputs=[text_input, speaker_dropdown, sample_rate_dropdown],
         outputs=audio_output,