Spaces:

toshuu
/

speak

Runtime error

App Files Files Community

toshuu committed on Dec 1, 2025

Commit

bad894e

verified ·

1 Parent(s): 22a5251

Update app.py

Browse files

Files changed (1) hide show

app.py +122 -82

app.py CHANGED Viewed

@@ -1,82 +1,122 @@
-import os
-audio = audio.astype('float32')
-max_abs = np.max(np.abs(audio))
-if max_abs > 1.0:
-audio = audio / max_abs
-return audio, sample_rate
-# Gradio wrapper: returns file-like audio buffer
-def tts_gradio(text, lang_dropdown, speaker_slider):
-# Map dropdown label to lang id or code expected by model
-# You might need to adjust mapping depending on model internal language ids
-lang_map = {
-"Hindi (hi)": 0,
-"Marathi (mr)": 1,
-"Bengali (bn)": 2,
-"Tamil (ta)": 3,
-"Telugu (te)": 4,
-"Kannada (kn)": 5,
-"Malayalam (ml)": 6,
-"Gujarati (gu)": 7,
-}
-lang_id = lang_map.get(lang_dropdown, 0)
-# Prevent concurrent synth calls
-with lock:
-audio, sr = synthesize_text(text, lang=lang_id, speaker=int(speaker_slider))
-# Write to temporary wav file and return its path (gradio will serve it)
-tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
-sf.write(tmp.name, audio, sr)
-tmp.flush()
-tmp.close()
-return tmp.name
-# Build Gradio UI
-def build_ui():
-with gr.Blocks() as demo:
-gr.Markdown("# Silero v4 Indic — TTS (HuggingFace Space)\nDrop `v4_indic.pt` in the repo root and reload the Space.")
-with gr.Row():
-with gr.Column(scale=3):
-txt = gr.Textbox(lines=4, label="Text to synthesize", value="नमस्ते, यह एक परीक्षण है।")
-lang = gr.Dropdown(list=["Hindi (hi)", "Marathi (mr)", "Bengali (bn)", "Tamil (ta)", "Telugu (te)", "Kannada (kn)", "Malayalam (ml)", "Gujarati (gu)"], label="Language")
-speaker = gr.Slider(minimum=0, maximum=3, step=1, value=0, label="Speaker ID (if model supports multiple speakers)")
-btn = gr.Button("Synthesize")
-with gr.Column(scale=2):
-out = gr.Audio(label="Generated audio")
-btn.click(fn=tts_gradio, inputs=[txt, lang, speaker], outputs=[out])
-return demo
-if __name__ == "__main__":
-# Preload model at startup (keeps first request fast)
-try:
-load_model()
-except Exception as e:
-print("Model failed to load at startup:", e)
-demo = build_ui()
-demo.launch(server_name="0.0.0.0", server_port=7860)

+import os
+import threading
+import tempfile
+import numpy as np
+import soundfile as sf
+import gradio as gr
+import torch
+# Path of the packaged model file; load_model() checks that it exists and loads from it.
+MODEL_PATH = "v4_indic.pt"
+# Sample rate passed to the model's apply_tts() and used when writing the WAV file.
+SAMPLE_RATE = 48000
+# Held by tts_fn() while it calls synthesize().
+lock = threading.Lock()
+# Cached model instance; stays None until load_model() has loaded it.
+model = None
+def load_model():
+    global model
+    if model is not None:
+        return model
+    if not os.path.exists(MODEL_PATH):
+        raise FileNotFoundError(
+            f"Model file not found: {MODEL_PATH}. Upload v4_indic.pt to the Space root."
+        )
+    print("Loading Silero v4 model...")
+    pkg = torch.package.PackageImporter(MODEL_PATH)
+    model = pkg.load_pickle("tts_models", "model")
+    print("Model loaded.")
+    return model
+def synthesize(text, lang_id, speaker_id):
+    m = load_model()
+    if not isinstance(text, str) or len(text.strip()) == 0:
+        raise ValueError("Empty text")
+    try:
+        audio = m.apply_tts(
+            text=text,
+            speaker=speaker_id,
+            lang_id=lang_id,
+            sample_rate=SAMPLE_RATE,
+        )
+    except Exception:
+        audio = m.apply_tts(
+            text=text,
+            speaker_id=speaker_id,
+            lang_id=lang_id,
+            sample_rate=SAMPLE_RATE,
+        )
+    # Convert torch → numpy
+    if isinstance(audio, torch.Tensor):
+        audio = audio.detach().cpu().numpy()
+    audio = np.asarray(audio).astype(np.float32)
+    max_abs = np.max(np.abs(audio))
+    if max_abs > 1.0:
+        audio = audio / max_abs
+    return audio
+def tts_fn(text, language, speaker):
+    lang_map = {
+        "Hindi": 0,
+        "Marathi": 1,
+        "Bengali": 2,
+        "Tamil": 3,
+        "Telugu": 4,
+        "Kannada": 5,
+        "Malayalam": 6,
+        "Gujarati": 7,
+    }
+    lang_id = lang_map.get(language, 0)
+    with lock:
+        audio = synthesize(text, lang_id, int(speaker))
+        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
+        sf.write(tmp.name, audio, SAMPLE_RATE)
+        tmp.flush()
+        tmp.close()
+        return tmp.name
+def build_ui():
+    with gr.Blocks() as demo:
+        gr.Markdown("# 🔊 Silero v4 Indic TTS<br>Text → Speech for 8 Indian languages")
+        with gr.Row():
+            with gr.Column():
+                text = gr.Textbox(
+                    label="Enter text", value="नमस्ते, यह एक परीक्षण है।", lines=3
+                )
+                lang = gr.Dropdown(
+                    ["Hindi", "Marathi", "Bengali", "Tamil", "Telugu", "Kannada", "Malayalam", "Gujarati"],
+                    label="Language",
+                    value="Hindi",
+                )
+                speaker = gr.Slider(
+                    0, 3, value=0, step=1, label="Speaker ID (if supported)"
+                )
+                btn = gr.Button("🎤 Generate Speech")
+            with gr.Column():
+                output_audio = gr.Audio(label="Output Audio")
+        btn.click(tts_fn, inputs=[text, lang, speaker], outputs=[output_audio])
+    return demo
+if __name__ == "__main__":
+    load_model()
+    ui = build_ui()
+    ui.launch()