Spaces:

bdstar
/

VoiceTutor-AI

Sleeping

bdstar committed on Oct 31, 2025

Commit

c9304ac

verified ·

1 Parent(s): d2c0c59

update app.py file

Files changed (1) hide show

app.py CHANGED Viewed

@@ -68,29 +68,31 @@ def chat_with_llm(history_messages, user_text):
-# ---- TTS ----
 def tts_silero(text: str) -> str:
     """
-    Return path to a WAV file synthesized by Silero (CPU-friendly).
-    Works across recent torch.hub return signatures.
     """
     import torch, tempfile
     import soundfile as sf
-    # Newer torch.hub supports "trust_repo"; set to True or 'check'
-    obj = torch.hub.load(
-        repo_or_dir="snakers4/silero-models",
-        model="silero_tts",
-        language="en",
-        speaker="v3_en",
-        trust_repo=True  # or 'check' to be prompted the first time
-    )
-    # Handle both cases: either a single model, or a (model, something) tuple
-    model = obj[0] if isinstance(obj, (list, tuple)) else obj
     sample_rate = 48000
-    speaker = "en_0"  # valid default voice in v3_en pack
     audio = model.apply_tts(text=text, speaker=speaker, sample_rate=sample_rate)
     out_wav = tempfile.mktemp(suffix=".wav")
@@ -98,6 +100,7 @@ def tts_silero(text: str) -> str:
     return out_wav
 def tts_coqui_xtts(text: str) -> str:
     """
     Returns path to a WAV file synthesized by Coqui XTTS v2 (higher quality; GPU-friendly).
@@ -166,4 +169,4 @@ with gr.Blocks(title="Voice Coach") as demo:
     btn.click(pipeline, inputs=[audio_in, chatbox], outputs=[chatbox, audio_out, status])
 if __name__ == "__main__":
-    demo.launch(share=True)

+# near top-level (global singletons)
+_SILERO_TTS = None
 def tts_silero(text: str) -> str:
     """
+    Return path to WAV synthesized by Silero TTS.
+    Uses a cached model instance to avoid re-downloading each request.
     """
     import torch, tempfile
     import soundfile as sf
+    global _SILERO_TTS
+    if _SILERO_TTS is None:
+        obj = torch.hub.load(
+            repo_or_dir="snakers4/silero-models",
+            model="silero_tts",
+            language="en",
+            speaker="v3_en",
+            trust_repo=True,   # avoids interactive trust prompt
+        )
+        _SILERO_TTS = obj[0] if isinstance(obj, (list, tuple)) else obj
+    model = _SILERO_TTS
     sample_rate = 48000
+    speaker = "en_0"
     audio = model.apply_tts(text=text, speaker=speaker, sample_rate=sample_rate)
     out_wav = tempfile.mktemp(suffix=".wav")
     return out_wav
 def tts_coqui_xtts(text: str) -> str:
     """
     Returns path to a WAV file synthesized by Coqui XTTS v2 (higher quality; GPU-friendly).
     btn.click(pipeline, inputs=[audio_in, chatbox], outputs=[chatbox, audio_out, status])
 if __name__ == "__main__":
+    demo.launch()