"""
tts_engine.py
─────────────
Text-to-Speech engine.
On Hugging Face Spaces (headless server):
- pyttsx3 is skipped (needs audio hardware)
- gTTS saves an MP3 that Gradio can play back via gr.Audio
- Falls back to silent mode gracefully
Locally: pyttsx3 works offline, gTTS needs internet.
"""
import logging
import threading
import os
import io
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class TTSEngine:
def __init__(self, rate: int = 160, volume: float = 1.0):
self._rate = rate
self._volume = volume
self._backend = "silent"
self._init()
def _init(self):
# Try pyttsx3 (local / desktop only)
if os.environ.get("GRADIO_SERVER_NAME") is None:
try:
import pyttsx3
e = pyttsx3.init()
e.setProperty("rate", self._rate)
e.setProperty("volume", self._volume)
self._engine = e
self._backend = "pyttsx3"
logger.info("TTS backend: pyttsx3 (offline)")
return
except Exception as exc:
logger.debug(f"pyttsx3 unavailable: {exc}")
# Try gTTS (online, works on HF Spaces)
try:
import gtts # noqa: F401
self._backend = "gtts"
logger.info("TTS backend: gTTS (online)")
return
except ImportError:
pass
logger.warning("No TTS backend available — speech output disabled.")
# ── Public API ────────────────────────────────────────────────────────────
def speak(self, text: str):
"""Blocking speech."""
if not text:
return
if self._backend == "pyttsx3":
self._engine.say(text)
self._engine.runAndWait()
elif self._backend == "gtts":
self._gtts_speak(text)
else:
logger.info(f"[TTS silent]: {text[:80]}")
def speak_async(self, text: str):
"""Non-blocking TTS in a daemon thread."""
threading.Thread(target=self.speak, args=(text,), daemon=True).start()
def to_audio_bytes(self, text: str) -> bytes | None:
"""
Returns MP3 bytes (for Gradio gr.Audio playback).
Returns None if TTS unavailable.
"""
if self._backend == "gtts":
try:
from gtts import gTTS
buf = io.BytesIO()
gTTS(text=text, lang="en", slow=False).write_to_fp(buf)
return buf.getvalue()
except Exception as exc:
logger.error(f"gTTS error: {exc}")
return None
# ── Helpers ───────────────────────────────────────────────────────────────
def _gtts_speak(self, text: str):
try:
from gtts import gTTS
import tempfile
tts = gTTS(text=text, lang="en", slow=False)
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
tts.save(f.name)
tmp = f.name
for player in ("mpg123", "mpg321", "ffplay -nodisp -autoexit"):
if os.system(f"which {player.split()[0]} > /dev/null 2>&1") == 0:
os.system(f"{player} {tmp} > /dev/null 2>&1")
break
os.unlink(tmp)
except Exception as exc:
logger.error(f"gTTS playback error: {exc}") |