Spaces:
Running on Zero
Running on Zero
File size: 1,446 Bytes
"""Text-to-speech (Piper). Returns the path to a .wav. Bilingual PT/EN.
Piper runs on onnxruntime (no torch). Voices come from rhasspy/piper-voices.
"""
import os
import tempfile
import wave
# Hugging Face Hub repository the voice files are downloaded from.
_REPO = "rhasspy/piper-voices"

# Language code -> voice path inside _REPO (without file extension).
# Each entry can be overridden through its IRIS_TTS_VOICE_* environment variable.
_VOICES = dict(
    pt=os.environ.get(
        "IRIS_TTS_VOICE_PT", "pt/pt_BR/faber/medium/pt_BR-faber-medium"
    ),
    en=os.environ.get(
        "IRIS_TTS_VOICE_EN", "en/en_US/amy/medium/en_US-amy-medium"
    ),
)

# Loaded voices, filled on demand by _load().
_cache = dict()
def _load(lang: str):
    """Return the Piper voice for ``lang``, loading and caching it on first use.

    Args:
        lang: Language code. Codes that are not keys of ``_VOICES`` fall back
            to the ``"pt"`` voice.

    Returns:
        The object returned by ``PiperVoice.load`` for the selected voice.
    """
    # Normalise before touching the cache so every unknown code shares the
    # single "pt" entry instead of re-loading the same model under a new key.
    key = lang if lang in _VOICES else "pt"
    if key not in _cache:
        # Imported on first use rather than at module import time.
        from huggingface_hub import hf_hub_download
        from piper import PiperVoice

        name = _VOICES[key]
        onnx = hf_hub_download(_REPO, f"{name}.onnx")
        conf = hf_hub_download(_REPO, f"{name}.onnx.json")
        _cache[key] = PiperVoice.load(onnx, config_path=conf)
    return _cache[key]
def synthesize(text: str, lang: str = "pt") -> str | None:
if not text or not text.strip():
return None
voice = _load("en" if lang == "en" else "pt")
chunks = list(voice.synthesize(text))
if not chunks:
print(f"[tts] no audio for text: {text!r}", flush=True)
return None
path = tempfile.mktemp(suffix=".wav")
with wave.open(path, "wb") as wf:
wf.setnchannels(chunks[0].sample_channels)
wf.setsampwidth(chunks[0].sample_width)
wf.setframerate(chunks[0].sample_rate)
for ch in chunks:
wf.writeframes(ch.audio_int16_bytes)
return path
|