"""Text-to-speech (Piper). Returns the path to a .wav. Bilingual PT/EN. Piper runs on onnxruntime (no torch). Voices come from rhasspy/piper-voices. """ import os import tempfile import wave _REPO = "rhasspy/piper-voices" _VOICES = { "pt": os.environ.get("IRIS_TTS_VOICE_PT", "pt/pt_BR/faber/medium/pt_BR-faber-medium"), "en": os.environ.get("IRIS_TTS_VOICE_EN", "en/en_US/amy/medium/en_US-amy-medium"), } _cache = {} def _load(lang: str): if lang not in _cache: from huggingface_hub import hf_hub_download from piper import PiperVoice name = _VOICES.get(lang, _VOICES["pt"]) onnx = hf_hub_download(_REPO, f"{name}.onnx") conf = hf_hub_download(_REPO, f"{name}.onnx.json") _cache[lang] = PiperVoice.load(onnx, config_path=conf) return _cache[lang] def synthesize(text: str, lang: str = "pt") -> str | None: if not text or not text.strip(): return None voice = _load("en" if lang == "en" else "pt") chunks = list(voice.synthesize(text)) if not chunks: print(f"[tts] no audio for text: {text!r}", flush=True) return None path = tempfile.mktemp(suffix=".wav") with wave.open(path, "wb") as wf: wf.setnchannels(chunks[0].sample_channels) wf.setsampwidth(chunks[0].sample_width) wf.setframerate(chunks[0].sample_rate) for ch in chunks: wf.writeframes(ch.audio_int16_bytes) return path