Spaces:

build-small-hackathon
/

iris

Running on Zero

iris / core /tts.py

Marcus Ramalho

Iris: hands-free live mode, money/bill reading, accessible UI, Qwen3-VL-2B

df6b3ac 1 day ago

1.45 kB

	"""Text-to-speech (Piper). Returns the path to a .wav. Bilingual PT/EN.

	Piper runs on onnxruntime (no torch). Voices come from rhasspy/piper-voices.
	"""
	import os
	import tempfile
	import wave

	# Hub repository that the voice model files are downloaded from.
	_REPO = "rhasspy/piper-voices"

	# Per-language voice path inside the repo (no file extension). Each entry
	# can be overridden through its environment variable.
	_VOICES = dict(
	    pt=os.getenv("IRIS_TTS_VOICE_PT", "pt/pt_BR/faber/medium/pt_BR-faber-medium"),
	    en=os.getenv("IRIS_TTS_VOICE_EN", "en/en_US/amy/medium/en_US-amy-medium"),
	)

	# Already-loaded voices, keyed by language; populated on demand by _load().
	_cache = dict()


	def _load(lang: str):
	    """Return the Piper voice for ``lang``, loading and caching it on first use.

	    A language with no entry in ``_VOICES`` falls back to the Portuguese
	    voice. The cache is keyed by the *resolved* language, so all unknown
	    language codes share the single cached "pt" voice instead of each one
	    downloading and loading its own copy of the same model.

	    Args:
	        lang: Language key, e.g. "pt" or "en".

	    Returns:
	        The object returned by ``PiperVoice.load`` for the resolved voice.
	    """
	    key = lang if lang in _VOICES else "pt"
	    if key not in _cache:
	        # Third-party imports are deferred until a voice is actually needed.
	        from huggingface_hub import hf_hub_download
	        from piper import PiperVoice

	        name = _VOICES[key]
	        # A voice is two files in the repo: the ONNX model and its JSON config.
	        onnx = hf_hub_download(_REPO, f"{name}.onnx")
	        conf = hf_hub_download(_REPO, f"{name}.onnx.json")
	        _cache[key] = PiperVoice.load(onnx, config_path=conf)
	    return _cache[key]


	def synthesize(text: str, lang: str = "pt") -> str \| None:
	if not text or not text.strip():
	return None
	voice = _load("en" if lang == "en" else "pt")
	chunks = list(voice.synthesize(text))
	if not chunks:
	print(f"[tts] no audio for text: {text!r}", flush=True)
	return None
	path = tempfile.mktemp(suffix=".wav")
	with wave.open(path, "wb") as wf:
	wf.setnchannels(chunks[0].sample_channels)
	wf.setsampwidth(chunks[0].sample_width)
	wf.setframerate(chunks[0].sample_rate)
	for ch in chunks:
	wf.writeframes(ch.audio_int16_bytes)
	return path