Spaces:
Sleeping
Sleeping
| # app/engines/piper_engine.py | |
| # Piper TTS engine — fast ONNX-based neural TTS, fully offline. | |
| # Voices are downloaded on demand from rhasspy/piper-voices on HF Hub | |
| # and cached flat in voices/piper/ for subsequent runs. | |
| # Designed for low-latency, low-resource deployment (runs on Raspberry Pi). | |
| # Faster than Kokoro on CPU, lower naturalness ceiling. | |
| # Good fallback: offline after first download, no API key, minimal VRAM. | |
| import wave | |
| import time | |
| import shutil | |
| from pathlib import Path | |
| from piper import PiperVoice | |
| from huggingface_hub import hf_hub_download | |
| from engines.base import TTSEngine | |
# Voice files are stored flat (no sub-folders) in voices/piper/ under the
# project root, which is reached by walking three parents up from this file.
_VOICES_DIR = Path(__file__).parent.parent.parent.joinpath("voices", "piper")

# Loaded voices keyed by model file name: loading an ONNX model takes ~0.5s,
# so each one is loaded once and then reused across calls.
_voice_cache: dict[str, PiperVoice] = dict()
def _ensure_model_downloaded(voice_file: str) -> None:
    """
    Make sure a voice's model and its JSON config exist in voices/piper/.

    The two files (``<voice_file>`` and ``<voice_file>.json``) are checked
    independently and only the missing ones are fetched from
    rhasspy/piper-voices on HF Hub, so a partial earlier download is
    completed rather than repeated. Fetched files are moved to the flat
    voices/piper/ location, and the repo sub-folders the download created
    there are removed again once they are empty.

    Args:
        voice_file: Model file name in the piper-voices naming scheme
            ``<lang>_<REGION>-<speaker>-<quality>.onnx``,
            e.g. ``en_US-amy-medium.onnx``.

    Raises:
        ValueError: If a download is needed but ``voice_file`` does not have
            the three dash-separated parts required to derive its repo path.
    """
    _VOICES_DIR.mkdir(parents=True, exist_ok=True)

    wanted = (voice_file, f"{voice_file}.json")
    missing = [name for name in wanted if not (_VOICES_DIR / name).exists()]
    if not missing:
        # Both files are already cached: skip name parsing entirely so that
        # manually installed voices with non-standard names still load.
        return

    # parse voice file name into HF Hub repo subfolder structure
    # e.g. en_US-amy-medium.onnx -> en/en_US/amy/medium/
    parts = voice_file.split("-")
    if len(parts) < 3:
        raise ValueError(
            f"Cannot derive HF Hub path for Piper voice {voice_file!r}: "
            "expected a name like 'en_US-amy-medium.onnx'"
        )
    lang_full, speaker = parts[0], parts[1]      # "en_US", "amy"
    lang_family = lang_full.split("_")[0]        # "en"
    quality = parts[2].removesuffix(".onnx")     # "medium"
    repo_subfolder = f"{lang_family}/{lang_full}/{speaker}/{quality}"

    for name in missing:
        target = _VOICES_DIR / name
        print(f"[Piper] Downloading {name} from HF Hub...")
        # NOTE(review): newer huggingface_hub releases appear to deprecate
        # local_dir_use_symlinks — confirm against the pinned version before
        # dropping it; it is kept here so the moved file is a real file.
        downloaded = hf_hub_download(
            repo_id="rhasspy/piper-voices",
            filename=f"{repo_subfolder}/{name}",
            local_dir=str(_VOICES_DIR),
            local_dir_use_symlinks=False,
        )
        shutil.move(downloaded, target)
        print(f"[Piper] Saved to {target}")

    # The download mirrors the repo layout under voices/piper/; after the
    # move those folders are just clutter. rmdir() only succeeds on empty
    # directories, so anything still holding files is left untouched.
    leftover = _VOICES_DIR / repo_subfolder
    while leftover != _VOICES_DIR:
        try:
            leftover.rmdir()
        except OSError:
            break
        leftover = leftover.parent
def _get_voice(voice_file: str) -> PiperVoice:
    """Return the PiperVoice for ``voice_file``, loading it on first use.

    A voice that is already in the module-level cache is returned directly.
    Otherwise the model files are ensured to be present (downloading them if
    necessary), the voice is loaded on CPU, stored in the cache and returned.
    """
    try:
        return _voice_cache[voice_file]
    except KeyError:
        pass  # first request for this voice — fall through and load it

    _ensure_model_downloaded(voice_file)
    onnx_path = str(_VOICES_DIR / voice_file)
    loaded_voice = PiperVoice.load(
        onnx_path,
        use_cuda=False,  # ONNX CUDA provider requires separate install
    )
    _voice_cache[voice_file] = loaded_voice
    return _voice_cache[voice_file]
class PiperEngine(TTSEngine):
    """TTS engine backed by locally cached Piper ONNX voices.

    Each grade band maps to a voice model file and a speaking-speed
    multiplier through ``BAND_CONFIG``.
    """

    name = "Piper (ONNX)"
    engine_type = "neural-local"
    cost_per_million_chars = 0.0
    is_production_ready = False  # lower naturalness than Kokoro, no band-tuned voices yet
    requires_internet = False    # only on first run; fully offline after download

    # Per-band voice model file and speed multiplier (1.0 = the voice's
    # normal tempo, below 1.0 slower, above 1.0 faster).
    BAND_CONFIG = {
        "K-2": {"voice_file": "en_US-amy-medium.onnx", "speed": 0.9},
        "3-5": {"voice_file": "en_US-amy-medium.onnx", "speed": 1.0},
        "6-8": {"voice_file": "en_US-amy-medium.onnx", "speed": 1.0},
        "9-12": {"voice_file": "en_US-lessac-medium.onnx", "speed": 1.1},
    }

    def synthesize(self, text: str, band: str, output_path: str) -> dict:
        """Synthesize ``text`` to a WAV file using the voice for ``band``.

        Args:
            text: Text to speak.
            band: Band key passed to ``self.get_band_config`` (not defined in
                this class — presumably inherited from TTSEngine) to obtain
                the ``voice_file`` / ``speed`` settings.
            output_path: Destination path without extension; ".wav" is
                appended.

        Returns:
            Dict with ``audio_path``, ``latency_seconds`` (synthesis time
            only — the voice is loaded before the timer starts), ``voice``,
            ``speed`` and ``engine``.

        Raises:
            ValueError: If the configured speed is not a positive number.
        """
        # Function-scope import: SynthesisConfig ships in the same piper
        # package that already provides PiperVoice at module level.
        from piper import SynthesisConfig

        config = self.get_band_config(band)
        voice_file = config["voice_file"]
        speed = config["speed"]
        if speed <= 0:
            raise ValueError(f"Piper speed must be positive, got {speed!r}")

        full_path = output_path + ".wav"
        voice = _get_voice(voice_file)

        # Piper controls tempo through length_scale, where larger values mean
        # slower speech, so a speed multiplier maps to its reciprocal. Without
        # this the configured speed was reported but never applied.
        syn_config = SynthesisConfig(length_scale=1.0 / speed)

        # perf_counter is monotonic, so the measurement cannot be skewed by
        # system clock adjustments.
        start = time.perf_counter()
        with wave.open(full_path, "wb") as wav_file:
            voice.synthesize_wav(text, wav_file, syn_config=syn_config)
        latency = round(time.perf_counter() - start, 3)

        return {
            "audio_path": full_path,
            "latency_seconds": latency,
            "voice": voice_file.replace(".onnx", ""),
            "speed": speed,
            "engine": self.name,
        }