import os
import tempfile
import wave

from piper import PiperVoice

from ..base import TTSModel


class PiperTTSModel(TTSModel):
    """Piper TTS model implementation.

    Voices are discovered on disk under ``VOICES_DIR`` and grouped by
    language name ('English' / 'Chinese'). Discovery happens lazily on the
    first call that needs it.
    """

    # Root directory scanned for voice models. The path is relative, so it is
    # resolved against the process working directory.
    VOICES_DIR = "src/voices/piper_voices"

    def __init__(self):
        # {language name: {voice label: path to .onnx model}}; None until
        # initialize() has succeeded.
        self._voices_by_lang = None
        self._initialized = False
        # Loaded PiperVoice objects keyed by model path, so a model is only
        # read from disk once per instance.
        self._loaded_voices = {}

    @property
    def name(self):
        """Short identifier of this TTS backend."""
        return "piper-tts"

    @property
    def description(self):
        """Human-readable description of this TTS backend."""
        return "Local on-device TTS with dynamic English and Chinese voice selection from Piper models"

    def initialize(self):
        """Initialize the Piper model by scanning available voices.

        Returns:
            bool: True if the voice scan succeeded (or already had), False
            if it raised. The error is printed, not propagated.
        """
        if self._initialized:
            return True

        try:
            self._voices_by_lang = self._scan_piper_voices()
            self._initialized = True
            return True
        except Exception as e:
            print(f"Error initializing Piper model: {e}")
            return False

    def _scan_piper_voices(self):
        """Scan available Piper voices.

        Returns:
            dict: ``{'English': {label: path}, 'Chinese': {label: path}}``
            where ``label`` is ``"<voice> (<locale>)"`` and ``path`` points
            at the voice's ``.onnx`` file. Languages with no voices found
            map to an empty dict.
        """
        voices_dir = self.VOICES_DIR
        voices_by_lang = {'English': {}, 'Chinese': {}}

        # Chinese: only huayan medium
        chinese_path = os.path.join(
            voices_dir, "zh", "zh_CN", "huayan", "medium",
            "zh_CN-huayan-medium.onnx",
        )
        if os.path.exists(chinese_path):
            voices_by_lang['Chinese']['huayan (zh_CN)'] = chinese_path

        # English voices: expected layout is .../en/<locale>/<voice>/<quality>/
        english_voices = voices_by_lang['English']
        en_dir = os.path.join(voices_dir, "en")
        for root, _dirs, files in os.walk(en_dir):
            parts = root.split(os.sep)
            # Skip directories that are not deep enough or are not a
            # supported quality level.
            if len(parts) < 5 or parts[-1] not in ('medium', 'high'):
                continue

            locale, voice_name, quality = parts[-3], parts[-2], parts[-1]
            model_stem = f"{locale}-{voice_name}-{quality}"
            label = f"{voice_name} ({locale})"
            for file in files:
                if file.endswith('.onnx') and model_stem in file:
                    # Prefer medium over high: medium always wins, high is
                    # only used when nothing is registered for the label yet.
                    if quality == 'medium' or label not in english_voices:
                        english_voices[label] = os.path.join(root, file)
                    break  # Assume one .onnx per dir

        return voices_by_lang

    def _load_voice(self, model_path):
        """Return the PiperVoice for ``model_path``, loading it on first use."""
        piper_voice = self._loaded_voices.get(model_path)
        if piper_voice is None:
            # PiperVoice.load() is the loader documented by Piper; it reads
            # the ONNX model and its config from disk.
            piper_voice = PiperVoice.load(model_path)
            self._loaded_voices[model_path] = piper_voice
        return piper_voice

    def generate_speech(self, text, language="English", voice=None, **kwargs):
        """
        Generate speech from text using Piper TTS

        Args:
            text (str): Text to convert to speech
            language (str): Language name ('English' or 'Chinese')
            voice (str, optional): Voice label to use. Falls back to the
                first available voice for the language when omitted or
                unknown.
            **kwargs: Accepted for interface compatibility; currently unused.

        Returns:
            str: Path to the generated audio file. The file is a temporary
            ``.wav`` that is not deleted automatically; the caller owns it.

        Raises:
            RuntimeError: If the voice scan fails.
            ValueError: If no voices are available for ``language``.
        """
        if not self._initialized and not self.initialize():
            raise RuntimeError("Failed to initialize Piper model")

        # Get available voices for the selected language
        available_voices = self._voices_by_lang.get(language, {})
        if not available_voices:
            raise ValueError(f"No voices available for language: {language}")

        # If voice not specified or not available, use the first available voice
        if not voice or voice not in available_voices:
            voice = next(iter(available_voices))

        piper_voice = self._load_voice(available_voices[voice])

        # Reserve a unique path and close the handle before writing: an open
        # NamedTemporaryFile cannot be reopened by name on Windows.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
            output_path = tmp_file.name

        try:
            with wave.open(output_path, "wb") as wav_file:
                # Newer Piper releases expose synthesize_wav(); older ones
                # write WAV data through synthesize(text, wav_file).
                synthesize_wav = getattr(piper_voice, "synthesize_wav", None)
                if synthesize_wav is not None:
                    synthesize_wav(text, wav_file)
                else:
                    piper_voice.synthesize(text, wav_file)
        except Exception:
            # Do not leave a partial/empty temp file behind on failure.
            try:
                os.remove(output_path)
            except OSError:
                pass
            raise

        return output_path

    def supports_multilingual(self):
        """Report that this backend handles more than one language."""
        return True

    def get_supported_languages(self):
        """Return the language names known to this backend.

        Returns an empty list if the voice scan failed.
        """
        if not self._initialized:
            self.initialize()
        return list(self._voices_by_lang or {})

    def get_available_voices(self, language="English"):
        """Get available voices for a specific language.

        Returns an empty list for an unknown language or if the voice scan
        failed.
        """
        if not self._initialized:
            self.initialize()
        return list((self._voices_by_lang or {}).get(language, {}))