# Spaces: Sleeping
# Michael Hu
# refactor: replace inline model definitions with ModelFactory and remove unused imports
# ef4db28
import os
import tempfile
import wave

from piper import PiperVoice

from ..base import TTSModel
class PiperTTSModel(TTSModel):
    """Piper TTS model implementation.

    Voices are discovered on disk under ``voices_dir`` and grouped by language
    name ('English' / 'Chinese'). Discovery is lazy: it happens on the first
    call to :meth:`initialize` or to any method that needs the voice table.
    """

    def __init__(self, voices_dir="src/voices/piper_voices"):
        """Create an uninitialized model.

        Args:
            voices_dir (str): Root directory holding the Piper voice files.
                The default is relative to the current working directory.
        """
        self._voices_dir = voices_dir
        # Mapping of language name -> {voice label -> .onnx path}; None until scanned.
        self._voices_by_lang = None
        self._initialized = False

    def name(self):
        """Return the identifier of this model."""
        return "piper-tts"

    def description(self):
        """Return a human-readable description of this model."""
        return "Local on-device TTS with dynamic English and Chinese voice selection from Piper models"

    def initialize(self):
        """Initialize the Piper model by scanning available voices.

        Returns:
            bool: True if the voice table is available (including when it was
            already built by an earlier call), False if scanning raised.
        """
        if self._initialized:
            return True
        try:
            self._voices_by_lang = self._scan_piper_voices()
        except Exception as e:
            print(f"Error initializing Piper model: {e}")
            return False
        self._initialized = True
        return True

    def _scan_piper_voices(self):
        """Scan the voices directory and build the language -> voices table.

        Returns:
            dict: ``{'English': {label: path}, 'Chinese': {label: path}}`` where
            ``label`` has the form ``"<voice> (<locale>)"``.
        """
        voices_dir = self._voices_dir
        voices_by_lang = {'English': {}, 'Chinese': {}}

        # Chinese: only huayan medium
        chinese_path = os.path.join(
            voices_dir, "zh", "zh_CN", "huayan", "medium", "zh_CN-huayan-medium.onnx"
        )
        if os.path.exists(chinese_path):
            voices_by_lang['Chinese']['huayan (zh_CN)'] = chinese_path

        # English voices: expected layout is <en_dir>/<locale>/<voice>/<quality>/*.onnx
        english = voices_by_lang['English']
        en_dir = os.path.join(voices_dir, "en")
        for root, dirs, files in os.walk(en_dir):
            # Sort in place so traversal (and therefore the default voice,
            # which is the first entry) does not depend on filesystem order.
            dirs.sort()

            # Measure depth relative to the English root rather than counting
            # components of the full path, so any voices_dir works.
            rel_parts = os.path.relpath(root, en_dir).split(os.sep)
            if len(rel_parts) < 3 or rel_parts[-1] not in ('medium', 'high'):
                continue
            locale, voice_name, quality = rel_parts[-3:]  # e.g. en_US, alan, medium

            expected_stem = f"{locale}-{voice_name}-{quality}"
            for file_name in sorted(files):
                if file_name.endswith('.onnx') and expected_stem in file_name:
                    label = f"{voice_name} ({locale})"
                    # Prefer medium over high: medium always wins, high only
                    # fills in when no entry exists yet for this voice.
                    if quality == 'medium' or label not in english:
                        english[label] = os.path.join(root, file_name)
                    break  # Assume one .onnx per dir
        return voices_by_lang

    def generate_speech(self, text, language="English", voice=None, **kwargs):
        """Generate speech from text using Piper TTS.

        Args:
            text (str): Text to convert to speech
            language (str): Language name ('English' or 'Chinese')
            voice (str, optional): Voice label to use; when missing or unknown
                the first available voice for the language is used
            **kwargs: Additional parameters for generation (currently unused)

        Returns:
            str: Path to the generated WAV file. The file is not deleted
            automatically; the caller owns it.

        Raises:
            RuntimeError: If the voice table could not be initialized.
            ValueError: If no voices are available for ``language``.
        """
        if not self._initialized and not self.initialize():
            raise RuntimeError("Failed to initialize Piper model")

        # Get available voices for the selected language
        available_voices = (self._voices_by_lang or {}).get(language, {})
        if not available_voices:
            raise ValueError(f"No voices available for language: {language}")

        # If voice not specified or not available, use the first available voice
        if not voice or voice not in available_voices:
            voice = next(iter(available_voices))
        model_path = available_voices[voice]

        # PiperVoice is built through its load() factory, not by passing the
        # model path to the constructor. Load before creating the output file
        # so a bad model does not leave a stray temp file behind.
        piper_voice = PiperVoice.load(model_path)

        # Reserve a path and close the descriptor right away, so the WAV
        # writer is the only handle open on the file while it is written.
        fd, output_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        try:
            with wave.open(output_path, "wb") as wav_file:
                # Newer piper-tts releases expose synthesize_wav(); older ones
                # write WAV frames through synthesize(text, wav_file). Both
                # expect an open wave writer, not a filename.
                synthesize_wav = getattr(piper_voice, "synthesize_wav", None)
                if synthesize_wav is not None:
                    synthesize_wav(text, wav_file)
                else:
                    piper_voice.synthesize(text, wav_file)
        except Exception:
            # Do not leak a partial/empty temp file when synthesis fails.
            try:
                os.remove(output_path)
            except OSError:
                pass
            raise
        return output_path

    def supports_multilingual(self):
        """Return True: this model handles more than one language."""
        return True

    def get_supported_languages(self):
        """Return the language names in the voice table.

        Returns an empty list when initialization failed.
        """
        if not self._initialized:
            self.initialize()
        return list(self._voices_by_lang or {})

    def get_available_voices(self, language="English"):
        """Get available voices for a specific language.

        Returns an empty list for an unknown language or when initialization
        failed.
        """
        if not self._initialized:
            self.initialize()
        return list((self._voices_by_lang or {}).get(language, {}))