# tts_gallery/src/models/tts/piper_model.py
# Michael Hu
# refactor: replace inline model definitions with ModelFactory and remove unused imports
# ef4db28
import os
import tempfile
import wave

from piper import PiperVoice

from ..base import TTSModel
class PiperTTSModel(TTSModel):
    """Piper TTS model implementation.

    Voice models (``.onnx`` files) are discovered on disk the first time the
    model is initialized and grouped by language name (``'English'`` and
    ``'Chinese'``). Each call to :meth:`generate_speech` loads the selected
    voice and writes the synthesized audio to a temporary WAV file.
    """

    # Root folder holding the Piper voices. The path is relative, so it is
    # resolved against the process's current working directory. Kept as a
    # class attribute so subclasses/tests can point it somewhere else.
    VOICES_DIR = "src/voices/piper_voices"

    # Quality sub-folders that are considered when scanning English voices.
    _ENGLISH_QUALITIES = ("medium", "high")

    def __init__(self):
        # language name -> {voice label -> path to .onnx}; None until scanned.
        self._voices_by_lang = None
        self._initialized = False

    @property
    def name(self):
        """Short identifier of this model."""
        return "piper-tts"

    @property
    def description(self):
        """Human-readable description of this model."""
        return "Local on-device TTS with dynamic English and Chinese voice selection from Piper models"

    def initialize(self):
        """Initialize the Piper model by scanning available voices.

        The scan runs only once; later calls return immediately.

        Returns:
            bool: True when the voice table is available, False when the
            scan raised an exception (the error is printed).
        """
        if self._initialized:
            return True
        try:
            self._voices_by_lang = self._scan_piper_voices()
        except Exception as e:
            print(f"Error initializing Piper model: {e}")
            return False
        self._initialized = True
        return True

    def _scan_piper_voices(self):
        """Scan available Piper voices under ``VOICES_DIR``.

        Chinese is limited to the single ``huayan`` medium voice. English
        voices are looked up in ``en/<locale>/<voice>/<quality>/`` folders,
        where the ``.onnx`` file name must contain
        ``<locale>-<voice>-<quality>``.

        Returns:
            dict: ``{'English': {label: path}, 'Chinese': {label: path}}``
            where ``label`` is ``"<voice> (<locale>)"``. Both language keys
            are always present, possibly with an empty mapping.
        """
        voices_dir = self.VOICES_DIR
        voices_by_lang = {'English': {}, 'Chinese': {}}

        # Chinese: only huayan medium
        chinese_path = os.path.join(
            voices_dir, "zh", "zh_CN", "huayan", "medium", "zh_CN-huayan-medium.onnx"
        )
        if os.path.exists(chinese_path):
            voices_by_lang['Chinese']['huayan (zh_CN)'] = chinese_path

        # English voices
        english_voices = voices_by_lang['English']
        en_dir = os.path.join(voices_dir, "en")
        for root, dirs, files in os.walk(en_dir):
            # Sorting in place fixes the traversal order, so the "first"
            # voice used as a default does not depend on the file system.
            dirs.sort()

            # Work on the path relative to en_dir so the depth check does not
            # depend on how many components VOICES_DIR itself has.
            rel_parts = os.path.relpath(root, en_dir).split(os.sep)
            if len(rel_parts) < 3 or rel_parts[-1] not in self._ENGLISH_QUALITIES:
                continue
            # e.g. ("en_GB", "alan", "medium")
            locale, voice_name, quality = rel_parts[-3:]

            expected_stem = f"{locale}-{voice_name}-{quality}"
            label = f"{voice_name} ({locale})"
            for file in sorted(files):
                if file.endswith('.onnx') and expected_stem in file:
                    # Prefer medium over high: medium always wins, high is
                    # only used when nothing is registered for this label yet.
                    if quality == 'medium' or label not in english_voices:
                        english_voices[label] = os.path.join(root, file)
                    break  # Assume one .onnx per dir
        return voices_by_lang

    def generate_speech(self, text, language="English", voice=None, **kwargs):
        """Generate speech from text using Piper TTS.

        Args:
            text (str): Text to convert to speech.
            language (str): Language name ('English' or 'Chinese').
            voice (str, optional): Voice label to use. When missing or not
                available for ``language``, the first available voice is used.
            **kwargs: Additional parameters for generation (currently unused).

        Returns:
            str: Path to the generated WAV file. The file is not deleted
            automatically; the caller owns it.

        Raises:
            RuntimeError: If the model could not be initialized.
            ValueError: If ``text`` is empty or no voice exists for ``language``.
        """
        if not self._initialized and not self.initialize():
            raise RuntimeError("Failed to initialize Piper model")

        if not text or not text.strip():
            raise ValueError("Text must not be empty")

        # Get available voices for the selected language
        available_voices = self._voices_by_lang.get(language, {})
        if not available_voices:
            raise ValueError(f"No voices available for language: {language}")

        # If voice not specified or not available, use the first available voice
        if not voice or voice not in available_voices:
            voice = next(iter(available_voices))
        model_path = available_voices[voice]

        # PiperVoice is built from an ONNX session plus config; the documented
        # way to get one from a model file is the ``load`` factory.
        piper_voice = PiperVoice.load(model_path)

        # Only reserve a unique path here and close the handle right away, so
        # the file can be reopened for writing on every platform.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
            output_path = tmp_file.name

        try:
            # Piper writes into a wave.Wave_write object, not a file name.
            with wave.open(output_path, "wb") as wav_file:
                if hasattr(piper_voice, "synthesize_wav"):
                    # Newer Piper releases: dedicated WAV writer.
                    piper_voice.synthesize_wav(text, wav_file)
                else:
                    # Older Piper releases: synthesize() writes the WAV itself.
                    piper_voice.synthesize(text, wav_file)
        except Exception:
            # Do not leave a partial/empty temp file behind on failure.
            try:
                os.remove(output_path)
            except OSError:
                pass
            raise
        return output_path

    def supports_multilingual(self):
        """Return True: this model offers more than one language."""
        return True

    def get_supported_languages(self):
        """Return the language names known to the voice table.

        Returns:
            list: Language names; empty if initialization failed.
        """
        if not self._initialized:
            self.initialize()
        # _voices_by_lang stays None when initialize() fails.
        return list(self._voices_by_lang or {})

    def get_available_voices(self, language="English"):
        """Get available voices for a specific language.

        Returns:
            list: Voice labels for ``language``; empty if the language is
            unknown or initialization failed.
        """
        if not self._initialized:
            self.initialize()
        # _voices_by_lang stays None when initialize() fails.
        voices = (self._voices_by_lang or {}).get(language, {})
        return list(voices)