Spaces:
Sleeping
Sleeping
| # src/tts_manager.py | |
| # Production TTS manager for Bantrly. | |
| # Handles grade-band voice config, engine selection, and fallback chain. | |
| # Usage: from src.tts_manager import synthesize_for_band | |
| import logging | |
| from src.kokoro_client import synthesize as kokoro_synthesize | |
| logger = logging.getLogger(__name__) | |
# Per-grade-band voice settings, keyed by band label.
# Each entry maps to {"voice": <voice id>, "speed": <speed value>}.
BAND_CONFIG = {
    band_label: {"voice": voice_id, "speed": speed_value}
    for band_label, voice_id, speed_value in (
        ("K-2", "af_heart", 0.85),
        ("3-5", "af_heart", 0.95),
        ("6-8", "af_heart", 1.00),
        ("9-12", "am_echo", 1.10),
    )
}

# Engines are attempted in this order; the first one that succeeds wins.
FALLBACK_CHAIN = [
    "kokoro",
    "edge_tts",
    "pyttsx3",
]
def _run_edge_tts(text: str, output_path: str) -> dict:
    """Run the async edge_tts client to completion from synchronous code.

    Args:
        text: text to synthesize.
        output_path: full output file path, including the extension.

    Returns:
        Whatever the edge_tts client coroutine returns.
    """
    import asyncio

    from src.edge_tts_client import synthesize as edge_synthesize

    # Only the loop lookup is guarded: get_running_loop() raises RuntimeError
    # when no loop is running in this thread. A RuntimeError raised by the
    # synthesis itself must NOT be mistaken for "no loop" and retried.
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No running loop, so asyncio.run() is safe.
        return asyncio.run(edge_synthesize(text=text, output_path=output_path))

    # Already inside a running loop (e.g. Jupyter or another async context):
    # nest_asyncio patches asyncio so run_until_complete() can be re-entered.
    import nest_asyncio

    nest_asyncio.apply()
    return loop.run_until_complete(
        edge_synthesize(text=text, output_path=output_path)
    )


def synthesize_for_band(text: str, band: str, output_path: str) -> dict:
    """
    Synthesize speech for a given grade band using the fallback chain.

    Engines are tried in FALLBACK_CHAIN order (Kokoro, then edge_tts, then
    pyttsx3); the first engine that succeeds supplies the result. Only the
    Kokoro call receives the band's voice and speed settings; the fallback
    engines are called with text and output path only.

    Args:
        text: coaching text to synthesize
        band: grade band string, one of "K-2", "3-5", "6-8", "9-12";
            any other value uses the "6-8" settings (a warning is logged)
        output_path: path to save audio file (without extension); ".wav" is
            appended for kokoro and pyttsx3, ".mp3" for edge_tts

    Returns:
        The dict returned by the engine client with "band" set to the
        requested band. The clients are expected to provide output_path,
        latency_seconds, engine and voice (not verifiable from this module).

    Raises:
        RuntimeError: if every engine in the chain fails; the last engine's
            exception is attached as the cause.
    """
    if band not in BAND_CONFIG:
        logger.warning(
            "Unknown grade band %r, using '6-8' voice settings", band
        )
    config = BAND_CONFIG.get(band, BAND_CONFIG["6-8"])

    last_error = None
    # try each engine in fallback chain
    for engine in FALLBACK_CHAIN:
        try:
            if engine == "kokoro":
                result = kokoro_synthesize(
                    text=text,
                    output_path=output_path + ".wav",
                    voice=config["voice"],
                    speed=config["speed"],
                )
            elif engine == "edge_tts":
                result = _run_edge_tts(text, output_path + ".mp3")
            elif engine == "pyttsx3":
                from src.pyttsx3_client import synthesize as pyttsx3_synthesize

                result = pyttsx3_synthesize(
                    text=text,
                    output_path=output_path + ".wav",
                )
            else:
                # Fail explicitly instead of tripping over an unbound `result`.
                raise ValueError(f"Unknown TTS engine: {engine!r}")

            result["band"] = band
            if engine != "kokoro":
                logger.warning("Kokoro unavailable, using fallback: %s", engine)
            return result
        except Exception as e:
            # Best-effort chain: record the failure and move to the next engine.
            last_error = e
            logger.error("Engine %s failed: %s. Trying next.", engine, e)

    raise RuntimeError(
        "All TTS engines failed. No audio generated."
    ) from last_error