tts-eval-framework / src /tts_manager.py
aankitdas's picture
first commit - working app locally
a3419b6
# src/tts_manager.py
# Production TTS manager for Bantrly.
# Handles grade-band voice config, engine selection, and fallback chain.
# Usage: from src.tts_manager import synthesize_for_band
import logging
from src.kokoro_client import synthesize as kokoro_synthesize
logger = logging.getLogger(__name__)
# Voice settings per grade band, keyed by the band label callers pass in.
# Each entry supplies the "voice" and "speed" values handed to the Kokoro
# engine; speed increases from the youngest band to the oldest.
BAND_CONFIG = {
    band_label: {"voice": voice_name, "speed": speaking_speed}
    for band_label, voice_name, speaking_speed in (
        ("K-2", "af_heart", 0.85),
        ("3-5", "af_heart", 0.95),
        ("6-8", "af_heart", 1.00),
        ("9-12", "am_echo", 1.10),
    )
}

# Engines are attempted in this order; the first one that succeeds wins.
FALLBACK_CHAIN = [
    "kokoro",
    "edge_tts",
    "pyttsx3",
]
def synthesize_for_band(text: str, band: str, output_path: str) -> dict:
    """
    Synthesize speech for a given grade band using the fallback chain.

    Engines are tried in FALLBACK_CHAIN order (Kokoro, then edge_tts, then
    pyttsx3). The first engine that completes without raising wins; any
    exception from an engine is logged and the next engine is tried.

    Only the Kokoro call receives the band's voice and speed settings; the
    edge_tts and pyttsx3 clients are called with just text and output path.

    Args:
        text: coaching text to synthesize
        band: grade band string, one of "K-2", "3-5", "6-8", "9-12".
            An unrecognized band uses the "6-8" settings, but the value
            passed in is still what gets recorded under "band".
        output_path: path to save audio file (without extension). Kokoro and
            pyttsx3 are given output_path + ".wav", edge_tts output_path + ".mp3".

    Returns:
        The dict returned by the engine that succeeded, with "band" added.
        Expected keys: output_path, latency_seconds, engine, voice, band
        (all but "band" come from the engine clients - confirm against them).

    Raises:
        RuntimeError: if every engine in the chain failed. The last engine's
            exception is attached as the cause.
    """
    config = BAND_CONFIG.get(band, BAND_CONFIG["6-8"])
    last_error = None

    # try each engine in fallback chain
    for engine in FALLBACK_CHAIN:
        try:
            if engine == "kokoro":
                result = kokoro_synthesize(
                    text=text,
                    output_path=output_path + ".wav",
                    voice=config["voice"],
                    speed=config["speed"],
                )
            elif engine == "edge_tts":
                # Imported lazily so the optional dependency is only needed
                # when the fallback is actually reached.
                from src.edge_tts_client import synthesize as edge_synthesize
                import asyncio

                mp3_path = output_path + ".mp3"

                # Probe for a running loop in its own try block. If the
                # synthesis call shared this try, a RuntimeError raised by
                # the engine would be mistaken for "no running loop" and
                # trigger asyncio.run() inside an already-running loop.
                try:
                    loop = asyncio.get_running_loop()
                except RuntimeError:
                    loop = None

                if loop is None:
                    # no running loop, safe to use asyncio.run()
                    result = asyncio.run(
                        edge_synthesize(text=text, output_path=mp3_path)
                    )
                else:
                    # running inside jupyter or async context: patch the
                    # loop so it can be re-entered from synchronous code
                    import nest_asyncio

                    nest_asyncio.apply()
                    result = loop.run_until_complete(
                        edge_synthesize(text=text, output_path=mp3_path)
                    )
            elif engine == "pyttsx3":
                from src.pyttsx3_client import synthesize as pyttsx3_synthesize

                result = pyttsx3_synthesize(
                    text=text,
                    output_path=output_path + ".wav",
                )
            else:
                # A misconfigured chain entry is reported by name instead of
                # surfacing later as an unbound `result`.
                raise ValueError(f"Unknown TTS engine in fallback chain: {engine}")

            result["band"] = band
            if engine != "kokoro":
                logger.warning("Kokoro unavailable, using fallback: %s", engine)
            return result
        except Exception as e:
            # Deliberately broad: any failure in one engine must not stop
            # the remaining engines from being tried.
            last_error = e
            logger.error("Engine %s failed: %s. Trying next.", engine, e)
            continue

    raise RuntimeError("All TTS engines failed. No audio generated.") from last_error