# Scraped page header (not part of the module): Spaces - Sleeping; file size: 3,005 bytes; revision a3419b6.
# src/tts_manager.py
# Production TTS manager for Bantrly.
# Handles grade-band voice config, engine selection, and fallback chain.
# Usage: from src.tts_manager import synthesize_for_band
import logging
from src.kokoro_client import synthesize as kokoro_synthesize
logger = logging.getLogger(__name__)
# Voice settings per grade band: the voice id and speaking speed that
# synthesize_for_band() passes to the Kokoro engine. Speed rises with grade.
BAND_CONFIG = {
    "K-2": dict(voice="af_heart", speed=0.85),
    "3-5": dict(voice="af_heart", speed=0.95),
    "6-8": dict(voice="af_heart", speed=1.00),
    "9-12": dict(voice="am_echo", speed=1.10),
}

# Engines in the order they are attempted; the first one that succeeds wins.
FALLBACK_CHAIN = [
    "kokoro",
    "edge_tts",
    "pyttsx3",
]
def _run_edge_tts(text: str, output_path: str) -> dict:
    """Drive the async edge_tts client to completion from synchronous code.

    Args:
        text: text to synthesize.
        output_path: full path of the audio file to write (with extension).

    Returns:
        Whatever the edge_tts client's ``synthesize`` coroutine returns
        (the caller treats it as a dict).
    """
    # Imported lazily so the dependency is only needed when this fallback runs.
    from src.edge_tts_client import synthesize as edge_synthesize
    import asyncio

    # Only the loop lookup is guarded: a RuntimeError raised by the synthesis
    # itself must propagate instead of being mistaken for "no running loop".
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread, so asyncio.run() is safe.
        return asyncio.run(edge_synthesize(text=text, output_path=output_path))

    # Already inside a running loop (e.g. Jupyter or an async caller):
    # nest_asyncio patches asyncio so run_until_complete() can be re-entered.
    import nest_asyncio

    nest_asyncio.apply()
    return loop.run_until_complete(
        edge_synthesize(text=text, output_path=output_path)
    )


def synthesize_for_band(text: str, band: str, output_path: str) -> dict:
    """Synthesize speech for a grade band, walking the engine fallback chain.

    Engines are tried in ``FALLBACK_CHAIN`` order (Kokoro, then edge_tts, then
    pyttsx3). The first engine that succeeds produces the result; a failing
    engine is logged and the next one is tried. Only Kokoro receives the
    band's voice and speed settings; the fallback engines are called with
    text and output path only.

    Args:
        text: coaching text to synthesize.
        band: grade band, one of "K-2", "3-5", "6-8", "9-12". Any other value
            falls back to the "6-8" voice settings (a warning is logged).
        output_path: path to save the audio file, without extension. ".wav"
            is appended for Kokoro and pyttsx3, ".mp3" for edge_tts.

    Returns:
        The dict returned by the engine client, with ``band`` set to the
        ``band`` argument as passed in. The clients are expected to supply
        output_path, latency_seconds, engine and voice (not verifiable from
        this module).

    Raises:
        RuntimeError: if every engine in the chain fails; the last engine
            error is attached as ``__cause__``.
    """
    if band not in BAND_CONFIG:
        logger.warning('Unknown grade band %r; using "6-8" voice settings.', band)
    config = BAND_CONFIG.get(band, BAND_CONFIG["6-8"])

    last_error = None
    for engine in FALLBACK_CHAIN:
        try:
            if engine == "kokoro":
                result = kokoro_synthesize(
                    text=text,
                    output_path=output_path + ".wav",
                    voice=config["voice"],
                    speed=config["speed"],
                )
            elif engine == "edge_tts":
                result = _run_edge_tts(text, output_path + ".mp3")
            elif engine == "pyttsx3":
                # Imported lazily: only needed when both earlier engines fail.
                from src.pyttsx3_client import synthesize as pyttsx3_synthesize

                result = pyttsx3_synthesize(
                    text=text,
                    output_path=output_path + ".wav",
                )
            else:
                # Misconfigured chain entry: fail this step with a clear
                # message instead of an unbound-variable error.
                raise ValueError(f"Unknown TTS engine: {engine}")
            result["band"] = band
        except Exception as e:
            # Deliberately broad: any failure in one engine moves to the next.
            last_error = e
            logger.error("Engine %s failed: %s. Trying next.", engine, e)
        else:
            if engine != "kokoro":
                logger.warning("Kokoro unavailable, using fallback: %s", engine)
            return result

    raise RuntimeError("All TTS engines failed. No audio generated.") from last_error