Spaces:

aankitdas
/

tts-eval-framework

Sleeping

File size: 3,005 Bytes

a3419b6

# src/tts_manager.py
# Production TTS manager for Bantrly.
# Handles grade-band voice config, engine selection, and fallback chain.
# Usage: from src.tts_manager import synthesize_for_band

import logging
from src.kokoro_client import synthesize as kokoro_synthesize

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Voice name and speed value looked up per grade band by synthesize_for_band.
BAND_CONFIG = {
    "K-2": dict(voice="af_heart", speed=0.85),
    "3-5": dict(voice="af_heart", speed=0.95),
    "6-8": dict(voice="af_heart", speed=1.00),
    "9-12": dict(voice="am_echo", speed=1.10),
}

# Engines in the order they are attempted; later entries are the fallbacks.
FALLBACK_CHAIN = [
    "kokoro",
    "edge_tts",
    "pyttsx3",
]


def _synthesize_with_edge_tts(text: str, output_path: str) -> dict:
    """
    Run the async edge_tts client to completion from synchronous code.

    Args:
        text: text to synthesize
        output_path: full path of the audio file to write (with extension)

    Returns:
        whatever the edge_tts client's coroutine returns
        (presumably a result dict; the caller treats it as one)
    """
    # imported lazily so edge_tts is only required when this fallback is used
    import asyncio
    from src.edge_tts_client import synthesize as edge_synthesize

    # Only the loop detection is guarded: a RuntimeError raised by the
    # synthesis itself must propagate instead of being mistaken for
    # "no running loop" and retried with asyncio.run().
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # no running loop, safe to use asyncio.run()
        return asyncio.run(edge_synthesize(text=text, output_path=output_path))

    # running inside jupyter or another async context: patch asyncio so the
    # already-running loop can be re-entered
    import nest_asyncio
    nest_asyncio.apply()
    return loop.run_until_complete(
        edge_synthesize(text=text, output_path=output_path)
    )


def synthesize_for_band(text: str, band: str, output_path: str) -> dict:
    """
    Synthesize speech for a given grade band using the fallback chain.
    Engines are tried in FALLBACK_CHAIN order (Kokoro first, then edge_tts,
    then pyttsx3); the first one that succeeds wins.

    Only Kokoro receives the band's voice and speed from BAND_CONFIG; the
    fallback engines are called with text and output path only.

    Args:
        text: coaching text to synthesize
        band: grade band string, one of "K-2", "3-5", "6-8", "9-12";
            any other value uses the "6-8" configuration (a warning is logged)
        output_path: path to save audio file (without extension);
            ".wav" is appended for kokoro/pyttsx3 and ".mp3" for edge_tts

    Returns:
        the result dict produced by the engine client, with "band" added
        (expected keys: output_path, latency_seconds, engine, voice, band)

    Raises:
        RuntimeError: if every engine in the chain fails; the last engine's
            exception is attached as the cause
    """
    config = BAND_CONFIG.get(band)
    if config is None:
        # keep the lenient default, but make a mistyped band visible
        logger.warning("Unknown grade band %r, using '6-8' configuration", band)
        config = BAND_CONFIG["6-8"]

    last_error = None

    # try each engine in fallback chain
    for engine in FALLBACK_CHAIN:
        try:
            if engine == "kokoro":
                result = kokoro_synthesize(
                    text=text,
                    output_path=output_path + ".wav",
                    voice=config["voice"],
                    speed=config["speed"],
                )
            elif engine == "edge_tts":
                result = _synthesize_with_edge_tts(text, output_path + ".mp3")
            elif engine == "pyttsx3":
                from src.pyttsx3_client import synthesize as pyttsx3_synthesize
                result = pyttsx3_synthesize(
                    text=text,
                    output_path=output_path + ".wav",
                )
            else:
                # a misconfigured chain entry should fail clearly instead of
                # surfacing as an unbound `result`
                raise ValueError(f"Unknown TTS engine in FALLBACK_CHAIN: {engine!r}")

            result["band"] = band
            if engine != "kokoro":
                logger.warning("Kokoro unavailable, using fallback: %s", engine)
            return result

        except Exception as e:
            # deliberate broad catch: any engine failure moves on to the next
            logger.error("Engine %s failed: %s. Trying next.", engine, e)
            last_error = e  # `e` is unbound after this block; keep it for chaining
            continue

    raise RuntimeError("All TTS engines failed. No audio generated.") from last_error