Spaces:

aankitdas
/

tts-eval-framework

Sleeping

App Files Files Community

tts-eval-framework / src /tts_manager.py

aankitdas

first commit - working app locally

a3419b6 29 days ago

raw

history blame contribute delete

3.01 kB

	# src/tts_manager.py
	# Production TTS manager for Bantrly.
	# Handles grade-band voice config, engine selection, and fallback chain.
	# Usage: from src.tts_manager import synthesize_for_band

	import logging
	from src.kokoro_client import synthesize as kokoro_synthesize

	logger = logging.getLogger(__name__)

	# Voice settings per grade band, keyed by the band label callers pass in.
	# Each row is (band, voice id, speed); synthesize_for_band forwards the
	# voice and speed to the Kokoro engine only.
	BAND_CONFIG = {
	    band: {"voice": voice, "speed": speed}
	    for band, voice, speed in (
	        ("K-2", "af_heart", 0.85),
	        ("3-5", "af_heart", 0.95),
	        ("6-8", "af_heart", 1.00),
	        ("9-12", "am_echo", 1.10),
	    )
	}

	# Engines in the order they are attempted; the first one that succeeds wins.
	FALLBACK_CHAIN = [
	    "kokoro",
	    "edge_tts",
	    "pyttsx3",
	]


	def _synthesize_edge_tts(text: str, output_path: str) -> dict:
	    """Run the async edge_tts client to completion from synchronous code."""
	    # Imported lazily so the module still loads when this optional fallback
	    # engine (or its dependencies) is not installed.
	    import asyncio

	    from src.edge_tts_client import synthesize as edge_synthesize

	    # Only the loop lookup is guarded: a RuntimeError raised by the synthesis
	    # itself must not be mistaken for "no running loop".
	    try:
	        loop = asyncio.get_running_loop()
	    except RuntimeError:
	        loop = None

	    if loop is None:
	        # No event loop is running in this thread, so asyncio.run() is safe.
	        return asyncio.run(edge_synthesize(text=text, output_path=output_path))

	    # Already inside a running loop (e.g. Jupyter): nest_asyncio patches the
	    # loop so run_until_complete() can be called re-entrantly.
	    import nest_asyncio

	    nest_asyncio.apply()
	    return loop.run_until_complete(
	        edge_synthesize(text=text, output_path=output_path)
	    )


	def synthesize_for_band(text: str, band: str, output_path: str) -> dict:
	    """
	    Synthesize speech for a given grade band using the fallback chain.

	    Engines are tried in FALLBACK_CHAIN order (Kokoro, then edge_tts, then
	    pyttsx3). The first engine that succeeds wins; every failure is logged
	    and the next engine is attempted.

	    Args:
	        text: coaching text to synthesize
	        band: grade band string, one of "K-2", "3-5", "6-8", "9-12";
	            any other value falls back to the "6-8" voice settings
	        output_path: path to save audio file (without extension); ".wav" is
	            appended for kokoro/pyttsx3 and ".mp3" for edge_tts

	    Returns:
	        The dict returned by the engine client that succeeded, with a "band"
	        key set to the requested band. Expected keys: output_path,
	        latency_seconds, engine, voice, band.

	    Raises:
	        RuntimeError: if every engine in the chain fails; the last engine's
	            exception is attached as the cause.
	    """
	    if band not in BAND_CONFIG:
	        logger.warning("Unknown grade band %r, using '6-8' voice settings", band)
	    config = BAND_CONFIG.get(band, BAND_CONFIG["6-8"])

	    last_error = None

	    # try each engine in fallback chain
	    for engine in FALLBACK_CHAIN:
	        try:
	            if engine == "kokoro":
	                # Only Kokoro receives the band-specific voice and speed.
	                result = kokoro_synthesize(
	                    text=text,
	                    output_path=output_path + ".wav",
	                    voice=config["voice"],
	                    speed=config["speed"],
	                )
	            elif engine == "edge_tts":
	                result = _synthesize_edge_tts(text, output_path + ".mp3")
	            elif engine == "pyttsx3":
	                from src.pyttsx3_client import synthesize as pyttsx3_synthesize

	                result = pyttsx3_synthesize(
	                    text=text,
	                    output_path=output_path + ".wav",
	                )
	            else:
	                # A misconfigured chain entry is reported like any other
	                # engine failure instead of an opaque unbound-variable error.
	                raise ValueError(f"Unknown TTS engine: {engine!r}")

	            result["band"] = band
	        except Exception as e:
	            # Broad on purpose: any failure of one engine must fall through
	            # to the next engine rather than abort synthesis.
	            last_error = e
	            logger.error("Engine %s failed: %s. Trying next.", engine, e)
	            continue

	        if engine != "kokoro":
	            logger.warning("Kokoro unavailable, using fallback: %s", engine)
	        return result

	    raise RuntimeError(
	        "All TTS engines failed. No audio generated."
	    ) from last_error