aankitdas's picture
first commit - working app locally
a3419b6
# app/engines/base.py
# Abstract base class for all TTS engines.
# To add a new engine: create a new file in engines/, subclass TTSEngine,
# implement synthesize(), and register it in engines/__init__.py.
from abc import ABC, abstractmethod
class TTSEngine(ABC):
    """
    Base class for all TTS engines in the Bantrly evaluation framework.

    Every engine must implement synthesize() with this exact signature.
    Subclasses describe themselves through the class-level metadata below
    and may optionally provide per-grade-band tuning via BAND_CONFIG.
    """

    # --- class-level metadata (set in each subclass) ---

    # Display name shown in the UI dropdown.
    name: str = ""

    # One of: "rule-based-local" | "neural-local" | "neural-cloud-free" | "neural-cloud-paid"
    engine_type: str = ""

    # Cost in USD per 1M characters. 0.0 for free/local engines.
    # Used to compute the equivalent-cost column in the comparison table.
    cost_per_million_chars: float = 0.0

    # If False, shown with a "baseline only" label in the UI.
    is_production_ready: bool = False

    # If True, shown with a warning in the UI when offline.
    requires_internet: bool = False

    # --- grade-band config ---
    # Subclasses can override this to apply per-band voice/speed tuning.
    # Format: { "K-2": {...}, "3-5": {...}, "6-8": {...}, "9-12": {...} }
    # NOTE: subclasses should assign a new dict rather than mutate this one,
    # since the base-class dict object is shared by every engine class.
    BAND_CONFIG: dict = {}

    @abstractmethod
    def synthesize(self, text: str, band: str, output_path: str) -> dict:
        """
        Synthesize text to an audio file.

        Args:
            text: coaching text to synthesize
            band: grade band — one of "K-2", "3-5", "6-8", "9-12"
            output_path: full path to save audio (without extension —
                each engine appends its own extension)

        Returns:
            dict with keys:
                audio_path (str)        full path to saved audio file
                latency_seconds (float) wall-clock synthesis time
                voice (str)             voice ID used
                speed (float)           speed multiplier used (1.0 if N/A)
                engine (str)            engine name (same as self.name)
        """
        ...

    def estimate_cost(self, text: str) -> float:
        """
        Estimate cost in USD for synthesizing this text.

        The estimate is the character count of ``text`` scaled by
        ``cost_per_million_chars``. Returns 0.0 for free/local engines
        (those whose rate is 0.0).
        """
        return (len(text) / 1_000_000) * self.cost_per_million_chars

    def get_band_config(self, band: str) -> dict:
        """
        Get config for the given band, falling back to the most neutral
        available band if the requested band is not found.

        Args:
            band: grade band key to look up in BAND_CONFIG

        Returns:
            The config dict stored for ``band`` if present; otherwise the
            first available of "6-8", "9-12", "3-5", "K-2"; otherwise the
            first config defined. If the engine defines no band configs at
            all, an empty dict is returned.
        """
        configs = self.BAND_CONFIG

        # Engines that do not override BAND_CONFIG have nothing to tune.
        # Return a fresh empty dict instead of letting next() raise a bare
        # StopIteration, which could silently end an enclosing iteration.
        if not configs:
            return {}

        if band in configs:
            return configs[band]

        # fallback priority: 6-8 > 9-12 > 3-5 > K-2 > first available
        for fallback in ("6-8", "9-12", "3-5", "K-2"):
            if fallback in configs:
                return configs[fallback]

        # last resort: only non-standard band keys exist, so return the
        # first config in definition order
        return next(iter(configs.values()))