Spaces:
Sleeping
Sleeping
# app/engines/base.py
# Abstract base class for all TTS engines.
# To add a new engine: create a new file in engines/, subclass TTSEngine,
# implement synthesize(), and register it in engines/__init__.py.
| from abc import ABC, abstractmethod | |
class TTSEngine(ABC):
    """
    Base class for all TTS engines in the Bantrly evaluation framework.

    Every engine must implement synthesize() with this exact signature.
    synthesize() is declared abstract, so a subclass that does not override
    it cannot be instantiated.
    """

    # --- class-level metadata (set in each subclass) ---

    # Display name shown in the UI dropdown.
    name: str = ""

    # One of: "rule-based-local" | "neural-local" | "neural-cloud-free" | "neural-cloud-paid"
    engine_type: str = ""

    # Cost in USD per 1M characters. 0.0 for free/local engines.
    # Used to compute equivalent cost column in comparison table.
    cost_per_million_chars: float = 0.0

    # If False, shown with a "baseline only" label in the UI.
    is_production_ready: bool = False

    # If True, shown with a warning in the UI when offline.
    requires_internet: bool = False

    # --- grade-band config ---
    # Subclasses can override this to apply per-band voice/speed tuning.
    # Format: { "K-2": {...}, "3-5": {...}, "6-8": {...}, "9-12": {...} }
    BAND_CONFIG: dict = {}

    @abstractmethod
    def synthesize(self, text: str, band: str, output_path: str) -> dict:
        """
        Synthesize text to audio file.

        Args:
            text: coaching text to synthesize
            band: grade band — one of "K-2", "3-5", "6-8", "9-12"
            output_path: full path to save audio (without extension —
                each engine appends its own extension)

        Returns:
            dict with keys:
                audio_path (str)        full path to saved audio file
                latency_seconds (float) wall-clock synthesis time
                voice (str)             voice ID used
                speed (float)           speed multiplier used (1.0 if N/A)
                engine (str)            engine name (same as self.name)
        """

    def estimate_cost(self, text: str) -> float:
        """
        Estimate cost in USD for synthesizing this text.

        The estimate is the character count of ``text`` scaled by
        ``cost_per_million_chars``; it is 0.0 for free/local engines
        (whose rate is 0.0) and for empty text.
        """
        return (len(text) / 1_000_000) * self.cost_per_million_chars

    def get_band_config(self, band: str) -> dict:
        """
        Get config for the given band, falling back to the most neutral
        available band if the requested band is not found.

        Args:
            band: grade band key to look up in BAND_CONFIG

        Returns:
            The config dict stored for ``band`` when present; otherwise the
            config of the first available band in the fallback order
            6-8 > 9-12 > 3-5 > K-2; otherwise the first config defined;
            or an empty dict when BAND_CONFIG has no entries at all.
        """
        configs = self.BAND_CONFIG
        if band in configs:
            return configs[band]

        # fallback priority: 6-8 > 9-12 > 3-5 > K-2 > first available
        for fallback in ("6-8", "9-12", "3-5", "K-2"):
            if fallback in configs:
                return configs[fallback]

        # Last resort: first config defined. The {} default keeps an engine
        # with no band tuning (the base-class default) from leaking a bare
        # StopIteration out of next().
        return next(iter(configs.values()), {})