Spaces:
Running
Running
| # core/stt.py | |
| """Whisper audio → text (EN/BN). | |
| Uses get_config().stt_model (EN) or stt_bn_model (BN, if set), transcribed on | |
| Modal. Returns '' on failure so the caller can fall back to typed input — the | |
| primary, reliable path for Bengali especially (CLAUDE.md §2, §14). Never raises. | |
| """ | |
| from core.model_config import UI_MOCK, get_config | |
| from core.modal_infra import transcribe_remote | |
def transcribe(audio_path: str, language: str) -> str:
    """Transcribe the audio file at *audio_path* to text; '' on any failure.

    Args:
        audio_path: Path of the recorded audio file. A falsy value (no
            recording) returns '' without reading a file or calling the
            remote transcriber.
        language: 'en' or 'bn'. For 'bn' the configured ``stt_bn_model`` is
            used when it is set; in every other case ``stt_model`` is used.

    Returns:
        The text produced by ``transcribe_remote``, or '' when there is no
        audio, the remote call returns a falsy result, or any step fails.

    Never raises: every failure (config lookup, file read, remote call) is
    logged to stdout and turned into '' so the caller can fall back to typed
    input.
    """
    if UI_MOCK:  # local UI dev — no Modal/GPU
        # Canned phrase so the UI flow can be exercised without a backend.
        return "tell me a story about my dragon"
    if not audio_path:
        return ""
    try:
        # Config lookup and model selection live inside the try so that a
        # broken or incomplete config also degrades to '' instead of
        # violating the never-raises contract.
        cfg = get_config()
        if language == "bn" and cfg.stt_bn_model:
            model = cfg.stt_bn_model
        else:
            model = cfg.stt_model
        with open(audio_path, "rb") as f:
            audio_bytes = f.read()
        # `or ""` normalises a None/empty remote result to the '' sentinel.
        return transcribe_remote(audio_bytes, language, model) or ""
    except Exception as e:  # noqa: BLE001 — never raise; caller uses typed input
        print(f"[stt.py] transcription failed: {e}")
        return ""