rupkotha / core /stt.py
Deb
rupkotha_1st_commit
f655146
Raw
History Blame Contribute Delete
1.11 kB
# core/stt.py
"""Whisper audio → text (EN/BN).
Uses get_config().stt_model (EN) or stt_bn_model (BN, if set), transcribed on
Modal. Returns '' on failure so the caller can fall back to typed input — the
primary, reliable path for Bengali especially (CLAUDE.md §2, §14). Never raises.
"""
from core.model_config import UI_MOCK, get_config
from core.modal_infra import transcribe_remote
def transcribe(audio_path: str, language: str) -> str:
    """Transcribe the audio file at ``audio_path`` and return its text.

    Args:
        audio_path: Path of the recorded audio file. A falsy value (e.g. ''
            when nothing was recorded) short-circuits to ''.
        language: 'en' or 'bn'. 'bn' selects ``cfg.stt_bn_model`` when that
            is set; every other case uses ``cfg.stt_model``.

    Returns:
        The transcript from ``transcribe_remote``, or '' on any failure so
        the caller can fall back to typed input.

    Never raises: config lookup, file read and the remote call are all
    guarded by the same ``except`` below.
    """
    if UI_MOCK:  # local UI dev — no Modal/GPU
        return "tell me a story about my dragon"
    if not audio_path:
        return ""
    try:
        # Resolve the model inside the try: a failing get_config() or a
        # missing config attribute must degrade to '' like any other error,
        # otherwise the "never raises" contract is broken.
        cfg = get_config()
        if language == "bn" and cfg.stt_bn_model:
            model = cfg.stt_bn_model
        else:
            model = cfg.stt_model
        with open(audio_path, "rb") as f:
            audio_bytes = f.read()
        # `or ""` normalises a None/empty remote result to the '' sentinel.
        return transcribe_remote(audio_bytes, language, model) or ""
    except Exception as e:  # noqa: BLE001 — never raise; caller uses typed input
        print(f"[stt.py] transcription failed: {e}")
        return ""