# InnerVoice/backend/services/transcriber.py
# E5K7's picture
# perf: Switch Whisper to tiny.en (10x faster), increase proxy+uvicorn timeouts to 120s for CPU inference
# 10680bc
"""
Whisper transcription service.
Lazy-loaded on first call. Falls back to empty string if unavailable.
"""
_model = None
def _load_model():
global _model
if _model is None:
try:
import whisper
_model = whisper.load_model("tiny.en")
print("[Transcriber] Whisper tiny.en model loaded.")
except Exception as e:
print(f"[Transcriber] WARNING: Could not load Whisper: {e}")
_model = "unavailable"
return _model
def transcribe(wav_path: str) -> str:
"""Transcribe a WAV file and return the text. Returns '' on failure."""
model = _load_model()
if model == "unavailable" or model is None:
return ""
try:
# Prompt Whisper to actively transcribe non-speech emotion
prompt = "This is an emotional voice diary entry. Please transcribe all speech and actively include non-speech sounds like [crying], [sobbing], [laughs], [sighs], or [sniffles]."
result = model.transcribe(
wav_path,
language="en",
initial_prompt=prompt,
condition_on_previous_text=False
)
return result.get("text", "").strip()
except Exception as e:
print(f"[Transcriber] Transcription error: {e}")
return ""