Spaces:

E5K7
/

InnerVoice

Running

File size: 1,293 Bytes

"""
Whisper transcription service.
The Whisper model is lazy-loaded on the first call. If the model cannot be
loaded, transcribe() falls back to returning an empty string.
"""

# Module-level cache for the Whisper model. It is None until the first load
# attempt, then holds either the loaded model or the string "unavailable".
_model = None


def _load_model():
    """Return the cached Whisper model, attempting to load it on first use.

    The outcome of the load attempt is stored in the module-level ``_model``
    and handed back by later calls without reloading: either the object
    returned by ``whisper.load_model("tiny.en")`` or the string
    ``"unavailable"`` if the import or the load raised an exception.
    """
    global _model

    # A previous call already settled the result (success or failure).
    if _model is not None:
        return _model

    try:
        # Imported here so that a missing package is reported below instead
        # of failing when this module itself is imported.
        import whisper

        _model = whisper.load_model("tiny.en")
        print("[Transcriber] Whisper tiny.en model loaded.")
    except Exception as load_error:
        print(f"[Transcriber] WARNING: Could not load Whisper: {load_error}")
        _model = "unavailable"

    return _model


def transcribe(wav_path: str) -> str:
    """Return the transcript of the WAV file at ``wav_path``.

    The model is obtained from ``_load_model()``. An empty string is returned
    when no model is available, and also when the transcription call raises
    any exception (the error is printed, not propagated).

    Args:
        wav_path: Path of the audio file handed to the model's
            ``transcribe`` method.

    Returns:
        The ``"text"`` entry of the transcription result with surrounding
        whitespace stripped, or ``""`` on failure.
    """
    whisper_model = _load_model()
    model_missing = whisper_model == "unavailable" or whisper_model is None
    if model_missing:
        return ""

    # The initial prompt is meant to encourage Whisper to also write out
    # non-speech emotional sounds alongside the spoken words.
    options = {
        "language": "en",
        "initial_prompt": "This is an emotional voice diary entry. Please transcribe all speech and actively include non-speech sounds like [crying], [sobbing], [laughs], [sighs], or [sniffles].",
        "condition_on_previous_text": False,
    }

    try:
        output = whisper_model.transcribe(wav_path, **options)
        text = output.get("text", "")
        return text.strip()
    except Exception as err:
        # Best-effort service: report the problem and fall back to no text.
        print(f"[Transcriber] Transcription error: {err}")
        return ""