File size: 1,293 Bytes
bf04727
 
 
 
 
 
 
 
 
 
 
 
 
10680bc
 
bf04727
 
 
 
 
 
 
 
 
 
 
 
b5e8a53
 
 
 
 
 
 
 
bf04727
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
"""
Whisper transcription service.
The model is lazy-loaded on the first call; if Whisper cannot be loaded, transcribe() returns an empty string.
"""

# Module-level cache for the Whisper model. It is None until the first load
# attempt; afterwards it holds either the loaded model or the string
# "unavailable" when loading failed, so the load is attempted only once.
_model = None


def _load_model():
    """Return the cached Whisper model, attempting to load it on first use.

    The result of the first attempt is stored in the module-level ``_model``
    and reused by every later call, so a failed load is not retried.

    Returns:
        The loaded ``tiny.en`` Whisper model, or the string ``"unavailable"``
        if importing or loading Whisper raised an exception.
    """
    global _model

    # Anything other than None means a load was already attempted.
    if _model is not None:
        return _model

    try:
        # Imported here so the module itself can be imported without Whisper.
        import whisper
        _model = whisper.load_model("tiny.en")
        print("[Transcriber] Whisper tiny.en model loaded.")
    except Exception as err:
        print(f"[Transcriber] WARNING: Could not load Whisper: {err}")
        # Remember the failure so later calls skip the load attempt.
        _model = "unavailable"

    return _model


def transcribe(wav_path: str) -> str:
    """Transcribe the audio file at ``wav_path`` and return the stripped text.

    Args:
        wav_path: Path to the WAV file, passed straight to the model.

    Returns:
        The transcription with surrounding whitespace removed. An empty
        string is returned when the model is unavailable, when the result
        has no "text" entry, or when transcription raises an exception
        (the error is printed, not propagated).
    """
    whisper_model = _load_model()
    if whisper_model is None or whisper_model == "unavailable":
        return ""

    # Prompt meant to nudge Whisper toward annotating non-speech emotion.
    # NOTE(review): Whisper appears to treat initial_prompt as preceding
    # context rather than as an instruction - confirm it has the intended
    # effect.
    emotion_prompt = (
        "This is an emotional voice diary entry. "
        "Please transcribe all speech and actively include non-speech sounds "
        "like [crying], [sobbing], [laughs], [sighs], or [sniffles]."
    )
    options = {
        "language": "en",
        "initial_prompt": emotion_prompt,
        "condition_on_previous_text": False,
    }

    try:
        output = whisper_model.transcribe(wav_path, **options)
        return output.get("text", "").strip()
    except Exception as err:
        print(f"[Transcriber] Transcription error: {err}")
        return ""