File size: 4,314 Bytes
87071e6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d860e14
88e4729
 
 
 
 
 
 
 
d860e14
88e4729
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3b07c9e
88e4729
 
 
 
 
 
 
 
d860e14
87071e6
 
 
 
 
 
d860e14
87071e6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88e4729
 
 
 
 
 
 
 
 
 
87071e6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import io
import threading
from types import SimpleNamespace

import numpy as np
import soundfile as sf
import librosa

from simulstreaming_whisper import simul_asr_factory

# Serializes model initialization and all streaming calls below.
_lock = threading.Lock()
# True once simul_asr_factory has successfully produced _asr/_online.
_initialized = False
# ASR backend returned by simul_asr_factory (None until init_model succeeds).
_asr = None
# Online/streaming processor returned by simul_asr_factory (None until init_model succeeds).
_online = None


def _get_model_path():
    """Get the path to the Whisper model.

    Behavior:
    - Prefer `WHISPER_MODEL_PATH` env var if provided.
    - Otherwise prefer `./large-v3.pt` (repo-local file) or cached `~/.cache/whisper/large-v3.pt`.
    - Do NOT attempt to download the model automatically (downloading at runtime can hang Spaces).
    - If not found, raise FileNotFoundError with guidance.
    """
    import os

    # allow user to override with env var path
    env_path = os.environ.get('WHISPER_MODEL_PATH') or os.environ.get('MODEL_PATH')
    if env_path:
        if os.path.exists(env_path):
            return env_path
        else:
            raise FileNotFoundError(f"WHISPER_MODEL_PATH is set but file not found: {env_path}")

    # allow user to request a model name/size (e.g. 'tiny', 'base', 'large-v3')
    model_name = os.environ.get('WHISPER_MODEL_NAME') or os.environ.get('WHISPER_MODEL_SIZE') or 'large-v3'

    # check local repo file first (e.g. ./tiny.pt or ./large-v3.pt)
    local_path = f'./{model_name}.pt'
    if os.path.exists(local_path):
        return local_path

    # check cache path (pre-downloaded by build or other process)
    model_dir = os.path.expanduser('~/.cache/whisper')
    model_path = os.path.join(model_dir, f'{model_name}.pt')
    if os.path.exists(model_path):
        return model_path

    # Do not attempt to download automatically in runtime.
    raise FileNotFoundError(
        'Whisper model not found. Set WHISPER_MODEL_PATH to a local model file, or set WHISPER_MODEL_NAME to a model name (e.g. tiny) and pre-download the corresponding "<name>.pt" file into the repo or ~/.cache/whisper/.'
    )

def _make_args():
    """Assemble the minimal argument namespace required by simul_asr_factory."""
    settings = {
        'log_level': 'INFO',
        'decoder': None,
        'beams': 1,
        # Resolved lazily so a missing model surfaces as FileNotFoundError here.
        'model_path': _get_model_path(),
        'cif_ckpt_path': None,
        'frame_threshold': 25,
        'audio_min_len': 0.0,
        'audio_max_len': 30.0,
        'task': 'transcribe',
        'never_fire': False,
        'init_prompt': None,
        'static_init_prompt': None,
        'max_context_tokens': None,
        'logdir': None,
        'lan': 'en',
        'min_chunk_size': 1.2,
        'vac': False,
        'vac_chunk_size': 0.04,
    }
    return SimpleNamespace(**settings)


def init_model():
    """Initialize the ASR model at most once (thread-safe).

    On failure the error is printed and the module remains uninitialized so
    the surrounding app can keep running without a model; callers must check
    readiness themselves (e.g. via the RuntimeError raised by the other
    entry points).
    """
    global _initialized, _asr, _online
    with _lock:
        if _initialized:
            return
        try:
            _asr, _online = simul_asr_factory(_make_args())
        except FileNotFoundError as e:
            # Model file missing: report and leave _initialized False.
            print(f"Model initialization aborted: {e}")
        except Exception as e:
            # Any other failure: log but don't raise, keep the app alive.
            print(f"Unexpected error initializing model: {e}")
        else:
            _initialized = True

def reset():
    """Reset the streaming state for a new utterance.

    Raises RuntimeError if init_model has not completed successfully.
    """
    global _online
    with _lock:
        online = _online
        if online is None:
            raise RuntimeError("Model not initialized")
        online.init()


def _read_audio_bytes(raw_bytes):
    """Decode raw audio bytes into a mono float32 waveform at 16 kHz.

    Tries soundfile first; on any decode failure retries with librosa.
    """
    buffer = io.BytesIO(raw_bytes)
    try:
        samples, rate = sf.read(buffer, dtype='float32')
    except Exception:
        # soundfile could not parse the container/codec; rewind and let
        # librosa (audioread-backed) have a go, forcing mono output.
        buffer.seek(0)
        samples, rate = librosa.load(buffer, sr=None, mono=True)

    # Downmix multi-channel audio (soundfile returns frames x channels).
    if samples.ndim > 1:
        samples = np.mean(samples, axis=1)

    # The streaming pipeline expects 16 kHz input.
    if rate != 16000:
        samples = librosa.resample(samples, orig_sr=rate, target_sr=16000)

    # Guarantee float32 regardless of the decode path.
    return samples.astype(np.float32)


def process_chunk_from_bytes(raw_bytes):
    """Insert audio chunk and run one processing iteration. Returns the JSON-able result.

    Raises RuntimeError if init_model has not completed successfully.
    """
    global _online
    if _online is None:
        raise RuntimeError("Model not initialized")
    chunk = _read_audio_bytes(raw_bytes)
    with _lock:
        _online.insert_audio_chunk(chunk)
        result = _online.process_iter()
    # Normalize falsy results (None, empty) to an empty dict for JSON callers.
    return result if result else {}


def finish():
    """Flush the streaming ASR and return any final output as a JSON-able value.

    Raises RuntimeError if init_model has not completed successfully.
    """
    global _online
    if _online is None:
        raise RuntimeError("Model not initialized")
    with _lock:
        final = _online.finish()
    # Normalize falsy results (None, empty) to an empty dict for JSON callers.
    return final if final else {}