import io
import threading
from types import SimpleNamespace

import numpy as np
import soundfile as sf
import librosa

from simulstreaming_whisper import simul_asr_factory

# Singleton ASR state. Every read/write of these globals is guarded by _lock
# so init/reset/process/finish can be called from multiple request threads.
_lock = threading.Lock()
_initialized = False
_asr = None
_online = None


def _get_model_path():
    """Get the path to the Whisper model.

    Behavior:
    - Prefer `WHISPER_MODEL_PATH` env var if provided.
    - Otherwise prefer `./large-v3.pt` (repo-local file) or cached
      `~/.cache/whisper/large-v3.pt`.
    - Do NOT attempt to download the model automatically (downloading at
      runtime can hang Spaces).
    - If not found, raise FileNotFoundError with guidance.

    Returns:
        str: filesystem path to an existing ``.pt`` checkpoint.

    Raises:
        FileNotFoundError: if no checkpoint is found, or if an explicit
            env-var path is set but does not exist.
    """
    import os

    # An explicit override always wins; a set-but-missing path is an error
    # rather than a silent fallback, so misconfiguration surfaces early.
    env_path = os.environ.get('WHISPER_MODEL_PATH') or os.environ.get('MODEL_PATH')
    if env_path:
        if os.path.exists(env_path):
            return env_path
        raise FileNotFoundError(f"WHISPER_MODEL_PATH is set but file not found: {env_path}")

    # Allow selecting a model name/size (e.g. 'tiny', 'base', 'large-v3').
    model_name = (os.environ.get('WHISPER_MODEL_NAME')
                  or os.environ.get('WHISPER_MODEL_SIZE')
                  or 'large-v3')

    # Repo-local checkpoint first (e.g. ./tiny.pt or ./large-v3.pt) ...
    local_path = f'./{model_name}.pt'
    if os.path.exists(local_path):
        return local_path

    # ... then the standard whisper cache (pre-downloaded by a build step
    # or another process).
    model_dir = os.path.expanduser('~/.cache/whisper')
    model_path = os.path.join(model_dir, f'{model_name}.pt')
    if os.path.exists(model_path):
        return model_path

    # Deliberately no automatic download at runtime.
    raise FileNotFoundError(
        'Whisper model not found. Set WHISPER_MODEL_PATH to a local model file, or set WHISPER_MODEL_NAME to a model name (e.g. tiny) and pre-download the corresponding ".pt" file into the repo or ~/.cache/whisper/.'
    )


def _make_args():
    """Build the minimal argument namespace required by ``simul_asr_factory``."""
    return SimpleNamespace(
        log_level='INFO',
        decoder=None,
        beams=1,
        model_path=_get_model_path(),
        cif_ckpt_path=None,
        frame_threshold=25,
        audio_min_len=0.0,
        audio_max_len=30.0,
        task='transcribe',
        never_fire=False,
        init_prompt=None,
        static_init_prompt=None,
        max_context_tokens=None,
        logdir=None,
        lan='en',
        min_chunk_size=1.2,
        vac=False,
        vac_chunk_size=0.04,
    )


def init_model():
    """Initialize the ASR model once (idempotent, thread-safe).

    Failures are logged rather than raised so the hosting app can keep
    running without a model; callers detect readiness by the other module
    functions raising RuntimeError.
    """
    global _initialized, _asr, _online
    with _lock:
        if _initialized:
            return
        try:
            args = _make_args()
            _asr, _online = simul_asr_factory(args)
            _initialized = True
        except FileNotFoundError as e:
            # Leave _initialized False so callers know the model is not ready.
            print(f"Model initialization aborted: {e}")
        except Exception as e:
            # Don't raise here; allow the app to continue running without model.
            print(f"Unexpected error initializing model: {e}")


def reset():
    """Reset the streaming decoder state.

    Raises:
        RuntimeError: if the model has not been initialized.
    """
    with _lock:
        if _online is None:
            raise RuntimeError("Model not initialized")
        _online.init()


def _read_audio_bytes(raw_bytes):
    """Decode raw audio bytes to mono float32 PCM at 16 kHz.

    Tries soundfile first; falls back to librosa for containers/codecs
    soundfile cannot parse.

    Args:
        raw_bytes: encoded audio (e.g. WAV/FLAC bytes).

    Returns:
        np.ndarray: 1-D float32 samples at 16 kHz.
    """
    bio = io.BytesIO(raw_bytes)
    try:
        data, sr = sf.read(bio, dtype='float32')
    except Exception:
        # soundfile may have consumed part of the stream; rewind first.
        bio.seek(0)
        data, sr = librosa.load(bio, sr=None, mono=True)
    # soundfile returns (frames, channels) for multi-channel input; downmix.
    if data.ndim > 1:
        data = np.mean(data, axis=1)
    if sr != 16000:
        data = librosa.resample(data, orig_sr=sr, target_sr=16000)
    # Ensure float32 (librosa/resample may have promoted to float64).
    return data.astype(np.float32)


def process_chunk_from_bytes(raw_bytes):
    """Insert audio chunk and run one processing iteration.

    Returns the JSON-able result ({} when the model produced no output).

    Raises:
        RuntimeError: if the model has not been initialized.
    """
    # Fast, unlocked pre-check to avoid decoding audio when the model is
    # obviously absent; the authoritative check happens under the lock.
    if _online is None:
        raise RuntimeError("Model not initialized")
    audio = _read_audio_bytes(raw_bytes)
    with _lock:
        # Re-check under the lock: a concurrent init_model() publishes
        # _online while holding _lock, so this read cannot race with it.
        if _online is None:
            raise RuntimeError("Model not initialized")
        _online.insert_audio_chunk(audio)
        out = _online.process_iter()
    return out or {}


def finish():
    """Flush the decoder and return the final result ({} if none).

    Raises:
        RuntimeError: if the model has not been initialized.
    """
    with _lock:
        # Check under the lock so we never race with init_model().
        if _online is None:
            raise RuntimeError("Model not initialized")
        out = _online.finish()
    return out or {}