Spaces:
Sleeping
Sleeping
| """Local speech-to-text via faster-whisper (CTranslate2). | |
| The model is loaded lazily and cached. On a CUDA box we prefer | |
| ``int8_float16``; if CUDA is unavailable or its libraries are missing we fall | |
| back to CPU ``int8`` automatically, so transcription always works. | |
| """ | |
| from __future__ import annotations | |
| from collections.abc import Iterator | |
| from dataclasses import dataclass, field | |
| from pathlib import Path | |
| from typing import Callable | |
| from . import config | |
| # Cached (model, (device, compute_type)). | |
| _MODEL = None | |
| _MODEL_INFO: tuple[str, str] | None = None | |
| # Set once a CUDA runtime failure is seen, to skip the GPU on later calls. | |
| _FORCE_CPU = False | |
@dataclass
class TranscriptSegment:
    """One timed chunk of recognized speech.

    Declared as a dataclass so it can be built with keyword arguments
    (``TranscriptSegment(start=..., end=..., text=...)``).
    """

    # Offset of the segment's beginning within the media, in seconds.
    start: float
    # Offset of the segment's end within the media, in seconds.
    end: float
    # Recognized text for this span, as produced by the model (unstripped).
    text: str
@dataclass
class Transcript:
    """Result of a transcription run: ordered segments plus run metadata.

    Declared as a dataclass so that ``field(default_factory=list)`` gives each
    instance its own ``segments`` list and the class accepts keyword
    construction.
    """

    # Segments in the order they were produced.
    segments: list[TranscriptSegment] = field(default_factory=list)
    # Language reported for the media; empty string when unknown.
    language: str = ""
    # Device the model was loaded on; empty string when unknown.
    device: str = ""

    def text(self) -> str:
        """Return every segment's stripped text joined by single spaces."""
        return " ".join(seg.text.strip() for seg in self.segments).strip()

    def to_timestamped_text(self) -> str:
        """Return one ``[MM:SS] text`` line per segment, newline-separated.

        The timestamp is the segment start truncated to whole seconds.
        """

        def _line(seg: TranscriptSegment) -> str:
            minutes, seconds = divmod(int(seg.start), 60)
            return f"[{minutes:02d}:{seconds:02d}] {seg.text.strip()}"

        return "\n".join(_line(seg) for seg in self.segments)
def _candidate_configs() -> Iterator[tuple[str, str]]:
    """Yield the (device, compute_type) pairs to try, best option first.

    A configured compute type overrides the default for the first attempt
    only; the CPU fallback after a GPU attempt always uses ``int8``.
    """
    compute_override = config.WHISPER_COMPUTE_TYPE
    cpu_only = config.WHISPER_DEVICE == "cpu" or _FORCE_CPU

    if not cpu_only:
        # Any non-"cpu" device setting (e.g. "cuda" or "auto"): GPU first,
        # with CPU int8 as the fallback.
        yield ("cuda", compute_override or "int8_float16")
        yield ("cpu", "int8")
    else:
        yield ("cpu", compute_override or "int8")
| def _is_cuda_error(exc: Exception) -> bool: | |
| msg = str(exc).lower() | |
| return any(tok in msg for tok in ("cublas", "cudnn", "cuda", ".dll", "gpu")) | |
def get_model():
    """Load (once) and return the faster-whisper model with its resolved config.

    The first call walks the ``(device, compute_type)`` candidates from
    ``_candidate_configs()`` in order and keeps the first one that constructs
    successfully. The result is cached in the module globals, so later calls
    return immediately.

    Returns:
        A ``(model, (device, compute_type))`` tuple.

    Raises:
        RuntimeError: If every candidate configuration failed to load. The
            message lists each attempt, and the most recent failure is
            attached as ``__cause__`` so its traceback is preserved.
    """
    global _MODEL, _MODEL_INFO
    if _MODEL is not None:
        return _MODEL, _MODEL_INFO

    # Imported here rather than at module level, so merely importing this
    # module does not import faster-whisper.
    from faster_whisper import WhisperModel

    errors: list[str] = []
    last_exc: Exception | None = None
    for device, compute_type in _candidate_configs():
        try:
            model = WhisperModel(
                config.WHISPER_MODEL, device=device, compute_type=compute_type
            )
        except Exception as exc:  # CUDA libs missing, OOM, etc.
            errors.append(f" {device}/{compute_type}: {exc}")
            # ``exc`` is unbound when the except clause ends; keep a reference
            # so the final error can be chained to it.
            last_exc = exc
            continue
        _MODEL, _MODEL_INFO = model, (device, compute_type)
        return _MODEL, _MODEL_INFO

    raise RuntimeError(
        "Could not load the Whisper model. Attempts:\n" + "\n".join(errors)
    ) from last_exc
def _run_transcribe(
    media_path: str | Path, progress: Callable[[float, str], None] | None
) -> Transcript:
    """Run one transcription pass with the currently cached model.

    Args:
        media_path: Audio/video file to transcribe; passed to the model as a
            string.
        progress: Optional callback invoked after each segment with the
            completed fraction (capped at 1.0) and a status message. It is
            only called when the reported media duration is non-zero.

    Returns:
        A ``Transcript`` holding every decoded segment, the language reported
        by the model and the device the model was loaded on.
    """
    model, model_info = get_model()
    segment_iter, meta = model.transcribe(str(media_path), vad_filter=True, beam_size=5)
    duration = float(getattr(meta, "duration", 0.0)) or 0.0
    should_report = bool(progress and duration)

    # The CUDA libraries are loaded lazily on first encode, so a missing-cuBLAS
    # error surfaces while this generator is consumed, not when the model is
    # constructed.
    collected: list[TranscriptSegment] = []
    for piece in segment_iter:
        collected.append(
            TranscriptSegment(
                start=float(piece.start), end=float(piece.end), text=piece.text
            )
        )
        if not should_report:
            continue
        done = min(piece.end / duration, 1.0)
        progress(done, f"Transcribing… {int(done * 100)}%")

    return Transcript(
        segments=collected,
        language=getattr(meta, "language", "") or "",
        device=model_info[0] if model_info else "",
    )
def transcribe(
    media_path: str | Path,
    *,
    progress: Callable[[float, str], None] | None = None,
) -> Transcript:
    """Transcribe an audio/video file into timestamped segments.

    If the run fails with a CUDA-looking ``RuntimeError`` (e.g. missing
    cuBLAS/cuDNN) while the cached model is on the GPU, the cached model is
    discarded, the GPU is disabled for subsequent loads, and the transcription
    is retried once on the CPU. Any other error propagates unchanged.
    """
    global _MODEL, _MODEL_INFO, _FORCE_CPU
    try:
        return _run_transcribe(media_path, progress)
    except RuntimeError as exc:
        # Only a CUDA failure on a GPU-resident model is recoverable here.
        if not _MODEL_INFO or _MODEL_INFO[0] != "cuda" or not _is_cuda_error(exc):
            raise
        # Forget the GPU model and pin later loads to the CPU, then retry once.
        _FORCE_CPU = True
        _MODEL = _MODEL_INFO = None
        if progress:
            progress(0.0, "GPU unavailable — falling back to CPU…")
        return _run_transcribe(media_path, progress)