Spaces:

vivekchakraverty
/

DocuMaker

Sleeping

App Files Files Community

DocuMaker / src /transcribe.py

vivekchakraverty

DocuMaker: video to step-by-step DOCX guide (Whisper + HF LLM + BLIP)

85b485a 18 days ago

Raw

History Blame Contribute Delete

4.44 kB

	"""Local speech-to-text via faster-whisper (CTranslate2).

	The model is loaded lazily and cached. On a CUDA box we prefer
	``int8_float16``; if CUDA is unavailable or its libraries are missing we fall
	back to CPU ``int8`` automatically, so transcription always works.
	"""
	from __future__ import annotations

	from collections.abc import Iterator
	from dataclasses import dataclass, field
	from pathlib import Path
	from typing import Callable

	from . import config

	# Process-wide cache for the loaded model: ``_MODEL`` holds the model object
	# and ``_MODEL_INFO`` the (device, compute_type) pair it was created with.
	# Both are ``None`` while no model is cached.
	_MODEL = None
	_MODEL_INFO: tuple[str, str] | None = None
	# Set once a CUDA runtime failure is seen, to skip the GPU on later calls.
	_FORCE_CPU = False


	@dataclass
	class TranscriptSegment:
	    """One contiguous piece of recognised speech.

	    ``start`` and ``end`` are offsets into the media in seconds, and ``text``
	    is the recognised text for that span, stored exactly as supplied (no
	    stripping is done here).
	    """

	    start: float  # offset at which the segment begins
	    end: float  # offset at which the segment ends
	    text: str  # recognised text for the span


	@dataclass
	class Transcript:
	    """Result of transcribing one media file.

	    Holds the ordered segments together with the ``language`` and ``device``
	    strings recorded for the run. Every field defaults to an empty value.
	    """

	    segments: list[TranscriptSegment] = field(default_factory=list)
	    language: str = ""
	    device: str = ""

	    @property
	    def text(self) -> str:
	        """Return every segment's stripped text joined by single spaces."""
	        pieces = [segment.text.strip() for segment in self.segments]
	        joined = " ".join(pieces)
	        return joined.strip()

	    def to_timestamped_text(self) -> str:
	        """Return one ``[MM:SS] text`` line per segment, newline-separated.

	        The stamp is the segment start truncated to whole seconds. Minutes are
	        not wrapped into hours, so media longer than an hour produces minute
	        values above 59.
	        """

	        def render(segment) -> str:
	            minutes, seconds = divmod(int(segment.start), 60)
	            return f"[{minutes:02d}:{seconds:02d}] {segment.text.strip()}"

	        return "\n".join(render(segment) for segment in self.segments)


	def _candidate_configs() -> Iterator[tuple[str, str]]:
	    """Yield (device, compute_type) attempts in priority order.

	    * ``config.WHISPER_DEVICE == "cpu"``: one CPU attempt using
	      ``config.WHISPER_COMPUTE_TYPE`` when it is set, otherwise ``int8``.
	    * Any other device value while ``_FORCE_CPU`` is set: one CPU ``int8``
	      attempt.
	    * Otherwise: CUDA first (override or ``int8_float16``), then CPU ``int8``.
	    """
	    override = config.WHISPER_COMPUTE_TYPE
	    if config.WHISPER_DEVICE == "cpu":
	        # Explicit CPU configuration: the override was chosen for the CPU.
	        yield ("cpu", override or "int8")
	        return
	    if _FORCE_CPU:
	        # A GPU run already failed. The override in a "cuda"/"auto" setup
	        # targets the GPU (e.g. float16) and may not load on the CPU, so use
	        # the same CPU fallback as the regular path below.
	        yield ("cpu", "int8")
	        return
	    # "cuda" or "auto": try GPU first, then always fall back to CPU int8.
	    yield ("cuda", override or "int8_float16")
	    yield ("cpu", "int8")


	def _is_cuda_error(exc: Exception) -> bool:
	    """Heuristically decide whether *exc* looks like a CUDA/GPU failure.

	    The decision is a case-insensitive substring search of ``str(exc)`` for a
	    fixed set of markers, so any message mentioning one of them matches.
	    """
	    lowered = str(exc).lower()
	    for marker in ("cublas", "cudnn", "cuda", ".dll", "gpu"):
	        if marker in lowered:
	            return True
	    return False


	def get_model():
	    """Return the cached ``(model, (device, compute_type))`` pair, loading once.

	    On the first call each configuration from ``_candidate_configs()`` is
	    tried in order, and the first one that constructs successfully is stored
	    in the module-level cache. Later calls return the cached pair unchanged.

	    Raises:
	        RuntimeError: if no candidate configuration could be loaded; the
	            message lists every attempt together with its error text.
	    """
	    global _MODEL, _MODEL_INFO
	    if _MODEL is None:
	        # Imported on first use rather than at module import time.
	        from faster_whisper import WhisperModel

	        failures: list[str] = []
	        for dev, ctype in _candidate_configs():
	            try:
	                loaded = WhisperModel(
	                    config.WHISPER_MODEL, device=dev, compute_type=ctype
	                )
	            except Exception as err:  # CUDA libs missing, OOM, etc.
	                failures.append(f" {dev}/{ctype}: {err}")
	                continue
	            _MODEL = loaded
	            _MODEL_INFO = (dev, ctype)
	            break
	        else:
	            # Every candidate failed (or none was offered).
	            raise RuntimeError(
	                "Could not load the Whisper model. Attempts:\n" + "\n".join(failures)
	            )
	    return _MODEL, _MODEL_INFO


	def _run_transcribe(
	    media_path: str | Path, progress: Callable[[float, str], None] | None
	) -> Transcript:
	    """Run one transcription pass with the currently cached model.

	    Args:
	        media_path: Audio/video file to transcribe; handed to the model as a
	            ``str``.
	        progress: Optional callback invoked as ``progress(fraction, message)``
	            after each segment. It is skipped when the total duration is
	            unknown (zero or missing).

	    Returns:
	        A ``Transcript`` holding the collected segments, the language reported
	        by the model ("" when absent) and the device the model was loaded on.

	    Raises:
	        Whatever ``get_model()`` or the model itself raises. Errors can
	        surface while the segment generator is being consumed, not only when
	        ``model.transcribe`` is called.
	    """
	    model, info = get_model()
	    device = info[0] if info else ""

	    segment_iter, meta = model.transcribe(str(media_path), vad_filter=True, beam_size=5)
	    # The ``or 0.0`` guard sits inside float() so a missing or None duration
	    # becomes 0.0 (progress disabled) instead of raising TypeError.
	    total = float(getattr(meta, "duration", 0.0) or 0.0)

	    # The CUDA libraries are loaded lazily on first encode, so a missing-cuBLAS
	    # error surfaces while consuming this generator — not at model construction.
	    segments: list[TranscriptSegment] = []
	    for seg in segment_iter:
	        start, end = float(seg.start), float(seg.end)
	        segments.append(TranscriptSegment(start=start, end=end, text=seg.text))
	        if progress and total:
	            # Clamp to 1.0 in case a segment ends past the reported duration.
	            frac = min(end / total, 1.0)
	            progress(frac, f"Transcribing… {int(frac * 100)}%")

	    return Transcript(
	        segments=segments,
	        language=getattr(meta, "language", "") or "",
	        device=device,
	    )


	def transcribe(
	    media_path: str | Path,
	    *,
	    progress: Callable[[float, str], None] | None = None,
	) -> Transcript:
	    """Transcribe an audio/video file into timestamped segments.

	    Falls back from GPU to CPU automatically if a CUDA runtime error (e.g.
	    missing cuBLAS/cuDNN) occurs during inference.

	    Args:
	        media_path: Path of the media file to transcribe.
	        progress: Optional keyword-only callback invoked as
	            ``progress(fraction, message)``; it is reset to ``0.0`` when the
	            CPU fallback starts.

	    Returns:
	        The ``Transcript`` produced by the successful pass.

	    Raises:
	        RuntimeError: re-raised unchanged when the failure did not happen on a
	            CUDA model or does not look like a CUDA error. Errors from the
	            single CPU retry are not caught.
	    """
	    global _MODEL, _MODEL_INFO, _FORCE_CPU
	    try:
	        return _run_transcribe(media_path, progress)
	    except RuntimeError as exc:
	        on_gpu = bool(_MODEL_INFO and _MODEL_INFO[0] == "cuda")
	        if not (on_gpu and _is_cuda_error(exc)):
	            raise
	        # Drop the GPU model and retry once on CPU. _FORCE_CPU makes the next
	        # model load skip the GPU candidate.
	        _MODEL, _MODEL_INFO, _FORCE_CPU = None, None, True
	        if progress:
	            progress(0.0, "GPU unavailable — falling back to CPU…")
	        return _run_transcribe(media_path, progress)