Spaces:
Running on Zero
Running on Zero
fix: T4/ZeroGPU compat — cuDNN pin, font packages, GPU detection
#2
by Blazestorm001 - opened
- app.py +11 -5
- packages.txt +2 -0
- requirements.txt +3 -2
- src/amuseme/transcriber.py +12 -3
app.py
CHANGED
|
@@ -12,22 +12,27 @@ if str(SRC_DIR) not in sys.path:
|
|
| 12 |
|
| 13 |
from amuseme.transcriber import transcribe
|
| 14 |
from amuseme.renderer import render_frames
|
| 15 |
-
from amuseme.animations import THEME_COLORS as THEMES, FONT_FAMILIES
|
| 16 |
from amuseme.video_assembler import assemble
|
| 17 |
from amuseme.logger import get_logger
|
| 18 |
|
| 19 |
logger = get_logger("app")
|
| 20 |
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
try:
|
| 23 |
import spaces
|
| 24 |
HAS_SPACES = True
|
| 25 |
except ImportError:
|
| 26 |
HAS_SPACES = False
|
| 27 |
|
|
|
|
| 28 |
if HAS_SPACES:
|
| 29 |
from huggingface_hub import snapshot_download
|
| 30 |
-
logger.info("HF Space detected. Pre-downloading heavy models
|
| 31 |
try:
|
| 32 |
snapshot_download(repo_id="Systran/faster-whisper-large-v3")
|
| 33 |
snapshot_download(repo_id="openbmb/MiniCPM5-1B")
|
|
@@ -41,8 +46,9 @@ def _gpu_transcribe(audio_path: str, model_size: str, use_demucs: bool, cond_pre
|
|
| 41 |
return transcribe(audio_path, model_size=model_size, use_demucs=use_demucs, condition_on_previous_text=cond_prev, use_vad=use_vad, theme=theme, visual_prompt=visual_prompt)
|
| 42 |
|
| 43 |
|
| 44 |
-
|
| 45 |
-
|
|
|
|
| 46 |
|
| 47 |
|
| 48 |
def generate_video(audio_path: str, theme: str, font_family: str, visual_prompt: str, model_size: str, use_demucs: bool, cond_prev: bool, use_vad: bool) -> str:
|
|
|
|
| 12 |
|
| 13 |
from amuseme.transcriber import transcribe
|
| 14 |
from amuseme.renderer import render_frames
|
| 15 |
+
from amuseme.animations import THEME_COLORS as THEMES, FONT_FAMILIES
|
| 16 |
from amuseme.video_assembler import assemble
|
| 17 |
from amuseme.logger import get_logger
|
| 18 |
|
| 19 |
logger = get_logger("app")
|
| 20 |
|
| 21 |
+
import os
|
| 22 |
+
|
| 23 |
+
# ZeroGPU Spaces set SPACES_ZERO_GPU; permanent GPU Spaces (T4 etc.) do not. NOTE(review): confirm the value is "1" rather than "true".
|
| 24 |
+
IS_ZEROGPU = os.environ.get("SPACES_ZERO_GPU", "0") == "1"
|
| 25 |
+
|
| 26 |
try:
|
| 27 |
import spaces
|
| 28 |
HAS_SPACES = True
|
| 29 |
except ImportError:
|
| 30 |
HAS_SPACES = False
|
| 31 |
|
| 32 |
+
# Pre-download models at Space startup so they're cached before inference
|
| 33 |
if HAS_SPACES:
|
| 34 |
from huggingface_hub import snapshot_download
|
| 35 |
+
logger.info("HF Space detected. Pre-downloading heavy models...")
|
| 36 |
try:
|
| 37 |
snapshot_download(repo_id="Systran/faster-whisper-large-v3")
|
| 38 |
snapshot_download(repo_id="openbmb/MiniCPM5-1B")
|
|
|
|
| 46 |
return transcribe(audio_path, model_size=model_size, use_demucs=use_demucs, condition_on_previous_text=cond_prev, use_vad=use_vad, theme=theme, visual_prompt=visual_prompt)
|
| 47 |
|
| 48 |
|
| 49 |
+
# Only wrap with spaces.GPU on ZeroGPU — on permanent GPU Spaces it raises RuntimeError
|
| 50 |
+
if IS_ZEROGPU and HAS_SPACES:
|
| 51 |
+
_gpu_transcribe = spaces.GPU(duration=150)(_gpu_transcribe)
|
| 52 |
|
| 53 |
|
| 54 |
def generate_video(audio_path: str, theme: str, font_family: str, visual_prompt: str, model_size: str, use_demucs: bool, cond_prev: bool, use_vad: bool) -> str:
|
packages.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fonts-dejavu-core
|
| 2 |
+
fonts-liberation
|
requirements.txt
CHANGED
|
@@ -3,10 +3,11 @@
|
|
| 3 |
# torch/torchaudio come from the ZeroGPU runtime + the demucs dependency,
|
| 4 |
# so they are intentionally not pinned here.
|
| 5 |
|
|
|
|
|
|
|
|
|
|
| 6 |
faster-whisper
|
| 7 |
-
ctranslate2==4.3.1
|
| 8 |
demucs
|
| 9 |
-
torchcodec
|
| 10 |
pillow
|
| 11 |
pydantic
|
| 12 |
spaces
|
|
|
|
| 3 |
# torch/torchaudio come from the ZeroGPU runtime + the demucs dependency,
|
| 4 |
# so they are intentionally not pinned here.
|
| 5 |
|
| 6 |
+
# ctranslate2: do NOT pin — let pip resolve the wheel matching ZeroGPU's
|
| 7 |
+
# CUDA/cuDNN version. Pinning 4.3.1 (cuDNN 8 build) breaks on ZeroGPU's cuDNN 9
|
| 8 |
+
# with "libcudnn_ops_infer.so.8: cannot open shared object file".
|
| 9 |
faster-whisper
|
|
|
|
| 10 |
demucs
|
|
|
|
| 11 |
pillow
|
| 12 |
pydantic
|
| 13 |
spaces
|
src/amuseme/transcriber.py
CHANGED
|
@@ -70,15 +70,24 @@ def _load_model(model_size: str = "large-v3"):
|
|
| 70 |
if _model is None:
|
| 71 |
if model_size == "turbo":
|
| 72 |
model_size = "large-v3-turbo"
|
| 73 |
-
|
| 74 |
device = "cpu" if os.environ.get("FORCE_CPU") == "1" else "cuda"
|
| 75 |
logger.info(f"Loading Whisper {model_size} on {device}...")
|
| 76 |
compute_type = "float16" if device == "cuda" else "int8"
|
| 77 |
try:
|
| 78 |
_model = WhisperModel(model_size, device=device, compute_type=compute_type)
|
| 79 |
except Exception as e:
|
| 80 |
-
|
| 81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
return _model
|
| 83 |
|
| 84 |
|
|
|
|
| 70 |
if _model is None:
|
| 71 |
if model_size == "turbo":
|
| 72 |
model_size = "large-v3-turbo"
|
| 73 |
+
|
| 74 |
device = "cpu" if os.environ.get("FORCE_CPU") == "1" else "cuda"
|
| 75 |
logger.info(f"Loading Whisper {model_size} on {device}...")
|
| 76 |
compute_type = "float16" if device == "cuda" else "int8"
|
| 77 |
try:
|
| 78 |
_model = WhisperModel(model_size, device=device, compute_type=compute_type)
|
| 79 |
except Exception as e:
|
| 80 |
+
if device == "cuda":
|
| 81 |
+
# Any CUDA load failure lands here; most often a CUDA/cuDNN library mismatch (e.g. on ZeroGPU). Retry on CPU.
|
| 82 |
+
logger.warning(f"CUDA load failed ({e}). Falling back to CPU int8.")
|
| 83 |
+
try:
|
| 84 |
+
_model = WhisperModel(model_size, device="cpu", compute_type="int8")
|
| 85 |
+
except Exception as e2:
|
| 86 |
+
logger.error(f"CPU fallback also failed: {e2}")
|
| 87 |
+
raise
|
| 88 |
+
else:
|
| 89 |
+
logger.warning(f"Failed to load with {compute_type}: {e}. Retrying with float32.")
|
| 90 |
+
_model = WhisperModel(model_size, device=device, compute_type="float32")
|
| 91 |
return _model
|
| 92 |
|
| 93 |
|