Spaces:

build-small-hackathon
/

aMuseMe

Running on Zero

fix: T4/ZeroGPU compat — cuDNN pin, font packages, GPU detection

by Blazestorm001 - opened 12 days ago

←

Files changed (4) hide show

app.py CHANGED Viewed

@@ -12,22 +12,27 @@ if str(SRC_DIR) not in sys.path:
 from amuseme.transcriber import transcribe
 from amuseme.renderer import render_frames
-from amuseme.animations import THEME_COLORS as THEMES, FONT_FAMILIES, DEFAULT_FONT_FAMILY
 from amuseme.video_assembler import assemble
 from amuseme.logger import get_logger
 logger = get_logger("app")
-# Try to import spaces for ZeroGPU; gracefully degrade locally
 try:
     import spaces
     HAS_SPACES = True
 except ImportError:
     HAS_SPACES = False
 if HAS_SPACES:
     from huggingface_hub import snapshot_download
-    logger.info("HF Space detected. Pre-downloading heavy models to avoid ZeroGPU timeout...")
     try:
         snapshot_download(repo_id="Systran/faster-whisper-large-v3")
         snapshot_download(repo_id="openbmb/MiniCPM5-1B")
@@ -41,8 +46,9 @@ def _gpu_transcribe(audio_path: str, model_size: str, use_demucs: bool, cond_pre
     return transcribe(audio_path, model_size=model_size, use_demucs=use_demucs, condition_on_previous_text=cond_prev, use_vad=use_vad, theme=theme, visual_prompt=visual_prompt)
-if HAS_SPACES:
-    _gpu_transcribe = spaces.GPU(duration=120)(_gpu_transcribe)
 def generate_video(audio_path: str, theme: str, font_family: str, visual_prompt: str, model_size: str, use_demucs: bool, cond_prev: bool, use_vad: bool) -> str:

 from amuseme.transcriber import transcribe
 from amuseme.renderer import render_frames
+from amuseme.animations import THEME_COLORS as THEMES, FONT_FAMILIES
 from amuseme.video_assembler import assemble
 from amuseme.logger import get_logger
 logger = get_logger("app")
+import os
+# ZeroGPU Spaces set SPACES_ZERO_GPU=1; permanent GPU Spaces (T4 etc.) do not.
+IS_ZEROGPU = os.environ.get("SPACES_ZERO_GPU", "0") == "1"
 try:
     import spaces
     HAS_SPACES = True
 except ImportError:
     HAS_SPACES = False
+# Pre-download models at Space startup so they're cached before inference
 if HAS_SPACES:
     from huggingface_hub import snapshot_download
+    logger.info("HF Space detected. Pre-downloading heavy models...")
     try:
         snapshot_download(repo_id="Systran/faster-whisper-large-v3")
         snapshot_download(repo_id="openbmb/MiniCPM5-1B")
     return transcribe(audio_path, model_size=model_size, use_demucs=use_demucs, condition_on_previous_text=cond_prev, use_vad=use_vad, theme=theme, visual_prompt=visual_prompt)
+# Only wrap with spaces.GPU on ZeroGPU — on permanent GPU Spaces it raises RuntimeError
+if IS_ZEROGPU and HAS_SPACES:
+    _gpu_transcribe = spaces.GPU(duration=150)(_gpu_transcribe)
 def generate_video(audio_path: str, theme: str, font_family: str, visual_prompt: str, model_size: str, use_demucs: bool, cond_prev: bool, use_vad: bool) -> str:

packages.txt ADDED Viewed

requirements.txt CHANGED Viewed

@@ -3,10 +3,11 @@
 # torch/torchaudio come from the ZeroGPU runtime + the demucs dependency,
 # so they are intentionally not pinned here.
 faster-whisper
-ctranslate2==4.3.1
 demucs
-torchcodec
 pillow
 pydantic
 spaces

 # torch/torchaudio come from the ZeroGPU runtime + the demucs dependency,
 # so they are intentionally not pinned here.
+# ctranslate2: do NOT pin to 4.3.1. pip cannot match a wheel to the host's cuDNN;
+# left unpinned, it installs a current release, which is built against cuDNN 9.
+# 4.3.1 is a cuDNN 8 build and fails on ZeroGPU's cuDNN 9 with
+# "libcudnn_ops_infer.so.8: cannot open shared object file".
 faster-whisper
 demucs
 pillow
 pydantic
 spaces

src/amuseme/transcriber.py CHANGED Viewed

@@ -70,15 +70,24 @@ def _load_model(model_size: str = "large-v3"):
     if _model is None:
         if model_size == "turbo":
             model_size = "large-v3-turbo"
         device = "cpu" if os.environ.get("FORCE_CPU") == "1" else "cuda"
         logger.info(f"Loading Whisper {model_size} on {device}...")
         compute_type = "float16" if device == "cuda" else "int8"
         try:
             _model = WhisperModel(model_size, device=device, compute_type=compute_type)
         except Exception as e:
-            logger.warning(f"Failed to load {model_size} with {compute_type}: {e}. Falling back to float32.")
-            _model = WhisperModel(model_size, device=device, compute_type="float32")
     return _model

     if _model is None:
         if model_size == "turbo":
             model_size = "large-v3-turbo"
         device = "cpu" if os.environ.get("FORCE_CPU") == "1" else "cuda"
         logger.info(f"Loading Whisper {model_size} on {device}...")
         compute_type = "float16" if device == "cuda" else "int8"
         try:
             _model = WhisperModel(model_size, device=device, compute_type=compute_type)
         except Exception as e:
+            if device == "cuda":
+                # CUDA/cuDNN library mismatch (e.g. on ZeroGPU). Retry on CPU.
+                logger.warning(f"CUDA load failed ({e}). Falling back to CPU int8.")
+                try:
+                    _model = WhisperModel(model_size, device="cpu", compute_type="int8")
+                except Exception as e2:
+                    logger.error(f"CPU fallback also failed: {e2}")
+                    raise
+            else:
+                logger.warning(f"Failed to load with {compute_type}: {e}. Retrying with float32.")
+                _model = WhisperModel(model_size, device=device, compute_type="float32")
     return _model