Spaces:

JackIsNotInTheBox
/

Generate_Audio_for_Video

Running on Zero

BoxOfColors committed on 3 days ago

Commit

60d3e36

1 Parent(s): 0691643

fix: load FlashSR on CPU only to avoid ZeroGPU CUDA init violation

ZeroGPU forbids CUDA initialization outside @spaces.GPU-decorated
functions. The FlashSR helpers in app.py were calling
torch.cuda.is_available() and .cuda() in the main process, which poisoned
subsequent @spaces.GPU calls (causing 'GPU task aborted' on xregen).
FlashSR is tiny (1.72 MB) and fast enough on CPU — no GPU is needed for
the upsampling step.

Files changed (1) hide show

app.py +6 -9

app.py CHANGED Viewed

@@ -527,28 +527,25 @@ def _load_flashsr():
             filename="upsampler.pth",
             local_dir=os.path.join(os.path.dirname(os.path.abspath(__file__)), ".flashsr_cache"),
         )
         model = FASR(ckpt_path)
-        if torch.cuda.is_available():
-            model.model.half().cuda()
-            print("[FlashSR] Model loaded on GPU (fp16)")
-        else:
-            print("[FlashSR] Model loaded on CPU (fp32)")
         _FLASHSR_MODEL = model
         return model
 def _apply_flashsr(wav_16k: np.ndarray) -> np.ndarray:
-    """Upsample a mono 16 kHz numpy array to 48 kHz using FlashSR.
     Returns a mono float32 numpy array at 48 kHz.
     Falls back to torchaudio sinc resampling if FlashSR fails.
     """
     try:
         model = _load_flashsr()
         t = torch.from_numpy(wav_16k.astype(np.float32)).unsqueeze(0)
-        if torch.cuda.is_available():
-            t = t.half().cuda()
-        print(f"[FlashSR] Upsampling {len(wav_16k)/FLASHSR_SR_IN:.2f}s @ 16kHz → 48kHz …")
         with torch.no_grad():
             out = model.run(t)
         # out is a tensor or numpy array — normalise to numpy float32 cpu

             filename="upsampler.pth",
             local_dir=os.path.join(os.path.dirname(os.path.abspath(__file__)), ".flashsr_cache"),
         )
+        # Always load on CPU — ZeroGPU forbids CUDA init outside @spaces.GPU.
+        # FlashSR is tiny (1.72 MB) and fast enough on CPU for post-processing.
         model = FASR(ckpt_path)
+        print("[FlashSR] Model loaded on CPU (fp32)")
         _FLASHSR_MODEL = model
         return model
 def _apply_flashsr(wav_16k: np.ndarray) -> np.ndarray:
+    """Upsample a mono 16 kHz numpy array to 48 kHz using FlashSR (CPU).
     Returns a mono float32 numpy array at 48 kHz.
     Falls back to torchaudio sinc resampling if FlashSR fails.
     """
     try:
         model = _load_flashsr()
+        # Keep on CPU — no CUDA outside @spaces.GPU in ZeroGPU environment
         t = torch.from_numpy(wav_16k.astype(np.float32)).unsqueeze(0)
+        print(f"[FlashSR] Upsampling {len(wav_16k)/FLASHSR_SR_IN:.2f}s @ 16kHz → 48kHz (CPU) …")
         with torch.no_grad():
             out = model.run(t)
         # out is a tensor or numpy array — normalise to numpy float32 cpu