Spaces:

luh0502
/

NeAR

Running on Zero

luh1124 Claude Sonnet 4.6 committed on Apr 22

Commit

1aad7b7

1 Parent(s): c7a74a4

fix: replace DINOv2 torch.hub.load with download_url_to_file in preload

torch.hub.load instantiates DINOv2, which imports xformers and thereby triggers
CUDA context init in the main process before any @GPU callback runs, breaking ZeroGPU.

Replace it with torch.hub.download_url_to_file, which fetches only the 1.13 GB weights file.
The GPU callback still downloads the small GitHub repo code on cold start
but loads weights from local cache — no CUDA init in main process.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (1) hide show

app.py +16 -10

app.py CHANGED Viewed

@@ -170,20 +170,26 @@ def _preload_worker() -> None:
     except Exception as exc:
         print(f"[NeAR] preload: RMBG-2.0 disk cache failed: {exc}", flush=True)
-    # Step 4: warm DINOv2 torch.hub cache.
-    # If NEAR_AUX_REPO is set, snapshot_download handles it inside load_dinov2_model.
-    # Otherwise we must pre-fetch facebookresearch/dinov2 from GitHub now (CPU-only).
     if not (os.environ.get("NEAR_DINO_LOCAL_REPO") or os.environ.get("NEAR_AUX_REPO")):
         try:
             import torch
-            _dino_tmp = torch.hub.load(
-                "facebookresearch/dinov2", "dinov2_vitl14_reg",
-                pretrained=True, verbose=False,
-            )
-            del _dino_tmp
-            print("[NeAR] preload: DINOv2 torch.hub cache ready.", flush=True)
         except Exception as exc:
-            print(f"[NeAR] preload: DINOv2 torch.hub cache failed: {exc}", flush=True)
 # ── GPU ensure helpers ────────────────────────────────────────────────────────

     except Exception as exc:
         print(f"[NeAR] preload: RMBG-2.0 disk cache failed: {exc}", flush=True)
+    # Step 4: pre-download DINOv2 weights file only (no model instantiation).
+    # torch.hub.load instantiates the model which imports xformers → triggers CUDA init
+    # in the main process, breaking ZeroGPU's context management.
+    # download_url_to_file is pure urllib — no CUDA. The GPU callback will still need
+    # to download the small GitHub repo code on cold start, but the 1.13 GB weights
+    # file is the slow part and will be served from this local cache.
     if not (os.environ.get("NEAR_DINO_LOCAL_REPO") or os.environ.get("NEAR_AUX_REPO")):
         try:
             import torch
+            ckpt_dir = os.path.join(torch.hub.get_dir(), "checkpoints")
+            os.makedirs(ckpt_dir, exist_ok=True)
+            ckpt_path = os.path.join(ckpt_dir, "dinov2_vitl14_reg4_pretrain.pth")
+            if not os.path.exists(ckpt_path):
+                torch.hub.download_url_to_file(
+                    "https://dl.fbaipublicfiles.com/dinov2/dinov2_vitl14/dinov2_vitl14_reg4_pretrain.pth",
+                    ckpt_path, progress=True,
+                )
+            print("[NeAR] preload: DINOv2 weights file cached.", flush=True)
         except Exception as exc:
+            print(f"[NeAR] preload: DINOv2 weight prefetch failed: {exc}", flush=True)
 # ── GPU ensure helpers ────────────────────────────────────────────────────────