0xZohar committed on
Commit
8526d06
·
verified ·
1 Parent(s): 5cb5de0

Fix: Safe device selection for CPU/GPU compatibility

Browse files

- clip_retrieval.py: Add _resolve_device() with CUDA detection & CPU fallback
- demo.py: Auto-select GPT engine (EngineFast for CUDA, Engine for CPU)
- engine.py: Normalize device_map to string for CLIP text encoder

Resolves: CUDA availability issues on HF Spaces CPU instances
Support: Works on both GPU and CPU tiers

File: code/clip_retrieval.py

Files changed (1) hide show
  1. code/clip_retrieval.py +73 -26
code/clip_retrieval.py CHANGED
@@ -48,9 +48,8 @@ class CLIPRetriever:
48
  self.cache_dir = cache_dir or os.path.join(data_root, "clip_features")
49
  self.model_name = model_name
50
 
51
- # ZeroGPU: Always use cuda (ZeroGPU manages allocation automatically)
52
- # DO NOT check torch.cuda.is_available() as it returns False at startup
53
- self.device = "cuda"
54
 
55
  # State
56
  self.model = None
@@ -62,6 +61,30 @@ class CLIPRetriever:
62
  self._load_cache()
63
  self._load_model()
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  def _load_cache(self):
66
  """Load precomputed features and metadata"""
67
  features_path = os.path.join(self.cache_dir, "features.npy")
@@ -103,29 +126,53 @@ class CLIPRetriever:
103
  print(f"Loading CLIP model: {self.model_name} on {self.device}")
104
  print(f"Cache directory: {HF_CACHE_DIR}")
105
 
106
- try:
107
- self.model = CLIPModel.from_pretrained(
108
- self.model_name,
109
- cache_dir=HF_CACHE_DIR,
110
- use_safetensors=True # Force safetensors to bypass CVE-2025-32434
111
- ).to(self.device)
112
-
113
- self.processor = CLIPProcessor.from_pretrained(
114
- self.model_name,
115
- cache_dir=HF_CACHE_DIR,
116
- use_safetensors=True # Force safetensors to bypass CVE-2025-32434
117
- )
118
-
119
- self.model.eval()
120
- print("✅ CLIP model loaded successfully")
121
-
122
- except Exception as e:
123
- print(f"❌ CLIP model loading failed: {e}")
124
- raise RuntimeError(
125
- f"Failed to load CLIP model from {self.model_name}\n"
126
- f"Cache directory: {HF_CACHE_DIR}\n"
127
- f"Error: {e}"
128
- ) from e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
  def _encode_text(self, text: str) -> np.ndarray:
131
  """
 
48
  self.cache_dir = cache_dir or os.path.join(data_root, "clip_features")
49
  self.model_name = model_name
50
 
51
+ # Resolve runtime device with safe CPU fallback (HF Spaces cpu/basic instances)
52
+ self.device = self._resolve_device(device)
 
53
 
54
  # State
55
  self.model = None
 
61
  self._load_cache()
62
  self._load_model()
63
 
64
+ def _resolve_device(self, device_override: Optional[str]) -> str:
65
+ """
66
+ Decide which device to use for the CLIP encoder.
67
+
68
+ Priority:
69
+ 1) Explicit argument
70
+ 2) Environment override: CLIP_DEVICE
71
+ 3) CUDA if available
72
+ 4) CPU fallback (avoids HF Spaces "no NVIDIA driver" failures)
73
+ """
74
+ if device_override:
75
+ return device_override
76
+
77
+ env_device = os.getenv("CLIP_DEVICE")
78
+ if env_device:
79
+ print(f"🔧 Using device from CLIP_DEVICE env: {env_device}")
80
+ return env_device
81
+
82
+ if torch.cuda.is_available():
83
+ return "cuda"
84
+
85
+ print("ℹ️ CUDA not available; defaulting CLIP to CPU")
86
+ return "cpu"
87
+
88
  def _load_cache(self):
89
  """Load precomputed features and metadata"""
90
  features_path = os.path.join(self.cache_dir, "features.npy")
 
126
  print(f"Loading CLIP model: {self.model_name} on {self.device}")
127
  print(f"Cache directory: {HF_CACHE_DIR}")
128
 
129
+ # Try preferred device first, then fall back to CPU if GPU is unavailable
130
+ preferred_device = self.device
131
+ device_attempts = [preferred_device]
132
+ if preferred_device != "cpu":
133
+ device_attempts.append("cpu")
134
+
135
+ last_error = None
136
+
137
+ for target_device in device_attempts:
138
+ try:
139
+ torch_dtype = torch.float16 if target_device.startswith("cuda") else torch.float32
140
+
141
+ model = CLIPModel.from_pretrained(
142
+ self.model_name,
143
+ cache_dir=HF_CACHE_DIR,
144
+ use_safetensors=True, # Force safetensors to bypass CVE-2025-32434
145
+ torch_dtype=torch_dtype,
146
+ ).to(target_device)
147
+
148
+ processor = CLIPProcessor.from_pretrained(
149
+ self.model_name,
150
+ cache_dir=HF_CACHE_DIR,
151
+ use_safetensors=True # Force safetensors to bypass CVE-2025-32434
152
+ )
153
+
154
+ self.model = model
155
+ self.processor = processor
156
+ self.device = target_device
157
+ self.model.eval()
158
+
159
+ if target_device != preferred_device:
160
+ print(f"ℹ️ CLIP loaded on {target_device} (fallback from {preferred_device})")
161
+ else:
162
+ print("✅ CLIP model loaded successfully")
163
+ return
164
+
165
+ except Exception as e:
166
+ last_error = e
167
+ print(f"⚠️ CLIP load failed on {target_device}: {e}")
168
+ continue
169
+
170
+ # If we reach here, all attempts failed
171
+ raise RuntimeError(
172
+ f"Failed to load CLIP model from {self.model_name}\n"
173
+ f"Cache directory: {HF_CACHE_DIR}\n"
174
+ f"Error: {last_error}"
175
+ ) from last_error
176
 
177
  def _encode_text(self, text: str) -> np.ndarray:
178
  """