0xZohar committed on
Commit
5b6a559
·
verified ·
1 Parent(s): 7a4cc65

Fix CLIP loading: Use /data cache for clip_retrieval.py

Browse files
Files changed (1) hide show
  1. code/clip_retrieval.py +12 -65
code/clip_retrieval.py CHANGED
@@ -90,88 +90,35 @@ class CLIPRetriever:
90
  print(f"Feature dimension: {self.features.shape[1]}")
91
 
92
  def _load_model(self):
93
- """Load CLIP model for text encoding with guaranteed download
94
 
95
- Strategy:
96
- 1. Use snapshot_download() to ensure all model files are cached
97
- 2. Try local_files_only=True to read from cache (read-only)
98
- 3. If fails, fallback to /tmp cache and allow download
99
-
100
- This replaces preload_from_hub which was not executing in HF Spaces.
101
  """
102
- import os
103
- from huggingface_hub import snapshot_download
104
-
105
  print(f"Loading CLIP model: {self.model_name} on {self.device}")
106
- print(f"Primary cache directory: {HF_CACHE_DIR}")
107
-
108
- # NEW: Download complete model first (will use cache if already downloaded)
109
- try:
110
- print(f"[Step 1/3] Ensuring CLIP model is downloaded...")
111
- snapshot_download(
112
- repo_id=self.model_name,
113
- cache_dir=HF_CACHE_DIR,
114
- allow_patterns=["*.json", "*.bin", "*.txt", "*.msgpack", "*.h5"],
115
- ignore_patterns=["*.safetensors"] # We only need PyTorch weights
116
- )
117
- print(f"✅ CLIP model files verified/downloaded to cache")
118
- except Exception as e:
119
- print(f"⚠️ Snapshot download warning: {type(e).__name__}")
120
- print(f" Will attempt loading anyway: {str(e)[:100]}")
121
-
122
- # Strategy 2: Try loading from cache (read-only)
123
- try:
124
- print(f"[Step 2/3] Loading from cache (read-only)...")
125
-
126
- self.model = CLIPModel.from_pretrained(
127
- self.model_name,
128
- cache_dir=HF_CACHE_DIR,
129
- local_files_only=True # KEY: Read-only mode
130
- ).to(self.device)
131
-
132
- self.processor = CLIPProcessor.from_pretrained(
133
- self.model_name,
134
- cache_dir=HF_CACHE_DIR,
135
- local_files_only=True # KEY: Read-only mode
136
- )
137
 
138
- self.model.eval()
139
- print("✅ CLIP model loaded successfully from cache")
140
- return # Success
141
-
142
- except Exception as e:
143
- print(f"⚠️ Failed to load from cache: {type(e).__name__}")
144
- print(f" {str(e)[:100]}")
145
-
146
- # Strategy 3: Fallback to /tmp cache (writable, allows download)
147
  try:
148
- tmp_cache_dir = "/tmp/huggingface"
149
- os.makedirs(tmp_cache_dir, exist_ok=True)
150
-
151
- print(f"[Step 3/3] Fallback: downloading to /tmp cache...")
152
- print(f" Fallback cache: {tmp_cache_dir}")
153
-
154
  self.model = CLIPModel.from_pretrained(
155
  self.model_name,
156
- cache_dir=tmp_cache_dir
157
  ).to(self.device)
158
 
159
  self.processor = CLIPProcessor.from_pretrained(
160
  self.model_name,
161
- cache_dir=tmp_cache_dir
162
  )
163
 
164
  self.model.eval()
165
- print("✅ CLIP model loaded successfully (fallback /tmp)")
166
- return # Success
167
 
168
  except Exception as e:
169
- print(f"❌ Failed to load CLIP model after all attempts: {e}")
170
  raise RuntimeError(
171
- f"CLIP model loading failed in all 3 attempts.\n"
172
- f"Step 1: snapshot_download to {HF_CACHE_DIR} (may have failed silently)\n"
173
- f"Step 2: local_files_only from cache (failed)\n"
174
- f"Step 3: download to /tmp cache (failed)\n"
175
  f"Error: {e}"
176
  ) from e
177
 
 
90
  print(f"Feature dimension: {self.features.shape[1]}")
91
 
92
  def _load_model(self):
93
+ """Load CLIP model using /data persistent cache
94
 
95
+ Simplified loading strategy:
96
+ - Use HF_CACHE_DIR (/data/.huggingface in HF Spaces)
97
+ - Allow automatic download on first use
98
+ - /data is writable and persistent in HF Spaces
 
 
99
  """
 
 
 
100
  print(f"Loading CLIP model: {self.model_name} on {self.device}")
101
+ print(f"Cache directory: {HF_CACHE_DIR}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
 
 
 
 
 
 
 
 
 
103
  try:
 
 
 
 
 
 
104
  self.model = CLIPModel.from_pretrained(
105
  self.model_name,
106
+ cache_dir=HF_CACHE_DIR
107
  ).to(self.device)
108
 
109
  self.processor = CLIPProcessor.from_pretrained(
110
  self.model_name,
111
+ cache_dir=HF_CACHE_DIR
112
  )
113
 
114
  self.model.eval()
115
+ print("✅ CLIP model loaded successfully")
 
116
 
117
  except Exception as e:
118
+ print(f"❌ CLIP model loading failed: {e}")
119
  raise RuntimeError(
120
+ f"Failed to load CLIP model from {self.model_name}\n"
121
+ f"Cache directory: {HF_CACHE_DIR}\n"
 
 
122
  f"Error: {e}"
123
  ) from e
124