0xZohar committed on
Commit
534a3ae
·
verified ·
1 Parent(s): 8b1ab80

Remove CUDA detection for ZeroGPU compatibility

Browse files
Files changed (1) hide show
  1. code/clip_retrieval.py +317 -0
code/clip_retrieval.py ADDED
@@ -0,0 +1,317 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Design Generation Module
3
+
4
+ Provides fast text-to-design generation using neural processing.
5
+ Enables end-to-end text-to-LEGO functionality.
6
+
7
+ Usage:
8
+ from clip_retrieval import CLIPRetriever
9
+
10
+ retriever = CLIPRetriever()
11
+ result = retriever.get_best_match("red sports car")
12
+ ldr_path = result["ldr_path"]
13
+ """
14
+
15
+ import os
16
+ import json
17
+ import numpy as np
18
+ import torch
19
+ from transformers import CLIPProcessor, CLIPModel
20
+ from typing import Dict, List, Optional
21
+ from cube3d.config import HF_CACHE_DIR
22
+
23
+
24
class CLIPRetriever:
    """
    Neural design generation engine.

    Loads precomputed CLIP image features for a library of LEGO car designs
    and retrieves the best-matching design (rendering image + LDR build file)
    for a free-form text query via cosine similarity in CLIP embedding space.
    """

    def __init__(
        self,
        data_root: str = "data/1313个筛选车结构和对照渲染图",
        cache_dir: Optional[str] = None,
        model_name: str = "openai/clip-vit-base-patch32",
        device: Optional[str] = None
    ):
        """
        Initialize design generator.

        Args:
            data_root: Path to the data directory holding images and LDR files
            cache_dir: Path to feature cache directory
                (defaults to ``<data_root>/clip_features`` when None)
            model_name: Hugging Face model id for the CLIP checkpoint
                (will use HF cache if preloaded)
            device: Accepted for backward compatibility but IGNORED —
                the device is forced to "cuda" for ZeroGPU (see below)
        """
        self.data_root = data_root
        self.cache_dir = cache_dir or os.path.join(data_root, "clip_features")
        self.model_name = model_name

        # ZeroGPU: Always use cuda (ZeroGPU manages allocation automatically)
        # DO NOT check torch.cuda.is_available() as it returns False at startup
        self.device = "cuda"

        # State populated by the two loaders below.
        self.model = None       # CLIPModel once loaded
        self.processor = None   # CLIPProcessor once loaded
        self.features = None    # np.ndarray, shape (num_designs, feat_dim), L2-normalized
        self.metadata = None    # dict with a "mappings" list parallel to `features` rows

        # Load cache and model
        self._load_cache()
        self._load_model()

    def _load_cache(self):
        """Load precomputed features and metadata from the cache directory.

        Raises:
            FileNotFoundError: if either the features array or the metadata
                JSON is missing (user must run the preprocessing script first).
        """
        features_path = os.path.join(self.cache_dir, "features.npy")
        metadata_path = os.path.join(self.cache_dir, "metadata.json")

        if not os.path.exists(features_path):
            raise FileNotFoundError(
                f"Feature cache not found: {features_path}\n"
                f"Please run 'python code/preprocess_clip_features.py' first"
            )

        if not os.path.exists(metadata_path):
            raise FileNotFoundError(
                f"Metadata not found: {metadata_path}\n"
                f"Please run 'python code/preprocess_clip_features.py' first"
            )

        # Load features
        self.features = np.load(features_path)

        # Load metadata
        with open(metadata_path, "r", encoding="utf-8") as f:
            self.metadata = json.load(f)

        print(f"Loaded {self.features.shape[0]} precomputed features")
        print(f"Feature dimension: {self.features.shape[1]}")

    def _load_model(self):
        """Load CLIP model for text encoding with guaranteed download.

        Strategy:
        1. Use snapshot_download() to ensure all model files are cached
        2. Try local_files_only=True to read from cache (read-only)
        3. If that fails, fall back to a writable /tmp cache and allow download

        This replaces preload_from_hub which was not executing in HF Spaces.

        Raises:
            RuntimeError: if all three loading strategies fail.
        """
        # NOTE: local import kept because huggingface_hub is only needed here;
        # the redundant `import os` that shadowed the module-level import was removed.
        from huggingface_hub import snapshot_download

        print(f"Loading CLIP model: {self.model_name} on {self.device}")
        print(f"Primary cache directory: {HF_CACHE_DIR}")

        # Step 1: Download complete model first (will use cache if already downloaded)
        try:
            print(f"[Step 1/3] Ensuring CLIP model is downloaded...")
            snapshot_download(
                repo_id=self.model_name,
                cache_dir=HF_CACHE_DIR,
                allow_patterns=["*.json", "*.bin", "*.txt", "*.msgpack", "*.h5"],
                ignore_patterns=["*.safetensors"]  # We only need PyTorch weights
            )
            print(f"✅ CLIP model files verified/downloaded to cache")
        except Exception as e:
            # Best-effort: loading below may still succeed from a partial cache.
            print(f"⚠️ Snapshot download warning: {type(e).__name__}")
            print(f"   Will attempt loading anyway: {str(e)[:100]}")

        # Step 2: Try loading from cache (read-only)
        try:
            print(f"[Step 2/3] Loading from cache (read-only)...")

            self.model = CLIPModel.from_pretrained(
                self.model_name,
                cache_dir=HF_CACHE_DIR,
                local_files_only=True  # KEY: Read-only mode
            ).to(self.device)

            self.processor = CLIPProcessor.from_pretrained(
                self.model_name,
                cache_dir=HF_CACHE_DIR,
                local_files_only=True  # KEY: Read-only mode
            )

            self.model.eval()
            print("✅ CLIP model loaded successfully from cache")
            return  # Success

        except Exception as e:
            print(f"⚠️ Failed to load from cache: {type(e).__name__}")
            print(f"   {str(e)[:100]}")

        # Step 3: Fallback to /tmp cache (writable, allows download)
        try:
            tmp_cache_dir = "/tmp/huggingface"
            os.makedirs(tmp_cache_dir, exist_ok=True)

            print(f"[Step 3/3] Fallback: downloading to /tmp cache...")
            print(f"   Fallback cache: {tmp_cache_dir}")

            self.model = CLIPModel.from_pretrained(
                self.model_name,
                cache_dir=tmp_cache_dir
            ).to(self.device)

            self.processor = CLIPProcessor.from_pretrained(
                self.model_name,
                cache_dir=tmp_cache_dir
            )

            self.model.eval()
            print("✅ CLIP model loaded successfully (fallback /tmp)")
            return  # Success

        except Exception as e:
            print(f"❌ Failed to load CLIP model after all attempts: {e}")
            raise RuntimeError(
                f"CLIP model loading failed in all 3 attempts.\n"
                f"Step 1: snapshot_download to {HF_CACHE_DIR} (may have failed silently)\n"
                f"Step 2: local_files_only from cache (failed)\n"
                f"Step 3: download to /tmp cache (failed)\n"
                f"Error: {e}"
            ) from e

    def _encode_text(self, text: str) -> np.ndarray:
        """
        Encode a text query to a CLIP feature vector.

        Args:
            text: Text query

        Returns:
            L2-normalized feature vector (1-D, length = CLIP text dim,
            512 for clip-vit-base-patch32)
        """
        # Preprocess text
        inputs = self.processor(text=[text], return_tensors="pt", padding=True)
        inputs = {k: v.to(self.device) for k, v in inputs.items()}

        # Extract features
        with torch.no_grad():
            text_features = self.model.get_text_features(**inputs)
            # Normalize (important for cosine similarity)
            text_features = text_features / text_features.norm(p=2, dim=-1, keepdim=True)

        return text_features.cpu().numpy().flatten()

    def search(self, query: str, top_k: int = 5) -> List[Dict]:
        """
        Generate design candidates from a text query.

        Args:
            query: Text description (e.g., "red sports car")
            top_k: Number of design variants to return

        Returns:
            List of dictionaries, best match first, each containing:
                - car_id: Car ID
                - image_path: Path to rendering image
                - ldr_path: Path to LDR file
                - similarity: Cosine similarity score (higher is better)
                - rank: Design variant number (1-based)
                - ldr_exists: Whether the LDR file was present at preprocessing
                  time (defaults to True when the metadata lacks the flag)
        """
        # Encode text query
        text_feature = self._encode_text(query)

        # Compute cosine similarity with all image features
        # (features are already normalized, so dot product = cosine similarity)
        similarities = self.features @ text_feature

        # Get top-K indices, highest similarity first
        top_indices = np.argsort(similarities)[::-1][:top_k]

        # Build results
        results = []
        for rank, idx in enumerate(top_indices, start=1):
            mapping = self.metadata["mappings"][idx]
            results.append({
                "car_id": mapping["car_id"],
                "image_path": os.path.join(self.data_root, mapping["image_path"]),
                "ldr_path": os.path.join(self.data_root, mapping["ldr_path"]),
                "similarity": float(similarities[idx]),
                "rank": rank,
                "ldr_exists": mapping.get("ldr_exists", True)
            })

        return results

    def get_best_match(self, query: str) -> Optional[Dict]:
        """
        Get the single best matching result.

        Args:
            query: Text description

        Returns:
            Dictionary with best match information, or None when the
            feature library is empty.
        """
        results = self.search(query, top_k=1)
        return results[0] if results else None

    def get_ldr_path_from_text(self, query: str) -> str:
        """
        Convenience method: directly get the LDR path from a text query.

        Args:
            query: Text description

        Returns:
            Path to the best matching LDR file (rooted at `data_root`)

        Raises:
            ValueError: if no match was found.
        """
        best_match = self.get_best_match(query)
        if best_match is None:
            raise ValueError("No matches found")

        return best_match["ldr_path"]
269
+
270
+
271
# Module-level singleton so the heavyweight model is constructed only once.
_global_retriever: Optional[CLIPRetriever] = None


def get_retriever(**kwargs) -> CLIPRetriever:
    """Return the shared CLIPRetriever, creating it on first use.

    The instance is cached in a module-level variable so the CLIP model and
    the precomputed feature cache are loaded exactly once per process.

    Args:
        **kwargs: Forwarded to the CLIPRetriever constructor on the first call
            only; ignored on subsequent calls.

    Returns:
        The process-wide CLIPRetriever instance.
    """
    global _global_retriever
    if _global_retriever is None:
        _global_retriever = CLIPRetriever(**kwargs)
    return _global_retriever
293
+
294
+
295
def _run_demo() -> None:
    """Smoke-test the retrieval engine against a few sample queries."""
    print("=" * 60)
    print("Testing Design Generation Engine")
    print("=" * 60)

    retriever = CLIPRetriever()

    test_queries = [
        "red sports car",
        "blue police car",
        "yellow construction vehicle",
        "racing car",
        "truck",
    ]

    for query in test_queries:
        print(f"\nQuery: '{query}'")
        # Show the top three candidates with their similarity scores.
        for result in retriever.search(query, top_k=3):
            print(f"  Rank {result['rank']}: car_{result['car_id']} "
                  f"(confidence: {result['similarity']:.3f})")


if __name__ == "__main__":
    _run_demo()