Commit 1d3b4c2 by Ali Mohsin
Parent(s): c0eeb7b
gooooooooo
Files changed: inference.py (+87 -2)
inference.py CHANGED
@@ -6,6 +6,7 @@ import torch
 import torch.nn as nn
 from PIL import Image
 from huggingface_hub import hf_hub_download
+import clip
 
 from utils.transforms import build_inference_transform
 from models.resnet_embedder import ResNetItemEmbedder
@@ -32,6 +33,10 @@ class InferenceService:
         self.models_loaded = False
         self.model_errors = []
 
+        # Load CLIP for category detection
+        self.clip_model, self.clip_preprocess = None, None
+        self._load_clip()
+
         # Load models with validation
         self.resnet, self.resnet_loaded = self._load_resnet()
         self.vit, self.vit_loaded = self._load_vit()
@@ -57,6 +62,75 @@ class InferenceService:
         if not self.vit_loaded:
             self.model_errors.append("ViT: No trained weights found")
 
+    def _load_clip(self) -> None:
+        """Load CLIP model for category detection."""
+        try:
+            print("🔄 Loading CLIP model for category detection...")
+            self.clip_model, self.clip_preprocess = clip.load("ViT-B/32", device=self.device)
+            print("✅ CLIP model loaded successfully")
+        except Exception as e:
+            print(f"❌ Failed to load CLIP model: {e}")
+            self.clip_model, self.clip_preprocess = None, None
+
+    def _detect_category_with_clip(self, image: Image.Image) -> str:
+        """Detect clothing category using CLIP."""
+        if self.clip_model is None or self.clip_preprocess is None:
+            return "other"
+
+        try:
+            # Define clothing categories with descriptions
+            categories = [
+                "a shirt, t-shirt, blouse, or top",
+                "pants, jeans, trousers, or bottoms",
+                "shoes, sneakers, boots, or footwear",
+                "a jacket, blazer, coat, or outerwear",
+                "a dress or gown",
+                "a skirt or shorts",
+                "a sweater, hoodie, or pullover",
+                "a watch, ring, necklace, or jewelry",
+                "a bag, purse, or handbag",
+                "a hat, cap, or headwear",
+                "a belt or accessory"
+            ]
+
+            # Prepare image and text
+            image_input = self.clip_preprocess(image).unsqueeze(0).to(self.device)
+            text_inputs = clip.tokenize(categories).to(self.device)
+
+            # Get predictions
+            with torch.no_grad():
+                image_features = self.clip_model.encode_image(image_input)
+                text_features = self.clip_model.encode_text(text_inputs)
+
+            # Compute similarity
+            similarity = (100.0 * image_features @ text_features.T).softmax(dim=-1)
+            values, indices = similarity[0].topk(1)
+
+            # Map to outfit categories
+            category_map = {
+                0: "shirt",      # shirt, t-shirt, blouse, top
+                1: "pants",      # pants, jeans, trousers, bottoms
+                2: "shoes",      # shoes, sneakers, boots, footwear
+                3: "jacket",     # jacket, blazer, coat, outerwear
+                4: "dress",      # dress, gown
+                5: "pants",      # skirt, shorts (map to pants for outfit logic)
+                6: "shirt",      # sweater, hoodie, pullover (map to shirt)
+                7: "accessory",  # watch, ring, necklace, jewelry
+                8: "accessory",  # bag, purse, handbag
+                9: "accessory",  # hat, cap, headwear
+                10: "accessory"  # belt, accessory
+            }
+
+            predicted_category = category_map.get(indices[0].item(), "other")
+            confidence = values[0].item()
+
+            print(f"🔍 CLIP detected: '{predicted_category}' (confidence: {confidence:.3f})")
+            return predicted_category
+
+        except Exception as e:
+            print(f"❌ CLIP category detection failed: {e}")
+            return "other"
+
     def _load_resnet(self) -> tuple[nn.Module, bool]:
         strategy = os.getenv("MODEL_LOAD_STRATEGY", "state_dict")
         ckpt_path = os.getenv("RESNET_CHECKPOINT", "models/exports/resnet_item_embedder.pth")
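For readers skimming the diff: _detect_category_with_clip follows the standard CLIP zero-shot classification recipe (encode the image and a list of label prompts, softmax over the scaled similarities, take the top label). Below is a minimal standalone sketch of that pattern, assuming `clip` is OpenAI's CLIP package (installed via pip from github.com/openai/CLIP); `zero_shot_label`, its arguments, and the example labels are hypothetical, not part of this commit.

import clip
import torch
from PIL import Image

def zero_shot_label(image_path: str, labels: list[str], device: str = "cpu") -> str:
    # Loading per call keeps the sketch self-contained; the commit loads once in __init__.
    model, preprocess = clip.load("ViT-B/32", device=device)
    image = preprocess(Image.open(image_path)).unsqueeze(0).to(device)
    text = clip.tokenize(labels).to(device)
    with torch.no_grad():
        image_features = model.encode_image(image)
        text_features = model.encode_text(text)
        # Like the commit, this skips L2-normalizing the features before the
        # dot product; OpenAI's README normalizes first, but the unnormalized
        # variant is what this commit ships.
        probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)
    return labels[probs.argmax(dim=-1).item()]

# e.g. zero_shot_label("item.jpg", ["a shirt", "pants", "shoes"])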
@@ -236,6 +310,17 @@ class InferenceService:
         proc_items: List[Dict[str, Any]] = []
         for i, it in enumerate(items):
             print(f"🔍 DEBUG: Processing item {i}: id={it.get('id')}, has_image={it.get('image') is not None}, has_embedding={it.get('embedding') is not None}")
+
+            # Auto-detect category using CLIP if not provided or is None
+            category = it.get("category")
+            if not category or category == "None" or category == "":
+                if it.get("image") is not None:
+                    print(f"🔍 DEBUG: Auto-detecting category for item {i} using CLIP...")
+                    category = self._detect_category_with_clip(it["image"])
+                else:
+                    category = "other"
+                    print(f"🔍 DEBUG: No image available for item {i}, using 'other' category")
+
             emb = it.get("embedding")
             if emb is None and it.get("image") is not None:
                 # Compute on-the-fly if image provided
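A note on the guard in this hunk: `not category` already covers None and the empty string, so `category == ""` is redundant (though harmless), and `category == "None"` presumably protects against clients that serialize Python's None into a literal string. A tiny illustration with made-up values:

# Made-up inputs walking through the guard from the hunk above.
for raw in (None, "", "None", "shirt"):
    if not raw or raw == "None" or raw == "":
        print(f"{raw!r} -> auto-detect with CLIP")
    else:
        print(f"{raw!r} -> keep {raw!r}")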
@@ -249,9 +334,9 @@ class InferenceService:
             proc_items.append({
                 "id": it.get("id"),
                 "embedding": emb_np,
-                "category":
+                "category": category
             })
-            print(f"🔍 DEBUG: Added item {i} to proc_items, total: {len(proc_items)}")
+            print(f"🔍 DEBUG: Added item {i} to proc_items with category '{category}', total: {len(proc_items)}")
 
         print(f"🔍 DEBUG: Final proc_items count: {len(proc_items)}")
         if len(proc_items) < 2:
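Net effect: callers may omit "category" and the service back-fills it before outfit scoring. A hedged sketch of the item shapes this enables (ids, file names, and the 512-dim placeholder embedding are illustrative; the name of the enclosing scoring method is not visible in this diff):

from PIL import Image

# Hypothetical inputs, inferred from the diff rather than shown in it.
items = [
    {"id": "a1", "image": Image.open("shirt.jpg")},                       # no category: CLIP auto-detects
    {"id": "b2", "image": Image.open("jeans.jpg"), "category": "pants"},  # explicit category is kept
    {"id": "c3", "embedding": [0.1] * 512},                               # no image: falls back to "other"
]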