Upload example_usage.py with huggingface_hub

example_usage.py CHANGED (+49 -17)
@@ -1,9 +1,13 @@
 #!/usr/bin/env python3
 """
-Example usage of models from Hugging Face
+Example usage of models from Hugging Face.
+This file provides example code for loading and using the models (color, hierarchy, main)
+from the Hugging Face Hub. It shows how to load models, extract embeddings,
+and perform searches or similarity comparisons.
 """
 
 import torch
+import torch.nn.functional as F
 from PIL import Image
 from transformers import CLIPProcessor, CLIPModel as CLIPModel_transformers
 from huggingface_hub import hf_hub_download
@@ -11,9 +15,9 @@ import json
 import os
 
 # Import local models (to adapt to your structure)
-from color_model import ColorCLIP,
+from color_model import ColorCLIP, Tokenizer
 from hierarchy_model import Model as HierarchyModel, HierarchyExtractor
-
+import config
 
 def load_models_from_hf(repo_id: str, cache_dir: str = "./models_cache"):
     """
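The new "import config" line centralizes device and artifact settings. The repo's actual config.py is not part of this commit; the sketch below is a hypothetical reconstruction based only on the attributes this file reads (config.device, config.tokeniser_path, config.hierarchy_model_path, config.main_model_path, config.color_emb_dim, config.hierarchy_emb_dim), and every value in it is a placeholder assumption:

# config.py: hypothetical sketch. Attribute names come from example_usage.py;
# all values below are placeholder assumptions, not the repo's real settings.
import torch

# Prefer GPU when available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Filenames of the artifacts inside the Hub repo (assumed names).
tokeniser_path = "tokenizer_vocab.json"
hierarchy_model_path = "hierarchy_model.pt"
main_model_path = "main_model.pt"

# Dimensions of the color and hierarchy sub-embeddings (assumed values).
color_emb_dim = 64
hierarchy_emb_dim = 128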
@@ -25,7 +29,7 @@ def load_models_from_hf(repo_id: str, cache_dir: str = "./models_cache"):
     """
 
     os.makedirs(cache_dir, exist_ok=True)
-    device =
+    device = config.device
 
     print(f"📥 Loading models from '{repo_id}'...")
 
@@ -40,19 +44,19 @@ def load_models_from_hf(repo_id: str, cache_dir: str = "./models_cache"):
     # Loading vocabulary
     vocab_path = hf_hub_download(
         repo_id=repo_id,
-        filename=
+        filename=config.tokeniser_path,
         cache_dir=cache_dir
     )
 
     with open(vocab_path, 'r') as f:
         vocab_dict = json.load(f)
 
-    tokenizer =
+    tokenizer = Tokenizer()
     tokenizer.load_vocab(vocab_dict)
 
     checkpoint = torch.load(color_model_path, map_location=device)
     vocab_size = checkpoint['text_encoder.embedding.weight'].shape[0]
-    color_model = ColorCLIP(vocab_size=vocab_size, embedding_dim=color_emb_dim).to(device)
+    color_model = ColorCLIP(vocab_size=vocab_size, embedding_dim=config.color_emb_dim).to(device)
    color_model.tokenizer = tokenizer
     color_model.load_state_dict(checkpoint)
     color_model.eval()
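Worth noting above: rather than hard-coding the vocabulary size, the code reads it off the checkpoint itself, using the row count of the text embedding matrix. The same technique in isolation (the checkpoint filename here is a placeholder):

import torch

# The embedding matrix has one row per vocabulary entry, so its first
# dimension is the vocabulary size; the key name follows example_usage.py.
ckpt = torch.load("color_model.pt", map_location="cpu")
vocab_size, emb_dim = ckpt["text_encoder.embedding.weight"].shape
print(f"vocab_size={vocab_size}, emb_dim={emb_dim}")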
@@ -62,7 +66,7 @@ def load_models_from_hf(repo_id: str, cache_dir: str = "./models_cache"):
     print(" 📦 Loading hierarchy model...")
     hierarchy_model_path = hf_hub_download(
         repo_id=repo_id,
-        filename=
+        filename=config.hierarchy_model_path,
         cache_dir=cache_dir
     )
 
@@ -71,7 +75,7 @@ def load_models_from_hf(repo_id: str, cache_dir: str = "./models_cache"):
 
     hierarchy_model = HierarchyModel(
         num_hierarchy_classes=len(hierarchy_classes),
-        embed_dim=hierarchy_emb_dim
+        embed_dim=config.hierarchy_emb_dim
     ).to(device)
     hierarchy_model.load_state_dict(hierarchy_checkpoint['model_state'])
 
@@ -84,7 +88,7 @@ def load_models_from_hf(repo_id: str, cache_dir: str = "./models_cache"):
     print(" 📦 Loading main CLIP model...")
     main_model_path = hf_hub_download(
         repo_id=repo_id,
-        filename=
+        filename=config.main_model_path,
         cache_dir=cache_dir
     )
 
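Each artifact above goes through hf_hub_download, which fetches a single file from a Hub repo into a local cache and returns the path to the cached copy; calling it again with the same arguments reuses the cache instead of re-downloading. A minimal standalone sketch (the repo id and filename are placeholders):

from huggingface_hub import hf_hub_download

# Download one file from the Hub; repeated calls hit ./models_cache.
local_path = hf_hub_download(
    repo_id="some-user/some-repo",    # placeholder repo id
    filename="tokenizer_vocab.json",  # placeholder filename
    cache_dir="./models_cache",
)
print(local_path)  # filesystem path to the cached file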
@@ -141,22 +145,45 @@ def example_search(models, image_path: str = None, text_query: str = None):
     if text_query:
         print(f" 🔍 Text query: '{text_query}'")
 
-        #
+        # Get color and hierarchy embeddings
         color_emb = color_model.get_text_embeddings([text_query])
         hierarchy_emb = hierarchy_model.get_text_embeddings([text_query])
 
         print(f" 🎨 Color embedding: {color_emb.shape}")
+        print(f"color_emb: {color_emb}")
         print(f" 📊 Hierarchy embedding: {hierarchy_emb.shape}")
+        print(f"hierarchy_emb: {hierarchy_emb}")
 
-        #
+        # Get main model embeddings
         text_inputs = processor(text=[text_query], padding=True, return_tensors="pt")
         text_inputs = {k: v.to(device) for k, v in text_inputs.items()}
 
         with torch.no_grad():
-
-
+            # Use text_model directly for text-only processing
+            text_outputs = main_model.text_model(**text_inputs)
+            text_features = main_model.text_projection(text_outputs.pooler_output)
+            text_features = F.normalize(text_features, dim=-1)
 
         print(f" 🎯 Main embedding: {text_features.shape}")
+        print(f" 🎯 First values of main embedding: {text_features[0, :10]}")
+
+        # Extract color and hierarchy embeddings from main embedding
+        main_color_emb = text_features[:, :config.color_emb_dim]
+        main_hierarchy_emb = text_features[:, config.color_emb_dim:config.color_emb_dim + config.hierarchy_emb_dim]
+
+        print(f"\n 📊 Comparison:")
+        print(f" 🎨 Color embedding from color model: {color_emb[0]}")
+        print(f" 🎨 Color embedding from main model (first {config.color_emb_dim} dims): {main_color_emb[0]}")
+        print(f" 📊 Hierarchy embedding from hierarchy model: {hierarchy_emb[0]}")
+        print(f" 📊 Hierarchy embedding from main model (dims {config.color_emb_dim}-{config.color_emb_dim + config.hierarchy_emb_dim}): {main_hierarchy_emb[0]}")
+
+        # Calculate cosine similarity between color embeddings
+        color_cosine_sim = F.cosine_similarity(color_emb, main_color_emb, dim=1)
+        print(f"\n 📏 Cosine similarity between color embeddings: {color_cosine_sim.item():.4f}")
+
+        # Calculate cosine similarity between hierarchy embeddings
+        hierarchy_cosine_sim = F.cosine_similarity(hierarchy_emb, main_hierarchy_emb, dim=1)
+        print(f" 📏 Cosine similarity between hierarchy embeddings: {hierarchy_cosine_sim.item():.4f}")
 
     if image_path and os.path.exists(image_path):
         print(f" 🖼️ Image: {image_path}")
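The new torch.no_grad() block runs the text through text_model and text_projection by hand instead of using the usual convenience method. Assuming main_model is the transformers.CLIPModel that the import suggests, get_text_features performs exactly these two steps (pooling plus projection) and likewise returns unnormalized features, so the explicit F.normalize is still required. A minimal equivalent sketch:

import torch
import torch.nn.functional as F

# Same result as text_model(...) + text_projection(...) above, assuming
# main_model is a transformers.CLIPModel; normalization stays manual.
with torch.no_grad():
    text_features = main_model.get_text_features(**text_inputs)  # (batch, proj_dim)
    text_features = F.normalize(text_features, dim=-1)

The comparison code then slices the normalized vector at config.color_emb_dim and config.hierarchy_emb_dim. This only makes sense under the assumption that the main model was trained so that its projection space is laid out as [color dims | hierarchy dims | rest]; the cosine similarities printed at the end are the sanity check for that layout.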
@@ -167,8 +194,10 @@ def example_search(models, image_path: str = None, text_query: str = None):
         image_inputs = {k: v.to(device) for k, v in image_inputs.items()}
 
         with torch.no_grad():
-
-
+            # Use vision_model directly for image-only processing
+            vision_outputs = main_model.vision_model(**image_inputs)
+            image_features = main_model.visual_projection(vision_outputs.pooler_output)
+            image_features = F.normalize(image_features, dim=-1)
 
         print(f" 🎯 Image embedding: {image_features.shape}")
 
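The image branch mirrors the text branch: vision_model followed by visual_projection is what CLIPModel.get_image_features does internally, again without normalization. A sketch of the equivalent call, under the same transformers.CLIPModel assumption:

# Equivalent image route via the public helper; normalization stays manual.
with torch.no_grad():
    image_features = main_model.get_image_features(**image_inputs)
    image_features = F.normalize(image_features, dim=-1)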
@@ -192,7 +221,7 @@ if __name__ == "__main__":
     parser.add_argument(
         "--image",
         type=str,
-        default=
+        default="red_dress.png",
         help="Path to an image"
     )
 
@@ -204,3 +233,6 @@ if __name__ == "__main__":
     # Example search
     example_search(models, image_path=args.image, text_query=args.text)
 
+
+
+
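Putting it together, a run of this script boils down to the two entry points below. The repo id is a placeholder, and the exact structure returned by load_models_from_hf is inferred only from how example_search consumes it:

# Hypothetical end-to-end use of this file's entry points.
models = load_models_from_hf("some-user/some-repo")  # placeholder repo id
example_search(models, image_path="red_dress.png", text_query="a red dress")

From the command line, the argparse block points at the same flow: python example_usage.py --text "a red dress" --image red_dress.png, with --image now defaulting to red_dress.png.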