Upload evaluation/main_model_evaluation.py with huggingface_hub
Browse files- evaluation/main_model_evaluation.py +175 -53
evaluation/main_model_evaluation.py
CHANGED
|
@@ -19,7 +19,7 @@ import warnings
|
|
| 19 |
warnings.filterwarnings('ignore')
|
| 20 |
from transformers import CLIPProcessor, CLIPModel as CLIPModel_transformers
|
| 21 |
|
| 22 |
-
from config import main_model_path, hierarchy_model_path, color_emb_dim, hierarchy_emb_dim, local_dataset_path, column_local_image_path
|
| 23 |
|
| 24 |
|
| 25 |
def create_fashion_mnist_to_hierarchy_mapping(hierarchy_classes):
|
|
@@ -176,7 +176,7 @@ class FashionMNISTDataset(Dataset):
|
|
| 176 |
|
| 177 |
def load_fashion_mnist_dataset(max_samples=1000, hierarchy_classes=None):
|
| 178 |
print("📊 Loading Fashion-MNIST test dataset...")
|
| 179 |
-
df = pd.read_csv(
|
| 180 |
print(f"✅ Fashion-MNIST dataset loaded: {len(df)} samples")
|
| 181 |
|
| 182 |
# Create mapping if hierarchy classes are provided
|
|
@@ -600,14 +600,14 @@ class ColorHierarchyEvaluator:
|
|
| 600 |
return sorted(set(hierarchies))
|
| 601 |
|
| 602 |
def extract_color_embeddings(self, dataloader, embedding_type='text', max_samples=10000):
|
| 603 |
-
"""Extract color embeddings from dims 0-16"""
|
| 604 |
all_embeddings = []
|
| 605 |
all_colors = []
|
| 606 |
all_hierarchies = []
|
| 607 |
|
| 608 |
sample_count = 0
|
| 609 |
with torch.no_grad():
|
| 610 |
-
for batch in tqdm(dataloader, desc=f"Extracting {embedding_type} color embeddings (dims 0-
|
| 611 |
if sample_count >= max_samples:
|
| 612 |
break
|
| 613 |
|
|
@@ -627,9 +627,10 @@ class ColorHierarchyEvaluator:
|
|
| 627 |
else:
|
| 628 |
embeddings = outputs.text_embeds
|
| 629 |
|
| 630 |
-
# Extract only color embeddings (dims 0-16)
|
| 631 |
-
color_embeddings = embeddings[:, :self.color_emb_dim]
|
| 632 |
|
|
|
|
| 633 |
all_embeddings.append(color_embeddings.cpu().numpy())
|
| 634 |
all_colors.extend(colors)
|
| 635 |
all_hierarchies.extend(hierarchies)
|
|
@@ -670,8 +671,9 @@ class ColorHierarchyEvaluator:
|
|
| 670 |
embeddings = outputs.text_embeds
|
| 671 |
|
| 672 |
# Extract hierarchy embeddings (dims 17-79 -> indices 16:79)
|
| 673 |
-
hierarchy_embeddings = embeddings[:, 16:79]
|
| 674 |
-
|
|
|
|
| 675 |
all_embeddings.append(hierarchy_embeddings.cpu().numpy())
|
| 676 |
all_colors.extend(colors)
|
| 677 |
all_hierarchies.extend(hierarchies)
|
|
@@ -683,6 +685,46 @@ class ColorHierarchyEvaluator:
|
|
| 683 |
|
| 684 |
return np.vstack(all_embeddings), all_colors, all_hierarchies
|
| 685 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 686 |
def extract_baseline_embeddings_batch(self, dataloader, embedding_type='text', max_samples=10000):
|
| 687 |
"""Extract embeddings from baseline Fashion CLIP model"""
|
| 688 |
all_embeddings = []
|
|
@@ -883,6 +925,52 @@ class ColorHierarchyEvaluator:
|
|
| 883 |
predictions.append(predicted_label)
|
| 884 |
return predictions
|
| 885 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 886 |
def create_confusion_matrix(self, true_labels, predicted_labels, title="Confusion Matrix", label_type="Label"):
|
| 887 |
"""Create and plot confusion matrix"""
|
| 888 |
unique_labels = sorted(list(set(true_labels + predicted_labels)))
|
|
@@ -898,11 +986,34 @@ class ColorHierarchyEvaluator:
|
|
| 898 |
plt.tight_layout()
|
| 899 |
return plt.gcf(), accuracy, cm
|
| 900 |
|
| 901 |
-
def evaluate_classification_performance(self, embeddings, labels, embedding_type="Embeddings", label_type="Label"
|
| 902 |
-
|
| 903 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 904 |
accuracy = accuracy_score(labels, predictions)
|
| 905 |
-
fig, acc, cm = self.create_confusion_matrix(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 906 |
unique_labels = sorted(list(set(labels)))
|
| 907 |
report = classification_report(labels, predictions, labels=unique_labels, target_names=unique_labels, output_dict=True)
|
| 908 |
return {
|
|
@@ -1046,68 +1157,79 @@ class ColorHierarchyEvaluator:
|
|
| 1046 |
|
| 1047 |
results = {}
|
| 1048 |
|
| 1049 |
-
# ==========
|
| 1050 |
-
print("\n
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1051 |
print("=" * 50)
|
| 1052 |
|
| 1053 |
-
#
|
| 1054 |
-
print("\n📝 Extracting text color embeddings...")
|
| 1055 |
-
|
| 1056 |
-
print(f"
|
| 1057 |
-
text_color_metrics = self.compute_similarity_metrics(
|
|
|
|
| 1058 |
text_color_class = self.evaluate_classification_performance(
|
| 1059 |
-
|
|
|
|
|
|
|
| 1060 |
)
|
| 1061 |
text_color_metrics.update(text_color_class)
|
| 1062 |
results['text_color'] = text_color_metrics
|
| 1063 |
|
| 1064 |
-
|
| 1065 |
-
|
| 1066 |
-
|
| 1067 |
-
|
| 1068 |
-
|
| 1069 |
-
image_color_embeddings, image_colors, _ = self.extract_color_embeddings(dataloader, 'image', max_samples)
|
| 1070 |
-
print(f" Image color embeddings shape: {image_color_embeddings.shape}")
|
| 1071 |
-
image_color_metrics = self.compute_similarity_metrics(image_color_embeddings, image_colors)
|
| 1072 |
image_color_class = self.evaluate_classification_performance(
|
| 1073 |
-
|
|
|
|
|
|
|
| 1074 |
)
|
| 1075 |
image_color_metrics.update(image_color_class)
|
| 1076 |
results['image_color'] = image_color_metrics
|
| 1077 |
|
| 1078 |
-
|
| 1079 |
-
|
| 1080 |
-
|
| 1081 |
-
# ========== HIERARCHY EVALUATION (DIMS 16-79) ==========
|
| 1082 |
-
print("\n📋 HIERARCHY EVALUATION (dims 16-79)")
|
| 1083 |
print("=" * 50)
|
| 1084 |
|
| 1085 |
-
#
|
| 1086 |
-
print("\n📝 Extracting text hierarchy embeddings...")
|
| 1087 |
-
|
| 1088 |
-
print(f"
|
| 1089 |
-
text_hierarchy_metrics = self.compute_similarity_metrics(
|
|
|
|
| 1090 |
text_hierarchy_class = self.evaluate_classification_performance(
|
| 1091 |
-
|
|
|
|
|
|
|
| 1092 |
)
|
| 1093 |
text_hierarchy_metrics.update(text_hierarchy_class)
|
| 1094 |
results['text_hierarchy'] = text_hierarchy_metrics
|
| 1095 |
|
| 1096 |
-
|
| 1097 |
-
|
| 1098 |
-
|
| 1099 |
-
|
| 1100 |
-
|
| 1101 |
-
image_hierarchy_embeddings, _, image_hierarchies = self.extract_hierarchy_embeddings(dataloader, 'image', max_samples)
|
| 1102 |
-
print(f" Image hierarchy embeddings shape: {image_hierarchy_embeddings.shape}")
|
| 1103 |
-
image_hierarchy_metrics = self.compute_similarity_metrics(image_hierarchy_embeddings, image_hierarchies)
|
| 1104 |
image_hierarchy_class = self.evaluate_classification_performance(
|
| 1105 |
-
|
|
|
|
|
|
|
| 1106 |
)
|
| 1107 |
image_hierarchy_metrics.update(image_hierarchy_class)
|
| 1108 |
results['image_hierarchy'] = image_hierarchy_metrics
|
| 1109 |
|
| 1110 |
-
|
|
|
|
|
|
|
|
|
|
| 1111 |
torch.cuda.empty_cache() if torch.cuda.is_available() else None
|
| 1112 |
|
| 1113 |
# ========== SAVE VISUALIZATIONS ==========
|
|
@@ -1724,7 +1846,7 @@ class ColorHierarchyEvaluator:
|
|
| 1724 |
'trained': trained_color_img_acc,
|
| 1725 |
'baseline': baseline_color_img_acc,
|
| 1726 |
'diff': diff,
|
| 1727 |
-
'trained_dims': '0-
|
| 1728 |
'baseline_dims': 'All dimensions (512 dims)'
|
| 1729 |
})
|
| 1730 |
|
|
@@ -1779,7 +1901,7 @@ class ColorHierarchyEvaluator:
|
|
| 1779 |
print("\nRaisons probables:")
|
| 1780 |
print("\n1. 📐 CAPACITÉ DIMENSIONNELLE:")
|
| 1781 |
print(" • Baseline: Utilise TOUTES les 512 dimensions des embeddings")
|
| 1782 |
-
print(" • Modèle entraîné: Utilise seulement
|
| 1783 |
print(" • Impact: La baseline a accès à plus d'information pour la classification")
|
| 1784 |
|
| 1785 |
print("\n2. 🎯 SUR-SPÉCIALISATION:")
|
|
@@ -1829,7 +1951,7 @@ if __name__ == "__main__":
|
|
| 1829 |
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
|
| 1830 |
print(f"Using device: {device}")
|
| 1831 |
|
| 1832 |
-
directory = '
|
| 1833 |
max_samples = 10000
|
| 1834 |
|
| 1835 |
evaluator = ColorHierarchyEvaluator(device=device, directory=directory)
|
|
|
|
| 19 |
warnings.filterwarnings('ignore')
|
| 20 |
from transformers import CLIPProcessor, CLIPModel as CLIPModel_transformers
|
| 21 |
|
| 22 |
+
from config import main_model_path, hierarchy_model_path, color_emb_dim, hierarchy_emb_dim, local_dataset_path, column_local_image_path
|
| 23 |
|
| 24 |
|
| 25 |
def create_fashion_mnist_to_hierarchy_mapping(hierarchy_classes):
|
|
|
|
| 176 |
|
| 177 |
def load_fashion_mnist_dataset(max_samples=1000, hierarchy_classes=None):
|
| 178 |
print("📊 Loading Fashion-MNIST test dataset...")
|
| 179 |
+
df = pd.read_csv("/Users/leaattiasarfati/Desktop/docs/search/old/MainModel/data/fashion-mnist_test.csv")
|
| 180 |
print(f"✅ Fashion-MNIST dataset loaded: {len(df)} samples")
|
| 181 |
|
| 182 |
# Create mapping if hierarchy classes are provided
|
|
|
|
| 600 |
return sorted(set(hierarchies))
|
| 601 |
|
| 602 |
def extract_color_embeddings(self, dataloader, embedding_type='text', max_samples=10000):
|
| 603 |
+
"""Extract color embeddings from dims 0-15 (16 dimensions)"""
|
| 604 |
all_embeddings = []
|
| 605 |
all_colors = []
|
| 606 |
all_hierarchies = []
|
| 607 |
|
| 608 |
sample_count = 0
|
| 609 |
with torch.no_grad():
|
| 610 |
+
for batch in tqdm(dataloader, desc=f"Extracting {embedding_type} color embeddings (dims 0-15)"):
|
| 611 |
if sample_count >= max_samples:
|
| 612 |
break
|
| 613 |
|
|
|
|
| 627 |
else:
|
| 628 |
embeddings = outputs.text_embeds
|
| 629 |
|
| 630 |
+
# Extract only color embeddings (dims 0-15, i.e., first 16 dimensions)
|
| 631 |
+
# color_embeddings = embeddings[:, :self.color_emb_dim]
|
| 632 |
|
| 633 |
+
color_embeddings = embeddings
|
| 634 |
all_embeddings.append(color_embeddings.cpu().numpy())
|
| 635 |
all_colors.extend(colors)
|
| 636 |
all_hierarchies.extend(hierarchies)
|
|
|
|
| 671 |
embeddings = outputs.text_embeds
|
| 672 |
|
| 673 |
# Extract hierarchy embeddings (dims 17-79 -> indices 16:79)
|
| 674 |
+
# hierarchy_embeddings = embeddings[:, 16:79]
|
| 675 |
+
|
| 676 |
+
hierarchy_embeddings = embeddings
|
| 677 |
all_embeddings.append(hierarchy_embeddings.cpu().numpy())
|
| 678 |
all_colors.extend(colors)
|
| 679 |
all_hierarchies.extend(hierarchies)
|
|
|
|
| 685 |
|
| 686 |
return np.vstack(all_embeddings), all_colors, all_hierarchies
|
| 687 |
|
| 688 |
+
def extract_full_embeddings(self, dataloader, embedding_type='text', max_samples=10000):
    """Extract full 512-dimensional embeddings (all dimensions).

    Args:
        dataloader: Yields (images, texts, colors, hierarchies) batches.
        embedding_type: 'text' or 'image'; any other value falls back to
            text embeddings (matches the behavior of the sibling extractors).
        max_samples: Stop iterating once at least this many samples were seen.

    Returns:
        Tuple of (embeddings ndarray of shape [N, 512], colors list,
        hierarchies list).
    """
    all_embeddings = []
    all_colors = []
    all_hierarchies = []

    sample_count = 0
    with torch.no_grad():
        for batch in tqdm(dataloader, desc=f"Extracting {embedding_type} FULL embeddings (all dims)"):
            if sample_count >= max_samples:
                break

            images, texts, colors, hierarchies = batch
            images = images.to(self.device)
            # Grayscale input: replicate the single channel to 3 for CLIP.
            images = images.expand(-1, 3, -1, -1)

            text_inputs = self.processor(text=texts, padding=True, return_tensors="pt")
            text_inputs = {k: v.to(self.device) for k, v in text_inputs.items()}

            outputs = self.model(**text_inputs, pixel_values=images)

            if embedding_type == 'image':
                embeddings = outputs.image_embeds
            else:
                # 'text' and any unrecognized value both use text embeddings
                # (the original if/elif/else had identical text branches).
                embeddings = outputs.text_embeds

            # Use all 512 dimensions
            all_embeddings.append(embeddings.cpu().numpy())
            all_colors.extend(colors)
            all_hierarchies.extend(hierarchies)

            sample_count += len(images)

            del images, text_inputs, outputs, embeddings
            # Idiomatic statement instead of a conditional expression used
            # purely for its side effect.
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

    return np.vstack(all_embeddings), all_colors, all_hierarchies
|
| 727 |
+
|
| 728 |
def extract_baseline_embeddings_batch(self, dataloader, embedding_type='text', max_samples=10000):
|
| 729 |
"""Extract embeddings from baseline Fashion CLIP model"""
|
| 730 |
all_embeddings = []
|
|
|
|
| 925 |
predictions.append(predicted_label)
|
| 926 |
return predictions
|
| 927 |
|
| 928 |
+
def predict_labels_ensemble(self, specialized_embeddings, full_embeddings, labels,
                            specialized_weight=0.5):
    """
    Ensemble prediction combining specialized (16/64 dims) and full (512 dims) embeddings.

    Args:
        specialized_embeddings: [N, d] embeddings from specialized dimensions
            (e.g., dims 0-15 for color)
        full_embeddings: [N, 512] full embeddings
        labels: True labels, used to compute per-class centroids
        specialized_weight: Weight for specialized embeddings
            (0.0 = only full, 1.0 = only specialized)

    Returns:
        List of predicted labels using the weighted ensemble
    """
    unique_labels = list(set(labels))
    labels_arr = np.asarray(labels)

    # Stack per-class centroids into [K, d] matrices so that similarity
    # can be computed in a single batched call per embedding space.
    specialized_centroids = np.stack(
        [specialized_embeddings[labels_arr == label].mean(axis=0) for label in unique_labels]
    )
    full_centroids = np.stack(
        [full_embeddings[labels_arr == label].mean(axis=0) for label in unique_labels]
    )

    # [N, K] similarity matrices in two vectorized calls — the previous
    # version made one cosine_similarity([...], [...]) call per sample per
    # label (N*K Python-level calls), which was quadratically slow.
    spec_sim = cosine_similarity(specialized_embeddings, specialized_centroids)
    full_sim = cosine_similarity(full_embeddings, full_centroids)

    # Weighted combination of the two similarity spaces.
    combined = specialized_weight * spec_sim + (1 - specialized_weight) * full_sim

    # argmax keeps the FIRST maximum along each row, which matches the
    # original strict ">" comparison over unique_labels in the same order.
    return [unique_labels[i] for i in combined.argmax(axis=1)]
|
| 973 |
+
|
| 974 |
def create_confusion_matrix(self, true_labels, predicted_labels, title="Confusion Matrix", label_type="Label"):
|
| 975 |
"""Create and plot confusion matrix"""
|
| 976 |
unique_labels = sorted(list(set(true_labels + predicted_labels)))
|
|
|
|
| 986 |
plt.tight_layout()
|
| 987 |
return plt.gcf(), accuracy, cm
|
| 988 |
|
| 989 |
+
def evaluate_classification_performance(self, embeddings, labels, embedding_type="Embeddings", label_type="Label",
|
| 990 |
+
full_embeddings=None, ensemble_weight=0.5):
|
| 991 |
+
"""
|
| 992 |
+
Evaluate classification performance and create confusion matrix.
|
| 993 |
+
|
| 994 |
+
Args:
|
| 995 |
+
embeddings: Specialized embeddings (e.g., dims 0-15 for color or dims 16-79 for hierarchy)
|
| 996 |
+
labels: True labels
|
| 997 |
+
embedding_type: Type of embeddings for display
|
| 998 |
+
label_type: Type of labels (Color/Hierarchy)
|
| 999 |
+
full_embeddings: Optional full 512-dim embeddings for ensemble (if None, uses only specialized)
|
| 1000 |
+
ensemble_weight: Weight for specialized embeddings in ensemble (0.0 = only full, 1.0 = only specialized)
|
| 1001 |
+
"""
|
| 1002 |
+
if full_embeddings is not None:
|
| 1003 |
+
# Use ensemble prediction
|
| 1004 |
+
predictions = self.predict_labels_ensemble(embeddings, full_embeddings, labels, ensemble_weight)
|
| 1005 |
+
title_suffix = f" (Ensemble: {ensemble_weight:.1f} specialized + {1-ensemble_weight:.1f} full)"
|
| 1006 |
+
else:
|
| 1007 |
+
# Use only specialized embeddings
|
| 1008 |
+
predictions = self.predict_labels_from_embeddings(embeddings, labels)
|
| 1009 |
+
title_suffix = ""
|
| 1010 |
+
|
| 1011 |
accuracy = accuracy_score(labels, predictions)
|
| 1012 |
+
fig, acc, cm = self.create_confusion_matrix(
|
| 1013 |
+
labels, predictions,
|
| 1014 |
+
f"{embedding_type} - {label_type} Classification{title_suffix}",
|
| 1015 |
+
label_type
|
| 1016 |
+
)
|
| 1017 |
unique_labels = sorted(list(set(labels)))
|
| 1018 |
report = classification_report(labels, predictions, labels=unique_labels, target_names=unique_labels, output_dict=True)
|
| 1019 |
return {
|
|
|
|
| 1157 |
|
| 1158 |
results = {}
|
| 1159 |
|
| 1160 |
+
# ========== EXTRACT FULL EMBEDDINGS FOR ENSEMBLE ==========
|
| 1161 |
+
print("\n📦 Extracting full 512-dimensional embeddings for ensemble...")
|
| 1162 |
+
text_full_embeddings, text_colors_full, text_hierarchies_full = self.extract_full_embeddings(dataloader, 'text', max_samples)
|
| 1163 |
+
image_full_embeddings, image_colors_full, image_hierarchies_full = self.extract_full_embeddings(dataloader, 'image', max_samples)
|
| 1164 |
+
print(f" Text full embeddings shape: {text_full_embeddings.shape}")
|
| 1165 |
+
print(f" Image full embeddings shape: {image_full_embeddings.shape}")
|
| 1166 |
+
|
| 1167 |
+
# ========== COLOR EVALUATION (DIMS 0-15) WITH ENSEMBLE ==========
|
| 1168 |
+
print("\n🎨 COLOR EVALUATION (dims 0-15) - Using Ensemble")
|
| 1169 |
print("=" * 50)
|
| 1170 |
|
| 1171 |
+
# Extract specialized color embeddings (dims 0-15)
|
| 1172 |
+
print("\n📝 Extracting specialized text color embeddings (dims 0-15)...")
|
| 1173 |
+
text_color_embeddings_spec = text_full_embeddings[:, :self.color_emb_dim] # First 16 dims
|
| 1174 |
+
print(f" Specialized text color embeddings shape: {text_color_embeddings_spec.shape}")
|
| 1175 |
+
text_color_metrics = self.compute_similarity_metrics(text_color_embeddings_spec, text_colors_full)
|
| 1176 |
+
# Use ensemble: combine specialized (16D) + full (512D)
|
| 1177 |
text_color_class = self.evaluate_classification_performance(
|
| 1178 |
+
text_color_embeddings_spec, text_colors_full,
|
| 1179 |
+
"Text Color Embeddings (Ensemble)", "Color",
|
| 1180 |
+
full_embeddings=text_full_embeddings, ensemble_weight=0.4 # 40% specialized, 60% full
|
| 1181 |
)
|
| 1182 |
text_color_metrics.update(text_color_class)
|
| 1183 |
results['text_color'] = text_color_metrics
|
| 1184 |
|
| 1185 |
+
# Image color embeddings with ensemble
|
| 1186 |
+
print("\n🖼️ Extracting specialized image color embeddings (dims 0-15)...")
|
| 1187 |
+
image_color_embeddings_spec = image_full_embeddings[:, :self.color_emb_dim] # First 16 dims
|
| 1188 |
+
print(f" Specialized image color embeddings shape: {image_color_embeddings_spec.shape}")
|
| 1189 |
+
image_color_metrics = self.compute_similarity_metrics(image_color_embeddings_spec, image_colors_full)
|
|
|
|
|
|
|
|
|
|
| 1190 |
image_color_class = self.evaluate_classification_performance(
|
| 1191 |
+
image_color_embeddings_spec, image_colors_full,
|
| 1192 |
+
"Image Color Embeddings (Ensemble)", "Color",
|
| 1193 |
+
full_embeddings=image_full_embeddings, ensemble_weight=0.4
|
| 1194 |
)
|
| 1195 |
image_color_metrics.update(image_color_class)
|
| 1196 |
results['image_color'] = image_color_metrics
|
| 1197 |
|
| 1198 |
+
# ========== HIERARCHY EVALUATION (DIMS 16-79) WITH ENSEMBLE ==========
|
| 1199 |
+
print("\n📋 HIERARCHY EVALUATION (dims 16-79) - Using Ensemble")
|
|
|
|
|
|
|
|
|
|
| 1200 |
print("=" * 50)
|
| 1201 |
|
| 1202 |
+
# Extract specialized hierarchy embeddings (dims 16-79)
|
| 1203 |
+
print("\n📝 Extracting specialized text hierarchy embeddings (dims 16-79)...")
|
| 1204 |
+
text_hierarchy_embeddings_spec = text_full_embeddings[:, self.color_emb_dim:self.color_emb_dim+self.hierarchy_emb_dim] # dims 16-79
|
| 1205 |
+
print(f" Specialized text hierarchy embeddings shape: {text_hierarchy_embeddings_spec.shape}")
|
| 1206 |
+
text_hierarchy_metrics = self.compute_similarity_metrics(text_hierarchy_embeddings_spec, text_hierarchies_full)
|
| 1207 |
+
# Use ensemble: combine specialized (64D) + full (512D)
|
| 1208 |
text_hierarchy_class = self.evaluate_classification_performance(
|
| 1209 |
+
text_hierarchy_embeddings_spec, text_hierarchies_full,
|
| 1210 |
+
"Text Hierarchy Embeddings (Ensemble)", "Hierarchy",
|
| 1211 |
+
full_embeddings=text_full_embeddings, ensemble_weight=0.4
|
| 1212 |
)
|
| 1213 |
text_hierarchy_metrics.update(text_hierarchy_class)
|
| 1214 |
results['text_hierarchy'] = text_hierarchy_metrics
|
| 1215 |
|
| 1216 |
+
# Image hierarchy embeddings with ensemble
|
| 1217 |
+
print("\n🖼️ Extracting specialized image hierarchy embeddings (dims 16-79)...")
|
| 1218 |
+
image_hierarchy_embeddings_spec = image_full_embeddings[:, self.color_emb_dim:self.color_emb_dim+self.hierarchy_emb_dim] # dims 16-79
|
| 1219 |
+
print(f" Specialized image hierarchy embeddings shape: {image_hierarchy_embeddings_spec.shape}")
|
| 1220 |
+
image_hierarchy_metrics = self.compute_similarity_metrics(image_hierarchy_embeddings_spec, image_hierarchies_full)
|
|
|
|
|
|
|
|
|
|
| 1221 |
image_hierarchy_class = self.evaluate_classification_performance(
|
| 1222 |
+
image_hierarchy_embeddings_spec, image_hierarchies_full,
|
| 1223 |
+
"Image Hierarchy Embeddings (Ensemble)", "Hierarchy",
|
| 1224 |
+
full_embeddings=image_full_embeddings, ensemble_weight=0.4
|
| 1225 |
)
|
| 1226 |
image_hierarchy_metrics.update(image_hierarchy_class)
|
| 1227 |
results['image_hierarchy'] = image_hierarchy_metrics
|
| 1228 |
|
| 1229 |
+
# Cleanup
|
| 1230 |
+
del text_full_embeddings, image_full_embeddings
|
| 1231 |
+
del text_color_embeddings_spec, image_color_embeddings_spec
|
| 1232 |
+
del text_hierarchy_embeddings_spec, image_hierarchy_embeddings_spec
|
| 1233 |
torch.cuda.empty_cache() if torch.cuda.is_available() else None
|
| 1234 |
|
| 1235 |
# ========== SAVE VISUALIZATIONS ==========
|
|
|
|
| 1846 |
'trained': trained_color_img_acc,
|
| 1847 |
'baseline': baseline_color_img_acc,
|
| 1848 |
'diff': diff,
|
| 1849 |
+
'trained_dims': '0-15 (16 dims)',
|
| 1850 |
'baseline_dims': 'All dimensions (512 dims)'
|
| 1851 |
})
|
| 1852 |
|
|
|
|
| 1901 |
print("\nRaisons probables:")
|
| 1902 |
print("\n1. 📐 CAPACITÉ DIMENSIONNELLE:")
|
| 1903 |
print(" • Baseline: Utilise TOUTES les 512 dimensions des embeddings")
|
| 1904 |
+
print(" • Modèle entraîné: Utilise seulement 16 dims (couleur) ou 64 dims (hiérarchie)")
|
| 1905 |
print(" • Impact: La baseline a accès à plus d'information pour la classification")
|
| 1906 |
|
| 1907 |
print("\n2. 🎯 SUR-SPÉCIALISATION:")
|
|
|
|
| 1951 |
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
|
| 1952 |
print(f"Using device: {device}")
|
| 1953 |
|
| 1954 |
+
directory = 'main_model_analysis_model'
|
| 1955 |
max_samples = 10000
|
| 1956 |
|
| 1957 |
evaluator = ColorHierarchyEvaluator(device=device, directory=directory)
|