"""
Advanced metrics calculation for outfit recommendation system.
Includes accuracy, precision, recall, F1 score, and other research-grade metrics.
"""

import numpy as np
import torch
import torch.nn.functional as F
from typing import Any, Dict, List, Optional
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score
import json
from pathlib import Path


class AdvancedMetrics:
    """Calculate comprehensive metrics for outfit recommendation models."""
    
    def __init__(self):
        self.reset()
    
    def reset(self):
        """Reset all metrics."""
        self.predictions = []
        self.targets = []
        self.scores = []
        self.embeddings = []
        self.outfit_scores = []
    
    def add_batch(self, predictions: torch.Tensor, targets: torch.Tensor,
                  scores: Optional[torch.Tensor] = None, embeddings: Optional[torch.Tensor] = None):
        """Add a batch of predictions and targets."""
        self.predictions.extend(predictions.detach().cpu().numpy())
        self.targets.extend(targets.detach().cpu().numpy())
        
        if scores is not None:
            self.scores.extend(scores.detach().cpu().numpy())
        
        if embeddings is not None:
            self.embeddings.extend(embeddings.detach().cpu().numpy())
    
    def add_outfit_scores(self, outfit_scores: List[float]):
        """Add outfit compatibility scores."""
        self.outfit_scores.extend(outfit_scores)
    
    def calculate_classification_metrics(self) -> Dict[str, float]:
        """Calculate classification metrics."""
        if not self.predictions or not self.targets:
            return {}
        
        preds = np.array(self.predictions)
        targets = np.array(self.targets)
        
        # Threshold continuous values to hard 0/1 labels if not already binary
        if not np.isin(preds, (0, 1)).all():
            preds = (preds > 0.5).astype(int)
        
        if not np.isin(targets, (0, 1)).all():
            targets = (targets > 0.5).astype(int)
        
        accuracy = accuracy_score(targets, preds)
        precision, recall, f1, _ = precision_recall_fscore_support(
            targets, preds, average='weighted', zero_division=0
        )
        
        # Calculate per-class metrics
        precision_macro, recall_macro, f1_macro, _ = precision_recall_fscore_support(
            targets, preds, average='macro', zero_division=0
        )
        
        # Calculate AUC if we have scores
        auc = None
        if self.scores:
            try:
                scores_array = np.array(self.scores)
                if len(np.unique(targets)) > 1:  # Need both classes for AUC
                    auc = roc_auc_score(targets, scores_array)
            except ValueError:
                auc = None
        
        return {
            "accuracy": float(accuracy),
            "precision_weighted": float(precision),
            "recall_weighted": float(recall),
            "f1_weighted": float(f1),
            "precision_macro": float(precision_macro),
            "recall_macro": float(recall_macro),
            "f1_macro": float(f1_macro),
            "auc": float(auc) if auc is not None else None
        }
    
    def calculate_embedding_metrics(self) -> Dict[str, float]:
        """Calculate embedding quality metrics."""
        if not self.embeddings:
            return {}
        
        embeddings = np.array(self.embeddings)
        
        # Calculate embedding statistics
        mean_norm = np.mean(np.linalg.norm(embeddings, axis=1))
        std_norm = np.std(np.linalg.norm(embeddings, axis=1))
        
        # Calculate intra-class and inter-class distances
        if len(self.targets) > 1:
            # Flatten so boolean masks align with the first axis of `embeddings`
            targets = np.array(self.targets).reshape(-1)
            unique_classes = np.unique(targets)
            
            intra_class_distances = []
            inter_class_distances = []
            
            for class_label in unique_classes:
                class_embeddings = embeddings[targets == class_label]
                if len(class_embeddings) > 1:
                    # Intra-class distances
                    for i in range(len(class_embeddings)):
                        for j in range(i + 1, len(class_embeddings)):
                            dist = np.linalg.norm(class_embeddings[i] - class_embeddings[j])
                            intra_class_distances.append(dist)
                
                # Inter-class distances
                other_embeddings = embeddings[targets != class_label]
                if len(other_embeddings) > 0:
                    for class_emb in class_embeddings:
                        for other_emb in other_embeddings:
                            dist = np.linalg.norm(class_emb - other_emb)
                            inter_class_distances.append(dist)
            
            avg_intra_class = np.mean(intra_class_distances) if intra_class_distances else 0
            avg_inter_class = np.mean(inter_class_distances) if inter_class_distances else 0
            
            # Separation ratio (higher is better)
            separation_ratio = avg_inter_class / (avg_intra_class + 1e-8)
        else:
            avg_intra_class = 0
            avg_inter_class = 0
            separation_ratio = 0
        
        return {
            "embedding_mean_norm": float(mean_norm),
            "embedding_std_norm": float(std_norm),
            "avg_intra_class_distance": float(avg_intra_class),
            "avg_inter_class_distance": float(avg_inter_class),
            "separation_ratio": float(separation_ratio)
        }
    
    def calculate_outfit_metrics(self) -> Dict[str, float]:
        """Calculate outfit-specific metrics."""
        if not self.outfit_scores:
            return {}
        
        scores = np.array(self.outfit_scores)
        
        return {
            "outfit_score_mean": float(np.mean(scores)),
            "outfit_score_std": float(np.std(scores)),
            "outfit_score_min": float(np.min(scores)),
            "outfit_score_max": float(np.max(scores)),
            "outfit_score_median": float(np.median(scores))
        }
    
    def calculate_all_metrics(self) -> Dict[str, Any]:
        """Calculate all available metrics."""
        metrics = {
            "classification": self.calculate_classification_metrics(),
            "embeddings": self.calculate_embedding_metrics(),
            "outfits": self.calculate_outfit_metrics()
        }
        
        # Add summary statistics
        metrics["summary"] = {
            "total_predictions": len(self.predictions),
            "total_targets": len(self.targets),
            "total_scores": len(self.scores),
            "total_embeddings": len(self.embeddings),
            "total_outfit_scores": len(self.outfit_scores)
        }
        
        return metrics
    
    def save_metrics(self, filepath: str, additional_info: Optional[Dict[str, Any]] = None):
        """Save metrics to JSON file."""
        metrics = self.calculate_all_metrics()
        
        if additional_info:
            metrics["additional_info"] = additional_info
        
        # Ensure directory exists
        Path(filepath).parent.mkdir(parents=True, exist_ok=True)
        
        with open(filepath, 'w') as f:
            json.dump(metrics, f, indent=2)
        
        return metrics
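

# NOTE: the nested loops in calculate_embedding_metrics are O(n^2) in pure
# Python. The helper below is a hedged, illustrative sketch of a vectorized
# numpy alternative; it is an addition for exposition and is not called
# anywhere in this module.
def pairwise_distance_matrix(embeddings: np.ndarray) -> np.ndarray:
    """Return the (n, n) matrix of Euclidean distances between embedding rows."""
    # Broadcasting: (n, 1, d) - (1, n, d) -> (n, n, d); norm over the feature axis
    diff = embeddings[:, None, :] - embeddings[None, :, :]
    return np.linalg.norm(diff, axis=-1)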


def calculate_triplet_metrics(anchor_emb: torch.Tensor, positive_emb: torch.Tensor,
                              negative_emb: torch.Tensor, margin: float = 0.2) -> Dict[str, float]:
    """Calculate triplet-specific metrics."""
    
    # Calculate distances
    pos_dist = F.pairwise_distance(anchor_emb, positive_emb, p=2)
    neg_dist = F.pairwise_distance(anchor_emb, negative_emb, p=2)
    
    # Triplet loss
    triplet_loss = F.relu(pos_dist - neg_dist + margin).mean()
    
    # Accuracy: positive distance < negative distance
    correct = (pos_dist < neg_dist).float().mean()
    
    # Margin violations
    margin_violations = (pos_dist - neg_dist + margin > 0).float().mean()
    
    # Distance statistics
    pos_dist_mean = pos_dist.mean()
    neg_dist_mean = neg_dist.mean()
    distance_ratio = neg_dist_mean / (pos_dist_mean + 1e-8)
    
    return {
        "triplet_loss": float(triplet_loss),
        "triplet_accuracy": float(correct),
        "margin_violations": float(margin_violations),
        "positive_distance_mean": float(pos_dist_mean),
        "negative_distance_mean": float(neg_dist_mean),
        "distance_ratio": float(distance_ratio)
    }


def calculate_outfit_compatibility_metrics(outfit_scores: torch.Tensor,
                                           labels: torch.Tensor) -> Dict[str, float]:
    """Calculate outfit compatibility specific metrics."""
    
    # Convert to numpy for sklearn compatibility
    scores_np = outfit_scores.detach().cpu().numpy()
    labels_np = labels.detach().cpu().numpy()
    
    # Binary classification metrics
    pred_binary = (scores_np > 0.5).astype(int)
    
    accuracy = accuracy_score(labels_np, pred_binary)
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels_np, pred_binary, average='weighted', zero_division=0
    )
    
    # AUC if we have both classes
    auc = None
    if len(np.unique(labels_np)) > 1:
        try:
            auc = roc_auc_score(labels_np, scores_np)
        except ValueError:
            auc = None
    
    # Score distribution metrics
    compatible_scores = scores_np[labels_np == 1]
    incompatible_scores = scores_np[labels_np == 0]
    
    return {
        "compatibility_accuracy": float(accuracy),
        "compatibility_precision": float(precision),
        "compatibility_recall": float(recall),
        "compatibility_f1": float(f1),
        "compatibility_auc": float(auc) if auc is not None else None,
        "compatible_score_mean": float(np.mean(compatible_scores)) if len(compatible_scores) > 0 else 0,
        "incompatible_score_mean": float(np.mean(incompatible_scores)) if len(incompatible_scores) > 0 else 0,
        "score_separation": float(np.mean(compatible_scores) - np.mean(incompatible_scores)) if len(compatible_scores) > 0 and len(incompatible_scores) > 0 else 0
    }


if __name__ == "__main__":
    # Example usage
    metrics = AdvancedMetrics()
    
    # Simulate some data: raw logits -> sigmoid scores -> hard 0/1 predictions
    logits = torch.randn(100, 1)
    scores = torch.sigmoid(logits)
    predictions = (scores > 0.5).float()
    targets = torch.randint(0, 2, (100, 1)).float()
    embeddings = torch.randn(100, 512)
    
    metrics.add_batch(predictions, targets, scores, embeddings)
    metrics.add_outfit_scores(scores.flatten().tolist())
    
    # Calculate and save metrics
    all_metrics = metrics.calculate_all_metrics()
    print("Calculated metrics:")
    print(json.dumps(all_metrics, indent=2))
    
    # Save to file
    metrics.save_metrics("test_metrics.json", {"model": "test", "epoch": 1})
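
    # Hedged demo of the standalone helpers on synthetic tensors. Shapes and
    # values here are illustrative assumptions, not part of the original module.
    anchor = torch.randn(32, 512)
    positive = anchor + 0.1 * torch.randn(32, 512)  # perturbed copies: "close" pairs
    negative = torch.randn(32, 512)                 # unrelated embeddings
    print("Triplet metrics:")
    print(json.dumps(calculate_triplet_metrics(anchor, positive, negative, margin=0.2), indent=2))

    outfit_scores = torch.rand(64)
    outfit_labels = torch.randint(0, 2, (64,)).float()
    print("Compatibility metrics:")
    print(json.dumps(calculate_outfit_compatibility_metrics(outfit_scores, outfit_labels), indent=2))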