{
  "schema_version": "1.0",
  "generated_at": "2025-09-10T00:00:00Z",
  "model": "ResNet Item Embedder",
  "metadata": {
    "dataset": {
      "name": "Polyvore Outfits",
      "split": "nondisjoint",
      "train_outfits": 53306,
      "val_outfits": 5000,
      "test_outfits": 5000,
      "approx_item_count": 106000,
      "avg_items_per_outfit": 3.7,
      "class_definition": "Item category IDs used as proxy labels for kNN classification; retrieval is category-agnostic",
      "notes": "Outfits used for triplet sampling (anchor, positive from same outfit/category, negative from different outfit/category)."
    },
    "preprocessing": {
      "image": {
        "resize": {"shorter_side": 256, "interpolation": "bilinear"},
        "center_crop": 224,
        "normalize": {
          "mean": [0.485, 0.456, 0.406],
          "std": [0.229, 0.224, 0.225]
        }
      },
      "augmentations": {
        "strategy": "standard",
        "ops": [
          {"name": "RandomResizedCrop", "scale": [0.8, 1.0], "ratio": [0.9, 1.1], "p": 1.0},
          {"name": "RandomHorizontalFlip", "p": 0.5},
          {"name": "ColorJitter", "brightness": 0.2, "contrast": 0.2, "saturation": 0.2, "hue": 0.02, "p": 0.8},
          {"name": "RandomGrayscale", "p": 0.05}
        ],
        "strong_ops": [
          {"name": "RandomErasing", "p": 0.25, "scale": [0.02, 0.1], "ratio": [0.3, 3.3]},
          {"name": "GaussianBlur", "kernel": 23, "sigma": [0.1, 2.0], "p": 0.1}
        ]
      },
      "sampling": {
        "triplet_mining": "semi_hard",
        "triplet_margin": 0.2,
        "in_batch_negatives": true,
        "max_pos_per_anchor": 4,
        "max_neg_per_anchor": 16,
        "notes": "Semi-hard selects negatives farther than positives but still within margin to improve gradients."
      }
    },
    "architecture": {
      "backbone": {
        "type": "resnet50",
        "pretrained": "imagenet",
        "frozen_stages": 1,
        "feature_dim": 2048,
        "global_pool": "avg"
      },
      "projection_head": {
        "type": "mlp",
        "layers": [1024, 512],
        "activation": "relu",
        "batch_norm": true,
        "dropout": 0.0
      },
      "embedding": {
        "dim": 512,
        "normalize": true,
        "normalization_type": "l2",
        "temperature": null
      }
    },
    "hyperparameters": {
      "optimizer": "adamw",
      "learning_rate": 0.0003,
      "weight_decay": 0.0001,
      "batch_size": 16,
      "epochs": 50,
      "lr_scheduler": {
        "type": "cosine",
        "warmup_epochs": 3,
        "warmup_factor": 0.1
      },
      "loss": {
        "type": "triplet",
        "distance": "cosine",
        "margin": 0.2
      },
      "regularization": {
        "label_smoothing": 0.0,
        "gradient_clip_norm": 1.0
      }
    },
    "training_config": {
      "amp": true,
      "channels_last": true,
      "num_workers": 8,
      "pin_memory": true,
      "seed": 42,
      "deterministic": false,
      "cudnn_benchmark": true,
      "early_stopping": {"patience": 12, "min_delta": 0.0001},
      "checkpointing": {
        "save_best": true,
        "monitor": "val.triplet_loss",
        "mode": "min",
        "every_n_epochs": 1,
        "artifact_naming": "resnet_embedder_{epoch:02d}_{val_loss:.3f}.pth"
      },
      "logging": {
        "tensorboard": true,
        "metrics_every_n_steps": 100,
        "save_history_json": true
      }
    },
    "environment": {
      "hardware": {
        "gpu": {"model": "NVIDIA A100 40GB", "count": 1},
        "cpu": {"model": "Intel Xeon", "cores": 16},
        "ram_gb": 64,
        "storage": "NVMe SSD"
      },
      "software": {
        "os": "Ubuntu 22.04",
        "python": "3.10",
        "pytorch": "2.2",
        "cuda": "12.1",
        "cudnn": "9"
      },
      "reproducibility": {
        "seed_all": [1, 21, 42, 123, 2025],
        "numpy_seed": true,
        "torch_deterministic_layers": ["conv2d", "batchnorm"],
        "notes": "Small variations across seeds are expected due to data loader nondeterminism and AMP."
      }
    }
  },
  "experiments": {
    "dataset_size_sweep": [
      {
        "samples": 2000,
        "epochs": 35,
        "aggregate": {
          "best_val_triplet_loss_mean": 0.183,
          "best_val_triplet_loss_std": 0.005,
          "retrieval_test": {"recall_at_1": 0.522, "recall_at_5": 0.751, "recall_at_10": 0.815, "map": 0.612},
          "classification_proxy_test": {"accuracy": 0.908, "f1_weighted": 0.905},
          "silhouette_test": 0.318,
          "latency": {"embed_ms_mean": 8.9, "embed_ms_p95": 11.2, "throughput_sps": 271}
        },
        "per_seed": [
          {"seed": 1,   "best_epoch": 33, "best_val_triplet_loss": 0.185},
          {"seed": 21,  "best_epoch": 34, "best_val_triplet_loss": 0.182},
          {"seed": 42,  "best_epoch": 35, "best_val_triplet_loss": 0.183},
          {"seed": 123, "best_epoch": 33, "best_val_triplet_loss": 0.189},
          {"seed": 2025,"best_epoch": 34, "best_val_triplet_loss": 0.177}
        ],
        "notes": "Underfits slightly; retrieval plateaus early with small gallery."
      },
      {
        "samples": 5000,
        "epochs": 40,
        "aggregate": {
          "best_val_triplet_loss_mean": 0.176,
          "best_val_triplet_loss_std": 0.004,
          "retrieval_test": {"recall_at_1": 0.561, "recall_at_5": 0.792, "recall_at_10": 0.851, "map": 0.654},
          "classification_proxy_test": {"accuracy": 0.923, "f1_weighted": 0.922},
          "silhouette_test": 0.336,
          "latency": {"embed_ms_mean": 8.7, "embed_ms_p95": 10.9, "throughput_sps": 279}
        },
        "per_seed": [
          {"seed": 1,   "best_epoch": 38, "best_val_triplet_loss": 0.176},
          {"seed": 21,  "best_epoch": 40, "best_val_triplet_loss": 0.171},
          {"seed": 42,  "best_epoch": 39, "best_val_triplet_loss": 0.176},
          {"seed": 123, "best_epoch": 37, "best_val_triplet_loss": 0.180},
          {"seed": 2025,"best_epoch": 38, "best_val_triplet_loss": 0.177}
        ],
        "notes": "More stable negatives improve R@1 by ~4 points over 2k."
      },
      {
        "samples": 10000,
        "epochs": 45,
        "aggregate": {
          "best_val_triplet_loss_mean": 0.171,
          "best_val_triplet_loss_std": 0.004,
          "retrieval_test": {"recall_at_1": 0.603, "recall_at_5": 0.828, "recall_at_10": 0.886, "map": 0.701},
          "classification_proxy_test": {"accuracy": 0.938, "f1_weighted": 0.937},
          "silhouette_test": 0.353,
          "latency": {"embed_ms_mean": 8.6, "embed_ms_p95": 10.8, "throughput_sps": 284}
        },
        "per_seed": [
          {"seed": 1,   "best_epoch": 43, "best_val_triplet_loss": 0.174},
          {"seed": 21,  "best_epoch": 45, "best_val_triplet_loss": 0.169},
          {"seed": 42,  "best_epoch": 44, "best_val_triplet_loss": 0.171},
          {"seed": 123, "best_epoch": 43, "best_val_triplet_loss": 0.175},
          {"seed": 2025,"best_epoch": 44, "best_val_triplet_loss": 0.168}
        ],
        "notes": "Clear gains in separation ratio and MAP as data scales."
      },
      {
        "samples": 50000,
        "epochs": 48,
        "aggregate": {
          "best_val_triplet_loss_mean": 0.162,
          "best_val_triplet_loss_std": 0.003,
          "retrieval_test": {"recall_at_1": 0.662, "recall_at_5": 0.869, "recall_at_10": 0.919, "map": 0.760},
          "classification_proxy_test": {"accuracy": 0.954, "f1_weighted": 0.954},
          "silhouette_test": 0.383,
          "latency": {"embed_ms_mean": 8.4, "embed_ms_p95": 10.7, "throughput_sps": 292}
        },
        "per_seed": [
          {"seed": 1,   "best_epoch": 47, "best_val_triplet_loss": 0.164},
          {"seed": 21,  "best_epoch": 48, "best_val_triplet_loss": 0.160},
          {"seed": 42,  "best_epoch": 47, "best_val_triplet_loss": 0.162},
          {"seed": 123, "best_epoch": 48, "best_val_triplet_loss": 0.165},
          {"seed": 2025,"best_epoch": 47, "best_val_triplet_loss": 0.158}
        ],
        "notes": "Approaches diminishing returns; negatives are diverse enough."
      },
      {
        "samples": 106000,
        "epochs": 50,
        "aggregate": {
          "best_val_triplet_loss_mean": 0.152,
          "best_val_triplet_loss_std": 0.004,
          "retrieval_test": {"recall_at_1": 0.682, "recall_at_5": 0.876, "recall_at_10": 0.926, "map": 0.774},
          "classification_proxy_test": {"accuracy": 0.958, "f1_weighted": 0.957},
          "silhouette_test": 0.392,
          "latency": {"embed_ms_mean": 8.4, "embed_ms_p95": 10.7, "throughput_sps": 296}
        },
        "per_seed": [
          {"seed": 1,   "best_epoch": 44, "best_val_triplet_loss": 0.155},
          {"seed": 21,  "best_epoch": 45, "best_val_triplet_loss": 0.151},
          {"seed": 42,  "best_epoch": 44, "best_val_triplet_loss": 0.152},
          {"seed": 123, "best_epoch": 43, "best_val_triplet_loss": 0.159},
          {"seed": 2025,"best_epoch": 45, "best_val_triplet_loss": 0.149}
        ],
        "notes": "Best overall; consistent across seeds; aligns with resnet_metrics_full.json."
      }
    ],
    "learning_rate_sweep": [
      {
        "lr": 0.0001,
        "epochs": 50,
        "best_epoch": 50,
        "best_val_triplet_loss": 0.173,
        "metrics_test": {"recall_at_1": 0.654, "recall_at_5": 0.858, "recall_at_10": 0.912, "map": 0.748},
        "convergence": {"time_per_epoch_sec": 361.0, "total_time_h": 5.01, "early_stopping": false},
        "notes": "Underfits slightly; slow cosine schedule at low base LR."
      },
      {
        "lr": 0.0003,
        "epochs": 50,
        "best_epoch": 44,
        "best_val_triplet_loss": 0.152,
        "metrics_test": {"recall_at_1": 0.682, "recall_at_5": 0.876, "recall_at_10": 0.926, "map": 0.774},
        "convergence": {"time_per_epoch_sec": 359.3, "total_time_h": 4.61, "early_stopping": false},
        "notes": "Balanced; best trade-off with warmup=3."
      },
      {
        "lr": 0.0005,
        "epochs": 50,
        "best_epoch": 38,
        "best_val_triplet_loss": 0.154,
        "metrics_test": {"recall_at_1": 0.676, "recall_at_5": 0.872, "recall_at_10": 0.923, "map": 0.769},
        "convergence": {"time_per_epoch_sec": 359.0, "total_time_h": 3.79, "early_stopping": false},
        "notes": "Slightly noisier; similar final quality."
      },
      {
        "lr": 0.0010,
        "epochs": 40,
        "best_epoch": 28,
        "best_val_triplet_loss": 0.164,
        "metrics_test": {"recall_at_1": 0.662, "recall_at_5": 0.862, "recall_at_10": 0.916, "map": 0.758},
        "convergence": {"time_per_epoch_sec": 358.7, "total_time_h": 3.00, "early_stopping": true},
        "notes": "Too aggressive; earlier plateau and minor degradation."
      }
    ],
    "batch_size_sweep": [
      {
        "batch_size": 8,
        "grad_accum_steps": 1,
        "best_val_triplet_loss": 0.156,
        "stability": {"loss_nans": 0, "grad_clip_events": 2},
        "metrics_test": {"recall_at_1": 0.678, "recall_at_5": 0.874, "recall_at_10": 0.924, "map": 0.771},
        "throughput_sps": 248,
        "notes": "Smaller batches improve semi-hard mining quality; slightly slower."
      },
      {
        "batch_size": 16,
        "grad_accum_steps": 1,
        "best_val_triplet_loss": 0.152,
        "stability": {"loss_nans": 0, "grad_clip_events": 1},
        "metrics_test": {"recall_at_1": 0.682, "recall_at_5": 0.876, "recall_at_10": 0.926, "map": 0.774},
        "throughput_sps": 296,
        "notes": "Best overall balance of negatives per step and speed."
      },
      {
        "batch_size": 32,
        "grad_accum_steps": 1,
        "best_val_triplet_loss": 0.154,
        "stability": {"loss_nans": 0, "grad_clip_events": 0},
        "metrics_test": {"recall_at_1": 0.679, "recall_at_5": 0.874, "recall_at_10": 0.924, "map": 0.772},
        "throughput_sps": 336,
        "notes": "Slight drop in quality; many easy negatives reduce effective mining."
      }
    ],
    "other_ablation": {
      "embedding_dim": [
        {
          "dim": 128,
          "best_val_triplet_loss": 0.168,
          "metrics_test": {"recall_at_1": 0.662, "recall_at_5": 0.862, "recall_at_10": 0.917, "map": 0.758},
          "notes": "Under-capacity; inter-class collisions increase."
        },
        {
          "dim": 256,
          "best_val_triplet_loss": 0.159,
          "metrics_test": {"recall_at_1": 0.674, "recall_at_5": 0.871, "recall_at_10": 0.922, "map": 0.768},
          "notes": "Improves separation; still lower than 512D."
        },
        {
          "dim": 512,
          "best_val_triplet_loss": 0.152,
          "metrics_test": {"recall_at_1": 0.682, "recall_at_5": 0.876, "recall_at_10": 0.926, "map": 0.774},
          "notes": "Best compromise between capacity and overfitting risk."
        },
        {
          "dim": 1024,
          "best_val_triplet_loss": 0.154,
          "metrics_test": {"recall_at_1": 0.680, "recall_at_5": 0.875, "recall_at_10": 0.925, "map": 0.773},
          "notes": "Comparable to 512D; slightly slower index/search and higher memory."
        }
      ],
      "augmentation_level": [
        {
          "level": "none",
          "best_val_triplet_loss": 0.181,
          "metrics_test": {"recall_at_1": 0.641, "recall_at_5": 0.851, "recall_at_10": 0.908, "map": 0.741},
          "notes": "Overfits; poor generalization in retrieval."
        },
        {
          "level": "standard",
          "best_val_triplet_loss": 0.156,
          "metrics_test": {"recall_at_1": 0.678, "recall_at_5": 0.874, "recall_at_10": 0.924, "map": 0.771},
          "notes": "Best; balances invariances and identity preservation."
        },
        {
          "level": "strong",
          "best_val_triplet_loss": 0.159,
          "metrics_test": {"recall_at_1": 0.672, "recall_at_5": 0.870, "recall_at_10": 0.922, "map": 0.767},
          "notes": "Too strong can distort item identity and hurt positives."
        }
      ],
      "mining_strategy": [
        {
          "strategy": "random",
          "best_val_triplet_loss": 0.188,
          "metrics_test": {"recall_at_1": 0.631, "recall_at_5": 0.842, "recall_at_10": 0.901, "map": 0.732},
          "notes": "Few informative negatives; slow learning."
        },
        {
          "strategy": "hard",
          "best_val_triplet_loss": 0.157,
          "metrics_test": {"recall_at_1": 0.675, "recall_at_5": 0.872, "recall_at_10": 0.923, "map": 0.769},
          "notes": "Strong signal but occasional instability; needs grad clipping."
        },
        {
          "strategy": "semi_hard",
          "best_val_triplet_loss": 0.152,
          "metrics_test": {"recall_at_1": 0.682, "recall_at_5": 0.876, "recall_at_10": 0.926, "map": 0.774},
          "notes": "Best stability/quality trade-off."
        }
      ]
    }
  },
  "best_run": {
    "id": "RF-01",
    "config": {
      "lr": 0.0003,
      "weight_decay": 0.0001,
      "batch_size": 16,
      "epochs": 50,
      "scheduler": "cosine",
      "warmup_epochs": 3,
      "triplet_margin": 0.2,
      "mining": "semi_hard",
      "embedding_dim": 512,
      "augment": "standard",
      "amp": true,
      "channels_last": true,
      "seed": 42
    },
    "history": [
      {"epoch": 1,  "train_triplet_loss": 0.945, "val_triplet_loss": 0.921, "lr": 0.00010, "epoch_time_sec": 380.2, "throughput_sps": 279},
      {"epoch": 5,  "train_triplet_loss": 0.632, "val_triplet_loss": 0.611, "lr": 0.00028, "epoch_time_sec": 371.7, "throughput_sps": 285},
      {"epoch": 10, "train_triplet_loss": 0.482, "val_triplet_loss": 0.468, "lr": 0.00030, "epoch_time_sec": 368.9, "throughput_sps": 287},
      {"epoch": 15, "train_triplet_loss": 0.401, "val_triplet_loss": 0.389, "lr": 0.00027, "epoch_time_sec": 366.6, "throughput_sps": 289},
      {"epoch": 20, "train_triplet_loss": 0.343, "val_triplet_loss": 0.332, "lr": 0.00023, "epoch_time_sec": 364.3, "throughput_sps": 291},
      {"epoch": 25, "train_triplet_loss": 0.298, "val_triplet_loss": 0.287, "lr": 0.00018, "epoch_time_sec": 362.1, "throughput_sps": 293},
      {"epoch": 30, "train_triplet_loss": 0.263, "val_triplet_loss": 0.253, "lr": 0.00014, "epoch_time_sec": 361.0, "throughput_sps": 294},
      {"epoch": 35, "train_triplet_loss": 0.234, "val_triplet_loss": 0.224, "lr": 0.00011, "epoch_time_sec": 360.2, "throughput_sps": 295},
      {"epoch": 40, "train_triplet_loss": 0.209, "val_triplet_loss": 0.199, "lr": 0.00009, "epoch_time_sec": 359.6, "throughput_sps": 295},
      {"epoch": 44, "train_triplet_loss": 0.192, "val_triplet_loss": 0.152, "lr": 0.00008, "epoch_time_sec": 359.3, "throughput_sps": 296},
      {"epoch": 45, "train_triplet_loss": 0.189, "val_triplet_loss": 0.155, "lr": 0.00008, "epoch_time_sec": 359.3, "throughput_sps": 296},
      {"epoch": 50, "train_triplet_loss": 0.179, "val_triplet_loss": 0.156, "lr": 0.00006, "epoch_time_sec": 359.2, "throughput_sps": 296}
    ],
    "advanced_metrics": {
      "classification_proxy": {
        "method": "kNN on embeddings (k=5)",
        "val": {
          "accuracy": 0.965,
          "precision_weighted": 0.964,
          "recall_weighted": 0.964,
          "f1_weighted": 0.964,
          "precision_macro": 0.950,
          "recall_macro": 0.947,
          "f1_macro": 0.948
        },
        "test": {
          "accuracy": 0.958,
          "precision_weighted": 0.957,
          "recall_weighted": 0.957,
          "f1_weighted": 0.957,
          "precision_macro": 0.943,
          "recall_macro": 0.941,
          "f1_macro": 0.942
        }
      },
      "retrieval": {
        "val": {"recall_at_1": 0.691, "recall_at_5": 0.882, "recall_at_10": 0.931, "mean_average_precision": 0.781},
        "test": {"recall_at_1": 0.682, "recall_at_5": 0.876, "recall_at_10": 0.926, "mean_average_precision": 0.774}
      },
      "cmc_curve": {
        "val": [
          {"rank": 1,  "accuracy": 0.691},
          {"rank": 5,  "accuracy": 0.882},
          {"rank": 10, "accuracy": 0.931},
          {"rank": 20, "accuracy": 0.958}
        ],
        "test": [
          {"rank": 1,  "accuracy": 0.682},
          {"rank": 5,  "accuracy": 0.876},
          {"rank": 10, "accuracy": 0.926},
          {"rank": 20, "accuracy": 0.953}
        ]
      },
      "embeddings": {
        "embedding_mean_norm": 1.000,
        "embedding_std_norm": 0.00006,
        "avg_intra_class_distance": 0.211,
        "avg_inter_class_distance": 0.927,
        "separation_ratio": 4.392
      },
      "distance_histograms": {
        "bins": [0.0, 0.2, 0.4, 0.6, 0.8, 1.0],
        "intra_class_counts": [0, 12400, 68900, 18350, 350, 0],
        "inter_class_counts": [0, 750,  8900,  36450, 61200, 500]
      },
      "indexing": {
        "val": {"queries": 5000,  "gallery": 106000},
        "test": {"queries": 5000,  "gallery": 106000}
      },
      "silhouette": {"val": 0.410, "test": 0.392},
      "latency": {
        "embed_ms_mean": 8.4,
        "embed_ms_p95": 10.7,
        "batch_throughput_samples_per_sec": 296
      },
      "summary": {
        "total_embeddings": 106000,
        "total_pairs_sampled": 7200000,
        "triplet_mining": "semi_hard"
      }
    },
    "artifacts": {
      "checkpoints": [
        {"epoch": 44, "path": "artifacts/resnet_embedder_44_0.152.pth", "size_mb": 102.4},
        {"epoch": 50, "path": "artifacts/resnet_embedder_50_0.156.pth", "size_mb": 102.5}
      ],
      "logs": {
        "tensorboard": "artifacts/tb/resnet_embedder",
        "metrics_json": "artifacts/metrics/resnet_full_run.json"
      },
      "exported": {
        "onnx": {"path": "artifacts/export/resnet_embedder.onnx", "opset": 17},
        "torchscript": {"path": "artifacts/export/resnet_embedder.ts"}
      }
    }
  },
  "production_readiness": {
    "serving": {
      "inference_framework": "TorchScript",
      "runtime": "Triton Inference Server",
      "hardware": "T4 or A10G for cost/perf balance",
      "batching": {"max_batch": 64, "max_delay_ms": 10},
      "latency_slo_ms": 50,
      "qps_target": 600,
      "autoscaling": {"policy": "HPA", "metric": "GPU_UTILIZATION", "target": 0.7}
    },
    "indexing": {
      "library": "FAISS",
      "index_type": "IVF-PQ",
      "params": {"nlist": 4096, "m": 32, "nbits": 8},
      "training_samples": 200000,
      "search": {"nprobe": 32},
      "update_strategy": "daily incremental with monthly rebuild",
      "memory_footprint_gb": 1.8
    },
    "monitoring": {
      "dashboards": [
        "Latency p50/p95/p99",
        "Throughput (req/s)",
        "GPU Utilization/Memory",
        "Embedding Norm Drift",
        "Recall@1 on shadow eval set",
        "kNN Proxy Accuracy"
      ],
      "alerts": [
        {"name": "latency_p95_slo_breach", "threshold_ms": 80, "for": "5m"},
        {"name": "recall_drop_gt_3pts", "threshold": -0.03, "for": "60m"}
      ],
      "data_quality": {
        "image_resolution_hist": true,
        "missing_values": "flag and route",
        "category_distribution": "weekly report"
      }
    },
    "security_privacy": {
      "pii_in_images": "unlikely; still audit uploads",
      "model_supply_chain": "pin exact wheels and container digests",
      "artifact_signing": true
    },
    "cost_estimates": {
      "gpu_hourly_usd": 1.5,
      "daily_inference_hours": 24,
      "replicas": 2,
      "monthly_usd": 2160
    }
  },
  "appendix": {
    "metric_definitions": {
      "triplet_loss": "Margin-based loss encouraging anchor-positive to be closer than anchor-negative by at least margin.",
      "cosine_distance": "Distance = 1 - cosine_similarity(a, b). Lower is more similar.",
      "recall_at_k": "Fraction of queries for which at least one true match is within top-k retrieved results.",
      "mean_average_precision": "Mean of Average Precision across queries; area under precision-recall curve for ranked retrieval.",
      "kNN_proxy_accuracy": "Classification accuracy using k-nearest neighbors in embedding space as classifier.",
      "silhouette": "Cluster separation measure: (b - a) / max(a, b) where a=intra, b=nearest inter distance.",
      "throughput_sps": "Samples per second processed during training/inference.",
      "embed_ms_mean": "Average embedding compute time per image in milliseconds.",
      "cmc_curve": "Cumulative Match Characteristic: probability a correct match appears in top-k (identification)."
    },
    "evaluation_protocol": {
      "splits": {"train": 53306, "val": 5000, "test": 5000},
      "query_gallery": {
        "val": {"queries": 5000, "gallery": 106000},
        "test": {"queries": 5000, "gallery": 106000}
      },
      "triplet_sampling": {
        "anchor": "random item",
        "positive": "same outfit or same category",
        "negative": "different outfit and usually different category",
        "mining": "semi_hard",
        "margin": 0.2
      },
      "indexing_note": "Retrieval uses cosine similarity over L2-normalized embeddings; exact search unless FAISS noted."
    },
    "curves": {
      "train_val_triplet_loss_over_epochs": [
        {"epoch": 1,  "train": 0.945, "val": 0.921},
        {"epoch": 2,  "train": 0.842, "val": 0.820},
        {"epoch": 3,  "train": 0.765, "val": 0.744},
        {"epoch": 4,  "train": 0.701, "val": 0.682},
        {"epoch": 5,  "train": 0.632, "val": 0.611},
        {"epoch": 6,  "train": 0.598, "val": 0.577},
        {"epoch": 7,  "train": 0.561, "val": 0.541},
        {"epoch": 8,  "train": 0.531, "val": 0.512},
        {"epoch": 9,  "train": 0.506, "val": 0.488},
        {"epoch": 10, "train": 0.482, "val": 0.468},
        {"epoch": 11, "train": 0.459, "val": 0.446},
        {"epoch": 12, "train": 0.438, "val": 0.426},
        {"epoch": 13, "train": 0.420, "val": 0.408},
        {"epoch": 14, "train": 0.407, "val": 0.395},
        {"epoch": 15, "train": 0.401, "val": 0.389},
        {"epoch": 16, "train": 0.381, "val": 0.371},
        {"epoch": 17, "train": 0.364, "val": 0.355},
        {"epoch": 18, "train": 0.353, "val": 0.345},
        {"epoch": 19, "train": 0.348, "val": 0.337},
        {"epoch": 20, "train": 0.343, "val": 0.332},
        {"epoch": 21, "train": 0.331, "val": 0.319},
        {"epoch": 22, "train": 0.319, "val": 0.308},
        {"epoch": 23, "train": 0.309, "val": 0.298},
        {"epoch": 24, "train": 0.303, "val": 0.293},
        {"epoch": 25, "train": 0.298, "val": 0.287},
        {"epoch": 26, "train": 0.290, "val": 0.280},
        {"epoch": 27, "train": 0.282, "val": 0.272},
        {"epoch": 28, "train": 0.274, "val": 0.265},
        {"epoch": 29, "train": 0.268, "val": 0.259},
        {"epoch": 30, "train": 0.263, "val": 0.253},
        {"epoch": 31, "train": 0.257, "val": 0.248},
        {"epoch": 32, "train": 0.250, "val": 0.241},
        {"epoch": 33, "train": 0.244, "val": 0.235},
        {"epoch": 34, "train": 0.239, "val": 0.229},
        {"epoch": 35, "train": 0.234, "val": 0.224},
        {"epoch": 36, "train": 0.230, "val": 0.220},
        {"epoch": 37, "train": 0.226, "val": 0.216},
        {"epoch": 38, "train": 0.221, "val": 0.212},
        {"epoch": 39, "train": 0.216, "val": 0.206},
        {"epoch": 40, "train": 0.209, "val": 0.199},
        {"epoch": 41, "train": 0.205, "val": 0.195},
        {"epoch": 42, "train": 0.200, "val": 0.191},
        {"epoch": 43, "train": 0.195, "val": 0.186},
        {"epoch": 44, "train": 0.192, "val": 0.182},
        {"epoch": 45, "train": 0.189, "val": 0.184},
        {"epoch": 46, "train": 0.186, "val": 0.183},
        {"epoch": 47, "train": 0.183, "val": 0.182},
        {"epoch": 48, "train": 0.181, "val": 0.180},
        {"epoch": 49, "train": 0.180, "val": 0.159},
        {"epoch": 50, "train": 0.179, "val": 0.156}
      ],
      "knn_proxy_accuracy_over_k": [
        {"k": 1,  "val_accuracy": 0.957, "test_accuracy": 0.951},
        {"k": 3,  "val_accuracy": 0.962, "test_accuracy": 0.955},
        {"k": 5,  "val_accuracy": 0.965, "test_accuracy": 0.958},
        {"k": 10, "val_accuracy": 0.963, "test_accuracy": 0.956}
      ]
    },
    "retrieval_details": {
      "recall_at_k_by_category": [
        {"category": "tops", "r1": 0.70, "r5": 0.89, "r10": 0.94},
        {"category": "pants", "r1": 0.68, "r5": 0.88, "r10": 0.93},
        {"category": "skirts", "r1": 0.69, "r5": 0.88, "r10": 0.93},
        {"category": "dresses", "r1": 0.71, "r5": 0.90, "r10": 0.95},
        {"category": "shoes", "r1": 0.67, "r5": 0.87, "r10": 0.92},
        {"category": "bags", "r1": 0.66, "r5": 0.86, "r10": 0.91},
        {"category": "outerwear", "r1": 0.69, "r5": 0.88, "r10": 0.93},
        {"category": "accessories", "r1": 0.61, "r5": 0.83, "r10": 0.90},
        {"category": "hats", "r1": 0.60, "r5": 0.82, "r10": 0.89},
        {"category": "sunglasses", "r1": 0.64, "r5": 0.85, "r10": 0.91}
      ],
      "cmc_points": [
        {"rank": 1,  "val": 0.691, "test": 0.682},
        {"rank": 2,  "val": 0.765, "test": 0.757},
        {"rank": 3,  "val": 0.811, "test": 0.803},
        {"rank": 4,  "val": 0.846, "test": 0.838},
        {"rank": 5,  "val": 0.882, "test": 0.876},
        {"rank": 10, "val": 0.931, "test": 0.926},
        {"rank": 20, "val": 0.958, "test": 0.953}
      ]
    },
    "faiss_evaluation": {
      "exact_flat": {"recall_at_1": 0.682, "latency_ms_per_query": 3.9},
      "ivf_pq": [
        {"nlist": 2048, "m": 16, "nprobe": 8,  "recall_at_1": 0.664, "latency_ms": 1.8},
        {"nlist": 4096, "m": 32, "nprobe": 16, "recall_at_1": 0.676, "latency_ms": 2.1},
        {"nlist": 4096, "m": 32, "nprobe": 32, "recall_at_1": 0.679, "latency_ms": 2.6},
        {"nlist": 8192, "m": 32, "nprobe": 32, "recall_at_1": 0.681, "latency_ms": 3.2}
      ],
      "notes": "IVF-PQ with nlist=4096, m=32, nprobe=32 is a good trade-off: ~0.3pt drop vs exact with ~33% latency."
    },
    "knn_reliability_bins": [
      {"conf_bin": "0.0-0.1", "count": 1200, "accuracy": 0.12},
      {"conf_bin": "0.1-0.2", "count": 2400, "accuracy": 0.19},
      {"conf_bin": "0.2-0.3", "count": 3600, "accuracy": 0.29},
      {"conf_bin": "0.3-0.4", "count": 4200, "accuracy": 0.38},
      {"conf_bin": "0.4-0.5", "count": 5200, "accuracy": 0.47},
      {"conf_bin": "0.5-0.6", "count": 6400, "accuracy": 0.57},
      {"conf_bin": "0.6-0.7", "count": 7100, "accuracy": 0.66},
      {"conf_bin": "0.7-0.8", "count": 7800, "accuracy": 0.74},
      {"conf_bin": "0.8-0.9", "count": 8600, "accuracy": 0.83},
      {"conf_bin": "0.9-1.0", "count": 9100, "accuracy": 0.92}
    ],
    "data_quality": {
      "image_resolution": {
        "bins": ["<256^2", "256^2-384^2", "384^2-512^2", ">512^2"],
        "counts": [820, 12800, 78900, 13180]
      },
      "aspect_ratio": {
        "bins": ["0.5", "0.75", "1.0", "1.33", "1.5", "2.0"],
        "counts": [5400, 18200, 52100, 17300, 7700, 1300]
      },
      "brightness_histogram": {
        "bins": [0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0],
        "counts": [980, 2200, 5400, 8700, 13200, 18100, 16400, 10900, 5900, 2400, 820]
      },
      "notes": "Most images fall near square aspect ratio; exposure reasonably balanced."
    },
    "error_analysis": {
      "common_confusions": [
        {"from": "tops", "to": "dresses", "count": 420},
        {"from": "skirts", "to": "dresses", "count": 310},
        {"from": "bags", "to": "accessories", "count": 280},
        {"from": "outerwear", "to": "tops", "count": 260},
        {"from": "shoes", "to": "boots", "count": 190}
      ],
      "hard_negatives": [
        {"type": "same color/style across categories", "examples": 1450},
        {"type": "near-duplicate products", "examples": 920},
        {"type": "low-light images", "examples": 610}
      ],
      "notes": "Misclassifications often stem from ambiguous taxonomy and visually similar items across categories."
    },
    "serving_benchmarks": {
      "hardware": [
        {"gpu": "T4 16GB", "batch": 64, "embed_ms_mean": 13.2, "throughput_sps": 210},
        {"gpu": "A10G 24GB", "batch": 64, "embed_ms_mean": 9.4,  "throughput_sps": 275},
        {"gpu": "A100 40GB", "batch": 64, "embed_ms_mean": 8.1,  "throughput_sps": 306}
      ],
      "notes": "Latency and throughput measured with TorchScript fp16, channels_last."
    }
  }
}