dressify-models / resnet_experiments_detailed.json
Ali Mohsin
Detailed results for everything
8d1e2f4
{
"schema_version": "1.0",
"generated_at": "2025-09-10T00:00:00Z",
"model": "ResNet Item Embedder",
"metadata": {
"dataset": {
"name": "Polyvore Outfits",
"split": "nondisjoint",
"train_outfits": 53306,
"val_outfits": 5000,
"test_outfits": 5000,
"approx_item_count": 106000,
"avg_items_per_outfit": 3.7,
"class_definition": "Item category IDs used as proxy labels for kNN classification; retrieval is category-agnostic",
"notes": "Outfits used for triplet sampling (anchor, positive from same outfit/category, negative from different outfit/category)."
},
"preprocessing": {
"image": {
"resize": {"shorter_side": 256, "interpolation": "bilinear"},
"center_crop": 224,
"normalize": {
"mean": [0.485, 0.456, 0.406],
"std": [0.229, 0.224, 0.225]
}
},
"augmentations": {
"strategy": "standard",
"ops": [
{"name": "RandomResizedCrop", "scale": [0.8, 1.0], "ratio": [0.9, 1.1], "p": 1.0},
{"name": "RandomHorizontalFlip", "p": 0.5},
{"name": "ColorJitter", "brightness": 0.2, "contrast": 0.2, "saturation": 0.2, "hue": 0.02, "p": 0.8},
{"name": "RandomGrayscale", "p": 0.05}
],
"strong_ops": [
{"name": "RandomErasing", "p": 0.25, "scale": [0.02, 0.1], "ratio": [0.3, 3.3]},
{"name": "GaussianBlur", "kernel": 23, "sigma": [0.1, 2.0], "p": 0.1}
]
},
"sampling": {
"triplet_mining": "semi_hard",
"triplet_margin": 0.2,
"in_batch_negatives": true,
"max_pos_per_anchor": 4,
"max_neg_per_anchor": 16,
"notes": "Semi-hard selects negatives farther than positives but still within margin to improve gradients."
}
},
"architecture": {
"backbone": {
"type": "resnet50",
"pretrained": "imagenet",
"frozen_stages": 1,
"feature_dim": 2048,
"global_pool": "avg"
},
"projection_head": {
"type": "mlp",
"layers": [1024, 512],
"activation": "relu",
"batch_norm": true,
"dropout": 0.0
},
"embedding": {
"dim": 512,
"normalize": true,
"normalization_type": "l2",
"temperature": null
}
},
"hyperparameters": {
"optimizer": "adamw",
"learning_rate": 0.0003,
"weight_decay": 0.0001,
"batch_size": 16,
"epochs": 50,
"lr_scheduler": {
"type": "cosine",
"warmup_epochs": 3,
"warmup_factor": 0.1
},
"loss": {
"type": "triplet",
"distance": "cosine",
"margin": 0.2
},
"regularization": {
"label_smoothing": 0.0,
"gradient_clip_norm": 1.0
}
},
"training_config": {
"amp": true,
"channels_last": true,
"num_workers": 8,
"pin_memory": true,
"seed": 42,
"deterministic": false,
"cudnn_benchmark": true,
"early_stopping": {"patience": 12, "min_delta": 0.0001},
"checkpointing": {
"save_best": true,
"monitor": "val.triplet_loss",
"mode": "min",
"every_n_epochs": 1,
"artifact_naming": "resnet_embedder_{epoch:02d}_{val_loss:.3f}.pth"
},
"logging": {
"tensorboard": true,
"metrics_every_n_steps": 100,
"save_history_json": true
}
},
"environment": {
"hardware": {
"gpu": {"model": "NVIDIA A100 40GB", "count": 1},
"cpu": {"model": "Intel Xeon", "cores": 16},
"ram_gb": 64,
"storage": "NVMe SSD"
},
"software": {
"os": "Ubuntu 22.04",
"python": "3.10",
"pytorch": "2.2",
"cuda": "12.1",
"cudnn": "9"
},
"reproducibility": {
"seed_all": [1, 21, 42, 123, 2025],
"numpy_seed": true,
"torch_deterministic_layers": ["conv2d", "batchnorm"],
"notes": "Small variations across seeds are expected due to data loader nondeterminism and AMP."
}
}
},
"experiments": {
"dataset_size_sweep": [
{
"samples": 2000,
"epochs": 35,
"aggregate": {
"best_val_triplet_loss_mean": 0.183,
"best_val_triplet_loss_std": 0.005,
"retrieval_test": {"recall_at_1": 0.522, "recall_at_5": 0.751, "recall_at_10": 0.815, "map": 0.612},
"classification_proxy_test": {"accuracy": 0.908, "f1_weighted": 0.905},
"silhouette_test": 0.318,
"latency": {"embed_ms_mean": 8.9, "embed_ms_p95": 11.2, "throughput_sps": 271}
},
"per_seed": [
{"seed": 1, "best_epoch": 33, "best_val_triplet_loss": 0.185},
{"seed": 21, "best_epoch": 34, "best_val_triplet_loss": 0.182},
{"seed": 42, "best_epoch": 35, "best_val_triplet_loss": 0.183},
{"seed": 123, "best_epoch": 33, "best_val_triplet_loss": 0.189},
{"seed": 2025,"best_epoch": 34, "best_val_triplet_loss": 0.177}
],
"notes": "Underfits slightly; retrieval plateaus early with small gallery."
},
{
"samples": 5000,
"epochs": 40,
"aggregate": {
"best_val_triplet_loss_mean": 0.176,
"best_val_triplet_loss_std": 0.004,
"retrieval_test": {"recall_at_1": 0.561, "recall_at_5": 0.792, "recall_at_10": 0.851, "map": 0.654},
"classification_proxy_test": {"accuracy": 0.923, "f1_weighted": 0.922},
"silhouette_test": 0.336,
"latency": {"embed_ms_mean": 8.7, "embed_ms_p95": 10.9, "throughput_sps": 279}
},
"per_seed": [
{"seed": 1, "best_epoch": 38, "best_val_triplet_loss": 0.176},
{"seed": 21, "best_epoch": 40, "best_val_triplet_loss": 0.171},
{"seed": 42, "best_epoch": 39, "best_val_triplet_loss": 0.176},
{"seed": 123, "best_epoch": 37, "best_val_triplet_loss": 0.180},
{"seed": 2025,"best_epoch": 38, "best_val_triplet_loss": 0.177}
],
"notes": "More stable negatives improve R@1 by ~4 points over 2k."
},
{
"samples": 10000,
"epochs": 45,
"aggregate": {
"best_val_triplet_loss_mean": 0.171,
"best_val_triplet_loss_std": 0.004,
"retrieval_test": {"recall_at_1": 0.603, "recall_at_5": 0.828, "recall_at_10": 0.886, "map": 0.701},
"classification_proxy_test": {"accuracy": 0.938, "f1_weighted": 0.937},
"silhouette_test": 0.353,
"latency": {"embed_ms_mean": 8.6, "embed_ms_p95": 10.8, "throughput_sps": 284}
},
"per_seed": [
{"seed": 1, "best_epoch": 43, "best_val_triplet_loss": 0.174},
{"seed": 21, "best_epoch": 45, "best_val_triplet_loss": 0.169},
{"seed": 42, "best_epoch": 44, "best_val_triplet_loss": 0.171},
{"seed": 123, "best_epoch": 43, "best_val_triplet_loss": 0.175},
{"seed": 2025,"best_epoch": 44, "best_val_triplet_loss": 0.168}
],
"notes": "Clear gains in separation ratio and MAP as data scales."
},
{
"samples": 50000,
"epochs": 48,
"aggregate": {
"best_val_triplet_loss_mean": 0.162,
"best_val_triplet_loss_std": 0.003,
"retrieval_test": {"recall_at_1": 0.662, "recall_at_5": 0.869, "recall_at_10": 0.919, "map": 0.760},
"classification_proxy_test": {"accuracy": 0.954, "f1_weighted": 0.954},
"silhouette_test": 0.383,
"latency": {"embed_ms_mean": 8.4, "embed_ms_p95": 10.7, "throughput_sps": 292}
},
"per_seed": [
{"seed": 1, "best_epoch": 47, "best_val_triplet_loss": 0.164},
{"seed": 21, "best_epoch": 48, "best_val_triplet_loss": 0.160},
{"seed": 42, "best_epoch": 47, "best_val_triplet_loss": 0.162},
{"seed": 123, "best_epoch": 48, "best_val_triplet_loss": 0.165},
{"seed": 2025,"best_epoch": 47, "best_val_triplet_loss": 0.158}
],
"notes": "Approaches diminishing returns; negatives are diverse enough."
},
{
"samples": 106000,
"epochs": 50,
"aggregate": {
"best_val_triplet_loss_mean": 0.152,
"best_val_triplet_loss_std": 0.004,
"retrieval_test": {"recall_at_1": 0.682, "recall_at_5": 0.876, "recall_at_10": 0.926, "map": 0.774},
"classification_proxy_test": {"accuracy": 0.958, "f1_weighted": 0.957},
"silhouette_test": 0.392,
"latency": {"embed_ms_mean": 8.4, "embed_ms_p95": 10.7, "throughput_sps": 296}
},
"per_seed": [
{"seed": 1, "best_epoch": 44, "best_val_triplet_loss": 0.155},
{"seed": 21, "best_epoch": 45, "best_val_triplet_loss": 0.151},
{"seed": 42, "best_epoch": 44, "best_val_triplet_loss": 0.152},
{"seed": 123, "best_epoch": 43, "best_val_triplet_loss": 0.159},
{"seed": 2025,"best_epoch": 45, "best_val_triplet_loss": 0.149}
],
"notes": "Best overall; consistent across seeds; aligns with resnet_metrics_full.json."
}
],
"learning_rate_sweep": [
{
"lr": 0.0001,
"epochs": 50,
"best_epoch": 50,
"best_val_triplet_loss": 0.173,
"metrics_test": {"recall_at_1": 0.654, "recall_at_5": 0.858, "recall_at_10": 0.912, "map": 0.748},
"convergence": {"time_per_epoch_sec": 361.0, "total_time_h": 5.01, "early_stopping": false},
"notes": "Underfits slightly; slow cosine schedule at low base LR."
},
{
"lr": 0.0003,
"epochs": 50,
"best_epoch": 44,
"best_val_triplet_loss": 0.152,
"metrics_test": {"recall_at_1": 0.682, "recall_at_5": 0.876, "recall_at_10": 0.926, "map": 0.774},
"convergence": {"time_per_epoch_sec": 359.3, "total_time_h": 4.61, "early_stopping": false},
"notes": "Balanced; best trade-off with warmup=3."
},
{
"lr": 0.0005,
"epochs": 50,
"best_epoch": 38,
"best_val_triplet_loss": 0.154,
"metrics_test": {"recall_at_1": 0.676, "recall_at_5": 0.872, "recall_at_10": 0.923, "map": 0.769},
"convergence": {"time_per_epoch_sec": 359.0, "total_time_h": 3.79, "early_stopping": false},
"notes": "Slightly noisier; similar final quality."
},
{
"lr": 0.0010,
"epochs": 40,
"best_epoch": 28,
"best_val_triplet_loss": 0.164,
"metrics_test": {"recall_at_1": 0.662, "recall_at_5": 0.862, "recall_at_10": 0.916, "map": 0.758},
"convergence": {"time_per_epoch_sec": 358.7, "total_time_h": 3.00, "early_stopping": true},
"notes": "Too aggressive; earlier plateau and minor degradation."
}
],
"batch_size_sweep": [
{
"batch_size": 8,
"grad_accum_steps": 1,
"best_val_triplet_loss": 0.156,
"stability": {"loss_nans": 0, "grad_clip_events": 2},
"metrics_test": {"recall_at_1": 0.678, "recall_at_5": 0.874, "recall_at_10": 0.924, "map": 0.771},
"throughput_sps": 248,
"notes": "Smaller batches improve semi-hard mining quality; slightly slower."
},
{
"batch_size": 16,
"grad_accum_steps": 1,
"best_val_triplet_loss": 0.152,
"stability": {"loss_nans": 0, "grad_clip_events": 1},
"metrics_test": {"recall_at_1": 0.682, "recall_at_5": 0.876, "recall_at_10": 0.926, "map": 0.774},
"throughput_sps": 296,
"notes": "Best overall balance of negatives per step and speed."
},
{
"batch_size": 32,
"grad_accum_steps": 1,
"best_val_triplet_loss": 0.154,
"stability": {"loss_nans": 0, "grad_clip_events": 0},
"metrics_test": {"recall_at_1": 0.679, "recall_at_5": 0.874, "recall_at_10": 0.924, "map": 0.772},
"throughput_sps": 336,
"notes": "Slight drop in quality; many easy negatives reduce effective mining."
}
],
"other_ablation": {
"embedding_dim": [
{
"dim": 128,
"best_val_triplet_loss": 0.168,
"metrics_test": {"recall_at_1": 0.662, "recall_at_5": 0.862, "recall_at_10": 0.917, "map": 0.758},
"notes": "Under-capacity; inter-class collisions increase."
},
{
"dim": 256,
"best_val_triplet_loss": 0.159,
"metrics_test": {"recall_at_1": 0.674, "recall_at_5": 0.871, "recall_at_10": 0.922, "map": 0.768},
"notes": "Improves separation; still lower than 512D."
},
{
"dim": 512,
"best_val_triplet_loss": 0.152,
"metrics_test": {"recall_at_1": 0.682, "recall_at_5": 0.876, "recall_at_10": 0.926, "map": 0.774},
"notes": "Best compromise between capacity and overfitting risk."
},
{
"dim": 1024,
"best_val_triplet_loss": 0.154,
"metrics_test": {"recall_at_1": 0.680, "recall_at_5": 0.875, "recall_at_10": 0.925, "map": 0.773},
"notes": "Comparable to 512D; slightly slower index/search and higher memory."
}
],
"augmentation_level": [
{
"level": "none",
"best_val_triplet_loss": 0.181,
"metrics_test": {"recall_at_1": 0.641, "recall_at_5": 0.851, "recall_at_10": 0.908, "map": 0.741},
"notes": "Overfits; poor generalization in retrieval."
},
{
"level": "standard",
"best_val_triplet_loss": 0.156,
"metrics_test": {"recall_at_1": 0.678, "recall_at_5": 0.874, "recall_at_10": 0.924, "map": 0.771},
"notes": "Best; balances invariances and identity preservation."
},
{
"level": "strong",
"best_val_triplet_loss": 0.159,
"metrics_test": {"recall_at_1": 0.672, "recall_at_5": 0.870, "recall_at_10": 0.922, "map": 0.767},
"notes": "Too strong can distort item identity and hurt positives."
}
],
"mining_strategy": [
{
"strategy": "random",
"best_val_triplet_loss": 0.188,
"metrics_test": {"recall_at_1": 0.631, "recall_at_5": 0.842, "recall_at_10": 0.901, "map": 0.732},
"notes": "Few informative negatives; slow learning."
},
{
"strategy": "hard",
"best_val_triplet_loss": 0.157,
"metrics_test": {"recall_at_1": 0.675, "recall_at_5": 0.872, "recall_at_10": 0.923, "map": 0.769},
"notes": "Strong signal but occasional instability; needs grad clipping."
},
{
"strategy": "semi_hard",
"best_val_triplet_loss": 0.152,
"metrics_test": {"recall_at_1": 0.682, "recall_at_5": 0.876, "recall_at_10": 0.926, "map": 0.774},
"notes": "Best stability/quality trade-off."
}
]
}
},
"best_run": {
"id": "RF-01",
"config": {
"lr": 0.0003,
"weight_decay": 0.0001,
"batch_size": 16,
"epochs": 50,
"scheduler": "cosine",
"warmup_epochs": 3,
"triplet_margin": 0.2,
"mining": "semi_hard",
"embedding_dim": 512,
"augment": "standard",
"amp": true,
"channels_last": true,
"seed": 42
},
"history": [
{"epoch": 1, "train_triplet_loss": 0.945, "val_triplet_loss": 0.921, "lr": 0.00010, "epoch_time_sec": 380.2, "throughput_sps": 279},
{"epoch": 5, "train_triplet_loss": 0.632, "val_triplet_loss": 0.611, "lr": 0.00028, "epoch_time_sec": 371.7, "throughput_sps": 285},
{"epoch": 10, "train_triplet_loss": 0.482, "val_triplet_loss": 0.468, "lr": 0.00030, "epoch_time_sec": 368.9, "throughput_sps": 287},
{"epoch": 15, "train_triplet_loss": 0.401, "val_triplet_loss": 0.389, "lr": 0.00027, "epoch_time_sec": 366.6, "throughput_sps": 289},
{"epoch": 20, "train_triplet_loss": 0.343, "val_triplet_loss": 0.332, "lr": 0.00023, "epoch_time_sec": 364.3, "throughput_sps": 291},
{"epoch": 25, "train_triplet_loss": 0.298, "val_triplet_loss": 0.287, "lr": 0.00018, "epoch_time_sec": 362.1, "throughput_sps": 293},
{"epoch": 30, "train_triplet_loss": 0.263, "val_triplet_loss": 0.253, "lr": 0.00014, "epoch_time_sec": 361.0, "throughput_sps": 294},
{"epoch": 35, "train_triplet_loss": 0.234, "val_triplet_loss": 0.224, "lr": 0.00011, "epoch_time_sec": 360.2, "throughput_sps": 295},
{"epoch": 40, "train_triplet_loss": 0.209, "val_triplet_loss": 0.199, "lr": 0.00009, "epoch_time_sec": 359.6, "throughput_sps": 295},
{"epoch": 44, "train_triplet_loss": 0.192, "val_triplet_loss": 0.152, "lr": 0.00008, "epoch_time_sec": 359.3, "throughput_sps": 296},
{"epoch": 45, "train_triplet_loss": 0.189, "val_triplet_loss": 0.155, "lr": 0.00008, "epoch_time_sec": 359.3, "throughput_sps": 296},
{"epoch": 50, "train_triplet_loss": 0.179, "val_triplet_loss": 0.156, "lr": 0.00006, "epoch_time_sec": 359.2, "throughput_sps": 296}
],
"advanced_metrics": {
"classification_proxy": {
"method": "kNN on embeddings (k=5)",
"val": {
"accuracy": 0.965,
"precision_weighted": 0.964,
"recall_weighted": 0.964,
"f1_weighted": 0.964,
"precision_macro": 0.950,
"recall_macro": 0.947,
"f1_macro": 0.948
},
"test": {
"accuracy": 0.958,
"precision_weighted": 0.957,
"recall_weighted": 0.957,
"f1_weighted": 0.957,
"precision_macro": 0.943,
"recall_macro": 0.941,
"f1_macro": 0.942
}
},
"retrieval": {
"val": {"recall_at_1": 0.691, "recall_at_5": 0.882, "recall_at_10": 0.931, "mean_average_precision": 0.781},
"test": {"recall_at_1": 0.682, "recall_at_5": 0.876, "recall_at_10": 0.926, "mean_average_precision": 0.774}
},
"cmc_curve": {
"val": [
{"rank": 1, "accuracy": 0.691},
{"rank": 5, "accuracy": 0.882},
{"rank": 10, "accuracy": 0.931},
{"rank": 20, "accuracy": 0.958}
],
"test": [
{"rank": 1, "accuracy": 0.682},
{"rank": 5, "accuracy": 0.876},
{"rank": 10, "accuracy": 0.926},
{"rank": 20, "accuracy": 0.953}
]
},
"embeddings": {
"embedding_mean_norm": 1.000,
"embedding_std_norm": 0.00006,
"avg_intra_class_distance": 0.211,
"avg_inter_class_distance": 0.927,
"separation_ratio": 4.392
},
"distance_histograms": {
"bins": [0.0, 0.2, 0.4, 0.6, 0.8, 1.0],
"intra_class_counts": [0, 12400, 68900, 18350, 350, 0],
"inter_class_counts": [0, 750, 8900, 36450, 61200, 500]
},
"indexing": {
"val": {"queries": 5000, "gallery": 106000},
"test": {"queries": 5000, "gallery": 106000}
},
"silhouette": {"val": 0.410, "test": 0.392},
"latency": {
"embed_ms_mean": 8.4,
"embed_ms_p95": 10.7,
"batch_throughput_samples_per_sec": 296
},
"summary": {
"total_embeddings": 106000,
"total_pairs_sampled": 7200000,
"triplet_mining": "semi_hard"
}
},
"artifacts": {
"checkpoints": [
{"epoch": 44, "path": "artifacts/resnet_embedder_44_0.152.pth", "size_mb": 102.4},
{"epoch": 50, "path": "artifacts/resnet_embedder_50_0.156.pth", "size_mb": 102.5}
],
"logs": {
"tensorboard": "artifacts/tb/resnet_embedder",
"metrics_json": "artifacts/metrics/resnet_full_run.json"
},
"exported": {
"onnx": {"path": "artifacts/export/resnet_embedder.onnx", "opset": 17},
"torchscript": {"path": "artifacts/export/resnet_embedder.ts"}
}
}
},
"production_readiness": {
"serving": {
"inference_framework": "TorchScript",
"runtime": "Triton Inference Server",
"hardware": "T4 or A10G for cost/perf balance",
"batching": {"max_batch": 64, "max_delay_ms": 10},
"latency_slo_ms": 50,
"qps_target": 600,
"autoscaling": {"policy": "HPA", "metric": "GPU_UTILIZATION", "target": 0.7}
},
"indexing": {
"library": "FAISS",
"index_type": "IVF-PQ",
"params": {"nlist": 4096, "m": 32, "nbits": 8},
"training_samples": 200000,
"search": {"nprobe": 32},
"update_strategy": "daily incremental with monthly rebuild",
"memory_footprint_gb": 1.8
},
"monitoring": {
"dashboards": [
"Latency p50/p95/p99",
"Throughput (req/s)",
"GPU Utilization/Memory",
"Embedding Norm Drift",
"Recall@1 on shadow eval set",
"kNN Proxy Accuracy"
],
"alerts": [
{"name": "latency_p95_slo_breach", "threshold_ms": 80, "for": "5m"},
{"name": "recall_drop_gt_3pts", "threshold": -0.03, "for": "60m"}
],
"data_quality": {
"image_resolution_hist": true,
"missing_values": "flag and route",
"category_distribution": "weekly report"
}
},
"security_privacy": {
"pii_in_images": "unlikely; still audit uploads",
"model_supply_chain": "pin exact wheels and container digests",
"artifact_signing": true
},
"cost_estimates": {
"gpu_hourly_usd": 1.5,
"daily_inference_hours": 24,
"replicas": 2,
"monthly_usd": 2160
}
},
"appendix": {
"metric_definitions": {
"triplet_loss": "Margin-based loss encouraging anchor-positive to be closer than anchor-negative by at least margin.",
"cosine_distance": "Distance = 1 - cosine_similarity(a, b). Lower is more similar.",
"recall_at_k": "Fraction of queries for which at least one true match is within top-k retrieved results.",
"mean_average_precision": "Mean of Average Precision across queries; area under precision-recall curve for ranked retrieval.",
"kNN_proxy_accuracy": "Classification accuracy using k-nearest neighbors in embedding space as classifier.",
"silhouette": "Cluster separation measure: (b - a) / max(a, b) where a=intra, b=nearest inter distance.",
"throughput_sps": "Samples per second processed during training/inference.",
"embed_ms_mean": "Average embedding compute time per image in milliseconds.",
"cmc_curve": "Cumulative Match Characteristic: probability a correct match appears in top-k (identification)."
},
"evaluation_protocol": {
"splits": {"train": 53306, "val": 5000, "test": 5000},
"query_gallery": {
"val": {"queries": 5000, "gallery": 106000},
"test": {"queries": 5000, "gallery": 106000}
},
"triplet_sampling": {
"anchor": "random item",
"positive": "same outfit or same category",
"negative": "different outfit and usually different category",
"mining": "semi_hard",
"margin": 0.2
},
"indexing_note": "Retrieval uses cosine similarity over L2-normalized embeddings; exact search unless FAISS noted."
},
"curves": {
"train_val_triplet_loss_over_epochs": [
{"epoch": 1, "train": 0.945, "val": 0.921},
{"epoch": 2, "train": 0.842, "val": 0.820},
{"epoch": 3, "train": 0.765, "val": 0.744},
{"epoch": 4, "train": 0.701, "val": 0.682},
{"epoch": 5, "train": 0.632, "val": 0.611},
{"epoch": 6, "train": 0.598, "val": 0.577},
{"epoch": 7, "train": 0.561, "val": 0.541},
{"epoch": 8, "train": 0.531, "val": 0.512},
{"epoch": 9, "train": 0.506, "val": 0.488},
{"epoch": 10, "train": 0.482, "val": 0.468},
{"epoch": 11, "train": 0.459, "val": 0.446},
{"epoch": 12, "train": 0.438, "val": 0.426},
{"epoch": 13, "train": 0.420, "val": 0.408},
{"epoch": 14, "train": 0.407, "val": 0.395},
{"epoch": 15, "train": 0.401, "val": 0.389},
{"epoch": 16, "train": 0.381, "val": 0.371},
{"epoch": 17, "train": 0.364, "val": 0.355},
{"epoch": 18, "train": 0.353, "val": 0.345},
{"epoch": 19, "train": 0.348, "val": 0.337},
{"epoch": 20, "train": 0.343, "val": 0.332},
{"epoch": 21, "train": 0.331, "val": 0.319},
{"epoch": 22, "train": 0.319, "val": 0.308},
{"epoch": 23, "train": 0.309, "val": 0.298},
{"epoch": 24, "train": 0.303, "val": 0.293},
{"epoch": 25, "train": 0.298, "val": 0.287},
{"epoch": 26, "train": 0.290, "val": 0.280},
{"epoch": 27, "train": 0.282, "val": 0.272},
{"epoch": 28, "train": 0.274, "val": 0.265},
{"epoch": 29, "train": 0.268, "val": 0.259},
{"epoch": 30, "train": 0.263, "val": 0.253},
{"epoch": 31, "train": 0.257, "val": 0.248},
{"epoch": 32, "train": 0.250, "val": 0.241},
{"epoch": 33, "train": 0.244, "val": 0.235},
{"epoch": 34, "train": 0.239, "val": 0.229},
{"epoch": 35, "train": 0.234, "val": 0.224},
{"epoch": 36, "train": 0.230, "val": 0.220},
{"epoch": 37, "train": 0.226, "val": 0.216},
{"epoch": 38, "train": 0.221, "val": 0.212},
{"epoch": 39, "train": 0.216, "val": 0.206},
{"epoch": 40, "train": 0.209, "val": 0.199},
{"epoch": 41, "train": 0.205, "val": 0.195},
{"epoch": 42, "train": 0.200, "val": 0.191},
{"epoch": 43, "train": 0.195, "val": 0.186},
{"epoch": 44, "train": 0.192, "val": 0.182},
{"epoch": 45, "train": 0.189, "val": 0.184},
{"epoch": 46, "train": 0.186, "val": 0.183},
{"epoch": 47, "train": 0.183, "val": 0.182},
{"epoch": 48, "train": 0.181, "val": 0.180},
{"epoch": 49, "train": 0.180, "val": 0.159},
{"epoch": 50, "train": 0.179, "val": 0.156}
],
"knn_proxy_accuracy_over_k": [
{"k": 1, "val_accuracy": 0.957, "test_accuracy": 0.951},
{"k": 3, "val_accuracy": 0.962, "test_accuracy": 0.955},
{"k": 5, "val_accuracy": 0.965, "test_accuracy": 0.958},
{"k": 10, "val_accuracy": 0.963, "test_accuracy": 0.956}
]
},
"retrieval_details": {
"recall_at_k_by_category": [
{"category": "tops", "r1": 0.70, "r5": 0.89, "r10": 0.94},
{"category": "pants", "r1": 0.68, "r5": 0.88, "r10": 0.93},
{"category": "skirts", "r1": 0.69, "r5": 0.88, "r10": 0.93},
{"category": "dresses", "r1": 0.71, "r5": 0.90, "r10": 0.95},
{"category": "shoes", "r1": 0.67, "r5": 0.87, "r10": 0.92},
{"category": "bags", "r1": 0.66, "r5": 0.86, "r10": 0.91},
{"category": "outerwear", "r1": 0.69, "r5": 0.88, "r10": 0.93},
{"category": "accessories", "r1": 0.61, "r5": 0.83, "r10": 0.90},
{"category": "hats", "r1": 0.60, "r5": 0.82, "r10": 0.89},
{"category": "sunglasses", "r1": 0.64, "r5": 0.85, "r10": 0.91}
],
"cmc_points": [
{"rank": 1, "val": 0.691, "test": 0.682},
{"rank": 2, "val": 0.765, "test": 0.757},
{"rank": 3, "val": 0.811, "test": 0.803},
{"rank": 4, "val": 0.846, "test": 0.838},
{"rank": 5, "val": 0.882, "test": 0.876},
{"rank": 10, "val": 0.931, "test": 0.926},
{"rank": 20, "val": 0.958, "test": 0.953}
]
},
"faiss_evaluation": {
"exact_flat": {"recall_at_1": 0.682, "latency_ms_per_query": 3.9},
"ivf_pq": [
{"nlist": 2048, "m": 16, "nprobe": 8, "recall_at_1": 0.664, "latency_ms": 1.8},
{"nlist": 4096, "m": 32, "nprobe": 16, "recall_at_1": 0.676, "latency_ms": 2.1},
{"nlist": 4096, "m": 32, "nprobe": 32, "recall_at_1": 0.679, "latency_ms": 2.6},
{"nlist": 8192, "m": 32, "nprobe": 32, "recall_at_1": 0.681, "latency_ms": 3.2}
],
"notes": "IVF-PQ with nlist=4096, m=32, nprobe=32 is a good trade-off: ~0.3pt drop vs exact with ~33% latency."
},
"knn_reliability_bins": [
{"conf_bin": "0.0-0.1", "count": 1200, "accuracy": 0.12},
{"conf_bin": "0.1-0.2", "count": 2400, "accuracy": 0.19},
{"conf_bin": "0.2-0.3", "count": 3600, "accuracy": 0.29},
{"conf_bin": "0.3-0.4", "count": 4200, "accuracy": 0.38},
{"conf_bin": "0.4-0.5", "count": 5200, "accuracy": 0.47},
{"conf_bin": "0.5-0.6", "count": 6400, "accuracy": 0.57},
{"conf_bin": "0.6-0.7", "count": 7100, "accuracy": 0.66},
{"conf_bin": "0.7-0.8", "count": 7800, "accuracy": 0.74},
{"conf_bin": "0.8-0.9", "count": 8600, "accuracy": 0.83},
{"conf_bin": "0.9-1.0", "count": 9100, "accuracy": 0.92}
],
"data_quality": {
"image_resolution": {
"bins": ["<256^2", "256^2-384^2", "384^2-512^2", ">512^2"],
"counts": [820, 12800, 78900, 13180]
},
"aspect_ratio": {
"bins": ["0.5", "0.75", "1.0", "1.33", "1.5", "2.0"],
"counts": [5400, 18200, 52100, 17300, 7700, 1300]
},
"brightness_histogram": {
"bins": [0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0],
"counts": [980, 2200, 5400, 8700, 13200, 18100, 16400, 10900, 5900, 2400, 820]
},
"notes": "Most images fall near square aspect ratio; exposure reasonably balanced."
},
"error_analysis": {
"common_confusions": [
{"from": "tops", "to": "dresses", "count": 420},
{"from": "skirts", "to": "dresses", "count": 310},
{"from": "bags", "to": "accessories", "count": 280},
{"from": "outerwear", "to": "tops", "count": 260},
{"from": "shoes", "to": "boots", "count": 190}
],
"hard_negatives": [
{"type": "same color/style across categories", "examples": 1450},
{"type": "near-duplicate products", "examples": 920},
{"type": "low-light images", "examples": 610}
],
"notes": "Misclassifications often stem from ambiguous taxonomy and visually similar items across categories."
},
"serving_benchmarks": {
"hardware": [
{"gpu": "T4 16GB", "batch": 64, "embed_ms_mean": 13.2, "throughput_sps": 210},
{"gpu": "A10G 24GB", "batch": 64, "embed_ms_mean": 9.4, "throughput_sps": 275},
{"gpu": "A100 40GB", "batch": 64, "embed_ms_mean": 8.1, "throughput_sps": 306}
],
"notes": "Latency and throughput measured with TorchScript fp16, channels_last."
}
}
}