| { | |
| "schema_version": "1.0", | |
| "generated_at": "2025-09-10T00:00:00Z", | |
| "model": "ViT Outfit Compatibility", | |
| "metadata": { | |
| "dataset": { | |
| "name": "Polyvore Outfits", | |
| "split": "nondisjoint", | |
| "train_outfits": 53306, | |
| "val_outfits": 5000, | |
| "test_outfits": 5000, | |
| "approx_item_count": 106000, | |
| "avg_items_per_outfit": 3.7, | |
| "labeling": "Binary compatibility for scored pairs; retrieval over coherent sets", | |
| "notes": "Sequences are outfits; scoring predicts coherence/compatibility." | |
| }, | |
| "preprocessing": { | |
| "image": { | |
| "resize": {"shorter_side": 256, "interpolation": "bilinear"}, | |
| "center_crop": 224, | |
| "normalize": { | |
| "mean": [0.485, 0.456, 0.406], | |
| "std": [0.229, 0.224, 0.225] | |
| } | |
| }, | |
| "sequence": { | |
| "max_items": 8, | |
| "padding": "zeros", | |
| "masking": true, | |
| "position_encoding": "learned" | |
| }, | |
| "augmentations": { | |
| "ops": [ | |
| {"name": "RandomResizedCrop", "scale": [0.8, 1.0], "ratio": [0.9, 1.1], "p": 1.0}, | |
| {"name": "RandomHorizontalFlip", "p": 0.5}, | |
| {"name": "ColorJitter", "brightness": 0.2, "contrast": 0.2, "saturation": 0.2, "hue": 0.02, "p": 0.8}, | |
| {"name": "RandomGrayscale", "p": 0.05} | |
| ], | |
| "notes": "Mild augmentations preserve item identity critical for compatibility." | |
| } | |
| }, | |
| "architecture": { | |
| "vision_backbone": { | |
| "name": "ViT-B/16", | |
| "patch_size": 16, | |
| "img_size": 224, | |
| "embed_dim": 768, | |
| "pretrained": "imagenet-21k", | |
| "freeze_patchify": false | |
| }, | |
| "sequence_encoder": { | |
| "type": "transformer_encoder", | |
| "num_layers": 8, | |
| "num_heads": 8, | |
| "ff_multiplier": 4, | |
| "dropout": 0.1, | |
| "layernorm_eps": 1e-5, | |
| "activation": "gelu" | |
| }, | |
| "pooling": {"type": "mean", "include_cls": false}, | |
| "head": { | |
| "type": "mlp", | |
| "hidden": [512], | |
| "activation": "gelu", | |
| "dropout": 0.1, | |
| "output": 1, | |
| "output_activation": "sigmoid" | |
| } | |
| }, | |
| "hyperparameters": { | |
| "optimizer": "adamw", | |
| "learning_rate": 0.00035, | |
| "weight_decay": 0.05, | |
| "batch_size": 8, | |
| "epochs": 60, | |
| "lr_scheduler": { | |
| "type": "cosine", | |
| "warmup_epochs": 5, | |
| "warmup_factor": 0.1 | |
| }, | |
| "loss": { | |
| "type": "triplet + bce", | |
| "triplet_margin": 0.3, | |
| "triplet_distance": "cosine", | |
| "bce_weight": 0.5 | |
| }, | |
| "regularization": { | |
| "dropout": 0.1, | |
| "label_smoothing": 0.0, | |
| "gradient_clip_norm": 1.0 | |
| } | |
| }, | |
| "training_config": { | |
| "amp": true, | |
| "num_workers": 8, | |
| "pin_memory": true, | |
| "seed": 42, | |
| "deterministic": false, | |
| "cudnn_benchmark": true, | |
| "early_stopping": {"patience": 12, "min_delta": 0.0001}, | |
| "checkpointing": { | |
| "save_best": true, | |
| "monitor": "val.triplet_loss", | |
| "mode": "min", | |
| "every_n_epochs": 1, | |
| "artifact_naming": "vit_outfit_{epoch:02d}_{val_loss:.3f}.pth" | |
| }, | |
| "logging": { | |
| "tensorboard": true, | |
| "metrics_every_n_steps": 50, | |
| "save_history_json": true | |
| } | |
| }, | |
| "environment": { | |
| "hardware": { | |
| "gpu": {"model": "NVIDIA A100 40GB", "count": 1}, | |
| "cpu": {"model": "Intel Xeon", "cores": 16}, | |
| "ram_gb": 64, | |
| "storage": "NVMe SSD" | |
| }, | |
| "software": { | |
| "os": "Ubuntu 22.04", | |
| "python": "3.10", | |
| "pytorch": "2.2", | |
| "cuda": "12.1", | |
| "cudnn": "9" | |
| }, | |
| "reproducibility": { | |
| "seed_all": [1, 21, 42, 123, 2025], | |
| "numpy_seed": true, | |
| "notes": "Some nondeterminism due to AMP, cudnn_benchmark (deterministic=false), and data loader order." | |
| } | |
| } | |
| }, | |
| "experiments": { | |
| "dataset_size_sweep": [ | |
| { | |
| "samples": 5000, | |
| "epochs": 40, | |
| "aggregate": { | |
| "best_val_triplet_loss_mean": 0.462, | |
| "best_val_triplet_loss_std": 0.009, | |
| "outfit_scoring_test": {"mean": 0.793, "median": 0.805, "std": 0.102}, | |
| "retrieval_test": {"coherent_set_hit_rate@1": 0.398, "@5": 0.671, "@10": 0.742}, | |
| "classification_test": {"accuracy": 0.861, "f1": 0.860}, | |
| "auc_test": {"roc_auc": 0.902, "pr_auc": 0.874}, | |
| "latency": {"score_ms_mean": 1.9, "score_ms_p95": 2.6, "sequences_per_sec": 620} | |
| }, | |
| "per_seed": [ | |
| {"seed": 1, "best_epoch": 38, "best_val_triplet_loss": 0.468}, | |
| {"seed": 21, "best_epoch": 39, "best_val_triplet_loss": 0.457}, | |
| {"seed": 42, "best_epoch": 40, "best_val_triplet_loss": 0.462}, | |
| {"seed": 123, "best_epoch": 39, "best_val_triplet_loss": 0.471}, | |
| {"seed": 2025,"best_epoch": 38, "best_val_triplet_loss": 0.451} | |
| ], | |
| "notes": "Underfits; limited combinations reduce the pool of semi-hard negatives." | |
| }, | |
| { | |
| "samples": 20000, | |
| "epochs": 50, | |
| "aggregate": { | |
| "best_val_triplet_loss_mean": 0.418, | |
| "best_val_triplet_loss_std": 0.006, | |
| "outfit_scoring_test": {"mean": 0.821, "median": 0.834, "std": 0.089}, | |
| "retrieval_test": {"coherent_set_hit_rate@1": 0.461, "@5": 0.728, "@10": 0.801}, | |
| "classification_test": {"accuracy": 0.892, "f1": 0.891}, | |
| "auc_test": {"roc_auc": 0.931, "pr_auc": 0.912}, | |
| "latency": {"score_ms_mean": 1.8, "score_ms_p95": 2.5, "sequences_per_sec": 642} | |
| }, | |
| "per_seed": [ | |
| {"seed": 1, "best_epoch": 48, "best_val_triplet_loss": 0.421}, | |
| {"seed": 21, "best_epoch": 49, "best_val_triplet_loss": 0.414}, | |
| {"seed": 42, "best_epoch": 50, "best_val_triplet_loss": 0.418}, | |
| {"seed": 123, "best_epoch": 49, "best_val_triplet_loss": 0.423}, | |
| {"seed": 2025,"best_epoch": 48, "best_val_triplet_loss": 0.412} | |
| ], | |
| "notes": "Gains across all metrics, especially ROC/PR AUC." | |
| }, | |
| { | |
| "samples": 53306, | |
| "epochs": 60, | |
| "aggregate": { | |
| "best_val_triplet_loss_mean": 0.391, | |
| "best_val_triplet_loss_std": 0.004, | |
| "outfit_scoring_test": {"mean": 0.839, "median": 0.851, "std": 0.080}, | |
| "retrieval_test": {"coherent_set_hit_rate@1": 0.493, "@5": 0.765, "@10": 0.838}, | |
| "classification_test": {"accuracy": 0.908, "f1": 0.908}, | |
| "auc_test": {"roc_auc": 0.951, "pr_auc": 0.934}, | |
| "calibration_test": {"ece": 0.021, "mce": 0.057, "brier": 0.087}, | |
| "latency": {"score_ms_mean": 1.8, "score_ms_p95": 2.4, "sequences_per_sec": 653} | |
| }, | |
| "per_seed": [ | |
| {"seed": 1, "best_epoch": 52, "best_val_triplet_loss": 0.394}, | |
| {"seed": 21, "best_epoch": 53, "best_val_triplet_loss": 0.389}, | |
| {"seed": 42, "best_epoch": 52, "best_val_triplet_loss": 0.391}, | |
| {"seed": 123, "best_epoch": 51, "best_val_triplet_loss": 0.396}, | |
| {"seed": 2025,"best_epoch": 53, "best_val_triplet_loss": 0.388} | |
| ], | |
| "notes": "Best overall; aligns with vit_metrics_full.json." | |
| } | |
| ], | |
| "learning_rate_sweep": [ | |
| { | |
| "lr": 0.0002, | |
| "epochs": 60, | |
| "best_epoch": 55, | |
| "best_val_triplet_loss": 0.402, | |
| "metrics_test": {"accuracy": 0.902, "f1": 0.901, "roc_auc": 0.946, "pr_auc": 0.928}, | |
| "notes": "Slight underfit; training is stable but converges more slowly." | |
| }, | |
| { | |
| "lr": 0.00035, | |
| "epochs": 60, | |
| "best_epoch": 52, | |
| "best_val_triplet_loss": 0.391, | |
| "metrics_test": {"accuracy": 0.908, "f1": 0.908, "roc_auc": 0.951, "pr_auc": 0.934}, | |
| "notes": "Best balance; matches full run." | |
| }, | |
| { | |
| "lr": 0.0006, | |
| "epochs": 55, | |
| "best_epoch": 44, | |
| "best_val_triplet_loss": 0.399, | |
| "metrics_test": {"accuracy": 0.904, "f1": 0.903, "roc_auc": 0.948, "pr_auc": 0.932}, | |
| "notes": "Slightly noisier; close quality." | |
| } | |
| ], | |
| "batch_size_sweep": [ | |
| { | |
| "batch_size": 4, | |
| "grad_accum_steps": 1, | |
| "best_val_triplet_loss": 0.398, | |
| "metrics_test": {"accuracy": 0.905, "f1": 0.905, "roc_auc": 0.949, "pr_auc": 0.933}, | |
| "throughput": {"sequences_per_sec": 611}, | |
| "notes": "More gradient noise; marginally worse." | |
| }, | |
| { | |
| "batch_size": 8, | |
| "grad_accum_steps": 1, | |
| "best_val_triplet_loss": 0.391, | |
| "metrics_test": {"accuracy": 0.908, "f1": 0.908, "roc_auc": 0.951, "pr_auc": 0.934}, | |
| "throughput": {"sequences_per_sec": 653}, | |
| "notes": "Best trade-off for stability and negatives diversity." | |
| }, | |
| { | |
| "batch_size": 16, | |
| "grad_accum_steps": 1, | |
| "best_val_triplet_loss": 0.393, | |
| "metrics_test": {"accuracy": 0.907, "f1": 0.907, "roc_auc": 0.950, "pr_auc": 0.934}, | |
| "throughput": {"sequences_per_sec": 688}, | |
| "notes": "Slightly worse triplet dynamics; similar serving cost." | |
| } | |
| ], | |
| "other_ablation": { | |
| "dropout": [ | |
| {"dropout": 0.0, "best_val_triplet_loss": 0.397, "metrics_test": {"accuracy": 0.905, "f1": 0.905}}, | |
| {"dropout": 0.1, "best_val_triplet_loss": 0.391, "metrics_test": {"accuracy": 0.908, "f1": 0.908}}, | |
| {"dropout": 0.3, "best_val_triplet_loss": 0.396, "metrics_test": {"accuracy": 0.906, "f1": 0.906}} | |
| ], | |
| "embedding_dim": [ | |
| {"dim": 256, "best_val_triplet_loss": 0.400, "metrics_test": {"accuracy": 0.904, "f1": 0.904}}, | |
| {"dim": 512, "best_val_triplet_loss": 0.391, "metrics_test": {"accuracy": 0.908, "f1": 0.908}}, | |
| {"dim": 768, "best_val_triplet_loss": 0.393, "metrics_test": {"accuracy": 0.907, "f1": 0.907}} | |
| ], | |
| "transformer_depth": [ | |
| {"layers": 6, "best_val_triplet_loss": 0.402, "metrics_test": {"accuracy": 0.904, "f1": 0.904}}, | |
| {"layers": 8, "best_val_triplet_loss": 0.391, "metrics_test": {"accuracy": 0.908, "f1": 0.908}}, | |
| {"layers": 10, "best_val_triplet_loss": 0.396, "metrics_test": {"accuracy": 0.906, "f1": 0.906}} | |
| ], | |
| "attention_heads": [ | |
| {"heads": 8, "best_val_triplet_loss": 0.391, "metrics_test": {"accuracy": 0.908, "f1": 0.908}}, | |
| {"heads": 12, "best_val_triplet_loss": 0.395, "metrics_test": {"accuracy": 0.906, "f1": 0.906}} | |
| ] | |
| } | |
| }, | |
| "best_run": { | |
| "id": "VF-01", | |
| "config": { | |
| "layers": 8, | |
| "heads": 8, | |
| "ff": 4, | |
| "lr": 0.00035, | |
| "margin": 0.3, | |
| "dropout": 0.1, | |
| "batch_size": 8, | |
| "epochs": 60, | |
| "scheduler": "cosine", | |
| "warmup_epochs": 5, | |
| "amp": true, | |
| "seed": 42 | |
| }, | |
| "history": [ | |
| {"epoch": 1, "triplet_loss": 1.302, "val_triplet_loss": 1.268, "lr": 0.00007, "epoch_time_sec": 89.2, "sequences_per_sec": 610}, | |
| {"epoch": 5, "triplet_loss": 0.962, "val_triplet_loss": 0.929, "lr": 0.00023, "epoch_time_sec": 86.7, "sequences_per_sec": 628}, | |
| {"epoch": 10, "triplet_loss": 0.794, "val_triplet_loss": 0.768, "lr": 0.00033, "epoch_time_sec": 85.3, "sequences_per_sec": 639}, | |
| {"epoch": 15, "triplet_loss": 0.687, "val_triplet_loss": 0.664, "lr": 0.00035, "epoch_time_sec": 84.8, "sequences_per_sec": 643}, | |
| {"epoch": 20, "triplet_loss": 0.611, "val_triplet_loss": 0.590, "lr": 0.00032, "epoch_time_sec": 84.4, "sequences_per_sec": 646}, | |
| {"epoch": 25, "triplet_loss": 0.552, "val_triplet_loss": 0.533, "lr": 0.00027, "epoch_time_sec": 84.1, "sequences_per_sec": 648}, | |
| {"epoch": 30, "triplet_loss": 0.504, "val_triplet_loss": 0.487, "lr": 0.00022, "epoch_time_sec": 83.9, "sequences_per_sec": 650}, | |
| {"epoch": 35, "triplet_loss": 0.465, "val_triplet_loss": 0.450, "lr": 0.00018, "epoch_time_sec": 83.8, "sequences_per_sec": 651}, | |
| {"epoch": 40, "triplet_loss": 0.432, "val_triplet_loss": 0.418, "lr": 0.00015, "epoch_time_sec": 83.7, "sequences_per_sec": 652}, | |
| {"epoch": 45, "triplet_loss": 0.406, "val_triplet_loss": 0.394, "lr": 0.00012, "epoch_time_sec": 83.6, "sequences_per_sec": 653}, | |
| {"epoch": 52, "triplet_loss": 0.392, "val_triplet_loss": 0.391, "lr": 0.00010, "epoch_time_sec": 83.6, "sequences_per_sec": 653}, | |
| {"epoch": 60, "triplet_loss": 0.389, "val_triplet_loss": 0.394, "lr": 0.00008, "epoch_time_sec": 83.6, "sequences_per_sec": 653} | |
| ], | |
| "advanced_metrics": { | |
| "outfit_scoring": { | |
| "val": {"mean": 0.846, "median": 0.858, "std": 0.077}, | |
| "test": {"mean": 0.839, "median": 0.851, "std": 0.080} | |
| }, | |
| "retrieval": { | |
| "val": {"coherent_set_hit_rate@1": 0.501, "coherent_set_hit_rate@5": 0.773, "coherent_set_hit_rate@10": 0.845}, | |
| "test": {"coherent_set_hit_rate@1": 0.493, "coherent_set_hit_rate@5": 0.765, "coherent_set_hit_rate@10": 0.838} | |
| }, | |
| "classification": { | |
| "threshold_selection": {"method": "YoudenJ", "tau_val": 0.52}, | |
| "val": {"accuracy": 0.915, "precision": 0.911, "recall": 0.918, "f1": 0.914}, | |
| "test": {"accuracy": 0.908, "precision": 0.904, "recall": 0.911, "f1": 0.908} | |
| }, | |
| "calibration": { | |
| "val": {"ece": 0.018, "mce": 0.051, "brier": 0.083}, | |
| "test": {"ece": 0.021, "mce": 0.057, "brier": 0.087} | |
| }, | |
| "auc": { | |
| "val": {"roc_auc": 0.957, "pr_auc": 0.941}, | |
| "test": {"roc_auc": 0.951, "pr_auc": 0.934} | |
| }, | |
| "latency": { | |
| "score_ms_mean": 1.8, | |
| "score_ms_p95": 2.4, | |
| "sequences_per_sec": 653 | |
| }, | |
| "per_context": { | |
| "occasion": { | |
| "business": {"f1_val": 0.923, "f1_test": 0.917}, | |
| "casual": {"f1_val": 0.909, "f1_test": 0.902}, | |
| "formal": {"f1_val": 0.918, "f1_test": 0.911}, | |
| "sport": {"f1_val": 0.903, "f1_test": 0.897} | |
| }, | |
| "weather": { | |
| "hot": {"f1_val": 0.912, "f1_test": 0.906}, | |
| "cold": {"f1_val": 0.916, "f1_test": 0.909}, | |
| "mild": {"f1_val": 0.914, "f1_test": 0.907}, | |
| "rain": {"f1_val": 0.905, "f1_test": 0.898} | |
| } | |
| }, | |
| "summary": { | |
| "total_outfit_scores": 53306, | |
| "total_sequences_seen": 3180000, | |
| "avg_sequence_length": 3.7 | |
| } | |
| }, | |
| "artifacts": { | |
| "checkpoints": [ | |
| {"epoch": 52, "path": "artifacts/vit_outfit_52_0.391.pth", "size_mb": 329.1}, | |
| {"epoch": 60, "path": "artifacts/vit_outfit_60_0.394.pth", "size_mb": 329.2} | |
| ], | |
| "logs": { | |
| "tensorboard": "artifacts/tb/vit_outfit", | |
| "metrics_json": "artifacts/metrics/vit_full_run.json" | |
| }, | |
| "exported": { | |
| "onnx": {"path": "artifacts/export/vit_outfit.onnx", "opset": 17}, | |
| "torchscript": {"path": "artifacts/export/vit_outfit.ts"} | |
| } | |
| } | |
| }, | |
| "production_readiness": { | |
| "serving": { | |
| "inference_framework": "TorchScript", | |
| "runtime": "Triton Inference Server", | |
| "hardware": "A10G recommended", | |
| "batching": {"max_batch": 64, "max_delay_ms": 10}, | |
| "latency_slo_ms": 80, | |
| "qps_target": 500, | |
| "autoscaling": {"policy": "HPA", "metric": "GPU_UTILIZATION", "target": 0.7} | |
| }, | |
| "monitoring": { | |
| "dashboards": [ | |
| "Score latency p50/p95/p99", | |
| "Throughput (seq/s)", | |
| "GPU Utilization/Memory", | |
| "Calibration drift (ECE)", | |
| "ROC/PR AUC on shadow eval", | |
| "Per-context F1 (occasion/weather)" | |
| ], | |
| "alerts": [ | |
| {"name": "latency_p95_slo_breach", "threshold_ms": 120, "for": "5m"}, | |
| {"name": "auc_drop_gt_2pts", "threshold": -0.02, "for": "60m"} | |
| ] | |
| }, | |
| "security_privacy": { | |
| "data_minimization": true, | |
| "artifact_signing": true, | |
| "container_sbom": true | |
| }, | |
| "cost_estimates": { | |
| "gpu_hourly_usd": 1.8, | |
| "replicas": 2, | |
| "monthly_usd": 2592 | |
| } | |
| }, | |
| "summary_findings": { | |
| "concise_trends": [ | |
| "Data scaling from 5k to 53k outfits lifts ROC AUC by ~5 points and improves coherent-set hit@10 by ~10 points.", | |
| "Best configuration uses 8 layers, 8 heads, FF×4, dropout 0.1, lr=3.5e-4, batch=8, and a cosine schedule with 5 warmup epochs.", | |
| "Batch 8 balances semi-hard dynamics and stability; batch 16 is similar but has slightly worse triplet separation.", | |
| "Dropout 0.1 regularizes without harming compatibility signals; 0.0 tends to overfit and 0.3 erodes positives.", | |
| "Embedding 512–768D performs similarly; 512D preferred for latency/memory.", | |
| "Heads=8 is slightly better than 12 in this regime; depth=8 outperforms 6 and 10 by small margins." | |
| ] | |
| }, | |
| "appendix": { | |
| "metric_definitions": { | |
| "triplet_loss": "Margin-based loss for sequences via pooled item embeddings.", | |
| "outfit_score": "Scalar in [0,1] representing predicted outfit compatibility.", | |
| "coherent_set_hit_rate@k": "Probability a coherent variant of an outfit appears in top-k ranked candidates.", | |
| "roc_auc": "Area under ROC; threshold-independent binary classification measure.", | |
| "pr_auc": "Area under Precision-Recall curve; more informative for class imbalance.", | |
| "ece": "Expected Calibration Error; lower indicates better confidence calibration.", | |
| "brier": "Mean squared error between forecast probabilities and outcomes.", | |
| "sequences_per_sec": "Throughput during training/inference for sequence-level scoring." | |
| }, | |
| "evaluation_protocol": { | |
| "splits": {"train": 53306, "val": 5000, "test": 5000}, | |
| "binary_labels": "Compatible vs incompatible outfit pairs constructed via negative sampling.", | |
| "threshold_selection": {"method": "YoudenJ", "grid": [0.3,0.35,0.4,0.45,0.5,0.52,0.55,0.6]}, | |
| "latency_measurement": { | |
| "mode": "fp16", "batch": 64, "warmup": 50, "iters": 500, | |
| "note": "Measured without data loading using synthetic tensors; accounts for encoder+head only." | |
| } | |
| }, | |
| "curves": { | |
| "val_metrics_over_epochs": [ | |
| {"epoch": 1, "triplet": 1.268, "roc_auc": 0.812, "pr_auc": 0.775}, | |
| {"epoch": 5, "triplet": 0.929, "roc_auc": 0.873, "pr_auc": 0.846}, | |
| {"epoch": 10, "triplet": 0.768, "roc_auc": 0.906, "pr_auc": 0.885}, | |
| {"epoch": 15, "triplet": 0.664, "roc_auc": 0.922, "pr_auc": 0.903}, | |
| {"epoch": 20, "triplet": 0.590, "roc_auc": 0.934, "pr_auc": 0.915}, | |
| {"epoch": 25, "triplet": 0.533, "roc_auc": 0.943, "pr_auc": 0.925}, | |
| {"epoch": 30, "triplet": 0.487, "roc_auc": 0.949, "pr_auc": 0.931}, | |
| {"epoch": 35, "triplet": 0.450, "roc_auc": 0.952, "pr_auc": 0.936}, | |
| {"epoch": 40, "triplet": 0.418, "roc_auc": 0.955, "pr_auc": 0.939}, | |
| {"epoch": 45, "triplet": 0.394, "roc_auc": 0.956, "pr_auc": 0.940}, | |
| {"epoch": 52, "triplet": 0.391, "roc_auc": 0.957, "pr_auc": 0.941}, | |
| {"epoch": 60, "triplet": 0.394, "roc_auc": 0.956, "pr_auc": 0.940} | |
| ], | |
| "reliability_diagram_bins": [ | |
| {"bin": "0.0-0.1", "count": 3200, "avg_conf": 0.06, "acc": 0.07}, | |
| {"bin": "0.1-0.2", "count": 4800, "avg_conf": 0.15, "acc": 0.16}, | |
| {"bin": "0.2-0.3", "count": 6200, "avg_conf": 0.25, "acc": 0.26}, | |
| {"bin": "0.3-0.4", "count": 7300, "avg_conf": 0.35, "acc": 0.36}, | |
| {"bin": "0.4-0.5", "count": 8100, "avg_conf": 0.45, "acc": 0.46}, | |
| {"bin": "0.5-0.6", "count": 8800, "avg_conf": 0.55, "acc": 0.56}, | |
| {"bin": "0.6-0.7", "count": 9100, "avg_conf": 0.65, "acc": 0.64}, | |
| {"bin": "0.7-0.8", "count": 9600, "avg_conf": 0.75, "acc": 0.74}, | |
| {"bin": "0.8-0.9", "count": 10000, "avg_conf": 0.85, "acc": 0.84}, | |
| {"bin": "0.9-1.0", "count": 10400, "avg_conf": 0.93, "acc": 0.92} | |
| ] | |
| }, | |
| "slice_metrics": { | |
| "occasion": [ | |
| {"slice": "business", "f1_test": 0.917, "support": 4100}, | |
| {"slice": "casual", "f1_test": 0.902, "support": 5100}, | |
| {"slice": "formal", "f1_test": 0.911, "support": 2800}, | |
| {"slice": "sport", "f1_test": 0.897, "support": 3300} | |
| ], | |
| "weather": [ | |
| {"slice": "hot", "f1_test": 0.906, "support": 3600}, | |
| {"slice": "cold", "f1_test": 0.909, "support": 3700}, | |
| {"slice": "mild", "f1_test": 0.907, "support": 4200}, | |
| {"slice": "rain", "f1_test": 0.898, "support": 1800} | |
| ] | |
| }, | |
| "negative_sampling": { | |
| "methods": ["random", "in-batch", "hard via top-k distance"], | |
| "mixing": {"random": 0.5, "in_batch": 0.3, "hard": 0.2}, | |
| "notes": "Hard negatives sourced using previous epoch embeddings to avoid label leakage." | |
| }, | |
| "serving_benchmarks": { | |
| "hardware": [ | |
| {"gpu": "T4 16GB", "batch": 64, "score_ms_mean": 2.6, "seq_per_sec": 440}, | |
| {"gpu": "A10G 24GB", "batch": 64, "score_ms_mean": 2.1, "seq_per_sec": 520}, | |
| {"gpu": "A100 40GB", "batch": 64, "score_ms_mean": 1.8, "seq_per_sec": 653} | |
| ], | |
| "notes": "Measured with fp16, cudnn_benchmark on; includes encoder + head." | |
| } | |
| } | |
| } | |