{ "_meta": { "version": "1.0", "created": "2026-03-31", "source": "D5 Evaluation — ECE 0.2758 audit", "model_version": "ensemble-v1 / Brier 0.21570", "notes": "60-70% bin is severely over-confident: only 16.7% actual win rate (1 of 6 games) vs 65% predicted. All bins from 50% upward were adjusted via the D5 audit; see bin_notes for per-bin details. Update this map periodically from HF space evaluation results.", "ece_before": 0.2758, "n_games_used": 31, "date_range": "2026-03-15 to 2026-03-31" }, "bin_edges": [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0], "bin_counts": [0, 0, 2, 4, 5, 6, 6, 4, 3, 1], "raw_centers": [0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95], "calibrated_centers": [0.05, 0.15, 0.25, 0.35, 0.45, 0.50, 0.35, 0.65, 0.80, 0.92], "bin_notes": { "bin_5": "50-60% bucket: raw 0.55 -> calibrated 0.50 (n=6; rationale not recorded)", "bin_6": "60-70% bucket: raw 0.65 -> calibrated 0.35 (D5: only 16.7% actual win rate in this bucket, 1 of 6 games)", "bin_7": "70-80% bucket: raw 0.75 -> calibrated 0.65 (moderate over-confidence detected)", "bin_8": "80-90% bucket: raw 0.85 -> calibrated 0.80 (slight over-confidence, minor correction)", "bin_9": "90-100% bucket: raw 0.95 -> calibrated 0.92 (small correction, low sample size)" } }