{
  "_meta": {
    "version": "1.0",
    "created": "2026-03-31",
    "source": "D5 Evaluation — ECE 0.2758 audit",
    "model_version": "ensemble-v1 / Brier 0.21570",
    "notes": "60-70% bin is severely over-confident: only 16.7% actual win rate vs 65% predicted. Corrected via D5 audit. Update this map periodically from HF space evaluation results.",
    "ece_before": 0.2758,
    "n_games_used": 31,
    "date_range": "2026-03-15 to 2026-03-31"
  },
  "bin_edges": [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
  "bin_counts": [0, 0, 2, 4, 5, 6, 6, 4, 3, 1],
  "raw_centers": [0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95],
  "calibrated_centers": [0.05, 0.15, 0.25, 0.35, 0.45, 0.50, 0.35, 0.65, 0.80, 0.92],
  "bin_notes": {
    "bin_6": "60-70% bucket: raw 0.65 -> calibrated 0.35 (D5: only 16.7% actual win rate in this bucket)",
    "bin_7": "70-80% bucket: raw 0.75 -> calibrated 0.65 (moderate over-confidence detected)",
    "bin_8": "80-90% bucket: raw 0.85 -> calibrated 0.80 (slight over-confidence, minor correction)",
    "bin_9": "90-100% bucket: raw 0.95 -> calibrated 0.92 (small correction, low sample size)"
  }
}
|