{ "per_head": { "complexity": { "accuracy": 0.6782, "macro_f1": 0.6806, "per_label": { "easy": { "precision": 0.5758, "recall": 0.8636, "f1": 0.6909, "support": 44 }, "medium": { "precision": 0.7941, "recall": 0.5745, "f1": 0.6667, "support": 94 }, "hard": { "precision": 0.65, "recall": 0.7222, "f1": 0.6842, "support": 36 } }, "confusion_matrix": [ [ 38, 6, 0 ], [ 26, 54, 14 ], [ 2, 8, 26 ] ] }, "task_type": { "accuracy": 0.8678, "macro_f1": 0.8718, "per_label": { "bugfix": { "precision": 0.875, "recall": 0.84, "f1": 0.8571, "support": 25 }, "feature": { "precision": 0.875, "recall": 0.8333, "f1": 0.8537, "support": 42 }, "refactor": { "precision": 0.84, "recall": 1.0, "f1": 0.913, "support": 21 }, "test": { "precision": 1.0, "recall": 0.8571, "f1": 0.9231, "support": 14 }, "design": { "precision": 0.8667, "recall": 0.8125, "f1": 0.8387, "support": 16 }, "docs": { "precision": 0.9286, "recall": 0.8667, "f1": 0.8966, "support": 15 }, "migration": { "precision": 0.9524, "recall": 0.9091, "f1": 0.9302, "support": 22 }, "exploration": { "precision": 0.6957, "recall": 0.8421, "f1": 0.7619, "support": 19 } }, "confusion_matrix": [ [ 21, 0, 2, 0, 0, 0, 1, 1 ], [ 0, 35, 1, 0, 1, 0, 0, 5 ], [ 0, 0, 21, 0, 0, 0, 0, 0 ], [ 1, 1, 0, 12, 0, 0, 0, 0 ], [ 0, 1, 1, 0, 13, 1, 0, 0 ], [ 0, 1, 0, 0, 0, 13, 0, 1 ], [ 1, 1, 0, 0, 0, 0, 20, 0 ], [ 1, 1, 0, 0, 1, 0, 0, 16 ] ] }, "risk": { "accuracy": 0.6667, "macro_f1": 0.6217, "per_label": { "low": { "precision": 0.8068, "recall": 0.8353, "f1": 0.8208, "support": 85 }, "medium": { "precision": 0.5102, "recall": 0.4545, "f1": 0.4808, "support": 55 }, "high": { "precision": 0.5405, "recall": 0.5882, "f1": 0.5634, "support": 34 } }, "confusion_matrix": [ [ 71, 11, 3 ], [ 16, 25, 14 ], [ 1, 13, 20 ] ] } }, "overall": { "exact_match": 0.3908, "macro_average_f1": 0.7247, "automation_safe_accuracy": 0.6167, "automation_safe_coverage": 0.3448, "confidence_threshold": 0.8, "confidence_calibration": { "ece": 0.369827, "bins": [ { "range": [ 0.5, 0.6 ], "count": 2, "avg_confidence": 0.5761, "accuracy": 0.0 }, { "range": [ 0.6, 0.7 ], "count": 52, "avg_confidence": 0.6519, "accuracy": 0.3077 }, { "range": [ 0.7, 0.8 ], "count": 60, "avg_confidence": 0.7476, "accuracy": 0.25 }, { "range": [ 0.8, 0.9 ], "count": 41, "avg_confidence": 0.8439, "accuracy": 0.5366 }, { "range": [ 0.9, 1.0 ], "count": 19, "avg_confidence": 0.9391, "accuracy": 0.7895 } ] } }, "temperature_scaling": { "method": "per_head_temperature_scaling", "per_head": { "complexity": 0.891251, "task_type": 0.707946, "risk": 1.059254 } }, "complexity_subdims": { "reasoning_depth": { "mae": 0.1117, "r2": 0.513 }, "spec_completeness": { "mae": 0.1165, "r2": 0.3356 }, "scope_breadth": { "mae": 0.1082, "r2": 0.4687 }, "domain_knowledge": { "mae": 0.1199, "r2": 0.3003 } }, "risk_subdims": { "security_surface": { "mae": 0.1627, "r2": 0.1921 }, "data_sensitivity": { "mae": 0.1182, "r2": 0.2477 }, "production_exposure": { "mae": 0.1387, "r2": 0.5113 }, "reversal_cost": { "mae": 0.1103, "r2": 0.5463 } } }