{ "per_head": { "complexity": { "accuracy": 0.6954, "macro_f1": 0.7011, "per_label": { "easy": { "precision": 0.7258, "recall": 0.75, "f1": 0.7377, "support": 60 }, "medium": { "precision": 0.6232, "recall": 0.6418, "f1": 0.6324, "support": 67 }, "hard": { "precision": 0.7674, "recall": 0.7021, "f1": 0.7333, "support": 47 } }, "confusion_matrix": [ [ 45, 15, 0 ], [ 14, 43, 10 ], [ 3, 11, 33 ] ] }, "task_type": { "accuracy": 0.9138, "macro_f1": 0.9093, "per_label": { "bugfix": { "precision": 0.9565, "recall": 0.88, "f1": 0.9167, "support": 25 }, "feature": { "precision": 0.9302, "recall": 0.9524, "f1": 0.9412, "support": 42 }, "refactor": { "precision": 0.8696, "recall": 0.9524, "f1": 0.9091, "support": 21 }, "test": { "precision": 1.0, "recall": 0.8571, "f1": 0.9231, "support": 14 }, "design": { "precision": 0.9375, "recall": 0.9375, "f1": 0.9375, "support": 16 }, "docs": { "precision": 0.9333, "recall": 0.9333, "f1": 0.9333, "support": 15 }, "migration": { "precision": 0.9167, "recall": 1.0, "f1": 0.9565, "support": 22 }, "exploration": { "precision": 0.7778, "recall": 0.7368, "f1": 0.7568, "support": 19 } }, "confusion_matrix": [ [ 22, 1, 0, 0, 0, 0, 0, 2 ], [ 0, 40, 1, 0, 0, 0, 0, 1 ], [ 0, 0, 20, 0, 0, 0, 1, 0 ], [ 1, 1, 0, 12, 0, 0, 0, 0 ], [ 0, 0, 1, 0, 15, 0, 0, 0 ], [ 0, 0, 0, 0, 0, 14, 0, 1 ], [ 0, 0, 0, 0, 0, 0, 22, 0 ], [ 0, 1, 1, 0, 1, 1, 1, 14 ] ] }, "risk": { "accuracy": 0.6954, "macro_f1": 0.6554, "per_label": { "low": { "precision": 0.8, "recall": 0.8276, "f1": 0.8136, "support": 87 }, "medium": { "precision": 0.5333, "recall": 0.6038, "f1": 0.5664, "support": 53 }, "high": { "precision": 0.7083, "recall": 0.5, "f1": 0.5862, "support": 34 } }, "confusion_matrix": [ [ 72, 13, 2 ], [ 16, 32, 5 ], [ 2, 15, 17 ] ] } }, "overall": { "exact_match": 0.4655, "macro_average_f1": 0.7553, "automation_safe_accuracy": 0.6935, "automation_safe_coverage": 0.3563, "confidence_threshold": 0.8, "confidence_calibration": { "ece": 0.303072, "bins": [ { "range": [ 0.5, 0.6 ], "count": 5, "avg_confidence": 0.586, "accuracy": 0.0 }, { "range": [ 0.6, 0.7 ], "count": 48, "avg_confidence": 0.6575, "accuracy": 0.2917 }, { "range": [ 0.7, 0.8 ], "count": 59, "avg_confidence": 0.7473, "accuracy": 0.4068 }, { "range": [ 0.8, 0.9 ], "count": 33, "avg_confidence": 0.8512, "accuracy": 0.5758 }, { "range": [ 0.9, 1.0 ], "count": 29, "avg_confidence": 0.9333, "accuracy": 0.8276 } ] } }, "temperature_scaling": { "method": "per_head_temperature_scaling", "per_head": { "complexity": 0.891251, "task_type": 0.707946, "risk": 1.059254 } }, "complexity_subdims": { "reasoning_depth": { "mae": 0.1069, "r2": 0.5888 }, "spec_completeness": { "mae": 0.1033, "r2": 0.3667 }, "scope_breadth": { "mae": 0.1076, "r2": 0.5044 }, "domain_knowledge": { "mae": 0.1036, "r2": 0.4405 } }, "risk_subdims": { "security_surface": { "mae": 0.1517, "r2": 0.1937 }, "data_sensitivity": { "mae": 0.1184, "r2": 0.3094 }, "production_exposure": { "mae": 0.1182, "r2": 0.6323 }, "reversal_cost": { "mae": 0.1045, "r2": 0.6316 } } }