Spaces:
Running
Running
| from models import IncidentAction | |
# Ordinal position of each severity label. grade_task1 scores a prediction by
# the absolute difference between the predicted and expected positions.
_SEV_ORDER = {label: rank for rank, label in enumerate(("SEV1", "SEV2", "SEV3"))}

# Unordered root-cause pairs that grade_task2 accepts as a "related" near miss.
# Each entry is a two-element set, so the predicted/expected order is irrelevant.
_TASK2_RELATED_GROUPS = [
    set(pair)
    for pair in (
        ("DATABASE", "APPLICATION"),
        ("NETWORK", "INFRASTRUCTURE"),
        ("NETWORK", "THIRD_PARTY"),
        ("INFRASTRUCTURE", "THIRD_PARTY"),
    )
]

# Ordered (predicted, expected) remediation pairs that earn partial credit in
# grade_task3. Unlike the root-cause groups these are direction-sensitive:
# only the listed orientation of a pair is rewarded.
_TASK3_PARTIAL = dict.fromkeys(
    (
        ("RESTART_SERVICE", "FAILOVER"),
        ("FAILOVER", "RESTART_SERVICE"),
        ("NOTIFY_VENDOR", "INVESTIGATE"),
        ("SCALE_UP", "INVESTIGATE"),
        ("RESTART_SERVICE", "INVESTIGATE"),
    ),
    0.25,
)

# Scores must be strictly within (0, 1) — 0.0 and 1.0 are rejected by the validator.
# These two values stand in for "full credit" and "no credit" respectively.
_ZERO = 0.01
_EXACT = 0.99
def grade_task1(action: IncidentAction, ground_truth: dict) -> tuple[float, str]:
    """Score a severity classification against the ground truth.

    Args:
        action: Submission being graded; only ``action.severity`` is read.
        ground_truth: Mapping holding the expected label under ``"severity"``.

    Returns:
        A ``(score, feedback)`` pair: ``_EXACT`` when the predicted label
        equals the expected one, ``0.5`` when the two labels are one step
        apart in ``_SEV_ORDER``, and ``_ZERO`` otherwise. A missing severity,
        or a mismatch involving a label that is not in ``_SEV_ORDER``, also
        scores ``_ZERO``.

    Raises:
        KeyError: If ``ground_truth`` has no ``"severity"`` entry.
    """
    if action.severity is None:
        return _ZERO, "Missing severity classification."

    predicted_label = action.severity.value
    expected_label = ground_truth["severity"]

    # Compare the raw labels first so that "exact" really means "same label",
    # independent of whether the label appears in _SEV_ORDER.
    if predicted_label == expected_label:
        return _EXACT, "Exact severity match."

    predicted = _SEV_ORDER.get(predicted_label)
    expected = _SEV_ORDER.get(expected_label)

    # A label outside _SEV_ORDER has no position on the scale. Substituting a
    # sentinel rank (such as -1) would place it one step from the first known
    # label and hand out "adjacent" credit, or make two different unknown
    # labels look identical, so such mismatches earn no credit.
    if predicted is None or expected is None:
        return _ZERO, "Severity choice is too far from the ground truth."

    # Branch on the integer distance rather than comparing float scores.
    if abs(predicted - expected) == 1:
        return 0.5, "Adjacent severity band: partial credit for a close escalation call."
    return _ZERO, "Severity choice is too far from the ground truth."
def grade_task2(action: IncidentAction, ground_truth: dict) -> tuple[float, str]:
    """Score a root-cause classification against the ground truth.

    Args:
        action: Submission being graded; only ``action.root_cause`` is read.
        ground_truth: Mapping holding the expected label under ``"root_cause"``.

    Returns:
        A ``(score, feedback)`` pair, decided in this order: ``_ZERO`` when no
        root cause was supplied, ``_EXACT`` on an exact match, ``0.25`` when
        the prediction is ``"UNKNOWN"``, ``0.5`` when the predicted/expected
        pair is one of ``_TASK2_RELATED_GROUPS``, and ``_ZERO`` otherwise.

    Raises:
        KeyError: If ``ground_truth`` has no ``"root_cause"`` entry.
    """
    guess = action.root_cause
    if guess is None:
        return _ZERO, "Missing root-cause classification."

    guess_label, truth_label = guess.value, ground_truth["root_cause"]

    if guess_label == truth_label:
        verdict = (_EXACT, "Exact root-cause match.")
    elif guess_label == "UNKNOWN":
        # Checked before the related-group lookup, so "UNKNOWN" always lands here.
        verdict = (
            0.25,
            "Conservative fallback: uncertainty recognized, but the failure domain was not isolated.",
        )
    elif {guess_label, truth_label} in _TASK2_RELATED_GROUPS:
        # Set equality makes the pair order-insensitive.
        verdict = (
            0.5,
            "Related failure domain selected: partial credit for a near-miss diagnosis.",
        )
    else:
        verdict = (
            _ZERO,
            "Root-cause classification does not match the expected failure domain.",
        )
    return verdict
def grade_task3(action: IncidentAction, ground_truth: dict) -> tuple[float, str]:
    """Score a remediation recommendation against the ground truth.

    Args:
        action: Submission being graded; only ``action.action`` is read.
        ground_truth: Mapping holding the expected action under ``"action"``.

    Returns:
        A ``(score, feedback)`` pair, decided in this order: ``_ZERO`` when no
        action was supplied, ``_EXACT`` on an exact match, ``0.4`` for
        ``"INVESTIGATE"`` unless ``"NO_ACTION"`` was expected, ``0.25`` for
        ``"NO_ACTION"`` when ``"INVESTIGATE"`` was expected, the value from
        ``_TASK3_PARTIAL`` when the (predicted, expected) pair is listed
        there, and ``_ZERO`` otherwise.

    Raises:
        KeyError: If ``ground_truth`` has no ``"action"`` entry.
    """
    recommendation = action.action
    if recommendation is None:
        return _ZERO, "Missing remediation recommendation."

    chosen = recommendation.value
    wanted = ground_truth["action"]

    if chosen == wanted:
        return _EXACT, "Exact remediation match."

    if chosen == "INVESTIGATE":
        # Investigating earns credit whenever some response was expected;
        # when "NO_ACTION" was expected, fall through to the table lookup.
        if wanted != "NO_ACTION":
            return 0.4, "Safe investigative fallback: the incident was recognized, but the optimal action was not taken."
    elif chosen == "NO_ACTION" and wanted == "INVESTIGATE":
        return 0.25, "Conservative response, but deeper investigation was expected."

    # The table is keyed by the ordered pair, so direction matters here.
    partial_credit = _TASK3_PARTIAL.get((chosen, wanted))
    if partial_credit is not None:
        return partial_credit, "Related remediation selected: partial credit for a close operational response."
    return _ZERO, "Recommended action does not match the expected operator response."
# Task identifier -> grading function. Every grader takes
# (action, ground_truth) and returns a (score, feedback) tuple.
GRADERS = dict(
    task1=grade_task1,
    task2=grade_task2,
    task3=grade_task3,
)