"""Unit tests for ``server.grader.grade_action``.

The tests compare the grader's score and per-field breakdown against values
derived from the tables the grader module itself exports (similarity tables,
priority scores and per-task weights).
"""

from __future__ import annotations

import unittest

# Imported only for its side effects; presumably it installs stubs that the
# project imports below rely on, so keep it ahead of them -- TODO confirm.
import openenv_test_stubs  # noqa: F401

from models import HelpdeskTicketAction, HelpdeskTicketRecord
from server.grader import (
    ASSIGNMENT_GROUP_SIMILARITY,
    ISSUE_TYPE_SIMILARITY,
    PRIORITY_SCORES,
    RESOLUTION_ACTION_SIMILARITY,
    TASK_WEIGHTS,
    grade_action,
)
from vocabulary import ASSIGNMENT_GROUPS, ISSUE_TYPES, PRIORITIES, RESOLUTION_ACTIONS


def _expected_breakdown(task_id: int, **field_scores: float) -> dict[str, float]:
    """Return the per-field scores restricted to the fields weighted by *task_id*.

    Raises ``KeyError`` if the task is unknown or a weighted field has no
    entry in *field_scores*.
    """
    breakdown: dict[str, float] = {}
    for name in TASK_WEIGHTS[task_id]:
        breakdown[name] = field_scores[name]
    return breakdown


def _expected_task_score(task_id: int, **field_scores: float) -> float:
    """Return the weighted sum of *field_scores* for *task_id*, clamped to [0, 1]."""
    weights = TASK_WEIGHTS[task_id]
    weighted_total = sum(field_scores[name] * weights[name] for name in weights)
    capped = min(1.0, weighted_total)
    return max(0.0, capped)


def _scores(
    *,
    issue_type: float = 0.0,
    priority: float = 0.0,
    assignment_group: float = 0.0,
    resolution_action: float = 0.0,
) -> dict[str, float]:
    """Build the four-field score mapping; unspecified fields score 0.0."""
    return {
        "issue_type": issue_type,
        "priority": priority,
        "assignment_group": assignment_group,
        "resolution_action": resolution_action,
    }


def _ticket(
    *,
    issue_type: str = "billing_license",
    priority: str = "high",
    assignment_group: str = "license_ops",
    resolution_action: str = "fulfill",
) -> HelpdeskTicketRecord:
    """Build a synthetic ticket whose four graded labels can be overridden."""
    labels = {
        "issue_type": issue_type,
        "priority": priority,
        "assignment_group": assignment_group,
        "resolution_action": resolution_action,
    }
    return HelpdeskTicketRecord(
        ticket_id="ticket-test",
        title="Test ticket",
        requester="user@example.com",
        description="Synthetic ticket used for deterministic grader tests.",
        **labels,
    )


def _full_action(**overrides: str) -> HelpdeskTicketAction:
    """Build an action matching ``_ticket()``'s default labels, with overrides."""
    fields = {
        "issue_type": "billing_license",
        "priority": "high",
        "assignment_group": "license_ops",
        "resolution_action": "fulfill",
    }
    fields.update(overrides)
    return HelpdeskTicketAction(**fields)


class GraderUnitTests(unittest.TestCase):
    """Checks ``grade_action`` against the tables exported by the grader."""

    # An action that matches the ticket on all four fields scores 1.0.
    def test_task_3_exact_match_scores_one(self) -> None:
        reference = _ticket()
        submitted = _full_action()

        score, breakdown = grade_action(submitted, reference, task_id=3)

        all_ones = dict.fromkeys(
            ("issue_type", "priority", "assignment_group", "resolution_action"),
            1.0,
        )
        self.assertAlmostEqual(score, 1.0)
        self.assertEqual(breakdown, all_ones)

    # A task id outside the supported set is rejected with ValueError.
    def test_unknown_task_id_raises(self) -> None:
        reference = _ticket()
        submitted = HelpdeskTicketAction(issue_type="billing_license")

        with self.assertRaisesRegex(ValueError, "Unsupported task_id"):
            grade_action(submitted, reference, task_id=99)

    # service_request predicted for billing_license is expected to earn 0.4.
    def test_issue_type_partial_credit_only_for_known_similarity_pair(self) -> None:
        reference = _ticket(issue_type="billing_license")
        submitted = HelpdeskTicketAction(issue_type="service_request")

        score, breakdown = grade_action(submitted, reference, task_id=1)

        scores = _scores(issue_type=0.4)
        self.assertEqual(breakdown, _expected_breakdown(1, **scores))
        self.assertAlmostEqual(score, _expected_task_score(1, **scores))

    # Every (expected, predicted) issue-type pair follows the declared table.
    def test_issue_type_scoring_matches_declared_similarity_table_exhaustively(self) -> None:
        pairs = ((want, got) for want in ISSUE_TYPES for got in ISSUE_TYPES)
        for want, got in pairs:
            with self.subTest(expected=want, predicted=got):
                reference = _ticket(issue_type=want)
                submitted = HelpdeskTicketAction(issue_type=got)

                score, breakdown = grade_action(submitted, reference, task_id=1)

                # Exact matches score 1.0 regardless of any table entry.
                if got == want:
                    issue_score = 1.0
                else:
                    issue_score = ISSUE_TYPE_SIMILARITY.get((got, want), 0.0)
                scores = _scores(issue_type=issue_score)

                self.assertAlmostEqual(score, _expected_task_score(1, **scores))
                self.assertEqual(breakdown, _expected_breakdown(1, **scores))

    # spam_phishing predicted for onboarding is expected to earn nothing.
    def test_unrelated_issue_type_gets_zero_not_fuzzy_credit(self) -> None:
        reference = _ticket(issue_type="onboarding")
        submitted = HelpdeskTicketAction(issue_type="spam_phishing")

        score, breakdown = grade_action(submitted, reference, task_id=1)

        self.assertAlmostEqual(score, 0.0)
        self.assertEqual(breakdown, _expected_breakdown(1, **_scores()))

    # high predicted for critical is expected to earn 0.6 on priority.
    def test_priority_scoring_uses_defined_proximity_table(self) -> None:
        reference = _ticket(priority="critical")
        submitted = HelpdeskTicketAction(issue_type="billing_license", priority="high")

        score, breakdown = grade_action(submitted, reference, task_id=2)

        self.assertAlmostEqual(breakdown["issue_type"], 1.0)
        self.assertAlmostEqual(breakdown["priority"], 0.6)
        self.assertAlmostEqual(
            score,
            _expected_task_score(2, **_scores(issue_type=1.0, priority=0.6)),
        )

    # Every (expected, predicted) priority pair follows PRIORITY_SCORES.
    def test_priority_scoring_matches_declared_table_exhaustively(self) -> None:
        pairs = ((want, got) for want in PRIORITIES for got in PRIORITIES)
        for want, got in pairs:
            with self.subTest(expected=want, predicted=got):
                reference = _ticket(priority=want)
                submitted = HelpdeskTicketAction(
                    issue_type="billing_license",
                    priority=got,
                )

                score, breakdown = grade_action(submitted, reference, task_id=2)

                if got == want:
                    priority_score = 1.0
                else:
                    priority_score = PRIORITY_SCORES.get((got, want), 0.0)
                scores = _scores(issue_type=1.0, priority=priority_score)

                self.assertEqual(breakdown, _expected_breakdown(2, **scores))
                self.assertAlmostEqual(score, _expected_task_score(2, **scores))

    # Task 2 combines issue_type and priority using TASK_WEIGHTS[2].
    def test_task_2_weights_apply_as_documented(self) -> None:
        reference = _ticket(priority="high")
        submitted = HelpdeskTicketAction(issue_type="billing_license", priority="medium")

        score, breakdown = grade_action(submitted, reference, task_id=2)

        scores = _scores(issue_type=1.0, priority=0.5)
        self.assertEqual(breakdown, _expected_breakdown(2, **scores))
        self.assertAlmostEqual(score, _expected_task_score(2, **scores))

    # procurement predicted for license_ops is expected to earn 0.55.
    def test_assignment_group_partial_credit_uses_declared_similarity_table(self) -> None:
        reference = _ticket()
        submitted = _full_action(assignment_group="procurement")

        score, breakdown = grade_action(submitted, reference, task_id=3)

        scores = _scores(
            issue_type=1.0,
            priority=1.0,
            assignment_group=0.55,
            resolution_action=1.0,
        )
        self.assertEqual(breakdown["assignment_group"], 0.55)
        self.assertAlmostEqual(score, _expected_task_score(3, **scores))

    # security_team predicted for license_ops is expected to earn nothing.
    def test_assignment_group_unrelated_miss_stays_zero(self) -> None:
        reference = _ticket()
        submitted = _full_action(assignment_group="security_team")

        score, breakdown = grade_action(submitted, reference, task_id=3)

        scores = _scores(
            issue_type=1.0,
            priority=1.0,
            assignment_group=0.0,
            resolution_action=1.0,
        )
        self.assertEqual(breakdown["assignment_group"], 0.0)
        self.assertAlmostEqual(score, _expected_task_score(3, **scores))

    # Task 3 combines all four fields using TASK_WEIGHTS[3].
    def test_task_3_weights_apply_as_documented(self) -> None:
        reference = _ticket(priority="high")
        submitted = _full_action(priority="medium", assignment_group="security_team")

        score, breakdown = grade_action(submitted, reference, task_id=3)

        scores = _scores(
            issue_type=1.0,
            priority=0.5,
            assignment_group=0.0,
            resolution_action=1.0,
        )
        self.assertEqual(breakdown, _expected_breakdown(3, **scores))
        self.assertAlmostEqual(score, _expected_task_score(3, **scores))

    # An action matching the alternate route exactly is expected to score the
    # alternate multiplier (0.85) on every field and overall.
    def test_alternate_route_can_win_when_primary_route_is_worse(self) -> None:
        reference = HelpdeskTicketRecord(
            ticket_id="ticket-alt",
            title="Planning ticket",
            requester="planner@example.com",
            description="Capacity-sensitive routing decision.",
            issue_type="service_request",
            priority="medium",
            assignment_group="procurement",
            resolution_action="assign",
            alternate_issue_type="billing_license",
            alternate_priority="high",
            alternate_assignment_group="license_ops",
            alternate_resolution_action="fulfill",
            alternate_route_score_multiplier=0.85,
        )
        submitted = _full_action()

        score, breakdown = grade_action(submitted, reference, task_id=3)

        scaled = dict.fromkeys(
            ("issue_type", "priority", "assignment_group", "resolution_action"),
            0.85,
        )
        self.assertAlmostEqual(score, 0.85)
        self.assertEqual(breakdown, scaled)

    # acknowledge predicted for fulfill is expected to earn 0.35.
    def test_resolution_action_partial_credit_uses_declared_similarity_table(self) -> None:
        reference = _ticket()
        submitted = _full_action(resolution_action="acknowledge")

        score, breakdown = grade_action(submitted, reference, task_id=3)

        scores = _scores(
            issue_type=1.0,
            priority=1.0,
            assignment_group=1.0,
            resolution_action=0.35,
        )
        self.assertEqual(breakdown["resolution_action"], 0.35)
        self.assertAlmostEqual(score, _expected_task_score(3, **scores))

    # ignore predicted for fulfill is expected to earn nothing.
    def test_resolution_action_unrelated_miss_stays_zero(self) -> None:
        reference = _ticket()
        submitted = _full_action(resolution_action="ignore")

        score, breakdown = grade_action(submitted, reference, task_id=3)

        scores = _scores(
            issue_type=1.0,
            priority=1.0,
            assignment_group=1.0,
            resolution_action=0.0,
        )
        self.assertEqual(breakdown["resolution_action"], 0.0)
        self.assertAlmostEqual(score, _expected_task_score(3, **scores))

    # Every (expected, predicted) assignment-group pair follows the table.
    def test_assignment_group_scoring_matches_declared_similarity_table_exhaustively(self) -> None:
        pairs = ((want, got) for want in ASSIGNMENT_GROUPS for got in ASSIGNMENT_GROUPS)
        for want, got in pairs:
            with self.subTest(expected=want, predicted=got):
                reference = _ticket(assignment_group=want)
                submitted = _full_action(assignment_group=got)

                score, breakdown = grade_action(submitted, reference, task_id=3)

                if got == want:
                    group_score = 1.0
                else:
                    group_score = ASSIGNMENT_GROUP_SIMILARITY.get((got, want), 0.0)
                scores = _scores(
                    issue_type=1.0,
                    priority=1.0,
                    assignment_group=group_score,
                    resolution_action=1.0,
                )

                self.assertEqual(breakdown, _expected_breakdown(3, **scores))
                self.assertAlmostEqual(score, _expected_task_score(3, **scores))

    # Every (expected, predicted) resolution-action pair follows the table.
    def test_resolution_action_scoring_matches_declared_similarity_table_exhaustively(self) -> None:
        pairs = ((want, got) for want in RESOLUTION_ACTIONS for got in RESOLUTION_ACTIONS)
        for want, got in pairs:
            with self.subTest(expected=want, predicted=got):
                reference = _ticket(resolution_action=want)
                submitted = _full_action(resolution_action=got)

                score, breakdown = grade_action(submitted, reference, task_id=3)

                if got == want:
                    action_score = 1.0
                else:
                    action_score = RESOLUTION_ACTION_SIMILARITY.get((got, want), 0.0)
                scores = _scores(
                    issue_type=1.0,
                    priority=1.0,
                    assignment_group=1.0,
                    resolution_action=action_score,
                )

                self.assertEqual(breakdown, _expected_breakdown(3, **scores))
                self.assertAlmostEqual(score, _expected_task_score(3, **scores))

    # Every partial-credit entry must lie strictly between 0 and 1.
    def test_partial_credit_tables_never_override_exact_match(self) -> None:
        tables = (
            ("issue_type", ISSUE_TYPE_SIMILARITY),
            ("priority", PRIORITY_SCORES),
            ("assignment_group", ASSIGNMENT_GROUP_SIMILARITY),
            ("resolution_action", RESOLUTION_ACTION_SIMILARITY),
        )
        for table_name, table in tables:
            for pair, value in table.items():
                with self.subTest(table=table_name, pair=pair):
                    self.assertGreater(value, 0.0)
                    self.assertLess(value, 1.0)

    # The weights declared for each task must add up to 1.
    def test_task_weights_sum_to_one_for_each_task(self) -> None:
        for task_id, weights in TASK_WEIGHTS.items():
            with self.subTest(task_id=task_id):
                total_weight = sum(weights.values())
                self.assertAlmostEqual(total_weight, 1.0)

    # Grading the same inputs twice yields identical results.
    def test_grade_action_is_deterministic_for_same_inputs(self) -> None:
        reference = _ticket(issue_type="service_request", priority="medium")
        submitted = HelpdeskTicketAction(
            issue_type="general_inquiry",
            priority="low",
            assignment_group="license_ops",
            resolution_action="assign",
        )

        score_a, breakdown_a = grade_action(submitted, reference, task_id=3)
        score_b, breakdown_b = grade_action(submitted, reference, task_id=3)

        self.assertEqual(score_a, score_b)
        self.assertEqual(breakdown_a, breakdown_b)


if __name__ == "__main__":
    unittest.main()