AIHack-ITHelpDesk / tests /test_grader_unit.py
Roopalgn's picture
Upgrade helpdesk env with queue dynamics and operational actions
043d9e1
from __future__ import annotations
import unittest
import openenv_test_stubs # noqa: F401
from models import HelpdeskTicketAction, HelpdeskTicketRecord
from server.grader import (
ASSIGNMENT_GROUP_SIMILARITY,
ISSUE_TYPE_SIMILARITY,
PRIORITY_SCORES,
RESOLUTION_ACTION_SIMILARITY,
TASK_WEIGHTS,
grade_action,
)
from vocabulary import ASSIGNMENT_GROUPS, ISSUE_TYPES, PRIORITIES, RESOLUTION_ACTIONS
def _expected_breakdown(task_id: int, **field_scores: float) -> dict[str, float]:
return {field: field_scores[field] for field in TASK_WEIGHTS[task_id]}
def _expected_task_score(task_id: int, **field_scores: float) -> float:
raw_score = sum(
field_scores[field] * TASK_WEIGHTS[task_id][field]
for field in TASK_WEIGHTS[task_id]
)
return max(0.0, min(1.0, raw_score))
def _ticket(
*,
issue_type: str = "billing_license",
priority: str = "high",
assignment_group: str = "license_ops",
resolution_action: str = "fulfill",
) -> HelpdeskTicketRecord:
return HelpdeskTicketRecord(
ticket_id="ticket-test",
title="Test ticket",
requester="user@example.com",
description="Synthetic ticket used for deterministic grader tests.",
issue_type=issue_type,
priority=priority,
assignment_group=assignment_group,
resolution_action=resolution_action,
)
class GraderUnitTests(unittest.TestCase):
def test_task_3_exact_match_scores_one(self) -> None:
ticket = _ticket()
action = HelpdeskTicketAction(
issue_type="billing_license",
priority="high",
assignment_group="license_ops",
resolution_action="fulfill",
)
score, breakdown = grade_action(action, ticket, task_id=3)
self.assertAlmostEqual(score, 1.0)
self.assertEqual(
breakdown,
{
"issue_type": 1.0,
"priority": 1.0,
"assignment_group": 1.0,
"resolution_action": 1.0,
},
)
def test_unknown_task_id_raises(self) -> None:
ticket = _ticket()
action = HelpdeskTicketAction(issue_type="billing_license")
with self.assertRaisesRegex(ValueError, "Unsupported task_id"):
grade_action(action, ticket, task_id=99)
def test_issue_type_partial_credit_only_for_known_similarity_pair(self) -> None:
ticket = _ticket(issue_type="billing_license")
action = HelpdeskTicketAction(issue_type="service_request")
score, breakdown = grade_action(action, ticket, task_id=1)
expected_breakdown = _expected_breakdown(
1,
issue_type=0.4,
priority=0.0,
assignment_group=0.0,
resolution_action=0.0,
)
self.assertEqual(breakdown, expected_breakdown)
self.assertAlmostEqual(
score,
_expected_task_score(
1,
issue_type=0.4,
priority=0.0,
assignment_group=0.0,
resolution_action=0.0,
),
)
def test_issue_type_scoring_matches_declared_similarity_table_exhaustively(self) -> None:
for expected in ISSUE_TYPES:
for predicted in ISSUE_TYPES:
with self.subTest(expected=expected, predicted=predicted):
ticket = _ticket(issue_type=expected)
action = HelpdeskTicketAction(issue_type=predicted)
score, breakdown = grade_action(action, ticket, task_id=1)
raw_expected_score = (
1.0
if predicted == expected
else ISSUE_TYPE_SIMILARITY.get((predicted, expected), 0.0)
)
expected_breakdown = _expected_breakdown(
1,
issue_type=raw_expected_score,
priority=0.0,
assignment_group=0.0,
resolution_action=0.0,
)
self.assertAlmostEqual(
score,
_expected_task_score(
1,
issue_type=raw_expected_score,
priority=0.0,
assignment_group=0.0,
resolution_action=0.0,
),
)
self.assertEqual(breakdown, expected_breakdown)
def test_unrelated_issue_type_gets_zero_not_fuzzy_credit(self) -> None:
ticket = _ticket(issue_type="onboarding")
action = HelpdeskTicketAction(issue_type="spam_phishing")
score, breakdown = grade_action(action, ticket, task_id=1)
self.assertAlmostEqual(score, 0.0)
self.assertEqual(
breakdown,
_expected_breakdown(
1,
issue_type=0.0,
priority=0.0,
assignment_group=0.0,
resolution_action=0.0,
),
)
def test_priority_scoring_uses_defined_proximity_table(self) -> None:
ticket = _ticket(priority="critical")
action = HelpdeskTicketAction(issue_type="billing_license", priority="high")
score, breakdown = grade_action(action, ticket, task_id=2)
self.assertAlmostEqual(breakdown["issue_type"], 1.0)
self.assertAlmostEqual(breakdown["priority"], 0.6)
self.assertAlmostEqual(
score,
_expected_task_score(
2,
issue_type=1.0,
priority=0.6,
assignment_group=0.0,
resolution_action=0.0,
),
)
def test_priority_scoring_matches_declared_table_exhaustively(self) -> None:
for expected in PRIORITIES:
for predicted in PRIORITIES:
with self.subTest(expected=expected, predicted=predicted):
ticket = _ticket(priority=expected)
action = HelpdeskTicketAction(
issue_type="billing_license",
priority=predicted,
)
score, breakdown = grade_action(action, ticket, task_id=2)
priority_score = (
1.0
if predicted == expected
else PRIORITY_SCORES.get((predicted, expected), 0.0)
)
self.assertEqual(
breakdown,
_expected_breakdown(
2,
issue_type=1.0,
priority=priority_score,
assignment_group=0.0,
resolution_action=0.0,
),
)
self.assertAlmostEqual(
score,
_expected_task_score(
2,
issue_type=1.0,
priority=priority_score,
assignment_group=0.0,
resolution_action=0.0,
),
)
def test_task_2_weights_apply_as_documented(self) -> None:
ticket = _ticket(priority="high")
action = HelpdeskTicketAction(issue_type="billing_license", priority="medium")
score, breakdown = grade_action(action, ticket, task_id=2)
self.assertEqual(
breakdown,
_expected_breakdown(
2,
issue_type=1.0,
priority=0.5,
assignment_group=0.0,
resolution_action=0.0,
),
)
self.assertAlmostEqual(
score,
_expected_task_score(
2,
issue_type=1.0,
priority=0.5,
assignment_group=0.0,
resolution_action=0.0,
),
)
def test_assignment_group_partial_credit_uses_declared_similarity_table(self) -> None:
ticket = _ticket()
action = HelpdeskTicketAction(
issue_type="billing_license",
priority="high",
assignment_group="procurement",
resolution_action="fulfill",
)
score, breakdown = grade_action(action, ticket, task_id=3)
self.assertEqual(breakdown["assignment_group"], 0.55)
self.assertAlmostEqual(
score,
_expected_task_score(
3,
issue_type=1.0,
priority=1.0,
assignment_group=0.55,
resolution_action=1.0,
),
)
def test_assignment_group_unrelated_miss_stays_zero(self) -> None:
ticket = _ticket()
action = HelpdeskTicketAction(
issue_type="billing_license",
priority="high",
assignment_group="security_team",
resolution_action="fulfill",
)
score, breakdown = grade_action(action, ticket, task_id=3)
self.assertEqual(breakdown["assignment_group"], 0.0)
self.assertAlmostEqual(
score,
_expected_task_score(
3,
issue_type=1.0,
priority=1.0,
assignment_group=0.0,
resolution_action=1.0,
),
)
def test_task_3_weights_apply_as_documented(self) -> None:
ticket = _ticket(priority="high")
action = HelpdeskTicketAction(
issue_type="billing_license",
priority="medium",
assignment_group="security_team",
resolution_action="fulfill",
)
score, breakdown = grade_action(action, ticket, task_id=3)
self.assertEqual(
breakdown,
_expected_breakdown(
3,
issue_type=1.0,
priority=0.5,
assignment_group=0.0,
resolution_action=1.0,
),
)
self.assertAlmostEqual(
score,
_expected_task_score(
3,
issue_type=1.0,
priority=0.5,
assignment_group=0.0,
resolution_action=1.0,
),
)
def test_alternate_route_can_win_when_primary_route_is_worse(self) -> None:
ticket = HelpdeskTicketRecord(
ticket_id="ticket-alt",
title="Planning ticket",
requester="planner@example.com",
description="Capacity-sensitive routing decision.",
issue_type="service_request",
priority="medium",
assignment_group="procurement",
resolution_action="assign",
alternate_issue_type="billing_license",
alternate_priority="high",
alternate_assignment_group="license_ops",
alternate_resolution_action="fulfill",
alternate_route_score_multiplier=0.85,
)
action = HelpdeskTicketAction(
issue_type="billing_license",
priority="high",
assignment_group="license_ops",
resolution_action="fulfill",
)
score, breakdown = grade_action(action, ticket, task_id=3)
self.assertAlmostEqual(score, 0.85)
self.assertEqual(
breakdown,
{
"issue_type": 0.85,
"priority": 0.85,
"assignment_group": 0.85,
"resolution_action": 0.85,
},
)
def test_resolution_action_partial_credit_uses_declared_similarity_table(self) -> None:
ticket = _ticket()
action = HelpdeskTicketAction(
issue_type="billing_license",
priority="high",
assignment_group="license_ops",
resolution_action="acknowledge",
)
score, breakdown = grade_action(action, ticket, task_id=3)
self.assertEqual(breakdown["resolution_action"], 0.35)
self.assertAlmostEqual(
score,
_expected_task_score(
3,
issue_type=1.0,
priority=1.0,
assignment_group=1.0,
resolution_action=0.35,
),
)
def test_resolution_action_unrelated_miss_stays_zero(self) -> None:
ticket = _ticket()
action = HelpdeskTicketAction(
issue_type="billing_license",
priority="high",
assignment_group="license_ops",
resolution_action="ignore",
)
score, breakdown = grade_action(action, ticket, task_id=3)
self.assertEqual(breakdown["resolution_action"], 0.0)
self.assertAlmostEqual(
score,
_expected_task_score(
3,
issue_type=1.0,
priority=1.0,
assignment_group=1.0,
resolution_action=0.0,
),
)
def test_assignment_group_scoring_matches_declared_similarity_table_exhaustively(self) -> None:
for expected in ASSIGNMENT_GROUPS:
for predicted in ASSIGNMENT_GROUPS:
with self.subTest(expected=expected, predicted=predicted):
ticket = _ticket(assignment_group=expected)
action = HelpdeskTicketAction(
issue_type="billing_license",
priority="high",
assignment_group=predicted,
resolution_action="fulfill",
)
score, breakdown = grade_action(action, ticket, task_id=3)
assignment_group_score = (
1.0
if predicted == expected
else ASSIGNMENT_GROUP_SIMILARITY.get((predicted, expected), 0.0)
)
self.assertEqual(
breakdown,
_expected_breakdown(
3,
issue_type=1.0,
priority=1.0,
assignment_group=assignment_group_score,
resolution_action=1.0,
),
)
self.assertAlmostEqual(
score,
_expected_task_score(
3,
issue_type=1.0,
priority=1.0,
assignment_group=assignment_group_score,
resolution_action=1.0,
),
)
def test_resolution_action_scoring_matches_declared_similarity_table_exhaustively(self) -> None:
for expected in RESOLUTION_ACTIONS:
for predicted in RESOLUTION_ACTIONS:
with self.subTest(expected=expected, predicted=predicted):
ticket = _ticket(resolution_action=expected)
action = HelpdeskTicketAction(
issue_type="billing_license",
priority="high",
assignment_group="license_ops",
resolution_action=predicted,
)
score, breakdown = grade_action(action, ticket, task_id=3)
resolution_action_score = (
1.0
if predicted == expected
else RESOLUTION_ACTION_SIMILARITY.get((predicted, expected), 0.0)
)
self.assertEqual(
breakdown,
_expected_breakdown(
3,
issue_type=1.0,
priority=1.0,
assignment_group=1.0,
resolution_action=resolution_action_score,
),
)
self.assertAlmostEqual(
score,
_expected_task_score(
3,
issue_type=1.0,
priority=1.0,
assignment_group=1.0,
resolution_action=resolution_action_score,
),
)
def test_partial_credit_tables_never_override_exact_match(self) -> None:
for pair, value in ISSUE_TYPE_SIMILARITY.items():
with self.subTest(table="issue_type", pair=pair):
self.assertGreater(value, 0.0)
self.assertLess(value, 1.0)
for pair, value in PRIORITY_SCORES.items():
with self.subTest(table="priority", pair=pair):
self.assertGreater(value, 0.0)
self.assertLess(value, 1.0)
for pair, value in ASSIGNMENT_GROUP_SIMILARITY.items():
with self.subTest(table="assignment_group", pair=pair):
self.assertGreater(value, 0.0)
self.assertLess(value, 1.0)
for pair, value in RESOLUTION_ACTION_SIMILARITY.items():
with self.subTest(table="resolution_action", pair=pair):
self.assertGreater(value, 0.0)
self.assertLess(value, 1.0)
def test_task_weights_sum_to_one_for_each_task(self) -> None:
for task_id, weights in TASK_WEIGHTS.items():
with self.subTest(task_id=task_id):
self.assertAlmostEqual(sum(weights.values()), 1.0)
def test_grade_action_is_deterministic_for_same_inputs(self) -> None:
ticket = _ticket(issue_type="service_request", priority="medium")
action = HelpdeskTicketAction(
issue_type="general_inquiry",
priority="low",
assignment_group="license_ops",
resolution_action="assign",
)
first_score, first_breakdown = grade_action(action, ticket, task_id=3)
second_score, second_breakdown = grade_action(action, ticket, task_id=3)
self.assertEqual(first_score, second_score)
self.assertEqual(first_breakdown, second_breakdown)
if __name__ == "__main__":
unittest.main()