File size: 4,106 Bytes
149595c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
from typing import Dict, Any, Tuple
from graders.base import BaseGrader
from environment.base import BaseWorkflowEnvironment, Observation


class SupportTicketGrader(BaseGrader):
    """Score one ticket-triage action against the ticket's reference answers.

    The raw score is the sum of three components:

    * priority  - up to 0.6, with partial credit for a near miss,
    * time      - up to 0.3, scaled by the relative estimation error,
    * assignee  - 0.1 for an exact match, otherwise 0.

    The raw score is then passed through ``apply_efficiency_penalty``, which
    is inherited from ``BaseGrader`` (not defined in this file).
    """

    # The index distance between two entries drives the partial credit for a
    # wrong priority, so the order of this list matters.
    PRIORITIES = ["critical", "high", "medium", "low"]

    # Maximum points each component can contribute to the raw score.
    _PRIORITY_WEIGHT = 0.6
    _TIME_WEIGHT = 0.3
    _ASSIGNEE_WEIGHT = 0.1

    def grade(self, state: Dict[str, Any], action: Dict[str, Any], step_count: int) -> Tuple[float, Dict[str, Any]]:
        """Grade ``action`` against ``state["current_ticket"]``.

        Args:
            state: Task state; the reference answers are read from the
                ``correct_priority``, ``correct_time`` and ``correct_assignee``
                keys of ``state["current_ticket"]``.
            action: Submitted answer with optional ``priority``,
                ``estimated_time`` and ``assignee`` keys.
            step_count: Forwarded to ``apply_efficiency_penalty``.

        Returns:
            ``(final_score, metadata)``. The three ``*_score`` flags in
            ``metadata`` are True when that component earned its full weight;
            ``total`` is the penalised final score.
        """
        ticket = state.get("current_ticket", {})

        priority_points = self._priority_points(
            action.get("priority"), ticket.get("correct_priority")
        )
        time_points = self._time_points(
            action.get("estimated_time", 0), ticket.get("correct_time", 0)
        )
        if action.get("assignee") == ticket.get("correct_assignee"):
            assignee_points = self._ASSIGNEE_WEIGHT
        else:
            assignee_points = 0.0

        score = priority_points + time_points + assignee_points
        final_score = self.apply_efficiency_penalty(score, step_count)

        # Each flag is derived from its own component. Comparing the combined
        # score against the thresholds would let one strong component mark
        # the others as passed.
        return final_score, {
            "priority_score": priority_points >= self._PRIORITY_WEIGHT,
            "time_score": time_points >= self._TIME_WEIGHT,
            "assignee_score": assignee_points >= self._ASSIGNEE_WEIGHT,
            "total": final_score,
        }

    def _priority_points(self, selected: Any, correct: Any) -> float:
        """Return the priority component: full weight on a match, minus 0.2 per step of distance."""
        if selected == correct:
            return self._PRIORITY_WEIGHT
        # list.index raises ValueError for values not in the list, so an
        # unknown or missing priority is scored as zero rather than looked up.
        if selected not in self.PRIORITIES or correct not in self.PRIORITIES:
            return 0.0
        distance = abs(self.PRIORITIES.index(selected) - self.PRIORITIES.index(correct))
        return max(0.0, self._PRIORITY_WEIGHT - (distance * 0.2))

    def _time_points(self, estimated: Any, correct: Any) -> float:
        """Return the time component: full weight scaled down by the relative error."""
        numeric = (int, float)
        if not isinstance(estimated, numeric) or not isinstance(correct, numeric):
            # A missing or non-numeric estimate cannot be compared; no credit.
            return 0.0
        # max(correct, 1) avoids dividing by zero when the reference time is 0.
        relative_error = abs(estimated - correct) / max(correct, 1)
        return max(0.0, self._TIME_WEIGHT - (relative_error * self._TIME_WEIGHT))


class SupportTicketTask(BaseWorkflowEnvironment):
    """Environment that presents a fixed list of support tickets one at a time.

    Each action is graded by ``SupportTicketGrader`` against the current
    ticket. The episode ends after the last ticket, with a reward equal to
    the mean of the per-ticket scores.
    """

    TEST_TICKETS = [
        {
            "id": 101,
            "title": "Database connection failure",
            "description": "Cannot connect to primary database. All transactions failing.",
            "correct_priority": "critical",
            "correct_time": 30,
            "correct_assignee": "database-team"
        },
        {
            "id": 102,
            "title": "User password reset request",
            "description": "User cannot log in, needs password reset.",
            "correct_priority": "medium",
            "correct_time": 10,
            "correct_assignee": "support"
        },
        {
            "id": 103,
            "title": "Feature request: Dark mode",
            "description": "Would like dark mode option for dashboard.",
            "correct_priority": "low",
            "correct_time": 480,
            "correct_assignee": "frontend"
        }
    ]

    # Ticket keys with this prefix hold the reference answers used for grading.
    _ANSWER_KEY_PREFIX = "correct_"

    @classmethod
    def _public_ticket(cls, ticket: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of ``ticket`` without the reference-answer keys.

        Observations must not carry the ``correct_*`` fields, otherwise the
        expected answer is visible to whoever is being graded. Returning a
        new dict also keeps consumers from mutating ``TEST_TICKETS``.
        """
        return {
            key: value
            for key, value in ticket.items()
            if not key.startswith(cls._ANSWER_KEY_PREFIX)
        }

    def reset(self) -> Observation:
        """Start a new episode at the first ticket and return the initial observation."""
        super().reset()
        self.task_state["ticket_index"] = 0
        # The full ticket, answers included, stays in task_state for the grader.
        self.task_state["current_ticket"] = self.TEST_TICKETS[0]
        self.task_state["score"] = 0.0

        return Observation(
            done=False,
            reward=0.0,
            observation={
                "task": "support_ticket",
                "ticket": self._public_ticket(self.task_state["current_ticket"]),
                "priorities": SupportTicketGrader.PRIORITIES
            }
        )

    def _execute_action(self, action: Dict[str, Any]) -> Observation:
        """Grade ``action`` for the current ticket and advance to the next one.

        Returns a terminal observation carrying the mean score once every
        ticket has been answered; otherwise returns the next ticket together
        with the running (summed) score.
        """
        grader = SupportTicketGrader()
        score, meta = grader.grade(self.task_state, action, self._state.step_count)

        self.task_state["score"] += score
        self.task_state["ticket_index"] += 1

        if self.task_state["ticket_index"] >= len(self.TEST_TICKETS):
            # Episode reward is the average of the per-ticket scores.
            final_score = self.task_state["score"] / len(self.TEST_TICKETS)
            return Observation(
                done=True,
                reward=final_score,
                observation={"final_score": final_score},
                metadata=meta
            )

        self.task_state["current_ticket"] = self.TEST_TICKETS[self.task_state["ticket_index"]]

        return Observation(
            done=False,
            reward=score,
            observation={
                "ticket": self._public_ticket(self.task_state["current_ticket"]),
                "current_score": self.task_state["score"]
            },
            metadata=meta
        )