Spaces:
Runtime error
Runtime error
| from typing import Dict, Any, Tuple | |
| from graders.base import BaseGrader | |
| from environment.base import BaseWorkflowEnvironment, Observation | |
class SupportTicketGrader(BaseGrader):
    """Grade one support-ticket triage action against the ticket's answer key.

    The raw score is the sum of three weighted components:

    * priority (up to 0.6) - full marks for an exact match, otherwise 0.2 is
      deducted per step of distance within ``PRIORITIES``;
    * time estimate (up to 0.3) - scaled down linearly by the relative error
      against the ticket's ``correct_time``;
    * assignee (0.1) - all or nothing.

    The raw score is then passed through ``apply_efficiency_penalty``, which
    this class does not define (it comes from the base class).
    """

    # Index distance between two entries drives the partial credit for a
    # near-miss priority, so the order of this list matters.
    PRIORITIES = ["critical", "high", "medium", "low"]

    # Maximum contribution of each component to the raw score.
    PRIORITY_WEIGHT = 0.6
    TIME_WEIGHT = 0.3
    ASSIGNEE_WEIGHT = 0.1
    # Points lost per step of distance between selected and correct priority.
    PRIORITY_STEP_PENALTY = 0.2

    def grade(self, state: Dict[str, Any], action: Dict[str, Any], step_count: int) -> Tuple[float, Dict[str, Any]]:
        """Score ``action`` against ``state["current_ticket"]``.

        Args:
            state: Task state; the ticket under review is read from the
                ``"current_ticket"`` key (a missing or ``None`` ticket is
                treated as an empty one).
            action: The submitted decision; the keys ``"priority"``,
                ``"estimated_time"`` and ``"assignee"`` are read.
            step_count: Forwarded unchanged to ``apply_efficiency_penalty``.

        Returns:
            ``(final_score, metadata)``. ``metadata`` holds one boolean per
            component telling whether that component earned its full weight,
            plus the penalised total under ``"total"``.
        """
        ticket = state.get("current_ticket") or {}

        priority_points = self._priority_points(
            action.get("priority"), ticket.get("correct_priority")
        )
        time_points = self._time_points(
            action.get("estimated_time", 0), ticket.get("correct_time", 0)
        )
        if action.get("assignee") == ticket.get("correct_assignee"):
            assignee_points = self.ASSIGNEE_WEIGHT
        else:
            assignee_points = 0.0

        score = priority_points + time_points + assignee_points
        final_score = self.apply_efficiency_penalty(score, step_count)

        # Each flag is derived from its own component; comparing the
        # cumulative score here would make the flags meaningless.
        return final_score, {
            "priority_score": priority_points >= self.PRIORITY_WEIGHT,
            "time_score": time_points >= self.TIME_WEIGHT,
            "assignee_score": assignee_points >= self.ASSIGNEE_WEIGHT,
            "total": final_score
        }

    def _priority_points(self, selected_priority: Any, correct_priority: Any) -> float:
        """Return the priority component, with partial credit for near misses."""
        if selected_priority == correct_priority:
            return self.PRIORITY_WEIGHT
        # A missing or unknown label cannot be placed on the scale; award
        # nothing instead of letting list.index() raise ValueError.
        if selected_priority not in self.PRIORITIES or correct_priority not in self.PRIORITIES:
            return 0.0
        priority_distance = abs(
            self.PRIORITIES.index(selected_priority) - self.PRIORITIES.index(correct_priority)
        )
        return max(0.0, self.PRIORITY_WEIGHT - (priority_distance * self.PRIORITY_STEP_PENALTY))

    def _time_points(self, estimated_time: Any, correct_time: Any) -> float:
        """Return the time component, scaled by relative estimation error."""
        try:
            estimate = float(estimated_time)
            target = float(correct_time)
        except (TypeError, ValueError, OverflowError):
            # None or non-numeric input: no credit rather than a crash.
            return 0.0
        # max(..., 1) keeps the division defined when the target is 0.
        time_error = abs(estimate - target) / max(target, 1)
        return max(0.0, self.TIME_WEIGHT - (time_error * self.TIME_WEIGHT))
class SupportTicketTask(BaseWorkflowEnvironment):
    """Workflow task that walks through ``TEST_TICKETS`` one ticket per action.

    Each action is graded by ``SupportTicketGrader`` against the current
    ticket. Per-ticket scores are accumulated in ``task_state["score"]`` and
    the episode ends after the last ticket, reporting the mean score.
    """

    # Fixture tickets. The ``correct_*`` fields are the answer key read by
    # the grader; they are stripped from every observation.
    TEST_TICKETS = [
        {
            "id": 101,
            "title": "Database connection failure",
            "description": "Cannot connect to primary database. All transactions failing.",
            "correct_priority": "critical",
            "correct_time": 30,
            "correct_assignee": "database-team"
        },
        {
            "id": 102,
            "title": "User password reset request",
            "description": "User cannot log in, needs password reset.",
            "correct_priority": "medium",
            "correct_time": 10,
            "correct_assignee": "support"
        },
        {
            "id": 103,
            "title": "Feature request: Dark mode",
            "description": "Would like dark mode option for dashboard.",
            "correct_priority": "low",
            "correct_time": 480,
            "correct_assignee": "frontend"
        }
    ]

    # Ground-truth keys that must not be shown to the agent being graded.
    _ANSWER_KEYS = ("correct_priority", "correct_time", "correct_assignee")

    @classmethod
    def _public_ticket(cls, ticket: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of ``ticket`` without the answer-key fields."""
        return {key: value for key, value in ticket.items() if key not in cls._ANSWER_KEYS}

    def reset(self) -> Observation:
        """Restart the episode at the first ticket and return it.

        Returns:
            A non-terminal observation with the task name, the first ticket
            (answer key removed) and the list of allowed priorities.
        """
        super().reset()
        self.task_state["ticket_index"] = 0
        # The full ticket stays in task_state because the grader needs the
        # answer key; only the filtered copy goes into the observation.
        self.task_state["current_ticket"] = self.TEST_TICKETS[0]
        self.task_state["score"] = 0.0
        return Observation(
            done=False,
            reward=0.0,
            observation={
                "task": "support_ticket",
                "ticket": self._public_ticket(self.task_state["current_ticket"]),
                # Copy so a consumer cannot mutate the grader's class-level list.
                "priorities": list(SupportTicketGrader.PRIORITIES)
            }
        )

    def _execute_action(self, action: Dict[str, Any]) -> Observation:
        """Grade ``action`` for the current ticket and advance to the next.

        Args:
            action: The submitted triage decision, passed to the grader.

        Returns:
            A terminal observation carrying the mean score once every ticket
            has been graded; otherwise a non-terminal observation with the
            next ticket (answer key removed) and the running score.
        """
        ticket_count = len(self.TEST_TICKETS)

        # Episode already finished: do not re-grade the stale ticket, which
        # would keep inflating the accumulated score.
        if self.task_state["ticket_index"] >= ticket_count:
            final_score = self.task_state["score"] / ticket_count
            return Observation(
                done=True,
                reward=0.0,
                observation={"final_score": final_score},
                metadata={}
            )

        grader = SupportTicketGrader()
        score, meta = grader.grade(self.task_state, action, self._state.step_count)
        self.task_state["score"] += score
        self.task_state["ticket_index"] += 1

        if self.task_state["ticket_index"] >= ticket_count:
            final_score = self.task_state["score"] / ticket_count
            return Observation(
                done=True,
                reward=final_score,
                observation={"final_score": final_score},
                metadata=meta
            )

        self.task_state["current_ticket"] = self.TEST_TICKETS[self.task_state["ticket_index"]]
        return Observation(
            done=False,
            reward=score,
            observation={
                "ticket": self._public_ticket(self.task_state["current_ticket"]),
                "current_score": self.task_state["score"]
            },
            metadata=meta
        )