File size: 2,920 Bytes
bd67155
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
from __future__ import annotations

from typing import Any, Dict, List, Literal, Optional

from pydantic import BaseModel, Field


# Closed set of action names. It is the annotation of `Action.action_type`,
# so pydantic rejects any string that is not listed here when an `Action`
# is validated.
# NOTE(review): what each action actually does is decided by the code that
# consumes `Action`; that code is not visible in this module.
ActionType = Literal[
    "inspect_ticket",
    "request_context",
    "set_priority",
    "set_route",
    "set_resolution",
    "escalate",
    "rank_queue",
    "finalize",
]


class RewardModel(BaseModel):
    """Scalar reward value with an optional named breakdown and rationale."""

    # Required scalar.
    value: float
    # Named float parts; each instance gets its own empty dict by default.
    # NOTE(review): nothing in this model ties `components` to `value`
    # (e.g. no sum check) — confirm whether the producer guarantees that.
    components: Dict[str, float] = Field(default_factory=dict)
    # Free-text explanation; empty string when not supplied.
    rationale: str = ""


class Action(BaseModel):
    """A single action request: an action name, a target and an optional value."""

    # Must be one of the literals listed in `ActionType`.
    action_type: ActionType
    # Defaults to the literal string "T1".
    # NOTE(review): presumably a ticket id (compare the `ticket_id` fields on
    # the ticket models) — confirm, and confirm that "T1" is a sensible
    # default for every task.
    target: str = "T1"
    # Optional string payload; None when omitted.
    # NOTE(review): presumably the argument for the set_*/escalate/rank_queue
    # actions — verify against the consumer of this model.
    value: Optional[str] = None


class TicketObservation(BaseModel):
    """Per-ticket view: identity, summary, context dicts and current selections."""

    ticket_id: str
    summary: str
    # Required string-to-string mapping.
    visible_context: Dict[str, str]
    # String-to-string mapping; starts as a fresh empty dict per instance.
    # NOTE(review): presumably filled as context is requested/inspected —
    # the code that populates it is not in this module.
    discovered_context: Dict[str, str] = Field(default_factory=dict)
    # The four fields below all default to None.
    # NOTE(review): presumably None means "not chosen yet" — confirm.
    selected_priority: Optional[str] = None
    selected_route: Optional[str] = None
    selected_resolution: Optional[str] = None
    escalation_team: Optional[str] = None


class Observation(BaseModel):
    """Task-level observation: task metadata, ticket views and step budget."""

    task_id: str
    # Restricted by validation to exactly "easy", "medium" or "hard".
    difficulty: Literal["easy", "medium", "hard"]
    title: str
    instruction: str
    # Required here (no default), unlike `TaskSpec.queue_mode`.
    queue_mode: bool
    tickets: List[TicketObservation]
    remaining_steps: int
    # NOTE(review): typed as plain strings, so entries are NOT validated
    # against `ActionType` — confirm whether that is intentional.
    available_actions: List[str]
    # List of strings; empty list by default (fresh per instance).
    # NOTE(review): presumably ticket ids in the current ranking — confirm.
    current_queue_order: List[str] = Field(default_factory=list)
    # Named float values; empty dict by default (fresh per instance).
    score_hint: Dict[str, float] = Field(default_factory=dict)


class StateModel(BaseModel):
    """Snapshot of task progress: step counter, done flag and per-key selections."""

    task_id: str
    step_count: int
    done: bool
    # NOTE(review): the dict fields below are presumably keyed by ticket id
    # (they mirror the per-ticket fields of `TicketObservation`) — verify
    # against the code that builds this model.
    # Maps a key to a list of strings.
    discovered_keys: Dict[str, List[str]]
    # Each of these maps a key to a string or None.
    priorities: Dict[str, Optional[str]]
    routes: Dict[str, Optional[str]]
    resolutions: Dict[str, Optional[str]]
    escalations: Dict[str, Optional[str]]
    queue_order: List[str]
    cumulative_reward: float
    # Named float values; empty dict by default (fresh per instance).
    latest_score: Dict[str, float] = Field(default_factory=dict)


class TicketSpec(BaseModel):
    """Ticket definition: summary, visible/hidden context and gold answers."""

    ticket_id: str
    summary: str
    # Both context mappings are required string-to-string dicts.
    visible_context: Dict[str, str]
    hidden_context: Dict[str, str]
    # NOTE(review): presumably names keys of `hidden_context` that must be
    # discovered — this model does not check that they exist there.
    required_context: List[str]
    # Required reference values for priority, route and resolution.
    gold_priority: str
    gold_route: str
    gold_resolution: str
    # Optional; None by default.
    # NOTE(review): presumably None means no escalation is expected — confirm
    # with the grader.
    gold_escalation_team: Optional[str] = None


class TaskSpec(BaseModel):
    """Task definition: metadata, step limit, tickets and grading settings."""

    task_id: str
    # Restricted by validation to exactly "easy", "medium" or "hard".
    difficulty: Literal["easy", "medium", "hard"]
    title: str
    description: str
    instruction: str
    max_steps: int
    # False unless explicitly set.
    queue_mode: bool = False
    tickets: List[TicketSpec]
    # List of strings; empty list by default (fresh per instance).
    # NOTE(review): presumably only meaningful when `queue_mode` is True —
    # nothing in this model enforces that.
    gold_queue_order: List[str] = Field(default_factory=list)
    # Required string.
    # NOTE(review): presumably looked up in a grader registry elsewhere —
    # not validated here.
    grader_name: str
    # Named float weights; empty dict by default (fresh per instance).
    reward_weights: Dict[str, float] = Field(default_factory=dict)


class TaskGrade(BaseModel):
    """Grading result: overall score, pass flag, component scores and notes."""

    task_id: str
    # NOTE(review): no range constraint is declared on `score`; confirm the
    # expected scale with the grader implementation.
    score: float
    passed: bool
    # Required mapping of component name to float score.
    component_scores: Dict[str, float]
    # List of strings; empty list by default (fresh per instance).
    notes: List[str] = Field(default_factory=list)


class StepInfo(BaseModel):
    """Per-step info record: counters, current score and optional grade/event data."""

    task_id: str
    step_count: int
    task_score: float
    # Optional string; None by default.
    # NOTE(review): presumably set only once the episode has ended — confirm.
    done_reason: Optional[str] = None
    # Optional nested `TaskGrade`; None by default.
    grade: Optional[TaskGrade] = None
    # Event label; empty string by default.
    event: str = ""
    # Named float values; empty dict by default (fresh per instance).
    event_score: Dict[str, float] = Field(default_factory=dict)


class BaselineResult(BaseModel):
    """Result record for one run: task id, difficulty, score, steps and transcript."""

    task_id: str
    # NOTE(review): plain `str` here, whereas `TaskSpec.difficulty` and
    # `Observation.difficulty` use Literal["easy", "medium", "hard"], so this
    # field accepts any string. Confirm whether that is intentional before
    # tightening it.
    difficulty: str
    score: float
    steps: int
    # List of string-keyed dicts with arbitrary values; the entry schema is
    # not declared in this module.
    transcript: List[Dict[str, Any]]