File size: 3,616 Bytes
e4accbb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
"""Typed models for the SaaS support triage benchmark."""

from __future__ import annotations

from typing import Any, Literal, get_args

from pydantic import BaseModel, ConfigDict, Field

# Use the OpenEnv base classes when they can be imported; otherwise alias both
# names to pydantic's BaseModel so the models below can still be defined.
try:
    from openenv.core.env_server.types import Action as OpenEnvAction
    from openenv.core.env_server.types import Observation as OpenEnvObservation
except Exception:  # pragma: no cover - compatibility fallback
    # NOTE(review): this catches every Exception, not just ImportError —
    # presumably so a broken or incompatible OpenEnv install also falls back;
    # confirm that is intended.
    OpenEnvAction = BaseModel
    OpenEnvObservation = BaseModel


# Closed string vocabularies used as field types by the models in this module.
# A field annotated with one of these aliases only accepts the listed values.
Priority = Literal["P1", "P2", "P3", "P4"]  # NOTE(review): presumably P1 is the most urgent — confirm
QueueName = Literal["billing", "security", "technical", "success", "trust_safety"]
Disposition = Literal["respond", "request_info", "escalate", "close"]
Difficulty = Literal["easy", "medium", "hard"]
CustomerTier = Literal["starter", "growth", "enterprise"]


class TaskCard(BaseModel):
    # Descriptive card for one benchmark task: identifier, title, difficulty
    # level, free-text description, and a ticket count.
    # extra="forbid" makes validation reject keys that are not declared below.
    model_config = ConfigDict(extra="forbid")

    task_id: str
    title: str
    difficulty: Difficulty  # "easy", "medium", or "hard"
    description: str
    ticket_count: int  # presumably the number of tickets in the task — confirm against the task loader


class TicketSnapshot(BaseModel):
    # One support ticket as carried inside an observation (see the `ticket`
    # field of SupportQueueObservation).
    # extra="forbid" makes validation reject keys that are not declared below.
    model_config = ConfigDict(extra="forbid")

    ticket_id: str
    subject: str
    body: str
    customer_tier: CustomerTier  # "starter", "growth", or "enterprise"
    product_area: str
    sla_hours: int  # NOTE(review): presumably the SLA window in hours — confirm units with the data source
    # Optional; each instance gets its own fresh empty list when omitted.
    recent_events: list[str] = Field(default_factory=list)


class SupportQueueAction(OpenEnvAction):
    # Triage decision for a single ticket: priority, destination queue,
    # disposition, a short summary, and a response text.
    # Derives from the OpenEnv Action type when it was importable, otherwise
    # from pydantic's BaseModel (see the import fallback at the top of the file).
    # extra="forbid" makes validation reject keys that are not declared below.
    model_config = ConfigDict(extra="forbid")

    priority: Priority
    queue: QueueName
    disposition: Disposition
    # Required; must be between 8 and 280 characters long.
    summary: str = Field(..., min_length=8, max_length=280)
    # Required; must be between 16 and 1200 characters long.
    response: str = Field(..., min_length=16, max_length=1200)
    # Optional; defaults to 0.5 and must lie in the closed interval [0.0, 1.0].
    confidence: float = Field(default=0.5, ge=0.0, le=1.0)


class GradingBreakdown(BaseModel):
    # Per-component scores for one graded action. Every field is optional and
    # defaults to 0.0; no value ranges are enforced by this model.
    # NOTE(review): presumably `total` is derived from the component scores and
    # `penalty` by the grader — that computation is not in this module; confirm.
    # extra="forbid" makes validation reject keys that are not declared below.
    model_config = ConfigDict(extra="forbid")

    priority_score: float = 0.0
    queue_score: float = 0.0
    disposition_score: float = 0.0
    summary_score: float = 0.0
    response_score: float = 0.0
    penalty: float = 0.0
    total: float = 0.0


class TicketFeedback(BaseModel):
    # Grading result for one ticket: the expected priority/queue/disposition,
    # the score breakdown, and a free-text feedback message.
    # extra="forbid" makes validation reject keys that are not declared below.
    model_config = ConfigDict(extra="forbid")

    ticket_id: str
    expected_priority: Priority
    expected_queue: QueueName
    expected_disposition: Disposition
    breakdown: GradingBreakdown
    feedback: str


class SupportQueueObservation(OpenEnvObservation):
    # Observation handed back by the environment: task metadata, the current
    # ticket, the vocabularies an action may use, scoring weights, and
    # reward/termination bookkeeping.
    # Derives from the OpenEnv Observation type when it was importable,
    # otherwise from pydantic's BaseModel.
    # extra="forbid" makes validation reject keys that are not declared below.
    model_config = ConfigDict(extra="forbid")

    task_id: str
    task_title: str
    difficulty: Difficulty
    instructions: str
    current_index: int  # NOTE(review): presumably the position of `ticket` within the task — confirm base (0 or 1)
    total_tickets: int
    ticket: TicketSnapshot
    # The allowed_* defaults are derived from the Literal aliases rather than
    # re-typed, so the advertised values can never drift from what
    # SupportQueueAction validation actually accepts. get_args() returns the
    # Literal members in declaration order, giving the same lists as before.
    allowed_priorities: list[Priority] = Field(
        default_factory=lambda: list(get_args(Priority))
    )
    allowed_queues: list[QueueName] = Field(
        default_factory=lambda: list(get_args(QueueName))
    )
    allowed_dispositions: list[Disposition] = Field(
        default_factory=lambda: list(get_args(Disposition))
    )
    # Default component weights; the five values sum to 1.0.
    # NOTE(review): keys presumably correspond to the *_score fields of
    # GradingBreakdown — confirm against the grader.
    scoring_weights: dict[str, float] = Field(
        default_factory=lambda: {
            "priority": 0.30,
            "queue": 0.25,
            "disposition": 0.20,
            "summary": 0.15,
            "response": 0.10,
        }
    )
    # None until feedback is supplied by the caller building the observation.
    last_feedback: TicketFeedback | None = None
    cumulative_reward: float = 0.0
    reward: float = 0.0
    done: bool = False
    # Free-form extra data; each instance gets its own fresh empty dict.
    info: dict[str, Any] = Field(default_factory=dict)


class SupportQueueState(BaseModel):
    # Snapshot of an episode: which task is running, progress counters,
    # reward totals, and per-ticket history lists.
    # extra="forbid" makes validation reject keys that are not declared below.
    model_config = ConfigDict(extra="forbid")

    episode_id: str
    task: TaskCard
    current_index: int
    total_tickets: int
    done: bool
    cumulative_reward: float
    average_reward: float  # NOTE(review): presumably cumulative_reward averaged over graded tickets — computed elsewhere; confirm
    # The three history lists are optional; each instance gets its own fresh
    # empty list when they are omitted.
    ticket_scores: list[TicketFeedback] = Field(default_factory=list)
    action_history: list[SupportQueueAction] = Field(default_factory=list)
    processed_tickets: list[str] = Field(default_factory=list)  # presumably ticket_id values — confirm