File size: 7,066 Bytes
ea03c8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b7aa1f0
b105545
 
 
2dedffd
 
ea03c8c
 
 
 
 
 
 
9e6686d
 
 
 
 
 
a1089c9
 
 
ea03c8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9e6686d
 
 
ea03c8c
 
9e6686d
 
 
a1089c9
 
 
 
ea03c8c
 
 
 
 
 
 
 
 
 
 
 
e32a33b
 
 
 
 
 
 
 
ea03c8c
 
 
 
 
 
 
9e6686d
ea03c8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83eb290
b7aa1f0
ea03c8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37bfd28
 
 
ea03c8c
 
 
 
 
 
83eb290
ea03c8c
 
b105545
 
 
 
 
ea03c8c
 
 
 
 
 
83eb290
ea03c8c
 
 
 
 
 
 
 
 
9e6686d
ea03c8c
 
 
 
 
 
 
 
9e6686d
ea03c8c
 
 
 
 
 
 
 
9e6686d
ea03c8c
 
 
 
9e6686d
ea03c8c
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
"""Typed models for the ChargebackOps OpenEnv environment."""

from __future__ import annotations

from typing import Any, Literal

from openenv.core.env_server.types import Action, Observation, State
from pydantic import BaseModel, Field

# Closed vocabularies shared by the models below. Each is a typing.Literal, so
# pydantic rejects any value that is not one of the listed strings.

# Systems named in EvidenceCard.source_system, VisibleCase.systems_revealed and
# ChargebackOpsAction.system_name.
SystemName = Literal["orders", "payment", "shipping", "support", "refunds", "risk"]
# Strategies that can be recorded on a case or supplied with an action.
StrategyName = Literal["contest", "accept_chargeback", "issue_refund"]
# Every value accepted for ChargebackOpsAction.action_type.
ActionType = Literal[
    "select_case",
    "inspect_case",
    "query_system",
    "retrieve_policy",
    "add_evidence",
    "remove_evidence",
    "set_strategy",
    "submit_representment",
    "resolve_case",
    # multi-round dispute actions
    "respond_to_pre_arb",
    "escalate_to_arbitration",
    "accept_arbitration_loss",
    # long-horizon backlog action
    "wait_for_updates",
]


# One row of the case queue; ChargebackOpsObservation.queue is a list of these.
# Every field is required (none declares a default).
class CaseQueueItem(BaseModel):
    """Queue-level summary of a chargeback case."""

    # Identifiers and timestamps are plain strings; this model does not
    # validate their format.
    case_id: str
    transaction_id: str
    transaction_timestamp: str
    dispute_opened_at: str
    merchant_name: str
    merchant_mcc: str
    masked_card: str
    card_network: str
    # NOTE(review): both network_reason_code and reason_code exist; the
    # distinction is not visible in this file — confirm with the case source.
    network_reason_code: str
    response_window_days: int
    amount: float
    # Free-form string here; presumably a currency code — TODO confirm.
    currency: str
    reason_code: str
    # Unconstrained string (no Literal/enum is applied to status).
    status: str
    summary: str
    # Presumably expressed in environment steps — TODO confirm units.
    deadline_step: int
    steps_until_deadline: int


# Used as the element type of VisibleCase.retrieved_evidence and
# VisibleCase.attached_evidence.
class EvidenceCard(BaseModel):
    """Evidence snippet visible to the agent."""

    evidence_id: str
    # Restricted to the SystemName literal values.
    source_system: SystemName
    title: str
    summary: str
    # Defaults to False, i.e. a card is unattached unless stated otherwise.
    attached: bool = False


# Optional payload of VisibleCase.policy.
class PolicyView(BaseModel):
    """Visible reason-code policy guidance."""

    reason_code: str
    guidance: str
    # default_factory gives each instance its own empty list.
    # NOTE(review): whether entries are evidence ids or category names is not
    # visible here — confirm against the policy source.
    required_evidence: list[str] = Field(default_factory=list)


# Detailed view of a single case; exposed as ChargebackOpsObservation.visible_case.
# Fields without a default are required; the optional ones start as None,
# an empty list, or the literal defaults shown.
class VisibleCase(BaseModel):
    """Current workspace for the selected case."""

    # Shares most identifying fields with CaseQueueItem and adds order_id,
    # customer_id and compelling_evidence_category.
    case_id: str
    transaction_id: str
    transaction_timestamp: str
    dispute_opened_at: str
    order_id: str
    customer_id: str
    merchant_name: str
    merchant_mcc: str
    masked_card: str
    card_network: str
    network_reason_code: str
    response_window_days: int
    compelling_evidence_category: str
    amount: float
    currency: str
    reason_code: str
    status: str
    # None until a strategy has been recorded (default is None).
    current_strategy: StrategyName | None = None
    summary: str
    inspection_notes: str | None = None
    # The list fields below use default_factory so instances never share a list.
    systems_revealed: list[SystemName] = Field(default_factory=list)
    retrieved_evidence: list[EvidenceCard] = Field(default_factory=list)
    attached_evidence: list[EvidenceCard] = Field(default_factory=list)
    policy: PolicyView | None = None
    submission_status: str | None = None
    # Multi-round dispute lifecycle visibility
    # round_number defaults to 1.
    round_number: int = 1
    last_issuer_decision: str | None = None
    last_issuer_rationale: str | None = None
    # Presumably evidence ids added during pre-arbitration — TODO confirm.
    pre_arb_evidence_added: list[str] = Field(default_factory=list)
    arbitration_outcome: str | None = None
    arb_fees_paid: float = 0.0
    # Defaults to None; presumably set once the case is settled — TODO confirm.
    final_economic_outcome: float | None = None


# Element type of TasksResponse.tasks. All fields are required.
class TaskSummary(BaseModel):
    """Metadata for a built-in task."""

    task_id: str
    title: str
    # Same four-level literal used by ChargebackOpsObservation.difficulty and
    # ChargebackOpsState.difficulty; keep the three in sync when editing.
    difficulty: Literal["easy", "medium", "hard", "nightmare"]
    objective: str
    description: str
    max_steps: int
    case_count: int


# Element type of ChargebackOpsState.action_history.
class ActionTraceItem(BaseModel):
    """Compact action history row."""

    step_index: int
    # Typed as plain str rather than ActionType, so values are not validated
    # against the action vocabulary here.
    action_type: str
    # Optional; defaults to None when the action had no target case.
    case_id: str | None = None
    outcome: str
    reward: float


# Element type of ChargebackOpsState.queue_state.
class CaseResolutionState(BaseModel):
    """Public case state in the current episode."""

    case_id: str
    status: str
    current_strategy: StrategyName | None = None
    # Defaults to False.
    resolved: bool = False
    # Required even though it follows defaulted fields; pydantic permits this
    # ordering.
    steps_until_deadline: int


# Element type of GraderReport.case_reports.
class CaseScoreBreakdown(BaseModel):
    """Per-case grading breakdown."""

    case_id: str
    # Component scores are unconstrained floats; no range is enforced by this
    # model. NOTE(review): presumably each lies in [0, 1] — confirm with grader.
    strategy_correctness: float
    evidence_quality: float
    packet_validity: float
    deadline_compliance: float
    efficiency: float
    outcome_quality: float
    # These two are optional and default to 0.0 and 1.0 respectively.
    note_quality: float = 0.0
    escalation_roi: float = 1.0
    # How components are weighted is not defined in this file.
    weighted_score: float
    final_resolution: str
    notes: str


# Exposed via ChargebackOpsObservation.grader_report and
# ChargebackOpsState.grader_report.
class GraderReport(BaseModel):
    """Episode-level deterministic grade report."""

    episode_id: str
    task_id: str
    # NOTE(review): the relationship between total_score and normalized_score
    # is not defined in this file — confirm with the grader implementation.
    total_score: float
    normalized_score: float
    completed: bool
    # Defaults to an empty list; one CaseScoreBreakdown per graded case.
    case_reports: list[CaseScoreBreakdown] = Field(default_factory=list)
    summary: str


# Element type of BaselineRunResult.task_results. All fields are required.
class BaselineTaskResult(BaseModel):
    """Baseline score for one task."""

    task_id: str
    title: str
    score: float
    steps_used: int
    # Unconstrained string; allowed values are not defined in this file.
    final_status: str


# Summary of a baseline run across tasks, plus provider call bookkeeping.
class BaselineRunResult(BaseModel):
    """Aggregate baseline result payload."""

    provider: str
    model_name: str
    mode: str
    # Call counters default to 0.
    provider_calls_attempted: int = 0
    provider_calls_succeeded: int = 0
    # Maps a string key to a count; presumably keyed by error type — TODO confirm.
    provider_errors: dict[str, int] = Field(default_factory=dict)
    # Required (no default), unlike the counters above.
    task_results: list[BaselineTaskResult]
    # Supplied by the caller; this model does not compute it from task_results.
    average_score: float


# Response body pairing the task catalogue with an action schema.
class TasksResponse(BaseModel):
    """Payload returned by /tasks."""

    tasks: list[TaskSummary]
    # Untyped mapping; presumably the JSON schema of ChargebackOpsAction —
    # TODO confirm against the /tasks handler.
    action_schema: dict[str, Any]


# Extends the OpenEnv Action base class. Only action_type is required; every
# other field has a default so callers send just what a given action needs.
# NOTE: pydantic includes the class docstring and each Field description in the
# generated JSON schema, so that text is kept verbatim here.
class ChargebackOpsAction(Action):
    """Action schema for ChargebackOps."""

    # Must be one of the ActionType literal values.
    action_type: ActionType
    # Optional target case; capped at 64 characters.
    case_id: str | None = Field(
        default=None, max_length=64, description="Target case id when applicable"
    )
    # This model does not itself enforce that system_name is present when
    # action_type is "query_system"; no cross-field validator is defined here.
    system_name: SystemName | None = Field(
        default=None,
        description="System to query when action_type is query_system",
    )
    # On a list field max_length limits the number of items (20), not the
    # length of each id string.
    evidence_ids: list[str] = Field(
        default_factory=list,
        max_length=20,
        description="Evidence ids to attach or remove",
    )
    # Same 20-item cap as evidence_ids.
    compelling_evidence_ids: list[str] = Field(
        default_factory=list,
        max_length=20,
        description="Evidence ids to attach as compelling evidence in pre-arbitration (round 2)",
    )
    strategy: StrategyName | None = Field(
        default=None,
        description="Strategy to set or use when resolving a case",
    )
    # Free-text rationale, capped at 500 characters.
    note: str | None = Field(
        default=None,
        max_length=500,
        description="Optional short rationale for the action",
    )


# Extends the OpenEnv Observation base class; any fields inherited from that
# base are defined outside this file.
class ChargebackOpsObservation(Observation):
    """Observation returned by reset() and step()."""

    # Required task metadata (no defaults).
    task_id: str
    task_title: str
    difficulty: Literal["easy", "medium", "hard", "nightmare"]
    objective: str
    # None when no case is selected (default).
    selected_case_id: str | None = None
    queue: list[CaseQueueItem] = Field(default_factory=list)
    visible_case: VisibleCase | None = None
    # Defaults to the empty string.
    last_action_result: str = ""
    # Typed as list[str], not list[ActionType], so entries are not validated
    # against the action vocabulary.
    available_actions: list[str] = Field(default_factory=list)
    # Required; has no default unlike the neighbouring fields.
    steps_remaining: int
    progress_score: float = 0.0
    # Free-form extra data; its keys are not defined in this file.
    info: dict[str, Any] = Field(default_factory=dict)
    # Defaults to None; presumably populated once grading has run — TODO confirm.
    grader_report: GraderReport | None = None


# Extends the OpenEnv State base class; any fields inherited from that base
# are defined outside this file.
class ChargebackOpsState(State):
    """Extended environment state returned by state()."""

    # Required task metadata; mirrors the same four fields on
    # ChargebackOpsObservation.
    task_id: str
    task_title: str
    difficulty: Literal["easy", "medium", "hard", "nightmare"]
    objective: str
    selected_case_id: str | None = None
    # Per-case public status rows and the compact action log; both default to
    # empty lists.
    queue_state: list[CaseResolutionState] = Field(default_factory=list)
    action_history: list[ActionTraceItem] = Field(default_factory=list)
    # Named float metrics; the set of metric names is not defined in this file.
    metrics: dict[str, float] = Field(default_factory=dict)
    # Both default to None.
    latest_grade: float | None = None
    grader_report: GraderReport | None = None
    # Defaults to False.
    completed: bool = False