Spaces:
Running
Running
Strengthen queue benchmark and refresh landing page
Browse files
- README.md +9 -4
- inference.py +15 -2
- models.py +4 -0
- policy_learning.py +69 -6
- server/app.py +751 -12
- server/environment.py +336 -27
- server/tasks.py +5 -3
- tests/test_competitive_upgrade.py +53 -1
- tests/test_environment_smoke.py +18 -0
- tests/test_policy_learning.py +2 -0
README.md
CHANGED
|
@@ -65,7 +65,7 @@ The environment classes and vocabulary are intentionally frozen to keep collabor
|
|
| 65 |
|
| 66 |
## Lightweight Policy Improvement Loop
|
| 67 |
|
| 68 |
-
The repo includes a local policy runner in `policy_learning.py`. It still does not update model weights, but it now does more than cosmetic search: it evaluates repeated seeded rollouts, learns cue-conditioned tool preferences for investigation, uses the same planning-aware deterministic submit logic as `inference.py`, and ranks policies by terminal rubric reward first, with lower planning penalty as the tie-breaker.
|
| 69 |
|
| 70 |
That gives the project a meaningful improvement loop for judge demos:
|
| 71 |
|
|
@@ -99,7 +99,7 @@ The default submit policy inside this runner stays deterministic and local. It r
|
|
| 99 |
| ID | Name | Difficulty | Required Fields | What The Agent Must Do |
|
| 100 |
|----|------|------------|-----------------|-------------------------|
|
| 101 |
| 1 | Guided Full Routing | Easy | `issue_type`, `priority`, `assignment_group`, `resolution_action` | route a mostly visible ticket correctly |
|
| 102 |
-
| 2 | Contextual Full Routing | Medium | `issue_type`, `priority`, `assignment_group`, `resolution_action` | route under partial observability with investigation and
|
| 103 |
| 3 | Adaptive Queue Routing | Hard | `issue_type`, `priority`, `assignment_group`, `resolution_action` | route while managing queue pressure, incidents, clustered follow-ons, deferrals, and downstream follow-ups |
|
| 104 |
|
| 105 |
## Locked Vocabulary
|
|
@@ -158,6 +158,7 @@ Visible ticket fields:
|
|
| 158 |
- optional `routing_options`
|
| 159 |
- optional `capacity_state`
|
| 160 |
- optional `operational_context`
|
|
|
|
| 161 |
- optional `generated_from_ticket_id`
|
| 162 |
|
| 163 |
Each observation also includes:
|
|
@@ -181,7 +182,7 @@ Each observation also includes:
|
|
| 181 |
- `last_reward_components`
|
| 182 |
- `rubric_reward` on terminal observations
|
| 183 |
- `metadata.last_feedback_summary` for compact reward / penalty feedback
|
| 184 |
-
- `metadata.capacity_state`
|
| 185 |
- `metadata.planning_penalty_total` and `metadata.planning_penalty_applied`
|
| 186 |
- standard OpenEnv fields such as `done` and `reward`
|
| 187 |
|
|
@@ -204,6 +205,8 @@ The internal `HelpdeskTicketState` tracks:
|
|
| 204 |
- `planning_penalty_total`
|
| 205 |
- `incident_gap_total`
|
| 206 |
- `sla_breach_count`
|
|
|
|
|
|
|
| 207 |
- `dynamic_queue_events`
|
| 208 |
|
| 209 |
## Grading And Reward
|
|
@@ -232,6 +235,7 @@ Hard-task investigation behavior:
|
|
| 232 |
- linked-ticket previews and internal routing notes stay hidden until the matching tool is used
|
| 233 |
- capacity-sensitive tickets can expose queue pressure, future demand, and alternate routing options through `lookup_queue_capacity_forecast`
|
| 234 |
- cluster-sensitive tickets can expose future related tickets, shared-requester load, and active incident coverage through `lookup_queue_cluster_summary`
|
|
|
|
| 235 |
- only useful investigation steps return a small positive shaping reward
|
| 236 |
- blind or repeated probing does not pay by default
|
| 237 |
- premature hard-task submission can incur a shaping penalty even when the visible text looks plausible
|
|
@@ -260,7 +264,7 @@ Task weights:
|
|
| 260 |
Final episode rubric reward is queue-based:
|
| 261 |
|
| 262 |
```text
|
| 263 |
-
clamp(
|
| 264 |
```
|
| 265 |
|
| 266 |
Both `reward` and `rubric_reward` now use the closed interval `[0.0, 1.0]`.
|
|
@@ -273,6 +277,7 @@ To make the environment more RL-friendly, each observation now also surfaces str
|
|
| 273 |
|
| 274 |
- `last_reward_components` exposes ticket score, shaped step reward, milestone adjustment, trajectory reward when applicable, and any investigation penalty applied
|
| 275 |
- `average_score_so_far` and `progress_fraction` expose trajectory progress without leaking future labels
|
|
|
|
| 276 |
- hard-task telemetry includes planning penalties, capacity usage, and the post-action capacity snapshot
|
| 277 |
- `history` retains the same reward components plus a compact `feedback_summary` string for downstream agents
|
| 278 |
|
|
|
|
| 65 |
|
| 66 |
## Lightweight Policy Improvement Loop
|
| 67 |
|
| 68 |
+
The repo includes a local policy runner in `policy_learning.py`. It still does not update model weights, but it now does more than cosmetic search: it evaluates repeated seeded rollouts, learns cue-conditioned tool preferences for investigation, uses the same planning-aware deterministic submit logic as `inference.py`, and ranks policies by terminal rubric reward first, then queue-management quality, with lower planning penalty as the next tie-breaker.
|
| 69 |
|
| 70 |
That gives the project a meaningful improvement loop for judge demos:
|
| 71 |
|
|
|
|
| 99 |
| ID | Name | Difficulty | Required Fields | What The Agent Must Do |
|
| 100 |
|----|------|------------|-----------------|-------------------------|
|
| 101 |
| 1 | Guided Full Routing | Easy | `issue_type`, `priority`, `assignment_group`, `resolution_action` | route a mostly visible ticket correctly |
|
| 102 |
+
| 2 | Contextual Full Routing | Medium | `issue_type`, `priority`, `assignment_group`, `resolution_action` | route under partial observability with investigation, clarification, and moderate queue carry-over |
|
| 103 |
| 3 | Adaptive Queue Routing | Hard | `issue_type`, `priority`, `assignment_group`, `resolution_action` | route while managing queue pressure, incidents, clustered follow-ons, deferrals, and downstream follow-ups |
|
| 104 |
|
| 105 |
## Locked Vocabulary
|
|
|
|
| 158 |
- optional `routing_options`
|
| 159 |
- optional `capacity_state`
|
| 160 |
- optional `operational_context`
|
| 161 |
+
- optional `cluster_summary`
|
| 162 |
- optional `generated_from_ticket_id`
|
| 163 |
|
| 164 |
Each observation also includes:
|
|
|
|
| 182 |
- `last_reward_components`
|
| 183 |
- `rubric_reward` on terminal observations
|
| 184 |
- `metadata.last_feedback_summary` for compact reward / penalty feedback
|
| 185 |
+
- `metadata.capacity_state` on hard-task episodes
|
| 186 |
- `metadata.planning_penalty_total` and `metadata.planning_penalty_applied`
|
| 187 |
- standard OpenEnv fields such as `done` and `reward`
|
| 188 |
|
|
|
|
| 205 |
- `planning_penalty_total`
|
| 206 |
- `incident_gap_total`
|
| 207 |
- `sla_breach_count`
|
| 208 |
+
- `queue_management_score`
|
| 209 |
+
- `queue_management_breakdown`
|
| 210 |
- `dynamic_queue_events`
|
| 211 |
|
| 212 |
## Grading And Reward
|
|
|
|
| 235 |
- linked-ticket previews and internal routing notes stay hidden until the matching tool is used
|
| 236 |
- capacity-sensitive tickets can expose queue pressure, future demand, and alternate routing options through `lookup_queue_capacity_forecast`
|
| 237 |
- cluster-sensitive tickets can expose future related tickets, shared-requester load, and active incident coverage through `lookup_queue_cluster_summary`
|
| 238 |
+
- detailed cluster counts and future queue-demand breakdowns stay hidden until the matching queue tool is used
|
| 239 |
- only useful investigation steps return a small positive shaping reward
|
| 240 |
- blind or repeated probing does not pay by default
|
| 241 |
- premature hard-task submission can incur a shaping penalty even when the visible text looks plausible
|
|
|
|
| 264 |
Final episode rubric reward is queue-based:
|
| 265 |
|
| 266 |
```text
|
| 267 |
+
clamp(route_trajectory_reward * route_weight + queue_management_score * queue_weight - extra investigation penalties)
|
| 268 |
```
|
| 269 |
|
| 270 |
Both `reward` and `rubric_reward` now use the closed interval `[0.0, 1.0]`.
|
|
|
|
| 277 |
|
| 278 |
- `last_reward_components` exposes ticket score, shaped step reward, milestone adjustment, trajectory reward when applicable, and any investigation penalty applied
|
| 279 |
- `average_score_so_far` and `progress_fraction` expose trajectory progress without leaking future labels
|
| 280 |
+
- medium and hard telemetry now also exposes terminal `queue_management_score` plus a queue-management breakdown
|
| 281 |
- hard-task telemetry includes planning penalties, capacity usage, and the post-action capacity snapshot
|
| 282 |
- `history` retains the same reward components plus a compact `feedback_summary` string for downstream agents
|
| 283 |
|
inference.py
CHANGED
|
@@ -581,6 +581,7 @@ def build_routing_text(ticket: dict) -> str:
|
|
| 581 |
last_tool_result = ticket.get("last_tool_result") or {}
|
| 582 |
routing_options = ticket.get("routing_options") or []
|
| 583 |
operational_context = ticket.get("operational_context") or {}
|
|
|
|
| 584 |
return " ".join(
|
| 585 |
[
|
| 586 |
ticket.get("title", ""),
|
|
@@ -593,6 +594,7 @@ def build_routing_text(ticket: dict) -> str:
|
|
| 593 |
json.dumps(last_tool_result, sort_keys=True),
|
| 594 |
json.dumps(routing_options, sort_keys=True),
|
| 595 |
json.dumps(operational_context, sort_keys=True),
|
|
|
|
| 596 |
json.dumps(ticket.get("capacity_state") or {}, sort_keys=True),
|
| 597 |
json.dumps(ticket.get("future_queue_demand") or {}, sort_keys=True),
|
| 598 |
]
|
|
@@ -1013,9 +1015,11 @@ def should_investigate(
|
|
| 1013 |
)
|
| 1014 |
)
|
| 1015 |
operational_context = ticket.get("operational_context") or {}
|
|
|
|
| 1016 |
cluster_signal = (
|
| 1017 |
-
|
| 1018 |
-
or int(
|
|
|
|
| 1019 |
or any(
|
| 1020 |
phrase in routing_text
|
| 1021 |
for phrase in (
|
|
@@ -1103,6 +1107,15 @@ def merge_ticket_context(ticket: dict, observation: Any) -> dict:
|
|
| 1103 |
merged_ticket = dict(ticket)
|
| 1104 |
if getattr(observation, "last_tool_result", None) is not None:
|
| 1105 |
merged_ticket["last_tool_result"] = observation.last_tool_result
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1106 |
merged_ticket["recent_history"] = list(getattr(observation, "history", []))
|
| 1107 |
merged_ticket["queue_position"] = getattr(observation, "queue_position", None)
|
| 1108 |
merged_ticket["tickets_remaining"] = getattr(observation, "tickets_remaining", None)
|
|
|
|
| 581 |
last_tool_result = ticket.get("last_tool_result") or {}
|
| 582 |
routing_options = ticket.get("routing_options") or []
|
| 583 |
operational_context = ticket.get("operational_context") or {}
|
| 584 |
+
cluster_summary = ticket.get("cluster_summary") or {}
|
| 585 |
return " ".join(
|
| 586 |
[
|
| 587 |
ticket.get("title", ""),
|
|
|
|
| 594 |
json.dumps(last_tool_result, sort_keys=True),
|
| 595 |
json.dumps(routing_options, sort_keys=True),
|
| 596 |
json.dumps(operational_context, sort_keys=True),
|
| 597 |
+
json.dumps(cluster_summary, sort_keys=True),
|
| 598 |
json.dumps(ticket.get("capacity_state") or {}, sort_keys=True),
|
| 599 |
json.dumps(ticket.get("future_queue_demand") or {}, sort_keys=True),
|
| 600 |
]
|
|
|
|
| 1015 |
)
|
| 1016 |
)
|
| 1017 |
operational_context = ticket.get("operational_context") or {}
|
| 1018 |
+
cluster_summary = ticket.get("cluster_summary") or {}
|
| 1019 |
cluster_signal = (
|
| 1020 |
+
bool(operational_context.get("cluster_coordination_hint"))
|
| 1021 |
+
or int(cluster_summary.get("future_cluster_ticket_count", 0) or 0) > 0
|
| 1022 |
+
or int(cluster_summary.get("shared_requester_count", 0) or 0) > 1
|
| 1023 |
or any(
|
| 1024 |
phrase in routing_text
|
| 1025 |
for phrase in (
|
|
|
|
| 1107 |
merged_ticket = dict(ticket)
|
| 1108 |
if getattr(observation, "last_tool_result", None) is not None:
|
| 1109 |
merged_ticket["last_tool_result"] = observation.last_tool_result
|
| 1110 |
+
if observation.last_tool_result.get("tool_name") == "lookup_queue_capacity_forecast":
|
| 1111 |
+
if observation.last_tool_result.get("future_queue_demand") is not None:
|
| 1112 |
+
merged_ticket["future_queue_demand"] = observation.last_tool_result[
|
| 1113 |
+
"future_queue_demand"
|
| 1114 |
+
]
|
| 1115 |
+
if observation.last_tool_result.get("capacity_state") is not None:
|
| 1116 |
+
merged_ticket["capacity_state"] = observation.last_tool_result[
|
| 1117 |
+
"capacity_state"
|
| 1118 |
+
]
|
| 1119 |
merged_ticket["recent_history"] = list(getattr(observation, "history", []))
|
| 1120 |
merged_ticket["queue_position"] = getattr(observation, "queue_position", None)
|
| 1121 |
merged_ticket["tickets_remaining"] = getattr(observation, "tickets_remaining", None)
|
models.py
CHANGED
|
@@ -214,6 +214,8 @@ class HelpdeskTicketState(State):
|
|
| 214 |
escalation_slots_remaining: int = 0
|
| 215 |
planning_penalty_total: float = 0.0
|
| 216 |
capacity_pressure_tickets_resolved: int = 0
|
|
|
|
|
|
|
| 217 |
ticket_request_info_usage: dict[str, int] = Field(default_factory=dict)
|
| 218 |
ticket_defer_counts: dict[str, int] = Field(default_factory=dict)
|
| 219 |
open_incident_ticket_ids: list[str] = Field(default_factory=list)
|
|
@@ -226,4 +228,6 @@ class HelpdeskTicketState(State):
|
|
| 226 |
spawned_follow_up_ticket_ids: list[str] = Field(default_factory=list)
|
| 227 |
spawned_follow_up_source_ids: list[str] = Field(default_factory=list)
|
| 228 |
dynamic_queue_events: list[dict[str, Any]] = Field(default_factory=list)
|
|
|
|
|
|
|
| 229 |
history_entries: list[dict] = Field(default_factory=list)
|
|
|
|
| 214 |
escalation_slots_remaining: int = 0
|
| 215 |
planning_penalty_total: float = 0.0
|
| 216 |
capacity_pressure_tickets_resolved: int = 0
|
| 217 |
+
cluster_stabilizations_total: int = 0
|
| 218 |
+
cluster_destabilizations_total: int = 0
|
| 219 |
ticket_request_info_usage: dict[str, int] = Field(default_factory=dict)
|
| 220 |
ticket_defer_counts: dict[str, int] = Field(default_factory=dict)
|
| 221 |
open_incident_ticket_ids: list[str] = Field(default_factory=list)
|
|
|
|
| 228 |
spawned_follow_up_ticket_ids: list[str] = Field(default_factory=list)
|
| 229 |
spawned_follow_up_source_ids: list[str] = Field(default_factory=list)
|
| 230 |
dynamic_queue_events: list[dict[str, Any]] = Field(default_factory=list)
|
| 231 |
+
queue_management_score: float = 0.0
|
| 232 |
+
queue_management_breakdown: dict[str, Any] = Field(default_factory=dict)
|
| 233 |
history_entries: list[dict] = Field(default_factory=list)
|
policy_learning.py
CHANGED
|
@@ -249,6 +249,7 @@ def _routing_text(ticket: dict[str, Any]) -> str:
|
|
| 249 |
json.dumps(ticket.get("last_tool_result") or {}, sort_keys=True),
|
| 250 |
json.dumps(ticket.get("routing_options") or [], sort_keys=True),
|
| 251 |
json.dumps(ticket.get("operational_context") or {}, sort_keys=True),
|
|
|
|
| 252 |
json.dumps(ticket.get("capacity_state") or {}, sort_keys=True),
|
| 253 |
json.dumps(ticket.get("future_queue_demand") or {}, sort_keys=True),
|
| 254 |
]
|
|
@@ -284,8 +285,11 @@ def infer_ticket_cue(ticket: dict[str, Any]) -> str:
|
|
| 284 |
):
|
| 285 |
return "capacity_planning"
|
| 286 |
if (
|
| 287 |
-
|
|
|
|
| 288 |
> 0
|
|
|
|
|
|
|
| 289 |
or any(
|
| 290 |
phrase in text
|
| 291 |
for phrase in (
|
|
@@ -471,6 +475,43 @@ def choose_operational_action(
|
|
| 471 |
return None, None
|
| 472 |
|
| 473 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 474 |
def choose_policy_action(
|
| 475 |
policy: PolicyConfig,
|
| 476 |
observation: HelpdeskTicketObservation,
|
|
@@ -480,7 +521,7 @@ def choose_policy_action(
|
|
| 480 |
used_tools_by_ticket: dict[str, set[str]] | None = None,
|
| 481 |
adaptive_bandit: AdaptiveToolBandit | None = None,
|
| 482 |
) -> tuple[HelpdeskTicketAction, str, str | None]:
|
| 483 |
-
ticket = observation.current_ticket or {}
|
| 484 |
ticket_id = str(ticket.get("ticket_id", ""))
|
| 485 |
ticket_investigations = investigations_by_ticket.get(ticket_id, 0)
|
| 486 |
used_tools = set()
|
|
@@ -588,7 +629,7 @@ def rollout_episode(
|
|
| 588 |
trajectories: list[dict[str, Any]] = []
|
| 589 |
|
| 590 |
while not observation.done:
|
| 591 |
-
ticket = observation.current_ticket or {}
|
| 592 |
ticket_id = str(ticket.get("ticket_id", ""))
|
| 593 |
action, action_source, action_cue = choose_policy_action(
|
| 594 |
policy,
|
|
@@ -660,6 +701,7 @@ def rollout_episode(
|
|
| 660 |
"terminal_reward": terminal_reward,
|
| 661 |
"terminal_rubric_reward": terminal_rubric_reward,
|
| 662 |
"average_ticket_score": env.state.average_score_so_far,
|
|
|
|
| 663 |
"planning_penalty_total": env.state.planning_penalty_total,
|
| 664 |
"capacity_pressure_tickets_resolved": env.state.capacity_pressure_tickets_resolved,
|
| 665 |
"per_ticket_scores": list(env.state.per_ticket_scores),
|
|
@@ -700,6 +742,9 @@ def summarize_policy_episodes(
|
|
| 700 |
"avg_terminal_rubric_reward": _safe_mean(
|
| 701 |
[float(episode["terminal_rubric_reward"]) for episode in task_episodes]
|
| 702 |
),
|
|
|
|
|
|
|
|
|
|
| 703 |
"avg_planning_penalty_total": _safe_mean(
|
| 704 |
[float(episode["planning_penalty_total"]) for episode in task_episodes]
|
| 705 |
),
|
|
@@ -730,6 +775,9 @@ def summarize_policy_episodes(
|
|
| 730 |
"avg_terminal_rubric_reward": _safe_mean(
|
| 731 |
[float(episode["terminal_rubric_reward"]) for episode in episode_summaries]
|
| 732 |
),
|
|
|
|
|
|
|
|
|
|
| 733 |
"avg_planning_penalty_total": _safe_mean(
|
| 734 |
[float(episode["planning_penalty_total"]) for episode in episode_summaries]
|
| 735 |
),
|
|
@@ -788,9 +836,10 @@ def evaluate_policy(
|
|
| 788 |
return result
|
| 789 |
|
| 790 |
|
| 791 |
-
def _selection_tuple(summary: dict[str, Any]) -> tuple[float, float, float, float, float]:
|
| 792 |
return (
|
| 793 |
float(summary["avg_terminal_rubric_reward"]),
|
|
|
|
| 794 |
-float(summary["avg_planning_penalty_total"]),
|
| 795 |
float(summary["avg_episode_return"]),
|
| 796 |
float(summary["avg_normalized_return"]),
|
|
@@ -849,7 +898,9 @@ def compare_policies(
|
|
| 849 |
"mode": "compare",
|
| 850 |
"task_ids": task_ids,
|
| 851 |
"seeds": seeds,
|
| 852 |
-
"selection_metric":
|
|
|
|
|
|
|
| 853 |
"baseline_policy": baseline_run["policy"],
|
| 854 |
"best_policy": best_run["policy"],
|
| 855 |
"improvement_vs_baseline": {
|
|
@@ -867,6 +918,11 @@ def compare_policies(
|
|
| 867 |
baseline_run["summary"],
|
| 868 |
"avg_terminal_rubric_reward",
|
| 869 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 870 |
"avg_planning_penalty_total": _delta(
|
| 871 |
best_run["summary"],
|
| 872 |
baseline_run["summary"],
|
|
@@ -966,7 +1022,9 @@ def search_policies(
|
|
| 966 |
"task_ids": task_ids,
|
| 967 |
"train_seeds": train_seeds,
|
| 968 |
"eval_seeds": eval_seeds,
|
| 969 |
-
"selection_metric":
|
|
|
|
|
|
|
| 970 |
"candidate_policies": [policy.name for policy in candidate_policies],
|
| 971 |
"selected_policy": selected_policy.name,
|
| 972 |
"baseline_policy": baseline_policy.name,
|
|
@@ -997,6 +1055,11 @@ def search_policies(
|
|
| 997 |
eval_baseline["summary"],
|
| 998 |
"avg_terminal_rubric_reward",
|
| 999 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1000 |
"avg_planning_penalty_total": _delta(
|
| 1001 |
eval_selected["summary"],
|
| 1002 |
eval_baseline["summary"],
|
|
|
|
| 249 |
json.dumps(ticket.get("last_tool_result") or {}, sort_keys=True),
|
| 250 |
json.dumps(ticket.get("routing_options") or [], sort_keys=True),
|
| 251 |
json.dumps(ticket.get("operational_context") or {}, sort_keys=True),
|
| 252 |
+
json.dumps(ticket.get("cluster_summary") or {}, sort_keys=True),
|
| 253 |
json.dumps(ticket.get("capacity_state") or {}, sort_keys=True),
|
| 254 |
json.dumps(ticket.get("future_queue_demand") or {}, sort_keys=True),
|
| 255 |
]
|
|
|
|
| 285 |
):
|
| 286 |
return "capacity_planning"
|
| 287 |
if (
|
| 288 |
+
bool((ticket.get("operational_context") or {}).get("cluster_coordination_hint"))
|
| 289 |
+
or int((ticket.get("cluster_summary") or {}).get("future_cluster_ticket_count", 0) or 0)
|
| 290 |
> 0
|
| 291 |
+
or int((ticket.get("cluster_summary") or {}).get("shared_requester_count", 0) or 0)
|
| 292 |
+
> 1
|
| 293 |
or any(
|
| 294 |
phrase in text
|
| 295 |
for phrase in (
|
|
|
|
| 475 |
return None, None
|
| 476 |
|
| 477 |
|
| 478 |
+
def merge_ticket_context(
|
| 479 |
+
ticket: dict[str, Any],
|
| 480 |
+
observation: HelpdeskTicketObservation,
|
| 481 |
+
) -> dict[str, Any]:
|
| 482 |
+
merged_ticket = dict(ticket)
|
| 483 |
+
if getattr(observation, "last_tool_result", None) is not None:
|
| 484 |
+
merged_ticket["last_tool_result"] = observation.last_tool_result
|
| 485 |
+
if observation.last_tool_result.get("tool_name") == "lookup_queue_capacity_forecast":
|
| 486 |
+
if observation.last_tool_result.get("future_queue_demand") is not None:
|
| 487 |
+
merged_ticket["future_queue_demand"] = observation.last_tool_result[
|
| 488 |
+
"future_queue_demand"
|
| 489 |
+
]
|
| 490 |
+
if observation.last_tool_result.get("capacity_state") is not None:
|
| 491 |
+
merged_ticket["capacity_state"] = observation.last_tool_result[
|
| 492 |
+
"capacity_state"
|
| 493 |
+
]
|
| 494 |
+
merged_ticket["recent_history"] = list(getattr(observation, "history", []) or [])
|
| 495 |
+
merged_ticket["queue_position"] = getattr(observation, "queue_position", None)
|
| 496 |
+
merged_ticket["tickets_remaining"] = getattr(observation, "tickets_remaining", None)
|
| 497 |
+
merged_ticket["tickets_after_current"] = getattr(observation, "tickets_after_current", None)
|
| 498 |
+
merged_ticket["available_tools"] = list(getattr(observation, "available_tools", []) or [])
|
| 499 |
+
merged_ticket["available_action_types"] = list(
|
| 500 |
+
getattr(observation, "available_action_types", []) or []
|
| 501 |
+
)
|
| 502 |
+
merged_ticket["last_reward_components"] = dict(
|
| 503 |
+
getattr(observation, "last_reward_components", {}) or {}
|
| 504 |
+
)
|
| 505 |
+
observation_metadata = getattr(observation, "metadata", {}) or {}
|
| 506 |
+
if observation_metadata.get("last_feedback_summary"):
|
| 507 |
+
merged_ticket["feedback_summary"] = observation_metadata["last_feedback_summary"]
|
| 508 |
+
if observation_metadata.get("capacity_state") is not None:
|
| 509 |
+
merged_ticket["capacity_state"] = observation_metadata["capacity_state"]
|
| 510 |
+
if observation_metadata.get("future_queue_demand") is not None:
|
| 511 |
+
merged_ticket["future_queue_demand"] = observation_metadata["future_queue_demand"]
|
| 512 |
+
return merged_ticket
|
| 513 |
+
|
| 514 |
+
|
| 515 |
def choose_policy_action(
|
| 516 |
policy: PolicyConfig,
|
| 517 |
observation: HelpdeskTicketObservation,
|
|
|
|
| 521 |
used_tools_by_ticket: dict[str, set[str]] | None = None,
|
| 522 |
adaptive_bandit: AdaptiveToolBandit | None = None,
|
| 523 |
) -> tuple[HelpdeskTicketAction, str, str | None]:
|
| 524 |
+
ticket = merge_ticket_context(observation.current_ticket or {}, observation)
|
| 525 |
ticket_id = str(ticket.get("ticket_id", ""))
|
| 526 |
ticket_investigations = investigations_by_ticket.get(ticket_id, 0)
|
| 527 |
used_tools = set()
|
|
|
|
| 629 |
trajectories: list[dict[str, Any]] = []
|
| 630 |
|
| 631 |
while not observation.done:
|
| 632 |
+
ticket = merge_ticket_context(observation.current_ticket or {}, observation)
|
| 633 |
ticket_id = str(ticket.get("ticket_id", ""))
|
| 634 |
action, action_source, action_cue = choose_policy_action(
|
| 635 |
policy,
|
|
|
|
| 701 |
"terminal_reward": terminal_reward,
|
| 702 |
"terminal_rubric_reward": terminal_rubric_reward,
|
| 703 |
"average_ticket_score": env.state.average_score_so_far,
|
| 704 |
+
"queue_management_score": env.state.queue_management_score,
|
| 705 |
"planning_penalty_total": env.state.planning_penalty_total,
|
| 706 |
"capacity_pressure_tickets_resolved": env.state.capacity_pressure_tickets_resolved,
|
| 707 |
"per_ticket_scores": list(env.state.per_ticket_scores),
|
|
|
|
| 742 |
"avg_terminal_rubric_reward": _safe_mean(
|
| 743 |
[float(episode["terminal_rubric_reward"]) for episode in task_episodes]
|
| 744 |
),
|
| 745 |
+
"avg_queue_management_score": _safe_mean(
|
| 746 |
+
[float(episode["queue_management_score"]) for episode in task_episodes]
|
| 747 |
+
),
|
| 748 |
"avg_planning_penalty_total": _safe_mean(
|
| 749 |
[float(episode["planning_penalty_total"]) for episode in task_episodes]
|
| 750 |
),
|
|
|
|
| 775 |
"avg_terminal_rubric_reward": _safe_mean(
|
| 776 |
[float(episode["terminal_rubric_reward"]) for episode in episode_summaries]
|
| 777 |
),
|
| 778 |
+
"avg_queue_management_score": _safe_mean(
|
| 779 |
+
[float(episode["queue_management_score"]) for episode in episode_summaries]
|
| 780 |
+
),
|
| 781 |
"avg_planning_penalty_total": _safe_mean(
|
| 782 |
[float(episode["planning_penalty_total"]) for episode in episode_summaries]
|
| 783 |
),
|
|
|
|
| 836 |
return result
|
| 837 |
|
| 838 |
|
| 839 |
+
def _selection_tuple(summary: dict[str, Any]) -> tuple[float, float, float, float, float, float]:
|
| 840 |
return (
|
| 841 |
float(summary["avg_terminal_rubric_reward"]),
|
| 842 |
+
float(summary["avg_queue_management_score"]),
|
| 843 |
-float(summary["avg_planning_penalty_total"]),
|
| 844 |
float(summary["avg_episode_return"]),
|
| 845 |
float(summary["avg_normalized_return"]),
|
|
|
|
| 898 |
"mode": "compare",
|
| 899 |
"task_ids": task_ids,
|
| 900 |
"seeds": seeds,
|
| 901 |
+
"selection_metric": (
|
| 902 |
+
"avg_terminal_rubric_reward_then_queue_management_then_lower_planning_penalty"
|
| 903 |
+
),
|
| 904 |
"baseline_policy": baseline_run["policy"],
|
| 905 |
"best_policy": best_run["policy"],
|
| 906 |
"improvement_vs_baseline": {
|
|
|
|
| 918 |
baseline_run["summary"],
|
| 919 |
"avg_terminal_rubric_reward",
|
| 920 |
),
|
| 921 |
+
"avg_queue_management_score": _delta(
|
| 922 |
+
best_run["summary"],
|
| 923 |
+
baseline_run["summary"],
|
| 924 |
+
"avg_queue_management_score",
|
| 925 |
+
),
|
| 926 |
"avg_planning_penalty_total": _delta(
|
| 927 |
best_run["summary"],
|
| 928 |
baseline_run["summary"],
|
|
|
|
| 1022 |
"task_ids": task_ids,
|
| 1023 |
"train_seeds": train_seeds,
|
| 1024 |
"eval_seeds": eval_seeds,
|
| 1025 |
+
"selection_metric": (
|
| 1026 |
+
"avg_terminal_rubric_reward_then_queue_management_then_lower_planning_penalty"
|
| 1027 |
+
),
|
| 1028 |
"candidate_policies": [policy.name for policy in candidate_policies],
|
| 1029 |
"selected_policy": selected_policy.name,
|
| 1030 |
"baseline_policy": baseline_policy.name,
|
|
|
|
| 1055 |
eval_baseline["summary"],
|
| 1056 |
"avg_terminal_rubric_reward",
|
| 1057 |
),
|
| 1058 |
+
"avg_queue_management_score": _delta(
|
| 1059 |
+
eval_selected["summary"],
|
| 1060 |
+
eval_baseline["summary"],
|
| 1061 |
+
"avg_queue_management_score",
|
| 1062 |
+
),
|
| 1063 |
"avg_planning_penalty_total": _delta(
|
| 1064 |
eval_selected["summary"],
|
| 1065 |
eval_baseline["summary"],
|
server/app.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import sys
|
|
|
|
| 2 |
from pathlib import Path
|
| 3 |
from typing import Any
|
| 4 |
|
|
@@ -16,7 +17,7 @@ from models import HelpdeskTicketAction, HelpdeskTicketObservation
|
|
| 16 |
from server.environment import HelpdeskTicketRoutingEnvironment
|
| 17 |
from server.grader import grade_action
|
| 18 |
from server.tasks import TASKS, load_dataset
|
| 19 |
-
from vocabulary import APP_ENV_NAME
|
| 20 |
|
| 21 |
app = create_app(
|
| 22 |
HelpdeskTicketRoutingEnvironment,
|
|
@@ -55,20 +56,758 @@ def list_tasks():
|
|
| 55 |
|
| 56 |
@app.get("/web", response_class=HTMLResponse)
|
| 57 |
def web_ui():
|
| 58 |
-
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
for t in TASKS.values()
|
| 61 |
)
|
|
|
|
| 62 |
html = f"""<!DOCTYPE html>
|
| 63 |
-
<html>
|
| 64 |
-
<
|
| 65 |
-
<
|
| 66 |
-
<
|
| 67 |
-
<
|
| 68 |
-
<
|
| 69 |
-
{
|
| 70 |
-
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
return HTMLResponse(content=html)
|
| 73 |
|
| 74 |
|
|
|
|
| 1 |
import sys
|
| 2 |
+
from html import escape
|
| 3 |
from pathlib import Path
|
| 4 |
from typing import Any
|
| 5 |
|
|
|
|
| 17 |
from server.environment import HelpdeskTicketRoutingEnvironment
|
| 18 |
from server.grader import grade_action
|
| 19 |
from server.tasks import TASKS, load_dataset
|
| 20 |
+
from vocabulary import APP_ENV_NAME, PROJECT_TITLE, TEAM_NAME
|
| 21 |
|
| 22 |
app = create_app(
|
| 23 |
HelpdeskTicketRoutingEnvironment,
|
|
|
|
| 56 |
|
| 57 |
@app.get("/web", response_class=HTMLResponse)
|
| 58 |
def web_ui():
|
| 59 |
+
dataset = load_dataset()
|
| 60 |
+
dataset_size = len(dataset)
|
| 61 |
+
alternate_route_count = sum(
|
| 62 |
+
1 for ticket in dataset if ticket.alternate_route_score_multiplier > 0.0
|
| 63 |
+
)
|
| 64 |
+
clustered_case_count = sum(1 for ticket in dataset if ticket.service_cluster_id)
|
| 65 |
+
hidden_context_case_count = sum(
|
| 66 |
+
1
|
| 67 |
+
for ticket in dataset
|
| 68 |
+
if ticket.ambiguity_note
|
| 69 |
+
or ticket.related_ticket_id
|
| 70 |
+
or ticket.planning_note
|
| 71 |
+
or ticket.customer_update_note
|
| 72 |
+
)
|
| 73 |
+
incident_sensitive_count = sum(1 for ticket in dataset if ticket.incident_recommended)
|
| 74 |
+
|
| 75 |
+
difficulty_labels = {
|
| 76 |
+
"easy": "Guided",
|
| 77 |
+
"medium": "Contextual",
|
| 78 |
+
"hard": "Adaptive",
|
| 79 |
+
}
|
| 80 |
+
task_cards = "".join(
|
| 81 |
+
f"""
|
| 82 |
+
<article class="task-card difficulty-{escape(t['difficulty'])}">
|
| 83 |
+
<div class="task-head">
|
| 84 |
+
<span class="task-id">Task {t['id']}</span>
|
| 85 |
+
<span class="difficulty-pill">{escape(difficulty_labels.get(t['difficulty'], t['difficulty']).upper())}</span>
|
| 86 |
+
</div>
|
| 87 |
+
<h3>{escape(t['name'])}</h3>
|
| 88 |
+
<p>{escape(t['instructions'])}</p>
|
| 89 |
+
<div class="field-row">
|
| 90 |
+
{''.join(f'<span class="field-chip">{escape(field)}</span>' for field in t['allowed_fields'])}
|
| 91 |
+
</div>
|
| 92 |
+
</article>
|
| 93 |
+
"""
|
| 94 |
for t in TASKS.values()
|
| 95 |
)
|
| 96 |
+
|
| 97 |
html = f"""<!DOCTYPE html>
|
| 98 |
+
<html lang="en">
|
| 99 |
+
<head>
|
| 100 |
+
<meta charset="utf-8" />
|
| 101 |
+
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
| 102 |
+
<title>{escape(APP_ENV_NAME)}</title>
|
| 103 |
+
<style>
|
| 104 |
+
:root {{
|
| 105 |
+
--bg: #07131b;
|
| 106 |
+
--bg-soft: #0b1c27;
|
| 107 |
+
--panel: rgba(15, 32, 44, 0.84);
|
| 108 |
+
--panel-strong: rgba(12, 26, 37, 0.94);
|
| 109 |
+
--line: rgba(173, 215, 230, 0.16);
|
| 110 |
+
--line-strong: rgba(173, 215, 230, 0.28);
|
| 111 |
+
--text: #ecf5f7;
|
| 112 |
+
--muted: #97aeb7;
|
| 113 |
+
--accent: #4fd1c5;
|
| 114 |
+
--accent-strong: #1cb0a4;
|
| 115 |
+
--accent-warm: #ffb454;
|
| 116 |
+
--success: #7fdf9f;
|
| 117 |
+
--shadow: 0 28px 80px rgba(0, 0, 0, 0.32);
|
| 118 |
+
--radius-xl: 28px;
|
| 119 |
+
--radius-lg: 20px;
|
| 120 |
+
--radius-md: 14px;
|
| 121 |
+
}}
|
| 122 |
+
|
| 123 |
+
* {{
|
| 124 |
+
box-sizing: border-box;
|
| 125 |
+
}}
|
| 126 |
+
|
| 127 |
+
html {{
|
| 128 |
+
scroll-behavior: smooth;
|
| 129 |
+
}}
|
| 130 |
+
|
| 131 |
+
body {{
|
| 132 |
+
margin: 0;
|
| 133 |
+
min-height: 100vh;
|
| 134 |
+
color: var(--text);
|
| 135 |
+
background:
|
| 136 |
+
radial-gradient(circle at 12% 18%, rgba(79, 209, 197, 0.18), transparent 26%),
|
| 137 |
+
radial-gradient(circle at 82% 20%, rgba(255, 180, 84, 0.16), transparent 22%),
|
| 138 |
+
radial-gradient(circle at 50% 100%, rgba(79, 209, 197, 0.12), transparent 35%),
|
| 139 |
+
linear-gradient(180deg, #07131b 0%, #0b1821 52%, #07131b 100%);
|
| 140 |
+
font-family: "Aptos", "Segoe UI Variable Text", "Trebuchet MS", sans-serif;
|
| 141 |
+
}}
|
| 142 |
+
|
| 143 |
+
body::before {{
|
| 144 |
+
content: "";
|
| 145 |
+
position: fixed;
|
| 146 |
+
inset: 0;
|
| 147 |
+
pointer-events: none;
|
| 148 |
+
background-image:
|
| 149 |
+
linear-gradient(rgba(255, 255, 255, 0.03) 1px, transparent 1px),
|
| 150 |
+
linear-gradient(90deg, rgba(255, 255, 255, 0.03) 1px, transparent 1px);
|
| 151 |
+
background-size: 36px 36px;
|
| 152 |
+
mask-image: linear-gradient(180deg, rgba(0, 0, 0, 0.9), transparent 92%);
|
| 153 |
+
}}
|
| 154 |
+
|
| 155 |
+
.shell {{
|
| 156 |
+
width: min(1180px, calc(100vw - 32px));
|
| 157 |
+
margin: 0 auto;
|
| 158 |
+
padding: 28px 0 56px;
|
| 159 |
+
}}
|
| 160 |
+
|
| 161 |
+
.topbar {{
|
| 162 |
+
display: flex;
|
| 163 |
+
align-items: center;
|
| 164 |
+
justify-content: space-between;
|
| 165 |
+
gap: 18px;
|
| 166 |
+
margin-bottom: 22px;
|
| 167 |
+
padding: 16px 20px;
|
| 168 |
+
border: 1px solid var(--line);
|
| 169 |
+
border-radius: 999px;
|
| 170 |
+
background: rgba(10, 23, 32, 0.68);
|
| 171 |
+
backdrop-filter: blur(14px);
|
| 172 |
+
}}
|
| 173 |
+
|
| 174 |
+
.brand {{
|
| 175 |
+
display: flex;
|
| 176 |
+
align-items: center;
|
| 177 |
+
gap: 14px;
|
| 178 |
+
}}
|
| 179 |
+
|
| 180 |
+
.brand-mark {{
|
| 181 |
+
width: 42px;
|
| 182 |
+
height: 42px;
|
| 183 |
+
border-radius: 14px;
|
| 184 |
+
background:
|
| 185 |
+
linear-gradient(145deg, rgba(79, 209, 197, 0.96), rgba(28, 176, 164, 0.75));
|
| 186 |
+
box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.32);
|
| 187 |
+
position: relative;
|
| 188 |
+
}}
|
| 189 |
+
|
| 190 |
+
.brand-mark::after {{
|
| 191 |
+
content: "";
|
| 192 |
+
position: absolute;
|
| 193 |
+
inset: 10px;
|
| 194 |
+
border-radius: 10px;
|
| 195 |
+
border: 2px solid rgba(7, 19, 27, 0.75);
|
| 196 |
+
}}
|
| 197 |
+
|
| 198 |
+
.eyebrow {{
|
| 199 |
+
margin: 0 0 4px;
|
| 200 |
+
color: var(--accent);
|
| 201 |
+
font-size: 0.78rem;
|
| 202 |
+
letter-spacing: 0.18em;
|
| 203 |
+
text-transform: uppercase;
|
| 204 |
+
}}
|
| 205 |
+
|
| 206 |
+
.brand h1 {{
|
| 207 |
+
margin: 0;
|
| 208 |
+
font-family: "Bahnschrift", "Aptos Display", "Trebuchet MS", sans-serif;
|
| 209 |
+
font-size: 1.05rem;
|
| 210 |
+
letter-spacing: 0.03em;
|
| 211 |
+
}}
|
| 212 |
+
|
| 213 |
+
.nav-links {{
|
| 214 |
+
display: flex;
|
| 215 |
+
flex-wrap: wrap;
|
| 216 |
+
gap: 10px;
|
| 217 |
+
}}
|
| 218 |
+
|
| 219 |
+
.nav-links a,
|
| 220 |
+
.button {{
|
| 221 |
+
display: inline-flex;
|
| 222 |
+
align-items: center;
|
| 223 |
+
justify-content: center;
|
| 224 |
+
gap: 8px;
|
| 225 |
+
text-decoration: none;
|
| 226 |
+
color: var(--text);
|
| 227 |
+
border-radius: 999px;
|
| 228 |
+
border: 1px solid var(--line);
|
| 229 |
+
padding: 11px 16px;
|
| 230 |
+
font-size: 0.94rem;
|
| 231 |
+
transition: transform 160ms ease, border-color 160ms ease, background 160ms ease;
|
| 232 |
+
}}
|
| 233 |
+
|
| 234 |
+
.nav-links a:hover,
|
| 235 |
+
.button:hover {{
|
| 236 |
+
transform: translateY(-1px);
|
| 237 |
+
border-color: var(--line-strong);
|
| 238 |
+
}}
|
| 239 |
+
|
| 240 |
+
.button.primary {{
|
| 241 |
+
background: linear-gradient(135deg, rgba(79, 209, 197, 0.22), rgba(28, 176, 164, 0.18));
|
| 242 |
+
border-color: rgba(79, 209, 197, 0.35);
|
| 243 |
+
}}
|
| 244 |
+
|
| 245 |
+
.button.secondary {{
|
| 246 |
+
background: linear-gradient(135deg, rgba(255, 180, 84, 0.14), rgba(255, 180, 84, 0.08));
|
| 247 |
+
border-color: rgba(255, 180, 84, 0.25);
|
| 248 |
+
}}
|
| 249 |
+
|
| 250 |
+
.hero {{
|
| 251 |
+
position: relative;
|
| 252 |
+
overflow: hidden;
|
| 253 |
+
display: grid;
|
| 254 |
+
grid-template-columns: minmax(0, 1.3fr) minmax(300px, 0.9fr);
|
| 255 |
+
gap: 24px;
|
| 256 |
+
padding: 36px;
|
| 257 |
+
border: 1px solid var(--line);
|
| 258 |
+
border-radius: var(--radius-xl);
|
| 259 |
+
background:
|
| 260 |
+
linear-gradient(160deg, rgba(15, 33, 44, 0.92), rgba(8, 21, 29, 0.9)),
|
| 261 |
+
radial-gradient(circle at top right, rgba(255, 180, 84, 0.16), transparent 28%);
|
| 262 |
+
box-shadow: var(--shadow);
|
| 263 |
+
}}
|
| 264 |
+
|
| 265 |
+
.hero::after {{
|
| 266 |
+
content: "";
|
| 267 |
+
position: absolute;
|
| 268 |
+
inset: auto -8% -32% 44%;
|
| 269 |
+
height: 340px;
|
| 270 |
+
background: radial-gradient(circle, rgba(79, 209, 197, 0.2), transparent 62%);
|
| 271 |
+
pointer-events: none;
|
| 272 |
+
}}
|
| 273 |
+
|
| 274 |
+
.hero-copy,
|
| 275 |
+
.hero-panel {{
|
| 276 |
+
position: relative;
|
| 277 |
+
z-index: 1;
|
| 278 |
+
}}
|
| 279 |
+
|
| 280 |
+
.hero-copy h2 {{
|
| 281 |
+
margin: 0 0 14px;
|
| 282 |
+
max-width: 10.5ch;
|
| 283 |
+
font-family: "Bahnschrift", "Aptos Display", "Trebuchet MS", sans-serif;
|
| 284 |
+
font-size: clamp(2.7rem, 6vw, 4.8rem);
|
| 285 |
+
line-height: 0.95;
|
| 286 |
+
letter-spacing: -0.05em;
|
| 287 |
+
}}
|
| 288 |
+
|
| 289 |
+
.hero-copy p {{
|
| 290 |
+
margin: 0;
|
| 291 |
+
max-width: 62ch;
|
| 292 |
+
color: var(--muted);
|
| 293 |
+
font-size: 1.02rem;
|
| 294 |
+
line-height: 1.7;
|
| 295 |
+
}}
|
| 296 |
+
|
| 297 |
+
.hero-kickers {{
|
| 298 |
+
display: flex;
|
| 299 |
+
flex-wrap: wrap;
|
| 300 |
+
gap: 10px;
|
| 301 |
+
margin: 18px 0 22px;
|
| 302 |
+
}}
|
| 303 |
+
|
| 304 |
+
.kicker {{
|
| 305 |
+
padding: 9px 14px;
|
| 306 |
+
border-radius: 999px;
|
| 307 |
+
border: 1px solid var(--line);
|
| 308 |
+
background: rgba(255, 255, 255, 0.04);
|
| 309 |
+
color: #d5e4e9;
|
| 310 |
+
font-size: 0.9rem;
|
| 311 |
+
}}
|
| 312 |
+
|
| 313 |
+
.hero-actions {{
|
| 314 |
+
display: flex;
|
| 315 |
+
flex-wrap: wrap;
|
| 316 |
+
gap: 12px;
|
| 317 |
+
margin-top: 26px;
|
| 318 |
+
}}
|
| 319 |
+
|
| 320 |
+
.hero-panel {{
|
| 321 |
+
align-self: stretch;
|
| 322 |
+
display: grid;
|
| 323 |
+
gap: 14px;
|
| 324 |
+
padding: 18px;
|
| 325 |
+
border-radius: 22px;
|
| 326 |
+
border: 1px solid rgba(79, 209, 197, 0.16);
|
| 327 |
+
background: rgba(7, 19, 27, 0.46);
|
| 328 |
+
backdrop-filter: blur(14px);
|
| 329 |
+
}}
|
| 330 |
+
|
| 331 |
+
.panel-title {{
|
| 332 |
+
margin: 0;
|
| 333 |
+
font-size: 0.88rem;
|
| 334 |
+
color: var(--muted);
|
| 335 |
+
letter-spacing: 0.14em;
|
| 336 |
+
text-transform: uppercase;
|
| 337 |
+
}}
|
| 338 |
+
|
| 339 |
+
.signal-card {{
|
| 340 |
+
padding: 16px;
|
| 341 |
+
border-radius: 18px;
|
| 342 |
+
border: 1px solid var(--line);
|
| 343 |
+
background: rgba(255, 255, 255, 0.035);
|
| 344 |
+
}}
|
| 345 |
+
|
| 346 |
+
.signal-card strong {{
|
| 347 |
+
display: block;
|
| 348 |
+
margin-bottom: 6px;
|
| 349 |
+
font-size: 1rem;
|
| 350 |
+
}}
|
| 351 |
+
|
| 352 |
+
.signal-card span {{
|
| 353 |
+
color: var(--muted);
|
| 354 |
+
font-size: 0.92rem;
|
| 355 |
+
line-height: 1.55;
|
| 356 |
+
}}
|
| 357 |
+
|
| 358 |
+
.stats-grid,
|
| 359 |
+
.feature-grid,
|
| 360 |
+
.task-grid,
|
| 361 |
+
.shortcut-grid {{
|
| 362 |
+
display: grid;
|
| 363 |
+
gap: 16px;
|
| 364 |
+
margin-top: 20px;
|
| 365 |
+
}}
|
| 366 |
+
|
| 367 |
+
.stats-grid {{
|
| 368 |
+
grid-template-columns: repeat(4, minmax(0, 1fr));
|
| 369 |
+
}}
|
| 370 |
+
|
| 371 |
+
.feature-grid {{
|
| 372 |
+
grid-template-columns: repeat(3, minmax(0, 1fr));
|
| 373 |
+
}}
|
| 374 |
+
|
| 375 |
+
.task-grid {{
|
| 376 |
+
grid-template-columns: repeat(3, minmax(0, 1fr));
|
| 377 |
+
}}
|
| 378 |
+
|
| 379 |
+
.shortcut-grid {{
|
| 380 |
+
grid-template-columns: repeat(2, minmax(0, 1fr));
|
| 381 |
+
}}
|
| 382 |
+
|
| 383 |
+
.stat-card,
|
| 384 |
+
.feature-card,
|
| 385 |
+
.shortcut-card,
|
| 386 |
+
.task-card {{
|
| 387 |
+
border: 1px solid var(--line);
|
| 388 |
+
border-radius: var(--radius-lg);
|
| 389 |
+
background: var(--panel);
|
| 390 |
+
backdrop-filter: blur(16px);
|
| 391 |
+
box-shadow: var(--shadow);
|
| 392 |
+
}}
|
| 393 |
+
|
| 394 |
+
.stat-card {{
|
| 395 |
+
padding: 20px;
|
| 396 |
+
}}
|
| 397 |
+
|
| 398 |
+
.stat-card .value {{
|
| 399 |
+
display: block;
|
| 400 |
+
margin-bottom: 8px;
|
| 401 |
+
font-family: "Bahnschrift", "Aptos Display", "Trebuchet MS", sans-serif;
|
| 402 |
+
font-size: 2rem;
|
| 403 |
+
letter-spacing: -0.04em;
|
| 404 |
+
}}
|
| 405 |
+
|
| 406 |
+
.stat-card .label,
|
| 407 |
+
.stat-card .hint,
|
| 408 |
+
.feature-card p,
|
| 409 |
+
.shortcut-card p,
|
| 410 |
+
.task-card p {{
|
| 411 |
+
color: var(--muted);
|
| 412 |
+
}}
|
| 413 |
+
|
| 414 |
+
.stat-card .label {{
|
| 415 |
+
display: block;
|
| 416 |
+
margin-bottom: 6px;
|
| 417 |
+
font-size: 0.92rem;
|
| 418 |
+
}}
|
| 419 |
+
|
| 420 |
+
.stat-card .hint {{
|
| 421 |
+
font-size: 0.86rem;
|
| 422 |
+
line-height: 1.5;
|
| 423 |
+
}}
|
| 424 |
+
|
| 425 |
+
.section {{
|
| 426 |
+
margin-top: 24px;
|
| 427 |
+
padding: 28px;
|
| 428 |
+
border: 1px solid var(--line);
|
| 429 |
+
border-radius: var(--radius-xl);
|
| 430 |
+
background: linear-gradient(180deg, rgba(11, 26, 37, 0.84), rgba(9, 21, 30, 0.88));
|
| 431 |
+
}}
|
| 432 |
+
|
| 433 |
+
.section-head {{
|
| 434 |
+
display: flex;
|
| 435 |
+
align-items: end;
|
| 436 |
+
justify-content: space-between;
|
| 437 |
+
gap: 18px;
|
| 438 |
+
margin-bottom: 18px;
|
| 439 |
+
}}
|
| 440 |
+
|
| 441 |
+
.section-head h3 {{
|
| 442 |
+
margin: 0 0 8px;
|
| 443 |
+
font-family: "Bahnschrift", "Aptos Display", "Trebuchet MS", sans-serif;
|
| 444 |
+
font-size: 1.75rem;
|
| 445 |
+
letter-spacing: -0.03em;
|
| 446 |
+
}}
|
| 447 |
+
|
| 448 |
+
.section-head p {{
|
| 449 |
+
margin: 0;
|
| 450 |
+
max-width: 64ch;
|
| 451 |
+
color: var(--muted);
|
| 452 |
+
line-height: 1.65;
|
| 453 |
+
}}
|
| 454 |
+
|
| 455 |
+
.feature-card,
|
| 456 |
+
.shortcut-card {{
|
| 457 |
+
padding: 20px;
|
| 458 |
+
}}
|
| 459 |
+
|
| 460 |
+
.feature-card h4,
|
| 461 |
+
.shortcut-card h4,
|
| 462 |
+
.task-card h3 {{
|
| 463 |
+
margin: 0 0 10px;
|
| 464 |
+
font-size: 1.04rem;
|
| 465 |
+
}}
|
| 466 |
+
|
| 467 |
+
.task-card {{
|
| 468 |
+
padding: 20px;
|
| 469 |
+
position: relative;
|
| 470 |
+
overflow: hidden;
|
| 471 |
+
}}
|
| 472 |
+
|
| 473 |
+
.task-card::before {{
|
| 474 |
+
content: "";
|
| 475 |
+
position: absolute;
|
| 476 |
+
inset: 0 auto auto 0;
|
| 477 |
+
width: 100%;
|
| 478 |
+
height: 4px;
|
| 479 |
+
background: linear-gradient(90deg, rgba(79, 209, 197, 0.95), rgba(255, 180, 84, 0.72));
|
| 480 |
+
}}
|
| 481 |
+
|
| 482 |
+
.task-card.difficulty-easy::before {{
|
| 483 |
+
background: linear-gradient(90deg, rgba(127, 223, 159, 0.95), rgba(79, 209, 197, 0.7));
|
| 484 |
+
}}
|
| 485 |
+
|
| 486 |
+
.task-card.difficulty-medium::before {{
|
| 487 |
+
background: linear-gradient(90deg, rgba(79, 209, 197, 0.95), rgba(120, 196, 230, 0.72));
|
| 488 |
+
}}
|
| 489 |
+
|
| 490 |
+
.task-card.difficulty-hard::before {{
|
| 491 |
+
background: linear-gradient(90deg, rgba(255, 180, 84, 0.95), rgba(255, 122, 72, 0.78));
|
| 492 |
+
}}
|
| 493 |
+
|
| 494 |
+
.task-head {{
|
| 495 |
+
display: flex;
|
| 496 |
+
align-items: center;
|
| 497 |
+
justify-content: space-between;
|
| 498 |
+
gap: 12px;
|
| 499 |
+
margin-bottom: 16px;
|
| 500 |
+
}}
|
| 501 |
+
|
| 502 |
+
.task-id {{
|
| 503 |
+
color: var(--muted);
|
| 504 |
+
font-size: 0.84rem;
|
| 505 |
+
letter-spacing: 0.12em;
|
| 506 |
+
text-transform: uppercase;
|
| 507 |
+
}}
|
| 508 |
+
|
| 509 |
+
.difficulty-pill {{
|
| 510 |
+
padding: 7px 10px;
|
| 511 |
+
border-radius: 999px;
|
| 512 |
+
border: 1px solid var(--line);
|
| 513 |
+
font-size: 0.74rem;
|
| 514 |
+
letter-spacing: 0.14em;
|
| 515 |
+
text-transform: uppercase;
|
| 516 |
+
color: #f6fafb;
|
| 517 |
+
background: rgba(255, 255, 255, 0.05);
|
| 518 |
+
}}
|
| 519 |
+
|
| 520 |
+
.field-row,
|
| 521 |
+
.chip-row {{
|
| 522 |
+
display: flex;
|
| 523 |
+
flex-wrap: wrap;
|
| 524 |
+
gap: 8px;
|
| 525 |
+
margin-top: 16px;
|
| 526 |
+
}}
|
| 527 |
+
|
| 528 |
+
.field-chip,
|
| 529 |
+
.mini-chip {{
|
| 530 |
+
padding: 8px 11px;
|
| 531 |
+
border-radius: 999px;
|
| 532 |
+
border: 1px solid var(--line);
|
| 533 |
+
background: rgba(255, 255, 255, 0.04);
|
| 534 |
+
color: #d9e7eb;
|
| 535 |
+
font-size: 0.82rem;
|
| 536 |
+
}}
|
| 537 |
+
|
| 538 |
+
.feature-card ul {{
|
| 539 |
+
margin: 12px 0 0;
|
| 540 |
+
padding-left: 18px;
|
| 541 |
+
color: var(--muted);
|
| 542 |
+
line-height: 1.65;
|
| 543 |
+
}}
|
| 544 |
+
|
| 545 |
+
.shortcut-card code {{
|
| 546 |
+
display: block;
|
| 547 |
+
margin: 12px 0 14px;
|
| 548 |
+
padding: 12px 14px;
|
| 549 |
+
border-radius: 14px;
|
| 550 |
+
background: rgba(0, 0, 0, 0.2);
|
| 551 |
+
border: 1px solid rgba(255, 255, 255, 0.05);
|
| 552 |
+
color: #d9fcf7;
|
| 553 |
+
font-family: "Cascadia Code", "Consolas", monospace;
|
| 554 |
+
font-size: 0.88rem;
|
| 555 |
+
white-space: nowrap;
|
| 556 |
+
overflow-x: auto;
|
| 557 |
+
}}
|
| 558 |
+
|
| 559 |
+
.footer {{
|
| 560 |
+
margin-top: 20px;
|
| 561 |
+
padding: 18px 6px 8px;
|
| 562 |
+
color: var(--muted);
|
| 563 |
+
font-size: 0.92rem;
|
| 564 |
+
}}
|
| 565 |
+
|
| 566 |
+
@keyframes rise {{
|
| 567 |
+
from {{
|
| 568 |
+
opacity: 0;
|
| 569 |
+
transform: translateY(12px);
|
| 570 |
+
}}
|
| 571 |
+
to {{
|
| 572 |
+
opacity: 1;
|
| 573 |
+
transform: translateY(0);
|
| 574 |
+
}}
|
| 575 |
+
}}
|
| 576 |
+
|
| 577 |
+
.hero,
|
| 578 |
+
.section,
|
| 579 |
+
.stat-card,
|
| 580 |
+
.task-card,
|
| 581 |
+
.feature-card,
|
| 582 |
+
.shortcut-card {{
|
| 583 |
+
animation: rise 420ms ease both;
|
| 584 |
+
}}
|
| 585 |
+
|
| 586 |
+
@media (max-width: 980px) {{
|
| 587 |
+
.hero,
|
| 588 |
+
.stats-grid,
|
| 589 |
+
.feature-grid,
|
| 590 |
+
.task-grid,
|
| 591 |
+
.shortcut-grid {{
|
| 592 |
+
grid-template-columns: 1fr;
|
| 593 |
+
}}
|
| 594 |
+
|
| 595 |
+
.topbar,
|
| 596 |
+
.section-head {{
|
| 597 |
+
border-radius: 24px;
|
| 598 |
+
flex-direction: column;
|
| 599 |
+
align-items: flex-start;
|
| 600 |
+
}}
|
| 601 |
+
}}
|
| 602 |
+
|
| 603 |
+
@media (max-width: 640px) {{
|
| 604 |
+
.shell {{
|
| 605 |
+
width: min(100vw - 18px, 1180px);
|
| 606 |
+
padding-top: 14px;
|
| 607 |
+
}}
|
| 608 |
+
|
| 609 |
+
.hero,
|
| 610 |
+
.section {{
|
| 611 |
+
padding: 22px;
|
| 612 |
+
}}
|
| 613 |
+
|
| 614 |
+
.hero-copy h2 {{
|
| 615 |
+
max-width: none;
|
| 616 |
+
font-size: clamp(2.4rem, 14vw, 3.5rem);
|
| 617 |
+
}}
|
| 618 |
+
|
| 619 |
+
.nav-links,
|
| 620 |
+
.hero-actions {{
|
| 621 |
+
width: 100%;
|
| 622 |
+
}}
|
| 623 |
+
|
| 624 |
+
.nav-links a,
|
| 625 |
+
.button {{
|
| 626 |
+
flex: 1 1 180px;
|
| 627 |
+
}}
|
| 628 |
+
}}
|
| 629 |
+
</style>
|
| 630 |
+
</head>
|
| 631 |
+
<body>
|
| 632 |
+
<main class="shell">
|
| 633 |
+
<header class="topbar">
|
| 634 |
+
<div class="brand">
|
| 635 |
+
<div class="brand-mark" aria-hidden="true"></div>
|
| 636 |
+
<div>
|
| 637 |
+
<p class="eyebrow">OpenEnv Environment</p>
|
| 638 |
+
<h1>{escape(PROJECT_TITLE)}</h1>
|
| 639 |
+
</div>
|
| 640 |
+
</div>
|
| 641 |
+
<nav class="nav-links">
|
| 642 |
+
<a href="/health">Health</a>
|
| 643 |
+
<a href="/tasks">Tasks JSON</a>
|
| 644 |
+
<a href="/docs">API Docs</a>
|
| 645 |
+
</nav>
|
| 646 |
+
</header>
|
| 647 |
+
|
| 648 |
+
<section class="hero">
|
| 649 |
+
<div class="hero-copy">
|
| 650 |
+
<p class="eyebrow">{escape(APP_ENV_NAME)}</p>
|
| 651 |
+
<h2>Queue decisions that actually carry forward.</h2>
|
| 652 |
+
<p>
|
| 653 |
+
A sleek benchmark surface for sequential helpdesk routing: hidden context,
|
| 654 |
+
cluster-aware follow-ons, incident handling, deferrals, and a terminal rubric
|
| 655 |
+
that rewards queue strategy instead of isolated classification alone.
|
| 656 |
+
</p>
|
| 657 |
+
<div class="hero-kickers">
|
| 658 |
+
<span class="kicker">Task family: easy to hard</span>
|
| 659 |
+
<span class="kicker">Closed-form grader</span>
|
| 660 |
+
<span class="kicker">Queue-level terminal objective</span>
|
| 661 |
+
</div>
|
| 662 |
+
<div class="hero-actions">
|
| 663 |
+
<a class="button primary" href="/docs">Explore the API</a>
|
| 664 |
+
<a class="button secondary" href="/baseline?task_id=3&seed=42">Run Hard Baseline</a>
|
| 665 |
+
<a class="button" href="/tasks">Inspect Task Definitions</a>
|
| 666 |
+
</div>
|
| 667 |
+
</div>
|
| 668 |
+
|
| 669 |
+
<aside class="hero-panel">
|
| 670 |
+
<p class="panel-title">Why This Stands Out</p>
|
| 671 |
+
<div class="signal-card">
|
| 672 |
+
<strong>Not just ticket labels</strong>
|
| 673 |
+
<span>Medium and hard episodes now carry cluster state, follow-up debt, queue pressure, and operational actions across the whole episode.</span>
|
| 674 |
+
</div>
|
| 675 |
+
<div class="signal-card">
|
| 676 |
+
<strong>Judge-friendly surface</strong>
|
| 677 |
+
<span>Clear API entry points, deterministic grading, and a landing page that explains the benchmark without making anyone read code first.</span>
|
| 678 |
+
</div>
|
| 679 |
+
<div class="signal-card">
|
| 680 |
+
<strong>Built by {escape(TEAM_NAME)}</strong>
|
| 681 |
+
<span>Designed for OpenEnv evaluation, local policy comparison, and fast demoability during judging.</span>
|
| 682 |
+
</div>
|
| 683 |
+
</aside>
|
| 684 |
+
</section>
|
| 685 |
+
|
| 686 |
+
<section class="stats-grid" aria-label="Benchmark stats">
|
| 687 |
+
<article class="stat-card">
|
| 688 |
+
<span class="value">{dataset_size}</span>
|
| 689 |
+
<span class="label">Tickets in the grounded dataset</span>
|
| 690 |
+
<span class="hint">Curated records plus queue mutation mechanics create repeatable but non-trivial episodes.</span>
|
| 691 |
+
</article>
|
| 692 |
+
<article class="stat-card">
|
| 693 |
+
<span class="value">{alternate_route_count}</span>
|
| 694 |
+
<span class="label">Capacity-aware alternate routes</span>
|
| 695 |
+
<span class="hint">The grader can reward declared fallback routes instead of collapsing to all-or-nothing exact match.</span>
|
| 696 |
+
</article>
|
| 697 |
+
<article class="stat-card">
|
| 698 |
+
<span class="value">{clustered_case_count}</span>
|
| 699 |
+
<span class="label">Cluster-linked or coordinated cases</span>
|
| 700 |
+
<span class="hint">Handling one ticket can stabilize or destabilize the downstream tickets in the same workstream.</span>
|
| 701 |
+
</article>
|
| 702 |
+
<article class="stat-card">
|
| 703 |
+
<span class="value">{hidden_context_case_count}</span>
|
| 704 |
+
<span class="label">Hidden-context routing cases</span>
|
| 705 |
+
<span class="hint">Investigation tools matter because key evidence does not appear in the initial observation by default.</span>
|
| 706 |
+
</article>
|
| 707 |
+
</section>
|
| 708 |
+
|
| 709 |
+
<section class="section">
|
| 710 |
+
<div class="section-head">
|
| 711 |
+
<div>
|
| 712 |
+
<p class="eyebrow">Task Ladder</p>
|
| 713 |
+
<h3>One benchmark family, not three disconnected demos</h3>
|
| 714 |
+
</div>
|
| 715 |
+
<p>
|
| 716 |
+
The difficulty ladder keeps the same full-routing output while progressively changing
|
| 717 |
+
observability, queue dependencies, and operational pressure.
|
| 718 |
+
</p>
|
| 719 |
+
</div>
|
| 720 |
+
<div class="task-grid">
|
| 721 |
+
{task_cards}
|
| 722 |
+
</div>
|
| 723 |
+
</section>
|
| 724 |
+
|
| 725 |
+
<section class="section">
|
| 726 |
+
<div class="section-head">
|
| 727 |
+
<div>
|
| 728 |
+
<p class="eyebrow">Environment Signals</p>
|
| 729 |
+
<h3>What the agent is balancing</h3>
|
| 730 |
+
</div>
|
| 731 |
+
<p>
|
| 732 |
+
The benchmark is designed so strong policy choices change later tickets, incident
|
| 733 |
+
coverage, and terminal queue quality instead of just nudging shaped reward.
|
| 734 |
+
</p>
|
| 735 |
+
</div>
|
| 736 |
+
<div class="feature-grid">
|
| 737 |
+
<article class="feature-card">
|
| 738 |
+
<h4>Hidden context retrieval</h4>
|
| 739 |
+
<p>Related-ticket previews, requester history, internal routing notes, queue cluster summaries, and capacity forecasts are revealed through explicit tool use.</p>
|
| 740 |
+
<div class="chip-row">
|
| 741 |
+
<span class="mini-chip">investigate</span>
|
| 742 |
+
<span class="mini-chip">request_info</span>
|
| 743 |
+
<span class="mini-chip">cluster summary</span>
|
| 744 |
+
</div>
|
| 745 |
+
</article>
|
| 746 |
+
<article class="feature-card">
|
| 747 |
+
<h4>Operational actions with consequences</h4>
|
| 748 |
+
<p>Deferrals can raise later urgency, incident handling can reduce downstream debt, and weak handling can spawn or worsen follow-up work.</p>
|
| 749 |
+
<div class="chip-row">
|
| 750 |
+
<span class="mini-chip">defer</span>
|
| 751 |
+
<span class="mini-chip">open_incident</span>
|
| 752 |
+
<span class="mini-chip">follow-up spawning</span>
|
| 753 |
+
</div>
|
| 754 |
+
</article>
|
| 755 |
+
<article class="feature-card">
|
| 756 |
+
<h4>Queue-level terminal rubric</h4>
|
| 757 |
+
<p>Final scoring blends routing trajectory quality with queue management quality so agents are rewarded for coherent episode strategy, not just isolated ticket matches.</p>
|
| 758 |
+
<div class="chip-row">
|
| 759 |
+
<span class="mini-chip">terminal rubric</span>
|
| 760 |
+
<span class="mini-chip">queue quality</span>
|
| 761 |
+
<span class="mini-chip">planning-aware</span>
|
| 762 |
+
</div>
|
| 763 |
+
</article>
|
| 764 |
+
</div>
|
| 765 |
+
</section>
|
| 766 |
+
|
| 767 |
+
<section class="section">
|
| 768 |
+
<div class="section-head">
|
| 769 |
+
<div>
|
| 770 |
+
<p class="eyebrow">Quick Routes</p>
|
| 771 |
+
<h3>Fast ways to demo the environment</h3>
|
| 772 |
+
</div>
|
| 773 |
+
<p>
|
| 774 |
+
Useful entry points for judges, reviewers, or anyone trying to get signal from the project quickly.
|
| 775 |
+
</p>
|
| 776 |
+
</div>
|
| 777 |
+
<div class="shortcut-grid">
|
| 778 |
+
<article class="shortcut-card">
|
| 779 |
+
<h4>Interactive API docs</h4>
|
| 780 |
+
<p>Browse the full OpenEnv-compatible surface, request models, and built-in helper endpoints.</p>
|
| 781 |
+
<code>GET /docs</code>
|
| 782 |
+
<a class="button primary" href="/docs">Open Docs</a>
|
| 783 |
+
</article>
|
| 784 |
+
<article class="shortcut-card">
|
| 785 |
+
<h4>Task manifest</h4>
|
| 786 |
+
<p>Inspect the easy, medium, and hard task definitions exactly as exposed by the server.</p>
|
| 787 |
+
<code>GET /tasks</code>
|
| 788 |
+
<a class="button" href="/tasks">View Tasks</a>
|
| 789 |
+
</article>
|
| 790 |
+
<article class="shortcut-card">
|
| 791 |
+
<h4>Hard-task baseline rollout</h4>
|
| 792 |
+
<p>See a deterministic baseline episode over the hardest queue with the current environment logic.</p>
|
| 793 |
+
<code>GET /baseline?task_id=3&seed=42</code>
|
| 794 |
+
<a class="button secondary" href="/baseline?task_id=3&seed=42">Run Baseline</a>
|
| 795 |
+
</article>
|
| 796 |
+
<article class="shortcut-card">
|
| 797 |
+
<h4>Health and deployment status</h4>
|
| 798 |
+
<p>Quick check that the service is alive and ready for OpenEnv-style evaluation requests.</p>
|
| 799 |
+
<code>GET /health</code>
|
| 800 |
+
<a class="button" href="/health">Check Health</a>
|
| 801 |
+
</article>
|
| 802 |
+
</div>
|
| 803 |
+
</section>
|
| 804 |
+
|
| 805 |
+
<footer class="footer">
|
| 806 |
+
<span>{escape(PROJECT_TITLE)} • {escape(APP_ENV_NAME)} • {incident_sensitive_count} incident-sensitive records surfaced in the current dataset snapshot.</span>
|
| 807 |
+
</footer>
|
| 808 |
+
</main>
|
| 809 |
+
</body>
|
| 810 |
+
</html>"""
|
| 811 |
return HTMLResponse(content=html)
|
| 812 |
|
| 813 |
|
server/environment.py
CHANGED
|
@@ -35,7 +35,7 @@ BASE_AVAILABLE_TOOLS = (
|
|
| 35 |
)
|
| 36 |
TASK_AVAILABLE_ACTION_TYPES: dict[int, tuple[str, ...]] = {
|
| 37 |
1: ("submit", "investigate"),
|
| 38 |
-
2: ("submit", "investigate", "request_info"),
|
| 39 |
3: ("submit", "investigate", "request_info", "defer", "open_incident"),
|
| 40 |
}
|
| 41 |
TASK_AVAILABLE_TOOLS: dict[int, tuple[str, ...]] = {
|
|
@@ -48,6 +48,7 @@ TASK_AVAILABLE_TOOLS: dict[int, tuple[str, ...]] = {
|
|
| 48 |
"lookup_related_ticket",
|
| 49 |
"lookup_requester_history",
|
| 50 |
"lookup_internal_routing_note",
|
|
|
|
| 51 |
),
|
| 52 |
3: BASE_AVAILABLE_TOOLS,
|
| 53 |
}
|
|
@@ -79,6 +80,11 @@ CLUSTER_STABILIZE_SCORE_THRESHOLD = 0.84
|
|
| 79 |
CLUSTER_DESTABILIZE_SCORE_THRESHOLD = 0.72
|
| 80 |
CLUSTER_INCIDENT_RELIEF_MULTIPLIER = 0.94
|
| 81 |
CLUSTER_OWNER_RELIEF_MULTIPLIER = 0.86
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
|
| 83 |
TASK3_INVESTIGATION_TOOL_PLAN: dict[str, tuple[str, ...]] = {
|
| 84 |
"ticket-021": ("lookup_related_ticket", "lookup_requester_history"),
|
|
@@ -228,6 +234,8 @@ class HelpdeskTicketRoutingEnvironment(
|
|
| 228 |
incident_slots_remaining=incident_slots_initial,
|
| 229 |
planning_penalty_total=0.0,
|
| 230 |
capacity_pressure_tickets_resolved=0,
|
|
|
|
|
|
|
| 231 |
ticket_request_info_usage={},
|
| 232 |
ticket_defer_counts={},
|
| 233 |
open_incident_ticket_ids=[],
|
|
@@ -238,6 +246,8 @@ class HelpdeskTicketRoutingEnvironment(
|
|
| 238 |
spawned_follow_up_ticket_ids=[],
|
| 239 |
spawned_follow_up_source_ids=[],
|
| 240 |
dynamic_queue_events=[],
|
|
|
|
|
|
|
| 241 |
)
|
| 242 |
|
| 243 |
return self._build_observation(task)
|
|
@@ -292,6 +302,7 @@ class HelpdeskTicketRoutingEnvironment(
|
|
| 292 |
trajectory_reward = None
|
| 293 |
trajectory_components = None
|
| 294 |
investigation_penalty = self._compute_episode_penalty() if is_done else 0.0
|
|
|
|
| 295 |
if is_done:
|
| 296 |
trajectory_components = compute_trajectory_adjustments(
|
| 297 |
self._state.per_ticket_scores,
|
|
@@ -300,7 +311,9 @@ class HelpdeskTicketRoutingEnvironment(
|
|
| 300 |
completion_bonus=self._trajectory_consistency_bonus(),
|
| 301 |
)
|
| 302 |
trajectory_reward = trajectory_components["final_reward"]
|
| 303 |
-
final_reward = self.
|
|
|
|
|
|
|
| 304 |
self._state.total_reward = final_reward
|
| 305 |
else:
|
| 306 |
final_reward = clamp_open_unit_interval(0.0)
|
|
@@ -329,6 +342,7 @@ class HelpdeskTicketRoutingEnvironment(
|
|
| 329 |
if trajectory_components is not None
|
| 330 |
else None
|
| 331 |
),
|
|
|
|
| 332 |
},
|
| 333 |
)
|
| 334 |
self._state.history_entries.append(
|
|
@@ -388,6 +402,7 @@ class HelpdeskTicketRoutingEnvironment(
|
|
| 388 |
trajectory_components = None
|
| 389 |
investigation_penalty = 0.0
|
| 390 |
rubric_reward = None
|
|
|
|
| 391 |
|
| 392 |
if is_done:
|
| 393 |
self._state.per_ticket_scores.append(score)
|
|
@@ -403,8 +418,8 @@ class HelpdeskTicketRoutingEnvironment(
|
|
| 403 |
),
|
| 404 |
)
|
| 405 |
trajectory_reward = trajectory_components["final_reward"]
|
| 406 |
-
rubric_reward = self.
|
| 407 |
-
trajectory_reward
|
| 408 |
)
|
| 409 |
final_reward = clamp_open_unit_interval(
|
| 410 |
rubric_reward - context_penalty - capacity_penalty - incident_gap_penalty
|
|
@@ -434,10 +449,13 @@ class HelpdeskTicketRoutingEnvironment(
|
|
| 434 |
trajectory_reward = None
|
| 435 |
trajectory_components = None
|
| 436 |
rubric_reward = None
|
|
|
|
| 437 |
final_reward = clamp_open_unit_interval(
|
| 438 |
step_reward - context_penalty - capacity_penalty - incident_gap_penalty
|
| 439 |
)
|
| 440 |
self._state.total_reward = 0.0
|
|
|
|
|
|
|
| 441 |
if incident_gap_penalty > 0.0:
|
| 442 |
self._state.incident_gap_total = round(
|
| 443 |
self._state.incident_gap_total + incident_gap_penalty,
|
|
@@ -503,6 +521,7 @@ class HelpdeskTicketRoutingEnvironment(
|
|
| 503 |
if trajectory_components is not None
|
| 504 |
else None
|
| 505 |
),
|
|
|
|
| 506 |
},
|
| 507 |
)
|
| 508 |
reward_components.update(capacity_details)
|
|
@@ -553,10 +572,6 @@ class HelpdeskTicketRoutingEnvironment(
|
|
| 553 |
|
| 554 |
def _apply_episode_economics(self, base_reward: float) -> float:
|
| 555 |
penalty = self._compute_episode_penalty()
|
| 556 |
-
penalty += min(
|
| 557 |
-
0.25,
|
| 558 |
-
self._state.sla_breach_count * SLA_BREACH_PENALTY + self._state.incident_gap_total,
|
| 559 |
-
)
|
| 560 |
return clamp_open_unit_interval(base_reward - penalty)
|
| 561 |
|
| 562 |
def _current_average_score(self) -> float:
|
|
@@ -564,6 +579,230 @@ class HelpdeskTicketRoutingEnvironment(
|
|
| 564 |
return 0.0
|
| 565 |
return sum(self._state.per_ticket_scores) / len(self._state.per_ticket_scores)
|
| 566 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 567 |
def _available_action_types_for_task(self, task_id: int | None = None) -> list[str]:
    """Return the action types allowed for a task as a fresh list.

    Args:
        task_id: Task to look up; ``None`` means the task currently stored
            on ``self._state``. A falsy id is treated as task 1.

    Returns:
        A new list copied from ``TASK_AVAILABLE_ACTION_TYPES``; ids missing
        from that table yield ``["submit"]``.
    """
    if task_id is None:
        task_id = self._state.current_task_id
    allowed_actions = TASK_AVAILABLE_ACTION_TYPES.get(int(task_id or 1), ("submit",))
    return list(allowed_actions)
|
|
@@ -595,7 +834,7 @@ class HelpdeskTicketRoutingEnvironment(
|
|
| 595 |
def _sample_queue(self, task_id: int, queue_size: int) -> list[HelpdeskTicketRecord]:
|
| 596 |
if queue_size <= 0:
|
| 597 |
return []
|
| 598 |
-
if task_id
|
| 599 |
return self._rng.sample(self._dataset, queue_size)
|
| 600 |
|
| 601 |
cluster_groups = self._cluster_sample_groups()
|
|
@@ -671,15 +910,27 @@ class HelpdeskTicketRoutingEnvironment(
|
|
| 671 |
indexes.append(index)
|
| 672 |
return indexes
|
| 673 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 674 |
def _cluster_summary(
|
| 675 |
self,
|
| 676 |
ticket: HelpdeskTicketRecord,
|
| 677 |
*,
|
| 678 |
start_index: int | None = None,
|
| 679 |
) -> dict[str, Any]:
|
| 680 |
-
|
| 681 |
-
|
| 682 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 683 |
future_indexes = self._future_cluster_ticket_indexes(
|
| 684 |
ticket,
|
| 685 |
start_index=effective_start,
|
|
@@ -722,7 +973,7 @@ class HelpdeskTicketRoutingEnvironment(
|
|
| 722 |
context_penalty: float,
|
| 723 |
incident_gap_penalty: float,
|
| 724 |
) -> list[str]:
|
| 725 |
-
if self._state.current_task_id
|
| 726 |
return []
|
| 727 |
if score < CLUSTER_STABILIZE_SCORE_THRESHOLD:
|
| 728 |
return []
|
|
@@ -791,6 +1042,7 @@ class HelpdeskTicketRoutingEnvironment(
|
|
| 791 |
updated_ticket_ids.append(updated_ticket.ticket_id)
|
| 792 |
|
| 793 |
if updated_ticket_ids:
|
|
|
|
| 794 |
self._record_dynamic_queue_event(
|
| 795 |
"stabilize_cluster",
|
| 796 |
source_ticket_id=current_ticket.ticket_id,
|
|
@@ -807,7 +1059,7 @@ class HelpdeskTicketRoutingEnvironment(
|
|
| 807 |
context_penalty: float,
|
| 808 |
incident_gap_penalty: float,
|
| 809 |
) -> list[str]:
|
| 810 |
-
if self._state.current_task_id
|
| 811 |
return []
|
| 812 |
if score >= CLUSTER_DESTABILIZE_SCORE_THRESHOLD:
|
| 813 |
if context_penalty <= 0.0 and incident_gap_penalty <= 0.0:
|
|
@@ -850,6 +1102,7 @@ class HelpdeskTicketRoutingEnvironment(
|
|
| 850 |
updated_ticket_ids.append(updated_ticket.ticket_id)
|
| 851 |
|
| 852 |
if updated_ticket_ids:
|
|
|
|
| 853 |
self._record_dynamic_queue_event(
|
| 854 |
"destabilize_cluster",
|
| 855 |
source_ticket_id=current_ticket.ticket_id,
|
|
@@ -1431,17 +1684,27 @@ class HelpdeskTicketRoutingEnvironment(
|
|
| 1431 |
context_penalty: float,
|
| 1432 |
incident_gap_penalty: float,
|
| 1433 |
) -> bool:
|
| 1434 |
-
|
|
|
|
| 1435 |
return False
|
| 1436 |
if ticket.generated_from_ticket_id is not None:
|
| 1437 |
return False
|
| 1438 |
if ticket.ticket_id in self._state.spawned_follow_up_source_ids:
|
| 1439 |
return False
|
| 1440 |
-
|
| 1441 |
self._requires_incident(ticket)
|
| 1442 |
or self._ticket_mentions_follow_up(ticket)
|
| 1443 |
or ticket.related_ticket_id is not None
|
| 1444 |
or ticket.priority in {"high", "critical"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1445 |
):
|
| 1446 |
return False
|
| 1447 |
return (
|
|
@@ -1527,7 +1790,7 @@ class HelpdeskTicketRoutingEnvironment(
|
|
| 1527 |
or self._future_queue_demand()["remaining_ticket_count"] > 0
|
| 1528 |
)
|
| 1529 |
if tool_name == "lookup_queue_cluster_summary":
|
| 1530 |
-
if self._state.current_task_id
|
| 1531 |
return False
|
| 1532 |
cluster_summary = self._cluster_summary(ticket)
|
| 1533 |
return (
|
|
@@ -1569,10 +1832,16 @@ class HelpdeskTicketRoutingEnvironment(
|
|
| 1569 |
and "lookup_queue_capacity_forecast" not in required_tools
|
| 1570 |
):
|
| 1571 |
required_tools.append("lookup_queue_capacity_forecast")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1572 |
if resolved_task_id == 3:
|
| 1573 |
cluster_summary = self._cluster_summary(
|
| 1574 |
ticket,
|
| 1575 |
-
start_index=
|
| 1576 |
)
|
| 1577 |
if (
|
| 1578 |
cluster_summary["future_cluster_ticket_count"] > 0
|
|
@@ -1584,6 +1853,21 @@ class HelpdeskTicketRoutingEnvironment(
|
|
| 1584 |
)
|
| 1585 |
):
|
| 1586 |
required_tools.append("lookup_queue_cluster_summary")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1587 |
filtered_required_tools: list[str] = []
|
| 1588 |
allowed_tool_set = set(self._available_tools_for_task(resolved_task_id))
|
| 1589 |
for tool_name in required_tools:
|
|
@@ -2316,6 +2600,10 @@ class HelpdeskTicketRoutingEnvironment(
|
|
| 2316 |
used_tools = set(self._used_tools_for_ticket(ticket.ticket_id))
|
| 2317 |
operational_actions = progress["recommended_operational_actions"]
|
| 2318 |
cluster_summary = self._cluster_summary(ticket)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2319 |
ticket_view: dict[str, Any] = {
|
| 2320 |
"ticket_id": ticket.ticket_id,
|
| 2321 |
"title": self._visible_title(ticket),
|
|
@@ -2341,12 +2629,19 @@ class HelpdeskTicketRoutingEnvironment(
|
|
| 2341 |
"incident_recommended": self._requires_incident(ticket),
|
| 2342 |
"incident_open": self._incident_open_for_ticket(ticket),
|
| 2343 |
"recommended_actions": operational_actions,
|
| 2344 |
-
"
|
| 2345 |
-
"
|
| 2346 |
-
"future_cluster_ticket_ids": cluster_summary["future_cluster_ticket_ids"],
|
| 2347 |
-
"shared_requester_count": cluster_summary["shared_requester_count"],
|
| 2348 |
-
"active_incident_cover": cluster_summary["active_incident_cover"],
|
| 2349 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2350 |
if ticket.ambiguity_note is not None and "lookup_internal_routing_note" not in remaining_tools:
|
| 2351 |
ticket_view["ambiguity_note"] = ticket.ambiguity_note
|
| 2352 |
if (
|
|
@@ -2444,6 +2739,9 @@ class HelpdeskTicketRoutingEnvironment(
|
|
| 2444 |
incident_gap_penalty = reward_components.get("incident_gap_penalty")
|
| 2445 |
if incident_gap_penalty:
|
| 2446 |
parts.append(f"incident_gap_penalty={incident_gap_penalty:.2f}")
|
|
|
|
|
|
|
|
|
|
| 2447 |
spawned_follow_up_ticket_id = reward_components.get("spawned_follow_up_ticket_id")
|
| 2448 |
if spawned_follow_up_ticket_id:
|
| 2449 |
parts.append(f"spawned_follow_up={spawned_follow_up_ticket_id}")
|
|
@@ -2493,11 +2791,21 @@ class HelpdeskTicketRoutingEnvironment(
|
|
| 2493 |
"defer_count": self._defer_count(ticket.ticket_id),
|
| 2494 |
"incident_open": self._incident_open_for_ticket(ticket),
|
| 2495 |
"recommended_actions": progress["recommended_operational_actions"],
|
| 2496 |
-
"
|
| 2497 |
-
|
| 2498 |
-
|
|
|
|
| 2499 |
},
|
| 2500 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2501 |
if self._state.current_task_id == 3:
|
| 2502 |
history_entry["capacity_state"] = self._capacity_state_snapshot()
|
| 2503 |
if reward is not None:
|
|
@@ -2610,12 +2918,13 @@ class HelpdeskTicketRoutingEnvironment(
|
|
| 2610 |
"planning_penalty_applied": self._state.planning_penalty_applied,
|
| 2611 |
"sla_breach_count": self._state.sla_breach_count,
|
| 2612 |
"incident_gap_total": self._state.incident_gap_total,
|
|
|
|
|
|
|
| 2613 |
"dynamic_queue_events": list(self._state.dynamic_queue_events[-5:]),
|
| 2614 |
"clustered_follow_ons": self._future_queue_demand().get("clustered_follow_ons", 0),
|
| 2615 |
}
|
| 2616 |
if self._state.current_task_id == 3:
|
| 2617 |
metadata["capacity_state"] = self._capacity_state_snapshot()
|
| 2618 |
-
metadata["future_queue_demand"] = self._future_queue_demand()
|
| 2619 |
if last_history_entry is not None:
|
| 2620 |
metadata["last_score"] = last_history_entry.get("score")
|
| 2621 |
metadata["last_reward"] = last_history_entry.get("reward")
|
|
|
|
| 35 |
)
|
| 36 |
# Action types an agent may use, keyed by task id. Lookups elsewhere in this
# module fall back to ("submit",) for ids that are not listed here.
TASK_AVAILABLE_ACTION_TYPES: dict[int, tuple[str, ...]] = {
    # Task 1: routing plus investigation only.
    1: ("submit", "investigate"),
    # Task 2: adds information requests and deferral.
    2: ("submit", "investigate", "request_info", "defer"),
    # Task 3: everything in task 2 plus opening incidents.
    3: ("submit", "investigate", "request_info", "defer", "open_incident"),
}
|
| 41 |
TASK_AVAILABLE_TOOLS: dict[int, tuple[str, ...]] = {
|
|
|
|
| 48 |
"lookup_related_ticket",
|
| 49 |
"lookup_requester_history",
|
| 50 |
"lookup_internal_routing_note",
|
| 51 |
+
"lookup_queue_cluster_summary",
|
| 52 |
),
|
| 53 |
3: BASE_AVAILABLE_TOOLS,
|
| 54 |
}
|
|
|
|
| 80 |
CLUSTER_DESTABILIZE_SCORE_THRESHOLD = 0.72
|
| 81 |
CLUSTER_INCIDENT_RELIEF_MULTIPLIER = 0.94
|
| 82 |
CLUSTER_OWNER_RELIEF_MULTIPLIER = 0.86
|
| 83 |
+
# Share of the blended terminal objective given to the queue-management score,
# keyed by task id; the remaining share (1 - weight) goes to the routing
# trajectory reward. Task 1 is scored on routing alone.
TASK_QUEUE_MANAGEMENT_WEIGHT: dict[int, float] = {
    1: 0.0,
    2: 0.2,
    3: 0.32,
}
|
| 88 |
|
| 89 |
TASK3_INVESTIGATION_TOOL_PLAN: dict[str, tuple[str, ...]] = {
|
| 90 |
"ticket-021": ("lookup_related_ticket", "lookup_requester_history"),
|
|
|
|
| 234 |
incident_slots_remaining=incident_slots_initial,
|
| 235 |
planning_penalty_total=0.0,
|
| 236 |
capacity_pressure_tickets_resolved=0,
|
| 237 |
+
cluster_stabilizations_total=0,
|
| 238 |
+
cluster_destabilizations_total=0,
|
| 239 |
ticket_request_info_usage={},
|
| 240 |
ticket_defer_counts={},
|
| 241 |
open_incident_ticket_ids=[],
|
|
|
|
| 246 |
spawned_follow_up_ticket_ids=[],
|
| 247 |
spawned_follow_up_source_ids=[],
|
| 248 |
dynamic_queue_events=[],
|
| 249 |
+
queue_management_score=0.0,
|
| 250 |
+
queue_management_breakdown={},
|
| 251 |
)
|
| 252 |
|
| 253 |
return self._build_observation(task)
|
|
|
|
| 302 |
trajectory_reward = None
|
| 303 |
trajectory_components = None
|
| 304 |
investigation_penalty = self._compute_episode_penalty() if is_done else 0.0
|
| 305 |
+
rubric_details: dict[str, Any] = {}
|
| 306 |
if is_done:
|
| 307 |
trajectory_components = compute_trajectory_adjustments(
|
| 308 |
self._state.per_ticket_scores,
|
|
|
|
| 311 |
completion_bonus=self._trajectory_consistency_bonus(),
|
| 312 |
)
|
| 313 |
trajectory_reward = trajectory_components["final_reward"]
|
| 314 |
+
final_reward, rubric_details = self._finalize_terminal_rubric(
|
| 315 |
+
trajectory_reward
|
| 316 |
+
)
|
| 317 |
self._state.total_reward = final_reward
|
| 318 |
else:
|
| 319 |
final_reward = clamp_open_unit_interval(0.0)
|
|
|
|
| 342 |
if trajectory_components is not None
|
| 343 |
else None
|
| 344 |
),
|
| 345 |
+
**rubric_details,
|
| 346 |
},
|
| 347 |
)
|
| 348 |
self._state.history_entries.append(
|
|
|
|
| 402 |
trajectory_components = None
|
| 403 |
investigation_penalty = 0.0
|
| 404 |
rubric_reward = None
|
| 405 |
+
rubric_details: dict[str, Any] = {}
|
| 406 |
|
| 407 |
if is_done:
|
| 408 |
self._state.per_ticket_scores.append(score)
|
|
|
|
| 418 |
),
|
| 419 |
)
|
| 420 |
trajectory_reward = trajectory_components["final_reward"]
|
| 421 |
+
rubric_reward, rubric_details = self._finalize_terminal_rubric(
|
| 422 |
+
trajectory_reward
|
| 423 |
)
|
| 424 |
final_reward = clamp_open_unit_interval(
|
| 425 |
rubric_reward - context_penalty - capacity_penalty - incident_gap_penalty
|
|
|
|
| 449 |
trajectory_reward = None
|
| 450 |
trajectory_components = None
|
| 451 |
rubric_reward = None
|
| 452 |
+
rubric_details = {}
|
| 453 |
final_reward = clamp_open_unit_interval(
|
| 454 |
step_reward - context_penalty - capacity_penalty - incident_gap_penalty
|
| 455 |
)
|
| 456 |
self._state.total_reward = 0.0
|
| 457 |
+
self._state.queue_management_score = 0.0
|
| 458 |
+
self._state.queue_management_breakdown = {}
|
| 459 |
if incident_gap_penalty > 0.0:
|
| 460 |
self._state.incident_gap_total = round(
|
| 461 |
self._state.incident_gap_total + incident_gap_penalty,
|
|
|
|
| 521 |
if trajectory_components is not None
|
| 522 |
else None
|
| 523 |
),
|
| 524 |
+
**rubric_details,
|
| 525 |
},
|
| 526 |
)
|
| 527 |
reward_components.update(capacity_details)
|
|
|
|
| 572 |
|
| 573 |
def _apply_episode_economics(self, base_reward: float) -> float:
    """Subtract the episode penalty from ``base_reward`` and clamp the result.

    Args:
        base_reward: Reward before the episode-level penalty is applied.

    Returns:
        ``base_reward`` minus ``_compute_episode_penalty()``, passed through
        ``clamp_open_unit_interval``.
    """
    penalised_reward = base_reward - self._compute_episode_penalty()
    return clamp_open_unit_interval(penalised_reward)
|
| 576 |
|
| 577 |
def _current_average_score(self) -> float:
|
|
|
|
| 579 |
return 0.0
|
| 580 |
return sum(self._state.per_ticket_scores) / len(self._state.per_ticket_scores)
|
| 581 |
|
| 582 |
+
def _queue_management_blend_weight(self, task_id: int | None = None) -> float:
    """Return the queue-management share of the terminal objective for a task.

    Args:
        task_id: Task to look up; ``None`` means the task currently stored
            on ``self._state``. A falsy id is treated as task 1.

    Returns:
        The weight from ``TASK_QUEUE_MANAGEMENT_WEIGHT``, or 0.0 for ids that
        table does not list.
    """
    if task_id is None:
        task_id = self._state.current_task_id
    lookup_key = int(task_id or 1)
    return TASK_QUEUE_MANAGEMENT_WEIGHT.get(lookup_key, 0.0)
|
| 585 |
+
|
| 586 |
+
def _context_resolution_score(self) -> float:
    """Return the fraction of required investigation tools that were resolved.

    Only queue tickets for which ``_required_tools_for_ticket`` returns a
    non-empty result take part. For each of them the required and remaining
    tool lists come from ``_tool_progress_for_ticket``.

    Returns:
        1.0 when no ticket needs hidden context; otherwise
        resolved / required, clamped to [0, 1] and rounded to 4 decimals.
    """
    needing_context = []
    for candidate in self._queue:
        if self._required_tools_for_ticket(candidate, self._state.current_task_id):
            needing_context.append(candidate)
    if len(needing_context) == 0:
        return 1.0

    required_sum = 0
    resolved_sum = 0
    for candidate in needing_context:
        tool_state = self._tool_progress_for_ticket(candidate)
        required_count = len(tool_state["required_tools"])
        remaining_count = len(tool_state["remaining_tools"])
        # Each participating ticket contributes at least one unit of demand.
        required_sum += max(1, required_count)
        resolved_sum += max(0, required_count - remaining_count)

    ratio = resolved_sum / max(1, required_sum)
    return round(max(0.0, min(1.0, ratio)), 4)
|
| 607 |
+
|
| 608 |
+
def _follow_up_containment_score(self) -> float:
    """Score in [0, 1] for how well follow-up ticket spawning was contained.

    A queue ticket counts as "at risk" when it was not itself generated from
    another ticket and at least one of these holds: it requires an incident,
    it mentions a follow-up, it has a related ticket, or its priority is
    ``"high"`` or ``"critical"``.

    The score falls with the spawn rate (spawned follow-ups per at-risk
    ticket) and is partly restored by the mean score of history entries that
    belong to generated follow-up tickets.

    Returns:
        1.0 when no ticket is at risk; otherwise the clamped score rounded
        to 4 decimals.
    """
    at_risk_tickets = [
        ticket
        for ticket in self._queue
        if ticket.generated_from_ticket_id is None
        and (
            self._requires_incident(ticket)
            or self._ticket_mentions_follow_up(ticket)
            or ticket.related_ticket_id is not None
            or ticket.priority in {"high", "critical"}
        )
    ]
    if not at_risk_tickets:
        return 1.0

    spawn_rate = len(self._state.spawned_follow_up_ticket_ids) / len(at_risk_tickets)

    # ``entry.get("score", 0.0)`` only covers a missing key; an entry that
    # stores ``score=None`` would make ``float(None)`` raise TypeError.
    # Treat None like the missing-key default (0.0).
    follow_up_scores = [
        float(entry.get("score") or 0.0)
        for entry in self._state.history_entries
        if entry.get("generated_from_ticket_id") is not None
    ]
    recovery_credit = (
        sum(follow_up_scores) / len(follow_up_scores) if follow_up_scores else 0.0
    )

    # Up to 0.7 is lost to spawning; up to 0.3 is earned back by handling
    # the spawned follow-ups well.
    score = (1.0 - min(1.0, 0.7 * spawn_rate)) + (
        min(1.0, spawn_rate) * 0.3 * recovery_credit
    )
    return round(max(0.0, min(1.0, score)), 4)
|
| 640 |
+
|
| 641 |
+
def _incident_management_score(self) -> float:
    """Score in [0, 1] for incident handling; only meaningful from task 3 on.

    Considers queue tickets that were not generated from another ticket and
    for which ``_requires_incident`` is true. The score blends
    (65%) the absence of accumulated incident-gap penalty and (35%) the share
    of those tickets that currently have an open incident.

    Returns:
        1.0 for tasks below 3 or when no ticket is incident-sensitive;
        otherwise the clamped blend rounded to 4 decimals.
    """
    state = self._state
    if (state.current_task_id or 1) < 3:
        return 1.0

    sensitive = []
    for candidate in self._queue:
        if candidate.generated_from_ticket_id is not None:
            continue
        if self._requires_incident(candidate):
            sensitive.append(candidate)
    if not sensitive:
        return 1.0

    sensitive_total = len(sensitive)
    covered_total = len(
        [candidate for candidate in sensitive if self._incident_open_for_ticket(candidate)]
    )
    coverage_ratio = covered_total / max(1, sensitive_total)

    # Normalise the accumulated gap by one gap penalty per sensitive ticket.
    gap_ceiling = max(INCIDENT_GAP_PENALTY, sensitive_total * INCIDENT_GAP_PENALTY)
    gap_ratio = min(1.0, state.incident_gap_total / gap_ceiling)

    score = (0.65 * (1.0 - gap_ratio)) + (0.35 * coverage_ratio)
    return round(max(0.0, min(1.0, score)), 4)
|
| 664 |
+
|
| 665 |
+
def _sla_quality_score(self) -> float:
    """Return 1 minus the SLA breach ratio, clamped to [0, 1].

    The breach count is divided by the deferred-ticket count when that is
    non-zero, otherwise by the queue length; the divisor is never below 1.

    Returns:
        The score rounded to 4 decimals.
    """
    state = self._state
    deferred_total = state.deferred_ticket_count
    baseline = deferred_total if deferred_total else len(self._queue)
    if baseline < 1:
        baseline = 1
    breach_ratio = min(1.0, state.sla_breach_count / baseline)
    return round(max(0.0, min(1.0, 1.0 - breach_ratio)), 4)
|
| 670 |
+
|
| 671 |
+
def _planning_quality_score(self) -> float:
    """Score in [0, 1] for capacity planning; only meaningful from task 3 on.

    Blends (60%) how small the accumulated planning penalty is relative to a
    worst-case budget and (40%) the share of tickets with an alternate route
    that were resolved under capacity pressure.

    Returns:
        1.0 for tasks below 3; otherwise the clamped blend rounded to
        4 decimals.
    """
    state = self._state
    if (state.current_task_id or 1) < 3:
        return 1.0

    reroutable_total = len(
        [ticket for ticket in self._queue if self._ticket_has_alternate_route(ticket)]
    )
    if reroutable_total:
        route_coverage = min(
            1.0,
            state.capacity_pressure_tickets_resolved / reroutable_total,
        )
    else:
        # Nothing could have been rerouted, so coverage is trivially full.
        route_coverage = 1.0

    # Worst case: every queued ticket overflows all three capacity budgets.
    # The 0.12 floor keeps the divisor positive for tiny or empty queues.
    max_expected_penalty = max(
        0.12,
        len(self._queue)
        * (
            TEAM_CAPACITY_OVERFLOW_PENALTY
            + HIGH_PRIORITY_SLOT_OVERFLOW_PENALTY
            + ESCALATION_SLOT_OVERFLOW_PENALTY
        ),
    )
    penalty_score = 1.0 - min(1.0, state.planning_penalty_total / max_expected_penalty)

    score = (0.6 * penalty_score) + (0.4 * route_coverage)
    return round(max(0.0, min(1.0, score)), 4)
|
| 700 |
+
|
| 701 |
+
def _cluster_coordination_score(self) -> float:
    """Score in [0, 1] for clustered-ticket handling; applies from task 2 on.

    A queue ticket counts as clustered when it has a service cluster id, a
    related ticket, a generating ticket, or a repeated-requester count of at
    least 2. Destabilisations per clustered ticket lower the score (weight
    0.75); stabilisations raise it (weight 0.25).

    Returns:
        1.0 for task 1 or when nothing is clustered; otherwise the clamped
        score rounded to 4 decimals.
    """
    state = self._state
    if (state.current_task_id or 1) < 2:
        return 1.0

    cluster_count = 0
    for candidate in self._queue:
        if (
            candidate.service_cluster_id
            or candidate.related_ticket_id is not None
            or candidate.generated_from_ticket_id is not None
            or self._ticket_repeated_requester_count(candidate) >= 2
        ):
            cluster_count += 1
    if cluster_count == 0:
        return 1.0

    destabilization_ratio = min(1.0, state.cluster_destabilizations_total / cluster_count)
    stabilization_ratio = min(1.0, state.cluster_stabilizations_total / cluster_count)
    score = 1.0 - (0.75 * destabilization_ratio) + (0.25 * stabilization_ratio)
    return round(max(0.0, min(1.0, score)), 4)
|
| 725 |
+
|
| 726 |
+
def _queue_management_breakdown(self, trajectory_reward: float) -> tuple[float, dict[str, Any]]:
    """Compute the aggregate queue-management score and its breakdown.

    For task 1 the clamped trajectory reward is used as a stand-in. For
    tasks 2 and 3 the component scores are combined with fixed weights;
    task 3 adds the planning and incident components.

    Args:
        trajectory_reward: Routing trajectory reward, only used for task 1.

    Returns:
        ``(aggregate, breakdown)``. For tasks 2 and 3 ``breakdown`` holds
        the rounded component scores, the ``weights`` applied, several raw
        counters from ``self._state`` and the ``aggregate`` itself.
    """
    state = self._state
    task_id = int(state.current_task_id or 1)
    if task_id < 2:
        proxy_score = round(clamp_open_unit_interval(trajectory_reward), 4)
        return proxy_score, {"routing_trajectory_proxy": proxy_score}

    component_scores: dict[str, float] = {}
    component_scores["context_resolution"] = self._context_resolution_score()
    component_scores["cluster_coordination"] = self._cluster_coordination_score()
    component_scores["follow_up_containment"] = self._follow_up_containment_score()
    component_scores["sla_management"] = self._sla_quality_score()

    if task_id < 3:
        component_weights = {
            "context_resolution": 0.38,
            "cluster_coordination": 0.26,
            "follow_up_containment": 0.2,
            "sla_management": 0.16,
        }
    else:
        component_scores["planning_quality"] = self._planning_quality_score()
        component_scores["incident_management"] = self._incident_management_score()
        component_weights = {
            "context_resolution": 0.2,
            "planning_quality": 0.24,
            "incident_management": 0.2,
            "cluster_coordination": 0.16,
            "follow_up_containment": 0.12,
            "sla_management": 0.08,
        }

    weighted_total = sum(
        component_scores[name] * weight for name, weight in component_weights.items()
    )
    aggregate_score = round(weighted_total, 4)

    breakdown: dict[str, Any] = {}
    for name, value in component_scores.items():
        breakdown[name] = round(value, 4)
    breakdown.update(
        {
            "weights": {
                name: round(weight, 4) for name, weight in component_weights.items()
            },
            "cluster_stabilizations_total": state.cluster_stabilizations_total,
            "cluster_destabilizations_total": state.cluster_destabilizations_total,
            "spawned_follow_up_count": len(state.spawned_follow_up_ticket_ids),
            "sla_breach_count": state.sla_breach_count,
            "planning_penalty_total": round(state.planning_penalty_total, 4),
            "incident_gap_total": round(state.incident_gap_total, 4),
            "aggregate": aggregate_score,
        }
    )
    return aggregate_score, breakdown
|
| 778 |
+
|
| 779 |
+
def _finalize_terminal_rubric(self, trajectory_reward: float) -> tuple[float, dict[str, Any]]:
    """Blend routing and queue-management objectives into the terminal reward.

    The trajectory reward and the queue-management score are mixed using the
    task's blend weight, clamped, and then passed through
    ``_apply_episode_economics``. The queue-management score and breakdown
    are also stored on ``self._state``.

    Args:
        trajectory_reward: Routing trajectory reward for the finished episode.

    Returns:
        ``(rubric_reward, details)`` where ``details`` records the inputs,
        weights, pre-penalty blend and the rounded episode penalty.
    """
    task_id = int(self._state.current_task_id or 1)
    queue_score, queue_breakdown = self._queue_management_breakdown(trajectory_reward)

    blend_weight = self._queue_management_blend_weight(task_id)
    route_weight = round(1.0 - blend_weight, 4)
    queue_weight = round(blend_weight, 4)

    weighted_objective = (route_weight * trajectory_reward) + (queue_weight * queue_score)
    blended_reward = clamp_open_unit_interval(weighted_objective)

    # Recorded for reporting only; the penalty itself is applied by
    # _apply_episode_economics below.
    episode_economics_penalty = round(self._compute_episode_penalty(), 4)
    rubric_reward = self._apply_episode_economics(blended_reward)

    self._state.queue_management_score = queue_score
    self._state.queue_management_breakdown = dict(queue_breakdown)

    details: dict[str, Any] = {
        "trajectory_routing_reward": trajectory_reward,
        "queue_management_score": queue_score,
        "queue_management_breakdown": dict(queue_breakdown),
        "route_objective_weight": route_weight,
        "queue_management_weight": queue_weight,
        "blended_objective_before_economics": blended_reward,
        "episode_economics_penalty": episode_economics_penalty,
    }
    return rubric_reward, details
|
| 805 |
+
|
| 806 |
def _available_action_types_for_task(self, task_id: int | None = None) -> list[str]:
    """Return the action types allowed for a task as a fresh list.

    Args:
        task_id: Task to look up; ``None`` means the task currently stored
            on ``self._state``. A falsy id is treated as task 1.

    Returns:
        A new list copied from ``TASK_AVAILABLE_ACTION_TYPES``; ids missing
        from that table yield ``["submit"]``.
    """
    if task_id is None:
        task_id = self._state.current_task_id
    permitted = TASK_AVAILABLE_ACTION_TYPES.get(int(task_id or 1), ("submit",))
    return list(permitted)
|
|
|
|
| 834 |
def _sample_queue(self, task_id: int, queue_size: int) -> list[HelpdeskTicketRecord]:
|
| 835 |
if queue_size <= 0:
|
| 836 |
return []
|
| 837 |
+
if task_id not in {2, 3} or queue_size < 3:
|
| 838 |
return self._rng.sample(self._dataset, queue_size)
|
| 839 |
|
| 840 |
cluster_groups = self._cluster_sample_groups()
|
|
|
|
| 910 |
indexes.append(index)
|
| 911 |
return indexes
|
| 912 |
|
| 913 |
+
def _ticket_queue_index(self, ticket: HelpdeskTicketRecord) -> int | None:
    """Return the position of ``ticket`` in the queue, matched by ticket id.

    Args:
        ticket: Ticket whose ``ticket_id`` is searched for in ``self._queue``.

    Returns:
        Index of the first queue entry with the same ``ticket_id``, or
        ``None`` when no entry matches.
    """
    return next(
        (
            position
            for position, queued in enumerate(self._queue)
            if queued.ticket_id == ticket.ticket_id
        ),
        None,
    )
|
| 918 |
+
|
| 919 |
def _cluster_summary(
|
| 920 |
self,
|
| 921 |
ticket: HelpdeskTicketRecord,
|
| 922 |
*,
|
| 923 |
start_index: int | None = None,
|
| 924 |
) -> dict[str, Any]:
|
| 925 |
+
if start_index is None:
|
| 926 |
+
ticket_index = self._ticket_queue_index(ticket)
|
| 927 |
+
effective_start = (
|
| 928 |
+
ticket_index + 1
|
| 929 |
+
if ticket_index is not None
|
| 930 |
+
else self._state.current_ticket_index + 1
|
| 931 |
+
)
|
| 932 |
+
else:
|
| 933 |
+
effective_start = start_index
|
| 934 |
future_indexes = self._future_cluster_ticket_indexes(
|
| 935 |
ticket,
|
| 936 |
start_index=effective_start,
|
|
|
|
| 973 |
context_penalty: float,
|
| 974 |
incident_gap_penalty: float,
|
| 975 |
) -> list[str]:
|
| 976 |
+
if (self._state.current_task_id or 1) < 2:
|
| 977 |
return []
|
| 978 |
if score < CLUSTER_STABILIZE_SCORE_THRESHOLD:
|
| 979 |
return []
|
|
|
|
| 1042 |
updated_ticket_ids.append(updated_ticket.ticket_id)
|
| 1043 |
|
| 1044 |
if updated_ticket_ids:
|
| 1045 |
+
self._state.cluster_stabilizations_total += len(updated_ticket_ids)
|
| 1046 |
self._record_dynamic_queue_event(
|
| 1047 |
"stabilize_cluster",
|
| 1048 |
source_ticket_id=current_ticket.ticket_id,
|
|
|
|
| 1059 |
context_penalty: float,
|
| 1060 |
incident_gap_penalty: float,
|
| 1061 |
) -> list[str]:
|
| 1062 |
+
if (self._state.current_task_id or 1) < 2:
|
| 1063 |
return []
|
| 1064 |
if score >= CLUSTER_DESTABILIZE_SCORE_THRESHOLD:
|
| 1065 |
if context_penalty <= 0.0 and incident_gap_penalty <= 0.0:
|
|
|
|
| 1102 |
updated_ticket_ids.append(updated_ticket.ticket_id)
|
| 1103 |
|
| 1104 |
if updated_ticket_ids:
|
| 1105 |
+
self._state.cluster_destabilizations_total += len(updated_ticket_ids)
|
| 1106 |
self._record_dynamic_queue_event(
|
| 1107 |
"destabilize_cluster",
|
| 1108 |
source_ticket_id=current_ticket.ticket_id,
|
|
|
|
| 1684 |
context_penalty: float,
|
| 1685 |
incident_gap_penalty: float,
|
| 1686 |
) -> bool:
|
| 1687 |
+
task_id = int(self._state.current_task_id or 1)
|
| 1688 |
+
if task_id < 2:
|
| 1689 |
return False
|
| 1690 |
if ticket.generated_from_ticket_id is not None:
|
| 1691 |
return False
|
| 1692 |
if ticket.ticket_id in self._state.spawned_follow_up_source_ids:
|
| 1693 |
return False
|
| 1694 |
+
follow_up_risk = (
|
| 1695 |
self._requires_incident(ticket)
|
| 1696 |
or self._ticket_mentions_follow_up(ticket)
|
| 1697 |
or ticket.related_ticket_id is not None
|
| 1698 |
or ticket.priority in {"high", "critical"}
|
| 1699 |
+
or self._cluster_summary(ticket)["future_cluster_ticket_count"] > 0
|
| 1700 |
+
)
|
| 1701 |
+
if not follow_up_risk:
|
| 1702 |
+
return False
|
| 1703 |
+
if task_id == 2 and not (
|
| 1704 |
+
ticket.related_ticket_id is not None
|
| 1705 |
+
or self._ticket_mentions_follow_up(ticket)
|
| 1706 |
+
or self._cluster_summary(ticket)["future_cluster_ticket_count"] > 0
|
| 1707 |
+
or self._ticket_repeated_requester_count(ticket) >= 2
|
| 1708 |
):
|
| 1709 |
return False
|
| 1710 |
return (
|
|
|
|
| 1790 |
or self._future_queue_demand()["remaining_ticket_count"] > 0
|
| 1791 |
)
|
| 1792 |
if tool_name == "lookup_queue_cluster_summary":
|
| 1793 |
+
if (self._state.current_task_id or 1) < 2:
|
| 1794 |
return False
|
| 1795 |
cluster_summary = self._cluster_summary(ticket)
|
| 1796 |
return (
|
|
|
|
| 1832 |
and "lookup_queue_capacity_forecast" not in required_tools
|
| 1833 |
):
|
| 1834 |
required_tools.append("lookup_queue_capacity_forecast")
|
| 1835 |
+
ticket_index = self._ticket_queue_index(ticket)
|
| 1836 |
+
cluster_start_index = (
|
| 1837 |
+
ticket_index + 1
|
| 1838 |
+
if ticket_index is not None
|
| 1839 |
+
else self._state.current_ticket_index + 1
|
| 1840 |
+
)
|
| 1841 |
if resolved_task_id == 3:
|
| 1842 |
cluster_summary = self._cluster_summary(
|
| 1843 |
ticket,
|
| 1844 |
+
start_index=cluster_start_index,
|
| 1845 |
)
|
| 1846 |
if (
|
| 1847 |
cluster_summary["future_cluster_ticket_count"] > 0
|
|
|
|
| 1853 |
)
|
| 1854 |
):
|
| 1855 |
required_tools.append("lookup_queue_cluster_summary")
|
| 1856 |
+
if resolved_task_id == 2:
|
| 1857 |
+
cluster_summary = self._cluster_summary(
|
| 1858 |
+
ticket,
|
| 1859 |
+
start_index=cluster_start_index,
|
| 1860 |
+
)
|
| 1861 |
+
if (
|
| 1862 |
+
cluster_summary["future_cluster_ticket_count"] > 0
|
| 1863 |
+
and "lookup_queue_cluster_summary" not in required_tools
|
| 1864 |
+
and (
|
| 1865 |
+
ticket.related_ticket_id is not None
|
| 1866 |
+
or cluster_summary["shared_requester_count"] > 1
|
| 1867 |
+
or self._ticket_mentions_follow_up(ticket)
|
| 1868 |
+
)
|
| 1869 |
+
):
|
| 1870 |
+
required_tools.append("lookup_queue_cluster_summary")
|
| 1871 |
filtered_required_tools: list[str] = []
|
| 1872 |
allowed_tool_set = set(self._available_tools_for_task(resolved_task_id))
|
| 1873 |
for tool_name in required_tools:
|
|
|
|
| 2600 |
used_tools = set(self._used_tools_for_ticket(ticket.ticket_id))
|
| 2601 |
operational_actions = progress["recommended_operational_actions"]
|
| 2602 |
cluster_summary = self._cluster_summary(ticket)
|
| 2603 |
+
cluster_hint = (
|
| 2604 |
+
cluster_summary["future_cluster_ticket_count"] > 0
|
| 2605 |
+
or cluster_summary["shared_requester_count"] > 1
|
| 2606 |
+
)
|
| 2607 |
ticket_view: dict[str, Any] = {
|
| 2608 |
"ticket_id": ticket.ticket_id,
|
| 2609 |
"title": self._visible_title(ticket),
|
|
|
|
| 2629 |
"incident_recommended": self._requires_incident(ticket),
|
| 2630 |
"incident_open": self._incident_open_for_ticket(ticket),
|
| 2631 |
"recommended_actions": operational_actions,
|
| 2632 |
+
"cluster_coordination_hint": cluster_hint,
|
| 2633 |
+
"shared_requester_pressure": cluster_summary["shared_requester_count"] > 1,
|
|
|
|
|
|
|
|
|
|
| 2634 |
}
|
| 2635 |
+
if "lookup_queue_cluster_summary" in used_tools:
|
| 2636 |
+
ticket_view["operational_context"].update(
|
| 2637 |
+
{
|
| 2638 |
+
"service_cluster_id": ticket.service_cluster_id,
|
| 2639 |
+
"future_cluster_ticket_count": cluster_summary["future_cluster_ticket_count"],
|
| 2640 |
+
"future_cluster_ticket_ids": cluster_summary["future_cluster_ticket_ids"],
|
| 2641 |
+
"shared_requester_count": cluster_summary["shared_requester_count"],
|
| 2642 |
+
"active_incident_cover": cluster_summary["active_incident_cover"],
|
| 2643 |
+
}
|
| 2644 |
+
)
|
| 2645 |
if ticket.ambiguity_note is not None and "lookup_internal_routing_note" not in remaining_tools:
|
| 2646 |
ticket_view["ambiguity_note"] = ticket.ambiguity_note
|
| 2647 |
if (
|
|
|
|
| 2739 |
incident_gap_penalty = reward_components.get("incident_gap_penalty")
|
| 2740 |
if incident_gap_penalty:
|
| 2741 |
parts.append(f"incident_gap_penalty={incident_gap_penalty:.2f}")
|
| 2742 |
+
queue_management_score = reward_components.get("queue_management_score")
|
| 2743 |
+
if queue_management_score is not None:
|
| 2744 |
+
parts.append(f"queue_management_score={queue_management_score:.2f}")
|
| 2745 |
spawned_follow_up_ticket_id = reward_components.get("spawned_follow_up_ticket_id")
|
| 2746 |
if spawned_follow_up_ticket_id:
|
| 2747 |
parts.append(f"spawned_follow_up={spawned_follow_up_ticket_id}")
|
|
|
|
| 2791 |
"defer_count": self._defer_count(ticket.ticket_id),
|
| 2792 |
"incident_open": self._incident_open_for_ticket(ticket),
|
| 2793 |
"recommended_actions": progress["recommended_operational_actions"],
|
| 2794 |
+
"cluster_coordination_hint": (
|
| 2795 |
+
cluster_summary["future_cluster_ticket_count"] > 0
|
| 2796 |
+
or cluster_summary["shared_requester_count"] > 1
|
| 2797 |
+
),
|
| 2798 |
},
|
| 2799 |
}
|
| 2800 |
+
if "lookup_queue_cluster_summary" in self._used_tools_for_ticket(ticket.ticket_id):
|
| 2801 |
+
history_entry["operational_context"].update(
|
| 2802 |
+
{
|
| 2803 |
+
"service_cluster_id": ticket.service_cluster_id,
|
| 2804 |
+
"future_cluster_ticket_count": cluster_summary["future_cluster_ticket_count"],
|
| 2805 |
+
"active_incident_cover": cluster_summary["active_incident_cover"],
|
| 2806 |
+
"shared_requester_count": cluster_summary["shared_requester_count"],
|
| 2807 |
+
}
|
| 2808 |
+
)
|
| 2809 |
if self._state.current_task_id == 3:
|
| 2810 |
history_entry["capacity_state"] = self._capacity_state_snapshot()
|
| 2811 |
if reward is not None:
|
|
|
|
| 2918 |
"planning_penalty_applied": self._state.planning_penalty_applied,
|
| 2919 |
"sla_breach_count": self._state.sla_breach_count,
|
| 2920 |
"incident_gap_total": self._state.incident_gap_total,
|
| 2921 |
+
"queue_management_score": self._state.queue_management_score,
|
| 2922 |
+
"queue_management_breakdown": dict(self._state.queue_management_breakdown),
|
| 2923 |
"dynamic_queue_events": list(self._state.dynamic_queue_events[-5:]),
|
| 2924 |
"clustered_follow_ons": self._future_queue_demand().get("clustered_follow_ons", 0),
|
| 2925 |
}
|
| 2926 |
if self._state.current_task_id == 3:
|
| 2927 |
metadata["capacity_state"] = self._capacity_state_snapshot()
|
|
|
|
| 2928 |
if last_history_entry is not None:
|
| 2929 |
metadata["last_score"] = last_history_entry.get("score")
|
| 2930 |
metadata["last_reward"] = last_history_entry.get("reward")
|
server/tasks.py
CHANGED
|
@@ -29,9 +29,11 @@ TASKS = {
|
|
| 29 |
"name": "Contextual Full Routing",
|
| 30 |
"difficulty": "medium",
|
| 31 |
"instructions": (
|
| 32 |
-
"Perform full helpdesk routing with partial observability
|
| 33 |
-
"tickets hide related-case, requester-history,
|
| 34 |
-
"details until you investigate or request more
|
|
|
|
|
|
|
| 35 |
),
|
| 36 |
"allowed_fields": [
|
| 37 |
"issue_type",
|
|
|
|
| 29 |
"name": "Contextual Full Routing",
|
| 30 |
"difficulty": "medium",
|
| 31 |
"instructions": (
|
| 32 |
+
"Perform full helpdesk routing with partial observability and moderate "
|
| 33 |
+
"queue carry-over. Some tickets hide related-case, requester-history, "
|
| 34 |
+
"or cluster-coordination details until you investigate or request more "
|
| 35 |
+
"information, and medium episodes can also require deferral or coherent "
|
| 36 |
+
"handling across linked tickets in the same queue."
|
| 37 |
),
|
| 38 |
"allowed_fields": [
|
| 39 |
"issue_type",
|
tests/test_competitive_upgrade.py
CHANGED
|
@@ -729,6 +729,8 @@ class TestInvestigationActions(unittest.TestCase):
|
|
| 729 |
|
| 730 |
def test_queue_cluster_summary_reveals_future_cluster_load(self) -> None:
|
| 731 |
env, obs, root, follow_up = self._make_cluster_env()
|
|
|
|
|
|
|
| 732 |
|
| 733 |
obs = env.step(
|
| 734 |
HelpdeskTicketAction(
|
|
@@ -776,7 +778,7 @@ class TestInvestigationActions(unittest.TestCase):
|
|
| 776 |
|
| 777 |
self.assertFalse(obs.done)
|
| 778 |
self.assertEqual(obs.current_ticket["ticket_id"], follow_up.ticket_id)
|
| 779 |
-
self.assertTrue(obs.current_ticket["operational_context"]["
|
| 780 |
self.assertIn(
|
| 781 |
follow_up.ticket_id,
|
| 782 |
obs.history[-1]["reward_components"]["cluster_stabilized_ticket_ids"],
|
|
@@ -839,6 +841,56 @@ class TestInvestigationActions(unittest.TestCase):
|
|
| 839 |
0.0,
|
| 840 |
)
|
| 841 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 842 |
|
| 843 |
class TestQueueEconomics(unittest.TestCase):
|
| 844 |
"""Free investigations are allowed, but excessive investigation gets a queue-level penalty."""
|
|
|
|
| 729 |
|
| 730 |
def test_queue_cluster_summary_reveals_future_cluster_load(self) -> None:
|
| 731 |
env, obs, root, follow_up = self._make_cluster_env()
|
| 732 |
+
self.assertNotIn("future_cluster_ticket_count", obs.current_ticket["operational_context"])
|
| 733 |
+
self.assertTrue(obs.current_ticket["operational_context"]["cluster_coordination_hint"])
|
| 734 |
|
| 735 |
obs = env.step(
|
| 736 |
HelpdeskTicketAction(
|
|
|
|
| 778 |
|
| 779 |
self.assertFalse(obs.done)
|
| 780 |
self.assertEqual(obs.current_ticket["ticket_id"], follow_up.ticket_id)
|
| 781 |
+
self.assertTrue(obs.current_ticket["operational_context"]["incident_open"])
|
| 782 |
self.assertIn(
|
| 783 |
follow_up.ticket_id,
|
| 784 |
obs.history[-1]["reward_components"]["cluster_stabilized_ticket_ids"],
|
|
|
|
| 841 |
0.0,
|
| 842 |
)
|
| 843 |
|
| 844 |
+
def test_terminal_rubric_reports_queue_management_score(self) -> None:
|
| 845 |
+
from unittest.mock import patch
|
| 846 |
+
|
| 847 |
+
dataset = load_dataset()
|
| 848 |
+
ticket = next((t for t in dataset if t.ticket_id == "TKT-NONDEFAULT-003"), None)
|
| 849 |
+
self.assertIsNotNone(ticket)
|
| 850 |
+
|
| 851 |
+
env = _make_env()
|
| 852 |
+
with patch.object(env, "_dataset", [ticket]):
|
| 853 |
+
with patch.object(env, "_tickets_by_id", {ticket.ticket_id: ticket}):
|
| 854 |
+
obs = env.reset(seed=0, task_id=3, queue_size=1)
|
| 855 |
+
|
| 856 |
+
final_obs = env.step(
|
| 857 |
+
HelpdeskTicketAction(
|
| 858 |
+
issue_type=ticket.issue_type,
|
| 859 |
+
priority=ticket.priority,
|
| 860 |
+
assignment_group=ticket.assignment_group,
|
| 861 |
+
resolution_action=ticket.resolution_action,
|
| 862 |
+
)
|
| 863 |
+
)
|
| 864 |
+
|
| 865 |
+
self.assertTrue(final_obs.done)
|
| 866 |
+
self.assertIn("queue_management_score", final_obs.last_reward_components)
|
| 867 |
+
self.assertIn("queue_management_breakdown", final_obs.last_reward_components)
|
| 868 |
+
self.assertIn("context_resolution", final_obs.last_reward_components["queue_management_breakdown"])
|
| 869 |
+
|
| 870 |
+
def test_capacity_forecast_hides_future_demand_until_tool_use(self) -> None:
|
| 871 |
+
from unittest.mock import patch
|
| 872 |
+
|
| 873 |
+
dataset = load_dataset()
|
| 874 |
+
ticket = next(
|
| 875 |
+
(t for t in dataset if t.alternate_route_score_multiplier > 0.0),
|
| 876 |
+
None,
|
| 877 |
+
)
|
| 878 |
+
self.assertIsNotNone(ticket)
|
| 879 |
+
|
| 880 |
+
env = _make_env()
|
| 881 |
+
with patch.object(env, "_dataset", [ticket]):
|
| 882 |
+
with patch.object(env, "_tickets_by_id", {ticket.ticket_id: ticket}):
|
| 883 |
+
obs = env.reset(seed=0, task_id=3, queue_size=1)
|
| 884 |
+
|
| 885 |
+
self.assertNotIn("future_queue_demand", obs.metadata)
|
| 886 |
+
obs = env.step(
|
| 887 |
+
HelpdeskTicketAction(
|
| 888 |
+
action_type="investigate",
|
| 889 |
+
tool_name="lookup_queue_capacity_forecast",
|
| 890 |
+
)
|
| 891 |
+
)
|
| 892 |
+
self.assertIn("future_queue_demand", obs.last_tool_result)
|
| 893 |
+
|
| 894 |
|
| 895 |
class TestQueueEconomics(unittest.TestCase):
|
| 896 |
"""Free investigations are allowed, but excessive investigation gets a queue-level penalty."""
|
tests/test_environment_smoke.py
CHANGED
|
@@ -124,6 +124,8 @@ class TestResetAllTaskIds(unittest.TestCase):
|
|
| 124 |
env = _make_env()
|
| 125 |
obs = env.reset(seed=42, task_id=2)
|
| 126 |
self._assert_valid_reset_obs(obs, 2)
|
|
|
|
|
|
|
| 127 |
|
| 128 |
def test_reset_task3(self) -> None:
|
| 129 |
env = _make_env()
|
|
@@ -258,6 +260,22 @@ class TestSeededDeterminism(unittest.TestCase):
|
|
| 258 |
f"Expected at least one repeated service_cluster_id in task 3 queue, got {cluster_ids}",
|
| 259 |
)
|
| 260 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 261 |
|
| 262 |
class TestPerTicketScoreBounds(unittest.TestCase):
|
| 263 |
"""1.1.6 — all per-ticket scores stay in [0.0, 1.0] across a full episode."""
|
|
|
|
| 124 |
env = _make_env()
|
| 125 |
obs = env.reset(seed=42, task_id=2)
|
| 126 |
self._assert_valid_reset_obs(obs, 2)
|
| 127 |
+
self.assertIn("defer", obs.available_action_types)
|
| 128 |
+
self.assertIn("lookup_queue_cluster_summary", obs.available_tools)
|
| 129 |
|
| 130 |
def test_reset_task3(self) -> None:
|
| 131 |
env = _make_env()
|
|
|
|
| 260 |
f"Expected at least one repeated service_cluster_id in task 3 queue, got {cluster_ids}",
|
| 261 |
)
|
| 262 |
|
| 263 |
+
def test_task2_queue_sampling_includes_clustered_follow_on(self) -> None:
|
| 264 |
+
env = _make_env()
|
| 265 |
+
env.reset(seed=42, task_id=2, queue_size=5)
|
| 266 |
+
|
| 267 |
+
cluster_ids = [
|
| 268 |
+
ticket.service_cluster_id for ticket in env._queue if ticket.service_cluster_id
|
| 269 |
+
]
|
| 270 |
+
repeated_cluster_ids = {
|
| 271 |
+
cluster_id for cluster_id in cluster_ids if cluster_ids.count(cluster_id) >= 2
|
| 272 |
+
}
|
| 273 |
+
|
| 274 |
+
self.assertTrue(
|
| 275 |
+
repeated_cluster_ids,
|
| 276 |
+
f"Expected at least one repeated service_cluster_id in task 2 queue, got {cluster_ids}",
|
| 277 |
+
)
|
| 278 |
+
|
| 279 |
|
| 280 |
class TestPerTicketScoreBounds(unittest.TestCase):
|
| 281 |
"""1.1.6 — all per-ticket scores stay in [0.0, 1.0] across a full episode."""
|
tests/test_policy_learning.py
CHANGED
|
@@ -209,6 +209,8 @@ class PolicyLearningTests(unittest.TestCase):
|
|
| 209 |
|
| 210 |
self.assertEqual(report["best_policy"], "adaptive_cue_bandit")
|
| 211 |
self.assertGreater(report["improvement_vs_baseline"]["avg_terminal_reward"], 0.0)
|
|
|
|
|
|
|
| 212 |
|
| 213 |
def test_infer_ticket_cue_distinguishes_workflow_blocker(self) -> None:
|
| 214 |
cue = infer_ticket_cue(
|
|
|
|
| 209 |
|
| 210 |
self.assertEqual(report["best_policy"], "adaptive_cue_bandit")
|
| 211 |
self.assertGreater(report["improvement_vs_baseline"]["avg_terminal_reward"], 0.0)
|
| 212 |
+
self.assertIn("avg_queue_management_score", report["improvement_vs_baseline"])
|
| 213 |
+
self.assertIn("avg_queue_management_score", report["policy_summaries"][0])
|
| 214 |
|
| 215 |
def test_infer_ticket_cue_distinguishes_workflow_blocker(self) -> None:
|
| 216 |
cue = infer_ticket_cue(
|