File size: 7,107 Bytes
ddbc1ba | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 | from dataclasses import dataclass, field
from typing import Any, List, Dict
@dataclass
class HiddenStateField:
    """Describe one hidden-state entry and the inspect action that reveals it.

    Attributes:
        key: Name of the hidden-state entry, e.g. "boss_mood".
        initial_value: Starting value of the entry, e.g. "neutral".
        inspect_target: Inspect action type that reveals this entry,
            e.g. "call_boss".
        description: Text shown to the agent after the reveal.
    """

    key: str
    initial_value: Any
    inspect_target: str
    description: str
@dataclass
class ExoEvent:
    """Describe an exogenous event that can be injected into an episode.

    Attributes:
        step: Step at which the event is injected (inclusive);
            -1 marks a probabilistic event.
        probability: 1.0 means deterministic; a value below 1.0 means the
            event is random at each step.
        id: Event identifier, e.g. "ticket_price_spike".
        description: What the agent sees in its next observation.
        world_mutation: World-state changes,
            e.g. {"ticket_price": 450, "seats_remaining": 1}.
        hidden_state_mutation: Hidden-state changes,
            e.g. {"boss_mood": "angry"}.
        closes_routes: IDs of the routes this event blocks. Defaults to a
            fresh empty list per instance.
    """

    step: int
    probability: float
    id: str
    description: str
    world_mutation: dict
    hidden_state_mutation: dict
    # default_factory gives every event its own list rather than a shared one.
    closes_routes: list[str] = field(default_factory=list)
@dataclass
class Milestone:
    """Describe an intermediate goal and the reward attached to it.

    Attributes:
        id: Milestone identifier, e.g. "flight_rebooked".
        description: Human-readable summary of the milestone.
        condition_key: World/hidden key to check, e.g. "flight_rebooked".
        condition_value: Value the key is compared against, e.g. True.
        reward: Milestone reward added to the episode total.
    """

    id: str
    description: str
    condition_key: str
    condition_value: Any
    reward: float
@dataclass
class Route:
    """Describe one way of resolving a task.

    Attributes:
        id: Route identifier, e.g. "rebook_premium".
        name: Short display name.
        description: Human-readable summary of the route.
        required_action_types: Tool action types that must be used to
            complete the route.
        preconditions: World/hidden state checks,
            e.g. {"card_available": True}.
        consequences: World mutations applied on route completion,
            e.g. {"flight_rebooked": True}.
        closes_routes: IDs of the routes this route blocks.
        milestones_unlocked: IDs of the milestones this route can hit.
        final_reward: Bonus granted on route completion.
    """

    id: str
    name: str
    description: str
    required_action_types: list[str]
    preconditions: dict
    consequences: dict
    closes_routes: list[str]
    milestones_unlocked: list[str]
    final_reward: float
@dataclass
class Task:
    """Bundle the full definition of one crisis scenario.

    Attributes:
        id: Task identifier.
        domain: Scenario family: "flight_crisis" or "code_merge_crisis".
        goal: Goal statement for the task.
        constraints: Limits such as {"budget_max": 400, "deadline_step": 18}.
        hidden_state: Full truth; the agent never sees it directly.
        mutable_world: Partial truth; some fields are revealed by inspect.
        visible_world: What the agent sees at each step (a subset of
            ``mutable_world``).
        success_conditions: Key/value checks,
            e.g. [{"key": "flight_rebooked", "value": True}].
        failure_conditions: Key/value checks,
            e.g. [{"key": "missed_deadline", "value": True}].
        event_schedule: Exogenous events attached to the task.
        viable_routes: Routes available for resolving the task.
        milestones: Milestones attached to the task.
        horizon: Maximum number of steps (20-50).
        difficulty: Difficulty rating from 1 to 5.
        domain_metadata: Domain-specific extra data (story text, etc.).
    """

    id: str
    domain: str
    goal: str
    constraints: dict
    hidden_state: dict
    mutable_world: dict
    visible_world: dict
    success_conditions: list[dict]
    failure_conditions: list[dict]
    event_schedule: list[ExoEvent]
    viable_routes: list[Route]
    milestones: list[Milestone]
    horizon: int
    difficulty: int
    domain_metadata: dict
def FlightCrisisTask() -> Task:
    """Build the flight-cancellation crisis task.

    The task offers two routes that close each other ("rebook_premium" and
    "wait_lounge"), one milestone per route, and two deterministic events
    scheduled at steps 5 and 8. Every call builds fresh objects, so callers
    may mutate the returned task freely.

    Returns:
        A new ``Task`` with id "flight_crisis_task_main".
    """
    # --- Routes: each one lists the other in closes_routes. ---
    rebook_route = Route(
        id="rebook_premium",
        name="Rebook Premium Option",
        description="Call agent and rebook on premium ticket",
        required_action_types=["communicate", "execute"],
        preconditions={"card_available": True},
        consequences={"flight_rebooked": True},
        closes_routes=["wait_lounge"],
        milestones_unlocked=["m1"],
        final_reward=2.5,
    )
    lounge_route = Route(
        id="wait_lounge",
        name="Accept Delay & Work",
        description="Stay at airport lounge and work on laptop",
        required_action_types=["wait", "plan"],
        preconditions={"lounge_access": True},
        consequences={"caught_up": True},
        closes_routes=["rebook_premium"],
        milestones_unlocked=["m2"],
        final_reward=1.8,
    )

    # --- Milestones: keyed on the consequence each route writes. ---
    rebooked_milestone = Milestone(
        id="m1",
        description="Successfully rebooked flight before deadline",
        condition_key="flight_rebooked",
        condition_value=True,
        reward=1.0,
    )
    caught_up_milestone = Milestone(
        id="m2",
        description="Caught up with all emergency slack messages",
        condition_key="caught_up",
        condition_value=True,
        reward=0.8,
    )

    # --- Scheduled events (both have probability 1.0). ---
    # The price surge flips the hidden "card_available" flag, which is the
    # precondition of "rebook_premium"; it lists no closed routes itself.
    price_surge = ExoEvent(
        step=5,
        probability=1.0,
        id="price_surge",
        description="Ticket prices sharply increased by $300.",
        world_mutation={},
        hidden_state_mutation={"card_available": False},
        closes_routes=[],
    )
    # The lounge filling up both clears "lounge_access" and closes the
    # "wait_lounge" route.
    lounge_full = ExoEvent(
        step=8,
        probability=1.0,
        id="lounge_full",
        description="The airport lounge is now at maximum capacity.",
        world_mutation={"lounge_access": False},
        hidden_state_mutation={},
        closes_routes=["wait_lounge"],
    )

    return Task(
        id="flight_crisis_task_main",
        domain="flight_crisis",
        goal="Survive Airport Cancellation",
        constraints={"budget_max": 800, "deadline_step": 20},
        hidden_state={"card_available": True},
        mutable_world={
            "lounge_access": True,
            "flight_rebooked": False,
            "caught_up": False,
        },
        # Only the lounge flag is visible to the agent from the start.
        visible_world={"lounge_access": True},
        success_conditions=[{"key": "flight_rebooked", "value": True}],
        failure_conditions=[{"key": "missed_deadline", "value": True}],
        event_schedule=[price_surge, lounge_full],
        viable_routes=[rebook_route, lounge_route],
        milestones=[rebooked_milestone, caught_up_milestone],
        horizon=30,
        difficulty=4,
        domain_metadata={"story": "A major storm grounded commercial flights."},
    )
def CodeMergeCrisisTask() -> Task:
    """Build the production-outage task resolved by a revert or a hotfix.

    The task offers two routes that close each other ("revert_commit" and
    "hotfix"), one milestone per route, no scheduled events and no failure
    conditions. Every call builds fresh objects.

    Returns:
        A new ``Task`` with id "code_merge_task_fallback".
    """
    revert_route = Route(
        id="revert_commit",
        name="Revert Commit",
        description="Quickly revert the broken merge to unblock the team.",
        required_action_types=["delegate", "communicate"],
        preconditions={},
        consequences={"pipeline_unblocked": True},
        closes_routes=["hotfix"],
        milestones_unlocked=["m1"],
        final_reward=1.5,
    )
    hotfix_route = Route(
        id="hotfix",
        name="Patch Forward",
        description="Find the logic error and push a hotfix.",
        required_action_types=["communicate", "spend"],
        preconditions={},
        consequences={"bug_resolved": True},
        closes_routes=["revert_commit"],
        milestones_unlocked=["m2"],
        final_reward=3.0,
    )

    pipeline_green = Milestone(
        id="m1",
        description="CI pipeline is green again",
        condition_key="pipeline_unblocked",
        condition_value=True,
        reward=1.0,
    )
    bug_fixed = Milestone(
        id="m2",
        description="Bug resolved without losing features",
        condition_key="bug_resolved",
        condition_value=True,
        reward=2.0,
    )

    # The agent sees the whole world here: the visible and mutable views
    # start with the same entries but are kept as separate dict objects.
    starting_metrics = {
        "career.stability": -20.0,
        "mental_wellbeing.stress_level": 30.0,
    }

    # NOTE(review): both success keys are listed, yet each is produced by a
    # route that closes the other, and neither key has an initial entry in
    # mutable_world. Whether the engine treats success_conditions as "any"
    # or "all" is not visible from this file -- confirm against the consumer.
    success_checks = [
        {"key": "pipeline_unblocked", "value": True},
        {"key": "bug_resolved", "value": True},
    ]

    return Task(
        id="code_merge_task_fallback",
        domain="code_merge_crisis",
        goal="Resolve Production Outage",
        constraints={"budget_max": 1000, "deadline_step": 8},
        hidden_state={"on_call_status": "alert"},
        mutable_world=dict(starting_metrics),
        visible_world=dict(starting_metrics),
        success_conditions=success_checks,
        failure_conditions=[],
        event_schedule=[],
        viable_routes=[revert_route, hotfix_route],
        milestones=[pipeline_green, bug_fixed],
        horizon=10,
        difficulty=4,
        domain_metadata={},
    )
class TaskGenerator:
    """Pick a task factory at random and build a fresh task from it.

    Attributes:
        tasks: Zero-argument callables; each returns a new task when called.
    """

    def __init__(self, tasks=None, *, seed=None):
        """Set up the pool of task factories and the random source.

        Args:
            tasks: Optional iterable of zero-argument task factories. When
                omitted, the two built-in factories are used.
            seed: Optional seed for a private random generator, making the
                sequence of picks reproducible. When omitted, the shared
                ``random`` module state is used, exactly as before.
        """
        import random

        if tasks is None:
            # Resolved lazily so a custom pool never touches the built-ins.
            self.tasks = [FlightCrisisTask, CodeMergeCrisisTask]
        else:
            self.tasks = list(tasks)
        # None means "fall back to the module-level generator" at pick time.
        self._rng = None if seed is None else random.Random(seed)

    def get_random_task(self) -> "Task":
        """Return a newly built task from a randomly chosen factory.

        Raises:
            IndexError: If the factory pool is empty.
        """
        import random

        # getattr keeps subclasses working if they set ``tasks`` themselves
        # without calling this class's __init__.
        rng = getattr(self, "_rng", None)
        if rng is None:
            rng = random
        factory = rng.choice(self.tasks)
        return factory()
|