"""Task schema dataclasses and built-in crisis task factories."""

import random
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional


@dataclass
class HiddenStateField:
    """Describes one hidden-state entry and the inspect action that reveals it."""

    key: str  # e.g. "boss_mood"
    initial_value: Any  # e.g. "neutral"
    inspect_target: str  # e.g. "call_boss" — which inspect action type reveals this
    description: str  # shown to agent after reveal


@dataclass
class ExoEvent:
    """An exogenous event that mutates world and/or hidden state."""

    step: int  # inject at this step (inclusive); -1 = probabilistic
    probability: float  # 1.0 = deterministic; <1.0 = random at each step
    id: str  # e.g. "ticket_price_spike"
    description: str  # what agent sees in next observation
    world_mutation: dict[str, Any]  # e.g. {"ticket_price": 450, "seats_remaining": 1}
    hidden_state_mutation: dict[str, Any]  # e.g. {"boss_mood": "angry"}
    # Route IDs this event blocks. default_factory gives each instance its
    # own list instead of sharing one mutable default.
    closes_routes: list[str] = field(default_factory=list)


@dataclass
class Milestone:
    """An intermediate goal worth a partial reward."""

    id: str  # e.g. "flight_rebooked"
    description: str
    condition_key: str  # world/hidden key to check, e.g. "flight_rebooked"
    condition_value: Any  # e.g. True
    reward: float  # milestone reward added to episode total


@dataclass
class Route:
    """One way of resolving a task, with its requirements and effects."""

    id: str  # e.g. "rebook_premium"
    name: str
    description: str
    required_action_types: list[str]  # must use these tool actions to complete
    preconditions: dict[str, Any]  # world/hidden state checks, e.g. {"card_available": True}
    consequences: dict[str, Any]  # world mutations on route completion, e.g. {"flight_rebooked": True}
    closes_routes: list[str]  # route IDs this blocks
    milestones_unlocked: list[str]  # milestone IDs this route can hit
    final_reward: float  # bonus on route completion


@dataclass
class Task:
    """Full specification of one episode: state, events, routes and limits."""

    id: str
    domain: str  # "flight_crisis" | "code_merge_crisis"
    goal: str
    constraints: dict[str, Any]  # e.g. {"budget_max": 400, "deadline_step": 18}
    hidden_state: dict[str, Any]  # full truth, agent never sees directly
    mutable_world: dict[str, Any]  # partial truth, some fields revealed by inspect
    visible_world: dict[str, Any]  # agent sees this at each step (subset of mutable_world)
    success_conditions: list[dict[str, Any]]  # e.g. [{"key": "flight_rebooked", "value": True}]
    failure_conditions: list[dict[str, Any]]  # e.g. [{"key": "missed_deadline", "value": True}]
    event_schedule: list[ExoEvent]
    viable_routes: list[Route]
    milestones: list[Milestone]
    horizon: int  # max steps (20–50)
    difficulty: int  # 1–5
    domain_metadata: dict[str, Any]  # domain-specific extra data (story text, etc.)


def FlightCrisisTask() -> Task:
    """Build a fresh flight-cancellation task.

    Every call constructs new route, milestone and event objects, so the
    returned Task can be mutated without affecting later calls.

    Returns:
        A Task with domain "flight_crisis", two routes that close each
        other, two milestones and two scheduled events (steps 5 and 8).
    """
    routes = [
        Route(
            id="rebook_premium",
            name="Rebook Premium Option",
            description="Call agent and rebook on premium ticket",
            required_action_types=["communicate", "execute"],
            preconditions={"card_available": True},
            consequences={"flight_rebooked": True},
            closes_routes=["wait_lounge"],
            milestones_unlocked=["m1"],
            final_reward=2.5,
        ),
        Route(
            id="wait_lounge",
            name="Accept Delay & Work",
            description="Stay at airport lounge and work on laptop",
            required_action_types=["wait", "plan"],
            preconditions={"lounge_access": True},
            consequences={"caught_up": True},
            closes_routes=["rebook_premium"],
            milestones_unlocked=["m2"],
            final_reward=1.8,
        ),
    ]

    milestones = [
        Milestone(
            id="m1",
            description="Successfully rebooked flight before deadline",
            condition_key="flight_rebooked",
            condition_value=True,
            reward=1.0,
        ),
        Milestone(
            id="m2",
            description="Caught up with all emergency slack messages",
            condition_key="caught_up",
            condition_value=True,
            reward=0.8,
        ),
    ]

    events = [
        # NOTE(review): the description mentions a price increase, but the
        # only state change is hidden card_available -> False; world_mutation
        # is empty. Confirm this is intended.
        ExoEvent(
            step=5,
            probability=1.0,
            id="price_surge",
            description="Ticket prices sharply increased by $300.",
            world_mutation={},
            hidden_state_mutation={"card_available": False},
            closes_routes=[],
        ),
        ExoEvent(
            step=8,
            probability=1.0,
            id="lounge_full",
            description="The airport lounge is now at maximum capacity.",
            world_mutation={"lounge_access": False},
            hidden_state_mutation={},
            closes_routes=["wait_lounge"],
        ),
    ]

    return Task(
        id="flight_crisis_task_main",
        domain="flight_crisis",
        goal="Survive Airport Cancellation",
        constraints={"budget_max": 800, "deadline_step": 20},
        hidden_state={"card_available": True},
        mutable_world={
            "lounge_access": True,
            "flight_rebooked": False,
            "caught_up": False,
        },
        visible_world={"lounge_access": True},
        success_conditions=[{"key": "flight_rebooked", "value": True}],
        # NOTE(review): nothing in this module sets "missed_deadline";
        # presumably the engine derives it from deadline_step — confirm.
        failure_conditions=[{"key": "missed_deadline", "value": True}],
        event_schedule=events,
        viable_routes=routes,
        milestones=milestones,
        horizon=30,
        difficulty=4,
        domain_metadata={"story": "A major storm grounded commercial flights."},
    )


def CodeMergeCrisisTask() -> Task:
    """Build a fresh high-difficulty technical crisis (rollback or hotfix).

    Returns:
        A Task with domain "code_merge_crisis", two routes that close each
        other, two milestones, no scheduled events and a horizon of 10.
    """
    routes = [
        Route(
            id="revert_commit",
            name="Revert Commit",
            description="Quickly revert the broken merge to unblock the team.",
            required_action_types=["delegate", "communicate"],
            preconditions={},
            consequences={"pipeline_unblocked": True},
            closes_routes=["hotfix"],
            milestones_unlocked=["m1"],
            final_reward=1.5,
        ),
        Route(
            id="hotfix",
            name="Patch Forward",
            description="Find the logic error and push a hotfix.",
            required_action_types=["communicate", "spend"],
            preconditions={},
            consequences={"bug_resolved": True},
            closes_routes=["revert_commit"],
            milestones_unlocked=["m2"],
            final_reward=3.0,
        ),
    ]

    milestones = [
        Milestone(
            id="m1",
            description="CI pipeline is green again",
            condition_key="pipeline_unblocked",
            condition_value=True,
            reward=1.0,
        ),
        Milestone(
            id="m2",
            description="Bug resolved without losing features",
            condition_key="bug_resolved",
            condition_value=True,
            reward=2.0,
        ),
    ]

    # NOTE(review): the two routes close each other, yet success_conditions
    # lists both "pipeline_unblocked" and "bug_resolved". If the engine
    # requires ALL conditions, this task cannot be won — confirm that the
    # conditions are evaluated as any-of.
    # NOTE(review): unlike FlightCrisisTask, mutable_world does not seed the
    # outcome keys with False; verify the engine tolerates missing keys.
    return Task(
        id="code_merge_task_fallback",
        domain="code_merge_crisis",
        goal="Resolve Production Outage",
        constraints={"budget_max": 1000, "deadline_step": 8},
        hidden_state={"on_call_status": "alert"},
        mutable_world={
            "career.stability": -20.0,
            "mental_wellbeing.stress_level": 30.0,
        },
        visible_world={
            "career.stability": -20.0,
            "mental_wellbeing.stress_level": 30.0,
        },
        success_conditions=[
            {"key": "pipeline_unblocked", "value": True},
            {"key": "bug_resolved", "value": True},
        ],
        failure_conditions=[],
        event_schedule=[],
        viable_routes=routes,
        milestones=milestones,
        horizon=10,
        difficulty=4,
        domain_metadata={},
    )


class TaskGenerator:
    """Picks one of the registered task factories and builds a Task from it."""

    def __init__(self, rng: Optional[random.Random] = None):
        """Register the built-in task factories.

        Args:
            rng: Optional random.Random used for sampling, so callers can
                get reproducible task sequences. When omitted, the
                module-level ``random`` functions are used, so a global
                ``random.seed`` call still controls selection.
        """
        # Factories, not Task instances: each pick builds a fresh Task.
        self.tasks: List[Callable[[], Task]] = [FlightCrisisTask, CodeMergeCrisisTask]
        self.rng = rng

    def get_random_task(self) -> Task:
        """Return a newly built Task from a uniformly chosen factory.

        Raises:
            IndexError: If ``self.tasks`` has been emptied.
        """
        chooser = self.rng if self.rng is not None else random
        factory = chooser.choice(self.tasks)
        return factory()