File size: 7,107 Bytes
ddbc1ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import random
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional

@dataclass
class HiddenStateField:
    """Declaration of a single hidden-state entry and how it gets revealed.

    Attributes:
        key: Name of the hidden-state entry, e.g. "boss_mood".
        initial_value: Value the entry starts with, e.g. "neutral".
        inspect_target: Inspect action type that reveals this entry,
            e.g. "call_boss".
        description: Text shown to the agent after the reveal.
    """

    key: str
    initial_value: Any
    inspect_target: str
    description: str

@dataclass
class ExoEvent:
    """An exogenous event that can be injected into a running task.

    Attributes:
        step: Step at which the event is injected (inclusive); -1 marks
            the event as probabilistic.
        probability: 1.0 means deterministic; a value below 1.0 means the
            event is random at each step.
        id: Event identifier, e.g. "ticket_price_spike".
        description: What the agent sees in its next observation.
        world_mutation: World-state changes, e.g.
            {"ticket_price": 450, "seats_remaining": 1}.
        hidden_state_mutation: Hidden-state changes, e.g.
            {"boss_mood": "angry"}.
        closes_routes: IDs of the routes this event blocks. Optional; each
            instance gets its own new empty list when omitted.
    """

    step: int
    probability: float
    id: str
    description: str
    world_mutation: dict
    hidden_state_mutation: dict
    closes_routes: list[str] = field(default_factory=list)

@dataclass
class Milestone:
    """A rewardable checkpoint defined by a single key/value condition.

    Attributes:
        id: Milestone identifier, e.g. "flight_rebooked".
        description: Human-readable description of the milestone.
        condition_key: World/hidden key to check, e.g. "flight_rebooked".
        condition_value: Value the key is compared against, e.g. True.
        reward: Milestone reward added to the episode total.
    """

    id: str
    description: str
    condition_key: str
    condition_value: Any
    reward: float

@dataclass
class Route:
    """One way of completing a task, with its requirements and effects.

    Attributes:
        id: Route identifier, e.g. "rebook_premium".
        name: Short display name.
        description: Longer description of the route.
        required_action_types: Tool action types that must be used to
            complete the route.
        preconditions: World/hidden state checks, e.g.
            {"card_available": True}.
        consequences: World mutations applied on route completion, e.g.
            {"flight_rebooked": True}.
        closes_routes: IDs of the routes this route blocks.
        milestones_unlocked: IDs of the milestones this route can hit.
        final_reward: Bonus granted on route completion.
    """

    id: str
    name: str
    description: str
    required_action_types: list[str]
    preconditions: dict
    consequences: dict
    closes_routes: list[str]
    milestones_unlocked: list[str]
    final_reward: float

@dataclass
class Task:
    """Complete description of one crisis scenario.

    Attributes:
        id: Task identifier.
        domain: "flight_crisis" | "code_merge_crisis".
        goal: Goal statement for the task.
        constraints: e.g. {"budget_max": 400, "deadline_step": 18}.
        hidden_state: Full truth; the agent never sees it directly.
        mutable_world: Partial truth; some fields are revealed by inspect.
        visible_world: What the agent sees at each step (a subset of
            mutable_world).
        success_conditions: e.g. [{"key": "flight_rebooked", "value": True}].
        failure_conditions: e.g. [{"key": "missed_deadline", "value": True}].
        event_schedule: ExoEvent entries attached to the task.
        viable_routes: Route entries available in the task.
        milestones: Milestone entries available in the task.
        horizon: Maximum number of steps (20-50).
        difficulty: Difficulty rating, 1-5.
        domain_metadata: Domain-specific extra data (story text, etc.).
    """

    id: str
    domain: str
    goal: str
    constraints: dict
    hidden_state: dict
    mutable_world: dict
    visible_world: dict
    success_conditions: list[dict]
    failure_conditions: list[dict]
    event_schedule: list[ExoEvent]
    viable_routes: list[Route]
    milestones: list[Milestone]
    horizon: int
    difficulty: int
    domain_metadata: dict


def FlightCrisisTask() -> Task:
    """Build the airport-cancellation scenario (domain "flight_crisis").

    Every call constructs new objects. The scenario has two routes that
    list each other in ``closes_routes``, one milestone per route, and two
    events with probability 1.0 scheduled at steps 5 and 8.

    Returns:
        The assembled Task with id "flight_crisis_task_main".
    """
    # Scheduled events: the first flips the hidden card flag, the second
    # removes lounge access and closes the lounge route.
    price_surge = ExoEvent(
        step=5,
        probability=1.0,
        id="price_surge",
        description="Ticket prices sharply increased by $300.",
        world_mutation={},
        hidden_state_mutation={"card_available": False},
        closes_routes=[],
    )
    lounge_full = ExoEvent(
        step=8,
        probability=1.0,
        id="lounge_full",
        description="The airport lounge is now at maximum capacity.",
        world_mutation={"lounge_access": False},
        hidden_state_mutation={},
        closes_routes=["wait_lounge"],
    )

    # One milestone per route consequence flag.
    rebooked = Milestone(
        id="m1",
        description="Successfully rebooked flight before deadline",
        condition_key="flight_rebooked",
        condition_value=True,
        reward=1.0,
    )
    caught_up = Milestone(
        id="m2",
        description="Caught up with all emergency slack messages",
        condition_key="caught_up",
        condition_value=True,
        reward=0.8,
    )

    # The two routes close each other.
    premium_route = Route(
        id="rebook_premium",
        name="Rebook Premium Option",
        description="Call agent and rebook on premium ticket",
        required_action_types=["communicate", "execute"],
        preconditions={"card_available": True},
        consequences={"flight_rebooked": True},
        closes_routes=["wait_lounge"],
        milestones_unlocked=["m1"],
        final_reward=2.5,
    )
    lounge_route = Route(
        id="wait_lounge",
        name="Accept Delay & Work",
        description="Stay at airport lounge and work on laptop",
        required_action_types=["wait", "plan"],
        preconditions={"lounge_access": True},
        consequences={"caught_up": True},
        closes_routes=["rebook_premium"],
        milestones_unlocked=["m2"],
        final_reward=1.8,
    )

    task = Task(
        id="flight_crisis_task_main",
        domain="flight_crisis",
        goal="Survive Airport Cancellation",
        constraints={"budget_max": 800, "deadline_step": 20},
        hidden_state={"card_available": True},
        mutable_world={
            "lounge_access": True,
            "flight_rebooked": False,
            "caught_up": False,
        },
        visible_world={"lounge_access": True},
        success_conditions=[{"key": "flight_rebooked", "value": True}],
        failure_conditions=[{"key": "missed_deadline", "value": True}],
        event_schedule=[price_surge, lounge_full],
        viable_routes=[premium_route, lounge_route],
        milestones=[rebooked, caught_up],
        horizon=30,
        difficulty=4,
        domain_metadata={"story": "A major storm grounded commercial flights."},
    )
    return task

def CodeMergeCrisisTask() -> Task:
    """Build the production-outage scenario (domain "code_merge_crisis").

    Offers two routes, reverting the merge or patching forward with a
    hotfix, that list each other in ``closes_routes``. No events and no
    failure conditions are attached.

    Returns:
        The assembled Task with id "code_merge_task_fallback".
    """
    revert_route = Route(
        id="revert_commit",
        name="Revert Commit",
        description="Quickly revert the broken merge to unblock the team.",
        required_action_types=["delegate", "communicate"],
        preconditions={},
        consequences={"pipeline_unblocked": True},
        closes_routes=["hotfix"],
        milestones_unlocked=["m1"],
        final_reward=1.5,
    )
    hotfix_route = Route(
        id="hotfix",
        name="Patch Forward",
        description="Find the logic error and push a hotfix.",
        required_action_types=["communicate", "spend"],
        preconditions={},
        consequences={"bug_resolved": True},
        closes_routes=["revert_commit"],
        milestones_unlocked=["m2"],
        final_reward=3.0,
    )

    pipeline_green = Milestone(
        id="m1",
        description="CI pipeline is green again",
        condition_key="pipeline_unblocked",
        condition_value=True,
        reward=1.0,
    )
    bug_fixed = Milestone(
        id="m2",
        description="Bug resolved without losing features",
        condition_key="bug_resolved",
        condition_value=True,
        reward=2.0,
    )

    # NOTE(review): both flags are listed as success conditions while the
    # two routes close each other; confirm how the consumer combines them.
    success = [
        {"key": "pipeline_unblocked", "value": True},
        {"key": "bug_resolved", "value": True},
    ]

    return Task(
        id="code_merge_task_fallback",
        domain="code_merge_crisis",
        goal="Resolve Production Outage",
        constraints={"budget_max": 1000, "deadline_step": 8},
        hidden_state={"on_call_status": "alert"},
        # Two separate dict objects with equal contents, so mutating one
        # does not touch the other.
        mutable_world={
            "career.stability": -20.0,
            "mental_wellbeing.stress_level": 30.0,
        },
        visible_world={
            "career.stability": -20.0,
            "mental_wellbeing.stress_level": 30.0,
        },
        success_conditions=success,
        failure_conditions=[],
        event_schedule=[],
        viable_routes=[revert_route, hotfix_route],
        milestones=[pipeline_green, bug_fixed],
        horizon=10,
        difficulty=4,
        domain_metadata={},
    )

class TaskGenerator:
    """Picks one of the registered task factories and builds a task from it.

    ``tasks`` holds zero-argument callables. Each call to
    ``get_random_task`` selects one and invokes it, so a new task object
    is constructed per call.
    """

    def __init__(self):
        self.tasks = [FlightCrisisTask, CodeMergeCrisisTask]

    def get_random_task(self, rng: Optional[random.Random] = None) -> "Task":
        """Return a task built by a randomly chosen factory.

        Args:
            rng: Optional random source exposing ``choice`` (for example a
                seeded ``random.Random``) for reproducible sampling. When
                omitted, the module-level ``random`` functions are used, so
                callers that seed the global generator are still honoured.

        Returns:
            The object returned by the selected factory.

        Raises:
            IndexError: If ``self.tasks`` is empty.
        """
        chooser = random if rng is None else rng
        factory = chooser.choice(self.tasks)
        return factory()