File size: 3,872 Bytes
9ba4f8a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
01e1628
 
 
 
 
 
 
9ba4f8a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fb5a820
01e1628
 
9ba4f8a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fb5a820
01e1628
9ba4f8a
 
 
 
 
 
 
fb5a820
 
9ba4f8a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
from __future__ import annotations

from typing import Any, Literal

from pydantic import BaseModel, ConfigDict, Field


# A pair of integers used wherever the models below store a grid position.
# NOTE(review): presumably (x, y); the axis order is not fixed here — confirm.
Point = tuple[int, int]
# Closed set of strings accepted by ``AgentState.status``.
AgentStatus = Literal["idle", "busy"]
# Closed set of strings accepted by ``OrderState.status``.
OrderStatus = Literal["unassigned", "assigned", "completed", "expired", "rejected"]


class GridConfig(BaseModel, frozen=True):
    """Immutable description of the grid.

    The model is frozen, so fields cannot be reassigned once an instance
    has been built. Both point collections fall back to an empty tuple
    when omitted.

    NOTE(review): nothing here checks that the dimensions are positive or
    that the listed points lie inside ``width`` x ``height`` — confirm
    whether the consumer validates that.
    """

    # Grid extent.
    width: int
    height: int

    # Optional point sets; empty unless supplied.
    congested_zones: tuple[Point, ...] = ()
    hotspots: tuple[Point, ...] = ()


class ZonePhase(BaseModel, frozen=True):
    """Immutable pairing of a start time with a set of grid points.

    ``Scenario`` holds sequences of these for its hotspot and congestion
    phases. Both fields are required.

    NOTE(review): presumably ``points`` apply from ``start_time`` until
    the next phase starts — confirm against the code that reads phases.
    """

    start_time: int
    points: tuple[Point, ...]


class AgentState(BaseModel, validate_assignment=True):
    """Mutable record for a single agent.

    Assignment validation is switched on, so a value written to an
    existing instance is validated against the field's type (``status``,
    for instance, must stay one of the ``AgentStatus`` literals). Only
    ``agent_id`` and ``location`` are required; the defaults describe an
    idle agent with no order attached.
    """

    # Required identity and position.
    agent_id: str
    location: Point

    # Work state; defaults to idle with nothing assigned.
    status: AgentStatus = "idle"
    busy_until: int = 0
    assigned_order_id: str | None = None

    # NOTE(review): these look like derived convenience values related to
    # ``status`` / ``busy_until``; this model does not keep them in sync —
    # confirm which component maintains them.
    availability_in: int = 0
    idle_now: bool = True


class OrderState(BaseModel, validate_assignment=True):
    """Mutable record for a single order.

    Assignment validation is switched on, so a value written to an
    existing instance is validated against the field's type. The first
    six fields are required; a new order starts as ``"unassigned"`` and
    every remaining optional field defaults to ``None``.
    """

    # Required description of the order.
    order_id: str
    created_at: int
    pickup_location: Point
    drop_location: Point
    reward_value: float
    deadline: int

    # Lifecycle bookkeeping; unset until the corresponding event happens.
    status: OrderStatus = "unassigned"
    assigned_agent_id: str | None = None
    scheduled_completion_time: int | None = None
    completed_at: int | None = None
    rejected_at: int | None = None
    service_cutoff_time: int | None = None

    # NOTE(review): the fields below look like derived estimates filled in
    # by whoever builds the observation; nothing in this model computes
    # them — confirm against the producer.
    nearest_agent_id: str | None = None
    estimated_service_time: int | None = None
    estimated_finish_time: int | None = None
    slack_time: int | None = None
    feasible_now: bool | None = None


class Scenario(BaseModel, frozen=True):
    """Immutable scenario definition: grid, agents, orders and horizon.

    The model is frozen, so its fields cannot be reassigned. The freeze
    is shallow: the contained ``AgentState`` and ``OrderState`` models
    are not frozen themselves.

    NOTE(review): presumably consumers copy the agents and orders before
    mutating them during an episode — confirm.
    """

    # Required core definition.
    name: str
    grid: GridConfig
    agents: tuple[AgentState, ...]
    orders: tuple[OrderState, ...]
    episode_horizon: int

    # Step budget used when none is given elsewhere.
    default_max_decision_steps: int = 100

    # Optional time-phased point sets; empty unless supplied.
    hotspot_phases: tuple[ZonePhase, ...] = ()
    congestion_phases: tuple[ZonePhase, ...] = ()

    # Optional free-text descriptions; empty strings unless supplied.
    briefing: str = ""
    dispatch_objective: str = ""
    known_future_signal: str = ""


class Assignment(BaseModel, frozen=True):
    """Immutable pairing of an agent identifier with an order identifier.

    Both fields are required, and the model is frozen so neither can be
    reassigned after construction.
    """

    agent_id: str
    order_id: str


class Action(BaseModel):
    """Assignments and rejections submitted together as one action.

    Both fields use ``default_factory=list``, so ``Action()`` is valid and
    each instance gets its own empty lists.
    """

    # Agent/order pairings to apply.
    assignments: list[Assignment] = Field(default_factory=list)
    # NOTE(review): presumably order ids to reject — confirm with the consumer.
    rejections: list[str] = Field(default_factory=list)


class Reward(BaseModel):
    """Reward values carried by a ``StepResult``; both fields are required."""

    step_reward: float
    cumulative_reward: float


class Feedback(BaseModel):
    """Feedback section embedded in an ``Observation``.

    Every field has a default (zero, an empty container, or an empty
    string), so ``Feedback()`` is valid.
    """

    last_step_reward: float = 0.0
    cumulative_reward: float = 0.0
    # Each instance gets its own fresh container via ``default_factory``.
    recent_events: list[str] = Field(default_factory=list)
    reward_breakdown: dict[str, float] = Field(default_factory=dict)
    error_summary: dict[str, int] = Field(default_factory=dict)
    # Free-text field; its vocabulary is not constrained by this model.
    current_pressure: str = ""


class Metrics(BaseModel):
    """Integer counters embedded in an ``Observation``; all default to zero.

    NOTE(review): the distinction between ``active_orders`` and
    ``pending_orders`` is not defined here — confirm with the code that
    fills these in.
    """

    # Order outcome counters.
    completed_orders: int = 0
    on_time_orders: int = 0
    late_orders: int = 0
    expired_orders: int = 0
    rejected_orders: int = 0
    invalid_actions: int = 0
    # Order and agent population counters.
    active_orders: int = 0
    pending_orders: int = 0
    idle_agents: int = 0
    busy_agents: int = 0


class ScenarioInfo(BaseModel):
    """Scenario summary embedded in an ``Observation``.

    Repeats the ``name``, ``episode_horizon``,
    ``default_max_decision_steps`` and free-text fields declared on
    ``Scenario``, and adds an optional ``used_seed``.
    """

    name: str
    episode_horizon: int
    default_max_decision_steps: int = 100
    # ``None`` when no seed is recorded.
    used_seed: int | None = None
    briefing: str = ""
    dispatch_objective: str = ""
    known_future_signal: str = ""


class Observation(BaseModel):
    """Observation payload returned inside a ``StepResult``.

    All fields are required. Agents and orders are held as lists here,
    whereas ``Scenario`` stores them as tuples.
    """

    # Clock and step counters.
    time: int
    decision_step: int
    max_decision_steps: int
    task_id: str
    episode_horizon: int
    # World state.
    grid: GridConfig
    agents: list[AgentState]
    orders: list[OrderState]
    # Aggregated feedback, counters and scenario summary.
    feedback: Feedback
    metrics: Metrics
    scenario_info: ScenarioInfo


class StepResult(BaseModel):
    """Bundle of observation, reward, done flag and info; all are required."""

    observation: Observation
    reward: Reward
    done: bool
    # String-keyed mapping with unconstrained values; no schema is imposed here.
    info: dict[str, Any]


class TaskResult(BaseModel):
    """Reward, score and outcome counters recorded for one task.

    The five leading fields are required; the counters default to zero and
    share their names with the corresponding ``Metrics`` fields.

    NOTE(review): how ``score`` is derived from ``raw_reward``,
    ``baseline_reward`` and ``target_reward`` is not defined here — confirm
    with the grader that constructs this model.
    """

    task_id: str
    raw_reward: float
    baseline_reward: float
    target_reward: float
    score: float
    # Outcome counters.
    completed_orders: int = 0
    on_time_orders: int = 0
    late_orders: int = 0
    expired_orders: int = 0
    rejected_orders: int = 0
    invalid_actions: int = 0