from __future__ import annotations
from typing import Any, Literal
from pydantic import BaseModel, ConfigDict, Field
# A grid coordinate expressed as a pair of integers.
Point = tuple[int, int]

# The two status strings accepted for an agent.
AgentStatus = Literal["idle", "busy"]

# The closed set of status strings accepted for an order.
OrderStatus = Literal[
    "unassigned",
    "assigned",
    "completed",
    "expired",
    "rejected",
]
class GridConfig(BaseModel):
    """Immutable grid description: dimensions plus optional marked cells.

    The model is frozen, so fields cannot be reassigned after construction.
    """

    model_config = ConfigDict(frozen=True)

    # Grid extent; presumably counted in cells -- confirm with the simulator.
    width: int
    height: int
    # Coordinates flagged as congested; empty when none are configured.
    congested_zones: tuple[Point, ...] = ()
    # Coordinates flagged as hotspots; empty when none are configured.
    hotspots: tuple[Point, ...] = ()
class ZonePhase(BaseModel):
    """Immutable pairing of a start time with a set of grid points.

    ``Scenario`` uses this type for both its ``hotspot_phases`` and its
    ``congestion_phases`` fields.
    """

    model_config = ConfigDict(frozen=True)

    # Presumably the time from which ``points`` take effect -- confirm.
    start_time: int
    points: tuple[Point, ...]
class AgentState(BaseModel):
    """Mutable record for a single agent.

    ``validate_assignment=True`` makes pydantic re-validate a field each time
    it is reassigned, so ``status`` can only ever hold an ``AgentStatus``
    value.
    """

    model_config = ConfigDict(validate_assignment=True)

    agent_id: str
    location: Point
    status: AgentStatus = "idle"
    # Presumably the time at which a busy agent becomes free -- confirm.
    busy_until: int = 0
    # None by default; presumably set while an order is assigned -- confirm.
    assigned_order_id: str | None = None
    # NOTE(review): the next two look like convenience values derived from
    # ``status`` / ``busy_until``; nothing in this class keeps them in sync.
    availability_in: int = 0
    idle_now: bool = True
class OrderState(BaseModel):
    """Mutable record for a single order.

    ``validate_assignment=True`` makes pydantic re-validate a field each time
    it is reassigned, so ``status`` can only ever hold an ``OrderStatus``
    value.
    """

    model_config = ConfigDict(validate_assignment=True)

    # Required fields: these must be supplied when the order is created.
    order_id: str
    created_at: int
    pickup_location: Point
    drop_location: Point
    reward_value: float
    deadline: int

    # Progress fields; each starts at its "nothing has happened yet" default.
    status: OrderStatus = "unassigned"
    assigned_agent_id: str | None = None
    scheduled_completion_time: int | None = None
    completed_at: int | None = None
    rejected_at: int | None = None
    service_cutoff_time: int | None = None

    # NOTE(review): the remaining fields look like derived estimates filled in
    # by code outside this class; None means "not computed" -- confirm.
    nearest_agent_id: str | None = None
    estimated_service_time: int | None = None
    estimated_finish_time: int | None = None
    slack_time: int | None = None
    feasible_now: bool | None = None
class Scenario(BaseModel):
    """Immutable definition of one scenario: grid, agents, orders and limits.

    The model is frozen, which blocks reassigning the fields of the scenario
    itself. The ``AgentState`` and ``OrderState`` items it holds are not
    frozen models, so they remain individually mutable.
    """

    model_config = ConfigDict(frozen=True)

    name: str
    grid: GridConfig
    agents: tuple[AgentState, ...]
    orders: tuple[OrderState, ...]
    episode_horizon: int
    default_max_decision_steps: int = 100

    # Optional time-phased zone definitions; empty when none are configured.
    hotspot_phases: tuple[ZonePhase, ...] = ()
    congestion_phases: tuple[ZonePhase, ...] = ()

    # Free-text descriptions; all default to the empty string.
    briefing: str = ""
    dispatch_objective: str = ""
    known_future_signal: str = ""
class Assignment(BaseModel):
    """Immutable pairing of one agent id with one order id."""

    model_config = ConfigDict(frozen=True)

    agent_id: str
    order_id: str
class Action(BaseModel):
    """A decision made of agent/order assignments and a list of rejections.

    Both lists default to empty, so ``Action()`` is a valid action that
    carries no assignments and no rejections.
    """

    assignments: list[Assignment] = Field(default_factory=list)
    # Presumably the ids of the orders being rejected -- confirm with caller.
    rejections: list[str] = Field(default_factory=list)
class Reward(BaseModel):
    """Reward values reported for a step: the step's own and the running total.

    Both fields are required.
    """

    step_reward: float
    cumulative_reward: float
class Feedback(BaseModel):
    """Feedback bundle embedded in an ``Observation``.

    Every field has a default, so ``Feedback()`` yields zeroed rewards, empty
    collections and an empty pressure string.
    """

    last_step_reward: float = 0.0
    cumulative_reward: float = 0.0
    recent_events: list[str] = Field(default_factory=list)
    # Named float components; presumably these sum to a reward -- confirm.
    reward_breakdown: dict[str, float] = Field(default_factory=dict)
    # Integer counts keyed by a string; presumably an error kind -- confirm.
    error_summary: dict[str, int] = Field(default_factory=dict)
    current_pressure: str = ""
class Metrics(BaseModel):
    """Integer counters for orders, invalid actions and agents.

    Every counter defaults to 0.
    """

    # Order outcome counters.
    completed_orders: int = 0
    on_time_orders: int = 0
    late_orders: int = 0
    expired_orders: int = 0
    rejected_orders: int = 0

    invalid_actions: int = 0

    # Order backlog counters.
    active_orders: int = 0
    pending_orders: int = 0

    # Agent counters.
    idle_agents: int = 0
    busy_agents: int = 0
class ScenarioInfo(BaseModel):
    """Scenario metadata embedded in an ``Observation``.

    Repeats the ``name``, ``episode_horizon``, ``default_max_decision_steps``
    and free-text fields declared on ``Scenario`` and adds ``used_seed``.
    """

    name: str
    episode_horizon: int
    default_max_decision_steps: int = 100
    # None by default; presumably the seed behind this episode -- confirm.
    used_seed: int | None = None
    briefing: str = ""
    dispatch_objective: str = ""
    known_future_signal: str = ""
class Observation(BaseModel):
    """Snapshot handed back at a decision point.

    Every field is required; there are no defaults.
    """

    # Time and step bookkeeping.
    time: int
    decision_step: int
    max_decision_steps: int
    task_id: str
    episode_horizon: int

    # State of the grid, the agents and the orders.
    grid: GridConfig
    agents: list[AgentState]
    orders: list[OrderState]

    # Feedback, counters and scenario metadata.
    feedback: Feedback
    metrics: Metrics
    scenario_info: ScenarioInfo
class StepResult(BaseModel):
    """Bundle of an observation, a reward, a done flag and an info mapping.

    All four fields are required.
    """

    observation: Observation
    reward: Reward
    done: bool
    # Free-form extras; the value types are not constrained by this model.
    info: dict[str, Any]
class TaskResult(BaseModel):
    """Result record for one task: reward figures, a score and counters.

    The reward figures and ``score`` are required. The counters default to 0
    and carry the same names as the first six counters on ``Metrics``.
    """

    task_id: str
    raw_reward: float
    baseline_reward: float
    target_reward: float
    # NOTE(review): presumably derived from the three reward figures above by
    # code outside this class -- confirm how it is computed.
    score: float

    completed_orders: int = 0
    on_time_orders: int = 0
    late_orders: int = 0
    expired_orders: int = 0
    rejected_orders: int = 0
    invalid_actions: int = 0