# Provenance (captured from the repository web view; not part of the module):
#   last commit 01e1628 by Rishav - "Add seeded dynamic task variants"; file size 3.87 kB.
from __future__ import annotations
from typing import Any, Literal
from pydantic import BaseModel, ConfigDict, Field
# A grid coordinate as a pair of ints. The axis order (x, y vs. row, col) is
# not fixed by this module - TODO confirm against the code that builds points.
Point = tuple[int, int]
# The values AgentState.status may take.
AgentStatus = Literal["idle", "busy"]
# The values OrderState.status may take.
OrderStatus = Literal["unassigned", "assigned", "completed", "expired", "rejected"]
class GridConfig(BaseModel):
    # Immutable description of the grid: two integer dimensions plus two
    # optional collections of points. `frozen=True` makes pydantic reject
    # attribute assignment on instances.
    model_config = ConfigDict(frozen=True)

    # Grid extent. No bounds are declared here, so any int validates.
    width: int
    height: int

    # Both default to an empty tuple. How a congested zone or a hotspot
    # affects the simulation is decided by the consumers of this model.
    congested_zones: tuple[Point, ...] = ()
    hotspots: tuple[Point, ...] = ()
class ZonePhase(BaseModel):
    # A set of grid points tagged with a start time. Scenario stores tuples
    # of these in `hotspot_phases` and `congestion_phases`.
    # Frozen: fields cannot be reassigned after construction.
    model_config = ConfigDict(frozen=True)

    # presumably the simulation time from which `points` is in effect -
    # confirm against the code that consumes the phases.
    start_time: int
    points: tuple[Point, ...]
class AgentState(BaseModel):
    # Mutable per-agent record. `validate_assignment=True` makes pydantic
    # re-validate a field each time it is assigned after construction, so
    # `status` can only ever hold one of the AgentStatus literals.
    model_config = ConfigDict(validate_assignment=True)

    # Required on construction.
    agent_id: str
    location: Point

    # The defaults describe an agent with no order attached.
    status: AgentStatus = "idle"
    busy_until: int = 0
    assigned_order_id: str | None = None

    # NOTE(review): these two look derivable from `status` / `busy_until`.
    # Nothing in this model keeps them in sync, so whoever mutates the state
    # has to update them together - confirm where that happens.
    availability_in: int = 0
    idle_now: bool = True
class OrderState(BaseModel):
    # Mutable per-order record. Assignments after construction are
    # re-validated (`validate_assignment=True`), which keeps `status` within
    # the OrderStatus literals.
    model_config = ConfigDict(validate_assignment=True)

    # Required on construction.
    order_id: str
    created_at: int
    pickup_location: Point
    drop_location: Point
    reward_value: float
    deadline: int

    # Lifecycle fields. A new order starts "unassigned" with every
    # lifecycle timestamp unset.
    status: OrderStatus = "unassigned"
    assigned_agent_id: str | None = None
    scheduled_completion_time: int | None = None
    completed_at: int | None = None
    rejected_at: int | None = None

    # NOTE(review): the remaining fields all default to None and read like
    # per-observation estimates filled in elsewhere; this model neither
    # computes nor cross-checks them - confirm against the producer.
    service_cutoff_time: int | None = None
    nearest_agent_id: str | None = None
    estimated_service_time: int | None = None
    estimated_finish_time: int | None = None
    slack_time: int | None = None
    feasible_now: bool | None = None
class Scenario(BaseModel):
    # Full definition of one scenario: grid, agents, orders, horizon and
    # descriptive text. Frozen, so the fields cannot be reassigned.
    # NOTE: the freeze is shallow. AgentState and OrderState are not declared
    # frozen, so the elements of `agents` / `orders` can still be mutated in
    # place through a Scenario instance.
    model_config = ConfigDict(frozen=True)

    # Required on construction.
    name: str
    grid: GridConfig
    agents: tuple[AgentState, ...]
    orders: tuple[OrderState, ...]
    episode_horizon: int

    # Step budget used when nothing else is specified; ScenarioInfo declares
    # the same default of 100.
    default_max_decision_steps: int = 100

    # Optional time-phased point sets; both default to no phases.
    hotspot_phases: tuple[ZonePhase, ...] = ()
    congestion_phases: tuple[ZonePhase, ...] = ()

    # Free-text descriptions; all default to the empty string.
    briefing: str = ""
    dispatch_objective: str = ""
    known_future_signal: str = ""
class Assignment(BaseModel):
    # One (agent, order) pairing inside an Action. Frozen: once built, the
    # pair cannot be edited.
    model_config = ConfigDict(frozen=True)

    agent_id: str
    order_id: str
class Action(BaseModel):
    # A decision payload: a list of Assignment pairs plus a list of strings
    # to reject. Both lists use `default_factory=list`, so `Action()` is
    # valid and every instance gets its own fresh, empty lists.

    assignments: list[Assignment] = Field(default_factory=list)

    # presumably order ids (matching OrderState.order_id) - confirm against
    # the code that applies actions.
    rejections: list[str] = Field(default_factory=list)
class Reward(BaseModel):
    # Reward pair carried by StepResult. Both values are required floats.

    step_reward: float
    cumulative_reward: float
class Feedback(BaseModel):
    # Feedback section of an Observation. Every field has a default, so
    # `Feedback()` yields zero rewards, empty containers and an empty
    # pressure string.

    # Scalar reward figures.
    last_step_reward: float = 0.0
    cumulative_reward: float = 0.0

    # Containers are built per instance via `default_factory`.
    recent_events: list[str] = Field(default_factory=list)
    reward_breakdown: dict[str, float] = Field(default_factory=dict)
    error_summary: dict[str, int] = Field(default_factory=dict)

    # Free-text field; its vocabulary is not defined in this module.
    current_pressure: str = ""
class Metrics(BaseModel):
    # Integer counters attached to each Observation; all start at 0.

    # Order-outcome and error counters. TaskResult declares counters with
    # these same six names.
    completed_orders: int = 0
    on_time_orders: int = 0
    late_orders: int = 0
    expired_orders: int = 0
    rejected_orders: int = 0
    invalid_actions: int = 0

    # Counts of orders and agents by state.
    # NOTE(review): the distinction between "active" and "pending" orders is
    # not defined in this module - confirm against the code that fills these.
    active_orders: int = 0
    pending_orders: int = 0
    idle_agents: int = 0
    busy_agents: int = 0
class ScenarioInfo(BaseModel):
    # Scenario summary embedded in an Observation. It mirrors the scalar and
    # text field names of Scenario (same defaults) and adds `used_seed`.
    # Unlike Scenario, it is not frozen.

    name: str
    episode_horizon: int
    default_max_decision_steps: int = 100

    # None when no seed was recorded.
    used_seed: int | None = None

    # Free-text descriptions; all default to the empty string.
    briefing: str = ""
    dispatch_objective: str = ""
    known_future_signal: str = ""
class Observation(BaseModel):
    # Snapshot carried by StepResult. Every field is required.

    # Clock and step bookkeeping.
    time: int
    decision_step: int
    max_decision_steps: int

    # Identification of the running task and its horizon.
    task_id: str
    episode_horizon: int

    # World state. Agents and orders are lists here, whereas Scenario
    # declares them as tuples.
    grid: GridConfig
    agents: list[AgentState]
    orders: list[OrderState]

    # Nested summaries.
    feedback: Feedback
    metrics: Metrics
    scenario_info: ScenarioInfo
class StepResult(BaseModel):
    # Bundle of observation, reward, done flag and an info mapping.
    # All four fields are required; `info` has no default, so callers must
    # pass a dict even when it is empty.

    observation: Observation
    reward: Reward
    done: bool

    # Arbitrary string-keyed payload; its keys are not constrained here.
    info: dict[str, Any]
class TaskResult(BaseModel):
    # Per-task result record: reward figures, a score, and outcome counters.

    # Required on construction.
    # NOTE(review): how `score` is derived from the raw, baseline and target
    # rewards is not shown in this module - confirm against the grader.
    task_id: str
    raw_reward: float
    baseline_reward: float
    target_reward: float
    score: float

    # Counters default to 0 and share their names with the first six
    # counters of Metrics.
    completed_orders: int = 0
    on_time_orders: int = 0
    late_orders: int = 0
    expired_orders: int = 0
    rejected_orders: int = 0
    invalid_actions: int = 0