"""Pydantic schemas for the grid, agents, orders, scenarios, actions, observations and results."""

from __future__ import annotations

from typing import Any, Literal

from pydantic import BaseModel, ConfigDict, Field

# NOTE: class-level documentation in this module is written as comments, not
# docstrings, because pydantic copies a model's docstring into its JSON-schema
# ``description`` and that would alter the emitted schemas.

# ---------------------------------------------------------------------------
# Shared aliases
# ---------------------------------------------------------------------------

# A pair of integers; used for every location / zone coordinate below.
# Which component is the column and which is the row is not fixed here.
Point = tuple[int, int]

# Closed sets of status strings accepted by AgentState.status / OrderState.status.
AgentStatus = Literal["idle", "busy"]
OrderStatus = Literal["unassigned", "assigned", "completed", "expired", "rejected"]


# ---------------------------------------------------------------------------
# Static world description
# ---------------------------------------------------------------------------

# Frozen description of the grid: two integer dimensions plus optional
# collections of congested-zone and hotspot points (both empty by default).
class GridConfig(BaseModel):
    model_config = ConfigDict(frozen=True)

    width: int
    height: int

    congested_zones: tuple[Point, ...] = ()
    hotspots: tuple[Point, ...] = ()


# Frozen pairing of a start time with a set of points. Scenario uses it for
# both ``hotspot_phases`` and ``congestion_phases``.
class ZonePhase(BaseModel):
    model_config = ConfigDict(frozen=True)

    start_time: int
    points: tuple[Point, ...]


# ---------------------------------------------------------------------------
# Mutable per-entity state
# ---------------------------------------------------------------------------

# State of a single agent. ``validate_assignment=True`` makes pydantic
# re-validate a field whenever it is assigned after construction.
class AgentState(BaseModel):
    model_config = ConfigDict(validate_assignment=True)

    # Identity and position (required).
    agent_id: str
    location: Point

    # Work status; a freshly built agent is idle with no order attached.
    status: AgentStatus = "idle"
    busy_until: int = 0
    assigned_order_id: str | None = None

    # NOTE(review): these two look like convenience values derived from the
    # fields above -- confirm against whatever populates them.
    availability_in: int = 0
    idle_now: bool = True


# State of a single order. Assignments to fields are re-validated
# (``validate_assignment=True``).
class OrderState(BaseModel):
    model_config = ConfigDict(validate_assignment=True)

    # Required at construction time.
    order_id: str
    created_at: int
    pickup_location: Point
    drop_location: Point
    reward_value: float
    deadline: int

    # Lifecycle bookkeeping; a new order starts "unassigned" with nothing set.
    status: OrderStatus = "unassigned"
    assigned_agent_id: str | None = None
    scheduled_completion_time: int | None = None
    completed_at: int | None = None
    rejected_at: int | None = None
    service_cutoff_time: int | None = None

    # NOTE(review): the remaining optional fields look like derived hints
    # (nearest agent, estimates, slack, feasibility) filled in elsewhere --
    # None means "not provided"; confirm the producer.
    nearest_agent_id: str | None = None
    estimated_service_time: int | None = None
    estimated_finish_time: int | None = None
    slack_time: int | None = None
    feasible_now: bool | None = None


# ---------------------------------------------------------------------------
# Scenario definition
# ---------------------------------------------------------------------------

# Frozen scenario bundle: a name, a grid, the agents and orders, an episode
# horizon, optional zone phases and free-text descriptions.
# ``frozen=True`` only prevents rebinding this model's own attributes; the
# AgentState / OrderState items it holds are not frozen themselves.
class Scenario(BaseModel):
    model_config = ConfigDict(frozen=True)

    name: str
    grid: GridConfig
    agents: tuple[AgentState, ...]
    orders: tuple[OrderState, ...]
    episode_horizon: int
    default_max_decision_steps: int = 100

    # Optional time-indexed zone definitions (empty by default).
    hotspot_phases: tuple[ZonePhase, ...] = ()
    congestion_phases: tuple[ZonePhase, ...] = ()

    # Optional free-text fields (empty by default).
    briefing: str = ""
    dispatch_objective: str = ""
    known_future_signal: str = ""


# ---------------------------------------------------------------------------
# Actions
# ---------------------------------------------------------------------------

# Frozen (agent_id, order_id) pair.
class Assignment(BaseModel):
    model_config = ConfigDict(frozen=True)

    agent_id: str
    order_id: str


# One action: a list of assignments plus a list of strings under
# ``rejections``. Both default to a fresh empty list per instance.
class Action(BaseModel):
    assignments: list[Assignment] = Field(default_factory=list)
    # NOTE(review): presumably order ids -- confirm with the consumer.
    rejections: list[str] = Field(default_factory=list)


# ---------------------------------------------------------------------------
# Step outputs
# ---------------------------------------------------------------------------

# Reward for one step together with the running total (both required).
class Reward(BaseModel):
    step_reward: float
    cumulative_reward: float


# Feedback payload embedded in Observation; every field has a neutral default.
class Feedback(BaseModel):
    last_step_reward: float = 0.0
    cumulative_reward: float = 0.0

    recent_events: list[str] = Field(default_factory=list)
    reward_breakdown: dict[str, float] = Field(default_factory=dict)
    error_summary: dict[str, int] = Field(default_factory=dict)

    current_pressure: str = ""


# Integer counters, all starting at zero.
class Metrics(BaseModel):
    # Order outcome counters.
    completed_orders: int = 0
    on_time_orders: int = 0
    late_orders: int = 0
    expired_orders: int = 0
    rejected_orders: int = 0
    invalid_actions: int = 0

    # Order / agent tallies.
    active_orders: int = 0
    pending_orders: int = 0
    idle_agents: int = 0
    busy_agents: int = 0


# Scenario summary embedded in Observation. It repeats the name, horizon,
# step limit and text fields declared on Scenario and adds an optional seed.
class ScenarioInfo(BaseModel):
    name: str
    episode_horizon: int
    default_max_decision_steps: int = 100
    used_seed: int | None = None

    briefing: str = ""
    dispatch_objective: str = ""
    known_future_signal: str = ""


# Full observation: timing / step counters, the grid, the current agent and
# order states, plus feedback, metrics and scenario info. All fields required.
class Observation(BaseModel):
    time: int
    decision_step: int
    max_decision_steps: int
    task_id: str
    episode_horizon: int

    grid: GridConfig
    agents: list[AgentState]
    orders: list[OrderState]

    feedback: Feedback
    metrics: Metrics
    scenario_info: ScenarioInfo


# Result of one step: observation, reward, a ``done`` flag and an untyped
# ``info`` dictionary.
class StepResult(BaseModel):
    observation: Observation
    reward: Reward
    done: bool
    info: dict[str, Any]


# Per-task result: raw / baseline / target rewards and a score (all
# required), plus order-outcome counters that default to zero.
class TaskResult(BaseModel):
    task_id: str
    raw_reward: float
    baseline_reward: float
    target_reward: float
    score: float

    completed_orders: int = 0
    on_time_orders: int = 0
    late_orders: int = 0
    expired_orders: int = 0
    rejected_orders: int = 0
    invalid_actions: int = 0