from __future__ import annotations from typing import Any, Literal from pydantic import BaseModel, ConfigDict, Field WorldRegime = Literal[ "visible_ramp", "decoy_then_shift", "premium_late_surge", "congested_pivot", ] class DifficultyProfile(BaseModel): model_config = ConfigDict(frozen=True) task_id: str zone_count: int courier_count: int total_rounds: int max_repositions_per_round: int missed_order_penalty: float move_cost_weight: float runtime_budget_ms: float class ZoneSpec(BaseModel): model_config = ConfigDict(frozen=True) zone_id: str label: str position: tuple[int, int] class RoundTemplate(BaseModel): model_config = ConfigDict(frozen=True) round_index: int visible_orders_by_zone: tuple[int, ...] reward_per_order_by_zone: tuple[float, ...] congestion_multiplier_by_zone: tuple[float, ...] class HiddenRecipe(BaseModel): model_config = ConfigDict(frozen=True) task_id: str seed: int profile: DifficultyProfile world_regime: WorldRegime hot_zone_index: int decoy_zone_index: int support_zone_index: int premium_zone_index: int zone_specs: tuple[ZoneSpec, ...] initial_courier_counts: tuple[int, ...] rounds: tuple[RoundTemplate, ...] 
class ZoneSnapshot(BaseModel):
    """Frozen view of a single zone's state as carried in an observation."""

    # frozen=True: attribute assignment after construction raises.
    model_config = ConfigDict(frozen=True)

    zone_id: str
    label: str
    courier_count: int
    visible_orders: int
    reward_per_order: float
    congestion_multiplier: float


class ZoneAllocation(BaseModel):
    """Frozen pairing of a zone id with a courier count."""

    model_config = ConfigDict(frozen=True)

    zone_id: str
    courier_count: int


class V3Action(BaseModel):
    """Action payload: a list of per-zone target allocations.

    ``target_allocations`` defaults to a fresh empty list per instance.
    """

    target_allocations: list[ZoneAllocation] = Field(default_factory=list)


class V3Reward(BaseModel):
    """Reward for one step together with the running cumulative reward."""

    step_reward: float
    cumulative_reward: float


class V3Feedback(BaseModel):
    """Feedback attached to an observation; every field has a default."""

    last_step_reward: float = 0.0
    cumulative_reward: float = 0.0
    recent_events: list[str] = Field(default_factory=list)
    current_pressure: str = ""


class V3ScenarioInfo(BaseModel):
    """Scenario-level information: task id, seed, sizes and text briefs.

    The three ``*_brief`` strings default to empty.
    """

    task_id: str
    used_seed: int
    total_rounds: int
    total_couriers: int
    max_repositions_per_round: int
    objective_brief: str = ""
    action_brief: str = ""
    episode_brief: str = ""


class V3Observation(BaseModel):
    """Observation: round counters, task id, zone snapshots, feedback, info."""

    round_index: int
    remaining_rounds: int
    task_id: str
    zones: list[ZoneSnapshot]
    feedback: V3Feedback
    scenario_info: V3ScenarioInfo


class V3StepResult(BaseModel):
    """Result of one step: observation, reward, done flag and an info dict."""

    observation: V3Observation
    reward: V3Reward
    done: bool
    # Free-form mapping; its key schema is not defined in this module.
    info: dict[str, Any]


class V3TaskResult(BaseModel):
    """Per-task result: raw, baseline and target rewards plus a score.

    ``heuristic_reward`` is optional and defaults to ``None``.
    """

    task_id: str
    raw_reward: float
    baseline_reward: float
    target_reward: float
    score: float
    heuristic_reward: float | None = None


class SeedMetadata(BaseModel):
    """Metadata recorded for one (task, seed) pair.

    Carries the regime and designated zone names, baseline/heuristic/target
    rewards, two gap values, solver runtime versus budget, and an
    ``admissible`` flag. How the gaps and the flag are derived is not defined
    in this module.
    """

    task_id: str
    seed: int
    # NOTE(review): typed as plain ``str`` rather than the ``WorldRegime``
    # literal used by ``HiddenRecipe``, so arbitrary strings are accepted
    # here — confirm this is intentional before tightening.
    world_regime: str
    hot_zone: str
    decoy_zone: str
    premium_zone: str
    baseline_reward: float
    heuristic_reward: float
    target_reward: float
    score_gap: float
    heuristic_gap: float
    solver_runtime_ms: float
    runtime_budget_ms: float
    admissible: bool