from __future__ import annotations

from typing import Any, Literal

from pydantic import BaseModel, ConfigDict, Field


# Closed set of world-regime names. HiddenRecipe.world_regime is annotated
# with this alias, so only these four strings pass validation there.
# What each regime does to a scenario is not defined in this module.
WorldRegime = Literal[
    "visible_ramp",
    "decoy_then_shift",
    "premium_late_surge",
    "congested_pivot",
]


class DifficultyProfile(BaseModel, frozen=True):
    """Frozen bundle of sizing, cost and budget parameters for one task id.

    Instances are immutable after construction (``frozen=True``). This
    module only declares the field types; the notes below on meaning are
    inferred from the field names and should be confirmed against the code
    that consumes the profile.
    """

    task_id: str

    # Scenario sizing (presumably number of zones / couriers / rounds).
    zone_count: int
    courier_count: int
    total_rounds: int
    max_repositions_per_round: int

    # Cost terms -- how they enter the reward is not shown in this file.
    missed_order_penalty: float
    move_cost_weight: float

    # Budget in milliseconds, per the ``_ms`` suffix; what is being timed is
    # not visible here.
    runtime_budget_ms: float


class ZoneSpec(BaseModel, frozen=True):
    """Frozen static description of a single zone: id, label and position."""

    zone_id: str
    label: str
    # Pair of integers; presumably grid coordinates -- the axis order and
    # units are not defined in this module.
    position: tuple[int, int]


class RoundTemplate(BaseModel, frozen=True):
    """Frozen per-round data: three variable-length tuples plus the round index.

    The ``*_by_zone`` tuples are presumably parallel, with one entry per
    zone in a shared zone order. NOTE(review): nothing in this model checks
    that the three tuples have equal length -- confirm the producer
    guarantees it.
    """

    round_index: int
    visible_orders_by_zone: tuple[int, ...]
    reward_per_order_by_zone: tuple[float, ...]
    congestion_multiplier_by_zone: tuple[float, ...]


class HiddenRecipe(BaseModel, frozen=True):
    """Frozen description of one generated scenario.

    Combines the task id and seed, the DifficultyProfile, the world regime,
    four role-to-zone indices, the zone specs, the initial courier counts
    and the ordered RoundTemplate entries. Instances are immutable after
    construction (``frozen=True``).
    """

    task_id: str
    seed: int
    profile: DifficultyProfile

    # Validated against the WorldRegime literal set.
    world_regime: WorldRegime

    # Presumably positions into ``zone_specs`` (and the per-zone tuples);
    # no range check is performed by this model -- confirm with the
    # generator that builds the recipe.
    hot_zone_index: int
    decoy_zone_index: int
    support_zone_index: int
    premium_zone_index: int

    zone_specs: tuple[ZoneSpec, ...]
    # Presumably one entry per zone, in the same order as ``zone_specs``.
    initial_courier_counts: tuple[int, ...]
    rounds: tuple[RoundTemplate, ...]


class ZoneSnapshot(BaseModel, frozen=True):
    """Frozen view of one zone's values; used as the element type of
    ``V3Observation.zones``.
    """

    # Identity
    zone_id: str
    label: str

    # Per-zone quantities carried by the snapshot
    courier_count: int
    visible_orders: int
    reward_per_order: float
    congestion_multiplier: float


class ZoneAllocation(BaseModel, frozen=True):
    """Frozen pairing of a zone id with a courier count; the element type
    of ``V3Action.target_allocations``.
    """

    zone_id: str
    # No bounds are enforced here (e.g. non-negativity is not validated).
    courier_count: int


class V3Action(BaseModel):
    """Action payload: a list of per-zone courier allocations.

    Unlike ZoneAllocation, this model is not frozen, so the list attribute
    can be reassigned after construction.
    """

    # Defaults to a fresh empty list per instance via default_factory.
    target_allocations: list[ZoneAllocation] = Field(default_factory=list)


class V3Reward(BaseModel):
    """Reward pair carried by V3StepResult: a step value and a running total."""

    step_reward: float
    # Presumably the sum of step rewards so far in the episode -- the
    # accumulation itself happens outside this module.
    cumulative_reward: float


class V3Feedback(BaseModel):
    """Feedback section of a V3Observation.

    Every field has a default, so ``V3Feedback()`` is valid and yields
    zero rewards, no events and an empty pressure string.
    """

    last_step_reward: float = 0.0
    cumulative_reward: float = 0.0
    # Fresh empty list per instance via default_factory.
    recent_events: list[str] = Field(default_factory=list)
    # Free-form text; its vocabulary is not defined in this module.
    current_pressure: str = ""


class V3ScenarioInfo(BaseModel):
    """Scenario-level facts attached to every V3Observation.

    The identifying and sizing fields are required; the three ``*_brief``
    text fields are optional and default to the empty string.
    """

    task_id: str
    used_seed: int
    total_rounds: int
    total_couriers: int
    max_repositions_per_round: int
    # Optional human-readable descriptions (empty by default).
    objective_brief: str = ""
    action_brief: str = ""
    episode_brief: str = ""


class V3Observation(BaseModel):
    """Observation for one round: round counters, task id, per-zone
    snapshots, feedback and scenario info.
    """

    # NOTE(review): whether round_index is 0- or 1-based is not visible
    # here -- confirm against the environment that builds observations.
    round_index: int
    remaining_rounds: int
    task_id: str
    zones: list[ZoneSnapshot]
    feedback: V3Feedback
    scenario_info: V3ScenarioInfo


class V3StepResult(BaseModel):
    """Result bundle for one step: observation, reward, done flag and a
    free-form info dictionary.
    """

    observation: V3Observation
    reward: V3Reward
    # Presumably True once the episode has ended -- set by the caller.
    done: bool
    # Arbitrary string-keyed extras; the key schema is not defined here.
    info: dict[str, Any]


class V3TaskResult(BaseModel):
    """Per-task result record: raw, baseline and target rewards plus a score.

    How ``score`` is derived from the reward fields is not shown in this
    module.
    """

    task_id: str
    raw_reward: float
    baseline_reward: float
    target_reward: float
    score: float
    # Optional; None when no heuristic reward was supplied.
    heuristic_reward: float | None = None


class SeedMetadata(BaseModel):
    """Flat record describing one (task_id, seed) pair: regime and zone
    names, reward figures, gap figures, runtime figures and an
    admissibility flag. All fields are required.
    """

    task_id: str
    seed: int
    # NOTE(review): typed as plain ``str`` here, whereas
    # HiddenRecipe.world_regime uses the WorldRegime literal; values are
    # therefore not validated against the regime set in this model.
    world_regime: str
    # Zone references are strings here (HiddenRecipe stores integer
    # indices); presumably zone ids or labels -- confirm with the producer.
    hot_zone: str
    decoy_zone: str
    premium_zone: str
    baseline_reward: float
    heuristic_reward: float
    target_reward: float
    # The formulas behind the two gap fields are not shown in this module.
    score_gap: float
    heuristic_gap: float
    # Milliseconds, per the ``_ms`` suffix.
    solver_runtime_ms: float
    runtime_budget_ms: float
    # Criteria for admissibility are decided outside this module.
    admissible: bool