"""Typed models for the Multi-Agent Strategy OpenEnv environment."""

from __future__ import annotations

from typing import List, Literal, Optional, get_args

from openenv.core.env_server import Action, Observation, State
from pydantic import Field

# Closed string vocabularies for the environment, expressed as ``Literal``
# types so that annotations using them admit only these exact values.
ActionType = Literal["harvest", "attack", "fortify", "scout", "adapt", "bluff", "noop"]
Difficulty = Literal["easy", "medium", "hard"]

# Runtime list of every action name. It is derived from ``ActionType`` instead
# of being spelled out a second time, so adding or renaming an action in the
# literal can never leave this list out of sync. ``get_args`` yields the
# literal's values in declaration order, so contents and ordering are the same
# as the previous hand-written list.
ACTION_CHOICES: List[ActionType] = list(get_args(ActionType))


class StrategyAction(Action):
    """Action chosen by the learning agent.

    Extends the OpenEnv ``Action`` base type with the two fields declared
    below; any other attributes come from that base class.
    """

    # Which move to play. Annotated with the ``ActionType`` literal, so only
    # the action names listed in that literal are valid for this field.
    action_type: ActionType
    # Optional string accompanying the action; defaults to ``None``.
    # NOTE(review): how (or whether) each action type consumes this value is
    # not shown in this module -- confirm against the environment code.
    value: Optional[str] = None


class StrategyObservation(Observation):
    """Partially observable game snapshot shown to the agent.

    Extends the OpenEnv ``Observation`` base type. Every field declared here
    except ``allowed_actions`` has no default in this class, so the producer
    of the observation is expected to supply it explicitly.
    """

    # --- Task description -------------------------------------------------
    task_id: str
    # One of the ``Difficulty`` literal values ("easy", "medium", "hard").
    difficulty: Difficulty
    objective: str
    # --- Episode progress -------------------------------------------------
    # NOTE(review): presumably the current turn counter and its upper bound;
    # whether ``turn`` is 0- or 1-based is not visible here -- confirm.
    turn: int
    max_turns: int
    # --- Rule information -------------------------------------------------
    # Plain strings; the set of valid rule names is not defined in this
    # module.
    active_rule: str
    rule_hint: str
    # --- Agent's own stats ------------------------------------------------
    own_resources: int
    own_defense: int
    own_intel: int
    # --- Opponent stats, as visible to the agent --------------------------
    # NOTE(review): the "visible_" prefix suggests these may differ from the
    # opponent's true values -- verify against the environment logic.
    visible_opponent_resources: int
    visible_opponent_defense: int
    # --- Most recent events -----------------------------------------------
    last_public_event: str
    # Stored as plain ``str`` rather than ``ActionType``, so values outside
    # the action vocabulary are permitted by the annotation.
    last_agent_action: str
    last_opponent_action: str
    recent_rule_history: List[str]
    # Defaults to a fresh copy of ``ACTION_CHOICES`` for each instance, so
    # mutating one observation's list does not affect the module constant or
    # other observations.
    allowed_actions: List[str] = Field(default_factory=lambda: ACTION_CHOICES.copy())


class StrategyState(State):
    """Serializable public state.

    Extends the OpenEnv ``State`` base type. Every field declared here has a
    default, so an instance can be created without passing any of them.
    """

    # --- Task description -------------------------------------------------
    task_id: str = ""
    # One of the ``Difficulty`` literal values; defaults to "easy".
    difficulty: Difficulty = "easy"
    objective: str = ""
    # --- Episode progress -------------------------------------------------
    turn: int = 0
    # Default turn limit when none is provided.
    max_turns: int = 12
    # Plain string; the set of valid rule names is not defined in this module.
    active_rule: str = "expansion"
    # --- Agent's own stats ------------------------------------------------
    own_resources: int = 0
    own_defense: int = 0
    own_intel: int = 0
    # --- Opponent stats, as visible to the agent --------------------------
    visible_opponent_resources: int = 0
    visible_opponent_defense: int = 0
    # --- Episode bookkeeping ----------------------------------------------
    cumulative_reward: float = 0.0
    done: bool = False
    # Each instance gets its own new empty list via ``default_factory``.
    # NOTE(review): what each history entry records is not shown here --
    # confirm against the code that appends to it.
    history: List[str] = Field(default_factory=list)