"""Typed models for the Multi-Agent Strategy OpenEnv environment."""

from __future__ import annotations

from typing import List, Literal, Optional

from openenv.core.env_server import Action, Observation, State
from pydantic import Field

# Closed set of action names accepted by ``StrategyAction.action_type``.
ActionType = Literal["harvest", "attack", "fortify", "scout", "adapt", "bluff", "noop"]

# Closed set of difficulty labels shared by the observation and state models.
Difficulty = Literal["easy", "medium", "hard"]

# Runtime list holding the same members as ``ActionType``, in the same order.
# ``Literal`` members are not iterable as a plain list, so the two definitions
# must be kept in sync by hand when an action is added or removed.
ACTION_CHOICES: List[ActionType] = [
    "harvest",
    "attack",
    "fortify",
    "scout",
    "adapt",
    "bluff",
    "noop",
]


class StrategyAction(Action):
    """Action chosen by the learning agent.

    Attributes are declared as annotated class fields; validation behavior is
    inherited from the ``Action`` base class (defined in ``openenv`` --
    presumably a pydantic model, confirm against that package).
    """

    # Required; must be one of the ``ActionType`` literals.
    action_type: ActionType
    # Optional free-form string accompanying the action; defaults to ``None``.
    # NOTE(review): its meaning per action type is not defined in this module.
    value: Optional[str] = None


class StrategyObservation(Observation):
    """Partially observable game snapshot shown to the agent.

    Every field except ``allowed_actions`` is declared without a default.
    """

    # Task / episode descriptors.
    task_id: str
    difficulty: Difficulty
    objective: str
    turn: int
    max_turns: int

    # Rule information.
    active_rule: str
    rule_hint: str

    # Agent-side counters.
    own_resources: int
    own_defense: int
    own_intel: int

    # Opponent counters exposed in this observation.
    visible_opponent_resources: int
    visible_opponent_defense: int

    # Textual event / action log entries.
    last_public_event: str
    last_agent_action: str
    last_opponent_action: str
    recent_rule_history: List[str]

    # Defaults to a fresh copy of ``ACTION_CHOICES`` per instance, so mutating
    # one observation's list never alters the module-level constant.
    allowed_actions: List[str] = Field(default_factory=lambda: list(ACTION_CHOICES))


class StrategyState(State):
    """Serializable public state.

    Every field declared here has a default value.
    """

    # Task / episode descriptors.
    task_id: str = ""
    difficulty: Difficulty = "easy"
    objective: str = ""
    turn: int = 0
    max_turns: int = 12

    # Rule information.
    active_rule: str = "expansion"

    # Agent-side counters.
    own_resources: int = 0
    own_defense: int = 0
    own_intel: int = 0

    # Opponent counters exposed in the public state.
    visible_opponent_resources: int = 0
    visible_opponent_defense: int = 0

    # Episode bookkeeping.
    cumulative_reward: float = 0.0
    done: bool = False
    # ``default_factory=list`` gives each instance its own empty list.
    history: List[str] = Field(default_factory=list)