"""
openenv/models.py
─────────────────
Pydantic schemas that exactly match the OpenEnv 0.1 HTTP spec.

  POST /reset  → ResetResponse
  POST /step   → StepResponse
  GET  /state  → StateResponse

All three wrap a shared ChessObservation object. Chess-specific fields are
explicit attributes of that object, and its `info` dict holds auxiliary,
extensible data, so the outer response schemas stay OpenEnv-generic.
"""

from __future__ import annotations
from typing import Any, Optional
from pydantic import BaseModel, Field


# ── Request bodies ─────────────────────────────────────────────────────────────

class StepRequest(BaseModel):
    """
    Request body carrying the action an RL trainer submits to advance the
    environment by one move.

    `action` is the only field and it is required.  This schema checks only
    that it is a string; whether the text is a well-formed or legal move is
    not validated here.
    """

    action: str = Field(
        description="Chess move in UCI notation (e.g. 'e2e4') or SAN (e.g. 'e4')",
        examples=["e2e4", "Nf3", "O-O"],
    )


class ResetRequest(BaseModel):
    """
    Request body for a reset.  Both fields default to None, so an empty body
    is valid.

      seed    Optional integer RNG seed.
      config  Optional mapping of config overrides for the new episode; the
              keys it may contain are not constrained by this schema.
    """

    seed: Optional[int] = Field(
        default=None,
        description="RNG seed for reproducibility",
    )
    config: Optional[dict[str, Any]] = Field(
        default=None,
        description="Override environment config for this episode",
    )


# ── Core observation ───────────────────────────────────────────────────────────

class ChessObservation(BaseModel):
    """
    Snapshot of the game that ResetResponse, StepResponse and StateResponse
    each embed as their `observation` field.

    Board state, wallet balances and agent identities are explicit top-level
    attributes.  The trailing `info` dict is a free-form slot for any extra
    data and defaults to an empty dict.

    Required: fen, turn, move_number, legal_moves_uci, wallet_white,
    wallet_black, white_model, black_model.
    Optional: last_move_uci / last_move_san (default None), is_check
    (default False), info (default {}).
    """

    # Board state
    fen: str = Field(description="Current board position in FEN notation")
    turn: str = Field(description="'white' or 'black'")
    move_number: int = Field(description="Full-move number (1-indexed)")
    last_move_uci: Optional[str] = Field(
        default=None,
        description="Last move in UCI notation",
    )
    last_move_san: Optional[str] = Field(
        default=None,
        description="Last move in SAN notation",
    )
    legal_moves_uci: list[str] = Field(
        description="All legal moves in UCI notation",
    )
    is_check: bool = Field(
        default=False,
        description="Whether the current side is in check",
    )

    # Economy: one wallet balance per side
    wallet_white: float = Field(
        description="White agent wallet balance (units)",
    )
    wallet_black: float = Field(
        description="Black agent wallet balance (units)",
    )

    # Agent identities
    white_model: str = Field(description="Model ID playing White")
    black_model: str = Field(description="Model ID playing Black")

    # Free-form extras; default_factory gives each instance its own dict
    info: dict[str, Any] = Field(default_factory=dict)


# ── OpenEnv response bodies ────────────────────────────────────────────────────

class ResetResponse(BaseModel):
    """
    Body returned by POST /reset.
    OpenEnv spec shape: { observation, info }

      observation  Required ChessObservation.
      info         Free-form dict; empty when not supplied.
    """

    observation: ChessObservation
    # default_factory builds a new dict per instance
    info: dict[str, Any] = Field(default_factory=dict)


class StepResponse(BaseModel):
    """
    Body returned by POST /step.
    OpenEnv spec shape: { observation, reward, terminated, truncated, info }

    `observation`, `reward`, `terminated` and `truncated` are all required.
    `info` is a free-form dict that is empty when not supplied.
    """

    observation: ChessObservation
    reward: float = Field(description="Per-step reward signal")
    terminated: bool = Field(
        description="True if the episode ended naturally (checkmate/stalemate/draw)",
    )
    truncated: bool = Field(
        description="True if the episode was cut short (move limit)",
    )
    # default_factory builds a new dict per instance
    info: dict[str, Any] = Field(default_factory=dict)


class StateResponse(BaseModel):
    """
    Body returned by GET /state.
    OpenEnv spec shape: { observation, info, episode_id, step_count, status }

    `observation`, `episode_id`, `step_count` and `status` are required.
    `info` is a free-form dict that is empty when not supplied.  `status` is
    typed as a plain string, so the values named in its description are not
    enforced by this schema.
    """

    observation: ChessObservation
    # default_factory builds a new dict per instance
    info: dict[str, Any] = Field(default_factory=dict)
    episode_id: str = Field(
        description="Unique identifier for the current episode",
    )
    step_count: int = Field(
        description="Number of moves played so far",
    )
    status: str = Field(
        description="'active' | 'terminated' | 'truncated' | 'idle'",
    )


# ── Environment info ──────────────────────────────────────────────────────────

class EnvInfo(BaseModel):
    """
    Returned by GET /env_info — static description of what the environment
    offers.

    Every field has a default, so `EnvInfo()` with no arguments is already a
    complete description.  Dict and list defaults are produced by
    `default_factory`, so each instance gets its own fresh container.
    """

    name: str = "chessecon"
    version: str = "1.0.0"
    description: str = (
        "Two-agent chess economy environment. "
        "White plays Qwen2.5-0.5B-Instruct, Black plays Llama-3.2-1B-Instruct. "
        "Agents earn/lose economic units based on game outcomes. "
        "Compatible with OpenEnv 0.1 spec."
    )
    openenv_version: str = "0.1"

    # Actions are free text: a single move string
    action_space: dict = Field(
        default_factory=lambda: {
            "type": "text",
            "description": "Chess move in UCI (e2e4) or SAN (e4) notation",
        },
    )

    # NOTE(review): "fields" names only some ChessObservation attributes
    # (e.g. last_move_uci and white_model are absent) — confirm this subset
    # is intentional.
    observation_space: dict = Field(
        default_factory=lambda: {
            "type": "structured",
            "fields": [
                "fen",
                "turn",
                "move_number",
                "legal_moves_uci",
                "wallet_white",
                "wallet_black",
                "is_check",
            ],
        },
    )

    reward_range: list[float] = Field(default_factory=lambda: [-1.0, 1.0])
    max_episode_steps: int = 300

    # One entry per side, in white-then-black order
    agents: list[dict] = Field(
        default_factory=lambda: [
            {
                "id": "white",
                "model": "Qwen/Qwen2.5-0.5B-Instruct",
                "role": "White player",
            },
            {
                "id": "black",
                "model": "meta-llama/Llama-3.2-1B-Instruct",
                "role": "Black player",
            },
        ],
    )

    tags: list[str] = Field(
        default_factory=lambda: [
            "chess",
            "multi-agent",
            "rl",
            "grpo",
            "economy",
            "openenv",
            "two-player",
            "game",
        ],
    )