"""Unified policy interface for the OpenRA-Bench eval stack.

Every actor that can drive a side of a scenario — an LLM agent, a human
labeler, a scripted reference policy — implements the same contract:

    controller.act(observation, Command) -> list[Command]

This is the keystone of the human-labeling machine and the 1v1
adversarial harness: one harness, interchangeable policy backends.
`run_level` / `run_episode` drive a single Controller; a 1v1 match
drives two, one per side, each fed its own side-specific observation.

Back-compat is non-negotiable: the historical policy shape was a bare
callable ``agent_fn(render_state, Command) -> [Command]`` and ~190 test
files still pass one. `as_controller()` adapts any such callable (or a
`ModelAgent` bound method) into a Controller, so every existing scripted
policy and test keeps working unchanged — the eval loop simply coerces
its policy argument through `as_controller()` before stepping.

Design notes
------------
* `act` keeps `Command` as an explicit parameter rather than binding it
  at construction. `Command` is the pyo3 `openra_train.Command` factory
  handle, only available once an env exists; threading it per-call keeps
  Controllers constructible without an engine (cheap to unit-test) and
  is byte-identical to the legacy `agent_fn` signature.
* `reset(ctx)` is the per-episode lifecycle hook. Scripted policies
  ignore it; the model agent re-arms history; a human controller would
  reset its click queue. The 1v1 harness calls it once per side with a
  `side`-stamped `EpisodeContext`.
* `history` / `stats` are the optional introspection surface the
  playback writer reads. `BaseController` provides empty defaults so a
  caller can read them unconditionally.
"""

from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any, Callable, Protocol, runtime_checkable

# A bare legacy policy: (render_state, Command) -> [Command].
# `Command` is typed `Any` and the return value is an unparameterised
# `list`, so this alias only pins the arity and that the first argument
# is a dict — it does not constrain the element type of the result.
PolicyFn = Callable[[dict, Any], list]


@dataclass
class EpisodeContext:
    """What a Controller is told once, at episode start (`reset`).

    A scenario eval populates `pack_id` / `level` / `seed` / `objective`;
    a 1v1 match additionally stamps `side` so the two Controllers know
    which colour they are driving.

    Every field has a default, so a bare `EpisodeContext()` is valid.
    The dataclass is not frozen: instances can be mutated after
    construction."""

    # Scenario identity; all default to "empty" values.
    pack_id: str = ""
    level: str = ""
    seed: int = 0
    side: str = "agent"  # "agent" | "enemy" — which side this drives
    objective: str = ""
    # NOTE(review): presumably the episode's turn budget, with 0 meaning
    # "not set" — confirm against the harness that fills this in.
    max_turns: int = 0
    # Free-form extras. `default_factory` gives every instance its own
    # dict instead of one shared mutable default.
    extra: dict = field(default_factory=dict)


@runtime_checkable
class Controller(Protocol):
    """Structural contract for anything that can drive one side.

    No inheritance is required: an object conforms when it exposes a
    `name` attribute plus `reset` and `act` methods, which is how
    `ModelAgent` is meant to qualify without importing this module.
    Because the protocol is runtime-checkable, `isinstance(obj,
    Controller)` works, but it only verifies that the members are
    present — signatures and return types are not inspected."""

    name: str

    def reset(self, ctx: "EpisodeContext") -> None:
        """Per-episode lifecycle hook, handed the episode's context."""
        ...

    def act(self, observation: dict, Command: Any) -> list:
        """Return the list of Commands to issue for `observation`."""
        ...


def is_controller(obj: Any) -> bool:
    """Report whether `obj` can be used as a Controller as-is.

    The check is duck-typed: `obj` qualifies when its `act` attribute
    and its `reset` attribute both exist and are callable. Requiring
    callability (not mere presence) is what separates a real
    Controller from a bare policy function, which is callable itself
    but carries neither attribute.

    Note that `name` is not examined here, so an object without a
    `name` still passes.

    Args:
        obj: Any object; nothing is assumed about its type.

    Returns:
        True when both `act` and `reset` are callable, else False.
    """
    # `act` is probed first and `all` stops at the first failure, so
    # `reset` is never looked up on an object that lacks a usable `act`.
    required = ("act", "reset")
    return all(callable(getattr(obj, attr, None)) for attr in required)


class BaseController:
    """Shared starting point for concrete Controllers.

    Supplies a default `name`, a do-nothing `reset`, and the two
    introspection containers (`history`, `stats`) so callers can read
    them on any subclass without checking first. Subclasses are
    expected to override `act`; the version here only raises."""

    name: str = "controller"

    def __init__(self, name: str | None = None) -> None:
        """Set up the introspection containers and optional name.

        Args:
            name: Display name for this controller. A falsy value
                (`None` or `""`) leaves the class-level default in
                place.
        """
        # Fresh containers per instance, never shared between controllers.
        self.history: list[dict] = []
        self.stats: dict[str, Any] = {}
        if name:
            self.name = name

    def reset(self, ctx: "EpisodeContext") -> None:  # noqa: D401
        """Per-episode lifecycle hook; the default does nothing."""
        return None

    def act(self, observation: dict, Command: Any) -> list:
        """Placeholder that subclasses must override.

        Raises:
            NotImplementedError: always, naming the concrete class.
        """
        owner = type(self).__name__
        raise NotImplementedError(f"{owner} must implement act()")


class FunctionController(BaseController):
    """Controller facade over a bare ``agent_fn(render_state, Command)
    -> [Command]`` callable — the back-compat bridge that lets legacy
    scripted policies run through the Controller-based eval loop.

    `act` simply delegates to the wrapped callable. If that callable
    exposes ``__self__`` (as a bound method such as
    ``ModelAgent.agent_fn`` does), the owning object is kept on
    `source` so its `history` / `stats` stay reachable for playback;
    otherwise `source` is ``None``.

    NOTE(review): `reset` is inherited unchanged from `BaseController`
    (a no-op), so a wrapped bound instance's own `reset` is not invoked
    through this adapter — confirm that is intended."""

    def __init__(self, fn: PolicyFn, name: str | None = None) -> None:
        """Wrap `fn`.

        Args:
            fn: The legacy policy callable to delegate to.
            name: Optional display name. When falsy, the callable's
                ``__name__`` is used, and ``"fn"`` as a last resort.
        """
        label = name
        if not label:
            label = getattr(fn, "__name__", None)
        if not label:
            label = "fn"
        super().__init__(label)
        self._fn = fn
        # Bound methods carry their instance on `__self__`; callables
        # without that attribute yield None.
        self.source: Any = getattr(fn, "__self__", None)

    def act(self, observation: dict, Command: Any) -> list:
        """Forward the call to the wrapped policy and return its result."""
        policy = self._fn
        return policy(observation, Command)


def as_controller(policy: Any, name: str | None = None) -> Controller:
    """Turn any policy-shaped value into a Controller.

    Resolution order:
      1. `policy` already passes `is_controller` — it is handed back
         untouched, so calling this twice is harmless. `name` is
         ignored in this case.
      2. `policy` is any other callable (a bare `agent_fn` or a bound
         method) — it is wrapped in a `FunctionController`, which keeps
         a bound method's `__self__` reachable via `.source`.

    Args:
        policy: The Controller or legacy callable to coerce.
        name: Optional name passed on to `FunctionController` when a
            wrapper has to be created.

    Returns:
        `policy` itself, or a new `FunctionController` around it.

    Raises:
        TypeError: `policy` is neither a Controller nor callable.
    """
    if is_controller(policy):
        return policy
    if not callable(policy):
        kind = type(policy).__name__
        raise TypeError(
            f"cannot coerce {kind} into a Controller: "
            "expected a Controller, a ModelAgent, or an "
            "agent_fn(render_state, Command) -> [Command] callable"
        )
    return FunctionController(policy, name)


def introspection_source(controller: Controller) -> Any:
    """Pick the object whose `history` / `stats` playback should read.

    If `controller` has a `source` attribute that is not ``None`` (as a
    `FunctionController` wrapping a bound method does), that object is
    returned; in every other case the controller itself is returned.
    The test is an identity check against ``None``, so a falsy but
    non-None `source` is still honoured."""
    wrapped = getattr(controller, "source", None)
    if wrapped is None:
        return controller
    return wrapped