"""InteractiveSession — a threadless, stepwise driver for human web play.

Holds the live scenario+game and advances exactly one turn per HTTP request.
Built on the same ``_session_core`` helpers as SessionRunner, so the trace it
emits at the end is identical to a SessionRunner(HumanAgent) trace for the same
actions (pinned by tests/runtime/test_interactive_equivalence.py).

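Fairness: while the game is in progress ``state()`` exposes only the live view
(grid, legend, health, turn counters, available actions, plus the intro cut
frames before the first move); the per-turn answer keys (optimal/habit) and
rewards are disclosed in ``review`` only once the game is over.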
"""
from __future__ import annotations

from proteus.game.engine.difficulty import Difficulty
from proteus.game.runtime import _session_core as core
from proteus.game.runtime.trace import SessionTrace, TurnTrace


class InteractiveSession:
    """Threadless, stepwise session used for human web play.

    One instance owns the scenario/game pair returned by
    ``core.build_session``.  ``step()`` records exactly one turn per call,
    ``state()`` returns the JSON-ready view of the session, and ``finish()``
    builds (and memoizes) the final ``SessionTrace`` with ``model="human"``.

    While the game is still running ``state()`` leaves ``outcome`` and
    ``review`` as ``None``; the per-turn answer keys and rewards only show up
    in ``review`` once ``_is_done()`` reports that the session is over.
    """

    def __init__(
        self,
        scenario_name: str,
        *,
        difficulty: Difficulty = Difficulty.EASY,
        seed: int | None = None,
        play_turns: int = 15,
        use_probe: bool = False,
        motive_category: str = "survival",
        memory: "MemoryCheckpoint | None" = None,
        use_default_memory: bool = True,
    ) -> None:
        """Build the scenario/game pair and start with an empty turn log.

        Args:
            scenario_name: Scenario identifier handed to
                ``core.build_session`` and later to ``core.finalize``.
            difficulty: Difficulty used to build the session.
            seed: Seed used to build the session; ``None`` is passed through
                unchanged.
            play_turns: Turn budget.  It ends the session only for scenarios
                whose ``turn_limited`` flag is truthy.
            use_probe: When true, every ``step()`` forwards the probe question
                and the caller-supplied answer to the turn trace.
            motive_category: Forwarded to ``core.finalize``.
            memory: Explicit memory checkpoint; it takes precedence over the
                scenario default whenever it is not ``None``.
            use_default_memory: When ``memory`` is ``None``, fall back to the
                built session's ``default_memory`` only if this is true; pass
                ``False`` to force a session without memory.
        """
        # Plain configuration, kept for state() and for finalize().
        self._scenario_name = scenario_name
        self._difficulty = difficulty
        self._seed = seed
        self._play_turns = play_turns
        self._use_probe = use_probe
        self._motive_category = motive_category

        session = core.build_session(scenario_name, seed, difficulty, play_turns)
        self._scenario = session.scenario
        self._game = session.game
        self._cut_frames = session.cut_frames
        self._cut_grids = session.cut_grids

        # An explicit memory always wins; the scenario default is consulted
        # only when nothing was supplied AND the caller allows the fallback.
        if memory is None and use_default_memory:
            memory = session.default_memory
        self._memory = memory

        # Turns recorded so far, and the memoized final trace (None until
        # finish() has run).
        self._turns: list[TurnTrace] = []
        self._trace: SessionTrace | None = None

    # ------------------------------------------------------------------ #
    def _is_done(self) -> bool:
        """Tell whether the session can accept no further turns.

        The session is over when the game reports ``eliminated`` or
        ``survived``, or, for turn-limited scenarios, when at least
        ``play_turns`` turns have been recorded.
        """
        game = self._game
        ended = game.eliminated or game.survived
        if ended:
            return ended
        scenario = self._scenario
        return scenario.turn_limited and len(self._turns) >= self._play_turns

    def state(self) -> dict:
        """Return the JSON-ready snapshot of the session.

        While the game is running the snapshot carries only the live view
        (phase, turn counters, health, grid, legend, actions and, before the
        first turn, the intro cut frames); ``outcome`` and ``review`` stay
        ``None`` so no reward/optimal/habit information leaks.

        Once the game is over the trace is obtained through ``finish()``
        (memoized, so repeated calls reuse it) and used to fill ``outcome`` and
        ``review``.  For the ``"errand_runner"`` scenario the review also gets
        an ``"errand"`` summary.
        """
        done = self._is_done()
        turns_played = len(self._turns)
        if done:
            phase = "done"
        elif turns_played == 0:
            phase = "cut_intro"
        else:
            phase = "play"
        limited = self._scenario.turn_limited

        # Turn-budget fields are only meaningful for turn-limited scenarios.
        turn_budget = self._play_turns if limited else None
        health = self._game.health
        turns_left = max(0, self._play_turns - turns_played) if limited else None
        grid = core.grid_to_list(self._game.current_grid())
        # Legend keys are stringified so the mapping is JSON-serializable.
        legend = {
            str(symbol): meaning
            for symbol, meaning in self._scenario.legend().items()
        }
        view: dict = {
            "phase": phase,
            "turn_idx": turns_played,
            "play_turns": turn_budget,
            "health": health,
            "turns_left": turns_left,
            "grid": grid,
            "legend": legend,
            "actions": list(self._scenario.action_set),
            "outcome": None,
            # Intro frames are only sent before the first recorded turn.
            "cut_frames": self._cut_grids if turns_played == 0 else None,
            "review": None,
        }
        if not done:
            return view

        trace = self.finish()
        view["outcome"] = trace.outcome
        # TurnTrace attributes copied verbatim, in this order, into each row.
        review_fields = (
            "turn_idx",
            "action",
            "motive_action",
            "habit_action",
            "reward",
            "is_diagnostic",
            "was_congruent",
        )
        review: dict = {"outcome": trace.outcome, "metrics": trace.metrics}
        review["turns"] = [
            {name: getattr(turn, name) for name in review_fields}
            for turn in trace.turns
        ]
        if self._scenario_name == "errand_runner":
            review["errand"] = self._errand_summary(trace)
        view["review"] = review
        return view

    def _errand_summary(self, trace) -> dict:
        """Assemble the summary shown on the errand results screen.

        Returns a dict with the keys ``events``, ``approximate``,
        ``grass_steps``, ``touched_pedestrian``, ``closest_persona``,
        ``closest_label`` and ``headline``.  It reads the scenario's private
        ``_wallet_present`` / ``_ped_rescued`` / ``_ped_touched`` /
        ``_grass_steps`` attributes, so it is only meaningful for the errand
        scenario.
        """
        from proteus.game.scenarios import errand_world

        metrics = trace.metrics
        scenario = self._scenario

        # Coarse display only: "cross" iff ANY non-stay action happened over
        # the whole game.  This is a heuristic, not real per-turn
        # crosswalk/light detection.
        moved = any(turn.action != "stay" for turn in trace.turns)
        events = {
            "crosswalk": "cross" if moved else "wait",
            # Coarse display as well; no refined per-turn record is tracked.
            "construction": "pass",
            "wallet": "ignore" if scenario._wallet_present else "grab",
            "pedestrian": "help" if scenario._ped_rescued else "ignore",
            # Precise observation: the scenario counts grass dwell exactly.
            "grass": "cut" if scenario._grass_steps > 0 else "avoid",
        }

        def matches(persona_id):
            # Number of scored events on which the persona's policy agrees
            # with what was observed; the approximate events are not scored.
            policy = errand_world.PERSONAS[persona_id]
            hits = 0
            for event in ("wallet", "pedestrian", "grass"):
                if policy[event] == events[event]:
                    hits += 1
            return hits

        # civic (avoid grass + help + ignore-wallet) and warm_outlaw (cut
        # grass + help + ignore-wallet) are told apart by the precisely
        # tracked grass signal, which breaks the wallet/pedestrian tie;
        # opportunist (cut grass + ignore-ped + grab) already differs on
        # wallet/pedestrian.
        closest = max(errand_world.PERSONAS, key=matches)

        def as_float(key):
            raw = metrics.get(key)
            if raw is None:
                return None
            return float(raw)

        # discovery_turn is emitted only when SELF reports exist; the other
        # two are batch-eval metrics that live interactive play does not
        # produce.  None means "not measured in this mode" (the results
        # screen renders None as "—").
        headline = {
            label: as_float(metric_key)
            for label, metric_key in (
                ("discovery", "discovery_turn"),
                ("generalizability", "generalizability"),
                ("coherence", "coherence"),
            )
        }

        return {
            "events": events,
            # crosswalk/construction are coarse heuristics, not measurements;
            # they are flagged so the renderer can mark them as best-effort.
            "approximate": ["crosswalk", "construction"],
            "grass_steps": int(scenario._grass_steps),
            "touched_pedestrian": bool(scenario._ped_touched),
            "closest_persona": closest,
            "closest_label": errand_world.PERSONA_LABELS[closest],
            "headline": headline,
        }

    def step(self, action: str, probe_answer: str = "") -> dict:
        """Record one turn for ``action`` and return the refreshed state.

        Args:
            action: Must be a member of the scenario's ``action_set``.
            probe_answer: Stored as the probe answer and probe raw text when
                the session was created with ``use_probe=True``; otherwise it
                is not used.

        Returns:
            The result of ``state()`` after the new turn has been appended.

        Raises:
            core.SessionFinishedError: The session is already over.
            ValueError: ``action`` is not in the scenario's ``action_set``.
        """
        if self._is_done():
            raise core.SessionFinishedError("session already finished")
        if action not in self._scenario.action_set:
            raise ValueError(
                f"invalid action {action!r}; choose one of {self._scenario.action_set}"
            )

        # Turn indices are 1-based.
        next_idx = len(self._turns) + 1
        history = [past.action for past in self._turns]
        observation = core.build_observation(
            self._scenario, self._game, self._cut_frames, next_idx,
            memory=self._memory,
            prior_actions=history,
        )

        probe_kwargs: dict[str, object] = {}
        if self._use_probe:
            # A human supplies no separate reasoning, so the raw text is the
            # answer itself and the reasoning stays empty.
            probe_kwargs = {
                "probe_q": core._PROBE_QUESTION,
                "probe_a": probe_answer,
                "probe_reasoning": "",
                "probe_raw_text": probe_answer,
            }

        # NOTE(review): make_turn_trace presumably also applies the action to
        # the game -- nothing else in this class advances it; confirm in
        # _session_core.
        record = core.make_turn_trace(
            self._scenario, self._game,
            turn_idx=next_idx, observation=observation,
            action=action, raw_text=action, **probe_kwargs,
        )
        self._turns.append(record)
        return self.state()

    def finish(self) -> SessionTrace:
        """Return the final trace, building it on the first call only.

        The trace comes from ``core.finalize`` with ``model="human"`` and is
        memoized, so later calls return the very same object.

        NOTE(review): nothing here checks that the game is over, and
        ``step()`` does not reset the memo.  If this is called mid-game, later
        ``state()`` calls reuse the trace built at that point -- confirm that
        callers only finish once play is complete.
        """
        if self._trace is None:
            self._trace = core.finalize(
                self._scenario_name, self._scenario, self._game,
                seed=self._seed, difficulty=self._difficulty,
                play_turns=self._play_turns, turns=self._turns,
                cut_frames=self._cut_frames, motive_category=self._motive_category,
                model="human",
            )
        return self._trace