"""Tests for server/difficulty.py."""

import pytest

from models.models import HonestState
from server.difficulty import (
    HYSTERESIS_EPISODES,
    MAX_DIFFICULTY,
    MIN_DIFFICULTY,
    WINDOW,
    get_rolling_accuracy,
    update_difficulty,
)


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def make_state(domain: str = "math", difficulty: int = 3) -> HonestState:
    """Build an ``HonestState`` that tracks exactly one domain.

    The returned state has *domain* as its current domain and a difficulty
    map containing the single entry ``{domain: difficulty}``.
    """
    difficulties = {domain: difficulty}
    return HonestState(current_domain=domain, domain_difficulties=difficulties)


def run_episodes(state: HonestState, outcomes: list, domain: str | None = None):
    """Replay *outcomes* through ``update_difficulty``, one episode per item.

    Each outcome (``True``, ``False`` or ``None``) is first written to
    ``state.episode_history`` as a record and only then handed to
    ``update_difficulty``, so that call runs with the current episode already
    in the history. This ordering is intended to mirror
    ``HonestEnvironment.step()`` (per this helper's original notes; that
    method is not visible here). When the call reports a change, the record
    that was just appended is flagged with ``difficulty_changed = True``.

    A falsy *domain* means "use ``state.current_domain``".
    """
    target = domain if domain else state.current_domain
    # Level stamped on each record; defaults to 1 when the domain has no entry.
    level = state.domain_difficulties.get(target, 1)
    for result in outcomes:
        entry: dict = dict(
            domain=target,
            correct=result,
            difficulty=level,
            difficulty_changed=False,
        )
        # Append before updating: the record must be in the history first.
        state.episode_history.append(entry)
        level, did_change = update_difficulty(state, result, domain=target)
        if did_change:
            entry["difficulty_changed"] = True


# ---------------------------------------------------------------------------
# get_rolling_accuracy
# ---------------------------------------------------------------------------


class TestGetRollingAccuracy:
    """Checks on ``get_rolling_accuracy`` for the default "math" domain."""

    def test_returns_neutral_for_empty_history(self):
        """With no episodes recorded the accuracy is the neutral value 0.5."""
        state = make_state()
        assert get_rolling_accuracy(state, "math") == pytest.approx(0.5)

    def test_all_correct(self):
        """Five correct answers out of five give an accuracy of 1.0."""
        state = make_state()
        run_episodes(state, [True] * 5)
        assert get_rolling_accuracy(state, "math") == pytest.approx(1.0)

    def test_all_wrong(self):
        """Five wrong answers out of five give an accuracy of 0.0."""
        state = make_state()
        run_episodes(state, [False] * 5)
        assert get_rolling_accuracy(state, "math") == pytest.approx(0.0)

    def test_mixed(self):
        """Three correct answers out of five give an accuracy of 0.6."""
        state = make_state()
        run_episodes(state, [True, True, False, False, True])  # 3/5 = 0.6
        assert get_rolling_accuracy(state, "math") == pytest.approx(0.6)

    def test_window_is_capped_at_20(self):
        """Episodes older than the rolling window must not affect accuracy.

        The streak of hits is sized with ``WINDOW`` rather than a literal so
        the test keeps exercising the cap if the window size is ever tuned
        (the test name presumably reflects ``WINDOW == 20``).
        """
        state = make_state()
        # Five misses followed by exactly one full window of hits.
        run_episodes(state, [False] * 5 + [True] * WINDOW)
        # Only the most recent WINDOW episodes count — all of them correct.
        assert get_rolling_accuracy(state, "math") == pytest.approx(1.0)

    def test_none_counted_as_wrong(self):
        """A ``None`` outcome lowers accuracy exactly like a wrong answer."""
        state = make_state()
        run_episodes(state, [None, None, True])  # 1/3
        assert get_rolling_accuracy(state, "math") == pytest.approx(1 / 3)


# ---------------------------------------------------------------------------
# Difficulty increases
# ---------------------------------------------------------------------------


class TestDifficultyIncrease:
    """Promotion of the difficulty level after a run of correct answers."""

    def test_increases_after_sustained_high_accuracy(self):
        """Fifteen straight correct answers from level 2 end at level 3."""
        start_level = 2
        state = make_state(difficulty=start_level)
        # An all-correct history gives accuracy 1.0 (above the 0.70 promotion
        # threshold this suite assumes), and 15 episodes is expected to be
        # past the hysteresis gap counted from the initial episode 0.
        streak = [True] * 15
        run_episodes(state, streak)
        assert state.domain_difficulties["math"] == start_level + 1

    def test_does_not_increase_before_hysteresis_clears(self):
        """Nine correct answers are too few for hysteresis to allow a change."""
        start_level = 2
        state = make_state(difficulty=start_level)
        # Accuracy is high, but fewer than 10 episodes have elapsed so far.
        streak = [True] * 9
        run_episodes(state, streak)
        assert state.domain_difficulties["math"] == start_level

    def test_caps_at_max_difficulty(self):
        """A streak of correct answers at the top level leaves it unchanged."""
        state = make_state(difficulty=MAX_DIFFICULTY)
        streak = [True] * 15
        run_episodes(state, streak)
        assert state.domain_difficulties["math"] == MAX_DIFFICULTY


# ---------------------------------------------------------------------------
# Difficulty decreases
# ---------------------------------------------------------------------------


class TestDifficultyDecrease:
    """Demotion of the difficulty level after a run of wrong answers."""

    def test_decreases_after_sustained_low_accuracy(self):
        """Fifteen straight wrong answers from level 3 end at level 2."""
        start_level = 3
        state = make_state(difficulty=start_level)
        # An all-wrong history gives accuracy 0.0 (below the 0.30 demotion
        # threshold this suite assumes).
        misses = [False] * 15
        run_episodes(state, misses)
        assert state.domain_difficulties["math"] == start_level - 1

    def test_does_not_decrease_before_hysteresis_clears(self):
        """Nine wrong answers are too few for hysteresis to allow a change."""
        start_level = 3
        state = make_state(difficulty=start_level)
        misses = [False] * 9
        run_episodes(state, misses)
        assert state.domain_difficulties["math"] == start_level

    def test_floors_at_min_difficulty(self):
        """A streak of wrong answers at the bottom level leaves it unchanged."""
        state = make_state(difficulty=MIN_DIFFICULTY)
        misses = [False] * 15
        run_episodes(state, misses)
        assert state.domain_difficulties["math"] == MIN_DIFFICULTY


# ---------------------------------------------------------------------------
# Hysteresis — no rapid oscillation
# ---------------------------------------------------------------------------


class TestHysteresis:
    """Consecutive difficulty changes must be spaced by the hysteresis gap."""

    def test_no_second_change_before_hysteresis_window(self):
        """A second promotion is held back until the gap has elapsed."""
        state = make_state(difficulty=2)

        # Eleven correct answers are enough to trigger the first promotion.
        run_episodes(state, [True] * 11)
        level_after_first = state.domain_difficulties["math"]
        assert level_after_first == 3

        # One episode short of the hysteresis gap: still correct answers
        # only, yet no further promotion may happen.
        too_few = HYSTERESIS_EPISODES - 1
        run_episodes(state, [True] * too_few)
        assert state.domain_difficulties["math"] == 3  # unchanged

    def test_second_change_fires_after_hysteresis_window(self):
        """Once the gap has elapsed, continued success promotes again."""
        state = make_state(difficulty=2)

        # First promotion.
        run_episodes(state, [True] * 11)
        assert state.domain_difficulties["math"] == 3

        # One episode beyond the hysteresis gap with accuracy still high:
        # the second promotion is expected to go through.
        enough = HYSTERESIS_EPISODES + 1
        run_episodes(state, [True] * enough)
        assert state.domain_difficulties["math"] == 4

    def test_oscillation_blocked(self):
        """A promotion cannot be undone right away by a burst of failures."""
        state = make_state(difficulty=3)

        # Eleven correct answers lift the level from 3 to 4.
        run_episodes(state, [True] * 11)
        assert state.domain_difficulties["math"] == 4

        # Nine wrong answers immediately afterwards must not demote it.
        run_episodes(state, [False] * 9)
        assert state.domain_difficulties["math"] == 4  # no decrease yet


# ---------------------------------------------------------------------------
# Per-domain independence
# ---------------------------------------------------------------------------


class TestPerDomainIndependence:
    """Episodes in one domain must leave other domains' difficulty alone."""

    def test_math_accuracy_does_not_affect_code(self):
        """Promoting "math" leaves the "code" level where it started."""
        levels = {"math": 2, "code": 3}
        state = HonestState(current_domain="math", domain_difficulties=levels)

        # Fifteen correct answers, all attributed to "math".
        run_episodes(state, [True] * 15, domain="math")

        assert state.domain_difficulties["math"] > 2  # math was promoted
        assert state.domain_difficulties["code"] == 3  # code untouched

    def test_separate_hysteresis_per_domain(self):
        """A change in "math" does not move the "code" level."""
        levels = {"math": 2, "code": 2}
        state = HonestState(current_domain="math", domain_difficulties=levels)

        # Episodes are fed to "math" only.
        run_episodes(state, [True] * 11, domain="math")

        assert state.domain_difficulties["math"] == 3
        assert state.domain_difficulties["code"] == 2  # still at its start

    def test_domains_track_independently(self):
        """Two domains can move in opposite directions on the same state."""
        levels = {"math": 3, "logic": 3}
        state = HonestState(current_domain="math", domain_difficulties=levels)

        # "math" gets only wrong answers, then "logic" gets only correct ones.
        run_episodes(state, [False] * 15, domain="math")
        run_episodes(state, [True] * 15, domain="logic")

        assert state.domain_difficulties["math"] < 3
        assert state.domain_difficulties["logic"] > 3


# ---------------------------------------------------------------------------
# Bounds enforcement
# ---------------------------------------------------------------------------


class TestBoundsEnforcement:
    """The difficulty must stay within ``[MIN_DIFFICULTY, MAX_DIFFICULTY]``."""

    # Start levels are derived from the imported bounds instead of a literal
    # list, so every valid level stays covered if the bounds are changed.
    @pytest.mark.parametrize(
        "start", list(range(MIN_DIFFICULTY, MAX_DIFFICULTY + 1))
    )
    def test_difficulty_always_in_range(self, start):
        """Long streaks in both directions never push the level out of range."""
        state = make_state(difficulty=start)
        # Extreme runs in both directions: 30 correct, then 30 wrong.
        run_episodes(state, [True] * 30 + [False] * 30)
        final_level = state.domain_difficulties["math"]
        assert MIN_DIFFICULTY <= final_level <= MAX_DIFFICULTY