from freeciv_env.grpo import build_turn_prompt, oracle_action_index, parse_action_choice, reward_from_oracle
from freeciv_env.server.freeciv_environment import FreecivEnvironment
from tests.fakes import FakeFreecivSession


def test_build_turn_prompt_lists_indices() -> None:
    """Check that the turn prompt contains an indexed action line and a state header.

    The prompt is built from the observation returned by ``env.reset()`` on an
    environment backed by ``FakeFreecivSession``.
    """
    env = FreecivEnvironment(session_factory=FakeFreecivSession, max_turns=5)
    # try/finally so the environment is closed even when an assertion fails;
    # otherwise a failing test would skip env.close() and leak the session.
    try:
        observation = env.reset()
        prompt = build_turn_prompt(observation)
        assert "1: End the current turn" in prompt
        assert "State:" in prompt
    finally:
        env.close()


def test_parse_action_choice_returns_expected_action() -> None:
    """Check that the choice string ``"2"`` resolves to the expected legal action.

    With the fake session's legal actions, the selected action is expected to
    be a ``move_unit`` for unit 201 in direction 0.
    """
    env = FreecivEnvironment(session_factory=FakeFreecivSession, max_turns=5)
    # try/finally so the environment is closed even when an assertion fails.
    try:
        observation = env.reset()
        chosen = parse_action_choice("2", observation.legal_actions)
        assert chosen is not None
        assert chosen.action_type == "move_unit"
        assert chosen.unit_id == 201
        assert chosen.direction == 0
    finally:
        env.close()


def test_parse_action_choice_rejects_invalid_index() -> None:
    """Check that unusable choice strings make ``parse_action_choice`` return ``None``.

    Two inputs are exercised against the fake session's legal actions: the
    numeric string ``"99"`` and the non-numeric string ``"nope"``.
    """
    env = FreecivEnvironment(session_factory=FakeFreecivSession, max_turns=5)
    # try/finally so the environment is closed even when an assertion fails.
    try:
        observation = env.reset()
        assert parse_action_choice("99", observation.legal_actions) is None
        assert parse_action_choice("nope", observation.legal_actions) is None
    finally:
        env.close()


def test_oracle_prefers_build_city() -> None:
    """Check that the oracle selects index 0 from the fake session's legal actions.

    NOTE(review): per the test name, index 0 is presumably the build-city
    action in ``FakeFreecivSession`` -- confirm against ``tests.fakes``.
    """
    env = FreecivEnvironment(session_factory=FakeFreecivSession, max_turns=5)
    # try/finally so the environment is closed even when the assertion fails.
    try:
        observation = env.reset()
        assert oracle_action_index(observation.legal_actions) == 0
    finally:
        env.close()


def test_reward_from_oracle_scores_exact_match() -> None:
    """Check the per-completion rewards produced by ``reward_from_oracle``.

    Expected outcomes for the three completions below:
    ``"0"`` against best index 0 scores 1.0, ``"2"`` against best index 1
    scores 0.0, and the unparseable ``"bad"`` scores -0.25.
    """
    completions = ["0", "2", "bad"]
    oracle_indices = [0, 1, 2]
    expected_rewards = [1.0, 0.0, -0.25]

    actual_rewards = reward_from_oracle(completions, best_index=oracle_indices)

    assert actual_rewards == expected_rewards