"""Tests for the prompt, parsing, oracle and reward helpers in ``freeciv_env.grpo``."""

from contextlib import closing

from freeciv_env.grpo import (
    build_turn_prompt,
    oracle_action_index,
    parse_action_choice,
    reward_from_oracle,
)
from freeciv_env.server.freeciv_environment import FreecivEnvironment
from tests.fakes import FakeFreecivSession


def _make_env() -> FreecivEnvironment:
    """Return a fresh environment backed by the fake session, capped at 5 turns."""
    return FreecivEnvironment(session_factory=FakeFreecivSession, max_turns=5)


def test_build_turn_prompt_lists_indices() -> None:
    """The turn prompt contains an indexed action line and a ``State:`` section."""
    # ``closing`` guarantees ``env.close()`` runs even when an assertion fails,
    # so a failing test cannot leak the environment's session.
    with closing(_make_env()) as env:
        observation = env.reset()

        prompt = build_turn_prompt(observation)

        assert "1: End the current turn" in prompt
        assert "State:" in prompt


def test_parse_action_choice_returns_expected_action() -> None:
    """The completion ``"2"`` resolves to the expected ``move_unit`` action."""
    with closing(_make_env()) as env:
        observation = env.reset()

        chosen = parse_action_choice("2", observation.legal_actions)

        assert chosen is not None
        # Expected values presumably mirror what FakeFreecivSession exposes
        # after reset -- confirm against tests/fakes if the fake changes.
        assert chosen.action_type == "move_unit"
        assert chosen.unit_id == 201
        assert chosen.direction == 0


def test_parse_action_choice_rejects_invalid_index() -> None:
    """The completions ``"99"`` and ``"nope"`` both yield ``None``."""
    with closing(_make_env()) as env:
        observation = env.reset()

        assert parse_action_choice("99", observation.legal_actions) is None
        assert parse_action_choice("nope", observation.legal_actions) is None


def test_oracle_prefers_build_city() -> None:
    """The oracle picks index 0 from the initial legal actions.

    Per the test name, index 0 is presumably the build-city action in the
    fake session -- verify against tests/fakes.
    """
    with closing(_make_env()) as env:
        observation = env.reset()

        assert oracle_action_index(observation.legal_actions) == 0


def test_reward_from_oracle_scores_exact_match() -> None:
    """Pin the rewards for a match, a mismatch, and the completion ``"bad"``.

    ``"0"`` against best index 0 scores 1.0, ``"2"`` against best index 1
    scores 0.0, and ``"bad"`` against best index 2 scores -0.25.
    """
    rewards = reward_from_oracle(["0", "2", "bad"], best_index=[0, 1, 2])

    assert rewards == [1.0, 0.0, -0.25]