# openenv_hack/tests/test_grpo_utils.py
# thomasm6m6's picture
# Initial Freeciv OpenEnv Space
# 8dc7642 verified
from freeciv_env.grpo import build_turn_prompt, oracle_action_index, parse_action_choice, reward_from_oracle
from freeciv_env.server.freeciv_environment import FreecivEnvironment
from tests.fakes import FakeFreecivSession
def test_build_turn_prompt_lists_indices() -> None:
    """Check the prompt built from the observation returned by ``reset()``.

    The prompt must contain the entry ``1: End the current turn`` and the
    text ``State:``.
    """
    env = FreecivEnvironment(session_factory=FakeFreecivSession, max_turns=5)
    # Close the environment even when a call or assertion below fails, so a
    # failing test does not leave the environment unclosed.
    try:
        observation = env.reset()
        prompt = build_turn_prompt(observation)
        assert "1: End the current turn" in prompt
        assert "State:" in prompt
    finally:
        env.close()
def test_parse_action_choice_returns_expected_action() -> None:
    """Check that the choice ``"2"`` resolves to the expected legal action.

    The parsed action must be non-``None`` with ``action_type == "move_unit"``,
    ``unit_id == 201`` and ``direction == 0``.
    """
    env = FreecivEnvironment(session_factory=FakeFreecivSession, max_turns=5)
    # Close the environment even when a call or assertion below fails, so a
    # failing test does not leave the environment unclosed.
    try:
        observation = env.reset()
        chosen = parse_action_choice("2", observation.legal_actions)
        assert chosen is not None
        assert chosen.action_type == "move_unit"
        assert chosen.unit_id == 201
        assert chosen.direction == 0
    finally:
        env.close()
def test_parse_action_choice_rejects_invalid_index() -> None:
    """Check that the choices ``"99"`` and ``"nope"`` both parse to ``None``."""
    env = FreecivEnvironment(session_factory=FakeFreecivSession, max_turns=5)
    # Close the environment even when a call or assertion below fails, so a
    # failing test does not leave the environment unclosed.
    try:
        observation = env.reset()
        assert parse_action_choice("99", observation.legal_actions) is None
        assert parse_action_choice("nope", observation.legal_actions) is None
    finally:
        env.close()
def test_oracle_prefers_build_city() -> None:
    """Check that the oracle picks index 0 for the observation from ``reset()``.

    NOTE(review): per the test name, index 0 is presumably the build-city
    action in the fake session's legal actions -- confirm against
    ``tests.fakes``.
    """
    env = FreecivEnvironment(session_factory=FakeFreecivSession, max_turns=5)
    # Close the environment even when a call or assertion below fails, so a
    # failing test does not leave the environment unclosed.
    try:
        observation = env.reset()
        assert oracle_action_index(observation.legal_actions) == 0
    finally:
        env.close()
def test_reward_from_oracle_scores_exact_match() -> None:
    """Check the rewards for three completions scored against oracle indices.

    With completions ``"0"``, ``"2"``, ``"bad"`` and best indices ``0``, ``1``,
    ``2``, the expected rewards are ``1.0``, ``0.0`` and ``-0.25``.
    """
    completions = ["0", "2", "bad"]
    oracle_indices = [0, 1, 2]
    expected_rewards = [1.0, 0.0, -0.25]

    actual_rewards = reward_from_oracle(completions, best_index=oracle_indices)

    assert actual_rewards == expected_rewards