# stratego/benchmarking/run_game.py
"""Play one Stratego game between two agents and summarise how it ended."""
import textarena as ta

from stratego.env.stratego_env import StrategoEnv

# Lower-case substrings of the environment's end-of-game reason that are all
# reported as "Opponent had no legal moves".
_NO_MOVES_MARKERS = ("no legal moves", "no more movable pieces", "no moves")


def get_last_board_observation(state, player_id):
    """Return the text of the most recent board observation for a player.

    Walks ``state.observations[player_id]`` from newest to oldest. The first
    entry that contains ``ta.ObservationType.GAME_BOARD`` and has a ``str``
    element wins; that first ``str`` element is returned.

    Args:
        state: Environment state exposing an ``observations`` mapping.
        player_id: Key into ``state.observations``.

    Returns:
        The board text, or ``""`` when no such observation exists.
    """
    for obs in reversed(state.observations[player_id]):
        if ta.ObservationType.GAME_BOARD in obs:
            for elem in obs:
                if isinstance(elem, str):
                    return elem
    return ""


def _normalize_reason(raw):
    """Turn a raw termination reason into a plain string.

    ``None`` becomes ``""`` (instead of the literal text ``"None"``) so that
    callers can fall back to a default message; lists/tuples are joined with
    ``"; "``; anything else goes through ``str``.
    """
    if raw is None:
        return ""
    if isinstance(raw, (list, tuple)):
        return "; ".join(map(str, raw))
    return str(raw)


def _describe_termination(game_state, game_info):
    """Return ``(reason_text, flag_captured)`` for a finished game.

    An ``"invalid"`` termination in ``game_state`` takes precedence; otherwise
    the ``"reason"`` entry of ``game_info`` is classified by substring match.
    """
    if game_state.get("termination") == "invalid":
        detail = game_state.get("invalid_reason", "Invalid move")
        return f"Invalid move: {detail}", False

    # Normalize reason to string for downstream metrics/logs
    raw_reason = _normalize_reason(game_info.get("reason", ""))
    raw_lower = raw_reason.lower()

    # Order matters: the first matching category wins.
    if "flag" in raw_lower:
        return raw_reason, True
    if any(marker in raw_lower for marker in _NO_MOVES_MARKERS):
        return "Opponent had no legal moves", False
    if "stalemate" in raw_lower:
        return "Stalemate", False
    if "turn limit" in raw_lower:
        return "Turn limit reached", False
    if "repetition" in raw_lower:
        return "Two-squares repetition rule violation", False
    return raw_reason or "Game ended without explicit winner", False


def _winner_from_rewards(rewards):
    """Return the player with the strictly highest reward, or ``-1``.

    ``-1`` is returned when ``rewards`` is missing/empty or when several
    players share the maximum reward.
    """
    if not rewards:
        return -1
    max_reward = max(rewards.values())
    winners = [player for player, reward in rewards.items() if reward == max_reward]
    return winners[0] if len(winners) == 1 else -1


def run_game(agent0, agent1, size=6, seed=None):
    """Run a two-player game to completion and return summary metrics.

    Args:
        agent0: Agent for player 0. Either a callable taking the board text,
            or an object with an ``act(obs)`` method.
        agent1: Agent for player 1, same contract as ``agent0``.
        size: Forwarded to ``StrategoEnv`` as ``size``.
        seed: Forwarded to ``env.reset`` as ``seed``.

    Returns:
        A dict with keys ``"winner"`` (player id, or ``-1`` for a tie or when
        no rewards are available), ``"turns"``, ``"invalid_moves_p0"``,
        ``"invalid_moves_p1"``, ``"repetitions"``, ``"flag_captured"`` and
        ``"game_end_reason"``.
    """
    env = StrategoEnv(env_id="Stratego-custom", size=size)
    env.reset(num_players=2, seed=seed)

    invalid_moves = {0: 0, 1: 0}
    repetitions = 0
    turns = 0
    done = False
    winner = -1
    reason_verbose = "Unknown termination reason"
    flag_captured = False

    while not done:
        # NOTE(review): state and repetition counts are fetched *before* the
        # move is applied; the post-step reads below assume get_state()
        # returns the live state object that step() mutates -- confirm.
        state = env.get_state()
        rep = env.repetition_count()
        pid = state.current_player_id
        agent = agent0 if pid == 0 else agent1

        obs = get_last_board_observation(state, pid)
        action = agent(obs) if callable(agent) else agent.act(obs)
        done, _ = env.step(action)
        turns += 1

        if state.game_info.get(pid, {}).get("invalid_move"):
            invalid_moves[pid] += 1
        repetitions += rep.get(pid, 0)

        if done:
            reason_verbose, flag_captured = _describe_termination(
                state.game_state, state.game_info
            )
            # TextArena does not store a winner in game_info; derive from rewards
            winner = _winner_from_rewards(getattr(state, "rewards", None))

    return {
        "winner": winner,
        "turns": turns,
        "invalid_moves_p0": invalid_moves[0],
        "invalid_moves_p1": invalid_moves[1],
        "repetitions": repetitions,
        "flag_captured": flag_captured,
        "game_end_reason": reason_verbose,
    }