"""KantBench environment adapter for the HF Space. Thin wrapper that delegates to the real KantEnvironment (90+ 2-player games, 17 strategies, meta-games, composable variants) and NPlayerEnvironment (3 N-player games) instead of a standalone reimplementation. """ from __future__ import annotations from typing import Any, Optional from openenv.core.env_server.interfaces import Environment from openenv.core.env_server.types import State from models import KantBenchAction, KantBenchObservation from env.environment import KantEnvironment from env.models import GameAction from env.nplayer.environment import NPlayerEnvironment from env.nplayer.models import NPlayerAction, NPlayerObservation # Register built-in N-player games into the registry import common.games_meta.nplayer_games # noqa: F401 from common.games_meta.nplayer_config import NPLAYER_GAMES from common.games import GAMES from common.variants import compose_game class KantbenchEnvironment(Environment): """Game theory environment exposing 90+ two-player and N-player games. Wraps the real KantEnvironment and NPlayerEnvironment, routing automatically based on the requested game name. Supports a ``variant`` reset parameter for dynamic game composition (e.g. ``variant="constitutional"`` or ``variant="cheap_talk"``). """ SUPPORTS_CONCURRENT_SESSIONS: bool = True def __init__(self) -> None: self._env_2p = KantEnvironment() self._env_np = NPlayerEnvironment() self._is_nplayer: bool = False def reset(self, **kwargs: Any) -> KantBenchObservation: game_name: str = kwargs.get("game", "prisoners_dilemma") variant: Optional[str] = kwargs.pop("variant", None) # Dynamic variant composition — compose game on-the-fly and # register it so KantEnvironment can look it up via get_game(). # Constitutional variant creates fresh mutable closure per call. if variant and game_name in GAMES: composed = compose_game(game_name, variant) composed_key = f"_composed_{variant}_{game_name}" GAMES[composed_key] = composed kwargs["game"] = composed_key if game_name in NPLAYER_GAMES: self._is_nplayer = True # Map Space kwargs to NPlayerEnvironment.reset signature opponent_strategies: Optional[list[str]] = None strategy = kwargs.get("strategy") if strategy: opponent_strategies = [strategy] obs = self._env_np.reset( game_name, num_rounds=kwargs.get("num_rounds"), opponent_strategies=opponent_strategies, ) return _nplayer_to_space_obs(obs) else: self._is_nplayer = False obs = self._env_2p.reset(**kwargs) return _to_space_obs(obs) def step(self, action: KantBenchAction, **kwargs: Any) -> KantBenchObservation: if self._is_nplayer: internal_action = NPlayerAction(action=action.move) obs = self._env_np.step(internal_action) return _nplayer_to_space_obs(obs) else: internal_action = GameAction(action=action.move) obs = self._env_2p.step(internal_action, **kwargs) return _to_space_obs(obs) @property def state(self) -> State: if self._is_nplayer: s = self._env_np.state else: s = self._env_2p.state return State( episode_id=s.episode_id or "", step_count=s.step_count, ) def _to_space_obs(obs) -> KantBenchObservation: """Convert internal GameObservation to Space-facing KantBenchObservation.""" last = obs.last_round history = [ { "round": r.round_number, "your_move": r.player_action, "opponent_move": r.opponent_action, "your_payoff": r.player_payoff, "opponent_payoff": r.opponent_payoff, } for r in obs.history ] return KantBenchObservation( game_name=obs.game_name, game_description=obs.game_description, available_moves=list(obs.available_actions), your_move=last.player_action if last else "", opponent_move=last.opponent_action if last else "", your_payoff=last.player_payoff if last else 0.0, opponent_payoff=last.opponent_payoff if last else 0.0, cumulative_score=obs.player_score, round_number=obs.current_round, max_rounds=obs.total_rounds, opponent_strategy=obs.opponent_strategy, history=history, done=obs.done, reward=obs.reward, message="Game over — call reset() to start a new episode." if obs.done else "", ) def _nplayer_to_space_obs(obs: NPlayerObservation) -> KantBenchObservation: """Convert NPlayerObservation to Space-facing KantBenchObservation.""" last = obs.last_round history = [ { "round": r.round_number, "actions": r.actions, "payoffs": r.payoffs, } for r in obs.history ] return KantBenchObservation( game_name=obs.game_name, game_description=obs.game_description, available_moves=list(obs.available_actions), your_move=last.actions[0] if last else "", opponent_move="", # N-player: see history for all actions your_payoff=last.payoffs[0] if last else 0.0, opponent_payoff=0.0, # N-player: see history for all payoffs cumulative_score=obs.scores[0] if obs.scores else 0.0, round_number=obs.current_round, max_rounds=obs.total_rounds, opponent_strategy="", history=history, done=obs.done, reward=obs.reward, message="Game over — call reset() to start a new episode." if obs.done else "", num_players=obs.num_players, player_index=obs.player_index, all_scores=list(obs.scores), )