jtowarek committed on
Commit
047aab1
·
verified ·
1 Parent(s): 3949e24

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. models.py +6 -2
  2. server/KantBench_environment.py +80 -13
models.py CHANGED
@@ -1,12 +1,12 @@
1
  """Data models for the KantBench game theory environment."""
2
 
3
- from typing import Any
4
  from pydantic import Field
5
  from openenv.core.env_server.types import Action, Observation
6
 
7
 
8
  class KantBenchAction(Action):
9
- """Action for the KantBench environment — a move in a 2-player game."""
10
 
11
  move: str = Field(..., description="Your move (e.g. 'cooperate', 'defect', 'hawk', 'dove')")
12
 
@@ -27,3 +27,7 @@ class KantBenchObservation(Observation):
27
  opponent_strategy: str = Field(default="", description="Opponent's strategy name")
28
  history: list[dict[str, Any]] = Field(default_factory=list, description="Round history")
29
  message: str = Field(default="", description="Status message")
 
 
 
 
 
1
  """Data models for the KantBench game theory environment."""
2
 
3
+ from typing import Any, Optional
4
  from pydantic import Field
5
  from openenv.core.env_server.types import Action, Observation
6
 
7
 
8
class KantBenchAction(Action):
    """A single move submitted by the agent to a KantBench game (2-player or N-player)."""

    move: str = Field(..., description="Your move (e.g. 'cooperate', 'defect', 'hawk', 'dove')")
12
 
 
27
  opponent_strategy: str = Field(default="", description="Opponent's strategy name")
28
  history: list[dict[str, Any]] = Field(default_factory=list, description="Round history")
29
  message: str = Field(default="", description="Status message")
30
+ # N-player fields (only populated for multiplayer games)
31
+ num_players: Optional[int] = Field(default=None, description="Number of players (set for N-player games)")
32
+ player_index: Optional[int] = Field(default=None, description="Your player index (set for N-player games)")
33
+ all_scores: Optional[list[float]] = Field(default=None, description="Scores for all players (set for N-player games)")
server/KantBench_environment.py CHANGED
@@ -1,12 +1,13 @@
1
  """KantBench environment adapter for the HF Space.
2
 
3
- Thin wrapper that delegates to the real KantEnvironment (90+ games,
4
- 17 strategies) instead of a standalone reimplementation.
 
5
  """
6
 
7
  from __future__ import annotations
8
 
9
- from typing import Any
10
 
11
  from openenv.core.env_server.interfaces import Environment
12
  from openenv.core.env_server.types import State
@@ -14,32 +15,65 @@ from openenv.core.env_server.types import State
14
  from models import KantBenchAction, KantBenchObservation
15
  from env.environment import KantEnvironment
16
  from env.models import GameAction
 
 
 
 
 
 
17
 
18
 
19
  class KantbenchEnvironment(Environment):
20
- """Game theory environment exposing 90+ games via the OpenEnv interface.
21
 
22
- Wraps the real KantEnvironment and translates between the Space's
23
- model types (KantBenchAction/Observation) and the internal types.
24
  """
25
 
26
  SUPPORTS_CONCURRENT_SESSIONS: bool = True
27
 
28
  def __init__(self) -> None:
29
- self._env = KantEnvironment()
 
 
30
 
31
  def reset(self, **kwargs: Any) -> KantBenchObservation:
32
- obs = self._env.reset(**kwargs)
33
- return _to_space_obs(obs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
  def step(self, action: KantBenchAction, **kwargs: Any) -> KantBenchObservation:
36
- internal_action = GameAction(action=action.move)
37
- obs = self._env.step(internal_action, **kwargs)
38
- return _to_space_obs(obs)
 
 
 
 
 
39
 
40
  @property
41
  def state(self) -> State:
42
- s = self._env.state
 
 
 
43
  return State(
44
  episode_id=s.episode_id or "",
45
  step_count=s.step_count,
@@ -76,3 +110,36 @@ def _to_space_obs(obs) -> KantBenchObservation:
76
  reward=obs.reward,
77
  message="Game over — call reset() to start a new episode." if obs.done else "",
78
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """KantBench environment adapter for the HF Space.
2
 
3
+ Thin wrapper that delegates to the real KantEnvironment (90+ 2-player games,
4
+ 17 strategies) and NPlayerEnvironment (3 N-player games) instead of a
5
+ standalone reimplementation.
6
  """
7
 
8
  from __future__ import annotations
9
 
10
+ from typing import Any, Optional
11
 
12
  from openenv.core.env_server.interfaces import Environment
13
  from openenv.core.env_server.types import State
 
15
  from models import KantBenchAction, KantBenchObservation
16
  from env.environment import KantEnvironment
17
  from env.models import GameAction
18
+ from env.nplayer.environment import NPlayerEnvironment
19
+ from env.nplayer.models import NPlayerAction, NPlayerObservation
20
+
21
+ # Register built-in N-player games into the registry
22
+ import common.games_meta.nplayer_games # noqa: F401
23
+ from common.games_meta.nplayer_config import NPLAYER_GAMES
24
 
25
 
26
  class KantbenchEnvironment(Environment):
27
+ """Game theory environment exposing 90+ two-player and N-player games.
28
 
29
+ Wraps the real KantEnvironment and NPlayerEnvironment, routing
30
+ automatically based on the requested game name.
31
  """
32
 
33
  SUPPORTS_CONCURRENT_SESSIONS: bool = True
34
 
35
def __init__(self) -> None:
    """Create both backing environments; per-episode routing is decided in reset()."""
    # True while the current episode is running on the N-player environment.
    self._is_nplayer: bool = False
    self._env_2p = KantEnvironment()
    self._env_np = NPlayerEnvironment()
39
 
40
def reset(self, **kwargs: Any) -> KantBenchObservation:
    """Start a new episode, routing to the N-player env when the game requires it.

    Recognized kwargs: ``game`` (defaults to ``"prisoners_dilemma"``),
    ``strategy``, ``num_rounds``; remaining kwargs are forwarded untouched
    on the 2-player path.
    """
    requested_game: str = kwargs.get("game", "prisoners_dilemma")

    self._is_nplayer = requested_game in NPLAYER_GAMES
    if not self._is_nplayer:
        return _to_space_obs(self._env_2p.reset(**kwargs))

    # Translate Space-style kwargs into the NPlayerEnvironment.reset signature.
    strategy = kwargs.get("strategy")
    strategies: Optional[list[str]] = [strategy] if strategy else None
    # NOTE(review): when "num_rounds" is absent this passes None explicitly —
    # confirm NPlayerEnvironment.reset treats None as "use default".
    internal_obs = self._env_np.reset(
        requested_game,
        num_rounds=kwargs.get("num_rounds"),
        opponent_strategies=strategies,
    )
    return _nplayer_to_space_obs(internal_obs)
60
 
61
def step(self, action: KantBenchAction, **kwargs: Any) -> KantBenchObservation:
    """Apply one move, dispatching to whichever environment reset() selected."""
    move = action.move
    if self._is_nplayer:
        # NOTE(review): extra kwargs are not forwarded on the N-player path —
        # confirm NPlayerEnvironment.step intentionally accepts no options.
        return _nplayer_to_space_obs(self._env_np.step(NPlayerAction(action=move)))
    return _to_space_obs(self._env_2p.step(GameAction(action=move), **kwargs))
70
 
71
  @property
72
  def state(self) -> State:
73
+ if self._is_nplayer:
74
+ s = self._env_np.state
75
+ else:
76
+ s = self._env_2p.state
77
  return State(
78
  episode_id=s.episode_id or "",
79
  step_count=s.step_count,
 
110
  reward=obs.reward,
111
  message="Game over — call reset() to start a new episode." if obs.done else "",
112
  )
113
+
114
+
115
def _nplayer_to_space_obs(obs: NPlayerObservation) -> KantBenchObservation:
    """Convert an NPlayerObservation into the Space-facing KantBenchObservation.

    The 2-player-shaped fields (your_move, your_payoff, cumulative_score) are
    filled from the agent's own slot; per-opponent detail is exposed through
    ``history`` and ``all_scores``.
    """
    # The agent's slot in per-round action/payoff lists and in scores.
    # Previously hard-coded to 0; prefer the observation's own player_index,
    # falling back to 0 (which preserves the old behavior when it is unset).
    idx = obs.player_index or 0

    last = obs.last_round
    history = [
        {
            "round": r.round_number,
            "actions": r.actions,
            "payoffs": r.payoffs,
        }
        for r in obs.history
    ]
    return KantBenchObservation(
        game_name=obs.game_name,
        game_description=obs.game_description,
        available_moves=list(obs.available_actions),
        your_move=last.actions[idx] if last else "",
        opponent_move="",  # N-player: see history for all actions
        your_payoff=last.payoffs[idx] if last else 0.0,
        opponent_payoff=0.0,  # N-player: see history for all payoffs
        cumulative_score=obs.scores[idx] if obs.scores else 0.0,
        round_number=obs.current_round,
        max_rounds=obs.total_rounds,
        opponent_strategy="",
        history=history,
        done=obs.done,
        reward=obs.reward,
        message="Game over — call reset() to start a new episode." if obs.done else "",
        num_players=obs.num_players,
        player_index=obs.player_index,
        # Guard: list(None) would raise TypeError; None matches the field default.
        all_scores=list(obs.scores) if obs.scores else None,
    )