jtowarek committed on
Commit
dd8e198
·
verified ·
1 Parent(s): 047aab1

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -10,11 +10,13 @@ tags:
10
  - openenv
11
  ---
12
 
13
- # KantBench: 90+ Game Theory Environments for LLM Training
14
 
15
  A comprehensive game theory environment for training and evaluating LLM strategic reasoning via OpenEnv. Supports GRPO/DPO training with the environment as a reward oracle.
16
 
17
- ## Games (90+)
 
 
18
 
19
  | Category | Examples | Count |
20
  |---|---|---|
@@ -25,22 +27,30 @@ A comprehensive game theory environment for training and evaluating LLM strategi
25
  | **Auctions & Contests** | First-Price, Vickrey, All-Pay, Colonel Blotto, Tullock Contest | 10+ |
26
  | **Sequential** | Ultimatum, Trust, Centipede, Stackelberg, Dictator | 6 |
27
 
 
 
 
 
 
 
 
 
28
  ## Opponent Strategies (17)
29
 
30
  `random`, `always_cooperate`, `always_defect`, `tit_for_tat`, `tit_for_two_tats`, `grudger`, `pavlov`, `suspicious_tit_for_tat`, `generous_tit_for_tat`, `adaptive`, `mixed`, `ultimatum_fair`, `ultimatum_low`, `trust_fair`, `trust_generous`, `public_goods_fair`, `public_goods_free_rider`
31
 
32
  ## Quick Start
33
 
 
 
34
  ```python
35
  from KantBench import KantBenchAction, KantBenchEnv
36
 
37
  with KantBenchEnv(base_url="https://openenv-community-kantbench.hf.space") as env:
38
- # Reset with a specific game and opponent strategy
39
  result = env.reset(game="prisoners_dilemma", strategy="tit_for_tat")
40
  print(f"Game: {result.observation.game_name}")
41
  print(f"Moves: {result.observation.available_moves}")
42
 
43
- # Play rounds until done
44
  while not result.done:
45
  result = env.step(KantBenchAction(move="cooperate"))
46
  print(f"Round {result.observation.round_number}: "
@@ -51,12 +61,30 @@ with KantBenchEnv(base_url="https://openenv-community-kantbench.hf.space") as en
51
  print(f"Final score: {result.observation.cumulative_score}")
52
  ```
53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  ## Reset Parameters
55
 
56
  ```python
57
  # Specific game and strategy
58
  result = env.reset(game="stag_hunt", strategy="grudger")
59
 
 
 
 
60
  # Random game and strategy (default)
61
  result = env.reset()
62
  ```
@@ -66,26 +94,29 @@ result = env.reset()
66
  - **Web Interface** at `/web` — Interactive UI for exploring the environment
67
  - **API Docs** at `/docs` — Full OpenAPI/Swagger interface
68
  - **Health Check** at `/health` — Container health monitoring
69
- - **WebSocket** at `/ws` — Persistent session endpoint
70
 
71
  ## Environment Details
72
 
73
  ### Action
74
 
75
  **KantBenchAction**: Single field
76
- - `move` (str) — Your move (e.g. `"cooperate"`, `"defect"`, `"hawk"`, `"produce_5"`)
77
 
78
  ### Observation
79
 
80
  **KantBenchObservation**: Full round result and episode state
81
  - `game_name`, `game_description` — Current game info
82
  - `available_moves` — Valid moves for this game
83
- - `your_move`, `opponent_move` — Moves played this round
84
- - `your_payoff`, `opponent_payoff` — Payoffs this round
85
  - `cumulative_score` — Your total score
86
  - `round_number`, `max_rounds` — Episode progress
87
  - `opponent_strategy` — Opponent strategy name
88
  - `history` — Full round-by-round history
 
 
 
89
 
90
  ## Deployment
91
 
 
10
  - openenv
11
  ---
12
 
13
+ # KantBench: 93 Game Theory Environments for LLM Training
14
 
15
  A comprehensive game theory environment for training and evaluating LLM strategic reasoning via OpenEnv. Supports GRPO/DPO training with the environment as a reward oracle.
16
 
17
+ ## Games (93)
18
+
19
+ ### 2-Player Games (90)
20
 
21
  | Category | Examples | Count |
22
  |---|---|---|
 
27
  | **Auctions & Contests** | First-Price, Vickrey, All-Pay, Colonel Blotto, Tullock Contest | 10+ |
28
  | **Sequential** | Ultimatum, Trust, Centipede, Stackelberg, Dictator | 6 |
29
 
30
+ ### N-Player Games (3)
31
+
32
+ | Game | Players | Description |
33
+ |---|---|---|
34
+ | `nplayer_public_goods` | 5 | Each player contributes from an endowment; pot is multiplied and split equally |
35
+ | `nplayer_volunteer_dilemma` | 5 | At least one must volunteer for everyone to benefit; volunteers pay a cost |
36
+ | `nplayer_el_farol` | 5 | Attend a bar that's fun when uncrowded but unpleasant when full |
37
+
38
  ## Opponent Strategies (17)
39
 
40
  `random`, `always_cooperate`, `always_defect`, `tit_for_tat`, `tit_for_two_tats`, `grudger`, `pavlov`, `suspicious_tit_for_tat`, `generous_tit_for_tat`, `adaptive`, `mixed`, `ultimatum_fair`, `ultimatum_low`, `trust_fair`, `trust_generous`, `public_goods_fair`, `public_goods_free_rider`
41
 
42
  ## Quick Start
43
 
44
+ ### 2-Player Game
45
+
46
  ```python
47
  from KantBench import KantBenchAction, KantBenchEnv
48
 
49
  with KantBenchEnv(base_url="https://openenv-community-kantbench.hf.space") as env:
 
50
  result = env.reset(game="prisoners_dilemma", strategy="tit_for_tat")
51
  print(f"Game: {result.observation.game_name}")
52
  print(f"Moves: {result.observation.available_moves}")
53
 
 
54
  while not result.done:
55
  result = env.step(KantBenchAction(move="cooperate"))
56
  print(f"Round {result.observation.round_number}: "
 
61
  print(f"Final score: {result.observation.cumulative_score}")
62
  ```
63
 
64
+ ### N-Player Game
65
+
66
+ ```python
67
+ with KantBenchEnv(base_url="https://openenv-community-kantbench.hf.space") as env:
68
+ result = env.reset(game="nplayer_public_goods", strategy="random")
69
+ print(f"Players: {result.observation.num_players}")
70
+
71
+ while not result.done:
72
+ result = env.step(KantBenchAction(move="contribute_10"))
73
+ print(f"Round {result.observation.round_number}: "
74
+ f"all scores={result.observation.all_scores}")
75
+
76
+ print(f"Final scores: {result.observation.all_scores}")
77
+ ```
78
+
79
  ## Reset Parameters
80
 
81
  ```python
82
  # Specific game and strategy
83
  result = env.reset(game="stag_hunt", strategy="grudger")
84
 
85
+ # N-player game (strategy applies to all opponents)
86
+ result = env.reset(game="nplayer_volunteer_dilemma", strategy="random")
87
+
88
  # Random game and strategy (default)
89
  result = env.reset()
90
  ```
 
94
  - **Web Interface** at `/web` — Interactive UI for exploring the environment
95
  - **API Docs** at `/docs` — Full OpenAPI/Swagger interface
96
  - **Health Check** at `/health` — Container health monitoring
97
+ - **WebSocket** at `/ws` — Persistent session endpoint (reset/step with state)
98
 
99
  ## Environment Details
100
 
101
  ### Action
102
 
103
  **KantBenchAction**: Single field
104
+ - `move` (str) — Your move (e.g. `"cooperate"`, `"defect"`, `"hawk"`, `"contribute_10"`)
105
 
106
  ### Observation
107
 
108
  **KantBenchObservation**: Full round result and episode state
109
  - `game_name`, `game_description` — Current game info
110
  - `available_moves` — Valid moves for this game
111
+ - `your_move`, `opponent_move` — Moves played this round (2-player)
112
+ - `your_payoff`, `opponent_payoff` — Payoffs this round (2-player)
113
  - `cumulative_score` — Your total score
114
  - `round_number`, `max_rounds` — Episode progress
115
  - `opponent_strategy` — Opponent strategy name
116
  - `history` — Full round-by-round history
117
+ - `num_players` — Number of players (N-player games only, `null` for 2-player)
118
+ - `player_index` — Your player index (N-player games only)
119
+ - `all_scores` — Scores for all players (N-player games only)
120
 
121
  ## Deployment
122
 
client.py CHANGED
@@ -10,7 +10,7 @@ from .models import KantBenchAction, KantBenchObservation
10
 
11
 
12
  class KantBenchEnv(
13
- EnvClient[KantBenchAction, KantBenchObservation]
14
  ):
15
  """
16
  Client for the KantBench game theory environment.
 
10
 
11
 
12
  class KantBenchEnv(
13
+ EnvClient[KantBenchAction, KantBenchObservation, State]
14
  ):
15
  """
16
  Client for the KantBench game theory environment.
common/games_meta/game_tags.py CHANGED
@@ -18,6 +18,9 @@ from constant_definitions.batch4.tag_constants import (
18
  MARKET_COMPETITION, EVOLUTIONARY, SECURITY, NETWORK,
19
  # Action space
20
  BINARY_CHOICE, SMALL_CHOICE, LARGE_CHOICE,
 
 
 
21
  # Category grouping
22
  CATEGORIES,
23
  )
@@ -158,6 +161,20 @@ GAME_TAGS: dict[str, frozenset[str]] = {
158
  "risk_dominance": frozenset({NO_COMMUNICATION, COMPLETE_INFORMATION, SIMULTANEOUS, SINGLE_SHOT, COORDINATION, EVOLUTIONARY, BINARY_CHOICE}),
159
  "threshold_public_goods": frozenset({NO_COMMUNICATION, COMPLETE_INFORMATION, SIMULTANEOUS, SINGLE_SHOT, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, LARGE_CHOICE}),
160
  "evolutionary_pd": frozenset({NO_COMMUNICATION, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, EVOLUTIONARY, BINARY_CHOICE}),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  }
162
 
163
 
 
18
  MARKET_COMPETITION, EVOLUTIONARY, SECURITY, NETWORK,
19
  # Action space
20
  BINARY_CHOICE, SMALL_CHOICE, LARGE_CHOICE,
21
+ # Multiplayer dimensions
22
+ MULTIPLAYER, COALITION_FORMATION,
23
+ PENALTY_ENFORCEMENT, BINDING_ENFORCEMENT, META_GOVERNANCE,
24
  # Category grouping
25
  CATEGORIES,
26
  )
 
161
  "risk_dominance": frozenset({NO_COMMUNICATION, COMPLETE_INFORMATION, SIMULTANEOUS, SINGLE_SHOT, COORDINATION, EVOLUTIONARY, BINARY_CHOICE}),
162
  "threshold_public_goods": frozenset({NO_COMMUNICATION, COMPLETE_INFORMATION, SIMULTANEOUS, SINGLE_SHOT, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, LARGE_CHOICE}),
163
  "evolutionary_pd": frozenset({NO_COMMUNICATION, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, EVOLUTIONARY, BINARY_CHOICE}),
164
+
165
+ # ── N-player games (common/games_meta/nplayer_games.py) ──
166
+ "nplayer_public_goods": frozenset({NO_COMMUNICATION, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, LARGE_CHOICE, MULTIPLAYER}),
167
+ "nplayer_volunteer_dilemma": frozenset({NO_COMMUNICATION, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, BINARY_CHOICE, MULTIPLAYER}),
168
+ "nplayer_el_farol": frozenset({NO_COMMUNICATION, INCOMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, ANTI_COORDINATION, SOCIAL_DILEMMA, BINARY_CHOICE, MULTIPLAYER}),
169
+
170
+ # ── Coalition games (common/games_meta/coalition_config.py) ──
171
+ "coalition_cartel": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, MARKET_COMPETITION, BINARY_CHOICE, MULTIPLAYER, COALITION_FORMATION, PENALTY_ENFORCEMENT, META_GOVERNANCE}),
172
+ "coalition_alliance": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, BINARY_CHOICE, MULTIPLAYER, COALITION_FORMATION, META_GOVERNANCE}),
173
+ "coalition_voting": frozenset({BINDING_COMMITMENT, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, VOTING, BINARY_CHOICE, MULTIPLAYER, COALITION_FORMATION, BINDING_ENFORCEMENT, META_GOVERNANCE}),
174
+ "coalition_ostracism": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, ASYMMETRIC_PAYOFF, SOCIAL_DILEMMA, SMALL_CHOICE, MULTIPLAYER, COALITION_FORMATION, PENALTY_ENFORCEMENT, META_GOVERNANCE}),
175
+ "coalition_resource_trading": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, MARKET_COMPETITION, BINARY_CHOICE, MULTIPLAYER, COALITION_FORMATION, META_GOVERNANCE}),
176
+ "coalition_rule_voting": frozenset({BINDING_COMMITMENT, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, VOTING, BINARY_CHOICE, MULTIPLAYER, COALITION_FORMATION, BINDING_ENFORCEMENT, META_GOVERNANCE}),
177
+ "coalition_commons": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, BINARY_CHOICE, MULTIPLAYER, COALITION_FORMATION, PENALTY_ENFORCEMENT, META_GOVERNANCE}),
178
  }
179
 
180
 
constant_definitions/batch4/tag_constants.py CHANGED
@@ -40,6 +40,19 @@ BINARY_CHOICE = "binary_choice"
40
  SMALL_CHOICE = "small_choice"
41
  LARGE_CHOICE = "large_choice"
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  # ── Grouped by dimension (for programmatic enumeration) ──
44
  CATEGORIES: dict[str, list[str]] = {
45
  "communication": [
@@ -63,4 +76,8 @@ CATEGORIES: dict[str, list[str]] = {
63
  "action_space": [
64
  BINARY_CHOICE, SMALL_CHOICE, LARGE_CHOICE,
65
  ],
 
 
 
 
66
  }
 
40
  SMALL_CHOICE = "small_choice"
41
  LARGE_CHOICE = "large_choice"
42
 
43
+ # ── Player count ──
44
+ MULTIPLAYER = "multiplayer"
45
+
46
+ # ── Coalition ──
47
+ COALITION_FORMATION = "coalition_formation"
48
+
49
+ # ── Enforcement ──
50
+ PENALTY_ENFORCEMENT = "penalty_enforcement"
51
+ BINDING_ENFORCEMENT = "binding_enforcement"
52
+
53
+ # ── Governance ──
54
+ META_GOVERNANCE = "meta_governance"
55
+
56
  # ── Grouped by dimension (for programmatic enumeration) ──
57
  CATEGORIES: dict[str, list[str]] = {
58
  "communication": [
 
76
  "action_space": [
77
  BINARY_CHOICE, SMALL_CHOICE, LARGE_CHOICE,
78
  ],
79
+ "player_count": [MULTIPLAYER],
80
+ "coalition": [COALITION_FORMATION],
81
+ "enforcement": [PENALTY_ENFORCEMENT, BINDING_ENFORCEMENT],
82
+ "governance": [META_GOVERNANCE],
83
  }