jtowarek committed on
Commit
688c130
·
verified ·
1 Parent(s): dd8e198

Upload folder using huggingface_hub

Browse files
common/games.py CHANGED
@@ -8,34 +8,15 @@ from typing import Callable
8
  from constant_definitions.game_constants import (
9
  DEFAULT_ZERO_FLOAT,
10
  DEFAULT_ZERO_INT,
11
- # Prisoner's Dilemma
12
- PD_CC_PAYOFF,
13
- PD_CD_PAYOFF,
14
- PD_DC_PAYOFF,
15
- PD_DD_PAYOFF,
16
- # Stag Hunt
17
- SH_SS_PAYOFF,
18
- SH_SH_PAYOFF,
19
- SH_HS_PAYOFF,
20
- SH_HH_PAYOFF,
21
- # Hawk-Dove
22
- HD_HH_PAYOFF,
23
- HD_HD_PAYOFF,
24
- HD_DH_PAYOFF,
25
- HD_DD_PAYOFF,
26
- # Ultimatum
27
  ULTIMATUM_POT,
28
- # Trust
29
- TRUST_MULTIPLIER,
30
- TRUST_ENDOWMENT,
31
- # Public Goods
32
- PG_MULTIPLIER_NUMERATOR,
33
- PG_MULTIPLIER_DENOMINATOR,
34
- PG_ENDOWMENT,
35
- PG_DEFAULT_NUM_PLAYERS,
36
- # Round counts
37
- DEFAULT_NUM_ROUNDS,
38
- SINGLE_SHOT_ROUNDS,
39
  )
40
 
41
  # ---------------------------------------------------------------------------
@@ -50,30 +31,39 @@ class GameConfig:
50
  name: str
51
  description: str
52
  actions: list[str]
53
- game_type: str # "matrix" | "ultimatum" | "trust" | "public_goods"
54
  default_rounds: int
55
- payoff_fn: Callable[[str, str], tuple[float, float]]
 
 
 
 
 
 
 
 
 
56
 
57
 
58
  # ---------------------------------------------------------------------------
59
  # Matrix-game payoff helpers
60
  # ---------------------------------------------------------------------------
61
 
62
- _PD_MATRIX: dict[tuple[str, str], tuple[float, float]] = {
63
  ("cooperate", "cooperate"): (float(PD_CC_PAYOFF), float(PD_CC_PAYOFF)),
64
  ("cooperate", "defect"): (float(PD_CD_PAYOFF), float(PD_DC_PAYOFF)),
65
  ("defect", "cooperate"): (float(PD_DC_PAYOFF), float(PD_CD_PAYOFF)),
66
  ("defect", "defect"): (float(PD_DD_PAYOFF), float(PD_DD_PAYOFF)),
67
  }
68
 
69
- _SH_MATRIX: dict[tuple[str, str], tuple[float, float]] = {
70
  ("stag", "stag"): (float(SH_SS_PAYOFF), float(SH_SS_PAYOFF)),
71
  ("stag", "hare"): (float(SH_SH_PAYOFF), float(SH_HS_PAYOFF)),
72
  ("hare", "stag"): (float(SH_HS_PAYOFF), float(SH_SH_PAYOFF)),
73
  ("hare", "hare"): (float(SH_HH_PAYOFF), float(SH_HH_PAYOFF)),
74
  }
75
 
76
- _HD_MATRIX: dict[tuple[str, str], tuple[float, float]] = {
77
  ("hawk", "hawk"): (float(HD_HH_PAYOFF), float(HD_HH_PAYOFF)),
78
  ("hawk", "dove"): (float(HD_HD_PAYOFF), float(HD_DH_PAYOFF)),
79
  ("dove", "hawk"): (float(HD_DH_PAYOFF), float(HD_HD_PAYOFF)),
@@ -81,9 +71,7 @@ _HD_MATRIX: dict[tuple[str, str], tuple[float, float]] = {
81
  }
82
 
83
 
84
- def _matrix_payoff_fn(
85
- matrix: dict[tuple[str, str], tuple[float, float]],
86
- ) -> Callable[[str, str], tuple[float, float]]:
87
  """Return a payoff function backed by a pre-built matrix dict."""
88
 
89
  def _payoff(player_action: str, opponent_action: str) -> tuple[float, float]:
@@ -284,6 +272,7 @@ def _load_extensions() -> None:
284
  "common.games_market.advanced", "common.games_coop.cooperative",
285
  "common.games_coop.dynamic", "common.games_coop.pd_variants",
286
  "common.games_coop.infinite", "common.games_coop.stochastic",
 
287
  ]:
288
  try:
289
  importlib.import_module(mod)
 
8
  from constant_definitions.game_constants import (
9
  DEFAULT_ZERO_FLOAT,
10
  DEFAULT_ZERO_INT,
11
+ PD_CC_PAYOFF, PD_CD_PAYOFF, PD_DC_PAYOFF, PD_DD_PAYOFF,
12
+ SH_SS_PAYOFF, SH_SH_PAYOFF, SH_HS_PAYOFF, SH_HH_PAYOFF,
13
+ HD_HH_PAYOFF, HD_HD_PAYOFF, HD_DH_PAYOFF, HD_DD_PAYOFF,
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  ULTIMATUM_POT,
15
+ TRUST_MULTIPLIER, TRUST_ENDOWMENT,
16
+ PG_MULTIPLIER_NUMERATOR, PG_MULTIPLIER_DENOMINATOR,
17
+ PG_ENDOWMENT, PG_DEFAULT_NUM_PLAYERS,
18
+ DEFAULT_NUM_ROUNDS, SINGLE_SHOT_ROUNDS, DEFAULT_TWO_PLAYERS,
19
+ OPPONENT_MODE_STRATEGY,
 
 
 
 
 
 
20
  )
21
 
22
  # ---------------------------------------------------------------------------
 
31
  name: str
32
  description: str
33
  actions: list[str]
34
+ game_type: str
35
  default_rounds: int
36
+ payoff_fn: Callable
37
+ num_players: int = DEFAULT_TWO_PLAYERS
38
+ applied_variants: tuple[str, ...] = ()
39
+ base_game_key: str = ""
40
+ enforcement: str = ""
41
+ penalty_numerator: int = DEFAULT_ZERO_INT
42
+ penalty_denominator: int = SINGLE_SHOT_ROUNDS
43
+ allow_side_payments: bool = False
44
+ opponent_mode: str = OPPONENT_MODE_STRATEGY
45
+ opponent_actions: tuple[str, ...] | None = None
46
 
47
 
48
  # ---------------------------------------------------------------------------
49
  # Matrix-game payoff helpers
50
  # ---------------------------------------------------------------------------
51
 
52
+ _PD_MATRIX = {
53
  ("cooperate", "cooperate"): (float(PD_CC_PAYOFF), float(PD_CC_PAYOFF)),
54
  ("cooperate", "defect"): (float(PD_CD_PAYOFF), float(PD_DC_PAYOFF)),
55
  ("defect", "cooperate"): (float(PD_DC_PAYOFF), float(PD_CD_PAYOFF)),
56
  ("defect", "defect"): (float(PD_DD_PAYOFF), float(PD_DD_PAYOFF)),
57
  }
58
 
59
+ _SH_MATRIX = {
60
  ("stag", "stag"): (float(SH_SS_PAYOFF), float(SH_SS_PAYOFF)),
61
  ("stag", "hare"): (float(SH_SH_PAYOFF), float(SH_HS_PAYOFF)),
62
  ("hare", "stag"): (float(SH_HS_PAYOFF), float(SH_SH_PAYOFF)),
63
  ("hare", "hare"): (float(SH_HH_PAYOFF), float(SH_HH_PAYOFF)),
64
  }
65
 
66
+ _HD_MATRIX = {
67
  ("hawk", "hawk"): (float(HD_HH_PAYOFF), float(HD_HH_PAYOFF)),
68
  ("hawk", "dove"): (float(HD_HD_PAYOFF), float(HD_DH_PAYOFF)),
69
  ("dove", "hawk"): (float(HD_DH_PAYOFF), float(HD_HD_PAYOFF)),
 
71
  }
72
 
73
 
74
+ def _matrix_payoff_fn(matrix: dict) -> Callable:
 
 
75
  """Return a payoff function backed by a pre-built matrix dict."""
76
 
77
  def _payoff(player_action: str, opponent_action: str) -> tuple[float, float]:
 
272
  "common.games_market.advanced", "common.games_coop.cooperative",
273
  "common.games_coop.dynamic", "common.games_coop.pd_variants",
274
  "common.games_coop.infinite", "common.games_coop.stochastic",
275
+ "common.meta.meta_games",
276
  ]:
277
  try:
278
  importlib.import_module(mod)
common/games_coop/pd_variants.py CHANGED
@@ -1,13 +1,14 @@
1
  """Prisoner's Dilemma variants for KantBench."""
2
  from __future__ import annotations
3
 
 
 
4
  from common.games import GAMES, GameConfig, _matrix_payoff_fn
 
5
  from constant_definitions.game_constants import (
6
- PD_CC_PAYOFF, PD_CD_PAYOFF, PD_DC_PAYOFF, PD_DD_PAYOFF,
7
  DEFAULT_NUM_ROUNDS, SINGLE_SHOT_ROUNDS,
8
  )
9
  from constant_definitions.var.pd_variant_constants import (
10
- OPD_EXIT_PAYOFF,
11
  APD_A_TEMPTATION, APD_A_REWARD, APD_A_PUNISHMENT, APD_A_SUCKER,
12
  APD_B_TEMPTATION, APD_B_REWARD, APD_B_PUNISHMENT, APD_B_SUCKER,
13
  DONATION_BENEFIT, DONATION_COST,
@@ -18,22 +19,6 @@ from constant_definitions.var.pd_variant_constants import (
18
  _ZERO_F = float()
19
 
20
 
21
- # -- Optional PD (cooperate / defect / exit) --
22
- _OPD_EXIT_F = float(OPD_EXIT_PAYOFF)
23
- _OPD_BASE: dict[tuple[str, str], tuple[float, float]] = {
24
- ("cooperate", "cooperate"): (float(PD_CC_PAYOFF), float(PD_CC_PAYOFF)),
25
- ("cooperate", "defect"): (float(PD_CD_PAYOFF), float(PD_DC_PAYOFF)),
26
- ("defect", "cooperate"): (float(PD_DC_PAYOFF), float(PD_CD_PAYOFF)),
27
- ("defect", "defect"): (float(PD_DD_PAYOFF), float(PD_DD_PAYOFF)),
28
- }
29
-
30
-
31
- def _optional_pd_payoff(pa: str, oa: str) -> tuple[float, float]:
32
- if pa == "exit" or oa == "exit":
33
- return (_OPD_EXIT_F, _OPD_EXIT_F)
34
- return _OPD_BASE[(pa, oa)]
35
-
36
-
37
  # -- Asymmetric PD (alibi game: different payoffs per player) --
38
  _ASYM_PD: dict[tuple[str, str], tuple[float, float]] = {
39
  ("cooperate", "cooperate"): (float(APD_A_REWARD), float(APD_B_REWARD)),
@@ -74,20 +59,21 @@ _PW: dict[tuple[str, str], tuple[float, float]] = {
74
 
75
 
76
  # -- Register --
77
- PD_VARIANT_GAMES: dict[str, GameConfig] = {
78
- "optional_pd": GameConfig(
79
- name="Optional Prisoner's Dilemma",
80
- description=(
81
- "A Prisoner's Dilemma with a third action: exit. Exiting gives "
82
- "a safe intermediate payoff regardless of the opponent's choice. "
83
- "Tests whether outside options change cooperation dynamics and "
84
- "models situations where players can walk away from interactions."
85
- ),
86
- actions=["cooperate", "defect", "exit"],
87
- game_type="matrix",
88
- default_rounds=DEFAULT_NUM_ROUNDS,
89
- payoff_fn=_optional_pd_payoff,
90
  ),
 
 
 
 
91
  "asymmetric_pd": GameConfig(
92
  name="Asymmetric Prisoner's Dilemma",
93
  description=(
 
1
  """Prisoner's Dilemma variants for KantBench."""
2
  from __future__ import annotations
3
 
4
+ from dataclasses import replace
5
+
6
  from common.games import GAMES, GameConfig, _matrix_payoff_fn
7
+ from common.variants import apply_exit
8
  from constant_definitions.game_constants import (
 
9
  DEFAULT_NUM_ROUNDS, SINGLE_SHOT_ROUNDS,
10
  )
11
  from constant_definitions.var.pd_variant_constants import (
 
12
  APD_A_TEMPTATION, APD_A_REWARD, APD_A_PUNISHMENT, APD_A_SUCKER,
13
  APD_B_TEMPTATION, APD_B_REWARD, APD_B_PUNISHMENT, APD_B_SUCKER,
14
  DONATION_BENEFIT, DONATION_COST,
 
19
  _ZERO_F = float()
20
 
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  # -- Asymmetric PD (alibi game: different payoffs per player) --
23
  _ASYM_PD: dict[tuple[str, str], tuple[float, float]] = {
24
  ("cooperate", "cooperate"): (float(APD_A_REWARD), float(APD_B_REWARD)),
 
59
 
60
 
61
  # -- Register --
62
+ _PD_KEY = "prisoners_dilemma"
63
+ _optional_pd_composed = apply_exit(GAMES[_PD_KEY], base_key=_PD_KEY)
64
+ _optional_pd = replace(
65
+ _optional_pd_composed,
66
+ name="Optional Prisoner's Dilemma",
67
+ description=(
68
+ "A Prisoner's Dilemma with a third action: exit. Exiting gives "
69
+ "a safe intermediate payoff regardless of the opponent's choice. "
70
+ "Tests whether outside options change cooperation dynamics and "
71
+ "models situations where players can walk away from interactions."
 
 
 
72
  ),
73
+ )
74
+
75
+ PD_VARIANT_GAMES: dict[str, GameConfig] = {
76
+ "optional_pd": _optional_pd,
77
  "asymmetric_pd": GameConfig(
78
  name="Asymmetric Prisoner's Dilemma",
79
  description=(
common/games_info/communication.py CHANGED
@@ -1,11 +1,12 @@
1
  """Communication and mediation games for KantBench."""
2
  from __future__ import annotations
3
 
 
 
4
  from common.games import GAMES, GameConfig, _matrix_payoff_fn
 
5
  from constant_definitions.game_constants import DEFAULT_NUM_ROUNDS, SINGLE_SHOT_ROUNDS
6
  from constant_definitions.var.communication_constants import (
7
- CTPD_REWARD, CTPD_TEMPTATION, CTPD_PUNISHMENT, CTPD_SUCKER,
8
- COMMIT_COST,
9
  CE_FOLLOW_FOLLOW, CE_FOLLOW_DEVIATE,
10
  CE_DEVIATE_FOLLOW, CE_DEVIATE_DEVIATE,
11
  FP_MATCH_PAYOFF, FP_MISMATCH_PAYOFF,
@@ -13,49 +14,35 @@ from constant_definitions.var.communication_constants import (
13
  MG_REJECT_ACCEPT, MG_REJECT_REJECT,
14
  )
15
 
16
- _ONE = int(bool(True))
17
  _ZERO_F = float()
18
 
19
- # -- Cheap Talk PD (message + action, messages are non-binding) --
20
- _CTPD_BASE: dict[tuple[str, str], tuple[float, float]] = {
21
- ("cooperate", "cooperate"): (float(CTPD_REWARD), float(CTPD_REWARD)),
22
- ("cooperate", "defect"): (float(CTPD_SUCKER), float(CTPD_TEMPTATION)),
23
- ("defect", "cooperate"): (float(CTPD_TEMPTATION), float(CTPD_SUCKER)),
24
- ("defect", "defect"): (float(CTPD_PUNISHMENT), float(CTPD_PUNISHMENT)),
25
- }
26
-
 
 
 
 
 
 
27
 
28
- def _cheap_talk_pd_payoff(pa: str, oa: str) -> tuple[float, float]:
29
- """Message is cheap talk; payoff depends only on actual action."""
30
- actual_p = pa.rsplit("_", _ONE)[_ONE]
31
- actual_o = oa.rsplit("_", _ONE)[_ONE]
32
- return _CTPD_BASE[(actual_p, actual_o)]
33
-
34
-
35
- _CTPD_ACTS = [
36
- "msg_coop_cooperate", "msg_coop_defect",
37
- "msg_def_cooperate", "msg_def_defect",
38
- ]
39
-
40
-
41
- # -- Binding Commitment (costly commitment mechanism) --
42
- _CC = float(CTPD_REWARD)
43
- _CS = float(CTPD_SUCKER)
44
- _CT = float(CTPD_TEMPTATION)
45
- _CP = float(CTPD_PUNISHMENT)
46
- _COST = float(COMMIT_COST)
47
-
48
- _BIND_MATRIX: dict[tuple[str, str], tuple[float, float]] = {
49
- ("commit_coop", "commit_coop"): (_CC - _COST, _CC - _COST),
50
- ("commit_coop", "free_coop"): (_CC - _COST, _CC),
51
- ("commit_coop", "free_defect"): (_CS - _COST, _CT),
52
- ("free_coop", "commit_coop"): (_CC, _CC - _COST),
53
- ("free_coop", "free_coop"): (_CC, _CC),
54
- ("free_coop", "free_defect"): (_CS, _CT),
55
- ("free_defect", "commit_coop"): (_CT, _CS - _COST),
56
- ("free_defect", "free_coop"): (_CT, _CS),
57
- ("free_defect", "free_defect"): (_CP, _CP),
58
- }
59
 
60
 
61
  # -- Correlated Equilibrium (follow external mediator or deviate) --
@@ -90,32 +77,8 @@ _MED: dict[tuple[str, str], tuple[float, float]] = {
90
 
91
  # -- Register --
92
  COMMUNICATION_GAMES: dict[str, GameConfig] = {
93
- "cheap_talk_pd": GameConfig(
94
- name="Cheap Talk Prisoner's Dilemma",
95
- description=(
96
- "A Prisoner's Dilemma where each player sends a non-binding "
97
- "message before acting. Messages are cheap talk: costless and "
98
- "unenforceable. Payoffs depend only on actual actions. Tests "
99
- "whether non-binding communication improves cooperation."
100
- ),
101
- actions=_CTPD_ACTS,
102
- game_type="cheap_talk_pd",
103
- default_rounds=DEFAULT_NUM_ROUNDS,
104
- payoff_fn=_cheap_talk_pd_payoff,
105
- ),
106
- "binding_commitment": GameConfig(
107
- name="Binding Commitment Game",
108
- description=(
109
- "A Prisoner's Dilemma where players can pay a cost to make a "
110
- "binding commitment to cooperate. The commitment is credible "
111
- "but costly. Tests whether costly signaling through commitment "
112
- "mechanisms changes equilibrium behavior."
113
- ),
114
- actions=["commit_coop", "free_coop", "free_defect"],
115
- game_type="matrix",
116
- default_rounds=DEFAULT_NUM_ROUNDS,
117
- payoff_fn=_matrix_payoff_fn(_BIND_MATRIX),
118
- ),
119
  "correlated_equilibrium": GameConfig(
120
  name="Correlated Equilibrium Game",
121
  description=(
 
1
  """Communication and mediation games for KantBench."""
2
  from __future__ import annotations
3
 
4
+ from dataclasses import replace
5
+
6
  from common.games import GAMES, GameConfig, _matrix_payoff_fn
7
+ from common.variants import apply_cheap_talk, apply_binding_commitment
8
  from constant_definitions.game_constants import DEFAULT_NUM_ROUNDS, SINGLE_SHOT_ROUNDS
9
  from constant_definitions.var.communication_constants import (
 
 
10
  CE_FOLLOW_FOLLOW, CE_FOLLOW_DEVIATE,
11
  CE_DEVIATE_FOLLOW, CE_DEVIATE_DEVIATE,
12
  FP_MATCH_PAYOFF, FP_MISMATCH_PAYOFF,
 
14
  MG_REJECT_ACCEPT, MG_REJECT_REJECT,
15
  )
16
 
 
17
  _ZERO_F = float()
18
 
19
+ # -- Cheap Talk PD via composition --
20
+ _PD_KEY = "prisoners_dilemma"
21
+ _cheap_talk_pd_composed = apply_cheap_talk(GAMES[_PD_KEY], base_key=_PD_KEY)
22
+ _cheap_talk_pd = replace(
23
+ _cheap_talk_pd_composed,
24
+ name="Cheap Talk Prisoner's Dilemma",
25
+ description=(
26
+ "A Prisoner's Dilemma where each player sends a non-binding "
27
+ "message before acting. Messages are cheap talk: costless and "
28
+ "unenforceable. Payoffs depend only on actual actions. Tests "
29
+ "whether non-binding communication improves cooperation."
30
+ ),
31
+ game_type="cheap_talk_pd",
32
+ )
33
 
34
+ # -- Binding Commitment via composition --
35
+ _binding_composed = apply_binding_commitment(GAMES[_PD_KEY], base_key=_PD_KEY)
36
+ _binding_commitment = replace(
37
+ _binding_composed,
38
+ name="Binding Commitment Game",
39
+ description=(
40
+ "A Prisoner's Dilemma where players can pay a cost to make a "
41
+ "binding commitment to cooperate. The commitment is credible "
42
+ "but costly. Tests whether costly signaling through commitment "
43
+ "mechanisms changes equilibrium behavior."
44
+ ),
45
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
 
48
  # -- Correlated Equilibrium (follow external mediator or deviate) --
 
77
 
78
  # -- Register --
79
  COMMUNICATION_GAMES: dict[str, GameConfig] = {
80
+ "cheap_talk_pd": _cheap_talk_pd,
81
+ "binding_commitment": _binding_commitment,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  "correlated_equilibrium": GameConfig(
83
  name="Correlated Equilibrium Game",
84
  description=(
common/games_meta/coalition_config.py CHANGED
@@ -2,10 +2,8 @@
2
 
3
  from __future__ import annotations
4
 
5
- from dataclasses import dataclass
6
- from typing import Callable
7
-
8
- from common.games_meta.nplayer_config import NPlayerGameConfig, NPLAYER_GAMES
9
  from constant_definitions.nplayer.coalition_constants import (
10
  COALITION_DEFAULT_ROUNDS, COALITION_DEFAULT_PENALTY_NUMERATOR,
11
  COALITION_DEFAULT_PENALTY_DENOMINATOR,
@@ -27,32 +25,18 @@ from constant_definitions.nplayer.coalition_constants import (
27
  COMMONS_LOW_DEPLETED, COMMONS_HIGH_DEPLETED,
28
  )
29
 
 
 
30
  _ONE = int(bool(True))
31
  _ZERO = int()
32
  _PEN_N = COALITION_DEFAULT_PENALTY_NUMERATOR
33
  _PEN_D = COALITION_DEFAULT_PENALTY_DENOMINATOR
34
 
35
 
36
- @dataclass(frozen=True)
37
- class CoalitionGameConfig:
38
- """Immutable specification for a coalition-enabled N-player game."""
39
-
40
- name: str
41
- description: str
42
- actions: list[str]
43
- num_players: int
44
- default_rounds: int
45
- payoff_fn: Callable[[tuple[str, ...]], tuple[float, ...]]
46
- enforcement: str
47
- penalty_numerator: int
48
- penalty_denominator: int
49
- allow_side_payments: bool
50
 
51
 
52
- COALITION_GAMES: dict[str, CoalitionGameConfig] = {}
53
-
54
-
55
- def get_coalition_game(name: str) -> CoalitionGameConfig:
56
  """Look up a coalition game by name. Raises KeyError if not found."""
57
  return COALITION_GAMES[name]
58
 
@@ -160,16 +144,17 @@ def _commons_governance_payoff(actions: tuple[str, ...]) -> tuple[float, ...]:
160
  # ---------------------------------------------------------------------------
161
 
162
  def _cfg(name: str, desc: str, actions: list[str], n: int,
163
- fn: object, enf: str, side: bool = False) -> CoalitionGameConfig:
164
- return CoalitionGameConfig(
165
- name=name, description=desc, actions=actions, num_players=n,
166
- default_rounds=COALITION_DEFAULT_ROUNDS, payoff_fn=fn, # type: ignore[arg-type]
 
167
  enforcement=enf, penalty_numerator=_PEN_N, penalty_denominator=_PEN_D,
168
  allow_side_payments=side,
169
  )
170
 
171
 
172
- _BUILTIN_COALITION_GAMES: dict[str, CoalitionGameConfig] = {
173
  "coalition_cartel": _cfg(
174
  "Cartel",
175
  "Players collude or compete. If enough collude the cartel holds. "
@@ -218,10 +203,6 @@ _BUILTIN_COALITION_GAMES: dict[str, CoalitionGameConfig] = {
218
 
219
  COALITION_GAMES.update(_BUILTIN_COALITION_GAMES)
220
 
221
- # Dual registration as plain NPlayerGameConfig
222
  for _key, _c in _BUILTIN_COALITION_GAMES.items():
223
- NPLAYER_GAMES[_key] = NPlayerGameConfig(
224
- name=_c.name, description=_c.description, actions=_c.actions,
225
- num_players=_c.num_players, default_rounds=_c.default_rounds,
226
- payoff_fn=_c.payoff_fn,
227
- )
 
2
 
3
  from __future__ import annotations
4
 
5
+ from common.games import GameConfig
6
+ from common.games_meta.nplayer_config import NPLAYER_GAMES
 
 
7
  from constant_definitions.nplayer.coalition_constants import (
8
  COALITION_DEFAULT_ROUNDS, COALITION_DEFAULT_PENALTY_NUMERATOR,
9
  COALITION_DEFAULT_PENALTY_DENOMINATOR,
 
25
  COMMONS_LOW_DEPLETED, COMMONS_HIGH_DEPLETED,
26
  )
27
 
28
+ CoalitionGameConfig = GameConfig
29
+
30
  _ONE = int(bool(True))
31
  _ZERO = int()
32
  _PEN_N = COALITION_DEFAULT_PENALTY_NUMERATOR
33
  _PEN_D = COALITION_DEFAULT_PENALTY_DENOMINATOR
34
 
35
 
36
+ COALITION_GAMES: dict[str, GameConfig] = {}
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
 
39
+ def get_coalition_game(name: str) -> GameConfig:
 
 
 
40
  """Look up a coalition game by name. Raises KeyError if not found."""
41
  return COALITION_GAMES[name]
42
 
 
144
  # ---------------------------------------------------------------------------
145
 
146
  def _cfg(name: str, desc: str, actions: list[str], n: int,
147
+ fn: object, enf: str, side: bool = False) -> GameConfig:
148
+ return GameConfig(
149
+ name=name, description=desc, actions=actions, game_type="coalition",
150
+ num_players=n, default_rounds=COALITION_DEFAULT_ROUNDS,
151
+ payoff_fn=fn, # type: ignore[arg-type]
152
  enforcement=enf, penalty_numerator=_PEN_N, penalty_denominator=_PEN_D,
153
  allow_side_payments=side,
154
  )
155
 
156
 
157
+ _BUILTIN_COALITION_GAMES: dict[str, GameConfig] = {
158
  "coalition_cartel": _cfg(
159
  "Cartel",
160
  "Players collude or compete. If enough collude the cartel holds. "
 
203
 
204
  COALITION_GAMES.update(_BUILTIN_COALITION_GAMES)
205
 
206
+ # Register coalition games as N-player games too (same GameConfig instances)
207
  for _key, _c in _BUILTIN_COALITION_GAMES.items():
208
+ NPLAYER_GAMES[_key] = _c
 
 
 
 
common/games_meta/game_tags.py CHANGED
@@ -78,8 +78,8 @@ GAME_TAGS: dict[str, frozenset[str]] = {
78
  "gift_exchange": frozenset({NO_COMMUNICATION, COMPLETE_INFORMATION, SEQUENTIAL, SINGLE_SHOT, ASYMMETRIC_PAYOFF, SOCIAL_DILEMMA, LARGE_CHOICE}),
79
 
80
  # ── games_info/communication.py ──
81
- "cheap_talk_pd": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, BINARY_CHOICE}),
82
- "binding_commitment": frozenset({BINDING_COMMITMENT, COMPLETE_INFORMATION, SEQUENTIAL, SINGLE_SHOT, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, BINARY_CHOICE}),
83
  "correlated_equilibrium": frozenset({MEDIATED, COMPLETE_INFORMATION, SIMULTANEOUS, SINGLE_SHOT, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, BINARY_CHOICE}),
84
  "focal_point": frozenset({NO_COMMUNICATION, COMPLETE_INFORMATION, SIMULTANEOUS, SINGLE_SHOT, COORDINATION, SOCIAL_DILEMMA, SMALL_CHOICE}),
85
  "mediated_game": frozenset({MEDIATED, COMPLETE_INFORMATION, SIMULTANEOUS, SINGLE_SHOT, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, BINARY_CHOICE}),
@@ -175,6 +175,19 @@ GAME_TAGS: dict[str, frozenset[str]] = {
175
  "coalition_resource_trading": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, MARKET_COMPETITION, BINARY_CHOICE, MULTIPLAYER, COALITION_FORMATION, META_GOVERNANCE}),
176
  "coalition_rule_voting": frozenset({BINDING_COMMITMENT, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, VOTING, BINARY_CHOICE, MULTIPLAYER, COALITION_FORMATION, BINDING_ENFORCEMENT, META_GOVERNANCE}),
177
  "coalition_commons": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, BINARY_CHOICE, MULTIPLAYER, COALITION_FORMATION, PENALTY_ENFORCEMENT, META_GOVERNANCE}),
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  }
179
 
180
 
@@ -183,6 +196,33 @@ GAME_TAGS: dict[str, frozenset[str]] = {
183
  # ---------------------------------------------------------------------------
184
 
185
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
  def get_games_by_tag(tag: str) -> list[str]:
187
  """Return all game keys that have the given tag."""
188
  return [key for key, tags in GAME_TAGS.items() if tag in tags]
 
78
  "gift_exchange": frozenset({NO_COMMUNICATION, COMPLETE_INFORMATION, SEQUENTIAL, SINGLE_SHOT, ASYMMETRIC_PAYOFF, SOCIAL_DILEMMA, LARGE_CHOICE}),
79
 
80
  # ── games_info/communication.py ──
81
+ "cheap_talk_pd": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, SMALL_CHOICE}),
82
+ "binding_commitment": frozenset({BINDING_COMMITMENT, COMPLETE_INFORMATION, SEQUENTIAL, SINGLE_SHOT, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, SMALL_CHOICE}),
83
  "correlated_equilibrium": frozenset({MEDIATED, COMPLETE_INFORMATION, SIMULTANEOUS, SINGLE_SHOT, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, BINARY_CHOICE}),
84
  "focal_point": frozenset({NO_COMMUNICATION, COMPLETE_INFORMATION, SIMULTANEOUS, SINGLE_SHOT, COORDINATION, SOCIAL_DILEMMA, SMALL_CHOICE}),
85
  "mediated_game": frozenset({MEDIATED, COMPLETE_INFORMATION, SIMULTANEOUS, SINGLE_SHOT, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, BINARY_CHOICE}),
 
175
  "coalition_resource_trading": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, MARKET_COMPETITION, BINARY_CHOICE, MULTIPLAYER, COALITION_FORMATION, META_GOVERNANCE}),
176
  "coalition_rule_voting": frozenset({BINDING_COMMITMENT, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, VOTING, BINARY_CHOICE, MULTIPLAYER, COALITION_FORMATION, BINDING_ENFORCEMENT, META_GOVERNANCE}),
177
  "coalition_commons": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, BINARY_CHOICE, MULTIPLAYER, COALITION_FORMATION, PENALTY_ENFORCEMENT, META_GOVERNANCE}),
178
+
179
+ # ── meta/meta_games.py ──
180
+ "rule_proposal_prisoners_dilemma": frozenset({BINDING_COMMITMENT, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, LARGE_CHOICE, META_GOVERNANCE}),
181
+ "rule_proposal_stag_hunt": frozenset({BINDING_COMMITMENT, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, COORDINATION, SOCIAL_DILEMMA, LARGE_CHOICE, META_GOVERNANCE}),
182
+ "rule_proposal_hawk_dove": frozenset({BINDING_COMMITMENT, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, ANTI_COORDINATION, SOCIAL_DILEMMA, LARGE_CHOICE, META_GOVERNANCE}),
183
+ "rule_signal_prisoners_dilemma": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, LARGE_CHOICE, META_GOVERNANCE}),
184
+ "rule_signal_stag_hunt": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, COORDINATION, SOCIAL_DILEMMA, LARGE_CHOICE, META_GOVERNANCE}),
185
+ "rule_signal_hawk_dove": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, ANTI_COORDINATION, SOCIAL_DILEMMA, LARGE_CHOICE, META_GOVERNANCE}),
186
+
187
+ # ── meta/meta_games.py (gossip) ──
188
+ "gossip_prisoners_dilemma": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, LARGE_CHOICE, META_GOVERNANCE}),
189
+ "gossip_stag_hunt": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, COORDINATION, SOCIAL_DILEMMA, LARGE_CHOICE, META_GOVERNANCE}),
190
+ "gossip_hawk_dove": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, ANTI_COORDINATION, SOCIAL_DILEMMA, LARGE_CHOICE, META_GOVERNANCE}),
191
  }
192
 
193
 
 
196
  # ---------------------------------------------------------------------------
197
 
198
 
199
+ def derive_variant_tags(
200
+ base_tags: frozenset[str], variant_name: str,
201
+ ) -> frozenset[str]:
202
+ """Compute tags for a composed variant game from base game tags."""
203
+ tags = set(base_tags)
204
+ if variant_name == "cheap_talk":
205
+ tags.discard(NO_COMMUNICATION)
206
+ tags.add(CHEAP_TALK)
207
+ tags.discard(BINARY_CHOICE)
208
+ tags.add(SMALL_CHOICE)
209
+ elif variant_name == "binding_commitment":
210
+ tags.discard(NO_COMMUNICATION)
211
+ tags.add(BINDING_COMMITMENT)
212
+ tags.discard(BINARY_CHOICE)
213
+ tags.add(SMALL_CHOICE)
214
+ elif variant_name == "exit":
215
+ tags.discard(BINARY_CHOICE)
216
+ tags.add(SMALL_CHOICE)
217
+ elif variant_name == "gossip":
218
+ tags.discard(NO_COMMUNICATION)
219
+ tags.add(CHEAP_TALK)
220
+ tags.discard(BINARY_CHOICE)
221
+ tags.add(LARGE_CHOICE)
222
+ tags.add(META_GOVERNANCE)
223
+ return frozenset(tags)
224
+
225
+
226
  def get_games_by_tag(tag: str) -> list[str]:
227
  """Return all game keys that have the given tag."""
228
  return [key for key, tags in GAME_TAGS.items() if tag in tags]
common/games_meta/nplayer_config.py CHANGED
@@ -2,29 +2,13 @@
2
 
3
  from __future__ import annotations
4
 
5
- from dataclasses import dataclass
6
- from typing import Callable
7
 
8
- from constant_definitions.nplayer.nplayer_constants import (
9
- NPLAYER_DEFAULT_ROUNDS,
10
- )
11
 
 
12
 
13
- @dataclass(frozen=True)
14
- class NPlayerGameConfig:
15
- """Immutable specification for an N-player game type."""
16
 
17
- name: str
18
- description: str
19
- actions: list[str]
20
- num_players: int
21
- default_rounds: int
22
- payoff_fn: Callable[[tuple[str, ...]], tuple[float, ...]]
23
-
24
-
25
- NPLAYER_GAMES: dict[str, NPlayerGameConfig] = {}
26
-
27
-
28
- def get_nplayer_game(name: str) -> NPlayerGameConfig:
29
  """Look up an N-player game by name. Raises KeyError if not found."""
30
  return NPLAYER_GAMES[name]
 
2
 
3
  from __future__ import annotations
4
 
5
+ from common.games import GameConfig
 
6
 
7
+ NPlayerGameConfig = GameConfig
 
 
8
 
9
+ NPLAYER_GAMES: dict[str, GameConfig] = {}
10
 
 
 
 
11
 
12
+ def get_nplayer_game(name: str) -> GameConfig:
 
 
 
 
 
 
 
 
 
 
 
13
  """Look up an N-player game by name. Raises KeyError if not found."""
14
  return NPLAYER_GAMES[name]
common/games_meta/nplayer_games.py CHANGED
@@ -2,7 +2,8 @@
2
 
3
  from __future__ import annotations
4
 
5
- from common.games_meta.nplayer_config import NPlayerGameConfig, NPLAYER_GAMES
 
6
  from constant_definitions.nplayer.nplayer_constants import (
7
  NPLAYER_DEFAULT_ROUNDS,
8
  NPG_ENDOWMENT,
@@ -90,19 +91,20 @@ def _el_farol_payoff(actions: tuple[str, ...]) -> tuple[float, ...]:
90
  _THREE = _ONE + _ONE + _ONE
91
  _FIVE = _THREE + _ONE + _ONE
92
 
93
- _BUILTIN_NPLAYER_GAMES: dict[str, NPlayerGameConfig] = {
94
- "nplayer_public_goods": NPlayerGameConfig(
95
  name="N-Player Public Goods",
96
  description=(
97
  "Each player contributes from an endowment. The total pot is "
98
  "multiplied and split equally among all players."
99
  ),
100
  actions=_PG_ACTIONS,
 
101
  num_players=_FIVE,
102
  default_rounds=NPLAYER_DEFAULT_ROUNDS,
103
  payoff_fn=_public_goods_payoff,
104
  ),
105
- "nplayer_volunteer_dilemma": NPlayerGameConfig(
106
  name="N-Player Volunteer's Dilemma",
107
  description=(
108
  "Players choose to volunteer or abstain. If at least one "
@@ -110,17 +112,19 @@ _BUILTIN_NPLAYER_GAMES: dict[str, NPlayerGameConfig] = {
110
  "If nobody volunteers, everyone gets nothing."
111
  ),
112
  actions=["volunteer", "abstain"],
 
113
  num_players=_FIVE,
114
  default_rounds=NPLAYER_DEFAULT_ROUNDS,
115
  payoff_fn=_volunteer_dilemma_payoff,
116
  ),
117
- "nplayer_el_farol": NPlayerGameConfig(
118
  name="N-Player El Farol Bar",
119
  description=(
120
  "Players decide whether to attend a bar. The bar is fun when "
121
  "not crowded but unpleasant when too many people show up."
122
  ),
123
  actions=["attend", "stay_home"],
 
124
  num_players=_FIVE,
125
  default_rounds=NPLAYER_DEFAULT_ROUNDS,
126
  payoff_fn=_el_farol_payoff,
 
2
 
3
  from __future__ import annotations
4
 
5
+ from common.games import GameConfig
6
+ from common.games_meta.nplayer_config import NPLAYER_GAMES
7
  from constant_definitions.nplayer.nplayer_constants import (
8
  NPLAYER_DEFAULT_ROUNDS,
9
  NPG_ENDOWMENT,
 
91
  _THREE = _ONE + _ONE + _ONE
92
  _FIVE = _THREE + _ONE + _ONE
93
 
94
+ _BUILTIN_NPLAYER_GAMES: dict[str, GameConfig] = {
95
+ "nplayer_public_goods": GameConfig(
96
  name="N-Player Public Goods",
97
  description=(
98
  "Each player contributes from an endowment. The total pot is "
99
  "multiplied and split equally among all players."
100
  ),
101
  actions=_PG_ACTIONS,
102
+ game_type="public_goods",
103
  num_players=_FIVE,
104
  default_rounds=NPLAYER_DEFAULT_ROUNDS,
105
  payoff_fn=_public_goods_payoff,
106
  ),
107
+ "nplayer_volunteer_dilemma": GameConfig(
108
  name="N-Player Volunteer's Dilemma",
109
  description=(
110
  "Players choose to volunteer or abstain. If at least one "
 
112
  "If nobody volunteers, everyone gets nothing."
113
  ),
114
  actions=["volunteer", "abstain"],
115
+ game_type="matrix",
116
  num_players=_FIVE,
117
  default_rounds=NPLAYER_DEFAULT_ROUNDS,
118
  payoff_fn=_volunteer_dilemma_payoff,
119
  ),
120
+ "nplayer_el_farol": GameConfig(
121
  name="N-Player El Farol Bar",
122
  description=(
123
  "Players decide whether to attend a bar. The bar is fun when "
124
  "not crowded but unpleasant when too many people show up."
125
  ),
126
  actions=["attend", "stay_home"],
127
+ game_type="matrix",
128
  num_players=_FIVE,
129
  default_rounds=NPLAYER_DEFAULT_ROUNDS,
130
  payoff_fn=_el_farol_payoff,
common/meta/memory_store.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Persistent cross-episode memory backed by cognee knowledge graph.
2
+
3
+ Records episode summaries, gossip ratings, and opponent statistics.
4
+ Uses in-memory stats when cognee is not installed.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import asyncio
9
+ import threading
10
+ from typing import Any
11
+
12
+ from constant_definitions.var.meta.reputation_constants import (
13
+ COGNEE_DATASET_NAME,
14
+ COGNEE_SEARCH_TYPE,
15
+ DEFAULT_REPUTATION_SCORE_NUMERATOR,
16
+ DEFAULT_REPUTATION_SCORE_DENOMINATOR,
17
+ REPUTATION_DECAY_NUMERATOR,
18
+ REPUTATION_DECAY_DENOMINATOR,
19
+ META_KEY_COOPERATION_RATE,
20
+ META_KEY_INTERACTION_COUNT,
21
+ META_KEY_GOSSIP_HISTORY,
22
+ )
23
+
24
+ _ZERO = int()
25
+ _ONE = int(bool(True))
26
+ _DEFAULT_SCORE = (
27
+ DEFAULT_REPUTATION_SCORE_NUMERATOR / DEFAULT_REPUTATION_SCORE_DENOMINATOR
28
+ )
29
+ _DECAY = REPUTATION_DECAY_NUMERATOR / REPUTATION_DECAY_DENOMINATOR
30
+
31
+ try:
32
+ import cognee as _cognee # type: ignore[import-untyped]
33
+ _HAS_COGNEE = True
34
+ except ImportError:
35
+ _cognee = None
36
+ _HAS_COGNEE = False
37
+
38
+
39
class AsyncBridge:
    """Run coroutines from synchronous code on a dedicated background loop.

    A private event loop is started on a daemon thread in ``__init__``.
    Call :meth:`close` (or use the bridge as a context manager) to stop the
    loop and release the thread once the bridge is no longer needed.
    """

    def __init__(self) -> None:
        self._loop = asyncio.new_event_loop()
        self._thread = threading.Thread(
            target=self._loop.run_forever, daemon=True,
        )
        self._thread.start()

    def run(self, coro: Any, timeout: float | None = None) -> Any:
        """Submit *coro* to the background loop and block for the result.

        Args:
            coro: Coroutine object to execute on the background loop.
            timeout: Maximum number of seconds to wait.  ``None`` (the
                default) waits indefinitely, as before.

        Returns:
            Whatever the coroutine returns.

        Raises:
            Any exception raised by the coroutine, or the timeout error
            raised by ``Future.result`` when *timeout* elapses.
        """
        future = asyncio.run_coroutine_threadsafe(coro, self._loop)
        try:
            return future.result(timeout)
        except BaseException:
            # Do not leave an abandoned coroutine running after a timeout or
            # interrupt; cancel() is a no-op once the future has finished.
            future.cancel()
            raise

    def close(self) -> None:
        """Stop the background loop and wait for its thread to exit.

        Safe to call more than once.
        """
        if self._loop.is_closed():
            return
        # stop() must be scheduled from the loop's own thread.
        self._loop.call_soon_threadsafe(self._loop.stop)
        self._thread.join()
        self._loop.close()

    def __enter__(self) -> "AsyncBridge":
        return self

    def __exit__(self, exc_type: Any, exc: Any, tb: Any) -> None:
        self.close()
53
+
54
+
55
def _default_reputation() -> dict[str, Any]:
    """Build a fresh neutral reputation record.

    A new dict (with a new, empty gossip list) is created on every call, so
    callers never share mutable state through the default.
    """
    neutral: dict[str, Any] = {}
    neutral["score"] = _DEFAULT_SCORE
    neutral[META_KEY_COOPERATION_RATE] = _DEFAULT_SCORE
    neutral[META_KEY_INTERACTION_COUNT] = _ZERO
    neutral[META_KEY_GOSSIP_HISTORY] = []
    return neutral
63
+
64
+
65
+ def _format_episode_text(
66
+ agent_id: str,
67
+ opponent_id: str,
68
+ game: str,
69
+ history: list[Any],
70
+ cooperation_rate: float,
71
+ scores: tuple[float, float],
72
+ ) -> str:
73
+ """Format an episode summary for cognee ingestion."""
74
+ rounds = len(history)
75
+ p_score, o_score = scores
76
+ actions = "; ".join(
77
+ f"R{r.round_number}: {r.player_action} vs {r.opponent_action}"
78
+ for r in history
79
+ )
80
+ return (
81
+ f"Game Interaction Report\n"
82
+ f"Agent: {agent_id} | Opponent: {opponent_id} | Game: {game}\n"
83
+ f"Rounds: {rounds} | Agent Score: {p_score} | "
84
+ f"Opponent Score: {o_score}\n"
85
+ f"Cooperation Rate: {cooperation_rate}\n"
86
+ f"Actions: {actions}\n"
87
+ )
88
+
89
+
90
+ def _parse_reputation(
91
+ results: Any, stats: dict[str, Any],
92
+ ) -> dict[str, Any]:
93
+ """Merge cognee search results with in-memory stats."""
94
+ rep = dict(stats) if stats else _default_reputation()
95
+ if results:
96
+ rep["cognee_context"] = str(results)
97
+ return rep
98
+
99
+
100
class CogneeMemoryStore:
    """Cross-episode memory combining cognee with in-memory opponent stats.

    When the optional ``cognee`` package was imported successfully, episode
    and gossip text is forwarded to it through an :class:`AsyncBridge`.
    Per-opponent running statistics are always kept in ``self._stats`` so
    the store keeps working without cognee.  Cognee calls are best-effort:
    failures are logged and never propagate to the caller.
    """

    def __init__(self) -> None:
        # The bridge (and its background thread) exists only with cognee.
        self._bridge = AsyncBridge() if _HAS_COGNEE else None
        self._stats: dict[str, dict[str, Any]] = {}

    @staticmethod
    def _log_cognee_failure(operation: str) -> None:
        """Log the exception currently being handled for a cognee call."""
        # Function-scope import keeps the module's import block unchanged.
        import logging

        logging.getLogger(__name__).warning(
            "cognee %s failed; continuing with in-memory stats",
            operation,
            exc_info=True,
        )

    def record_episode(
        self,
        agent_id: str,
        opponent_id: str,
        game: str,
        history: list[Any],
        cooperation_rate: float,
        scores: tuple[float, float],
    ) -> None:
        """Store an episode summary in cognee and update in-memory stats.

        The in-memory statistics for *opponent_id* are updated even when
        cognee is unavailable or its calls fail.
        """
        text = _format_episode_text(
            agent_id, opponent_id, game, history,
            cooperation_rate, scores,
        )
        if self._bridge is not None and _HAS_COGNEE:
            try:
                self._bridge.run(
                    _cognee.add(text, dataset_name=COGNEE_DATASET_NAME),
                )
                self._bridge.run(_cognee.cognify())
            except Exception:
                # Best-effort: surface the failure in logs instead of
                # silently discarding it, but never break the episode.
                self._log_cognee_failure("episode ingestion")
        self._update_stats(opponent_id, cooperation_rate, scores)

    def query_reputation(self, opponent_id: str) -> dict[str, Any]:
        """Return the reputation of *opponent_id*.

        Without cognee, or when the cognee search fails, the in-memory
        record (or a default record) is returned.  Otherwise the search
        results are merged into a copy of that record.
        """
        stats = self._stats.get(opponent_id, _default_reputation())
        if self._bridge is None or not _HAS_COGNEE:
            return stats
        try:
            results = self._bridge.run(
                _cognee.search(
                    f"reputation and behavior of {opponent_id}",
                    search_type=COGNEE_SEARCH_TYPE,
                ),
            )
            return _parse_reputation(results, stats)
        except Exception:
            self._log_cognee_failure("reputation search")
            return stats

    def record_gossip(
        self, rater_id: str, target_id: str, rating: str,
    ) -> None:
        """Record that *rater_id* rated *target_id* as *rating*.

        The rating is sent to cognee when available and always appended to
        the target's in-memory gossip history.
        """
        text = f"{rater_id} rated {target_id} as {rating}."
        if self._bridge is not None and _HAS_COGNEE:
            try:
                self._bridge.run(
                    _cognee.add(text, dataset_name=COGNEE_DATASET_NAME),
                )
            except Exception:
                self._log_cognee_failure("gossip ingestion")
        target_stats = self._stats.setdefault(
            target_id, _default_reputation(),
        )
        gossip_list = target_stats.setdefault(META_KEY_GOSSIP_HISTORY, [])
        gossip_list.append({"rater": rater_id, "rating": rating})

    def get_stats(self, opponent_id: str) -> dict[str, Any]:
        """Return the in-memory record for *opponent_id* without using cognee.

        A default record is returned for unknown opponents; it is not stored.
        """
        return self._stats.get(opponent_id, _default_reputation())

    def _update_stats(
        self,
        opponent_id: str,
        coop_rate: float,
        scores: tuple[float, float],
    ) -> None:
        """Blend *coop_rate* into the opponent's running statistics.

        The stored cooperation rate is weighted by ``_DECAY`` and the new
        observation by ``1 - _DECAY``; the blended value is also written to
        ``"score"``.  *scores* is accepted but currently unused.
        """
        current = self._stats.get(opponent_id, _default_reputation())
        count = current.get(META_KEY_INTERACTION_COUNT, _ZERO) + _ONE
        old_coop = current.get(META_KEY_COOPERATION_RATE, _DEFAULT_SCORE)
        blended = old_coop * _DECAY + coop_rate * (_ONE - _DECAY)
        current["score"] = blended
        current[META_KEY_COOPERATION_RATE] = blended
        current[META_KEY_INTERACTION_COUNT] = count
        self._stats[opponent_id] = current
common/meta/meta_games.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Pre-registered meta-gaming rule-proposal games for KantBench.
2
+
3
+ Registers symmetric meta-games (rule_proposal, rule_signal) for the
4
+ three core matrix games. Constitutional and proposer-responder are
5
+ composed per-episode via ``compose_game()`` because constitutional uses
6
+ mutable closure state that must be fresh per episode.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import replace
11
+
12
+ from common.games import GAMES
13
+ from common.meta.variants_meta import apply_rule_proposal, apply_rule_signal
14
+ from common.meta.variants_reputation import apply_gossip
15
+
16
+ _BASE_KEYS = ("prisoners_dilemma", "stag_hunt", "hawk_dove")
17
+
18
+ _FRIENDLY_NAMES = {
19
+ "prisoners_dilemma": "Prisoner's Dilemma",
20
+ "stag_hunt": "Stag Hunt",
21
+ "hawk_dove": "Hawk-Dove",
22
+ }
23
+
24
# Registry of pre-built meta-games, keyed "<variant>_<base game key>".
META_GAMES: dict = {}

# Pass 1: rule-proposal and rule-signal versions of every base game.
for _key in _BASE_KEYS:
    _base = GAMES[_key]
    _fname = _FRIENDLY_NAMES[_key]

    _rp = replace(
        apply_rule_proposal(_base, base_key=_key),
        name=f"Rule Proposal {_fname}",
        description=(
            f"{_fname} with simultaneous binding rule proposals. "
            "Both players propose a rule and choose an action. "
            "If proposals match the agreed rule modifies payoffs."
        ),
    )
    META_GAMES[f"rule_proposal_{_key}"] = _rp

    _rs = replace(
        apply_rule_signal(_base, base_key=_key),
        name=f"Rule Signal {_fname}",
        description=(
            f"{_fname} with simultaneous non-binding rule signals. "
            "Both players signal a preferred rule and choose an action. "
            "Signals are visible but never enforced."
        ),
    )
    META_GAMES[f"rule_signal_{_key}"] = _rs

# Pass 2: gossip versions, registered after all rule variants.
for _key in _BASE_KEYS:
    _base = GAMES[_key]
    _fname = _FRIENDLY_NAMES[_key]

    _gp = replace(
        apply_gossip(_base, base_key=_key),
        name=f"Gossip {_fname}",
        description=(
            f"{_fname} with reputation gossip. "
            "Players rate opponents as trustworthy, untrustworthy, "
            "or neutral alongside each action."
        ),
    )
    META_GAMES[f"gossip_{_key}"] = _gp

# Import-time side effect: publish the meta-games in the shared registry.
GAMES.update(META_GAMES)
common/meta/meta_rules.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Rule catalog and payoff transforms for the meta-gaming variant system.
2
+
3
+ Each rule is a payoff transform: given base payoffs and actions, return
4
+ modified payoffs. The ``apply_rule`` dispatcher looks up a rule by name
5
+ and delegates to the corresponding transform function.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ from typing import Callable
10
+
11
+ from constant_definitions.var.meta.meta_rule_constants import (
12
+ RULE_NONE, RULE_EQUAL_SPLIT, RULE_COOP_BONUS,
13
+ RULE_DEFECT_PENALTY, RULE_MIN_GUARANTEE, RULE_BAN_DEFECT,
14
+ COOP_BONUS_NUMERATOR, COOP_BONUS_DENOMINATOR,
15
+ DEFECT_PENALTY_NUMERATOR, DEFECT_PENALTY_DENOMINATOR,
16
+ MIN_GUARANTEE_NUMERATOR, MIN_GUARANTEE_DENOMINATOR,
17
+ BAN_DEFECT_PENALTY_NUMERATOR, BAN_DEFECT_PENALTY_DENOMINATOR,
18
+ EQUAL_SPLIT_DENOMINATOR,
19
+ META_SEPARATOR, META_SPLIT_LIMIT,
20
+ )
21
+
22
+ RuleTransform = Callable[
23
+ [float, float, str, str], tuple[float, float]
24
+ ]
25
+
26
+ _COOPERATIVE_ACTIONS = frozenset({"cooperate", "stag", "dove"})
27
+
28
+ _COOP_BONUS = COOP_BONUS_NUMERATOR / COOP_BONUS_DENOMINATOR
29
+ _DEFECT_PENALTY = DEFECT_PENALTY_NUMERATOR / DEFECT_PENALTY_DENOMINATOR
30
+ _MIN_GUARANTEE = MIN_GUARANTEE_NUMERATOR / MIN_GUARANTEE_DENOMINATOR
31
+ _BAN_PENALTY = BAN_DEFECT_PENALTY_NUMERATOR / BAN_DEFECT_PENALTY_DENOMINATOR
32
+
33
+
34
def _is_cooperative(action: str) -> bool:
    """Tell whether *action* is listed in ``_COOPERATIVE_ACTIONS``."""
    cooperative = action in _COOPERATIVE_ACTIONS
    return cooperative
37
+
38
+
39
+ def _rule_none(
40
+ base_p: float, base_o: float,
41
+ p_action: str, o_action: str,
42
+ ) -> tuple[float, float]:
43
+ return (base_p, base_o)
44
+
45
+
46
def _rule_equal_split(
    base_p: float, base_o: float,
    p_action: str, o_action: str,
) -> tuple[float, float]:
    """Pool both payoffs and give each player the same share.

    The share is the pooled payoff divided by ``EQUAL_SPLIT_DENOMINATOR``;
    the chosen actions do not influence the result.
    """
    per_player = (base_p + base_o) / EQUAL_SPLIT_DENOMINATOR
    return (per_player, per_player)
53
+
54
+
55
def _rule_coop_bonus(
    base_p: float, base_o: float,
    p_action: str, o_action: str,
) -> tuple[float, float]:
    """Add ``_COOP_BONUS`` to the payoff of every player who cooperated."""
    p_bonus = _COOP_BONUS if _is_cooperative(p_action) else float()
    o_bonus = _COOP_BONUS if _is_cooperative(o_action) else float()
    return (base_p + p_bonus, base_o + o_bonus)
62
+
63
+
64
def _rule_defect_penalty(
    base_p: float, base_o: float,
    p_action: str, o_action: str,
) -> tuple[float, float]:
    """Subtract ``_DEFECT_PENALTY`` from every player who did not cooperate."""
    p_fine = float() if _is_cooperative(p_action) else _DEFECT_PENALTY
    o_fine = float() if _is_cooperative(o_action) else _DEFECT_PENALTY
    return (base_p - p_fine, base_o - o_fine)
75
+
76
+
77
def _rule_min_guarantee(
    base_p: float, base_o: float,
    p_action: str, o_action: str,
) -> tuple[float, float]:
    """Raise each payoff to at least ``_MIN_GUARANTEE``.

    Payoffs already at or above the floor are returned unchanged; the
    chosen actions do not influence the result.
    """
    floor = _MIN_GUARANTEE
    return (max(base_p, floor), max(base_o, floor))
84
+
85
+
86
def _rule_ban_defect(
    base_p: float, base_o: float,
    p_action: str, o_action: str,
) -> tuple[float, float]:
    """Subtract ``_BAN_PENALTY`` from every player who did not cooperate."""
    p_fine = float() if _is_cooperative(p_action) else _BAN_PENALTY
    o_fine = float() if _is_cooperative(o_action) else _BAN_PENALTY
    return (base_p - p_fine, base_o - o_fine)
97
+
98
+
99
+ RULE_CATALOG: dict[str, RuleTransform] = {
100
+ RULE_NONE: _rule_none,
101
+ RULE_EQUAL_SPLIT: _rule_equal_split,
102
+ RULE_COOP_BONUS: _rule_coop_bonus,
103
+ RULE_DEFECT_PENALTY: _rule_defect_penalty,
104
+ RULE_MIN_GUARANTEE: _rule_min_guarantee,
105
+ RULE_BAN_DEFECT: _rule_ban_defect,
106
+ }
107
+
108
+
109
def apply_rule(
    rule_name: str,
    base_p: float, base_o: float,
    p_action: str, o_action: str,
) -> tuple[float, float]:
    """Apply the payoff transform registered under *rule_name*.

    Raises:
        KeyError: if *rule_name* is not a key of ``RULE_CATALOG``.
    """
    transform = RULE_CATALOG[rule_name]
    return transform(base_p, base_o, p_action, o_action)
116
+
117
+
118
+ _ZERO = int()
119
+ _ONE = int(bool(True))
120
+ _TWO = _ONE + _ONE
121
+
122
+
123
def parse_meta_action(
    action: str,
    split_limit: int = META_SPLIT_LIMIT,
) -> tuple[str, str, str]:
    """Decode a meta-action into ``(prefix, rule, base_action)``.

    *action* is split on ``META_SEPARATOR`` at most *split_limit* times and
    the first three pieces are returned.  The rule token is expected not to
    contain the separator itself.

    Raises:
        IndexError: if the split yields fewer than three pieces.
    """
    pieces = action.split(META_SEPARATOR, split_limit)
    prefix = pieces[_ZERO]
    rule = pieces[_ONE]
    base_action = pieces[_TWO]
    return (prefix, rule, base_action)
common/meta/variants_meta.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Meta-gaming variant transforms for rule proposal, signaling, and negotiation.
2
+
3
+ Four composable transforms following the ``apply_*`` pattern from
4
+ ``variants.py``. Each expands the action space to encode both a rule
5
+ proposal and a base-game action in a single string.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ from dataclasses import replace
10
+ from typing import Callable
11
+
12
+ from common.games import GameConfig
13
+ from common.meta.meta_rules import apply_rule, parse_meta_action
14
+
15
+ from constant_definitions.var.meta.meta_rule_constants import (
16
+ VARIANT_RULE_PROPOSAL, VARIANT_RULE_SIGNAL,
17
+ VARIANT_CONSTITUTIONAL, VARIANT_PROPOSER_RESPONDER,
18
+ META_PROP_PREFIX, META_SIG_PREFIX, META_CONST_PREFIX,
19
+ META_RPROP_PREFIX, META_RACCEPT_PREFIX, META_RREJECT_PREFIX,
20
+ META_SEPARATOR,
21
+ DEFAULT_RULE_CATALOG, RULE_NONE,
22
+ )
23
+
24
+ _ONE = int(bool(True))
25
+ _ZERO = int()
26
+
27
+
28
def _build_prefixed_actions(
    prefix: str,
    rules: tuple[str, ...],
    base_actions: list[str],
) -> list[str]:
    """Encode every (rule, base action) pair as one action string.

    Each entry is *prefix*, rule and base action joined by
    ``META_SEPARATOR``.  Rules vary slowest: all base actions for the first
    rule come before those of the second rule, and so on.
    """
    encoded: list[str] = []
    for rule in rules:
        for base_action in base_actions:
            encoded.append(META_SEPARATOR.join([prefix, rule, base_action]))
    return encoded
40
+
41
+
42
def apply_rule_proposal(
    base: GameConfig,
    rules: tuple[str, ...] = DEFAULT_RULE_CATALOG,
    base_key: str = "",
) -> GameConfig:
    """Wrap *base* with simultaneous, binding, per-round rule proposals.

    Every action encodes ``META_PROP_PREFIX``, a proposed rule and a base
    action.  When both players propose the same rule, that rule's transform
    is applied to the base payoffs; otherwise the base payoffs are returned.
    """
    inner_payoff = base.payoff_fn

    def _payoff(pa: str, oa: str) -> tuple[float, float]:
        _, own_rule, own_move = parse_meta_action(pa)
        _, other_rule, other_move = parse_meta_action(oa)
        own_base, other_base = inner_payoff(own_move, other_move)
        if own_rule != other_rule:
            # No agreement: the proposals have no effect this round.
            return (own_base, other_base)
        return apply_rule(own_rule, own_base, other_base, own_move, other_move)

    variant_trail = base.applied_variants + (VARIANT_RULE_PROPOSAL,)
    return replace(
        base,
        actions=_build_prefixed_actions(META_PROP_PREFIX, rules, base.actions),
        payoff_fn=_payoff,
        applied_variants=variant_trail,
        base_game_key=base_key or base.base_game_key,
    )
72
+
73
+
74
def apply_rule_signal(
    base: GameConfig,
    rules: tuple[str, ...] = DEFAULT_RULE_CATALOG,
    base_key: str = "",
) -> GameConfig:
    """Wrap *base* with simultaneous, non-binding, per-round rule signals.

    Every action encodes ``META_SIG_PREFIX``, a signalled rule and a base
    action.  The signalled rules are parsed but ignored: payoffs always come
    from the base game's payoff function.
    """
    inner_payoff = base.payoff_fn

    def _payoff(pa: str, oa: str) -> tuple[float, float]:
        own_move = parse_meta_action(pa)[-_ONE]
        other_move = parse_meta_action(oa)[-_ONE]
        return inner_payoff(own_move, other_move)

    variant_trail = base.applied_variants + (VARIANT_RULE_SIGNAL,)
    return replace(
        base,
        actions=_build_prefixed_actions(META_SIG_PREFIX, rules, base.actions),
        payoff_fn=_payoff,
        applied_variants=variant_trail,
        base_game_key=base_key or base.base_game_key,
    )
100
+
101
+
102
def apply_constitutional(
    base: GameConfig,
    rules: tuple[str, ...] = DEFAULT_RULE_CATALOG,
    base_key: str = "",
) -> GameConfig:
    """Wrap *base* with multi-round negotiation and binding lock-in.

    Every action encodes ``META_CONST_PREFIX``, a proposed rule and a base
    action.  The first payoff evaluation in which both players propose the
    same rule (other than ``RULE_NONE``) adopts that rule; from then on it
    is applied to every evaluation regardless of later proposals.  Until a
    rule is adopted, base payoffs are returned.

    The adopted rule lives in this call's closure, so each call produces a
    game with fresh, independent negotiation state.
    """
    inner_payoff = base.payoff_fn
    locked_rule: str | None = None

    def _payoff(pa: str, oa: str) -> tuple[float, float]:
        nonlocal locked_rule
        _, own_rule, own_move = parse_meta_action(pa)
        _, other_rule, other_move = parse_meta_action(oa)
        own_base, other_base = inner_payoff(own_move, other_move)

        if locked_rule is None:
            agreed = own_rule == other_rule and own_rule != RULE_NONE
            if not agreed:
                return (own_base, other_base)
            # Lock in before applying, so the rule stays adopted even if
            # applying it raises.
            locked_rule = own_rule

        return apply_rule(locked_rule, own_base, other_base, own_move, other_move)

    variant_trail = base.applied_variants + (VARIANT_CONSTITUTIONAL,)
    return replace(
        base,
        actions=_build_prefixed_actions(META_CONST_PREFIX, rules, base.actions),
        payoff_fn=_payoff,
        applied_variants=variant_trail,
        base_game_key=base_key or base.base_game_key,
    )
142
+
143
+
144
def apply_proposer_responder(
    base: GameConfig,
    rules: tuple[str, ...] = DEFAULT_RULE_CATALOG,
    base_key: str = "",
) -> GameConfig:
    """Wrap *base* as an asymmetric propose / accept-or-reject game.

    The player's actions encode ``META_RPROP_PREFIX``, a proposed rule and
    a base action.  The opponent's actions encode either
    ``META_RACCEPT_PREFIX`` or ``META_RREJECT_PREFIX`` followed by a base
    action; they are stored in ``opponent_actions``.

    When the opponent accepts, the proposed rule transforms the base
    payoffs; any other response leaves the base payoffs unchanged.
    """
    sep = META_SEPARATOR
    responses = (META_RACCEPT_PREFIX, META_RREJECT_PREFIX)
    # For every base action: the accept form first, then the reject form.
    responder_actions = [
        sep.join([response, base_action])
        for base_action in base.actions
        for response in responses
    ]
    proposer_actions = _build_prefixed_actions(
        META_RPROP_PREFIX, rules, base.actions,
    )
    inner_payoff = base.payoff_fn

    def _payoff(pa: str, oa: str) -> tuple[float, float]:
        _, proposed_rule, own_move = parse_meta_action(pa)
        response_parts = oa.split(sep, _ONE)
        response = response_parts[_ZERO]
        other_move = response_parts[_ONE]
        own_base, other_base = inner_payoff(own_move, other_move)
        if response != META_RACCEPT_PREFIX:
            return (own_base, other_base)
        return apply_rule(
            proposed_rule, own_base, other_base, own_move, other_move,
        )

    variant_trail = base.applied_variants + (VARIANT_PROPOSER_RESPONDER,)
    return replace(
        base,
        actions=proposer_actions,
        payoff_fn=_payoff,
        applied_variants=variant_trail,
        base_game_key=base_key or base.base_game_key,
        opponent_actions=tuple(responder_actions),
    )
186
+
187
+
188
+ _META_VARIANT_REGISTRY: dict[str, Callable[..., GameConfig]] = {
189
+ VARIANT_RULE_PROPOSAL: apply_rule_proposal,
190
+ VARIANT_RULE_SIGNAL: apply_rule_signal,
191
+ VARIANT_CONSTITUTIONAL: apply_constitutional,
192
+ VARIANT_PROPOSER_RESPONDER: apply_proposer_responder,
193
+ }
common/meta/variants_reputation.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Reputation gossip variant transform for the composable variant system.
2
+
3
+ Follows the ``apply_*`` pattern from ``variants.py`` and ``variants_meta.py``.
4
+ Adds ``gossip_<rating>_<base_action>`` actions to any base game.
5
+ Payoffs depend only on the base action, like cheap_talk.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ from dataclasses import replace
10
+ from typing import Callable
11
+
12
+ from common.games import GameConfig
13
+
14
+ from constant_definitions.var.meta.reputation_constants import (
15
+ VARIANT_GOSSIP,
16
+ DEFAULT_RATINGS,
17
+ GOSSIP_PREFIX,
18
+ GOSSIP_SEPARATOR,
19
+ GOSSIP_SPLIT_LIMIT,
20
+ )
21
+
22
+ _ONE = int(bool(True))
23
+ _ZERO = int()
24
+ _TWO = _ONE + _ONE
25
+
26
+
27
def apply_gossip(
    base: GameConfig,
    ratings: tuple[str, ...] = DEFAULT_RATINGS,
    base_key: str = "",
) -> GameConfig:
    """Add reputation gossip to a base game.

    Every new action joins ``GOSSIP_PREFIX``, a rating and a base action
    with ``GOSSIP_SEPARATOR``; ratings vary slowest.  Payoffs depend only on
    the base action, never on the rating.

    Encoded actions are decoded through a lookup table built here, so base
    actions that themselves contain the separator (for example actions
    produced by another variant) map back to the correct base action.
    Strings that are not in the table fall back to taking the text after
    the last separator.
    """
    sep = GOSSIP_SEPARATOR
    prefix = GOSSIP_PREFIX
    new_actions: list[str] = []
    base_action_of: dict[str, str] = {}
    for rating in ratings:
        for act in base.actions:
            encoded = sep.join([prefix, rating, act])
            new_actions.append(encoded)
            base_action_of.setdefault(encoded, act)
    original_payoff = base.payoff_fn

    def _decode(action: str) -> str:
        known = base_action_of.get(action)
        if known is not None:
            return known
        # Unknown encoding: keep the historical last-segment behaviour.
        return action.rsplit(sep, _ONE)[_ONE]

    def _payoff(pa: str, oa: str) -> tuple[float, float]:
        actual_p = _decode(pa)
        actual_o = _decode(oa)
        return original_payoff(actual_p, actual_o)

    return replace(
        base,
        actions=new_actions,
        payoff_fn=_payoff,
        applied_variants=base.applied_variants + (VARIANT_GOSSIP,),
        base_game_key=base_key or base.base_game_key,
    )
60
+
61
+
62
def parse_gossip_action(action: str) -> tuple[str, str, str]:
    """Decode a gossip action into ``(prefix, rating, base_action)``.

    *action* is split on ``GOSSIP_SEPARATOR`` at most ``GOSSIP_SPLIT_LIMIT``
    times and the first three pieces are returned.

    Raises:
        IndexError: if the split yields fewer than three pieces.
    """
    pieces = action.split(GOSSIP_SEPARATOR, GOSSIP_SPLIT_LIMIT)
    prefix = pieces[_ZERO]
    rating = pieces[_ONE]
    base_action = pieces[_TWO]
    return (prefix, rating, base_action)
69
+
70
+
71
+ _REPUTATION_VARIANT_REGISTRY: dict[str, Callable[..., GameConfig]] = {
72
+ VARIANT_GOSSIP: apply_gossip,
73
+ }
common/variants.py ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Composable game variant transforms for KantBench.
2
+
3
+ Each ``apply_*`` function takes a :class:`GameConfig` and returns a new
4
+ :class:`GameConfig` with modified actions, payoff function, and metadata.
5
+ Variants compose: ``apply_exit(apply_cheap_talk(base))`` works.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import replace
11
+ from typing import Callable
12
+
13
+ from common.games import GAMES, GameConfig
14
+ from constant_definitions.game_constants import (
15
+ DEFAULT_TWO_PLAYERS,
16
+ OPPONENT_MODE_SELF,
17
+ OPPONENT_MODE_CROSS,
18
+ )
19
+ from constant_definitions.var.pd_variant_constants import (
20
+ OPD_EXIT_PAYOFF,
21
+ VARIANT_CHEAP_TALK,
22
+ VARIANT_EXIT,
23
+ VARIANT_BINDING_COMMITMENT,
24
+ VARIANT_NOISY_ACTIONS,
25
+ VARIANT_NOISY_PAYOFFS,
26
+ VARIANT_SELF_PLAY,
27
+ VARIANT_CROSS_MODEL,
28
+ CT_MSG_PREFIX,
29
+ CT_SEPARATOR,
30
+ BC_COMMIT_PREFIX,
31
+ BC_FREE_PREFIX,
32
+ EXIT_ACTION,
33
+ DEFAULT_TREMBLE_PROB_NUMERATOR,
34
+ DEFAULT_TREMBLE_PROB_DENOMINATOR,
35
+ DEFAULT_NOISE_SCALE_NUMERATOR,
36
+ DEFAULT_NOISE_SCALE_DENOMINATOR,
37
+ )
38
+ from constant_definitions.var.communication_constants import COMMIT_COST
39
+
40
+ _ONE = int(bool(True))
41
+ _ZERO = int()
42
+
43
+
44
def apply_cheap_talk(
    base: GameConfig,
    base_key: str = "",
) -> GameConfig:
    """Add a non-binding message phase to a base game.

    Every new action joins ``CT_MSG_PREFIX``, a message and an actual
    action with ``CT_SEPARATOR``, where both message and actual action
    range over the base actions (messages vary slowest).  Payoffs depend
    only on the actual action.

    Encoded actions are decoded through a lookup table built here rather
    than by splitting on the separator.  Splitting breaks as soon as a base
    action contains the separator, which is the case for the actions
    created by ``apply_binding_commitment`` (joined with the same
    ``CT_SEPARATOR``) and by this function itself, so composing those
    variants previously failed.  Strings that are not in the table fall
    back to taking the text after the last separator.
    """
    sep = CT_SEPARATOR
    prefix = CT_MSG_PREFIX
    base_actions = base.actions
    new_actions: list[str] = []
    actual_action_of: dict[str, str] = {}
    for msg in base_actions:
        for act in base_actions:
            encoded = sep.join([prefix, msg, act])
            new_actions.append(encoded)
            actual_action_of.setdefault(encoded, act)

    original_payoff = base.payoff_fn

    def _decode(action: str) -> str:
        known = actual_action_of.get(action)
        if known is not None:
            return known
        # Unknown encoding: keep the historical last-segment behaviour.
        return action.rsplit(sep, _ONE)[_ONE]

    def _payoff(pa: str, oa: str) -> tuple[float, float]:
        actual_p = _decode(pa)
        actual_o = _decode(oa)
        return original_payoff(actual_p, actual_o)

    return replace(
        base,
        actions=new_actions,
        payoff_fn=_payoff,
        applied_variants=base.applied_variants + (VARIANT_CHEAP_TALK,),
        base_game_key=base_key or base.base_game_key,
    )
76
+
77
+
78
def apply_exit(
    base: GameConfig,
    base_key: str = "",
    exit_payoff: int = OPD_EXIT_PAYOFF,
) -> GameConfig:
    """Give both players an exit action with a fixed safe payoff.

    ``EXIT_ACTION`` is appended to the base actions.  If either player
    chooses it, both receive *exit_payoff* (as a float); otherwise the base
    payoff function decides.
    """
    leave = EXIT_ACTION
    safe = float(exit_payoff)
    inner_payoff = base.payoff_fn

    def _payoff(pa: str, oa: str) -> tuple[float, float]:
        if pa == leave:
            return (safe, safe)
        if oa == leave:
            return (safe, safe)
        return inner_payoff(pa, oa)

    variant_trail = base.applied_variants + (VARIANT_EXIT,)
    return replace(
        base,
        actions=[*base.actions, leave],
        payoff_fn=_payoff,
        applied_variants=variant_trail,
        base_game_key=base_key or base.base_game_key,
    )
105
+
106
+
107
def apply_binding_commitment(
    base: GameConfig,
    base_key: str = "",
    commit_cost: int = COMMIT_COST,
) -> GameConfig:
    """Add a costly commitment option for the first base action.

    The new action list starts with the first base action prefixed by
    ``BC_COMMIT_PREFIX``, followed by every base action prefixed by
    ``BC_FREE_PREFIX`` (joined with ``CT_SEPARATOR``).  A player who picks
    the commit form plays that action and has *commit_cost* subtracted from
    their payoff; free forms cost nothing.

    Raises:
        IndexError: if *base* has no actions.
    """
    sep = CT_SEPARATOR
    commit_pfx = BC_COMMIT_PREFIX
    fee = float(commit_cost)
    base_actions = base.actions
    locked_action = base_actions[_ZERO]

    new_actions = [sep.join([commit_pfx, locked_action])] + [
        sep.join([BC_FREE_PREFIX, act]) for act in base_actions
    ]
    inner_payoff = base.payoff_fn

    def _decode(action: str) -> tuple[str, bool]:
        """Return (actual_action, is_committed)."""
        pieces = action.split(sep, _ONE)
        actual = pieces[_ONE]
        committed = pieces[_ZERO] == commit_pfx
        return actual, committed

    def _payoff(pa: str, oa: str) -> tuple[float, float]:
        p_act, p_committed = _decode(pa)
        o_act, o_committed = _decode(oa)
        p_pay, o_pay = inner_payoff(p_act, o_act)
        return (
            p_pay - fee if p_committed else p_pay,
            o_pay - fee if o_committed else o_pay,
        )

    variant_trail = base.applied_variants + (VARIANT_BINDING_COMMITMENT,)
    return replace(
        base,
        actions=new_actions,
        payoff_fn=_payoff,
        applied_variants=variant_trail,
        base_game_key=base_key or base.base_game_key,
    )
153
+
154
+
155
+ _DEFAULT_TREMBLE = DEFAULT_TREMBLE_PROB_NUMERATOR / DEFAULT_TREMBLE_PROB_DENOMINATOR
156
+ _DEFAULT_NOISE = DEFAULT_NOISE_SCALE_NUMERATOR / DEFAULT_NOISE_SCALE_DENOMINATOR
157
+ _NOISY_ONLY_TWO_PLAYER = "apply_noisy variant only supports two-player games"
158
+
159
+
160
def apply_noisy_actions(
    base: GameConfig,
    base_key: str = "",
    tremble_prob: float = _DEFAULT_TREMBLE,
    rng: object | None = None,
) -> GameConfig:
    """Make each player's action tremble with probability *tremble_prob*.

    On every payoff evaluation each player's action is independently
    replaced, with probability *tremble_prob*, by a uniform draw from the
    base actions (the draw may pick the intended action again).

    Args:
        base: Two-player game to wrap.
        base_key: Registry key of the base game; falls back to
            ``base.base_game_key`` when empty.
        tremble_prob: Per-player probability of a tremble.
        rng: Optional random source exposing ``random()`` and ``choice()``
            (e.g. a seeded ``random.Random``) for reproducible runs.  When
            ``None`` the global ``random`` module is used, as before.

    Raises:
        ValueError: if *base* is not a two-player game.
    """
    if base.num_players != DEFAULT_TWO_PLAYERS:
        raise ValueError(_NOISY_ONLY_TWO_PLAYER)
    import random as _rng_mod
    source = _rng_mod if rng is None else rng
    original_payoff = base.payoff_fn
    actions = base.actions

    def _tremble(intended: str) -> str:
        # Draw the tremble decision first, then (only if needed) the
        # replacement, so the draw order matches the historical behaviour.
        if source.random() < tremble_prob:
            return source.choice(actions)
        return intended

    def _payoff(pa: str, oa: str) -> tuple[float, float]:
        actual_p = _tremble(pa)
        actual_o = _tremble(oa)
        return original_payoff(actual_p, actual_o)

    return replace(
        base,
        payoff_fn=_payoff,
        applied_variants=base.applied_variants + (VARIANT_NOISY_ACTIONS,),
        base_game_key=base_key or base.base_game_key,
    )
183
+
184
+
185
def apply_noisy_payoffs(
    base: GameConfig,
    base_key: str = "",
    noise_scale: float = _DEFAULT_NOISE,
    rng: object | None = None,
) -> GameConfig:
    """Add independent zero-mean Gaussian noise to each player's payoff.

    Args:
        base: Two-player game to wrap.
        base_key: Registry key of the base game; falls back to
            ``base.base_game_key`` when empty.
        noise_scale: Standard deviation passed to ``gauss``.
        rng: Optional random source exposing ``gauss()`` (e.g. a seeded
            ``random.Random``) for reproducible runs.  When ``None`` the
            global ``random`` module is used, as before.

    Raises:
        ValueError: if *base* is not a two-player game.
    """
    if base.num_players != DEFAULT_TWO_PLAYERS:
        raise ValueError(_NOISY_ONLY_TWO_PLAYER)
    import random as _rng_mod
    source = _rng_mod if rng is None else rng
    original_payoff = base.payoff_fn
    mean = float(_ZERO)

    def _payoff(pa: str, oa: str) -> tuple[float, float]:
        p, o = original_payoff(pa, oa)
        # The player's noise is drawn before the opponent's.
        noisy_p = p + source.gauss(mean, noise_scale)
        noisy_o = o + source.gauss(mean, noise_scale)
        return (noisy_p, noisy_o)

    return replace(
        base,
        payoff_fn=_payoff,
        applied_variants=base.applied_variants + (VARIANT_NOISY_PAYOFFS,),
        base_game_key=base_key or base.base_game_key,
    )
207
+
208
+
209
+ _OPPONENT_ONLY_TWO_PLAYER = "opponent mode variants only support two-player games"
210
+
211
+
212
def apply_self_play(
    base: GameConfig,
    base_key: str = "",
) -> GameConfig:
    """Tag a two-player game for self-play.

    Only metadata changes: ``opponent_mode`` becomes ``OPPONENT_MODE_SELF``
    and the variant is recorded; actions and payoffs are untouched.

    Raises:
        ValueError: if *base* is not a two-player game.
    """
    two_player = base.num_players == DEFAULT_TWO_PLAYERS
    if not two_player:
        raise ValueError(_OPPONENT_ONLY_TWO_PLAYER)
    variant_trail = base.applied_variants + (VARIANT_SELF_PLAY,)
    game_key = base_key or base.base_game_key
    return replace(
        base,
        opponent_mode=OPPONENT_MODE_SELF,
        applied_variants=variant_trail,
        base_game_key=game_key,
    )
225
+
226
+
227
def apply_cross_model(
    base: GameConfig,
    base_key: str = "",
) -> GameConfig:
    """Tag a two-player game for cross-model play.

    Only metadata changes: ``opponent_mode`` becomes ``OPPONENT_MODE_CROSS``
    and the variant is recorded; actions and payoffs are untouched.

    Raises:
        ValueError: if *base* is not a two-player game.
    """
    two_player = base.num_players == DEFAULT_TWO_PLAYERS
    if not two_player:
        raise ValueError(_OPPONENT_ONLY_TWO_PLAYER)
    variant_trail = base.applied_variants + (VARIANT_CROSS_MODEL,)
    game_key = base_key or base.base_game_key
    return replace(
        base,
        opponent_mode=OPPONENT_MODE_CROSS,
        applied_variants=variant_trail,
        base_game_key=game_key,
    )
240
+
241
+
242
+ _VARIANT_REGISTRY: dict[str, Callable[..., GameConfig]] = {
243
+ VARIANT_CHEAP_TALK: apply_cheap_talk,
244
+ VARIANT_EXIT: apply_exit,
245
+ VARIANT_BINDING_COMMITMENT: apply_binding_commitment,
246
+ VARIANT_NOISY_ACTIONS: apply_noisy_actions,
247
+ VARIANT_NOISY_PAYOFFS: apply_noisy_payoffs,
248
+ VARIANT_SELF_PLAY: apply_self_play,
249
+ VARIANT_CROSS_MODEL: apply_cross_model,
250
+ }
251
+
252
+ from common.meta.variants_meta import ( # noqa: E402
253
+ apply_rule_proposal, apply_rule_signal,
254
+ apply_constitutional, apply_proposer_responder,
255
+ _META_VARIANT_REGISTRY,
256
+ )
257
+
258
+ _VARIANT_REGISTRY.update(_META_VARIANT_REGISTRY)
259
+
260
+ from common.meta.variants_reputation import ( # noqa: E402
261
+ apply_gossip,
262
+ _REPUTATION_VARIANT_REGISTRY,
263
+ )
264
+
265
+ _VARIANT_REGISTRY.update(_REPUTATION_VARIANT_REGISTRY)
266
+
267
+
268
def compose_game(base_key: str, *variant_names: str) -> GameConfig:
    """Look up a base game and layer the named variants on top of it.

    Variants are applied left to right, each receiving the result of the
    previous one together with ``base_key``.  Example::

        compose_game("stag_hunt", "cheap_talk", "exit")

    Raises:
        KeyError: if *base_key* is not in ``GAMES`` or a variant name is
            not in ``_VARIANT_REGISTRY``.
    """
    composed = GAMES[base_key]
    # map() is lazy, so each variant is looked up just before it is applied.
    for transform in map(_VARIANT_REGISTRY.__getitem__, variant_names):
        composed = transform(composed, base_key=base_key)
    return composed
constant_definitions/game_constants.py CHANGED
@@ -8,6 +8,7 @@ MIN_STEP_COUNT = int()
8
  # Episode configuration
9
  DEFAULT_NUM_ROUNDS = 10
10
  SINGLE_SHOT_ROUNDS = 1
 
11
 
12
  # --- Prisoner's Dilemma payoffs ---
13
  PD_CC_PAYOFF = 3 # Both cooperate
@@ -71,6 +72,11 @@ SERVER_PORT = 8000
71
  # Max concurrent environments
72
  MAX_CONCURRENT_ENVS = 1
73
 
 
 
 
 
 
74
  # --- Evaluation module constants ---
75
  EVAL_ZERO = 0
76
  EVAL_ONE = 1
@@ -86,6 +92,10 @@ EVAL_ONE_FLOAT = 1.0
86
  EVAL_HALF = 0.5
87
  EVAL_NEGATIVE_ONE = -1
88
 
 
 
 
 
89
  # --- External benchmark constants ---
90
  EVAL_EIGHT = 8
91
  EVAL_TEN = 10
 
8
  # Episode configuration
9
  DEFAULT_NUM_ROUNDS = 10
10
  SINGLE_SHOT_ROUNDS = 1
11
+ DEFAULT_TWO_PLAYERS = 2
12
 
13
  # --- Prisoner's Dilemma payoffs ---
14
  PD_CC_PAYOFF = 3 # Both cooperate
 
72
  # Max concurrent environments
73
  MAX_CONCURRENT_ENVS = 1
74
 
75
+ # --- Opponent mode ---
76
+ OPPONENT_MODE_STRATEGY = "strategy"
77
+ OPPONENT_MODE_SELF = "self_play"
78
+ OPPONENT_MODE_CROSS = "cross_model"
79
+
80
  # --- Evaluation module constants ---
81
  EVAL_ZERO = 0
82
  EVAL_ONE = 1
 
92
  EVAL_HALF = 0.5
93
  EVAL_NEGATIVE_ONE = -1
94
 
95
+ # --- N-player / coalition evaluation constants ---
96
+ NPLAYER_EVAL_DEFAULT_EPISODES = 3
97
+ COALITION_EVAL_DEFAULT_EPISODES = 3
98
+
99
  # --- External benchmark constants ---
100
  EVAL_EIGHT = 8
101
  EVAL_TEN = 10
constant_definitions/train/agent_constants.py CHANGED
@@ -33,3 +33,33 @@ SYSTEM_PROMPT = (
33
 
34
  # Sentinel returned when LLM output cannot be parsed
35
  PARSE_FAILURE_SENTINEL = "__PARSE_FAILURE__"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
  # Sentinel returned when LLM output cannot be parsed
35
  PARSE_FAILURE_SENTINEL = "__PARSE_FAILURE__"
36
+
37
+ # --- N-player prompt section headers ---
38
+ NPLAYER_PROMPT_SECTION_PLAYERS = "PLAYERS"
39
+ NPLAYER_PROMPT_SECTION_ALL_SCORES = "ALL SCORES"
40
+
41
+ # --- Coalition prompt section headers ---
42
+ COALITION_PROMPT_SECTION_PHASE = "PHASE"
43
+ COALITION_PROMPT_SECTION_PROPOSALS = "PENDING PROPOSALS"
44
+ COALITION_PROMPT_SECTION_COALITIONS = "ACTIVE COALITIONS"
45
+
46
+ # --- Governance prompt section headers ---
47
+ GOVERNANCE_PROMPT_SECTION_RULES = "GOVERNANCE RULES"
48
+ GOVERNANCE_PROMPT_SECTION_PENDING = "PENDING GOVERNANCE"
49
+
50
+ # N-player system prompt
51
+ NPLAYER_SYSTEM_PROMPT = (
52
+ "You are playing an N-player game-theory game. Analyse the situation "
53
+ "and choose the best action. Respond with ONLY the action name, "
54
+ "nothing else."
55
+ )
56
+
57
+ # Coalition system prompt
58
+ COALITION_SYSTEM_PROMPT = (
59
+ "You are playing a coalition formation game. You can form coalitions "
60
+ "with other players and propose governance changes. Respond with "
61
+ "valid JSON when negotiating, or ONLY the action name when acting."
62
+ )
63
+
64
+ # Maximum tokens for coalition JSON response
65
+ COALITION_MAX_ACTION_TOKENS = 256
constant_definitions/var/meta/meta_rule_constants.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Meta-gaming rule proposal variant constants

# Variant names
VARIANT_RULE_PROPOSAL = "rule_proposal"
VARIANT_RULE_SIGNAL = "rule_signal"
VARIANT_CONSTITUTIONAL = "constitutional"
VARIANT_PROPOSER_RESPONDER = "proposer_responder"

# Action prefixes (single tokens)
META_PROP_PREFIX = "prop"
META_SIG_PREFIX = "sig"
META_CONST_PREFIX = "const"
META_RPROP_PREFIX = "rprop"
META_RACCEPT_PREFIX = "raccept"
META_RREJECT_PREFIX = "rreject"
META_SEPARATOR = "_"
# NOTE(review): presumably the split limit used when breaking an action
# string on META_SEPARATOR -- confirm at the call site.
META_SPLIT_LIMIT = 2

# Rule names (single tokens, no underscores)
RULE_NONE = "none"
RULE_EQUAL_SPLIT = "equalsplit"
RULE_COOP_BONUS = "coopbonus"
RULE_DEFECT_PENALTY = "defectpenalty"
RULE_MIN_GUARANTEE = "minguarantee"
RULE_BAN_DEFECT = "bandefect"

# Every rule name defined above, in declaration order.
DEFAULT_RULE_CATALOG = (
    RULE_NONE,
    RULE_EQUAL_SPLIT,
    RULE_COOP_BONUS,
    RULE_DEFECT_PENALTY,
    RULE_MIN_GUARANTEE,
    RULE_BAN_DEFECT,
)

# Payoff parameters (numerator/denominator to avoid inline literals)
COOP_BONUS_NUMERATOR = 2
COOP_BONUS_DENOMINATOR = 1
DEFECT_PENALTY_NUMERATOR = 3
DEFECT_PENALTY_DENOMINATOR = 1
MIN_GUARANTEE_NUMERATOR = 1
MIN_GUARANTEE_DENOMINATOR = 1
BAN_DEFECT_PENALTY_NUMERATOR = 10
BAN_DEFECT_PENALTY_DENOMINATOR = 1
EQUAL_SPLIT_DENOMINATOR = 2
constant_definitions/var/meta/reputation_constants.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Reputation and gossip variant constants

# Variant names
VARIANT_GOSSIP = "gossip"
VARIANT_MEMORY = "memory"

# Gossip ratings (single tokens, no underscores)
RATING_TRUSTWORTHY = "trustworthy"
RATING_UNTRUSTWORTHY = "untrustworthy"
RATING_NEUTRAL = "neutral"
DEFAULT_RATINGS = (RATING_TRUSTWORTHY, RATING_UNTRUSTWORTHY, RATING_NEUTRAL)

# Action prefixes
GOSSIP_PREFIX = "gossip"
GOSSIP_SEPARATOR = "_"
# NOTE(review): presumably the split limit used when breaking a gossip
# action on GOSSIP_SEPARATOR -- confirm at the call site.
GOSSIP_SPLIT_LIMIT = 2

# Reputation defaults (numerator / denominator)
DEFAULT_REPUTATION_SCORE_NUMERATOR = 5
DEFAULT_REPUTATION_SCORE_DENOMINATOR = 10
REPUTATION_DECAY_NUMERATOR = 9
REPUTATION_DECAY_DENOMINATOR = 10

# Metadata keys
META_KEY_REPUTATION = "opponent_reputation"
META_KEY_GOSSIP_HISTORY = "gossip_history"
META_KEY_INTERACTION_COUNT = "interaction_count"
META_KEY_COOPERATION_RATE = "cooperation_rate"

# Cognee dataset name and search type
COGNEE_DATASET_NAME = "kant_interactions"
COGNEE_SEARCH_TYPE = "GRAPH_COMPLETION"
constant_definitions/var/pd_variant_constants.py CHANGED
@@ -24,3 +24,25 @@ PW_DISARM_DISARM = 4
24
  PW_DISARM_ARM = -1
25
  PW_ARM_DISARM = 6
26
  PW_ARM_ARM = 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  PW_DISARM_ARM = -1
25
  PW_ARM_DISARM = 6
26
  PW_ARM_ARM = 0
27
+
28
+ # Composable variant system -- name strings and prefixes
29
+ VARIANT_CHEAP_TALK = "cheap_talk"
30
+ VARIANT_EXIT = "exit"
31
+ VARIANT_BINDING_COMMITMENT = "binding_commitment"
32
+ VARIANT_NOISY_ACTIONS = "noisy_actions"
33
+ VARIANT_NOISY_PAYOFFS = "noisy_payoffs"
34
+ CT_MSG_PREFIX = "msg"
35
+ CT_SEPARATOR = "_"
36
+ BC_COMMIT_PREFIX = "commit"
37
+ BC_FREE_PREFIX = "free"
38
+ EXIT_ACTION = "exit"
39
+
40
+ # Opponent mode variants
41
+ VARIANT_SELF_PLAY = "self_play"
42
+ VARIANT_CROSS_MODEL = "cross_model"
43
+
44
+ # Bayesian variant parameters
45
+ DEFAULT_TREMBLE_PROB_NUMERATOR = 1
46
+ DEFAULT_TREMBLE_PROB_DENOMINATOR = 10
47
+ DEFAULT_NOISE_SCALE_NUMERATOR = 1
48
+ DEFAULT_NOISE_SCALE_DENOMINATOR = 2
env/environment.py CHANGED
@@ -155,6 +155,8 @@ class KantEnvironment(Environment[GameObservation, GameAction, GameState]):
155
 
156
  def _opponent_actions(self) -> list[str]:
157
  assert self._game is not None
 
 
158
  gt = self._game.game_type
159
  if gt == "ultimatum":
160
  return ["accept", "reject"]
 
155
 
156
  def _opponent_actions(self) -> list[str]:
157
  assert self._game is not None
158
+ if self._game.opponent_actions is not None:
159
+ return list(self._game.opponent_actions)
160
  gt = self._game.game_type
161
  if gt == "ultimatum":
162
  return ["accept", "reject"]
env/reputation/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ """Reputation environment subpackage."""
2
+ from env.reputation.reputation_env import ReputationEnvironment
3
+
4
+ __all__ = ["ReputationEnvironment"]
env/reputation/reputation_env.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Environment wrapper adding cross-episode reputation via cognee.
2
+
3
+ Injects opponent reputation into obs.metadata before each episode.
4
+ Records episode outcomes and gossip ratings in CogneeMemoryStore after.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ from typing import Any, Optional
9
+
10
+ from env.environment import KantEnvironment
11
+ from env.models import GameAction, GameObservation, GameState
12
+ from common.meta.memory_store import CogneeMemoryStore
13
+ from common.meta.variants_reputation import parse_gossip_action
14
+
15
+ from constant_definitions.var.meta.reputation_constants import (
16
+ GOSSIP_PREFIX,
17
+ GOSSIP_SEPARATOR,
18
+ META_KEY_REPUTATION,
19
+ META_KEY_INTERACTION_COUNT,
20
+ )
21
+
22
# Small integers are spelled via constructors rather than inline literals.
_ZERO = int()
_ONE = int(bool(True))
# Action names counted as cooperative when computing the cooperation rate.
_COOPERATIVE_ACTIONS = frozenset({"cooperate", "stag", "dove"})


def _compute_coop_rate(history: list[Any]) -> float:
    """Return the fraction of rounds whose player action was cooperative.

    Each entry of ``history`` must expose a ``player_action`` string.  When
    that string contains ``GOSSIP_SEPARATOR`` only the part after the last
    separator is compared against ``_COOPERATIVE_ACTIONS``.  An empty
    history yields ``0.0``.
    """
    if not history:
        return float(_ZERO)

    def _is_cooperative(action: str) -> bool:
        # Drop any prefix (e.g. a gossip rating) in front of the base action.
        if GOSSIP_SEPARATOR in action:
            action = action.rsplit(GOSSIP_SEPARATOR, _ONE)[_ONE]
        return action in _COOPERATIVE_ACTIONS

    cooperative_rounds = sum(
        _ONE for rnd in history if _is_cooperative(rnd.player_action)
    )
    return cooperative_rounds / len(history)
39
+
40
+
41
class ReputationEnvironment:
    """Wrap a ``KantEnvironment`` with reputation data from a memory store.

    ``reset`` attaches the store's reputation record for the opponent to the
    first observation's metadata.  ``step`` forwards gossip ratings to the
    store and records the episode once the wrapped environment reports that
    it is done.
    """

    def __init__(
        self,
        memory_store: CogneeMemoryStore,
        env: Optional[KantEnvironment] = None,
    ) -> None:
        """Store the memory backend and wrap ``env`` (a new one if omitted)."""
        self._env = KantEnvironment() if env is None else env
        self._store = memory_store
        self._agent_id: str = ""
        self._opponent_id: str = ""

    def reset(
        self,
        *,
        agent_id: str = "agent",
        **kwargs: Any,
    ) -> GameObservation:
        """Reset the wrapped environment and add reputation to the metadata.

        The opponent id is taken from the ``strategy`` keyword argument and
        falls back to ``"unknown"``.  All keyword arguments except
        ``agent_id`` are passed on to the wrapped environment.
        """
        self._agent_id = agent_id
        self._opponent_id = kwargs.get("strategy", "unknown")

        first_obs = self._env.reset(**kwargs)
        reputation = self._store.query_reputation(self._opponent_id)

        metadata = dict(first_obs.metadata)
        metadata.update({
            META_KEY_REPUTATION: reputation,
            META_KEY_INTERACTION_COUNT: reputation.get(
                META_KEY_INTERACTION_COUNT, _ZERO,
            ),
        })
        return first_obs.model_copy(update={"metadata": metadata})

    def step(
        self,
        action: GameAction,
        **kwargs: Any,
    ) -> GameObservation:
        """Step the wrapped environment, recording gossip and finished episodes.

        A gossip-prefixed action has its rating stored before the action is
        forwarded.  When the returned observation is done, the episode is
        recorded and the observation's metadata receives the store's stats
        for the opponent.
        """
        if action.action.startswith(GOSSIP_PREFIX + GOSSIP_SEPARATOR):
            _, rating, _ = parse_gossip_action(action.action)
            self._store.record_gossip(
                self._agent_id, self._opponent_id, rating,
            )

        obs = self._env.step(action, **kwargs)
        if not obs.done:
            return obs

        self._store.record_episode(
            agent_id=self._agent_id,
            opponent_id=self._opponent_id,
            game=obs.game_name,
            history=obs.history,
            cooperation_rate=_compute_coop_rate(obs.history),
            scores=(obs.player_score, obs.opponent_score),
        )

        # NOTE(review): the metadata refresh is assumed to belong to the
        # end-of-episode branch; the indentation of this code was lost in
        # extraction -- confirm against the repository.
        reputation = self._store.get_stats(self._opponent_id)
        metadata = dict(obs.metadata)
        metadata[META_KEY_REPUTATION] = reputation
        return obs.model_copy(update={"metadata": metadata})

    @property
    def state(self) -> GameState:
        """Return the wrapped environment's state."""
        return self._env.state
server/KantBench_environment.py CHANGED
@@ -1,8 +1,8 @@
1
  """KantBench environment adapter for the HF Space.
2
 
3
  Thin wrapper that delegates to the real KantEnvironment (90+ 2-player games,
4
- 17 strategies) and NPlayerEnvironment (3 N-player games) instead of a
5
- standalone reimplementation.
6
  """
7
 
8
  from __future__ import annotations
@@ -22,12 +22,18 @@ from env.nplayer.models import NPlayerAction, NPlayerObservation
22
  import common.games_meta.nplayer_games # noqa: F401
23
  from common.games_meta.nplayer_config import NPLAYER_GAMES
24
 
 
 
 
25
 
26
  class KantbenchEnvironment(Environment):
27
  """Game theory environment exposing 90+ two-player and N-player games.
28
 
29
  Wraps the real KantEnvironment and NPlayerEnvironment, routing
30
  automatically based on the requested game name.
 
 
 
31
  """
32
 
33
  SUPPORTS_CONCURRENT_SESSIONS: bool = True
@@ -39,6 +45,16 @@ class KantbenchEnvironment(Environment):
39
 
40
  def reset(self, **kwargs: Any) -> KantBenchObservation:
41
  game_name: str = kwargs.get("game", "prisoners_dilemma")
 
 
 
 
 
 
 
 
 
 
42
 
43
  if game_name in NPLAYER_GAMES:
44
  self._is_nplayer = True
 
1
  """KantBench environment adapter for the HF Space.
2
 
3
  Thin wrapper that delegates to the real KantEnvironment (90+ 2-player games,
4
+ 17 strategies, meta-games, composable variants) and NPlayerEnvironment
5
+ (3 N-player games) instead of a standalone reimplementation.
6
  """
7
 
8
  from __future__ import annotations
 
22
  import common.games_meta.nplayer_games # noqa: F401
23
  from common.games_meta.nplayer_config import NPLAYER_GAMES
24
 
25
+ from common.games import GAMES
26
+ from common.variants import compose_game
27
+
28
 
29
  class KantbenchEnvironment(Environment):
30
  """Game theory environment exposing 90+ two-player and N-player games.
31
 
32
  Wraps the real KantEnvironment and NPlayerEnvironment, routing
33
  automatically based on the requested game name.
34
+
35
+ Supports a ``variant`` reset parameter for dynamic game composition
36
+ (e.g. ``variant="constitutional"`` or ``variant="cheap_talk"``).
37
  """
38
 
39
  SUPPORTS_CONCURRENT_SESSIONS: bool = True
 
45
 
46
  def reset(self, **kwargs: Any) -> KantBenchObservation:
47
  game_name: str = kwargs.get("game", "prisoners_dilemma")
48
+ variant: Optional[str] = kwargs.pop("variant", None)
49
+
50
+ # Dynamic variant composition — compose game on-the-fly and
51
+ # register it so KantEnvironment can look it up via get_game().
52
+ # Constitutional variant creates fresh mutable closure per call.
53
+ if variant and game_name in GAMES:
54
+ composed = compose_game(game_name, variant)
55
+ composed_key = f"_composed_{variant}_{game_name}"
56
+ GAMES[composed_key] = composed
57
+ kwargs["game"] = composed_key
58
 
59
  if game_name in NPLAYER_GAMES:
60
  self._is_nplayer = True