Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files- common/games.py +24 -35
- common/games_coop/pd_variants.py +17 -31
- common/games_info/communication.py +31 -68
- common/games_meta/coalition_config.py +14 -33
- common/games_meta/game_tags.py +42 -2
- common/games_meta/nplayer_config.py +4 -20
- common/games_meta/nplayer_games.py +9 -5
- common/meta/memory_store.py +183 -0
- common/meta/meta_games.py +70 -0
- common/meta/meta_rules.py +134 -0
- common/meta/variants_meta.py +193 -0
- common/meta/variants_reputation.py +73 -0
- common/variants.py +279 -0
- constant_definitions/game_constants.py +10 -0
- constant_definitions/train/agent_constants.py +30 -0
- constant_definitions/var/meta/meta_rule_constants.py +41 -0
- constant_definitions/var/meta/reputation_constants.py +32 -0
- constant_definitions/var/pd_variant_constants.py +22 -0
- env/environment.py +2 -0
- env/reputation/__init__.py +4 -0
- env/reputation/reputation_env.py +111 -0
- server/KantBench_environment.py +18 -2
common/games.py
CHANGED
|
@@ -8,34 +8,15 @@ from typing import Callable
|
|
| 8 |
from constant_definitions.game_constants import (
|
| 9 |
DEFAULT_ZERO_FLOAT,
|
| 10 |
DEFAULT_ZERO_INT,
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
PD_DC_PAYOFF,
|
| 15 |
-
PD_DD_PAYOFF,
|
| 16 |
-
# Stag Hunt
|
| 17 |
-
SH_SS_PAYOFF,
|
| 18 |
-
SH_SH_PAYOFF,
|
| 19 |
-
SH_HS_PAYOFF,
|
| 20 |
-
SH_HH_PAYOFF,
|
| 21 |
-
# Hawk-Dove
|
| 22 |
-
HD_HH_PAYOFF,
|
| 23 |
-
HD_HD_PAYOFF,
|
| 24 |
-
HD_DH_PAYOFF,
|
| 25 |
-
HD_DD_PAYOFF,
|
| 26 |
-
# Ultimatum
|
| 27 |
ULTIMATUM_POT,
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
PG_MULTIPLIER_DENOMINATOR,
|
| 34 |
-
PG_ENDOWMENT,
|
| 35 |
-
PG_DEFAULT_NUM_PLAYERS,
|
| 36 |
-
# Round counts
|
| 37 |
-
DEFAULT_NUM_ROUNDS,
|
| 38 |
-
SINGLE_SHOT_ROUNDS,
|
| 39 |
)
|
| 40 |
|
| 41 |
# ---------------------------------------------------------------------------
|
|
@@ -50,30 +31,39 @@ class GameConfig:
|
|
| 50 |
name: str
|
| 51 |
description: str
|
| 52 |
actions: list[str]
|
| 53 |
-
game_type: str
|
| 54 |
default_rounds: int
|
| 55 |
-
payoff_fn: Callable
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
|
| 58 |
# ---------------------------------------------------------------------------
|
| 59 |
# Matrix-game payoff helpers
|
| 60 |
# ---------------------------------------------------------------------------
|
| 61 |
|
| 62 |
-
_PD_MATRIX
|
| 63 |
("cooperate", "cooperate"): (float(PD_CC_PAYOFF), float(PD_CC_PAYOFF)),
|
| 64 |
("cooperate", "defect"): (float(PD_CD_PAYOFF), float(PD_DC_PAYOFF)),
|
| 65 |
("defect", "cooperate"): (float(PD_DC_PAYOFF), float(PD_CD_PAYOFF)),
|
| 66 |
("defect", "defect"): (float(PD_DD_PAYOFF), float(PD_DD_PAYOFF)),
|
| 67 |
}
|
| 68 |
|
| 69 |
-
_SH_MATRIX
|
| 70 |
("stag", "stag"): (float(SH_SS_PAYOFF), float(SH_SS_PAYOFF)),
|
| 71 |
("stag", "hare"): (float(SH_SH_PAYOFF), float(SH_HS_PAYOFF)),
|
| 72 |
("hare", "stag"): (float(SH_HS_PAYOFF), float(SH_SH_PAYOFF)),
|
| 73 |
("hare", "hare"): (float(SH_HH_PAYOFF), float(SH_HH_PAYOFF)),
|
| 74 |
}
|
| 75 |
|
| 76 |
-
_HD_MATRIX
|
| 77 |
("hawk", "hawk"): (float(HD_HH_PAYOFF), float(HD_HH_PAYOFF)),
|
| 78 |
("hawk", "dove"): (float(HD_HD_PAYOFF), float(HD_DH_PAYOFF)),
|
| 79 |
("dove", "hawk"): (float(HD_DH_PAYOFF), float(HD_HD_PAYOFF)),
|
|
@@ -81,9 +71,7 @@ _HD_MATRIX: dict[tuple[str, str], tuple[float, float]] = {
|
|
| 81 |
}
|
| 82 |
|
| 83 |
|
| 84 |
-
def _matrix_payoff_fn(
|
| 85 |
-
matrix: dict[tuple[str, str], tuple[float, float]],
|
| 86 |
-
) -> Callable[[str, str], tuple[float, float]]:
|
| 87 |
"""Return a payoff function backed by a pre-built matrix dict."""
|
| 88 |
|
| 89 |
def _payoff(player_action: str, opponent_action: str) -> tuple[float, float]:
|
|
@@ -284,6 +272,7 @@ def _load_extensions() -> None:
|
|
| 284 |
"common.games_market.advanced", "common.games_coop.cooperative",
|
| 285 |
"common.games_coop.dynamic", "common.games_coop.pd_variants",
|
| 286 |
"common.games_coop.infinite", "common.games_coop.stochastic",
|
|
|
|
| 287 |
]:
|
| 288 |
try:
|
| 289 |
importlib.import_module(mod)
|
|
|
|
| 8 |
from constant_definitions.game_constants import (
|
| 9 |
DEFAULT_ZERO_FLOAT,
|
| 10 |
DEFAULT_ZERO_INT,
|
| 11 |
+
PD_CC_PAYOFF, PD_CD_PAYOFF, PD_DC_PAYOFF, PD_DD_PAYOFF,
|
| 12 |
+
SH_SS_PAYOFF, SH_SH_PAYOFF, SH_HS_PAYOFF, SH_HH_PAYOFF,
|
| 13 |
+
HD_HH_PAYOFF, HD_HD_PAYOFF, HD_DH_PAYOFF, HD_DD_PAYOFF,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
ULTIMATUM_POT,
|
| 15 |
+
TRUST_MULTIPLIER, TRUST_ENDOWMENT,
|
| 16 |
+
PG_MULTIPLIER_NUMERATOR, PG_MULTIPLIER_DENOMINATOR,
|
| 17 |
+
PG_ENDOWMENT, PG_DEFAULT_NUM_PLAYERS,
|
| 18 |
+
DEFAULT_NUM_ROUNDS, SINGLE_SHOT_ROUNDS, DEFAULT_TWO_PLAYERS,
|
| 19 |
+
OPPONENT_MODE_STRATEGY,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
)
|
| 21 |
|
| 22 |
# ---------------------------------------------------------------------------
|
|
|
|
| 31 |
name: str
|
| 32 |
description: str
|
| 33 |
actions: list[str]
|
| 34 |
+
game_type: str
|
| 35 |
default_rounds: int
|
| 36 |
+
payoff_fn: Callable
|
| 37 |
+
num_players: int = DEFAULT_TWO_PLAYERS
|
| 38 |
+
applied_variants: tuple[str, ...] = ()
|
| 39 |
+
base_game_key: str = ""
|
| 40 |
+
enforcement: str = ""
|
| 41 |
+
penalty_numerator: int = DEFAULT_ZERO_INT
|
| 42 |
+
penalty_denominator: int = SINGLE_SHOT_ROUNDS
|
| 43 |
+
allow_side_payments: bool = False
|
| 44 |
+
opponent_mode: str = OPPONENT_MODE_STRATEGY
|
| 45 |
+
opponent_actions: tuple[str, ...] | None = None
|
| 46 |
|
| 47 |
|
| 48 |
# ---------------------------------------------------------------------------
|
| 49 |
# Matrix-game payoff helpers
|
| 50 |
# ---------------------------------------------------------------------------
|
| 51 |
|
| 52 |
+
_PD_MATRIX = {
|
| 53 |
("cooperate", "cooperate"): (float(PD_CC_PAYOFF), float(PD_CC_PAYOFF)),
|
| 54 |
("cooperate", "defect"): (float(PD_CD_PAYOFF), float(PD_DC_PAYOFF)),
|
| 55 |
("defect", "cooperate"): (float(PD_DC_PAYOFF), float(PD_CD_PAYOFF)),
|
| 56 |
("defect", "defect"): (float(PD_DD_PAYOFF), float(PD_DD_PAYOFF)),
|
| 57 |
}
|
| 58 |
|
| 59 |
+
_SH_MATRIX = {
|
| 60 |
("stag", "stag"): (float(SH_SS_PAYOFF), float(SH_SS_PAYOFF)),
|
| 61 |
("stag", "hare"): (float(SH_SH_PAYOFF), float(SH_HS_PAYOFF)),
|
| 62 |
("hare", "stag"): (float(SH_HS_PAYOFF), float(SH_SH_PAYOFF)),
|
| 63 |
("hare", "hare"): (float(SH_HH_PAYOFF), float(SH_HH_PAYOFF)),
|
| 64 |
}
|
| 65 |
|
| 66 |
+
_HD_MATRIX = {
|
| 67 |
("hawk", "hawk"): (float(HD_HH_PAYOFF), float(HD_HH_PAYOFF)),
|
| 68 |
("hawk", "dove"): (float(HD_HD_PAYOFF), float(HD_DH_PAYOFF)),
|
| 69 |
("dove", "hawk"): (float(HD_DH_PAYOFF), float(HD_HD_PAYOFF)),
|
|
|
|
| 71 |
}
|
| 72 |
|
| 73 |
|
| 74 |
+
def _matrix_payoff_fn(matrix: dict) -> Callable:
|
|
|
|
|
|
|
| 75 |
"""Return a payoff function backed by a pre-built matrix dict."""
|
| 76 |
|
| 77 |
def _payoff(player_action: str, opponent_action: str) -> tuple[float, float]:
|
|
|
|
| 272 |
"common.games_market.advanced", "common.games_coop.cooperative",
|
| 273 |
"common.games_coop.dynamic", "common.games_coop.pd_variants",
|
| 274 |
"common.games_coop.infinite", "common.games_coop.stochastic",
|
| 275 |
+
"common.meta.meta_games",
|
| 276 |
]:
|
| 277 |
try:
|
| 278 |
importlib.import_module(mod)
|
common/games_coop/pd_variants.py
CHANGED
|
@@ -1,13 +1,14 @@
|
|
| 1 |
"""Prisoner's Dilemma variants for KantBench."""
|
| 2 |
from __future__ import annotations
|
| 3 |
|
|
|
|
|
|
|
| 4 |
from common.games import GAMES, GameConfig, _matrix_payoff_fn
|
|
|
|
| 5 |
from constant_definitions.game_constants import (
|
| 6 |
-
PD_CC_PAYOFF, PD_CD_PAYOFF, PD_DC_PAYOFF, PD_DD_PAYOFF,
|
| 7 |
DEFAULT_NUM_ROUNDS, SINGLE_SHOT_ROUNDS,
|
| 8 |
)
|
| 9 |
from constant_definitions.var.pd_variant_constants import (
|
| 10 |
-
OPD_EXIT_PAYOFF,
|
| 11 |
APD_A_TEMPTATION, APD_A_REWARD, APD_A_PUNISHMENT, APD_A_SUCKER,
|
| 12 |
APD_B_TEMPTATION, APD_B_REWARD, APD_B_PUNISHMENT, APD_B_SUCKER,
|
| 13 |
DONATION_BENEFIT, DONATION_COST,
|
|
@@ -18,22 +19,6 @@ from constant_definitions.var.pd_variant_constants import (
|
|
| 18 |
_ZERO_F = float()
|
| 19 |
|
| 20 |
|
| 21 |
-
# -- Optional PD (cooperate / defect / exit) --
|
| 22 |
-
_OPD_EXIT_F = float(OPD_EXIT_PAYOFF)
|
| 23 |
-
_OPD_BASE: dict[tuple[str, str], tuple[float, float]] = {
|
| 24 |
-
("cooperate", "cooperate"): (float(PD_CC_PAYOFF), float(PD_CC_PAYOFF)),
|
| 25 |
-
("cooperate", "defect"): (float(PD_CD_PAYOFF), float(PD_DC_PAYOFF)),
|
| 26 |
-
("defect", "cooperate"): (float(PD_DC_PAYOFF), float(PD_CD_PAYOFF)),
|
| 27 |
-
("defect", "defect"): (float(PD_DD_PAYOFF), float(PD_DD_PAYOFF)),
|
| 28 |
-
}
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
def _optional_pd_payoff(pa: str, oa: str) -> tuple[float, float]:
|
| 32 |
-
if pa == "exit" or oa == "exit":
|
| 33 |
-
return (_OPD_EXIT_F, _OPD_EXIT_F)
|
| 34 |
-
return _OPD_BASE[(pa, oa)]
|
| 35 |
-
|
| 36 |
-
|
| 37 |
# -- Asymmetric PD (alibi game: different payoffs per player) --
|
| 38 |
_ASYM_PD: dict[tuple[str, str], tuple[float, float]] = {
|
| 39 |
("cooperate", "cooperate"): (float(APD_A_REWARD), float(APD_B_REWARD)),
|
|
@@ -74,20 +59,21 @@ _PW: dict[tuple[str, str], tuple[float, float]] = {
|
|
| 74 |
|
| 75 |
|
| 76 |
# -- Register --
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
game_type="matrix",
|
| 88 |
-
default_rounds=DEFAULT_NUM_ROUNDS,
|
| 89 |
-
payoff_fn=_optional_pd_payoff,
|
| 90 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
"asymmetric_pd": GameConfig(
|
| 92 |
name="Asymmetric Prisoner's Dilemma",
|
| 93 |
description=(
|
|
|
|
| 1 |
"""Prisoner's Dilemma variants for KantBench."""
|
| 2 |
from __future__ import annotations
|
| 3 |
|
| 4 |
+
from dataclasses import replace
|
| 5 |
+
|
| 6 |
from common.games import GAMES, GameConfig, _matrix_payoff_fn
|
| 7 |
+
from common.variants import apply_exit
|
| 8 |
from constant_definitions.game_constants import (
|
|
|
|
| 9 |
DEFAULT_NUM_ROUNDS, SINGLE_SHOT_ROUNDS,
|
| 10 |
)
|
| 11 |
from constant_definitions.var.pd_variant_constants import (
|
|
|
|
| 12 |
APD_A_TEMPTATION, APD_A_REWARD, APD_A_PUNISHMENT, APD_A_SUCKER,
|
| 13 |
APD_B_TEMPTATION, APD_B_REWARD, APD_B_PUNISHMENT, APD_B_SUCKER,
|
| 14 |
DONATION_BENEFIT, DONATION_COST,
|
|
|
|
| 19 |
_ZERO_F = float()
|
| 20 |
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
# -- Asymmetric PD (alibi game: different payoffs per player) --
|
| 23 |
_ASYM_PD: dict[tuple[str, str], tuple[float, float]] = {
|
| 24 |
("cooperate", "cooperate"): (float(APD_A_REWARD), float(APD_B_REWARD)),
|
|
|
|
| 59 |
|
| 60 |
|
| 61 |
# -- Register --
|
| 62 |
+
_PD_KEY = "prisoners_dilemma"
|
| 63 |
+
_optional_pd_composed = apply_exit(GAMES[_PD_KEY], base_key=_PD_KEY)
|
| 64 |
+
_optional_pd = replace(
|
| 65 |
+
_optional_pd_composed,
|
| 66 |
+
name="Optional Prisoner's Dilemma",
|
| 67 |
+
description=(
|
| 68 |
+
"A Prisoner's Dilemma with a third action: exit. Exiting gives "
|
| 69 |
+
"a safe intermediate payoff regardless of the opponent's choice. "
|
| 70 |
+
"Tests whether outside options change cooperation dynamics and "
|
| 71 |
+
"models situations where players can walk away from interactions."
|
|
|
|
|
|
|
|
|
|
| 72 |
),
|
| 73 |
+
)
|
| 74 |
+
|
| 75 |
+
PD_VARIANT_GAMES: dict[str, GameConfig] = {
|
| 76 |
+
"optional_pd": _optional_pd,
|
| 77 |
"asymmetric_pd": GameConfig(
|
| 78 |
name="Asymmetric Prisoner's Dilemma",
|
| 79 |
description=(
|
common/games_info/communication.py
CHANGED
|
@@ -1,11 +1,12 @@
|
|
| 1 |
"""Communication and mediation games for KantBench."""
|
| 2 |
from __future__ import annotations
|
| 3 |
|
|
|
|
|
|
|
| 4 |
from common.games import GAMES, GameConfig, _matrix_payoff_fn
|
|
|
|
| 5 |
from constant_definitions.game_constants import DEFAULT_NUM_ROUNDS, SINGLE_SHOT_ROUNDS
|
| 6 |
from constant_definitions.var.communication_constants import (
|
| 7 |
-
CTPD_REWARD, CTPD_TEMPTATION, CTPD_PUNISHMENT, CTPD_SUCKER,
|
| 8 |
-
COMMIT_COST,
|
| 9 |
CE_FOLLOW_FOLLOW, CE_FOLLOW_DEVIATE,
|
| 10 |
CE_DEVIATE_FOLLOW, CE_DEVIATE_DEVIATE,
|
| 11 |
FP_MATCH_PAYOFF, FP_MISMATCH_PAYOFF,
|
|
@@ -13,49 +14,35 @@ from constant_definitions.var.communication_constants import (
|
|
| 13 |
MG_REJECT_ACCEPT, MG_REJECT_REJECT,
|
| 14 |
)
|
| 15 |
|
| 16 |
-
_ONE = int(bool(True))
|
| 17 |
_ZERO_F = float()
|
| 18 |
|
| 19 |
-
# -- Cheap Talk PD
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
# -- Binding Commitment (costly commitment mechanism) --
|
| 42 |
-
_CC = float(CTPD_REWARD)
|
| 43 |
-
_CS = float(CTPD_SUCKER)
|
| 44 |
-
_CT = float(CTPD_TEMPTATION)
|
| 45 |
-
_CP = float(CTPD_PUNISHMENT)
|
| 46 |
-
_COST = float(COMMIT_COST)
|
| 47 |
-
|
| 48 |
-
_BIND_MATRIX: dict[tuple[str, str], tuple[float, float]] = {
|
| 49 |
-
("commit_coop", "commit_coop"): (_CC - _COST, _CC - _COST),
|
| 50 |
-
("commit_coop", "free_coop"): (_CC - _COST, _CC),
|
| 51 |
-
("commit_coop", "free_defect"): (_CS - _COST, _CT),
|
| 52 |
-
("free_coop", "commit_coop"): (_CC, _CC - _COST),
|
| 53 |
-
("free_coop", "free_coop"): (_CC, _CC),
|
| 54 |
-
("free_coop", "free_defect"): (_CS, _CT),
|
| 55 |
-
("free_defect", "commit_coop"): (_CT, _CS - _COST),
|
| 56 |
-
("free_defect", "free_coop"): (_CT, _CS),
|
| 57 |
-
("free_defect", "free_defect"): (_CP, _CP),
|
| 58 |
-
}
|
| 59 |
|
| 60 |
|
| 61 |
# -- Correlated Equilibrium (follow external mediator or deviate) --
|
|
@@ -90,32 +77,8 @@ _MED: dict[tuple[str, str], tuple[float, float]] = {
|
|
| 90 |
|
| 91 |
# -- Register --
|
| 92 |
COMMUNICATION_GAMES: dict[str, GameConfig] = {
|
| 93 |
-
"cheap_talk_pd":
|
| 94 |
-
|
| 95 |
-
description=(
|
| 96 |
-
"A Prisoner's Dilemma where each player sends a non-binding "
|
| 97 |
-
"message before acting. Messages are cheap talk: costless and "
|
| 98 |
-
"unenforceable. Payoffs depend only on actual actions. Tests "
|
| 99 |
-
"whether non-binding communication improves cooperation."
|
| 100 |
-
),
|
| 101 |
-
actions=_CTPD_ACTS,
|
| 102 |
-
game_type="cheap_talk_pd",
|
| 103 |
-
default_rounds=DEFAULT_NUM_ROUNDS,
|
| 104 |
-
payoff_fn=_cheap_talk_pd_payoff,
|
| 105 |
-
),
|
| 106 |
-
"binding_commitment": GameConfig(
|
| 107 |
-
name="Binding Commitment Game",
|
| 108 |
-
description=(
|
| 109 |
-
"A Prisoner's Dilemma where players can pay a cost to make a "
|
| 110 |
-
"binding commitment to cooperate. The commitment is credible "
|
| 111 |
-
"but costly. Tests whether costly signaling through commitment "
|
| 112 |
-
"mechanisms changes equilibrium behavior."
|
| 113 |
-
),
|
| 114 |
-
actions=["commit_coop", "free_coop", "free_defect"],
|
| 115 |
-
game_type="matrix",
|
| 116 |
-
default_rounds=DEFAULT_NUM_ROUNDS,
|
| 117 |
-
payoff_fn=_matrix_payoff_fn(_BIND_MATRIX),
|
| 118 |
-
),
|
| 119 |
"correlated_equilibrium": GameConfig(
|
| 120 |
name="Correlated Equilibrium Game",
|
| 121 |
description=(
|
|
|
|
| 1 |
"""Communication and mediation games for KantBench."""
|
| 2 |
from __future__ import annotations
|
| 3 |
|
| 4 |
+
from dataclasses import replace
|
| 5 |
+
|
| 6 |
from common.games import GAMES, GameConfig, _matrix_payoff_fn
|
| 7 |
+
from common.variants import apply_cheap_talk, apply_binding_commitment
|
| 8 |
from constant_definitions.game_constants import DEFAULT_NUM_ROUNDS, SINGLE_SHOT_ROUNDS
|
| 9 |
from constant_definitions.var.communication_constants import (
|
|
|
|
|
|
|
| 10 |
CE_FOLLOW_FOLLOW, CE_FOLLOW_DEVIATE,
|
| 11 |
CE_DEVIATE_FOLLOW, CE_DEVIATE_DEVIATE,
|
| 12 |
FP_MATCH_PAYOFF, FP_MISMATCH_PAYOFF,
|
|
|
|
| 14 |
MG_REJECT_ACCEPT, MG_REJECT_REJECT,
|
| 15 |
)
|
| 16 |
|
|
|
|
| 17 |
_ZERO_F = float()
|
| 18 |
|
| 19 |
+
# -- Cheap Talk PD via composition --
|
| 20 |
+
_PD_KEY = "prisoners_dilemma"
|
| 21 |
+
_cheap_talk_pd_composed = apply_cheap_talk(GAMES[_PD_KEY], base_key=_PD_KEY)
|
| 22 |
+
_cheap_talk_pd = replace(
|
| 23 |
+
_cheap_talk_pd_composed,
|
| 24 |
+
name="Cheap Talk Prisoner's Dilemma",
|
| 25 |
+
description=(
|
| 26 |
+
"A Prisoner's Dilemma where each player sends a non-binding "
|
| 27 |
+
"message before acting. Messages are cheap talk: costless and "
|
| 28 |
+
"unenforceable. Payoffs depend only on actual actions. Tests "
|
| 29 |
+
"whether non-binding communication improves cooperation."
|
| 30 |
+
),
|
| 31 |
+
game_type="cheap_talk_pd",
|
| 32 |
+
)
|
| 33 |
|
| 34 |
+
# -- Binding Commitment via composition --
|
| 35 |
+
_binding_composed = apply_binding_commitment(GAMES[_PD_KEY], base_key=_PD_KEY)
|
| 36 |
+
_binding_commitment = replace(
|
| 37 |
+
_binding_composed,
|
| 38 |
+
name="Binding Commitment Game",
|
| 39 |
+
description=(
|
| 40 |
+
"A Prisoner's Dilemma where players can pay a cost to make a "
|
| 41 |
+
"binding commitment to cooperate. The commitment is credible "
|
| 42 |
+
"but costly. Tests whether costly signaling through commitment "
|
| 43 |
+
"mechanisms changes equilibrium behavior."
|
| 44 |
+
),
|
| 45 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
|
| 48 |
# -- Correlated Equilibrium (follow external mediator or deviate) --
|
|
|
|
| 77 |
|
| 78 |
# -- Register --
|
| 79 |
COMMUNICATION_GAMES: dict[str, GameConfig] = {
|
| 80 |
+
"cheap_talk_pd": _cheap_talk_pd,
|
| 81 |
+
"binding_commitment": _binding_commitment,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
"correlated_equilibrium": GameConfig(
|
| 83 |
name="Correlated Equilibrium Game",
|
| 84 |
description=(
|
common/games_meta/coalition_config.py
CHANGED
|
@@ -2,10 +2,8 @@
|
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
| 5 |
-
from
|
| 6 |
-
from
|
| 7 |
-
|
| 8 |
-
from common.games_meta.nplayer_config import NPlayerGameConfig, NPLAYER_GAMES
|
| 9 |
from constant_definitions.nplayer.coalition_constants import (
|
| 10 |
COALITION_DEFAULT_ROUNDS, COALITION_DEFAULT_PENALTY_NUMERATOR,
|
| 11 |
COALITION_DEFAULT_PENALTY_DENOMINATOR,
|
|
@@ -27,32 +25,18 @@ from constant_definitions.nplayer.coalition_constants import (
|
|
| 27 |
COMMONS_LOW_DEPLETED, COMMONS_HIGH_DEPLETED,
|
| 28 |
)
|
| 29 |
|
|
|
|
|
|
|
| 30 |
_ONE = int(bool(True))
|
| 31 |
_ZERO = int()
|
| 32 |
_PEN_N = COALITION_DEFAULT_PENALTY_NUMERATOR
|
| 33 |
_PEN_D = COALITION_DEFAULT_PENALTY_DENOMINATOR
|
| 34 |
|
| 35 |
|
| 36 |
-
|
| 37 |
-
class CoalitionGameConfig:
|
| 38 |
-
"""Immutable specification for a coalition-enabled N-player game."""
|
| 39 |
-
|
| 40 |
-
name: str
|
| 41 |
-
description: str
|
| 42 |
-
actions: list[str]
|
| 43 |
-
num_players: int
|
| 44 |
-
default_rounds: int
|
| 45 |
-
payoff_fn: Callable[[tuple[str, ...]], tuple[float, ...]]
|
| 46 |
-
enforcement: str
|
| 47 |
-
penalty_numerator: int
|
| 48 |
-
penalty_denominator: int
|
| 49 |
-
allow_side_payments: bool
|
| 50 |
|
| 51 |
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
def get_coalition_game(name: str) -> CoalitionGameConfig:
|
| 56 |
"""Look up a coalition game by name. Raises KeyError if not found."""
|
| 57 |
return COALITION_GAMES[name]
|
| 58 |
|
|
@@ -160,16 +144,17 @@ def _commons_governance_payoff(actions: tuple[str, ...]) -> tuple[float, ...]:
|
|
| 160 |
# ---------------------------------------------------------------------------
|
| 161 |
|
| 162 |
def _cfg(name: str, desc: str, actions: list[str], n: int,
|
| 163 |
-
fn: object, enf: str, side: bool = False) ->
|
| 164 |
-
return
|
| 165 |
-
name=name, description=desc, actions=actions,
|
| 166 |
-
|
|
|
|
| 167 |
enforcement=enf, penalty_numerator=_PEN_N, penalty_denominator=_PEN_D,
|
| 168 |
allow_side_payments=side,
|
| 169 |
)
|
| 170 |
|
| 171 |
|
| 172 |
-
_BUILTIN_COALITION_GAMES: dict[str,
|
| 173 |
"coalition_cartel": _cfg(
|
| 174 |
"Cartel",
|
| 175 |
"Players collude or compete. If enough collude the cartel holds. "
|
|
@@ -218,10 +203,6 @@ _BUILTIN_COALITION_GAMES: dict[str, CoalitionGameConfig] = {
|
|
| 218 |
|
| 219 |
COALITION_GAMES.update(_BUILTIN_COALITION_GAMES)
|
| 220 |
|
| 221 |
-
#
|
| 222 |
for _key, _c in _BUILTIN_COALITION_GAMES.items():
|
| 223 |
-
NPLAYER_GAMES[_key] =
|
| 224 |
-
name=_c.name, description=_c.description, actions=_c.actions,
|
| 225 |
-
num_players=_c.num_players, default_rounds=_c.default_rounds,
|
| 226 |
-
payoff_fn=_c.payoff_fn,
|
| 227 |
-
)
|
|
|
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
| 5 |
+
from common.games import GameConfig
|
| 6 |
+
from common.games_meta.nplayer_config import NPLAYER_GAMES
|
|
|
|
|
|
|
| 7 |
from constant_definitions.nplayer.coalition_constants import (
|
| 8 |
COALITION_DEFAULT_ROUNDS, COALITION_DEFAULT_PENALTY_NUMERATOR,
|
| 9 |
COALITION_DEFAULT_PENALTY_DENOMINATOR,
|
|
|
|
| 25 |
COMMONS_LOW_DEPLETED, COMMONS_HIGH_DEPLETED,
|
| 26 |
)
|
| 27 |
|
| 28 |
+
CoalitionGameConfig = GameConfig
|
| 29 |
+
|
| 30 |
_ONE = int(bool(True))
|
| 31 |
_ZERO = int()
|
| 32 |
_PEN_N = COALITION_DEFAULT_PENALTY_NUMERATOR
|
| 33 |
_PEN_D = COALITION_DEFAULT_PENALTY_DENOMINATOR
|
| 34 |
|
| 35 |
|
| 36 |
+
COALITION_GAMES: dict[str, GameConfig] = {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
|
| 39 |
+
def get_coalition_game(name: str) -> GameConfig:
|
|
|
|
|
|
|
|
|
|
| 40 |
"""Look up a coalition game by name. Raises KeyError if not found."""
|
| 41 |
return COALITION_GAMES[name]
|
| 42 |
|
|
|
|
| 144 |
# ---------------------------------------------------------------------------
|
| 145 |
|
| 146 |
def _cfg(name: str, desc: str, actions: list[str], n: int,
|
| 147 |
+
fn: object, enf: str, side: bool = False) -> GameConfig:
|
| 148 |
+
return GameConfig(
|
| 149 |
+
name=name, description=desc, actions=actions, game_type="coalition",
|
| 150 |
+
num_players=n, default_rounds=COALITION_DEFAULT_ROUNDS,
|
| 151 |
+
payoff_fn=fn, # type: ignore[arg-type]
|
| 152 |
enforcement=enf, penalty_numerator=_PEN_N, penalty_denominator=_PEN_D,
|
| 153 |
allow_side_payments=side,
|
| 154 |
)
|
| 155 |
|
| 156 |
|
| 157 |
+
_BUILTIN_COALITION_GAMES: dict[str, GameConfig] = {
|
| 158 |
"coalition_cartel": _cfg(
|
| 159 |
"Cartel",
|
| 160 |
"Players collude or compete. If enough collude the cartel holds. "
|
|
|
|
| 203 |
|
| 204 |
COALITION_GAMES.update(_BUILTIN_COALITION_GAMES)
|
| 205 |
|
| 206 |
+
# Register coalition games as N-player games too (same GameConfig instances)
|
| 207 |
for _key, _c in _BUILTIN_COALITION_GAMES.items():
|
| 208 |
+
NPLAYER_GAMES[_key] = _c
|
|
|
|
|
|
|
|
|
|
|
|
common/games_meta/game_tags.py
CHANGED
|
@@ -78,8 +78,8 @@ GAME_TAGS: dict[str, frozenset[str]] = {
|
|
| 78 |
"gift_exchange": frozenset({NO_COMMUNICATION, COMPLETE_INFORMATION, SEQUENTIAL, SINGLE_SHOT, ASYMMETRIC_PAYOFF, SOCIAL_DILEMMA, LARGE_CHOICE}),
|
| 79 |
|
| 80 |
# ── games_info/communication.py ──
|
| 81 |
-
"cheap_talk_pd": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA,
|
| 82 |
-
"binding_commitment": frozenset({BINDING_COMMITMENT, COMPLETE_INFORMATION, SEQUENTIAL, SINGLE_SHOT, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA,
|
| 83 |
"correlated_equilibrium": frozenset({MEDIATED, COMPLETE_INFORMATION, SIMULTANEOUS, SINGLE_SHOT, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, BINARY_CHOICE}),
|
| 84 |
"focal_point": frozenset({NO_COMMUNICATION, COMPLETE_INFORMATION, SIMULTANEOUS, SINGLE_SHOT, COORDINATION, SOCIAL_DILEMMA, SMALL_CHOICE}),
|
| 85 |
"mediated_game": frozenset({MEDIATED, COMPLETE_INFORMATION, SIMULTANEOUS, SINGLE_SHOT, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, BINARY_CHOICE}),
|
|
@@ -175,6 +175,19 @@ GAME_TAGS: dict[str, frozenset[str]] = {
|
|
| 175 |
"coalition_resource_trading": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, MARKET_COMPETITION, BINARY_CHOICE, MULTIPLAYER, COALITION_FORMATION, META_GOVERNANCE}),
|
| 176 |
"coalition_rule_voting": frozenset({BINDING_COMMITMENT, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, VOTING, BINARY_CHOICE, MULTIPLAYER, COALITION_FORMATION, BINDING_ENFORCEMENT, META_GOVERNANCE}),
|
| 177 |
"coalition_commons": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, BINARY_CHOICE, MULTIPLAYER, COALITION_FORMATION, PENALTY_ENFORCEMENT, META_GOVERNANCE}),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
}
|
| 179 |
|
| 180 |
|
|
@@ -183,6 +196,33 @@ GAME_TAGS: dict[str, frozenset[str]] = {
|
|
| 183 |
# ---------------------------------------------------------------------------
|
| 184 |
|
| 185 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
def get_games_by_tag(tag: str) -> list[str]:
|
| 187 |
"""Return all game keys that have the given tag."""
|
| 188 |
return [key for key, tags in GAME_TAGS.items() if tag in tags]
|
|
|
|
| 78 |
"gift_exchange": frozenset({NO_COMMUNICATION, COMPLETE_INFORMATION, SEQUENTIAL, SINGLE_SHOT, ASYMMETRIC_PAYOFF, SOCIAL_DILEMMA, LARGE_CHOICE}),
|
| 79 |
|
| 80 |
# ── games_info/communication.py ──
|
| 81 |
+
"cheap_talk_pd": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, SMALL_CHOICE}),
|
| 82 |
+
"binding_commitment": frozenset({BINDING_COMMITMENT, COMPLETE_INFORMATION, SEQUENTIAL, SINGLE_SHOT, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, SMALL_CHOICE}),
|
| 83 |
"correlated_equilibrium": frozenset({MEDIATED, COMPLETE_INFORMATION, SIMULTANEOUS, SINGLE_SHOT, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, BINARY_CHOICE}),
|
| 84 |
"focal_point": frozenset({NO_COMMUNICATION, COMPLETE_INFORMATION, SIMULTANEOUS, SINGLE_SHOT, COORDINATION, SOCIAL_DILEMMA, SMALL_CHOICE}),
|
| 85 |
"mediated_game": frozenset({MEDIATED, COMPLETE_INFORMATION, SIMULTANEOUS, SINGLE_SHOT, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, BINARY_CHOICE}),
|
|
|
|
| 175 |
"coalition_resource_trading": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, MARKET_COMPETITION, BINARY_CHOICE, MULTIPLAYER, COALITION_FORMATION, META_GOVERNANCE}),
|
| 176 |
"coalition_rule_voting": frozenset({BINDING_COMMITMENT, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, VOTING, BINARY_CHOICE, MULTIPLAYER, COALITION_FORMATION, BINDING_ENFORCEMENT, META_GOVERNANCE}),
|
| 177 |
"coalition_commons": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, BINARY_CHOICE, MULTIPLAYER, COALITION_FORMATION, PENALTY_ENFORCEMENT, META_GOVERNANCE}),
|
| 178 |
+
|
| 179 |
+
# ── meta/meta_games.py ──
|
| 180 |
+
"rule_proposal_prisoners_dilemma": frozenset({BINDING_COMMITMENT, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, LARGE_CHOICE, META_GOVERNANCE}),
|
| 181 |
+
"rule_proposal_stag_hunt": frozenset({BINDING_COMMITMENT, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, COORDINATION, SOCIAL_DILEMMA, LARGE_CHOICE, META_GOVERNANCE}),
|
| 182 |
+
"rule_proposal_hawk_dove": frozenset({BINDING_COMMITMENT, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, ANTI_COORDINATION, SOCIAL_DILEMMA, LARGE_CHOICE, META_GOVERNANCE}),
|
| 183 |
+
"rule_signal_prisoners_dilemma": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, LARGE_CHOICE, META_GOVERNANCE}),
|
| 184 |
+
"rule_signal_stag_hunt": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, COORDINATION, SOCIAL_DILEMMA, LARGE_CHOICE, META_GOVERNANCE}),
|
| 185 |
+
"rule_signal_hawk_dove": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, ANTI_COORDINATION, SOCIAL_DILEMMA, LARGE_CHOICE, META_GOVERNANCE}),
|
| 186 |
+
|
| 187 |
+
# ── meta/meta_games.py (gossip) ──
|
| 188 |
+
"gossip_prisoners_dilemma": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, LARGE_CHOICE, META_GOVERNANCE}),
|
| 189 |
+
"gossip_stag_hunt": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, COORDINATION, SOCIAL_DILEMMA, LARGE_CHOICE, META_GOVERNANCE}),
|
| 190 |
+
"gossip_hawk_dove": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, ANTI_COORDINATION, SOCIAL_DILEMMA, LARGE_CHOICE, META_GOVERNANCE}),
|
| 191 |
}
|
| 192 |
|
| 193 |
|
|
|
|
| 196 |
# ---------------------------------------------------------------------------
|
| 197 |
|
| 198 |
|
| 199 |
+
def derive_variant_tags(
|
| 200 |
+
base_tags: frozenset[str], variant_name: str,
|
| 201 |
+
) -> frozenset[str]:
|
| 202 |
+
"""Compute tags for a composed variant game from base game tags."""
|
| 203 |
+
tags = set(base_tags)
|
| 204 |
+
if variant_name == "cheap_talk":
|
| 205 |
+
tags.discard(NO_COMMUNICATION)
|
| 206 |
+
tags.add(CHEAP_TALK)
|
| 207 |
+
tags.discard(BINARY_CHOICE)
|
| 208 |
+
tags.add(SMALL_CHOICE)
|
| 209 |
+
elif variant_name == "binding_commitment":
|
| 210 |
+
tags.discard(NO_COMMUNICATION)
|
| 211 |
+
tags.add(BINDING_COMMITMENT)
|
| 212 |
+
tags.discard(BINARY_CHOICE)
|
| 213 |
+
tags.add(SMALL_CHOICE)
|
| 214 |
+
elif variant_name == "exit":
|
| 215 |
+
tags.discard(BINARY_CHOICE)
|
| 216 |
+
tags.add(SMALL_CHOICE)
|
| 217 |
+
elif variant_name == "gossip":
|
| 218 |
+
tags.discard(NO_COMMUNICATION)
|
| 219 |
+
tags.add(CHEAP_TALK)
|
| 220 |
+
tags.discard(BINARY_CHOICE)
|
| 221 |
+
tags.add(LARGE_CHOICE)
|
| 222 |
+
tags.add(META_GOVERNANCE)
|
| 223 |
+
return frozenset(tags)
|
| 224 |
+
|
| 225 |
+
|
| 226 |
def get_games_by_tag(tag: str) -> list[str]:
|
| 227 |
"""Return all game keys that have the given tag."""
|
| 228 |
return [key for key, tags in GAME_TAGS.items() if tag in tags]
|
common/games_meta/nplayer_config.py
CHANGED
|
@@ -2,29 +2,13 @@
|
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
| 5 |
-
from
|
| 6 |
-
from typing import Callable
|
| 7 |
|
| 8 |
-
|
| 9 |
-
NPLAYER_DEFAULT_ROUNDS,
|
| 10 |
-
)
|
| 11 |
|
|
|
|
| 12 |
|
| 13 |
-
@dataclass(frozen=True)
|
| 14 |
-
class NPlayerGameConfig:
|
| 15 |
-
"""Immutable specification for an N-player game type."""
|
| 16 |
|
| 17 |
-
|
| 18 |
-
description: str
|
| 19 |
-
actions: list[str]
|
| 20 |
-
num_players: int
|
| 21 |
-
default_rounds: int
|
| 22 |
-
payoff_fn: Callable[[tuple[str, ...]], tuple[float, ...]]
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
NPLAYER_GAMES: dict[str, NPlayerGameConfig] = {}
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
def get_nplayer_game(name: str) -> NPlayerGameConfig:
|
| 29 |
"""Look up an N-player game by name. Raises KeyError if not found."""
|
| 30 |
return NPLAYER_GAMES[name]
|
|
|
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
| 5 |
+
from common.games import GameConfig
|
|
|
|
| 6 |
|
| 7 |
+
NPlayerGameConfig = GameConfig
|
|
|
|
|
|
|
| 8 |
|
| 9 |
+
NPLAYER_GAMES: dict[str, GameConfig] = {}
|
| 10 |
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
+
def get_nplayer_game(name: str) -> GameConfig:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
"""Look up an N-player game by name. Raises KeyError if not found."""
|
| 14 |
return NPLAYER_GAMES[name]
|
common/games_meta/nplayer_games.py
CHANGED
|
@@ -2,7 +2,8 @@
|
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
| 5 |
-
from common.
|
|
|
|
| 6 |
from constant_definitions.nplayer.nplayer_constants import (
|
| 7 |
NPLAYER_DEFAULT_ROUNDS,
|
| 8 |
NPG_ENDOWMENT,
|
|
@@ -90,19 +91,20 @@ def _el_farol_payoff(actions: tuple[str, ...]) -> tuple[float, ...]:
|
|
| 90 |
_THREE = _ONE + _ONE + _ONE
|
| 91 |
_FIVE = _THREE + _ONE + _ONE
|
| 92 |
|
| 93 |
-
_BUILTIN_NPLAYER_GAMES: dict[str,
|
| 94 |
-
"nplayer_public_goods":
|
| 95 |
name="N-Player Public Goods",
|
| 96 |
description=(
|
| 97 |
"Each player contributes from an endowment. The total pot is "
|
| 98 |
"multiplied and split equally among all players."
|
| 99 |
),
|
| 100 |
actions=_PG_ACTIONS,
|
|
|
|
| 101 |
num_players=_FIVE,
|
| 102 |
default_rounds=NPLAYER_DEFAULT_ROUNDS,
|
| 103 |
payoff_fn=_public_goods_payoff,
|
| 104 |
),
|
| 105 |
-
"nplayer_volunteer_dilemma":
|
| 106 |
name="N-Player Volunteer's Dilemma",
|
| 107 |
description=(
|
| 108 |
"Players choose to volunteer or abstain. If at least one "
|
|
@@ -110,17 +112,19 @@ _BUILTIN_NPLAYER_GAMES: dict[str, NPlayerGameConfig] = {
|
|
| 110 |
"If nobody volunteers, everyone gets nothing."
|
| 111 |
),
|
| 112 |
actions=["volunteer", "abstain"],
|
|
|
|
| 113 |
num_players=_FIVE,
|
| 114 |
default_rounds=NPLAYER_DEFAULT_ROUNDS,
|
| 115 |
payoff_fn=_volunteer_dilemma_payoff,
|
| 116 |
),
|
| 117 |
-
"nplayer_el_farol":
|
| 118 |
name="N-Player El Farol Bar",
|
| 119 |
description=(
|
| 120 |
"Players decide whether to attend a bar. The bar is fun when "
|
| 121 |
"not crowded but unpleasant when too many people show up."
|
| 122 |
),
|
| 123 |
actions=["attend", "stay_home"],
|
|
|
|
| 124 |
num_players=_FIVE,
|
| 125 |
default_rounds=NPLAYER_DEFAULT_ROUNDS,
|
| 126 |
payoff_fn=_el_farol_payoff,
|
|
|
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
| 5 |
+
from common.games import GameConfig
|
| 6 |
+
from common.games_meta.nplayer_config import NPLAYER_GAMES
|
| 7 |
from constant_definitions.nplayer.nplayer_constants import (
|
| 8 |
NPLAYER_DEFAULT_ROUNDS,
|
| 9 |
NPG_ENDOWMENT,
|
|
|
|
| 91 |
_THREE = _ONE + _ONE + _ONE
|
| 92 |
_FIVE = _THREE + _ONE + _ONE
|
| 93 |
|
| 94 |
+
_BUILTIN_NPLAYER_GAMES: dict[str, GameConfig] = {
|
| 95 |
+
"nplayer_public_goods": GameConfig(
|
| 96 |
name="N-Player Public Goods",
|
| 97 |
description=(
|
| 98 |
"Each player contributes from an endowment. The total pot is "
|
| 99 |
"multiplied and split equally among all players."
|
| 100 |
),
|
| 101 |
actions=_PG_ACTIONS,
|
| 102 |
+
game_type="public_goods",
|
| 103 |
num_players=_FIVE,
|
| 104 |
default_rounds=NPLAYER_DEFAULT_ROUNDS,
|
| 105 |
payoff_fn=_public_goods_payoff,
|
| 106 |
),
|
| 107 |
+
"nplayer_volunteer_dilemma": GameConfig(
|
| 108 |
name="N-Player Volunteer's Dilemma",
|
| 109 |
description=(
|
| 110 |
"Players choose to volunteer or abstain. If at least one "
|
|
|
|
| 112 |
"If nobody volunteers, everyone gets nothing."
|
| 113 |
),
|
| 114 |
actions=["volunteer", "abstain"],
|
| 115 |
+
game_type="matrix",
|
| 116 |
num_players=_FIVE,
|
| 117 |
default_rounds=NPLAYER_DEFAULT_ROUNDS,
|
| 118 |
payoff_fn=_volunteer_dilemma_payoff,
|
| 119 |
),
|
| 120 |
+
"nplayer_el_farol": GameConfig(
|
| 121 |
name="N-Player El Farol Bar",
|
| 122 |
description=(
|
| 123 |
"Players decide whether to attend a bar. The bar is fun when "
|
| 124 |
"not crowded but unpleasant when too many people show up."
|
| 125 |
),
|
| 126 |
actions=["attend", "stay_home"],
|
| 127 |
+
game_type="matrix",
|
| 128 |
num_players=_FIVE,
|
| 129 |
default_rounds=NPLAYER_DEFAULT_ROUNDS,
|
| 130 |
payoff_fn=_el_farol_payoff,
|
common/meta/memory_store.py
ADDED
|
@@ -0,0 +1,183 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Persistent cross-episode memory backed by cognee knowledge graph.
|
| 2 |
+
|
| 3 |
+
Records episode summaries, gossip ratings, and opponent statistics.
|
| 4 |
+
Uses in-memory stats when cognee is not installed.
|
| 5 |
+
"""
|
| 6 |
+
from __future__ import annotations
|
| 7 |
+
|
| 8 |
+
import asyncio
|
| 9 |
+
import threading
|
| 10 |
+
from typing import Any
|
| 11 |
+
|
| 12 |
+
from constant_definitions.var.meta.reputation_constants import (
|
| 13 |
+
COGNEE_DATASET_NAME,
|
| 14 |
+
COGNEE_SEARCH_TYPE,
|
| 15 |
+
DEFAULT_REPUTATION_SCORE_NUMERATOR,
|
| 16 |
+
DEFAULT_REPUTATION_SCORE_DENOMINATOR,
|
| 17 |
+
REPUTATION_DECAY_NUMERATOR,
|
| 18 |
+
REPUTATION_DECAY_DENOMINATOR,
|
| 19 |
+
META_KEY_COOPERATION_RATE,
|
| 20 |
+
META_KEY_INTERACTION_COUNT,
|
| 21 |
+
META_KEY_GOSSIP_HISTORY,
|
| 22 |
+
)
|
| 23 |
+
|
| 24 |
+
_ZERO = int()
|
| 25 |
+
_ONE = int(bool(True))
|
| 26 |
+
_DEFAULT_SCORE = (
|
| 27 |
+
DEFAULT_REPUTATION_SCORE_NUMERATOR / DEFAULT_REPUTATION_SCORE_DENOMINATOR
|
| 28 |
+
)
|
| 29 |
+
_DECAY = REPUTATION_DECAY_NUMERATOR / REPUTATION_DECAY_DENOMINATOR
|
| 30 |
+
|
| 31 |
+
try:
|
| 32 |
+
import cognee as _cognee # type: ignore[import-untyped]
|
| 33 |
+
_HAS_COGNEE = True
|
| 34 |
+
except ImportError:
|
| 35 |
+
_cognee = None
|
| 36 |
+
_HAS_COGNEE = False
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
class AsyncBridge:
    """Run coroutines from synchronous code on a dedicated background loop.

    A private event loop is created and driven by ``run_forever`` on a
    daemon thread started in ``__init__``.  ``run`` hands a coroutine to
    that loop and blocks the calling thread until it finishes.  Call
    ``close`` when the bridge is no longer needed so the loop and its
    thread are released instead of living until interpreter exit.
    """

    def __init__(self) -> None:
        self._loop = asyncio.new_event_loop()
        self._thread = threading.Thread(
            target=self._loop.run_forever, daemon=True,
        )
        self._thread.start()

    def run(self, coro: Any, timeout: float | None = None) -> Any:
        """Submit *coro* to the background loop and block for the result.

        Args:
            coro: The coroutine object to execute.
            timeout: Maximum number of seconds to wait.  ``None`` (the
                default) waits indefinitely, as before.

        Returns:
            Whatever the coroutine returns.

        Raises:
            Any exception raised by the coroutine, or the timeout error
            raised by ``Future.result`` when *timeout* elapses.
        """
        future = asyncio.run_coroutine_threadsafe(coro, self._loop)
        try:
            return future.result(timeout)
        finally:
            # On timeout (or an interrupt of the waiting thread) the
            # coroutine would otherwise keep running unobserved.
            if not future.done():
                future.cancel()

    def close(self) -> None:
        """Stop the background loop, join its thread and close the loop.

        Safe to call more than once; later calls are no-ops.
        """
        if self._loop.is_closed():
            return
        # ``stop`` must be scheduled from the loop's own thread.
        self._loop.call_soon_threadsafe(self._loop.stop)
        self._thread.join()
        self._loop.close()
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def _default_reputation() -> dict[str, Any]:
    """Build a fresh neutral reputation record.

    Score and cooperation rate both start at ``_DEFAULT_SCORE``, the
    interaction count at ``_ZERO`` and the gossip history as a new empty
    list, so records never share mutable state.
    """
    neutral: dict[str, Any] = {"score": _DEFAULT_SCORE}
    neutral[META_KEY_COOPERATION_RATE] = _DEFAULT_SCORE
    neutral[META_KEY_INTERACTION_COUNT] = _ZERO
    neutral[META_KEY_GOSSIP_HISTORY] = []
    return neutral
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def _format_episode_text(
|
| 66 |
+
agent_id: str,
|
| 67 |
+
opponent_id: str,
|
| 68 |
+
game: str,
|
| 69 |
+
history: list[Any],
|
| 70 |
+
cooperation_rate: float,
|
| 71 |
+
scores: tuple[float, float],
|
| 72 |
+
) -> str:
|
| 73 |
+
"""Format an episode summary for cognee ingestion."""
|
| 74 |
+
rounds = len(history)
|
| 75 |
+
p_score, o_score = scores
|
| 76 |
+
actions = "; ".join(
|
| 77 |
+
f"R{r.round_number}: {r.player_action} vs {r.opponent_action}"
|
| 78 |
+
for r in history
|
| 79 |
+
)
|
| 80 |
+
return (
|
| 81 |
+
f"Game Interaction Report\n"
|
| 82 |
+
f"Agent: {agent_id} | Opponent: {opponent_id} | Game: {game}\n"
|
| 83 |
+
f"Rounds: {rounds} | Agent Score: {p_score} | "
|
| 84 |
+
f"Opponent Score: {o_score}\n"
|
| 85 |
+
f"Cooperation Rate: {cooperation_rate}\n"
|
| 86 |
+
f"Actions: {actions}\n"
|
| 87 |
+
)
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
def _parse_reputation(
|
| 91 |
+
results: Any, stats: dict[str, Any],
|
| 92 |
+
) -> dict[str, Any]:
|
| 93 |
+
"""Merge cognee search results with in-memory stats."""
|
| 94 |
+
rep = dict(stats) if stats else _default_reputation()
|
| 95 |
+
if results:
|
| 96 |
+
rep["cognee_context"] = str(results)
|
| 97 |
+
return rep
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
class CogneeMemoryStore:
    """Cross-episode memory: in-memory opponent stats plus optional cognee.

    Per-opponent statistics are always kept in ``self._stats``.  When the
    ``cognee`` package was importable, episode and gossip text is also
    pushed to cognee through an ``AsyncBridge``.  Every cognee call is
    best-effort: a failure is logged and the in-memory path continues.
    """

    def __init__(self) -> None:
        self._bridge = AsyncBridge() if _HAS_COGNEE else None
        self._stats: dict[str, dict[str, Any]] = {}

    def _cognee_ready(self) -> bool:
        """Return True when cognee calls can be attempted."""
        return self._bridge is not None and _HAS_COGNEE

    @staticmethod
    def _warn_cognee_failure(operation: str) -> None:
        """Log the exception currently being handled for *operation*."""
        # Imported locally so the module's import block stays untouched.
        import logging

        logging.getLogger(__name__).warning(
            "cognee %s failed; continuing with in-memory stats only",
            operation,
            exc_info=True,
        )

    def record_episode(
        self,
        agent_id: str,
        opponent_id: str,
        game: str,
        history: list[Any],
        cooperation_rate: float,
        scores: tuple[float, float],
    ) -> None:
        """Store one finished episode.

        The episode is formatted as text, added to cognee and cognified
        when cognee is available, and the opponent's running statistics
        are updated regardless of whether the cognee step succeeded.
        """
        text = _format_episode_text(
            agent_id, opponent_id, game, history,
            cooperation_rate, scores,
        )
        if self._cognee_ready():
            try:
                self._bridge.run(
                    _cognee.add(text, dataset_name=COGNEE_DATASET_NAME),
                )
                self._bridge.run(_cognee.cognify())
            except Exception:
                # Best-effort persistence: report instead of hiding it.
                self._warn_cognee_failure("episode ingestion")
        self._update_stats(opponent_id, cooperation_rate, scores)

    def query_reputation(self, opponent_id: str) -> dict[str, Any]:
        """Return the reputation record for *opponent_id*.

        Without cognee, or when the cognee search fails, this is the
        in-memory record (a default record for an unknown opponent).
        Otherwise the search results are merged in by
        ``_parse_reputation``.
        """
        stats = self._stats.get(opponent_id, _default_reputation())
        if not self._cognee_ready():
            return stats
        try:
            results = self._bridge.run(
                _cognee.search(
                    f"reputation and behavior of {opponent_id}",
                    search_type=COGNEE_SEARCH_TYPE,
                ),
            )
            return _parse_reputation(results, stats)
        except Exception:
            self._warn_cognee_failure("reputation search")
            return stats

    def record_gossip(
        self, rater_id: str, target_id: str, rating: str,
    ) -> None:
        """Record that *rater_id* rated *target_id* as *rating*.

        The rating sentence is added to cognee when available, and a
        ``{"rater", "rating"}`` entry is always appended to the target's
        in-memory gossip history.
        """
        text = f"{rater_id} rated {target_id} as {rating}."
        if self._cognee_ready():
            try:
                self._bridge.run(
                    _cognee.add(text, dataset_name=COGNEE_DATASET_NAME),
                )
            except Exception:
                self._warn_cognee_failure("gossip ingestion")
        target_stats = self._stats.setdefault(
            target_id, _default_reputation(),
        )
        gossip_list = target_stats.setdefault(META_KEY_GOSSIP_HISTORY, [])
        gossip_list.append({"rater": rater_id, "rating": rating})

    def get_stats(self, opponent_id: str) -> dict[str, Any]:
        """Return in-memory stats only; cognee is never consulted."""
        return self._stats.get(opponent_id, _default_reputation())

    def _update_stats(
        self,
        opponent_id: str,
        coop_rate: float,
        scores: tuple[float, float],
    ) -> None:
        """Fold one episode into the opponent's running statistics.

        The stored cooperation rate is blended with *coop_rate* as
        ``old * _DECAY + new * (1 - _DECAY)``; the blended value is also
        written to ``"score"`` and the interaction count is incremented.
        *scores* is accepted for interface compatibility but not used.
        """
        current = self._stats.get(opponent_id, _default_reputation())
        count = current.get(META_KEY_INTERACTION_COUNT, _ZERO) + _ONE
        old_coop = current.get(META_KEY_COOPERATION_RATE, _DEFAULT_SCORE)
        blended = old_coop * _DECAY + coop_rate * (_ONE - _DECAY)
        current["score"] = blended
        current[META_KEY_COOPERATION_RATE] = blended
        current[META_KEY_INTERACTION_COUNT] = count
        self._stats[opponent_id] = current
|
common/meta/meta_games.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Pre-registered meta-gaming rule-proposal games for KantBench.
|
| 2 |
+
|
| 3 |
+
Registers symmetric meta-games (rule_proposal, rule_signal) for the
|
| 4 |
+
three core matrix games. Constitutional and proposer-responder are
|
| 5 |
+
composed per-episode via ``compose_game()`` because constitutional uses
|
| 6 |
+
mutable closure state that must be fresh per episode.
|
| 7 |
+
"""
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
from dataclasses import replace
|
| 11 |
+
|
| 12 |
+
from common.games import GAMES
|
| 13 |
+
from common.meta.variants_meta import apply_rule_proposal, apply_rule_signal
|
| 14 |
+
from common.meta.variants_reputation import apply_gossip
|
| 15 |
+
|
| 16 |
+
# Base matrix games that receive pre-registered meta variants.
_BASE_KEYS = ("prisoners_dilemma", "stag_hunt", "hawk_dove")

# Display names used in the generated game names and descriptions.
_FRIENDLY_NAMES = {
    "prisoners_dilemma": "Prisoner's Dilemma",
    "stag_hunt": "Stag Hunt",
    "hawk_dove": "Hawk-Dove",
}

META_GAMES: dict = {}

# First pass: binding rule proposals and non-binding rule signals.
for _key in _BASE_KEYS:
    _base = GAMES[_key]
    _fname = _FRIENDLY_NAMES[_key]

    _rp = replace(
        apply_rule_proposal(_base, base_key=_key),
        name=f"Rule Proposal {_fname}",
        description=(
            f"{_fname} with simultaneous binding rule proposals. "
            "Both players propose a rule and choose an action. "
            "If proposals match the agreed rule modifies payoffs."
        ),
    )
    META_GAMES[f"rule_proposal_{_key}"] = _rp

    _rs = replace(
        apply_rule_signal(_base, base_key=_key),
        name=f"Rule Signal {_fname}",
        description=(
            f"{_fname} with simultaneous non-binding rule signals. "
            "Both players signal a preferred rule and choose an action. "
            "Signals are visible but never enforced."
        ),
    )
    META_GAMES[f"rule_signal_{_key}"] = _rs

# Second pass: gossip variants, kept separate so they are inserted after
# every rule-proposal / rule-signal entry.
for _key in _BASE_KEYS:
    _base = GAMES[_key]
    _fname = _FRIENDLY_NAMES[_key]

    _gp = replace(
        apply_gossip(_base, base_key=_key),
        name=f"Gossip {_fname}",
        description=(
            f"{_fname} with reputation gossip. "
            "Players rate opponents as trustworthy, untrustworthy, "
            "or neutral alongside each action."
        ),
    )
    META_GAMES[f"gossip_{_key}"] = _gp

# Publish the generated variants in the shared game registry.
GAMES.update(META_GAMES)
|
common/meta/meta_rules.py
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Rule catalog and payoff transforms for the meta-gaming variant system.
|
| 2 |
+
|
| 3 |
+
Each rule is a payoff transform: given base payoffs and actions, return
|
| 4 |
+
modified payoffs. The ``apply_rule`` dispatcher looks up a rule by name
|
| 5 |
+
and delegates to the corresponding transform function.
|
| 6 |
+
"""
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
from typing import Callable
|
| 10 |
+
|
| 11 |
+
from constant_definitions.var.meta.meta_rule_constants import (
|
| 12 |
+
RULE_NONE, RULE_EQUAL_SPLIT, RULE_COOP_BONUS,
|
| 13 |
+
RULE_DEFECT_PENALTY, RULE_MIN_GUARANTEE, RULE_BAN_DEFECT,
|
| 14 |
+
COOP_BONUS_NUMERATOR, COOP_BONUS_DENOMINATOR,
|
| 15 |
+
DEFECT_PENALTY_NUMERATOR, DEFECT_PENALTY_DENOMINATOR,
|
| 16 |
+
MIN_GUARANTEE_NUMERATOR, MIN_GUARANTEE_DENOMINATOR,
|
| 17 |
+
BAN_DEFECT_PENALTY_NUMERATOR, BAN_DEFECT_PENALTY_DENOMINATOR,
|
| 18 |
+
EQUAL_SPLIT_DENOMINATOR,
|
| 19 |
+
META_SEPARATOR, META_SPLIT_LIMIT,
|
| 20 |
+
)
|
| 21 |
+
|
| 22 |
+
RuleTransform = Callable[
|
| 23 |
+
[float, float, str, str], tuple[float, float]
|
| 24 |
+
]
|
| 25 |
+
|
| 26 |
+
_COOPERATIVE_ACTIONS = frozenset({"cooperate", "stag", "dove"})
|
| 27 |
+
|
| 28 |
+
_COOP_BONUS = COOP_BONUS_NUMERATOR / COOP_BONUS_DENOMINATOR
|
| 29 |
+
_DEFECT_PENALTY = DEFECT_PENALTY_NUMERATOR / DEFECT_PENALTY_DENOMINATOR
|
| 30 |
+
_MIN_GUARANTEE = MIN_GUARANTEE_NUMERATOR / MIN_GUARANTEE_DENOMINATOR
|
| 31 |
+
_BAN_PENALTY = BAN_DEFECT_PENALTY_NUMERATOR / BAN_DEFECT_PENALTY_DENOMINATOR
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def _is_cooperative(action: str) -> bool:
    """Report whether *action* is a member of ``_COOPERATIVE_ACTIONS``."""
    cooperative = action in _COOPERATIVE_ACTIONS
    return cooperative
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def _rule_none(
|
| 40 |
+
base_p: float, base_o: float,
|
| 41 |
+
p_action: str, o_action: str,
|
| 42 |
+
) -> tuple[float, float]:
|
| 43 |
+
return (base_p, base_o)
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def _rule_equal_split(
    base_p: float, base_o: float,
    p_action: str, o_action: str,
) -> tuple[float, float]:
    """Pool both base payoffs and give each player the same share.

    The share is the combined payoff divided by
    ``EQUAL_SPLIT_DENOMINATOR``; the actions do not affect the result.
    """
    share = (base_p + base_o) / EQUAL_SPLIT_DENOMINATOR
    return share, share
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def _rule_coop_bonus(
    base_p: float, base_o: float,
    p_action: str, o_action: str,
) -> tuple[float, float]:
    """Add ``_COOP_BONUS`` to each player whose own action is cooperative.

    A player whose action is not cooperative has ``0.0`` added instead.
    """

    def _bonus_for(action: str) -> float:
        # Per-player bonus, decided by that player's own action.
        if _is_cooperative(action):
            return _COOP_BONUS
        return float()

    rewarded_p = base_p + _bonus_for(p_action)
    rewarded_o = base_o + _bonus_for(o_action)
    return (rewarded_p, rewarded_o)
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def _rule_defect_penalty(
    base_p: float, base_o: float,
    p_action: str, o_action: str,
) -> tuple[float, float]:
    """Subtract ``_DEFECT_PENALTY`` from each non-cooperating player.

    A cooperating player has ``0.0`` subtracted instead.
    """

    def _penalty_for(action: str) -> float:
        # Only a non-cooperative action is charged.
        if _is_cooperative(action):
            return float()
        return _DEFECT_PENALTY

    charged_p = base_p - _penalty_for(p_action)
    charged_o = base_o - _penalty_for(o_action)
    return (charged_p, charged_o)
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def _rule_min_guarantee(
    base_p: float, base_o: float,
    p_action: str, o_action: str,
) -> tuple[float, float]:
    """Raise each payoff to at least ``_MIN_GUARANTEE``.

    Payoffs already at or above the guarantee pass through unchanged;
    the actions do not affect the result.
    """
    floor = _MIN_GUARANTEE
    return (max(base_p, floor), max(base_o, floor))
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def _rule_ban_defect(
    base_p: float, base_o: float,
    p_action: str, o_action: str,
) -> tuple[float, float]:
    """Subtract ``_BAN_PENALTY`` from each non-cooperating player.

    Same shape as the defect-penalty rule, but uses the ban penalty
    constant.  Cooperating players have ``0.0`` subtracted.
    """
    p_fine = float()
    if not _is_cooperative(p_action):
        p_fine = _BAN_PENALTY
    p_pay = base_p - p_fine

    o_fine = float()
    if not _is_cooperative(o_action):
        o_fine = _BAN_PENALTY
    o_pay = base_o - o_fine

    return (p_pay, o_pay)
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
RULE_CATALOG: dict[str, RuleTransform] = {
|
| 100 |
+
RULE_NONE: _rule_none,
|
| 101 |
+
RULE_EQUAL_SPLIT: _rule_equal_split,
|
| 102 |
+
RULE_COOP_BONUS: _rule_coop_bonus,
|
| 103 |
+
RULE_DEFECT_PENALTY: _rule_defect_penalty,
|
| 104 |
+
RULE_MIN_GUARANTEE: _rule_min_guarantee,
|
| 105 |
+
RULE_BAN_DEFECT: _rule_ban_defect,
|
| 106 |
+
}
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
def apply_rule(
    rule_name: str,
    base_p: float, base_o: float,
    p_action: str, o_action: str,
) -> tuple[float, float]:
    """Dispatch to the transform registered for *rule_name*.

    Returns the transformed ``(player, opponent)`` payoffs.

    Raises:
        KeyError: if *rule_name* is not a key of ``RULE_CATALOG``.
    """
    transform = RULE_CATALOG[rule_name]
    return transform(base_p, base_o, p_action, o_action)
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
_ZERO = int()
|
| 119 |
+
_ONE = int(bool(True))
|
| 120 |
+
_TWO = _ONE + _ONE
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
def parse_meta_action(
    action: str,
    split_limit: int = META_SPLIT_LIMIT,
) -> tuple[str, str, str]:
    """Split an encoded meta-action into ``(prefix, rule, base_action)``.

    *action* is split on ``META_SEPARATOR`` at most *split_limit* times
    and the first three pieces are returned.  The encoding is
    ``prefix_rule_baseaction``, where the rule is expected to be a
    single token that does not itself contain the separator.

    Raises:
        IndexError: if the split produces fewer than three pieces.
    """
    pieces = action.split(META_SEPARATOR, split_limit)
    prefix = pieces[_ZERO]
    rule = pieces[_ONE]
    base_action = pieces[_TWO]
    return (prefix, rule, base_action)
|
common/meta/variants_meta.py
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Meta-gaming variant transforms for rule proposal, signaling, and negotiation.
|
| 2 |
+
|
| 3 |
+
Four composable transforms following the ``apply_*`` pattern from
|
| 4 |
+
``variants.py``. Each expands the action space to encode both a rule
|
| 5 |
+
proposal and a base-game action in a single string.
|
| 6 |
+
"""
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
from dataclasses import replace
|
| 10 |
+
from typing import Callable
|
| 11 |
+
|
| 12 |
+
from common.games import GameConfig
|
| 13 |
+
from common.meta.meta_rules import apply_rule, parse_meta_action
|
| 14 |
+
|
| 15 |
+
from constant_definitions.var.meta.meta_rule_constants import (
|
| 16 |
+
VARIANT_RULE_PROPOSAL, VARIANT_RULE_SIGNAL,
|
| 17 |
+
VARIANT_CONSTITUTIONAL, VARIANT_PROPOSER_RESPONDER,
|
| 18 |
+
META_PROP_PREFIX, META_SIG_PREFIX, META_CONST_PREFIX,
|
| 19 |
+
META_RPROP_PREFIX, META_RACCEPT_PREFIX, META_RREJECT_PREFIX,
|
| 20 |
+
META_SEPARATOR,
|
| 21 |
+
DEFAULT_RULE_CATALOG, RULE_NONE,
|
| 22 |
+
)
|
| 23 |
+
|
| 24 |
+
_ONE = int(bool(True))
|
| 25 |
+
_ZERO = int()
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def _build_prefixed_actions(
    prefix: str,
    rules: tuple[str, ...],
    base_actions: list[str],
) -> list[str]:
    """Encode every (rule, base action) pair as ``prefix_rule_baseaction``.

    Pairs are emitted rule-major: all base actions for the first rule,
    then all base actions for the second rule, and so on.
    """
    encoded: list[str] = []
    for rule in rules:
        for act in base_actions:
            encoded.append(META_SEPARATOR.join((prefix, rule, act)))
    return encoded
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def apply_rule_proposal(
    base: GameConfig,
    rules: tuple[str, ...] = DEFAULT_RULE_CATALOG,
    base_key: str = "",
) -> GameConfig:
    """Wrap *base* so both players propose a rule alongside each action.

    Actions become ``prop_<rule>_<action>``.  When both players name the
    same rule in a round, that rule's transform is applied to the base
    payoffs; otherwise the base payoffs are returned as-is.

    The returned config is a copy of *base* with the expanded action
    list, the wrapping payoff function, ``VARIANT_RULE_PROPOSAL``
    appended to ``applied_variants`` and ``base_game_key`` set to
    *base_key* when given (else kept from *base*).
    """
    inner_payoff = base.payoff_fn
    encoded_actions = _build_prefixed_actions(
        META_PROP_PREFIX, rules, base.actions,
    )

    def _payoff(pa: str, oa: str) -> tuple[float, float]:
        _, player_rule, player_move = parse_meta_action(pa)
        _, opponent_rule, opponent_move = parse_meta_action(oa)
        base_p, base_o = inner_payoff(player_move, opponent_move)
        if player_rule != opponent_rule:
            # No agreement this round: the base game decides.
            return (base_p, base_o)
        return apply_rule(
            player_rule, base_p, base_o, player_move, opponent_move,
        )

    variants = base.applied_variants + (VARIANT_RULE_PROPOSAL,)
    return replace(
        base,
        actions=encoded_actions,
        payoff_fn=_payoff,
        applied_variants=variants,
        base_game_key=base_key or base.base_game_key,
    )
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
def apply_rule_signal(
    base: GameConfig,
    rules: tuple[str, ...] = DEFAULT_RULE_CATALOG,
    base_key: str = "",
) -> GameConfig:
    """Wrap *base* so both players signal a rule alongside each action.

    Actions become ``sig_<rule>_<action>``.  The signalled rules are
    carried in the action strings but never enforced: payoffs always
    come from the base game's payoff function.

    The returned config is a copy of *base* with the expanded action
    list, the wrapping payoff function, ``VARIANT_RULE_SIGNAL`` appended
    to ``applied_variants`` and ``base_game_key`` set to *base_key* when
    given (else kept from *base*).
    """
    inner_payoff = base.payoff_fn
    encoded_actions = _build_prefixed_actions(
        META_SIG_PREFIX, rules, base.actions,
    )

    def _payoff(pa: str, oa: str) -> tuple[float, float]:
        # The signalled rules are parsed but deliberately ignored.
        _, _player_signal, player_move = parse_meta_action(pa)
        _, _opponent_signal, opponent_move = parse_meta_action(oa)
        return inner_payoff(player_move, opponent_move)

    variants = base.applied_variants + (VARIANT_RULE_SIGNAL,)
    return replace(
        base,
        actions=encoded_actions,
        payoff_fn=_payoff,
        applied_variants=variants,
        base_game_key=base_key or base.base_game_key,
    )
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
def apply_constitutional(
    base: GameConfig,
    rules: tuple[str, ...] = DEFAULT_RULE_CATALOG,
    base_key: str = "",
) -> GameConfig:
    """Wrap *base* with rule negotiation that locks in once agreed.

    Actions become ``const_<rule>_<action>``.  Base payoffs apply until
    the first round in which both players propose the same rule other
    than ``RULE_NONE``; from that round on, the agreed rule's transform
    is applied to every payoff regardless of later proposals.

    The lock-in state lives in the closure of the returned payoff
    function, so each call to this function produces a config with its
    own fresh, un-agreed state.
    """
    inner_payoff = base.payoff_fn
    encoded_actions = _build_prefixed_actions(
        META_CONST_PREFIX, rules, base.actions,
    )
    locked_rule: str | None = None

    def _payoff(pa: str, oa: str) -> tuple[float, float]:
        nonlocal locked_rule
        _, p_rule, p_act = parse_meta_action(pa)
        _, o_rule, o_act = parse_meta_action(oa)
        base_p, base_o = inner_payoff(p_act, o_act)

        if locked_rule is None:
            if p_rule != o_rule or p_rule == RULE_NONE:
                # Still negotiating: nothing binding yet.
                return (base_p, base_o)
            # First agreement: remember it for all later rounds.
            locked_rule = p_rule

        return apply_rule(locked_rule, base_p, base_o, p_act, o_act)

    variants = base.applied_variants + (VARIANT_CONSTITUTIONAL,)
    return replace(
        base,
        actions=encoded_actions,
        payoff_fn=_payoff,
        applied_variants=variants,
        base_game_key=base_key or base.base_game_key,
    )
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def apply_proposer_responder(
    base: GameConfig,
    rules: tuple[str, ...] = DEFAULT_RULE_CATALOG,
    base_key: str = "",
) -> GameConfig:
    """Wrap *base* as an asymmetric propose / respond game.

    The player chooses ``rprop_<rule>_<action>``; the opponent chooses
    ``raccept_<action>`` or ``rreject_<action>``.  If the opponent's
    prefix is the accept prefix, the proposed rule's transform is
    applied to the base payoffs; any other prefix leaves the base
    payoffs unchanged.

    The returned config carries the player's encoded actions in
    ``actions`` and the responder's in ``opponent_actions`` (accept then
    reject for each base action, in base-action order).
    """
    sep = META_SEPARATOR
    inner_payoff = base.payoff_fn
    proposer_actions = _build_prefixed_actions(
        META_RPROP_PREFIX, rules, base.actions,
    )
    responder_actions: list[str] = [
        sep.join([response, act])
        for act in base.actions
        for response in (META_RACCEPT_PREFIX, META_RREJECT_PREFIX)
    ]

    def _payoff(pa: str, oa: str) -> tuple[float, float]:
        _, proposed_rule, p_act = parse_meta_action(pa)
        # Responder actions have two parts only: response and base action.
        response_parts = oa.split(sep, _ONE)
        response, o_act = response_parts[_ZERO], response_parts[_ONE]
        base_p, base_o = inner_payoff(p_act, o_act)
        if response != META_RACCEPT_PREFIX:
            return (base_p, base_o)
        return apply_rule(proposed_rule, base_p, base_o, p_act, o_act)

    variants = base.applied_variants + (VARIANT_PROPOSER_RESPONDER,)
    return replace(
        base,
        actions=proposer_actions,
        payoff_fn=_payoff,
        applied_variants=variants,
        base_game_key=base_key or base.base_game_key,
        opponent_actions=tuple(responder_actions),
    )
|
| 186 |
+
|
| 187 |
+
|
| 188 |
+
_META_VARIANT_REGISTRY: dict[str, Callable[..., GameConfig]] = {
|
| 189 |
+
VARIANT_RULE_PROPOSAL: apply_rule_proposal,
|
| 190 |
+
VARIANT_RULE_SIGNAL: apply_rule_signal,
|
| 191 |
+
VARIANT_CONSTITUTIONAL: apply_constitutional,
|
| 192 |
+
VARIANT_PROPOSER_RESPONDER: apply_proposer_responder,
|
| 193 |
+
}
|
common/meta/variants_reputation.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Reputation gossip variant transform for the composable variant system.
|
| 2 |
+
|
| 3 |
+
Follows the ``apply_*`` pattern from ``variants.py`` and ``variants_meta.py``.
|
| 4 |
+
Adds ``gossip_<rating>_<base_action>`` actions to any base game.
|
| 5 |
+
Payoffs depend only on the base action, like cheap_talk.
|
| 6 |
+
"""
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
from dataclasses import replace
|
| 10 |
+
from typing import Callable
|
| 11 |
+
|
| 12 |
+
from common.games import GameConfig
|
| 13 |
+
|
| 14 |
+
from constant_definitions.var.meta.reputation_constants import (
|
| 15 |
+
VARIANT_GOSSIP,
|
| 16 |
+
DEFAULT_RATINGS,
|
| 17 |
+
GOSSIP_PREFIX,
|
| 18 |
+
GOSSIP_SEPARATOR,
|
| 19 |
+
GOSSIP_SPLIT_LIMIT,
|
| 20 |
+
)
|
| 21 |
+
|
| 22 |
+
_ONE = int(bool(True))
|
| 23 |
+
_ZERO = int()
|
| 24 |
+
_TWO = _ONE + _ONE
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def apply_gossip(
    base: GameConfig,
    ratings: tuple[str, ...] = DEFAULT_RATINGS,
    base_key: str = "",
) -> GameConfig:
    """Add reputation gossip to a base game.

    For base actions ``[A, B]`` and ratings ``[trustworthy,
    untrustworthy, neutral]`` this produces ``[gossip_trustworthy_A,
    gossip_trustworthy_B, gossip_untrustworthy_A, ...]``.  Payoffs depend
    only on the base action; the rating never changes them.

    Encoded actions are decoded through an exact lookup table built
    while the action list is generated, so a base action that itself
    contains the separator is recovered whole.  A string that is not in
    the generated list is decoded by taking the text after the last
    separator.

    The returned config is a copy of *base* with the expanded action
    list, the wrapping payoff function, ``VARIANT_GOSSIP`` appended to
    ``applied_variants`` and ``base_game_key`` set to *base_key* when
    given (else kept from *base*).
    """
    sep = GOSSIP_SEPARATOR
    prefix = GOSSIP_PREFIX
    new_actions: list[str] = []
    base_action_of: dict[str, str] = {}
    for rating in ratings:
        for act in base.actions:
            encoded = sep.join([prefix, rating, act])
            new_actions.append(encoded)
            # First writer wins if two pairs encode to the same string.
            base_action_of.setdefault(encoded, act)
    original_payoff = base.payoff_fn

    def _decode(encoded: str) -> str:
        # Exact match first; last-segment split only for unknown input.
        if encoded in base_action_of:
            return base_action_of[encoded]
        return encoded.rsplit(sep, _ONE)[_ONE]

    def _payoff(pa: str, oa: str) -> tuple[float, float]:
        actual_p = _decode(pa)
        actual_o = _decode(oa)
        return original_payoff(actual_p, actual_o)

    return replace(
        base,
        actions=new_actions,
        payoff_fn=_payoff,
        applied_variants=base.applied_variants + (VARIANT_GOSSIP,),
        base_game_key=base_key or base.base_game_key,
    )
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def parse_gossip_action(action: str) -> tuple[str, str, str]:
    """Split a ``gossip_<rating>_<base_action>`` string into its three parts.

    The string is cut from the left at most ``GOSSIP_SPLIT_LIMIT`` times, so
    everything after the rating stays together as the base action.

    Returns:
        ``(prefix, rating, base_action)``.

    Raises:
        IndexError: If *action* has fewer than three separator-delimited
            parts.
    """
    pieces = action.split(GOSSIP_SEPARATOR, GOSSIP_SPLIT_LIMIT)
    prefix = pieces[_ZERO]
    rating = pieces[_ONE]
    base_action = pieces[_TWO]
    return (prefix, rating, base_action)
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
_REPUTATION_VARIANT_REGISTRY: dict[str, Callable[..., GameConfig]] = {
|
| 72 |
+
VARIANT_GOSSIP: apply_gossip,
|
| 73 |
+
}
|
common/variants.py
ADDED
|
@@ -0,0 +1,279 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Composable game variant transforms for KantBench.
|
| 2 |
+
|
| 3 |
+
Each ``apply_*`` function takes a :class:`GameConfig` and returns a new
|
| 4 |
+
:class:`GameConfig` with modified actions, payoff function, and metadata.
|
| 5 |
+
Variants compose: ``apply_exit(apply_cheap_talk(base))`` works.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
from dataclasses import replace
|
| 11 |
+
from typing import Callable
|
| 12 |
+
|
| 13 |
+
from common.games import GAMES, GameConfig
|
| 14 |
+
from constant_definitions.game_constants import (
|
| 15 |
+
DEFAULT_TWO_PLAYERS,
|
| 16 |
+
OPPONENT_MODE_SELF,
|
| 17 |
+
OPPONENT_MODE_CROSS,
|
| 18 |
+
)
|
| 19 |
+
from constant_definitions.var.pd_variant_constants import (
|
| 20 |
+
OPD_EXIT_PAYOFF,
|
| 21 |
+
VARIANT_CHEAP_TALK,
|
| 22 |
+
VARIANT_EXIT,
|
| 23 |
+
VARIANT_BINDING_COMMITMENT,
|
| 24 |
+
VARIANT_NOISY_ACTIONS,
|
| 25 |
+
VARIANT_NOISY_PAYOFFS,
|
| 26 |
+
VARIANT_SELF_PLAY,
|
| 27 |
+
VARIANT_CROSS_MODEL,
|
| 28 |
+
CT_MSG_PREFIX,
|
| 29 |
+
CT_SEPARATOR,
|
| 30 |
+
BC_COMMIT_PREFIX,
|
| 31 |
+
BC_FREE_PREFIX,
|
| 32 |
+
EXIT_ACTION,
|
| 33 |
+
DEFAULT_TREMBLE_PROB_NUMERATOR,
|
| 34 |
+
DEFAULT_TREMBLE_PROB_DENOMINATOR,
|
| 35 |
+
DEFAULT_NOISE_SCALE_NUMERATOR,
|
| 36 |
+
DEFAULT_NOISE_SCALE_DENOMINATOR,
|
| 37 |
+
)
|
| 38 |
+
from constant_definitions.var.communication_constants import COMMIT_COST
|
| 39 |
+
|
| 40 |
+
_ONE = int(bool(True))
|
| 41 |
+
_ZERO = int()
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def apply_cheap_talk(
    base: GameConfig,
    base_key: str = "",
) -> GameConfig:
    """Add a non-binding message phase to a base game.

    For base actions ``[A, B]`` produces ``[msg_A_A, msg_A_B, msg_B_A,
    msg_B_B]`` (announced message first, real action last).  Payoffs depend
    only on the real action; the announced message is ignored.

    Args:
        base: Game to extend; its ``actions`` and ``payoff_fn`` are read.
        base_key: Value stored as ``base_game_key``; when empty, the key
            already present on *base* is kept.

    Returns:
        A copy of *base* with the composite action list, a payoff function
        that unwraps composite actions before delegating to the base payoff,
        and ``VARIANT_CHEAP_TALK`` appended to ``applied_variants``.
    """
    sep = CT_SEPARATOR
    prefix = CT_MSG_PREFIX
    base_actions = base.actions
    labelled = [
        (sep.join([prefix, msg, act]), act)
        for msg in base_actions
        for act in base_actions
    ]
    new_actions = [name for name, _ in labelled]
    # Map every composite action back to the real action it wraps.  Cutting
    # at the last separator is not enough: a base action that itself contains
    # the separator (for instance one produced by another variant) would be
    # truncated to its final segment and the base payoff would receive an
    # action it does not know.
    actual_of = dict(labelled)
    original_payoff = base.payoff_fn

    def _actual(action: str) -> str:
        try:
            return actual_of[action]
        except KeyError:
            # Action was not generated here: keep the historical
            # last-segment rule so existing callers see the same behaviour.
            return action.rsplit(sep, _ONE)[_ONE]

    def _payoff(pa: str, oa: str) -> tuple[float, float]:
        return original_payoff(_actual(pa), _actual(oa))

    return replace(
        base,
        actions=new_actions,
        payoff_fn=_payoff,
        applied_variants=base.applied_variants + (VARIANT_CHEAP_TALK,),
        base_game_key=base_key or base.base_game_key,
    )
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def apply_exit(
    base: GameConfig,
    base_key: str = "",
    exit_payoff: int = OPD_EXIT_PAYOFF,
) -> GameConfig:
    """Extend a game with an exit action that pays both players the same.

    ``EXIT_ACTION`` is appended to the base action list.  When either player
    chooses it, both receive *exit_payoff* (as a float); any other action
    pair is scored by the base payoff function unchanged.
    """
    leave = EXIT_ACTION
    safe_value = float(exit_payoff)
    inner_payoff = base.payoff_fn

    def _payoff(pa: str, oa: str) -> tuple[float, float]:
        # One exit is enough to end the interaction for both sides.
        if leave in (pa, oa):
            return (safe_value, safe_value)
        return inner_payoff(pa, oa)

    return replace(
        base,
        actions=[*base.actions, leave],
        payoff_fn=_payoff,
        applied_variants=base.applied_variants + (VARIANT_EXIT,),
        base_game_key=base_key or base.base_game_key,
    )
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
def apply_binding_commitment(
    base: GameConfig,
    base_key: str = "",
    commit_cost: int = COMMIT_COST,
) -> GameConfig:
    """Layer a costly binding commitment on top of a base game.

    Only the first base action gets a ``commit_<action>`` form; a player
    choosing it plays that action and has *commit_cost* deducted from their
    payoff.  Every base action also gets a ``free_<action>`` form that is
    scored exactly like the base action.
    """
    sep = CT_SEPARATOR
    commit_pfx = BC_COMMIT_PREFIX
    free_pfx = BC_FREE_PREFIX
    penalty = float(commit_cost)
    plain_actions = base.actions

    # The commit form comes first, followed by the free forms in base order.
    new_actions = [
        sep.join([commit_pfx, plain_actions[_ZERO]]),
        *(sep.join([free_pfx, act]) for act in plain_actions),
    ]

    inner_payoff = base.payoff_fn

    def _unwrap(action: str) -> tuple[str, bool]:
        """Return ``(underlying_action, committed)`` for a prefixed action."""
        # Split once from the left so the underlying action keeps any
        # separators of its own.
        pieces = action.split(sep, _ONE)
        underlying = pieces[_ONE]
        return underlying, pieces[_ZERO] == commit_pfx

    def _payoff(pa: str, oa: str) -> tuple[float, float]:
        p_act, p_committed = _unwrap(pa)
        o_act, o_committed = _unwrap(oa)
        p_raw, o_raw = inner_payoff(p_act, o_act)
        p_net = p_raw - penalty if p_committed else p_raw
        o_net = o_raw - penalty if o_committed else o_raw
        return (p_net, o_net)

    return replace(
        base,
        actions=new_actions,
        payoff_fn=_payoff,
        applied_variants=base.applied_variants + (VARIANT_BINDING_COMMITMENT,),
        base_game_key=base_key or base.base_game_key,
    )
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
_DEFAULT_TREMBLE = DEFAULT_TREMBLE_PROB_NUMERATOR / DEFAULT_TREMBLE_PROB_DENOMINATOR
|
| 156 |
+
_DEFAULT_NOISE = DEFAULT_NOISE_SCALE_NUMERATOR / DEFAULT_NOISE_SCALE_DENOMINATOR
|
| 157 |
+
_NOISY_ONLY_TWO_PLAYER = "apply_noisy variant only supports two-player games"
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
def apply_noisy_actions(
    base: GameConfig,
    base_key: str = "",
    tremble_prob: float = _DEFAULT_TREMBLE,
) -> GameConfig:
    """Make each player's action tremble with probability *tremble_prob*.

    When a tremble happens the submitted action is replaced by one drawn
    uniformly from the base action list (which may be the same action).  The
    action list itself is left unchanged.

    Raises:
        ValueError: If *base* is not a two-player game.
    """
    if base.num_players != DEFAULT_TWO_PLAYERS:
        raise ValueError(_NOISY_ONLY_TWO_PLAYER)
    import random

    clean_payoff = base.payoff_fn
    candidates = base.actions

    def _tremble(intended: str) -> str:
        # Draw the tremble decision first, then (only if needed) the
        # replacement action.
        if random.random() < tremble_prob:
            return random.choice(candidates)
        return intended

    def _payoff(pa: str, oa: str) -> tuple[float, float]:
        realised_p = _tremble(pa)
        realised_o = _tremble(oa)
        return clean_payoff(realised_p, realised_o)

    return replace(
        base,
        payoff_fn=_payoff,
        applied_variants=base.applied_variants + (VARIANT_NOISY_ACTIONS,),
        base_game_key=base_key or base.base_game_key,
    )
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
def apply_noisy_payoffs(
    base: GameConfig,
    base_key: str = "",
    noise_scale: float = _DEFAULT_NOISE,
) -> GameConfig:
    """Perturb each payoff with independent zero-mean Gaussian noise.

    *noise_scale* is passed to ``random.gauss`` as the standard deviation.
    One sample is drawn for the player's payoff and a second one for the
    opponent's.

    Raises:
        ValueError: If *base* is not a two-player game.
    """
    if base.num_players != DEFAULT_TWO_PLAYERS:
        raise ValueError(_NOISY_ONLY_TWO_PLAYER)
    import random

    exact_payoff = base.payoff_fn
    mean = float(_ZERO)

    def _payoff(pa: str, oa: str) -> tuple[float, float]:
        exact_p, exact_o = exact_payoff(pa, oa)
        # Player noise is sampled before opponent noise.
        jitter_p = random.gauss(mean, noise_scale)
        jitter_o = random.gauss(mean, noise_scale)
        return (exact_p + jitter_p, exact_o + jitter_o)

    return replace(
        base,
        payoff_fn=_payoff,
        applied_variants=base.applied_variants + (VARIANT_NOISY_PAYOFFS,),
        base_game_key=base_key or base.base_game_key,
    )
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
_OPPONENT_ONLY_TWO_PLAYER = "opponent mode variants only support two-player games"
|
| 210 |
+
|
| 211 |
+
|
| 212 |
+
def apply_self_play(
    base: GameConfig,
    base_key: str = "",
) -> GameConfig:
    """Tag a two-player game so the model plays against itself.

    Only metadata changes: ``opponent_mode`` becomes ``OPPONENT_MODE_SELF``
    and ``VARIANT_SELF_PLAY`` is appended to ``applied_variants``.

    Raises:
        ValueError: If *base* is not a two-player game.
    """
    if base.num_players == DEFAULT_TWO_PLAYERS:
        variant_trail = base.applied_variants + (VARIANT_SELF_PLAY,)
        resolved_key = base_key or base.base_game_key
        return replace(
            base,
            opponent_mode=OPPONENT_MODE_SELF,
            applied_variants=variant_trail,
            base_game_key=resolved_key,
        )
    raise ValueError(_OPPONENT_ONLY_TWO_PLAYER)
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
def apply_cross_model(
    base: GameConfig,
    base_key: str = "",
) -> GameConfig:
    """Tag a two-player game for play against a different model.

    Only metadata changes: ``opponent_mode`` becomes ``OPPONENT_MODE_CROSS``
    and ``VARIANT_CROSS_MODEL`` is appended to ``applied_variants``.

    Raises:
        ValueError: If *base* is not a two-player game.
    """
    if base.num_players == DEFAULT_TWO_PLAYERS:
        variant_trail = base.applied_variants + (VARIANT_CROSS_MODEL,)
        resolved_key = base_key or base.base_game_key
        return replace(
            base,
            opponent_mode=OPPONENT_MODE_CROSS,
            applied_variants=variant_trail,
            base_game_key=resolved_key,
        )
    raise ValueError(_OPPONENT_ONLY_TWO_PLAYER)
|
| 240 |
+
|
| 241 |
+
|
| 242 |
+
_VARIANT_REGISTRY: dict[str, Callable[..., GameConfig]] = {
|
| 243 |
+
VARIANT_CHEAP_TALK: apply_cheap_talk,
|
| 244 |
+
VARIANT_EXIT: apply_exit,
|
| 245 |
+
VARIANT_BINDING_COMMITMENT: apply_binding_commitment,
|
| 246 |
+
VARIANT_NOISY_ACTIONS: apply_noisy_actions,
|
| 247 |
+
VARIANT_NOISY_PAYOFFS: apply_noisy_payoffs,
|
| 248 |
+
VARIANT_SELF_PLAY: apply_self_play,
|
| 249 |
+
VARIANT_CROSS_MODEL: apply_cross_model,
|
| 250 |
+
}
|
| 251 |
+
|
| 252 |
+
from common.meta.variants_meta import ( # noqa: E402
|
| 253 |
+
apply_rule_proposal, apply_rule_signal,
|
| 254 |
+
apply_constitutional, apply_proposer_responder,
|
| 255 |
+
_META_VARIANT_REGISTRY,
|
| 256 |
+
)
|
| 257 |
+
|
| 258 |
+
_VARIANT_REGISTRY.update(_META_VARIANT_REGISTRY)
|
| 259 |
+
|
| 260 |
+
from common.meta.variants_reputation import ( # noqa: E402
|
| 261 |
+
apply_gossip,
|
| 262 |
+
_REPUTATION_VARIANT_REGISTRY,
|
| 263 |
+
)
|
| 264 |
+
|
| 265 |
+
_VARIANT_REGISTRY.update(_REPUTATION_VARIANT_REGISTRY)
|
| 266 |
+
|
| 267 |
+
|
| 268 |
+
def compose_game(base_key: str, *variant_names: str) -> GameConfig:
    """Build a game by applying named variants to a base game.

    Variants are applied left to right, each receiving the result of the
    previous one together with ``base_key``.

    Example::

        compose_game("stag_hunt", "cheap_talk", "exit")

    Args:
        base_key: Key of the starting game in ``GAMES``.
        *variant_names: Keys of the variants in the variant registry.

    Returns:
        The composed game configuration.

    Raises:
        KeyError: If *base_key* or one of *variant_names* is not registered.
            The message names the offending key.
    """
    try:
        game = GAMES[base_key]
    except KeyError:
        raise KeyError(f"unknown base game {base_key!r}") from None
    for vname in variant_names:
        try:
            apply_fn = _VARIANT_REGISTRY[vname]
        except KeyError:
            # Names can come straight from callers, so say what is accepted
            # instead of surfacing a bare key.
            known = ", ".join(sorted(_VARIANT_REGISTRY))
            raise KeyError(
                f"unknown variant {vname!r}; known variants: {known}"
            ) from None
        game = apply_fn(game, base_key=base_key)
    return game
|
constant_definitions/game_constants.py
CHANGED
|
@@ -8,6 +8,7 @@ MIN_STEP_COUNT = int()
|
|
| 8 |
# Episode configuration
|
| 9 |
DEFAULT_NUM_ROUNDS = 10
|
| 10 |
SINGLE_SHOT_ROUNDS = 1
|
|
|
|
| 11 |
|
| 12 |
# --- Prisoner's Dilemma payoffs ---
|
| 13 |
PD_CC_PAYOFF = 3 # Both cooperate
|
|
@@ -71,6 +72,11 @@ SERVER_PORT = 8000
|
|
| 71 |
# Max concurrent environments
|
| 72 |
MAX_CONCURRENT_ENVS = 1
|
| 73 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
# --- Evaluation module constants ---
|
| 75 |
EVAL_ZERO = 0
|
| 76 |
EVAL_ONE = 1
|
|
@@ -86,6 +92,10 @@ EVAL_ONE_FLOAT = 1.0
|
|
| 86 |
EVAL_HALF = 0.5
|
| 87 |
EVAL_NEGATIVE_ONE = -1
|
| 88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
# --- External benchmark constants ---
|
| 90 |
EVAL_EIGHT = 8
|
| 91 |
EVAL_TEN = 10
|
|
|
|
| 8 |
# Episode configuration
|
| 9 |
DEFAULT_NUM_ROUNDS = 10
|
| 10 |
SINGLE_SHOT_ROUNDS = 1
|
| 11 |
+
DEFAULT_TWO_PLAYERS = 2
|
| 12 |
|
| 13 |
# --- Prisoner's Dilemma payoffs ---
|
| 14 |
PD_CC_PAYOFF = 3 # Both cooperate
|
|
|
|
| 72 |
# Max concurrent environments
|
| 73 |
MAX_CONCURRENT_ENVS = 1
|
| 74 |
|
| 75 |
+
# --- Opponent mode ---
|
| 76 |
+
OPPONENT_MODE_STRATEGY = "strategy"
|
| 77 |
+
OPPONENT_MODE_SELF = "self_play"
|
| 78 |
+
OPPONENT_MODE_CROSS = "cross_model"
|
| 79 |
+
|
| 80 |
# --- Evaluation module constants ---
|
| 81 |
EVAL_ZERO = 0
|
| 82 |
EVAL_ONE = 1
|
|
|
|
| 92 |
EVAL_HALF = 0.5
|
| 93 |
EVAL_NEGATIVE_ONE = -1
|
| 94 |
|
| 95 |
+
# --- N-player / coalition evaluation constants ---
|
| 96 |
+
NPLAYER_EVAL_DEFAULT_EPISODES = 3
|
| 97 |
+
COALITION_EVAL_DEFAULT_EPISODES = 3
|
| 98 |
+
|
| 99 |
# --- External benchmark constants ---
|
| 100 |
EVAL_EIGHT = 8
|
| 101 |
EVAL_TEN = 10
|
constant_definitions/train/agent_constants.py
CHANGED
|
@@ -33,3 +33,33 @@ SYSTEM_PROMPT = (
|
|
| 33 |
|
| 34 |
# Sentinel returned when LLM output cannot be parsed
|
| 35 |
PARSE_FAILURE_SENTINEL = "__PARSE_FAILURE__"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
# Sentinel returned when LLM output cannot be parsed
|
| 35 |
PARSE_FAILURE_SENTINEL = "__PARSE_FAILURE__"
|
| 36 |
+
|
| 37 |
+
# --- N-player prompt section headers ---
|
| 38 |
+
NPLAYER_PROMPT_SECTION_PLAYERS = "PLAYERS"
|
| 39 |
+
NPLAYER_PROMPT_SECTION_ALL_SCORES = "ALL SCORES"
|
| 40 |
+
|
| 41 |
+
# --- Coalition prompt section headers ---
|
| 42 |
+
COALITION_PROMPT_SECTION_PHASE = "PHASE"
|
| 43 |
+
COALITION_PROMPT_SECTION_PROPOSALS = "PENDING PROPOSALS"
|
| 44 |
+
COALITION_PROMPT_SECTION_COALITIONS = "ACTIVE COALITIONS"
|
| 45 |
+
|
| 46 |
+
# --- Governance prompt section headers ---
|
| 47 |
+
GOVERNANCE_PROMPT_SECTION_RULES = "GOVERNANCE RULES"
|
| 48 |
+
GOVERNANCE_PROMPT_SECTION_PENDING = "PENDING GOVERNANCE"
|
| 49 |
+
|
| 50 |
+
# N-player system prompt
|
| 51 |
+
NPLAYER_SYSTEM_PROMPT = (
|
| 52 |
+
"You are playing an N-player game-theory game. Analyse the situation "
|
| 53 |
+
"and choose the best action. Respond with ONLY the action name, "
|
| 54 |
+
"nothing else."
|
| 55 |
+
)
|
| 56 |
+
|
| 57 |
+
# Coalition system prompt
|
| 58 |
+
COALITION_SYSTEM_PROMPT = (
|
| 59 |
+
"You are playing a coalition formation game. You can form coalitions "
|
| 60 |
+
"with other players and propose governance changes. Respond with "
|
| 61 |
+
"valid JSON when negotiating, or ONLY the action name when acting."
|
| 62 |
+
)
|
| 63 |
+
|
| 64 |
+
# Maximum tokens for coalition JSON response
|
| 65 |
+
COALITION_MAX_ACTION_TOKENS = 256
|
constant_definitions/var/meta/meta_rule_constants.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Meta-gaming rule proposal variant constants
|
| 2 |
+
|
| 3 |
+
# Variant names
|
| 4 |
+
VARIANT_RULE_PROPOSAL = "rule_proposal"
|
| 5 |
+
VARIANT_RULE_SIGNAL = "rule_signal"
|
| 6 |
+
VARIANT_CONSTITUTIONAL = "constitutional"
|
| 7 |
+
VARIANT_PROPOSER_RESPONDER = "proposer_responder"
|
| 8 |
+
|
| 9 |
+
# Action prefixes (single tokens)
|
| 10 |
+
META_PROP_PREFIX = "prop"
|
| 11 |
+
META_SIG_PREFIX = "sig"
|
| 12 |
+
META_CONST_PREFIX = "const"
|
| 13 |
+
META_RPROP_PREFIX = "rprop"
|
| 14 |
+
META_RACCEPT_PREFIX = "raccept"
|
| 15 |
+
META_RREJECT_PREFIX = "rreject"
|
| 16 |
+
META_SEPARATOR = "_"
|
| 17 |
+
META_SPLIT_LIMIT = 2
|
| 18 |
+
|
| 19 |
+
# Rule names (single tokens, no underscores)
|
| 20 |
+
RULE_NONE = "none"
|
| 21 |
+
RULE_EQUAL_SPLIT = "equalsplit"
|
| 22 |
+
RULE_COOP_BONUS = "coopbonus"
|
| 23 |
+
RULE_DEFECT_PENALTY = "defectpenalty"
|
| 24 |
+
RULE_MIN_GUARANTEE = "minguarantee"
|
| 25 |
+
RULE_BAN_DEFECT = "bandefect"
|
| 26 |
+
|
| 27 |
+
DEFAULT_RULE_CATALOG = (
|
| 28 |
+
RULE_NONE, RULE_EQUAL_SPLIT, RULE_COOP_BONUS,
|
| 29 |
+
RULE_DEFECT_PENALTY, RULE_MIN_GUARANTEE, RULE_BAN_DEFECT,
|
| 30 |
+
)
|
| 31 |
+
|
| 32 |
+
# Payoff parameters (numerator/denominator to avoid inline literals)
|
| 33 |
+
COOP_BONUS_NUMERATOR = 2
|
| 34 |
+
COOP_BONUS_DENOMINATOR = 1
|
| 35 |
+
DEFECT_PENALTY_NUMERATOR = 3
|
| 36 |
+
DEFECT_PENALTY_DENOMINATOR = 1
|
| 37 |
+
MIN_GUARANTEE_NUMERATOR = 1
|
| 38 |
+
MIN_GUARANTEE_DENOMINATOR = 1
|
| 39 |
+
BAN_DEFECT_PENALTY_NUMERATOR = 10
|
| 40 |
+
BAN_DEFECT_PENALTY_DENOMINATOR = 1
|
| 41 |
+
EQUAL_SPLIT_DENOMINATOR = 2
|
constant_definitions/var/meta/reputation_constants.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reputation and gossip variant constants
|
| 2 |
+
|
| 3 |
+
# Variant names
|
| 4 |
+
VARIANT_GOSSIP = "gossip"
|
| 5 |
+
VARIANT_MEMORY = "memory"
|
| 6 |
+
|
| 7 |
+
# Gossip ratings (single tokens, no underscores)
|
| 8 |
+
RATING_TRUSTWORTHY = "trustworthy"
|
| 9 |
+
RATING_UNTRUSTWORTHY = "untrustworthy"
|
| 10 |
+
RATING_NEUTRAL = "neutral"
|
| 11 |
+
DEFAULT_RATINGS = (RATING_TRUSTWORTHY, RATING_UNTRUSTWORTHY, RATING_NEUTRAL)
|
| 12 |
+
|
| 13 |
+
# Action prefixes
|
| 14 |
+
GOSSIP_PREFIX = "gossip"
|
| 15 |
+
GOSSIP_SEPARATOR = "_"
|
| 16 |
+
GOSSIP_SPLIT_LIMIT = 2
|
| 17 |
+
|
| 18 |
+
# Reputation defaults (numerator / denominator)
|
| 19 |
+
DEFAULT_REPUTATION_SCORE_NUMERATOR = 5
|
| 20 |
+
DEFAULT_REPUTATION_SCORE_DENOMINATOR = 10
|
| 21 |
+
REPUTATION_DECAY_NUMERATOR = 9
|
| 22 |
+
REPUTATION_DECAY_DENOMINATOR = 10
|
| 23 |
+
|
| 24 |
+
# Metadata keys
|
| 25 |
+
META_KEY_REPUTATION = "opponent_reputation"
|
| 26 |
+
META_KEY_GOSSIP_HISTORY = "gossip_history"
|
| 27 |
+
META_KEY_INTERACTION_COUNT = "interaction_count"
|
| 28 |
+
META_KEY_COOPERATION_RATE = "cooperation_rate"
|
| 29 |
+
|
| 30 |
+
# Cognee dataset name
|
| 31 |
+
COGNEE_DATASET_NAME = "kant_interactions"
|
| 32 |
+
COGNEE_SEARCH_TYPE = "GRAPH_COMPLETION"
|
constant_definitions/var/pd_variant_constants.py
CHANGED
|
@@ -24,3 +24,25 @@ PW_DISARM_DISARM = 4
|
|
| 24 |
PW_DISARM_ARM = -1
|
| 25 |
PW_ARM_DISARM = 6
|
| 26 |
PW_ARM_ARM = 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
PW_DISARM_ARM = -1
|
| 25 |
PW_ARM_DISARM = 6
|
| 26 |
PW_ARM_ARM = 0
|
| 27 |
+
|
| 28 |
+
# Composable variant system -- name strings and prefixes
|
| 29 |
+
VARIANT_CHEAP_TALK = "cheap_talk"
|
| 30 |
+
VARIANT_EXIT = "exit"
|
| 31 |
+
VARIANT_BINDING_COMMITMENT = "binding_commitment"
|
| 32 |
+
VARIANT_NOISY_ACTIONS = "noisy_actions"
|
| 33 |
+
VARIANT_NOISY_PAYOFFS = "noisy_payoffs"
|
| 34 |
+
CT_MSG_PREFIX = "msg"
|
| 35 |
+
CT_SEPARATOR = "_"
|
| 36 |
+
BC_COMMIT_PREFIX = "commit"
|
| 37 |
+
BC_FREE_PREFIX = "free"
|
| 38 |
+
EXIT_ACTION = "exit"
|
| 39 |
+
|
| 40 |
+
# Opponent mode variants
|
| 41 |
+
VARIANT_SELF_PLAY = "self_play"
|
| 42 |
+
VARIANT_CROSS_MODEL = "cross_model"
|
| 43 |
+
|
| 44 |
+
# Bayesian variant parameters
|
| 45 |
+
DEFAULT_TREMBLE_PROB_NUMERATOR = 1
|
| 46 |
+
DEFAULT_TREMBLE_PROB_DENOMINATOR = 10
|
| 47 |
+
DEFAULT_NOISE_SCALE_NUMERATOR = 1
|
| 48 |
+
DEFAULT_NOISE_SCALE_DENOMINATOR = 2
|
env/environment.py
CHANGED
|
@@ -155,6 +155,8 @@ class KantEnvironment(Environment[GameObservation, GameAction, GameState]):
|
|
| 155 |
|
| 156 |
def _opponent_actions(self) -> list[str]:
|
| 157 |
assert self._game is not None
|
|
|
|
|
|
|
| 158 |
gt = self._game.game_type
|
| 159 |
if gt == "ultimatum":
|
| 160 |
return ["accept", "reject"]
|
|
|
|
| 155 |
|
| 156 |
def _opponent_actions(self) -> list[str]:
|
| 157 |
assert self._game is not None
|
| 158 |
+
if self._game.opponent_actions is not None:
|
| 159 |
+
return list(self._game.opponent_actions)
|
| 160 |
gt = self._game.game_type
|
| 161 |
if gt == "ultimatum":
|
| 162 |
return ["accept", "reject"]
|
env/reputation/__init__.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Reputation environment subpackage."""
|
| 2 |
+
from env.reputation.reputation_env import ReputationEnvironment
|
| 3 |
+
|
| 4 |
+
__all__ = ["ReputationEnvironment"]
|
env/reputation/reputation_env.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Environment wrapper adding cross-episode reputation via cognee.
|
| 2 |
+
|
| 3 |
+
Injects opponent reputation into obs.metadata before each episode.
|
| 4 |
+
Records episode outcomes and gossip ratings in CogneeMemoryStore after.
|
| 5 |
+
"""
|
| 6 |
+
from __future__ import annotations
|
| 7 |
+
|
| 8 |
+
from typing import Any, Optional
|
| 9 |
+
|
| 10 |
+
from env.environment import KantEnvironment
|
| 11 |
+
from env.models import GameAction, GameObservation, GameState
|
| 12 |
+
from common.meta.memory_store import CogneeMemoryStore
|
| 13 |
+
from common.meta.variants_reputation import parse_gossip_action
|
| 14 |
+
|
| 15 |
+
from constant_definitions.var.meta.reputation_constants import (
|
| 16 |
+
GOSSIP_PREFIX,
|
| 17 |
+
GOSSIP_SEPARATOR,
|
| 18 |
+
META_KEY_REPUTATION,
|
| 19 |
+
META_KEY_INTERACTION_COUNT,
|
| 20 |
+
)
|
| 21 |
+
|
| 22 |
+
_ZERO = int()
|
| 23 |
+
_ONE = int(bool(True))
|
| 24 |
+
_COOPERATIVE_ACTIONS = frozenset({"cooperate", "stag", "dove"})
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def _compute_coop_rate(history: list[Any]) -> float:
    """Return the fraction of rounds in which the player cooperated.

    Each entry's ``player_action`` is reduced to the text after its last
    ``GOSSIP_SEPARATOR`` (when it has one) and counted if the result is in
    ``_COOPERATIVE_ACTIONS``.  An empty history yields zero.
    """
    if not history:
        return float(_ZERO)

    def _underlying(raw: str) -> str:
        # Prefixed actions keep the actual move in their final segment.
        if GOSSIP_SEPARATOR in raw:
            return raw.rsplit(GOSSIP_SEPARATOR, _ONE)[_ONE]
        return raw

    cooperative_rounds = sum(
        _ONE
        for rnd in history
        if _underlying(rnd.player_action) in _COOPERATIVE_ACTIONS
    )
    return cooperative_rounds / len(history)
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
class ReputationEnvironment:
    """Environment wrapper that adds cross-episode reputation via cognee.

    On ``reset`` the opponent's stored reputation is copied into
    ``obs.metadata``.  On ``step`` gossip ratings embedded in the action are
    written to the memory store, and when the episode finishes its outcome
    is recorded and the metadata is refreshed from the store.
    """

    def __init__(
        self,
        memory_store: CogneeMemoryStore,
        env: Optional[KantEnvironment] = None,
    ) -> None:
        """Wrap *env* (a fresh ``KantEnvironment`` when omitted)."""
        self._env = env if env is not None else KantEnvironment()
        self._store = memory_store
        self._agent_id: str = ""
        self._opponent_id: str = ""

    def reset(
        self,
        *,
        agent_id: str = "agent",
        **kwargs: Any,
    ) -> GameObservation:
        """Reset the wrapped environment and inject reputation metadata.

        The opponent is identified by the ``strategy`` keyword argument
        (``"unknown"`` when absent).  All keyword arguments are forwarded to
        the wrapped environment's ``reset``.
        """
        self._agent_id = agent_id
        self._opponent_id = kwargs.get("strategy", "unknown")
        obs = self._env.reset(**kwargs)
        reputation = self._store.query_reputation(self._opponent_id)
        updated_meta = dict(obs.metadata)
        updated_meta[META_KEY_REPUTATION] = reputation
        updated_meta[META_KEY_INTERACTION_COUNT] = reputation.get(
            META_KEY_INTERACTION_COUNT, _ZERO,
        )
        return obs.model_copy(update={"metadata": updated_meta})

    def step(
        self,
        action: GameAction,
        **kwargs: Any,
    ) -> GameObservation:
        """Step the wrapped environment, recording gossip and episodes.

        A gossip rating is parsed from the action before stepping, so a
        malformed gossip action fails without touching the environment.  It
        is written to the store only after the wrapped ``step`` has
        returned, so an action the environment refuses leaves no rating
        behind.
        """
        gossip_marker = GOSSIP_PREFIX + GOSSIP_SEPARATOR
        rating: Optional[str] = None
        if action.action.startswith(gossip_marker):
            _, rating, _ = parse_gossip_action(action.action)

        obs = self._env.step(action, **kwargs)

        if rating is not None:
            self._store.record_gossip(
                self._agent_id, self._opponent_id, rating,
            )

        if obs.done:
            self._store.record_episode(
                agent_id=self._agent_id,
                opponent_id=self._opponent_id,
                game=obs.game_name,
                history=obs.history,
                cooperation_rate=_compute_coop_rate(obs.history),
                scores=(obs.player_score, obs.opponent_score),
            )
            # NOTE(review): reset() reads query_reputation() while this path
            # reads get_stats(); confirm both return the same shape.
            reputation = self._store.get_stats(self._opponent_id)
            updated_meta = dict(obs.metadata)
            updated_meta[META_KEY_REPUTATION] = reputation
            obs = obs.model_copy(update={"metadata": updated_meta})
        return obs

    @property
    def state(self) -> GameState:
        """Delegate to wrapped environment state."""
        return self._env.state
|
server/KantBench_environment.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
"""KantBench environment adapter for the HF Space.
|
| 2 |
|
| 3 |
Thin wrapper that delegates to the real KantEnvironment (90+ 2-player games,
|
| 4 |
-
17 strategies
|
| 5 |
-
standalone reimplementation.
|
| 6 |
"""
|
| 7 |
|
| 8 |
from __future__ import annotations
|
|
@@ -22,12 +22,18 @@ from env.nplayer.models import NPlayerAction, NPlayerObservation
|
|
| 22 |
import common.games_meta.nplayer_games # noqa: F401
|
| 23 |
from common.games_meta.nplayer_config import NPLAYER_GAMES
|
| 24 |
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
class KantbenchEnvironment(Environment):
|
| 27 |
"""Game theory environment exposing 90+ two-player and N-player games.
|
| 28 |
|
| 29 |
Wraps the real KantEnvironment and NPlayerEnvironment, routing
|
| 30 |
automatically based on the requested game name.
|
|
|
|
|
|
|
|
|
|
| 31 |
"""
|
| 32 |
|
| 33 |
SUPPORTS_CONCURRENT_SESSIONS: bool = True
|
|
@@ -39,6 +45,16 @@ class KantbenchEnvironment(Environment):
|
|
| 39 |
|
| 40 |
def reset(self, **kwargs: Any) -> KantBenchObservation:
|
| 41 |
game_name: str = kwargs.get("game", "prisoners_dilemma")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
if game_name in NPLAYER_GAMES:
|
| 44 |
self._is_nplayer = True
|
|
|
|
| 1 |
"""KantBench environment adapter for the HF Space.
|
| 2 |
|
| 3 |
Thin wrapper that delegates to the real KantEnvironment (90+ 2-player games,
|
| 4 |
+
17 strategies, meta-games, composable variants) and NPlayerEnvironment
|
| 5 |
+
(3 N-player games) instead of a standalone reimplementation.
|
| 6 |
"""
|
| 7 |
|
| 8 |
from __future__ import annotations
|
|
|
|
| 22 |
import common.games_meta.nplayer_games # noqa: F401
|
| 23 |
from common.games_meta.nplayer_config import NPLAYER_GAMES
|
| 24 |
|
| 25 |
+
from common.games import GAMES
|
| 26 |
+
from common.variants import compose_game
|
| 27 |
+
|
| 28 |
|
| 29 |
class KantbenchEnvironment(Environment):
|
| 30 |
"""Game theory environment exposing 90+ two-player and N-player games.
|
| 31 |
|
| 32 |
Wraps the real KantEnvironment and NPlayerEnvironment, routing
|
| 33 |
automatically based on the requested game name.
|
| 34 |
+
|
| 35 |
+
Supports a ``variant`` reset parameter for dynamic game composition
|
| 36 |
+
(e.g. ``variant="constitutional"`` or ``variant="cheap_talk"``).
|
| 37 |
"""
|
| 38 |
|
| 39 |
SUPPORTS_CONCURRENT_SESSIONS: bool = True
|
|
|
|
| 45 |
|
| 46 |
def reset(self, **kwargs: Any) -> KantBenchObservation:
|
| 47 |
game_name: str = kwargs.get("game", "prisoners_dilemma")
|
| 48 |
+
variant: Optional[str] = kwargs.pop("variant", None)
|
| 49 |
+
|
| 50 |
+
# Dynamic variant composition — compose game on-the-fly and
|
| 51 |
+
# register it so KantEnvironment can look it up via get_game().
|
| 52 |
+
# Constitutional variant creates fresh mutable closure per call.
|
| 53 |
+
if variant and game_name in GAMES:
|
| 54 |
+
composed = compose_game(game_name, variant)
|
| 55 |
+
composed_key = f"_composed_{variant}_{game_name}"
|
| 56 |
+
GAMES[composed_key] = composed
|
| 57 |
+
kwargs["game"] = composed_key
|
| 58 |
|
| 59 |
if game_name in NPLAYER_GAMES:
|
| 60 |
self._is_nplayer = True
|