Upload folder using huggingface_hub
Browse files
- bench/gradio_app/app.py +26 -18
- common/games.py +11 -2
- common/games_adaptive/__init__.py +1 -0
- common/games_adaptive/factories.py +192 -0
- common/games_meta/game_tags.py +7 -0
- constant_definitions/slides/__init__.py +1 -0
- constant_definitions/slides/layout.py +94 -0
- constant_definitions/var/meta/adaptive_constants.py +39 -0
- constant_definitions/var/meta/self_play_constants.py +32 -0
bench/gradio_app/app.py
CHANGED
|
@@ -121,25 +121,28 @@ def _filter_game_names(category_tag):
|
|
| 121 |
return sorted(_KEY_TO_NAME[k] for k in matching_keys if k in _KEY_TO_NAME)
|
| 122 |
|
| 123 |
# ---------------------------------------------------------------------------
|
| 124 |
-
#
|
| 125 |
# ---------------------------------------------------------------------------
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
def
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
def _strat_tft(actions, h):
|
| 136 |
-
if not h:
|
| 137 |
return actions[_ZERO]
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
|
| 144 |
# N-player strategy names
|
| 145 |
_NPLAYER_STRAT_NAMES = list(NPLAYER_STRATEGIES.keys()) if _HAS_NPLAYER_ENV else ["random"]
|
|
@@ -256,7 +259,12 @@ def play_round(action_str, state):
|
|
| 256 |
opp_act_list = list(opp_actions)
|
| 257 |
else:
|
| 258 |
opp_act_list = info["actions"]
|
| 259 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 260 |
p_pay, o_pay = info["payoff_fn"](action_str, opp)
|
| 261 |
state["round"] += _ONE
|
| 262 |
state["p_score"] += p_pay
|
|
|
|
| 121 |
return sorted(_KEY_TO_NAME[k] for k in matching_keys if k in _KEY_TO_NAME)
|
| 122 |
|
| 123 |
# ---------------------------------------------------------------------------
|
| 124 |
+
# 2-player strategies (from the real strategy registry)
|
| 125 |
# ---------------------------------------------------------------------------
|
| 126 |
+
try:
    from common.strategies import STRATEGIES as _STRAT_REGISTRY
except ImportError:
    # The real registry could not be imported: fall back to four plain
    # callables. Each takes (actions, history) and returns one element of
    # ``actions``.

    def _strat_random(actions, _h):
        """Pick one of the available actions via ``_rand.choice``."""
        return _rand.choice(actions)

    def _strat_first(actions, _h):
        """Always play the first listed action."""
        return actions[_ZERO]

    def _strat_last(actions, _h):
        """Always play the second listed action (the only one if there is one)."""
        second_or_only = min(_ONE, len(actions) - _ONE)
        return actions[second_or_only]

    def _strat_tft(actions, h):
        """Echo the ``player_action`` stored in ``h[_NEG_ONE]`` when it is legal.

        ``h[_NEG_ONE]`` is presumably the most recent round -- confirm
        against the caller that builds the history list.
        """
        if h:
            echoed = h[_NEG_ONE]["player_action"]
            if echoed in actions:
                return echoed
        # Empty history, or the echoed move is not among the offered actions.
        return actions[_ZERO]

    STRATEGIES_2P = {
        "random": _strat_random,
        "always_cooperate": _strat_first,
        "always_defect": _strat_last,
        "tit_for_tat": _strat_tft,
    }
    _HAS_FULL_STRATEGIES = False
else:
    # Import succeeded: expose the registry under the name the app uses.
    STRATEGIES_2P = _STRAT_REGISTRY
    _HAS_FULL_STRATEGIES = True
|
| 146 |
|
| 147 |
# N-player strategy names
|
| 148 |
_NPLAYER_STRAT_NAMES = list(NPLAYER_STRATEGIES.keys()) if _HAS_NPLAYER_ENV else ["random"]
|
|
|
|
| 259 |
opp_act_list = list(opp_actions)
|
| 260 |
else:
|
| 261 |
opp_act_list = info["actions"]
|
| 262 |
+
strat = STRATEGIES_2P[state["strategy"]]
|
| 263 |
+
game_type = info.get("game_type", "matrix")
|
| 264 |
+
if _HAS_FULL_STRATEGIES:
|
| 265 |
+
opp = strat.choose_action(game_type, opp_act_list, state["history"])
|
| 266 |
+
else:
|
| 267 |
+
opp = strat(opp_act_list, state["history"])
|
| 268 |
p_pay, o_pay = info["payoff_fn"](action_str, opp)
|
| 269 |
state["round"] += _ONE
|
| 270 |
state["p_score"] += p_pay
|
common/games.py
CHANGED
|
@@ -165,6 +165,8 @@ _PG_CONTRIBUTIONS: list[str] = [
|
|
| 165 |
# Game registry
|
| 166 |
# ---------------------------------------------------------------------------
|
| 167 |
|
|
|
|
|
|
|
| 168 |
GAMES: dict[str, GameConfig] = {
|
| 169 |
"prisoners_dilemma": GameConfig(
|
| 170 |
name="Prisoner's Dilemma",
|
|
@@ -246,15 +248,21 @@ GAMES: dict[str, GameConfig] = {
|
|
| 246 |
def get_game(name: str) -> GameConfig:
|
| 247 |
"""Retrieve a GameConfig by its registry key.
|
| 248 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
Args:
|
| 250 |
-
name: Key in
|
| 251 |
|
| 252 |
Returns:
|
| 253 |
The corresponding :class:`GameConfig` instance.
|
| 254 |
|
| 255 |
Raises:
|
| 256 |
-
KeyError: If *name* is not
|
| 257 |
"""
|
|
|
|
|
|
|
| 258 |
return GAMES[name]
|
| 259 |
|
| 260 |
|
|
@@ -273,6 +281,7 @@ def _load_extensions() -> None:
|
|
| 273 |
"common.games_coop.dynamic", "common.games_coop.pd_variants",
|
| 274 |
"common.games_coop.infinite", "common.games_coop.stochastic",
|
| 275 |
"common.meta.meta_games",
|
|
|
|
| 276 |
]:
|
| 277 |
try:
|
| 278 |
importlib.import_module(mod)
|
|
|
|
| 165 |
# Game registry
|
| 166 |
# ---------------------------------------------------------------------------
|
| 167 |
|
| 168 |
+
GAME_FACTORIES: dict[str, Callable[[], GameConfig]] = {}
|
| 169 |
+
|
| 170 |
GAMES: dict[str, GameConfig] = {
|
| 171 |
"prisoners_dilemma": GameConfig(
|
| 172 |
name="Prisoner's Dilemma",
|
|
|
|
| 248 |
def get_game(name: str) -> GameConfig:
    """Look up a game by registry key, preferring factories.

    Keys present in :data:`GAME_FACTORIES` are served by calling the
    registered factory, so every call returns a newly built
    :class:`GameConfig`. Any other key is resolved against the static
    :data:`GAMES` mapping.

    Args:
        name: Key in GAME_FACTORIES or GAMES.

    Returns:
        The :class:`GameConfig` produced by the factory, or the one stored
        in GAMES.

    Raises:
        KeyError: If *name* is not in either registry.
    """
    if name not in GAME_FACTORIES:
        # Static entry (or unknown key, which raises KeyError here).
        return GAMES[name]
    build = GAME_FACTORIES[name]
    return build()
|
| 267 |
|
| 268 |
|
|
|
|
| 281 |
"common.games_coop.dynamic", "common.games_coop.pd_variants",
|
| 282 |
"common.games_coop.infinite", "common.games_coop.stochastic",
|
| 283 |
"common.meta.meta_games",
|
| 284 |
+
"common.games_adaptive.factories",
|
| 285 |
]:
|
| 286 |
try:
|
| 287 |
importlib.import_module(mod)
|
common/games_adaptive/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""Adaptive payoff game factories."""
|
common/games_adaptive/factories.py
ADDED
|
@@ -0,0 +1,192 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Adaptive payoff game factories with history-dependent payoff functions."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
from typing import Callable
|
| 4 |
+
from common.games import GameConfig, GAME_FACTORIES, _PD_MATRIX, _HD_MATRIX
|
| 5 |
+
from constant_definitions.game_constants import (
|
| 6 |
+
TRUST_MULTIPLIER, EVAL_ZERO_FLOAT, EVAL_ONE_FLOAT,
|
| 7 |
+
)
|
| 8 |
+
from constant_definitions.var.meta.adaptive_constants import (
|
| 9 |
+
ADAPTIVE_PD_MULTIPLIER_MIN_NUMERATOR,
|
| 10 |
+
ADAPTIVE_PD_MULTIPLIER_MIN_DENOMINATOR,
|
| 11 |
+
ADAPTIVE_PD_MULTIPLIER_MAX_NUMERATOR,
|
| 12 |
+
ADAPTIVE_PD_MULTIPLIER_MAX_DENOMINATOR,
|
| 13 |
+
ADAPTIVE_PD_MULTIPLIER_STEP_NUMERATOR,
|
| 14 |
+
ADAPTIVE_PD_MULTIPLIER_STEP_DENOMINATOR,
|
| 15 |
+
ARMS_RACE_COST_STEP_NUMERATOR, ARMS_RACE_COST_STEP_DENOMINATOR,
|
| 16 |
+
ARMS_RACE_MAX_COST_NUMERATOR, ARMS_RACE_MAX_COST_DENOMINATOR,
|
| 17 |
+
TRUST_EROSION_DECAY_NUMERATOR, TRUST_EROSION_DECAY_DENOMINATOR,
|
| 18 |
+
TRUST_EROSION_RECOVERY_NUMERATOR, TRUST_EROSION_RECOVERY_DENOMINATOR,
|
| 19 |
+
MARKET_DEMAND_SHIFT_NUMERATOR, MARKET_DEMAND_SHIFT_DENOMINATOR,
|
| 20 |
+
REPUTATION_BONUS_NUMERATOR, REPUTATION_BONUS_DENOMINATOR,
|
| 21 |
+
ADAPTIVE_DEFAULT_ROUNDS, ADAPTIVE_GAME_TYPE,
|
| 22 |
+
)
|
| 23 |
+
|
| 24 |
+
# Small integers spelled without numeric literals.
_ZERO, _ONE = int(), int(True)
_TWO = _ONE + _ONE

# Market dynamics tables
# Output quantity per action: low=2, medium=4, high=6.
_MKT_OUT = {
    "low": _TWO,
    "medium": _TWO * _TWO,
    "high": _TWO + _TWO + _TWO,
}
# Production cost per action: low=1, medium=3, high=6.
_MKT_COST = {
    "low": _ONE,
    "medium": _ONE + _TWO,
    "high": _TWO + _TWO + _TWO,
}
# Starting (and maximum) demand intercept: 12.
_MKT_INTERCEPT = _TWO * (_TWO + _TWO + _TWO)
|
| 32 |
+
|
| 33 |
+
def _adaptive_pd_factory() -> GameConfig:
    """Build an Adaptive Prisoner's Dilemma with its own payoff multiplier.

    The multiplier starts at ``EVAL_ONE_FLOAT`` and is private to the
    returned config's ``payoff_fn``. It moves by one step after each
    mutual-cooperation or mutual-defection round, and stays between the
    configured ``ADAPTIVE_PD_MULTIPLIER_*`` minimum and maximum.
    """
    floor = ADAPTIVE_PD_MULTIPLIER_MIN_NUMERATOR / ADAPTIVE_PD_MULTIPLIER_MIN_DENOMINATOR
    ceiling = ADAPTIVE_PD_MULTIPLIER_MAX_NUMERATOR / ADAPTIVE_PD_MULTIPLIER_MAX_DENOMINATOR
    delta = ADAPTIVE_PD_MULTIPLIER_STEP_NUMERATOR / ADAPTIVE_PD_MULTIPLIER_STEP_DENOMINATOR
    multiplier = EVAL_ONE_FLOAT

    def payoff_fn(p_act: str, o_act: str) -> tuple[float, float]:
        """Scale the base PD payoffs, then update the multiplier."""
        nonlocal multiplier
        row = _PD_MATRIX[p_act, o_act]
        # Payoffs use the multiplier as it stood before this round.
        scaled = (row[_ZERO] * multiplier, row[_ONE] * multiplier)
        if p_act == o_act == "cooperate":
            multiplier = min(ceiling, multiplier + delta)
        elif p_act == o_act == "defect":
            multiplier = max(floor, multiplier - delta)
        # Mixed outcomes leave the multiplier untouched.
        return scaled

    return GameConfig(
        name="Adaptive Prisoner's Dilemma",
        description=(
            "A Prisoner's Dilemma where mutual cooperation increases "
            "future payoffs via a growing multiplier, while mutual "
            "defection decreases it. Mixed outcomes leave it unchanged."
        ),
        actions=["cooperate", "defect"],
        game_type=ADAPTIVE_GAME_TYPE,
        default_rounds=ADAPTIVE_DEFAULT_ROUNDS,
        payoff_fn=payoff_fn,
    )
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def _arms_race_factory() -> GameConfig:
    """Build a Hawk-Dove game whose hawk-hawk penalty grows with repetition.

    The accumulated conflict cost starts at ``EVAL_ZERO_FLOAT`` and is
    private to the returned config's ``payoff_fn``. A hawk-hawk round
    charges it to both players and then raises it by one step, up to the
    configured maximum. Any other round lowers it by half a step, down to
    ``EVAL_ZERO_FLOAT``.
    """
    escalation = ARMS_RACE_COST_STEP_NUMERATOR / ARMS_RACE_COST_STEP_DENOMINATOR
    cost_cap = ARMS_RACE_MAX_COST_NUMERATOR / ARMS_RACE_MAX_COST_DENOMINATOR
    conflict_cost = EVAL_ZERO_FLOAT

    def payoff_fn(p_act: str, o_act: str) -> tuple[float, float]:
        """Return the round's payoffs, then update the conflict cost."""
        nonlocal conflict_cost
        row = _HD_MATRIX[p_act, o_act]
        if p_act != "hawk" or o_act != "hawk":
            # De-escalation round: base payoffs are returned as stored.
            conflict_cost = max(EVAL_ZERO_FLOAT, conflict_cost - escalation / _TWO)
            return row
        # Mutual hawk: charge the cost as it stood before this round.
        charged = (row[_ZERO] - conflict_cost, row[_ONE] - conflict_cost)
        conflict_cost = min(cost_cap, conflict_cost + escalation)
        return charged

    return GameConfig(
        name="Arms Race",
        description=(
            "A Hawk-Dove game where mutual hawk play incurs "
            "escalating costs each round. Non-hawk rounds "
            "de-escalate the accumulated conflict cost."
        ),
        actions=["hawk", "dove"],
        game_type=ADAPTIVE_GAME_TYPE,
        default_rounds=ADAPTIVE_DEFAULT_ROUNDS,
        payoff_fn=payoff_fn,
    )
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def _trust_erosion_factory() -> GameConfig:
    """Build a Prisoner's Dilemma scaled by an erodible trust multiplier.

    Trust starts at ``float(TRUST_MULTIPLIER)`` and is private to the
    returned config's ``payoff_fn``. Mutual defection multiplies it by the
    decay ratio, with no lower bound. Mutual cooperation adds the recovery
    amount, never exceeding the starting value.
    """
    decay_ratio = TRUST_EROSION_DECAY_NUMERATOR / TRUST_EROSION_DECAY_DENOMINATOR
    recovery = TRUST_EROSION_RECOVERY_NUMERATOR / TRUST_EROSION_RECOVERY_DENOMINATOR
    trust = float(TRUST_MULTIPLIER)

    def payoff_fn(p_act: str, o_act: str) -> tuple[float, float]:
        """Scale the base PD payoffs by current trust, then update trust."""
        nonlocal trust
        row = _PD_MATRIX[p_act, o_act]
        # Payoffs use the trust level as it stood before this round.
        scaled = (row[_ZERO] * trust, row[_ONE] * trust)
        if p_act == o_act == "defect":
            trust = trust * decay_ratio
        elif p_act == o_act == "cooperate":
            trust = min(float(TRUST_MULTIPLIER), trust + recovery)
        return scaled

    return GameConfig(
        name="Trust Erosion",
        description=(
            "A Prisoner's Dilemma where a trust multiplier amplifies "
            "all payoffs. Mutual defection erodes trust, while mutual "
            "cooperation slowly rebuilds it."
        ),
        actions=["cooperate", "defect"],
        game_type=ADAPTIVE_GAME_TYPE,
        default_rounds=ADAPTIVE_DEFAULT_ROUNDS,
        payoff_fn=payoff_fn,
    )
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
def _market_dynamics_factory() -> GameConfig:
    """Build a Cournot-like duopoly whose demand intercept drifts over time.

    The intercept starts at ``float(_MKT_INTERCEPT)`` and is private to the
    returned config's ``payoff_fn``. After a round whose combined output
    exceeds half of ``_MKT_INTERCEPT`` it drops by one shift, but not below
    ``float(_TWO)``. Otherwise it rises by one shift, but not above the
    starting value.
    """
    shift = MARKET_DEMAND_SHIFT_NUMERATOR / MARKET_DEMAND_SHIFT_DENOMINATOR
    demand_intercept = float(_MKT_INTERCEPT)

    def payoff_fn(p_act: str, o_act: str) -> tuple[float, float]:
        """Return each side's price * quantity - cost, then shift demand."""
        nonlocal demand_intercept
        p_qty = _MKT_OUT[p_act]
        o_qty = _MKT_OUT[o_act]
        supplied = p_qty + o_qty
        # Price never goes negative; it uses the pre-round intercept.
        price = max(EVAL_ZERO_FLOAT, demand_intercept - supplied)
        profits = (
            price * p_qty - _MKT_COST[p_act],
            price * o_qty - _MKT_COST[o_act],
        )
        demand_intercept = (
            max(float(_TWO), demand_intercept - shift)
            if supplied > (_MKT_INTERCEPT / _TWO)
            else min(float(_MKT_INTERCEPT), demand_intercept + shift)
        )
        return profits

    return GameConfig(
        name="Market Dynamics",
        description=(
            "A Cournot-like duopoly where each player chooses output "
            "level. The demand curve shifts based on past total output: "
            "high output depresses future demand, restraint recovers it."
        ),
        actions=["low", "medium", "high"],
        game_type=ADAPTIVE_GAME_TYPE,
        default_rounds=ADAPTIVE_DEFAULT_ROUNDS,
        payoff_fn=payoff_fn,
    )
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
def _reputation_payoffs_factory() -> GameConfig:
    """Build a Prisoner's Dilemma with a bonus tied to the player's record.

    The returned config's ``payoff_fn`` privately counts the rounds played
    and the rounds in which ``p_act`` was "cooperate". Both payoffs get the
    same bonus: the cooperation rate over earlier rounds times the
    configured bonus rate. The current round is counted only after the
    payoffs are computed.
    """
    bonus_rate = REPUTATION_BONUS_NUMERATOR / REPUTATION_BONUS_DENOMINATOR
    coop_rounds = _ZERO
    rounds_played = _ZERO

    def payoff_fn(p_act: str, o_act: str) -> tuple[float, float]:
        """Return the base PD payoffs plus the bonus, then record the round."""
        nonlocal coop_rounds, rounds_played
        row = _PD_MATRIX[p_act, o_act]
        if rounds_played > _ZERO:
            coop_rate = coop_rounds / rounds_played
        else:
            # First round: no history yet, so no bonus.
            coop_rate = EVAL_ZERO_FLOAT
        bonus = coop_rate * bonus_rate
        boosted = (row[_ZERO] + bonus, row[_ONE] + bonus)
        rounds_played += _ONE
        if p_act == "cooperate":
            coop_rounds += _ONE
        return boosted

    return GameConfig(
        name="Reputation Payoffs",
        description=(
            "A Prisoner's Dilemma where both players receive a bonus "
            "proportional to the player's historical cooperation rate. "
            "Building a cooperative reputation pays future dividends."
        ),
        actions=["cooperate", "defect"],
        game_type=ADAPTIVE_GAME_TYPE,
        default_rounds=ADAPTIVE_DEFAULT_ROUNDS,
        payoff_fn=payoff_fn,
    )
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
# Register all factories under their registry keys.
GAME_FACTORIES.update({
    "adaptive_prisoners_dilemma": _adaptive_pd_factory,
    "arms_race": _arms_race_factory,
    "trust_erosion": _trust_erosion_factory,
    "market_dynamics": _market_dynamics_factory,
    "reputation_payoffs": _reputation_payoffs_factory,
})
|
common/games_meta/game_tags.py
CHANGED
|
@@ -184,6 +184,13 @@ GAME_TAGS: dict[str, frozenset[str]] = {
|
|
| 184 |
"rule_signal_stag_hunt": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, COORDINATION, SOCIAL_DILEMMA, LARGE_CHOICE, META_GOVERNANCE}),
|
| 185 |
"rule_signal_hawk_dove": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, ANTI_COORDINATION, SOCIAL_DILEMMA, LARGE_CHOICE, META_GOVERNANCE}),
|
| 186 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
# ── meta/meta_games.py (gossip) ──
|
| 188 |
"gossip_prisoners_dilemma": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, LARGE_CHOICE, META_GOVERNANCE}),
|
| 189 |
"gossip_stag_hunt": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, COORDINATION, SOCIAL_DILEMMA, LARGE_CHOICE, META_GOVERNANCE}),
|
|
|
|
| 184 |
"rule_signal_stag_hunt": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, COORDINATION, SOCIAL_DILEMMA, LARGE_CHOICE, META_GOVERNANCE}),
|
| 185 |
"rule_signal_hawk_dove": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, ANTI_COORDINATION, SOCIAL_DILEMMA, LARGE_CHOICE, META_GOVERNANCE}),
|
| 186 |
|
| 187 |
+
# ── games_adaptive/factories.py ──
|
| 188 |
+
"adaptive_prisoners_dilemma": frozenset({NO_COMMUNICATION, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, BINARY_CHOICE}),
|
| 189 |
+
"arms_race": frozenset({NO_COMMUNICATION, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, ANTI_COORDINATION, SOCIAL_DILEMMA, BINARY_CHOICE}),
|
| 190 |
+
"trust_erosion": frozenset({NO_COMMUNICATION, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, BINARY_CHOICE}),
|
| 191 |
+
"market_dynamics": frozenset({NO_COMMUNICATION, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, MARKET_COMPETITION, SMALL_CHOICE}),
|
| 192 |
+
"reputation_payoffs": frozenset({NO_COMMUNICATION, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, BINARY_CHOICE}),
|
| 193 |
+
|
| 194 |
# ── meta/meta_games.py (gossip) ──
|
| 195 |
"gossip_prisoners_dilemma": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, LARGE_CHOICE, META_GOVERNANCE}),
|
| 196 |
"gossip_stag_hunt": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, COORDINATION, SOCIAL_DILEMMA, LARGE_CHOICE, META_GOVERNANCE}),
|
constant_definitions/slides/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""Slides layout constants."""
|
constant_definitions/slides/layout.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Numeric constants for slide generation layout and Wisent brand colors."""
|
| 2 |
+
|
| 3 |
+
# Wisent brand palette from wisent-visuals.
# One line per colour, channels in (R, G, B) order, each an int in 0-255.
ACCENT_R, ACCENT_G, ACCENT_B = 197, 255, 200
RED_R, RED_G, RED_B = 250, 90, 70
PURPLE_R, PURPLE_G, PURPLE_B = 177, 158, 204
DARK_R, DARK_G, DARK_B = 18, 18, 18
GRID_R, GRID_G, GRID_B = 45, 49, 48
LEGEND_R, LEGEND_G, LEGEND_B = 118, 153, 120
# Single channel values for pure white / pure black.
WHITE_VAL, BLACK_VAL = 255, 0
|
| 24 |
+
|
| 25 |
+
# Font sizes in points
PT_TITLE, PT_SUBTITLE = 36, 20
PT_BODY, PT_LABEL, PT_SMALL = 16, 14, 12
PT_STAT, PT_TEAM = 48, 28

# Slide dimensions in inches (for widescreen 16:9)
SLIDE_W_INCHES = 10
# Height is kept as an integer pair; 45 / 8 = 5.625, and 10 : 5.625 = 16 : 9.
# Presumably consumers divide numerator by denominator -- confirm at the call site.
SLIDE_H_NUMER, SLIDE_H_DENOM = 45, 8
|
| 38 |
+
|
| 39 |
+
# Position helpers in inches
POS_HALF, POS_ONE, POS_ONE_HALF = 0.5, 1.0, 1.5
POS_TWO, POS_TWO_HALF = 2.0, 2.5
POS_THREE, POS_THREE_HALF = 3.0, 3.5
POS_FOUR, POS_FOUR_HALF = 4.0, 4.5
POS_FIVE, POS_SIX, POS_SEVEN, POS_EIGHT, POS_NINE = 5.0, 6.0, 7.0, 8.0, 9.0

# Image dimensions
IMG_FIG_W, IMG_FIG_H = 7.0, 3.5
IMG_KANT_W, IMG_KANT_H = 3.0, 4.0

# Column layout
COL_LEFT_X, COL_RIGHT_X = 0.5, 5.0
COL_W, COL_H = 4.5, 4.0

# Stat column positions
STAT_COL_ONE_X, STAT_COL_TWO_X, STAT_COL_THREE_X = 0.5, 3.5, 6.5
STAT_COL_W = 3.0

# Title position
TITLE_X, TITLE_Y = 0.5, 0.3
TITLE_W, TITLE_H = 9.0, 1.0

# Centered text position
CENTER_X, CENTER_Y = 1.0, 1.5
CENTER_W, CENTER_H = 8.0, 3.5

# Footer position
FOOTER_Y, FOOTER_H = 4.8, 0.5

# Team layout
TEAM_NAME_Y, TEAM_NAME_H = 2.5, 1.0
TEAM_COL_ONE_X, TEAM_COL_TWO_X = 1.0, 5.5
TEAM_COL_W = 3.5
|
constant_definitions/var/meta/adaptive_constants.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Constants for adaptive payoff games."""
|
| 2 |
+
|
| 3 |
+
# Adaptive PD: cooperation multiplier range (each value is a numerator /
# denominator pair of ints).
ADAPTIVE_PD_MULTIPLIER_MIN_NUMERATOR, ADAPTIVE_PD_MULTIPLIER_MIN_DENOMINATOR = 5, 10    # 5/10
ADAPTIVE_PD_MULTIPLIER_MAX_NUMERATOR, ADAPTIVE_PD_MULTIPLIER_MAX_DENOMINATOR = 2, 1     # 2/1
ADAPTIVE_PD_MULTIPLIER_STEP_NUMERATOR, ADAPTIVE_PD_MULTIPLIER_STEP_DENOMINATOR = 1, 10  # 1/10

# Arms Race: cost escalation per round
ARMS_RACE_COST_STEP_NUMERATOR, ARMS_RACE_COST_STEP_DENOMINATOR = 1, 2  # 1/2
ARMS_RACE_MAX_COST_NUMERATOR, ARMS_RACE_MAX_COST_DENOMINATOR = 5, 1    # 5/1

# Trust Erosion: multiplier decay after defection
TRUST_EROSION_DECAY_NUMERATOR, TRUST_EROSION_DECAY_DENOMINATOR = 8, 10        # 8/10
TRUST_EROSION_RECOVERY_NUMERATOR, TRUST_EROSION_RECOVERY_DENOMINATOR = 1, 10  # 1/10

# Market dynamics: demand shift per round
MARKET_DEMAND_SHIFT_NUMERATOR, MARKET_DEMAND_SHIFT_DENOMINATOR = 1, 2  # 1/2

# Reputation payoffs: cooperation bonus scaling
REPUTATION_BONUS_NUMERATOR, REPUTATION_BONUS_DENOMINATOR = 1, 5  # 1/5

# Default rounds for adaptive games
ADAPTIVE_DEFAULT_ROUNDS = 10

# Game type identifier
ADAPTIVE_GAME_TYPE = "adaptive"
|
constant_definitions/var/meta/self_play_constants.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Constants for self-play multi-agent training."""
|
| 2 |
+
|
| 3 |
+
# Opponent update frequency (steps between opponent refresh)
SELF_PLAY_OPPONENT_UPDATE_INTERVAL = 50

# Maximum frozen checkpoints kept in the opponent pool
SELF_PLAY_POOL_MAX_SIZE = 5

# Self-play reward weights (numerator / denominator pairs).
# The five numerators add up to 10 over a common denominator of 10.
SELF_PLAY_EXPLOIT_WEIGHT_NUMERATOR, SELF_PLAY_EXPLOIT_WEIGHT_DENOMINATOR = 3, 10
SELF_PLAY_COOP_WEIGHT_NUMERATOR, SELF_PLAY_COOP_WEIGHT_DENOMINATOR = 3, 10
SELF_PLAY_PARETO_WEIGHT_NUMERATOR, SELF_PLAY_PARETO_WEIGHT_DENOMINATOR = 2, 10
SELF_PLAY_FAIRNESS_WEIGHT_NUMERATOR, SELF_PLAY_FAIRNESS_WEIGHT_DENOMINATOR = 1, 10
SELF_PLAY_ADAPT_WEIGHT_NUMERATOR, SELF_PLAY_ADAPT_WEIGHT_DENOMINATOR = 1, 10

# Training defaults
SELF_PLAY_DEFAULT_EPISODES_PER_STEP, SELF_PLAY_DEFAULT_MAX_STEPS = 16, 500
SELF_PLAY_WARMUP_EPISODES = 32
SELF_PLAY_CHECKPOINT_PREFIX = "self_play_step"

# Opponent strategy label used in trajectory metadata
SELF_PLAY_OPPONENT_LABEL = "agent"
|