jtowarek committed on
Commit
780d0f2
·
verified ·
1 Parent(s): ba4ecd0

Upload folder using huggingface_hub

Browse files
bench/gradio_app/app.py CHANGED
@@ -121,25 +121,28 @@ def _filter_game_names(category_tag):
121
  return sorted(_KEY_TO_NAME[k] for k in matching_keys if k in _KEY_TO_NAME)
122
 
123
  # ---------------------------------------------------------------------------
124
- # Inline 2-player strategies
125
  # ---------------------------------------------------------------------------
126
- def _strat_random(actions, _h):
127
- return _rand.choice(actions)
128
-
129
- def _strat_first(actions, _h):
130
- return actions[_ZERO]
131
-
132
- def _strat_last(actions, _h):
133
- return actions[min(_ONE, len(actions) - _ONE)]
134
-
135
- def _strat_tft(actions, h):
136
- if not h:
137
  return actions[_ZERO]
138
- prev = h[_NEG_ONE]["player_action"]
139
- return prev if prev in actions else actions[_ZERO]
140
-
141
- STRATEGIES_2P = {"random": _strat_random, "always_cooperate": _strat_first,
142
- "always_defect": _strat_last, "tit_for_tat": _strat_tft}
 
 
 
 
 
143
 
144
  # N-player strategy names
145
  _NPLAYER_STRAT_NAMES = list(NPLAYER_STRATEGIES.keys()) if _HAS_NPLAYER_ENV else ["random"]
@@ -256,7 +259,12 @@ def play_round(action_str, state):
256
  opp_act_list = list(opp_actions)
257
  else:
258
  opp_act_list = info["actions"]
259
- opp = STRATEGIES_2P[state["strategy"]](opp_act_list, state["history"])
 
 
 
 
 
260
  p_pay, o_pay = info["payoff_fn"](action_str, opp)
261
  state["round"] += _ONE
262
  state["p_score"] += p_pay
 
121
  return sorted(_KEY_TO_NAME[k] for k in matching_keys if k in _KEY_TO_NAME)
122
 
123
  # ---------------------------------------------------------------------------
124
+ # 2-player strategies (from the real strategy registry)
125
  # ---------------------------------------------------------------------------
126
+ try:
127
+ from common.strategies import STRATEGIES as _STRAT_REGISTRY
128
+ STRATEGIES_2P = _STRAT_REGISTRY
129
+ _HAS_FULL_STRATEGIES = True
130
+ except ImportError:
131
+ # Minimal fallback
132
+ def _strat_random(actions, _h):
133
+ return _rand.choice(actions)
134
+ def _strat_first(actions, _h):
 
 
135
  return actions[_ZERO]
136
+ def _strat_last(actions, _h):
137
+ return actions[min(_ONE, len(actions) - _ONE)]
138
+ def _strat_tft(actions, h):
139
+ if not h:
140
+ return actions[_ZERO]
141
+ prev = h[_NEG_ONE]["player_action"]
142
+ return prev if prev in actions else actions[_ZERO]
143
+ STRATEGIES_2P = {"random": _strat_random, "always_cooperate": _strat_first,
144
+ "always_defect": _strat_last, "tit_for_tat": _strat_tft}
145
+ _HAS_FULL_STRATEGIES = False
146
 
147
  # N-player strategy names
148
  _NPLAYER_STRAT_NAMES = list(NPLAYER_STRATEGIES.keys()) if _HAS_NPLAYER_ENV else ["random"]
 
259
  opp_act_list = list(opp_actions)
260
  else:
261
  opp_act_list = info["actions"]
262
+ strat = STRATEGIES_2P[state["strategy"]]
263
+ game_type = info.get("game_type", "matrix")
264
+ if _HAS_FULL_STRATEGIES:
265
+ opp = strat.choose_action(game_type, opp_act_list, state["history"])
266
+ else:
267
+ opp = strat(opp_act_list, state["history"])
268
  p_pay, o_pay = info["payoff_fn"](action_str, opp)
269
  state["round"] += _ONE
270
  state["p_score"] += p_pay
common/games.py CHANGED
@@ -165,6 +165,8 @@ _PG_CONTRIBUTIONS: list[str] = [
165
  # Game registry
166
  # ---------------------------------------------------------------------------
167
 
 
 
168
  GAMES: dict[str, GameConfig] = {
169
  "prisoners_dilemma": GameConfig(
170
  name="Prisoner's Dilemma",
@@ -246,15 +248,21 @@ GAMES: dict[str, GameConfig] = {
246
  def get_game(name: str) -> GameConfig:
247
  """Retrieve a GameConfig by its registry key.
248
 
 
 
 
 
249
  Args:
250
- name: Key in the GAMES registry (e.g. ``"prisoners_dilemma"``).
251
 
252
  Returns:
253
  The corresponding :class:`GameConfig` instance.
254
 
255
  Raises:
256
- KeyError: If *name* is not present in the registry.
257
  """
 
 
258
  return GAMES[name]
259
 
260
 
@@ -273,6 +281,7 @@ def _load_extensions() -> None:
273
  "common.games_coop.dynamic", "common.games_coop.pd_variants",
274
  "common.games_coop.infinite", "common.games_coop.stochastic",
275
  "common.meta.meta_games",
 
276
  ]:
277
  try:
278
  importlib.import_module(mod)
 
165
  # Game registry
166
  # ---------------------------------------------------------------------------
167
 
168
+ GAME_FACTORIES: dict[str, Callable[[], GameConfig]] = {}
169
+
170
  GAMES: dict[str, GameConfig] = {
171
  "prisoners_dilemma": GameConfig(
172
  name="Prisoner's Dilemma",
 
248
  def get_game(name: str) -> GameConfig:
249
  """Retrieve a GameConfig by its registry key.
250
 
251
+ If *name* is in :data:`GAME_FACTORIES`, the factory is called to
252
+ produce a fresh :class:`GameConfig` with independent mutable state.
253
+ Otherwise falls back to the static :data:`GAMES` registry.
254
+
255
  Args:
256
+ name: Key in GAME_FACTORIES or GAMES.
257
 
258
  Returns:
259
  The corresponding :class:`GameConfig` instance.
260
 
261
  Raises:
262
+ KeyError: If *name* is not in either registry.
263
  """
264
+ if name in GAME_FACTORIES:
265
+ return GAME_FACTORIES[name]()
266
  return GAMES[name]
267
 
268
 
 
281
  "common.games_coop.dynamic", "common.games_coop.pd_variants",
282
  "common.games_coop.infinite", "common.games_coop.stochastic",
283
  "common.meta.meta_games",
284
+ "common.games_adaptive.factories",
285
  ]:
286
  try:
287
  importlib.import_module(mod)
common/games_adaptive/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Adaptive payoff game factories."""
common/games_adaptive/factories.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Adaptive payoff game factories with history-dependent payoff functions."""
2
+ from __future__ import annotations
3
+ from typing import Callable
4
+ from common.games import GameConfig, GAME_FACTORIES, _PD_MATRIX, _HD_MATRIX
5
+ from constant_definitions.game_constants import (
6
+ TRUST_MULTIPLIER, EVAL_ZERO_FLOAT, EVAL_ONE_FLOAT,
7
+ )
8
+ from constant_definitions.var.meta.adaptive_constants import (
9
+ ADAPTIVE_PD_MULTIPLIER_MIN_NUMERATOR,
10
+ ADAPTIVE_PD_MULTIPLIER_MIN_DENOMINATOR,
11
+ ADAPTIVE_PD_MULTIPLIER_MAX_NUMERATOR,
12
+ ADAPTIVE_PD_MULTIPLIER_MAX_DENOMINATOR,
13
+ ADAPTIVE_PD_MULTIPLIER_STEP_NUMERATOR,
14
+ ADAPTIVE_PD_MULTIPLIER_STEP_DENOMINATOR,
15
+ ARMS_RACE_COST_STEP_NUMERATOR, ARMS_RACE_COST_STEP_DENOMINATOR,
16
+ ARMS_RACE_MAX_COST_NUMERATOR, ARMS_RACE_MAX_COST_DENOMINATOR,
17
+ TRUST_EROSION_DECAY_NUMERATOR, TRUST_EROSION_DECAY_DENOMINATOR,
18
+ TRUST_EROSION_RECOVERY_NUMERATOR, TRUST_EROSION_RECOVERY_DENOMINATOR,
19
+ MARKET_DEMAND_SHIFT_NUMERATOR, MARKET_DEMAND_SHIFT_DENOMINATOR,
20
+ REPUTATION_BONUS_NUMERATOR, REPUTATION_BONUS_DENOMINATOR,
21
+ ADAPTIVE_DEFAULT_ROUNDS, ADAPTIVE_GAME_TYPE,
22
+ )
23
+
24
+ _ZERO = int()
25
+ _ONE = int(bool(True))
26
+ _TWO = _ONE + _ONE
27
+
28
+ # Market dynamics tables
29
+ _MKT_OUT = {"low": _TWO, "medium": _TWO + _TWO, "high": _TWO * _TWO + _TWO}
30
+ _MKT_COST = {"low": _ONE, "medium": _TWO + _ONE, "high": _TWO * _TWO + _TWO}
31
+ _MKT_INTERCEPT = (_TWO + _TWO) * (_TWO + _ONE)
32
+
33
+ def _adaptive_pd_factory() -> GameConfig:
34
+ """PD where mutual cooperation increases future payoffs."""
35
+ min_m = ADAPTIVE_PD_MULTIPLIER_MIN_NUMERATOR / ADAPTIVE_PD_MULTIPLIER_MIN_DENOMINATOR
36
+ max_m = ADAPTIVE_PD_MULTIPLIER_MAX_NUMERATOR / ADAPTIVE_PD_MULTIPLIER_MAX_DENOMINATOR
37
+ step = ADAPTIVE_PD_MULTIPLIER_STEP_NUMERATOR / ADAPTIVE_PD_MULTIPLIER_STEP_DENOMINATOR
38
+ _s = [EVAL_ONE_FLOAT]
39
+
40
+ def payoff_fn(p_act: str, o_act: str) -> tuple[float, float]:
41
+ mult = _s[_ZERO]
42
+ base = _PD_MATRIX[(p_act, o_act)]
43
+ result = (base[_ZERO] * mult, base[_ONE] * mult)
44
+ if p_act == "cooperate" and o_act == "cooperate":
45
+ _s[_ZERO] = min(max_m, _s[_ZERO] + step)
46
+ elif p_act == "defect" and o_act == "defect":
47
+ _s[_ZERO] = max(min_m, _s[_ZERO] - step)
48
+ return result
49
+
50
+ return GameConfig(
51
+ name="Adaptive Prisoner's Dilemma",
52
+ description=(
53
+ "A Prisoner's Dilemma where mutual cooperation increases "
54
+ "future payoffs via a growing multiplier, while mutual "
55
+ "defection decreases it. Mixed outcomes leave it unchanged."
56
+ ),
57
+ actions=["cooperate", "defect"],
58
+ game_type=ADAPTIVE_GAME_TYPE,
59
+ default_rounds=ADAPTIVE_DEFAULT_ROUNDS,
60
+ payoff_fn=payoff_fn,
61
+ )
62
+
63
+
64
+ def _arms_race_factory() -> GameConfig:
65
+ """Hawk-Dove where hawk-hawk conflict costs escalate each round."""
66
+ c_step = ARMS_RACE_COST_STEP_NUMERATOR / ARMS_RACE_COST_STEP_DENOMINATOR
67
+ max_c = ARMS_RACE_MAX_COST_NUMERATOR / ARMS_RACE_MAX_COST_DENOMINATOR
68
+ _s = [EVAL_ZERO_FLOAT]
69
+
70
+ def payoff_fn(p_act: str, o_act: str) -> tuple[float, float]:
71
+ cost = _s[_ZERO]
72
+ base = _HD_MATRIX[(p_act, o_act)]
73
+ if p_act == "hawk" and o_act == "hawk":
74
+ result = (base[_ZERO] - cost, base[_ONE] - cost)
75
+ _s[_ZERO] = min(max_c, _s[_ZERO] + c_step)
76
+ else:
77
+ result = base
78
+ _s[_ZERO] = max(EVAL_ZERO_FLOAT, _s[_ZERO] - c_step / _TWO)
79
+ return result
80
+
81
+ return GameConfig(
82
+ name="Arms Race",
83
+ description=(
84
+ "A Hawk-Dove game where mutual hawk play incurs "
85
+ "escalating costs each round. Non-hawk rounds "
86
+ "de-escalate the accumulated conflict cost."
87
+ ),
88
+ actions=["hawk", "dove"],
89
+ game_type=ADAPTIVE_GAME_TYPE,
90
+ default_rounds=ADAPTIVE_DEFAULT_ROUNDS,
91
+ payoff_fn=payoff_fn,
92
+ )
93
+
94
+
95
+ def _trust_erosion_factory() -> GameConfig:
96
+ """Trust-like PD where a multiplier decays after mutual defection."""
97
+ decay = TRUST_EROSION_DECAY_NUMERATOR / TRUST_EROSION_DECAY_DENOMINATOR
98
+ recov = TRUST_EROSION_RECOVERY_NUMERATOR / TRUST_EROSION_RECOVERY_DENOMINATOR
99
+ _s = [float(TRUST_MULTIPLIER)]
100
+
101
+ def payoff_fn(p_act: str, o_act: str) -> tuple[float, float]:
102
+ mult = _s[_ZERO]
103
+ base = _PD_MATRIX[(p_act, o_act)]
104
+ result = (base[_ZERO] * mult, base[_ONE] * mult)
105
+ if p_act == "defect" and o_act == "defect":
106
+ _s[_ZERO] = _s[_ZERO] * decay
107
+ elif p_act == "cooperate" and o_act == "cooperate":
108
+ _s[_ZERO] = min(float(TRUST_MULTIPLIER), _s[_ZERO] + recov)
109
+ return result
110
+
111
+ return GameConfig(
112
+ name="Trust Erosion",
113
+ description=(
114
+ "A Prisoner's Dilemma where a trust multiplier amplifies "
115
+ "all payoffs. Mutual defection erodes trust, while mutual "
116
+ "cooperation slowly rebuilds it."
117
+ ),
118
+ actions=["cooperate", "defect"],
119
+ game_type=ADAPTIVE_GAME_TYPE,
120
+ default_rounds=ADAPTIVE_DEFAULT_ROUNDS,
121
+ payoff_fn=payoff_fn,
122
+ )
123
+
124
+
125
+ def _market_dynamics_factory() -> GameConfig:
126
+ """Cournot-like duopoly where demand shifts based on total output."""
127
+ shift = MARKET_DEMAND_SHIFT_NUMERATOR / MARKET_DEMAND_SHIFT_DENOMINATOR
128
+ _s = [float(_MKT_INTERCEPT)]
129
+
130
+ def payoff_fn(p_act: str, o_act: str) -> tuple[float, float]:
131
+ intercept = _s[_ZERO]
132
+ p_out, o_out = _MKT_OUT[p_act], _MKT_OUT[o_act]
133
+ total = p_out + o_out
134
+ price = max(EVAL_ZERO_FLOAT, intercept - total)
135
+ p_rev = price * p_out - _MKT_COST[p_act]
136
+ o_rev = price * o_out - _MKT_COST[o_act]
137
+ if total > (_MKT_INTERCEPT / _TWO):
138
+ _s[_ZERO] = max(float(_TWO), _s[_ZERO] - shift)
139
+ else:
140
+ _s[_ZERO] = min(float(_MKT_INTERCEPT), _s[_ZERO] + shift)
141
+ return (p_rev, o_rev)
142
+
143
+ return GameConfig(
144
+ name="Market Dynamics",
145
+ description=(
146
+ "A Cournot-like duopoly where each player chooses output "
147
+ "level. The demand curve shifts based on past total output: "
148
+ "high output depresses future demand, restraint recovers it."
149
+ ),
150
+ actions=["low", "medium", "high"],
151
+ game_type=ADAPTIVE_GAME_TYPE,
152
+ default_rounds=ADAPTIVE_DEFAULT_ROUNDS,
153
+ payoff_fn=payoff_fn,
154
+ )
155
+
156
+
157
+ def _reputation_payoffs_factory() -> GameConfig:
158
+ """Base PD with payoff bonus proportional to cooperation history."""
159
+ bonus_rate = REPUTATION_BONUS_NUMERATOR / REPUTATION_BONUS_DENOMINATOR
160
+ _s = [_ZERO, _ZERO] # [coop_count, total_rounds]
161
+
162
+ def payoff_fn(p_act: str, o_act: str) -> tuple[float, float]:
163
+ base = _PD_MATRIX[(p_act, o_act)]
164
+ total = _s[_ONE]
165
+ coop_rate = _s[_ZERO] / total if total > _ZERO else EVAL_ZERO_FLOAT
166
+ bonus = coop_rate * bonus_rate
167
+ result = (base[_ZERO] + bonus, base[_ONE] + bonus)
168
+ _s[_ONE] += _ONE
169
+ if p_act == "cooperate":
170
+ _s[_ZERO] += _ONE
171
+ return result
172
+
173
+ return GameConfig(
174
+ name="Reputation Payoffs",
175
+ description=(
176
+ "A Prisoner's Dilemma where both players receive a bonus "
177
+ "proportional to the player's historical cooperation rate. "
178
+ "Building a cooperative reputation pays future dividends."
179
+ ),
180
+ actions=["cooperate", "defect"],
181
+ game_type=ADAPTIVE_GAME_TYPE,
182
+ default_rounds=ADAPTIVE_DEFAULT_ROUNDS,
183
+ payoff_fn=payoff_fn,
184
+ )
185
+
186
+
187
+ # Register all factories
188
+ GAME_FACTORIES["adaptive_prisoners_dilemma"] = _adaptive_pd_factory
189
+ GAME_FACTORIES["arms_race"] = _arms_race_factory
190
+ GAME_FACTORIES["trust_erosion"] = _trust_erosion_factory
191
+ GAME_FACTORIES["market_dynamics"] = _market_dynamics_factory
192
+ GAME_FACTORIES["reputation_payoffs"] = _reputation_payoffs_factory
common/games_meta/game_tags.py CHANGED
@@ -184,6 +184,13 @@ GAME_TAGS: dict[str, frozenset[str]] = {
184
  "rule_signal_stag_hunt": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, COORDINATION, SOCIAL_DILEMMA, LARGE_CHOICE, META_GOVERNANCE}),
185
  "rule_signal_hawk_dove": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, ANTI_COORDINATION, SOCIAL_DILEMMA, LARGE_CHOICE, META_GOVERNANCE}),
186
 
 
 
 
 
 
 
 
187
  # ── meta/meta_games.py (gossip) ──
188
  "gossip_prisoners_dilemma": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, LARGE_CHOICE, META_GOVERNANCE}),
189
  "gossip_stag_hunt": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, COORDINATION, SOCIAL_DILEMMA, LARGE_CHOICE, META_GOVERNANCE}),
 
184
  "rule_signal_stag_hunt": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, COORDINATION, SOCIAL_DILEMMA, LARGE_CHOICE, META_GOVERNANCE}),
185
  "rule_signal_hawk_dove": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, ANTI_COORDINATION, SOCIAL_DILEMMA, LARGE_CHOICE, META_GOVERNANCE}),
186
 
187
+ # ── games_adaptive/factories.py ──
188
+ "adaptive_prisoners_dilemma": frozenset({NO_COMMUNICATION, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, BINARY_CHOICE}),
189
+ "arms_race": frozenset({NO_COMMUNICATION, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, ANTI_COORDINATION, SOCIAL_DILEMMA, BINARY_CHOICE}),
190
+ "trust_erosion": frozenset({NO_COMMUNICATION, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, BINARY_CHOICE}),
191
+ "market_dynamics": frozenset({NO_COMMUNICATION, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, MARKET_COMPETITION, SMALL_CHOICE}),
192
+ "reputation_payoffs": frozenset({NO_COMMUNICATION, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, BINARY_CHOICE}),
193
+
194
  # ── meta/meta_games.py (gossip) ──
195
  "gossip_prisoners_dilemma": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, SYMMETRIC_PAYOFF, SOCIAL_DILEMMA, LARGE_CHOICE, META_GOVERNANCE}),
196
  "gossip_stag_hunt": frozenset({CHEAP_TALK, COMPLETE_INFORMATION, SIMULTANEOUS, REPEATED, COORDINATION, SOCIAL_DILEMMA, LARGE_CHOICE, META_GOVERNANCE}),
constant_definitions/slides/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Slides layout constants."""
constant_definitions/slides/layout.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Numeric constants for slide generation layout and Wisent brand colors."""
2
+
3
+ # Wisent brand palette from wisent-visuals (RGB tuples 0-255)
4
+ ACCENT_R = 197
5
+ ACCENT_G = 255
6
+ ACCENT_B = 200
7
+ RED_R = 250
8
+ RED_G = 90
9
+ RED_B = 70
10
+ PURPLE_R = 177
11
+ PURPLE_G = 158
12
+ PURPLE_B = 204
13
+ DARK_R = 18
14
+ DARK_G = 18
15
+ DARK_B = 18
16
+ GRID_R = 45
17
+ GRID_G = 49
18
+ GRID_B = 48
19
+ LEGEND_R = 118
20
+ LEGEND_G = 153
21
+ LEGEND_B = 120
22
+ WHITE_VAL = 255
23
+ BLACK_VAL = 0
24
+
25
+ # Font sizes in points
26
+ PT_TITLE = 36
27
+ PT_SUBTITLE = 20
28
+ PT_BODY = 16
29
+ PT_SMALL = 12
30
+ PT_STAT = 48
31
+ PT_LABEL = 14
32
+ PT_TEAM = 28
33
+
34
+ # Slide dimensions in inches (for widescreen 16:9)
35
+ SLIDE_W_INCHES = 10
36
+ SLIDE_H_NUMER = 45
37
+ SLIDE_H_DENOM = 8
38
+
39
+ # Position helpers in inches
40
+ POS_HALF = 0.5
41
+ POS_ONE = 1.0
42
+ POS_ONE_HALF = 1.5
43
+ POS_TWO = 2.0
44
+ POS_TWO_HALF = 2.5
45
+ POS_THREE = 3.0
46
+ POS_THREE_HALF = 3.5
47
+ POS_FOUR = 4.0
48
+ POS_FOUR_HALF = 4.5
49
+ POS_FIVE = 5.0
50
+ POS_SIX = 6.0
51
+ POS_SEVEN = 7.0
52
+ POS_EIGHT = 8.0
53
+ POS_NINE = 9.0
54
+
55
+ # Image dimensions
56
+ IMG_FIG_W = 7.0
57
+ IMG_FIG_H = 3.5
58
+ IMG_KANT_W = 3.0
59
+ IMG_KANT_H = 4.0
60
+
61
+ # Column layout
62
+ COL_LEFT_X = 0.5
63
+ COL_RIGHT_X = 5.0
64
+ COL_W = 4.5
65
+ COL_H = 4.0
66
+
67
+ # Stat column positions
68
+ STAT_COL_ONE_X = 0.5
69
+ STAT_COL_TWO_X = 3.5
70
+ STAT_COL_THREE_X = 6.5
71
+ STAT_COL_W = 3.0
72
+
73
+ # Title position
74
+ TITLE_X = 0.5
75
+ TITLE_Y = 0.3
76
+ TITLE_W = 9.0
77
+ TITLE_H = 1.0
78
+
79
+ # Centered text position
80
+ CENTER_Y = 1.5
81
+ CENTER_W = 8.0
82
+ CENTER_H = 3.5
83
+ CENTER_X = 1.0
84
+
85
+ # Footer position
86
+ FOOTER_Y = 4.8
87
+ FOOTER_H = 0.5
88
+
89
+ # Team layout
90
+ TEAM_NAME_Y = 2.5
91
+ TEAM_NAME_H = 1.0
92
+ TEAM_COL_ONE_X = 1.0
93
+ TEAM_COL_TWO_X = 5.5
94
+ TEAM_COL_W = 3.5
constant_definitions/var/meta/adaptive_constants.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Constants for adaptive payoff games."""
2
+
3
+ # Adaptive PD: cooperation multiplier range
4
+ ADAPTIVE_PD_MULTIPLIER_MIN_NUMERATOR = 5
5
+ ADAPTIVE_PD_MULTIPLIER_MIN_DENOMINATOR = 10
6
+
7
+ ADAPTIVE_PD_MULTIPLIER_MAX_NUMERATOR = 2
8
+ ADAPTIVE_PD_MULTIPLIER_MAX_DENOMINATOR = 1
9
+
10
+ ADAPTIVE_PD_MULTIPLIER_STEP_NUMERATOR = 1
11
+ ADAPTIVE_PD_MULTIPLIER_STEP_DENOMINATOR = 10
12
+
13
+ # Arms Race: cost escalation per round
14
+ ARMS_RACE_COST_STEP_NUMERATOR = 1
15
+ ARMS_RACE_COST_STEP_DENOMINATOR = 2
16
+
17
+ ARMS_RACE_MAX_COST_NUMERATOR = 5
18
+ ARMS_RACE_MAX_COST_DENOMINATOR = 1
19
+
20
+ # Trust Erosion: multiplier decay after defection
21
+ TRUST_EROSION_DECAY_NUMERATOR = 8
22
+ TRUST_EROSION_DECAY_DENOMINATOR = 10
23
+
24
+ TRUST_EROSION_RECOVERY_NUMERATOR = 1
25
+ TRUST_EROSION_RECOVERY_DENOMINATOR = 10
26
+
27
+ # Market dynamics: demand shift per round
28
+ MARKET_DEMAND_SHIFT_NUMERATOR = 1
29
+ MARKET_DEMAND_SHIFT_DENOMINATOR = 2
30
+
31
+ # Reputation payoffs: cooperation bonus scaling
32
+ REPUTATION_BONUS_NUMERATOR = 1
33
+ REPUTATION_BONUS_DENOMINATOR = 5
34
+
35
+ # Default rounds for adaptive games
36
+ ADAPTIVE_DEFAULT_ROUNDS = 10
37
+
38
+ # Game type identifier
39
+ ADAPTIVE_GAME_TYPE = "adaptive"
constant_definitions/var/meta/self_play_constants.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Constants for self-play multi-agent training."""
2
+
3
+ # Opponent update frequency (steps between opponent refresh)
4
+ SELF_PLAY_OPPONENT_UPDATE_INTERVAL = 50
5
+
6
+ # Maximum frozen checkpoints kept in the opponent pool
7
+ SELF_PLAY_POOL_MAX_SIZE = 5
8
+
9
+ # Self-play reward weights (numerator / denominator pairs)
10
+ SELF_PLAY_EXPLOIT_WEIGHT_NUMERATOR = 3
11
+ SELF_PLAY_EXPLOIT_WEIGHT_DENOMINATOR = 10
12
+
13
+ SELF_PLAY_COOP_WEIGHT_NUMERATOR = 3
14
+ SELF_PLAY_COOP_WEIGHT_DENOMINATOR = 10
15
+
16
+ SELF_PLAY_PARETO_WEIGHT_NUMERATOR = 2
17
+ SELF_PLAY_PARETO_WEIGHT_DENOMINATOR = 10
18
+
19
+ SELF_PLAY_FAIRNESS_WEIGHT_NUMERATOR = 1
20
+ SELF_PLAY_FAIRNESS_WEIGHT_DENOMINATOR = 10
21
+
22
+ SELF_PLAY_ADAPT_WEIGHT_NUMERATOR = 1
23
+ SELF_PLAY_ADAPT_WEIGHT_DENOMINATOR = 10
24
+
25
+ # Training defaults
26
+ SELF_PLAY_DEFAULT_EPISODES_PER_STEP = 16
27
+ SELF_PLAY_DEFAULT_MAX_STEPS = 500
28
+ SELF_PLAY_CHECKPOINT_PREFIX = "self_play_step"
29
+ SELF_PLAY_WARMUP_EPISODES = 32
30
+
31
+ # Opponent strategy label used in trajectory metadata
32
+ SELF_PLAY_OPPONENT_LABEL = "agent"