"""Sequential (extensive-form) games for KantBench."""
from __future__ import annotations
from common.games import GAMES, GameConfig
from constant_definitions.game_constants import SINGLE_SHOT_ROUNDS, DEFAULT_NUM_ROUNDS
from constant_definitions.sequential_constants import (
DICTATOR_ENDOWMENT,
CENTIPEDE_INITIAL_POT, CENTIPEDE_GROWTH_MULTIPLIER, CENTIPEDE_MAX_STAGES,
CENTIPEDE_LARGE_SHARE_NUMERATOR, CENTIPEDE_LARGE_SHARE_DENOMINATOR,
CENTIPEDE_SMALL_SHARE_NUMERATOR, CENTIPEDE_SMALL_SHARE_DENOMINATOR,
STACKELBERG_DEMAND_INTERCEPT, STACKELBERG_DEMAND_SLOPE,
STACKELBERG_MARGINAL_COST, STACKELBERG_MAX_QUANTITY,
)
_ONE = int(bool(True))
# -- Dictator Game --
def _dictator_payoff(player_action: str, opponent_action: str) -> tuple[float, float]:
    """Compute the dictator-game split for a ``give_<amount>`` action.

    The integer after the last underscore of ``player_action`` is the amount
    handed to the recipient; the dictator keeps the remainder of
    ``DICTATOR_ENDOWMENT``. ``opponent_action`` is accepted for signature
    parity with the other payoff functions but is never read.

    Returns:
        ``(dictator_keeps, recipient_gets)`` as floats.
    """
    # Split once from the right so only the trailing token is parsed.
    pieces = player_action.rsplit("_", _ONE)
    given = int(pieces[_ONE])
    kept = DICTATOR_ENDOWMENT - given
    return (float(kept), float(given))
# One action per whole amount the dictator may give: give_0 up to and
# including give_<DICTATOR_ENDOWMENT>.
_DICTATOR_ACTIONS = [f"give_{amount}" for amount in range(DICTATOR_ENDOWMENT + _ONE)]
# -- Centipede Game --
def _centipede_payoff(player_action: str, opponent_action: str) -> tuple[float, float]:
    """Resolve a centipede game from both sides' stopping plans.

    Each action is either ``take_<N>`` (take at stage N) or ``pass_all``
    (never take), the latter being treated as a stage one past
    ``CENTIPEDE_MAX_STAGES``. The game stops at the earlier of the two
    stages. The pot starts at ``CENTIPEDE_INITIAL_POT`` and is multiplied by
    ``CENTIPEDE_GROWTH_MULTIPLIER`` once per stage before the stop, then split
    into a large and a small share using floor division.

    Returns:
        ``(player_payoff, opponent_payoff)`` as floats. The player receives
        the large share whenever their stage is not later than the
        opponent's (ties included); otherwise the opponent receives it.
    """
    # Sentinel stage for "pass_all": strictly after every real take stage.
    beyond_last = CENTIPEDE_MAX_STAGES + _ONE

    mine = (
        beyond_last
        if player_action == "pass_all"
        else int(player_action.rsplit("_", _ONE)[_ONE])
    )
    theirs = (
        beyond_last
        if opponent_action == "pass_all"
        else int(opponent_action.rsplit("_", _ONE)[_ONE])
    )

    # Grow the pot once for every stage that precedes the first take.
    pot = CENTIPEDE_INITIAL_POT
    for _ in range(min(mine, theirs)):
        pot *= CENTIPEDE_GROWTH_MULTIPLIER

    big = float(pot * CENTIPEDE_LARGE_SHARE_NUMERATOR // CENTIPEDE_LARGE_SHARE_DENOMINATOR)
    little = float(pot * CENTIPEDE_SMALL_SHARE_NUMERATOR // CENTIPEDE_SMALL_SHARE_DENOMINATOR)

    # Whoever stops first (player on ties) is the taker and gets the big share.
    return (big, little) if mine <= theirs else (little, big)
# take_0 .. take_<CENTIPEDE_MAX_STAGES>, followed by the never-take action.
_CENTIPEDE_ACTIONS = [
    *(f"take_{stage}" for stage in range(CENTIPEDE_MAX_STAGES + _ONE)),
    "pass_all",
]
# -- Stackelberg Competition --
def _stackelberg_payoff(
    player_action: str, opponent_action: str,
) -> tuple[float, float]:
    """Compute profits in a Stackelberg duopoly.

    The player is the leader and the opponent the follower. Each action is
    ``produce_<q>``; the integer after the last underscore is that firm's
    quantity. Price is linear in total output:

        price = STACKELBERG_DEMAND_INTERCEPT
                - STACKELBERG_DEMAND_SLOPE * (q_leader + q_follower)

    and each firm earns ``(price - STACKELBERG_MARGINAL_COST) * q``. The
    price is not clamped, so it can be negative for large total output.

    Returns:
        ``(leader_profit, follower_profit)`` as floats.
    """
    leader_qty = int(player_action.rsplit("_", _ONE)[_ONE])
    follower_qty = int(opponent_action.rsplit("_", _ONE)[_ONE])

    market_price = (
        STACKELBERG_DEMAND_INTERCEPT
        - STACKELBERG_DEMAND_SLOPE * (leader_qty + follower_qty)
    )
    # Per-unit margin is shared by both firms; only quantities differ.
    unit_margin = market_price - STACKELBERG_MARGINAL_COST
    return (float(unit_margin * leader_qty), float(unit_margin * follower_qty))
# One action per integer quantity: produce_0 up to and including
# produce_<STACKELBERG_MAX_QUANTITY>.
_STACKELBERG_ACTIONS = [f"produce_{qty}" for qty in range(STACKELBERG_MAX_QUANTITY + _ONE)]
# -- Register --
# Registry of the sequential games defined in this module, keyed by short
# identifier. Entries are inserted in the order dictator, centipede,
# stackelberg.
SEQUENTIAL_GAMES: dict[str, GameConfig] = {}

# Dictator: single-shot allocation, payoff driven only by the dictator's action.
SEQUENTIAL_GAMES["dictator"] = GameConfig(
    name="Dictator Game",
    description=(
        "One player (the dictator) decides how to split an endowment "
        "with a passive recipient who has no say. Tests fairness "
        "preferences and altruistic behavior when there is no strategic "
        "incentive to share."
    ),
    actions=_DICTATOR_ACTIONS,
    game_type="dictator",
    default_rounds=SINGLE_SHOT_ROUNDS,
    payoff_fn=_dictator_payoff,
)

# Centipede: single-shot, each side submits the stage at which it would take.
SEQUENTIAL_GAMES["centipede"] = GameConfig(
    name="Centipede Game",
    description=(
        "Players alternate deciding to take or pass. Each pass doubles "
        "the pot. The taker gets the larger share while the other gets "
        "the smaller share. Backward induction predicts immediate taking, "
        "but cooperation through passing yields higher joint payoffs."
    ),
    actions=_CENTIPEDE_ACTIONS,
    game_type="centipede",
    default_rounds=SINGLE_SHOT_ROUNDS,
    payoff_fn=_centipede_payoff,
)

# Stackelberg: the only game here that defaults to DEFAULT_NUM_ROUNDS.
SEQUENTIAL_GAMES["stackelberg"] = GameConfig(
    name="Stackelberg Competition",
    description=(
        "A quantity-setting duopoly where the leader commits to a "
        "production quantity first, and the follower observes and "
        "responds. The leader can exploit first-mover advantage. "
        "Price is determined by total market quantity."
    ),
    actions=_STACKELBERG_ACTIONS,
    game_type="stackelberg",
    default_rounds=DEFAULT_NUM_ROUNDS,
    payoff_fn=_stackelberg_payoff,
)

# Import-time side effect: add these entries to GAMES from common.games.
GAMES.update(SEQUENTIAL_GAMES)
|