KantBench

Paused

App Files Files Community

KantBench / common /games_coop /dynamic.py

jtowarek

Upload folder using huggingface_hub

f7e2ae6 verified about 1 month ago

raw

history blame contribute delete

6.87 kB

	"""Dynamic, behavioral, and repeated games for KantBench."""
	from __future__ import annotations

	from common.games import GAMES, GameConfig, _matrix_payoff_fn
	from constant_definitions.game_constants import DEFAULT_NUM_ROUNDS, SINGLE_SHOT_ROUNDS
	from constant_definitions.ext.dynamic_constants import (
	BR_PATIENCE_REWARD, BR_EARLY_WITHDRAW, BR_BANK_FAIL_PAYOFF,
	GSH_STAG_PAYOFF, GSH_HARE_PAYOFF, GSH_STAG_ALONE_PAYOFF,
	BC_MAX_NUMBER, BC_TARGET_FRACTION_NUM, BC_TARGET_FRACTION_DEN,
	BC_WIN_PAYOFF, BC_LOSE_PAYOFF, BC_TIE_PAYOFF,
	HDB_RESOURCE_VALUE, HDB_FIGHT_COST, HDB_SHARE_DIVISOR,
	)
	from constant_definitions.game_constants import (
	PD_CC_PAYOFF, PD_CD_PAYOFF, PD_DC_PAYOFF, PD_DD_PAYOFF,
	)

	# Numeric building blocks spelled without numeric literals; the rest of this
	# module composes its numbers from them. (Presumably a project convention
	# against magic numbers -- confirm before swapping in plain literals.)
	_ONE = int(True)
	_TWO = _ONE << _ONE  # one shifted left by one bit is two
	_ZERO_F = float(False)


	# -- Bank Run (Diamond-Dybvig) --
	# Each payoff constant is converted to float once and reused below.
	_BR_BOTH_WAIT = float(BR_PATIENCE_REWARD)
	_BR_EARLY_EXIT = float(BR_EARLY_WITHDRAW)
	_BR_COLLAPSE = float(BR_BANK_FAIL_PAYOFF)

	# Keys are action pairs and values are payoff pairs in the same order
	# (presumably player first, opponent second -- confirm against
	# _matrix_payoff_fn). A lone withdrawer receives the early-withdraw payoff
	# while the waiting side receives the bank-failure payoff; when both
	# withdraw, both receive the bank-failure payoff.
	_BR_MATRIX: dict[tuple[str, str], tuple[float, float]] = {
	    ("wait", "wait"): (_BR_BOTH_WAIT, _BR_BOTH_WAIT),
	    ("wait", "withdraw"): (_BR_COLLAPSE, _BR_EARLY_EXIT),
	    ("withdraw", "wait"): (_BR_EARLY_EXIT, _BR_COLLAPSE),
	    ("withdraw", "withdraw"): (_BR_COLLAPSE, _BR_COLLAPSE),
	}


	# -- Global Stag Hunt (higher stakes variant) --
	def _build_gsh_matrix() -> dict[tuple[str, str], tuple[float, float]]:
	    """Return the stag-hunt payoff table keyed by the pair of actions.

	    Values are payoff pairs in the same order as the action pair
	    (presumably player first, opponent second -- confirm against
	    _matrix_payoff_fn).
	    """
	    joint_stag = float(GSH_STAG_PAYOFF)
	    hare = float(GSH_HARE_PAYOFF)
	    lone_stag = float(GSH_STAG_ALONE_PAYOFF)
	    return {
	        ("stag", "stag"): (joint_stag, joint_stag),
	        ("stag", "hare"): (lone_stag, hare),
	        ("hare", "stag"): (hare, lone_stag),
	        ("hare", "hare"): (hare, hare),
	    }


	_GSH_MATRIX: dict[tuple[str, str], tuple[float, float]] = _build_gsh_matrix()


	# -- Beauty Contest (p-Guessing Game) --
	def _beauty_contest_payoff(pa: str, oa: str) -> tuple[float, float]:
	    """Score one two-player round of the p-guessing game.

	    Each action string carries its guess as the integer after the last
	    underscore (e.g. ``"guess_7"``). The target is the mean of the two
	    guesses scaled by BC_TARGET_FRACTION_NUM / BC_TARGET_FRACTION_DEN.

	    Args:
	        pa: Action string of the first player.
	        oa: Action string of the second player.

	    Returns:
	        ``(first payoff, second payoff)``: the guess strictly closer to the
	        target earns BC_WIN_PAYOFF and the other BC_LOSE_PAYOFF; if neither
	        is strictly closer, both earn BC_TIE_PAYOFF.

	    Raises:
	        IndexError: If an action contains no underscore.
	        ValueError: If the text after the last underscore is not an integer.
	    """
	    # Parse both guesses, first player first, so errors surface in call order.
	    guess_self, guess_other = (
	        int(action.rsplit("_", _ONE)[_ONE]) for action in (pa, oa)
	    )
	    mean_guess = float(guess_self + guess_other) / _TWO
	    goal = mean_guess * BC_TARGET_FRACTION_NUM / BC_TARGET_FRACTION_DEN
	    gap_self = abs(float(guess_self) - goal)
	    gap_other = abs(float(guess_other) - goal)

	    if gap_self < gap_other:
	        first, second = BC_WIN_PAYOFF, BC_LOSE_PAYOFF
	    elif gap_other < gap_self:
	        first, second = BC_LOSE_PAYOFF, BC_WIN_PAYOFF
	    else:
	        # Neither side is strictly closer: treat as a tie.
	        first, second = BC_TIE_PAYOFF, BC_TIE_PAYOFF
	    return (float(first), float(second))


	# Action labels "guess_0" through "guess_<BC_MAX_NUMBER>" (upper bound inclusive).
	_BC_ACTS: list[str] = [
	    f"guess_{number}" for number in range(_ONE + BC_MAX_NUMBER)
	]


	# -- Hawk-Dove-Bourgeois --
	_V = float(HDB_RESOURCE_VALUE)  # value of the contested resource
	_C = float(HDB_FIGHT_COST)  # cost of an escalated fight
	_S = _V / float(HDB_SHARE_DIVISOR)  # payoff each side gets when sharing


	def _build_hdb_matrix(
	    value: float, cost: float, share: float
	) -> dict[tuple[str, str], tuple[float, float]]:
	    """Return the hawk/dove/bourgeois payoff table for the given parameters.

	    Keys are action pairs and values are payoff pairs in the same order
	    (presumably player first, opponent second -- confirm against
	    _matrix_payoff_fn).
	    """
	    four = float(_TWO) * _TWO
	    clash = (value - cost) / _TWO  # both escalate
	    half_value = value / _TWO
	    quarter_clash = (value - cost) / four
	    half_share = share / _TWO
	    share_bonus = share + value / four
	    # NOTE(review): hawk vs bourgeois pays the hawk value/2 here. The textbook
	    # expected value (bourgeois acting as hawk or dove with equal chance)
	    # would be value/2 + (value - cost)/4 -- confirm this is intentional.
	    return {
	        ("hawk", "hawk"): (clash, clash),
	        ("hawk", "dove"): (value, _ZERO_F),
	        ("hawk", "bourgeois"): (half_value, quarter_clash),
	        ("dove", "hawk"): (_ZERO_F, value),
	        ("dove", "dove"): (share, share),
	        ("dove", "bourgeois"): (half_share, share_bonus),
	        ("bourgeois", "hawk"): (quarter_clash, half_value),
	        ("bourgeois", "dove"): (share_bonus, half_share),
	        ("bourgeois", "bourgeois"): (share, share),
	    }


	_HDB_MATRIX: dict[tuple[str, str], tuple[float, float]] = _build_hdb_matrix(
	    _V, _C, _S
	)


	# -- Finitely Repeated PD (same payoffs, explicit short horizon) --
	# Payoff to the first-listed action against the second-listed action.
	_FPD_ROW_PAYOFF: dict[str, dict[str, float]] = {
	    "cooperate": {
	        "cooperate": float(PD_CC_PAYOFF),
	        "defect": float(PD_CD_PAYOFF),
	    },
	    "defect": {
	        "cooperate": float(PD_DC_PAYOFF),
	        "defect": float(PD_DD_PAYOFF),
	    },
	}

	# The game is symmetric, so the second payoff in each pair is the same table
	# read with the two actions swapped.
	_FPD_MATRIX: dict[tuple[str, str], tuple[float, float]] = {
	    (first, second): (
	        _FPD_ROW_PAYOFF[first][second],
	        _FPD_ROW_PAYOFF[second][first],
	    )
	    for first in _FPD_ROW_PAYOFF
	    for second in _FPD_ROW_PAYOFF[first]
	}

	# Round counts composed from _ONE and _TWO (no numeric literals):
	# _FIVE is the finitely repeated PD horizon, _MARKOV_ROUNDS is three times that.
	_FIVE = _TWO * _TWO + _ONE
	_MARKOV_ROUNDS = _FIVE * (_TWO + _ONE)

	# Registry of the games defined in this module, keyed by game id. Entries are
	# added one at a time, in the order the ids should appear in the mapping.
	DYNAMIC_GAMES: dict[str, GameConfig] = {}

	# Two-action coordination game backed by the bank-run payoff table.
	DYNAMIC_GAMES["bank_run"] = GameConfig(
	    name="Bank Run (Diamond-Dybvig)",
	    description=(
	        "Depositors simultaneously decide whether to withdraw early. "
	        "If both wait, the bank survives and both earn a premium. If "
	        "both withdraw, the bank fails. Models coordination failure "
	        "in financial systems."
	    ),
	    actions=["wait", "withdraw"],
	    game_type="matrix",
	    default_rounds=DEFAULT_NUM_ROUNDS,
	    payoff_fn=_matrix_payoff_fn(_BR_MATRIX),
	)

	# Stag hunt variant backed by the GSH payoff table.
	DYNAMIC_GAMES["global_stag_hunt"] = GameConfig(
	    name="Global Stag Hunt",
	    description=(
	        "A higher-stakes Stag Hunt modeling coordination under "
	        "uncertainty. Both hunting stag yields a large payoff but "
	        "hunting stag alone yields nothing. Models bank runs, "
	        "currency attacks, and regime change dynamics."
	    ),
	    actions=["stag", "hare"],
	    game_type="matrix",
	    default_rounds=DEFAULT_NUM_ROUNDS,
	    payoff_fn=_matrix_payoff_fn(_GSH_MATRIX),
	)

	# Guessing game: one action per allowed number, scored by a custom function
	# instead of a payoff table.
	DYNAMIC_GAMES["beauty_contest"] = GameConfig(
	    name="Keynesian Beauty Contest",
	    description=(
	        "Each player picks a number. The winner is closest to a "
	        "target fraction of the average. Tests depth of strategic "
	        "reasoning and level-k thinking. The unique Nash equilibrium "
	        "is zero, reached through iterated elimination."
	    ),
	    actions=_BC_ACTS,
	    game_type="beauty_contest",
	    default_rounds=SINGLE_SHOT_ROUNDS,
	    payoff_fn=_beauty_contest_payoff,
	)

	# Three-action game backed by the hawk/dove/bourgeois payoff table.
	DYNAMIC_GAMES["hawk_dove_bourgeois"] = GameConfig(
	    name="Hawk-Dove-Bourgeois",
	    description=(
	        "Extended Hawk-Dove with a Bourgeois strategy that plays "
	        "Hawk when incumbent and Dove when intruder. The Bourgeois "
	        "strategy is an evolutionarily stable strategy. Tests "
	        "reasoning about ownership conventions."
	    ),
	    actions=["hawk", "dove", "bourgeois"],
	    game_type="matrix",
	    default_rounds=DEFAULT_NUM_ROUNDS,
	    payoff_fn=_matrix_payoff_fn(_HDB_MATRIX),
	)

	# Prisoner's Dilemma payoffs with a short default horizon of _FIVE rounds.
	DYNAMIC_GAMES["finitely_repeated_pd"] = GameConfig(
	    name="Finitely Repeated Prisoner's Dilemma",
	    description=(
	        "A Prisoner's Dilemma played for a known finite number of "
	        "rounds. Backward induction predicts mutual defection in "
	        "every round, yet cooperation often emerges experimentally. "
	        "Tests backward induction versus cooperation heuristics."
	    ),
	    actions=["cooperate", "defect"],
	    game_type="matrix",
	    default_rounds=_FIVE,
	    payoff_fn=_matrix_payoff_fn(_FPD_MATRIX),
	)

	# Uses the same Prisoner's Dilemma payoff table over _MARKOV_ROUNDS rounds.
	# NOTE(review): the description promises history-dependent payoffs, but the
	# payoff function here is the static PD table -- presumably the shift is
	# applied elsewhere; confirm.
	DYNAMIC_GAMES["markov_game"] = GameConfig(
	    name="Markov Decision Game",
	    description=(
	        "A repeated game where the payoff structure shifts based on "
	        "recent history. Players must adapt strategies to changing "
	        "incentives. Tests dynamic programming and Markov-perfect "
	        "equilibrium reasoning over multiple rounds."
	    ),
	    actions=["cooperate", "defect"],
	    game_type="matrix",
	    default_rounds=_MARKOV_ROUNDS,
	    payoff_fn=_matrix_payoff_fn(_FPD_MATRIX),
	)

	# Import-time side effect: merge this module's games into the shared GAMES
	# registry imported from common.games. (Presumably GAMES is a dict, in which
	# case entries with the same id are overwritten -- confirm.)
	GAMES.update(DYNAMIC_GAMES)