File size: 8,644 Bytes
688c130
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
"""Composable game variant transforms for KantBench.

Each ``apply_*`` function takes a :class:`GameConfig` and returns a new
:class:`GameConfig` with modified actions, payoff function, and metadata.
Variants compose: ``apply_exit(apply_cheap_talk(base))`` works.
"""

from __future__ import annotations

from dataclasses import replace
from typing import Callable

from common.games import GAMES, GameConfig
from constant_definitions.game_constants import (
    DEFAULT_TWO_PLAYERS,
    OPPONENT_MODE_SELF,
    OPPONENT_MODE_CROSS,
)
from constant_definitions.var.pd_variant_constants import (
    OPD_EXIT_PAYOFF,
    VARIANT_CHEAP_TALK,
    VARIANT_EXIT,
    VARIANT_BINDING_COMMITMENT,
    VARIANT_NOISY_ACTIONS,
    VARIANT_NOISY_PAYOFFS,
    VARIANT_SELF_PLAY,
    VARIANT_CROSS_MODEL,
    CT_MSG_PREFIX,
    CT_SEPARATOR,
    BC_COMMIT_PREFIX,
    BC_FREE_PREFIX,
    EXIT_ACTION,
    DEFAULT_TREMBLE_PROB_NUMERATOR,
    DEFAULT_TREMBLE_PROB_DENOMINATOR,
    DEFAULT_NOISE_SCALE_NUMERATOR,
    DEFAULT_NOISE_SCALE_DENOMINATOR,
)
from constant_definitions.var.communication_constants import COMMIT_COST

# Small integer constants built without numeric literals; they are used
# below as split limits and sequence indices.
_ONE = int(bool(True))  # int(True) evaluates to 1
_ZERO = int()  # int() with no argument evaluates to 0


def apply_cheap_talk(
    base: GameConfig,
    base_key: str = "",
) -> GameConfig:
    """Add a non-binding message phase to a base game.

    For base actions ``[A, B]`` produces ``[msg_A_A, msg_A_B, msg_B_A,
    msg_B_B]``: every (message, action) pair becomes one composite action
    label.  Payoffs depend only on the actual action, never on the message.

    The actual action is recovered from a lookup table built together with
    the labels instead of by splitting the label on the separator.  Base
    actions whose names themselves contain the separator (for example the
    prefixed labels produced by another variant) therefore resolve to the
    complete base action rather than to their last segment only.

    Args:
        base: Game to extend; its ``actions`` and ``payoff_fn`` are read.
        base_key: Key of the underlying base game.  When empty, the value
            of ``base.base_game_key`` is kept.

    Returns:
        A copy of *base* with the composite action list, a payoff function
        that ignores the message part, and ``VARIANT_CHEAP_TALK`` appended
        to ``applied_variants``.
    """
    sep = CT_SEPARATOR
    prefix = CT_MSG_PREFIX
    base_actions = base.actions

    new_actions: list[str] = []
    actual_by_label: dict[str, str] = {}
    for msg in base_actions:
        for act in base_actions:
            label = sep.join([prefix, msg, act])
            new_actions.append(label)
            # If two (message, action) pairs happen to yield the same
            # label, the first pair generated keeps the mapping.
            actual_by_label.setdefault(label, act)

    original_payoff = base.payoff_fn

    def _actual(label: str) -> str:
        """Return the base action encoded in a composite *label*."""
        try:
            return actual_by_label[label]
        except KeyError:
            # Label was not generated above: keep the previous behaviour of
            # taking the last separator-delimited segment.
            return label.rsplit(sep, _ONE)[_ONE]

    def _payoff(pa: str, oa: str) -> tuple[float, float]:
        actual_p = _actual(pa)
        actual_o = _actual(oa)
        return original_payoff(actual_p, actual_o)

    return replace(
        base,
        actions=new_actions,
        payoff_fn=_payoff,
        applied_variants=base.applied_variants + (VARIANT_CHEAP_TALK,),
        base_game_key=base_key or base.base_game_key,
    )


def apply_exit(
    base: GameConfig,
    base_key: str = "",
    exit_payoff: int = OPD_EXIT_PAYOFF,
) -> GameConfig:
    """Extend a game with an opt-out action that pays a fixed amount.

    ``EXIT_ACTION`` is appended to the base action list.  When at least one
    of the two chosen actions is the exit action, both players receive
    *exit_payoff* (converted to ``float``); in every other case the base
    payoff function decides the outcome.

    Args:
        base: Game to extend; its ``actions`` and ``payoff_fn`` are read.
        base_key: Key of the underlying base game.  When empty, the value
            of ``base.base_game_key`` is kept.
        exit_payoff: Payoff handed to both players when someone exits.

    Returns:
        A copy of *base* with the extra action, the wrapped payoff function
        and ``VARIANT_EXIT`` appended to ``applied_variants``.
    """
    safe_value = float(exit_payoff)
    exit_label = EXIT_ACTION
    extended_actions = [*base.actions, exit_label]
    inner_payoff = base.payoff_fn

    def _payoff(pa: str, oa: str) -> tuple[float, float]:
        # Nobody left the game: defer to the wrapped payoff function.
        if pa != exit_label and oa != exit_label:
            return inner_payoff(pa, oa)
        return (safe_value, safe_value)

    variants = base.applied_variants + (VARIANT_EXIT,)
    return replace(
        base,
        actions=extended_actions,
        payoff_fn=_payoff,
        applied_variants=variants,
        base_game_key=base_key or base.base_game_key,
    )


def apply_binding_commitment(
    base: GameConfig,
    base_key: str = "",
    commit_cost: int = COMMIT_COST,
) -> GameConfig:
    """Wrap a game with a costly, binding commitment option.

    The new action list starts with a single ``commit_A`` entry, where *A*
    is the first base action, followed by one ``free_X`` entry for every
    base action *X*.  Payoffs come from the base payoff function applied to
    the un-prefixed actions; a player who chose the commit entry
    additionally has *commit_cost* subtracted from their payoff.

    Args:
        base: Game to extend; its ``actions`` and ``payoff_fn`` are read.
        base_key: Key of the underlying base game.  When empty, the value
            of ``base.base_game_key`` is kept.
        commit_cost: Amount deducted from a committing player's payoff.

    Returns:
        A copy of *base* with the prefixed action list, the wrapped payoff
        function and ``VARIANT_BINDING_COMMITMENT`` appended to
        ``applied_variants``.
    """
    sep = CT_SEPARATOR
    commit_tag = BC_COMMIT_PREFIX
    free_tag = BC_FREE_PREFIX
    fee = float(commit_cost)
    source_actions = base.actions
    locked_action = source_actions[_ZERO]

    new_actions = [sep.join([commit_tag, locked_action])]
    new_actions.extend(sep.join([free_tag, act]) for act in source_actions)

    inner_payoff = base.payoff_fn

    def _decode(label: str) -> tuple[str, bool]:
        """Split *label* into (base action, whether it was a commit entry)."""
        # Split only at the first separator so the remainder is kept whole.
        pieces = label.split(sep, _ONE)
        is_commit = pieces[_ZERO] == commit_tag
        return pieces[_ONE], is_commit

    def _payoff(pa: str, oa: str) -> tuple[float, float]:
        p_act, p_locked = _decode(pa)
        o_act, o_locked = _decode(oa)
        p_pay, o_pay = inner_payoff(p_act, o_act)
        return (
            p_pay - fee if p_locked else p_pay,
            o_pay - fee if o_locked else o_pay,
        )

    variants = base.applied_variants + (VARIANT_BINDING_COMMITMENT,)
    return replace(
        base,
        actions=new_actions,
        payoff_fn=_payoff,
        applied_variants=variants,
        base_game_key=base_key or base.base_game_key,
    )


# Default tremble probability and payoff-noise scale, each computed as the
# ratio of its imported numerator and denominator constants.
_DEFAULT_TREMBLE = DEFAULT_TREMBLE_PROB_NUMERATOR / DEFAULT_TREMBLE_PROB_DENOMINATOR
_DEFAULT_NOISE = DEFAULT_NOISE_SCALE_NUMERATOR / DEFAULT_NOISE_SCALE_DENOMINATOR
# Message of the ValueError raised by the noisy variants when the base game
# does not have DEFAULT_TWO_PLAYERS players.
_NOISY_ONLY_TWO_PLAYER = "apply_noisy variant only supports two-player games"


def apply_noisy_actions(
    base: GameConfig,
    base_key: str = "",
    tremble_prob: float = _DEFAULT_TREMBLE,
) -> GameConfig:
    """Make each player's action "tremble" with probability *tremble_prob*.

    On every payoff evaluation, and independently for each player, the
    submitted action is swapped for one drawn with ``random.choice`` from
    the base action list (so the draw may coincide with the submitted
    action).  The action list itself is left unchanged.

    Args:
        base: Two-player game to wrap.
        base_key: Key of the underlying base game.  When empty, the value
            of ``base.base_game_key`` is kept.
        tremble_prob: Probability that a player's action is redrawn.

    Returns:
        A copy of *base* with the noisy payoff function and
        ``VARIANT_NOISY_ACTIONS`` appended to ``applied_variants``.

    Raises:
        ValueError: If *base* is not a two-player game.
    """
    if base.num_players != DEFAULT_TWO_PLAYERS:
        raise ValueError(_NOISY_ONLY_TWO_PLAYER)
    import random as _rng_mod

    inner_payoff = base.payoff_fn
    candidates = base.actions

    def _tremble(intended: str) -> str:
        """Return *intended*, or a random candidate when the tremble fires."""
        if _rng_mod.random() < tremble_prob:
            return _rng_mod.choice(candidates)
        return intended

    def _payoff(pa: str, oa: str) -> tuple[float, float]:
        # Player first, opponent second: keeps the RNG draw order fixed.
        realized_p = _tremble(pa)
        realized_o = _tremble(oa)
        return inner_payoff(realized_p, realized_o)

    variants = base.applied_variants + (VARIANT_NOISY_ACTIONS,)
    return replace(
        base,
        payoff_fn=_payoff,
        applied_variants=variants,
        base_game_key=base_key or base.base_game_key,
    )


def apply_noisy_payoffs(
    base: GameConfig,
    base_key: str = "",
    noise_scale: float = _DEFAULT_NOISE,
) -> GameConfig:
    """Perturb both payoffs with independent zero-mean Gaussian noise.

    Each payoff returned by the base payoff function gets its own
    ``random.gauss`` draw added, with mean zero and *noise_scale* passed as
    the ``sigma`` argument.

    Args:
        base: Two-player game to wrap.
        base_key: Key of the underlying base game.  When empty, the value
            of ``base.base_game_key`` is kept.
        noise_scale: Value handed to ``random.gauss`` as ``sigma``.

    Returns:
        A copy of *base* with the noisy payoff function and
        ``VARIANT_NOISY_PAYOFFS`` appended to ``applied_variants``.

    Raises:
        ValueError: If *base* is not a two-player game.
    """
    if base.num_players != DEFAULT_TWO_PLAYERS:
        raise ValueError(_NOISY_ONLY_TWO_PLAYER)
    import random as _rng_mod

    inner_payoff = base.payoff_fn

    def _payoff(pa: str, oa: str) -> tuple[float, float]:
        clean_p, clean_o = inner_payoff(pa, oa)
        # Draw the player's noise before the opponent's to keep the RNG
        # sequence fixed.
        noisy_p = clean_p + _rng_mod.gauss(float(_ZERO), noise_scale)
        noisy_o = clean_o + _rng_mod.gauss(float(_ZERO), noise_scale)
        return (noisy_p, noisy_o)

    variants = base.applied_variants + (VARIANT_NOISY_PAYOFFS,)
    return replace(
        base,
        payoff_fn=_payoff,
        applied_variants=variants,
        base_game_key=base_key or base.base_game_key,
    )


# Message of the ValueError raised by apply_self_play / apply_cross_model
# when the base game does not have DEFAULT_TWO_PLAYERS players.
_OPPONENT_ONLY_TWO_PLAYER = "opponent mode variants only support two-player games"


def apply_self_play(
    base: GameConfig,
    base_key: str = "",
) -> GameConfig:
    """Tag a game for self-play (the model plays against itself).

    Only metadata changes: ``opponent_mode`` becomes ``OPPONENT_MODE_SELF``
    and ``VARIANT_SELF_PLAY`` is appended to ``applied_variants``.  Actions
    and payoffs are untouched.

    Args:
        base: Two-player game to tag.
        base_key: Key of the underlying base game.  When empty, the value
            of ``base.base_game_key`` is kept.

    Returns:
        The tagged copy of *base*.

    Raises:
        ValueError: If *base* is not a two-player game.
    """
    if base.num_players == DEFAULT_TWO_PLAYERS:
        variants = base.applied_variants + (VARIANT_SELF_PLAY,)
        resolved_key = base_key or base.base_game_key
        return replace(
            base,
            opponent_mode=OPPONENT_MODE_SELF,
            applied_variants=variants,
            base_game_key=resolved_key,
        )
    raise ValueError(_OPPONENT_ONLY_TWO_PLAYER)


def apply_cross_model(
    base: GameConfig,
    base_key: str = "",
) -> GameConfig:
    """Tag a game for cross-model play (model vs a different model).

    Only metadata changes: ``opponent_mode`` becomes ``OPPONENT_MODE_CROSS``
    and ``VARIANT_CROSS_MODEL`` is appended to ``applied_variants``.
    Actions and payoffs are untouched.

    Args:
        base: Two-player game to tag.
        base_key: Key of the underlying base game.  When empty, the value
            of ``base.base_game_key`` is kept.

    Returns:
        The tagged copy of *base*.

    Raises:
        ValueError: If *base* is not a two-player game.
    """
    if base.num_players == DEFAULT_TWO_PLAYERS:
        variants = base.applied_variants + (VARIANT_CROSS_MODEL,)
        resolved_key = base_key or base.base_game_key
        return replace(
            base,
            opponent_mode=OPPONENT_MODE_CROSS,
            applied_variants=variants,
            base_game_key=resolved_key,
        )
    raise ValueError(_OPPONENT_ONLY_TWO_PLAYER)


# Lookup table from variant-name constant to the function that applies that
# variant.  compose_game() resolves its variant names through this dict.
_VARIANT_REGISTRY: dict[str, Callable[..., GameConfig]] = {
    VARIANT_CHEAP_TALK: apply_cheap_talk,
    VARIANT_EXIT: apply_exit,
    VARIANT_BINDING_COMMITMENT: apply_binding_commitment,
    VARIANT_NOISY_ACTIONS: apply_noisy_actions,
    VARIANT_NOISY_PAYOFFS: apply_noisy_payoffs,
    VARIANT_SELF_PLAY: apply_self_play,
    VARIANT_CROSS_MODEL: apply_cross_model,
}

# NOTE(review): the imports below sit after the definitions above (hence the
# E402 suppressions), presumably to avoid a circular import with
# common.meta -- confirm before moving them to the top of the file.  The
# imported apply_* names are not referenced in the visible code; presumably
# they are re-exported for callers -- verify before removing.
from common.meta.variants_meta import (  # noqa: E402
    apply_rule_proposal, apply_rule_signal,
    apply_constitutional, apply_proposer_responder,
    _META_VARIANT_REGISTRY,
)

# Merge the meta variants in; update() overwrites any existing entry that
# shares a key.
_VARIANT_REGISTRY.update(_META_VARIANT_REGISTRY)

from common.meta.variants_reputation import (  # noqa: E402
    apply_gossip,
    _REPUTATION_VARIANT_REGISTRY,
)

# Merge the reputation variants in last, so they take precedence on any
# key clash.
_VARIANT_REGISTRY.update(_REPUTATION_VARIANT_REGISTRY)


def compose_game(base_key: str, *variant_names: str) -> GameConfig:
    """Look up a base game and apply the named variants to it in order.

    Each name is resolved through ``_VARIANT_REGISTRY`` just before it is
    applied, and every variant function receives ``base_key`` as a keyword
    argument.  With no variant names the base game is returned as is.

    Example::

        compose_game("stag_hunt", "cheap_talk", "exit")

    Args:
        base_key: Key of the starting game in ``GAMES``.
        *variant_names: Registry names of the variants, first-applied first.

    Returns:
        The game produced by the last variant applied.

    Raises:
        KeyError: If *base_key* is not in ``GAMES`` or a variant name is
            not in ``_VARIANT_REGISTRY``.
    """
    config = GAMES[base_key]
    for name in variant_names:
        config = _VARIANT_REGISTRY[name](config, base_key=base_key)
    return config