Add files using upload-large-folder tool
Browse files- seed_42/Qwen/Qwen2.5-7B-Instruct/adapters/agent_adapter/adapter_model.safetensors +3 -0
- seed_42/Qwen/Qwen2.5-7B-Instruct/adapters/critic_adapter/adapter_model.safetensors +3 -0
- seed_42/agent_trainer/critic_optimizer_state.pt +3 -0
- seed_42/agent_trainer/policy_optimizer_state.pt +3 -0
- seed_42/agent_trainer/trainer_annealing_state.pkl +3 -0
- seed_42/random_state.pkl +3 -0
- src_code_for_reproducibility/markov_games/__pycache__/linear_runner.cpython-312.pyc +0 -0
- src_code_for_reproducibility/markov_games/negotiation/dond_simulation.py +176 -0
- src_code_for_reproducibility/markov_games/negotiation/no_press_nego_agent.py +108 -0
- src_code_for_reproducibility/markov_games/negotiation/no_press_nego_simulation.py +182 -0
- src_code_for_reproducibility/models/__pycache__/__init__.cpython-312.pyc +0 -0
- src_code_for_reproducibility/models/__pycache__/inference_backend.cpython-312.pyc +0 -0
- src_code_for_reproducibility/models/__pycache__/scalar_critic.cpython-312.pyc +0 -0
- src_code_for_reproducibility/training/__pycache__/__init__.cpython-312.pyc +0 -0
seed_42/Qwen/Qwen2.5-7B-Instruct/adapters/agent_adapter/adapter_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5e6e2c070d5b214399e5b5b2c59952896c83984ef5e9785cd825b34b193d318f
|
| 3 |
+
size 323014168
|
seed_42/Qwen/Qwen2.5-7B-Instruct/adapters/critic_adapter/adapter_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a2810cb0ec24072033412e5ff181e51188612e95b9f1685f9177794aa66a8bc0
|
| 3 |
+
size 323014168
|
seed_42/agent_trainer/critic_optimizer_state.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f1574fdb90735a922b09c67d07f7abdbd51181f00dc7bed878cb80adb5f50c1d
|
| 3 |
+
size 2631
|
seed_42/agent_trainer/policy_optimizer_state.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e09a8f7a1cba58eee3b92c4c63eb09713d7bb2e9c1248bda1479bab99de86429
|
| 3 |
+
size 646269121
|
seed_42/agent_trainer/trainer_annealing_state.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e1ece3508808aa0372885bc9aafb57c945a1aa92d15785b25ba6ae0f7fe9860
|
| 3 |
+
size 104
|
seed_42/random_state.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:da5363953b9a73a6aef9243748650aa4dfb203e5208ae92c87b3735e89bfa42c
|
| 3 |
+
size 12254
|
src_code_for_reproducibility/markov_games/__pycache__/linear_runner.cpython-312.pyc
ADDED
|
Binary file (1.64 kB). View file
|
|
|
src_code_for_reproducibility/markov_games/negotiation/dond_simulation.py
ADDED
|
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
File: mllm/markov_games/negotiation/dond_simulation.py
|
| 3 |
+
Summary: Simulates Deal-or-No-Deal negotiation games and logs rollouts.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import copy
|
| 7 |
+
from dataclasses import dataclass
|
| 8 |
+
from typing import Any, Dict, List, Tuple
|
| 9 |
+
|
| 10 |
+
from numpy.random import default_rng
|
| 11 |
+
|
| 12 |
+
from mllm.markov_games.negotiation.nego_simulation import (
|
| 13 |
+
NegotiationObs,
|
| 14 |
+
NegotiationSimulation,
|
| 15 |
+
NegotiationState,
|
| 16 |
+
Split,
|
| 17 |
+
)
|
| 18 |
+
from mllm.markov_games.rollout_tree import SimulationStepLog
|
| 19 |
+
from mllm.utils.get_coagent_id import get_coagent_id
|
| 20 |
+
|
| 21 |
+
AgentId = str
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
@dataclass
class DealNoDealState(NegotiationState):
    """Negotiation state extended with the item catalogue and per-agent valuations."""

    # Names of the item categories being divided.
    item_types: List[str]
    # Value table: agent id -> {item type -> integer value}.
    values: Dict[AgentId, Dict[str, int]]
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
@dataclass
class DealNoDealObs(NegotiationObs):
    """Per-agent observation carrying the agent's own values and the co-agent's values."""

    # This agent's value for each item type.
    my_values: Dict[str, int]
    # Names of the item categories in play.
    item_types: List[str]
    # Co-agent's value table; the annotation allows None.
    previous_values_coagent: Dict[str, int] | None
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def random_partition_integer(rng, total: int, parts: int) -> List[int]:
    """
    Split ``total`` into ``parts`` non-negative integers that add up to ``total``.

    Args:
        rng: Random generator exposing ``integers(low, high, size=...)``.
        total: Amount to distribute; values <= 0 yield an all-zero partition.
        parts: Number of buckets; values <= 0 yield an empty list.

    Returns:
        A list of ``parts`` integers whose sum is ``total``.
    """
    if parts <= 0:
        return []
    if total <= 0:
        return [0] * parts
    # Draw parts - 1 cut points in [0, total]; consecutive gaps are the bucket sizes.
    boundaries = rng.integers(0, total + 1, size=parts - 1).tolist()
    boundaries.sort()
    boundaries.append(total)
    lower_bounds = [0] + boundaries[:-1]
    return [upper - lower for lower, upper in zip(lower_bounds, boundaries)]
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
class DealNoDealSimulation(NegotiationSimulation):
    """Deal-or-No-Deal negotiation variant with integer item stocks and per-agent item values."""

    def __init__(
        self,
        item_types: List[str] | None = None,
        *args,
        **kwargs,
    ):
        """
        Build the simulation and immediately sample an initial state.

        Args:
            item_types: Names of the item categories to split. Defaults to
                ``["books", "hats", "balls"]``.
            *args: Forwarded unchanged to ``NegotiationSimulation``.
            **kwargs: Forwarded unchanged to ``NegotiationSimulation``.
        """
        # Build the default per instance: a list literal in the signature would be
        # shared (and mutable) across every simulation created without item_types.
        if item_types is None:
            item_types = ["books", "hats", "balls"]
        super().__init__(*args, item_types=item_types, **kwargs)
        self.reset()

    def _other(self, agent_id: AgentId) -> AgentId:
        """Look up the co-agent of ``agent_id`` via ``get_coagent_id``."""
        return get_coagent_id(self.agent_ids, agent_id)

    def _sample_stock(self) -> Dict[str, int]:
        """Sample per-type item counts; the total is drawn from ``rng.integers(5, 8)``."""
        total_items = int(self.rng.integers(5, 8))
        # Non-negative per-type counts summing to total_items; zeros are allowed.
        parts = random_partition_integer(self.rng, total_items, len(self.item_types))
        return {t: int(c) for t, c in zip(self.item_types, parts)}

    def _sample_values_pair(self) -> Dict[AgentId, Dict[str, int]]:
        """
        Rejection-sample a value table for each of the two agents.

        Each agent's values are non-negative integers summing to 10. A draw is
        accepted only when every item type is valued by at least one agent and
        at least one item type is valued by both.
        """
        while True:
            vals_a = random_partition_integer(self.rng, 10, len(self.item_types))
            vals_b = random_partition_integer(self.rng, 10, len(self.item_types))
            a = {t: int(v) for t, v in zip(self.item_types, vals_a)}
            b = {t: int(v) for t, v in zip(self.item_types, vals_b)}
            # Each item valued by at least one agent.
            ok1 = all((a[t] > 0) or (b[t] > 0) for t in self.item_types)
            # Some item valued by both agents.
            ok2 = any((a[t] > 0) and (b[t] > 0) for t in self.item_types)
            if ok1 and ok2:
                return {self.agent_ids[0]: a, self.agent_ids[1]: b}

    def _is_valid_allocation(
        self, allocation: Dict[str, int], stock: Dict[str, int]
    ) -> bool:
        """
        Check that ``allocation`` gives an in-stock integer count for every item type.

        Returns:
            False when an item type is missing, its count is not an integer, or
            the count falls outside ``[0, stock[item]]``; True otherwise.
        """
        for t in self.item_types:
            v = allocation.get(t)
            # bool is a subclass of int; reject it explicitly so True/False are
            # never silently counted as 1/0 items. None also fails this check.
            if isinstance(v, bool) or not isinstance(v, int):
                return False
            if v < 0 or v > int(stock.get(t, 0)):
                return False
        return True

    def set_new_round_of_variant(self):
        """Start a new round: keep the value tables and resample the stock."""
        self.state.quantities = self._sample_stock()

    def get_info_of_variant(
        self, state: NegotiationState, actions: Dict[AgentId, Any]
    ) -> Dict[str, Any]:
        """Return deep copies of the state's quantities, values and splits."""
        return {
            "quantities": copy.deepcopy(state.quantities),
            "values": copy.deepcopy(state.values),
            "splits": copy.deepcopy(state.splits),
        }

    def get_rewards(self, splits: Dict[AgentId, Split]) -> Dict[AgentId, float]:
        """
        Compute each agent's reward from the two submitted splits.

        The reward is the sum over item types of (items kept) x (own value).
        If, for any item type, the two claims do not add up exactly to the
        available quantity, both agents receive 0.
        """
        first_id, second_id = self.agent_ids[0], self.agent_ids[1]
        split_a = splits[first_id].items_given_to_self
        split_b = splits[second_id].items_given_to_self
        rewards = {first_id: 0, second_id: 0}
        for t in self.item_types:
            # Non-complementary proposals void the whole deal.
            if split_a[t] + split_b[t] != self.state.quantities[t]:
                return {first_id: 0, second_id: 0}
            rewards[first_id] += split_a[t] * self.state.values[first_id][t]
            rewards[second_id] += split_b[t] * self.state.values[second_id][t]
        return rewards

    def get_obs(self):
        """Return a mapping from every agent id to that agent's observation."""
        return {agent_id: self.get_obs_agent(agent_id) for agent_id in self.agent_ids}

    def get_obs_agent(self, agent_id):
        """Build the ``DealNoDealObs`` for ``agent_id`` from the current state."""
        other_id = self._other(agent_id)
        obs = DealNoDealObs(
            round_nb=self.state.round_nb,
            last_message=self.state.last_message,
            current_agent=self.state.current_agent,
            quantities=copy.deepcopy(self.state.quantities),
            value=0.0,  # unused in DOND
            other_agent_split=None,  # not meaningful until split
            split_phase=self.state.split_phase,
            quota_messages_per_agent_per_round=self.quota_messages_per_agent_per_round,
            my_values=copy.deepcopy(self.state.values[agent_id]),
            item_types=list(self.item_types),
            # NOTE(review): this field is filled from the co-agent's *current*
            # values (state.values), not from state.previous_values - confirm
            # that exposing them from the first round is intended.
            previous_values_coagent=copy.deepcopy(self.state.values.get(other_id, {})),
        )
        return obs

    def reset(self):
        """Sample a fresh stock and value tables, rebuild the state, and return the observations."""
        start_agent = self.agent_ids[self._starting_agent_index]
        stock = self._sample_stock()
        values = self._sample_values_pair()
        self.state = DealNoDealState(
            round_nb=0,
            last_message="",
            current_agent=start_agent,
            quantities=stock,
            values=values,
            previous_values=None,
            splits={aid: None for aid in self.agent_ids},
            nb_messages_sent={aid: 0 for aid in self.agent_ids},
            split_phase=False,
            item_types=list(self.item_types),
        )
        return self.get_obs()
|
src_code_for_reproducibility/markov_games/negotiation/no_press_nego_agent.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
File: mllm/markov_games/negotiation/no_press_nego_agent.py
|
| 3 |
+
Summary: Agent variant for no-press negotiations without explicit messaging.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from typing import Any, Dict, List, Tuple
|
| 7 |
+
|
| 8 |
+
from mllm.markov_games.negotiation.nego_agent import (
|
| 9 |
+
NegotiationAgent,
|
| 10 |
+
NegotiationAgentState,
|
| 11 |
+
)
|
| 12 |
+
from mllm.markov_games.negotiation.nego_simulation import Split
|
| 13 |
+
from mllm.markov_games.negotiation.no_press_nego_simulation import NoPressObs
|
| 14 |
+
from mllm.markov_games.rollout_tree import AgentActLog, ChatTurn
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class NoPressAgent(NegotiationAgent):
    """Negotiation agent for the no-press variant: no chat, only split proposals."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``NegotiationAgent`` and install the no-press prompt templates."""
        super().__init__(*args, **kwargs)
        # No communication in this variant
        self.intro_prompt = (
            "Welcome to an iterated game. You are {agent}. The other agent is {other_agent}.\n"
            "Setup:\n"
            "1. The game consists of multiple independent rounds.\n"
            "2. In each round, there are multiple items to split between the two agents.\n"
            "3. Both agents are assigned a per-item value between 1 and 20 (inclusive) in each round.\n"
            "4. You can observe per-item values of both agents.\n"
            "5. Because assignments are random, both agents are equally likely to have same expected per-item value.\n"
            "\n"
            "Protocol:\n"
            "1. Both agents simultaneously propose the amount of each item they will keep.\n"
            "2. If the total sum of proposals is less than or equal to the item quantity, both agents receive their proposed amounts.\n"
            "3. If the total sum of proposals exceeds the item quantity, they are allocated proportionally.\n"
            "4. Your points for the round = (amount you receive per item) x (your per-item value for that round), added across all items.\n"
            "5. Points are accumulated across rounds.\n"
            "Your goal: {goal}\n"
        )
        self.new_round_prompt = (
            "A New Round Begins\n"
            "The items to split are {quantities}.\n"
            "Your per-item values are {value} and {other_agent}'s per-item values are {other_value}."
        )
        self.last_round_prompt = (
            "Last Round Summary:\n"
            " - Items to split: {last_quantities}\n"
            " - Your per-item values: {last_value_agent}\n"
            " - {other_agent}'s per-item values: {last_value_coagent}\n"
            " - You proposed: {last_split_agent}\n"
            " - You earned: {last_points_agent} points\n"
            " - {other_agent} proposed: {last_split_coagent}\n"
            " - {other_agent} earned: {last_points_coagent} points\n"
            " - Round Complete.\n"
        )
        self.send_split_prompt = "Submit Your Proposal\nRespond as {proposal_style}"

    @staticmethod
    def _item_alternation(items: List[str]) -> str:
        """Build a regex alternation accepting the singular and plural form of each item."""
        import re as _re

        # Strip one trailing "s" so that "<stem>s?" covers both forms; escape the
        # stem so item names containing regex metacharacters are matched literally.
        stems = [item[:-1] if item.endswith("s") else item for item in items]
        return "|".join(f"{_re.escape(stem)}s?" for stem in stems)

    @staticmethod
    def _canonical_item_name(item_str: str, items: List[str]):
        """Map singular/plural, any-case text back to the configured item id, or None if unknown."""
        text = item_str.lower()
        for orig in items:
            lowered = orig.lower()
            alternate = lowered[:-1] if orig.endswith("s") else lowered + "s"
            if text in (lowered, alternate):
                return orig
        return None

    def get_message_regex(self, observation: NoPressObs) -> str:
        """Return a pattern matching only the empty string, since this variant has no chat."""
        return r"^$"  # No messages allowed

    def get_split_regex(self, observation: NoPressObs) -> str:
        """
        Return the case-insensitive pattern for proposals like ``Proposal: 4 coins, 6 apples``.

        Item names come from ``observation.quantities``; counts are limited to 0-10.
        """
        item_pattern = self._item_alternation(list(observation.quantities.keys()))
        return rf"(?i)Proposal:\s*((?:\s*(?P<num>(10|[0-9]))\s*(?P<item>{item_pattern})\s*,?)+)"

    def get_split_action(self, policy_output: str, observation: NoPressObs) -> Split:
        """
        Parse the policy's proposal text into a ``Split``.

        Every item in ``observation.quantities`` starts at 0. If the stripped
        output matches the proposal pattern, each ``<count> <item>`` pair
        overrides the count for that item (singular/plural and case are
        tolerated). Output that does not match leaves all counts at 0.
        """
        import re as _re

        items = list(observation.quantities.keys())
        items_given_to_self = {item: 0 for item in items}
        m = _re.match(self.get_split_regex(observation), policy_output.strip())
        if m:
            # Find all (number, item) pairs inside the matched proposal body.
            inner_regex = rf"(?i)(10|[0-9])\s*({self._item_alternation(items)})"
            for num, item in _re.findall(inner_regex, m.group(1)):
                canonical = self._canonical_item_name(item, items)
                # Skip text that cannot be mapped back, rather than adding a None key.
                if canonical is not None:
                    items_given_to_self[canonical] = int(num)
        return Split(items_given_to_self=items_given_to_self)
|
src_code_for_reproducibility/markov_games/negotiation/no_press_nego_simulation.py
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
File: mllm/markov_games/negotiation/no_press_nego_simulation.py
|
| 3 |
+
Summary: Simulation driver for no-press negotiation scenarios.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import copy
|
| 7 |
+
from collections import defaultdict
|
| 8 |
+
from dataclasses import dataclass
|
| 9 |
+
from typing import Any, Dict, List, Literal, Tuple
|
| 10 |
+
|
| 11 |
+
from mllm.markov_games.negotiation.nego_simulation import (
|
| 12 |
+
NegotiationObs,
|
| 13 |
+
NegotiationSimulation,
|
| 14 |
+
NegotiationState,
|
| 15 |
+
Split,
|
| 16 |
+
compute_tas_style_rewards,
|
| 17 |
+
)
|
| 18 |
+
|
| 19 |
+
AgentId = str
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
@dataclass
class NoPressState(NegotiationState):
    """State type for the no-press game; declares no fields beyond ``NegotiationState``."""
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
@dataclass
class NoPressObs(NegotiationObs):
    """Negotiation observation that additionally exposes the co-agent's values."""

    # Co-agent's per-item values, keyed by item name.
    other_value: Dict[str, float]
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
class NoPressSimulation(NegotiationSimulation):
    """Negotiation simulation without messaging: each round goes straight to the split phase."""

    def __init__(
        self,
        game_type: Literal["10-1-exclusive", "10-1-ties", "1-to-20"] = "1-to-20",
        same_round_value: bool = True,
        atleast_one_conflict: bool = False,
        *args,
        **kwargs,
    ):
        """
        Store the sampling configuration, then initialise ``NegotiationSimulation``.

        Args:
            game_type: Value template. ``"10-1-exclusive"`` gives one agent 10 and
                the other 1 per item, ``"10-1-ties"`` draws 1 or 10 independently
                per agent, ``"1-to-20"`` draws an integer in [1, 20] per agent.
            same_round_value: When True, sampled tables are accepted only if both
                agents' values sum to the same total.
            atleast_one_conflict: When True, at least one item must have
                different values for the two agents.
            *args: Forwarded unchanged to ``NegotiationSimulation``.
            **kwargs: Forwarded unchanged to ``NegotiationSimulation``.
        """
        self.game_type = game_type
        self.same_round_value = same_round_value
        self.atleast_one_conflict = atleast_one_conflict
        super().__init__(*args, **kwargs)

    def _sample_values(self) -> Dict[AgentId, dict]:
        """
        Rejection-sample per-item valuations according to the configured template.

        Raises:
            ValueError: If ``game_type`` is unknown, or if the configuration can
                never be satisfied (``"10-1-exclusive"`` with ``same_round_value``
                and an odd number of items).
        """
        if self.game_type not in ("10-1-exclusive", "10-1-ties", "1-to-20"):
            # With an unknown template nothing is ever sampled; depending on the
            # flags the loop below would hang, raise KeyError or return empty tables.
            raise ValueError(f"Unknown game_type: {self.game_type!r}")
        if self.state is None:
            # _sample_quantities lower-cases item names; use the same keys here so
            # values and quantities line up even before a state exists.
            item_types = [item.lower() for item in self.item_types]
        else:
            item_types = list(self.state.quantities.keys())
        if (
            self.game_type == "10-1-exclusive"
            and self.same_round_value
            and len(item_types) % 2 == 1
        ):
            # Per item the two agents hold {1, 10} in opposite order, so equal totals
            # need as many 10s as 1s - impossible for an odd item count. Fail fast
            # instead of looping forever.
            raise ValueError(
                "game_type '10-1-exclusive' with same_round_value=True requires "
                "an even number of items"
            )
        values = defaultdict(dict)
        while True:
            for item in item_types:
                if self.game_type == "10-1-exclusive":
                    v = int(self.rng.choice([1, 10]))
                    values[self.agent_ids[0]][item] = v
                    values[self.agent_ids[1]][item] = 10 if v == 1 else 1
                elif self.game_type == "10-1-ties":
                    for aid in self.agent_ids:
                        values[aid][item] = int(self.rng.choice([1, 10]))
                elif self.game_type == "1-to-20":
                    for aid in self.agent_ids:
                        values[aid][item] = int(self.rng.integers(1, 21))
            if self.atleast_one_conflict:
                # Resample unless some item is valued differently by the agents.
                has_conflict = any(
                    len({values[aid][item] for aid in self.agent_ids}) > 1
                    for item in item_types
                )
                if not has_conflict:
                    continue
            agent_values = [sum(v.values()) for v in values.values()]
            if len(set(agent_values)) == 1 or not self.same_round_value:
                break
        return values

    def _sample_quantities(self) -> Dict[str, int]:
        """Return a stock of 10 units for every item type, keyed by lower-cased name."""
        return {item.lower(): 10 for item in self.item_types}

    def set_new_round_of_variant(self):
        """Refresh quantities and values, and mark the state as being in the split phase."""
        self.state.quantities = self._sample_quantities()
        self.state.values = self._sample_values()
        self.state.split_phase = True

    def get_info_of_variant(
        self, state: NegotiationState, actions: Dict[AgentId, Any]
    ) -> Dict[str, Any]:
        """Return deep copies of the state's quantities, values and splits."""
        return {
            "quantities": copy.deepcopy(state.quantities),
            "values": copy.deepcopy(state.values),
            "splits": copy.deepcopy(state.splits),
        }

    def get_rewards(self, splits: Dict[AgentId, Split]) -> Dict[AgentId, float]:
        """Delegate reward computation to ``compute_tas_style_rewards``."""
        return compute_tas_style_rewards(
            self.agent_ids, self.state.values, splits, self.state.quantities
        )

    def get_obs(self):
        """Return a mapping from every agent id to that agent's observation."""
        return {agent_id: self.get_obs_agent(agent_id) for agent_id in self.agent_ids}

    def get_obs_agent(self, agent_id):
        """
        Build the ``NoPressObs`` for ``agent_id``.

        Last-round fields (values, points, splits) are None whenever the
        corresponding ``previous_*`` attribute of the state is None; points are
        rounded to one decimal.
        """
        state = self.state
        other_id = self._other(agent_id)

        prev_values = state.previous_values
        last_value_agent = None if prev_values is None else prev_values.get(agent_id)
        last_value_coagent = None if prev_values is None else prev_values.get(other_id)

        prev_points = state.previous_points
        last_points_agent = (
            None if prev_points is None else round(prev_points.get(agent_id), 1)
        )
        last_points_coagent = (
            None if prev_points is None else round(prev_points.get(other_id), 1)
        )

        last_split_agent = None
        last_split_coagent = None
        if state.previous_splits is not None:
            last_split_coagent = state.previous_splits[other_id].items_given_to_self
            last_split_agent = state.previous_splits[agent_id].items_given_to_self

        obs = NoPressObs(
            round_nb=state.round_nb,
            last_message="",
            quota_messages_per_agent_per_round=self.quota_messages_per_agent_per_round,
            current_agent=state.current_agent,
            other_agent=self.agent_id_to_name[other_id],
            quantities=state.quantities,
            item_types=self.item_types,
            value=state.values[agent_id],
            split_phase=state.split_phase,
            last_split_agent=last_split_agent,
            last_value_agent=last_value_agent,
            last_points_agent=last_points_agent,
            last_split_coagent=last_split_coagent,
            last_value_coagent=last_value_coagent,
            last_points_coagent=last_points_coagent,
            other_value=state.values[other_id],
            last_quantities=state.previous_quantities,
        )
        return obs

    def reset(self):
        """Sample fresh quantities and values, rebuild the state in split phase, and return the observations."""
        start_agent = self.agent_ids[self._starting_agent_index]
        quantities = self._sample_quantities()
        values = self._sample_values()
        self.state = NoPressState(
            round_nb=0,
            last_message="",
            current_agent=start_agent,
            quantities=quantities,
            values=values,
            previous_values=None,
            splits={aid: None for aid in self.agent_ids},
            nb_messages_sent={aid: 0 for aid in self.agent_ids},
            split_phase=True,
            previous_splits=None,
            previous_points=None,
            previous_quantities=None,
        )
        return self.get_obs()
|
src_code_for_reproducibility/models/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (269 Bytes). View file
|
|
|
src_code_for_reproducibility/models/__pycache__/inference_backend.cpython-312.pyc
ADDED
|
Binary file (2.38 kB). View file
|
|
|
src_code_for_reproducibility/models/__pycache__/scalar_critic.cpython-312.pyc
ADDED
|
Binary file (3.32 kB). View file
|
|
|
src_code_for_reproducibility/training/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (277 Bytes). View file
|
|
|