Muqeeth committed on
Commit
6355fae
·
verified ·
1 Parent(s): f0ad870

Add files using upload-large-folder tool

Browse files
seed_42/Qwen/Qwen2.5-7B-Instruct/adapters/agent_adapter/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1436ad64c7607b662e4cd395c9c37b0a0f5021552b3b73a095acc06bb533387f
3
  size 323014168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95c2062df6f7a10adbd244c9b6e75a8153c54f776c3619732a3fa0c766be3166
3
  size 323014168
seed_42/Qwen/Qwen2.5-7B-Instruct/adapters/critic_adapter/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15466bfa7faf33695c2e4470cb1a440f086f27f467fe02f2b09a20631914bab9
3
  size 323014168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2810cb0ec24072033412e5ff181e51188612e95b9f1685f9177794aa66a8bc0
3
  size 323014168
seed_42/agent_trainer/policy_optimizer_state.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b0ce97dbf5119cad8778d4a88276b1f2a9857951d9286bafbc1889a0668f7f3
3
  size 646269121
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9d017d2f98e71c9ed36613fc1d2e7e8daeef9aa62ee96e0e838ec293e469025
3
  size 646269121
seed_42/agent_trainer/trainer_annealing_state.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79cfce2a5040c0939846d147a00d13a3f05afa3b73ce05b85fd5b5b13bf4ddcf
3
  size 104
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5468e667c6b74a7cb34fc016988230e631fc520b2df33e5a5c71068b59689f3e
3
  size 104
seed_42/random_state.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9bcfbe7f3d2c5bca58434c5809547ecb4c92e58ceebcb74196e41d6c6751e9ea
3
  size 12254
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03db597030fc1fe5f071eb41114416ad894895b73935ddcee0fc06e622471c8a
3
  size 12254
src_code_for_reproducibility/markov_games/__pycache__/agent.cpython-312.pyc CHANGED
Binary files a/src_code_for_reproducibility/markov_games/__pycache__/agent.cpython-312.pyc and b/src_code_for_reproducibility/markov_games/__pycache__/agent.cpython-312.pyc differ
 
src_code_for_reproducibility/markov_games/__pycache__/alternative_actions_runner.cpython-312.pyc CHANGED
Binary files a/src_code_for_reproducibility/markov_games/__pycache__/alternative_actions_runner.cpython-312.pyc and b/src_code_for_reproducibility/markov_games/__pycache__/alternative_actions_runner.cpython-312.pyc differ
 
src_code_for_reproducibility/markov_games/negotiation/README.md CHANGED
@@ -9,29 +9,16 @@ Proportional splitting is used when the two proposals exceed the available total
9
  ### Variants (in increasing difficulty)
10
 
11
  - No‑Press Split
12
- - Single item type (coins)
13
- - No communication; agents go straight to making split proposals, with the starting player alternating deterministically.
 
14
  - Motivation: mirrors no‑communication setups (e.g., Advantage Alignment) while keeping the split decision nontrivial.
15
- - Deterministic Mode: values are fixed and public: one agent values coins at 10, the other at 1 (alternates each round).
16
- - Stochastic Mode: values are random and uncorrelated.
17
 
18
  - Trust-and-Split RPS (TAS-RPS)
19
  - Single item type (coins)
20
  - Each round, a rock–paper–scissors hand draw creates a strong asymmetry: the winner’s per-coin value is 10, the loser’s is 1.
21
  - Each agent initially sees only their own hand and must communicate to coordinate an optimal split.
22
  - Motivation: enforce large value disparity so one’s own value reveals little about the other’s (avoiding ceiling effects) and incentivize meaningful communication.
23
-
24
- - Trust-and-Split (TAS)
25
- - Single item type (coins); each round, each agent’s per-coin value is independently sampled in a broad range (e.g., 1–20).
26
- - Each agent observes only their own value; they may use short messages to share and negotiate.
27
- - Motivation: a simple blend that tests whether agents learn to exchange private information and coordinate proportional, value-aware splits.
28
-
29
- - Deal-or-No-Deal (DOND)
30
- - Introduced in [Deal or No Deal? End-to-End Learning for Negotiation Dialogues](https://arxiv.org/pdf/1706.05125)
31
- - Multiple item types (typically "books", "hats" and "balls") with limited stocks; each agent has its own per-type values.
32
- - A deal pays out only if both proposals exactly agree and respect the stock; otherwise no deal (zero reward) that round.
33
- - Motivation: a known benchmark closer to real-world bargaining, where both parties must explicitly agree.
34
-
35
 
36
 
37
 
 
9
  ### Variants (in increasing difficulty)
10
 
11
  - No‑Press Split
12
+ - Multiple item types (e.g., hats, balls, books)
13
+ - The item values for each agent are public.
14
+ - No communication; agents go straight to making split proposals.
15
  - Motivation: mirrors no‑communication setups (e.g., Advantage Alignment) while keeping the split decision nontrivial.
 
 
16
 
17
  - Trust-and-Split RPS (TAS-RPS)
18
  - Single item type (coins)
19
  - Each round, a rock–paper–scissors hand draw creates a strong asymmetry: the winner’s per-coin value is 10, the loser’s is 1.
20
  - Each agent initially sees only their own hand and must communicate to coordinate an optimal split.
21
  - Motivation: enforce large value disparity so one’s own value reveals little about the other’s (avoiding ceiling effects) and incentivize meaningful communication.
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
 
24
 
src_code_for_reproducibility/markov_games/negotiation/dond_agent.py CHANGED
@@ -1,3 +1,8 @@
 
 
 
 
 
1
  import copy
2
  import re
3
  from collections.abc import Callable
@@ -5,14 +10,18 @@ from dataclasses import dataclass
5
  from typing import Any, Dict, List, Tuple
6
 
7
  from mllm.markov_games.agent import Agent
8
- from mllm.markov_games.rollout_tree import AgentActLog, ChatTurn
9
- from mllm.markov_games.negotiation.dond_simulation import (
10
- DealNoDealObs,
 
11
  )
12
  from mllm.markov_games.negotiation.nego_simulation import Split
13
- from mllm.markov_games.negotiation.nego_agent import NegotiationAgent, NegotiationAgentState
 
14
 
15
  class DealNoDealAgent(NegotiationAgent):
 
 
16
  def __init__(
17
  self,
18
  *args,
@@ -20,42 +29,47 @@ class DealNoDealAgent(NegotiationAgent):
20
  ):
21
  super().__init__(*args, **kwargs)
22
  self.intro_prompt = (
23
- "You are {agent_id}. You are playing an iterated game. "
24
- "At each round, you and other agent will try to distribute among yourselves items of types {item_types}. "
25
- "You only know how much you value each item type, but not the other agent's values. "
26
- "You can communicate with the other agent by sending up to {quota_messages_per_agent_per_round} short messages per round. "
27
- "Each round, after exchanging messages, you and the other agent will submit a private proposal. "
28
- "A deal is accepted only if both proposals match exactly and are within stock; otherwise no deal (0 points for both at that round). "
29
- "The values of the items of the other agent at the previous round are revealed to you after each round. "
30
- "Your goal is: {goal}."
31
- )
32
- self.new_round_prompt = ("New round {round_nb}. Items: {stock}. Your values: {values}. ")
33
- self.last_round_prompt = ("Last round, other agent's values: {previous_values_coagent}. ")
34
- self.send_split_prompt = ("Respond with <split>...</split> where you propose how many items of each type you want to keep.")
35
-
 
 
 
 
36
  def get_message_regex(self, observation: DealNoDealObs) -> str:
 
37
  return r"<message>[\s\S]{0,400}</message>"
38
-
39
  def get_split_regex(self, observation: DealNoDealObs) -> str:
 
40
  parts = []
41
  for t in observation.item_types:
42
  s = int(observation.quantities.get(t, 0))
43
  allowed = "|".join(str(k) for k in range(0, s + 1))
44
  rng = f"({allowed})"
45
- parts.append(fr"<{t}>{rng}</{t}>")
46
  items_block = "".join(parts)
47
- return fr"(<split>{items_block}</split>)"
48
-
49
  def get_split_action(self, policy_output: str, observation: DealNoDealObs) -> Split:
 
50
  import re as _re
 
51
  allocations: Dict[str, int] = {}
52
  for t in observation.item_types:
53
- m = _re.search(fr"<{t}>([0-9]+)</{t}>", policy_output)
54
  if m:
55
  allocations[t] = int(m.group(1))
56
  else:
57
  allocations[t] = 0
58
  return Split(items_given_to_self=allocations)
59
-
60
-
61
-
 
1
+ """
2
+ File: mllm/markov_games/negotiation/dond_agent.py
3
+ Summary: Agent implementation for Deal-or-No-Deal style negotiations.
4
+ """
5
+
6
  import copy
7
  import re
8
  from collections.abc import Callable
 
10
  from typing import Any, Dict, List, Tuple
11
 
12
  from mllm.markov_games.agent import Agent
13
+ from mllm.markov_games.negotiation.dond_simulation import DealNoDealObs
14
+ from mllm.markov_games.negotiation.nego_agent import (
15
+ NegotiationAgent,
16
+ NegotiationAgentState,
17
  )
18
  from mllm.markov_games.negotiation.nego_simulation import Split
19
+ from mllm.markov_games.rollout_tree import AgentActLog, ChatTurn
20
+
21
 
22
  class DealNoDealAgent(NegotiationAgent):
23
+ """NegotiationAgent tailored to the Deal-or-No-Deal stock/value revelation rules."""
24
+
25
  def __init__(
26
  self,
27
  *args,
 
29
  ):
30
  super().__init__(*args, **kwargs)
31
  self.intro_prompt = (
32
+ "You are {agent_id}. You are playing an iterated game. "
33
+ "At each round, you and other agent will try to distribute among yourselves items of types {item_types}. "
34
+ "You only know how much you value each item type, but not the other agent's values. "
35
+ "You can communicate with the other agent by sending up to {quota_messages_per_agent_per_round} short messages per round. "
36
+ "Each round, after exchanging messages, you and the other agent will submit a private proposal. "
37
+ "A deal is accepted only if both proposals match exactly and are within stock; otherwise no deal (0 points for both at that round). "
38
+ "The values of the items of the other agent at the previous round are revealed to you after each round. "
39
+ "Your goal is: {goal}."
40
+ )
41
+ self.new_round_prompt = (
42
+ "New round {round_nb}. Items: {stock}. Your values: {values}. "
43
+ )
44
+ self.last_round_prompt = (
45
+ "Last round, other agent's values: {previous_values_coagent}. "
46
+ )
47
+ self.send_split_prompt = "Respond with <split>...</split> where you propose how many items of each type you want to keep."
48
+
49
  def get_message_regex(self, observation: DealNoDealObs) -> str:
50
+ """Allow short XML messages (at most 400 characters) between proposal phases."""
51
  return r"<message>[\s\S]{0,400}</message>"
52
+
53
  def get_split_regex(self, observation: DealNoDealObs) -> str:
54
+ """Constrain split proposals to per-item XML tags bounded by the current stock."""
55
  parts = []
56
  for t in observation.item_types:
57
  s = int(observation.quantities.get(t, 0))
58
  allowed = "|".join(str(k) for k in range(0, s + 1))
59
  rng = f"({allowed})"
60
+ parts.append(rf"<{t}>{rng}</{t}>")
61
  items_block = "".join(parts)
62
+ return rf"(<split>{items_block}</split>)"
63
+
64
  def get_split_action(self, policy_output: str, observation: DealNoDealObs) -> Split:
65
+ """Convert the XML proposal into a Split dataclass understood by the simulator (item types with no matching tag default to 0)."""
66
  import re as _re
67
+
68
  allocations: Dict[str, int] = {}
69
  for t in observation.item_types:
70
+ m = _re.search(rf"<{t}>([0-9]+)</{t}>", policy_output)
71
  if m:
72
  allocations[t] = int(m.group(1))
73
  else:
74
  allocations[t] = 0
75
  return Split(items_given_to_self=allocations)
 
 
 
src_code_for_reproducibility/markov_games/negotiation/nego_agent.py CHANGED
@@ -1,3 +1,8 @@
 
 
 
 
 
1
  import copy
2
  from abc import abstractmethod
3
  from collections.abc import Callable
@@ -13,6 +18,8 @@ from mllm.markov_games.rollout_tree import AgentActLog, ChatTurn
13
 
14
  @dataclass
15
  class NegotiationAgentState:
 
 
16
  round_nb: int
17
  nb_messages_sent_this_round: int
18
  chat_counter: int
@@ -20,6 +27,8 @@ class NegotiationAgentState:
20
 
21
 
22
  class NegotiationAgent(Agent):
 
 
23
  def __init__(
24
  self,
25
  seed: int,
@@ -61,19 +70,29 @@ class NegotiationAgent(Agent):
61
 
62
  @abstractmethod
63
  def get_message_regex(self, observation: NegotiationObs) -> str:
 
64
  pass
65
 
66
  @abstractmethod
67
  def get_split_regex(self, observation: NegotiationObs) -> str:
 
68
  pass
69
 
70
  @abstractmethod
71
  def get_split_action(
72
  self, policy_output: str, observation: NegotiationObs
73
  ) -> Split:
 
74
  pass
75
 
76
  async def act(self, observation: NegotiationObs) -> Tuple[Any, AgentActLog]:
 
 
 
 
 
 
 
77
  def dict_to_str(d: dict) -> str:
78
  return ", ".join(f"{v} {k}" for k, v in d.items())
79
 
 
1
+ """
2
+ File: mllm/markov_games/negotiation/nego_agent.py
3
+ Summary: General-purpose negotiation agent coordinating prompts and actions.
4
+ """
5
+
6
  import copy
7
  from abc import abstractmethod
8
  from collections.abc import Callable
 
18
 
19
  @dataclass
20
  class NegotiationAgentState:
21
+ """Lightweight container tracking round progression and message history."""
22
+
23
  round_nb: int
24
  nb_messages_sent_this_round: int
25
  chat_counter: int
 
27
 
28
 
29
  class NegotiationAgent(Agent):
30
+ """Base agent that manages prompt scaffolding and regex validation for variants."""
31
+
32
  def __init__(
33
  self,
34
  seed: int,
 
70
 
71
  @abstractmethod
72
  def get_message_regex(self, observation: NegotiationObs) -> str:
73
+ """Return the regex that outgoing chat messages must satisfy."""
74
  pass
75
 
76
  @abstractmethod
77
  def get_split_regex(self, observation: NegotiationObs) -> str:
78
+ """Return the regex that final split proposals must satisfy."""
79
  pass
80
 
81
  @abstractmethod
82
  def get_split_action(
83
  self, policy_output: str, observation: NegotiationObs
84
  ) -> Split:
85
+ """Convert raw LLM output into the ``Split`` structure required by simulations."""
86
  pass
87
 
88
  async def act(self, observation: NegotiationObs) -> Tuple[Any, AgentActLog]:
89
+ """
90
+ Assemble the appropriate prompt, query the policy, and return message or split.
91
+
92
+ This handles intro text, new-round reminders, quota tracking, and post-processing
93
+ (regex enforcement + ChatTurn logging) so subclasses only customize prompts/regexes.
94
+ """
95
+
96
  def dict_to_str(d: dict) -> str:
97
  return ", ".join(f"{v} {k}" for k, v in d.items())
98
 
src_code_for_reproducibility/markov_games/negotiation/nego_hard_coded_policies.py CHANGED
@@ -1,11 +1,17 @@
 
 
 
 
 
1
  import asyncio
2
- from typing import Optional
 
3
  from mllm.markov_games.negotiation.nego_agent import NegotiationAgent
 
4
  from mllm.markov_games.negotiation.no_press_nego_agent import NoPressAgent
5
  from mllm.markov_games.negotiation.no_press_nego_simulation import NoPressObs
6
  from mllm.markov_games.rollout_tree import AgentActLog, ChatTurn
7
- from mllm.markov_games.negotiation.nego_simulation import Split
8
- from typing import Any, Tuple
9
 
10
  class HardCodedNegoWelfareMaximizingPolicy(NoPressAgent):
11
  async def act(self, observation: NoPressObs) -> Tuple[Any, AgentActLog]:
@@ -40,6 +46,7 @@ class HardCodedNegoWelfareMaximizingPolicy(NoPressAgent):
40
  )
41
  return action, act_log
42
 
 
43
  class HardCodedNegoGreedyPolicy(NoPressAgent):
44
  async def act(self, observation: NoPressObs) -> Tuple[Any, AgentActLog]:
45
  """
@@ -61,4 +68,3 @@ class HardCodedNegoGreedyPolicy(NoPressAgent):
61
  info=None,
62
  )
63
  return action, act_log
64
-
 
1
+ """
2
+ File: mllm/markov_games/negotiation/nego_hard_coded_policies.py
3
+ Summary: Provides deterministic negotiation policies for testing and baselines.
4
+ """
5
+
6
  import asyncio
7
+ from typing import Any, Optional, Tuple
8
+
9
  from mllm.markov_games.negotiation.nego_agent import NegotiationAgent
10
+ from mllm.markov_games.negotiation.nego_simulation import Split
11
  from mllm.markov_games.negotiation.no_press_nego_agent import NoPressAgent
12
  from mllm.markov_games.negotiation.no_press_nego_simulation import NoPressObs
13
  from mllm.markov_games.rollout_tree import AgentActLog, ChatTurn
14
+
 
15
 
16
  class HardCodedNegoWelfareMaximizingPolicy(NoPressAgent):
17
  async def act(self, observation: NoPressObs) -> Tuple[Any, AgentActLog]:
 
46
  )
47
  return action, act_log
48
 
49
+
50
  class HardCodedNegoGreedyPolicy(NoPressAgent):
51
  async def act(self, observation: NoPressObs) -> Tuple[Any, AgentActLog]:
52
  """
 
68
  info=None,
69
  )
70
  return action, act_log
 
src_code_for_reproducibility/markov_games/negotiation/no_press_nego_simulation.py CHANGED
@@ -1,3 +1,8 @@
 
 
 
 
 
1
  import copy
2
  from collections import defaultdict
3
  from dataclasses import dataclass
@@ -16,11 +21,15 @@ AgentId = str
16
 
17
  @dataclass
18
  class NoPressState(NegotiationState):
 
 
19
  pass
20
 
21
 
22
  @dataclass
23
  class NoPressObs(NegotiationObs):
 
 
24
  other_value: Dict[str, float]
25
 
26
 
@@ -39,6 +48,7 @@ class NoPressSimulation(NegotiationSimulation):
39
  super().__init__(*args, **kwargs)
40
 
41
  def _sample_values(self) -> Dict[AgentId, dict]:
 
42
  values = defaultdict(dict)
43
  if self.state is None:
44
  item_types = self.item_types
@@ -73,9 +83,11 @@ class NoPressSimulation(NegotiationSimulation):
73
  return values
74
 
75
  def _sample_quantities(self) -> Dict[str, int]:
 
76
  return {item.lower(): 10 for item in self.item_types}
77
 
78
  def set_new_round_of_variant(self):
 
79
  self.state.quantities = self._sample_quantities()
80
  self.state.values = self._sample_values()
81
  self.state.split_phase = True
@@ -83,6 +95,7 @@ class NoPressSimulation(NegotiationSimulation):
83
  def get_info_of_variant(
84
  self, state: NegotiationState, actions: Dict[AgentId, Any]
85
  ) -> Dict[str, Any]:
 
86
  return {
87
  "quantities": copy.deepcopy(state.quantities),
88
  "values": copy.deepcopy(state.values),
@@ -90,6 +103,7 @@ class NoPressSimulation(NegotiationSimulation):
90
  }
91
 
92
  def get_rewards(self, splits: Dict[AgentId, Split]) -> Dict[AgentId, float]:
 
93
  return compute_tas_style_rewards(
94
  self.agent_ids, self.state.values, splits, self.state.quantities
95
  )
 
1
+ """
2
+ File: mllm/markov_games/negotiation/no_press_nego_simulation.py
3
+ Summary: Simulation driver for no-press negotiation scenarios.
4
+ """
5
+
6
  import copy
7
  from collections import defaultdict
8
  from dataclasses import dataclass
 
21
 
22
  @dataclass
23
  class NoPressState(NegotiationState):
24
+ """NegotiationState alias used to clarify we run in always-split phase."""
25
+
26
  pass
27
 
28
 
29
  @dataclass
30
  class NoPressObs(NegotiationObs):
31
+ """Observation that includes both agents' values (since there is no messaging)."""
32
+
33
  other_value: Dict[str, float]
34
 
35
 
 
48
  super().__init__(*args, **kwargs)
49
 
50
  def _sample_values(self) -> Dict[AgentId, dict]:
51
+ """Sample per-item valuations according to the configured template."""
52
  values = defaultdict(dict)
53
  if self.state is None:
54
  item_types = self.item_types
 
83
  return values
84
 
85
  def _sample_quantities(self) -> Dict[str, int]:
86
+ """No-press setups use symmetric 10-unit stocks for every item."""
87
  return {item.lower(): 10 for item in self.item_types}
88
 
89
  def set_new_round_of_variant(self):
90
+ """Refresh quantities/values and jump directly into the simultaneous split."""
91
  self.state.quantities = self._sample_quantities()
92
  self.state.values = self._sample_values()
93
  self.state.split_phase = True
 
95
  def get_info_of_variant(
96
  self, state: NegotiationState, actions: Dict[AgentId, Any]
97
  ) -> Dict[str, Any]:
98
+ """Surface quantities/values/splits so statistics modules can read them."""
99
  return {
100
  "quantities": copy.deepcopy(state.quantities),
101
  "values": copy.deepcopy(state.values),
 
103
  }
104
 
105
  def get_rewards(self, splits: Dict[AgentId, Split]) -> Dict[AgentId, float]:
106
+ """Reuse TAS reward logic because the split arbitration is identical."""
107
  return compute_tas_style_rewards(
108
  self.agent_ids, self.state.values, splits, self.state.quantities
109
  )
src_code_for_reproducibility/markov_games/negotiation/tas_agent.py CHANGED
@@ -1,9 +1,16 @@
 
 
 
 
 
1
  from mllm.markov_games.negotiation.nego_agent import NegotiationAgent
2
  from mllm.markov_games.negotiation.nego_simulation import Split
3
  from mllm.markov_games.negotiation.tas_simulation import TrustAndSplitObs
4
 
5
 
6
  class TrustAndSplitAgent(NegotiationAgent):
 
 
7
  def __init__(self, num_message_chars, *args, **kwargs):
8
  self.num_message_chars = num_message_chars
9
  super().__init__(*args, **kwargs)
@@ -58,12 +65,14 @@ class TrustAndSplitAgent(NegotiationAgent):
58
  self.send_message_prompt = f"Send your message now in <message>...</message> (<={self.num_message_chars} chars)."
59
 
60
  def get_message_regex(self, observation: TrustAndSplitObs) -> str:
 
61
  return rf"<message>[\s\S]{{0,{self.num_message_chars}}}</message>"
62
 
63
  # def get_message_regex(self, observation: TrustAndSplitObs) -> str:
64
  # return rf"(?s).{{0,{self.num_message_chars}}}"
65
 
66
  def get_split_regex(self, observation: TrustAndSplitObs) -> str:
 
67
  items = list(observation.quantities.keys())
68
  # Accept both singular and plural forms
69
  item_pattern = "|".join(
@@ -75,6 +84,7 @@ class TrustAndSplitAgent(NegotiationAgent):
75
  def get_split_action(
76
  self, policy_output: str, observation: TrustAndSplitObs
77
  ) -> Split:
 
78
  items = list(observation.quantities.keys())
79
  import re as _re
80
 
 
1
+ """
2
+ File: mllm/markov_games/negotiation/tas_agent.py
3
+ Summary: Agent implementation for Trust-and-Split negotiations.
4
+ """
5
+
6
  from mllm.markov_games.negotiation.nego_agent import NegotiationAgent
7
  from mllm.markov_games.negotiation.nego_simulation import Split
8
  from mllm.markov_games.negotiation.tas_simulation import TrustAndSplitObs
9
 
10
 
11
  class TrustAndSplitAgent(NegotiationAgent):
12
+ """Prompt/template wrapper for the classic multi-item Trust-and-Split benchmark."""
13
+
14
  def __init__(self, num_message_chars, *args, **kwargs):
15
  self.num_message_chars = num_message_chars
16
  super().__init__(*args, **kwargs)
 
65
  self.send_message_prompt = f"Send your message now in <message>...</message> (<={self.num_message_chars} chars)."
66
 
67
  def get_message_regex(self, observation: TrustAndSplitObs) -> str:
68
+ """Constrain chat to bounded XML tags for stable parsing."""
69
  return rf"<message>[\s\S]{{0,{self.num_message_chars}}}</message>"
70
 
71
  # def get_message_regex(self, observation: TrustAndSplitObs) -> str:
72
  # return rf"(?s).{{0,{self.num_message_chars}}}"
73
 
74
  def get_split_regex(self, observation: TrustAndSplitObs) -> str:
75
+ """Allow natural-language item names while still returning machine-parsable XML."""
76
  items = list(observation.quantities.keys())
77
  # Accept both singular and plural forms
78
  item_pattern = "|".join(
 
84
  def get_split_action(
85
  self, policy_output: str, observation: TrustAndSplitObs
86
  ) -> Split:
87
+ """Convert human-readable allocation text back into canonical item IDs."""
88
  items = list(observation.quantities.keys())
89
  import re as _re
90