Muqeeth committed on
Commit
ae89587
·
verified ·
1 Parent(s): 5b81f21

Add files using upload-large-folder tool

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ run.log filter=lfs diff=lfs merge=lfs -text
run.log ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cb8cc65d0df3b94223880c94292e2672460bdfe63c53e5e64cbdb6150d1f851
3
+ size 34946212
seed_0/Qwen/Qwen2.5-7B-Instruct/adapters/agent_adapter/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79f679c5ed39aec182f20a85183840cbc89a941dbb788458601b06ad191b3ca6
3
+ size 323014168
seed_0/Qwen/Qwen2.5-7B-Instruct/adapters/critic_adapter/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50cfa136e5499e5b1f83c90753b519572d60a378c94d09953a2738af6a8ae3c1
3
+ size 323014168
seed_0/agent_trainer/critic_optimizer_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1574fdb90735a922b09c67d07f7abdbd51181f00dc7bed878cb80adb5f50c1d
3
+ size 2631
seed_0/agent_trainer/policy_optimizer_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:522cbc62251cc9e883d9bb60dcf6d649b49409bdde2c3eb8e46f40b93e965ca9
3
+ size 646269121
seed_0/agent_trainer/trainer_annealing_state.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b73f85d14299abf3b84835f8c7a0f0611826f06554743dfbd8cfc050dcb9a05d
3
+ size 104
seed_0/random_state.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ca7aae26b9fcf70e0dc1bb0f32e68175a3727bf4c227cd59777c5f291fab8a4
3
+ size 12176
src_code_for_reproducibility/markov_games/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ """
2
+ File: mllm/markov_games/__init__.py
3
+ Summary: Makes Markov-game subpackages importable from the top-level namespace.
4
+ """
src_code_for_reproducibility/markov_games/alternative_actions_runner.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ File: mllm/markov_games/alternative_actions_runner.py
3
+ Summary: Generates rollout branches by replaying trajectories with unilateral action changes.
4
+ """
5
+
6
+ import asyncio
7
+ import copy
8
+ import json
9
+ import os.path
10
+ from typing import Any, Tuple
11
+
12
+ from mllm.markov_games.markov_game import AgentAndActionSafeCopy, MarkovGame
13
+ from mllm.markov_games.rollout_tree import (
14
+ AgentActLog,
15
+ RolloutTreeBranchNode,
16
+ RolloutTreeNode,
17
+ RolloutTreeRootNode,
18
+ StepLog,
19
+ )
20
+
21
+ AgentId = str
22
+
23
+
24
async def run_with_unilateral_alt_action(
    markov_game: MarkovGame,
    agent_id: AgentId,
    time_step: int,
    branch_node: RolloutTreeBranchNode,
    max_depth: int,
) -> None:
    """
    Roll out a counterfactual branch where ``agent_id`` deviates unilaterally.

    The caller hands in a game copy on which the other agents' actions have
    already been set. This coroutine asks the game for a new action for
    ``agent_id``, takes one simulation step, and then keeps stepping the game
    until it terminates or ``max_depth`` further steps have been taken.

    Args:
        markov_game: Game instance to roll out; it is mutated by this call.
        agent_id: Identifier of the deviating agent.
        time_step: Time step assigned to the first node of the branch;
            later nodes get consecutive values.
        branch_node: Node that receives the branch. The head of the new
            chain is appended to ``branch_node.branches[agent_id]``.
        max_depth: Maximum number of steps taken after the deviating step.

    Returns:
        None. The result is recorded on ``branch_node``.
    """
    # Generate the alternative action for the deviating agent and take a step.
    await markov_game.set_action_of_agent(agent_id)
    terminated: bool = markov_game.take_simulation_step()
    first_alternative_node = RolloutTreeNode(
        step_log=markov_game.get_step_log(),
        time_step=time_step,
    )

    # Generate the rest of the trajectory, at most ``max_depth`` more steps.
    previous_node = first_alternative_node
    extra_steps = 0
    while not terminated and extra_steps < max_depth:
        time_step += 1
        terminated, step_log = await markov_game.step()
        current_node = RolloutTreeNode(step_log=step_log, time_step=time_step)
        previous_node.child = current_node
        previous_node = current_node
        extra_steps += 1

    # Register the branch head. There is no ``await`` between the check and
    # the write, so tasks sharing this ``branch_node`` on one event loop
    # cannot interleave inside this section.
    if branch_node.branches is None:
        branch_node.branches = {}
    branch_node.branches.setdefault(agent_id, []).append(first_alternative_node)
66
+
67
+
68
async def AlternativeActionsRunner(
    markov_game: MarkovGame,
    output_folder: str,
    nb_alternative_actions: int,
    max_depth: int,
    branch_only_on_new_round: bool = False,
):
    """
    Generate a rollout tree containing the main path plus unilateral deviation branches.

    For each time step, until the main trajectory terminates, we:
    1. Take a safe copy of the game before any agent acts.
    2. Get every agent's action without side effects, apply those actions
       and advance the main trajectory by one simulation step.
    3. Spawn ``nb_alternative_actions`` asynchronous deviations per agent.
       Each one starts from a fresh copy of the pre-action game in which
       every non-deviating agent is pinned to its main-path action, and is
       handed to ``run_with_unilateral_alt_action`` with ``max_depth``.

    Args:
        markov_game: Game to roll out; it is advanced in place.
        output_folder: Not used by this function.
        nb_alternative_actions: Number of deviation branches per agent and
            per time step.
        max_depth: Forwarded to ``run_with_unilateral_alt_action``.
        branch_only_on_new_round: Not used by this function.

    Returns:
        The ``RolloutTreeRootNode`` of the tree, returned after all branch
        tasks have completed.
    """
    tasks = []
    time_step = 0
    terminated = False
    root = RolloutTreeRootNode(id=markov_game.get_id(), crn_id=markov_game.get_crn_id())
    previous_node = root

    while not terminated:
        mg_before_action = markov_game.get_safe_copy()

        # Get safe copies for the main branch.
        agent_action_safe_copies: dict[AgentId, AgentAndActionSafeCopy] = (
            await markov_game.get_actions_of_agents_without_side_effects()
        )

        markov_game.set_actions_of_agents_manually(agent_action_safe_copies)
        terminated = markov_game.take_simulation_step()
        main_node = RolloutTreeNode(
            step_log=markov_game.get_step_log(), time_step=time_step
        )
        branch_node = RolloutTreeBranchNode(main_child=main_node)
        previous_node.child = branch_node
        previous_node = main_node

        # Get alternative branches by generating new unilateral actions.
        for agent_id in markov_game.agent_ids:
            for _ in range(nb_alternative_actions):
                mg_branch: MarkovGame = mg_before_action.get_safe_copy()

                # Pin every agent except the deviating one to its main-path
                # action. Each branch gets its own copies, so branches do not
                # share action or agent objects with each other or with the
                # main path.
                for other_agent_id in mg_branch.agent_ids:
                    if other_agent_id == agent_id:
                        continue
                    main_path_copy = agent_action_safe_copies[other_agent_id]
                    mg_branch.set_action_and_agent_after_action_manually(
                        agent_id=other_agent_id,
                        agent_action_safe_copy=AgentAndActionSafeCopy(
                            action=copy.deepcopy(main_path_copy.action),
                            action_info=copy.deepcopy(main_path_copy.action_info),
                            agent_after_action=main_path_copy.agent_after_action.get_safe_copy(),
                        ),
                    )

                tasks.append(
                    asyncio.create_task(
                        run_with_unilateral_alt_action(
                            markov_game=mg_branch,
                            time_step=time_step,
                            agent_id=agent_id,
                            branch_node=branch_node,
                            max_depth=max_depth,
                        )
                    )
                )
        time_step += 1

    # Wait for all branches to complete.
    await asyncio.gather(*tasks)

    return root
src_code_for_reproducibility/markov_games/ipd/__init__.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ File: mllm/markov_games/ipd/__init__.py
3
+ Summary: Marks the Iterated Prisoner's Dilemma subpackage.
4
+ """
5
+
6
+ from .Ipd_hard_coded_agents import AlwaysCooperateIPDAgent, AlwaysDefectIPDAgent
7
+
8
+ __all__ = [
9
+ "AlwaysCooperateIPDAgent",
10
+ "AlwaysDefectIPDAgent",
11
+ ]
src_code_for_reproducibility/markov_games/negotiation/__pycache__/dond_simulation.cpython-312.pyc ADDED
Binary file (10.7 kB). View file
 
src_code_for_reproducibility/markov_games/negotiation/__pycache__/nego_agent.cpython-312.pyc ADDED
Binary file (11.7 kB). View file
 
src_code_for_reproducibility/markov_games/negotiation/__pycache__/nego_simulation.cpython-312.pyc ADDED
Binary file (12.6 kB). View file
 
src_code_for_reproducibility/markov_games/negotiation/__pycache__/tas_rps_simulation.cpython-312.pyc ADDED
Binary file (11.7 kB). View file