Muqeeth committed on
Commit
99a14ce
·
verified ·
1 Parent(s): 62edd44

Add files using upload-large-folder tool

Browse files
seed_42/Qwen/Qwen2.5-7B-Instruct/adapters/agent_adapter/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e6e2c070d5b214399e5b5b2c59952896c83984ef5e9785cd825b34b193d318f
3
+ size 323014168
seed_42/Qwen/Qwen2.5-7B-Instruct/adapters/critic_adapter/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2810cb0ec24072033412e5ff181e51188612e95b9f1685f9177794aa66a8bc0
3
+ size 323014168
seed_42/agent_trainer/critic_optimizer_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1574fdb90735a922b09c67d07f7abdbd51181f00dc7bed878cb80adb5f50c1d
3
+ size 2631
seed_42/agent_trainer/policy_optimizer_state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e09a8f7a1cba58eee3b92c4c63eb09713d7bb2e9c1248bda1479bab99de86429
3
+ size 646269121
seed_42/agent_trainer/trainer_annealing_state.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e1ece3508808aa0372885bc9aafb57c945a1aa92d15785b25ba6ae0f7fe9860
3
+ size 104
seed_42/random_state.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da5363953b9a73a6aef9243748650aa4dfb203e5208ae92c87b3735e89bfa42c
3
+ size 12254
src_code_for_reproducibility/markov_games/__pycache__/linear_runner.cpython-312.pyc ADDED
Binary file (1.64 kB). View file
 
src_code_for_reproducibility/markov_games/negotiation/dond_simulation.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ File: mllm/markov_games/negotiation/dond_simulation.py
3
+ Summary: Simulates Deal-or-No-Deal negotiation games and logs rollouts.
4
+ """
5
+
6
+ import copy
7
+ from dataclasses import dataclass
8
+ from typing import Any, Dict, List, Tuple
9
+
10
+ from numpy.random import default_rng
11
+
12
+ from mllm.markov_games.negotiation.nego_simulation import (
13
+ NegotiationObs,
14
+ NegotiationSimulation,
15
+ NegotiationState,
16
+ Split,
17
+ )
18
+ from mllm.markov_games.rollout_tree import SimulationStepLog
19
+ from mllm.utils.get_coagent_id import get_coagent_id
20
+
21
+ AgentId = str
22
+
23
+
24
@dataclass
class DealNoDealState(NegotiationState):
    """Negotiation state extended with the item list and per-agent valuations."""

    # Names of the item types that are being split.
    item_types: List[str]
    # Valuation table: agent id -> {item type -> integer value}.
    values: Dict[AgentId, Dict[str, int]]
30
+
31
+
32
@dataclass
class DealNoDealObs(NegotiationObs):
    """Per-agent observation carrying the agent's own values and the co-agent's."""

    # The observing agent's value for each item type.
    my_values: Dict[str, int]
    # Names of the item types that are being split.
    item_types: List[str]
    # The co-agent's values as exposed to this agent, or None when unavailable.
    previous_values_coagent: Dict[str, int] | None
39
+
40
+
41
def random_partition_integer(rng, total: int, parts: int) -> List[int]:
    """Split ``total`` into ``parts`` random non-negative integers.

    Args:
        rng: Random generator exposing ``integers(low, high, size=...)`` whose
            result supports ``.tolist()``.
        total: The value the returned buckets must add up to.
        parts: Number of buckets to produce.

    Returns:
        A list of ``parts`` non-negative integers summing to ``total``. An empty
        list when ``parts <= 0``; all zeros when ``total <= 0``.
    """
    if parts <= 0:
        return []
    if total <= 0:
        return [0] * parts
    # Draw parts-1 cut points in [0, total]; once sorted, the gaps between
    # consecutive edges (including the 0 and total end points) are the buckets.
    boundaries = rng.integers(0, total + 1, size=parts - 1).tolist()
    boundaries.sort()
    edges = [0, *boundaries, total]
    return [upper - lower for lower, upper in zip(edges, edges[1:])]
54
+
55
+
56
class DealNoDealSimulation(NegotiationSimulation):
    """NegotiationSimulation variant implementing the Rubinstein-style Deal-or-No-Deal.

    Every game samples a stock of 5 to 7 items spread over ``item_types`` and,
    for each of the two agents, integer per-item values that sum to 10. Starting
    a new round resamples the stock but keeps the values.
    """

    # Item types used when the caller does not supply any. Stored as a tuple so
    # the default can never be shared and mutated across instances.
    _DEFAULT_ITEM_TYPES = ("books", "hats", "balls")

    def __init__(
        self,
        item_types: List[str] | None = None,
        *args,
        **kwargs,
    ):
        """Initialise the parent simulation and sample the first game.

        Args:
            item_types: Names of the item types to negotiate over. Defaults to
                ``["books", "hats", "balls"]`` (a fresh list per instance).
            *args: Forwarded to ``NegotiationSimulation.__init__``.
            **kwargs: Forwarded to ``NegotiationSimulation.__init__``.
        """
        if item_types is None:
            item_types = list(self._DEFAULT_ITEM_TYPES)
        super().__init__(*args, item_types=item_types, **kwargs)
        self.reset()

    def _other(self, agent_id: AgentId) -> AgentId:
        """Return the id of the co-agent of ``agent_id``."""
        return get_coagent_id(self.agent_ids, agent_id)

    def _sample_stock(self) -> Dict[str, int]:
        """Sample how many units of each item type are available."""
        # total items between 5 and 7
        total_items = int(self.rng.integers(5, 8))
        # nonnegative per-type counts summing to total_items (zeros allowed)
        parts = random_partition_integer(self.rng, total_items, len(self.item_types))
        return {t: int(c) for t, c in zip(self.item_types, parts)}

    def _sample_values_pair(self) -> Dict[AgentId, Dict[str, int]]:
        """Sample both agents' per-item values by rejection sampling.

        Each agent gets non-negative integer values summing to 10. A draw is
        accepted only if every item type is valued by at least one agent and
        at least one item type is valued by both.
        """
        while True:
            vals_a = random_partition_integer(self.rng, 10, len(self.item_types))
            vals_b = random_partition_integer(self.rng, 10, len(self.item_types))
            a = {t: int(v) for t, v in zip(self.item_types, vals_a)}
            b = {t: int(v) for t, v in zip(self.item_types, vals_b)}
            # each item valued by at least one
            ok1 = all((a[t] > 0) or (b[t] > 0) for t in self.item_types)
            # some item valued by both
            ok2 = any((a[t] > 0) and (b[t] > 0) for t in self.item_types)
            if ok1 and ok2:
                return {self.agent_ids[0]: a, self.agent_ids[1]: b}

    def _is_valid_allocation(
        self, allocation: Dict[str, int], stock: Dict[str, int]
    ) -> bool:
        """Check that ``allocation`` gives an in-stock integer count for every item type.

        Returns:
            ``False`` if any item type is missing, is not an ``int`` (booleans
            are rejected too), is negative, or exceeds the stock; else ``True``.
        """
        for t in self.item_types:
            v = allocation.get(t)
            # bool is a subclass of int, so it must be excluded explicitly;
            # a missing item (None) is rejected by the same check.
            if isinstance(v, bool) or not isinstance(v, int):
                return False
            if v < 0 or v > int(stock.get(t, 0)):
                return False
        return True

    def set_new_round_of_variant(self):
        """Start a new round: keep the same values, resample the stock."""
        self.state.quantities = self._sample_stock()

    def get_info_of_variant(
        self, state: NegotiationState, actions: Dict[AgentId, Any]
    ) -> Dict[str, Any]:
        """Return deep copies of the quantities, values and splits held in ``state``.

        ``actions`` is accepted but not used here.
        """
        return {
            "quantities": copy.deepcopy(state.quantities),
            "values": copy.deepcopy(state.values),
            "splits": copy.deepcopy(state.splits),
        }

    def get_rewards(self, splits: Dict[AgentId, Split]) -> Dict[AgentId, float]:
        """Return the reward of each agent for the submitted splits.

        Both agents score zero unless, for every item type, the two claimed
        amounts add up exactly to the available quantity. Otherwise each agent
        earns the sum of (amount kept) x (own per-item value).
        """
        agent_a, agent_b = self.agent_ids[0], self.agent_ids[1]
        split_a = splits[agent_a].items_given_to_self
        split_b = splits[agent_b].items_given_to_self
        # Accumulate as floats so the result matches the declared return type.
        rewards = {agent_a: 0.0, agent_b: 0.0}
        for t in self.item_types:
            # If not complementary, return 0!
            if split_a[t] + split_b[t] != self.state.quantities[t]:
                return {agent_a: 0.0, agent_b: 0.0}
            rewards[agent_a] += split_a[t] * self.state.values[agent_a][t]
            rewards[agent_b] += split_b[t] * self.state.values[agent_b][t]
        return rewards

    def get_obs(self):
        """Return a mapping from every agent id to that agent's observation."""
        return {agent_id: self.get_obs_agent(agent_id) for agent_id in self.agent_ids}

    def get_obs_agent(self, agent_id):
        """Build the ``DealNoDealObs`` seen by ``agent_id`` from the current state."""
        other_id = self._other(agent_id)
        # NOTE(review): the no-press variant passes a different keyword set
        # (e.g. ``other_agent``, ``last_split_agent``) to its observation —
        # confirm these keywords still match the NegotiationObs fields.
        obs = DealNoDealObs(
            round_nb=self.state.round_nb,
            last_message=self.state.last_message,
            current_agent=self.state.current_agent,
            quantities=copy.deepcopy(self.state.quantities),
            value=0.0,  # unused in DOND
            other_agent_split=None,  # not meaningful until split
            split_phase=self.state.split_phase,
            quota_messages_per_agent_per_round=self.quota_messages_per_agent_per_round,
            my_values=copy.deepcopy(self.state.values[agent_id]),
            item_types=list(self.item_types),
            # Values are kept across rounds (see set_new_round_of_variant), so
            # the co-agent's current values are what gets exposed here.
            previous_values_coagent=copy.deepcopy(self.state.values.get(other_id, {})),
        )
        return obs

    def reset(self):
        """Sample a fresh stock and value tables, rebuild the state, return observations."""
        start_agent = self.agent_ids[self._starting_agent_index]
        stock = self._sample_stock()
        values = self._sample_values_pair()
        # NOTE(review): the no-press variant also passes previous_splits,
        # previous_points and previous_quantities to its state — confirm they
        # are optional on NegotiationState.
        self.state = DealNoDealState(
            round_nb=0,
            last_message="",
            current_agent=start_agent,
            quantities=stock,
            values=values,
            previous_values=None,
            splits={aid: None for aid in self.agent_ids},
            nb_messages_sent={aid: 0 for aid in self.agent_ids},
            split_phase=False,
            item_types=list(self.item_types),
        )
        return self.get_obs()
src_code_for_reproducibility/markov_games/negotiation/no_press_nego_agent.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ File: mllm/markov_games/negotiation/no_press_nego_agent.py
3
+ Summary: Agent variant for no-press negotiations without explicit messaging.
4
+ """
5
+
6
+ from typing import Any, Dict, List, Tuple
7
+
8
+ from mllm.markov_games.negotiation.nego_agent import (
9
+ NegotiationAgent,
10
+ NegotiationAgentState,
11
+ )
12
+ from mllm.markov_games.negotiation.nego_simulation import Split
13
+ from mllm.markov_games.negotiation.no_press_nego_simulation import NoPressObs
14
+ from mllm.markov_games.rollout_tree import AgentActLog, ChatTurn
15
+
16
+
17
class NoPressAgent(NegotiationAgent):
    """Negotiation agent for the no-press variant: no chat, only split proposals."""

    def __init__(self, *args, **kwargs):
        """Initialise the base agent, then install the no-press prompt templates."""
        super().__init__(*args, **kwargs)
        # No communication in this variant
        self.intro_prompt = (
            "Welcome to an iterated game. You are {agent}. The other agent is {other_agent}.\n"
            "Setup:\n"
            "1. The game consists of multiple independent rounds.\n"
            "2. In each round, there are multiple items to split between the two agents.\n"
            "3. Both agents are assigned a per-item value between 1 and 20 (inclusive) in each round.\n"
            "4. You can observe per-item values of both agents.\n"
            "5. Because assignments are random, both agents are equally likely to have same expected per-item value.\n"
            "\n"
            "Protocol:\n"
            "1. Both agents simultaneously propose the amount of each item they will keep.\n"
            "2. If the total sum of proposals is less than or equal to the item quantity, both agents receive their proposed amounts.\n"
            "3. If the total sum of proposals exceeds the item quantity, they are allocated proportionally.\n"
            "4. Your points for the round = (amount you receive per item) x (your per-item value for that round), added across all items.\n"
            "5. Points are accumulated across rounds.\n"
            "Your goal: {goal}\n"
        )
        self.new_round_prompt = (
            "A New Round Begins\n"
            "The items to split are {quantities}.\n"
            "Your per-item values are {value} and {other_agent}'s per-item values are {other_value}."
        )
        self.last_round_prompt = (
            "Last Round Summary:\n"
            " - Items to split: {last_quantities}\n"
            " - Your per-item values: {last_value_agent}\n"
            " - {other_agent}'s per-item values: {last_value_coagent}\n"
            " - You proposed: {last_split_agent}\n"
            " - You earned: {last_points_agent} points\n"
            " - {other_agent} proposed: {last_split_coagent}\n"
            " - {other_agent} earned: {last_points_coagent} points\n"
            " - Round Complete.\n"
        )
        self.send_split_prompt = "Submit Your Proposal\n" "Respond as {proposal_style}"

    @staticmethod
    def _item_alternation(items: List[str]) -> str:
        """Build the regex alternation accepting singular and plural item names.

        A trailing ``s`` on an item name is made optional; otherwise an optional
        ``s`` is appended. Names are escaped so regex metacharacters in an item
        name are matched literally. Shared by the split regex and the parser so
        the two cannot drift apart.
        """
        import re as _re

        alternatives = []
        for item in items:
            stem = item[:-1] if item.endswith("s") else item
            alternatives.append(f"{_re.escape(stem)}s?")
        return "|".join(alternatives)

    @staticmethod
    def _canonical_item(item_str: str, items: List[str]):
        """Map singular/plural text back to the configured item id.

        Returns:
            The matching entry of ``items``, or ``None`` when nothing matches.
        """
        needle = item_str.lower()
        for orig in items:
            lowered = orig.lower()
            if needle == lowered:
                return orig
            if orig.endswith("s") and needle == orig[:-1].lower():
                return orig
            if not orig.endswith("s") and needle == lowered + "s":
                return orig
        return None

    def get_message_regex(self, observation: NoPressObs) -> str:
        """Return an empty pattern because the no-press variant forbids chat."""
        return r"^$"  # No messages allowed

    def get_split_regex(self, observation: NoPressObs) -> str:
        """Match proposals like ``Proposal: 4 coins, 6 apples`` case-insensitively.

        Item names come from ``observation.quantities``; each amount must be an
        integer from 0 to 10.
        """
        items = list(observation.quantities.keys())
        # Accept both singular and plural forms
        item_pattern = self._item_alternation(items)
        regex = rf"(?i)Proposal:\s*((?:\s*(?P<num>(10|[0-9]))\s*(?P<item>{item_pattern})\s*,?)+)"
        return regex

    def get_split_action(self, policy_output: str, observation: NoPressObs) -> Split:
        """Parse the LLM proposal into a normalized ``Split`` structure.

        Every item in ``observation.quantities`` starts at 0. If the stripped
        output matches the split regex, each ``<number> <item>`` pair found sets
        that item's amount (a later pair for the same item overwrites an earlier
        one). Output that does not match yields an all-zero split.

        Args:
            policy_output: Raw text produced by the policy.
            observation: Current observation; its ``quantities`` keys define the
                valid item names.

        Returns:
            A ``Split`` whose ``items_given_to_self`` maps each item to an int.
        """
        import re as _re

        items = list(observation.quantities.keys())
        items_given_to_self = {item: 0 for item in items}
        m = _re.match(self.get_split_regex(observation), policy_output.strip())
        if m:
            # Find all (number, item) pairs inside the matched proposal body.
            inner_regex = rf"(?i)(10|[0-9])\s*({self._item_alternation(items)})"
            for num, item in _re.findall(inner_regex, m.group(1)):
                canonical = self._canonical_item(item, items)
                # Never record an unrecognised name under a ``None`` key.
                if canonical is not None:
                    items_given_to_self[canonical] = int(num)
        return Split(items_given_to_self=items_given_to_self)
src_code_for_reproducibility/markov_games/negotiation/no_press_nego_simulation.py ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ File: mllm/markov_games/negotiation/no_press_nego_simulation.py
3
+ Summary: Simulation driver for no-press negotiation scenarios.
4
+ """
5
+
6
+ import copy
7
+ from collections import defaultdict
8
+ from dataclasses import dataclass
9
+ from typing import Any, Dict, List, Literal, Tuple
10
+
11
+ from mllm.markov_games.negotiation.nego_simulation import (
12
+ NegotiationObs,
13
+ NegotiationSimulation,
14
+ NegotiationState,
15
+ Split,
16
+ compute_tas_style_rewards,
17
+ )
18
+
19
+ AgentId = str
20
+
21
+
22
@dataclass
class NoPressState(NegotiationState):
    """State type of the no-press variant; adds no fields to NegotiationState."""
27
+
28
+
29
@dataclass
class NoPressObs(NegotiationObs):
    """Observation that also exposes the co-agent's values (no messaging exists)."""

    # The co-agent's per-item values for the current round.
    other_value: Dict[str, float]
34
+
35
+
36
class NoPressSimulation(NegotiationSimulation):
    """Negotiation simulation without messaging: every round is a simultaneous split.

    Quantities are fixed at 10 units per item; per-item values are resampled
    each round according to ``game_type``.
    """

    def __init__(
        self,
        game_type: Literal["10-1-exclusive", "10-1-ties", "1-to-20"] = "1-to-20",
        same_round_value: bool = True,
        atleast_one_conflict: bool = False,
        *args,
        **kwargs,
    ):
        """Store the value-sampling options, then initialise the parent simulation.

        Args:
            game_type: Value template. ``"10-1-exclusive"`` gives one agent 10
                and the other 1 per item; ``"10-1-ties"`` draws 1 or 10
                independently per agent; ``"1-to-20"`` draws integers in 1..20.
            same_round_value: If true, resample until both agents' values sum
                to the same total.
            atleast_one_conflict: If true, resample until at least one item has
                different values for the two agents.
            *args: Forwarded to ``NegotiationSimulation.__init__``.
            **kwargs: Forwarded to ``NegotiationSimulation.__init__``.
        """
        # Set before the parent initialiser runs so the options already exist
        # if it triggers any sampling.
        self.game_type = game_type
        self.same_round_value = same_round_value
        self.atleast_one_conflict = atleast_one_conflict
        super().__init__(*args, **kwargs)

    def _ensure_values_satisfiable(self, item_types: List[str]) -> None:
        """Raise instead of looping forever on constraints no sample can meet.

        Raises:
            ValueError: If the configured constraints are provably impossible
                for ``item_types``.
        """
        n_items = len(item_types)
        if (
            self.same_round_value
            and self.game_type == "10-1-exclusive"
            and n_items % 2 == 1
        ):
            # Totals are equal only when each agent holds exactly half the 10s.
            raise ValueError(
                "game_type '10-1-exclusive' with same_round_value=True requires "
                f"an even number of item types, got {n_items}."
            )
        if self.same_round_value and self.atleast_one_conflict and n_items == 1:
            # One item cannot both differ between agents and sum to equal totals.
            raise ValueError(
                "same_round_value=True and atleast_one_conflict=True cannot both "
                "hold with a single item type."
            )

    def _sample_values(self) -> Dict[AgentId, dict]:
        """Sample per-item valuations according to the configured template.

        Draws are rejected and repeated until the ``atleast_one_conflict`` and
        ``same_round_value`` constraints (when enabled) are both satisfied.

        Returns:
            Mapping agent id -> {item -> integer value}.

        Raises:
            ValueError: If the constraints can never be satisfied.
        """
        state = getattr(self, "state", None)
        if state is None:
            # Use the same lower-cased keys as _sample_quantities so values and
            # quantities always share item keys, even before a state exists.
            item_types = [item.lower() for item in self.item_types]
        else:
            item_types = list(state.quantities.keys())
        self._ensure_values_satisfiable(item_types)

        values = defaultdict(dict)
        while True:
            for item in item_types:
                if self.game_type == "10-1-exclusive":
                    v = int(self.rng.choice([1, 10]))
                    values[self.agent_ids[0]][item] = v
                    values[self.agent_ids[1]][item] = 10 if v == 1 else 1
                elif self.game_type == "10-1-ties":
                    for aid in self.agent_ids:
                        values[aid][item] = int(self.rng.choice([1, 10]))
                elif self.game_type == "1-to-20":
                    for aid in self.agent_ids:
                        values[aid][item] = int(self.rng.integers(1, 21))
            if self.atleast_one_conflict:
                # A conflict is an item the agents value differently.
                has_conflict = any(
                    len({values[aid][item] for aid in self.agent_ids}) > 1
                    for item in item_types
                )
                if not has_conflict:
                    continue
            totals = {sum(per_item.values()) for per_item in values.values()}
            if len(totals) == 1 or not self.same_round_value:
                break
        return values

    def _sample_quantities(self) -> Dict[str, int]:
        """No-press setups use symmetric 10-unit stocks for every item."""
        return {item.lower(): 10 for item in self.item_types}

    def set_new_round_of_variant(self):
        """Refresh quantities/values and jump directly into the simultaneous split."""
        self.state.quantities = self._sample_quantities()
        self.state.values = self._sample_values()
        self.state.split_phase = True

    def get_info_of_variant(
        self, state: NegotiationState, actions: Dict[AgentId, Any]
    ) -> Dict[str, Any]:
        """Return deep copies of the quantities, values and splits held in ``state``.

        ``actions`` is accepted but not used here.
        """
        return {
            "quantities": copy.deepcopy(state.quantities),
            "values": copy.deepcopy(state.values),
            "splits": copy.deepcopy(state.splits),
        }

    def get_rewards(self, splits: Dict[AgentId, Split]) -> Dict[AgentId, float]:
        """Compute rewards by delegating to ``compute_tas_style_rewards``."""
        return compute_tas_style_rewards(
            self.agent_ids, self.state.values, splits, self.state.quantities
        )

    def get_obs(self):
        """Return a mapping from every agent id to that agent's observation."""
        return {agent_id: self.get_obs_agent(agent_id) for agent_id in self.agent_ids}

    def get_obs_agent(self, agent_id):
        """Build the ``NoPressObs`` seen by ``agent_id``.

        The ``last_*`` fields summarise the previous round and are ``None``
        while the corresponding ``previous_*`` state entry is ``None``.
        """
        state = self.state
        # ``_other`` is not defined in this class; presumably inherited.
        other_id = self._other(agent_id)

        prev_values = state.previous_values
        last_value_agent = None if prev_values is None else prev_values.get(agent_id)
        last_value_coagent = None if prev_values is None else prev_values.get(other_id)

        prev_points = state.previous_points
        last_points_agent = (
            None if prev_points is None else round(prev_points.get(agent_id), 1)
        )
        last_points_coagent = (
            None if prev_points is None else round(prev_points.get(other_id), 1)
        )

        last_split_agent = None
        last_split_coagent = None
        if state.previous_splits is not None:
            last_split_agent = state.previous_splits[agent_id].items_given_to_self
            last_split_coagent = state.previous_splits[other_id].items_given_to_self

        return NoPressObs(
            round_nb=state.round_nb,
            last_message="",
            quota_messages_per_agent_per_round=self.quota_messages_per_agent_per_round,
            current_agent=state.current_agent,
            other_agent=self.agent_id_to_name[other_id],
            quantities=state.quantities,
            item_types=self.item_types,
            value=state.values[agent_id],
            split_phase=state.split_phase,
            last_split_agent=last_split_agent,
            last_value_agent=last_value_agent,
            last_points_agent=last_points_agent,
            last_split_coagent=last_split_coagent,
            last_value_coagent=last_value_coagent,
            last_points_coagent=last_points_coagent,
            other_value=state.values[other_id],
            last_quantities=state.previous_quantities,
        )

    def reset(self):
        """Sample fresh quantities and values, rebuild the state, return observations."""
        start_agent = self.agent_ids[self._starting_agent_index]
        quantities = self._sample_quantities()
        values = self._sample_values()
        self.state = NoPressState(
            round_nb=0,
            last_message="",
            current_agent=start_agent,
            quantities=quantities,
            values=values,
            previous_values=None,
            splits={aid: None for aid in self.agent_ids},
            nb_messages_sent={aid: 0 for aid in self.agent_ids},
            split_phase=True,  # this variant starts directly in the split phase
            previous_splits=None,
            previous_points=None,
            previous_quantities=None,
        )
        return self.get_obs()
src_code_for_reproducibility/models/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (269 Bytes). View file
 
src_code_for_reproducibility/models/__pycache__/inference_backend.cpython-312.pyc ADDED
Binary file (2.38 kB). View file
 
src_code_for_reproducibility/models/__pycache__/scalar_critic.cpython-312.pyc ADDED
Binary file (3.32 kB). View file
 
src_code_for_reproducibility/training/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (277 Bytes). View file