dereckpichemila commited on
Commit
40c85cf
·
verified ·
1 Parent(s): fa30e5a

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .hydra/config.yaml +159 -0
  2. .hydra/overrides.yaml +1 -0
  3. run.log +0 -0
  4. src_code_for_reproducibility/__init__.py +0 -0
  5. src_code_for_reproducibility/docs/Makefile +19 -0
  6. src_code_for_reproducibility/docs/generate_docs.py +249 -0
  7. src_code_for_reproducibility/docs/make.bat +35 -0
  8. src_code_for_reproducibility/docs/source/index.rst +22 -0
  9. src_code_for_reproducibility/docs/source/installation.rst +10 -0
  10. src_code_for_reproducibility/docs/source/launch.rst +0 -0
  11. src_code_for_reproducibility/docs/source/marl_standard.rst +141 -0
  12. src_code_for_reproducibility/docs/source/src.environments.dond.dond_return_funcs.rst +7 -0
  13. src_code_for_reproducibility/docs/source/src.environments.dond.dond_training_data_funcs.rst +7 -0
  14. src_code_for_reproducibility/docs/source/src.environments.dond.rst +19 -0
  15. src_code_for_reproducibility/docs/source/src.experiments.arithmetic_test.rst +7 -0
  16. src_code_for_reproducibility/docs/source/src.experiments.generate_and_train.rst +7 -0
  17. src_code_for_reproducibility/docs/source/src.experiments.last_completion.rst +7 -0
  18. src_code_for_reproducibility/docs/source/src.generation.rst +15 -0
  19. src_code_for_reproducibility/docs/source/src.models.local_llm.rst +7 -0
  20. src_code_for_reproducibility/docs/source/src.models.new_local_llm.rst +7 -0
  21. src_code_for_reproducibility/docs/source/src.models.oai_agent.rst +7 -0
  22. src_code_for_reproducibility/docs/source/src.rst +28 -0
  23. src_code_for_reproducibility/docs/source/src.training.rst +19 -0
  24. src_code_for_reproducibility/docs/source/src.training.train_main.rst +7 -0
  25. src_code_for_reproducibility/markov_games/__pycache__/export_utils.cpython-310.pyc +0 -0
  26. src_code_for_reproducibility/markov_games/ipd/__pycache__/__init__.cpython-310.pyc +0 -0
  27. src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_agent.cpython-310.pyc +0 -0
  28. src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_agent.cpython-311.pyc +0 -0
  29. src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_game.cpython-310.pyc +0 -0
  30. src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_log_funcs.cpython-310.pyc +0 -0
  31. src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_log_match.cpython-310.pyc +0 -0
  32. src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_player.cpython-310.pyc +0 -0
  33. src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_simulation.cpython-310.pyc +0 -0
  34. src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_simulation.cpython-311.pyc +0 -0
  35. src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_statistics.cpython-310.pyc +0 -0
  36. src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_statistics.cpython-311.pyc +0 -0
  37. src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_statistics_funcs.cpython-310.pyc +0 -0
  38. src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_training_data.cpython-310.pyc +0 -0
  39. src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_training_data_funcs.cpython-310.pyc +0 -0
  40. src_code_for_reproducibility/markov_games/ipd/ipd_agent.py +122 -0
  41. src_code_for_reproducibility/markov_games/ipd/ipd_simulation.py +162 -0
  42. src_code_for_reproducibility/markov_games/runners/__pycache__/alternative_actions_runner.cpython-311.pyc +0 -0
  43. src_code_for_reproducibility/markov_games/runners/__pycache__/linear_runner.cpython-311.pyc +0 -0
  44. src_code_for_reproducibility/utils/__init__.py +0 -0
  45. src_code_for_reproducibility/utils/__pycache__/__init__.cpython-310.pyc +0 -0
  46. src_code_for_reproducibility/utils/__pycache__/__init__.cpython-311.pyc +0 -0
  47. src_code_for_reproducibility/utils/__pycache__/common_imports.cpython-310.pyc +0 -0
  48. src_code_for_reproducibility/utils/__pycache__/dict_get_path.cpython-310.pyc +0 -0
  49. src_code_for_reproducibility/utils/__pycache__/extra_stats.cpython-310.pyc +0 -0
  50. src_code_for_reproducibility/utils/__pycache__/get_coagent_id.cpython-310.pyc +0 -0
.hydra/config.yaml ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment:
2
+ nb_epochs: 1000
3
+ nb_matches_per_iteration: 64
4
+ reinit_matches_each_it: true
5
+ checkpoint_every_n_iterations: 10
6
+ start_epoch: 0
7
+ resume_experiment: true
8
+ base_seed: 0
9
+ seed_group_size: 1
10
+ train: true
11
+ name: tas_rps_no_regex_prev_ad_align_buffer_gae
12
+ agent_buffer: true
13
+ keep_agent_buffer_count: ${lora_count}
14
+ agent_buffer_recent_k: -1
15
+ temperature: 1.0
16
+ markov_games:
17
+ runner_method_name: LinearRunner
18
+ runner_kwargs: {}
19
+ group_by_round: true
20
+ simulation_class_name: TrustAndSplitRPSSimulation
21
+ simulation_init_args:
22
+ nb_of_rounds: 10
23
+ quota_messages_per_agent_per_round: 1
24
+ agents:
25
+ 0:
26
+ agent_id: ${agent_0_id}
27
+ agent_name: Alice
28
+ agent_class_name: TrustAndSplitRPSAgent
29
+ policy_id: base_llm/agent_adapter
30
+ init_kwargs:
31
+ goal: Maximize your total points over the whole game.
32
+ num_message_chars: 500
33
+ 1:
34
+ agent_id: ${agent_1_id}
35
+ agent_name: Bob
36
+ agent_class_name: TrustAndSplitRPSAgent
37
+ policy_id: base_llm/agent_adapter
38
+ init_kwargs:
39
+ goal: Maximize your total points over the whole game.
40
+ num_message_chars: 500
41
+ models:
42
+ base_llm:
43
+ class: LeanLocalLLM
44
+ init_args:
45
+ llm_id: base_llm
46
+ model_name: Qwen/Qwen3-4B-Instruct-2507
47
+ inference_backend: vllm
48
+ hf_kwargs:
49
+ device_map: auto
50
+ torch_dtype: bfloat16
51
+ max_memory:
52
+ 0: 20GiB
53
+ attn_implementation: flash_attention_2
54
+ inference_backend_init_kwargs:
55
+ seed: ${experiment.base_seed}
56
+ enable_prefix_caching: true
57
+ max_model_len: 10000.0
58
+ gpu_memory_utilization: 0.5
59
+ dtype: bfloat16
60
+ trust_remote_code: true
61
+ max_lora_rank: 32
62
+ enforce_eager: false
63
+ max_loras: ${lora_count}
64
+ max_cpu_loras: ${lora_count}
65
+ enable_sleep_mode: true
66
+ enable_lora: true
67
+ inference_backend_sampling_params:
68
+ temperature: ${temperature}
69
+ top_p: 1.0
70
+ max_tokens: 400
71
+ top_k: -1
72
+ adapter_configs:
73
+ agent_adapter:
74
+ task_type: CAUSAL_LM
75
+ r: 32
76
+ lora_alpha: 64
77
+ lora_dropout: 0.0
78
+ target_modules: all-linear
79
+ critic_adapter:
80
+ task_type: CAUSAL_LM
81
+ r: 32
82
+ lora_alpha: 64
83
+ lora_dropout: 0.0
84
+ target_modules: all-linear
85
+ enable_thinking: false
86
+ regex_max_attempts: 3
87
+ critics:
88
+ agent_critic:
89
+ module_pointer:
90
+ - base_llm
91
+ - critic_adapter
92
+ optimizers:
93
+ agent_optimizer:
94
+ module_pointer:
95
+ - base_llm
96
+ - agent_adapter
97
+ optimizer_class_name: torch.optim.Adam
98
+ init_args:
99
+ lr: 3.0e-06
100
+ weight_decay: 0.0
101
+ critic_optimizer:
102
+ module_pointer: agent_critic
103
+ optimizer_class_name: torch.optim.Adam
104
+ init_args:
105
+ lr: 3.0e-06
106
+ weight_decay: 0.0
107
+ trainers:
108
+ agent_trainer:
109
+ class: TrainerAdAlign
110
+ module_pointers:
111
+ policy:
112
+ - base_llm
113
+ - agent_adapter
114
+ policy_optimizer: agent_optimizer
115
+ critic: agent_critic
116
+ critic_optimizer: critic_optimizer
117
+ kwargs:
118
+ entropy_coeff: 0.0
119
+ kl_coeff: 0.0
120
+ gradient_clipping: 1.0
121
+ restrict_tokens: null
122
+ mini_batch_size: 1
123
+ use_gradient_checkpointing: false
124
+ temperature: ${temperature}
125
+ device: cuda:0
126
+ use_gae: true
127
+ whiten_advantages: false
128
+ whiten_advantages_time_step_wise: false
129
+ skip_discounted_state_visitation: true
130
+ use_gae_lambda_annealing: false
131
+ gae_lambda_annealing_method: None
132
+ gae_lambda_annealing_method_params: None
133
+ gae_lambda_annealing_limit: 0.96
134
+ discount_factor: 0.98
135
+ use_rloo: false
136
+ enable_tokenwise_logging: false
137
+ pg_loss_normalization: batch
138
+ ad_align_force_coop_first_step: false
139
+ ad_align_clipping: null
140
+ ad_align_gamma: 0.98
141
+ ad_align_exclude_k_equals_t: false
142
+ ad_align_use_sign: false
143
+ ad_align_beta: 1.0
144
+ use_old_ad_align: true
145
+ use_time_regularization: false
146
+ rloo_branch: false
147
+ reuse_baseline: false
148
+ reward_normalizing_constant: 100.0
149
+ train_on_which_data:
150
+ agent_trainer: ${agent_ids}
151
+ lora_count: 10
152
+ common_agent_kwargs:
153
+ goal: Maximize your total points over the whole game.
154
+ num_message_chars: 500
155
+ agent_0_id: Alice
156
+ agent_1_id: Bob
157
+ agent_ids:
158
+ - Alice
159
+ - Bob
.hydra/overrides.yaml ADDED
@@ -0,0 +1 @@
 
 
1
+ - experiment.name=tas_rps_no_regex_prev_ad_align_buffer_gae
run.log ADDED
The diff for this file is too large to render. See raw diff
 
src_code_for_reproducibility/__init__.py ADDED
File without changes
src_code_for_reproducibility/docs/Makefile ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Minimal makefile for Sphinx documentation
2
+
3
+ # You can set these variables from the command line, and also
4
+ # from the environment for the first two.
5
+ SPHINXOPTS ?=
6
+ SPHINXBUILD ?= sphinx-build
7
+ SOURCEDIR = source
8
+ BUILDDIR = build
9
+
10
+ # Put it first so that "make" without argument is like "make help".
11
+ help:
12
+ @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(SPHINXFLAGS)
13
+
14
+ .PHONY: help Makefile
15
+
16
+ # Catch-all target: route all unknown targets to Sphinx using the new
17
+ # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
18
+ %: Makefile
19
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(SPHINXFLAGS)
src_code_for_reproducibility/docs/generate_docs.py ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Script to automatically generate Sphinx documentation for all modules and build the HTML website.
4
+ """
5
+ import importlib.util
6
+ import os
7
+ import subprocess
8
+ import sys
9
+
10
+
11
def check_and_install_dependencies():
    """Check for required dependencies and install them if missing.

    Each required distribution is probed under its *import* name with
    ``importlib.util.find_spec``. Distributions whose module cannot be found
    are installed with ``pip`` using the running interpreter
    (``sys.executable``), so they land in the active environment.

    Raises:
        subprocess.CalledProcessError: If the ``pip install`` command fails.
    """
    # Distribution (pip) name -> importable module name. The import name
    # cannot be derived by replacing "-" with "_": the ``sphinxcontrib-*``
    # distributions install into the ``sphinxcontrib`` namespace package, so
    # a naive conversion would report them as missing on every run.
    required_packages = {
        "sphinx": "sphinx",
        "sphinx-rtd-theme": "sphinx_rtd_theme",
        "sphinxcontrib-napoleon": "sphinxcontrib.napoleon",
        "sphinxcontrib-mermaid": "sphinxcontrib.mermaid",
        "sphinx-autodoc-typehints": "sphinx_autodoc_typehints",
    }

    missing_packages = [
        package
        for package, module_name in required_packages.items()
        if not _module_is_available(module_name)
    ]

    if not missing_packages:
        print("All required dependencies are already installed")
        return

    print(f"Installing missing dependencies: {', '.join(missing_packages)}")
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install"] + missing_packages
    )
    print("Dependencies installed successfully")


def _module_is_available(module_name):
    """Return True if an import spec can be found for *module_name*."""
    try:
        return importlib.util.find_spec(module_name) is not None
    except (ImportError, ValueError):
        # For a dotted name find_spec imports the parent package first and
        # raises ModuleNotFoundError when that parent is absent; ValueError
        # is raised for modules whose __spec__ is unset. Either way the
        # module is not usable as-is.
        return False
40
+
41
+
42
def create_makefile(docs_dir):
    """Create a Makefile for Sphinx documentation if it doesn't exist.

    Args:
        docs_dir: Directory in which ``Makefile`` should live.

    An existing ``Makefile`` is never overwritten; the function only reports
    that it is already present.
    """
    makefile_path = os.path.join(docs_dir, "Makefile")

    if os.path.exists(makefile_path):
        print(f"Makefile already exists at {makefile_path}")
        return

    print(f"Creating Makefile at {makefile_path}")

    # Recipe lines MUST start with a TAB character or make aborts with
    # "missing separator". The tab is spelled as an explicit "\t" escape so
    # that editors/formatters that convert tabs to spaces cannot silently
    # break the generated file.
    makefile_content = """# Minimal makefile for Sphinx documentation

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = source
BUILDDIR = build

# Put it first so that "make" without argument is like "make help".
help:
\t@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(SPHINXFLAGS)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
\t@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(SPHINXFLAGS)
"""

    # Explicit encoding keeps the output independent of the locale default.
    with open(makefile_path, "w", encoding="utf-8") as f:
        f.write(makefile_content)

    print("Makefile created successfully")
77
+
78
+
79
def create_make_bat(docs_dir):
    """Create a make.bat file for Windows if it doesn't exist.

    Args:
        docs_dir: Directory in which ``make.bat`` should live.

    An existing ``make.bat`` is never overwritten; the function only reports
    that it is already present.
    """
    make_bat_path = os.path.join(docs_dir, "make.bat")

    if os.path.exists(make_bat_path):
        print(f"make.bat already exists at {make_bat_path}")
        return

    print(f"Creating make.bat at {make_bat_path}")

    # Indentation inside the parenthesised blocks is written as explicit
    # "\t" escapes; cmd.exe does not care about it, but spelling it out keeps
    # the literal stable under whitespace-normalising tools.
    make_bat_content = """@ECHO OFF

pushd %~dp0

REM Command file for Sphinx documentation

if "%SPHINXBUILD%" == "" (
\tset SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build

%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
\techo.
\techo.The 'sphinx-build' command was not found. Make sure you have Sphinx
\techo.installed, then set the SPHINXBUILD environment variable to point
\techo.to the full path of the 'sphinx-build' executable. Alternatively you
\techo.may add the Sphinx directory to PATH.
\techo.
\techo.If you don't have Sphinx installed, grab it from
\techo.https://www.sphinx-doc.org/
\texit /b 1
)

if "%1" == "" goto help

%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd
"""

    # Batch files are consumed by cmd.exe, which expects CRLF line endings
    # (LF-only files can break label handling). Force CRLF so the file is
    # the same no matter which platform generated it.
    with open(make_bat_path, "w", encoding="utf-8", newline="\r\n") as f:
        f.write(make_bat_content)

    print("make.bat created successfully")
130
+
131
+
132
def _run_and_report(cmd, failure_label):
    """Run *cmd*, echo its captured output, and exit with status 1 on failure.

    Args:
        cmd: Command and arguments as a list (executed without a shell).
        failure_label: Human-readable name of the step, used in the error
            message printed when the command returns a non-zero status.
    """
    print(f"Running command: {' '.join(cmd)}")
    try:
        result = subprocess.run(cmd, capture_output=True, text=True)
    except FileNotFoundError:
        # The executable is not on PATH; report it instead of a traceback.
        print(f"Error: command not found: {cmd[0]}")
        sys.exit(1)

    print("STDOUT:")
    print(result.stdout)

    print("STDERR:")
    print(result.stderr)

    if result.returncode != 0:
        print(f"Error: {failure_label} failed with return code {result.returncode}")
        sys.exit(1)


def main():
    """Generate the API .rst files, build the HTML docs, and open them.

    Steps:
        1. Ensure the Sphinx tool chain is installed.
        2. Run ``sphinx-apidoc`` over ``<project_root>/src`` into
           ``<this dir>/source``.
        3. Create ``Makefile`` / ``make.bat`` next to this script if absent.
        4. Run ``make html`` (``make.bat html`` on Windows) from this
           script's directory.
        5. Try to open ``build/html/index.html`` in the default browser.

    Exits the process with status 1 if the source directory is missing or
    if either external command fails.
    """
    # Check and install required dependencies
    print("=== Checking dependencies ===")
    check_and_install_dependencies()

    # Resolve every path relative to this script, not the caller's cwd.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.dirname(script_dir)
    source_dir = os.path.join(project_root, "src")
    docs_source_dir = os.path.join(script_dir, "source")

    # Print paths for debugging
    print(f"Script directory: {script_dir}")
    print(f"Project root: {project_root}")
    print(f"Source directory: {source_dir}")
    print(f"Docs source directory: {docs_source_dir}")

    # Make sure the source directory exists
    if not os.path.exists(source_dir):
        print(f"Error: Source directory {source_dir} does not exist!")
        sys.exit(1)

    # Make sure the docs source directory exists
    if not os.path.exists(docs_source_dir):
        print(f"Creating docs source directory: {docs_source_dir}")
        os.makedirs(docs_source_dir)

    # Step 1: Run sphinx-apidoc to generate .rst files for all modules
    print("\n=== Generating API documentation ===")
    cmd = [
        "sphinx-apidoc",
        "-f",  # Force overwriting of existing files
        "-e",  # Put documentation for each module on its own page
        "-M",  # Put module documentation before submodule documentation
        "-o",
        docs_source_dir,  # Output directory
        source_dir,  # Source code directory
    ]
    _run_and_report(cmd, "sphinx-apidoc")

    # List the files in the docs source directory
    print("\nFiles in docs/source directory:")
    for file in sorted(os.listdir(docs_source_dir)):
        print(f"  {file}")

    print("\nDocumentation source files generated successfully!")

    # Step 2: Create Makefile and make.bat if they don't exist
    create_makefile(script_dir)
    create_make_bat(script_dir)

    # Step 3: Build the HTML documentation
    print("\n=== Building HTML documentation ===")

    # Determine the build command based on the platform
    if os.name == "nt":  # Windows
        build_cmd = ["make.bat", "html"]
    else:  # Unix/Linux/Mac
        build_cmd = ["make", "html"]

    # The Makefile / make.bat use paths relative to the docs directory, and
    # make.bat itself is looked up relative to the current directory.
    os.chdir(script_dir)
    _run_and_report(build_cmd, "HTML build")

    # Get the path to the built HTML documentation
    html_dir = os.path.join(script_dir, "build", "html")
    index_path = os.path.join(html_dir, "index.html")

    if os.path.exists(index_path):
        print("\nHTML documentation built successfully!")
        print(f"You can view it by opening: {index_path}")

        # Opening a browser is best-effort: a failure here must not turn a
        # successful build into an error.
        try:
            import pathlib
            import webbrowser

            print("\nAttempting to open documentation in your default browser...")
            # as_uri() yields a well-formed file:// URL on every platform
            # (drive letters, backslashes and spaces are handled), unlike
            # naive "file://" + path concatenation.
            webbrowser.open(pathlib.Path(index_path).as_uri())
        except Exception as e:
            print(f"Could not open browser automatically: {e}")
    else:
        print(f"\nWarning: HTML index file not found at {index_path}")


if __name__ == "__main__":
    main()
src_code_for_reproducibility/docs/make.bat ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @ECHO OFF
2
+
3
+ pushd %~dp0
4
+
5
+ REM Command file for Sphinx documentation
6
+
7
+ if "%SPHINXBUILD%" == "" (
8
+ set SPHINXBUILD=sphinx-build
9
+ )
10
+ set SOURCEDIR=source
11
+ set BUILDDIR=build
12
+
13
+ %SPHINXBUILD% >NUL 2>NUL
14
+ if errorlevel 9009 (
15
+ echo.
16
+ echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
17
+ echo.installed, then set the SPHINXBUILD environment variable to point
18
+ echo.to the full path of the 'sphinx-build' executable. Alternatively you
19
+ echo.may add the Sphinx directory to PATH.
20
+ echo.
21
+ echo.If you don't have Sphinx installed, grab it from
22
+ echo.https://www.sphinx-doc.org/
23
+ exit /b 1
24
+ )
25
+
26
+ if "%1" == "" goto help
27
+
28
+ %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29
+ goto end
30
+
31
+ :help
32
+ %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33
+
34
+ :end
35
+ popd
src_code_for_reproducibility/docs/source/index.rst ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Welcome to LLM Negotiation's documentation!
2
+ ===========================================
3
+ This library is a collection of tools for training and evaluating LLM-based agents in multi-agent environments. It is designed to be easy to use and extend.
4
+
5
+ .. toctree::
6
+ :maxdepth: 3
7
+ :caption: Contents:
8
+
9
+ installation
10
+ marl_standard
11
+ environments
12
+ launch
13
+ usage
14
+ modules
15
+ contributing
16
+
17
+ Indices and tables
18
+ ==================
19
+
20
+ * :ref:`genindex`
21
+ * :ref:`modindex`
22
+ * :ref:`search`
src_code_for_reproducibility/docs/source/installation.rst ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Installation
2
+ ============
3
+
4
+ To install the package, run:
5
+
6
+ .. code-block:: bash
7
+
8
+ git clone https://github.com/yourusername/llm_negotiation.git
9
+ cd llm_negotiation
10
+ pip install -e .
src_code_for_reproducibility/docs/source/launch.rst ADDED
File without changes
src_code_for_reproducibility/docs/source/marl_standard.rst ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ==========================================================
2
+ Abstract Standard for Multi-Agent Negotiation Environments
3
+ ==========================================================
4
+
5
+ Multi-Agent Negotiation Environments require more features than gymnasium environments in order to be used as interfaces in general game running code.
6
+ The fundamental differences between gymnasium environments and Multi-Agent Negotiation Environments are:
7
+
8
+ 1. Response from the LLM is a text action, not a discrete action. Therefore, appropriate parsing of the text is required. The model may need to be run multiple times to get the full action.
9
+ This is why we introduce the `AgentHandler` class, which is responsible for parsing the LLM's response.
10
+ 2. The environment needs to be able to handle multi-agent interactions.
11
+ This is why we introduce the `NegotiationEnvironment` class, which is responsible for handling the multi-agent interactions.
12
+ 3. MARL environments are complex to describe. In different contexts, the same environment may be described differently. Therefore, both the environment and the agent handlers are
13
+ responsible for describing a particular trajectory. This information is given by the `get_log_info` method.
14
+ 4. There might be a lot of overlap between the neural networks used by each agent. For instance, the same model may be used for all agents. This motivates a requirement for a
15
+ policy identifier for each agent.
16
+
17
+ Taking inspiration from the `gymnasium <https://gymnasium.farama.org/>`_ library, we introduce a new standard for Multi-Agent Negotiation Environments.
18
+
19
+ Our standard is based on the following features:
20
+
21
+ Environments are of the form:
22
+
23
+ .. code-block:: python
24
+
25
+ class MarlEnvironment():
26
+
27
+ def __init__(self):
28
+ """Initialize the environment."""
29
+ pass
30
+
31
+ def reset(self):
32
+ """Reset the environment to an initial state and return the initial observation.
33
+ Returns:
34
+ observation (dict): A dictionary where keys are agent identifiers and values are observations.
35
+ """
36
+ # (...)
37
+ return observation
38
+
39
+ def step(self, actions):
40
+ """Take a step in the environment using the provided actions.
41
+
42
+ Args:
43
+ actions (dict): A dictionary where keys are agent identifiers and values are actions.
44
+
45
+ Returns:
46
+ observations (dict): A dictionary where keys are agent identifiers and values are observations.
47
+ reward (dict): A dictionary where keys are agent identifiers and values are rewards.
48
+ done (bool): Whether the episode has ended.
49
+ info (dict): Additional information about the environment.
50
+ """
51
+ # (...)
52
+ return observations, reward, done, info
53
+
54
+ def get_log_info(self):
55
+ """Get additional information about the environment. This information is used to log the game.
56
+ Returns:
57
+ log_info (dict): Information about the environment required to log the game.
58
+ """
59
+ # (...)
60
+ return log_info
61
+
62
+ def render(self):
63
+ """Render the current state of the environment."""
64
+ pass
65
+
66
+ def close(self):
67
+ """Perform any necessary cleanup."""
68
+ pass
69
+
70
+
71
+ class AgentState():
72
+
73
+ def __init__(self):
74
+ """Initialize the agent state."""
75
+ pass
76
+
77
+ def step(self, observation_from_env, policy_output=None):
78
+ """Update the agent state based on the observation and action.
79
+ The action is the output of the LLM.
80
+
81
+
82
+ Args:
83
+ observation_from_env (dict): The observation of the environment.
84
+ policy_output : The output of the policy.
85
+
86
+ Returns:
87
+ policy_id (str): The policy identifier.
88
+ policy_input (dict): The input to the policy.
89
+ action : The official action to be sent to the environment.
90
+ done (bool): Whether the LLM action is ready to be sent to the environment.
91
+ info (dict): Additional information about the agent.
92
+ """
93
+ # (...)
94
+ return policy_id, policy_input, action, done, info
95
+
96
+ def get_log_info(self):
97
+ """Get information about the agent required to log a trajectory.
98
+ Returns:
99
+ log_info (dict): Information about the agent required to log a trajectory.
100
+ """
101
+ # (...)
102
+ return log_info
103
+
104
+ def render(self):
105
+ """Render the current state of the environment."""
106
+ pass
107
+
108
+ def close(self):
109
+ """Perform any necessary cleanup."""
110
+ pass
111
+
112
+
113
+ Implicitly, the keys of the `observations` in the `step` method of the `MarlEnvironment` interface represent the set of agents from which an action is expected at the current step. The next step should only expect actions from the agents in the `observations` dictionary.
114
+
115
+ As you can see, both classes have a `get_log_info` method. This method is used to log the game. It returns a dictionary with keys being the agent identifiers and values being the information to log. The reason we need this is because the environment and the agent handler may need to log different information. It makes it easier to log from the perspective of each agent. The core environment class should not need to know about the details of the agent handler.
116
+
117
+
118
+
119
+ Running Environments in Parallel
120
+ --------------------------------
121
+ This standard allows the use of the `run_batched_matches` function (TODO: link) to run environments in an efficient way. The core idea is to batch the policy calls for all agents in the environment.
122
+
123
+ .. note::
124
+ The ``run_batched_matches`` function allows you to run multiple negotiation games, or "matches," in parallel.
125
+ After each environment is initialized, the function continuously loops over all active matches and checks which agents
126
+ are still pending actions. Each agent's logic can require multiple calls to the policy (e.g., an LLM) before an action
127
+ becomes "ready" to be sent to the environment. (For instance, an agent might need multiple policy calls before having a string which can be parsed into a valid action.) While an agent is waiting for a policy output, these calls for all agents across all matches are grouped together by unique policy identifier and processed in batch for efficiency. This is the core functionality of the ``run_batched_matches`` function.
128
+
129
+ Only once all actions from the required agents at a given step for an environment are ready does the function make a single ``env.step(...)`` call; this ensures
130
+ every match moves forward in lockstep for all its active agents. As soon as an environment signals it is done, the function
131
+ retrieves logged information from both the environment and the agent states before removing this match from the active set.
132
+
133
+ If there are more matches waiting to be processed, they are then started one by one to maintain the specified degree of parallelism.
134
+ This batching approach provides an efficient mechanism to handle multi-agent or multi-policy environments, ensuring minimal
135
+ overhead and a clear, unified flow for stepping through matches.
136
+
137
+ Here is a diagram that shows how the `run_batched_matches` function works at a high level:
138
+
139
+ .. image:: media/runbatch.png
140
+ :alt: High-level flow of the run_batched_matches function
141
+ :width: 1000px
src_code_for_reproducibility/docs/source/src.environments.dond.dond_return_funcs.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.environments.dond.dond\_return\_funcs module
2
+ ================================================
3
+
4
+ .. automodule:: src.environments.dond.dond_return_funcs
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.dond.dond_training_data_funcs.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.environments.dond.dond\_training\_data\_funcs module
2
+ ========================================================
3
+
4
+ .. automodule:: src.environments.dond.dond_training_data_funcs
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.dond.rst ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ src.environments.dond package
2
+ =============================
3
+
4
+ .. automodule:: src.environments.dond
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
8
+
9
+ Submodules
10
+ ----------
11
+
12
+ .. toctree::
13
+ :maxdepth: 4
14
+
15
+ src.environments.dond.dond_agent
16
+ src.environments.dond.dond_game
17
+ src.environments.dond.dond_log_funcs
18
+ src.environments.dond.dond_statistics_funcs
19
+ src.environments.dond.dond_training_data_funcs
src_code_for_reproducibility/docs/source/src.experiments.arithmetic_test.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.experiments.arithmetic\_test module
2
+ =======================================
3
+
4
+ .. automodule:: src.experiments.arithmetic_test
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.experiments.generate_and_train.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.experiments.generate\_and\_train module
2
+ ===========================================
3
+
4
+ .. automodule:: src.experiments.generate_and_train
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.experiments.last_completion.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.experiments.last\_completion module
2
+ =======================================
3
+
4
+ .. automodule:: src.experiments.last_completion
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.generation.rst ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ src.generation package
2
+ ======================
3
+
4
+ .. automodule:: src.generation
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
8
+
9
+ Submodules
10
+ ----------
11
+
12
+ .. toctree::
13
+ :maxdepth: 4
14
+
15
+ src.generation.run_games
src_code_for_reproducibility/docs/source/src.models.local_llm.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.models.local\_llm module
2
+ ============================
3
+
4
+ .. automodule:: src.models.local_llm
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.models.new_local_llm.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.models.new\_local\_llm module
2
+ =================================
3
+
4
+ .. automodule:: src.models.new_local_llm
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.models.oai_agent.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.models.oai\_agent module
2
+ ============================
3
+
4
+ .. automodule:: src.models.oai_agent
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.rst ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ src package
2
+ ===========
3
+
4
+ .. automodule:: src
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
8
+
9
+ Subpackages
10
+ -----------
11
+
12
+ .. toctree::
13
+ :maxdepth: 4
14
+
15
+ src.environments
16
+ src.experiments
17
+ src.generation
18
+ src.models
19
+ src.training
20
+ src.utils
21
+
22
+ Submodules
23
+ ----------
24
+
25
+ .. toctree::
26
+ :maxdepth: 4
27
+
28
+ src.run
src_code_for_reproducibility/docs/source/src.training.rst ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ src.training package
2
+ ====================
3
+
4
+ .. automodule:: src.training
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
8
+
9
+ Submodules
10
+ ----------
11
+
12
+ .. toctree::
13
+ :maxdepth: 4
14
+
15
+ src.training.ppo_train
16
+ src.training.ppo_train_value_head
17
+ src.training.reinforce_training
18
+ src.training.rl_convs_processing
19
+ src.training.train_main
src_code_for_reproducibility/docs/source/src.training.train_main.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.training.train\_main module
2
+ ===============================
3
+
4
+ .. automodule:: src.training.train_main
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/markov_games/__pycache__/export_utils.cpython-310.pyc ADDED
Binary file (7.17 kB). View file
 
src_code_for_reproducibility/markov_games/ipd/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (168 Bytes). View file
 
src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_agent.cpython-310.pyc ADDED
Binary file (3.37 kB). View file
 
src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_agent.cpython-311.pyc ADDED
Binary file (5.31 kB). View file
 
src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_game.cpython-310.pyc ADDED
Binary file (5.39 kB). View file
 
src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_log_funcs.cpython-310.pyc ADDED
Binary file (1.36 kB). View file
 
src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_log_match.cpython-310.pyc ADDED
Binary file (1.75 kB). View file
 
src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_player.cpython-310.pyc ADDED
Binary file (8.22 kB). View file
 
src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_simulation.cpython-310.pyc ADDED
Binary file (6.16 kB). View file
 
src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_simulation.cpython-311.pyc ADDED
Binary file (7.06 kB). View file
 
src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_statistics.cpython-310.pyc ADDED
Binary file (11.7 kB). View file
 
src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_statistics.cpython-311.pyc ADDED
Binary file (915 Bytes). View file
 
src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_statistics_funcs.cpython-310.pyc ADDED
Binary file (1.63 kB). View file
 
src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_training_data.cpython-310.pyc ADDED
Binary file (5.59 kB). View file
 
src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_training_data_funcs.cpython-310.pyc ADDED
Binary file (6.11 kB). View file
 
src_code_for_reproducibility/markov_games/ipd/ipd_agent.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import copy
2
+ import json
3
+ import random
4
+ import re
5
+ from collections.abc import Callable
6
+ from copy import deepcopy
7
+ from dataclasses import dataclass, field
8
+ from typing import Any, Dict, List, Optional, Tuple, Union
9
+
10
+ from mllm.markov_games.agent import Agent
11
+ from mllm.markov_games.rollout_tree import AgentActLog, ChatTurn
12
+
13
+
14
@dataclass
class IPDAgentState:
    """
    Mutable bookkeeping an ``IPDAgent`` carries from one ``act`` call to the next.

    Every field has a default, so ``IPDAgentState()`` (the form used by
    ``IPDAgent.reset``) builds a fresh, empty state instead of raising
    ``TypeError`` for missing arguments. Passing the fields explicitly, as
    ``IPDAgent.__post_init__`` does, keeps working unchanged.
    """

    # Retry counter; starts at 0.
    nb_retries: int = 0
    # Round number of the most recent observation the agent acted on.
    round_nb: int = 0
    # How many entries of ``chat_history`` were already returned in a step log.
    chat_counter: int = 0
    # Whole conversation so far. A factory is used so that instances never
    # share one list object.
    chat_history: List[ChatTurn] = field(default_factory=list)
24
+
25
+
26
@dataclass
class IPDAgent(Agent):
    """
    Agent that plays the Iterated Prisoner's Dilemma through a chat policy.

    The agent keeps the whole conversation in ``self.state.chat_history``.
    On every ``act`` call it appends the user-side messages that are due
    (game introduction, opponent's last move), queries ``policy`` and records
    the reply as an assistant turn.
    """

    seed: int
    agent_id: str
    agent_name: str
    # Awaited in ``act`` with the keyword arguments ``prompt=`` and ``regex=``.
    policy: Callable[[List[Dict]], str]
    intro_prompt: str  # Introduction prompt explaining the game rules
    goal_prompt: str  # Prompt explaining the agent's goal
    strategy_prompt: str  # Prompt suggesting a strategy to the agent
    max_errors: int  # Maximum number of errors allowed before default action
    allow_reasoning: bool  # Whether to allow reasoning in the response
    max_reasoning_chars: int  # Maximum number of characters for reasoning
    cooperate_string: str  # string parsed as playing cooperate by simulation
    defect_string: str  # string parsed as playing defect by simulation

    @staticmethod
    def _initial_state() -> IPDAgentState:
        """Build the empty state an agent starts a game with."""
        return IPDAgentState(
            nb_retries=0, round_nb=0, chat_counter=0, chat_history=[]
        )

    def __post_init__(self):
        """Attach a fresh state once the dataclass fields are set."""
        self.state = self._initial_state()

    async def act(self, observation) -> Tuple[Any, AgentActLog]:
        """
        Choose this round's move.

        Args:
            observation: Object exposing ``round_nb`` and ``last_coagent_move``.

        Returns:
            A pair ``(action, log)``: ``action`` is the raw policy output and
            ``log`` is an ``AgentActLog`` holding only the chat turns added
            since the previous call.
        """
        round_nb = observation.round_nb

        # If it's the first round, we need to send the intro prompt
        if round_nb == 0 and self.state.chat_counter == 0:
            self.state.chat_history.append(
                ChatTurn(
                    agent_id=self.agent_id,
                    role="user",
                    content=self.intro_prompt,
                    is_state_end=True,
                )
            )

        # A new round started: tell the model what the other agent played.
        if round_nb > self.state.round_nb:
            coagent_action = observation.last_coagent_move
            user_message = f"Last round, the other agent played {coagent_action}."
            self.state.chat_history.append(
                ChatTurn(
                    agent_id=self.agent_id,
                    role="user",
                    content=user_message,
                    is_state_end=True,
                )
            )

        # Query the policy on the full history; the regex lists the two
        # accepted answers.
        # NOTE(review): the two strings are interpolated unescaped, so regex
        # metacharacters in them would be interpreted as such - confirm the
        # configured strings are plain text.
        prompt = [chat_item.dict() for chat_item in self.state.chat_history]
        policy_output = await self.policy(
            prompt=prompt, regex=f"({self.cooperate_string}|{self.defect_string})"
        )
        self.state.chat_history.append(
            ChatTurn(
                agent_id=self.agent_id,
                role="assistant",
                content=policy_output,
                is_state_end=False,
            )
        )

        action = policy_output

        # Only the turns appended since the previous call go into the log.
        agent_step_log = AgentActLog(
            chat_turns=self.state.chat_history[self.state.chat_counter :], info=None
        )
        self.state.chat_counter = len(self.state.chat_history)
        self.state.round_nb = round_nb

        return action, agent_step_log

    def get_safe_copy(self):
        """
        Return a safe copy of the agent.

        The agent object is copied shallowly (so attributes such as ``policy``
        are shared with the original) while ``state`` is deep-copied, so the
        copy's conversation can evolve independently.
        """
        agent_copy = copy.copy(self)
        agent_copy.state = copy.deepcopy(self.state)
        return agent_copy

    def reset(self):
        """
        Drop the conversation and counters, restoring the initial state.

        Previously this called ``IPDAgentState()`` without its required
        arguments, which raised ``TypeError`` before the trailing
        ``raise NotImplementedError`` could run. It now rebuilds the same
        state as ``__post_init__`` and returns normally.
        """
        self.state = self._initial_state()

    def render(self):
        """No-op."""
        pass

    def close(self):
        """No-op."""
        pass

    def get_agent_info(self):
        """No-op; returns ``None``."""
        pass
src_code_for_reproducibility/markov_games/ipd/ipd_simulation.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import copy
2
+ import random
3
+ from dataclasses import dataclass
4
+ from typing import Any, Dict, List, Optional, Tuple
5
+
6
+ import numpy as np
7
+
8
+ from mllm.markov_games.markov_game import Simulation
9
+ from mllm.markov_games.rollout_tree import SimulationStepLog
10
+ from mllm.utils.get_coagent_id import get_coagent_id
11
+
12
+
13
@dataclass
class IPDState:
    """
    Mutable state of one Iterated Prisoner's Dilemma game.
    """

    # Number of rounds played so far.
    round_nb: int = 0
    # End-of-game flag; False for a fresh state.
    done: bool = False
    # Actions submitted in the latest round, keyed by agent id
    # (None until a round has been played).
    last_moves: Optional[Dict[str, str]] = None
22
+
23
+
24
@dataclass
class IPDObs:
    """
    What a single agent observes in the Iterated Prisoner's Dilemma.
    """

    # Current round number of the game.
    round_nb: int
    # The other agent's action in the previous round, or None if there is none yet.
    last_coagent_move: Optional[str]
32
+
33
+
34
class IPD(Simulation):
    """
    Iterated Prisoner's Dilemma simulation following the standard.

    In each round of the game, two agents simultaneously choose to either cooperate (C) or defect (D).
    The payoffs are as follows:
    - If both cooperate: Both receive the "reward" (usually 3 points)
    - If both defect: Both receive the "punishment" (usually 1 point)
    - If one cooperates and one defects: The defector receives the "temptation" (usually 5 points)
      and the cooperator receives the "sucker" payoff (usually 0 points)

    The game is played for a specified number of rounds.
    """

    def __init__(
        self,
        agent_ids: List[str],
        agent_names: List[str],
        seed: int,
        rounds_per_game: int,
        reward: float,  # Both cooperate
        punishment: float,  # Both defect
        temptation: float,  # Defector's reward when other cooperates
        sucker: float,  # Cooperator's reward when other defects
        cooperate_actions: List[str],
        defect_actions: List[str],
    ):
        """
        Store the game configuration and start from a fresh ``IPDState``.

        Args:
            agent_ids: Identifiers of the players; ``step`` uses the first two.
            agent_names: Display names of the players.
            seed: Seed value, stored as given.
            rounds_per_game: Number of rounds after which ``step`` reports done.
            reward: Payoff to each player when both cooperate.
            punishment: Payoff to each player when both defect.
            temptation: Payoff to a defector whose opponent cooperates.
            sucker: Payoff to a cooperator whose opponent defects.
            cooperate_actions: Action strings counted as cooperation.
            defect_actions: Action strings counted as defection.
        """
        self.agent_ids = agent_ids
        self.agent_names = agent_names
        self.seed = seed
        self.rounds_per_game = rounds_per_game
        self.reward = reward
        self.punishment = punishment
        self.temptation = temptation
        self.sucker = sucker
        self.cooperate_actions = cooperate_actions
        self.defect_actions = defect_actions
        self.state = IPDState()

    def _normalize_action(self, action) -> str:
        """
        Map a raw action string to "C" or "D".

        Only strings listed in ``cooperate_actions`` count as cooperation.
        Everything else, whether listed in ``defect_actions`` or not
        recognised at all, is treated as defection.
        """
        return "C" if action in self.cooperate_actions else "D"

    def step(self, actions: Dict[str, str]) -> Tuple[bool, SimulationStepLog]:
        """
        Play one round with the provided actions.

        Args:
            actions (dict): Maps each agent identifier to the raw action string
                it submitted.

        Returns:
            done (bool): Whether ``rounds_per_game`` rounds have now been played.
            step_log (SimulationStepLog): Per-agent rewards for this round, with
                the normalized actions ("C"/"D") under ``info["actions"]``.

        Raises:
            KeyError: If ``actions`` lacks an entry for one of the two agents.
        """
        first_id = self.agent_ids[0]
        second_id = self.agent_ids[1]

        norm_action0 = self._normalize_action(actions[first_id])
        norm_action1 = self._normalize_action(actions[second_id])

        # Payoff matrix indexed by (first agent's move, second agent's move);
        # each entry is [first agent's payoff, second agent's payoff].
        payoffs = {
            ("C", "C"): [self.reward, self.reward],
            ("C", "D"): [self.sucker, self.temptation],
            ("D", "C"): [self.temptation, self.sucker],
            ("D", "D"): [self.punishment, self.punishment],
        }
        payoff0, payoff1 = payoffs[(norm_action0, norm_action1)]
        round_rewards = {first_id: payoff0, second_id: payoff1}

        # Update game state. The raw (un-normalized) actions are what gets
        # stored and later shown to the other agent as its last move.
        self.state.round_nb += 1
        self.state.last_moves = copy.deepcopy(actions)
        done = self.state.round_nb >= self.rounds_per_game

        step_log = SimulationStepLog(
            rewards=round_rewards,
            info={
                "actions": {
                    first_id: norm_action0,
                    second_id: norm_action1,
                }
            },
        )

        return done, step_log

    def get_obs(self):
        """Return a dict mapping every agent id to its current observation."""
        return {
            agent_id: self.get_obs_agent(agent_id) for agent_id in self.agent_ids
        }

    def get_obs_agent(self, agent_id):
        """
        Return the observation for ``agent_id``.

        The observation carries the current round number and the other agent's
        action from the previous round (``None`` before any round was played).
        """
        if self.state.last_moves is not None:
            other_id = get_coagent_id(self.agent_ids, agent_id)
            last_coagent_move = self.state.last_moves[other_id]
        else:
            last_coagent_move = None
        return IPDObs(
            round_nb=self.state.round_nb, last_coagent_move=last_coagent_move
        )

    def reset(self):
        """Restart the game from a fresh state and return the initial observations."""
        self.state = IPDState()
        return self.get_obs()

    def get_safe_copy(self):
        """
        Return a safe copy of the simulation.

        The simulation object is copied shallowly (configuration attributes are
        shared) while ``state`` is deep-copied so the copy can advance
        independently.
        """
        simulation_copy = copy.copy(self)
        simulation_copy.state = copy.deepcopy(self.state)
        return simulation_copy
src_code_for_reproducibility/markov_games/runners/__pycache__/alternative_actions_runner.cpython-311.pyc ADDED
Binary file (5.8 kB). View file
 
src_code_for_reproducibility/markov_games/runners/__pycache__/linear_runner.cpython-311.pyc ADDED
Binary file (2.08 kB). View file
 
src_code_for_reproducibility/utils/__init__.py ADDED
File without changes
src_code_for_reproducibility/utils/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (157 Bytes). View file
 
src_code_for_reproducibility/utils/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (173 Bytes). View file
 
src_code_for_reproducibility/utils/__pycache__/common_imports.cpython-310.pyc ADDED
Binary file (524 Bytes). View file
 
src_code_for_reproducibility/utils/__pycache__/dict_get_path.cpython-310.pyc ADDED
Binary file (418 Bytes). View file
 
src_code_for_reproducibility/utils/__pycache__/extra_stats.cpython-310.pyc ADDED
Binary file (190 Bytes). View file
 
src_code_for_reproducibility/utils/__pycache__/get_coagent_id.cpython-310.pyc ADDED
Binary file (364 Bytes). View file