Muqeeth commited on
Commit
252341d
·
verified ·
1 Parent(s): bcf4380

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. src_code_for_reproducibility/docs/generate_docs.py +249 -0
  2. src_code_for_reproducibility/docs/source/conf.py +48 -0
  3. src_code_for_reproducibility/docs/source/environments.rst +35 -0
  4. src_code_for_reproducibility/docs/source/installation.rst +10 -0
  5. src_code_for_reproducibility/docs/source/marl_standard.rst +141 -0
  6. src_code_for_reproducibility/docs/source/modules.rst +7 -0
  7. src_code_for_reproducibility/docs/source/src.environments.dond.dond_game.rst +7 -0
  8. src_code_for_reproducibility/docs/source/src.environments.dond.dond_log_funcs.rst +7 -0
  9. src_code_for_reproducibility/docs/source/src.environments.dond.dond_player.rst +7 -0
  10. src_code_for_reproducibility/docs/source/src.environments.dond.dond_training_data_funcs.rst +7 -0
  11. src_code_for_reproducibility/docs/source/src.environments.dond.rst +19 -0
  12. src_code_for_reproducibility/docs/source/src.environments.environment_imports.rst +7 -0
  13. src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_agent.rst +7 -0
  14. src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_game.rst +7 -0
  15. src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_log_funcs.rst +7 -0
  16. src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_statistics_funcs.rst +7 -0
  17. src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_training_data_funcs.rst +7 -0
  18. src_code_for_reproducibility/docs/source/src.environments.ipd.rst +19 -0
  19. src_code_for_reproducibility/docs/source/src.environments.rst +25 -0
  20. src_code_for_reproducibility/docs/source/src.experiments.arithmetic_test.rst +7 -0
  21. src_code_for_reproducibility/docs/source/src.experiments.dond_run_train.rst +7 -0
  22. src_code_for_reproducibility/docs/source/src.experiments.generate_and_train.rst +7 -0
  23. src_code_for_reproducibility/docs/source/src.experiments.last_completion.rst +7 -0
  24. src_code_for_reproducibility/docs/source/src.experiments.rst +17 -0
  25. src_code_for_reproducibility/docs/source/src.generation.rst +15 -0
  26. src_code_for_reproducibility/docs/source/src.generation.run_games.rst +7 -0
  27. src_code_for_reproducibility/docs/source/src.models.dummy_hf_agent.rst +7 -0
  28. src_code_for_reproducibility/docs/source/src.models.dummy_local_llm.rst +7 -0
  29. src_code_for_reproducibility/docs/source/src.models.local_llm.rst +7 -0
  30. src_code_for_reproducibility/docs/source/src.models.rst +20 -0
  31. src_code_for_reproducibility/docs/source/src.models.updatable_worker.rst +7 -0
  32. src_code_for_reproducibility/docs/source/src.rst +28 -0
  33. src_code_for_reproducibility/docs/source/src.training.ppo_train_value_head.rst +7 -0
  34. src_code_for_reproducibility/docs/source/src.training.reinforce_training.rst +7 -0
  35. src_code_for_reproducibility/docs/source/src.training.rl_convs_processing.rst +7 -0
  36. src_code_for_reproducibility/docs/source/src.training.rst +19 -0
  37. src_code_for_reproducibility/docs/source/src.training.train_main.rst +7 -0
  38. src_code_for_reproducibility/docs/source/src.utils.export_ppo_training_set.rst +7 -0
  39. src_code_for_reproducibility/docs/source/src.utils.log_gpu_usage.rst +7 -0
  40. src_code_for_reproducibility/docs/source/src.utils.rst +24 -0
  41. src_code_for_reproducibility/docs/source/src.utils.update_start_epoch.rst +7 -0
  42. src_code_for_reproducibility/markov_games/diplomacy/diplomacy_agent.py +259 -0
  43. src_code_for_reproducibility/markov_games/diplomacy/diplomacy_logging.py +360 -0
  44. src_code_for_reproducibility/markov_games/ipd/__init__.py +7 -0
  45. src_code_for_reproducibility/markov_games/ipd/__pycache__/Ipd_hard_coded_agents.cpython-312.pyc +0 -0
  46. src_code_for_reproducibility/markov_games/ipd/__pycache__/__init__.cpython-312.pyc +0 -0
  47. src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_agent.cpython-312.pyc +0 -0
  48. src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_simulation.cpython-312.pyc +0 -0
  49. src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_statistics.cpython-312.pyc +0 -0
  50. src_code_for_reproducibility/markov_games/ipd/ipd_agent.py +115 -0
src_code_for_reproducibility/docs/generate_docs.py ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Script to automatically generate Sphinx documentation for all modules and build the HTML website.
4
+ """
5
+ import importlib.util
6
+ import os
7
+ import subprocess
8
+ import sys
9
+
10
+
11
def check_and_install_dependencies():
    """Check for the Sphinx toolchain and pip-install whatever is missing.

    Every required pip distribution is paired with the module name it actually
    provides, and a distribution counts as missing when no import spec can be
    found for that module. Missing distributions are installed with a single
    ``pip install`` run through the current interpreter.

    Raises:
        subprocess.CalledProcessError: If ``pip install`` exits with a
            non-zero status.
    """
    # pip distribution name -> importable module name. The two are not related
    # by a simple "-" -> "_" rewrite: the ``sphinxcontrib-*`` distributions
    # install into the ``sphinxcontrib`` namespace package, so looking up
    # ``sphinxcontrib_mermaid`` would never succeed and pip would be re-run on
    # every invocation.
    required_packages = {
        "sphinx": "sphinx",
        "sphinx-rtd-theme": "sphinx_rtd_theme",
        "sphinxcontrib-napoleon": "sphinxcontrib.napoleon",
        "sphinxcontrib-mermaid": "sphinxcontrib.mermaid",
        "sphinx-autodoc-typehints": "sphinx_autodoc_typehints",
    }

    def _is_importable(module_name):
        """Return True when an import spec for *module_name* can be found."""
        try:
            return importlib.util.find_spec(module_name) is not None
        except (ImportError, ValueError):
            # For dotted names find_spec imports the parent package first and
            # raises if that parent (e.g. ``sphinxcontrib``) is absent.
            return False

    missing_packages = [
        package
        for package, module_name in required_packages.items()
        if not _is_importable(module_name)
    ]

    if not missing_packages:
        print("All required dependencies are already installed")
        return

    print(f"Installing missing dependencies: {', '.join(missing_packages)}")
    # Use the running interpreter's pip so the packages land in the same
    # environment this script executes in.
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", *missing_packages]
    )
    print("Dependencies installed successfully")
40
+
41
+
42
def create_makefile(docs_dir):
    """Create a Sphinx ``Makefile`` in *docs_dir* unless one already exists.

    An existing Makefile is never overwritten. The generated file routes every
    target to ``sphinx-build -M`` with ``source`` as the source directory and
    ``build`` as the output directory.

    Args:
        docs_dir: Directory in which the Makefile is created.
    """
    makefile_path = os.path.join(docs_dir, "Makefile")

    if os.path.exists(makefile_path):
        print(f"Makefile already exists at {makefile_path}")
        return

    print(f"Creating Makefile at {makefile_path}")

    # Recipe lines must start with a TAB; they are spelled as ``\t`` so the
    # template survives editors that convert tabs to spaces. Extra options are
    # forwarded through $(O), matching the template's own comment and the %O%
    # variable used by the generated make.bat.
    makefile_content = """# Minimal makefile for Sphinx documentation

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS    ?=
SPHINXBUILD   ?= sphinx-build
SOURCEDIR     = source
BUILDDIR      = build

# Put it first so that "make" without argument is like "make help".
help:
\t@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
\t@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
"""

    # Force LF line endings: a Makefile written with CRLF breaks under
    # Unix make.
    with open(makefile_path, "w", encoding="utf-8", newline="\n") as f:
        f.write(makefile_content)

    print("Makefile created successfully")
77
+
78
+
79
def create_make_bat(docs_dir):
    """Create a Windows ``make.bat`` in *docs_dir* unless one already exists.

    An existing make.bat is never overwritten. The generated script forwards
    its first argument to ``sphinx-build -M`` with ``source`` as the source
    directory and ``build`` as the output directory.

    Args:
        docs_dir: Directory in which make.bat is created.
    """
    make_bat_path = os.path.join(docs_dir, "make.bat")

    if os.path.exists(make_bat_path):
        print(f"make.bat already exists at {make_bat_path}")
        return

    print(f"Creating make.bat at {make_bat_path}")

    make_bat_content = """@ECHO OFF

pushd %~dp0

REM Command file for Sphinx documentation

if "%SPHINXBUILD%" == "" (
\tset SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build

%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
\techo.
\techo.The 'sphinx-build' command was not found. Make sure you have Sphinx
\techo.installed, then set the SPHINXBUILD environment variable to point
\techo.to the full path of the 'sphinx-build' executable. Alternatively you
\techo.may add the Sphinx directory to PATH.
\techo.
\techo.If you don't have Sphinx installed, grab it from
\techo.https://www.sphinx-doc.org/
\texit /b 1
)

if "%1" == "" goto help

%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd
"""

    # Always emit CRLF line endings. Plain text mode would write LF-only files
    # when this script runs on Linux/macOS, and cmd.exe is known to handle
    # labels and ``goto`` unreliably in batch files without CRLF.
    with open(make_bat_path, "w", encoding="utf-8", newline="\r\n") as f:
        f.write(make_bat_content)

    print("make.bat created successfully")
130
+
131
+
132
def _run_and_report(cmd, cwd=None):
    """Run *cmd*, echo its captured stdout/stderr, and return its exit code.

    Args:
        cmd: Command and arguments as a list of strings.
        cwd: Working directory for the child process, or None to inherit.

    Returns:
        The child's return code, or 127 when the process could not be started
        because the executable (or *cwd*) was not found.
    """
    print(f"Running command: {' '.join(cmd)}")
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, cwd=cwd)
    except FileNotFoundError as e:
        print(f"Error: could not start {cmd[0]!r}: {e}")
        return 127

    print("STDOUT:")
    print(result.stdout)

    print("STDERR:")
    print(result.stderr)

    return result.returncode


def main():
    """Generate the API ``.rst`` files and build the HTML documentation.

    Steps, in order:
      1. Ensure the Sphinx dependencies are installed.
      2. Run ``sphinx-apidoc`` over ``<project_root>/src`` and write the
         result into ``<script_dir>/source``.
      3. Create ``Makefile`` / ``make.bat`` next to this script if missing.
      4. Run ``make html`` (``make.bat html`` on Windows) in the script
         directory.
      5. Try to open ``build/html/index.html`` in the default browser.

    The process exits with status 1 if the source directory is missing or if
    either external command fails.
    """
    # Check and install required dependencies
    print("=== Checking dependencies ===")
    check_and_install_dependencies()

    # All paths are derived from this script's location: the script lives in
    # <project_root>/docs, the code in <project_root>/src.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.dirname(script_dir)
    source_dir = os.path.join(project_root, "src")
    docs_source_dir = os.path.join(script_dir, "source")

    # Print paths for debugging
    print(f"Script directory: {script_dir}")
    print(f"Project root: {project_root}")
    print(f"Source directory: {source_dir}")
    print(f"Docs source directory: {docs_source_dir}")

    # Make sure the source directory exists
    if not os.path.exists(source_dir):
        print(f"Error: Source directory {source_dir} does not exist!")
        sys.exit(1)

    # Make sure the docs source directory exists
    if not os.path.exists(docs_source_dir):
        print(f"Creating docs source directory: {docs_source_dir}")
        os.makedirs(docs_source_dir, exist_ok=True)

    # Step 1: Run sphinx-apidoc to generate .rst files for all modules
    print("\n=== Generating API documentation ===")
    cmd = [
        "sphinx-apidoc",
        "-f",  # Force overwriting of existing files
        "-e",  # Put the documentation for each module on its own page
        "-M",  # Put module documentation before submodule documentation
        "-o",
        docs_source_dir,  # Output directory
        source_dir,  # Source code directory
    ]

    returncode = _run_and_report(cmd)
    if returncode != 0:
        print(f"Error: sphinx-apidoc failed with return code {returncode}")
        sys.exit(1)

    # List the files in the docs source directory
    print("\nFiles in docs/source directory:")
    for file in sorted(os.listdir(docs_source_dir)):
        print(f"  {file}")

    print("\nDocumentation source files generated successfully!")

    # Step 2: Create Makefile and make.bat if they don't exist
    create_makefile(script_dir)
    create_make_bat(script_dir)

    # Step 3: Build the HTML documentation
    print("\n=== Building HTML documentation ===")

    # Determine the build command based on the platform. make.bat is given as
    # an absolute path so it is found regardless of how the child's working
    # directory affects executable lookup.
    if os.name == "nt":  # Windows
        build_cmd = [os.path.join(script_dir, "make.bat"), "html"]
    else:  # Unix/Linux/Mac
        build_cmd = ["make", "html"]

    # Run the build inside the docs directory via ``cwd`` instead of changing
    # this process's own working directory.
    build_returncode = _run_and_report(build_cmd, cwd=script_dir)
    if build_returncode != 0:
        print(f"Error: HTML build failed with return code {build_returncode}")
        sys.exit(1)

    # Get the path to the built HTML documentation
    html_dir = os.path.join(script_dir, "build", "html")
    index_path = os.path.join(html_dir, "index.html")

    if not os.path.exists(index_path):
        print(f"\nWarning: HTML index file not found at {index_path}")
        return

    print("\nHTML documentation built successfully!")
    print(f"You can view it by opening: {index_path}")

    # Opening the browser is best-effort: any failure is reported, not raised.
    try:
        import webbrowser
        from pathlib import Path

        print("\nAttempting to open documentation in your default browser...")
        # as_uri() yields a well-formed file URL on every platform, including
        # Windows drive-letter paths, unlike naive "file://" + path.
        opened = webbrowser.open(Path(index_path).as_uri())
    except Exception as e:
        print(f"Could not open browser automatically: {e}")
    else:
        if not opened:
            print("Could not open browser automatically: no usable browser found")


if __name__ == "__main__":
    main()
src_code_for_reproducibility/docs/source/conf.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Sphinx build configuration for the llm_negotiation documentation.
import os
import sys

# -- Path setup --------------------------------------------------------------
# Both entries go to the front of sys.path so autodoc can import the project.
# Because each insert targets index 0, the second one ("../../src") ends up
# ahead of the first ("../..").
sys.path.insert(0, os.path.abspath("../.."))
sys.path.insert(0, os.path.abspath("../../src"))

# -- Project information -----------------------------------------------------
project = "llm_negotiation"
author = "Your Name"
copyright = "2023, Your Name"

# -- General configuration ---------------------------------------------------
extensions = [
    "sphinx.ext.autodoc",
    "sphinx.ext.viewcode",
    "sphinx.ext.napoleon",
    "sphinx.ext.autosummary",
    "sphinx.ext.intersphinx",
    "sphinx.ext.mathjax",
    "sphinxcontrib.mermaid",
    "sphinx_rtd_theme",
]

exclude_patterns = []
templates_path = ["_templates"]

# -- Options for HTML output -------------------------------------------------
html_static_path = ["_static"]
html_theme = "sphinx_rtd_theme"

# -- Napoleon settings -------------------------------------------------------
# Docstring flavours that napoleon should parse.
napoleon_google_docstring = True
napoleon_numpy_docstring = False

# Which otherwise-skipped members are documented when they have a docstring.
napoleon_include_init_with_doc = True
napoleon_include_private_with_doc = False
napoleon_include_special_with_doc = True

# Rendering of Example / Note / References sections.
napoleon_use_admonition_for_examples = False
napoleon_use_admonition_for_notes = False
napoleon_use_admonition_for_references = False

# Field-list roles emitted for attributes, parameters and return types.
napoleon_use_ivar = False
napoleon_use_param = True
napoleon_use_rtype = True

# Type handling.
napoleon_preprocess_types = False
napoleon_type_aliases = None
napoleon_attr_annotations = True
src_code_for_reproducibility/docs/source/environments.rst ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ =================
2
+ MARL Environments
3
+ =================
4
+
5
+ This section provides detailed documentation for the multi-agent negotiation environments included in the library.
6
+
7
+ Each environment follows the standard interface described in :doc:`marl_standard` but has its own unique game rules,
8
+ dynamics, and implementation details.
9
+
10
+ .. toctree::
11
+ :maxdepth: 2
12
+ :caption: Available Environments:
13
+
14
+ environments/ipd
15
+ environments/diplomacy
16
+ environments/dond
17
+
18
+ Overview
19
+ --------
20
+
21
+ The library currently includes the following environments:
22
+
23
+ 1. **Iterated Prisoner's Dilemma (IPD)**: A classic game theory problem where two agents repeatedly decide whether to cooperate or defect, with different payoffs based on their joint actions.
24
+
25
+ 2. **Diplomacy**: An adaptation of the board game Diplomacy, where seven European powers compete for control of supply centers through strategic moves and alliances.
26
+
27
+ 3. **Deal or No Deal (DOND)**: A negotiation environment based on `the paper Deal or No Deal? End-to-End Learning for Negotiation Dialogues <https://arxiv.org/pdf/1706.05125>`_ in which agents negotiate over the distribution of a set of prizes.
28
+
29
+ Each environment documentation includes:
30
+
31
+ - Game rules and background
32
+ - Implementation details
33
+ - API reference
34
+ - Example usage
35
+ - Advanced features and customization options
src_code_for_reproducibility/docs/source/installation.rst ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Installation
2
+ ============
3
+
4
+ To install the package, run:
5
+
6
+ .. code-block:: bash
7
+
8
+ git clone https://github.com/yourusername/llm_negotiation.git
9
+ cd llm_negotiation
10
+ pip install -e .
src_code_for_reproducibility/docs/source/marl_standard.rst ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ==========================================================
2
+ Abstract Standard for Multi-Agent Negotiation Environments
3
+ ==========================================================
4
+
5
+ Multi-Agent Negotiation Environments require more features than gymnasium environments in order to be used as interfaces in general game running code.
6
+ The four fundamental differences between gymnasium environments and Multi-Agent Negotiation Environments are:
7
+
8
+ 1. Response from the LLM is a text action, not a discrete action. Therefore, appropriate parsing of the text is required. The model may need to be run multiple times to get the full action.
9
+ This is why we introduce the `AgentHandler` class, which is responsible for parsing the LLM's response.
10
+ 2. The environment needs to be able to handle multi-agent interactions.
11
+ This is why we introduce the `NegotiationEnvironment` class, which is responsible for handling the multi-agent interactions.
12
+ 3. MARL environments are complex to describe. In different contexts, the same environment may be described differently. Therefore, both the environment and the agent handlers are
13
+ responsible for describing a particular trajectory. This information is given by the `get_log_info` method.
14
+ 4. There might be a lot of overlap between the neural networks used by each agent. For instance, the same model may be used for all agents. This motivates a requirement for a
15
+ policy identifier for each agent.
16
+
17
+ Taking inspiration from the `gymnasium <https://gymnasium.farama.org/>`_ library, we introduce a new standard for Multi-Agent Negotiation Environments.
18
+
19
+ Our standard is based on the following features:
20
+
21
+ Environments are of the form:
22
+
23
+ .. code-block:: python
24
+
25
+ class MarlEnvironment():
26
+
27
+ def __init__(self):
28
+ """Initialize the environment."""
29
+ pass
30
+
31
+ def reset(self):
32
+ """Reset the environment to an initial state and return the initial observation.
33
+ Returns:
34
+ observation (dict): A dictionary where keys are agent identifiers and values are observations.
35
+ """
36
+ # (...)
37
+ return observation
38
+
39
+ def step(self, actions):
40
+ """Take a step in the environment using the provided actions.
41
+
42
+ Args:
43
+ actions (dict): A dictionary where keys are agent identifiers and values are actions.
44
+
45
+ Returns:
46
+ observations (dict): A dictionary where keys are agent identifiers and values are observations.
47
+ reward (dict): A dictionary where keys are agent identifiers and values are rewards.
48
+ done (bool): Whether the episode has ended.
49
+ info (dict): Additional information about the environment.
50
+ """
51
+ # (...)
52
+ return observations, reward, done, info
53
+
54
+ def get_log_info(self):
55
+ """Get additional information about the environment. This information is used to log the game.
56
+ Returns:
57
+ log_info (dict): Information about the environment required to log the game.
58
+ """
59
+ # (...)
60
+ return log_info
61
+
62
+ def render(self):
63
+ """Render the current state of the environment."""
64
+ pass
65
+
66
+ def close(self):
67
+ """Perform any necessary cleanup."""
68
+ pass
69
+
70
+
71
+ class AgentState():
72
+
73
+ def __init__(self):
74
+ """Initialize the agent state."""
75
+ pass
76
+
77
+ def step(self, observation_from_env, policy_output=None):
78
+ """Update the agent state based on the observation and action.
79
+ The action is the output of the LLM.
80
+
81
+
82
+ Args:
83
+ observation_from_env (dict): The observation of the environment.
84
+ policy_output : The output of the policy.
85
+
86
+ Returns:
87
+ policy_id (str): The policy identifier.
88
+ policy_input (dict): The input to the policy.
89
+ action : The official action to be sent to the environment.
90
+ done (bool): Whether the LLM action is ready to be sent to the environment.
91
+ info (dict): Additional information about the agent.
92
+ """
93
+ # (...)
94
+ return policy_id, policy_input, action, done, info
95
+
96
+ def get_log_info(self):
97
+ """Get information about the agent required to log a trajectory.
98
+ Returns:
99
+ log_info (dict): Information about the agent required to log a trajectory.
100
+ """
101
+ # (...)
102
+ return log_info
103
+
104
+ def render(self):
105
+ """Render the current state of the environment."""
106
+ pass
107
+
108
+ def close(self):
109
+ """Perform any necessary cleanup."""
110
+ pass
111
+
112
+
113
+ Implicitly, the keys of the `observations` in the `step` method of the `MarlEnvironment` interface represent the set of agents from which an action is expected at the current step. The next step should only expect actions from the agents in the `observations` dictionary.
114
+
115
+ As you can see, both classes have a `get_log_info` method. This method is used to log the game. It returns a dictionary with keys being the agent identifiers and values being the information to log. The reason we need this is because the environment and the agent handler may need to log different information. It makes it easier to log from the perspective of each agent. The core environment class should not need to know about the details of the agent handler.
116
+
117
+
118
+
119
+ Running Environments in Parallel
120
+ --------------------------------
121
+ This standard allows the use of the `run_batched_matches` function (TODO: link) to run environments in an efficient way. The core idea is to batch the policy calls for all agents in the environment.
122
+
123
+ .. note::
124
+ The ``run_batched_matches`` function allows you to run multiple negotiation games, or "matches," in parallel.
125
+ After each environment is initialized, the function continuously loops over all active matches and checks which agents
126
+ are still pending actions. Each agent's logic can require multiple calls to the policy (e.g., an LLM) before an action
127
+ becomes "ready" to be sent to the environment. (For instance, an agent might need multiple policy calls before having a string which can be parsed into a valid action.) While an agent is waiting for a policy output, these calls for all agents across all matches are grouped together by unique policy identifier and processed in batch for efficiency. This is the core functionality of the ``run_batched_matches`` function.
128
+
129
+ Only once all actions from the required agents at a given step for an environment are ready does the function make a single ``env.step(...)`` call; this ensures
130
+ every match moves forward in lockstep for all its active agents. As soon as an environment signals it is done, the function
131
+ retrieves logged information from both the environment and the agent states before removing this match from the active set.
132
+
133
+ If there are more matches waiting to be processed, they are then started one by one to maintain the specified degree of parallelism.
134
+ This batching approach provides an efficient mechanism to handle multi-agent or multi-policy environments, ensuring minimal
135
+ overhead and a clear, unified flow for stepping through matches.
136
+
137
+ Here is a diagram that shows how the `run_batched_matches` function works at a high level:
138
+
139
+ .. image:: media/runbatch.png
140
+ :alt: Diagram of the run_batched_matches control flow
141
+ :width: 1000px
src_code_for_reproducibility/docs/source/modules.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src
2
+ ===
3
+
4
+ .. toctree::
5
+ :maxdepth: 4
6
+
7
+ src
src_code_for_reproducibility/docs/source/src.environments.dond.dond_game.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.environments.dond.dond\_game module
2
+ =======================================
3
+
4
+ .. automodule:: src.environments.dond.dond_game
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.dond.dond_log_funcs.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.environments.dond.dond\_log\_funcs module
2
+ =============================================
3
+
4
+ .. automodule:: src.environments.dond.dond_log_funcs
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.dond.dond_player.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.environments.dond.dond\_agent module
2
+ =========================================
3
+
4
+ .. automodule:: src.environments.dond.dond_agent
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.dond.dond_training_data_funcs.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.environments.dond.dond\_training\_data\_funcs module
2
+ ========================================================
3
+
4
+ .. automodule:: src.environments.dond.dond_training_data_funcs
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.dond.rst ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ src.environments.dond package
2
+ =============================
3
+
4
+ .. automodule:: src.environments.dond
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
8
+
9
+ Submodules
10
+ ----------
11
+
12
+ .. toctree::
13
+ :maxdepth: 4
14
+
15
+ src.environments.dond.dond_agent
16
+ src.environments.dond.dond_game
17
+ src.environments.dond.dond_log_funcs
18
+ src.environments.dond.dond_statistics_funcs
19
+ src.environments.dond.dond_training_data_funcs
src_code_for_reproducibility/docs/source/src.environments.environment_imports.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.environments.environment\_imports module
2
+ ============================================
3
+
4
+ .. automodule:: src.environments.environment_imports
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_agent.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.environments.ipd.ipd\_agent module
2
+ ======================================
3
+
4
+ .. automodule:: src.environments.ipd.ipd_agent
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_game.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.environments.ipd.ipd\_game module
2
+ =====================================
3
+
4
+ .. automodule:: src.environments.ipd.ipd_game
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_log_funcs.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.environments.ipd.ipd\_log\_funcs module
2
+ ===========================================
3
+
4
+ .. automodule:: src.environments.ipd.ipd_log_funcs
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_statistics_funcs.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.environments.ipd.ipd\_statistics\_funcs module
2
+ ==================================================
3
+
4
+ .. automodule:: src.environments.ipd.ipd_statistics_funcs
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_training_data_funcs.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.environments.ipd.ipd\_training\_data\_funcs module
2
+ ======================================================
3
+
4
+ .. automodule:: src.environments.ipd.ipd_training_data_funcs
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.ipd.rst ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ src.environments.ipd package
2
+ ============================
3
+
4
+ .. automodule:: src.environments.ipd
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
8
+
9
+ Submodules
10
+ ----------
11
+
12
+ .. toctree::
13
+ :maxdepth: 4
14
+
15
+ src.environments.ipd.ipd_agent
16
+ src.environments.ipd.ipd_game
17
+ src.environments.ipd.ipd_log_funcs
18
+ src.environments.ipd.ipd_statistics_funcs
19
+ src.environments.ipd.ipd_training_data_funcs
src_code_for_reproducibility/docs/source/src.environments.rst ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ src.environments package
2
+ ========================
3
+
4
+ .. automodule:: src.environments
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
8
+
9
+ Subpackages
10
+ -----------
11
+
12
+ .. toctree::
13
+ :maxdepth: 4
14
+
15
+ src.environments.dond
16
+ src.environments.ipd
17
+
18
+ Submodules
19
+ ----------
20
+
21
+ .. toctree::
22
+ :maxdepth: 4
23
+
24
+ src.environments.env_imports
25
+ src.environments.environment_imports
src_code_for_reproducibility/docs/source/src.experiments.arithmetic_test.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.experiments.arithmetic\_test module
2
+ =======================================
3
+
4
+ .. automodule:: src.experiments.arithmetic_test
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.experiments.dond_run_train.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.experiments.dond\_run\_train module
2
+ =======================================
3
+
4
+ .. automodule:: src.experiments.dond_run_train
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.experiments.generate_and_train.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.experiments.generate\_and\_train module
2
+ ===========================================
3
+
4
+ .. automodule:: src.experiments.generate_and_train
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.experiments.last_completion.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.experiments.last\_completion module
2
+ =======================================
3
+
4
+ .. automodule:: src.experiments.last_completion
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.experiments.rst ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ src.experiments package
2
+ =======================
3
+
4
+ .. automodule:: src.experiments
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
8
+
9
+ Submodules
10
+ ----------
11
+
12
+ .. toctree::
13
+ :maxdepth: 4
14
+
15
+ src.experiments.arithmetic_test
16
+ src.experiments.generate_and_train
17
+ src.experiments.last_completion
src_code_for_reproducibility/docs/source/src.generation.rst ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ src.generation package
2
+ ======================
3
+
4
+ .. automodule:: src.generation
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
8
+
9
+ Submodules
10
+ ----------
11
+
12
+ .. toctree::
13
+ :maxdepth: 4
14
+
15
+ src.generation.run_games
src_code_for_reproducibility/docs/source/src.generation.run_games.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.generation.run\_games module
2
+ ================================
3
+
4
+ .. automodule:: src.generation.run_games
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.models.dummy_hf_agent.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.models.dummy\_hf\_agent module
2
+ ==================================
3
+
4
+ .. automodule:: src.models.dummy_llm_agent
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.models.dummy_local_llm.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.models.dummy\_local\_llm module
2
+ ===================================
3
+
4
+ .. automodule:: src.models.dummy_local_llm
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.models.local_llm.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.models.local\_llm module
2
+ ============================
3
+
4
+ .. automodule:: src.models.local_llm
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.models.rst ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ src.models package
2
+ ==================
3
+
4
+ .. automodule:: src.models
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
8
+
9
+ Submodules
10
+ ----------
11
+
12
+ .. toctree::
13
+ :maxdepth: 4
14
+
15
+ src.models.dummy_local_llm
16
+ src.models.local_llm
17
+ src.models.new_local_llm
18
+ src.models.server_llm
19
+ src.models.updatable_worker
20
+ src.models.vllm_worker_wrap
src_code_for_reproducibility/docs/source/src.models.updatable_worker.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.models.updatable\_worker module
2
+ ===================================
3
+
4
+ .. automodule:: src.models.updatable_worker
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.rst ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ src package
2
+ ===========
3
+
4
+ .. automodule:: src
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
8
+
9
+ Subpackages
10
+ -----------
11
+
12
+ .. toctree::
13
+ :maxdepth: 4
14
+
15
+ src.environments
16
+ src.experiments
17
+ src.generation
18
+ src.models
19
+ src.training
20
+ src.utils
21
+
22
+ Submodules
23
+ ----------
24
+
25
+ .. toctree::
26
+ :maxdepth: 4
27
+
28
+ src.run
src_code_for_reproducibility/docs/source/src.training.ppo_train_value_head.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.training.ppo\_train\_value\_head module
2
+ ===========================================
3
+
4
+ .. automodule:: src.training.ppo_train_value_head
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.training.reinforce_training.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.training.reinforce\_training module
2
+ =======================================
3
+
4
+ .. automodule:: src.training.reinforce_training
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.training.rl_convs_processing.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.training.rl\_convs\_processing module
2
+ =========================================
3
+
4
+ .. automodule:: src.training.rl_convs_processing
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.training.rst ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ src.training package
2
+ ====================
3
+
4
+ .. automodule:: src.training
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
8
+
9
+ Submodules
10
+ ----------
11
+
12
+ .. toctree::
13
+ :maxdepth: 4
14
+
15
+ src.training.ppo_train
16
+ src.training.ppo_train_value_head
17
+ src.training.reinforce_training
18
+ src.training.rl_convs_processing
19
+ src.training.train_main
src_code_for_reproducibility/docs/source/src.training.train_main.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.training.train\_main module
2
+ ===============================
3
+
4
+ .. automodule:: src.training.train_main
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.utils.export_ppo_training_set.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.utils.export\_ppo\_training\_set module
2
+ ===========================================
3
+
4
+ .. automodule:: src.utils.export_ppo_training_set
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.utils.log_gpu_usage.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.utils.log\_gpu\_usage module
2
+ ================================
3
+
4
+ .. automodule:: src.utils.log_gpu_usage
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.utils.rst ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ src.utils package
2
+ =================
3
+
4
+ .. automodule:: src.utils
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
8
+
9
+ Submodules
10
+ ----------
11
+
12
+ .. toctree::
13
+ :maxdepth: 4
14
+
15
+ src.utils.common_imports
16
+ src.utils.export_ppo_training_set
17
+ src.utils.extra_stats
18
+ src.utils.inherit_args
19
+ src.utils.log_gpu_usage
20
+ src.utils.log_statistics
21
+ src.utils.model_to_cpu
22
+ src.utils.parallel_shuffle
23
+ src.utils.quick_stats
24
+ src.utils.update_start_epoch
src_code_for_reproducibility/docs/source/src.utils.update_start_epoch.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.utils.update\_start\_epoch module
2
+ =====================================
3
+
4
+ .. automodule:: src.utils.update_start_epoch
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/markov_games/diplomacy/diplomacy_agent.py ADDED
@@ -0,0 +1,259 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List, Tuple, Optional, Any
2
+ import copy
3
+
4
+ class DiplomacyAgent:
5
+ """Agent handler for Diplomacy game that follows the MARL standard.
6
+
7
+ This class is responsible for parsing LLM output into valid Diplomacy orders,
8
+ managing the agent state, and providing information for logging.
9
+ """
10
+
11
+ def __init__(self, policy_id: str, power_name: str, random_valid_move=False):
12
+ """Initialize the agent handler for a power in the Diplomacy game.
13
+
14
+ Args:
15
+ power_name: The name of the power this agent controls (e.g., 'FRANCE', 'ENGLAND')
16
+ policy_id: The identifier for the policy this agent uses
17
+ random_valid_move: If True, will select random valid moves instead of using LLM (default: False)
18
+ """
19
+ self.policy_id = policy_id
20
+ self.power_name = power_name
21
+ self.orders = []
22
+ self.wait = True
23
+ self.processing_state = "WAITING_FOR_ORDERS"
24
+ self.parsed_orders = []
25
+ self.order_status = {}
26
+ self.message_history = []
27
+ self.random_valid_move = random_valid_move
28
+
29
+ def step(self, observation_from_env, policy_output=None):
30
+ """Update the agent state based on the observation and LLM output.
31
+
32
+ Args:
33
+ observation_from_env: The observation from the environment
34
+ policy_output: The output from the LLM
35
+
36
+ Returns:
37
+ policy_id: The policy identifier
38
+ policy_input: The input to the policy
39
+ action: The official action to be sent to the environment
40
+ done: Whether the LLM action is ready to be sent to the environment
41
+ info: Additional information about the agent
42
+ """
43
+ info = {}
44
+
45
+ # If random_valid_move is enabled, select random valid moves
46
+ if self.random_valid_move:
47
+ valid_orders = self._select_random_valid_moves(observation_from_env)
48
+ self.orders = valid_orders
49
+ self.wait = False
50
+ action = {
51
+ "orders": valid_orders,
52
+ "wait": False
53
+ }
54
+ return self.policy_id, {}, action, True, info
55
+
56
+ # If no policy output, this is the initial step - prepare prompt
57
+ if policy_output is None:
58
+ # Create initial prompt for the LLM
59
+ phase = observation_from_env.get('phase', '')
60
+ units = observation_from_env.get('units', {}).get(self.power_name, [])
61
+ centers = observation_from_env.get('centers', {}).get(self.power_name, [])
62
+ orderable_locations = observation_from_env.get('orderable_locations', {})
63
+
64
+ prompt = self._create_prompt(phase, units, centers, orderable_locations)
65
+
66
+ return self.policy_id, {"prompt": prompt}, None, False, info
67
+
68
+ # Process the LLM output to extract orders
69
+ success, parsed_orders = self._parse_llm_output(policy_output)
70
+ self.parsed_orders = parsed_orders
71
+
72
+ if not success:
73
+ # Need more information from LLM
74
+ clarification_prompt = self._create_clarification_prompt(policy_output, parsed_orders)
75
+ return self.policy_id, {"prompt": clarification_prompt}, None, False, info
76
+
77
+ # Validate if the orders are valid for the current phase
78
+ valid_orders = self._validate_orders(parsed_orders, observation_from_env)
79
+
80
+ if valid_orders:
81
+ # Orders are valid, prepare action for environment
82
+ self.orders = valid_orders
83
+ self.wait = False
84
+ action = {
85
+ "orders": valid_orders,
86
+ "wait": False
87
+ }
88
+ return self.policy_id, {}, action, True, info
89
+ else:
90
+ # Orders are invalid, ask for new ones
91
+ error_prompt = self._create_error_prompt(parsed_orders, observation_from_env)
92
+ return self.policy_id, {"prompt": error_prompt}, None, False, info
93
+
94
+ def _create_prompt(self, phase, units, centers, orderable_locations):
95
+ """Create the initial prompt for the LLM.
96
+
97
+ Args:
98
+ phase: The current game phase
99
+ units: List of units controlled by this power
100
+ centers: List of supply centers controlled by this power
101
+ orderable_locations: List of locations where orders can be issued
102
+
103
+ Returns:
104
+ A prompt string for the LLM
105
+ """
106
+ prompt = f"You are playing as {self.power_name} in Diplomacy. The current phase is {phase}.\n\n"
107
+ prompt += f"Your units: {', '.join(units)}\n"
108
+ prompt += f"Your supply centers: {', '.join(centers)}\n"
109
+ prompt += f"Locations you can order: {', '.join(orderable_locations)}\n\n"
110
+
111
+ if phase.endswith('M'): # Movement phase
112
+ prompt += "Please provide orders for your units in the form:\n"
113
+ prompt += "- A LON H (hold)\n"
114
+ prompt += "- F NTH - NWY (move)\n"
115
+ prompt += "- A WAL S F LON (support)\n"
116
+ prompt += "- F NWG C A NWY - EDI (convoy)\n"
117
+ elif phase.endswith('R'): # Retreat phase
118
+ prompt += "Please provide retreat orders for your dislodged units:\n"
119
+ prompt += "- A PAR R MAR (retreat to MAR)\n"
120
+ prompt += "- A PAR D (disband)\n"
121
+ elif phase.endswith('A'): # Adjustment phase
122
+ if len(units) < len(centers):
123
+ prompt += "You can build units. Please provide build orders:\n"
124
+ prompt += "- A PAR B (build army in PAR)\n"
125
+ prompt += "- F BRE B (build fleet in BRE)\n"
126
+ prompt += "- WAIVE (waive a build)\n"
127
+ elif len(units) > len(centers):
128
+ prompt += "You must remove units. Please provide disbandment orders:\n"
129
+ prompt += "- A PAR D (disband army in PAR)\n"
130
+ prompt += "- F BRE D (disband fleet in BRE)\n"
131
+
132
+ prompt += "\nProvide your orders as a list, one per line."
133
+ return prompt
134
+
135
+ def _parse_llm_output(self, llm_output):
136
+ """Parse the LLM output to extract orders.
137
+
138
+ Args:
139
+ llm_output: The raw output from the LLM
140
+
141
+ Returns:
142
+ success: Whether parsing was successful
143
+ parsed_orders: List of parsed orders
144
+ """
145
+ # Simple parsing for now - extract lines that look like orders
146
+ lines = llm_output.strip().split('\n')
147
+ orders = []
148
+
149
+ for line in lines:
150
+ # Remove list markers, hyphens, etc.
151
+ line = line.strip('- *•').strip()
152
+
153
+ # Skip empty lines and lines that don't look like orders
154
+ if not line or line.startswith('I ') or line.startswith('Let\'s'):
155
+ continue
156
+
157
+ # Check if it looks like a Diplomacy order
158
+ if (' H' in line or ' -' in line or ' S ' in line or ' C ' in line or
159
+ ' R ' in line or ' D' in line or ' B' in line or line == 'WAIVE'):
160
+ orders.append(line)
161
+
162
+ return len(orders) > 0, orders
163
+
164
+ def _validate_orders(self, orders, observation):
165
+ """Validate if the orders are valid for the current phase.
166
+
167
+ Args:
168
+ orders: List of orders to validate
169
+ observation: Current observation from the environment
170
+
171
+ Returns:
172
+ List of valid orders or None if invalid
173
+ """
174
+ # For simplicity, we'll assume all parsed orders are valid
175
+ # In a real implementation, we would use the game's validation logic
176
+ return orders
177
+
178
+ def _create_clarification_prompt(self, previous_output, parsed_orders):
179
+ """Create a prompt asking for clarification when orders couldn't be parsed.
180
+
181
+ Args:
182
+ previous_output: The previous LLM output
183
+ parsed_orders: Any orders that were successfully parsed
184
+
185
+ Returns:
186
+ A prompt string for the LLM
187
+ """
188
+ prompt = f"I couldn't fully understand your orders for {self.power_name}. "
189
+
190
+ if parsed_orders:
191
+ prompt += f"I understood these orders:\n"
192
+ for order in parsed_orders:
193
+ prompt += f"- {order}\n"
194
+
195
+ prompt += "\nPlease provide clear, valid Diplomacy orders in the format:\n"
196
+ prompt += "- A LON H\n- F NTH - NWY\n- etc.\n"
197
+ return prompt
198
+
199
+ def _create_error_prompt(self, invalid_orders, observation):
200
+ """Create a prompt when orders are invalid.
201
+
202
+ Args:
203
+ invalid_orders: The invalid orders
204
+ observation: Current observation from the environment
205
+
206
+ Returns:
207
+ A prompt string for the LLM
208
+ """
209
+ prompt = f"The following orders for {self.power_name} are invalid:\n"
210
+ for order in invalid_orders:
211
+ prompt += f"- {order}\n"
212
+
213
+ prompt += "\nPlease provide valid orders for your units."
214
+ return prompt
215
+
216
+ def get_log_info(self):
217
+ """Get information about the agent required to log a trajectory.
218
+
219
+ Returns:
220
+ log_info: Information about the agent required to log a trajectory.
221
+ """
222
+ return {
223
+ "power_name": self.power_name,
224
+ "orders": self.orders,
225
+ "wait": self.wait,
226
+ "parsing_state": self.processing_state,
227
+ "message_history": self.message_history
228
+ }
229
+
230
+ def render(self):
231
+ """Render the current state of the agent."""
232
+ print(f"Power: {self.power_name}")
233
+ print(f"Orders: {self.orders}")
234
+ print(f"Wait: {self.wait}")
235
+
236
+ def close(self):
237
+ """Perform any necessary cleanup."""
238
+ pass
239
+
240
+ def _select_random_valid_moves(self, observation):
241
+ """Select random valid moves for all units.
242
+
243
+ Args:
244
+ observation: Current observation from the environment
245
+
246
+ Returns:
247
+ List of valid orders
248
+ """
249
+ import random
250
+
251
+ possible_orders = observation.get('possible_orders', {})
252
+ valid_orders = []
253
+
254
+ # For each location with possible orders, select one randomly
255
+ for location, orders in possible_orders.items():
256
+ if orders: # If there are any possible orders for this location
257
+ valid_orders.append(random.choice(orders))
258
+
259
+ return valid_orders
src_code_for_reproducibility/markov_games/diplomacy/diplomacy_logging.py ADDED
@@ -0,0 +1,360 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ from utils.common_imports import *
4
+
5
+
6
+
7
+ def diplomacy_log_match(
8
+ path,
9
+ agents_log_info,
10
+ env_log_info,
11
+ metrics_func=None,
12
+ metrics_func_args=None
13
+ ):
14
+ """
15
+ Logs the Diplomacy game data and generates HTML visualizations using the get_log_info methods.
16
+
17
+ Args:
18
+ path (str): Base path to save the data.
19
+ agents_log_info (list): List of agent information dictionaries containing the get_log_info results.
20
+ env_log_info (dict): Environment information from its get_log_info method.
21
+ metrics_func (str, optional): Name of the function to calculate metrics.
22
+ metrics_func_args (dict, optional): Arguments for the metrics function.
23
+ """
24
+ # Create directory structure
25
+ os.makedirs(path, exist_ok=True)
26
+
27
+ # Save the environment log info
28
+ env_log_path = os.path.join(path, "env_log.json")
29
+ with open(env_log_path, "w") as f:
30
+ json.dump(env_log_info, f, indent=4, default=_json_serialize)
31
+
32
+ # Process each agent's log info
33
+ for agent_log in agents_log_info:
34
+ power_name = agent_log["power_name"]
35
+
36
+ # Define paths for raw data and statistics subfolders
37
+ power_path = os.path.join(path, power_name)
38
+ raw_data_path = os.path.join(power_path, "raw_data")
39
+ statistics_path = os.path.join(power_path, "statistics")
40
+
41
+ # Ensure directories exist
42
+ os.makedirs(raw_data_path, exist_ok=True)
43
+ os.makedirs(statistics_path, exist_ok=True)
44
+
45
+ # Determine the next available file number for raw data
46
+ raw_files = os.listdir(raw_data_path)
47
+ raw_numbers = [int(f.split('_')[-1].split('.')[0]) for f in raw_files if f.startswith("log_")]
48
+ next_raw_number = max(raw_numbers, default=0) + 1
49
+ raw_file = os.path.join(raw_data_path, f"log_{next_raw_number}.json")
50
+
51
+ # Save agent log info
52
+ with open(raw_file, "w") as f:
53
+ json.dump(agent_log, f, indent=4, default=_json_serialize)
54
+
55
+ # Log metrics if a metrics function is provided
56
+ if metrics_func:
57
+ metrics_files = os.listdir(statistics_path)
58
+ metrics_numbers = [int(f.split('_')[-1].split('.')[0]) for f in metrics_files if f.startswith("metrics_")]
59
+ next_metrics_number = max(metrics_numbers, default=0) + 1
60
+ metrics_file = os.path.join(statistics_path, f"metrics_{next_metrics_number}.json")
61
+
62
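+ metrics = globals()[metrics_func](agent_log, info, **metrics_func_args)  # NOTE(review): `info` is not defined in this function (probably meant env_log_info), and **metrics_func_args raises TypeError when left as None - verify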
63
+ with open(metrics_file, "w") as f:
64
+ json.dump(metrics, f, indent=4)
65
+
66
+ # Generate the HTML visualization
67
+ html_content = generate_diplomacy_html(agents_log_info, env_log_info)
68
+
69
+ # Ensure the html directory exists
70
+ html_path = os.path.join(path, "html")
71
+ os.makedirs(html_path, exist_ok=True)
72
+
73
+ # Determine the next available file number for HTML
74
+ html_files = os.listdir(html_path)
75
+ html_numbers = [int(f.split('_')[-1].split('.')[0]) for f in html_files if f.startswith("game_summary_")]
76
+ next_html_number = max(html_numbers, default=0) + 1
77
+ html_file = os.path.join(html_path, f"game_summary_{next_html_number}.html")
78
+
79
+ # Save the HTML content to a file
80
+ with open(html_file, "w") as f:
81
+ f.write(html_content)
82
+
83
+ def generate_diplomacy_html(agent_infos, env_info):
84
+ """
85
+ Generate HTML visualization for a Diplomacy game.
86
+
87
+ Args:
88
+ agent_infos (list): List of agent information dictionaries from get_log_info.
89
+ env_info (dict): Environment information from get_log_info.
90
+
91
+ Returns:
92
+ str: HTML content for the game visualization.
93
+ """
94
+ # Extract game information
95
+ game_id = env_info.get("game_id", "Unknown")
96
+ phase = env_info.get("phase", "Unknown")
97
+ map_name = env_info.get("map_name", "standard")
98
+ is_game_done = env_info.get("is_game_done", False)
99
+ outcome = env_info.get("outcome", [])
100
+
101
+ centers = env_info.get("centers", {})
102
+ units = env_info.get("units", {})
103
+
104
+ # HTML head and style
105
+ html_content = """
106
+ <!DOCTYPE html>
107
+ <html lang="en">
108
+ <head>
109
+ <meta charset="UTF-8">
110
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
111
+ <title>Diplomacy Game {game_id}</title>
112
+ <style>
113
+ body {{
114
+ font-family: 'Arial', sans-serif;
115
+ background-color: #f5f5f5;
116
+ color: #333333;
117
+ margin: 0;
118
+ padding: 20px;
119
+ }}
120
+ .container {{
121
+ display: grid;
122
+ grid-template-columns: repeat(3, 1fr);
123
+ grid-gap: 20px;
124
+ margin-bottom: 30px;
125
+ }}
126
+ .central-info {{
127
+ grid-column: span 3;
128
+ background: #fff;
129
+ padding: 20px;
130
+ border-radius: 10px;
131
+ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
132
+ margin-bottom: 20px;
133
+ }}
134
+ .power-column {{
135
+ background: #fff;
136
+ padding: 15px;
137
+ border-radius: 10px;
138
+ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
139
+ }}
140
+ .message {{
141
+ margin-bottom: 15px;
142
+ padding: 12px;
143
+ border-radius: 8px;
144
+ box-shadow: 0 1px 4px rgba(0, 0, 0, 0.1);
145
+ }}
146
+ .user {{
147
+ background: rgba(235, 245, 255, 0.8);
148
+ border-left: 4px solid #007bff;
149
+ }}
150
+ .assistant {{
151
+ background: rgba(240, 255, 240, 0.8);
152
+ border-right: 4px solid #28a745;
153
+ }}
154
+ .orders {{
155
+ background: rgba(255, 248, 225, 0.8);
156
+ border-left: 4px solid #ffc107;
157
+ }}
158
+ .role {{
159
+ font-weight: bold;
160
+ margin-bottom: 5px;
161
+ color: #333333;
162
+ }}
163
+ .power-name {{
164
+ text-align: center;
165
+ font-size: 1.4em;
166
+ margin-bottom: 15px;
167
+ color: #000;
168
+ font-weight: 600;
169
+ text-transform: uppercase;
170
+ letter-spacing: 1px;
171
+ }}
172
+ .game-info {{
173
+ display: grid;
174
+ grid-template-columns: repeat(2, 1fr);
175
+ grid-gap: 15px;
176
+ }}
177
+ .info-card {{
178
+ background: #f9f9f9;
179
+ padding: 15px;
180
+ border-radius: 8px;
181
+ box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
182
+ }}
183
+ .supply-centers, .units-list {{
184
+ display: flex;
185
+ flex-wrap: wrap;
186
+ justify-content: space-between;
187
+ }}
188
+ .supply-center, .unit {{
189
+ flex: 0 0 30%;
190
+ margin-bottom: 10px;
191
+ padding: 8px;
192
+ background: #f0f0f0;
193
+ border-radius: 5px;
194
+ text-align: center;
195
+ }}
196
+ h2 {{
197
+ border-bottom: 2px solid #eee;
198
+ padding-bottom: 10px;
199
+ margin-top: 0;
200
+ }}
201
+ .outcome {{
202
+ background: #e8f5e9;
203
+ padding: 15px;
204
+ border-radius: 8px;
205
+ margin-top: 15px;
206
+ font-weight: bold;
207
+ text-align: center;
208
+ }}
209
+ .austria {{ border-top: 5px solid #ff5050; }}
210
+ .england {{ border-top: 5px solid #5050ff; }}
211
+ .france {{ border-top: 5px solid #50c0ff; }}
212
+ .germany {{ border-top: 5px solid #808080; }}
213
+ .italy {{ border-top: 5px solid #50ff50; }}
214
+ .russia {{ border-top: 5px solid #ffffff; border: 1px solid #ccc; }}
215
+ .turkey {{ border-top: 5px solid #c0c000; }}
216
+ </style>
217
+ </head>
218
+ <body>
219
+ <div class="central-info">
220
+ <h2>Game Information</h2>
221
+ <div class="game-info">
222
+ <div class="info-card">
223
+ <h3>Game Details</h3>
224
+ <p><strong>Game ID:</strong> {game_id}</p>
225
+ <p><strong>Phase:</strong> {phase}</p>
226
+ <p><strong>Map:</strong> {map_name}</p>
227
+ <p><strong>Status:</strong> {status}</p>
228
+ </div>
229
+ <div class="info-card">
230
+ <h3>Supply Centers</h3>
231
+ <div class="supply-centers">
232
+ """.format(
233
+ game_id=game_id,
234
+ phase=phase,
235
+ map_name=map_name,
236
+ status="Completed" if is_game_done else "Active"
237
+ )
238
+
239
+ # Add supply center information
240
+ for power, power_centers in centers.items():
241
+ html_content += f"""
242
+ <div class="supply-center">
243
+ <strong>{power}:</strong> {len(power_centers)}
244
+ </div>
245
+ """
246
+
247
+ html_content += """
248
+ </div>
249
+ </div>
250
+ </div>
251
+ """
252
+
253
+ # Add outcome if game is done
254
+ if is_game_done and outcome:
255
+ winners = outcome[1:] if len(outcome) > 1 else ["Draw"]
256
+ html_content += f"""
257
+ <div class="outcome">
258
+ <h3>Game Outcome</h3>
259
+ <p>Winners: {', '.join(winners)}</p>
260
+ </div>
261
+ """
262
+
263
+ html_content += """
264
+ </div>
265
+ <div class="container">
266
+ """
267
+
268
+ # Add each power's information
269
+ for agent_log in agent_infos:
270
+ power_name = agent_log["power_name"]
271
+ power_class = power_name.lower()
272
+ orders = agent_log.get("orders", [])
273
+ message_history = agent_log.get("message_history", [])
274
+
275
+ html_content += f"""
276
+ <div class="power-column {power_class}">
277
+ <div class="power-name">{power_name}</div>
278
+
279
+ <div class="info-card">
280
+ <h3>Units</h3>
281
+ <ul>
282
+ """
283
+
284
+ # Add units information
285
+ power_units = units.get(power_name, [])
286
+ for unit in power_units:
287
+ html_content += f"<li>{unit}</li>"
288
+
289
+ html_content += """
290
+ </ul>
291
+ </div>
292
+
293
+ <div class="message orders">
294
+ <div class="role">Final Orders</div>
295
+ <ul>
296
+ """
297
+
298
+ # Add orders
299
+ for order in orders:
300
+ html_content += f"<li>{order}</li>"
301
+
302
+ html_content += """
303
+ </ul>
304
+ </div>
305
+ """
306
+
307
+ # Add message history
308
+ for message in message_history:
309
+ if isinstance(message, dict):
310
+ # Skip system messages; they are not rendered in the summary
311
+ if message.get("role") == "system":
312
+ continue
313
+
314
+ role = message.get("role", "unknown")
315
+ content = message.get("content", "")
316
+
317
+ role_class = "user" if role == "user" else "assistant"
318
+ role_display = "Environment" if role == "user" else f"LLM ({power_name})"
319
+
320
+ # Escape angle brackets and turn newlines into <br> (NOTE(review): '&' is left unescaped)
321
+ content = content.replace("<", "&lt;").replace(">", "&gt;").replace("\n", "<br>")
322
+
323
+ html_content += f"""
324
+ <div class="message {role_class}">
325
+ <div class="role">{role_display}</div>
326
+ <p>{content}</p>
327
+ </div>
328
+ """
329
+ elif isinstance(message, str):
330
+ # Simple string messages (may be used in some implementations)
331
+ html_content += f"""
332
+ <div class="message">
333
+ <p>{message}</p>
334
+ </div>
335
+ """
336
+
337
+ html_content += """
338
+ </div>
339
+ """
340
+
341
+ html_content += """
342
+ </div>
343
+ </body>
344
+ </html>
345
+ """
346
+
347
+ return html_content
348
+
349
+ def _json_serialize(obj):
350
+ """
351
+ A helper function to convert non-JSON-serializable objects
352
+ (like OrderResult) into their string representation.
353
+ """
354
+ # Check for the specific object types you know are problematic
355
+ if obj.__class__.__name__ == "OrderResult":
356
+ # Return a string representation or a dict
357
+ return str(obj)
358
+
359
+ # Fallback: attempt to convert anything else to string
360
+ return str(obj)
src_code_for_reproducibility/markov_games/ipd/__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ from .Ipd_hard_coded_agents import AlwaysCooperateIPDAgent, AlwaysDefectIPDAgent
2
+
3
+ __all__ = [
4
+ "AlwaysCooperateIPDAgent",
5
+ "AlwaysDefectIPDAgent",
6
+ ]
7
+
src_code_for_reproducibility/markov_games/ipd/__pycache__/Ipd_hard_coded_agents.cpython-312.pyc ADDED
Binary file (2.86 kB). View file
 
src_code_for_reproducibility/markov_games/ipd/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (308 Bytes). View file
 
src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_agent.cpython-312.pyc ADDED
Binary file (4.7 kB). View file
 
src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_simulation.cpython-312.pyc ADDED
Binary file (6.72 kB). View file
 
src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_statistics.cpython-312.pyc ADDED
Binary file (1.28 kB). View file
 
src_code_for_reproducibility/markov_games/ipd/ipd_agent.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import copy
2
+ import json
3
+ import random
4
+ import re
5
+ from collections.abc import Callable
6
+ from copy import deepcopy
7
+ from dataclasses import dataclass, field
8
+ from typing import Any, Dict, List, Optional, Tuple, Union
9
+
10
+ from mllm.markov_games.agent import Agent
11
+ from mllm.markov_games.rollout_tree import AgentActLog, ChatTurn
12
+
13
+
14
@dataclass
class IPDAgentState:
    """
    Mutable bookkeeping carried by an IPD agent between calls to ``act``.

    Every field defaults to the value of a brand-new agent, so
    ``IPDAgentState()`` yields a valid initial state. Explicit keyword
    construction keeps working unchanged.
    """

    # Retry counter; starts at 0 (no code visible in this file updates it).
    nb_retries: int = 0
    # Round number of the most recent observation the agent acted on.
    round_nb: int = 0
    # Number of chat turns already reported in an earlier AgentActLog.
    chat_counter: int = 0
    # Whole conversation so far (prompts and policy replies), oldest first.
    # default_factory gives each state its own list instead of a shared one.
    chat_history: List[ChatTurn] = field(default_factory=list)
24
+
25
+
26
@dataclass
class IPDAgent(Agent):
    """
    Chat-driven agent for the IPD game (presumably the iterated prisoner's
    dilemma -- the two actions are named cooperate and defect).

    On every ``act`` call the agent extends its chat history with any
    prompt that is due (the intro prompt on the very first call, the
    co-agent's last move at the start of each later round), asks ``policy``
    for a reply, and returns that reply's content as the action.

    ``goal_prompt``, ``strategy_prompt``, ``max_errors``, ``allow_reasoning``
    and ``max_reasoning_chars`` are stored but not read by any method of
    this class.
    """

    seed: int
    agent_id: str
    agent_name: str
    # Awaited as ``policy(state=..., agent_id=..., regex=...)``; the awaited
    # result is used as a ChatTurn (its ``content`` becomes the action).
    policy: Callable[..., Any]
    intro_prompt: str  # Introduction prompt explaining the game rules
    goal_prompt: str  # Prompt explaining the agent's goal
    strategy_prompt: str  # Prompt suggesting a strategy to the agent
    max_errors: int  # Maximum number of errors allowed before default action
    allow_reasoning: bool  # Whether to allow reasoning in the response
    max_reasoning_chars: int  # Maximum number of characters for reasoning
    cooperate_string: str  # string parsed as playing cooperate by simulation
    defect_string: str  # string parsed as playing defect by simulation

    @staticmethod
    def _new_state() -> IPDAgentState:
        """Build the state of an agent that has not acted yet."""
        return IPDAgentState(
            nb_retries=0, round_nb=0, chat_counter=0, chat_history=[]
        )

    def __post_init__(self):
        """Attach a fresh state once the dataclass fields are populated."""
        self.state = self._new_state()

    async def act(self, observation) -> Tuple[Any, AgentActLog]:
        """
        Query the policy for this step's move and log the new chat turns.

        Args:
            observation: Object exposing ``round_nb`` and, when a new round
                has started, ``last_coagent_move``.

        Returns:
            A pair ``(action, log)`` where ``action`` is the ``content`` of
            the policy's reply and ``log`` is an ``AgentActLog`` holding the
            chat turns appended since the previous call.
        """
        round_nb = observation.round_nb

        # Very first call of the game: open the conversation with the rules.
        if round_nb == 0 and self.state.chat_counter == 0:
            self.state.chat_history.append(
                ChatTurn(
                    agent_id=self.agent_id,
                    role="user",
                    content=self.intro_prompt,
                    is_state_end=True,
                )
            )

        # A new round has started: report what the other agent just played.
        if round_nb > self.state.round_nb:
            coagent_action = observation.last_coagent_move
            user_message = f"Last round, the other agent played {coagent_action}."
            self.state.chat_history.append(
                ChatTurn(
                    agent_id=self.agent_id,
                    role="user",
                    content=user_message,
                    is_state_end=True,
                )
            )

        # The policy is queried on every call, new round or not.
        # NOTE(review): the two action strings are interpolated verbatim, so
        # regex metacharacters inside them are not escaped; the regex is
        # presumably used by the policy to constrain its reply -- confirm.
        output_chat_turn: ChatTurn = await self.policy(
            state=self.state.chat_history,
            agent_id=self.agent_id,
            regex=f"({self.cooperate_string}|{self.defect_string})",
        )
        self.state.chat_history.append(output_chat_turn)
        action = output_chat_turn.content

        # Log only the turns that were not part of an earlier log, then move
        # the watermark to the end of the history.
        agent_step_log = AgentActLog(
            chat_turns=self.state.chat_history[self.state.chat_counter :], info=None
        )
        self.state.chat_counter = len(self.state.chat_history)
        self.state.round_nb = round_nb

        return action, agent_step_log

    def get_safe_copy(self):
        """
        Return a safe copy of the agent.

        The agent itself is copied shallowly (fields such as ``policy`` are
        shared with the original), while ``state`` is deep-copied so the
        copy's chat history and counters can change independently.
        """
        agent_copy = copy.copy(self)
        agent_copy.state = copy.deepcopy(self.state)
        return agent_copy

    def reset(self):
        """
        Restore the initial state, then report that reset is unsupported.

        Raises:
            NotImplementedError: Always. The state is built with explicit
                initial values so that this intended exception is raised,
                rather than a ``TypeError`` from constructing the state
                without its required fields.
        """
        self.state = self._new_state()
        raise NotImplementedError

    def render(self):
        """No-op; returns ``None``."""

    def close(self):
        """No-op; returns ``None``."""

    def get_agent_info(self):
        """No-op; returns ``None``."""