Muqeeth commited on
Commit
46eb083
·
verified ·
1 Parent(s): 095042b

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. src_code_for_reproducibility/docs/source/conf.py +48 -0
  2. src_code_for_reproducibility/docs/source/contributing.rst +0 -0
  3. src_code_for_reproducibility/docs/source/environments.rst +35 -0
  4. src_code_for_reproducibility/docs/source/index.rst +22 -0
  5. src_code_for_reproducibility/docs/source/installation.rst +10 -0
  6. src_code_for_reproducibility/docs/source/marl_standard.rst +141 -0
  7. src_code_for_reproducibility/docs/source/modules.rst +7 -0
  8. src_code_for_reproducibility/docs/source/src.environments.dond.dond_agent.rst +7 -0
  9. src_code_for_reproducibility/docs/source/src.environments.dond.dond_game.rst +7 -0
  10. src_code_for_reproducibility/docs/source/src.environments.dond.dond_log_funcs.rst +7 -0
  11. src_code_for_reproducibility/docs/source/src.environments.dond.dond_player.rst +7 -0
  12. src_code_for_reproducibility/docs/source/src.environments.dond.dond_return_funcs.rst +7 -0
  13. src_code_for_reproducibility/docs/source/src.environments.dond.dond_statistics_funcs.rst +7 -0
  14. src_code_for_reproducibility/docs/source/src.environments.dond.dond_training_data_funcs.rst +7 -0
  15. src_code_for_reproducibility/docs/source/src.environments.dond.rst +19 -0
  16. src_code_for_reproducibility/docs/source/src.environments.env_imports.rst +7 -0
  17. src_code_for_reproducibility/docs/source/src.environments.environment_imports.rst +7 -0
  18. src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_agent.rst +7 -0
  19. src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_game.rst +7 -0
  20. src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_log_funcs.rst +7 -0
  21. src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_statistics_funcs.rst +7 -0
  22. src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_training_data_funcs.rst +7 -0
  23. src_code_for_reproducibility/docs/source/src.environments.ipd.rst +19 -0
  24. src_code_for_reproducibility/docs/source/src.environments.rst +25 -0
  25. src_code_for_reproducibility/docs/source/src.experiments.arithmetic_test.rst +7 -0
  26. src_code_for_reproducibility/docs/source/src.experiments.dond_run_train.rst +7 -0
  27. src_code_for_reproducibility/docs/source/src.experiments.generate_and_train.rst +7 -0
  28. src_code_for_reproducibility/docs/source/src.experiments.last_completion.rst +7 -0
  29. src_code_for_reproducibility/docs/source/src.experiments.rst +17 -0
  30. src_code_for_reproducibility/docs/source/src.generation.rst +15 -0
  31. src_code_for_reproducibility/docs/source/src.generation.run_games.rst +7 -0
  32. src_code_for_reproducibility/docs/source/src.models.dummy_hf_agent.rst +7 -0
  33. src_code_for_reproducibility/docs/source/src.models.dummy_local_llm.rst +7 -0
  34. src_code_for_reproducibility/docs/source/src.models.hf_agent.rst +7 -0
  35. src_code_for_reproducibility/docs/source/src.models.local_llm.rst +7 -0
  36. src_code_for_reproducibility/docs/source/src.models.new_local_llm.rst +7 -0
  37. src_code_for_reproducibility/docs/source/src.models.oai_agent.rst +7 -0
  38. src_code_for_reproducibility/docs/source/src.models.rst +20 -0
  39. src_code_for_reproducibility/docs/source/src.models.server_llm.rst +7 -0
  40. src_code_for_reproducibility/docs/source/src.models.updatable_worker.rst +7 -0
  41. src_code_for_reproducibility/docs/source/src.models.vllm_worker_wrap.rst +7 -0
  42. src_code_for_reproducibility/docs/source/src.rst +28 -0
  43. src_code_for_reproducibility/docs/source/src.run.rst +7 -0
  44. src_code_for_reproducibility/docs/source/src.training.ppo_train.rst +7 -0
  45. src_code_for_reproducibility/docs/source/src.training.ppo_train_value_head.rst +7 -0
  46. src_code_for_reproducibility/docs/source/src.training.reinforce_training.rst +7 -0
  47. src_code_for_reproducibility/docs/source/src.training.rl_convs_processing.rst +7 -0
  48. src_code_for_reproducibility/docs/source/src.training.rst +19 -0
  49. src_code_for_reproducibility/docs/source/src.training.train_main.rst +7 -0
  50. src_code_for_reproducibility/docs/source/src.utils.common_imports.rst +7 -0
src_code_for_reproducibility/docs/source/conf.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Configuration file for the Sphinx documentation builder.
2
+ import os
3
+ import sys
4
+ sys.path.insert(0, os.path.abspath('../..'))
5
+
6
+ # -- Project information -----------------------------------------------------
7
+ project = 'llm_negotiation'
8
+ copyright = '2023, Your Name'
9
+ author = 'Your Name'
10
+
11
+ # -- General configuration ---------------------------------------------------
12
+ extensions = [
13
+ 'sphinx.ext.autodoc',
14
+ 'sphinx.ext.viewcode',
15
+ 'sphinx.ext.napoleon',
16
+ 'sphinx.ext.autosummary',
17
+ 'sphinx.ext.intersphinx',
18
+ 'sphinx.ext.mathjax',
19
+ 'sphinxcontrib.mermaid',
20
+ 'sphinx_rtd_theme',
21
+ ]
22
+
23
+ templates_path = ['_templates']
24
+ exclude_patterns = []
25
+
26
+ # -- Options for HTML output -------------------------------------------------
27
+ html_theme = 'sphinx_rtd_theme'
28
+ html_static_path = ['_static']
29
+
30
+ # -- Napoleon settings -------------------------------------------------------
31
+ napoleon_google_docstring = True
32
+ napoleon_numpy_docstring = False
33
+ napoleon_include_init_with_doc = True
34
+ napoleon_include_private_with_doc = False
35
+ napoleon_include_special_with_doc = True
36
+ napoleon_use_admonition_for_examples = False
37
+ napoleon_use_admonition_for_notes = False
38
+ napoleon_use_admonition_for_references = False
39
+ napoleon_use_ivar = False
40
+ napoleon_use_param = True
41
+ napoleon_use_rtype = True
42
+ napoleon_preprocess_types = False
43
+ napoleon_type_aliases = None
44
+ napoleon_attr_annotations = True
45
+
46
+ # -- Path setup --------------------------------------------------------------
47
+ # Make sure the project's modules can be found by Sphinx
48
+ sys.path.insert(0, os.path.abspath('../../src'))
src_code_for_reproducibility/docs/source/contributing.rst ADDED
File without changes
src_code_for_reproducibility/docs/source/environments.rst ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ =================
2
+ MARL Environments
3
+ =================
4
+
5
+ This section provides detailed documentation for the multi-agent negotiation environments included in the library.
6
+
7
+ Each environment follows the standard interface described in :doc:`marl_standard` but has its own unique game rules,
8
+ dynamics, and implementation details.
9
+
10
+ .. toctree::
11
+ :maxdepth: 2
12
+ :caption: Available Environments:
13
+
14
+ environments/ipd
15
+ environments/diplomacy
16
+ environments/dond
17
+
18
+ Overview
19
+ --------
20
+
21
+ The library currently includes the following environments:
22
+
23
+ 1. **Iterated Prisoner's Dilemma (IPD)**: A classic game theory problem where two agents repeatedly decide whether to cooperate or defect, with different payoffs based on their joint actions.
24
+
25
+ 2. **Diplomacy**: An adaptation of the board game Diplomacy, where seven European powers compete for control of supply centers through strategic moves and alliances.
26
+
27
+ 3. **Deal or No Deal (DOND)**: A negotiation environment based on `the paper Deal or No Deal? End-to-End Learning for Negotiation Dialogues <https://arxiv.org/pdf/1706.05125>`_ in which agents negotiate over the distribution of a set of prizes.
28
+
29
+ Each environment documentation includes:
30
+
31
+ - Game rules and background
32
+ - Implementation details
33
+ - API reference
34
+ - Example usage
35
+ - Advanced features and customization options
src_code_for_reproducibility/docs/source/index.rst ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Welcome to LLM Negotiation's documentation!
2
+ ===========================================
3
+ This library is a collection of tools for training and evaluating LLM-based agents in multi-agent environments. It is designed to be easy to use and extend.
4
+
5
+ .. toctree::
6
+ :maxdepth: 3
7
+ :caption: Contents:
8
+
9
+ installation
10
+ marl_standard
11
+ environments
12
+ launch
13
+ usage
14
+ modules
15
+ contributing
16
+
17
+ Indices and tables
18
+ ==================
19
+
20
+ * :ref:`genindex`
21
+ * :ref:`modindex`
22
+ * :ref:`search`
src_code_for_reproducibility/docs/source/installation.rst ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Installation
2
+ ============
3
+
4
+ To install the package, run:
5
+
6
+ .. code-block:: bash
7
+
8
+ git clone https://github.com/yourusername/llm_negotiation.git
9
+ cd llm_negotiation
10
+ pip install -e .
src_code_for_reproducibility/docs/source/marl_standard.rst ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ==========================================================
2
+ Abstract Standard for Multi-Agent Negotiation Environments
3
+ ==========================================================
4
+
5
+ Multi-Agent Negotiation Environments require more features than gymnasium environments in order to be used as interfaces in general game running code.
6
+ The main differences between gymnasium environments and Multi-Agent Negotiation Environments are:
7
+
8
+ 1. Response from the LLM is a text action, not a discrete action. Therefore, appropriate parsing of the text is required. The model may need to be run multiple times to get the full action.
9
+ This is why we introduce the `AgentHandler` class, which is responsible for parsing the LLM's response.
10
+ 2. The environment needs to be able to handle multi-agent interactions.
11
+ This is why we introduce the `NegotiationEnvironment` class, which is responsible for handling the multi-agent interactions.
12
+ 3. MARL environments are complex to describe. In different contexts, the same environment may be described differently. Therefore, both the environment and the agent handlers are
13
+ responsible for describing a particular trajectory. This information is given by the `get_log_info` method.
14
+ 4. There might be a lot of overlap between the neural networks used by each agent. For instance, the same model may be used for all agents. This motivates a requirement for a
15
+ policy identifier for each agent.
16
+
17
+ Taking inspiration from the `gymnasium <https://gymnasium.farama.org/>`_ library, we introduce a new standard for Multi-Agent Negotiation Environments.
18
+
19
+ Our standard is based on the following features:
20
+
21
+ Environments are of the form:
22
+
23
+ .. code-block:: python
24
+
25
+ class MarlEnvironment():
26
+
27
+ def __init__(self):
28
+ """Initialize the environment."""
29
+ pass
30
+
31
+ def reset(self):
32
+ """Reset the environment to an initial state and return the initial observation.
33
+ Returns:
34
+ observation (dict): A dictionary where keys are agent identifiers and values are observations.
35
+ """
36
+ # (...)
37
+ return observation
38
+
39
+ def step(self, actions):
40
+ """Take a step in the environment using the provided actions.
41
+
42
+ Args:
43
+ actions (dict): A dictionary where keys are agent identifiers and values are actions.
44
+
45
+ Returns:
46
+ observations (dict): A dictionary where keys are agent identifiers and values are observations.
47
+ reward (dict): A dictionary where keys are agent identifiers and values are rewards.
48
+ done (bool): Whether the episode has ended.
49
+ info (dict): Additional information about the environment.
50
+ """
51
+ # (...)
52
+ return observations, reward, done, info
53
+
54
+ def get_log_info(self):
55
+ """Get additional information about the environment. This information is used to log the game.
56
+ Returns:
57
+ log_info (dict): Information about the environment required to log the game.
58
+ """
59
+ # (...)
60
+ return log_info
61
+
62
+ def render(self):
63
+ """Render the current state of the environment."""
64
+ pass
65
+
66
+ def close(self):
67
+ """Perform any necessary cleanup."""
68
+ pass
69
+
70
+
71
+ class AgentState():
72
+
73
+ def __init__(self):
74
+ """Initialize the agent state."""
75
+ pass
76
+
77
+ def step(self, observation_from_env, policy_output=None):
78
+ """Update the agent state based on the observation and action.
79
+ The action is the output of the LLM.
80
+
81
+
82
+ Args:
83
+ observation_from_env (dict): The observation of the environment.
84
+ policy_output : The output of the policy.
85
+
86
+ Returns:
87
+ policy_id (str): The policy identifier.
88
+ policy_input (dict): The input to the policy.
89
+ action : The official action to be sent to the environment.
90
+ done (bool): Whether the LLM action is ready to be sent to the environment.
91
+ info (dict): Additional information about the agent.
92
+ """
93
+ # (...)
94
+ return policy_id, policy_input, action, done, info
95
+
96
+ def get_log_info(self):
97
+ """Get information about the agent required to log a trajectory.
98
+ Returns:
99
+ log_info (dict): Information about the agent required to log a trajectory.
100
+ """
101
+ # (...)
102
+ return log_info
103
+
104
+ def render(self):
105
+ """Render the current state of the environment."""
106
+ pass
107
+
108
+ def close(self):
109
+ """Perform any necessary cleanup."""
110
+ pass
111
+
112
+
113
+ Implicitly, the keys of the `observations` in the `step` method of the `MarlEnvironment` interface represent the set of agents from which an action is expected at the current step. The next step should only expect actions from the agents in the `observations` dictionary.
114
+
115
+ As you can see, both classes have a `get_log_info` method. This method is used to log the game. It returns a dictionary with keys being the agent identifiers and values being the information to log. The reason we need this is because the environment and the agent handler may need to log different information. It makes it easier to log from the perspective of each agent. The core environment class should not need to know about the details of the agent handler.
116
+
117
+
118
+
119
+ Running Environments in Parallel
120
+ --------------------------------
121
+ This standard allows the use of the `run_batched_matches` function (TODO: link) to run environments in an efficient way. The core idea is to batch the policy calls for all agents in the environment.
122
+
123
+ .. note::
124
+ The ``run_batched_matches`` function allows you to run multiple negotiation games, or "matches," in parallel.
125
+ After each environment is initialized, the function continuously loops over all active matches and checks which agents
126
+ are still pending actions. Each agent's logic can require multiple calls to the policy (e.g., an LLM) before an action
127
+ becomes "ready" to be sent to the environment. (For instance, an agent might need multiple policy calls before having a string which can be parsed into a valid action.) While an agent is waiting for a policy output, these calls for all agents across all matches are grouped together by unique policy identifier and processed in batch for efficiency. This is the core functionality of the ``run_batched_matches`` function.
128
+
129
+ Only once all actions from the required agents at a given step for an environment are ready does the function make a single ``env.step(...)`` call; this ensures
130
+ every match moves forward in lockstep for all its active agents. As soon as an environment signals it is done, the function
131
+ retrieves logged information from both the environment and the agent states before removing this match from the active set.
132
+
133
+ If there are more matches waiting to be processed, they are then started one by one to maintain the specified degree of parallelism.
134
+ This batching approach provides an efficient mechanism to handle multi-agent or multi-policy environments, ensuring minimal
135
+ overhead and a clear, unified flow for stepping through matches.
136
+
137
+ Here is a diagram that shows how the `run_batched_matches` function works at a high level:
138
+
139
+ .. image:: media/runbatch.png
140
+ :alt: High-level diagram of how the run_batched_matches function works
141
+ :width: 1000px
src_code_for_reproducibility/docs/source/modules.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src
2
+ ===
3
+
4
+ .. toctree::
5
+ :maxdepth: 4
6
+
7
+ src
src_code_for_reproducibility/docs/source/src.environments.dond.dond_agent.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.environments.dond.dond\_agent module
2
+ ========================================
3
+
4
+ .. automodule:: src.environments.dond.dond_agent
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.dond.dond_game.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.environments.dond.dond\_game module
2
+ =======================================
3
+
4
+ .. automodule:: src.environments.dond.dond_game
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.dond.dond_log_funcs.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.environments.dond.dond\_log\_funcs module
2
+ =============================================
3
+
4
+ .. automodule:: src.environments.dond.dond_log_funcs
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.dond.dond_player.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.environments.dond.dond\_agent module
2
+ =========================================
3
+
4
+ .. automodule:: src.environments.dond.dond_agent
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.dond.dond_return_funcs.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.environments.dond.dond\_return\_funcs module
2
+ ================================================
3
+
4
+ .. automodule:: src.environments.dond.dond_return_funcs
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.dond.dond_statistics_funcs.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.environments.dond.dond\_statistics\_funcs module
2
+ ====================================================
3
+
4
+ .. automodule:: src.environments.dond.dond_statistics_funcs
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.dond.dond_training_data_funcs.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.environments.dond.dond\_training\_data\_funcs module
2
+ ========================================================
3
+
4
+ .. automodule:: src.environments.dond.dond_training_data_funcs
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.dond.rst ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ src.environments.dond package
2
+ =============================
3
+
4
+ .. automodule:: src.environments.dond
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
8
+
9
+ Submodules
10
+ ----------
11
+
12
+ .. toctree::
13
+ :maxdepth: 4
14
+
15
+ src.environments.dond.dond_agent
16
+ src.environments.dond.dond_game
17
+ src.environments.dond.dond_log_funcs
18
+ src.environments.dond.dond_statistics_funcs
19
+ src.environments.dond.dond_training_data_funcs
src_code_for_reproducibility/docs/source/src.environments.env_imports.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.environments.env\_imports module
2
+ ====================================
3
+
4
+ .. automodule:: src.environments.env_imports
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.environment_imports.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.environments.environment\_imports module
2
+ ============================================
3
+
4
+ .. automodule:: src.environments.environment_imports
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_agent.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.environments.ipd.ipd\_agent module
2
+ ======================================
3
+
4
+ .. automodule:: src.environments.ipd.ipd_agent
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_game.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.environments.ipd.ipd\_game module
2
+ =====================================
3
+
4
+ .. automodule:: src.environments.ipd.ipd_game
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_log_funcs.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.environments.ipd.ipd\_log\_funcs module
2
+ ===========================================
3
+
4
+ .. automodule:: src.environments.ipd.ipd_log_funcs
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_statistics_funcs.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.environments.ipd.ipd\_statistics\_funcs module
2
+ ==================================================
3
+
4
+ .. automodule:: src.environments.ipd.ipd_statistics_funcs
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_training_data_funcs.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.environments.ipd.ipd\_training\_data\_funcs module
2
+ ======================================================
3
+
4
+ .. automodule:: src.environments.ipd.ipd_training_data_funcs
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.ipd.rst ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ src.environments.ipd package
2
+ ============================
3
+
4
+ .. automodule:: src.environments.ipd
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
8
+
9
+ Submodules
10
+ ----------
11
+
12
+ .. toctree::
13
+ :maxdepth: 4
14
+
15
+ src.environments.ipd.ipd_agent
16
+ src.environments.ipd.ipd_game
17
+ src.environments.ipd.ipd_log_funcs
18
+ src.environments.ipd.ipd_statistics_funcs
19
+ src.environments.ipd.ipd_training_data_funcs
src_code_for_reproducibility/docs/source/src.environments.rst ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ src.environments package
2
+ ========================
3
+
4
+ .. automodule:: src.environments
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
8
+
9
+ Subpackages
10
+ -----------
11
+
12
+ .. toctree::
13
+ :maxdepth: 4
14
+
15
+ src.environments.dond
16
+ src.environments.ipd
17
+
18
+ Submodules
19
+ ----------
20
+
21
+ .. toctree::
22
+ :maxdepth: 4
23
+
24
+ src.environments.env_imports
25
+ src.environments.environment_imports
src_code_for_reproducibility/docs/source/src.experiments.arithmetic_test.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.experiments.arithmetic\_test module
2
+ =======================================
3
+
4
+ .. automodule:: src.experiments.arithmetic_test
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.experiments.dond_run_train.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.experiments.dond\_run\_train module
2
+ =======================================
3
+
4
+ .. automodule:: src.experiments.dond_run_train
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.experiments.generate_and_train.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.experiments.generate\_and\_train module
2
+ ===========================================
3
+
4
+ .. automodule:: src.experiments.generate_and_train
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.experiments.last_completion.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.experiments.last\_completion module
2
+ =======================================
3
+
4
+ .. automodule:: src.experiments.last_completion
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.experiments.rst ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ src.experiments package
2
+ =======================
3
+
4
+ .. automodule:: src.experiments
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
8
+
9
+ Submodules
10
+ ----------
11
+
12
+ .. toctree::
13
+ :maxdepth: 4
14
+
15
+ src.experiments.arithmetic_test
16
+ src.experiments.generate_and_train
17
+ src.experiments.last_completion
src_code_for_reproducibility/docs/source/src.generation.rst ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ src.generation package
2
+ ======================
3
+
4
+ .. automodule:: src.generation
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
8
+
9
+ Submodules
10
+ ----------
11
+
12
+ .. toctree::
13
+ :maxdepth: 4
14
+
15
+ src.generation.run_games
src_code_for_reproducibility/docs/source/src.generation.run_games.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.generation.run\_games module
2
+ ================================
3
+
4
+ .. automodule:: src.generation.run_games
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.models.dummy_hf_agent.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.models.dummy\_hf\_agent module
2
+ ==================================
3
+
4
+ .. automodule:: src.models.dummy_llm_agent
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.models.dummy_local_llm.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.models.dummy\_local\_llm module
2
+ ===================================
3
+
4
+ .. automodule:: src.models.dummy_local_llm
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.models.hf_agent.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.models.hf\_agent module
2
+ ===========================
3
+
4
+ .. automodule:: src.models.hf_agent
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.models.local_llm.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.models.local\_llm module
2
+ ============================
3
+
4
+ .. automodule:: src.models.local_llm
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.models.new_local_llm.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.models.new\_local\_llm module
2
+ =================================
3
+
4
+ .. automodule:: src.models.new_local_llm
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.models.oai_agent.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.models.oai\_agent module
2
+ ============================
3
+
4
+ .. automodule:: src.models.oai_agent
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.models.rst ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ src.models package
2
+ ==================
3
+
4
+ .. automodule:: src.models
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
8
+
9
+ Submodules
10
+ ----------
11
+
12
+ .. toctree::
13
+ :maxdepth: 4
14
+
15
+ src.models.dummy_local_llm
16
+ src.models.local_llm
17
+ src.models.new_local_llm
18
+ src.models.server_llm
19
+ src.models.updatable_worker
20
+ src.models.vllm_worker_wrap
src_code_for_reproducibility/docs/source/src.models.server_llm.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.models.server\_llm module
2
+ =============================
3
+
4
+ .. automodule:: src.models.server_llm
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.models.updatable_worker.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.models.updatable\_worker module
2
+ ===================================
3
+
4
+ .. automodule:: src.models.updatable_worker
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.models.vllm_worker_wrap.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.models.vllm\_worker\_wrap module
2
+ ====================================
3
+
4
+ .. automodule:: src.models.vllm_worker_wrap
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.rst ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ src package
2
+ ===========
3
+
4
+ .. automodule:: src
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
8
+
9
+ Subpackages
10
+ -----------
11
+
12
+ .. toctree::
13
+ :maxdepth: 4
14
+
15
+ src.environments
16
+ src.experiments
17
+ src.generation
18
+ src.models
19
+ src.training
20
+ src.utils
21
+
22
+ Submodules
23
+ ----------
24
+
25
+ .. toctree::
26
+ :maxdepth: 4
27
+
28
+ src.run
src_code_for_reproducibility/docs/source/src.run.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.run module
2
+ ==============
3
+
4
+ .. automodule:: src.run
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.training.ppo_train.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.training.ppo\_train module
2
+ ==============================
3
+
4
+ .. automodule:: src.training.ppo_train
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.training.ppo_train_value_head.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.training.ppo\_train\_value\_head module
2
+ ===========================================
3
+
4
+ .. automodule:: src.training.ppo_train_value_head
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.training.reinforce_training.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.training.reinforce\_training module
2
+ =======================================
3
+
4
+ .. automodule:: src.training.reinforce_training
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.training.rl_convs_processing.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.training.rl\_convs\_processing module
2
+ =========================================
3
+
4
+ .. automodule:: src.training.rl_convs_processing
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.training.rst ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ src.training package
2
+ ====================
3
+
4
+ .. automodule:: src.training
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
8
+
9
+ Submodules
10
+ ----------
11
+
12
+ .. toctree::
13
+ :maxdepth: 4
14
+
15
+ src.training.ppo_train
16
+ src.training.ppo_train_value_head
17
+ src.training.reinforce_training
18
+ src.training.rl_convs_processing
19
+ src.training.train_main
src_code_for_reproducibility/docs/source/src.training.train_main.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.training.train\_main module
2
+ ===============================
3
+
4
+ .. automodule:: src.training.train_main
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.utils.common_imports.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.utils.common\_imports module
2
+ ================================
3
+
4
+ .. automodule:: src.utils.common_imports
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance: