Muqeeth committed on Jan 20

Commit

e799c61

verified ·

1 Parent(s): 325684c

Add files using upload-large-folder tool

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

src_code_for_reproducibility/docs/source/conf.py +48 -0
src_code_for_reproducibility/docs/source/environments.rst +35 -0
src_code_for_reproducibility/docs/source/index.rst +22 -0
src_code_for_reproducibility/docs/source/media/runbatch.png +0 -0
src_code_for_reproducibility/docs/source/modules.rst +7 -0
src_code_for_reproducibility/docs/source/src.environments.dond.dond_log_funcs.rst +7 -0
src_code_for_reproducibility/docs/source/src.environments.dond.rst +19 -0
src_code_for_reproducibility/docs/source/src.environments.environment_imports.rst +7 -0
src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_log_funcs.rst +7 -0
src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_statistics_funcs.rst +7 -0
src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_training_data_funcs.rst +7 -0
src_code_for_reproducibility/docs/source/src.environments.ipd.rst +19 -0
src_code_for_reproducibility/docs/source/src.models.hf_agent.rst +7 -0
src_code_for_reproducibility/docs/source/src.rst +28 -0
src_code_for_reproducibility/docs/source/src.utils.extra_stats.rst +7 -0
src_code_for_reproducibility/docs/source/src.utils.rst +24 -0
src_code_for_reproducibility/docs/source/src.utils.update_start_epoch.rst +7 -0
src_code_for_reproducibility/markov_games/__pycache__/agent.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/__pycache__/alternative_actions_runner.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/__pycache__/gather_and_export_utils.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/__pycache__/linear_runner.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/__pycache__/markov_game.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/__pycache__/mg_utils.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/__pycache__/rollout_tree.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/__pycache__/run_markov_games.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/__pycache__/simulation.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/diplomacy/diplomacy_agent.py +259 -0
src_code_for_reproducibility/markov_games/diplomacy/diplomacy_env.py +230 -0
src_code_for_reproducibility/markov_games/diplomacy/diplomacy_logging.py +360 -0
src_code_for_reproducibility/markov_games/diplomacy/diplomacy_logging_for_training.py +0 -0
src_code_for_reproducibility/markov_games/ipd/Ipd_hard_coded_agents.py +72 -0
src_code_for_reproducibility/markov_games/ipd/__init__.py +7 -0
src_code_for_reproducibility/markov_games/ipd/__pycache__/Ipd_hard_coded_agents.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/ipd/__pycache__/__init__.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_agent.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_statistics.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/ipd/ipd_agent.py +115 -0
src_code_for_reproducibility/markov_games/ipd/ipd_simulation.py +162 -0
src_code_for_reproducibility/markov_games/ipd/ipd_statistics.py +18 -0
src_code_for_reproducibility/markov_games/negotiation/README.md +40 -0
src_code_for_reproducibility/markov_games/negotiation/__pycache__/dond_agent.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/negotiation/__pycache__/dond_simulation.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/negotiation/__pycache__/nego_agent.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/negotiation/__pycache__/nego_hard_coded_policies.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/negotiation/__pycache__/nego_simulation.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/negotiation/__pycache__/negotiation_statistics.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/negotiation/__pycache__/no_press_nego_agent.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/negotiation/__pycache__/no_press_nego_simulation.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/negotiation/__pycache__/tas_agent.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/negotiation/__pycache__/tas_rps_agent.cpython-312.pyc +0 -0

src_code_for_reproducibility/docs/source/conf.py ADDED Viewed

	@@ -0,0 +1,48 @@

+# Configuration file for the Sphinx documentation builder.
+# It sets up sys.path, project metadata, the enabled Sphinx extensions,
+# the HTML theme and the Napoleon docstring-parsing options.
+import os
+import sys
+# os.path.abspath resolves '../..' against the current working directory.
+sys.path.insert(0, os.path.abspath('../..'))
+# -- Project information -----------------------------------------------------
+project = 'llm_negotiation'
+# NOTE(review): 'Your Name' looks like a template placeholder - confirm the
+# intended copyright holder and author.
+copyright = '2023, Your Name'
+author = 'Your Name'
+# -- General configuration ---------------------------------------------------
+# Sphinx extensions loaded for this build.
+extensions = [
+    'sphinx.ext.autodoc',
+    'sphinx.ext.viewcode',
+    'sphinx.ext.napoleon',
+    'sphinx.ext.autosummary',
+    'sphinx.ext.intersphinx',
+    'sphinx.ext.mathjax',
+    'sphinxcontrib.mermaid',
+    'sphinx_rtd_theme',
+]
+templates_path = ['_templates']
+exclude_patterns = []
+# -- Options for HTML output -------------------------------------------------
+html_theme = 'sphinx_rtd_theme'
+html_static_path = ['_static']
+# -- Napoleon settings -------------------------------------------------------
+# Google-style docstring parsing is switched on, NumPy-style is switched off.
+napoleon_google_docstring = True
+napoleon_numpy_docstring = False
+napoleon_include_init_with_doc = True
+napoleon_include_private_with_doc = False
+napoleon_include_special_with_doc = True
+napoleon_use_admonition_for_examples = False
+napoleon_use_admonition_for_notes = False
+napoleon_use_admonition_for_references = False
+napoleon_use_ivar = False
+napoleon_use_param = True
+napoleon_use_rtype = True
+napoleon_preprocess_types = False
+napoleon_type_aliases = None
+napoleon_attr_annotations = True
+# -- Path setup --------------------------------------------------------------
+# Make sure the project's modules can be found by Sphinx
+# NOTE: this insert runs after the one near the top of the file, so
+# '../../src' ends up ahead of '../..' on sys.path.
+sys.path.insert(0, os.path.abspath('../../src'))

src_code_for_reproducibility/docs/source/environments.rst ADDED Viewed

	@@ -0,0 +1,35 @@

+=================
+MARL Environments
+=================
+This section provides detailed documentation for the multi-agent negotiation environments included in the library.
+Each environment follows the standard interface described in :doc:`../environments` but has its own unique game rules,
+dynamics, and implementation details.
+.. toctree::
+   :maxdepth: 2
+   :caption: Available Environments:
+   environments/ipd
+   environments/diplomacy
+   environments/dond
+Overview
+--------
+The library currently includes the following environments:
+1. **Iterated Prisoner's Dilemma (IPD)**: A classic game theory problem where two agents repeatedly decide whether to cooperate or defect, with different payoffs based on their joint actions.
+2. **Diplomacy**: An adaptation of the board game Diplomacy, where seven European powers compete for control of supply centers through strategic moves and alliances.
+3. **Deal or No Deal (DOND)**: A negotiation environment based on `the paper Deal or No Deal? End-to-End Learning for Negotiation Dialogues <https://arxiv.org/pdf/1706.05125>`_ in which agents negotiate over the distribution of a set of prizes.
+Each environment documentation includes:
+- Game rules and background
+- Implementation details
+- API reference
+- Example usage
+- Advanced features and customization options

src_code_for_reproducibility/docs/source/index.rst ADDED Viewed

	@@ -0,0 +1,22 @@

+Welcome to LLM Negotiation's documentation!
+===========================================
+This library is a collection of tools for training and evaluating LLM-based agents in multi-agent environments. It is designed to be easy to use and extend.
+.. toctree::
+   :maxdepth: 3
+   :caption: Contents:
+   installation
+   marl_standard
+   environments
+   launch
+   usage
+   modules
+   contributing
+Indices and tables
+==================
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`

src_code_for_reproducibility/docs/source/media/runbatch.png ADDED Viewed

src_code_for_reproducibility/docs/source/modules.rst ADDED Viewed

	@@ -0,0 +1,7 @@

+src
+===
+.. toctree::
+   :maxdepth: 4
+   src

src_code_for_reproducibility/docs/source/src.environments.dond.dond_log_funcs.rst ADDED Viewed

	@@ -0,0 +1,7 @@

+src.environments.dond.dond\_log\_funcs module
+=============================================
+.. automodule:: src.environments.dond.dond_log_funcs
+   :members:
+   :undoc-members:
+   :show-inheritance:

src_code_for_reproducibility/docs/source/src.environments.dond.rst ADDED Viewed

	@@ -0,0 +1,19 @@

+src.environments.dond package
+=============================
+.. automodule:: src.environments.dond
+   :members:
+   :undoc-members:
+   :show-inheritance:
+Submodules
+----------
+.. toctree::
+   :maxdepth: 4
+   src.environments.dond.dond_agent
+   src.environments.dond.dond_game
+   src.environments.dond.dond_log_funcs
+   src.environments.dond.dond_statistics_funcs
+   src.environments.dond.dond_training_data_funcs

src_code_for_reproducibility/docs/source/src.environments.environment_imports.rst ADDED Viewed

	@@ -0,0 +1,7 @@

+src.environments.environment\_imports module
+============================================
+.. automodule:: src.environments.environment_imports
+   :members:
+   :undoc-members:
+   :show-inheritance:

src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_log_funcs.rst ADDED Viewed

	@@ -0,0 +1,7 @@

+src.environments.ipd.ipd\_log\_funcs module
+===========================================
+.. automodule:: src.environments.ipd.ipd_log_funcs
+   :members:
+   :undoc-members:
+   :show-inheritance:

src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_statistics_funcs.rst ADDED Viewed

	@@ -0,0 +1,7 @@

+src.environments.ipd.ipd\_statistics\_funcs module
+==================================================
+.. automodule:: src.environments.ipd.ipd_statistics_funcs
+   :members:
+   :undoc-members:
+   :show-inheritance:

src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_training_data_funcs.rst ADDED Viewed

	@@ -0,0 +1,7 @@

+src.environments.ipd.ipd\_training\_data\_funcs module
+======================================================
+.. automodule:: src.environments.ipd.ipd_training_data_funcs
+   :members:
+   :undoc-members:
+   :show-inheritance:

src_code_for_reproducibility/docs/source/src.environments.ipd.rst ADDED Viewed

	@@ -0,0 +1,19 @@

+src.environments.ipd package
+============================
+.. automodule:: src.environments.ipd
+   :members:
+   :undoc-members:
+   :show-inheritance:
+Submodules
+----------
+.. toctree::
+   :maxdepth: 4
+   src.environments.ipd.ipd_agent
+   src.environments.ipd.ipd_game
+   src.environments.ipd.ipd_log_funcs
+   src.environments.ipd.ipd_statistics_funcs
+   src.environments.ipd.ipd_training_data_funcs

src_code_for_reproducibility/docs/source/src.models.hf_agent.rst ADDED Viewed

	@@ -0,0 +1,7 @@

+src.models.hf\_agent module
+===========================
+.. automodule:: src.models.hf_agent
+   :members:
+   :undoc-members:
+   :show-inheritance:

src_code_for_reproducibility/docs/source/src.rst ADDED Viewed

	@@ -0,0 +1,28 @@

+src package
+===========
+.. automodule:: src
+   :members:
+   :undoc-members:
+   :show-inheritance:
+Subpackages
+-----------
+.. toctree::
+   :maxdepth: 4
+   src.environments
+   src.experiments
+   src.generation
+   src.models
+   src.training
+   src.utils
+Submodules
+----------
+.. toctree::
+   :maxdepth: 4
+   src.run

src_code_for_reproducibility/docs/source/src.utils.extra_stats.rst ADDED Viewed

	@@ -0,0 +1,7 @@

+src.utils.extra\_stats module
+=============================
+.. automodule:: src.utils.extra_stats
+   :members:
+   :undoc-members:
+   :show-inheritance:

src_code_for_reproducibility/docs/source/src.utils.rst ADDED Viewed

	@@ -0,0 +1,24 @@

+src.utils package
+=================
+.. automodule:: src.utils
+   :members:
+   :undoc-members:
+   :show-inheritance:
+Submodules
+----------
+.. toctree::
+   :maxdepth: 4
+   src.utils.common_imports
+   src.utils.export_ppo_training_set
+   src.utils.extra_stats
+   src.utils.inherit_args
+   src.utils.log_gpu_usage
+   src.utils.log_statistics
+   src.utils.model_to_cpu
+   src.utils.parallel_shuffle
+   src.utils.quick_stats
+   src.utils.update_start_epoch

src_code_for_reproducibility/docs/source/src.utils.update_start_epoch.rst ADDED Viewed

	@@ -0,0 +1,7 @@

+src.utils.update\_start\_epoch module
+=====================================
+.. automodule:: src.utils.update_start_epoch
+   :members:
+   :undoc-members:
+   :show-inheritance:

src_code_for_reproducibility/markov_games/__pycache__/agent.cpython-312.pyc ADDED Viewed

Binary file (3.2 kB). View file

src_code_for_reproducibility/markov_games/__pycache__/alternative_actions_runner.cpython-312.pyc ADDED Viewed

Binary file (4.95 kB). View file

src_code_for_reproducibility/markov_games/__pycache__/gather_and_export_utils.cpython-312.pyc ADDED Viewed

Binary file (46.5 kB). View file

src_code_for_reproducibility/markov_games/__pycache__/linear_runner.cpython-312.pyc ADDED Viewed

Binary file (1.25 kB). View file

src_code_for_reproducibility/markov_games/__pycache__/markov_game.cpython-312.pyc ADDED Viewed

Binary file (9.72 kB). View file

src_code_for_reproducibility/markov_games/__pycache__/mg_utils.cpython-312.pyc ADDED Viewed

Binary file (3.98 kB). View file

src_code_for_reproducibility/markov_games/__pycache__/rollout_tree.cpython-312.pyc ADDED Viewed

Binary file (3.67 kB). View file

src_code_for_reproducibility/markov_games/__pycache__/run_markov_games.cpython-312.pyc ADDED Viewed

Binary file (1.14 kB). View file

src_code_for_reproducibility/markov_games/__pycache__/simulation.cpython-312.pyc ADDED Viewed

Binary file (3.9 kB). View file

src_code_for_reproducibility/markov_games/diplomacy/diplomacy_agent.py ADDED Viewed

	@@ -0,0 +1,259 @@

+from typing import Dict, List, Tuple, Optional, Any
+import copy
+class DiplomacyAgent:
+    """Agent handler for Diplomacy game that follows the MARL standard.
+    This class is responsible for parsing LLM output into Diplomacy orders,
+    managing the agent state, and providing information for logging.
+    One instance handles a single power; ``step`` drives the prompt/parse loop.
+    """
+    def __init__(self, policy_id: str, power_name: str, random_valid_move=False):
+        """Initialize the agent handler for a power in the Diplomacy game.
+        Args:
+            policy_id: The identifier for the policy this agent uses
+            power_name: The name of the power this agent controls (e.g., 'FRANCE', 'ENGLAND')
+            random_valid_move: If True, will select random valid moves instead of using LLM (default: False)
+        """
+        self.policy_id = policy_id
+        self.power_name = power_name
+        # Orders most recently accepted by step().
+        self.orders = []
+        # Stays True until step() has produced an action for the environment.
+        self.wait = True
+        # Reported under the "parsing_state" key by get_log_info(); this class
+        # only assigns it here.
+        self.processing_state = "WAITING_FOR_ORDERS"
+        # Result of the last _parse_llm_output() call made from step().
+        self.parsed_orders = []
+        # order_status and message_history are initialised empty and are not
+        # modified anywhere else in this class.
+        self.order_status = {}
+        self.message_history = []
+        self.random_valid_move = random_valid_move
+    def step(self, observation_from_env, policy_output=None):
+        """Update the agent state based on the observation and LLM output.
+        Args:
+            observation_from_env: The observation from the environment (a dict
+                read with ``.get``)
+            policy_output: The output from the LLM, or None on the first call
+        Returns:
+            A 5-tuple ``(policy_id, policy_input, action, done, info)``:
+            policy_id: The policy identifier
+            policy_input: ``{"prompt": ...}`` when the LLM must be queried
+                (again), ``{}`` once an action is ready
+            action: ``{"orders": [...], "wait": False}`` when ready, else None
+            done: Whether the LLM action is ready to be sent to the environment
+            info: Additional information about the agent (always an empty dict
+                in this implementation)
+        """
+        info = {}
+        # If random_valid_move is enabled, select random valid moves
+        # (the LLM output is ignored entirely on this path).
+        if self.random_valid_move:
+            valid_orders = self._select_random_valid_moves(observation_from_env)
+            self.orders = valid_orders
+            self.wait = False
+            action = {
+                "orders": valid_orders,
+                "wait": False
+            }
+            return self.policy_id, {}, action, True, info
+        # If no policy output, this is the initial step - prepare prompt
+        if policy_output is None:
+            # Create initial prompt for the LLM
+            phase = observation_from_env.get('phase', '')
+            # 'units' and 'centers' are looked up per power name.
+            units = observation_from_env.get('units', {}).get(self.power_name, [])
+            centers = observation_from_env.get('centers', {}).get(self.power_name, [])
+            orderable_locations = observation_from_env.get('orderable_locations', {})
+            prompt = self._create_prompt(phase, units, centers, orderable_locations)
+            return self.policy_id, {"prompt": prompt}, None, False, info
+        # Process the LLM output to extract orders
+        success, parsed_orders = self._parse_llm_output(policy_output)
+        self.parsed_orders = parsed_orders
+        if not success:
+            # Need more information from LLM
+            clarification_prompt = self._create_clarification_prompt(policy_output, parsed_orders)
+            return self.policy_id, {"prompt": clarification_prompt}, None, False, info
+        # Validate if the orders are valid for the current phase
+        valid_orders = self._validate_orders(parsed_orders, observation_from_env)
+        if valid_orders:
+            # Orders are valid, prepare action for environment
+            self.orders = valid_orders
+            self.wait = False
+            action = {
+                "orders": valid_orders,
+                "wait": False
+            }
+            return self.policy_id, {}, action, True, info
+        else:
+            # Orders are invalid, ask for new ones
+            error_prompt = self._create_error_prompt(parsed_orders, observation_from_env)
+            return self.policy_id, {"prompt": error_prompt}, None, False, info
+    def _create_prompt(self, phase, units, centers, orderable_locations):
+        """Create the initial prompt for the LLM.
+        The example orders appended to the prompt depend on the last character
+        of ``phase``: 'M' (movement), 'R' (retreat) or 'A' (adjustment).
+        Args:
+            phase: The current game phase (string)
+            units: List of units controlled by this power
+            centers: List of supply centers controlled by this power
+            orderable_locations: Iterable of location names where orders can be
+                issued (joined with ', ')
+        Returns:
+            A prompt string for the LLM
+        """
+        prompt = f"You are playing as {self.power_name} in Diplomacy. The current phase is {phase}.\n\n"
+        prompt += f"Your units: {', '.join(units)}\n"
+        prompt += f"Your supply centers: {', '.join(centers)}\n"
+        prompt += f"Locations you can order: {', '.join(orderable_locations)}\n\n"
+        if phase.endswith('M'):  # Movement phase
+            prompt += "Please provide orders for your units in the form:\n"
+            prompt += "- A LON H (hold)\n"
+            prompt += "- F NTH - NWY (move)\n"
+            prompt += "- A WAL S F LON (support)\n"
+            prompt += "- F NWG C A NWY - EDI (convoy)\n"
+        elif phase.endswith('R'):  # Retreat phase
+            prompt += "Please provide retreat orders for your dislodged units:\n"
+            prompt += "- A PAR R MAR (retreat to MAR)\n"
+            prompt += "- A PAR D (disband)\n"
+        elif phase.endswith('A'):  # Adjustment phase
+            # Fewer units than centers -> builds; more units -> disbands.
+            # With equal counts no adjustment examples are added.
+            if len(units) < len(centers):
+                prompt += "You can build units. Please provide build orders:\n"
+                prompt += "- A PAR B (build army in PAR)\n"
+                prompt += "- F BRE B (build fleet in BRE)\n"
+                prompt += "- WAIVE (waive a build)\n"
+            elif len(units) > len(centers):
+                prompt += "You must remove units. Please provide disbandment orders:\n"
+                prompt += "- A PAR D (disband army in PAR)\n"
+                prompt += "- F BRE D (disband fleet in BRE)\n"
+        prompt += "\nProvide your orders as a list, one per line."
+        return prompt
+    def _parse_llm_output(self, llm_output):
+        """Parse the LLM output to extract orders.
+        This is a line-by-line substring heuristic, not a grammar check: a
+        line is kept when it contains one of the order markers tested below.
+        Args:
+            llm_output: The raw output from the LLM (string)
+        Returns:
+            success: True when at least one order-like line was found
+            parsed_orders: List of the kept lines, with list markers stripped
+        """
+        # Simple parsing for now - extract lines that look like orders
+        lines = llm_output.strip().split('\n')
+        orders = []
+        for line in lines:
+            # Remove list markers, hyphens, etc.
+            # (str.strip removes these characters from both ends of the line)
+            line = line.strip('- *•').strip()
+            # Skip empty lines and lines that don't look like orders
+            if not line or line.startswith('I ') or line.startswith('Let\'s'):
+                continue
+            # Check if it looks like a Diplomacy order
+            if (' H' in line or ' -' in line or ' S ' in line or ' C ' in line or
+                ' R ' in line or ' D' in line or ' B' in line or line == 'WAIVE'):
+                orders.append(line)
+        return len(orders) > 0, orders
+    def _validate_orders(self, orders, observation):
+        """Validate if the orders are valid for the current phase.
+        Currently a placeholder: no validation is performed.
+        Args:
+            orders: List of orders to validate
+            observation: Current observation from the environment (unused)
+        Returns:
+            The ``orders`` argument, unchanged. This implementation never
+            returns None; step() treats a falsy result as invalid.
+        """
+        # For simplicity, we'll assume all parsed orders are valid
+        # In a real implementation, we would use the game's validation logic
+        return orders
+    def _create_clarification_prompt(self, previous_output, parsed_orders):
+        """Create a prompt asking for clarification when orders couldn't be parsed.
+        Args:
+            previous_output: The previous LLM output (not used in the prompt text)
+            parsed_orders: Any orders that were successfully parsed
+        Returns:
+            A prompt string for the LLM
+        """
+        prompt = f"I couldn't fully understand your orders for {self.power_name}. "
+        if parsed_orders:
+            prompt += f"I understood these orders:\n"
+            for order in parsed_orders:
+                prompt += f"- {order}\n"
+        prompt += "\nPlease provide clear, valid Diplomacy orders in the format:\n"
+        prompt += "- A LON H\n- F NTH - NWY\n- etc.\n"
+        return prompt
+    def _create_error_prompt(self, invalid_orders, observation):
+        """Create a prompt when orders are invalid.
+        Args:
+            invalid_orders: The invalid orders, listed back to the LLM
+            observation: Current observation from the environment (unused)
+        Returns:
+            A prompt string for the LLM
+        """
+        prompt = f"The following orders for {self.power_name} are invalid:\n"
+        for order in invalid_orders:
+            prompt += f"- {order}\n"
+        prompt += "\nPlease provide valid orders for your units."
+        return prompt
+    def get_log_info(self):
+        """Get information about the agent required to log a trajectory.
+        Returns:
+            log_info: Dict with the power name, current orders, wait flag,
+                processing state and message history. The lists are the
+                agent's own objects, not copies.
+        """
+        return {
+            "power_name": self.power_name,
+            "orders": self.orders,
+            "wait": self.wait,
+            # The key is "parsing_state" although the attribute is
+            # named processing_state.
+            "parsing_state": self.processing_state,
+            "message_history": self.message_history
+        }
+    def render(self):
+        """Render the current state of the agent by printing it to stdout."""
+        print(f"Power: {self.power_name}")
+        print(f"Orders: {self.orders}")
+        print(f"Wait: {self.wait}")
+    def close(self):
+        """Perform any necessary cleanup (nothing to release at present)."""
+        pass
+    def _select_random_valid_moves(self, observation):
+        """Select random valid moves for all units.
+        Args:
+            observation: Current observation from the environment; its
+                'possible_orders' entry maps a location to a list of orders
+        Returns:
+            List with one randomly chosen order per location that has any
+        """
+        import random
+        possible_orders = observation.get('possible_orders', {})
+        valid_orders = []
+        # For each location with possible orders, select one randomly
+        for location, orders in possible_orders.items():
+            if orders:  # If there are any possible orders for this location
+                valid_orders.append(random.choice(orders))
+        return valid_orders

src_code_for_reproducibility/markov_games/diplomacy/diplomacy_env.py ADDED Viewed

	@@ -0,0 +1,230 @@

+from typing import Dict, List, Tuple, Optional, Any
+from diplomacy import Game
+import random
+class DiplomacyEnv:
+    """Multi-Agent Reinforcement Learning environment for Diplomacy.
+    This class wraps the Diplomacy game engine to provide an interface
+    compliant with the MARL standard.
+    """
+    def __init__(self, random_seed=None, map_name="standard", game_id=None, rules=None, max_steps=50):
+        """Initialize the Diplomacy environment.
+        Args:
+            map_name: The name of the map to use (default: "standard")
+            game_id: Optional game ID
+            rules: Optional rules to apply to the game
+            max_steps: Maximum number of steps before forcing game end (default: 10)
+        """
+        self.random_seed = random_seed
+        self.map_name = map_name
+        self.game_id = game_id
+        self.rules = rules or []
+        self.game = None
+        self.active_powers = []
+        self.render_mode = None
+        self.max_steps = max_steps
+        self.current_steps = 0
+    def reset(self):
+        """Reset the environment to an initial state and return the initial observation.
+        Returns:
+            observation: A dictionary where keys are agent identifiers and values are observations.
+        """
+        # Initialize a new game
+        self.game = Game(game_id=self.game_id, map_name=self.map_name)
+        # Apply rules
+        for rule in self.rules:
+            self.game.add_rule(rule)
+        # Determine active powers (not eliminated)
+        self.active_powers = [name for name, power in self.game.powers.items()
+                             if not power.is_eliminated()]
+        # Reset step counter
+        self.current_steps = 0
+        # Create initial observations for all powers
+        observations = {}
+        for power_name in self.active_powers:
+            observations[power_name] = self._create_observation(power_name)
+        return observations
+    def step(self, actions):
+        """Take a step in the environment using the provided actions.
+        Args:
+            actions: A dictionary where keys are agent identifiers and values are actions.
+        Returns:
+            observations: A dictionary where keys are agent identifiers and values are observations.
+            done: Whether the episode has ended.
+            info: Additional information about the environment.
+        """
+        print(f"stepping {self.current_steps}")
+        self.current_steps += 1
+        # Apply actions (orders) for each power
+        for power_name, action in actions.items():
+            if power_name in self.active_powers:
+                orders = action.get("orders", [])
+                wait = action.get("wait", True)
+                # Set orders for the power
+                if orders:
+                    self.game.set_orders(power_name, orders)
+                # Set wait flag
+                self.game.set_wait(power_name, wait)
+        # Check if all active powers are ready to proceed
+        if self.game.does_not_wait():
+            # Process the current phase
+            self.game.process()
+            # Update active powers list after processing
+            self.active_powers = [name for name, power in self.game.powers.items()
+                                 if not power.is_eliminated()]
+        # Create observations for all active powers
+        observations = {}
+        for power_name in self.active_powers:
+            observations[power_name] = self._create_observation(power_name)
+        # Check if the game is done (either naturally or due to max steps)
+        done = self.game.is_game_done or self.current_steps >= self.max_steps
+        # Create info dict
+        info = {
+            "phase": self.game.get_current_phase(),
+            "active_powers": self.active_powers,
+            "centers": self.game.get_centers(),
+            "units": self.game.get_units(),
+            "current_steps": self.current_steps,
+            "max_steps_reached": self.current_steps >= self.max_steps
+        }
+        return observations, done, info
+    def _create_observation(self, power_name):
+        """Create observation for a specific power.
+        Args:
+            power_name: The name of the power
+        Returns:
+            An observation dictionary
+        """
+        observation = {
+            "phase": self.game.get_current_phase(),
+            "units": self.game.get_units(),
+            "centers": self.game.get_centers(),
+            "orderable_locations": self.game.get_orderable_locations(power_name),
+            "order_status": self.game.get_order_status(power_name),
+            "possible_orders": self._get_possible_orders_for_power(power_name)
+        }
+        return observation
+    def _get_possible_orders_for_power(self, power_name):
+        """Get all possible orders for a power's units.
+        Args:
+            power_name: The name of the power
+        Returns:
+            A dictionary mapping units to their possible orders
+        """
+        all_possible_orders = self.game.get_all_possible_orders()
+        # Filter for only the locations where this power has units
+        power_units = self.game.get_units(power_name)
+        power_unit_locations = [unit[2:] for unit in power_units]
+        # For retreat phases, include retreating units
+        if self.game.phase_type == 'R':
+            power = self.game.get_power(power_name)
+            power_unit_locations.extend([unit[2:] for unit in power.retreats])
+        # For adjustment phases, include buildable locations
+        elif self.game.phase_type == 'A':
+            power = self.game.get_power(power_name)
+            # If we have more centers than units, we can build
+            if len(power.centers) > len(power.units):
+                buildable_sites = self.game._build_sites(power)
+                power_unit_locations.extend(buildable_sites)
+            # If we have more units than centers, we need to remove
+            elif len(power.units) > len(power.centers):
+                # All units are candidates for removal
+                pass
+        # Filter the possible orders to only those for this power's units/locations
+        power_possible_orders = {}
+        for loc, orders in all_possible_orders.items():
+            if loc[:3] in power_unit_locations:
+                power_possible_orders[loc] = orders
+        return power_possible_orders
+    def get_log_info(self):
+        """Get additional information about the environment for logging.
+        Returns:
+            log_info: Information about the environment required to log the game.
+        """
+        if not self.game:
+            return {}
+        return {
+            "game_id": self.game.game_id,
+            "phase": self.game.get_current_phase(),
+            "map_name": self.game.map_name,
+            "centers": self.game.get_centers(),
+            "units": self.game.get_units(),
+            "powers": {name: {
+                "units": power.units,
+                "centers": power.centers,
+                "is_eliminated": power.is_eliminated(),
+                "order_status": self.game.get_order_status(name)
+            } for name, power in self.game.powers.items()},
+            "orders": self.game.get_orders(),
+            "active_powers": self.active_powers,
+            "is_game_done": self.game.is_game_done,
+            "outcome": self.game.outcome if self.game.is_game_done else None
+        }
+    def render(self, mode='human'):
+        """Render the current state of the environment.
+        Args:
+            mode: The rendering mode ('human', 'svg', etc.)
+        Returns:
+            The rendered image if applicable
+        """
+        self.render_mode = mode
+        if self.game:
+            if mode == 'human':
+                # Just print basic game state
+                print(f"Game: {self.game.game_id}")
+                print(f"Phase: {self.game.get_current_phase()}")
+                print(f"Active Powers: {self.active_powers}")
+                print("Supply Centers:")
+                for power_name, centers in self.game.get_centers().items():
+                    print(f"  {power_name}: {centers}")
+                print("Units:")
+                for power_name, units in self.game.get_units().items():
+                    print(f"  {power_name}: {units}")
+                return None
+            elif mode == 'svg':
+                # Return SVG representation
+                return self.game.render(output_format='svg')
+        return None
+    def close(self):
+        """Perform any necessary cleanup.
+        The only cleanup done here is dropping the reference to the game object;
+        afterwards ``get_log_info`` returns ``{}`` and ``render`` returns None.
+        """
+        self.game = None

src_code_for_reproducibility/markov_games/diplomacy/diplomacy_logging.py ADDED Viewed

	@@ -0,0 +1,360 @@

+import os
+import json
+from utils.common_imports import *
+def diplomacy_log_match(
+        path,
+        agents_log_info,
+        env_log_info,
+        metrics_func=None,
+        metrics_func_args=None
+        ):
+    """
+    Logs the Diplomacy game data and generates HTML visualizations using the get_log_info methods.
+    Args:
+        path (str): Base path to save the data.
+        agents_log_info (list): List of agent information dictionaries containing the get_log_info results.
+        env_log_info (dict): Environment information from its get_log_info method.
+        metrics_func (str, optional): Name of the function to calculate metrics.
+        metrics_func_args (dict, optional): Arguments for the metrics function.
+    """
+    # Create directory structure
+    os.makedirs(path, exist_ok=True)
+    # Save the environment log info
+    env_log_path = os.path.join(path, "env_log.json")
+    with open(env_log_path, "w") as f:
+        json.dump(env_log_info, f, indent=4, default=_json_serialize)
+    # Process each agent's log info
+    for agent_log in agents_log_info:
+        power_name = agent_log["power_name"]
+        # Define paths for raw data and statistics subfolders
+        power_path = os.path.join(path, power_name)
+        raw_data_path = os.path.join(power_path, "raw_data")
+        statistics_path = os.path.join(power_path, "statistics")
+        # Ensure directories exist
+        os.makedirs(raw_data_path, exist_ok=True)
+        os.makedirs(statistics_path, exist_ok=True)
+        # Determine the next available file number for raw data
+        raw_files = os.listdir(raw_data_path)
+        raw_numbers = [int(f.split('_')[-1].split('.')[0]) for f in raw_files if f.startswith("log_")]
+        next_raw_number = max(raw_numbers, default=0) + 1
+        raw_file = os.path.join(raw_data_path, f"log_{next_raw_number}.json")
+        # Save agent log info
+        with open(raw_file, "w") as f:
+            json.dump(agent_log, f, indent=4, default=_json_serialize)
+        # Log metrics if a metrics function is provided
+        if metrics_func:
+            metrics_files = os.listdir(statistics_path)
+            metrics_numbers = [int(f.split('_')[-1].split('.')[0]) for f in metrics_files if f.startswith("metrics_")]
+            next_metrics_number = max(metrics_numbers, default=0) + 1
+            metrics_file = os.path.join(statistics_path, f"metrics_{next_metrics_number}.json")
+            metrics = globals()[metrics_func](agent_log, info, **metrics_func_args)
+            with open(metrics_file, "w") as f:
+                json.dump(metrics, f, indent=4)
+    # Generate the HTML visualization
+    html_content = generate_diplomacy_html(agents_log_info, env_log_info)
+    # Ensure the html directory exists
+    html_path = os.path.join(path, "html")
+    os.makedirs(html_path, exist_ok=True)
+    # Determine the next available file number for HTML
+    html_files = os.listdir(html_path)
+    html_numbers = [int(f.split('_')[-1].split('.')[0]) for f in html_files if f.startswith("game_summary_")]
+    next_html_number = max(html_numbers, default=0) + 1
+    html_file = os.path.join(html_path, f"game_summary_{next_html_number}.html")
+    # Save the HTML content to a file
+    with open(html_file, "w") as f:
+        f.write(html_content)
+def generate_diplomacy_html(agent_infos, env_info):
+    """
+    Generate HTML visualization for a Diplomacy game.
+    Args:
+        agent_infos (list): List of agent information dictionaries from get_log_info.
+        env_info (dict): Environment information from get_log_info.
+    Returns:
+        str: HTML content for the game visualization.
+    """
+    # Extract game information
+    game_id = env_info.get("game_id", "Unknown")
+    phase = env_info.get("phase", "Unknown")
+    map_name = env_info.get("map_name", "standard")
+    is_game_done = env_info.get("is_game_done", False)
+    outcome = env_info.get("outcome", [])
+    centers = env_info.get("centers", {})
+    units = env_info.get("units", {})
+    # HTML head and style
+    html_content = """
+    <!DOCTYPE html>
+    <html lang="en">
+    <head>
+        <meta charset="UTF-8">
+        <meta name="viewport" content="width=device-width, initial-scale=1.0">
+        <title>Diplomacy Game {game_id}</title>
+        <style>
+            body {{
+                font-family: 'Arial', sans-serif;
+                background-color: #f5f5f5;
+                color: #333333;
+                margin: 0;
+                padding: 20px;
+            }}
+            .container {{
+                display: grid;
+                grid-template-columns: repeat(3, 1fr);
+                grid-gap: 20px;
+                margin-bottom: 30px;
+            }}
+            .central-info {{
+                grid-column: span 3;
+                background: #fff;
+                padding: 20px;
+                border-radius: 10px;
+                box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
+                margin-bottom: 20px;
+            }}
+            .power-column {{
+                background: #fff;
+                padding: 15px;
+                border-radius: 10px;
+                box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
+            }}
+            .message {{
+                margin-bottom: 15px;
+                padding: 12px;
+                border-radius: 8px;
+                box-shadow: 0 1px 4px rgba(0, 0, 0, 0.1);
+            }}
+            .user {{
+                background: rgba(235, 245, 255, 0.8);
+                border-left: 4px solid #007bff;
+            }}
+            .assistant {{
+                background: rgba(240, 255, 240, 0.8);
+                border-right: 4px solid #28a745;
+            }}
+            .orders {{
+                background: rgba(255, 248, 225, 0.8);
+                border-left: 4px solid #ffc107;
+            }}
+            .role {{
+                font-weight: bold;
+                margin-bottom: 5px;
+                color: #333333;
+            }}
+            .power-name {{
+                text-align: center;
+                font-size: 1.4em;
+                margin-bottom: 15px;
+                color: #000;
+                font-weight: 600;
+                text-transform: uppercase;
+                letter-spacing: 1px;
+            }}
+            .game-info {{
+                display: grid;
+                grid-template-columns: repeat(2, 1fr);
+                grid-gap: 15px;
+            }}
+            .info-card {{
+                background: #f9f9f9;
+                padding: 15px;
+                border-radius: 8px;
+                box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
+            }}
+            .supply-centers, .units-list {{
+                display: flex;
+                flex-wrap: wrap;
+                justify-content: space-between;
+            }}
+            .supply-center, .unit {{
+                flex: 0 0 30%;
+                margin-bottom: 10px;
+                padding: 8px;
+                background: #f0f0f0;
+                border-radius: 5px;
+                text-align: center;
+            }}
+            h2 {{
+                border-bottom: 2px solid #eee;
+                padding-bottom: 10px;
+                margin-top: 0;
+            }}
+            .outcome {{
+                background: #e8f5e9;
+                padding: 15px;
+                border-radius: 8px;
+                margin-top: 15px;
+                font-weight: bold;
+                text-align: center;
+            }}
+            .austria {{ border-top: 5px solid #ff5050; }}
+            .england {{ border-top: 5px solid #5050ff; }}
+            .france {{ border-top: 5px solid #50c0ff; }}
+            .germany {{ border-top: 5px solid #808080; }}
+            .italy {{ border-top: 5px solid #50ff50; }}
+            .russia {{ border-top: 5px solid #ffffff; border: 1px solid #ccc; }}
+            .turkey {{ border-top: 5px solid #c0c000; }}
+        </style>
+    </head>
+    <body>
+        <div class="central-info">
+            <h2>Game Information</h2>
+            <div class="game-info">
+                <div class="info-card">
+                    <h3>Game Details</h3>
+                    <p><strong>Game ID:</strong> {game_id}</p>
+                    <p><strong>Phase:</strong> {phase}</p>
+                    <p><strong>Map:</strong> {map_name}</p>
+                    <p><strong>Status:</strong> {status}</p>
+                </div>
+                <div class="info-card">
+                    <h3>Supply Centers</h3>
+                    <div class="supply-centers">
+    """.format(
+        game_id=game_id,
+        phase=phase,
+        map_name=map_name,
+        status="Completed" if is_game_done else "Active"
+    )
+    # Add supply center information
+    for power, power_centers in centers.items():
+        html_content += f"""
+                        <div class="supply-center">
+                            <strong>{power}:</strong> {len(power_centers)}
+                        </div>
+        """
+    html_content += """
+                    </div>
+                </div>
+            </div>
+    """
+    # Add outcome if game is done
+    if is_game_done and outcome:
+        winners = outcome[1:] if len(outcome) > 1 else ["Draw"]
+        html_content += f"""
+            <div class="outcome">
+                <h3>Game Outcome</h3>
+                <p>Winners: {', '.join(winners)}</p>
+            </div>
+        """
+    html_content += """
+        </div>
+        <div class="container">
+    """
+    # Add each power's information
+    for agent_log in agent_infos:
+        power_name = agent_log["power_name"]
+        power_class = power_name.lower()
+        orders = agent_log.get("orders", [])
+        message_history = agent_log.get("message_history", [])
+        html_content += f"""
+            <div class="power-column {power_class}">
+                <div class="power-name">{power_name}</div>
+                <div class="info-card">
+                    <h3>Units</h3>
+                    <ul>
+        """
+        # Add units information
+        power_units = units.get(power_name, [])
+        for unit in power_units:
+            html_content += f"<li>{unit}</li>"
+        html_content += """
+                    </ul>
+                </div>
+                <div class="message orders">
+                    <div class="role">Final Orders</div>
+                    <ul>
+        """
+        # Add orders
+        for order in orders:
+            html_content += f"<li>{order}</li>"
+        html_content += """
+                    </ul>
+                </div>
+        """
+        # Add message history
+        for message in message_history:
+            if isinstance(message, dict):
+                # Skip system messages or handle differently
+                if message.get("role") == "system":
+                    continue
+                role = message.get("role", "unknown")
+                content = message.get("content", "")
+                role_class = "user" if role == "user" else "assistant"
+                role_display = "Environment" if role == "user" else f"LLM ({power_name})"
+                # Escape HTML characters in content
+                content = content.replace("<", "&lt;").replace(">", "&gt;").replace("\n", "<br>")
+                html_content += f"""
+                <div class="message {role_class}">
+                    <div class="role">{role_display}</div>
+                    <p>{content}</p>
+                </div>
+                """
+            elif isinstance(message, str):
+                # Simple string messages (may be used in some implementations)
+                html_content += f"""
+                <div class="message">
+                    <p>{message}</p>
+                </div>
+                """
+        html_content += """
+            </div>
+        """
+    html_content += """
+        </div>
+    </body>
+    </html>
+    """
+    return html_content
+def _json_serialize(obj):
+    """
+    A helper function to convert non-JSON-serializable objects
+    (like OrderResult) into strings or dicts.
+    """
+    # Check for the specific object types you know are problematic
+    if obj.__class__.__name__ == "OrderResult":
+        # Return a string representation or a dict
+        return str(obj)
+    # Fallback: attempt to convert anything else to string
+    return str(obj)

src_code_for_reproducibility/markov_games/diplomacy/diplomacy_logging_for_training.py ADDED Viewed

File without changes

src_code_for_reproducibility/markov_games/ipd/Ipd_hard_coded_agents.py ADDED Viewed

	@@ -0,0 +1,72 @@

+from dataclasses import dataclass
+from typing import Any, Tuple
+from mllm.markov_games.ipd.ipd_agent import IPDAgent
+from mllm.markov_games.rollout_tree import AgentActLog, ChatTurn
+@dataclass
+class AlwaysCooperateIPDAgent(IPDAgent):
+	async def act(self, observation) -> Tuple[Any, AgentActLog]:
+		"""
+		Always plays the cooperate action, ignoring observation.
+		Returns the configured cooperate_string so the simulation parses it as "C".
+		"""
+		action = self.cooperate_string
+		# Log a minimal, structured chat turn for consistency with other agents
+		turn_text = f"Playing cooperate: {action}"
+		self.state.chat_history.append(
+			ChatTurn(
+				agent_id=self.agent_id,
+				role="assistant",
+				content=turn_text,
+				is_state_end=True,
+			)
+		)
+		act_log = AgentActLog(
+			chat_turns=[self.state.chat_history[-1]],
+			info=None,
+		)
+		# Advance internal counters similar to IPDAgent semantics
+		self.state.chat_counter = len(self.state.chat_history)
+		self.state.round_nb = observation.round_nb
+		return action, act_log
+@dataclass
+class AlwaysDefectIPDAgent(IPDAgent):
+	async def act(self, observation) -> Tuple[Any, AgentActLog]:
+		"""
+		Always plays the defect action, ignoring observation.
+		Returns the configured defect_string so the simulation parses it as "D".
+		"""
+		action = self.defect_string
+		# Log a minimal, structured chat turn for consistency with other agents
+		turn_text = f"Playing defect: {action}"
+		self.state.chat_history.append(
+			ChatTurn(
+				agent_id=self.agent_id,
+				role="assistant",
+				content=turn_text,
+				is_state_end=True,
+			)
+		)
+		act_log = AgentActLog(
+			chat_turns=[self.state.chat_history[-1]],
+			info=None,
+		)
+		# Advance internal counters similar to IPDAgent semantics
+		self.state.chat_counter = len(self.state.chat_history)
+		self.state.round_nb = observation.round_nb
+		return action, act_log

src_code_for_reproducibility/markov_games/ipd/__init__.py ADDED Viewed

	@@ -0,0 +1,7 @@

+from .Ipd_hard_coded_agents import AlwaysCooperateIPDAgent, AlwaysDefectIPDAgent
+# Names exported by ``from <package> import *``: the two hard-coded agents imported above.
+__all__ = [
+	"AlwaysCooperateIPDAgent",
+	"AlwaysDefectIPDAgent",
+]

src_code_for_reproducibility/markov_games/ipd/__pycache__/Ipd_hard_coded_agents.cpython-312.pyc ADDED Viewed

Binary file (2.86 kB). View file

src_code_for_reproducibility/markov_games/ipd/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (308 Bytes). View file

src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_agent.cpython-312.pyc ADDED Viewed

Binary file (4.7 kB). View file

src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_statistics.cpython-312.pyc ADDED Viewed

Binary file (1.28 kB). View file

src_code_for_reproducibility/markov_games/ipd/ipd_agent.py ADDED Viewed

	@@ -0,0 +1,115 @@

+import copy
+import json
+import random
+import re
+from collections.abc import Callable
+from copy import deepcopy
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional, Tuple, Union
+from mllm.markov_games.agent import Agent
+from mllm.markov_games.rollout_tree import AgentActLog, ChatTurn
+@dataclass
+class IPDAgentState:
+    """
+    TOWRITE
+    """
+    nb_retries: int
+    round_nb: int
+    chat_counter: int
+    chat_history: List[ChatTurn]
+@dataclass
+class IPDAgent(Agent):
+    seed: int
+    agent_id: str
+    agent_name: str
+    policy: Callable[[List[Dict]], str]
+    intro_prompt: str  # Introduction prompt explaining the game rules
+    goal_prompt: str  # Prompt explaining the agent's goal
+    strategy_prompt: str  # Prompt suggesting a strategy to the agent
+    max_errors: int  # Maximum number of errors allowed before default action
+    allow_reasoning: bool  # Whether to allow reasoning in the response
+    max_reasoning_chars: int  # Maximum number of characters for reasoning
+    cooperate_string: str  # string parsed as playing cooperate by simulation
+    defect_string: str  # string parsed as playing defect by simulation
+    def __post_init__(self):
+        self.state = IPDAgentState(
+            nb_retries=0, round_nb=0, chat_counter=0, chat_history=[]
+        )
+    async def act(self, observation) -> Tuple[Any, AgentActLog]:
+        """
+        TOWRITE
+        """
+        action = None
+        action_is_ready = False
+        round_nb = observation.round_nb
+        # If it's the first round, we need to send the intro prompt
+        if round_nb == 0 and self.state.chat_counter == 0:
+            self.state.chat_history.append(
+                ChatTurn(
+                    agent_id=self.agent_id,
+                    role="user",
+                    content=self.intro_prompt,
+                    is_state_end=True,
+                )
+            )
+        # If new round
+        if round_nb > self.state.round_nb:
+            coagent_action = observation.last_coagent_move
+            user_message = f"Last round, the other agent played {coagent_action}."
+            self.state.chat_history.append(
+                ChatTurn(
+                    agent_id=self.agent_id,
+                    role="user",
+                    content=user_message,
+                    is_state_end=True,
+                )
+            )
+        # If not new round, try to get valid action from policy
+        output_chat_turn: ChatTurn = await self.policy(
+            state=self.state.chat_history,
+            agent_id=self.agent_id,
+            regex=f"({self.cooperate_string}|{self.defect_string})",
+        )
+        self.state.chat_history.append(output_chat_turn)
+        action = output_chat_turn.content
+        agent_step_log = AgentActLog(
+            chat_turns=self.state.chat_history[self.state.chat_counter :], info=None
+        )
+        self.state.chat_counter = len(self.state.chat_history)
+        self.state.round_nb = round_nb
+        return action, agent_step_log
+    def get_safe_copy(self):
+        """
+        Return a safe copy of the agent.
+        """
+        agent_copy = copy.copy(self)
+        agent_copy.state = copy.deepcopy(self.state)
+        return agent_copy
+    def reset(self):
+        self.state = IPDAgentState()
+        raise NotImplementedError
+    def render(self):
+        pass
+    def close(self):
+        pass
+    def get_agent_info(self):
+        pass

src_code_for_reproducibility/markov_games/ipd/ipd_simulation.py ADDED Viewed

	@@ -0,0 +1,162 @@

+import copy
+import random
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional, Tuple
+import numpy as np
+from mllm.markov_games.markov_game import Simulation
+from mllm.markov_games.rollout_tree import SimulationStepLog
+from mllm.utils.get_coagent_id import get_coagent_id
+@dataclass
+class IPDState:
+    """
+    State of the Iterated Prisoner's Dilemma game.
+    """
+    # Number of rounds played so far; starts at 0.
+    round_nb: int = 0
+    # Flag for a finished game; starts as False.
+    done: bool = False
+    # Action most recently submitted by each agent, keyed by agent id; None before the first step.
+    last_moves: Dict[str, str] | None = None
+@dataclass
+class IPDObs:
+    """
+    Observation in Iterated Prisoner's Dilemma game.
+    """
+    # Round counter of the game at the time of the observation.
+    round_nb: int
+    # The other agent's action in the previous round; None when no round has been played yet.
+    last_coagent_move: str | None
+class IPD(Simulation):
+    """
+    Iterated Prisoner's Dilemma simulation following the standard.
+    In each round of the game, two agents simultaneously choose to either cooperate (C) or defect (D).
+    The payoffs are as follows:
+    - If both cooperate: Both receive the "reward" (usually 3 points)
+    - If both defect: Both receive the "punishment" (usually 1 point)
+    - If one cooperates and one defects: The defector receives the "temptation" (usually 5 points)
+      and the cooperator receives the "sucker" payoff (usually 0 points)
+    The game is played for a specified number of rounds.
+    """
+    def __init__(
+        self,
+        agent_ids: List[str],
+        agent_names: List[str],
+        seed: int,
+        rounds_per_game: int,
+        reward: float,  # Both cooperate
+        punishment: float,  # Both defect
+        temptation: float,  # Defector's reward when other cooperates
+        sucker: float,  # Cooperator's reward when other defects
+        cooperate_actions: List[str],
+        defect_actions: List[str],
+    ):
+        self.agent_ids = agent_ids
+        self.agent_names = agent_names
+        self.seed = seed
+        self.rounds_per_game = rounds_per_game
+        self.reward = reward
+        self.punishment = punishment
+        self.temptation = temptation
+        self.sucker = sucker
+        self.cooperate_actions = cooperate_actions
+        self.defect_actions = defect_actions
+        self.state = IPDState()
+    def step(self, actions: Dict[str, str]) -> Tuple[bool, SimulationStepLog]:
+        """
+        Take a step in the environment using the provided actions.
+        Here, the observations are just the states of the game.
+        Args:
+            actions (dict): A dictionary where keys are agent identifiers and values are actions ('C' or 'D').
+        Returns:
+            observations (dict): A dictionary where keys are agent identifiers and values are observations.
+            done (bool): Whether the episode has ended.
+            info (dict): Additional information about the environment.
+        """
+        # Calculate rewards using payoff matrix
+        agent0_action = actions[self.agent_ids[0]]
+        agent1_action = actions[self.agent_ids[1]]
+        # Normalize actions to standard cooperate/defect/gibberish format
+        def normalize_action(action):
+            if action in self.cooperate_actions:
+                return "C"
+            elif action in self.defect_actions:
+                return "D"
+            else:
+                return "D"
+        norm_action0 = normalize_action(agent0_action)
+        norm_action1 = normalize_action(agent1_action)
+        payoffs = {
+            ("C", "C"): [self.reward, self.reward],
+            ("C", "D"): [self.sucker, self.temptation],
+            ("D", "C"): [self.temptation, self.sucker],
+            ("D", "D"): [self.punishment, self.punishment],
+        }
+        round_rewards = {
+            self.agent_ids[0]: payoffs[(norm_action0, norm_action1)][0],
+            self.agent_ids[1]: payoffs[(norm_action0, norm_action1)][1],
+        }
+        # Update game state
+        self.state.round_nb += 1
+        self.state.last_moves = copy.deepcopy(actions)
+        done = self.state.round_nb >= self.rounds_per_game
+        step_log = SimulationStepLog(
+            rewards=round_rewards,
+            info={
+                "actions": {
+                    self.agent_ids[0]: norm_action0,
+                    self.agent_ids[1]: norm_action1,
+                }
+            },
+        )
+        return done, step_log
+    def get_obs(self):
+        """Returns all agent observations in dict
+        Returns:
+            observations
+        """
+        observations = {}
+        for agent_id in self.agent_ids:
+            observations[agent_id] = self.get_obs_agent(agent_id)
+        return observations
+    def get_obs_agent(self, agent_id):
+        """Returns observation for agent_id"""
+        if self.state.last_moves != None:
+            other_id = get_coagent_id(self.agent_ids, agent_id)
+            last_coagent_move = self.state.last_moves[other_id]
+        else:
+            last_coagent_move = None
+        obs = IPDObs(round_nb=self.state.round_nb, last_coagent_move=last_coagent_move)
+        return obs
+    def reset(self):
+        """Returns initial observations and states"""
+        self.state = IPDState()
+        return self.get_obs()
+    def get_safe_copy(self):
+        """
+        Return a safe copy of the simulation.
+        """
+        simulation_copy = copy.copy(self)
+        simulation_copy.state = copy.deepcopy(self.state)
+        return simulation_copy

src_code_for_reproducibility/markov_games/ipd/ipd_statistics.py ADDED Viewed

	@@ -0,0 +1,18 @@

+from __future__ import annotations
+from typing import Dict, Callable, List, Tuple
+from mllm.markov_games.rollout_tree import SimulationStepLog
+def avg_reward(sl: SimulationStepLog) -> List[Tuple[str, float]]:
+    for aid in sl.rewards.keys():
+        if "buffer" in str(aid) and "live" not in str(aid):
+            return None
+    # One value per agent at each step
+    rewards_dict = {f"reward-{aid}": float(v) for aid, v in (sl.rewards or {}).items()}
+    return [(key, value) for key, value in rewards_dict.items() if value is not None]
+# Statistic functions for this game: each takes a SimulationStepLog and returns (name, value) pairs.
+stat_functs: list[Callable[[SimulationStepLog], List[Tuple[str, float]]]] = [
+    avg_reward,
+]

src_code_for_reproducibility/markov_games/negotiation/README.md ADDED Viewed

	@@ -0,0 +1,40 @@

+## Negotiation Games: core mechanics and variants
+This family of games features two agents who, in each round, may briefly communicate and then simultaneously propose how to split a fixed resource (most commonly 10 coins). Rewards are the amount kept multiplied by an agent’s per-unit value. The starting speaker alternates deterministically across rounds.
+Communication is optional and variant-dependent: some settings encourage rich messaging to share private information, while others remove messaging entirely to focus on allocation behavior.
+Proportional splitting is used when the two proposals exceed the available total: allocations are scaled proportionally rather than discarded. This preserves a useful learning signal even when agents over-claim.
+### Variants (in increasing difficulty)
+- No‑Press Split
+  - Single item type (coins)
+  - No communication; agents go straight to making split proposals, with the starting player alternating deterministically.
+  - Motivation: mirrors no‑communication setups (e.g., Advantage Alignment) while keeping the split decision nontrivial.
+  - Deterministic Mode: values are fixed and public. One agent values coins at 10 and the other at 1, and this assignment alternates each round.
+  - Stochastic Mode: values are random and uncorrelated.
+- Trust-and-Split RPS (TAS-RPS)
+  - Single item type (coins)
+  - Each round, a rock–paper–scissors hand draw creates a strong asymmetry: the winner’s per-coin value is 10, the loser’s is 1.
+  - Each agent initially sees only their own hand and must communicate to coordinate an optimal split.
+  - Motivation: enforce large value disparity so one’s own value reveals little about the other’s (avoiding ceiling effects) and incentivize meaningful communication.
+- Trust-and-Split (TAS)
+  - Single item type (coins); each round, each agent’s per-coin value is independently sampled in a broad range (e.g., 1–20).
+  - Each agent observes only their own value; they may use short messages to share and negotiate.
+  - Motivation: a simple blend that tests whether agents learn to exchange private information and coordinate proportional, value-aware splits.
+- Deal-or-No-Deal (DOND)
+  - Introduced in [Deal or No Deal? End-to-End Learning for Negotiation Dialogues](https://arxiv.org/pdf/1706.05125)
+  - Multiple item types (typically "books", "hats" and "balls") with limited stocks; each agent has its own per-type values.
+  - A deal pays out only if both proposals exactly agree and respect the stock; otherwise no deal (zero reward) that round.
+  - Motivation: a known benchmark closer to real-world bargaining, where both parties must explicitly agree.

src_code_for_reproducibility/markov_games/negotiation/__pycache__/dond_agent.cpython-312.pyc ADDED Viewed

Binary file (4.19 kB). View file

src_code_for_reproducibility/markov_games/negotiation/__pycache__/dond_simulation.cpython-312.pyc ADDED Viewed

Binary file (10.2 kB). View file

src_code_for_reproducibility/markov_games/negotiation/__pycache__/nego_agent.cpython-312.pyc ADDED Viewed

Binary file (10.9 kB). View file

src_code_for_reproducibility/markov_games/negotiation/__pycache__/nego_hard_coded_policies.cpython-312.pyc ADDED Viewed

Binary file (3.23 kB). View file

src_code_for_reproducibility/markov_games/negotiation/__pycache__/nego_simulation.cpython-312.pyc ADDED Viewed

Binary file (12.2 kB). View file

src_code_for_reproducibility/markov_games/negotiation/__pycache__/negotiation_statistics.cpython-312.pyc ADDED Viewed

Binary file (14.1 kB). View file

src_code_for_reproducibility/markov_games/negotiation/__pycache__/no_press_nego_agent.cpython-312.pyc ADDED Viewed

Binary file (5.5 kB). View file

src_code_for_reproducibility/markov_games/negotiation/__pycache__/no_press_nego_simulation.cpython-312.pyc ADDED Viewed

Binary file (9.06 kB). View file

src_code_for_reproducibility/markov_games/negotiation/__pycache__/tas_agent.cpython-312.pyc ADDED Viewed

Binary file (6.14 kB). View file

src_code_for_reproducibility/markov_games/negotiation/__pycache__/tas_rps_agent.cpython-312.pyc ADDED Viewed

Binary file (5.59 kB). View file