Muqeeth committed on Mar 31

Commit

143850b

verified ·

1 Parent(s): eeb6360

Add files using upload-large-folder tool

Browse files

Files changed (50) hide show

.hydra/config.yaml +168 -0
.hydra/hydra.yaml +154 -0
.hydra/overrides.yaml +1 -0
seed_42/Qwen/Qwen2.5-7B-Instruct/adapters/agent_adapter/adapter_config.json +46 -0
seed_42/Qwen/Qwen2.5-7B-Instruct/adapters/critic_adapter/adapter_config.json +46 -0
src_code_for_reproducibility/chat_utils/__pycache__/apply_template.cpython-312.pyc +0 -0
src_code_for_reproducibility/chat_utils/__pycache__/chat_turn.cpython-312.pyc +0 -0
src_code_for_reproducibility/chat_utils/__pycache__/template_specific.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/__pycache__/__init__.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/__pycache__/alternative_actions_runner.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/__pycache__/linear_runner.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/__pycache__/rollout_tree.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/__pycache__/run_markov_games.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/ipd/Ipd_hard_coded_agents.py +76 -0
src_code_for_reproducibility/markov_games/ipd/__init__.py +11 -0
src_code_for_reproducibility/markov_games/ipd/__pycache__/Ipd_hard_coded_agents.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/ipd/__pycache__/__init__.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_agent.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_simulation.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_statistics.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/ipd/ipd_agent.py +120 -0
src_code_for_reproducibility/markov_games/ipd/ipd_simulation.py +167 -0
src_code_for_reproducibility/markov_games/ipd/ipd_statistics.py +24 -0
src_code_for_reproducibility/markov_games/negotiation/README.md +27 -0
src_code_for_reproducibility/markov_games/negotiation/__pycache__/dond_agent.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/negotiation/__pycache__/dond_simulation.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/negotiation/__pycache__/nego_agent.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/negotiation/__pycache__/nego_hard_coded_policies.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/negotiation/__pycache__/nego_simulation.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/negotiation/__pycache__/negotiation_statistics.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/negotiation/__pycache__/no_press_nego_agent.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/negotiation/__pycache__/no_press_nego_simulation.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/negotiation/__pycache__/tas_rps_agent.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/negotiation/__pycache__/tas_rps_simulation.cpython-312.pyc +0 -0
src_code_for_reproducibility/markov_games/negotiation/nego_simulation.py +252 -0
src_code_for_reproducibility/markov_games/negotiation/no_press_nego_agent.py +108 -0
src_code_for_reproducibility/markov_games/negotiation/tas_rps_simulation.py +257 -0
src_code_for_reproducibility/models/__pycache__/large_language_model_api.cpython-312.pyc +0 -0
src_code_for_reproducibility/models/__pycache__/large_language_model_local.cpython-312.pyc +0 -0
src_code_for_reproducibility/training/__pycache__/__init__.cpython-312.pyc +0 -0
src_code_for_reproducibility/training/credit_methods.py +307 -0
src_code_for_reproducibility/utils/__init__.py +4 -0
src_code_for_reproducibility/utils/gather_training_stats.py +262 -0
src_code_for_reproducibility/utils/resource_context.py +83 -0
src_code_for_reproducibility/utils/rollout_tree_chat_htmls.py +1597 -0
src_code_for_reproducibility/utils/rollout_tree_gather_utils.py +314 -0
src_code_for_reproducibility/utils/rollout_tree_stats.py +55 -0
src_code_for_reproducibility/utils/short_id_gen.py +16 -0
src_code_for_reproducibility/utils/stat_pack.py +117 -0
src_code_for_reproducibility/utils/wandb_utils.py +170 -0

.hydra/config.yaml ADDED Viewed

	@@ -0,0 +1,168 @@

+experiment:
+  wandb_enabled: true
+  nb_epochs: 3000
+  nb_matches_per_iteration: 64
+  reinit_matches_each_it: true
+  checkpoint_every_n_iterations: 50
+  start_epoch: 0
+  resume_experiment: true
+  base_seed: 42
+  seed_group_size: 8
+  train: true
+  stat_methods_for_live_wandb: mllm.markov_games.negotiation.negotiation_statistics
+  name: split_no_comm_naive_seed42
+  agent_buffer: false
+  keep_agent_buffer_count: ${lora_count}
+  agent_buffer_recent_k: -1
+logging:
+  wandb:
+    enabled: false
+    project: llm-negotiation
+    entity: null
+    mode: online
+    name: null
+    group: null
+    tags: []
+    notes: null
+temperature: 1.0
+markov_games:
+  runner_method_name: LinearRunner
+  runner_kwargs: {}
+  group_by_round: true
+  simulation_class_name: NoPressSimulation
+  simulation_init_args:
+    nb_of_rounds: 10
+    quota_messages_per_agent_per_round: 0
+    game_type: 10-1-ties
+    atleast_one_conflict: true
+    item_types:
+    - hats
+    - books
+    - balls
+  agents:
+    0:
+      agent_id: ${agent_0_id}
+      agent_name: Alice
+      agent_class_name: NoPressAgent
+      policy_id: base_llm/agent_adapter
+      init_kwargs:
+        goal: Maximize your total points over the whole game.
+    1:
+      agent_id: ${agent_1_id}
+      agent_name: Bob
+      agent_class_name: NoPressAgent
+      policy_id: base_llm/agent_adapter
+      init_kwargs:
+        goal: Maximize your total points over the whole game.
+models:
+  base_llm:
+    class: LeanLocalLLM
+    init_args:
+      llm_id: base_llm
+      model_name: Qwen/Qwen2.5-7B-Instruct
+      inference_backend: vllm
+      hf_kwargs:
+        device_map: auto
+        torch_dtype: bfloat16
+        max_memory:
+          0: 20GiB
+        attn_implementation: flash_attention_2
+      inference_backend_init_kwargs:
+        enable_lora: true
+        seed: ${experiment.base_seed}
+        enable_prefix_caching: true
+        max_model_len: 10000.0
+        gpu_memory_utilization: 0.5
+        dtype: bfloat16
+        trust_remote_code: true
+        max_lora_rank: 32
+        enforce_eager: false
+        max_loras: ${lora_count}
+        max_cpu_loras: ${lora_count}
+        enable_sleep_mode: true
+      inference_backend_sampling_params:
+        temperature: ${temperature}
+        top_p: 1.0
+        max_tokens: 400
+        top_k: -1
+        logprobs: 0
+      adapter_configs:
+        agent_adapter:
+          task_type: CAUSAL_LM
+          r: 32
+          lora_alpha: 64
+          lora_dropout: 0.0
+          target_modules: all-linear
+        critic_adapter:
+          task_type: CAUSAL_LM
+          r: 32
+          lora_alpha: 64
+          lora_dropout: 0.0
+          target_modules: all-linear
+      enable_thinking: null
+      regex_max_attempts: 3
+critics:
+  agent_critic:
+    module_pointer:
+    - base_llm
+    - critic_adapter
+optimizers:
+  agent_optimizer:
+    module_pointer:
+    - base_llm
+    - agent_adapter
+    optimizer_class_name: torch.optim.Adam
+    init_args:
+      lr: 3.0e-06
+      weight_decay: 0.0
+  critic_optimizer:
+    module_pointer: agent_critic
+    optimizer_class_name: torch.optim.Adam
+    init_args:
+      lr: 3.0e-06
+      weight_decay: 0.0
+trainers:
+  agent_trainer:
+    class: TrainerNaive
+    module_pointers:
+      policy:
+      - base_llm
+      - agent_adapter
+      policy_optimizer: agent_optimizer
+      critic: agent_critic
+      critic_optimizer: critic_optimizer
+    kwargs:
+      entropy_coeff: 0.0
+      entropy_topk: null
+      entropy_mask_regex: null
+      kl_coeff: 0.001
+      gradient_clipping: 1.0
+      restrict_tokens: null
+      mini_batch_size: 1
+      use_gradient_checkpointing: false
+      temperature: ${temperature}
+      device: cuda:0
+      use_gae: false
+      whiten_advantages: false
+      whiten_advantages_time_step_wise: false
+      skip_discounted_state_visitation: true
+      use_gae_lambda_annealing: false
+      gae_lambda_annealing_method: None
+      gae_lambda_annealing_method_params: None
+      gae_lambda_annealing_limit: 0.95
+      discount_factor: 0.9
+      use_rloo: true
+      enable_tokenwise_logging: false
+      pg_loss_normalization: nb_tokens
+      truncated_importance_sampling_ratio_cap: 2.0
+      reward_normalizing_constant: 100.0
+train_on_which_data:
+  agent_trainer: ${agent_ids}
+lora_count: 30
+common_agent_kwargs:
+  goal: Maximize your total points over the whole game.
+agent_0_id: Alice
+agent_1_id: Bob
+agent_ids:
+- Alice
+- Bob

.hydra/hydra.yaml ADDED Viewed

	@@ -0,0 +1,154 @@

+hydra:
+  run:
+    dir: ${oc.env:SCRATCH}/llm_negotiation/${now:%Y_%m}/${experiment.name}
+  sweep:
+    dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
+    subdir: ${hydra.job.num}
+  launcher:
+    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
+  sweeper:
+    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
+    max_batch_size: null
+    params: null
+  help:
+    app_name: ${hydra.job.name}
+    header: '${hydra.help.app_name} is powered by Hydra.
+      '
+    footer: 'Powered by Hydra (https://hydra.cc)
+      Use --hydra-help to view Hydra specific help
+      '
+    template: '${hydra.help.header}
+      == Configuration groups ==
+      Compose your configuration from those groups (group=option)
+      $APP_CONFIG_GROUPS
+      == Config ==
+      Override anything in the config (foo.bar=value)
+      $CONFIG
+      ${hydra.help.footer}
+      '
+  hydra_help:
+    template: 'Hydra (${hydra.runtime.version})
+      See https://hydra.cc for more info.
+      == Flags ==
+      $FLAGS_HELP
+      == Configuration groups ==
+      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
+      to command line)
+      $HYDRA_CONFIG_GROUPS
+      Use ''--cfg hydra'' to Show the Hydra config.
+      '
+    hydra_help: ???
+  hydra_logging:
+    version: 1
+    formatters:
+      simple:
+        format: '[%(asctime)s][HYDRA] %(message)s'
+    handlers:
+      console:
+        class: logging.StreamHandler
+        formatter: simple
+        stream: ext://sys.stdout
+    root:
+      level: INFO
+      handlers:
+      - console
+    loggers:
+      logging_example:
+        level: DEBUG
+    disable_existing_loggers: false
+  job_logging:
+    version: 1
+    formatters:
+      simple:
+        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
+    handlers:
+      console:
+        class: logging.StreamHandler
+        formatter: simple
+        stream: ext://sys.stdout
+      file:
+        class: logging.FileHandler
+        formatter: simple
+        filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
+    root:
+      level: INFO
+      handlers:
+      - console
+      - file
+    disable_existing_loggers: false
+  env: {}
+  mode: RUN
+  searchpath: []
+  callbacks: {}
+  output_subdir: .hydra
+  overrides:
+    hydra:
+    - hydra.mode=RUN
+    task: []
+  job:
+    name: run
+    chdir: false
+    override_dirname: ''
+    id: ???
+    num: ???
+    config_name: split_no_comm_naive_seed42.yaml
+    env_set: {}
+    env_copy: []
+    config:
+      override_dirname:
+        kv_sep: '='
+        item_sep: ','
+        exclude_keys: []
+  runtime:
+    version: 1.3.2
+    version_base: '1.1'
+    cwd: /lustre10/scratch/muqeeth/AdAlignLLM
+    config_sources:
+    - path: hydra.conf
+      schema: pkg
+      provider: hydra
+    - path: /lustre10/scratch/muqeeth/AdAlignLLM/configs
+      schema: file
+      provider: main
+    - path: ''
+      schema: structured
+      provider: schema
+    output_dir: /scratch/muqeeth/llm_negotiation/2026_03/split_no_comm_naive_seed42
+    choices:
+      hydra/env: default
+      hydra/callbacks: null
+      hydra/job_logging: default
+      hydra/hydra_logging: default
+      hydra/hydra_help: default
+      hydra/help: default
+      hydra/sweeper: basic
+      hydra/launcher: basic
+      hydra/output: default
+  verbose: false

.hydra/overrides.yaml ADDED Viewed

	@@ -0,0 +1 @@


1	+ []

seed_42/Qwen/Qwen2.5-7B-Instruct/adapters/agent_adapter/adapter_config.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "Qwen/Qwen2.5-7B-Instruct",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.18.1",
+  "qalora_group_size": 16,
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "gate_proj",
+    "down_proj",
+    "q_proj",
+    "o_proj",
+    "k_proj",
+    "v_proj",
+    "up_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

seed_42/Qwen/Qwen2.5-7B-Instruct/adapters/critic_adapter/adapter_config.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "Qwen/Qwen2.5-7B-Instruct",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.18.1",
+  "qalora_group_size": 16,
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "gate_proj",
+    "down_proj",
+    "q_proj",
+    "o_proj",
+    "k_proj",
+    "v_proj",
+    "up_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

src_code_for_reproducibility/chat_utils/__pycache__/apply_template.cpython-312.pyc ADDED Viewed

Binary file (4.13 kB). View file

src_code_for_reproducibility/chat_utils/__pycache__/chat_turn.cpython-312.pyc ADDED Viewed

Binary file (1.46 kB). View file

src_code_for_reproducibility/chat_utils/__pycache__/template_specific.cpython-312.pyc ADDED Viewed

Binary file (4.4 kB). View file

src_code_for_reproducibility/markov_games/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (297 Bytes). View file

src_code_for_reproducibility/markov_games/__pycache__/alternative_actions_runner.cpython-312.pyc ADDED Viewed

Binary file (5.43 kB). View file

src_code_for_reproducibility/markov_games/__pycache__/linear_runner.cpython-312.pyc ADDED Viewed

Binary file (1.64 kB). View file

src_code_for_reproducibility/markov_games/__pycache__/rollout_tree.cpython-312.pyc ADDED Viewed

Binary file (3.97 kB). View file

src_code_for_reproducibility/markov_games/__pycache__/run_markov_games.cpython-312.pyc ADDED Viewed

Binary file (1.53 kB). View file

src_code_for_reproducibility/markov_games/ipd/Ipd_hard_coded_agents.py ADDED Viewed

	@@ -0,0 +1,76 @@

+"""
+File: mllm/markov_games/ipd/Ipd_hard_coded_agents.py
+Summary: Contains hand-crafted IPD policies used as deterministic baselines.
+"""
+from dataclasses import dataclass
+from typing import Any, Tuple
+from mllm.markov_games.ipd.ipd_agent import IPDAgent
+from mllm.markov_games.rollout_tree import AgentActLog, ChatTurn
@dataclass
class AlwaysCooperateIPDAgent(IPDAgent):
    """Scripted IPD baseline that answers every call to ``act`` with cooperation."""

    async def act(self, observation) -> Tuple[Any, AgentActLog]:
        """
        Play the cooperate move without consulting the policy.

        Only ``observation.round_nb`` is read from the observation. The returned
        action is the configured ``cooperate_string``; the returned log holds the
        single scripted chat turn appended by this call.
        """
        move = self.cooperate_string
        history = self.state.chat_history

        # Record a minimal assistant turn so the log has the same structure as
        # the one produced by policy-driven agents.
        scripted_turn = ChatTurn(
            agent_id=self.agent_id,
            role="assistant",
            content=f"Playing cooperate: {move}",
            is_state_end=True,
        )
        history.append(scripted_turn)

        # Keep the counters in step with what IPDAgent.act maintains.
        self.state.chat_counter = len(history)
        self.state.round_nb = observation.round_nb

        return move, AgentActLog(chat_turns=[scripted_turn], info=None)
@dataclass
class AlwaysDefectIPDAgent(IPDAgent):
    """Scripted IPD baseline that answers every call to ``act`` with defection."""

    async def act(self, observation) -> Tuple[Any, AgentActLog]:
        """
        Play the defect move without consulting the policy.

        Only ``observation.round_nb`` is read from the observation. The returned
        action is the configured ``defect_string``; the returned log holds the
        single scripted chat turn appended by this call.
        """
        move = self.defect_string
        history = self.state.chat_history

        # Record a minimal assistant turn so the log has the same structure as
        # the one produced by policy-driven agents.
        scripted_turn = ChatTurn(
            agent_id=self.agent_id,
            role="assistant",
            content=f"Playing defect: {move}",
            is_state_end=True,
        )
        history.append(scripted_turn)

        # Keep the counters in step with what IPDAgent.act maintains.
        self.state.chat_counter = len(history)
        self.state.round_nb = observation.round_nb

        return move, AgentActLog(chat_turns=[scripted_turn], info=None)

src_code_for_reproducibility/markov_games/ipd/__init__.py ADDED Viewed

	@@ -0,0 +1,11 @@

+"""
+File: mllm/markov_games/ipd/__init__.py
+Summary: Marks the Iterated Prisoner's Dilemma subpackage.
+"""
+from .Ipd_hard_coded_agents import AlwaysCooperateIPDAgent, AlwaysDefectIPDAgent
+__all__ = [
+    "AlwaysCooperateIPDAgent",
+    "AlwaysDefectIPDAgent",
+]

src_code_for_reproducibility/markov_games/ipd/__pycache__/Ipd_hard_coded_agents.cpython-312.pyc ADDED Viewed

Binary file (3.05 kB). View file

src_code_for_reproducibility/markov_games/ipd/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (435 Bytes). View file

src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_agent.cpython-312.pyc ADDED Viewed

Binary file (4.97 kB). View file

src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_simulation.cpython-312.pyc ADDED Viewed

Binary file (6.87 kB). View file

src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_statistics.cpython-312.pyc ADDED Viewed

Binary file (1.42 kB). View file

src_code_for_reproducibility/markov_games/ipd/ipd_agent.py ADDED Viewed

	@@ -0,0 +1,120 @@

+"""
+File: mllm/markov_games/ipd/ipd_agent.py
+Summary: Implements the IPD agent abstraction used during simulations.
+"""
+import copy
+import json
+import random
+import re
+from collections.abc import Callable
+from copy import deepcopy
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional, Tuple, Union
+from mllm.markov_games.agent import Agent
+from mllm.markov_games.rollout_tree import AgentActLog, ChatTurn
@dataclass
class IPDAgentState:
    """
    Mutable bookkeeping for one IPD agent: a retry counter, the last round
    index acted on, how many chat turns were already logged, and the
    conversation so far.
    """

    # Retry counter; IPDAgent.__post_init__ starts it at 0.
    nb_retries: int
    # Round index stored at the end of the most recent act() call.
    round_nb: int
    # Length of chat_history at the end of the most recent act() call.
    chat_counter: int
    # Conversation turns in the order they were appended.
    chat_history: List[ChatTurn]
@dataclass
class IPDAgent(Agent):
    """
    Iterated Prisoner's Dilemma player whose moves come from an async policy.

    The agent keeps a running chat history, hands it to ``policy`` on every call
    to ``act`` and returns the content of the policy's reply as its action.
    """

    seed: int
    agent_id: str
    agent_name: str
    # Awaited in act() with the keyword arguments ``state``, ``agent_id`` and
    # ``regex``; the awaited result is used as a ChatTurn.
    policy: Callable[[List[Dict]], str]
    intro_prompt: str  # Introduction prompt explaining the game rules
    goal_prompt: str  # Prompt explaining the agent's goal
    strategy_prompt: str  # Prompt suggesting a strategy to the agent
    max_errors: int  # Maximum number of errors allowed before default action
    allow_reasoning: bool  # Whether to allow reasoning in the response
    max_reasoning_chars: int  # Maximum number of characters for reasoning
    cooperate_string: str  # string parsed as playing cooperate by simulation
    defect_string: str  # string parsed as playing defect by simulation

    def __post_init__(self):
        self.state = self._fresh_state()

    @staticmethod
    def _fresh_state() -> IPDAgentState:
        """Build the state an agent starts a game with (all counters at zero)."""
        return IPDAgentState(
            nb_retries=0, round_nb=0, chat_counter=0, chat_history=[]
        )

    def _append_user_turn(self, content: str) -> None:
        """Append a user-role turn addressed to this agent to the chat history."""
        self.state.chat_history.append(
            ChatTurn(
                agent_id=self.agent_id,
                role="user",
                content=content,
                is_state_end=True,
            )
        )

    async def act(self, observation) -> Tuple[Any, AgentActLog]:
        """
        Query the policy for this round's move.

        Args:
            observation: Object exposing ``round_nb`` and ``last_coagent_move``.

        Returns:
            A tuple ``(action, log)`` where ``action`` is the content of the
            policy's reply and ``log`` holds every chat turn appended since the
            previous call to ``act``.
        """
        round_nb = observation.round_nb

        # Very first call of the game: open the conversation with the rules.
        if round_nb == 0 and self.state.chat_counter == 0:
            self._append_user_turn(self.intro_prompt)

        # A new round has started: report what the other agent played last.
        if round_nb > self.state.round_nb:
            coagent_action = observation.last_coagent_move
            self._append_user_turn(
                f"Last round, the other agent played {coagent_action}."
            )

        # The policy is queried on every call; the regex restricts the reply
        # to one of the two configured action strings.
        output_chat_turn: ChatTurn = await self.policy(
            state=self.state.chat_history,
            agent_id=self.agent_id,
            regex=f"({self.cooperate_string}|{self.defect_string})",
        )
        self.state.chat_history.append(output_chat_turn)
        action = output_chat_turn.content

        # Log only the turns added since the previous call.
        agent_step_log = AgentActLog(
            chat_turns=self.state.chat_history[self.state.chat_counter :], info=None
        )
        self.state.chat_counter = len(self.state.chat_history)
        self.state.round_nb = round_nb
        return action, agent_step_log

    def get_safe_copy(self):
        """
        Return a copy of the agent whose state is independent of the original.

        The agent itself is copied shallowly; only ``state`` is deep-copied.
        """
        agent_copy = copy.copy(self)
        agent_copy.state = copy.deepcopy(self.state)
        return agent_copy

    def reset(self):
        """
        Restore the state the agent had right after construction.

        The previous implementation called ``IPDAgentState()`` without its four
        required fields, so it always failed with ``TypeError`` before reaching
        its ``raise NotImplementedError``.
        """
        self.state = self._fresh_state()

    def render(self):
        pass

    def close(self):
        pass

    def get_agent_info(self):
        pass

src_code_for_reproducibility/markov_games/ipd/ipd_simulation.py ADDED Viewed

	@@ -0,0 +1,167 @@

+"""
+File: mllm/markov_games/ipd/ipd_simulation.py
+Summary: Runs Iterated Prisoner's Dilemma simulations under the Markov-game API.
+"""
+import copy
+import random
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional, Tuple
+import numpy as np
+from mllm.markov_games.markov_game import Simulation
+from mllm.markov_games.rollout_tree import SimulationStepLog
+from mllm.utils.get_coagent_id import get_coagent_id
@dataclass
class IPDState:
    """
    Game state of the Iterated Prisoner's Dilemma simulation.
    """

    # Number of rounds played so far.
    round_nb: int = 0
    # Episode-finished flag; False for a fresh game.
    done: bool = False
    # Actions submitted in the previous round, keyed by agent id (None before
    # the first round).
    last_moves: Optional[Dict[str, str]] = None
@dataclass
class IPDObs:
    """
    What a single agent observes in the Iterated Prisoner's Dilemma.
    """

    # Index of the round about to be played.
    round_nb: int
    # The other agent's action from the previous round, or None if there is none yet.
    last_coagent_move: Optional[str]
class IPD(Simulation):
    """
    Two-player Iterated Prisoner's Dilemma simulation.

    In each round of the game, two agents simultaneously choose to either
    cooperate (C) or defect (D). The payoffs are as follows:
    - If both cooperate: both receive the "reward" (usually 3 points)
    - If both defect: both receive the "punishment" (usually 1 point)
    - If one cooperates and one defects: the defector receives the "temptation"
      (usually 5 points) and the cooperator receives the "sucker" payoff
      (usually 0 points)
    The game is played for ``rounds_per_game`` rounds.
    """

    def __init__(
        self,
        agent_ids: List[str],
        agent_names: List[str],
        seed: int,
        rounds_per_game: int,
        reward: float,  # Both cooperate
        punishment: float,  # Both defect
        temptation: float,  # Defector's reward when other cooperates
        sucker: float,  # Cooperator's reward when other defects
        cooperate_actions: List[str],
        defect_actions: List[str],
    ):
        self.agent_ids = agent_ids
        self.agent_names = agent_names
        self.seed = seed
        self.rounds_per_game = rounds_per_game
        self.reward = reward
        self.punishment = punishment
        self.temptation = temptation
        self.sucker = sucker
        self.cooperate_actions = cooperate_actions
        self.defect_actions = defect_actions
        self.state = IPDState()

    def _normalize_action(self, action: str) -> str:
        """
        Map a raw action string to "C" or "D".

        Anything that is not listed in ``cooperate_actions`` counts as a
        defection, including strings that are in neither action list.
        """
        return "C" if action in self.cooperate_actions else "D"

    def step(self, actions: Dict[str, str]) -> Tuple[bool, SimulationStepLog]:
        """
        Play one round with the provided actions.

        Args:
            actions (dict): Maps each agent id to the raw action string it produced.

        Returns:
            done (bool): True once ``rounds_per_game`` rounds have been played.
            step_log (SimulationStepLog): This round's reward per agent, with the
                normalized actions ("C"/"D") stored under ``info["actions"]``.
        """
        first_id = self.agent_ids[0]
        second_id = self.agent_ids[1]
        norm_action0 = self._normalize_action(actions[first_id])
        norm_action1 = self._normalize_action(actions[second_id])

        # Payoff matrix: (first agent's move, second agent's move) -> payoffs
        # in the same order.
        payoffs = {
            ("C", "C"): [self.reward, self.reward],
            ("C", "D"): [self.sucker, self.temptation],
            ("D", "C"): [self.temptation, self.sucker],
            ("D", "D"): [self.punishment, self.punishment],
        }
        first_payoff, second_payoff = payoffs[(norm_action0, norm_action1)]
        round_rewards = {
            first_id: first_payoff,
            second_id: second_payoff,
        }

        # Update game state. The raw (un-normalized) actions are what agents
        # will observe as the other agent's last move.
        self.state.round_nb += 1
        self.state.last_moves = copy.deepcopy(actions)
        done = self.state.round_nb >= self.rounds_per_game
        # Keep the state's flag in sync with the value returned to the caller.
        self.state.done = done

        step_log = SimulationStepLog(
            rewards=round_rewards,
            info={
                "actions": {
                    first_id: norm_action0,
                    second_id: norm_action1,
                }
            },
        )
        return done, step_log

    def get_obs(self):
        """Return a dict mapping every agent id to its current observation."""
        return {
            agent_id: self.get_obs_agent(agent_id) for agent_id in self.agent_ids
        }

    def get_obs_agent(self, agent_id):
        """Return the observation for ``agent_id``."""
        if self.state.last_moves is not None:
            other_id = get_coagent_id(self.agent_ids, agent_id)
            last_coagent_move = self.state.last_moves[other_id]
        else:
            # No round has been played yet.
            last_coagent_move = None
        return IPDObs(
            round_nb=self.state.round_nb, last_coagent_move=last_coagent_move
        )

    def reset(self):
        """Start a new game and return the initial observations."""
        self.state = IPDState()
        return self.get_obs()

    def get_safe_copy(self):
        """
        Return a copy of the simulation whose state is independent of the original.

        The simulation itself is copied shallowly; only ``state`` is deep-copied.
        """
        simulation_copy = copy.copy(self)
        simulation_copy.state = copy.deepcopy(self.state)
        return simulation_copy

src_code_for_reproducibility/markov_games/ipd/ipd_statistics.py ADDED Viewed

	@@ -0,0 +1,24 @@

+"""
+File: mllm/markov_games/ipd/ipd_statistics.py
+Summary: Computes statistics and summaries for IPD experiments.
+"""
+from __future__ import annotations
+from typing import Callable, Dict, List, Tuple
+from mllm.markov_games.rollout_tree import SimulationStepLog
def avg_reward(sl: SimulationStepLog) -> List[Tuple[str, float]] | None:
    """
    Report each agent's reward for one simulation step.

    Args:
        sl: Step log whose ``rewards`` maps agent ids to numeric rewards
            (``None`` is treated as no rewards).

    Returns:
        ``None`` if any agent id contains "buffer" without also containing
        "live"; otherwise one ``("reward-<agent id>", value)`` pair per agent,
        with the value converted to ``float``.
    """
    # Guard once, up front: the original dereferenced sl.rewards before
    # applying its own `or {}` fallback.
    rewards = sl.rewards or {}

    # Steps involving a non-live buffer agent produce no statistics at all.
    if any("buffer" in str(aid) and "live" not in str(aid) for aid in rewards):
        return None

    # One value per agent at each step
    rewards_dict = {f"reward-{aid}": float(v) for aid, v in rewards.items()}
    return list(rewards_dict.items())


# Statistic functions applied to each simulation step log.
stat_functs: list[Callable[[SimulationStepLog], List[Tuple[str, float]] | None]] = [
    avg_reward,
]

src_code_for_reproducibility/markov_games/negotiation/README.md ADDED Viewed

	@@ -0,0 +1,27 @@

+## Negotiation Games: core mechanics and variants
+This family of games features two agents who, in each round, may briefly communicate and then simultaneously propose how to split a fixed resource (most commonly 10 coins). Rewards are the amount kept multiplied by an agent’s per-unit value. The starting speaker alternates deterministically across rounds.
+Communication is optional and variant-dependent: some settings encourage rich messaging to share private information, while others remove messaging entirely to focus on allocation behavior.
+Proportional splitting is used when the two proposals exceed the available total: allocations are scaled proportionally rather than discarded. This preserves a useful learning signal even when agents over-claim.
+### Variants (in increasing difficulty)
+- No‑Press Split
+  - Multiple item types (e.g., hats, balls, books)
+  - The item values for each agent are public.
+  - No communication; agents go straight to making split proposals.
+  - Motivation: mirrors no‑communication setups (e.g., Advantage Alignment) while keeping the split decision nontrivial.
+- Trust-and-Split RPS (TAS-RPS)
+  - Single item type (coins)
+  - Each round, a rock–paper–scissors hand draw creates a strong asymmetry: the winner’s per-coin value is 10, the loser’s is 1.
+  - Each agent initially sees only their own hand and must communicate to coordinate an optimal split.
+  - Motivation: enforce large value disparity so one’s own value reveals little about the other’s (avoiding ceiling effects) and incentivize meaningful communication.

src_code_for_reproducibility/markov_games/negotiation/__pycache__/dond_agent.cpython-312.pyc ADDED Viewed

Binary file (4.66 kB). View file

src_code_for_reproducibility/markov_games/negotiation/__pycache__/dond_simulation.cpython-312.pyc ADDED Viewed

Binary file (10.7 kB). View file

src_code_for_reproducibility/markov_games/negotiation/__pycache__/nego_agent.cpython-312.pyc ADDED Viewed

Binary file (11.7 kB). View file

src_code_for_reproducibility/markov_games/negotiation/__pycache__/nego_hard_coded_policies.cpython-312.pyc ADDED Viewed

Binary file (3.39 kB). View file

src_code_for_reproducibility/markov_games/negotiation/__pycache__/nego_simulation.cpython-312.pyc ADDED Viewed

Binary file (12.6 kB). View file

src_code_for_reproducibility/markov_games/negotiation/__pycache__/negotiation_statistics.cpython-312.pyc ADDED Viewed

Binary file (14.3 kB). View file

src_code_for_reproducibility/markov_games/negotiation/__pycache__/no_press_nego_agent.cpython-312.pyc ADDED Viewed

Binary file (6.11 kB). View file

src_code_for_reproducibility/markov_games/negotiation/__pycache__/no_press_nego_simulation.cpython-312.pyc ADDED Viewed

Binary file (9.72 kB). View file

src_code_for_reproducibility/markov_games/negotiation/__pycache__/tas_rps_agent.cpython-312.pyc ADDED Viewed

Binary file (6.05 kB). View file

src_code_for_reproducibility/markov_games/negotiation/__pycache__/tas_rps_simulation.cpython-312.pyc ADDED Viewed

Binary file (11.7 kB). View file

src_code_for_reproducibility/markov_games/negotiation/nego_simulation.py ADDED Viewed

	@@ -0,0 +1,252 @@

+"""
+File: mllm/markov_games/negotiation/nego_simulation.py
+Summary: Simulation harness for general negotiation environments.
+"""
+import copy
+from abc import abstractmethod
+from dataclasses import dataclass
+from typing import Any, Dict, List, Tuple
+from numpy.random import default_rng
+from mllm.markov_games.rollout_tree import SimulationStepLog
+from mllm.markov_games.simulation import Simulation
+from mllm.utils.get_coagent_id import get_coagent_id
+AgentId = str
+@dataclass
+class Split:
+    """Structured proposal describing how many units of each item an agent keeps."""
+    # Maps an item name to the number of units the proposing agent claims for itself.
+    items_given_to_self: Dict[str, int]
+@dataclass
+class Message:
+    """Single chat utterance exchanged during the negotiation phase."""
+    # Raw text of the utterance.
+    message: str
+@dataclass  # gets extended by variants
+class NegotiationState:
+    """Full simulator state snapshot shared by all negotiation variants."""
+    # Index of the current round (0-based); incremented once a split phase completes.
+    round_nb: int
+    # Text of the most recent message; reset to "" when a new round starts.
+    last_message: str
+    # Id of the agent whose turn it is to act in the message phase.
+    current_agent: AgentId
+    # Item name -> number of units available to split this round.
+    quantities: Dict[str, int]
+    # Per-agent valuations. NOTE(review): annotated as per-item dicts, but the
+    # TAS-RPS variant stores a single float per agent here; confirm intended type.
+    values: Dict[AgentId, Dict[str, float]]
+    # Proposal submitted by each agent this round; None until it is recorded.
+    splits: Dict[AgentId, Split | None]
+    # Number of messages each agent has sent in the current round.
+    nb_messages_sent: Dict[AgentId, int]
+    # The four ``previous_*`` fields snapshot the last completed round and are
+    # None until a first round has finished.
+    previous_values: Dict[AgentId, Dict[str, float]] | None
+    previous_splits: Dict[AgentId, Dict[str, int] | None] | None
+    previous_points: Dict[AgentId, float] | None
+    previous_quantities: Dict[str, int] | None
+    # True while the simulation expects both agents to submit a Split.
+    split_phase: bool
+@dataclass  # gets extended by variants
+class NegotiationObs:
+    """Observation presented to agents each turn (base fields; variants extend)."""
+    # Current round index and the latest message text, mirrored from the state.
+    round_nb: int
+    last_message: str
+    # Maximum number of messages each agent may send per round.
+    quota_messages_per_agent_per_round: int
+    # Id of the agent whose turn it is.
+    current_agent: AgentId
+    # Identifier of the co-agent as shown to this agent.
+    # NOTE(review): presumably a display name rather than an id; confirm per variant.
+    other_agent: str
+    # Item name -> units available this round, and the list of item type names.
+    quantities: Dict[str, int]
+    item_types: List[str]
+    # The observing agent's own valuation for this round.
+    # NOTE(review): annotated as a per-item dict; some variants may pass a scalar.
+    value: Dict[str, int]
+    # True when the agent is expected to submit a split rather than a message.
+    split_phase: bool
+    # Summary of the previous round from this agent's point of view ("agent")
+    # and from the co-agent's ("coagent"); all None before a round has completed.
+    last_split_agent: Dict[str, int] | None
+    last_value_agent: Dict[str, int] | None
+    last_points_agent: float | None
+    last_split_coagent: Dict[str, int] | None
+    last_value_coagent: Dict[str, int] | None
+    last_points_coagent: float | None
+    last_quantities: Dict[str, int] | None
+def compute_tas_style_rewards(
+    agent_ids: List[AgentId],
+    values: Dict[AgentId, float],
+    splits: Dict[AgentId, Split],
+    quantities: Dict[str, int],
+) -> Dict[AgentId, float]:
+    """
+    TAS-like reward computation for exactly two agents.
+    For each item, if the two claims together exceed the available quantity,
+    the stock is shared proportionally to the claims; otherwise each agent
+    receives exactly what it claimed. An agent's reward is the sum over items
+    of (units received) x (its value for that item).
+    Args:
+        agent_ids: The two agent ids; only the first two entries are used.
+        values: Per-agent value, either one scalar per agent or a dict of
+            per-item values (the layout is detected from the first agent).
+        splits: Per-agent proposal; a ``None`` entry counts as claiming nothing,
+            and an item missing from a proposal counts as a claim of 0.
+        quantities: Item name -> units available.
+    Returns:
+        Dict mapping each of the two agent ids to its float reward.
+    """
+    a0, a1 = agent_ids[0], agent_ids[1]
+
+    def _claimed(agent_id, item) -> int:
+        """Units of ``item`` that ``agent_id`` asked to keep (0 if no proposal)."""
+        split = splits[agent_id]
+        if split is None:
+            return 0
+        return int(split.items_given_to_self.get(item, 0))
+
+    # isinstance (rather than an exact type check) so dict subclasses are
+    # also treated as per-item value tables.
+    per_item_values = isinstance(values[a0], dict)
+    r0, r1 = 0.0, 0.0
+    for item, max_item in quantities.items():
+        claim_0 = _claimed(a0, item)
+        claim_1 = _claimed(a1, item)
+        denom = max(int(max_item), claim_0 + claim_1)
+        if denom == 0:
+            # Nothing in stock and nothing claimed: the item contributes no
+            # reward. Skipping avoids a ZeroDivisionError.
+            continue
+        q0 = float(max_item) * float(claim_0) / float(denom)
+        q1 = float(max_item) * float(claim_1) / float(denom)
+        if per_item_values:
+            r0 += q0 * float(values[a0][item])
+            r1 += q1 * float(values[a1][item])
+        else:
+            r0 += q0 * float(values[a0])
+            r1 += q1 * float(values[a1])
+    return {a0: r0, a1: r1}
+class NegotiationSimulation(Simulation):
+    """
+    Base two-agent negotiation loop shared by all variants.
+    A round consists of a turn-based message phase followed by a split phase
+    in which both agents must submit a ``Split`` in the same timestep.
+    Variants implement the abstract hooks below (value/stock sampling,
+    rewards, observations and ``reset``).
+    """
+    def __init__(
+        self,
+        agent_ids: List[AgentId],
+        agent_names: List[str],
+        seed: int,
+        nb_of_rounds: int,
+        quota_messages_per_agent_per_round: int,
+        item_types: List[str] | None = None,
+    ):
+        """
+        Args:
+            agent_ids: Ids of the two agents (order defines agent 0 / agent 1).
+            agent_names: Display names, aligned with ``agent_ids``.
+            seed: Seed for the simulation's random generator.
+            nb_of_rounds: Number of rounds after which ``step`` reports termination.
+            quota_messages_per_agent_per_round: Messages each agent may send per round.
+            item_types: Item names (lower-cased on storage); defaults to ``["coins"]``.
+        """
+        self.seed = seed
+        self.rng = default_rng(self.seed)
+        self.agent_ids = list(agent_ids)
+        self.agent_names = agent_names
+        self.agent_id_to_name = dict(zip(agent_ids, agent_names))
+        self.nb_of_rounds = int(nb_of_rounds)
+        self.quota_messages_per_agent_per_round = int(
+            quota_messages_per_agent_per_round
+        )
+        if item_types is not None:
+            self.item_types = [item.lower() for item in item_types]
+        else:
+            self.item_types = ["coins"]
+        self.state: NegotiationState | None = None
+        # Drawn before reset() so that reset() can read it; cast to a plain int
+        # so it is a regular list index rather than a numpy scalar.
+        self._starting_agent_index = int(self.rng.choice([0, 1]))
+        self.reset()
+    def _other(self, agent_id: AgentId) -> AgentId:
+        """Return the id of the agent that is not ``agent_id``."""
+        return get_coagent_id(self.agent_ids, agent_id)
+    @abstractmethod
+    def set_new_round_of_variant(self):
+        """Variant hook: sample new private values / stock before each round."""
+        pass
+    @abstractmethod
+    def get_info_of_variant(
+        self, state: NegotiationState, actions: Dict[AgentId, Any]
+    ) -> Dict[str, Any]:
+        """Variant hook: populate SimulationStepLog.info with custom diagnostics."""
+        pass
+    def step(self, actions: Any) -> Tuple[bool, SimulationStepLog]:
+        """
+        Advance the simulation by one timestep.
+        Args:
+            actions: Mapping from agent id to that agent's action. In the split
+                phase both agents must provide a ``Split``; in the message
+                phase the current agent must provide a ``Message``.
+        Returns:
+            ``(terminated, step_log)``. Rewards are non-zero only on the
+            timestep that closes a round.
+        Raises:
+            ValueError: If, outside the split phase, the current agent's action
+                is not a ``Message``.
+        """
+        assert self.state is not None
+        current_agent = self.state.current_agent
+        a0, a1 = self.agent_ids[0], self.agent_ids[1]
+        action = actions.get(current_agent)
+        # Split phase: require both splits in the same timestep
+        if self.state.split_phase:
+            action_a0 = actions.get(a0)
+            action_a1 = actions.get(a1)
+            have_both_splits = isinstance(action_a0, Split) and isinstance(
+                action_a1, Split
+            )
+            if not have_both_splits:
+                rewards = {agent_id: 0.0 for agent_id in self.agent_ids}
+                # Carry the round-grouping flag here too, so every step log
+                # exposes the same key.
+                return False, SimulationStepLog(
+                    rewards=rewards,
+                    info={
+                        "type": "waiting_for_splits",
+                        "is_last_timestep_in_round": False,
+                    },
+                )
+            # Record splits
+            self.state.splits[a0] = action_a0
+            self.state.splits[a1] = action_a1
+            # Compute rewards and end round
+            rewards = self.get_rewards(self.state.splits)
+            # Info must be collected before the state is rolled over below.
+            info = self.get_info_of_variant(self.state, actions)
+            # Prepare next round; the starting agent alternates every round.
+            self.state.round_nb += 1
+            self._starting_agent_index = 1 - self._starting_agent_index
+            self.state.current_agent = self.agent_ids[self._starting_agent_index]
+            self.state.previous_values = copy.deepcopy(self.state.values)
+            self.state.previous_splits = copy.deepcopy(self.state.splits)
+            self.state.previous_quantities = copy.deepcopy(self.state.quantities)
+            self.state.previous_points = copy.deepcopy(rewards)
+            self.state.last_message = ""
+            self.set_new_round_of_variant()  # variant specific
+            self.state.splits = {agent_id: None for agent_id in self.agent_ids}
+            self.state.nb_messages_sent = {agent_id: 0 for agent_id in self.agent_ids}
+            is_last_timestep_in_round = True
+            done = self.state.round_nb >= self.nb_of_rounds
+        # Message phase: roll the conversation forward a single turn.
+        elif isinstance(action, Message):
+            self.state.last_message = action.message
+            self.state.nb_messages_sent[current_agent] += 1
+            # Move turn to other agent
+            self.state.current_agent = self._other(current_agent)
+            # If both agents have reached their message quota, enter split phase
+            if all(
+                self.state.nb_messages_sent[agent_id]
+                >= self.quota_messages_per_agent_per_round
+                for agent_id in self.agent_ids
+            ):
+                self.state.split_phase = True
+            is_last_timestep_in_round = False
+            done = False
+            rewards = {agent_id: 0.0 for agent_id in self.agent_ids}
+            info = {"type": "message"}
+        else:
+            # Previously this fell through and failed with an UnboundLocalError;
+            # fail with an explicit message instead. No state has been mutated.
+            raise ValueError(
+                f"Expected a Message from {current_agent!r} during the message "
+                f"phase, got {type(action).__name__}."
+            )
+        info[
+            "is_last_timestep_in_round"
+        ] = is_last_timestep_in_round  # Used later to group round timesteps if needed
+        return done, SimulationStepLog(rewards=rewards, info=info)
+    def get_obs(self):
+        """Returns all agent observations in dict"""
+        return {agent_id: self.get_obs_agent(agent_id) for agent_id in self.agent_ids}
+    @abstractmethod
+    def get_rewards(self, splits: Dict[AgentId, Split]) -> Dict[AgentId, float]:
+        """Variant hook: turn the two recorded splits into per-agent rewards."""
+        pass
+    @abstractmethod
+    def get_obs_agent(self, agent_id):
+        """Variant hook: build the observation shown to ``agent_id``."""
+        pass
+    def get_state(self):
+        """Return the live state object (not a copy)."""
+        return self.state
+    def get_safe_copy(self):
+        """
+        Return a copy of the simulation with an independent state.
+        Only ``state`` is deep-copied; every other attribute (including the
+        random generator) is shared with the original through the shallow copy.
+        """
+        simulation_copy = copy.copy(self)
+        simulation_copy.state = copy.deepcopy(self.state)
+        return simulation_copy
+    @abstractmethod
+    def reset(self) -> dict[AgentId, NegotiationObs]:
+        """Variant hook: initialise ``self.state`` and return initial observations."""
+        pass

src_code_for_reproducibility/markov_games/negotiation/no_press_nego_agent.py ADDED Viewed

	@@ -0,0 +1,108 @@

+"""
+File: mllm/markov_games/negotiation/no_press_nego_agent.py
+Summary: Agent variant for no-press negotiations without explicit messaging.
+"""
+from typing import Any, Dict, List, Tuple
+from mllm.markov_games.negotiation.nego_agent import (
+    NegotiationAgent,
+    NegotiationAgentState,
+)
+from mllm.markov_games.negotiation.nego_simulation import Split
+from mllm.markov_games.negotiation.no_press_nego_simulation import NoPressObs
+from mllm.markov_games.rollout_tree import AgentActLog, ChatTurn
+class NoPressAgent(NegotiationAgent):
+    """Negotiation agent for the no-press variant: no messages, only split proposals."""
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # No communication in this variant
+        self.intro_prompt = (
+            "Welcome to an iterated game. You are {agent}. The other agent is {other_agent}.\n"
+            "Setup:\n"
+            "1. The game consists of multiple independent rounds.\n"
+            "2. In each round, there are multiple items to split between the two agents.\n"
+            "3. Both agents are assigned a per-item value between 1 and 20 (inclusive) in each round.\n"
+            "4. You can observe per-item values of both agents.\n"
+            "5. Because assignments are random, both agents are equally likely to have same expected per-item value.\n"
+            "\n"
+            "Protocol:\n"
+            "1. Both agents simultaneously propose the amount of each item they will keep.\n"
+            "2. If the total sum of proposals is less than or equal to the item quantity, both agents receive their proposed amounts.\n"
+            "3. If the total sum of proposals exceeds the item quantity, they are allocated proportionally.\n"
+            "4. Your points for the round = (amount you receive per item) x (your per-item value for that round), added across all items.\n"
+            "5. Points are accumulated across rounds.\n"
+            "Your goal: {goal}\n"
+        )
+        self.new_round_prompt = (
+            "A New Round Begins\n"
+            "The items to split are {quantities}.\n"
+            "Your per-item values are {value} and {other_agent}'s per-item values are  {other_value}."
+        )
+        self.last_round_prompt = (
+            "Last Round Summary:\n"
+            "   - Items to split: {last_quantities}\n"
+            "   - Your per-item values: {last_value_agent}\n"
+            "   - {other_agent}'s per-item values: {last_value_coagent}\n"
+            "   - You proposed: {last_split_agent}\n"
+            "   - You earned: {last_points_agent} points\n"
+            "   - {other_agent} proposed: {last_split_coagent}\n"
+            "   - {other_agent} earned: {last_points_coagent} points\n"
+            "   - Round Complete.\n"
+        )
+        self.send_split_prompt = "Submit Your Proposal\n" "Respond as {proposal_style}"
+    @staticmethod
+    def _item_pattern(items: List[str]) -> str:
+        """Build the regex alternation accepting each item in singular or plural form."""
+        return "|".join(
+            f"{item[:-1]}s?" if item.endswith("s") else f"{item}s?" for item in items
+        )
+    @staticmethod
+    def _canonical_item(item_text: str, items: List[str]):
+        """Map singular/plural model text back to the configured item id, or None."""
+        lowered = item_text.lower()
+        for orig in items:
+            if lowered == orig.lower():
+                return orig
+            if orig.endswith("s") and lowered == orig[:-1].lower():
+                return orig
+            if not orig.endswith("s") and lowered == orig.lower() + "s":
+                return orig
+        return None
+    def get_message_regex(self, observation: NoPressObs) -> str:
+        """Return an empty pattern because the no-press variant forbids chat."""
+        return r"^$"  # No messages allowed
+    def get_split_regex(self, observation: NoPressObs) -> str:
+        """
+        Match proposals like ``Proposal: 4 coins, 6 apples`` case-insensitively.
+        Quantities are limited to the integers 0-10 by the pattern.
+        """
+        items = list(observation.quantities.keys())
+        # Accept both singular and plural forms
+        item_pattern = self._item_pattern(items)
+        regex = rf"(?i)Proposal:\s*((?:\s*(?P<num>(10|[0-9]))\s*(?P<item>{item_pattern})\s*,?)+)"
+        return regex
+    def get_split_action(self, policy_output: str, observation: NoPressObs) -> Split:
+        """
+        Parse the LLM proposal into a normalized ``Split`` structure.
+        Every item in ``observation.quantities`` defaults to 0; output that does
+        not match the proposal pattern therefore yields an all-zero split. If an
+        item is mentioned more than once, the last mention wins.
+        """
+        import re as _re
+        items = list(observation.quantities.keys())
+        items_given_to_self = {item: 0 for item in items}
+        m = _re.match(self.get_split_regex(observation), policy_output.strip())
+        if m is None:
+            return Split(items_given_to_self=items_given_to_self)
+        # Find all (number, item) pairs inside the matched proposal body.
+        inner_regex = rf"(?i)(10|[0-9])\s*({self._item_pattern(items)})"
+        for num, item_text in _re.findall(inner_regex, m.group(1)):
+            item = self._canonical_item(item_text, items)
+            if item is None:
+                # Unrecognised name: skip it rather than storing the amount
+                # under a ``None`` key.
+                continue
+            items_given_to_self[item] = int(num)
+        return Split(items_given_to_self=items_given_to_self)

src_code_for_reproducibility/markov_games/negotiation/tas_rps_simulation.py ADDED Viewed

	@@ -0,0 +1,257 @@

+"""
+File: mllm/markov_games/negotiation/tas_rps_simulation.py
+Summary: Simulation for TAS Rock-Paper-Scissors blended scenarios.
+"""
+import copy
+from dataclasses import dataclass
+from typing import Any, Dict, List, Literal, Tuple
+from mllm.markov_games.negotiation.nego_simulation import (
+    Message,
+    NegotiationObs,
+    NegotiationSimulation,
+    NegotiationState,
+    Split,
+    compute_tas_style_rewards,
+)
+from mllm.markov_games.rollout_tree import SimulationStepLog
+AgentId = str
+def _get_rps_winner(
+    hand1: Literal["rock", "paper", "scissors"],
+    hand2: Literal["rock", "paper", "scissors"],
+) -> Literal["rock", "paper", "scissors"]:
+    """Return whichever of the two (distinct) hands wins at rock-paper-scissors.
+
+    Raises:
+        ValueError: If both hands are equal (a draw has no winner).
+    """
+    if hand1 == hand2:
+        raise ValueError("Hands should be different")
+    # (winning hand, losing hand) pairs in which the first argument wins.
+    first_hand_wins = (
+        ("rock", "scissors"),
+        ("paper", "rock"),
+        ("scissors", "paper"),
+    )
+    return hand1 if (hand1, hand2) in first_hand_wins else hand2
+@dataclass
+class TrustAndSplitRPSState(NegotiationState):
+    """Negotiation state augmented with the current and previous RPS hands."""
+    # Hand currently held by each agent.
+    hands: Dict[
+        AgentId, Literal["rock", "paper", "scissors"]
+    ]  # rock, paper, or scissors
+    # Hands held in the previous round; None until a round has completed.
+    previous_hands: Dict[AgentId, Literal["rock", "paper", "scissors"]] | None
+@dataclass
+class TrustAndSplitRPSObs(NegotiationObs):
+    """Agent-facing observation enriched with last-hand metadata."""
+    # The observing agent's own hand for the current round.
+    hand: Literal["rock", "paper", "scissors"]
+    # Hands both agents held in the previous round (None in the first round).
+    last_hand_agent: Literal["rock", "paper", "scissors"] | None
+    last_hand_coagent: Literal["rock", "paper", "scissors"] | None
+    # Whether the co-agent held the winning ("upper") or losing ("lower") hand
+    # in the previous round; None in the first round.
+    last_hand_value_coagent: Literal["upper", "lower"] | None
+class TrustAndSplitRPSSimulation(NegotiationSimulation):
+    """Negotiation variant that splices TAS splitting with RPS-determined stakes."""
+    # Each round both agents hold different rock-paper-scissors hands; the
+    # holder of the winning hand values a coin at 10.0 and the other at 1.0.
+    def __init__(
+        self,
+        alternating_hands: bool = False,
+        alternating_mix_ratio: float = None,
+        *args,
+        **kwargs,
+    ):
+        self.alternating_hands = alternating_hands
+        self.alternating_mix_ratio = alternating_mix_ratio
+        # The base constructor creates self.rng and calls reset(), which draws
+        # the first pair of hands.
+        super().__init__(*args, **kwargs)
+        # When a mix ratio is given, a single random draw (made after the first
+        # reset) decides the mode and overrides the `alternating_hands` argument.
+        if self.alternating_mix_ratio is not None:
+            if self.rng.random() < self.alternating_mix_ratio:
+                self.alternating_hands = True
+            else:
+                self.alternating_hands = False
+    def _sample_hands_and_values(
+        self,
+        alternate_hands: bool = False,
+    ) -> Tuple[Dict[AgentId, str], Dict[AgentId, float]]:
+        """
+        Sample a rock-paper-scissors hand for each agent plus the per-hand value.
+        When ``alternate_hands`` is True we deliberately flip the previous round's
+        winner/loser roles to create nonstationary payoffs; otherwise we draw
+        uniformly without replacement.
+        """
+        hands = ["rock", "paper", "scissors"]
+        if alternate_hands:
+            # Requires self.state.previous_hands to be populated already.
+            previous_hands = list(self.state.previous_hands.values())
+            # The two drawn hands are distinct, so one always beats the other.
+            hand1, hand2 = self.rng.choice(hands, size=2, replace=False)
+            winner = _get_rps_winner(hand1, hand2)
+            loser = hand1 if winner == hand2 else hand2
+            previous_winner = _get_rps_winner(previous_hands[0], previous_hands[1])
+            agent_hands, values = {}, {}
+            # Last round's winner receives the losing hand (value 1.0); the
+            # other agent receives the winning hand (value 10.0).
+            for agent_id in self.agent_ids:
+                if self.state.previous_hands[agent_id] == previous_winner:
+                    agent_hands[agent_id] = loser
+                    values[agent_id] = 1.0
+                else:
+                    agent_hands[agent_id] = winner
+                    values[agent_id] = 10.0
+            return agent_hands, values
+        else:
+            # Assign different hands to each agent
+            hand1, hand2 = self.rng.choice(hands, size=2, replace=False)
+            agent_hands = {self.agent_ids[0]: hand1, self.agent_ids[1]: hand2}
+            # Determine winner and assign values
+            winner = _get_rps_winner(hand1, hand2)
+            values = {}
+            for agent_id in self.agent_ids:
+                if agent_hands[agent_id] == winner:
+                    values[agent_id] = 10.0  # Winner gets value 10
+                else:
+                    values[agent_id] = 1.0  # Loser gets value 1
+            return agent_hands, values
+    def set_new_round_of_variant(self):
+        """Refresh hands/values and reset round-specific state."""
+        # Snapshot the outgoing hands first: the alternating sampler reads them.
+        self.state.previous_hands = copy.deepcopy(self.state.hands)
+        new_hands, new_values = self._sample_hands_and_values(
+            alternate_hands=self.alternating_hands
+        )
+        self.state.hands = new_hands
+        self.state.values = new_values
+        # Quantities are constant in TAS
+        self.state.quantities = {"coins": 10}
+        # Each new round starts in the message phase.
+        self.state.split_phase = False
+    def get_info_of_variant(
+        self, state: NegotiationState, actions: Dict[AgentId, Any]
+    ) -> Dict[str, Any]:
+        """Expose variant-specific tensors for downstream logging/analysis."""
+        # Values are deep copies, so later state mutations do not alter the log.
+        # `actions` is accepted for interface compatibility but not used here.
+        return {
+            "quantities": copy.deepcopy(state.quantities),
+            "hands": copy.deepcopy(state.hands),
+            "values": copy.deepcopy(state.values),
+            "previous_hands": copy.deepcopy(state.previous_hands),
+            "previous_values": copy.deepcopy(state.previous_values),
+            "splits": copy.deepcopy(state.splits),
+        }
+    def get_rewards(self, splits: Dict[AgentId, Split]) -> Dict[AgentId, float]:
+        """Delegates to TAS reward helper because the payout rule is identical."""
+        return compute_tas_style_rewards(
+            self.agent_ids, self.state.values, splits, self.state.quantities
+        )
+    def get_obs_agent(self, agent_id):
+        """Return a full Trust-and-Split observation for ``agent_id``."""
+        other_id = self._other(agent_id)
+        # Every "last_*" field is None until a first round has completed.
+        last_value_coagent = (
+            None
+            if self.state.previous_values is None
+            else self.state.previous_values.get(other_id)
+        )
+        last_hand_coagent = (
+            None
+            if self.state.previous_hands is None
+            else self.state.previous_hands.get(other_id)
+        )
+        # Points are rounded to one decimal place for display.
+        last_points_coagent = (
+            None
+            if self.state.previous_points is None
+            else round(self.state.previous_points.get(other_id), 1)
+        )
+        last_value_agent = (
+            None
+            if self.state.previous_values is None
+            else self.state.previous_values.get(agent_id)
+        )
+        last_hand_agent = (
+            None
+            if self.state.previous_hands is None
+            else self.state.previous_hands.get(agent_id)
+        )
+        last_points_agent = (
+            None
+            if self.state.previous_points is None
+            else round(self.state.previous_points.get(agent_id), 1)
+        )
+        last_split_coagent = None
+        last_split_agent = None
+        # Only the "coins" entry of each previous proposal is reported; this
+        # raises KeyError if a stored split has no "coins" key.
+        if self.state.previous_splits is not None:
+            last_split_coagent = self.state.previous_splits[
+                other_id
+            ].items_given_to_self["coins"]
+            last_split_agent = self.state.previous_splits[agent_id].items_given_to_self[
+                "coins"
+            ]
+        if last_hand_agent is None or last_hand_coagent is None:
+            last_hand_value_coagent = None
+        else:
+            # "upper" means the co-agent held the winning hand last round.
+            winner = _get_rps_winner(last_hand_agent, last_hand_coagent)
+            last_hand_value_coagent = (
+                "upper" if winner == last_hand_coagent else "lower"
+            )
+        # `other_agent` is the co-agent's display name, and `quantities` is
+        # hard-coded here rather than read from the state.
+        obs = TrustAndSplitRPSObs(
+            round_nb=self.state.round_nb,
+            last_message=self.state.last_message,
+            quota_messages_per_agent_per_round=self.quota_messages_per_agent_per_round,
+            current_agent=self.state.current_agent,
+            other_agent=self.agent_id_to_name[other_id],
+            quantities={"coins": 10},
+            item_types=self.item_types,
+            value=self.state.values[agent_id],
+            split_phase=self.state.split_phase,
+            last_split_agent=last_split_agent,
+            last_value_agent=last_value_agent,
+            last_points_agent=last_points_agent,
+            last_split_coagent=last_split_coagent,
+            last_value_coagent=last_value_coagent,
+            last_points_coagent=last_points_coagent,
+            hand=self.state.hands[agent_id],
+            last_hand_coagent=last_hand_coagent,
+            last_hand_agent=last_hand_agent,
+            last_quantities=self.state.previous_quantities,
+            last_hand_value_coagent=last_hand_value_coagent,
+        )
+        return obs
+    def get_state(self):
+        # Returns the live state object, not a copy.
+        return self.state
+    def get_safe_copy(self):
+        """Return a safe copy of the simulation."""
+        # Only `state` is deep-copied; all other attributes, including the
+        # random generator, are shared with the original via the shallow copy.
+        simulation_copy = copy.copy(self)
+        simulation_copy.state = copy.deepcopy(self.state)
+        return simulation_copy
+    def reset(self):
+        """Initialize and return initial observations"""
+        # Start with the agent at the current starting index. reset() itself
+        # does not change that index; it is flipped when a round completes.
+        start_agent = self.agent_ids[self._starting_agent_index]
+        # The first round always uses the non-alternating sampler.
+        hands, values = self._sample_hands_and_values()
+        self.state = TrustAndSplitRPSState(
+            round_nb=0,
+            last_message="",
+            current_agent=start_agent,
+            quantities={"coins": 10},
+            values=values,
+            splits={aid: None for aid in self.agent_ids},
+            nb_messages_sent={aid: 0 for aid in self.agent_ids},
+            previous_values=None,
+            previous_splits=None,
+            previous_points=None,
+            split_phase=False,
+            hands=hands,
+            previous_hands=None,
+            previous_quantities=None,
+        )
+        return self.get_obs()

src_code_for_reproducibility/models/__pycache__/large_language_model_api.cpython-312.pyc ADDED Viewed

Binary file (7.08 kB). View file

src_code_for_reproducibility/models/__pycache__/large_language_model_local.cpython-312.pyc ADDED Viewed

Binary file (16.5 kB). View file

src_code_for_reproducibility/training/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (277 Bytes). View file

src_code_for_reproducibility/training/credit_methods.py ADDED Viewed

	@@ -0,0 +1,307 @@

+"""
+File: mllm/training/credit_methods.py
+Summary: Holds credit-assignment routines for reinforcement learning updates.
+"""
+import torch
+def whiten_advantages(advantages: torch.Tensor) -> torch.Tensor:
+    """
+    Normalize advantages to zero mean / unit variance over all elements.
+    Useful for variance reduction before computing gradients.
+    Args:
+        advantages: Tensor of any shape.
+    Returns:
+        Tensor of the same shape. With fewer than two elements the sample
+        standard deviation is undefined, so the input is only mean-centered.
+    """
+    centered = advantages - torch.mean(advantages)
+    if advantages.numel() < 2:
+        # torch.std uses Bessel's correction by default and returns NaN for a
+        # single element, which would turn the whole output into NaN.
+        return centered
+    return centered / (torch.std(advantages) + 1e-9)
+def whiten_advantages_time_step_wise(
+    advantages: torch.Tensor,  # (B, T)
+) -> torch.Tensor:
+    """
+    Whiten advantages independently per timestep (column-wise mean/std).
+    Helps when rollout lengths differ or certain positions have very different scales.
+    Args:
+        advantages: 2-D tensor of shape (B, T).
+    Returns:
+        Tensor of shape (B, T). With a batch of fewer than two rows the sample
+        standard deviation is undefined, so the columns are only mean-centered.
+    """
+    assert advantages.dim() == 2, "Wrong dimensions."
+    centered = advantages - advantages.mean(dim=0, keepdim=True)
+    if advantages.shape[0] < 2:
+        # Bessel-corrected std over a single row is NaN; avoid propagating it.
+        return centered
+    return centered / (advantages.std(dim=0, keepdim=True) + 1e-9)
+def get_discounted_state_visitation_credits(
+    credits: torch.Tensor, discount_factor: float  # (B, T)
+) -> torch.Tensor:
+    """
+    Scale the credit at timestep ``t`` by ``discount_factor ** t``.
+    With a discount factor below 1, later timesteps therefore count less.
+    """
+    horizon = credits.shape[1]
+    timesteps = torch.arange(horizon, device=credits.device)
+    per_step_discount = discount_factor**timesteps
+    return credits * per_step_discount
+def get_discounted_returns(
+    rewards: torch.Tensor,  # (B, T)
+    discount_factor: float,
+) -> torch.Tensor:
+    """
+    Compute Monte Carlo discounted returns, ``G_t = r_t + discount_factor * G_{t+1}``.
+    Args:
+        rewards (torch.Tensor): Per-timestep rewards of shape (B, T).
+        discount_factor (float): Multiplier applied to the return of the next step.
+    Returns:
+        torch.Tensor: Discounted returns, same shape as ``rewards``.
+    """
+    assert rewards.dim() == 2, "Wrong dimensions."
+    batch_size, horizon = rewards.shape
+    returns = torch.zeros_like(rewards)
+    running = torch.zeros(batch_size, device=rewards.device, dtype=rewards.dtype)
+    # Sweep backwards so each return reuses the one already computed for t + 1.
+    for t in range(horizon - 1, -1, -1):
+        running = rewards[:, t] + discount_factor * running
+        returns[:, t] = running
+    return returns
+def get_rloo_credits(credits: torch.Tensor):  # (B, S)
+    """
+    Subtract a leave-one-out baseline from each row of ``credits``.
+    The baseline for a row is the mean of all *other* rows at the same position.
+    Returns:
+        ``(rloo_credits, rloo_baselines)``. With a single row there is nothing
+        to average over, so the credits are returned unchanged with zero baselines.
+    """
+    assert credits.dim() == 2, "Wrong dimensions."
+    batch_size = credits.shape[0]
+    if batch_size == 1:
+        return credits, torch.zeros_like(credits)
+    column_totals = torch.sum(credits, dim=0, keepdim=True)
+    baselines = (column_totals - credits) / (batch_size - 1)
+    return credits - baselines, baselines
+def get_generalized_advantage_estimates(
+    rewards: torch.Tensor,  # (B, T)
+    value_estimates: torch.Tensor,  # (B, T+1)
+    discount_factor: float,
+    lambda_coef: float,
+) -> torch.Tensor:
+    """
+    Compute Generalized Advantage Estimates (GAE).
+    See https://arxiv.org/pdf/1506.02438 for derivation.
+    ``value_estimates`` carries one extra column: the value of the state
+    reached after the last reward. Returns a tensor shaped like ``rewards``.
+    """
+    assert rewards.dim() == value_estimates.dim() == 2, "Wrong dimensions."
+    assert (
+        rewards.shape[0] == value_estimates.shape[0]
+    ), f"Got shapes {rewards.shape} and {value_estimates.shape} of rewards and value estimates."
+    assert (
+        rewards.shape[1] == value_estimates.shape[1] - 1
+    ), f"Got shapes {rewards.shape} and {value_estimates.shape} of rewards and value estimates."
+    horizon = rewards.shape[1]
+    next_values = value_estimates[:, 1:]
+    current_values = value_estimates[:, :-1]
+    # One-step temporal-difference residuals.
+    td_residuals = rewards + discount_factor * next_values - current_values
+    advantages = torch.zeros_like(td_residuals)
+    running = 0.0
+    # Backward recursion: A_t = delta_t + (lambda * gamma) * A_{t+1}.
+    for t in range(horizon - 1, -1, -1):
+        running = td_residuals[:, t] + lambda_coef * discount_factor * running
+        advantages[:, t] = running
+    return advantages
+def get_advantage_alignment_weights(
+    advantages: torch.Tensor,  # (B, T)
+    exclude_k_equals_t: bool,
+    gamma: float,
+    discount_t: bool,
+) -> torch.Tensor:
+    r"""
+    Discounted running sum of past advantages used by advantage alignment.
+    The advantage alignment credit is calculated as
+    \[
+        A^*(s_t, a_t, b_t) = A^1(s_t, a_t, b_t) + \beta \cdot
+        \left( \sum_{k < t} \gamma^{t-k} A^1(s_k, a_k, b_k) \right)
+        A^2(s_t, a_t, b_t)
+    \]
+    This function returns the inner sum only (it does not apply \( \beta \)):
+    \[
+        w_t = \sum_{k \le t} \gamma^{t-k} A^1(s_k, a_k, b_k)
+    \]
+    Args:
+        advantages: Tensor of shape (B, T).
+        exclude_k_equals_t: If True, the sum runs over \( k < t \) instead of
+            \( k \le t \).
+        gamma: Discount applied per step of lag \( t - k \).
+        discount_t: If True, return \( w_t - A_t + \gamma^t A_t \) instead of
+            \( w_t \) (applied regardless of ``exclude_k_equals_t``).
+    Returns:
+        Tensor of shape (B, T).
+    """
+    T = advantages.shape[1]
+    device = advantages.device
+    # Work in the input's floating dtype; promote non-float input.
+    dtype = (
+        advantages.dtype
+        if advantages.is_floating_point()
+        else torch.get_default_dtype()
+    )
+    adv = advantages.to(dtype)
+    steps = torch.arange(T, device=device)
+    # lag[k, t] = t - k: how many steps after k the weight at t is evaluated.
+    lag = steps.unsqueeze(0) - steps.unsqueeze(1)
+    keep = lag > 0 if exclude_k_equals_t else lag >= 0
+    # Build gamma^(t-k) directly from non-negative lags. Multiplying by
+    # gamma^(-k) and then by gamma^t overflows on long horizons and yields NaN
+    # when gamma == 0.
+    decay = (gamma ** lag.clamp(min=0).to(dtype)) * keep.to(dtype)
+    ad_align_weights = adv @ decay
+    if discount_t:
+        time_discounted_advantages = adv * (gamma ** steps.to(dtype))
+        ad_align_weights = ad_align_weights - adv + time_discounted_advantages
+    return ad_align_weights
def get_advantage_alignment_credits(
    a1: torch.Tensor,  # (B, S)
    a1_alternative: torch.Tensor | None,  # (B, S, A)
    a2: torch.Tensor,  # (B, S)
    exclude_k_equals_t: bool,
    beta: float,
    gamma: float = 1.0,
    use_old_ad_align: bool = False,
    use_sign: bool = False,
    clipping: float | None = None,
    use_time_regularization: bool = False,
    force_coop_first_step: bool = False,
    use_variance_regularization: bool = False,
    rloo_branch: bool = False,
    reuse_baseline: bool = False,
    mean_normalize_ad_align: bool = False,
    whiten_adalign_advantages: bool = False,
    whiten_adalign_advantages_time_step_wise: bool = False,
    discount_t: bool = False,
) -> tuple[torch.Tensor, dict[str, torch.Tensor]]:
    r"""
    Calculate the advantage alignment credits with vectorization, as described in https://arxiv.org/abs/2406.14662.
    Recall that the advantage opponent shaping term of the AdAlign policy gradient is:
    \[
        \beta \mathbb{E}_{\substack{
        \tau \sim \text{Pr}_{\mu}^{\pi^1, \pi^2} \\
        a_t' \sim \pi^1(\cdot \mid s_t)
        }}
        \left[\sum_{t=0}^\infty  \gamma^{t}\left( \sum_{k\leq t} A^1(s_k,a^{\prime}_k,b_k) \right) A^{2}(s_t,a_t, b_t)\nabla_{\theta^1}\text{log } \pi^1(a_t|s_t) \right]
    \]
    This method computes the following:
    \[
        Credit(s_t, a_t, b_t) = \gamma^t \left[ A^1(s_t, a_t, b_t) + \beta \left( \sum_{k\leq t} A^1(s_k,a^{\prime}_k,b_k) \right) A^{2}(s_t,a_t, b_t) \right]
    \]
    Concretely, the returned credits are ``a1 + beta * ad_align_weights * a2``,
    where ``ad_align_weights`` comes from ``get_advantage_alignment_weights`` and
    is then optionally post-processed (sign, clipping, time scaling, forced
    first step) before optional normalization of the final credits.
    Args:
        a1: Advantages of the main trajectories for the current agent, 2-D.
        a1_alternative: Advantages of the alternative trajectories for the
            current agent, 3-D. May be None only when ``use_old_ad_align`` is set.
        a2: Advantages of the main trajectories for the other agent; must have
            the same shape as ``a1``.
        exclude_k_equals_t: Forwarded to ``get_advantage_alignment_weights``. In
            the old variant the resulting weights are additionally scaled by
            ``gamma`` when this is set.
        beta: Scale of the opponent shaping term. Must be 1.0 when ``use_sign``
            is set.
        gamma: Discount forwarded to ``get_advantage_alignment_weights``.
        use_old_ad_align: Build the weights from ``a1`` instead of from the
            alternative advantages.
        use_sign: Replace every weight by its sign (-1, 0 or 1).
        clipping: If neither None nor 0.0, clamp weights to
            ``[-clipping, clipping]``.
        use_time_regularization: Divide the weight of the i-th step (0-indexed)
            by ``i + 1``.
        force_coop_first_step: Set the weight of the first step to 1.
        use_variance_regularization: Accepted for interface compatibility; not
            used by this function.
        rloo_branch: Average ``a1`` together with the alternatives over the last
            axis and pass ``a1`` through ``get_rloo_credits``.
        reuse_baseline: With ``rloo_branch``, subtract the baseline returned for
            ``a1`` from the averaged alternatives instead of calling
            ``get_rloo_credits`` a second time.
        mean_normalize_ad_align: Subtract the mean over dim 0 from the credits.
        whiten_adalign_advantages: Whiten the credits with their global mean
            and standard deviation.
        whiten_adalign_advantages_time_step_wise: Whiten the credits with the
            mean and standard deviation taken over dim 0.
        discount_t: Forwarded to ``get_advantage_alignment_weights`` in the old
            variant only.
    Returns:
        A tuple ``(credits, sub_tensors)``: the final credits and a dict of the
        intermediate tensors produced along the way, keyed by stage name.
    """
    assert a1.dim() == a2.dim() == 2, "Advantages must be of shape (B, S)"
    if a1_alternative is not None:
        assert (
            a1_alternative.dim() == 3
        ), "Alternative advantages must be of shape (B, S, A)"
        T = a1_alternative.shape[1]
    else:
        T = a1.shape[1]
    assert a1.shape == a2.shape, "Not the same shape"
    sub_tensors = {}
    if use_old_ad_align:
        ad_align_weights = get_advantage_alignment_weights(
            advantages=a1,
            exclude_k_equals_t=exclude_k_equals_t,
            gamma=gamma,
            discount_t=discount_t,
        )
        sub_tensors["ad_align_weights_prev"] = ad_align_weights
        if exclude_k_equals_t:
            ad_align_weights = gamma * ad_align_weights
    else:
        assert a1_alternative is not None, "Alternative advantages must be provided"
        if rloo_branch:
            a1_alternative = torch.cat([a1.unsqueeze(2), a1_alternative], dim=2)
            a1_alternative = a1_alternative.mean(dim=2)
            a1, baseline = get_rloo_credits(a1)
            if reuse_baseline:
                a1_alternative = a1_alternative - baseline
            else:
                a1_alternative, _ = get_rloo_credits(a1_alternative)
        # NOTE(review): without rloo_branch the alternatives stay 3-D, so this
        # assertion appears to require rloo_branch here; confirm with callers.
        assert a1.shape == a1_alternative.shape, "Not the same shape"
        ad_align_weights = get_advantage_alignment_weights(
            advantages=a1_alternative,
            exclude_k_equals_t=exclude_k_equals_t,
            gamma=gamma,
        )
        sub_tensors["ad_align_weights"] = ad_align_weights
    # Use sign
    if use_sign:
        assert beta == 1.0, "beta should be 1.0 when using sign"
        # Out-of-place so the tensor already stored in sub_tensors keeps the
        # raw weights instead of being overwritten by their signs.
        ad_align_weights = torch.sign(ad_align_weights)
        sub_tensors["ad_align_weights_sign"] = ad_align_weights
    ###################
    # Process weights
    ###################
    # Use clipping
    if clipping is not None and clipping != 0.0:
        # The former clipping-ratio diagnostic was never consumed and divided
        # by `Tensor.size` (a method), which raised; it is intentionally gone.
        ad_align_weights = torch.clip(
            ad_align_weights,
            -clipping,
            clipping,
        )
        sub_tensors["clipped_ad_align_weights"] = ad_align_weights
    # 1/1+t Regularization
    if use_time_regularization:
        t_values = torch.arange(1, T + 1).to(ad_align_weights.device)
        ad_align_weights = ad_align_weights / t_values
        sub_tensors["time_regularized_ad_align_weights"] = ad_align_weights
    # Use coop on t=0
    if force_coop_first_step:
        # Clone first: the in-place write must not leak into earlier
        # sub_tensors entries that alias the same storage.
        ad_align_weights = ad_align_weights.clone()
        ad_align_weights[:, 0] = 1
        sub_tensors["coop_first_step_ad_align_weights"] = ad_align_weights
    ####################################
    # Compose elements together
    ####################################
    opp_shaping_terms = beta * ad_align_weights * a2
    sub_tensors["ad_align_opp_shaping_terms"] = opp_shaping_terms
    credits = a1 + opp_shaping_terms
    if mean_normalize_ad_align:
        credits = credits - credits.mean(dim=0)
        sub_tensors["mean_normalized_ad_align_credits"] = credits
    if whiten_adalign_advantages:
        credits = (credits - credits.mean()) / (credits.std() + 1e-9)
        sub_tensors["whitened_ad_align_credits"] = credits
    if whiten_adalign_advantages_time_step_wise:
        credits = (credits - credits.mean(dim=0, keepdim=True)) / (
            credits.std(dim=0, keepdim=True) + 1e-9
        )
        sub_tensors["whitened_ad_align_credits_time_step_wise"] = credits
    sub_tensors["final_ad_align_credits"] = credits
    return credits, sub_tensors

src_code_for_reproducibility/utils/__init__.py ADDED Viewed

	@@ -0,0 +1,4 @@

+"""
+File: mllm/utils/__init__.py
+Summary: Utility package exposing helper modules.
+"""

src_code_for_reproducibility/utils/gather_training_stats.py ADDED Viewed

	@@ -0,0 +1,262 @@

+"""
+File: mllm/utils/gather_training_stats.py
+Summary: Aggregates training statistics from rollouts and exports artifacts.
+"""
+import copy
+import csv
+import gc
+import json
+import logging
+import os
+import pickle
+import random
+import re
+import subprocess
+import sys
+import time
+from datetime import datetime
+from statistics import mean
+from typing import Any, Dict
+import hydra
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import torch
+from omegaconf import OmegaConf
+from mllm.training.tally_metrics import Tally
+from mllm.utils.stat_pack import StatPack
def get_from_nested_dict(dictio: dict, path: list[str]):
    """
    Look up the value stored at ``path`` inside a nested mapping.

    Args:
        dictio: The outermost mapping.
        path: Keys to follow, outermost first; the last entry is the leaf key.
    Returns:
        The value at ``path``, or None when any key along the way is missing
        or an intermediate value cannot be indexed by the key.
    """
    node = dictio
    for key in path[:-1]:
        try:
            node = node[key]
        except (KeyError, TypeError):
            # A missing branch is reported the same way as a missing leaf.
            return None
    return node.get(path[-1])
def set_at_path(dictio: dict, path: list[str], value):
    """
    Store ``value`` at ``path`` inside a nested dict, modifying it in place.

    Intermediate dicts are created for any key of ``path`` (except the last)
    that is not present yet.
    """
    *branch_keys, leaf_key = path
    cursor = dictio
    for branch_key in branch_keys:
        cursor = cursor.setdefault(branch_key, {})
    cursor[leaf_key] = value
+def produce_tabular_render(inpath: str, outpath: str = None):
+    """
+    Convert a JSON metrics dump into per-rollout CSV tables for easier inspection.
+    """
+    with open(inpath, "r") as f:
+        data = json.load(f)
+    rollout_paths = data.keys()
+    for rollout_path in rollout_paths:
+        if outpath is None:
+            m_path = rollout_path.replace("/", "|")
+            m_path = m_path.replace(".json", "")
+            m_path = (
+                os.path.split(inpath)[0]
+                + "/contextualized_tabular_renders/"
+                + m_path
+                + "_tabular_render.render.csv"
+            )
+        # import pdb; pdb.set_trace()
+        os.makedirs(os.path.split(m_path)[0], exist_ok=True)
+        metrics = data[rollout_path]
+        d = {k: [] for k in metrics[0].keys()}
+        for m in metrics:
+            for k, v in m.items():
+                d[k].append(v)
+        d = pd.DataFrame(d)
+        d.to_csv(m_path)
def get_metric_paths(data: list[dict]):
    """
    List the key paths of all non-dict leaves found in the first entry of ``data``.

    Paths are returned in depth-first order, each as a list of keys.
    """

    def _walk(node, trail):
        # Yield one key path per non-dict value, descending into dicts.
        for key, value in node.items():
            branch = trail + [key]
            if isinstance(value, dict):
                yield from _walk(value, branch)
            else:
                yield branch

    return list(_walk(data[0], []))
def print_metric_paths(data: list[dict]):
    """Print every metric path returned by ``get_metric_paths``, one per line."""
    for metric_path in get_metric_paths(data):
        print(metric_path)
def get_metric_iteration_list(data: list[dict], metric_path: list[str]):
    """
    Collect the value found at ``metric_path`` in every entry of ``data``.

    A bare string is treated as a one-element path. The result has one item
    per entry of ``data``, in the same order.
    """
    keys = [metric_path] if isinstance(metric_path, str) else metric_path
    return [get_from_nested_dict(entry, keys) for entry in data]
def to_1d_numeric(x):
    """
    Flatten ``x`` into a 1-D float array, or return None if it is not numeric.

    Scalars become one-element arrays, numpy arrays are cast to float and
    ravelled, and lists/tuples are converted element by element (recursively)
    and concatenated, skipping elements that yield None or an empty array.
    Anything else, including None, gives None.
    """
    if isinstance(x, (int, float, np.number)):
        return np.array([float(x)], dtype=float)
    if isinstance(x, np.ndarray):
        try:
            flat = x.astype(float).ravel()
        except Exception:
            # Arrays that cannot be cast to float are treated as non-numeric.
            return None
        return flat
    if isinstance(x, (list, tuple)):
        converted = (to_1d_numeric(item) for item in x)
        chunks = [c for c in converted if c is not None and c.size > 0]
        return np.concatenate(chunks) if chunks else None
    return None
def get_single_metric_vector(data, metric_path, iterations=None):
    """
    Concatenate the numeric values found at ``metric_path`` across ``data``.

    Args:
        data: Iterable of nested metric dicts.
        metric_path: Key path (list of keys) or a single key given as a string.
        iterations: Accepted for interface compatibility; it has no effect on
            the result.
    Returns:
        A 1-D float array with the values of every entry that converts via
        ``to_1d_numeric``; an empty float array when there are none.
    """
    if isinstance(metric_path, str):
        metric_path = [metric_path]
    # The former `iterations == None` default handling only assigned a value
    # that was never read (and raised for array-like arguments), so it is gone.
    vecs = []
    for entry in data:
        arr = to_1d_numeric(get_from_nested_dict(entry, metric_path))
        if arr is not None:
            vecs.append(arr)
    return np.concatenate(vecs) if vecs else np.empty(0, dtype=float)
def _load_metrics_file(file_path: str):
    """
    Unpickle and return the object stored in ``file_path``.

    Raises:
        ValueError: If ``file_path`` does not end in ``.pkl``.
    """
    # Any ".tally.pkl" name also ends in ".pkl"; both suffixes are accepted.
    if not file_path.endswith((".tally.pkl", ".pkl")):
        raise ValueError("Only *.tally.pkl files are supported.")
    # NOTE: pickle can execute arbitrary code; only load trusted files.
    with open(file_path, "rb") as handle:
        return pickle.load(handle)
def get_leaf_items(array_tally: dict, prefix: list[str] = None):
    """
    Yield ``(path, value)`` for every non-dict value of a nested dict.

    ``path`` is the list of keys leading to the value, each converted with
    ``str`` and preceded by ``prefix`` when one is given. Values are visited
    depth-first, in dict order.
    """
    root_trail = [] if prefix is None else prefix
    # Explicit stack of (path so far, pending items) replaces recursion.
    pending = [(root_trail, iter(array_tally.items()))]
    while pending:
        trail, items = pending[-1]
        for key, value in items:
            path = trail + [str(key)]
            if isinstance(value, dict):
                # Descend now; the parent iterator resumes afterwards.
                pending.append((path, iter(value.items())))
                break
            yield path, value
        else:
            pending.pop()
def _sanitize_filename_part(part: str) -> str:
    """Return ``part`` with every "/" replaced by "|" and every space by "_"."""
    return part.replace("/", "|").replace(" ", "_")
def render_rt_tally_pkl_to_csvs(pkl_path: str, outdir: str):
    """
    This method takes care of tokenwise logging.

    Loads a pickled rollout tally and writes one CSV per leaf of its nested
    dict into ``outdir``. Each CSV row holds the agent id, crn id and rollout
    id of one row of an item's ``metric_matrix``, followed by that row's
    flattened values.

    Args:
        pkl_path: Path of the pickled tally. Its base name, with
            ``.rt_tally.pkl`` removed, prefixes every output file name.
        outdir: Directory receiving the CSV files; created if missing.
    """
    # NOTE: pickle can execute arbitrary code; only load trusted files.
    with open(pkl_path, "rb") as f:
        payload = pickle.load(f)
    # Backward compatibility: older tallies stored the dict directly
    if isinstance(payload, dict) and "array_tally" in payload:
        array_tally = payload.get("array_tally", {})
    else:
        array_tally = payload
    os.makedirs(outdir, exist_ok=True)
    trainer_id = os.path.basename(pkl_path).replace(".rt_tally.pkl", "")
    for path_list, rollout_tally_items in get_leaf_items(array_tally):
        if len(rollout_tally_items) == 0:
            # No item to derive the header from; skip rather than fail after
            # leaving an empty file behind.
            continue
        path_part = ".".join(_sanitize_filename_part(p) for p in path_list)
        filename = f"{trainer_id}__{path_part}.render.csv"
        out_path = os.path.join(outdir, filename)
        with open(out_path, "w", newline="") as f:
            writer = csv.writer(f)
            # The header width is taken from the first item's metric matrix.
            first_item = rollout_tally_items[0]
            metric_cols = (
                first_item.metric_matrix.shape[1]
                if first_item.metric_matrix.ndim > 1
                else 1
            )
            header = ["agent_id", "crn_id", "rollout_id"] + [
                f"t_{i}" for i in range(metric_cols)
            ]
            writer.writerow(header)
            for rollout_tally_item in rollout_tally_items:
                crn_ids = rollout_tally_item.crn_ids
                rollout_ids = rollout_tally_item.rollout_ids
                agent_ids = rollout_tally_item.agent_ids
                metric_matrix = rollout_tally_item.metric_matrix
                for i in range(metric_matrix.shape[0]):
                    row_vals = metric_matrix[i].reshape(-1)
                    # Convert row_vals to a list to avoid numpy concatenation issues
                    row_vals = (
                        row_vals.tolist()
                        if hasattr(row_vals, "tolist")
                        else list(row_vals)
                    )
                    row_prefix = [
                        agent_ids[i],
                        crn_ids[i],
                        rollout_ids[i],
                    ]
                    writer.writerow(row_prefix + row_vals)
def tally_to_stat_pack(tally: Dict[str, Any]):
    """
    Build a ``StatPack`` from a tally dict.

    When ``tally`` has an ``"array_tally"`` key, that nested dict is flattened
    first: each non-dict leaf becomes one entry whose key is the leaf's key
    path joined with "_" and whose value is ``np.mean`` of the leaf. Otherwise
    the items of ``tally`` are added as they are.
    """
    stat_pack = StatPack()
    if "array_tally" in tally:
        # backward compatibility: will remove later, flatten keys in tally
        flattened = {}

        def _collect(node: dict, trail: list):
            # Depth-first walk recording the mean of every non-dict leaf.
            for key, value in node.items():
                branch = trail + [key]
                if isinstance(value, dict):
                    _collect(value, branch)
                else:
                    flattened["_".join(branch)] = np.mean(value)

        _collect(tally["array_tally"], [])
        tally = flattened
    for stat_name, stat_value in tally.items():
        stat_pack.add_stat(stat_name, stat_value)
    return stat_pack

src_code_for_reproducibility/utils/resource_context.py ADDED Viewed

	@@ -0,0 +1,83 @@

+"""
+File: mllm/utils/resource_context.py
+Summary: Tracks system resource usage via a context manager.
+"""
+import logging
+import time
+from contextlib import contextmanager
+import torch
def vram_usage():
    """
    Describe the allocated and reserved CUDA memory of every visible GPU.

    Returns:
        One line per GPU, joined with newlines, reporting both values in GB.
        The string is empty when no CUDA device is visible.
    """
    bytes_per_gb = 1024**3
    lines = []
    for i in range(torch.cuda.device_count()):
        allocated_gb = torch.cuda.memory_allocated(i) / bytes_per_gb
        reserved_gb = torch.cuda.memory_reserved(i) / bytes_per_gb
        lines.append(
            f"GPU {i}: Memory Allocated: {allocated_gb:.2f} GB, Memory Reserved: {reserved_gb:.2f} GB"
        )
    # A separator keeps multi-GPU reports from running into each other.
    return "\n".join(lines)
def ram_usage():
    """Return the resident set size of the current process as a string in GB."""
    import psutil

    rss_bytes = psutil.Process().memory_info().rss
    return f"RAM Usage: {rss_bytes / (1024**3):.2f} GB"
@contextmanager
def resource_logger_context(logger: logging.Logger, task_description: str):
    """
    Context manager to log the resource usage of the current task.

    On exit (including when the wrapped block raises) it logs the elapsed wall
    time and, when CUDA is available, memory figures for device 0: the change
    in allocated-plus-reserved memory, the current allocated-plus-reserved
    memory and the peak allocated memory, each as a percentage of the device's
    total memory. Without CUDA only the elapsed time is logged.

    Args:
        logger: The logger to use to log the resource usage.
        task_description: The description of the task to log.
    Yields:
        None
    """
    # Setup happens before the try block so that a failure here is reported
    # as-is instead of triggering a second failure in the finally clause.
    initial_time = time.time()
    cuda_available = torch.cuda.is_available()
    if cuda_available:
        # Only device 0 is inspected.
        total_mem_bytes = torch.cuda.get_device_properties(0).total_memory
        # NOTE(review): reserved memory may already include allocated memory,
        # in which case this sum double counts; confirm the intended metric.
        initial_total_bytes = torch.cuda.memory_allocated(
            0
        ) + torch.cuda.memory_reserved(0)
        torch.cuda.reset_peak_memory_stats(0)
    try:
        yield None
    finally:
        final_time = time.time()
        delta_time_str = time.strftime(
            "%H:%M:%S", time.gmtime(final_time - initial_time)
        )
        if cuda_available:
            # Ensure kernels within the block are accounted for
            torch.cuda.synchronize()
            # Compute metrics
            final_allocated_bytes = torch.cuda.memory_allocated(0)
            final_reserved_bytes = torch.cuda.memory_reserved(0)
            final_total_bytes = final_allocated_bytes + final_reserved_bytes
            delta_vram_percent_total = (
                100 * (final_total_bytes - initial_total_bytes) / total_mem_bytes
                if total_mem_bytes
                else 0.0
            )
            current_percent_vram_taken = (
                100 * final_total_bytes / total_mem_bytes if total_mem_bytes else 0.0
            )
            block_peak_percent = (
                100 * torch.cuda.max_memory_allocated(0) / total_mem_bytes
                if total_mem_bytes
                else 0.0
            )
            logger.info(
                f"For task: {task_description}, ΔVRAM % (total): {delta_vram_percent_total:.2f}%, Current % of VRAM taken: {current_percent_vram_taken:.2f}%, Block Peak % of device VRAM: {block_peak_percent:.2f}%, ΔTime: {delta_time_str}"
            )
        else:
            logger.info(f"For task: {task_description}, ΔTime: {delta_time_str}")

src_code_for_reproducibility/utils/rollout_tree_chat_htmls.py ADDED Viewed

	@@ -0,0 +1,1597 @@

+"""
+File: mllm/utils/rollout_tree_chat_htmls.py
+Summary: Renders rollout tree chat transcripts into HTML artifacts.
+"""
+from pathlib import Path
+from typing import List
+from mllm.utils.rollout_tree_gather_utils import *
+def html_from_chat_turns(chat_turns: List[ChatTurnLog]) -> str:
+    """
+    Render chat turns as a single, wrapping sequence of messages in time order.
+    Keep badge and message bubble styles, include time on every badge and
+    include rewards on assistant badges. Each message is individually
+    hide/show by click; when hidden, only the badge remains and "(...)" is
+    shown inline (not inside a bubble).
+    """
+    import html
+    import re as _re
+    # Prepare ordering: sort by (time_step, original_index) to keep stable order within same step
+    indexed_turns = list(enumerate(chat_turns))
+    indexed_turns.sort(key=lambda t: (t[1].time_step, t[0]))
+    # Get unique agent IDs and sort alphabetically for consistent assignment
+    # Agent with alphabetically lower name gets agent-0 (left, green)
+    # Agent with alphabetically higher name gets agent-1 (right, orange)
+    unique_agent_ids = sorted(
+        set(turn.agent_id for turn in chat_turns if turn.role == "assistant")
+    )
+    agent_id_to_index = {aid: idx for idx, aid in enumerate(unique_agent_ids)}
+    # CSS styles (simplified layout; no time-step or agent-column backgrounds)
+    css = """
+    <style>
+        :root {
+            --font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+            --bg: #ffffff;
+            --text: #1c0b00;
+            --muted-text: #2C3E50;
+            --accent-muted: #BDC3C7;
+            --accent-muted-2: #D0D7DE;
+            --panel-bg: #F8FAFC;
+            --reward-color: #3a2e00; /* dark text for reward pill */
+            --font-size: 14px;
+            --border-width: 2px;
+            --corner-radius: 6px;
+            --pill-radius-left: 999px 0 0 999px;
+            --pill-radius-right: 0 999px 999px 0;
+            --inset-shadow: 0 1px 0 rgba(0,0,0,0.03) inset;
+            /* Chat View Colors */
+            --agent-0-bg: #dcf8c6;
+            --agent-0-border: #0eb224;
+            --agent-1-bg: #ffe4cc;
+            --agent-1-border: #ef8323;
+            --user-bg: #f5f5f5;
+            --chat-bg: #ffffff;
+        }
+        body {
+            font-family: var(--font-family);
+            margin: 12px;
+            background-color: var(--bg);
+            color: var(--text);
+            font-size: var(--font-size);
+            line-height: 1.5;
+        }
+        /* Chat View Styles */
+        #flow-chat {
+            max-width: 900px;
+            margin: 0 auto;
+            background: var(--chat-bg);
+            padding: 12px 16px 12px 8px;
+            border-radius: 8px;
+        }
+        .simultaneous-messages {
+            display: flex !important;
+            flex-direction: row !important;
+            flex-wrap: nowrap !important;
+            gap: 8px;
+            margin-bottom: 4px;
+            align-items: flex-start;
+            width: 100%;
+            overflow: hidden;
+            box-sizing: border-box;
+        }
+        .simultaneous-messages .chat-message {
+            flex: 1 1 0 !important;
+            margin-bottom: 0 !important;
+            display: flex !important;
+            flex-direction: row !important;
+            align-items: flex-start !important;
+            margin-left: 0 !important;
+            min-width: 0 !important;
+            max-width: 50% !important;
+            gap: 0 !important;
+            overflow: hidden !important;
+        }
+        .simultaneous-messages .chat-message-content {
+            max-width: 100% !important;
+            width: 100%;
+            align-items: flex-start !important;
+            margin-left: 0 !important;
+            overflow: hidden !important;
+        }
+        .simultaneous-messages .chat-message.agent-0 {
+            justify-content: flex-start !important;
+        }
+        .simultaneous-messages .chat-message.agent-1 {
+            justify-content: flex-end !important;
+        }
+        .simultaneous-messages .chat-message.agent-0 .chat-message-content {
+            margin-left: 0 !important;
+            align-items: flex-start !important;
+        }
+        .simultaneous-messages .chat-message.agent-1 .chat-message-content {
+            margin-left: auto !important;
+            margin-right: 0 !important;
+            align-items: flex-end !important;
+        }
+        .simultaneous-messages .chat-bubble {
+            max-width: 100%;
+            word-break: break-word;
+            overflow-wrap: break-word;
+            box-sizing: border-box;
+        }
+        .simultaneous-messages .chat-message.agent-0 .chat-bubble {
+            border-radius: 10px;
+        }
+        .simultaneous-messages .chat-message.agent-1 .chat-bubble {
+            border-radius: 10px;
+        }
+        .simultaneous-messages .chat-message.agent-0 .chat-header {
+            justify-content: flex-start;
+            flex-shrink: 0;
+        }
+        .simultaneous-messages .chat-message.agent-1 .chat-header {
+            justify-content: flex-end;
+            flex-shrink: 0;
+        }
+        .simultaneous-messages .chat-reasoning {
+            max-width: 100%;
+            overflow-wrap: break-word;
+        }
+        /* Styling for user prompts in simultaneous-messages */
+        .simultaneous-messages .chat-message.role-user {
+            flex: 1 1 0 !important;
+            margin-bottom: 0 !important;
+            display: flex !important;
+            opacity: 0.7;
+            cursor: pointer;
+        }
+        .simultaneous-messages .chat-message.role-user:hover {
+            opacity: 1;
+        }
+        .simultaneous-messages .chat-message.role-user.collapsed .chat-bubble {
+            display: none;
+        }
+        .simultaneous-messages .chat-message.role-user.collapsed .chat-header::after {
+            content: ' (collapsed)';
+            font-weight: normal;
+            font-style: italic;
+            color: #999;
+            font-size: 0.9em;
+        }
+        .simultaneous-messages .chat-message.role-user.agent-0 {
+            justify-content: flex-start !important;
+        }
+        .simultaneous-messages .chat-message.role-user.agent-1 {
+            justify-content: flex-end !important;
+        }
+        .simultaneous-messages .chat-message.role-user.agent-0 .chat-message-content {
+            margin-left: 0 !important;
+            align-items: flex-start !important;
+        }
+        .simultaneous-messages .chat-message.role-user.agent-1 .chat-message-content {
+            margin-left: auto !important;
+            margin-right: 0 !important;
+            align-items: flex-end !important;
+        }
+        /* Styling for split-agent-context when wrapped */
+        .simultaneous-messages .split-agent-context {
+            width: 100%;
+            display: flex !important;
+        }
+        .chat-message {
+            display: flex;
+            margin-bottom: 2px;
+            align-items: flex-end;
+            gap: 6px;
+            position: relative;
+            margin-left: 36px;
+        }
+        .chat-message.agent-0 {
+            margin-left: 0;
+        }
+        .chat-message.agent-1 {
+            margin-left: 0;
+        }
+        .chat-message.agent-0::before {
+            left: 0;
+        }
+        .chat-message.agent-1::before {
+            left: 0;
+        }
+        .chat-message.role-user {
+            opacity: 0.7;
+            cursor: pointer;
+        }
+        .chat-message.role-user.collapsed .chat-bubble {
+            display: none;
+        }
+        .chat-message.role-user.collapsed .chat-header::after {
+            content: ' (collapsed)';
+            font-weight: normal;
+            font-style: italic;
+            color: #999;
+            font-size: 0.9em;
+        }
+        .chat-message.role-user:hover {
+            opacity: 1;
+        }
+        .chat-message::before {
+            content: '';
+            position: absolute;
+            left: -36px;
+            top: 0;
+            bottom: 0;
+            width: 36px;
+            pointer-events: auto;
+        }
+        .merge-btn {
+            position: absolute;
+            left: -30px;
+            top: 50%;
+            transform: translateY(-50%);
+            width: 26px;
+            height: 26px;
+            border-radius: 4px;
+            border: 1.5px solid var(--accent-muted);
+            background: white;
+            cursor: pointer;
+            font-size: var(--font-size);
+            opacity: 0;
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            transition: opacity 0.2s ease, transform 0.1s ease;
+            padding: 0;
+            line-height: 1;
+            z-index: 10;
+        }
+        .chat-message:hover .merge-btn,
+        .merge-btn:hover {
+            opacity: 1;
+        }
+        .merge-btn:hover {
+            background: var(--panel-bg);
+            border-color: var(--accent-muted-2);
+            transform: translateY(-50%) scale(1.15);
+            box-shadow: 0 2px 4px rgba(0, 0, 0, 0.15);
+        }
+        .merge-btn:active {
+            transform: translateY(-50%) scale(0.95);
+        }
+        .chat-message.agent-0 .merge-btn {
+            left: -30px;
+        }
+        .chat-message.agent-1 .merge-btn {
+            left: -30px;
+        }
+        .chat-message.role-user .merge-btn {
+            display: none !important;
+        }
+        .simultaneous-messages .merge-btn {
+            opacity: 0 !important;
+            pointer-events: none;
+        }
+        .simultaneous-messages {
+            padding: 6px 0 6px 0 !important;
+            margin-left: 0 !important;
+            margin-right: 0 !important;
+            position: relative !important;
+            background: transparent !important;
+            border-radius: 0 !important;
+            box-sizing: border-box !important;
+            overflow: visible !important;
+            max-width: 100% !important;
+            border: none !important;
+            transition: padding 0.2s ease !important;
+        }
+        .simultaneous-messages:hover {
+            padding-top: 40px !important;
+        }
+        .simultaneous-messages::before {
+            content: '⇅ Merged';
+            position: absolute;
+            left: 0 !important;
+            top: 8px !important;
+            font-size: var(--font-size);
+            font-weight: 500;
+            color: #888;
+            pointer-events: none;
+            opacity: 0;
+            transition: opacity 0.2s ease;
+        }
+        .simultaneous-messages:hover::before {
+            opacity: 1;
+        }
+        .unmerge-btn {
+            position: absolute !important;
+            right: 0 !important;
+            top: 6px !important;
+            width: 36px !important;
+            height: 28px !important;
+            border-radius: 5px !important;
+            border: 2px solid #d63031 !important;
+            background: white !important;
+            cursor: pointer !important;
+            font-size: var(--font-size) !important;
+            font-weight: bold !important;
+            color: #d63031 !important;
+            display: flex !important;
+            align-items: center !important;
+            justify-content: center !important;
+            transition: all 0.2s ease !important;
+            padding: 0 !important;
+            line-height: 1 !important;
+            z-index: 1000 !important;
+            flex: none !important;
+            pointer-events: auto !important;
+            box-shadow: 0 2px 6px rgba(214, 48, 49, 0.3) !important;
+            opacity: 0 !important;
+        }
+        .simultaneous-messages:hover .unmerge-btn {
+            opacity: 1 !important;
+        }
+        .unmerge-btn:hover {
+            background: #ffe5e5 !important;
+            border-color: #b71c1c !important;
+            transform: scale(1.1) !important;
+            box-shadow: 0 3px 8px rgba(214, 48, 49, 0.4) !important;
+        }
+        .unmerge-btn:active {
+            transform: scale(0.95) !important;
+            background: #ffcccc !important;
+        }
+        .chat-message-content {
+            max-width: 72%;
+            display: flex;
+            flex-direction: column;
+            gap: 2px;
+        }
+        .chat-message.agent-0 .chat-message-content {
+            align-items: flex-start;
+        }
+        .chat-message.agent-1 .chat-message-content {
+            align-items: flex-end;
+            margin-left: auto;
+        }
+        .chat-bubble {
+            padding: 6px 10px;
+            border-radius: 10px;
+            word-wrap: break-word;
+            position: relative;
+            box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
+            line-height: 1.4;
+        }
+        .chat-message.agent-0 .chat-bubble {
+            background: var(--agent-0-bg);
+            border: 2px solid var(--agent-0-border);
+            border-radius: 10px 10px 10px 2px;
+        }
+        .chat-message.agent-1 .chat-bubble {
+            background: var(--agent-1-bg);
+            border: 2px solid var(--agent-1-border);
+            border-radius: 10px 10px 2px 10px;
+        }
+        .chat-message.role-user .chat-bubble {
+            background: var(--user-bg);
+            border: 2px solid #d0d0d0;
+        }
+        .chat-header {
+            display: flex;
+            align-items: center;
+            gap: 4px;
+            margin-bottom: 2px;
+            font-size: var(--font-size);
+            font-weight: 600;
+            line-height: 1.2;
+        }
+        .chat-message.agent-0 .chat-header {
+            color: var(--agent-0-border);
+        }
+        .chat-message.agent-1 .chat-header {
+            color: var(--agent-1-border);
+        }
+        .chat-timestamp {
+            font-size: var(--font-size);
+            color: var(--muted-text);
+            margin-top: 1px;
+            opacity: 0.75;
+        }
+        .chat-reward {
+            display: inline-flex;
+            align-items: center;
+            background: linear-gradient(90deg, #fffdf2 0%, #ffffff 75%);
+            color: #000000;
+            font-weight: 600;
+            font-size: var(--font-size);
+            padding: 1px 5px;
+            border-radius: 3px;
+            border: 1px solid #f4e6a8;
+            margin-left: 4px;
+            line-height: 1.3;
+        }
+        .chat-reasoning {
+            font-size: var(--font-size);
+            font-style: italic;
+            color: #555;
+            margin-bottom: 2px;
+            padding: 4px 8px;
+            background: rgba(0, 0, 0, 0.03);
+            border-radius: 5px;
+            cursor: pointer;
+            line-height: 1.3;
+        }
+        .chat-reasoning.collapsed .reasoning-text {
+            display: none;
+        }
+        .chat-reasoning.collapsed::after {
+            content: ' (click to expand)';
+            color: #777;
+        }
+        .chat-group-divider {
+            display: flex;
+            align-items: center;
+            gap: 8px;
+            width: 100%;
+            margin: 8px 0 4px 0;
+            position: relative;
+            cursor: pointer;
+            user-select: none;
+        }
+        .chat-group-divider::before,
+        .chat-group-divider::after {
+            content: "";
+            flex: 1 1 auto;
+            height: 2px;
+            background: linear-gradient(90deg, rgba(224,230,235,0), var(--accent-muted-2) 30%, var(--accent-muted-2) 70%, rgba(224,230,235,0));
+        }
+        .chat-group-label {
+            display: inline-block;
+            background: white;
+            padding: 2px 12px;
+            border-radius: 999px;
+            font-size: var(--font-size);
+            font-weight: 700;
+            color: var(--muted-text);
+            border: 1.5px solid var(--accent-muted);
+            box-shadow: 0 1px 3px rgba(0, 0, 0, 0.08);
+            line-height: 1.4;
+            position: relative;
+            transition: background 0.2s ease;
+        }
+        .chat-group-divider:hover .chat-group-label {
+            background: var(--panel-bg);
+        }
+        .chat-group-label::before {
+            content: '▼ ';
+            font-size: 0.8em;
+            display: inline-block;
+            transition: transform 0.2s ease;
+            opacity: 0;
+        }
+        .chat-group-divider:hover .chat-group-label::before {
+            opacity: 1;
+        }
+        .chat-group-divider.collapsed .chat-group-label::before {
+            content: '▶ ';
+            opacity: 1;
+        }
+        .chat-group-divider.collapsed + * {
+            display: none !important;
+        }
+        /* Hide collapsed rounds in strong hide mode */
+        .strong-hide .chat-group-divider.collapsed {
+            display: none !important;
+        }
+        /* Chat view width control */
+        #flow-chat {
+            --chat-width: 900px;
+            max-width: var(--chat-width);
+            margin: 0 auto;
+        }
+        /* Hide user messages when toggle is on */
+        #flow-chat.hide-user-messages .chat-message.role-user {
+            display: none;
+        }
+        /* Hide rewards when hiding user messages */
+        #flow-chat.hide-user-messages .chat-reward {
+            display: none;
+        }
+        /* Round context annotations */
+        .round-context {
+            text-align: center;
+            margin: 4px auto;
+            max-width: 100%;
+        }
+        .round-context-edit {
+            min-height: 20px;
+            padding: 5px 10px;
+            border: 1.5px dashed var(--accent-muted);
+            border-radius: 6px;
+            background: #fafafa;
+            cursor: text;
+            transition: all 0.2s ease;
+            outline: none;
+            font-size: var(--font-size);
+            line-height: 1.3;
+            user-select: text;
+            -webkit-user-select: text;
+            -moz-user-select: text;
+            -ms-user-select: text;
+        }
+        .round-context-edit:focus {
+            border-style: solid;
+            border-color: var(--accent-muted-2);
+            background: #ffffff;
+            box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
+        }
+        .round-context-edit:empty:before {
+            content: attr(data-placeholder);
+            color: #999;
+            font-style: italic;
+        }
+        .round-context-controls {
+            display: none;
+            justify-content: center;
+            gap: 4px;
+            margin-top: 4px;
+            flex-wrap: wrap;
+        }
+        .round-context-edit:focus + .round-context-controls,
+        .round-context-controls:hover,
+        .round-context:focus-within .round-context-controls {
+            display: flex;
+        }
+        .context-color-btn {
+            width: 22px;
+            height: 22px;
+            border-radius: 50%;
+            border: 1.5px solid #fff;
+            box-shadow: 0 1px 2px rgba(0, 0, 0, 0.15);
+            cursor: pointer;
+            transition: transform 0.1s ease;
+        }
+        .context-color-btn:hover {
+            transform: scale(1.15);
+        }
+        .context-color-btn:active {
+            transform: scale(0.95);
+        }
+        /* Split agent context boxes */
+        .split-agent-context {
+            display: flex;
+            gap: 6px;
+            margin: 4px auto;
+            max-width: 100%;
+            align-items: flex-start;
+        }
+        .agent-context-box {
+            flex: 1;
+            min-width: 0;
+            position: relative;
+        }
+        .agent-context-box .round-context-edit {
+            margin: 0;
+            border-radius: 6px;
+            padding: 4px 8px;
+            min-height: 18px;
+        }
+        .agent-context-box.agent-0 .round-context-edit {
+            border-color: var(--agent-0-border);
+            background: rgba(14, 178, 36, 0.03);
+        }
+        .agent-context-box.agent-1 .round-context-edit {
+            border-color: var(--agent-1-border);
+            background: rgba(239, 131, 35, 0.03);
+        }
+        .agent-context-box.agent-0 .round-context-edit:focus {
+            border-color: var(--agent-0-border);
+            box-shadow: 0 2px 8px rgba(14, 178, 36, 0.2);
+            background: rgba(14, 178, 36, 0.05);
+        }
+        .agent-context-box.agent-1 .round-context-edit:focus {
+            border-color: var(--agent-1-border);
+            box-shadow: 0 2px 8px rgba(239, 131, 35, 0.2);
+            background: rgba(239, 131, 35, 0.05);
+        }
+        .agent-context-box .round-context-edit::before {
+            font-weight: 700;
+            font-size: var(--font-size);
+            margin-right: 5px;
+            letter-spacing: 0.2px;
+        }
+        .agent-context-box.agent-0 .round-context-edit::before {
+            content: 'Agent 0 Prompt Summary:';
+            color: var(--agent-0-border);
+        }
+        .agent-context-box.agent-1 .round-context-edit::before {
+            content: 'Agent 1 Prompt Summary:';
+            color: var(--agent-1-border);
+        }
+        /* Empty context boxes will be hidden by JavaScript when strong hide is enabled */
+        .toolbar {
+            display: flex;
+            align-items: center;
+            gap: 8px;
+            margin-bottom: 0;
+            font-size: var(--font-size);
+            max-height: 0;
+            overflow: hidden;
+            opacity: 0;
+            pointer-events: none;
+            transition: max-height 0.2s ease, opacity 0.2s ease;
+            flex-wrap: wrap;
+        }
+        .toolbar-wrap { position: sticky; top: 0; z-index: 10; background: var(--bg); }
+        .toolbar-hotzone { height: 6px; }
+        .toolbar-wrap:hover .toolbar { max-height: 500px; opacity: 1; pointer-events: auto; margin-bottom: 12px; }
+        .toolbar * { pointer-events: auto !important; }
+        .toolbar input,
+        .toolbar select { z-index: 100 !important; position: relative; }
+        .toolbar input[type="number"],
+        .toolbar input[type="text"],
+        .toolbar select {
+            width: 72px;
+            padding: 2px 6px;
+            border: 1px solid var(--accent-muted);
+            border-radius: var(--corner-radius);
+            background: var(--bg);
+            user-select: text !important;
+            -webkit-user-select: text !important;
+            -moz-user-select: text !important;
+            -ms-user-select: text !important;
+            pointer-events: auto !important;
+            cursor: pointer !important;
+        }
+        .toolbar input[type="text"] {
+            cursor: text !important;
+        }
+        .toolbar input[type="text"]:focus,
+        .toolbar input[type="number"]:focus,
+        .toolbar select:focus {
+            outline: 2px solid #0066cc;
+            outline-offset: 1px;
+        }
+        .toolbar button {
+            padding: 4px 8px;
+            border: 1px solid var(--accent-muted);
+            background: var(--panel-bg);
+            border-radius: var(--corner-radius);
+            cursor: pointer;
+        }
+        .emoji-bw { filter: grayscale(100%); opacity: 0.95; font-size: var(--font-size); vertical-align: baseline; margin: 0; position: relative; top: -1px; line-height: 1; display: inline-block; }
+    </style>
+    """
+    # HTML structure
+    html_parts = [
+        "<!DOCTYPE html>",
+        "<html>",
+        "<head>",
+        "<meta charset='UTF-8'>",
+        "<title>Chat Turns</title>",
+        css,
+        "<script>\n"
+        "document.addEventListener('DOMContentLoaded', function() {\n"
+        "  const chatFlow = document.getElementById('flow-chat');\n"
+        "  let strongHideOn = false;\n"
+        "  let hideUserMessages = false;\n"
+        "  const hideUserBtn = document.getElementById('toggle-hide-user-messages');\n"
+        "  const hideUserStateEl = document.getElementById('hide-user-state');\n"
+        "  const widthControl = document.getElementById('chat-width-control');\n"
+        "  const widthSlider = document.getElementById('chat-width-slider');\n"
+        "  const widthValue = document.getElementById('chat-width-value');\n"
+        "  const strongHideBtn = document.getElementById('toggle-strong-hide');\n"
+        "  const strongHideStateEl = document.getElementById('strong-hide-state');\n"
+        "  if (strongHideBtn) {\n"
+        "    const setLabel = () => { if (strongHideStateEl) { strongHideStateEl.textContent = strongHideOn ? 'On' : 'Off'; } };\n"
+        "    strongHideBtn.addEventListener('click', () => { strongHideOn = !strongHideOn; chatFlow.classList.toggle('strong-hide', strongHideOn); setLabel(); applyStrongHideToChat(); });\n"
+        "    setLabel();\n"
+        "  }\n"
+        "  if (hideUserBtn && hideUserStateEl && chatFlow) {\n"
+        "    const updateHideUser = () => { hideUserStateEl.textContent = hideUserMessages ? 'On' : 'Off'; };\n"
+        "    hideUserBtn.addEventListener('click', () => {\n"
+        "      hideUserMessages = !hideUserMessages;\n"
+        "      chatFlow.classList.toggle('hide-user-messages', hideUserMessages);\n"
+        "      updateHideUser();\n"
+        "    });\n"
+        "    updateHideUser();\n"
+        "  }\n"
+        "  if (widthSlider && widthValue && chatFlow) {\n"
+        "    const savedWidth = localStorage.getItem('chat-view-width');\n"
+        "    if (savedWidth) {\n"
+        "      widthSlider.value = savedWidth;\n"
+        "      chatFlow.style.setProperty('--chat-width', savedWidth + 'px');\n"
+        "      widthValue.textContent = savedWidth + 'px';\n"
+        "    }\n"
+        "    widthSlider.addEventListener('input', (e) => {\n"
+        "      const width = e.target.value;\n"
+        "      chatFlow.style.setProperty('--chat-width', width + 'px');\n"
+        "      widthValue.textContent = width + 'px';\n"
+        "      localStorage.setItem('chat-view-width', width);\n"
+        "    });\n"
+        "  }\n"
+        "  const fontFamilySelect = document.getElementById('font-family-select');\n"
+        "  const fontSizeInput = document.getElementById('font-size-input');\n"
+        "  if (fontFamilySelect) {\n"
+        "    const savedFont = localStorage.getItem('render-font-family');\n"
+        "    if (savedFont) {\n"
+        "      fontFamilySelect.value = savedFont;\n"
+        "      document.body.style.setProperty('--font-family', savedFont);\n"
+        "    }\n"
+        "    fontFamilySelect.addEventListener('change', (e) => {\n"
+        "      const font = e.target.value;\n"
+        "      document.body.style.setProperty('--font-family', font);\n"
+        "      localStorage.setItem('render-font-family', font);\n"
+        "    });\n"
+        "  }\n"
+        "  if (fontSizeInput) {\n"
+        "    const savedSize = localStorage.getItem('render-font-size');\n"
+        "    if (savedSize) {\n"
+        "      fontSizeInput.value = savedSize;\n"
+        "      document.body.style.setProperty('--font-size', savedSize + 'px');\n"
+        "    }\n"
+        "    fontSizeInput.addEventListener('input', (e) => {\n"
+        "      const size = e.target.value;\n"
+        "      document.body.style.setProperty('--font-size', size + 'px');\n"
+        "      localStorage.setItem('render-font-size', size);\n"
+        "    });\n"
+        "  }\n"
+        "  const agent0EmojiInput = document.getElementById('agent0-emoji-input');\n"
+        "  const agent0NameInput = document.getElementById('agent0-name-input');\n"
+        "  const agent1EmojiInput = document.getElementById('agent1-emoji-input');\n"
+        "  const agent1NameInput = document.getElementById('agent1-name-input');\n"
+        "  const applyAgentNamesBtn = document.getElementById('apply-agent-names');\n"
+        "  // Restore the stored emoji and name of both agents into the toolbar inputs and the page.\n"
+        "  // Missing entries fall back to the robot emoji and to each name input's placeholder.\n"
+        "  function loadAgentNames() {\n"
+        "    const requiredInputs = [agent0EmojiInput, agent0NameInput, agent1EmojiInput, agent1NameInput];\n"
+        "    if (!requiredInputs.every(Boolean)) return;\n"
+        "    const stored = (key, fallback) => localStorage.getItem(key) || fallback;\n"
+        "    const emoji0 = stored('agent0-emoji', '🤖');\n"
+        "    const name0 = stored('agent0-name', agent0NameInput.placeholder);\n"
+        "    const emoji1 = stored('agent1-emoji', '🤖');\n"
+        "    const name1 = stored('agent1-name', agent1NameInput.placeholder);\n"
+        "    agent0EmojiInput.value = emoji0;\n"
+        "    agent0NameInput.value = name0;\n"
+        "    agent1EmojiInput.value = emoji1;\n"
+        "    agent1NameInput.value = name1;\n"
+        "    applyAgentNamesToDOM(emoji0, name0, emoji1, name1);\n"
+        "  }\n"
+        "  // Apply the chosen agent names and emojis to every element tagged with data-agent-index\n"
+        "  // and refresh the 'Prompt Summary' label shown in front of each per-agent context box.\n"
+        "  function applyAgentNamesToDOM(agent0Emoji, agent0Name, agent1Emoji, agent1Name) {\n"
+        "    const agentMap = { '0': { name: agent0Name, emoji: agent0Emoji }, '1': { name: agent1Name, emoji: agent1Emoji } };\n"
+        "    document.querySelectorAll('[data-agent-index]').forEach(el => {\n"
+        "      const agent = agentMap[el.getAttribute('data-agent-index')];\n"
+        "      if (!agent) return;\n"
+        "      if (el.classList.contains('agent-name')) {\n"
+        "        el.textContent = agent.name;\n"
+        "      } else if (el.classList.contains('emoji-bw')) {\n"
+        "        const currentEmoji = el.textContent.trim();\n"
+        "        const isDefaultAvatar = currentEmoji === '🤖' || currentEmoji === '👤';\n"
+        "        // Elements already rewritten here are marked, so a second change of emoji still\n"
+        "        // reaches them; emojis that were never a default avatar stay untouched.\n"
+        "        if (isDefaultAvatar || el.dataset.agentEmojiApplied === 'true') {\n"
+        "          el.textContent = agent.emoji;\n"
+        "          el.dataset.agentEmojiApplied = 'true';\n"
+        "        }\n"
+        "      }\n"
+        "    });\n"
+        "    // Pass each label through a data attribute and read it back with attr(), so a name\n"
+        "    // containing quotes or backslashes cannot break or inject into the generated CSS.\n"
+        "    ['0', '1'].forEach(idx => {\n"
+        "      const label = agentMap[idx].name + ' Prompt Summary:';\n"
+        "      document.querySelectorAll('.agent-context-box.agent-' + idx + ' .round-context-edit').forEach(edit => {\n"
+        "        edit.setAttribute('data-agent-label', label);\n"
+        "      });\n"
+        "    });\n"
+        "    // Re-create the override sheet so it stays the last rule set appended to <head>.\n"
+        "    const existingStyle = document.getElementById('dynamic-agent-names-style');\n"
+        "    if (existingStyle) existingStyle.remove();\n"
+        "    const style = document.createElement('style');\n"
+        "    style.id = 'dynamic-agent-names-style';\n"
+        "    style.textContent = `\n"
+        "      .agent-context-box.agent-0 .round-context-edit::before,\n"
+        "      .agent-context-box.agent-1 .round-context-edit::before {\n"
+        "        content: attr(data-agent-label);\n"
+        "      }\n"
+        "    `;\n"
+        "    document.head.appendChild(style);\n"
+        "  }\n"
+        "  if (applyAgentNamesBtn && agent0EmojiInput && agent0NameInput && agent1EmojiInput && agent1NameInput) {\n"
+        "    [agent0EmojiInput, agent0NameInput, agent1EmojiInput, agent1NameInput].forEach(input => {\n"
+        "      input.style.pointerEvents = 'auto';\n"
+        "      if (input.tagName === 'INPUT') {\n"
+        "        input.style.userSelect = 'text';\n"
+        "        input.style.webkitUserSelect = 'text';\n"
+        "        input.readOnly = false;\n"
+        "      }\n"
+        "      input.disabled = false;\n"
+        "      const stopAll = (e) => { e.stopPropagation(); e.stopImmediatePropagation(); };\n"
+        "      input.addEventListener('mousedown', stopAll, true);\n"
+        "      input.addEventListener('mouseup', stopAll, true);\n"
+        "      input.addEventListener('click', stopAll, true);\n"
+        "      input.addEventListener('dblclick', stopAll, true);\n"
+        "      input.addEventListener('focus', stopAll, true);\n"
+        "      input.addEventListener('blur', stopAll, true);\n"
+        "      input.addEventListener('paste', stopAll, true);\n"
+        "      input.addEventListener('cut', stopAll, true);\n"
+        "      input.addEventListener('copy', stopAll, true);\n"
+        "      input.addEventListener('select', stopAll, true);\n"
+        "      input.addEventListener('selectstart', stopAll, true);\n"
+        "      input.addEventListener('keydown', stopAll, true);\n"
+        "      input.addEventListener('keyup', stopAll, true);\n"
+        "      input.addEventListener('keypress', stopAll, true);\n"
+        "      input.addEventListener('input', stopAll, true);\n"
+        "      input.addEventListener('change', stopAll, true);\n"
+        "      input.addEventListener('contextmenu', stopAll, true);\n"
+        "    });\n"
+        "    const applyNames = () => {\n"
+        "      const agent0Emoji = agent0EmojiInput.value || '🤖';\n"
+        "      const agent0Name = agent0NameInput.value.trim() || agent0NameInput.placeholder;\n"
+        "      const agent1Emoji = agent1EmojiInput.value || '🤖';\n"
+        "      const agent1Name = agent1NameInput.value.trim() || agent1NameInput.placeholder;\n"
+        "      localStorage.setItem('agent0-emoji', agent0Emoji);\n"
+        "      localStorage.setItem('agent0-name', agent0Name);\n"
+        "      localStorage.setItem('agent1-emoji', agent1Emoji);\n"
+        "      localStorage.setItem('agent1-name', agent1Name);\n"
+        "      applyAgentNamesToDOM(agent0Emoji, agent0Name, agent1Emoji, agent1Name);\n"
+        "    };\n"
+        "    applyAgentNamesBtn.addEventListener('click', applyNames);\n"
+        "    [agent0NameInput, agent1NameInput].forEach(input => {\n"
+        "      input.addEventListener('keydown', (e) => {\n"
+        "        if (e.key === 'Enter') {\n"
+        "          e.preventDefault();\n"
+        "          e.stopPropagation();\n"
+        "          e.stopImmediatePropagation();\n"
+        "          applyNames();\n"
+        "        }\n"
+        "      }, true);\n"
+        "    });\n"
+        "    [agent0EmojiInput, agent1EmojiInput].forEach(select => {\n"
+        "      select.addEventListener('change', applyNames);\n"
+        "    });\n"
+        "  }\n"
+        "  loadAgentNames();\n"
+        "  // Install one delegated click handler that collapses or expands a round when its divider is clicked.\n"
+        "  // Every sibling after the divider, up to the next divider, is hidden or shown through its inline display.\n"
+        "  function setupRoundCollapse() {\n"
+        "    const isDivider = (node) => node.classList.contains('chat-group-divider') || node.classList.contains('group-divider');\n"
+        "    document.addEventListener('click', function(evt) {\n"
+        "      // Clicks on form controls, editable notes and the toolbar never toggle a round.\n"
+        "      if (evt.target.closest('input, textarea, select, button, .round-context-edit, .toolbar')) { return; }\n"
+        "      const divider = evt.target.closest('.chat-group-divider, .group-divider');\n"
+        "      if (!divider) return;\n"
+        "      const collapsing = divider.classList.toggle('collapsed');\n"
+        "      for (let node = divider.nextElementSibling; node && !isDivider(node); node = node.nextElementSibling) {\n"
+        "        if (collapsing) {\n"
+        "          // Remember the display value only the first time the element is hidden.\n"
+        "          if (!node.dataset.originalDisplay) {\n"
+        "            node.dataset.originalDisplay = node.style.display || getComputedStyle(node).display;\n"
+        "          }\n"
+        "          node.style.display = 'none';\n"
+        "          continue;\n"
+        "        }\n"
+        "        const saved = node.dataset.originalDisplay;\n"
+        "        if (!saved) {\n"
+        "          node.style.display = '';\n"
+        "          continue;\n"
+        "        }\n"
+        "        // NOTE(review): both steps below appear to end with an empty inline display; confirm before simplifying.\n"
+        "        node.style.display = saved === 'none' ? '' : saved;\n"
+        "        if (saved !== 'none' && node.style.display === saved) {\n"
+        "          node.style.display = '';\n"
+        "        }\n"
+        "        delete node.dataset.originalDisplay;\n"
+        "      }\n"
+        "      evt.stopPropagation();\n"
+        "    });\n"
+        "  }\n"
+        "  setupRoundCollapse();\n"
+        "  const strongHideBtnChat = document.getElementById('toggle-strong-hide');\n"
+        "  // Sync the chat view with the strongHideOn flag: toggle the 'strong-hide' class on the chat\n"
+        "  // container and hide (or show again) the context boxes whose editable note is empty.\n"
+        "  function applyStrongHideToChat() {\n"
+        "    if (!chatFlow) return;\n"
+        "    chatFlow.classList.toggle('strong-hide', strongHideOn);\n"
+        "    chatFlow.querySelectorAll('.round-context-edit').forEach(edit => {\n"
+        "      const holder = edit.closest('.round-context, .agent-context-box, .split-agent-context');\n"
+        "      if (!holder) return;\n"
+        "      const isEmpty = edit.textContent.trim() === '';\n"
+        "      holder.style.display = (strongHideOn && isEmpty) ? 'none' : '';\n"
+        "    });\n"
+        "    chatFlow.querySelectorAll('.split-agent-context').forEach(split => {\n"
+        "      const boxes = Array.from(split.querySelectorAll('.agent-context-box'));\n"
+        "      const allEmpty = boxes.every(box => {\n"
+        "        const edit = box.querySelector('.round-context-edit');\n"
+        "        return edit && edit.textContent.trim() === '';\n"
+        "      });\n"
+        "      if (strongHideOn && allEmpty) {\n"
+        "        // Flag only the rows hidden here so that nothing hidden by other code is revealed later.\n"
+        "        if (split.style.display !== 'none') {\n"
+        "          split.style.display = 'none';\n"
+        "          split.dataset.strongHidden = 'true';\n"
+        "        }\n"
+        "      } else if (split.dataset.strongHidden) {\n"
+        "        // The loop above only resets the nearest box, so the row itself must be restored\n"
+        "        // here; otherwise it would stay hidden after strong hide is switched off.\n"
+        "        split.style.display = '';\n"
+        "        delete split.dataset.strongHidden;\n"
+        "      }\n"
+        "    });\n"
+        "  }\n"
+        "  if (strongHideBtnChat && chatFlow) {\n"
+        "    strongHideBtnChat.addEventListener('click', () => {\n"
+        "      setTimeout(() => applyStrongHideToChat(), 0);\n"
+        "    });\n"
+        "  }\n"
+        "  document.addEventListener('click', function(e) {\n"
+        "    if (e.target.closest('input, textarea, select, .round-context-edit, .toolbar')) { return; }\n"
+        "    const chatReasoning = e.target.closest('.chat-reasoning');\n"
+        "    if (chatReasoning) {\n"
+        "      chatReasoning.classList.toggle('collapsed');\n"
+        "      return;\n"
+        "    }\n"
+        "    const userMessage = e.target.closest('.chat-message.role-user');\n"
+        "    if (userMessage && !e.target.closest('.merge-btn, .unmerge-btn')) {\n"
+        "      userMessage.classList.toggle('collapsed');\n"
+        "    }\n"
+        "  });\n"
+        "  // Recolor the current text selection when it lies inside the given element.\n"
+        "  // Returns true when the DOM was changed and false when there was nothing usable to recolor.\n"
+        "  // The color 'default' swaps the selection for its plain text; any other value wraps it in a bold colored span.\n"
+        "  function applyColorToSelection(color, element) {\n"
+        "    const sel = window.getSelection();\n"
+        "    if (sel.rangeCount === 0) return false;\n"
+        "    const range = sel.getRangeAt(0);\n"
+        "    const pickedText = range.toString();\n"
+        "    if (!element.contains(range.commonAncestorContainer) || !pickedText) return false;\n"
+        "    if (color === 'default') {\n"
+        "      // Remove styling - just extract the text content\n"
+        "      range.deleteContents();\n"
+        "      range.insertNode(document.createTextNode(pickedText));\n"
+        "      return true;\n"
+        "    }\n"
+        "    const wrapper = document.createElement('span');\n"
+        "    wrapper.style.color = color;\n"
+        "    wrapper.style.fontWeight = '600';\n"
+        "    try {\n"
+        "      range.surroundContents(wrapper);\n"
+        "    } catch (err) {\n"
+        "      // Fallback when the range cannot be wrapped in place: move its contents into the span by hand.\n"
+        "      wrapper.appendChild(range.extractContents());\n"
+        "      range.insertNode(wrapper);\n"
+        "    }\n"
+        "    return true;\n"
+        "  }\n"
+        "  let lastFocusedContextEdit = null;\n"
+        "  document.addEventListener('focusin', function(e) {\n"
+        "    if (e.target.classList.contains('round-context-edit')) {\n"
+        "      lastFocusedContextEdit = e.target;\n"
+        "    }\n"
+        "  });\n"
+        "  document.addEventListener('mousedown', function(e) {\n"
+        "    if (e.target.classList.contains('context-color-btn')) {\n"
+        "      e.preventDefault();\n"
+        "    }\n"
+        "  });\n"
+        "  document.addEventListener('click', function(e) {\n"
+        "    if (e.target.closest('input:not(.round-context-edit), textarea, select') && !e.target.classList.contains('context-color-btn')) { return; }\n"
+        "    if (e.target.classList.contains('context-color-btn')) {\n"
+        "      e.preventDefault();\n"
+        "      const color = e.target.dataset.color;\n"
+        "      const controls = e.target.closest('.round-context-controls');\n"
+        "      const contextEdit = controls ? controls.previousElementSibling : null;\n"
+        "      if (contextEdit && contextEdit.classList.contains('round-context-edit')) {\n"
+        "        contextEdit.focus();\n"
+        "        const selection = window.getSelection();\n"
+        "        if (selection.rangeCount > 0 && selection.toString().length > 0 && contextEdit.contains(selection.anchorNode)) {\n"
+        "          if (applyColorToSelection(color, contextEdit)) {\n"
+        "            const key = contextEdit.dataset.contextKey;\n"
+        "            localStorage.setItem(key, contextEdit.innerHTML);\n"
+        "          }\n"
+        "        } else {\n"
+        "          try {\n"
+        "            if (color !== 'default') {\n"
+        "              document.execCommand('styleWithCSS', false, true);\n"
+        "              document.execCommand('foreColor', false, color);\n"
+        "            }\n"
+        "            const key = contextEdit.dataset.contextKey;\n"
+        "            setTimeout(() => localStorage.setItem(key, contextEdit.innerHTML), 10);\n"
+        "          } catch (e) {\n"
+        "            console.log('Color command failed:', e);\n"
+        "          }\n"
+        "        }\n"
+        "      }\n"
+        "    }\n"
+        "  });\n"
+        "  const contextEdits = document.querySelectorAll('.round-context-edit');\n"
+        "  contextEdits.forEach(edit => {\n"
+        "    edit.addEventListener('input', function() {\n"
+        "      const key = this.dataset.contextKey;\n"
+        "      localStorage.setItem(key, this.innerHTML);\n"
+        "    });\n"
+        "    const key = edit.dataset.contextKey;\n"
+        "    const saved = localStorage.getItem(key);\n"
+        "    if (saved) {\n"
+        "      edit.innerHTML = saved;\n"
+        "    }\n"
+        "  });\n"
+        "  document.addEventListener('click', function(e) {\n"
+        "    if (e.target.closest('input, textarea, select, .round-context-edit') && !e.target.classList.contains('merge-btn') && !e.target.classList.contains('unmerge-btn')) { return; }\n"
+        "    if (e.target.classList.contains('merge-btn')) {\n"
+        "      e.preventDefault();\n"
+        "      e.stopPropagation();\n"
+        "      const msgId = e.target.dataset.msgId;\n"
+        "      const currentMsg = e.target.closest('.chat-message');\n"
+        "      if (!currentMsg) return;\n"
+        "      if (currentMsg.classList.contains('role-user')) {\n"
+        "        alert('Cannot merge user messages');\n"
+        "        return;\n"
+        "      }\n"
+        "      let nextMsg = currentMsg.nextElementSibling;\n"
+        "      while (nextMsg && !nextMsg.classList.contains('chat-message')) {\n"
+        "        nextMsg = nextMsg.nextElementSibling;\n"
+        "      }\n"
+        "      while (nextMsg && nextMsg.classList.contains('role-user')) {\n"
+        "        nextMsg = nextMsg.nextElementSibling;\n"
+        "        while (nextMsg && !nextMsg.classList.contains('chat-message')) {\n"
+        "          nextMsg = nextMsg.nextElementSibling;\n"
+        "        }\n"
+        "      }\n"
+        "      if (!nextMsg || nextMsg.classList.contains('chat-message') === false) {\n"
+        "        alert('No next assistant message to merge with');\n"
+        "        return;\n"
+        "      }\n"
+        "      if (nextMsg.classList.contains('role-user')) {\n"
+        "        alert('Cannot merge with user messages');\n"
+        "        return;\n"
+        "      }\n"
+        "      \n"
+        "      // Find the user prompts that precede each assistant message\n"
+        "      let currentPrompt = currentMsg.previousElementSibling;\n"
+        "      while (currentPrompt && !currentPrompt.classList.contains('chat-message')) {\n"
+        "        currentPrompt = currentPrompt.previousElementSibling;\n"
+        "      }\n"
+        "      if (currentPrompt && !currentPrompt.classList.contains('role-user')) {\n"
+        "        currentPrompt = null;\n"
+        "      }\n"
+        "      \n"
+        "      let nextPrompt = nextMsg.previousElementSibling;\n"
+        "      while (nextPrompt && !nextPrompt.classList.contains('chat-message')) {\n"
+        "        nextPrompt = nextPrompt.previousElementSibling;\n"
+        "      }\n"
+        "      if (nextPrompt && !nextPrompt.classList.contains('role-user')) {\n"
+        "        nextPrompt = null;\n"
+        "      }\n"
+        "      \n"
+        "      // Find the split-agent-context that precedes the first prompt or assistant message\n"
+        "      let splitContext = null;\n"
+        "      let searchStart = currentPrompt || currentMsg;\n"
+        "      let elem = searchStart.previousElementSibling;\n"
+        "      while (elem) {\n"
+        "        if (elem.classList.contains('split-agent-context')) {\n"
+        "          splitContext = elem;\n"
+        "          break;\n"
+        "        }\n"
+        "        if (elem.classList.contains('chat-message') || elem.classList.contains('chat-group-divider')) {\n"
+        "          break;\n"
+        "        }\n"
+        "        elem = elem.previousElementSibling;\n"
+        "      }\n"
+        "      \n"
+        "      const parent = currentMsg.parentElement;\n"
+        "      if (parent.classList.contains('simultaneous-messages')) {\n"
+        "        const wrapper = parent;\n"
+        "        currentMsg.style.display = '';\n"
+        "        currentMsg.classList.remove('merged');\n"
+        "        const refNode = wrapper.nextElementSibling;\n"
+        "        parent.parentElement.insertBefore(currentMsg, refNode);\n"
+        "        if (nextMsg.parentElement === wrapper) {\n"
+        "          parent.parentElement.insertBefore(nextMsg, refNode);\n"
+        "        }\n"
+        "        if (wrapper.children.length === 0) {\n"
+        "          wrapper.remove();\n"
+        "        }\n"
+        "      } else {\n"
+        "        // If split-agent-context exists, wrap it\n"
+        "        if (splitContext && !splitContext.classList.contains('merged')) {\n"
+        "          const splitWrapper = document.createElement('div');\n"
+        "          splitWrapper.className = 'simultaneous-messages';\n"
+        "          const splitUnmergeBtn = document.createElement('button');\n"
+        "          splitUnmergeBtn.className = 'unmerge-btn';\n"
+        "          splitUnmergeBtn.innerHTML = '✕';\n"
+        "          splitUnmergeBtn.title = 'Click to unmerge messages';\n"
+        "          splitWrapper.appendChild(splitUnmergeBtn);\n"
+        "          splitWrapper.dataset.isSplitContext = 'true';\n"
+        "          parent.insertBefore(splitWrapper, splitContext);\n"
+        "          splitWrapper.appendChild(splitContext);\n"
+        "          splitContext.classList.add('merged');\n"
+        "        }\n"
+        "        \n"
+        "        // Create wrapper for prompts if both exist\n"
+        "        if (currentPrompt && nextPrompt) {\n"
+        "          const promptWrapper = document.createElement('div');\n"
+        "          promptWrapper.className = 'simultaneous-messages';\n"
+        "          const promptUnmergeBtn = document.createElement('button');\n"
+        "          promptUnmergeBtn.className = 'unmerge-btn';\n"
+        "          promptUnmergeBtn.innerHTML = '✕';\n"
+        "          promptUnmergeBtn.title = 'Click to unmerge messages';\n"
+        "          promptWrapper.appendChild(promptUnmergeBtn);\n"
+        "          promptWrapper.dataset.firstMsgId = currentPrompt.dataset.msgId;\n"
+        "          promptWrapper.dataset.secondMsgId = nextPrompt.dataset.msgId;\n"
+        "          \n"
+        "          // Determine order: agent-0 first, agent-1 second\n"
+        "          const firstPrompt = currentPrompt.classList.contains('agent-0') ? currentPrompt : nextPrompt;\n"
+        "          const secondPrompt = currentPrompt.classList.contains('agent-0') ? nextPrompt : currentPrompt;\n"
+        "          \n"
+        "          parent.insertBefore(promptWrapper, currentPrompt);\n"
+        "          promptWrapper.appendChild(firstPrompt);\n"
+        "          promptWrapper.appendChild(secondPrompt);\n"
+        "          currentPrompt.classList.add('merged');\n"
+        "          nextPrompt.classList.add('merged');\n"
+        "        }\n"
+        "        \n"
+        "        // Create wrapper for assistant messages\n"
+        "        const wrapper = document.createElement('div');\n"
+        "        wrapper.className = 'simultaneous-messages';\n"
+        "        const unmergeBtn = document.createElement('button');\n"
+        "        unmergeBtn.className = 'unmerge-btn';\n"
+        "        unmergeBtn.innerHTML = '✕';\n"
+        "        unmergeBtn.title = 'Click to unmerge messages';\n"
+        "        wrapper.appendChild(unmergeBtn);\n"
+        "        wrapper.dataset.firstMsgId = currentMsg.dataset.msgId;\n"
+        "        wrapper.dataset.secondMsgId = nextMsg.dataset.msgId;\n"
+        "        \n"
+        "        // Determine order: agent-0 first, agent-1 second\n"
+        "        const firstAssistant = currentMsg.classList.contains('agent-0') ? currentMsg : nextMsg;\n"
+        "        const secondAssistant = currentMsg.classList.contains('agent-0') ? nextMsg : currentMsg;\n"
+        "        \n"
+        "        parent.insertBefore(wrapper, currentMsg);\n"
+        "        wrapper.appendChild(firstAssistant);\n"
+        "        wrapper.appendChild(secondAssistant);\n"
+        "        currentMsg.classList.add('merged');\n"
+        "        nextMsg.classList.add('merged');\n"
+        "      }\n"
+        "    }\n"
+        "    if (e.target.classList.contains('unmerge-btn')) {\n"
+        "      const wrapper = e.target.closest('.simultaneous-messages');\n"
+        "      if (!wrapper) return;\n"
+        "      const parent = wrapper.parentElement;\n"
+        "      \n"
+        "      // Check if this is a split-context wrapper\n"
+        "      if (wrapper.dataset.isSplitContext === 'true') {\n"
+        "        const splitContext = wrapper.querySelector('.split-agent-context');\n"
+        "        if (splitContext) {\n"
+        "          splitContext.classList.remove('merged');\n"
+        "          parent.insertBefore(splitContext, wrapper.nextElementSibling);\n"
+        "        }\n"
+        "        wrapper.remove();\n"
+        "        return;\n"
+        "      }\n"
+        "      \n"
+        "      const firstMsgId = wrapper.dataset.firstMsgId;\n"
+        "      const secondMsgId = wrapper.dataset.secondMsgId;\n"
+        "      const messages = Array.from(wrapper.querySelectorAll('.chat-message'));\n"
+        "      const refNode = wrapper.nextElementSibling;\n"
+        "      const firstMsg = messages.find(m => m.dataset.msgId === firstMsgId);\n"
+        "      const secondMsg = messages.find(m => m.dataset.msgId === secondMsgId);\n"
+        "      \n"
+        "      // Check for preceding wrappers to also unmerge (prompts and split-context)\n"
+        "      let currentElem = wrapper.previousElementSibling;\n"
+        "      const wrappersToUnmerge = [];\n"
+        "      \n"
+        "      while (currentElem) {\n"
+        "        if (currentElem.classList.contains('simultaneous-messages')) {\n"
+        "          wrappersToUnmerge.push(currentElem);\n"
+        "        } else if (currentElem.classList.contains('chat-message') || currentElem.classList.contains('chat-group-divider')) {\n"
+        "          break;\n"
+        "        }\n"
+        "        currentElem = currentElem.previousElementSibling;\n"
+        "      }\n"
+        "      \n"
+        "      // Unmerge preceding wrappers\n"
+        "      for (const prevWrapper of wrappersToUnmerge) {\n"
+        "        if (prevWrapper.dataset.isSplitContext === 'true') {\n"
+        "          const splitContext = prevWrapper.querySelector('.split-agent-context');\n"
+        "          if (splitContext) {\n"
+        "            splitContext.classList.remove('merged');\n"
+        "            parent.insertBefore(splitContext, prevWrapper.nextElementSibling);\n"
+        "          }\n"
+        "          prevWrapper.remove();\n"
+        "        } else {\n"
+        "          const prevMessages = Array.from(prevWrapper.querySelectorAll('.chat-message'));\n"
+        "          const prevFirstMsgId = prevWrapper.dataset.firstMsgId;\n"
+        "          const prevSecondMsgId = prevWrapper.dataset.secondMsgId;\n"
+        "          const prevFirstMsg = prevMessages.find(m => m.dataset.msgId === prevFirstMsgId);\n"
+        "          const prevSecondMsg = prevMessages.find(m => m.dataset.msgId === prevSecondMsgId);\n"
+        "          const prevRefNode = prevWrapper.nextElementSibling;\n"
+        "          \n"
+        "          if (prevFirstMsg) {\n"
+        "            prevFirstMsg.classList.remove('merged');\n"
+        "            prevFirstMsg.style.display = '';\n"
+        "            parent.insertBefore(prevFirstMsg, prevRefNode);\n"
+        "          }\n"
+        "          if (prevSecondMsg) {\n"
+        "            prevSecondMsg.classList.remove('merged');\n"
+        "            prevSecondMsg.style.display = '';\n"
+        "            parent.insertBefore(prevSecondMsg, prevRefNode);\n"
+        "          }\n"
+        "          prevWrapper.remove();\n"
+        "        }\n"
+        "      }\n"
+        "      \n"
+        "      // Unmerge the main assistant messages\n"
+        "      if (firstMsg) {\n"
+        "        firstMsg.classList.remove('merged');\n"
+        "        firstMsg.style.display = '';\n"
+        "        parent.insertBefore(firstMsg, refNode);\n"
+        "      }\n"
+        "      if (secondMsg) {\n"
+        "        secondMsg.classList.remove('merged');\n"
+        "        secondMsg.style.display = '';\n"
+        "        parent.insertBefore(secondMsg, refNode);\n"
+        "      }\n"
+        "      wrapper.remove();\n"
+        "    }\n"
+        "  });\n"
+        "});\n"
+        "</script>",
+        "</head>",
+        "<body>",
+        '<div class="toolbar-wrap">',
+        '<div class="toolbar-hotzone"></div>',
+        '<div class="toolbar">',
+        '<button id="toggle-strong-hide"><span class="emoji-bw">🗜️</span> Strong Hide: <span id="strong-hide-state">Off</span></button>',
+        '<button id="toggle-hide-user-messages"><span class="emoji-bw">👁️</span> Hide Prompts: <span id="hide-user-state">Off</span></button>',
+        '<span id="chat-width-control" style="margin-left:8px;">',
+        '<label for="chat-width-slider"><span class="emoji-bw">↔️</span> Width:</label>',
+        '<input id="chat-width-slider" type="range" min="600" max="1600" step="50" value="900" style="width:120px; vertical-align:middle;" />',
+        '<span id="chat-width-value" style="margin-left:4px;">900px</span>',
+        "</span>",
+        '<span style="margin-left:12px;">',
+        '<label for="font-family-select"><span class="emoji-bw">🔤</span> Font:</label>',
+        '<select id="font-family-select" style="padding:2px 6px; border:1px solid var(--accent-muted); border-radius:var(--corner-radius); background:var(--bg);">',
+        "<option value=\"'Segoe UI', Tahoma, Geneva, Verdana, sans-serif\">Segoe UI</option>",
+        '<option value="Arial, sans-serif">Arial</option>',
+        "<option value=\"'Helvetica Neue', Helvetica, sans-serif\">Helvetica</option>",
+        "<option value=\"'Times New Roman', Times, serif\">Times New Roman</option>",
+        '<option value="Georgia, serif">Georgia</option>',
+        "<option value=\"'Courier New', Courier, monospace\">Courier New</option>",
+        "<option value=\"'Comic Sans MS', cursive\">Comic Sans</option>",
+        "<option value=\"'Trebuchet MS', sans-serif\">Trebuchet MS</option>",
+        '<option value="Verdana, sans-serif">Verdana</option>',
+        "<option value=\"'Palatino Linotype', 'Book Antiqua', Palatino, serif\">Palatino</option>",
+        "<option value=\"'Lucida Console', Monaco, monospace\">Lucida Console</option>",
+        "</select>",
+        "</span>",
+        '<span style="margin-left:8px;">',
+        '<label for="font-size-input"><span class="emoji-bw">📏</span> Size:</label>',
+        '<input id="font-size-input" type="number" min="8" max="24" step="1" value="14" style="width:50px;" />',
+        "<span>px</span>",
+        "</span>",
+        '<span style="margin-left:12px; display:flex; align-items:center; gap:8px;">',
+        '<label style="font-weight:600;">Agent Names:</label>',
+        f'<select id="agent0-emoji-input" style="width:65px; padding:2px 6px; border:1px solid var(--accent-muted); border-radius:var(--corner-radius); background:var(--bg);">',
+        '<option value="🤖">🤖 Robot</option>',
+        '<option value="👤">👤 Human</option>',
+        "</select>",
+        f'<input id="agent0-name-input" type="text" placeholder="{html.escape(unique_agent_ids[0]) if len(unique_agent_ids) > 0 else "Agent 0"}" style="width:80px; padding:2px 6px; border:1px solid var(--accent-muted); border-radius:var(--corner-radius); background:var(--bg);" />',
+        '<span style="margin:0 4px;">|</span>',
+        f'<select id="agent1-emoji-input" style="width:65px; padding:2px 6px; border:1px solid var(--accent-muted); border-radius:var(--corner-radius); background:var(--bg);">',
+        '<option value="🤖">🤖 Robot</option>',
+        '<option value="👤">👤 Human</option>',
+        "</select>",
+        f'<input id="agent1-name-input" type="text" placeholder="{html.escape(unique_agent_ids[1]) if len(unique_agent_ids) > 1 else "Agent 1"}" style="width:80px; padding:2px 6px; border:1px solid var(--accent-muted); border-radius:var(--corner-radius); background:var(--bg);" />',
+        '<button id="apply-agent-names" style="padding:4px 8px; border:1px solid var(--accent-muted); background:var(--panel-bg); border-radius:var(--corner-radius); cursor:pointer;">Apply</button>',
+        "</span>",
+        "</div>",
+        "</div>",
+    ]
+    # Add Chat View
+    import html as _html_mod
+    html_parts.append('<div id="flow-chat" class="messages-flow">')
+    # Helper function to add context annotation areas
+    def add_context_area(position: str, time_step: int):
+        context_key = f"round-context-{position}-{time_step}"
+        placeholder = f"Add context {position} round {time_step}..."
+        color_buttons = ""
+        # Add default/reset color button first
+        color_buttons += (
+            f'<div class="context-color-btn" data-color="default" '
+            f'style="background: linear-gradient(135deg, #000 25%, transparent 25%, transparent 75%, #000 75%), '
+            f"linear-gradient(135deg, #000 25%, transparent 25%, transparent 75%, #000 75%); "
+            f"background-size: 4px 4px; background-position: 0 0, 2px 2px; "
+            f'background-color: #fff;" title="Default color"></div>'
+        )
+        for color_name, color_value in [
+            ("red", "#d32f2f"),
+            ("orange", "#f57c00"),
+            ("yellow", "#f9a825"),
+            ("green", "#388e3c"),
+            ("blue", "#1976d2"),
+            ("purple", "#7b1fa2"),
+            ("gray", "#666666"),
+        ]:
+            color_buttons += (
+                f'<div class="context-color-btn" data-color="{color_value}" '
+                f'style="background-color: {color_value};" title="{color_name}"></div>'
+            )
+        html_parts.append(
+            f'<div class="round-context">'
+            f'<div class="round-context-edit" contenteditable="true" spellcheck="true" '
+            f'data-context-key="{context_key}" '
+            f'data-placeholder="{placeholder}"></div>'
+            f'<div class="round-context-controls">{color_buttons}</div>'
+            f"</div>"
+        )
+    # Helper function to add split agent context boxes
+    def add_split_agent_contexts(position: str, time_step: int):
+        color_buttons = ""
+        # Add default/reset color button first
+        color_buttons += (
+            f'<div class="context-color-btn" data-color="default" '
+            f'style="background: linear-gradient(135deg, #000 25%, transparent 25%, transparent 75%, #000 75%), '
+            f"linear-gradient(135deg, #000 25%, transparent 25%, transparent 75%, #000 75%); "
+            f"background-size: 4px 4px; background-position: 0 0, 2px 2px; "
+            f'background-color: #fff;" title="Default color"></div>'
+        )
+        for color_name, color_value in [
+            ("red", "#d32f2f"),
+            ("orange", "#f57c00"),
+            ("yellow", "#f9a825"),
+            ("green", "#388e3c"),
+            ("blue", "#1976d2"),
+            ("purple", "#7b1fa2"),
+            ("gray", "#666666"),
+        ]:
+            color_buttons += (
+                f'<div class="context-color-btn" data-color="{color_value}" '
+                f'style="background-color: {color_value};" title="{color_name}"></div>'
+            )
+        html_parts.append('<div class="split-agent-context">')
+        # Agent 0 box
+        agent0_key = f"agent-context-0-{position}-{time_step}"
+        agent0_placeholder = f"..."
+        html_parts.append(
+            f'<div class="agent-context-box agent-0">'
+            f'<div class="round-context-edit" contenteditable="true" spellcheck="true" '
+            f'data-context-key="{agent0_key}" '
+            f'data-placeholder="{agent0_placeholder}"></div>'
+            f'<div class="round-context-controls">{color_buttons}</div>'
+            f"</div>"
+        )
+        # Agent 1 box
+        agent1_key = f"agent-context-1-{position}-{time_step}"
+        agent1_placeholder = f"..."
+        html_parts.append(
+            f'<div class="agent-context-box agent-1">'
+            f'<div class="round-context-edit" contenteditable="true" spellcheck="true" '
+            f'data-context-key="{agent1_key}" '
+            f'data-placeholder="{agent1_placeholder}"></div>'
+            f'<div class="round-context-controls">{color_buttons}</div>'
+            f"</div>"
+        )
+        html_parts.append("</div>")  # split-agent-context
+    last_time_step_chat = None
+    for original_index, turn in indexed_turns:
+        # Use agent index for CSS class (agent-0 or agent-1) instead of agent ID
+        agent_index = agent_id_to_index.get(turn.agent_id, 0)
+        agent_class = f"agent-{agent_index}"
+        role_class = f"role-{turn.role}"
+        # Add time step divider and beginning context
+        if last_time_step_chat is None or turn.time_step != last_time_step_chat:
+            # Add end contexts for previous round (only regular context, not prompt summary)
+            if last_time_step_chat is not None:
+                add_context_area("end", last_time_step_chat)
+            html_parts.append(
+                f'<div class="chat-group-divider">'
+                f'<span class="chat-group-label">⏱ Round {turn.time_step + 1}</span>'
+                f"</div>"
+            )
+            # Add beginning contexts for new round (both context and prompt summary)
+            add_context_area("beginning", turn.time_step)
+            add_split_agent_contexts("beginning", turn.time_step)
+            last_time_step_chat = turn.time_step
+        # Build chat message with merge controls
+        html_parts.append(
+            f'<div class="chat-message {agent_class} {role_class}" data-msg-id="{original_index}">'
+        )
+        # Add merge control button
+        html_parts.append(
+            f'<button class="merge-btn" title="Merge with next message" data-msg-id="{original_index}">⇄</button>'
+        )
+        html_parts.append('<div class="chat-message-content">')
+        # Header with agent name and reward (always show reward)
+        if turn.role == "assistant":
+            name = _html_mod.escape(turn.agent_id)
+            raw_val = turn.reward
+            if isinstance(raw_val, (int, float)):
+                reward_val = f"{raw_val:.4f}".rstrip("0").rstrip(".")
+                if len(reward_val) > 8:
+                    reward_val = reward_val[:8] + "…"
+            else:
+                reward_val = str(raw_val)
+            header_html = (
+                f'<div class="chat-header">'
+                f'<span class="emoji-bw" data-agent-index="{agent_index}">🤖</span> <span class="agent-name" data-agent-index="{agent_index}">{name}</span>'
+                f'<span class="chat-reward">⚑ {reward_val}</span>'
+                f"</div>"
+            )
+        else:
+            name = _html_mod.escape(turn.agent_id)
+            header_html = f'<div class="chat-header">Prompt of <span class="agent-name" data-agent-index="{agent_index}">{name}</span></div>'
+        html_parts.append(header_html)
+        # Reasoning content if present
+        if turn.reasoning_content:
+            _raw_reasoning = turn.reasoning_content.replace("\r\n", "\n")
+            _raw_reasoning = _re.sub(r"^\s*\n+", "", _raw_reasoning)
+            esc_reasoning = _html_mod.escape(_raw_reasoning)
+            html_parts.append(
+                f'<div class="chat-reasoning collapsed">'
+                f'<span class="reasoning-icon">💭</span> '
+                f'<span class="reasoning-text">{esc_reasoning}</span>'
+                f"</div>"
+            )
+        # Message bubble
+        esc_content = _html_mod.escape(turn.content)
+        html_parts.append(f'<div class="chat-bubble">{esc_content}</div>')
+        html_parts.append("</div>")  # chat-message-content
+        html_parts.append("</div>")  # chat-message
+    # Add end contexts for the last round (only regular context, not prompt summary)
+    if last_time_step_chat is not None:
+        add_context_area("end", last_time_step_chat)
+    html_parts.append("</div>")  # flow-chat
+    html_parts.extend(["</body>", "</html>"])
+    return "\n".join(html_parts)
+def export_html_from_rollout_tree(path: Path, outdir: Path, main_only: bool = False):
+    """Process a rollout tree file and generate HTML files for each path.
+    Creates separate HTML files for the main path and each branch path.
+    The main path is saved in the root output directory, while branch paths
+    are saved in a 'branches' subdirectory.
+    Args:
+        path: Path to the rollout tree JSON file
+        outdir: Output directory for HTML files
+        main_only: If True, only export the main trajectory (default: False)
+    """
+    root = load_rollout_tree(path)
+    mgid = root.id
+    main_path, branch_paths = get_rollout_tree_paths(root)
+    outdir.mkdir(parents=True, exist_ok=True)
+    # Create branches subdirectory if we have branch paths
+    if not main_only and branch_paths:
+        branches_dir = outdir / f"mgid:{mgid}_branches_html_renders"
+        branches_dir.mkdir(parents=True, exist_ok=True)
+    # Generate HTML for the main path
+    chat_turns = gather_all_chat_turns_for_path(main_path)
+    html_content = html_from_chat_turns(chat_turns)
+    output_file = outdir / f"mgid:{mgid}_main_html_render.render.html"
+    with open(output_file, "w", encoding="utf-8") as f:
+        f.write(html_content)
+    # Generate HTML for each branch path
+    for path_obj in branch_paths:
+        chat_turns = gather_all_chat_turns_for_path(path_obj)
+        html_content = html_from_chat_turns(chat_turns)
+        path_id: str = path_obj.id
+        output_filename = f"{path_id}_html_render.render.html"
+        output_file = branches_dir / output_filename
+        with open(output_file, "w", encoding="utf-8") as f:
+            f.write(html_content)

src_code_for_reproducibility/utils/rollout_tree_gather_utils.py ADDED Viewed

	@@ -0,0 +1,314 @@

+"""
+File: mllm/utils/rollout_tree_gather_utils.py
+Summary: Utilities for gathering rollout tree files and metadata.
+"""
+from __future__ import annotations
+import csv
+import os
+import pickle
+import re
+from collections import defaultdict
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple
+from mllm.markov_games.rollout_tree import *
+def load_rollout_tree(path: Path) -> RolloutTreeRootNode:
+    """Load a rollout tree from a PKL file containing a dict."""
+    with open(path, "rb") as f:
+        data = pickle.load(f)
+    return RolloutTreeRootNode.model_validate(data)
+@dataclass
+class RolloutNodeList:
+    """An identified, ordered list of rollout tree nodes forming one path."""
+    # Path identifier, e.g. "mgid:<id>_type:main" as built by get_rollout_tree_paths.
+    id: str
+    # Nodes on the path, in the order they were collected from the tree.
+    nodes: List[RolloutTreeNode]
+def get_rollout_tree_paths(
+    root: RolloutTreeRootNode, mgid: Optional[str] = None
+) -> Tuple[RolloutNodeList, List[RolloutNodeList]]:
+    """
+    Returns:
+        main_path: The main path from the root to the end of the tree.
+        branch_paths: A list of all branch paths from the root to the end of the tree.
+        Each branch path contains a list of nodes that are part of the branch, including the nodes from the main path before the branch was taken.
+    """
+    branch_paths = []
+    def collect_path_nodes(current) -> List[RolloutTreeNode]:
+        """Recursively collect all nodes in a path starting from current node."""
+        if current is None:
+            return []
+        if isinstance(current, RolloutTreeNode):
+            return [current] + collect_path_nodes(current.child)
+        elif isinstance(current, RolloutTreeBranchNode):
+            # For branch nodes, we only follow the main_child for path collection
+            if current.main_child:
+                return [current.main_child] + collect_path_nodes(
+                    current.main_child.child
+                )
+            else:
+                return []
+    def traverse_for_branches(
+        current,
+        main_path_prefix: List[RolloutTreeNode],
+        path_id: str,
+        current_time_step: Optional[int] = 0,
+    ):
+        """Traverse tree to collect all branch paths."""
+        if current is None:
+            return
+        if isinstance(current, RolloutTreeNode):
+            # Continue traversing with this node added to the main path prefix
+            new_prefix = main_path_prefix + [current]
+            traverse_for_branches(current.child, new_prefix, path_id, current.time_step)
+        elif isinstance(current, RolloutTreeBranchNode):
+            # Collect all branch paths
+            if current.branches:
+                for agent_id, branch_node_list in current.branches.items():
+                    if branch_node_list:
+                        # Start with the main path prefix, then recursively collect all nodes in this branch
+                        branch_path_nodes = main_path_prefix.copy()
+                        for branch_node in branch_node_list:
+                            branch_path_nodes.extend(collect_path_nodes(branch_node))
+                        # Create proper branch path ID with mgid, agent_id, and time_step
+                        mgid_str = mgid or str(root.id)
+                        branch_path_id = f"mgid:{mgid_str}_type:branch_agent:{agent_id}_time_step:{current_time_step}"
+                        branch_paths.append(
+                            RolloutNodeList(id=branch_path_id, nodes=branch_path_nodes)
+                        )
+            # Process the main child and add to prefix
+            new_prefix = main_path_prefix
+            if current.main_child:
+                new_prefix = main_path_prefix + [current.main_child]
+            # Continue traversing the main path
+            if current.main_child:
+                traverse_for_branches(
+                    current.main_child.child,
+                    new_prefix,
+                    path_id,
+                    current.main_child.time_step,
+                )
+    # Collect the main path nodes
+    main_path_nodes = collect_path_nodes(root.child)
+    # Traverse to collect all branch paths
+    traverse_for_branches(root.child, [], "")
+    # Create the main path with proper mgid format
+    mgid_str = mgid or str(root.id)
+    main_path = RolloutNodeList(id=f"mgid:{mgid_str}_type:main", nodes=main_path_nodes)
+    return main_path, branch_paths
+class ChatTurnLog(BaseModel):
+    """Flattened record of one chat turn, tagged with its time step, agent and reward."""
+    # Time step of the rollout node the turn was taken from.
+    time_step: int
+    # Agent whose action log contained the turn.
+    agent_id: str
+    # Chat role; the gather helpers in this file treat "user" turns as prompts.
+    role: str
+    # Message text of the turn.
+    content: str
+    # Optional reasoning text; None when the source turn has no such attribute.
+    reasoning_content: Optional[str] = None
+    # Copied from the source chat turn's is_state_end flag.
+    is_state_end: bool
+    # The agent's reward at the node (the gather helpers use 0 when none is recorded).
+    reward: float
+def gather_agent_chat_turns_for_path(
+    agent_id: str, path: RolloutNodeList
+) -> List[ChatTurnLog]:
+    """Iterate through all chat turns for a specific agent in a path sorted by time step."""
+    turns = []
+    for node in path.nodes:
+        action_log = node.step_log.action_logs.get(agent_id, [])
+        if action_log:
+            for chat_turn in action_log.chat_turns or []:
+                turns.append(
+                    ChatTurnLog(
+                        time_step=node.time_step,
+                        agent_id=agent_id,
+                        role=chat_turn.role,
+                        content=chat_turn.content,
+                        reasoning_content=getattr(chat_turn, "reasoning_content", None),
+                        is_state_end=chat_turn.is_state_end,
+                        reward=node.step_log.simulation_step_log.rewards.get(
+                            agent_id, 0
+                        ),
+                    )
+                )
+    return turns
+def gather_all_chat_turns_for_path(path: RolloutNodeList) -> List[ChatTurnLog]:
+    """Iterate through all chat turns for all agents in a path sorted by time step."""
+    turns = []
+    # Collect turns from all agents, but interleave them per timestep by (user, assistant) pairs
+    for node in path.nodes:
+        # Build (user[, assistant]) pairs for each agent at this timestep
+        agent_ids = sorted(list(node.step_log.action_logs.keys()))
+        per_agent_pairs: Dict[str, List[List[ChatTurnLog]]] = {}
+        for agent_id in agent_ids:
+            action_log = node.step_log.action_logs.get(agent_id)
+            pairs: List[List[ChatTurnLog]] = []
+            current_pair: List[ChatTurnLog] = []
+            if action_log and action_log.chat_turns:
+                for chat_turn in action_log.chat_turns:
+                    turn_log = ChatTurnLog(
+                        time_step=node.time_step,
+                        agent_id=agent_id,
+                        role=chat_turn.role,
+                        content=chat_turn.content,
+                        reasoning_content=getattr(chat_turn, "reasoning_content", None),
+                        is_state_end=chat_turn.is_state_end,
+                        reward=node.step_log.simulation_step_log.rewards.get(
+                            agent_id, 0
+                        ),
+                    )
+                    if chat_turn.role == "user":
+                        # If a previous pair is open, close it and start a new one
+                        if current_pair:
+                            pairs.append(current_pair)
+                            current_pair = []
+                        current_pair = [turn_log]
+                    else:
+                        # assistant: attach to an open user message if present; otherwise stand alone
+                        if (
+                            current_pair
+                            and len(current_pair) == 1
+                            and current_pair[0].role == "user"
+                        ):
+                            current_pair.append(turn_log)
+                            pairs.append(current_pair)
+                            current_pair = []
+                        else:
+                            # No preceding user or already paired; treat as its own unit
+                            pairs.append([turn_log])
+                if current_pair:
+                    # Unpaired trailing user message
+                    pairs.append(current_pair)
+            per_agent_pairs[agent_id] = pairs
+        # Interleave pairs across agents: A1, B1, A2, B2, ...
+        index = 0
+        while True:
+            added_any = False
+            for agent_id in agent_ids:
+                agent_pairs = per_agent_pairs.get(agent_id, [])
+                if index < len(agent_pairs):
+                    for tl in agent_pairs[index]:
+                        turns.append(tl)
+                    added_any = True
+            if not added_any:
+                break
+            index += 1
+    return turns
+def chat_turns_to_dict(chat_turns: Iterator[ChatTurnLog]) -> Iterator[Dict[str, Any]]:
+    """Render all chat turns for a path as structured data for JSON."""
+    for chat_turn in chat_turns:
+        yield chat_turn.model_dump()
+def get_all_agents(root: RolloutTreeRootNode) -> List[str]:
+    """list of all agent IDs that appear in the tree."""
+    if root.child is None:
+        return []
+    # Get the first node to extract all agent IDs
+    first_node = root.child
+    if isinstance(first_node, RolloutTreeBranchNode):
+        first_node = first_node.main_child
+    if first_node is None:
+        return []
+    # All agents should be present in the first node
+    agents = set(first_node.step_log.action_logs.keys())
+    agents.update(first_node.step_log.simulation_step_log.rewards.keys())
+    return sorted(list(agents))
+def gather_agent_main_rewards(agent_id: str, path: RolloutNodeList) -> List[float]:
+    """Gather main rewards for a specific agent in a path."""
+    rewards = []
+    for node in path.nodes:
+        reward = node.step_log.simulation_step_log.rewards[agent_id]
+        rewards.append(reward)
+    return rewards
+def gather_all_rewards(path: RolloutNodeList) -> List[Dict[AgentId, float]]:
+    """Gather main rewards from main trajectory in a path."""
+    rewards = []
+    for node in path.nodes:
+        rewards.append(node.step_log.simulation_step_log.rewards.copy())
+    return rewards
+def gather_simulation_stats(
+    path: RolloutNodeList,
+    filter: Callable[[SimulationStepLog], bool],
+    stat_func: Callable[[SimulationStepLog], Any],
+) -> List[Any]:
+    """Gather stats from main trajectory in a path."""
+    stats = []
+    for node in path.nodes:
+        sl = node.step_log.simulation_step_log
+        if filter(sl):
+            stats.append(stat_func(sl))
+    return stats
+def gather_simulation_step_logs(path: RolloutNodeList) -> List[SimulationStepLog]:
+    """Gather simulation information from main trajectory in a path."""
+    infos = []
+    for node in path.nodes:
+        infos.append(node.step_log.simulation_step_log)
+    return infos
+def export_chat_logs(path: Path, outdir: Path):
+    """Process a rollout tree PKL file and generate a JSONL of chat turns as dicts.
+    Each line contains an object with path_id and chat_turns for a single path.
+    """
+    import json
+    root = load_rollout_tree(path)
+    mgid = root.id
+    main_path, branch_paths = get_rollout_tree_paths(root)
+    all_paths = [main_path] + branch_paths
+    outdir.mkdir(parents=True, exist_ok=True)
+    output_file = outdir / f"mgid:{mgid}_plucked_chats.render.jsonl"
+    with open(output_file, "w", encoding="utf-8") as f:
+        for path_obj in all_paths:
+            chat_turns = gather_all_chat_turns_for_path(path_obj)
+            output_obj = {
+                "path_id": str(path_obj.id),
+                "chat_turns": list(chat_turns_to_dict(iter(chat_turns))),
+            }
+            f.write(json.dumps(output_obj, ensure_ascii=False) + "\n")

src_code_for_reproducibility/utils/rollout_tree_stats.py ADDED Viewed

	@@ -0,0 +1,55 @@

+"""
+File: mllm/utils/rollout_tree_stats.py
+Summary: Computes descriptive statistics from rollout tree collections.
+"""
+from typing import Any, Callable, List, Tuple
+from mllm.markov_games.rollout_tree import RolloutTreeRootNode
+from mllm.markov_games.simulation import SimulationStepLog
+from mllm.utils.rollout_tree_gather_utils import (
+    gather_simulation_step_logs,
+    get_rollout_tree_paths,
+)
+from mllm.utils.stat_pack import StatPack
+def get_rollout_tree_stat_tally(
+    rollout_tree: RolloutTreeRootNode,
+    metrics: List[Callable[[SimulationStepLog], List[Tuple[str, float]]]],
+) -> StatPack:
+    stat_tally = StatPack()
+    # get simulation step logs
+    node_list = get_rollout_tree_paths(rollout_tree)[0]
+    simulation_step_logs = gather_simulation_step_logs(node_list)
+    for simulation_step_log in simulation_step_logs:
+        for metric in metrics:
+            metric_result = metric(simulation_step_log)
+            if metric_result is not None:
+                for key, value in metric_result:
+                    stat_tally.add_stat(key, value)
+    return stat_tally
+def get_rollout_tree_mean_stats(
+    rollout_tree: RolloutTreeRootNode, metrics: List[Callable[[SimulationStepLog], Any]]
+) -> StatPack:
+    """Get the mean stats for a rollout tree."""
+    stat_tally = get_rollout_tree_stat_tally(rollout_tree, metrics)
+    return stat_tally.mean()
+def get_mean_rollout_tree_stats(
+    rollout_trees: List[RolloutTreeRootNode],
+    metrics: List[Callable[[SimulationStepLog], Any]],
+) -> StatPack:
+    """Get the mean stats for a list of rollout trees."""
+    # Compute per-rollout means first, then aggregate them across the entire batch.
+    stat_tallies = [
+        get_rollout_tree_mean_stats(rollout_tree, metrics)
+        for rollout_tree in rollout_trees
+    ]
+    mean_stat_tally = StatPack()
+    for stat_tally in stat_tallies:
+        mean_stat_tally.add_stats(stat_tally)
+    return mean_stat_tally.mean()

src_code_for_reproducibility/utils/short_id_gen.py ADDED Viewed

	@@ -0,0 +1,16 @@

+"""
+File: mllm/utils/short_id_gen.py
+Summary: Generates short unique identifiers for experiment assets.
+"""
+import uuid
+def generate_short_id() -> int:
+    """
+    Generates a short unique ID for tracking adapter versions.
+    Returns:
+        int: An 8-digit integer ID.
+    """
+    return int(str(uuid.uuid4().int)[:8])

src_code_for_reproducibility/utils/stat_pack.py ADDED Viewed

	@@ -0,0 +1,117 @@

+"""
+File: mllm/utils/stat_pack.py
+Summary: Implements the StatPack container for incremental statistics.
+"""
+import csv
+import json
+import os
+import pickle
+from collections import Counter
+from copy import deepcopy
+from locale import strcoll
+from statistics import mean
+from typing import Any, Dict, Iterator, List, Optional, Tuple, TypedDict
+import matplotlib.pyplot as plt
+import numpy as np
+# Optionally apply a custom matplotlib style sheet: when the ADALIGN_MPLSTYLE
+# environment variable is set to a non-empty value, it is passed to plt.style.use().
+style_path = os.environ.get("ADALIGN_MPLSTYLE")
+if style_path:
+    plt.style.use(style_path)
+import wandb
+from . import wandb_utils
+class StatPack:
+    def __init__(self):
+        self.data = {}
+    def add_stat(self, key: str, value: float | int | None):
+        assert (
+            isinstance(value, float) or isinstance(value, int) or value is None
+        ), f"Value {value} is not a valid type"
+        if key not in self.data:
+            self.data[key] = []
+        self.data[key].append(value)
+    def add_stats(self, other: "StatPack"):
+        for key in other.keys():
+            self.add_stat(key, other[key])
+    def __getitem__(self, key: str):
+        return self.data[key]
+    def __setitem__(self, key: str, value: Any):
+        self.data[key] = value
+    def __contains__(self, key: str):
+        return key in self.data
+    def __len__(self):
+        return len(self.data)
+    def __iter__(self):
+        return iter(self.data)
+    def keys(self):
+        return self.data.keys()
+    def values(self):
+        return self.data.values()
+    def items(self):
+        return self.data.items()
+    def mean(self):
+        mean_st = StatPack()
+        for key in self.keys():
+            if isinstance(self[key], list):
+                # Ignore None entries so missing measurements do not bias the mean.
+                non_none_values = [v for v in self[key] if v is not None]
+                if non_none_values:
+                    mean_st[key] = np.mean(np.array(non_none_values))
+                else:
+                    mean_st[key] = None
+        return mean_st
+    def store_plots(self, folder: str):
+        os.makedirs(folder, exist_ok=True)
+        for key in self.keys():
+            plt.figure(figsize=(10, 5))
+            plt.plot(self[key])
+            plt.title(key)
+            plt.savefig(os.path.join(folder, f"{key}.pdf"))
+            plt.close()
+    def store_numpy(self, folder: str):
+        os.makedirs(folder, exist_ok=True)
+        for key in self.keys():
+            # Sanitize filename components (avoid slashes, spaces, etc.)
+            safe_key = str(key).replace(os.sep, "_").replace("/", "_").replace(" ", "_")
+            values = self[key]
+            # Convert None to NaN for numpy compatibility
+            arr = np.array(
+                [(np.nan if (v is None) else v) for v in values], dtype=float
+            )
+            np.save(os.path.join(folder, f"{safe_key}.npy"), arr)
+    def store_json(self, folder: str, filename: str = "stats.json"):
+        os.makedirs(folder, exist_ok=True)
+        with open(os.path.join(folder, filename), "w") as f:
+            json.dump(self.data, f, indent=4)
+    def store_csv(self, folder: str):
+        os.makedirs(folder, exist_ok=True)
+        for key in self.keys():
+            with open(os.path.join(folder, f"stats.csv"), "w") as f:
+                writer = csv.writer(f)
+                writer.writerow([key] + self[key])
+    def store_pickle(self, folder: str):
+        os.makedirs(folder, exist_ok=True)
+        for key in self.keys():
+            with open(os.path.join(folder, f"stats.pkl"), "wb") as f:
+                pickle.dump(self[key], f)

src_code_for_reproducibility/utils/wandb_utils.py ADDED Viewed

	@@ -0,0 +1,170 @@

+"""
+File: mllm/utils/wandb_utils.py
+Summary: Shared Weights & Biases helper functions.
+"""
+import os
+from typing import Any, Dict, Optional
+# Set to True once `import wandb` has succeeded (see _try_import_wandb).
+_WANDB_AVAILABLE = False
+# Value returned by wandb.init() in init(); stays None until a run has been started.
+_WANDB_RUN = None
+def _try_import_wandb():
+    global _WANDB_AVAILABLE
+    if _WANDB_AVAILABLE:
+        return True
+    try:
+        import wandb  # type: ignore
+        _WANDB_AVAILABLE = True
+        return True
+    except Exception:
+        _WANDB_AVAILABLE = False
+        return False
+def _safe_get(cfg: Dict[str, Any], path: list[str], default: Any = None) -> Any:
+    cur: Any = cfg
+    for key in path:
+        if not isinstance(cur, dict) or key not in cur:
+            return default
+        cur = cur[key]
+    return cur
+def is_enabled(cfg: Dict[str, Any]) -> bool:
+    return bool(_safe_get(cfg, ["logging", "wandb", "enabled"], False))
+def init(cfg: Dict[str, Any], run_dir: str, run_name: Optional[str] = None) -> None:
+    """
+    Initialize Weights & Biases if enabled in config. No-op if disabled or wandb not installed.
+    """
+    global _WANDB_RUN
+    if not is_enabled(cfg):
+        return
+    if not _try_import_wandb():
+        return
+    import wandb  # type: ignore
+    project = _safe_get(cfg, ["logging", "wandb", "project"], "llm-negotiation")
+    entity = _safe_get(cfg, ["logging", "wandb", "entity"], None)
+    mode = _safe_get(cfg, ["logging", "wandb", "mode"], "online")
+    tags = _safe_get(cfg, ["logging", "wandb", "tags"], []) or []
+    notes = _safe_get(cfg, ["logging", "wandb", "notes"], None)
+    group = _safe_get(cfg, ["logging", "wandb", "group"], None)
+    name = _safe_get(cfg, ["logging", "wandb", "name"], run_name)
+    # Ensure files are written into the hydra run directory
+    os.makedirs(run_dir, exist_ok=True)
+    os.environ.setdefault("WANDB_DIR", run_dir)
+    # Convert cfg to plain types for W&B config; fallback to minimal dictionary
+    try:
+        from omegaconf import OmegaConf  # type: ignore
+        cfg_container = OmegaConf.to_container(cfg, resolve=True)  # type: ignore
+    except Exception:
+        cfg_container = cfg
+    _WANDB_RUN = wandb.init(
+        project=project,
+        entity=entity,
+        mode=mode,
+        name=name,
+        group=group,
+        tags=tags,
+        notes=notes,
+        config=cfg_container,
+        dir=run_dir,
+        reinit=True,
+    )
+def log(metrics: Dict[str, Any], step: Optional[int] = None) -> None:
+    """Log a flat dictionary of metrics to W&B if active."""
+    if not _WANDB_AVAILABLE or _WANDB_RUN is None:
+        return
+    try:
+        import wandb  # type: ignore
+        wandb.log(metrics if step is None else dict(metrics, step=step))
+    except Exception:
+        pass
+def _flatten(prefix: str, data: Dict[str, Any], out: Dict[str, Any]) -> None:
+    for k, v in data.items():
+        key = f"{prefix}.{k}" if prefix else k
+        if isinstance(v, dict):
+            _flatten(key, v, out)
+        else:
+            out[key] = v
+def _summarize_value(value: Any) -> Dict[str, Any]:
+    import numpy as np  # local import to avoid hard dependency during disabled mode
+    if value is None:
+        return {"none": 1}
+    # Scalars
+    if isinstance(value, (int, float)):
+        return {"value": float(value)}
+    # Lists or arrays
+    try:
+        arr = np.asarray(value)
+        if arr.size == 0:
+            return {"size": 0}
+        return {
+            "mean": float(np.nanmean(arr)),
+            "min": float(np.nanmin(arr)),
+            "max": float(np.nanmax(arr)),
+            "last": float(arr.reshape(-1)[-1]),
+            "size": int(arr.size),
+        }
+    except Exception:
+        # Fallback: string repr
+        return {"text": str(value)}
+def log_tally(
+    array_tally: Dict[str, Any], prefix: str = "", step: Optional[int] = None
+) -> None:
+    """
+    Flatten and summarize Tally.array_tally and log to WandB.
+    Each leaf list/array is summarized with mean/min/max/last/size.
+    """
+    if not _WANDB_AVAILABLE or _WANDB_RUN is None:
+        return
+    summarized: Dict[str, Any] = {}
+    def walk(node: Any, path: list[str]):
+        if isinstance(node, dict):
+            for k, v in node.items():
+                walk(v, path + [k])
+            return
+        # node is a list of values accumulated over time
+        key = ".".join([p for p in ([prefix] if prefix else []) + path])
+        try:
+            summary = _summarize_value(node)
+            for sk, sv in summary.items():
+                summarized[f"{key}.{sk}"] = sv
+        except Exception:
+            summarized[f"{key}.error"] = 1
+    walk(array_tally, [])
+    if summarized:
+        log(summarized, step=step)
+def log_flat_stats(
+    stats: Dict[str, Any], prefix: str = "", step: Optional[int] = None
+) -> None:
+    if not _WANDB_AVAILABLE or _WANDB_RUN is None:
+        return
+    flat: Dict[str, Any] = {}
+    _flatten(prefix, stats, flat)
+    if flat:
+        log(flat, step=step)