Muqeeth committed on Nov 27, 2025

Commit

3346216

verified ·

1 Parent(s): 1f4f273

Add files using upload-large-folder tool

Browse files

Files changed (50) hide show

.hydra/config.yaml +173 -0
.hydra/hydra.yaml +154 -0
.hydra/overrides.yaml +1 -0
run.log +0 -0
seed_9999/Qwen/Qwen2.5-7B-Instruct/adapters/README.md +207 -0
seed_9999/Qwen/Qwen2.5-7B-Instruct/adapters/agent_adapter/adapter_config.json +42 -0
seed_9999/Qwen/Qwen2.5-7B-Instruct/adapters/critic_adapter/adapter_config.json +42 -0
src_code_for_reproducibility/__init__.py +0 -0
src_code_for_reproducibility/docs/source/contributing.rst +0 -0
src_code_for_reproducibility/docs/source/environments/dond.rst +410 -0
src_code_for_reproducibility/docs/source/launch.rst +0 -0
src_code_for_reproducibility/docs/source/media/runbatch.png +0 -0
src_code_for_reproducibility/docs/source/modules.rst +7 -0
src_code_for_reproducibility/docs/source/src.environments.dond.dond_game.rst +7 -0
src_code_for_reproducibility/docs/source/src.environments.dond.dond_log_funcs.rst +7 -0
src_code_for_reproducibility/docs/source/src.environments.dond.dond_player.rst +7 -0
src_code_for_reproducibility/docs/source/src.environments.environment_imports.rst +7 -0
src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_agent.rst +7 -0
src_code_for_reproducibility/docs/source/src.environments.rst +25 -0
src_code_for_reproducibility/docs/source/src.experiments.dond_run_train.rst +7 -0
src_code_for_reproducibility/docs/source/src.experiments.last_completion.rst +7 -0
src_code_for_reproducibility/docs/source/src.generation.run_games.rst +7 -0
src_code_for_reproducibility/docs/source/src.models.dummy_local_llm.rst +7 -0
src_code_for_reproducibility/docs/source/src.models.new_local_llm.rst +7 -0
src_code_for_reproducibility/docs/source/src.models.oai_agent.rst +7 -0
src_code_for_reproducibility/docs/source/src.models.server_llm.rst +7 -0
src_code_for_reproducibility/docs/source/src.models.updatable_worker.rst +7 -0
src_code_for_reproducibility/docs/source/src.rst +28 -0
src_code_for_reproducibility/docs/source/src.training.ppo_train.rst +7 -0
src_code_for_reproducibility/docs/source/src.training.ppo_train_value_head.rst +7 -0
src_code_for_reproducibility/docs/source/src.training.rl_convs_processing.rst +7 -0
src_code_for_reproducibility/docs/source/src.utils.extra_stats.rst +7 -0
src_code_for_reproducibility/docs/source/src.utils.inherit_args.rst +7 -0
src_code_for_reproducibility/docs/source/src.utils.parallel_shuffle.rst +7 -0
src_code_for_reproducibility/docs/source/src.utils.quick_stats.rst +7 -0
src_code_for_reproducibility/docs/source/src.utils.update_start_epoch.rst +7 -0
src_code_for_reproducibility/docs/source/usage.rst +0 -0
src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_simulation.cpython-312.pyc +0 -0
src_code_for_reproducibility/utils/__init__.py +0 -0
src_code_for_reproducibility/utils/dict_get_path.py +12 -0
src_code_for_reproducibility/utils/gather_training_stats.py +257 -0
src_code_for_reproducibility/utils/get_stochastic_game_lengths.py +30 -0
src_code_for_reproducibility/utils/kill_sglang.py +17 -0
src_code_for_reproducibility/utils/output_source_code.py +6 -0
src_code_for_reproducibility/utils/resource_context.py +78 -0
src_code_for_reproducibility/utils/rollout_tree_chat_htmls.py +1921 -0
src_code_for_reproducibility/utils/rollout_tree_gather_utils.py +314 -0
src_code_for_reproducibility/utils/rollout_tree_stats.py +50 -0
src_code_for_reproducibility/utils/update_start_epoch.py +9 -0
src_code_for_reproducibility/utils/wandb_utils.py +164 -0

.hydra/config.yaml ADDED Viewed

	@@ -0,0 +1,173 @@

+experiment:
+  wandb_enabled: true
+  nb_epochs: 3000
+  nb_matches_per_iteration: 64
+  reinit_matches_each_it: true
+  checkpoint_every_n_iterations: 50
+  start_epoch: 0
+  resume_experiment: true
+  base_seed: 9999
+  seed_group_size: 8
+  train: true
+  stat_methods_for_live_wandb: mllm.markov_games.negotiation.negotiation_statistics
+  name: tas_rps_startend_naive_seed9999
+  agent_buffer: false
+  keep_agent_buffer_count: ${lora_count}
+  agent_buffer_recent_k: -1
+  description: Trust-and-Split Rock Paper Scissors negotiation game
+logging:
+  wandb:
+    enabled: false
+    project: llm-negotiation
+    entity: null
+    mode: online
+    name: null
+    group: null
+    tags: []
+    notes: null
+temperature: 1.0
+markov_games:
+  runner_method_name: LinearRunner
+  runner_kwargs: {}
+  group_by_round: true
+  simulation_class_name: TrustAndSplitRPSSimulation
+  simulation_init_args:
+    nb_of_rounds: 10
+    quota_messages_per_agent_per_round: 1
+    alternating_hands: false
+  agents:
+    0:
+      agent_id: ${agent_0_id}
+      agent_name: Alice
+      agent_class_name: TrustAndSplitRPSAgent
+      policy_id: base_llm/agent_adapter
+      init_kwargs:
+        goal: Maximize your total points over the whole game.
+        num_message_chars: 500
+        message_start_end_format: true
+        proposal_start_end_format: true
+    1:
+      agent_id: ${agent_1_id}
+      agent_name: Bob
+      agent_class_name: TrustAndSplitRPSAgent
+      policy_id: base_llm/agent_adapter
+      init_kwargs:
+        goal: Maximize your total points over the whole game.
+        num_message_chars: 500
+        message_start_end_format: true
+        proposal_start_end_format: true
+models:
+  base_llm:
+    class: LeanLocalLLM
+    init_args:
+      llm_id: base_llm
+      model_name: Qwen/Qwen2.5-7B-Instruct
+      inference_backend: vllm
+      hf_kwargs:
+        device_map: auto
+        torch_dtype: bfloat16
+        max_memory:
+          0: 20GiB
+        attn_implementation: flash_attention_2
+      inference_backend_init_kwargs:
+        enable_lora: true
+        seed: ${experiment.base_seed}
+        enable_prefix_caching: true
+        max_model_len: 10000.0
+        gpu_memory_utilization: 0.5
+        dtype: bfloat16
+        trust_remote_code: true
+        max_lora_rank: 32
+        enforce_eager: false
+        max_loras: ${lora_count}
+        max_cpu_loras: ${lora_count}
+        enable_sleep_mode: true
+      inference_backend_sampling_params:
+        temperature: ${temperature}
+        top_p: 1.0
+        max_tokens: 400
+        top_k: -1
+        logprobs: 0
+      adapter_configs:
+        agent_adapter:
+          task_type: CAUSAL_LM
+          r: 32
+          lora_alpha: 64
+          lora_dropout: 0.0
+          target_modules: all-linear
+        critic_adapter:
+          task_type: CAUSAL_LM
+          r: 32
+          lora_alpha: 64
+          lora_dropout: 0.0
+          target_modules: all-linear
+      enable_thinking: null
+      regex_max_attempts: 1
+critics:
+  agent_critic:
+    module_pointer:
+    - base_llm
+    - critic_adapter
+optimizers:
+  agent_optimizer:
+    module_pointer:
+    - base_llm
+    - agent_adapter
+    optimizer_class_name: torch.optim.Adam
+    init_args:
+      lr: 3.0e-06
+      weight_decay: 0.0
+  critic_optimizer:
+    module_pointer: agent_critic
+    optimizer_class_name: torch.optim.Adam
+    init_args:
+      lr: 3.0e-06
+      weight_decay: 0.0
+trainers:
+  agent_trainer:
+    class: TrainerNaive
+    module_pointers:
+      policy:
+      - base_llm
+      - agent_adapter
+      policy_optimizer: agent_optimizer
+      critic: agent_critic
+      critic_optimizer: critic_optimizer
+    kwargs:
+      entropy_coeff: 0.0
+      entropy_topk: null
+      entropy_mask_regex: null
+      kl_coeff: 0.001
+      gradient_clipping: 1.0
+      restrict_tokens: null
+      mini_batch_size: 1
+      use_gradient_checkpointing: true
+      temperature: ${temperature}
+      device: cuda:0
+      use_gae: false
+      whiten_advantages: false
+      whiten_advantages_time_step_wise: false
+      skip_discounted_state_visitation: true
+      use_gae_lambda_annealing: false
+      gae_lambda_annealing_method: None
+      gae_lambda_annealing_method_params: None
+      gae_lambda_annealing_limit: 0.95
+      discount_factor: 0.96
+      use_rloo: true
+      enable_tokenwise_logging: false
+      pg_loss_normalization: nb_tokens
+      truncated_importance_sampling_ratio_cap: 2.0
+      reward_normalizing_constant: 100.0
+train_on_which_data:
+  agent_trainer: ${agent_ids}
+lora_count: 30
+common_agent_kwargs:
+  goal: Maximize your total points over the whole game.
+  num_message_chars: 500
+  message_start_end_format: true
+  proposal_start_end_format: true
+agent_0_id: Alice
+agent_1_id: Bob
+agent_ids:
+- Alice
+- Bob

.hydra/hydra.yaml ADDED Viewed

	@@ -0,0 +1,154 @@

+hydra:
+  run:
+    dir: ${oc.env:SCRATCH}/llm_negotiation/${now:%Y_%m}/${experiment.name}
+  sweep:
+    dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
+    subdir: ${hydra.job.num}
+  launcher:
+    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
+  sweeper:
+    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
+    max_batch_size: null
+    params: null
+  help:
+    app_name: ${hydra.job.name}
+    header: '${hydra.help.app_name} is powered by Hydra.
+      '
+    footer: 'Powered by Hydra (https://hydra.cc)
+      Use --hydra-help to view Hydra specific help
+      '
+    template: '${hydra.help.header}
+      == Configuration groups ==
+      Compose your configuration from those groups (group=option)
+      $APP_CONFIG_GROUPS
+      == Config ==
+      Override anything in the config (foo.bar=value)
+      $CONFIG
+      ${hydra.help.footer}
+      '
+  hydra_help:
+    template: 'Hydra (${hydra.runtime.version})
+      See https://hydra.cc for more info.
+      == Flags ==
+      $FLAGS_HELP
+      == Configuration groups ==
+      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
+      to command line)
+      $HYDRA_CONFIG_GROUPS
+      Use ''--cfg hydra'' to Show the Hydra config.
+      '
+    hydra_help: ???
+  hydra_logging:
+    version: 1
+    formatters:
+      simple:
+        format: '[%(asctime)s][HYDRA] %(message)s'
+    handlers:
+      console:
+        class: logging.StreamHandler
+        formatter: simple
+        stream: ext://sys.stdout
+    root:
+      level: INFO
+      handlers:
+      - console
+    loggers:
+      logging_example:
+        level: DEBUG
+    disable_existing_loggers: false
+  job_logging:
+    version: 1
+    formatters:
+      simple:
+        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
+    handlers:
+      console:
+        class: logging.StreamHandler
+        formatter: simple
+        stream: ext://sys.stdout
+      file:
+        class: logging.FileHandler
+        formatter: simple
+        filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
+    root:
+      level: INFO
+      handlers:
+      - console
+      - file
+    disable_existing_loggers: false
+  env: {}
+  mode: RUN
+  searchpath: []
+  callbacks: {}
+  output_subdir: .hydra
+  overrides:
+    hydra:
+    - hydra.mode=RUN
+    task: []
+  job:
+    name: run
+    chdir: false
+    override_dirname: ''
+    id: ???
+    num: ???
+    config_name: tas_rps_startend_naive_seed9999.yaml
+    env_set: {}
+    env_copy: []
+    config:
+      override_dirname:
+        kv_sep: '='
+        item_sep: ','
+        exclude_keys: []
+  runtime:
+    version: 1.3.2
+    version_base: '1.1'
+    cwd: /scratch/m/muqeeth/llm_negotiation
+    config_sources:
+    - path: hydra.conf
+      schema: pkg
+      provider: hydra
+    - path: /scratch/m/muqeeth/llm_negotiation/configs
+      schema: file
+      provider: main
+    - path: ''
+      schema: structured
+      provider: schema
+    output_dir: /scratch/m/muqeeth/llm_negotiation/2025_11/tas_rps_startend_naive_seed9999
+    choices:
+      hydra/env: default
+      hydra/callbacks: null
+      hydra/job_logging: default
+      hydra/hydra_logging: default
+      hydra/hydra_help: default
+      hydra/help: default
+      hydra/sweeper: basic
+      hydra/launcher: basic
+      hydra/output: default
+  verbose: false

.hydra/overrides.yaml ADDED Viewed

	@@ -0,0 +1 @@


1	+ []

run.log ADDED Viewed

The diff for this file is too large to render. See raw diff

seed_9999/Qwen/Qwen2.5-7B-Instruct/adapters/README.md ADDED Viewed

	@@ -0,0 +1,207 @@

+---
+base_model: Qwen/Qwen2.5-7B-Instruct
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:Qwen/Qwen2.5-7B-Instruct
+- lora
+- transformers
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.17.1

seed_9999/Qwen/Qwen2.5-7B-Instruct/adapters/agent_adapter/adapter_config.json ADDED Viewed

	@@ -0,0 +1,42 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "Qwen/Qwen2.5-7B-Instruct",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "qalora_group_size": 16,
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "v_proj",
+    "down_proj",
+    "q_proj",
+    "up_proj",
+    "gate_proj",
+    "o_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

seed_9999/Qwen/Qwen2.5-7B-Instruct/adapters/critic_adapter/adapter_config.json ADDED Viewed

	@@ -0,0 +1,42 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "Qwen/Qwen2.5-7B-Instruct",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_bias": false,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "qalora_group_size": 16,
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "v_proj",
+    "down_proj",
+    "q_proj",
+    "up_proj",
+    "gate_proj",
+    "o_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

src_code_for_reproducibility/__init__.py ADDED Viewed

File without changes

src_code_for_reproducibility/docs/source/contributing.rst ADDED Viewed

File without changes

src_code_for_reproducibility/docs/source/environments/dond.rst ADDED Viewed

	@@ -0,0 +1,410 @@

+=================
+Deal or No Deal
+=================
+The Deal or No Deal (DoND) environment provides a multi-agent negotiation interface where players trade
+items with different values. This document describes the API for interacting with the DoND environment
+and its associated agent handler.
+Overview
+--------
+Deal or No Deal is a negotiation game where two agents must agree on how to divide a set of items,
+each of which has different values to each agent. The agents engage in a back-and-forth dialogue to
+determine an allocation of the items, with each trying to maximize their own total value.
+Our implementation follows the Multi-Agent Negotiation Environment standard, allowing it to be used
+with LLM agents through a text-based interface.
+Game Rules
+----------
+Basic Structure
+~~~~~~~~~~~~~~~
+The core mechanics of Deal or No Deal are:
+1. Two agents negotiate over a set of items (e.g., books, balls, hats)
+2. Each item has:
+   - A specific quantity (how many of each item are available)
+   - A value for each agent (which may differ between agents)
+3. Agents take turns sending messages to negotiate how to split the items
+4. Once an agreement is reached, agents finalize the deal
+5. Points are awarded based on the value of items each agent receives
+Detailed Gameplay
+~~~~~~~~~~~~~~~~~
+#### Setup Phase
+The game begins with:
+- A set of items (e.g., "book", "hat", "ball")
+- Each item has a quantity (e.g., 6 books, 2 hats, 4 balls)
+- Each agent has private values for each item (e.g., books might be worth 5 points to one agent but only 2 points to the other)
+- Agents are assigned roles (starting negotiator and responding negotiator)
+#### Negotiation Phase
+1. Agents take turns sending free-form text messages to each other
+2. Messages can include offers, counter-offers, questions, or strategic communication
+3. There is a maximum number of messages permitted (preventing endless negotiations)
+4. Either agent can propose to finalize an agreement at any time
+For example:
+- Agent 1: "I propose I get all the books and you get all the hats and balls."
+- Agent 2: "That doesn't work for me. How about you get 3 books and I get 3 books, all the hats, and all the balls?"
+- Agent 1: "Let me counter-offer: I get 4 books and 2 balls, you get 2 books, all hats, and 2 balls."
+#### Finalization Phase
+1. When an agent wants to finalize a deal, they must specify the exact allocation:
+   - How many of each item they receive
+   - How many of each item the other agent receives
+2. The other agent must then either agree (by submitting the same allocation) or reject the finalization
+3. If both agents submit matching finalizations, the deal is executed
+4. If finalizations don't match, no agreement is reached, and both agents receive 0 points
+#### Scoring
+1. Each agent's score is calculated based on the value of items they receive
+2. The formula is: Sum(quantity_of_item_i × value_of_item_i_to_agent)
+3. If no agreement is reached, both agents receive 0 points
+Example Game
+~~~~~~~~~~~~
+Let's walk through a simple example:
+**Setup:**
+- Items: Books (4), Hats (2), Balls (6)
+- Agent 1 values: Books=5, Hats=1, Balls=2
+- Agent 2 values: Books=3, Hats=6, Balls=1
+**Negotiation (simplified):**
+1. Agent 1: "I would like all the books and balls. You can have the hats."
+2. Agent 2: "That doesn't work for me. Books are valuable. I propose I get all the hats and 2 books, you get 2 books and all the balls."
+3. Agent 1: "How about I get 3 books and all the balls, and you get 1 book and all the hats?"
+4. Agent 2: "I accept your proposal."
+**Finalization:**
+- Agent 1 submits: Agent 1 gets (Books: 3, Hats: 0, Balls: 6), Agent 2 gets (Books: 1, Hats: 2, Balls: 0)
+- Agent 2 submits the same allocation, confirming agreement
+**Scoring:**
+- Agent 1 score: (3 books × 5) + (0 hats × 1) + (6 balls × 2) = 15 + 0 + 12 = 27 points
+- Agent 2 score: (1 book × 3) + (2 hats × 6) + (0 balls × 1) = 3 + 12 + 0 = 15 points
+Game Variations
+~~~~~~~~~~~~~~~
+The DoND environment supports several variations through configuration parameters:
+#### Different Value Distributions
+The environment offers multiple ways to assign values to items:
+1. **Standard Random Setup (dond_random_setup)**:
+   - Items have even-numbered quantities
+   - Each agent receives distinct random values for each item
+   - Values are drawn from a uniform distribution
+2. **Independent Random Values (independent_random_vals)**:
+   - Item quantities can be any number in the specified range
+   - Values for each agent are drawn independently
+   - Creates more varied negotiation scenarios
+3. **Bicameral Value Distribution (bicameral_vals_assignator)**:
+   - Creates a "high value" and "low value" distribution for each item
+   - Each agent values approximately half the items highly and half lowly
+   - Values are drawn from normal distributions with different means
+   - Creates scenarios with clear trade opportunities
+#### Visibility Options
+1. **Finalization Visibility**:
+   - When enabled, both agents can see each other's finalization proposals
+   - When disabled, finalization proposals remain private until both are submitted
+2. **Other Values Visibility**:
+   - When enabled, agents can see each other's value functions
+   - When disabled, agents only know their own values
+   - Creates information asymmetry and richer negotiation dynamics
+#### Game Modes
+1. **Cooperative Mode ("coop")**:
+   - Agents are encouraged to find mutually beneficial solutions
+   - Success is measured by the sum of both agents' scores
+2. **Competitive Mode ("comp")**:
+   - Agents aim to maximize their individual scores
+   - Creates more adversarial negotiations
+#### Round Structure
+1. **Single Round**:
+   - One negotiation session between the same agents
+   - Simple evaluation of negotiation skills
+2. **Multiple Rounds**:
+   - Agents negotiate multiple times with different item setups
+   - Allows for learning and adaptation over time
+   - Roles can be swapped between rounds
+DondEnv
+------------
+The ``DondEnv`` class provides an interface to the Deal or No Deal environment that follows the Multi-Agent
+Negotiation Environment standard.
+.. code-block:: python
+    class DondEnv:
+        """
+        Multi-Agent Negotiation Environment for Deal or No Deal.
+        """
+        def __init__(
+            self,
+            agents,
+            mode="coop",
+            max_messages=None,
+            min_messages=None,
+            max_chars_per_message=None,
+            rounds_per_game=1,
+            random_setup_func=None,
+            random_setup_kwargs=None,
+            role_assignator_func=None,
+            role_assignator_func_kwargs=None,
+            finalization_visibility=False,
+            other_values_visibility=False,
+            random_seed=None
+        ):
+            """Initialize the Deal or No Deal environment.
+            Args:
+                agents: List of agent IDs participating in the game
+                mode: Game mode ("coop" or "comp")
+                max_messages: Maximum number of messages per agent per round
+                min_messages: Minimum number of messages per agent per round
+                max_chars_per_message: Maximum characters per message
+                rounds_per_game: Number of negotiation rounds to play
+                random_setup_func: Function to generate item quantities and values
+                random_setup_kwargs: Arguments for the random setup function
+                role_assignator_func: Function to assign roles to agents
+                role_assignator_func_kwargs: Arguments for the role assignator
+                finalization_visibility: Whether agents can see each other's finalizations
+                other_values_visibility: Whether agents can see each other's values
+                random_seed: Seed for reproducibility
+            """
+            # ...
+        def reset(self):
+            """Reset the environment to an initial state and return the initial observation.
+            Returns:
+                observation (dict): A dictionary where keys are agent identifiers and values are observations.
+            """
+            # ...
+        def step(self, actions):
+            """Take a step in the environment using the provided actions.
+            Args:
+                actions (dict): A dictionary where keys are agent identifiers and values are actions.
+                    Actions can be messages or finalization proposals.
+            Returns:
+                observations (dict): A dictionary where keys are agent identifiers and values are observations.
+                done (bool): Whether the episode has ended.
+                info (dict): Additional information about the environment.
+            """
+            # ...
+        def get_state(self):
+            """Retrieve the current state of the game.
+            Returns:
+                state (dict): The current state of the game, including items, quantities, values, etc.
+            """
+            # ...
+Key Implementation Details
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+The ``DondEnv`` class implements several key features:
+1. **Multi-Agent Support**: The environment tracks two agents and manages their alternating messages.
+2. **Turn-Based Dialogue**: The environment enforces turn structure and limits on message count.
+3. **Finalization Processing**: The environment validates and processes finalization proposals.
+4. **Random Setup**: The environment supports multiple methods of generating negotiation scenarios.
+5. **Round Management**: The environment can handle multiple rounds with different setups.
+Observation Structure
+~~~~~~~~~~~~~~~~~~~~~
+Each agent receives an observation (state) dictionary with rich information about the game:
+.. code-block:: python
+    {
+        "mode": str,                 # Game mode ("coop" or "comp")
+        "role_values": dict,         # Value mappings for each role
+        "role_props": dict,          # Properties for each role
+        "agent_to_role": dict,       # Mapping from agent IDs to roles
+        "is_new_round": bool,        # Whether this is the start of a new round
+        "is_new_game": bool,         # Whether this is the start of a new game
+        "game_over": bool,           # Whether the game is over
+        "items": list,               # List of item names
+        "quantities": dict,          # Quantities of each item
+        "has_finalized": bool,       # Whether finalization has been proposed
+        "last_message": dict,        # The last message sent
+        "messages_remaining": dict,  # Number of messages each agent can still send
+        # And various history tracking fields
+    }
+Action Structure
+~~~~~~~~~~~~~~~~
+Actions can be:
+1. **Text Messages**: Free-form text for negotiation.
+2. **Finalization Proposals**: Structured data specifying the exact allocation of items.
+Example finalization format:
+.. code-block:: python
+    {
+        "type": "finalize",
+        "allocation": {
+            "agent1": {"book": 3, "hat": 0, "ball": 6},
+            "agent2": {"book": 1, "hat": 2, "ball": 0}
+        }
+    }
+Value Setup Functions
+---------------------
+The DoND environment provides several functions for setting up item values:
+.. code-block:: python
+    def dond_random_setup(items, min_quant, max_quant, min_val, max_val, random_seed=None):
+        """
+        Generates items, even-numbered quantities and distinct random values for each category for both agents.
+        Args:
+            items (list): List of items.
+            min_quant (int): Minimum quantity per item.
+            max_quant (int): Maximum quantity per item.
+            min_val (int): Minimum value per item.
+            max_val (int): Maximum value per item.
+            random_seed (int, optional): Seed for random generation.
+        Returns:
+            tuple: (items, quantities, (val_starting_negotiator, val_responding_negotiator))
+        """
+        # ...
+    def independent_random_vals(items, min_quant, max_quant, min_val, max_val, random_seed=None):
+        """
+        Generates random quantities and independent random values for both agents.
+        Args:
+            Similar to dond_random_setup
+        Returns:
+            tuple: (items, quantities, (val_starting_negotiator, val_responding_negotiator))
+        """
+        # ...
+    def bicameral_vals_assignator(items, min_quant, max_quant, low_val_mean, low_val_std, high_val_mean, high_val_std, random_seed=None):
+        """
+        Generates values with a bicameral distribution - each agent values half the items highly.
+        Args:
+            items (list): List of items.
+            min_quant, max_quant: Range for quantities
+            low_val_mean, low_val_std: Mean and standard deviation for the "low value" distribution
+            high_val_mean, high_val_std: Mean and standard deviation for the "high value" distribution
+            random_seed: Seed for reproducibility
+        Returns:
+            tuple: (items, quantities, (val_starting_negotiator, val_responding_negotiator))
+        """
+        # ...
+Running DoND Games
+----------------------
+To run Deal or No Deal games with LLM agents, you can use the following structure:
+.. code-block:: python
+    from mllm.environments.dond.dond_game import DondEnv
+    from mllm.environments.dond.dond_agent import DondAgent
+    from src.run_matches import run_batched_matches
+    # Create environment
+    env = DondEnv(
+        agents=["agent1", "agent2"],
+        mode="coop",
+        max_messages=10,
+        rounds_per_game=1,
+        random_setup_func="dond_random_setup",
+        random_setup_kwargs={
+            "items": ["book", "hat", "ball"],
+            "min_quant": 2,
+            "max_quant": 8,
+            "min_val": 1,
+            "max_val": 10
+        },
+        finalization_visibility=False
+    )
+    # Create agent handlers (implementation details would vary)
+    agent_handlers = {
+        "agent1": DondAgent(agent_id="agent1"),
+        "agent2": DondAgent(agent_id="agent2")
+    }
+    # Define policy mapping
+    policy_mapping = {
+        "llm_policy": my_llm_policy_function
+    }
+    # Run the game
+    game_results = run_batched_matches(
+        envs=[env],
+        agent_handlers_per_env=[agent_handlers],
+        policy_mapping=policy_mapping,
+        max_parallel_matches=1
+    )
+Limitations and Considerations
+------------------------------
+1. **Negotiation Complexity**: The open-ended nature of negotiations can be challenging for some LLM agents.
+2. **Parsing Challenges**: Extracting structured finalization proposals from free-form text requires robust parsing.
+3. **Optimization Opportunities**: Different agents may employ different negotiation strategies to optimize outcomes.
+4. **Fairness Evaluation**: The environment allows research into questions of fair division and Pareto optimality.
+5. **Strategic Deception**: Agents might strategically misrepresent their true values, adding complexity to negotiations.
+Advanced Usage
+--------------
+For advanced usage, you can:
+1. **Custom Value Functions**: Create more complex distributions of item values for specific research questions.
+2. **Novel Negotiation Scenarios**: Design item sets and values to test specific negotiation skills.
+3. **Curriculum Learning**: Create progressively more difficult negotiation scenarios.
+4. **Communication Analysis**: Analyze the language and strategies used in successful negotiations.
+5. **Multi-Round Dynamics**: Study how agents adapt their strategies over multiple rounds.

src_code_for_reproducibility/docs/source/launch.rst ADDED Viewed

File without changes

src_code_for_reproducibility/docs/source/media/runbatch.png ADDED Viewed

src_code_for_reproducibility/docs/source/modules.rst ADDED Viewed

	@@ -0,0 +1,7 @@

+src
+===
+.. toctree::
+   :maxdepth: 4
+   src

src_code_for_reproducibility/docs/source/src.environments.dond.dond_game.rst ADDED Viewed

	@@ -0,0 +1,7 @@

+src.environments.dond.dond\_game module
+=======================================
+.. automodule:: src.environments.dond.dond_game
+   :members:
+   :undoc-members:
+   :show-inheritance:

src_code_for_reproducibility/docs/source/src.environments.dond.dond_log_funcs.rst ADDED Viewed

	@@ -0,0 +1,7 @@

+src.environments.dond.dond\_log\_funcs module
+=============================================
+.. automodule:: src.environments.dond.dond_log_funcs
+   :members:
+   :undoc-members:
+   :show-inheritance:

src_code_for_reproducibility/docs/source/src.environments.dond.dond_player.rst ADDED Viewed

	@@ -0,0 +1,7 @@

+src.environments.dond.dond\_agent module
+=========================================
+.. automodule:: src.environments.dond.dond_agent
+   :members:
+   :undoc-members:
+   :show-inheritance:

src_code_for_reproducibility/docs/source/src.environments.environment_imports.rst ADDED Viewed

	@@ -0,0 +1,7 @@

+src.environments.environment\_imports module
+============================================
+.. automodule:: src.environments.environment_imports
+   :members:
+   :undoc-members:
+   :show-inheritance:

src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_agent.rst ADDED Viewed

	@@ -0,0 +1,7 @@

+src.environments.ipd.ipd\_agent module
+======================================
+.. automodule:: src.environments.ipd.ipd_agent
+   :members:
+   :undoc-members:
+   :show-inheritance:

src_code_for_reproducibility/docs/source/src.environments.rst ADDED Viewed

	@@ -0,0 +1,25 @@

+src.environments package
+========================
+.. automodule:: src.environments
+   :members:
+   :undoc-members:
+   :show-inheritance:
+Subpackages
+-----------
+.. toctree::
+   :maxdepth: 4
+   src.environments.dond
+   src.environments.ipd
+Submodules
+----------
+.. toctree::
+   :maxdepth: 4
+   src.environments.env_imports
+   src.environments.environment_imports

src_code_for_reproducibility/docs/source/src.experiments.dond_run_train.rst ADDED Viewed

	@@ -0,0 +1,7 @@

+src.experiments.dond\_run\_train module
+=======================================
+.. automodule:: src.experiments.dond_run_train
+   :members:
+   :undoc-members:
+   :show-inheritance:

src_code_for_reproducibility/docs/source/src.experiments.last_completion.rst ADDED Viewed

	@@ -0,0 +1,7 @@

+src.experiments.last\_completion module
+=======================================
+.. automodule:: src.experiments.last_completion
+   :members:
+   :undoc-members:
+   :show-inheritance:

src_code_for_reproducibility/docs/source/src.generation.run_games.rst ADDED Viewed

	@@ -0,0 +1,7 @@

+src.generation.run\_games module
+================================
+.. automodule:: src.generation.run_games
+   :members:
+   :undoc-members:
+   :show-inheritance:

src_code_for_reproducibility/docs/source/src.models.dummy_local_llm.rst ADDED Viewed

	@@ -0,0 +1,7 @@

+src.models.dummy\_local\_llm module
+===================================
+.. automodule:: src.models.dummy_local_llm
+   :members:
+   :undoc-members:
+   :show-inheritance:

src_code_for_reproducibility/docs/source/src.models.new_local_llm.rst ADDED Viewed

	@@ -0,0 +1,7 @@

+src.models.new\_local\_llm module
+=================================
+.. automodule:: src.models.new_local_llm
+   :members:
+   :undoc-members:
+   :show-inheritance:

src_code_for_reproducibility/docs/source/src.models.oai_agent.rst ADDED Viewed

	@@ -0,0 +1,7 @@

+src.models.oai\_agent module
+============================
+.. automodule:: src.models.oai_agent
+   :members:
+   :undoc-members:
+   :show-inheritance:

src_code_for_reproducibility/docs/source/src.models.server_llm.rst ADDED Viewed

	@@ -0,0 +1,7 @@

+src.models.server\_llm module
+=============================
+.. automodule:: src.models.server_llm
+   :members:
+   :undoc-members:
+   :show-inheritance:

src_code_for_reproducibility/docs/source/src.models.updatable_worker.rst ADDED Viewed

	@@ -0,0 +1,7 @@

+src.models.updatable\_worker module
+===================================
+.. automodule:: src.models.updatable_worker
+   :members:
+   :undoc-members:
+   :show-inheritance:

src_code_for_reproducibility/docs/source/src.rst ADDED Viewed

	@@ -0,0 +1,28 @@

+src package
+===========
+.. automodule:: src
+   :members:
+   :undoc-members:
+   :show-inheritance:
+Subpackages
+-----------
+.. toctree::
+   :maxdepth: 4
+   src.environments
+   src.experiments
+   src.generation
+   src.models
+   src.training
+   src.utils
+Submodules
+----------
+.. toctree::
+   :maxdepth: 4
+   src.run

src_code_for_reproducibility/docs/source/src.training.ppo_train.rst ADDED Viewed

	@@ -0,0 +1,7 @@

+src.training.ppo\_train module
+==============================
+.. automodule:: src.training.ppo_train
+   :members:
+   :undoc-members:
+   :show-inheritance:

src_code_for_reproducibility/docs/source/src.training.ppo_train_value_head.rst ADDED Viewed

	@@ -0,0 +1,7 @@

+src.training.ppo\_train\_value\_head module
+===========================================
+.. automodule:: src.training.ppo_train_value_head
+   :members:
+   :undoc-members:
+   :show-inheritance:

src_code_for_reproducibility/docs/source/src.training.rl_convs_processing.rst ADDED Viewed

	@@ -0,0 +1,7 @@

+src.training.rl\_convs\_processing module
+=========================================
+.. automodule:: src.training.rl_convs_processing
+   :members:
+   :undoc-members:
+   :show-inheritance:

src_code_for_reproducibility/docs/source/src.utils.extra_stats.rst ADDED Viewed

	@@ -0,0 +1,7 @@

+src.utils.extra\_stats module
+=============================
+.. automodule:: src.utils.extra_stats
+   :members:
+   :undoc-members:
+   :show-inheritance:

src_code_for_reproducibility/docs/source/src.utils.inherit_args.rst ADDED Viewed

	@@ -0,0 +1,7 @@

+src.utils.inherit\_args module
+==============================
+.. automodule:: src.utils.inherit_args
+   :members:
+   :undoc-members:
+   :show-inheritance:

src_code_for_reproducibility/docs/source/src.utils.parallel_shuffle.rst ADDED Viewed

	@@ -0,0 +1,7 @@

+src.utils.parallel\_shuffle module
+==================================
+.. automodule:: src.utils.parallel_shuffle
+   :members:
+   :undoc-members:
+   :show-inheritance:

src_code_for_reproducibility/docs/source/src.utils.quick_stats.rst ADDED Viewed

	@@ -0,0 +1,7 @@

+src.utils.quick\_stats module
+=============================
+.. automodule:: src.utils.quick_stats
+   :members:
+   :undoc-members:
+   :show-inheritance:

src_code_for_reproducibility/docs/source/src.utils.update_start_epoch.rst ADDED Viewed

	@@ -0,0 +1,7 @@

+src.utils.update\_start\_epoch module
+=====================================
+.. automodule:: src.utils.update_start_epoch
+   :members:
+   :undoc-members:
+   :show-inheritance:

src_code_for_reproducibility/docs/source/usage.rst ADDED Viewed

File without changes

src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_simulation.cpython-312.pyc ADDED Viewed

Binary file (6.72 kB). View file

src_code_for_reproducibility/utils/__init__.py ADDED Viewed

File without changes

src_code_for_reproducibility/utils/dict_get_path.py ADDED Viewed

	@@ -0,0 +1,12 @@

def get_from_nested_dict(a: dict, path) -> object:
    """Look up a value in a (possibly nested) dictionary, returning None on failure.

    Args:
        a: The dictionary to read from.
        path: Either a single key given as a string (looked up directly in
            ``a``) or an iterable of keys followed one level at a time.

    Returns:
        The value found at ``path``, or ``None`` when a key along the way is
        missing or an intermediate value cannot be indexed.
    """
    try:
        if isinstance(path, str):
            return a[path]
        node = a
        for key in path:
            node = node[key]
        return node
    except (LookupError, TypeError):
        # LookupError covers missing keys and out-of-range indices; TypeError
        # covers non-subscriptable intermediates, unhashable keys and a
        # non-iterable path. Anything else is a real bug and should surface.
        return None

src_code_for_reproducibility/utils/gather_training_stats.py ADDED Viewed

	@@ -0,0 +1,257 @@

+import copy
+import csv
+import gc
+import json
+import logging
+import os
+import pickle
+import random
+import re
+import subprocess
+import sys
+import time
+from datetime import datetime
+from statistics import mean
+from typing import Any, Dict
+import hydra
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import torch
+from omegaconf import OmegaConf
+from mllm.training.tally_metrics import Tally
+from mllm.utils.stat_pack import StatPack
def get_from_nested_dict(dictio: dict, path: list[str]):
    """Return the value stored under ``path`` in a nested dictionary.

    Every key except the last is followed with plain indexing, so a missing
    intermediate key raises ``KeyError``. The last key is read with ``.get``,
    so a missing final key yields ``None``.
    """
    node = dictio
    for key in path[:-1]:
        node = node[key]
    leaf_key = path[-1]
    return node.get(leaf_key)
def set_at_path(dictio: dict, path: list[str], value):
    """Store ``value`` under ``path`` in a nested dictionary, in place.

    Intermediate dictionaries are created for any key on the way that does not
    exist yet; existing ones are reused.
    """
    *parents, leaf_key = path[:-1], path[-1]
    node = dictio
    for key in parents[0]:
        # setdefault inserts an empty dict only when the key is absent.
        node = node.setdefault(key, {})
    node[leaf_key] = value
def produce_tabular_render(inpath: str, outpath: str = None):
    """Write one CSV table per rollout found in a JSON metrics file.

    The JSON document at ``inpath`` is read as a mapping from rollout path to
    a list of metric dictionaries (one per row). The keys of the first
    dictionary define the CSV columns.

    Args:
        inpath: Path of the JSON metrics file.
        outpath: Directory that receives the CSV files. When omitted, a
            ``contextualized_tabular_renders`` folder next to ``inpath`` is
            used.
    """
    with open(inpath, "r") as f:
        data = json.load(f)

    if outpath is None:
        # os.path.join keeps the result relative when ``inpath`` has no
        # directory part, instead of pointing at the filesystem root.
        out_dir = os.path.join(
            os.path.split(inpath)[0], "contextualized_tabular_renders"
        )
    else:
        out_dir = outpath

    for rollout_path, metrics in data.items():
        if not metrics:
            # Nothing to tabulate for this rollout.
            continue

        # Flatten the rollout path into a single file name.
        stem = rollout_path.replace("/", "|").replace(".json", "")
        m_path = os.path.join(out_dir, stem + "_tabular_render.render.csv")
        os.makedirs(os.path.split(m_path)[0], exist_ok=True)

        columns = {k: [] for k in metrics[0].keys()}
        for m in metrics:
            for k, v in m.items():
                columns[k].append(v)
        pd.DataFrame(columns).to_csv(m_path)
def get_metric_paths(data: list[dict]):
    """List the key paths of every non-dict leaf in the first entry of ``data``.

    Only ``data[0]`` is inspected. Paths are returned as lists of keys, in
    depth-first order following each dictionary's own key order.
    """

    def _walk(node, prefix):
        for key, value in node.items():
            path = prefix + [key]
            if isinstance(value, dict):
                yield from _walk(value, path)
            else:
                yield path

    return list(_walk(data[0], []))
def print_metric_paths(data: list[dict]):
    """Print each metric key path returned by ``get_metric_paths``, one per line."""
    for metric_path in get_metric_paths(data):
        print(metric_path)
def get_metric_iteration_list(data: list[dict], metric_path: list[str]):
    """Collect the value at ``metric_path`` from every entry of ``data``.

    A bare string ``metric_path`` is treated as a one-key path. The result has
    one element per entry of ``data``, in order.
    """
    path = [metric_path] if isinstance(metric_path, str) else metric_path
    return [get_from_nested_dict(entry, path) for entry in data]
def to_1d_numeric(x):
    """Flatten ``x`` into a 1-D float array, or return None if it is not numeric.

    Scalars become a one-element array, numpy arrays are cast to float and
    flattened, and lists/tuples are converted element by element (recursively)
    with non-numeric or empty elements dropped.
    """
    if x is None:
        return None

    if isinstance(x, (int, float, np.number)):
        return np.array([float(x)], dtype=float)

    if isinstance(x, np.ndarray):
        try:
            flat = x.astype(float).ravel()
        except Exception:
            # Arrays that cannot be cast to float are treated as non-numeric.
            return None
        return flat

    if not isinstance(x, (list, tuple)):
        return None

    pieces = [
        piece
        for piece in map(to_1d_numeric, x)
        if piece is not None and piece.size > 0
    ]
    return np.concatenate(pieces) if pieces else None
def get_single_metric_vector(data, metric_path, iterations=None):
    """Concatenate one metric across all entries of ``data`` into a flat vector.

    Args:
        data: Iterable of nested metric dictionaries.
        metric_path: Key path of the metric; a bare string is treated as a
            one-key path.
        iterations: Currently unused; kept so existing callers that pass it
            keep working.

    Returns:
        A 1-D float array with the numeric values found at ``metric_path`` in
        each entry, in order. Entries whose value is missing or non-numeric
        are skipped. An empty float array is returned when nothing is found.
    """
    if isinstance(metric_path, str):
        metric_path = [metric_path]

    vecs = []
    for d in data:
        arr = to_1d_numeric(get_from_nested_dict(d, metric_path))
        if arr is not None:
            vecs.append(arr)
    return np.concatenate(vecs) if vecs else np.empty(0, dtype=float)
def _load_metrics_file(file_path: str):
    """Unpickle and return the object stored in a ``.pkl`` metrics file.

    Raises:
        ValueError: If ``file_path`` does not end with ``.pkl``.
    """
    # ".tally.pkl" is itself a ".pkl" suffix, so any ".pkl" path passes.
    if not file_path.endswith((".tally.pkl", ".pkl")):
        raise ValueError("Only *.tally.pkl files are supported.")

    import pickle

    # NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
    with open(file_path, "rb") as handle:
        return pickle.load(handle)
def get_leaf_items(array_tally: dict, prefix: list[str] = None):
    """Yield ``(path, value)`` for every non-dict leaf of a nested dictionary.

    ``path`` is the list of keys leading to the leaf, each converted with
    ``str``, optionally preceded by the keys given in ``prefix``. Leaves are
    produced lazily in depth-first order.
    """
    base = [] if prefix is None else prefix
    for key, value in array_tally.items():
        path = base + [str(key)]
        if not isinstance(value, dict):
            yield path, value
            continue
        yield from get_leaf_items(value, path)
def _sanitize_filename_part(part: str) -> str:
    """Return ``part`` with every ``/`` replaced by ``|`` and every space by ``_``."""
    substitutions = str.maketrans({"/": "|", " ": "_"})
    return part.translate(substitutions)
def render_rt_tally_pkl_to_csvs(pkl_path: str, outdir: str):
    """Render a pickled rollout tally as one CSV file per leaf metric.

    The pickle is either the tally dictionary itself or a dictionary holding
    it under the ``"array_tally"`` key. Each leaf is a sequence of items
    exposing ``agent_ids``, ``crn_ids``, ``rollout_ids`` and ``metric_matrix``.
    Every row of every ``metric_matrix`` becomes one CSV row prefixed with the
    matching agent, crn and rollout ids.

    Args:
        pkl_path: Path of the pickled tally. Its base name, minus
            ``.rt_tally.pkl``, prefixes every output file name.
        outdir: Directory receiving the CSV files; created if missing.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
    with open(pkl_path, "rb") as f:
        payload = pickle.load(f)

    # Backward compatibility: older tallies stored the dict directly
    if isinstance(payload, dict) and "array_tally" in payload:
        array_tally = payload.get("array_tally", {})
    else:
        array_tally = payload

    os.makedirs(outdir, exist_ok=True)
    trainer_id = os.path.basename(pkl_path).replace(".rt_tally.pkl", "")

    for path_list, rollout_tally_items in get_leaf_items(array_tally):
        # A leaf without items has no first element to size the header from;
        # skip it before any file is created.
        if len(rollout_tally_items) == 0:
            continue

        path_part = ".".join(_sanitize_filename_part(p) for p in path_list)
        filename = f"{trainer_id}__{path_part}.render.csv"
        out_path = os.path.join(outdir, filename)

        with open(out_path, "w", newline="") as f:
            writer = csv.writer(f)

            # The header width comes from the first item's metric matrix.
            first_item = rollout_tally_items[0]
            metric_cols = (
                first_item.metric_matrix.shape[1]
                if first_item.metric_matrix.ndim > 1
                else 1
            )
            header = ["agent_id", "crn_id", "rollout_id"] + [
                f"t_{i}" for i in range(metric_cols)
            ]
            writer.writerow(header)

            for rollout_tally_item in rollout_tally_items:
                crn_ids = rollout_tally_item.crn_ids
                rollout_ids = rollout_tally_item.rollout_ids
                agent_ids = rollout_tally_item.agent_ids
                metric_matrix = rollout_tally_item.metric_matrix
                for i in range(metric_matrix.shape[0]):
                    row_vals = metric_matrix[i].reshape(-1)
                    # Convert to a plain list so it concatenates with the id prefix.
                    row_vals = (
                        row_vals.tolist()
                        if hasattr(row_vals, "tolist")
                        else list(row_vals)
                    )
                    row_prefix = [
                        agent_ids[i],
                        crn_ids[i],
                        rollout_ids[i],
                    ]
                    writer.writerow(row_prefix + row_vals)
def tally_to_stat_pack(tally: Dict[str, Any]):
    """Build a ``StatPack`` from a tally dictionary.

    When ``tally`` has an ``"array_tally"`` key (kept for backward
    compatibility), that nested dictionary is flattened first: each leaf is
    stored under its key path joined with ``"_"`` and reduced with
    ``np.mean``. Otherwise the entries of ``tally`` are added as they are.
    """
    stat_pack = StatPack()

    if "array_tally" in tally:
        flattened = {}

        def _collect(node, prefix):
            # Depth-first walk recording the mean of every non-dict leaf.
            for key, value in node.items():
                path = prefix + [key]
                if isinstance(value, dict):
                    _collect(value, path)
                else:
                    flattened["_".join(path)] = np.mean(value)

        _collect(tally["array_tally"], [])
        tally = flattened

    for name, stat in tally.items():
        stat_pack.add_stat(name, stat)
    return stat_pack

src_code_for_reproducibility/utils/get_stochastic_game_lengths.py ADDED Viewed

	@@ -0,0 +1,30 @@

+import numpy as np
def get_stochastic_game_lengths(
    max_length,
    nb_games,
    continuation_prob,
    same_length_batch=False
):
    """
    Draw game lengths from a geometric distribution, capped at ``max_length``.

    Args:
        max_length (int): Upper bound applied to every drawn length.
        nb_games (int): Number of lengths to return.
        continuation_prob (float): Probability that a game continues after each
            round; the draw uses a success probability of ``1 - continuation_prob``.
        same_length_batch (bool): If True, a single length is drawn and shared
            by all games.

    Returns:
        list: One length per game.
    """
    if continuation_prob == 1:
        # Games never stop on their own, so every game runs to the cap.
        return [max_length] * nb_games

    nb_draws = 1 if same_length_batch else nb_games
    game_lengths = np.random.geometric(1 - continuation_prob, nb_draws)
    if same_length_batch:
        game_lengths = np.repeat(game_lengths, nb_games)

    capped = np.minimum(game_lengths, max_length)
    return capped.tolist()

src_code_for_reproducibility/utils/kill_sglang.py ADDED Viewed

	@@ -0,0 +1,17 @@

+import psutil
+import signal
# Substring looked for in a process command line to decide whether to kill it.
target_name = "sglang::scheduler"
# PIDs that kill_sglang() has sent SIGKILL to so far.
killed = []


def kill_sglang():
    """Send SIGKILL to every process whose command line contains ``target_name``.

    Each signalled PID is printed and appended to the module-level ``killed``
    list. Processes that vanish or cannot be accessed are skipped.
    """
    for proc in psutil.process_iter(['pid', 'name', 'cmdline']):
        try:
            # Some processes may not have a cmdline at all.
            argv = proc.info['cmdline']
            cmdline = " ".join(argv) if argv else ""
            if target_name not in cmdline:
                continue
            print(f"Killing PID {proc.pid}: {cmdline}")
            proc.send_signal(signal.SIGKILL)
            killed.append(proc.pid)
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            pass

src_code_for_reproducibility/utils/output_source_code.py ADDED Viewed

	@@ -0,0 +1,6 @@

def output_source_code(model, output_path: str) -> None:
    """
    Write ``model.source_code`` to the file at ``output_path``, replacing any
    existing content.
    """
    with open(output_path, "w") as destination:
        destination.write(model.source_code)

src_code_for_reproducibility/utils/resource_context.py ADDED Viewed

	@@ -0,0 +1,78 @@

+import logging
+import time
+from contextlib import contextmanager
+import torch
def vram_usage():
    """Return a one-line summary of allocated and reserved memory per CUDA device.

    Returns:
        str: One ``GPU i: ...`` entry per device reported by
        ``torch.cuda.device_count()``, separated by ``"; "``. The string is
        empty when no device is reported.
    """
    bytes_per_gb = 1024**3
    entries = []
    for i in range(torch.cuda.device_count()):
        gpu_memory_allocated = torch.cuda.memory_allocated(i) / bytes_per_gb
        gpu_memory_reserved = torch.cuda.memory_reserved(i) / bytes_per_gb
        entries.append(
            f"GPU {i}: Memory Allocated: {gpu_memory_allocated:.2f} GB, Memory Reserved: {gpu_memory_reserved:.2f} GB"
        )
    # Separate the per-device entries so multi-GPU output stays readable.
    return "; ".join(entries)
def ram_usage():
    """Return the current process's resident set size as ``RAM Usage: X.XX GB``."""
    import psutil

    rss_bytes = psutil.Process().memory_info().rss
    ram_used = rss_bytes / (1024**3)  # bytes -> GB
    return f"RAM Usage: {ram_used:.2f} GB"
@contextmanager
def resource_logger_context(logger: logging.Logger, task_description: str):
    """
    Context manager that logs VRAM usage and wall-clock time of the wrapped block.

    Only CUDA device 0 is measured, and CUDA is assumed to be available. On
    exit (normal or via exception) one info line is logged with the change in
    allocated+reserved memory, the current allocated+reserved memory and the
    block's peak allocated memory, each as a percentage of the device's total
    memory, plus the elapsed time as HH:MM:SS.

    Args:
        logger: The logger to use to log the resource usage.
        task_description: The description of the task to log.
    Yields:
        None
    """
    # Setup runs before the try block: if it fails (e.g. no CUDA device), the
    # error surfaces directly instead of the finally clause running against
    # measurements that were never taken.
    initial_time = time.time()
    total_mem_bytes = torch.cuda.get_device_properties(0).total_memory
    initial_total_bytes = (
        torch.cuda.memory_allocated(0) + torch.cuda.memory_reserved(0)
    )
    # Reset the peak counter so the reported peak belongs to this block only.
    torch.cuda.reset_peak_memory_stats(0)
    try:
        yield None
    finally:
        final_time = time.time()
        # Ensure kernels within the block are accounted for
        torch.cuda.synchronize()

        final_allocated_bytes = torch.cuda.memory_allocated(0)
        final_reserved_bytes = torch.cuda.memory_reserved(0)
        final_total_bytes = final_allocated_bytes + final_reserved_bytes

        # Every percentage falls back to 0.0 if the device reports no memory.
        delta_vram_percent_total = (
            100 * (final_total_bytes - initial_total_bytes) / total_mem_bytes
            if total_mem_bytes
            else 0.0
        )
        current_percent_vram_taken = (
            100 * final_total_bytes / total_mem_bytes if total_mem_bytes else 0.0
        )
        block_peak_percent = (
            100 * torch.cuda.max_memory_allocated(0) / total_mem_bytes
            if total_mem_bytes
            else 0.0
        )
        delta_time_str = time.strftime(
            '%H:%M:%S', time.gmtime(final_time - initial_time)
        )

        logger.info(
            f"For task: {task_description}, ΔVRAM % (total): {delta_vram_percent_total:.2f}%, Current % of VRAM taken: {current_percent_vram_taken:.2f}%, Block Peak % of device VRAM: {block_peak_percent:.2f}%, ΔTime: {delta_time_str}"
        )

src_code_for_reproducibility/utils/rollout_tree_chat_htmls.py ADDED Viewed

	@@ -0,0 +1,1921 @@

+from pathlib import Path
+from typing import List
+from mllm.utils.rollout_tree_gather_utils import *
+def html_from_chat_turns(chat_turns: List[ChatTurnLog]) -> str:
+    """
+    Render chat turns as a single, wrapping sequence of messages in time order.
+    Keep badge and message bubble styles, include time on every badge and
+    include rewards on assistant badges. Each message is individually
+    hide/show by click; when hidden, only the badge remains and "(...)" is
+    shown inline (not inside a bubble).
+    """
+    import html
+    import re as _re
+    # Prepare ordering: sort by (time_step, original_index) to keep stable order within same step
+    indexed_turns = list(enumerate(chat_turns))
+    indexed_turns.sort(key=lambda t: (t[1].time_step, t[0]))
+    assistant_agents = sorted({t.agent_id for t in chat_turns if t.role == "assistant"})
+    enable_split_view = len(assistant_agents) == 2
+    # CSS styles (simplified layout; no time-step or agent-column backgrounds)
+    css = """
+    <style>
+        :root {
+            --font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+            --bg: #ffffff;
+            --text: #1c0b00;
+            --muted-text: #2C3E50;
+            --accent-muted: #BDC3C7;
+            --accent-muted-2: #D0D7DE;
+            --panel-bg: #F8FAFC;
+            --reward-color: #3a2e00; /* dark text for reward pill */
+            --font-size: 14px;
+            --border-width: 2px;
+            --corner-radius: 6px;
+            --pill-radius-left: 999px 0 0 999px;
+            --pill-radius-right: 0 999px 999px 0;
+            --inset-shadow: 0 1px 0 rgba(0,0,0,0.03) inset;
+            /* Chat View Colors */
+            --alice-bg: #dcf8c6;
+            --alice-border: #0eb224;
+            --bob-bg: #ffe4cc;
+            --bob-border: #ef8323;
+            --user-bg: #f5f5f5;
+            --chat-bg: #ffffff;
+        }
+        body {
+            font-family: var(--font-family);
+            margin: 12px;
+            background-color: var(--bg);
+            color: var(--text);
+            font-size: var(--font-size);
+            line-height: 1.5;
+        }
+        /* Chat View Styles */
+        #flow-chat {
+            max-width: 900px;
+            margin: 0 auto;
+            background: var(--chat-bg);
+            padding: 12px 16px 12px 8px;
+            border-radius: 8px;
+        }
+        .simultaneous-messages {
+            display: flex !important;
+            flex-direction: row !important;
+            flex-wrap: nowrap !important;
+            gap: 8px;
+            margin-bottom: 4px;
+            align-items: flex-start;
+            width: 100%;
+            overflow: hidden;
+            box-sizing: border-box;
+        }
+        .simultaneous-messages .chat-message {
+            flex: 1 1 0 !important;
+            margin-bottom: 0 !important;
+            display: flex !important;
+            flex-direction: row !important;
+            align-items: flex-start !important;
+            margin-left: 0 !important;
+            min-width: 0 !important;
+            max-width: 50% !important;
+            gap: 0 !important;
+            overflow: hidden !important;
+        }
+        .simultaneous-messages .chat-message-content {
+            max-width: 100% !important;
+            width: 100%;
+            align-items: flex-start !important;
+            margin-left: 0 !important;
+            overflow: hidden !important;
+        }
+        .simultaneous-messages .chat-message.agent-alice {
+            justify-content: flex-start !important;
+        }
+        .simultaneous-messages .chat-message.agent-bob {
+            justify-content: flex-end !important;
+        }
+        .simultaneous-messages .chat-message.agent-alice .chat-message-content {
+            margin-left: 0 !important;
+            align-items: flex-start !important;
+        }
+        .simultaneous-messages .chat-message.agent-bob .chat-message-content {
+            margin-left: auto !important;
+            margin-right: 0 !important;
+            align-items: flex-end !important;
+        }
+        .simultaneous-messages .chat-bubble {
+            max-width: 100%;
+            word-break: break-word;
+            overflow-wrap: break-word;
+            box-sizing: border-box;
+        }
+        .simultaneous-messages .chat-message.agent-alice .chat-bubble {
+            border-radius: 10px;
+        }
+        .simultaneous-messages .chat-message.agent-bob .chat-bubble {
+            border-radius: 10px;
+        }
+        .simultaneous-messages .chat-message.agent-alice .chat-header {
+            justify-content: flex-start;
+            flex-shrink: 0;
+        }
+        .simultaneous-messages .chat-message.agent-bob .chat-header {
+            justify-content: flex-end;
+            flex-shrink: 0;
+        }
+        .simultaneous-messages .chat-reasoning {
+            max-width: 100%;
+            overflow-wrap: break-word;
+        }
+        .chat-message {
+            display: flex;
+            margin-bottom: 2px;
+            align-items: flex-end;
+            gap: 6px;
+            position: relative;
+            margin-left: 36px;
+        }
+        .chat-message.agent-alice {
+            margin-left: 0;
+        }
+        .chat-message.agent-alice::before {
+            left: 0;
+        }
+        .chat-message.role-user {
+            opacity: 0.7;
+        }
+        .chat-message::before {
+            content: '';
+            position: absolute;
+            left: -36px;
+            top: 0;
+            bottom: 0;
+            width: 36px;
+            pointer-events: auto;
+        }
+        .merge-btn {
+            position: absolute;
+            left: -30px;
+            top: 50%;
+            transform: translateY(-50%);
+            width: 26px;
+            height: 26px;
+            border-radius: 4px;
+            border: 1.5px solid var(--accent-muted);
+            background: white;
+            cursor: pointer;
+            font-size: var(--font-size);
+            opacity: 0;
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            transition: opacity 0.2s ease, transform 0.1s ease;
+            padding: 0;
+            line-height: 1;
+            z-index: 10;
+        }
+        .chat-message:hover .merge-btn,
+        .merge-btn:hover {
+            opacity: 1;
+        }
+        .merge-btn:hover {
+            background: var(--panel-bg);
+            border-color: var(--accent-muted-2);
+            transform: translateY(-50%) scale(1.15);
+            box-shadow: 0 2px 4px rgba(0, 0, 0, 0.15);
+        }
+        .merge-btn:active {
+            transform: translateY(-50%) scale(0.95);
+        }
+        .chat-message.agent-alice .merge-btn {
+            left: -30px;
+        }
+        .chat-message.role-user .merge-btn {
+            display: none !important;
+        }
+        .simultaneous-messages .merge-btn {
+            opacity: 0 !important;
+            pointer-events: none;
+        }
+        .simultaneous-messages {
+            padding: 6px 0 6px 0 !important;
+            margin-left: 0 !important;
+            margin-right: 0 !important;
+            position: relative !important;
+            background: transparent !important;
+            border-radius: 0 !important;
+            box-sizing: border-box !important;
+            overflow: visible !important;
+            max-width: 100% !important;
+            border: none !important;
+            transition: padding 0.2s ease !important;
+        }
+        .simultaneous-messages:hover {
+            padding-top: 40px !important;
+        }
+        .simultaneous-messages::before {
+            content: '⇅ Merged';
+            position: absolute;
+            left: 0 !important;
+            top: 8px !important;
+            font-size: var(--font-size);
+            font-weight: 500;
+            color: #888;
+            pointer-events: none;
+            opacity: 0;
+            transition: opacity 0.2s ease;
+        }
+        .simultaneous-messages:hover::before {
+            opacity: 1;
+        }
+        .unmerge-btn {
+            position: absolute !important;
+            right: 0 !important;
+            top: 6px !important;
+            width: 36px !important;
+            height: 28px !important;
+            border-radius: 5px !important;
+            border: 2px solid #d63031 !important;
+            background: white !important;
+            cursor: pointer !important;
+            font-size: var(--font-size) !important;
+            font-weight: bold !important;
+            color: #d63031 !important;
+            display: flex !important;
+            align-items: center !important;
+            justify-content: center !important;
+            transition: all 0.2s ease !important;
+            padding: 0 !important;
+            line-height: 1 !important;
+            z-index: 1000 !important;
+            flex: none !important;
+            pointer-events: auto !important;
+            box-shadow: 0 2px 6px rgba(214, 48, 49, 0.3) !important;
+            opacity: 0 !important;
+        }
+        .simultaneous-messages:hover .unmerge-btn {
+            opacity: 1 !important;
+        }
+        .unmerge-btn:hover {
+            background: #ffe5e5 !important;
+            border-color: #b71c1c !important;
+            transform: scale(1.1) !important;
+            box-shadow: 0 3px 8px rgba(214, 48, 49, 0.4) !important;
+        }
+        .unmerge-btn:active {
+            transform: scale(0.95) !important;
+            background: #ffcccc !important;
+        }
+        .chat-message-content {
+            max-width: 72%;
+            display: flex;
+            flex-direction: column;
+            gap: 2px;
+        }
+        .chat-message.agent-alice .chat-message-content {
+            align-items: flex-start;
+        }
+        .chat-message.agent-bob .chat-message-content {
+            align-items: flex-end;
+            margin-left: auto;
+        }
+        .chat-bubble {
+            padding: 6px 10px;
+            border-radius: 10px;
+            word-wrap: break-word;
+            position: relative;
+            box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
+            line-height: 1.4;
+        }
+        .chat-message.agent-alice .chat-bubble {
+            background: var(--alice-bg);
+            border: 2px solid var(--alice-border);
+            border-radius: 10px 10px 10px 2px;
+        }
+        .chat-message.agent-bob .chat-bubble {
+            background: var(--bob-bg);
+            border: 2px solid var(--bob-border);
+            border-radius: 10px 10px 2px 10px;
+        }
+        .chat-message.role-user .chat-bubble {
+            background: var(--user-bg);
+            border: 2px solid #d0d0d0;
+        }
+        .chat-header {
+            display: flex;
+            align-items: center;
+            gap: 4px;
+            margin-bottom: 2px;
+            font-size: var(--font-size);
+            font-weight: 600;
+            line-height: 1.2;
+        }
+        .chat-message.agent-alice .chat-header {
+            color: var(--alice-border);
+        }
+        .chat-message.agent-bob .chat-header {
+            color: var(--bob-border);
+        }
+        .chat-timestamp {
+            font-size: var(--font-size);
+            color: var(--muted-text);
+            margin-top: 1px;
+            opacity: 0.75;
+        }
+        .chat-reward {
+            display: inline-flex;
+            align-items: center;
+            background: linear-gradient(90deg, #fffdf2 0%, #ffffff 75%);
+            color: #000000;
+            font-weight: 600;
+            font-size: var(--font-size);
+            padding: 1px 5px;
+            border-radius: 3px;
+            border: 1px solid #f4e6a8;
+            margin-left: 4px;
+            line-height: 1.3;
+        }
+        .chat-reasoning {
+            font-size: var(--font-size);
+            font-style: italic;
+            color: #555;
+            margin-bottom: 2px;
+            padding: 4px 8px;
+            background: rgba(0, 0, 0, 0.03);
+            border-radius: 5px;
+            cursor: pointer;
+            line-height: 1.3;
+        }
+        .chat-reasoning.collapsed .reasoning-text {
+            display: none;
+        }
+        .chat-reasoning.collapsed::after {
+            content: ' (click to expand)';
+            color: #777;
+        }
+        .chat-group-divider {
+            display: flex;
+            align-items: center;
+            gap: 8px;
+            width: 100%;
+            margin: 8px 0 4px 0;
+            position: relative;
+            cursor: pointer;
+            user-select: none;
+        }
+        .chat-group-divider::before,
+        .chat-group-divider::after {
+            content: "";
+            flex: 1 1 auto;
+            height: 2px;
+            background: linear-gradient(90deg, rgba(224,230,235,0), var(--accent-muted-2) 30%, var(--accent-muted-2) 70%, rgba(224,230,235,0));
+        }
+        .chat-group-label {
+            display: inline-block;
+            background: white;
+            padding: 2px 12px;
+            border-radius: 999px;
+            font-size: var(--font-size);
+            font-weight: 700;
+            color: var(--muted-text);
+            border: 1.5px solid var(--accent-muted);
+            box-shadow: 0 1px 3px rgba(0, 0, 0, 0.08);
+            line-height: 1.4;
+            position: relative;
+            transition: background 0.2s ease;
+        }
+        .chat-group-divider:hover .chat-group-label {
+            background: var(--panel-bg);
+        }
+        .chat-group-label::before {
+            content: '▼ ';
+            font-size: 0.8em;
+            display: inline-block;
+            transition: transform 0.2s ease;
+            opacity: 0;
+        }
+        .chat-group-divider:hover .chat-group-label::before {
+            opacity: 1;
+        }
+        .chat-group-divider.collapsed .chat-group-label::before {
+            content: '▶ ';
+            opacity: 1;
+        }
+        .chat-group-divider.collapsed + * {
+            display: none !important;
+        }
+        /* Hide collapsed rounds in strong hide mode */
+        .strong-hide .chat-group-divider.collapsed {
+            display: none !important;
+        }
+        /* Chat view width control */
+        #flow-chat {
+            --chat-width: 900px;
+            max-width: var(--chat-width);
+            margin: 0 auto;
+        }
+        /* Hide user messages when toggle is on */
+        #flow-chat.hide-user-messages .chat-message.role-user {
+            display: none;
+        }
+        /* Hide rewards when hiding user messages */
+        #flow-chat.hide-user-messages .chat-reward {
+            display: none;
+        }
+        /* Round context annotations */
+        .round-context {
+            text-align: center;
+            margin: 4px auto;
+            max-width: 100%;
+        }
+        .round-context-edit {
+            min-height: 20px;
+            padding: 5px 10px;
+            border: 1.5px dashed var(--accent-muted);
+            border-radius: 6px;
+            background: #fafafa;
+            cursor: text;
+            transition: all 0.2s ease;
+            outline: none;
+            font-size: var(--font-size);
+            line-height: 1.3;
+            user-select: text;
+            -webkit-user-select: text;
+            -moz-user-select: text;
+            -ms-user-select: text;
+        }
+        .round-context-edit:focus {
+            border-style: solid;
+            border-color: var(--accent-muted-2);
+            background: #ffffff;
+            box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
+        }
+        .round-context-edit:empty:before {
+            content: attr(data-placeholder);
+            color: #999;
+            font-style: italic;
+        }
+        .round-context-controls {
+            display: none;
+            justify-content: center;
+            gap: 4px;
+            margin-top: 4px;
+            flex-wrap: wrap;
+        }
+        .round-context-edit:focus + .round-context-controls,
+        .round-context-controls:hover,
+        .round-context:focus-within .round-context-controls {
+            display: flex;
+        }
+        .context-color-btn {
+            width: 22px;
+            height: 22px;
+            border-radius: 50%;
+            border: 1.5px solid #fff;
+            box-shadow: 0 1px 2px rgba(0, 0, 0, 0.15);
+            cursor: pointer;
+            transition: transform 0.1s ease;
+        }
+        .context-color-btn:hover {
+            transform: scale(1.15);
+        }
+        .context-color-btn:active {
+            transform: scale(0.95);
+        }
+        /* Split agent context boxes */
+        .split-agent-context {
+            display: flex;
+            gap: 6px;
+            margin: 4px auto;
+            max-width: 100%;
+            align-items: flex-start;
+        }
+        .agent-context-box {
+            flex: 1;
+            min-width: 0;
+            position: relative;
+        }
+        .agent-context-box .round-context-edit {
+            margin: 0;
+            border-radius: 6px;
+            padding: 4px 8px;
+            min-height: 18px;
+        }
+        .agent-context-box.agent-alice .round-context-edit {
+            border-color: var(--alice-border);
+            background: rgba(14, 178, 36, 0.03);
+        }
+        .agent-context-box.agent-bob .round-context-edit {
+            border-color: var(--bob-border);
+            background: rgba(239, 131, 35, 0.03);
+        }
+        .agent-context-box.agent-alice .round-context-edit:focus {
+            border-color: var(--alice-border);
+            box-shadow: 0 2px 8px rgba(14, 178, 36, 0.2);
+            background: rgba(14, 178, 36, 0.05);
+        }
+        .agent-context-box.agent-bob .round-context-edit:focus {
+            border-color: var(--bob-border);
+            box-shadow: 0 2px 8px rgba(239, 131, 35, 0.2);
+            background: rgba(239, 131, 35, 0.05);
+        }
+        .agent-context-box .round-context-edit::before {
+            font-weight: 700;
+            font-size: var(--font-size);
+            margin-right: 5px;
+            letter-spacing: 0.2px;
+        }
+        .agent-context-box.agent-alice .round-context-edit::before {
+            content: 'Alice Prompt Summary:';
+            color: var(--alice-border);
+        }
+        .agent-context-box.agent-bob .round-context-edit::before {
+            content: 'Bob Prompt Summary:';
+            color: var(--bob-border);
+        }
+        /* Empty context boxes will be hidden by JavaScript when strong hide is enabled */
+        .messages-flow { display: block; }
+        .split-wrapper { display: flex; gap: 4px; align-items: flex-start; position: relative; }
+        .split-col { flex:1 1 0; min-width:0; }
+        /* In split view keep same inline density as linear view */
+        .split-col .chat-turn { display: inline; }
+        .split-wrapper.resizing { user-select: none; }
+    .split-resizer { width:4px; cursor: col-resize; flex:0 0 auto; align-self: stretch; position: relative; background: linear-gradient(90deg, rgba(224,230,235,0), var(--accent-muted-2) 30%, var(--accent-muted-2) 70%, rgba(224,230,235,0)); border-radius:2px; transition: background .15s ease, width .15s ease; }
+    .split-resizer:hover { background: linear-gradient(90deg, rgba(224,230,235,0), var(--accent-muted) 35%, var(--accent-muted) 65%, rgba(224,230,235,0)); }
+    .split-resizer.dragging { background: linear-gradient(90deg, rgba(224,230,235,0), var(--accent-muted) 25%, var(--accent-muted) 75%, rgba(224,230,235,0)); }
+    /* Inline reasoning (removed toggle to prevent layout shift on click) */
+    .reasoning-inline { display:inline; font-size:var(--font-size); font-style:italic; color:#555; white-space:pre-wrap; margin-right:4px; cursor:pointer; position:relative; }
+    .reasoning-inline .reasoning-text { display:inline; }
+    .reasoning-inline .reasoning-icon { display:inline-block; margin-right:2px; }
+    .reasoning-inline.collapsed .reasoning-text { display:none; }
+    .reasoning-inline.collapsed::after { content:'(...)'; font-style:italic; color:#777; margin-left:4px; }
+    .message-box .main-content { white-space:normal; }
+        /* tighten spacing */
+        .split-col .group-divider { margin:4px 0 2px 0; }
+        .toolbar {
+            display: flex;
+            align-items: center;
+            gap: 8px;
+            margin-bottom: 0;
+            font-size: var(--font-size);
+            max-height: 0;
+            overflow: hidden;
+            opacity: 0;
+            pointer-events: none;
+            transition: max-height 0.2s ease, opacity 0.2s ease;
+            flex-wrap: wrap;
+        }
+        .toolbar-wrap { position: sticky; top: 0; z-index: 10; background: var(--bg); }
+        .toolbar-hotzone { height: 6px; }
+        .toolbar-wrap:hover .toolbar { max-height: 500px; opacity: 1; pointer-events: auto; margin-bottom: 12px; }
+        .toolbar * { pointer-events: auto !important; }
+        .toolbar input,
+        .toolbar select { z-index: 100 !important; position: relative; }
+        .toolbar input[type="number"],
+        .toolbar input[type="text"],
+        .toolbar select {
+            width: 72px;
+            padding: 2px 6px;
+            border: 1px solid var(--accent-muted);
+            border-radius: var(--corner-radius);
+            background: var(--bg);
+            user-select: text !important;
+            -webkit-user-select: text !important;
+            -moz-user-select: text !important;
+            -ms-user-select: text !important;
+            pointer-events: auto !important;
+            cursor: pointer !important;
+        }
+        .toolbar input[type="text"] {
+            cursor: text !important;
+        }
+        .toolbar input[type="text"]:focus,
+        .toolbar input[type="number"]:focus,
+        .toolbar select:focus {
+            outline: 2px solid #0066cc;
+            outline-offset: 1px;
+        }
+        .toolbar button {
+            padding: 4px 8px;
+            border: 1px solid var(--accent-muted);
+            background: var(--panel-bg);
+            border-radius: var(--corner-radius);
+            cursor: pointer;
+        }
+        .chat-turn {
+            display: inline; /* inline like text */
+            background: transparent;
+            position: relative;
+            cursor: pointer;
+        }
+        /* No agent-specific background distinctions */
+        .turn-content {
+            white-space: normal;
+            color: var(--text);
+            font-size: var(--font-size);
+            display: inline; /* inline flow */
+        }
+        .chat-turn .agent-badge { margin-right: 0; vertical-align: baseline; }
+        .agent-badge {
+            display: inline;
+            position: relative;
+            border: var(--border-width) solid var(--accent-muted); /* slightly thicker */
+            border-radius: var(--pill-radius-left); /* round left and bottom-right */
+            font-size: var(--font-size);
+            color: var(--muted-text);
+            background: var(--panel-bg);
+            box-shadow: var(--inset-shadow);
+            line-height: 1.2;
+            border-right: 0;
+        }
+        /* Use flex on assistant badges to vertically center reward pill */
+        .chat-turn.role-assistant .agent-badge { display: inline-flex; align-items: center; }
+        .agent-badge::after {
+            content: none;
+        }
+        /* removed external separator; emoji is rendered inside message bubble */
+        .agent-name { font-weight: 700; }
+        .emoji-bw { filter: grayscale(100%); opacity: 0.95; font-size: var(--font-size); vertical-align: baseline; margin: 0; position: relative; top: -1px; line-height: 1; display: inline-block; }
+        /* Timestep badge: inline bordered label with a small right margin.
+           Disabled declarations are kept as real CSS comments; '#' is not
+           comment syntax in CSS. */
+        .ts-badge {
+            position: relative;
+            display: inline;
+            border: var(--border-width) solid var(--accent-muted-2); /* slightly thicker */
+            border-radius: var(--corner-radius); /* not a pill */
+            font-size: var(--font-size);
+            /* font-weight: 700; (disabled) */
+            color: var(--muted-text);
+            background: #F4F8FB; /* subtle tint */
+            /* padding: 1px 6px; (disabled; slight padding for visibility) */
+            margin-right: 8px; /* small gap from following content */
+            pointer-events: auto; /* allow events so we can ignore them in JS */
+        }
+        /* Hide timestep badges when grouping by 1 */
+        .hide-ts-badges .ts-badge { display: none; }
+        /* Strong hide: completely hide collapsed turns */
+        .strong-hide .chat-turn.collapsed { display: none; }
+        .ts-badge::before {
+            content: "";
+            position: relative;
+            background: var(--accent-muted-2);
+            border-radius: 2px;
+        }
+        .agent-badge { margin-left: 6px;  }
+        .message-box {
+            display: inline; /* inline bubble behaving like text */
+            font-size: var(--font-size);
+            border: var(--border-width) solid var(--accent-muted);
+            border-radius: var(--pill-radius-right); /* round left and bottom-right */
+            position: relative;
+            background: var(--bg);
+            vertical-align: baseline;
+            line-height: 1.2;
+            padding-left: 0;
+            border-left: 0;
+        }
+        .chat-turn.agent-alice.role-assistant .message-box::before { color: #0eb224; }
+        .chat-turn.agent-bob.role-assistant .message-box::before { color: #ef8323; }
+        .chat-turn.collapsed .message-box::before { display: none; }
+        /* Assistant bubble border colors by common agent names */
+        .chat-turn.agent-alice.role-assistant .message-box { border-color: #0eb224; }
+        .chat-turn.agent-bob.role-assistant .message-box { border-color: #ef8323; }
+        /* Tie badge and seam to agent color for a cohesive capsule, assistants only */
+    .chat-turn.agent-alice.role-assistant .agent-badge { border-color: #0eb224; background: rgba(14,178,36,0.08); }
+        .chat-turn.agent-alice.role-assistant .agent-badge::after { border-right-color: #0eb224; }
+        .chat-turn.agent-alice.role-assistant .turn-content::before { border-left-color: #0eb224; border-top-color: #0eb224; }
+        .chat-turn.agent-alice.role-assistant .message-box { border-color: #0eb224; }
+    .chat-turn.agent-bob.role-assistant .agent-badge { border-color: #ef8323; background: rgba(239,131,35,0.10); }
+        .chat-turn.agent-bob.role-assistant .agent-badge::after { border-right-color: #ef8323; }
+        .chat-turn.agent-bob.role-assistant .turn-content::before { border-left-color: #ef8323; border-top-color: #ef8323; }
+        .chat-turn.agent-bob.role-assistant .message-box { border-color: #ef8323; }
+        /* No colored agent-name; keep neutral */
+        .reward {
+            display: inline-flex;
+            align-items: center;
+            justify-content: center;
+            background: linear-gradient(90deg, #fffdf2 0%, #ffffff 75%);
+            color: #000000; /* full black */
+            font-weight: 600; /* slightly bolder */
+            font-family: "Inter", ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen, Ubuntu, Cantarell, "Fira Sans", "Droid Sans", "Helvetica Neue", Arial, "Noto Sans", sans-serif;
+            font-size: var(--font-size);
+            letter-spacing: 0.15px;
+            line-height: 1;
+            padding: 0 4px 1px 4px; /* slight bottom pad for optical centering */
+            border-radius: 4px;
+            border: 1px solid #f4e6a8;
+            margin: 0 4px;
+            box-shadow: 0 0 0 1px rgba(255,255,255,0.55) inset, 0 1px 2px rgba(0,0,0,0.04);
+        }
+        .message-placeholder { display: none; color: #7f8c8d; font-style: italic; }
+        .chat-turn.collapsed .message-box { color: transparent; font-size: 0; display: inline-block; }
+        .chat-turn.collapsed .message-box::after { content: "(...)"; color: #7f8c8d; font-style: italic; font-size: var(--font-size); line-height: 1.2; }
+        .chat-turn.collapsed .agent-badge,
+        .chat-turn.collapsed .message-box { opacity: 0.3; }
+        /* Group divider - clearer and pretty */
+        .group-divider {
+            display: flex;
+            align-items: center;
+            gap: 8px;
+            width: 100%;
+            margin: 8px 0 4px 0;
+            position: relative;
+            cursor: pointer;
+            user-select: none;
+        }
+        .group-divider::before,
+        .group-divider::after {
+            content: "";
+            flex: 1 1 auto;
+            height: 2px;
+            background: linear-gradient(90deg, rgba(224,230,235,0), var(--accent-muted-2) 30%, var(--accent-muted-2) 70%, rgba(224,230,235,0));
+        }
+        .group-divider .group-label {
+            display: inline-block;
+            border: 1px solid var(--accent-muted);
+            border-radius: 999px;
+            padding: 2px 10px;
+            font-size: var(--group-label-font-size);
+            font-weight: 700;
+            color: var(--muted-text);
+            background: var(--bg);
+            box-shadow: var(--inset-shadow);
+            position: relative;
+            z-index: 1;
+            transition: background 0.2s ease;
+        }
+        .group-divider:hover .group-label {
+            background: var(--panel-bg);
+        }
+        .group-label::before {
+            content: '▼ ';
+            font-size: 0.8em;
+            display: inline-block;
+            transition: transform 0.2s ease;
+            opacity: 0;
+        }
+        .group-divider:hover .group-label::before {
+            opacity: 1;
+        }
+        .group-divider.collapsed .group-label::before {
+            content: '▶ ';
+            opacity: 1;
+        }
+        /* Hide collapsed rounds in strong hide mode */
+        .strong-hide .group-divider.collapsed {
+            display: none !important;
+        }
+        /* Split mode (body.split-mode): draw group dividers with --accent-muted and wider gradient stops */
+        body.split-mode .group-divider::before,
+        body.split-mode .group-divider::after {
+            background: linear-gradient(90deg, rgba(224,230,235,0), var(--accent-muted) 25%, var(--accent-muted) 75%, rgba(224,230,235,0));
+        }
+        .chat-turn .turn-content { position: relative; }
+        .chat-turn .turn-content::before {
+            content: none;
+        }
+        .chat-turn .agent-badge {
+            position: relative;
+        }
+        /* removed absolute-positioned emoji to prevent overlap */
+    </style>
+    """
+    # HTML structure
+    html_parts = [
+        "<!DOCTYPE html>",
+        "<html>",
+        "<head>",
+        "<meta charset='UTF-8'>",
+        "<title>Chat Turns</title>",
+        css,
+        "<script>\n"
+        "document.addEventListener('DOMContentLoaded', function() {\n"
+        "  const linearFlow = document.getElementById('flow-linear');\n"
+        "  const splitFlow = document.getElementById('flow-split');\n"
+        "  const chatFlow = document.getElementById('flow-chat');\n"
+        "  let splitViewOn = false;\n"
+        "  let chatViewOn = true;\n"
+        "  function activeFlows() { return [chatViewOn && chatFlow ? chatFlow : null, splitViewOn && splitFlow ? splitFlow : null, linearFlow].filter(Boolean).filter(f => f.style.display !== 'none'); }\n"
+        "  // State for range filtering and strong hide\n"
+        "  let currentRangeStart = null;\n"
+        "  let currentRangeEnd = null;\n"
+        "  let strongHideOn = false;\n"
+        "  document.body.addEventListener('click', function(e){\n"
+        "    if (e.target.closest('input, textarea, select, button, .round-context-edit, .toolbar')) { return; }\n"
+        "    if (e.target.closest('.ts-badge')) { return; }\n"
+        "    const r = e.target.closest('.reasoning-inline'); if (r) { e.stopPropagation(); r.classList.toggle('collapsed'); return; }\n"
+        "    const turn = e.target.closest('.chat-turn');\n"
+        "    if (turn) { e.stopPropagation(); turn.classList.toggle('collapsed'); }\n"
+        "  });\n"
+        "  // Inline reasoning (.reasoning-inline) is toggled by the body click handler above\n"
+        "  function applyRangeFilter() {\n"
+        "    for (const flow of activeFlows()) {\n"
+        "      const turns = Array.from(flow.querySelectorAll('.chat-turn'));\n"
+        "      for (const el of turns) {\n"
+        "        const t = parseInt(el.getAttribute('data-time-step') || '0', 10);\n"
+        "        const afterStart = (currentRangeStart === null) || (t >= currentRangeStart);\n"
+        "        const beforeEnd = (currentRangeEnd === null) || (t <= currentRangeEnd);\n"
+        "        el.style.display = (afterStart && beforeEnd) ? '' : 'none';\n"
+        "      }\n"
+        "      const dividers = Array.from(flow.querySelectorAll('.group-divider'));\n"
+        "      for (const d of dividers) {\n"
+        "        let anyVisible = false;\n"
+        "        let el = d.nextElementSibling;\n"
+        "        while (el && !el.classList.contains('group-divider')) {\n"
+        "          if (el.classList.contains('chat-turn')) {\n"
+        "            const disp = getComputedStyle(el).display;\n"
+        "            if (disp !== 'none') { anyVisible = true; break; }\n"
+        "          } else if (el.classList.contains('split-wrapper')) {\n"
+        "            // Search descendants for any visible chat-turn\n"
+        "            const turns = Array.from(el.querySelectorAll('.chat-turn'));\n"
+        "            for (const tEl of turns) {\n"
+        "              const disp2 = getComputedStyle(tEl).display;\n"
+        "              if (disp2 !== 'none') { anyVisible = true; break; }\n"
+        "            }\n"
+        "            if (anyVisible) break;\n"
+        "          }\n"
+        "          el = el.nextElementSibling;\n"
+        "        }\n"
+        "        d.style.display = anyVisible ? '' : 'none';\n"
+        "      }\n"
+        "    }\n"
+        "  }\n"
+        "  function applyGrouping(n) {\n"
+        "    function groupContainer(container, n) {\n"
+        "      Array.from(container.querySelectorAll(':scope > .group-divider')).forEach(el => el.remove());\n"
+        "      if (!n || n <= 0) { return; }\n"
+        "      const turns = Array.from(container.querySelectorAll(':scope > .chat-turn'));\n"
+        "      if (turns.length === 0) return;\n"
+        "      const items = Array.from(container.children).filter(el => !el.classList.contains('group-divider'));\n"
+        "      const frag = document.createDocumentFragment();\n"
+        "      let lastGroup = -1;\n"
+        "      for (const el of items) {\n"
+        "        if (!el.classList.contains('chat-turn')) { frag.appendChild(el); continue; }\n"
+        "        const t = parseInt(el.getAttribute('data-time-step') || '0', 10);\n"
+        "        const g = Math.floor(t / n);\n"
+        "        if (g !== lastGroup) {\n"
+        "          const div = document.createElement('div');\n"
+        "          div.className = 'group-divider';\n"
+        "          const label = document.createElement('span');\n"
+        "          label.className = 'group-label';\n"
+        "          const roundIndex = g + 1;\n"
+        "          label.textContent = `Round ${roundIndex}`;\n"
+        "          div.appendChild(label);\n"
+        "          frag.appendChild(div);\n"
+        "          lastGroup = g;\n"
+        "        }\n"
+        "        frag.appendChild(el);\n"
+        "      }\n"
+        "      container.innerHTML = '';\n"
+        "      container.appendChild(frag);\n"
+        "      container.classList.toggle('hide-ts-badges', n === 1);\n"
+        "      container.classList.toggle('strong-hide', strongHideOn);\n"
+        "    }\n"
+        "    for (const flow of activeFlows()) {\n"
+        "      if (flow.id === 'flow-split') {\n"
+        "        // Snapshot original turns once to avoid drift on repeated grouping\n"
+        "        const getOriginalTurns = () => {\n"
+        "          if (!flow.dataset.origData) {\n"
+        "            const data = [];\n"
+        "            const cols0 = flow.querySelectorAll('.split-col');\n"
+        "            cols0.forEach(col => {\n"
+        "              const agent = col.getAttribute('data-agent') || '';\n"
+        "              col.querySelectorAll(':scope > .chat-turn').forEach(el => {\n"
+        "                const t = parseInt(el.getAttribute('data-time-step')||'0',10);\n"
+        "                data.push({agent, time:t, html: el.outerHTML});\n"
+        "              });\n"
+        "            });\n"
+        "            flow.dataset.origData = JSON.stringify(data);\n"
+        "          }\n"
+        "          return JSON.parse(flow.dataset.origData);\n"
+        "        };\n"
+        "        const original = getOriginalTurns();\n"
+        "        const agents = Array.from(new Set(original.map(o => o.agent))).sort();\n"
+        "        const groups = new Map();\n"
+        "        original.forEach(o => {\n"
+        "          const g = n && n > 0 ? Math.floor(o.time / n) : 0;\n"
+        "          if (!groups.has(g)) groups.set(g, new Map());\n"
+        "          const gm = groups.get(g);\n"
+        "          if (!gm.has(o.agent)) gm.set(o.agent, []);\n"
+        "          gm.get(o.agent).push(o);\n"
+        "        });\n"
+        "        flow.innerHTML = '';\n"
+        "        const sorted = Array.from(groups.keys()).sort((a,b)=>a-b);\n"
+        "        sorted.forEach(g => {\n"
+        "          const div = document.createElement('div');\n"
+        "          div.className = 'group-divider';\n"
+        "          const label = document.createElement('span');\n"
+        "          label.className = 'group-label';\n"
+        "          label.textContent = `Round ${g+1}`;\n"
+        "          div.appendChild(label);\n"
+        "          flow.appendChild(div);\n"
+        "          const wrapper = document.createElement('div');\n"
+        "          wrapper.className = 'split-wrapper';\n"
+        "          agents.forEach(agent => {\n"
+        "            const colDiv = document.createElement('div');\n"
+        "            colDiv.className = 'split-col';\n"
+        "            colDiv.setAttribute('data-agent', agent);\n"
+        "            (groups.get(g).get(agent) || []).forEach(o => { colDiv.insertAdjacentHTML('beforeend', o.html); });\n"
+        "            wrapper.appendChild(colDiv);\n"
+        "          });\n"
+        "          if (wrapper.children.length === 2) { const res = document.createElement('div'); res.className='split-resizer'; wrapper.insertBefore(res, wrapper.children[1]); }\n"
+        "          flow.appendChild(wrapper);\n"
+        "        });\n"
+        "        flow.classList.toggle('hide-ts-badges', n === 1);\n"
+        "        flow.classList.toggle('strong-hide', strongHideOn);\n"
+        "        document.body.classList.add('split-mode');\n"
+        "      } else {\n"
+        "        groupContainer(flow, n);\n"
+        "      }\n"
+        "    }\n"
+        "    applyRangeFilter();\n"
+        "    initSplitResizers();\n"
+        "  }\n"
+        "  function initSplitResizers() {\n"
+        "    const wrappers = document.querySelectorAll('#flow-split .split-wrapper');\n"
+        "    wrappers.forEach(wrap => {\n"
+        "      const resizer = wrap.querySelector('.split-resizer');\n"
+        "      if (!resizer || resizer.dataset.bound) return; resizer.dataset.bound='1';\n"
+        "      const cols = wrap.querySelectorAll('.split-col'); if (cols.length !== 2) return; const c0=cols[0], c1=cols[1];\n"
+        "      c0.style.flex=c1.style.flex='1 1 0'; c0.style.width=c1.style.width='';\n"
+        "      requestAnimationFrame(()=>{ const w0=c0.scrollWidth,w1=c1.scrollWidth,total=w0+w1||1; let p0=w0/total,p1=w1/total; const minP=0.25,maxP=0.75; if(p0<minP){p0=minP;p1=1-p0;} else if(p0>maxP){p0=maxP;p1=1-p0;} c0.style.flex='0 0 '+(p0*100).toFixed(2)+'%'; c1.style.flex='0 0 '+(p1*100).toFixed(2)+'%'; });\n"
+        "      let dragging=false,startX=0,startP0=0;\n"
+        "      const onDown=e=>{ dragging=true; startX=e.clientX; wrap.classList.add('resizing'); resizer.classList.add('dragging'); const rect=wrap.getBoundingClientRect(); const w=rect.width; const c0Rect=c0.getBoundingClientRect(); startP0=c0Rect.width/w; document.body.style.cursor='col-resize'; e.preventDefault(); };\n"
+        "      const onMove=e=>{ if(!dragging)return; const rect=wrap.getBoundingClientRect(); const w=rect.width; let delta=(e.clientX-startX)/w; let newP0=startP0+delta; const minP=0.15,maxP=0.85; if(newP0<minP)newP0=minP; if(newP0>maxP)newP0=maxP; c0.style.flex='0 0 '+(newP0*100).toFixed(2)+'%'; c1.style.flex='0 0 '+((1-newP0)*100).toFixed(2)+'%'; };\n"
+        "      const onUp=()=>{ if(!dragging)return; dragging=false; wrap.classList.remove('resizing'); resizer.classList.remove('dragging'); document.body.style.cursor=''; };\n"
+        "      resizer.addEventListener('mousedown', onDown); window.addEventListener('mousemove', onMove); window.addEventListener('mouseup', onUp);\n"
+        "      resizer.addEventListener('dblclick', e=>{ if(e.shiftKey){ c0.style.flex=c1.style.flex='1 1 0'; requestAnimationFrame(()=>{ const w0=c0.scrollWidth,w1=c1.scrollWidth,total=w0+w1||1; let p0=w0/total,p1=w1/total; const minP=0.25,maxP=0.75; if(p0<minP){p0=minP;p1=1-p0;} else if(p0>maxP){p0=maxP;p1=1-p0;} c0.style.flex='0 0 '+(p0*100).toFixed(2)+'%'; c1.style.flex='0 0 '+(p1*100).toFixed(2)+'%'; }); } else { c0.style.flex='0 0 50%'; c1.style.flex='0 0 50%'; } });\n"
+        "    });\n"
+        "  }\n"
+        "  initSplitResizers();\n"
+        "  const input = document.getElementById('group-size');\n"
+        "  const btn = document.getElementById('apply-grouping');\n"
+        "  if (btn && input) {\n"
+        "    btn.addEventListener('click', () => { const n = parseInt(input.value || '0', 10); applyGrouping(n); });\n"
+        "    input.addEventListener('keydown', (e) => { if (e.key === 'Enter') { const n = parseInt(input.value || '0', 10); applyGrouping(n); } });\n"
+        "  }\n"
+        "  if (input) { input.value = '1'; applyGrouping(1); }\n"
+        "  const rangeStart = document.getElementById('range-start');\n"
+        "  const rangeEnd = document.getElementById('range-end');\n"
+        "  const rangeBtn = document.getElementById('apply-range');\n"
+        "  if (rangeBtn && rangeStart && rangeEnd) {\n"
+        "    const applyRange = () => {\n"
+        "      const sv = parseInt(rangeStart.value || '', 10);\n"
+        "      const ev = parseInt(rangeEnd.value || '', 10);\n"
+        "      currentRangeStart = Number.isFinite(sv) ? sv : null;\n"
+        "      currentRangeEnd = Number.isFinite(ev) ? ev : null;\n"
+        "      applyRangeFilter();\n"
+        "    };\n"
+        "    rangeBtn.addEventListener('click', applyRange);\n"
+        "    rangeStart.addEventListener('keydown', (e) => { if (e.key === 'Enter') applyRange(); });\n"
+        "    rangeEnd.addEventListener('keydown', (e) => { if (e.key === 'Enter') applyRange(); });\n"
+        "  }\n"
+        "  const strongHideBtn = document.getElementById('toggle-strong-hide');\n"
+        "  const strongHideStateEl = document.getElementById('strong-hide-state');\n"
+        "  if (strongHideBtn) {\n"
+        "    const setLabel = () => { if (strongHideStateEl) { strongHideStateEl.textContent = strongHideOn ? 'On' : 'Off'; } };\n"
+        "    strongHideBtn.addEventListener('click', () => { strongHideOn = !strongHideOn; for (const f of activeFlows()) { f.classList.toggle('strong-hide', strongHideOn); } setLabel(); });\n"
+        "    if (strongHideOn) { for (const f of activeFlows()) { f.classList.add('strong-hide'); } }\n"
+        "    setLabel();\n"
+        "  }\n"
+        "  const splitBtn = document.getElementById('toggle-split-view');\n"
+        "  const splitStateEl = document.getElementById('split-view-state');\n"
+        "  if (splitBtn && splitFlow && linearFlow) {\n"
+        "    const updateSplit = () => { if (splitStateEl) splitStateEl.textContent = splitViewOn ? 'On' : 'Off'; };\n"
+        "    splitBtn.addEventListener('click', () => { if (chatViewOn) return; splitViewOn = !splitViewOn; linearFlow.style.display = splitViewOn ? 'none' : ''; splitFlow.style.display = splitViewOn ? '' : 'none'; applyGrouping(parseInt(input.value||'1',10)); updateSplit(); });\n"
+        "    updateSplit();\n"
+        "  }\n"
+        "  const chatBtn = document.getElementById('toggle-chat-view');\n"
+        "  const chatStateEl = document.getElementById('chat-view-state');\n"
+        "  const hideUserBtn = document.getElementById('toggle-hide-user-messages');\n"
+        "  const hideUserStateEl = document.getElementById('hide-user-state');\n"
+        "  const widthControl = document.getElementById('chat-width-control');\n"
+        "  const widthSlider = document.getElementById('chat-width-slider');\n"
+        "  const widthValue = document.getElementById('chat-width-value');\n"
+        "  let hideUserMessages = false;\n"
+        "  if (chatBtn && chatFlow && linearFlow) {\n"
+        "    const updateChat = () => {\n"
+        "      if (chatStateEl) chatStateEl.textContent = chatViewOn ? 'On' : 'Off';\n"
+        "      if (hideUserBtn) hideUserBtn.style.display = chatViewOn ? '' : 'none';\n"
+        "      if (widthControl) widthControl.style.display = chatViewOn ? '' : 'none';\n"
+        "    };\n"
+        "    chatBtn.addEventListener('click', () => {\n"
+        "      chatViewOn = !chatViewOn;\n"
+        "      if (chatViewOn) {\n"
+        "        splitViewOn = false;\n"
+        "        linearFlow.style.display = 'none';\n"
+        "        if (splitFlow) splitFlow.style.display = 'none';\n"
+        "        chatFlow.style.display = '';\n"
+        "        if (splitStateEl) splitStateEl.textContent = 'Off';\n"
+        "      } else {\n"
+        "        chatFlow.style.display = 'none';\n"
+        "        linearFlow.style.display = '';\n"
+        "      }\n"
+        "      updateChat();\n"
+        "    });\n"
+        "    updateChat();\n"
+        "  }\n"
+        "  if (hideUserBtn && hideUserStateEl && chatFlow) {\n"
+        "    const updateHideUser = () => { hideUserStateEl.textContent = hideUserMessages ? 'On' : 'Off'; };\n"
+        "    hideUserBtn.addEventListener('click', () => {\n"
+        "      hideUserMessages = !hideUserMessages;\n"
+        "      chatFlow.classList.toggle('hide-user-messages', hideUserMessages);\n"
+        "      updateHideUser();\n"
+        "    });\n"
+        "    updateHideUser();\n"
+        "  }\n"
+        "  if (widthSlider && widthValue && chatFlow) {\n"
+        "    const savedWidth = localStorage.getItem('chat-view-width');\n"
+        "    if (savedWidth) {\n"
+        "      widthSlider.value = savedWidth;\n"
+        "      chatFlow.style.setProperty('--chat-width', savedWidth + 'px');\n"
+        "      widthValue.textContent = savedWidth + 'px';\n"
+        "    }\n"
+        "    widthSlider.addEventListener('input', (e) => {\n"
+        "      const width = e.target.value;\n"
+        "      chatFlow.style.setProperty('--chat-width', width + 'px');\n"
+        "      widthValue.textContent = width + 'px';\n"
+        "      localStorage.setItem('chat-view-width', width);\n"
+        "    });\n"
+        "  }\n"
+        "  const fontFamilySelect = document.getElementById('font-family-select');\n"
+        "  const fontSizeInput = document.getElementById('font-size-input');\n"
+        "  if (fontFamilySelect) {\n"
+        "    const savedFont = localStorage.getItem('render-font-family');\n"
+        "    if (savedFont) {\n"
+        "      fontFamilySelect.value = savedFont;\n"
+        "      document.body.style.setProperty('--font-family', savedFont);\n"
+        "    }\n"
+        "    fontFamilySelect.addEventListener('change', (e) => {\n"
+        "      const font = e.target.value;\n"
+        "      document.body.style.setProperty('--font-family', font);\n"
+        "      localStorage.setItem('render-font-family', font);\n"
+        "    });\n"
+        "  }\n"
+        "  if (fontSizeInput) {\n"
+        "    const savedSize = localStorage.getItem('render-font-size');\n"
+        "    if (savedSize) {\n"
+        "      fontSizeInput.value = savedSize;\n"
+        "      document.body.style.setProperty('--font-size', savedSize + 'px');\n"
+        "    }\n"
+        "    fontSizeInput.addEventListener('input', (e) => {\n"
+        "      const size = e.target.value;\n"
+        "      document.body.style.setProperty('--font-size', size + 'px');\n"
+        "      localStorage.setItem('render-font-size', size);\n"
+        "    });\n"
+        "  }\n"
+        "  const aliceEmojiInput = document.getElementById('alice-emoji-input');\n"
+        "  const aliceNameInput = document.getElementById('alice-name-input');\n"
+        "  const bobEmojiInput = document.getElementById('bob-emoji-input');\n"
+        "  const bobNameInput = document.getElementById('bob-name-input');\n"
+        "  const applyAgentNamesBtn = document.getElementById('apply-agent-names');\n"
+        "  function loadAgentNames() {\n"
+        "    if (aliceEmojiInput && aliceNameInput && bobEmojiInput && bobNameInput) {\n"
+        "      const savedAliceEmoji = localStorage.getItem('alice-emoji') || '🤖';\n"
+        "      const savedAliceName = localStorage.getItem('alice-name') || 'Alice';\n"
+        "      const savedBobEmoji = localStorage.getItem('bob-emoji') || '🤖';\n"
+        "      const savedBobName = localStorage.getItem('bob-name') || 'Bob';\n"
+        "      aliceEmojiInput.value = savedAliceEmoji;\n"
+        "      aliceNameInput.value = savedAliceName;\n"
+        "      bobEmojiInput.value = savedBobEmoji;\n"
+        "      bobNameInput.value = savedBobName;\n"
+        "      applyAgentNamesToDOM(savedAliceEmoji, savedAliceName, savedBobEmoji, savedBobName);\n"
+        "    }\n"
+        "  }\n"
+        "  function applyAgentNamesToDOM(aliceEmoji, aliceName, bobEmoji, bobName) {\n"
+        "    const agentMap = { 'alice': { name: aliceName, emoji: aliceEmoji }, 'bob': { name: bobName, emoji: bobEmoji } };\n"
+        "    document.querySelectorAll('[data-agent-id]').forEach(el => {\n"
+        "      const agentId = el.getAttribute('data-agent-id');\n"
+        "      if (!agentMap[agentId]) return;\n"
+        "      if (el.classList.contains('agent-name')) {\n"
+        "        el.textContent = agentMap[agentId].name;\n"
+        "      } else if (el.classList.contains('emoji-bw')) {\n"
+        "        const currentEmoji = el.textContent.trim();\n"
+        "        if (currentEmoji === '🤖' || currentEmoji === '👤') {\n"
+        "          el.textContent = agentMap[agentId].emoji;\n"
+        "        }\n"
+        "      }\n"
+        "    });\n"
+        "    const style = document.createElement('style');\n"
+        "    style.id = 'dynamic-agent-names-style';\n"
+        "    const existingStyle = document.getElementById('dynamic-agent-names-style');\n"
+        "    if (existingStyle) existingStyle.remove();\n"
+        "    style.textContent = `\n"
+        "      .agent-context-box.agent-alice .round-context-edit::before {\n"
+        "        content: '${aliceName} Prompt Summary:';\n"
+        "      }\n"
+        "      .agent-context-box.agent-bob .round-context-edit::before {\n"
+        "        content: '${bobName} Prompt Summary:';\n"
+        "      }\n"
+        "    `;\n"
+        "    document.head.appendChild(style);\n"
+        "  }\n"
+        "  if (applyAgentNamesBtn && aliceEmojiInput && aliceNameInput && bobEmojiInput && bobNameInput) {\n"
+        "    [aliceEmojiInput, aliceNameInput, bobEmojiInput, bobNameInput].forEach(input => {\n"
+        "      input.style.pointerEvents = 'auto';\n"
+        "      if (input.tagName === 'INPUT') {\n"
+        "        input.style.userSelect = 'text';\n"
+        "        input.style.webkitUserSelect = 'text';\n"
+        "        input.readOnly = false;\n"
+        "      }\n"
+        "      input.disabled = false;\n"
+        "      const stopAll = (e) => { e.stopPropagation(); e.stopImmediatePropagation(); };\n"
+        "      input.addEventListener('mousedown', stopAll, true);\n"
+        "      input.addEventListener('mouseup', stopAll, true);\n"
+        "      input.addEventListener('click', stopAll, true);\n"
+        "      input.addEventListener('dblclick', stopAll, true);\n"
+        "      input.addEventListener('focus', stopAll, true);\n"
+        "      input.addEventListener('blur', stopAll, true);\n"
+        "      input.addEventListener('paste', stopAll, true);\n"
+        "      input.addEventListener('cut', stopAll, true);\n"
+        "      input.addEventListener('copy', stopAll, true);\n"
+        "      input.addEventListener('select', stopAll, true);\n"
+        "      input.addEventListener('selectstart', stopAll, true);\n"
+        "      input.addEventListener('keydown', stopAll, true);\n"
+        "      input.addEventListener('keyup', stopAll, true);\n"
+        "      input.addEventListener('keypress', stopAll, true);\n"
+        "      input.addEventListener('input', stopAll, true);\n"
+        "      input.addEventListener('change', stopAll, true);\n"
+        "      input.addEventListener('contextmenu', stopAll, true);\n"
+        "    });\n"
+        "    const applyNames = () => {\n"
+        "      const aliceEmoji = aliceEmojiInput.value || '🤖';\n"
+        "      const aliceName = aliceNameInput.value.trim() || 'Alice';\n"
+        "      const bobEmoji = bobEmojiInput.value || '🤖';\n"
+        "      const bobName = bobNameInput.value.trim() || 'Bob';\n"
+        "      localStorage.setItem('alice-emoji', aliceEmoji);\n"
+        "      localStorage.setItem('alice-name', aliceName);\n"
+        "      localStorage.setItem('bob-emoji', bobEmoji);\n"
+        "      localStorage.setItem('bob-name', bobName);\n"
+        "      applyAgentNamesToDOM(aliceEmoji, aliceName, bobEmoji, bobName);\n"
+        "    };\n"
+        "    applyAgentNamesBtn.addEventListener('click', applyNames);\n"
+        "    [aliceNameInput, bobNameInput].forEach(input => {\n"
+        "      input.addEventListener('keydown', (e) => {\n"
+        "        if (e.key === 'Enter') {\n"
+        "          e.preventDefault();\n"
+        "          e.stopPropagation();\n"
+        "          e.stopImmediatePropagation();\n"
+        "          applyNames();\n"
+        "        }\n"
+        "      }, true);\n"
+        "    });\n"
+        "    [aliceEmojiInput, bobEmojiInput].forEach(select => {\n"
+        "      select.addEventListener('change', applyNames);\n"
+        "    });\n"
+        "  }\n"
+        "  loadAgentNames();\n"
+        "  function setupRoundCollapse() {\n"
+        "    document.addEventListener('click', function(e) {\n"
+        "      if (e.target.closest('input, textarea, select, button, .round-context-edit, .toolbar')) { return; }\n"
+        "      const divider = e.target.closest('.chat-group-divider, .group-divider');\n"
+        "      if (!divider) return;\n"
+        "      divider.classList.toggle('collapsed');\n"
+        "      const isCollapsed = divider.classList.contains('collapsed');\n"
+        "      let nextElement = divider.nextElementSibling;\n"
+        "      while (nextElement) {\n"
+        "        if (nextElement.classList.contains('chat-group-divider') || nextElement.classList.contains('group-divider')) {\n"
+        "          break;\n"
+        "        }\n"
+        "        if (isCollapsed) {\n"
+        "          if (!nextElement.dataset.originalDisplay) {\n"
+        "            nextElement.dataset.originalDisplay = nextElement.style.display || getComputedStyle(nextElement).display;\n"
+        "          }\n"
+        "          nextElement.style.display = 'none';\n"
+        "        } else {\n"
+        "          if (nextElement.dataset.originalDisplay) {\n"
+        "            const originalDisplay = nextElement.dataset.originalDisplay;\n"
+        "            nextElement.style.display = originalDisplay === 'none' ? '' : originalDisplay;\n"
+        "            if (nextElement.style.display === originalDisplay && originalDisplay !== 'none') {\n"
+        "              nextElement.style.display = '';\n"
+        "            }\n"
+        "            delete nextElement.dataset.originalDisplay;\n"
+        "          } else {\n"
+        "            nextElement.style.display = '';\n"
+        "          }\n"
+        "        }\n"
+        "        nextElement = nextElement.nextElementSibling;\n"
+        "      }\n"
+        "      e.stopPropagation();\n"
+        "    });\n"
+        "  }\n"
+        "  setupRoundCollapse();\n"
+        "  const strongHideBtnChat = document.getElementById('toggle-strong-hide');\n"
+        "  function applyStrongHideToChat() {\n"
+        "    if (!chatFlow) return;\n"
+        "    chatFlow.classList.toggle('strong-hide', strongHideOn);\n"
+        "    const contextEdits = chatFlow.querySelectorAll('.round-context-edit');\n"
+        "    contextEdits.forEach(edit => {\n"
+        "      const parent = edit.closest('.round-context, .agent-context-box, .split-agent-context');\n"
+        "      if (parent) {\n"
+        "        if (strongHideOn && edit.textContent.trim() === '') {\n"
+        "          parent.style.display = 'none';\n"
+        "        } else {\n"
+        "          parent.style.display = '';\n"
+        "        }\n"
+        "      }\n"
+        "    });\n"
+        "    const splitContexts = chatFlow.querySelectorAll('.split-agent-context');\n"
+        "    splitContexts.forEach(split => {\n"
+        "      if (strongHideOn) {\n"
+        "        const boxes = split.querySelectorAll('.agent-context-box');\n"
+        "        const allEmpty = Array.from(boxes).every(box => {\n"
+        "          const edit = box.querySelector('.round-context-edit');\n"
+        "          return edit && edit.textContent.trim() === '';\n"
+        "        });\n"
+        "        if (allEmpty) split.style.display = 'none';\n"
+        "      }\n"
+        "    });\n"
+        "  }\n"
+        "  if (strongHideBtnChat && chatFlow) {\n"
+        "    strongHideBtnChat.addEventListener('click', () => {\n"
+        "      setTimeout(() => applyStrongHideToChat(), 0);\n"
+        "    });\n"
+        "  }\n"
+        "  document.addEventListener('click', function(e) {\n"
+        "    if (e.target.closest('input, textarea, select, .round-context-edit, .toolbar')) { return; }\n"
+        "    const chatReasoning = e.target.closest('.chat-reasoning');\n"
+        "    if (chatReasoning) {\n"
+        "      chatReasoning.classList.toggle('collapsed');\n"
+        "    }\n"
+        "  });\n"
+        "  function applyColorToSelection(color, element) {\n"
+        "    const selection = window.getSelection();\n"
+        "    if (!selection.rangeCount) return false;\n"
+        "    const range = selection.getRangeAt(0);\n"
+        "    if (!element.contains(range.commonAncestorContainer)) return false;\n"
+        "    const selectedText = range.toString();\n"
+        "    if (!selectedText) return false;\n"
+        "    if (color === 'default') {\n"
+        "      // Remove styling - just extract the text content\n"
+        "      const textNode = document.createTextNode(selectedText);\n"
+        "      range.deleteContents();\n"
+        "      range.insertNode(textNode);\n"
+        "    } else {\n"
+        "      const span = document.createElement('span');\n"
+        "      span.style.color = color;\n"
+        "      span.style.fontWeight = '600';\n"
+        "      try {\n"
+        "        range.surroundContents(span);\n"
+        "      } catch (e) {\n"
+        "        const contents = range.extractContents();\n"
+        "        span.appendChild(contents);\n"
+        "        range.insertNode(span);\n"
+        "      }\n"
+        "    }\n"
+        "    return true;\n"
+        "  }\n"
+        "  let lastFocusedContextEdit = null;\n"
+        "  document.addEventListener('focusin', function(e) {\n"
+        "    if (e.target.classList.contains('round-context-edit')) {\n"
+        "      lastFocusedContextEdit = e.target;\n"
+        "    }\n"
+        "  });\n"
+        "  document.addEventListener('mousedown', function(e) {\n"
+        "    if (e.target.classList.contains('context-color-btn')) {\n"
+        "      e.preventDefault();\n"
+        "    }\n"
+        "  });\n"
+        "  document.addEventListener('click', function(e) {\n"
+        "    if (e.target.closest('input:not(.round-context-edit), textarea, select') && !e.target.classList.contains('context-color-btn')) { return; }\n"
+        "    if (e.target.classList.contains('context-color-btn')) {\n"
+        "      e.preventDefault();\n"
+        "      const color = e.target.dataset.color;\n"
+        "      const controls = e.target.closest('.round-context-controls');\n"
+        "      const contextEdit = controls ? controls.previousElementSibling : null;\n"
+        "      if (contextEdit && contextEdit.classList.contains('round-context-edit')) {\n"
+        "        contextEdit.focus();\n"
+        "        const selection = window.getSelection();\n"
+        "        if (selection.rangeCount > 0 && selection.toString().length > 0 && contextEdit.contains(selection.anchorNode)) {\n"
+        "          if (applyColorToSelection(color, contextEdit)) {\n"
+        "            const key = contextEdit.dataset.contextKey;\n"
+        "            localStorage.setItem(key, contextEdit.innerHTML);\n"
+        "          }\n"
+        "        } else {\n"
+        "          try {\n"
+        "            if (color !== 'default') {\n"
+        "              document.execCommand('styleWithCSS', false, true);\n"
+        "              document.execCommand('foreColor', false, color);\n"
+        "            }\n"
+        "            const key = contextEdit.dataset.contextKey;\n"
+        "            setTimeout(() => localStorage.setItem(key, contextEdit.innerHTML), 10);\n"
+        "          } catch (e) {\n"
+        "            console.log('Color command failed:', e);\n"
+        "          }\n"
+        "        }\n"
+        "      }\n"
+        "    }\n"
+        "  });\n"
+        "  const contextEdits = document.querySelectorAll('.round-context-edit');\n"
+        "  contextEdits.forEach(edit => {\n"
+        "    edit.addEventListener('input', function() {\n"
+        "      const key = this.dataset.contextKey;\n"
+        "      localStorage.setItem(key, this.innerHTML);\n"
+        "    });\n"
+        "    const key = edit.dataset.contextKey;\n"
+        "    const saved = localStorage.getItem(key);\n"
+        "    if (saved) {\n"
+        "      edit.innerHTML = saved;\n"
+        "    }\n"
+        "  });\n"
+        "  document.addEventListener('click', function(e) {\n"
+        "    if (e.target.closest('input, textarea, select, .round-context-edit') && !e.target.classList.contains('merge-btn') && !e.target.classList.contains('unmerge-btn')) { return; }\n"
+        "    if (e.target.classList.contains('merge-btn')) {\n"
+        "      e.preventDefault();\n"
+        "      e.stopPropagation();\n"
+        "      const msgId = e.target.dataset.msgId;\n"
+        "      const currentMsg = e.target.closest('.chat-message');\n"
+        "      if (!currentMsg) return;\n"
+        "      if (currentMsg.classList.contains('role-user')) {\n"
+        "        alert('Cannot merge user messages');\n"
+        "        return;\n"
+        "      }\n"
+        "      let nextMsg = currentMsg.nextElementSibling;\n"
+        "      while (nextMsg && !nextMsg.classList.contains('chat-message')) {\n"
+        "        nextMsg = nextMsg.nextElementSibling;\n"
+        "      }\n"
+        "      while (nextMsg && nextMsg.classList.contains('role-user')) {\n"
+        "        nextMsg = nextMsg.nextElementSibling;\n"
+        "        while (nextMsg && !nextMsg.classList.contains('chat-message')) {\n"
+        "          nextMsg = nextMsg.nextElementSibling;\n"
+        "        }\n"
+        "      }\n"
+        "      if (!nextMsg || nextMsg.classList.contains('chat-message') === false) {\n"
+        "        alert('No next assistant message to merge with');\n"
+        "        return;\n"
+        "      }\n"
+        "      if (nextMsg.classList.contains('role-user')) {\n"
+        "        alert('Cannot merge with user messages');\n"
+        "        return;\n"
+        "      }\n"
+        "      const parent = currentMsg.parentElement;\n"
+        "      if (parent.classList.contains('simultaneous-messages')) {\n"
+        "        const wrapper = parent;\n"
+        "        currentMsg.style.display = '';\n"
+        "        currentMsg.classList.remove('merged');\n"
+        "        const refNode = wrapper.nextElementSibling;\n"
+        "        parent.parentElement.insertBefore(currentMsg, refNode);\n"
+        "        if (nextMsg.parentElement === wrapper) {\n"
+        "          parent.parentElement.insertBefore(nextMsg, refNode);\n"
+        "        }\n"
+        "        if (wrapper.children.length === 0) {\n"
+        "          wrapper.remove();\n"
+        "        }\n"
+        "      } else {\n"
+        "        const wrapper = document.createElement('div');\n"
+        "        wrapper.className = 'simultaneous-messages';\n"
+        "        const unmergeBtn = document.createElement('button');\n"
+        "        unmergeBtn.className = 'unmerge-btn';\n"
+        "        unmergeBtn.innerHTML = '✕';\n"
+        "        unmergeBtn.title = 'Click to unmerge messages';\n"
+        "        wrapper.appendChild(unmergeBtn);\n"
+        "        wrapper.dataset.firstMsgId = currentMsg.dataset.msgId;\n"
+        "        wrapper.dataset.secondMsgId = nextMsg.dataset.msgId;\n"
+        "        parent.insertBefore(wrapper, currentMsg);\n"
+        "        wrapper.appendChild(currentMsg);\n"
+        "        wrapper.appendChild(nextMsg);\n"
+        "        currentMsg.classList.add('merged');\n"
+        "        nextMsg.classList.add('merged');\n"
+        "      }\n"
+        "    }\n"
+        "    if (e.target.classList.contains('unmerge-btn')) {\n"
+        "      const wrapper = e.target.closest('.simultaneous-messages');\n"
+        "      if (!wrapper) return;\n"
+        "      const parent = wrapper.parentElement;\n"
+        "      const firstMsgId = wrapper.dataset.firstMsgId;\n"
+        "      const secondMsgId = wrapper.dataset.secondMsgId;\n"
+        "      const messages = Array.from(wrapper.querySelectorAll('.chat-message'));\n"
+        "      const refNode = wrapper.nextElementSibling;\n"
+        "      const firstMsg = messages.find(m => m.dataset.msgId === firstMsgId);\n"
+        "      const secondMsg = messages.find(m => m.dataset.msgId === secondMsgId);\n"
+        "      if (firstMsg) {\n"
+        "        firstMsg.classList.remove('merged');\n"
+        "        firstMsg.style.display = '';\n"
+        "        parent.insertBefore(firstMsg, refNode);\n"
+        "      }\n"
+        "      if (secondMsg) {\n"
+        "        secondMsg.classList.remove('merged');\n"
+        "        secondMsg.style.display = '';\n"
+        "        parent.insertBefore(secondMsg, refNode);\n"
+        "      }\n"
+        "      wrapper.remove();\n"
+        "    }\n"
+        "  });\n"
+        "});\n"
+        "</script>",
+        "</head>",
+        "<body>",
+        '<div class="toolbar-wrap">',
+        '<div class="toolbar-hotzone"></div>',
+        '<div class="toolbar">',
+        '<label for="group-size">Group every</label>',
+        '<input id="group-size" type="number" min="0" step="1" value="1" />',
+        "<span>timesteps</span>",
+        '<button id="apply-grouping">Apply</button>',
+        '<span style="margin-left:8px"></span>',
+        '<label for="range-start"><span class="emoji-bw">🔎</span> Range</label>',
+        '<input id="range-start" type="number" step="1" />',
+        "<span>to</span>",
+        '<input id="range-end" type="number" step="1" />',
+        '<button id="apply-range"><span class="emoji-bw">▶︎</span> Apply</button>',
+        '<button id="toggle-strong-hide"><span class="emoji-bw">🗜️</span> Strong Hide: <span id="strong-hide-state">Off</span></button>',
+        (
+            '<button id="toggle-split-view"><span class="emoji-bw">🪟</span> Split View: <span id="split-view-state">Off</span></button>'
+            if enable_split_view
+            else ""
+        ),
+        '<button id="toggle-chat-view"><span class="emoji-bw">💬</span> Chat View: <span id="chat-view-state">On</span></button>',
+        '<button id="toggle-hide-user-messages"><span class="emoji-bw">👁️</span> Hide Prompts: <span id="hide-user-state">Off</span></button>',
+        '<span id="chat-width-control" style="margin-left:8px;">',
+        '<label for="chat-width-slider"><span class="emoji-bw">↔️</span> Width:</label>',
+        '<input id="chat-width-slider" type="range" min="600" max="1600" step="50" value="900" style="width:120px; vertical-align:middle;" />',
+        '<span id="chat-width-value" style="margin-left:4px;">900px</span>',
+        '</span>',
+        '<span style="margin-left:12px;">',
+        '<label for="font-family-select"><span class="emoji-bw">🔤</span> Font:</label>',
+        '<select id="font-family-select" style="padding:2px 6px; border:1px solid var(--accent-muted); border-radius:var(--corner-radius); background:var(--bg);">',
+        '<option value="\'Segoe UI\', Tahoma, Geneva, Verdana, sans-serif">Segoe UI</option>',
+        '<option value="Arial, sans-serif">Arial</option>',
+        '<option value="\'Helvetica Neue\', Helvetica, sans-serif">Helvetica</option>',
+        '<option value="\'Times New Roman\', Times, serif">Times New Roman</option>',
+        '<option value="Georgia, serif">Georgia</option>',
+        '<option value="\'Courier New\', Courier, monospace">Courier New</option>',
+        '<option value="\'Comic Sans MS\', cursive">Comic Sans</option>',
+        '<option value="\'Trebuchet MS\', sans-serif">Trebuchet MS</option>',
+        '<option value="Verdana, sans-serif">Verdana</option>',
+        '<option value="\'Palatino Linotype\', \'Book Antiqua\', Palatino, serif">Palatino</option>',
+        '<option value="\'Lucida Console\', Monaco, monospace">Lucida Console</option>',
+        '</select>',
+        '</span>',
+        '<span style="margin-left:8px;">',
+        '<label for="font-size-input"><span class="emoji-bw">📏</span> Size:</label>',
+        '<input id="font-size-input" type="number" min="8" max="24" step="1" value="14" style="width:50px;" />',
+        '<span>px</span>',
+        '</span>',
+        '<span style="margin-left:12px; display:flex; align-items:center; gap:8px;">',
+        '<label style="font-weight:600;">Agent Names:</label>',
+        '<select id="alice-emoji-input" style="width:65px; padding:2px 6px; border:1px solid var(--accent-muted); border-radius:var(--corner-radius); background:var(--bg);">',
+        '<option value="🤖">🤖 Robot</option>',
+        '<option value="👤">👤 Human</option>',
+        '</select>',
+        '<input id="alice-name-input" type="text" placeholder="Alice" style="width:80px; padding:2px 6px; border:1px solid var(--accent-muted); border-radius:var(--corner-radius); background:var(--bg);" />',
+        '<span style="margin:0 4px;">|</span>',
+        '<select id="bob-emoji-input" style="width:65px; padding:2px 6px; border:1px solid var(--accent-muted); border-radius:var(--corner-radius); background:var(--bg);">',
+        '<option value="🤖">🤖 Robot</option>',
+        '<option value="👤">👤 Human</option>',
+        '</select>',
+        '<input id="bob-name-input" type="text" placeholder="Bob" style="width:80px; padding:2px 6px; border:1px solid var(--accent-muted); border-radius:var(--corner-radius); background:var(--bg);" />',
+        '<button id="apply-agent-names" style="padding:4px 8px; border:1px solid var(--accent-muted); background:var(--panel-bg); border-radius:var(--corner-radius); cursor:pointer;">Apply</button>',
+        '</span>',
+        "</div>",
+        "</div>",
+        '<div id="flow-linear" class="messages-flow" style="display:none">',
+    ]
+    last_time_step = None
+    for original_index, turn in indexed_turns:
+        # Build classes
+        agent_class = f"agent-{re.sub('[^a-z0-9_-]', '-', turn.agent_id.lower())}"
+        role_class = f"role-{turn.role}"
+        collapsed_class = " collapsed" if turn.role == "user" else ""
+        # Badge content
+        agent_id_clean = html.escape(turn.agent_id).lower()
+        if turn.role == "assistant":
+            name = html.escape(turn.agent_id)
+            emoji = '<span class="emoji-bw" data-agent-id="' + agent_id_clean + '"> 🤖</span>'
+            raw_val = turn.reward
+            if isinstance(raw_val, (int, float)):
+                reward_val = f"{raw_val:.4f}".rstrip("0").rstrip(".")
+                if len(reward_val) > 8:
+                    reward_val = reward_val[:8] + "…"
+            else:
+                reward_val = str(raw_val)
+            # Format: "🤖 Alice • Reward: 5.5556 • 💬 :"
+            badge_inner = (
+                f'{emoji} <span class="agent-name" data-agent-id="{agent_id_clean}">{name}</span>'
+                f' <span class="sep"> • </span><span class="reward">Reward ⚑ = {reward_val}</span>'
+            )
+        else:
+            # For user messages, show "Prompt of {Agent ID}" in the badge
+            name = html.escape(turn.agent_id)
+            # Format (no reward): "Prompt of Alice • "
+            badge_inner = f'Prompt of <span class="agent-name" data-agent-id="{agent_id_clean}">{name}</span> <span class="sep"> • </span>:'
+        badge = f'<span class="agent-badge">{badge_inner}</span>'
+        # Inline timestep distinction badge at step boundaries (render before first message)
+        ts_badge_html = ""
+        if last_time_step is None or turn.time_step != last_time_step:
+            ts_badge_html = f'<span class="ts-badge">⏱ {turn.time_step}</span>'
+            last_time_step = turn.time_step
+        escaped_content = html.escape(turn.content)
+        reasoning_html = ""
+        if turn.reasoning_content:
+            # Normalize reasoning to avoid leading/newline whitespace that creates visual gaps
+            _raw_reasoning = turn.reasoning_content.replace("\r\n", "\n")
+            _raw_reasoning = _re.sub(
+                r"^\s*\n+", "", _raw_reasoning
+            )  # drop leading blank lines
+            _raw_reasoning = _re.sub(
+                r"\*\*(\s*\n\s*)", r"** ", _raw_reasoning
+            )  # newline right after **
+            _raw_reasoning = _re.sub(
+                r"(\s*\n\s*)\*\*", r" **", _raw_reasoning
+            )  # newline right before **
+            escaped_reasoning = html.escape(_raw_reasoning)
+            reasoning_html = f'<span class="reasoning-inline"><span class="reasoning-icon">💭</span><span class="reasoning-text">{escaped_reasoning}</span></span>'
+        collapsed_text = re.sub(r"\s+", " ", escaped_content).strip()
+        html_parts.append(
+            f'<div class="chat-turn {agent_class} {role_class}{collapsed_class}" data-time-step="{turn.time_step}">'
+            f'<div class="turn-content {agent_class} {role_class}">{ts_badge_html}{badge}'
+            f'<span class="message-box">{reasoning_html}<span class="main-content">💬 {collapsed_text}</span></span>'
+            f'<span class="message-placeholder">(...)</span>'
+            f"</div>"
+            f"</div>"
+        )
+    html_parts.append("</div>")  # close linear flow
+    if enable_split_view:
+        import html as _html_mod
+        html_parts.append(
+            '<div id="flow-split" class="messages-flow" style="display:none">'
+        )
+        html_parts.append('<div class="split-wrapper">')
+        # Per-agent columns
+        per_agent_turns = {
+            aid: [t for t in chat_turns if t.agent_id == aid]
+            for aid in assistant_agents
+        }
+        for idx, aid in enumerate(assistant_agents):
+            turns_agent = per_agent_turns[aid]
+            html_parts.append(
+                f'<div class="split-col" data-agent="{_html_mod.escape(aid)}">'
+            )
+            last_ts_agent = None
+            for turn in turns_agent:
+                agent_class = (
+                    f"agent-{re.sub('[^a-z0-9_-]', '-', turn.agent_id.lower())}"
+                )
+                role_class = f"role-{turn.role}"
+                collapsed_class = " collapsed" if turn.role == "user" else ""
+                ts_badge_html = ""
+                if last_ts_agent is None or turn.time_step != last_ts_agent:
+                    ts_badge_html = f'<span class="ts-badge">⏱ {turn.time_step}</span>'
+                    last_ts_agent = turn.time_step
+                esc_content = _html_mod.escape(turn.content)
+                reasoning_html = ""
+                if turn.reasoning_content:
+                    _raw_reasoning = turn.reasoning_content.replace("\r\n", "\n")
+                    _raw_reasoning = _re.sub(r"^\s*\n+", "", _raw_reasoning)
+                    _raw_reasoning = _re.sub(r"\*\*(\s*\n\s*)", r"** ", _raw_reasoning)
+                    _raw_reasoning = _re.sub(r"(\s*\n\s*)\*\*", r" **", _raw_reasoning)
+                    esc_reasoning = _html_mod.escape(_raw_reasoning)
+                    reasoning_html = f'<span class="reasoning-inline"><span class="reasoning-icon">💭</span><span class="reasoning-text">{esc_reasoning}</span></span>'
+                collapsed_text = re.sub(r"\s+", " ", esc_content).strip()
+                agent_id_clean = _html_mod.escape(turn.agent_id).lower()
+                if turn.role == "assistant":
+                    name = _html_mod.escape(turn.agent_id)
+                    emoji = '<span class="emoji-bw" data-agent-id="' + agent_id_clean + '"> 🤖</span>'
+                    raw_val = turn.reward
+                    if isinstance(raw_val, (int, float)):
+                        reward_val = f"{raw_val:.4f}".rstrip("0").rstrip(".")
+                        if len(reward_val) > 8:
+                            reward_val = reward_val[:8] + "…"
+                    else:
+                        reward_val = str(raw_val)
+                    badge_inner = (
+                        f'{emoji} <span class="agent-name" data-agent-id="{agent_id_clean}">{name}</span>'
+                        f' <span class="sep"> • </span><span class="reward">Reward ⚑ : {reward_val}</span>'
+                    )
+                else:
+                    name = _html_mod.escape(turn.agent_id)
+                    badge_inner = f'Prompt of <span class="agent-name" data-agent-id="{agent_id_clean}">{name}</span> <span class="sep"> • </span>:'
+                badge = f'<span class="agent-badge">{badge_inner}</span>'
+                html_parts.append(
+                    f'<div class="chat-turn {agent_class} {role_class}{collapsed_class}" data-time-step="{turn.time_step}">'
+                    f'<div class="turn-content {agent_class} {role_class}">{ts_badge_html}{badge}'
+                    f'<span class="message-box">{reasoning_html}<span class="main-content">💬 {collapsed_text}</span></span>'
+                    f'<span class="message-placeholder">(...)</span>'
+                    f"</div></div>"
+                )
+            html_parts.append("</div>")  # close split col
+        html_parts.append("</div>")  # split-wrapper
+        html_parts.append("</div>")  # flow-split
+    # Add Chat View
+    import html as _html_mod
+    html_parts.append('<div id="flow-chat" class="messages-flow">')
+    # Helper function to add context annotation areas
+    def add_context_area(position: str, time_step: int):
+        context_key = f"round-context-{position}-{time_step}"
+        placeholder = f"Add context {position} round {time_step}..."
+        color_buttons = ""
+        # Add default/reset color button first
+        color_buttons += (
+            f'<div class="context-color-btn" data-color="default" '
+            f'style="background: linear-gradient(135deg, #000 25%, transparent 25%, transparent 75%, #000 75%), '
+            f'linear-gradient(135deg, #000 25%, transparent 25%, transparent 75%, #000 75%); '
+            f'background-size: 4px 4px; background-position: 0 0, 2px 2px; '
+            f'background-color: #fff;" title="Default color"></div>'
+        )
+        for color_name, color_value in [
+            ('red', '#d32f2f'),
+            ('orange', '#f57c00'),
+            ('yellow', '#f9a825'),
+            ('green', '#388e3c'),
+            ('blue', '#1976d2'),
+            ('purple', '#7b1fa2'),
+            ('gray', '#666666'),
+        ]:
+            color_buttons += (
+                f'<div class="context-color-btn" data-color="{color_value}" '
+                f'style="background-color: {color_value};" title="{color_name}"></div>'
+            )
+        html_parts.append(
+            f'<div class="round-context">'
+            f'<div class="round-context-edit" contenteditable="true" spellcheck="true" '
+            f'data-context-key="{context_key}" '
+            f'data-placeholder="{placeholder}"></div>'
+            f'<div class="round-context-controls">{color_buttons}</div>'
+            f'</div>'
+        )
+    # Helper function to add split agent context boxes
+    def add_split_agent_contexts(position: str, time_step: int):
+        color_buttons = ""
+        # Add default/reset color button first
+        color_buttons += (
+            f'<div class="context-color-btn" data-color="default" '
+            f'style="background: linear-gradient(135deg, #000 25%, transparent 25%, transparent 75%, #000 75%), '
+            f'linear-gradient(135deg, #000 25%, transparent 25%, transparent 75%, #000 75%); '
+            f'background-size: 4px 4px; background-position: 0 0, 2px 2px; '
+            f'background-color: #fff;" title="Default color"></div>'
+        )
+        for color_name, color_value in [
+            ('red', '#d32f2f'),
+            ('orange', '#f57c00'),
+            ('yellow', '#f9a825'),
+            ('green', '#388e3c'),
+            ('blue', '#1976d2'),
+            ('purple', '#7b1fa2'),
+            ('gray', '#666666'),
+        ]:
+            color_buttons += (
+                f'<div class="context-color-btn" data-color="{color_value}" '
+                f'style="background-color: {color_value};" title="{color_name}"></div>'
+            )
+        html_parts.append('<div class="split-agent-context">')
+        # Alice box
+        alice_key = f"agent-context-alice-{position}-{time_step}"
+        alice_placeholder = f"..."
+        html_parts.append(
+            f'<div class="agent-context-box agent-alice">'
+            f'<div class="round-context-edit" contenteditable="true" spellcheck="true" '
+            f'data-context-key="{alice_key}" '
+            f'data-placeholder="{alice_placeholder}"></div>'
+            f'<div class="round-context-controls">{color_buttons}</div>'
+            f'</div>'
+        )
+        # Bob box
+        bob_key = f"agent-context-bob-{position}-{time_step}"
+        bob_placeholder = f"..."
+        html_parts.append(
+            f'<div class="agent-context-box agent-bob">'
+            f'<div class="round-context-edit" contenteditable="true" spellcheck="true" '
+            f'data-context-key="{bob_key}" '
+            f'data-placeholder="{bob_placeholder}"></div>'
+            f'<div class="round-context-controls">{color_buttons}</div>'
+            f'</div>'
+        )
+        html_parts.append('</div>')  # split-agent-context
+    last_time_step_chat = None
+    for original_index, turn in indexed_turns:
+        agent_class = f"agent-{re.sub('[^a-z0-9_-]', '-', turn.agent_id.lower())}"
+        role_class = f"role-{turn.role}"
+        # Add time step divider and beginning context
+        if last_time_step_chat is None or turn.time_step != last_time_step_chat:
+            # Add end contexts for previous round (only regular context, not prompt summary)
+            if last_time_step_chat is not None:
+                add_context_area("end", last_time_step_chat)
+            html_parts.append(
+                f'<div class="chat-group-divider">'
+                f'<span class="chat-group-label">⏱ Round {turn.time_step + 1}</span>'
+                f'</div>'
+            )
+            # Add beginning contexts for new round (both context and prompt summary)
+            add_context_area("beginning", turn.time_step)
+            add_split_agent_contexts("beginning", turn.time_step)
+            last_time_step_chat = turn.time_step
+        # Build chat message with merge controls
+        html_parts.append(f'<div class="chat-message {agent_class} {role_class}" data-msg-id="{original_index}">')
+        # Add merge control button
+        html_parts.append(
+            f'<button class="merge-btn" title="Merge with next message" data-msg-id="{original_index}">⇄</button>'
+        )
+        html_parts.append('<div class="chat-message-content">')
+        # Header with agent name and reward (always show reward)
+        agent_id_clean = _html_mod.escape(turn.agent_id).lower()
+        if turn.role == "assistant":
+            name = _html_mod.escape(turn.agent_id)
+            raw_val = turn.reward
+            if isinstance(raw_val, (int, float)):
+                reward_val = f"{raw_val:.4f}".rstrip("0").rstrip(".")
+                if len(reward_val) > 8:
+                    reward_val = reward_val[:8] + "…"
+            else:
+                reward_val = str(raw_val)
+            header_html = (
+                f'<div class="chat-header">'
+                f'<span class="emoji-bw" data-agent-id="{agent_id_clean}">🤖</span> <span class="agent-name" data-agent-id="{agent_id_clean}">{name}</span>'
+                f'<span class="chat-reward">⚑ {reward_val}</span>'
+                f'</div>'
+            )
+        else:
+            name = _html_mod.escape(turn.agent_id)
+            header_html = f'<div class="chat-header">Prompt of <span class="agent-name" data-agent-id="{agent_id_clean}">{name}</span></div>'
+        html_parts.append(header_html)
+        # Reasoning content if present
+        if turn.reasoning_content:
+            _raw_reasoning = turn.reasoning_content.replace("\r\n", "\n")
+            _raw_reasoning = _re.sub(r"^\s*\n+", "", _raw_reasoning)
+            esc_reasoning = _html_mod.escape(_raw_reasoning)
+            html_parts.append(
+                f'<div class="chat-reasoning collapsed">'
+                f'<span class="reasoning-icon">💭</span> '
+                f'<span class="reasoning-text">{esc_reasoning}</span>'
+                f'</div>'
+            )
+        # Message bubble
+        esc_content = _html_mod.escape(turn.content)
+        html_parts.append(f'<div class="chat-bubble">{esc_content}</div>')
+        html_parts.append('</div>')  # chat-message-content
+        html_parts.append('</div>')  # chat-message
+    # Add end contexts for the last round (only regular context, not prompt summary)
+    if last_time_step_chat is not None:
+        add_context_area("end", last_time_step_chat)
+    html_parts.append("</div>")  # flow-chat
+    html_parts.extend(["</body>", "</html>"])
+    return "\n".join(html_parts)
+def export_html_from_rollout_tree(path: Path, outdir: Path, main_only: bool = False):
+    """Process a rollout tree file and generate HTML files for each path.
+    Creates separate HTML files for the main path and each branch path.
+    The main path is saved in the root output directory, while branch paths
+    are saved in a 'branches' subdirectory.
+    Args:
+        path: Path to the rollout tree JSON file
+        outdir: Output directory for HTML files
+        main_only: If True, only export the main trajectory (default: False)
+    """
+    root = load_rollout_tree(path)
+    mgid = root.id
+    main_path, branch_paths = get_rollout_tree_paths(root)
+    outdir.mkdir(parents=True, exist_ok=True)
+    # Create branches subdirectory if we have branch paths
+    if not main_only and branch_paths:
+        branches_dir = outdir / f"mgid:{mgid}_branches_html_renders"
+        branches_dir.mkdir(parents=True, exist_ok=True)
+    # Generate HTML for the main path
+    chat_turns = gather_all_chat_turns_for_path(main_path)
+    html_content = html_from_chat_turns(chat_turns)
+    output_file = outdir / f"mgid:{mgid}_main_html_render.render.html"
+    with open(output_file, "w", encoding="utf-8") as f:
+        f.write(html_content)
+    # Generate HTML for each branch path
+    for path_obj in branch_paths:
+        chat_turns = gather_all_chat_turns_for_path(path_obj)
+        html_content = html_from_chat_turns(chat_turns)
+        path_id: str = path_obj.id
+        output_filename = f"{path_id}_html_render.render.html"
+        output_file = branches_dir / output_filename
+        with open(output_file, "w", encoding="utf-8") as f:
+            f.write(html_content)

src_code_for_reproducibility/utils/rollout_tree_gather_utils.py ADDED Viewed

	@@ -0,0 +1,314 @@

+from __future__ import annotations
+import csv
+import os
+import pickle
+import re
+from collections import defaultdict
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple
+from mllm.markov_games.rollout_tree import *
+def load_rollout_tree(path: Path) -> RolloutTreeRootNode:
+    """Load a rollout tree from a PKL file containing a dict."""
+    with open(path, "rb") as f:
+        data = pickle.load(f)
+    return RolloutTreeRootNode.model_validate(data)
+@dataclass
+class RolloutNodeList:
+    """An identified, ordered list of rollout tree nodes forming one path."""
+    # Path identifier; get_rollout_tree_paths builds ids such as
+    # "mgid:<id>_type:main" and "mgid:<id>_type:branch_agent:<agent>_time_step:<t>"
+    id: str
+    # Nodes of the path, in traversal order
+    nodes: List[RolloutTreeNode]
+def get_rollout_tree_paths(
+    root: RolloutTreeRootNode, mgid: Optional[str] = None
+) -> Tuple[RolloutNodeList, List[RolloutNodeList]]:
+    """
+    Returns:
+        main_path: The main path from the root to the end of the tree.
+        branch_paths: A list of all branch paths from the root to the end of the tree.
+        Each branch path contains a list of nodes that are part of the branch, including the nodes from the main path before the branch was taken.
+    """
+    branch_paths = []
+    def collect_path_nodes(current) -> List[RolloutTreeNode]:
+        """Recursively collect all nodes in a path starting from current node."""
+        if current is None:
+            return []
+        if isinstance(current, RolloutTreeNode):
+            return [current] + collect_path_nodes(current.child)
+        elif isinstance(current, RolloutTreeBranchNode):
+            # For branch nodes, we only follow the main_child for path collection
+            if current.main_child:
+                return [current.main_child] + collect_path_nodes(
+                    current.main_child.child
+                )
+            else:
+                return []
+    def traverse_for_branches(
+        current,
+        main_path_prefix: List[RolloutTreeNode],
+        path_id: str,
+        current_time_step: Optional[int] = 0,
+    ):
+        """Traverse tree to collect all branch paths."""
+        if current is None:
+            return
+        if isinstance(current, RolloutTreeNode):
+            # Continue traversing with this node added to the main path prefix
+            new_prefix = main_path_prefix + [current]
+            traverse_for_branches(current.child, new_prefix, path_id, current.time_step)
+        elif isinstance(current, RolloutTreeBranchNode):
+            # Collect all branch paths
+            if current.branches:
+                for agent_id, branch_node_list in current.branches.items():
+                    if branch_node_list:
+                        # Start with the main path prefix, then recursively collect all nodes in this branch
+                        branch_path_nodes = main_path_prefix.copy()
+                        for branch_node in branch_node_list:
+                            branch_path_nodes.extend(collect_path_nodes(branch_node))
+                        # Create proper branch path ID with mgid, agent_id, and time_step
+                        mgid_str = mgid or str(root.id)
+                        branch_path_id = f"mgid:{mgid_str}_type:branch_agent:{agent_id}_time_step:{current_time_step}"
+                        branch_paths.append(
+                            RolloutNodeList(id=branch_path_id, nodes=branch_path_nodes)
+                        )
+            # Process the main child and add to prefix
+            new_prefix = main_path_prefix
+            if current.main_child:
+                new_prefix = main_path_prefix + [current.main_child]
+            # Continue traversing the main path
+            if current.main_child:
+                traverse_for_branches(
+                    current.main_child.child,
+                    new_prefix,
+                    path_id,
+                    current.main_child.time_step,
+                )
+    # Collect the main path nodes
+    main_path_nodes = collect_path_nodes(root.child)
+    # Traverse to collect all branch paths
+    traverse_for_branches(root.child, [], "")
+    # Create the main path with proper mgid format
+    mgid_str = mgid or str(root.id)
+    main_path = RolloutNodeList(id=f"mgid:{mgid_str}_type:main", nodes=main_path_nodes)
+    return main_path, branch_paths
+class ChatTurnLog(BaseModel):
+    """Flattened record of one chat turn, tagged with the step and agent it came from."""
+    # Time step of the rollout node the turn was read from
+    time_step: int
+    # Agent whose action log contained the turn
+    agent_id: str
+    # Chat role; the gather helpers in this file treat "user" specially
+    role: str
+    # Message text of the turn
+    content: str
+    # Optional reasoning text; None when the source turn has no such attribute
+    reasoning_content: Optional[str] = None
+    # Copied unchanged from the source chat turn
+    is_state_end: bool
+    # The agent's reward at the node's simulation step (0 when absent)
+    reward: float
+def gather_agent_chat_turns_for_path(
+    agent_id: str, path: RolloutNodeList
+) -> List[ChatTurnLog]:
+    """Iterate through all chat turns for a specific agent in a path sorted by time step."""
+    turns = []
+    for node in path.nodes:
+        action_log = node.step_log.action_logs.get(agent_id, [])
+        if action_log:
+            for chat_turn in action_log.chat_turns or []:
+                turns.append(
+                    ChatTurnLog(
+                        time_step=node.time_step,
+                        agent_id=agent_id,
+                        role=chat_turn.role,
+                        content=chat_turn.content,
+                        reasoning_content=getattr(chat_turn, "reasoning_content", None),
+                        is_state_end=chat_turn.is_state_end,
+                        reward=node.step_log.simulation_step_log.rewards.get(
+                            agent_id, 0
+                        ),
+                    )
+                )
+    return turns
+def gather_all_chat_turns_for_path(path: RolloutNodeList) -> List[ChatTurnLog]:
+    """Iterate through all chat turns for all agents in a path sorted by time step."""
+    turns = []
+    # Collect turns from all agents, but interleave them per timestep by (user, assistant) pairs
+    for node in path.nodes:
+        # Build (user[, assistant]) pairs for each agent at this timestep
+        agent_ids = sorted(list(node.step_log.action_logs.keys()))
+        per_agent_pairs: Dict[str, List[List[ChatTurnLog]]] = {}
+        for agent_id in agent_ids:
+            action_log = node.step_log.action_logs.get(agent_id)
+            pairs: List[List[ChatTurnLog]] = []
+            current_pair: List[ChatTurnLog] = []
+            if action_log and action_log.chat_turns:
+                for chat_turn in action_log.chat_turns:
+                    turn_log = ChatTurnLog(
+                        time_step=node.time_step,
+                        agent_id=agent_id,
+                        role=chat_turn.role,
+                        content=chat_turn.content,
+                        reasoning_content=getattr(chat_turn, "reasoning_content", None),
+                        is_state_end=chat_turn.is_state_end,
+                        reward=node.step_log.simulation_step_log.rewards.get(
+                            agent_id, 0
+                        ),
+                    )
+                    if chat_turn.role == "user":
+                        # If a previous pair is open, close it and start a new one
+                        if current_pair:
+                            pairs.append(current_pair)
+                            current_pair = []
+                        current_pair = [turn_log]
+                    else:
+                        # assistant: attach to an open user message if present; otherwise stand alone
+                        if (
+                            current_pair
+                            and len(current_pair) == 1
+                            and current_pair[0].role == "user"
+                        ):
+                            current_pair.append(turn_log)
+                            pairs.append(current_pair)
+                            current_pair = []
+                        else:
+                            # No preceding user or already paired; treat as its own unit
+                            pairs.append([turn_log])
+                if current_pair:
+                    # Unpaired trailing user message
+                    pairs.append(current_pair)
+            per_agent_pairs[agent_id] = pairs
+        # Interleave pairs across agents: A1, B1, A2, B2, ...
+        index = 0
+        while True:
+            added_any = False
+            for agent_id in agent_ids:
+                agent_pairs = per_agent_pairs.get(agent_id, [])
+                if index < len(agent_pairs):
+                    for tl in agent_pairs[index]:
+                        turns.append(tl)
+                    added_any = True
+            if not added_any:
+                break
+            index += 1
+    return turns
+def chat_turns_to_dict(chat_turns: Iterator[ChatTurnLog]) -> Iterator[Dict[str, Any]]:
+    """Render all chat turns for a path as structured data for JSON."""
+    for chat_turn in chat_turns:
+        yield chat_turn.model_dump()
+def get_all_agents(root: RolloutTreeRootNode) -> List[str]:
+    """list of all agent IDs that appear in the tree."""
+    if root.child is None:
+        return []
+    # Get the first node to extract all agent IDs
+    first_node = root.child
+    if isinstance(first_node, RolloutTreeBranchNode):
+        first_node = first_node.main_child
+    if first_node is None:
+        return []
+    # All agents should be present in the first node
+    agents = set(first_node.step_log.action_logs.keys())
+    agents.update(first_node.step_log.simulation_step_log.rewards.keys())
+    return sorted(list(agents))
+def gather_agent_main_rewards(agent_id: str, path: RolloutNodeList) -> List[float]:
+    """Gather main rewards for a specific agent in a path."""
+    rewards = []
+    for node in path.nodes:
+        reward = node.step_log.simulation_step_log.rewards[agent_id]
+        rewards.append(reward)
+    return rewards
+def gather_all_rewards(path: RolloutNodeList) -> List[Dict[AgentId, float]]:
+    """Gather main rewards from main trajectory in a path."""
+    rewards = []
+    for node in path.nodes:
+        rewards.append(node.step_log.simulation_step_log.rewards.copy())
+    return rewards
+def gather_simulation_stats(
+    path: RolloutNodeList,
+    filter: Callable[[SimulationStepLog], bool],
+    stat_func: Callable[[SimulationStepLog], Any],
+) -> List[Any]:
+    """Gather stats from main trajectory in a path."""
+    stats = []
+    for node in path.nodes:
+        sl = node.step_log.simulation_step_log
+        if filter(sl):
+            stats.append(stat_func(sl))
+    return stats
+def gather_simulation_step_logs(path: RolloutNodeList) -> List[SimulationStepLog]:
+    """Gather simulation information from main trajectory in a path."""
+    infos = []
+    for node in path.nodes:
+        infos.append(node.step_log.simulation_step_log)
+    return infos
+def export_chat_logs(path: Path, outdir: Path):
+    """Process a rollout tree PKL file and generate a JSONL of chat turns as dicts.
+    Each line contains an object with path_id and chat_turns for a single path.
+    """
+    import json
+    root = load_rollout_tree(path)
+    mgid = root.id
+    main_path, branch_paths = get_rollout_tree_paths(root)
+    all_paths = [main_path] + branch_paths
+    outdir.mkdir(parents=True, exist_ok=True)
+    output_file = outdir / f"mgid:{mgid}_plucked_chats.render.jsonl"
+    with open(output_file, "w", encoding="utf-8") as f:
+        for path_obj in all_paths:
+            chat_turns = gather_all_chat_turns_for_path(path_obj)
+            output_obj = {
+                "path_id": str(path_obj.id),
+                "chat_turns": list(chat_turns_to_dict(iter(chat_turns))),
+            }
+            f.write(json.dumps(output_obj, ensure_ascii=False) + "\n")

src_code_for_reproducibility/utils/rollout_tree_stats.py ADDED Viewed

	@@ -0,0 +1,50 @@

+from typing import Any, Callable, List, Tuple
+from mllm.markov_games.rollout_tree import RolloutTreeRootNode
+from mllm.markov_games.simulation import SimulationStepLog
+from mllm.utils.rollout_tree_gather_utils import (
+    gather_simulation_step_logs,
+    get_rollout_tree_paths,
+)
+from mllm.utils.stat_pack import StatPack
+def get_rollout_tree_stat_tally(
+    rollout_tree: RolloutTreeRootNode,
+    metrics: List[Callable[[SimulationStepLog], List[Tuple[str, float]]]],
+) -> StatPack:
+    stat_tally = StatPack()
+    # get simulation step logs
+    node_list = get_rollout_tree_paths(rollout_tree)[0]
+    simulation_step_logs = gather_simulation_step_logs(node_list)
+    for simulation_step_log in simulation_step_logs:
+        for metric in metrics:
+            metric_result = metric(simulation_step_log)
+            if metric_result is not None:
+                for key, value in metric_result:
+                    stat_tally.add_stat(key, value)
+    return stat_tally
+def get_rollout_tree_mean_stats(
+    rollout_tree: RolloutTreeRootNode, metrics: List[Callable[[SimulationStepLog], Any]]
+) -> StatPack:
+    """Get the mean stats for a rollout tree."""
+    stat_tally = get_rollout_tree_stat_tally(rollout_tree, metrics)
+    return stat_tally.mean()
def get_mean_rollout_tree_stats(
    rollout_trees: List[RolloutTreeRootNode],
    metrics: List[Callable[[SimulationStepLog], Any]],
) -> StatPack:
    """Combine the mean stats of several rollout trees and average them again.

    The mean stats of each tree are merged into one ``StatPack`` via
    ``add_stats``; the ``.mean()`` of that combined pack is returned.
    """
    # TODO complete this
    combined = StatPack()
    for tree in rollout_trees:
        combined.add_stats(get_rollout_tree_mean_stats(tree, metrics))
    return combined.mean()

src_code_for_reproducibility/utils/update_start_epoch.py ADDED Viewed

	@@ -0,0 +1,9 @@

+import os
+# During run, set hydra.run.dir=./outputs/{folder}
def update_start_epoch(cfg, output_directory):
    """Set ``cfg["experiment"]["start_epoch"]`` from existing iteration folders.

    When ``cfg["experiment"]["resume_experiment"]`` is truthy, the entries of
    ``output_directory`` whose names start with ``iteration_`` are scanned and
    the largest iteration number is stored as the start epoch (0 when no
    usable entry exists). When resuming is disabled, ``cfg`` is left untouched.

    Entries whose suffix is not an integer (for example ``iteration_backup``)
    are skipped instead of aborting the resume with a ``ValueError``.

    Args:
        cfg: Nested, subscriptable config; updated in place.
        output_directory: Directory to scan for ``iteration_*`` entries.

    Returns:
        None.

    Raises:
        OSError: If ``output_directory`` cannot be listed.
    """
    if not cfg["experiment"]["resume_experiment"]:
        return None

    iterations = []
    for entry in os.listdir(output_directory):
        if not entry.startswith("iteration_"):
            continue
        try:
            # The iteration number is the second "_"-separated field.
            iterations.append(int(entry.split("_")[1]))
        except ValueError:
            # Not an iteration folder produced by a run; ignore it.
            continue

    cfg["experiment"]["start_epoch"] = max(iterations) if iterations else 0
    return None

src_code_for_reproducibility/utils/wandb_utils.py ADDED Viewed

	@@ -0,0 +1,164 @@

+import os
+from typing import Any, Dict, Optional
# Set to True by _try_import_wandb() once `import wandb` has succeeded.
_WANDB_AVAILABLE = False
# Holds the value returned by wandb.init(); None until init() has run.
_WANDB_RUN = None
def _try_import_wandb():
    """Report whether ``wandb`` can be imported, remembering a success.

    A previous success short-circuits the check. Otherwise an import is
    attempted and the module-level ``_WANDB_AVAILABLE`` flag is updated with
    the outcome. Any exception raised by the import counts as "unavailable".

    Returns:
        True if ``wandb`` is importable, False otherwise.
    """
    global _WANDB_AVAILABLE
    if not _WANDB_AVAILABLE:
        try:
            import wandb  # type: ignore
        except Exception:
            _WANDB_AVAILABLE = False
        else:
            _WANDB_AVAILABLE = True
    return bool(_WANDB_AVAILABLE)
+def _safe_get(cfg: Dict[str, Any], path: list[str], default: Any = None) -> Any:
+    cur: Any = cfg
+    for key in path:
+        if not isinstance(cur, dict) or key not in cur:
+            return default
+        cur = cur[key]
+    return cur
def is_enabled(cfg: Dict[str, Any]) -> bool:
    """Return True when the ``logging.wandb.enabled`` config entry is truthy.

    A missing entry is treated as disabled.
    """
    enabled_flag = _safe_get(cfg, ["logging", "wandb", "enabled"], False)
    return bool(enabled_flag)
def init(cfg: Dict[str, Any], run_dir: str, run_name: Optional[str] = None) -> None:
    """
    Start a Weights & Biases run if enabled in config.

    Does nothing when ``logging.wandb.enabled`` is falsy or ``wandb`` cannot be
    imported. Otherwise creates ``run_dir``, sets ``WANDB_DIR`` to it unless the
    variable is already set, and stores the result of ``wandb.init`` in the
    module-level ``_WANDB_RUN``.

    Run settings are read from ``cfg["logging"]["wandb"]`` (project, entity,
    mode, tags, notes, group, name); ``run_name`` is used only when no name is
    configured.
    """
    global _WANDB_RUN
    if not (is_enabled(cfg) and _try_import_wandb()):
        return
    import wandb  # type: ignore

    def wandb_setting(option: str, fallback: Any) -> Any:
        # All run settings live under cfg["logging"]["wandb"].
        return _safe_get(cfg, ["logging", "wandb", option], fallback)

    # Ensure files are written into the hydra run directory
    os.makedirs(run_dir, exist_ok=True)
    os.environ.setdefault("WANDB_DIR", run_dir)

    # Convert cfg to plain types for W&B config; fall back to cfg as given
    try:
        from omegaconf import OmegaConf  # type: ignore
        cfg_container = OmegaConf.to_container(cfg, resolve=True)  # type: ignore
    except Exception:
        cfg_container = cfg

    _WANDB_RUN = wandb.init(
        project=wandb_setting("project", "llm-negotiation"),
        entity=wandb_setting("entity", None),
        mode=wandb_setting("mode", "online"),
        name=wandb_setting("name", run_name),
        group=wandb_setting("group", None),
        tags=wandb_setting("tags", []) or [],
        notes=wandb_setting("notes", None),
        config=cfg_container,
        dir=run_dir,
        reinit=True,
    )
def log(metrics: Dict[str, Any], step: Optional[int] = None) -> None:
    """Log a flat dictionary of metrics to W&B if active.

    Does nothing unless ``wandb`` was imported successfully and ``init`` has
    stored a run. Logging is best-effort: a failure never propagates to the
    caller, but it is reported as a warning through ``logging`` instead of
    being discarded silently.

    Args:
        metrics: Mapping of metric name to value.
        step: Optional step number. When given it is added to the logged
            payload as an ordinary entry named ``"step"``; it is not passed as
            the ``step`` argument of ``wandb.log``.
    """
    if not _WANDB_AVAILABLE or _WANDB_RUN is None:
        return
    try:
        import wandb  # type: ignore

        payload = metrics if step is None else dict(metrics, step=step)
        wandb.log(payload)
    except Exception:
        import logging

        # Keep training alive, but leave a trace of the dropped metrics.
        logging.getLogger(__name__).warning(
            "wandb.log failed; metrics were not logged", exc_info=True
        )
+def _flatten(prefix: str, data: Dict[str, Any], out: Dict[str, Any]) -> None:
+    for k, v in data.items():
+        key = f"{prefix}.{k}" if prefix else k
+        if isinstance(v, dict):
+            _flatten(key, v, out)
+        else:
+            out[key] = v
+def _summarize_value(value: Any) -> Dict[str, Any]:
+    import numpy as np  # local import to avoid hard dependency during disabled mode
+    if value is None:
+        return {"none": 1}
+    # Scalars
+    if isinstance(value, (int, float)):
+        return {"value": float(value)}
+    # Lists or arrays
+    try:
+        arr = np.asarray(value)
+        if arr.size == 0:
+            return {"size": 0}
+        return {
+            "mean": float(np.nanmean(arr)),
+            "min": float(np.nanmin(arr)),
+            "max": float(np.nanmax(arr)),
+            "last": float(arr.reshape(-1)[-1]),
+            "size": int(arr.size),
+        }
+    except Exception:
+        # Fallback: string repr
+        return {"text": str(value)}
def log_tally(array_tally: Dict[str, Any], prefix: str = "", step: Optional[int] = None) -> None:
    """
    Flatten and summarize Tally.array_tally and log to WandB.

    Nested dicts are walked recursively; each leaf is summarized with
    ``_summarize_value`` and every summary entry is logged under
    ``<prefix>.<dotted path>.<summary name>``. A leaf whose summary raises is
    recorded as ``<key>.error = 1`` instead. Nothing happens unless ``wandb``
    is available and a run has been initialized.

    Dict keys are converted to ``str`` before being joined, so tallies keyed by
    non-string values (e.g. integers) no longer raise ``TypeError``.

    Args:
        array_tally: Possibly nested dict of accumulated values.
        prefix: Optional leading component for every logged key.
        step: Optional step forwarded to ``log``.
    """
    if not _WANDB_AVAILABLE or _WANDB_RUN is None:
        return
    summarized: Dict[str, Any] = {}
    root = [prefix] if prefix else []

    def walk(node: Any, path: list[str]) -> None:
        if isinstance(node, dict):
            for k, v in node.items():
                walk(v, path + [str(k)])
            return
        # node is a leaf: the values accumulated over time for this path
        key = ".".join(root + path)
        try:
            for sk, sv in _summarize_value(node).items():
                summarized[f"{key}.{sk}"] = sv
        except Exception:
            summarized[f"{key}.error"] = 1

    walk(array_tally, [])
    if summarized:
        log(summarized, step=step)
def log_flat_stats(stats: Dict[str, Any], prefix: str = "", step: Optional[int] = None) -> None:
    """Flatten nested stats into dotted keys and log them to W&B if active.

    Nothing is logged when ``wandb`` is unavailable, no run has been
    initialized, or flattening produces no entries.
    """
    wandb_active = _WANDB_AVAILABLE and _WANDB_RUN is not None
    if not wandb_active:
        return
    flattened: Dict[str, Any] = {}
    _flatten(prefix, stats, flattened)
    if not flattened:
        return
    log(flattened, step=step)