Muqeeth committed on
Commit
3346216
·
verified ·
1 Parent(s): 1f4f273

Add files using upload-large-folder tool

Browse files
Files changed (50) hide show
  1. .hydra/config.yaml +173 -0
  2. .hydra/hydra.yaml +154 -0
  3. .hydra/overrides.yaml +1 -0
  4. run.log +0 -0
  5. seed_9999/Qwen/Qwen2.5-7B-Instruct/adapters/README.md +207 -0
  6. seed_9999/Qwen/Qwen2.5-7B-Instruct/adapters/agent_adapter/adapter_config.json +42 -0
  7. seed_9999/Qwen/Qwen2.5-7B-Instruct/adapters/critic_adapter/adapter_config.json +42 -0
  8. src_code_for_reproducibility/__init__.py +0 -0
  9. src_code_for_reproducibility/docs/source/contributing.rst +0 -0
  10. src_code_for_reproducibility/docs/source/environments/dond.rst +410 -0
  11. src_code_for_reproducibility/docs/source/launch.rst +0 -0
  12. src_code_for_reproducibility/docs/source/media/runbatch.png +0 -0
  13. src_code_for_reproducibility/docs/source/modules.rst +7 -0
  14. src_code_for_reproducibility/docs/source/src.environments.dond.dond_game.rst +7 -0
  15. src_code_for_reproducibility/docs/source/src.environments.dond.dond_log_funcs.rst +7 -0
  16. src_code_for_reproducibility/docs/source/src.environments.dond.dond_player.rst +7 -0
  17. src_code_for_reproducibility/docs/source/src.environments.environment_imports.rst +7 -0
  18. src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_agent.rst +7 -0
  19. src_code_for_reproducibility/docs/source/src.environments.rst +25 -0
  20. src_code_for_reproducibility/docs/source/src.experiments.dond_run_train.rst +7 -0
  21. src_code_for_reproducibility/docs/source/src.experiments.last_completion.rst +7 -0
  22. src_code_for_reproducibility/docs/source/src.generation.run_games.rst +7 -0
  23. src_code_for_reproducibility/docs/source/src.models.dummy_local_llm.rst +7 -0
  24. src_code_for_reproducibility/docs/source/src.models.new_local_llm.rst +7 -0
  25. src_code_for_reproducibility/docs/source/src.models.oai_agent.rst +7 -0
  26. src_code_for_reproducibility/docs/source/src.models.server_llm.rst +7 -0
  27. src_code_for_reproducibility/docs/source/src.models.updatable_worker.rst +7 -0
  28. src_code_for_reproducibility/docs/source/src.rst +28 -0
  29. src_code_for_reproducibility/docs/source/src.training.ppo_train.rst +7 -0
  30. src_code_for_reproducibility/docs/source/src.training.ppo_train_value_head.rst +7 -0
  31. src_code_for_reproducibility/docs/source/src.training.rl_convs_processing.rst +7 -0
  32. src_code_for_reproducibility/docs/source/src.utils.extra_stats.rst +7 -0
  33. src_code_for_reproducibility/docs/source/src.utils.inherit_args.rst +7 -0
  34. src_code_for_reproducibility/docs/source/src.utils.parallel_shuffle.rst +7 -0
  35. src_code_for_reproducibility/docs/source/src.utils.quick_stats.rst +7 -0
  36. src_code_for_reproducibility/docs/source/src.utils.update_start_epoch.rst +7 -0
  37. src_code_for_reproducibility/docs/source/usage.rst +0 -0
  38. src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_simulation.cpython-312.pyc +0 -0
  39. src_code_for_reproducibility/utils/__init__.py +0 -0
  40. src_code_for_reproducibility/utils/dict_get_path.py +12 -0
  41. src_code_for_reproducibility/utils/gather_training_stats.py +257 -0
  42. src_code_for_reproducibility/utils/get_stochastic_game_lengths.py +30 -0
  43. src_code_for_reproducibility/utils/kill_sglang.py +17 -0
  44. src_code_for_reproducibility/utils/output_source_code.py +6 -0
  45. src_code_for_reproducibility/utils/resource_context.py +78 -0
  46. src_code_for_reproducibility/utils/rollout_tree_chat_htmls.py +1921 -0
  47. src_code_for_reproducibility/utils/rollout_tree_gather_utils.py +314 -0
  48. src_code_for_reproducibility/utils/rollout_tree_stats.py +50 -0
  49. src_code_for_reproducibility/utils/update_start_epoch.py +9 -0
  50. src_code_for_reproducibility/utils/wandb_utils.py +164 -0
.hydra/config.yaml ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment:
2
+ wandb_enabled: true
3
+ nb_epochs: 3000
4
+ nb_matches_per_iteration: 64
5
+ reinit_matches_each_it: true
6
+ checkpoint_every_n_iterations: 50
7
+ start_epoch: 0
8
+ resume_experiment: true
9
+ base_seed: 9999
10
+ seed_group_size: 8
11
+ train: true
12
+ stat_methods_for_live_wandb: mllm.markov_games.negotiation.negotiation_statistics
13
+ name: tas_rps_startend_naive_seed9999
14
+ agent_buffer: false
15
+ keep_agent_buffer_count: ${lora_count}
16
+ agent_buffer_recent_k: -1
17
+ description: Trust-and-Split Rock Paper Scissors negotiation game
18
+ logging:
19
+ wandb:
20
+ enabled: false
21
+ project: llm-negotiation
22
+ entity: null
23
+ mode: online
24
+ name: null
25
+ group: null
26
+ tags: []
27
+ notes: null
28
+ temperature: 1.0
29
+ markov_games:
30
+ runner_method_name: LinearRunner
31
+ runner_kwargs: {}
32
+ group_by_round: true
33
+ simulation_class_name: TrustAndSplitRPSSimulation
34
+ simulation_init_args:
35
+ nb_of_rounds: 10
36
+ quota_messages_per_agent_per_round: 1
37
+ alternating_hands: false
38
+ agents:
39
+ 0:
40
+ agent_id: ${agent_0_id}
41
+ agent_name: Alice
42
+ agent_class_name: TrustAndSplitRPSAgent
43
+ policy_id: base_llm/agent_adapter
44
+ init_kwargs:
45
+ goal: Maximize your total points over the whole game.
46
+ num_message_chars: 500
47
+ message_start_end_format: true
48
+ proposal_start_end_format: true
49
+ 1:
50
+ agent_id: ${agent_1_id}
51
+ agent_name: Bob
52
+ agent_class_name: TrustAndSplitRPSAgent
53
+ policy_id: base_llm/agent_adapter
54
+ init_kwargs:
55
+ goal: Maximize your total points over the whole game.
56
+ num_message_chars: 500
57
+ message_start_end_format: true
58
+ proposal_start_end_format: true
59
+ models:
60
+ base_llm:
61
+ class: LeanLocalLLM
62
+ init_args:
63
+ llm_id: base_llm
64
+ model_name: Qwen/Qwen2.5-7B-Instruct
65
+ inference_backend: vllm
66
+ hf_kwargs:
67
+ device_map: auto
68
+ torch_dtype: bfloat16
69
+ max_memory:
70
+ 0: 20GiB
71
+ attn_implementation: flash_attention_2
72
+ inference_backend_init_kwargs:
73
+ enable_lora: true
74
+ seed: ${experiment.base_seed}
75
+ enable_prefix_caching: true
76
+ max_model_len: 10000.0
77
+ gpu_memory_utilization: 0.5
78
+ dtype: bfloat16
79
+ trust_remote_code: true
80
+ max_lora_rank: 32
81
+ enforce_eager: false
82
+ max_loras: ${lora_count}
83
+ max_cpu_loras: ${lora_count}
84
+ enable_sleep_mode: true
85
+ inference_backend_sampling_params:
86
+ temperature: ${temperature}
87
+ top_p: 1.0
88
+ max_tokens: 400
89
+ top_k: -1
90
+ logprobs: 0
91
+ adapter_configs:
92
+ agent_adapter:
93
+ task_type: CAUSAL_LM
94
+ r: 32
95
+ lora_alpha: 64
96
+ lora_dropout: 0.0
97
+ target_modules: all-linear
98
+ critic_adapter:
99
+ task_type: CAUSAL_LM
100
+ r: 32
101
+ lora_alpha: 64
102
+ lora_dropout: 0.0
103
+ target_modules: all-linear
104
+ enable_thinking: null
105
+ regex_max_attempts: 1
106
+ critics:
107
+ agent_critic:
108
+ module_pointer:
109
+ - base_llm
110
+ - critic_adapter
111
+ optimizers:
112
+ agent_optimizer:
113
+ module_pointer:
114
+ - base_llm
115
+ - agent_adapter
116
+ optimizer_class_name: torch.optim.Adam
117
+ init_args:
118
+ lr: 3.0e-06
119
+ weight_decay: 0.0
120
+ critic_optimizer:
121
+ module_pointer: agent_critic
122
+ optimizer_class_name: torch.optim.Adam
123
+ init_args:
124
+ lr: 3.0e-06
125
+ weight_decay: 0.0
126
+ trainers:
127
+ agent_trainer:
128
+ class: TrainerNaive
129
+ module_pointers:
130
+ policy:
131
+ - base_llm
132
+ - agent_adapter
133
+ policy_optimizer: agent_optimizer
134
+ critic: agent_critic
135
+ critic_optimizer: critic_optimizer
136
+ kwargs:
137
+ entropy_coeff: 0.0
138
+ entropy_topk: null
139
+ entropy_mask_regex: null
140
+ kl_coeff: 0.001
141
+ gradient_clipping: 1.0
142
+ restrict_tokens: null
143
+ mini_batch_size: 1
144
+ use_gradient_checkpointing: true
145
+ temperature: ${temperature}
146
+ device: cuda:0
147
+ use_gae: false
148
+ whiten_advantages: false
149
+ whiten_advantages_time_step_wise: false
150
+ skip_discounted_state_visitation: true
151
+ use_gae_lambda_annealing: false
152
+ gae_lambda_annealing_method: None
153
+ gae_lambda_annealing_method_params: None
154
+ gae_lambda_annealing_limit: 0.95
155
+ discount_factor: 0.96
156
+ use_rloo: true
157
+ enable_tokenwise_logging: false
158
+ pg_loss_normalization: nb_tokens
159
+ truncated_importance_sampling_ratio_cap: 2.0
160
+ reward_normalizing_constant: 100.0
161
+ train_on_which_data:
162
+ agent_trainer: ${agent_ids}
163
+ lora_count: 30
164
+ common_agent_kwargs:
165
+ goal: Maximize your total points over the whole game.
166
+ num_message_chars: 500
167
+ message_start_end_format: true
168
+ proposal_start_end_format: true
169
+ agent_0_id: Alice
170
+ agent_1_id: Bob
171
+ agent_ids:
172
+ - Alice
173
+ - Bob
.hydra/hydra.yaml ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hydra:
2
+ run:
3
+ dir: ${oc.env:SCRATCH}/llm_negotiation/${now:%Y_%m}/${experiment.name}
4
+ sweep:
5
+ dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
6
+ subdir: ${hydra.job.num}
7
+ launcher:
8
+ _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
9
+ sweeper:
10
+ _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
11
+ max_batch_size: null
12
+ params: null
13
+ help:
14
+ app_name: ${hydra.job.name}
15
+ header: '${hydra.help.app_name} is powered by Hydra.
16
+
17
+ '
18
+ footer: 'Powered by Hydra (https://hydra.cc)
19
+
20
+ Use --hydra-help to view Hydra specific help
21
+
22
+ '
23
+ template: '${hydra.help.header}
24
+
25
+ == Configuration groups ==
26
+
27
+ Compose your configuration from those groups (group=option)
28
+
29
+
30
+ $APP_CONFIG_GROUPS
31
+
32
+
33
+ == Config ==
34
+
35
+ Override anything in the config (foo.bar=value)
36
+
37
+
38
+ $CONFIG
39
+
40
+
41
+ ${hydra.help.footer}
42
+
43
+ '
44
+ hydra_help:
45
+ template: 'Hydra (${hydra.runtime.version})
46
+
47
+ See https://hydra.cc for more info.
48
+
49
+
50
+ == Flags ==
51
+
52
+ $FLAGS_HELP
53
+
54
+
55
+ == Configuration groups ==
56
+
57
+ Compose your configuration from those groups (For example, append hydra/job_logging=disabled
58
+ to command line)
59
+
60
+
61
+ $HYDRA_CONFIG_GROUPS
62
+
63
+
64
+ Use ''--cfg hydra'' to Show the Hydra config.
65
+
66
+ '
67
+ hydra_help: ???
68
+ hydra_logging:
69
+ version: 1
70
+ formatters:
71
+ simple:
72
+ format: '[%(asctime)s][HYDRA] %(message)s'
73
+ handlers:
74
+ console:
75
+ class: logging.StreamHandler
76
+ formatter: simple
77
+ stream: ext://sys.stdout
78
+ root:
79
+ level: INFO
80
+ handlers:
81
+ - console
82
+ loggers:
83
+ logging_example:
84
+ level: DEBUG
85
+ disable_existing_loggers: false
86
+ job_logging:
87
+ version: 1
88
+ formatters:
89
+ simple:
90
+ format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
91
+ handlers:
92
+ console:
93
+ class: logging.StreamHandler
94
+ formatter: simple
95
+ stream: ext://sys.stdout
96
+ file:
97
+ class: logging.FileHandler
98
+ formatter: simple
99
+ filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
100
+ root:
101
+ level: INFO
102
+ handlers:
103
+ - console
104
+ - file
105
+ disable_existing_loggers: false
106
+ env: {}
107
+ mode: RUN
108
+ searchpath: []
109
+ callbacks: {}
110
+ output_subdir: .hydra
111
+ overrides:
112
+ hydra:
113
+ - hydra.mode=RUN
114
+ task: []
115
+ job:
116
+ name: run
117
+ chdir: false
118
+ override_dirname: ''
119
+ id: ???
120
+ num: ???
121
+ config_name: tas_rps_startend_naive_seed9999.yaml
122
+ env_set: {}
123
+ env_copy: []
124
+ config:
125
+ override_dirname:
126
+ kv_sep: '='
127
+ item_sep: ','
128
+ exclude_keys: []
129
+ runtime:
130
+ version: 1.3.2
131
+ version_base: '1.1'
132
+ cwd: /scratch/m/muqeeth/llm_negotiation
133
+ config_sources:
134
+ - path: hydra.conf
135
+ schema: pkg
136
+ provider: hydra
137
+ - path: /scratch/m/muqeeth/llm_negotiation/configs
138
+ schema: file
139
+ provider: main
140
+ - path: ''
141
+ schema: structured
142
+ provider: schema
143
+ output_dir: /scratch/m/muqeeth/llm_negotiation/2025_11/tas_rps_startend_naive_seed9999
144
+ choices:
145
+ hydra/env: default
146
+ hydra/callbacks: null
147
+ hydra/job_logging: default
148
+ hydra/hydra_logging: default
149
+ hydra/hydra_help: default
150
+ hydra/help: default
151
+ hydra/sweeper: basic
152
+ hydra/launcher: basic
153
+ hydra/output: default
154
+ verbose: false
.hydra/overrides.yaml ADDED
@@ -0,0 +1 @@
 
 
1
+ []
run.log ADDED
The diff for this file is too large to render. See raw diff
 
seed_9999/Qwen/Qwen2.5-7B-Instruct/adapters/README.md ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: Qwen/Qwen2.5-7B-Instruct
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:Qwen/Qwen2.5-7B-Instruct
7
+ - lora
8
+ - transformers
9
+ ---
10
+
11
+ # Model Card for Model ID
12
+
13
+ <!-- Provide a quick summary of what the model is/does. -->
14
+
15
+
16
+
17
+ ## Model Details
18
+
19
+ ### Model Description
20
+
21
+ <!-- Provide a longer summary of what this model is. -->
22
+
23
+
24
+
25
+ - **Developed by:** [More Information Needed]
26
+ - **Funded by [optional]:** [More Information Needed]
27
+ - **Shared by [optional]:** [More Information Needed]
28
+ - **Model type:** [More Information Needed]
29
+ - **Language(s) (NLP):** [More Information Needed]
30
+ - **License:** [More Information Needed]
31
+ - **Finetuned from model [optional]:** [More Information Needed]
32
+
33
+ ### Model Sources [optional]
34
+
35
+ <!-- Provide the basic links for the model. -->
36
+
37
+ - **Repository:** [More Information Needed]
38
+ - **Paper [optional]:** [More Information Needed]
39
+ - **Demo [optional]:** [More Information Needed]
40
+
41
+ ## Uses
42
+
43
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
44
+
45
+ ### Direct Use
46
+
47
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
48
+
49
+ [More Information Needed]
50
+
51
+ ### Downstream Use [optional]
52
+
53
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
54
+
55
+ [More Information Needed]
56
+
57
+ ### Out-of-Scope Use
58
+
59
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
60
+
61
+ [More Information Needed]
62
+
63
+ ## Bias, Risks, and Limitations
64
+
65
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
66
+
67
+ [More Information Needed]
68
+
69
+ ### Recommendations
70
+
71
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
72
+
73
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
74
+
75
+ ## How to Get Started with the Model
76
+
77
+ Use the code below to get started with the model.
78
+
79
+ [More Information Needed]
80
+
81
+ ## Training Details
82
+
83
+ ### Training Data
84
+
85
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
86
+
87
+ [More Information Needed]
88
+
89
+ ### Training Procedure
90
+
91
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
92
+
93
+ #### Preprocessing [optional]
94
+
95
+ [More Information Needed]
96
+
97
+
98
+ #### Training Hyperparameters
99
+
100
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
101
+
102
+ #### Speeds, Sizes, Times [optional]
103
+
104
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
105
+
106
+ [More Information Needed]
107
+
108
+ ## Evaluation
109
+
110
+ <!-- This section describes the evaluation protocols and provides the results. -->
111
+
112
+ ### Testing Data, Factors & Metrics
113
+
114
+ #### Testing Data
115
+
116
+ <!-- This should link to a Dataset Card if possible. -->
117
+
118
+ [More Information Needed]
119
+
120
+ #### Factors
121
+
122
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
123
+
124
+ [More Information Needed]
125
+
126
+ #### Metrics
127
+
128
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
129
+
130
+ [More Information Needed]
131
+
132
+ ### Results
133
+
134
+ [More Information Needed]
135
+
136
+ #### Summary
137
+
138
+
139
+
140
+ ## Model Examination [optional]
141
+
142
+ <!-- Relevant interpretability work for the model goes here -->
143
+
144
+ [More Information Needed]
145
+
146
+ ## Environmental Impact
147
+
148
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
149
+
150
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
151
+
152
+ - **Hardware Type:** [More Information Needed]
153
+ - **Hours used:** [More Information Needed]
154
+ - **Cloud Provider:** [More Information Needed]
155
+ - **Compute Region:** [More Information Needed]
156
+ - **Carbon Emitted:** [More Information Needed]
157
+
158
+ ## Technical Specifications [optional]
159
+
160
+ ### Model Architecture and Objective
161
+
162
+ [More Information Needed]
163
+
164
+ ### Compute Infrastructure
165
+
166
+ [More Information Needed]
167
+
168
+ #### Hardware
169
+
170
+ [More Information Needed]
171
+
172
+ #### Software
173
+
174
+ [More Information Needed]
175
+
176
+ ## Citation [optional]
177
+
178
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
179
+
180
+ **BibTeX:**
181
+
182
+ [More Information Needed]
183
+
184
+ **APA:**
185
+
186
+ [More Information Needed]
187
+
188
+ ## Glossary [optional]
189
+
190
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
191
+
192
+ [More Information Needed]
193
+
194
+ ## More Information [optional]
195
+
196
+ [More Information Needed]
197
+
198
+ ## Model Card Authors [optional]
199
+
200
+ [More Information Needed]
201
+
202
+ ## Model Card Contact
203
+
204
+ [More Information Needed]
205
+ ### Framework versions
206
+
207
+ - PEFT 0.17.1
seed_9999/Qwen/Qwen2.5-7B-Instruct/adapters/agent_adapter/adapter_config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "Qwen/Qwen2.5-7B-Instruct",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 64,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.0,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "qalora_group_size": 16,
24
+ "r": 32,
25
+ "rank_pattern": {},
26
+ "revision": null,
27
+ "target_modules": [
28
+ "k_proj",
29
+ "v_proj",
30
+ "down_proj",
31
+ "q_proj",
32
+ "up_proj",
33
+ "gate_proj",
34
+ "o_proj"
35
+ ],
36
+ "target_parameters": null,
37
+ "task_type": "CAUSAL_LM",
38
+ "trainable_token_indices": null,
39
+ "use_dora": false,
40
+ "use_qalora": false,
41
+ "use_rslora": false
42
+ }
seed_9999/Qwen/Qwen2.5-7B-Instruct/adapters/critic_adapter/adapter_config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "Qwen/Qwen2.5-7B-Instruct",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 64,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.0,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "qalora_group_size": 16,
24
+ "r": 32,
25
+ "rank_pattern": {},
26
+ "revision": null,
27
+ "target_modules": [
28
+ "k_proj",
29
+ "v_proj",
30
+ "down_proj",
31
+ "q_proj",
32
+ "up_proj",
33
+ "gate_proj",
34
+ "o_proj"
35
+ ],
36
+ "target_parameters": null,
37
+ "task_type": "CAUSAL_LM",
38
+ "trainable_token_indices": null,
39
+ "use_dora": false,
40
+ "use_qalora": false,
41
+ "use_rslora": false
42
+ }
src_code_for_reproducibility/__init__.py ADDED
File without changes
src_code_for_reproducibility/docs/source/contributing.rst ADDED
File without changes
src_code_for_reproducibility/docs/source/environments/dond.rst ADDED
@@ -0,0 +1,410 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ =================
2
+ Deal or No Deal
3
+ =================
4
+
5
+ The Deal or No Deal (DoND) environment provides a multi-agent negotiation interface where players trade
6
+ items with different values. This document describes the API for interacting with the DoND environment
7
+ and its associated agent handler.
8
+
9
+ Overview
10
+ --------
11
+
12
+ Deal or No Deal is a negotiation game where two agents must agree on how to divide a set of items,
13
+ each of which has different values to each agent. The agents engage in a back-and-forth dialogue to
14
+ determine an allocation of the items, with each trying to maximize their own total value.
15
+
16
+ Our implementation follows the Multi-Agent Negotiation Environment standard, allowing it to be used
17
+ with LLM agents through a text-based interface.
18
+
19
+ Game Rules
20
+ ----------
21
+
22
+ ### Basic Structure
23
+
24
+ The core mechanics of Deal or No Deal are:
25
+
26
+ 1. Two agents negotiate over a set of items (e.g., books, balls, hats)
27
+ 2. Each item has:
28
+ - A specific quantity (how many units of each item are available)
29
+ - A value for each agent (which may differ between agents)
30
+ 3. Agents take turns sending messages to negotiate how to split the items
31
+ 4. Once an agreement is reached, agents finalize the deal
32
+ 5. Points are awarded based on the value of items each agent receives
33
+
34
+ ### Detailed Gameplay
35
+
36
+ #### Setup Phase
37
+
38
+ The game begins with:
39
+ - A set of items (e.g., "book", "hat", "ball")
40
+ - Each item has a quantity (e.g., 6 books, 2 hats, 4 balls)
41
+ - Each agent has private values for each item (e.g., books might be worth 5 points to one agent but only 2 points to the other)
42
+ - Agents are assigned roles (starting negotiator and responding negotiator)
43
+
44
+ #### Negotiation Phase
45
+
46
+ 1. Agents take turns sending free-form text messages to each other
47
+ 2. Messages can include offers, counter-offers, questions, or strategic communication
48
+ 3. There is a maximum number of messages permitted (preventing endless negotiations)
49
+ 4. Either agent can propose to finalize an agreement at any time
50
+
51
+ For example:
52
+ - Agent 1: "I propose I get all the books and you get all the hats and balls."
53
+ - Agent 2: "That doesn't work for me. How about you get 3 books and I get 3 books, all the hats, and all the balls?"
54
+ - Agent 1: "Let me counter-offer: I get 4 books and 2 balls, you get 2 books, all hats, and 2 balls."
55
+
56
+ #### Finalization Phase
57
+
58
+ 1. When an agent wants to finalize a deal, they must specify the exact allocation:
59
+ - How many of each item they receive
60
+ - How many of each item the other agent receives
61
+ 2. The other agent must then either agree (by submitting the same allocation) or reject the finalization
62
+ 3. If both agents submit matching finalizations, the deal is executed
63
+ 4. If finalizations don't match, no agreement is reached, and both agents receive 0 points
64
+
65
+ #### Scoring
66
+
67
+ 1. Each agent's score is calculated based on the value of items they receive
68
+ 2. The formula is: Sum(quantity_of_item_i × value_of_item_i_to_agent)
69
+ 3. If no agreement is reached, both agents receive 0 points
70
+
71
+ ### Example Game
72
+
73
+ Let's walk through a simple example:
74
+
75
+ **Setup:**
76
+ - Items: Books (4), Hats (2), Balls (6)
77
+ - Agent 1 values: Books=5, Hats=1, Balls=2
78
+ - Agent 2 values: Books=3, Hats=6, Balls=1
79
+
80
+ **Negotiation (simplified):**
81
+ 1. Agent 1: "I would like all the books and balls. You can have the hats."
82
+ 2. Agent 2: "That doesn't work for me. Books are valuable. I propose I get all the hats and 2 books, you get 2 books and all the balls."
83
+ 3. Agent 1: "How about I get 3 books and all the balls, and you get 1 book and all the hats?"
84
+ 4. Agent 2: "I accept your proposal."
85
+
86
+ **Finalization:**
87
+ - Agent 1 submits: Agent 1 gets (Books: 3, Hats: 0, Balls: 6), Agent 2 gets (Books: 1, Hats: 2, Balls: 0)
88
+ - Agent 2 submits the same allocation, confirming agreement
89
+
90
+ **Scoring:**
91
+ - Agent 1 score: (3 books × 5) + (0 hats × 1) + (6 balls × 2) = 15 + 0 + 12 = 27 points
92
+ - Agent 2 score: (1 book × 3) + (2 hats × 6) + (0 balls × 1) = 3 + 12 + 0 = 15 points
93
+
94
+ ### Game Variations
95
+
96
+ The DoND environment supports several variations through configuration parameters:
97
+
98
+ #### Different Value Distributions
99
+
100
+ The environment offers multiple ways to assign values to items:
101
+
102
+ 1. **Standard Random Setup (dond_random_setup)**:
103
+ - Items have even-numbered quantities
104
+ - Each agent receives distinct random values for each item
105
+ - Values are drawn from a uniform distribution
106
+
107
+ 2. **Independent Random Values (independent_random_vals)**:
108
+ - Item quantities can be any number in the specified range
109
+ - Values for each agent are drawn independently
110
+ - Creates more varied negotiation scenarios
111
+
112
+ 3. **Bicameral Value Distribution (bicameral_vals_assignator)**:
113
+ - Creates a "high value" and "low value" distribution for each item
114
+ - Each agent assigns high values to roughly half of the items and low values to the other half
115
+ - Values are drawn from normal distributions with different means
116
+ - Creates scenarios with clear trade opportunities
117
+
118
+ #### Visibility Options
119
+
120
+ 1. **Finalization Visibility**:
121
+ - When enabled, both agents can see each other's finalization proposals
122
+ - When disabled, finalization proposals remain private until both are submitted
123
+
124
+ 2. **Other Values Visibility**:
125
+ - When enabled, agents can see each other's value functions
126
+ - When disabled, agents only know their own values
127
+ - Creates information asymmetry and richer negotiation dynamics
128
+
129
+ #### Game Modes
130
+
131
+ 1. **Cooperative Mode ("coop")**:
132
+ - Agents are encouraged to find mutually beneficial solutions
133
+ - Success is measured by the sum of both agents' scores
134
+
135
+ 2. **Competitive Mode ("comp")**:
136
+ - Agents aim to maximize their individual scores
137
+ - Creates more adversarial negotiations
138
+
139
+ #### Round Structure
140
+
141
+ 1. **Single Round**:
142
+ - One negotiation session between the two agents
143
+ - Simple evaluation of negotiation skills
144
+
145
+ 2. **Multiple Rounds**:
146
+ - Agents negotiate multiple times with different item setups
147
+ - Allows for learning and adaptation over time
148
+ - Roles can be swapped between rounds
149
+
150
+ DondEnv
151
+ ------------
152
+
153
+ The ``DondEnv`` class provides an interface to the Deal or No Deal environment that follows the Multi-Agent
154
+ Negotiation Environment standard.
155
+
156
+ .. code-block:: python
157
+
158
+ class DondEnv:
159
+ """
160
+ Multi-Agent Negotiation Environment for Deal or No Deal.
161
+ """
162
+ def __init__(
163
+ self,
164
+ agents,
165
+ mode="coop",
166
+ max_messages=None,
167
+ min_messages=None,
168
+ max_chars_per_message=None,
169
+ rounds_per_game=1,
170
+ random_setup_func=None,
171
+ random_setup_kwargs=None,
172
+ role_assignator_func=None,
173
+ role_assignator_func_kwargs=None,
174
+ finalization_visibility=False,
175
+ other_values_visibility=False,
176
+ random_seed=None
177
+ ):
178
+ """Initialize the Deal or No Deal environment.
179
+
180
+ Args:
181
+ agents: List of agent IDs participating in the game
182
+ mode: Game mode ("coop" or "comp")
183
+ max_messages: Maximum number of messages per agent per round
184
+ min_messages: Minimum number of messages per agent per round
185
+ max_chars_per_message: Maximum characters per message
186
+ rounds_per_game: Number of negotiation rounds to play
187
+ random_setup_func: Function to generate item quantities and values
188
+ random_setup_kwargs: Arguments for the random setup function
189
+ role_assignator_func: Function to assign roles to agents
190
+ role_assignator_func_kwargs: Arguments for the role assignator
191
+ finalization_visibility: Whether agents can see each other's finalizations
192
+ other_values_visibility: Whether agents can see each other's values
193
+ random_seed: Seed for reproducibility
194
+ """
195
+ # ...
196
+
197
+ def reset(self):
198
+ """Reset the environment to an initial state and return the initial observation.
199
+
200
+ Returns:
201
+ observation (dict): A dictionary where keys are agent identifiers and values are observations.
202
+ """
203
+ # ...
204
+
205
+ def step(self, actions):
206
+ """Take a step in the environment using the provided actions.
207
+
208
+ Args:
209
+ actions (dict): A dictionary where keys are agent identifiers and values are actions.
210
+ Actions can be messages or finalization proposals.
211
+
212
+ Returns:
213
+ observations (dict): A dictionary where keys are agent identifiers and values are observations.
214
+ done (bool): Whether the episode has ended.
215
+ info (dict): Additional information about the environment.
216
+ """
217
+ # ...
218
+
219
+ def get_state(self):
220
+ """Retrieve the current state of the game.
221
+
222
+ Returns:
223
+ state (dict): The current state of the game, including items, quantities, values, etc.
224
+ """
225
+ # ...
226
+
227
+ Key Implementation Details
228
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~
229
+
230
+ The ``DondEnv`` class implements several key features:
231
+
232
+ 1. **Multi-Agent Support**: The environment tracks two agents and manages their alternating messages.
233
+
234
+ 2. **Turn-Based Dialogue**: The environment enforces turn structure and limits on message count.
235
+
236
+ 3. **Finalization Processing**: The environment validates and processes finalization proposals.
237
+
238
+ 4. **Random Setup**: The environment supports multiple methods of generating negotiation scenarios.
239
+
240
+ 5. **Round Management**: The environment can handle multiple rounds with different setups.
241
+
242
+ Observation Structure
243
+ ~~~~~~~~~~~~~~~~~~~~~
244
+
245
+ Each agent receives an observation (state) dictionary with rich information about the game:
246
+
247
+ .. code-block:: python
248
+
249
+ {
250
+ "mode": str, # Game mode ("coop" or "comp")
251
+ "role_values": dict, # Value mappings for each role
252
+ "role_props": dict, # Properties for each role
253
+ "agent_to_role": dict, # Mapping from agent IDs to roles
254
+ "is_new_round": bool, # Whether this is the start of a new round
255
+ "is_new_game": bool, # Whether this is the start of a new game
256
+ "game_over": bool, # Whether the game is over
257
+ "items": list, # List of item names
258
+ "quantities": dict, # Quantities of each item
259
+ "has_finalized": bool, # Whether finalization has been proposed
260
+ "last_message": dict, # The last message sent
261
+ "messages_remaining": dict, # Number of messages each agent can still send
262
+ # And various history tracking fields
263
+ }
264
+
265
+ Action Structure
266
+ ~~~~~~~~~~~~~~~~
267
+
268
+ Actions can be:
269
+
270
+ 1. **Text Messages**: Free-form text for negotiation.
271
+ 2. **Finalization Proposals**: Structured data specifying the exact allocation of items.
272
+
273
+ Example finalization format:
274
+
275
+ .. code-block:: python
276
+
277
+ {
278
+ "type": "finalize",
279
+ "allocation": {
280
+ "agent1": {"book": 3, "hat": 0, "ball": 6},
281
+ "agent2": {"book": 1, "hat": 2, "ball": 0}
282
+ }
283
+ }
284
+
285
+ Value Setup Functions
286
+ ---------------------
287
+
288
+ The DoND environment provides several functions for setting up item values:
289
+
290
+ .. code-block:: python
291
+
292
+ def dond_random_setup(items, min_quant, max_quant, min_val, max_val, random_seed=None):
293
+ """
294
+ Generates items, even-numbered quantities and distinct random values for each category for both agents.
295
+
296
+ Args:
297
+ items (list): List of items.
298
+ min_quant (int): Minimum quantity per item.
299
+ max_quant (int): Maximum quantity per item.
300
+ min_val (int): Minimum value per item.
301
+ max_val (int): Maximum value per item.
302
+ random_seed (int, optional): Seed for random generation.
303
+
304
+ Returns:
305
+ tuple: (items, quantities, (val_starting_negotiator, val_responding_negotiator))
306
+ """
307
+ # ...
308
+
309
+ def independent_random_vals(items, min_quant, max_quant, min_val, max_val, random_seed=None):
310
+ """
311
+ Generates random quantities and independent random values for both agents.
312
+
313
+ Args:
314
+ Similar to dond_random_setup
315
+
316
+ Returns:
317
+ tuple: (items, quantities, (val_starting_negotiator, val_responding_negotiator))
318
+ """
319
+ # ...
320
+
321
+ def bicameral_vals_assignator(items, min_quant, max_quant, low_val_mean, low_val_std, high_val_mean, high_val_std, random_seed=None):
322
+ """
323
+ Generates values with a bicameral distribution - each agent values half the items highly.
324
+
325
+ Args:
326
+ items (list): List of items.
327
+ min_quant, max_quant: Range for quantities
328
+ low_val_mean, low_val_std: Mean and standard deviation for the "low value" distribution
329
+ high_val_mean, high_val_std: Mean and standard deviation for the "high value" distribution
330
+ random_seed: Seed for reproducibility
331
+
332
+ Returns:
333
+ tuple: (items, quantities, (val_starting_negotiator, val_responding_negotiator))
334
+ """
335
+ # ...
336
+
337
+ Running DoND Games
338
+ ----------------------
339
+
340
+ To run Deal or No Deal games with LLM agents, you can use the following structure:
341
+
342
+ .. code-block:: python
343
+
344
+ from mllm.environments.dond.dond_game import DondEnv
345
+ from mllm.environments.dond.dond_agent import DondAgent
346
+ from src.run_matches import run_batched_matches
347
+
348
+ # Create environment
349
+ env = DondEnv(
350
+ agents=["agent1", "agent2"],
351
+ mode="coop",
352
+ max_messages=10,
353
+ rounds_per_game=1,
354
+ random_setup_func="dond_random_setup",
355
+ random_setup_kwargs={
356
+ "items": ["book", "hat", "ball"],
357
+ "min_quant": 2,
358
+ "max_quant": 8,
359
+ "min_val": 1,
360
+ "max_val": 10
361
+ },
362
+ finalization_visibility=False
363
+ )
364
+
365
+ # Create agent handlers (implementation details would vary)
366
+ agent_handlers = {
367
+ "agent1": DondAgent(agent_id="agent1"),
368
+ "agent2": DondAgent(agent_id="agent2")
369
+ }
370
+
371
+ # Define policy mapping
372
+ policy_mapping = {
373
+ "llm_policy": my_llm_policy_function
374
+ }
375
+
376
+ # Run the game
377
+ game_results = run_batched_matches(
378
+ envs=[env],
379
+ agent_handlers_per_env=[agent_handlers],
380
+ policy_mapping=policy_mapping,
381
+ max_parallel_matches=1
382
+ )
383
+
384
+ Limitations and Considerations
385
+ ------------------------------
386
+
387
+ 1. **Negotiation Complexity**: The open-ended nature of negotiations can be challenging for some LLM agents.
388
+
389
+ 2. **Parsing Challenges**: Extracting structured finalization proposals from free-form text requires robust parsing.
390
+
391
+ 3. **Optimization Opportunities**: Different agents may employ different negotiation strategies to optimize outcomes.
392
+
393
+ 4. **Fairness Evaluation**: The environment allows research into questions of fair division and Pareto optimality.
394
+
395
+ 5. **Strategic Deception**: Agents might strategically misrepresent their true values, adding complexity to negotiations.
396
+
397
+ Advanced Usage
398
+ --------------
399
+
400
+ For advanced usage, you can:
401
+
402
+ 1. **Custom Value Functions**: Create more complex distributions of item values for specific research questions.
403
+
404
+ 2. **Novel Negotiation Scenarios**: Design item sets and values to test specific negotiation skills.
405
+
406
+ 3. **Curriculum Learning**: Create progressively more difficult negotiation scenarios.
407
+
408
+ 4. **Communication Analysis**: Analyze the language and strategies used in successful negotiations.
409
+
410
+ 5. **Multi-Round Dynamics**: Study how agents adapt their strategies over multiple rounds.
src_code_for_reproducibility/docs/source/launch.rst ADDED
File without changes
src_code_for_reproducibility/docs/source/media/runbatch.png ADDED
src_code_for_reproducibility/docs/source/modules.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src
2
+ ===
3
+
4
+ .. toctree::
5
+ :maxdepth: 4
6
+
7
+ src
src_code_for_reproducibility/docs/source/src.environments.dond.dond_game.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.environments.dond.dond\_game module
2
+ =======================================
3
+
4
+ .. automodule:: src.environments.dond.dond_game
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.dond.dond_log_funcs.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.environments.dond.dond\_log\_funcs module
2
+ =============================================
3
+
4
+ .. automodule:: src.environments.dond.dond_log_funcs
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.dond.dond_player.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.environments.dond.dond\_agent module
2
+ =========================================
3
+
4
+ .. automodule:: src.environments.dond.dond_agent
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.environment_imports.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.environments.environment\_imports module
2
+ ============================================
3
+
4
+ .. automodule:: src.environments.environment_imports
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_agent.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.environments.ipd.ipd\_agent module
2
+ ======================================
3
+
4
+ .. automodule:: src.environments.ipd.ipd_agent
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.environments.rst ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ src.environments package
2
+ ========================
3
+
4
+ .. automodule:: src.environments
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
8
+
9
+ Subpackages
10
+ -----------
11
+
12
+ .. toctree::
13
+ :maxdepth: 4
14
+
15
+ src.environments.dond
16
+ src.environments.ipd
17
+
18
+ Submodules
19
+ ----------
20
+
21
+ .. toctree::
22
+ :maxdepth: 4
23
+
24
+ src.environments.env_imports
25
+ src.environments.environment_imports
src_code_for_reproducibility/docs/source/src.experiments.dond_run_train.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.experiments.dond\_run\_train module
2
+ =======================================
3
+
4
+ .. automodule:: src.experiments.dond_run_train
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.experiments.last_completion.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.experiments.last\_completion module
2
+ =======================================
3
+
4
+ .. automodule:: src.experiments.last_completion
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.generation.run_games.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.generation.run\_games module
2
+ ================================
3
+
4
+ .. automodule:: src.generation.run_games
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.models.dummy_local_llm.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.models.dummy\_local\_llm module
2
+ ===================================
3
+
4
+ .. automodule:: src.models.dummy_local_llm
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.models.new_local_llm.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.models.new\_local\_llm module
2
+ =================================
3
+
4
+ .. automodule:: src.models.new_local_llm
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.models.oai_agent.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.models.oai\_agent module
2
+ ============================
3
+
4
+ .. automodule:: src.models.oai_agent
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.models.server_llm.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.models.server\_llm module
2
+ =============================
3
+
4
+ .. automodule:: src.models.server_llm
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.models.updatable_worker.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.models.updatable\_worker module
2
+ ===================================
3
+
4
+ .. automodule:: src.models.updatable_worker
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.rst ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ src package
2
+ ===========
3
+
4
+ .. automodule:: src
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
8
+
9
+ Subpackages
10
+ -----------
11
+
12
+ .. toctree::
13
+ :maxdepth: 4
14
+
15
+ src.environments
16
+ src.experiments
17
+ src.generation
18
+ src.models
19
+ src.training
20
+ src.utils
21
+
22
+ Submodules
23
+ ----------
24
+
25
+ .. toctree::
26
+ :maxdepth: 4
27
+
28
+ src.run
src_code_for_reproducibility/docs/source/src.training.ppo_train.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.training.ppo\_train module
2
+ ==============================
3
+
4
+ .. automodule:: src.training.ppo_train
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.training.ppo_train_value_head.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.training.ppo\_train\_value\_head module
2
+ ===========================================
3
+
4
+ .. automodule:: src.training.ppo_train_value_head
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.training.rl_convs_processing.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.training.rl\_convs\_processing module
2
+ =========================================
3
+
4
+ .. automodule:: src.training.rl_convs_processing
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.utils.extra_stats.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.utils.extra\_stats module
2
+ =============================
3
+
4
+ .. automodule:: src.utils.extra_stats
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.utils.inherit_args.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.utils.inherit\_args module
2
+ ==============================
3
+
4
+ .. automodule:: src.utils.inherit_args
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.utils.parallel_shuffle.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.utils.parallel\_shuffle module
2
+ ==================================
3
+
4
+ .. automodule:: src.utils.parallel_shuffle
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.utils.quick_stats.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.utils.quick\_stats module
2
+ =============================
3
+
4
+ .. automodule:: src.utils.quick_stats
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/src.utils.update_start_epoch.rst ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ src.utils.update\_start\_epoch module
2
+ =====================================
3
+
4
+ .. automodule:: src.utils.update_start_epoch
5
+ :members:
6
+ :undoc-members:
7
+ :show-inheritance:
src_code_for_reproducibility/docs/source/usage.rst ADDED
File without changes
src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_simulation.cpython-312.pyc ADDED
Binary file (6.72 kB). View file
 
src_code_for_reproducibility/utils/__init__.py ADDED
File without changes
src_code_for_reproducibility/utils/dict_get_path.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
from typing import Any


def get_from_nested_dict(a: dict, path) -> Any:
    """Look up a value in a nested container, returning ``None`` on any miss.

    Args:
        a: The outermost mapping (or any other subscriptable object).
        path: Either a single string, used directly as a key of ``a``, or an
            iterable of keys/indices applied one after another.

    Returns:
        The value found at ``path``, or ``None`` when any step of the lookup
        raises (missing key, bad index, non-subscriptable intermediate, ...).
    """
    try:
        if isinstance(path, str):
            return a[path]
        for p in path:
            a = a[p]
        return a
    except Exception:
        # Deliberately best-effort: callers get None for "not there" no matter
        # which container type rejected the lookup.
        return None
src_code_for_reproducibility/utils/gather_training_stats.py ADDED
@@ -0,0 +1,257 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import copy
2
+ import csv
3
+ import gc
4
+ import json
5
+ import logging
6
+ import os
7
+ import pickle
8
+ import random
9
+ import re
10
+ import subprocess
11
+ import sys
12
+ import time
13
+ from datetime import datetime
14
+ from statistics import mean
15
+ from typing import Any, Dict
16
+
17
+ import hydra
18
+ import matplotlib.pyplot as plt
19
+ import numpy as np
20
+ import pandas as pd
21
+ import torch
22
+ from omegaconf import OmegaConf
23
+
24
+ from mllm.training.tally_metrics import Tally
25
+ from mllm.utils.stat_pack import StatPack
26
+
27
+
28
def get_from_nested_dict(dictio: dict, path: list[str]):
    """Fetch the value stored at ``path`` inside a nested dict.

    Every key except the last is followed with plain subscription (so a
    missing intermediate key raises ``KeyError``); the last key is read with
    ``.get`` and therefore yields ``None`` when absent.
    """
    node = dictio
    parent_keys = path[:-1]
    for key in parent_keys:
        node = node[key]
    leaf_key = path[-1]
    return node.get(leaf_key)
32
+
33
+
34
def set_at_path(dictio: dict, path: list[str], value):
    """Store ``value`` at ``path`` inside a nested dict, in place.

    Intermediate dicts that do not exist yet are created on the way down;
    existing ones are reused.
    """
    node = dictio
    for key in path[:-1]:
        # setdefault creates the missing level and returns it in one step
        node = node.setdefault(key, {})
    node[path[-1]] = value
40
+
41
+
42
def produce_tabular_render(inpath: str, outpath: str = None):
    """Write one CSV table per rollout found in a JSON metrics file.

    The JSON document at ``inpath`` maps each rollout path to a list of metric
    dicts. Each list becomes a table with one column per key of its first
    entry and one row per entry.

    Args:
        inpath: Path of the JSON file to read.
        outpath: Directory that receives the CSV files. When ``None`` a
            ``contextualized_tabular_renders`` folder next to ``inpath`` is
            used. (Passing a value used to fail with an unbound variable.)
    """
    with open(inpath, "r") as f:
        data = json.load(f)

    if outpath is None:
        # dirname() is "" for a bare file name, which keeps the output
        # relative instead of landing in the filesystem root.
        out_dir = os.path.join(
            os.path.dirname(inpath), "contextualized_tabular_renders"
        )
    else:
        out_dir = outpath

    for rollout_path, metrics in data.items():
        if not metrics:
            # Nothing to tabulate for this rollout.
            continue
        file_stem = rollout_path.replace("/", "|").replace(".json", "")
        m_path = os.path.join(out_dir, file_stem + "_tabular_render.render.csv")
        os.makedirs(os.path.split(m_path)[0], exist_ok=True)

        # Column layout is dictated by the first row.
        columns = {k: [] for k in metrics[0]}
        for row in metrics:
            for k, v in row.items():
                columns[k].append(v)
        pd.DataFrame(columns).to_csv(m_path)
68
+
69
+
70
def get_metric_paths(data: list[dict]):
    """List the key path of every non-dict leaf in the first entry of ``data``.

    Paths are returned in depth-first order, each as a list of keys leading
    from the top-level dict down to the leaf.
    """

    def iter_leaf_paths(node, prefix):
        for key, value in node.items():
            trail = prefix + [key]
            if isinstance(value, dict):
                yield from iter_leaf_paths(value, trail)
            else:
                yield trail

    return list(iter_leaf_paths(data[0], []))
84
+
85
+
86
def print_metric_paths(data: list[dict]):
    """Print every metric path of ``data`` (see ``get_metric_paths``), one per line."""
    for metric_path in get_metric_paths(data):
        print(metric_path)
90
+
91
+
92
def get_metric_iteration_list(data: list[dict], metric_path: list[str]):
    """Collect the value found at ``metric_path`` in each entry of ``data``.

    A bare string is accepted as shorthand for a one-element path. The result
    has one item per entry of ``data``, in the same order.
    """
    key_path = [metric_path] if isinstance(metric_path, str) else metric_path
    return [get_from_nested_dict(entry, key_path) for entry in data]
99
+
100
+
101
def to_1d_numeric(x):
    """Flatten ``x`` into a 1-D float array, or return ``None`` if it is not numeric.

    Scalars become one-element arrays, numpy arrays are cast to float and
    ravelled, and lists/tuples are flattened recursively with non-numeric or
    empty members dropped. Anything else yields ``None``.
    """
    if x is None:
        return None

    if isinstance(x, (int, float, np.number)):
        return np.array([float(x)], dtype=float)

    if isinstance(x, np.ndarray):
        try:
            flat = x.astype(float).ravel()
        except Exception:
            # e.g. string/object arrays that cannot be cast to float
            return None
        return flat

    if isinstance(x, (list, tuple)):
        converted = (to_1d_numeric(member) for member in x)
        chunks = [c for c in converted if c is not None and c.size > 0]
        return np.concatenate(chunks) if chunks else None

    return None
122
+
123
+
124
def get_single_metric_vector(data, metric_path, iterations=None):
    """Concatenate one metric across all entries of ``data`` into a 1-D float array.

    Args:
        data: Iterable of (nested) metric dicts.
        metric_path: Key path of the metric; a bare string is treated as a
            one-element path.
        iterations: Accepted for interface compatibility only.
            NOTE(review): this argument has never influenced the result;
            confirm whether callers expect it to limit the entries used.

    Returns:
        A 1-D float array with the numeric values of every entry whose metric
        could be converted, or an empty float array if there were none.
    """
    if isinstance(metric_path, str):
        metric_path = [metric_path]
    vecs = []
    for d in data:
        arr = to_1d_numeric(get_from_nested_dict(d, metric_path))
        if arr is not None:
            vecs.append(arr)

    return np.concatenate(vecs) if vecs else np.empty(0, dtype=float)
137
+
138
+
139
def _load_metrics_file(file_path: str):
    """Unpickle and return the object stored in a ``*.pkl`` metrics file.

    Raises:
        ValueError: If ``file_path`` does not end in ``.pkl``.
    """
    # ".tally.pkl" itself ends with ".pkl", so both suffixes are one check.
    if not file_path.endswith((".tally.pkl", ".pkl")):
        raise ValueError("Only *.tally.pkl files are supported.")
    import pickle

    # NOTE(review): unpickling runs code embedded in the file; only load
    # tallies that come from a trusted run.
    with open(file_path, "rb") as handle:
        return pickle.load(handle)
147
+
148
+
149
def get_leaf_items(array_tally: dict, prefix: list[str] = None):
    """Yield ``(path, value)`` for every non-dict leaf of a nested dict.

    ``path`` is the list of stringified keys leading to the leaf, preceded by
    ``prefix`` when one is given. Leaves are produced in depth-first order.
    """
    # Explicit stack of (path-so-far, iterator over that level's items).
    pending = [([] if prefix is None else prefix, iter(array_tally.items()))]
    while pending:
        trail, entries = pending[-1]
        for key, value in entries:
            child_trail = trail + [str(key)]
            if isinstance(value, dict):
                # Descend now; the parent iterator resumes once the child is done.
                pending.append((child_trail, iter(value.items())))
                break
            yield child_trail, value
        else:
            pending.pop()
158
+
159
+
160
def _sanitize_filename_part(part: str) -> str:
    """Make ``part`` usable inside a file name: ``/`` becomes ``|`` and spaces become ``_``."""
    return part.replace("/", "|").replace(" ", "_")
164
+
165
+
166
def render_rt_tally_pkl_to_csvs(pkl_path: str, outdir: str):
    """Render a pickled rollout tally as one CSV file per metric (token-wise logging).

    The pickle holds either a dict with an ``"array_tally"`` entry or the
    array tally itself. Each leaf of that nested dict is a sequence of items
    exposing ``agent_ids``, ``crn_ids``, ``rollout_ids`` and ``metric_matrix``.
    Every row of every item's ``metric_matrix`` becomes one CSV row, prefixed
    with its agent/crn/rollout ids.

    Args:
        pkl_path: Path of the ``*.rt_tally.pkl`` file to read.
        outdir: Directory receiving the ``<trainer>__<metric path>.render.csv``
            files; created if missing.
    """
    # NOTE(review): unpickling runs code embedded in the file; only render
    # tallies that come from a trusted run.
    with open(pkl_path, "rb") as f:
        payload = pickle.load(f)
    # Backward compatibility: older tallies stored the dict directly
    if isinstance(payload, dict) and "array_tally" in payload:
        array_tally = payload.get("array_tally", {})
    else:
        array_tally = payload

    os.makedirs(outdir, exist_ok=True)
    trainer_id = os.path.basename(pkl_path).replace(".rt_tally.pkl", "")
    for path_list, rollout_tally_items in get_leaf_items(array_tally):
        if len(rollout_tally_items) == 0:
            # No rows for this metric: skip instead of failing on items[0]
            # and leaving an empty CSV behind.
            continue

        path_part = ".".join(_sanitize_filename_part(p) for p in path_list)
        filename = f"{trainer_id}__{path_part}.render.csv"
        out_path = os.path.join(outdir, filename)

        with open(out_path, "w", newline="") as f:
            writer = csv.writer(f)

            # The header width comes from the first item's metric matrix.
            first_item = rollout_tally_items[0]
            metric_cols = (
                first_item.metric_matrix.shape[1]
                if first_item.metric_matrix.ndim > 1
                else 1
            )
            header = ["agent_id", "crn_id", "rollout_id"] + [
                f"t_{i}" for i in range(metric_cols)
            ]
            writer.writerow(header)

            for rollout_tally_item in rollout_tally_items:
                crn_ids = rollout_tally_item.crn_ids
                rollout_ids = rollout_tally_item.rollout_ids
                agent_ids = rollout_tally_item.agent_ids
                metric_matrix = rollout_tally_item.metric_matrix
                for i in range(metric_matrix.shape[0]):
                    row_vals = metric_matrix[i].reshape(-1)
                    # Plain list so "+" below concatenates instead of broadcasting
                    row_vals = (
                        row_vals.tolist()
                        if hasattr(row_vals, "tolist")
                        else list(row_vals)
                    )
                    row_prefix = [
                        agent_ids[i],
                        crn_ids[i],
                        rollout_ids[i],
                    ]
                    writer.writerow(row_prefix + row_vals)
221
+
222
+
223
def tally_to_stat_pack(tally: Dict[str, Any]):
    """Build a ``StatPack`` holding the mean of every leaf of ``tally``.

    If ``tally`` has an ``"array_tally"`` entry, that entry is used instead.
    Nested keys are flattened by joining them with ``_`` and each leaf is
    reduced with ``np.mean`` before being added to the pack.
    """
    stat_pack = StatPack()
    source = tally["array_tally"] if "array_tally" in tally else tally

    # backward compatibility: will remove later, flatten keys in tally
    def iter_leaves(node, prefix):
        for name, child in node.items():
            trail = prefix + [name]
            if isinstance(child, dict):
                yield from iter_leaves(child, trail)
            else:
                yield trail, child

    flattened = {}
    for trail, leaf in iter_leaves(source, []):
        flattened["_".join(trail)] = np.mean(leaf)

    for stat_name, stat_value in flattened.items():
        stat_pack.add_stat(stat_name, stat_value)
    return stat_pack
src_code_for_reproducibility/utils/get_stochastic_game_lengths.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
def get_stochastic_game_lengths(
    max_length,
    nb_games,
    continuation_prob,
    same_length_batch=False,
):
    """
    Draw game lengths from a geometric distribution, capped at ``max_length``.

    Args:
        max_length (int): Upper bound applied to every sampled length.
        nb_games (int): How many lengths to return.
        continuation_prob (float): Probability that a game goes on after each
            round; a value of 1 makes every game last ``max_length``.
        same_length_batch (bool): If True, a single length is sampled and
            shared by all games.

    Returns:
        list: ``nb_games`` game lengths.
    """
    if continuation_prob == 1:
        return [max_length] * nb_games

    stop_prob = 1 - continuation_prob
    if same_length_batch:
        # One draw, replicated for the whole batch.
        sampled = np.repeat(np.random.geometric(stop_prob, 1), nb_games)
    else:
        sampled = np.random.geometric(stop_prob, nb_games)

    return np.minimum(sampled, max_length).tolist()
src_code_for_reproducibility/utils/kill_sglang.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import psutil
2
+ import signal
3
+
4
# Substring looked for in each process command line.
target_name = "sglang::scheduler"
# PIDs that kill_sglang() has sent SIGKILL to so far.
killed = []


def kill_sglang():
    """Send SIGKILL to every process whose command line contains ``target_name``.

    Each kill is announced on stdout and the PID is appended to the
    module-level ``killed`` list. Processes that vanish or cannot be
    accessed are skipped silently.
    """
    for proc in psutil.process_iter(['pid', 'name', 'cmdline']):
        try:
            # Some processes may not have a name or cmdline
            argv = proc.info['cmdline']
            cmdline = " ".join(argv) if argv else ""
            if target_name not in cmdline:
                continue
            print(f"Killing PID {proc.pid}: {cmdline}")
            proc.send_signal(signal.SIGKILL)
            killed.append(proc.pid)
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue
src_code_for_reproducibility/utils/output_source_code.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
def output_source_code(model, output_path: str) -> None:
    """
    Write ``model.source_code`` to ``output_path``, replacing any existing file.
    """
    with open(output_path, mode="w") as sink:
        sink.write(model.source_code)
src_code_for_reproducibility/utils/resource_context.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import time
3
+ from contextlib import contextmanager
4
+
5
+ import torch
6
+
7
+
8
def vram_usage():
    """Describe allocated and reserved CUDA memory for every visible GPU.

    Returns:
        One ``"GPU i: Memory Allocated: … GB, Memory Reserved: … GB"`` report
        per device, joined with ``"; "`` so multi-GPU output stays readable.
        The string is empty when no CUDA device is visible.
    """
    bytes_per_gb = 1024**3
    reports = []
    for i in range(torch.cuda.device_count()):
        gpu_memory_allocated = torch.cuda.memory_allocated(i) / bytes_per_gb
        gpu_memory_reserved = torch.cuda.memory_reserved(i) / bytes_per_gb
        reports.append(
            f"GPU {i}: Memory Allocated: {gpu_memory_allocated:.2f} GB, Memory Reserved: {gpu_memory_reserved:.2f} GB"
        )
    return "; ".join(reports)
19
+
20
+
21
def ram_usage():
    """Return the resident set size of the current process as a ``"RAM Usage: … GB"`` string."""
    import psutil

    rss_bytes = psutil.Process().memory_info().rss
    ram_used = rss_bytes / (1024**3)  # Convert bytes to GB
    return f"RAM Usage: {ram_used:.2f} GB"
28
+
29
+
30
@contextmanager
def resource_logger_context(logger: logging.Logger, task_description: str):
    """
    Context manager to log the resource usage of the current task.

    On exit (normal or via an exception) it logs, for CUDA device 0, the change
    in allocated-plus-reserved memory over the block, the current usage, the
    peak allocated memory and the elapsed wall-clock time, each memory figure
    as a percentage of the device's total memory.

    Args:
        logger: The logger to use to log the resource usage.
        task_description: The description of the task to log.
    Returns:
        None
    """
    # The baseline is captured before entering the try block: if this setup
    # fails, the error propagates as-is instead of also running the report
    # code below against baselines that were never set.
    initial_time = time.time()
    # Assume CUDA is available and use device 0 only
    total_mem_bytes = torch.cuda.get_device_properties(0).total_memory
    # NOTE(review): allocated and reserved are summed here; confirm this is
    # intended, since reserved memory may already include allocated memory.
    initial_total_bytes = (
        torch.cuda.memory_allocated(0) + torch.cuda.memory_reserved(0)
    )
    torch.cuda.reset_peak_memory_stats(0)
    try:
        yield None
    finally:
        final_time = time.time()
        # Ensure kernels within the block are accounted for
        torch.cuda.synchronize()

        # Compute metrics
        final_allocated_bytes = torch.cuda.memory_allocated(0)
        final_reserved_bytes = torch.cuda.memory_reserved(0)
        final_total_bytes = final_allocated_bytes + final_reserved_bytes

        delta_vram_percent_total = (
            100 * (final_total_bytes - initial_total_bytes) / total_mem_bytes
            if total_mem_bytes
            else 0.0
        )
        current_percent_vram_taken = (
            100 * final_total_bytes / total_mem_bytes if total_mem_bytes else 0.0
        )
        block_peak_percent = (
            100 * torch.cuda.max_memory_allocated(0) / total_mem_bytes
            if total_mem_bytes
            else 0.0
        )
        delta_time_str = time.strftime(
            '%H:%M:%S', time.gmtime(final_time - initial_time)
        )

        logger.info(
            f"For task: {task_description}, ΔVRAM % (total): {delta_vram_percent_total:.2f}%, Current % of VRAM taken: {current_percent_vram_taken:.2f}%, Block Peak % of device VRAM: {block_peak_percent:.2f}%, ΔTime: {delta_time_str}"
        )
src_code_for_reproducibility/utils/rollout_tree_chat_htmls.py ADDED
@@ -0,0 +1,1921 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from typing import List
3
+
4
+ from mllm.utils.rollout_tree_gather_utils import *
5
+
6
+
7
+ def html_from_chat_turns(chat_turns: List[ChatTurnLog]) -> str:
8
+ """
9
+ Render chat turns as a single, wrapping sequence of messages in time order.
10
+ Keep badge and message bubble styles, include time on every badge and
11
+ include rewards on assistant badges. Each message is individually
12
+ hide/show by click; when hidden, only the badge remains and "(...)" is
13
+ shown inline (not inside a bubble).
14
+ """
15
+ import html
16
+ import re as _re
17
+
18
+ # Prepare ordering: sort by (time_step, original_index) to keep stable order within same step
19
+ indexed_turns = list(enumerate(chat_turns))
20
+ indexed_turns.sort(key=lambda t: (t[1].time_step, t[0]))
21
+ assistant_agents = sorted({t.agent_id for t in chat_turns if t.role == "assistant"})
22
+ enable_split_view = len(assistant_agents) == 2
23
+
24
+ # CSS styles (simplified layout; no time-step or agent-column backgrounds)
25
+ css = """
26
+ <style>
27
+ :root {
28
+ --font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
29
+ --bg: #ffffff;
30
+ --text: #1c0b00;
31
+ --muted-text: #2C3E50;
32
+ --accent-muted: #BDC3C7;
33
+ --accent-muted-2: #D0D7DE;
34
+ --panel-bg: #F8FAFC;
35
+ --reward-color: #3a2e00; /* dark text for reward pill */
36
+ --font-size: 14px;
37
+ --border-width: 2px;
38
+ --corner-radius: 6px;
39
+ --pill-radius-left: 999px 0 0 999px;
40
+ --pill-radius-right: 0 999px 999px 0;
41
+ --inset-shadow: 0 1px 0 rgba(0,0,0,0.03) inset;
42
+
43
+ /* Chat View Colors */
44
+ --alice-bg: #dcf8c6;
45
+ --alice-border: #0eb224;
46
+ --bob-bg: #ffe4cc;
47
+ --bob-border: #ef8323;
48
+ --user-bg: #f5f5f5;
49
+ --chat-bg: #ffffff;
50
+ }
51
+ body {
52
+ font-family: var(--font-family);
53
+ margin: 12px;
54
+ background-color: var(--bg);
55
+ color: var(--text);
56
+ font-size: var(--font-size);
57
+ line-height: 1.5;
58
+ }
59
+
60
+ /* Chat View Styles */
61
+ #flow-chat {
62
+ max-width: 900px;
63
+ margin: 0 auto;
64
+ background: var(--chat-bg);
65
+ padding: 12px 16px 12px 8px;
66
+ border-radius: 8px;
67
+ }
68
+
69
+ .simultaneous-messages {
70
+ display: flex !important;
71
+ flex-direction: row !important;
72
+ flex-wrap: nowrap !important;
73
+ gap: 8px;
74
+ margin-bottom: 4px;
75
+ align-items: flex-start;
76
+ width: 100%;
77
+ overflow: hidden;
78
+ box-sizing: border-box;
79
+ }
80
+
81
+ .simultaneous-messages .chat-message {
82
+ flex: 1 1 0 !important;
83
+ margin-bottom: 0 !important;
84
+ display: flex !important;
85
+ flex-direction: row !important;
86
+ align-items: flex-start !important;
87
+ margin-left: 0 !important;
88
+ min-width: 0 !important;
89
+ max-width: 50% !important;
90
+ gap: 0 !important;
91
+ overflow: hidden !important;
92
+ }
93
+
94
+ .simultaneous-messages .chat-message-content {
95
+ max-width: 100% !important;
96
+ width: 100%;
97
+ align-items: flex-start !important;
98
+ margin-left: 0 !important;
99
+ overflow: hidden !important;
100
+ }
101
+
102
+ .simultaneous-messages .chat-message.agent-alice {
103
+ justify-content: flex-start !important;
104
+ }
105
+
106
+ .simultaneous-messages .chat-message.agent-bob {
107
+ justify-content: flex-end !important;
108
+ }
109
+
110
+ .simultaneous-messages .chat-message.agent-alice .chat-message-content {
111
+ margin-left: 0 !important;
112
+ align-items: flex-start !important;
113
+ }
114
+
115
+ .simultaneous-messages .chat-message.agent-bob .chat-message-content {
116
+ margin-left: auto !important;
117
+ margin-right: 0 !important;
118
+ align-items: flex-end !important;
119
+ }
120
+
121
+ .simultaneous-messages .chat-bubble {
122
+ max-width: 100%;
123
+ word-break: break-word;
124
+ overflow-wrap: break-word;
125
+ box-sizing: border-box;
126
+ }
127
+
128
+ .simultaneous-messages .chat-message.agent-alice .chat-bubble {
129
+ border-radius: 10px;
130
+ }
131
+
132
+ .simultaneous-messages .chat-message.agent-bob .chat-bubble {
133
+ border-radius: 10px;
134
+ }
135
+
136
+ .simultaneous-messages .chat-message.agent-alice .chat-header {
137
+ justify-content: flex-start;
138
+ flex-shrink: 0;
139
+ }
140
+
141
+ .simultaneous-messages .chat-message.agent-bob .chat-header {
142
+ justify-content: flex-end;
143
+ flex-shrink: 0;
144
+ }
145
+
146
+ .simultaneous-messages .chat-reasoning {
147
+ max-width: 100%;
148
+ overflow-wrap: break-word;
149
+ }
150
+
151
+ .chat-message {
152
+ display: flex;
153
+ margin-bottom: 2px;
154
+ align-items: flex-end;
155
+ gap: 6px;
156
+ position: relative;
157
+ margin-left: 36px;
158
+ }
159
+
160
+ .chat-message.agent-alice {
161
+ margin-left: 0;
162
+ }
163
+
164
+ .chat-message.agent-alice::before {
165
+ left: 0;
166
+ }
167
+
168
+ .chat-message.role-user {
169
+ opacity: 0.7;
170
+ }
171
+
172
+ .chat-message::before {
173
+ content: '';
174
+ position: absolute;
175
+ left: -36px;
176
+ top: 0;
177
+ bottom: 0;
178
+ width: 36px;
179
+ pointer-events: auto;
180
+ }
181
+
182
+ .merge-btn {
183
+ position: absolute;
184
+ left: -30px;
185
+ top: 50%;
186
+ transform: translateY(-50%);
187
+ width: 26px;
188
+ height: 26px;
189
+ border-radius: 4px;
190
+ border: 1.5px solid var(--accent-muted);
191
+ background: white;
192
+ cursor: pointer;
193
+ font-size: var(--font-size);
194
+ opacity: 0;
195
+ display: flex;
196
+ align-items: center;
197
+ justify-content: center;
198
+ transition: opacity 0.2s ease, transform 0.1s ease;
199
+ padding: 0;
200
+ line-height: 1;
201
+ z-index: 10;
202
+ }
203
+
204
+ .chat-message:hover .merge-btn,
205
+ .merge-btn:hover {
206
+ opacity: 1;
207
+ }
208
+
209
+ .merge-btn:hover {
210
+ background: var(--panel-bg);
211
+ border-color: var(--accent-muted-2);
212
+ transform: translateY(-50%) scale(1.15);
213
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.15);
214
+ }
215
+
216
+ .merge-btn:active {
217
+ transform: translateY(-50%) scale(0.95);
218
+ }
219
+
220
+ .chat-message.agent-alice .merge-btn {
221
+ left: -30px;
222
+ }
223
+
224
+ .chat-message.role-user .merge-btn {
225
+ display: none !important;
226
+ }
227
+
228
+ .simultaneous-messages .merge-btn {
229
+ opacity: 0 !important;
230
+ pointer-events: none;
231
+ }
232
+
233
+ .simultaneous-messages {
234
+ padding: 6px 0 6px 0 !important;
235
+ margin-left: 0 !important;
236
+ margin-right: 0 !important;
237
+ position: relative !important;
238
+ background: transparent !important;
239
+ border-radius: 0 !important;
240
+ box-sizing: border-box !important;
241
+ overflow: visible !important;
242
+ max-width: 100% !important;
243
+ border: none !important;
244
+ transition: padding 0.2s ease !important;
245
+ }
246
+
247
+ .simultaneous-messages:hover {
248
+ padding-top: 40px !important;
249
+ }
250
+
251
+ .simultaneous-messages::before {
252
+ content: '⇅ Merged';
253
+ position: absolute;
254
+ left: 0 !important;
255
+ top: 8px !important;
256
+ font-size: var(--font-size);
257
+ font-weight: 500;
258
+ color: #888;
259
+ pointer-events: none;
260
+ opacity: 0;
261
+ transition: opacity 0.2s ease;
262
+ }
263
+
264
+ .simultaneous-messages:hover::before {
265
+ opacity: 1;
266
+ }
267
+
268
+ .unmerge-btn {
269
+ position: absolute !important;
270
+ right: 0 !important;
271
+ top: 6px !important;
272
+ width: 36px !important;
273
+ height: 28px !important;
274
+ border-radius: 5px !important;
275
+ border: 2px solid #d63031 !important;
276
+ background: white !important;
277
+ cursor: pointer !important;
278
+ font-size: var(--font-size) !important;
279
+ font-weight: bold !important;
280
+ color: #d63031 !important;
281
+ display: flex !important;
282
+ align-items: center !important;
283
+ justify-content: center !important;
284
+ transition: all 0.2s ease !important;
285
+ padding: 0 !important;
286
+ line-height: 1 !important;
287
+ z-index: 1000 !important;
288
+ flex: none !important;
289
+ pointer-events: auto !important;
290
+ box-shadow: 0 2px 6px rgba(214, 48, 49, 0.3) !important;
291
+ opacity: 0 !important;
292
+ }
293
+
294
+ .simultaneous-messages:hover .unmerge-btn {
295
+ opacity: 1 !important;
296
+ }
297
+
298
+ .unmerge-btn:hover {
299
+ background: #ffe5e5 !important;
300
+ border-color: #b71c1c !important;
301
+ transform: scale(1.1) !important;
302
+ box-shadow: 0 3px 8px rgba(214, 48, 49, 0.4) !important;
303
+ }
304
+
305
+ .unmerge-btn:active {
306
+ transform: scale(0.95) !important;
307
+ background: #ffcccc !important;
308
+ }
309
+
310
+ .chat-message-content {
311
+ max-width: 72%;
312
+ display: flex;
313
+ flex-direction: column;
314
+ gap: 2px;
315
+ }
316
+
317
+ .chat-message.agent-alice .chat-message-content {
318
+ align-items: flex-start;
319
+ }
320
+
321
+ .chat-message.agent-bob .chat-message-content {
322
+ align-items: flex-end;
323
+ margin-left: auto;
324
+ }
325
+
326
+ .chat-bubble {
327
+ padding: 6px 10px;
328
+ border-radius: 10px;
329
+ word-wrap: break-word;
330
+ position: relative;
331
+ box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
332
+ line-height: 1.4;
333
+ }
334
+
335
+ .chat-message.agent-alice .chat-bubble {
336
+ background: var(--alice-bg);
337
+ border: 2px solid var(--alice-border);
338
+ border-radius: 10px 10px 10px 2px;
339
+ }
340
+
341
+ .chat-message.agent-bob .chat-bubble {
342
+ background: var(--bob-bg);
343
+ border: 2px solid var(--bob-border);
344
+ border-radius: 10px 10px 2px 10px;
345
+ }
346
+
347
+ .chat-message.role-user .chat-bubble {
348
+ background: var(--user-bg);
349
+ border: 2px solid #d0d0d0;
350
+ }
351
+
352
+ .chat-header {
353
+ display: flex;
354
+ align-items: center;
355
+ gap: 4px;
356
+ margin-bottom: 2px;
357
+ font-size: var(--font-size);
358
+ font-weight: 600;
359
+ line-height: 1.2;
360
+ }
361
+
362
+ .chat-message.agent-alice .chat-header {
363
+ color: var(--alice-border);
364
+ }
365
+
366
+ .chat-message.agent-bob .chat-header {
367
+ color: var(--bob-border);
368
+ }
369
+
370
+ .chat-timestamp {
371
+ font-size: var(--font-size);
372
+ color: var(--muted-text);
373
+ margin-top: 1px;
374
+ opacity: 0.75;
375
+ }
376
+
377
+ .chat-reward {
378
+ display: inline-flex;
379
+ align-items: center;
380
+ background: linear-gradient(90deg, #fffdf2 0%, #ffffff 75%);
381
+ color: #000000;
382
+ font-weight: 600;
383
+ font-size: var(--font-size);
384
+ padding: 1px 5px;
385
+ border-radius: 3px;
386
+ border: 1px solid #f4e6a8;
387
+ margin-left: 4px;
388
+ line-height: 1.3;
389
+ }
390
+
391
+ .chat-reasoning {
392
+ font-size: var(--font-size);
393
+ font-style: italic;
394
+ color: #555;
395
+ margin-bottom: 2px;
396
+ padding: 4px 8px;
397
+ background: rgba(0, 0, 0, 0.03);
398
+ border-radius: 5px;
399
+ cursor: pointer;
400
+ line-height: 1.3;
401
+ }
402
+
403
+ .chat-reasoning.collapsed .reasoning-text {
404
+ display: none;
405
+ }
406
+
407
+ .chat-reasoning.collapsed::after {
408
+ content: ' (click to expand)';
409
+ color: #777;
410
+ }
411
+
412
+ .chat-group-divider {
413
+ display: flex;
414
+ align-items: center;
415
+ gap: 8px;
416
+ width: 100%;
417
+ margin: 8px 0 4px 0;
418
+ position: relative;
419
+ cursor: pointer;
420
+ user-select: none;
421
+ }
422
+
423
+ .chat-group-divider::before,
424
+ .chat-group-divider::after {
425
+ content: "";
426
+ flex: 1 1 auto;
427
+ height: 2px;
428
+ background: linear-gradient(90deg, rgba(224,230,235,0), var(--accent-muted-2) 30%, var(--accent-muted-2) 70%, rgba(224,230,235,0));
429
+ }
430
+
431
+ .chat-group-label {
432
+ display: inline-block;
433
+ background: white;
434
+ padding: 2px 12px;
435
+ border-radius: 999px;
436
+ font-size: var(--font-size);
437
+ font-weight: 700;
438
+ color: var(--muted-text);
439
+ border: 1.5px solid var(--accent-muted);
440
+ box-shadow: 0 1px 3px rgba(0, 0, 0, 0.08);
441
+ line-height: 1.4;
442
+ position: relative;
443
+ transition: background 0.2s ease;
444
+ }
445
+
446
+ .chat-group-divider:hover .chat-group-label {
447
+ background: var(--panel-bg);
448
+ }
449
+
450
+ .chat-group-label::before {
451
+ content: '▼ ';
452
+ font-size: 0.8em;
453
+ display: inline-block;
454
+ transition: transform 0.2s ease;
455
+ opacity: 0;
456
+ }
457
+
458
+ .chat-group-divider:hover .chat-group-label::before {
459
+ opacity: 1;
460
+ }
461
+
462
+ .chat-group-divider.collapsed .chat-group-label::before {
463
+ content: '▶ ';
464
+ opacity: 1;
465
+ }
466
+
467
+ .chat-group-divider.collapsed + * {
468
+ display: none !important;
469
+ }
470
+
471
+ /* Hide collapsed rounds in strong hide mode */
472
+ .strong-hide .chat-group-divider.collapsed {
473
+ display: none !important;
474
+ }
475
+
476
+ /* Chat view width control */
477
+ #flow-chat {
478
+ --chat-width: 900px;
479
+ max-width: var(--chat-width);
480
+ margin: 0 auto;
481
+ }
482
+
483
+ /* Hide user messages when toggle is on */
484
+ #flow-chat.hide-user-messages .chat-message.role-user {
485
+ display: none;
486
+ }
487
+
488
+ /* Hide rewards when hiding user messages */
489
+ #flow-chat.hide-user-messages .chat-reward {
490
+ display: none;
491
+ }
492
+
493
+ /* Round context annotations */
494
+ .round-context {
495
+ text-align: center;
496
+ margin: 4px auto;
497
+ max-width: 100%;
498
+ }
499
+
500
+ .round-context-edit {
501
+ min-height: 20px;
502
+ padding: 5px 10px;
503
+ border: 1.5px dashed var(--accent-muted);
504
+ border-radius: 6px;
505
+ background: #fafafa;
506
+ cursor: text;
507
+ transition: all 0.2s ease;
508
+ outline: none;
509
+ font-size: var(--font-size);
510
+ line-height: 1.3;
511
+ user-select: text;
512
+ -webkit-user-select: text;
513
+ -moz-user-select: text;
514
+ -ms-user-select: text;
515
+ }
516
+
517
+ .round-context-edit:focus {
518
+ border-style: solid;
519
+ border-color: var(--accent-muted-2);
520
+ background: #ffffff;
521
+ box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
522
+ }
523
+
524
+ .round-context-edit:empty:before {
525
+ content: attr(data-placeholder);
526
+ color: #999;
527
+ font-style: italic;
528
+ }
529
+
530
+ .round-context-controls {
531
+ display: none;
532
+ justify-content: center;
533
+ gap: 4px;
534
+ margin-top: 4px;
535
+ flex-wrap: wrap;
536
+ }
537
+
538
+ .round-context-edit:focus + .round-context-controls,
539
+ .round-context-controls:hover,
540
+ .round-context:focus-within .round-context-controls {
541
+ display: flex;
542
+ }
543
+
544
+ .context-color-btn {
545
+ width: 22px;
546
+ height: 22px;
547
+ border-radius: 50%;
548
+ border: 1.5px solid #fff;
549
+ box-shadow: 0 1px 2px rgba(0, 0, 0, 0.15);
550
+ cursor: pointer;
551
+ transition: transform 0.1s ease;
552
+ }
553
+
554
+ .context-color-btn:hover {
555
+ transform: scale(1.15);
556
+ }
557
+
558
+ .context-color-btn:active {
559
+ transform: scale(0.95);
560
+ }
561
+
562
+ /* Split agent context boxes */
563
+ .split-agent-context {
564
+ display: flex;
565
+ gap: 6px;
566
+ margin: 4px auto;
567
+ max-width: 100%;
568
+ align-items: flex-start;
569
+ }
570
+
571
+ .agent-context-box {
572
+ flex: 1;
573
+ min-width: 0;
574
+ position: relative;
575
+ }
576
+
577
+ .agent-context-box .round-context-edit {
578
+ margin: 0;
579
+ border-radius: 6px;
580
+ padding: 4px 8px;
581
+ min-height: 18px;
582
+ }
583
+
584
+ .agent-context-box.agent-alice .round-context-edit {
585
+ border-color: var(--alice-border);
586
+ background: rgba(14, 178, 36, 0.03);
587
+ }
588
+
589
+ .agent-context-box.agent-bob .round-context-edit {
590
+ border-color: var(--bob-border);
591
+ background: rgba(239, 131, 35, 0.03);
592
+ }
593
+
594
+ .agent-context-box.agent-alice .round-context-edit:focus {
595
+ border-color: var(--alice-border);
596
+ box-shadow: 0 2px 8px rgba(14, 178, 36, 0.2);
597
+ background: rgba(14, 178, 36, 0.05);
598
+ }
599
+
600
+ .agent-context-box.agent-bob .round-context-edit:focus {
601
+ border-color: var(--bob-border);
602
+ box-shadow: 0 2px 8px rgba(239, 131, 35, 0.2);
603
+ background: rgba(239, 131, 35, 0.05);
604
+ }
605
+
606
+ .agent-context-box .round-context-edit::before {
607
+ font-weight: 700;
608
+ font-size: var(--font-size);
609
+ margin-right: 5px;
610
+ letter-spacing: 0.2px;
611
+ }
612
+
613
+ .agent-context-box.agent-alice .round-context-edit::before {
614
+ content: 'Alice Prompt Summary:';
615
+ color: var(--alice-border);
616
+ }
617
+
618
+ .agent-context-box.agent-bob .round-context-edit::before {
619
+ content: 'Bob Prompt Summary:';
620
+ color: var(--bob-border);
621
+ }
622
+
623
+ /* Empty context boxes will be hidden by JavaScript when strong hide is enabled */
624
+ .messages-flow { display: block; }
625
+ .split-wrapper { display: flex; gap: 4px; align-items: flex-start; position: relative; }
626
+ .split-col { flex:1 1 0; min-width:0; }
627
+ /* In split view keep same inline density as linear view */
628
+ .split-col .chat-turn { display: inline; }
629
+ .split-wrapper.resizing { user-select: none; }
630
+ .split-resizer { width:4px; cursor: col-resize; flex:0 0 auto; align-self: stretch; position: relative; background: linear-gradient(90deg, rgba(224,230,235,0), var(--accent-muted-2) 30%, var(--accent-muted-2) 70%, rgba(224,230,235,0)); border-radius:2px; transition: background .15s ease, width .15s ease; }
631
+ .split-resizer:hover { background: linear-gradient(90deg, rgba(224,230,235,0), var(--accent-muted) 35%, var(--accent-muted) 65%, rgba(224,230,235,0)); }
632
+ .split-resizer.dragging { background: linear-gradient(90deg, rgba(224,230,235,0), var(--accent-muted) 25%, var(--accent-muted) 75%, rgba(224,230,235,0)); }
633
+ /* Inline reasoning (removed toggle to prevent layout shift on click) */
634
+ .reasoning-inline { display:inline; font-size:var(--font-size); font-style:italic; color:#555; white-space:pre-wrap; margin-right:4px; cursor:pointer; position:relative; }
635
+ .reasoning-inline .reasoning-text { display:inline; }
636
+ .reasoning-inline .reasoning-icon { display:inline-block; margin-right:2px; }
637
+ .reasoning-inline.collapsed .reasoning-text { display:none; }
638
+ .reasoning-inline.collapsed::after { content:'(...)'; font-style:italic; color:#777; margin-left:4px; }
639
+ .message-box .main-content { white-space:normal; }
640
+ /* tighten spacing */
641
+ .split-col .group-divider { margin:4px 0 2px 0; }
642
+ .toolbar {
643
+ display: flex;
644
+ align-items: center;
645
+ gap: 8px;
646
+ margin-bottom: 0;
647
+ font-size: var(--font-size);
648
+ max-height: 0;
649
+ overflow: hidden;
650
+ opacity: 0;
651
+ pointer-events: none;
652
+ transition: max-height 0.2s ease, opacity 0.2s ease;
653
+ flex-wrap: wrap;
654
+ }
655
+ .toolbar-wrap { position: sticky; top: 0; z-index: 10; background: var(--bg); }
656
+ .toolbar-hotzone { height: 6px; }
657
+ .toolbar-wrap:hover .toolbar { max-height: 500px; opacity: 1; pointer-events: auto; margin-bottom: 12px; }
658
+ .toolbar * { pointer-events: auto !important; }
659
+ .toolbar input,
660
+ .toolbar select { z-index: 100 !important; position: relative; }
661
+ .toolbar input[type="number"],
662
+ .toolbar input[type="text"],
663
+ .toolbar select {
664
+ width: 72px;
665
+ padding: 2px 6px;
666
+ border: 1px solid var(--accent-muted);
667
+ border-radius: var(--corner-radius);
668
+ background: var(--bg);
669
+ user-select: text !important;
670
+ -webkit-user-select: text !important;
671
+ -moz-user-select: text !important;
672
+ -ms-user-select: text !important;
673
+ pointer-events: auto !important;
674
+ cursor: pointer !important;
675
+ }
676
+ .toolbar input[type="text"] {
677
+ cursor: text !important;
678
+ }
679
+ .toolbar input[type="text"]:focus,
680
+ .toolbar input[type="number"]:focus,
681
+ .toolbar select:focus {
682
+ outline: 2px solid #0066cc;
683
+ outline-offset: 1px;
684
+ }
685
+ .toolbar button {
686
+ padding: 4px 8px;
687
+ border: 1px solid var(--accent-muted);
688
+ background: var(--panel-bg);
689
+ border-radius: var(--corner-radius);
690
+ cursor: pointer;
691
+ }
692
+ .chat-turn {
693
+ display: inline; /* inline like text */
694
+ background: transparent;
695
+ position: relative;
696
+ cursor: pointer;
697
+ }
698
+ /* No agent-specific background distinctions */
699
+ .turn-content {
700
+ white-space: normal;
701
+ color: var(--text);
702
+ font-size: var(--font-size);
703
+ display: inline; /* inline flow */
704
+ }
705
+ .chat-turn .agent-badge { margin-right: 0; vertical-align: baseline; }
706
+ .agent-badge {
707
+ display: inline;
708
+ position: relative;
709
+ border: var(--border-width) solid var(--accent-muted); /* slightly thicker */
710
+ border-radius: var(--pill-radius-left); /* round left and bottom-right */
711
+ font-size: var(--font-size);
712
+ color: var(--muted-text);
713
+ background: var(--panel-bg);
714
+ box-shadow: var(--inset-shadow);
715
+ line-height: 1.2;
716
+ border-right: 0;
717
+ }
718
+ /* Use flex on assistant badges to vertically center reward pill */
719
+ .chat-turn.role-assistant .agent-badge { display: inline-flex; align-items: center; }
720
+ .agent-badge::after {
721
+ content: none;
722
+ }
723
+ /* removed external separator; emoji is rendered inside message bubble */
724
+ .agent-name { font-weight: 700; }
725
+ .emoji-bw { filter: grayscale(100%); opacity: 0.95; font-size: var(--font-size); vertical-align: baseline; margin: 0; position: relative; top: -1px; line-height: 1; display: inline-block; }
726
+ .ts-badge {
727
+ position: relative;
728
+ display: inline;
729
+ border: var(--border-width) solid var(--accent-muted-2); /* slightly thicker */
730
+ border-radius: var(--corner-radius); /* not a pill */
731
+ font-size: var(--font-size);
732
+ # font-weight: 700;
733
+ color: var(--muted-text);
734
+ background: #F4F8FB; /* subtle tint */
735
+ # padding: 1px 6px; /* slight padding for visibility */
736
+ margin-right: 8px; /* small gap from following content */
737
+ pointer-events: auto; /* allow events so we can ignore them in JS */
738
+ }
739
+ /* Hide timestep badges when grouping by 1 */
740
+ .hide-ts-badges .ts-badge { display: none; }
741
+ /* Strong hide: completely hide collapsed turns */
742
+ .strong-hide .chat-turn.collapsed { display: none; }
743
+ .ts-badge::before {
744
+ content: "";
745
+ position: relative;
746
+ background: var(--accent-muted-2);
747
+ border-radius: 2px;
748
+ }
749
+ .agent-badge { margin-left: 6px; }
750
+ .message-box {
751
+ display: inline; /* inline bubble behaving like text */
752
+ font-size: var(--font-size);
753
+ border: var(--border-width) solid var(--accent-muted);
754
+ border-radius: var(--pill-radius-right); /* round left and bottom-right */
755
+ position: relative;
756
+ background: var(--bg);
757
+ vertical-align: baseline;
758
+ line-height: 1.2;
759
+ padding-left: 0;
760
+ border-left: 0;
761
+ }
762
+ .chat-turn.agent-alice.role-assistant .message-box::before { color: #0eb224; }
763
+ .chat-turn.agent-bob.role-assistant .message-box::before { color: #ef8323; }
764
+ .chat-turn.collapsed .message-box::before { display: none; }
765
+ /* Assistant bubble border colors by common agent names */
766
+ .chat-turn.agent-alice.role-assistant .message-box { border-color: #0eb224; }
767
+ .chat-turn.agent-bob.role-assistant .message-box { border-color: #ef8323; }
768
+ /* Tie badge and seam to agent color for a cohesive capsule, assistants only */
769
+ .chat-turn.agent-alice.role-assistant .agent-badge { border-color: #0eb224; background: rgba(14,178,36,0.08); }
770
+ .chat-turn.agent-alice.role-assistant .agent-badge::after { border-right-color: #0eb224; }
771
+ .chat-turn.agent-alice.role-assistant .turn-content::before { border-left-color: #0eb224; border-top-color: #0eb224; }
772
+ .chat-turn.agent-alice.role-assistant .message-box { border-color: #0eb224; }
773
+
774
+ .chat-turn.agent-bob.role-assistant .agent-badge { border-color: #ef8323; background: rgba(239,131,35,0.10); }
775
+ .chat-turn.agent-bob.role-assistant .agent-badge::after { border-right-color: #ef8323; }
776
+ .chat-turn.agent-bob.role-assistant .turn-content::before { border-left-color: #ef8323; border-top-color: #ef8323; }
777
+ .chat-turn.agent-bob.role-assistant .message-box { border-color: #ef8323; }
778
+ /* No colored agent-name; keep neutral */
779
+ .reward {
780
+ display: inline-flex;
781
+ align-items: center;
782
+ justify-content: center;
783
+ background: linear-gradient(90deg, #fffdf2 0%, #ffffff 75%);
784
+ color: #000000; /* full black */
785
+ font-weight: 600; /* slightly bolder */
786
+ font-family: "Inter", ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen, Ubuntu, Cantarell, "Fira Sans", "Droid Sans", "Helvetica Neue", Arial, "Noto Sans", sans-serif;
787
+ font-size: var(--font-size);
788
+ letter-spacing: 0.15px;
789
+ line-height: 1;
790
+ padding: 0 4px 1px 4px; /* slight bottom pad for optical centering */
791
+ border-radius: 4px;
792
+ border: 1px solid #f4e6a8;
793
+ margin: 0 4px;
794
+ box-shadow: 0 0 0 1px rgba(255,255,255,0.55) inset, 0 1px 2px rgba(0,0,0,0.04);
795
+ }
796
+ .message-placeholder { display: none; color: #7f8c8d; font-style: italic; }
797
+ .chat-turn.collapsed .message-box { color: transparent; font-size: 0; display: inline-block; }
798
+ .chat-turn.collapsed .message-box::after { content: "(...)"; color: #7f8c8d; font-style: italic; font-size: var(--font-size); line-height: 1.2; }
799
+ .chat-turn.collapsed .agent-badge,
800
+ .chat-turn.collapsed .message-box { opacity: 0.3; }
801
+ /* Group divider - clearer and pretty */
802
+ .group-divider {
803
+ display: flex;
804
+ align-items: center;
805
+ gap: 8px;
806
+ width: 100%;
807
+ margin: 8px 0 4px 0;
808
+ position: relative;
809
+ cursor: pointer;
810
+ user-select: none;
811
+ }
812
+ .group-divider::before,
813
+ .group-divider::after {
814
+ content: "";
815
+ flex: 1 1 auto;
816
+ height: 2px;
817
+ background: linear-gradient(90deg, rgba(224,230,235,0), var(--accent-muted-2) 30%, var(--accent-muted-2) 70%, rgba(224,230,235,0));
818
+ }
819
+ .group-divider .group-label {
820
+ display: inline-block;
821
+ border: 1px solid var(--accent-muted);
822
+ border-radius: 999px;
823
+ padding: 2px 10px;
824
+ font-size: var(--group-label-font-size);
825
+ font-weight: 700;
826
+ color: var(--muted-text);
827
+ background: var(--bg);
828
+ box-shadow: var(--inset-shadow);
829
+ position: relative;
830
+ z-index: 1;
831
+ transition: background 0.2s ease;
832
+ }
833
+
834
+ .group-divider:hover .group-label {
835
+ background: var(--panel-bg);
836
+ }
837
+
838
+ .group-label::before {
839
+ content: '▼ ';
840
+ font-size: 0.8em;
841
+ display: inline-block;
842
+ transition: transform 0.2s ease;
843
+ opacity: 0;
844
+ }
845
+
846
+ .group-divider:hover .group-label::before {
847
+ opacity: 1;
848
+ }
849
+
850
+ .group-divider.collapsed .group-label::before {
851
+ content: '▶ ';
852
+ opacity: 1;
853
+ }
854
+
855
+ /* Hide collapsed rounds in strong hide mode */
856
+ .strong-hide .group-divider.collapsed {
857
+ display: none !important;
858
+ }
859
+ /* Enhance contrast for print / export */
860
+ body.split-mode .group-divider::before,
861
+ body.split-mode .group-divider::after {
862
+ background: linear-gradient(90deg, rgba(224,230,235,0), var(--accent-muted) 25%, var(--accent-muted) 75%, rgba(224,230,235,0));
863
+ }
864
+ .chat-turn .turn-content { position: relative; }
865
+ .chat-turn .turn-content::before {
866
+ content: none;
867
+ }
868
+ .chat-turn .agent-badge {
869
+ position: relative;
870
+ }
871
+ /* removed absolute-positioned emoji to prevent overlap */
872
+ </style>
873
+ """
874
+
875
+ # HTML structure
876
+ html_parts = [
877
+ "<!DOCTYPE html>",
878
+ "<html>",
879
+ "<head>",
880
+ "<meta charset='UTF-8'>",
881
+ "<title>Chat Turns</title>",
882
+ css,
883
+ "<script>\n"
884
+ "document.addEventListener('DOMContentLoaded', function() {\n"
885
+ " const linearFlow = document.getElementById('flow-linear');\n"
886
+ " const splitFlow = document.getElementById('flow-split');\n"
887
+ " const chatFlow = document.getElementById('flow-chat');\n"
888
+ " let splitViewOn = false;\n"
889
+ " let chatViewOn = true;\n"
890
+ " function activeFlows() { return [chatViewOn && chatFlow ? chatFlow : null, splitViewOn && splitFlow ? splitFlow : null, linearFlow].filter(Boolean).filter(f => f.style.display !== 'none'); }\n"
891
+ " // State for range filtering and strong hide\n"
892
+ " let currentRangeStart = null;\n"
893
+ " let currentRangeEnd = null;\n"
894
+ " let strongHideOn = false;\n"
895
+ " document.body.addEventListener('click', function(e){\n"
896
+ " if (e.target.closest('input, textarea, select, button, .round-context-edit, .toolbar')) { return; }\n"
897
+ " if (e.target.closest('.ts-badge')) { return; }\n"
898
+ " const r = e.target.closest('.reasoning-inline'); if (r) { e.stopPropagation(); r.classList.toggle('collapsed'); return; }\n"
899
+ " const turn = e.target.closest('.chat-turn');\n"
900
+ " if (turn) { e.stopPropagation(); turn.classList.toggle('collapsed'); }\n"
901
+ " });\n"
902
+ " // Reasoning handled via <details>, no JS required\n"
903
+ " function applyRangeFilter() {\n"
904
+ " for (const flow of activeFlows()) {\n"
905
+ " const turns = Array.from(flow.querySelectorAll('.chat-turn'));\n"
906
+ " for (const el of turns) {\n"
907
+ " const t = parseInt(el.getAttribute('data-time-step') || '0', 10);\n"
908
+ " const afterStart = (currentRangeStart === null) || (t >= currentRangeStart);\n"
909
+ " const beforeEnd = (currentRangeEnd === null) || (t <= currentRangeEnd);\n"
910
+ " el.style.display = (afterStart && beforeEnd) ? '' : 'none';\n"
911
+ " }\n"
912
+ " const dividers = Array.from(flow.querySelectorAll('.group-divider'));\n"
913
+ " for (const d of dividers) {\n"
914
+ " let anyVisible = false;\n"
915
+ " let el = d.nextElementSibling;\n"
916
+ " while (el && !el.classList.contains('group-divider')) {\n"
917
+ " if (el.classList.contains('chat-turn')) {\n"
918
+ " const disp = getComputedStyle(el).display;\n"
919
+ " if (disp !== 'none') { anyVisible = true; break; }\n"
920
+ " } else if (el.classList.contains('split-wrapper')) {\n"
921
+ " // Search descendants for any visible chat-turn\n"
922
+ " const turns = Array.from(el.querySelectorAll('.chat-turn'));\n"
923
+ " for (const tEl of turns) {\n"
924
+ " const disp2 = getComputedStyle(tEl).display;\n"
925
+ " if (disp2 !== 'none') { anyVisible = true; break; }\n"
926
+ " }\n"
927
+ " if (anyVisible) break;\n"
928
+ " }\n"
929
+ " el = el.nextElementSibling;\n"
930
+ " }\n"
931
+ " d.style.display = anyVisible ? '' : 'none';\n"
932
+ " }\n"
933
+ " }\n"
934
+ " }\n"
935
+ " function applyGrouping(n) {\n"
936
+ " function groupContainer(container, n) {\n"
937
+ " Array.from(container.querySelectorAll(':scope > .group-divider')).forEach(el => el.remove());\n"
938
+ " if (!n || n <= 0) { return; }\n"
939
+ " const turns = Array.from(container.querySelectorAll(':scope > .chat-turn'));\n"
940
+ " if (turns.length === 0) return;\n"
941
+ " const items = Array.from(container.children).filter(el => !el.classList.contains('group-divider'));\n"
942
+ " const frag = document.createDocumentFragment();\n"
943
+ " let lastGroup = -1;\n"
944
+ " for (const el of items) {\n"
945
+ " if (!el.classList.contains('chat-turn')) { frag.appendChild(el); continue; }\n"
946
+ " const t = parseInt(el.getAttribute('data-time-step') || '0', 10);\n"
947
+ " const g = Math.floor(t / n);\n"
948
+ " if (g !== lastGroup) {\n"
949
+ " const div = document.createElement('div');\n"
950
+ " div.className = 'group-divider';\n"
951
+ " const label = document.createElement('span');\n"
952
+ " label.className = 'group-label';\n"
953
+ " const roundIndex = g + 1;\n"
954
+ " label.textContent = `Round ${roundIndex}`;\n"
955
+ " div.appendChild(label);\n"
956
+ " frag.appendChild(div);\n"
957
+ " lastGroup = g;\n"
958
+ " }\n"
959
+ " frag.appendChild(el);\n"
960
+ " }\n"
961
+ " container.innerHTML = '';\n"
962
+ " container.appendChild(frag);\n"
963
+ " container.classList.toggle('hide-ts-badges', n === 1);\n"
964
+ " container.classList.toggle('strong-hide', strongHideOn);\n"
965
+ " }\n"
966
+ " for (const flow of activeFlows()) {\n"
967
+ " if (flow.id === 'flow-split') {\n"
968
+ " // Snapshot original turns once to avoid drift on repeated grouping\n"
969
+ " const getOriginalTurns = () => {\n"
970
+ " if (!flow.dataset.origData) {\n"
971
+ " const data = [];\n"
972
+ " const cols0 = flow.querySelectorAll('.split-col');\n"
973
+ " cols0.forEach(col => {\n"
974
+ " const agent = col.getAttribute('data-agent') || '';\n"
975
+ " col.querySelectorAll(':scope > .chat-turn').forEach(el => {\n"
976
+ " const t = parseInt(el.getAttribute('data-time-step')||'0',10);\n"
977
+ " data.push({agent, time:t, html: el.outerHTML});\n"
978
+ " });\n"
979
+ " });\n"
980
+ " flow.dataset.origData = JSON.stringify(data);\n"
981
+ " }\n"
982
+ " return JSON.parse(flow.dataset.origData);\n"
983
+ " };\n"
984
+ " const original = getOriginalTurns();\n"
985
+ " const agents = Array.from(new Set(original.map(o => o.agent))).sort();\n"
986
+ " const groups = new Map();\n"
987
+ " original.forEach(o => {\n"
988
+ " const g = n && n > 0 ? Math.floor(o.time / n) : 0;\n"
989
+ " if (!groups.has(g)) groups.set(g, new Map());\n"
990
+ " const gm = groups.get(g);\n"
991
+ " if (!gm.has(o.agent)) gm.set(o.agent, []);\n"
992
+ " gm.get(o.agent).push(o);\n"
993
+ " });\n"
994
+ " flow.innerHTML = '';\n"
995
+ " const sorted = Array.from(groups.keys()).sort((a,b)=>a-b);\n"
996
+ " sorted.forEach(g => {\n"
997
+ " const div = document.createElement('div');\n"
998
+ " div.className = 'group-divider';\n"
999
+ " const label = document.createElement('span');\n"
1000
+ " label.className = 'group-label';\n"
1001
+ " label.textContent = `Round ${g+1}`;\n"
1002
+ " div.appendChild(label);\n"
1003
+ " flow.appendChild(div);\n"
1004
+ " const wrapper = document.createElement('div');\n"
1005
+ " wrapper.className = 'split-wrapper';\n"
1006
+ " agents.forEach(agent => {\n"
1007
+ " const colDiv = document.createElement('div');\n"
1008
+ " colDiv.className = 'split-col';\n"
1009
+ " colDiv.setAttribute('data-agent', agent);\n"
1010
+ " (groups.get(g).get(agent) || []).forEach(o => { colDiv.insertAdjacentHTML('beforeend', o.html); });\n"
1011
+ " wrapper.appendChild(colDiv);\n"
1012
+ " });\n"
1013
+ " if (wrapper.children.length === 2) { const res = document.createElement('div'); res.className='split-resizer'; wrapper.insertBefore(res, wrapper.children[1]); }\n"
1014
+ " flow.appendChild(wrapper);\n"
1015
+ " });\n"
1016
+ " flow.classList.toggle('hide-ts-badges', n === 1);\n"
1017
+ " flow.classList.toggle('strong-hide', strongHideOn);\n"
1018
+ " document.body.classList.add('split-mode');\n"
1019
+ " } else {\n"
1020
+ " groupContainer(flow, n);\n"
1021
+ " }\n"
1022
+ " }\n"
1023
+ " applyRangeFilter();\n"
1024
+ " initSplitResizers();\n"
1025
+ " }\n"
1026
+ " function initSplitResizers() {\n"
1027
+ " const wrappers = document.querySelectorAll('#flow-split .split-wrapper');\n"
1028
+ " wrappers.forEach(wrap => {\n"
1029
+ " const resizer = wrap.querySelector('.split-resizer');\n"
1030
+ " if (!resizer || resizer.dataset.bound) return; resizer.dataset.bound='1';\n"
1031
+ " const cols = wrap.querySelectorAll('.split-col'); if (cols.length !== 2) return; const c0=cols[0], c1=cols[1];\n"
1032
+ " c0.style.flex=c1.style.flex='1 1 0'; c0.style.width=c1.style.width='';\n"
1033
+ " requestAnimationFrame(()=>{ const w0=c0.scrollWidth,w1=c1.scrollWidth,total=w0+w1||1; let p0=w0/total,p1=w1/total; const minP=0.25,maxP=0.75; if(p0<minP){p0=minP;p1=1-p0;} else if(p0>maxP){p0=maxP;p1=1-p0;} c0.style.flex='0 0 '+(p0*100).toFixed(2)+'%'; c1.style.flex='0 0 '+(p1*100).toFixed(2)+'%'; });\n"
1034
+ " let dragging=false,startX=0,startP0=0;\n"
1035
+ " const onDown=e=>{ dragging=true; startX=e.clientX; wrap.classList.add('resizing'); resizer.classList.add('dragging'); const rect=wrap.getBoundingClientRect(); const w=rect.width; const c0Rect=c0.getBoundingClientRect(); startP0=c0Rect.width/w; document.body.style.cursor='col-resize'; e.preventDefault(); };\n"
1036
+ " const onMove=e=>{ if(!dragging)return; const rect=wrap.getBoundingClientRect(); const w=rect.width; let delta=(e.clientX-startX)/w; let newP0=startP0+delta; const minP=0.15,maxP=0.85; if(newP0<minP)newP0=minP; if(newP0>maxP)newP0=maxP; c0.style.flex='0 0 '+(newP0*100).toFixed(2)+'%'; c1.style.flex='0 0 '+((1-newP0)*100).toFixed(2)+'%'; };\n"
1037
+ " const onUp=()=>{ if(!dragging)return; dragging=false; wrap.classList.remove('resizing'); resizer.classList.remove('dragging'); document.body.style.cursor=''; };\n"
1038
+ " resizer.addEventListener('mousedown', onDown); window.addEventListener('mousemove', onMove); window.addEventListener('mouseup', onUp);\n"
1039
+ " resizer.addEventListener('dblclick', e=>{ if(e.shiftKey){ c0.style.flex=c1.style.flex='1 1 0'; requestAnimationFrame(()=>{ const w0=c0.scrollWidth,w1=c1.scrollWidth,total=w0+w1||1; let p0=w0/total,p1=w1/total; const minP=0.25,maxP=0.75; if(p0<minP){p0=minP;p1=1-p0;} else if(p0>maxP){p0=maxP;p1=1-p0;} c0.style.flex='0 0 '+(p0*100).toFixed(2)+'%'; c1.style.flex='0 0 '+(p1*100).toFixed(2)+'%'; }); } else { c0.style.flex='0 0 50%'; c1.style.flex='0 0 50%'; } });\n"
1040
+ " });\n"
1041
+ " }\n"
1042
+ " initSplitResizers();\n"
1043
+ " const input = document.getElementById('group-size');\n"
1044
+ " const btn = document.getElementById('apply-grouping');\n"
1045
+ " if (btn && input) {\n"
1046
+ " btn.addEventListener('click', () => { const n = parseInt(input.value || '0', 10); applyGrouping(n); });\n"
1047
+ " input.addEventListener('keydown', (e) => { if (e.key === 'Enter') { const n = parseInt(input.value || '0', 10); applyGrouping(n); } });\n"
1048
+ " }\n"
1049
+ " if (input) { input.value = '1'; applyGrouping(1); }\n"
1050
+ " const rangeStart = document.getElementById('range-start');\n"
1051
+ " const rangeEnd = document.getElementById('range-end');\n"
1052
+ " const rangeBtn = document.getElementById('apply-range');\n"
1053
+ " if (rangeBtn && rangeStart && rangeEnd) {\n"
1054
+ " const applyRange = () => {\n"
1055
+ " const sv = parseInt(rangeStart.value || '', 10);\n"
1056
+ " const ev = parseInt(rangeEnd.value || '', 10);\n"
1057
+ " currentRangeStart = Number.isFinite(sv) ? sv : null;\n"
1058
+ " currentRangeEnd = Number.isFinite(ev) ? ev : null;\n"
1059
+ " applyRangeFilter();\n"
1060
+ " };\n"
1061
+ " rangeBtn.addEventListener('click', applyRange);\n"
1062
+ " rangeStart.addEventListener('keydown', (e) => { if (e.key === 'Enter') applyRange(); });\n"
1063
+ " rangeEnd.addEventListener('keydown', (e) => { if (e.key === 'Enter') applyRange(); });\n"
1064
+ " }\n"
1065
+ " const strongHideBtn = document.getElementById('toggle-strong-hide');\n"
1066
+ " const strongHideStateEl = document.getElementById('strong-hide-state');\n"
1067
+ " if (strongHideBtn) {\n"
1068
+ " const setLabel = () => { if (strongHideStateEl) { strongHideStateEl.textContent = strongHideOn ? 'On' : 'Off'; } };\n"
1069
+ " strongHideBtn.addEventListener('click', () => { strongHideOn = !strongHideOn; for (const f of activeFlows()) { f.classList.toggle('strong-hide', strongHideOn); } setLabel(); });\n"
1070
+ " if (strongHideOn) { for (const f of activeFlows()) { f.classList.add('strong-hide'); } }\n"
1071
+ " setLabel();\n"
1072
+ " }\n"
1073
+ " const splitBtn = document.getElementById('toggle-split-view');\n"
1074
+ " const splitStateEl = document.getElementById('split-view-state');\n"
1075
+ " if (splitBtn && splitFlow && linearFlow) {\n"
1076
+ " const updateSplit = () => { if (splitStateEl) splitStateEl.textContent = splitViewOn ? 'On' : 'Off'; };\n"
1077
+ " splitBtn.addEventListener('click', () => { if (chatViewOn) return; splitViewOn = !splitViewOn; linearFlow.style.display = splitViewOn ? 'none' : ''; splitFlow.style.display = splitViewOn ? '' : 'none'; applyGrouping(parseInt(input.value||'1',10)); updateSplit(); });\n"
1078
+ " updateSplit();\n"
1079
+ " }\n"
1080
+ " const chatBtn = document.getElementById('toggle-chat-view');\n"
1081
+ " const chatStateEl = document.getElementById('chat-view-state');\n"
1082
+ " const hideUserBtn = document.getElementById('toggle-hide-user-messages');\n"
1083
+ " const hideUserStateEl = document.getElementById('hide-user-state');\n"
1084
+ " const widthControl = document.getElementById('chat-width-control');\n"
1085
+ " const widthSlider = document.getElementById('chat-width-slider');\n"
1086
+ " const widthValue = document.getElementById('chat-width-value');\n"
1087
+ " let hideUserMessages = false;\n"
1088
+ " if (chatBtn && chatFlow && linearFlow) {\n"
1089
+ " const updateChat = () => {\n"
1090
+ " if (chatStateEl) chatStateEl.textContent = chatViewOn ? 'On' : 'Off';\n"
1091
+ " if (hideUserBtn) hideUserBtn.style.display = chatViewOn ? '' : 'none';\n"
1092
+ " if (widthControl) widthControl.style.display = chatViewOn ? '' : 'none';\n"
1093
+ " };\n"
1094
+ " chatBtn.addEventListener('click', () => {\n"
1095
+ " chatViewOn = !chatViewOn;\n"
1096
+ " if (chatViewOn) {\n"
1097
+ " splitViewOn = false;\n"
1098
+ " linearFlow.style.display = 'none';\n"
1099
+ " if (splitFlow) splitFlow.style.display = 'none';\n"
1100
+ " chatFlow.style.display = '';\n"
1101
+ " if (splitStateEl) splitStateEl.textContent = 'Off';\n"
1102
+ " } else {\n"
1103
+ " chatFlow.style.display = 'none';\n"
1104
+ " linearFlow.style.display = '';\n"
1105
+ " }\n"
1106
+ " updateChat();\n"
1107
+ " });\n"
1108
+ " updateChat();\n"
1109
+ " }\n"
1110
+ " if (hideUserBtn && hideUserStateEl && chatFlow) {\n"
1111
+ " const updateHideUser = () => { hideUserStateEl.textContent = hideUserMessages ? 'On' : 'Off'; };\n"
1112
+ " hideUserBtn.addEventListener('click', () => {\n"
1113
+ " hideUserMessages = !hideUserMessages;\n"
1114
+ " chatFlow.classList.toggle('hide-user-messages', hideUserMessages);\n"
1115
+ " updateHideUser();\n"
1116
+ " });\n"
1117
+ " updateHideUser();\n"
1118
+ " }\n"
1119
+ " if (widthSlider && widthValue && chatFlow) {\n"
1120
+ " const savedWidth = localStorage.getItem('chat-view-width');\n"
1121
+ " if (savedWidth) {\n"
1122
+ " widthSlider.value = savedWidth;\n"
1123
+ " chatFlow.style.setProperty('--chat-width', savedWidth + 'px');\n"
1124
+ " widthValue.textContent = savedWidth + 'px';\n"
1125
+ " }\n"
1126
+ " widthSlider.addEventListener('input', (e) => {\n"
1127
+ " const width = e.target.value;\n"
1128
+ " chatFlow.style.setProperty('--chat-width', width + 'px');\n"
1129
+ " widthValue.textContent = width + 'px';\n"
1130
+ " localStorage.setItem('chat-view-width', width);\n"
1131
+ " });\n"
1132
+ " }\n"
1133
+ " const fontFamilySelect = document.getElementById('font-family-select');\n"
1134
+ " const fontSizeInput = document.getElementById('font-size-input');\n"
1135
+ " if (fontFamilySelect) {\n"
1136
+ " const savedFont = localStorage.getItem('render-font-family');\n"
1137
+ " if (savedFont) {\n"
1138
+ " fontFamilySelect.value = savedFont;\n"
1139
+ " document.body.style.setProperty('--font-family', savedFont);\n"
1140
+ " }\n"
1141
+ " fontFamilySelect.addEventListener('change', (e) => {\n"
1142
+ " const font = e.target.value;\n"
1143
+ " document.body.style.setProperty('--font-family', font);\n"
1144
+ " localStorage.setItem('render-font-family', font);\n"
1145
+ " });\n"
1146
+ " }\n"
1147
+ " if (fontSizeInput) {\n"
1148
+ " const savedSize = localStorage.getItem('render-font-size');\n"
1149
+ " if (savedSize) {\n"
1150
+ " fontSizeInput.value = savedSize;\n"
1151
+ " document.body.style.setProperty('--font-size', savedSize + 'px');\n"
1152
+ " }\n"
1153
+ " fontSizeInput.addEventListener('input', (e) => {\n"
1154
+ " const size = e.target.value;\n"
1155
+ " document.body.style.setProperty('--font-size', size + 'px');\n"
1156
+ " localStorage.setItem('render-font-size', size);\n"
1157
+ " });\n"
1158
+ " }\n"
1159
+ " const aliceEmojiInput = document.getElementById('alice-emoji-input');\n"
1160
+ " const aliceNameInput = document.getElementById('alice-name-input');\n"
1161
+ " const bobEmojiInput = document.getElementById('bob-emoji-input');\n"
1162
+ " const bobNameInput = document.getElementById('bob-name-input');\n"
1163
+ " const applyAgentNamesBtn = document.getElementById('apply-agent-names');\n"
1164
+ " function loadAgentNames() {\n"
1165
+ " if (aliceEmojiInput && aliceNameInput && bobEmojiInput && bobNameInput) {\n"
1166
+ " const savedAliceEmoji = localStorage.getItem('alice-emoji') || '🤖';\n"
1167
+ " const savedAliceName = localStorage.getItem('alice-name') || 'Alice';\n"
1168
+ " const savedBobEmoji = localStorage.getItem('bob-emoji') || '🤖';\n"
1169
+ " const savedBobName = localStorage.getItem('bob-name') || 'Bob';\n"
1170
+ " aliceEmojiInput.value = savedAliceEmoji;\n"
1171
+ " aliceNameInput.value = savedAliceName;\n"
1172
+ " bobEmojiInput.value = savedBobEmoji;\n"
1173
+ " bobNameInput.value = savedBobName;\n"
1174
+ " applyAgentNamesToDOM(savedAliceEmoji, savedAliceName, savedBobEmoji, savedBobName);\n"
1175
+ " }\n"
1176
+ " }\n"
1177
+ " function applyAgentNamesToDOM(aliceEmoji, aliceName, bobEmoji, bobName) {\n"
1178
+ " const agentMap = { 'alice': { name: aliceName, emoji: aliceEmoji }, 'bob': { name: bobName, emoji: bobEmoji } };\n"
1179
+ " document.querySelectorAll('[data-agent-id]').forEach(el => {\n"
1180
+ " const agentId = el.getAttribute('data-agent-id');\n"
1181
+ " if (!agentMap[agentId]) return;\n"
1182
+ " if (el.classList.contains('agent-name')) {\n"
1183
+ " el.textContent = agentMap[agentId].name;\n"
1184
+ " } else if (el.classList.contains('emoji-bw')) {\n"
1185
+ " const currentEmoji = el.textContent.trim();\n"
1186
+ " if (currentEmoji === '🤖' || currentEmoji === '👤') {\n"
1187
+ " el.textContent = agentMap[agentId].emoji;\n"
1188
+ " }\n"
1189
+ " }\n"
1190
+ " });\n"
1191
+ " const style = document.createElement('style');\n"
1192
+ " style.id = 'dynamic-agent-names-style';\n"
1193
+ " const existingStyle = document.getElementById('dynamic-agent-names-style');\n"
1194
+ " if (existingStyle) existingStyle.remove();\n"
1195
+ " style.textContent = `\n"
1196
+ " .agent-context-box.agent-alice .round-context-edit::before {\n"
1197
+ " content: '${aliceName} Prompt Summary:';\n"
1198
+ " }\n"
1199
+ " .agent-context-box.agent-bob .round-context-edit::before {\n"
1200
+ " content: '${bobName} Prompt Summary:';\n"
1201
+ " }\n"
1202
+ " `;\n"
1203
+ " document.head.appendChild(style);\n"
1204
+ " }\n"
1205
+ " if (applyAgentNamesBtn && aliceEmojiInput && aliceNameInput && bobEmojiInput && bobNameInput) {\n"
1206
+ " [aliceEmojiInput, aliceNameInput, bobEmojiInput, bobNameInput].forEach(input => {\n"
1207
+ " input.style.pointerEvents = 'auto';\n"
1208
+ " if (input.tagName === 'INPUT') {\n"
1209
+ " input.style.userSelect = 'text';\n"
1210
+ " input.style.webkitUserSelect = 'text';\n"
1211
+ " input.readOnly = false;\n"
1212
+ " }\n"
1213
+ " input.disabled = false;\n"
1214
+ " const stopAll = (e) => { e.stopPropagation(); e.stopImmediatePropagation(); };\n"
1215
+ " input.addEventListener('mousedown', stopAll, true);\n"
1216
+ " input.addEventListener('mouseup', stopAll, true);\n"
1217
+ " input.addEventListener('click', stopAll, true);\n"
1218
+ " input.addEventListener('dblclick', stopAll, true);\n"
1219
+ " input.addEventListener('focus', stopAll, true);\n"
1220
+ " input.addEventListener('blur', stopAll, true);\n"
1221
+ " input.addEventListener('paste', stopAll, true);\n"
1222
+ " input.addEventListener('cut', stopAll, true);\n"
1223
+ " input.addEventListener('copy', stopAll, true);\n"
1224
+ " input.addEventListener('select', stopAll, true);\n"
1225
+ " input.addEventListener('selectstart', stopAll, true);\n"
1226
+ " input.addEventListener('keydown', stopAll, true);\n"
1227
+ " input.addEventListener('keyup', stopAll, true);\n"
1228
+ " input.addEventListener('keypress', stopAll, true);\n"
1229
+ " input.addEventListener('input', stopAll, true);\n"
1230
+ " input.addEventListener('change', stopAll, true);\n"
1231
+ " input.addEventListener('contextmenu', stopAll, true);\n"
1232
+ " });\n"
1233
+ " const applyNames = () => {\n"
1234
+ " const aliceEmoji = aliceEmojiInput.value || '🤖';\n"
1235
+ " const aliceName = aliceNameInput.value.trim() || 'Alice';\n"
1236
+ " const bobEmoji = bobEmojiInput.value || '🤖';\n"
1237
+ " const bobName = bobNameInput.value.trim() || 'Bob';\n"
1238
+ " localStorage.setItem('alice-emoji', aliceEmoji);\n"
1239
+ " localStorage.setItem('alice-name', aliceName);\n"
1240
+ " localStorage.setItem('bob-emoji', bobEmoji);\n"
1241
+ " localStorage.setItem('bob-name', bobName);\n"
1242
+ " applyAgentNamesToDOM(aliceEmoji, aliceName, bobEmoji, bobName);\n"
1243
+ " };\n"
1244
+ " applyAgentNamesBtn.addEventListener('click', applyNames);\n"
1245
+ " [aliceNameInput, bobNameInput].forEach(input => {\n"
1246
+ " input.addEventListener('keydown', (e) => {\n"
1247
+ " if (e.key === 'Enter') {\n"
1248
+ " e.preventDefault();\n"
1249
+ " e.stopPropagation();\n"
1250
+ " e.stopImmediatePropagation();\n"
1251
+ " applyNames();\n"
1252
+ " }\n"
1253
+ " }, true);\n"
1254
+ " });\n"
1255
+ " [aliceEmojiInput, bobEmojiInput].forEach(select => {\n"
1256
+ " select.addEventListener('change', applyNames);\n"
1257
+ " });\n"
1258
+ " }\n"
1259
+ " loadAgentNames();\n"
1260
+ " function setupRoundCollapse() {\n"
1261
+ " document.addEventListener('click', function(e) {\n"
1262
+ " if (e.target.closest('input, textarea, select, button, .round-context-edit, .toolbar')) { return; }\n"
1263
+ " const divider = e.target.closest('.chat-group-divider, .group-divider');\n"
1264
+ " if (!divider) return;\n"
1265
+ " divider.classList.toggle('collapsed');\n"
1266
+ " const isCollapsed = divider.classList.contains('collapsed');\n"
1267
+ " let nextElement = divider.nextElementSibling;\n"
1268
+ " while (nextElement) {\n"
1269
+ " if (nextElement.classList.contains('chat-group-divider') || nextElement.classList.contains('group-divider')) {\n"
1270
+ " break;\n"
1271
+ " }\n"
1272
+ " if (isCollapsed) {\n"
1273
+ " if (!nextElement.dataset.originalDisplay) {\n"
1274
+ " nextElement.dataset.originalDisplay = nextElement.style.display || getComputedStyle(nextElement).display;\n"
1275
+ " }\n"
1276
+ " nextElement.style.display = 'none';\n"
1277
+ " } else {\n"
1278
+ " if (nextElement.dataset.originalDisplay) {\n"
1279
+ " const originalDisplay = nextElement.dataset.originalDisplay;\n"
1280
+ " nextElement.style.display = originalDisplay === 'none' ? '' : originalDisplay;\n"
1281
+ " if (nextElement.style.display === originalDisplay && originalDisplay !== 'none') {\n"
1282
+ " nextElement.style.display = '';\n"
1283
+ " }\n"
1284
+ " delete nextElement.dataset.originalDisplay;\n"
1285
+ " } else {\n"
1286
+ " nextElement.style.display = '';\n"
1287
+ " }\n"
1288
+ " }\n"
1289
+ " nextElement = nextElement.nextElementSibling;\n"
1290
+ " }\n"
1291
+ " e.stopPropagation();\n"
1292
+ " });\n"
1293
+ " }\n"
1294
+ " setupRoundCollapse();\n"
1295
+ " const strongHideBtnChat = document.getElementById('toggle-strong-hide');\n"
1296
+ " function applyStrongHideToChat() {\n"
1297
+ " if (!chatFlow) return;\n"
1298
+ " chatFlow.classList.toggle('strong-hide', strongHideOn);\n"
1299
+ " const contextEdits = chatFlow.querySelectorAll('.round-context-edit');\n"
1300
+ " contextEdits.forEach(edit => {\n"
1301
+ " const parent = edit.closest('.round-context, .agent-context-box, .split-agent-context');\n"
1302
+ " if (parent) {\n"
1303
+ " if (strongHideOn && edit.textContent.trim() === '') {\n"
1304
+ " parent.style.display = 'none';\n"
1305
+ " } else {\n"
1306
+ " parent.style.display = '';\n"
1307
+ " }\n"
1308
+ " }\n"
1309
+ " });\n"
1310
+ " const splitContexts = chatFlow.querySelectorAll('.split-agent-context');\n"
1311
+ " splitContexts.forEach(split => {\n"
1312
+ " if (strongHideOn) {\n"
1313
+ " const boxes = split.querySelectorAll('.agent-context-box');\n"
1314
+ " const allEmpty = Array.from(boxes).every(box => {\n"
1315
+ " const edit = box.querySelector('.round-context-edit');\n"
1316
+ " return edit && edit.textContent.trim() === '';\n"
1317
+ " });\n"
1318
+ " if (allEmpty) split.style.display = 'none';\n"
1319
+ " }\n"
1320
+ " });\n"
1321
+ " }\n"
1322
+ " if (strongHideBtnChat && chatFlow) {\n"
1323
+ " strongHideBtnChat.addEventListener('click', () => {\n"
1324
+ " setTimeout(() => applyStrongHideToChat(), 0);\n"
1325
+ " });\n"
1326
+ " }\n"
1327
+ " document.addEventListener('click', function(e) {\n"
1328
+ " if (e.target.closest('input, textarea, select, .round-context-edit, .toolbar')) { return; }\n"
1329
+ " const chatReasoning = e.target.closest('.chat-reasoning');\n"
1330
+ " if (chatReasoning) {\n"
1331
+ " chatReasoning.classList.toggle('collapsed');\n"
1332
+ " }\n"
1333
+ " });\n"
1334
+ " function applyColorToSelection(color, element) {\n"
1335
+ " const selection = window.getSelection();\n"
1336
+ " if (!selection.rangeCount) return false;\n"
1337
+ " const range = selection.getRangeAt(0);\n"
1338
+ " if (!element.contains(range.commonAncestorContainer)) return false;\n"
1339
+ " const selectedText = range.toString();\n"
1340
+ " if (!selectedText) return false;\n"
1341
+ " if (color === 'default') {\n"
1342
+ " // Remove styling - just extract the text content\n"
1343
+ " const textNode = document.createTextNode(selectedText);\n"
1344
+ " range.deleteContents();\n"
1345
+ " range.insertNode(textNode);\n"
1346
+ " } else {\n"
1347
+ " const span = document.createElement('span');\n"
1348
+ " span.style.color = color;\n"
1349
+ " span.style.fontWeight = '600';\n"
1350
+ " try {\n"
1351
+ " range.surroundContents(span);\n"
1352
+ " } catch (e) {\n"
1353
+ " const contents = range.extractContents();\n"
1354
+ " span.appendChild(contents);\n"
1355
+ " range.insertNode(span);\n"
1356
+ " }\n"
1357
+ " }\n"
1358
+ " return true;\n"
1359
+ " }\n"
1360
+ " let lastFocusedContextEdit = null;\n"
1361
+ " document.addEventListener('focusin', function(e) {\n"
1362
+ " if (e.target.classList.contains('round-context-edit')) {\n"
1363
+ " lastFocusedContextEdit = e.target;\n"
1364
+ " }\n"
1365
+ " });\n"
1366
+ " document.addEventListener('mousedown', function(e) {\n"
1367
+ " if (e.target.classList.contains('context-color-btn')) {\n"
1368
+ " e.preventDefault();\n"
1369
+ " }\n"
1370
+ " });\n"
1371
+ " document.addEventListener('click', function(e) {\n"
1372
+ " if (e.target.closest('input:not(.round-context-edit), textarea, select') && !e.target.classList.contains('context-color-btn')) { return; }\n"
1373
+ " if (e.target.classList.contains('context-color-btn')) {\n"
1374
+ " e.preventDefault();\n"
1375
+ " const color = e.target.dataset.color;\n"
1376
+ " const controls = e.target.closest('.round-context-controls');\n"
1377
+ " const contextEdit = controls ? controls.previousElementSibling : null;\n"
1378
+ " if (contextEdit && contextEdit.classList.contains('round-context-edit')) {\n"
1379
+ " contextEdit.focus();\n"
1380
+ " const selection = window.getSelection();\n"
1381
+ " if (selection.rangeCount > 0 && selection.toString().length > 0 && contextEdit.contains(selection.anchorNode)) {\n"
1382
+ " if (applyColorToSelection(color, contextEdit)) {\n"
1383
+ " const key = contextEdit.dataset.contextKey;\n"
1384
+ " localStorage.setItem(key, contextEdit.innerHTML);\n"
1385
+ " }\n"
1386
+ " } else {\n"
1387
+ " try {\n"
1388
+ " if (color !== 'default') {\n"
1389
+ " document.execCommand('styleWithCSS', false, true);\n"
1390
+ " document.execCommand('foreColor', false, color);\n"
1391
+ " }\n"
1392
+ " const key = contextEdit.dataset.contextKey;\n"
1393
+ " setTimeout(() => localStorage.setItem(key, contextEdit.innerHTML), 10);\n"
1394
+ " } catch (e) {\n"
1395
+ " console.log('Color command failed:', e);\n"
1396
+ " }\n"
1397
+ " }\n"
1398
+ " }\n"
1399
+ " }\n"
1400
+ " });\n"
1401
+ " const contextEdits = document.querySelectorAll('.round-context-edit');\n"
1402
+ " contextEdits.forEach(edit => {\n"
1403
+ " edit.addEventListener('input', function() {\n"
1404
+ " const key = this.dataset.contextKey;\n"
1405
+ " localStorage.setItem(key, this.innerHTML);\n"
1406
+ " });\n"
1407
+ " const key = edit.dataset.contextKey;\n"
1408
+ " const saved = localStorage.getItem(key);\n"
1409
+ " if (saved) {\n"
1410
+ " edit.innerHTML = saved;\n"
1411
+ " }\n"
1412
+ " });\n"
1413
+ " document.addEventListener('click', function(e) {\n"
1414
+ " if (e.target.closest('input, textarea, select, .round-context-edit') && !e.target.classList.contains('merge-btn') && !e.target.classList.contains('unmerge-btn')) { return; }\n"
1415
+ " if (e.target.classList.contains('merge-btn')) {\n"
1416
+ " e.preventDefault();\n"
1417
+ " e.stopPropagation();\n"
1418
+ " const msgId = e.target.dataset.msgId;\n"
1419
+ " const currentMsg = e.target.closest('.chat-message');\n"
1420
+ " if (!currentMsg) return;\n"
1421
+ " if (currentMsg.classList.contains('role-user')) {\n"
1422
+ " alert('Cannot merge user messages');\n"
1423
+ " return;\n"
1424
+ " }\n"
1425
+ " let nextMsg = currentMsg.nextElementSibling;\n"
1426
+ " while (nextMsg && !nextMsg.classList.contains('chat-message')) {\n"
1427
+ " nextMsg = nextMsg.nextElementSibling;\n"
1428
+ " }\n"
1429
+ " while (nextMsg && nextMsg.classList.contains('role-user')) {\n"
1430
+ " nextMsg = nextMsg.nextElementSibling;\n"
1431
+ " while (nextMsg && !nextMsg.classList.contains('chat-message')) {\n"
1432
+ " nextMsg = nextMsg.nextElementSibling;\n"
1433
+ " }\n"
1434
+ " }\n"
1435
+ " if (!nextMsg || nextMsg.classList.contains('chat-message') === false) {\n"
1436
+ " alert('No next assistant message to merge with');\n"
1437
+ " return;\n"
1438
+ " }\n"
1439
+ " if (nextMsg.classList.contains('role-user')) {\n"
1440
+ " alert('Cannot merge with user messages');\n"
1441
+ " return;\n"
1442
+ " }\n"
1443
+ " const parent = currentMsg.parentElement;\n"
1444
+ " if (parent.classList.contains('simultaneous-messages')) {\n"
1445
+ " const wrapper = parent;\n"
1446
+ " currentMsg.style.display = '';\n"
1447
+ " currentMsg.classList.remove('merged');\n"
1448
+ " const refNode = wrapper.nextElementSibling;\n"
1449
+ " parent.parentElement.insertBefore(currentMsg, refNode);\n"
1450
+ " if (nextMsg.parentElement === wrapper) {\n"
1451
+ " parent.parentElement.insertBefore(nextMsg, refNode);\n"
1452
+ " }\n"
1453
+ " if (wrapper.children.length === 0) {\n"
1454
+ " wrapper.remove();\n"
1455
+ " }\n"
1456
+ " } else {\n"
1457
+ " const wrapper = document.createElement('div');\n"
1458
+ " wrapper.className = 'simultaneous-messages';\n"
1459
+ " const unmergeBtn = document.createElement('button');\n"
1460
+ " unmergeBtn.className = 'unmerge-btn';\n"
1461
+ " unmergeBtn.innerHTML = '✕';\n"
1462
+ " unmergeBtn.title = 'Click to unmerge messages';\n"
1463
+ " wrapper.appendChild(unmergeBtn);\n"
1464
+ " wrapper.dataset.firstMsgId = currentMsg.dataset.msgId;\n"
1465
+ " wrapper.dataset.secondMsgId = nextMsg.dataset.msgId;\n"
1466
+ " parent.insertBefore(wrapper, currentMsg);\n"
1467
+ " wrapper.appendChild(currentMsg);\n"
1468
+ " wrapper.appendChild(nextMsg);\n"
1469
+ " currentMsg.classList.add('merged');\n"
1470
+ " nextMsg.classList.add('merged');\n"
1471
+ " }\n"
1472
+ " }\n"
1473
+ " if (e.target.classList.contains('unmerge-btn')) {\n"
1474
+ " const wrapper = e.target.closest('.simultaneous-messages');\n"
1475
+ " if (!wrapper) return;\n"
1476
+ " const parent = wrapper.parentElement;\n"
1477
+ " const firstMsgId = wrapper.dataset.firstMsgId;\n"
1478
+ " const secondMsgId = wrapper.dataset.secondMsgId;\n"
1479
+ " const messages = Array.from(wrapper.querySelectorAll('.chat-message'));\n"
1480
+ " const refNode = wrapper.nextElementSibling;\n"
1481
+ " const firstMsg = messages.find(m => m.dataset.msgId === firstMsgId);\n"
1482
+ " const secondMsg = messages.find(m => m.dataset.msgId === secondMsgId);\n"
1483
+ " if (firstMsg) {\n"
1484
+ " firstMsg.classList.remove('merged');\n"
1485
+ " firstMsg.style.display = '';\n"
1486
+ " parent.insertBefore(firstMsg, refNode);\n"
1487
+ " }\n"
1488
+ " if (secondMsg) {\n"
1489
+ " secondMsg.classList.remove('merged');\n"
1490
+ " secondMsg.style.display = '';\n"
1491
+ " parent.insertBefore(secondMsg, refNode);\n"
1492
+ " }\n"
1493
+ " wrapper.remove();\n"
1494
+ " }\n"
1495
+ " });\n"
1496
+ "});\n"
1497
+ "</script>",
1498
+ "</head>",
1499
+ "<body>",
1500
+ '<div class="toolbar-wrap">',
1501
+ '<div class="toolbar-hotzone"></div>',
1502
+ '<div class="toolbar">',
1503
+ '<label for="group-size">Group every</label>',
1504
+ '<input id="group-size" type="number" min="0" step="1" value="1" />',
1505
+ "<span>timesteps</span>",
1506
+ '<button id="apply-grouping">Apply</button>',
1507
+ '<span style="margin-left:8px"></span>',
1508
+ '<label for="range-start"><span class="emoji-bw">🔎</span> Range</label>',
1509
+ '<input id="range-start" type="number" step="1" />',
1510
+ "<span>to</span>",
1511
+ '<input id="range-end" type="number" step="1" />',
1512
+ '<button id="apply-range"><span class="emoji-bw">▶︎</span> Apply</button>',
1513
+ '<button id="toggle-strong-hide"><span class="emoji-bw">🗜️</span> Strong Hide: <span id="strong-hide-state">Off</span></button>',
1514
+ (
1515
+ '<button id="toggle-split-view"><span class="emoji-bw">🪟</span> Split View: <span id="split-view-state">Off</span></button>'
1516
+ if enable_split_view
1517
+ else ""
1518
+ ),
1519
+ '<button id="toggle-chat-view"><span class="emoji-bw">💬</span> Chat View: <span id="chat-view-state">On</span></button>',
1520
+ '<button id="toggle-hide-user-messages"><span class="emoji-bw">👁️</span> Hide Prompts: <span id="hide-user-state">Off</span></button>',
1521
+ '<span id="chat-width-control" style="margin-left:8px;">',
1522
+ '<label for="chat-width-slider"><span class="emoji-bw">↔️</span> Width:</label>',
1523
+ '<input id="chat-width-slider" type="range" min="600" max="1600" step="50" value="900" style="width:120px; vertical-align:middle;" />',
1524
+ '<span id="chat-width-value" style="margin-left:4px;">900px</span>',
1525
+ '</span>',
1526
+ '<span style="margin-left:12px;">',
1527
+ '<label for="font-family-select"><span class="emoji-bw">🔤</span> Font:</label>',
1528
+ '<select id="font-family-select" style="padding:2px 6px; border:1px solid var(--accent-muted); border-radius:var(--corner-radius); background:var(--bg);">',
1529
+ '<option value="\'Segoe UI\', Tahoma, Geneva, Verdana, sans-serif">Segoe UI</option>',
1530
+ '<option value="Arial, sans-serif">Arial</option>',
1531
+ '<option value="\'Helvetica Neue\', Helvetica, sans-serif">Helvetica</option>',
1532
+ '<option value="\'Times New Roman\', Times, serif">Times New Roman</option>',
1533
+ '<option value="Georgia, serif">Georgia</option>',
1534
+ '<option value="\'Courier New\', Courier, monospace">Courier New</option>',
1535
+ '<option value="\'Comic Sans MS\', cursive">Comic Sans</option>',
1536
+ '<option value="\'Trebuchet MS\', sans-serif">Trebuchet MS</option>',
1537
+ '<option value="Verdana, sans-serif">Verdana</option>',
1538
+ '<option value="\'Palatino Linotype\', \'Book Antiqua\', Palatino, serif">Palatino</option>',
1539
+ '<option value="\'Lucida Console\', Monaco, monospace">Lucida Console</option>',
1540
+ '</select>',
1541
+ '</span>',
1542
+ '<span style="margin-left:8px;">',
1543
+ '<label for="font-size-input"><span class="emoji-bw">📏</span> Size:</label>',
1544
+ '<input id="font-size-input" type="number" min="8" max="24" step="1" value="14" style="width:50px;" />',
1545
+ '<span>px</span>',
1546
+ '</span>',
1547
+ '<span style="margin-left:12px; display:flex; align-items:center; gap:8px;">',
1548
+ '<label style="font-weight:600;">Agent Names:</label>',
1549
+ '<select id="alice-emoji-input" style="width:65px; padding:2px 6px; border:1px solid var(--accent-muted); border-radius:var(--corner-radius); background:var(--bg);">',
1550
+ '<option value="🤖">🤖 Robot</option>',
1551
+ '<option value="👤">👤 Human</option>',
1552
+ '</select>',
1553
+ '<input id="alice-name-input" type="text" placeholder="Alice" style="width:80px; padding:2px 6px; border:1px solid var(--accent-muted); border-radius:var(--corner-radius); background:var(--bg);" />',
1554
+ '<span style="margin:0 4px;">|</span>',
1555
+ '<select id="bob-emoji-input" style="width:65px; padding:2px 6px; border:1px solid var(--accent-muted); border-radius:var(--corner-radius); background:var(--bg);">',
1556
+ '<option value="🤖">🤖 Robot</option>',
1557
+ '<option value="👤">👤 Human</option>',
1558
+ '</select>',
1559
+ '<input id="bob-name-input" type="text" placeholder="Bob" style="width:80px; padding:2px 6px; border:1px solid var(--accent-muted); border-radius:var(--corner-radius); background:var(--bg);" />',
1560
+ '<button id="apply-agent-names" style="padding:4px 8px; border:1px solid var(--accent-muted); background:var(--panel-bg); border-radius:var(--corner-radius); cursor:pointer;">Apply</button>',
1561
+ '</span>',
1562
+ "</div>",
1563
+ "</div>",
1564
+ '<div id="flow-linear" class="messages-flow" style="display:none">',
1565
+ ]
1566
+
1567
+ last_time_step = None
1568
+ for original_index, turn in indexed_turns:
1569
+ # Build classes
1570
+ agent_class = f"agent-{re.sub('[^a-z0-9_-]', '-', turn.agent_id.lower())}"
1571
+ role_class = f"role-{turn.role}"
1572
+ collapsed_class = " collapsed" if turn.role == "user" else ""
1573
+
1574
+ # Badge content
1575
+ agent_id_clean = html.escape(turn.agent_id).lower()
1576
+ if turn.role == "assistant":
1577
+ name = html.escape(turn.agent_id)
1578
+ emoji = '<span class="emoji-bw" data-agent-id="' + agent_id_clean + '"> 🤖</span>'
1579
+ raw_val = turn.reward
1580
+ if isinstance(raw_val, (int, float)):
1581
+ reward_val = f"{raw_val:.4f}".rstrip("0").rstrip(".")
1582
+ if len(reward_val) > 8:
1583
+ reward_val = reward_val[:8] + "…"
1584
+ else:
1585
+ reward_val = str(raw_val)
1586
+ # Format: "🤖 Alice • Reward: 5.5556 • 💬 :"
1587
+ badge_inner = (
1588
+ f'{emoji} <span class="agent-name" data-agent-id="{agent_id_clean}">{name}</span>'
1589
+ f' <span class="sep"> • </span><span class="reward">Reward ⚑ = {reward_val}</span>'
1590
+ )
1591
+ else:
1592
+ # For user messages, show "Prompt of {Agent ID}" in the badge
1593
+ name = html.escape(turn.agent_id)
1594
+ # Format (no reward): "Prompt of Alice • "
1595
+ badge_inner = f'Prompt of <span class="agent-name" data-agent-id="{agent_id_clean}">{name}</span> <span class="sep"> • </span>:'
1596
+
1597
+ badge = f'<span class="agent-badge">{badge_inner}</span>'
1598
+
1599
+ # Inline timestep distinction badge at step boundaries (render before first message)
1600
+ ts_badge_html = ""
1601
+ if last_time_step is None or turn.time_step != last_time_step:
1602
+ ts_badge_html = f'<span class="ts-badge">⏱ {turn.time_step}</span>'
1603
+ last_time_step = turn.time_step
1604
+
1605
+ escaped_content = html.escape(turn.content)
1606
+ reasoning_html = ""
1607
+ if turn.reasoning_content:
1608
+ # Normalize reasoning to avoid leading/newline whitespace that creates visual gaps
1609
+ _raw_reasoning = turn.reasoning_content.replace("\r\n", "\n")
1610
+ _raw_reasoning = _re.sub(
1611
+ r"^\s*\n+", "", _raw_reasoning
1612
+ ) # drop leading blank lines
1613
+ _raw_reasoning = _re.sub(
1614
+ r"\*\*(\s*\n\s*)", r"** ", _raw_reasoning
1615
+ ) # newline right after **
1616
+ _raw_reasoning = _re.sub(
1617
+ r"(\s*\n\s*)\*\*", r" **", _raw_reasoning
1618
+ ) # newline right before **
1619
+ escaped_reasoning = html.escape(_raw_reasoning)
1620
+ reasoning_html = f'<span class="reasoning-inline"><span class="reasoning-icon">💭</span><span class="reasoning-text">{escaped_reasoning}</span></span>'
1621
+ collapsed_text = re.sub(r"\s+", " ", escaped_content).strip()
1622
+
1623
+ html_parts.append(
1624
+ f'<div class="chat-turn {agent_class} {role_class}{collapsed_class}" data-time-step="{turn.time_step}">'
1625
+ f'<div class="turn-content {agent_class} {role_class}">{ts_badge_html}{badge}'
1626
+ f'<span class="message-box">{reasoning_html}<span class="main-content">💬 {collapsed_text}</span></span>'
1627
+ f'<span class="message-placeholder">(...)</span>'
1628
+ f"</div>"
1629
+ f"</div>"
1630
+ )
1631
+
1632
+ html_parts.append("</div>") # close linear flow
1633
+ if enable_split_view:
1634
+ import html as _html_mod
1635
+
1636
+ html_parts.append(
1637
+ '<div id="flow-split" class="messages-flow" style="display:none">'
1638
+ )
1639
+ html_parts.append('<div class="split-wrapper">')
1640
+ # Per-agent columns
1641
+ per_agent_turns = {
1642
+ aid: [t for t in chat_turns if t.agent_id == aid]
1643
+ for aid in assistant_agents
1644
+ }
1645
+ for idx, aid in enumerate(assistant_agents):
1646
+ turns_agent = per_agent_turns[aid]
1647
+ html_parts.append(
1648
+ f'<div class="split-col" data-agent="{_html_mod.escape(aid)}">'
1649
+ )
1650
+ last_ts_agent = None
1651
+ for turn in turns_agent:
1652
+ agent_class = (
1653
+ f"agent-{re.sub('[^a-z0-9_-]', '-', turn.agent_id.lower())}"
1654
+ )
1655
+ role_class = f"role-{turn.role}"
1656
+ collapsed_class = " collapsed" if turn.role == "user" else ""
1657
+ ts_badge_html = ""
1658
+ if last_ts_agent is None or turn.time_step != last_ts_agent:
1659
+ ts_badge_html = f'<span class="ts-badge">⏱ {turn.time_step}</span>'
1660
+ last_ts_agent = turn.time_step
1661
+ esc_content = _html_mod.escape(turn.content)
1662
+ reasoning_html = ""
1663
+ if turn.reasoning_content:
1664
+ _raw_reasoning = turn.reasoning_content.replace("\r\n", "\n")
1665
+ _raw_reasoning = _re.sub(r"^\s*\n+", "", _raw_reasoning)
1666
+ _raw_reasoning = _re.sub(r"\*\*(\s*\n\s*)", r"** ", _raw_reasoning)
1667
+ _raw_reasoning = _re.sub(r"(\s*\n\s*)\*\*", r" **", _raw_reasoning)
1668
+ esc_reasoning = _html_mod.escape(_raw_reasoning)
1669
+ reasoning_html = f'<span class="reasoning-inline"><span class="reasoning-icon">💭</span><span class="reasoning-text">{esc_reasoning}</span></span>'
1670
+ collapsed_text = re.sub(r"\s+", " ", esc_content).strip()
1671
+ agent_id_clean = _html_mod.escape(turn.agent_id).lower()
1672
+ if turn.role == "assistant":
1673
+ name = _html_mod.escape(turn.agent_id)
1674
+ emoji = '<span class="emoji-bw" data-agent-id="' + agent_id_clean + '"> 🤖</span>'
1675
+ raw_val = turn.reward
1676
+ if isinstance(raw_val, (int, float)):
1677
+ reward_val = f"{raw_val:.4f}".rstrip("0").rstrip(".")
1678
+ if len(reward_val) > 8:
1679
+ reward_val = reward_val[:8] + "…"
1680
+ else:
1681
+ reward_val = str(raw_val)
1682
+ badge_inner = (
1683
+ f'{emoji} <span class="agent-name" data-agent-id="{agent_id_clean}">{name}</span>'
1684
+ f' <span class="sep"> • </span><span class="reward">Reward ⚑ : {reward_val}</span>'
1685
+ )
1686
+ else:
1687
+ name = _html_mod.escape(turn.agent_id)
1688
+ badge_inner = f'Prompt of <span class="agent-name" data-agent-id="{agent_id_clean}">{name}</span> <span class="sep"> • </span>:'
1689
+ badge = f'<span class="agent-badge">{badge_inner}</span>'
1690
+ html_parts.append(
1691
+ f'<div class="chat-turn {agent_class} {role_class}{collapsed_class}" data-time-step="{turn.time_step}">'
1692
+ f'<div class="turn-content {agent_class} {role_class}">{ts_badge_html}{badge}'
1693
+ f'<span class="message-box">{reasoning_html}<span class="main-content">💬 {collapsed_text}</span></span>'
1694
+ f'<span class="message-placeholder">(...)</span>'
1695
+ f"</div></div>"
1696
+ )
1697
+ html_parts.append("</div>") # close split col
1698
+ html_parts.append("</div>") # split-wrapper
1699
+ html_parts.append("</div>") # flow-split
1700
+
1701
+ # Add Chat View
1702
+ import html as _html_mod
1703
+ html_parts.append('<div id="flow-chat" class="messages-flow">')
1704
+
1705
+ # Helper function to add context annotation areas
1706
+ def add_context_area(position: str, time_step: int):
1707
+ context_key = f"round-context-{position}-{time_step}"
1708
+ placeholder = f"Add context {position} round {time_step}..."
1709
+ color_buttons = ""
1710
+ # Add default/reset color button first
1711
+ color_buttons += (
1712
+ f'<div class="context-color-btn" data-color="default" '
1713
+ f'style="background: linear-gradient(135deg, #000 25%, transparent 25%, transparent 75%, #000 75%), '
1714
+ f'linear-gradient(135deg, #000 25%, transparent 25%, transparent 75%, #000 75%); '
1715
+ f'background-size: 4px 4px; background-position: 0 0, 2px 2px; '
1716
+ f'background-color: #fff;" title="Default color"></div>'
1717
+ )
1718
+ for color_name, color_value in [
1719
+ ('red', '#d32f2f'),
1720
+ ('orange', '#f57c00'),
1721
+ ('yellow', '#f9a825'),
1722
+ ('green', '#388e3c'),
1723
+ ('blue', '#1976d2'),
1724
+ ('purple', '#7b1fa2'),
1725
+ ('gray', '#666666'),
1726
+ ]:
1727
+ color_buttons += (
1728
+ f'<div class="context-color-btn" data-color="{color_value}" '
1729
+ f'style="background-color: {color_value};" title="{color_name}"></div>'
1730
+ )
1731
+
1732
+ html_parts.append(
1733
+ f'<div class="round-context">'
1734
+ f'<div class="round-context-edit" contenteditable="true" spellcheck="true" '
1735
+ f'data-context-key="{context_key}" '
1736
+ f'data-placeholder="{placeholder}"></div>'
1737
+ f'<div class="round-context-controls">{color_buttons}</div>'
1738
+ f'</div>'
1739
+ )
1740
+
1741
+ # Helper function to add split agent context boxes
1742
+ def add_split_agent_contexts(position: str, time_step: int):
1743
+ color_buttons = ""
1744
+ # Add default/reset color button first
1745
+ color_buttons += (
1746
+ f'<div class="context-color-btn" data-color="default" '
1747
+ f'style="background: linear-gradient(135deg, #000 25%, transparent 25%, transparent 75%, #000 75%), '
1748
+ f'linear-gradient(135deg, #000 25%, transparent 25%, transparent 75%, #000 75%); '
1749
+ f'background-size: 4px 4px; background-position: 0 0, 2px 2px; '
1750
+ f'background-color: #fff;" title="Default color"></div>'
1751
+ )
1752
+ for color_name, color_value in [
1753
+ ('red', '#d32f2f'),
1754
+ ('orange', '#f57c00'),
1755
+ ('yellow', '#f9a825'),
1756
+ ('green', '#388e3c'),
1757
+ ('blue', '#1976d2'),
1758
+ ('purple', '#7b1fa2'),
1759
+ ('gray', '#666666'),
1760
+ ]:
1761
+ color_buttons += (
1762
+ f'<div class="context-color-btn" data-color="{color_value}" '
1763
+ f'style="background-color: {color_value};" title="{color_name}"></div>'
1764
+ )
1765
+
1766
+ html_parts.append('<div class="split-agent-context">')
1767
+
1768
+ # Alice box
1769
+ alice_key = f"agent-context-alice-{position}-{time_step}"
1770
+ alice_placeholder = f"..."
1771
+ html_parts.append(
1772
+ f'<div class="agent-context-box agent-alice">'
1773
+ f'<div class="round-context-edit" contenteditable="true" spellcheck="true" '
1774
+ f'data-context-key="{alice_key}" '
1775
+ f'data-placeholder="{alice_placeholder}"></div>'
1776
+ f'<div class="round-context-controls">{color_buttons}</div>'
1777
+ f'</div>'
1778
+ )
1779
+
1780
+ # Bob box
1781
+ bob_key = f"agent-context-bob-{position}-{time_step}"
1782
+ bob_placeholder = f"..."
1783
+ html_parts.append(
1784
+ f'<div class="agent-context-box agent-bob">'
1785
+ f'<div class="round-context-edit" contenteditable="true" spellcheck="true" '
1786
+ f'data-context-key="{bob_key}" '
1787
+ f'data-placeholder="{bob_placeholder}"></div>'
1788
+ f'<div class="round-context-controls">{color_buttons}</div>'
1789
+ f'</div>'
1790
+ )
1791
+
1792
+ html_parts.append('</div>') # split-agent-context
1793
+
1794
+ last_time_step_chat = None
1795
+ for original_index, turn in indexed_turns:
1796
+ agent_class = f"agent-{re.sub('[^a-z0-9_-]', '-', turn.agent_id.lower())}"
1797
+ role_class = f"role-{turn.role}"
1798
+
1799
+ # Add time step divider and beginning context
1800
+ if last_time_step_chat is None or turn.time_step != last_time_step_chat:
1801
+ # Add end contexts for previous round (only regular context, not prompt summary)
1802
+ if last_time_step_chat is not None:
1803
+ add_context_area("end", last_time_step_chat)
1804
+
1805
+ html_parts.append(
1806
+ f'<div class="chat-group-divider">'
1807
+ f'<span class="chat-group-label">⏱ Round {turn.time_step + 1}</span>'
1808
+ f'</div>'
1809
+ )
1810
+
1811
+ # Add beginning contexts for new round (both context and prompt summary)
1812
+ add_context_area("beginning", turn.time_step)
1813
+ add_split_agent_contexts("beginning", turn.time_step)
1814
+
1815
+ last_time_step_chat = turn.time_step
1816
+
1817
+ # Build chat message with merge controls
1818
+ html_parts.append(f'<div class="chat-message {agent_class} {role_class}" data-msg-id="{original_index}">')
1819
+
1820
+ # Add merge control button
1821
+ html_parts.append(
1822
+ f'<button class="merge-btn" title="Merge with next message" data-msg-id="{original_index}">⇄</button>'
1823
+ )
1824
+
1825
+ html_parts.append('<div class="chat-message-content">')
1826
+
1827
+ # Header with agent name and reward (always show reward)
1828
+ agent_id_clean = _html_mod.escape(turn.agent_id).lower()
1829
+ if turn.role == "assistant":
1830
+ name = _html_mod.escape(turn.agent_id)
1831
+ raw_val = turn.reward
1832
+ if isinstance(raw_val, (int, float)):
1833
+ reward_val = f"{raw_val:.4f}".rstrip("0").rstrip(".")
1834
+ if len(reward_val) > 8:
1835
+ reward_val = reward_val[:8] + "…"
1836
+ else:
1837
+ reward_val = str(raw_val)
1838
+ header_html = (
1839
+ f'<div class="chat-header">'
1840
+ f'<span class="emoji-bw" data-agent-id="{agent_id_clean}">🤖</span> <span class="agent-name" data-agent-id="{agent_id_clean}">{name}</span>'
1841
+ f'<span class="chat-reward">⚑ {reward_val}</span>'
1842
+ f'</div>'
1843
+ )
1844
+ else:
1845
+ name = _html_mod.escape(turn.agent_id)
1846
+ header_html = f'<div class="chat-header">Prompt of <span class="agent-name" data-agent-id="{agent_id_clean}">{name}</span></div>'
1847
+
1848
+ html_parts.append(header_html)
1849
+
1850
+ # Reasoning content if present
1851
+ if turn.reasoning_content:
1852
+ _raw_reasoning = turn.reasoning_content.replace("\r\n", "\n")
1853
+ _raw_reasoning = _re.sub(r"^\s*\n+", "", _raw_reasoning)
1854
+ esc_reasoning = _html_mod.escape(_raw_reasoning)
1855
+ html_parts.append(
1856
+ f'<div class="chat-reasoning collapsed">'
1857
+ f'<span class="reasoning-icon">💭</span> '
1858
+ f'<span class="reasoning-text">{esc_reasoning}</span>'
1859
+ f'</div>'
1860
+ )
1861
+
1862
+ # Message bubble
1863
+ esc_content = _html_mod.escape(turn.content)
1864
+ html_parts.append(f'<div class="chat-bubble">{esc_content}</div>')
1865
+
1866
+ html_parts.append('</div>') # chat-message-content
1867
+ html_parts.append('</div>') # chat-message
1868
+
1869
+ # Add end contexts for the last round (only regular context, not prompt summary)
1870
+ if last_time_step_chat is not None:
1871
+ add_context_area("end", last_time_step_chat)
1872
+
1873
+ html_parts.append("</div>") # flow-chat
1874
+ html_parts.extend(["</body>", "</html>"])
1875
+
1876
+ return "\n".join(html_parts)
1877
+
1878
+
1879
def export_html_from_rollout_tree(path: Path, outdir: Path, main_only: bool = False) -> None:
    """Render the paths of a rollout tree file as standalone HTML files.

    The main path is written to ``outdir`` as
    ``mgid:<id>_main_html_render.render.html``. Unless ``main_only`` is set,
    every branch path is written to the
    ``mgid:<id>_branches_html_renders`` subdirectory of ``outdir`` as
    ``<path_id>_html_render.render.html``.

    Args:
        path: Rollout tree file, in whatever format ``load_rollout_tree`` reads.
        outdir: Output directory for the HTML files (created if missing).
        main_only: If True, only export the main trajectory (default: False).
    """
    root = load_rollout_tree(path)
    mgid = root.id

    main_path, branch_paths = get_rollout_tree_paths(root)

    outdir.mkdir(parents=True, exist_ok=True)

    # Generate HTML for the main path
    chat_turns = gather_all_chat_turns_for_path(main_path)
    html_content = html_from_chat_turns(chat_turns)
    output_file = outdir / f"mgid:{mgid}_main_html_render.render.html"
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(html_content)

    # Stop here when branches are not wanted or do not exist. Previously the
    # branch loop ran even with main_only=True and failed on the unassigned
    # branches directory.
    if main_only or not branch_paths:
        return

    branches_dir = outdir / f"mgid:{mgid}_branches_html_renders"
    branches_dir.mkdir(parents=True, exist_ok=True)

    # Generate HTML for each branch path
    for path_obj in branch_paths:
        chat_turns = gather_all_chat_turns_for_path(path_obj)
        html_content = html_from_chat_turns(chat_turns)

        path_id: str = path_obj.id
        output_file = branches_dir / f"{path_id}_html_render.render.html"

        with open(output_file, "w", encoding="utf-8") as f:
            f.write(html_content)
src_code_for_reproducibility/utils/rollout_tree_gather_utils.py ADDED
@@ -0,0 +1,314 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import csv
4
+ import os
5
+ import pickle
6
+ import re
7
+ from collections import defaultdict
8
+ from dataclasses import dataclass
9
+ from pathlib import Path
10
+ from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple
11
+
12
+ from mllm.markov_games.rollout_tree import *
13
+
14
+
15
+
16
+
17
+
18
def load_rollout_tree(path: Path) -> RolloutTreeRootNode:
    """Read a pickled object from ``path`` and validate it as a rollout tree root.

    The unpickled payload is handed to ``RolloutTreeRootNode.model_validate``.
    """
    # NOTE(security): pickle.load can execute arbitrary code while
    # deserialising; only point this at files produced by trusted runs.
    with open(path, "rb") as pkl_file:
        payload = pickle.load(pkl_file)
    return RolloutTreeRootNode.model_validate(payload)
23
+
24
+
25
@dataclass
class RolloutNodeList:
    """An identified, ordered list of rollout-tree nodes making up one path."""

    # Identifier of the path (built by get_rollout_tree_paths as an
    # "mgid:..._type:..." string).
    id: str
    # Nodes of the path, in traversal order.
    nodes: List[RolloutTreeNode]
29
+
30
+
31
def get_rollout_tree_paths(
    root: RolloutTreeRootNode, mgid: Optional[str] = None
) -> Tuple[RolloutNodeList, List[RolloutNodeList]]:
    """Split a rollout tree into its main path and its branch paths.

    The tree is walked iteratively (no recursion), so long rollouts neither
    hit the interpreter recursion limit nor pay for repeated list
    concatenation.

    Args:
        root: Root of the rollout tree; traversal starts at ``root.child``.
        mgid: Identifier used in the generated path ids. Falls back to
            ``str(root.id)`` when not given (or empty).

    Returns:
        main_path: The main path from the root to the end of the tree.
        branch_paths: One entry per non-empty ``branches`` item of every
            branch node met on the main path. Each entry holds the main-path
            nodes that precede the branch point followed by the nodes of the
            branch. Inside a branch only ``main_child`` links are followed,
            so branches nested within a branch are not reported.
    """
    mgid_str = mgid or str(root.id)

    def collect_path_nodes(start) -> List[RolloutTreeNode]:
        """Follow child/main_child links from ``start`` and list the nodes met."""
        collected: List[RolloutTreeNode] = []
        current = start
        while current is not None:
            if isinstance(current, RolloutTreeBranchNode):
                # For branch nodes only the main child is part of the path.
                current = current.main_child
                if not current:
                    break
            elif not isinstance(current, RolloutTreeNode):
                # Unknown node type: end the path here.
                break
            collected.append(current)
            current = current.child
        return collected

    branch_paths: List[RolloutNodeList] = []

    # Walk the main path once, remembering the nodes seen so far (the prefix
    # shared by every branch that starts later) and the time step of the
    # most recent main-path node (0 before any node has been seen).
    main_path_prefix: List[RolloutTreeNode] = []
    current_time_step: Optional[int] = 0
    current = root.child
    while current is not None:
        if isinstance(current, RolloutTreeNode):
            main_path_prefix.append(current)
            current_time_step = current.time_step
            current = current.child
        elif isinstance(current, RolloutTreeBranchNode):
            if current.branches:
                for agent_id, branch_node_list in current.branches.items():
                    if not branch_node_list:
                        continue
                    # Start with the main path prefix, then append the nodes
                    # reachable from every entry of this agent's branch list.
                    # NOTE(review): several entries in one list end up
                    # concatenated into a single path — confirm this is intended.
                    branch_path_nodes = list(main_path_prefix)
                    for branch_node in branch_node_list:
                        branch_path_nodes.extend(collect_path_nodes(branch_node))

                    branch_path_id = (
                        f"mgid:{mgid_str}_type:branch_agent:{agent_id}"
                        f"_time_step:{current_time_step}"
                    )
                    branch_paths.append(
                        RolloutNodeList(id=branch_path_id, nodes=branch_path_nodes)
                    )

            # Continue along the main path through the main child, if any.
            main_child = current.main_child
            if not main_child:
                break
            main_path_prefix.append(main_child)
            current_time_step = main_child.time_step
            current = main_child.child
        else:
            break

    # Collect the main path nodes
    main_path_nodes = collect_path_nodes(root.child)
    main_path = RolloutNodeList(id=f"mgid:{mgid_str}_type:main", nodes=main_path_nodes)

    return main_path, branch_paths
116
+
117
+
118
class ChatTurnLog(BaseModel):
    """Flat record of a single chat message together with its rollout context."""

    # Time step of the rollout node the message was taken from.
    time_step: int
    # Agent whose action log contained the message.
    agent_id: str
    # Chat role of the message; the gather helpers treat "user" specially.
    role: str
    # Message text.
    content: str
    # Optional reasoning text attached to the message, when the turn has one.
    reasoning_content: Optional[str] = None
    # Copied from the chat turn's ``is_state_end`` attribute.
    is_state_end: bool
    # Reward recorded for the agent at the node's simulation step.
    reward: float
126
+
127
+
128
def gather_agent_chat_turns_for_path(
    agent_id: str, path: RolloutNodeList
) -> List[ChatTurnLog]:
    """Collect one agent's chat turns along a path, in path order.

    Nodes without an action log for ``agent_id`` (or with an empty one) are
    skipped. The reward attached to each turn is the agent's reward at that
    node, or 0 when none is recorded.
    """
    collected: List[ChatTurnLog] = []
    for node in path.nodes:
        step_log = node.step_log
        agent_log = step_log.action_logs.get(agent_id, [])
        if not agent_log:
            continue
        for message in agent_log.chat_turns or []:
            entry = ChatTurnLog(
                time_step=node.time_step,
                agent_id=agent_id,
                role=message.role,
                content=message.content,
                reasoning_content=getattr(message, "reasoning_content", None),
                is_state_end=message.is_state_end,
                reward=step_log.simulation_step_log.rewards.get(agent_id, 0),
            )
            collected.append(entry)
    return collected
151
+
152
+
153
def gather_all_chat_turns_for_path(path: RolloutNodeList) -> List[ChatTurnLog]:
    """Collect the chat turns of every agent along a path, interleaved per node.

    For each node, every agent's messages are grouped into units: a "user"
    message followed by the next non-user message forms a two-message unit,
    and any message that cannot be paired forms a unit on its own. Units are
    then emitted round-robin over the agents in sorted id order
    (A1, B1, A2, B2, ...).
    """
    ordered: List[ChatTurnLog] = []

    for node in path.nodes:
        step_log = node.step_log
        agent_ids = sorted(step_log.action_logs.keys())
        units_by_agent: Dict[str, List[List[ChatTurnLog]]] = {}

        for agent_id in agent_ids:
            agent_log = step_log.action_logs.get(agent_id)
            units: List[List[ChatTurnLog]] = []
            # A user message still waiting for its reply, if any.
            pending_user: Optional[ChatTurnLog] = None

            messages = []
            if agent_log and agent_log.chat_turns:
                messages = agent_log.chat_turns

            for message in messages:
                entry = ChatTurnLog(
                    time_step=node.time_step,
                    agent_id=agent_id,
                    role=message.role,
                    content=message.content,
                    reasoning_content=getattr(message, "reasoning_content", None),
                    is_state_end=message.is_state_end,
                    reward=step_log.simulation_step_log.rewards.get(agent_id, 0),
                )

                if message.role == "user":
                    # A new prompt closes any prompt that never got a reply.
                    if pending_user is not None:
                        units.append([pending_user])
                    pending_user = entry
                elif pending_user is not None:
                    # Reply to the open prompt: emit them together.
                    units.append([pending_user, entry])
                    pending_user = None
                else:
                    # No open prompt: the message stands alone.
                    units.append([entry])

            if pending_user is not None:
                # Trailing prompt without a reply.
                units.append([pending_user])

            units_by_agent[agent_id] = units

        # Round-robin over agents until the longest unit list is exhausted.
        rounds = max((len(units) for units in units_by_agent.values()), default=0)
        for round_index in range(rounds):
            for agent_id in agent_ids:
                agent_units = units_by_agent[agent_id]
                if round_index < len(agent_units):
                    ordered.extend(agent_units[round_index])

    return ordered
223
+
224
+
225
def chat_turns_to_dict(chat_turns: Iterator[ChatTurnLog]) -> Iterator[Dict[str, Any]]:
    """Lazily yield the ``model_dump()`` dict of each chat turn, in input order."""
    yield from (turn.model_dump() for turn in chat_turns)
229
+
230
+
231
def get_all_agents(root: RolloutTreeRootNode) -> List[str]:
    """Return the sorted agent ids found in the first node of the tree.

    Only the first node is inspected (for a leading branch node, its
    ``main_child``); the ids are the union of its action-log keys and its
    reward keys. An empty list is returned when there is no such node.
    """
    first_node = root.child
    if first_node is None:
        return []

    if isinstance(first_node, RolloutTreeBranchNode):
        first_node = first_node.main_child
        if first_node is None:
            return []

    # All agents are expected to be present in the first node.
    step_log = first_node.step_log
    agent_ids = set(step_log.action_logs.keys()) | set(
        step_log.simulation_step_log.rewards.keys()
    )
    return sorted(agent_ids)
249
+
250
+
251
def gather_agent_main_rewards(agent_id: str, path: RolloutNodeList) -> List[float]:
    """Return the reward of ``agent_id`` at every node of the path, in order.

    Raises:
        KeyError: If a node has no reward entry for ``agent_id``.
    """
    return [
        node.step_log.simulation_step_log.rewards[agent_id] for node in path.nodes
    ]
258
+
259
+
260
def gather_all_rewards(path: RolloutNodeList) -> List[Dict[AgentId, float]]:
    """Return a copy of the per-agent reward mapping of every node in the path."""
    return [node.step_log.simulation_step_log.rewards.copy() for node in path.nodes]
266
+
267
+
268
def gather_simulation_stats(
    path: RolloutNodeList,
    filter: Callable[[SimulationStepLog], bool],
    stat_func: Callable[[SimulationStepLog], Any],
) -> List[Any]:
    """Apply ``stat_func`` to each simulation step log of the path accepted by ``filter``.

    Results are returned in path order; step logs for which ``filter``
    returns a falsy value are skipped.
    """
    step_logs = (node.step_log.simulation_step_log for node in path.nodes)
    return [stat_func(step_log) for step_log in step_logs if filter(step_log)]
280
+
281
+
282
def gather_simulation_step_logs(path: RolloutNodeList) -> List[SimulationStepLog]:
    """Return the simulation step log of every node in the path, in order."""
    return [node.step_log.simulation_step_log for node in path.nodes]
288
+
289
+
290
def export_chat_logs(path: Path, outdir: Path):
    """Write the chat turns of every path of a rollout tree file as JSONL.

    The output file is ``outdir / "mgid:<id>_plucked_chats.render.jsonl"``.
    Each line is a JSON object with the ``path_id`` and the ``chat_turns`` of
    one path; the main path comes first, followed by the branch paths.
    """
    import json

    root = load_rollout_tree(path)
    mgid = root.id
    main_path, branch_paths = get_rollout_tree_paths(root)

    outdir.mkdir(parents=True, exist_ok=True)
    output_file = outdir / f"mgid:{mgid}_plucked_chats.render.jsonl"

    with open(output_file, "w", encoding="utf-8") as sink:
        for path_obj in [main_path, *branch_paths]:
            turns = gather_all_chat_turns_for_path(path_obj)
            record = {
                "path_id": str(path_obj.id),
                "chat_turns": list(chat_turns_to_dict(iter(turns))),
            }
            sink.write(json.dumps(record, ensure_ascii=False) + "\n")
313
+
314
+
src_code_for_reproducibility/utils/rollout_tree_stats.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Callable, List, Tuple
2
+
3
+ from mllm.markov_games.rollout_tree import RolloutTreeRootNode
4
+ from mllm.markov_games.simulation import SimulationStepLog
5
+ from mllm.utils.rollout_tree_gather_utils import (
6
+ gather_simulation_step_logs,
7
+ get_rollout_tree_paths,
8
+ )
9
+ from mllm.utils.stat_pack import StatPack
10
+
11
+
12
def get_rollout_tree_stat_tally(
    rollout_tree: RolloutTreeRootNode,
    metrics: List[Callable[[SimulationStepLog], List[Tuple[str, float]]]],
) -> StatPack:
    """Tally metric values over the main path of a rollout tree.

    Every metric is called on every simulation step log of the main path.
    A metric may return ``None`` to contribute nothing for that step;
    otherwise each ``(key, value)`` pair it returns is added to the tally.
    """
    tally = StatPack()
    # Only the main path (first element of the returned pair) is used.
    main_path, _ = get_rollout_tree_paths(rollout_tree)
    for step_log in gather_simulation_step_logs(main_path):
        for metric in metrics:
            pairs = metric(step_log)
            if pairs is None:
                continue
            for key, value in pairs:
                tally.add_stat(key, value)
    return tally
27
+
28
+
29
def get_rollout_tree_mean_stats(
    rollout_tree: RolloutTreeRootNode, metrics: List[Callable[[SimulationStepLog], Any]]
) -> StatPack:
    """Return ``mean()`` of the stat tally built from a single rollout tree."""
    return get_rollout_tree_stat_tally(rollout_tree, metrics).mean()
35
+
36
+
37
def get_mean_rollout_tree_stats(
    rollout_trees: List[RolloutTreeRootNode],
    metrics: List[Callable[[SimulationStepLog], Any]],
) -> StatPack:
    """Combine the per-tree mean stats of several rollout trees.

    The mean stats of every tree are computed first, then all of them are
    added to a fresh ``StatPack`` whose ``mean()`` is returned.
    """
    # TODO complete this
    per_tree_means = []
    for tree in rollout_trees:
        per_tree_means.append(get_rollout_tree_mean_stats(tree, metrics))

    combined = StatPack()
    for tree_means in per_tree_means:
        combined.add_stats(tree_means)
    return combined.mean()
src_code_for_reproducibility/utils/update_start_epoch.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
# During a run, set hydra.run.dir=./outputs/{folder}
def update_start_epoch(cfg, output_directory):
    """Set ``cfg["experiment"]["start_epoch"]`` from existing iteration folders.

    Does nothing unless ``cfg["experiment"]["resume_experiment"]`` is truthy.
    Otherwise the entries of ``output_directory`` named ``iteration_<n>...``
    are scanned and ``start_epoch`` is set to the largest ``<n>``, or to 0
    when no such entry exists. Entries whose part after ``iteration_`` is not
    an integer (for example ``iteration_final``) are ignored instead of
    aborting the resume.

    Args:
        cfg: Mutable config mapping with an ``"experiment"`` section.
        output_directory: Directory to scan for ``iteration_*`` entries.

    Returns:
        None; ``cfg`` is updated in place.

    Raises:
        OSError: If ``output_directory`` cannot be listed.
    """
    if not cfg["experiment"]["resume_experiment"]:
        return None

    iterations = []
    for entry in os.listdir(output_directory):
        if not entry.startswith("iteration_"):
            continue
        try:
            iterations.append(int(entry.split("_")[1]))
        except ValueError:
            # Not an "iteration_<number>" entry; skip it.
            continue

    cfg["experiment"]["start_epoch"] = max(iterations, default=0)
    return None
src_code_for_reproducibility/utils/wandb_utils.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Any, Dict, Optional
3
+
4
+
5
# Module-level state shared by the helpers in this file.
# _WANDB_AVAILABLE flips to True once `import wandb` has succeeded.
_WANDB_AVAILABLE = False
# _WANDB_RUN holds the object returned by wandb.init(), or None before init().
_WANDB_RUN = None


def _try_import_wandb():
    """Report whether ``wandb`` can be imported, remembering a success.

    A successful import is recorded in ``_WANDB_AVAILABLE`` so later calls
    return immediately; a failed import is retried on the next call.

    Returns:
        ``True`` if ``wandb`` is importable, ``False`` otherwise.
    """
    global _WANDB_AVAILABLE
    if not _WANDB_AVAILABLE:
        try:
            import wandb  # type: ignore  # noqa: F401
        except Exception:
            # Any import-time failure is treated as "wandb not usable".
            _WANDB_AVAILABLE = False
        else:
            _WANDB_AVAILABLE = True
    return _WANDB_AVAILABLE
21
+
22
+
23
def _safe_get(cfg: Dict[str, Any], path: list[str], default: Any = None) -> Any:
    """Walk ``path`` through nested mappings, returning ``default`` on a miss.

    Args:
        cfg: Root mapping to read from.
        path: Keys to follow, outermost first. An empty path returns ``cfg``.
        default: Value returned when a key is absent or an intermediate value
            is not a mapping.

    Returns:
        The value stored at ``path``, or ``default``. A value that is present
        but ``None`` is returned as ``None``, not replaced by ``default``.

    Notes:
        Any ``collections.abc.Mapping`` is traversed, not only ``dict``, so
        mapping-like config objects are read instead of silently producing
        ``default`` (presumably relevant for OmegaConf configs, which ``init``
        tries to convert -- TODO confirm the config type passed by callers).
    """
    from collections.abc import Mapping  # local import: file import block untouched

    cur: Any = cfg
    for key in path:
        if not isinstance(cur, Mapping) or key not in cur:
            return default
        cur = cur[key]
    return cur
30
+
31
+
32
def is_enabled(cfg: Dict[str, Any]) -> bool:
    """Return the truthiness of ``cfg["logging"]["wandb"]["enabled"]``.

    A missing key anywhere along that path counts as disabled.
    """
    flag = _safe_get(cfg, ["logging", "wandb", "enabled"], False)
    return bool(flag)
34
+
35
+
36
def init(cfg: Dict[str, Any], run_dir: str, run_name: Optional[str] = None) -> None:
    """Start a Weights & Biases run when the config asks for one.

    Returns without doing anything if W&B is disabled in ``cfg`` or the
    ``wandb`` package cannot be imported. Otherwise the run returned by
    ``wandb.init`` is stored in the module-level ``_WANDB_RUN``.

    Args:
        cfg: Experiment config; options are read from ``cfg["logging"]["wandb"]``
            (``project``, ``entity``, ``mode``, ``tags``, ``notes``, ``group``,
            ``name``).
        run_dir: Directory handed to W&B; it is created if missing.
        run_name: Run name used when the config has no ``name`` key.
    """
    global _WANDB_RUN
    # Short-circuit keeps the import attempt from running when W&B is disabled.
    if not (is_enabled(cfg) and _try_import_wandb()):
        return

    import wandb  # type: ignore

    def option(key: str, fallback: Any = None) -> Any:
        # Read one entry of the logging.wandb config section.
        return _safe_get(cfg, ["logging", "wandb", key], fallback)

    # Make sure W&B writes its files inside the run directory; an already
    # exported WANDB_DIR is left as it is.
    os.makedirs(run_dir, exist_ok=True)
    os.environ.setdefault("WANDB_DIR", run_dir)

    # W&B wants plain containers; if the conversion is unavailable or fails,
    # the config object is passed through unchanged.
    try:
        from omegaconf import OmegaConf  # type: ignore

        plain_cfg = OmegaConf.to_container(cfg, resolve=True)  # type: ignore
    except Exception:
        plain_cfg = cfg

    _WANDB_RUN = wandb.init(
        project=option("project", "llm-negotiation"),
        entity=option("entity"),
        mode=option("mode", "online"),
        name=option("name", run_name),
        group=option("group"),
        # A null `tags` entry in the config is normalised to an empty list.
        tags=option("tags", []) or [],
        notes=option("notes"),
        config=plain_cfg,
        dir=run_dir,
        reinit=True,
    )
80
+
81
+
82
def log(metrics: Dict[str, Any], step: Optional[int] = None) -> None:
    """Log a flat dictionary of metrics to W&B if a run is active.

    Logging is best-effort: a failure never propagates to the caller, but it
    is reported as a warning through the standard ``logging`` module instead
    of being dropped silently.

    Args:
        metrics: Flat mapping of metric name to value.
        step: Optional step counter. When given it is merged into the payload
            under the key ``"step"`` (it is not forwarded as ``wandb.log``'s
            own ``step`` argument).
    """
    if not _WANDB_AVAILABLE or _WANDB_RUN is None:
        return
    try:
        import wandb  # type: ignore

        wandb.log(metrics if step is None else dict(metrics, step=step))
    except Exception as exc:
        # Keep training alive, but leave a trace of the lost metrics.
        import logging

        logging.getLogger(__name__).warning("wandb.log failed: %s", exc)
92
+
93
+
94
def _flatten(prefix: str, data: Dict[str, Any], out: Dict[str, Any]) -> None:
    """Copy the leaves of a nested dict into ``out`` under dotted keys.

    Args:
        prefix: Key prefix for this level; an empty string adds no prefix.
        data: Possibly nested dictionary to flatten.
        out: Destination dictionary, filled in place. Nested dicts are
            expanded recursively; empty ones contribute nothing.
    """
    for name, item in data.items():
        dotted = name if not prefix else f"{prefix}.{name}"
        if not isinstance(item, dict):
            out[dotted] = item
            continue
        _flatten(dotted, item, out)
101
+
102
+
103
def _summarize_value(value: Any) -> Dict[str, Any]:
    """Reduce one tally leaf to a small dict of scalar summaries.

    Returns:
        * ``{"none": 1}`` for ``None``;
        * ``{"value": float(value)}`` for ``int``/``float`` scalars;
        * ``{"size": 0}`` for anything that converts to an empty array;
        * ``mean``/``min``/``max`` (NaN-ignoring), ``last`` and ``size`` for
          anything else that converts to a numeric array;
        * ``{"text": str(value)}`` when the array path raises.
    """
    import numpy as np  # local import to avoid hard dependency during disabled mode

    if value is None:
        return {"none": 1}
    if isinstance(value, (int, float)):
        return {"value": float(value)}

    try:
        data = np.asarray(value)
        count = data.size
        if count == 0:
            return {"size": 0}
        reducers = (("mean", np.nanmean), ("min", np.nanmin), ("max", np.nanmax))
        summary: Dict[str, Any] = {
            label: float(reduce(data)) for label, reduce in reducers
        }
        # "last" is the final element in flattened order.
        summary["last"] = float(data.ravel()[-1])
        summary["size"] = int(count)
        return summary
    except Exception:
        # Not numeric (or otherwise not reducible): keep a string form instead.
        return {"text": str(value)}
126
+
127
+
128
def log_tally(array_tally: Dict[str, Any], prefix: str = "", step: Optional[int] = None) -> None:
    """Summarize every leaf of a nested tally and log the result to W&B.

    Does nothing when no W&B run is active. Each non-dict leaf is passed to
    ``_summarize_value`` and its summaries are logged under
    ``<prefix>.<dotted path>.<summary name>``.

    Args:
        array_tally: Nested dict whose leaves are the values to summarize.
        prefix: Optional first component of every logged key.
        step: Forwarded to ``log``.
    """
    if _WANDB_RUN is None or not _WANDB_AVAILABLE:
        return

    def iter_leaves(node: Any, trail: list[str]):
        # Depth-first traversal yielding (key path, leaf) in dict order.
        if not isinstance(node, dict):
            yield trail, node
            return
        for name, child in node.items():
            yield from iter_leaves(child, trail + [name])

    root = [prefix] if prefix else []
    payload: Dict[str, Any] = {}
    for trail, leaf in iter_leaves(array_tally, []):
        dotted = ".".join(root + trail)
        try:
            for stat_name, stat_value in _summarize_value(leaf).items():
                payload[f"{dotted}.{stat_name}"] = stat_value
        except Exception:
            # Mark the leaf as failed instead of aborting the whole tally.
            payload[f"{dotted}.error"] = 1

    if payload:
        log(payload, step=step)
154
+
155
+
156
def log_flat_stats(stats: Dict[str, Any], prefix: str = "", step: Optional[int] = None) -> None:
    """Flatten a nested stats dict to dotted keys and log it to W&B.

    Does nothing when no W&B run is active or when flattening yields no
    entries.

    Args:
        stats: Possibly nested dictionary of stats.
        prefix: Optional first component of every logged key.
        step: Forwarded to ``log``.
    """
    if _WANDB_AVAILABLE and _WANDB_RUN is not None:
        flattened: Dict[str, Any] = {}
        _flatten(prefix, stats, flattened)
        if flattened:
            log(flattened, step=step)
163
+
164
+