Add files using upload-large-folder tool
Browse files- .hydra/config.yaml +173 -0
- .hydra/hydra.yaml +154 -0
- .hydra/overrides.yaml +1 -0
- run.log +0 -0
- seed_9999/Qwen/Qwen2.5-7B-Instruct/adapters/README.md +207 -0
- seed_9999/Qwen/Qwen2.5-7B-Instruct/adapters/agent_adapter/adapter_config.json +42 -0
- seed_9999/Qwen/Qwen2.5-7B-Instruct/adapters/critic_adapter/adapter_config.json +42 -0
- src_code_for_reproducibility/__init__.py +0 -0
- src_code_for_reproducibility/docs/source/contributing.rst +0 -0
- src_code_for_reproducibility/docs/source/environments/dond.rst +410 -0
- src_code_for_reproducibility/docs/source/launch.rst +0 -0
- src_code_for_reproducibility/docs/source/media/runbatch.png +0 -0
- src_code_for_reproducibility/docs/source/modules.rst +7 -0
- src_code_for_reproducibility/docs/source/src.environments.dond.dond_game.rst +7 -0
- src_code_for_reproducibility/docs/source/src.environments.dond.dond_log_funcs.rst +7 -0
- src_code_for_reproducibility/docs/source/src.environments.dond.dond_player.rst +7 -0
- src_code_for_reproducibility/docs/source/src.environments.environment_imports.rst +7 -0
- src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_agent.rst +7 -0
- src_code_for_reproducibility/docs/source/src.environments.rst +25 -0
- src_code_for_reproducibility/docs/source/src.experiments.dond_run_train.rst +7 -0
- src_code_for_reproducibility/docs/source/src.experiments.last_completion.rst +7 -0
- src_code_for_reproducibility/docs/source/src.generation.run_games.rst +7 -0
- src_code_for_reproducibility/docs/source/src.models.dummy_local_llm.rst +7 -0
- src_code_for_reproducibility/docs/source/src.models.new_local_llm.rst +7 -0
- src_code_for_reproducibility/docs/source/src.models.oai_agent.rst +7 -0
- src_code_for_reproducibility/docs/source/src.models.server_llm.rst +7 -0
- src_code_for_reproducibility/docs/source/src.models.updatable_worker.rst +7 -0
- src_code_for_reproducibility/docs/source/src.rst +28 -0
- src_code_for_reproducibility/docs/source/src.training.ppo_train.rst +7 -0
- src_code_for_reproducibility/docs/source/src.training.ppo_train_value_head.rst +7 -0
- src_code_for_reproducibility/docs/source/src.training.rl_convs_processing.rst +7 -0
- src_code_for_reproducibility/docs/source/src.utils.extra_stats.rst +7 -0
- src_code_for_reproducibility/docs/source/src.utils.inherit_args.rst +7 -0
- src_code_for_reproducibility/docs/source/src.utils.parallel_shuffle.rst +7 -0
- src_code_for_reproducibility/docs/source/src.utils.quick_stats.rst +7 -0
- src_code_for_reproducibility/docs/source/src.utils.update_start_epoch.rst +7 -0
- src_code_for_reproducibility/docs/source/usage.rst +0 -0
- src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_simulation.cpython-312.pyc +0 -0
- src_code_for_reproducibility/utils/__init__.py +0 -0
- src_code_for_reproducibility/utils/dict_get_path.py +12 -0
- src_code_for_reproducibility/utils/gather_training_stats.py +257 -0
- src_code_for_reproducibility/utils/get_stochastic_game_lengths.py +30 -0
- src_code_for_reproducibility/utils/kill_sglang.py +17 -0
- src_code_for_reproducibility/utils/output_source_code.py +6 -0
- src_code_for_reproducibility/utils/resource_context.py +78 -0
- src_code_for_reproducibility/utils/rollout_tree_chat_htmls.py +1921 -0
- src_code_for_reproducibility/utils/rollout_tree_gather_utils.py +314 -0
- src_code_for_reproducibility/utils/rollout_tree_stats.py +50 -0
- src_code_for_reproducibility/utils/update_start_epoch.py +9 -0
- src_code_for_reproducibility/utils/wandb_utils.py +164 -0
.hydra/config.yaml
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment:
|
| 2 |
+
wandb_enabled: true
|
| 3 |
+
nb_epochs: 3000
|
| 4 |
+
nb_matches_per_iteration: 64
|
| 5 |
+
reinit_matches_each_it: true
|
| 6 |
+
checkpoint_every_n_iterations: 50
|
| 7 |
+
start_epoch: 0
|
| 8 |
+
resume_experiment: true
|
| 9 |
+
base_seed: 9999
|
| 10 |
+
seed_group_size: 8
|
| 11 |
+
train: true
|
| 12 |
+
stat_methods_for_live_wandb: mllm.markov_games.negotiation.negotiation_statistics
|
| 13 |
+
name: tas_rps_startend_naive_seed9999
|
| 14 |
+
agent_buffer: false
|
| 15 |
+
keep_agent_buffer_count: ${lora_count}
|
| 16 |
+
agent_buffer_recent_k: -1
|
| 17 |
+
description: Trust-and-Split Rock Paper Scissors negotiation game
|
| 18 |
+
logging:
|
| 19 |
+
wandb:
|
| 20 |
+
enabled: false
|
| 21 |
+
project: llm-negotiation
|
| 22 |
+
entity: null
|
| 23 |
+
mode: online
|
| 24 |
+
name: null
|
| 25 |
+
group: null
|
| 26 |
+
tags: []
|
| 27 |
+
notes: null
|
| 28 |
+
temperature: 1.0
|
| 29 |
+
markov_games:
|
| 30 |
+
runner_method_name: LinearRunner
|
| 31 |
+
runner_kwargs: {}
|
| 32 |
+
group_by_round: true
|
| 33 |
+
simulation_class_name: TrustAndSplitRPSSimulation
|
| 34 |
+
simulation_init_args:
|
| 35 |
+
nb_of_rounds: 10
|
| 36 |
+
quota_messages_per_agent_per_round: 1
|
| 37 |
+
alternating_hands: false
|
| 38 |
+
agents:
|
| 39 |
+
0:
|
| 40 |
+
agent_id: ${agent_0_id}
|
| 41 |
+
agent_name: Alice
|
| 42 |
+
agent_class_name: TrustAndSplitRPSAgent
|
| 43 |
+
policy_id: base_llm/agent_adapter
|
| 44 |
+
init_kwargs:
|
| 45 |
+
goal: Maximize your total points over the whole game.
|
| 46 |
+
num_message_chars: 500
|
| 47 |
+
message_start_end_format: true
|
| 48 |
+
proposal_start_end_format: true
|
| 49 |
+
1:
|
| 50 |
+
agent_id: ${agent_1_id}
|
| 51 |
+
agent_name: Bob
|
| 52 |
+
agent_class_name: TrustAndSplitRPSAgent
|
| 53 |
+
policy_id: base_llm/agent_adapter
|
| 54 |
+
init_kwargs:
|
| 55 |
+
goal: Maximize your total points over the whole game.
|
| 56 |
+
num_message_chars: 500
|
| 57 |
+
message_start_end_format: true
|
| 58 |
+
proposal_start_end_format: true
|
| 59 |
+
models:
|
| 60 |
+
base_llm:
|
| 61 |
+
class: LeanLocalLLM
|
| 62 |
+
init_args:
|
| 63 |
+
llm_id: base_llm
|
| 64 |
+
model_name: Qwen/Qwen2.5-7B-Instruct
|
| 65 |
+
inference_backend: vllm
|
| 66 |
+
hf_kwargs:
|
| 67 |
+
device_map: auto
|
| 68 |
+
torch_dtype: bfloat16
|
| 69 |
+
max_memory:
|
| 70 |
+
0: 20GiB
|
| 71 |
+
attn_implementation: flash_attention_2
|
| 72 |
+
inference_backend_init_kwargs:
|
| 73 |
+
enable_lora: true
|
| 74 |
+
seed: ${experiment.base_seed}
|
| 75 |
+
enable_prefix_caching: true
|
| 76 |
+
max_model_len: 10000.0
|
| 77 |
+
gpu_memory_utilization: 0.5
|
| 78 |
+
dtype: bfloat16
|
| 79 |
+
trust_remote_code: true
|
| 80 |
+
max_lora_rank: 32
|
| 81 |
+
enforce_eager: false
|
| 82 |
+
max_loras: ${lora_count}
|
| 83 |
+
max_cpu_loras: ${lora_count}
|
| 84 |
+
enable_sleep_mode: true
|
| 85 |
+
inference_backend_sampling_params:
|
| 86 |
+
temperature: ${temperature}
|
| 87 |
+
top_p: 1.0
|
| 88 |
+
max_tokens: 400
|
| 89 |
+
top_k: -1
|
| 90 |
+
logprobs: 0
|
| 91 |
+
adapter_configs:
|
| 92 |
+
agent_adapter:
|
| 93 |
+
task_type: CAUSAL_LM
|
| 94 |
+
r: 32
|
| 95 |
+
lora_alpha: 64
|
| 96 |
+
lora_dropout: 0.0
|
| 97 |
+
target_modules: all-linear
|
| 98 |
+
critic_adapter:
|
| 99 |
+
task_type: CAUSAL_LM
|
| 100 |
+
r: 32
|
| 101 |
+
lora_alpha: 64
|
| 102 |
+
lora_dropout: 0.0
|
| 103 |
+
target_modules: all-linear
|
| 104 |
+
enable_thinking: null
|
| 105 |
+
regex_max_attempts: 1
|
| 106 |
+
critics:
|
| 107 |
+
agent_critic:
|
| 108 |
+
module_pointer:
|
| 109 |
+
- base_llm
|
| 110 |
+
- critic_adapter
|
| 111 |
+
optimizers:
|
| 112 |
+
agent_optimizer:
|
| 113 |
+
module_pointer:
|
| 114 |
+
- base_llm
|
| 115 |
+
- agent_adapter
|
| 116 |
+
optimizer_class_name: torch.optim.Adam
|
| 117 |
+
init_args:
|
| 118 |
+
lr: 3.0e-06
|
| 119 |
+
weight_decay: 0.0
|
| 120 |
+
critic_optimizer:
|
| 121 |
+
module_pointer: agent_critic
|
| 122 |
+
optimizer_class_name: torch.optim.Adam
|
| 123 |
+
init_args:
|
| 124 |
+
lr: 3.0e-06
|
| 125 |
+
weight_decay: 0.0
|
| 126 |
+
trainers:
|
| 127 |
+
agent_trainer:
|
| 128 |
+
class: TrainerNaive
|
| 129 |
+
module_pointers:
|
| 130 |
+
policy:
|
| 131 |
+
- base_llm
|
| 132 |
+
- agent_adapter
|
| 133 |
+
policy_optimizer: agent_optimizer
|
| 134 |
+
critic: agent_critic
|
| 135 |
+
critic_optimizer: critic_optimizer
|
| 136 |
+
kwargs:
|
| 137 |
+
entropy_coeff: 0.0
|
| 138 |
+
entropy_topk: null
|
| 139 |
+
entropy_mask_regex: null
|
| 140 |
+
kl_coeff: 0.001
|
| 141 |
+
gradient_clipping: 1.0
|
| 142 |
+
restrict_tokens: null
|
| 143 |
+
mini_batch_size: 1
|
| 144 |
+
use_gradient_checkpointing: true
|
| 145 |
+
temperature: ${temperature}
|
| 146 |
+
device: cuda:0
|
| 147 |
+
use_gae: false
|
| 148 |
+
whiten_advantages: false
|
| 149 |
+
whiten_advantages_time_step_wise: false
|
| 150 |
+
skip_discounted_state_visitation: true
|
| 151 |
+
use_gae_lambda_annealing: false
|
| 152 |
+
gae_lambda_annealing_method: None
|
| 153 |
+
gae_lambda_annealing_method_params: None
|
| 154 |
+
gae_lambda_annealing_limit: 0.95
|
| 155 |
+
discount_factor: 0.96
|
| 156 |
+
use_rloo: true
|
| 157 |
+
enable_tokenwise_logging: false
|
| 158 |
+
pg_loss_normalization: nb_tokens
|
| 159 |
+
truncated_importance_sampling_ratio_cap: 2.0
|
| 160 |
+
reward_normalizing_constant: 100.0
|
| 161 |
+
train_on_which_data:
|
| 162 |
+
agent_trainer: ${agent_ids}
|
| 163 |
+
lora_count: 30
|
| 164 |
+
common_agent_kwargs:
|
| 165 |
+
goal: Maximize your total points over the whole game.
|
| 166 |
+
num_message_chars: 500
|
| 167 |
+
message_start_end_format: true
|
| 168 |
+
proposal_start_end_format: true
|
| 169 |
+
agent_0_id: Alice
|
| 170 |
+
agent_1_id: Bob
|
| 171 |
+
agent_ids:
|
| 172 |
+
- Alice
|
| 173 |
+
- Bob
|
.hydra/hydra.yaml
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
hydra:
|
| 2 |
+
run:
|
| 3 |
+
dir: ${oc.env:SCRATCH}/llm_negotiation/${now:%Y_%m}/${experiment.name}
|
| 4 |
+
sweep:
|
| 5 |
+
dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
|
| 6 |
+
subdir: ${hydra.job.num}
|
| 7 |
+
launcher:
|
| 8 |
+
_target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
|
| 9 |
+
sweeper:
|
| 10 |
+
_target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
|
| 11 |
+
max_batch_size: null
|
| 12 |
+
params: null
|
| 13 |
+
help:
|
| 14 |
+
app_name: ${hydra.job.name}
|
| 15 |
+
header: '${hydra.help.app_name} is powered by Hydra.
|
| 16 |
+
|
| 17 |
+
'
|
| 18 |
+
footer: 'Powered by Hydra (https://hydra.cc)
|
| 19 |
+
|
| 20 |
+
Use --hydra-help to view Hydra specific help
|
| 21 |
+
|
| 22 |
+
'
|
| 23 |
+
template: '${hydra.help.header}
|
| 24 |
+
|
| 25 |
+
== Configuration groups ==
|
| 26 |
+
|
| 27 |
+
Compose your configuration from those groups (group=option)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
$APP_CONFIG_GROUPS
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
== Config ==
|
| 34 |
+
|
| 35 |
+
Override anything in the config (foo.bar=value)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
$CONFIG
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
${hydra.help.footer}
|
| 42 |
+
|
| 43 |
+
'
|
| 44 |
+
hydra_help:
|
| 45 |
+
template: 'Hydra (${hydra.runtime.version})
|
| 46 |
+
|
| 47 |
+
See https://hydra.cc for more info.
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
== Flags ==
|
| 51 |
+
|
| 52 |
+
$FLAGS_HELP
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
== Configuration groups ==
|
| 56 |
+
|
| 57 |
+
Compose your configuration from those groups (For example, append hydra/job_logging=disabled
|
| 58 |
+
to command line)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
$HYDRA_CONFIG_GROUPS
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
Use ''--cfg hydra'' to Show the Hydra config.
|
| 65 |
+
|
| 66 |
+
'
|
| 67 |
+
hydra_help: ???
|
| 68 |
+
hydra_logging:
|
| 69 |
+
version: 1
|
| 70 |
+
formatters:
|
| 71 |
+
simple:
|
| 72 |
+
format: '[%(asctime)s][HYDRA] %(message)s'
|
| 73 |
+
handlers:
|
| 74 |
+
console:
|
| 75 |
+
class: logging.StreamHandler
|
| 76 |
+
formatter: simple
|
| 77 |
+
stream: ext://sys.stdout
|
| 78 |
+
root:
|
| 79 |
+
level: INFO
|
| 80 |
+
handlers:
|
| 81 |
+
- console
|
| 82 |
+
loggers:
|
| 83 |
+
logging_example:
|
| 84 |
+
level: DEBUG
|
| 85 |
+
disable_existing_loggers: false
|
| 86 |
+
job_logging:
|
| 87 |
+
version: 1
|
| 88 |
+
formatters:
|
| 89 |
+
simple:
|
| 90 |
+
format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
|
| 91 |
+
handlers:
|
| 92 |
+
console:
|
| 93 |
+
class: logging.StreamHandler
|
| 94 |
+
formatter: simple
|
| 95 |
+
stream: ext://sys.stdout
|
| 96 |
+
file:
|
| 97 |
+
class: logging.FileHandler
|
| 98 |
+
formatter: simple
|
| 99 |
+
filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
|
| 100 |
+
root:
|
| 101 |
+
level: INFO
|
| 102 |
+
handlers:
|
| 103 |
+
- console
|
| 104 |
+
- file
|
| 105 |
+
disable_existing_loggers: false
|
| 106 |
+
env: {}
|
| 107 |
+
mode: RUN
|
| 108 |
+
searchpath: []
|
| 109 |
+
callbacks: {}
|
| 110 |
+
output_subdir: .hydra
|
| 111 |
+
overrides:
|
| 112 |
+
hydra:
|
| 113 |
+
- hydra.mode=RUN
|
| 114 |
+
task: []
|
| 115 |
+
job:
|
| 116 |
+
name: run
|
| 117 |
+
chdir: false
|
| 118 |
+
override_dirname: ''
|
| 119 |
+
id: ???
|
| 120 |
+
num: ???
|
| 121 |
+
config_name: tas_rps_startend_naive_seed9999.yaml
|
| 122 |
+
env_set: {}
|
| 123 |
+
env_copy: []
|
| 124 |
+
config:
|
| 125 |
+
override_dirname:
|
| 126 |
+
kv_sep: '='
|
| 127 |
+
item_sep: ','
|
| 128 |
+
exclude_keys: []
|
| 129 |
+
runtime:
|
| 130 |
+
version: 1.3.2
|
| 131 |
+
version_base: '1.1'
|
| 132 |
+
cwd: /scratch/m/muqeeth/llm_negotiation
|
| 133 |
+
config_sources:
|
| 134 |
+
- path: hydra.conf
|
| 135 |
+
schema: pkg
|
| 136 |
+
provider: hydra
|
| 137 |
+
- path: /scratch/m/muqeeth/llm_negotiation/configs
|
| 138 |
+
schema: file
|
| 139 |
+
provider: main
|
| 140 |
+
- path: ''
|
| 141 |
+
schema: structured
|
| 142 |
+
provider: schema
|
| 143 |
+
output_dir: /scratch/m/muqeeth/llm_negotiation/2025_11/tas_rps_startend_naive_seed9999
|
| 144 |
+
choices:
|
| 145 |
+
hydra/env: default
|
| 146 |
+
hydra/callbacks: null
|
| 147 |
+
hydra/job_logging: default
|
| 148 |
+
hydra/hydra_logging: default
|
| 149 |
+
hydra/hydra_help: default
|
| 150 |
+
hydra/help: default
|
| 151 |
+
hydra/sweeper: basic
|
| 152 |
+
hydra/launcher: basic
|
| 153 |
+
hydra/output: default
|
| 154 |
+
verbose: false
|
.hydra/overrides.yaml
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
[]
|
run.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
seed_9999/Qwen/Qwen2.5-7B-Instruct/adapters/README.md
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
base_model: Qwen/Qwen2.5-7B-Instruct
|
| 3 |
+
library_name: peft
|
| 4 |
+
pipeline_tag: text-generation
|
| 5 |
+
tags:
|
| 6 |
+
- base_model:adapter:Qwen/Qwen2.5-7B-Instruct
|
| 7 |
+
- lora
|
| 8 |
+
- transformers
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
# Model Card for Model ID
|
| 12 |
+
|
| 13 |
+
<!-- Provide a quick summary of what the model is/does. -->
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
## Model Details
|
| 18 |
+
|
| 19 |
+
### Model Description
|
| 20 |
+
|
| 21 |
+
<!-- Provide a longer summary of what this model is. -->
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
- **Developed by:** [More Information Needed]
|
| 26 |
+
- **Funded by [optional]:** [More Information Needed]
|
| 27 |
+
- **Shared by [optional]:** [More Information Needed]
|
| 28 |
+
- **Model type:** [More Information Needed]
|
| 29 |
+
- **Language(s) (NLP):** [More Information Needed]
|
| 30 |
+
- **License:** [More Information Needed]
|
| 31 |
+
- **Finetuned from model [optional]:** [More Information Needed]
|
| 32 |
+
|
| 33 |
+
### Model Sources [optional]
|
| 34 |
+
|
| 35 |
+
<!-- Provide the basic links for the model. -->
|
| 36 |
+
|
| 37 |
+
- **Repository:** [More Information Needed]
|
| 38 |
+
- **Paper [optional]:** [More Information Needed]
|
| 39 |
+
- **Demo [optional]:** [More Information Needed]
|
| 40 |
+
|
| 41 |
+
## Uses
|
| 42 |
+
|
| 43 |
+
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
|
| 44 |
+
|
| 45 |
+
### Direct Use
|
| 46 |
+
|
| 47 |
+
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
|
| 48 |
+
|
| 49 |
+
[More Information Needed]
|
| 50 |
+
|
| 51 |
+
### Downstream Use [optional]
|
| 52 |
+
|
| 53 |
+
<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
|
| 54 |
+
|
| 55 |
+
[More Information Needed]
|
| 56 |
+
|
| 57 |
+
### Out-of-Scope Use
|
| 58 |
+
|
| 59 |
+
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
|
| 60 |
+
|
| 61 |
+
[More Information Needed]
|
| 62 |
+
|
| 63 |
+
## Bias, Risks, and Limitations
|
| 64 |
+
|
| 65 |
+
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
|
| 66 |
+
|
| 67 |
+
[More Information Needed]
|
| 68 |
+
|
| 69 |
+
### Recommendations
|
| 70 |
+
|
| 71 |
+
<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
|
| 72 |
+
|
| 73 |
+
Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
|
| 74 |
+
|
| 75 |
+
## How to Get Started with the Model
|
| 76 |
+
|
| 77 |
+
Use the code below to get started with the model.
|
| 78 |
+
|
| 79 |
+
[More Information Needed]
|
| 80 |
+
|
| 81 |
+
## Training Details
|
| 82 |
+
|
| 83 |
+
### Training Data
|
| 84 |
+
|
| 85 |
+
<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
|
| 86 |
+
|
| 87 |
+
[More Information Needed]
|
| 88 |
+
|
| 89 |
+
### Training Procedure
|
| 90 |
+
|
| 91 |
+
<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
|
| 92 |
+
|
| 93 |
+
#### Preprocessing [optional]
|
| 94 |
+
|
| 95 |
+
[More Information Needed]
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
#### Training Hyperparameters
|
| 99 |
+
|
| 100 |
+
- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
|
| 101 |
+
|
| 102 |
+
#### Speeds, Sizes, Times [optional]
|
| 103 |
+
|
| 104 |
+
<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
|
| 105 |
+
|
| 106 |
+
[More Information Needed]
|
| 107 |
+
|
| 108 |
+
## Evaluation
|
| 109 |
+
|
| 110 |
+
<!-- This section describes the evaluation protocols and provides the results. -->
|
| 111 |
+
|
| 112 |
+
### Testing Data, Factors & Metrics
|
| 113 |
+
|
| 114 |
+
#### Testing Data
|
| 115 |
+
|
| 116 |
+
<!-- This should link to a Dataset Card if possible. -->
|
| 117 |
+
|
| 118 |
+
[More Information Needed]
|
| 119 |
+
|
| 120 |
+
#### Factors
|
| 121 |
+
|
| 122 |
+
<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
|
| 123 |
+
|
| 124 |
+
[More Information Needed]
|
| 125 |
+
|
| 126 |
+
#### Metrics
|
| 127 |
+
|
| 128 |
+
<!-- These are the evaluation metrics being used, ideally with a description of why. -->
|
| 129 |
+
|
| 130 |
+
[More Information Needed]
|
| 131 |
+
|
| 132 |
+
### Results
|
| 133 |
+
|
| 134 |
+
[More Information Needed]
|
| 135 |
+
|
| 136 |
+
#### Summary
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
## Model Examination [optional]
|
| 141 |
+
|
| 142 |
+
<!-- Relevant interpretability work for the model goes here -->
|
| 143 |
+
|
| 144 |
+
[More Information Needed]
|
| 145 |
+
|
| 146 |
+
## Environmental Impact
|
| 147 |
+
|
| 148 |
+
<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
|
| 149 |
+
|
| 150 |
+
Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
|
| 151 |
+
|
| 152 |
+
- **Hardware Type:** [More Information Needed]
|
| 153 |
+
- **Hours used:** [More Information Needed]
|
| 154 |
+
- **Cloud Provider:** [More Information Needed]
|
| 155 |
+
- **Compute Region:** [More Information Needed]
|
| 156 |
+
- **Carbon Emitted:** [More Information Needed]
|
| 157 |
+
|
| 158 |
+
## Technical Specifications [optional]
|
| 159 |
+
|
| 160 |
+
### Model Architecture and Objective
|
| 161 |
+
|
| 162 |
+
[More Information Needed]
|
| 163 |
+
|
| 164 |
+
### Compute Infrastructure
|
| 165 |
+
|
| 166 |
+
[More Information Needed]
|
| 167 |
+
|
| 168 |
+
#### Hardware
|
| 169 |
+
|
| 170 |
+
[More Information Needed]
|
| 171 |
+
|
| 172 |
+
#### Software
|
| 173 |
+
|
| 174 |
+
[More Information Needed]
|
| 175 |
+
|
| 176 |
+
## Citation [optional]
|
| 177 |
+
|
| 178 |
+
<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
|
| 179 |
+
|
| 180 |
+
**BibTeX:**
|
| 181 |
+
|
| 182 |
+
[More Information Needed]
|
| 183 |
+
|
| 184 |
+
**APA:**
|
| 185 |
+
|
| 186 |
+
[More Information Needed]
|
| 187 |
+
|
| 188 |
+
## Glossary [optional]
|
| 189 |
+
|
| 190 |
+
<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
|
| 191 |
+
|
| 192 |
+
[More Information Needed]
|
| 193 |
+
|
| 194 |
+
## More Information [optional]
|
| 195 |
+
|
| 196 |
+
[More Information Needed]
|
| 197 |
+
|
| 198 |
+
## Model Card Authors [optional]
|
| 199 |
+
|
| 200 |
+
[More Information Needed]
|
| 201 |
+
|
| 202 |
+
## Model Card Contact
|
| 203 |
+
|
| 204 |
+
[More Information Needed]
|
| 205 |
+
### Framework versions
|
| 206 |
+
|
| 207 |
+
- PEFT 0.17.1
|
seed_9999/Qwen/Qwen2.5-7B-Instruct/adapters/agent_adapter/adapter_config.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alpha_pattern": {},
|
| 3 |
+
"auto_mapping": null,
|
| 4 |
+
"base_model_name_or_path": "Qwen/Qwen2.5-7B-Instruct",
|
| 5 |
+
"bias": "none",
|
| 6 |
+
"corda_config": null,
|
| 7 |
+
"eva_config": null,
|
| 8 |
+
"exclude_modules": null,
|
| 9 |
+
"fan_in_fan_out": false,
|
| 10 |
+
"inference_mode": true,
|
| 11 |
+
"init_lora_weights": true,
|
| 12 |
+
"layer_replication": null,
|
| 13 |
+
"layers_pattern": null,
|
| 14 |
+
"layers_to_transform": null,
|
| 15 |
+
"loftq_config": {},
|
| 16 |
+
"lora_alpha": 64,
|
| 17 |
+
"lora_bias": false,
|
| 18 |
+
"lora_dropout": 0.0,
|
| 19 |
+
"megatron_config": null,
|
| 20 |
+
"megatron_core": "megatron.core",
|
| 21 |
+
"modules_to_save": null,
|
| 22 |
+
"peft_type": "LORA",
|
| 23 |
+
"qalora_group_size": 16,
|
| 24 |
+
"r": 32,
|
| 25 |
+
"rank_pattern": {},
|
| 26 |
+
"revision": null,
|
| 27 |
+
"target_modules": [
|
| 28 |
+
"k_proj",
|
| 29 |
+
"v_proj",
|
| 30 |
+
"down_proj",
|
| 31 |
+
"q_proj",
|
| 32 |
+
"up_proj",
|
| 33 |
+
"gate_proj",
|
| 34 |
+
"o_proj"
|
| 35 |
+
],
|
| 36 |
+
"target_parameters": null,
|
| 37 |
+
"task_type": "CAUSAL_LM",
|
| 38 |
+
"trainable_token_indices": null,
|
| 39 |
+
"use_dora": false,
|
| 40 |
+
"use_qalora": false,
|
| 41 |
+
"use_rslora": false
|
| 42 |
+
}
|
seed_9999/Qwen/Qwen2.5-7B-Instruct/adapters/critic_adapter/adapter_config.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alpha_pattern": {},
|
| 3 |
+
"auto_mapping": null,
|
| 4 |
+
"base_model_name_or_path": "Qwen/Qwen2.5-7B-Instruct",
|
| 5 |
+
"bias": "none",
|
| 6 |
+
"corda_config": null,
|
| 7 |
+
"eva_config": null,
|
| 8 |
+
"exclude_modules": null,
|
| 9 |
+
"fan_in_fan_out": false,
|
| 10 |
+
"inference_mode": true,
|
| 11 |
+
"init_lora_weights": true,
|
| 12 |
+
"layer_replication": null,
|
| 13 |
+
"layers_pattern": null,
|
| 14 |
+
"layers_to_transform": null,
|
| 15 |
+
"loftq_config": {},
|
| 16 |
+
"lora_alpha": 64,
|
| 17 |
+
"lora_bias": false,
|
| 18 |
+
"lora_dropout": 0.0,
|
| 19 |
+
"megatron_config": null,
|
| 20 |
+
"megatron_core": "megatron.core",
|
| 21 |
+
"modules_to_save": null,
|
| 22 |
+
"peft_type": "LORA",
|
| 23 |
+
"qalora_group_size": 16,
|
| 24 |
+
"r": 32,
|
| 25 |
+
"rank_pattern": {},
|
| 26 |
+
"revision": null,
|
| 27 |
+
"target_modules": [
|
| 28 |
+
"k_proj",
|
| 29 |
+
"v_proj",
|
| 30 |
+
"down_proj",
|
| 31 |
+
"q_proj",
|
| 32 |
+
"up_proj",
|
| 33 |
+
"gate_proj",
|
| 34 |
+
"o_proj"
|
| 35 |
+
],
|
| 36 |
+
"target_parameters": null,
|
| 37 |
+
"task_type": "CAUSAL_LM",
|
| 38 |
+
"trainable_token_indices": null,
|
| 39 |
+
"use_dora": false,
|
| 40 |
+
"use_qalora": false,
|
| 41 |
+
"use_rslora": false
|
| 42 |
+
}
|
src_code_for_reproducibility/__init__.py
ADDED
|
File without changes
|
src_code_for_reproducibility/docs/source/contributing.rst
ADDED
|
File without changes
|
src_code_for_reproducibility/docs/source/environments/dond.rst
ADDED
|
@@ -0,0 +1,410 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
=================
|
| 2 |
+
Deal or No Deal
|
| 3 |
+
=================
|
| 4 |
+
|
| 5 |
+
The Deal or No Deal (DoND) environment provides a multi-agent negotiation interface where players trade
|
| 6 |
+
items with different values. This document describes the API for interacting with the DoND environment
|
| 7 |
+
and its associated agent handler.
|
| 8 |
+
|
| 9 |
+
Overview
|
| 10 |
+
--------
|
| 11 |
+
|
| 12 |
+
Deal or No Deal is a negotiation game where two agents must agree on how to divide a set of items,
|
| 13 |
+
each of which may have a different value for each agent. The agents engage in a back-and-forth dialogue to
|
| 14 |
+
determine an allocation of the items, with each trying to maximize their own total value.
|
| 15 |
+
|
| 16 |
+
Our implementation follows the Multi-Agent Negotiation Environment standard, allowing it to be used
|
| 17 |
+
with LLM agents through a text-based interface.
|
| 18 |
+
|
| 19 |
+
Game Rules
|
| 20 |
+
----------
|
| 21 |
+
|
| 22 |
+
Basic Structure
~~~~~~~~~~~~~~~
|
| 23 |
+
|
| 24 |
+
The core mechanics of Deal or No Deal are:
|
| 25 |
+
|
| 26 |
+
1. Two agents negotiate over a set of items (e.g., books, balls, hats)
|
| 27 |
+
2. Each item has:
|
| 28 |
+
- A specific quantity (how many of each item is available)
|
| 29 |
+
- A value for each agent (which may differ between agents)
|
| 30 |
+
3. Agents take turns sending messages to negotiate how to split the items
|
| 31 |
+
4. Once an agreement is reached, agents finalize the deal
|
| 32 |
+
5. Points are awarded based on the value of items each agent receives
|
| 33 |
+
|
| 34 |
+
Detailed Gameplay
~~~~~~~~~~~~~~~~~
|
| 35 |
+
|
| 36 |
+
.. rubric:: Setup Phase
|
| 37 |
+
|
| 38 |
+
The game begins with:
|
| 39 |
+
- A set of items (e.g., "book", "hat", "ball")
|
| 40 |
+
- Each item has a quantity (e.g., 6 books, 2 hats, 4 balls)
|
| 41 |
+
- Each agent has private values for each item (e.g., books might be worth 5 points to one agent but only 2 points to the other)
|
| 42 |
+
- Agents are assigned roles (starting negotiator and responding negotiator)
|
| 43 |
+
|
| 44 |
+
.. rubric:: Negotiation Phase
|
| 45 |
+
|
| 46 |
+
1. Agents take turns sending free-form text messages to each other
|
| 47 |
+
2. Messages can include offers, counter-offers, questions, or strategic communication
|
| 48 |
+
3. There is a maximum number of messages permitted (preventing endless negotiations)
|
| 49 |
+
4. Either agent can propose to finalize an agreement at any time
|
| 50 |
+
|
| 51 |
+
For example:
|
| 52 |
+
- Agent 1: "I propose I get all the books and you get all the hats and balls."
|
| 53 |
+
- Agent 2: "That doesn't work for me. How about you get 3 books and I get 3 books, all the hats, and all the balls?"
|
| 54 |
+
- Agent 1: "Let me counter-offer: I get 4 books and 2 balls, you get 2 books, all hats, and 2 balls."
|
| 55 |
+
|
| 56 |
+
.. rubric:: Finalization Phase
|
| 57 |
+
|
| 58 |
+
1. When an agent wants to finalize a deal, they must specify the exact allocation:
|
| 59 |
+
- How many of each item they receive
|
| 60 |
+
- How many of each item the other agent receives
|
| 61 |
+
2. The other agent must then either agree (by submitting the same allocation) or reject the finalization
|
| 62 |
+
3. If both agents submit matching finalizations, the deal is executed
|
| 63 |
+
4. If finalizations don't match, no agreement is reached, and both agents receive 0 points
|
| 64 |
+
|
| 65 |
+
.. rubric:: Scoring
|
| 66 |
+
|
| 67 |
+
1. Each agent's score is calculated based on the value of items they receive
|
| 68 |
+
2. The formula is: Sum(quantity_of_item_i × value_of_item_i_to_agent)
|
| 69 |
+
3. If no agreement is reached, both agents receive 0 points
|
| 70 |
+
|
| 71 |
+
Example Game
~~~~~~~~~~~~
|
| 72 |
+
|
| 73 |
+
Let's walk through a simple example:
|
| 74 |
+
|
| 75 |
+
**Setup:**
|
| 76 |
+
- Items: Books (4), Hats (2), Balls (6)
|
| 77 |
+
- Agent 1 values: Books=5, Hats=1, Balls=2
|
| 78 |
+
- Agent 2 values: Books=3, Hats=6, Balls=1
|
| 79 |
+
|
| 80 |
+
**Negotiation (simplified):**
|
| 81 |
+
1. Agent 1: "I would like all the books and balls. You can have the hats."
|
| 82 |
+
2. Agent 2: "That doesn't work for me. Books are valuable. I propose I get all the hats and 2 books, you get 2 books and all the balls."
|
| 83 |
+
3. Agent 1: "How about I get 3 books and all the balls, and you get 1 book and all the hats?"
|
| 84 |
+
4. Agent 2: "I accept your proposal."
|
| 85 |
+
|
| 86 |
+
**Finalization:**
|
| 87 |
+
- Agent 1 submits: Agent 1 gets (Books: 3, Hats: 0, Balls: 6), Agent 2 gets (Books: 1, Hats: 2, Balls: 0)
|
| 88 |
+
- Agent 2 submits the same allocation, confirming agreement
|
| 89 |
+
|
| 90 |
+
**Scoring:**
|
| 91 |
+
- Agent 1 score: (3 books × 5) + (0 hats × 1) + (6 balls × 2) = 15 + 0 + 12 = 27 points
|
| 92 |
+
- Agent 2 score: (1 book × 3) + (2 hats × 6) + (0 balls × 1) = 3 + 12 + 0 = 15 points
|
| 93 |
+
|
| 94 |
+
Game Variations
~~~~~~~~~~~~~~~
|
| 95 |
+
|
| 96 |
+
The DoND environment supports several variations through configuration parameters:
|
| 97 |
+
|
| 98 |
+
.. rubric:: Different Value Distributions
|
| 99 |
+
|
| 100 |
+
The environment offers multiple ways to assign values to items:
|
| 101 |
+
|
| 102 |
+
1. **Standard Random Setup (dond_random_setup)**:
|
| 103 |
+
- Items have even-numbered quantities
|
| 104 |
+
- Each agent receives distinct random values for each item
|
| 105 |
+
- Values are drawn from a uniform distribution
|
| 106 |
+
|
| 107 |
+
2. **Independent Random Values (independent_random_vals)**:
|
| 108 |
+
- Item quantities can be any number in the specified range
|
| 109 |
+
- Values for each agent are drawn independently
|
| 110 |
+
- Creates more varied negotiation scenarios
|
| 111 |
+
|
| 112 |
+
3. **Bicameral Value Distribution (bicameral_vals_assignator)**:
|
| 113 |
+
- Creates a "high value" and "low value" distribution for each item
|
| 114 |
+
- Each agent values approximately half the items highly and half lowly
|
| 115 |
+
- Values are drawn from normal distributions with different means
|
| 116 |
+
- Creates scenarios with clear trade opportunities
|
| 117 |
+
|
| 118 |
+
.. rubric:: Visibility Options
|
| 119 |
+
|
| 120 |
+
1. **Finalization Visibility**:
|
| 121 |
+
- When enabled, both agents can see each other's finalization proposals
|
| 122 |
+
- When disabled, finalization proposals remain private until both are submitted
|
| 123 |
+
|
| 124 |
+
2. **Other Values Visibility**:
|
| 125 |
+
- When enabled, agents can see each other's value functions
|
| 126 |
+
- When disabled, agents only know their own values
|
| 127 |
+
- Creates information asymmetry and richer negotiation dynamics
|
| 128 |
+
|
| 129 |
+
.. rubric:: Game Modes
|
| 130 |
+
|
| 131 |
+
1. **Cooperative Mode ("coop")**:
|
| 132 |
+
- Agents are encouraged to find mutually beneficial solutions
|
| 133 |
+
- Success is measured by the sum of both agents' scores
|
| 134 |
+
|
| 135 |
+
2. **Competitive Mode ("comp")**:
|
| 136 |
+
- Agents aim to maximize their individual scores
|
| 137 |
+
- Creates more adversarial negotiations
|
| 138 |
+
|
| 139 |
+
.. rubric:: Round Structure
|
| 140 |
+
|
| 141 |
+
1. **Single Round**:
|
| 142 |
+
- One negotiation session between the same agents
|
| 143 |
+
- Simple evaluation of negotiation skills
|
| 144 |
+
|
| 145 |
+
2. **Multiple Rounds**:
|
| 146 |
+
- Agents negotiate multiple times with different item setups
|
| 147 |
+
- Allows for learning and adaptation over time
|
| 148 |
+
- Roles can be swapped between rounds
|
| 149 |
+
|
| 150 |
+
DondEnv
|
| 151 |
+
------------
|
| 152 |
+
|
| 153 |
+
The ``DondEnv`` class provides an interface to the Deal or No Deal environment that follows the Multi-Agent
|
| 154 |
+
Negotiation Environment standard.
|
| 155 |
+
|
| 156 |
+
.. code-block:: python
|
| 157 |
+
|
| 158 |
+
class DondEnv:
|
| 159 |
+
"""
|
| 160 |
+
Multi-Agent Negotiation Environment for Deal or No Deal.
|
| 161 |
+
"""
|
| 162 |
+
def __init__(
|
| 163 |
+
self,
|
| 164 |
+
agents,
|
| 165 |
+
mode="coop",
|
| 166 |
+
max_messages=None,
|
| 167 |
+
min_messages=None,
|
| 168 |
+
max_chars_per_message=None,
|
| 169 |
+
rounds_per_game=1,
|
| 170 |
+
random_setup_func=None,
|
| 171 |
+
random_setup_kwargs=None,
|
| 172 |
+
role_assignator_func=None,
|
| 173 |
+
role_assignator_func_kwargs=None,
|
| 174 |
+
finalization_visibility=False,
|
| 175 |
+
other_values_visibility=False,
|
| 176 |
+
random_seed=None
|
| 177 |
+
):
|
| 178 |
+
"""Initialize the Deal or No Deal environment.
|
| 179 |
+
|
| 180 |
+
Args:
|
| 181 |
+
agents: List of agent IDs participating in the game
|
| 182 |
+
mode: Game mode ("coop" or "comp")
|
| 183 |
+
max_messages: Maximum number of messages per agent per round
|
| 184 |
+
min_messages: Minimum number of messages per agent per round
|
| 185 |
+
max_chars_per_message: Maximum characters per message
|
| 186 |
+
rounds_per_game: Number of negotiation rounds to play
|
| 187 |
+
random_setup_func: Function to generate item quantities and values
|
| 188 |
+
random_setup_kwargs: Arguments for the random setup function
|
| 189 |
+
role_assignator_func: Function to assign roles to agents
|
| 190 |
+
role_assignator_func_kwargs: Arguments for the role assignator
|
| 191 |
+
finalization_visibility: Whether agents can see each other's finalizations
|
| 192 |
+
other_values_visibility: Whether agents can see each other's values
|
| 193 |
+
random_seed: Seed for reproducibility
|
| 194 |
+
"""
|
| 195 |
+
# ...
|
| 196 |
+
|
| 197 |
+
def reset(self):
|
| 198 |
+
"""Reset the environment to an initial state and return the initial observation.
|
| 199 |
+
|
| 200 |
+
Returns:
|
| 201 |
+
observation (dict): A dictionary where keys are agent identifiers and values are observations.
|
| 202 |
+
"""
|
| 203 |
+
# ...
|
| 204 |
+
|
| 205 |
+
def step(self, actions):
|
| 206 |
+
"""Take a step in the environment using the provided actions.
|
| 207 |
+
|
| 208 |
+
Args:
|
| 209 |
+
actions (dict): A dictionary where keys are agent identifiers and values are actions.
|
| 210 |
+
Actions can be messages or finalization proposals.
|
| 211 |
+
|
| 212 |
+
Returns:
|
| 213 |
+
observations (dict): A dictionary where keys are agent identifiers and values are observations.
|
| 214 |
+
done (bool): Whether the episode has ended.
|
| 215 |
+
info (dict): Additional information about the environment.
|
| 216 |
+
"""
|
| 217 |
+
# ...
|
| 218 |
+
|
| 219 |
+
def get_state(self):
|
| 220 |
+
"""Retrieve the current state of the game.
|
| 221 |
+
|
| 222 |
+
Returns:
|
| 223 |
+
state (dict): The current state of the game, including items, quantities, values, etc.
|
| 224 |
+
"""
|
| 225 |
+
# ...
|
| 226 |
+
|
| 227 |
+
Key Implementation Details
|
| 228 |
+
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
| 229 |
+
|
| 230 |
+
The ``DondEnv`` class implements several key features:
|
| 231 |
+
|
| 232 |
+
1. **Multi-Agent Support**: The environment tracks two agents and manages their alternating messages.
|
| 233 |
+
|
| 234 |
+
2. **Turn-Based Dialogue**: The environment enforces turn structure and limits on message count.
|
| 235 |
+
|
| 236 |
+
3. **Finalization Processing**: The environment validates and processes finalization proposals.
|
| 237 |
+
|
| 238 |
+
4. **Random Setup**: The environment supports multiple methods of generating negotiation scenarios.
|
| 239 |
+
|
| 240 |
+
5. **Round Management**: The environment can handle multiple rounds with different setups.
|
| 241 |
+
|
| 242 |
+
Observation Structure
|
| 243 |
+
~~~~~~~~~~~~~~~~~~~~~
|
| 244 |
+
|
| 245 |
+
Each agent receives an observation (state) dictionary with rich information about the game:
|
| 246 |
+
|
| 247 |
+
.. code-block:: python
|
| 248 |
+
|
| 249 |
+
{
|
| 250 |
+
"mode": str, # Game mode ("coop" or "comp")
|
| 251 |
+
"role_values": dict, # Value mappings for each role
|
| 252 |
+
"role_props": dict, # Properties for each role
|
| 253 |
+
"agent_to_role": dict, # Mapping from agent IDs to roles
|
| 254 |
+
"is_new_round": bool, # Whether this is the start of a new round
|
| 255 |
+
"is_new_game": bool, # Whether this is the start of a new game
|
| 256 |
+
"game_over": bool, # Whether the game is over
|
| 257 |
+
"items": list, # List of item names
|
| 258 |
+
"quantities": dict, # Quantities of each item
|
| 259 |
+
"has_finalized": bool, # Whether finalization has been proposed
|
| 260 |
+
"last_message": dict, # The last message sent
|
| 261 |
+
"messages_remaining": dict, # Number of messages each agent can still send
|
| 262 |
+
# And various history tracking fields
|
| 263 |
+
}
|
| 264 |
+
|
| 265 |
+
Action Structure
|
| 266 |
+
~~~~~~~~~~~~~~~~
|
| 267 |
+
|
| 268 |
+
Actions can be:
|
| 269 |
+
|
| 270 |
+
1. **Text Messages**: Free-form text for negotiation.
|
| 271 |
+
2. **Finalization Proposals**: Structured data specifying the exact allocation of items.
|
| 272 |
+
|
| 273 |
+
Example finalization format:
|
| 274 |
+
|
| 275 |
+
.. code-block:: python
|
| 276 |
+
|
| 277 |
+
{
|
| 278 |
+
"type": "finalize",
|
| 279 |
+
"allocation": {
|
| 280 |
+
"agent1": {"book": 3, "hat": 0, "ball": 6},
|
| 281 |
+
"agent2": {"book": 1, "hat": 2, "ball": 0}
|
| 282 |
+
}
|
| 283 |
+
}
|
| 284 |
+
|
| 285 |
+
Value Setup Functions
|
| 286 |
+
---------------------
|
| 287 |
+
|
| 288 |
+
The DoND environment provides several functions for setting up item values:
|
| 289 |
+
|
| 290 |
+
.. code-block:: python
|
| 291 |
+
|
| 292 |
+
def dond_random_setup(items, min_quant, max_quant, min_val, max_val, random_seed=None):
|
| 293 |
+
"""
|
| 294 |
+
Generates items, even-numbered quantities and distinct random values for each category for both agents.
|
| 295 |
+
|
| 296 |
+
Args:
|
| 297 |
+
items (list): List of items.
|
| 298 |
+
min_quant (int): Minimum quantity per item.
|
| 299 |
+
max_quant (int): Maximum quantity per item.
|
| 300 |
+
min_val (int): Minimum value per item.
|
| 301 |
+
max_val (int): Maximum value per item.
|
| 302 |
+
random_seed (int, optional): Seed for random generation.
|
| 303 |
+
|
| 304 |
+
Returns:
|
| 305 |
+
tuple: (items, quantities, (val_starting_negotiator, val_responding_negotiator))
|
| 306 |
+
"""
|
| 307 |
+
# ...
|
| 308 |
+
|
| 309 |
+
def independent_random_vals(items, min_quant, max_quant, min_val, max_val, random_seed=None):
|
| 310 |
+
"""
|
| 311 |
+
Generates random quantities and independent random values for both agents.
|
| 312 |
+
|
| 313 |
+
Args:
|
| 314 |
+
Similar to dond_random_setup
|
| 315 |
+
|
| 316 |
+
Returns:
|
| 317 |
+
tuple: (items, quantities, (val_starting_negotiator, val_responding_negotiator))
|
| 318 |
+
"""
|
| 319 |
+
# ...
|
| 320 |
+
|
| 321 |
+
def bicameral_vals_assignator(items, min_quant, max_quant, low_val_mean, low_val_std, high_val_mean, high_val_std, random_seed=None):
|
| 322 |
+
"""
|
| 323 |
+
Generates values with a bicameral distribution - each agent values half the items highly.
|
| 324 |
+
|
| 325 |
+
Args:
|
| 326 |
+
items (list): List of items.
|
| 327 |
+
min_quant, max_quant: Range for quantities
|
| 328 |
+
low_val_mean, low_val_std: Mean and standard deviation for the "low value" distribution
|
| 329 |
+
high_val_mean, high_val_std: Mean and standard deviation for the "high value" distribution
|
| 330 |
+
random_seed: Seed for reproducibility
|
| 331 |
+
|
| 332 |
+
Returns:
|
| 333 |
+
tuple: (items, quantities, (val_starting_negotiator, val_responding_negotiator))
|
| 334 |
+
"""
|
| 335 |
+
# ...
|
| 336 |
+
|
| 337 |
+
Running DoND Games
|
| 338 |
+
----------------------
|
| 339 |
+
|
| 340 |
+
To run Deal or No Deal games with LLM agents, you can use the following structure:
|
| 341 |
+
|
| 342 |
+
.. code-block:: python
|
| 343 |
+
|
| 344 |
+
from mllm.environments.dond.dond_game import DondEnv
|
| 345 |
+
from mllm.environments.dond.dond_agent import DondAgent
|
| 346 |
+
from src.run_matches import run_batched_matches
|
| 347 |
+
|
| 348 |
+
# Create environment
|
| 349 |
+
env = DondEnv(
|
| 350 |
+
agents=["agent1", "agent2"],
|
| 351 |
+
mode="coop",
|
| 352 |
+
max_messages=10,
|
| 353 |
+
rounds_per_game=1,
|
| 354 |
+
random_setup_func="dond_random_setup",
|
| 355 |
+
random_setup_kwargs={
|
| 356 |
+
"items": ["book", "hat", "ball"],
|
| 357 |
+
"min_quant": 2,
|
| 358 |
+
"max_quant": 8,
|
| 359 |
+
"min_val": 1,
|
| 360 |
+
"max_val": 10
|
| 361 |
+
},
|
| 362 |
+
finalization_visibility=False
|
| 363 |
+
)
|
| 364 |
+
|
| 365 |
+
# Create agent handlers (implementation details would vary)
|
| 366 |
+
agent_handlers = {
|
| 367 |
+
"agent1": DondAgent(agent_id="agent1"),
|
| 368 |
+
"agent2": DondAgent(agent_id="agent2")
|
| 369 |
+
}
|
| 370 |
+
|
| 371 |
+
# Define policy mapping
|
| 372 |
+
policy_mapping = {
|
| 373 |
+
"llm_policy": my_llm_policy_function
|
| 374 |
+
}
|
| 375 |
+
|
| 376 |
+
# Run the game
|
| 377 |
+
game_results = run_batched_matches(
|
| 378 |
+
envs=[env],
|
| 379 |
+
agent_handlers_per_env=[agent_handlers],
|
| 380 |
+
policy_mapping=policy_mapping,
|
| 381 |
+
max_parallel_matches=1
|
| 382 |
+
)
|
| 383 |
+
|
| 384 |
+
Limitations and Considerations
|
| 385 |
+
------------------------------
|
| 386 |
+
|
| 387 |
+
1. **Negotiation Complexity**: The open-ended nature of negotiations can be challenging for some LLM agents.
|
| 388 |
+
|
| 389 |
+
2. **Parsing Challenges**: Extracting structured finalization proposals from free-form text requires robust parsing.
|
| 390 |
+
|
| 391 |
+
3. **Optimization Opportunities**: Different agents may employ different negotiation strategies to optimize outcomes.
|
| 392 |
+
|
| 393 |
+
4. **Fairness Evaluation**: The environment allows research into questions of fair division and Pareto optimality.
|
| 394 |
+
|
| 395 |
+
5. **Strategic Deception**: Agents might strategically misrepresent their true values, adding complexity to negotiations.
|
| 396 |
+
|
| 397 |
+
Advanced Usage
|
| 398 |
+
--------------
|
| 399 |
+
|
| 400 |
+
For advanced usage, you can:
|
| 401 |
+
|
| 402 |
+
1. **Custom Value Functions**: Create more complex distributions of item values for specific research questions.
|
| 403 |
+
|
| 404 |
+
2. **Novel Negotiation Scenarios**: Design item sets and values to test specific negotiation skills.
|
| 405 |
+
|
| 406 |
+
3. **Curriculum Learning**: Create progressively more difficult negotiation scenarios.
|
| 407 |
+
|
| 408 |
+
4. **Communication Analysis**: Analyze the language and strategies used in successful negotiations.
|
| 409 |
+
|
| 410 |
+
5. **Multi-Round Dynamics**: Study how agents adapt their strategies over multiple rounds.
|
src_code_for_reproducibility/docs/source/launch.rst
ADDED
|
File without changes
|
src_code_for_reproducibility/docs/source/media/runbatch.png
ADDED
|
src_code_for_reproducibility/docs/source/modules.rst
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
src
|
| 2 |
+
===
|
| 3 |
+
|
| 4 |
+
.. toctree::
|
| 5 |
+
:maxdepth: 4
|
| 6 |
+
|
| 7 |
+
src
|
src_code_for_reproducibility/docs/source/src.environments.dond.dond_game.rst
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
src.environments.dond.dond\_game module
|
| 2 |
+
=======================================
|
| 3 |
+
|
| 4 |
+
.. automodule:: src.environments.dond.dond_game
|
| 5 |
+
:members:
|
| 6 |
+
:undoc-members:
|
| 7 |
+
:show-inheritance:
|
src_code_for_reproducibility/docs/source/src.environments.dond.dond_log_funcs.rst
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
src.environments.dond.dond\_log\_funcs module
|
| 2 |
+
=============================================
|
| 3 |
+
|
| 4 |
+
.. automodule:: src.environments.dond.dond_log_funcs
|
| 5 |
+
:members:
|
| 6 |
+
:undoc-members:
|
| 7 |
+
:show-inheritance:
|
src_code_for_reproducibility/docs/source/src.environments.dond.dond_player.rst
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
src.environments.dond.dond\_agent module
|
| 2 |
+
=========================================
|
| 3 |
+
|
| 4 |
+
.. automodule:: src.environments.dond.dond_agent
|
| 5 |
+
:members:
|
| 6 |
+
:undoc-members:
|
| 7 |
+
:show-inheritance:
|
src_code_for_reproducibility/docs/source/src.environments.environment_imports.rst
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
src.environments.environment\_imports module
|
| 2 |
+
============================================
|
| 3 |
+
|
| 4 |
+
.. automodule:: src.environments.environment_imports
|
| 5 |
+
:members:
|
| 6 |
+
:undoc-members:
|
| 7 |
+
:show-inheritance:
|
src_code_for_reproducibility/docs/source/src.environments.ipd.ipd_agent.rst
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
src.environments.ipd.ipd\_agent module
|
| 2 |
+
======================================
|
| 3 |
+
|
| 4 |
+
.. automodule:: src.environments.ipd.ipd_agent
|
| 5 |
+
:members:
|
| 6 |
+
:undoc-members:
|
| 7 |
+
:show-inheritance:
|
src_code_for_reproducibility/docs/source/src.environments.rst
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
src.environments package
|
| 2 |
+
========================
|
| 3 |
+
|
| 4 |
+
.. automodule:: src.environments
|
| 5 |
+
:members:
|
| 6 |
+
:undoc-members:
|
| 7 |
+
:show-inheritance:
|
| 8 |
+
|
| 9 |
+
Subpackages
|
| 10 |
+
-----------
|
| 11 |
+
|
| 12 |
+
.. toctree::
|
| 13 |
+
:maxdepth: 4
|
| 14 |
+
|
| 15 |
+
src.environments.dond
|
| 16 |
+
src.environments.ipd
|
| 17 |
+
|
| 18 |
+
Submodules
|
| 19 |
+
----------
|
| 20 |
+
|
| 21 |
+
.. toctree::
|
| 22 |
+
:maxdepth: 4
|
| 23 |
+
|
| 24 |
+
src.environments.env_imports
|
| 25 |
+
src.environments.environment_imports
|
src_code_for_reproducibility/docs/source/src.experiments.dond_run_train.rst
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
src.experiments.dond\_run\_train module
|
| 2 |
+
=======================================
|
| 3 |
+
|
| 4 |
+
.. automodule:: src.experiments.dond_run_train
|
| 5 |
+
:members:
|
| 6 |
+
:undoc-members:
|
| 7 |
+
:show-inheritance:
|
src_code_for_reproducibility/docs/source/src.experiments.last_completion.rst
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
src.experiments.last\_completion module
|
| 2 |
+
=======================================
|
| 3 |
+
|
| 4 |
+
.. automodule:: src.experiments.last_completion
|
| 5 |
+
:members:
|
| 6 |
+
:undoc-members:
|
| 7 |
+
:show-inheritance:
|
src_code_for_reproducibility/docs/source/src.generation.run_games.rst
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
src.generation.run\_games module
|
| 2 |
+
================================
|
| 3 |
+
|
| 4 |
+
.. automodule:: src.generation.run_games
|
| 5 |
+
:members:
|
| 6 |
+
:undoc-members:
|
| 7 |
+
:show-inheritance:
|
src_code_for_reproducibility/docs/source/src.models.dummy_local_llm.rst
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
src.models.dummy\_local\_llm module
|
| 2 |
+
===================================
|
| 3 |
+
|
| 4 |
+
.. automodule:: src.models.dummy_local_llm
|
| 5 |
+
:members:
|
| 6 |
+
:undoc-members:
|
| 7 |
+
:show-inheritance:
|
src_code_for_reproducibility/docs/source/src.models.new_local_llm.rst
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
src.models.new\_local\_llm module
|
| 2 |
+
=================================
|
| 3 |
+
|
| 4 |
+
.. automodule:: src.models.new_local_llm
|
| 5 |
+
:members:
|
| 6 |
+
:undoc-members:
|
| 7 |
+
:show-inheritance:
|
src_code_for_reproducibility/docs/source/src.models.oai_agent.rst
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
src.models.oai\_agent module
|
| 2 |
+
============================
|
| 3 |
+
|
| 4 |
+
.. automodule:: src.models.oai_agent
|
| 5 |
+
:members:
|
| 6 |
+
:undoc-members:
|
| 7 |
+
:show-inheritance:
|
src_code_for_reproducibility/docs/source/src.models.server_llm.rst
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
src.models.server\_llm module
|
| 2 |
+
=============================
|
| 3 |
+
|
| 4 |
+
.. automodule:: src.models.server_llm
|
| 5 |
+
:members:
|
| 6 |
+
:undoc-members:
|
| 7 |
+
:show-inheritance:
|
src_code_for_reproducibility/docs/source/src.models.updatable_worker.rst
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
src.models.updatable\_worker module
|
| 2 |
+
===================================
|
| 3 |
+
|
| 4 |
+
.. automodule:: src.models.updatable_worker
|
| 5 |
+
:members:
|
| 6 |
+
:undoc-members:
|
| 7 |
+
:show-inheritance:
|
src_code_for_reproducibility/docs/source/src.rst
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
src package
|
| 2 |
+
===========
|
| 3 |
+
|
| 4 |
+
.. automodule:: src
|
| 5 |
+
:members:
|
| 6 |
+
:undoc-members:
|
| 7 |
+
:show-inheritance:
|
| 8 |
+
|
| 9 |
+
Subpackages
|
| 10 |
+
-----------
|
| 11 |
+
|
| 12 |
+
.. toctree::
|
| 13 |
+
:maxdepth: 4
|
| 14 |
+
|
| 15 |
+
src.environments
|
| 16 |
+
src.experiments
|
| 17 |
+
src.generation
|
| 18 |
+
src.models
|
| 19 |
+
src.training
|
| 20 |
+
src.utils
|
| 21 |
+
|
| 22 |
+
Submodules
|
| 23 |
+
----------
|
| 24 |
+
|
| 25 |
+
.. toctree::
|
| 26 |
+
:maxdepth: 4
|
| 27 |
+
|
| 28 |
+
src.run
|
src_code_for_reproducibility/docs/source/src.training.ppo_train.rst
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
src.training.ppo\_train module
|
| 2 |
+
==============================
|
| 3 |
+
|
| 4 |
+
.. automodule:: src.training.ppo_train
|
| 5 |
+
:members:
|
| 6 |
+
:undoc-members:
|
| 7 |
+
:show-inheritance:
|
src_code_for_reproducibility/docs/source/src.training.ppo_train_value_head.rst
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
src.training.ppo\_train\_value\_head module
|
| 2 |
+
===========================================
|
| 3 |
+
|
| 4 |
+
.. automodule:: src.training.ppo_train_value_head
|
| 5 |
+
:members:
|
| 6 |
+
:undoc-members:
|
| 7 |
+
:show-inheritance:
|
src_code_for_reproducibility/docs/source/src.training.rl_convs_processing.rst
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
src.training.rl\_convs\_processing module
|
| 2 |
+
=========================================
|
| 3 |
+
|
| 4 |
+
.. automodule:: src.training.rl_convs_processing
|
| 5 |
+
:members:
|
| 6 |
+
:undoc-members:
|
| 7 |
+
:show-inheritance:
|
src_code_for_reproducibility/docs/source/src.utils.extra_stats.rst
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
src.utils.extra\_stats module
|
| 2 |
+
=============================
|
| 3 |
+
|
| 4 |
+
.. automodule:: src.utils.extra_stats
|
| 5 |
+
:members:
|
| 6 |
+
:undoc-members:
|
| 7 |
+
:show-inheritance:
|
src_code_for_reproducibility/docs/source/src.utils.inherit_args.rst
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
src.utils.inherit\_args module
|
| 2 |
+
==============================
|
| 3 |
+
|
| 4 |
+
.. automodule:: src.utils.inherit_args
|
| 5 |
+
:members:
|
| 6 |
+
:undoc-members:
|
| 7 |
+
:show-inheritance:
|
src_code_for_reproducibility/docs/source/src.utils.parallel_shuffle.rst
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
src.utils.parallel\_shuffle module
|
| 2 |
+
==================================
|
| 3 |
+
|
| 4 |
+
.. automodule:: src.utils.parallel_shuffle
|
| 5 |
+
:members:
|
| 6 |
+
:undoc-members:
|
| 7 |
+
:show-inheritance:
|
src_code_for_reproducibility/docs/source/src.utils.quick_stats.rst
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
src.utils.quick\_stats module
|
| 2 |
+
=============================
|
| 3 |
+
|
| 4 |
+
.. automodule:: src.utils.quick_stats
|
| 5 |
+
:members:
|
| 6 |
+
:undoc-members:
|
| 7 |
+
:show-inheritance:
|
src_code_for_reproducibility/docs/source/src.utils.update_start_epoch.rst
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
src.utils.update\_start\_epoch module
|
| 2 |
+
=====================================
|
| 3 |
+
|
| 4 |
+
.. automodule:: src.utils.update_start_epoch
|
| 5 |
+
:members:
|
| 6 |
+
:undoc-members:
|
| 7 |
+
:show-inheritance:
|
src_code_for_reproducibility/docs/source/usage.rst
ADDED
|
File without changes
|
src_code_for_reproducibility/markov_games/ipd/__pycache__/ipd_simulation.cpython-312.pyc
ADDED
|
Binary file (6.72 kB). View file
|
|
|
src_code_for_reproducibility/utils/__init__.py
ADDED
|
File without changes
|
src_code_for_reproducibility/utils/dict_get_path.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
def get_from_nested_dict(a: dict, path) -> object:
    """Look up a value in a nested container, returning None when it is absent.

    Args:
        a: Outermost mapping (or other subscriptable object) to search.
        path: Either a single key given as a string, or an iterable of keys
            applied one after another, outermost first.

    Returns:
        The value found at ``path``, or None when a key/index is missing or
        an intermediate value cannot be subscripted or iterated.
    """
    # A bare string is a single key, not a sequence of one-character keys.
    keys = [path] if isinstance(path, str) else path
    try:
        for key in keys:
            a = a[key]
    except (LookupError, TypeError):
        # LookupError covers KeyError/IndexError; TypeError covers values that
        # are not subscriptable, unhashable keys and a non-iterable ``path``.
        return None
    return a
|
src_code_for_reproducibility/utils/gather_training_stats.py
ADDED
|
@@ -0,0 +1,257 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import copy
|
| 2 |
+
import csv
|
| 3 |
+
import gc
|
| 4 |
+
import json
|
| 5 |
+
import logging
|
| 6 |
+
import os
|
| 7 |
+
import pickle
|
| 8 |
+
import random
|
| 9 |
+
import re
|
| 10 |
+
import subprocess
|
| 11 |
+
import sys
|
| 12 |
+
import time
|
| 13 |
+
from datetime import datetime
|
| 14 |
+
from statistics import mean
|
| 15 |
+
from typing import Any, Dict
|
| 16 |
+
|
| 17 |
+
import hydra
|
| 18 |
+
import matplotlib.pyplot as plt
|
| 19 |
+
import numpy as np
|
| 20 |
+
import pandas as pd
|
| 21 |
+
import torch
|
| 22 |
+
from omegaconf import OmegaConf
|
| 23 |
+
|
| 24 |
+
from mllm.training.tally_metrics import Tally
|
| 25 |
+
from mllm.utils.stat_pack import StatPack
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def get_from_nested_dict(dictio: dict, path: list[str]):
    """Return the value stored under the nested keys in ``path``, or None.

    Args:
        dictio: Outermost mapping to search.
        path: Keys to follow, outermost first. Must contain at least one key.

    Returns:
        The value at ``path``. None is returned when any key along the way
        (not only the last one) is missing, or when a value met before the
        end of the path is not a mapping.

    Raises:
        IndexError: If ``path`` is empty.
    """
    node = dictio
    for key in path[:-1]:
        try:
            node = node[key]
        except (KeyError, TypeError):
            # Treat a missing intermediate level like a missing final key.
            return None
    if not hasattr(node, "get"):
        # The parent of the final key is not a mapping (e.g. None or a number).
        return None
    return node.get(path[-1])
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def set_at_path(dictio: dict, path: list[str], value):
    """Store ``value`` under the nested keys in ``path``, modifying ``dictio`` in place.

    Every key except the last names a nested dict; levels that do not exist
    yet are created as empty dicts on the way down.
    """
    cursor = dictio
    for parent_key in path[:-1]:
        # setdefault creates the level only when it is absent.
        cursor = cursor.setdefault(parent_key, {})
    cursor[path[-1]] = value
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def produce_tabular_render(inpath: str, outpath: str = None):
    """Write one CSV table per rollout found in a JSON metrics file.

    The JSON document at ``inpath`` is read as an object mapping a rollout
    path to a list of metric dicts. Each rollout becomes one CSV with a
    column per key of its first metric dict and a row per list entry. The
    file name is the rollout path with "/" replaced by "|" and ".json"
    removed, followed by "_tabular_render.render.csv".

    Args:
        inpath: Path of the JSON file to read.
        outpath: Directory that receives the CSV files. When None, a
            "contextualized_tabular_renders" folder next to ``inpath`` is
            used. The directory is created if needed.
    """
    with open(inpath, "r") as f:
        data = json.load(f)

    if outpath is None:
        # os.path.join keeps the result relative when ``inpath`` has no
        # directory part, instead of pointing at the filesystem root.
        out_dir = os.path.join(
            os.path.split(inpath)[0], "contextualized_tabular_renders"
        )
    else:
        out_dir = outpath

    for rollout_path, metrics in data.items():
        if not metrics:
            # No rows to derive columns from; nothing to render.
            continue
        stem = rollout_path.replace("/", "|").replace(".json", "")
        m_path = os.path.join(out_dir, stem + "_tabular_render.render.csv")
        os.makedirs(os.path.split(m_path)[0], exist_ok=True)
        columns = {k: [] for k in metrics[0]}
        for m in metrics:
            for k, v in m.items():
                columns[k].append(v)
        pd.DataFrame(columns).to_csv(m_path)
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def get_metric_paths(data: list[dict]):
    """List the key paths of every leaf value in the first entry of ``data``.

    Only ``data[0]`` is inspected. Nested dicts are descended into; any value
    that is not a dict counts as a leaf.

    Args:
        data: List of (possibly nested) metric dicts.

    Returns:
        A list of paths, each a list of keys from the outermost dict down to
        a leaf, in depth-first insertion order. Empty when ``data`` is empty.
    """
    if not data:
        return []

    paths = []

    def traverse_dict(node, prefix):
        for key, value in node.items():
            # Build a fresh list per step so sibling branches never share state.
            new_path = prefix + [key]
            if isinstance(value, dict):
                traverse_dict(value, new_path)
            else:
                paths.append(new_path)

    traverse_dict(data[0], [])
    return paths
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def print_metric_paths(data: list[dict]):
    """Print each metric key path returned by ``get_metric_paths``, one per line."""
    for metric_path in get_metric_paths(data):
        print(metric_path)
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def get_metric_iteration_list(data: list[dict], metric_path: list[str]):
    """Collect the value found at ``metric_path`` in every record of ``data``.

    Args:
        data: Sequence of nested metric dictionaries.
        metric_path: Keys to follow in each record; a bare string is treated
            as a one-element path.

    Returns:
        A list with one entry per record, as returned by
        ``get_from_nested_dict`` for that record.
    """
    keys = [metric_path] if isinstance(metric_path, str) else metric_path
    return [get_from_nested_dict(record, keys) for record in data]
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def to_1d_numeric(x):
    """Flatten ``x`` into a 1-D float array, or return None if it is not numeric.

    Accepted inputs are Python/numpy scalars, numpy arrays, and arbitrarily
    nested lists/tuples of those. Inside a list or tuple, elements that
    cannot be converted (or convert to an empty array) are skipped; if
    nothing remains the result is None.
    """
    if isinstance(x, (int, float, np.number)):
        return np.array([float(x)], dtype=float)

    if isinstance(x, np.ndarray):
        try:
            return x.astype(float).ravel()
        except Exception:
            return None

    if not isinstance(x, (list, tuple)):
        # Covers None as well as any unsupported type.
        return None

    converted = (to_1d_numeric(item) for item in x)
    chunks = [chunk for chunk in converted if chunk is not None and chunk.size > 0]
    return np.concatenate(chunks) if chunks else None
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def get_single_metric_vector(data, metric_path, iterations=None):
    """Concatenate one metric across all records into a single 1-D float array.

    For each record the value at ``metric_path`` is looked up with
    ``get_from_nested_dict`` and flattened with ``to_1d_numeric``; records
    whose value is not numeric are skipped.

    Args:
        data: Iterable of nested metric dictionaries.
        metric_path: Keys to follow in each record; a bare string is treated
            as a one-element path.
        iterations: Unused. Kept in the signature for backward compatibility.

    Returns:
        A 1-D float numpy array; empty when no record yields numeric data.
    """
    if isinstance(metric_path, str):
        metric_path = [metric_path]
    # The former ``if iterations == None: iterations = len(data)`` was dead
    # code (the value was never read) and has been removed.
    vecs = []
    for record in data:
        arr = to_1d_numeric(get_from_nested_dict(record, metric_path))
        if arr is not None:
            vecs.append(arr)

    return np.concatenate(vecs) if vecs else np.empty(0, dtype=float)
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
def _load_metrics_file(file_path: str):
    """Load and return the object pickled in ``file_path``.

    Args:
        file_path: Path to a pickle file; its name must end in ``.pkl``
            (which includes ``.tally.pkl``).

    Returns:
        Whatever object the pickle contains.

    Raises:
        ValueError: If ``file_path`` does not end in ``.pkl``.
    """
    # ``.tally.pkl`` already ends in ``.pkl``, so one suffix test covers both
    # spellings the original check listed. The message now states what is
    # actually accepted.
    if not file_path.endswith(".pkl"):
        raise ValueError("Only *.pkl files (e.g. *.tally.pkl) are supported.")
    import pickle

    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(file_path, "rb") as f:
        return pickle.load(f)
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
def get_leaf_items(array_tally: dict, prefix: list[str] = None):
    """Yield ``(path, value)`` for every non-dict leaf of a nested dictionary.

    Args:
        array_tally: Nested dictionary to walk.
        prefix: Path components to prepend to every yielded path
            (defaults to an empty path).

    Yields:
        Tuples of ``(path, value)`` in depth-first insertion order, where
        ``path`` is a list of the keys leading to ``value``, each converted
        with ``str``.
    """
    trail = [] if prefix is None else prefix
    for name, node in array_tally.items():
        here = trail + [str(name)]
        if not isinstance(node, dict):
            yield here, node
            continue
        yield from get_leaf_items(node, here)
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
def _sanitize_filename_part(part: str) -> str:
    """Return ``part`` with ``/`` replaced by ``|`` and spaces replaced by ``_``."""
    return part.replace("/", "|").replace(" ", "_")
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
def render_rt_tally_pkl_to_csvs(pkl_path: str, outdir: str):
    """Export every leaf of a pickled rollout tally to its own CSV file.

    This method takes care of tokenwise logging. The pickle is either a dict
    with an ``"array_tally"`` entry or (older format) the tally dict itself.
    Each leaf is a sequence of items exposing ``agent_ids``, ``crn_ids``,
    ``rollout_ids`` and ``metric_matrix``. One CSV per leaf is written to
    ``outdir`` as ``<trainer_id>__<dotted.leaf.path>.render.csv``. Each file
    has one row per row of every item's ``metric_matrix``, prefixed by the
    matching agent, crn and rollout ids.

    Args:
        pkl_path: Path of the ``*.rt_tally.pkl`` file to read.
        outdir: Directory receiving the CSV files (created if missing).
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted tallies.
    with open(pkl_path, "rb") as f:
        payload = pickle.load(f)
    # Backward compatibility: older tallies stored the dict directly
    if isinstance(payload, dict) and "array_tally" in payload:
        array_tally = payload["array_tally"]
    else:
        array_tally = payload

    os.makedirs(outdir, exist_ok=True)
    trainer_id = os.path.basename(pkl_path).replace(".rt_tally.pkl", "")
    for path_list, rollout_tally_items in get_leaf_items(array_tally):
        if len(rollout_tally_items) == 0:
            # The header width comes from the first item. An empty leaf used
            # to raise IndexError after an empty file had been created.
            continue

        path_part = ".".join(_sanitize_filename_part(p) for p in path_list)
        filename = f"{trainer_id}__{path_part}.render.csv"
        out_path = os.path.join(outdir, filename)

        with open(out_path, "w", newline="") as f:
            writer = csv.writer(f)

            # The header is sized from the first item only; later items are
            # written as-is even if their column count differs.
            first_matrix = rollout_tally_items[0].metric_matrix
            metric_cols = first_matrix.shape[1] if first_matrix.ndim > 1 else 1
            header = ["agent_id", "crn_id", "rollout_id"] + [
                f"t_{i}" for i in range(metric_cols)
            ]
            writer.writerow(header)

            for rollout_tally_item in rollout_tally_items:
                crn_ids = rollout_tally_item.crn_ids
                rollout_ids = rollout_tally_item.rollout_ids
                agent_ids = rollout_tally_item.agent_ids
                metric_matrix = rollout_tally_item.metric_matrix
                for i in range(metric_matrix.shape[0]):
                    row_vals = metric_matrix[i].reshape(-1)
                    # Convert to a plain list so it can be appended to the
                    # id prefix with ordinary list concatenation.
                    row_vals = (
                        row_vals.tolist()
                        if hasattr(row_vals, "tolist")
                        else list(row_vals)
                    )
                    writer.writerow(
                        [agent_ids[i], crn_ids[i], rollout_ids[i]] + row_vals
                    )
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
def tally_to_stat_pack(tally: Dict[str, Any]):
    """Flatten a nested tally into a ``StatPack`` of per-leaf means.

    If ``tally`` has an ``"array_tally"`` entry, that entry is used as the
    tree. Every non-dict leaf becomes one stat whose name is the leaf's key
    path joined with ``_`` and whose value is ``np.mean`` of the leaf.

    Args:
        tally: Nested tally dictionary, optionally wrapped under
            ``"array_tally"``.

    Returns:
        A ``StatPack`` with one stat per leaf.
    """
    stat_pack = StatPack()
    tree = tally["array_tally"] if "array_tally" in tally else tally

    # backward compatibility: will remove later, flatten keys in tally
    def iter_leaves(node, trail):
        for name, child in node.items():
            branch = trail + [name]
            if isinstance(child, dict):
                yield from iter_leaves(child, branch)
            else:
                yield branch, child

    # Collect all leaves before aggregating, then reduce each to its mean.
    leaves = list(iter_leaves(tree, []))
    flattened = {}
    for trail, leaf in leaves:
        flattened["_".join(trail)] = np.mean(leaf)

    for stat_name, stat_value in flattened.items():
        stat_pack.add_stat(stat_name, stat_value)
    return stat_pack
|
src_code_for_reproducibility/utils/get_stochastic_game_lengths.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
|
| 3 |
+
def get_stochastic_game_lengths(
    max_length,
    nb_games,
    continuation_prob,
    same_length_batch=False
):
    """
    Sample game lengths from a geometric distribution, capped at a maximum.

    Args:
        max_length (int): Upper bound applied to every sampled length.
        nb_games (int): How many lengths to return.
        continuation_prob (float): Probability that a game continues after
            each round; the geometric success probability is its complement.
            A value of 1 short-circuits to ``max_length`` for every game.
        same_length_batch (bool): If True, draw a single length and use it
            for all games.

    Returns:
        list: ``nb_games`` game lengths.
    """
    if continuation_prob == 1:
        return [max_length] * nb_games

    stop_prob = 1 - continuation_prob
    if same_length_batch:
        shared_length = np.random.geometric(stop_prob, 1)
        sampled = np.repeat(shared_length, nb_games)
    else:
        sampled = np.random.geometric(stop_prob, nb_games)

    capped = np.minimum(sampled, max_length)
    return capped.tolist()
|
src_code_for_reproducibility/utils/kill_sglang.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import psutil
|
| 2 |
+
import signal
|
| 3 |
+
|
| 4 |
+
# Substring searched for in each process command line.
target_name = "sglang::scheduler"
# PIDs that kill_sglang() has sent SIGKILL to; accumulates across calls.
killed = []


def kill_sglang():
    """Send SIGKILL to every process whose command line contains ``target_name``.

    Each signalled PID is printed and appended to the module-level ``killed``
    list. Processes that vanish or deny access while being inspected are
    skipped silently.
    """
    for proc in psutil.process_iter(['pid', 'name', 'cmdline']):
        try:
            # Some processes may not have a name or cmdline
            argv = proc.info['cmdline']
            cmdline = " ".join(argv) if argv else ""
            if target_name not in cmdline:
                continue
            print(f"Killing PID {proc.pid}: {cmdline}")
            proc.send_signal(signal.SIGKILL)
            killed.append(proc.pid)
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            pass
|
src_code_for_reproducibility/utils/output_source_code.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def output_source_code(model, output_path: str) -> None:
    """
    Write ``model.source_code`` to ``output_path``, replacing any existing file.

    Args:
        model: Object exposing a ``source_code`` string attribute.
        output_path: Destination file path, opened in text write mode.
    """
    with open(output_path, "w") as sink:
        sink.write(model.source_code)
|
src_code_for_reproducibility/utils/resource_context.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import time
|
| 3 |
+
from contextlib import contextmanager
|
| 4 |
+
|
| 5 |
+
import torch
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def vram_usage():
    """Describe allocated and reserved CUDA memory for every visible GPU.

    Returns:
        A string with one ``GPU <i>: Memory Allocated: ..., Memory Reserved:
        ...`` entry per device reported by ``torch.cuda.device_count()``,
        separated by ``"; "``. Empty when no device is visible.
    """
    bytes_per_gb = 1024**3  # Convert bytes to GB
    reports = []
    for i in range(torch.cuda.device_count()):
        gpu_memory_allocated = torch.cuda.memory_allocated(i) / bytes_per_gb
        gpu_memory_reserved = torch.cuda.memory_reserved(i) / bytes_per_gb
        reports.append(
            f"GPU {i}: Memory Allocated: {gpu_memory_allocated:.2f} GB, Memory Reserved: {gpu_memory_reserved:.2f} GB"
        )
    # Entries used to be concatenated with no separator, which fused the
    # reports of multiple GPUs into one unreadable run of text.
    return "; ".join(reports)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def ram_usage():
    """Return the current process's resident set size as a formatted string."""
    import psutil

    rss_bytes = psutil.Process().memory_info().rss
    ram_used = rss_bytes / (1024**3)  # Convert bytes to GB
    return f"RAM Usage: {ram_used:.2f} GB"
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
@contextmanager
def resource_logger_context(logger: logging.Logger, task_description: str):
    """
    Context manager to log the resource usage of the current task.

    On exit (normal or via exception) one ``logger.info`` line is emitted
    with the change in VRAM use, the current VRAM use, the peak allocation
    inside the block (all as a percentage of device 0's total memory) and
    the elapsed wall-clock time.

    Args:
        logger: The logger to use to log the resource usage.
        task_description: The description of the task to log.
    Yields:
        None
    """
    # Setup is done before the ``try`` so that a failure here (e.g. CUDA not
    # available) propagates directly instead of also running the ``finally``
    # block against variables that were never assigned.
    initial_time = time.time()
    # Assume CUDA is available and use device 0 only
    total_mem_bytes = torch.cuda.get_device_properties(0).total_memory
    # NOTE(review): the caching allocator's reserved memory appears to
    # already include allocated memory, so this sum may double count. Kept
    # as-is to leave the reported numbers unchanged; confirm.
    initial_total_bytes = (
        torch.cuda.memory_allocated(0) + torch.cuda.memory_reserved(0)
    )
    torch.cuda.reset_peak_memory_stats(0)
    try:
        yield None
    finally:
        # Ensure kernels within the block are accounted for: wait for queued
        # work *before* reading the clock so ΔTime includes it.
        torch.cuda.synchronize()
        final_time = time.time()

        def percent_of_device(num_bytes):
            # Guard against a zero total to avoid division by zero.
            return 100 * num_bytes / total_mem_bytes if total_mem_bytes else 0.0

        # Compute metrics
        final_total_bytes = (
            torch.cuda.memory_allocated(0) + torch.cuda.memory_reserved(0)
        )
        delta_vram_percent_total = percent_of_device(
            final_total_bytes - initial_total_bytes
        )
        current_percent_vram_taken = percent_of_device(final_total_bytes)
        block_peak_percent = percent_of_device(torch.cuda.max_memory_allocated(0))
        delta_time_str = time.strftime(
            '%H:%M:%S', time.gmtime(final_time - initial_time)
        )

        logger.info(
            f"For task: {task_description}, ΔVRAM % (total): {delta_vram_percent_total:.2f}%, Current % of VRAM taken: {current_percent_vram_taken:.2f}%, Block Peak % of device VRAM: {block_peak_percent:.2f}%, ΔTime: {delta_time_str}"
        )
|
src_code_for_reproducibility/utils/rollout_tree_chat_htmls.py
ADDED
|
@@ -0,0 +1,1921 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
from typing import List
|
| 3 |
+
|
| 4 |
+
from mllm.utils.rollout_tree_gather_utils import *
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def html_from_chat_turns(chat_turns: List[ChatTurnLog]) -> str:
|
| 8 |
+
"""
|
| 9 |
+
Render chat turns as a single, wrapping sequence of messages in time order.
|
| 10 |
+
Keep badge and message bubble styles, include time on every badge and
|
| 11 |
+
include rewards on assistant badges. Each message is individually
|
| 12 |
+
hide/show by click; when hidden, only the badge remains and "(...)" is
|
| 13 |
+
shown inline (not inside a bubble).
|
| 14 |
+
"""
|
| 15 |
+
import html
|
| 16 |
+
import re as _re
|
| 17 |
+
|
| 18 |
+
# Prepare ordering: sort by (time_step, original_index) to keep stable order within same step
|
| 19 |
+
indexed_turns = list(enumerate(chat_turns))
|
| 20 |
+
indexed_turns.sort(key=lambda t: (t[1].time_step, t[0]))
|
| 21 |
+
assistant_agents = sorted({t.agent_id for t in chat_turns if t.role == "assistant"})
|
| 22 |
+
enable_split_view = len(assistant_agents) == 2
|
| 23 |
+
|
| 24 |
+
# CSS styles (simplified layout; no time-step or agent-column backgrounds)
|
| 25 |
+
css = """
|
| 26 |
+
<style>
|
| 27 |
+
:root {
|
| 28 |
+
--font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
|
| 29 |
+
--bg: #ffffff;
|
| 30 |
+
--text: #1c0b00;
|
| 31 |
+
--muted-text: #2C3E50;
|
| 32 |
+
--accent-muted: #BDC3C7;
|
| 33 |
+
--accent-muted-2: #D0D7DE;
|
| 34 |
+
--panel-bg: #F8FAFC;
|
| 35 |
+
--reward-color: #3a2e00; /* dark text for reward pill */
|
| 36 |
+
--font-size: 14px;
|
| 37 |
+
--border-width: 2px;
|
| 38 |
+
--corner-radius: 6px;
|
| 39 |
+
--pill-radius-left: 999px 0 0 999px;
|
| 40 |
+
--pill-radius-right: 0 999px 999px 0;
|
| 41 |
+
--inset-shadow: 0 1px 0 rgba(0,0,0,0.03) inset;
|
| 42 |
+
|
| 43 |
+
/* Chat View Colors */
|
| 44 |
+
--alice-bg: #dcf8c6;
|
| 45 |
+
--alice-border: #0eb224;
|
| 46 |
+
--bob-bg: #ffe4cc;
|
| 47 |
+
--bob-border: #ef8323;
|
| 48 |
+
--user-bg: #f5f5f5;
|
| 49 |
+
--chat-bg: #ffffff;
|
| 50 |
+
}
|
| 51 |
+
body {
|
| 52 |
+
font-family: var(--font-family);
|
| 53 |
+
margin: 12px;
|
| 54 |
+
background-color: var(--bg);
|
| 55 |
+
color: var(--text);
|
| 56 |
+
font-size: var(--font-size);
|
| 57 |
+
line-height: 1.5;
|
| 58 |
+
}
|
| 59 |
+
|
| 60 |
+
/* Chat View Styles */
|
| 61 |
+
#flow-chat {
|
| 62 |
+
max-width: 900px;
|
| 63 |
+
margin: 0 auto;
|
| 64 |
+
background: var(--chat-bg);
|
| 65 |
+
padding: 12px 16px 12px 8px;
|
| 66 |
+
border-radius: 8px;
|
| 67 |
+
}
|
| 68 |
+
|
| 69 |
+
.simultaneous-messages {
|
| 70 |
+
display: flex !important;
|
| 71 |
+
flex-direction: row !important;
|
| 72 |
+
flex-wrap: nowrap !important;
|
| 73 |
+
gap: 8px;
|
| 74 |
+
margin-bottom: 4px;
|
| 75 |
+
align-items: flex-start;
|
| 76 |
+
width: 100%;
|
| 77 |
+
overflow: hidden;
|
| 78 |
+
box-sizing: border-box;
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
+
.simultaneous-messages .chat-message {
|
| 82 |
+
flex: 1 1 0 !important;
|
| 83 |
+
margin-bottom: 0 !important;
|
| 84 |
+
display: flex !important;
|
| 85 |
+
flex-direction: row !important;
|
| 86 |
+
align-items: flex-start !important;
|
| 87 |
+
margin-left: 0 !important;
|
| 88 |
+
min-width: 0 !important;
|
| 89 |
+
max-width: 50% !important;
|
| 90 |
+
gap: 0 !important;
|
| 91 |
+
overflow: hidden !important;
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
.simultaneous-messages .chat-message-content {
|
| 95 |
+
max-width: 100% !important;
|
| 96 |
+
width: 100%;
|
| 97 |
+
align-items: flex-start !important;
|
| 98 |
+
margin-left: 0 !important;
|
| 99 |
+
overflow: hidden !important;
|
| 100 |
+
}
|
| 101 |
+
|
| 102 |
+
.simultaneous-messages .chat-message.agent-alice {
|
| 103 |
+
justify-content: flex-start !important;
|
| 104 |
+
}
|
| 105 |
+
|
| 106 |
+
.simultaneous-messages .chat-message.agent-bob {
|
| 107 |
+
justify-content: flex-end !important;
|
| 108 |
+
}
|
| 109 |
+
|
| 110 |
+
.simultaneous-messages .chat-message.agent-alice .chat-message-content {
|
| 111 |
+
margin-left: 0 !important;
|
| 112 |
+
align-items: flex-start !important;
|
| 113 |
+
}
|
| 114 |
+
|
| 115 |
+
.simultaneous-messages .chat-message.agent-bob .chat-message-content {
|
| 116 |
+
margin-left: auto !important;
|
| 117 |
+
margin-right: 0 !important;
|
| 118 |
+
align-items: flex-end !important;
|
| 119 |
+
}
|
| 120 |
+
|
| 121 |
+
.simultaneous-messages .chat-bubble {
|
| 122 |
+
max-width: 100%;
|
| 123 |
+
word-break: break-word;
|
| 124 |
+
overflow-wrap: break-word;
|
| 125 |
+
box-sizing: border-box;
|
| 126 |
+
}
|
| 127 |
+
|
| 128 |
+
.simultaneous-messages .chat-message.agent-alice .chat-bubble {
|
| 129 |
+
border-radius: 10px;
|
| 130 |
+
}
|
| 131 |
+
|
| 132 |
+
.simultaneous-messages .chat-message.agent-bob .chat-bubble {
|
| 133 |
+
border-radius: 10px;
|
| 134 |
+
}
|
| 135 |
+
|
| 136 |
+
.simultaneous-messages .chat-message.agent-alice .chat-header {
|
| 137 |
+
justify-content: flex-start;
|
| 138 |
+
flex-shrink: 0;
|
| 139 |
+
}
|
| 140 |
+
|
| 141 |
+
.simultaneous-messages .chat-message.agent-bob .chat-header {
|
| 142 |
+
justify-content: flex-end;
|
| 143 |
+
flex-shrink: 0;
|
| 144 |
+
}
|
| 145 |
+
|
| 146 |
+
.simultaneous-messages .chat-reasoning {
|
| 147 |
+
max-width: 100%;
|
| 148 |
+
overflow-wrap: break-word;
|
| 149 |
+
}
|
| 150 |
+
|
| 151 |
+
.chat-message {
|
| 152 |
+
display: flex;
|
| 153 |
+
margin-bottom: 2px;
|
| 154 |
+
align-items: flex-end;
|
| 155 |
+
gap: 6px;
|
| 156 |
+
position: relative;
|
| 157 |
+
margin-left: 36px;
|
| 158 |
+
}
|
| 159 |
+
|
| 160 |
+
.chat-message.agent-alice {
|
| 161 |
+
margin-left: 0;
|
| 162 |
+
}
|
| 163 |
+
|
| 164 |
+
.chat-message.agent-alice::before {
|
| 165 |
+
left: 0;
|
| 166 |
+
}
|
| 167 |
+
|
| 168 |
+
.chat-message.role-user {
|
| 169 |
+
opacity: 0.7;
|
| 170 |
+
}
|
| 171 |
+
|
| 172 |
+
.chat-message::before {
|
| 173 |
+
content: '';
|
| 174 |
+
position: absolute;
|
| 175 |
+
left: -36px;
|
| 176 |
+
top: 0;
|
| 177 |
+
bottom: 0;
|
| 178 |
+
width: 36px;
|
| 179 |
+
pointer-events: auto;
|
| 180 |
+
}
|
| 181 |
+
|
| 182 |
+
.merge-btn {
|
| 183 |
+
position: absolute;
|
| 184 |
+
left: -30px;
|
| 185 |
+
top: 50%;
|
| 186 |
+
transform: translateY(-50%);
|
| 187 |
+
width: 26px;
|
| 188 |
+
height: 26px;
|
| 189 |
+
border-radius: 4px;
|
| 190 |
+
border: 1.5px solid var(--accent-muted);
|
| 191 |
+
background: white;
|
| 192 |
+
cursor: pointer;
|
| 193 |
+
font-size: var(--font-size);
|
| 194 |
+
opacity: 0;
|
| 195 |
+
display: flex;
|
| 196 |
+
align-items: center;
|
| 197 |
+
justify-content: center;
|
| 198 |
+
transition: opacity 0.2s ease, transform 0.1s ease;
|
| 199 |
+
padding: 0;
|
| 200 |
+
line-height: 1;
|
| 201 |
+
z-index: 10;
|
| 202 |
+
}
|
| 203 |
+
|
| 204 |
+
.chat-message:hover .merge-btn,
|
| 205 |
+
.merge-btn:hover {
|
| 206 |
+
opacity: 1;
|
| 207 |
+
}
|
| 208 |
+
|
| 209 |
+
.merge-btn:hover {
|
| 210 |
+
background: var(--panel-bg);
|
| 211 |
+
border-color: var(--accent-muted-2);
|
| 212 |
+
transform: translateY(-50%) scale(1.15);
|
| 213 |
+
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.15);
|
| 214 |
+
}
|
| 215 |
+
|
| 216 |
+
.merge-btn:active {
|
| 217 |
+
transform: translateY(-50%) scale(0.95);
|
| 218 |
+
}
|
| 219 |
+
|
| 220 |
+
.chat-message.agent-alice .merge-btn {
|
| 221 |
+
left: -30px;
|
| 222 |
+
}
|
| 223 |
+
|
| 224 |
+
.chat-message.role-user .merge-btn {
|
| 225 |
+
display: none !important;
|
| 226 |
+
}
|
| 227 |
+
|
| 228 |
+
.simultaneous-messages .merge-btn {
|
| 229 |
+
opacity: 0 !important;
|
| 230 |
+
pointer-events: none;
|
| 231 |
+
}
|
| 232 |
+
|
| 233 |
+
.simultaneous-messages {
|
| 234 |
+
padding: 6px 0 6px 0 !important;
|
| 235 |
+
margin-left: 0 !important;
|
| 236 |
+
margin-right: 0 !important;
|
| 237 |
+
position: relative !important;
|
| 238 |
+
background: transparent !important;
|
| 239 |
+
border-radius: 0 !important;
|
| 240 |
+
box-sizing: border-box !important;
|
| 241 |
+
overflow: visible !important;
|
| 242 |
+
max-width: 100% !important;
|
| 243 |
+
border: none !important;
|
| 244 |
+
transition: padding 0.2s ease !important;
|
| 245 |
+
}
|
| 246 |
+
|
| 247 |
+
.simultaneous-messages:hover {
|
| 248 |
+
padding-top: 40px !important;
|
| 249 |
+
}
|
| 250 |
+
|
| 251 |
+
.simultaneous-messages::before {
|
| 252 |
+
content: '⇅ Merged';
|
| 253 |
+
position: absolute;
|
| 254 |
+
left: 0 !important;
|
| 255 |
+
top: 8px !important;
|
| 256 |
+
font-size: var(--font-size);
|
| 257 |
+
font-weight: 500;
|
| 258 |
+
color: #888;
|
| 259 |
+
pointer-events: none;
|
| 260 |
+
opacity: 0;
|
| 261 |
+
transition: opacity 0.2s ease;
|
| 262 |
+
}
|
| 263 |
+
|
| 264 |
+
.simultaneous-messages:hover::before {
|
| 265 |
+
opacity: 1;
|
| 266 |
+
}
|
| 267 |
+
|
| 268 |
+
.unmerge-btn {
|
| 269 |
+
position: absolute !important;
|
| 270 |
+
right: 0 !important;
|
| 271 |
+
top: 6px !important;
|
| 272 |
+
width: 36px !important;
|
| 273 |
+
height: 28px !important;
|
| 274 |
+
border-radius: 5px !important;
|
| 275 |
+
border: 2px solid #d63031 !important;
|
| 276 |
+
background: white !important;
|
| 277 |
+
cursor: pointer !important;
|
| 278 |
+
font-size: var(--font-size) !important;
|
| 279 |
+
font-weight: bold !important;
|
| 280 |
+
color: #d63031 !important;
|
| 281 |
+
display: flex !important;
|
| 282 |
+
align-items: center !important;
|
| 283 |
+
justify-content: center !important;
|
| 284 |
+
transition: all 0.2s ease !important;
|
| 285 |
+
padding: 0 !important;
|
| 286 |
+
line-height: 1 !important;
|
| 287 |
+
z-index: 1000 !important;
|
| 288 |
+
flex: none !important;
|
| 289 |
+
pointer-events: auto !important;
|
| 290 |
+
box-shadow: 0 2px 6px rgba(214, 48, 49, 0.3) !important;
|
| 291 |
+
opacity: 0 !important;
|
| 292 |
+
}
|
| 293 |
+
|
| 294 |
+
.simultaneous-messages:hover .unmerge-btn {
|
| 295 |
+
opacity: 1 !important;
|
| 296 |
+
}
|
| 297 |
+
|
| 298 |
+
.unmerge-btn:hover {
|
| 299 |
+
background: #ffe5e5 !important;
|
| 300 |
+
border-color: #b71c1c !important;
|
| 301 |
+
transform: scale(1.1) !important;
|
| 302 |
+
box-shadow: 0 3px 8px rgba(214, 48, 49, 0.4) !important;
|
| 303 |
+
}
|
| 304 |
+
|
| 305 |
+
.unmerge-btn:active {
|
| 306 |
+
transform: scale(0.95) !important;
|
| 307 |
+
background: #ffcccc !important;
|
| 308 |
+
}
|
| 309 |
+
|
| 310 |
+
.chat-message-content {
|
| 311 |
+
max-width: 72%;
|
| 312 |
+
display: flex;
|
| 313 |
+
flex-direction: column;
|
| 314 |
+
gap: 2px;
|
| 315 |
+
}
|
| 316 |
+
|
| 317 |
+
.chat-message.agent-alice .chat-message-content {
|
| 318 |
+
align-items: flex-start;
|
| 319 |
+
}
|
| 320 |
+
|
| 321 |
+
.chat-message.agent-bob .chat-message-content {
|
| 322 |
+
align-items: flex-end;
|
| 323 |
+
margin-left: auto;
|
| 324 |
+
}
|
| 325 |
+
|
| 326 |
+
.chat-bubble {
|
| 327 |
+
padding: 6px 10px;
|
| 328 |
+
border-radius: 10px;
|
| 329 |
+
word-wrap: break-word;
|
| 330 |
+
position: relative;
|
| 331 |
+
box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
|
| 332 |
+
line-height: 1.4;
|
| 333 |
+
}
|
| 334 |
+
|
| 335 |
+
.chat-message.agent-alice .chat-bubble {
|
| 336 |
+
background: var(--alice-bg);
|
| 337 |
+
border: 2px solid var(--alice-border);
|
| 338 |
+
border-radius: 10px 10px 10px 2px;
|
| 339 |
+
}
|
| 340 |
+
|
| 341 |
+
.chat-message.agent-bob .chat-bubble {
|
| 342 |
+
background: var(--bob-bg);
|
| 343 |
+
border: 2px solid var(--bob-border);
|
| 344 |
+
border-radius: 10px 10px 2px 10px;
|
| 345 |
+
}
|
| 346 |
+
|
| 347 |
+
.chat-message.role-user .chat-bubble {
|
| 348 |
+
background: var(--user-bg);
|
| 349 |
+
border: 2px solid #d0d0d0;
|
| 350 |
+
}
|
| 351 |
+
|
| 352 |
+
.chat-header {
|
| 353 |
+
display: flex;
|
| 354 |
+
align-items: center;
|
| 355 |
+
gap: 4px;
|
| 356 |
+
margin-bottom: 2px;
|
| 357 |
+
font-size: var(--font-size);
|
| 358 |
+
font-weight: 600;
|
| 359 |
+
line-height: 1.2;
|
| 360 |
+
}
|
| 361 |
+
|
| 362 |
+
.chat-message.agent-alice .chat-header {
|
| 363 |
+
color: var(--alice-border);
|
| 364 |
+
}
|
| 365 |
+
|
| 366 |
+
.chat-message.agent-bob .chat-header {
|
| 367 |
+
color: var(--bob-border);
|
| 368 |
+
}
|
| 369 |
+
|
| 370 |
+
.chat-timestamp {
|
| 371 |
+
font-size: var(--font-size);
|
| 372 |
+
color: var(--muted-text);
|
| 373 |
+
margin-top: 1px;
|
| 374 |
+
opacity: 0.75;
|
| 375 |
+
}
|
| 376 |
+
|
| 377 |
+
.chat-reward {
|
| 378 |
+
display: inline-flex;
|
| 379 |
+
align-items: center;
|
| 380 |
+
background: linear-gradient(90deg, #fffdf2 0%, #ffffff 75%);
|
| 381 |
+
color: #000000;
|
| 382 |
+
font-weight: 600;
|
| 383 |
+
font-size: var(--font-size);
|
| 384 |
+
padding: 1px 5px;
|
| 385 |
+
border-radius: 3px;
|
| 386 |
+
border: 1px solid #f4e6a8;
|
| 387 |
+
margin-left: 4px;
|
| 388 |
+
line-height: 1.3;
|
| 389 |
+
}
|
| 390 |
+
|
| 391 |
+
.chat-reasoning {
|
| 392 |
+
font-size: var(--font-size);
|
| 393 |
+
font-style: italic;
|
| 394 |
+
color: #555;
|
| 395 |
+
margin-bottom: 2px;
|
| 396 |
+
padding: 4px 8px;
|
| 397 |
+
background: rgba(0, 0, 0, 0.03);
|
| 398 |
+
border-radius: 5px;
|
| 399 |
+
cursor: pointer;
|
| 400 |
+
line-height: 1.3;
|
| 401 |
+
}
|
| 402 |
+
|
| 403 |
+
.chat-reasoning.collapsed .reasoning-text {
|
| 404 |
+
display: none;
|
| 405 |
+
}
|
| 406 |
+
|
| 407 |
+
.chat-reasoning.collapsed::after {
|
| 408 |
+
content: ' (click to expand)';
|
| 409 |
+
color: #777;
|
| 410 |
+
}
|
| 411 |
+
|
| 412 |
+
.chat-group-divider {
|
| 413 |
+
display: flex;
|
| 414 |
+
align-items: center;
|
| 415 |
+
gap: 8px;
|
| 416 |
+
width: 100%;
|
| 417 |
+
margin: 8px 0 4px 0;
|
| 418 |
+
position: relative;
|
| 419 |
+
cursor: pointer;
|
| 420 |
+
user-select: none;
|
| 421 |
+
}
|
| 422 |
+
|
| 423 |
+
.chat-group-divider::before,
|
| 424 |
+
.chat-group-divider::after {
|
| 425 |
+
content: "";
|
| 426 |
+
flex: 1 1 auto;
|
| 427 |
+
height: 2px;
|
| 428 |
+
background: linear-gradient(90deg, rgba(224,230,235,0), var(--accent-muted-2) 30%, var(--accent-muted-2) 70%, rgba(224,230,235,0));
|
| 429 |
+
}
|
| 430 |
+
|
| 431 |
+
.chat-group-label {
|
| 432 |
+
display: inline-block;
|
| 433 |
+
background: white;
|
| 434 |
+
padding: 2px 12px;
|
| 435 |
+
border-radius: 999px;
|
| 436 |
+
font-size: var(--font-size);
|
| 437 |
+
font-weight: 700;
|
| 438 |
+
color: var(--muted-text);
|
| 439 |
+
border: 1.5px solid var(--accent-muted);
|
| 440 |
+
box-shadow: 0 1px 3px rgba(0, 0, 0, 0.08);
|
| 441 |
+
line-height: 1.4;
|
| 442 |
+
position: relative;
|
| 443 |
+
transition: background 0.2s ease;
|
| 444 |
+
}
|
| 445 |
+
|
| 446 |
+
.chat-group-divider:hover .chat-group-label {
|
| 447 |
+
background: var(--panel-bg);
|
| 448 |
+
}
|
| 449 |
+
|
| 450 |
+
.chat-group-label::before {
|
| 451 |
+
content: '▼ ';
|
| 452 |
+
font-size: 0.8em;
|
| 453 |
+
display: inline-block;
|
| 454 |
+
transition: transform 0.2s ease;
|
| 455 |
+
opacity: 0;
|
| 456 |
+
}
|
| 457 |
+
|
| 458 |
+
.chat-group-divider:hover .chat-group-label::before {
|
| 459 |
+
opacity: 1;
|
| 460 |
+
}
|
| 461 |
+
|
| 462 |
+
.chat-group-divider.collapsed .chat-group-label::before {
|
| 463 |
+
content: '▶ ';
|
| 464 |
+
opacity: 1;
|
| 465 |
+
}
|
| 466 |
+
|
| 467 |
+
.chat-group-divider.collapsed + * {
|
| 468 |
+
display: none !important;
|
| 469 |
+
}
|
| 470 |
+
|
| 471 |
+
/* Hide collapsed rounds in strong hide mode */
|
| 472 |
+
.strong-hide .chat-group-divider.collapsed {
|
| 473 |
+
display: none !important;
|
| 474 |
+
}
|
| 475 |
+
|
| 476 |
+
/* Chat view width control */
|
| 477 |
+
#flow-chat {
|
| 478 |
+
--chat-width: 900px;
|
| 479 |
+
max-width: var(--chat-width);
|
| 480 |
+
margin: 0 auto;
|
| 481 |
+
}
|
| 482 |
+
|
| 483 |
+
/* Hide user messages when toggle is on */
|
| 484 |
+
#flow-chat.hide-user-messages .chat-message.role-user {
|
| 485 |
+
display: none;
|
| 486 |
+
}
|
| 487 |
+
|
| 488 |
+
/* Hide rewards when hiding user messages */
|
| 489 |
+
#flow-chat.hide-user-messages .chat-reward {
|
| 490 |
+
display: none;
|
| 491 |
+
}
|
| 492 |
+
|
| 493 |
+
/* Round context annotations */
|
| 494 |
+
.round-context {
|
| 495 |
+
text-align: center;
|
| 496 |
+
margin: 4px auto;
|
| 497 |
+
max-width: 100%;
|
| 498 |
+
}
|
| 499 |
+
|
| 500 |
+
.round-context-edit {
|
| 501 |
+
min-height: 20px;
|
| 502 |
+
padding: 5px 10px;
|
| 503 |
+
border: 1.5px dashed var(--accent-muted);
|
| 504 |
+
border-radius: 6px;
|
| 505 |
+
background: #fafafa;
|
| 506 |
+
cursor: text;
|
| 507 |
+
transition: all 0.2s ease;
|
| 508 |
+
outline: none;
|
| 509 |
+
font-size: var(--font-size);
|
| 510 |
+
line-height: 1.3;
|
| 511 |
+
user-select: text;
|
| 512 |
+
-webkit-user-select: text;
|
| 513 |
+
-moz-user-select: text;
|
| 514 |
+
-ms-user-select: text;
|
| 515 |
+
}
|
| 516 |
+
|
| 517 |
+
.round-context-edit:focus {
|
| 518 |
+
border-style: solid;
|
| 519 |
+
border-color: var(--accent-muted-2);
|
| 520 |
+
background: #ffffff;
|
| 521 |
+
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
|
| 522 |
+
}
|
| 523 |
+
|
| 524 |
+
.round-context-edit:empty:before {
|
| 525 |
+
content: attr(data-placeholder);
|
| 526 |
+
color: #999;
|
| 527 |
+
font-style: italic;
|
| 528 |
+
}
|
| 529 |
+
|
| 530 |
+
.round-context-controls {
|
| 531 |
+
display: none;
|
| 532 |
+
justify-content: center;
|
| 533 |
+
gap: 4px;
|
| 534 |
+
margin-top: 4px;
|
| 535 |
+
flex-wrap: wrap;
|
| 536 |
+
}
|
| 537 |
+
|
| 538 |
+
.round-context-edit:focus + .round-context-controls,
|
| 539 |
+
.round-context-controls:hover,
|
| 540 |
+
.round-context:focus-within .round-context-controls {
|
| 541 |
+
display: flex;
|
| 542 |
+
}
|
| 543 |
+
|
| 544 |
+
.context-color-btn {
|
| 545 |
+
width: 22px;
|
| 546 |
+
height: 22px;
|
| 547 |
+
border-radius: 50%;
|
| 548 |
+
border: 1.5px solid #fff;
|
| 549 |
+
box-shadow: 0 1px 2px rgba(0, 0, 0, 0.15);
|
| 550 |
+
cursor: pointer;
|
| 551 |
+
transition: transform 0.1s ease;
|
| 552 |
+
}
|
| 553 |
+
|
| 554 |
+
.context-color-btn:hover {
|
| 555 |
+
transform: scale(1.15);
|
| 556 |
+
}
|
| 557 |
+
|
| 558 |
+
.context-color-btn:active {
|
| 559 |
+
transform: scale(0.95);
|
| 560 |
+
}
|
| 561 |
+
|
| 562 |
+
/* Split agent context boxes */
|
| 563 |
+
.split-agent-context {
|
| 564 |
+
display: flex;
|
| 565 |
+
gap: 6px;
|
| 566 |
+
margin: 4px auto;
|
| 567 |
+
max-width: 100%;
|
| 568 |
+
align-items: flex-start;
|
| 569 |
+
}
|
| 570 |
+
|
| 571 |
+
.agent-context-box {
|
| 572 |
+
flex: 1;
|
| 573 |
+
min-width: 0;
|
| 574 |
+
position: relative;
|
| 575 |
+
}
|
| 576 |
+
|
| 577 |
+
.agent-context-box .round-context-edit {
|
| 578 |
+
margin: 0;
|
| 579 |
+
border-radius: 6px;
|
| 580 |
+
padding: 4px 8px;
|
| 581 |
+
min-height: 18px;
|
| 582 |
+
}
|
| 583 |
+
|
| 584 |
+
.agent-context-box.agent-alice .round-context-edit {
|
| 585 |
+
border-color: var(--alice-border);
|
| 586 |
+
background: rgba(14, 178, 36, 0.03);
|
| 587 |
+
}
|
| 588 |
+
|
| 589 |
+
.agent-context-box.agent-bob .round-context-edit {
|
| 590 |
+
border-color: var(--bob-border);
|
| 591 |
+
background: rgba(239, 131, 35, 0.03);
|
| 592 |
+
}
|
| 593 |
+
|
| 594 |
+
.agent-context-box.agent-alice .round-context-edit:focus {
|
| 595 |
+
border-color: var(--alice-border);
|
| 596 |
+
box-shadow: 0 2px 8px rgba(14, 178, 36, 0.2);
|
| 597 |
+
background: rgba(14, 178, 36, 0.05);
|
| 598 |
+
}
|
| 599 |
+
|
| 600 |
+
.agent-context-box.agent-bob .round-context-edit:focus {
|
| 601 |
+
border-color: var(--bob-border);
|
| 602 |
+
box-shadow: 0 2px 8px rgba(239, 131, 35, 0.2);
|
| 603 |
+
background: rgba(239, 131, 35, 0.05);
|
| 604 |
+
}
|
| 605 |
+
|
| 606 |
+
.agent-context-box .round-context-edit::before {
|
| 607 |
+
font-weight: 700;
|
| 608 |
+
font-size: var(--font-size);
|
| 609 |
+
margin-right: 5px;
|
| 610 |
+
letter-spacing: 0.2px;
|
| 611 |
+
}
|
| 612 |
+
|
| 613 |
+
.agent-context-box.agent-alice .round-context-edit::before {
|
| 614 |
+
content: 'Alice Prompt Summary:';
|
| 615 |
+
color: var(--alice-border);
|
| 616 |
+
}
|
| 617 |
+
|
| 618 |
+
.agent-context-box.agent-bob .round-context-edit::before {
|
| 619 |
+
content: 'Bob Prompt Summary:';
|
| 620 |
+
color: var(--bob-border);
|
| 621 |
+
}
|
| 622 |
+
|
| 623 |
+
/* Empty context boxes will be hidden by JavaScript when strong hide is enabled */
|
| 624 |
+
.messages-flow { display: block; }
|
| 625 |
+
.split-wrapper { display: flex; gap: 4px; align-items: flex-start; position: relative; }
|
| 626 |
+
.split-col { flex:1 1 0; min-width:0; }
|
| 627 |
+
/* In split view keep same inline density as linear view */
|
| 628 |
+
.split-col .chat-turn { display: inline; }
|
| 629 |
+
.split-wrapper.resizing { user-select: none; }
|
| 630 |
+
.split-resizer { width:4px; cursor: col-resize; flex:0 0 auto; align-self: stretch; position: relative; background: linear-gradient(90deg, rgba(224,230,235,0), var(--accent-muted-2) 30%, var(--accent-muted-2) 70%, rgba(224,230,235,0)); border-radius:2px; transition: background .15s ease, width .15s ease; }
|
| 631 |
+
.split-resizer:hover { background: linear-gradient(90deg, rgba(224,230,235,0), var(--accent-muted) 35%, var(--accent-muted) 65%, rgba(224,230,235,0)); }
|
| 632 |
+
.split-resizer.dragging { background: linear-gradient(90deg, rgba(224,230,235,0), var(--accent-muted) 25%, var(--accent-muted) 75%, rgba(224,230,235,0)); }
|
| 633 |
+
/* Inline reasoning (removed toggle to prevent layout shift on click) */
|
| 634 |
+
.reasoning-inline { display:inline; font-size:var(--font-size); font-style:italic; color:#555; white-space:pre-wrap; margin-right:4px; cursor:pointer; position:relative; }
|
| 635 |
+
.reasoning-inline .reasoning-text { display:inline; }
|
| 636 |
+
.reasoning-inline .reasoning-icon { display:inline-block; margin-right:2px; }
|
| 637 |
+
.reasoning-inline.collapsed .reasoning-text { display:none; }
|
| 638 |
+
.reasoning-inline.collapsed::after { content:'(...)'; font-style:italic; color:#777; margin-left:4px; }
|
| 639 |
+
.message-box .main-content { white-space:normal; }
|
| 640 |
+
/* tighten spacing */
|
| 641 |
+
.split-col .group-divider { margin:4px 0 2px 0; }
|
| 642 |
+
.toolbar {
|
| 643 |
+
display: flex;
|
| 644 |
+
align-items: center;
|
| 645 |
+
gap: 8px;
|
| 646 |
+
margin-bottom: 0;
|
| 647 |
+
font-size: var(--font-size);
|
| 648 |
+
max-height: 0;
|
| 649 |
+
overflow: hidden;
|
| 650 |
+
opacity: 0;
|
| 651 |
+
pointer-events: none;
|
| 652 |
+
transition: max-height 0.2s ease, opacity 0.2s ease;
|
| 653 |
+
flex-wrap: wrap;
|
| 654 |
+
}
|
| 655 |
+
.toolbar-wrap { position: sticky; top: 0; z-index: 10; background: var(--bg); }
|
| 656 |
+
.toolbar-hotzone { height: 6px; }
|
| 657 |
+
.toolbar-wrap:hover .toolbar { max-height: 500px; opacity: 1; pointer-events: auto; margin-bottom: 12px; }
|
| 658 |
+
.toolbar * { pointer-events: auto !important; }
|
| 659 |
+
.toolbar input,
|
| 660 |
+
.toolbar select { z-index: 100 !important; position: relative; }
|
| 661 |
+
.toolbar input[type="number"],
|
| 662 |
+
.toolbar input[type="text"],
|
| 663 |
+
.toolbar select {
|
| 664 |
+
width: 72px;
|
| 665 |
+
padding: 2px 6px;
|
| 666 |
+
border: 1px solid var(--accent-muted);
|
| 667 |
+
border-radius: var(--corner-radius);
|
| 668 |
+
background: var(--bg);
|
| 669 |
+
user-select: text !important;
|
| 670 |
+
-webkit-user-select: text !important;
|
| 671 |
+
-moz-user-select: text !important;
|
| 672 |
+
-ms-user-select: text !important;
|
| 673 |
+
pointer-events: auto !important;
|
| 674 |
+
cursor: pointer !important;
|
| 675 |
+
}
|
| 676 |
+
.toolbar input[type="text"] {
|
| 677 |
+
cursor: text !important;
|
| 678 |
+
}
|
| 679 |
+
.toolbar input[type="text"]:focus,
|
| 680 |
+
.toolbar input[type="number"]:focus,
|
| 681 |
+
.toolbar select:focus {
|
| 682 |
+
outline: 2px solid #0066cc;
|
| 683 |
+
outline-offset: 1px;
|
| 684 |
+
}
|
| 685 |
+
.toolbar button {
|
| 686 |
+
padding: 4px 8px;
|
| 687 |
+
border: 1px solid var(--accent-muted);
|
| 688 |
+
background: var(--panel-bg);
|
| 689 |
+
border-radius: var(--corner-radius);
|
| 690 |
+
cursor: pointer;
|
| 691 |
+
}
|
| 692 |
+
.chat-turn {
|
| 693 |
+
display: inline; /* inline like text */
|
| 694 |
+
background: transparent;
|
| 695 |
+
position: relative;
|
| 696 |
+
cursor: pointer;
|
| 697 |
+
}
|
| 698 |
+
/* No agent-specific background distinctions */
|
| 699 |
+
.turn-content {
|
| 700 |
+
white-space: normal;
|
| 701 |
+
color: var(--text);
|
| 702 |
+
font-size: var(--font-size);
|
| 703 |
+
display: inline; /* inline flow */
|
| 704 |
+
}
|
| 705 |
+
.chat-turn .agent-badge { margin-right: 0; vertical-align: baseline; }
|
| 706 |
+
.agent-badge {
|
| 707 |
+
display: inline;
|
| 708 |
+
position: relative;
|
| 709 |
+
border: var(--border-width) solid var(--accent-muted); /* slightly thicker */
|
| 710 |
+
border-radius: var(--pill-radius-left); /* round left and bottom-right */
|
| 711 |
+
font-size: var(--font-size);
|
| 712 |
+
color: var(--muted-text);
|
| 713 |
+
background: var(--panel-bg);
|
| 714 |
+
box-shadow: var(--inset-shadow);
|
| 715 |
+
line-height: 1.2;
|
| 716 |
+
border-right: 0;
|
| 717 |
+
}
|
| 718 |
+
/* Use flex on assistant badges to vertically center reward pill */
|
| 719 |
+
.chat-turn.role-assistant .agent-badge { display: inline-flex; align-items: center; }
|
| 720 |
+
.agent-badge::after {
|
| 721 |
+
content: none;
|
| 722 |
+
}
|
| 723 |
+
/* removed external separator; emoji is rendered inside message bubble */
|
| 724 |
+
.agent-name { font-weight: 700; }
|
| 725 |
+
.emoji-bw { filter: grayscale(100%); opacity: 0.95; font-size: var(--font-size); vertical-align: baseline; margin: 0; position: relative; top: -1px; line-height: 1; display: inline-block; }
|
| 726 |
+
.ts-badge {
|
| 727 |
+
position: relative;
|
| 728 |
+
display: inline;
|
| 729 |
+
border: var(--border-width) solid var(--accent-muted-2); /* slightly thicker */
|
| 730 |
+
border-radius: var(--corner-radius); /* not a pill */
|
| 731 |
+
font-size: var(--font-size);
|
| 732 |
+
# font-weight: 700;
|
| 733 |
+
color: var(--muted-text);
|
| 734 |
+
background: #F4F8FB; /* subtle tint */
|
| 735 |
+
# padding: 1px 6px; /* slight padding for visibility */
|
| 736 |
+
margin-right: 8px; /* small gap from following content */
|
| 737 |
+
pointer-events: auto; /* allow events so we can ignore them in JS */
|
| 738 |
+
}
|
| 739 |
+
/* Hide timestep badges when grouping by 1 */
|
| 740 |
+
.hide-ts-badges .ts-badge { display: none; }
|
| 741 |
+
/* Strong hide: completely hide collapsed turns */
|
| 742 |
+
.strong-hide .chat-turn.collapsed { display: none; }
|
| 743 |
+
.ts-badge::before {
|
| 744 |
+
content: "";
|
| 745 |
+
position: relative;
|
| 746 |
+
background: var(--accent-muted-2);
|
| 747 |
+
border-radius: 2px;
|
| 748 |
+
}
|
| 749 |
+
.agent-badge { margin-left: 6px; }
|
| 750 |
+
.message-box {
|
| 751 |
+
display: inline; /* inline bubble behaving like text */
|
| 752 |
+
font-size: var(--font-size);
|
| 753 |
+
border: var(--border-width) solid var(--accent-muted);
|
| 754 |
+
border-radius: var(--pill-radius-right); /* round left and bottom-right */
|
| 755 |
+
position: relative;
|
| 756 |
+
background: var(--bg);
|
| 757 |
+
vertical-align: baseline;
|
| 758 |
+
line-height: 1.2;
|
| 759 |
+
padding-left: 0;
|
| 760 |
+
border-left: 0;
|
| 761 |
+
}
|
| 762 |
+
.chat-turn.agent-alice.role-assistant .message-box::before { color: #0eb224; }
|
| 763 |
+
.chat-turn.agent-bob.role-assistant .message-box::before { color: #ef8323; }
|
| 764 |
+
.chat-turn.collapsed .message-box::before { display: none; }
|
| 765 |
+
/* Assistant bubble border colors by common agent names */
|
| 766 |
+
.chat-turn.agent-alice.role-assistant .message-box { border-color: #0eb224; }
|
| 767 |
+
.chat-turn.agent-bob.role-assistant .message-box { border-color: #ef8323; }
|
| 768 |
+
/* Tie badge and seam to agent color for a cohesive capsule, assistants only */
|
| 769 |
+
.chat-turn.agent-alice.role-assistant .agent-badge { border-color: #0eb224; background: rgba(14,178,36,0.08); }
|
| 770 |
+
.chat-turn.agent-alice.role-assistant .agent-badge::after { border-right-color: #0eb224; }
|
| 771 |
+
.chat-turn.agent-alice.role-assistant .turn-content::before { border-left-color: #0eb224; border-top-color: #0eb224; }
|
| 772 |
+
.chat-turn.agent-alice.role-assistant .message-box { border-color: #0eb224; }
|
| 773 |
+
|
| 774 |
+
.chat-turn.agent-bob.role-assistant .agent-badge { border-color: #ef8323; background: rgba(239,131,35,0.10); }
|
| 775 |
+
.chat-turn.agent-bob.role-assistant .agent-badge::after { border-right-color: #ef8323; }
|
| 776 |
+
.chat-turn.agent-bob.role-assistant .turn-content::before { border-left-color: #ef8323; border-top-color: #ef8323; }
|
| 777 |
+
.chat-turn.agent-bob.role-assistant .message-box { border-color: #ef8323; }
|
| 778 |
+
/* No colored agent-name; keep neutral */
|
| 779 |
+
.reward {
|
| 780 |
+
display: inline-flex;
|
| 781 |
+
align-items: center;
|
| 782 |
+
justify-content: center;
|
| 783 |
+
background: linear-gradient(90deg, #fffdf2 0%, #ffffff 75%);
|
| 784 |
+
color: #000000; /* full black */
|
| 785 |
+
font-weight: 600; /* slightly bolder */
|
| 786 |
+
font-family: "Inter", ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen, Ubuntu, Cantarell, "Fira Sans", "Droid Sans", "Helvetica Neue", Arial, "Noto Sans", sans-serif;
|
| 787 |
+
font-size: var(--font-size);
|
| 788 |
+
letter-spacing: 0.15px;
|
| 789 |
+
line-height: 1;
|
| 790 |
+
padding: 0 4px 1px 4px; /* slight bottom pad for optical centering */
|
| 791 |
+
border-radius: 4px;
|
| 792 |
+
border: 1px solid #f4e6a8;
|
| 793 |
+
margin: 0 4px;
|
| 794 |
+
box-shadow: 0 0 0 1px rgba(255,255,255,0.55) inset, 0 1px 2px rgba(0,0,0,0.04);
|
| 795 |
+
}
|
| 796 |
+
.message-placeholder { display: none; color: #7f8c8d; font-style: italic; }
|
| 797 |
+
.chat-turn.collapsed .message-box { color: transparent; font-size: 0; display: inline-block; }
|
| 798 |
+
.chat-turn.collapsed .message-box::after { content: "(...)"; color: #7f8c8d; font-style: italic; font-size: var(--font-size); line-height: 1.2; }
|
| 799 |
+
.chat-turn.collapsed .agent-badge,
|
| 800 |
+
.chat-turn.collapsed .message-box { opacity: 0.3; }
|
| 801 |
+
/* Group divider - clearer and pretty */
|
| 802 |
+
.group-divider {
|
| 803 |
+
display: flex;
|
| 804 |
+
align-items: center;
|
| 805 |
+
gap: 8px;
|
| 806 |
+
width: 100%;
|
| 807 |
+
margin: 8px 0 4px 0;
|
| 808 |
+
position: relative;
|
| 809 |
+
cursor: pointer;
|
| 810 |
+
user-select: none;
|
| 811 |
+
}
|
| 812 |
+
.group-divider::before,
|
| 813 |
+
.group-divider::after {
|
| 814 |
+
content: "";
|
| 815 |
+
flex: 1 1 auto;
|
| 816 |
+
height: 2px;
|
| 817 |
+
background: linear-gradient(90deg, rgba(224,230,235,0), var(--accent-muted-2) 30%, var(--accent-muted-2) 70%, rgba(224,230,235,0));
|
| 818 |
+
}
|
| 819 |
+
.group-divider .group-label {
|
| 820 |
+
display: inline-block;
|
| 821 |
+
border: 1px solid var(--accent-muted);
|
| 822 |
+
border-radius: 999px;
|
| 823 |
+
padding: 2px 10px;
|
| 824 |
+
font-size: var(--group-label-font-size);
|
| 825 |
+
font-weight: 700;
|
| 826 |
+
color: var(--muted-text);
|
| 827 |
+
background: var(--bg);
|
| 828 |
+
box-shadow: var(--inset-shadow);
|
| 829 |
+
position: relative;
|
| 830 |
+
z-index: 1;
|
| 831 |
+
transition: background 0.2s ease;
|
| 832 |
+
}
|
| 833 |
+
|
| 834 |
+
.group-divider:hover .group-label {
|
| 835 |
+
background: var(--panel-bg);
|
| 836 |
+
}
|
| 837 |
+
|
| 838 |
+
.group-label::before {
|
| 839 |
+
content: '▼ ';
|
| 840 |
+
font-size: 0.8em;
|
| 841 |
+
display: inline-block;
|
| 842 |
+
transition: transform 0.2s ease;
|
| 843 |
+
opacity: 0;
|
| 844 |
+
}
|
| 845 |
+
|
| 846 |
+
.group-divider:hover .group-label::before {
|
| 847 |
+
opacity: 1;
|
| 848 |
+
}
|
| 849 |
+
|
| 850 |
+
.group-divider.collapsed .group-label::before {
|
| 851 |
+
content: '▶ ';
|
| 852 |
+
opacity: 1;
|
| 853 |
+
}
|
| 854 |
+
|
| 855 |
+
/* Hide collapsed rounds in strong hide mode */
|
| 856 |
+
.strong-hide .group-divider.collapsed {
|
| 857 |
+
display: none !important;
|
| 858 |
+
}
|
| 859 |
+
/* Enhance contrast for print / export */
|
| 860 |
+
body.split-mode .group-divider::before,
|
| 861 |
+
body.split-mode .group-divider::after {
|
| 862 |
+
background: linear-gradient(90deg, rgba(224,230,235,0), var(--accent-muted) 25%, var(--accent-muted) 75%, rgba(224,230,235,0));
|
| 863 |
+
}
|
| 864 |
+
.chat-turn .turn-content { position: relative; }
|
| 865 |
+
.chat-turn .turn-content::before {
|
| 866 |
+
content: none;
|
| 867 |
+
}
|
| 868 |
+
.chat-turn .agent-badge {
|
| 869 |
+
position: relative;
|
| 870 |
+
}
|
| 871 |
+
/* removed absolute-positioned emoji to prevent overlap */
|
| 872 |
+
</style>
|
| 873 |
+
"""
|
| 874 |
+
|
| 875 |
+
# HTML structure
|
| 876 |
+
html_parts = [
|
| 877 |
+
"<!DOCTYPE html>",
|
| 878 |
+
"<html>",
|
| 879 |
+
"<head>",
|
| 880 |
+
"<meta charset='UTF-8'>",
|
| 881 |
+
"<title>Chat Turns</title>",
|
| 882 |
+
css,
|
| 883 |
+
"<script>\n"
|
| 884 |
+
"document.addEventListener('DOMContentLoaded', function() {\n"
|
| 885 |
+
" const linearFlow = document.getElementById('flow-linear');\n"
|
| 886 |
+
" const splitFlow = document.getElementById('flow-split');\n"
|
| 887 |
+
" const chatFlow = document.getElementById('flow-chat');\n"
|
| 888 |
+
" let splitViewOn = false;\n"
|
| 889 |
+
" let chatViewOn = true;\n"
|
| 890 |
+
" function activeFlows() { return [chatViewOn && chatFlow ? chatFlow : null, splitViewOn && splitFlow ? splitFlow : null, linearFlow].filter(Boolean).filter(f => f.style.display !== 'none'); }\n"
|
| 891 |
+
" // State for range filtering and strong hide\n"
|
| 892 |
+
" let currentRangeStart = null;\n"
|
| 893 |
+
" let currentRangeEnd = null;\n"
|
| 894 |
+
" let strongHideOn = false;\n"
|
| 895 |
+
" document.body.addEventListener('click', function(e){\n"
|
| 896 |
+
" if (e.target.closest('input, textarea, select, button, .round-context-edit, .toolbar')) { return; }\n"
|
| 897 |
+
" if (e.target.closest('.ts-badge')) { return; }\n"
|
| 898 |
+
" const r = e.target.closest('.reasoning-inline'); if (r) { e.stopPropagation(); r.classList.toggle('collapsed'); return; }\n"
|
| 899 |
+
" const turn = e.target.closest('.chat-turn');\n"
|
| 900 |
+
" if (turn) { e.stopPropagation(); turn.classList.toggle('collapsed'); }\n"
|
| 901 |
+
" });\n"
|
| 902 |
+
" // Reasoning handled via <details>, no JS required\n"
|
| 903 |
+
" function applyRangeFilter() {\n"
|
| 904 |
+
" for (const flow of activeFlows()) {\n"
|
| 905 |
+
" const turns = Array.from(flow.querySelectorAll('.chat-turn'));\n"
|
| 906 |
+
" for (const el of turns) {\n"
|
| 907 |
+
" const t = parseInt(el.getAttribute('data-time-step') || '0', 10);\n"
|
| 908 |
+
" const afterStart = (currentRangeStart === null) || (t >= currentRangeStart);\n"
|
| 909 |
+
" const beforeEnd = (currentRangeEnd === null) || (t <= currentRangeEnd);\n"
|
| 910 |
+
" el.style.display = (afterStart && beforeEnd) ? '' : 'none';\n"
|
| 911 |
+
" }\n"
|
| 912 |
+
" const dividers = Array.from(flow.querySelectorAll('.group-divider'));\n"
|
| 913 |
+
" for (const d of dividers) {\n"
|
| 914 |
+
" let anyVisible = false;\n"
|
| 915 |
+
" let el = d.nextElementSibling;\n"
|
| 916 |
+
" while (el && !el.classList.contains('group-divider')) {\n"
|
| 917 |
+
" if (el.classList.contains('chat-turn')) {\n"
|
| 918 |
+
" const disp = getComputedStyle(el).display;\n"
|
| 919 |
+
" if (disp !== 'none') { anyVisible = true; break; }\n"
|
| 920 |
+
" } else if (el.classList.contains('split-wrapper')) {\n"
|
| 921 |
+
" // Search descendants for any visible chat-turn\n"
|
| 922 |
+
" const turns = Array.from(el.querySelectorAll('.chat-turn'));\n"
|
| 923 |
+
" for (const tEl of turns) {\n"
|
| 924 |
+
" const disp2 = getComputedStyle(tEl).display;\n"
|
| 925 |
+
" if (disp2 !== 'none') { anyVisible = true; break; }\n"
|
| 926 |
+
" }\n"
|
| 927 |
+
" if (anyVisible) break;\n"
|
| 928 |
+
" }\n"
|
| 929 |
+
" el = el.nextElementSibling;\n"
|
| 930 |
+
" }\n"
|
| 931 |
+
" d.style.display = anyVisible ? '' : 'none';\n"
|
| 932 |
+
" }\n"
|
| 933 |
+
" }\n"
|
| 934 |
+
" }\n"
|
| 935 |
+
" function applyGrouping(n) {\n"
|
| 936 |
+
" function groupContainer(container, n) {\n"
|
| 937 |
+
" Array.from(container.querySelectorAll(':scope > .group-divider')).forEach(el => el.remove());\n"
|
| 938 |
+
" if (!n || n <= 0) { return; }\n"
|
| 939 |
+
" const turns = Array.from(container.querySelectorAll(':scope > .chat-turn'));\n"
|
| 940 |
+
" if (turns.length === 0) return;\n"
|
| 941 |
+
" const items = Array.from(container.children).filter(el => !el.classList.contains('group-divider'));\n"
|
| 942 |
+
" const frag = document.createDocumentFragment();\n"
|
| 943 |
+
" let lastGroup = -1;\n"
|
| 944 |
+
" for (const el of items) {\n"
|
| 945 |
+
" if (!el.classList.contains('chat-turn')) { frag.appendChild(el); continue; }\n"
|
| 946 |
+
" const t = parseInt(el.getAttribute('data-time-step') || '0', 10);\n"
|
| 947 |
+
" const g = Math.floor(t / n);\n"
|
| 948 |
+
" if (g !== lastGroup) {\n"
|
| 949 |
+
" const div = document.createElement('div');\n"
|
| 950 |
+
" div.className = 'group-divider';\n"
|
| 951 |
+
" const label = document.createElement('span');\n"
|
| 952 |
+
" label.className = 'group-label';\n"
|
| 953 |
+
" const roundIndex = g + 1;\n"
|
| 954 |
+
" label.textContent = `Round ${roundIndex}`;\n"
|
| 955 |
+
" div.appendChild(label);\n"
|
| 956 |
+
" frag.appendChild(div);\n"
|
| 957 |
+
" lastGroup = g;\n"
|
| 958 |
+
" }\n"
|
| 959 |
+
" frag.appendChild(el);\n"
|
| 960 |
+
" }\n"
|
| 961 |
+
" container.innerHTML = '';\n"
|
| 962 |
+
" container.appendChild(frag);\n"
|
| 963 |
+
" container.classList.toggle('hide-ts-badges', n === 1);\n"
|
| 964 |
+
" container.classList.toggle('strong-hide', strongHideOn);\n"
|
| 965 |
+
" }\n"
|
| 966 |
+
" for (const flow of activeFlows()) {\n"
|
| 967 |
+
" if (flow.id === 'flow-split') {\n"
|
| 968 |
+
" // Snapshot original turns once to avoid drift on repeated grouping\n"
|
| 969 |
+
" const getOriginalTurns = () => {\n"
|
| 970 |
+
" if (!flow.dataset.origData) {\n"
|
| 971 |
+
" const data = [];\n"
|
| 972 |
+
" const cols0 = flow.querySelectorAll('.split-col');\n"
|
| 973 |
+
" cols0.forEach(col => {\n"
|
| 974 |
+
" const agent = col.getAttribute('data-agent') || '';\n"
|
| 975 |
+
" col.querySelectorAll(':scope > .chat-turn').forEach(el => {\n"
|
| 976 |
+
" const t = parseInt(el.getAttribute('data-time-step')||'0',10);\n"
|
| 977 |
+
" data.push({agent, time:t, html: el.outerHTML});\n"
|
| 978 |
+
" });\n"
|
| 979 |
+
" });\n"
|
| 980 |
+
" flow.dataset.origData = JSON.stringify(data);\n"
|
| 981 |
+
" }\n"
|
| 982 |
+
" return JSON.parse(flow.dataset.origData);\n"
|
| 983 |
+
" };\n"
|
| 984 |
+
" const original = getOriginalTurns();\n"
|
| 985 |
+
" const agents = Array.from(new Set(original.map(o => o.agent))).sort();\n"
|
| 986 |
+
" const groups = new Map();\n"
|
| 987 |
+
" original.forEach(o => {\n"
|
| 988 |
+
" const g = n && n > 0 ? Math.floor(o.time / n) : 0;\n"
|
| 989 |
+
" if (!groups.has(g)) groups.set(g, new Map());\n"
|
| 990 |
+
" const gm = groups.get(g);\n"
|
| 991 |
+
" if (!gm.has(o.agent)) gm.set(o.agent, []);\n"
|
| 992 |
+
" gm.get(o.agent).push(o);\n"
|
| 993 |
+
" });\n"
|
| 994 |
+
" flow.innerHTML = '';\n"
|
| 995 |
+
" const sorted = Array.from(groups.keys()).sort((a,b)=>a-b);\n"
|
| 996 |
+
" sorted.forEach(g => {\n"
|
| 997 |
+
" const div = document.createElement('div');\n"
|
| 998 |
+
" div.className = 'group-divider';\n"
|
| 999 |
+
" const label = document.createElement('span');\n"
|
| 1000 |
+
" label.className = 'group-label';\n"
|
| 1001 |
+
" label.textContent = `Round ${g+1}`;\n"
|
| 1002 |
+
" div.appendChild(label);\n"
|
| 1003 |
+
" flow.appendChild(div);\n"
|
| 1004 |
+
" const wrapper = document.createElement('div');\n"
|
| 1005 |
+
" wrapper.className = 'split-wrapper';\n"
|
| 1006 |
+
" agents.forEach(agent => {\n"
|
| 1007 |
+
" const colDiv = document.createElement('div');\n"
|
| 1008 |
+
" colDiv.className = 'split-col';\n"
|
| 1009 |
+
" colDiv.setAttribute('data-agent', agent);\n"
|
| 1010 |
+
" (groups.get(g).get(agent) || []).forEach(o => { colDiv.insertAdjacentHTML('beforeend', o.html); });\n"
|
| 1011 |
+
" wrapper.appendChild(colDiv);\n"
|
| 1012 |
+
" });\n"
|
| 1013 |
+
" if (wrapper.children.length === 2) { const res = document.createElement('div'); res.className='split-resizer'; wrapper.insertBefore(res, wrapper.children[1]); }\n"
|
| 1014 |
+
" flow.appendChild(wrapper);\n"
|
| 1015 |
+
" });\n"
|
| 1016 |
+
" flow.classList.toggle('hide-ts-badges', n === 1);\n"
|
| 1017 |
+
" flow.classList.toggle('strong-hide', strongHideOn);\n"
|
| 1018 |
+
" document.body.classList.add('split-mode');\n"
|
| 1019 |
+
" } else {\n"
|
| 1020 |
+
" groupContainer(flow, n);\n"
|
| 1021 |
+
" }\n"
|
| 1022 |
+
" }\n"
|
| 1023 |
+
" applyRangeFilter();\n"
|
| 1024 |
+
" initSplitResizers();\n"
|
| 1025 |
+
" }\n"
|
| 1026 |
+
" function initSplitResizers() {\n"
|
| 1027 |
+
" const wrappers = document.querySelectorAll('#flow-split .split-wrapper');\n"
|
| 1028 |
+
" wrappers.forEach(wrap => {\n"
|
| 1029 |
+
" const resizer = wrap.querySelector('.split-resizer');\n"
|
| 1030 |
+
" if (!resizer || resizer.dataset.bound) return; resizer.dataset.bound='1';\n"
|
| 1031 |
+
" const cols = wrap.querySelectorAll('.split-col'); if (cols.length !== 2) return; const c0=cols[0], c1=cols[1];\n"
|
| 1032 |
+
" c0.style.flex=c1.style.flex='1 1 0'; c0.style.width=c1.style.width='';\n"
|
| 1033 |
+
" requestAnimationFrame(()=>{ const w0=c0.scrollWidth,w1=c1.scrollWidth,total=w0+w1||1; let p0=w0/total,p1=w1/total; const minP=0.25,maxP=0.75; if(p0<minP){p0=minP;p1=1-p0;} else if(p0>maxP){p0=maxP;p1=1-p0;} c0.style.flex='0 0 '+(p0*100).toFixed(2)+'%'; c1.style.flex='0 0 '+(p1*100).toFixed(2)+'%'; });\n"
|
| 1034 |
+
" let dragging=false,startX=0,startP0=0;\n"
|
| 1035 |
+
" const onDown=e=>{ dragging=true; startX=e.clientX; wrap.classList.add('resizing'); resizer.classList.add('dragging'); const rect=wrap.getBoundingClientRect(); const w=rect.width; const c0Rect=c0.getBoundingClientRect(); startP0=c0Rect.width/w; document.body.style.cursor='col-resize'; e.preventDefault(); };\n"
|
| 1036 |
+
" const onMove=e=>{ if(!dragging)return; const rect=wrap.getBoundingClientRect(); const w=rect.width; let delta=(e.clientX-startX)/w; let newP0=startP0+delta; const minP=0.15,maxP=0.85; if(newP0<minP)newP0=minP; if(newP0>maxP)newP0=maxP; c0.style.flex='0 0 '+(newP0*100).toFixed(2)+'%'; c1.style.flex='0 0 '+((1-newP0)*100).toFixed(2)+'%'; };\n"
|
| 1037 |
+
" const onUp=()=>{ if(!dragging)return; dragging=false; wrap.classList.remove('resizing'); resizer.classList.remove('dragging'); document.body.style.cursor=''; };\n"
|
| 1038 |
+
" resizer.addEventListener('mousedown', onDown); window.addEventListener('mousemove', onMove); window.addEventListener('mouseup', onUp);\n"
|
| 1039 |
+
" resizer.addEventListener('dblclick', e=>{ if(e.shiftKey){ c0.style.flex=c1.style.flex='1 1 0'; requestAnimationFrame(()=>{ const w0=c0.scrollWidth,w1=c1.scrollWidth,total=w0+w1||1; let p0=w0/total,p1=w1/total; const minP=0.25,maxP=0.75; if(p0<minP){p0=minP;p1=1-p0;} else if(p0>maxP){p0=maxP;p1=1-p0;} c0.style.flex='0 0 '+(p0*100).toFixed(2)+'%'; c1.style.flex='0 0 '+(p1*100).toFixed(2)+'%'; }); } else { c0.style.flex='0 0 50%'; c1.style.flex='0 0 50%'; } });\n"
|
| 1040 |
+
" });\n"
|
| 1041 |
+
" }\n"
|
| 1042 |
+
" initSplitResizers();\n"
|
| 1043 |
+
" const input = document.getElementById('group-size');\n"
|
| 1044 |
+
" const btn = document.getElementById('apply-grouping');\n"
|
| 1045 |
+
" if (btn && input) {\n"
|
| 1046 |
+
" btn.addEventListener('click', () => { const n = parseInt(input.value || '0', 10); applyGrouping(n); });\n"
|
| 1047 |
+
" input.addEventListener('keydown', (e) => { if (e.key === 'Enter') { const n = parseInt(input.value || '0', 10); applyGrouping(n); } });\n"
|
| 1048 |
+
" }\n"
|
| 1049 |
+
" if (input) { input.value = '1'; applyGrouping(1); }\n"
|
| 1050 |
+
" const rangeStart = document.getElementById('range-start');\n"
|
| 1051 |
+
" const rangeEnd = document.getElementById('range-end');\n"
|
| 1052 |
+
" const rangeBtn = document.getElementById('apply-range');\n"
|
| 1053 |
+
" if (rangeBtn && rangeStart && rangeEnd) {\n"
|
| 1054 |
+
" const applyRange = () => {\n"
|
| 1055 |
+
" const sv = parseInt(rangeStart.value || '', 10);\n"
|
| 1056 |
+
" const ev = parseInt(rangeEnd.value || '', 10);\n"
|
| 1057 |
+
" currentRangeStart = Number.isFinite(sv) ? sv : null;\n"
|
| 1058 |
+
" currentRangeEnd = Number.isFinite(ev) ? ev : null;\n"
|
| 1059 |
+
" applyRangeFilter();\n"
|
| 1060 |
+
" };\n"
|
| 1061 |
+
" rangeBtn.addEventListener('click', applyRange);\n"
|
| 1062 |
+
" rangeStart.addEventListener('keydown', (e) => { if (e.key === 'Enter') applyRange(); });\n"
|
| 1063 |
+
" rangeEnd.addEventListener('keydown', (e) => { if (e.key === 'Enter') applyRange(); });\n"
|
| 1064 |
+
" }\n"
|
| 1065 |
+
" const strongHideBtn = document.getElementById('toggle-strong-hide');\n"
|
| 1066 |
+
" const strongHideStateEl = document.getElementById('strong-hide-state');\n"
|
| 1067 |
+
" if (strongHideBtn) {\n"
|
| 1068 |
+
" const setLabel = () => { if (strongHideStateEl) { strongHideStateEl.textContent = strongHideOn ? 'On' : 'Off'; } };\n"
|
| 1069 |
+
" strongHideBtn.addEventListener('click', () => { strongHideOn = !strongHideOn; for (const f of activeFlows()) { f.classList.toggle('strong-hide', strongHideOn); } setLabel(); });\n"
|
| 1070 |
+
" if (strongHideOn) { for (const f of activeFlows()) { f.classList.add('strong-hide'); } }\n"
|
| 1071 |
+
" setLabel();\n"
|
| 1072 |
+
" }\n"
|
| 1073 |
+
" const splitBtn = document.getElementById('toggle-split-view');\n"
|
| 1074 |
+
" const splitStateEl = document.getElementById('split-view-state');\n"
|
| 1075 |
+
" if (splitBtn && splitFlow && linearFlow) {\n"
|
| 1076 |
+
" const updateSplit = () => { if (splitStateEl) splitStateEl.textContent = splitViewOn ? 'On' : 'Off'; };\n"
|
| 1077 |
+
" splitBtn.addEventListener('click', () => { if (chatViewOn) return; splitViewOn = !splitViewOn; linearFlow.style.display = splitViewOn ? 'none' : ''; splitFlow.style.display = splitViewOn ? '' : 'none'; applyGrouping(parseInt(input.value||'1',10)); updateSplit(); });\n"
|
| 1078 |
+
" updateSplit();\n"
|
| 1079 |
+
" }\n"
|
| 1080 |
+
" const chatBtn = document.getElementById('toggle-chat-view');\n"
|
| 1081 |
+
" const chatStateEl = document.getElementById('chat-view-state');\n"
|
| 1082 |
+
" const hideUserBtn = document.getElementById('toggle-hide-user-messages');\n"
|
| 1083 |
+
" const hideUserStateEl = document.getElementById('hide-user-state');\n"
|
| 1084 |
+
" const widthControl = document.getElementById('chat-width-control');\n"
|
| 1085 |
+
" const widthSlider = document.getElementById('chat-width-slider');\n"
|
| 1086 |
+
" const widthValue = document.getElementById('chat-width-value');\n"
|
| 1087 |
+
" let hideUserMessages = false;\n"
|
| 1088 |
+
" if (chatBtn && chatFlow && linearFlow) {\n"
|
| 1089 |
+
" const updateChat = () => {\n"
|
| 1090 |
+
" if (chatStateEl) chatStateEl.textContent = chatViewOn ? 'On' : 'Off';\n"
|
| 1091 |
+
" if (hideUserBtn) hideUserBtn.style.display = chatViewOn ? '' : 'none';\n"
|
| 1092 |
+
" if (widthControl) widthControl.style.display = chatViewOn ? '' : 'none';\n"
|
| 1093 |
+
" };\n"
|
| 1094 |
+
" chatBtn.addEventListener('click', () => {\n"
|
| 1095 |
+
" chatViewOn = !chatViewOn;\n"
|
| 1096 |
+
" if (chatViewOn) {\n"
|
| 1097 |
+
" splitViewOn = false;\n"
|
| 1098 |
+
" linearFlow.style.display = 'none';\n"
|
| 1099 |
+
" if (splitFlow) splitFlow.style.display = 'none';\n"
|
| 1100 |
+
" chatFlow.style.display = '';\n"
|
| 1101 |
+
" if (splitStateEl) splitStateEl.textContent = 'Off';\n"
|
| 1102 |
+
" } else {\n"
|
| 1103 |
+
" chatFlow.style.display = 'none';\n"
|
| 1104 |
+
" linearFlow.style.display = '';\n"
|
| 1105 |
+
" }\n"
|
| 1106 |
+
" updateChat();\n"
|
| 1107 |
+
" });\n"
|
| 1108 |
+
" updateChat();\n"
|
| 1109 |
+
" }\n"
|
| 1110 |
+
" if (hideUserBtn && hideUserStateEl && chatFlow) {\n"
|
| 1111 |
+
" const updateHideUser = () => { hideUserStateEl.textContent = hideUserMessages ? 'On' : 'Off'; };\n"
|
| 1112 |
+
" hideUserBtn.addEventListener('click', () => {\n"
|
| 1113 |
+
" hideUserMessages = !hideUserMessages;\n"
|
| 1114 |
+
" chatFlow.classList.toggle('hide-user-messages', hideUserMessages);\n"
|
| 1115 |
+
" updateHideUser();\n"
|
| 1116 |
+
" });\n"
|
| 1117 |
+
" updateHideUser();\n"
|
| 1118 |
+
" }\n"
|
| 1119 |
+
" if (widthSlider && widthValue && chatFlow) {\n"
|
| 1120 |
+
" const savedWidth = localStorage.getItem('chat-view-width');\n"
|
| 1121 |
+
" if (savedWidth) {\n"
|
| 1122 |
+
" widthSlider.value = savedWidth;\n"
|
| 1123 |
+
" chatFlow.style.setProperty('--chat-width', savedWidth + 'px');\n"
|
| 1124 |
+
" widthValue.textContent = savedWidth + 'px';\n"
|
| 1125 |
+
" }\n"
|
| 1126 |
+
" widthSlider.addEventListener('input', (e) => {\n"
|
| 1127 |
+
" const width = e.target.value;\n"
|
| 1128 |
+
" chatFlow.style.setProperty('--chat-width', width + 'px');\n"
|
| 1129 |
+
" widthValue.textContent = width + 'px';\n"
|
| 1130 |
+
" localStorage.setItem('chat-view-width', width);\n"
|
| 1131 |
+
" });\n"
|
| 1132 |
+
" }\n"
|
| 1133 |
+
" const fontFamilySelect = document.getElementById('font-family-select');\n"
|
| 1134 |
+
" const fontSizeInput = document.getElementById('font-size-input');\n"
|
| 1135 |
+
" if (fontFamilySelect) {\n"
|
| 1136 |
+
" const savedFont = localStorage.getItem('render-font-family');\n"
|
| 1137 |
+
" if (savedFont) {\n"
|
| 1138 |
+
" fontFamilySelect.value = savedFont;\n"
|
| 1139 |
+
" document.body.style.setProperty('--font-family', savedFont);\n"
|
| 1140 |
+
" }\n"
|
| 1141 |
+
" fontFamilySelect.addEventListener('change', (e) => {\n"
|
| 1142 |
+
" const font = e.target.value;\n"
|
| 1143 |
+
" document.body.style.setProperty('--font-family', font);\n"
|
| 1144 |
+
" localStorage.setItem('render-font-family', font);\n"
|
| 1145 |
+
" });\n"
|
| 1146 |
+
" }\n"
|
| 1147 |
+
" if (fontSizeInput) {\n"
|
| 1148 |
+
" const savedSize = localStorage.getItem('render-font-size');\n"
|
| 1149 |
+
" if (savedSize) {\n"
|
| 1150 |
+
" fontSizeInput.value = savedSize;\n"
|
| 1151 |
+
" document.body.style.setProperty('--font-size', savedSize + 'px');\n"
|
| 1152 |
+
" }\n"
|
| 1153 |
+
" fontSizeInput.addEventListener('input', (e) => {\n"
|
| 1154 |
+
" const size = e.target.value;\n"
|
| 1155 |
+
" document.body.style.setProperty('--font-size', size + 'px');\n"
|
| 1156 |
+
" localStorage.setItem('render-font-size', size);\n"
|
| 1157 |
+
" });\n"
|
| 1158 |
+
" }\n"
|
| 1159 |
+
" const aliceEmojiInput = document.getElementById('alice-emoji-input');\n"
|
| 1160 |
+
" const aliceNameInput = document.getElementById('alice-name-input');\n"
|
| 1161 |
+
" const bobEmojiInput = document.getElementById('bob-emoji-input');\n"
|
| 1162 |
+
" const bobNameInput = document.getElementById('bob-name-input');\n"
|
| 1163 |
+
" const applyAgentNamesBtn = document.getElementById('apply-agent-names');\n"
|
| 1164 |
+
" function loadAgentNames() {\n"
|
| 1165 |
+
" if (aliceEmojiInput && aliceNameInput && bobEmojiInput && bobNameInput) {\n"
|
| 1166 |
+
" const savedAliceEmoji = localStorage.getItem('alice-emoji') || '🤖';\n"
|
| 1167 |
+
" const savedAliceName = localStorage.getItem('alice-name') || 'Alice';\n"
|
| 1168 |
+
" const savedBobEmoji = localStorage.getItem('bob-emoji') || '🤖';\n"
|
| 1169 |
+
" const savedBobName = localStorage.getItem('bob-name') || 'Bob';\n"
|
| 1170 |
+
" aliceEmojiInput.value = savedAliceEmoji;\n"
|
| 1171 |
+
" aliceNameInput.value = savedAliceName;\n"
|
| 1172 |
+
" bobEmojiInput.value = savedBobEmoji;\n"
|
| 1173 |
+
" bobNameInput.value = savedBobName;\n"
|
| 1174 |
+
" applyAgentNamesToDOM(savedAliceEmoji, savedAliceName, savedBobEmoji, savedBobName);\n"
|
| 1175 |
+
" }\n"
|
| 1176 |
+
" }\n"
|
| 1177 |
+
" function applyAgentNamesToDOM(aliceEmoji, aliceName, bobEmoji, bobName) {\n"
|
| 1178 |
+
" const agentMap = { 'alice': { name: aliceName, emoji: aliceEmoji }, 'bob': { name: bobName, emoji: bobEmoji } };\n"
|
| 1179 |
+
" document.querySelectorAll('[data-agent-id]').forEach(el => {\n"
|
| 1180 |
+
" const agentId = el.getAttribute('data-agent-id');\n"
|
| 1181 |
+
" if (!agentMap[agentId]) return;\n"
|
| 1182 |
+
" if (el.classList.contains('agent-name')) {\n"
|
| 1183 |
+
" el.textContent = agentMap[agentId].name;\n"
|
| 1184 |
+
" } else if (el.classList.contains('emoji-bw')) {\n"
|
| 1185 |
+
" const currentEmoji = el.textContent.trim();\n"
|
| 1186 |
+
" if (currentEmoji === '🤖' || currentEmoji === '👤') {\n"
|
| 1187 |
+
" el.textContent = agentMap[agentId].emoji;\n"
|
| 1188 |
+
" }\n"
|
| 1189 |
+
" }\n"
|
| 1190 |
+
" });\n"
|
| 1191 |
+
" const style = document.createElement('style');\n"
|
| 1192 |
+
" style.id = 'dynamic-agent-names-style';\n"
|
| 1193 |
+
" const existingStyle = document.getElementById('dynamic-agent-names-style');\n"
|
| 1194 |
+
" if (existingStyle) existingStyle.remove();\n"
|
| 1195 |
+
" style.textContent = `\n"
|
| 1196 |
+
" .agent-context-box.agent-alice .round-context-edit::before {\n"
|
| 1197 |
+
" content: '${aliceName} Prompt Summary:';\n"
|
| 1198 |
+
" }\n"
|
| 1199 |
+
" .agent-context-box.agent-bob .round-context-edit::before {\n"
|
| 1200 |
+
" content: '${bobName} Prompt Summary:';\n"
|
| 1201 |
+
" }\n"
|
| 1202 |
+
" `;\n"
|
| 1203 |
+
" document.head.appendChild(style);\n"
|
| 1204 |
+
" }\n"
|
| 1205 |
+
" if (applyAgentNamesBtn && aliceEmojiInput && aliceNameInput && bobEmojiInput && bobNameInput) {\n"
|
| 1206 |
+
" [aliceEmojiInput, aliceNameInput, bobEmojiInput, bobNameInput].forEach(input => {\n"
|
| 1207 |
+
" input.style.pointerEvents = 'auto';\n"
|
| 1208 |
+
" if (input.tagName === 'INPUT') {\n"
|
| 1209 |
+
" input.style.userSelect = 'text';\n"
|
| 1210 |
+
" input.style.webkitUserSelect = 'text';\n"
|
| 1211 |
+
" input.readOnly = false;\n"
|
| 1212 |
+
" }\n"
|
| 1213 |
+
" input.disabled = false;\n"
|
| 1214 |
+
" const stopAll = (e) => { e.stopPropagation(); e.stopImmediatePropagation(); };\n"
|
| 1215 |
+
" input.addEventListener('mousedown', stopAll, true);\n"
|
| 1216 |
+
" input.addEventListener('mouseup', stopAll, true);\n"
|
| 1217 |
+
" input.addEventListener('click', stopAll, true);\n"
|
| 1218 |
+
" input.addEventListener('dblclick', stopAll, true);\n"
|
| 1219 |
+
" input.addEventListener('focus', stopAll, true);\n"
|
| 1220 |
+
" input.addEventListener('blur', stopAll, true);\n"
|
| 1221 |
+
" input.addEventListener('paste', stopAll, true);\n"
|
| 1222 |
+
" input.addEventListener('cut', stopAll, true);\n"
|
| 1223 |
+
" input.addEventListener('copy', stopAll, true);\n"
|
| 1224 |
+
" input.addEventListener('select', stopAll, true);\n"
|
| 1225 |
+
" input.addEventListener('selectstart', stopAll, true);\n"
|
| 1226 |
+
" input.addEventListener('keydown', stopAll, true);\n"
|
| 1227 |
+
" input.addEventListener('keyup', stopAll, true);\n"
|
| 1228 |
+
" input.addEventListener('keypress', stopAll, true);\n"
|
| 1229 |
+
" input.addEventListener('input', stopAll, true);\n"
|
| 1230 |
+
" input.addEventListener('change', stopAll, true);\n"
|
| 1231 |
+
" input.addEventListener('contextmenu', stopAll, true);\n"
|
| 1232 |
+
" });\n"
|
| 1233 |
+
" const applyNames = () => {\n"
|
| 1234 |
+
" const aliceEmoji = aliceEmojiInput.value || '🤖';\n"
|
| 1235 |
+
" const aliceName = aliceNameInput.value.trim() || 'Alice';\n"
|
| 1236 |
+
" const bobEmoji = bobEmojiInput.value || '🤖';\n"
|
| 1237 |
+
" const bobName = bobNameInput.value.trim() || 'Bob';\n"
|
| 1238 |
+
" localStorage.setItem('alice-emoji', aliceEmoji);\n"
|
| 1239 |
+
" localStorage.setItem('alice-name', aliceName);\n"
|
| 1240 |
+
" localStorage.setItem('bob-emoji', bobEmoji);\n"
|
| 1241 |
+
" localStorage.setItem('bob-name', bobName);\n"
|
| 1242 |
+
" applyAgentNamesToDOM(aliceEmoji, aliceName, bobEmoji, bobName);\n"
|
| 1243 |
+
" };\n"
|
| 1244 |
+
" applyAgentNamesBtn.addEventListener('click', applyNames);\n"
|
| 1245 |
+
" [aliceNameInput, bobNameInput].forEach(input => {\n"
|
| 1246 |
+
" input.addEventListener('keydown', (e) => {\n"
|
| 1247 |
+
" if (e.key === 'Enter') {\n"
|
| 1248 |
+
" e.preventDefault();\n"
|
| 1249 |
+
" e.stopPropagation();\n"
|
| 1250 |
+
" e.stopImmediatePropagation();\n"
|
| 1251 |
+
" applyNames();\n"
|
| 1252 |
+
" }\n"
|
| 1253 |
+
" }, true);\n"
|
| 1254 |
+
" });\n"
|
| 1255 |
+
" [aliceEmojiInput, bobEmojiInput].forEach(select => {\n"
|
| 1256 |
+
" select.addEventListener('change', applyNames);\n"
|
| 1257 |
+
" });\n"
|
| 1258 |
+
" }\n"
|
| 1259 |
+
" loadAgentNames();\n"
|
| 1260 |
+
" function setupRoundCollapse() {\n"
|
| 1261 |
+
" document.addEventListener('click', function(e) {\n"
|
| 1262 |
+
" if (e.target.closest('input, textarea, select, button, .round-context-edit, .toolbar')) { return; }\n"
|
| 1263 |
+
" const divider = e.target.closest('.chat-group-divider, .group-divider');\n"
|
| 1264 |
+
" if (!divider) return;\n"
|
| 1265 |
+
" divider.classList.toggle('collapsed');\n"
|
| 1266 |
+
" const isCollapsed = divider.classList.contains('collapsed');\n"
|
| 1267 |
+
" let nextElement = divider.nextElementSibling;\n"
|
| 1268 |
+
" while (nextElement) {\n"
|
| 1269 |
+
" if (nextElement.classList.contains('chat-group-divider') || nextElement.classList.contains('group-divider')) {\n"
|
| 1270 |
+
" break;\n"
|
| 1271 |
+
" }\n"
|
| 1272 |
+
" if (isCollapsed) {\n"
|
| 1273 |
+
" if (!nextElement.dataset.originalDisplay) {\n"
|
| 1274 |
+
" nextElement.dataset.originalDisplay = nextElement.style.display || getComputedStyle(nextElement).display;\n"
|
| 1275 |
+
" }\n"
|
| 1276 |
+
" nextElement.style.display = 'none';\n"
|
| 1277 |
+
" } else {\n"
|
| 1278 |
+
" if (nextElement.dataset.originalDisplay) {\n"
|
| 1279 |
+
" const originalDisplay = nextElement.dataset.originalDisplay;\n"
|
| 1280 |
+
" nextElement.style.display = originalDisplay === 'none' ? '' : originalDisplay;\n"
|
| 1281 |
+
" if (nextElement.style.display === originalDisplay && originalDisplay !== 'none') {\n"
|
| 1282 |
+
" nextElement.style.display = '';\n"
|
| 1283 |
+
" }\n"
|
| 1284 |
+
" delete nextElement.dataset.originalDisplay;\n"
|
| 1285 |
+
" } else {\n"
|
| 1286 |
+
" nextElement.style.display = '';\n"
|
| 1287 |
+
" }\n"
|
| 1288 |
+
" }\n"
|
| 1289 |
+
" nextElement = nextElement.nextElementSibling;\n"
|
| 1290 |
+
" }\n"
|
| 1291 |
+
" e.stopPropagation();\n"
|
| 1292 |
+
" });\n"
|
| 1293 |
+
" }\n"
|
| 1294 |
+
" setupRoundCollapse();\n"
|
| 1295 |
+
" const strongHideBtnChat = document.getElementById('toggle-strong-hide');\n"
|
| 1296 |
+
" function applyStrongHideToChat() {\n"
|
| 1297 |
+
" if (!chatFlow) return;\n"
|
| 1298 |
+
" chatFlow.classList.toggle('strong-hide', strongHideOn);\n"
|
| 1299 |
+
" const contextEdits = chatFlow.querySelectorAll('.round-context-edit');\n"
|
| 1300 |
+
" contextEdits.forEach(edit => {\n"
|
| 1301 |
+
" const parent = edit.closest('.round-context, .agent-context-box, .split-agent-context');\n"
|
| 1302 |
+
" if (parent) {\n"
|
| 1303 |
+
" if (strongHideOn && edit.textContent.trim() === '') {\n"
|
| 1304 |
+
" parent.style.display = 'none';\n"
|
| 1305 |
+
" } else {\n"
|
| 1306 |
+
" parent.style.display = '';\n"
|
| 1307 |
+
" }\n"
|
| 1308 |
+
" }\n"
|
| 1309 |
+
" });\n"
|
| 1310 |
+
" const splitContexts = chatFlow.querySelectorAll('.split-agent-context');\n"
|
| 1311 |
+
" splitContexts.forEach(split => {\n"
|
| 1312 |
+
" if (strongHideOn) {\n"
|
| 1313 |
+
" const boxes = split.querySelectorAll('.agent-context-box');\n"
|
| 1314 |
+
" const allEmpty = Array.from(boxes).every(box => {\n"
|
| 1315 |
+
" const edit = box.querySelector('.round-context-edit');\n"
|
| 1316 |
+
" return edit && edit.textContent.trim() === '';\n"
|
| 1317 |
+
" });\n"
|
| 1318 |
+
" if (allEmpty) split.style.display = 'none';\n"
|
| 1319 |
+
" }\n"
|
| 1320 |
+
" });\n"
|
| 1321 |
+
" }\n"
|
| 1322 |
+
" if (strongHideBtnChat && chatFlow) {\n"
|
| 1323 |
+
" strongHideBtnChat.addEventListener('click', () => {\n"
|
| 1324 |
+
" setTimeout(() => applyStrongHideToChat(), 0);\n"
|
| 1325 |
+
" });\n"
|
| 1326 |
+
" }\n"
|
| 1327 |
+
" document.addEventListener('click', function(e) {\n"
|
| 1328 |
+
" if (e.target.closest('input, textarea, select, .round-context-edit, .toolbar')) { return; }\n"
|
| 1329 |
+
" const chatReasoning = e.target.closest('.chat-reasoning');\n"
|
| 1330 |
+
" if (chatReasoning) {\n"
|
| 1331 |
+
" chatReasoning.classList.toggle('collapsed');\n"
|
| 1332 |
+
" }\n"
|
| 1333 |
+
" });\n"
|
| 1334 |
+
" function applyColorToSelection(color, element) {\n"
|
| 1335 |
+
" const selection = window.getSelection();\n"
|
| 1336 |
+
" if (!selection.rangeCount) return false;\n"
|
| 1337 |
+
" const range = selection.getRangeAt(0);\n"
|
| 1338 |
+
" if (!element.contains(range.commonAncestorContainer)) return false;\n"
|
| 1339 |
+
" const selectedText = range.toString();\n"
|
| 1340 |
+
" if (!selectedText) return false;\n"
|
| 1341 |
+
" if (color === 'default') {\n"
|
| 1342 |
+
" // Remove styling - just extract the text content\n"
|
| 1343 |
+
" const textNode = document.createTextNode(selectedText);\n"
|
| 1344 |
+
" range.deleteContents();\n"
|
| 1345 |
+
" range.insertNode(textNode);\n"
|
| 1346 |
+
" } else {\n"
|
| 1347 |
+
" const span = document.createElement('span');\n"
|
| 1348 |
+
" span.style.color = color;\n"
|
| 1349 |
+
" span.style.fontWeight = '600';\n"
|
| 1350 |
+
" try {\n"
|
| 1351 |
+
" range.surroundContents(span);\n"
|
| 1352 |
+
" } catch (e) {\n"
|
| 1353 |
+
" const contents = range.extractContents();\n"
|
| 1354 |
+
" span.appendChild(contents);\n"
|
| 1355 |
+
" range.insertNode(span);\n"
|
| 1356 |
+
" }\n"
|
| 1357 |
+
" }\n"
|
| 1358 |
+
" return true;\n"
|
| 1359 |
+
" }\n"
|
| 1360 |
+
" let lastFocusedContextEdit = null;\n"
|
| 1361 |
+
" document.addEventListener('focusin', function(e) {\n"
|
| 1362 |
+
" if (e.target.classList.contains('round-context-edit')) {\n"
|
| 1363 |
+
" lastFocusedContextEdit = e.target;\n"
|
| 1364 |
+
" }\n"
|
| 1365 |
+
" });\n"
|
| 1366 |
+
" document.addEventListener('mousedown', function(e) {\n"
|
| 1367 |
+
" if (e.target.classList.contains('context-color-btn')) {\n"
|
| 1368 |
+
" e.preventDefault();\n"
|
| 1369 |
+
" }\n"
|
| 1370 |
+
" });\n"
|
| 1371 |
+
" document.addEventListener('click', function(e) {\n"
|
| 1372 |
+
" if (e.target.closest('input:not(.round-context-edit), textarea, select') && !e.target.classList.contains('context-color-btn')) { return; }\n"
|
| 1373 |
+
" if (e.target.classList.contains('context-color-btn')) {\n"
|
| 1374 |
+
" e.preventDefault();\n"
|
| 1375 |
+
" const color = e.target.dataset.color;\n"
|
| 1376 |
+
" const controls = e.target.closest('.round-context-controls');\n"
|
| 1377 |
+
" const contextEdit = controls ? controls.previousElementSibling : null;\n"
|
| 1378 |
+
" if (contextEdit && contextEdit.classList.contains('round-context-edit')) {\n"
|
| 1379 |
+
" contextEdit.focus();\n"
|
| 1380 |
+
" const selection = window.getSelection();\n"
|
| 1381 |
+
" if (selection.rangeCount > 0 && selection.toString().length > 0 && contextEdit.contains(selection.anchorNode)) {\n"
|
| 1382 |
+
" if (applyColorToSelection(color, contextEdit)) {\n"
|
| 1383 |
+
" const key = contextEdit.dataset.contextKey;\n"
|
| 1384 |
+
" localStorage.setItem(key, contextEdit.innerHTML);\n"
|
| 1385 |
+
" }\n"
|
| 1386 |
+
" } else {\n"
|
| 1387 |
+
" try {\n"
|
| 1388 |
+
" if (color !== 'default') {\n"
|
| 1389 |
+
" document.execCommand('styleWithCSS', false, true);\n"
|
| 1390 |
+
" document.execCommand('foreColor', false, color);\n"
|
| 1391 |
+
" }\n"
|
| 1392 |
+
" const key = contextEdit.dataset.contextKey;\n"
|
| 1393 |
+
" setTimeout(() => localStorage.setItem(key, contextEdit.innerHTML), 10);\n"
|
| 1394 |
+
" } catch (e) {\n"
|
| 1395 |
+
" console.log('Color command failed:', e);\n"
|
| 1396 |
+
" }\n"
|
| 1397 |
+
" }\n"
|
| 1398 |
+
" }\n"
|
| 1399 |
+
" }\n"
|
| 1400 |
+
" });\n"
|
| 1401 |
+
" const contextEdits = document.querySelectorAll('.round-context-edit');\n"
|
| 1402 |
+
" contextEdits.forEach(edit => {\n"
|
| 1403 |
+
" edit.addEventListener('input', function() {\n"
|
| 1404 |
+
" const key = this.dataset.contextKey;\n"
|
| 1405 |
+
" localStorage.setItem(key, this.innerHTML);\n"
|
| 1406 |
+
" });\n"
|
| 1407 |
+
" const key = edit.dataset.contextKey;\n"
|
| 1408 |
+
" const saved = localStorage.getItem(key);\n"
|
| 1409 |
+
" if (saved) {\n"
|
| 1410 |
+
" edit.innerHTML = saved;\n"
|
| 1411 |
+
" }\n"
|
| 1412 |
+
" });\n"
|
| 1413 |
+
" document.addEventListener('click', function(e) {\n"
|
| 1414 |
+
" if (e.target.closest('input, textarea, select, .round-context-edit') && !e.target.classList.contains('merge-btn') && !e.target.classList.contains('unmerge-btn')) { return; }\n"
|
| 1415 |
+
" if (e.target.classList.contains('merge-btn')) {\n"
|
| 1416 |
+
" e.preventDefault();\n"
|
| 1417 |
+
" e.stopPropagation();\n"
|
| 1418 |
+
" const msgId = e.target.dataset.msgId;\n"
|
| 1419 |
+
" const currentMsg = e.target.closest('.chat-message');\n"
|
| 1420 |
+
" if (!currentMsg) return;\n"
|
| 1421 |
+
" if (currentMsg.classList.contains('role-user')) {\n"
|
| 1422 |
+
" alert('Cannot merge user messages');\n"
|
| 1423 |
+
" return;\n"
|
| 1424 |
+
" }\n"
|
| 1425 |
+
" let nextMsg = currentMsg.nextElementSibling;\n"
|
| 1426 |
+
" while (nextMsg && !nextMsg.classList.contains('chat-message')) {\n"
|
| 1427 |
+
" nextMsg = nextMsg.nextElementSibling;\n"
|
| 1428 |
+
" }\n"
|
| 1429 |
+
" while (nextMsg && nextMsg.classList.contains('role-user')) {\n"
|
| 1430 |
+
" nextMsg = nextMsg.nextElementSibling;\n"
|
| 1431 |
+
" while (nextMsg && !nextMsg.classList.contains('chat-message')) {\n"
|
| 1432 |
+
" nextMsg = nextMsg.nextElementSibling;\n"
|
| 1433 |
+
" }\n"
|
| 1434 |
+
" }\n"
|
| 1435 |
+
" if (!nextMsg || nextMsg.classList.contains('chat-message') === false) {\n"
|
| 1436 |
+
" alert('No next assistant message to merge with');\n"
|
| 1437 |
+
" return;\n"
|
| 1438 |
+
" }\n"
|
| 1439 |
+
" if (nextMsg.classList.contains('role-user')) {\n"
|
| 1440 |
+
" alert('Cannot merge with user messages');\n"
|
| 1441 |
+
" return;\n"
|
| 1442 |
+
" }\n"
|
| 1443 |
+
" const parent = currentMsg.parentElement;\n"
|
| 1444 |
+
" if (parent.classList.contains('simultaneous-messages')) {\n"
|
| 1445 |
+
" const wrapper = parent;\n"
|
| 1446 |
+
" currentMsg.style.display = '';\n"
|
| 1447 |
+
" currentMsg.classList.remove('merged');\n"
|
| 1448 |
+
" const refNode = wrapper.nextElementSibling;\n"
|
| 1449 |
+
" parent.parentElement.insertBefore(currentMsg, refNode);\n"
|
| 1450 |
+
" if (nextMsg.parentElement === wrapper) {\n"
|
| 1451 |
+
" parent.parentElement.insertBefore(nextMsg, refNode);\n"
|
| 1452 |
+
" }\n"
|
| 1453 |
+
" if (wrapper.children.length === 0) {\n"
|
| 1454 |
+
" wrapper.remove();\n"
|
| 1455 |
+
" }\n"
|
| 1456 |
+
" } else {\n"
|
| 1457 |
+
" const wrapper = document.createElement('div');\n"
|
| 1458 |
+
" wrapper.className = 'simultaneous-messages';\n"
|
| 1459 |
+
" const unmergeBtn = document.createElement('button');\n"
|
| 1460 |
+
" unmergeBtn.className = 'unmerge-btn';\n"
|
| 1461 |
+
" unmergeBtn.innerHTML = '✕';\n"
|
| 1462 |
+
" unmergeBtn.title = 'Click to unmerge messages';\n"
|
| 1463 |
+
" wrapper.appendChild(unmergeBtn);\n"
|
| 1464 |
+
" wrapper.dataset.firstMsgId = currentMsg.dataset.msgId;\n"
|
| 1465 |
+
" wrapper.dataset.secondMsgId = nextMsg.dataset.msgId;\n"
|
| 1466 |
+
" parent.insertBefore(wrapper, currentMsg);\n"
|
| 1467 |
+
" wrapper.appendChild(currentMsg);\n"
|
| 1468 |
+
" wrapper.appendChild(nextMsg);\n"
|
| 1469 |
+
" currentMsg.classList.add('merged');\n"
|
| 1470 |
+
" nextMsg.classList.add('merged');\n"
|
| 1471 |
+
" }\n"
|
| 1472 |
+
" }\n"
|
| 1473 |
+
" if (e.target.classList.contains('unmerge-btn')) {\n"
|
| 1474 |
+
" const wrapper = e.target.closest('.simultaneous-messages');\n"
|
| 1475 |
+
" if (!wrapper) return;\n"
|
| 1476 |
+
" const parent = wrapper.parentElement;\n"
|
| 1477 |
+
" const firstMsgId = wrapper.dataset.firstMsgId;\n"
|
| 1478 |
+
" const secondMsgId = wrapper.dataset.secondMsgId;\n"
|
| 1479 |
+
" const messages = Array.from(wrapper.querySelectorAll('.chat-message'));\n"
|
| 1480 |
+
" const refNode = wrapper.nextElementSibling;\n"
|
| 1481 |
+
" const firstMsg = messages.find(m => m.dataset.msgId === firstMsgId);\n"
|
| 1482 |
+
" const secondMsg = messages.find(m => m.dataset.msgId === secondMsgId);\n"
|
| 1483 |
+
" if (firstMsg) {\n"
|
| 1484 |
+
" firstMsg.classList.remove('merged');\n"
|
| 1485 |
+
" firstMsg.style.display = '';\n"
|
| 1486 |
+
" parent.insertBefore(firstMsg, refNode);\n"
|
| 1487 |
+
" }\n"
|
| 1488 |
+
" if (secondMsg) {\n"
|
| 1489 |
+
" secondMsg.classList.remove('merged');\n"
|
| 1490 |
+
" secondMsg.style.display = '';\n"
|
| 1491 |
+
" parent.insertBefore(secondMsg, refNode);\n"
|
| 1492 |
+
" }\n"
|
| 1493 |
+
" wrapper.remove();\n"
|
| 1494 |
+
" }\n"
|
| 1495 |
+
" });\n"
|
| 1496 |
+
"});\n"
|
| 1497 |
+
"</script>",
|
| 1498 |
+
"</head>",
|
| 1499 |
+
"<body>",
|
| 1500 |
+
'<div class="toolbar-wrap">',
|
| 1501 |
+
'<div class="toolbar-hotzone"></div>',
|
| 1502 |
+
'<div class="toolbar">',
|
| 1503 |
+
'<label for="group-size">Group every</label>',
|
| 1504 |
+
'<input id="group-size" type="number" min="0" step="1" value="1" />',
|
| 1505 |
+
"<span>timesteps</span>",
|
| 1506 |
+
'<button id="apply-grouping">Apply</button>',
|
| 1507 |
+
'<span style="margin-left:8px"></span>',
|
| 1508 |
+
'<label for="range-start"><span class="emoji-bw">🔎</span> Range</label>',
|
| 1509 |
+
'<input id="range-start" type="number" step="1" />',
|
| 1510 |
+
"<span>to</span>",
|
| 1511 |
+
'<input id="range-end" type="number" step="1" />',
|
| 1512 |
+
'<button id="apply-range"><span class="emoji-bw">▶︎</span> Apply</button>',
|
| 1513 |
+
'<button id="toggle-strong-hide"><span class="emoji-bw">🗜️</span> Strong Hide: <span id="strong-hide-state">Off</span></button>',
|
| 1514 |
+
(
|
| 1515 |
+
'<button id="toggle-split-view"><span class="emoji-bw">🪟</span> Split View: <span id="split-view-state">Off</span></button>'
|
| 1516 |
+
if enable_split_view
|
| 1517 |
+
else ""
|
| 1518 |
+
),
|
| 1519 |
+
'<button id="toggle-chat-view"><span class="emoji-bw">💬</span> Chat View: <span id="chat-view-state">On</span></button>',
|
| 1520 |
+
'<button id="toggle-hide-user-messages"><span class="emoji-bw">👁️</span> Hide Prompts: <span id="hide-user-state">Off</span></button>',
|
| 1521 |
+
'<span id="chat-width-control" style="margin-left:8px;">',
|
| 1522 |
+
'<label for="chat-width-slider"><span class="emoji-bw">↔️</span> Width:</label>',
|
| 1523 |
+
'<input id="chat-width-slider" type="range" min="600" max="1600" step="50" value="900" style="width:120px; vertical-align:middle;" />',
|
| 1524 |
+
'<span id="chat-width-value" style="margin-left:4px;">900px</span>',
|
| 1525 |
+
'</span>',
|
| 1526 |
+
'<span style="margin-left:12px;">',
|
| 1527 |
+
'<label for="font-family-select"><span class="emoji-bw">🔤</span> Font:</label>',
|
| 1528 |
+
'<select id="font-family-select" style="padding:2px 6px; border:1px solid var(--accent-muted); border-radius:var(--corner-radius); background:var(--bg);">',
|
| 1529 |
+
'<option value="\'Segoe UI\', Tahoma, Geneva, Verdana, sans-serif">Segoe UI</option>',
|
| 1530 |
+
'<option value="Arial, sans-serif">Arial</option>',
|
| 1531 |
+
'<option value="\'Helvetica Neue\', Helvetica, sans-serif">Helvetica</option>',
|
| 1532 |
+
'<option value="\'Times New Roman\', Times, serif">Times New Roman</option>',
|
| 1533 |
+
'<option value="Georgia, serif">Georgia</option>',
|
| 1534 |
+
'<option value="\'Courier New\', Courier, monospace">Courier New</option>',
|
| 1535 |
+
'<option value="\'Comic Sans MS\', cursive">Comic Sans</option>',
|
| 1536 |
+
'<option value="\'Trebuchet MS\', sans-serif">Trebuchet MS</option>',
|
| 1537 |
+
'<option value="Verdana, sans-serif">Verdana</option>',
|
| 1538 |
+
'<option value="\'Palatino Linotype\', \'Book Antiqua\', Palatino, serif">Palatino</option>',
|
| 1539 |
+
'<option value="\'Lucida Console\', Monaco, monospace">Lucida Console</option>',
|
| 1540 |
+
'</select>',
|
| 1541 |
+
'</span>',
|
| 1542 |
+
'<span style="margin-left:8px;">',
|
| 1543 |
+
'<label for="font-size-input"><span class="emoji-bw">📏</span> Size:</label>',
|
| 1544 |
+
'<input id="font-size-input" type="number" min="8" max="24" step="1" value="14" style="width:50px;" />',
|
| 1545 |
+
'<span>px</span>',
|
| 1546 |
+
'</span>',
|
| 1547 |
+
'<span style="margin-left:12px; display:flex; align-items:center; gap:8px;">',
|
| 1548 |
+
'<label style="font-weight:600;">Agent Names:</label>',
|
| 1549 |
+
'<select id="alice-emoji-input" style="width:65px; padding:2px 6px; border:1px solid var(--accent-muted); border-radius:var(--corner-radius); background:var(--bg);">',
|
| 1550 |
+
'<option value="🤖">🤖 Robot</option>',
|
| 1551 |
+
'<option value="👤">👤 Human</option>',
|
| 1552 |
+
'</select>',
|
| 1553 |
+
'<input id="alice-name-input" type="text" placeholder="Alice" style="width:80px; padding:2px 6px; border:1px solid var(--accent-muted); border-radius:var(--corner-radius); background:var(--bg);" />',
|
| 1554 |
+
'<span style="margin:0 4px;">|</span>',
|
| 1555 |
+
'<select id="bob-emoji-input" style="width:65px; padding:2px 6px; border:1px solid var(--accent-muted); border-radius:var(--corner-radius); background:var(--bg);">',
|
| 1556 |
+
'<option value="🤖">🤖 Robot</option>',
|
| 1557 |
+
'<option value="👤">👤 Human</option>',
|
| 1558 |
+
'</select>',
|
| 1559 |
+
'<input id="bob-name-input" type="text" placeholder="Bob" style="width:80px; padding:2px 6px; border:1px solid var(--accent-muted); border-radius:var(--corner-radius); background:var(--bg);" />',
|
| 1560 |
+
'<button id="apply-agent-names" style="padding:4px 8px; border:1px solid var(--accent-muted); background:var(--panel-bg); border-radius:var(--corner-radius); cursor:pointer;">Apply</button>',
|
| 1561 |
+
'</span>',
|
| 1562 |
+
"</div>",
|
| 1563 |
+
"</div>",
|
| 1564 |
+
'<div id="flow-linear" class="messages-flow" style="display:none">',
|
| 1565 |
+
]
|
| 1566 |
+
|
| 1567 |
+
last_time_step = None
|
| 1568 |
+
for original_index, turn in indexed_turns:
|
| 1569 |
+
# Build classes
|
| 1570 |
+
agent_class = f"agent-{re.sub('[^a-z0-9_-]', '-', turn.agent_id.lower())}"
|
| 1571 |
+
role_class = f"role-{turn.role}"
|
| 1572 |
+
collapsed_class = " collapsed" if turn.role == "user" else ""
|
| 1573 |
+
|
| 1574 |
+
# Badge content
|
| 1575 |
+
agent_id_clean = html.escape(turn.agent_id).lower()
|
| 1576 |
+
if turn.role == "assistant":
|
| 1577 |
+
name = html.escape(turn.agent_id)
|
| 1578 |
+
emoji = '<span class="emoji-bw" data-agent-id="' + agent_id_clean + '"> 🤖</span>'
|
| 1579 |
+
raw_val = turn.reward
|
| 1580 |
+
if isinstance(raw_val, (int, float)):
|
| 1581 |
+
reward_val = f"{raw_val:.4f}".rstrip("0").rstrip(".")
|
| 1582 |
+
if len(reward_val) > 8:
|
| 1583 |
+
reward_val = reward_val[:8] + "…"
|
| 1584 |
+
else:
|
| 1585 |
+
reward_val = str(raw_val)
|
| 1586 |
+
# Format: "🤖 Alice • Reward: 5.5556 • 💬 :"
|
| 1587 |
+
badge_inner = (
|
| 1588 |
+
f'{emoji} <span class="agent-name" data-agent-id="{agent_id_clean}">{name}</span>'
|
| 1589 |
+
f' <span class="sep"> • </span><span class="reward">Reward ⚑ = {reward_val}</span>'
|
| 1590 |
+
)
|
| 1591 |
+
else:
|
| 1592 |
+
# For user messages, show "Prompt of {Agent ID}" in the badge
|
| 1593 |
+
name = html.escape(turn.agent_id)
|
| 1594 |
+
# Format (no reward): "Prompt of Alice • "
|
| 1595 |
+
badge_inner = f'Prompt of <span class="agent-name" data-agent-id="{agent_id_clean}">{name}</span> <span class="sep"> • </span>:'
|
| 1596 |
+
|
| 1597 |
+
badge = f'<span class="agent-badge">{badge_inner}</span>'
|
| 1598 |
+
|
| 1599 |
+
# Inline timestep distinction badge at step boundaries (render before first message)
|
| 1600 |
+
ts_badge_html = ""
|
| 1601 |
+
if last_time_step is None or turn.time_step != last_time_step:
|
| 1602 |
+
ts_badge_html = f'<span class="ts-badge">⏱ {turn.time_step}</span>'
|
| 1603 |
+
last_time_step = turn.time_step
|
| 1604 |
+
|
| 1605 |
+
escaped_content = html.escape(turn.content)
|
| 1606 |
+
reasoning_html = ""
|
| 1607 |
+
if turn.reasoning_content:
|
| 1608 |
+
# Normalize reasoning to avoid leading/newline whitespace that creates visual gaps
|
| 1609 |
+
_raw_reasoning = turn.reasoning_content.replace("\r\n", "\n")
|
| 1610 |
+
_raw_reasoning = _re.sub(
|
| 1611 |
+
r"^\s*\n+", "", _raw_reasoning
|
| 1612 |
+
) # drop leading blank lines
|
| 1613 |
+
_raw_reasoning = _re.sub(
|
| 1614 |
+
r"\*\*(\s*\n\s*)", r"** ", _raw_reasoning
|
| 1615 |
+
) # newline right after **
|
| 1616 |
+
_raw_reasoning = _re.sub(
|
| 1617 |
+
r"(\s*\n\s*)\*\*", r" **", _raw_reasoning
|
| 1618 |
+
) # newline right before **
|
| 1619 |
+
escaped_reasoning = html.escape(_raw_reasoning)
|
| 1620 |
+
reasoning_html = f'<span class="reasoning-inline"><span class="reasoning-icon">💭</span><span class="reasoning-text">{escaped_reasoning}</span></span>'
|
| 1621 |
+
collapsed_text = re.sub(r"\s+", " ", escaped_content).strip()
|
| 1622 |
+
|
| 1623 |
+
html_parts.append(
|
| 1624 |
+
f'<div class="chat-turn {agent_class} {role_class}{collapsed_class}" data-time-step="{turn.time_step}">'
|
| 1625 |
+
f'<div class="turn-content {agent_class} {role_class}">{ts_badge_html}{badge}'
|
| 1626 |
+
f'<span class="message-box">{reasoning_html}<span class="main-content">💬 {collapsed_text}</span></span>'
|
| 1627 |
+
f'<span class="message-placeholder">(...)</span>'
|
| 1628 |
+
f"</div>"
|
| 1629 |
+
f"</div>"
|
| 1630 |
+
)
|
| 1631 |
+
|
| 1632 |
+
html_parts.append("</div>") # close linear flow
|
| 1633 |
+
if enable_split_view:
|
| 1634 |
+
import html as _html_mod
|
| 1635 |
+
|
| 1636 |
+
html_parts.append(
|
| 1637 |
+
'<div id="flow-split" class="messages-flow" style="display:none">'
|
| 1638 |
+
)
|
| 1639 |
+
html_parts.append('<div class="split-wrapper">')
|
| 1640 |
+
# Per-agent columns
|
| 1641 |
+
per_agent_turns = {
|
| 1642 |
+
aid: [t for t in chat_turns if t.agent_id == aid]
|
| 1643 |
+
for aid in assistant_agents
|
| 1644 |
+
}
|
| 1645 |
+
for idx, aid in enumerate(assistant_agents):
|
| 1646 |
+
turns_agent = per_agent_turns[aid]
|
| 1647 |
+
html_parts.append(
|
| 1648 |
+
f'<div class="split-col" data-agent="{_html_mod.escape(aid)}">'
|
| 1649 |
+
)
|
| 1650 |
+
last_ts_agent = None
|
| 1651 |
+
for turn in turns_agent:
|
| 1652 |
+
agent_class = (
|
| 1653 |
+
f"agent-{re.sub('[^a-z0-9_-]', '-', turn.agent_id.lower())}"
|
| 1654 |
+
)
|
| 1655 |
+
role_class = f"role-{turn.role}"
|
| 1656 |
+
collapsed_class = " collapsed" if turn.role == "user" else ""
|
| 1657 |
+
ts_badge_html = ""
|
| 1658 |
+
if last_ts_agent is None or turn.time_step != last_ts_agent:
|
| 1659 |
+
ts_badge_html = f'<span class="ts-badge">⏱ {turn.time_step}</span>'
|
| 1660 |
+
last_ts_agent = turn.time_step
|
| 1661 |
+
esc_content = _html_mod.escape(turn.content)
|
| 1662 |
+
reasoning_html = ""
|
| 1663 |
+
if turn.reasoning_content:
|
| 1664 |
+
_raw_reasoning = turn.reasoning_content.replace("\r\n", "\n")
|
| 1665 |
+
_raw_reasoning = _re.sub(r"^\s*\n+", "", _raw_reasoning)
|
| 1666 |
+
_raw_reasoning = _re.sub(r"\*\*(\s*\n\s*)", r"** ", _raw_reasoning)
|
| 1667 |
+
_raw_reasoning = _re.sub(r"(\s*\n\s*)\*\*", r" **", _raw_reasoning)
|
| 1668 |
+
esc_reasoning = _html_mod.escape(_raw_reasoning)
|
| 1669 |
+
reasoning_html = f'<span class="reasoning-inline"><span class="reasoning-icon">💭</span><span class="reasoning-text">{esc_reasoning}</span></span>'
|
| 1670 |
+
collapsed_text = re.sub(r"\s+", " ", esc_content).strip()
|
| 1671 |
+
agent_id_clean = _html_mod.escape(turn.agent_id).lower()
|
| 1672 |
+
if turn.role == "assistant":
|
| 1673 |
+
name = _html_mod.escape(turn.agent_id)
|
| 1674 |
+
emoji = '<span class="emoji-bw" data-agent-id="' + agent_id_clean + '"> 🤖</span>'
|
| 1675 |
+
raw_val = turn.reward
|
| 1676 |
+
if isinstance(raw_val, (int, float)):
|
| 1677 |
+
reward_val = f"{raw_val:.4f}".rstrip("0").rstrip(".")
|
| 1678 |
+
if len(reward_val) > 8:
|
| 1679 |
+
reward_val = reward_val[:8] + "…"
|
| 1680 |
+
else:
|
| 1681 |
+
reward_val = str(raw_val)
|
| 1682 |
+
badge_inner = (
|
| 1683 |
+
f'{emoji} <span class="agent-name" data-agent-id="{agent_id_clean}">{name}</span>'
|
| 1684 |
+
f' <span class="sep"> • </span><span class="reward">Reward ⚑ : {reward_val}</span>'
|
| 1685 |
+
)
|
| 1686 |
+
else:
|
| 1687 |
+
name = _html_mod.escape(turn.agent_id)
|
| 1688 |
+
badge_inner = f'Prompt of <span class="agent-name" data-agent-id="{agent_id_clean}">{name}</span> <span class="sep"> • </span>:'
|
| 1689 |
+
badge = f'<span class="agent-badge">{badge_inner}</span>'
|
| 1690 |
+
html_parts.append(
|
| 1691 |
+
f'<div class="chat-turn {agent_class} {role_class}{collapsed_class}" data-time-step="{turn.time_step}">'
|
| 1692 |
+
f'<div class="turn-content {agent_class} {role_class}">{ts_badge_html}{badge}'
|
| 1693 |
+
f'<span class="message-box">{reasoning_html}<span class="main-content">💬 {collapsed_text}</span></span>'
|
| 1694 |
+
f'<span class="message-placeholder">(...)</span>'
|
| 1695 |
+
f"</div></div>"
|
| 1696 |
+
)
|
| 1697 |
+
html_parts.append("</div>") # close split col
|
| 1698 |
+
html_parts.append("</div>") # split-wrapper
|
| 1699 |
+
html_parts.append("</div>") # flow-split
|
| 1700 |
+
|
| 1701 |
+
# Add Chat View
|
| 1702 |
+
import html as _html_mod
|
| 1703 |
+
html_parts.append('<div id="flow-chat" class="messages-flow">')
|
| 1704 |
+
|
| 1705 |
+
# Helper function to add context annotation areas
|
| 1706 |
+
def add_context_area(position: str, time_step: int):
    """Append one editable round-level annotation box to ``html_parts``.

    The box is a ``contenteditable`` div followed by a strip of colour
    swatches. Its ``data-context-key`` is ``round-context-{position}-{time_step}``.

    Args:
        position: Label interpolated into the key and the placeholder text
            (callers in this file pass ``"beginning"`` or ``"end"``).
        time_step: Time step interpolated into the key and the placeholder.
    """
    named_colors = (
        ("red", "#d32f2f"),
        ("orange", "#f57c00"),
        ("yellow", "#f9a825"),
        ("green", "#388e3c"),
        ("blue", "#1976d2"),
        ("purple", "#7b1fa2"),
        ("gray", "#666666"),
    )

    # The first swatch carries data-color="default" and is drawn as a
    # checkerboard rather than a flat colour.
    reset_swatch = (
        '<div class="context-color-btn" data-color="default" '
        'style="background: linear-gradient(135deg, #000 25%, transparent 25%, transparent 75%, #000 75%), '
        'linear-gradient(135deg, #000 25%, transparent 25%, transparent 75%, #000 75%); '
        'background-size: 4px 4px; background-position: 0 0, 2px 2px; '
        'background-color: #fff;" title="Default color"></div>'
    )
    swatches = reset_swatch + "".join(
        f'<div class="context-color-btn" data-color="{hex_code}" '
        f'style="background-color: {hex_code};" title="{label}"></div>'
        for label, hex_code in named_colors
    )

    storage_key = f"round-context-{position}-{time_step}"
    hint = f"Add context {position} round {time_step}..."

    # Emitted as a single fragment so it stays one entry in html_parts.
    html_parts.append(
        '<div class="round-context">'
        '<div class="round-context-edit" contenteditable="true" spellcheck="true" '
        f'data-context-key="{storage_key}" '
        f'data-placeholder="{hint}"></div>'
        f'<div class="round-context-controls">{swatches}</div>'
        '</div>'
    )
|
| 1740 |
+
|
| 1741 |
+
# Helper function to add split agent context boxes
|
| 1742 |
+
def add_split_agent_contexts(position: str, time_step: int):
    """Append a pair of per-agent annotation boxes to ``html_parts``.

    Emits a ``split-agent-context`` container holding one editable box for
    ``alice`` and one for ``bob`` (the two names are hard-coded here). Each
    box gets the key ``agent-context-{agent}-{position}-{time_step}``, a
    ``"..."`` placeholder and the same strip of colour swatches.

    Args:
        position: Label interpolated into each box's key.
        time_step: Time step interpolated into each box's key.
    """
    named_colors = (
        ("red", "#d32f2f"),
        ("orange", "#f57c00"),
        ("yellow", "#f9a825"),
        ("green", "#388e3c"),
        ("blue", "#1976d2"),
        ("purple", "#7b1fa2"),
        ("gray", "#666666"),
    )

    # The first swatch carries data-color="default" and is drawn as a
    # checkerboard rather than a flat colour.
    reset_swatch = (
        '<div class="context-color-btn" data-color="default" '
        'style="background: linear-gradient(135deg, #000 25%, transparent 25%, transparent 75%, #000 75%), '
        'linear-gradient(135deg, #000 25%, transparent 25%, transparent 75%, #000 75%); '
        'background-size: 4px 4px; background-position: 0 0, 2px 2px; '
        'background-color: #fff;" title="Default color"></div>'
    )
    swatches = reset_swatch + "".join(
        f'<div class="context-color-btn" data-color="{hex_code}" '
        f'style="background-color: {hex_code};" title="{label}"></div>'
        for label, hex_code in named_colors
    )

    html_parts.append('<div class="split-agent-context">')

    # One fragment per agent, Alice first, so the entries in html_parts keep
    # their order and count.
    for agent_slug in ("alice", "bob"):
        box_key = f"agent-context-{agent_slug}-{position}-{time_step}"
        box_hint = "..."
        html_parts.append(
            f'<div class="agent-context-box agent-{agent_slug}">'
            '<div class="round-context-edit" contenteditable="true" spellcheck="true" '
            f'data-context-key="{box_key}" '
            f'data-placeholder="{box_hint}"></div>'
            f'<div class="round-context-controls">{swatches}</div>'
            '</div>'
        )

    html_parts.append('</div>')  # split-agent-context
|
| 1793 |
+
|
| 1794 |
+
last_time_step_chat = None
|
| 1795 |
+
for original_index, turn in indexed_turns:
|
| 1796 |
+
agent_class = f"agent-{re.sub('[^a-z0-9_-]', '-', turn.agent_id.lower())}"
|
| 1797 |
+
role_class = f"role-{turn.role}"
|
| 1798 |
+
|
| 1799 |
+
# Add time step divider and beginning context
|
| 1800 |
+
if last_time_step_chat is None or turn.time_step != last_time_step_chat:
|
| 1801 |
+
# Add end contexts for previous round (only regular context, not prompt summary)
|
| 1802 |
+
if last_time_step_chat is not None:
|
| 1803 |
+
add_context_area("end", last_time_step_chat)
|
| 1804 |
+
|
| 1805 |
+
html_parts.append(
|
| 1806 |
+
f'<div class="chat-group-divider">'
|
| 1807 |
+
f'<span class="chat-group-label">⏱ Round {turn.time_step + 1}</span>'
|
| 1808 |
+
f'</div>'
|
| 1809 |
+
)
|
| 1810 |
+
|
| 1811 |
+
# Add beginning contexts for new round (both context and prompt summary)
|
| 1812 |
+
add_context_area("beginning", turn.time_step)
|
| 1813 |
+
add_split_agent_contexts("beginning", turn.time_step)
|
| 1814 |
+
|
| 1815 |
+
last_time_step_chat = turn.time_step
|
| 1816 |
+
|
| 1817 |
+
# Build chat message with merge controls
|
| 1818 |
+
html_parts.append(f'<div class="chat-message {agent_class} {role_class}" data-msg-id="{original_index}">')
|
| 1819 |
+
|
| 1820 |
+
# Add merge control button
|
| 1821 |
+
html_parts.append(
|
| 1822 |
+
f'<button class="merge-btn" title="Merge with next message" data-msg-id="{original_index}">⇄</button>'
|
| 1823 |
+
)
|
| 1824 |
+
|
| 1825 |
+
html_parts.append('<div class="chat-message-content">')
|
| 1826 |
+
|
| 1827 |
+
# Header with agent name and reward (always show reward)
|
| 1828 |
+
agent_id_clean = _html_mod.escape(turn.agent_id).lower()
|
| 1829 |
+
if turn.role == "assistant":
|
| 1830 |
+
name = _html_mod.escape(turn.agent_id)
|
| 1831 |
+
raw_val = turn.reward
|
| 1832 |
+
if isinstance(raw_val, (int, float)):
|
| 1833 |
+
reward_val = f"{raw_val:.4f}".rstrip("0").rstrip(".")
|
| 1834 |
+
if len(reward_val) > 8:
|
| 1835 |
+
reward_val = reward_val[:8] + "…"
|
| 1836 |
+
else:
|
| 1837 |
+
reward_val = str(raw_val)
|
| 1838 |
+
header_html = (
|
| 1839 |
+
f'<div class="chat-header">'
|
| 1840 |
+
f'<span class="emoji-bw" data-agent-id="{agent_id_clean}">🤖</span> <span class="agent-name" data-agent-id="{agent_id_clean}">{name}</span>'
|
| 1841 |
+
f'<span class="chat-reward">⚑ {reward_val}</span>'
|
| 1842 |
+
f'</div>'
|
| 1843 |
+
)
|
| 1844 |
+
else:
|
| 1845 |
+
name = _html_mod.escape(turn.agent_id)
|
| 1846 |
+
header_html = f'<div class="chat-header">Prompt of <span class="agent-name" data-agent-id="{agent_id_clean}">{name}</span></div>'
|
| 1847 |
+
|
| 1848 |
+
html_parts.append(header_html)
|
| 1849 |
+
|
| 1850 |
+
# Reasoning content if present
|
| 1851 |
+
if turn.reasoning_content:
|
| 1852 |
+
_raw_reasoning = turn.reasoning_content.replace("\r\n", "\n")
|
| 1853 |
+
_raw_reasoning = _re.sub(r"^\s*\n+", "", _raw_reasoning)
|
| 1854 |
+
esc_reasoning = _html_mod.escape(_raw_reasoning)
|
| 1855 |
+
html_parts.append(
|
| 1856 |
+
f'<div class="chat-reasoning collapsed">'
|
| 1857 |
+
f'<span class="reasoning-icon">💭</span> '
|
| 1858 |
+
f'<span class="reasoning-text">{esc_reasoning}</span>'
|
| 1859 |
+
f'</div>'
|
| 1860 |
+
)
|
| 1861 |
+
|
| 1862 |
+
# Message bubble
|
| 1863 |
+
esc_content = _html_mod.escape(turn.content)
|
| 1864 |
+
html_parts.append(f'<div class="chat-bubble">{esc_content}</div>')
|
| 1865 |
+
|
| 1866 |
+
html_parts.append('</div>') # chat-message-content
|
| 1867 |
+
html_parts.append('</div>') # chat-message
|
| 1868 |
+
|
| 1869 |
+
# Add end contexts for the last round (only regular context, not prompt summary)
|
| 1870 |
+
if last_time_step_chat is not None:
|
| 1871 |
+
add_context_area("end", last_time_step_chat)
|
| 1872 |
+
|
| 1873 |
+
html_parts.append("</div>") # flow-chat
|
| 1874 |
+
html_parts.extend(["</body>", "</html>"])
|
| 1875 |
+
|
| 1876 |
+
return "\n".join(html_parts)
|
| 1877 |
+
|
| 1878 |
+
|
| 1879 |
+
def export_html_from_rollout_tree(path: Path, outdir: Path, main_only: bool = False):
    """Render a rollout tree file to HTML, one file per path.

    The main path is written to
    ``outdir / "mgid:{id}_main_html_render.render.html"``. Unless
    ``main_only`` is set, every branch path is written to
    ``outdir / "mgid:{id}_branches_html_renders" / "{path_id}_html_render.render.html"``.

    Args:
        path: Rollout tree file, passed as-is to ``load_rollout_tree``.
        outdir: Output directory; created (with parents) if missing.
        main_only: If True, only export the main trajectory (default: False).
    """
    root = load_rollout_tree(path)
    mgid = root.id

    main_path, branch_paths = get_rollout_tree_paths(root)

    outdir.mkdir(parents=True, exist_ok=True)

    # Generate HTML for the main path
    main_html = html_from_chat_turns(gather_all_chat_turns_for_path(main_path))
    main_file = outdir / f"mgid:{mgid}_main_html_render.render.html"
    main_file.write_text(main_html, encoding="utf-8")

    # Honour main_only for the branch export as well as for directory
    # creation; otherwise the loop below would reference a branches
    # directory that was never set up.
    if main_only or not branch_paths:
        return

    branches_dir = outdir / f"mgid:{mgid}_branches_html_renders"
    branches_dir.mkdir(parents=True, exist_ok=True)

    # Generate HTML for each branch path
    for branch_path in branch_paths:
        branch_html = html_from_chat_turns(
            gather_all_chat_turns_for_path(branch_path)
        )
        branch_file = branches_dir / f"{branch_path.id}_html_render.render.html"
        branch_file.write_text(branch_html, encoding="utf-8")
|
src_code_for_reproducibility/utils/rollout_tree_gather_utils.py
ADDED
|
@@ -0,0 +1,314 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import csv
|
| 4 |
+
import os
|
| 5 |
+
import pickle
|
| 6 |
+
import re
|
| 7 |
+
from collections import defaultdict
|
| 8 |
+
from dataclasses import dataclass
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple
|
| 11 |
+
|
| 12 |
+
from mllm.markov_games.rollout_tree import *
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def load_rollout_tree(path: Path) -> RolloutTreeRootNode:
    """Read a pickled dict from ``path`` and validate it into a rollout tree.

    Args:
        path: Location of the pickle file.

    Returns:
        The result of ``RolloutTreeRootNode.model_validate`` on the
        unpickled object.
    """
    # NOTE(review): pickle.load executes arbitrary code embedded in the file;
    # only use this on rollout files from a trusted source.
    with open(path, "rb") as handle:
        payload = pickle.load(handle)
    return RolloutTreeRootNode.model_validate(payload)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
@dataclass
class RolloutNodeList:
    """A named, ordered sequence of rollout tree nodes forming one path."""

    # Path identifier; get_rollout_tree_paths fills it with strings such as
    # "mgid:<id>_type:main".
    id: str
    # Nodes along the path, in traversal order.
    nodes: List[RolloutTreeNode]
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def get_rollout_tree_paths(
    root: RolloutTreeRootNode, mgid: Optional[str] = None
) -> Tuple[RolloutNodeList, List[RolloutNodeList]]:
    """Split a rollout tree into its main path and its branch paths.

    Both walks are iterative, so the length of a rollout is not bounded by
    the interpreter's recursion limit.

    Args:
        root: Root of the rollout tree; traversal starts at ``root.child``.
        mgid: Identifier embedded in the generated path ids. Falls back to
            ``str(root.id)`` when omitted or empty.

    Returns:
        main_path: The nodes reached from the root by always following the
            main child, with id ``mgid:{mgid}_type:main``.
        branch_paths: One entry per (branch node, agent) pair that has a
            non-empty branch list, in traversal order. Each entry holds the
            main-path nodes that precede the branch node followed by the
            branch's own nodes, with id
            ``mgid:{mgid}_type:branch_agent:{agent_id}_time_step:{t}`` where
            ``t`` is the time step of the last main-path node before the
            branch node (0 if there is none).
    """
    mgid_str = mgid or str(root.id)

    def collect_path_nodes(start) -> List[RolloutTreeNode]:
        """Collect the nodes reached from ``start`` by always taking the main child."""
        collected: List[RolloutTreeNode] = []
        cursor = start
        while cursor is not None:
            if isinstance(cursor, RolloutTreeNode):
                collected.append(cursor)
                cursor = cursor.child
            elif isinstance(cursor, RolloutTreeBranchNode):
                # For branch nodes, only the main child continues the path.
                if not cursor.main_child:
                    break
                collected.append(cursor.main_child)
                cursor = cursor.main_child.child
            else:
                # Unknown node type: stop here instead of failing mid-walk.
                break
        return collected

    branch_paths: List[RolloutNodeList] = []

    # Walk the main path once, growing a single prefix list in place and
    # copying it only when a branch path actually needs a snapshot.
    main_prefix: List[RolloutTreeNode] = []
    last_time_step: Optional[int] = 0
    cursor = root.child
    while cursor is not None:
        if isinstance(cursor, RolloutTreeNode):
            main_prefix.append(cursor)
            last_time_step = cursor.time_step
            cursor = cursor.child
        elif isinstance(cursor, RolloutTreeBranchNode):
            if cursor.branches:
                for agent_id, branch_node_list in cursor.branches.items():
                    if not branch_node_list:
                        continue
                    # NOTE(review): every entry of an agent's branch list is
                    # appended to the same path, so several branches for one
                    # agent end up concatenated under one id -- confirm this
                    # is intended.
                    branch_nodes = list(main_prefix)
                    for branch_node in branch_node_list:
                        branch_nodes.extend(collect_path_nodes(branch_node))
                    branch_paths.append(
                        RolloutNodeList(
                            id=(
                                f"mgid:{mgid_str}_type:branch_agent:{agent_id}"
                                f"_time_step:{last_time_step}"
                            ),
                            nodes=branch_nodes,
                        )
                    )
            # Without a main child there is nothing left to follow.
            if not cursor.main_child:
                break
            main_prefix.append(cursor.main_child)
            last_time_step = cursor.main_child.time_step
            cursor = cursor.main_child.child
        else:
            break

    main_path = RolloutNodeList(
        id=f"mgid:{mgid_str}_type:main",
        nodes=collect_path_nodes(root.child),
    )

    return main_path, branch_paths
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
class ChatTurnLog(BaseModel):
    """Flat record describing a single chat turn taken from a rollout path.

    Instances are built by the gather helpers in this module, which copy the
    message fields from a chat turn and attach the time step and reward of the
    node the turn was found in.
    """

    # Time step of the rollout node that holds this turn.
    time_step: int
    # Agent whose action log the turn came from.
    agent_id: str
    # Message role; the gather helpers special-case the value "user".
    role: str
    # Message text.
    content: str
    # Optional reasoning text; None when the source turn has no such attribute.
    reasoning_content: Optional[str] = None
    # Copied verbatim from the source chat turn's `is_state_end` flag.
    is_state_end: bool
    # Reward recorded for this agent at the node (0 when the agent has no entry).
    reward: float
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
def gather_agent_chat_turns_for_path(
    agent_id: str, path: RolloutNodeList
) -> List[ChatTurnLog]:
    """Collect one agent's chat turns along a path, in path order.

    Nodes where the agent has no (or an empty) action log contribute nothing.
    The reward attached to each turn is the agent's reward at that node, or 0
    when the node records no reward for the agent.
    """
    collected: List[ChatTurnLog] = []
    for node in path.nodes:
        step_log = node.step_log
        agent_log = step_log.action_logs.get(agent_id, [])
        if not agent_log:
            continue
        collected.extend(
            ChatTurnLog(
                time_step=node.time_step,
                agent_id=agent_id,
                role=message.role,
                content=message.content,
                reasoning_content=getattr(message, "reasoning_content", None),
                is_state_end=message.is_state_end,
                reward=step_log.simulation_step_log.rewards.get(agent_id, 0),
            )
            for message in agent_log.chat_turns or []
        )
    return collected
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
def gather_all_chat_turns_for_path(path: RolloutNodeList) -> List[ChatTurnLog]:
    """Collect every agent's chat turns along a path, interleaved per node.

    Nodes are visited in path order. Within a node, each agent's turns are
    grouped into units: a "user" turn together with the turn that directly
    follows it, or a single turn on its own. Units are then emitted
    round-robin over the agents in sorted id order: A1, B1, A2, B2, ...
    """
    ordered: List[ChatTurnLog] = []

    for node in path.nodes:
        step_log = node.step_log
        agent_ids = sorted(step_log.action_logs.keys())
        units_by_agent: Dict[str, List[List[ChatTurnLog]]] = {}

        for agent_id in agent_ids:
            agent_log = step_log.action_logs.get(agent_id)
            units: List[List[ChatTurnLog]] = []
            open_unit: List[ChatTurnLog] = []  # holds a user turn awaiting its reply

            for message in (agent_log.chat_turns or []) if agent_log else []:
                entry = ChatTurnLog(
                    time_step=node.time_step,
                    agent_id=agent_id,
                    role=message.role,
                    content=message.content,
                    reasoning_content=getattr(message, "reasoning_content", None),
                    is_state_end=message.is_state_end,
                    reward=step_log.simulation_step_log.rewards.get(agent_id, 0),
                )

                if message.role == "user":
                    # A new user turn closes whatever unit is still open.
                    if open_unit:
                        units.append(open_unit)
                    open_unit = [entry]
                elif len(open_unit) == 1 and open_unit[0].role == "user":
                    # Reply to the pending user turn: the pair is complete.
                    units.append(open_unit + [entry])
                    open_unit = []
                else:
                    # Nothing to attach to; the turn forms a unit by itself.
                    units.append([entry])

            if open_unit:
                # Trailing user turn that never received a reply.
                units.append(open_unit)

            units_by_agent[agent_id] = units

        # Round-robin: the i-th unit of every agent, for increasing i.
        rounds = max((len(units) for units in units_by_agent.values()), default=0)
        for round_index in range(rounds):
            for agent_id in agent_ids:
                agent_units = units_by_agent.get(agent_id, [])
                if round_index < len(agent_units):
                    ordered.extend(agent_units[round_index])

    return ordered
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
def chat_turns_to_dict(chat_turns: Iterator[ChatTurnLog]) -> Iterator[Dict[str, Any]]:
    """Lazily convert chat turns to plain dicts (via `model_dump`) for JSON output."""
    yield from (turn.model_dump() for turn in chat_turns)
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
def get_all_agents(root: RolloutTreeRootNode) -> List[str]:
    """Return the sorted agent ids found in the first node of the tree.

    Only the first node is inspected (for a branch node, its main child); the
    ids are the union of that node's action-log keys and reward keys. An empty
    list is returned when there is no such node.
    """
    head = root.child
    if head is None:
        return []

    if isinstance(head, RolloutTreeBranchNode):
        # A branch node carries no step log itself; look at its main child.
        head = head.main_child
        if head is None:
            return []

    # The code relies on every agent already appearing in this first node.
    step_log = head.step_log
    agent_ids = set(step_log.action_logs.keys())
    agent_ids |= set(step_log.simulation_step_log.rewards.keys())
    return sorted(agent_ids)
|
| 249 |
+
|
| 250 |
+
|
| 251 |
+
def gather_agent_main_rewards(agent_id: str, path: RolloutNodeList) -> List[float]:
    """Return the agent's reward at each node of the path, in path order.

    Raises KeyError if some node records no reward for `agent_id`.
    """
    return [
        node.step_log.simulation_step_log.rewards[agent_id] for node in path.nodes
    ]
|
| 258 |
+
|
| 259 |
+
|
| 260 |
+
def gather_all_rewards(path: RolloutNodeList) -> List[Dict[AgentId, float]]:
    """Return a copy of the per-agent reward mapping of every node in the path."""
    return [node.step_log.simulation_step_log.rewards.copy() for node in path.nodes]
|
| 266 |
+
|
| 267 |
+
|
| 268 |
+
def gather_simulation_stats(
    path: RolloutNodeList,
    filter: Callable[[SimulationStepLog], bool],
    stat_func: Callable[[SimulationStepLog], Any],
) -> List[Any]:
    """Apply `stat_func` to each simulation step log of the path accepted by `filter`.

    Results are returned in path order; step logs rejected by `filter` are skipped.
    """
    step_logs = (node.step_log.simulation_step_log for node in path.nodes)
    return [stat_func(step_log) for step_log in step_logs if filter(step_log)]
|
| 280 |
+
|
| 281 |
+
|
| 282 |
+
def gather_simulation_step_logs(path: RolloutNodeList) -> List[SimulationStepLog]:
    """Return the simulation step log of every node in the path, in path order."""
    return [node.step_log.simulation_step_log for node in path.nodes]
|
| 288 |
+
|
| 289 |
+
|
| 290 |
+
def export_chat_logs(path: Path, outdir: Path):
    """Load a rollout tree from `path` and write its chat turns as JSONL into `outdir`.

    One line is written per path of the tree (main path first, then the branch
    paths); each line is an object with the keys "path_id" and "chat_turns".
    `outdir` is created if it does not exist.
    """
    import json

    tree_root = load_rollout_tree(path)
    tree_id = tree_root.id

    main_path, branch_paths = get_rollout_tree_paths(tree_root)

    outdir.mkdir(parents=True, exist_ok=True)
    target = outdir / f"mgid:{tree_id}_plucked_chats.render.jsonl"

    with open(target, "w", encoding="utf-8") as handle:
        for tree_path in [main_path, *branch_paths]:
            turns = gather_all_chat_turns_for_path(tree_path)
            record = {
                "path_id": str(tree_path.id),
                "chat_turns": [turn.model_dump() for turn in turns],
            }
            handle.write(json.dumps(record, ensure_ascii=False) + "\n")
|
| 313 |
+
|
| 314 |
+
|
src_code_for_reproducibility/utils/rollout_tree_stats.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, Callable, List, Tuple
|
| 2 |
+
|
| 3 |
+
from mllm.markov_games.rollout_tree import RolloutTreeRootNode
|
| 4 |
+
from mllm.markov_games.simulation import SimulationStepLog
|
| 5 |
+
from mllm.utils.rollout_tree_gather_utils import (
|
| 6 |
+
gather_simulation_step_logs,
|
| 7 |
+
get_rollout_tree_paths,
|
| 8 |
+
)
|
| 9 |
+
from mllm.utils.stat_pack import StatPack
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def get_rollout_tree_stat_tally(
    rollout_tree: RolloutTreeRootNode,
    metrics: List[Callable[[SimulationStepLog], List[Tuple[str, float]]]],
) -> StatPack:
    """Tally every metric over the simulation step logs of the tree's first path.

    Each metric is called once per step log and may return None (ignored) or an
    iterable of (key, value) pairs, each of which is added to the tally.
    """
    tally = StatPack()
    # Element 0 of the paths result is the path whose step logs are tallied.
    first_path = get_rollout_tree_paths(rollout_tree)[0]
    for step_log in gather_simulation_step_logs(first_path):
        for metric in metrics:
            pairs = metric(step_log)
            if pairs is None:
                continue
            for stat_name, stat_value in pairs:
                tally.add_stat(stat_name, stat_value)
    return tally
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def get_rollout_tree_mean_stats(
    rollout_tree: RolloutTreeRootNode, metrics: List[Callable[[SimulationStepLog], Any]]
) -> StatPack:
    """Return the `mean()` of the stat tally computed for a single rollout tree."""
    return get_rollout_tree_stat_tally(rollout_tree, metrics).mean()
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def get_mean_rollout_tree_stats(
    rollout_trees: List[RolloutTreeRootNode],
    metrics: List[Callable[[SimulationStepLog], Any]],
) -> StatPack:
    """Average the per-tree mean stats over a list of rollout trees.

    The mean stats of each tree are merged into one StatPack with `add_stats`,
    and the `mean()` of that pack is returned.
    """
    # TODO complete this
    combined = StatPack()
    for tree in rollout_trees:
        combined.add_stats(get_rollout_tree_mean_stats(tree, metrics))
    return combined.mean()
|
src_code_for_reproducibility/utils/update_start_epoch.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
|
| 3 |
+
# During run, set hydra.run.dir=./outputs/{folder}
|
| 4 |
+
def update_start_epoch(cfg, output_directory):
    """Set `cfg["experiment"]["start_epoch"]` from existing iteration folders.

    Does nothing unless `cfg["experiment"]["resume_experiment"]` is truthy.
    Otherwise the entries of `output_directory` named `iteration_<n>` are
    scanned and `start_epoch` becomes the largest `<n>` found, or 0 when there
    is none.

    Entries that start with `iteration_` but whose suffix is not an integer
    (e.g. `iteration_notes`) are ignored instead of aborting the resume.

    Args:
        cfg: Mutable config mapping; updated in place.
        output_directory: Directory that holds the `iteration_*` entries.

    Returns:
        None.

    Raises:
        OSError: If `output_directory` cannot be listed.
    """
    if not cfg["experiment"]["resume_experiment"]:
        return None

    iterations = []
    for entry in os.listdir(output_directory):
        if not entry.startswith("iteration_"):
            continue
        try:
            iterations.append(int(entry.split("_")[1]))
        except ValueError:
            # Not an iteration index (stray file or folder); skip it.
            continue

    cfg["experiment"]["start_epoch"] = max(iterations, default=0)
    return None
|
src_code_for_reproducibility/utils/wandb_utils.py
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from typing import Any, Dict, Optional
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
_WANDB_AVAILABLE = False
|
| 6 |
+
_WANDB_RUN = None
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def _try_import_wandb():
|
| 10 |
+
global _WANDB_AVAILABLE
|
| 11 |
+
if _WANDB_AVAILABLE:
|
| 12 |
+
return True
|
| 13 |
+
try:
|
| 14 |
+
import wandb # type: ignore
|
| 15 |
+
|
| 16 |
+
_WANDB_AVAILABLE = True
|
| 17 |
+
return True
|
| 18 |
+
except Exception:
|
| 19 |
+
_WANDB_AVAILABLE = False
|
| 20 |
+
return False
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def _safe_get(cfg: Dict[str, Any], path: list[str], default: Any = None) -> Any:
|
| 24 |
+
cur: Any = cfg
|
| 25 |
+
for key in path:
|
| 26 |
+
if not isinstance(cur, dict) or key not in cur:
|
| 27 |
+
return default
|
| 28 |
+
cur = cur[key]
|
| 29 |
+
return cur
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def is_enabled(cfg: Dict[str, Any]) -> bool:
    """Return the truthiness of `cfg["logging"]["wandb"]["enabled"]` (False if absent)."""
    flag = _safe_get(cfg, ["logging", "wandb", "enabled"], False)
    return bool(flag)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def init(cfg: Dict[str, Any], run_dir: str, run_name: Optional[str] = None) -> None:
    """Start a Weights & Biases run when the config enables it.

    Returns without doing anything if wandb is disabled in `cfg` or cannot be
    imported. Otherwise `run_dir` is created, `WANDB_DIR` is set to it unless
    already defined, and the run returned by `wandb.init` is stored in
    `_WANDB_RUN`. Run options are read from `cfg["logging"]["wandb"]`;
    `run_name` is used only when the config provides no `name`.
    """
    global _WANDB_RUN
    if not (is_enabled(cfg) and _try_import_wandb()):
        return

    import wandb  # type: ignore

    def wandb_option(option: str, fallback: Any) -> Any:
        return _safe_get(cfg, ["logging", "wandb", option], fallback)

    run_options = {
        "project": wandb_option("project", "llm-negotiation"),
        "entity": wandb_option("entity", None),
        "mode": wandb_option("mode", "online"),
        "tags": wandb_option("tags", []) or [],
        "notes": wandb_option("notes", None),
        "group": wandb_option("group", None),
        "name": wandb_option("name", run_name),
    }

    # Keep the files W&B writes inside the run directory.
    os.makedirs(run_dir, exist_ok=True)
    os.environ.setdefault("WANDB_DIR", run_dir)

    # Prefer a plain-container copy of the config; fall back to cfg itself if
    # omegaconf is missing or the conversion fails.
    try:
        from omegaconf import OmegaConf  # type: ignore

        plain_cfg = OmegaConf.to_container(cfg, resolve=True)  # type: ignore
    except Exception:
        plain_cfg = cfg

    _WANDB_RUN = wandb.init(
        config=plain_cfg,
        dir=run_dir,
        reinit=True,
        **run_options,
    )
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def log(metrics: Dict[str, Any], step: Optional[int] = None) -> None:
    """Log a flat dictionary of metrics to W&B if a run is active.

    Does nothing when wandb is unavailable or no run has been started. Logging
    is best-effort: a failure never propagates to the caller, but it is
    reported through the standard `logging` module instead of being dropped
    silently.

    Args:
        metrics: Flat mapping of metric names to values.
        step: Optional step value. When given, it is merged into the logged
            payload under the key "step" (it is not passed as `wandb.log`'s
            own `step` argument).
    """
    if not _WANDB_AVAILABLE or _WANDB_RUN is None:
        return
    try:
        import wandb  # type: ignore

        wandb.log(metrics if step is None else dict(metrics, step=step))
    except Exception:
        # Keep the caller running, but leave a trace of what went wrong.
        import logging

        logging.getLogger(__name__).warning(
            "wandb logging failed; metrics were dropped", exc_info=True
        )
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def _flatten(prefix: str, data: Dict[str, Any], out: Dict[str, Any]) -> None:
|
| 95 |
+
for k, v in data.items():
|
| 96 |
+
key = f"{prefix}.{k}" if prefix else k
|
| 97 |
+
if isinstance(v, dict):
|
| 98 |
+
_flatten(key, v, out)
|
| 99 |
+
else:
|
| 100 |
+
out[key] = v
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def _summarize_value(value: Any) -> Dict[str, Any]:
|
| 104 |
+
import numpy as np # local import to avoid hard dependency during disabled mode
|
| 105 |
+
|
| 106 |
+
if value is None:
|
| 107 |
+
return {"none": 1}
|
| 108 |
+
# Scalars
|
| 109 |
+
if isinstance(value, (int, float)):
|
| 110 |
+
return {"value": float(value)}
|
| 111 |
+
# Lists or arrays
|
| 112 |
+
try:
|
| 113 |
+
arr = np.asarray(value)
|
| 114 |
+
if arr.size == 0:
|
| 115 |
+
return {"size": 0}
|
| 116 |
+
return {
|
| 117 |
+
"mean": float(np.nanmean(arr)),
|
| 118 |
+
"min": float(np.nanmin(arr)),
|
| 119 |
+
"max": float(np.nanmax(arr)),
|
| 120 |
+
"last": float(arr.reshape(-1)[-1]),
|
| 121 |
+
"size": int(arr.size),
|
| 122 |
+
}
|
| 123 |
+
except Exception:
|
| 124 |
+
# Fallback: string repr
|
| 125 |
+
return {"text": str(value)}
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
def log_tally(array_tally: Dict[str, Any], prefix: str = "", step: Optional[int] = None) -> None:
    """Summarize every leaf of a nested tally dict and log the result to W&B.

    Does nothing when no W&B run is active. Each non-dict leaf is passed to
    `_summarize_value`, and every entry of the summary is logged under
    "<prefix>.<dotted path>.<summary key>"; a leaf whose summary raises is
    logged as "<...>.error" = 1. Nothing is logged when there are no leaves.
    """
    if _WANDB_RUN is None or not _WANDB_AVAILABLE:
        return

    root_parts = [prefix] if prefix else []
    collected: Dict[str, Any] = {}

    def visit(item: Any, trail: list[str]):
        if not isinstance(item, dict):
            # Leaf: the values accumulated for one tally entry.
            dotted = ".".join(root_parts + trail)
            try:
                for stat_name, stat_value in _summarize_value(item).items():
                    collected[f"{dotted}.{stat_name}"] = stat_value
            except Exception:
                collected[f"{dotted}.error"] = 1
            return
        for child_key, child in item.items():
            visit(child, trail + [child_key])

    visit(array_tally, [])
    if collected:
        log(collected, step=step)
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
def log_flat_stats(stats: Dict[str, Any], prefix: str = "", step: Optional[int] = None) -> None:
    """Flatten nested `stats` into dot-joined keys and log them to W&B.

    Does nothing when no W&B run is active or when `stats` has no leaves.
    """
    if _WANDB_RUN is None or not _WANDB_AVAILABLE:
        return
    flattened: Dict[str, Any] = {}
    _flatten(prefix, stats, flattened)
    if not flattened:
        return
    log(flattened, step=step)
|
| 163 |
+
|
| 164 |
+
|