Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +4 -0
- 2026.01.21/12.08.38_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml +163 -0
- 2026.01.21/12.08.38_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml +154 -0
- 2026.01.21/12.08.38_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml +1 -0
- 2026.01.21/12.08.38_train_llmbc_lowdim_box-close-v2/train.log +2 -0
- 2026.01.21/12.10.34_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml +163 -0
- 2026.01.21/12.10.34_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml +154 -0
- 2026.01.21/12.10.34_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml +1 -0
- 2026.01.21/12.10.34_train_llmbc_lowdim_box-close-v2/train.log +9 -0
- 2026.01.21/12.13.09_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml +163 -0
- 2026.01.21/12.13.09_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml +154 -0
- 2026.01.21/12.13.09_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml +1 -0
- 2026.01.21/12.13.09_train_llmbc_lowdim_box-close-v2/train.log +12 -0
- 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml +163 -0
- 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml +154 -0
- 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml +1 -0
- 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/logs.json.txt +237 -0
- 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/train.log +14 -0
- 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/debug-internal.log +11 -0
- 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/debug.log +27 -0
- 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/config.yaml +271 -0
- 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/output.log +78 -0
- 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/requirements.txt +857 -0
- 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/wandb-metadata.json +55 -0
- 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/wandb-summary.json +1 -0
- 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug-core.log +12 -0
- 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug-internal.log +11 -0
- 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug.log +27 -0
- 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/run-9puzigbg.wandb +3 -0
- 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/wandb-resume.json +1 -0
- 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml +163 -0
- 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml +156 -0
- 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml +2 -0
- 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/checkpoints/epoch=0000-test_success_rate=0.000.ckpt +3 -0
- 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/checkpoints/latest.ckpt +3 -0
- 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/logs.json.txt +418 -0
- 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/train.log +8 -0
- 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/debug-internal.log +8 -0
- 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/debug.log +26 -0
- 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/files/output.log +3 -0
- 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/files/requirements.txt +857 -0
- 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/files/wandb-metadata.json +108 -0
- 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/logs/debug-core.log +7 -0
- 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/logs/debug-internal.log +8 -0
- 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/logs/debug.log +26 -0
- 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/run-yhjy9tz9.wandb +3 -0
- 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/wandb-resume.json +1 -0
- 2026.01.21/13.23.20_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml +163 -0
- 2026.01.21/13.23.20_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml +156 -0
- 2026.01.21/13.23.20_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml +2 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/run-9puzigbg.wandb filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/run-yhjy9tz9.wandb filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
2026.01.21/13.23.20_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_132338-qrt50pak/run-qrt50pak.wandb filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
2026.01.21/13.27.30_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_132748-8pqnk39p/run-8pqnk39p.wandb filter=lfs diff=lfs merge=lfs -text
|
2026.01.21/12.08.38_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: train_llmbc_lowdim
|
| 2 |
+
_target_: llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace
|
| 3 |
+
obs_dim: ${task.obs_dim}
|
| 4 |
+
action_dim: ${task.action_dim}
|
| 5 |
+
task_name: ${task.name}
|
| 6 |
+
exp_name: default
|
| 7 |
+
model_name: ${llm.name}
|
| 8 |
+
horizon: 1
|
| 9 |
+
n_obs_steps: 1
|
| 10 |
+
n_action_steps: 1
|
| 11 |
+
n_latency_steps: 0
|
| 12 |
+
past_action_visible: false
|
| 13 |
+
llm_orig_expert_feedback: true
|
| 14 |
+
llm_do_sample: false
|
| 15 |
+
policy:
|
| 16 |
+
_target_: llmbc.policy.llmbc_lowdim_policy.LLMBCLowdimPolicy
|
| 17 |
+
model:
|
| 18 |
+
_target_: llmbc.model.policy.policy_mlp.PolicyMLP
|
| 19 |
+
input_size: ${eval:'${n_obs_steps}*${obs_dim}'}
|
| 20 |
+
hidden_size:
|
| 21 |
+
- 256
|
| 22 |
+
- 256
|
| 23 |
+
output_size: ${eval:'${n_action_steps}*${action_dim}'}
|
| 24 |
+
activation: relu
|
| 25 |
+
n_obs_steps: ${n_obs_steps}
|
| 26 |
+
n_action_steps: ${n_action_steps}
|
| 27 |
+
obs_dim: ${obs_dim}
|
| 28 |
+
action_dim: ${action_dim}
|
| 29 |
+
llm_discriminator:
|
| 30 |
+
_target_: llmbc.discriminator.llm_ce_discriminator.LLMCEDiscriminator
|
| 31 |
+
task_id: ${task_name}
|
| 32 |
+
llm_translator:
|
| 33 |
+
_target_: llmbc.translator.llm_translator.LLMTranslator
|
| 34 |
+
cfg: ${llm}
|
| 35 |
+
obs_dim: ${task.obs_dim}
|
| 36 |
+
action_dim: ${task.action_dim}
|
| 37 |
+
horizon: ${horizon}
|
| 38 |
+
n_obs_steps: ${n_obs_steps}
|
| 39 |
+
n_action_steps: ${n_action_steps}
|
| 40 |
+
loss_bc_weight: 1.0
|
| 41 |
+
loss_llm_weight: 0.01
|
| 42 |
+
horizon: ${horizon}
|
| 43 |
+
n_obs_steps: ${n_obs_steps}
|
| 44 |
+
n_action_steps: ${n_action_steps}
|
| 45 |
+
normalize_llm_loss: true
|
| 46 |
+
dataloader:
|
| 47 |
+
batch_size: 16
|
| 48 |
+
num_workers: 0
|
| 49 |
+
shuffle: true
|
| 50 |
+
pin_memory: false
|
| 51 |
+
persistent_workers: false
|
| 52 |
+
val_dataloader:
|
| 53 |
+
batch_size: 16
|
| 54 |
+
num_workers: 0
|
| 55 |
+
shuffle: true
|
| 56 |
+
pin_memory: false
|
| 57 |
+
persistent_workers: false
|
| 58 |
+
optimizer:
|
| 59 |
+
_target_: torch.optim.AdamW
|
| 60 |
+
lr: 0.01
|
| 61 |
+
betas:
|
| 62 |
+
- 0.95
|
| 63 |
+
- 0.999
|
| 64 |
+
eps: 1.0e-08
|
| 65 |
+
weight_decay: 1.0e-06
|
| 66 |
+
training:
|
| 67 |
+
device: cuda:0
|
| 68 |
+
seed: 42
|
| 69 |
+
debug: false
|
| 70 |
+
resume: false
|
| 71 |
+
lr_scheduler: cosine
|
| 72 |
+
lr_warmup_steps: 10
|
| 73 |
+
num_epochs: 1001
|
| 74 |
+
gradient_accumulate_every: 8
|
| 75 |
+
grad_norm_clip: 0.5
|
| 76 |
+
rollout_every: 5
|
| 77 |
+
checkpoint_every: 5
|
| 78 |
+
val_every: 1
|
| 79 |
+
sample_every: 5
|
| 80 |
+
sample_max_batch: 128
|
| 81 |
+
max_train_steps: null
|
| 82 |
+
max_val_steps: null
|
| 83 |
+
tqdm_interval_sec: 1.0
|
| 84 |
+
logging:
|
| 85 |
+
project: ${task.name}-training
|
| 86 |
+
resume: true
|
| 87 |
+
mode: online
|
| 88 |
+
name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name}
|
| 89 |
+
tags:
|
| 90 |
+
- ${name}
|
| 91 |
+
- ${task_name}
|
| 92 |
+
- ${exp_name}
|
| 93 |
+
id: null
|
| 94 |
+
group: null
|
| 95 |
+
checkpoint:
|
| 96 |
+
topk:
|
| 97 |
+
monitor_key: test_success_rate
|
| 98 |
+
mode: max
|
| 99 |
+
k: 5
|
| 100 |
+
format_str: epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt
|
| 101 |
+
save_last_ckpt: true
|
| 102 |
+
save_last_snapshot: false
|
| 103 |
+
multi_run:
|
| 104 |
+
run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
|
| 105 |
+
wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name}
|
| 106 |
+
task:
|
| 107 |
+
name: box-close-v2
|
| 108 |
+
obs_dim: 9
|
| 109 |
+
action_dim: 4
|
| 110 |
+
env_runner:
|
| 111 |
+
_target_: llmbc.env_runner.metaworld_lowdim_runner.MetaworldLowdimRunner
|
| 112 |
+
env_name: llf-metaworld-box-close-v2
|
| 113 |
+
n_train: 10
|
| 114 |
+
n_test: 50
|
| 115 |
+
n_envs: 10
|
| 116 |
+
max_steps: 30
|
| 117 |
+
n_obs_steps: ${n_obs_steps}
|
| 118 |
+
n_action_steps: ${n_action_steps}
|
| 119 |
+
instruction_type: b
|
| 120 |
+
feedback_type:
|
| 121 |
+
- hp
|
| 122 |
+
- hn
|
| 123 |
+
- fp
|
| 124 |
+
visual: false
|
| 125 |
+
discount: 0.9
|
| 126 |
+
dataset:
|
| 127 |
+
_target_: llmbc.dataset.metaworld_lowdim_dataset.MetaworldLowdimDataset
|
| 128 |
+
data_path: datasets/box-close-v2.pt
|
| 129 |
+
data_path2: datasets/box-close-v2.pt
|
| 130 |
+
horizon: ${horizon}
|
| 131 |
+
pad_before: ${eval:'${n_obs_steps}-1'}
|
| 132 |
+
pad_after: ${eval:'${n_action_steps}-1'}
|
| 133 |
+
obs_eef_target: true
|
| 134 |
+
use_manual_normalizer: false
|
| 135 |
+
val_ratio: 0.1
|
| 136 |
+
dummy_normalizer: true
|
| 137 |
+
instructor:
|
| 138 |
+
_target_: llmbc.translator.instructor.metaworld_instructor.box_close_v2_instructor.BoxCloseV2Instructor
|
| 139 |
+
llm:
|
| 140 |
+
name: HuggingFaceTB/SmolLM2-135M-Instruct
|
| 141 |
+
model_name: SmolLM2-135M-Instruct
|
| 142 |
+
config_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig
|
| 143 |
+
causal_lm_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM
|
| 144 |
+
use_quantization: false
|
| 145 |
+
use_joint_mlp_projector: true
|
| 146 |
+
llm_mode: ete-finetuned
|
| 147 |
+
finetune_mode: orig
|
| 148 |
+
checkpoint: data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890
|
| 149 |
+
max_length: 100
|
| 150 |
+
lora_config:
|
| 151 |
+
r: 32
|
| 152 |
+
lora_alpha: 64
|
| 153 |
+
lora_dropout: 0.05
|
| 154 |
+
bias: none
|
| 155 |
+
task_type: CAUSAL_LM
|
| 156 |
+
prompter:
|
| 157 |
+
_target_: llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter
|
| 158 |
+
use_joint_mlp_projector: true
|
| 159 |
+
hydra:
|
| 160 |
+
job:
|
| 161 |
+
override_dirname: ${model_name}
|
| 162 |
+
run:
|
| 163 |
+
dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${model_name}
|
2026.01.21/12.08.38_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
hydra:
|
| 2 |
+
run:
|
| 3 |
+
dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
|
| 4 |
+
sweep:
|
| 5 |
+
dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
|
| 6 |
+
subdir: ${hydra.job.num}
|
| 7 |
+
launcher:
|
| 8 |
+
_target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
|
| 9 |
+
sweeper:
|
| 10 |
+
_target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
|
| 11 |
+
max_batch_size: null
|
| 12 |
+
params: null
|
| 13 |
+
help:
|
| 14 |
+
app_name: ${hydra.job.name}
|
| 15 |
+
header: '${hydra.help.app_name} is powered by Hydra.
|
| 16 |
+
|
| 17 |
+
'
|
| 18 |
+
footer: 'Powered by Hydra (https://hydra.cc)
|
| 19 |
+
|
| 20 |
+
Use --hydra-help to view Hydra specific help
|
| 21 |
+
|
| 22 |
+
'
|
| 23 |
+
template: '${hydra.help.header}
|
| 24 |
+
|
| 25 |
+
== Configuration groups ==
|
| 26 |
+
|
| 27 |
+
Compose your configuration from those groups (group=option)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
$APP_CONFIG_GROUPS
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
== Config ==
|
| 34 |
+
|
| 35 |
+
Override anything in the config (foo.bar=value)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
$CONFIG
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
${hydra.help.footer}
|
| 42 |
+
|
| 43 |
+
'
|
| 44 |
+
hydra_help:
|
| 45 |
+
template: 'Hydra (${hydra.runtime.version})
|
| 46 |
+
|
| 47 |
+
See https://hydra.cc for more info.
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
== Flags ==
|
| 51 |
+
|
| 52 |
+
$FLAGS_HELP
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
== Configuration groups ==
|
| 56 |
+
|
| 57 |
+
Compose your configuration from those groups (For example, append hydra/job_logging=disabled
|
| 58 |
+
to command line)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
$HYDRA_CONFIG_GROUPS
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
Use ''--cfg hydra'' to Show the Hydra config.
|
| 65 |
+
|
| 66 |
+
'
|
| 67 |
+
hydra_help: ???
|
| 68 |
+
hydra_logging:
|
| 69 |
+
version: 1
|
| 70 |
+
formatters:
|
| 71 |
+
simple:
|
| 72 |
+
format: '[%(asctime)s][HYDRA] %(message)s'
|
| 73 |
+
handlers:
|
| 74 |
+
console:
|
| 75 |
+
class: logging.StreamHandler
|
| 76 |
+
formatter: simple
|
| 77 |
+
stream: ext://sys.stdout
|
| 78 |
+
root:
|
| 79 |
+
level: INFO
|
| 80 |
+
handlers:
|
| 81 |
+
- console
|
| 82 |
+
loggers:
|
| 83 |
+
logging_example:
|
| 84 |
+
level: DEBUG
|
| 85 |
+
disable_existing_loggers: false
|
| 86 |
+
job_logging:
|
| 87 |
+
version: 1
|
| 88 |
+
formatters:
|
| 89 |
+
simple:
|
| 90 |
+
format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
|
| 91 |
+
handlers:
|
| 92 |
+
console:
|
| 93 |
+
class: logging.StreamHandler
|
| 94 |
+
formatter: simple
|
| 95 |
+
stream: ext://sys.stdout
|
| 96 |
+
file:
|
| 97 |
+
class: logging.FileHandler
|
| 98 |
+
formatter: simple
|
| 99 |
+
filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
|
| 100 |
+
root:
|
| 101 |
+
level: INFO
|
| 102 |
+
handlers:
|
| 103 |
+
- console
|
| 104 |
+
- file
|
| 105 |
+
disable_existing_loggers: false
|
| 106 |
+
env: {}
|
| 107 |
+
mode: RUN
|
| 108 |
+
searchpath: []
|
| 109 |
+
callbacks: {}
|
| 110 |
+
output_subdir: .hydra
|
| 111 |
+
overrides:
|
| 112 |
+
hydra:
|
| 113 |
+
- hydra.mode=RUN
|
| 114 |
+
task: []
|
| 115 |
+
job:
|
| 116 |
+
name: train
|
| 117 |
+
chdir: null
|
| 118 |
+
override_dirname: ''
|
| 119 |
+
id: ???
|
| 120 |
+
num: ???
|
| 121 |
+
config_name: llmbc_box-close-v2.yaml
|
| 122 |
+
env_set: {}
|
| 123 |
+
env_copy: []
|
| 124 |
+
config:
|
| 125 |
+
override_dirname:
|
| 126 |
+
kv_sep: '='
|
| 127 |
+
item_sep: ','
|
| 128 |
+
exclude_keys: []
|
| 129 |
+
runtime:
|
| 130 |
+
version: 1.2.0
|
| 131 |
+
version_base: '1.2'
|
| 132 |
+
cwd: /work/u1131674/LLM-BC
|
| 133 |
+
config_sources:
|
| 134 |
+
- path: hydra.conf
|
| 135 |
+
schema: pkg
|
| 136 |
+
provider: hydra
|
| 137 |
+
- path: /work/u1131674/LLM-BC/config/main_table
|
| 138 |
+
schema: file
|
| 139 |
+
provider: main
|
| 140 |
+
- path: ''
|
| 141 |
+
schema: structured
|
| 142 |
+
provider: schema
|
| 143 |
+
output_dir: /work/u1131674/LLM-BC/data/outputs/2026.01.21/12.08.38_train_llmbc_lowdim_box-close-v2
|
| 144 |
+
choices:
|
| 145 |
+
hydra/env: default
|
| 146 |
+
hydra/callbacks: null
|
| 147 |
+
hydra/job_logging: default
|
| 148 |
+
hydra/hydra_logging: default
|
| 149 |
+
hydra/hydra_help: default
|
| 150 |
+
hydra/help: default
|
| 151 |
+
hydra/sweeper: basic
|
| 152 |
+
hydra/launcher: basic
|
| 153 |
+
hydra/output: default
|
| 154 |
+
verbose: false
|
2026.01.21/12.08.38_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
[]
|
2026.01.21/12.08.38_train_llmbc_lowdim_box-close-v2/train.log
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[2026-01-21 12:08:39,454][hydra.utils][ERROR] - Error initializing class at llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace: Error loading 'llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace':
|
| 2 |
+
ImportError("cannot import name 'Sentinel' from 'typing_extensions' (/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/typing_extensions.py)")
|
2026.01.21/12.10.34_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: train_llmbc_lowdim
|
| 2 |
+
_target_: llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace
|
| 3 |
+
obs_dim: ${task.obs_dim}
|
| 4 |
+
action_dim: ${task.action_dim}
|
| 5 |
+
task_name: ${task.name}
|
| 6 |
+
exp_name: default
|
| 7 |
+
model_name: ${llm.name}
|
| 8 |
+
horizon: 1
|
| 9 |
+
n_obs_steps: 1
|
| 10 |
+
n_action_steps: 1
|
| 11 |
+
n_latency_steps: 0
|
| 12 |
+
past_action_visible: false
|
| 13 |
+
llm_orig_expert_feedback: true
|
| 14 |
+
llm_do_sample: false
|
| 15 |
+
policy:
|
| 16 |
+
_target_: llmbc.policy.llmbc_lowdim_policy.LLMBCLowdimPolicy
|
| 17 |
+
model:
|
| 18 |
+
_target_: llmbc.model.policy.policy_mlp.PolicyMLP
|
| 19 |
+
input_size: ${eval:'${n_obs_steps}*${obs_dim}'}
|
| 20 |
+
hidden_size:
|
| 21 |
+
- 256
|
| 22 |
+
- 256
|
| 23 |
+
output_size: ${eval:'${n_action_steps}*${action_dim}'}
|
| 24 |
+
activation: relu
|
| 25 |
+
n_obs_steps: ${n_obs_steps}
|
| 26 |
+
n_action_steps: ${n_action_steps}
|
| 27 |
+
obs_dim: ${obs_dim}
|
| 28 |
+
action_dim: ${action_dim}
|
| 29 |
+
llm_discriminator:
|
| 30 |
+
_target_: llmbc.discriminator.llm_ce_discriminator.LLMCEDiscriminator
|
| 31 |
+
task_id: ${task_name}
|
| 32 |
+
llm_translator:
|
| 33 |
+
_target_: llmbc.translator.llm_translator.LLMTranslator
|
| 34 |
+
cfg: ${llm}
|
| 35 |
+
obs_dim: ${task.obs_dim}
|
| 36 |
+
action_dim: ${task.action_dim}
|
| 37 |
+
horizon: ${horizon}
|
| 38 |
+
n_obs_steps: ${n_obs_steps}
|
| 39 |
+
n_action_steps: ${n_action_steps}
|
| 40 |
+
loss_bc_weight: 1.0
|
| 41 |
+
loss_llm_weight: 0.01
|
| 42 |
+
horizon: ${horizon}
|
| 43 |
+
n_obs_steps: ${n_obs_steps}
|
| 44 |
+
n_action_steps: ${n_action_steps}
|
| 45 |
+
normalize_llm_loss: true
|
| 46 |
+
dataloader:
|
| 47 |
+
batch_size: 16
|
| 48 |
+
num_workers: 0
|
| 49 |
+
shuffle: true
|
| 50 |
+
pin_memory: false
|
| 51 |
+
persistent_workers: false
|
| 52 |
+
val_dataloader:
|
| 53 |
+
batch_size: 16
|
| 54 |
+
num_workers: 0
|
| 55 |
+
shuffle: true
|
| 56 |
+
pin_memory: false
|
| 57 |
+
persistent_workers: false
|
| 58 |
+
optimizer:
|
| 59 |
+
_target_: torch.optim.AdamW
|
| 60 |
+
lr: 0.01
|
| 61 |
+
betas:
|
| 62 |
+
- 0.95
|
| 63 |
+
- 0.999
|
| 64 |
+
eps: 1.0e-08
|
| 65 |
+
weight_decay: 1.0e-06
|
| 66 |
+
training:
|
| 67 |
+
device: cuda:0
|
| 68 |
+
seed: 42
|
| 69 |
+
debug: false
|
| 70 |
+
resume: false
|
| 71 |
+
lr_scheduler: cosine
|
| 72 |
+
lr_warmup_steps: 10
|
| 73 |
+
num_epochs: 1001
|
| 74 |
+
gradient_accumulate_every: 8
|
| 75 |
+
grad_norm_clip: 0.5
|
| 76 |
+
rollout_every: 5
|
| 77 |
+
checkpoint_every: 5
|
| 78 |
+
val_every: 1
|
| 79 |
+
sample_every: 5
|
| 80 |
+
sample_max_batch: 128
|
| 81 |
+
max_train_steps: null
|
| 82 |
+
max_val_steps: null
|
| 83 |
+
tqdm_interval_sec: 1.0
|
| 84 |
+
logging:
|
| 85 |
+
project: ${task.name}-training
|
| 86 |
+
resume: true
|
| 87 |
+
mode: online
|
| 88 |
+
name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name}
|
| 89 |
+
tags:
|
| 90 |
+
- ${name}
|
| 91 |
+
- ${task_name}
|
| 92 |
+
- ${exp_name}
|
| 93 |
+
id: null
|
| 94 |
+
group: null
|
| 95 |
+
checkpoint:
|
| 96 |
+
topk:
|
| 97 |
+
monitor_key: test_success_rate
|
| 98 |
+
mode: max
|
| 99 |
+
k: 5
|
| 100 |
+
format_str: epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt
|
| 101 |
+
save_last_ckpt: true
|
| 102 |
+
save_last_snapshot: false
|
| 103 |
+
multi_run:
|
| 104 |
+
run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
|
| 105 |
+
wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name}
|
| 106 |
+
task:
|
| 107 |
+
name: box-close-v2
|
| 108 |
+
obs_dim: 9
|
| 109 |
+
action_dim: 4
|
| 110 |
+
env_runner:
|
| 111 |
+
_target_: llmbc.env_runner.metaworld_lowdim_runner.MetaworldLowdimRunner
|
| 112 |
+
env_name: llf-metaworld-box-close-v2
|
| 113 |
+
n_train: 10
|
| 114 |
+
n_test: 50
|
| 115 |
+
n_envs: 10
|
| 116 |
+
max_steps: 30
|
| 117 |
+
n_obs_steps: ${n_obs_steps}
|
| 118 |
+
n_action_steps: ${n_action_steps}
|
| 119 |
+
instruction_type: b
|
| 120 |
+
feedback_type:
|
| 121 |
+
- hp
|
| 122 |
+
- hn
|
| 123 |
+
- fp
|
| 124 |
+
visual: false
|
| 125 |
+
discount: 0.9
|
| 126 |
+
dataset:
|
| 127 |
+
_target_: llmbc.dataset.metaworld_lowdim_dataset.MetaworldLowdimDataset
|
| 128 |
+
data_path: datasets/box-close-v2.pt
|
| 129 |
+
data_path2: datasets/box-close-v2.pt
|
| 130 |
+
horizon: ${horizon}
|
| 131 |
+
pad_before: ${eval:'${n_obs_steps}-1'}
|
| 132 |
+
pad_after: ${eval:'${n_action_steps}-1'}
|
| 133 |
+
obs_eef_target: true
|
| 134 |
+
use_manual_normalizer: false
|
| 135 |
+
val_ratio: 0.1
|
| 136 |
+
dummy_normalizer: true
|
| 137 |
+
instructor:
|
| 138 |
+
_target_: llmbc.translator.instructor.metaworld_instructor.box_close_v2_instructor.BoxCloseV2Instructor
|
| 139 |
+
llm:
|
| 140 |
+
name: HuggingFaceTB/SmolLM2-135M-Instruct
|
| 141 |
+
model_name: SmolLM2-135M-Instruct
|
| 142 |
+
config_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig
|
| 143 |
+
causal_lm_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM
|
| 144 |
+
use_quantization: false
|
| 145 |
+
use_joint_mlp_projector: true
|
| 146 |
+
llm_mode: ete-finetuned
|
| 147 |
+
finetune_mode: orig
|
| 148 |
+
checkpoint: data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890
|
| 149 |
+
max_length: 100
|
| 150 |
+
lora_config:
|
| 151 |
+
r: 32
|
| 152 |
+
lora_alpha: 64
|
| 153 |
+
lora_dropout: 0.05
|
| 154 |
+
bias: none
|
| 155 |
+
task_type: CAUSAL_LM
|
| 156 |
+
prompter:
|
| 157 |
+
_target_: llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter
|
| 158 |
+
use_joint_mlp_projector: true
|
| 159 |
+
hydra:
|
| 160 |
+
job:
|
| 161 |
+
override_dirname: ${model_name}
|
| 162 |
+
run:
|
| 163 |
+
dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${model_name}
|
2026.01.21/12.10.34_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
hydra:
|
| 2 |
+
run:
|
| 3 |
+
dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
|
| 4 |
+
sweep:
|
| 5 |
+
dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
|
| 6 |
+
subdir: ${hydra.job.num}
|
| 7 |
+
launcher:
|
| 8 |
+
_target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
|
| 9 |
+
sweeper:
|
| 10 |
+
_target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
|
| 11 |
+
max_batch_size: null
|
| 12 |
+
params: null
|
| 13 |
+
help:
|
| 14 |
+
app_name: ${hydra.job.name}
|
| 15 |
+
header: '${hydra.help.app_name} is powered by Hydra.
|
| 16 |
+
|
| 17 |
+
'
|
| 18 |
+
footer: 'Powered by Hydra (https://hydra.cc)
|
| 19 |
+
|
| 20 |
+
Use --hydra-help to view Hydra specific help
|
| 21 |
+
|
| 22 |
+
'
|
| 23 |
+
template: '${hydra.help.header}
|
| 24 |
+
|
| 25 |
+
== Configuration groups ==
|
| 26 |
+
|
| 27 |
+
Compose your configuration from those groups (group=option)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
$APP_CONFIG_GROUPS
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
== Config ==
|
| 34 |
+
|
| 35 |
+
Override anything in the config (foo.bar=value)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
$CONFIG
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
${hydra.help.footer}
|
| 42 |
+
|
| 43 |
+
'
|
| 44 |
+
hydra_help:
|
| 45 |
+
template: 'Hydra (${hydra.runtime.version})
|
| 46 |
+
|
| 47 |
+
See https://hydra.cc for more info.
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
== Flags ==
|
| 51 |
+
|
| 52 |
+
$FLAGS_HELP
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
== Configuration groups ==
|
| 56 |
+
|
| 57 |
+
Compose your configuration from those groups (For example, append hydra/job_logging=disabled
|
| 58 |
+
to command line)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
$HYDRA_CONFIG_GROUPS
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
Use ''--cfg hydra'' to Show the Hydra config.
|
| 65 |
+
|
| 66 |
+
'
|
| 67 |
+
hydra_help: ???
|
| 68 |
+
hydra_logging:
|
| 69 |
+
version: 1
|
| 70 |
+
formatters:
|
| 71 |
+
simple:
|
| 72 |
+
format: '[%(asctime)s][HYDRA] %(message)s'
|
| 73 |
+
handlers:
|
| 74 |
+
console:
|
| 75 |
+
class: logging.StreamHandler
|
| 76 |
+
formatter: simple
|
| 77 |
+
stream: ext://sys.stdout
|
| 78 |
+
root:
|
| 79 |
+
level: INFO
|
| 80 |
+
handlers:
|
| 81 |
+
- console
|
| 82 |
+
loggers:
|
| 83 |
+
logging_example:
|
| 84 |
+
level: DEBUG
|
| 85 |
+
disable_existing_loggers: false
|
| 86 |
+
job_logging:
|
| 87 |
+
version: 1
|
| 88 |
+
formatters:
|
| 89 |
+
simple:
|
| 90 |
+
format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
|
| 91 |
+
handlers:
|
| 92 |
+
console:
|
| 93 |
+
class: logging.StreamHandler
|
| 94 |
+
formatter: simple
|
| 95 |
+
stream: ext://sys.stdout
|
| 96 |
+
file:
|
| 97 |
+
class: logging.FileHandler
|
| 98 |
+
formatter: simple
|
| 99 |
+
filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
|
| 100 |
+
root:
|
| 101 |
+
level: INFO
|
| 102 |
+
handlers:
|
| 103 |
+
- console
|
| 104 |
+
- file
|
| 105 |
+
disable_existing_loggers: false
|
| 106 |
+
env: {}
|
| 107 |
+
mode: RUN
|
| 108 |
+
searchpath: []
|
| 109 |
+
callbacks: {}
|
| 110 |
+
output_subdir: .hydra
|
| 111 |
+
overrides:
|
| 112 |
+
hydra:
|
| 113 |
+
- hydra.mode=RUN
|
| 114 |
+
task: []
|
| 115 |
+
job:
|
| 116 |
+
name: train
|
| 117 |
+
chdir: null
|
| 118 |
+
override_dirname: ''
|
| 119 |
+
id: ???
|
| 120 |
+
num: ???
|
| 121 |
+
config_name: llmbc_box-close-v2.yaml
|
| 122 |
+
env_set: {}
|
| 123 |
+
env_copy: []
|
| 124 |
+
config:
|
| 125 |
+
override_dirname:
|
| 126 |
+
kv_sep: '='
|
| 127 |
+
item_sep: ','
|
| 128 |
+
exclude_keys: []
|
| 129 |
+
runtime:
|
| 130 |
+
version: 1.2.0
|
| 131 |
+
version_base: '1.2'
|
| 132 |
+
cwd: /work/u1131674/LLM-BC
|
| 133 |
+
config_sources:
|
| 134 |
+
- path: hydra.conf
|
| 135 |
+
schema: pkg
|
| 136 |
+
provider: hydra
|
| 137 |
+
- path: /work/u1131674/LLM-BC/config/main_table
|
| 138 |
+
schema: file
|
| 139 |
+
provider: main
|
| 140 |
+
- path: ''
|
| 141 |
+
schema: structured
|
| 142 |
+
provider: schema
|
| 143 |
+
output_dir: /work/u1131674/LLM-BC/data/outputs/2026.01.21/12.10.34_train_llmbc_lowdim_box-close-v2
|
| 144 |
+
choices:
|
| 145 |
+
hydra/env: default
|
| 146 |
+
hydra/callbacks: null
|
| 147 |
+
hydra/job_logging: default
|
| 148 |
+
hydra/hydra_logging: default
|
| 149 |
+
hydra/hydra_help: default
|
| 150 |
+
hydra/help: default
|
| 151 |
+
hydra/sweeper: basic
|
| 152 |
+
hydra/launcher: basic
|
| 153 |
+
hydra/output: default
|
| 154 |
+
verbose: false
|
2026.01.21/12.10.34_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
[]
|
2026.01.21/12.10.34_train_llmbc_lowdim_box-close-v2/train.log
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[2026-01-21 12:10:36,293][numexpr.utils][INFO] - Note: detected 224 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable.
|
| 2 |
+
[2026-01-21 12:10:36,293][numexpr.utils][INFO] - Note: NumExpr detected 224 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 16.
|
| 3 |
+
[2026-01-21 12:10:36,293][numexpr.utils][INFO] - NumExpr defaulting to 16 threads.
|
| 4 |
+
[2026-01-21 12:10:42,233][datasets][INFO] - PyTorch version 2.2.2 available.
|
| 5 |
+
[2026-01-21 12:10:42,234][datasets][INFO] - TensorFlow version 2.15.1 available.
|
| 6 |
+
[2026-01-21 12:10:42,235][datasets][INFO] - JAX version 0.4.30 available.
|
| 7 |
+
[2026-01-21 12:11:05,787][matplotlib.font_manager][INFO] - Failed to extract font properties from /usr/share/fonts/google-noto-emoji/NotoColorEmoji.ttf: In FT2Font: Can not load face (unknown file format; error code 0x2)
|
| 8 |
+
[2026-01-21 12:11:05,799][matplotlib.font_manager][INFO] - generated new fontManager
|
| 9 |
+
[2026-01-21 12:11:07,857][OpenGL.platform.ctypesloader][INFO] - Failed to load library ( 'libOSMesa.so.0' ): libOSMesa.so.0: cannot open shared object file: No such file or directory
|
2026.01.21/12.13.09_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: train_llmbc_lowdim
|
| 2 |
+
_target_: llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace
|
| 3 |
+
obs_dim: ${task.obs_dim}
|
| 4 |
+
action_dim: ${task.action_dim}
|
| 5 |
+
task_name: ${task.name}
|
| 6 |
+
exp_name: default
|
| 7 |
+
model_name: ${llm.name}
|
| 8 |
+
horizon: 1
|
| 9 |
+
n_obs_steps: 1
|
| 10 |
+
n_action_steps: 1
|
| 11 |
+
n_latency_steps: 0
|
| 12 |
+
past_action_visible: false
|
| 13 |
+
llm_orig_expert_feedback: true
|
| 14 |
+
llm_do_sample: false
|
| 15 |
+
policy:
|
| 16 |
+
_target_: llmbc.policy.llmbc_lowdim_policy.LLMBCLowdimPolicy
|
| 17 |
+
model:
|
| 18 |
+
_target_: llmbc.model.policy.policy_mlp.PolicyMLP
|
| 19 |
+
input_size: ${eval:'${n_obs_steps}*${obs_dim}'}
|
| 20 |
+
hidden_size:
|
| 21 |
+
- 256
|
| 22 |
+
- 256
|
| 23 |
+
output_size: ${eval:'${n_action_steps}*${action_dim}'}
|
| 24 |
+
activation: relu
|
| 25 |
+
n_obs_steps: ${n_obs_steps}
|
| 26 |
+
n_action_steps: ${n_action_steps}
|
| 27 |
+
obs_dim: ${obs_dim}
|
| 28 |
+
action_dim: ${action_dim}
|
| 29 |
+
llm_discriminator:
|
| 30 |
+
_target_: llmbc.discriminator.llm_ce_discriminator.LLMCEDiscriminator
|
| 31 |
+
task_id: ${task_name}
|
| 32 |
+
llm_translator:
|
| 33 |
+
_target_: llmbc.translator.llm_translator.LLMTranslator
|
| 34 |
+
cfg: ${llm}
|
| 35 |
+
obs_dim: ${task.obs_dim}
|
| 36 |
+
action_dim: ${task.action_dim}
|
| 37 |
+
horizon: ${horizon}
|
| 38 |
+
n_obs_steps: ${n_obs_steps}
|
| 39 |
+
n_action_steps: ${n_action_steps}
|
| 40 |
+
loss_bc_weight: 1.0
|
| 41 |
+
loss_llm_weight: 0.01
|
| 42 |
+
horizon: ${horizon}
|
| 43 |
+
n_obs_steps: ${n_obs_steps}
|
| 44 |
+
n_action_steps: ${n_action_steps}
|
| 45 |
+
normalize_llm_loss: true
|
| 46 |
+
dataloader:
|
| 47 |
+
batch_size: 16
|
| 48 |
+
num_workers: 0
|
| 49 |
+
shuffle: true
|
| 50 |
+
pin_memory: false
|
| 51 |
+
persistent_workers: false
|
| 52 |
+
val_dataloader:
|
| 53 |
+
batch_size: 16
|
| 54 |
+
num_workers: 0
|
| 55 |
+
shuffle: true
|
| 56 |
+
pin_memory: false
|
| 57 |
+
persistent_workers: false
|
| 58 |
+
optimizer:
|
| 59 |
+
_target_: torch.optim.AdamW
|
| 60 |
+
lr: 0.01
|
| 61 |
+
betas:
|
| 62 |
+
- 0.95
|
| 63 |
+
- 0.999
|
| 64 |
+
eps: 1.0e-08
|
| 65 |
+
weight_decay: 1.0e-06
|
| 66 |
+
training:
|
| 67 |
+
device: cuda:0
|
| 68 |
+
seed: 42
|
| 69 |
+
debug: false
|
| 70 |
+
resume: false
|
| 71 |
+
lr_scheduler: cosine
|
| 72 |
+
lr_warmup_steps: 10
|
| 73 |
+
num_epochs: 1001
|
| 74 |
+
gradient_accumulate_every: 8
|
| 75 |
+
grad_norm_clip: 0.5
|
| 76 |
+
rollout_every: 5
|
| 77 |
+
checkpoint_every: 5
|
| 78 |
+
val_every: 1
|
| 79 |
+
sample_every: 5
|
| 80 |
+
sample_max_batch: 128
|
| 81 |
+
max_train_steps: null
|
| 82 |
+
max_val_steps: null
|
| 83 |
+
tqdm_interval_sec: 1.0
|
| 84 |
+
logging:
|
| 85 |
+
project: ${task.name}-training
|
| 86 |
+
resume: true
|
| 87 |
+
mode: online
|
| 88 |
+
name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name}
|
| 89 |
+
tags:
|
| 90 |
+
- ${name}
|
| 91 |
+
- ${task_name}
|
| 92 |
+
- ${exp_name}
|
| 93 |
+
id: null
|
| 94 |
+
group: null
|
| 95 |
+
checkpoint:
|
| 96 |
+
topk:
|
| 97 |
+
monitor_key: test_success_rate
|
| 98 |
+
mode: max
|
| 99 |
+
k: 5
|
| 100 |
+
format_str: epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt
|
| 101 |
+
save_last_ckpt: true
|
| 102 |
+
save_last_snapshot: false
|
| 103 |
+
multi_run:
|
| 104 |
+
run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
|
| 105 |
+
wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name}
|
| 106 |
+
task:
|
| 107 |
+
name: box-close-v2
|
| 108 |
+
obs_dim: 9
|
| 109 |
+
action_dim: 4
|
| 110 |
+
env_runner:
|
| 111 |
+
_target_: llmbc.env_runner.metaworld_lowdim_runner.MetaworldLowdimRunner
|
| 112 |
+
env_name: llf-metaworld-box-close-v2
|
| 113 |
+
n_train: 10
|
| 114 |
+
n_test: 50
|
| 115 |
+
n_envs: 10
|
| 116 |
+
max_steps: 30
|
| 117 |
+
n_obs_steps: ${n_obs_steps}
|
| 118 |
+
n_action_steps: ${n_action_steps}
|
| 119 |
+
instruction_type: b
|
| 120 |
+
feedback_type:
|
| 121 |
+
- hp
|
| 122 |
+
- hn
|
| 123 |
+
- fp
|
| 124 |
+
visual: false
|
| 125 |
+
discount: 0.9
|
| 126 |
+
dataset:
|
| 127 |
+
_target_: llmbc.dataset.metaworld_lowdim_dataset.MetaworldLowdimDataset
|
| 128 |
+
data_path: datasets/box-close-v2.pt
|
| 129 |
+
data_path2: datasets/box-close-v2.pt
|
| 130 |
+
horizon: ${horizon}
|
| 131 |
+
pad_before: ${eval:'${n_obs_steps}-1'}
|
| 132 |
+
pad_after: ${eval:'${n_action_steps}-1'}
|
| 133 |
+
obs_eef_target: true
|
| 134 |
+
use_manual_normalizer: false
|
| 135 |
+
val_ratio: 0.1
|
| 136 |
+
dummy_normalizer: true
|
| 137 |
+
instructor:
|
| 138 |
+
_target_: llmbc.translator.instructor.metaworld_instructor.box_close_v2_instructor.BoxCloseV2Instructor
|
| 139 |
+
llm:
|
| 140 |
+
name: HuggingFaceTB/SmolLM2-135M-Instruct
|
| 141 |
+
model_name: SmolLM2-135M-Instruct
|
| 142 |
+
config_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig
|
| 143 |
+
causal_lm_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM
|
| 144 |
+
use_quantization: false
|
| 145 |
+
use_joint_mlp_projector: true
|
| 146 |
+
llm_mode: ete-finetuned
|
| 147 |
+
finetune_mode: orig
|
| 148 |
+
checkpoint: data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890
|
| 149 |
+
max_length: 100
|
| 150 |
+
lora_config:
|
| 151 |
+
r: 32
|
| 152 |
+
lora_alpha: 64
|
| 153 |
+
lora_dropout: 0.05
|
| 154 |
+
bias: none
|
| 155 |
+
task_type: CAUSAL_LM
|
| 156 |
+
prompter:
|
| 157 |
+
_target_: llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter
|
| 158 |
+
use_joint_mlp_projector: true
|
| 159 |
+
hydra:
|
| 160 |
+
job:
|
| 161 |
+
override_dirname: ${model_name}
|
| 162 |
+
run:
|
| 163 |
+
dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${model_name}
|
2026.01.21/12.13.09_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
hydra:
|
| 2 |
+
run:
|
| 3 |
+
dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
|
| 4 |
+
sweep:
|
| 5 |
+
dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
|
| 6 |
+
subdir: ${hydra.job.num}
|
| 7 |
+
launcher:
|
| 8 |
+
_target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
|
| 9 |
+
sweeper:
|
| 10 |
+
_target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
|
| 11 |
+
max_batch_size: null
|
| 12 |
+
params: null
|
| 13 |
+
help:
|
| 14 |
+
app_name: ${hydra.job.name}
|
| 15 |
+
header: '${hydra.help.app_name} is powered by Hydra.
|
| 16 |
+
|
| 17 |
+
'
|
| 18 |
+
footer: 'Powered by Hydra (https://hydra.cc)
|
| 19 |
+
|
| 20 |
+
Use --hydra-help to view Hydra specific help
|
| 21 |
+
|
| 22 |
+
'
|
| 23 |
+
template: '${hydra.help.header}
|
| 24 |
+
|
| 25 |
+
== Configuration groups ==
|
| 26 |
+
|
| 27 |
+
Compose your configuration from those groups (group=option)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
$APP_CONFIG_GROUPS
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
== Config ==
|
| 34 |
+
|
| 35 |
+
Override anything in the config (foo.bar=value)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
$CONFIG
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
${hydra.help.footer}
|
| 42 |
+
|
| 43 |
+
'
|
| 44 |
+
hydra_help:
|
| 45 |
+
template: 'Hydra (${hydra.runtime.version})
|
| 46 |
+
|
| 47 |
+
See https://hydra.cc for more info.
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
== Flags ==
|
| 51 |
+
|
| 52 |
+
$FLAGS_HELP
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
== Configuration groups ==
|
| 56 |
+
|
| 57 |
+
Compose your configuration from those groups (For example, append hydra/job_logging=disabled
|
| 58 |
+
to command line)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
$HYDRA_CONFIG_GROUPS
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
Use ''--cfg hydra'' to Show the Hydra config.
|
| 65 |
+
|
| 66 |
+
'
|
| 67 |
+
hydra_help: ???
|
| 68 |
+
hydra_logging:
|
| 69 |
+
version: 1
|
| 70 |
+
formatters:
|
| 71 |
+
simple:
|
| 72 |
+
format: '[%(asctime)s][HYDRA] %(message)s'
|
| 73 |
+
handlers:
|
| 74 |
+
console:
|
| 75 |
+
class: logging.StreamHandler
|
| 76 |
+
formatter: simple
|
| 77 |
+
stream: ext://sys.stdout
|
| 78 |
+
root:
|
| 79 |
+
level: INFO
|
| 80 |
+
handlers:
|
| 81 |
+
- console
|
| 82 |
+
loggers:
|
| 83 |
+
logging_example:
|
| 84 |
+
level: DEBUG
|
| 85 |
+
disable_existing_loggers: false
|
| 86 |
+
job_logging:
|
| 87 |
+
version: 1
|
| 88 |
+
formatters:
|
| 89 |
+
simple:
|
| 90 |
+
format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
|
| 91 |
+
handlers:
|
| 92 |
+
console:
|
| 93 |
+
class: logging.StreamHandler
|
| 94 |
+
formatter: simple
|
| 95 |
+
stream: ext://sys.stdout
|
| 96 |
+
file:
|
| 97 |
+
class: logging.FileHandler
|
| 98 |
+
formatter: simple
|
| 99 |
+
filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
|
| 100 |
+
root:
|
| 101 |
+
level: INFO
|
| 102 |
+
handlers:
|
| 103 |
+
- console
|
| 104 |
+
- file
|
| 105 |
+
disable_existing_loggers: false
|
| 106 |
+
env: {}
|
| 107 |
+
mode: RUN
|
| 108 |
+
searchpath: []
|
| 109 |
+
callbacks: {}
|
| 110 |
+
output_subdir: .hydra
|
| 111 |
+
overrides:
|
| 112 |
+
hydra:
|
| 113 |
+
- hydra.mode=RUN
|
| 114 |
+
task: []
|
| 115 |
+
job:
|
| 116 |
+
name: train
|
| 117 |
+
chdir: null
|
| 118 |
+
override_dirname: ''
|
| 119 |
+
id: ???
|
| 120 |
+
num: ???
|
| 121 |
+
config_name: llmbc_box-close-v2.yaml
|
| 122 |
+
env_set: {}
|
| 123 |
+
env_copy: []
|
| 124 |
+
config:
|
| 125 |
+
override_dirname:
|
| 126 |
+
kv_sep: '='
|
| 127 |
+
item_sep: ','
|
| 128 |
+
exclude_keys: []
|
| 129 |
+
runtime:
|
| 130 |
+
version: 1.2.0
|
| 131 |
+
version_base: '1.2'
|
| 132 |
+
cwd: /work/u1131674/LLM-BC
|
| 133 |
+
config_sources:
|
| 134 |
+
- path: hydra.conf
|
| 135 |
+
schema: pkg
|
| 136 |
+
provider: hydra
|
| 137 |
+
- path: /work/u1131674/LLM-BC/config/main_table
|
| 138 |
+
schema: file
|
| 139 |
+
provider: main
|
| 140 |
+
- path: ''
|
| 141 |
+
schema: structured
|
| 142 |
+
provider: schema
|
| 143 |
+
output_dir: /work/u1131674/LLM-BC/data/outputs/2026.01.21/12.13.09_train_llmbc_lowdim_box-close-v2
|
| 144 |
+
choices:
|
| 145 |
+
hydra/env: default
|
| 146 |
+
hydra/callbacks: null
|
| 147 |
+
hydra/job_logging: default
|
| 148 |
+
hydra/hydra_logging: default
|
| 149 |
+
hydra/hydra_help: default
|
| 150 |
+
hydra/help: default
|
| 151 |
+
hydra/sweeper: basic
|
| 152 |
+
hydra/launcher: basic
|
| 153 |
+
hydra/output: default
|
| 154 |
+
verbose: false
|
2026.01.21/12.13.09_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
[]
|
2026.01.21/12.13.09_train_llmbc_lowdim_box-close-v2/train.log
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[2026-01-21 12:13:11,502][numexpr.utils][INFO] - Note: detected 224 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable.
|
| 2 |
+
[2026-01-21 12:13:11,502][numexpr.utils][INFO] - Note: NumExpr detected 224 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 16.
|
| 3 |
+
[2026-01-21 12:13:11,502][numexpr.utils][INFO] - NumExpr defaulting to 16 threads.
|
| 4 |
+
[2026-01-21 12:13:16,444][datasets][INFO] - PyTorch version 2.2.2 available.
|
| 5 |
+
[2026-01-21 12:13:16,445][datasets][INFO] - TensorFlow version 2.15.1 available.
|
| 6 |
+
[2026-01-21 12:13:16,446][datasets][INFO] - JAX version 0.4.30 available.
|
| 7 |
+
[2026-01-21 12:13:41,170][root][INFO] - running build_ext
|
| 8 |
+
[2026-01-21 12:13:41,174][root][INFO] - building 'mujoco_py.cymj' extension
|
| 9 |
+
[2026-01-21 12:13:41,174][root][INFO] - creating /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/generated/_pyxbld_2.1.2.14_39_linuxgpuextensionbuilder/temp.linux-x86_64-cpython-39/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py
|
| 10 |
+
[2026-01-21 12:13:41,196][root][INFO] - creating /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/generated/_pyxbld_2.1.2.14_39_linuxgpuextensionbuilder/temp.linux-x86_64-cpython-39/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/gl
|
| 11 |
+
[2026-01-21 12:13:41,197][root][INFO] - gcc -pthread -B /home/u1131674/.conda/envs/llm-bc/compiler_compat -Wno-unused-result -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /home/u1131674/.conda/envs/llm-bc/include -I/home/u1131674/.conda/envs/llm-bc/include -fPIC -O2 -isystem /home/u1131674/.conda/envs/llm-bc/include -fPIC -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py -I/home/u1131674/.mujoco/mujoco210/include -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/numpy/core/include -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/vendor/egl -I/home/u1131674/.conda/envs/llm-bc/include/python3.9 -c /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/cymj.c -o /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/generated/_pyxbld_2.1.2.14_39_linuxgpuextensionbuilder/temp.linux-x86_64-cpython-39/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/cymj.o -fopenmp -w
|
| 12 |
+
[2026-01-21 12:14:08,619][root][INFO] - gcc -pthread -B /home/u1131674/.conda/envs/llm-bc/compiler_compat -Wno-unused-result -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /home/u1131674/.conda/envs/llm-bc/include -I/home/u1131674/.conda/envs/llm-bc/include -fPIC -O2 -isystem /home/u1131674/.conda/envs/llm-bc/include -fPIC -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py -I/home/u1131674/.mujoco/mujoco210/include -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/numpy/core/include -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/vendor/egl -I/home/u1131674/.conda/envs/llm-bc/include/python3.9 -c /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/gl/eglshim.c -o /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/generated/_pyxbld_2.1.2.14_39_linuxgpuextensionbuilder/temp.linux-x86_64-cpython-39/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/gl/eglshim.o -fopenmp -w
|
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: train_llmbc_lowdim
|
| 2 |
+
_target_: llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace
|
| 3 |
+
obs_dim: ${task.obs_dim}
|
| 4 |
+
action_dim: ${task.action_dim}
|
| 5 |
+
task_name: ${task.name}
|
| 6 |
+
exp_name: default
|
| 7 |
+
model_name: ${llm.name}
|
| 8 |
+
horizon: 1
|
| 9 |
+
n_obs_steps: 1
|
| 10 |
+
n_action_steps: 1
|
| 11 |
+
n_latency_steps: 0
|
| 12 |
+
past_action_visible: false
|
| 13 |
+
llm_orig_expert_feedback: true
|
| 14 |
+
llm_do_sample: false
|
| 15 |
+
policy:
|
| 16 |
+
_target_: llmbc.policy.llmbc_lowdim_policy.LLMBCLowdimPolicy
|
| 17 |
+
model:
|
| 18 |
+
_target_: llmbc.model.policy.policy_mlp.PolicyMLP
|
| 19 |
+
input_size: ${eval:'${n_obs_steps}*${obs_dim}'}
|
| 20 |
+
hidden_size:
|
| 21 |
+
- 256
|
| 22 |
+
- 256
|
| 23 |
+
output_size: ${eval:'${n_action_steps}*${action_dim}'}
|
| 24 |
+
activation: relu
|
| 25 |
+
n_obs_steps: ${n_obs_steps}
|
| 26 |
+
n_action_steps: ${n_action_steps}
|
| 27 |
+
obs_dim: ${obs_dim}
|
| 28 |
+
action_dim: ${action_dim}
|
| 29 |
+
llm_discriminator:
|
| 30 |
+
_target_: llmbc.discriminator.llm_ce_discriminator.LLMCEDiscriminator
|
| 31 |
+
task_id: ${task_name}
|
| 32 |
+
llm_translator:
|
| 33 |
+
_target_: llmbc.translator.llm_translator.LLMTranslator
|
| 34 |
+
cfg: ${llm}
|
| 35 |
+
obs_dim: ${task.obs_dim}
|
| 36 |
+
action_dim: ${task.action_dim}
|
| 37 |
+
horizon: ${horizon}
|
| 38 |
+
n_obs_steps: ${n_obs_steps}
|
| 39 |
+
n_action_steps: ${n_action_steps}
|
| 40 |
+
loss_bc_weight: 1.0
|
| 41 |
+
loss_llm_weight: 0.01
|
| 42 |
+
horizon: ${horizon}
|
| 43 |
+
n_obs_steps: ${n_obs_steps}
|
| 44 |
+
n_action_steps: ${n_action_steps}
|
| 45 |
+
normalize_llm_loss: true
|
| 46 |
+
dataloader:
|
| 47 |
+
batch_size: 16
|
| 48 |
+
num_workers: 0
|
| 49 |
+
shuffle: true
|
| 50 |
+
pin_memory: false
|
| 51 |
+
persistent_workers: false
|
| 52 |
+
val_dataloader:
|
| 53 |
+
batch_size: 16
|
| 54 |
+
num_workers: 0
|
| 55 |
+
shuffle: true
|
| 56 |
+
pin_memory: false
|
| 57 |
+
persistent_workers: false
|
| 58 |
+
optimizer:
|
| 59 |
+
_target_: torch.optim.AdamW
|
| 60 |
+
lr: 0.01
|
| 61 |
+
betas:
|
| 62 |
+
- 0.95
|
| 63 |
+
- 0.999
|
| 64 |
+
eps: 1.0e-08
|
| 65 |
+
weight_decay: 1.0e-06
|
| 66 |
+
training:
|
| 67 |
+
device: cuda:0
|
| 68 |
+
seed: 42
|
| 69 |
+
debug: false
|
| 70 |
+
resume: false
|
| 71 |
+
lr_scheduler: cosine
|
| 72 |
+
lr_warmup_steps: 10
|
| 73 |
+
num_epochs: 1001
|
| 74 |
+
gradient_accumulate_every: 8
|
| 75 |
+
grad_norm_clip: 0.5
|
| 76 |
+
rollout_every: 5
|
| 77 |
+
checkpoint_every: 5
|
| 78 |
+
val_every: 1
|
| 79 |
+
sample_every: 5
|
| 80 |
+
sample_max_batch: 128
|
| 81 |
+
max_train_steps: null
|
| 82 |
+
max_val_steps: null
|
| 83 |
+
tqdm_interval_sec: 1.0
|
| 84 |
+
logging:
|
| 85 |
+
project: ${task.name}-training
|
| 86 |
+
resume: true
|
| 87 |
+
mode: online
|
| 88 |
+
name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name}
|
| 89 |
+
tags:
|
| 90 |
+
- ${name}
|
| 91 |
+
- ${task_name}
|
| 92 |
+
- ${exp_name}
|
| 93 |
+
id: null
|
| 94 |
+
group: null
|
| 95 |
+
checkpoint:
|
| 96 |
+
topk:
|
| 97 |
+
monitor_key: test_success_rate
|
| 98 |
+
mode: max
|
| 99 |
+
k: 5
|
| 100 |
+
format_str: epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt
|
| 101 |
+
save_last_ckpt: true
|
| 102 |
+
save_last_snapshot: false
|
| 103 |
+
multi_run:
|
| 104 |
+
run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
|
| 105 |
+
wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name}
|
| 106 |
+
task:
|
| 107 |
+
name: box-close-v2
|
| 108 |
+
obs_dim: 9
|
| 109 |
+
action_dim: 4
|
| 110 |
+
env_runner:
|
| 111 |
+
_target_: llmbc.env_runner.metaworld_lowdim_runner.MetaworldLowdimRunner
|
| 112 |
+
env_name: llf-metaworld-box-close-v2
|
| 113 |
+
n_train: 10
|
| 114 |
+
n_test: 50
|
| 115 |
+
n_envs: 10
|
| 116 |
+
max_steps: 30
|
| 117 |
+
n_obs_steps: ${n_obs_steps}
|
| 118 |
+
n_action_steps: ${n_action_steps}
|
| 119 |
+
instruction_type: b
|
| 120 |
+
feedback_type:
|
| 121 |
+
- hp
|
| 122 |
+
- hn
|
| 123 |
+
- fp
|
| 124 |
+
visual: false
|
| 125 |
+
discount: 0.9
|
| 126 |
+
dataset:
|
| 127 |
+
_target_: llmbc.dataset.metaworld_lowdim_dataset.MetaworldLowdimDataset
|
| 128 |
+
data_path: datasets/box-close-v2.pt
|
| 129 |
+
data_path2: datasets/box-close-v2.pt
|
| 130 |
+
horizon: ${horizon}
|
| 131 |
+
pad_before: ${eval:'${n_obs_steps}-1'}
|
| 132 |
+
pad_after: ${eval:'${n_action_steps}-1'}
|
| 133 |
+
obs_eef_target: true
|
| 134 |
+
use_manual_normalizer: false
|
| 135 |
+
val_ratio: 0.1
|
| 136 |
+
dummy_normalizer: true
|
| 137 |
+
instructor:
|
| 138 |
+
_target_: llmbc.translator.instructor.metaworld_instructor.box_close_v2_instructor.BoxCloseV2Instructor
|
| 139 |
+
llm:
|
| 140 |
+
name: HuggingFaceTB/SmolLM2-135M-Instruct
|
| 141 |
+
model_name: SmolLM2-135M-Instruct
|
| 142 |
+
config_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig
|
| 143 |
+
causal_lm_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM
|
| 144 |
+
use_quantization: false
|
| 145 |
+
use_joint_mlp_projector: true
|
| 146 |
+
llm_mode: ete-finetuned
|
| 147 |
+
finetune_mode: orig
|
| 148 |
+
checkpoint: data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890
|
| 149 |
+
max_length: 100
|
| 150 |
+
lora_config:
|
| 151 |
+
r: 32
|
| 152 |
+
lora_alpha: 64
|
| 153 |
+
lora_dropout: 0.05
|
| 154 |
+
bias: none
|
| 155 |
+
task_type: CAUSAL_LM
|
| 156 |
+
prompter:
|
| 157 |
+
_target_: llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter
|
| 158 |
+
use_joint_mlp_projector: true
|
| 159 |
+
hydra:
|
| 160 |
+
job:
|
| 161 |
+
override_dirname: ${model_name}
|
| 162 |
+
run:
|
| 163 |
+
dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${model_name}
|
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
hydra:
|
| 2 |
+
run:
|
| 3 |
+
dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
|
| 4 |
+
sweep:
|
| 5 |
+
dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
|
| 6 |
+
subdir: ${hydra.job.num}
|
| 7 |
+
launcher:
|
| 8 |
+
_target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
|
| 9 |
+
sweeper:
|
| 10 |
+
_target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
|
| 11 |
+
max_batch_size: null
|
| 12 |
+
params: null
|
| 13 |
+
help:
|
| 14 |
+
app_name: ${hydra.job.name}
|
| 15 |
+
header: '${hydra.help.app_name} is powered by Hydra.
|
| 16 |
+
|
| 17 |
+
'
|
| 18 |
+
footer: 'Powered by Hydra (https://hydra.cc)
|
| 19 |
+
|
| 20 |
+
Use --hydra-help to view Hydra specific help
|
| 21 |
+
|
| 22 |
+
'
|
| 23 |
+
template: '${hydra.help.header}
|
| 24 |
+
|
| 25 |
+
== Configuration groups ==
|
| 26 |
+
|
| 27 |
+
Compose your configuration from those groups (group=option)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
$APP_CONFIG_GROUPS
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
== Config ==
|
| 34 |
+
|
| 35 |
+
Override anything in the config (foo.bar=value)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
$CONFIG
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
${hydra.help.footer}
|
| 42 |
+
|
| 43 |
+
'
|
| 44 |
+
hydra_help:
|
| 45 |
+
template: 'Hydra (${hydra.runtime.version})
|
| 46 |
+
|
| 47 |
+
See https://hydra.cc for more info.
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
== Flags ==
|
| 51 |
+
|
| 52 |
+
$FLAGS_HELP
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
== Configuration groups ==
|
| 56 |
+
|
| 57 |
+
Compose your configuration from those groups (For example, append hydra/job_logging=disabled
|
| 58 |
+
to command line)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
$HYDRA_CONFIG_GROUPS
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
Use ''--cfg hydra'' to Show the Hydra config.
|
| 65 |
+
|
| 66 |
+
'
|
| 67 |
+
hydra_help: ???
|
| 68 |
+
hydra_logging:
|
| 69 |
+
version: 1
|
| 70 |
+
formatters:
|
| 71 |
+
simple:
|
| 72 |
+
format: '[%(asctime)s][HYDRA] %(message)s'
|
| 73 |
+
handlers:
|
| 74 |
+
console:
|
| 75 |
+
class: logging.StreamHandler
|
| 76 |
+
formatter: simple
|
| 77 |
+
stream: ext://sys.stdout
|
| 78 |
+
root:
|
| 79 |
+
level: INFO
|
| 80 |
+
handlers:
|
| 81 |
+
- console
|
| 82 |
+
loggers:
|
| 83 |
+
logging_example:
|
| 84 |
+
level: DEBUG
|
| 85 |
+
disable_existing_loggers: false
|
| 86 |
+
job_logging:
|
| 87 |
+
version: 1
|
| 88 |
+
formatters:
|
| 89 |
+
simple:
|
| 90 |
+
format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
|
| 91 |
+
handlers:
|
| 92 |
+
console:
|
| 93 |
+
class: logging.StreamHandler
|
| 94 |
+
formatter: simple
|
| 95 |
+
stream: ext://sys.stdout
|
| 96 |
+
file:
|
| 97 |
+
class: logging.FileHandler
|
| 98 |
+
formatter: simple
|
| 99 |
+
filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
|
| 100 |
+
root:
|
| 101 |
+
level: INFO
|
| 102 |
+
handlers:
|
| 103 |
+
- console
|
| 104 |
+
- file
|
| 105 |
+
disable_existing_loggers: false
|
| 106 |
+
env: {}
|
| 107 |
+
mode: RUN
|
| 108 |
+
searchpath: []
|
| 109 |
+
callbacks: {}
|
| 110 |
+
output_subdir: .hydra
|
| 111 |
+
overrides:
|
| 112 |
+
hydra:
|
| 113 |
+
- hydra.mode=RUN
|
| 114 |
+
task: []
|
| 115 |
+
job:
|
| 116 |
+
name: train
|
| 117 |
+
chdir: null
|
| 118 |
+
override_dirname: ''
|
| 119 |
+
id: ???
|
| 120 |
+
num: ???
|
| 121 |
+
config_name: llmbc_box-close-v2.yaml
|
| 122 |
+
env_set: {}
|
| 123 |
+
env_copy: []
|
| 124 |
+
config:
|
| 125 |
+
override_dirname:
|
| 126 |
+
kv_sep: '='
|
| 127 |
+
item_sep: ','
|
| 128 |
+
exclude_keys: []
|
| 129 |
+
runtime:
|
| 130 |
+
version: 1.2.0
|
| 131 |
+
version_base: '1.2'
|
| 132 |
+
cwd: /work/u1131674/LLM-BC
|
| 133 |
+
config_sources:
|
| 134 |
+
- path: hydra.conf
|
| 135 |
+
schema: pkg
|
| 136 |
+
provider: hydra
|
| 137 |
+
- path: /work/u1131674/LLM-BC/config/main_table
|
| 138 |
+
schema: file
|
| 139 |
+
provider: main
|
| 140 |
+
- path: ''
|
| 141 |
+
schema: structured
|
| 142 |
+
provider: schema
|
| 143 |
+
output_dir: /work/u1131674/LLM-BC/data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2
|
| 144 |
+
choices:
|
| 145 |
+
hydra/env: default
|
| 146 |
+
hydra/callbacks: null
|
| 147 |
+
hydra/job_logging: default
|
| 148 |
+
hydra/hydra_logging: default
|
| 149 |
+
hydra/hydra_help: default
|
| 150 |
+
hydra/help: default
|
| 151 |
+
hydra/sweeper: basic
|
| 152 |
+
hydra/launcher: basic
|
| 153 |
+
hydra/output: default
|
| 154 |
+
verbose: false
|
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
[]
|
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/logs.json.txt
ADDED
|
@@ -0,0 +1,237 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"train_loss": 0.2577439248561859, "train_loss_bc": 0.25195014476776123, "train_loss_llm": 0.5793781280517578, "grad_norm": 0.12829534709453583, "global_step": 0, "epoch": 0, "lr": 0.001}
|
| 2 |
+
{"train_loss": 0.278277724981308, "train_loss_bc": 0.27264082431793213, "train_loss_llm": 0.5636913180351257, "grad_norm": 0.13488440215587616, "global_step": 1, "epoch": 0, "lr": 0.001}
|
| 3 |
+
{"train_loss": 0.29180172085762024, "train_loss_bc": 0.28621771931648254, "train_loss_llm": 0.5584006309509277, "grad_norm": 0.27443262934684753, "global_step": 2, "epoch": 0, "lr": 0.001}
|
| 4 |
+
{"train_loss": 0.2927302420139313, "train_loss_bc": 0.2865779399871826, "train_loss_llm": 0.6152303218841553, "grad_norm": 0.4118553102016449, "global_step": 3, "epoch": 0, "lr": 0.001}
|
| 5 |
+
{"train_loss": 0.28513821959495544, "train_loss_bc": 0.2797144651412964, "train_loss_llm": 0.5423756241798401, "grad_norm": 0.5492109656333923, "global_step": 4, "epoch": 0, "lr": 0.001}
|
| 6 |
+
{"train_loss": 0.31990620493888855, "train_loss_bc": 0.31439733505249023, "train_loss_llm": 0.5508874654769897, "grad_norm": 0.6955047845840454, "global_step": 5, "epoch": 0, "lr": 0.001}
|
| 7 |
+
{"train_loss": 0.27779361605644226, "train_loss_bc": 0.27196407318115234, "train_loss_llm": 0.5829552412033081, "grad_norm": 0.8313235640525818, "global_step": 6, "epoch": 0, "lr": 0.001}
|
| 8 |
+
{"train_loss": 0.23134832084178925, "train_loss_bc": 0.22543349862098694, "train_loss_llm": 0.5914825201034546, "grad_norm": 0.9541349411010742, "global_step": 7, "epoch": 0, "lr": 0.001}
|
| 9 |
+
{"train_loss": 0.2081925868988037, "train_loss_bc": 0.2028963267803192, "train_loss_llm": 0.5296263694763184, "grad_norm": 1.0696462392807007, "global_step": 8, "epoch": 0, "lr": 0.002}
|
| 10 |
+
{"train_loss": 0.2047700732946396, "train_loss_bc": 0.19888944923877716, "train_loss_llm": 0.5880619287490845, "grad_norm": 0.11593382805585861, "global_step": 9, "epoch": 0, "lr": 0.002}
|
| 11 |
+
{"train_loss": 0.2171408236026764, "train_loss_bc": 0.2111976146697998, "train_loss_llm": 0.5943207740783691, "grad_norm": 0.23421066999435425, "global_step": 10, "epoch": 0, "lr": 0.002}
|
| 12 |
+
{"train_loss": 0.211279034614563, "train_loss_bc": 0.2063535749912262, "train_loss_llm": 0.4925457537174225, "grad_norm": 0.3522197902202606, "global_step": 11, "epoch": 0, "lr": 0.002}
|
| 13 |
+
{"train_loss": 0.2630419433116913, "train_loss_bc": 0.2565857172012329, "train_loss_llm": 0.6456230282783508, "grad_norm": 0.48301446437835693, "global_step": 12, "epoch": 0, "lr": 0.002}
|
| 14 |
+
{"train_loss": 0.2441762089729309, "train_loss_bc": 0.23813079297542572, "train_loss_llm": 0.604541540145874, "grad_norm": 0.609789252281189, "global_step": 13, "epoch": 0, "lr": 0.002}
|
| 15 |
+
{"train_loss": 0.2768160402774811, "train_loss_bc": 0.27063897252082825, "train_loss_llm": 0.6177071332931519, "grad_norm": 0.7476180195808411, "global_step": 14, "epoch": 0, "lr": 0.002}
|
| 16 |
+
{"train_loss": 0.2484627217054367, "train_loss_bc": 0.24271151423454285, "train_loss_llm": 0.5751214623451233, "grad_norm": 0.8759933710098267, "global_step": 15, "epoch": 0, "lr": 0.002}
|
| 17 |
+
{"train_loss": 0.20768630504608154, "train_loss_bc": 0.20276379585266113, "train_loss_llm": 0.49225085973739624, "grad_norm": 0.9921573996543884, "global_step": 16, "epoch": 0, "lr": 0.003}
|
| 18 |
+
{"train_loss": 0.16910794377326965, "train_loss_bc": 0.16317197680473328, "train_loss_llm": 0.5935962796211243, "grad_norm": 0.11413145065307617, "global_step": 17, "epoch": 0, "lr": 0.003}
|
| 19 |
+
{"train_loss": 0.1472136378288269, "train_loss_bc": 0.14170503616333008, "train_loss_llm": 0.5508600473403931, "grad_norm": 0.2181655466556549, "global_step": 18, "epoch": 0, "lr": 0.003}
|
| 20 |
+
{"train_loss": 0.09437470138072968, "train_loss_bc": 0.08951498568058014, "train_loss_llm": 0.4859713315963745, "grad_norm": 0.2965621054172516, "global_step": 19, "epoch": 0, "lr": 0.003}
|
| 21 |
+
{"train_loss": 0.14634960889816284, "train_loss_bc": 0.14015674591064453, "train_loss_llm": 0.6192870140075684, "grad_norm": 0.399164617061615, "global_step": 20, "epoch": 0, "lr": 0.003}
|
| 22 |
+
{"train_loss": 0.13075421750545502, "train_loss_bc": 0.12502902746200562, "train_loss_llm": 0.5725185871124268, "grad_norm": 0.49499645829200745, "global_step": 21, "epoch": 0, "lr": 0.003}
|
| 23 |
+
{"train_loss": 0.1632406860589981, "train_loss_bc": 0.15791185200214386, "train_loss_llm": 0.5328830480575562, "grad_norm": 0.6078411936759949, "global_step": 22, "epoch": 0, "lr": 0.003}
|
| 24 |
+
{"train_loss": 0.16032128036022186, "train_loss_bc": 0.1541915237903595, "train_loss_llm": 0.6129759550094604, "grad_norm": 0.7181513905525208, "global_step": 23, "epoch": 0, "lr": 0.003}
|
| 25 |
+
{"train_loss": 0.10194464772939682, "train_loss_bc": 0.09674602746963501, "train_loss_llm": 0.5198622345924377, "grad_norm": 0.8007318377494812, "global_step": 24, "epoch": 0, "lr": 0.004}
|
| 26 |
+
{"train_loss": 0.04645621404051781, "train_loss_bc": 0.04119991511106491, "train_loss_llm": 0.5256298780441284, "grad_norm": 0.04878818616271019, "global_step": 25, "epoch": 0, "lr": 0.004}
|
| 27 |
+
{"train_loss": 0.05316489189863205, "train_loss_bc": 0.04852021113038063, "train_loss_llm": 0.46446824073791504, "grad_norm": 0.10567886382341385, "global_step": 26, "epoch": 0, "lr": 0.004}
|
| 28 |
+
{"train_loss": 0.034993816167116165, "train_loss_bc": 0.03099265694618225, "train_loss_llm": 0.40011608600616455, "grad_norm": 0.12614615261554718, "global_step": 27, "epoch": 0, "lr": 0.004}
|
| 29 |
+
{"train_loss": 0.05056390166282654, "train_loss_bc": 0.04542642831802368, "train_loss_llm": 0.5137471556663513, "grad_norm": 0.17804424464702606, "global_step": 28, "epoch": 0, "lr": 0.004}
|
| 30 |
+
{"train_loss": 0.040129225701093674, "train_loss_bc": 0.03615850210189819, "train_loss_llm": 0.3970724642276764, "grad_norm": 0.21957509219646454, "global_step": 29, "epoch": 0, "lr": 0.004}
|
| 31 |
+
{"train_loss": 0.06979431211948395, "train_loss_bc": 0.06506022810935974, "train_loss_llm": 0.47340837121009827, "grad_norm": 0.30375877022743225, "global_step": 30, "epoch": 0, "lr": 0.004}
|
| 32 |
+
{"train_loss": 0.05452805757522583, "train_loss_bc": 0.050960805267095566, "train_loss_llm": 0.3567253649234772, "grad_norm": 0.3601897656917572, "global_step": 31, "epoch": 0, "lr": 0.004}
|
| 33 |
+
{"train_loss": 0.05965254083275795, "train_loss_bc": 0.055447064340114594, "train_loss_llm": 0.4205475151538849, "grad_norm": 0.4274352192878723, "global_step": 32, "epoch": 0, "lr": 0.005}
|
| 34 |
+
{"train_loss": 0.06257618218660355, "train_loss_bc": 0.05625780671834946, "train_loss_llm": 0.6318378448486328, "grad_norm": 0.09260464459657669, "global_step": 33, "epoch": 0, "lr": 0.005}
|
| 35 |
+
{"train_loss": 0.05445178598165512, "train_loss_bc": 0.04902859777212143, "train_loss_llm": 0.5423187017440796, "grad_norm": 0.16763924062252045, "global_step": 34, "epoch": 0, "lr": 0.005}
|
| 36 |
+
{"train_loss": 0.06851150095462799, "train_loss_bc": 0.06271672248840332, "train_loss_llm": 0.5794777870178223, "grad_norm": 0.2678099274635315, "global_step": 35, "epoch": 0, "lr": 0.005}
|
| 37 |
+
{"train_loss": 0.0630837082862854, "train_loss_bc": 0.0575467087328434, "train_loss_llm": 0.553699791431427, "grad_norm": 0.3552546501159668, "global_step": 36, "epoch": 0, "lr": 0.005}
|
| 38 |
+
{"train_loss": 0.040140487253665924, "train_loss_bc": 0.03421059995889664, "train_loss_llm": 0.5929888486862183, "grad_norm": 0.41354402899742126, "global_step": 37, "epoch": 0, "lr": 0.005}
|
| 39 |
+
{"train_loss": 0.06981470435857773, "train_loss_bc": 0.063104547560215, "train_loss_llm": 0.6710153818130493, "grad_norm": 0.5134375095367432, "global_step": 38, "epoch": 0, "lr": 0.005}
|
| 40 |
+
{"train_loss": 0.051894403994083405, "train_loss_bc": 0.045866355299949646, "train_loss_llm": 0.6028048992156982, "grad_norm": 0.5825293660163879, "global_step": 39, "epoch": 0, "lr": 0.005}
|
| 41 |
+
{"train_loss": 0.04342593997716904, "train_loss_bc": 0.03640042245388031, "train_loss_llm": 0.7025519013404846, "grad_norm": 0.6445399522781372, "global_step": 40, "epoch": 0, "lr": 0.006}
|
| 42 |
+
{"train_loss": 0.1558080017566681, "train_loss_bc": 0.15039610862731934, "train_loss_llm": 0.5411889553070068, "grad_norm": 0.20307017862796783, "global_step": 41, "epoch": 0, "lr": 0.006}
|
| 43 |
+
{"train_loss": 0.12238138169050217, "train_loss_bc": 0.11733964830636978, "train_loss_llm": 0.5041730403900146, "grad_norm": 0.3785540461540222, "global_step": 42, "epoch": 0, "lr": 0.006}
|
| 44 |
+
{"train_loss": 0.11476962268352509, "train_loss_bc": 0.1102944016456604, "train_loss_llm": 0.44752180576324463, "grad_norm": 0.5496576428413391, "global_step": 43, "epoch": 0, "lr": 0.006}
|
| 45 |
+
{"train_loss": 0.1318601667881012, "train_loss_bc": 0.12625660002231598, "train_loss_llm": 0.5603567957878113, "grad_norm": 0.7342697381973267, "global_step": 44, "epoch": 0, "lr": 0.006}
|
| 46 |
+
{"train_loss": 0.15008734166622162, "train_loss_bc": 0.14480489492416382, "train_loss_llm": 0.5282450914382935, "grad_norm": 0.9383558630943298, "global_step": 45, "epoch": 0, "lr": 0.006}
|
| 47 |
+
{"train_loss": 0.11853287369012833, "train_loss_bc": 0.11271888017654419, "train_loss_llm": 0.5813996195793152, "grad_norm": 1.1123522520065308, "global_step": 46, "epoch": 0, "lr": 0.006}
|
| 48 |
+
{"train_loss": 0.14414051175117493, "train_loss_bc": 0.13898390531539917, "train_loss_llm": 0.5156602263450623, "grad_norm": 1.3082720041275024, "global_step": 47, "epoch": 0, "lr": 0.006}
|
| 49 |
+
{"train_loss": 0.1536247432231903, "train_loss_bc": 0.14848382771015167, "train_loss_llm": 0.5140920877456665, "grad_norm": 1.5149050951004028, "global_step": 48, "epoch": 0, "lr": 0.006999999999999999}
|
| 50 |
+
{"train_loss": 0.25954943895339966, "train_loss_bc": 0.25265026092529297, "train_loss_llm": 0.6899186372756958, "grad_norm": 0.3054397702217102, "global_step": 49, "epoch": 0, "lr": 0.006999999999999999}
|
| 51 |
+
{"train_loss": 0.1506877839565277, "train_loss_bc": 0.1453518569469452, "train_loss_llm": 0.5335921049118042, "grad_norm": 0.5257424116134644, "global_step": 50, "epoch": 0, "lr": 0.006999999999999999}
|
| 52 |
+
{"train_loss": 0.17754197120666504, "train_loss_bc": 0.17331534624099731, "train_loss_llm": 0.4226621985435486, "grad_norm": 0.769081711769104, "global_step": 51, "epoch": 0, "lr": 0.006999999999999999}
|
| 53 |
+
{"train_loss": 0.27337175607681274, "train_loss_bc": 0.26682397723197937, "train_loss_llm": 0.6547775268554688, "grad_norm": 1.0860713720321655, "global_step": 52, "epoch": 0, "lr": 0.006999999999999999}
|
| 54 |
+
{"train_loss": 0.21706189215183258, "train_loss_bc": 0.21164150536060333, "train_loss_llm": 0.5420382022857666, "grad_norm": 1.3574727773666382, "global_step": 53, "epoch": 0, "lr": 0.006999999999999999}
|
| 55 |
+
{"train_loss": 0.16595229506492615, "train_loss_bc": 0.16188554465770721, "train_loss_llm": 0.4066758155822754, "grad_norm": 1.5899840593338013, "global_step": 54, "epoch": 0, "lr": 0.006999999999999999}
|
| 56 |
+
{"train_loss": 0.23229315876960754, "train_loss_bc": 0.22762833535671234, "train_loss_llm": 0.4664822220802307, "grad_norm": 1.8754494190216064, "global_step": 55, "epoch": 0, "lr": 0.006999999999999999}
|
| 57 |
+
{"train_loss": 0.21556805074214935, "train_loss_bc": 0.2103624939918518, "train_loss_llm": 0.5205552577972412, "grad_norm": 2.147486448287964, "global_step": 56, "epoch": 0, "lr": 0.008}
|
| 58 |
+
{"train_loss": 0.22826582193374634, "train_loss_bc": 0.22174005210399628, "train_loss_llm": 0.6525774002075195, "grad_norm": 0.286575049161911, "global_step": 57, "epoch": 0, "lr": 0.008}
|
| 59 |
+
{"train_loss": 0.20953819155693054, "train_loss_bc": 0.203176349401474, "train_loss_llm": 0.6361845135688782, "grad_norm": 0.5594003200531006, "global_step": 58, "epoch": 0, "lr": 0.008}
|
| 60 |
+
{"train_loss": 0.191473588347435, "train_loss_bc": 0.18566101789474487, "train_loss_llm": 0.581256628036499, "grad_norm": 0.8172082304954529, "global_step": 59, "epoch": 0, "lr": 0.008}
|
| 61 |
+
{"train_loss": 0.17888422310352325, "train_loss_bc": 0.17266017198562622, "train_loss_llm": 0.6224054098129272, "grad_norm": 1.0602154731750488, "global_step": 60, "epoch": 0, "lr": 0.008}
|
| 62 |
+
{"train_loss": 0.21835987269878387, "train_loss_bc": 0.21199063956737518, "train_loss_llm": 0.6369228959083557, "grad_norm": 1.3346713781356812, "global_step": 61, "epoch": 0, "lr": 0.008}
|
| 63 |
+
{"train_loss": 0.17873793840408325, "train_loss_bc": 0.17244993150234222, "train_loss_llm": 0.6288003921508789, "grad_norm": 1.583105206489563, "global_step": 62, "epoch": 0, "lr": 0.008}
|
| 64 |
+
{"train_loss": 0.14904041588306427, "train_loss_bc": 0.14287304878234863, "train_loss_llm": 0.616736888885498, "grad_norm": 1.8050798177719116, "global_step": 63, "epoch": 0, "lr": 0.008}
|
| 65 |
+
{"train_loss": 0.22122563421726227, "train_loss_bc": 0.215244859457016, "train_loss_llm": 0.5980769395828247, "grad_norm": 2.082054615020752, "global_step": 64, "epoch": 0, "lr": 0.009000000000000001}
|
| 66 |
+
{"train_loss": 0.11144096404314041, "train_loss_bc": 0.10432037711143494, "train_loss_llm": 0.712058424949646, "grad_norm": 0.1753779500722885, "global_step": 65, "epoch": 0, "lr": 0.009000000000000001}
|
| 67 |
+
{"train_loss": 0.11379032582044601, "train_loss_bc": 0.107419952750206, "train_loss_llm": 0.6370369791984558, "grad_norm": 0.3535049855709076, "global_step": 66, "epoch": 0, "lr": 0.009000000000000001}
|
| 68 |
+
{"train_loss": 0.10985075682401657, "train_loss_bc": 0.1022319421172142, "train_loss_llm": 0.7618812322616577, "grad_norm": 0.5256584286689758, "global_step": 67, "epoch": 0, "lr": 0.009000000000000001}
|
| 69 |
+
{"train_loss": 0.18938198685646057, "train_loss_bc": 0.18246878683567047, "train_loss_llm": 0.691320538520813, "grad_norm": 0.7720930576324463, "global_step": 68, "epoch": 0, "lr": 0.009000000000000001}
|
| 70 |
+
{"train_loss": 0.10004343092441559, "train_loss_bc": 0.09400247782468796, "train_loss_llm": 0.6040955781936646, "grad_norm": 0.939155638217926, "global_step": 69, "epoch": 0, "lr": 0.009000000000000001}
|
| 71 |
+
{"train_loss": 0.11703117191791534, "train_loss_bc": 0.11094395071268082, "train_loss_llm": 0.6087222099304199, "grad_norm": 1.1172370910644531, "global_step": 70, "epoch": 0, "lr": 0.009000000000000001}
|
| 72 |
+
{"train_loss": 0.13404561579227448, "train_loss_bc": 0.12686075270175934, "train_loss_llm": 0.7184867262840271, "grad_norm": 1.312468409538269, "global_step": 71, "epoch": 0, "lr": 0.009000000000000001}
|
| 73 |
+
{"train_loss": 0.13330930471420288, "train_loss_bc": 0.12657678127288818, "train_loss_llm": 0.6732516288757324, "grad_norm": 1.5088775157928467, "global_step": 72, "epoch": 0, "lr": 0.01}
|
| 74 |
+
{"train_loss": 0.05257038772106171, "train_loss_bc": 0.04637575149536133, "train_loss_llm": 0.6194634437561035, "grad_norm": 0.09083625674247742, "global_step": 73, "epoch": 0, "lr": 0.01}
|
| 75 |
+
{"train_loss": 0.06475914269685745, "train_loss_bc": 0.057880476117134094, "train_loss_llm": 0.6878665089607239, "grad_norm": 0.1966981440782547, "global_step": 74, "epoch": 0, "lr": 0.01}
|
| 76 |
+
{"train_loss": 0.04975426197052002, "train_loss_bc": 0.043193425983190536, "train_loss_llm": 0.6560835242271423, "grad_norm": 0.28462910652160645, "global_step": 75, "epoch": 0, "lr": 0.01}
|
| 77 |
+
{"train_loss": 0.04952416196465492, "train_loss_bc": 0.04294995218515396, "train_loss_llm": 0.6574209332466125, "grad_norm": 0.368166983127594, "global_step": 76, "epoch": 0, "lr": 0.01}
|
| 78 |
+
{"train_loss": 0.07074079662561417, "train_loss_bc": 0.06346137821674347, "train_loss_llm": 0.7279415130615234, "grad_norm": 0.484068363904953, "global_step": 77, "epoch": 0, "lr": 0.01}
|
| 79 |
+
{"train_loss": 0.04157562926411629, "train_loss_bc": 0.034751974046230316, "train_loss_llm": 0.6823655962944031, "grad_norm": 0.5569941997528076, "global_step": 78, "epoch": 0, "lr": 0.01}
|
| 80 |
+
{"train_loss": 0.06484629958868027, "train_loss_bc": 0.05785399675369263, "train_loss_llm": 0.6992301940917969, "grad_norm": 0.6628190279006958, "global_step": 79, "epoch": 0, "lr": 0.01}
|
| 81 |
+
{"train_loss": 0.038789354264736176, "train_loss_bc": 0.03276902064681053, "train_loss_llm": 0.6020334959030151, "grad_norm": 0.7350778579711914, "global_step": 80, "epoch": 0, "lr": 0.009999999972157305}
|
| 82 |
+
{"train_loss": 0.03389202430844307, "train_loss_bc": 0.02817351743578911, "train_loss_llm": 0.571850597858429, "grad_norm": 0.04861941188573837, "global_step": 81, "epoch": 0, "lr": 0.009999999972157305}
|
| 83 |
+
{"train_loss": 0.03024495765566826, "train_loss_bc": 0.02487185411155224, "train_loss_llm": 0.5373104214668274, "grad_norm": 0.08896133303642273, "global_step": 82, "epoch": 0, "lr": 0.009999999972157305}
|
| 84 |
+
{"train_loss": 0.029436565935611725, "train_loss_bc": 0.024766096845269203, "train_loss_llm": 0.46704691648483276, "grad_norm": 0.13158170878887177, "global_step": 83, "epoch": 0, "lr": 0.009999999972157305}
|
| 85 |
+
{"train_loss": 0.03704115003347397, "train_loss_bc": 0.03144294396042824, "train_loss_llm": 0.5598207712173462, "grad_norm": 0.18903131783008575, "global_step": 84, "epoch": 0, "lr": 0.009999999972157305}
|
| 86 |
+
{"train_loss": 0.031894855201244354, "train_loss_bc": 0.026735499501228333, "train_loss_llm": 0.5159357786178589, "grad_norm": 0.22145399451255798, "global_step": 85, "epoch": 0, "lr": 0.009999999972157305}
|
| 87 |
+
{"train_loss": 0.03053618222475052, "train_loss_bc": 0.025796514004468918, "train_loss_llm": 0.47396671772003174, "grad_norm": 0.2594376802444458, "global_step": 86, "epoch": 0, "lr": 0.009999999972157305}
|
| 88 |
+
{"train_loss": 0.025953643023967743, "train_loss_bc": 0.021002870053052902, "train_loss_llm": 0.49507731199264526, "grad_norm": 0.28883251547813416, "global_step": 87, "epoch": 0, "lr": 0.009999999972157305}
|
| 89 |
+
{"train_loss": 0.03711831569671631, "train_loss_bc": 0.03182109445333481, "train_loss_llm": 0.5297219753265381, "grad_norm": 0.33612799644470215, "global_step": 88, "epoch": 0, "lr": 0.009999999888629223}
|
| 90 |
+
{"train_loss": 0.07443847507238388, "train_loss_bc": 0.06750228255987167, "train_loss_llm": 0.6936193704605103, "grad_norm": 0.1032935231924057, "global_step": 89, "epoch": 0, "lr": 0.009999999888629223}
|
| 91 |
+
{"train_loss": 0.06578436493873596, "train_loss_bc": 0.059477001428604126, "train_loss_llm": 0.6307359933853149, "grad_norm": 0.19785623252391815, "global_step": 90, "epoch": 0, "lr": 0.009999999888629223}
|
| 92 |
+
{"train_loss": 0.05691196769475937, "train_loss_bc": 0.05067047104239464, "train_loss_llm": 0.6241495013237, "grad_norm": 0.28224730491638184, "global_step": 91, "epoch": 0, "lr": 0.009999999888629223}
|
| 93 |
+
{"train_loss": 0.07031725347042084, "train_loss_bc": 0.06331950426101685, "train_loss_llm": 0.699774980545044, "grad_norm": 0.38025128841400146, "global_step": 92, "epoch": 0, "lr": 0.009999999888629223}
|
| 94 |
+
{"train_loss": 0.06619272381067276, "train_loss_bc": 0.059530019760131836, "train_loss_llm": 0.6662706136703491, "grad_norm": 0.47631222009658813, "global_step": 93, "epoch": 0, "lr": 0.009999999888629223}
|
| 95 |
+
{"train_loss": 0.050842322409152985, "train_loss_bc": 0.04463043063879013, "train_loss_llm": 0.6211893558502197, "grad_norm": 0.5518149137496948, "global_step": 94, "epoch": 0, "lr": 0.009999999888629223}
|
| 96 |
+
{"train_loss": 0.05087399110198021, "train_loss_bc": 0.044860679656267166, "train_loss_llm": 0.6013312339782715, "grad_norm": 0.6292504668235779, "global_step": 95, "epoch": 0, "lr": 0.009999999888629223}
|
| 97 |
+
{"train_loss": 0.06841012090444565, "train_loss_bc": 0.061625488102436066, "train_loss_llm": 0.6784631013870239, "grad_norm": 0.726737916469574, "global_step": 96, "epoch": 0, "lr": 0.00999999974941575}
|
| 98 |
+
{"train_loss": 0.08856374025344849, "train_loss_bc": 0.08115855604410172, "train_loss_llm": 0.7405182123184204, "grad_norm": 0.11317727714776993, "global_step": 97, "epoch": 0, "lr": 0.00999999974941575}
|
| 99 |
+
{"train_loss": 0.08638611435890198, "train_loss_bc": 0.07939188182353973, "train_loss_llm": 0.6994235515594482, "grad_norm": 0.22164146602153778, "global_step": 98, "epoch": 0, "lr": 0.00999999974941575}
|
| 100 |
+
{"train_loss": 0.08941305428743362, "train_loss_bc": 0.0817980170249939, "train_loss_llm": 0.7615037560462952, "grad_norm": 0.32890111207962036, "global_step": 99, "epoch": 0, "lr": 0.00999999974941575}
|
| 101 |
+
{"train_loss": 0.07866586744785309, "train_loss_bc": 0.07075173407793045, "train_loss_llm": 0.7914135456085205, "grad_norm": 0.4279417097568512, "global_step": 100, "epoch": 0, "lr": 0.00999999974941575}
|
| 102 |
+
{"train_loss": 0.09740469604730606, "train_loss_bc": 0.0890614315867424, "train_loss_llm": 0.8343262672424316, "grad_norm": 0.5465472340583801, "global_step": 101, "epoch": 0, "lr": 0.00999999974941575}
|
| 103 |
+
{"train_loss": 0.07890348881483078, "train_loss_bc": 0.07148407399654388, "train_loss_llm": 0.7419418096542358, "grad_norm": 0.6493978500366211, "global_step": 102, "epoch": 0, "lr": 0.00999999974941575}
|
| 104 |
+
{"train_loss": 0.06637918949127197, "train_loss_bc": 0.05943997576832771, "train_loss_llm": 0.6939213275909424, "grad_norm": 0.736656665802002, "global_step": 103, "epoch": 0, "lr": 0.00999999974941575}
|
| 105 |
+
{"train_loss": 0.052137844264507294, "train_loss_bc": 0.04496845602989197, "train_loss_llm": 0.716938853263855, "grad_norm": 0.8118408918380737, "global_step": 104, "epoch": 0, "lr": 0.009999999554516895}
|
| 106 |
+
{"train_loss": 0.06986512988805771, "train_loss_bc": 0.06309865415096283, "train_loss_llm": 0.6766473650932312, "grad_norm": 0.08536022901535034, "global_step": 105, "epoch": 0, "lr": 0.009999999554516895}
|
| 107 |
+
{"train_loss": 0.0901651680469513, "train_loss_bc": 0.08455254882574081, "train_loss_llm": 0.5612622499465942, "grad_norm": 0.19402463734149933, "global_step": 106, "epoch": 0, "lr": 0.009999999554516895}
|
| 108 |
+
{"train_loss": 0.08825574815273285, "train_loss_bc": 0.08174335211515427, "train_loss_llm": 0.6512394547462463, "grad_norm": 0.29752182960510254, "global_step": 107, "epoch": 0, "lr": 0.009999999554516895}
|
| 109 |
+
{"train_loss": 0.07733944058418274, "train_loss_bc": 0.07105374336242676, "train_loss_llm": 0.628569483757019, "grad_norm": 0.39171040058135986, "global_step": 108, "epoch": 0, "lr": 0.009999999554516895}
|
| 110 |
+
{"train_loss": 0.06732399016618729, "train_loss_bc": 0.06240474805235863, "train_loss_llm": 0.49192410707473755, "grad_norm": 0.4783252775669098, "global_step": 109, "epoch": 0, "lr": 0.009999999554516895}
|
| 111 |
+
{"train_loss": 0.06321073323488235, "train_loss_bc": 0.05660167708992958, "train_loss_llm": 0.6609058380126953, "grad_norm": 0.558458149433136, "global_step": 110, "epoch": 0, "lr": 0.009999999554516895}
|
| 112 |
+
{"train_loss": 0.06905204057693481, "train_loss_bc": 0.06339387595653534, "train_loss_llm": 0.5658166408538818, "grad_norm": 0.6481609344482422, "global_step": 111, "epoch": 0, "lr": 0.009999999554516895}
|
| 113 |
+
{"train_loss": 0.07884093374013901, "train_loss_bc": 0.07224734127521515, "train_loss_llm": 0.6593592166900635, "grad_norm": 0.7421054840087891, "global_step": 112, "epoch": 0, "lr": 0.009999999303932654}
|
| 114 |
+
{"train_loss": 0.07946255803108215, "train_loss_bc": 0.07475702464580536, "train_loss_llm": 0.4705533981323242, "grad_norm": 0.10471871495246887, "global_step": 113, "epoch": 0, "lr": 0.009999999303932654}
|
| 115 |
+
{"train_loss": 0.07216629385948181, "train_loss_bc": 0.06603223085403442, "train_loss_llm": 0.6134059429168701, "grad_norm": 0.19753926992416382, "global_step": 114, "epoch": 0, "lr": 0.009999999303932654}
|
| 116 |
+
{"train_loss": 0.06510302424430847, "train_loss_bc": 0.057828500866889954, "train_loss_llm": 0.7274521589279175, "grad_norm": 0.28359219431877136, "global_step": 115, "epoch": 0, "lr": 0.009999999303932654}
|
| 117 |
+
{"train_loss": 0.06222425401210785, "train_loss_bc": 0.055748678743839264, "train_loss_llm": 0.6475574970245361, "grad_norm": 0.36533209681510925, "global_step": 116, "epoch": 0, "lr": 0.009999999303932654}
|
| 118 |
+
{"train_loss": 0.0845273807644844, "train_loss_bc": 0.07734745740890503, "train_loss_llm": 0.7179924249649048, "grad_norm": 0.4720841646194458, "global_step": 117, "epoch": 0, "lr": 0.009999999303932654}
|
| 119 |
+
{"train_loss": 0.06714431196451187, "train_loss_bc": 0.06066868081688881, "train_loss_llm": 0.6475629210472107, "grad_norm": 0.5596445798873901, "global_step": 118, "epoch": 0, "lr": 0.009999999303932654}
|
| 120 |
+
{"train_loss": 0.07048200070858002, "train_loss_bc": 0.06500747799873352, "train_loss_llm": 0.5474520921707153, "grad_norm": 0.6513513326644897, "global_step": 119, "epoch": 0, "lr": 0.009999999303932654}
|
| 121 |
+
{"train_loss": 0.04110037535429001, "train_loss_bc": 0.03525649011135101, "train_loss_llm": 0.5843884944915771, "grad_norm": 0.7102450132369995, "global_step": 120, "epoch": 0, "lr": 0.009999998997663032}
|
| 122 |
+
{"train_loss": 0.04190563037991524, "train_loss_bc": 0.03765689581632614, "train_loss_llm": 0.4248734712600708, "grad_norm": 0.06533454358577728, "global_step": 121, "epoch": 0, "lr": 0.009999998997663032}
|
| 123 |
+
{"train_loss": 0.04612841457128525, "train_loss_bc": 0.04169066250324249, "train_loss_llm": 0.4437751770019531, "grad_norm": 0.13389350473880768, "global_step": 122, "epoch": 0, "lr": 0.009999998997663032}
|
| 124 |
+
{"train_loss": 0.06232012063264847, "train_loss_bc": 0.057958535850048065, "train_loss_llm": 0.436158686876297, "grad_norm": 0.22540993988513947, "global_step": 123, "epoch": 0, "lr": 0.009999998997663032}
|
| 125 |
+
{"train_loss": 0.05091412365436554, "train_loss_bc": 0.04628019779920578, "train_loss_llm": 0.4633924067020416, "grad_norm": 0.29657596349716187, "global_step": 124, "epoch": 0, "lr": 0.009999998997663032}
|
| 126 |
+
{"train_loss": 0.04201853275299072, "train_loss_bc": 0.03731508180499077, "train_loss_llm": 0.47034499049186707, "grad_norm": 0.3558724820613861, "global_step": 125, "epoch": 0, "lr": 0.009999998997663032}
|
| 127 |
+
{"train_loss": 0.06030768156051636, "train_loss_bc": 0.0569755993783474, "train_loss_llm": 0.3332084119319916, "grad_norm": 0.4466772675514221, "global_step": 126, "epoch": 0, "lr": 0.009999998997663032}
|
| 128 |
+
{"train_loss": 0.049573902040719986, "train_loss_bc": 0.044726163148880005, "train_loss_llm": 0.48477375507354736, "grad_norm": 0.518007755279541, "global_step": 127, "epoch": 0, "lr": 0.009999998997663032}
|
| 129 |
+
{"train_loss": 0.05068175494670868, "train_loss_bc": 0.04603324085474014, "train_loss_llm": 0.46485158801078796, "grad_norm": 0.584708034992218, "global_step": 128, "epoch": 0, "lr": 0.009999998635708033}
|
| 130 |
+
{"train_loss": 0.03280109167098999, "train_loss_bc": 0.026645543053746223, "train_loss_llm": 0.6155548095703125, "grad_norm": 0.04615609720349312, "global_step": 129, "epoch": 0, "lr": 0.009999998635708033}
|
| 131 |
+
{"train_loss": 0.02815183810889721, "train_loss_bc": 0.022307250648736954, "train_loss_llm": 0.5844587087631226, "grad_norm": 0.08306025713682175, "global_step": 130, "epoch": 0, "lr": 0.009999998635708033}
|
| 132 |
+
{"train_loss": 0.04145112261176109, "train_loss_bc": 0.03547768294811249, "train_loss_llm": 0.597343921661377, "grad_norm": 0.1467656046152115, "global_step": 131, "epoch": 0, "lr": 0.009999998635708033}
|
| 133 |
+
{"train_loss": 0.03268067538738251, "train_loss_bc": 0.026548977941274643, "train_loss_llm": 0.6131698489189148, "grad_norm": 0.1975640058517456, "global_step": 132, "epoch": 0, "lr": 0.009999998635708033}
|
| 134 |
+
{"train_loss": 0.02972070872783661, "train_loss_bc": 0.024114008992910385, "train_loss_llm": 0.5606698989868164, "grad_norm": 0.2313450276851654, "global_step": 133, "epoch": 0, "lr": 0.009999998635708033}
|
| 135 |
+
{"train_loss": 0.034362196922302246, "train_loss_bc": 0.028685620054602623, "train_loss_llm": 0.5676577091217041, "grad_norm": 0.2870166003704071, "global_step": 134, "epoch": 0, "lr": 0.009999998635708033}
|
| 136 |
+
{"train_loss": 0.026356279850006104, "train_loss_bc": 0.021087775006890297, "train_loss_llm": 0.5268504023551941, "grad_norm": 0.32789376378059387, "global_step": 135, "epoch": 0, "lr": 0.009999998635708033}
|
| 137 |
+
{"train_loss": 0.02352694608271122, "train_loss_bc": 0.017692333087325096, "train_loss_llm": 0.5834612846374512, "grad_norm": 0.3600025475025177, "global_step": 136, "epoch": 0, "lr": 0.009999998218067659}
|
| 138 |
+
{"train_loss": 0.021489372476935387, "train_loss_bc": 0.015556419268250465, "train_loss_llm": 0.5932953953742981, "grad_norm": 0.029839487746357918, "global_step": 137, "epoch": 0, "lr": 0.009999998218067659}
|
| 139 |
+
{"train_loss": 0.022915281355381012, "train_loss_bc": 0.016903359442949295, "train_loss_llm": 0.6011921167373657, "grad_norm": 0.0649353489279747, "global_step": 138, "epoch": 0, "lr": 0.009999998218067659}
|
| 140 |
+
{"train_loss": 0.028618421405553818, "train_loss_bc": 0.021326089277863503, "train_loss_llm": 0.7292331457138062, "grad_norm": 0.09133722633123398, "global_step": 139, "epoch": 0, "lr": 0.009999998218067659}
|
| 141 |
+
{"train_loss": 0.022449012845754623, "train_loss_bc": 0.016372717916965485, "train_loss_llm": 0.6076295375823975, "grad_norm": 0.11012815684080124, "global_step": 140, "epoch": 0, "lr": 0.009999998218067659}
|
| 142 |
+
{"train_loss": 0.029746074229478836, "train_loss_bc": 0.023105649277567863, "train_loss_llm": 0.6640425324440002, "grad_norm": 0.13848648965358734, "global_step": 141, "epoch": 0, "lr": 0.009999998218067659}
|
| 143 |
+
{"train_loss": 0.024118199944496155, "train_loss_bc": 0.018623564392328262, "train_loss_llm": 0.5494635105133057, "grad_norm": 0.1677691638469696, "global_step": 142, "epoch": 0, "lr": 0.009999998218067659}
|
| 144 |
+
{"train_loss": 0.02615225501358509, "train_loss_bc": 0.020367056131362915, "train_loss_llm": 0.5785199403762817, "grad_norm": 0.2057863473892212, "global_step": 143, "epoch": 0, "lr": 0.009999998218067659}
|
| 145 |
+
{"train_loss": 0.02474672719836235, "train_loss_bc": 0.01899828016757965, "train_loss_llm": 0.5748447179794312, "grad_norm": 0.2312604933977127, "global_step": 144, "epoch": 0, "lr": 0.009999997744741916}
|
| 146 |
+
{"train_loss": 0.027259020134806633, "train_loss_bc": 0.022779621183872223, "train_loss_llm": 0.44793984293937683, "grad_norm": 0.03558708727359772, "global_step": 145, "epoch": 0, "lr": 0.009999997744741916}
|
| 147 |
+
{"train_loss": 0.026615606620907784, "train_loss_bc": 0.022006575018167496, "train_loss_llm": 0.4609031677246094, "grad_norm": 0.07820506393909454, "global_step": 146, "epoch": 0, "lr": 0.009999997744741916}
|
| 148 |
+
{"train_loss": 0.025012901052832603, "train_loss_bc": 0.020857524126768112, "train_loss_llm": 0.4155377149581909, "grad_norm": 0.11317337304353714, "global_step": 147, "epoch": 0, "lr": 0.009999997744741916}
|
| 149 |
+
{"train_loss": 0.04018259048461914, "train_loss_bc": 0.034745171666145325, "train_loss_llm": 0.5437417030334473, "grad_norm": 0.1679946333169937, "global_step": 148, "epoch": 0, "lr": 0.009999997744741916}
|
| 150 |
+
{"train_loss": 0.02269160747528076, "train_loss_bc": 0.018508322536945343, "train_loss_llm": 0.4183286130428314, "grad_norm": 0.1906110793352127, "global_step": 149, "epoch": 0, "lr": 0.009999997744741916}
|
| 151 |
+
{"train_loss": 0.02399151585996151, "train_loss_bc": 0.01900067925453186, "train_loss_llm": 0.49908363819122314, "grad_norm": 0.2185346633195877, "global_step": 150, "epoch": 0, "lr": 0.009999997744741916}
|
| 152 |
+
{"train_loss": 0.025642897933721542, "train_loss_bc": 0.01971365511417389, "train_loss_llm": 0.5929243564605713, "grad_norm": 0.256346195936203, "global_step": 151, "epoch": 0, "lr": 0.009999997744741916}
|
| 153 |
+
{"train_loss": 0.0291135311126709, "train_loss_bc": 0.025170352309942245, "train_loss_llm": 0.3943178951740265, "grad_norm": 0.29621225595474243, "global_step": 152, "epoch": 0, "lr": 0.00999999721573081}
|
| 154 |
+
{"train_loss": 0.031623922288417816, "train_loss_bc": 0.026910781860351562, "train_loss_llm": 0.4713141918182373, "grad_norm": 0.050291482359170914, "global_step": 153, "epoch": 0, "lr": 0.00999999721573081}
|
| 155 |
+
{"train_loss": 0.04012516513466835, "train_loss_bc": 0.03538067638874054, "train_loss_llm": 0.47444888949394226, "grad_norm": 0.11879635602235794, "global_step": 154, "epoch": 0, "lr": 0.00999999721573081}
|
| 156 |
+
{"train_loss": 0.02348470687866211, "train_loss_bc": 0.018497150391340256, "train_loss_llm": 0.49875572323799133, "grad_norm": 0.1580817550420761, "global_step": 155, "epoch": 0, "lr": 0.00999999721573081}
|
| 157 |
+
{"train_loss": 0.02868938073515892, "train_loss_bc": 0.024003252387046814, "train_loss_llm": 0.4686127305030823, "grad_norm": 0.20671528577804565, "global_step": 156, "epoch": 0, "lr": 0.00999999721573081}
|
| 158 |
+
{"train_loss": 0.03526413440704346, "train_loss_bc": 0.0299256332218647, "train_loss_llm": 0.5338499546051025, "grad_norm": 0.26737433671951294, "global_step": 157, "epoch": 0, "lr": 0.00999999721573081}
|
| 159 |
+
{"train_loss": 0.04240602254867554, "train_loss_bc": 0.03778018057346344, "train_loss_llm": 0.46258440613746643, "grad_norm": 0.3377738893032074, "global_step": 158, "epoch": 0, "lr": 0.00999999721573081}
|
| 160 |
+
{"train_loss": 0.04258374869823456, "train_loss_bc": 0.037083160132169724, "train_loss_llm": 0.5500588417053223, "grad_norm": 0.4062163233757019, "global_step": 159, "epoch": 0, "lr": 0.00999999721573081}
|
| 161 |
+
{"train_loss": 0.03730035200715065, "train_loss_bc": 0.0325319766998291, "train_loss_llm": 0.4768376052379608, "grad_norm": 0.46591275930404663, "global_step": 160, "epoch": 0, "lr": 0.009999996631034345}
|
| 162 |
+
{"train_loss": 0.03809020668268204, "train_loss_bc": 0.03346116095781326, "train_loss_llm": 0.46290475130081177, "grad_norm": 0.06493347138166428, "global_step": 161, "epoch": 0, "lr": 0.009999996631034345}
|
| 163 |
+
{"train_loss": 0.027684010565280914, "train_loss_bc": 0.023107346147298813, "train_loss_llm": 0.4576663374900818, "grad_norm": 0.11537288874387741, "global_step": 162, "epoch": 0, "lr": 0.009999996631034345}
|
| 164 |
+
{"train_loss": 0.03135323151946068, "train_loss_bc": 0.027640309184789658, "train_loss_llm": 0.3712920844554901, "grad_norm": 0.17289584875106812, "global_step": 163, "epoch": 0, "lr": 0.009999996631034345}
|
| 165 |
+
{"train_loss": 0.0167723186314106, "train_loss_bc": 0.012659368105232716, "train_loss_llm": 0.41129496693611145, "grad_norm": 0.20604096353054047, "global_step": 164, "epoch": 0, "lr": 0.009999996631034345}
|
| 166 |
+
{"train_loss": 0.03136400133371353, "train_loss_bc": 0.02577000856399536, "train_loss_llm": 0.5593993663787842, "grad_norm": 0.26496362686157227, "global_step": 165, "epoch": 0, "lr": 0.009999996631034345}
|
| 167 |
+
{"train_loss": 0.04508890211582184, "train_loss_bc": 0.039878830313682556, "train_loss_llm": 0.5210072994232178, "grad_norm": 0.3450776934623718, "global_step": 166, "epoch": 0, "lr": 0.009999996631034345}
|
| 168 |
+
{"train_loss": 0.02305273897945881, "train_loss_bc": 0.018481142818927765, "train_loss_llm": 0.4571595788002014, "grad_norm": 0.3923070430755615, "global_step": 167, "epoch": 0, "lr": 0.009999996631034345}
|
| 169 |
+
{"train_loss": 0.028364604339003563, "train_loss_bc": 0.023760396987199783, "train_loss_llm": 0.46042078733444214, "grad_norm": 0.44835156202316284, "global_step": 168, "epoch": 0, "lr": 0.00999999599065253}
|
| 170 |
+
{"train_loss": 0.03444590047001839, "train_loss_bc": 0.029000703245401382, "train_loss_llm": 0.5445197224617004, "grad_norm": 0.058123886585235596, "global_step": 169, "epoch": 0, "lr": 0.00999999599065253}
|
| 171 |
+
{"train_loss": 0.031413737684488297, "train_loss_bc": 0.026274994015693665, "train_loss_llm": 0.5138742327690125, "grad_norm": 0.11113490164279938, "global_step": 170, "epoch": 0, "lr": 0.00999999599065253}
|
| 172 |
+
{"train_loss": 0.026483573019504547, "train_loss_bc": 0.021075624972581863, "train_loss_llm": 0.540794849395752, "grad_norm": 0.15856337547302246, "global_step": 171, "epoch": 0, "lr": 0.00999999599065253}
|
| 173 |
+
{"train_loss": 0.026955293491482735, "train_loss_bc": 0.02244516834616661, "train_loss_llm": 0.4510125517845154, "grad_norm": 0.20139986276626587, "global_step": 172, "epoch": 0, "lr": 0.00999999599065253}
|
| 174 |
+
{"train_loss": 0.03016134910285473, "train_loss_bc": 0.02493642270565033, "train_loss_llm": 0.5224926471710205, "grad_norm": 0.25067630410194397, "global_step": 173, "epoch": 0, "lr": 0.00999999599065253}
|
| 175 |
+
{"train_loss": 0.029931407421827316, "train_loss_bc": 0.024894531816244125, "train_loss_llm": 0.5036876797676086, "grad_norm": 0.2985679507255554, "global_step": 174, "epoch": 0, "lr": 0.00999999599065253}
|
| 176 |
+
{"train_loss": 0.040666207671165466, "train_loss_bc": 0.035972896963357925, "train_loss_llm": 0.46933093667030334, "grad_norm": 0.36520418524742126, "global_step": 175, "epoch": 0, "lr": 0.00999999599065253}
|
| 177 |
+
{"train_loss": 0.02875429019331932, "train_loss_bc": 0.024598199874162674, "train_loss_llm": 0.4156089723110199, "grad_norm": 0.4146167039871216, "global_step": 176, "epoch": 0, "lr": 0.009999995294585371}
|
| 178 |
+
{"train_loss": 0.03293757140636444, "train_loss_bc": 0.02679475024342537, "train_loss_llm": 0.6142822504043579, "grad_norm": 0.05058354139328003, "global_step": 177, "epoch": 0, "lr": 0.009999995294585371}
|
| 179 |
+
{"train_loss": 0.025597713887691498, "train_loss_bc": 0.01985827460885048, "train_loss_llm": 0.5739438533782959, "grad_norm": 0.08788882941007614, "global_step": 178, "epoch": 0, "lr": 0.009999995294585371}
|
| 180 |
+
{"train_loss": 0.02832857519388199, "train_loss_bc": 0.023654501885175705, "train_loss_llm": 0.4674074053764343, "grad_norm": 0.1335342526435852, "global_step": 179, "epoch": 0, "lr": 0.009999995294585371}
|
| 181 |
+
{"train_loss": 0.023435339331626892, "train_loss_bc": 0.018604157492518425, "train_loss_llm": 0.48311811685562134, "grad_norm": 0.16893020272254944, "global_step": 180, "epoch": 0, "lr": 0.009999995294585371}
|
| 182 |
+
{"train_loss": 0.01497327908873558, "train_loss_bc": 0.010697474703192711, "train_loss_llm": 0.4275803864002228, "grad_norm": 0.1971648633480072, "global_step": 181, "epoch": 0, "lr": 0.009999995294585371}
|
| 183 |
+
{"train_loss": 0.03193127363920212, "train_loss_bc": 0.025609299540519714, "train_loss_llm": 0.6321975588798523, "grad_norm": 0.2400187849998474, "global_step": 182, "epoch": 0, "lr": 0.009999995294585371}
|
| 184 |
+
{"train_loss": 0.020016666501760483, "train_loss_bc": 0.01540004089474678, "train_loss_llm": 0.461662620306015, "grad_norm": 0.27775779366493225, "global_step": 183, "epoch": 0, "lr": 0.009999995294585371}
|
| 185 |
+
{"train_loss": 0.019674330949783325, "train_loss_bc": 0.014173893257975578, "train_loss_llm": 0.5500437021255493, "grad_norm": 0.3127053380012512, "global_step": 184, "epoch": 0, "lr": 0.009999994542832874}
|
| 186 |
+
{"train_loss": 0.020366767421364784, "train_loss_bc": 0.015372475609183311, "train_loss_llm": 0.49942925572395325, "grad_norm": 0.027160177007317543, "global_step": 185, "epoch": 0, "lr": 0.009999994542832874}
|
| 187 |
+
{"train_loss": 0.02773042395710945, "train_loss_bc": 0.022009629756212234, "train_loss_llm": 0.5720794796943665, "grad_norm": 0.06589915603399277, "global_step": 186, "epoch": 0, "lr": 0.009999994542832874}
|
| 188 |
+
{"train_loss": 0.027988407760858536, "train_loss_bc": 0.02180863544344902, "train_loss_llm": 0.6179772615432739, "grad_norm": 0.1006016656756401, "global_step": 187, "epoch": 0, "lr": 0.009999994542832874}
|
| 189 |
+
{"train_loss": 0.027591602876782417, "train_loss_bc": 0.022058088332414627, "train_loss_llm": 0.5533514022827148, "grad_norm": 0.13344469666481018, "global_step": 188, "epoch": 0, "lr": 0.009999994542832874}
|
| 190 |
+
{"train_loss": 0.017557095736265182, "train_loss_bc": 0.012610466219484806, "train_loss_llm": 0.494662880897522, "grad_norm": 0.1636464148759842, "global_step": 189, "epoch": 0, "lr": 0.009999994542832874}
|
| 191 |
+
{"train_loss": 0.028389083221554756, "train_loss_bc": 0.021804803982377052, "train_loss_llm": 0.6584279537200928, "grad_norm": 0.20365768671035767, "global_step": 190, "epoch": 0, "lr": 0.009999994542832874}
|
| 192 |
+
{"train_loss": 0.020810682326555252, "train_loss_bc": 0.014902697876095772, "train_loss_llm": 0.5907983779907227, "grad_norm": 0.23229533433914185, "global_step": 191, "epoch": 0, "lr": 0.009999994542832874}
|
| 193 |
+
{"train_loss": 0.021981600672006607, "train_loss_bc": 0.016103900969028473, "train_loss_llm": 0.5877700448036194, "grad_norm": 0.2603759467601776, "global_step": 192, "epoch": 0, "lr": 0.009999993735395049}
|
| 194 |
+
{"train_loss": 0.022998729720711708, "train_loss_bc": 0.01771724969148636, "train_loss_llm": 0.5281479954719543, "grad_norm": 0.034455616027116776, "global_step": 193, "epoch": 0, "lr": 0.009999993735395049}
|
| 195 |
+
{"train_loss": 0.019327782094478607, "train_loss_bc": 0.014658035710453987, "train_loss_llm": 0.4669746160507202, "grad_norm": 0.0668833777308464, "global_step": 194, "epoch": 0, "lr": 0.009999993735395049}
|
| 196 |
+
{"train_loss": 0.024879198521375656, "train_loss_bc": 0.0185236893594265, "train_loss_llm": 0.6355509757995605, "grad_norm": 0.08858254551887512, "global_step": 195, "epoch": 0, "lr": 0.009999993735395049}
|
| 197 |
+
{"train_loss": 0.019756946712732315, "train_loss_bc": 0.014940358698368073, "train_loss_llm": 0.4816588759422302, "grad_norm": 0.11397459357976913, "global_step": 196, "epoch": 0, "lr": 0.009999993735395049}
|
| 198 |
+
{"train_loss": 0.024903442710638046, "train_loss_bc": 0.018497081473469734, "train_loss_llm": 0.6406360864639282, "grad_norm": 0.14657457172870636, "global_step": 197, "epoch": 0, "lr": 0.009999993735395049}
|
| 199 |
+
{"train_loss": 0.019728384912014008, "train_loss_bc": 0.014158019796013832, "train_loss_llm": 0.5570365190505981, "grad_norm": 0.1720155030488968, "global_step": 198, "epoch": 0, "lr": 0.009999993735395049}
|
| 200 |
+
{"train_loss": 0.016792047768831253, "train_loss_bc": 0.011875113472342491, "train_loss_llm": 0.49169355630874634, "grad_norm": 0.20192894339561462, "global_step": 199, "epoch": 0, "lr": 0.009999993735395049}
|
| 201 |
+
{"train_loss": 0.023527009412646294, "train_loss_bc": 0.017446376383304596, "train_loss_llm": 0.6080633401870728, "grad_norm": 0.2486913502216339, "global_step": 200, "epoch": 0, "lr": 0.009999992872271905}
|
| 202 |
+
{"train_loss": 0.02416856773197651, "train_loss_bc": 0.018999043852090836, "train_loss_llm": 0.5169523358345032, "grad_norm": 0.029791679233312607, "global_step": 201, "epoch": 0, "lr": 0.009999992872271905}
|
| 203 |
+
{"train_loss": 0.022336507216095924, "train_loss_bc": 0.017620330676436424, "train_loss_llm": 0.4716176390647888, "grad_norm": 0.056961867958307266, "global_step": 202, "epoch": 0, "lr": 0.009999992872271905}
|
| 204 |
+
{"train_loss": 0.021891754120588303, "train_loss_bc": 0.01769360713660717, "train_loss_llm": 0.4198147654533386, "grad_norm": 0.07886364310979843, "global_step": 203, "epoch": 0, "lr": 0.009999992872271905}
|
| 205 |
+
{"train_loss": 0.02422039769589901, "train_loss_bc": 0.01944451406598091, "train_loss_llm": 0.47758832573890686, "grad_norm": 0.11191964149475098, "global_step": 204, "epoch": 0, "lr": 0.009999992872271905}
|
| 206 |
+
{"train_loss": 0.02202729508280754, "train_loss_bc": 0.016946561634540558, "train_loss_llm": 0.5080732703208923, "grad_norm": 0.12720732390880585, "global_step": 205, "epoch": 0, "lr": 0.009999992872271905}
|
| 207 |
+
{"train_loss": 0.02344614453613758, "train_loss_bc": 0.01831752434372902, "train_loss_llm": 0.512861967086792, "grad_norm": 0.15643325448036194, "global_step": 206, "epoch": 0, "lr": 0.009999992872271905}
|
| 208 |
+
{"train_loss": 0.021590720862150192, "train_loss_bc": 0.01635618507862091, "train_loss_llm": 0.5234535932540894, "grad_norm": 0.17777878046035767, "global_step": 207, "epoch": 0, "lr": 0.009999992872271905}
|
| 209 |
+
{"train_loss": 0.02145991660654545, "train_loss_bc": 0.01685245707631111, "train_loss_llm": 0.46074602007865906, "grad_norm": 0.20263022184371948, "global_step": 208, "epoch": 0, "lr": 0.009999991953463454}
|
| 210 |
+
{"train_loss": 0.023527000099420547, "train_loss_bc": 0.019175242632627487, "train_loss_llm": 0.4351757764816284, "grad_norm": 0.02783939242362976, "global_step": 209, "epoch": 0, "lr": 0.009999991953463454}
|
| 211 |
+
{"train_loss": 0.02509186789393425, "train_loss_bc": 0.020386580377817154, "train_loss_llm": 0.4705287218093872, "grad_norm": 0.06272286921739578, "global_step": 210, "epoch": 0, "lr": 0.009999991953463454}
|
| 212 |
+
{"train_loss": 0.023855067789554596, "train_loss_bc": 0.018275782465934753, "train_loss_llm": 0.5579285025596619, "grad_norm": 0.07670474052429199, "global_step": 211, "epoch": 0, "lr": 0.009999991953463454}
|
| 213 |
+
{"train_loss": 0.024523191154003143, "train_loss_bc": 0.020061926916241646, "train_loss_llm": 0.4461265206336975, "grad_norm": 0.09239604324102402, "global_step": 212, "epoch": 0, "lr": 0.009999991953463454}
|
| 214 |
+
{"train_loss": 0.023320389911532402, "train_loss_bc": 0.019183896481990814, "train_loss_llm": 0.413649320602417, "grad_norm": 0.12721095979213715, "global_step": 213, "epoch": 0, "lr": 0.009999991953463454}
|
| 215 |
+
{"train_loss": 0.02343529649078846, "train_loss_bc": 0.018135903403162956, "train_loss_llm": 0.5299392938613892, "grad_norm": 0.14981500804424286, "global_step": 214, "epoch": 0, "lr": 0.009999991953463454}
|
| 216 |
+
{"train_loss": 0.024323690682649612, "train_loss_bc": 0.01939486525952816, "train_loss_llm": 0.49288249015808105, "grad_norm": 0.17683890461921692, "global_step": 215, "epoch": 0, "lr": 0.009999991953463454}
|
| 217 |
+
{"train_loss": 0.022757865488529205, "train_loss_bc": 0.018279647454619408, "train_loss_llm": 0.44782188534736633, "grad_norm": 0.19883114099502563, "global_step": 216, "epoch": 0, "lr": 0.0099999909789697}
|
| 218 |
+
{"train_loss": 0.023337459191679955, "train_loss_bc": 0.01846153847873211, "train_loss_llm": 0.48759210109710693, "grad_norm": 0.02261751890182495, "global_step": 217, "epoch": 0, "lr": 0.0099999909789697}
|
| 219 |
+
{"train_loss": 0.031769875437021255, "train_loss_bc": 0.026411594823002815, "train_loss_llm": 0.5358280539512634, "grad_norm": 0.06921491771936417, "global_step": 218, "epoch": 0, "lr": 0.0099999909789697}
|
| 220 |
+
{"train_loss": 0.023737115785479546, "train_loss_bc": 0.019920486956834793, "train_loss_llm": 0.38166290521621704, "grad_norm": 0.09409084916114807, "global_step": 219, "epoch": 0, "lr": 0.0099999909789697}
|
| 221 |
+
{"train_loss": 0.025979334488511086, "train_loss_bc": 0.021304704248905182, "train_loss_llm": 0.46746301651000977, "grad_norm": 0.11882251501083374, "global_step": 220, "epoch": 0, "lr": 0.0099999909789697}
|
| 222 |
+
{"train_loss": 0.025000201538205147, "train_loss_bc": 0.019991103559732437, "train_loss_llm": 0.5009097456932068, "grad_norm": 0.15056195855140686, "global_step": 221, "epoch": 0, "lr": 0.0099999909789697}
|
| 223 |
+
{"train_loss": 0.02242461033165455, "train_loss_bc": 0.018378354609012604, "train_loss_llm": 0.4046255350112915, "grad_norm": 0.1780581921339035, "global_step": 222, "epoch": 0, "lr": 0.0099999909789697}
|
| 224 |
+
{"train_loss": 0.027378899976611137, "train_loss_bc": 0.023318542167544365, "train_loss_llm": 0.406035840511322, "grad_norm": 0.2123226374387741, "global_step": 223, "epoch": 0, "lr": 0.0099999909789697}
|
| 225 |
+
{"train_loss": 0.02599770948290825, "train_loss_bc": 0.0203506201505661, "train_loss_llm": 0.564708948135376, "grad_norm": 0.24404466152191162, "global_step": 224, "epoch": 0, "lr": 0.00999998994879066}
|
| 226 |
+
{"train_loss": 0.02545972168445587, "train_loss_bc": 0.020537808537483215, "train_loss_llm": 0.49219125509262085, "grad_norm": 0.0350002683699131, "global_step": 225, "epoch": 0, "lr": 0.00999998994879066}
|
| 227 |
+
{"train_loss": 0.025176143273711205, "train_loss_bc": 0.020890595391392708, "train_loss_llm": 0.42855486273765564, "grad_norm": 0.06209180876612663, "global_step": 226, "epoch": 0, "lr": 0.00999998994879066}
|
| 228 |
+
{"train_loss": 0.023136619478464127, "train_loss_bc": 0.01860974170267582, "train_loss_llm": 0.4526877701282501, "grad_norm": 0.09024433046579361, "global_step": 227, "epoch": 0, "lr": 0.00999998994879066}
|
| 229 |
+
{"train_loss": 0.025945395231246948, "train_loss_bc": 0.021624740213155746, "train_loss_llm": 0.43206557631492615, "grad_norm": 0.12185320258140564, "global_step": 228, "epoch": 0, "lr": 0.00999998994879066}
|
| 230 |
+
{"train_loss": 0.028116080909967422, "train_loss_bc": 0.02361183986067772, "train_loss_llm": 0.45042404532432556, "grad_norm": 0.16623881459236145, "global_step": 229, "epoch": 0, "lr": 0.00999998994879066}
|
| 231 |
+
{"train_loss": 0.028553711250424385, "train_loss_bc": 0.02347782626748085, "train_loss_llm": 0.5075885057449341, "grad_norm": 0.19257326424121857, "global_step": 230, "epoch": 0, "lr": 0.00999998994879066}
|
| 232 |
+
{"train_loss": 0.027766291052103043, "train_loss_bc": 0.022831808775663376, "train_loss_llm": 0.49344828724861145, "grad_norm": 0.22710655629634857, "global_step": 231, "epoch": 0, "lr": 0.00999998994879066}
|
| 233 |
+
{"train_loss": 0.029369812458753586, "train_loss_bc": 0.024203170090913773, "train_loss_llm": 0.5166641473770142, "grad_norm": 0.2672453820705414, "global_step": 232, "epoch": 0, "lr": 0.009999988862926341}
|
| 234 |
+
{"train_loss": 0.019986841827630997, "train_loss_bc": 0.01582282781600952, "train_loss_llm": 0.41640135645866394, "grad_norm": 0.03304322436451912, "global_step": 233, "epoch": 0, "lr": 0.009999988862926341}
|
| 235 |
+
{"train_loss": 0.027561256662011147, "train_loss_bc": 0.0231167059391737, "train_loss_llm": 0.4444551467895508, "grad_norm": 0.06243205443024635, "global_step": 234, "epoch": 0, "lr": 0.009999988862926341}
|
| 236 |
+
{"train_loss": 0.02846396341919899, "train_loss_bc": 0.023669028654694557, "train_loss_llm": 0.47949355840682983, "grad_norm": 0.10598953068256378, "global_step": 235, "epoch": 0, "lr": 0.009999988862926341}
|
| 237 |
+
{"train_loss": 0.024963906034827232, "train_loss_bc": 0.020005209371447563, "train_loss_llm": 0.49586963653564453, "grad_norm": 0.1454334259033203, "global_step": 236, "epoch": 0, "lr": 0.009999988862926341}
|
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/train.log
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[2026-01-21 12:18:20,592][numexpr.utils][INFO] - Note: detected 224 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable.
|
| 2 |
+
[2026-01-21 12:18:20,592][numexpr.utils][INFO] - Note: NumExpr detected 224 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 16.
|
| 3 |
+
[2026-01-21 12:18:20,592][numexpr.utils][INFO] - NumExpr defaulting to 16 threads.
|
| 4 |
+
[2026-01-21 12:18:26,191][datasets][INFO] - PyTorch version 2.2.2 available.
|
| 5 |
+
[2026-01-21 12:18:26,192][datasets][INFO] - TensorFlow version 2.15.1 available.
|
| 6 |
+
[2026-01-21 12:18:26,193][datasets][INFO] - JAX version 0.4.30 available.
|
| 7 |
+
[2026-01-21 12:18:49,867][root][INFO] - running build_ext
|
| 8 |
+
[2026-01-21 12:18:49,870][root][INFO] - building 'mujoco_py.cymj' extension
|
| 9 |
+
[2026-01-21 12:18:49,872][root][INFO] - gcc -pthread -B /home/u1131674/.conda/envs/llm-bc/compiler_compat -Wno-unused-result -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /home/u1131674/.conda/envs/llm-bc/include -I/home/u1131674/.conda/envs/llm-bc/include -fPIC -O2 -isystem /home/u1131674/.conda/envs/llm-bc/include -fPIC -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py -I/home/u1131674/.mujoco/mujoco210/include -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/numpy/core/include -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/vendor/egl -I/home/u1131674/.conda/envs/llm-bc/include/python3.9 -c /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/cymj.c -o /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/generated/_pyxbld_2.1.2.14_39_linuxgpuextensionbuilder/temp.linux-x86_64-cpython-39/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/cymj.o -fopenmp -w
|
| 10 |
+
[2026-01-21 12:19:17,011][root][INFO] - gcc -pthread -B /home/u1131674/.conda/envs/llm-bc/compiler_compat -Wno-unused-result -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /home/u1131674/.conda/envs/llm-bc/include -I/home/u1131674/.conda/envs/llm-bc/include -fPIC -O2 -isystem /home/u1131674/.conda/envs/llm-bc/include -fPIC -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py -I/home/u1131674/.mujoco/mujoco210/include -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/numpy/core/include -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/vendor/egl -I/home/u1131674/.conda/envs/llm-bc/include/python3.9 -c /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/gl/eglshim.c -o /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/generated/_pyxbld_2.1.2.14_39_linuxgpuextensionbuilder/temp.linux-x86_64-cpython-39/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/gl/eglshim.o -fopenmp -w
|
| 11 |
+
[2026-01-21 12:19:17,219][root][INFO] - creating /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/generated/_pyxbld_2.1.2.14_39_linuxgpuextensionbuilder/lib.linux-x86_64-cpython-39/mujoco_py
|
| 12 |
+
[2026-01-21 12:19:17,222][root][INFO] - gcc -pthread -B /home/u1131674/.conda/envs/llm-bc/compiler_compat -shared -Wl,-rpath,/home/u1131674/.conda/envs/llm-bc/lib -Wl,-rpath-link,/home/u1131674/.conda/envs/llm-bc/lib -L/home/u1131674/.conda/envs/llm-bc/lib -L/home/u1131674/.conda/envs/llm-bc/lib -Wl,-rpath,/home/u1131674/.conda/envs/llm-bc/lib -Wl,-rpath-link,/home/u1131674/.conda/envs/llm-bc/lib -L/home/u1131674/.conda/envs/llm-bc/lib /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/generated/_pyxbld_2.1.2.14_39_linuxgpuextensionbuilder/temp.linux-x86_64-cpython-39/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/cymj.o /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/generated/_pyxbld_2.1.2.14_39_linuxgpuextensionbuilder/temp.linux-x86_64-cpython-39/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/gl/eglshim.o -L/home/u1131674/.mujoco/mujoco210/bin -Wl,--enable-new-dtags,-rpath,/home/u1131674/.mujoco/mujoco210/bin -lmujoco210 -lglewegl -o /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/generated/_pyxbld_2.1.2.14_39_linuxgpuextensionbuilder/lib.linux-x86_64-cpython-39/mujoco_py/cymj.cpython-39-x86_64-linux-gnu.so -fopenmp
|
| 13 |
+
[2026-01-21 12:19:18,581][absl][INFO] - MUJOCO_GL=osmesa, attempting to import specified OpenGL backend.
|
| 14 |
+
[2026-01-21 12:19:18,590][absl][INFO] - MuJoCo library version is: 2.3.7
|
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/debug-internal.log
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2026-01-21T12:19:19.67691431+08:00","level":"INFO","msg":"using version","core version":"0.18.6"}
|
| 2 |
+
{"time":"2026-01-21T12:19:19.676924583+08:00","level":"INFO","msg":"created symlink","path":"/work/u1131674/LLM-BC/data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug-core.log"}
|
| 3 |
+
{"time":"2026-01-21T12:19:19.791067511+08:00","level":"INFO","msg":"created new stream","id":"9puzigbg"}
|
| 4 |
+
{"time":"2026-01-21T12:19:19.791113731+08:00","level":"INFO","msg":"stream: started","id":"9puzigbg"}
|
| 5 |
+
{"time":"2026-01-21T12:19:19.791148479+08:00","level":"INFO","msg":"sender: started","stream_id":"9puzigbg"}
|
| 6 |
+
{"time":"2026-01-21T12:19:19.791138771+08:00","level":"INFO","msg":"handler: started","stream_id":{"value":"9puzigbg"}}
|
| 7 |
+
{"time":"2026-01-21T12:19:19.791131709+08:00","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"9puzigbg"}}
|
| 8 |
+
{"time":"2026-01-21T12:19:20.473667126+08:00","level":"INFO","msg":"Starting system monitor"}
|
| 9 |
+
{"time":"2026-01-21T12:20:31.202511022+08:00","level":"INFO","msg":"stream: closing","id":"9puzigbg"}
|
| 10 |
+
{"time":"2026-01-21T12:20:31.202606065+08:00","level":"INFO","msg":"Stopping system monitor"}
|
| 11 |
+
{"time":"2026-01-21T12:20:31.262777289+08:00","level":"INFO","msg":"Stopped system monitor"}
|
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/debug.log
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-21 12:19:19,672 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Current SDK version is 0.18.6
|
| 2 |
+
2026-01-21 12:19:19,672 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Configure stats pid to 2070718
|
| 3 |
+
2026-01-21 12:19:19,672 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Loading settings from /home/u1131674/.config/wandb/settings
|
| 4 |
+
2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Loading settings from /work/u1131674/LLM-BC/wandb/settings
|
| 5 |
+
2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
|
| 6 |
+
2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': 'online', '_disable_service': None}
|
| 7 |
+
2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'train.py', 'program_abspath': '/work/u1131674/LLM-BC/train.py', 'program': '/work/u1131674/LLM-BC/./train.py'}
|
| 8 |
+
2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Applying login settings: {}
|
| 9 |
+
2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:_log_setup():533] Logging user logs to /work/u1131674/LLM-BC/data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug.log
|
| 10 |
+
2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:_log_setup():534] Logging internal logs to /work/u1131674/LLM-BC/data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug-internal.log
|
| 11 |
+
2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:init():619] calling init triggers
|
| 12 |
+
2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
|
| 13 |
+
config: {'name': 'train_llmbc_lowdim', '_target_': 'llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace', 'obs_dim': 9, 'action_dim': 4, 'task_name': 'box-close-v2', 'exp_name': 'default', 'model_name': 'HuggingFaceTB/SmolLM2-135M-Instruct', 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1, 'n_latency_steps': 0, 'past_action_visible': False, 'llm_orig_expert_feedback': True, 'llm_do_sample': False, 'policy': {'_target_': 'llmbc.policy.llmbc_lowdim_policy.LLMBCLowdimPolicy', 'model': {'_target_': 'llmbc.model.policy.policy_mlp.PolicyMLP', 'input_size': 9, 'hidden_size': [256, 256], 'output_size': 4, 'activation': 'relu', 'n_obs_steps': 1, 'n_action_steps': 1}, 'obs_dim': 9, 'action_dim': 4, 'llm_discriminator': {'_target_': 'llmbc.discriminator.llm_ce_discriminator.LLMCEDiscriminator', 'task_id': 'box-close-v2', 'llm_translator': {'_target_': 'llmbc.translator.llm_translator.LLMTranslator', 'cfg': {'name': 'HuggingFaceTB/SmolLM2-135M-Instruct', 'model_name': 'SmolLM2-135M-Instruct', 'config_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig', 'causal_lm_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM', 'use_quantization': False, 'use_joint_mlp_projector': True, 'llm_mode': 'ete-finetuned', 'finetune_mode': 'orig', 'checkpoint': 'data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890', 'max_length': 100, 'lora_config': {'r': 32, 'lora_alpha': 64, 'lora_dropout': 0.05, 'bias': 'none', 'task_type': 'CAUSAL_LM'}, 'prompter': {'_target_': 'llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter', 'use_joint_mlp_projector': True}, 'hydra': {'job': {'override_dirname': 'HuggingFaceTB/SmolLM2-135M-Instruct'}, 'run': {'dir': 'data/outputs/2026.01.21/12.18.18_HuggingFaceTB/SmolLM2-135M-Instruct'}}}, 'obs_dim': 9, 'action_dim': 4, 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1}}, 'loss_bc_weight': 1.0, 'loss_llm_weight': 0.01, 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1, 'normalize_llm_loss': True}, 'dataloader': {'batch_size': 16, 'num_workers': 0, 'shuffle': True, 'pin_memory': False, 'persistent_workers': False}, 'val_dataloader': {'batch_size': 16, 'num_workers': 0, 'shuffle': True, 'pin_memory': False, 'persistent_workers': False}, 'optimizer': {'_target_': 'torch.optim.AdamW', 'lr': 0.01, 'betas': [0.95, 0.999], 'eps': 1e-08, 'weight_decay': 1e-06}, 'training': {'device': 'cuda:0', 'seed': 42, 'debug': False, 'resume': False, 'lr_scheduler': 'cosine', 'lr_warmup_steps': 10, 'num_epochs': 1001, 'gradient_accumulate_every': 8, 'grad_norm_clip': 0.5, 'rollout_every': 5, 'checkpoint_every': 5, 'val_every': 1, 'sample_every': 5, 'sample_max_batch': 128, 'max_train_steps': None, 'max_val_steps': None, 'tqdm_interval_sec': 1.0}, 'logging': {'project': 'box-close-v2-training', 'resume': True, 'mode': 'online', 'name': '2026.01.21-12.18.18_train_llmbc_lowdim_box-close-v2', 'tags': ['train_llmbc_lowdim', 'box-close-v2', 'default'], 'id': None, 'group': None}, 'checkpoint': {'topk': {'monitor_key': 'test_success_rate', 'mode': 'max', 'k': 5, 'format_str': 'epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt'}, 'save_last_ckpt': True, 'save_last_snapshot': False}, 'multi_run': {'run_dir': 'data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2', 'wandb_name_base': '2026.01.21-12.18.18_train_llmbc_lowdim_box-close-v2'}, 'task': {'name': 'box-close-v2', 'obs_dim': 9, 'action_dim': 4, 'env_runner': {'_target_': 'llmbc.env_runner.metaworld_lowdim_runner.MetaworldLowdimRunner', 'env_name': 'llf-metaworld-box-close-v2', 'n_train': 10, 'n_test': 50, 'n_envs': 10, 'max_steps': 30, 'n_obs_steps': 1, 'n_action_steps': 1, 'instruction_type': 'b', 'feedback_type': ['hp', 'hn', 'fp'], 'visual': False, 'discount': 0.9}, 'dataset': {'_target_': 'llmbc.dataset.metaworld_lowdim_dataset.MetaworldLowdimDataset', 'data_path': 'datasets/box-close-v2.pt', 'data_path2': 'datasets/box-close-v2.pt', 'horizon': 1, 'pad_before': 0, 'pad_after': 0, 'obs_eef_target': True, 'use_manual_normalizer': False, 'val_ratio': 0.1, 'dummy_normalizer': True}, 'instructor': {'_target_': 'llmbc.translator.instructor.metaworld_instructor.box_close_v2_instructor.BoxCloseV2Instructor'}}, 'llm': {'name': 'HuggingFaceTB/SmolLM2-135M-Instruct', 'model_name': 'SmolLM2-135M-Instruct', 'config_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig', 'causal_lm_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM', 'use_quantization': False, 'use_joint_mlp_projector': True, 'llm_mode': 'ete-finetuned', 'finetune_mode': 'orig', 'checkpoint': 'data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890', 'max_length': 100, 'lora_config': {'r': 32, 'lora_alpha': 64, 'lora_dropout': 0.05, 'bias': 'none', 'task_type': 'CAUSAL_LM'}, 'prompter': {'_target_': 'llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter', 'use_joint_mlp_projector': True}, 'hydra': {'job': {'override_dirname': 'HuggingFaceTB/SmolLM2-135M-Instruct'}, 'run': {'dir': 'data/outputs/2026.01.21/12.18.18_HuggingFaceTB/SmolLM2-135M-Instruct'}}}}
|
| 14 |
+
2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:init():669] starting backend
|
| 15 |
+
2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:init():673] sending inform_init request
|
| 16 |
+
2026-01-21 12:19:19,674 INFO MainThread:2070718 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 17 |
+
2026-01-21 12:19:19,675 INFO MainThread:2070718 [wandb_init.py:init():686] backend started and connected
|
| 18 |
+
2026-01-21 12:19:19,684 INFO MainThread:2070718 [wandb_init.py:init():781] updated telemetry
|
| 19 |
+
2026-01-21 12:19:19,759 INFO MainThread:2070718 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
|
| 20 |
+
2026-01-21 12:19:20,469 INFO MainThread:2070718 [wandb_init.py:init():867] starting run threads in backend
|
| 21 |
+
2026-01-21 12:19:20,990 INFO MainThread:2070718 [wandb_run.py:_console_start():2451] atexit reg
|
| 22 |
+
2026-01-21 12:19:20,991 INFO MainThread:2070718 [wandb_run.py:_redirect():2299] redirect: wrap_raw
|
| 23 |
+
2026-01-21 12:19:20,991 INFO MainThread:2070718 [wandb_run.py:_redirect():2364] Wrapping output streams.
|
| 24 |
+
2026-01-21 12:19:20,991 INFO MainThread:2070718 [wandb_run.py:_redirect():2389] Redirects installed.
|
| 25 |
+
2026-01-21 12:19:20,994 INFO MainThread:2070718 [wandb_init.py:init():911] run started, returning control to user process
|
| 26 |
+
2026-01-21 12:19:20,994 INFO MainThread:2070718 [wandb_run.py:_config_callback():1389] config_cb None None {'output_dir': '/work/u1131674/LLM-BC/data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2'}
|
| 27 |
+
2026-01-21 12:20:31,202 WARNING MsgRouterThr:2070718 [router.py:message_loop():75] message_loop has been closed
|
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/config.yaml
ADDED
|
@@ -0,0 +1,271 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_target_:
|
| 2 |
+
value: llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace
|
| 3 |
+
_wandb:
|
| 4 |
+
value:
|
| 5 |
+
cli_version: 0.18.6
|
| 6 |
+
m: []
|
| 7 |
+
python_version: 3.9.25
|
| 8 |
+
t:
|
| 9 |
+
"1":
|
| 10 |
+
- 1
|
| 11 |
+
- 2
|
| 12 |
+
- 3
|
| 13 |
+
- 5
|
| 14 |
+
- 11
|
| 15 |
+
- 12
|
| 16 |
+
- 41
|
| 17 |
+
- 49
|
| 18 |
+
- 50
|
| 19 |
+
- 51
|
| 20 |
+
- 53
|
| 21 |
+
- 55
|
| 22 |
+
- 71
|
| 23 |
+
- 83
|
| 24 |
+
- 95
|
| 25 |
+
- 98
|
| 26 |
+
- 100
|
| 27 |
+
- 105
|
| 28 |
+
"2":
|
| 29 |
+
- 1
|
| 30 |
+
- 2
|
| 31 |
+
- 3
|
| 32 |
+
- 5
|
| 33 |
+
- 11
|
| 34 |
+
- 12
|
| 35 |
+
- 41
|
| 36 |
+
- 49
|
| 37 |
+
- 50
|
| 38 |
+
- 51
|
| 39 |
+
- 53
|
| 40 |
+
- 55
|
| 41 |
+
- 71
|
| 42 |
+
- 83
|
| 43 |
+
- 95
|
| 44 |
+
- 98
|
| 45 |
+
- 100
|
| 46 |
+
- 105
|
| 47 |
+
"3":
|
| 48 |
+
- 13
|
| 49 |
+
- 15
|
| 50 |
+
- 16
|
| 51 |
+
- 23
|
| 52 |
+
- 55
|
| 53 |
+
- 61
|
| 54 |
+
"4": 3.9.25
|
| 55 |
+
"5": 0.18.6
|
| 56 |
+
"6": 4.47.1
|
| 57 |
+
"8":
|
| 58 |
+
- 5
|
| 59 |
+
"12": 0.18.6
|
| 60 |
+
"13": linux-x86_64
|
| 61 |
+
action_dim:
|
| 62 |
+
value: 4
|
| 63 |
+
checkpoint:
|
| 64 |
+
value:
|
| 65 |
+
save_last_ckpt: true
|
| 66 |
+
save_last_snapshot: false
|
| 67 |
+
topk:
|
| 68 |
+
format_str: epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt
|
| 69 |
+
k: 5
|
| 70 |
+
mode: max
|
| 71 |
+
monitor_key: test_success_rate
|
| 72 |
+
dataloader:
|
| 73 |
+
value:
|
| 74 |
+
batch_size: 16
|
| 75 |
+
num_workers: 0
|
| 76 |
+
persistent_workers: false
|
| 77 |
+
pin_memory: false
|
| 78 |
+
shuffle: true
|
| 79 |
+
exp_name:
|
| 80 |
+
value: default
|
| 81 |
+
horizon:
|
| 82 |
+
value: 1
|
| 83 |
+
llm:
|
| 84 |
+
value:
|
| 85 |
+
causal_lm_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM
|
| 86 |
+
checkpoint: data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890
|
| 87 |
+
config_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig
|
| 88 |
+
finetune_mode: orig
|
| 89 |
+
hydra:
|
| 90 |
+
job:
|
| 91 |
+
override_dirname: HuggingFaceTB/SmolLM2-135M-Instruct
|
| 92 |
+
run:
|
| 93 |
+
dir: data/outputs/2026.01.21/12.18.18_HuggingFaceTB/SmolLM2-135M-Instruct
|
| 94 |
+
llm_mode: ete-finetuned
|
| 95 |
+
lora_config:
|
| 96 |
+
bias: none
|
| 97 |
+
lora_alpha: 64
|
| 98 |
+
lora_dropout: 0.05
|
| 99 |
+
r: 32
|
| 100 |
+
task_type: CAUSAL_LM
|
| 101 |
+
max_length: 100
|
| 102 |
+
model_name: SmolLM2-135M-Instruct
|
| 103 |
+
name: HuggingFaceTB/SmolLM2-135M-Instruct
|
| 104 |
+
prompter:
|
| 105 |
+
_target_: llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter
|
| 106 |
+
use_joint_mlp_projector: true
|
| 107 |
+
use_joint_mlp_projector: true
|
| 108 |
+
use_quantization: false
|
| 109 |
+
llm_do_sample:
|
| 110 |
+
value: false
|
| 111 |
+
llm_orig_expert_feedback:
|
| 112 |
+
value: true
|
| 113 |
+
logging:
|
| 114 |
+
value:
|
| 115 |
+
group: null
|
| 116 |
+
id: null
|
| 117 |
+
mode: online
|
| 118 |
+
name: 2026.01.21-12.18.18_train_llmbc_lowdim_box-close-v2
|
| 119 |
+
project: box-close-v2-training
|
| 120 |
+
resume: true
|
| 121 |
+
tags:
|
| 122 |
+
- train_llmbc_lowdim
|
| 123 |
+
- box-close-v2
|
| 124 |
+
- default
|
| 125 |
+
model_name:
|
| 126 |
+
value: HuggingFaceTB/SmolLM2-135M-Instruct
|
| 127 |
+
multi_run:
|
| 128 |
+
value:
|
| 129 |
+
run_dir: data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2
|
| 130 |
+
wandb_name_base: 2026.01.21-12.18.18_train_llmbc_lowdim_box-close-v2
|
| 131 |
+
n_action_steps:
|
| 132 |
+
value: 1
|
| 133 |
+
n_latency_steps:
|
| 134 |
+
value: 0
|
| 135 |
+
n_obs_steps:
|
| 136 |
+
value: 1
|
| 137 |
+
name:
|
| 138 |
+
value: train_llmbc_lowdim
|
| 139 |
+
obs_dim:
|
| 140 |
+
value: 9
|
| 141 |
+
optimizer:
|
| 142 |
+
value:
|
| 143 |
+
_target_: torch.optim.AdamW
|
| 144 |
+
betas:
|
| 145 |
+
- 0.95
|
| 146 |
+
- 0.999
|
| 147 |
+
eps: 1e-08
|
| 148 |
+
lr: 0.01
|
| 149 |
+
weight_decay: 1e-06
|
| 150 |
+
output_dir:
|
| 151 |
+
value: /work/u1131674/LLM-BC/data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2
|
| 152 |
+
past_action_visible:
|
| 153 |
+
value: false
|
| 154 |
+
policy:
|
| 155 |
+
value:
|
| 156 |
+
_target_: llmbc.policy.llmbc_lowdim_policy.LLMBCLowdimPolicy
|
| 157 |
+
action_dim: 4
|
| 158 |
+
horizon: 1
|
| 159 |
+
llm_discriminator:
|
| 160 |
+
_target_: llmbc.discriminator.llm_ce_discriminator.LLMCEDiscriminator
|
| 161 |
+
llm_translator:
|
| 162 |
+
_target_: llmbc.translator.llm_translator.LLMTranslator
|
| 163 |
+
action_dim: 4
|
| 164 |
+
cfg:
|
| 165 |
+
causal_lm_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM
|
| 166 |
+
checkpoint: data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890
|
| 167 |
+
config_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig
|
| 168 |
+
finetune_mode: orig
|
| 169 |
+
hydra:
|
| 170 |
+
job:
|
| 171 |
+
override_dirname: HuggingFaceTB/SmolLM2-135M-Instruct
|
| 172 |
+
run:
|
| 173 |
+
dir: data/outputs/2026.01.21/12.18.18_HuggingFaceTB/SmolLM2-135M-Instruct
|
| 174 |
+
llm_mode: ete-finetuned
|
| 175 |
+
lora_config:
|
| 176 |
+
bias: none
|
| 177 |
+
lora_alpha: 64
|
| 178 |
+
lora_dropout: 0.05
|
| 179 |
+
r: 32
|
| 180 |
+
task_type: CAUSAL_LM
|
| 181 |
+
max_length: 100
|
| 182 |
+
model_name: SmolLM2-135M-Instruct
|
| 183 |
+
name: HuggingFaceTB/SmolLM2-135M-Instruct
|
| 184 |
+
prompter:
|
| 185 |
+
_target_: llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter
|
| 186 |
+
use_joint_mlp_projector: true
|
| 187 |
+
use_joint_mlp_projector: true
|
| 188 |
+
use_quantization: false
|
| 189 |
+
horizon: 1
|
| 190 |
+
n_action_steps: 1
|
| 191 |
+
n_obs_steps: 1
|
| 192 |
+
obs_dim: 9
|
| 193 |
+
task_id: box-close-v2
|
| 194 |
+
loss_bc_weight: 1
|
| 195 |
+
loss_llm_weight: 0.01
|
| 196 |
+
model:
|
| 197 |
+
_target_: llmbc.model.policy.policy_mlp.PolicyMLP
|
| 198 |
+
activation: relu
|
| 199 |
+
hidden_size:
|
| 200 |
+
- 256
|
| 201 |
+
- 256
|
| 202 |
+
input_size: 9
|
| 203 |
+
n_action_steps: 1
|
| 204 |
+
n_obs_steps: 1
|
| 205 |
+
output_size: 4
|
| 206 |
+
n_action_steps: 1
|
| 207 |
+
n_obs_steps: 1
|
| 208 |
+
normalize_llm_loss: true
|
| 209 |
+
obs_dim: 9
|
| 210 |
+
task:
|
| 211 |
+
value:
|
| 212 |
+
action_dim: 4
|
| 213 |
+
dataset:
|
| 214 |
+
_target_: llmbc.dataset.metaworld_lowdim_dataset.MetaworldLowdimDataset
|
| 215 |
+
data_path: datasets/box-close-v2.pt
|
| 216 |
+
data_path2: datasets/box-close-v2.pt
|
| 217 |
+
dummy_normalizer: true
|
| 218 |
+
horizon: 1
|
| 219 |
+
obs_eef_target: true
|
| 220 |
+
pad_after: 0
|
| 221 |
+
pad_before: 0
|
| 222 |
+
use_manual_normalizer: false
|
| 223 |
+
val_ratio: 0.1
|
| 224 |
+
env_runner:
|
| 225 |
+
_target_: llmbc.env_runner.metaworld_lowdim_runner.MetaworldLowdimRunner
|
| 226 |
+
discount: 0.9
|
| 227 |
+
env_name: llf-metaworld-box-close-v2
|
| 228 |
+
feedback_type:
|
| 229 |
+
- hp
|
| 230 |
+
- hn
|
| 231 |
+
- fp
|
| 232 |
+
instruction_type: b
|
| 233 |
+
max_steps: 30
|
| 234 |
+
n_action_steps: 1
|
| 235 |
+
n_envs: 10
|
| 236 |
+
n_obs_steps: 1
|
| 237 |
+
n_test: 50
|
| 238 |
+
n_train: 10
|
| 239 |
+
visual: false
|
| 240 |
+
instructor:
|
| 241 |
+
_target_: llmbc.translator.instructor.metaworld_instructor.box_close_v2_instructor.BoxCloseV2Instructor
|
| 242 |
+
name: box-close-v2
|
| 243 |
+
obs_dim: 9
|
| 244 |
+
task_name:
|
| 245 |
+
value: box-close-v2
|
| 246 |
+
training:
|
| 247 |
+
value:
|
| 248 |
+
checkpoint_every: 5
|
| 249 |
+
debug: false
|
| 250 |
+
device: cuda:0
|
| 251 |
+
grad_norm_clip: 0.5
|
| 252 |
+
gradient_accumulate_every: 8
|
| 253 |
+
lr_scheduler: cosine
|
| 254 |
+
lr_warmup_steps: 10
|
| 255 |
+
max_train_steps: null
|
| 256 |
+
max_val_steps: null
|
| 257 |
+
num_epochs: 1001
|
| 258 |
+
resume: false
|
| 259 |
+
rollout_every: 5
|
| 260 |
+
sample_every: 5
|
| 261 |
+
sample_max_batch: 128
|
| 262 |
+
seed: 42
|
| 263 |
+
tqdm_interval_sec: 1
|
| 264 |
+
val_every: 1
|
| 265 |
+
val_dataloader:
|
| 266 |
+
value:
|
| 267 |
+
batch_size: 16
|
| 268 |
+
num_workers: 0
|
| 269 |
+
persistent_workers: false
|
| 270 |
+
pin_memory: false
|
| 271 |
+
shuffle: true
|
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/output.log
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Eval MetaworldLowdimRunner 1/6: 0%| | 0/30 [00:00<?, ?it/s]/work/u1131674/LLM-BC/llmbc/common/llfbench_util.py:39: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at ../torch/csrc/utils/tensor_new.cpp:275.)
|
| 2 |
+
obs = torch.tensor(obs, dtype=torch.float32).unsqueeze(dim=0).to(device)
|
| 3 |
+
Eval MetaworldLowdimRunner 4/6: 13%|██████████▊ | 4/30 [00:00<00:01, 15.43it/s]Traceback (most recent call last):
|
| 4 |
+
File "/work/u1131674/LLM-BC/./train.py", line 35, in <module>
|
| 5 |
+
main()
|
| 6 |
+
File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/main.py", line 90, in decorated_main
|
| 7 |
+
_run_hydra(
|
| 8 |
+
File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/_internal/utils.py", line 389, in _run_hydra
|
| 9 |
+
_run_app(
|
| 10 |
+
File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/_internal/utils.py", line 452, in _run_app
|
| 11 |
+
run_and_report(
|
| 12 |
+
File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/_internal/utils.py", line 213, in run_and_report
|
| 13 |
+
return func()
|
| 14 |
+
File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/_internal/utils.py", line 453, in <lambda>
|
| 15 |
+
lambda: hydra.run(
|
| 16 |
+
File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/_internal/hydra.py", line 119, in run
|
| 17 |
+
ret = run_job(
|
| 18 |
+
File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/core/utils.py", line 186, in run_job
|
| 19 |
+
ret.return_value = task_function(task_cfg)
|
| 20 |
+
File "/work/u1131674/LLM-BC/./train.py", line 32, in main
|
| 21 |
+
workspace.run()
|
| 22 |
+
File "/work/u1131674/LLM-BC/llmbc/workspace/train_llmbc_lowdim_workspace.py", line 238, in run
|
| 23 |
+
runner_log = env_runner.run(policy)
|
| 24 |
+
File "/work/u1131674/LLM-BC/llmbc/env_runner/metaworld_lowdim_runner.py", line 153, in run
|
| 25 |
+
action_dict = policy.predict_action(obs_dict)
|
| 26 |
+
File "/work/u1131674/LLM-BC/llmbc/policy/llmbc_lowdim_policy.py", line 80, in predict_action
|
| 27 |
+
action_mean, action_log_std = self.model.a_mean_logstd(obs)
|
| 28 |
+
File "/work/u1131674/LLM-BC/llmbc/model/policy/policy_mlp.py", line 74, in a_mean_logstd
|
| 29 |
+
y = self.forward(obs)
|
| 30 |
+
File "/work/u1131674/LLM-BC/llmbc/model/policy/policy_mlp.py", line 68, in forward
|
| 31 |
+
y = super().forward(y)
|
| 32 |
+
File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/torch/nn/modules/container.py", line 217, in forward
|
| 33 |
+
input = module(input)
|
| 34 |
+
File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
|
| 35 |
+
return self._call_impl(*args, **kwargs)
|
| 36 |
+
File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
|
| 37 |
+
return forward_call(*args, **kwargs)
|
| 38 |
+
File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/torch/nn/modules/linear.py", line 116, in forward
|
| 39 |
+
return F.linear(input, self.weight, self.bias)
|
| 40 |
+
KeyboardInterrupt
|
| 41 |
+
Traceback (most recent call last):
|
| 42 |
+
File "/work/u1131674/LLM-BC/./train.py", line 35, in <module>
|
| 43 |
+
main()
|
| 44 |
+
File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/main.py", line 90, in decorated_main
|
| 45 |
+
_run_hydra(
|
| 46 |
+
File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/_internal/utils.py", line 389, in _run_hydra
|
| 47 |
+
_run_app(
|
| 48 |
+
File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/_internal/utils.py", line 452, in _run_app
|
| 49 |
+
run_and_report(
|
| 50 |
+
File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/_internal/utils.py", line 213, in run_and_report
|
| 51 |
+
return func()
|
| 52 |
+
File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/_internal/utils.py", line 453, in <lambda>
|
| 53 |
+
lambda: hydra.run(
|
| 54 |
+
File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/_internal/hydra.py", line 119, in run
|
| 55 |
+
ret = run_job(
|
| 56 |
+
File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/core/utils.py", line 186, in run_job
|
| 57 |
+
ret.return_value = task_function(task_cfg)
|
| 58 |
+
File "/work/u1131674/LLM-BC/./train.py", line 32, in main
|
| 59 |
+
workspace.run()
|
| 60 |
+
File "/work/u1131674/LLM-BC/llmbc/workspace/train_llmbc_lowdim_workspace.py", line 238, in run
|
| 61 |
+
runner_log = env_runner.run(policy)
|
| 62 |
+
File "/work/u1131674/LLM-BC/llmbc/env_runner/metaworld_lowdim_runner.py", line 153, in run
|
| 63 |
+
action_dict = policy.predict_action(obs_dict)
|
| 64 |
+
File "/work/u1131674/LLM-BC/llmbc/policy/llmbc_lowdim_policy.py", line 80, in predict_action
|
| 65 |
+
action_mean, action_log_std = self.model.a_mean_logstd(obs)
|
| 66 |
+
File "/work/u1131674/LLM-BC/llmbc/model/policy/policy_mlp.py", line 74, in a_mean_logstd
|
| 67 |
+
y = self.forward(obs)
|
| 68 |
+
File "/work/u1131674/LLM-BC/llmbc/model/policy/policy_mlp.py", line 68, in forward
|
| 69 |
+
y = super().forward(y)
|
| 70 |
+
File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/torch/nn/modules/container.py", line 217, in forward
|
| 71 |
+
input = module(input)
|
| 72 |
+
File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
|
| 73 |
+
return self._call_impl(*args, **kwargs)
|
| 74 |
+
File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
|
| 75 |
+
return forward_call(*args, **kwargs)
|
| 76 |
+
File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/torch/nn/modules/linear.py", line 116, in forward
|
| 77 |
+
return F.linear(input, self.weight, self.bias)
|
| 78 |
+
KeyboardInterrupt
|
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/requirements.txt
ADDED
|
@@ -0,0 +1,857 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
rpds-py==0.27.1
|
| 2 |
+
typeguard==4.4.4
|
| 3 |
+
flatbuffers==25.12.19
|
| 4 |
+
toppra==0.6.3
|
| 5 |
+
sympy==1.14.0
|
| 6 |
+
tiktoken==0.8.0
|
| 7 |
+
nvidia-cuda-cupti-cu12==12.1.105
|
| 8 |
+
arm_pytorch_utilities==0.4.3
|
| 9 |
+
pynndescent==0.6.0
|
| 10 |
+
multidict==6.7.0
|
| 11 |
+
fonttools==4.60.2
|
| 12 |
+
numexpr==2.10.1
|
| 13 |
+
cmudict==1.0.13
|
| 14 |
+
PyOpenGL-accelerate==3.1.10
|
| 15 |
+
gmpy2==2.2.1
|
| 16 |
+
peft==0.14.0
|
| 17 |
+
metaworld==2.0.0
|
| 18 |
+
nvidia-cufft-cu12==11.0.2.54
|
| 19 |
+
python-dateutil==2.9.0.post0
|
| 20 |
+
aiosignal==1.4.0
|
| 21 |
+
pexpect==4.9.0
|
| 22 |
+
protobuf==4.25.8
|
| 23 |
+
typing_extensions==4.15.0
|
| 24 |
+
mujoco==2.3.7
|
| 25 |
+
tokenizers==0.21.0
|
| 26 |
+
pytorch-kinematics==0.7.5
|
| 27 |
+
sniffio==1.3.1
|
| 28 |
+
aiofiles==25.1.0
|
| 29 |
+
mplib==0.1.1
|
| 30 |
+
wcwidth==0.2.14
|
| 31 |
+
Pygments==2.19.1
|
| 32 |
+
anyio==4.12.1
|
| 33 |
+
tensorflow-estimator==2.15.0
|
| 34 |
+
filelock==3.17.0
|
| 35 |
+
numpy==1.23.5
|
| 36 |
+
attrs==25.4.0
|
| 37 |
+
Markdown==3.9
|
| 38 |
+
fsspec==2024.3.1
|
| 39 |
+
libclang==18.1.1
|
| 40 |
+
umap-learn==0.5.9.post2
|
| 41 |
+
dill==0.3.8
|
| 42 |
+
narwhals==2.15.0
|
| 43 |
+
tensorboard==2.15.2
|
| 44 |
+
dacite==1.9.2
|
| 45 |
+
termcolor==3.1.0
|
| 46 |
+
llmbc==0.0.0
|
| 47 |
+
python-multipart==0.0.20
|
| 48 |
+
exceptiongroup==1.3.1
|
| 49 |
+
sapien==3.0.0b1
|
| 50 |
+
pygame==2.6.1
|
| 51 |
+
nvidia-curand-cu12==10.3.2.106
|
| 52 |
+
evaluate==0.4.3
|
| 53 |
+
msgpack==1.1.1
|
| 54 |
+
tensorflow-probability==0.23.0
|
| 55 |
+
diffusers==0.31.0
|
| 56 |
+
certifi==2025.10.5
|
| 57 |
+
d4rl==1.1
|
| 58 |
+
pydub==0.25.1
|
| 59 |
+
annotated-doc==0.0.4
|
| 60 |
+
gitdb==4.0.12
|
| 61 |
+
gradio_client==0.2.9
|
| 62 |
+
Shapely==1.8.4
|
| 63 |
+
mani_skill==3.0.0b20
|
| 64 |
+
tensorflow-io-gcs-filesystem==0.37.1
|
| 65 |
+
fasteners==0.20
|
| 66 |
+
hjson==3.1.0
|
| 67 |
+
ninja==1.13.0
|
| 68 |
+
stack-data==0.6.3
|
| 69 |
+
pyarrow==21.0.0
|
| 70 |
+
networkx==3.2.1
|
| 71 |
+
nvidia-cusparse-cu12==12.1.0.106
|
| 72 |
+
pyparsing==3.3.1
|
| 73 |
+
timm==1.0.22
|
| 74 |
+
typing-inspection==0.4.2
|
| 75 |
+
openai==2.8.1
|
| 76 |
+
pybullet==3.2.6
|
| 77 |
+
hydra-core==1.2.0
|
| 78 |
+
gradio==3.36.1
|
| 79 |
+
tensorflow==2.15.1
|
| 80 |
+
asttokens==3.0.1
|
| 81 |
+
importlib-metadata==5.2.0
|
| 82 |
+
astunparse==1.6.3
|
| 83 |
+
tifffile==2024.8.30
|
| 84 |
+
annotated-types==0.7.0
|
| 85 |
+
Bottleneck==1.4.2
|
| 86 |
+
accelerate==1.0.1
|
| 87 |
+
pytz==2025.2
|
| 88 |
+
urllib3==2.5.0
|
| 89 |
+
frozenlist==1.8.0
|
| 90 |
+
sentry-sdk==2.50.0
|
| 91 |
+
jsonschema==4.25.1
|
| 92 |
+
tyro==0.9.1
|
| 93 |
+
Farama-Notifications==0.0.4
|
| 94 |
+
ffmpy==1.0.0
|
| 95 |
+
httpx==0.28.1
|
| 96 |
+
pymunk==6.2.1
|
| 97 |
+
shtab==1.7.2
|
| 98 |
+
glfw==2.0.0
|
| 99 |
+
hf-xet==1.1.8
|
| 100 |
+
omegaconf==2.2.1
|
| 101 |
+
blobfile==3.0.0
|
| 102 |
+
decorator==5.2.1
|
| 103 |
+
cffi==1.17.1
|
| 104 |
+
matplotlib-inline==0.2.1
|
| 105 |
+
eval_type_backport==0.2.2
|
| 106 |
+
torchaudio==2.2.2
|
| 107 |
+
colorama==0.4.6
|
| 108 |
+
click==8.1.8
|
| 109 |
+
Cython==0.29.37
|
| 110 |
+
orjson==3.11.5
|
| 111 |
+
gym_bandits==0.0.2
|
| 112 |
+
traitlets==5.14.3
|
| 113 |
+
docker-pycreds==0.4.0
|
| 114 |
+
multiprocess==0.70.15
|
| 115 |
+
zipp==3.21.0
|
| 116 |
+
antlr4-python3-runtime==4.9.3
|
| 117 |
+
uc-micro-py==1.0.3
|
| 118 |
+
mpmath==1.3.0
|
| 119 |
+
idna==3.11
|
| 120 |
+
aiodns==3.5.0
|
| 121 |
+
charset-normalizer==3.4.4
|
| 122 |
+
nvidia-nvjitlink-cu12==12.9.86
|
| 123 |
+
nvidia-cuda-nvrtc-cu12==12.1.105
|
| 124 |
+
seaborn==0.13.2
|
| 125 |
+
pyarrow-hotfix==0.7
|
| 126 |
+
pillow==11.3.0
|
| 127 |
+
pyautogen==0.1.0
|
| 128 |
+
requests==2.32.0
|
| 129 |
+
MarkupSafe==3.0.2
|
| 130 |
+
websockets==15.0.1
|
| 131 |
+
nvidia-nccl-cu12==2.19.3
|
| 132 |
+
pure_eval==0.2.3
|
| 133 |
+
parso==0.8.5
|
| 134 |
+
huggingface-hub==0.26.2
|
| 135 |
+
syllables==1.0.9
|
| 136 |
+
tf-agents==0.19.0
|
| 137 |
+
six==1.17.0
|
| 138 |
+
referencing==0.36.2
|
| 139 |
+
ptyprocess==0.7.0
|
| 140 |
+
platformdirs==4.4.0
|
| 141 |
+
fastapi==0.128.0
|
| 142 |
+
stable-baselines3==2.2.1
|
| 143 |
+
av==10.0.0
|
| 144 |
+
diskcache==5.6.3
|
| 145 |
+
pynvml==13.0.1
|
| 146 |
+
pytorch-seed==0.2.0
|
| 147 |
+
zarr==2.12.0
|
| 148 |
+
mdurl==0.1.2
|
| 149 |
+
docstring-parser==0.16
|
| 150 |
+
packaging==25.0
|
| 151 |
+
numcodecs==0.12.1
|
| 152 |
+
opt_einsum==3.4.0
|
| 153 |
+
markdown-it-py==2.2.0
|
| 154 |
+
nvidia-cuda-runtime-cu12==12.1.105
|
| 155 |
+
PyWavelets==1.6.0
|
| 156 |
+
datasets==2.19.0
|
| 157 |
+
contourpy==1.3.0
|
| 158 |
+
aiohappyeyeballs==2.6.1
|
| 159 |
+
jaxlib==0.4.30
|
| 160 |
+
ImageIO==2.37.2
|
| 161 |
+
wandb==0.18.6
|
| 162 |
+
jiter==0.12.0
|
| 163 |
+
gymnasium==0.29.1
|
| 164 |
+
pycryptodomex==3.23.0
|
| 165 |
+
google-pasta==0.2.0
|
| 166 |
+
ipython==8.18.1
|
| 167 |
+
threadpoolctl==3.6.0
|
| 168 |
+
py-cpuinfo==9.0.0
|
| 169 |
+
bitsandbytes==0.45.0
|
| 170 |
+
xxhash==3.5.0
|
| 171 |
+
google-auth-oauthlib==1.2.4
|
| 172 |
+
rsa==4.9.1
|
| 173 |
+
rouge_score==0.1.2
|
| 174 |
+
dm-control==1.0.14
|
| 175 |
+
oauthlib==3.3.1
|
| 176 |
+
pandas==2.3.3
|
| 177 |
+
tenacity==9.1.2
|
| 178 |
+
asciitree==0.3.3
|
| 179 |
+
scipy==1.13.1
|
| 180 |
+
jedi==0.19.2
|
| 181 |
+
gast==0.7.0
|
| 182 |
+
google-auth==2.47.0
|
| 183 |
+
transforms3d==0.4.2
|
| 184 |
+
kiwisolver==1.4.7
|
| 185 |
+
matplotlib==3.7.5
|
| 186 |
+
aiohttp==3.12.15
|
| 187 |
+
pip==23.3.2
|
| 188 |
+
imageio-ffmpeg==0.6.0
|
| 189 |
+
deepspeed==0.16.1
|
| 190 |
+
yarl==1.18.0
|
| 191 |
+
nvidia-nvtx-cu12==12.1.105
|
| 192 |
+
llfbench==0.1.0
|
| 193 |
+
wheel==0.45.1
|
| 194 |
+
PySocks==1.7.1
|
| 195 |
+
ml-dtypes==0.3.2
|
| 196 |
+
PyYAML==6.0.2
|
| 197 |
+
fast_kinematics==0.2.2
|
| 198 |
+
gin-config==0.5.0
|
| 199 |
+
setproctitle==1.3.7
|
| 200 |
+
safetensors==0.5.3
|
| 201 |
+
torchvision==0.17.2
|
| 202 |
+
semantic-version==2.10.0
|
| 203 |
+
PyOpenGL==3.1.10
|
| 204 |
+
nltk==3.9.2
|
| 205 |
+
lxml==6.0.2
|
| 206 |
+
pydantic==2.12.5
|
| 207 |
+
tqdm==4.67.1
|
| 208 |
+
keras==2.15.0
|
| 209 |
+
parse==1.19.1
|
| 210 |
+
linkify-it-py==2.0.3
|
| 211 |
+
dm-tree==0.1.8
|
| 212 |
+
requests-oauthlib==2.0.0
|
| 213 |
+
scikit-learn==1.6.1
|
| 214 |
+
altair==6.0.0
|
| 215 |
+
Werkzeug==3.1.5
|
| 216 |
+
sentencepiece==0.2.0
|
| 217 |
+
uvicorn==0.39.0
|
| 218 |
+
cycler==0.12.1
|
| 219 |
+
transformers==4.47.1
|
| 220 |
+
uvloop==0.22.1
|
| 221 |
+
mkl_random==1.2.8
|
| 222 |
+
GitPython==3.1.46
|
| 223 |
+
regex==2025.9.1
|
| 224 |
+
jax==0.4.30
|
| 225 |
+
llvmlite==0.39.1
|
| 226 |
+
pyasn1_modules==0.4.2
|
| 227 |
+
nvidia-cudnn-cu12==8.9.2.26
|
| 228 |
+
pydantic_core==2.41.5
|
| 229 |
+
google-genai==1.47.0
|
| 230 |
+
propcache==0.3.1
|
| 231 |
+
pycares==4.10.0
|
| 232 |
+
pyperclip==1.11.0
|
| 233 |
+
pyasn1==0.6.2
|
| 234 |
+
async-timeout==5.0.1
|
| 235 |
+
psutil==7.0.0
|
| 236 |
+
gym==0.23.1
|
| 237 |
+
dm-env==1.6
|
| 238 |
+
Jinja2==3.1.6
|
| 239 |
+
sentence-transformers==3.2.1
|
| 240 |
+
einops==0.4.1
|
| 241 |
+
triton==2.2.0
|
| 242 |
+
grpcio==1.76.0
|
| 243 |
+
labmaze==1.0.6
|
| 244 |
+
nvidia-ml-py==13.590.44
|
| 245 |
+
brotlicffi==1.0.9.2
|
| 246 |
+
smmap==5.0.2
|
| 247 |
+
cloudpickle==3.1.2
|
| 248 |
+
setuptools==80.9.0
|
| 249 |
+
starlette==0.49.3
|
| 250 |
+
prompt_toolkit==3.0.52
|
| 251 |
+
wrapt==1.14.2
|
| 252 |
+
h5py==3.14.0
|
| 253 |
+
scikit-image==0.19.3
|
| 254 |
+
joblib==1.5.3
|
| 255 |
+
opencv-python==4.11.0.86
|
| 256 |
+
rich==14.2.0
|
| 257 |
+
trl==0.11.4
|
| 258 |
+
gym-notices==0.1.0
|
| 259 |
+
trimesh==4.11.1
|
| 260 |
+
mdit-py-plugins==0.3.3
|
| 261 |
+
distro==1.9.0
|
| 262 |
+
executing==2.2.1
|
| 263 |
+
mkl-service==2.4.0
|
| 264 |
+
nvidia-cusolver-cu12==11.4.5.107
|
| 265 |
+
FLAML==2.3.6
|
| 266 |
+
mujoco-py==2.1.2.14
|
| 267 |
+
h11==0.16.0
|
| 268 |
+
highway-env==1.9.1
|
| 269 |
+
httpcore==1.0.9
|
| 270 |
+
tensorboard-data-server==0.7.2
|
| 271 |
+
tzdata==2025.3
|
| 272 |
+
absl-py==2.3.1
|
| 273 |
+
jsonschema-specifications==2025.9.1
|
| 274 |
+
numba==0.56.4
|
| 275 |
+
tabulate==0.9.0
|
| 276 |
+
importlib-resources==5.13.0
|
| 277 |
+
pycparser==2.23
|
| 278 |
+
mkl_fft==1.3.11
|
| 279 |
+
torch==2.2.2
|
| 280 |
+
nvidia-cublas-cu12==12.1.3.1
|
| 281 |
+
rpds-py==0.27.1
|
| 282 |
+
typeguard==4.4.4
|
| 283 |
+
flatbuffers==25.12.19
|
| 284 |
+
toppra==0.6.3
|
| 285 |
+
sympy==1.14.0
|
| 286 |
+
tiktoken==0.8.0
|
| 287 |
+
nvidia-cuda-cupti-cu12==12.1.105
|
| 288 |
+
arm_pytorch_utilities==0.4.3
|
| 289 |
+
pynndescent==0.6.0
|
| 290 |
+
multidict==6.7.0
|
| 291 |
+
fonttools==4.60.2
|
| 292 |
+
numexpr==2.10.1
|
| 293 |
+
cmudict==1.0.13
|
| 294 |
+
PyOpenGL-accelerate==3.1.10
|
| 295 |
+
gmpy2==2.2.1
|
| 296 |
+
peft==0.14.0
|
| 297 |
+
metaworld==2.0.0
|
| 298 |
+
nvidia-cufft-cu12==11.0.2.54
|
| 299 |
+
python-dateutil==2.9.0.post0
|
| 300 |
+
aiosignal==1.4.0
|
| 301 |
+
pexpect==4.9.0
|
| 302 |
+
protobuf==4.25.8
|
| 303 |
+
typing_extensions==4.15.0
|
| 304 |
+
mujoco==2.3.7
|
| 305 |
+
tokenizers==0.21.0
|
| 306 |
+
pytorch-kinematics==0.7.5
|
| 307 |
+
sniffio==1.3.1
|
| 308 |
+
aiofiles==25.1.0
|
| 309 |
+
mplib==0.1.1
|
| 310 |
+
wcwidth==0.2.14
|
| 311 |
+
Pygments==2.19.1
|
| 312 |
+
anyio==4.12.1
|
| 313 |
+
tensorflow-estimator==2.15.0
|
| 314 |
+
filelock==3.17.0
|
| 315 |
+
numpy==1.23.5
|
| 316 |
+
attrs==25.4.0
|
| 317 |
+
Markdown==3.9
|
| 318 |
+
fsspec==2024.3.1
|
| 319 |
+
libclang==18.1.1
|
| 320 |
+
umap-learn==0.5.9.post2
|
| 321 |
+
dill==0.3.8
|
| 322 |
+
narwhals==2.15.0
|
| 323 |
+
tensorboard==2.15.2
|
| 324 |
+
dacite==1.9.2
|
| 325 |
+
termcolor==3.1.0
|
| 326 |
+
llmbc==0.0.0
|
| 327 |
+
python-multipart==0.0.20
|
| 328 |
+
exceptiongroup==1.3.1
|
| 329 |
+
sapien==3.0.0b1
|
| 330 |
+
pygame==2.6.1
|
| 331 |
+
nvidia-curand-cu12==10.3.2.106
|
| 332 |
+
evaluate==0.4.3
|
| 333 |
+
msgpack==1.1.1
|
| 334 |
+
tensorflow-probability==0.23.0
|
| 335 |
+
diffusers==0.31.0
|
| 336 |
+
certifi==2025.10.5
|
| 337 |
+
d4rl==1.1
|
| 338 |
+
pydub==0.25.1
|
| 339 |
+
annotated-doc==0.0.4
|
| 340 |
+
gitdb==4.0.12
|
| 341 |
+
gradio_client==0.2.9
|
| 342 |
+
Shapely==1.8.4
|
| 343 |
+
mani_skill==3.0.0b20
|
| 344 |
+
tensorflow-io-gcs-filesystem==0.37.1
|
| 345 |
+
fasteners==0.20
|
| 346 |
+
hjson==3.1.0
|
| 347 |
+
ninja==1.13.0
|
| 348 |
+
stack-data==0.6.3
|
| 349 |
+
pyarrow==21.0.0
|
| 350 |
+
networkx==3.2.1
|
| 351 |
+
nvidia-cusparse-cu12==12.1.0.106
|
| 352 |
+
pyparsing==3.3.1
|
| 353 |
+
timm==1.0.22
|
| 354 |
+
typing-inspection==0.4.2
|
| 355 |
+
openai==2.8.1
|
| 356 |
+
pybullet==3.2.6
|
| 357 |
+
hydra-core==1.2.0
|
| 358 |
+
gradio==3.36.1
|
| 359 |
+
tensorflow==2.15.1
|
| 360 |
+
asttokens==3.0.1
|
| 361 |
+
importlib-metadata==5.2.0
|
| 362 |
+
astunparse==1.6.3
|
| 363 |
+
tifffile==2024.8.30
|
| 364 |
+
annotated-types==0.7.0
|
| 365 |
+
Bottleneck==1.4.2
|
| 366 |
+
accelerate==1.0.1
|
| 367 |
+
pytz==2025.2
|
| 368 |
+
urllib3==2.5.0
|
| 369 |
+
frozenlist==1.8.0
|
| 370 |
+
sentry-sdk==2.50.0
|
| 371 |
+
jsonschema==4.25.1
|
| 372 |
+
tyro==0.9.1
|
| 373 |
+
Farama-Notifications==0.0.4
|
| 374 |
+
ffmpy==1.0.0
|
| 375 |
+
httpx==0.28.1
|
| 376 |
+
pymunk==6.2.1
|
| 377 |
+
shtab==1.7.2
|
| 378 |
+
glfw==2.0.0
|
| 379 |
+
hf-xet==1.1.8
|
| 380 |
+
omegaconf==2.2.1
|
| 381 |
+
blobfile==3.0.0
|
| 382 |
+
decorator==5.2.1
|
| 383 |
+
cffi==1.17.1
|
| 384 |
+
matplotlib-inline==0.2.1
|
| 385 |
+
eval_type_backport==0.2.2
|
| 386 |
+
torchaudio==2.2.2
|
| 387 |
+
colorama==0.4.6
|
| 388 |
+
click==8.1.8
|
| 389 |
+
Cython==0.29.37
|
| 390 |
+
orjson==3.11.5
|
| 391 |
+
gym_bandits==0.0.2
|
| 392 |
+
traitlets==5.14.3
|
| 393 |
+
docker-pycreds==0.4.0
|
| 394 |
+
multiprocess==0.70.15
|
| 395 |
+
zipp==3.21.0
|
| 396 |
+
antlr4-python3-runtime==4.9.3
|
| 397 |
+
uc-micro-py==1.0.3
|
| 398 |
+
mpmath==1.3.0
|
| 399 |
+
idna==3.11
|
| 400 |
+
aiodns==3.5.0
|
| 401 |
+
charset-normalizer==3.4.4
|
| 402 |
+
nvidia-nvjitlink-cu12==12.9.86
|
| 403 |
+
nvidia-cuda-nvrtc-cu12==12.1.105
|
| 404 |
+
seaborn==0.13.2
|
| 405 |
+
pyarrow-hotfix==0.7
|
| 406 |
+
pillow==11.3.0
|
| 407 |
+
pyautogen==0.1.0
|
| 408 |
+
requests==2.32.0
|
| 409 |
+
MarkupSafe==3.0.2
|
| 410 |
+
websockets==15.0.1
|
| 411 |
+
nvidia-nccl-cu12==2.19.3
|
| 412 |
+
pure_eval==0.2.3
|
| 413 |
+
parso==0.8.5
|
| 414 |
+
huggingface-hub==0.26.2
|
| 415 |
+
syllables==1.0.9
|
| 416 |
+
tf-agents==0.19.0
|
| 417 |
+
six==1.17.0
|
| 418 |
+
referencing==0.36.2
|
| 419 |
+
ptyprocess==0.7.0
|
| 420 |
+
platformdirs==4.4.0
|
| 421 |
+
fastapi==0.128.0
|
| 422 |
+
stable-baselines3==2.2.1
|
| 423 |
+
av==10.0.0
|
| 424 |
+
diskcache==5.6.3
|
| 425 |
+
pynvml==13.0.1
|
| 426 |
+
pytorch-seed==0.2.0
|
| 427 |
+
zarr==2.12.0
|
| 428 |
+
mdurl==0.1.2
|
| 429 |
+
docstring-parser==0.16
|
| 430 |
+
packaging==25.0
|
| 431 |
+
numcodecs==0.12.1
|
| 432 |
+
opt_einsum==3.4.0
|
| 433 |
+
markdown-it-py==2.2.0
|
| 434 |
+
nvidia-cuda-runtime-cu12==12.1.105
|
| 435 |
+
PyWavelets==1.6.0
|
| 436 |
+
datasets==2.19.0
|
| 437 |
+
contourpy==1.3.0
|
| 438 |
+
aiohappyeyeballs==2.6.1
|
| 439 |
+
jaxlib==0.4.30
|
| 440 |
+
ImageIO==2.37.2
|
| 441 |
+
wandb==0.18.6
|
| 442 |
+
jiter==0.12.0
|
| 443 |
+
gymnasium==0.29.1
|
| 444 |
+
pycryptodomex==3.23.0
|
| 445 |
+
google-pasta==0.2.0
|
| 446 |
+
ipython==8.18.1
|
| 447 |
+
threadpoolctl==3.6.0
|
| 448 |
+
py-cpuinfo==9.0.0
|
| 449 |
+
bitsandbytes==0.45.0
|
| 450 |
+
xxhash==3.5.0
|
| 451 |
+
google-auth-oauthlib==1.2.4
|
| 452 |
+
rsa==4.9.1
|
| 453 |
+
rouge_score==0.1.2
|
| 454 |
+
dm-control==1.0.14
|
| 455 |
+
oauthlib==3.3.1
|
| 456 |
+
pandas==2.3.3
|
| 457 |
+
tenacity==9.1.2
|
| 458 |
+
asciitree==0.3.3
|
| 459 |
+
scipy==1.13.1
|
| 460 |
+
jedi==0.19.2
|
| 461 |
+
gast==0.7.0
|
| 462 |
+
google-auth==2.47.0
|
| 463 |
+
transforms3d==0.4.2
|
| 464 |
+
kiwisolver==1.4.7
|
| 465 |
+
matplotlib==3.7.5
|
| 466 |
+
aiohttp==3.12.15
|
| 467 |
+
pip==23.3.2
|
| 468 |
+
imageio-ffmpeg==0.6.0
|
| 469 |
+
deepspeed==0.16.1
|
| 470 |
+
yarl==1.18.0
|
| 471 |
+
nvidia-nvtx-cu12==12.1.105
|
| 472 |
+
llfbench==0.1.0
|
| 473 |
+
wheel==0.45.1
|
| 474 |
+
PySocks==1.7.1
|
| 475 |
+
ml-dtypes==0.3.2
|
| 476 |
+
PyYAML==6.0.2
|
| 477 |
+
fast_kinematics==0.2.2
|
| 478 |
+
gin-config==0.5.0
|
| 479 |
+
setproctitle==1.3.7
|
| 480 |
+
safetensors==0.5.3
|
| 481 |
+
torchvision==0.17.2
|
| 482 |
+
semantic-version==2.10.0
|
| 483 |
+
PyOpenGL==3.1.10
|
| 484 |
+
nltk==3.9.2
|
| 485 |
+
lxml==6.0.2
|
| 486 |
+
pydantic==2.12.5
|
| 487 |
+
tqdm==4.67.1
|
| 488 |
+
keras==2.15.0
|
| 489 |
+
parse==1.19.1
|
| 490 |
+
linkify-it-py==2.0.3
|
| 491 |
+
dm-tree==0.1.8
|
| 492 |
+
requests-oauthlib==2.0.0
|
| 493 |
+
scikit-learn==1.6.1
|
| 494 |
+
altair==6.0.0
|
| 495 |
+
Werkzeug==3.1.5
|
| 496 |
+
sentencepiece==0.2.0
|
| 497 |
+
uvicorn==0.39.0
|
| 498 |
+
cycler==0.12.1
|
| 499 |
+
transformers==4.47.1
|
| 500 |
+
uvloop==0.22.1
|
| 501 |
+
mkl_random==1.2.8
|
| 502 |
+
GitPython==3.1.46
|
| 503 |
+
regex==2025.9.1
|
| 504 |
+
jax==0.4.30
|
| 505 |
+
llvmlite==0.39.1
|
| 506 |
+
pyasn1_modules==0.4.2
|
| 507 |
+
nvidia-cudnn-cu12==8.9.2.26
|
| 508 |
+
pydantic_core==2.41.5
|
| 509 |
+
google-genai==1.47.0
|
| 510 |
+
propcache==0.3.1
|
| 511 |
+
pycares==4.10.0
|
| 512 |
+
pyperclip==1.11.0
|
| 513 |
+
pyasn1==0.6.2
|
| 514 |
+
async-timeout==5.0.1
|
| 515 |
+
psutil==7.0.0
|
| 516 |
+
gym==0.23.1
|
| 517 |
+
dm-env==1.6
|
| 518 |
+
Jinja2==3.1.6
|
| 519 |
+
sentence-transformers==3.2.1
|
| 520 |
+
einops==0.4.1
|
| 521 |
+
triton==2.2.0
|
| 522 |
+
grpcio==1.76.0
|
| 523 |
+
labmaze==1.0.6
|
| 524 |
+
nvidia-ml-py==13.590.44
|
| 525 |
+
brotlicffi==1.0.9.2
|
| 526 |
+
smmap==5.0.2
|
| 527 |
+
cloudpickle==3.1.2
|
| 528 |
+
setuptools==80.9.0
|
| 529 |
+
starlette==0.49.3
|
| 530 |
+
prompt_toolkit==3.0.52
|
| 531 |
+
wrapt==1.14.2
|
| 532 |
+
h5py==3.14.0
|
| 533 |
+
scikit-image==0.19.3
|
| 534 |
+
joblib==1.5.3
|
| 535 |
+
opencv-python==4.11.0.86
|
| 536 |
+
rich==14.2.0
|
| 537 |
+
trl==0.11.4
|
| 538 |
+
gym-notices==0.1.0
|
| 539 |
+
trimesh==4.11.1
|
| 540 |
+
mdit-py-plugins==0.3.3
|
| 541 |
+
distro==1.9.0
|
| 542 |
+
executing==2.2.1
|
| 543 |
+
mkl-service==2.4.0
|
| 544 |
+
nvidia-cusolver-cu12==11.4.5.107
|
| 545 |
+
FLAML==2.3.6
|
| 546 |
+
mujoco-py==2.1.2.14
|
| 547 |
+
h11==0.16.0
|
| 548 |
+
highway-env==1.9.1
|
| 549 |
+
httpcore==1.0.9
|
| 550 |
+
tensorboard-data-server==0.7.2
|
| 551 |
+
tzdata==2025.3
|
| 552 |
+
absl-py==2.3.1
|
| 553 |
+
jsonschema-specifications==2025.9.1
|
| 554 |
+
numba==0.56.4
|
| 555 |
+
tabulate==0.9.0
|
| 556 |
+
importlib-resources==5.13.0
|
| 557 |
+
pycparser==2.23
|
| 558 |
+
mkl_fft==1.3.11
|
| 559 |
+
torch==2.2.2
|
| 560 |
+
nvidia-cublas-cu12==12.1.3.1
|
| 561 |
+
llmbc==0.0.0
|
| 562 |
+
rpds-py==0.27.1
|
| 563 |
+
typeguard==4.4.4
|
| 564 |
+
flatbuffers==25.12.19
|
| 565 |
+
toppra==0.6.3
|
| 566 |
+
sympy==1.14.0
|
| 567 |
+
tiktoken==0.8.0
|
| 568 |
+
nvidia-cuda-cupti-cu12==12.1.105
|
| 569 |
+
arm_pytorch_utilities==0.4.3
|
| 570 |
+
pynndescent==0.6.0
|
| 571 |
+
multidict==6.7.0
|
| 572 |
+
fonttools==4.60.2
|
| 573 |
+
numexpr==2.10.1
|
| 574 |
+
cmudict==1.0.13
|
| 575 |
+
PyOpenGL-accelerate==3.1.10
|
| 576 |
+
gmpy2==2.2.1
|
| 577 |
+
peft==0.14.0
|
| 578 |
+
metaworld==2.0.0
|
| 579 |
+
nvidia-cufft-cu12==11.0.2.54
|
| 580 |
+
python-dateutil==2.9.0.post0
|
| 581 |
+
aiosignal==1.4.0
|
| 582 |
+
pexpect==4.9.0
|
| 583 |
+
protobuf==4.25.8
|
| 584 |
+
typing_extensions==4.15.0
|
| 585 |
+
mujoco==2.3.7
|
| 586 |
+
tokenizers==0.21.0
|
| 587 |
+
pytorch-kinematics==0.7.5
|
| 588 |
+
sniffio==1.3.1
|
| 589 |
+
aiofiles==25.1.0
|
| 590 |
+
mplib==0.1.1
|
| 591 |
+
wcwidth==0.2.14
|
| 592 |
+
Pygments==2.19.1
|
| 593 |
+
anyio==4.12.1
|
| 594 |
+
tensorflow-estimator==2.15.0
|
| 595 |
+
filelock==3.17.0
|
| 596 |
+
numpy==1.23.5
|
| 597 |
+
attrs==25.4.0
|
| 598 |
+
Markdown==3.9
|
| 599 |
+
fsspec==2024.3.1
|
| 600 |
+
libclang==18.1.1
|
| 601 |
+
umap-learn==0.5.9.post2
|
| 602 |
+
dill==0.3.8
|
| 603 |
+
narwhals==2.15.0
|
| 604 |
+
tensorboard==2.15.2
|
| 605 |
+
dacite==1.9.2
|
| 606 |
+
termcolor==3.1.0
|
| 607 |
+
llmbc==0.0.0
|
| 608 |
+
python-multipart==0.0.20
|
| 609 |
+
exceptiongroup==1.3.1
|
| 610 |
+
sapien==3.0.0b1
|
| 611 |
+
pygame==2.6.1
|
| 612 |
+
nvidia-curand-cu12==10.3.2.106
|
| 613 |
+
evaluate==0.4.3
|
| 614 |
+
msgpack==1.1.1
|
| 615 |
+
tensorflow-probability==0.23.0
|
| 616 |
+
diffusers==0.31.0
|
| 617 |
+
certifi==2025.10.5
|
| 618 |
+
d4rl==1.1
|
| 619 |
+
pydub==0.25.1
|
| 620 |
+
annotated-doc==0.0.4
|
| 621 |
+
gitdb==4.0.12
|
| 622 |
+
gradio_client==0.2.9
|
| 623 |
+
Shapely==1.8.4
|
| 624 |
+
mani_skill==3.0.0b20
|
| 625 |
+
tensorflow-io-gcs-filesystem==0.37.1
|
| 626 |
+
fasteners==0.20
|
| 627 |
+
hjson==3.1.0
|
| 628 |
+
ninja==1.13.0
|
| 629 |
+
stack-data==0.6.3
|
| 630 |
+
pyarrow==21.0.0
|
| 631 |
+
networkx==3.2.1
|
| 632 |
+
nvidia-cusparse-cu12==12.1.0.106
|
| 633 |
+
pyparsing==3.3.1
|
| 634 |
+
timm==1.0.22
|
| 635 |
+
typing-inspection==0.4.2
|
| 636 |
+
openai==2.8.1
|
| 637 |
+
pybullet==3.2.6
|
| 638 |
+
hydra-core==1.2.0
|
| 639 |
+
gradio==3.36.1
|
| 640 |
+
tensorflow==2.15.1
|
| 641 |
+
asttokens==3.0.1
|
| 642 |
+
importlib-metadata==5.2.0
|
| 643 |
+
astunparse==1.6.3
|
| 644 |
+
tifffile==2024.8.30
|
| 645 |
+
annotated-types==0.7.0
|
| 646 |
+
Bottleneck==1.4.2
|
| 647 |
+
accelerate==1.0.1
|
| 648 |
+
pytz==2025.2
|
| 649 |
+
urllib3==2.5.0
|
| 650 |
+
frozenlist==1.8.0
|
| 651 |
+
sentry-sdk==2.50.0
|
| 652 |
+
jsonschema==4.25.1
|
| 653 |
+
tyro==0.9.1
|
| 654 |
+
Farama-Notifications==0.0.4
|
| 655 |
+
ffmpy==1.0.0
|
| 656 |
+
httpx==0.28.1
|
| 657 |
+
pymunk==6.2.1
|
| 658 |
+
shtab==1.7.2
|
| 659 |
+
glfw==2.0.0
|
| 660 |
+
hf-xet==1.1.8
|
| 661 |
+
omegaconf==2.2.1
|
| 662 |
+
blobfile==3.0.0
|
| 663 |
+
decorator==5.2.1
|
| 664 |
+
cffi==1.17.1
|
| 665 |
+
matplotlib-inline==0.2.1
|
| 666 |
+
eval_type_backport==0.2.2
|
| 667 |
+
torchaudio==2.2.2
|
| 668 |
+
colorama==0.4.6
|
| 669 |
+
click==8.1.8
|
| 670 |
+
Cython==0.29.37
|
| 671 |
+
orjson==3.11.5
|
| 672 |
+
gym_bandits==0.0.2
|
| 673 |
+
traitlets==5.14.3
|
| 674 |
+
docker-pycreds==0.4.0
|
| 675 |
+
multiprocess==0.70.15
|
| 676 |
+
zipp==3.21.0
|
| 677 |
+
antlr4-python3-runtime==4.9.3
|
| 678 |
+
uc-micro-py==1.0.3
|
| 679 |
+
mpmath==1.3.0
|
| 680 |
+
idna==3.11
|
| 681 |
+
aiodns==3.5.0
|
| 682 |
+
charset-normalizer==3.4.4
|
| 683 |
+
nvidia-nvjitlink-cu12==12.9.86
|
| 684 |
+
nvidia-cuda-nvrtc-cu12==12.1.105
|
| 685 |
+
seaborn==0.13.2
|
| 686 |
+
pyarrow-hotfix==0.7
|
| 687 |
+
pillow==11.3.0
|
| 688 |
+
pyautogen==0.1.0
|
| 689 |
+
requests==2.32.0
|
| 690 |
+
MarkupSafe==3.0.2
|
| 691 |
+
websockets==15.0.1
|
| 692 |
+
nvidia-nccl-cu12==2.19.3
|
| 693 |
+
pure_eval==0.2.3
|
| 694 |
+
parso==0.8.5
|
| 695 |
+
huggingface-hub==0.26.2
|
| 696 |
+
syllables==1.0.9
|
| 697 |
+
tf-agents==0.19.0
|
| 698 |
+
six==1.17.0
|
| 699 |
+
referencing==0.36.2
|
| 700 |
+
ptyprocess==0.7.0
|
| 701 |
+
platformdirs==4.4.0
|
| 702 |
+
fastapi==0.128.0
|
| 703 |
+
stable-baselines3==2.2.1
|
| 704 |
+
av==10.0.0
|
| 705 |
+
diskcache==5.6.3
|
| 706 |
+
pynvml==13.0.1
|
| 707 |
+
pytorch-seed==0.2.0
|
| 708 |
+
zarr==2.12.0
|
| 709 |
+
mdurl==0.1.2
|
| 710 |
+
docstring-parser==0.16
|
| 711 |
+
packaging==25.0
|
| 712 |
+
numcodecs==0.12.1
|
| 713 |
+
opt_einsum==3.4.0
|
| 714 |
+
markdown-it-py==2.2.0
|
| 715 |
+
nvidia-cuda-runtime-cu12==12.1.105
|
| 716 |
+
PyWavelets==1.6.0
|
| 717 |
+
datasets==2.19.0
|
| 718 |
+
contourpy==1.3.0
|
| 719 |
+
aiohappyeyeballs==2.6.1
|
| 720 |
+
jaxlib==0.4.30
|
| 721 |
+
ImageIO==2.37.2
|
| 722 |
+
wandb==0.18.6
|
| 723 |
+
jiter==0.12.0
|
| 724 |
+
gymnasium==0.29.1
|
| 725 |
+
pycryptodomex==3.23.0
|
| 726 |
+
google-pasta==0.2.0
|
| 727 |
+
ipython==8.18.1
|
| 728 |
+
threadpoolctl==3.6.0
|
| 729 |
+
py-cpuinfo==9.0.0
|
| 730 |
+
bitsandbytes==0.45.0
|
| 731 |
+
xxhash==3.5.0
|
| 732 |
+
google-auth-oauthlib==1.2.4
|
| 733 |
+
rsa==4.9.1
|
| 734 |
+
rouge_score==0.1.2
|
| 735 |
+
dm-control==1.0.14
|
| 736 |
+
oauthlib==3.3.1
|
| 737 |
+
pandas==2.3.3
|
| 738 |
+
tenacity==9.1.2
|
| 739 |
+
asciitree==0.3.3
|
| 740 |
+
scipy==1.13.1
|
| 741 |
+
jedi==0.19.2
|
| 742 |
+
gast==0.7.0
|
| 743 |
+
google-auth==2.47.0
|
| 744 |
+
transforms3d==0.4.2
|
| 745 |
+
kiwisolver==1.4.7
|
| 746 |
+
matplotlib==3.7.5
|
| 747 |
+
aiohttp==3.12.15
|
| 748 |
+
pip==23.3.2
|
| 749 |
+
imageio-ffmpeg==0.6.0
|
| 750 |
+
deepspeed==0.16.1
|
| 751 |
+
yarl==1.18.0
|
| 752 |
+
nvidia-nvtx-cu12==12.1.105
|
| 753 |
+
llfbench==0.1.0
|
| 754 |
+
wheel==0.45.1
|
| 755 |
+
PySocks==1.7.1
|
| 756 |
+
ml-dtypes==0.3.2
|
| 757 |
+
PyYAML==6.0.2
|
| 758 |
+
fast_kinematics==0.2.2
|
| 759 |
+
gin-config==0.5.0
|
| 760 |
+
setproctitle==1.3.7
|
| 761 |
+
safetensors==0.5.3
|
| 762 |
+
torchvision==0.17.2
|
| 763 |
+
semantic-version==2.10.0
|
| 764 |
+
PyOpenGL==3.1.10
|
| 765 |
+
nltk==3.9.2
|
| 766 |
+
lxml==6.0.2
|
| 767 |
+
pydantic==2.12.5
|
| 768 |
+
tqdm==4.67.1
|
| 769 |
+
keras==2.15.0
|
| 770 |
+
parse==1.19.1
|
| 771 |
+
linkify-it-py==2.0.3
|
| 772 |
+
dm-tree==0.1.8
|
| 773 |
+
requests-oauthlib==2.0.0
|
| 774 |
+
scikit-learn==1.6.1
|
| 775 |
+
altair==6.0.0
|
| 776 |
+
Werkzeug==3.1.5
|
| 777 |
+
sentencepiece==0.2.0
|
| 778 |
+
uvicorn==0.39.0
|
| 779 |
+
cycler==0.12.1
|
| 780 |
+
transformers==4.47.1
|
| 781 |
+
uvloop==0.22.1
|
| 782 |
+
mkl_random==1.2.8
|
| 783 |
+
GitPython==3.1.46
|
| 784 |
+
regex==2025.9.1
|
| 785 |
+
jax==0.4.30
|
| 786 |
+
llvmlite==0.39.1
|
| 787 |
+
pyasn1_modules==0.4.2
|
| 788 |
+
nvidia-cudnn-cu12==8.9.2.26
|
| 789 |
+
pydantic_core==2.41.5
|
| 790 |
+
google-genai==1.47.0
|
| 791 |
+
propcache==0.3.1
|
| 792 |
+
pycares==4.10.0
|
| 793 |
+
pyperclip==1.11.0
|
| 794 |
+
pyasn1==0.6.2
|
| 795 |
+
async-timeout==5.0.1
|
| 796 |
+
psutil==7.0.0
|
| 797 |
+
gym==0.23.1
|
| 798 |
+
dm-env==1.6
|
| 799 |
+
Jinja2==3.1.6
|
| 800 |
+
sentence-transformers==3.2.1
|
| 801 |
+
einops==0.4.1
|
| 802 |
+
triton==2.2.0
|
| 803 |
+
grpcio==1.76.0
|
| 804 |
+
labmaze==1.0.6
|
| 805 |
+
nvidia-ml-py==13.590.44
|
| 806 |
+
brotlicffi==1.0.9.2
|
| 807 |
+
smmap==5.0.2
|
| 808 |
+
cloudpickle==3.1.2
|
| 809 |
+
setuptools==80.9.0
|
| 810 |
+
starlette==0.49.3
|
| 811 |
+
prompt_toolkit==3.0.52
|
| 812 |
+
wrapt==1.14.2
|
| 813 |
+
h5py==3.14.0
|
| 814 |
+
scikit-image==0.19.3
|
| 815 |
+
joblib==1.5.3
|
| 816 |
+
opencv-python==4.11.0.86
|
| 817 |
+
rich==14.2.0
|
| 818 |
+
trl==0.11.4
|
| 819 |
+
gym-notices==0.1.0
|
| 820 |
+
trimesh==4.11.1
|
| 821 |
+
mdit-py-plugins==0.3.3
|
| 822 |
+
distro==1.9.0
|
| 823 |
+
executing==2.2.1
|
| 824 |
+
mkl-service==2.4.0
|
| 825 |
+
nvidia-cusolver-cu12==11.4.5.107
|
| 826 |
+
FLAML==2.3.6
|
| 827 |
+
mujoco-py==2.1.2.14
|
| 828 |
+
h11==0.16.0
|
| 829 |
+
highway-env==1.9.1
|
| 830 |
+
httpcore==1.0.9
|
| 831 |
+
tensorboard-data-server==0.7.2
|
| 832 |
+
tzdata==2025.3
|
| 833 |
+
absl-py==2.3.1
|
| 834 |
+
jsonschema-specifications==2025.9.1
|
| 835 |
+
numba==0.56.4
|
| 836 |
+
tabulate==0.9.0
|
| 837 |
+
importlib-resources==5.13.0
|
| 838 |
+
pycparser==2.23
|
| 839 |
+
mkl_fft==1.3.11
|
| 840 |
+
torch==2.2.2
|
| 841 |
+
nvidia-cublas-cu12==12.1.3.1
|
| 842 |
+
zipp==3.19.2
|
| 843 |
+
jaraco.text==3.12.1
|
| 844 |
+
jaraco.context==5.3.0
|
| 845 |
+
importlib_metadata==8.0.0
|
| 846 |
+
typeguard==4.3.0
|
| 847 |
+
inflect==7.3.1
|
| 848 |
+
more-itertools==10.3.0
|
| 849 |
+
wheel==0.45.1
|
| 850 |
+
packaging==24.2
|
| 851 |
+
backports.tarfile==1.2.0
|
| 852 |
+
autocommand==2.2.2
|
| 853 |
+
jaraco.collections==5.1.0
|
| 854 |
+
tomli==2.0.1
|
| 855 |
+
platformdirs==4.2.2
|
| 856 |
+
jaraco.functools==4.0.1
|
| 857 |
+
typing_extensions==4.12.2
|
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-4.18.0-513.24.1.el8_9.x86_64-x86_64-with-glibc2.28",
|
| 3 |
+
"python": "3.9.25",
|
| 4 |
+
"startedAt": "2026-01-21T04:19:19.675401Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--config-path",
|
| 7 |
+
"./config/main_table",
|
| 8 |
+
"--config-name",
|
| 9 |
+
"llmbc_box-close-v2.yaml"
|
| 10 |
+
],
|
| 11 |
+
"program": "/work/u1131674/LLM-BC/./train.py",
|
| 12 |
+
"codePath": "train.py",
|
| 13 |
+
"git": {
|
| 14 |
+
"remote": "https://github.com/CHYang25/LLM-BC.git",
|
| 15 |
+
"commit": "1d2e1f5818e116390426ef596d075fc0cf1b0081"
|
| 16 |
+
},
|
| 17 |
+
"email": "chris920325@gmail.com",
|
| 18 |
+
"root": "/work/u1131674/LLM-BC/data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2",
|
| 19 |
+
"host": "cbi-lgn01",
|
| 20 |
+
"username": "u1131674",
|
| 21 |
+
"executable": "/home/u1131674/.conda/envs/llm-bc/bin/python3",
|
| 22 |
+
"codePathLocal": "train.py",
|
| 23 |
+
"cpu_count": 112,
|
| 24 |
+
"cpu_count_logical": 224,
|
| 25 |
+
"gpu": "NVIDIA H100 PCIe",
|
| 26 |
+
"gpu_count": 2,
|
| 27 |
+
"disk": {
|
| 28 |
+
"/": {
|
| 29 |
+
"total": "473745891328",
|
| 30 |
+
"used": "389026504704"
|
| 31 |
+
}
|
| 32 |
+
},
|
| 33 |
+
"memory": {
|
| 34 |
+
"total": "540117905408"
|
| 35 |
+
},
|
| 36 |
+
"cpu": {
|
| 37 |
+
"count": 112,
|
| 38 |
+
"countLogical": 224
|
| 39 |
+
},
|
| 40 |
+
"gpu_nvidia": [
|
| 41 |
+
{
|
| 42 |
+
"name": "NVIDIA H100 PCIe",
|
| 43 |
+
"memoryTotal": "85520809984",
|
| 44 |
+
"cudaCores": 14592,
|
| 45 |
+
"architecture": "Hopper"
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"name": "NVIDIA H100 PCIe",
|
| 49 |
+
"memoryTotal": "85520809984",
|
| 50 |
+
"cudaCores": 14592,
|
| 51 |
+
"architecture": "Hopper"
|
| 52 |
+
}
|
| 53 |
+
],
|
| 54 |
+
"cudaVersion": "12.4"
|
| 55 |
+
}
|
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/wandb-summary.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"train_loss":0.024963906034827232,"_timestamp":1.7689691981223695e+09,"train_loss_bc":0.020005209371447563,"_wandb":{"runtime":71},"epoch":0,"_runtime":71.527189585,"train_loss_llm":0.49586963653564453,"_step":236,"grad_norm":0.1454334259033203,"global_step":236,"lr":0.009999988862926341}
|
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug-core.log
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2026-01-21T12:19:19.051245689+08:00","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmpu06061ms/port-2070718.txt","pid":2070718,"debug":false,"disable-analytics":false}
|
| 2 |
+
{"time":"2026-01-21T12:19:19.051287504+08:00","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
|
| 3 |
+
{"time":"2026-01-21T12:19:19.051845713+08:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":2070718}
|
| 4 |
+
{"time":"2026-01-21T12:19:19.051828427+08:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":43057,"Zone":""}}
|
| 5 |
+
{"time":"2026-01-21T12:19:19.231239451+08:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:33440"}
|
| 6 |
+
{"time":"2026-01-21T12:19:19.675902286+08:00","level":"INFO","msg":"handleInformInit: received","streamId":"9puzigbg","id":"127.0.0.1:33440"}
|
| 7 |
+
{"time":"2026-01-21T12:19:19.791119243+08:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"9puzigbg","id":"127.0.0.1:33440"}
|
| 8 |
+
{"time":"2026-01-21T12:20:31.202365496+08:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:33440"}
|
| 9 |
+
{"time":"2026-01-21T12:20:31.20258465+08:00","level":"INFO","msg":"server is shutting down"}
|
| 10 |
+
{"time":"2026-01-21T12:20:31.202524542+08:00","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:33440"}
|
| 11 |
+
{"time":"2026-01-21T12:20:31.202695965+08:00","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:33440"}
|
| 12 |
+
{"time":"2026-01-21T12:20:31.981247472+08:00","level":"INFO","msg":"Parent process exited, terminating service process."}
|
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2026-01-21T12:19:19.67691431+08:00","level":"INFO","msg":"using version","core version":"0.18.6"}
|
| 2 |
+
{"time":"2026-01-21T12:19:19.676924583+08:00","level":"INFO","msg":"created symlink","path":"/work/u1131674/LLM-BC/data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug-core.log"}
|
| 3 |
+
{"time":"2026-01-21T12:19:19.791067511+08:00","level":"INFO","msg":"created new stream","id":"9puzigbg"}
|
| 4 |
+
{"time":"2026-01-21T12:19:19.791113731+08:00","level":"INFO","msg":"stream: started","id":"9puzigbg"}
|
| 5 |
+
{"time":"2026-01-21T12:19:19.791148479+08:00","level":"INFO","msg":"sender: started","stream_id":"9puzigbg"}
|
| 6 |
+
{"time":"2026-01-21T12:19:19.791138771+08:00","level":"INFO","msg":"handler: started","stream_id":{"value":"9puzigbg"}}
|
| 7 |
+
{"time":"2026-01-21T12:19:19.791131709+08:00","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"9puzigbg"}}
|
| 8 |
+
{"time":"2026-01-21T12:19:20.473667126+08:00","level":"INFO","msg":"Starting system monitor"}
|
| 9 |
+
{"time":"2026-01-21T12:20:31.202511022+08:00","level":"INFO","msg":"stream: closing","id":"9puzigbg"}
|
| 10 |
+
{"time":"2026-01-21T12:20:31.202606065+08:00","level":"INFO","msg":"Stopping system monitor"}
|
| 11 |
+
{"time":"2026-01-21T12:20:31.262777289+08:00","level":"INFO","msg":"Stopped system monitor"}
|
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug.log
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-21 12:19:19,672 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Current SDK version is 0.18.6
|
| 2 |
+
2026-01-21 12:19:19,672 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Configure stats pid to 2070718
|
| 3 |
+
2026-01-21 12:19:19,672 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Loading settings from /home/u1131674/.config/wandb/settings
|
| 4 |
+
2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Loading settings from /work/u1131674/LLM-BC/wandb/settings
|
| 5 |
+
2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
|
| 6 |
+
2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': 'online', '_disable_service': None}
|
| 7 |
+
2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'train.py', 'program_abspath': '/work/u1131674/LLM-BC/train.py', 'program': '/work/u1131674/LLM-BC/./train.py'}
|
| 8 |
+
2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Applying login settings: {}
|
| 9 |
+
2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:_log_setup():533] Logging user logs to /work/u1131674/LLM-BC/data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug.log
|
| 10 |
+
2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:_log_setup():534] Logging internal logs to /work/u1131674/LLM-BC/data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug-internal.log
|
| 11 |
+
2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:init():619] calling init triggers
|
| 12 |
+
2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
|
| 13 |
+
config: {'name': 'train_llmbc_lowdim', '_target_': 'llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace', 'obs_dim': 9, 'action_dim': 4, 'task_name': 'box-close-v2', 'exp_name': 'default', 'model_name': 'HuggingFaceTB/SmolLM2-135M-Instruct', 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1, 'n_latency_steps': 0, 'past_action_visible': False, 'llm_orig_expert_feedback': True, 'llm_do_sample': False, 'policy': {'_target_': 'llmbc.policy.llmbc_lowdim_policy.LLMBCLowdimPolicy', 'model': {'_target_': 'llmbc.model.policy.policy_mlp.PolicyMLP', 'input_size': 9, 'hidden_size': [256, 256], 'output_size': 4, 'activation': 'relu', 'n_obs_steps': 1, 'n_action_steps': 1}, 'obs_dim': 9, 'action_dim': 4, 'llm_discriminator': {'_target_': 'llmbc.discriminator.llm_ce_discriminator.LLMCEDiscriminator', 'task_id': 'box-close-v2', 'llm_translator': {'_target_': 'llmbc.translator.llm_translator.LLMTranslator', 'cfg': {'name': 'HuggingFaceTB/SmolLM2-135M-Instruct', 'model_name': 'SmolLM2-135M-Instruct', 'config_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig', 'causal_lm_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM', 'use_quantization': False, 'use_joint_mlp_projector': True, 'llm_mode': 'ete-finetuned', 'finetune_mode': 'orig', 'checkpoint': 'data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890', 'max_length': 100, 'lora_config': {'r': 32, 'lora_alpha': 64, 'lora_dropout': 0.05, 'bias': 'none', 'task_type': 'CAUSAL_LM'}, 'prompter': {'_target_': 'llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter', 'use_joint_mlp_projector': True}, 'hydra': {'job': {'override_dirname': 'HuggingFaceTB/SmolLM2-135M-Instruct'}, 'run': {'dir': 'data/outputs/2026.01.21/12.18.18_HuggingFaceTB/SmolLM2-135M-Instruct'}}}, 'obs_dim': 9, 'action_dim': 4, 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1}}, 'loss_bc_weight': 1.0, 'loss_llm_weight': 0.01, 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1, 'normalize_llm_loss': True}, 'dataloader': {'batch_size': 16, 'num_workers': 0, 'shuffle': True, 'pin_memory': False, 'persistent_workers': False}, 'val_dataloader': {'batch_size': 16, 'num_workers': 0, 'shuffle': True, 'pin_memory': False, 'persistent_workers': False}, 'optimizer': {'_target_': 'torch.optim.AdamW', 'lr': 0.01, 'betas': [0.95, 0.999], 'eps': 1e-08, 'weight_decay': 1e-06}, 'training': {'device': 'cuda:0', 'seed': 42, 'debug': False, 'resume': False, 'lr_scheduler': 'cosine', 'lr_warmup_steps': 10, 'num_epochs': 1001, 'gradient_accumulate_every': 8, 'grad_norm_clip': 0.5, 'rollout_every': 5, 'checkpoint_every': 5, 'val_every': 1, 'sample_every': 5, 'sample_max_batch': 128, 'max_train_steps': None, 'max_val_steps': None, 'tqdm_interval_sec': 1.0}, 'logging': {'project': 'box-close-v2-training', 'resume': True, 'mode': 'online', 'name': '2026.01.21-12.18.18_train_llmbc_lowdim_box-close-v2', 'tags': ['train_llmbc_lowdim', 'box-close-v2', 'default'], 'id': None, 'group': None}, 'checkpoint': {'topk': {'monitor_key': 'test_success_rate', 'mode': 'max', 'k': 5, 'format_str': 'epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt'}, 'save_last_ckpt': True, 'save_last_snapshot': False}, 'multi_run': {'run_dir': 'data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2', 'wandb_name_base': '2026.01.21-12.18.18_train_llmbc_lowdim_box-close-v2'}, 'task': {'name': 'box-close-v2', 'obs_dim': 9, 'action_dim': 4, 'env_runner': {'_target_': 'llmbc.env_runner.metaworld_lowdim_runner.MetaworldLowdimRunner', 'env_name': 'llf-metaworld-box-close-v2', 'n_train': 10, 'n_test': 50, 'n_envs': 10, 'max_steps': 30, 'n_obs_steps': 1, 'n_action_steps': 1, 'instruction_type': 'b', 'feedback_type': ['hp', 'hn', 'fp'], 'visual': False, 'discount': 0.9}, 'dataset': {'_target_': 'llmbc.dataset.metaworld_lowdim_dataset.MetaworldLowdimDataset', 'data_path': 'datasets/box-close-v2.pt', 'data_path2': 'datasets/box-close-v2.pt', 'horizon': 1, 'pad_before': 0, 'pad_after': 0, 'obs_eef_target': True, 'use_manual_normalizer': False, 'val_ratio': 0.1, 'dummy_normalizer': True}, 'instructor': {'_target_': 'llmbc.translator.instructor.metaworld_instructor.box_close_v2_instructor.BoxCloseV2Instructor'}}, 'llm': {'name': 'HuggingFaceTB/SmolLM2-135M-Instruct', 'model_name': 'SmolLM2-135M-Instruct', 'config_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig', 'causal_lm_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM', 'use_quantization': False, 'use_joint_mlp_projector': True, 'llm_mode': 'ete-finetuned', 'finetune_mode': 'orig', 'checkpoint': 'data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890', 'max_length': 100, 'lora_config': {'r': 32, 'lora_alpha': 64, 'lora_dropout': 0.05, 'bias': 'none', 'task_type': 'CAUSAL_LM'}, 'prompter': {'_target_': 'llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter', 'use_joint_mlp_projector': True}, 'hydra': {'job': {'override_dirname': 'HuggingFaceTB/SmolLM2-135M-Instruct'}, 'run': {'dir': 'data/outputs/2026.01.21/12.18.18_HuggingFaceTB/SmolLM2-135M-Instruct'}}}}
|
| 14 |
+
2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:init():669] starting backend
|
| 15 |
+
2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:init():673] sending inform_init request
|
| 16 |
+
2026-01-21 12:19:19,674 INFO MainThread:2070718 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 17 |
+
2026-01-21 12:19:19,675 INFO MainThread:2070718 [wandb_init.py:init():686] backend started and connected
|
| 18 |
+
2026-01-21 12:19:19,684 INFO MainThread:2070718 [wandb_init.py:init():781] updated telemetry
|
| 19 |
+
2026-01-21 12:19:19,759 INFO MainThread:2070718 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
|
| 20 |
+
2026-01-21 12:19:20,469 INFO MainThread:2070718 [wandb_init.py:init():867] starting run threads in backend
|
| 21 |
+
2026-01-21 12:19:20,990 INFO MainThread:2070718 [wandb_run.py:_console_start():2451] atexit reg
|
| 22 |
+
2026-01-21 12:19:20,991 INFO MainThread:2070718 [wandb_run.py:_redirect():2299] redirect: wrap_raw
|
| 23 |
+
2026-01-21 12:19:20,991 INFO MainThread:2070718 [wandb_run.py:_redirect():2364] Wrapping output streams.
|
| 24 |
+
2026-01-21 12:19:20,991 INFO MainThread:2070718 [wandb_run.py:_redirect():2389] Redirects installed.
|
| 25 |
+
2026-01-21 12:19:20,994 INFO MainThread:2070718 [wandb_init.py:init():911] run started, returning control to user process
|
| 26 |
+
2026-01-21 12:19:20,994 INFO MainThread:2070718 [wandb_run.py:_config_callback():1389] config_cb None None {'output_dir': '/work/u1131674/LLM-BC/data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2'}
|
| 27 |
+
2026-01-21 12:20:31,202 WARNING MsgRouterThr:2070718 [router.py:message_loop():75] message_loop has been closed
|
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/run-9puzigbg.wandb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:97675973ce8e04390938123162984a49c1513ce052c76ac14c48280b33003e11
|
| 3 |
+
size 229376
|
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/wandb-resume.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"run_id": "9puzigbg"}
|
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: train_llmbc_lowdim
|
| 2 |
+
_target_: llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace
|
| 3 |
+
obs_dim: ${task.obs_dim}
|
| 4 |
+
action_dim: ${task.action_dim}
|
| 5 |
+
task_name: ${task.name}
|
| 6 |
+
exp_name: default
|
| 7 |
+
model_name: ${llm.name}
|
| 8 |
+
horizon: 1
|
| 9 |
+
n_obs_steps: 1
|
| 10 |
+
n_action_steps: 1
|
| 11 |
+
n_latency_steps: 0
|
| 12 |
+
past_action_visible: false
|
| 13 |
+
llm_orig_expert_feedback: true
|
| 14 |
+
llm_do_sample: false
|
| 15 |
+
policy:
|
| 16 |
+
_target_: llmbc.policy.llmbc_lowdim_policy.LLMBCLowdimPolicy
|
| 17 |
+
model:
|
| 18 |
+
_target_: llmbc.model.policy.policy_mlp.PolicyMLP
|
| 19 |
+
input_size: ${eval:'${n_obs_steps}*${obs_dim}'}
|
| 20 |
+
hidden_size:
|
| 21 |
+
- 256
|
| 22 |
+
- 256
|
| 23 |
+
output_size: ${eval:'${n_action_steps}*${action_dim}'}
|
| 24 |
+
activation: relu
|
| 25 |
+
n_obs_steps: ${n_obs_steps}
|
| 26 |
+
n_action_steps: ${n_action_steps}
|
| 27 |
+
obs_dim: ${obs_dim}
|
| 28 |
+
action_dim: ${action_dim}
|
| 29 |
+
llm_discriminator:
|
| 30 |
+
_target_: llmbc.discriminator.llm_ce_discriminator.LLMCEDiscriminator
|
| 31 |
+
task_id: ${task_name}
|
| 32 |
+
llm_translator:
|
| 33 |
+
_target_: llmbc.translator.llm_translator.LLMTranslator
|
| 34 |
+
cfg: ${llm}
|
| 35 |
+
obs_dim: ${task.obs_dim}
|
| 36 |
+
action_dim: ${task.action_dim}
|
| 37 |
+
horizon: ${horizon}
|
| 38 |
+
n_obs_steps: ${n_obs_steps}
|
| 39 |
+
n_action_steps: ${n_action_steps}
|
| 40 |
+
loss_bc_weight: 1.0
|
| 41 |
+
loss_llm_weight: 0.001
|
| 42 |
+
horizon: ${horizon}
|
| 43 |
+
n_obs_steps: ${n_obs_steps}
|
| 44 |
+
n_action_steps: ${n_action_steps}
|
| 45 |
+
normalize_llm_loss: true
|
| 46 |
+
dataloader:
|
| 47 |
+
batch_size: 16
|
| 48 |
+
num_workers: 0
|
| 49 |
+
shuffle: true
|
| 50 |
+
pin_memory: false
|
| 51 |
+
persistent_workers: false
|
| 52 |
+
val_dataloader:
|
| 53 |
+
batch_size: 16
|
| 54 |
+
num_workers: 0
|
| 55 |
+
shuffle: true
|
| 56 |
+
pin_memory: false
|
| 57 |
+
persistent_workers: false
|
| 58 |
+
optimizer:
|
| 59 |
+
_target_: torch.optim.AdamW
|
| 60 |
+
lr: 0.01
|
| 61 |
+
betas:
|
| 62 |
+
- 0.95
|
| 63 |
+
- 0.999
|
| 64 |
+
eps: 1.0e-08
|
| 65 |
+
weight_decay: 1.0e-06
|
| 66 |
+
training:
|
| 67 |
+
device: cuda:0
|
| 68 |
+
seed: 42
|
| 69 |
+
debug: false
|
| 70 |
+
resume: false
|
| 71 |
+
lr_scheduler: cosine
|
| 72 |
+
lr_warmup_steps: 10
|
| 73 |
+
num_epochs: 1001
|
| 74 |
+
gradient_accumulate_every: 8
|
| 75 |
+
grad_norm_clip: 0.5
|
| 76 |
+
rollout_every: 5
|
| 77 |
+
checkpoint_every: 5
|
| 78 |
+
val_every: 1
|
| 79 |
+
sample_every: 5
|
| 80 |
+
sample_max_batch: 128
|
| 81 |
+
max_train_steps: null
|
| 82 |
+
max_val_steps: null
|
| 83 |
+
tqdm_interval_sec: 1.0
|
| 84 |
+
logging:
|
| 85 |
+
project: ${task.name}-training
|
| 86 |
+
resume: true
|
| 87 |
+
mode: online
|
| 88 |
+
name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name}
|
| 89 |
+
tags:
|
| 90 |
+
- ${name}
|
| 91 |
+
- ${task_name}
|
| 92 |
+
- ${exp_name}
|
| 93 |
+
id: null
|
| 94 |
+
group: null
|
| 95 |
+
checkpoint:
|
| 96 |
+
topk:
|
| 97 |
+
monitor_key: test_success_rate
|
| 98 |
+
mode: max
|
| 99 |
+
k: 5
|
| 100 |
+
format_str: epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt
|
| 101 |
+
save_last_ckpt: true
|
| 102 |
+
save_last_snapshot: false
|
| 103 |
+
multi_run:
|
| 104 |
+
run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
|
| 105 |
+
wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name}
|
| 106 |
+
task:
|
| 107 |
+
name: box-close-v2
|
| 108 |
+
obs_dim: 9
|
| 109 |
+
action_dim: 4
|
| 110 |
+
env_runner:
|
| 111 |
+
_target_: llmbc.env_runner.metaworld_lowdim_runner.MetaworldLowdimRunner
|
| 112 |
+
env_name: llf-metaworld-box-close-v2
|
| 113 |
+
n_train: 10
|
| 114 |
+
n_test: 50
|
| 115 |
+
n_envs: 10
|
| 116 |
+
max_steps: 30
|
| 117 |
+
n_obs_steps: ${n_obs_steps}
|
| 118 |
+
n_action_steps: ${n_action_steps}
|
| 119 |
+
instruction_type: b
|
| 120 |
+
feedback_type:
|
| 121 |
+
- hp
|
| 122 |
+
- hn
|
| 123 |
+
- fp
|
| 124 |
+
visual: false
|
| 125 |
+
discount: 0.9
|
| 126 |
+
dataset:
|
| 127 |
+
_target_: llmbc.dataset.metaworld_lowdim_dataset.MetaworldLowdimDataset
|
| 128 |
+
data_path: datasets/box-close-v2.pt
|
| 129 |
+
data_path2: datasets/box-close-v2.pt
|
| 130 |
+
horizon: ${horizon}
|
| 131 |
+
pad_before: ${eval:'${n_obs_steps}-1'}
|
| 132 |
+
pad_after: ${eval:'${n_action_steps}-1'}
|
| 133 |
+
obs_eef_target: true
|
| 134 |
+
use_manual_normalizer: false
|
| 135 |
+
val_ratio: 0.1
|
| 136 |
+
dummy_normalizer: true
|
| 137 |
+
instructor:
|
| 138 |
+
_target_: llmbc.translator.instructor.metaworld_instructor.box_close_v2_instructor.BoxCloseV2Instructor
|
| 139 |
+
llm:
|
| 140 |
+
name: HuggingFaceTB/SmolLM2-135M-Instruct
|
| 141 |
+
model_name: SmolLM2-135M-Instruct
|
| 142 |
+
config_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig
|
| 143 |
+
causal_lm_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM
|
| 144 |
+
use_quantization: false
|
| 145 |
+
use_joint_mlp_projector: true
|
| 146 |
+
llm_mode: ete-finetuned
|
| 147 |
+
finetune_mode: orig
|
| 148 |
+
checkpoint: data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890
|
| 149 |
+
max_length: 100
|
| 150 |
+
lora_config:
|
| 151 |
+
r: 32
|
| 152 |
+
lora_alpha: 64
|
| 153 |
+
lora_dropout: 0.05
|
| 154 |
+
bias: none
|
| 155 |
+
task_type: CAUSAL_LM
|
| 156 |
+
prompter:
|
| 157 |
+
_target_: llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter
|
| 158 |
+
use_joint_mlp_projector: true
|
| 159 |
+
hydra:
|
| 160 |
+
job:
|
| 161 |
+
override_dirname: ${model_name}
|
| 162 |
+
run:
|
| 163 |
+
dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${model_name}
|
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
hydra:
|
| 2 |
+
run:
|
| 3 |
+
dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
|
| 4 |
+
sweep:
|
| 5 |
+
dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
|
| 6 |
+
subdir: ${hydra.job.num}
|
| 7 |
+
launcher:
|
| 8 |
+
_target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
|
| 9 |
+
sweeper:
|
| 10 |
+
_target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
|
| 11 |
+
max_batch_size: null
|
| 12 |
+
params: null
|
| 13 |
+
help:
|
| 14 |
+
app_name: ${hydra.job.name}
|
| 15 |
+
header: '${hydra.help.app_name} is powered by Hydra.
|
| 16 |
+
|
| 17 |
+
'
|
| 18 |
+
footer: 'Powered by Hydra (https://hydra.cc)
|
| 19 |
+
|
| 20 |
+
Use --hydra-help to view Hydra specific help
|
| 21 |
+
|
| 22 |
+
'
|
| 23 |
+
template: '${hydra.help.header}
|
| 24 |
+
|
| 25 |
+
== Configuration groups ==
|
| 26 |
+
|
| 27 |
+
Compose your configuration from those groups (group=option)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
$APP_CONFIG_GROUPS
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
== Config ==
|
| 34 |
+
|
| 35 |
+
Override anything in the config (foo.bar=value)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
$CONFIG
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
${hydra.help.footer}
|
| 42 |
+
|
| 43 |
+
'
|
| 44 |
+
hydra_help:
|
| 45 |
+
template: 'Hydra (${hydra.runtime.version})
|
| 46 |
+
|
| 47 |
+
See https://hydra.cc for more info.
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
== Flags ==
|
| 51 |
+
|
| 52 |
+
$FLAGS_HELP
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
== Configuration groups ==
|
| 56 |
+
|
| 57 |
+
Compose your configuration from those groups (For example, append hydra/job_logging=disabled
|
| 58 |
+
to command line)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
$HYDRA_CONFIG_GROUPS
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
Use ''--cfg hydra'' to Show the Hydra config.
|
| 65 |
+
|
| 66 |
+
'
|
| 67 |
+
hydra_help: ???
|
| 68 |
+
hydra_logging:
|
| 69 |
+
version: 1
|
| 70 |
+
formatters:
|
| 71 |
+
simple:
|
| 72 |
+
format: '[%(asctime)s][HYDRA] %(message)s'
|
| 73 |
+
handlers:
|
| 74 |
+
console:
|
| 75 |
+
class: logging.StreamHandler
|
| 76 |
+
formatter: simple
|
| 77 |
+
stream: ext://sys.stdout
|
| 78 |
+
root:
|
| 79 |
+
level: INFO
|
| 80 |
+
handlers:
|
| 81 |
+
- console
|
| 82 |
+
loggers:
|
| 83 |
+
logging_example:
|
| 84 |
+
level: DEBUG
|
| 85 |
+
disable_existing_loggers: false
|
| 86 |
+
job_logging:
|
| 87 |
+
version: 1
|
| 88 |
+
formatters:
|
| 89 |
+
simple:
|
| 90 |
+
format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
|
| 91 |
+
handlers:
|
| 92 |
+
console:
|
| 93 |
+
class: logging.StreamHandler
|
| 94 |
+
formatter: simple
|
| 95 |
+
stream: ext://sys.stdout
|
| 96 |
+
file:
|
| 97 |
+
class: logging.FileHandler
|
| 98 |
+
formatter: simple
|
| 99 |
+
filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
|
| 100 |
+
root:
|
| 101 |
+
level: INFO
|
| 102 |
+
handlers:
|
| 103 |
+
- console
|
| 104 |
+
- file
|
| 105 |
+
disable_existing_loggers: false
|
| 106 |
+
env: {}
|
| 107 |
+
mode: RUN
|
| 108 |
+
searchpath: []
|
| 109 |
+
callbacks: {}
|
| 110 |
+
output_subdir: .hydra
|
| 111 |
+
overrides:
|
| 112 |
+
hydra:
|
| 113 |
+
- hydra.mode=RUN
|
| 114 |
+
task:
|
| 115 |
+
- policy.loss_llm_weight=1.0e-3
|
| 116 |
+
- training.seed=42
|
| 117 |
+
job:
|
| 118 |
+
name: train
|
| 119 |
+
chdir: null
|
| 120 |
+
override_dirname: policy.loss_llm_weight=1.0e-3,training.seed=42
|
| 121 |
+
id: ???
|
| 122 |
+
num: ???
|
| 123 |
+
config_name: llmbc_box-close-v2.yaml
|
| 124 |
+
env_set: {}
|
| 125 |
+
env_copy: []
|
| 126 |
+
config:
|
| 127 |
+
override_dirname:
|
| 128 |
+
kv_sep: '='
|
| 129 |
+
item_sep: ','
|
| 130 |
+
exclude_keys: []
|
| 131 |
+
runtime:
|
| 132 |
+
version: 1.2.0
|
| 133 |
+
version_base: '1.2'
|
| 134 |
+
cwd: /work/u1131674/LLM-BC
|
| 135 |
+
config_sources:
|
| 136 |
+
- path: hydra.conf
|
| 137 |
+
schema: pkg
|
| 138 |
+
provider: hydra
|
| 139 |
+
- path: /work/u1131674/LLM-BC/config/main_table
|
| 140 |
+
schema: file
|
| 141 |
+
provider: main
|
| 142 |
+
- path: ''
|
| 143 |
+
schema: structured
|
| 144 |
+
provider: schema
|
| 145 |
+
output_dir: /work/u1131674/LLM-BC/data/outputs/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2
|
| 146 |
+
choices:
|
| 147 |
+
hydra/env: default
|
| 148 |
+
hydra/callbacks: null
|
| 149 |
+
hydra/job_logging: default
|
| 150 |
+
hydra/hydra_logging: default
|
| 151 |
+
hydra/hydra_help: default
|
| 152 |
+
hydra/help: default
|
| 153 |
+
hydra/sweeper: basic
|
| 154 |
+
hydra/launcher: basic
|
| 155 |
+
hydra/output: default
|
| 156 |
+
verbose: false
|
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
- policy.loss_llm_weight=1.0e-3
|
| 2 |
+
- training.seed=42
|
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/checkpoints/epoch=0000-test_success_rate=0.000.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:841ce226dfd93b12ebfd588842ca350a66ecadb7d9fc334812aa3b8de27543ab
|
| 3 |
+
size 864520
|
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/checkpoints/latest.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:841ce226dfd93b12ebfd588842ca350a66ecadb7d9fc334812aa3b8de27543ab
|
| 3 |
+
size 864520
|
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/logs.json.txt
ADDED
|
@@ -0,0 +1,418 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"train_loss": 0.252529501914978, "train_loss_bc": 0.25195011496543884, "train_loss_llm": 0.5793765187263489, "grad_norm": 0.12839388847351074, "global_step": 0, "epoch": 0, "lr": 0.001}
|
| 2 |
+
{"train_loss": 0.273204505443573, "train_loss_bc": 0.27264082431793213, "train_loss_llm": 0.563692033290863, "grad_norm": 0.13485388457775116, "global_step": 1, "epoch": 0, "lr": 0.001}
|
| 3 |
+
{"train_loss": 0.2867761254310608, "train_loss_bc": 0.28621771931648254, "train_loss_llm": 0.5584008693695068, "grad_norm": 0.274769127368927, "global_step": 2, "epoch": 0, "lr": 0.001}
|
| 4 |
+
{"train_loss": 0.2871931791305542, "train_loss_bc": 0.2865779399871826, "train_loss_llm": 0.6152305006980896, "grad_norm": 0.41221097111701965, "global_step": 3, "epoch": 0, "lr": 0.001}
|
| 5 |
+
{"train_loss": 0.28025686740875244, "train_loss_bc": 0.2797144949436188, "train_loss_llm": 0.5423757433891296, "grad_norm": 0.5496014356613159, "global_step": 4, "epoch": 0, "lr": 0.001}
|
| 6 |
+
{"train_loss": 0.3149482309818268, "train_loss_bc": 0.31439733505249023, "train_loss_llm": 0.5508872866630554, "grad_norm": 0.6956393718719482, "global_step": 5, "epoch": 0, "lr": 0.001}
|
| 7 |
+
{"train_loss": 0.27254703640937805, "train_loss_bc": 0.27196407318115234, "train_loss_llm": 0.5829575061798096, "grad_norm": 0.8312950730323792, "global_step": 6, "epoch": 0, "lr": 0.001}
|
| 8 |
+
{"train_loss": 0.22602498531341553, "train_loss_bc": 0.22543349862098694, "train_loss_llm": 0.5914822816848755, "grad_norm": 0.9541406631469727, "global_step": 7, "epoch": 0, "lr": 0.001}
|
| 9 |
+
{"train_loss": 0.20342595875263214, "train_loss_bc": 0.2028963267803192, "train_loss_llm": 0.5296257734298706, "grad_norm": 1.0699303150177002, "global_step": 8, "epoch": 0, "lr": 0.002}
|
| 10 |
+
{"train_loss": 0.19929638504981995, "train_loss_bc": 0.19871878623962402, "train_loss_llm": 0.5776059627532959, "grad_norm": 0.11595484614372253, "global_step": 9, "epoch": 0, "lr": 0.002}
|
| 11 |
+
{"train_loss": 0.21191416680812836, "train_loss_bc": 0.21130315959453583, "train_loss_llm": 0.6110129952430725, "grad_norm": 0.23422954976558685, "global_step": 10, "epoch": 0, "lr": 0.002}
|
| 12 |
+
{"train_loss": 0.2068999856710434, "train_loss_bc": 0.2063978612422943, "train_loss_llm": 0.5021252632141113, "grad_norm": 0.3522001802921295, "global_step": 11, "epoch": 0, "lr": 0.002}
|
| 13 |
+
{"train_loss": 0.257265567779541, "train_loss_bc": 0.25662338733673096, "train_loss_llm": 0.6421942710876465, "grad_norm": 0.483461856842041, "global_step": 12, "epoch": 0, "lr": 0.002}
|
| 14 |
+
{"train_loss": 0.23878663778305054, "train_loss_bc": 0.2381792515516281, "train_loss_llm": 0.6073929071426392, "grad_norm": 0.6102063059806824, "global_step": 13, "epoch": 0, "lr": 0.002}
|
| 15 |
+
{"train_loss": 0.2712763547897339, "train_loss_bc": 0.27066537737846375, "train_loss_llm": 0.6109854578971863, "grad_norm": 0.7479075789451599, "global_step": 14, "epoch": 0, "lr": 0.002}
|
| 16 |
+
{"train_loss": 0.24330928921699524, "train_loss_bc": 0.2427230179309845, "train_loss_llm": 0.586268424987793, "grad_norm": 0.8762980699539185, "global_step": 15, "epoch": 0, "lr": 0.002}
|
| 17 |
+
{"train_loss": 0.20316186547279358, "train_loss_bc": 0.20266824960708618, "train_loss_llm": 0.4936148524284363, "grad_norm": 0.992440402507782, "global_step": 16, "epoch": 0, "lr": 0.003}
|
| 18 |
+
{"train_loss": 0.1635446846485138, "train_loss_bc": 0.162959486246109, "train_loss_llm": 0.5851912498474121, "grad_norm": 0.11341577023267746, "global_step": 17, "epoch": 0, "lr": 0.003}
|
| 19 |
+
{"train_loss": 0.1420236974954605, "train_loss_bc": 0.14150172472000122, "train_loss_llm": 0.5219756364822388, "grad_norm": 0.2166670560836792, "global_step": 18, "epoch": 0, "lr": 0.003}
|
| 20 |
+
{"train_loss": 0.08970867097377777, "train_loss_bc": 0.08923432230949402, "train_loss_llm": 0.4743492901325226, "grad_norm": 0.2942521870136261, "global_step": 19, "epoch": 0, "lr": 0.003}
|
| 21 |
+
{"train_loss": 0.1407971978187561, "train_loss_bc": 0.14016547799110413, "train_loss_llm": 0.631725013256073, "grad_norm": 0.3959764838218689, "global_step": 20, "epoch": 0, "lr": 0.003}
|
| 22 |
+
{"train_loss": 0.12558668851852417, "train_loss_bc": 0.12498115748167038, "train_loss_llm": 0.6055365800857544, "grad_norm": 0.4912969172000885, "global_step": 21, "epoch": 0, "lr": 0.003}
|
| 23 |
+
{"train_loss": 0.15840043127536774, "train_loss_bc": 0.15789246559143066, "train_loss_llm": 0.5079687833786011, "grad_norm": 0.6031914949417114, "global_step": 22, "epoch": 0, "lr": 0.003}
|
| 24 |
+
{"train_loss": 0.15493251383304596, "train_loss_bc": 0.15430215001106262, "train_loss_llm": 0.6303583383560181, "grad_norm": 0.712800145149231, "global_step": 23, "epoch": 0, "lr": 0.003}
|
| 25 |
+
{"train_loss": 0.09710954874753952, "train_loss_bc": 0.09661616384983063, "train_loss_llm": 0.4933878481388092, "grad_norm": 0.7942712306976318, "global_step": 24, "epoch": 0, "lr": 0.004}
|
| 26 |
+
{"train_loss": 0.04198349267244339, "train_loss_bc": 0.04147119075059891, "train_loss_llm": 0.5123016834259033, "grad_norm": 0.049896661192178726, "global_step": 25, "epoch": 0, "lr": 0.004}
|
| 27 |
+
{"train_loss": 0.04926488921046257, "train_loss_bc": 0.04879248887300491, "train_loss_llm": 0.4724003076553345, "grad_norm": 0.10693306475877762, "global_step": 26, "epoch": 0, "lr": 0.004}
|
| 28 |
+
{"train_loss": 0.03119494765996933, "train_loss_bc": 0.03079175390303135, "train_loss_llm": 0.40319401025772095, "grad_norm": 0.130178764462471, "global_step": 27, "epoch": 0, "lr": 0.004}
|
| 29 |
+
{"train_loss": 0.045984115451574326, "train_loss_bc": 0.04547495022416115, "train_loss_llm": 0.5091666579246521, "grad_norm": 0.18151648342609406, "global_step": 28, "epoch": 0, "lr": 0.004}
|
| 30 |
+
{"train_loss": 0.036746662110090256, "train_loss_bc": 0.036322131752967834, "train_loss_llm": 0.4245292544364929, "grad_norm": 0.22341406345367432, "global_step": 29, "epoch": 0, "lr": 0.004}
|
| 31 |
+
{"train_loss": 0.06587483733892441, "train_loss_bc": 0.06540372967720032, "train_loss_llm": 0.47110506892204285, "grad_norm": 0.30597466230392456, "global_step": 30, "epoch": 0, "lr": 0.004}
|
| 32 |
+
{"train_loss": 0.05170199275016785, "train_loss_bc": 0.05132713168859482, "train_loss_llm": 0.37486234307289124, "grad_norm": 0.3634960949420929, "global_step": 31, "epoch": 0, "lr": 0.004}
|
| 33 |
+
{"train_loss": 0.05630849674344063, "train_loss_bc": 0.0558805912733078, "train_loss_llm": 0.42790722846984863, "grad_norm": 0.4307665228843689, "global_step": 32, "epoch": 0, "lr": 0.005}
|
| 34 |
+
{"train_loss": 0.0553022176027298, "train_loss_bc": 0.05469208583235741, "train_loss_llm": 0.6101305484771729, "grad_norm": 0.08577623218297958, "global_step": 33, "epoch": 0, "lr": 0.005}
|
| 35 |
+
{"train_loss": 0.04831269383430481, "train_loss_bc": 0.04779437929391861, "train_loss_llm": 0.5183138847351074, "grad_norm": 0.15602092444896698, "global_step": 34, "epoch": 0, "lr": 0.005}
|
| 36 |
+
{"train_loss": 0.061867598444223404, "train_loss_bc": 0.06128372997045517, "train_loss_llm": 0.5838690996170044, "grad_norm": 0.2528131902217865, "global_step": 35, "epoch": 0, "lr": 0.005}
|
| 37 |
+
{"train_loss": 0.05686777085065842, "train_loss_bc": 0.05627113953232765, "train_loss_llm": 0.5966323614120483, "grad_norm": 0.3395236134529114, "global_step": 36, "epoch": 0, "lr": 0.005}
|
| 38 |
+
{"train_loss": 0.03382698819041252, "train_loss_bc": 0.03323305398225784, "train_loss_llm": 0.5939337611198425, "grad_norm": 0.3958278000354767, "global_step": 37, "epoch": 0, "lr": 0.005}
|
| 39 |
+
{"train_loss": 0.06224585324525833, "train_loss_bc": 0.0616149976849556, "train_loss_llm": 0.6308567523956299, "grad_norm": 0.4894043505191803, "global_step": 38, "epoch": 0, "lr": 0.005}
|
| 40 |
+
{"train_loss": 0.04555570334196091, "train_loss_bc": 0.04494024068117142, "train_loss_llm": 0.6154611110687256, "grad_norm": 0.5536556839942932, "global_step": 39, "epoch": 0, "lr": 0.005}
|
| 41 |
+
{"train_loss": 0.03574361279606819, "train_loss_bc": 0.03507951647043228, "train_loss_llm": 0.6640970706939697, "grad_norm": 0.6100818514823914, "global_step": 40, "epoch": 0, "lr": 0.006}
|
| 42 |
+
{"train_loss": 0.146262988448143, "train_loss_bc": 0.14580723643302917, "train_loss_llm": 0.4557466208934784, "grad_norm": 0.19763296842575073, "global_step": 41, "epoch": 0, "lr": 0.006}
|
| 43 |
+
{"train_loss": 0.11445678770542145, "train_loss_bc": 0.11390470713376999, "train_loss_llm": 0.5520769357681274, "grad_norm": 0.3685164451599121, "global_step": 42, "epoch": 0, "lr": 0.006}
|
| 44 |
+
{"train_loss": 0.10677710175514221, "train_loss_bc": 0.10625766217708588, "train_loss_llm": 0.5194418430328369, "grad_norm": 0.5320614576339722, "global_step": 43, "epoch": 0, "lr": 0.006}
|
| 45 |
+
{"train_loss": 0.12251483649015427, "train_loss_bc": 0.12198641151189804, "train_loss_llm": 0.5284275412559509, "grad_norm": 0.7118619680404663, "global_step": 44, "epoch": 0, "lr": 0.006}
|
| 46 |
+
{"train_loss": 0.14140570163726807, "train_loss_bc": 0.1408904492855072, "train_loss_llm": 0.5152463316917419, "grad_norm": 0.9093842506408691, "global_step": 45, "epoch": 0, "lr": 0.006}
|
| 47 |
+
{"train_loss": 0.10901694744825363, "train_loss_bc": 0.1084449291229248, "train_loss_llm": 0.5720197558403015, "grad_norm": 1.0770854949951172, "global_step": 46, "epoch": 0, "lr": 0.006}
|
| 48 |
+
{"train_loss": 0.13558131456375122, "train_loss_bc": 0.13501602411270142, "train_loss_llm": 0.565291702747345, "grad_norm": 1.2658616304397583, "global_step": 47, "epoch": 0, "lr": 0.006}
|
| 49 |
+
{"train_loss": 0.14484672248363495, "train_loss_bc": 0.14428021013736725, "train_loss_llm": 0.5665071606636047, "grad_norm": 1.4656471014022827, "global_step": 48, "epoch": 0, "lr": 0.006999999999999999}
|
| 50 |
+
{"train_loss": 0.24264752864837646, "train_loss_bc": 0.2419467568397522, "train_loss_llm": 0.7007750272750854, "grad_norm": 0.2969740033149719, "global_step": 49, "epoch": 0, "lr": 0.006999999999999999}
|
| 51 |
+
{"train_loss": 0.13805940747261047, "train_loss_bc": 0.1374894082546234, "train_loss_llm": 0.5699948072433472, "grad_norm": 0.5104647874832153, "global_step": 50, "epoch": 0, "lr": 0.006999999999999999}
|
| 52 |
+
{"train_loss": 0.16542810201644897, "train_loss_bc": 0.16495351493358612, "train_loss_llm": 0.47459012269973755, "grad_norm": 0.7459866404533386, "global_step": 51, "epoch": 0, "lr": 0.006999999999999999}
|
| 53 |
+
{"train_loss": 0.25657016038894653, "train_loss_bc": 0.25585728883743286, "train_loss_llm": 0.7128623723983765, "grad_norm": 1.0538054704666138, "global_step": 52, "epoch": 0, "lr": 0.006999999999999999}
|
| 54 |
+
{"train_loss": 0.20239487290382385, "train_loss_bc": 0.20180177688598633, "train_loss_llm": 0.5930944085121155, "grad_norm": 1.316612958908081, "global_step": 53, "epoch": 0, "lr": 0.006999999999999999}
|
| 55 |
+
{"train_loss": 0.1541372388601303, "train_loss_bc": 0.15368221700191498, "train_loss_llm": 0.45501962304115295, "grad_norm": 1.5417735576629639, "global_step": 54, "epoch": 0, "lr": 0.006999999999999999}
|
| 56 |
+
{"train_loss": 0.2185448706150055, "train_loss_bc": 0.2180437594652176, "train_loss_llm": 0.5011103749275208, "grad_norm": 1.8187888860702515, "global_step": 55, "epoch": 0, "lr": 0.006999999999999999}
|
| 57 |
+
{"train_loss": 0.20139560103416443, "train_loss_bc": 0.20086990296840668, "train_loss_llm": 0.5256961584091187, "grad_norm": 2.08247447013855, "global_step": 56, "epoch": 0, "lr": 0.008}
|
| 58 |
+
{"train_loss": 0.20989899337291718, "train_loss_bc": 0.20911380648612976, "train_loss_llm": 0.7851892709732056, "grad_norm": 0.27354303002357483, "global_step": 57, "epoch": 0, "lr": 0.008}
|
| 59 |
+
{"train_loss": 0.19207656383514404, "train_loss_bc": 0.19136708974838257, "train_loss_llm": 0.7094740867614746, "grad_norm": 0.534111499786377, "global_step": 58, "epoch": 0, "lr": 0.008}
|
| 60 |
+
{"train_loss": 0.1742924004793167, "train_loss_bc": 0.17367114126682281, "train_loss_llm": 0.6212564706802368, "grad_norm": 0.7795819044113159, "global_step": 59, "epoch": 0, "lr": 0.008}
|
| 61 |
+
{"train_loss": 0.1624690294265747, "train_loss_bc": 0.1617729365825653, "train_loss_llm": 0.6960869431495667, "grad_norm": 1.0119670629501343, "global_step": 60, "epoch": 0, "lr": 0.008}
|
| 62 |
+
{"train_loss": 0.20042455196380615, "train_loss_bc": 0.19979658722877502, "train_loss_llm": 0.6279683113098145, "grad_norm": 1.274623990058899, "global_step": 61, "epoch": 0, "lr": 0.008}
|
| 63 |
+
{"train_loss": 0.16158545017242432, "train_loss_bc": 0.16083624958992004, "train_loss_llm": 0.7492036819458008, "grad_norm": 1.5101232528686523, "global_step": 62, "epoch": 0, "lr": 0.008}
|
| 64 |
+
{"train_loss": 0.13282041251659393, "train_loss_bc": 0.13209721446037292, "train_loss_llm": 0.723200798034668, "grad_norm": 1.7186288833618164, "global_step": 63, "epoch": 0, "lr": 0.008}
|
| 65 |
+
{"train_loss": 0.2033994346857071, "train_loss_bc": 0.2027282416820526, "train_loss_llm": 0.6711894273757935, "grad_norm": 1.9846457242965698, "global_step": 64, "epoch": 0, "lr": 0.009000000000000001}
|
| 66 |
+
{"train_loss": 0.09530064463615417, "train_loss_bc": 0.09461785107851028, "train_loss_llm": 0.6827924847602844, "grad_norm": 0.1637452095746994, "global_step": 65, "epoch": 0, "lr": 0.009000000000000001}
|
| 67 |
+
{"train_loss": 0.09785042703151703, "train_loss_bc": 0.09729202836751938, "train_loss_llm": 0.558398962020874, "grad_norm": 0.3286266624927521, "global_step": 66, "epoch": 0, "lr": 0.009000000000000001}
|
| 68 |
+
{"train_loss": 0.09337419271469116, "train_loss_bc": 0.09270930290222168, "train_loss_llm": 0.6648919582366943, "grad_norm": 0.48786014318466187, "global_step": 67, "epoch": 0, "lr": 0.009000000000000001}
|
| 69 |
+
{"train_loss": 0.17027954757213593, "train_loss_bc": 0.16956308484077454, "train_loss_llm": 0.7164597511291504, "grad_norm": 0.7218278050422668, "global_step": 68, "epoch": 0, "lr": 0.009000000000000001}
|
| 70 |
+
{"train_loss": 0.08503676950931549, "train_loss_bc": 0.08446164429187775, "train_loss_llm": 0.5751272439956665, "grad_norm": 0.8772305250167847, "global_step": 69, "epoch": 0, "lr": 0.009000000000000001}
|
| 71 |
+
{"train_loss": 0.10142002999782562, "train_loss_bc": 0.10081231594085693, "train_loss_llm": 0.6077142953872681, "grad_norm": 1.04507315158844, "global_step": 70, "epoch": 0, "lr": 0.009000000000000001}
|
| 72 |
+
{"train_loss": 0.11661797761917114, "train_loss_bc": 0.11599370092153549, "train_loss_llm": 0.6242777109146118, "grad_norm": 1.2287834882736206, "global_step": 71, "epoch": 0, "lr": 0.009000000000000001}
|
| 73 |
+
{"train_loss": 0.11624407768249512, "train_loss_bc": 0.11565285921096802, "train_loss_llm": 0.5912151336669922, "grad_norm": 1.4120811223983765, "global_step": 72, "epoch": 0, "lr": 0.01}
|
| 74 |
+
{"train_loss": 0.040211960673332214, "train_loss_bc": 0.039551250636577606, "train_loss_llm": 0.6607116460800171, "grad_norm": 0.0777788907289505, "global_step": 73, "epoch": 0, "lr": 0.01}
|
| 75 |
+
{"train_loss": 0.05076095834374428, "train_loss_bc": 0.05007569119334221, "train_loss_llm": 0.6852684617042542, "grad_norm": 0.17003870010375977, "global_step": 74, "epoch": 0, "lr": 0.01}
|
| 76 |
+
{"train_loss": 0.037128813564777374, "train_loss_bc": 0.03643818572163582, "train_loss_llm": 0.6906265020370483, "grad_norm": 0.244222030043602, "global_step": 75, "epoch": 0, "lr": 0.01}
|
| 77 |
+
{"train_loss": 0.037142593413591385, "train_loss_bc": 0.03646159917116165, "train_loss_llm": 0.6809947490692139, "grad_norm": 0.31510722637176514, "global_step": 76, "epoch": 0, "lr": 0.01}
|
| 78 |
+
{"train_loss": 0.05590587481856346, "train_loss_bc": 0.0552542544901371, "train_loss_llm": 0.6516196131706238, "grad_norm": 0.4150258004665375, "global_step": 77, "epoch": 0, "lr": 0.01}
|
| 79 |
+
{"train_loss": 0.030149903148412704, "train_loss_bc": 0.029475240036845207, "train_loss_llm": 0.6746631860733032, "grad_norm": 0.4752899408340454, "global_step": 78, "epoch": 0, "lr": 0.01}
|
| 80 |
+
{"train_loss": 0.050657838582992554, "train_loss_bc": 0.04992213845252991, "train_loss_llm": 0.7356998324394226, "grad_norm": 0.5678731799125671, "global_step": 79, "epoch": 0, "lr": 0.01}
|
| 81 |
+
{"train_loss": 0.02764507196843624, "train_loss_bc": 0.027012458071112633, "train_loss_llm": 0.6326141953468323, "grad_norm": 0.625312089920044, "global_step": 80, "epoch": 0, "lr": 0.009999999972157305}
|
| 82 |
+
{"train_loss": 0.03073396533727646, "train_loss_bc": 0.03021111525595188, "train_loss_llm": 0.5228506326675415, "grad_norm": 0.053040843456983566, "global_step": 81, "epoch": 0, "lr": 0.009999999972157305}
|
| 83 |
+
{"train_loss": 0.027266209945082664, "train_loss_bc": 0.026712927967309952, "train_loss_llm": 0.5532811880111694, "grad_norm": 0.09477357566356659, "global_step": 82, "epoch": 0, "lr": 0.009999999972157305}
|
| 84 |
+
{"train_loss": 0.027156496420502663, "train_loss_bc": 0.026670875027775764, "train_loss_llm": 0.48562145233154297, "grad_norm": 0.13936017453670502, "global_step": 83, "epoch": 0, "lr": 0.009999999972157305}
|
| 85 |
+
{"train_loss": 0.03493297100067139, "train_loss_bc": 0.03437262028455734, "train_loss_llm": 0.5603512525558472, "grad_norm": 0.20259420573711395, "global_step": 84, "epoch": 0, "lr": 0.009999999972157305}
|
| 86 |
+
{"train_loss": 0.027796030044555664, "train_loss_bc": 0.027264408767223358, "train_loss_llm": 0.5316207408905029, "grad_norm": 0.23961691558361053, "global_step": 85, "epoch": 0, "lr": 0.009999999972157305}
|
| 87 |
+
{"train_loss": 0.026962831616401672, "train_loss_bc": 0.026478836312890053, "train_loss_llm": 0.4839947819709778, "grad_norm": 0.278042733669281, "global_step": 86, "epoch": 0, "lr": 0.009999999972157305}
|
| 88 |
+
{"train_loss": 0.022709660232067108, "train_loss_bc": 0.02220826968550682, "train_loss_llm": 0.5013896226882935, "grad_norm": 0.31111451983451843, "global_step": 87, "epoch": 0, "lr": 0.009999999972157305}
|
| 89 |
+
{"train_loss": 0.03472929820418358, "train_loss_bc": 0.03418252617120743, "train_loss_llm": 0.5467737913131714, "grad_norm": 0.36670809984207153, "global_step": 88, "epoch": 0, "lr": 0.009999999888629223}
|
| 90 |
+
{"train_loss": 0.06921354681253433, "train_loss_bc": 0.06863778829574585, "train_loss_llm": 0.5757583975791931, "grad_norm": 0.0987037718296051, "global_step": 89, "epoch": 0, "lr": 0.009999999888629223}
|
| 91 |
+
{"train_loss": 0.0616544634103775, "train_loss_bc": 0.061051469296216965, "train_loss_llm": 0.602994441986084, "grad_norm": 0.18980276584625244, "global_step": 90, "epoch": 0, "lr": 0.009999999888629223}
|
| 92 |
+
{"train_loss": 0.05179845914244652, "train_loss_bc": 0.051266275346279144, "train_loss_llm": 0.5321850180625916, "grad_norm": 0.2702069580554962, "global_step": 91, "epoch": 0, "lr": 0.009999999888629223}
|
| 93 |
+
{"train_loss": 0.06521251052618027, "train_loss_bc": 0.06461584568023682, "train_loss_llm": 0.5966640710830688, "grad_norm": 0.3639739155769348, "global_step": 92, "epoch": 0, "lr": 0.009999999888629223}
|
| 94 |
+
{"train_loss": 0.06196574494242668, "train_loss_bc": 0.06147213280200958, "train_loss_llm": 0.4936124384403229, "grad_norm": 0.45718127489089966, "global_step": 93, "epoch": 0, "lr": 0.009999999888629223}
|
| 95 |
+
{"train_loss": 0.04629657045006752, "train_loss_bc": 0.045801080763339996, "train_loss_llm": 0.4954902231693268, "grad_norm": 0.5287754535675049, "global_step": 94, "epoch": 0, "lr": 0.009999999888629223}
|
| 96 |
+
{"train_loss": 0.04616197198629379, "train_loss_bc": 0.04563061147928238, "train_loss_llm": 0.5313600301742554, "grad_norm": 0.6031498908996582, "global_step": 95, "epoch": 0, "lr": 0.009999999888629223}
|
| 97 |
+
{"train_loss": 0.06340043991804123, "train_loss_bc": 0.06277582049369812, "train_loss_llm": 0.6246193647384644, "grad_norm": 0.6954230666160583, "global_step": 96, "epoch": 0, "lr": 0.00999999974941575}
|
| 98 |
+
{"train_loss": 0.08191214501857758, "train_loss_bc": 0.0813114270567894, "train_loss_llm": 0.6007174253463745, "grad_norm": 0.10619106888771057, "global_step": 97, "epoch": 0, "lr": 0.00999999974941575}
|
| 99 |
+
{"train_loss": 0.08071709424257278, "train_loss_bc": 0.08003760129213333, "train_loss_llm": 0.6794949769973755, "grad_norm": 0.20760591328144073, "global_step": 98, "epoch": 0, "lr": 0.00999999974941575}
|
| 100 |
+
{"train_loss": 0.08332143723964691, "train_loss_bc": 0.08272609114646912, "train_loss_llm": 0.5953459739685059, "grad_norm": 0.3134561777114868, "global_step": 99, "epoch": 0, "lr": 0.00999999974941575}
|
| 101 |
+
{"train_loss": 0.07155264914035797, "train_loss_bc": 0.07099221646785736, "train_loss_llm": 0.5604289770126343, "grad_norm": 0.40877580642700195, "global_step": 100, "epoch": 0, "lr": 0.00999999974941575}
|
| 102 |
+
{"train_loss": 0.08975838869810104, "train_loss_bc": 0.08916652202606201, "train_loss_llm": 0.5918655395507812, "grad_norm": 0.522554337978363, "global_step": 101, "epoch": 0, "lr": 0.00999999974941575}
|
| 103 |
+
{"train_loss": 0.07257966697216034, "train_loss_bc": 0.07201467454433441, "train_loss_llm": 0.5649896860122681, "grad_norm": 0.6189970970153809, "global_step": 102, "epoch": 0, "lr": 0.00999999974941575}
|
| 104 |
+
{"train_loss": 0.06007641553878784, "train_loss_bc": 0.05953027680516243, "train_loss_llm": 0.546138346195221, "grad_norm": 0.702296793460846, "global_step": 103, "epoch": 0, "lr": 0.00999999974941575}
|
| 105 |
+
{"train_loss": 0.04568513110280037, "train_loss_bc": 0.045178987085819244, "train_loss_llm": 0.5061453580856323, "grad_norm": 0.7713168263435364, "global_step": 104, "epoch": 0, "lr": 0.009999999554516895}
|
| 106 |
+
{"train_loss": 0.062235742807388306, "train_loss_bc": 0.061548247933387756, "train_loss_llm": 0.687494158744812, "grad_norm": 0.08229470998048782, "global_step": 105, "epoch": 0, "lr": 0.009999999554516895}
|
| 107 |
+
{"train_loss": 0.08357568085193634, "train_loss_bc": 0.08299360424280167, "train_loss_llm": 0.5820728540420532, "grad_norm": 0.18586039543151855, "global_step": 106, "epoch": 0, "lr": 0.009999999554516895}
|
| 108 |
+
{"train_loss": 0.08088018745183945, "train_loss_bc": 0.08020119369029999, "train_loss_llm": 0.6789901256561279, "grad_norm": 0.2842538356781006, "global_step": 107, "epoch": 0, "lr": 0.009999999554516895}
|
| 109 |
+
{"train_loss": 0.07067245990037918, "train_loss_bc": 0.07003812491893768, "train_loss_llm": 0.6343338489532471, "grad_norm": 0.3756967782974243, "global_step": 108, "epoch": 0, "lr": 0.009999999554516895}
|
| 110 |
+
{"train_loss": 0.062134191393852234, "train_loss_bc": 0.06162497028708458, "train_loss_llm": 0.50922030210495, "grad_norm": 0.45985621213912964, "global_step": 109, "epoch": 0, "lr": 0.009999999554516895}
|
| 111 |
+
{"train_loss": 0.05643927678465843, "train_loss_bc": 0.05575673654675484, "train_loss_llm": 0.6825414896011353, "grad_norm": 0.535986065864563, "global_step": 110, "epoch": 0, "lr": 0.009999999554516895}
|
| 112 |
+
{"train_loss": 0.06275462359189987, "train_loss_bc": 0.06217849254608154, "train_loss_llm": 0.5761322975158691, "grad_norm": 0.6212720274925232, "global_step": 111, "epoch": 0, "lr": 0.009999999554516895}
|
| 113 |
+
{"train_loss": 0.0717947706580162, "train_loss_bc": 0.07112696766853333, "train_loss_llm": 0.6678000688552856, "grad_norm": 0.712874174118042, "global_step": 112, "epoch": 0, "lr": 0.009999999303932654}
|
| 114 |
+
{"train_loss": 0.07107824087142944, "train_loss_bc": 0.07066100835800171, "train_loss_llm": 0.4172302484512329, "grad_norm": 0.09572537988424301, "global_step": 113, "epoch": 0, "lr": 0.009999999303932654}
|
| 115 |
+
{"train_loss": 0.0622958242893219, "train_loss_bc": 0.0617825910449028, "train_loss_llm": 0.5132333040237427, "grad_norm": 0.18129226565361023, "global_step": 114, "epoch": 0, "lr": 0.009999999303932654}
|
| 116 |
+
{"train_loss": 0.05546606332063675, "train_loss_bc": 0.054820477962493896, "train_loss_llm": 0.6455863118171692, "grad_norm": 0.2585110068321228, "global_step": 115, "epoch": 0, "lr": 0.009999999303932654}
|
| 117 |
+
{"train_loss": 0.052835989743471146, "train_loss_bc": 0.052223652601242065, "train_loss_llm": 0.612338662147522, "grad_norm": 0.3340230882167816, "global_step": 116, "epoch": 0, "lr": 0.009999999303932654}
|
| 118 |
+
{"train_loss": 0.07355044782161713, "train_loss_bc": 0.0729360580444336, "train_loss_llm": 0.614387035369873, "grad_norm": 0.43235117197036743, "global_step": 117, "epoch": 0, "lr": 0.009999999303932654}
|
| 119 |
+
{"train_loss": 0.058171968907117844, "train_loss_bc": 0.057668983936309814, "train_loss_llm": 0.5029836893081665, "grad_norm": 0.5117653608322144, "global_step": 118, "epoch": 0, "lr": 0.009999999303932654}
|
| 120 |
+
{"train_loss": 0.06179669499397278, "train_loss_bc": 0.06129191815853119, "train_loss_llm": 0.5047756433486938, "grad_norm": 0.5967010855674744, "global_step": 119, "epoch": 0, "lr": 0.009999999303932654}
|
| 121 |
+
{"train_loss": 0.03286455199122429, "train_loss_bc": 0.03232846036553383, "train_loss_llm": 0.536090612411499, "grad_norm": 0.6471052169799805, "global_step": 120, "epoch": 0, "lr": 0.009999998997663032}
|
| 122 |
+
{"train_loss": 0.03573526442050934, "train_loss_bc": 0.03532949090003967, "train_loss_llm": 0.40577366948127747, "grad_norm": 0.05735393241047859, "global_step": 121, "epoch": 0, "lr": 0.009999998997663032}
|
| 123 |
+
{"train_loss": 0.038237735629081726, "train_loss_bc": 0.0377943217754364, "train_loss_llm": 0.44341397285461426, "grad_norm": 0.11955223232507706, "global_step": 122, "epoch": 0, "lr": 0.009999998997663032}
|
| 124 |
+
{"train_loss": 0.05409952253103256, "train_loss_bc": 0.05373173952102661, "train_loss_llm": 0.3677833676338196, "grad_norm": 0.20442572236061096, "global_step": 123, "epoch": 0, "lr": 0.009999998997663032}
|
| 125 |
+
{"train_loss": 0.04305477812886238, "train_loss_bc": 0.0426463782787323, "train_loss_llm": 0.4084013104438782, "grad_norm": 0.2714554965496063, "global_step": 124, "epoch": 0, "lr": 0.009999998997663032}
|
| 126 |
+
{"train_loss": 0.035634320229291916, "train_loss_bc": 0.03516857698559761, "train_loss_llm": 0.4657438099384308, "grad_norm": 0.3238549530506134, "global_step": 125, "epoch": 0, "lr": 0.009999998997663032}
|
| 127 |
+
{"train_loss": 0.05339725315570831, "train_loss_bc": 0.05303904414176941, "train_loss_llm": 0.35820940136909485, "grad_norm": 0.4088974893093109, "global_step": 126, "epoch": 0, "lr": 0.009999998997663032}
|
| 128 |
+
{"train_loss": 0.04185650870203972, "train_loss_bc": 0.04144421964883804, "train_loss_llm": 0.4122905433177948, "grad_norm": 0.47525259852409363, "global_step": 127, "epoch": 0, "lr": 0.009999998997663032}
|
| 129 |
+
{"train_loss": 0.042993541806936264, "train_loss_bc": 0.042601581662893295, "train_loss_llm": 0.3919590413570404, "grad_norm": 0.5411036014556885, "global_step": 128, "epoch": 0, "lr": 0.009999998635708033}
|
| 130 |
+
{"train_loss": 0.02562362141907215, "train_loss_bc": 0.024971390143036842, "train_loss_llm": 0.6522307395935059, "grad_norm": 0.041133757680654526, "global_step": 129, "epoch": 0, "lr": 0.009999998635708033}
|
| 131 |
+
{"train_loss": 0.02191310189664364, "train_loss_bc": 0.02129165455698967, "train_loss_llm": 0.6214474439620972, "grad_norm": 0.07116004079580307, "global_step": 130, "epoch": 0, "lr": 0.009999998635708033}
|
| 132 |
+
{"train_loss": 0.03156816214323044, "train_loss_bc": 0.0309942327439785, "train_loss_llm": 0.5739300847053528, "grad_norm": 0.12592613697052002, "global_step": 131, "epoch": 0, "lr": 0.009999998635708033}
|
| 133 |
+
{"train_loss": 0.02577713131904602, "train_loss_bc": 0.025125574320554733, "train_loss_llm": 0.6515576839447021, "grad_norm": 0.16578812897205353, "global_step": 132, "epoch": 0, "lr": 0.009999998635708033}
|
| 134 |
+
{"train_loss": 0.02320152334868908, "train_loss_bc": 0.02264384739100933, "train_loss_llm": 0.5576763153076172, "grad_norm": 0.19451332092285156, "global_step": 133, "epoch": 0, "lr": 0.009999998635708033}
|
| 135 |
+
{"train_loss": 0.026553723961114883, "train_loss_bc": 0.02588409185409546, "train_loss_llm": 0.6696317195892334, "grad_norm": 0.23911207914352417, "global_step": 134, "epoch": 0, "lr": 0.009999998635708033}
|
| 136 |
+
{"train_loss": 0.02071528509259224, "train_loss_bc": 0.02012854814529419, "train_loss_llm": 0.5867360830307007, "grad_norm": 0.27343958616256714, "global_step": 135, "epoch": 0, "lr": 0.009999998635708033}
|
| 137 |
+
{"train_loss": 0.0177980437874794, "train_loss_bc": 0.01714349165558815, "train_loss_llm": 0.6545513272285461, "grad_norm": 0.2921649217605591, "global_step": 136, "epoch": 0, "lr": 0.009999998218067659}
|
| 138 |
+
{"train_loss": 0.016527190804481506, "train_loss_bc": 0.015900835394859314, "train_loss_llm": 0.6263555288314819, "grad_norm": 0.027912678197026253, "global_step": 137, "epoch": 0, "lr": 0.009999998218067659}
|
| 139 |
+
{"train_loss": 0.016124101355671883, "train_loss_bc": 0.015586758963763714, "train_loss_llm": 0.5373432636260986, "grad_norm": 0.058148931711912155, "global_step": 138, "epoch": 0, "lr": 0.009999998218067659}
|
| 140 |
+
{"train_loss": 0.02101842127740383, "train_loss_bc": 0.020332563668489456, "train_loss_llm": 0.6858576536178589, "grad_norm": 0.07774510979652405, "global_step": 139, "epoch": 0, "lr": 0.009999998218067659}
|
| 141 |
+
{"train_loss": 0.019856909289956093, "train_loss_bc": 0.019175313413143158, "train_loss_llm": 0.6815959215164185, "grad_norm": 0.1038837879896164, "global_step": 140, "epoch": 0, "lr": 0.009999998218067659}
|
| 142 |
+
{"train_loss": 0.02635011076927185, "train_loss_bc": 0.02552071586251259, "train_loss_llm": 0.8293948769569397, "grad_norm": 0.13838204741477966, "global_step": 141, "epoch": 0, "lr": 0.009999998218067659}
|
| 143 |
+
{"train_loss": 0.017442570999264717, "train_loss_bc": 0.016876015812158585, "train_loss_llm": 0.5665552616119385, "grad_norm": 0.16495107114315033, "global_step": 142, "epoch": 0, "lr": 0.009999998218067659}
|
| 144 |
+
{"train_loss": 0.019354552030563354, "train_loss_bc": 0.018637431785464287, "train_loss_llm": 0.7171201109886169, "grad_norm": 0.19188618659973145, "global_step": 143, "epoch": 0, "lr": 0.009999998218067659}
|
| 145 |
+
{"train_loss": 0.02011699415743351, "train_loss_bc": 0.01947595179080963, "train_loss_llm": 0.6410424709320068, "grad_norm": 0.21645328402519226, "global_step": 144, "epoch": 0, "lr": 0.009999997744741916}
|
| 146 |
+
{"train_loss": 0.02425413206219673, "train_loss_bc": 0.023770108819007874, "train_loss_llm": 0.4840241074562073, "grad_norm": 0.0436902791261673, "global_step": 145, "epoch": 0, "lr": 0.009999997744741916}
|
| 147 |
+
{"train_loss": 0.022116929292678833, "train_loss_bc": 0.021655619144439697, "train_loss_llm": 0.4613092541694641, "grad_norm": 0.08582352846860886, "global_step": 146, "epoch": 0, "lr": 0.009999997744741916}
|
| 148 |
+
{"train_loss": 0.021659119054675102, "train_loss_bc": 0.021171528846025467, "train_loss_llm": 0.48758962750434875, "grad_norm": 0.126007542014122, "global_step": 147, "epoch": 0, "lr": 0.009999997744741916}
|
| 149 |
+
{"train_loss": 0.03805282711982727, "train_loss_bc": 0.03744645416736603, "train_loss_llm": 0.6063730716705322, "grad_norm": 0.19119882583618164, "global_step": 148, "epoch": 0, "lr": 0.009999997744741916}
|
| 150 |
+
{"train_loss": 0.01933918334543705, "train_loss_bc": 0.018880464136600494, "train_loss_llm": 0.45871883630752563, "grad_norm": 0.2215609848499298, "global_step": 149, "epoch": 0, "lr": 0.009999997744741916}
|
| 151 |
+
{"train_loss": 0.020558631047606468, "train_loss_bc": 0.019996277987957, "train_loss_llm": 0.5623538494110107, "grad_norm": 0.2572121322154999, "global_step": 150, "epoch": 0, "lr": 0.009999997744741916}
|
| 152 |
+
{"train_loss": 0.022163955494761467, "train_loss_bc": 0.021520184352993965, "train_loss_llm": 0.6437717080116272, "grad_norm": 0.29958251118659973, "global_step": 151, "epoch": 0, "lr": 0.009999997744741916}
|
| 153 |
+
{"train_loss": 0.026239177212119102, "train_loss_bc": 0.025800224393606186, "train_loss_llm": 0.4389524757862091, "grad_norm": 0.34274646639823914, "global_step": 152, "epoch": 0, "lr": 0.00999999721573081}
|
| 154 |
+
{"train_loss": 0.02925034798681736, "train_loss_bc": 0.028717506676912308, "train_loss_llm": 0.5328419804573059, "grad_norm": 0.05676320195198059, "global_step": 153, "epoch": 0, "lr": 0.00999999721573081}
|
| 155 |
+
{"train_loss": 0.037463825196027756, "train_loss_bc": 0.03690113127231598, "train_loss_llm": 0.5626922845840454, "grad_norm": 0.12929601967334747, "global_step": 154, "epoch": 0, "lr": 0.00999999721573081}
|
| 156 |
+
{"train_loss": 0.018557682633399963, "train_loss_bc": 0.018073368817567825, "train_loss_llm": 0.4843147099018097, "grad_norm": 0.17127014696598053, "global_step": 155, "epoch": 0, "lr": 0.00999999721573081}
|
| 157 |
+
{"train_loss": 0.024363229051232338, "train_loss_bc": 0.02386392466723919, "train_loss_llm": 0.4993036687374115, "grad_norm": 0.2232280820608139, "global_step": 156, "epoch": 0, "lr": 0.00999999721573081}
|
| 158 |
+
{"train_loss": 0.030357468873262405, "train_loss_bc": 0.029831916093826294, "train_loss_llm": 0.5255520939826965, "grad_norm": 0.2857641279697418, "global_step": 157, "epoch": 0, "lr": 0.00999999721573081}
|
| 159 |
+
{"train_loss": 0.038563068956136703, "train_loss_bc": 0.03807063773274422, "train_loss_llm": 0.4924296736717224, "grad_norm": 0.3589693307876587, "global_step": 158, "epoch": 0, "lr": 0.00999999721573081}
|
| 160 |
+
{"train_loss": 0.038679443299770355, "train_loss_bc": 0.03814232721924782, "train_loss_llm": 0.5371164083480835, "grad_norm": 0.42973586916923523, "global_step": 159, "epoch": 0, "lr": 0.00999999721573081}
|
| 161 |
+
{"train_loss": 0.03393377736210823, "train_loss_bc": 0.0334688201546669, "train_loss_llm": 0.4649561643600464, "grad_norm": 0.49458184838294983, "global_step": 160, "epoch": 0, "lr": 0.009999996631034345}
|
| 162 |
+
{"train_loss": 0.03411827236413956, "train_loss_bc": 0.033583398908376694, "train_loss_llm": 0.5348742008209229, "grad_norm": 0.06490988284349442, "global_step": 161, "epoch": 0, "lr": 0.009999996631034345}
|
| 163 |
+
{"train_loss": 0.02351403422653675, "train_loss_bc": 0.02295222505927086, "train_loss_llm": 0.5618085861206055, "grad_norm": 0.11563616991043091, "global_step": 162, "epoch": 0, "lr": 0.009999996631034345}
|
| 164 |
+
{"train_loss": 0.028045671060681343, "train_loss_bc": 0.027559760957956314, "train_loss_llm": 0.4859097898006439, "grad_norm": 0.17181871831417084, "global_step": 163, "epoch": 0, "lr": 0.009999996631034345}
|
| 165 |
+
{"train_loss": 0.012934507802128792, "train_loss_bc": 0.012457642704248428, "train_loss_llm": 0.4768647849559784, "grad_norm": 0.20869013667106628, "global_step": 164, "epoch": 0, "lr": 0.009999996631034345}
|
| 166 |
+
{"train_loss": 0.02624637447297573, "train_loss_bc": 0.02573414519429207, "train_loss_llm": 0.5122296214103699, "grad_norm": 0.2638067603111267, "global_step": 165, "epoch": 0, "lr": 0.009999996631034345}
|
| 167 |
+
{"train_loss": 0.04066107049584389, "train_loss_bc": 0.04009600728750229, "train_loss_llm": 0.5650624632835388, "grad_norm": 0.33961179852485657, "global_step": 166, "epoch": 0, "lr": 0.009999996631034345}
|
| 168 |
+
{"train_loss": 0.018790556117892265, "train_loss_bc": 0.018201837316155434, "train_loss_llm": 0.5887188911437988, "grad_norm": 0.38523611426353455, "global_step": 167, "epoch": 0, "lr": 0.009999996631034345}
|
| 169 |
+
{"train_loss": 0.024333346635103226, "train_loss_bc": 0.023783767595887184, "train_loss_llm": 0.5495793223381042, "grad_norm": 0.4378797113895416, "global_step": 168, "epoch": 0, "lr": 0.00999999599065253}
|
| 170 |
+
{"train_loss": 0.02820535935461521, "train_loss_bc": 0.027705006301403046, "train_loss_llm": 0.5003523826599121, "grad_norm": 0.053658343851566315, "global_step": 169, "epoch": 0, "lr": 0.00999999599065253}
|
| 171 |
+
{"train_loss": 0.025200804695487022, "train_loss_bc": 0.02471126988530159, "train_loss_llm": 0.4895356297492981, "grad_norm": 0.10377084463834763, "global_step": 170, "epoch": 0, "lr": 0.00999999599065253}
|
| 172 |
+
{"train_loss": 0.01955123245716095, "train_loss_bc": 0.019059764221310616, "train_loss_llm": 0.4914677143096924, "grad_norm": 0.14640706777572632, "global_step": 171, "epoch": 0, "lr": 0.00999999599065253}
|
| 173 |
+
{"train_loss": 0.02138841524720192, "train_loss_bc": 0.02090127021074295, "train_loss_llm": 0.48714545369148254, "grad_norm": 0.1854257434606552, "global_step": 172, "epoch": 0, "lr": 0.00999999599065253}
|
| 174 |
+
{"train_loss": 0.024181261658668518, "train_loss_bc": 0.023732315748929977, "train_loss_llm": 0.44894570112228394, "grad_norm": 0.23339462280273438, "global_step": 173, "epoch": 0, "lr": 0.00999999599065253}
|
| 175 |
+
{"train_loss": 0.023717273026704788, "train_loss_bc": 0.02322128415107727, "train_loss_llm": 0.4959881603717804, "grad_norm": 0.2798902988433838, "global_step": 174, "epoch": 0, "lr": 0.00999999599065253}
|
| 176 |
+
{"train_loss": 0.03485918045043945, "train_loss_bc": 0.034368738532066345, "train_loss_llm": 0.49044036865234375, "grad_norm": 0.343951553106308, "global_step": 175, "epoch": 0, "lr": 0.00999999599065253}
|
| 177 |
+
{"train_loss": 0.02320096641778946, "train_loss_bc": 0.022748133167624474, "train_loss_llm": 0.4528330862522125, "grad_norm": 0.38900986313819885, "global_step": 176, "epoch": 0, "lr": 0.009999995294585371}
|
| 178 |
+
{"train_loss": 0.023330306634306908, "train_loss_bc": 0.022773388773202896, "train_loss_llm": 0.5569183826446533, "grad_norm": 0.0416216216981411, "global_step": 177, "epoch": 0, "lr": 0.009999995294585371}
|
| 179 |
+
{"train_loss": 0.017154095694422722, "train_loss_bc": 0.01675502397119999, "train_loss_llm": 0.3990713059902191, "grad_norm": 0.06875938922166824, "global_step": 178, "epoch": 0, "lr": 0.009999995294585371}
|
| 180 |
+
{"train_loss": 0.021150220185518265, "train_loss_bc": 0.02067718282341957, "train_loss_llm": 0.4730375111103058, "grad_norm": 0.10553700476884842, "global_step": 179, "epoch": 0, "lr": 0.009999995294585371}
|
| 181 |
+
{"train_loss": 0.016837185248732567, "train_loss_bc": 0.016467537730932236, "train_loss_llm": 0.36964699625968933, "grad_norm": 0.13219793140888214, "global_step": 180, "epoch": 0, "lr": 0.009999995294585371}
|
| 182 |
+
{"train_loss": 0.009770027361810207, "train_loss_bc": 0.009397734887897968, "train_loss_llm": 0.3722921311855316, "grad_norm": 0.15051698684692383, "global_step": 181, "epoch": 0, "lr": 0.009999995294585371}
|
| 183 |
+
{"train_loss": 0.02333925850689411, "train_loss_bc": 0.02287432923913002, "train_loss_llm": 0.46492841839790344, "grad_norm": 0.18662676215171814, "global_step": 182, "epoch": 0, "lr": 0.009999995294585371}
|
| 184 |
+
{"train_loss": 0.013727608136832714, "train_loss_bc": 0.01333148404955864, "train_loss_llm": 0.39612439274787903, "grad_norm": 0.21264775097370148, "global_step": 183, "epoch": 0, "lr": 0.009999995294585371}
|
| 185 |
+
{"train_loss": 0.01233526412397623, "train_loss_bc": 0.011927351355552673, "train_loss_llm": 0.4079124927520752, "grad_norm": 0.23667089641094208, "global_step": 184, "epoch": 0, "lr": 0.009999994542832874}
|
| 186 |
+
{"train_loss": 0.014251401647925377, "train_loss_bc": 0.013858886435627937, "train_loss_llm": 0.3925148844718933, "grad_norm": 0.02162291295826435, "global_step": 185, "epoch": 0, "lr": 0.009999994542832874}
|
| 187 |
+
{"train_loss": 0.02096753567457199, "train_loss_bc": 0.020425261929631233, "train_loss_llm": 0.542274534702301, "grad_norm": 0.05543696507811546, "global_step": 186, "epoch": 0, "lr": 0.009999994542832874}
|
| 188 |
+
{"train_loss": 0.021045425906777382, "train_loss_bc": 0.02053808979690075, "train_loss_llm": 0.5073364973068237, "grad_norm": 0.08720546215772629, "global_step": 187, "epoch": 0, "lr": 0.009999994542832874}
|
| 189 |
+
{"train_loss": 0.020341763272881508, "train_loss_bc": 0.019736729562282562, "train_loss_llm": 0.6050328016281128, "grad_norm": 0.11292923241853714, "global_step": 188, "epoch": 0, "lr": 0.009999994542832874}
|
| 190 |
+
{"train_loss": 0.011690325103700161, "train_loss_bc": 0.011292900890111923, "train_loss_llm": 0.39742419123649597, "grad_norm": 0.13396863639354706, "global_step": 189, "epoch": 0, "lr": 0.009999994542832874}
|
| 191 |
+
{"train_loss": 0.02014937624335289, "train_loss_bc": 0.019594522193074226, "train_loss_llm": 0.554853618144989, "grad_norm": 0.16246306896209717, "global_step": 190, "epoch": 0, "lr": 0.009999994542832874}
|
| 192 |
+
{"train_loss": 0.014351895079016685, "train_loss_bc": 0.01393540296703577, "train_loss_llm": 0.41649240255355835, "grad_norm": 0.18816952407360077, "global_step": 191, "epoch": 0, "lr": 0.009999994542832874}
|
| 193 |
+
{"train_loss": 0.015411981381475925, "train_loss_bc": 0.014920342713594437, "train_loss_llm": 0.4916388988494873, "grad_norm": 0.2118474692106247, "global_step": 192, "epoch": 0, "lr": 0.009999993735395049}
|
| 194 |
+
{"train_loss": 0.018817156553268433, "train_loss_bc": 0.018374208360910416, "train_loss_llm": 0.4429486393928528, "grad_norm": 0.031203927472233772, "global_step": 193, "epoch": 0, "lr": 0.009999993735395049}
|
| 195 |
+
{"train_loss": 0.015957778319716454, "train_loss_bc": 0.015566591173410416, "train_loss_llm": 0.39118722081184387, "grad_norm": 0.06421653926372528, "global_step": 194, "epoch": 0, "lr": 0.009999993735395049}
|
| 196 |
+
{"train_loss": 0.019087474793195724, "train_loss_bc": 0.018518388271331787, "train_loss_llm": 0.5690857172012329, "grad_norm": 0.0811726450920105, "global_step": 195, "epoch": 0, "lr": 0.009999993735395049}
|
| 197 |
+
{"train_loss": 0.015710245817899704, "train_loss_bc": 0.01526118814945221, "train_loss_llm": 0.44905757904052734, "grad_norm": 0.10153987258672714, "global_step": 196, "epoch": 0, "lr": 0.009999993735395049}
|
| 198 |
+
{"train_loss": 0.019453734159469604, "train_loss_bc": 0.018925407901406288, "train_loss_llm": 0.5283269882202148, "grad_norm": 0.1219358816742897, "global_step": 197, "epoch": 0, "lr": 0.009999993735395049}
|
| 199 |
+
{"train_loss": 0.01505982130765915, "train_loss_bc": 0.014631738886237144, "train_loss_llm": 0.4280821681022644, "grad_norm": 0.14476309716701508, "global_step": 198, "epoch": 0, "lr": 0.009999993735395049}
|
| 200 |
+
{"train_loss": 0.013033466413617134, "train_loss_bc": 0.012675212696194649, "train_loss_llm": 0.3582540452480316, "grad_norm": 0.1675001084804535, "global_step": 199, "epoch": 0, "lr": 0.009999993735395049}
|
| 201 |
+
{"train_loss": 0.01897766813635826, "train_loss_bc": 0.018507644534111023, "train_loss_llm": 0.4700234532356262, "grad_norm": 0.1958317756652832, "global_step": 200, "epoch": 0, "lr": 0.009999992872271905}
|
| 202 |
+
{"train_loss": 0.021655641496181488, "train_loss_bc": 0.021151017397642136, "train_loss_llm": 0.5046237111091614, "grad_norm": 0.03384440392255783, "global_step": 201, "epoch": 0, "lr": 0.009999992872271905}
|
| 203 |
+
{"train_loss": 0.019754817709326744, "train_loss_bc": 0.019261155277490616, "train_loss_llm": 0.49366283416748047, "grad_norm": 0.06363573670387268, "global_step": 202, "epoch": 0, "lr": 0.009999992872271905}
|
| 204 |
+
{"train_loss": 0.0197446309030056, "train_loss_bc": 0.019323352724313736, "train_loss_llm": 0.4212789237499237, "grad_norm": 0.09140758961439133, "global_step": 203, "epoch": 0, "lr": 0.009999992872271905}
|
| 205 |
+
{"train_loss": 0.02213365212082863, "train_loss_bc": 0.02167753502726555, "train_loss_llm": 0.4561164677143097, "grad_norm": 0.12779031693935394, "global_step": 204, "epoch": 0, "lr": 0.009999992872271905}
|
| 206 |
+
{"train_loss": 0.018727730959653854, "train_loss_bc": 0.018266774713993073, "train_loss_llm": 0.46095675230026245, "grad_norm": 0.15023837983608246, "global_step": 205, "epoch": 0, "lr": 0.009999992872271905}
|
| 207 |
+
{"train_loss": 0.021134980022907257, "train_loss_bc": 0.02065064013004303, "train_loss_llm": 0.48433929681777954, "grad_norm": 0.18333274126052856, "global_step": 206, "epoch": 0, "lr": 0.009999992872271905}
|
| 208 |
+
{"train_loss": 0.018410563468933105, "train_loss_bc": 0.017892083153128624, "train_loss_llm": 0.5184803605079651, "grad_norm": 0.20620277523994446, "global_step": 207, "epoch": 0, "lr": 0.009999992872271905}
|
| 209 |
+
{"train_loss": 0.01952839083969593, "train_loss_bc": 0.01910785771906376, "train_loss_llm": 0.42053380608558655, "grad_norm": 0.23822638392448425, "global_step": 208, "epoch": 0, "lr": 0.009999991953463454}
|
| 210 |
+
{"train_loss": 0.020005524158477783, "train_loss_bc": 0.01958916336297989, "train_loss_llm": 0.41636162996292114, "grad_norm": 0.022418994456529617, "global_step": 209, "epoch": 0, "lr": 0.009999991953463454}
|
| 211 |
+
{"train_loss": 0.022034049034118652, "train_loss_bc": 0.02159287966787815, "train_loss_llm": 0.4411696493625641, "grad_norm": 0.058893270790576935, "global_step": 210, "epoch": 0, "lr": 0.009999991953463454}
|
| 212 |
+
{"train_loss": 0.018601490184664726, "train_loss_bc": 0.018017925322055817, "train_loss_llm": 0.583564043045044, "grad_norm": 0.07501673698425293, "global_step": 211, "epoch": 0, "lr": 0.009999991953463454}
|
| 213 |
+
{"train_loss": 0.021930024027824402, "train_loss_bc": 0.021384473890066147, "train_loss_llm": 0.5455496311187744, "grad_norm": 0.09862517565488815, "global_step": 212, "epoch": 0, "lr": 0.009999991953463454}
|
| 214 |
+
{"train_loss": 0.020846663042902946, "train_loss_bc": 0.020350880920886993, "train_loss_llm": 0.49578237533569336, "grad_norm": 0.1402716189622879, "global_step": 213, "epoch": 0, "lr": 0.009999991953463454}
|
| 215 |
+
{"train_loss": 0.01877172477543354, "train_loss_bc": 0.01824098639190197, "train_loss_llm": 0.5307385921478271, "grad_norm": 0.16469259560108185, "global_step": 214, "epoch": 0, "lr": 0.009999991953463454}
|
| 216 |
+
{"train_loss": 0.020292270928621292, "train_loss_bc": 0.019869061186909676, "train_loss_llm": 0.42321062088012695, "grad_norm": 0.19249005615711212, "global_step": 215, "epoch": 0, "lr": 0.009999991953463454}
|
| 217 |
+
{"train_loss": 0.019689541310071945, "train_loss_bc": 0.019232220947742462, "train_loss_llm": 0.4573211669921875, "grad_norm": 0.21812190115451813, "global_step": 216, "epoch": 0, "lr": 0.0099999909789697}
|
| 218 |
+
{"train_loss": 0.01748274266719818, "train_loss_bc": 0.016972113400697708, "train_loss_llm": 0.5106291174888611, "grad_norm": 0.01980498433113098, "global_step": 217, "epoch": 0, "lr": 0.0099999909789697}
|
| 219 |
+
{"train_loss": 0.02484678477048874, "train_loss_bc": 0.024319060146808624, "train_loss_llm": 0.5277247428894043, "grad_norm": 0.0617092065513134, "global_step": 218, "epoch": 0, "lr": 0.0099999909789697}
|
| 220 |
+
{"train_loss": 0.019288551062345505, "train_loss_bc": 0.01883828639984131, "train_loss_llm": 0.4502650499343872, "grad_norm": 0.08189266920089722, "global_step": 219, "epoch": 0, "lr": 0.0099999909789697}
|
| 221 |
+
{"train_loss": 0.01973573863506317, "train_loss_bc": 0.019199654459953308, "train_loss_llm": 0.5360836982727051, "grad_norm": 0.09861791878938675, "global_step": 220, "epoch": 0, "lr": 0.0099999909789697}
|
| 222 |
+
{"train_loss": 0.018722541630268097, "train_loss_bc": 0.018269415944814682, "train_loss_llm": 0.45312485098838806, "grad_norm": 0.12747113406658173, "global_step": 221, "epoch": 0, "lr": 0.0099999909789697}
|
| 223 |
+
{"train_loss": 0.017685379832983017, "train_loss_bc": 0.0172601118683815, "train_loss_llm": 0.4252672493457794, "grad_norm": 0.15524466335773468, "global_step": 222, "epoch": 0, "lr": 0.0099999909789697}
|
| 224 |
+
{"train_loss": 0.021818850189447403, "train_loss_bc": 0.02134229615330696, "train_loss_llm": 0.47655367851257324, "grad_norm": 0.1838337481021881, "global_step": 223, "epoch": 0, "lr": 0.0099999909789697}
|
| 225 |
+
{"train_loss": 0.019331879913806915, "train_loss_bc": 0.01881510019302368, "train_loss_llm": 0.5167800188064575, "grad_norm": 0.2143346071243286, "global_step": 224, "epoch": 0, "lr": 0.00999998994879066}
|
| 226 |
+
{"train_loss": 0.017838943749666214, "train_loss_bc": 0.017424583435058594, "train_loss_llm": 0.4143611192703247, "grad_norm": 0.023944241926074028, "global_step": 225, "epoch": 0, "lr": 0.00999998994879066}
|
| 227 |
+
{"train_loss": 0.01796240359544754, "train_loss_bc": 0.017589787021279335, "train_loss_llm": 0.37261566519737244, "grad_norm": 0.03415573388338089, "global_step": 226, "epoch": 0, "lr": 0.00999998994879066}
|
| 228 |
+
{"train_loss": 0.01584581844508648, "train_loss_bc": 0.015357905998826027, "train_loss_llm": 0.4879117012023926, "grad_norm": 0.05189693719148636, "global_step": 227, "epoch": 0, "lr": 0.00999998994879066}
|
| 229 |
+
{"train_loss": 0.01800801046192646, "train_loss_bc": 0.01758820191025734, "train_loss_llm": 0.4198092520236969, "grad_norm": 0.07924457639455795, "global_step": 228, "epoch": 0, "lr": 0.00999998994879066}
|
| 230 |
+
{"train_loss": 0.018989915028214455, "train_loss_bc": 0.01854291930794716, "train_loss_llm": 0.4469965100288391, "grad_norm": 0.11897021532058716, "global_step": 229, "epoch": 0, "lr": 0.00999998994879066}
|
| 231 |
+
{"train_loss": 0.02125917375087738, "train_loss_bc": 0.020760733634233475, "train_loss_llm": 0.49844038486480713, "grad_norm": 0.1377515345811844, "global_step": 230, "epoch": 0, "lr": 0.00999998994879066}
|
| 232 |
+
{"train_loss": 0.019712205976247787, "train_loss_bc": 0.01920940726995468, "train_loss_llm": 0.5027981996536255, "grad_norm": 0.16281495988368988, "global_step": 231, "epoch": 0, "lr": 0.00999998994879066}
|
| 233 |
+
{"train_loss": 0.020320260897278786, "train_loss_bc": 0.019815631210803986, "train_loss_llm": 0.5046302080154419, "grad_norm": 0.1876341551542282, "global_step": 232, "epoch": 0, "lr": 0.009999988862926341}
|
| 234 |
+
{"train_loss": 0.013357514515519142, "train_loss_bc": 0.01291839312762022, "train_loss_llm": 0.43912118673324585, "grad_norm": 0.02517073042690754, "global_step": 233, "epoch": 0, "lr": 0.009999988862926341}
|
| 235 |
+
{"train_loss": 0.02109229937195778, "train_loss_bc": 0.020597826689481735, "train_loss_llm": 0.49447277188301086, "grad_norm": 0.04559013620018959, "global_step": 234, "epoch": 0, "lr": 0.009999988862926341}
|
| 236 |
+
{"train_loss": 0.02008131518959999, "train_loss_bc": 0.019501332193613052, "train_loss_llm": 0.579983115196228, "grad_norm": 0.07952536642551422, "global_step": 235, "epoch": 0, "lr": 0.009999988862926341}
|
| 237 |
+
{"train_loss": 0.016857489943504333, "train_loss_bc": 0.016379257664084435, "train_loss_llm": 0.4782329797744751, "grad_norm": 0.10649916529655457, "global_step": 236, "epoch": 0, "lr": 0.009999988862926341}
|
| 238 |
+
{"train_loss": 0.06850671758405677, "train_loss_bc": 0.014644688926637173, "train_loss_llm": 0.4756101667881012, "grad_norm": 0.13082890212535858, "global_step": 237, "epoch": 0, "lr": 0.009999988862926341, "train/cumulative_reward": 2.7083310524135573, "train/mean_score": 0.33428478816554785, "train/success_rate": 0.0, "test/cumulative_reward": 2.474044586385482, "test/mean_score": 0.3310451992587934, "test/success_rate": 0.0, "val_loss": 0.017693543806672096, "train_action_mse_error": 0.021953511983156204}
|
| 239 |
+
{"train_loss": 0.017798328772187233, "train_loss_bc": 0.017359893769025803, "train_loss_llm": 0.4384341835975647, "grad_norm": 0.157542422413826, "global_step": 238, "epoch": 1, "lr": 0.009999988862926341}
|
| 240 |
+
{"train_loss": 0.01802152208983898, "train_loss_bc": 0.017627805471420288, "train_loss_llm": 0.39371609687805176, "grad_norm": 0.17802225053310394, "global_step": 239, "epoch": 1, "lr": 0.009999988862926341}
|
| 241 |
+
{"train_loss": 0.018425248563289642, "train_loss_bc": 0.01776362583041191, "train_loss_llm": 0.6616224050521851, "grad_norm": 0.20682503283023834, "global_step": 240, "epoch": 1, "lr": 0.009999987721376759}
|
| 242 |
+
{"train_loss": 0.017822718247771263, "train_loss_bc": 0.017327211797237396, "train_loss_llm": 0.4955056309700012, "grad_norm": 0.03360544890165329, "global_step": 241, "epoch": 1, "lr": 0.009999987721376759}
|
| 243 |
+
{"train_loss": 0.01679021306335926, "train_loss_bc": 0.016283852979540825, "train_loss_llm": 0.5063599348068237, "grad_norm": 0.062126513570547104, "global_step": 242, "epoch": 1, "lr": 0.009999987721376759}
|
| 244 |
+
{"train_loss": 0.020830130204558372, "train_loss_bc": 0.020347915589809418, "train_loss_llm": 0.48221397399902344, "grad_norm": 0.08846676349639893, "global_step": 243, "epoch": 1, "lr": 0.009999987721376759}
|
| 245 |
+
{"train_loss": 0.011690114624798298, "train_loss_bc": 0.01113096158951521, "train_loss_llm": 0.559153139591217, "grad_norm": 0.1047411635518074, "global_step": 244, "epoch": 1, "lr": 0.009999987721376759}
|
| 246 |
+
{"train_loss": 0.020986376330256462, "train_loss_bc": 0.020374851301312447, "train_loss_llm": 0.6115252375602722, "grad_norm": 0.1375197023153305, "global_step": 245, "epoch": 1, "lr": 0.009999987721376759}
|
| 247 |
+
{"train_loss": 0.014499716460704803, "train_loss_bc": 0.013983565382659435, "train_loss_llm": 0.516150951385498, "grad_norm": 0.15900495648384094, "global_step": 246, "epoch": 1, "lr": 0.009999987721376759}
|
| 248 |
+
{"train_loss": 0.02040776051580906, "train_loss_bc": 0.01990542560815811, "train_loss_llm": 0.502334475517273, "grad_norm": 0.1920090615749359, "global_step": 247, "epoch": 1, "lr": 0.009999987721376759}
|
| 249 |
+
{"train_loss": 0.008450948633253574, "train_loss_bc": 0.00804897490888834, "train_loss_llm": 0.4019736349582672, "grad_norm": 0.20613166689872742, "global_step": 248, "epoch": 1, "lr": 0.009999986524141925}
|
| 250 |
+
{"train_loss": 0.01662587560713291, "train_loss_bc": 0.016171330586075783, "train_loss_llm": 0.45454519987106323, "grad_norm": 0.02374984882771969, "global_step": 249, "epoch": 1, "lr": 0.009999986524141925}
|
| 251 |
+
{"train_loss": 0.015652479603886604, "train_loss_bc": 0.015175838023424149, "train_loss_llm": 0.47664228081703186, "grad_norm": 0.04423899948596954, "global_step": 250, "epoch": 1, "lr": 0.009999986524141925}
|
| 252 |
+
{"train_loss": 0.015529230237007141, "train_loss_bc": 0.015086237341165543, "train_loss_llm": 0.4429924190044403, "grad_norm": 0.06532718986272812, "global_step": 251, "epoch": 1, "lr": 0.009999986524141925}
|
| 253 |
+
{"train_loss": 0.018649809062480927, "train_loss_bc": 0.018053732812404633, "train_loss_llm": 0.5960763692855835, "grad_norm": 0.09744929522275925, "global_step": 252, "epoch": 1, "lr": 0.009999986524141925}
|
| 254 |
+
{"train_loss": 0.014919068664312363, "train_loss_bc": 0.014484588988125324, "train_loss_llm": 0.4344799220561981, "grad_norm": 0.1184827908873558, "global_step": 253, "epoch": 1, "lr": 0.009999986524141925}
|
| 255 |
+
{"train_loss": 0.012522549368441105, "train_loss_bc": 0.0121694877743721, "train_loss_llm": 0.3530616760253906, "grad_norm": 0.13075849413871765, "global_step": 254, "epoch": 1, "lr": 0.009999986524141925}
|
| 256 |
+
{"train_loss": 0.017960211262106895, "train_loss_bc": 0.017518820241093636, "train_loss_llm": 0.4413911700248718, "grad_norm": 0.15736038982868195, "global_step": 255, "epoch": 1, "lr": 0.009999986524141925}
|
| 257 |
+
{"train_loss": 0.016007019206881523, "train_loss_bc": 0.015464743599295616, "train_loss_llm": 0.54227614402771, "grad_norm": 0.17768608033657074, "global_step": 256, "epoch": 1, "lr": 0.00999998527122185}
|
| 258 |
+
{"train_loss": 0.01158602349460125, "train_loss_bc": 0.011238181963562965, "train_loss_llm": 0.34784168004989624, "grad_norm": 0.010093354620039463, "global_step": 257, "epoch": 1, "lr": 0.00999998527122185}
|
| 259 |
+
{"train_loss": 0.010712604969739914, "train_loss_bc": 0.010269438847899437, "train_loss_llm": 0.44316577911376953, "grad_norm": 0.021126240491867065, "global_step": 258, "epoch": 1, "lr": 0.00999998527122185}
|
| 260 |
+
{"train_loss": 0.01096857525408268, "train_loss_bc": 0.010642854496836662, "train_loss_llm": 0.3257203996181488, "grad_norm": 0.03387540951371193, "global_step": 259, "epoch": 1, "lr": 0.00999998527122185}
|
| 261 |
+
{"train_loss": 0.01653478853404522, "train_loss_bc": 0.016012927517294884, "train_loss_llm": 0.5218604207038879, "grad_norm": 0.04911898449063301, "global_step": 260, "epoch": 1, "lr": 0.00999998527122185}
|
| 262 |
+
{"train_loss": 0.017165496945381165, "train_loss_bc": 0.01656423695385456, "train_loss_llm": 0.6012594699859619, "grad_norm": 0.06821974366903305, "global_step": 261, "epoch": 1, "lr": 0.00999998527122185}
|
| 263 |
+
{"train_loss": 0.012002137489616871, "train_loss_bc": 0.011621439829468727, "train_loss_llm": 0.38069722056388855, "grad_norm": 0.08292040973901749, "global_step": 262, "epoch": 1, "lr": 0.00999998527122185}
|
| 264 |
+
{"train_loss": 0.018928784877061844, "train_loss_bc": 0.018516037613153458, "train_loss_llm": 0.41274651885032654, "grad_norm": 0.09135116636753082, "global_step": 263, "epoch": 1, "lr": 0.00999998527122185}
|
| 265 |
+
{"train_loss": 0.018131952732801437, "train_loss_bc": 0.017562976107001305, "train_loss_llm": 0.5689768195152283, "grad_norm": 0.11499010771512985, "global_step": 264, "epoch": 1, "lr": 0.009999983962616553}
|
| 266 |
+
{"train_loss": 0.012489533051848412, "train_loss_bc": 0.011947352439165115, "train_loss_llm": 0.5421801805496216, "grad_norm": 0.013015178963541985, "global_step": 265, "epoch": 1, "lr": 0.009999983962616553}
|
| 267 |
+
{"train_loss": 0.013243050314486027, "train_loss_bc": 0.012746745720505714, "train_loss_llm": 0.4963045120239258, "grad_norm": 0.020864736288785934, "global_step": 266, "epoch": 1, "lr": 0.009999983962616553}
|
| 268 |
+
{"train_loss": 0.010356509126722813, "train_loss_bc": 0.009778052568435669, "train_loss_llm": 0.5784561634063721, "grad_norm": 0.0276536475867033, "global_step": 267, "epoch": 1, "lr": 0.009999983962616553}
|
| 269 |
+
{"train_loss": 0.012164799496531487, "train_loss_bc": 0.011662531644105911, "train_loss_llm": 0.5022678375244141, "grad_norm": 0.03239491581916809, "global_step": 268, "epoch": 1, "lr": 0.009999983962616553}
|
| 270 |
+
{"train_loss": 0.014096668921411037, "train_loss_bc": 0.0135754169896245, "train_loss_llm": 0.5212522745132446, "grad_norm": 0.0413699746131897, "global_step": 269, "epoch": 1, "lr": 0.009999983962616553}
|
| 271 |
+
{"train_loss": 0.011443986557424068, "train_loss_bc": 0.011009275913238525, "train_loss_llm": 0.43471041321754456, "grad_norm": 0.038190145045518875, "global_step": 270, "epoch": 1, "lr": 0.009999983962616553}
|
| 272 |
+
{"train_loss": 0.01239698100835085, "train_loss_bc": 0.011889282613992691, "train_loss_llm": 0.5076982975006104, "grad_norm": 0.04352530464529991, "global_step": 271, "epoch": 1, "lr": 0.009999983962616553}
|
| 273 |
+
{"train_loss": 0.013387206010520458, "train_loss_bc": 0.012860596179962158, "train_loss_llm": 0.5266100168228149, "grad_norm": 0.05260344222187996, "global_step": 272, "epoch": 1, "lr": 0.009999982598326042}
|
| 274 |
+
{"train_loss": 0.01481005921959877, "train_loss_bc": 0.014252791181206703, "train_loss_llm": 0.5572683215141296, "grad_norm": 0.0161435529589653, "global_step": 273, "epoch": 1, "lr": 0.009999982598326042}
|
| 275 |
+
{"train_loss": 0.009104442782700062, "train_loss_bc": 0.008664367720484734, "train_loss_llm": 0.44007524847984314, "grad_norm": 0.02379443496465683, "global_step": 274, "epoch": 1, "lr": 0.009999982598326042}
|
| 276 |
+
{"train_loss": 0.016264215111732483, "train_loss_bc": 0.015678897500038147, "train_loss_llm": 0.5853180885314941, "grad_norm": 0.043524160981178284, "global_step": 275, "epoch": 1, "lr": 0.009999982598326042}
|
| 277 |
+
{"train_loss": 0.017824366688728333, "train_loss_bc": 0.017341842874884605, "train_loss_llm": 0.4825235903263092, "grad_norm": 0.062320295721292496, "global_step": 276, "epoch": 1, "lr": 0.009999982598326042}
|
| 278 |
+
{"train_loss": 0.018680082634091377, "train_loss_bc": 0.018099233508110046, "train_loss_llm": 0.5808486342430115, "grad_norm": 0.08115622401237488, "global_step": 277, "epoch": 1, "lr": 0.009999982598326042}
|
| 279 |
+
{"train_loss": 0.012962117791175842, "train_loss_bc": 0.012479234486818314, "train_loss_llm": 0.4828835129737854, "grad_norm": 0.08807636052370071, "global_step": 278, "epoch": 1, "lr": 0.009999982598326042}
|
| 280 |
+
{"train_loss": 0.01446839701384306, "train_loss_bc": 0.013988605700433254, "train_loss_llm": 0.4797913432121277, "grad_norm": 0.09451211988925934, "global_step": 279, "epoch": 1, "lr": 0.009999982598326042}
|
| 281 |
+
{"train_loss": 0.01429255772382021, "train_loss_bc": 0.013756824657320976, "train_loss_llm": 0.5357327461242676, "grad_norm": 0.10867451131343842, "global_step": 280, "epoch": 1, "lr": 0.00999998117835034}
|
| 282 |
+
{"train_loss": 0.013955993577837944, "train_loss_bc": 0.013429549522697926, "train_loss_llm": 0.5264439582824707, "grad_norm": 0.025083180516958237, "global_step": 281, "epoch": 1, "lr": 0.00999998117835034}
|
| 283 |
+
{"train_loss": 0.016641786321997643, "train_loss_bc": 0.01618514023721218, "train_loss_llm": 0.4566459357738495, "grad_norm": 0.045515093952417374, "global_step": 282, "epoch": 1, "lr": 0.00999998117835034}
|
| 284 |
+
{"train_loss": 0.014012634754180908, "train_loss_bc": 0.013572480529546738, "train_loss_llm": 0.4401538670063019, "grad_norm": 0.06089504435658455, "global_step": 283, "epoch": 1, "lr": 0.00999998117835034}
|
| 285 |
+
{"train_loss": 0.01873624697327614, "train_loss_bc": 0.01815981976687908, "train_loss_llm": 0.5764279365539551, "grad_norm": 0.07969119399785995, "global_step": 284, "epoch": 1, "lr": 0.00999998117835034}
|
| 286 |
+
{"train_loss": 0.017088143154978752, "train_loss_bc": 0.01662488281726837, "train_loss_llm": 0.46325966715812683, "grad_norm": 0.09512478858232498, "global_step": 285, "epoch": 1, "lr": 0.00999998117835034}
|
| 287 |
+
{"train_loss": 0.010966386646032333, "train_loss_bc": 0.010503709316253662, "train_loss_llm": 0.46267759799957275, "grad_norm": 0.11192868649959564, "global_step": 286, "epoch": 1, "lr": 0.00999998117835034}
|
| 288 |
+
{"train_loss": 0.01674928329885006, "train_loss_bc": 0.016327429562807083, "train_loss_llm": 0.4218546152114868, "grad_norm": 0.1299564689397812, "global_step": 287, "epoch": 1, "lr": 0.00999998117835034}
|
| 289 |
+
{"train_loss": 0.016223106533288956, "train_loss_bc": 0.015695005655288696, "train_loss_llm": 0.5281013250350952, "grad_norm": 0.14104627072811127, "global_step": 288, "epoch": 1, "lr": 0.009999979702689454}
|
| 290 |
+
{"train_loss": 0.017164213582873344, "train_loss_bc": 0.016666820272803307, "train_loss_llm": 0.4973934590816498, "grad_norm": 0.02183571644127369, "global_step": 289, "epoch": 1, "lr": 0.009999979702689454}
|
| 291 |
+
{"train_loss": 0.01507254596799612, "train_loss_bc": 0.014609228819608688, "train_loss_llm": 0.46331721544265747, "grad_norm": 0.03880901262164116, "global_step": 290, "epoch": 1, "lr": 0.009999979702689454}
|
| 292 |
+
{"train_loss": 0.019254591315984726, "train_loss_bc": 0.01874985173344612, "train_loss_llm": 0.5047386884689331, "grad_norm": 0.06385096162557602, "global_step": 291, "epoch": 1, "lr": 0.009999979702689454}
|
| 293 |
+
{"train_loss": 0.0154347512871027, "train_loss_bc": 0.01500864326953888, "train_loss_llm": 0.42610809206962585, "grad_norm": 0.08098644018173218, "global_step": 292, "epoch": 1, "lr": 0.009999979702689454}
|
| 294 |
+
{"train_loss": 0.01903417333960533, "train_loss_bc": 0.018556609749794006, "train_loss_llm": 0.47756439447402954, "grad_norm": 0.10871503502130508, "global_step": 293, "epoch": 1, "lr": 0.009999979702689454}
|
| 295 |
+
{"train_loss": 0.0156480111181736, "train_loss_bc": 0.015174117870628834, "train_loss_llm": 0.47389230132102966, "grad_norm": 0.13166505098342896, "global_step": 294, "epoch": 1, "lr": 0.009999979702689454}
|
| 296 |
+
{"train_loss": 0.016828790307044983, "train_loss_bc": 0.016389530152082443, "train_loss_llm": 0.43926095962524414, "grad_norm": 0.1540358066558838, "global_step": 295, "epoch": 1, "lr": 0.009999979702689454}
|
| 297 |
+
{"train_loss": 0.013542444445192814, "train_loss_bc": 0.013059152290225029, "train_loss_llm": 0.48329171538352966, "grad_norm": 0.17128632962703705, "global_step": 296, "epoch": 1, "lr": 0.00999997817134341}
|
| 298 |
+
{"train_loss": 0.01356798131018877, "train_loss_bc": 0.013153335079550743, "train_loss_llm": 0.41464588046073914, "grad_norm": 0.01961551606655121, "global_step": 297, "epoch": 1, "lr": 0.00999997817134341}
|
| 299 |
+
{"train_loss": 0.015729112550616264, "train_loss_bc": 0.015209322795271873, "train_loss_llm": 0.5197891592979431, "grad_norm": 0.04597029462456703, "global_step": 298, "epoch": 1, "lr": 0.00999997817134341}
|
| 300 |
+
{"train_loss": 0.015187690034508705, "train_loss_bc": 0.014714469201862812, "train_loss_llm": 0.4732206165790558, "grad_norm": 0.06501750648021698, "global_step": 299, "epoch": 1, "lr": 0.00999997817134341}
|
| 301 |
+
{"train_loss": 0.01496143825352192, "train_loss_bc": 0.014608250930905342, "train_loss_llm": 0.35318759083747864, "grad_norm": 0.09145065397024155, "global_step": 300, "epoch": 1, "lr": 0.00999997817134341}
|
| 302 |
+
{"train_loss": 0.014216883108019829, "train_loss_bc": 0.013763219118118286, "train_loss_llm": 0.4536639451980591, "grad_norm": 0.10366859287023544, "global_step": 301, "epoch": 1, "lr": 0.00999997817134341}
|
| 303 |
+
{"train_loss": 0.01669706590473652, "train_loss_bc": 0.016175638884305954, "train_loss_llm": 0.5214270353317261, "grad_norm": 0.12138961255550385, "global_step": 302, "epoch": 1, "lr": 0.00999997817134341}
|
| 304 |
+
{"train_loss": 0.014355774968862534, "train_loss_bc": 0.013972668908536434, "train_loss_llm": 0.3831060230731964, "grad_norm": 0.14051002264022827, "global_step": 303, "epoch": 1, "lr": 0.00999997817134341}
|
| 305 |
+
{"train_loss": 0.0146627863869071, "train_loss_bc": 0.014223872683942318, "train_loss_llm": 0.4389132857322693, "grad_norm": 0.15912242233753204, "global_step": 304, "epoch": 1, "lr": 0.009999976584312217}
|
| 306 |
+
{"train_loss": 0.01133162435144186, "train_loss_bc": 0.010947933420538902, "train_loss_llm": 0.3836905360221863, "grad_norm": 0.02009022980928421, "global_step": 305, "epoch": 1, "lr": 0.009999976584312217}
|
| 307 |
+
{"train_loss": 0.01270595658570528, "train_loss_bc": 0.012229321524500847, "train_loss_llm": 0.4766354262828827, "grad_norm": 0.029522329568862915, "global_step": 306, "epoch": 1, "lr": 0.009999976584312217}
|
| 308 |
+
{"train_loss": 0.014936204068362713, "train_loss_bc": 0.014451291412115097, "train_loss_llm": 0.4849129617214203, "grad_norm": 0.056380923837423325, "global_step": 307, "epoch": 1, "lr": 0.009999976584312217}
|
| 309 |
+
{"train_loss": 0.010747802443802357, "train_loss_bc": 0.010318206623196602, "train_loss_llm": 0.4295954704284668, "grad_norm": 0.07131356745958328, "global_step": 308, "epoch": 1, "lr": 0.009999976584312217}
|
| 310 |
+
{"train_loss": 0.010907587595283985, "train_loss_bc": 0.010417597368359566, "train_loss_llm": 0.4899904727935791, "grad_norm": 0.08291061967611313, "global_step": 309, "epoch": 1, "lr": 0.009999976584312217}
|
| 311 |
+
{"train_loss": 0.01565735787153244, "train_loss_bc": 0.01524802204221487, "train_loss_llm": 0.4093364179134369, "grad_norm": 0.1052742674946785, "global_step": 310, "epoch": 1, "lr": 0.009999976584312217}
|
| 312 |
+
{"train_loss": 0.013044213876128197, "train_loss_bc": 0.012592458166182041, "train_loss_llm": 0.4517558515071869, "grad_norm": 0.12751010060310364, "global_step": 311, "epoch": 1, "lr": 0.009999976584312217}
|
| 313 |
+
{"train_loss": 0.012058139778673649, "train_loss_bc": 0.011555514298379421, "train_loss_llm": 0.5026251077651978, "grad_norm": 0.1479104608297348, "global_step": 312, "epoch": 1, "lr": 0.009999974941595897}
|
| 314 |
+
{"train_loss": 0.010018293745815754, "train_loss_bc": 0.009687970392405987, "train_loss_llm": 0.3303234279155731, "grad_norm": 0.01355504896491766, "global_step": 313, "epoch": 1, "lr": 0.009999974941595897}
|
| 315 |
+
{"train_loss": 0.012328105047345161, "train_loss_bc": 0.011914866045117378, "train_loss_llm": 0.4132387638092041, "grad_norm": 0.01594623737037182, "global_step": 314, "epoch": 1, "lr": 0.009999974941595897}
|
| 316 |
+
{"train_loss": 0.013978242874145508, "train_loss_bc": 0.013544456101953983, "train_loss_llm": 0.4337867498397827, "grad_norm": 0.032734472304582596, "global_step": 315, "epoch": 1, "lr": 0.009999974941595897}
|
| 317 |
+
{"train_loss": 0.010386270470917225, "train_loss_bc": 0.010068733245134354, "train_loss_llm": 0.31753700971603394, "grad_norm": 0.047776710242033005, "global_step": 316, "epoch": 1, "lr": 0.009999974941595897}
|
| 318 |
+
{"train_loss": 0.012578755617141724, "train_loss_bc": 0.01214287057518959, "train_loss_llm": 0.4358847141265869, "grad_norm": 0.0635766088962555, "global_step": 317, "epoch": 1, "lr": 0.009999974941595897}
|
| 319 |
+
{"train_loss": 0.012422928586602211, "train_loss_bc": 0.012042918242514133, "train_loss_llm": 0.38001012802124023, "grad_norm": 0.07768117636442184, "global_step": 318, "epoch": 1, "lr": 0.009999974941595897}
|
| 320 |
+
{"train_loss": 0.010467208921909332, "train_loss_bc": 0.009988697245717049, "train_loss_llm": 0.478511244058609, "grad_norm": 0.08535484224557877, "global_step": 319, "epoch": 1, "lr": 0.009999974941595897}
|
| 321 |
+
{"train_loss": 0.012417087331414223, "train_loss_bc": 0.01195848360657692, "train_loss_llm": 0.45860394835472107, "grad_norm": 0.10900090634822845, "global_step": 320, "epoch": 1, "lr": 0.009999973243194467}
|
| 322 |
+
{"train_loss": 0.01247399765998125, "train_loss_bc": 0.01207180880010128, "train_loss_llm": 0.4021890163421631, "grad_norm": 0.010779356583952904, "global_step": 321, "epoch": 1, "lr": 0.009999973243194467}
|
| 323 |
+
{"train_loss": 0.011968130245804787, "train_loss_bc": 0.0115253496915102, "train_loss_llm": 0.44278010725975037, "grad_norm": 0.016100643202662468, "global_step": 322, "epoch": 1, "lr": 0.009999973243194467}
|
| 324 |
+
{"train_loss": 0.011378668248653412, "train_loss_bc": 0.010935855098068714, "train_loss_llm": 0.4428134262561798, "grad_norm": 0.02845556102693081, "global_step": 323, "epoch": 1, "lr": 0.009999973243194467}
|
| 325 |
+
{"train_loss": 0.010558527894318104, "train_loss_bc": 0.010113537311553955, "train_loss_llm": 0.4449908137321472, "grad_norm": 0.02639344334602356, "global_step": 324, "epoch": 1, "lr": 0.009999973243194467}
|
| 326 |
+
{"train_loss": 0.008580698631703854, "train_loss_bc": 0.008138567209243774, "train_loss_llm": 0.44213131070137024, "grad_norm": 0.039085108786821365, "global_step": 325, "epoch": 1, "lr": 0.009999973243194467}
|
| 327 |
+
{"train_loss": 0.013355431146919727, "train_loss_bc": 0.012834073975682259, "train_loss_llm": 0.5213567614555359, "grad_norm": 0.04931477829813957, "global_step": 326, "epoch": 1, "lr": 0.009999973243194467}
|
| 328 |
+
{"train_loss": 0.008711469359695911, "train_loss_bc": 0.008368385955691338, "train_loss_llm": 0.3430837392807007, "grad_norm": 0.05239582434296608, "global_step": 327, "epoch": 1, "lr": 0.009999973243194467}
|
| 329 |
+
{"train_loss": 0.009385243989527225, "train_loss_bc": 0.008970173075795174, "train_loss_llm": 0.41507115960121155, "grad_norm": 0.05491437017917633, "global_step": 328, "epoch": 1, "lr": 0.009999971489107947}
|
| 330 |
+
{"train_loss": 0.011174674145877361, "train_loss_bc": 0.010818562470376492, "train_loss_llm": 0.3561112880706787, "grad_norm": 0.011578625068068504, "global_step": 329, "epoch": 1, "lr": 0.009999971489107947}
|
| 331 |
+
{"train_loss": 0.013502768240869045, "train_loss_bc": 0.013065600767731667, "train_loss_llm": 0.4371674656867981, "grad_norm": 0.026880592107772827, "global_step": 330, "epoch": 1, "lr": 0.009999971489107947}
|
| 332 |
+
{"train_loss": 0.012593085877597332, "train_loss_bc": 0.012244774959981441, "train_loss_llm": 0.34831055998802185, "grad_norm": 0.041259463876485825, "global_step": 331, "epoch": 1, "lr": 0.009999971489107947}
|
| 333 |
+
{"train_loss": 0.009598112665116787, "train_loss_bc": 0.009030044078826904, "train_loss_llm": 0.5680687427520752, "grad_norm": 0.05241383612155914, "global_step": 332, "epoch": 1, "lr": 0.009999971489107947}
|
| 334 |
+
{"train_loss": 0.01214287243783474, "train_loss_bc": 0.011670759879052639, "train_loss_llm": 0.4721129238605499, "grad_norm": 0.07233195751905441, "global_step": 333, "epoch": 1, "lr": 0.009999971489107947}
|
| 335 |
+
{"train_loss": 0.015494297258555889, "train_loss_bc": 0.01502845250070095, "train_loss_llm": 0.46584439277648926, "grad_norm": 0.0923396646976471, "global_step": 334, "epoch": 1, "lr": 0.009999971489107947}
|
| 336 |
+
{"train_loss": 0.01266135461628437, "train_loss_bc": 0.012207714840769768, "train_loss_llm": 0.4536397457122803, "grad_norm": 0.10818523913621902, "global_step": 335, "epoch": 1, "lr": 0.009999971489107947}
|
| 337 |
+
{"train_loss": 0.012813151814043522, "train_loss_bc": 0.012354401871562004, "train_loss_llm": 0.45874953269958496, "grad_norm": 0.1238781213760376, "global_step": 336, "epoch": 1, "lr": 0.009999969679336354}
|
| 338 |
+
{"train_loss": 0.013637524098157883, "train_loss_bc": 0.013167794793844223, "train_loss_llm": 0.46972957253456116, "grad_norm": 0.017869004979729652, "global_step": 337, "epoch": 1, "lr": 0.009999969679336354}
|
| 339 |
+
{"train_loss": 0.01720421575009823, "train_loss_bc": 0.01666909269988537, "train_loss_llm": 0.5351230502128601, "grad_norm": 0.04792848974466324, "global_step": 338, "epoch": 1, "lr": 0.009999969679336354}
|
| 340 |
+
{"train_loss": 0.015335088595747948, "train_loss_bc": 0.014754555188119411, "train_loss_llm": 0.5805330276489258, "grad_norm": 0.06892868131399155, "global_step": 339, "epoch": 1, "lr": 0.009999969679336354}
|
| 341 |
+
{"train_loss": 0.010542848147451878, "train_loss_bc": 0.01001989096403122, "train_loss_llm": 0.5229572653770447, "grad_norm": 0.0866188034415245, "global_step": 340, "epoch": 1, "lr": 0.009999969679336354}
|
| 342 |
+
{"train_loss": 0.01668214052915573, "train_loss_bc": 0.01612561196088791, "train_loss_llm": 0.5565277934074402, "grad_norm": 0.10684026032686234, "global_step": 341, "epoch": 1, "lr": 0.009999969679336354}
|
| 343 |
+
{"train_loss": 0.013128525577485561, "train_loss_bc": 0.012647897005081177, "train_loss_llm": 0.4806285798549652, "grad_norm": 0.1274142861366272, "global_step": 342, "epoch": 1, "lr": 0.009999969679336354}
|
| 344 |
+
{"train_loss": 0.012806318700313568, "train_loss_bc": 0.012260029092431068, "train_loss_llm": 0.5462898015975952, "grad_norm": 0.1410902589559555, "global_step": 343, "epoch": 1, "lr": 0.009999969679336354}
|
| 345 |
+
{"train_loss": 0.009487541392445564, "train_loss_bc": 0.009019860997796059, "train_loss_llm": 0.4676806926727295, "grad_norm": 0.15223082900047302, "global_step": 344, "epoch": 1, "lr": 0.00999996781387971}
|
| 346 |
+
{"train_loss": 0.016361601650714874, "train_loss_bc": 0.01573288068175316, "train_loss_llm": 0.6287200450897217, "grad_norm": 0.023004453629255295, "global_step": 345, "epoch": 1, "lr": 0.00999996781387971}
|
| 347 |
+
{"train_loss": 0.01248890906572342, "train_loss_bc": 0.011881167069077492, "train_loss_llm": 0.607742190361023, "grad_norm": 0.031962476670742035, "global_step": 346, "epoch": 1, "lr": 0.00999996781387971}
|
| 348 |
+
{"train_loss": 0.01358707994222641, "train_loss_bc": 0.013073929585516453, "train_loss_llm": 0.5131505727767944, "grad_norm": 0.04822782427072525, "global_step": 347, "epoch": 1, "lr": 0.00999996781387971}
|
| 349 |
+
{"train_loss": 0.015235469676554203, "train_loss_bc": 0.01475644949823618, "train_loss_llm": 0.4790200889110565, "grad_norm": 0.06381597369909286, "global_step": 348, "epoch": 1, "lr": 0.00999996781387971}
|
| 350 |
+
{"train_loss": 0.01751648262143135, "train_loss_bc": 0.016855884343385696, "train_loss_llm": 0.6605973243713379, "grad_norm": 0.08681994676589966, "global_step": 349, "epoch": 1, "lr": 0.00999996781387971}
|
| 351 |
+
{"train_loss": 0.014208652079105377, "train_loss_bc": 0.013812784105539322, "train_loss_llm": 0.3958682119846344, "grad_norm": 0.10667064785957336, "global_step": 350, "epoch": 1, "lr": 0.00999996781387971}
|
| 352 |
+
{"train_loss": 0.011572916992008686, "train_loss_bc": 0.011084744706749916, "train_loss_llm": 0.48817187547683716, "grad_norm": 0.1193293035030365, "global_step": 351, "epoch": 1, "lr": 0.00999996781387971}
|
| 353 |
+
{"train_loss": 0.01146447192877531, "train_loss_bc": 0.01082993671298027, "train_loss_llm": 0.6345353126525879, "grad_norm": 0.13804349303245544, "global_step": 352, "epoch": 1, "lr": 0.009999965892738036}
|
| 354 |
+
{"train_loss": 0.012645299546420574, "train_loss_bc": 0.012096133083105087, "train_loss_llm": 0.5491666793823242, "grad_norm": 0.014580151066184044, "global_step": 353, "epoch": 1, "lr": 0.009999965892738036}
|
| 355 |
+
{"train_loss": 0.012388558126986027, "train_loss_bc": 0.011813423596322536, "train_loss_llm": 0.57513427734375, "grad_norm": 0.029008738696575165, "global_step": 354, "epoch": 1, "lr": 0.009999965892738036}
|
| 356 |
+
{"train_loss": 0.015240795910358429, "train_loss_bc": 0.014640103094279766, "train_loss_llm": 0.6006927490234375, "grad_norm": 0.03927018493413925, "global_step": 355, "epoch": 1, "lr": 0.009999965892738036}
|
| 357 |
+
{"train_loss": 0.011942628771066666, "train_loss_bc": 0.011564518325030804, "train_loss_llm": 0.3781103491783142, "grad_norm": 0.056585244834423065, "global_step": 356, "epoch": 1, "lr": 0.009999965892738036}
|
| 358 |
+
{"train_loss": 0.012461038306355476, "train_loss_bc": 0.011909164488315582, "train_loss_llm": 0.5518735647201538, "grad_norm": 0.06560716778039932, "global_step": 357, "epoch": 1, "lr": 0.009999965892738036}
|
| 359 |
+
{"train_loss": 0.015487094409763813, "train_loss_bc": 0.015003865584731102, "train_loss_llm": 0.48322877287864685, "grad_norm": 0.0779387354850769, "global_step": 358, "epoch": 1, "lr": 0.009999965892738036}
|
| 360 |
+
{"train_loss": 0.010963214561343193, "train_loss_bc": 0.010492322966456413, "train_loss_llm": 0.47089120745658875, "grad_norm": 0.09135229140520096, "global_step": 359, "epoch": 1, "lr": 0.009999965892738036}
|
| 361 |
+
{"train_loss": 0.009490950964391232, "train_loss_bc": 0.00908201839774847, "train_loss_llm": 0.40893298387527466, "grad_norm": 0.11029430478811264, "global_step": 360, "epoch": 1, "lr": 0.009999963915911353}
|
| 362 |
+
{"train_loss": 0.009366891346871853, "train_loss_bc": 0.008832603693008423, "train_loss_llm": 0.534287691116333, "grad_norm": 0.011780070140957832, "global_step": 361, "epoch": 1, "lr": 0.009999963915911353}
|
| 363 |
+
{"train_loss": 0.01090614590793848, "train_loss_bc": 0.010547686368227005, "train_loss_llm": 0.3584598898887634, "grad_norm": 0.018876101821660995, "global_step": 362, "epoch": 1, "lr": 0.009999963915911353}
|
| 364 |
+
{"train_loss": 0.01047124806791544, "train_loss_bc": 0.00999793503433466, "train_loss_llm": 0.4733126759529114, "grad_norm": 0.0347822941839695, "global_step": 363, "epoch": 1, "lr": 0.009999963915911353}
|
| 365 |
+
{"train_loss": 0.013421941548585892, "train_loss_bc": 0.01281831320375204, "train_loss_llm": 0.603628396987915, "grad_norm": 0.0416879765689373, "global_step": 364, "epoch": 1, "lr": 0.009999963915911353}
|
| 366 |
+
{"train_loss": 0.012736831791698933, "train_loss_bc": 0.01226730551570654, "train_loss_llm": 0.4695262610912323, "grad_norm": 0.061996493488550186, "global_step": 365, "epoch": 1, "lr": 0.009999963915911353}
|
| 367 |
+
{"train_loss": 0.015469906851649284, "train_loss_bc": 0.014822958037257195, "train_loss_llm": 0.6469485759735107, "grad_norm": 0.06635187566280365, "global_step": 366, "epoch": 1, "lr": 0.009999963915911353}
|
| 368 |
+
{"train_loss": 0.012995130382478237, "train_loss_bc": 0.012509873136878014, "train_loss_llm": 0.4852573275566101, "grad_norm": 0.08635496348142624, "global_step": 367, "epoch": 1, "lr": 0.009999963915911353}
|
| 369 |
+
{"train_loss": 0.00998271256685257, "train_loss_bc": 0.009579310193657875, "train_loss_llm": 0.4034022390842438, "grad_norm": 0.10520216077566147, "global_step": 368, "epoch": 1, "lr": 0.009999961883399683}
|
| 370 |
+
{"train_loss": 0.0111773069947958, "train_loss_bc": 0.010663645341992378, "train_loss_llm": 0.5136619806289673, "grad_norm": 0.013957403600215912, "global_step": 369, "epoch": 1, "lr": 0.009999961883399683}
|
| 371 |
+
{"train_loss": 0.010809720493853092, "train_loss_bc": 0.010342610068619251, "train_loss_llm": 0.46711012721061707, "grad_norm": 0.028792060911655426, "global_step": 370, "epoch": 1, "lr": 0.009999961883399683}
|
| 372 |
+
{"train_loss": 0.013553488999605179, "train_loss_bc": 0.01306125707924366, "train_loss_llm": 0.49223223328590393, "grad_norm": 0.03614845499396324, "global_step": 371, "epoch": 1, "lr": 0.009999961883399683}
|
| 373 |
+
{"train_loss": 0.008836585097014904, "train_loss_bc": 0.008274243213236332, "train_loss_llm": 0.5623416900634766, "grad_norm": 0.04330018162727356, "global_step": 372, "epoch": 1, "lr": 0.009999961883399683}
|
| 374 |
+
{"train_loss": 0.008892491459846497, "train_loss_bc": 0.008484721183776855, "train_loss_llm": 0.40777063369750977, "grad_norm": 0.061764974147081375, "global_step": 373, "epoch": 1, "lr": 0.009999961883399683}
|
| 375 |
+
{"train_loss": 0.011546115390956402, "train_loss_bc": 0.011045539751648903, "train_loss_llm": 0.5005753636360168, "grad_norm": 0.07327363640069962, "global_step": 374, "epoch": 1, "lr": 0.009999961883399683}
|
| 376 |
+
{"train_loss": 0.013276085257530212, "train_loss_bc": 0.012809041887521744, "train_loss_llm": 0.4670429527759552, "grad_norm": 0.08208861202001572, "global_step": 375, "epoch": 1, "lr": 0.009999961883399683}
|
| 377 |
+
{"train_loss": 0.010347362607717514, "train_loss_bc": 0.009987818077206612, "train_loss_llm": 0.35954442620277405, "grad_norm": 0.09811953455209732, "global_step": 376, "epoch": 1, "lr": 0.009999959795203048}
|
| 378 |
+
{"train_loss": 0.010294072329998016, "train_loss_bc": 0.009917671792209148, "train_loss_llm": 0.37640058994293213, "grad_norm": 0.01656760647892952, "global_step": 377, "epoch": 1, "lr": 0.009999959795203048}
|
| 379 |
+
{"train_loss": 0.012072709389030933, "train_loss_bc": 0.011694014072418213, "train_loss_llm": 0.37869489192962646, "grad_norm": 0.038055505603551865, "global_step": 378, "epoch": 1, "lr": 0.009999959795203048}
|
| 380 |
+
{"train_loss": 0.013379747048020363, "train_loss_bc": 0.012885721400380135, "train_loss_llm": 0.4940251410007477, "grad_norm": 0.048791639506816864, "global_step": 379, "epoch": 1, "lr": 0.009999959795203048}
|
| 381 |
+
{"train_loss": 0.010779447853565216, "train_loss_bc": 0.010418189689517021, "train_loss_llm": 0.3612585663795471, "grad_norm": 0.06931304186582565, "global_step": 380, "epoch": 1, "lr": 0.009999959795203048}
|
| 382 |
+
{"train_loss": 0.013290653005242348, "train_loss_bc": 0.0127793550491333, "train_loss_llm": 0.5112981796264648, "grad_norm": 0.08744651824235916, "global_step": 381, "epoch": 1, "lr": 0.009999959795203048}
|
| 383 |
+
{"train_loss": 0.012638435699045658, "train_loss_bc": 0.012112822383642197, "train_loss_llm": 0.5256132483482361, "grad_norm": 0.08734080195426941, "global_step": 382, "epoch": 1, "lr": 0.009999959795203048}
|
| 384 |
+
{"train_loss": 0.01276348065584898, "train_loss_bc": 0.01233246922492981, "train_loss_llm": 0.43101125955581665, "grad_norm": 0.11115267127752304, "global_step": 383, "epoch": 1, "lr": 0.009999959795203048}
|
| 385 |
+
{"train_loss": 0.013118097558617592, "train_loss_bc": 0.012599822133779526, "train_loss_llm": 0.5182749629020691, "grad_norm": 0.11958809196949005, "global_step": 384, "epoch": 1, "lr": 0.009999957651321473}
|
| 386 |
+
{"train_loss": 0.01025390811264515, "train_loss_bc": 0.009825386106967926, "train_loss_llm": 0.4285220801830292, "grad_norm": 0.018952684476971626, "global_step": 385, "epoch": 1, "lr": 0.009999957651321473}
|
| 387 |
+
{"train_loss": 0.010963615961372852, "train_loss_bc": 0.010562529787421227, "train_loss_llm": 0.40108659863471985, "grad_norm": 0.03306251019239426, "global_step": 386, "epoch": 1, "lr": 0.009999957651321473}
|
| 388 |
+
{"train_loss": 0.010541597381234169, "train_loss_bc": 0.010126705281436443, "train_loss_llm": 0.4148922562599182, "grad_norm": 0.042092613875865936, "global_step": 387, "epoch": 1, "lr": 0.009999957651321473}
|
| 389 |
+
{"train_loss": 0.008702381514012814, "train_loss_bc": 0.008199061267077923, "train_loss_llm": 0.5033202171325684, "grad_norm": 0.043085530400276184, "global_step": 388, "epoch": 1, "lr": 0.009999957651321473}
|
| 390 |
+
{"train_loss": 0.008918298408389091, "train_loss_bc": 0.008477844297885895, "train_loss_llm": 0.4404541254043579, "grad_norm": 0.055809881538152695, "global_step": 389, "epoch": 1, "lr": 0.009999957651321473}
|
| 391 |
+
{"train_loss": 0.009737114422023296, "train_loss_bc": 0.009314477443695068, "train_loss_llm": 0.4226372539997101, "grad_norm": 0.07001017779111862, "global_step": 390, "epoch": 1, "lr": 0.009999957651321473}
|
| 392 |
+
{"train_loss": 0.010793833062052727, "train_loss_bc": 0.010402481071650982, "train_loss_llm": 0.3913517892360687, "grad_norm": 0.07023876160383224, "global_step": 391, "epoch": 1, "lr": 0.009999957651321473}
|
| 393 |
+
{"train_loss": 0.010602637194097042, "train_loss_bc": 0.010235416702926159, "train_loss_llm": 0.36722007393836975, "grad_norm": 0.09314236044883728, "global_step": 392, "epoch": 1, "lr": 0.00999995545175498}
|
| 394 |
+
{"train_loss": 0.013229576870799065, "train_loss_bc": 0.01280839741230011, "train_loss_llm": 0.4211796224117279, "grad_norm": 0.010268638841807842, "global_step": 393, "epoch": 1, "lr": 0.00999995545175498}
|
| 395 |
+
{"train_loss": 0.006598448846489191, "train_loss_bc": 0.006211167201399803, "train_loss_llm": 0.38728177547454834, "grad_norm": 0.012780013494193554, "global_step": 394, "epoch": 1, "lr": 0.00999995545175498}
|
| 396 |
+
{"train_loss": 0.010388839058578014, "train_loss_bc": 0.00987747497856617, "train_loss_llm": 0.5113644599914551, "grad_norm": 0.015021376311779022, "global_step": 395, "epoch": 1, "lr": 0.00999995545175498}
|
| 397 |
+
{"train_loss": 0.014541227370500565, "train_loss_bc": 0.014116690494120121, "train_loss_llm": 0.4245363771915436, "grad_norm": 0.02319067344069481, "global_step": 396, "epoch": 1, "lr": 0.00999995545175498}
|
| 398 |
+
{"train_loss": 0.007016970310360193, "train_loss_bc": 0.006666948553174734, "train_loss_llm": 0.3500216007232666, "grad_norm": 0.027652941644191742, "global_step": 397, "epoch": 1, "lr": 0.00999995545175498}
|
| 399 |
+
{"train_loss": 0.007159297354519367, "train_loss_bc": 0.006787103600800037, "train_loss_llm": 0.3721938133239746, "grad_norm": 0.03523283079266548, "global_step": 398, "epoch": 1, "lr": 0.00999995545175498}
|
| 400 |
+
{"train_loss": 0.009045234881341457, "train_loss_bc": 0.008669788017868996, "train_loss_llm": 0.37544700503349304, "grad_norm": 0.049455754458904266, "global_step": 399, "epoch": 1, "lr": 0.00999995545175498}
|
| 401 |
+
{"train_loss": 0.008435660041868687, "train_loss_bc": 0.008010749705135822, "train_loss_llm": 0.42491012811660767, "grad_norm": 0.0516187846660614, "global_step": 400, "epoch": 1, "lr": 0.009999953196503595}
|
| 402 |
+
{"train_loss": 0.010924630798399448, "train_loss_bc": 0.01047501340508461, "train_loss_llm": 0.44961774349212646, "grad_norm": 0.019651779904961586, "global_step": 401, "epoch": 1, "lr": 0.009999953196503595}
|
| 403 |
+
{"train_loss": 0.006148731801658869, "train_loss_bc": 0.0057486011646687984, "train_loss_llm": 0.4001305103302002, "grad_norm": 0.02275880053639412, "global_step": 402, "epoch": 1, "lr": 0.009999953196503595}
|
| 404 |
+
{"train_loss": 0.007822881452739239, "train_loss_bc": 0.00734285730868578, "train_loss_llm": 0.48002392053604126, "grad_norm": 0.03126152977347374, "global_step": 403, "epoch": 1, "lr": 0.009999953196503595}
|
| 405 |
+
{"train_loss": 0.00978546217083931, "train_loss_bc": 0.009389730170369148, "train_loss_llm": 0.3957315683364868, "grad_norm": 0.0482921376824379, "global_step": 404, "epoch": 1, "lr": 0.009999953196503595}
|
| 406 |
+
{"train_loss": 0.00851339939981699, "train_loss_bc": 0.008063830435276031, "train_loss_llm": 0.4495692849159241, "grad_norm": 0.05878711864352226, "global_step": 405, "epoch": 1, "lr": 0.009999953196503595}
|
| 407 |
+
{"train_loss": 0.012543894350528717, "train_loss_bc": 0.012006400153040886, "train_loss_llm": 0.5374938249588013, "grad_norm": 0.07312177866697311, "global_step": 406, "epoch": 1, "lr": 0.009999953196503595}
|
| 408 |
+
{"train_loss": 0.004626167938113213, "train_loss_bc": 0.004230715800076723, "train_loss_llm": 0.39545202255249023, "grad_norm": 0.0742395669221878, "global_step": 407, "epoch": 1, "lr": 0.009999953196503595}
|
| 409 |
+
{"train_loss": 0.010349040850996971, "train_loss_bc": 0.00993720255792141, "train_loss_llm": 0.41183826327323914, "grad_norm": 0.08678025007247925, "global_step": 408, "epoch": 1, "lr": 0.009999950885567342}
|
| 410 |
+
{"train_loss": 0.006372408010065556, "train_loss_bc": 0.006014788523316383, "train_loss_llm": 0.35761937499046326, "grad_norm": 0.010603218339383602, "global_step": 409, "epoch": 1, "lr": 0.009999950885567342}
|
| 411 |
+
{"train_loss": 0.009057055227458477, "train_loss_bc": 0.008602965623140335, "train_loss_llm": 0.45408937335014343, "grad_norm": 0.029408499598503113, "global_step": 410, "epoch": 1, "lr": 0.009999950885567342}
|
| 412 |
+
{"train_loss": 0.006392288021743298, "train_loss_bc": 0.006004677154123783, "train_loss_llm": 0.38761094212532043, "grad_norm": 0.040126774460077286, "global_step": 411, "epoch": 1, "lr": 0.009999950885567342}
|
| 413 |
+
{"train_loss": 0.007694873958826065, "train_loss_bc": 0.007268495857715607, "train_loss_llm": 0.42637819051742554, "grad_norm": 0.051206592470407486, "global_step": 412, "epoch": 1, "lr": 0.009999950885567342}
|
| 414 |
+
{"train_loss": 0.00797163788229227, "train_loss_bc": 0.007495692931115627, "train_loss_llm": 0.47594505548477173, "grad_norm": 0.06213797628879547, "global_step": 413, "epoch": 1, "lr": 0.009999950885567342}
|
| 415 |
+
{"train_loss": 0.008228043094277382, "train_loss_bc": 0.007861753925681114, "train_loss_llm": 0.366288959980011, "grad_norm": 0.07453621178865433, "global_step": 414, "epoch": 1, "lr": 0.009999950885567342}
|
| 416 |
+
{"train_loss": 0.007793589495122433, "train_loss_bc": 0.00744793564081192, "train_loss_llm": 0.34565383195877075, "grad_norm": 0.08818801492452621, "global_step": 415, "epoch": 1, "lr": 0.009999950885567342}
|
| 417 |
+
{"train_loss": 0.010696557350456715, "train_loss_bc": 0.01020999439060688, "train_loss_llm": 0.4865627586841583, "grad_norm": 0.10583324730396271, "global_step": 416, "epoch": 1, "lr": 0.009999948518946245}
|
| 418 |
+
{"train_loss": 0.016479648649692535, "train_loss_bc": 0.016043461859226227, "train_loss_llm": 0.43618765473365784, "grad_norm": 0.02455216646194458, "global_step": 417, "epoch": 1, "lr": 0.009999948518946245}
|
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/train.log
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[2026-01-21 13:12:21,183][numexpr.utils][INFO] - Note: detected 112 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable.
|
| 2 |
+
[2026-01-21 13:12:21,183][numexpr.utils][INFO] - Note: NumExpr detected 112 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 16.
|
| 3 |
+
[2026-01-21 13:12:21,183][numexpr.utils][INFO] - NumExpr defaulting to 16 threads.
|
| 4 |
+
[2026-01-21 13:12:27,199][datasets][INFO] - PyTorch version 2.2.2 available.
|
| 5 |
+
[2026-01-21 13:12:27,200][datasets][INFO] - TensorFlow version 2.15.1 available.
|
| 6 |
+
[2026-01-21 13:12:27,201][datasets][INFO] - JAX version 0.4.30 available.
|
| 7 |
+
[2026-01-21 13:12:35,484][absl][INFO] - MUJOCO_GL=osmesa, attempting to import specified OpenGL backend.
|
| 8 |
+
[2026-01-21 13:12:35,493][absl][INFO] - MuJoCo library version is: 2.3.7
|
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/debug-internal.log
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2026-01-21T13:12:36.44966483+08:00","level":"INFO","msg":"using version","core version":"0.18.6"}
|
| 2 |
+
{"time":"2026-01-21T13:12:36.449675304+08:00","level":"INFO","msg":"created symlink","path":"/work/u1131674/LLM-BC/data/outputs/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/logs/debug-core.log"}
|
| 3 |
+
{"time":"2026-01-21T13:12:36.564980991+08:00","level":"INFO","msg":"created new stream","id":"yhjy9tz9"}
|
| 4 |
+
{"time":"2026-01-21T13:12:36.565006242+08:00","level":"INFO","msg":"stream: started","id":"yhjy9tz9"}
|
| 5 |
+
{"time":"2026-01-21T13:12:36.565029519+08:00","level":"INFO","msg":"sender: started","stream_id":"yhjy9tz9"}
|
| 6 |
+
{"time":"2026-01-21T13:12:36.565021074+08:00","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"yhjy9tz9"}}
|
| 7 |
+
{"time":"2026-01-21T13:12:36.565029409+08:00","level":"INFO","msg":"handler: started","stream_id":{"value":"yhjy9tz9"}}
|
| 8 |
+
{"time":"2026-01-21T13:12:37.456830647+08:00","level":"INFO","msg":"Starting system monitor"}
|
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/debug.log
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Current SDK version is 0.18.6
|
| 2 |
+
2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Configure stats pid to 3666395
|
| 3 |
+
2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Loading settings from /home/u1131674/.config/wandb/settings
|
| 4 |
+
2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Loading settings from /work/u1131674/LLM-BC/wandb/settings
|
| 5 |
+
2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
|
| 6 |
+
2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': 'online', '_disable_service': None}
|
| 7 |
+
2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'train.py', 'program_abspath': '/work/u1131674/LLM-BC/train.py', 'program': '/work/u1131674/LLM-BC/./train.py'}
|
| 8 |
+
2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Applying login settings: {}
|
| 9 |
+
2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_init.py:_log_setup():533] Logging user logs to /work/u1131674/LLM-BC/data/outputs/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/logs/debug.log
|
| 10 |
+
2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_init.py:_log_setup():534] Logging internal logs to /work/u1131674/LLM-BC/data/outputs/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/logs/debug-internal.log
|
| 11 |
+
2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_init.py:init():619] calling init triggers
|
| 12 |
+
2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
|
| 13 |
+
config: {'name': 'train_llmbc_lowdim', '_target_': 'llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace', 'obs_dim': 9, 'action_dim': 4, 'task_name': 'box-close-v2', 'exp_name': 'default', 'model_name': 'HuggingFaceTB/SmolLM2-135M-Instruct', 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1, 'n_latency_steps': 0, 'past_action_visible': False, 'llm_orig_expert_feedback': True, 'llm_do_sample': False, 'policy': {'_target_': 'llmbc.policy.llmbc_lowdim_policy.LLMBCLowdimPolicy', 'model': {'_target_': 'llmbc.model.policy.policy_mlp.PolicyMLP', 'input_size': 9, 'hidden_size': [256, 256], 'output_size': 4, 'activation': 'relu', 'n_obs_steps': 1, 'n_action_steps': 1}, 'obs_dim': 9, 'action_dim': 4, 'llm_discriminator': {'_target_': 'llmbc.discriminator.llm_ce_discriminator.LLMCEDiscriminator', 'task_id': 'box-close-v2', 'llm_translator': {'_target_': 'llmbc.translator.llm_translator.LLMTranslator', 'cfg': {'name': 'HuggingFaceTB/SmolLM2-135M-Instruct', 'model_name': 'SmolLM2-135M-Instruct', 'config_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig', 'causal_lm_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM', 'use_quantization': False, 'use_joint_mlp_projector': True, 'llm_mode': 'ete-finetuned', 'finetune_mode': 'orig', 'checkpoint': 'data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890', 'max_length': 100, 'lora_config': {'r': 32, 'lora_alpha': 64, 'lora_dropout': 0.05, 'bias': 'none', 'task_type': 'CAUSAL_LM'}, 'prompter': {'_target_': 'llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter', 'use_joint_mlp_projector': True}, 'hydra': {'job': {'override_dirname': 'HuggingFaceTB/SmolLM2-135M-Instruct'}, 'run': {'dir': 'data/outputs/2026.01.21/13.12.19_HuggingFaceTB/SmolLM2-135M-Instruct'}}}, 'obs_dim': 9, 'action_dim': 4, 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1}}, 'loss_bc_weight': 1.0, 'loss_llm_weight': 0.001, 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1, 'normalize_llm_loss': True}, 'dataloader': {'batch_size': 16, 'num_workers': 0, 'shuffle': True, 'pin_memory': False, 'persistent_workers': False}, 'val_dataloader': {'batch_size': 16, 'num_workers': 0, 'shuffle': True, 'pin_memory': False, 'persistent_workers': False}, 'optimizer': {'_target_': 'torch.optim.AdamW', 'lr': 0.01, 'betas': [0.95, 0.999], 'eps': 1e-08, 'weight_decay': 1e-06}, 'training': {'device': 'cuda:0', 'seed': 42, 'debug': False, 'resume': False, 'lr_scheduler': 'cosine', 'lr_warmup_steps': 10, 'num_epochs': 1001, 'gradient_accumulate_every': 8, 'grad_norm_clip': 0.5, 'rollout_every': 5, 'checkpoint_every': 5, 'val_every': 1, 'sample_every': 5, 'sample_max_batch': 128, 'max_train_steps': None, 'max_val_steps': None, 'tqdm_interval_sec': 1.0}, 'logging': {'project': 'box-close-v2-training', 'resume': True, 'mode': 'online', 'name': '2026.01.21-13.12.19_train_llmbc_lowdim_box-close-v2', 'tags': ['train_llmbc_lowdim', 'box-close-v2', 'default'], 'id': None, 'group': None}, 'checkpoint': {'topk': {'monitor_key': 'test_success_rate', 'mode': 'max', 'k': 5, 'format_str': 'epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt'}, 'save_last_ckpt': True, 'save_last_snapshot': False}, 'multi_run': {'run_dir': 'data/outputs/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2', 'wandb_name_base': '2026.01.21-13.12.19_train_llmbc_lowdim_box-close-v2'}, 'task': {'name': 'box-close-v2', 'obs_dim': 9, 'action_dim': 4, 'env_runner': {'_target_': 'llmbc.env_runner.metaworld_lowdim_runner.MetaworldLowdimRunner', 'env_name': 'llf-metaworld-box-close-v2', 'n_train': 10, 'n_test': 50, 'n_envs': 10, 'max_steps': 30, 'n_obs_steps': 1, 'n_action_steps': 1, 'instruction_type': 'b', 'feedback_type': ['hp', 'hn', 'fp'], 'visual': False, 'discount': 0.9}, 'dataset': {'_target_': 'llmbc.dataset.metaworld_lowdim_dataset.MetaworldLowdimDataset', 'data_path': 'datasets/box-close-v2.pt', 'data_path2': 'datasets/box-close-v2.pt', 'horizon': 1, 'pad_before': 0, 'pad_after': 0, 'obs_eef_target': True, 'use_manual_normalizer': False, 'val_ratio': 0.1, 'dummy_normalizer': True}, 'instructor': {'_target_': 'llmbc.translator.instructor.metaworld_instructor.box_close_v2_instructor.BoxCloseV2Instructor'}}, 'llm': {'name': 'HuggingFaceTB/SmolLM2-135M-Instruct', 'model_name': 'SmolLM2-135M-Instruct', 'config_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig', 'causal_lm_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM', 'use_quantization': False, 'use_joint_mlp_projector': True, 'llm_mode': 'ete-finetuned', 'finetune_mode': 'orig', 'checkpoint': 'data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890', 'max_length': 100, 'lora_config': {'r': 32, 'lora_alpha': 64, 'lora_dropout': 0.05, 'bias': 'none', 'task_type': 'CAUSAL_LM'}, 'prompter': {'_target_': 'llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter', 'use_joint_mlp_projector': True}, 'hydra': {'job': {'override_dirname': 'HuggingFaceTB/SmolLM2-135M-Instruct'}, 'run': {'dir': 'data/outputs/2026.01.21/13.12.19_HuggingFaceTB/SmolLM2-135M-Instruct'}}}}
|
| 14 |
+
2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_init.py:init():669] starting backend
|
| 15 |
+
2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_init.py:init():673] sending inform_init request
|
| 16 |
+
2026-01-21 13:12:36,447 INFO MainThread:3666395 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 17 |
+
2026-01-21 13:12:36,447 INFO MainThread:3666395 [wandb_init.py:init():686] backend started and connected
|
| 18 |
+
2026-01-21 13:12:36,456 INFO MainThread:3666395 [wandb_init.py:init():781] updated telemetry
|
| 19 |
+
2026-01-21 13:12:36,506 INFO MainThread:3666395 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
|
| 20 |
+
2026-01-21 13:12:37,452 INFO MainThread:3666395 [wandb_init.py:init():867] starting run threads in backend
|
| 21 |
+
2026-01-21 13:12:38,016 INFO MainThread:3666395 [wandb_run.py:_console_start():2451] atexit reg
|
| 22 |
+
2026-01-21 13:12:38,016 INFO MainThread:3666395 [wandb_run.py:_redirect():2299] redirect: wrap_raw
|
| 23 |
+
2026-01-21 13:12:38,016 INFO MainThread:3666395 [wandb_run.py:_redirect():2364] Wrapping output streams.
|
| 24 |
+
2026-01-21 13:12:38,016 INFO MainThread:3666395 [wandb_run.py:_redirect():2389] Redirects installed.
|
| 25 |
+
2026-01-21 13:12:38,019 INFO MainThread:3666395 [wandb_init.py:init():911] run started, returning control to user process
|
| 26 |
+
2026-01-21 13:12:38,019 INFO MainThread:3666395 [wandb_run.py:_config_callback():1389] config_cb None None {'output_dir': '/work/u1131674/LLM-BC/data/outputs/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2'}
|
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/files/output.log
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Eval MetaworldLowdimRunner 1/6: 0%| | 0/30 [00:00<?, ?it/s]/work/u1131674/LLM-BC/llmbc/common/llfbench_util.py:39: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at ../torch/csrc/utils/tensor_new.cpp:275.)
|
| 2 |
+
obs = torch.tensor(obs, dtype=torch.float32).unsqueeze(dim=0).to(device)
|
| 3 |
+
Training epoch 1: 74%|███████▍ | 176/238 [00:23<00:08, 7.45it/s, grad_norm=0.0621, loss=0.00797]
|
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/files/requirements.txt
ADDED
|
@@ -0,0 +1,857 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
rpds-py==0.27.1
|
| 2 |
+
typeguard==4.4.4
|
| 3 |
+
flatbuffers==25.12.19
|
| 4 |
+
toppra==0.6.3
|
| 5 |
+
sympy==1.14.0
|
| 6 |
+
tiktoken==0.8.0
|
| 7 |
+
nvidia-cuda-cupti-cu12==12.1.105
|
| 8 |
+
arm_pytorch_utilities==0.4.3
|
| 9 |
+
pynndescent==0.6.0
|
| 10 |
+
multidict==6.7.0
|
| 11 |
+
fonttools==4.60.2
|
| 12 |
+
numexpr==2.10.1
|
| 13 |
+
cmudict==1.0.13
|
| 14 |
+
PyOpenGL-accelerate==3.1.10
|
| 15 |
+
gmpy2==2.2.1
|
| 16 |
+
peft==0.14.0
|
| 17 |
+
metaworld==2.0.0
|
| 18 |
+
nvidia-cufft-cu12==11.0.2.54
|
| 19 |
+
python-dateutil==2.9.0.post0
|
| 20 |
+
aiosignal==1.4.0
|
| 21 |
+
pexpect==4.9.0
|
| 22 |
+
protobuf==4.25.8
|
| 23 |
+
typing_extensions==4.15.0
|
| 24 |
+
mujoco==2.3.7
|
| 25 |
+
tokenizers==0.21.0
|
| 26 |
+
pytorch-kinematics==0.7.5
|
| 27 |
+
sniffio==1.3.1
|
| 28 |
+
aiofiles==25.1.0
|
| 29 |
+
mplib==0.1.1
|
| 30 |
+
wcwidth==0.2.14
|
| 31 |
+
Pygments==2.19.1
|
| 32 |
+
anyio==4.12.1
|
| 33 |
+
tensorflow-estimator==2.15.0
|
| 34 |
+
filelock==3.17.0
|
| 35 |
+
numpy==1.23.5
|
| 36 |
+
attrs==25.4.0
|
| 37 |
+
Markdown==3.9
|
| 38 |
+
fsspec==2024.3.1
|
| 39 |
+
libclang==18.1.1
|
| 40 |
+
umap-learn==0.5.9.post2
|
| 41 |
+
dill==0.3.8
|
| 42 |
+
narwhals==2.15.0
|
| 43 |
+
tensorboard==2.15.2
|
| 44 |
+
dacite==1.9.2
|
| 45 |
+
termcolor==3.1.0
|
| 46 |
+
llmbc==0.0.0
|
| 47 |
+
python-multipart==0.0.20
|
| 48 |
+
exceptiongroup==1.3.1
|
| 49 |
+
sapien==3.0.0b1
|
| 50 |
+
pygame==2.6.1
|
| 51 |
+
nvidia-curand-cu12==10.3.2.106
|
| 52 |
+
evaluate==0.4.3
|
| 53 |
+
msgpack==1.1.1
|
| 54 |
+
tensorflow-probability==0.23.0
|
| 55 |
+
diffusers==0.31.0
|
| 56 |
+
certifi==2025.10.5
|
| 57 |
+
d4rl==1.1
|
| 58 |
+
pydub==0.25.1
|
| 59 |
+
annotated-doc==0.0.4
|
| 60 |
+
gitdb==4.0.12
|
| 61 |
+
gradio_client==0.2.9
|
| 62 |
+
Shapely==1.8.4
|
| 63 |
+
mani_skill==3.0.0b20
|
| 64 |
+
tensorflow-io-gcs-filesystem==0.37.1
|
| 65 |
+
fasteners==0.20
|
| 66 |
+
hjson==3.1.0
|
| 67 |
+
ninja==1.13.0
|
| 68 |
+
stack-data==0.6.3
|
| 69 |
+
pyarrow==21.0.0
|
| 70 |
+
networkx==3.2.1
|
| 71 |
+
nvidia-cusparse-cu12==12.1.0.106
|
| 72 |
+
pyparsing==3.3.1
|
| 73 |
+
timm==1.0.22
|
| 74 |
+
typing-inspection==0.4.2
|
| 75 |
+
openai==2.8.1
|
| 76 |
+
pybullet==3.2.6
|
| 77 |
+
hydra-core==1.2.0
|
| 78 |
+
gradio==3.36.1
|
| 79 |
+
tensorflow==2.15.1
|
| 80 |
+
asttokens==3.0.1
|
| 81 |
+
importlib-metadata==5.2.0
|
| 82 |
+
astunparse==1.6.3
|
| 83 |
+
tifffile==2024.8.30
|
| 84 |
+
annotated-types==0.7.0
|
| 85 |
+
Bottleneck==1.4.2
|
| 86 |
+
accelerate==1.0.1
|
| 87 |
+
pytz==2025.2
|
| 88 |
+
urllib3==2.5.0
|
| 89 |
+
frozenlist==1.8.0
|
| 90 |
+
sentry-sdk==2.50.0
|
| 91 |
+
jsonschema==4.25.1
|
| 92 |
+
tyro==0.9.1
|
| 93 |
+
Farama-Notifications==0.0.4
|
| 94 |
+
ffmpy==1.0.0
|
| 95 |
+
httpx==0.28.1
|
| 96 |
+
pymunk==6.2.1
|
| 97 |
+
shtab==1.7.2
|
| 98 |
+
glfw==2.0.0
|
| 99 |
+
hf-xet==1.1.8
|
| 100 |
+
omegaconf==2.2.1
|
| 101 |
+
blobfile==3.0.0
|
| 102 |
+
decorator==5.2.1
|
| 103 |
+
cffi==1.17.1
|
| 104 |
+
matplotlib-inline==0.2.1
|
| 105 |
+
eval_type_backport==0.2.2
|
| 106 |
+
torchaudio==2.2.2
|
| 107 |
+
colorama==0.4.6
|
| 108 |
+
click==8.1.8
|
| 109 |
+
Cython==0.29.37
|
| 110 |
+
orjson==3.11.5
|
| 111 |
+
gym_bandits==0.0.2
|
| 112 |
+
traitlets==5.14.3
|
| 113 |
+
docker-pycreds==0.4.0
|
| 114 |
+
multiprocess==0.70.15
|
| 115 |
+
zipp==3.21.0
|
| 116 |
+
antlr4-python3-runtime==4.9.3
|
| 117 |
+
uc-micro-py==1.0.3
|
| 118 |
+
mpmath==1.3.0
|
| 119 |
+
idna==3.11
|
| 120 |
+
aiodns==3.5.0
|
| 121 |
+
charset-normalizer==3.4.4
|
| 122 |
+
nvidia-nvjitlink-cu12==12.9.86
|
| 123 |
+
nvidia-cuda-nvrtc-cu12==12.1.105
|
| 124 |
+
seaborn==0.13.2
|
| 125 |
+
pyarrow-hotfix==0.7
|
| 126 |
+
pillow==11.3.0
|
| 127 |
+
pyautogen==0.1.0
|
| 128 |
+
requests==2.32.0
|
| 129 |
+
MarkupSafe==3.0.2
|
| 130 |
+
websockets==15.0.1
|
| 131 |
+
nvidia-nccl-cu12==2.19.3
|
| 132 |
+
pure_eval==0.2.3
|
| 133 |
+
parso==0.8.5
|
| 134 |
+
huggingface-hub==0.26.2
|
| 135 |
+
syllables==1.0.9
|
| 136 |
+
tf-agents==0.19.0
|
| 137 |
+
six==1.17.0
|
| 138 |
+
referencing==0.36.2
|
| 139 |
+
ptyprocess==0.7.0
|
| 140 |
+
platformdirs==4.4.0
|
| 141 |
+
fastapi==0.128.0
|
| 142 |
+
stable-baselines3==2.2.1
|
| 143 |
+
av==10.0.0
|
| 144 |
+
diskcache==5.6.3
|
| 145 |
+
pynvml==13.0.1
|
| 146 |
+
pytorch-seed==0.2.0
|
| 147 |
+
zarr==2.12.0
|
| 148 |
+
mdurl==0.1.2
|
| 149 |
+
docstring-parser==0.16
|
| 150 |
+
packaging==25.0
|
| 151 |
+
numcodecs==0.12.1
|
| 152 |
+
opt_einsum==3.4.0
|
| 153 |
+
markdown-it-py==2.2.0
|
| 154 |
+
nvidia-cuda-runtime-cu12==12.1.105
|
| 155 |
+
PyWavelets==1.6.0
|
| 156 |
+
datasets==2.19.0
|
| 157 |
+
contourpy==1.3.0
|
| 158 |
+
aiohappyeyeballs==2.6.1
|
| 159 |
+
jaxlib==0.4.30
|
| 160 |
+
ImageIO==2.37.2
|
| 161 |
+
wandb==0.18.6
|
| 162 |
+
jiter==0.12.0
|
| 163 |
+
gymnasium==0.29.1
|
| 164 |
+
pycryptodomex==3.23.0
|
| 165 |
+
google-pasta==0.2.0
|
| 166 |
+
ipython==8.18.1
|
| 167 |
+
threadpoolctl==3.6.0
|
| 168 |
+
py-cpuinfo==9.0.0
|
| 169 |
+
bitsandbytes==0.45.0
|
| 170 |
+
xxhash==3.5.0
|
| 171 |
+
google-auth-oauthlib==1.2.4
|
| 172 |
+
rsa==4.9.1
|
| 173 |
+
rouge_score==0.1.2
|
| 174 |
+
dm-control==1.0.14
|
| 175 |
+
oauthlib==3.3.1
|
| 176 |
+
pandas==2.3.3
|
| 177 |
+
tenacity==9.1.2
|
| 178 |
+
asciitree==0.3.3
|
| 179 |
+
scipy==1.13.1
|
| 180 |
+
jedi==0.19.2
|
| 181 |
+
gast==0.7.0
|
| 182 |
+
google-auth==2.47.0
|
| 183 |
+
transforms3d==0.4.2
|
| 184 |
+
kiwisolver==1.4.7
|
| 185 |
+
matplotlib==3.7.5
|
| 186 |
+
aiohttp==3.12.15
|
| 187 |
+
pip==23.3.2
|
| 188 |
+
imageio-ffmpeg==0.6.0
|
| 189 |
+
deepspeed==0.16.1
|
| 190 |
+
yarl==1.18.0
|
| 191 |
+
nvidia-nvtx-cu12==12.1.105
|
| 192 |
+
llfbench==0.1.0
|
| 193 |
+
wheel==0.45.1
|
| 194 |
+
PySocks==1.7.1
|
| 195 |
+
ml-dtypes==0.3.2
|
| 196 |
+
PyYAML==6.0.2
|
| 197 |
+
fast_kinematics==0.2.2
|
| 198 |
+
gin-config==0.5.0
|
| 199 |
+
setproctitle==1.3.7
|
| 200 |
+
safetensors==0.5.3
|
| 201 |
+
torchvision==0.17.2
|
| 202 |
+
semantic-version==2.10.0
|
| 203 |
+
PyOpenGL==3.1.10
|
| 204 |
+
nltk==3.9.2
|
| 205 |
+
lxml==6.0.2
|
| 206 |
+
pydantic==2.12.5
|
| 207 |
+
tqdm==4.67.1
|
| 208 |
+
keras==2.15.0
|
| 209 |
+
parse==1.19.1
|
| 210 |
+
linkify-it-py==2.0.3
|
| 211 |
+
dm-tree==0.1.8
|
| 212 |
+
requests-oauthlib==2.0.0
|
| 213 |
+
scikit-learn==1.6.1
|
| 214 |
+
altair==6.0.0
|
| 215 |
+
Werkzeug==3.1.5
|
| 216 |
+
sentencepiece==0.2.0
|
| 217 |
+
uvicorn==0.39.0
|
| 218 |
+
cycler==0.12.1
|
| 219 |
+
transformers==4.47.1
|
| 220 |
+
uvloop==0.22.1
|
| 221 |
+
mkl_random==1.2.8
|
| 222 |
+
GitPython==3.1.46
|
| 223 |
+
regex==2025.9.1
|
| 224 |
+
jax==0.4.30
|
| 225 |
+
llvmlite==0.39.1
|
| 226 |
+
pyasn1_modules==0.4.2
|
| 227 |
+
nvidia-cudnn-cu12==8.9.2.26
|
| 228 |
+
pydantic_core==2.41.5
|
| 229 |
+
google-genai==1.47.0
|
| 230 |
+
propcache==0.3.1
|
| 231 |
+
pycares==4.10.0
|
| 232 |
+
pyperclip==1.11.0
|
| 233 |
+
pyasn1==0.6.2
|
| 234 |
+
async-timeout==5.0.1
|
| 235 |
+
psutil==7.0.0
|
| 236 |
+
gym==0.23.1
|
| 237 |
+
dm-env==1.6
|
| 238 |
+
Jinja2==3.1.6
|
| 239 |
+
sentence-transformers==3.2.1
|
| 240 |
+
einops==0.4.1
|
| 241 |
+
triton==2.2.0
|
| 242 |
+
grpcio==1.76.0
|
| 243 |
+
labmaze==1.0.6
|
| 244 |
+
nvidia-ml-py==13.590.44
|
| 245 |
+
brotlicffi==1.0.9.2
|
| 246 |
+
smmap==5.0.2
|
| 247 |
+
cloudpickle==3.1.2
|
| 248 |
+
setuptools==80.9.0
|
| 249 |
+
starlette==0.49.3
|
| 250 |
+
prompt_toolkit==3.0.52
|
| 251 |
+
wrapt==1.14.2
|
| 252 |
+
h5py==3.14.0
|
| 253 |
+
scikit-image==0.19.3
|
| 254 |
+
joblib==1.5.3
|
| 255 |
+
opencv-python==4.11.0.86
|
| 256 |
+
rich==14.2.0
|
| 257 |
+
trl==0.11.4
|
| 258 |
+
gym-notices==0.1.0
|
| 259 |
+
trimesh==4.11.1
|
| 260 |
+
mdit-py-plugins==0.3.3
|
| 261 |
+
distro==1.9.0
|
| 262 |
+
executing==2.2.1
|
| 263 |
+
mkl-service==2.4.0
|
| 264 |
+
nvidia-cusolver-cu12==11.4.5.107
|
| 265 |
+
FLAML==2.3.6
|
| 266 |
+
mujoco-py==2.1.2.14
|
| 267 |
+
h11==0.16.0
|
| 268 |
+
highway-env==1.9.1
|
| 269 |
+
httpcore==1.0.9
|
| 270 |
+
tensorboard-data-server==0.7.2
|
| 271 |
+
tzdata==2025.3
|
| 272 |
+
absl-py==2.3.1
|
| 273 |
+
jsonschema-specifications==2025.9.1
|
| 274 |
+
numba==0.56.4
|
| 275 |
+
tabulate==0.9.0
|
| 276 |
+
importlib-resources==5.13.0
|
| 277 |
+
pycparser==2.23
|
| 278 |
+
mkl_fft==1.3.11
|
| 279 |
+
torch==2.2.2
|
| 280 |
+
nvidia-cublas-cu12==12.1.3.1
|
| 281 |
+
rpds-py==0.27.1
|
| 282 |
+
typeguard==4.4.4
|
| 283 |
+
flatbuffers==25.12.19
|
| 284 |
+
toppra==0.6.3
|
| 285 |
+
sympy==1.14.0
|
| 286 |
+
tiktoken==0.8.0
|
| 287 |
+
nvidia-cuda-cupti-cu12==12.1.105
|
| 288 |
+
arm_pytorch_utilities==0.4.3
|
| 289 |
+
pynndescent==0.6.0
|
| 290 |
+
multidict==6.7.0
|
| 291 |
+
fonttools==4.60.2
|
| 292 |
+
numexpr==2.10.1
|
| 293 |
+
cmudict==1.0.13
|
| 294 |
+
PyOpenGL-accelerate==3.1.10
|
| 295 |
+
gmpy2==2.2.1
|
| 296 |
+
peft==0.14.0
|
| 297 |
+
metaworld==2.0.0
|
| 298 |
+
nvidia-cufft-cu12==11.0.2.54
|
| 299 |
+
python-dateutil==2.9.0.post0
|
| 300 |
+
aiosignal==1.4.0
|
| 301 |
+
pexpect==4.9.0
|
| 302 |
+
protobuf==4.25.8
|
| 303 |
+
typing_extensions==4.15.0
|
| 304 |
+
mujoco==2.3.7
|
| 305 |
+
tokenizers==0.21.0
|
| 306 |
+
pytorch-kinematics==0.7.5
|
| 307 |
+
sniffio==1.3.1
|
| 308 |
+
aiofiles==25.1.0
|
| 309 |
+
mplib==0.1.1
|
| 310 |
+
wcwidth==0.2.14
|
| 311 |
+
Pygments==2.19.1
|
| 312 |
+
anyio==4.12.1
|
| 313 |
+
tensorflow-estimator==2.15.0
|
| 314 |
+
filelock==3.17.0
|
| 315 |
+
numpy==1.23.5
|
| 316 |
+
attrs==25.4.0
|
| 317 |
+
Markdown==3.9
|
| 318 |
+
fsspec==2024.3.1
|
| 319 |
+
libclang==18.1.1
|
| 320 |
+
umap-learn==0.5.9.post2
|
| 321 |
+
dill==0.3.8
|
| 322 |
+
narwhals==2.15.0
|
| 323 |
+
tensorboard==2.15.2
|
| 324 |
+
dacite==1.9.2
|
| 325 |
+
termcolor==3.1.0
|
| 326 |
+
llmbc==0.0.0
|
| 327 |
+
python-multipart==0.0.20
|
| 328 |
+
exceptiongroup==1.3.1
|
| 329 |
+
sapien==3.0.0b1
|
| 330 |
+
pygame==2.6.1
|
| 331 |
+
nvidia-curand-cu12==10.3.2.106
|
| 332 |
+
evaluate==0.4.3
|
| 333 |
+
msgpack==1.1.1
|
| 334 |
+
tensorflow-probability==0.23.0
|
| 335 |
+
diffusers==0.31.0
|
| 336 |
+
certifi==2025.10.5
|
| 337 |
+
d4rl==1.1
|
| 338 |
+
pydub==0.25.1
|
| 339 |
+
annotated-doc==0.0.4
|
| 340 |
+
gitdb==4.0.12
|
| 341 |
+
gradio_client==0.2.9
|
| 342 |
+
Shapely==1.8.4
|
| 343 |
+
mani_skill==3.0.0b20
|
| 344 |
+
tensorflow-io-gcs-filesystem==0.37.1
|
| 345 |
+
fasteners==0.20
|
| 346 |
+
hjson==3.1.0
|
| 347 |
+
ninja==1.13.0
|
| 348 |
+
stack-data==0.6.3
|
| 349 |
+
pyarrow==21.0.0
|
| 350 |
+
networkx==3.2.1
|
| 351 |
+
nvidia-cusparse-cu12==12.1.0.106
|
| 352 |
+
pyparsing==3.3.1
|
| 353 |
+
timm==1.0.22
|
| 354 |
+
typing-inspection==0.4.2
|
| 355 |
+
openai==2.8.1
|
| 356 |
+
pybullet==3.2.6
|
| 357 |
+
hydra-core==1.2.0
|
| 358 |
+
gradio==3.36.1
|
| 359 |
+
tensorflow==2.15.1
|
| 360 |
+
asttokens==3.0.1
|
| 361 |
+
importlib-metadata==5.2.0
|
| 362 |
+
astunparse==1.6.3
|
| 363 |
+
tifffile==2024.8.30
|
| 364 |
+
annotated-types==0.7.0
|
| 365 |
+
Bottleneck==1.4.2
|
| 366 |
+
accelerate==1.0.1
|
| 367 |
+
pytz==2025.2
|
| 368 |
+
urllib3==2.5.0
|
| 369 |
+
frozenlist==1.8.0
|
| 370 |
+
sentry-sdk==2.50.0
|
| 371 |
+
jsonschema==4.25.1
|
| 372 |
+
tyro==0.9.1
|
| 373 |
+
Farama-Notifications==0.0.4
|
| 374 |
+
ffmpy==1.0.0
|
| 375 |
+
httpx==0.28.1
|
| 376 |
+
pymunk==6.2.1
|
| 377 |
+
shtab==1.7.2
|
| 378 |
+
glfw==2.0.0
|
| 379 |
+
hf-xet==1.1.8
|
| 380 |
+
omegaconf==2.2.1
|
| 381 |
+
blobfile==3.0.0
|
| 382 |
+
decorator==5.2.1
|
| 383 |
+
cffi==1.17.1
|
| 384 |
+
matplotlib-inline==0.2.1
|
| 385 |
+
eval_type_backport==0.2.2
|
| 386 |
+
torchaudio==2.2.2
|
| 387 |
+
colorama==0.4.6
|
| 388 |
+
click==8.1.8
|
| 389 |
+
Cython==0.29.37
|
| 390 |
+
orjson==3.11.5
|
| 391 |
+
gym_bandits==0.0.2
|
| 392 |
+
traitlets==5.14.3
|
| 393 |
+
docker-pycreds==0.4.0
|
| 394 |
+
multiprocess==0.70.15
|
| 395 |
+
zipp==3.21.0
|
| 396 |
+
antlr4-python3-runtime==4.9.3
|
| 397 |
+
uc-micro-py==1.0.3
|
| 398 |
+
mpmath==1.3.0
|
| 399 |
+
idna==3.11
|
| 400 |
+
aiodns==3.5.0
|
| 401 |
+
charset-normalizer==3.4.4
|
| 402 |
+
nvidia-nvjitlink-cu12==12.9.86
|
| 403 |
+
nvidia-cuda-nvrtc-cu12==12.1.105
|
| 404 |
+
seaborn==0.13.2
|
| 405 |
+
pyarrow-hotfix==0.7
|
| 406 |
+
pillow==11.3.0
|
| 407 |
+
pyautogen==0.1.0
|
| 408 |
+
requests==2.32.0
|
| 409 |
+
MarkupSafe==3.0.2
|
| 410 |
+
websockets==15.0.1
|
| 411 |
+
nvidia-nccl-cu12==2.19.3
|
| 412 |
+
pure_eval==0.2.3
|
| 413 |
+
parso==0.8.5
|
| 414 |
+
huggingface-hub==0.26.2
|
| 415 |
+
syllables==1.0.9
|
| 416 |
+
tf-agents==0.19.0
|
| 417 |
+
six==1.17.0
|
| 418 |
+
referencing==0.36.2
|
| 419 |
+
ptyprocess==0.7.0
|
| 420 |
+
platformdirs==4.4.0
|
| 421 |
+
fastapi==0.128.0
|
| 422 |
+
stable-baselines3==2.2.1
|
| 423 |
+
av==10.0.0
|
| 424 |
+
diskcache==5.6.3
|
| 425 |
+
pynvml==13.0.1
|
| 426 |
+
pytorch-seed==0.2.0
|
| 427 |
+
zarr==2.12.0
|
| 428 |
+
mdurl==0.1.2
|
| 429 |
+
docstring-parser==0.16
|
| 430 |
+
packaging==25.0
|
| 431 |
+
numcodecs==0.12.1
|
| 432 |
+
opt_einsum==3.4.0
|
| 433 |
+
markdown-it-py==2.2.0
|
| 434 |
+
nvidia-cuda-runtime-cu12==12.1.105
|
| 435 |
+
PyWavelets==1.6.0
|
| 436 |
+
datasets==2.19.0
|
| 437 |
+
contourpy==1.3.0
|
| 438 |
+
aiohappyeyeballs==2.6.1
|
| 439 |
+
jaxlib==0.4.30
|
| 440 |
+
ImageIO==2.37.2
|
| 441 |
+
wandb==0.18.6
|
| 442 |
+
jiter==0.12.0
|
| 443 |
+
gymnasium==0.29.1
|
| 444 |
+
pycryptodomex==3.23.0
|
| 445 |
+
google-pasta==0.2.0
|
| 446 |
+
ipython==8.18.1
|
| 447 |
+
threadpoolctl==3.6.0
|
| 448 |
+
py-cpuinfo==9.0.0
|
| 449 |
+
bitsandbytes==0.45.0
|
| 450 |
+
xxhash==3.5.0
|
| 451 |
+
google-auth-oauthlib==1.2.4
|
| 452 |
+
rsa==4.9.1
|
| 453 |
+
rouge_score==0.1.2
|
| 454 |
+
dm-control==1.0.14
|
| 455 |
+
oauthlib==3.3.1
|
| 456 |
+
pandas==2.3.3
|
| 457 |
+
tenacity==9.1.2
|
| 458 |
+
asciitree==0.3.3
|
| 459 |
+
scipy==1.13.1
|
| 460 |
+
jedi==0.19.2
|
| 461 |
+
gast==0.7.0
|
| 462 |
+
google-auth==2.47.0
|
| 463 |
+
transforms3d==0.4.2
|
| 464 |
+
kiwisolver==1.4.7
|
| 465 |
+
matplotlib==3.7.5
|
| 466 |
+
aiohttp==3.12.15
|
| 467 |
+
pip==23.3.2
|
| 468 |
+
imageio-ffmpeg==0.6.0
|
| 469 |
+
deepspeed==0.16.1
|
| 470 |
+
yarl==1.18.0
|
| 471 |
+
nvidia-nvtx-cu12==12.1.105
|
| 472 |
+
llfbench==0.1.0
|
| 473 |
+
wheel==0.45.1
|
| 474 |
+
PySocks==1.7.1
|
| 475 |
+
ml-dtypes==0.3.2
|
| 476 |
+
PyYAML==6.0.2
|
| 477 |
+
fast_kinematics==0.2.2
|
| 478 |
+
gin-config==0.5.0
|
| 479 |
+
setproctitle==1.3.7
|
| 480 |
+
safetensors==0.5.3
|
| 481 |
+
torchvision==0.17.2
|
| 482 |
+
semantic-version==2.10.0
|
| 483 |
+
PyOpenGL==3.1.10
|
| 484 |
+
nltk==3.9.2
|
| 485 |
+
lxml==6.0.2
|
| 486 |
+
pydantic==2.12.5
|
| 487 |
+
tqdm==4.67.1
|
| 488 |
+
keras==2.15.0
|
| 489 |
+
parse==1.19.1
|
| 490 |
+
linkify-it-py==2.0.3
|
| 491 |
+
dm-tree==0.1.8
|
| 492 |
+
requests-oauthlib==2.0.0
|
| 493 |
+
scikit-learn==1.6.1
|
| 494 |
+
altair==6.0.0
|
| 495 |
+
Werkzeug==3.1.5
|
| 496 |
+
sentencepiece==0.2.0
|
| 497 |
+
uvicorn==0.39.0
|
| 498 |
+
cycler==0.12.1
|
| 499 |
+
transformers==4.47.1
|
| 500 |
+
uvloop==0.22.1
|
| 501 |
+
mkl_random==1.2.8
|
| 502 |
+
GitPython==3.1.46
|
| 503 |
+
regex==2025.9.1
|
| 504 |
+
jax==0.4.30
|
| 505 |
+
llvmlite==0.39.1
|
| 506 |
+
pyasn1_modules==0.4.2
|
| 507 |
+
nvidia-cudnn-cu12==8.9.2.26
|
| 508 |
+
pydantic_core==2.41.5
|
| 509 |
+
google-genai==1.47.0
|
| 510 |
+
propcache==0.3.1
|
| 511 |
+
pycares==4.10.0
|
| 512 |
+
pyperclip==1.11.0
|
| 513 |
+
pyasn1==0.6.2
|
| 514 |
+
async-timeout==5.0.1
|
| 515 |
+
psutil==7.0.0
|
| 516 |
+
gym==0.23.1
|
| 517 |
+
dm-env==1.6
|
| 518 |
+
Jinja2==3.1.6
|
| 519 |
+
sentence-transformers==3.2.1
|
| 520 |
+
einops==0.4.1
|
| 521 |
+
triton==2.2.0
|
| 522 |
+
grpcio==1.76.0
|
| 523 |
+
labmaze==1.0.6
|
| 524 |
+
nvidia-ml-py==13.590.44
|
| 525 |
+
brotlicffi==1.0.9.2
|
| 526 |
+
smmap==5.0.2
|
| 527 |
+
cloudpickle==3.1.2
|
| 528 |
+
setuptools==80.9.0
|
| 529 |
+
starlette==0.49.3
|
| 530 |
+
prompt_toolkit==3.0.52
|
| 531 |
+
wrapt==1.14.2
|
| 532 |
+
h5py==3.14.0
|
| 533 |
+
scikit-image==0.19.3
|
| 534 |
+
joblib==1.5.3
|
| 535 |
+
opencv-python==4.11.0.86
|
| 536 |
+
rich==14.2.0
|
| 537 |
+
trl==0.11.4
|
| 538 |
+
gym-notices==0.1.0
|
| 539 |
+
trimesh==4.11.1
|
| 540 |
+
mdit-py-plugins==0.3.3
|
| 541 |
+
distro==1.9.0
|
| 542 |
+
executing==2.2.1
|
| 543 |
+
mkl-service==2.4.0
|
| 544 |
+
nvidia-cusolver-cu12==11.4.5.107
|
| 545 |
+
FLAML==2.3.6
|
| 546 |
+
mujoco-py==2.1.2.14
|
| 547 |
+
h11==0.16.0
|
| 548 |
+
highway-env==1.9.1
|
| 549 |
+
httpcore==1.0.9
|
| 550 |
+
tensorboard-data-server==0.7.2
|
| 551 |
+
tzdata==2025.3
|
| 552 |
+
absl-py==2.3.1
|
| 553 |
+
jsonschema-specifications==2025.9.1
|
| 554 |
+
numba==0.56.4
|
| 555 |
+
tabulate==0.9.0
|
| 556 |
+
importlib-resources==5.13.0
|
| 557 |
+
pycparser==2.23
|
| 558 |
+
mkl_fft==1.3.11
|
| 559 |
+
torch==2.2.2
|
| 560 |
+
nvidia-cublas-cu12==12.1.3.1
|
| 561 |
+
llmbc==0.0.0
|
| 562 |
+
rpds-py==0.27.1
|
| 563 |
+
typeguard==4.4.4
|
| 564 |
+
flatbuffers==25.12.19
|
| 565 |
+
toppra==0.6.3
|
| 566 |
+
sympy==1.14.0
|
| 567 |
+
tiktoken==0.8.0
|
| 568 |
+
nvidia-cuda-cupti-cu12==12.1.105
|
| 569 |
+
arm_pytorch_utilities==0.4.3
|
| 570 |
+
pynndescent==0.6.0
|
| 571 |
+
multidict==6.7.0
|
| 572 |
+
fonttools==4.60.2
|
| 573 |
+
numexpr==2.10.1
|
| 574 |
+
cmudict==1.0.13
|
| 575 |
+
PyOpenGL-accelerate==3.1.10
|
| 576 |
+
gmpy2==2.2.1
|
| 577 |
+
peft==0.14.0
|
| 578 |
+
metaworld==2.0.0
|
| 579 |
+
nvidia-cufft-cu12==11.0.2.54
|
| 580 |
+
python-dateutil==2.9.0.post0
|
| 581 |
+
aiosignal==1.4.0
|
| 582 |
+
pexpect==4.9.0
|
| 583 |
+
protobuf==4.25.8
|
| 584 |
+
typing_extensions==4.15.0
|
| 585 |
+
mujoco==2.3.7
|
| 586 |
+
tokenizers==0.21.0
|
| 587 |
+
pytorch-kinematics==0.7.5
|
| 588 |
+
sniffio==1.3.1
|
| 589 |
+
aiofiles==25.1.0
|
| 590 |
+
mplib==0.1.1
|
| 591 |
+
wcwidth==0.2.14
|
| 592 |
+
Pygments==2.19.1
|
| 593 |
+
anyio==4.12.1
|
| 594 |
+
tensorflow-estimator==2.15.0
|
| 595 |
+
filelock==3.17.0
|
| 596 |
+
numpy==1.23.5
|
| 597 |
+
attrs==25.4.0
|
| 598 |
+
Markdown==3.9
|
| 599 |
+
fsspec==2024.3.1
|
| 600 |
+
libclang==18.1.1
|
| 601 |
+
umap-learn==0.5.9.post2
|
| 602 |
+
dill==0.3.8
|
| 603 |
+
narwhals==2.15.0
|
| 604 |
+
tensorboard==2.15.2
|
| 605 |
+
dacite==1.9.2
|
| 606 |
+
termcolor==3.1.0
|
| 607 |
+
llmbc==0.0.0
|
| 608 |
+
python-multipart==0.0.20
|
| 609 |
+
exceptiongroup==1.3.1
|
| 610 |
+
sapien==3.0.0b1
|
| 611 |
+
pygame==2.6.1
|
| 612 |
+
nvidia-curand-cu12==10.3.2.106
|
| 613 |
+
evaluate==0.4.3
|
| 614 |
+
msgpack==1.1.1
|
| 615 |
+
tensorflow-probability==0.23.0
|
| 616 |
+
diffusers==0.31.0
|
| 617 |
+
certifi==2025.10.5
|
| 618 |
+
d4rl==1.1
|
| 619 |
+
pydub==0.25.1
|
| 620 |
+
annotated-doc==0.0.4
|
| 621 |
+
gitdb==4.0.12
|
| 622 |
+
gradio_client==0.2.9
|
| 623 |
+
Shapely==1.8.4
|
| 624 |
+
mani_skill==3.0.0b20
|
| 625 |
+
tensorflow-io-gcs-filesystem==0.37.1
|
| 626 |
+
fasteners==0.20
|
| 627 |
+
hjson==3.1.0
|
| 628 |
+
ninja==1.13.0
|
| 629 |
+
stack-data==0.6.3
|
| 630 |
+
pyarrow==21.0.0
|
| 631 |
+
networkx==3.2.1
|
| 632 |
+
nvidia-cusparse-cu12==12.1.0.106
|
| 633 |
+
pyparsing==3.3.1
|
| 634 |
+
timm==1.0.22
|
| 635 |
+
typing-inspection==0.4.2
|
| 636 |
+
openai==2.8.1
|
| 637 |
+
pybullet==3.2.6
|
| 638 |
+
hydra-core==1.2.0
|
| 639 |
+
gradio==3.36.1
|
| 640 |
+
tensorflow==2.15.1
|
| 641 |
+
asttokens==3.0.1
|
| 642 |
+
importlib-metadata==5.2.0
|
| 643 |
+
astunparse==1.6.3
|
| 644 |
+
tifffile==2024.8.30
|
| 645 |
+
annotated-types==0.7.0
|
| 646 |
+
Bottleneck==1.4.2
|
| 647 |
+
accelerate==1.0.1
|
| 648 |
+
pytz==2025.2
|
| 649 |
+
urllib3==2.5.0
|
| 650 |
+
frozenlist==1.8.0
|
| 651 |
+
sentry-sdk==2.50.0
|
| 652 |
+
jsonschema==4.25.1
|
| 653 |
+
tyro==0.9.1
|
| 654 |
+
Farama-Notifications==0.0.4
|
| 655 |
+
ffmpy==1.0.0
|
| 656 |
+
httpx==0.28.1
|
| 657 |
+
pymunk==6.2.1
|
| 658 |
+
shtab==1.7.2
|
| 659 |
+
glfw==2.0.0
|
| 660 |
+
hf-xet==1.1.8
|
| 661 |
+
omegaconf==2.2.1
|
| 662 |
+
blobfile==3.0.0
|
| 663 |
+
decorator==5.2.1
|
| 664 |
+
cffi==1.17.1
|
| 665 |
+
matplotlib-inline==0.2.1
|
| 666 |
+
eval_type_backport==0.2.2
|
| 667 |
+
torchaudio==2.2.2
|
| 668 |
+
colorama==0.4.6
|
| 669 |
+
click==8.1.8
|
| 670 |
+
Cython==0.29.37
|
| 671 |
+
orjson==3.11.5
|
| 672 |
+
gym_bandits==0.0.2
|
| 673 |
+
traitlets==5.14.3
|
| 674 |
+
docker-pycreds==0.4.0
|
| 675 |
+
multiprocess==0.70.15
|
| 676 |
+
zipp==3.21.0
|
| 677 |
+
antlr4-python3-runtime==4.9.3
|
| 678 |
+
uc-micro-py==1.0.3
|
| 679 |
+
mpmath==1.3.0
|
| 680 |
+
idna==3.11
|
| 681 |
+
aiodns==3.5.0
|
| 682 |
+
charset-normalizer==3.4.4
|
| 683 |
+
nvidia-nvjitlink-cu12==12.9.86
|
| 684 |
+
nvidia-cuda-nvrtc-cu12==12.1.105
|
| 685 |
+
seaborn==0.13.2
|
| 686 |
+
pyarrow-hotfix==0.7
|
| 687 |
+
pillow==11.3.0
|
| 688 |
+
pyautogen==0.1.0
|
| 689 |
+
requests==2.32.0
|
| 690 |
+
MarkupSafe==3.0.2
|
| 691 |
+
websockets==15.0.1
|
| 692 |
+
nvidia-nccl-cu12==2.19.3
|
| 693 |
+
pure_eval==0.2.3
|
| 694 |
+
parso==0.8.5
|
| 695 |
+
huggingface-hub==0.26.2
|
| 696 |
+
syllables==1.0.9
|
| 697 |
+
tf-agents==0.19.0
|
| 698 |
+
six==1.17.0
|
| 699 |
+
referencing==0.36.2
|
| 700 |
+
ptyprocess==0.7.0
|
| 701 |
+
platformdirs==4.4.0
|
| 702 |
+
fastapi==0.128.0
|
| 703 |
+
stable-baselines3==2.2.1
|
| 704 |
+
av==10.0.0
|
| 705 |
+
diskcache==5.6.3
|
| 706 |
+
pynvml==13.0.1
|
| 707 |
+
pytorch-seed==0.2.0
|
| 708 |
+
zarr==2.12.0
|
| 709 |
+
mdurl==0.1.2
|
| 710 |
+
docstring-parser==0.16
|
| 711 |
+
packaging==25.0
|
| 712 |
+
numcodecs==0.12.1
|
| 713 |
+
opt_einsum==3.4.0
|
| 714 |
+
markdown-it-py==2.2.0
|
| 715 |
+
nvidia-cuda-runtime-cu12==12.1.105
|
| 716 |
+
PyWavelets==1.6.0
|
| 717 |
+
datasets==2.19.0
|
| 718 |
+
contourpy==1.3.0
|
| 719 |
+
aiohappyeyeballs==2.6.1
|
| 720 |
+
jaxlib==0.4.30
|
| 721 |
+
ImageIO==2.37.2
|
| 722 |
+
wandb==0.18.6
|
| 723 |
+
jiter==0.12.0
|
| 724 |
+
gymnasium==0.29.1
|
| 725 |
+
pycryptodomex==3.23.0
|
| 726 |
+
google-pasta==0.2.0
|
| 727 |
+
ipython==8.18.1
|
| 728 |
+
threadpoolctl==3.6.0
|
| 729 |
+
py-cpuinfo==9.0.0
|
| 730 |
+
bitsandbytes==0.45.0
|
| 731 |
+
xxhash==3.5.0
|
| 732 |
+
google-auth-oauthlib==1.2.4
|
| 733 |
+
rsa==4.9.1
|
| 734 |
+
rouge_score==0.1.2
|
| 735 |
+
dm-control==1.0.14
|
| 736 |
+
oauthlib==3.3.1
|
| 737 |
+
pandas==2.3.3
|
| 738 |
+
tenacity==9.1.2
|
| 739 |
+
asciitree==0.3.3
|
| 740 |
+
scipy==1.13.1
|
| 741 |
+
jedi==0.19.2
|
| 742 |
+
gast==0.7.0
|
| 743 |
+
google-auth==2.47.0
|
| 744 |
+
transforms3d==0.4.2
|
| 745 |
+
kiwisolver==1.4.7
|
| 746 |
+
matplotlib==3.7.5
|
| 747 |
+
aiohttp==3.12.15
|
| 748 |
+
pip==23.3.2
|
| 749 |
+
imageio-ffmpeg==0.6.0
|
| 750 |
+
deepspeed==0.16.1
|
| 751 |
+
yarl==1.18.0
|
| 752 |
+
nvidia-nvtx-cu12==12.1.105
|
| 753 |
+
llfbench==0.1.0
|
| 754 |
+
wheel==0.45.1
|
| 755 |
+
PySocks==1.7.1
|
| 756 |
+
ml-dtypes==0.3.2
|
| 757 |
+
PyYAML==6.0.2
|
| 758 |
+
fast_kinematics==0.2.2
|
| 759 |
+
gin-config==0.5.0
|
| 760 |
+
setproctitle==1.3.7
|
| 761 |
+
safetensors==0.5.3
|
| 762 |
+
torchvision==0.17.2
|
| 763 |
+
semantic-version==2.10.0
|
| 764 |
+
PyOpenGL==3.1.10
|
| 765 |
+
nltk==3.9.2
|
| 766 |
+
lxml==6.0.2
|
| 767 |
+
pydantic==2.12.5
|
| 768 |
+
tqdm==4.67.1
|
| 769 |
+
keras==2.15.0
|
| 770 |
+
parse==1.19.1
|
| 771 |
+
linkify-it-py==2.0.3
|
| 772 |
+
dm-tree==0.1.8
|
| 773 |
+
requests-oauthlib==2.0.0
|
| 774 |
+
scikit-learn==1.6.1
|
| 775 |
+
altair==6.0.0
|
| 776 |
+
Werkzeug==3.1.5
|
| 777 |
+
sentencepiece==0.2.0
|
| 778 |
+
uvicorn==0.39.0
|
| 779 |
+
cycler==0.12.1
|
| 780 |
+
transformers==4.47.1
|
| 781 |
+
uvloop==0.22.1
|
| 782 |
+
mkl_random==1.2.8
|
| 783 |
+
GitPython==3.1.46
|
| 784 |
+
regex==2025.9.1
|
| 785 |
+
jax==0.4.30
|
| 786 |
+
llvmlite==0.39.1
|
| 787 |
+
pyasn1_modules==0.4.2
|
| 788 |
+
nvidia-cudnn-cu12==8.9.2.26
|
| 789 |
+
pydantic_core==2.41.5
|
| 790 |
+
google-genai==1.47.0
|
| 791 |
+
propcache==0.3.1
|
| 792 |
+
pycares==4.10.0
|
| 793 |
+
pyperclip==1.11.0
|
| 794 |
+
pyasn1==0.6.2
|
| 795 |
+
async-timeout==5.0.1
|
| 796 |
+
psutil==7.0.0
|
| 797 |
+
gym==0.23.1
|
| 798 |
+
dm-env==1.6
|
| 799 |
+
Jinja2==3.1.6
|
| 800 |
+
sentence-transformers==3.2.1
|
| 801 |
+
einops==0.4.1
|
| 802 |
+
triton==2.2.0
|
| 803 |
+
grpcio==1.76.0
|
| 804 |
+
labmaze==1.0.6
|
| 805 |
+
nvidia-ml-py==13.590.44
|
| 806 |
+
brotlicffi==1.0.9.2
|
| 807 |
+
smmap==5.0.2
|
| 808 |
+
cloudpickle==3.1.2
|
| 809 |
+
setuptools==80.9.0
|
| 810 |
+
starlette==0.49.3
|
| 811 |
+
prompt_toolkit==3.0.52
|
| 812 |
+
wrapt==1.14.2
|
| 813 |
+
h5py==3.14.0
|
| 814 |
+
scikit-image==0.19.3
|
| 815 |
+
joblib==1.5.3
|
| 816 |
+
opencv-python==4.11.0.86
|
| 817 |
+
rich==14.2.0
|
| 818 |
+
trl==0.11.4
|
| 819 |
+
gym-notices==0.1.0
|
| 820 |
+
trimesh==4.11.1
|
| 821 |
+
mdit-py-plugins==0.3.3
|
| 822 |
+
distro==1.9.0
|
| 823 |
+
executing==2.2.1
|
| 824 |
+
mkl-service==2.4.0
|
| 825 |
+
nvidia-cusolver-cu12==11.4.5.107
|
| 826 |
+
FLAML==2.3.6
|
| 827 |
+
mujoco-py==2.1.2.14
|
| 828 |
+
h11==0.16.0
|
| 829 |
+
highway-env==1.9.1
|
| 830 |
+
httpcore==1.0.9
|
| 831 |
+
tensorboard-data-server==0.7.2
|
| 832 |
+
tzdata==2025.3
|
| 833 |
+
absl-py==2.3.1
|
| 834 |
+
jsonschema-specifications==2025.9.1
|
| 835 |
+
numba==0.56.4
|
| 836 |
+
tabulate==0.9.0
|
| 837 |
+
importlib-resources==5.13.0
|
| 838 |
+
pycparser==2.23
|
| 839 |
+
mkl_fft==1.3.11
|
| 840 |
+
torch==2.2.2
|
| 841 |
+
nvidia-cublas-cu12==12.1.3.1
|
| 842 |
+
zipp==3.19.2
|
| 843 |
+
jaraco.text==3.12.1
|
| 844 |
+
jaraco.context==5.3.0
|
| 845 |
+
importlib_metadata==8.0.0
|
| 846 |
+
typeguard==4.3.0
|
| 847 |
+
inflect==7.3.1
|
| 848 |
+
more-itertools==10.3.0
|
| 849 |
+
wheel==0.45.1
|
| 850 |
+
packaging==24.2
|
| 851 |
+
backports.tarfile==1.2.0
|
| 852 |
+
autocommand==2.2.2
|
| 853 |
+
jaraco.collections==5.1.0
|
| 854 |
+
tomli==2.0.1
|
| 855 |
+
platformdirs==4.2.2
|
| 856 |
+
jaraco.functools==4.0.1
|
| 857 |
+
typing_extensions==4.12.2
|
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-4.18.0-513.24.1.el8_9.x86_64-x86_64-with-glibc2.28",
|
| 3 |
+
"python": "3.9.25",
|
| 4 |
+
"startedAt": "2026-01-21T05:12:36.447778Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--config-path",
|
| 7 |
+
"./config/main_table",
|
| 8 |
+
"--config-name",
|
| 9 |
+
"llmbc_box-close-v2.yaml",
|
| 10 |
+
"policy.loss_llm_weight=1.0e-3",
|
| 11 |
+
"training.seed=42"
|
| 12 |
+
],
|
| 13 |
+
"program": "/work/u1131674/LLM-BC/./train.py",
|
| 14 |
+
"codePath": "train.py",
|
| 15 |
+
"git": {
|
| 16 |
+
"remote": "https://github.com/CHYang25/LLM-BC.git",
|
| 17 |
+
"commit": "1d2e1f5818e116390426ef596d075fc0cf1b0081"
|
| 18 |
+
},
|
| 19 |
+
"email": "chris920325@gmail.com",
|
| 20 |
+
"root": "/work/u1131674/LLM-BC/data/outputs/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2",
|
| 21 |
+
"host": "hgpn19",
|
| 22 |
+
"username": "u1131674",
|
| 23 |
+
"executable": "/home/u1131674/.conda/envs/llm-bc/bin/python3",
|
| 24 |
+
"codePathLocal": "train.py",
|
| 25 |
+
"cpu_count": 112,
|
| 26 |
+
"cpu_count_logical": 112,
|
| 27 |
+
"gpu": "NVIDIA H100 80GB HBM3",
|
| 28 |
+
"gpu_count": 1,
|
| 29 |
+
"disk": {
|
| 30 |
+
"/": {
|
| 31 |
+
"total": "1918024196096",
|
| 32 |
+
"used": "394359058432"
|
| 33 |
+
}
|
| 34 |
+
},
|
| 35 |
+
"memory": {
|
| 36 |
+
"total": "2163685928960"
|
| 37 |
+
},
|
| 38 |
+
"cpu": {
|
| 39 |
+
"count": 112,
|
| 40 |
+
"countLogical": 112
|
| 41 |
+
},
|
| 42 |
+
"gpu_nvidia": [
|
| 43 |
+
{
|
| 44 |
+
"name": "NVIDIA H100 80GB HBM3",
|
| 45 |
+
"memoryTotal": "85520809984",
|
| 46 |
+
"cudaCores": 16896,
|
| 47 |
+
"architecture": "Hopper"
|
| 48 |
+
}
|
| 49 |
+
],
|
| 50 |
+
"slurm": {
|
| 51 |
+
"cluster_name": "hpc",
|
| 52 |
+
"conf": "/etc/slurm/slurm.conf",
|
| 53 |
+
"cpu_bind": "quiet,mask_cpu:0x00000000000000000000000000FF",
|
| 54 |
+
"cpu_bind_list": "0x00000000000000000000000000FF",
|
| 55 |
+
"cpu_bind_type": "mask_cpu:",
|
| 56 |
+
"cpu_bind_verbose": "quiet",
|
| 57 |
+
"cpus_on_node": "8",
|
| 58 |
+
"cpus_per_task": "8",
|
| 59 |
+
"distribution": "cyclic,pack",
|
| 60 |
+
"gpus_on_node": "1",
|
| 61 |
+
"gpus_per_node": "1",
|
| 62 |
+
"gtids": "0",
|
| 63 |
+
"job_account": "mst114558",
|
| 64 |
+
"job_cpus_per_node": "8",
|
| 65 |
+
"job_end_time": "1769145136",
|
| 66 |
+
"job_gid": "106773",
|
| 67 |
+
"job_group": "MST114558",
|
| 68 |
+
"job_id": "99320",
|
| 69 |
+
"job_name": "python3",
|
| 70 |
+
"job_nodelist": "hgpn19",
|
| 71 |
+
"job_num_nodes": "1",
|
| 72 |
+
"job_partition": "normal",
|
| 73 |
+
"job_qos": "normal",
|
| 74 |
+
"job_start_time": "1768972336",
|
| 75 |
+
"job_uid": "41408",
|
| 76 |
+
"job_user": "u1131674",
|
| 77 |
+
"jobid": "99320",
|
| 78 |
+
"launch_node_ipaddr": "172.21.101.1",
|
| 79 |
+
"localid": "0",
|
| 80 |
+
"mem_per_node": "204800",
|
| 81 |
+
"nnodes": "1",
|
| 82 |
+
"nodeid": "0",
|
| 83 |
+
"nodelist": "hgpn19",
|
| 84 |
+
"nprocs": "1",
|
| 85 |
+
"ntasks": "1",
|
| 86 |
+
"prio_process": "0",
|
| 87 |
+
"procid": "0",
|
| 88 |
+
"srun_comm_host": "172.21.101.1",
|
| 89 |
+
"srun_comm_port": "37185",
|
| 90 |
+
"step_gpus": "0",
|
| 91 |
+
"step_id": "0",
|
| 92 |
+
"step_launcher_port": "37185",
|
| 93 |
+
"step_nodelist": "hgpn19",
|
| 94 |
+
"step_num_nodes": "1",
|
| 95 |
+
"step_num_tasks": "1",
|
| 96 |
+
"step_tasks_per_node": "1",
|
| 97 |
+
"stepid": "0",
|
| 98 |
+
"submit_dir": "/work/u1131674/LLM-BC",
|
| 99 |
+
"submit_host": "cbi-lgn01",
|
| 100 |
+
"task_pid": "3666395",
|
| 101 |
+
"tasks_per_node": "1",
|
| 102 |
+
"topology_addr": "ibsw1.hgpn19",
|
| 103 |
+
"topology_addr_pattern": "switch.node",
|
| 104 |
+
"tres_per_task": "cpu:8",
|
| 105 |
+
"umask": "0022"
|
| 106 |
+
},
|
| 107 |
+
"cudaVersion": "12.4"
|
| 108 |
+
}
|
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/logs/debug-core.log
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2026-01-21T13:12:35.832711383+08:00","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmpgly62w7c/port-3666395.txt","pid":3666395,"debug":false,"disable-analytics":false}
|
| 2 |
+
{"time":"2026-01-21T13:12:35.8327317+08:00","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
|
| 3 |
+
{"time":"2026-01-21T13:12:35.833044589+08:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":3666395}
|
| 4 |
+
{"time":"2026-01-21T13:12:35.833040266+08:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":39835,"Zone":""}}
|
| 5 |
+
{"time":"2026-01-21T13:12:36.026076689+08:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:57216"}
|
| 6 |
+
{"time":"2026-01-21T13:12:36.448305573+08:00","level":"INFO","msg":"handleInformInit: received","streamId":"yhjy9tz9","id":"127.0.0.1:57216"}
|
| 7 |
+
{"time":"2026-01-21T13:12:36.565012013+08:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"yhjy9tz9","id":"127.0.0.1:57216"}
|
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2026-01-21T13:12:36.44966483+08:00","level":"INFO","msg":"using version","core version":"0.18.6"}
|
| 2 |
+
{"time":"2026-01-21T13:12:36.449675304+08:00","level":"INFO","msg":"created symlink","path":"/work/u1131674/LLM-BC/data/outputs/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/logs/debug-core.log"}
|
| 3 |
+
{"time":"2026-01-21T13:12:36.564980991+08:00","level":"INFO","msg":"created new stream","id":"yhjy9tz9"}
|
| 4 |
+
{"time":"2026-01-21T13:12:36.565006242+08:00","level":"INFO","msg":"stream: started","id":"yhjy9tz9"}
|
| 5 |
+
{"time":"2026-01-21T13:12:36.565029519+08:00","level":"INFO","msg":"sender: started","stream_id":"yhjy9tz9"}
|
| 6 |
+
{"time":"2026-01-21T13:12:36.565021074+08:00","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"yhjy9tz9"}}
|
| 7 |
+
{"time":"2026-01-21T13:12:36.565029409+08:00","level":"INFO","msg":"handler: started","stream_id":{"value":"yhjy9tz9"}}
|
| 8 |
+
{"time":"2026-01-21T13:12:37.456830647+08:00","level":"INFO","msg":"Starting system monitor"}
|
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/logs/debug.log
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Current SDK version is 0.18.6
|
| 2 |
+
2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Configure stats pid to 3666395
|
| 3 |
+
2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Loading settings from /home/u1131674/.config/wandb/settings
|
| 4 |
+
2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Loading settings from /work/u1131674/LLM-BC/wandb/settings
|
| 5 |
+
2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
|
| 6 |
+
2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': 'online', '_disable_service': None}
|
| 7 |
+
2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'train.py', 'program_abspath': '/work/u1131674/LLM-BC/train.py', 'program': '/work/u1131674/LLM-BC/./train.py'}
|
| 8 |
+
2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Applying login settings: {}
|
| 9 |
+
2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_init.py:_log_setup():533] Logging user logs to /work/u1131674/LLM-BC/data/outputs/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/logs/debug.log
|
| 10 |
+
2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_init.py:_log_setup():534] Logging internal logs to /work/u1131674/LLM-BC/data/outputs/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/logs/debug-internal.log
|
| 11 |
+
2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_init.py:init():619] calling init triggers
|
| 12 |
+
2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
|
| 13 |
+
config: {'name': 'train_llmbc_lowdim', '_target_': 'llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace', 'obs_dim': 9, 'action_dim': 4, 'task_name': 'box-close-v2', 'exp_name': 'default', 'model_name': 'HuggingFaceTB/SmolLM2-135M-Instruct', 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1, 'n_latency_steps': 0, 'past_action_visible': False, 'llm_orig_expert_feedback': True, 'llm_do_sample': False, 'policy': {'_target_': 'llmbc.policy.llmbc_lowdim_policy.LLMBCLowdimPolicy', 'model': {'_target_': 'llmbc.model.policy.policy_mlp.PolicyMLP', 'input_size': 9, 'hidden_size': [256, 256], 'output_size': 4, 'activation': 'relu', 'n_obs_steps': 1, 'n_action_steps': 1}, 'obs_dim': 9, 'action_dim': 4, 'llm_discriminator': {'_target_': 'llmbc.discriminator.llm_ce_discriminator.LLMCEDiscriminator', 'task_id': 'box-close-v2', 'llm_translator': {'_target_': 'llmbc.translator.llm_translator.LLMTranslator', 'cfg': {'name': 'HuggingFaceTB/SmolLM2-135M-Instruct', 'model_name': 'SmolLM2-135M-Instruct', 'config_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig', 'causal_lm_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM', 'use_quantization': False, 'use_joint_mlp_projector': True, 'llm_mode': 'ete-finetuned', 'finetune_mode': 'orig', 'checkpoint': 'data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890', 'max_length': 100, 'lora_config': {'r': 32, 'lora_alpha': 64, 'lora_dropout': 0.05, 'bias': 'none', 'task_type': 'CAUSAL_LM'}, 'prompter': {'_target_': 'llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter', 'use_joint_mlp_projector': True}, 'hydra': {'job': {'override_dirname': 'HuggingFaceTB/SmolLM2-135M-Instruct'}, 'run': {'dir': 'data/outputs/2026.01.21/13.12.19_HuggingFaceTB/SmolLM2-135M-Instruct'}}}, 'obs_dim': 9, 'action_dim': 4, 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1}}, 'loss_bc_weight': 1.0, 'loss_llm_weight': 0.001, 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1, 'normalize_llm_loss': True}, 'dataloader': {'batch_size': 16, 'num_workers': 0, 'shuffle': True, 'pin_memory': False, 'persistent_workers': False}, 'val_dataloader': {'batch_size': 16, 'num_workers': 0, 'shuffle': True, 'pin_memory': False, 'persistent_workers': False}, 'optimizer': {'_target_': 'torch.optim.AdamW', 'lr': 0.01, 'betas': [0.95, 0.999], 'eps': 1e-08, 'weight_decay': 1e-06}, 'training': {'device': 'cuda:0', 'seed': 42, 'debug': False, 'resume': False, 'lr_scheduler': 'cosine', 'lr_warmup_steps': 10, 'num_epochs': 1001, 'gradient_accumulate_every': 8, 'grad_norm_clip': 0.5, 'rollout_every': 5, 'checkpoint_every': 5, 'val_every': 1, 'sample_every': 5, 'sample_max_batch': 128, 'max_train_steps': None, 'max_val_steps': None, 'tqdm_interval_sec': 1.0}, 'logging': {'project': 'box-close-v2-training', 'resume': True, 'mode': 'online', 'name': '2026.01.21-13.12.19_train_llmbc_lowdim_box-close-v2', 'tags': ['train_llmbc_lowdim', 'box-close-v2', 'default'], 'id': None, 'group': None}, 'checkpoint': {'topk': {'monitor_key': 'test_success_rate', 'mode': 'max', 'k': 5, 'format_str': 'epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt'}, 'save_last_ckpt': True, 'save_last_snapshot': False}, 'multi_run': {'run_dir': 'data/outputs/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2', 'wandb_name_base': '2026.01.21-13.12.19_train_llmbc_lowdim_box-close-v2'}, 'task': {'name': 'box-close-v2', 'obs_dim': 9, 'action_dim': 4, 'env_runner': {'_target_': 'llmbc.env_runner.metaworld_lowdim_runner.MetaworldLowdimRunner', 'env_name': 'llf-metaworld-box-close-v2', 'n_train': 10, 'n_test': 50, 'n_envs': 10, 'max_steps': 30, 'n_obs_steps': 1, 'n_action_steps': 1, 'instruction_type': 'b', 'feedback_type': ['hp', 'hn', 'fp'], 'visual': False, 'discount': 0.9}, 'dataset': {'_target_': 'llmbc.dataset.metaworld_lowdim_dataset.MetaworldLowdimDataset', 'data_path': 'datasets/box-close-v2.pt', 'data_path2': 'datasets/box-close-v2.pt', 'horizon': 1, 'pad_before': 0, 'pad_after': 0, 'obs_eef_target': True, 'use_manual_normalizer': False, 'val_ratio': 0.1, 'dummy_normalizer': True}, 'instructor': {'_target_': 'llmbc.translator.instructor.metaworld_instructor.box_close_v2_instructor.BoxCloseV2Instructor'}}, 'llm': {'name': 'HuggingFaceTB/SmolLM2-135M-Instruct', 'model_name': 'SmolLM2-135M-Instruct', 'config_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig', 'causal_lm_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM', 'use_quantization': False, 'use_joint_mlp_projector': True, 'llm_mode': 'ete-finetuned', 'finetune_mode': 'orig', 'checkpoint': 'data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890', 'max_length': 100, 'lora_config': {'r': 32, 'lora_alpha': 64, 'lora_dropout': 0.05, 'bias': 'none', 'task_type': 'CAUSAL_LM'}, 'prompter': {'_target_': 'llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter', 'use_joint_mlp_projector': True}, 'hydra': {'job': {'override_dirname': 'HuggingFaceTB/SmolLM2-135M-Instruct'}, 'run': {'dir': 'data/outputs/2026.01.21/13.12.19_HuggingFaceTB/SmolLM2-135M-Instruct'}}}}
|
| 14 |
+
2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_init.py:init():669] starting backend
|
| 15 |
+
2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_init.py:init():673] sending inform_init request
|
| 16 |
+
2026-01-21 13:12:36,447 INFO MainThread:3666395 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
| 17 |
+
2026-01-21 13:12:36,447 INFO MainThread:3666395 [wandb_init.py:init():686] backend started and connected
|
| 18 |
+
2026-01-21 13:12:36,456 INFO MainThread:3666395 [wandb_init.py:init():781] updated telemetry
|
| 19 |
+
2026-01-21 13:12:36,506 INFO MainThread:3666395 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
|
| 20 |
+
2026-01-21 13:12:37,452 INFO MainThread:3666395 [wandb_init.py:init():867] starting run threads in backend
|
| 21 |
+
2026-01-21 13:12:38,016 INFO MainThread:3666395 [wandb_run.py:_console_start():2451] atexit reg
|
| 22 |
+
2026-01-21 13:12:38,016 INFO MainThread:3666395 [wandb_run.py:_redirect():2299] redirect: wrap_raw
|
| 23 |
+
2026-01-21 13:12:38,016 INFO MainThread:3666395 [wandb_run.py:_redirect():2364] Wrapping output streams.
|
| 24 |
+
2026-01-21 13:12:38,016 INFO MainThread:3666395 [wandb_run.py:_redirect():2389] Redirects installed.
|
| 25 |
+
2026-01-21 13:12:38,019 INFO MainThread:3666395 [wandb_init.py:init():911] run started, returning control to user process
|
| 26 |
+
2026-01-21 13:12:38,019 INFO MainThread:3666395 [wandb_run.py:_config_callback():1389] config_cb None None {'output_dir': '/work/u1131674/LLM-BC/data/outputs/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2'}
|
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/run-yhjy9tz9.wandb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8a15abdc3557e86e80be7f022afc84d1b6a86d97d6b37b42da9bb26e3ca4834e
|
| 3 |
+
size 327680
|
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/wandb-resume.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"run_id": "yhjy9tz9"}
|
2026.01.21/13.23.20_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: train_llmbc_lowdim
|
| 2 |
+
_target_: llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace
|
| 3 |
+
obs_dim: ${task.obs_dim}
|
| 4 |
+
action_dim: ${task.action_dim}
|
| 5 |
+
task_name: ${task.name}
|
| 6 |
+
exp_name: default
|
| 7 |
+
model_name: ${llm.name}
|
| 8 |
+
horizon: 1
|
| 9 |
+
n_obs_steps: 1
|
| 10 |
+
n_action_steps: 1
|
| 11 |
+
n_latency_steps: 0
|
| 12 |
+
past_action_visible: false
|
| 13 |
+
llm_orig_expert_feedback: true
|
| 14 |
+
llm_do_sample: false
|
| 15 |
+
policy:
|
| 16 |
+
_target_: llmbc.policy.llmbc_lowdim_policy.LLMBCLowdimPolicy
|
| 17 |
+
model:
|
| 18 |
+
_target_: llmbc.model.policy.policy_mlp.PolicyMLP
|
| 19 |
+
input_size: ${eval:'${n_obs_steps}*${obs_dim}'}
|
| 20 |
+
hidden_size:
|
| 21 |
+
- 256
|
| 22 |
+
- 256
|
| 23 |
+
output_size: ${eval:'${n_action_steps}*${action_dim}'}
|
| 24 |
+
activation: relu
|
| 25 |
+
n_obs_steps: ${n_obs_steps}
|
| 26 |
+
n_action_steps: ${n_action_steps}
|
| 27 |
+
obs_dim: ${obs_dim}
|
| 28 |
+
action_dim: ${action_dim}
|
| 29 |
+
llm_discriminator:
|
| 30 |
+
_target_: llmbc.discriminator.llm_ce_discriminator.LLMCEDiscriminator
|
| 31 |
+
task_id: ${task_name}
|
| 32 |
+
llm_translator:
|
| 33 |
+
_target_: llmbc.translator.llm_translator.LLMTranslator
|
| 34 |
+
cfg: ${llm}
|
| 35 |
+
obs_dim: ${task.obs_dim}
|
| 36 |
+
action_dim: ${task.action_dim}
|
| 37 |
+
horizon: ${horizon}
|
| 38 |
+
n_obs_steps: ${n_obs_steps}
|
| 39 |
+
n_action_steps: ${n_action_steps}
|
| 40 |
+
loss_bc_weight: 1.0
|
| 41 |
+
loss_llm_weight: 1.0
|
| 42 |
+
horizon: ${horizon}
|
| 43 |
+
n_obs_steps: ${n_obs_steps}
|
| 44 |
+
n_action_steps: ${n_action_steps}
|
| 45 |
+
normalize_llm_loss: true
|
| 46 |
+
dataloader:
|
| 47 |
+
batch_size: 16
|
| 48 |
+
num_workers: 0
|
| 49 |
+
shuffle: true
|
| 50 |
+
pin_memory: false
|
| 51 |
+
persistent_workers: false
|
| 52 |
+
val_dataloader:
|
| 53 |
+
batch_size: 16
|
| 54 |
+
num_workers: 0
|
| 55 |
+
shuffle: true
|
| 56 |
+
pin_memory: false
|
| 57 |
+
persistent_workers: false
|
| 58 |
+
optimizer:
|
| 59 |
+
_target_: torch.optim.AdamW
|
| 60 |
+
lr: 0.01
|
| 61 |
+
betas:
|
| 62 |
+
- 0.95
|
| 63 |
+
- 0.999
|
| 64 |
+
eps: 1.0e-08
|
| 65 |
+
weight_decay: 1.0e-06
|
| 66 |
+
training:
|
| 67 |
+
device: cuda:0
|
| 68 |
+
seed: 42
|
| 69 |
+
debug: false
|
| 70 |
+
resume: false
|
| 71 |
+
lr_scheduler: cosine
|
| 72 |
+
lr_warmup_steps: 10
|
| 73 |
+
num_epochs: 1001
|
| 74 |
+
gradient_accumulate_every: 8
|
| 75 |
+
grad_norm_clip: 0.5
|
| 76 |
+
rollout_every: 5
|
| 77 |
+
checkpoint_every: 5
|
| 78 |
+
val_every: 1
|
| 79 |
+
sample_every: 5
|
| 80 |
+
sample_max_batch: 128
|
| 81 |
+
max_train_steps: null
|
| 82 |
+
max_val_steps: null
|
| 83 |
+
tqdm_interval_sec: 1.0
|
| 84 |
+
logging:
|
| 85 |
+
project: ${task.name}-training
|
| 86 |
+
resume: true
|
| 87 |
+
mode: online
|
| 88 |
+
name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name}
|
| 89 |
+
tags:
|
| 90 |
+
- ${name}
|
| 91 |
+
- ${task_name}
|
| 92 |
+
- ${exp_name}
|
| 93 |
+
id: null
|
| 94 |
+
group: null
|
| 95 |
+
checkpoint:
|
| 96 |
+
topk:
|
| 97 |
+
monitor_key: test_success_rate
|
| 98 |
+
mode: max
|
| 99 |
+
k: 5
|
| 100 |
+
format_str: epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt
|
| 101 |
+
save_last_ckpt: true
|
| 102 |
+
save_last_snapshot: false
|
| 103 |
+
multi_run:
|
| 104 |
+
run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
|
| 105 |
+
wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name}
|
| 106 |
+
task:
|
| 107 |
+
name: box-close-v2
|
| 108 |
+
obs_dim: 9
|
| 109 |
+
action_dim: 4
|
| 110 |
+
env_runner:
|
| 111 |
+
_target_: llmbc.env_runner.metaworld_lowdim_runner.MetaworldLowdimRunner
|
| 112 |
+
env_name: llf-metaworld-box-close-v2
|
| 113 |
+
n_train: 10
|
| 114 |
+
n_test: 50
|
| 115 |
+
n_envs: 10
|
| 116 |
+
max_steps: 30
|
| 117 |
+
n_obs_steps: ${n_obs_steps}
|
| 118 |
+
n_action_steps: ${n_action_steps}
|
| 119 |
+
instruction_type: b
|
| 120 |
+
feedback_type:
|
| 121 |
+
- hp
|
| 122 |
+
- hn
|
| 123 |
+
- fp
|
| 124 |
+
visual: false
|
| 125 |
+
discount: 0.9
|
| 126 |
+
dataset:
|
| 127 |
+
_target_: llmbc.dataset.metaworld_lowdim_dataset.MetaworldLowdimDataset
|
| 128 |
+
data_path: datasets/box-close-v2.pt
|
| 129 |
+
data_path2: datasets/box-close-v2.pt
|
| 130 |
+
horizon: ${horizon}
|
| 131 |
+
pad_before: ${eval:'${n_obs_steps}-1'}
|
| 132 |
+
pad_after: ${eval:'${n_action_steps}-1'}
|
| 133 |
+
obs_eef_target: true
|
| 134 |
+
use_manual_normalizer: false
|
| 135 |
+
val_ratio: 0.1
|
| 136 |
+
dummy_normalizer: true
|
| 137 |
+
instructor:
|
| 138 |
+
_target_: llmbc.translator.instructor.metaworld_instructor.box_close_v2_instructor.BoxCloseV2Instructor
|
| 139 |
+
llm:
|
| 140 |
+
name: HuggingFaceTB/SmolLM2-135M-Instruct
|
| 141 |
+
model_name: SmolLM2-135M-Instruct
|
| 142 |
+
config_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig
|
| 143 |
+
causal_lm_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM
|
| 144 |
+
use_quantization: false
|
| 145 |
+
use_joint_mlp_projector: true
|
| 146 |
+
llm_mode: ete-finetuned
|
| 147 |
+
finetune_mode: orig
|
| 148 |
+
checkpoint: data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890
|
| 149 |
+
max_length: 100
|
| 150 |
+
lora_config:
|
| 151 |
+
r: 32
|
| 152 |
+
lora_alpha: 64
|
| 153 |
+
lora_dropout: 0.05
|
| 154 |
+
bias: none
|
| 155 |
+
task_type: CAUSAL_LM
|
| 156 |
+
prompter:
|
| 157 |
+
_target_: llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter
|
| 158 |
+
use_joint_mlp_projector: true
|
| 159 |
+
hydra:
|
| 160 |
+
job:
|
| 161 |
+
override_dirname: ${model_name}
|
| 162 |
+
run:
|
| 163 |
+
dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${model_name}
|
2026.01.21/13.23.20_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
hydra:
|
| 2 |
+
run:
|
| 3 |
+
dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
|
| 4 |
+
sweep:
|
| 5 |
+
dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
|
| 6 |
+
subdir: ${hydra.job.num}
|
| 7 |
+
launcher:
|
| 8 |
+
_target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
|
| 9 |
+
sweeper:
|
| 10 |
+
_target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
|
| 11 |
+
max_batch_size: null
|
| 12 |
+
params: null
|
| 13 |
+
help:
|
| 14 |
+
app_name: ${hydra.job.name}
|
| 15 |
+
header: '${hydra.help.app_name} is powered by Hydra.
|
| 16 |
+
|
| 17 |
+
'
|
| 18 |
+
footer: 'Powered by Hydra (https://hydra.cc)
|
| 19 |
+
|
| 20 |
+
Use --hydra-help to view Hydra specific help
|
| 21 |
+
|
| 22 |
+
'
|
| 23 |
+
template: '${hydra.help.header}
|
| 24 |
+
|
| 25 |
+
== Configuration groups ==
|
| 26 |
+
|
| 27 |
+
Compose your configuration from those groups (group=option)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
$APP_CONFIG_GROUPS
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
== Config ==
|
| 34 |
+
|
| 35 |
+
Override anything in the config (foo.bar=value)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
$CONFIG
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
${hydra.help.footer}
|
| 42 |
+
|
| 43 |
+
'
|
| 44 |
+
hydra_help:
|
| 45 |
+
template: 'Hydra (${hydra.runtime.version})
|
| 46 |
+
|
| 47 |
+
See https://hydra.cc for more info.
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
== Flags ==
|
| 51 |
+
|
| 52 |
+
$FLAGS_HELP
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
== Configuration groups ==
|
| 56 |
+
|
| 57 |
+
Compose your configuration from those groups (For example, append hydra/job_logging=disabled
|
| 58 |
+
to command line)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
$HYDRA_CONFIG_GROUPS
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
Use ''--cfg hydra'' to Show the Hydra config.
|
| 65 |
+
|
| 66 |
+
'
|
| 67 |
+
hydra_help: ???
|
| 68 |
+
hydra_logging:
|
| 69 |
+
version: 1
|
| 70 |
+
formatters:
|
| 71 |
+
simple:
|
| 72 |
+
format: '[%(asctime)s][HYDRA] %(message)s'
|
| 73 |
+
handlers:
|
| 74 |
+
console:
|
| 75 |
+
class: logging.StreamHandler
|
| 76 |
+
formatter: simple
|
| 77 |
+
stream: ext://sys.stdout
|
| 78 |
+
root:
|
| 79 |
+
level: INFO
|
| 80 |
+
handlers:
|
| 81 |
+
- console
|
| 82 |
+
loggers:
|
| 83 |
+
logging_example:
|
| 84 |
+
level: DEBUG
|
| 85 |
+
disable_existing_loggers: false
|
| 86 |
+
job_logging:
|
| 87 |
+
version: 1
|
| 88 |
+
formatters:
|
| 89 |
+
simple:
|
| 90 |
+
format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
|
| 91 |
+
handlers:
|
| 92 |
+
console:
|
| 93 |
+
class: logging.StreamHandler
|
| 94 |
+
formatter: simple
|
| 95 |
+
stream: ext://sys.stdout
|
| 96 |
+
file:
|
| 97 |
+
class: logging.FileHandler
|
| 98 |
+
formatter: simple
|
| 99 |
+
filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
|
| 100 |
+
root:
|
| 101 |
+
level: INFO
|
| 102 |
+
handlers:
|
| 103 |
+
- console
|
| 104 |
+
- file
|
| 105 |
+
disable_existing_loggers: false
|
| 106 |
+
env: {}
|
| 107 |
+
mode: RUN
|
| 108 |
+
searchpath: []
|
| 109 |
+
callbacks: {}
|
| 110 |
+
output_subdir: .hydra
|
| 111 |
+
overrides:
|
| 112 |
+
hydra:
|
| 113 |
+
- hydra.mode=RUN
|
| 114 |
+
task:
|
| 115 |
+
- policy.loss_llm_weight=1.0
|
| 116 |
+
- training.seed=42
|
| 117 |
+
job:
|
| 118 |
+
name: train
|
| 119 |
+
chdir: null
|
| 120 |
+
override_dirname: policy.loss_llm_weight=1.0,training.seed=42
|
| 121 |
+
id: ???
|
| 122 |
+
num: ???
|
| 123 |
+
config_name: llmbc_box-close-v2.yaml
|
| 124 |
+
env_set: {}
|
| 125 |
+
env_copy: []
|
| 126 |
+
config:
|
| 127 |
+
override_dirname:
|
| 128 |
+
kv_sep: '='
|
| 129 |
+
item_sep: ','
|
| 130 |
+
exclude_keys: []
|
| 131 |
+
runtime:
|
| 132 |
+
version: 1.2.0
|
| 133 |
+
version_base: '1.2'
|
| 134 |
+
cwd: /work/u1131674/LLM-BC
|
| 135 |
+
config_sources:
|
| 136 |
+
- path: hydra.conf
|
| 137 |
+
schema: pkg
|
| 138 |
+
provider: hydra
|
| 139 |
+
- path: /work/u1131674/LLM-BC/config/main_table
|
| 140 |
+
schema: file
|
| 141 |
+
provider: main
|
| 142 |
+
- path: ''
|
| 143 |
+
schema: structured
|
| 144 |
+
provider: schema
|
| 145 |
+
output_dir: /work/u1131674/LLM-BC/data/outputs/2026.01.21/13.23.20_train_llmbc_lowdim_box-close-v2
|
| 146 |
+
choices:
|
| 147 |
+
hydra/env: default
|
| 148 |
+
hydra/callbacks: null
|
| 149 |
+
hydra/job_logging: default
|
| 150 |
+
hydra/hydra_logging: default
|
| 151 |
+
hydra/hydra_help: default
|
| 152 |
+
hydra/help: default
|
| 153 |
+
hydra/sweeper: basic
|
| 154 |
+
hydra/launcher: basic
|
| 155 |
+
hydra/output: default
|
| 156 |
+
verbose: false
|
2026.01.21/13.23.20_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
- policy.loss_llm_weight=1.0
|
| 2 |
+
- training.seed=42
|