CHYang25 commited on
Commit
6f006e9
·
verified ·
1 Parent(s): 89275f2

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +4 -0
  2. 2026.01.21/12.08.38_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml +163 -0
  3. 2026.01.21/12.08.38_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml +154 -0
  4. 2026.01.21/12.08.38_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml +1 -0
  5. 2026.01.21/12.08.38_train_llmbc_lowdim_box-close-v2/train.log +2 -0
  6. 2026.01.21/12.10.34_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml +163 -0
  7. 2026.01.21/12.10.34_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml +154 -0
  8. 2026.01.21/12.10.34_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml +1 -0
  9. 2026.01.21/12.10.34_train_llmbc_lowdim_box-close-v2/train.log +9 -0
  10. 2026.01.21/12.13.09_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml +163 -0
  11. 2026.01.21/12.13.09_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml +154 -0
  12. 2026.01.21/12.13.09_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml +1 -0
  13. 2026.01.21/12.13.09_train_llmbc_lowdim_box-close-v2/train.log +12 -0
  14. 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml +163 -0
  15. 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml +154 -0
  16. 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml +1 -0
  17. 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/logs.json.txt +237 -0
  18. 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/train.log +14 -0
  19. 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/debug-internal.log +11 -0
  20. 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/debug.log +27 -0
  21. 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/config.yaml +271 -0
  22. 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/output.log +78 -0
  23. 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/requirements.txt +857 -0
  24. 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/wandb-metadata.json +55 -0
  25. 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/wandb-summary.json +1 -0
  26. 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug-core.log +12 -0
  27. 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug-internal.log +11 -0
  28. 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug.log +27 -0
  29. 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/run-9puzigbg.wandb +3 -0
  30. 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/wandb-resume.json +1 -0
  31. 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml +163 -0
  32. 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml +156 -0
  33. 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml +2 -0
  34. 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/checkpoints/epoch=0000-test_success_rate=0.000.ckpt +3 -0
  35. 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/checkpoints/latest.ckpt +3 -0
  36. 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/logs.json.txt +418 -0
  37. 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/train.log +8 -0
  38. 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/debug-internal.log +8 -0
  39. 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/debug.log +26 -0
  40. 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/files/output.log +3 -0
  41. 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/files/requirements.txt +857 -0
  42. 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/files/wandb-metadata.json +108 -0
  43. 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/logs/debug-core.log +7 -0
  44. 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/logs/debug-internal.log +8 -0
  45. 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/logs/debug.log +26 -0
  46. 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/run-yhjy9tz9.wandb +3 -0
  47. 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/wandb-resume.json +1 -0
  48. 2026.01.21/13.23.20_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml +163 -0
  49. 2026.01.21/13.23.20_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml +156 -0
  50. 2026.01.21/13.23.20_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml +2 -0
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ 2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/run-9puzigbg.wandb filter=lfs diff=lfs merge=lfs -text
37
+ 2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/run-yhjy9tz9.wandb filter=lfs diff=lfs merge=lfs -text
38
+ 2026.01.21/13.23.20_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_132338-qrt50pak/run-qrt50pak.wandb filter=lfs diff=lfs merge=lfs -text
39
+ 2026.01.21/13.27.30_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_132748-8pqnk39p/run-8pqnk39p.wandb filter=lfs diff=lfs merge=lfs -text
2026.01.21/12.08.38_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: train_llmbc_lowdim
2
+ _target_: llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace
3
+ obs_dim: ${task.obs_dim}
4
+ action_dim: ${task.action_dim}
5
+ task_name: ${task.name}
6
+ exp_name: default
7
+ model_name: ${llm.name}
8
+ horizon: 1
9
+ n_obs_steps: 1
10
+ n_action_steps: 1
11
+ n_latency_steps: 0
12
+ past_action_visible: false
13
+ llm_orig_expert_feedback: true
14
+ llm_do_sample: false
15
+ policy:
16
+ _target_: llmbc.policy.llmbc_lowdim_policy.LLMBCLowdimPolicy
17
+ model:
18
+ _target_: llmbc.model.policy.policy_mlp.PolicyMLP
19
+ input_size: ${eval:'${n_obs_steps}*${obs_dim}'}
20
+ hidden_size:
21
+ - 256
22
+ - 256
23
+ output_size: ${eval:'${n_action_steps}*${action_dim}'}
24
+ activation: relu
25
+ n_obs_steps: ${n_obs_steps}
26
+ n_action_steps: ${n_action_steps}
27
+ obs_dim: ${obs_dim}
28
+ action_dim: ${action_dim}
29
+ llm_discriminator:
30
+ _target_: llmbc.discriminator.llm_ce_discriminator.LLMCEDiscriminator
31
+ task_id: ${task_name}
32
+ llm_translator:
33
+ _target_: llmbc.translator.llm_translator.LLMTranslator
34
+ cfg: ${llm}
35
+ obs_dim: ${task.obs_dim}
36
+ action_dim: ${task.action_dim}
37
+ horizon: ${horizon}
38
+ n_obs_steps: ${n_obs_steps}
39
+ n_action_steps: ${n_action_steps}
40
+ loss_bc_weight: 1.0
41
+ loss_llm_weight: 0.01
42
+ horizon: ${horizon}
43
+ n_obs_steps: ${n_obs_steps}
44
+ n_action_steps: ${n_action_steps}
45
+ normalize_llm_loss: true
46
+ dataloader:
47
+ batch_size: 16
48
+ num_workers: 0
49
+ shuffle: true
50
+ pin_memory: false
51
+ persistent_workers: false
52
+ val_dataloader:
53
+ batch_size: 16
54
+ num_workers: 0
55
+ shuffle: true
56
+ pin_memory: false
57
+ persistent_workers: false
58
+ optimizer:
59
+ _target_: torch.optim.AdamW
60
+ lr: 0.01
61
+ betas:
62
+ - 0.95
63
+ - 0.999
64
+ eps: 1.0e-08
65
+ weight_decay: 1.0e-06
66
+ training:
67
+ device: cuda:0
68
+ seed: 42
69
+ debug: false
70
+ resume: false
71
+ lr_scheduler: cosine
72
+ lr_warmup_steps: 10
73
+ num_epochs: 1001
74
+ gradient_accumulate_every: 8
75
+ grad_norm_clip: 0.5
76
+ rollout_every: 5
77
+ checkpoint_every: 5
78
+ val_every: 1
79
+ sample_every: 5
80
+ sample_max_batch: 128
81
+ max_train_steps: null
82
+ max_val_steps: null
83
+ tqdm_interval_sec: 1.0
84
+ logging:
85
+ project: ${task.name}-training
86
+ resume: true
87
+ mode: online
88
+ name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name}
89
+ tags:
90
+ - ${name}
91
+ - ${task_name}
92
+ - ${exp_name}
93
+ id: null
94
+ group: null
95
+ checkpoint:
96
+ topk:
97
+ monitor_key: test_success_rate
98
+ mode: max
99
+ k: 5
100
+ format_str: epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt
101
+ save_last_ckpt: true
102
+ save_last_snapshot: false
103
+ multi_run:
104
+ run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
105
+ wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name}
106
+ task:
107
+ name: box-close-v2
108
+ obs_dim: 9
109
+ action_dim: 4
110
+ env_runner:
111
+ _target_: llmbc.env_runner.metaworld_lowdim_runner.MetaworldLowdimRunner
112
+ env_name: llf-metaworld-box-close-v2
113
+ n_train: 10
114
+ n_test: 50
115
+ n_envs: 10
116
+ max_steps: 30
117
+ n_obs_steps: ${n_obs_steps}
118
+ n_action_steps: ${n_action_steps}
119
+ instruction_type: b
120
+ feedback_type:
121
+ - hp
122
+ - hn
123
+ - fp
124
+ visual: false
125
+ discount: 0.9
126
+ dataset:
127
+ _target_: llmbc.dataset.metaworld_lowdim_dataset.MetaworldLowdimDataset
128
+ data_path: datasets/box-close-v2.pt
129
+ data_path2: datasets/box-close-v2.pt
130
+ horizon: ${horizon}
131
+ pad_before: ${eval:'${n_obs_steps}-1'}
132
+ pad_after: ${eval:'${n_action_steps}-1'}
133
+ obs_eef_target: true
134
+ use_manual_normalizer: false
135
+ val_ratio: 0.1
136
+ dummy_normalizer: true
137
+ instructor:
138
+ _target_: llmbc.translator.instructor.metaworld_instructor.box_close_v2_instructor.BoxCloseV2Instructor
139
+ llm:
140
+ name: HuggingFaceTB/SmolLM2-135M-Instruct
141
+ model_name: SmolLM2-135M-Instruct
142
+ config_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig
143
+ causal_lm_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM
144
+ use_quantization: false
145
+ use_joint_mlp_projector: true
146
+ llm_mode: ete-finetuned
147
+ finetune_mode: orig
148
+ checkpoint: data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890
149
+ max_length: 100
150
+ lora_config:
151
+ r: 32
152
+ lora_alpha: 64
153
+ lora_dropout: 0.05
154
+ bias: none
155
+ task_type: CAUSAL_LM
156
+ prompter:
157
+ _target_: llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter
158
+ use_joint_mlp_projector: true
159
+ hydra:
160
+ job:
161
+ override_dirname: ${model_name}
162
+ run:
163
+ dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${model_name}
2026.01.21/12.08.38_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hydra:
2
+ run:
3
+ dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
4
+ sweep:
5
+ dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
6
+ subdir: ${hydra.job.num}
7
+ launcher:
8
+ _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
9
+ sweeper:
10
+ _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
11
+ max_batch_size: null
12
+ params: null
13
+ help:
14
+ app_name: ${hydra.job.name}
15
+ header: '${hydra.help.app_name} is powered by Hydra.
16
+
17
+ '
18
+ footer: 'Powered by Hydra (https://hydra.cc)
19
+
20
+ Use --hydra-help to view Hydra specific help
21
+
22
+ '
23
+ template: '${hydra.help.header}
24
+
25
+ == Configuration groups ==
26
+
27
+ Compose your configuration from those groups (group=option)
28
+
29
+
30
+ $APP_CONFIG_GROUPS
31
+
32
+
33
+ == Config ==
34
+
35
+ Override anything in the config (foo.bar=value)
36
+
37
+
38
+ $CONFIG
39
+
40
+
41
+ ${hydra.help.footer}
42
+
43
+ '
44
+ hydra_help:
45
+ template: 'Hydra (${hydra.runtime.version})
46
+
47
+ See https://hydra.cc for more info.
48
+
49
+
50
+ == Flags ==
51
+
52
+ $FLAGS_HELP
53
+
54
+
55
+ == Configuration groups ==
56
+
57
+ Compose your configuration from those groups (For example, append hydra/job_logging=disabled
58
+ to command line)
59
+
60
+
61
+ $HYDRA_CONFIG_GROUPS
62
+
63
+
64
+ Use ''--cfg hydra'' to Show the Hydra config.
65
+
66
+ '
67
+ hydra_help: ???
68
+ hydra_logging:
69
+ version: 1
70
+ formatters:
71
+ simple:
72
+ format: '[%(asctime)s][HYDRA] %(message)s'
73
+ handlers:
74
+ console:
75
+ class: logging.StreamHandler
76
+ formatter: simple
77
+ stream: ext://sys.stdout
78
+ root:
79
+ level: INFO
80
+ handlers:
81
+ - console
82
+ loggers:
83
+ logging_example:
84
+ level: DEBUG
85
+ disable_existing_loggers: false
86
+ job_logging:
87
+ version: 1
88
+ formatters:
89
+ simple:
90
+ format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
91
+ handlers:
92
+ console:
93
+ class: logging.StreamHandler
94
+ formatter: simple
95
+ stream: ext://sys.stdout
96
+ file:
97
+ class: logging.FileHandler
98
+ formatter: simple
99
+ filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
100
+ root:
101
+ level: INFO
102
+ handlers:
103
+ - console
104
+ - file
105
+ disable_existing_loggers: false
106
+ env: {}
107
+ mode: RUN
108
+ searchpath: []
109
+ callbacks: {}
110
+ output_subdir: .hydra
111
+ overrides:
112
+ hydra:
113
+ - hydra.mode=RUN
114
+ task: []
115
+ job:
116
+ name: train
117
+ chdir: null
118
+ override_dirname: ''
119
+ id: ???
120
+ num: ???
121
+ config_name: llmbc_box-close-v2.yaml
122
+ env_set: {}
123
+ env_copy: []
124
+ config:
125
+ override_dirname:
126
+ kv_sep: '='
127
+ item_sep: ','
128
+ exclude_keys: []
129
+ runtime:
130
+ version: 1.2.0
131
+ version_base: '1.2'
132
+ cwd: /work/u1131674/LLM-BC
133
+ config_sources:
134
+ - path: hydra.conf
135
+ schema: pkg
136
+ provider: hydra
137
+ - path: /work/u1131674/LLM-BC/config/main_table
138
+ schema: file
139
+ provider: main
140
+ - path: ''
141
+ schema: structured
142
+ provider: schema
143
+ output_dir: /work/u1131674/LLM-BC/data/outputs/2026.01.21/12.08.38_train_llmbc_lowdim_box-close-v2
144
+ choices:
145
+ hydra/env: default
146
+ hydra/callbacks: null
147
+ hydra/job_logging: default
148
+ hydra/hydra_logging: default
149
+ hydra/hydra_help: default
150
+ hydra/help: default
151
+ hydra/sweeper: basic
152
+ hydra/launcher: basic
153
+ hydra/output: default
154
+ verbose: false
2026.01.21/12.08.38_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml ADDED
@@ -0,0 +1 @@
 
 
1
+ []
2026.01.21/12.08.38_train_llmbc_lowdim_box-close-v2/train.log ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ [2026-01-21 12:08:39,454][hydra.utils][ERROR] - Error initializing class at llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace: Error loading 'llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace':
2
+ ImportError("cannot import name 'Sentinel' from 'typing_extensions' (/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/typing_extensions.py)")
2026.01.21/12.10.34_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: train_llmbc_lowdim
2
+ _target_: llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace
3
+ obs_dim: ${task.obs_dim}
4
+ action_dim: ${task.action_dim}
5
+ task_name: ${task.name}
6
+ exp_name: default
7
+ model_name: ${llm.name}
8
+ horizon: 1
9
+ n_obs_steps: 1
10
+ n_action_steps: 1
11
+ n_latency_steps: 0
12
+ past_action_visible: false
13
+ llm_orig_expert_feedback: true
14
+ llm_do_sample: false
15
+ policy:
16
+ _target_: llmbc.policy.llmbc_lowdim_policy.LLMBCLowdimPolicy
17
+ model:
18
+ _target_: llmbc.model.policy.policy_mlp.PolicyMLP
19
+ input_size: ${eval:'${n_obs_steps}*${obs_dim}'}
20
+ hidden_size:
21
+ - 256
22
+ - 256
23
+ output_size: ${eval:'${n_action_steps}*${action_dim}'}
24
+ activation: relu
25
+ n_obs_steps: ${n_obs_steps}
26
+ n_action_steps: ${n_action_steps}
27
+ obs_dim: ${obs_dim}
28
+ action_dim: ${action_dim}
29
+ llm_discriminator:
30
+ _target_: llmbc.discriminator.llm_ce_discriminator.LLMCEDiscriminator
31
+ task_id: ${task_name}
32
+ llm_translator:
33
+ _target_: llmbc.translator.llm_translator.LLMTranslator
34
+ cfg: ${llm}
35
+ obs_dim: ${task.obs_dim}
36
+ action_dim: ${task.action_dim}
37
+ horizon: ${horizon}
38
+ n_obs_steps: ${n_obs_steps}
39
+ n_action_steps: ${n_action_steps}
40
+ loss_bc_weight: 1.0
41
+ loss_llm_weight: 0.01
42
+ horizon: ${horizon}
43
+ n_obs_steps: ${n_obs_steps}
44
+ n_action_steps: ${n_action_steps}
45
+ normalize_llm_loss: true
46
+ dataloader:
47
+ batch_size: 16
48
+ num_workers: 0
49
+ shuffle: true
50
+ pin_memory: false
51
+ persistent_workers: false
52
+ val_dataloader:
53
+ batch_size: 16
54
+ num_workers: 0
55
+ shuffle: true
56
+ pin_memory: false
57
+ persistent_workers: false
58
+ optimizer:
59
+ _target_: torch.optim.AdamW
60
+ lr: 0.01
61
+ betas:
62
+ - 0.95
63
+ - 0.999
64
+ eps: 1.0e-08
65
+ weight_decay: 1.0e-06
66
+ training:
67
+ device: cuda:0
68
+ seed: 42
69
+ debug: false
70
+ resume: false
71
+ lr_scheduler: cosine
72
+ lr_warmup_steps: 10
73
+ num_epochs: 1001
74
+ gradient_accumulate_every: 8
75
+ grad_norm_clip: 0.5
76
+ rollout_every: 5
77
+ checkpoint_every: 5
78
+ val_every: 1
79
+ sample_every: 5
80
+ sample_max_batch: 128
81
+ max_train_steps: null
82
+ max_val_steps: null
83
+ tqdm_interval_sec: 1.0
84
+ logging:
85
+ project: ${task.name}-training
86
+ resume: true
87
+ mode: online
88
+ name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name}
89
+ tags:
90
+ - ${name}
91
+ - ${task_name}
92
+ - ${exp_name}
93
+ id: null
94
+ group: null
95
+ checkpoint:
96
+ topk:
97
+ monitor_key: test_success_rate
98
+ mode: max
99
+ k: 5
100
+ format_str: epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt
101
+ save_last_ckpt: true
102
+ save_last_snapshot: false
103
+ multi_run:
104
+ run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
105
+ wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name}
106
+ task:
107
+ name: box-close-v2
108
+ obs_dim: 9
109
+ action_dim: 4
110
+ env_runner:
111
+ _target_: llmbc.env_runner.metaworld_lowdim_runner.MetaworldLowdimRunner
112
+ env_name: llf-metaworld-box-close-v2
113
+ n_train: 10
114
+ n_test: 50
115
+ n_envs: 10
116
+ max_steps: 30
117
+ n_obs_steps: ${n_obs_steps}
118
+ n_action_steps: ${n_action_steps}
119
+ instruction_type: b
120
+ feedback_type:
121
+ - hp
122
+ - hn
123
+ - fp
124
+ visual: false
125
+ discount: 0.9
126
+ dataset:
127
+ _target_: llmbc.dataset.metaworld_lowdim_dataset.MetaworldLowdimDataset
128
+ data_path: datasets/box-close-v2.pt
129
+ data_path2: datasets/box-close-v2.pt
130
+ horizon: ${horizon}
131
+ pad_before: ${eval:'${n_obs_steps}-1'}
132
+ pad_after: ${eval:'${n_action_steps}-1'}
133
+ obs_eef_target: true
134
+ use_manual_normalizer: false
135
+ val_ratio: 0.1
136
+ dummy_normalizer: true
137
+ instructor:
138
+ _target_: llmbc.translator.instructor.metaworld_instructor.box_close_v2_instructor.BoxCloseV2Instructor
139
+ llm:
140
+ name: HuggingFaceTB/SmolLM2-135M-Instruct
141
+ model_name: SmolLM2-135M-Instruct
142
+ config_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig
143
+ causal_lm_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM
144
+ use_quantization: false
145
+ use_joint_mlp_projector: true
146
+ llm_mode: ete-finetuned
147
+ finetune_mode: orig
148
+ checkpoint: data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890
149
+ max_length: 100
150
+ lora_config:
151
+ r: 32
152
+ lora_alpha: 64
153
+ lora_dropout: 0.05
154
+ bias: none
155
+ task_type: CAUSAL_LM
156
+ prompter:
157
+ _target_: llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter
158
+ use_joint_mlp_projector: true
159
+ hydra:
160
+ job:
161
+ override_dirname: ${model_name}
162
+ run:
163
+ dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${model_name}
2026.01.21/12.10.34_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hydra:
2
+ run:
3
+ dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
4
+ sweep:
5
+ dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
6
+ subdir: ${hydra.job.num}
7
+ launcher:
8
+ _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
9
+ sweeper:
10
+ _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
11
+ max_batch_size: null
12
+ params: null
13
+ help:
14
+ app_name: ${hydra.job.name}
15
+ header: '${hydra.help.app_name} is powered by Hydra.
16
+
17
+ '
18
+ footer: 'Powered by Hydra (https://hydra.cc)
19
+
20
+ Use --hydra-help to view Hydra specific help
21
+
22
+ '
23
+ template: '${hydra.help.header}
24
+
25
+ == Configuration groups ==
26
+
27
+ Compose your configuration from those groups (group=option)
28
+
29
+
30
+ $APP_CONFIG_GROUPS
31
+
32
+
33
+ == Config ==
34
+
35
+ Override anything in the config (foo.bar=value)
36
+
37
+
38
+ $CONFIG
39
+
40
+
41
+ ${hydra.help.footer}
42
+
43
+ '
44
+ hydra_help:
45
+ template: 'Hydra (${hydra.runtime.version})
46
+
47
+ See https://hydra.cc for more info.
48
+
49
+
50
+ == Flags ==
51
+
52
+ $FLAGS_HELP
53
+
54
+
55
+ == Configuration groups ==
56
+
57
+ Compose your configuration from those groups (For example, append hydra/job_logging=disabled
58
+ to command line)
59
+
60
+
61
+ $HYDRA_CONFIG_GROUPS
62
+
63
+
64
+ Use ''--cfg hydra'' to Show the Hydra config.
65
+
66
+ '
67
+ hydra_help: ???
68
+ hydra_logging:
69
+ version: 1
70
+ formatters:
71
+ simple:
72
+ format: '[%(asctime)s][HYDRA] %(message)s'
73
+ handlers:
74
+ console:
75
+ class: logging.StreamHandler
76
+ formatter: simple
77
+ stream: ext://sys.stdout
78
+ root:
79
+ level: INFO
80
+ handlers:
81
+ - console
82
+ loggers:
83
+ logging_example:
84
+ level: DEBUG
85
+ disable_existing_loggers: false
86
+ job_logging:
87
+ version: 1
88
+ formatters:
89
+ simple:
90
+ format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
91
+ handlers:
92
+ console:
93
+ class: logging.StreamHandler
94
+ formatter: simple
95
+ stream: ext://sys.stdout
96
+ file:
97
+ class: logging.FileHandler
98
+ formatter: simple
99
+ filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
100
+ root:
101
+ level: INFO
102
+ handlers:
103
+ - console
104
+ - file
105
+ disable_existing_loggers: false
106
+ env: {}
107
+ mode: RUN
108
+ searchpath: []
109
+ callbacks: {}
110
+ output_subdir: .hydra
111
+ overrides:
112
+ hydra:
113
+ - hydra.mode=RUN
114
+ task: []
115
+ job:
116
+ name: train
117
+ chdir: null
118
+ override_dirname: ''
119
+ id: ???
120
+ num: ???
121
+ config_name: llmbc_box-close-v2.yaml
122
+ env_set: {}
123
+ env_copy: []
124
+ config:
125
+ override_dirname:
126
+ kv_sep: '='
127
+ item_sep: ','
128
+ exclude_keys: []
129
+ runtime:
130
+ version: 1.2.0
131
+ version_base: '1.2'
132
+ cwd: /work/u1131674/LLM-BC
133
+ config_sources:
134
+ - path: hydra.conf
135
+ schema: pkg
136
+ provider: hydra
137
+ - path: /work/u1131674/LLM-BC/config/main_table
138
+ schema: file
139
+ provider: main
140
+ - path: ''
141
+ schema: structured
142
+ provider: schema
143
+ output_dir: /work/u1131674/LLM-BC/data/outputs/2026.01.21/12.10.34_train_llmbc_lowdim_box-close-v2
144
+ choices:
145
+ hydra/env: default
146
+ hydra/callbacks: null
147
+ hydra/job_logging: default
148
+ hydra/hydra_logging: default
149
+ hydra/hydra_help: default
150
+ hydra/help: default
151
+ hydra/sweeper: basic
152
+ hydra/launcher: basic
153
+ hydra/output: default
154
+ verbose: false
2026.01.21/12.10.34_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml ADDED
@@ -0,0 +1 @@
 
 
1
+ []
2026.01.21/12.10.34_train_llmbc_lowdim_box-close-v2/train.log ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ [2026-01-21 12:10:36,293][numexpr.utils][INFO] - Note: detected 224 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable.
2
+ [2026-01-21 12:10:36,293][numexpr.utils][INFO] - Note: NumExpr detected 224 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 16.
3
+ [2026-01-21 12:10:36,293][numexpr.utils][INFO] - NumExpr defaulting to 16 threads.
4
+ [2026-01-21 12:10:42,233][datasets][INFO] - PyTorch version 2.2.2 available.
5
+ [2026-01-21 12:10:42,234][datasets][INFO] - TensorFlow version 2.15.1 available.
6
+ [2026-01-21 12:10:42,235][datasets][INFO] - JAX version 0.4.30 available.
7
+ [2026-01-21 12:11:05,787][matplotlib.font_manager][INFO] - Failed to extract font properties from /usr/share/fonts/google-noto-emoji/NotoColorEmoji.ttf: In FT2Font: Can not load face (unknown file format; error code 0x2)
8
+ [2026-01-21 12:11:05,799][matplotlib.font_manager][INFO] - generated new fontManager
9
+ [2026-01-21 12:11:07,857][OpenGL.platform.ctypesloader][INFO] - Failed to load library ( 'libOSMesa.so.0' ): libOSMesa.so.0: cannot open shared object file: No such file or directory
2026.01.21/12.13.09_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: train_llmbc_lowdim
2
+ _target_: llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace
3
+ obs_dim: ${task.obs_dim}
4
+ action_dim: ${task.action_dim}
5
+ task_name: ${task.name}
6
+ exp_name: default
7
+ model_name: ${llm.name}
8
+ horizon: 1
9
+ n_obs_steps: 1
10
+ n_action_steps: 1
11
+ n_latency_steps: 0
12
+ past_action_visible: false
13
+ llm_orig_expert_feedback: true
14
+ llm_do_sample: false
15
+ policy:
16
+ _target_: llmbc.policy.llmbc_lowdim_policy.LLMBCLowdimPolicy
17
+ model:
18
+ _target_: llmbc.model.policy.policy_mlp.PolicyMLP
19
+ input_size: ${eval:'${n_obs_steps}*${obs_dim}'}
20
+ hidden_size:
21
+ - 256
22
+ - 256
23
+ output_size: ${eval:'${n_action_steps}*${action_dim}'}
24
+ activation: relu
25
+ n_obs_steps: ${n_obs_steps}
26
+ n_action_steps: ${n_action_steps}
27
+ obs_dim: ${obs_dim}
28
+ action_dim: ${action_dim}
29
+ llm_discriminator:
30
+ _target_: llmbc.discriminator.llm_ce_discriminator.LLMCEDiscriminator
31
+ task_id: ${task_name}
32
+ llm_translator:
33
+ _target_: llmbc.translator.llm_translator.LLMTranslator
34
+ cfg: ${llm}
35
+ obs_dim: ${task.obs_dim}
36
+ action_dim: ${task.action_dim}
37
+ horizon: ${horizon}
38
+ n_obs_steps: ${n_obs_steps}
39
+ n_action_steps: ${n_action_steps}
40
+ loss_bc_weight: 1.0
41
+ loss_llm_weight: 0.01
42
+ horizon: ${horizon}
43
+ n_obs_steps: ${n_obs_steps}
44
+ n_action_steps: ${n_action_steps}
45
+ normalize_llm_loss: true
46
+ dataloader:
47
+ batch_size: 16
48
+ num_workers: 0
49
+ shuffle: true
50
+ pin_memory: false
51
+ persistent_workers: false
52
+ val_dataloader:
53
+ batch_size: 16
54
+ num_workers: 0
55
+ shuffle: true
56
+ pin_memory: false
57
+ persistent_workers: false
58
+ optimizer:
59
+ _target_: torch.optim.AdamW
60
+ lr: 0.01
61
+ betas:
62
+ - 0.95
63
+ - 0.999
64
+ eps: 1.0e-08
65
+ weight_decay: 1.0e-06
66
+ training:
67
+ device: cuda:0
68
+ seed: 42
69
+ debug: false
70
+ resume: false
71
+ lr_scheduler: cosine
72
+ lr_warmup_steps: 10
73
+ num_epochs: 1001
74
+ gradient_accumulate_every: 8
75
+ grad_norm_clip: 0.5
76
+ rollout_every: 5
77
+ checkpoint_every: 5
78
+ val_every: 1
79
+ sample_every: 5
80
+ sample_max_batch: 128
81
+ max_train_steps: null
82
+ max_val_steps: null
83
+ tqdm_interval_sec: 1.0
84
+ logging:
85
+ project: ${task.name}-training
86
+ resume: true
87
+ mode: online
88
+ name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name}
89
+ tags:
90
+ - ${name}
91
+ - ${task_name}
92
+ - ${exp_name}
93
+ id: null
94
+ group: null
95
+ checkpoint:
96
+ topk:
97
+ monitor_key: test_success_rate
98
+ mode: max
99
+ k: 5
100
+ format_str: epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt
101
+ save_last_ckpt: true
102
+ save_last_snapshot: false
103
+ multi_run:
104
+ run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
105
+ wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name}
106
+ task:
107
+ name: box-close-v2
108
+ obs_dim: 9
109
+ action_dim: 4
110
+ env_runner:
111
+ _target_: llmbc.env_runner.metaworld_lowdim_runner.MetaworldLowdimRunner
112
+ env_name: llf-metaworld-box-close-v2
113
+ n_train: 10
114
+ n_test: 50
115
+ n_envs: 10
116
+ max_steps: 30
117
+ n_obs_steps: ${n_obs_steps}
118
+ n_action_steps: ${n_action_steps}
119
+ instruction_type: b
120
+ feedback_type:
121
+ - hp
122
+ - hn
123
+ - fp
124
+ visual: false
125
+ discount: 0.9
126
+ dataset:
127
+ _target_: llmbc.dataset.metaworld_lowdim_dataset.MetaworldLowdimDataset
128
+ data_path: datasets/box-close-v2.pt
129
+ data_path2: datasets/box-close-v2.pt
130
+ horizon: ${horizon}
131
+ pad_before: ${eval:'${n_obs_steps}-1'}
132
+ pad_after: ${eval:'${n_action_steps}-1'}
133
+ obs_eef_target: true
134
+ use_manual_normalizer: false
135
+ val_ratio: 0.1
136
+ dummy_normalizer: true
137
+ instructor:
138
+ _target_: llmbc.translator.instructor.metaworld_instructor.box_close_v2_instructor.BoxCloseV2Instructor
139
+ llm:
140
+ name: HuggingFaceTB/SmolLM2-135M-Instruct
141
+ model_name: SmolLM2-135M-Instruct
142
+ config_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig
143
+ causal_lm_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM
144
+ use_quantization: false
145
+ use_joint_mlp_projector: true
146
+ llm_mode: ete-finetuned
147
+ finetune_mode: orig
148
+ checkpoint: data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890
149
+ max_length: 100
150
+ lora_config:
151
+ r: 32
152
+ lora_alpha: 64
153
+ lora_dropout: 0.05
154
+ bias: none
155
+ task_type: CAUSAL_LM
156
+ prompter:
157
+ _target_: llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter
158
+ use_joint_mlp_projector: true
159
+ hydra:
160
+ job:
161
+ override_dirname: ${model_name}
162
+ run:
163
+ dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${model_name}
2026.01.21/12.13.09_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hydra:
2
+ run:
3
+ dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
4
+ sweep:
5
+ dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
6
+ subdir: ${hydra.job.num}
7
+ launcher:
8
+ _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
9
+ sweeper:
10
+ _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
11
+ max_batch_size: null
12
+ params: null
13
+ help:
14
+ app_name: ${hydra.job.name}
15
+ header: '${hydra.help.app_name} is powered by Hydra.
16
+
17
+ '
18
+ footer: 'Powered by Hydra (https://hydra.cc)
19
+
20
+ Use --hydra-help to view Hydra specific help
21
+
22
+ '
23
+ template: '${hydra.help.header}
24
+
25
+ == Configuration groups ==
26
+
27
+ Compose your configuration from those groups (group=option)
28
+
29
+
30
+ $APP_CONFIG_GROUPS
31
+
32
+
33
+ == Config ==
34
+
35
+ Override anything in the config (foo.bar=value)
36
+
37
+
38
+ $CONFIG
39
+
40
+
41
+ ${hydra.help.footer}
42
+
43
+ '
44
+ hydra_help:
45
+ template: 'Hydra (${hydra.runtime.version})
46
+
47
+ See https://hydra.cc for more info.
48
+
49
+
50
+ == Flags ==
51
+
52
+ $FLAGS_HELP
53
+
54
+
55
+ == Configuration groups ==
56
+
57
+ Compose your configuration from those groups (For example, append hydra/job_logging=disabled
58
+ to command line)
59
+
60
+
61
+ $HYDRA_CONFIG_GROUPS
62
+
63
+
64
+ Use ''--cfg hydra'' to Show the Hydra config.
65
+
66
+ '
67
+ hydra_help: ???
68
+ hydra_logging:
69
+ version: 1
70
+ formatters:
71
+ simple:
72
+ format: '[%(asctime)s][HYDRA] %(message)s'
73
+ handlers:
74
+ console:
75
+ class: logging.StreamHandler
76
+ formatter: simple
77
+ stream: ext://sys.stdout
78
+ root:
79
+ level: INFO
80
+ handlers:
81
+ - console
82
+ loggers:
83
+ logging_example:
84
+ level: DEBUG
85
+ disable_existing_loggers: false
86
+ job_logging:
87
+ version: 1
88
+ formatters:
89
+ simple:
90
+ format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
91
+ handlers:
92
+ console:
93
+ class: logging.StreamHandler
94
+ formatter: simple
95
+ stream: ext://sys.stdout
96
+ file:
97
+ class: logging.FileHandler
98
+ formatter: simple
99
+ filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
100
+ root:
101
+ level: INFO
102
+ handlers:
103
+ - console
104
+ - file
105
+ disable_existing_loggers: false
106
+ env: {}
107
+ mode: RUN
108
+ searchpath: []
109
+ callbacks: {}
110
+ output_subdir: .hydra
111
+ overrides:
112
+ hydra:
113
+ - hydra.mode=RUN
114
+ task: []
115
+ job:
116
+ name: train
117
+ chdir: null
118
+ override_dirname: ''
119
+ id: ???
120
+ num: ???
121
+ config_name: llmbc_box-close-v2.yaml
122
+ env_set: {}
123
+ env_copy: []
124
+ config:
125
+ override_dirname:
126
+ kv_sep: '='
127
+ item_sep: ','
128
+ exclude_keys: []
129
+ runtime:
130
+ version: 1.2.0
131
+ version_base: '1.2'
132
+ cwd: /work/u1131674/LLM-BC
133
+ config_sources:
134
+ - path: hydra.conf
135
+ schema: pkg
136
+ provider: hydra
137
+ - path: /work/u1131674/LLM-BC/config/main_table
138
+ schema: file
139
+ provider: main
140
+ - path: ''
141
+ schema: structured
142
+ provider: schema
143
+ output_dir: /work/u1131674/LLM-BC/data/outputs/2026.01.21/12.13.09_train_llmbc_lowdim_box-close-v2
144
+ choices:
145
+ hydra/env: default
146
+ hydra/callbacks: null
147
+ hydra/job_logging: default
148
+ hydra/hydra_logging: default
149
+ hydra/hydra_help: default
150
+ hydra/help: default
151
+ hydra/sweeper: basic
152
+ hydra/launcher: basic
153
+ hydra/output: default
154
+ verbose: false
2026.01.21/12.13.09_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml ADDED
@@ -0,0 +1 @@
 
 
1
+ []
2026.01.21/12.13.09_train_llmbc_lowdim_box-close-v2/train.log ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [2026-01-21 12:13:11,502][numexpr.utils][INFO] - Note: detected 224 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable.
2
+ [2026-01-21 12:13:11,502][numexpr.utils][INFO] - Note: NumExpr detected 224 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 16.
3
+ [2026-01-21 12:13:11,502][numexpr.utils][INFO] - NumExpr defaulting to 16 threads.
4
+ [2026-01-21 12:13:16,444][datasets][INFO] - PyTorch version 2.2.2 available.
5
+ [2026-01-21 12:13:16,445][datasets][INFO] - TensorFlow version 2.15.1 available.
6
+ [2026-01-21 12:13:16,446][datasets][INFO] - JAX version 0.4.30 available.
7
+ [2026-01-21 12:13:41,170][root][INFO] - running build_ext
8
+ [2026-01-21 12:13:41,174][root][INFO] - building 'mujoco_py.cymj' extension
9
+ [2026-01-21 12:13:41,174][root][INFO] - creating /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/generated/_pyxbld_2.1.2.14_39_linuxgpuextensionbuilder/temp.linux-x86_64-cpython-39/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py
10
+ [2026-01-21 12:13:41,196][root][INFO] - creating /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/generated/_pyxbld_2.1.2.14_39_linuxgpuextensionbuilder/temp.linux-x86_64-cpython-39/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/gl
11
+ [2026-01-21 12:13:41,197][root][INFO] - gcc -pthread -B /home/u1131674/.conda/envs/llm-bc/compiler_compat -Wno-unused-result -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /home/u1131674/.conda/envs/llm-bc/include -I/home/u1131674/.conda/envs/llm-bc/include -fPIC -O2 -isystem /home/u1131674/.conda/envs/llm-bc/include -fPIC -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py -I/home/u1131674/.mujoco/mujoco210/include -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/numpy/core/include -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/vendor/egl -I/home/u1131674/.conda/envs/llm-bc/include/python3.9 -c /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/cymj.c -o /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/generated/_pyxbld_2.1.2.14_39_linuxgpuextensionbuilder/temp.linux-x86_64-cpython-39/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/cymj.o -fopenmp -w
12
+ [2026-01-21 12:14:08,619][root][INFO] - gcc -pthread -B /home/u1131674/.conda/envs/llm-bc/compiler_compat -Wno-unused-result -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /home/u1131674/.conda/envs/llm-bc/include -I/home/u1131674/.conda/envs/llm-bc/include -fPIC -O2 -isystem /home/u1131674/.conda/envs/llm-bc/include -fPIC -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py -I/home/u1131674/.mujoco/mujoco210/include -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/numpy/core/include -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/vendor/egl -I/home/u1131674/.conda/envs/llm-bc/include/python3.9 -c /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/gl/eglshim.c -o /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/generated/_pyxbld_2.1.2.14_39_linuxgpuextensionbuilder/temp.linux-x86_64-cpython-39/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/gl/eglshim.o -fopenmp -w
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: train_llmbc_lowdim
2
+ _target_: llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace
3
+ obs_dim: ${task.obs_dim}
4
+ action_dim: ${task.action_dim}
5
+ task_name: ${task.name}
6
+ exp_name: default
7
+ model_name: ${llm.name}
8
+ horizon: 1
9
+ n_obs_steps: 1
10
+ n_action_steps: 1
11
+ n_latency_steps: 0
12
+ past_action_visible: false
13
+ llm_orig_expert_feedback: true
14
+ llm_do_sample: false
15
+ policy:
16
+ _target_: llmbc.policy.llmbc_lowdim_policy.LLMBCLowdimPolicy
17
+ model:
18
+ _target_: llmbc.model.policy.policy_mlp.PolicyMLP
19
+ input_size: ${eval:'${n_obs_steps}*${obs_dim}'}
20
+ hidden_size:
21
+ - 256
22
+ - 256
23
+ output_size: ${eval:'${n_action_steps}*${action_dim}'}
24
+ activation: relu
25
+ n_obs_steps: ${n_obs_steps}
26
+ n_action_steps: ${n_action_steps}
27
+ obs_dim: ${obs_dim}
28
+ action_dim: ${action_dim}
29
+ llm_discriminator:
30
+ _target_: llmbc.discriminator.llm_ce_discriminator.LLMCEDiscriminator
31
+ task_id: ${task_name}
32
+ llm_translator:
33
+ _target_: llmbc.translator.llm_translator.LLMTranslator
34
+ cfg: ${llm}
35
+ obs_dim: ${task.obs_dim}
36
+ action_dim: ${task.action_dim}
37
+ horizon: ${horizon}
38
+ n_obs_steps: ${n_obs_steps}
39
+ n_action_steps: ${n_action_steps}
40
+ loss_bc_weight: 1.0
41
+ loss_llm_weight: 0.01
42
+ horizon: ${horizon}
43
+ n_obs_steps: ${n_obs_steps}
44
+ n_action_steps: ${n_action_steps}
45
+ normalize_llm_loss: true
46
+ dataloader:
47
+ batch_size: 16
48
+ num_workers: 0
49
+ shuffle: true
50
+ pin_memory: false
51
+ persistent_workers: false
52
+ val_dataloader:
53
+ batch_size: 16
54
+ num_workers: 0
55
+ shuffle: true
56
+ pin_memory: false
57
+ persistent_workers: false
58
+ optimizer:
59
+ _target_: torch.optim.AdamW
60
+ lr: 0.01
61
+ betas:
62
+ - 0.95
63
+ - 0.999
64
+ eps: 1.0e-08
65
+ weight_decay: 1.0e-06
66
+ training:
67
+ device: cuda:0
68
+ seed: 42
69
+ debug: false
70
+ resume: false
71
+ lr_scheduler: cosine
72
+ lr_warmup_steps: 10
73
+ num_epochs: 1001
74
+ gradient_accumulate_every: 8
75
+ grad_norm_clip: 0.5
76
+ rollout_every: 5
77
+ checkpoint_every: 5
78
+ val_every: 1
79
+ sample_every: 5
80
+ sample_max_batch: 128
81
+ max_train_steps: null
82
+ max_val_steps: null
83
+ tqdm_interval_sec: 1.0
84
+ logging:
85
+ project: ${task.name}-training
86
+ resume: true
87
+ mode: online
88
+ name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name}
89
+ tags:
90
+ - ${name}
91
+ - ${task_name}
92
+ - ${exp_name}
93
+ id: null
94
+ group: null
95
+ checkpoint:
96
+ topk:
97
+ monitor_key: test_success_rate
98
+ mode: max
99
+ k: 5
100
+ format_str: epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt
101
+ save_last_ckpt: true
102
+ save_last_snapshot: false
103
+ multi_run:
104
+ run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
105
+ wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name}
106
+ task:
107
+ name: box-close-v2
108
+ obs_dim: 9
109
+ action_dim: 4
110
+ env_runner:
111
+ _target_: llmbc.env_runner.metaworld_lowdim_runner.MetaworldLowdimRunner
112
+ env_name: llf-metaworld-box-close-v2
113
+ n_train: 10
114
+ n_test: 50
115
+ n_envs: 10
116
+ max_steps: 30
117
+ n_obs_steps: ${n_obs_steps}
118
+ n_action_steps: ${n_action_steps}
119
+ instruction_type: b
120
+ feedback_type:
121
+ - hp
122
+ - hn
123
+ - fp
124
+ visual: false
125
+ discount: 0.9
126
+ dataset:
127
+ _target_: llmbc.dataset.metaworld_lowdim_dataset.MetaworldLowdimDataset
128
+ data_path: datasets/box-close-v2.pt
129
+ data_path2: datasets/box-close-v2.pt
130
+ horizon: ${horizon}
131
+ pad_before: ${eval:'${n_obs_steps}-1'}
132
+ pad_after: ${eval:'${n_action_steps}-1'}
133
+ obs_eef_target: true
134
+ use_manual_normalizer: false
135
+ val_ratio: 0.1
136
+ dummy_normalizer: true
137
+ instructor:
138
+ _target_: llmbc.translator.instructor.metaworld_instructor.box_close_v2_instructor.BoxCloseV2Instructor
139
+ llm:
140
+ name: HuggingFaceTB/SmolLM2-135M-Instruct
141
+ model_name: SmolLM2-135M-Instruct
142
+ config_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig
143
+ causal_lm_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM
144
+ use_quantization: false
145
+ use_joint_mlp_projector: true
146
+ llm_mode: ete-finetuned
147
+ finetune_mode: orig
148
+ checkpoint: data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890
149
+ max_length: 100
150
+ lora_config:
151
+ r: 32
152
+ lora_alpha: 64
153
+ lora_dropout: 0.05
154
+ bias: none
155
+ task_type: CAUSAL_LM
156
+ prompter:
157
+ _target_: llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter
158
+ use_joint_mlp_projector: true
159
+ hydra:
160
+ job:
161
+ override_dirname: ${model_name}
162
+ run:
163
+ dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${model_name}
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hydra:
2
+ run:
3
+ dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
4
+ sweep:
5
+ dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
6
+ subdir: ${hydra.job.num}
7
+ launcher:
8
+ _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
9
+ sweeper:
10
+ _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
11
+ max_batch_size: null
12
+ params: null
13
+ help:
14
+ app_name: ${hydra.job.name}
15
+ header: '${hydra.help.app_name} is powered by Hydra.
16
+
17
+ '
18
+ footer: 'Powered by Hydra (https://hydra.cc)
19
+
20
+ Use --hydra-help to view Hydra specific help
21
+
22
+ '
23
+ template: '${hydra.help.header}
24
+
25
+ == Configuration groups ==
26
+
27
+ Compose your configuration from those groups (group=option)
28
+
29
+
30
+ $APP_CONFIG_GROUPS
31
+
32
+
33
+ == Config ==
34
+
35
+ Override anything in the config (foo.bar=value)
36
+
37
+
38
+ $CONFIG
39
+
40
+
41
+ ${hydra.help.footer}
42
+
43
+ '
44
+ hydra_help:
45
+ template: 'Hydra (${hydra.runtime.version})
46
+
47
+ See https://hydra.cc for more info.
48
+
49
+
50
+ == Flags ==
51
+
52
+ $FLAGS_HELP
53
+
54
+
55
+ == Configuration groups ==
56
+
57
+ Compose your configuration from those groups (For example, append hydra/job_logging=disabled
58
+ to command line)
59
+
60
+
61
+ $HYDRA_CONFIG_GROUPS
62
+
63
+
64
+ Use ''--cfg hydra'' to Show the Hydra config.
65
+
66
+ '
67
+ hydra_help: ???
68
+ hydra_logging:
69
+ version: 1
70
+ formatters:
71
+ simple:
72
+ format: '[%(asctime)s][HYDRA] %(message)s'
73
+ handlers:
74
+ console:
75
+ class: logging.StreamHandler
76
+ formatter: simple
77
+ stream: ext://sys.stdout
78
+ root:
79
+ level: INFO
80
+ handlers:
81
+ - console
82
+ loggers:
83
+ logging_example:
84
+ level: DEBUG
85
+ disable_existing_loggers: false
86
+ job_logging:
87
+ version: 1
88
+ formatters:
89
+ simple:
90
+ format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
91
+ handlers:
92
+ console:
93
+ class: logging.StreamHandler
94
+ formatter: simple
95
+ stream: ext://sys.stdout
96
+ file:
97
+ class: logging.FileHandler
98
+ formatter: simple
99
+ filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
100
+ root:
101
+ level: INFO
102
+ handlers:
103
+ - console
104
+ - file
105
+ disable_existing_loggers: false
106
+ env: {}
107
+ mode: RUN
108
+ searchpath: []
109
+ callbacks: {}
110
+ output_subdir: .hydra
111
+ overrides:
112
+ hydra:
113
+ - hydra.mode=RUN
114
+ task: []
115
+ job:
116
+ name: train
117
+ chdir: null
118
+ override_dirname: ''
119
+ id: ???
120
+ num: ???
121
+ config_name: llmbc_box-close-v2.yaml
122
+ env_set: {}
123
+ env_copy: []
124
+ config:
125
+ override_dirname:
126
+ kv_sep: '='
127
+ item_sep: ','
128
+ exclude_keys: []
129
+ runtime:
130
+ version: 1.2.0
131
+ version_base: '1.2'
132
+ cwd: /work/u1131674/LLM-BC
133
+ config_sources:
134
+ - path: hydra.conf
135
+ schema: pkg
136
+ provider: hydra
137
+ - path: /work/u1131674/LLM-BC/config/main_table
138
+ schema: file
139
+ provider: main
140
+ - path: ''
141
+ schema: structured
142
+ provider: schema
143
+ output_dir: /work/u1131674/LLM-BC/data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2
144
+ choices:
145
+ hydra/env: default
146
+ hydra/callbacks: null
147
+ hydra/job_logging: default
148
+ hydra/hydra_logging: default
149
+ hydra/hydra_help: default
150
+ hydra/help: default
151
+ hydra/sweeper: basic
152
+ hydra/launcher: basic
153
+ hydra/output: default
154
+ verbose: false
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml ADDED
@@ -0,0 +1 @@
 
 
1
+ []
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/logs.json.txt ADDED
@@ -0,0 +1,237 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_loss": 0.2577439248561859, "train_loss_bc": 0.25195014476776123, "train_loss_llm": 0.5793781280517578, "grad_norm": 0.12829534709453583, "global_step": 0, "epoch": 0, "lr": 0.001}
2
+ {"train_loss": 0.278277724981308, "train_loss_bc": 0.27264082431793213, "train_loss_llm": 0.5636913180351257, "grad_norm": 0.13488440215587616, "global_step": 1, "epoch": 0, "lr": 0.001}
3
+ {"train_loss": 0.29180172085762024, "train_loss_bc": 0.28621771931648254, "train_loss_llm": 0.5584006309509277, "grad_norm": 0.27443262934684753, "global_step": 2, "epoch": 0, "lr": 0.001}
4
+ {"train_loss": 0.2927302420139313, "train_loss_bc": 0.2865779399871826, "train_loss_llm": 0.6152303218841553, "grad_norm": 0.4118553102016449, "global_step": 3, "epoch": 0, "lr": 0.001}
5
+ {"train_loss": 0.28513821959495544, "train_loss_bc": 0.2797144651412964, "train_loss_llm": 0.5423756241798401, "grad_norm": 0.5492109656333923, "global_step": 4, "epoch": 0, "lr": 0.001}
6
+ {"train_loss": 0.31990620493888855, "train_loss_bc": 0.31439733505249023, "train_loss_llm": 0.5508874654769897, "grad_norm": 0.6955047845840454, "global_step": 5, "epoch": 0, "lr": 0.001}
7
+ {"train_loss": 0.27779361605644226, "train_loss_bc": 0.27196407318115234, "train_loss_llm": 0.5829552412033081, "grad_norm": 0.8313235640525818, "global_step": 6, "epoch": 0, "lr": 0.001}
8
+ {"train_loss": 0.23134832084178925, "train_loss_bc": 0.22543349862098694, "train_loss_llm": 0.5914825201034546, "grad_norm": 0.9541349411010742, "global_step": 7, "epoch": 0, "lr": 0.001}
9
+ {"train_loss": 0.2081925868988037, "train_loss_bc": 0.2028963267803192, "train_loss_llm": 0.5296263694763184, "grad_norm": 1.0696462392807007, "global_step": 8, "epoch": 0, "lr": 0.002}
10
+ {"train_loss": 0.2047700732946396, "train_loss_bc": 0.19888944923877716, "train_loss_llm": 0.5880619287490845, "grad_norm": 0.11593382805585861, "global_step": 9, "epoch": 0, "lr": 0.002}
11
+ {"train_loss": 0.2171408236026764, "train_loss_bc": 0.2111976146697998, "train_loss_llm": 0.5943207740783691, "grad_norm": 0.23421066999435425, "global_step": 10, "epoch": 0, "lr": 0.002}
12
+ {"train_loss": 0.211279034614563, "train_loss_bc": 0.2063535749912262, "train_loss_llm": 0.4925457537174225, "grad_norm": 0.3522197902202606, "global_step": 11, "epoch": 0, "lr": 0.002}
13
+ {"train_loss": 0.2630419433116913, "train_loss_bc": 0.2565857172012329, "train_loss_llm": 0.6456230282783508, "grad_norm": 0.48301446437835693, "global_step": 12, "epoch": 0, "lr": 0.002}
14
+ {"train_loss": 0.2441762089729309, "train_loss_bc": 0.23813079297542572, "train_loss_llm": 0.604541540145874, "grad_norm": 0.609789252281189, "global_step": 13, "epoch": 0, "lr": 0.002}
15
+ {"train_loss": 0.2768160402774811, "train_loss_bc": 0.27063897252082825, "train_loss_llm": 0.6177071332931519, "grad_norm": 0.7476180195808411, "global_step": 14, "epoch": 0, "lr": 0.002}
16
+ {"train_loss": 0.2484627217054367, "train_loss_bc": 0.24271151423454285, "train_loss_llm": 0.5751214623451233, "grad_norm": 0.8759933710098267, "global_step": 15, "epoch": 0, "lr": 0.002}
17
+ {"train_loss": 0.20768630504608154, "train_loss_bc": 0.20276379585266113, "train_loss_llm": 0.49225085973739624, "grad_norm": 0.9921573996543884, "global_step": 16, "epoch": 0, "lr": 0.003}
18
+ {"train_loss": 0.16910794377326965, "train_loss_bc": 0.16317197680473328, "train_loss_llm": 0.5935962796211243, "grad_norm": 0.11413145065307617, "global_step": 17, "epoch": 0, "lr": 0.003}
19
+ {"train_loss": 0.1472136378288269, "train_loss_bc": 0.14170503616333008, "train_loss_llm": 0.5508600473403931, "grad_norm": 0.2181655466556549, "global_step": 18, "epoch": 0, "lr": 0.003}
20
+ {"train_loss": 0.09437470138072968, "train_loss_bc": 0.08951498568058014, "train_loss_llm": 0.4859713315963745, "grad_norm": 0.2965621054172516, "global_step": 19, "epoch": 0, "lr": 0.003}
21
+ {"train_loss": 0.14634960889816284, "train_loss_bc": 0.14015674591064453, "train_loss_llm": 0.6192870140075684, "grad_norm": 0.399164617061615, "global_step": 20, "epoch": 0, "lr": 0.003}
22
+ {"train_loss": 0.13075421750545502, "train_loss_bc": 0.12502902746200562, "train_loss_llm": 0.5725185871124268, "grad_norm": 0.49499645829200745, "global_step": 21, "epoch": 0, "lr": 0.003}
23
+ {"train_loss": 0.1632406860589981, "train_loss_bc": 0.15791185200214386, "train_loss_llm": 0.5328830480575562, "grad_norm": 0.6078411936759949, "global_step": 22, "epoch": 0, "lr": 0.003}
24
+ {"train_loss": 0.16032128036022186, "train_loss_bc": 0.1541915237903595, "train_loss_llm": 0.6129759550094604, "grad_norm": 0.7181513905525208, "global_step": 23, "epoch": 0, "lr": 0.003}
25
+ {"train_loss": 0.10194464772939682, "train_loss_bc": 0.09674602746963501, "train_loss_llm": 0.5198622345924377, "grad_norm": 0.8007318377494812, "global_step": 24, "epoch": 0, "lr": 0.004}
26
+ {"train_loss": 0.04645621404051781, "train_loss_bc": 0.04119991511106491, "train_loss_llm": 0.5256298780441284, "grad_norm": 0.04878818616271019, "global_step": 25, "epoch": 0, "lr": 0.004}
27
+ {"train_loss": 0.05316489189863205, "train_loss_bc": 0.04852021113038063, "train_loss_llm": 0.46446824073791504, "grad_norm": 0.10567886382341385, "global_step": 26, "epoch": 0, "lr": 0.004}
28
+ {"train_loss": 0.034993816167116165, "train_loss_bc": 0.03099265694618225, "train_loss_llm": 0.40011608600616455, "grad_norm": 0.12614615261554718, "global_step": 27, "epoch": 0, "lr": 0.004}
29
+ {"train_loss": 0.05056390166282654, "train_loss_bc": 0.04542642831802368, "train_loss_llm": 0.5137471556663513, "grad_norm": 0.17804424464702606, "global_step": 28, "epoch": 0, "lr": 0.004}
30
+ {"train_loss": 0.040129225701093674, "train_loss_bc": 0.03615850210189819, "train_loss_llm": 0.3970724642276764, "grad_norm": 0.21957509219646454, "global_step": 29, "epoch": 0, "lr": 0.004}
31
+ {"train_loss": 0.06979431211948395, "train_loss_bc": 0.06506022810935974, "train_loss_llm": 0.47340837121009827, "grad_norm": 0.30375877022743225, "global_step": 30, "epoch": 0, "lr": 0.004}
32
+ {"train_loss": 0.05452805757522583, "train_loss_bc": 0.050960805267095566, "train_loss_llm": 0.3567253649234772, "grad_norm": 0.3601897656917572, "global_step": 31, "epoch": 0, "lr": 0.004}
33
+ {"train_loss": 0.05965254083275795, "train_loss_bc": 0.055447064340114594, "train_loss_llm": 0.4205475151538849, "grad_norm": 0.4274352192878723, "global_step": 32, "epoch": 0, "lr": 0.005}
34
+ {"train_loss": 0.06257618218660355, "train_loss_bc": 0.05625780671834946, "train_loss_llm": 0.6318378448486328, "grad_norm": 0.09260464459657669, "global_step": 33, "epoch": 0, "lr": 0.005}
35
+ {"train_loss": 0.05445178598165512, "train_loss_bc": 0.04902859777212143, "train_loss_llm": 0.5423187017440796, "grad_norm": 0.16763924062252045, "global_step": 34, "epoch": 0, "lr": 0.005}
36
+ {"train_loss": 0.06851150095462799, "train_loss_bc": 0.06271672248840332, "train_loss_llm": 0.5794777870178223, "grad_norm": 0.2678099274635315, "global_step": 35, "epoch": 0, "lr": 0.005}
37
+ {"train_loss": 0.0630837082862854, "train_loss_bc": 0.0575467087328434, "train_loss_llm": 0.553699791431427, "grad_norm": 0.3552546501159668, "global_step": 36, "epoch": 0, "lr": 0.005}
38
+ {"train_loss": 0.040140487253665924, "train_loss_bc": 0.03421059995889664, "train_loss_llm": 0.5929888486862183, "grad_norm": 0.41354402899742126, "global_step": 37, "epoch": 0, "lr": 0.005}
39
+ {"train_loss": 0.06981470435857773, "train_loss_bc": 0.063104547560215, "train_loss_llm": 0.6710153818130493, "grad_norm": 0.5134375095367432, "global_step": 38, "epoch": 0, "lr": 0.005}
40
+ {"train_loss": 0.051894403994083405, "train_loss_bc": 0.045866355299949646, "train_loss_llm": 0.6028048992156982, "grad_norm": 0.5825293660163879, "global_step": 39, "epoch": 0, "lr": 0.005}
41
+ {"train_loss": 0.04342593997716904, "train_loss_bc": 0.03640042245388031, "train_loss_llm": 0.7025519013404846, "grad_norm": 0.6445399522781372, "global_step": 40, "epoch": 0, "lr": 0.006}
42
+ {"train_loss": 0.1558080017566681, "train_loss_bc": 0.15039610862731934, "train_loss_llm": 0.5411889553070068, "grad_norm": 0.20307017862796783, "global_step": 41, "epoch": 0, "lr": 0.006}
43
+ {"train_loss": 0.12238138169050217, "train_loss_bc": 0.11733964830636978, "train_loss_llm": 0.5041730403900146, "grad_norm": 0.3785540461540222, "global_step": 42, "epoch": 0, "lr": 0.006}
44
+ {"train_loss": 0.11476962268352509, "train_loss_bc": 0.1102944016456604, "train_loss_llm": 0.44752180576324463, "grad_norm": 0.5496576428413391, "global_step": 43, "epoch": 0, "lr": 0.006}
45
+ {"train_loss": 0.1318601667881012, "train_loss_bc": 0.12625660002231598, "train_loss_llm": 0.5603567957878113, "grad_norm": 0.7342697381973267, "global_step": 44, "epoch": 0, "lr": 0.006}
46
+ {"train_loss": 0.15008734166622162, "train_loss_bc": 0.14480489492416382, "train_loss_llm": 0.5282450914382935, "grad_norm": 0.9383558630943298, "global_step": 45, "epoch": 0, "lr": 0.006}
47
+ {"train_loss": 0.11853287369012833, "train_loss_bc": 0.11271888017654419, "train_loss_llm": 0.5813996195793152, "grad_norm": 1.1123522520065308, "global_step": 46, "epoch": 0, "lr": 0.006}
48
+ {"train_loss": 0.14414051175117493, "train_loss_bc": 0.13898390531539917, "train_loss_llm": 0.5156602263450623, "grad_norm": 1.3082720041275024, "global_step": 47, "epoch": 0, "lr": 0.006}
49
+ {"train_loss": 0.1536247432231903, "train_loss_bc": 0.14848382771015167, "train_loss_llm": 0.5140920877456665, "grad_norm": 1.5149050951004028, "global_step": 48, "epoch": 0, "lr": 0.006999999999999999}
50
+ {"train_loss": 0.25954943895339966, "train_loss_bc": 0.25265026092529297, "train_loss_llm": 0.6899186372756958, "grad_norm": 0.3054397702217102, "global_step": 49, "epoch": 0, "lr": 0.006999999999999999}
51
+ {"train_loss": 0.1506877839565277, "train_loss_bc": 0.1453518569469452, "train_loss_llm": 0.5335921049118042, "grad_norm": 0.5257424116134644, "global_step": 50, "epoch": 0, "lr": 0.006999999999999999}
52
+ {"train_loss": 0.17754197120666504, "train_loss_bc": 0.17331534624099731, "train_loss_llm": 0.4226621985435486, "grad_norm": 0.769081711769104, "global_step": 51, "epoch": 0, "lr": 0.006999999999999999}
53
+ {"train_loss": 0.27337175607681274, "train_loss_bc": 0.26682397723197937, "train_loss_llm": 0.6547775268554688, "grad_norm": 1.0860713720321655, "global_step": 52, "epoch": 0, "lr": 0.006999999999999999}
54
+ {"train_loss": 0.21706189215183258, "train_loss_bc": 0.21164150536060333, "train_loss_llm": 0.5420382022857666, "grad_norm": 1.3574727773666382, "global_step": 53, "epoch": 0, "lr": 0.006999999999999999}
55
+ {"train_loss": 0.16595229506492615, "train_loss_bc": 0.16188554465770721, "train_loss_llm": 0.4066758155822754, "grad_norm": 1.5899840593338013, "global_step": 54, "epoch": 0, "lr": 0.006999999999999999}
56
+ {"train_loss": 0.23229315876960754, "train_loss_bc": 0.22762833535671234, "train_loss_llm": 0.4664822220802307, "grad_norm": 1.8754494190216064, "global_step": 55, "epoch": 0, "lr": 0.006999999999999999}
57
+ {"train_loss": 0.21556805074214935, "train_loss_bc": 0.2103624939918518, "train_loss_llm": 0.5205552577972412, "grad_norm": 2.147486448287964, "global_step": 56, "epoch": 0, "lr": 0.008}
58
+ {"train_loss": 0.22826582193374634, "train_loss_bc": 0.22174005210399628, "train_loss_llm": 0.6525774002075195, "grad_norm": 0.286575049161911, "global_step": 57, "epoch": 0, "lr": 0.008}
59
+ {"train_loss": 0.20953819155693054, "train_loss_bc": 0.203176349401474, "train_loss_llm": 0.6361845135688782, "grad_norm": 0.5594003200531006, "global_step": 58, "epoch": 0, "lr": 0.008}
60
+ {"train_loss": 0.191473588347435, "train_loss_bc": 0.18566101789474487, "train_loss_llm": 0.581256628036499, "grad_norm": 0.8172082304954529, "global_step": 59, "epoch": 0, "lr": 0.008}
61
+ {"train_loss": 0.17888422310352325, "train_loss_bc": 0.17266017198562622, "train_loss_llm": 0.6224054098129272, "grad_norm": 1.0602154731750488, "global_step": 60, "epoch": 0, "lr": 0.008}
62
+ {"train_loss": 0.21835987269878387, "train_loss_bc": 0.21199063956737518, "train_loss_llm": 0.6369228959083557, "grad_norm": 1.3346713781356812, "global_step": 61, "epoch": 0, "lr": 0.008}
63
+ {"train_loss": 0.17873793840408325, "train_loss_bc": 0.17244993150234222, "train_loss_llm": 0.6288003921508789, "grad_norm": 1.583105206489563, "global_step": 62, "epoch": 0, "lr": 0.008}
64
+ {"train_loss": 0.14904041588306427, "train_loss_bc": 0.14287304878234863, "train_loss_llm": 0.616736888885498, "grad_norm": 1.8050798177719116, "global_step": 63, "epoch": 0, "lr": 0.008}
65
+ {"train_loss": 0.22122563421726227, "train_loss_bc": 0.215244859457016, "train_loss_llm": 0.5980769395828247, "grad_norm": 2.082054615020752, "global_step": 64, "epoch": 0, "lr": 0.009000000000000001}
66
+ {"train_loss": 0.11144096404314041, "train_loss_bc": 0.10432037711143494, "train_loss_llm": 0.712058424949646, "grad_norm": 0.1753779500722885, "global_step": 65, "epoch": 0, "lr": 0.009000000000000001}
67
+ {"train_loss": 0.11379032582044601, "train_loss_bc": 0.107419952750206, "train_loss_llm": 0.6370369791984558, "grad_norm": 0.3535049855709076, "global_step": 66, "epoch": 0, "lr": 0.009000000000000001}
68
+ {"train_loss": 0.10985075682401657, "train_loss_bc": 0.1022319421172142, "train_loss_llm": 0.7618812322616577, "grad_norm": 0.5256584286689758, "global_step": 67, "epoch": 0, "lr": 0.009000000000000001}
69
+ {"train_loss": 0.18938198685646057, "train_loss_bc": 0.18246878683567047, "train_loss_llm": 0.691320538520813, "grad_norm": 0.7720930576324463, "global_step": 68, "epoch": 0, "lr": 0.009000000000000001}
70
+ {"train_loss": 0.10004343092441559, "train_loss_bc": 0.09400247782468796, "train_loss_llm": 0.6040955781936646, "grad_norm": 0.939155638217926, "global_step": 69, "epoch": 0, "lr": 0.009000000000000001}
71
+ {"train_loss": 0.11703117191791534, "train_loss_bc": 0.11094395071268082, "train_loss_llm": 0.6087222099304199, "grad_norm": 1.1172370910644531, "global_step": 70, "epoch": 0, "lr": 0.009000000000000001}
72
+ {"train_loss": 0.13404561579227448, "train_loss_bc": 0.12686075270175934, "train_loss_llm": 0.7184867262840271, "grad_norm": 1.312468409538269, "global_step": 71, "epoch": 0, "lr": 0.009000000000000001}
73
+ {"train_loss": 0.13330930471420288, "train_loss_bc": 0.12657678127288818, "train_loss_llm": 0.6732516288757324, "grad_norm": 1.5088775157928467, "global_step": 72, "epoch": 0, "lr": 0.01}
74
+ {"train_loss": 0.05257038772106171, "train_loss_bc": 0.04637575149536133, "train_loss_llm": 0.6194634437561035, "grad_norm": 0.09083625674247742, "global_step": 73, "epoch": 0, "lr": 0.01}
75
+ {"train_loss": 0.06475914269685745, "train_loss_bc": 0.057880476117134094, "train_loss_llm": 0.6878665089607239, "grad_norm": 0.1966981440782547, "global_step": 74, "epoch": 0, "lr": 0.01}
76
+ {"train_loss": 0.04975426197052002, "train_loss_bc": 0.043193425983190536, "train_loss_llm": 0.6560835242271423, "grad_norm": 0.28462910652160645, "global_step": 75, "epoch": 0, "lr": 0.01}
77
+ {"train_loss": 0.04952416196465492, "train_loss_bc": 0.04294995218515396, "train_loss_llm": 0.6574209332466125, "grad_norm": 0.368166983127594, "global_step": 76, "epoch": 0, "lr": 0.01}
78
+ {"train_loss": 0.07074079662561417, "train_loss_bc": 0.06346137821674347, "train_loss_llm": 0.7279415130615234, "grad_norm": 0.484068363904953, "global_step": 77, "epoch": 0, "lr": 0.01}
79
+ {"train_loss": 0.04157562926411629, "train_loss_bc": 0.034751974046230316, "train_loss_llm": 0.6823655962944031, "grad_norm": 0.5569941997528076, "global_step": 78, "epoch": 0, "lr": 0.01}
80
+ {"train_loss": 0.06484629958868027, "train_loss_bc": 0.05785399675369263, "train_loss_llm": 0.6992301940917969, "grad_norm": 0.6628190279006958, "global_step": 79, "epoch": 0, "lr": 0.01}
81
+ {"train_loss": 0.038789354264736176, "train_loss_bc": 0.03276902064681053, "train_loss_llm": 0.6020334959030151, "grad_norm": 0.7350778579711914, "global_step": 80, "epoch": 0, "lr": 0.009999999972157305}
82
+ {"train_loss": 0.03389202430844307, "train_loss_bc": 0.02817351743578911, "train_loss_llm": 0.571850597858429, "grad_norm": 0.04861941188573837, "global_step": 81, "epoch": 0, "lr": 0.009999999972157305}
83
+ {"train_loss": 0.03024495765566826, "train_loss_bc": 0.02487185411155224, "train_loss_llm": 0.5373104214668274, "grad_norm": 0.08896133303642273, "global_step": 82, "epoch": 0, "lr": 0.009999999972157305}
84
+ {"train_loss": 0.029436565935611725, "train_loss_bc": 0.024766096845269203, "train_loss_llm": 0.46704691648483276, "grad_norm": 0.13158170878887177, "global_step": 83, "epoch": 0, "lr": 0.009999999972157305}
85
+ {"train_loss": 0.03704115003347397, "train_loss_bc": 0.03144294396042824, "train_loss_llm": 0.5598207712173462, "grad_norm": 0.18903131783008575, "global_step": 84, "epoch": 0, "lr": 0.009999999972157305}
86
+ {"train_loss": 0.031894855201244354, "train_loss_bc": 0.026735499501228333, "train_loss_llm": 0.5159357786178589, "grad_norm": 0.22145399451255798, "global_step": 85, "epoch": 0, "lr": 0.009999999972157305}
87
+ {"train_loss": 0.03053618222475052, "train_loss_bc": 0.025796514004468918, "train_loss_llm": 0.47396671772003174, "grad_norm": 0.2594376802444458, "global_step": 86, "epoch": 0, "lr": 0.009999999972157305}
88
+ {"train_loss": 0.025953643023967743, "train_loss_bc": 0.021002870053052902, "train_loss_llm": 0.49507731199264526, "grad_norm": 0.28883251547813416, "global_step": 87, "epoch": 0, "lr": 0.009999999972157305}
89
+ {"train_loss": 0.03711831569671631, "train_loss_bc": 0.03182109445333481, "train_loss_llm": 0.5297219753265381, "grad_norm": 0.33612799644470215, "global_step": 88, "epoch": 0, "lr": 0.009999999888629223}
90
+ {"train_loss": 0.07443847507238388, "train_loss_bc": 0.06750228255987167, "train_loss_llm": 0.6936193704605103, "grad_norm": 0.1032935231924057, "global_step": 89, "epoch": 0, "lr": 0.009999999888629223}
91
+ {"train_loss": 0.06578436493873596, "train_loss_bc": 0.059477001428604126, "train_loss_llm": 0.6307359933853149, "grad_norm": 0.19785623252391815, "global_step": 90, "epoch": 0, "lr": 0.009999999888629223}
92
+ {"train_loss": 0.05691196769475937, "train_loss_bc": 0.05067047104239464, "train_loss_llm": 0.6241495013237, "grad_norm": 0.28224730491638184, "global_step": 91, "epoch": 0, "lr": 0.009999999888629223}
93
+ {"train_loss": 0.07031725347042084, "train_loss_bc": 0.06331950426101685, "train_loss_llm": 0.699774980545044, "grad_norm": 0.38025128841400146, "global_step": 92, "epoch": 0, "lr": 0.009999999888629223}
94
+ {"train_loss": 0.06619272381067276, "train_loss_bc": 0.059530019760131836, "train_loss_llm": 0.6662706136703491, "grad_norm": 0.47631222009658813, "global_step": 93, "epoch": 0, "lr": 0.009999999888629223}
95
+ {"train_loss": 0.050842322409152985, "train_loss_bc": 0.04463043063879013, "train_loss_llm": 0.6211893558502197, "grad_norm": 0.5518149137496948, "global_step": 94, "epoch": 0, "lr": 0.009999999888629223}
96
+ {"train_loss": 0.05087399110198021, "train_loss_bc": 0.044860679656267166, "train_loss_llm": 0.6013312339782715, "grad_norm": 0.6292504668235779, "global_step": 95, "epoch": 0, "lr": 0.009999999888629223}
97
+ {"train_loss": 0.06841012090444565, "train_loss_bc": 0.061625488102436066, "train_loss_llm": 0.6784631013870239, "grad_norm": 0.726737916469574, "global_step": 96, "epoch": 0, "lr": 0.00999999974941575}
98
+ {"train_loss": 0.08856374025344849, "train_loss_bc": 0.08115855604410172, "train_loss_llm": 0.7405182123184204, "grad_norm": 0.11317727714776993, "global_step": 97, "epoch": 0, "lr": 0.00999999974941575}
99
+ {"train_loss": 0.08638611435890198, "train_loss_bc": 0.07939188182353973, "train_loss_llm": 0.6994235515594482, "grad_norm": 0.22164146602153778, "global_step": 98, "epoch": 0, "lr": 0.00999999974941575}
100
+ {"train_loss": 0.08941305428743362, "train_loss_bc": 0.0817980170249939, "train_loss_llm": 0.7615037560462952, "grad_norm": 0.32890111207962036, "global_step": 99, "epoch": 0, "lr": 0.00999999974941575}
101
+ {"train_loss": 0.07866586744785309, "train_loss_bc": 0.07075173407793045, "train_loss_llm": 0.7914135456085205, "grad_norm": 0.4279417097568512, "global_step": 100, "epoch": 0, "lr": 0.00999999974941575}
102
+ {"train_loss": 0.09740469604730606, "train_loss_bc": 0.0890614315867424, "train_loss_llm": 0.8343262672424316, "grad_norm": 0.5465472340583801, "global_step": 101, "epoch": 0, "lr": 0.00999999974941575}
103
+ {"train_loss": 0.07890348881483078, "train_loss_bc": 0.07148407399654388, "train_loss_llm": 0.7419418096542358, "grad_norm": 0.6493978500366211, "global_step": 102, "epoch": 0, "lr": 0.00999999974941575}
104
+ {"train_loss": 0.06637918949127197, "train_loss_bc": 0.05943997576832771, "train_loss_llm": 0.6939213275909424, "grad_norm": 0.736656665802002, "global_step": 103, "epoch": 0, "lr": 0.00999999974941575}
105
+ {"train_loss": 0.052137844264507294, "train_loss_bc": 0.04496845602989197, "train_loss_llm": 0.716938853263855, "grad_norm": 0.8118408918380737, "global_step": 104, "epoch": 0, "lr": 0.009999999554516895}
106
+ {"train_loss": 0.06986512988805771, "train_loss_bc": 0.06309865415096283, "train_loss_llm": 0.6766473650932312, "grad_norm": 0.08536022901535034, "global_step": 105, "epoch": 0, "lr": 0.009999999554516895}
107
+ {"train_loss": 0.0901651680469513, "train_loss_bc": 0.08455254882574081, "train_loss_llm": 0.5612622499465942, "grad_norm": 0.19402463734149933, "global_step": 106, "epoch": 0, "lr": 0.009999999554516895}
108
+ {"train_loss": 0.08825574815273285, "train_loss_bc": 0.08174335211515427, "train_loss_llm": 0.6512394547462463, "grad_norm": 0.29752182960510254, "global_step": 107, "epoch": 0, "lr": 0.009999999554516895}
109
+ {"train_loss": 0.07733944058418274, "train_loss_bc": 0.07105374336242676, "train_loss_llm": 0.628569483757019, "grad_norm": 0.39171040058135986, "global_step": 108, "epoch": 0, "lr": 0.009999999554516895}
110
+ {"train_loss": 0.06732399016618729, "train_loss_bc": 0.06240474805235863, "train_loss_llm": 0.49192410707473755, "grad_norm": 0.4783252775669098, "global_step": 109, "epoch": 0, "lr": 0.009999999554516895}
111
+ {"train_loss": 0.06321073323488235, "train_loss_bc": 0.05660167708992958, "train_loss_llm": 0.6609058380126953, "grad_norm": 0.558458149433136, "global_step": 110, "epoch": 0, "lr": 0.009999999554516895}
112
+ {"train_loss": 0.06905204057693481, "train_loss_bc": 0.06339387595653534, "train_loss_llm": 0.5658166408538818, "grad_norm": 0.6481609344482422, "global_step": 111, "epoch": 0, "lr": 0.009999999554516895}
113
+ {"train_loss": 0.07884093374013901, "train_loss_bc": 0.07224734127521515, "train_loss_llm": 0.6593592166900635, "grad_norm": 0.7421054840087891, "global_step": 112, "epoch": 0, "lr": 0.009999999303932654}
114
+ {"train_loss": 0.07946255803108215, "train_loss_bc": 0.07475702464580536, "train_loss_llm": 0.4705533981323242, "grad_norm": 0.10471871495246887, "global_step": 113, "epoch": 0, "lr": 0.009999999303932654}
115
+ {"train_loss": 0.07216629385948181, "train_loss_bc": 0.06603223085403442, "train_loss_llm": 0.6134059429168701, "grad_norm": 0.19753926992416382, "global_step": 114, "epoch": 0, "lr": 0.009999999303932654}
116
+ {"train_loss": 0.06510302424430847, "train_loss_bc": 0.057828500866889954, "train_loss_llm": 0.7274521589279175, "grad_norm": 0.28359219431877136, "global_step": 115, "epoch": 0, "lr": 0.009999999303932654}
117
+ {"train_loss": 0.06222425401210785, "train_loss_bc": 0.055748678743839264, "train_loss_llm": 0.6475574970245361, "grad_norm": 0.36533209681510925, "global_step": 116, "epoch": 0, "lr": 0.009999999303932654}
118
+ {"train_loss": 0.0845273807644844, "train_loss_bc": 0.07734745740890503, "train_loss_llm": 0.7179924249649048, "grad_norm": 0.4720841646194458, "global_step": 117, "epoch": 0, "lr": 0.009999999303932654}
119
+ {"train_loss": 0.06714431196451187, "train_loss_bc": 0.06066868081688881, "train_loss_llm": 0.6475629210472107, "grad_norm": 0.5596445798873901, "global_step": 118, "epoch": 0, "lr": 0.009999999303932654}
120
+ {"train_loss": 0.07048200070858002, "train_loss_bc": 0.06500747799873352, "train_loss_llm": 0.5474520921707153, "grad_norm": 0.6513513326644897, "global_step": 119, "epoch": 0, "lr": 0.009999999303932654}
121
+ {"train_loss": 0.04110037535429001, "train_loss_bc": 0.03525649011135101, "train_loss_llm": 0.5843884944915771, "grad_norm": 0.7102450132369995, "global_step": 120, "epoch": 0, "lr": 0.009999998997663032}
122
+ {"train_loss": 0.04190563037991524, "train_loss_bc": 0.03765689581632614, "train_loss_llm": 0.4248734712600708, "grad_norm": 0.06533454358577728, "global_step": 121, "epoch": 0, "lr": 0.009999998997663032}
123
+ {"train_loss": 0.04612841457128525, "train_loss_bc": 0.04169066250324249, "train_loss_llm": 0.4437751770019531, "grad_norm": 0.13389350473880768, "global_step": 122, "epoch": 0, "lr": 0.009999998997663032}
124
+ {"train_loss": 0.06232012063264847, "train_loss_bc": 0.057958535850048065, "train_loss_llm": 0.436158686876297, "grad_norm": 0.22540993988513947, "global_step": 123, "epoch": 0, "lr": 0.009999998997663032}
125
+ {"train_loss": 0.05091412365436554, "train_loss_bc": 0.04628019779920578, "train_loss_llm": 0.4633924067020416, "grad_norm": 0.29657596349716187, "global_step": 124, "epoch": 0, "lr": 0.009999998997663032}
126
+ {"train_loss": 0.04201853275299072, "train_loss_bc": 0.03731508180499077, "train_loss_llm": 0.47034499049186707, "grad_norm": 0.3558724820613861, "global_step": 125, "epoch": 0, "lr": 0.009999998997663032}
127
+ {"train_loss": 0.06030768156051636, "train_loss_bc": 0.0569755993783474, "train_loss_llm": 0.3332084119319916, "grad_norm": 0.4466772675514221, "global_step": 126, "epoch": 0, "lr": 0.009999998997663032}
128
+ {"train_loss": 0.049573902040719986, "train_loss_bc": 0.044726163148880005, "train_loss_llm": 0.48477375507354736, "grad_norm": 0.518007755279541, "global_step": 127, "epoch": 0, "lr": 0.009999998997663032}
129
+ {"train_loss": 0.05068175494670868, "train_loss_bc": 0.04603324085474014, "train_loss_llm": 0.46485158801078796, "grad_norm": 0.584708034992218, "global_step": 128, "epoch": 0, "lr": 0.009999998635708033}
130
+ {"train_loss": 0.03280109167098999, "train_loss_bc": 0.026645543053746223, "train_loss_llm": 0.6155548095703125, "grad_norm": 0.04615609720349312, "global_step": 129, "epoch": 0, "lr": 0.009999998635708033}
131
+ {"train_loss": 0.02815183810889721, "train_loss_bc": 0.022307250648736954, "train_loss_llm": 0.5844587087631226, "grad_norm": 0.08306025713682175, "global_step": 130, "epoch": 0, "lr": 0.009999998635708033}
132
+ {"train_loss": 0.04145112261176109, "train_loss_bc": 0.03547768294811249, "train_loss_llm": 0.597343921661377, "grad_norm": 0.1467656046152115, "global_step": 131, "epoch": 0, "lr": 0.009999998635708033}
133
+ {"train_loss": 0.03268067538738251, "train_loss_bc": 0.026548977941274643, "train_loss_llm": 0.6131698489189148, "grad_norm": 0.1975640058517456, "global_step": 132, "epoch": 0, "lr": 0.009999998635708033}
134
+ {"train_loss": 0.02972070872783661, "train_loss_bc": 0.024114008992910385, "train_loss_llm": 0.5606698989868164, "grad_norm": 0.2313450276851654, "global_step": 133, "epoch": 0, "lr": 0.009999998635708033}
135
+ {"train_loss": 0.034362196922302246, "train_loss_bc": 0.028685620054602623, "train_loss_llm": 0.5676577091217041, "grad_norm": 0.2870166003704071, "global_step": 134, "epoch": 0, "lr": 0.009999998635708033}
136
+ {"train_loss": 0.026356279850006104, "train_loss_bc": 0.021087775006890297, "train_loss_llm": 0.5268504023551941, "grad_norm": 0.32789376378059387, "global_step": 135, "epoch": 0, "lr": 0.009999998635708033}
137
+ {"train_loss": 0.02352694608271122, "train_loss_bc": 0.017692333087325096, "train_loss_llm": 0.5834612846374512, "grad_norm": 0.3600025475025177, "global_step": 136, "epoch": 0, "lr": 0.009999998218067659}
138
+ {"train_loss": 0.021489372476935387, "train_loss_bc": 0.015556419268250465, "train_loss_llm": 0.5932953953742981, "grad_norm": 0.029839487746357918, "global_step": 137, "epoch": 0, "lr": 0.009999998218067659}
139
+ {"train_loss": 0.022915281355381012, "train_loss_bc": 0.016903359442949295, "train_loss_llm": 0.6011921167373657, "grad_norm": 0.0649353489279747, "global_step": 138, "epoch": 0, "lr": 0.009999998218067659}
140
+ {"train_loss": 0.028618421405553818, "train_loss_bc": 0.021326089277863503, "train_loss_llm": 0.7292331457138062, "grad_norm": 0.09133722633123398, "global_step": 139, "epoch": 0, "lr": 0.009999998218067659}
141
+ {"train_loss": 0.022449012845754623, "train_loss_bc": 0.016372717916965485, "train_loss_llm": 0.6076295375823975, "grad_norm": 0.11012815684080124, "global_step": 140, "epoch": 0, "lr": 0.009999998218067659}
142
+ {"train_loss": 0.029746074229478836, "train_loss_bc": 0.023105649277567863, "train_loss_llm": 0.6640425324440002, "grad_norm": 0.13848648965358734, "global_step": 141, "epoch": 0, "lr": 0.009999998218067659}
143
+ {"train_loss": 0.024118199944496155, "train_loss_bc": 0.018623564392328262, "train_loss_llm": 0.5494635105133057, "grad_norm": 0.1677691638469696, "global_step": 142, "epoch": 0, "lr": 0.009999998218067659}
144
+ {"train_loss": 0.02615225501358509, "train_loss_bc": 0.020367056131362915, "train_loss_llm": 0.5785199403762817, "grad_norm": 0.2057863473892212, "global_step": 143, "epoch": 0, "lr": 0.009999998218067659}
145
+ {"train_loss": 0.02474672719836235, "train_loss_bc": 0.01899828016757965, "train_loss_llm": 0.5748447179794312, "grad_norm": 0.2312604933977127, "global_step": 144, "epoch": 0, "lr": 0.009999997744741916}
146
+ {"train_loss": 0.027259020134806633, "train_loss_bc": 0.022779621183872223, "train_loss_llm": 0.44793984293937683, "grad_norm": 0.03558708727359772, "global_step": 145, "epoch": 0, "lr": 0.009999997744741916}
147
+ {"train_loss": 0.026615606620907784, "train_loss_bc": 0.022006575018167496, "train_loss_llm": 0.4609031677246094, "grad_norm": 0.07820506393909454, "global_step": 146, "epoch": 0, "lr": 0.009999997744741916}
148
+ {"train_loss": 0.025012901052832603, "train_loss_bc": 0.020857524126768112, "train_loss_llm": 0.4155377149581909, "grad_norm": 0.11317337304353714, "global_step": 147, "epoch": 0, "lr": 0.009999997744741916}
149
+ {"train_loss": 0.04018259048461914, "train_loss_bc": 0.034745171666145325, "train_loss_llm": 0.5437417030334473, "grad_norm": 0.1679946333169937, "global_step": 148, "epoch": 0, "lr": 0.009999997744741916}
150
+ {"train_loss": 0.02269160747528076, "train_loss_bc": 0.018508322536945343, "train_loss_llm": 0.4183286130428314, "grad_norm": 0.1906110793352127, "global_step": 149, "epoch": 0, "lr": 0.009999997744741916}
151
+ {"train_loss": 0.02399151585996151, "train_loss_bc": 0.01900067925453186, "train_loss_llm": 0.49908363819122314, "grad_norm": 0.2185346633195877, "global_step": 150, "epoch": 0, "lr": 0.009999997744741916}
152
+ {"train_loss": 0.025642897933721542, "train_loss_bc": 0.01971365511417389, "train_loss_llm": 0.5929243564605713, "grad_norm": 0.256346195936203, "global_step": 151, "epoch": 0, "lr": 0.009999997744741916}
153
+ {"train_loss": 0.0291135311126709, "train_loss_bc": 0.025170352309942245, "train_loss_llm": 0.3943178951740265, "grad_norm": 0.29621225595474243, "global_step": 152, "epoch": 0, "lr": 0.00999999721573081}
154
+ {"train_loss": 0.031623922288417816, "train_loss_bc": 0.026910781860351562, "train_loss_llm": 0.4713141918182373, "grad_norm": 0.050291482359170914, "global_step": 153, "epoch": 0, "lr": 0.00999999721573081}
155
+ {"train_loss": 0.04012516513466835, "train_loss_bc": 0.03538067638874054, "train_loss_llm": 0.47444888949394226, "grad_norm": 0.11879635602235794, "global_step": 154, "epoch": 0, "lr": 0.00999999721573081}
156
+ {"train_loss": 0.02348470687866211, "train_loss_bc": 0.018497150391340256, "train_loss_llm": 0.49875572323799133, "grad_norm": 0.1580817550420761, "global_step": 155, "epoch": 0, "lr": 0.00999999721573081}
157
+ {"train_loss": 0.02868938073515892, "train_loss_bc": 0.024003252387046814, "train_loss_llm": 0.4686127305030823, "grad_norm": 0.20671528577804565, "global_step": 156, "epoch": 0, "lr": 0.00999999721573081}
158
+ {"train_loss": 0.03526413440704346, "train_loss_bc": 0.0299256332218647, "train_loss_llm": 0.5338499546051025, "grad_norm": 0.26737433671951294, "global_step": 157, "epoch": 0, "lr": 0.00999999721573081}
159
+ {"train_loss": 0.04240602254867554, "train_loss_bc": 0.03778018057346344, "train_loss_llm": 0.46258440613746643, "grad_norm": 0.3377738893032074, "global_step": 158, "epoch": 0, "lr": 0.00999999721573081}
160
+ {"train_loss": 0.04258374869823456, "train_loss_bc": 0.037083160132169724, "train_loss_llm": 0.5500588417053223, "grad_norm": 0.4062163233757019, "global_step": 159, "epoch": 0, "lr": 0.00999999721573081}
161
+ {"train_loss": 0.03730035200715065, "train_loss_bc": 0.0325319766998291, "train_loss_llm": 0.4768376052379608, "grad_norm": 0.46591275930404663, "global_step": 160, "epoch": 0, "lr": 0.009999996631034345}
162
+ {"train_loss": 0.03809020668268204, "train_loss_bc": 0.03346116095781326, "train_loss_llm": 0.46290475130081177, "grad_norm": 0.06493347138166428, "global_step": 161, "epoch": 0, "lr": 0.009999996631034345}
163
+ {"train_loss": 0.027684010565280914, "train_loss_bc": 0.023107346147298813, "train_loss_llm": 0.4576663374900818, "grad_norm": 0.11537288874387741, "global_step": 162, "epoch": 0, "lr": 0.009999996631034345}
164
+ {"train_loss": 0.03135323151946068, "train_loss_bc": 0.027640309184789658, "train_loss_llm": 0.3712920844554901, "grad_norm": 0.17289584875106812, "global_step": 163, "epoch": 0, "lr": 0.009999996631034345}
165
+ {"train_loss": 0.0167723186314106, "train_loss_bc": 0.012659368105232716, "train_loss_llm": 0.41129496693611145, "grad_norm": 0.20604096353054047, "global_step": 164, "epoch": 0, "lr": 0.009999996631034345}
166
+ {"train_loss": 0.03136400133371353, "train_loss_bc": 0.02577000856399536, "train_loss_llm": 0.5593993663787842, "grad_norm": 0.26496362686157227, "global_step": 165, "epoch": 0, "lr": 0.009999996631034345}
167
+ {"train_loss": 0.04508890211582184, "train_loss_bc": 0.039878830313682556, "train_loss_llm": 0.5210072994232178, "grad_norm": 0.3450776934623718, "global_step": 166, "epoch": 0, "lr": 0.009999996631034345}
168
+ {"train_loss": 0.02305273897945881, "train_loss_bc": 0.018481142818927765, "train_loss_llm": 0.4571595788002014, "grad_norm": 0.3923070430755615, "global_step": 167, "epoch": 0, "lr": 0.009999996631034345}
169
+ {"train_loss": 0.028364604339003563, "train_loss_bc": 0.023760396987199783, "train_loss_llm": 0.46042078733444214, "grad_norm": 0.44835156202316284, "global_step": 168, "epoch": 0, "lr": 0.00999999599065253}
170
+ {"train_loss": 0.03444590047001839, "train_loss_bc": 0.029000703245401382, "train_loss_llm": 0.5445197224617004, "grad_norm": 0.058123886585235596, "global_step": 169, "epoch": 0, "lr": 0.00999999599065253}
171
+ {"train_loss": 0.031413737684488297, "train_loss_bc": 0.026274994015693665, "train_loss_llm": 0.5138742327690125, "grad_norm": 0.11113490164279938, "global_step": 170, "epoch": 0, "lr": 0.00999999599065253}
172
+ {"train_loss": 0.026483573019504547, "train_loss_bc": 0.021075624972581863, "train_loss_llm": 0.540794849395752, "grad_norm": 0.15856337547302246, "global_step": 171, "epoch": 0, "lr": 0.00999999599065253}
173
+ {"train_loss": 0.026955293491482735, "train_loss_bc": 0.02244516834616661, "train_loss_llm": 0.4510125517845154, "grad_norm": 0.20139986276626587, "global_step": 172, "epoch": 0, "lr": 0.00999999599065253}
174
+ {"train_loss": 0.03016134910285473, "train_loss_bc": 0.02493642270565033, "train_loss_llm": 0.5224926471710205, "grad_norm": 0.25067630410194397, "global_step": 173, "epoch": 0, "lr": 0.00999999599065253}
175
+ {"train_loss": 0.029931407421827316, "train_loss_bc": 0.024894531816244125, "train_loss_llm": 0.5036876797676086, "grad_norm": 0.2985679507255554, "global_step": 174, "epoch": 0, "lr": 0.00999999599065253}
176
+ {"train_loss": 0.040666207671165466, "train_loss_bc": 0.035972896963357925, "train_loss_llm": 0.46933093667030334, "grad_norm": 0.36520418524742126, "global_step": 175, "epoch": 0, "lr": 0.00999999599065253}
177
+ {"train_loss": 0.02875429019331932, "train_loss_bc": 0.024598199874162674, "train_loss_llm": 0.4156089723110199, "grad_norm": 0.4146167039871216, "global_step": 176, "epoch": 0, "lr": 0.009999995294585371}
178
+ {"train_loss": 0.03293757140636444, "train_loss_bc": 0.02679475024342537, "train_loss_llm": 0.6142822504043579, "grad_norm": 0.05058354139328003, "global_step": 177, "epoch": 0, "lr": 0.009999995294585371}
179
+ {"train_loss": 0.025597713887691498, "train_loss_bc": 0.01985827460885048, "train_loss_llm": 0.5739438533782959, "grad_norm": 0.08788882941007614, "global_step": 178, "epoch": 0, "lr": 0.009999995294585371}
180
+ {"train_loss": 0.02832857519388199, "train_loss_bc": 0.023654501885175705, "train_loss_llm": 0.4674074053764343, "grad_norm": 0.1335342526435852, "global_step": 179, "epoch": 0, "lr": 0.009999995294585371}
181
+ {"train_loss": 0.023435339331626892, "train_loss_bc": 0.018604157492518425, "train_loss_llm": 0.48311811685562134, "grad_norm": 0.16893020272254944, "global_step": 180, "epoch": 0, "lr": 0.009999995294585371}
182
+ {"train_loss": 0.01497327908873558, "train_loss_bc": 0.010697474703192711, "train_loss_llm": 0.4275803864002228, "grad_norm": 0.1971648633480072, "global_step": 181, "epoch": 0, "lr": 0.009999995294585371}
183
+ {"train_loss": 0.03193127363920212, "train_loss_bc": 0.025609299540519714, "train_loss_llm": 0.6321975588798523, "grad_norm": 0.2400187849998474, "global_step": 182, "epoch": 0, "lr": 0.009999995294585371}
184
+ {"train_loss": 0.020016666501760483, "train_loss_bc": 0.01540004089474678, "train_loss_llm": 0.461662620306015, "grad_norm": 0.27775779366493225, "global_step": 183, "epoch": 0, "lr": 0.009999995294585371}
185
+ {"train_loss": 0.019674330949783325, "train_loss_bc": 0.014173893257975578, "train_loss_llm": 0.5500437021255493, "grad_norm": 0.3127053380012512, "global_step": 184, "epoch": 0, "lr": 0.009999994542832874}
186
+ {"train_loss": 0.020366767421364784, "train_loss_bc": 0.015372475609183311, "train_loss_llm": 0.49942925572395325, "grad_norm": 0.027160177007317543, "global_step": 185, "epoch": 0, "lr": 0.009999994542832874}
187
+ {"train_loss": 0.02773042395710945, "train_loss_bc": 0.022009629756212234, "train_loss_llm": 0.5720794796943665, "grad_norm": 0.06589915603399277, "global_step": 186, "epoch": 0, "lr": 0.009999994542832874}
188
+ {"train_loss": 0.027988407760858536, "train_loss_bc": 0.02180863544344902, "train_loss_llm": 0.6179772615432739, "grad_norm": 0.1006016656756401, "global_step": 187, "epoch": 0, "lr": 0.009999994542832874}
189
+ {"train_loss": 0.027591602876782417, "train_loss_bc": 0.022058088332414627, "train_loss_llm": 0.5533514022827148, "grad_norm": 0.13344469666481018, "global_step": 188, "epoch": 0, "lr": 0.009999994542832874}
190
+ {"train_loss": 0.017557095736265182, "train_loss_bc": 0.012610466219484806, "train_loss_llm": 0.494662880897522, "grad_norm": 0.1636464148759842, "global_step": 189, "epoch": 0, "lr": 0.009999994542832874}
191
+ {"train_loss": 0.028389083221554756, "train_loss_bc": 0.021804803982377052, "train_loss_llm": 0.6584279537200928, "grad_norm": 0.20365768671035767, "global_step": 190, "epoch": 0, "lr": 0.009999994542832874}
192
+ {"train_loss": 0.020810682326555252, "train_loss_bc": 0.014902697876095772, "train_loss_llm": 0.5907983779907227, "grad_norm": 0.23229533433914185, "global_step": 191, "epoch": 0, "lr": 0.009999994542832874}
193
+ {"train_loss": 0.021981600672006607, "train_loss_bc": 0.016103900969028473, "train_loss_llm": 0.5877700448036194, "grad_norm": 0.2603759467601776, "global_step": 192, "epoch": 0, "lr": 0.009999993735395049}
194
+ {"train_loss": 0.022998729720711708, "train_loss_bc": 0.01771724969148636, "train_loss_llm": 0.5281479954719543, "grad_norm": 0.034455616027116776, "global_step": 193, "epoch": 0, "lr": 0.009999993735395049}
195
+ {"train_loss": 0.019327782094478607, "train_loss_bc": 0.014658035710453987, "train_loss_llm": 0.4669746160507202, "grad_norm": 0.0668833777308464, "global_step": 194, "epoch": 0, "lr": 0.009999993735395049}
196
+ {"train_loss": 0.024879198521375656, "train_loss_bc": 0.0185236893594265, "train_loss_llm": 0.6355509757995605, "grad_norm": 0.08858254551887512, "global_step": 195, "epoch": 0, "lr": 0.009999993735395049}
197
+ {"train_loss": 0.019756946712732315, "train_loss_bc": 0.014940358698368073, "train_loss_llm": 0.4816588759422302, "grad_norm": 0.11397459357976913, "global_step": 196, "epoch": 0, "lr": 0.009999993735395049}
198
+ {"train_loss": 0.024903442710638046, "train_loss_bc": 0.018497081473469734, "train_loss_llm": 0.6406360864639282, "grad_norm": 0.14657457172870636, "global_step": 197, "epoch": 0, "lr": 0.009999993735395049}
199
+ {"train_loss": 0.019728384912014008, "train_loss_bc": 0.014158019796013832, "train_loss_llm": 0.5570365190505981, "grad_norm": 0.1720155030488968, "global_step": 198, "epoch": 0, "lr": 0.009999993735395049}
200
+ {"train_loss": 0.016792047768831253, "train_loss_bc": 0.011875113472342491, "train_loss_llm": 0.49169355630874634, "grad_norm": 0.20192894339561462, "global_step": 199, "epoch": 0, "lr": 0.009999993735395049}
201
+ {"train_loss": 0.023527009412646294, "train_loss_bc": 0.017446376383304596, "train_loss_llm": 0.6080633401870728, "grad_norm": 0.2486913502216339, "global_step": 200, "epoch": 0, "lr": 0.009999992872271905}
202
+ {"train_loss": 0.02416856773197651, "train_loss_bc": 0.018999043852090836, "train_loss_llm": 0.5169523358345032, "grad_norm": 0.029791679233312607, "global_step": 201, "epoch": 0, "lr": 0.009999992872271905}
203
+ {"train_loss": 0.022336507216095924, "train_loss_bc": 0.017620330676436424, "train_loss_llm": 0.4716176390647888, "grad_norm": 0.056961867958307266, "global_step": 202, "epoch": 0, "lr": 0.009999992872271905}
204
+ {"train_loss": 0.021891754120588303, "train_loss_bc": 0.01769360713660717, "train_loss_llm": 0.4198147654533386, "grad_norm": 0.07886364310979843, "global_step": 203, "epoch": 0, "lr": 0.009999992872271905}
205
+ {"train_loss": 0.02422039769589901, "train_loss_bc": 0.01944451406598091, "train_loss_llm": 0.47758832573890686, "grad_norm": 0.11191964149475098, "global_step": 204, "epoch": 0, "lr": 0.009999992872271905}
206
+ {"train_loss": 0.02202729508280754, "train_loss_bc": 0.016946561634540558, "train_loss_llm": 0.5080732703208923, "grad_norm": 0.12720732390880585, "global_step": 205, "epoch": 0, "lr": 0.009999992872271905}
207
+ {"train_loss": 0.02344614453613758, "train_loss_bc": 0.01831752434372902, "train_loss_llm": 0.512861967086792, "grad_norm": 0.15643325448036194, "global_step": 206, "epoch": 0, "lr": 0.009999992872271905}
208
+ {"train_loss": 0.021590720862150192, "train_loss_bc": 0.01635618507862091, "train_loss_llm": 0.5234535932540894, "grad_norm": 0.17777878046035767, "global_step": 207, "epoch": 0, "lr": 0.009999992872271905}
209
+ {"train_loss": 0.02145991660654545, "train_loss_bc": 0.01685245707631111, "train_loss_llm": 0.46074602007865906, "grad_norm": 0.20263022184371948, "global_step": 208, "epoch": 0, "lr": 0.009999991953463454}
210
+ {"train_loss": 0.023527000099420547, "train_loss_bc": 0.019175242632627487, "train_loss_llm": 0.4351757764816284, "grad_norm": 0.02783939242362976, "global_step": 209, "epoch": 0, "lr": 0.009999991953463454}
211
+ {"train_loss": 0.02509186789393425, "train_loss_bc": 0.020386580377817154, "train_loss_llm": 0.4705287218093872, "grad_norm": 0.06272286921739578, "global_step": 210, "epoch": 0, "lr": 0.009999991953463454}
212
+ {"train_loss": 0.023855067789554596, "train_loss_bc": 0.018275782465934753, "train_loss_llm": 0.5579285025596619, "grad_norm": 0.07670474052429199, "global_step": 211, "epoch": 0, "lr": 0.009999991953463454}
213
+ {"train_loss": 0.024523191154003143, "train_loss_bc": 0.020061926916241646, "train_loss_llm": 0.4461265206336975, "grad_norm": 0.09239604324102402, "global_step": 212, "epoch": 0, "lr": 0.009999991953463454}
214
+ {"train_loss": 0.023320389911532402, "train_loss_bc": 0.019183896481990814, "train_loss_llm": 0.413649320602417, "grad_norm": 0.12721095979213715, "global_step": 213, "epoch": 0, "lr": 0.009999991953463454}
215
+ {"train_loss": 0.02343529649078846, "train_loss_bc": 0.018135903403162956, "train_loss_llm": 0.5299392938613892, "grad_norm": 0.14981500804424286, "global_step": 214, "epoch": 0, "lr": 0.009999991953463454}
216
+ {"train_loss": 0.024323690682649612, "train_loss_bc": 0.01939486525952816, "train_loss_llm": 0.49288249015808105, "grad_norm": 0.17683890461921692, "global_step": 215, "epoch": 0, "lr": 0.009999991953463454}
217
+ {"train_loss": 0.022757865488529205, "train_loss_bc": 0.018279647454619408, "train_loss_llm": 0.44782188534736633, "grad_norm": 0.19883114099502563, "global_step": 216, "epoch": 0, "lr": 0.0099999909789697}
218
+ {"train_loss": 0.023337459191679955, "train_loss_bc": 0.01846153847873211, "train_loss_llm": 0.48759210109710693, "grad_norm": 0.02261751890182495, "global_step": 217, "epoch": 0, "lr": 0.0099999909789697}
219
+ {"train_loss": 0.031769875437021255, "train_loss_bc": 0.026411594823002815, "train_loss_llm": 0.5358280539512634, "grad_norm": 0.06921491771936417, "global_step": 218, "epoch": 0, "lr": 0.0099999909789697}
220
+ {"train_loss": 0.023737115785479546, "train_loss_bc": 0.019920486956834793, "train_loss_llm": 0.38166290521621704, "grad_norm": 0.09409084916114807, "global_step": 219, "epoch": 0, "lr": 0.0099999909789697}
221
+ {"train_loss": 0.025979334488511086, "train_loss_bc": 0.021304704248905182, "train_loss_llm": 0.46746301651000977, "grad_norm": 0.11882251501083374, "global_step": 220, "epoch": 0, "lr": 0.0099999909789697}
222
+ {"train_loss": 0.025000201538205147, "train_loss_bc": 0.019991103559732437, "train_loss_llm": 0.5009097456932068, "grad_norm": 0.15056195855140686, "global_step": 221, "epoch": 0, "lr": 0.0099999909789697}
223
+ {"train_loss": 0.02242461033165455, "train_loss_bc": 0.018378354609012604, "train_loss_llm": 0.4046255350112915, "grad_norm": 0.1780581921339035, "global_step": 222, "epoch": 0, "lr": 0.0099999909789697}
224
+ {"train_loss": 0.027378899976611137, "train_loss_bc": 0.023318542167544365, "train_loss_llm": 0.406035840511322, "grad_norm": 0.2123226374387741, "global_step": 223, "epoch": 0, "lr": 0.0099999909789697}
225
+ {"train_loss": 0.02599770948290825, "train_loss_bc": 0.0203506201505661, "train_loss_llm": 0.564708948135376, "grad_norm": 0.24404466152191162, "global_step": 224, "epoch": 0, "lr": 0.00999998994879066}
226
+ {"train_loss": 0.02545972168445587, "train_loss_bc": 0.020537808537483215, "train_loss_llm": 0.49219125509262085, "grad_norm": 0.0350002683699131, "global_step": 225, "epoch": 0, "lr": 0.00999998994879066}
227
+ {"train_loss": 0.025176143273711205, "train_loss_bc": 0.020890595391392708, "train_loss_llm": 0.42855486273765564, "grad_norm": 0.06209180876612663, "global_step": 226, "epoch": 0, "lr": 0.00999998994879066}
228
+ {"train_loss": 0.023136619478464127, "train_loss_bc": 0.01860974170267582, "train_loss_llm": 0.4526877701282501, "grad_norm": 0.09024433046579361, "global_step": 227, "epoch": 0, "lr": 0.00999998994879066}
229
+ {"train_loss": 0.025945395231246948, "train_loss_bc": 0.021624740213155746, "train_loss_llm": 0.43206557631492615, "grad_norm": 0.12185320258140564, "global_step": 228, "epoch": 0, "lr": 0.00999998994879066}
230
+ {"train_loss": 0.028116080909967422, "train_loss_bc": 0.02361183986067772, "train_loss_llm": 0.45042404532432556, "grad_norm": 0.16623881459236145, "global_step": 229, "epoch": 0, "lr": 0.00999998994879066}
231
+ {"train_loss": 0.028553711250424385, "train_loss_bc": 0.02347782626748085, "train_loss_llm": 0.5075885057449341, "grad_norm": 0.19257326424121857, "global_step": 230, "epoch": 0, "lr": 0.00999998994879066}
232
+ {"train_loss": 0.027766291052103043, "train_loss_bc": 0.022831808775663376, "train_loss_llm": 0.49344828724861145, "grad_norm": 0.22710655629634857, "global_step": 231, "epoch": 0, "lr": 0.00999998994879066}
233
+ {"train_loss": 0.029369812458753586, "train_loss_bc": 0.024203170090913773, "train_loss_llm": 0.5166641473770142, "grad_norm": 0.2672453820705414, "global_step": 232, "epoch": 0, "lr": 0.009999988862926341}
234
+ {"train_loss": 0.019986841827630997, "train_loss_bc": 0.01582282781600952, "train_loss_llm": 0.41640135645866394, "grad_norm": 0.03304322436451912, "global_step": 233, "epoch": 0, "lr": 0.009999988862926341}
235
+ {"train_loss": 0.027561256662011147, "train_loss_bc": 0.0231167059391737, "train_loss_llm": 0.4444551467895508, "grad_norm": 0.06243205443024635, "global_step": 234, "epoch": 0, "lr": 0.009999988862926341}
236
+ {"train_loss": 0.02846396341919899, "train_loss_bc": 0.023669028654694557, "train_loss_llm": 0.47949355840682983, "grad_norm": 0.10598953068256378, "global_step": 235, "epoch": 0, "lr": 0.009999988862926341}
237
+ {"train_loss": 0.024963906034827232, "train_loss_bc": 0.020005209371447563, "train_loss_llm": 0.49586963653564453, "grad_norm": 0.1454334259033203, "global_step": 236, "epoch": 0, "lr": 0.009999988862926341}
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/train.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [2026-01-21 12:18:20,592][numexpr.utils][INFO] - Note: detected 224 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable.
2
+ [2026-01-21 12:18:20,592][numexpr.utils][INFO] - Note: NumExpr detected 224 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 16.
3
+ [2026-01-21 12:18:20,592][numexpr.utils][INFO] - NumExpr defaulting to 16 threads.
4
+ [2026-01-21 12:18:26,191][datasets][INFO] - PyTorch version 2.2.2 available.
5
+ [2026-01-21 12:18:26,192][datasets][INFO] - TensorFlow version 2.15.1 available.
6
+ [2026-01-21 12:18:26,193][datasets][INFO] - JAX version 0.4.30 available.
7
+ [2026-01-21 12:18:49,867][root][INFO] - running build_ext
8
+ [2026-01-21 12:18:49,870][root][INFO] - building 'mujoco_py.cymj' extension
9
+ [2026-01-21 12:18:49,872][root][INFO] - gcc -pthread -B /home/u1131674/.conda/envs/llm-bc/compiler_compat -Wno-unused-result -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /home/u1131674/.conda/envs/llm-bc/include -I/home/u1131674/.conda/envs/llm-bc/include -fPIC -O2 -isystem /home/u1131674/.conda/envs/llm-bc/include -fPIC -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py -I/home/u1131674/.mujoco/mujoco210/include -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/numpy/core/include -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/vendor/egl -I/home/u1131674/.conda/envs/llm-bc/include/python3.9 -c /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/cymj.c -o /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/generated/_pyxbld_2.1.2.14_39_linuxgpuextensionbuilder/temp.linux-x86_64-cpython-39/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/cymj.o -fopenmp -w
10
+ [2026-01-21 12:19:17,011][root][INFO] - gcc -pthread -B /home/u1131674/.conda/envs/llm-bc/compiler_compat -Wno-unused-result -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /home/u1131674/.conda/envs/llm-bc/include -I/home/u1131674/.conda/envs/llm-bc/include -fPIC -O2 -isystem /home/u1131674/.conda/envs/llm-bc/include -fPIC -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py -I/home/u1131674/.mujoco/mujoco210/include -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/numpy/core/include -I/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/vendor/egl -I/home/u1131674/.conda/envs/llm-bc/include/python3.9 -c /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/gl/eglshim.c -o /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/generated/_pyxbld_2.1.2.14_39_linuxgpuextensionbuilder/temp.linux-x86_64-cpython-39/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/gl/eglshim.o -fopenmp -w
11
+ [2026-01-21 12:19:17,219][root][INFO] - creating /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/generated/_pyxbld_2.1.2.14_39_linuxgpuextensionbuilder/lib.linux-x86_64-cpython-39/mujoco_py
12
+ [2026-01-21 12:19:17,222][root][INFO] - gcc -pthread -B /home/u1131674/.conda/envs/llm-bc/compiler_compat -shared -Wl,-rpath,/home/u1131674/.conda/envs/llm-bc/lib -Wl,-rpath-link,/home/u1131674/.conda/envs/llm-bc/lib -L/home/u1131674/.conda/envs/llm-bc/lib -L/home/u1131674/.conda/envs/llm-bc/lib -Wl,-rpath,/home/u1131674/.conda/envs/llm-bc/lib -Wl,-rpath-link,/home/u1131674/.conda/envs/llm-bc/lib -L/home/u1131674/.conda/envs/llm-bc/lib /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/generated/_pyxbld_2.1.2.14_39_linuxgpuextensionbuilder/temp.linux-x86_64-cpython-39/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/cymj.o /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/generated/_pyxbld_2.1.2.14_39_linuxgpuextensionbuilder/temp.linux-x86_64-cpython-39/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/gl/eglshim.o -L/home/u1131674/.mujoco/mujoco210/bin -Wl,--enable-new-dtags,-rpath,/home/u1131674/.mujoco/mujoco210/bin -lmujoco210 -lglewegl -o /home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/mujoco_py/generated/_pyxbld_2.1.2.14_39_linuxgpuextensionbuilder/lib.linux-x86_64-cpython-39/mujoco_py/cymj.cpython-39-x86_64-linux-gnu.so -fopenmp
13
+ [2026-01-21 12:19:18,581][absl][INFO] - MUJOCO_GL=osmesa, attempting to import specified OpenGL backend.
14
+ [2026-01-21 12:19:18,590][absl][INFO] - MuJoCo library version is: 2.3.7
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/debug-internal.log ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2026-01-21T12:19:19.67691431+08:00","level":"INFO","msg":"using version","core version":"0.18.6"}
2
+ {"time":"2026-01-21T12:19:19.676924583+08:00","level":"INFO","msg":"created symlink","path":"/work/u1131674/LLM-BC/data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug-core.log"}
3
+ {"time":"2026-01-21T12:19:19.791067511+08:00","level":"INFO","msg":"created new stream","id":"9puzigbg"}
4
+ {"time":"2026-01-21T12:19:19.791113731+08:00","level":"INFO","msg":"stream: started","id":"9puzigbg"}
5
+ {"time":"2026-01-21T12:19:19.791148479+08:00","level":"INFO","msg":"sender: started","stream_id":"9puzigbg"}
6
+ {"time":"2026-01-21T12:19:19.791138771+08:00","level":"INFO","msg":"handler: started","stream_id":{"value":"9puzigbg"}}
7
+ {"time":"2026-01-21T12:19:19.791131709+08:00","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"9puzigbg"}}
8
+ {"time":"2026-01-21T12:19:20.473667126+08:00","level":"INFO","msg":"Starting system monitor"}
9
+ {"time":"2026-01-21T12:20:31.202511022+08:00","level":"INFO","msg":"stream: closing","id":"9puzigbg"}
10
+ {"time":"2026-01-21T12:20:31.202606065+08:00","level":"INFO","msg":"Stopping system monitor"}
11
+ {"time":"2026-01-21T12:20:31.262777289+08:00","level":"INFO","msg":"Stopped system monitor"}
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/debug.log ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026-01-21 12:19:19,672 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Current SDK version is 0.18.6
2
+ 2026-01-21 12:19:19,672 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Configure stats pid to 2070718
3
+ 2026-01-21 12:19:19,672 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Loading settings from /home/u1131674/.config/wandb/settings
4
+ 2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Loading settings from /work/u1131674/LLM-BC/wandb/settings
5
+ 2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
6
+ 2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': 'online', '_disable_service': None}
7
+ 2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'train.py', 'program_abspath': '/work/u1131674/LLM-BC/train.py', 'program': '/work/u1131674/LLM-BC/./train.py'}
8
+ 2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Applying login settings: {}
9
+ 2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:_log_setup():533] Logging user logs to /work/u1131674/LLM-BC/data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug.log
10
+ 2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:_log_setup():534] Logging internal logs to /work/u1131674/LLM-BC/data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug-internal.log
11
+ 2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:init():619] calling init triggers
12
+ 2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
13
+ config: {'name': 'train_llmbc_lowdim', '_target_': 'llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace', 'obs_dim': 9, 'action_dim': 4, 'task_name': 'box-close-v2', 'exp_name': 'default', 'model_name': 'HuggingFaceTB/SmolLM2-135M-Instruct', 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1, 'n_latency_steps': 0, 'past_action_visible': False, 'llm_orig_expert_feedback': True, 'llm_do_sample': False, 'policy': {'_target_': 'llmbc.policy.llmbc_lowdim_policy.LLMBCLowdimPolicy', 'model': {'_target_': 'llmbc.model.policy.policy_mlp.PolicyMLP', 'input_size': 9, 'hidden_size': [256, 256], 'output_size': 4, 'activation': 'relu', 'n_obs_steps': 1, 'n_action_steps': 1}, 'obs_dim': 9, 'action_dim': 4, 'llm_discriminator': {'_target_': 'llmbc.discriminator.llm_ce_discriminator.LLMCEDiscriminator', 'task_id': 'box-close-v2', 'llm_translator': {'_target_': 'llmbc.translator.llm_translator.LLMTranslator', 'cfg': {'name': 'HuggingFaceTB/SmolLM2-135M-Instruct', 'model_name': 'SmolLM2-135M-Instruct', 'config_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig', 'causal_lm_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM', 'use_quantization': False, 'use_joint_mlp_projector': True, 'llm_mode': 'ete-finetuned', 'finetune_mode': 'orig', 'checkpoint': 'data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890', 'max_length': 100, 'lora_config': {'r': 32, 'lora_alpha': 64, 'lora_dropout': 0.05, 'bias': 'none', 'task_type': 'CAUSAL_LM'}, 'prompter': {'_target_': 'llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter', 'use_joint_mlp_projector': True}, 'hydra': {'job': {'override_dirname': 'HuggingFaceTB/SmolLM2-135M-Instruct'}, 'run': {'dir': 'data/outputs/2026.01.21/12.18.18_HuggingFaceTB/SmolLM2-135M-Instruct'}}}, 'obs_dim': 9, 'action_dim': 4, 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1}}, 'loss_bc_weight': 1.0, 'loss_llm_weight': 0.01, 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1, 'normalize_llm_loss': True}, 'dataloader': {'batch_size': 16, 'num_workers': 0, 'shuffle': True, 'pin_memory': False, 'persistent_workers': False}, 'val_dataloader': {'batch_size': 16, 'num_workers': 0, 'shuffle': True, 'pin_memory': False, 'persistent_workers': False}, 'optimizer': {'_target_': 'torch.optim.AdamW', 'lr': 0.01, 'betas': [0.95, 0.999], 'eps': 1e-08, 'weight_decay': 1e-06}, 'training': {'device': 'cuda:0', 'seed': 42, 'debug': False, 'resume': False, 'lr_scheduler': 'cosine', 'lr_warmup_steps': 10, 'num_epochs': 1001, 'gradient_accumulate_every': 8, 'grad_norm_clip': 0.5, 'rollout_every': 5, 'checkpoint_every': 5, 'val_every': 1, 'sample_every': 5, 'sample_max_batch': 128, 'max_train_steps': None, 'max_val_steps': None, 'tqdm_interval_sec': 1.0}, 'logging': {'project': 'box-close-v2-training', 'resume': True, 'mode': 'online', 'name': '2026.01.21-12.18.18_train_llmbc_lowdim_box-close-v2', 'tags': ['train_llmbc_lowdim', 'box-close-v2', 'default'], 'id': None, 'group': None}, 'checkpoint': {'topk': {'monitor_key': 'test_success_rate', 'mode': 'max', 'k': 5, 'format_str': 'epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt'}, 'save_last_ckpt': True, 'save_last_snapshot': False}, 'multi_run': {'run_dir': 'data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2', 'wandb_name_base': '2026.01.21-12.18.18_train_llmbc_lowdim_box-close-v2'}, 'task': {'name': 'box-close-v2', 'obs_dim': 9, 'action_dim': 4, 'env_runner': {'_target_': 'llmbc.env_runner.metaworld_lowdim_runner.MetaworldLowdimRunner', 'env_name': 'llf-metaworld-box-close-v2', 'n_train': 10, 'n_test': 50, 'n_envs': 10, 'max_steps': 30, 'n_obs_steps': 1, 'n_action_steps': 1, 'instruction_type': 'b', 'feedback_type': ['hp', 'hn', 'fp'], 'visual': False, 'discount': 0.9}, 'dataset': {'_target_': 'llmbc.dataset.metaworld_lowdim_dataset.MetaworldLowdimDataset', 'data_path': 'datasets/box-close-v2.pt', 'data_path2': 'datasets/box-close-v2.pt', 'horizon': 1, 'pad_before': 0, 'pad_after': 0, 'obs_eef_target': True, 'use_manual_normalizer': False, 'val_ratio': 0.1, 'dummy_normalizer': True}, 'instructor': {'_target_': 'llmbc.translator.instructor.metaworld_instructor.box_close_v2_instructor.BoxCloseV2Instructor'}}, 'llm': {'name': 'HuggingFaceTB/SmolLM2-135M-Instruct', 'model_name': 'SmolLM2-135M-Instruct', 'config_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig', 'causal_lm_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM', 'use_quantization': False, 'use_joint_mlp_projector': True, 'llm_mode': 'ete-finetuned', 'finetune_mode': 'orig', 'checkpoint': 'data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890', 'max_length': 100, 'lora_config': {'r': 32, 'lora_alpha': 64, 'lora_dropout': 0.05, 'bias': 'none', 'task_type': 'CAUSAL_LM'}, 'prompter': {'_target_': 'llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter', 'use_joint_mlp_projector': True}, 'hydra': {'job': {'override_dirname': 'HuggingFaceTB/SmolLM2-135M-Instruct'}, 'run': {'dir': 'data/outputs/2026.01.21/12.18.18_HuggingFaceTB/SmolLM2-135M-Instruct'}}}}
14
+ 2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:init():669] starting backend
15
+ 2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:init():673] sending inform_init request
16
+ 2026-01-21 12:19:19,674 INFO MainThread:2070718 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
+ 2026-01-21 12:19:19,675 INFO MainThread:2070718 [wandb_init.py:init():686] backend started and connected
18
+ 2026-01-21 12:19:19,684 INFO MainThread:2070718 [wandb_init.py:init():781] updated telemetry
19
+ 2026-01-21 12:19:19,759 INFO MainThread:2070718 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
20
+ 2026-01-21 12:19:20,469 INFO MainThread:2070718 [wandb_init.py:init():867] starting run threads in backend
21
+ 2026-01-21 12:19:20,990 INFO MainThread:2070718 [wandb_run.py:_console_start():2451] atexit reg
22
+ 2026-01-21 12:19:20,991 INFO MainThread:2070718 [wandb_run.py:_redirect():2299] redirect: wrap_raw
23
+ 2026-01-21 12:19:20,991 INFO MainThread:2070718 [wandb_run.py:_redirect():2364] Wrapping output streams.
24
+ 2026-01-21 12:19:20,991 INFO MainThread:2070718 [wandb_run.py:_redirect():2389] Redirects installed.
25
+ 2026-01-21 12:19:20,994 INFO MainThread:2070718 [wandb_init.py:init():911] run started, returning control to user process
26
+ 2026-01-21 12:19:20,994 INFO MainThread:2070718 [wandb_run.py:_config_callback():1389] config_cb None None {'output_dir': '/work/u1131674/LLM-BC/data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2'}
27
+ 2026-01-21 12:20:31,202 WARNING MsgRouterThr:2070718 [router.py:message_loop():75] message_loop has been closed
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/config.yaml ADDED
@@ -0,0 +1,271 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _target_:
2
+ value: llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace
3
+ _wandb:
4
+ value:
5
+ cli_version: 0.18.6
6
+ m: []
7
+ python_version: 3.9.25
8
+ t:
9
+ "1":
10
+ - 1
11
+ - 2
12
+ - 3
13
+ - 5
14
+ - 11
15
+ - 12
16
+ - 41
17
+ - 49
18
+ - 50
19
+ - 51
20
+ - 53
21
+ - 55
22
+ - 71
23
+ - 83
24
+ - 95
25
+ - 98
26
+ - 100
27
+ - 105
28
+ "2":
29
+ - 1
30
+ - 2
31
+ - 3
32
+ - 5
33
+ - 11
34
+ - 12
35
+ - 41
36
+ - 49
37
+ - 50
38
+ - 51
39
+ - 53
40
+ - 55
41
+ - 71
42
+ - 83
43
+ - 95
44
+ - 98
45
+ - 100
46
+ - 105
47
+ "3":
48
+ - 13
49
+ - 15
50
+ - 16
51
+ - 23
52
+ - 55
53
+ - 61
54
+ "4": 3.9.25
55
+ "5": 0.18.6
56
+ "6": 4.47.1
57
+ "8":
58
+ - 5
59
+ "12": 0.18.6
60
+ "13": linux-x86_64
61
+ action_dim:
62
+ value: 4
63
+ checkpoint:
64
+ value:
65
+ save_last_ckpt: true
66
+ save_last_snapshot: false
67
+ topk:
68
+ format_str: epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt
69
+ k: 5
70
+ mode: max
71
+ monitor_key: test_success_rate
72
+ dataloader:
73
+ value:
74
+ batch_size: 16
75
+ num_workers: 0
76
+ persistent_workers: false
77
+ pin_memory: false
78
+ shuffle: true
79
+ exp_name:
80
+ value: default
81
+ horizon:
82
+ value: 1
83
+ llm:
84
+ value:
85
+ causal_lm_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM
86
+ checkpoint: data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890
87
+ config_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig
88
+ finetune_mode: orig
89
+ hydra:
90
+ job:
91
+ override_dirname: HuggingFaceTB/SmolLM2-135M-Instruct
92
+ run:
93
+ dir: data/outputs/2026.01.21/12.18.18_HuggingFaceTB/SmolLM2-135M-Instruct
94
+ llm_mode: ete-finetuned
95
+ lora_config:
96
+ bias: none
97
+ lora_alpha: 64
98
+ lora_dropout: 0.05
99
+ r: 32
100
+ task_type: CAUSAL_LM
101
+ max_length: 100
102
+ model_name: SmolLM2-135M-Instruct
103
+ name: HuggingFaceTB/SmolLM2-135M-Instruct
104
+ prompter:
105
+ _target_: llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter
106
+ use_joint_mlp_projector: true
107
+ use_joint_mlp_projector: true
108
+ use_quantization: false
109
+ llm_do_sample:
110
+ value: false
111
+ llm_orig_expert_feedback:
112
+ value: true
113
+ logging:
114
+ value:
115
+ group: null
116
+ id: null
117
+ mode: online
118
+ name: 2026.01.21-12.18.18_train_llmbc_lowdim_box-close-v2
119
+ project: box-close-v2-training
120
+ resume: true
121
+ tags:
122
+ - train_llmbc_lowdim
123
+ - box-close-v2
124
+ - default
125
+ model_name:
126
+ value: HuggingFaceTB/SmolLM2-135M-Instruct
127
+ multi_run:
128
+ value:
129
+ run_dir: data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2
130
+ wandb_name_base: 2026.01.21-12.18.18_train_llmbc_lowdim_box-close-v2
131
+ n_action_steps:
132
+ value: 1
133
+ n_latency_steps:
134
+ value: 0
135
+ n_obs_steps:
136
+ value: 1
137
+ name:
138
+ value: train_llmbc_lowdim
139
+ obs_dim:
140
+ value: 9
141
+ optimizer:
142
+ value:
143
+ _target_: torch.optim.AdamW
144
+ betas:
145
+ - 0.95
146
+ - 0.999
147
+ eps: 1e-08
148
+ lr: 0.01
149
+ weight_decay: 1e-06
150
+ output_dir:
151
+ value: /work/u1131674/LLM-BC/data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2
152
+ past_action_visible:
153
+ value: false
154
+ policy:
155
+ value:
156
+ _target_: llmbc.policy.llmbc_lowdim_policy.LLMBCLowdimPolicy
157
+ action_dim: 4
158
+ horizon: 1
159
+ llm_discriminator:
160
+ _target_: llmbc.discriminator.llm_ce_discriminator.LLMCEDiscriminator
161
+ llm_translator:
162
+ _target_: llmbc.translator.llm_translator.LLMTranslator
163
+ action_dim: 4
164
+ cfg:
165
+ causal_lm_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM
166
+ checkpoint: data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890
167
+ config_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig
168
+ finetune_mode: orig
169
+ hydra:
170
+ job:
171
+ override_dirname: HuggingFaceTB/SmolLM2-135M-Instruct
172
+ run:
173
+ dir: data/outputs/2026.01.21/12.18.18_HuggingFaceTB/SmolLM2-135M-Instruct
174
+ llm_mode: ete-finetuned
175
+ lora_config:
176
+ bias: none
177
+ lora_alpha: 64
178
+ lora_dropout: 0.05
179
+ r: 32
180
+ task_type: CAUSAL_LM
181
+ max_length: 100
182
+ model_name: SmolLM2-135M-Instruct
183
+ name: HuggingFaceTB/SmolLM2-135M-Instruct
184
+ prompter:
185
+ _target_: llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter
186
+ use_joint_mlp_projector: true
187
+ use_joint_mlp_projector: true
188
+ use_quantization: false
189
+ horizon: 1
190
+ n_action_steps: 1
191
+ n_obs_steps: 1
192
+ obs_dim: 9
193
+ task_id: box-close-v2
194
+ loss_bc_weight: 1
195
+ loss_llm_weight: 0.01
196
+ model:
197
+ _target_: llmbc.model.policy.policy_mlp.PolicyMLP
198
+ activation: relu
199
+ hidden_size:
200
+ - 256
201
+ - 256
202
+ input_size: 9
203
+ n_action_steps: 1
204
+ n_obs_steps: 1
205
+ output_size: 4
206
+ n_action_steps: 1
207
+ n_obs_steps: 1
208
+ normalize_llm_loss: true
209
+ obs_dim: 9
210
+ task:
211
+ value:
212
+ action_dim: 4
213
+ dataset:
214
+ _target_: llmbc.dataset.metaworld_lowdim_dataset.MetaworldLowdimDataset
215
+ data_path: datasets/box-close-v2.pt
216
+ data_path2: datasets/box-close-v2.pt
217
+ dummy_normalizer: true
218
+ horizon: 1
219
+ obs_eef_target: true
220
+ pad_after: 0
221
+ pad_before: 0
222
+ use_manual_normalizer: false
223
+ val_ratio: 0.1
224
+ env_runner:
225
+ _target_: llmbc.env_runner.metaworld_lowdim_runner.MetaworldLowdimRunner
226
+ discount: 0.9
227
+ env_name: llf-metaworld-box-close-v2
228
+ feedback_type:
229
+ - hp
230
+ - hn
231
+ - fp
232
+ instruction_type: b
233
+ max_steps: 30
234
+ n_action_steps: 1
235
+ n_envs: 10
236
+ n_obs_steps: 1
237
+ n_test: 50
238
+ n_train: 10
239
+ visual: false
240
+ instructor:
241
+ _target_: llmbc.translator.instructor.metaworld_instructor.box_close_v2_instructor.BoxCloseV2Instructor
242
+ name: box-close-v2
243
+ obs_dim: 9
244
+ task_name:
245
+ value: box-close-v2
246
+ training:
247
+ value:
248
+ checkpoint_every: 5
249
+ debug: false
250
+ device: cuda:0
251
+ grad_norm_clip: 0.5
252
+ gradient_accumulate_every: 8
253
+ lr_scheduler: cosine
254
+ lr_warmup_steps: 10
255
+ max_train_steps: null
256
+ max_val_steps: null
257
+ num_epochs: 1001
258
+ resume: false
259
+ rollout_every: 5
260
+ sample_every: 5
261
+ sample_max_batch: 128
262
+ seed: 42
263
+ tqdm_interval_sec: 1
264
+ val_every: 1
265
+ val_dataloader:
266
+ value:
267
+ batch_size: 16
268
+ num_workers: 0
269
+ persistent_workers: false
270
+ pin_memory: false
271
+ shuffle: true
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/output.log ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Eval MetaworldLowdimRunner 1/6: 0%| | 0/30 [00:00<?, ?it/s]/work/u1131674/LLM-BC/llmbc/common/llfbench_util.py:39: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at ../torch/csrc/utils/tensor_new.cpp:275.)
2
+ obs = torch.tensor(obs, dtype=torch.float32).unsqueeze(dim=0).to(device)
3
+ Eval MetaworldLowdimRunner 4/6: 13%|██████████▊ | 4/30 [00:00<00:01, 15.43it/s]Traceback (most recent call last):
4
+ File "/work/u1131674/LLM-BC/./train.py", line 35, in <module>
5
+ main()
6
+ File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/main.py", line 90, in decorated_main
7
+ _run_hydra(
8
+ File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/_internal/utils.py", line 389, in _run_hydra
9
+ _run_app(
10
+ File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/_internal/utils.py", line 452, in _run_app
11
+ run_and_report(
12
+ File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/_internal/utils.py", line 213, in run_and_report
13
+ return func()
14
+ File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/_internal/utils.py", line 453, in <lambda>
15
+ lambda: hydra.run(
16
+ File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/_internal/hydra.py", line 119, in run
17
+ ret = run_job(
18
+ File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/core/utils.py", line 186, in run_job
19
+ ret.return_value = task_function(task_cfg)
20
+ File "/work/u1131674/LLM-BC/./train.py", line 32, in main
21
+ workspace.run()
22
+ File "/work/u1131674/LLM-BC/llmbc/workspace/train_llmbc_lowdim_workspace.py", line 238, in run
23
+ runner_log = env_runner.run(policy)
24
+ File "/work/u1131674/LLM-BC/llmbc/env_runner/metaworld_lowdim_runner.py", line 153, in run
25
+ action_dict = policy.predict_action(obs_dict)
26
+ File "/work/u1131674/LLM-BC/llmbc/policy/llmbc_lowdim_policy.py", line 80, in predict_action
27
+ action_mean, action_log_std = self.model.a_mean_logstd(obs)
28
+ File "/work/u1131674/LLM-BC/llmbc/model/policy/policy_mlp.py", line 74, in a_mean_logstd
29
+ y = self.forward(obs)
30
+ File "/work/u1131674/LLM-BC/llmbc/model/policy/policy_mlp.py", line 68, in forward
31
+ y = super().forward(y)
32
+ File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/torch/nn/modules/container.py", line 217, in forward
33
+ input = module(input)
34
+ File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
35
+ return self._call_impl(*args, **kwargs)
36
+ File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
37
+ return forward_call(*args, **kwargs)
38
+ File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/torch/nn/modules/linear.py", line 116, in forward
39
+ return F.linear(input, self.weight, self.bias)
40
+ KeyboardInterrupt
41
+ Traceback (most recent call last):
42
+ File "/work/u1131674/LLM-BC/./train.py", line 35, in <module>
43
+ main()
44
+ File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/main.py", line 90, in decorated_main
45
+ _run_hydra(
46
+ File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/_internal/utils.py", line 389, in _run_hydra
47
+ _run_app(
48
+ File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/_internal/utils.py", line 452, in _run_app
49
+ run_and_report(
50
+ File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/_internal/utils.py", line 213, in run_and_report
51
+ return func()
52
+ File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/_internal/utils.py", line 453, in <lambda>
53
+ lambda: hydra.run(
54
+ File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/_internal/hydra.py", line 119, in run
55
+ ret = run_job(
56
+ File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/hydra/core/utils.py", line 186, in run_job
57
+ ret.return_value = task_function(task_cfg)
58
+ File "/work/u1131674/LLM-BC/./train.py", line 32, in main
59
+ workspace.run()
60
+ File "/work/u1131674/LLM-BC/llmbc/workspace/train_llmbc_lowdim_workspace.py", line 238, in run
61
+ runner_log = env_runner.run(policy)
62
+ File "/work/u1131674/LLM-BC/llmbc/env_runner/metaworld_lowdim_runner.py", line 153, in run
63
+ action_dict = policy.predict_action(obs_dict)
64
+ File "/work/u1131674/LLM-BC/llmbc/policy/llmbc_lowdim_policy.py", line 80, in predict_action
65
+ action_mean, action_log_std = self.model.a_mean_logstd(obs)
66
+ File "/work/u1131674/LLM-BC/llmbc/model/policy/policy_mlp.py", line 74, in a_mean_logstd
67
+ y = self.forward(obs)
68
+ File "/work/u1131674/LLM-BC/llmbc/model/policy/policy_mlp.py", line 68, in forward
69
+ y = super().forward(y)
70
+ File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/torch/nn/modules/container.py", line 217, in forward
71
+ input = module(input)
72
+ File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
73
+ return self._call_impl(*args, **kwargs)
74
+ File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
75
+ return forward_call(*args, **kwargs)
76
+ File "/home/u1131674/.conda/envs/llm-bc/lib/python3.9/site-packages/torch/nn/modules/linear.py", line 116, in forward
77
+ return F.linear(input, self.weight, self.bias)
78
+ KeyboardInterrupt
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/requirements.txt ADDED
@@ -0,0 +1,857 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ rpds-py==0.27.1
2
+ typeguard==4.4.4
3
+ flatbuffers==25.12.19
4
+ toppra==0.6.3
5
+ sympy==1.14.0
6
+ tiktoken==0.8.0
7
+ nvidia-cuda-cupti-cu12==12.1.105
8
+ arm_pytorch_utilities==0.4.3
9
+ pynndescent==0.6.0
10
+ multidict==6.7.0
11
+ fonttools==4.60.2
12
+ numexpr==2.10.1
13
+ cmudict==1.0.13
14
+ PyOpenGL-accelerate==3.1.10
15
+ gmpy2==2.2.1
16
+ peft==0.14.0
17
+ metaworld==2.0.0
18
+ nvidia-cufft-cu12==11.0.2.54
19
+ python-dateutil==2.9.0.post0
20
+ aiosignal==1.4.0
21
+ pexpect==4.9.0
22
+ protobuf==4.25.8
23
+ typing_extensions==4.15.0
24
+ mujoco==2.3.7
25
+ tokenizers==0.21.0
26
+ pytorch-kinematics==0.7.5
27
+ sniffio==1.3.1
28
+ aiofiles==25.1.0
29
+ mplib==0.1.1
30
+ wcwidth==0.2.14
31
+ Pygments==2.19.1
32
+ anyio==4.12.1
33
+ tensorflow-estimator==2.15.0
34
+ filelock==3.17.0
35
+ numpy==1.23.5
36
+ attrs==25.4.0
37
+ Markdown==3.9
38
+ fsspec==2024.3.1
39
+ libclang==18.1.1
40
+ umap-learn==0.5.9.post2
41
+ dill==0.3.8
42
+ narwhals==2.15.0
43
+ tensorboard==2.15.2
44
+ dacite==1.9.2
45
+ termcolor==3.1.0
46
+ llmbc==0.0.0
47
+ python-multipart==0.0.20
48
+ exceptiongroup==1.3.1
49
+ sapien==3.0.0b1
50
+ pygame==2.6.1
51
+ nvidia-curand-cu12==10.3.2.106
52
+ evaluate==0.4.3
53
+ msgpack==1.1.1
54
+ tensorflow-probability==0.23.0
55
+ diffusers==0.31.0
56
+ certifi==2025.10.5
57
+ d4rl==1.1
58
+ pydub==0.25.1
59
+ annotated-doc==0.0.4
60
+ gitdb==4.0.12
61
+ gradio_client==0.2.9
62
+ Shapely==1.8.4
63
+ mani_skill==3.0.0b20
64
+ tensorflow-io-gcs-filesystem==0.37.1
65
+ fasteners==0.20
66
+ hjson==3.1.0
67
+ ninja==1.13.0
68
+ stack-data==0.6.3
69
+ pyarrow==21.0.0
70
+ networkx==3.2.1
71
+ nvidia-cusparse-cu12==12.1.0.106
72
+ pyparsing==3.3.1
73
+ timm==1.0.22
74
+ typing-inspection==0.4.2
75
+ openai==2.8.1
76
+ pybullet==3.2.6
77
+ hydra-core==1.2.0
78
+ gradio==3.36.1
79
+ tensorflow==2.15.1
80
+ asttokens==3.0.1
81
+ importlib-metadata==5.2.0
82
+ astunparse==1.6.3
83
+ tifffile==2024.8.30
84
+ annotated-types==0.7.0
85
+ Bottleneck==1.4.2
86
+ accelerate==1.0.1
87
+ pytz==2025.2
88
+ urllib3==2.5.0
89
+ frozenlist==1.8.0
90
+ sentry-sdk==2.50.0
91
+ jsonschema==4.25.1
92
+ tyro==0.9.1
93
+ Farama-Notifications==0.0.4
94
+ ffmpy==1.0.0
95
+ httpx==0.28.1
96
+ pymunk==6.2.1
97
+ shtab==1.7.2
98
+ glfw==2.0.0
99
+ hf-xet==1.1.8
100
+ omegaconf==2.2.1
101
+ blobfile==3.0.0
102
+ decorator==5.2.1
103
+ cffi==1.17.1
104
+ matplotlib-inline==0.2.1
105
+ eval_type_backport==0.2.2
106
+ torchaudio==2.2.2
107
+ colorama==0.4.6
108
+ click==8.1.8
109
+ Cython==0.29.37
110
+ orjson==3.11.5
111
+ gym_bandits==0.0.2
112
+ traitlets==5.14.3
113
+ docker-pycreds==0.4.0
114
+ multiprocess==0.70.15
115
+ zipp==3.21.0
116
+ antlr4-python3-runtime==4.9.3
117
+ uc-micro-py==1.0.3
118
+ mpmath==1.3.0
119
+ idna==3.11
120
+ aiodns==3.5.0
121
+ charset-normalizer==3.4.4
122
+ nvidia-nvjitlink-cu12==12.9.86
123
+ nvidia-cuda-nvrtc-cu12==12.1.105
124
+ seaborn==0.13.2
125
+ pyarrow-hotfix==0.7
126
+ pillow==11.3.0
127
+ pyautogen==0.1.0
128
+ requests==2.32.0
129
+ MarkupSafe==3.0.2
130
+ websockets==15.0.1
131
+ nvidia-nccl-cu12==2.19.3
132
+ pure_eval==0.2.3
133
+ parso==0.8.5
134
+ huggingface-hub==0.26.2
135
+ syllables==1.0.9
136
+ tf-agents==0.19.0
137
+ six==1.17.0
138
+ referencing==0.36.2
139
+ ptyprocess==0.7.0
140
+ platformdirs==4.4.0
141
+ fastapi==0.128.0
142
+ stable-baselines3==2.2.1
143
+ av==10.0.0
144
+ diskcache==5.6.3
145
+ pynvml==13.0.1
146
+ pytorch-seed==0.2.0
147
+ zarr==2.12.0
148
+ mdurl==0.1.2
149
+ docstring-parser==0.16
150
+ packaging==25.0
151
+ numcodecs==0.12.1
152
+ opt_einsum==3.4.0
153
+ markdown-it-py==2.2.0
154
+ nvidia-cuda-runtime-cu12==12.1.105
155
+ PyWavelets==1.6.0
156
+ datasets==2.19.0
157
+ contourpy==1.3.0
158
+ aiohappyeyeballs==2.6.1
159
+ jaxlib==0.4.30
160
+ ImageIO==2.37.2
161
+ wandb==0.18.6
162
+ jiter==0.12.0
163
+ gymnasium==0.29.1
164
+ pycryptodomex==3.23.0
165
+ google-pasta==0.2.0
166
+ ipython==8.18.1
167
+ threadpoolctl==3.6.0
168
+ py-cpuinfo==9.0.0
169
+ bitsandbytes==0.45.0
170
+ xxhash==3.5.0
171
+ google-auth-oauthlib==1.2.4
172
+ rsa==4.9.1
173
+ rouge_score==0.1.2
174
+ dm-control==1.0.14
175
+ oauthlib==3.3.1
176
+ pandas==2.3.3
177
+ tenacity==9.1.2
178
+ asciitree==0.3.3
179
+ scipy==1.13.1
180
+ jedi==0.19.2
181
+ gast==0.7.0
182
+ google-auth==2.47.0
183
+ transforms3d==0.4.2
184
+ kiwisolver==1.4.7
185
+ matplotlib==3.7.5
186
+ aiohttp==3.12.15
187
+ pip==23.3.2
188
+ imageio-ffmpeg==0.6.0
189
+ deepspeed==0.16.1
190
+ yarl==1.18.0
191
+ nvidia-nvtx-cu12==12.1.105
192
+ llfbench==0.1.0
193
+ wheel==0.45.1
194
+ PySocks==1.7.1
195
+ ml-dtypes==0.3.2
196
+ PyYAML==6.0.2
197
+ fast_kinematics==0.2.2
198
+ gin-config==0.5.0
199
+ setproctitle==1.3.7
200
+ safetensors==0.5.3
201
+ torchvision==0.17.2
202
+ semantic-version==2.10.0
203
+ PyOpenGL==3.1.10
204
+ nltk==3.9.2
205
+ lxml==6.0.2
206
+ pydantic==2.12.5
207
+ tqdm==4.67.1
208
+ keras==2.15.0
209
+ parse==1.19.1
210
+ linkify-it-py==2.0.3
211
+ dm-tree==0.1.8
212
+ requests-oauthlib==2.0.0
213
+ scikit-learn==1.6.1
214
+ altair==6.0.0
215
+ Werkzeug==3.1.5
216
+ sentencepiece==0.2.0
217
+ uvicorn==0.39.0
218
+ cycler==0.12.1
219
+ transformers==4.47.1
220
+ uvloop==0.22.1
221
+ mkl_random==1.2.8
222
+ GitPython==3.1.46
223
+ regex==2025.9.1
224
+ jax==0.4.30
225
+ llvmlite==0.39.1
226
+ pyasn1_modules==0.4.2
227
+ nvidia-cudnn-cu12==8.9.2.26
228
+ pydantic_core==2.41.5
229
+ google-genai==1.47.0
230
+ propcache==0.3.1
231
+ pycares==4.10.0
232
+ pyperclip==1.11.0
233
+ pyasn1==0.6.2
234
+ async-timeout==5.0.1
235
+ psutil==7.0.0
236
+ gym==0.23.1
237
+ dm-env==1.6
238
+ Jinja2==3.1.6
239
+ sentence-transformers==3.2.1
240
+ einops==0.4.1
241
+ triton==2.2.0
242
+ grpcio==1.76.0
243
+ labmaze==1.0.6
244
+ nvidia-ml-py==13.590.44
245
+ brotlicffi==1.0.9.2
246
+ smmap==5.0.2
247
+ cloudpickle==3.1.2
248
+ setuptools==80.9.0
249
+ starlette==0.49.3
250
+ prompt_toolkit==3.0.52
251
+ wrapt==1.14.2
252
+ h5py==3.14.0
253
+ scikit-image==0.19.3
254
+ joblib==1.5.3
255
+ opencv-python==4.11.0.86
256
+ rich==14.2.0
257
+ trl==0.11.4
258
+ gym-notices==0.1.0
259
+ trimesh==4.11.1
260
+ mdit-py-plugins==0.3.3
261
+ distro==1.9.0
262
+ executing==2.2.1
263
+ mkl-service==2.4.0
264
+ nvidia-cusolver-cu12==11.4.5.107
265
+ FLAML==2.3.6
266
+ mujoco-py==2.1.2.14
267
+ h11==0.16.0
268
+ highway-env==1.9.1
269
+ httpcore==1.0.9
270
+ tensorboard-data-server==0.7.2
271
+ tzdata==2025.3
272
+ absl-py==2.3.1
273
+ jsonschema-specifications==2025.9.1
274
+ numba==0.56.4
275
+ tabulate==0.9.0
276
+ importlib-resources==5.13.0
277
+ pycparser==2.23
278
+ mkl_fft==1.3.11
279
+ torch==2.2.2
280
+ nvidia-cublas-cu12==12.1.3.1
281
+ rpds-py==0.27.1
282
+ typeguard==4.4.4
283
+ flatbuffers==25.12.19
284
+ toppra==0.6.3
285
+ sympy==1.14.0
286
+ tiktoken==0.8.0
287
+ nvidia-cuda-cupti-cu12==12.1.105
288
+ arm_pytorch_utilities==0.4.3
289
+ pynndescent==0.6.0
290
+ multidict==6.7.0
291
+ fonttools==4.60.2
292
+ numexpr==2.10.1
293
+ cmudict==1.0.13
294
+ PyOpenGL-accelerate==3.1.10
295
+ gmpy2==2.2.1
296
+ peft==0.14.0
297
+ metaworld==2.0.0
298
+ nvidia-cufft-cu12==11.0.2.54
299
+ python-dateutil==2.9.0.post0
300
+ aiosignal==1.4.0
301
+ pexpect==4.9.0
302
+ protobuf==4.25.8
303
+ typing_extensions==4.15.0
304
+ mujoco==2.3.7
305
+ tokenizers==0.21.0
306
+ pytorch-kinematics==0.7.5
307
+ sniffio==1.3.1
308
+ aiofiles==25.1.0
309
+ mplib==0.1.1
310
+ wcwidth==0.2.14
311
+ Pygments==2.19.1
312
+ anyio==4.12.1
313
+ tensorflow-estimator==2.15.0
314
+ filelock==3.17.0
315
+ numpy==1.23.5
316
+ attrs==25.4.0
317
+ Markdown==3.9
318
+ fsspec==2024.3.1
319
+ libclang==18.1.1
320
+ umap-learn==0.5.9.post2
321
+ dill==0.3.8
322
+ narwhals==2.15.0
323
+ tensorboard==2.15.2
324
+ dacite==1.9.2
325
+ termcolor==3.1.0
326
+ llmbc==0.0.0
327
+ python-multipart==0.0.20
328
+ exceptiongroup==1.3.1
329
+ sapien==3.0.0b1
330
+ pygame==2.6.1
331
+ nvidia-curand-cu12==10.3.2.106
332
+ evaluate==0.4.3
333
+ msgpack==1.1.1
334
+ tensorflow-probability==0.23.0
335
+ diffusers==0.31.0
336
+ certifi==2025.10.5
337
+ d4rl==1.1
338
+ pydub==0.25.1
339
+ annotated-doc==0.0.4
340
+ gitdb==4.0.12
341
+ gradio_client==0.2.9
342
+ Shapely==1.8.4
343
+ mani_skill==3.0.0b20
344
+ tensorflow-io-gcs-filesystem==0.37.1
345
+ fasteners==0.20
346
+ hjson==3.1.0
347
+ ninja==1.13.0
348
+ stack-data==0.6.3
349
+ pyarrow==21.0.0
350
+ networkx==3.2.1
351
+ nvidia-cusparse-cu12==12.1.0.106
352
+ pyparsing==3.3.1
353
+ timm==1.0.22
354
+ typing-inspection==0.4.2
355
+ openai==2.8.1
356
+ pybullet==3.2.6
357
+ hydra-core==1.2.0
358
+ gradio==3.36.1
359
+ tensorflow==2.15.1
360
+ asttokens==3.0.1
361
+ importlib-metadata==5.2.0
362
+ astunparse==1.6.3
363
+ tifffile==2024.8.30
364
+ annotated-types==0.7.0
365
+ Bottleneck==1.4.2
366
+ accelerate==1.0.1
367
+ pytz==2025.2
368
+ urllib3==2.5.0
369
+ frozenlist==1.8.0
370
+ sentry-sdk==2.50.0
371
+ jsonschema==4.25.1
372
+ tyro==0.9.1
373
+ Farama-Notifications==0.0.4
374
+ ffmpy==1.0.0
375
+ httpx==0.28.1
376
+ pymunk==6.2.1
377
+ shtab==1.7.2
378
+ glfw==2.0.0
379
+ hf-xet==1.1.8
380
+ omegaconf==2.2.1
381
+ blobfile==3.0.0
382
+ decorator==5.2.1
383
+ cffi==1.17.1
384
+ matplotlib-inline==0.2.1
385
+ eval_type_backport==0.2.2
386
+ torchaudio==2.2.2
387
+ colorama==0.4.6
388
+ click==8.1.8
389
+ Cython==0.29.37
390
+ orjson==3.11.5
391
+ gym_bandits==0.0.2
392
+ traitlets==5.14.3
393
+ docker-pycreds==0.4.0
394
+ multiprocess==0.70.15
395
+ zipp==3.21.0
396
+ antlr4-python3-runtime==4.9.3
397
+ uc-micro-py==1.0.3
398
+ mpmath==1.3.0
399
+ idna==3.11
400
+ aiodns==3.5.0
401
+ charset-normalizer==3.4.4
402
+ nvidia-nvjitlink-cu12==12.9.86
403
+ nvidia-cuda-nvrtc-cu12==12.1.105
404
+ seaborn==0.13.2
405
+ pyarrow-hotfix==0.7
406
+ pillow==11.3.0
407
+ pyautogen==0.1.0
408
+ requests==2.32.0
409
+ MarkupSafe==3.0.2
410
+ websockets==15.0.1
411
+ nvidia-nccl-cu12==2.19.3
412
+ pure_eval==0.2.3
413
+ parso==0.8.5
414
+ huggingface-hub==0.26.2
415
+ syllables==1.0.9
416
+ tf-agents==0.19.0
417
+ six==1.17.0
418
+ referencing==0.36.2
419
+ ptyprocess==0.7.0
420
+ platformdirs==4.4.0
421
+ fastapi==0.128.0
422
+ stable-baselines3==2.2.1
423
+ av==10.0.0
424
+ diskcache==5.6.3
425
+ pynvml==13.0.1
426
+ pytorch-seed==0.2.0
427
+ zarr==2.12.0
428
+ mdurl==0.1.2
429
+ docstring-parser==0.16
430
+ packaging==25.0
431
+ numcodecs==0.12.1
432
+ opt_einsum==3.4.0
433
+ markdown-it-py==2.2.0
434
+ nvidia-cuda-runtime-cu12==12.1.105
435
+ PyWavelets==1.6.0
436
+ datasets==2.19.0
437
+ contourpy==1.3.0
438
+ aiohappyeyeballs==2.6.1
439
+ jaxlib==0.4.30
440
+ ImageIO==2.37.2
441
+ wandb==0.18.6
442
+ jiter==0.12.0
443
+ gymnasium==0.29.1
444
+ pycryptodomex==3.23.0
445
+ google-pasta==0.2.0
446
+ ipython==8.18.1
447
+ threadpoolctl==3.6.0
448
+ py-cpuinfo==9.0.0
449
+ bitsandbytes==0.45.0
450
+ xxhash==3.5.0
451
+ google-auth-oauthlib==1.2.4
452
+ rsa==4.9.1
453
+ rouge_score==0.1.2
454
+ dm-control==1.0.14
455
+ oauthlib==3.3.1
456
+ pandas==2.3.3
457
+ tenacity==9.1.2
458
+ asciitree==0.3.3
459
+ scipy==1.13.1
460
+ jedi==0.19.2
461
+ gast==0.7.0
462
+ google-auth==2.47.0
463
+ transforms3d==0.4.2
464
+ kiwisolver==1.4.7
465
+ matplotlib==3.7.5
466
+ aiohttp==3.12.15
467
+ pip==23.3.2
468
+ imageio-ffmpeg==0.6.0
469
+ deepspeed==0.16.1
470
+ yarl==1.18.0
471
+ nvidia-nvtx-cu12==12.1.105
472
+ llfbench==0.1.0
473
+ wheel==0.45.1
474
+ PySocks==1.7.1
475
+ ml-dtypes==0.3.2
476
+ PyYAML==6.0.2
477
+ fast_kinematics==0.2.2
478
+ gin-config==0.5.0
479
+ setproctitle==1.3.7
480
+ safetensors==0.5.3
481
+ torchvision==0.17.2
482
+ semantic-version==2.10.0
483
+ PyOpenGL==3.1.10
484
+ nltk==3.9.2
485
+ lxml==6.0.2
486
+ pydantic==2.12.5
487
+ tqdm==4.67.1
488
+ keras==2.15.0
489
+ parse==1.19.1
490
+ linkify-it-py==2.0.3
491
+ dm-tree==0.1.8
492
+ requests-oauthlib==2.0.0
493
+ scikit-learn==1.6.1
494
+ altair==6.0.0
495
+ Werkzeug==3.1.5
496
+ sentencepiece==0.2.0
497
+ uvicorn==0.39.0
498
+ cycler==0.12.1
499
+ transformers==4.47.1
500
+ uvloop==0.22.1
501
+ mkl_random==1.2.8
502
+ GitPython==3.1.46
503
+ regex==2025.9.1
504
+ jax==0.4.30
505
+ llvmlite==0.39.1
506
+ pyasn1_modules==0.4.2
507
+ nvidia-cudnn-cu12==8.9.2.26
508
+ pydantic_core==2.41.5
509
+ google-genai==1.47.0
510
+ propcache==0.3.1
511
+ pycares==4.10.0
512
+ pyperclip==1.11.0
513
+ pyasn1==0.6.2
514
+ async-timeout==5.0.1
515
+ psutil==7.0.0
516
+ gym==0.23.1
517
+ dm-env==1.6
518
+ Jinja2==3.1.6
519
+ sentence-transformers==3.2.1
520
+ einops==0.4.1
521
+ triton==2.2.0
522
+ grpcio==1.76.0
523
+ labmaze==1.0.6
524
+ nvidia-ml-py==13.590.44
525
+ brotlicffi==1.0.9.2
526
+ smmap==5.0.2
527
+ cloudpickle==3.1.2
528
+ setuptools==80.9.0
529
+ starlette==0.49.3
530
+ prompt_toolkit==3.0.52
531
+ wrapt==1.14.2
532
+ h5py==3.14.0
533
+ scikit-image==0.19.3
534
+ joblib==1.5.3
535
+ opencv-python==4.11.0.86
536
+ rich==14.2.0
537
+ trl==0.11.4
538
+ gym-notices==0.1.0
539
+ trimesh==4.11.1
540
+ mdit-py-plugins==0.3.3
541
+ distro==1.9.0
542
+ executing==2.2.1
543
+ mkl-service==2.4.0
544
+ nvidia-cusolver-cu12==11.4.5.107
545
+ FLAML==2.3.6
546
+ mujoco-py==2.1.2.14
547
+ h11==0.16.0
548
+ highway-env==1.9.1
549
+ httpcore==1.0.9
550
+ tensorboard-data-server==0.7.2
551
+ tzdata==2025.3
552
+ absl-py==2.3.1
553
+ jsonschema-specifications==2025.9.1
554
+ numba==0.56.4
555
+ tabulate==0.9.0
556
+ importlib-resources==5.13.0
557
+ pycparser==2.23
558
+ mkl_fft==1.3.11
559
+ torch==2.2.2
560
+ nvidia-cublas-cu12==12.1.3.1
561
+ llmbc==0.0.0
562
+ rpds-py==0.27.1
563
+ typeguard==4.4.4
564
+ flatbuffers==25.12.19
565
+ toppra==0.6.3
566
+ sympy==1.14.0
567
+ tiktoken==0.8.0
568
+ nvidia-cuda-cupti-cu12==12.1.105
569
+ arm_pytorch_utilities==0.4.3
570
+ pynndescent==0.6.0
571
+ multidict==6.7.0
572
+ fonttools==4.60.2
573
+ numexpr==2.10.1
574
+ cmudict==1.0.13
575
+ PyOpenGL-accelerate==3.1.10
576
+ gmpy2==2.2.1
577
+ peft==0.14.0
578
+ metaworld==2.0.0
579
+ nvidia-cufft-cu12==11.0.2.54
580
+ python-dateutil==2.9.0.post0
581
+ aiosignal==1.4.0
582
+ pexpect==4.9.0
583
+ protobuf==4.25.8
584
+ typing_extensions==4.15.0
585
+ mujoco==2.3.7
586
+ tokenizers==0.21.0
587
+ pytorch-kinematics==0.7.5
588
+ sniffio==1.3.1
589
+ aiofiles==25.1.0
590
+ mplib==0.1.1
591
+ wcwidth==0.2.14
592
+ Pygments==2.19.1
593
+ anyio==4.12.1
594
+ tensorflow-estimator==2.15.0
595
+ filelock==3.17.0
596
+ numpy==1.23.5
597
+ attrs==25.4.0
598
+ Markdown==3.9
599
+ fsspec==2024.3.1
600
+ libclang==18.1.1
601
+ umap-learn==0.5.9.post2
602
+ dill==0.3.8
603
+ narwhals==2.15.0
604
+ tensorboard==2.15.2
605
+ dacite==1.9.2
606
+ termcolor==3.1.0
607
+ llmbc==0.0.0
608
+ python-multipart==0.0.20
609
+ exceptiongroup==1.3.1
610
+ sapien==3.0.0b1
611
+ pygame==2.6.1
612
+ nvidia-curand-cu12==10.3.2.106
613
+ evaluate==0.4.3
614
+ msgpack==1.1.1
615
+ tensorflow-probability==0.23.0
616
+ diffusers==0.31.0
617
+ certifi==2025.10.5
618
+ d4rl==1.1
619
+ pydub==0.25.1
620
+ annotated-doc==0.0.4
621
+ gitdb==4.0.12
622
+ gradio_client==0.2.9
623
+ Shapely==1.8.4
624
+ mani_skill==3.0.0b20
625
+ tensorflow-io-gcs-filesystem==0.37.1
626
+ fasteners==0.20
627
+ hjson==3.1.0
628
+ ninja==1.13.0
629
+ stack-data==0.6.3
630
+ pyarrow==21.0.0
631
+ networkx==3.2.1
632
+ nvidia-cusparse-cu12==12.1.0.106
633
+ pyparsing==3.3.1
634
+ timm==1.0.22
635
+ typing-inspection==0.4.2
636
+ openai==2.8.1
637
+ pybullet==3.2.6
638
+ hydra-core==1.2.0
639
+ gradio==3.36.1
640
+ tensorflow==2.15.1
641
+ asttokens==3.0.1
642
+ importlib-metadata==5.2.0
643
+ astunparse==1.6.3
644
+ tifffile==2024.8.30
645
+ annotated-types==0.7.0
646
+ Bottleneck==1.4.2
647
+ accelerate==1.0.1
648
+ pytz==2025.2
649
+ urllib3==2.5.0
650
+ frozenlist==1.8.0
651
+ sentry-sdk==2.50.0
652
+ jsonschema==4.25.1
653
+ tyro==0.9.1
654
+ Farama-Notifications==0.0.4
655
+ ffmpy==1.0.0
656
+ httpx==0.28.1
657
+ pymunk==6.2.1
658
+ shtab==1.7.2
659
+ glfw==2.0.0
660
+ hf-xet==1.1.8
661
+ omegaconf==2.2.1
662
+ blobfile==3.0.0
663
+ decorator==5.2.1
664
+ cffi==1.17.1
665
+ matplotlib-inline==0.2.1
666
+ eval_type_backport==0.2.2
667
+ torchaudio==2.2.2
668
+ colorama==0.4.6
669
+ click==8.1.8
670
+ Cython==0.29.37
671
+ orjson==3.11.5
672
+ gym_bandits==0.0.2
673
+ traitlets==5.14.3
674
+ docker-pycreds==0.4.0
675
+ multiprocess==0.70.15
676
+ zipp==3.21.0
677
+ antlr4-python3-runtime==4.9.3
678
+ uc-micro-py==1.0.3
679
+ mpmath==1.3.0
680
+ idna==3.11
681
+ aiodns==3.5.0
682
+ charset-normalizer==3.4.4
683
+ nvidia-nvjitlink-cu12==12.9.86
684
+ nvidia-cuda-nvrtc-cu12==12.1.105
685
+ seaborn==0.13.2
686
+ pyarrow-hotfix==0.7
687
+ pillow==11.3.0
688
+ pyautogen==0.1.0
689
+ requests==2.32.0
690
+ MarkupSafe==3.0.2
691
+ websockets==15.0.1
692
+ nvidia-nccl-cu12==2.19.3
693
+ pure_eval==0.2.3
694
+ parso==0.8.5
695
+ huggingface-hub==0.26.2
696
+ syllables==1.0.9
697
+ tf-agents==0.19.0
698
+ six==1.17.0
699
+ referencing==0.36.2
700
+ ptyprocess==0.7.0
701
+ platformdirs==4.4.0
702
+ fastapi==0.128.0
703
+ stable-baselines3==2.2.1
704
+ av==10.0.0
705
+ diskcache==5.6.3
706
+ pynvml==13.0.1
707
+ pytorch-seed==0.2.0
708
+ zarr==2.12.0
709
+ mdurl==0.1.2
710
+ docstring-parser==0.16
711
+ packaging==25.0
712
+ numcodecs==0.12.1
713
+ opt_einsum==3.4.0
714
+ markdown-it-py==2.2.0
715
+ nvidia-cuda-runtime-cu12==12.1.105
716
+ PyWavelets==1.6.0
717
+ datasets==2.19.0
718
+ contourpy==1.3.0
719
+ aiohappyeyeballs==2.6.1
720
+ jaxlib==0.4.30
721
+ ImageIO==2.37.2
722
+ wandb==0.18.6
723
+ jiter==0.12.0
724
+ gymnasium==0.29.1
725
+ pycryptodomex==3.23.0
726
+ google-pasta==0.2.0
727
+ ipython==8.18.1
728
+ threadpoolctl==3.6.0
729
+ py-cpuinfo==9.0.0
730
+ bitsandbytes==0.45.0
731
+ xxhash==3.5.0
732
+ google-auth-oauthlib==1.2.4
733
+ rsa==4.9.1
734
+ rouge_score==0.1.2
735
+ dm-control==1.0.14
736
+ oauthlib==3.3.1
737
+ pandas==2.3.3
738
+ tenacity==9.1.2
739
+ asciitree==0.3.3
740
+ scipy==1.13.1
741
+ jedi==0.19.2
742
+ gast==0.7.0
743
+ google-auth==2.47.0
744
+ transforms3d==0.4.2
745
+ kiwisolver==1.4.7
746
+ matplotlib==3.7.5
747
+ aiohttp==3.12.15
748
+ pip==23.3.2
749
+ imageio-ffmpeg==0.6.0
750
+ deepspeed==0.16.1
751
+ yarl==1.18.0
752
+ nvidia-nvtx-cu12==12.1.105
753
+ llfbench==0.1.0
754
+ wheel==0.45.1
755
+ PySocks==1.7.1
756
+ ml-dtypes==0.3.2
757
+ PyYAML==6.0.2
758
+ fast_kinematics==0.2.2
759
+ gin-config==0.5.0
760
+ setproctitle==1.3.7
761
+ safetensors==0.5.3
762
+ torchvision==0.17.2
763
+ semantic-version==2.10.0
764
+ PyOpenGL==3.1.10
765
+ nltk==3.9.2
766
+ lxml==6.0.2
767
+ pydantic==2.12.5
768
+ tqdm==4.67.1
769
+ keras==2.15.0
770
+ parse==1.19.1
771
+ linkify-it-py==2.0.3
772
+ dm-tree==0.1.8
773
+ requests-oauthlib==2.0.0
774
+ scikit-learn==1.6.1
775
+ altair==6.0.0
776
+ Werkzeug==3.1.5
777
+ sentencepiece==0.2.0
778
+ uvicorn==0.39.0
779
+ cycler==0.12.1
780
+ transformers==4.47.1
781
+ uvloop==0.22.1
782
+ mkl_random==1.2.8
783
+ GitPython==3.1.46
784
+ regex==2025.9.1
785
+ jax==0.4.30
786
+ llvmlite==0.39.1
787
+ pyasn1_modules==0.4.2
788
+ nvidia-cudnn-cu12==8.9.2.26
789
+ pydantic_core==2.41.5
790
+ google-genai==1.47.0
791
+ propcache==0.3.1
792
+ pycares==4.10.0
793
+ pyperclip==1.11.0
794
+ pyasn1==0.6.2
795
+ async-timeout==5.0.1
796
+ psutil==7.0.0
797
+ gym==0.23.1
798
+ dm-env==1.6
799
+ Jinja2==3.1.6
800
+ sentence-transformers==3.2.1
801
+ einops==0.4.1
802
+ triton==2.2.0
803
+ grpcio==1.76.0
804
+ labmaze==1.0.6
805
+ nvidia-ml-py==13.590.44
806
+ brotlicffi==1.0.9.2
807
+ smmap==5.0.2
808
+ cloudpickle==3.1.2
809
+ setuptools==80.9.0
810
+ starlette==0.49.3
811
+ prompt_toolkit==3.0.52
812
+ wrapt==1.14.2
813
+ h5py==3.14.0
814
+ scikit-image==0.19.3
815
+ joblib==1.5.3
816
+ opencv-python==4.11.0.86
817
+ rich==14.2.0
818
+ trl==0.11.4
819
+ gym-notices==0.1.0
820
+ trimesh==4.11.1
821
+ mdit-py-plugins==0.3.3
822
+ distro==1.9.0
823
+ executing==2.2.1
824
+ mkl-service==2.4.0
825
+ nvidia-cusolver-cu12==11.4.5.107
826
+ FLAML==2.3.6
827
+ mujoco-py==2.1.2.14
828
+ h11==0.16.0
829
+ highway-env==1.9.1
830
+ httpcore==1.0.9
831
+ tensorboard-data-server==0.7.2
832
+ tzdata==2025.3
833
+ absl-py==2.3.1
834
+ jsonschema-specifications==2025.9.1
835
+ numba==0.56.4
836
+ tabulate==0.9.0
837
+ importlib-resources==5.13.0
838
+ pycparser==2.23
839
+ mkl_fft==1.3.11
840
+ torch==2.2.2
841
+ nvidia-cublas-cu12==12.1.3.1
842
+ zipp==3.19.2
843
+ jaraco.text==3.12.1
844
+ jaraco.context==5.3.0
845
+ importlib_metadata==8.0.0
846
+ typeguard==4.3.0
847
+ inflect==7.3.1
848
+ more-itertools==10.3.0
849
+ wheel==0.45.1
850
+ packaging==24.2
851
+ backports.tarfile==1.2.0
852
+ autocommand==2.2.2
853
+ jaraco.collections==5.1.0
854
+ tomli==2.0.1
855
+ platformdirs==4.2.2
856
+ jaraco.functools==4.0.1
857
+ typing_extensions==4.12.2
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/wandb-metadata.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-4.18.0-513.24.1.el8_9.x86_64-x86_64-with-glibc2.28",
3
+ "python": "3.9.25",
4
+ "startedAt": "2026-01-21T04:19:19.675401Z",
5
+ "args": [
6
+ "--config-path",
7
+ "./config/main_table",
8
+ "--config-name",
9
+ "llmbc_box-close-v2.yaml"
10
+ ],
11
+ "program": "/work/u1131674/LLM-BC/./train.py",
12
+ "codePath": "train.py",
13
+ "git": {
14
+ "remote": "https://github.com/CHYang25/LLM-BC.git",
15
+ "commit": "1d2e1f5818e116390426ef596d075fc0cf1b0081"
16
+ },
17
+ "email": "chris920325@gmail.com",
18
+ "root": "/work/u1131674/LLM-BC/data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2",
19
+ "host": "cbi-lgn01",
20
+ "username": "u1131674",
21
+ "executable": "/home/u1131674/.conda/envs/llm-bc/bin/python3",
22
+ "codePathLocal": "train.py",
23
+ "cpu_count": 112,
24
+ "cpu_count_logical": 224,
25
+ "gpu": "NVIDIA H100 PCIe",
26
+ "gpu_count": 2,
27
+ "disk": {
28
+ "/": {
29
+ "total": "473745891328",
30
+ "used": "389026504704"
31
+ }
32
+ },
33
+ "memory": {
34
+ "total": "540117905408"
35
+ },
36
+ "cpu": {
37
+ "count": 112,
38
+ "countLogical": 224
39
+ },
40
+ "gpu_nvidia": [
41
+ {
42
+ "name": "NVIDIA H100 PCIe",
43
+ "memoryTotal": "85520809984",
44
+ "cudaCores": 14592,
45
+ "architecture": "Hopper"
46
+ },
47
+ {
48
+ "name": "NVIDIA H100 PCIe",
49
+ "memoryTotal": "85520809984",
50
+ "cudaCores": 14592,
51
+ "architecture": "Hopper"
52
+ }
53
+ ],
54
+ "cudaVersion": "12.4"
55
+ }
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"train_loss":0.024963906034827232,"_timestamp":1.7689691981223695e+09,"train_loss_bc":0.020005209371447563,"_wandb":{"runtime":71},"epoch":0,"_runtime":71.527189585,"train_loss_llm":0.49586963653564453,"_step":236,"grad_norm":0.1454334259033203,"global_step":236,"lr":0.009999988862926341}
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug-core.log ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2026-01-21T12:19:19.051245689+08:00","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmpu06061ms/port-2070718.txt","pid":2070718,"debug":false,"disable-analytics":false}
2
+ {"time":"2026-01-21T12:19:19.051287504+08:00","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
3
+ {"time":"2026-01-21T12:19:19.051845713+08:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":2070718}
4
+ {"time":"2026-01-21T12:19:19.051828427+08:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":43057,"Zone":""}}
5
+ {"time":"2026-01-21T12:19:19.231239451+08:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:33440"}
6
+ {"time":"2026-01-21T12:19:19.675902286+08:00","level":"INFO","msg":"handleInformInit: received","streamId":"9puzigbg","id":"127.0.0.1:33440"}
7
+ {"time":"2026-01-21T12:19:19.791119243+08:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"9puzigbg","id":"127.0.0.1:33440"}
8
+ {"time":"2026-01-21T12:20:31.202365496+08:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:33440"}
9
+ {"time":"2026-01-21T12:20:31.20258465+08:00","level":"INFO","msg":"server is shutting down"}
10
+ {"time":"2026-01-21T12:20:31.202524542+08:00","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:33440"}
11
+ {"time":"2026-01-21T12:20:31.202695965+08:00","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:33440"}
12
+ {"time":"2026-01-21T12:20:31.981247472+08:00","level":"INFO","msg":"Parent process exited, terminating service process."}
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug-internal.log ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2026-01-21T12:19:19.67691431+08:00","level":"INFO","msg":"using version","core version":"0.18.6"}
2
+ {"time":"2026-01-21T12:19:19.676924583+08:00","level":"INFO","msg":"created symlink","path":"/work/u1131674/LLM-BC/data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug-core.log"}
3
+ {"time":"2026-01-21T12:19:19.791067511+08:00","level":"INFO","msg":"created new stream","id":"9puzigbg"}
4
+ {"time":"2026-01-21T12:19:19.791113731+08:00","level":"INFO","msg":"stream: started","id":"9puzigbg"}
5
+ {"time":"2026-01-21T12:19:19.791148479+08:00","level":"INFO","msg":"sender: started","stream_id":"9puzigbg"}
6
+ {"time":"2026-01-21T12:19:19.791138771+08:00","level":"INFO","msg":"handler: started","stream_id":{"value":"9puzigbg"}}
7
+ {"time":"2026-01-21T12:19:19.791131709+08:00","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"9puzigbg"}}
8
+ {"time":"2026-01-21T12:19:20.473667126+08:00","level":"INFO","msg":"Starting system monitor"}
9
+ {"time":"2026-01-21T12:20:31.202511022+08:00","level":"INFO","msg":"stream: closing","id":"9puzigbg"}
10
+ {"time":"2026-01-21T12:20:31.202606065+08:00","level":"INFO","msg":"Stopping system monitor"}
11
+ {"time":"2026-01-21T12:20:31.262777289+08:00","level":"INFO","msg":"Stopped system monitor"}
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug.log ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026-01-21 12:19:19,672 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Current SDK version is 0.18.6
2
+ 2026-01-21 12:19:19,672 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Configure stats pid to 2070718
3
+ 2026-01-21 12:19:19,672 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Loading settings from /home/u1131674/.config/wandb/settings
4
+ 2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Loading settings from /work/u1131674/LLM-BC/wandb/settings
5
+ 2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
6
+ 2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': 'online', '_disable_service': None}
7
+ 2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'train.py', 'program_abspath': '/work/u1131674/LLM-BC/train.py', 'program': '/work/u1131674/LLM-BC/./train.py'}
8
+ 2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_setup.py:_flush():79] Applying login settings: {}
9
+ 2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:_log_setup():533] Logging user logs to /work/u1131674/LLM-BC/data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug.log
10
+ 2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:_log_setup():534] Logging internal logs to /work/u1131674/LLM-BC/data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/logs/debug-internal.log
11
+ 2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:init():619] calling init triggers
12
+ 2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
13
+ config: {'name': 'train_llmbc_lowdim', '_target_': 'llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace', 'obs_dim': 9, 'action_dim': 4, 'task_name': 'box-close-v2', 'exp_name': 'default', 'model_name': 'HuggingFaceTB/SmolLM2-135M-Instruct', 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1, 'n_latency_steps': 0, 'past_action_visible': False, 'llm_orig_expert_feedback': True, 'llm_do_sample': False, 'policy': {'_target_': 'llmbc.policy.llmbc_lowdim_policy.LLMBCLowdimPolicy', 'model': {'_target_': 'llmbc.model.policy.policy_mlp.PolicyMLP', 'input_size': 9, 'hidden_size': [256, 256], 'output_size': 4, 'activation': 'relu', 'n_obs_steps': 1, 'n_action_steps': 1}, 'obs_dim': 9, 'action_dim': 4, 'llm_discriminator': {'_target_': 'llmbc.discriminator.llm_ce_discriminator.LLMCEDiscriminator', 'task_id': 'box-close-v2', 'llm_translator': {'_target_': 'llmbc.translator.llm_translator.LLMTranslator', 'cfg': {'name': 'HuggingFaceTB/SmolLM2-135M-Instruct', 'model_name': 'SmolLM2-135M-Instruct', 'config_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig', 'causal_lm_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM', 'use_quantization': False, 'use_joint_mlp_projector': True, 'llm_mode': 'ete-finetuned', 'finetune_mode': 'orig', 'checkpoint': 'data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890', 'max_length': 100, 'lora_config': {'r': 32, 'lora_alpha': 64, 'lora_dropout': 0.05, 'bias': 'none', 'task_type': 'CAUSAL_LM'}, 'prompter': {'_target_': 'llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter', 'use_joint_mlp_projector': True}, 'hydra': {'job': {'override_dirname': 'HuggingFaceTB/SmolLM2-135M-Instruct'}, 'run': {'dir': 'data/outputs/2026.01.21/12.18.18_HuggingFaceTB/SmolLM2-135M-Instruct'}}}, 'obs_dim': 9, 'action_dim': 4, 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1}}, 'loss_bc_weight': 1.0, 'loss_llm_weight': 0.01, 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1, 'normalize_llm_loss': True}, 'dataloader': {'batch_size': 16, 'num_workers': 0, 'shuffle': True, 'pin_memory': False, 'persistent_workers': False}, 'val_dataloader': {'batch_size': 16, 'num_workers': 0, 'shuffle': True, 'pin_memory': False, 'persistent_workers': False}, 'optimizer': {'_target_': 'torch.optim.AdamW', 'lr': 0.01, 'betas': [0.95, 0.999], 'eps': 1e-08, 'weight_decay': 1e-06}, 'training': {'device': 'cuda:0', 'seed': 42, 'debug': False, 'resume': False, 'lr_scheduler': 'cosine', 'lr_warmup_steps': 10, 'num_epochs': 1001, 'gradient_accumulate_every': 8, 'grad_norm_clip': 0.5, 'rollout_every': 5, 'checkpoint_every': 5, 'val_every': 1, 'sample_every': 5, 'sample_max_batch': 128, 'max_train_steps': None, 'max_val_steps': None, 'tqdm_interval_sec': 1.0}, 'logging': {'project': 'box-close-v2-training', 'resume': True, 'mode': 'online', 'name': '2026.01.21-12.18.18_train_llmbc_lowdim_box-close-v2', 'tags': ['train_llmbc_lowdim', 'box-close-v2', 'default'], 'id': None, 'group': None}, 'checkpoint': {'topk': {'monitor_key': 'test_success_rate', 'mode': 'max', 'k': 5, 'format_str': 'epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt'}, 'save_last_ckpt': True, 'save_last_snapshot': False}, 'multi_run': {'run_dir': 'data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2', 'wandb_name_base': '2026.01.21-12.18.18_train_llmbc_lowdim_box-close-v2'}, 'task': {'name': 'box-close-v2', 'obs_dim': 9, 'action_dim': 4, 'env_runner': {'_target_': 'llmbc.env_runner.metaworld_lowdim_runner.MetaworldLowdimRunner', 'env_name': 'llf-metaworld-box-close-v2', 'n_train': 10, 'n_test': 50, 'n_envs': 10, 'max_steps': 30, 'n_obs_steps': 1, 'n_action_steps': 1, 'instruction_type': 'b', 'feedback_type': ['hp', 'hn', 'fp'], 'visual': False, 'discount': 0.9}, 'dataset': {'_target_': 'llmbc.dataset.metaworld_lowdim_dataset.MetaworldLowdimDataset', 'data_path': 'datasets/box-close-v2.pt', 'data_path2': 'datasets/box-close-v2.pt', 'horizon': 1, 'pad_before': 0, 'pad_after': 0, 'obs_eef_target': True, 'use_manual_normalizer': False, 'val_ratio': 0.1, 'dummy_normalizer': True}, 'instructor': {'_target_': 'llmbc.translator.instructor.metaworld_instructor.box_close_v2_instructor.BoxCloseV2Instructor'}}, 'llm': {'name': 'HuggingFaceTB/SmolLM2-135M-Instruct', 'model_name': 'SmolLM2-135M-Instruct', 'config_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig', 'causal_lm_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM', 'use_quantization': False, 'use_joint_mlp_projector': True, 'llm_mode': 'ete-finetuned', 'finetune_mode': 'orig', 'checkpoint': 'data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890', 'max_length': 100, 'lora_config': {'r': 32, 'lora_alpha': 64, 'lora_dropout': 0.05, 'bias': 'none', 'task_type': 'CAUSAL_LM'}, 'prompter': {'_target_': 'llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter', 'use_joint_mlp_projector': True}, 'hydra': {'job': {'override_dirname': 'HuggingFaceTB/SmolLM2-135M-Instruct'}, 'run': {'dir': 'data/outputs/2026.01.21/12.18.18_HuggingFaceTB/SmolLM2-135M-Instruct'}}}}
14
+ 2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:init():669] starting backend
15
+ 2026-01-21 12:19:19,673 INFO MainThread:2070718 [wandb_init.py:init():673] sending inform_init request
16
+ 2026-01-21 12:19:19,674 INFO MainThread:2070718 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
+ 2026-01-21 12:19:19,675 INFO MainThread:2070718 [wandb_init.py:init():686] backend started and connected
18
+ 2026-01-21 12:19:19,684 INFO MainThread:2070718 [wandb_init.py:init():781] updated telemetry
19
+ 2026-01-21 12:19:19,759 INFO MainThread:2070718 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
20
+ 2026-01-21 12:19:20,469 INFO MainThread:2070718 [wandb_init.py:init():867] starting run threads in backend
21
+ 2026-01-21 12:19:20,990 INFO MainThread:2070718 [wandb_run.py:_console_start():2451] atexit reg
22
+ 2026-01-21 12:19:20,991 INFO MainThread:2070718 [wandb_run.py:_redirect():2299] redirect: wrap_raw
23
+ 2026-01-21 12:19:20,991 INFO MainThread:2070718 [wandb_run.py:_redirect():2364] Wrapping output streams.
24
+ 2026-01-21 12:19:20,991 INFO MainThread:2070718 [wandb_run.py:_redirect():2389] Redirects installed.
25
+ 2026-01-21 12:19:20,994 INFO MainThread:2070718 [wandb_init.py:init():911] run started, returning control to user process
26
+ 2026-01-21 12:19:20,994 INFO MainThread:2070718 [wandb_run.py:_config_callback():1389] config_cb None None {'output_dir': '/work/u1131674/LLM-BC/data/outputs/2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2'}
27
+ 2026-01-21 12:20:31,202 WARNING MsgRouterThr:2070718 [router.py:message_loop():75] message_loop has been closed
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_121919-9puzigbg/run-9puzigbg.wandb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97675973ce8e04390938123162984a49c1513ce052c76ac14c48280b33003e11
3
+ size 229376
2026.01.21/12.18.18_train_llmbc_lowdim_box-close-v2/wandb/wandb-resume.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"run_id": "9puzigbg"}
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: train_llmbc_lowdim
2
+ _target_: llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace
3
+ obs_dim: ${task.obs_dim}
4
+ action_dim: ${task.action_dim}
5
+ task_name: ${task.name}
6
+ exp_name: default
7
+ model_name: ${llm.name}
8
+ horizon: 1
9
+ n_obs_steps: 1
10
+ n_action_steps: 1
11
+ n_latency_steps: 0
12
+ past_action_visible: false
13
+ llm_orig_expert_feedback: true
14
+ llm_do_sample: false
15
+ policy:
16
+ _target_: llmbc.policy.llmbc_lowdim_policy.LLMBCLowdimPolicy
17
+ model:
18
+ _target_: llmbc.model.policy.policy_mlp.PolicyMLP
19
+ input_size: ${eval:'${n_obs_steps}*${obs_dim}'}
20
+ hidden_size:
21
+ - 256
22
+ - 256
23
+ output_size: ${eval:'${n_action_steps}*${action_dim}'}
24
+ activation: relu
25
+ n_obs_steps: ${n_obs_steps}
26
+ n_action_steps: ${n_action_steps}
27
+ obs_dim: ${obs_dim}
28
+ action_dim: ${action_dim}
29
+ llm_discriminator:
30
+ _target_: llmbc.discriminator.llm_ce_discriminator.LLMCEDiscriminator
31
+ task_id: ${task_name}
32
+ llm_translator:
33
+ _target_: llmbc.translator.llm_translator.LLMTranslator
34
+ cfg: ${llm}
35
+ obs_dim: ${task.obs_dim}
36
+ action_dim: ${task.action_dim}
37
+ horizon: ${horizon}
38
+ n_obs_steps: ${n_obs_steps}
39
+ n_action_steps: ${n_action_steps}
40
+ loss_bc_weight: 1.0
41
+ loss_llm_weight: 0.001
42
+ horizon: ${horizon}
43
+ n_obs_steps: ${n_obs_steps}
44
+ n_action_steps: ${n_action_steps}
45
+ normalize_llm_loss: true
46
+ dataloader:
47
+ batch_size: 16
48
+ num_workers: 0
49
+ shuffle: true
50
+ pin_memory: false
51
+ persistent_workers: false
52
+ val_dataloader:
53
+ batch_size: 16
54
+ num_workers: 0
55
+ shuffle: true
56
+ pin_memory: false
57
+ persistent_workers: false
58
+ optimizer:
59
+ _target_: torch.optim.AdamW
60
+ lr: 0.01
61
+ betas:
62
+ - 0.95
63
+ - 0.999
64
+ eps: 1.0e-08
65
+ weight_decay: 1.0e-06
66
+ training:
67
+ device: cuda:0
68
+ seed: 42
69
+ debug: false
70
+ resume: false
71
+ lr_scheduler: cosine
72
+ lr_warmup_steps: 10
73
+ num_epochs: 1001
74
+ gradient_accumulate_every: 8
75
+ grad_norm_clip: 0.5
76
+ rollout_every: 5
77
+ checkpoint_every: 5
78
+ val_every: 1
79
+ sample_every: 5
80
+ sample_max_batch: 128
81
+ max_train_steps: null
82
+ max_val_steps: null
83
+ tqdm_interval_sec: 1.0
84
+ logging:
85
+ project: ${task.name}-training
86
+ resume: true
87
+ mode: online
88
+ name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name}
89
+ tags:
90
+ - ${name}
91
+ - ${task_name}
92
+ - ${exp_name}
93
+ id: null
94
+ group: null
95
+ checkpoint:
96
+ topk:
97
+ monitor_key: test_success_rate
98
+ mode: max
99
+ k: 5
100
+ format_str: epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt
101
+ save_last_ckpt: true
102
+ save_last_snapshot: false
103
+ multi_run:
104
+ run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
105
+ wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name}
106
+ task:
107
+ name: box-close-v2
108
+ obs_dim: 9
109
+ action_dim: 4
110
+ env_runner:
111
+ _target_: llmbc.env_runner.metaworld_lowdim_runner.MetaworldLowdimRunner
112
+ env_name: llf-metaworld-box-close-v2
113
+ n_train: 10
114
+ n_test: 50
115
+ n_envs: 10
116
+ max_steps: 30
117
+ n_obs_steps: ${n_obs_steps}
118
+ n_action_steps: ${n_action_steps}
119
+ instruction_type: b
120
+ feedback_type:
121
+ - hp
122
+ - hn
123
+ - fp
124
+ visual: false
125
+ discount: 0.9
126
+ dataset:
127
+ _target_: llmbc.dataset.metaworld_lowdim_dataset.MetaworldLowdimDataset
128
+ data_path: datasets/box-close-v2.pt
129
+ data_path2: datasets/box-close-v2.pt
130
+ horizon: ${horizon}
131
+ pad_before: ${eval:'${n_obs_steps}-1'}
132
+ pad_after: ${eval:'${n_action_steps}-1'}
133
+ obs_eef_target: true
134
+ use_manual_normalizer: false
135
+ val_ratio: 0.1
136
+ dummy_normalizer: true
137
+ instructor:
138
+ _target_: llmbc.translator.instructor.metaworld_instructor.box_close_v2_instructor.BoxCloseV2Instructor
139
+ llm:
140
+ name: HuggingFaceTB/SmolLM2-135M-Instruct
141
+ model_name: SmolLM2-135M-Instruct
142
+ config_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig
143
+ causal_lm_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM
144
+ use_quantization: false
145
+ use_joint_mlp_projector: true
146
+ llm_mode: ete-finetuned
147
+ finetune_mode: orig
148
+ checkpoint: data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890
149
+ max_length: 100
150
+ lora_config:
151
+ r: 32
152
+ lora_alpha: 64
153
+ lora_dropout: 0.05
154
+ bias: none
155
+ task_type: CAUSAL_LM
156
+ prompter:
157
+ _target_: llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter
158
+ use_joint_mlp_projector: true
159
+ hydra:
160
+ job:
161
+ override_dirname: ${model_name}
162
+ run:
163
+ dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${model_name}
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hydra:
2
+ run:
3
+ dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
4
+ sweep:
5
+ dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
6
+ subdir: ${hydra.job.num}
7
+ launcher:
8
+ _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
9
+ sweeper:
10
+ _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
11
+ max_batch_size: null
12
+ params: null
13
+ help:
14
+ app_name: ${hydra.job.name}
15
+ header: '${hydra.help.app_name} is powered by Hydra.
16
+
17
+ '
18
+ footer: 'Powered by Hydra (https://hydra.cc)
19
+
20
+ Use --hydra-help to view Hydra specific help
21
+
22
+ '
23
+ template: '${hydra.help.header}
24
+
25
+ == Configuration groups ==
26
+
27
+ Compose your configuration from those groups (group=option)
28
+
29
+
30
+ $APP_CONFIG_GROUPS
31
+
32
+
33
+ == Config ==
34
+
35
+ Override anything in the config (foo.bar=value)
36
+
37
+
38
+ $CONFIG
39
+
40
+
41
+ ${hydra.help.footer}
42
+
43
+ '
44
+ hydra_help:
45
+ template: 'Hydra (${hydra.runtime.version})
46
+
47
+ See https://hydra.cc for more info.
48
+
49
+
50
+ == Flags ==
51
+
52
+ $FLAGS_HELP
53
+
54
+
55
+ == Configuration groups ==
56
+
57
+ Compose your configuration from those groups (For example, append hydra/job_logging=disabled
58
+ to command line)
59
+
60
+
61
+ $HYDRA_CONFIG_GROUPS
62
+
63
+
64
+ Use ''--cfg hydra'' to Show the Hydra config.
65
+
66
+ '
67
+ hydra_help: ???
68
+ hydra_logging:
69
+ version: 1
70
+ formatters:
71
+ simple:
72
+ format: '[%(asctime)s][HYDRA] %(message)s'
73
+ handlers:
74
+ console:
75
+ class: logging.StreamHandler
76
+ formatter: simple
77
+ stream: ext://sys.stdout
78
+ root:
79
+ level: INFO
80
+ handlers:
81
+ - console
82
+ loggers:
83
+ logging_example:
84
+ level: DEBUG
85
+ disable_existing_loggers: false
86
+ job_logging:
87
+ version: 1
88
+ formatters:
89
+ simple:
90
+ format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
91
+ handlers:
92
+ console:
93
+ class: logging.StreamHandler
94
+ formatter: simple
95
+ stream: ext://sys.stdout
96
+ file:
97
+ class: logging.FileHandler
98
+ formatter: simple
99
+ filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
100
+ root:
101
+ level: INFO
102
+ handlers:
103
+ - console
104
+ - file
105
+ disable_existing_loggers: false
106
+ env: {}
107
+ mode: RUN
108
+ searchpath: []
109
+ callbacks: {}
110
+ output_subdir: .hydra
111
+ overrides:
112
+ hydra:
113
+ - hydra.mode=RUN
114
+ task:
115
+ - policy.loss_llm_weight=1.0e-3
116
+ - training.seed=42
117
+ job:
118
+ name: train
119
+ chdir: null
120
+ override_dirname: policy.loss_llm_weight=1.0e-3,training.seed=42
121
+ id: ???
122
+ num: ???
123
+ config_name: llmbc_box-close-v2.yaml
124
+ env_set: {}
125
+ env_copy: []
126
+ config:
127
+ override_dirname:
128
+ kv_sep: '='
129
+ item_sep: ','
130
+ exclude_keys: []
131
+ runtime:
132
+ version: 1.2.0
133
+ version_base: '1.2'
134
+ cwd: /work/u1131674/LLM-BC
135
+ config_sources:
136
+ - path: hydra.conf
137
+ schema: pkg
138
+ provider: hydra
139
+ - path: /work/u1131674/LLM-BC/config/main_table
140
+ schema: file
141
+ provider: main
142
+ - path: ''
143
+ schema: structured
144
+ provider: schema
145
+ output_dir: /work/u1131674/LLM-BC/data/outputs/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2
146
+ choices:
147
+ hydra/env: default
148
+ hydra/callbacks: null
149
+ hydra/job_logging: default
150
+ hydra/hydra_logging: default
151
+ hydra/hydra_help: default
152
+ hydra/help: default
153
+ hydra/sweeper: basic
154
+ hydra/launcher: basic
155
+ hydra/output: default
156
+ verbose: false
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ - policy.loss_llm_weight=1.0e-3
2
+ - training.seed=42
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/checkpoints/epoch=0000-test_success_rate=0.000.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:841ce226dfd93b12ebfd588842ca350a66ecadb7d9fc334812aa3b8de27543ab
3
+ size 864520
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/checkpoints/latest.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:841ce226dfd93b12ebfd588842ca350a66ecadb7d9fc334812aa3b8de27543ab
3
+ size 864520
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/logs.json.txt ADDED
@@ -0,0 +1,418 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"train_loss": 0.252529501914978, "train_loss_bc": 0.25195011496543884, "train_loss_llm": 0.5793765187263489, "grad_norm": 0.12839388847351074, "global_step": 0, "epoch": 0, "lr": 0.001}
2
+ {"train_loss": 0.273204505443573, "train_loss_bc": 0.27264082431793213, "train_loss_llm": 0.563692033290863, "grad_norm": 0.13485388457775116, "global_step": 1, "epoch": 0, "lr": 0.001}
3
+ {"train_loss": 0.2867761254310608, "train_loss_bc": 0.28621771931648254, "train_loss_llm": 0.5584008693695068, "grad_norm": 0.274769127368927, "global_step": 2, "epoch": 0, "lr": 0.001}
4
+ {"train_loss": 0.2871931791305542, "train_loss_bc": 0.2865779399871826, "train_loss_llm": 0.6152305006980896, "grad_norm": 0.41221097111701965, "global_step": 3, "epoch": 0, "lr": 0.001}
5
+ {"train_loss": 0.28025686740875244, "train_loss_bc": 0.2797144949436188, "train_loss_llm": 0.5423757433891296, "grad_norm": 0.5496014356613159, "global_step": 4, "epoch": 0, "lr": 0.001}
6
+ {"train_loss": 0.3149482309818268, "train_loss_bc": 0.31439733505249023, "train_loss_llm": 0.5508872866630554, "grad_norm": 0.6956393718719482, "global_step": 5, "epoch": 0, "lr": 0.001}
7
+ {"train_loss": 0.27254703640937805, "train_loss_bc": 0.27196407318115234, "train_loss_llm": 0.5829575061798096, "grad_norm": 0.8312950730323792, "global_step": 6, "epoch": 0, "lr": 0.001}
8
+ {"train_loss": 0.22602498531341553, "train_loss_bc": 0.22543349862098694, "train_loss_llm": 0.5914822816848755, "grad_norm": 0.9541406631469727, "global_step": 7, "epoch": 0, "lr": 0.001}
9
+ {"train_loss": 0.20342595875263214, "train_loss_bc": 0.2028963267803192, "train_loss_llm": 0.5296257734298706, "grad_norm": 1.0699303150177002, "global_step": 8, "epoch": 0, "lr": 0.002}
10
+ {"train_loss": 0.19929638504981995, "train_loss_bc": 0.19871878623962402, "train_loss_llm": 0.5776059627532959, "grad_norm": 0.11595484614372253, "global_step": 9, "epoch": 0, "lr": 0.002}
11
+ {"train_loss": 0.21191416680812836, "train_loss_bc": 0.21130315959453583, "train_loss_llm": 0.6110129952430725, "grad_norm": 0.23422954976558685, "global_step": 10, "epoch": 0, "lr": 0.002}
12
+ {"train_loss": 0.2068999856710434, "train_loss_bc": 0.2063978612422943, "train_loss_llm": 0.5021252632141113, "grad_norm": 0.3522001802921295, "global_step": 11, "epoch": 0, "lr": 0.002}
13
+ {"train_loss": 0.257265567779541, "train_loss_bc": 0.25662338733673096, "train_loss_llm": 0.6421942710876465, "grad_norm": 0.483461856842041, "global_step": 12, "epoch": 0, "lr": 0.002}
14
+ {"train_loss": 0.23878663778305054, "train_loss_bc": 0.2381792515516281, "train_loss_llm": 0.6073929071426392, "grad_norm": 0.6102063059806824, "global_step": 13, "epoch": 0, "lr": 0.002}
15
+ {"train_loss": 0.2712763547897339, "train_loss_bc": 0.27066537737846375, "train_loss_llm": 0.6109854578971863, "grad_norm": 0.7479075789451599, "global_step": 14, "epoch": 0, "lr": 0.002}
16
+ {"train_loss": 0.24330928921699524, "train_loss_bc": 0.2427230179309845, "train_loss_llm": 0.586268424987793, "grad_norm": 0.8762980699539185, "global_step": 15, "epoch": 0, "lr": 0.002}
17
+ {"train_loss": 0.20316186547279358, "train_loss_bc": 0.20266824960708618, "train_loss_llm": 0.4936148524284363, "grad_norm": 0.992440402507782, "global_step": 16, "epoch": 0, "lr": 0.003}
18
+ {"train_loss": 0.1635446846485138, "train_loss_bc": 0.162959486246109, "train_loss_llm": 0.5851912498474121, "grad_norm": 0.11341577023267746, "global_step": 17, "epoch": 0, "lr": 0.003}
19
+ {"train_loss": 0.1420236974954605, "train_loss_bc": 0.14150172472000122, "train_loss_llm": 0.5219756364822388, "grad_norm": 0.2166670560836792, "global_step": 18, "epoch": 0, "lr": 0.003}
20
+ {"train_loss": 0.08970867097377777, "train_loss_bc": 0.08923432230949402, "train_loss_llm": 0.4743492901325226, "grad_norm": 0.2942521870136261, "global_step": 19, "epoch": 0, "lr": 0.003}
21
+ {"train_loss": 0.1407971978187561, "train_loss_bc": 0.14016547799110413, "train_loss_llm": 0.631725013256073, "grad_norm": 0.3959764838218689, "global_step": 20, "epoch": 0, "lr": 0.003}
22
+ {"train_loss": 0.12558668851852417, "train_loss_bc": 0.12498115748167038, "train_loss_llm": 0.6055365800857544, "grad_norm": 0.4912969172000885, "global_step": 21, "epoch": 0, "lr": 0.003}
23
+ {"train_loss": 0.15840043127536774, "train_loss_bc": 0.15789246559143066, "train_loss_llm": 0.5079687833786011, "grad_norm": 0.6031914949417114, "global_step": 22, "epoch": 0, "lr": 0.003}
24
+ {"train_loss": 0.15493251383304596, "train_loss_bc": 0.15430215001106262, "train_loss_llm": 0.6303583383560181, "grad_norm": 0.712800145149231, "global_step": 23, "epoch": 0, "lr": 0.003}
25
+ {"train_loss": 0.09710954874753952, "train_loss_bc": 0.09661616384983063, "train_loss_llm": 0.4933878481388092, "grad_norm": 0.7942712306976318, "global_step": 24, "epoch": 0, "lr": 0.004}
26
+ {"train_loss": 0.04198349267244339, "train_loss_bc": 0.04147119075059891, "train_loss_llm": 0.5123016834259033, "grad_norm": 0.049896661192178726, "global_step": 25, "epoch": 0, "lr": 0.004}
27
+ {"train_loss": 0.04926488921046257, "train_loss_bc": 0.04879248887300491, "train_loss_llm": 0.4724003076553345, "grad_norm": 0.10693306475877762, "global_step": 26, "epoch": 0, "lr": 0.004}
28
+ {"train_loss": 0.03119494765996933, "train_loss_bc": 0.03079175390303135, "train_loss_llm": 0.40319401025772095, "grad_norm": 0.130178764462471, "global_step": 27, "epoch": 0, "lr": 0.004}
29
+ {"train_loss": 0.045984115451574326, "train_loss_bc": 0.04547495022416115, "train_loss_llm": 0.5091666579246521, "grad_norm": 0.18151648342609406, "global_step": 28, "epoch": 0, "lr": 0.004}
30
+ {"train_loss": 0.036746662110090256, "train_loss_bc": 0.036322131752967834, "train_loss_llm": 0.4245292544364929, "grad_norm": 0.22341406345367432, "global_step": 29, "epoch": 0, "lr": 0.004}
31
+ {"train_loss": 0.06587483733892441, "train_loss_bc": 0.06540372967720032, "train_loss_llm": 0.47110506892204285, "grad_norm": 0.30597466230392456, "global_step": 30, "epoch": 0, "lr": 0.004}
32
+ {"train_loss": 0.05170199275016785, "train_loss_bc": 0.05132713168859482, "train_loss_llm": 0.37486234307289124, "grad_norm": 0.3634960949420929, "global_step": 31, "epoch": 0, "lr": 0.004}
33
+ {"train_loss": 0.05630849674344063, "train_loss_bc": 0.0558805912733078, "train_loss_llm": 0.42790722846984863, "grad_norm": 0.4307665228843689, "global_step": 32, "epoch": 0, "lr": 0.005}
34
+ {"train_loss": 0.0553022176027298, "train_loss_bc": 0.05469208583235741, "train_loss_llm": 0.6101305484771729, "grad_norm": 0.08577623218297958, "global_step": 33, "epoch": 0, "lr": 0.005}
35
+ {"train_loss": 0.04831269383430481, "train_loss_bc": 0.04779437929391861, "train_loss_llm": 0.5183138847351074, "grad_norm": 0.15602092444896698, "global_step": 34, "epoch": 0, "lr": 0.005}
36
+ {"train_loss": 0.061867598444223404, "train_loss_bc": 0.06128372997045517, "train_loss_llm": 0.5838690996170044, "grad_norm": 0.2528131902217865, "global_step": 35, "epoch": 0, "lr": 0.005}
37
+ {"train_loss": 0.05686777085065842, "train_loss_bc": 0.05627113953232765, "train_loss_llm": 0.5966323614120483, "grad_norm": 0.3395236134529114, "global_step": 36, "epoch": 0, "lr": 0.005}
38
+ {"train_loss": 0.03382698819041252, "train_loss_bc": 0.03323305398225784, "train_loss_llm": 0.5939337611198425, "grad_norm": 0.3958278000354767, "global_step": 37, "epoch": 0, "lr": 0.005}
39
+ {"train_loss": 0.06224585324525833, "train_loss_bc": 0.0616149976849556, "train_loss_llm": 0.6308567523956299, "grad_norm": 0.4894043505191803, "global_step": 38, "epoch": 0, "lr": 0.005}
40
+ {"train_loss": 0.04555570334196091, "train_loss_bc": 0.04494024068117142, "train_loss_llm": 0.6154611110687256, "grad_norm": 0.5536556839942932, "global_step": 39, "epoch": 0, "lr": 0.005}
41
+ {"train_loss": 0.03574361279606819, "train_loss_bc": 0.03507951647043228, "train_loss_llm": 0.6640970706939697, "grad_norm": 0.6100818514823914, "global_step": 40, "epoch": 0, "lr": 0.006}
42
+ {"train_loss": 0.146262988448143, "train_loss_bc": 0.14580723643302917, "train_loss_llm": 0.4557466208934784, "grad_norm": 0.19763296842575073, "global_step": 41, "epoch": 0, "lr": 0.006}
43
+ {"train_loss": 0.11445678770542145, "train_loss_bc": 0.11390470713376999, "train_loss_llm": 0.5520769357681274, "grad_norm": 0.3685164451599121, "global_step": 42, "epoch": 0, "lr": 0.006}
44
+ {"train_loss": 0.10677710175514221, "train_loss_bc": 0.10625766217708588, "train_loss_llm": 0.5194418430328369, "grad_norm": 0.5320614576339722, "global_step": 43, "epoch": 0, "lr": 0.006}
45
+ {"train_loss": 0.12251483649015427, "train_loss_bc": 0.12198641151189804, "train_loss_llm": 0.5284275412559509, "grad_norm": 0.7118619680404663, "global_step": 44, "epoch": 0, "lr": 0.006}
46
+ {"train_loss": 0.14140570163726807, "train_loss_bc": 0.1408904492855072, "train_loss_llm": 0.5152463316917419, "grad_norm": 0.9093842506408691, "global_step": 45, "epoch": 0, "lr": 0.006}
47
+ {"train_loss": 0.10901694744825363, "train_loss_bc": 0.1084449291229248, "train_loss_llm": 0.5720197558403015, "grad_norm": 1.0770854949951172, "global_step": 46, "epoch": 0, "lr": 0.006}
48
+ {"train_loss": 0.13558131456375122, "train_loss_bc": 0.13501602411270142, "train_loss_llm": 0.565291702747345, "grad_norm": 1.2658616304397583, "global_step": 47, "epoch": 0, "lr": 0.006}
49
+ {"train_loss": 0.14484672248363495, "train_loss_bc": 0.14428021013736725, "train_loss_llm": 0.5665071606636047, "grad_norm": 1.4656471014022827, "global_step": 48, "epoch": 0, "lr": 0.006999999999999999}
50
+ {"train_loss": 0.24264752864837646, "train_loss_bc": 0.2419467568397522, "train_loss_llm": 0.7007750272750854, "grad_norm": 0.2969740033149719, "global_step": 49, "epoch": 0, "lr": 0.006999999999999999}
51
+ {"train_loss": 0.13805940747261047, "train_loss_bc": 0.1374894082546234, "train_loss_llm": 0.5699948072433472, "grad_norm": 0.5104647874832153, "global_step": 50, "epoch": 0, "lr": 0.006999999999999999}
52
+ {"train_loss": 0.16542810201644897, "train_loss_bc": 0.16495351493358612, "train_loss_llm": 0.47459012269973755, "grad_norm": 0.7459866404533386, "global_step": 51, "epoch": 0, "lr": 0.006999999999999999}
53
+ {"train_loss": 0.25657016038894653, "train_loss_bc": 0.25585728883743286, "train_loss_llm": 0.7128623723983765, "grad_norm": 1.0538054704666138, "global_step": 52, "epoch": 0, "lr": 0.006999999999999999}
54
+ {"train_loss": 0.20239487290382385, "train_loss_bc": 0.20180177688598633, "train_loss_llm": 0.5930944085121155, "grad_norm": 1.316612958908081, "global_step": 53, "epoch": 0, "lr": 0.006999999999999999}
55
+ {"train_loss": 0.1541372388601303, "train_loss_bc": 0.15368221700191498, "train_loss_llm": 0.45501962304115295, "grad_norm": 1.5417735576629639, "global_step": 54, "epoch": 0, "lr": 0.006999999999999999}
56
+ {"train_loss": 0.2185448706150055, "train_loss_bc": 0.2180437594652176, "train_loss_llm": 0.5011103749275208, "grad_norm": 1.8187888860702515, "global_step": 55, "epoch": 0, "lr": 0.006999999999999999}
57
+ {"train_loss": 0.20139560103416443, "train_loss_bc": 0.20086990296840668, "train_loss_llm": 0.5256961584091187, "grad_norm": 2.08247447013855, "global_step": 56, "epoch": 0, "lr": 0.008}
58
+ {"train_loss": 0.20989899337291718, "train_loss_bc": 0.20911380648612976, "train_loss_llm": 0.7851892709732056, "grad_norm": 0.27354303002357483, "global_step": 57, "epoch": 0, "lr": 0.008}
59
+ {"train_loss": 0.19207656383514404, "train_loss_bc": 0.19136708974838257, "train_loss_llm": 0.7094740867614746, "grad_norm": 0.534111499786377, "global_step": 58, "epoch": 0, "lr": 0.008}
60
+ {"train_loss": 0.1742924004793167, "train_loss_bc": 0.17367114126682281, "train_loss_llm": 0.6212564706802368, "grad_norm": 0.7795819044113159, "global_step": 59, "epoch": 0, "lr": 0.008}
61
+ {"train_loss": 0.1624690294265747, "train_loss_bc": 0.1617729365825653, "train_loss_llm": 0.6960869431495667, "grad_norm": 1.0119670629501343, "global_step": 60, "epoch": 0, "lr": 0.008}
62
+ {"train_loss": 0.20042455196380615, "train_loss_bc": 0.19979658722877502, "train_loss_llm": 0.6279683113098145, "grad_norm": 1.274623990058899, "global_step": 61, "epoch": 0, "lr": 0.008}
63
+ {"train_loss": 0.16158545017242432, "train_loss_bc": 0.16083624958992004, "train_loss_llm": 0.7492036819458008, "grad_norm": 1.5101232528686523, "global_step": 62, "epoch": 0, "lr": 0.008}
64
+ {"train_loss": 0.13282041251659393, "train_loss_bc": 0.13209721446037292, "train_loss_llm": 0.723200798034668, "grad_norm": 1.7186288833618164, "global_step": 63, "epoch": 0, "lr": 0.008}
65
+ {"train_loss": 0.2033994346857071, "train_loss_bc": 0.2027282416820526, "train_loss_llm": 0.6711894273757935, "grad_norm": 1.9846457242965698, "global_step": 64, "epoch": 0, "lr": 0.009000000000000001}
66
+ {"train_loss": 0.09530064463615417, "train_loss_bc": 0.09461785107851028, "train_loss_llm": 0.6827924847602844, "grad_norm": 0.1637452095746994, "global_step": 65, "epoch": 0, "lr": 0.009000000000000001}
67
+ {"train_loss": 0.09785042703151703, "train_loss_bc": 0.09729202836751938, "train_loss_llm": 0.558398962020874, "grad_norm": 0.3286266624927521, "global_step": 66, "epoch": 0, "lr": 0.009000000000000001}
68
+ {"train_loss": 0.09337419271469116, "train_loss_bc": 0.09270930290222168, "train_loss_llm": 0.6648919582366943, "grad_norm": 0.48786014318466187, "global_step": 67, "epoch": 0, "lr": 0.009000000000000001}
69
+ {"train_loss": 0.17027954757213593, "train_loss_bc": 0.16956308484077454, "train_loss_llm": 0.7164597511291504, "grad_norm": 0.7218278050422668, "global_step": 68, "epoch": 0, "lr": 0.009000000000000001}
70
+ {"train_loss": 0.08503676950931549, "train_loss_bc": 0.08446164429187775, "train_loss_llm": 0.5751272439956665, "grad_norm": 0.8772305250167847, "global_step": 69, "epoch": 0, "lr": 0.009000000000000001}
71
+ {"train_loss": 0.10142002999782562, "train_loss_bc": 0.10081231594085693, "train_loss_llm": 0.6077142953872681, "grad_norm": 1.04507315158844, "global_step": 70, "epoch": 0, "lr": 0.009000000000000001}
72
+ {"train_loss": 0.11661797761917114, "train_loss_bc": 0.11599370092153549, "train_loss_llm": 0.6242777109146118, "grad_norm": 1.2287834882736206, "global_step": 71, "epoch": 0, "lr": 0.009000000000000001}
73
+ {"train_loss": 0.11624407768249512, "train_loss_bc": 0.11565285921096802, "train_loss_llm": 0.5912151336669922, "grad_norm": 1.4120811223983765, "global_step": 72, "epoch": 0, "lr": 0.01}
74
+ {"train_loss": 0.040211960673332214, "train_loss_bc": 0.039551250636577606, "train_loss_llm": 0.6607116460800171, "grad_norm": 0.0777788907289505, "global_step": 73, "epoch": 0, "lr": 0.01}
75
+ {"train_loss": 0.05076095834374428, "train_loss_bc": 0.05007569119334221, "train_loss_llm": 0.6852684617042542, "grad_norm": 0.17003870010375977, "global_step": 74, "epoch": 0, "lr": 0.01}
76
+ {"train_loss": 0.037128813564777374, "train_loss_bc": 0.03643818572163582, "train_loss_llm": 0.6906265020370483, "grad_norm": 0.244222030043602, "global_step": 75, "epoch": 0, "lr": 0.01}
77
+ {"train_loss": 0.037142593413591385, "train_loss_bc": 0.03646159917116165, "train_loss_llm": 0.6809947490692139, "grad_norm": 0.31510722637176514, "global_step": 76, "epoch": 0, "lr": 0.01}
78
+ {"train_loss": 0.05590587481856346, "train_loss_bc": 0.0552542544901371, "train_loss_llm": 0.6516196131706238, "grad_norm": 0.4150258004665375, "global_step": 77, "epoch": 0, "lr": 0.01}
79
+ {"train_loss": 0.030149903148412704, "train_loss_bc": 0.029475240036845207, "train_loss_llm": 0.6746631860733032, "grad_norm": 0.4752899408340454, "global_step": 78, "epoch": 0, "lr": 0.01}
80
+ {"train_loss": 0.050657838582992554, "train_loss_bc": 0.04992213845252991, "train_loss_llm": 0.7356998324394226, "grad_norm": 0.5678731799125671, "global_step": 79, "epoch": 0, "lr": 0.01}
81
+ {"train_loss": 0.02764507196843624, "train_loss_bc": 0.027012458071112633, "train_loss_llm": 0.6326141953468323, "grad_norm": 0.625312089920044, "global_step": 80, "epoch": 0, "lr": 0.009999999972157305}
82
+ {"train_loss": 0.03073396533727646, "train_loss_bc": 0.03021111525595188, "train_loss_llm": 0.5228506326675415, "grad_norm": 0.053040843456983566, "global_step": 81, "epoch": 0, "lr": 0.009999999972157305}
83
+ {"train_loss": 0.027266209945082664, "train_loss_bc": 0.026712927967309952, "train_loss_llm": 0.5532811880111694, "grad_norm": 0.09477357566356659, "global_step": 82, "epoch": 0, "lr": 0.009999999972157305}
84
+ {"train_loss": 0.027156496420502663, "train_loss_bc": 0.026670875027775764, "train_loss_llm": 0.48562145233154297, "grad_norm": 0.13936017453670502, "global_step": 83, "epoch": 0, "lr": 0.009999999972157305}
85
+ {"train_loss": 0.03493297100067139, "train_loss_bc": 0.03437262028455734, "train_loss_llm": 0.5603512525558472, "grad_norm": 0.20259420573711395, "global_step": 84, "epoch": 0, "lr": 0.009999999972157305}
86
+ {"train_loss": 0.027796030044555664, "train_loss_bc": 0.027264408767223358, "train_loss_llm": 0.5316207408905029, "grad_norm": 0.23961691558361053, "global_step": 85, "epoch": 0, "lr": 0.009999999972157305}
87
+ {"train_loss": 0.026962831616401672, "train_loss_bc": 0.026478836312890053, "train_loss_llm": 0.4839947819709778, "grad_norm": 0.278042733669281, "global_step": 86, "epoch": 0, "lr": 0.009999999972157305}
88
+ {"train_loss": 0.022709660232067108, "train_loss_bc": 0.02220826968550682, "train_loss_llm": 0.5013896226882935, "grad_norm": 0.31111451983451843, "global_step": 87, "epoch": 0, "lr": 0.009999999972157305}
89
+ {"train_loss": 0.03472929820418358, "train_loss_bc": 0.03418252617120743, "train_loss_llm": 0.5467737913131714, "grad_norm": 0.36670809984207153, "global_step": 88, "epoch": 0, "lr": 0.009999999888629223}
90
+ {"train_loss": 0.06921354681253433, "train_loss_bc": 0.06863778829574585, "train_loss_llm": 0.5757583975791931, "grad_norm": 0.0987037718296051, "global_step": 89, "epoch": 0, "lr": 0.009999999888629223}
91
+ {"train_loss": 0.0616544634103775, "train_loss_bc": 0.061051469296216965, "train_loss_llm": 0.602994441986084, "grad_norm": 0.18980276584625244, "global_step": 90, "epoch": 0, "lr": 0.009999999888629223}
92
+ {"train_loss": 0.05179845914244652, "train_loss_bc": 0.051266275346279144, "train_loss_llm": 0.5321850180625916, "grad_norm": 0.2702069580554962, "global_step": 91, "epoch": 0, "lr": 0.009999999888629223}
93
+ {"train_loss": 0.06521251052618027, "train_loss_bc": 0.06461584568023682, "train_loss_llm": 0.5966640710830688, "grad_norm": 0.3639739155769348, "global_step": 92, "epoch": 0, "lr": 0.009999999888629223}
94
+ {"train_loss": 0.06196574494242668, "train_loss_bc": 0.06147213280200958, "train_loss_llm": 0.4936124384403229, "grad_norm": 0.45718127489089966, "global_step": 93, "epoch": 0, "lr": 0.009999999888629223}
95
+ {"train_loss": 0.04629657045006752, "train_loss_bc": 0.045801080763339996, "train_loss_llm": 0.4954902231693268, "grad_norm": 0.5287754535675049, "global_step": 94, "epoch": 0, "lr": 0.009999999888629223}
96
+ {"train_loss": 0.04616197198629379, "train_loss_bc": 0.04563061147928238, "train_loss_llm": 0.5313600301742554, "grad_norm": 0.6031498908996582, "global_step": 95, "epoch": 0, "lr": 0.009999999888629223}
97
+ {"train_loss": 0.06340043991804123, "train_loss_bc": 0.06277582049369812, "train_loss_llm": 0.6246193647384644, "grad_norm": 0.6954230666160583, "global_step": 96, "epoch": 0, "lr": 0.00999999974941575}
98
+ {"train_loss": 0.08191214501857758, "train_loss_bc": 0.0813114270567894, "train_loss_llm": 0.6007174253463745, "grad_norm": 0.10619106888771057, "global_step": 97, "epoch": 0, "lr": 0.00999999974941575}
99
+ {"train_loss": 0.08071709424257278, "train_loss_bc": 0.08003760129213333, "train_loss_llm": 0.6794949769973755, "grad_norm": 0.20760591328144073, "global_step": 98, "epoch": 0, "lr": 0.00999999974941575}
100
+ {"train_loss": 0.08332143723964691, "train_loss_bc": 0.08272609114646912, "train_loss_llm": 0.5953459739685059, "grad_norm": 0.3134561777114868, "global_step": 99, "epoch": 0, "lr": 0.00999999974941575}
101
+ {"train_loss": 0.07155264914035797, "train_loss_bc": 0.07099221646785736, "train_loss_llm": 0.5604289770126343, "grad_norm": 0.40877580642700195, "global_step": 100, "epoch": 0, "lr": 0.00999999974941575}
102
+ {"train_loss": 0.08975838869810104, "train_loss_bc": 0.08916652202606201, "train_loss_llm": 0.5918655395507812, "grad_norm": 0.522554337978363, "global_step": 101, "epoch": 0, "lr": 0.00999999974941575}
103
+ {"train_loss": 0.07257966697216034, "train_loss_bc": 0.07201467454433441, "train_loss_llm": 0.5649896860122681, "grad_norm": 0.6189970970153809, "global_step": 102, "epoch": 0, "lr": 0.00999999974941575}
104
+ {"train_loss": 0.06007641553878784, "train_loss_bc": 0.05953027680516243, "train_loss_llm": 0.546138346195221, "grad_norm": 0.702296793460846, "global_step": 103, "epoch": 0, "lr": 0.00999999974941575}
105
+ {"train_loss": 0.04568513110280037, "train_loss_bc": 0.045178987085819244, "train_loss_llm": 0.5061453580856323, "grad_norm": 0.7713168263435364, "global_step": 104, "epoch": 0, "lr": 0.009999999554516895}
106
+ {"train_loss": 0.062235742807388306, "train_loss_bc": 0.061548247933387756, "train_loss_llm": 0.687494158744812, "grad_norm": 0.08229470998048782, "global_step": 105, "epoch": 0, "lr": 0.009999999554516895}
107
+ {"train_loss": 0.08357568085193634, "train_loss_bc": 0.08299360424280167, "train_loss_llm": 0.5820728540420532, "grad_norm": 0.18586039543151855, "global_step": 106, "epoch": 0, "lr": 0.009999999554516895}
108
+ {"train_loss": 0.08088018745183945, "train_loss_bc": 0.08020119369029999, "train_loss_llm": 0.6789901256561279, "grad_norm": 0.2842538356781006, "global_step": 107, "epoch": 0, "lr": 0.009999999554516895}
109
+ {"train_loss": 0.07067245990037918, "train_loss_bc": 0.07003812491893768, "train_loss_llm": 0.6343338489532471, "grad_norm": 0.3756967782974243, "global_step": 108, "epoch": 0, "lr": 0.009999999554516895}
110
+ {"train_loss": 0.062134191393852234, "train_loss_bc": 0.06162497028708458, "train_loss_llm": 0.50922030210495, "grad_norm": 0.45985621213912964, "global_step": 109, "epoch": 0, "lr": 0.009999999554516895}
111
+ {"train_loss": 0.05643927678465843, "train_loss_bc": 0.05575673654675484, "train_loss_llm": 0.6825414896011353, "grad_norm": 0.535986065864563, "global_step": 110, "epoch": 0, "lr": 0.009999999554516895}
112
+ {"train_loss": 0.06275462359189987, "train_loss_bc": 0.06217849254608154, "train_loss_llm": 0.5761322975158691, "grad_norm": 0.6212720274925232, "global_step": 111, "epoch": 0, "lr": 0.009999999554516895}
113
+ {"train_loss": 0.0717947706580162, "train_loss_bc": 0.07112696766853333, "train_loss_llm": 0.6678000688552856, "grad_norm": 0.712874174118042, "global_step": 112, "epoch": 0, "lr": 0.009999999303932654}
114
+ {"train_loss": 0.07107824087142944, "train_loss_bc": 0.07066100835800171, "train_loss_llm": 0.4172302484512329, "grad_norm": 0.09572537988424301, "global_step": 113, "epoch": 0, "lr": 0.009999999303932654}
115
+ {"train_loss": 0.0622958242893219, "train_loss_bc": 0.0617825910449028, "train_loss_llm": 0.5132333040237427, "grad_norm": 0.18129226565361023, "global_step": 114, "epoch": 0, "lr": 0.009999999303932654}
116
+ {"train_loss": 0.05546606332063675, "train_loss_bc": 0.054820477962493896, "train_loss_llm": 0.6455863118171692, "grad_norm": 0.2585110068321228, "global_step": 115, "epoch": 0, "lr": 0.009999999303932654}
117
+ {"train_loss": 0.052835989743471146, "train_loss_bc": 0.052223652601242065, "train_loss_llm": 0.612338662147522, "grad_norm": 0.3340230882167816, "global_step": 116, "epoch": 0, "lr": 0.009999999303932654}
118
+ {"train_loss": 0.07355044782161713, "train_loss_bc": 0.0729360580444336, "train_loss_llm": 0.614387035369873, "grad_norm": 0.43235117197036743, "global_step": 117, "epoch": 0, "lr": 0.009999999303932654}
119
+ {"train_loss": 0.058171968907117844, "train_loss_bc": 0.057668983936309814, "train_loss_llm": 0.5029836893081665, "grad_norm": 0.5117653608322144, "global_step": 118, "epoch": 0, "lr": 0.009999999303932654}
120
+ {"train_loss": 0.06179669499397278, "train_loss_bc": 0.06129191815853119, "train_loss_llm": 0.5047756433486938, "grad_norm": 0.5967010855674744, "global_step": 119, "epoch": 0, "lr": 0.009999999303932654}
121
+ {"train_loss": 0.03286455199122429, "train_loss_bc": 0.03232846036553383, "train_loss_llm": 0.536090612411499, "grad_norm": 0.6471052169799805, "global_step": 120, "epoch": 0, "lr": 0.009999998997663032}
122
+ {"train_loss": 0.03573526442050934, "train_loss_bc": 0.03532949090003967, "train_loss_llm": 0.40577366948127747, "grad_norm": 0.05735393241047859, "global_step": 121, "epoch": 0, "lr": 0.009999998997663032}
123
+ {"train_loss": 0.038237735629081726, "train_loss_bc": 0.0377943217754364, "train_loss_llm": 0.44341397285461426, "grad_norm": 0.11955223232507706, "global_step": 122, "epoch": 0, "lr": 0.009999998997663032}
124
+ {"train_loss": 0.05409952253103256, "train_loss_bc": 0.05373173952102661, "train_loss_llm": 0.3677833676338196, "grad_norm": 0.20442572236061096, "global_step": 123, "epoch": 0, "lr": 0.009999998997663032}
125
+ {"train_loss": 0.04305477812886238, "train_loss_bc": 0.0426463782787323, "train_loss_llm": 0.4084013104438782, "grad_norm": 0.2714554965496063, "global_step": 124, "epoch": 0, "lr": 0.009999998997663032}
126
+ {"train_loss": 0.035634320229291916, "train_loss_bc": 0.03516857698559761, "train_loss_llm": 0.4657438099384308, "grad_norm": 0.3238549530506134, "global_step": 125, "epoch": 0, "lr": 0.009999998997663032}
127
+ {"train_loss": 0.05339725315570831, "train_loss_bc": 0.05303904414176941, "train_loss_llm": 0.35820940136909485, "grad_norm": 0.4088974893093109, "global_step": 126, "epoch": 0, "lr": 0.009999998997663032}
128
+ {"train_loss": 0.04185650870203972, "train_loss_bc": 0.04144421964883804, "train_loss_llm": 0.4122905433177948, "grad_norm": 0.47525259852409363, "global_step": 127, "epoch": 0, "lr": 0.009999998997663032}
129
+ {"train_loss": 0.042993541806936264, "train_loss_bc": 0.042601581662893295, "train_loss_llm": 0.3919590413570404, "grad_norm": 0.5411036014556885, "global_step": 128, "epoch": 0, "lr": 0.009999998635708033}
130
+ {"train_loss": 0.02562362141907215, "train_loss_bc": 0.024971390143036842, "train_loss_llm": 0.6522307395935059, "grad_norm": 0.041133757680654526, "global_step": 129, "epoch": 0, "lr": 0.009999998635708033}
131
+ {"train_loss": 0.02191310189664364, "train_loss_bc": 0.02129165455698967, "train_loss_llm": 0.6214474439620972, "grad_norm": 0.07116004079580307, "global_step": 130, "epoch": 0, "lr": 0.009999998635708033}
132
+ {"train_loss": 0.03156816214323044, "train_loss_bc": 0.0309942327439785, "train_loss_llm": 0.5739300847053528, "grad_norm": 0.12592613697052002, "global_step": 131, "epoch": 0, "lr": 0.009999998635708033}
133
+ {"train_loss": 0.02577713131904602, "train_loss_bc": 0.025125574320554733, "train_loss_llm": 0.6515576839447021, "grad_norm": 0.16578812897205353, "global_step": 132, "epoch": 0, "lr": 0.009999998635708033}
134
+ {"train_loss": 0.02320152334868908, "train_loss_bc": 0.02264384739100933, "train_loss_llm": 0.5576763153076172, "grad_norm": 0.19451332092285156, "global_step": 133, "epoch": 0, "lr": 0.009999998635708033}
135
+ {"train_loss": 0.026553723961114883, "train_loss_bc": 0.02588409185409546, "train_loss_llm": 0.6696317195892334, "grad_norm": 0.23911207914352417, "global_step": 134, "epoch": 0, "lr": 0.009999998635708033}
136
+ {"train_loss": 0.02071528509259224, "train_loss_bc": 0.02012854814529419, "train_loss_llm": 0.5867360830307007, "grad_norm": 0.27343958616256714, "global_step": 135, "epoch": 0, "lr": 0.009999998635708033}
137
+ {"train_loss": 0.0177980437874794, "train_loss_bc": 0.01714349165558815, "train_loss_llm": 0.6545513272285461, "grad_norm": 0.2921649217605591, "global_step": 136, "epoch": 0, "lr": 0.009999998218067659}
138
+ {"train_loss": 0.016527190804481506, "train_loss_bc": 0.015900835394859314, "train_loss_llm": 0.6263555288314819, "grad_norm": 0.027912678197026253, "global_step": 137, "epoch": 0, "lr": 0.009999998218067659}
139
+ {"train_loss": 0.016124101355671883, "train_loss_bc": 0.015586758963763714, "train_loss_llm": 0.5373432636260986, "grad_norm": 0.058148931711912155, "global_step": 138, "epoch": 0, "lr": 0.009999998218067659}
140
+ {"train_loss": 0.02101842127740383, "train_loss_bc": 0.020332563668489456, "train_loss_llm": 0.6858576536178589, "grad_norm": 0.07774510979652405, "global_step": 139, "epoch": 0, "lr": 0.009999998218067659}
141
+ {"train_loss": 0.019856909289956093, "train_loss_bc": 0.019175313413143158, "train_loss_llm": 0.6815959215164185, "grad_norm": 0.1038837879896164, "global_step": 140, "epoch": 0, "lr": 0.009999998218067659}
142
+ {"train_loss": 0.02635011076927185, "train_loss_bc": 0.02552071586251259, "train_loss_llm": 0.8293948769569397, "grad_norm": 0.13838204741477966, "global_step": 141, "epoch": 0, "lr": 0.009999998218067659}
143
+ {"train_loss": 0.017442570999264717, "train_loss_bc": 0.016876015812158585, "train_loss_llm": 0.5665552616119385, "grad_norm": 0.16495107114315033, "global_step": 142, "epoch": 0, "lr": 0.009999998218067659}
144
+ {"train_loss": 0.019354552030563354, "train_loss_bc": 0.018637431785464287, "train_loss_llm": 0.7171201109886169, "grad_norm": 0.19188618659973145, "global_step": 143, "epoch": 0, "lr": 0.009999998218067659}
145
+ {"train_loss": 0.02011699415743351, "train_loss_bc": 0.01947595179080963, "train_loss_llm": 0.6410424709320068, "grad_norm": 0.21645328402519226, "global_step": 144, "epoch": 0, "lr": 0.009999997744741916}
146
+ {"train_loss": 0.02425413206219673, "train_loss_bc": 0.023770108819007874, "train_loss_llm": 0.4840241074562073, "grad_norm": 0.0436902791261673, "global_step": 145, "epoch": 0, "lr": 0.009999997744741916}
147
+ {"train_loss": 0.022116929292678833, "train_loss_bc": 0.021655619144439697, "train_loss_llm": 0.4613092541694641, "grad_norm": 0.08582352846860886, "global_step": 146, "epoch": 0, "lr": 0.009999997744741916}
148
+ {"train_loss": 0.021659119054675102, "train_loss_bc": 0.021171528846025467, "train_loss_llm": 0.48758962750434875, "grad_norm": 0.126007542014122, "global_step": 147, "epoch": 0, "lr": 0.009999997744741916}
149
+ {"train_loss": 0.03805282711982727, "train_loss_bc": 0.03744645416736603, "train_loss_llm": 0.6063730716705322, "grad_norm": 0.19119882583618164, "global_step": 148, "epoch": 0, "lr": 0.009999997744741916}
150
+ {"train_loss": 0.01933918334543705, "train_loss_bc": 0.018880464136600494, "train_loss_llm": 0.45871883630752563, "grad_norm": 0.2215609848499298, "global_step": 149, "epoch": 0, "lr": 0.009999997744741916}
151
+ {"train_loss": 0.020558631047606468, "train_loss_bc": 0.019996277987957, "train_loss_llm": 0.5623538494110107, "grad_norm": 0.2572121322154999, "global_step": 150, "epoch": 0, "lr": 0.009999997744741916}
152
+ {"train_loss": 0.022163955494761467, "train_loss_bc": 0.021520184352993965, "train_loss_llm": 0.6437717080116272, "grad_norm": 0.29958251118659973, "global_step": 151, "epoch": 0, "lr": 0.009999997744741916}
153
+ {"train_loss": 0.026239177212119102, "train_loss_bc": 0.025800224393606186, "train_loss_llm": 0.4389524757862091, "grad_norm": 0.34274646639823914, "global_step": 152, "epoch": 0, "lr": 0.00999999721573081}
154
+ {"train_loss": 0.02925034798681736, "train_loss_bc": 0.028717506676912308, "train_loss_llm": 0.5328419804573059, "grad_norm": 0.05676320195198059, "global_step": 153, "epoch": 0, "lr": 0.00999999721573081}
155
+ {"train_loss": 0.037463825196027756, "train_loss_bc": 0.03690113127231598, "train_loss_llm": 0.5626922845840454, "grad_norm": 0.12929601967334747, "global_step": 154, "epoch": 0, "lr": 0.00999999721573081}
156
+ {"train_loss": 0.018557682633399963, "train_loss_bc": 0.018073368817567825, "train_loss_llm": 0.4843147099018097, "grad_norm": 0.17127014696598053, "global_step": 155, "epoch": 0, "lr": 0.00999999721573081}
157
+ {"train_loss": 0.024363229051232338, "train_loss_bc": 0.02386392466723919, "train_loss_llm": 0.4993036687374115, "grad_norm": 0.2232280820608139, "global_step": 156, "epoch": 0, "lr": 0.00999999721573081}
158
+ {"train_loss": 0.030357468873262405, "train_loss_bc": 0.029831916093826294, "train_loss_llm": 0.5255520939826965, "grad_norm": 0.2857641279697418, "global_step": 157, "epoch": 0, "lr": 0.00999999721573081}
159
+ {"train_loss": 0.038563068956136703, "train_loss_bc": 0.03807063773274422, "train_loss_llm": 0.4924296736717224, "grad_norm": 0.3589693307876587, "global_step": 158, "epoch": 0, "lr": 0.00999999721573081}
160
+ {"train_loss": 0.038679443299770355, "train_loss_bc": 0.03814232721924782, "train_loss_llm": 0.5371164083480835, "grad_norm": 0.42973586916923523, "global_step": 159, "epoch": 0, "lr": 0.00999999721573081}
161
+ {"train_loss": 0.03393377736210823, "train_loss_bc": 0.0334688201546669, "train_loss_llm": 0.4649561643600464, "grad_norm": 0.49458184838294983, "global_step": 160, "epoch": 0, "lr": 0.009999996631034345}
162
+ {"train_loss": 0.03411827236413956, "train_loss_bc": 0.033583398908376694, "train_loss_llm": 0.5348742008209229, "grad_norm": 0.06490988284349442, "global_step": 161, "epoch": 0, "lr": 0.009999996631034345}
163
+ {"train_loss": 0.02351403422653675, "train_loss_bc": 0.02295222505927086, "train_loss_llm": 0.5618085861206055, "grad_norm": 0.11563616991043091, "global_step": 162, "epoch": 0, "lr": 0.009999996631034345}
164
+ {"train_loss": 0.028045671060681343, "train_loss_bc": 0.027559760957956314, "train_loss_llm": 0.4859097898006439, "grad_norm": 0.17181871831417084, "global_step": 163, "epoch": 0, "lr": 0.009999996631034345}
165
+ {"train_loss": 0.012934507802128792, "train_loss_bc": 0.012457642704248428, "train_loss_llm": 0.4768647849559784, "grad_norm": 0.20869013667106628, "global_step": 164, "epoch": 0, "lr": 0.009999996631034345}
166
+ {"train_loss": 0.02624637447297573, "train_loss_bc": 0.02573414519429207, "train_loss_llm": 0.5122296214103699, "grad_norm": 0.2638067603111267, "global_step": 165, "epoch": 0, "lr": 0.009999996631034345}
167
+ {"train_loss": 0.04066107049584389, "train_loss_bc": 0.04009600728750229, "train_loss_llm": 0.5650624632835388, "grad_norm": 0.33961179852485657, "global_step": 166, "epoch": 0, "lr": 0.009999996631034345}
168
+ {"train_loss": 0.018790556117892265, "train_loss_bc": 0.018201837316155434, "train_loss_llm": 0.5887188911437988, "grad_norm": 0.38523611426353455, "global_step": 167, "epoch": 0, "lr": 0.009999996631034345}
169
+ {"train_loss": 0.024333346635103226, "train_loss_bc": 0.023783767595887184, "train_loss_llm": 0.5495793223381042, "grad_norm": 0.4378797113895416, "global_step": 168, "epoch": 0, "lr": 0.00999999599065253}
170
+ {"train_loss": 0.02820535935461521, "train_loss_bc": 0.027705006301403046, "train_loss_llm": 0.5003523826599121, "grad_norm": 0.053658343851566315, "global_step": 169, "epoch": 0, "lr": 0.00999999599065253}
171
+ {"train_loss": 0.025200804695487022, "train_loss_bc": 0.02471126988530159, "train_loss_llm": 0.4895356297492981, "grad_norm": 0.10377084463834763, "global_step": 170, "epoch": 0, "lr": 0.00999999599065253}
172
+ {"train_loss": 0.01955123245716095, "train_loss_bc": 0.019059764221310616, "train_loss_llm": 0.4914677143096924, "grad_norm": 0.14640706777572632, "global_step": 171, "epoch": 0, "lr": 0.00999999599065253}
173
+ {"train_loss": 0.02138841524720192, "train_loss_bc": 0.02090127021074295, "train_loss_llm": 0.48714545369148254, "grad_norm": 0.1854257434606552, "global_step": 172, "epoch": 0, "lr": 0.00999999599065253}
174
+ {"train_loss": 0.024181261658668518, "train_loss_bc": 0.023732315748929977, "train_loss_llm": 0.44894570112228394, "grad_norm": 0.23339462280273438, "global_step": 173, "epoch": 0, "lr": 0.00999999599065253}
175
+ {"train_loss": 0.023717273026704788, "train_loss_bc": 0.02322128415107727, "train_loss_llm": 0.4959881603717804, "grad_norm": 0.2798902988433838, "global_step": 174, "epoch": 0, "lr": 0.00999999599065253}
176
+ {"train_loss": 0.03485918045043945, "train_loss_bc": 0.034368738532066345, "train_loss_llm": 0.49044036865234375, "grad_norm": 0.343951553106308, "global_step": 175, "epoch": 0, "lr": 0.00999999599065253}
177
+ {"train_loss": 0.02320096641778946, "train_loss_bc": 0.022748133167624474, "train_loss_llm": 0.4528330862522125, "grad_norm": 0.38900986313819885, "global_step": 176, "epoch": 0, "lr": 0.009999995294585371}
178
+ {"train_loss": 0.023330306634306908, "train_loss_bc": 0.022773388773202896, "train_loss_llm": 0.5569183826446533, "grad_norm": 0.0416216216981411, "global_step": 177, "epoch": 0, "lr": 0.009999995294585371}
179
+ {"train_loss": 0.017154095694422722, "train_loss_bc": 0.01675502397119999, "train_loss_llm": 0.3990713059902191, "grad_norm": 0.06875938922166824, "global_step": 178, "epoch": 0, "lr": 0.009999995294585371}
180
+ {"train_loss": 0.021150220185518265, "train_loss_bc": 0.02067718282341957, "train_loss_llm": 0.4730375111103058, "grad_norm": 0.10553700476884842, "global_step": 179, "epoch": 0, "lr": 0.009999995294585371}
181
+ {"train_loss": 0.016837185248732567, "train_loss_bc": 0.016467537730932236, "train_loss_llm": 0.36964699625968933, "grad_norm": 0.13219793140888214, "global_step": 180, "epoch": 0, "lr": 0.009999995294585371}
182
+ {"train_loss": 0.009770027361810207, "train_loss_bc": 0.009397734887897968, "train_loss_llm": 0.3722921311855316, "grad_norm": 0.15051698684692383, "global_step": 181, "epoch": 0, "lr": 0.009999995294585371}
183
+ {"train_loss": 0.02333925850689411, "train_loss_bc": 0.02287432923913002, "train_loss_llm": 0.46492841839790344, "grad_norm": 0.18662676215171814, "global_step": 182, "epoch": 0, "lr": 0.009999995294585371}
184
+ {"train_loss": 0.013727608136832714, "train_loss_bc": 0.01333148404955864, "train_loss_llm": 0.39612439274787903, "grad_norm": 0.21264775097370148, "global_step": 183, "epoch": 0, "lr": 0.009999995294585371}
185
+ {"train_loss": 0.01233526412397623, "train_loss_bc": 0.011927351355552673, "train_loss_llm": 0.4079124927520752, "grad_norm": 0.23667089641094208, "global_step": 184, "epoch": 0, "lr": 0.009999994542832874}
186
+ {"train_loss": 0.014251401647925377, "train_loss_bc": 0.013858886435627937, "train_loss_llm": 0.3925148844718933, "grad_norm": 0.02162291295826435, "global_step": 185, "epoch": 0, "lr": 0.009999994542832874}
187
+ {"train_loss": 0.02096753567457199, "train_loss_bc": 0.020425261929631233, "train_loss_llm": 0.542274534702301, "grad_norm": 0.05543696507811546, "global_step": 186, "epoch": 0, "lr": 0.009999994542832874}
188
+ {"train_loss": 0.021045425906777382, "train_loss_bc": 0.02053808979690075, "train_loss_llm": 0.5073364973068237, "grad_norm": 0.08720546215772629, "global_step": 187, "epoch": 0, "lr": 0.009999994542832874}
189
+ {"train_loss": 0.020341763272881508, "train_loss_bc": 0.019736729562282562, "train_loss_llm": 0.6050328016281128, "grad_norm": 0.11292923241853714, "global_step": 188, "epoch": 0, "lr": 0.009999994542832874}
190
+ {"train_loss": 0.011690325103700161, "train_loss_bc": 0.011292900890111923, "train_loss_llm": 0.39742419123649597, "grad_norm": 0.13396863639354706, "global_step": 189, "epoch": 0, "lr": 0.009999994542832874}
191
+ {"train_loss": 0.02014937624335289, "train_loss_bc": 0.019594522193074226, "train_loss_llm": 0.554853618144989, "grad_norm": 0.16246306896209717, "global_step": 190, "epoch": 0, "lr": 0.009999994542832874}
192
+ {"train_loss": 0.014351895079016685, "train_loss_bc": 0.01393540296703577, "train_loss_llm": 0.41649240255355835, "grad_norm": 0.18816952407360077, "global_step": 191, "epoch": 0, "lr": 0.009999994542832874}
193
+ {"train_loss": 0.015411981381475925, "train_loss_bc": 0.014920342713594437, "train_loss_llm": 0.4916388988494873, "grad_norm": 0.2118474692106247, "global_step": 192, "epoch": 0, "lr": 0.009999993735395049}
194
+ {"train_loss": 0.018817156553268433, "train_loss_bc": 0.018374208360910416, "train_loss_llm": 0.4429486393928528, "grad_norm": 0.031203927472233772, "global_step": 193, "epoch": 0, "lr": 0.009999993735395049}
195
+ {"train_loss": 0.015957778319716454, "train_loss_bc": 0.015566591173410416, "train_loss_llm": 0.39118722081184387, "grad_norm": 0.06421653926372528, "global_step": 194, "epoch": 0, "lr": 0.009999993735395049}
196
+ {"train_loss": 0.019087474793195724, "train_loss_bc": 0.018518388271331787, "train_loss_llm": 0.5690857172012329, "grad_norm": 0.0811726450920105, "global_step": 195, "epoch": 0, "lr": 0.009999993735395049}
197
+ {"train_loss": 0.015710245817899704, "train_loss_bc": 0.01526118814945221, "train_loss_llm": 0.44905757904052734, "grad_norm": 0.10153987258672714, "global_step": 196, "epoch": 0, "lr": 0.009999993735395049}
198
+ {"train_loss": 0.019453734159469604, "train_loss_bc": 0.018925407901406288, "train_loss_llm": 0.5283269882202148, "grad_norm": 0.1219358816742897, "global_step": 197, "epoch": 0, "lr": 0.009999993735395049}
199
+ {"train_loss": 0.01505982130765915, "train_loss_bc": 0.014631738886237144, "train_loss_llm": 0.4280821681022644, "grad_norm": 0.14476309716701508, "global_step": 198, "epoch": 0, "lr": 0.009999993735395049}
200
+ {"train_loss": 0.013033466413617134, "train_loss_bc": 0.012675212696194649, "train_loss_llm": 0.3582540452480316, "grad_norm": 0.1675001084804535, "global_step": 199, "epoch": 0, "lr": 0.009999993735395049}
201
+ {"train_loss": 0.01897766813635826, "train_loss_bc": 0.018507644534111023, "train_loss_llm": 0.4700234532356262, "grad_norm": 0.1958317756652832, "global_step": 200, "epoch": 0, "lr": 0.009999992872271905}
202
+ {"train_loss": 0.021655641496181488, "train_loss_bc": 0.021151017397642136, "train_loss_llm": 0.5046237111091614, "grad_norm": 0.03384440392255783, "global_step": 201, "epoch": 0, "lr": 0.009999992872271905}
203
+ {"train_loss": 0.019754817709326744, "train_loss_bc": 0.019261155277490616, "train_loss_llm": 0.49366283416748047, "grad_norm": 0.06363573670387268, "global_step": 202, "epoch": 0, "lr": 0.009999992872271905}
204
+ {"train_loss": 0.0197446309030056, "train_loss_bc": 0.019323352724313736, "train_loss_llm": 0.4212789237499237, "grad_norm": 0.09140758961439133, "global_step": 203, "epoch": 0, "lr": 0.009999992872271905}
205
+ {"train_loss": 0.02213365212082863, "train_loss_bc": 0.02167753502726555, "train_loss_llm": 0.4561164677143097, "grad_norm": 0.12779031693935394, "global_step": 204, "epoch": 0, "lr": 0.009999992872271905}
206
+ {"train_loss": 0.018727730959653854, "train_loss_bc": 0.018266774713993073, "train_loss_llm": 0.46095675230026245, "grad_norm": 0.15023837983608246, "global_step": 205, "epoch": 0, "lr": 0.009999992872271905}
207
+ {"train_loss": 0.021134980022907257, "train_loss_bc": 0.02065064013004303, "train_loss_llm": 0.48433929681777954, "grad_norm": 0.18333274126052856, "global_step": 206, "epoch": 0, "lr": 0.009999992872271905}
208
+ {"train_loss": 0.018410563468933105, "train_loss_bc": 0.017892083153128624, "train_loss_llm": 0.5184803605079651, "grad_norm": 0.20620277523994446, "global_step": 207, "epoch": 0, "lr": 0.009999992872271905}
209
+ {"train_loss": 0.01952839083969593, "train_loss_bc": 0.01910785771906376, "train_loss_llm": 0.42053380608558655, "grad_norm": 0.23822638392448425, "global_step": 208, "epoch": 0, "lr": 0.009999991953463454}
210
+ {"train_loss": 0.020005524158477783, "train_loss_bc": 0.01958916336297989, "train_loss_llm": 0.41636162996292114, "grad_norm": 0.022418994456529617, "global_step": 209, "epoch": 0, "lr": 0.009999991953463454}
211
+ {"train_loss": 0.022034049034118652, "train_loss_bc": 0.02159287966787815, "train_loss_llm": 0.4411696493625641, "grad_norm": 0.058893270790576935, "global_step": 210, "epoch": 0, "lr": 0.009999991953463454}
212
+ {"train_loss": 0.018601490184664726, "train_loss_bc": 0.018017925322055817, "train_loss_llm": 0.583564043045044, "grad_norm": 0.07501673698425293, "global_step": 211, "epoch": 0, "lr": 0.009999991953463454}
213
+ {"train_loss": 0.021930024027824402, "train_loss_bc": 0.021384473890066147, "train_loss_llm": 0.5455496311187744, "grad_norm": 0.09862517565488815, "global_step": 212, "epoch": 0, "lr": 0.009999991953463454}
214
+ {"train_loss": 0.020846663042902946, "train_loss_bc": 0.020350880920886993, "train_loss_llm": 0.49578237533569336, "grad_norm": 0.1402716189622879, "global_step": 213, "epoch": 0, "lr": 0.009999991953463454}
215
+ {"train_loss": 0.01877172477543354, "train_loss_bc": 0.01824098639190197, "train_loss_llm": 0.5307385921478271, "grad_norm": 0.16469259560108185, "global_step": 214, "epoch": 0, "lr": 0.009999991953463454}
216
+ {"train_loss": 0.020292270928621292, "train_loss_bc": 0.019869061186909676, "train_loss_llm": 0.42321062088012695, "grad_norm": 0.19249005615711212, "global_step": 215, "epoch": 0, "lr": 0.009999991953463454}
217
+ {"train_loss": 0.019689541310071945, "train_loss_bc": 0.019232220947742462, "train_loss_llm": 0.4573211669921875, "grad_norm": 0.21812190115451813, "global_step": 216, "epoch": 0, "lr": 0.0099999909789697}
218
+ {"train_loss": 0.01748274266719818, "train_loss_bc": 0.016972113400697708, "train_loss_llm": 0.5106291174888611, "grad_norm": 0.01980498433113098, "global_step": 217, "epoch": 0, "lr": 0.0099999909789697}
219
+ {"train_loss": 0.02484678477048874, "train_loss_bc": 0.024319060146808624, "train_loss_llm": 0.5277247428894043, "grad_norm": 0.0617092065513134, "global_step": 218, "epoch": 0, "lr": 0.0099999909789697}
220
+ {"train_loss": 0.019288551062345505, "train_loss_bc": 0.01883828639984131, "train_loss_llm": 0.4502650499343872, "grad_norm": 0.08189266920089722, "global_step": 219, "epoch": 0, "lr": 0.0099999909789697}
221
+ {"train_loss": 0.01973573863506317, "train_loss_bc": 0.019199654459953308, "train_loss_llm": 0.5360836982727051, "grad_norm": 0.09861791878938675, "global_step": 220, "epoch": 0, "lr": 0.0099999909789697}
222
+ {"train_loss": 0.018722541630268097, "train_loss_bc": 0.018269415944814682, "train_loss_llm": 0.45312485098838806, "grad_norm": 0.12747113406658173, "global_step": 221, "epoch": 0, "lr": 0.0099999909789697}
223
+ {"train_loss": 0.017685379832983017, "train_loss_bc": 0.0172601118683815, "train_loss_llm": 0.4252672493457794, "grad_norm": 0.15524466335773468, "global_step": 222, "epoch": 0, "lr": 0.0099999909789697}
224
+ {"train_loss": 0.021818850189447403, "train_loss_bc": 0.02134229615330696, "train_loss_llm": 0.47655367851257324, "grad_norm": 0.1838337481021881, "global_step": 223, "epoch": 0, "lr": 0.0099999909789697}
225
+ {"train_loss": 0.019331879913806915, "train_loss_bc": 0.01881510019302368, "train_loss_llm": 0.5167800188064575, "grad_norm": 0.2143346071243286, "global_step": 224, "epoch": 0, "lr": 0.00999998994879066}
226
+ {"train_loss": 0.017838943749666214, "train_loss_bc": 0.017424583435058594, "train_loss_llm": 0.4143611192703247, "grad_norm": 0.023944241926074028, "global_step": 225, "epoch": 0, "lr": 0.00999998994879066}
227
+ {"train_loss": 0.01796240359544754, "train_loss_bc": 0.017589787021279335, "train_loss_llm": 0.37261566519737244, "grad_norm": 0.03415573388338089, "global_step": 226, "epoch": 0, "lr": 0.00999998994879066}
228
+ {"train_loss": 0.01584581844508648, "train_loss_bc": 0.015357905998826027, "train_loss_llm": 0.4879117012023926, "grad_norm": 0.05189693719148636, "global_step": 227, "epoch": 0, "lr": 0.00999998994879066}
229
+ {"train_loss": 0.01800801046192646, "train_loss_bc": 0.01758820191025734, "train_loss_llm": 0.4198092520236969, "grad_norm": 0.07924457639455795, "global_step": 228, "epoch": 0, "lr": 0.00999998994879066}
230
+ {"train_loss": 0.018989915028214455, "train_loss_bc": 0.01854291930794716, "train_loss_llm": 0.4469965100288391, "grad_norm": 0.11897021532058716, "global_step": 229, "epoch": 0, "lr": 0.00999998994879066}
231
+ {"train_loss": 0.02125917375087738, "train_loss_bc": 0.020760733634233475, "train_loss_llm": 0.49844038486480713, "grad_norm": 0.1377515345811844, "global_step": 230, "epoch": 0, "lr": 0.00999998994879066}
232
+ {"train_loss": 0.019712205976247787, "train_loss_bc": 0.01920940726995468, "train_loss_llm": 0.5027981996536255, "grad_norm": 0.16281495988368988, "global_step": 231, "epoch": 0, "lr": 0.00999998994879066}
233
+ {"train_loss": 0.020320260897278786, "train_loss_bc": 0.019815631210803986, "train_loss_llm": 0.5046302080154419, "grad_norm": 0.1876341551542282, "global_step": 232, "epoch": 0, "lr": 0.009999988862926341}
234
+ {"train_loss": 0.013357514515519142, "train_loss_bc": 0.01291839312762022, "train_loss_llm": 0.43912118673324585, "grad_norm": 0.02517073042690754, "global_step": 233, "epoch": 0, "lr": 0.009999988862926341}
235
+ {"train_loss": 0.02109229937195778, "train_loss_bc": 0.020597826689481735, "train_loss_llm": 0.49447277188301086, "grad_norm": 0.04559013620018959, "global_step": 234, "epoch": 0, "lr": 0.009999988862926341}
236
+ {"train_loss": 0.02008131518959999, "train_loss_bc": 0.019501332193613052, "train_loss_llm": 0.579983115196228, "grad_norm": 0.07952536642551422, "global_step": 235, "epoch": 0, "lr": 0.009999988862926341}
237
+ {"train_loss": 0.016857489943504333, "train_loss_bc": 0.016379257664084435, "train_loss_llm": 0.4782329797744751, "grad_norm": 0.10649916529655457, "global_step": 236, "epoch": 0, "lr": 0.009999988862926341}
238
+ {"train_loss": 0.06850671758405677, "train_loss_bc": 0.014644688926637173, "train_loss_llm": 0.4756101667881012, "grad_norm": 0.13082890212535858, "global_step": 237, "epoch": 0, "lr": 0.009999988862926341, "train/cumulative_reward": 2.7083310524135573, "train/mean_score": 0.33428478816554785, "train/success_rate": 0.0, "test/cumulative_reward": 2.474044586385482, "test/mean_score": 0.3310451992587934, "test/success_rate": 0.0, "val_loss": 0.017693543806672096, "train_action_mse_error": 0.021953511983156204}
239
+ {"train_loss": 0.017798328772187233, "train_loss_bc": 0.017359893769025803, "train_loss_llm": 0.4384341835975647, "grad_norm": 0.157542422413826, "global_step": 238, "epoch": 1, "lr": 0.009999988862926341}
240
+ {"train_loss": 0.01802152208983898, "train_loss_bc": 0.017627805471420288, "train_loss_llm": 0.39371609687805176, "grad_norm": 0.17802225053310394, "global_step": 239, "epoch": 1, "lr": 0.009999988862926341}
241
+ {"train_loss": 0.018425248563289642, "train_loss_bc": 0.01776362583041191, "train_loss_llm": 0.6616224050521851, "grad_norm": 0.20682503283023834, "global_step": 240, "epoch": 1, "lr": 0.009999987721376759}
242
+ {"train_loss": 0.017822718247771263, "train_loss_bc": 0.017327211797237396, "train_loss_llm": 0.4955056309700012, "grad_norm": 0.03360544890165329, "global_step": 241, "epoch": 1, "lr": 0.009999987721376759}
243
+ {"train_loss": 0.01679021306335926, "train_loss_bc": 0.016283852979540825, "train_loss_llm": 0.5063599348068237, "grad_norm": 0.062126513570547104, "global_step": 242, "epoch": 1, "lr": 0.009999987721376759}
244
+ {"train_loss": 0.020830130204558372, "train_loss_bc": 0.020347915589809418, "train_loss_llm": 0.48221397399902344, "grad_norm": 0.08846676349639893, "global_step": 243, "epoch": 1, "lr": 0.009999987721376759}
245
+ {"train_loss": 0.011690114624798298, "train_loss_bc": 0.01113096158951521, "train_loss_llm": 0.559153139591217, "grad_norm": 0.1047411635518074, "global_step": 244, "epoch": 1, "lr": 0.009999987721376759}
246
+ {"train_loss": 0.020986376330256462, "train_loss_bc": 0.020374851301312447, "train_loss_llm": 0.6115252375602722, "grad_norm": 0.1375197023153305, "global_step": 245, "epoch": 1, "lr": 0.009999987721376759}
247
+ {"train_loss": 0.014499716460704803, "train_loss_bc": 0.013983565382659435, "train_loss_llm": 0.516150951385498, "grad_norm": 0.15900495648384094, "global_step": 246, "epoch": 1, "lr": 0.009999987721376759}
248
+ {"train_loss": 0.02040776051580906, "train_loss_bc": 0.01990542560815811, "train_loss_llm": 0.502334475517273, "grad_norm": 0.1920090615749359, "global_step": 247, "epoch": 1, "lr": 0.009999987721376759}
249
+ {"train_loss": 0.008450948633253574, "train_loss_bc": 0.00804897490888834, "train_loss_llm": 0.4019736349582672, "grad_norm": 0.20613166689872742, "global_step": 248, "epoch": 1, "lr": 0.009999986524141925}
250
+ {"train_loss": 0.01662587560713291, "train_loss_bc": 0.016171330586075783, "train_loss_llm": 0.45454519987106323, "grad_norm": 0.02374984882771969, "global_step": 249, "epoch": 1, "lr": 0.009999986524141925}
251
+ {"train_loss": 0.015652479603886604, "train_loss_bc": 0.015175838023424149, "train_loss_llm": 0.47664228081703186, "grad_norm": 0.04423899948596954, "global_step": 250, "epoch": 1, "lr": 0.009999986524141925}
252
+ {"train_loss": 0.015529230237007141, "train_loss_bc": 0.015086237341165543, "train_loss_llm": 0.4429924190044403, "grad_norm": 0.06532718986272812, "global_step": 251, "epoch": 1, "lr": 0.009999986524141925}
253
+ {"train_loss": 0.018649809062480927, "train_loss_bc": 0.018053732812404633, "train_loss_llm": 0.5960763692855835, "grad_norm": 0.09744929522275925, "global_step": 252, "epoch": 1, "lr": 0.009999986524141925}
254
+ {"train_loss": 0.014919068664312363, "train_loss_bc": 0.014484588988125324, "train_loss_llm": 0.4344799220561981, "grad_norm": 0.1184827908873558, "global_step": 253, "epoch": 1, "lr": 0.009999986524141925}
255
+ {"train_loss": 0.012522549368441105, "train_loss_bc": 0.0121694877743721, "train_loss_llm": 0.3530616760253906, "grad_norm": 0.13075849413871765, "global_step": 254, "epoch": 1, "lr": 0.009999986524141925}
256
+ {"train_loss": 0.017960211262106895, "train_loss_bc": 0.017518820241093636, "train_loss_llm": 0.4413911700248718, "grad_norm": 0.15736038982868195, "global_step": 255, "epoch": 1, "lr": 0.009999986524141925}
257
+ {"train_loss": 0.016007019206881523, "train_loss_bc": 0.015464743599295616, "train_loss_llm": 0.54227614402771, "grad_norm": 0.17768608033657074, "global_step": 256, "epoch": 1, "lr": 0.00999998527122185}
258
+ {"train_loss": 0.01158602349460125, "train_loss_bc": 0.011238181963562965, "train_loss_llm": 0.34784168004989624, "grad_norm": 0.010093354620039463, "global_step": 257, "epoch": 1, "lr": 0.00999998527122185}
259
+ {"train_loss": 0.010712604969739914, "train_loss_bc": 0.010269438847899437, "train_loss_llm": 0.44316577911376953, "grad_norm": 0.021126240491867065, "global_step": 258, "epoch": 1, "lr": 0.00999998527122185}
260
+ {"train_loss": 0.01096857525408268, "train_loss_bc": 0.010642854496836662, "train_loss_llm": 0.3257203996181488, "grad_norm": 0.03387540951371193, "global_step": 259, "epoch": 1, "lr": 0.00999998527122185}
261
+ {"train_loss": 0.01653478853404522, "train_loss_bc": 0.016012927517294884, "train_loss_llm": 0.5218604207038879, "grad_norm": 0.04911898449063301, "global_step": 260, "epoch": 1, "lr": 0.00999998527122185}
262
+ {"train_loss": 0.017165496945381165, "train_loss_bc": 0.01656423695385456, "train_loss_llm": 0.6012594699859619, "grad_norm": 0.06821974366903305, "global_step": 261, "epoch": 1, "lr": 0.00999998527122185}
263
+ {"train_loss": 0.012002137489616871, "train_loss_bc": 0.011621439829468727, "train_loss_llm": 0.38069722056388855, "grad_norm": 0.08292040973901749, "global_step": 262, "epoch": 1, "lr": 0.00999998527122185}
264
+ {"train_loss": 0.018928784877061844, "train_loss_bc": 0.018516037613153458, "train_loss_llm": 0.41274651885032654, "grad_norm": 0.09135116636753082, "global_step": 263, "epoch": 1, "lr": 0.00999998527122185}
265
+ {"train_loss": 0.018131952732801437, "train_loss_bc": 0.017562976107001305, "train_loss_llm": 0.5689768195152283, "grad_norm": 0.11499010771512985, "global_step": 264, "epoch": 1, "lr": 0.009999983962616553}
266
+ {"train_loss": 0.012489533051848412, "train_loss_bc": 0.011947352439165115, "train_loss_llm": 0.5421801805496216, "grad_norm": 0.013015178963541985, "global_step": 265, "epoch": 1, "lr": 0.009999983962616553}
267
+ {"train_loss": 0.013243050314486027, "train_loss_bc": 0.012746745720505714, "train_loss_llm": 0.4963045120239258, "grad_norm": 0.020864736288785934, "global_step": 266, "epoch": 1, "lr": 0.009999983962616553}
268
+ {"train_loss": 0.010356509126722813, "train_loss_bc": 0.009778052568435669, "train_loss_llm": 0.5784561634063721, "grad_norm": 0.0276536475867033, "global_step": 267, "epoch": 1, "lr": 0.009999983962616553}
269
+ {"train_loss": 0.012164799496531487, "train_loss_bc": 0.011662531644105911, "train_loss_llm": 0.5022678375244141, "grad_norm": 0.03239491581916809, "global_step": 268, "epoch": 1, "lr": 0.009999983962616553}
270
+ {"train_loss": 0.014096668921411037, "train_loss_bc": 0.0135754169896245, "train_loss_llm": 0.5212522745132446, "grad_norm": 0.0413699746131897, "global_step": 269, "epoch": 1, "lr": 0.009999983962616553}
271
+ {"train_loss": 0.011443986557424068, "train_loss_bc": 0.011009275913238525, "train_loss_llm": 0.43471041321754456, "grad_norm": 0.038190145045518875, "global_step": 270, "epoch": 1, "lr": 0.009999983962616553}
272
+ {"train_loss": 0.01239698100835085, "train_loss_bc": 0.011889282613992691, "train_loss_llm": 0.5076982975006104, "grad_norm": 0.04352530464529991, "global_step": 271, "epoch": 1, "lr": 0.009999983962616553}
273
+ {"train_loss": 0.013387206010520458, "train_loss_bc": 0.012860596179962158, "train_loss_llm": 0.5266100168228149, "grad_norm": 0.05260344222187996, "global_step": 272, "epoch": 1, "lr": 0.009999982598326042}
274
+ {"train_loss": 0.01481005921959877, "train_loss_bc": 0.014252791181206703, "train_loss_llm": 0.5572683215141296, "grad_norm": 0.0161435529589653, "global_step": 273, "epoch": 1, "lr": 0.009999982598326042}
275
+ {"train_loss": 0.009104442782700062, "train_loss_bc": 0.008664367720484734, "train_loss_llm": 0.44007524847984314, "grad_norm": 0.02379443496465683, "global_step": 274, "epoch": 1, "lr": 0.009999982598326042}
276
+ {"train_loss": 0.016264215111732483, "train_loss_bc": 0.015678897500038147, "train_loss_llm": 0.5853180885314941, "grad_norm": 0.043524160981178284, "global_step": 275, "epoch": 1, "lr": 0.009999982598326042}
277
+ {"train_loss": 0.017824366688728333, "train_loss_bc": 0.017341842874884605, "train_loss_llm": 0.4825235903263092, "grad_norm": 0.062320295721292496, "global_step": 276, "epoch": 1, "lr": 0.009999982598326042}
278
+ {"train_loss": 0.018680082634091377, "train_loss_bc": 0.018099233508110046, "train_loss_llm": 0.5808486342430115, "grad_norm": 0.08115622401237488, "global_step": 277, "epoch": 1, "lr": 0.009999982598326042}
279
+ {"train_loss": 0.012962117791175842, "train_loss_bc": 0.012479234486818314, "train_loss_llm": 0.4828835129737854, "grad_norm": 0.08807636052370071, "global_step": 278, "epoch": 1, "lr": 0.009999982598326042}
280
+ {"train_loss": 0.01446839701384306, "train_loss_bc": 0.013988605700433254, "train_loss_llm": 0.4797913432121277, "grad_norm": 0.09451211988925934, "global_step": 279, "epoch": 1, "lr": 0.009999982598326042}
281
+ {"train_loss": 0.01429255772382021, "train_loss_bc": 0.013756824657320976, "train_loss_llm": 0.5357327461242676, "grad_norm": 0.10867451131343842, "global_step": 280, "epoch": 1, "lr": 0.00999998117835034}
282
+ {"train_loss": 0.013955993577837944, "train_loss_bc": 0.013429549522697926, "train_loss_llm": 0.5264439582824707, "grad_norm": 0.025083180516958237, "global_step": 281, "epoch": 1, "lr": 0.00999998117835034}
283
+ {"train_loss": 0.016641786321997643, "train_loss_bc": 0.01618514023721218, "train_loss_llm": 0.4566459357738495, "grad_norm": 0.045515093952417374, "global_step": 282, "epoch": 1, "lr": 0.00999998117835034}
284
+ {"train_loss": 0.014012634754180908, "train_loss_bc": 0.013572480529546738, "train_loss_llm": 0.4401538670063019, "grad_norm": 0.06089504435658455, "global_step": 283, "epoch": 1, "lr": 0.00999998117835034}
285
+ {"train_loss": 0.01873624697327614, "train_loss_bc": 0.01815981976687908, "train_loss_llm": 0.5764279365539551, "grad_norm": 0.07969119399785995, "global_step": 284, "epoch": 1, "lr": 0.00999998117835034}
286
+ {"train_loss": 0.017088143154978752, "train_loss_bc": 0.01662488281726837, "train_loss_llm": 0.46325966715812683, "grad_norm": 0.09512478858232498, "global_step": 285, "epoch": 1, "lr": 0.00999998117835034}
287
+ {"train_loss": 0.010966386646032333, "train_loss_bc": 0.010503709316253662, "train_loss_llm": 0.46267759799957275, "grad_norm": 0.11192868649959564, "global_step": 286, "epoch": 1, "lr": 0.00999998117835034}
288
+ {"train_loss": 0.01674928329885006, "train_loss_bc": 0.016327429562807083, "train_loss_llm": 0.4218546152114868, "grad_norm": 0.1299564689397812, "global_step": 287, "epoch": 1, "lr": 0.00999998117835034}
289
+ {"train_loss": 0.016223106533288956, "train_loss_bc": 0.015695005655288696, "train_loss_llm": 0.5281013250350952, "grad_norm": 0.14104627072811127, "global_step": 288, "epoch": 1, "lr": 0.009999979702689454}
290
+ {"train_loss": 0.017164213582873344, "train_loss_bc": 0.016666820272803307, "train_loss_llm": 0.4973934590816498, "grad_norm": 0.02183571644127369, "global_step": 289, "epoch": 1, "lr": 0.009999979702689454}
291
+ {"train_loss": 0.01507254596799612, "train_loss_bc": 0.014609228819608688, "train_loss_llm": 0.46331721544265747, "grad_norm": 0.03880901262164116, "global_step": 290, "epoch": 1, "lr": 0.009999979702689454}
292
+ {"train_loss": 0.019254591315984726, "train_loss_bc": 0.01874985173344612, "train_loss_llm": 0.5047386884689331, "grad_norm": 0.06385096162557602, "global_step": 291, "epoch": 1, "lr": 0.009999979702689454}
293
+ {"train_loss": 0.0154347512871027, "train_loss_bc": 0.01500864326953888, "train_loss_llm": 0.42610809206962585, "grad_norm": 0.08098644018173218, "global_step": 292, "epoch": 1, "lr": 0.009999979702689454}
294
+ {"train_loss": 0.01903417333960533, "train_loss_bc": 0.018556609749794006, "train_loss_llm": 0.47756439447402954, "grad_norm": 0.10871503502130508, "global_step": 293, "epoch": 1, "lr": 0.009999979702689454}
295
+ {"train_loss": 0.0156480111181736, "train_loss_bc": 0.015174117870628834, "train_loss_llm": 0.47389230132102966, "grad_norm": 0.13166505098342896, "global_step": 294, "epoch": 1, "lr": 0.009999979702689454}
296
+ {"train_loss": 0.016828790307044983, "train_loss_bc": 0.016389530152082443, "train_loss_llm": 0.43926095962524414, "grad_norm": 0.1540358066558838, "global_step": 295, "epoch": 1, "lr": 0.009999979702689454}
297
+ {"train_loss": 0.013542444445192814, "train_loss_bc": 0.013059152290225029, "train_loss_llm": 0.48329171538352966, "grad_norm": 0.17128632962703705, "global_step": 296, "epoch": 1, "lr": 0.00999997817134341}
298
+ {"train_loss": 0.01356798131018877, "train_loss_bc": 0.013153335079550743, "train_loss_llm": 0.41464588046073914, "grad_norm": 0.01961551606655121, "global_step": 297, "epoch": 1, "lr": 0.00999997817134341}
299
+ {"train_loss": 0.015729112550616264, "train_loss_bc": 0.015209322795271873, "train_loss_llm": 0.5197891592979431, "grad_norm": 0.04597029462456703, "global_step": 298, "epoch": 1, "lr": 0.00999997817134341}
300
+ {"train_loss": 0.015187690034508705, "train_loss_bc": 0.014714469201862812, "train_loss_llm": 0.4732206165790558, "grad_norm": 0.06501750648021698, "global_step": 299, "epoch": 1, "lr": 0.00999997817134341}
301
+ {"train_loss": 0.01496143825352192, "train_loss_bc": 0.014608250930905342, "train_loss_llm": 0.35318759083747864, "grad_norm": 0.09145065397024155, "global_step": 300, "epoch": 1, "lr": 0.00999997817134341}
302
+ {"train_loss": 0.014216883108019829, "train_loss_bc": 0.013763219118118286, "train_loss_llm": 0.4536639451980591, "grad_norm": 0.10366859287023544, "global_step": 301, "epoch": 1, "lr": 0.00999997817134341}
303
+ {"train_loss": 0.01669706590473652, "train_loss_bc": 0.016175638884305954, "train_loss_llm": 0.5214270353317261, "grad_norm": 0.12138961255550385, "global_step": 302, "epoch": 1, "lr": 0.00999997817134341}
304
+ {"train_loss": 0.014355774968862534, "train_loss_bc": 0.013972668908536434, "train_loss_llm": 0.3831060230731964, "grad_norm": 0.14051002264022827, "global_step": 303, "epoch": 1, "lr": 0.00999997817134341}
305
+ {"train_loss": 0.0146627863869071, "train_loss_bc": 0.014223872683942318, "train_loss_llm": 0.4389132857322693, "grad_norm": 0.15912242233753204, "global_step": 304, "epoch": 1, "lr": 0.009999976584312217}
306
+ {"train_loss": 0.01133162435144186, "train_loss_bc": 0.010947933420538902, "train_loss_llm": 0.3836905360221863, "grad_norm": 0.02009022980928421, "global_step": 305, "epoch": 1, "lr": 0.009999976584312217}
307
+ {"train_loss": 0.01270595658570528, "train_loss_bc": 0.012229321524500847, "train_loss_llm": 0.4766354262828827, "grad_norm": 0.029522329568862915, "global_step": 306, "epoch": 1, "lr": 0.009999976584312217}
308
+ {"train_loss": 0.014936204068362713, "train_loss_bc": 0.014451291412115097, "train_loss_llm": 0.4849129617214203, "grad_norm": 0.056380923837423325, "global_step": 307, "epoch": 1, "lr": 0.009999976584312217}
309
+ {"train_loss": 0.010747802443802357, "train_loss_bc": 0.010318206623196602, "train_loss_llm": 0.4295954704284668, "grad_norm": 0.07131356745958328, "global_step": 308, "epoch": 1, "lr": 0.009999976584312217}
310
+ {"train_loss": 0.010907587595283985, "train_loss_bc": 0.010417597368359566, "train_loss_llm": 0.4899904727935791, "grad_norm": 0.08291061967611313, "global_step": 309, "epoch": 1, "lr": 0.009999976584312217}
311
+ {"train_loss": 0.01565735787153244, "train_loss_bc": 0.01524802204221487, "train_loss_llm": 0.4093364179134369, "grad_norm": 0.1052742674946785, "global_step": 310, "epoch": 1, "lr": 0.009999976584312217}
312
+ {"train_loss": 0.013044213876128197, "train_loss_bc": 0.012592458166182041, "train_loss_llm": 0.4517558515071869, "grad_norm": 0.12751010060310364, "global_step": 311, "epoch": 1, "lr": 0.009999976584312217}
313
+ {"train_loss": 0.012058139778673649, "train_loss_bc": 0.011555514298379421, "train_loss_llm": 0.5026251077651978, "grad_norm": 0.1479104608297348, "global_step": 312, "epoch": 1, "lr": 0.009999974941595897}
314
+ {"train_loss": 0.010018293745815754, "train_loss_bc": 0.009687970392405987, "train_loss_llm": 0.3303234279155731, "grad_norm": 0.01355504896491766, "global_step": 313, "epoch": 1, "lr": 0.009999974941595897}
315
+ {"train_loss": 0.012328105047345161, "train_loss_bc": 0.011914866045117378, "train_loss_llm": 0.4132387638092041, "grad_norm": 0.01594623737037182, "global_step": 314, "epoch": 1, "lr": 0.009999974941595897}
316
+ {"train_loss": 0.013978242874145508, "train_loss_bc": 0.013544456101953983, "train_loss_llm": 0.4337867498397827, "grad_norm": 0.032734472304582596, "global_step": 315, "epoch": 1, "lr": 0.009999974941595897}
317
+ {"train_loss": 0.010386270470917225, "train_loss_bc": 0.010068733245134354, "train_loss_llm": 0.31753700971603394, "grad_norm": 0.047776710242033005, "global_step": 316, "epoch": 1, "lr": 0.009999974941595897}
318
+ {"train_loss": 0.012578755617141724, "train_loss_bc": 0.01214287057518959, "train_loss_llm": 0.4358847141265869, "grad_norm": 0.0635766088962555, "global_step": 317, "epoch": 1, "lr": 0.009999974941595897}
319
+ {"train_loss": 0.012422928586602211, "train_loss_bc": 0.012042918242514133, "train_loss_llm": 0.38001012802124023, "grad_norm": 0.07768117636442184, "global_step": 318, "epoch": 1, "lr": 0.009999974941595897}
320
+ {"train_loss": 0.010467208921909332, "train_loss_bc": 0.009988697245717049, "train_loss_llm": 0.478511244058609, "grad_norm": 0.08535484224557877, "global_step": 319, "epoch": 1, "lr": 0.009999974941595897}
321
+ {"train_loss": 0.012417087331414223, "train_loss_bc": 0.01195848360657692, "train_loss_llm": 0.45860394835472107, "grad_norm": 0.10900090634822845, "global_step": 320, "epoch": 1, "lr": 0.009999973243194467}
322
+ {"train_loss": 0.01247399765998125, "train_loss_bc": 0.01207180880010128, "train_loss_llm": 0.4021890163421631, "grad_norm": 0.010779356583952904, "global_step": 321, "epoch": 1, "lr": 0.009999973243194467}
323
+ {"train_loss": 0.011968130245804787, "train_loss_bc": 0.0115253496915102, "train_loss_llm": 0.44278010725975037, "grad_norm": 0.016100643202662468, "global_step": 322, "epoch": 1, "lr": 0.009999973243194467}
324
+ {"train_loss": 0.011378668248653412, "train_loss_bc": 0.010935855098068714, "train_loss_llm": 0.4428134262561798, "grad_norm": 0.02845556102693081, "global_step": 323, "epoch": 1, "lr": 0.009999973243194467}
325
+ {"train_loss": 0.010558527894318104, "train_loss_bc": 0.010113537311553955, "train_loss_llm": 0.4449908137321472, "grad_norm": 0.02639344334602356, "global_step": 324, "epoch": 1, "lr": 0.009999973243194467}
326
+ {"train_loss": 0.008580698631703854, "train_loss_bc": 0.008138567209243774, "train_loss_llm": 0.44213131070137024, "grad_norm": 0.039085108786821365, "global_step": 325, "epoch": 1, "lr": 0.009999973243194467}
327
+ {"train_loss": 0.013355431146919727, "train_loss_bc": 0.012834073975682259, "train_loss_llm": 0.5213567614555359, "grad_norm": 0.04931477829813957, "global_step": 326, "epoch": 1, "lr": 0.009999973243194467}
328
+ {"train_loss": 0.008711469359695911, "train_loss_bc": 0.008368385955691338, "train_loss_llm": 0.3430837392807007, "grad_norm": 0.05239582434296608, "global_step": 327, "epoch": 1, "lr": 0.009999973243194467}
329
+ {"train_loss": 0.009385243989527225, "train_loss_bc": 0.008970173075795174, "train_loss_llm": 0.41507115960121155, "grad_norm": 0.05491437017917633, "global_step": 328, "epoch": 1, "lr": 0.009999971489107947}
330
+ {"train_loss": 0.011174674145877361, "train_loss_bc": 0.010818562470376492, "train_loss_llm": 0.3561112880706787, "grad_norm": 0.011578625068068504, "global_step": 329, "epoch": 1, "lr": 0.009999971489107947}
331
+ {"train_loss": 0.013502768240869045, "train_loss_bc": 0.013065600767731667, "train_loss_llm": 0.4371674656867981, "grad_norm": 0.026880592107772827, "global_step": 330, "epoch": 1, "lr": 0.009999971489107947}
332
+ {"train_loss": 0.012593085877597332, "train_loss_bc": 0.012244774959981441, "train_loss_llm": 0.34831055998802185, "grad_norm": 0.041259463876485825, "global_step": 331, "epoch": 1, "lr": 0.009999971489107947}
333
+ {"train_loss": 0.009598112665116787, "train_loss_bc": 0.009030044078826904, "train_loss_llm": 0.5680687427520752, "grad_norm": 0.05241383612155914, "global_step": 332, "epoch": 1, "lr": 0.009999971489107947}
334
+ {"train_loss": 0.01214287243783474, "train_loss_bc": 0.011670759879052639, "train_loss_llm": 0.4721129238605499, "grad_norm": 0.07233195751905441, "global_step": 333, "epoch": 1, "lr": 0.009999971489107947}
335
+ {"train_loss": 0.015494297258555889, "train_loss_bc": 0.01502845250070095, "train_loss_llm": 0.46584439277648926, "grad_norm": 0.0923396646976471, "global_step": 334, "epoch": 1, "lr": 0.009999971489107947}
336
+ {"train_loss": 0.01266135461628437, "train_loss_bc": 0.012207714840769768, "train_loss_llm": 0.4536397457122803, "grad_norm": 0.10818523913621902, "global_step": 335, "epoch": 1, "lr": 0.009999971489107947}
337
+ {"train_loss": 0.012813151814043522, "train_loss_bc": 0.012354401871562004, "train_loss_llm": 0.45874953269958496, "grad_norm": 0.1238781213760376, "global_step": 336, "epoch": 1, "lr": 0.009999969679336354}
338
+ {"train_loss": 0.013637524098157883, "train_loss_bc": 0.013167794793844223, "train_loss_llm": 0.46972957253456116, "grad_norm": 0.017869004979729652, "global_step": 337, "epoch": 1, "lr": 0.009999969679336354}
339
+ {"train_loss": 0.01720421575009823, "train_loss_bc": 0.01666909269988537, "train_loss_llm": 0.5351230502128601, "grad_norm": 0.04792848974466324, "global_step": 338, "epoch": 1, "lr": 0.009999969679336354}
340
+ {"train_loss": 0.015335088595747948, "train_loss_bc": 0.014754555188119411, "train_loss_llm": 0.5805330276489258, "grad_norm": 0.06892868131399155, "global_step": 339, "epoch": 1, "lr": 0.009999969679336354}
341
+ {"train_loss": 0.010542848147451878, "train_loss_bc": 0.01001989096403122, "train_loss_llm": 0.5229572653770447, "grad_norm": 0.0866188034415245, "global_step": 340, "epoch": 1, "lr": 0.009999969679336354}
342
+ {"train_loss": 0.01668214052915573, "train_loss_bc": 0.01612561196088791, "train_loss_llm": 0.5565277934074402, "grad_norm": 0.10684026032686234, "global_step": 341, "epoch": 1, "lr": 0.009999969679336354}
343
+ {"train_loss": 0.013128525577485561, "train_loss_bc": 0.012647897005081177, "train_loss_llm": 0.4806285798549652, "grad_norm": 0.1274142861366272, "global_step": 342, "epoch": 1, "lr": 0.009999969679336354}
344
+ {"train_loss": 0.012806318700313568, "train_loss_bc": 0.012260029092431068, "train_loss_llm": 0.5462898015975952, "grad_norm": 0.1410902589559555, "global_step": 343, "epoch": 1, "lr": 0.009999969679336354}
345
+ {"train_loss": 0.009487541392445564, "train_loss_bc": 0.009019860997796059, "train_loss_llm": 0.4676806926727295, "grad_norm": 0.15223082900047302, "global_step": 344, "epoch": 1, "lr": 0.00999996781387971}
346
+ {"train_loss": 0.016361601650714874, "train_loss_bc": 0.01573288068175316, "train_loss_llm": 0.6287200450897217, "grad_norm": 0.023004453629255295, "global_step": 345, "epoch": 1, "lr": 0.00999996781387971}
347
+ {"train_loss": 0.01248890906572342, "train_loss_bc": 0.011881167069077492, "train_loss_llm": 0.607742190361023, "grad_norm": 0.031962476670742035, "global_step": 346, "epoch": 1, "lr": 0.00999996781387971}
348
+ {"train_loss": 0.01358707994222641, "train_loss_bc": 0.013073929585516453, "train_loss_llm": 0.5131505727767944, "grad_norm": 0.04822782427072525, "global_step": 347, "epoch": 1, "lr": 0.00999996781387971}
349
+ {"train_loss": 0.015235469676554203, "train_loss_bc": 0.01475644949823618, "train_loss_llm": 0.4790200889110565, "grad_norm": 0.06381597369909286, "global_step": 348, "epoch": 1, "lr": 0.00999996781387971}
350
+ {"train_loss": 0.01751648262143135, "train_loss_bc": 0.016855884343385696, "train_loss_llm": 0.6605973243713379, "grad_norm": 0.08681994676589966, "global_step": 349, "epoch": 1, "lr": 0.00999996781387971}
351
+ {"train_loss": 0.014208652079105377, "train_loss_bc": 0.013812784105539322, "train_loss_llm": 0.3958682119846344, "grad_norm": 0.10667064785957336, "global_step": 350, "epoch": 1, "lr": 0.00999996781387971}
352
+ {"train_loss": 0.011572916992008686, "train_loss_bc": 0.011084744706749916, "train_loss_llm": 0.48817187547683716, "grad_norm": 0.1193293035030365, "global_step": 351, "epoch": 1, "lr": 0.00999996781387971}
353
+ {"train_loss": 0.01146447192877531, "train_loss_bc": 0.01082993671298027, "train_loss_llm": 0.6345353126525879, "grad_norm": 0.13804349303245544, "global_step": 352, "epoch": 1, "lr": 0.009999965892738036}
354
+ {"train_loss": 0.012645299546420574, "train_loss_bc": 0.012096133083105087, "train_loss_llm": 0.5491666793823242, "grad_norm": 0.014580151066184044, "global_step": 353, "epoch": 1, "lr": 0.009999965892738036}
355
+ {"train_loss": 0.012388558126986027, "train_loss_bc": 0.011813423596322536, "train_loss_llm": 0.57513427734375, "grad_norm": 0.029008738696575165, "global_step": 354, "epoch": 1, "lr": 0.009999965892738036}
356
+ {"train_loss": 0.015240795910358429, "train_loss_bc": 0.014640103094279766, "train_loss_llm": 0.6006927490234375, "grad_norm": 0.03927018493413925, "global_step": 355, "epoch": 1, "lr": 0.009999965892738036}
357
+ {"train_loss": 0.011942628771066666, "train_loss_bc": 0.011564518325030804, "train_loss_llm": 0.3781103491783142, "grad_norm": 0.056585244834423065, "global_step": 356, "epoch": 1, "lr": 0.009999965892738036}
358
+ {"train_loss": 0.012461038306355476, "train_loss_bc": 0.011909164488315582, "train_loss_llm": 0.5518735647201538, "grad_norm": 0.06560716778039932, "global_step": 357, "epoch": 1, "lr": 0.009999965892738036}
359
+ {"train_loss": 0.015487094409763813, "train_loss_bc": 0.015003865584731102, "train_loss_llm": 0.48322877287864685, "grad_norm": 0.0779387354850769, "global_step": 358, "epoch": 1, "lr": 0.009999965892738036}
360
+ {"train_loss": 0.010963214561343193, "train_loss_bc": 0.010492322966456413, "train_loss_llm": 0.47089120745658875, "grad_norm": 0.09135229140520096, "global_step": 359, "epoch": 1, "lr": 0.009999965892738036}
361
+ {"train_loss": 0.009490950964391232, "train_loss_bc": 0.00908201839774847, "train_loss_llm": 0.40893298387527466, "grad_norm": 0.11029430478811264, "global_step": 360, "epoch": 1, "lr": 0.009999963915911353}
362
+ {"train_loss": 0.009366891346871853, "train_loss_bc": 0.008832603693008423, "train_loss_llm": 0.534287691116333, "grad_norm": 0.011780070140957832, "global_step": 361, "epoch": 1, "lr": 0.009999963915911353}
363
+ {"train_loss": 0.01090614590793848, "train_loss_bc": 0.010547686368227005, "train_loss_llm": 0.3584598898887634, "grad_norm": 0.018876101821660995, "global_step": 362, "epoch": 1, "lr": 0.009999963915911353}
364
+ {"train_loss": 0.01047124806791544, "train_loss_bc": 0.00999793503433466, "train_loss_llm": 0.4733126759529114, "grad_norm": 0.0347822941839695, "global_step": 363, "epoch": 1, "lr": 0.009999963915911353}
365
+ {"train_loss": 0.013421941548585892, "train_loss_bc": 0.01281831320375204, "train_loss_llm": 0.603628396987915, "grad_norm": 0.0416879765689373, "global_step": 364, "epoch": 1, "lr": 0.009999963915911353}
366
+ {"train_loss": 0.012736831791698933, "train_loss_bc": 0.01226730551570654, "train_loss_llm": 0.4695262610912323, "grad_norm": 0.061996493488550186, "global_step": 365, "epoch": 1, "lr": 0.009999963915911353}
367
+ {"train_loss": 0.015469906851649284, "train_loss_bc": 0.014822958037257195, "train_loss_llm": 0.6469485759735107, "grad_norm": 0.06635187566280365, "global_step": 366, "epoch": 1, "lr": 0.009999963915911353}
368
+ {"train_loss": 0.012995130382478237, "train_loss_bc": 0.012509873136878014, "train_loss_llm": 0.4852573275566101, "grad_norm": 0.08635496348142624, "global_step": 367, "epoch": 1, "lr": 0.009999963915911353}
369
+ {"train_loss": 0.00998271256685257, "train_loss_bc": 0.009579310193657875, "train_loss_llm": 0.4034022390842438, "grad_norm": 0.10520216077566147, "global_step": 368, "epoch": 1, "lr": 0.009999961883399683}
370
+ {"train_loss": 0.0111773069947958, "train_loss_bc": 0.010663645341992378, "train_loss_llm": 0.5136619806289673, "grad_norm": 0.013957403600215912, "global_step": 369, "epoch": 1, "lr": 0.009999961883399683}
371
+ {"train_loss": 0.010809720493853092, "train_loss_bc": 0.010342610068619251, "train_loss_llm": 0.46711012721061707, "grad_norm": 0.028792060911655426, "global_step": 370, "epoch": 1, "lr": 0.009999961883399683}
372
+ {"train_loss": 0.013553488999605179, "train_loss_bc": 0.01306125707924366, "train_loss_llm": 0.49223223328590393, "grad_norm": 0.03614845499396324, "global_step": 371, "epoch": 1, "lr": 0.009999961883399683}
373
+ {"train_loss": 0.008836585097014904, "train_loss_bc": 0.008274243213236332, "train_loss_llm": 0.5623416900634766, "grad_norm": 0.04330018162727356, "global_step": 372, "epoch": 1, "lr": 0.009999961883399683}
374
+ {"train_loss": 0.008892491459846497, "train_loss_bc": 0.008484721183776855, "train_loss_llm": 0.40777063369750977, "grad_norm": 0.061764974147081375, "global_step": 373, "epoch": 1, "lr": 0.009999961883399683}
375
+ {"train_loss": 0.011546115390956402, "train_loss_bc": 0.011045539751648903, "train_loss_llm": 0.5005753636360168, "grad_norm": 0.07327363640069962, "global_step": 374, "epoch": 1, "lr": 0.009999961883399683}
376
+ {"train_loss": 0.013276085257530212, "train_loss_bc": 0.012809041887521744, "train_loss_llm": 0.4670429527759552, "grad_norm": 0.08208861202001572, "global_step": 375, "epoch": 1, "lr": 0.009999961883399683}
377
+ {"train_loss": 0.010347362607717514, "train_loss_bc": 0.009987818077206612, "train_loss_llm": 0.35954442620277405, "grad_norm": 0.09811953455209732, "global_step": 376, "epoch": 1, "lr": 0.009999959795203048}
378
+ {"train_loss": 0.010294072329998016, "train_loss_bc": 0.009917671792209148, "train_loss_llm": 0.37640058994293213, "grad_norm": 0.01656760647892952, "global_step": 377, "epoch": 1, "lr": 0.009999959795203048}
379
+ {"train_loss": 0.012072709389030933, "train_loss_bc": 0.011694014072418213, "train_loss_llm": 0.37869489192962646, "grad_norm": 0.038055505603551865, "global_step": 378, "epoch": 1, "lr": 0.009999959795203048}
380
+ {"train_loss": 0.013379747048020363, "train_loss_bc": 0.012885721400380135, "train_loss_llm": 0.4940251410007477, "grad_norm": 0.048791639506816864, "global_step": 379, "epoch": 1, "lr": 0.009999959795203048}
381
+ {"train_loss": 0.010779447853565216, "train_loss_bc": 0.010418189689517021, "train_loss_llm": 0.3612585663795471, "grad_norm": 0.06931304186582565, "global_step": 380, "epoch": 1, "lr": 0.009999959795203048}
382
+ {"train_loss": 0.013290653005242348, "train_loss_bc": 0.0127793550491333, "train_loss_llm": 0.5112981796264648, "grad_norm": 0.08744651824235916, "global_step": 381, "epoch": 1, "lr": 0.009999959795203048}
383
+ {"train_loss": 0.012638435699045658, "train_loss_bc": 0.012112822383642197, "train_loss_llm": 0.5256132483482361, "grad_norm": 0.08734080195426941, "global_step": 382, "epoch": 1, "lr": 0.009999959795203048}
384
+ {"train_loss": 0.01276348065584898, "train_loss_bc": 0.01233246922492981, "train_loss_llm": 0.43101125955581665, "grad_norm": 0.11115267127752304, "global_step": 383, "epoch": 1, "lr": 0.009999959795203048}
385
+ {"train_loss": 0.013118097558617592, "train_loss_bc": 0.012599822133779526, "train_loss_llm": 0.5182749629020691, "grad_norm": 0.11958809196949005, "global_step": 384, "epoch": 1, "lr": 0.009999957651321473}
386
+ {"train_loss": 0.01025390811264515, "train_loss_bc": 0.009825386106967926, "train_loss_llm": 0.4285220801830292, "grad_norm": 0.018952684476971626, "global_step": 385, "epoch": 1, "lr": 0.009999957651321473}
387
+ {"train_loss": 0.010963615961372852, "train_loss_bc": 0.010562529787421227, "train_loss_llm": 0.40108659863471985, "grad_norm": 0.03306251019239426, "global_step": 386, "epoch": 1, "lr": 0.009999957651321473}
388
+ {"train_loss": 0.010541597381234169, "train_loss_bc": 0.010126705281436443, "train_loss_llm": 0.4148922562599182, "grad_norm": 0.042092613875865936, "global_step": 387, "epoch": 1, "lr": 0.009999957651321473}
389
+ {"train_loss": 0.008702381514012814, "train_loss_bc": 0.008199061267077923, "train_loss_llm": 0.5033202171325684, "grad_norm": 0.043085530400276184, "global_step": 388, "epoch": 1, "lr": 0.009999957651321473}
390
+ {"train_loss": 0.008918298408389091, "train_loss_bc": 0.008477844297885895, "train_loss_llm": 0.4404541254043579, "grad_norm": 0.055809881538152695, "global_step": 389, "epoch": 1, "lr": 0.009999957651321473}
391
+ {"train_loss": 0.009737114422023296, "train_loss_bc": 0.009314477443695068, "train_loss_llm": 0.4226372539997101, "grad_norm": 0.07001017779111862, "global_step": 390, "epoch": 1, "lr": 0.009999957651321473}
392
+ {"train_loss": 0.010793833062052727, "train_loss_bc": 0.010402481071650982, "train_loss_llm": 0.3913517892360687, "grad_norm": 0.07023876160383224, "global_step": 391, "epoch": 1, "lr": 0.009999957651321473}
393
+ {"train_loss": 0.010602637194097042, "train_loss_bc": 0.010235416702926159, "train_loss_llm": 0.36722007393836975, "grad_norm": 0.09314236044883728, "global_step": 392, "epoch": 1, "lr": 0.00999995545175498}
394
+ {"train_loss": 0.013229576870799065, "train_loss_bc": 0.01280839741230011, "train_loss_llm": 0.4211796224117279, "grad_norm": 0.010268638841807842, "global_step": 393, "epoch": 1, "lr": 0.00999995545175498}
395
+ {"train_loss": 0.006598448846489191, "train_loss_bc": 0.006211167201399803, "train_loss_llm": 0.38728177547454834, "grad_norm": 0.012780013494193554, "global_step": 394, "epoch": 1, "lr": 0.00999995545175498}
396
+ {"train_loss": 0.010388839058578014, "train_loss_bc": 0.00987747497856617, "train_loss_llm": 0.5113644599914551, "grad_norm": 0.015021376311779022, "global_step": 395, "epoch": 1, "lr": 0.00999995545175498}
397
+ {"train_loss": 0.014541227370500565, "train_loss_bc": 0.014116690494120121, "train_loss_llm": 0.4245363771915436, "grad_norm": 0.02319067344069481, "global_step": 396, "epoch": 1, "lr": 0.00999995545175498}
398
+ {"train_loss": 0.007016970310360193, "train_loss_bc": 0.006666948553174734, "train_loss_llm": 0.3500216007232666, "grad_norm": 0.027652941644191742, "global_step": 397, "epoch": 1, "lr": 0.00999995545175498}
399
+ {"train_loss": 0.007159297354519367, "train_loss_bc": 0.006787103600800037, "train_loss_llm": 0.3721938133239746, "grad_norm": 0.03523283079266548, "global_step": 398, "epoch": 1, "lr": 0.00999995545175498}
400
+ {"train_loss": 0.009045234881341457, "train_loss_bc": 0.008669788017868996, "train_loss_llm": 0.37544700503349304, "grad_norm": 0.049455754458904266, "global_step": 399, "epoch": 1, "lr": 0.00999995545175498}
401
+ {"train_loss": 0.008435660041868687, "train_loss_bc": 0.008010749705135822, "train_loss_llm": 0.42491012811660767, "grad_norm": 0.0516187846660614, "global_step": 400, "epoch": 1, "lr": 0.009999953196503595}
402
+ {"train_loss": 0.010924630798399448, "train_loss_bc": 0.01047501340508461, "train_loss_llm": 0.44961774349212646, "grad_norm": 0.019651779904961586, "global_step": 401, "epoch": 1, "lr": 0.009999953196503595}
403
+ {"train_loss": 0.006148731801658869, "train_loss_bc": 0.0057486011646687984, "train_loss_llm": 0.4001305103302002, "grad_norm": 0.02275880053639412, "global_step": 402, "epoch": 1, "lr": 0.009999953196503595}
404
+ {"train_loss": 0.007822881452739239, "train_loss_bc": 0.00734285730868578, "train_loss_llm": 0.48002392053604126, "grad_norm": 0.03126152977347374, "global_step": 403, "epoch": 1, "lr": 0.009999953196503595}
405
+ {"train_loss": 0.00978546217083931, "train_loss_bc": 0.009389730170369148, "train_loss_llm": 0.3957315683364868, "grad_norm": 0.0482921376824379, "global_step": 404, "epoch": 1, "lr": 0.009999953196503595}
406
+ {"train_loss": 0.00851339939981699, "train_loss_bc": 0.008063830435276031, "train_loss_llm": 0.4495692849159241, "grad_norm": 0.05878711864352226, "global_step": 405, "epoch": 1, "lr": 0.009999953196503595}
407
+ {"train_loss": 0.012543894350528717, "train_loss_bc": 0.012006400153040886, "train_loss_llm": 0.5374938249588013, "grad_norm": 0.07312177866697311, "global_step": 406, "epoch": 1, "lr": 0.009999953196503595}
408
+ {"train_loss": 0.004626167938113213, "train_loss_bc": 0.004230715800076723, "train_loss_llm": 0.39545202255249023, "grad_norm": 0.0742395669221878, "global_step": 407, "epoch": 1, "lr": 0.009999953196503595}
409
+ {"train_loss": 0.010349040850996971, "train_loss_bc": 0.00993720255792141, "train_loss_llm": 0.41183826327323914, "grad_norm": 0.08678025007247925, "global_step": 408, "epoch": 1, "lr": 0.009999950885567342}
410
+ {"train_loss": 0.006372408010065556, "train_loss_bc": 0.006014788523316383, "train_loss_llm": 0.35761937499046326, "grad_norm": 0.010603218339383602, "global_step": 409, "epoch": 1, "lr": 0.009999950885567342}
411
+ {"train_loss": 0.009057055227458477, "train_loss_bc": 0.008602965623140335, "train_loss_llm": 0.45408937335014343, "grad_norm": 0.029408499598503113, "global_step": 410, "epoch": 1, "lr": 0.009999950885567342}
412
+ {"train_loss": 0.006392288021743298, "train_loss_bc": 0.006004677154123783, "train_loss_llm": 0.38761094212532043, "grad_norm": 0.040126774460077286, "global_step": 411, "epoch": 1, "lr": 0.009999950885567342}
413
+ {"train_loss": 0.007694873958826065, "train_loss_bc": 0.007268495857715607, "train_loss_llm": 0.42637819051742554, "grad_norm": 0.051206592470407486, "global_step": 412, "epoch": 1, "lr": 0.009999950885567342}
414
+ {"train_loss": 0.00797163788229227, "train_loss_bc": 0.007495692931115627, "train_loss_llm": 0.47594505548477173, "grad_norm": 0.06213797628879547, "global_step": 413, "epoch": 1, "lr": 0.009999950885567342}
415
+ {"train_loss": 0.008228043094277382, "train_loss_bc": 0.007861753925681114, "train_loss_llm": 0.366288959980011, "grad_norm": 0.07453621178865433, "global_step": 414, "epoch": 1, "lr": 0.009999950885567342}
416
+ {"train_loss": 0.007793589495122433, "train_loss_bc": 0.00744793564081192, "train_loss_llm": 0.34565383195877075, "grad_norm": 0.08818801492452621, "global_step": 415, "epoch": 1, "lr": 0.009999950885567342}
417
+ {"train_loss": 0.010696557350456715, "train_loss_bc": 0.01020999439060688, "train_loss_llm": 0.4865627586841583, "grad_norm": 0.10583324730396271, "global_step": 416, "epoch": 1, "lr": 0.009999948518946245}
418
+ {"train_loss": 0.016479648649692535, "train_loss_bc": 0.016043461859226227, "train_loss_llm": 0.43618765473365784, "grad_norm": 0.02455216646194458, "global_step": 417, "epoch": 1, "lr": 0.009999948518946245}
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/train.log ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ [2026-01-21 13:12:21,183][numexpr.utils][INFO] - Note: detected 112 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable.
2
+ [2026-01-21 13:12:21,183][numexpr.utils][INFO] - Note: NumExpr detected 112 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 16.
3
+ [2026-01-21 13:12:21,183][numexpr.utils][INFO] - NumExpr defaulting to 16 threads.
4
+ [2026-01-21 13:12:27,199][datasets][INFO] - PyTorch version 2.2.2 available.
5
+ [2026-01-21 13:12:27,200][datasets][INFO] - TensorFlow version 2.15.1 available.
6
+ [2026-01-21 13:12:27,201][datasets][INFO] - JAX version 0.4.30 available.
7
+ [2026-01-21 13:12:35,484][absl][INFO] - MUJOCO_GL=osmesa, attempting to import specified OpenGL backend.
8
+ [2026-01-21 13:12:35,493][absl][INFO] - MuJoCo library version is: 2.3.7
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/debug-internal.log ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {"time":"2026-01-21T13:12:36.44966483+08:00","level":"INFO","msg":"using version","core version":"0.18.6"}
2
+ {"time":"2026-01-21T13:12:36.449675304+08:00","level":"INFO","msg":"created symlink","path":"/work/u1131674/LLM-BC/data/outputs/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/logs/debug-core.log"}
3
+ {"time":"2026-01-21T13:12:36.564980991+08:00","level":"INFO","msg":"created new stream","id":"yhjy9tz9"}
4
+ {"time":"2026-01-21T13:12:36.565006242+08:00","level":"INFO","msg":"stream: started","id":"yhjy9tz9"}
5
+ {"time":"2026-01-21T13:12:36.565029519+08:00","level":"INFO","msg":"sender: started","stream_id":"yhjy9tz9"}
6
+ {"time":"2026-01-21T13:12:36.565021074+08:00","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"yhjy9tz9"}}
7
+ {"time":"2026-01-21T13:12:36.565029409+08:00","level":"INFO","msg":"handler: started","stream_id":{"value":"yhjy9tz9"}}
8
+ {"time":"2026-01-21T13:12:37.456830647+08:00","level":"INFO","msg":"Starting system monitor"}
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/debug.log ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Current SDK version is 0.18.6
2
+ 2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Configure stats pid to 3666395
3
+ 2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Loading settings from /home/u1131674/.config/wandb/settings
4
+ 2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Loading settings from /work/u1131674/LLM-BC/wandb/settings
5
+ 2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
6
+ 2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': 'online', '_disable_service': None}
7
+ 2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'train.py', 'program_abspath': '/work/u1131674/LLM-BC/train.py', 'program': '/work/u1131674/LLM-BC/./train.py'}
8
+ 2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Applying login settings: {}
9
+ 2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_init.py:_log_setup():533] Logging user logs to /work/u1131674/LLM-BC/data/outputs/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/logs/debug.log
10
+ 2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_init.py:_log_setup():534] Logging internal logs to /work/u1131674/LLM-BC/data/outputs/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/logs/debug-internal.log
11
+ 2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_init.py:init():619] calling init triggers
12
+ 2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
13
+ config: {'name': 'train_llmbc_lowdim', '_target_': 'llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace', 'obs_dim': 9, 'action_dim': 4, 'task_name': 'box-close-v2', 'exp_name': 'default', 'model_name': 'HuggingFaceTB/SmolLM2-135M-Instruct', 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1, 'n_latency_steps': 0, 'past_action_visible': False, 'llm_orig_expert_feedback': True, 'llm_do_sample': False, 'policy': {'_target_': 'llmbc.policy.llmbc_lowdim_policy.LLMBCLowdimPolicy', 'model': {'_target_': 'llmbc.model.policy.policy_mlp.PolicyMLP', 'input_size': 9, 'hidden_size': [256, 256], 'output_size': 4, 'activation': 'relu', 'n_obs_steps': 1, 'n_action_steps': 1}, 'obs_dim': 9, 'action_dim': 4, 'llm_discriminator': {'_target_': 'llmbc.discriminator.llm_ce_discriminator.LLMCEDiscriminator', 'task_id': 'box-close-v2', 'llm_translator': {'_target_': 'llmbc.translator.llm_translator.LLMTranslator', 'cfg': {'name': 'HuggingFaceTB/SmolLM2-135M-Instruct', 'model_name': 'SmolLM2-135M-Instruct', 'config_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig', 'causal_lm_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM', 'use_quantization': False, 'use_joint_mlp_projector': True, 'llm_mode': 'ete-finetuned', 'finetune_mode': 'orig', 'checkpoint': 'data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890', 'max_length': 100, 'lora_config': {'r': 32, 'lora_alpha': 64, 'lora_dropout': 0.05, 'bias': 'none', 'task_type': 'CAUSAL_LM'}, 'prompter': {'_target_': 'llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter', 'use_joint_mlp_projector': True}, 'hydra': {'job': {'override_dirname': 'HuggingFaceTB/SmolLM2-135M-Instruct'}, 'run': {'dir': 'data/outputs/2026.01.21/13.12.19_HuggingFaceTB/SmolLM2-135M-Instruct'}}}, 'obs_dim': 9, 'action_dim': 4, 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1}}, 'loss_bc_weight': 1.0, 'loss_llm_weight': 0.001, 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1, 'normalize_llm_loss': True}, 'dataloader': {'batch_size': 16, 'num_workers': 0, 'shuffle': True, 'pin_memory': False, 'persistent_workers': False}, 'val_dataloader': {'batch_size': 16, 'num_workers': 0, 'shuffle': True, 'pin_memory': False, 'persistent_workers': False}, 'optimizer': {'_target_': 'torch.optim.AdamW', 'lr': 0.01, 'betas': [0.95, 0.999], 'eps': 1e-08, 'weight_decay': 1e-06}, 'training': {'device': 'cuda:0', 'seed': 42, 'debug': False, 'resume': False, 'lr_scheduler': 'cosine', 'lr_warmup_steps': 10, 'num_epochs': 1001, 'gradient_accumulate_every': 8, 'grad_norm_clip': 0.5, 'rollout_every': 5, 'checkpoint_every': 5, 'val_every': 1, 'sample_every': 5, 'sample_max_batch': 128, 'max_train_steps': None, 'max_val_steps': None, 'tqdm_interval_sec': 1.0}, 'logging': {'project': 'box-close-v2-training', 'resume': True, 'mode': 'online', 'name': '2026.01.21-13.12.19_train_llmbc_lowdim_box-close-v2', 'tags': ['train_llmbc_lowdim', 'box-close-v2', 'default'], 'id': None, 'group': None}, 'checkpoint': {'topk': {'monitor_key': 'test_success_rate', 'mode': 'max', 'k': 5, 'format_str': 'epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt'}, 'save_last_ckpt': True, 'save_last_snapshot': False}, 'multi_run': {'run_dir': 'data/outputs/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2', 'wandb_name_base': '2026.01.21-13.12.19_train_llmbc_lowdim_box-close-v2'}, 'task': {'name': 'box-close-v2', 'obs_dim': 9, 'action_dim': 4, 'env_runner': {'_target_': 'llmbc.env_runner.metaworld_lowdim_runner.MetaworldLowdimRunner', 'env_name': 'llf-metaworld-box-close-v2', 'n_train': 10, 'n_test': 50, 'n_envs': 10, 'max_steps': 30, 'n_obs_steps': 1, 'n_action_steps': 1, 'instruction_type': 'b', 'feedback_type': ['hp', 'hn', 'fp'], 'visual': False, 'discount': 0.9}, 'dataset': {'_target_': 'llmbc.dataset.metaworld_lowdim_dataset.MetaworldLowdimDataset', 'data_path': 'datasets/box-close-v2.pt', 'data_path2': 'datasets/box-close-v2.pt', 'horizon': 1, 'pad_before': 0, 'pad_after': 0, 'obs_eef_target': True, 'use_manual_normalizer': False, 'val_ratio': 0.1, 'dummy_normalizer': True}, 'instructor': {'_target_': 'llmbc.translator.instructor.metaworld_instructor.box_close_v2_instructor.BoxCloseV2Instructor'}}, 'llm': {'name': 'HuggingFaceTB/SmolLM2-135M-Instruct', 'model_name': 'SmolLM2-135M-Instruct', 'config_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig', 'causal_lm_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM', 'use_quantization': False, 'use_joint_mlp_projector': True, 'llm_mode': 'ete-finetuned', 'finetune_mode': 'orig', 'checkpoint': 'data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890', 'max_length': 100, 'lora_config': {'r': 32, 'lora_alpha': 64, 'lora_dropout': 0.05, 'bias': 'none', 'task_type': 'CAUSAL_LM'}, 'prompter': {'_target_': 'llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter', 'use_joint_mlp_projector': True}, 'hydra': {'job': {'override_dirname': 'HuggingFaceTB/SmolLM2-135M-Instruct'}, 'run': {'dir': 'data/outputs/2026.01.21/13.12.19_HuggingFaceTB/SmolLM2-135M-Instruct'}}}}
14
+ 2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_init.py:init():669] starting backend
15
+ 2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_init.py:init():673] sending inform_init request
16
+ 2026-01-21 13:12:36,447 INFO MainThread:3666395 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
+ 2026-01-21 13:12:36,447 INFO MainThread:3666395 [wandb_init.py:init():686] backend started and connected
18
+ 2026-01-21 13:12:36,456 INFO MainThread:3666395 [wandb_init.py:init():781] updated telemetry
19
+ 2026-01-21 13:12:36,506 INFO MainThread:3666395 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
20
+ 2026-01-21 13:12:37,452 INFO MainThread:3666395 [wandb_init.py:init():867] starting run threads in backend
21
+ 2026-01-21 13:12:38,016 INFO MainThread:3666395 [wandb_run.py:_console_start():2451] atexit reg
22
+ 2026-01-21 13:12:38,016 INFO MainThread:3666395 [wandb_run.py:_redirect():2299] redirect: wrap_raw
23
+ 2026-01-21 13:12:38,016 INFO MainThread:3666395 [wandb_run.py:_redirect():2364] Wrapping output streams.
24
+ 2026-01-21 13:12:38,016 INFO MainThread:3666395 [wandb_run.py:_redirect():2389] Redirects installed.
25
+ 2026-01-21 13:12:38,019 INFO MainThread:3666395 [wandb_init.py:init():911] run started, returning control to user process
26
+ 2026-01-21 13:12:38,019 INFO MainThread:3666395 [wandb_run.py:_config_callback():1389] config_cb None None {'output_dir': '/work/u1131674/LLM-BC/data/outputs/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2'}
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/files/output.log ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ Eval MetaworldLowdimRunner 1/6: 0%| | 0/30 [00:00<?, ?it/s]/work/u1131674/LLM-BC/llmbc/common/llfbench_util.py:39: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at ../torch/csrc/utils/tensor_new.cpp:275.)
2
+ obs = torch.tensor(obs, dtype=torch.float32).unsqueeze(dim=0).to(device)
3
+ Training epoch 1: 74%|███████▍ | 176/238 [00:23<00:08, 7.45it/s, grad_norm=0.0621, loss=0.00797]
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/files/requirements.txt ADDED
@@ -0,0 +1,857 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ rpds-py==0.27.1
2
+ typeguard==4.4.4
3
+ flatbuffers==25.12.19
4
+ toppra==0.6.3
5
+ sympy==1.14.0
6
+ tiktoken==0.8.0
7
+ nvidia-cuda-cupti-cu12==12.1.105
8
+ arm_pytorch_utilities==0.4.3
9
+ pynndescent==0.6.0
10
+ multidict==6.7.0
11
+ fonttools==4.60.2
12
+ numexpr==2.10.1
13
+ cmudict==1.0.13
14
+ PyOpenGL-accelerate==3.1.10
15
+ gmpy2==2.2.1
16
+ peft==0.14.0
17
+ metaworld==2.0.0
18
+ nvidia-cufft-cu12==11.0.2.54
19
+ python-dateutil==2.9.0.post0
20
+ aiosignal==1.4.0
21
+ pexpect==4.9.0
22
+ protobuf==4.25.8
23
+ typing_extensions==4.15.0
24
+ mujoco==2.3.7
25
+ tokenizers==0.21.0
26
+ pytorch-kinematics==0.7.5
27
+ sniffio==1.3.1
28
+ aiofiles==25.1.0
29
+ mplib==0.1.1
30
+ wcwidth==0.2.14
31
+ Pygments==2.19.1
32
+ anyio==4.12.1
33
+ tensorflow-estimator==2.15.0
34
+ filelock==3.17.0
35
+ numpy==1.23.5
36
+ attrs==25.4.0
37
+ Markdown==3.9
38
+ fsspec==2024.3.1
39
+ libclang==18.1.1
40
+ umap-learn==0.5.9.post2
41
+ dill==0.3.8
42
+ narwhals==2.15.0
43
+ tensorboard==2.15.2
44
+ dacite==1.9.2
45
+ termcolor==3.1.0
46
+ llmbc==0.0.0
47
+ python-multipart==0.0.20
48
+ exceptiongroup==1.3.1
49
+ sapien==3.0.0b1
50
+ pygame==2.6.1
51
+ nvidia-curand-cu12==10.3.2.106
52
+ evaluate==0.4.3
53
+ msgpack==1.1.1
54
+ tensorflow-probability==0.23.0
55
+ diffusers==0.31.0
56
+ certifi==2025.10.5
57
+ d4rl==1.1
58
+ pydub==0.25.1
59
+ annotated-doc==0.0.4
60
+ gitdb==4.0.12
61
+ gradio_client==0.2.9
62
+ Shapely==1.8.4
63
+ mani_skill==3.0.0b20
64
+ tensorflow-io-gcs-filesystem==0.37.1
65
+ fasteners==0.20
66
+ hjson==3.1.0
67
+ ninja==1.13.0
68
+ stack-data==0.6.3
69
+ pyarrow==21.0.0
70
+ networkx==3.2.1
71
+ nvidia-cusparse-cu12==12.1.0.106
72
+ pyparsing==3.3.1
73
+ timm==1.0.22
74
+ typing-inspection==0.4.2
75
+ openai==2.8.1
76
+ pybullet==3.2.6
77
+ hydra-core==1.2.0
78
+ gradio==3.36.1
79
+ tensorflow==2.15.1
80
+ asttokens==3.0.1
81
+ importlib-metadata==5.2.0
82
+ astunparse==1.6.3
83
+ tifffile==2024.8.30
84
+ annotated-types==0.7.0
85
+ Bottleneck==1.4.2
86
+ accelerate==1.0.1
87
+ pytz==2025.2
88
+ urllib3==2.5.0
89
+ frozenlist==1.8.0
90
+ sentry-sdk==2.50.0
91
+ jsonschema==4.25.1
92
+ tyro==0.9.1
93
+ Farama-Notifications==0.0.4
94
+ ffmpy==1.0.0
95
+ httpx==0.28.1
96
+ pymunk==6.2.1
97
+ shtab==1.7.2
98
+ glfw==2.0.0
99
+ hf-xet==1.1.8
100
+ omegaconf==2.2.1
101
+ blobfile==3.0.0
102
+ decorator==5.2.1
103
+ cffi==1.17.1
104
+ matplotlib-inline==0.2.1
105
+ eval_type_backport==0.2.2
106
+ torchaudio==2.2.2
107
+ colorama==0.4.6
108
+ click==8.1.8
109
+ Cython==0.29.37
110
+ orjson==3.11.5
111
+ gym_bandits==0.0.2
112
+ traitlets==5.14.3
113
+ docker-pycreds==0.4.0
114
+ multiprocess==0.70.15
115
+ zipp==3.21.0
116
+ antlr4-python3-runtime==4.9.3
117
+ uc-micro-py==1.0.3
118
+ mpmath==1.3.0
119
+ idna==3.11
120
+ aiodns==3.5.0
121
+ charset-normalizer==3.4.4
122
+ nvidia-nvjitlink-cu12==12.9.86
123
+ nvidia-cuda-nvrtc-cu12==12.1.105
124
+ seaborn==0.13.2
125
+ pyarrow-hotfix==0.7
126
+ pillow==11.3.0
127
+ pyautogen==0.1.0
128
+ requests==2.32.0
129
+ MarkupSafe==3.0.2
130
+ websockets==15.0.1
131
+ nvidia-nccl-cu12==2.19.3
132
+ pure_eval==0.2.3
133
+ parso==0.8.5
134
+ huggingface-hub==0.26.2
135
+ syllables==1.0.9
136
+ tf-agents==0.19.0
137
+ six==1.17.0
138
+ referencing==0.36.2
139
+ ptyprocess==0.7.0
140
+ platformdirs==4.4.0
141
+ fastapi==0.128.0
142
+ stable-baselines3==2.2.1
143
+ av==10.0.0
144
+ diskcache==5.6.3
145
+ pynvml==13.0.1
146
+ pytorch-seed==0.2.0
147
+ zarr==2.12.0
148
+ mdurl==0.1.2
149
+ docstring-parser==0.16
150
+ packaging==25.0
151
+ numcodecs==0.12.1
152
+ opt_einsum==3.4.0
153
+ markdown-it-py==2.2.0
154
+ nvidia-cuda-runtime-cu12==12.1.105
155
+ PyWavelets==1.6.0
156
+ datasets==2.19.0
157
+ contourpy==1.3.0
158
+ aiohappyeyeballs==2.6.1
159
+ jaxlib==0.4.30
160
+ ImageIO==2.37.2
161
+ wandb==0.18.6
162
+ jiter==0.12.0
163
+ gymnasium==0.29.1
164
+ pycryptodomex==3.23.0
165
+ google-pasta==0.2.0
166
+ ipython==8.18.1
167
+ threadpoolctl==3.6.0
168
+ py-cpuinfo==9.0.0
169
+ bitsandbytes==0.45.0
170
+ xxhash==3.5.0
171
+ google-auth-oauthlib==1.2.4
172
+ rsa==4.9.1
173
+ rouge_score==0.1.2
174
+ dm-control==1.0.14
175
+ oauthlib==3.3.1
176
+ pandas==2.3.3
177
+ tenacity==9.1.2
178
+ asciitree==0.3.3
179
+ scipy==1.13.1
180
+ jedi==0.19.2
181
+ gast==0.7.0
182
+ google-auth==2.47.0
183
+ transforms3d==0.4.2
184
+ kiwisolver==1.4.7
185
+ matplotlib==3.7.5
186
+ aiohttp==3.12.15
187
+ pip==23.3.2
188
+ imageio-ffmpeg==0.6.0
189
+ deepspeed==0.16.1
190
+ yarl==1.18.0
191
+ nvidia-nvtx-cu12==12.1.105
192
+ llfbench==0.1.0
193
+ wheel==0.45.1
194
+ PySocks==1.7.1
195
+ ml-dtypes==0.3.2
196
+ PyYAML==6.0.2
197
+ fast_kinematics==0.2.2
198
+ gin-config==0.5.0
199
+ setproctitle==1.3.7
200
+ safetensors==0.5.3
201
+ torchvision==0.17.2
202
+ semantic-version==2.10.0
203
+ PyOpenGL==3.1.10
204
+ nltk==3.9.2
205
+ lxml==6.0.2
206
+ pydantic==2.12.5
207
+ tqdm==4.67.1
208
+ keras==2.15.0
209
+ parse==1.19.1
210
+ linkify-it-py==2.0.3
211
+ dm-tree==0.1.8
212
+ requests-oauthlib==2.0.0
213
+ scikit-learn==1.6.1
214
+ altair==6.0.0
215
+ Werkzeug==3.1.5
216
+ sentencepiece==0.2.0
217
+ uvicorn==0.39.0
218
+ cycler==0.12.1
219
+ transformers==4.47.1
220
+ uvloop==0.22.1
221
+ mkl_random==1.2.8
222
+ GitPython==3.1.46
223
+ regex==2025.9.1
224
+ jax==0.4.30
225
+ llvmlite==0.39.1
226
+ pyasn1_modules==0.4.2
227
+ nvidia-cudnn-cu12==8.9.2.26
228
+ pydantic_core==2.41.5
229
+ google-genai==1.47.0
230
+ propcache==0.3.1
231
+ pycares==4.10.0
232
+ pyperclip==1.11.0
233
+ pyasn1==0.6.2
234
+ async-timeout==5.0.1
235
+ psutil==7.0.0
236
+ gym==0.23.1
237
+ dm-env==1.6
238
+ Jinja2==3.1.6
239
+ sentence-transformers==3.2.1
240
+ einops==0.4.1
241
+ triton==2.2.0
242
+ grpcio==1.76.0
243
+ labmaze==1.0.6
244
+ nvidia-ml-py==13.590.44
245
+ brotlicffi==1.0.9.2
246
+ smmap==5.0.2
247
+ cloudpickle==3.1.2
248
+ setuptools==80.9.0
249
+ starlette==0.49.3
250
+ prompt_toolkit==3.0.52
251
+ wrapt==1.14.2
252
+ h5py==3.14.0
253
+ scikit-image==0.19.3
254
+ joblib==1.5.3
255
+ opencv-python==4.11.0.86
256
+ rich==14.2.0
257
+ trl==0.11.4
258
+ gym-notices==0.1.0
259
+ trimesh==4.11.1
260
+ mdit-py-plugins==0.3.3
261
+ distro==1.9.0
262
+ executing==2.2.1
263
+ mkl-service==2.4.0
264
+ nvidia-cusolver-cu12==11.4.5.107
265
+ FLAML==2.3.6
266
+ mujoco-py==2.1.2.14
267
+ h11==0.16.0
268
+ highway-env==1.9.1
269
+ httpcore==1.0.9
270
+ tensorboard-data-server==0.7.2
271
+ tzdata==2025.3
272
+ absl-py==2.3.1
273
+ jsonschema-specifications==2025.9.1
274
+ numba==0.56.4
275
+ tabulate==0.9.0
276
+ importlib-resources==5.13.0
277
+ pycparser==2.23
278
+ mkl_fft==1.3.11
279
+ torch==2.2.2
280
+ nvidia-cublas-cu12==12.1.3.1
281
+ rpds-py==0.27.1
282
+ typeguard==4.4.4
283
+ flatbuffers==25.12.19
284
+ toppra==0.6.3
285
+ sympy==1.14.0
286
+ tiktoken==0.8.0
287
+ nvidia-cuda-cupti-cu12==12.1.105
288
+ arm_pytorch_utilities==0.4.3
289
+ pynndescent==0.6.0
290
+ multidict==6.7.0
291
+ fonttools==4.60.2
292
+ numexpr==2.10.1
293
+ cmudict==1.0.13
294
+ PyOpenGL-accelerate==3.1.10
295
+ gmpy2==2.2.1
296
+ peft==0.14.0
297
+ metaworld==2.0.0
298
+ nvidia-cufft-cu12==11.0.2.54
299
+ python-dateutil==2.9.0.post0
300
+ aiosignal==1.4.0
301
+ pexpect==4.9.0
302
+ protobuf==4.25.8
303
+ typing_extensions==4.15.0
304
+ mujoco==2.3.7
305
+ tokenizers==0.21.0
306
+ pytorch-kinematics==0.7.5
307
+ sniffio==1.3.1
308
+ aiofiles==25.1.0
309
+ mplib==0.1.1
310
+ wcwidth==0.2.14
311
+ Pygments==2.19.1
312
+ anyio==4.12.1
313
+ tensorflow-estimator==2.15.0
314
+ filelock==3.17.0
315
+ numpy==1.23.5
316
+ attrs==25.4.0
317
+ Markdown==3.9
318
+ fsspec==2024.3.1
319
+ libclang==18.1.1
320
+ umap-learn==0.5.9.post2
321
+ dill==0.3.8
322
+ narwhals==2.15.0
323
+ tensorboard==2.15.2
324
+ dacite==1.9.2
325
+ termcolor==3.1.0
326
+ llmbc==0.0.0
327
+ python-multipart==0.0.20
328
+ exceptiongroup==1.3.1
329
+ sapien==3.0.0b1
330
+ pygame==2.6.1
331
+ nvidia-curand-cu12==10.3.2.106
332
+ evaluate==0.4.3
333
+ msgpack==1.1.1
334
+ tensorflow-probability==0.23.0
335
+ diffusers==0.31.0
336
+ certifi==2025.10.5
337
+ d4rl==1.1
338
+ pydub==0.25.1
339
+ annotated-doc==0.0.4
340
+ gitdb==4.0.12
341
+ gradio_client==0.2.9
342
+ Shapely==1.8.4
343
+ mani_skill==3.0.0b20
344
+ tensorflow-io-gcs-filesystem==0.37.1
345
+ fasteners==0.20
346
+ hjson==3.1.0
347
+ ninja==1.13.0
348
+ stack-data==0.6.3
349
+ pyarrow==21.0.0
350
+ networkx==3.2.1
351
+ nvidia-cusparse-cu12==12.1.0.106
352
+ pyparsing==3.3.1
353
+ timm==1.0.22
354
+ typing-inspection==0.4.2
355
+ openai==2.8.1
356
+ pybullet==3.2.6
357
+ hydra-core==1.2.0
358
+ gradio==3.36.1
359
+ tensorflow==2.15.1
360
+ asttokens==3.0.1
361
+ importlib-metadata==5.2.0
362
+ astunparse==1.6.3
363
+ tifffile==2024.8.30
364
+ annotated-types==0.7.0
365
+ Bottleneck==1.4.2
366
+ accelerate==1.0.1
367
+ pytz==2025.2
368
+ urllib3==2.5.0
369
+ frozenlist==1.8.0
370
+ sentry-sdk==2.50.0
371
+ jsonschema==4.25.1
372
+ tyro==0.9.1
373
+ Farama-Notifications==0.0.4
374
+ ffmpy==1.0.0
375
+ httpx==0.28.1
376
+ pymunk==6.2.1
377
+ shtab==1.7.2
378
+ glfw==2.0.0
379
+ hf-xet==1.1.8
380
+ omegaconf==2.2.1
381
+ blobfile==3.0.0
382
+ decorator==5.2.1
383
+ cffi==1.17.1
384
+ matplotlib-inline==0.2.1
385
+ eval_type_backport==0.2.2
386
+ torchaudio==2.2.2
387
+ colorama==0.4.6
388
+ click==8.1.8
389
+ Cython==0.29.37
390
+ orjson==3.11.5
391
+ gym_bandits==0.0.2
392
+ traitlets==5.14.3
393
+ docker-pycreds==0.4.0
394
+ multiprocess==0.70.15
395
+ zipp==3.21.0
396
+ antlr4-python3-runtime==4.9.3
397
+ uc-micro-py==1.0.3
398
+ mpmath==1.3.0
399
+ idna==3.11
400
+ aiodns==3.5.0
401
+ charset-normalizer==3.4.4
402
+ nvidia-nvjitlink-cu12==12.9.86
403
+ nvidia-cuda-nvrtc-cu12==12.1.105
404
+ seaborn==0.13.2
405
+ pyarrow-hotfix==0.7
406
+ pillow==11.3.0
407
+ pyautogen==0.1.0
408
+ requests==2.32.0
409
+ MarkupSafe==3.0.2
410
+ websockets==15.0.1
411
+ nvidia-nccl-cu12==2.19.3
412
+ pure_eval==0.2.3
413
+ parso==0.8.5
414
+ huggingface-hub==0.26.2
415
+ syllables==1.0.9
416
+ tf-agents==0.19.0
417
+ six==1.17.0
418
+ referencing==0.36.2
419
+ ptyprocess==0.7.0
420
+ platformdirs==4.4.0
421
+ fastapi==0.128.0
422
+ stable-baselines3==2.2.1
423
+ av==10.0.0
424
+ diskcache==5.6.3
425
+ pynvml==13.0.1
426
+ pytorch-seed==0.2.0
427
+ zarr==2.12.0
428
+ mdurl==0.1.2
429
+ docstring-parser==0.16
430
+ packaging==25.0
431
+ numcodecs==0.12.1
432
+ opt_einsum==3.4.0
433
+ markdown-it-py==2.2.0
434
+ nvidia-cuda-runtime-cu12==12.1.105
435
+ PyWavelets==1.6.0
436
+ datasets==2.19.0
437
+ contourpy==1.3.0
438
+ aiohappyeyeballs==2.6.1
439
+ jaxlib==0.4.30
440
+ ImageIO==2.37.2
441
+ wandb==0.18.6
442
+ jiter==0.12.0
443
+ gymnasium==0.29.1
444
+ pycryptodomex==3.23.0
445
+ google-pasta==0.2.0
446
+ ipython==8.18.1
447
+ threadpoolctl==3.6.0
448
+ py-cpuinfo==9.0.0
449
+ bitsandbytes==0.45.0
450
+ xxhash==3.5.0
451
+ google-auth-oauthlib==1.2.4
452
+ rsa==4.9.1
453
+ rouge_score==0.1.2
454
+ dm-control==1.0.14
455
+ oauthlib==3.3.1
456
+ pandas==2.3.3
457
+ tenacity==9.1.2
458
+ asciitree==0.3.3
459
+ scipy==1.13.1
460
+ jedi==0.19.2
461
+ gast==0.7.0
462
+ google-auth==2.47.0
463
+ transforms3d==0.4.2
464
+ kiwisolver==1.4.7
465
+ matplotlib==3.7.5
466
+ aiohttp==3.12.15
467
+ pip==23.3.2
468
+ imageio-ffmpeg==0.6.0
469
+ deepspeed==0.16.1
470
+ yarl==1.18.0
471
+ nvidia-nvtx-cu12==12.1.105
472
+ llfbench==0.1.0
473
+ wheel==0.45.1
474
+ PySocks==1.7.1
475
+ ml-dtypes==0.3.2
476
+ PyYAML==6.0.2
477
+ fast_kinematics==0.2.2
478
+ gin-config==0.5.0
479
+ setproctitle==1.3.7
480
+ safetensors==0.5.3
481
+ torchvision==0.17.2
482
+ semantic-version==2.10.0
483
+ PyOpenGL==3.1.10
484
+ nltk==3.9.2
485
+ lxml==6.0.2
486
+ pydantic==2.12.5
487
+ tqdm==4.67.1
488
+ keras==2.15.0
489
+ parse==1.19.1
490
+ linkify-it-py==2.0.3
491
+ dm-tree==0.1.8
492
+ requests-oauthlib==2.0.0
493
+ scikit-learn==1.6.1
494
+ altair==6.0.0
495
+ Werkzeug==3.1.5
496
+ sentencepiece==0.2.0
497
+ uvicorn==0.39.0
498
+ cycler==0.12.1
499
+ transformers==4.47.1
500
+ uvloop==0.22.1
501
+ mkl_random==1.2.8
502
+ GitPython==3.1.46
503
+ regex==2025.9.1
504
+ jax==0.4.30
505
+ llvmlite==0.39.1
506
+ pyasn1_modules==0.4.2
507
+ nvidia-cudnn-cu12==8.9.2.26
508
+ pydantic_core==2.41.5
509
+ google-genai==1.47.0
510
+ propcache==0.3.1
511
+ pycares==4.10.0
512
+ pyperclip==1.11.0
513
+ pyasn1==0.6.2
514
+ async-timeout==5.0.1
515
+ psutil==7.0.0
516
+ gym==0.23.1
517
+ dm-env==1.6
518
+ Jinja2==3.1.6
519
+ sentence-transformers==3.2.1
520
+ einops==0.4.1
521
+ triton==2.2.0
522
+ grpcio==1.76.0
523
+ labmaze==1.0.6
524
+ nvidia-ml-py==13.590.44
525
+ brotlicffi==1.0.9.2
526
+ smmap==5.0.2
527
+ cloudpickle==3.1.2
528
+ setuptools==80.9.0
529
+ starlette==0.49.3
530
+ prompt_toolkit==3.0.52
531
+ wrapt==1.14.2
532
+ h5py==3.14.0
533
+ scikit-image==0.19.3
534
+ joblib==1.5.3
535
+ opencv-python==4.11.0.86
536
+ rich==14.2.0
537
+ trl==0.11.4
538
+ gym-notices==0.1.0
539
+ trimesh==4.11.1
540
+ mdit-py-plugins==0.3.3
541
+ distro==1.9.0
542
+ executing==2.2.1
543
+ mkl-service==2.4.0
544
+ nvidia-cusolver-cu12==11.4.5.107
545
+ FLAML==2.3.6
546
+ mujoco-py==2.1.2.14
547
+ h11==0.16.0
548
+ highway-env==1.9.1
549
+ httpcore==1.0.9
550
+ tensorboard-data-server==0.7.2
551
+ tzdata==2025.3
552
+ absl-py==2.3.1
553
+ jsonschema-specifications==2025.9.1
554
+ numba==0.56.4
555
+ tabulate==0.9.0
556
+ importlib-resources==5.13.0
557
+ pycparser==2.23
558
+ mkl_fft==1.3.11
559
+ torch==2.2.2
560
+ nvidia-cublas-cu12==12.1.3.1
561
+ llmbc==0.0.0
562
+ rpds-py==0.27.1
563
+ typeguard==4.4.4
564
+ flatbuffers==25.12.19
565
+ toppra==0.6.3
566
+ sympy==1.14.0
567
+ tiktoken==0.8.0
568
+ nvidia-cuda-cupti-cu12==12.1.105
569
+ arm_pytorch_utilities==0.4.3
570
+ pynndescent==0.6.0
571
+ multidict==6.7.0
572
+ fonttools==4.60.2
573
+ numexpr==2.10.1
574
+ cmudict==1.0.13
575
+ PyOpenGL-accelerate==3.1.10
576
+ gmpy2==2.2.1
577
+ peft==0.14.0
578
+ metaworld==2.0.0
579
+ nvidia-cufft-cu12==11.0.2.54
580
+ python-dateutil==2.9.0.post0
581
+ aiosignal==1.4.0
582
+ pexpect==4.9.0
583
+ protobuf==4.25.8
584
+ typing_extensions==4.15.0
585
+ mujoco==2.3.7
586
+ tokenizers==0.21.0
587
+ pytorch-kinematics==0.7.5
588
+ sniffio==1.3.1
589
+ aiofiles==25.1.0
590
+ mplib==0.1.1
591
+ wcwidth==0.2.14
592
+ Pygments==2.19.1
593
+ anyio==4.12.1
594
+ tensorflow-estimator==2.15.0
595
+ filelock==3.17.0
596
+ numpy==1.23.5
597
+ attrs==25.4.0
598
+ Markdown==3.9
599
+ fsspec==2024.3.1
600
+ libclang==18.1.1
601
+ umap-learn==0.5.9.post2
602
+ dill==0.3.8
603
+ narwhals==2.15.0
604
+ tensorboard==2.15.2
605
+ dacite==1.9.2
606
+ termcolor==3.1.0
607
+ llmbc==0.0.0
608
+ python-multipart==0.0.20
609
+ exceptiongroup==1.3.1
610
+ sapien==3.0.0b1
611
+ pygame==2.6.1
612
+ nvidia-curand-cu12==10.3.2.106
613
+ evaluate==0.4.3
614
+ msgpack==1.1.1
615
+ tensorflow-probability==0.23.0
616
+ diffusers==0.31.0
617
+ certifi==2025.10.5
618
+ d4rl==1.1
619
+ pydub==0.25.1
620
+ annotated-doc==0.0.4
621
+ gitdb==4.0.12
622
+ gradio_client==0.2.9
623
+ Shapely==1.8.4
624
+ mani_skill==3.0.0b20
625
+ tensorflow-io-gcs-filesystem==0.37.1
626
+ fasteners==0.20
627
+ hjson==3.1.0
628
+ ninja==1.13.0
629
+ stack-data==0.6.3
630
+ pyarrow==21.0.0
631
+ networkx==3.2.1
632
+ nvidia-cusparse-cu12==12.1.0.106
633
+ pyparsing==3.3.1
634
+ timm==1.0.22
635
+ typing-inspection==0.4.2
636
+ openai==2.8.1
637
+ pybullet==3.2.6
638
+ hydra-core==1.2.0
639
+ gradio==3.36.1
640
+ tensorflow==2.15.1
641
+ asttokens==3.0.1
642
+ importlib-metadata==5.2.0
643
+ astunparse==1.6.3
644
+ tifffile==2024.8.30
645
+ annotated-types==0.7.0
646
+ Bottleneck==1.4.2
647
+ accelerate==1.0.1
648
+ pytz==2025.2
649
+ urllib3==2.5.0
650
+ frozenlist==1.8.0
651
+ sentry-sdk==2.50.0
652
+ jsonschema==4.25.1
653
+ tyro==0.9.1
654
+ Farama-Notifications==0.0.4
655
+ ffmpy==1.0.0
656
+ httpx==0.28.1
657
+ pymunk==6.2.1
658
+ shtab==1.7.2
659
+ glfw==2.0.0
660
+ hf-xet==1.1.8
661
+ omegaconf==2.2.1
662
+ blobfile==3.0.0
663
+ decorator==5.2.1
664
+ cffi==1.17.1
665
+ matplotlib-inline==0.2.1
666
+ eval_type_backport==0.2.2
667
+ torchaudio==2.2.2
668
+ colorama==0.4.6
669
+ click==8.1.8
670
+ Cython==0.29.37
671
+ orjson==3.11.5
672
+ gym_bandits==0.0.2
673
+ traitlets==5.14.3
674
+ docker-pycreds==0.4.0
675
+ multiprocess==0.70.15
676
+ zipp==3.21.0
677
+ antlr4-python3-runtime==4.9.3
678
+ uc-micro-py==1.0.3
679
+ mpmath==1.3.0
680
+ idna==3.11
681
+ aiodns==3.5.0
682
+ charset-normalizer==3.4.4
683
+ nvidia-nvjitlink-cu12==12.9.86
684
+ nvidia-cuda-nvrtc-cu12==12.1.105
685
+ seaborn==0.13.2
686
+ pyarrow-hotfix==0.7
687
+ pillow==11.3.0
688
+ pyautogen==0.1.0
689
+ requests==2.32.0
690
+ MarkupSafe==3.0.2
691
+ websockets==15.0.1
692
+ nvidia-nccl-cu12==2.19.3
693
+ pure_eval==0.2.3
694
+ parso==0.8.5
695
+ huggingface-hub==0.26.2
696
+ syllables==1.0.9
697
+ tf-agents==0.19.0
698
+ six==1.17.0
699
+ referencing==0.36.2
700
+ ptyprocess==0.7.0
701
+ platformdirs==4.4.0
702
+ fastapi==0.128.0
703
+ stable-baselines3==2.2.1
704
+ av==10.0.0
705
+ diskcache==5.6.3
706
+ pynvml==13.0.1
707
+ pytorch-seed==0.2.0
708
+ zarr==2.12.0
709
+ mdurl==0.1.2
710
+ docstring-parser==0.16
711
+ packaging==25.0
712
+ numcodecs==0.12.1
713
+ opt_einsum==3.4.0
714
+ markdown-it-py==2.2.0
715
+ nvidia-cuda-runtime-cu12==12.1.105
716
+ PyWavelets==1.6.0
717
+ datasets==2.19.0
718
+ contourpy==1.3.0
719
+ aiohappyeyeballs==2.6.1
720
+ jaxlib==0.4.30
721
+ ImageIO==2.37.2
722
+ wandb==0.18.6
723
+ jiter==0.12.0
724
+ gymnasium==0.29.1
725
+ pycryptodomex==3.23.0
726
+ google-pasta==0.2.0
727
+ ipython==8.18.1
728
+ threadpoolctl==3.6.0
729
+ py-cpuinfo==9.0.0
730
+ bitsandbytes==0.45.0
731
+ xxhash==3.5.0
732
+ google-auth-oauthlib==1.2.4
733
+ rsa==4.9.1
734
+ rouge_score==0.1.2
735
+ dm-control==1.0.14
736
+ oauthlib==3.3.1
737
+ pandas==2.3.3
738
+ tenacity==9.1.2
739
+ asciitree==0.3.3
740
+ scipy==1.13.1
741
+ jedi==0.19.2
742
+ gast==0.7.0
743
+ google-auth==2.47.0
744
+ transforms3d==0.4.2
745
+ kiwisolver==1.4.7
746
+ matplotlib==3.7.5
747
+ aiohttp==3.12.15
748
+ pip==23.3.2
749
+ imageio-ffmpeg==0.6.0
750
+ deepspeed==0.16.1
751
+ yarl==1.18.0
752
+ nvidia-nvtx-cu12==12.1.105
753
+ llfbench==0.1.0
754
+ wheel==0.45.1
755
+ PySocks==1.7.1
756
+ ml-dtypes==0.3.2
757
+ PyYAML==6.0.2
758
+ fast_kinematics==0.2.2
759
+ gin-config==0.5.0
760
+ setproctitle==1.3.7
761
+ safetensors==0.5.3
762
+ torchvision==0.17.2
763
+ semantic-version==2.10.0
764
+ PyOpenGL==3.1.10
765
+ nltk==3.9.2
766
+ lxml==6.0.2
767
+ pydantic==2.12.5
768
+ tqdm==4.67.1
769
+ keras==2.15.0
770
+ parse==1.19.1
771
+ linkify-it-py==2.0.3
772
+ dm-tree==0.1.8
773
+ requests-oauthlib==2.0.0
774
+ scikit-learn==1.6.1
775
+ altair==6.0.0
776
+ Werkzeug==3.1.5
777
+ sentencepiece==0.2.0
778
+ uvicorn==0.39.0
779
+ cycler==0.12.1
780
+ transformers==4.47.1
781
+ uvloop==0.22.1
782
+ mkl_random==1.2.8
783
+ GitPython==3.1.46
784
+ regex==2025.9.1
785
+ jax==0.4.30
786
+ llvmlite==0.39.1
787
+ pyasn1_modules==0.4.2
788
+ nvidia-cudnn-cu12==8.9.2.26
789
+ pydantic_core==2.41.5
790
+ google-genai==1.47.0
791
+ propcache==0.3.1
792
+ pycares==4.10.0
793
+ pyperclip==1.11.0
794
+ pyasn1==0.6.2
795
+ async-timeout==5.0.1
796
+ psutil==7.0.0
797
+ gym==0.23.1
798
+ dm-env==1.6
799
+ Jinja2==3.1.6
800
+ sentence-transformers==3.2.1
801
+ einops==0.4.1
802
+ triton==2.2.0
803
+ grpcio==1.76.0
804
+ labmaze==1.0.6
805
+ nvidia-ml-py==13.590.44
806
+ brotlicffi==1.0.9.2
807
+ smmap==5.0.2
808
+ cloudpickle==3.1.2
809
+ setuptools==80.9.0
810
+ starlette==0.49.3
811
+ prompt_toolkit==3.0.52
812
+ wrapt==1.14.2
813
+ h5py==3.14.0
814
+ scikit-image==0.19.3
815
+ joblib==1.5.3
816
+ opencv-python==4.11.0.86
817
+ rich==14.2.0
818
+ trl==0.11.4
819
+ gym-notices==0.1.0
820
+ trimesh==4.11.1
821
+ mdit-py-plugins==0.3.3
822
+ distro==1.9.0
823
+ executing==2.2.1
824
+ mkl-service==2.4.0
825
+ nvidia-cusolver-cu12==11.4.5.107
826
+ FLAML==2.3.6
827
+ mujoco-py==2.1.2.14
828
+ h11==0.16.0
829
+ highway-env==1.9.1
830
+ httpcore==1.0.9
831
+ tensorboard-data-server==0.7.2
832
+ tzdata==2025.3
833
+ absl-py==2.3.1
834
+ jsonschema-specifications==2025.9.1
835
+ numba==0.56.4
836
+ tabulate==0.9.0
837
+ importlib-resources==5.13.0
838
+ pycparser==2.23
839
+ mkl_fft==1.3.11
840
+ torch==2.2.2
841
+ nvidia-cublas-cu12==12.1.3.1
842
+ zipp==3.19.2
843
+ jaraco.text==3.12.1
844
+ jaraco.context==5.3.0
845
+ importlib_metadata==8.0.0
846
+ typeguard==4.3.0
847
+ inflect==7.3.1
848
+ more-itertools==10.3.0
849
+ wheel==0.45.1
850
+ packaging==24.2
851
+ backports.tarfile==1.2.0
852
+ autocommand==2.2.2
853
+ jaraco.collections==5.1.0
854
+ tomli==2.0.1
855
+ platformdirs==4.2.2
856
+ jaraco.functools==4.0.1
857
+ typing_extensions==4.12.2
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/files/wandb-metadata.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-4.18.0-513.24.1.el8_9.x86_64-x86_64-with-glibc2.28",
3
+ "python": "3.9.25",
4
+ "startedAt": "2026-01-21T05:12:36.447778Z",
5
+ "args": [
6
+ "--config-path",
7
+ "./config/main_table",
8
+ "--config-name",
9
+ "llmbc_box-close-v2.yaml",
10
+ "policy.loss_llm_weight=1.0e-3",
11
+ "training.seed=42"
12
+ ],
13
+ "program": "/work/u1131674/LLM-BC/./train.py",
14
+ "codePath": "train.py",
15
+ "git": {
16
+ "remote": "https://github.com/CHYang25/LLM-BC.git",
17
+ "commit": "1d2e1f5818e116390426ef596d075fc0cf1b0081"
18
+ },
19
+ "email": "chris920325@gmail.com",
20
+ "root": "/work/u1131674/LLM-BC/data/outputs/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2",
21
+ "host": "hgpn19",
22
+ "username": "u1131674",
23
+ "executable": "/home/u1131674/.conda/envs/llm-bc/bin/python3",
24
+ "codePathLocal": "train.py",
25
+ "cpu_count": 112,
26
+ "cpu_count_logical": 112,
27
+ "gpu": "NVIDIA H100 80GB HBM3",
28
+ "gpu_count": 1,
29
+ "disk": {
30
+ "/": {
31
+ "total": "1918024196096",
32
+ "used": "394359058432"
33
+ }
34
+ },
35
+ "memory": {
36
+ "total": "2163685928960"
37
+ },
38
+ "cpu": {
39
+ "count": 112,
40
+ "countLogical": 112
41
+ },
42
+ "gpu_nvidia": [
43
+ {
44
+ "name": "NVIDIA H100 80GB HBM3",
45
+ "memoryTotal": "85520809984",
46
+ "cudaCores": 16896,
47
+ "architecture": "Hopper"
48
+ }
49
+ ],
50
+ "slurm": {
51
+ "cluster_name": "hpc",
52
+ "conf": "/etc/slurm/slurm.conf",
53
+ "cpu_bind": "quiet,mask_cpu:0x00000000000000000000000000FF",
54
+ "cpu_bind_list": "0x00000000000000000000000000FF",
55
+ "cpu_bind_type": "mask_cpu:",
56
+ "cpu_bind_verbose": "quiet",
57
+ "cpus_on_node": "8",
58
+ "cpus_per_task": "8",
59
+ "distribution": "cyclic,pack",
60
+ "gpus_on_node": "1",
61
+ "gpus_per_node": "1",
62
+ "gtids": "0",
63
+ "job_account": "mst114558",
64
+ "job_cpus_per_node": "8",
65
+ "job_end_time": "1769145136",
66
+ "job_gid": "106773",
67
+ "job_group": "MST114558",
68
+ "job_id": "99320",
69
+ "job_name": "python3",
70
+ "job_nodelist": "hgpn19",
71
+ "job_num_nodes": "1",
72
+ "job_partition": "normal",
73
+ "job_qos": "normal",
74
+ "job_start_time": "1768972336",
75
+ "job_uid": "41408",
76
+ "job_user": "u1131674",
77
+ "jobid": "99320",
78
+ "launch_node_ipaddr": "172.21.101.1",
79
+ "localid": "0",
80
+ "mem_per_node": "204800",
81
+ "nnodes": "1",
82
+ "nodeid": "0",
83
+ "nodelist": "hgpn19",
84
+ "nprocs": "1",
85
+ "ntasks": "1",
86
+ "prio_process": "0",
87
+ "procid": "0",
88
+ "srun_comm_host": "172.21.101.1",
89
+ "srun_comm_port": "37185",
90
+ "step_gpus": "0",
91
+ "step_id": "0",
92
+ "step_launcher_port": "37185",
93
+ "step_nodelist": "hgpn19",
94
+ "step_num_nodes": "1",
95
+ "step_num_tasks": "1",
96
+ "step_tasks_per_node": "1",
97
+ "stepid": "0",
98
+ "submit_dir": "/work/u1131674/LLM-BC",
99
+ "submit_host": "cbi-lgn01",
100
+ "task_pid": "3666395",
101
+ "tasks_per_node": "1",
102
+ "topology_addr": "ibsw1.hgpn19",
103
+ "topology_addr_pattern": "switch.node",
104
+ "tres_per_task": "cpu:8",
105
+ "umask": "0022"
106
+ },
107
+ "cudaVersion": "12.4"
108
+ }
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/logs/debug-core.log ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {"time":"2026-01-21T13:12:35.832711383+08:00","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmpgly62w7c/port-3666395.txt","pid":3666395,"debug":false,"disable-analytics":false}
2
+ {"time":"2026-01-21T13:12:35.8327317+08:00","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
3
+ {"time":"2026-01-21T13:12:35.833044589+08:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":3666395}
4
+ {"time":"2026-01-21T13:12:35.833040266+08:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":39835,"Zone":""}}
5
+ {"time":"2026-01-21T13:12:36.026076689+08:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:57216"}
6
+ {"time":"2026-01-21T13:12:36.448305573+08:00","level":"INFO","msg":"handleInformInit: received","streamId":"yhjy9tz9","id":"127.0.0.1:57216"}
7
+ {"time":"2026-01-21T13:12:36.565012013+08:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"yhjy9tz9","id":"127.0.0.1:57216"}
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/logs/debug-internal.log ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {"time":"2026-01-21T13:12:36.44966483+08:00","level":"INFO","msg":"using version","core version":"0.18.6"}
2
+ {"time":"2026-01-21T13:12:36.449675304+08:00","level":"INFO","msg":"created symlink","path":"/work/u1131674/LLM-BC/data/outputs/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/logs/debug-core.log"}
3
+ {"time":"2026-01-21T13:12:36.564980991+08:00","level":"INFO","msg":"created new stream","id":"yhjy9tz9"}
4
+ {"time":"2026-01-21T13:12:36.565006242+08:00","level":"INFO","msg":"stream: started","id":"yhjy9tz9"}
5
+ {"time":"2026-01-21T13:12:36.565029519+08:00","level":"INFO","msg":"sender: started","stream_id":"yhjy9tz9"}
6
+ {"time":"2026-01-21T13:12:36.565021074+08:00","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"yhjy9tz9"}}
7
+ {"time":"2026-01-21T13:12:36.565029409+08:00","level":"INFO","msg":"handler: started","stream_id":{"value":"yhjy9tz9"}}
8
+ {"time":"2026-01-21T13:12:37.456830647+08:00","level":"INFO","msg":"Starting system monitor"}
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/logs/debug.log ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Current SDK version is 0.18.6
2
+ 2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Configure stats pid to 3666395
3
+ 2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Loading settings from /home/u1131674/.config/wandb/settings
4
+ 2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Loading settings from /work/u1131674/LLM-BC/wandb/settings
5
+ 2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
6
+ 2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': 'online', '_disable_service': None}
7
+ 2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'train.py', 'program_abspath': '/work/u1131674/LLM-BC/train.py', 'program': '/work/u1131674/LLM-BC/./train.py'}
8
+ 2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_setup.py:_flush():79] Applying login settings: {}
9
+ 2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_init.py:_log_setup():533] Logging user logs to /work/u1131674/LLM-BC/data/outputs/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/logs/debug.log
10
+ 2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_init.py:_log_setup():534] Logging internal logs to /work/u1131674/LLM-BC/data/outputs/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/logs/debug-internal.log
11
+ 2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_init.py:init():619] calling init triggers
12
+ 2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
13
+ config: {'name': 'train_llmbc_lowdim', '_target_': 'llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace', 'obs_dim': 9, 'action_dim': 4, 'task_name': 'box-close-v2', 'exp_name': 'default', 'model_name': 'HuggingFaceTB/SmolLM2-135M-Instruct', 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1, 'n_latency_steps': 0, 'past_action_visible': False, 'llm_orig_expert_feedback': True, 'llm_do_sample': False, 'policy': {'_target_': 'llmbc.policy.llmbc_lowdim_policy.LLMBCLowdimPolicy', 'model': {'_target_': 'llmbc.model.policy.policy_mlp.PolicyMLP', 'input_size': 9, 'hidden_size': [256, 256], 'output_size': 4, 'activation': 'relu', 'n_obs_steps': 1, 'n_action_steps': 1}, 'obs_dim': 9, 'action_dim': 4, 'llm_discriminator': {'_target_': 'llmbc.discriminator.llm_ce_discriminator.LLMCEDiscriminator', 'task_id': 'box-close-v2', 'llm_translator': {'_target_': 'llmbc.translator.llm_translator.LLMTranslator', 'cfg': {'name': 'HuggingFaceTB/SmolLM2-135M-Instruct', 'model_name': 'SmolLM2-135M-Instruct', 'config_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig', 'causal_lm_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM', 'use_quantization': False, 'use_joint_mlp_projector': True, 'llm_mode': 'ete-finetuned', 'finetune_mode': 'orig', 'checkpoint': 'data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890', 'max_length': 100, 'lora_config': {'r': 32, 'lora_alpha': 64, 'lora_dropout': 0.05, 'bias': 'none', 'task_type': 'CAUSAL_LM'}, 'prompter': {'_target_': 'llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter', 'use_joint_mlp_projector': True}, 'hydra': {'job': {'override_dirname': 'HuggingFaceTB/SmolLM2-135M-Instruct'}, 'run': {'dir': 'data/outputs/2026.01.21/13.12.19_HuggingFaceTB/SmolLM2-135M-Instruct'}}}, 'obs_dim': 9, 'action_dim': 4, 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1}}, 'loss_bc_weight': 1.0, 'loss_llm_weight': 0.001, 'horizon': 1, 'n_obs_steps': 1, 'n_action_steps': 1, 'normalize_llm_loss': True}, 'dataloader': {'batch_size': 16, 'num_workers': 0, 'shuffle': True, 'pin_memory': False, 'persistent_workers': False}, 'val_dataloader': {'batch_size': 16, 'num_workers': 0, 'shuffle': True, 'pin_memory': False, 'persistent_workers': False}, 'optimizer': {'_target_': 'torch.optim.AdamW', 'lr': 0.01, 'betas': [0.95, 0.999], 'eps': 1e-08, 'weight_decay': 1e-06}, 'training': {'device': 'cuda:0', 'seed': 42, 'debug': False, 'resume': False, 'lr_scheduler': 'cosine', 'lr_warmup_steps': 10, 'num_epochs': 1001, 'gradient_accumulate_every': 8, 'grad_norm_clip': 0.5, 'rollout_every': 5, 'checkpoint_every': 5, 'val_every': 1, 'sample_every': 5, 'sample_max_batch': 128, 'max_train_steps': None, 'max_val_steps': None, 'tqdm_interval_sec': 1.0}, 'logging': {'project': 'box-close-v2-training', 'resume': True, 'mode': 'online', 'name': '2026.01.21-13.12.19_train_llmbc_lowdim_box-close-v2', 'tags': ['train_llmbc_lowdim', 'box-close-v2', 'default'], 'id': None, 'group': None}, 'checkpoint': {'topk': {'monitor_key': 'test_success_rate', 'mode': 'max', 'k': 5, 'format_str': 'epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt'}, 'save_last_ckpt': True, 'save_last_snapshot': False}, 'multi_run': {'run_dir': 'data/outputs/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2', 'wandb_name_base': '2026.01.21-13.12.19_train_llmbc_lowdim_box-close-v2'}, 'task': {'name': 'box-close-v2', 'obs_dim': 9, 'action_dim': 4, 'env_runner': {'_target_': 'llmbc.env_runner.metaworld_lowdim_runner.MetaworldLowdimRunner', 'env_name': 'llf-metaworld-box-close-v2', 'n_train': 10, 'n_test': 50, 'n_envs': 10, 'max_steps': 30, 'n_obs_steps': 1, 'n_action_steps': 1, 'instruction_type': 'b', 'feedback_type': ['hp', 'hn', 'fp'], 'visual': False, 'discount': 0.9}, 'dataset': {'_target_': 'llmbc.dataset.metaworld_lowdim_dataset.MetaworldLowdimDataset', 'data_path': 'datasets/box-close-v2.pt', 'data_path2': 'datasets/box-close-v2.pt', 'horizon': 1, 'pad_before': 0, 'pad_after': 0, 'obs_eef_target': True, 'use_manual_normalizer': False, 'val_ratio': 0.1, 'dummy_normalizer': True}, 'instructor': {'_target_': 'llmbc.translator.instructor.metaworld_instructor.box_close_v2_instructor.BoxCloseV2Instructor'}}, 'llm': {'name': 'HuggingFaceTB/SmolLM2-135M-Instruct', 'model_name': 'SmolLM2-135M-Instruct', 'config_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig', 'causal_lm_target': 'llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM', 'use_quantization': False, 'use_joint_mlp_projector': True, 'llm_mode': 'ete-finetuned', 'finetune_mode': 'orig', 'checkpoint': 'data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890', 'max_length': 100, 'lora_config': {'r': 32, 'lora_alpha': 64, 'lora_dropout': 0.05, 'bias': 'none', 'task_type': 'CAUSAL_LM'}, 'prompter': {'_target_': 'llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter', 'use_joint_mlp_projector': True}, 'hydra': {'job': {'override_dirname': 'HuggingFaceTB/SmolLM2-135M-Instruct'}, 'run': {'dir': 'data/outputs/2026.01.21/13.12.19_HuggingFaceTB/SmolLM2-135M-Instruct'}}}}
14
+ 2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_init.py:init():669] starting backend
15
+ 2026-01-21 13:12:36,445 INFO MainThread:3666395 [wandb_init.py:init():673] sending inform_init request
16
+ 2026-01-21 13:12:36,447 INFO MainThread:3666395 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
+ 2026-01-21 13:12:36,447 INFO MainThread:3666395 [wandb_init.py:init():686] backend started and connected
18
+ 2026-01-21 13:12:36,456 INFO MainThread:3666395 [wandb_init.py:init():781] updated telemetry
19
+ 2026-01-21 13:12:36,506 INFO MainThread:3666395 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
20
+ 2026-01-21 13:12:37,452 INFO MainThread:3666395 [wandb_init.py:init():867] starting run threads in backend
21
+ 2026-01-21 13:12:38,016 INFO MainThread:3666395 [wandb_run.py:_console_start():2451] atexit reg
22
+ 2026-01-21 13:12:38,016 INFO MainThread:3666395 [wandb_run.py:_redirect():2299] redirect: wrap_raw
23
+ 2026-01-21 13:12:38,016 INFO MainThread:3666395 [wandb_run.py:_redirect():2364] Wrapping output streams.
24
+ 2026-01-21 13:12:38,016 INFO MainThread:3666395 [wandb_run.py:_redirect():2389] Redirects installed.
25
+ 2026-01-21 13:12:38,019 INFO MainThread:3666395 [wandb_init.py:init():911] run started, returning control to user process
26
+ 2026-01-21 13:12:38,019 INFO MainThread:3666395 [wandb_run.py:_config_callback():1389] config_cb None None {'output_dir': '/work/u1131674/LLM-BC/data/outputs/2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2'}
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/run-20260121_131236-yhjy9tz9/run-yhjy9tz9.wandb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a15abdc3557e86e80be7f022afc84d1b6a86d97d6b37b42da9bb26e3ca4834e
3
+ size 327680
2026.01.21/13.12.19_train_llmbc_lowdim_box-close-v2/wandb/wandb-resume.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"run_id": "yhjy9tz9"}
2026.01.21/13.23.20_train_llmbc_lowdim_box-close-v2/.hydra/config.yaml ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: train_llmbc_lowdim
2
+ _target_: llmbc.workspace.train_llmbc_lowdim_workspace.TrainLLMBCLowdimWorkspace
3
+ obs_dim: ${task.obs_dim}
4
+ action_dim: ${task.action_dim}
5
+ task_name: ${task.name}
6
+ exp_name: default
7
+ model_name: ${llm.name}
8
+ horizon: 1
9
+ n_obs_steps: 1
10
+ n_action_steps: 1
11
+ n_latency_steps: 0
12
+ past_action_visible: false
13
+ llm_orig_expert_feedback: true
14
+ llm_do_sample: false
15
+ policy:
16
+ _target_: llmbc.policy.llmbc_lowdim_policy.LLMBCLowdimPolicy
17
+ model:
18
+ _target_: llmbc.model.policy.policy_mlp.PolicyMLP
19
+ input_size: ${eval:'${n_obs_steps}*${obs_dim}'}
20
+ hidden_size:
21
+ - 256
22
+ - 256
23
+ output_size: ${eval:'${n_action_steps}*${action_dim}'}
24
+ activation: relu
25
+ n_obs_steps: ${n_obs_steps}
26
+ n_action_steps: ${n_action_steps}
27
+ obs_dim: ${obs_dim}
28
+ action_dim: ${action_dim}
29
+ llm_discriminator:
30
+ _target_: llmbc.discriminator.llm_ce_discriminator.LLMCEDiscriminator
31
+ task_id: ${task_name}
32
+ llm_translator:
33
+ _target_: llmbc.translator.llm_translator.LLMTranslator
34
+ cfg: ${llm}
35
+ obs_dim: ${task.obs_dim}
36
+ action_dim: ${task.action_dim}
37
+ horizon: ${horizon}
38
+ n_obs_steps: ${n_obs_steps}
39
+ n_action_steps: ${n_action_steps}
40
+ loss_bc_weight: 1.0
41
+ loss_llm_weight: 1.0
42
+ horizon: ${horizon}
43
+ n_obs_steps: ${n_obs_steps}
44
+ n_action_steps: ${n_action_steps}
45
+ normalize_llm_loss: true
46
+ dataloader:
47
+ batch_size: 16
48
+ num_workers: 0
49
+ shuffle: true
50
+ pin_memory: false
51
+ persistent_workers: false
52
+ val_dataloader:
53
+ batch_size: 16
54
+ num_workers: 0
55
+ shuffle: true
56
+ pin_memory: false
57
+ persistent_workers: false
58
+ optimizer:
59
+ _target_: torch.optim.AdamW
60
+ lr: 0.01
61
+ betas:
62
+ - 0.95
63
+ - 0.999
64
+ eps: 1.0e-08
65
+ weight_decay: 1.0e-06
66
+ training:
67
+ device: cuda:0
68
+ seed: 42
69
+ debug: false
70
+ resume: false
71
+ lr_scheduler: cosine
72
+ lr_warmup_steps: 10
73
+ num_epochs: 1001
74
+ gradient_accumulate_every: 8
75
+ grad_norm_clip: 0.5
76
+ rollout_every: 5
77
+ checkpoint_every: 5
78
+ val_every: 1
79
+ sample_every: 5
80
+ sample_max_batch: 128
81
+ max_train_steps: null
82
+ max_val_steps: null
83
+ tqdm_interval_sec: 1.0
84
+ logging:
85
+ project: ${task.name}-training
86
+ resume: true
87
+ mode: online
88
+ name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name}
89
+ tags:
90
+ - ${name}
91
+ - ${task_name}
92
+ - ${exp_name}
93
+ id: null
94
+ group: null
95
+ checkpoint:
96
+ topk:
97
+ monitor_key: test_success_rate
98
+ mode: max
99
+ k: 5
100
+ format_str: epoch={epoch:04d}-test_success_rate={test_success_rate:.3f}.ckpt
101
+ save_last_ckpt: true
102
+ save_last_snapshot: false
103
+ multi_run:
104
+ run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
105
+ wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name}
106
+ task:
107
+ name: box-close-v2
108
+ obs_dim: 9
109
+ action_dim: 4
110
+ env_runner:
111
+ _target_: llmbc.env_runner.metaworld_lowdim_runner.MetaworldLowdimRunner
112
+ env_name: llf-metaworld-box-close-v2
113
+ n_train: 10
114
+ n_test: 50
115
+ n_envs: 10
116
+ max_steps: 30
117
+ n_obs_steps: ${n_obs_steps}
118
+ n_action_steps: ${n_action_steps}
119
+ instruction_type: b
120
+ feedback_type:
121
+ - hp
122
+ - hn
123
+ - fp
124
+ visual: false
125
+ discount: 0.9
126
+ dataset:
127
+ _target_: llmbc.dataset.metaworld_lowdim_dataset.MetaworldLowdimDataset
128
+ data_path: datasets/box-close-v2.pt
129
+ data_path2: datasets/box-close-v2.pt
130
+ horizon: ${horizon}
131
+ pad_before: ${eval:'${n_obs_steps}-1'}
132
+ pad_after: ${eval:'${n_action_steps}-1'}
133
+ obs_eef_target: true
134
+ use_manual_normalizer: false
135
+ val_ratio: 0.1
136
+ dummy_normalizer: true
137
+ instructor:
138
+ _target_: llmbc.translator.instructor.metaworld_instructor.box_close_v2_instructor.BoxCloseV2Instructor
139
+ llm:
140
+ name: HuggingFaceTB/SmolLM2-135M-Instruct
141
+ model_name: SmolLM2-135M-Instruct
142
+ config_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaConfig
143
+ causal_lm_target: llmbc.model.llm.llama_lowdim_model.LowdimLlamaForCausalLM
144
+ use_quantization: false
145
+ use_joint_mlp_projector: true
146
+ llm_mode: ete-finetuned
147
+ finetune_mode: orig
148
+ checkpoint: data/outputs/2025.09.25/22.49.29_train_llm_lowdim_box-close-v2/HuggingFaceTB/SmolLM2-135M-Instruct-finetuned-box-close-v2/checkpoint-5890
149
+ max_length: 100
150
+ lora_config:
151
+ r: 32
152
+ lora_alpha: 64
153
+ lora_dropout: 0.05
154
+ bias: none
155
+ task_type: CAUSAL_LM
156
+ prompter:
157
+ _target_: llmbc.translator.prompter.smollm2_prompter.SmolLM2Prompter
158
+ use_joint_mlp_projector: true
159
+ hydra:
160
+ job:
161
+ override_dirname: ${model_name}
162
+ run:
163
+ dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${model_name}
2026.01.21/13.23.20_train_llmbc_lowdim_box-close-v2/.hydra/hydra.yaml ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hydra:
2
+ run:
3
+ dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
4
+ sweep:
5
+ dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}
6
+ subdir: ${hydra.job.num}
7
+ launcher:
8
+ _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
9
+ sweeper:
10
+ _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
11
+ max_batch_size: null
12
+ params: null
13
+ help:
14
+ app_name: ${hydra.job.name}
15
+ header: '${hydra.help.app_name} is powered by Hydra.
16
+
17
+ '
18
+ footer: 'Powered by Hydra (https://hydra.cc)
19
+
20
+ Use --hydra-help to view Hydra specific help
21
+
22
+ '
23
+ template: '${hydra.help.header}
24
+
25
+ == Configuration groups ==
26
+
27
+ Compose your configuration from those groups (group=option)
28
+
29
+
30
+ $APP_CONFIG_GROUPS
31
+
32
+
33
+ == Config ==
34
+
35
+ Override anything in the config (foo.bar=value)
36
+
37
+
38
+ $CONFIG
39
+
40
+
41
+ ${hydra.help.footer}
42
+
43
+ '
44
+ hydra_help:
45
+ template: 'Hydra (${hydra.runtime.version})
46
+
47
+ See https://hydra.cc for more info.
48
+
49
+
50
+ == Flags ==
51
+
52
+ $FLAGS_HELP
53
+
54
+
55
+ == Configuration groups ==
56
+
57
+ Compose your configuration from those groups (For example, append hydra/job_logging=disabled
58
+ to command line)
59
+
60
+
61
+ $HYDRA_CONFIG_GROUPS
62
+
63
+
64
+ Use ''--cfg hydra'' to Show the Hydra config.
65
+
66
+ '
67
+ hydra_help: ???
68
+ hydra_logging:
69
+ version: 1
70
+ formatters:
71
+ simple:
72
+ format: '[%(asctime)s][HYDRA] %(message)s'
73
+ handlers:
74
+ console:
75
+ class: logging.StreamHandler
76
+ formatter: simple
77
+ stream: ext://sys.stdout
78
+ root:
79
+ level: INFO
80
+ handlers:
81
+ - console
82
+ loggers:
83
+ logging_example:
84
+ level: DEBUG
85
+ disable_existing_loggers: false
86
+ job_logging:
87
+ version: 1
88
+ formatters:
89
+ simple:
90
+ format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
91
+ handlers:
92
+ console:
93
+ class: logging.StreamHandler
94
+ formatter: simple
95
+ stream: ext://sys.stdout
96
+ file:
97
+ class: logging.FileHandler
98
+ formatter: simple
99
+ filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
100
+ root:
101
+ level: INFO
102
+ handlers:
103
+ - console
104
+ - file
105
+ disable_existing_loggers: false
106
+ env: {}
107
+ mode: RUN
108
+ searchpath: []
109
+ callbacks: {}
110
+ output_subdir: .hydra
111
+ overrides:
112
+ hydra:
113
+ - hydra.mode=RUN
114
+ task:
115
+ - policy.loss_llm_weight=1.0
116
+ - training.seed=42
117
+ job:
118
+ name: train
119
+ chdir: null
120
+ override_dirname: policy.loss_llm_weight=1.0,training.seed=42
121
+ id: ???
122
+ num: ???
123
+ config_name: llmbc_box-close-v2.yaml
124
+ env_set: {}
125
+ env_copy: []
126
+ config:
127
+ override_dirname:
128
+ kv_sep: '='
129
+ item_sep: ','
130
+ exclude_keys: []
131
+ runtime:
132
+ version: 1.2.0
133
+ version_base: '1.2'
134
+ cwd: /work/u1131674/LLM-BC
135
+ config_sources:
136
+ - path: hydra.conf
137
+ schema: pkg
138
+ provider: hydra
139
+ - path: /work/u1131674/LLM-BC/config/main_table
140
+ schema: file
141
+ provider: main
142
+ - path: ''
143
+ schema: structured
144
+ provider: schema
145
+ output_dir: /work/u1131674/LLM-BC/data/outputs/2026.01.21/13.23.20_train_llmbc_lowdim_box-close-v2
146
+ choices:
147
+ hydra/env: default
148
+ hydra/callbacks: null
149
+ hydra/job_logging: default
150
+ hydra/hydra_logging: default
151
+ hydra/hydra_help: default
152
+ hydra/help: default
153
+ hydra/sweeper: basic
154
+ hydra/launcher: basic
155
+ hydra/output: default
156
+ verbose: false
2026.01.21/13.23.20_train_llmbc_lowdim_box-close-v2/.hydra/overrides.yaml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ - policy.loss_llm_weight=1.0
2
+ - training.seed=42