Henryeahhh commited on
Commit
b3d911c
·
verified ·
1 Parent(s): 4919132

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. eraser_flow_matching/step12000/config.yaml +322 -0
  3. eraser_l1_regression/step12000-unsharded/config.yaml +322 -0
  4. eraser_l1_regression/step12000/config.yaml +322 -0
  5. eraser_l1_regression/wandb/wandb/debug-internal.log +12 -0
  6. eraser_l1_regression/wandb/wandb/debug.log +0 -0
  7. eraser_l1_regression/wandb/wandb/run-20251011_163844-qzez8pv7/files/requirements.txt +286 -0
  8. eraser_l1_regression/wandb/wandb/run-20251011_163844-qzez8pv7/files/wandb-metadata.json +204 -0
  9. glue/wandb/wandb/debug-internal.log +6 -0
  10. glue/wandb/wandb/debug.log +0 -0
  11. glue/wandb/wandb/run-20251002_162842-zmotbaex/files/output.log +98 -0
  12. glue/wandb/wandb/run-20251002_162842-zmotbaex/files/requirements.txt +286 -0
  13. glue/wandb/wandb/run-20251002_162842-zmotbaex/files/wandb-metadata.json +204 -0
  14. glue/wandb/wandb/run-20251002_162842-zmotbaex/logs/debug-core.log +6 -0
  15. glue/wandb/wandb/run-20251002_162842-zmotbaex/logs/debug-internal.log +6 -0
  16. glue/wandb/wandb/run-20251002_162842-zmotbaex/logs/debug.log +0 -0
  17. glue/wandb/wandb/run-20251002_162842-zmotbaex/run-zmotbaex.wandb +0 -0
  18. glue/wandb/wandb/run-20251002_162844-bzkyoc0w/files/output.log +107 -0
  19. glue/wandb/wandb/run-20251002_162844-bzkyoc0w/files/requirements.txt +286 -0
  20. glue/wandb/wandb/run-20251002_162844-bzkyoc0w/files/wandb-metadata.json +204 -0
  21. glue/wandb/wandb/run-20251002_162844-bzkyoc0w/logs/debug-core.log +6 -0
  22. glue/wandb/wandb/run-20251002_162844-bzkyoc0w/logs/debug-internal.log +6 -0
  23. glue/wandb/wandb/run-20251002_162844-bzkyoc0w/logs/debug.log +0 -0
  24. glue/wandb/wandb/run-20251002_162844-bzkyoc0w/run-bzkyoc0w.wandb +0 -0
  25. glue_flow_matching/step12000-unsharded/config.yaml +322 -0
  26. glue_flow_matching/step12000/config.yaml +322 -0
  27. glue_flow_matching/wandb/wandb/debug-internal.log +6 -0
  28. glue_flow_matching/wandb/wandb/debug.log +0 -0
  29. glue_flow_matching/wandb/wandb/run-20251002_163728-tmwli25x/files/output.log +0 -0
  30. glue_flow_matching/wandb/wandb/run-20251002_163728-tmwli25x/files/requirements.txt +286 -0
  31. glue_flow_matching/wandb/wandb/run-20251002_163728-tmwli25x/files/wandb-metadata.json +204 -0
  32. glue_flow_matching/wandb/wandb/run-20251002_163728-tmwli25x/logs/debug-core.log +12 -0
  33. glue_flow_matching/wandb/wandb/run-20251002_163728-tmwli25x/logs/debug-internal.log +6 -0
  34. glue_flow_matching/wandb/wandb/run-20251002_163728-tmwli25x/logs/debug.log +0 -0
  35. glue_l1_regression/step12000-unsharded/config.yaml +322 -0
  36. glue_l1_regression/wandb/wandb/debug-internal.log +6 -0
  37. glue_l1_regression/wandb/wandb/debug.log +0 -0
  38. glue_l1_regression/wandb/wandb/run-20251002_163729-7ovz4jzt/files/output.log +0 -0
  39. glue_l1_regression/wandb/wandb/run-20251002_163729-7ovz4jzt/files/requirements.txt +286 -0
  40. glue_l1_regression/wandb/wandb/run-20251002_163729-7ovz4jzt/files/wandb-metadata.json +204 -0
  41. glue_l1_regression/wandb/wandb/run-20251002_163729-7ovz4jzt/logs/debug-core.log +12 -0
  42. glue_l1_regression/wandb/wandb/run-20251002_163729-7ovz4jzt/logs/debug-internal.log +6 -0
  43. glue_l1_regression/wandb/wandb/run-20251002_163729-7ovz4jzt/logs/debug.log +0 -0
  44. pen_flow_matching/wandb/wandb/run-20251011_163844-a381qnn9/files/output.log +0 -0
  45. pen_flow_matching/wandb/wandb/run-20251011_163844-a381qnn9/files/requirements.txt +286 -0
  46. pen_flow_matching/wandb/wandb/run-20251011_163844-a381qnn9/files/wandb-metadata.json +204 -0
  47. pen_flow_matching/wandb/wandb/run-20251011_163844-a381qnn9/logs/debug-core.log +6 -0
  48. pen_flow_matching/wandb/wandb/run-20251011_163844-a381qnn9/logs/debug-internal.log +8 -0
  49. wandb/wandb/debug.log +0 -0
  50. wandb/wandb/run-20251002_155015-xojint20/logs/debug.log +0 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ wandb/wandb/run-20251002_155442-6v8q0jgn/run-6v8q0jgn.wandb filter=lfs diff=lfs merge=lfs -text
eraser_flow_matching/step12000/config.yaml ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: eraser_20251011_163756
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 3584
7
+ n_heads: 28
8
+ n_kv_heads: 4
9
+ qkv_bias: true
10
+ clip_qkv: null
11
+ n_layers: 28
12
+ mlp_ratio: 4
13
+ mlp_hidden_size: 37888
14
+ activation_type: swiglu
15
+ block_type: sequential
16
+ block_group_size: 1
17
+ rope: true
18
+ rope_full_precision: true
19
+ rope_theta: 1000000.0
20
+ vision_backbone:
21
+ image_model_type: openai
22
+ image_default_input_size:
23
+ - 336
24
+ - 336
25
+ image_patch_size: 14
26
+ image_pos_patch_size: 14
27
+ image_emb_dim: 1024
28
+ image_num_heads: 16
29
+ image_num_key_value_heads: 16
30
+ image_num_layers: 23
31
+ image_head_dim: 64
32
+ image_mlp_dim: 4096
33
+ image_mlp_activations: quick_gelu
34
+ image_dropout_rate: 0.0
35
+ image_num_pos: 577
36
+ image_norm_eps: 1.0e-05
37
+ attention_dropout: 0.0
38
+ residual_dropout: 0.0
39
+ initializer_range: 0.02
40
+ fsdp_wrap: false
41
+ resize_mode: default
42
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
43
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
44
+ low_cpu_fsdp: true
45
+ attention_type: sdpa
46
+ float32_attention: true
47
+ attention_dropout: 0.0
48
+ attention_layer_norm: false
49
+ residual_dropout: 0.1
50
+ response_residual_dropout: 0.0
51
+ embedding_dropout: 0.0
52
+ layer_norm_type: rms
53
+ layer_norm_with_affine: true
54
+ layer_norm_eps: 1.0e-06
55
+ attention_layer_norm_with_affine: true
56
+ max_sequence_length: 4096
57
+ max_position_embeddings: null
58
+ include_bias: false
59
+ bias_for_layer_norm: null
60
+ scale_logits: false
61
+ vocab_size: 152064
62
+ embedding_size: 152064
63
+ ff_out_size: null
64
+ additional_vocab_size: 128
65
+ new_embedding_init_range: 0.02
66
+ weight_tying: false
67
+ init_device: null
68
+ init_fn: normal
69
+ init_std: 0.02
70
+ init_cutoff_factor: null
71
+ norm_after: false
72
+ precision: amp_bf16
73
+ max_crops: 12
74
+ crop_mode: overlap-and-resize-c2
75
+ use_col_tokens: true
76
+ prompt_type: uber_model
77
+ system_prompt_kind: demo_or_style
78
+ message_formatting: role
79
+ always_start_with_space: true
80
+ multi_annotation_weighting: root_subsegments
81
+ default_inference_len: 65
82
+ overlap_margins:
83
+ - 4
84
+ - 4
85
+ pad_value: 0.0
86
+ image_padding_embed: pad_and_partial_pad
87
+ fix_image_padding: true
88
+ vit_layers:
89
+ - -2
90
+ - -9
91
+ image_pooling_h: 2
92
+ image_pooling_w: 2
93
+ image_pooling_2d: attention_meanq
94
+ image_projector: mlp
95
+ image_feature_dropout: 0.0
96
+ initializer_range: 0.02
97
+ normalize_input_embeds: false
98
+ use_position_ids: true
99
+ head_dim: null
100
+ action_tokenizer:
101
+ identifier: physical-intelligence/fast
102
+ tokenizer_dir: null
103
+ action_dim: 7
104
+ horizon: 8
105
+ tokenizer:
106
+ identifier: Qwen/Qwen2-7B
107
+ tokenizer_dir: null
108
+ pad_tokenizer: true
109
+ moe_num_experts: 8
110
+ moe_top_k: 2
111
+ moe_mlp_impl: sparse
112
+ moe_log_expert_assignment: false
113
+ moe_shared_expert: false
114
+ moe_lbl_in_fp32: false
115
+ moe_interleave: false
116
+ moe_loss_weight: 0.1
117
+ moe_zloss_weight: null
118
+ moe_dropless: true
119
+ moe_capacity_factor: 1.25
120
+ action_head: flow_matching
121
+ num_diffusion_steps: 1000
122
+ num_diffusion_inference_steps: 30
123
+ use_proprio: true
124
+ action_head_dit_hidden_size: 1152
125
+ action_head_dit_depth: 28
126
+ action_head_dit_num_heads: 16
127
+ llm_causal_attention: false
128
+ action_use_left_eef: true
129
+ action_use_mobile_base: false
130
+ allow_resume: false
131
+ ft_llm: true
132
+ ft_vit: false
133
+ ft_connector: false
134
+ ft_embedding: lm_head
135
+ lora: false
136
+ use_lora: false
137
+ lora_rank: 8
138
+ lora_llm: false
139
+ lora_vit: false
140
+ lora_connector: false
141
+ early_exit: false
142
+ train_exit_random_layer: false
143
+ optimizer:
144
+ name: adamw
145
+ learning_rate: 0.0001
146
+ weight_decay: 0.01
147
+ betas:
148
+ - 0.9
149
+ - 0.95
150
+ eps: 1.0e-05
151
+ connector_learning_rate: 0.0002
152
+ vit_learning_rate: 6.0e-06
153
+ llm_learning_rate: 5.0e-05
154
+ connector_weight_decay: 0.0
155
+ vit_weight_decay: 0.0
156
+ llm_weight_decay: 0.0
157
+ connector_betas:
158
+ - 0.9
159
+ - 0.95
160
+ vit_betas:
161
+ - 0.9
162
+ - 0.95
163
+ llm_betas:
164
+ - 0.9
165
+ - 0.95
166
+ connector_eps: 1.0e-06
167
+ vit_eps: 1.0e-06
168
+ llm_eps: 1.0e-06
169
+ metrics_log_interval: 20
170
+ scheduler:
171
+ name: multimodal
172
+ units: steps
173
+ t_warmup: 100
174
+ t_max: null
175
+ alpha_f: 0.1
176
+ connector_t_warmup: 200
177
+ vit_t_warmup: 2000
178
+ llm_t_warmup: 2000
179
+ grad_clip_warmup_steps: null
180
+ grad_clip_warmup_factor: null
181
+ warmup_min_lr: 0.0
182
+ data:
183
+ dataset: vla_dataset_realworld
184
+ mixture: null
185
+ root_size_mixture: null
186
+ split: train
187
+ seed: 95818
188
+ shuffle_messages: false
189
+ pad: to_max
190
+ sequence_length: 1600
191
+ shuffle: true
192
+ for_inference: false
193
+ multi_modal: torch
194
+ num_workers: 0
195
+ drop_last: true
196
+ pin_memory: true
197
+ prefetch_factor: null
198
+ persistent_workers: false
199
+ timeout: 0
200
+ rlds_dataset_name: a1_real_world
201
+ rlds_data_root_dir: /vast/users/xiaodan/zhangjian/datasets/OXE
202
+ use_wrist_image: true
203
+ use_proprio: true
204
+ rlds_shuffle_buffer_size: 100000
205
+ rlds_traj_threads: 8
206
+ rlds_read_threads: 8
207
+ lerobot_episode_index_start: null
208
+ lerobot_episode_index_end: null
209
+ restore_dataloader: true
210
+ fast_forward_batches: null
211
+ evaluators:
212
+ - label: val
213
+ data:
214
+ dataset: vla_dataset_realworld
215
+ mixture: null
216
+ root_size_mixture: null
217
+ split: validation
218
+ seed: null
219
+ shuffle_messages: false
220
+ pad: to_max
221
+ sequence_length: 1600
222
+ shuffle: false
223
+ for_inference: false
224
+ multi_modal: torch
225
+ num_workers: 0
226
+ drop_last: true
227
+ pin_memory: true
228
+ prefetch_factor: null
229
+ persistent_workers: true
230
+ timeout: 0
231
+ rlds_dataset_name: libero_4_task_suites_no_noops
232
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
233
+ use_wrist_image: true
234
+ use_proprio: true
235
+ rlds_shuffle_buffer_size: 256000
236
+ rlds_traj_threads: 8
237
+ rlds_read_threads: 8
238
+ lerobot_episode_index_start: 353
239
+ lerobot_episode_index_end: 765
240
+ device_eval_batch_size: null
241
+ subset_num_batches: 64
242
+ max_examples: null
243
+ max_new_tokens: 448
244
+ mm_evaluator: null
245
+ save_dir: null
246
+ save_to_checkpoint_dir: false
247
+ eval_name: null
248
+ skip_if_metrics_cached: true
249
+ eval_interval: 0
250
+ inf_eval_interval: -1
251
+ inf_evaluators: []
252
+ save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/eraser_flow_matching
253
+ remote_save_folder: null
254
+ canceled_check_interval: 50
255
+ save_interval: 500
256
+ save_interval_unsharded: 500
257
+ save_interval_ephemeral: null
258
+ save_interval_action_head: 500
259
+ save_num_checkpoints_to_keep: 1
260
+ save_num_unsharded_checkpoints_to_keep: 1
261
+ save_num_action_head_checkpoints_to_keep: 2
262
+ save_overwrite: true
263
+ force_save_unsharded: false
264
+ no_pre_train_checkpoint: true
265
+ initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
266
+ load_model_config: null
267
+ checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
268
+ load_path: null
269
+ load_path_sharded_checkpointer: null
270
+ reset_optimizer_state: false
271
+ reset_trainer_state: false
272
+ save_dataloader_state: false
273
+ reset_dataloader_state: false
274
+ keep_lr_on_load: true
275
+ sharded_checkpointer: torch_legacy
276
+ max_duration: 500000
277
+ global_train_batch_size: 126
278
+ device_train_batch_size: 15
279
+ device_train_microbatch_size: 16
280
+ device_eval_batch_size: 4
281
+ eval_subset_num_batches: -1
282
+ eval_on_load: false
283
+ device_inf_eval_batch_size: 16
284
+ inf_eval_subset_num_batches: -1
285
+ device_train_grad_accum: 0
286
+ max_grad_norm: 1.0
287
+ multi_component_grad_norm: true
288
+ batch_divisor: global_batch
289
+ max_grad_norm_ratio: null
290
+ precision: amp_bf16
291
+ wandb:
292
+ project: a1-realworld
293
+ entity: henryeap
294
+ group: null
295
+ name: eraser_20251011_163756
296
+ tags:
297
+ - watching
298
+ log_artifacts: false
299
+ rank_zero_only: true
300
+ log_interval: 1
301
+ speed_monitor:
302
+ window_size: 20
303
+ gpu_flops_available: null
304
+ console_log_interval: 1
305
+ gen1_gc_interval: 1
306
+ compile: null
307
+ fsdp:
308
+ use_orig_params: true
309
+ sharding_strategy: FULL_SHARD
310
+ wrapping_strategy: by_block_and_size
311
+ precision: float
312
+ hybrid_sharding_num_model_replicas: null
313
+ softmax_auxiliary_loss: true
314
+ softmax_auxiliary_loss_scale: 0.0001
315
+ time_limit: null
316
+ extra_steps_after_cancel: 10
317
+ python_profiling: false
318
+ torch_profiling: false
319
+ stop_at: 500000
320
+ stop_after: null
321
+ activation_checkpointing: whole_layer
322
+ fused_loss: null
eraser_l1_regression/step12000-unsharded/config.yaml ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: eraser_20251011_163803
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 3584
7
+ n_heads: 28
8
+ n_kv_heads: 4
9
+ qkv_bias: true
10
+ clip_qkv: null
11
+ n_layers: 28
12
+ mlp_ratio: 4
13
+ mlp_hidden_size: 37888
14
+ activation_type: swiglu
15
+ block_type: sequential
16
+ block_group_size: 1
17
+ rope: true
18
+ rope_full_precision: true
19
+ rope_theta: 1000000.0
20
+ vision_backbone:
21
+ image_model_type: openai
22
+ image_default_input_size:
23
+ - 336
24
+ - 336
25
+ image_patch_size: 14
26
+ image_pos_patch_size: 14
27
+ image_emb_dim: 1024
28
+ image_num_heads: 16
29
+ image_num_key_value_heads: 16
30
+ image_num_layers: 23
31
+ image_head_dim: 64
32
+ image_mlp_dim: 4096
33
+ image_mlp_activations: quick_gelu
34
+ image_dropout_rate: 0.0
35
+ image_num_pos: 577
36
+ image_norm_eps: 1.0e-05
37
+ attention_dropout: 0.0
38
+ residual_dropout: 0.0
39
+ initializer_range: 0.02
40
+ fsdp_wrap: false
41
+ resize_mode: default
42
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
43
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
44
+ low_cpu_fsdp: true
45
+ attention_type: sdpa
46
+ float32_attention: true
47
+ attention_dropout: 0.0
48
+ attention_layer_norm: false
49
+ residual_dropout: 0.1
50
+ response_residual_dropout: 0.0
51
+ embedding_dropout: 0.0
52
+ layer_norm_type: rms
53
+ layer_norm_with_affine: true
54
+ layer_norm_eps: 1.0e-06
55
+ attention_layer_norm_with_affine: true
56
+ max_sequence_length: 4096
57
+ max_position_embeddings: null
58
+ include_bias: false
59
+ bias_for_layer_norm: null
60
+ scale_logits: false
61
+ vocab_size: 152064
62
+ embedding_size: 152064
63
+ ff_out_size: null
64
+ additional_vocab_size: 128
65
+ new_embedding_init_range: 0.02
66
+ weight_tying: false
67
+ init_device: null
68
+ init_fn: normal
69
+ init_std: 0.02
70
+ init_cutoff_factor: null
71
+ norm_after: false
72
+ precision: amp_bf16
73
+ max_crops: 12
74
+ crop_mode: overlap-and-resize-c2
75
+ use_col_tokens: true
76
+ prompt_type: uber_model
77
+ system_prompt_kind: demo_or_style
78
+ message_formatting: role
79
+ always_start_with_space: true
80
+ multi_annotation_weighting: root_subsegments
81
+ default_inference_len: 65
82
+ overlap_margins:
83
+ - 4
84
+ - 4
85
+ pad_value: 0.0
86
+ image_padding_embed: pad_and_partial_pad
87
+ fix_image_padding: true
88
+ vit_layers:
89
+ - -2
90
+ - -9
91
+ image_pooling_h: 2
92
+ image_pooling_w: 2
93
+ image_pooling_2d: attention_meanq
94
+ image_projector: mlp
95
+ image_feature_dropout: 0.0
96
+ initializer_range: 0.02
97
+ normalize_input_embeds: false
98
+ use_position_ids: true
99
+ head_dim: null
100
+ action_tokenizer:
101
+ identifier: physical-intelligence/fast
102
+ tokenizer_dir: null
103
+ action_dim: 7
104
+ horizon: 8
105
+ tokenizer:
106
+ identifier: Qwen/Qwen2-7B
107
+ tokenizer_dir: null
108
+ pad_tokenizer: true
109
+ moe_num_experts: 8
110
+ moe_top_k: 2
111
+ moe_mlp_impl: sparse
112
+ moe_log_expert_assignment: false
113
+ moe_shared_expert: false
114
+ moe_lbl_in_fp32: false
115
+ moe_interleave: false
116
+ moe_loss_weight: 0.1
117
+ moe_zloss_weight: null
118
+ moe_dropless: true
119
+ moe_capacity_factor: 1.25
120
+ action_head: l1_regression
121
+ num_diffusion_steps: 1000
122
+ num_diffusion_inference_steps: 30
123
+ use_proprio: true
124
+ action_head_dit_hidden_size: 1152
125
+ action_head_dit_depth: 28
126
+ action_head_dit_num_heads: 16
127
+ llm_causal_attention: false
128
+ action_use_left_eef: true
129
+ action_use_mobile_base: false
130
+ allow_resume: false
131
+ ft_llm: true
132
+ ft_vit: false
133
+ ft_connector: false
134
+ ft_embedding: lm_head
135
+ lora: false
136
+ use_lora: false
137
+ lora_rank: 8
138
+ lora_llm: false
139
+ lora_vit: false
140
+ lora_connector: false
141
+ early_exit: false
142
+ train_exit_random_layer: false
143
+ optimizer:
144
+ name: adamw
145
+ learning_rate: 0.0001
146
+ weight_decay: 0.01
147
+ betas:
148
+ - 0.9
149
+ - 0.95
150
+ eps: 1.0e-05
151
+ connector_learning_rate: 0.0002
152
+ vit_learning_rate: 6.0e-06
153
+ llm_learning_rate: 5.0e-05
154
+ connector_weight_decay: 0.0
155
+ vit_weight_decay: 0.0
156
+ llm_weight_decay: 0.0
157
+ connector_betas:
158
+ - 0.9
159
+ - 0.95
160
+ vit_betas:
161
+ - 0.9
162
+ - 0.95
163
+ llm_betas:
164
+ - 0.9
165
+ - 0.95
166
+ connector_eps: 1.0e-06
167
+ vit_eps: 1.0e-06
168
+ llm_eps: 1.0e-06
169
+ metrics_log_interval: 20
170
+ scheduler:
171
+ name: multimodal
172
+ units: steps
173
+ t_warmup: 100
174
+ t_max: null
175
+ alpha_f: 0.1
176
+ connector_t_warmup: 200
177
+ vit_t_warmup: 2000
178
+ llm_t_warmup: 2000
179
+ grad_clip_warmup_steps: null
180
+ grad_clip_warmup_factor: null
181
+ warmup_min_lr: 0.0
182
+ data:
183
+ dataset: vla_dataset_realworld
184
+ mixture: null
185
+ root_size_mixture: null
186
+ split: train
187
+ seed: 95818
188
+ shuffle_messages: false
189
+ pad: to_max
190
+ sequence_length: 1600
191
+ shuffle: true
192
+ for_inference: false
193
+ multi_modal: torch
194
+ num_workers: 0
195
+ drop_last: true
196
+ pin_memory: true
197
+ prefetch_factor: null
198
+ persistent_workers: false
199
+ timeout: 0
200
+ rlds_dataset_name: a1_real_world
201
+ rlds_data_root_dir: /vast/users/xiaodan/zhangjian/datasets/OXE
202
+ use_wrist_image: true
203
+ use_proprio: true
204
+ rlds_shuffle_buffer_size: 100000
205
+ rlds_traj_threads: 8
206
+ rlds_read_threads: 8
207
+ lerobot_episode_index_start: null
208
+ lerobot_episode_index_end: null
209
+ restore_dataloader: true
210
+ fast_forward_batches: null
211
+ evaluators:
212
+ - label: val
213
+ data:
214
+ dataset: vla_dataset_realworld
215
+ mixture: null
216
+ root_size_mixture: null
217
+ split: validation
218
+ seed: null
219
+ shuffle_messages: false
220
+ pad: to_max
221
+ sequence_length: 1600
222
+ shuffle: false
223
+ for_inference: false
224
+ multi_modal: torch
225
+ num_workers: 0
226
+ drop_last: true
227
+ pin_memory: true
228
+ prefetch_factor: null
229
+ persistent_workers: true
230
+ timeout: 0
231
+ rlds_dataset_name: libero_4_task_suites_no_noops
232
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
233
+ use_wrist_image: true
234
+ use_proprio: true
235
+ rlds_shuffle_buffer_size: 256000
236
+ rlds_traj_threads: 8
237
+ rlds_read_threads: 8
238
+ lerobot_episode_index_start: 353
239
+ lerobot_episode_index_end: 765
240
+ device_eval_batch_size: null
241
+ subset_num_batches: 64
242
+ max_examples: null
243
+ max_new_tokens: 448
244
+ mm_evaluator: null
245
+ save_dir: null
246
+ save_to_checkpoint_dir: false
247
+ eval_name: null
248
+ skip_if_metrics_cached: true
249
+ eval_interval: 0
250
+ inf_eval_interval: -1
251
+ inf_evaluators: []
252
+ save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/eraser_l1_regression
253
+ remote_save_folder: null
254
+ canceled_check_interval: 50
255
+ save_interval: 500
256
+ save_interval_unsharded: 500
257
+ save_interval_ephemeral: null
258
+ save_interval_action_head: 500
259
+ save_num_checkpoints_to_keep: 1
260
+ save_num_unsharded_checkpoints_to_keep: 1
261
+ save_num_action_head_checkpoints_to_keep: 2
262
+ save_overwrite: true
263
+ force_save_unsharded: false
264
+ no_pre_train_checkpoint: true
265
+ initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
266
+ load_model_config: null
267
+ checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
268
+ load_path: null
269
+ load_path_sharded_checkpointer: null
270
+ reset_optimizer_state: false
271
+ reset_trainer_state: false
272
+ save_dataloader_state: false
273
+ reset_dataloader_state: false
274
+ keep_lr_on_load: true
275
+ sharded_checkpointer: torch_legacy
276
+ max_duration: 500000
277
+ global_train_batch_size: 126
278
+ device_train_batch_size: 15
279
+ device_train_microbatch_size: 16
280
+ device_eval_batch_size: 4
281
+ eval_subset_num_batches: -1
282
+ eval_on_load: false
283
+ device_inf_eval_batch_size: 16
284
+ inf_eval_subset_num_batches: -1
285
+ device_train_grad_accum: 0
286
+ max_grad_norm: 1.0
287
+ multi_component_grad_norm: true
288
+ batch_divisor: global_batch
289
+ max_grad_norm_ratio: null
290
+ precision: amp_bf16
291
+ wandb:
292
+ project: a1-realworld
293
+ entity: henryeap
294
+ group: null
295
+ name: eraser_20251011_163803
296
+ tags:
297
+ - watching
298
+ log_artifacts: false
299
+ rank_zero_only: true
300
+ log_interval: 1
301
+ speed_monitor:
302
+ window_size: 20
303
+ gpu_flops_available: null
304
+ console_log_interval: 1
305
+ gen1_gc_interval: 1
306
+ compile: null
307
+ fsdp:
308
+ use_orig_params: true
309
+ sharding_strategy: FULL_SHARD
310
+ wrapping_strategy: by_block_and_size
311
+ precision: float
312
+ hybrid_sharding_num_model_replicas: null
313
+ softmax_auxiliary_loss: true
314
+ softmax_auxiliary_loss_scale: 0.0001
315
+ time_limit: null
316
+ extra_steps_after_cancel: 10
317
+ python_profiling: false
318
+ torch_profiling: false
319
+ stop_at: 500000
320
+ stop_after: null
321
+ activation_checkpointing: whole_layer
322
+ fused_loss: null
eraser_l1_regression/step12000/config.yaml ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: eraser_20251011_163803
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 3584
7
+ n_heads: 28
8
+ n_kv_heads: 4
9
+ qkv_bias: true
10
+ clip_qkv: null
11
+ n_layers: 28
12
+ mlp_ratio: 4
13
+ mlp_hidden_size: 37888
14
+ activation_type: swiglu
15
+ block_type: sequential
16
+ block_group_size: 1
17
+ rope: true
18
+ rope_full_precision: true
19
+ rope_theta: 1000000.0
20
+ vision_backbone:
21
+ image_model_type: openai
22
+ image_default_input_size:
23
+ - 336
24
+ - 336
25
+ image_patch_size: 14
26
+ image_pos_patch_size: 14
27
+ image_emb_dim: 1024
28
+ image_num_heads: 16
29
+ image_num_key_value_heads: 16
30
+ image_num_layers: 23
31
+ image_head_dim: 64
32
+ image_mlp_dim: 4096
33
+ image_mlp_activations: quick_gelu
34
+ image_dropout_rate: 0.0
35
+ image_num_pos: 577
36
+ image_norm_eps: 1.0e-05
37
+ attention_dropout: 0.0
38
+ residual_dropout: 0.0
39
+ initializer_range: 0.02
40
+ fsdp_wrap: false
41
+ resize_mode: default
42
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
43
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
44
+ low_cpu_fsdp: true
45
+ attention_type: sdpa
46
+ float32_attention: true
47
+ attention_dropout: 0.0
48
+ attention_layer_norm: false
49
+ residual_dropout: 0.1
50
+ response_residual_dropout: 0.0
51
+ embedding_dropout: 0.0
52
+ layer_norm_type: rms
53
+ layer_norm_with_affine: true
54
+ layer_norm_eps: 1.0e-06
55
+ attention_layer_norm_with_affine: true
56
+ max_sequence_length: 4096
57
+ max_position_embeddings: null
58
+ include_bias: false
59
+ bias_for_layer_norm: null
60
+ scale_logits: false
61
+ vocab_size: 152064
62
+ embedding_size: 152064
63
+ ff_out_size: null
64
+ additional_vocab_size: 128
65
+ new_embedding_init_range: 0.02
66
+ weight_tying: false
67
+ init_device: null
68
+ init_fn: normal
69
+ init_std: 0.02
70
+ init_cutoff_factor: null
71
+ norm_after: false
72
+ precision: amp_bf16
73
+ max_crops: 12
74
+ crop_mode: overlap-and-resize-c2
75
+ use_col_tokens: true
76
+ prompt_type: uber_model
77
+ system_prompt_kind: demo_or_style
78
+ message_formatting: role
79
+ always_start_with_space: true
80
+ multi_annotation_weighting: root_subsegments
81
+ default_inference_len: 65
82
+ overlap_margins:
83
+ - 4
84
+ - 4
85
+ pad_value: 0.0
86
+ image_padding_embed: pad_and_partial_pad
87
+ fix_image_padding: true
88
+ vit_layers:
89
+ - -2
90
+ - -9
91
+ image_pooling_h: 2
92
+ image_pooling_w: 2
93
+ image_pooling_2d: attention_meanq
94
+ image_projector: mlp
95
+ image_feature_dropout: 0.0
96
+ initializer_range: 0.02
97
+ normalize_input_embeds: false
98
+ use_position_ids: true
99
+ head_dim: null
100
+ action_tokenizer:
101
+ identifier: physical-intelligence/fast
102
+ tokenizer_dir: null
103
+ action_dim: 7
104
+ horizon: 8
105
+ tokenizer:
106
+ identifier: Qwen/Qwen2-7B
107
+ tokenizer_dir: null
108
+ pad_tokenizer: true
109
+ moe_num_experts: 8
110
+ moe_top_k: 2
111
+ moe_mlp_impl: sparse
112
+ moe_log_expert_assignment: false
113
+ moe_shared_expert: false
114
+ moe_lbl_in_fp32: false
115
+ moe_interleave: false
116
+ moe_loss_weight: 0.1
117
+ moe_zloss_weight: null
118
+ moe_dropless: true
119
+ moe_capacity_factor: 1.25
120
+ action_head: l1_regression
121
+ num_diffusion_steps: 1000
122
+ num_diffusion_inference_steps: 30
123
+ use_proprio: true
124
+ action_head_dit_hidden_size: 1152
125
+ action_head_dit_depth: 28
126
+ action_head_dit_num_heads: 16
127
+ llm_causal_attention: false
128
+ action_use_left_eef: true
129
+ action_use_mobile_base: false
130
+ allow_resume: false
131
+ ft_llm: true
132
+ ft_vit: false
133
+ ft_connector: false
134
+ ft_embedding: lm_head
135
+ lora: false
136
+ use_lora: false
137
+ lora_rank: 8
138
+ lora_llm: false
139
+ lora_vit: false
140
+ lora_connector: false
141
+ early_exit: false
142
+ train_exit_random_layer: false
143
+ optimizer:
144
+ name: adamw
145
+ learning_rate: 0.0001
146
+ weight_decay: 0.01
147
+ betas:
148
+ - 0.9
149
+ - 0.95
150
+ eps: 1.0e-05
151
+ connector_learning_rate: 0.0002
152
+ vit_learning_rate: 6.0e-06
153
+ llm_learning_rate: 5.0e-05
154
+ connector_weight_decay: 0.0
155
+ vit_weight_decay: 0.0
156
+ llm_weight_decay: 0.0
157
+ connector_betas:
158
+ - 0.9
159
+ - 0.95
160
+ vit_betas:
161
+ - 0.9
162
+ - 0.95
163
+ llm_betas:
164
+ - 0.9
165
+ - 0.95
166
+ connector_eps: 1.0e-06
167
+ vit_eps: 1.0e-06
168
+ llm_eps: 1.0e-06
169
+ metrics_log_interval: 20
170
+ scheduler:
171
+ name: multimodal
172
+ units: steps
173
+ t_warmup: 100
174
+ t_max: null
175
+ alpha_f: 0.1
176
+ connector_t_warmup: 200
177
+ vit_t_warmup: 2000
178
+ llm_t_warmup: 2000
179
+ grad_clip_warmup_steps: null
180
+ grad_clip_warmup_factor: null
181
+ warmup_min_lr: 0.0
182
+ data:
183
+ dataset: vla_dataset_realworld
184
+ mixture: null
185
+ root_size_mixture: null
186
+ split: train
187
+ seed: 95818
188
+ shuffle_messages: false
189
+ pad: to_max
190
+ sequence_length: 1600
191
+ shuffle: true
192
+ for_inference: false
193
+ multi_modal: torch
194
+ num_workers: 0
195
+ drop_last: true
196
+ pin_memory: true
197
+ prefetch_factor: null
198
+ persistent_workers: false
199
+ timeout: 0
200
+ rlds_dataset_name: a1_real_world
201
+ rlds_data_root_dir: /vast/users/xiaodan/zhangjian/datasets/OXE
202
+ use_wrist_image: true
203
+ use_proprio: true
204
+ rlds_shuffle_buffer_size: 100000
205
+ rlds_traj_threads: 8
206
+ rlds_read_threads: 8
207
+ lerobot_episode_index_start: null
208
+ lerobot_episode_index_end: null
209
+ restore_dataloader: true
210
+ fast_forward_batches: null
211
+ evaluators:
212
+ - label: val
213
+ data:
214
+ dataset: vla_dataset_realworld
215
+ mixture: null
216
+ root_size_mixture: null
217
+ split: validation
218
+ seed: null
219
+ shuffle_messages: false
220
+ pad: to_max
221
+ sequence_length: 1600
222
+ shuffle: false
223
+ for_inference: false
224
+ multi_modal: torch
225
+ num_workers: 0
226
+ drop_last: true
227
+ pin_memory: true
228
+ prefetch_factor: null
229
+ persistent_workers: true
230
+ timeout: 0
231
+ rlds_dataset_name: libero_4_task_suites_no_noops
232
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
233
+ use_wrist_image: true
234
+ use_proprio: true
235
+ rlds_shuffle_buffer_size: 256000
236
+ rlds_traj_threads: 8
237
+ rlds_read_threads: 8
238
+ lerobot_episode_index_start: 353
239
+ lerobot_episode_index_end: 765
240
+ device_eval_batch_size: null
241
+ subset_num_batches: 64
242
+ max_examples: null
243
+ max_new_tokens: 448
244
+ mm_evaluator: null
245
+ save_dir: null
246
+ save_to_checkpoint_dir: false
247
+ eval_name: null
248
+ skip_if_metrics_cached: true
249
+ eval_interval: 0
250
+ inf_eval_interval: -1
251
+ inf_evaluators: []
252
+ save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/eraser_l1_regression
253
+ remote_save_folder: null
254
+ canceled_check_interval: 50
255
+ save_interval: 500
256
+ save_interval_unsharded: 500
257
+ save_interval_ephemeral: null
258
+ save_interval_action_head: 500
259
+ save_num_checkpoints_to_keep: 1
260
+ save_num_unsharded_checkpoints_to_keep: 1
261
+ save_num_action_head_checkpoints_to_keep: 2
262
+ save_overwrite: true
263
+ force_save_unsharded: false
264
+ no_pre_train_checkpoint: true
265
+ initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
266
+ load_model_config: null
267
+ checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
268
+ load_path: null
269
+ load_path_sharded_checkpointer: null
270
+ reset_optimizer_state: false
271
+ reset_trainer_state: false
272
+ save_dataloader_state: false
273
+ reset_dataloader_state: false
274
+ keep_lr_on_load: true
275
+ sharded_checkpointer: torch_legacy
276
+ max_duration: 500000
277
+ global_train_batch_size: 126
278
+ device_train_batch_size: 15
279
+ device_train_microbatch_size: 16
280
+ device_eval_batch_size: 4
281
+ eval_subset_num_batches: -1
282
+ eval_on_load: false
283
+ device_inf_eval_batch_size: 16
284
+ inf_eval_subset_num_batches: -1
285
+ device_train_grad_accum: 0
286
+ max_grad_norm: 1.0
287
+ multi_component_grad_norm: true
288
+ batch_divisor: global_batch
289
+ max_grad_norm_ratio: null
290
+ precision: amp_bf16
291
+ wandb:
292
+ project: a1-realworld
293
+ entity: henryeap
294
+ group: null
295
+ name: eraser_20251011_163803
296
+ tags:
297
+ - watching
298
+ log_artifacts: false
299
+ rank_zero_only: true
300
+ log_interval: 1
301
+ speed_monitor:
302
+ window_size: 20
303
+ gpu_flops_available: null
304
+ console_log_interval: 1
305
+ gen1_gc_interval: 1
306
+ compile: null
307
+ fsdp:
308
+ use_orig_params: true
309
+ sharding_strategy: FULL_SHARD
310
+ wrapping_strategy: by_block_and_size
311
+ precision: float
312
+ hybrid_sharding_num_model_replicas: null
313
+ softmax_auxiliary_loss: true
314
+ softmax_auxiliary_loss_scale: 0.0001
315
+ time_limit: null
316
+ extra_steps_after_cancel: 10
317
+ python_profiling: false
318
+ torch_profiling: false
319
+ stop_at: 500000
320
+ stop_after: null
321
+ activation_checkpointing: whole_layer
322
+ fused_loss: null
eraser_l1_regression/wandb/wandb/debug-internal.log ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-10-11T16:38:45.369810639Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
2
+ {"time":"2025-10-11T16:38:46.624568558Z","level":"INFO","msg":"stream: created new stream","id":"qzez8pv7"}
3
+ {"time":"2025-10-11T16:38:46.624626598Z","level":"INFO","msg":"stream: started","id":"qzez8pv7"}
4
+ {"time":"2025-10-11T16:38:46.624662329Z","level":"INFO","msg":"sender: started","stream_id":"qzez8pv7"}
5
+ {"time":"2025-10-11T16:38:46.624681929Z","level":"INFO","msg":"handler: started","stream_id":"qzez8pv7"}
6
+ {"time":"2025-10-11T16:38:46.624653129Z","level":"INFO","msg":"writer: started","stream_id":"qzez8pv7"}
7
+ {"time":"2025-10-11T20:03:56.095706913Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
8
+ {"time":"2025-10-13T02:33:28.920574862Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/henryeap/a1-realworld/qzez8pv7/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
9
+ {"time":"2025-10-13T16:23:56.963675478Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
10
+ {"time":"2025-10-13T20:04:02.844107426Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
11
+ {"time":"2025-10-13T22:38:31.203849115Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
12
+ {"time":"2025-10-14T15:32:47.24166171Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
eraser_l1_regression/wandb/wandb/debug.log ADDED
File without changes
eraser_l1_regression/wandb/wandb/run-20251011_163844-qzez8pv7/files/requirements.txt ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ai2-molmo==0.0.0
2
+ astunparse==1.6.3
3
+ flatbuffers==25.2.10
4
+ gast==0.6.0
5
+ google-pasta==0.2.0
6
+ h5py==3.14.0
7
+ libclang==18.1.1
8
+ Markdown==3.9
9
+ namex==0.1.0
10
+ opt_einsum==3.4.0
11
+ optree==0.17.0
12
+ tensorboard-data-server==0.7.2
13
+ tensorflow-io-gcs-filesystem==0.37.1
14
+ termcolor==3.1.0
15
+ Werkzeug==3.1.3
16
+ Brotli==1.1.0
17
+ Farama-Notifications==0.0.4
18
+ MarkupSafe==2.1.5
19
+ PyYAML==6.0.2
20
+ absl-py==2.3.1
21
+ accelerate==1.10.1
22
+ ai2-molmo==0.0.0
23
+ aiofiles==24.1.0
24
+ aiohappyeyeballs==2.6.1
25
+ aiohttp==3.12.15
26
+ aiosignal==1.4.0
27
+ annotated-types==0.7.0
28
+ antlr4-python3-runtime==4.9.3
29
+ anyio==4.10.0
30
+ array_record==0.8.1
31
+ async-timeout==5.0.1
32
+ attrs==25.3.0
33
+ av==15.1.0
34
+ backports.tarfile==1.2.0
35
+ beaker-gantry==3.2.0
36
+ beaker-py==2.5.0
37
+ black==23.12.1
38
+ blinker==1.9.0
39
+ boltons==25.0.0
40
+ boto3==1.40.33
41
+ botocore==1.40.33
42
+ build==1.3.0
43
+ cached_path==1.7.3
44
+ cached-property==2.0.1
45
+ cachetools==5.5.2
46
+ certifi==2025.8.3
47
+ cffi==2.0.0
48
+ charset-normalizer==3.4.3
49
+ click==8.2.1
50
+ click-help-colors==0.9.4
51
+ click-option-group==0.5.7
52
+ cloudpickle==3.1.1
53
+ cmake==4.1.0
54
+ contourpy==1.3.2
55
+ cryptography==46.0.1
56
+ cycler==0.12.1
57
+ dataclass-extensions==0.2.3
58
+ datasets==3.6.0
59
+ decorator==5.2.1
60
+ deepdiff==8.6.1
61
+ diffusers==0.35.1
62
+ dill==0.3.8
63
+ distro==1.9.0
64
+ dlimp==0.0.1
65
+ dm-tree==0.1.9
66
+ docutils==0.22.1
67
+ draccus==0.10.0
68
+ editdistance==0.8.1
69
+ einops==0.8.1
70
+ einops-exts==0.0.4
71
+ et_xmlfile==2.0.0
72
+ etils==1.13.0
73
+ evdev==1.9.2
74
+ exceptiongroup==1.3.0
75
+ face==24.0.0
76
+ fastapi==0.116.2
77
+ ffmpy==0.6.1
78
+ fiddle==0.3.0
79
+ filelock==3.13.1
80
+ Flask==3.1.2
81
+ fonttools==4.60.0
82
+ frozenlist==1.7.0
83
+ fsspec==2023.9.2
84
+ ftfy==6.3.1
85
+ gcsfs==2023.9.2
86
+ gitdb==4.0.12
87
+ GitPython==3.1.45
88
+ glom==24.11.0
89
+ google-api-core==2.25.1
90
+ google-auth==2.40.3
91
+ google-auth-oauthlib==1.2.2
92
+ google-cloud-core==2.4.3
93
+ google-cloud-storage==2.19.0
94
+ google-crc32c==1.7.1
95
+ google-resumable-media==2.7.2
96
+ googleapis-common-protos==1.70.0
97
+ gradio==5.46.0
98
+ gradio_client==1.13.0
99
+ graphviz==0.21
100
+ groovy==0.1.2
101
+ grpcio==1.75.0
102
+ gymnasium==0.29.1
103
+ h11==0.16.0
104
+ hf_transfer==0.1.9
105
+ hf-xet==1.1.10
106
+ httpcore==1.0.9
107
+ httpx==0.28.1
108
+ huggingface-hub==0.35.0
109
+ id==1.5.0
110
+ idna==3.10
111
+ imageio==2.37.0
112
+ imageio-ffmpeg==0.6.0
113
+ importlib_metadata==8.7.0
114
+ importlib_resources==6.5.2
115
+ iniconfig==2.1.0
116
+ inquirerpy==0.3.4
117
+ isort==5.12.0
118
+ itsdangerous==2.2.0
119
+ jaraco.classes==3.4.0
120
+ jaraco.context==6.0.1
121
+ jaraco.functools==4.3.0
122
+ jeepney==0.9.0
123
+ Jinja2==3.1.4
124
+ jiter==0.11.0
125
+ jmespath==1.0.1
126
+ joblib==1.5.2
127
+ jsonlines==4.0.0
128
+ keras==2.15.0
129
+ keyring==25.6.0
130
+ kiwisolver==1.4.9
131
+ latex2sympy2_extended==1.10.2
132
+ lerobot==0.3.4
133
+ Levenshtein==0.27.1
134
+ libcst==1.8.4
135
+ lightning-utilities==0.15.2
136
+ markdown-it-py==4.0.0
137
+ math-verify==0.8.0
138
+ matplotlib==3.10.6
139
+ mdurl==0.1.2
140
+ mergedeep==1.3.4
141
+ ml-dtypes==0.2.0
142
+ ml_dtypes==0.5.3
143
+ more-itertools==10.8.0
144
+ mpmath==1.3.0
145
+ msgspec==0.19.0
146
+ multidict==6.6.4
147
+ multiprocess==0.70.16
148
+ mypy==1.3.0
149
+ mypy_extensions==1.1.0
150
+ necessary==0.4.3
151
+ networkx==3.3
152
+ nh3==0.3.0
153
+ nltk==3.9.1
154
+ numpy==1.26.4
155
+ oauthlib==3.3.1
156
+ omegaconf==2.3.0
157
+ openai==1.108.0
158
+ opencv-python-headless==4.12.0.88
159
+ OpenEXR==3.4.0
160
+ openpyxl==3.1.5
161
+ orderly-set==5.5.0
162
+ orjson==3.11.3
163
+ packaging==25.0
164
+ pandas==2.3.2
165
+ pathspec==0.12.1
166
+ petname==2.6
167
+ pfzy==0.3.4
168
+ pillow==11.0.0
169
+ pip==25.2
170
+ platformdirs==4.4.0
171
+ pluggy==1.6.0
172
+ promise==2.3
173
+ prompt_toolkit==3.0.52
174
+ propcache==0.3.2
175
+ proto-plus==1.26.1
176
+ protobuf==4.21.12
177
+ protobuf==6.32.1
178
+ psutil==7.1.0
179
+ pyarrow==21.0.0
180
+ pyasn1==0.6.1
181
+ pyasn1_modules==0.4.2
182
+ pycparser==2.23
183
+ pydantic==2.11.9
184
+ pydantic_core==2.33.2
185
+ pydub==0.25.1
186
+ Pygments==2.19.2
187
+ pynput==1.8.1
188
+ pyparsing==3.2.4
189
+ pyproject_hooks==1.2.0
190
+ pyserial==3.5
191
+ pytest==8.4.2
192
+ pytest-sphinx==0.6.3
193
+ python-dateutil==2.9.0.post0
194
+ python-Levenshtein==0.27.1
195
+ python-multipart==0.0.20
196
+ python-xlib==0.33
197
+ pytorch-triton-rocm==3.4.0
198
+ pytz==2025.2
199
+ pyyaml-include==1.4.1
200
+ RapidFuzz==3.14.1
201
+ readme_renderer==44.0
202
+ regex==2025.9.1
203
+ requests==2.32.5
204
+ requests-oauthlib==2.0.0
205
+ requests-toolbelt==1.0.0
206
+ requirements-parser==0.13.0
207
+ rerun-sdk==0.22.1
208
+ rfc3986==2.0.0
209
+ rich==13.9.4
210
+ rsa==4.9.1
211
+ ruff==0.13.0
212
+ s3transfer==0.14.0
213
+ safehttpx==0.1.6
214
+ safetensors==0.6.2
215
+ scikit-learn==1.7.2
216
+ scipy==1.15.3
217
+ SecretStorage==3.4.0
218
+ semantic-version==2.10.0
219
+ sentencepiece==0.2.1
220
+ sentry-sdk==2.38.0
221
+ setuptools==78.1.1
222
+ shellingham==1.5.4
223
+ six==1.17.0
224
+ smart_open==7.3.1
225
+ smashed==0.21.5
226
+ smmap==5.0.2
227
+ sniffio==1.3.1
228
+ starlette==0.48.0
229
+ sympy==1.13.3
230
+ tensorboard==2.15.2
231
+ tensorboard==2.19.0
232
+ tensorflow==2.15.0
233
+ tensorflow-addons==0.23.0
234
+ tensorflow-datasets==4.9.3
235
+ tensorflow-estimator==2.15.0
236
+ tensorflow-graphics==2021.12.3
237
+ tensorflow-metadata==1.17.2
238
+ threadpoolctl==3.6.0
239
+ timm==1.0.19
240
+ tokenizers==0.22.0
241
+ toml==0.10.2
242
+ tomli==2.2.1
243
+ tomlkit==0.13.3
244
+ torch==2.8.0+rocm6.4
245
+ torchcodec==0.5
246
+ torchmetrics==1.8.2
247
+ torchvision==0.23.0+rocm6.4
248
+ tqdm==4.67.1
249
+ transformers==4.56.1
250
+ trimesh==4.8.2
251
+ trouting==0.3.3
252
+ twine==6.2.0
253
+ typeguard==2.13.3
254
+ typer==0.17.4
255
+ typing_extensions==4.15.0
256
+ typing-inspect==0.9.0
257
+ typing-inspection==0.4.1
258
+ tzdata==2025.2
259
+ urllib3==2.5.0
260
+ uvicorn==0.35.0
261
+ wandb==0.21.4
262
+ wcwidth==0.2.13
263
+ websockets==15.0.1
264
+ wheel==0.45.1
265
+ wrapt==1.14.2
266
+ xxhash==3.5.0
267
+ yarl==1.20.1
268
+ zipp==3.23.0
269
+ lerobot==0.3.4
270
+ minLoRA==0.1.0
271
+ autocommand==2.2.2
272
+ backports.tarfile==1.2.0
273
+ importlib_metadata==8.0.0
274
+ inflect==7.3.1
275
+ jaraco.collections==5.1.0
276
+ jaraco.context==5.3.0
277
+ jaraco.functools==4.0.1
278
+ jaraco.text==3.12.1
279
+ more-itertools==10.3.0
280
+ packaging==24.2
281
+ platformdirs==4.2.2
282
+ tomli==2.0.1
283
+ typeguard==4.3.0
284
+ typing_extensions==4.12.2
285
+ wheel==0.45.1
286
+ zipp==3.19.2
eraser_l1_regression/wandb/wandb/run-20251011_163844-qzez8pv7/files/wandb-metadata.json ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.18",
4
+ "startedAt": "2025-10-11T16:38:44.897304Z",
5
+ "args": [
6
+ "qwen2_7b",
7
+ "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/eraser_l1_regression",
8
+ "--vision_backbone",
9
+ "openai",
10
+ "--action_head",
11
+ "l1_regression",
12
+ "--seq_len",
13
+ "1600",
14
+ "--ft_llm",
15
+ "--checkpoint",
16
+ "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
17
+ "--device_train_microbatch_size",
18
+ "16",
19
+ "--global_batch_size",
20
+ "126",
21
+ "--dataset",
22
+ "vla_dataset_realworld",
23
+ "--llm_learning_rate",
24
+ "5e-5",
25
+ "--wandb_entity",
26
+ "henryeap",
27
+ "--wandb_project",
28
+ "a1-realworld",
29
+ "--wandb_run_name",
30
+ "eraser",
31
+ "--real_world_vla_config_path",
32
+ "vla_config_realworld/vla_config_eraser.yaml",
33
+ "--save_overwrite"
34
+ ],
35
+ "program": "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
36
+ "codePath": "launch_scripts/train_vla.py",
37
+ "codePathLocal": "launch_scripts/train_vla.py",
38
+ "git": {
39
+ "remote": "https://github.com/Spatialtemporal-AI/A1.git",
40
+ "commit": "7c171df5d31577ede69d05172c2bc62d42ef3e3d"
41
+ },
42
+ "email": "ihenrykwok@outlook.com",
43
+ "root": "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/eraser_l1_regression/wandb",
44
+ "host": "auh7-1b-gpu-252",
45
+ "executable": "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
46
+ "cpu_count": 64,
47
+ "cpu_count_logical": 128,
48
+ "gpu": "Instinct MI210",
49
+ "gpu_count": 8,
50
+ "disk": {
51
+ "/": {
52
+ "total": "470343073792",
53
+ "used": "50572640256"
54
+ }
55
+ },
56
+ "memory": {
57
+ "total": "2434606936064"
58
+ },
59
+ "gpu_amd": [
60
+ {
61
+ "id": "2",
62
+ "uniqueId": "0xb1e32805d91e8fd",
63
+ "vbiosVersion": "113-D67301V-073",
64
+ "performanceLevel": "auto",
65
+ "maxPower": "300.0",
66
+ "series": "Instinct MI210",
67
+ "model": "0x740f",
68
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
69
+ "sku": "D67301V",
70
+ "sclkRange": "500Mhz - 1700Mhz",
71
+ "mclkRange": "400Mhz - 1600Mhz"
72
+ },
73
+ {
74
+ "id": "5",
75
+ "uniqueId": "0xf68552567a447d29",
76
+ "vbiosVersion": "113-D67301V-073",
77
+ "performanceLevel": "auto",
78
+ "maxPower": "300.0",
79
+ "series": "Instinct MI210",
80
+ "model": "0x740f",
81
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
82
+ "sku": "D67301V",
83
+ "sclkRange": "500Mhz - 1700Mhz",
84
+ "mclkRange": "400Mhz - 1600Mhz"
85
+ },
86
+ {
87
+ "id": "0",
88
+ "uniqueId": "0xb5f4e58f50394bbb",
89
+ "vbiosVersion": "113-D67301V-073",
90
+ "performanceLevel": "auto",
91
+ "maxPower": "300.0",
92
+ "series": "Instinct MI210",
93
+ "model": "0x740f",
94
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
95
+ "sku": "D67301V",
96
+ "sclkRange": "500Mhz - 1700Mhz",
97
+ "mclkRange": "400Mhz - 1600Mhz"
98
+ },
99
+ {
100
+ "id": "3",
101
+ "uniqueId": "0xfb9d87270270f7af",
102
+ "vbiosVersion": "113-D67301V-073",
103
+ "performanceLevel": "auto",
104
+ "maxPower": "300.0",
105
+ "series": "Instinct MI210",
106
+ "model": "0x740f",
107
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
108
+ "sku": "D67301V",
109
+ "sclkRange": "500Mhz - 1700Mhz",
110
+ "mclkRange": "400Mhz - 1600Mhz"
111
+ },
112
+ {
113
+ "id": "6",
114
+ "uniqueId": "0x558725d79035e281",
115
+ "vbiosVersion": "113-D67301V-073",
116
+ "performanceLevel": "auto",
117
+ "maxPower": "300.0",
118
+ "series": "Instinct MI210",
119
+ "model": "0x740f",
120
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
121
+ "sku": "D67301V",
122
+ "sclkRange": "500Mhz - 1700Mhz",
123
+ "mclkRange": "400Mhz - 1600Mhz"
124
+ },
125
+ {
126
+ "id": "4",
127
+ "uniqueId": "0xedec3b515d1caf9",
128
+ "vbiosVersion": "113-D67301V-073",
129
+ "performanceLevel": "auto",
130
+ "maxPower": "300.0",
131
+ "series": "Instinct MI210",
132
+ "model": "0x740f",
133
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
134
+ "sku": "D67301V",
135
+ "sclkRange": "500Mhz - 1700Mhz",
136
+ "mclkRange": "400Mhz - 1600Mhz"
137
+ },
138
+ {
139
+ "id": "7",
140
+ "uniqueId": "0x651131bef1a09ac7",
141
+ "vbiosVersion": "113-D67301V-073",
142
+ "performanceLevel": "auto",
143
+ "maxPower": "300.0",
144
+ "series": "Instinct MI210",
145
+ "model": "0x740f",
146
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
147
+ "sku": "D67301V",
148
+ "sclkRange": "500Mhz - 1700Mhz",
149
+ "mclkRange": "400Mhz - 1600Mhz"
150
+ },
151
+ {
152
+ "id": "1",
153
+ "uniqueId": "0xd41cfefdcf23b69b",
154
+ "vbiosVersion": "113-D67301V-073",
155
+ "performanceLevel": "auto",
156
+ "maxPower": "300.0",
157
+ "series": "Instinct MI210",
158
+ "model": "0x740f",
159
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
160
+ "sku": "D67301V",
161
+ "sclkRange": "500Mhz - 1700Mhz",
162
+ "mclkRange": "400Mhz - 1600Mhz"
163
+ }
164
+ ],
165
+ "slurm": {
166
+ "cluster_name": "ai-04r",
167
+ "conf": "/etc/slurm/slurm.conf",
168
+ "cpus_on_node": "128",
169
+ "gpus_on_node": "8",
170
+ "gtids": "0",
171
+ "job_account": "faculty-acc",
172
+ "job_cpus_per_node": "128",
173
+ "job_end_time": "1760459846",
174
+ "job_gid": "2000",
175
+ "job_gpus": "0,1,2,3,4,5,6,7",
176
+ "job_id": "2286",
177
+ "job_name": "mh_eraser_l1_regression",
178
+ "job_nodelist": "auh7-1b-gpu-252",
179
+ "job_num_nodes": "1",
180
+ "job_partition": "faculty",
181
+ "job_qos": "xdqos",
182
+ "job_start_time": "1760200646",
183
+ "job_uid": "2013",
184
+ "job_user": "xiaodan",
185
+ "jobid": "2286",
186
+ "localid": "0",
187
+ "nnodes": "1",
188
+ "nodeid": "0",
189
+ "nodelist": "auh7-1b-gpu-252",
190
+ "nprocs": "1",
191
+ "ntasks": "1",
192
+ "ntasks_per_node": "1",
193
+ "oom_kill_step": "0",
194
+ "prio_process": "0",
195
+ "procid": "0",
196
+ "submit_dir": "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
197
+ "submit_host": "auh-1b-cpu-login-001",
198
+ "task_pid": "579086",
199
+ "tasks_per_node": "1",
200
+ "topology_addr": "auh7-1b-gpu-252",
201
+ "topology_addr_pattern": "node"
202
+ },
203
+ "writerId": "td93jux4p0c1uemtf61zmtfwi71qa2wt"
204
+ }
glue/wandb/wandb/debug-internal.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-10-02T16:28:44.574145675Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
2
+ {"time":"2025-10-02T16:28:45.769098374Z","level":"INFO","msg":"stream: created new stream","id":"bzkyoc0w"}
3
+ {"time":"2025-10-02T16:28:45.769135025Z","level":"INFO","msg":"stream: started","id":"bzkyoc0w"}
4
+ {"time":"2025-10-02T16:28:45.769167365Z","level":"INFO","msg":"handler: started","stream_id":"bzkyoc0w"}
5
+ {"time":"2025-10-02T16:28:45.769158745Z","level":"INFO","msg":"writer: started","stream_id":"bzkyoc0w"}
6
+ {"time":"2025-10-02T16:28:45.769191036Z","level":"INFO","msg":"sender: started","stream_id":"bzkyoc0w"}
glue/wandb/wandb/debug.log ADDED
File without changes
glue/wandb/wandb/run-20251002_162842-zmotbaex/files/output.log ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb: Detected [openai] in use.
2
+ wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
3
+ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
4
+ 10/02 [16:28:44] WARNING | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No warnings.py:109
5
+ device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
6
+ warnings.warn( # warn only once
7
+
8
+ ****** vla_cfg: {'datasets': {'rlds': {'name': None, 'path': None, 'weight': 1.0, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Glue', 8, 'bounds']], 'open-source-real-world': {'rlds': {'name': 'a1_real_world', 'path': '/vast/users/xiaodan/zhangjian/datasets/OXE', 'weight': 8, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [], 'agibot': {'path': None, 'weight': 8, 'action_proprio_normalization_type': None}}}, 'model': {'action_head': {'action_dim': 16, 'proprio_dim': 16, 'num_actions_chunk': 8, 'action_tokens_mapping': {'left_end_effector': 8, 'right_end_effector': 8}, 'use_left_eef': True, 'use_mobile_base': False}}}
9
+ ****** Skip RLDS main; path not found: None
10
+ ****** start build LeRobot main...
11
+ build_tokenizer, cache_dir None tokenizer_dir None
12
+ 10/02 [16:28:51] INFO | >> Padding tokenizer with 418 tokens tokenizer.py:130
13
+ INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:435
14
+ ****** before LeRobot dataset...
15
+ ****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Glue
16
+ ****** length of the dataset: 10316
17
+ ****** Skip RLDS open-source-real-world; mixture 'a1_real_world' not found under: /vast/users/xiaodan/zhangjian/datasets/OXE
18
+ ****** Expect one of: []
19
+ ****** path: None
20
+ ****** Skip AgiBotWorld-Alpha open-source-real-world; path not found: None
21
+ ****** After build vla train dataset...
22
+ ****** iterable_sources: [<olmo.data.dataset.IterableDatasetWrapper object at 0x7fd60eb0a890>]
23
+ ****** Before build mixed iterable dataset...
24
+ ****** Build vla train dataloader successfully!
25
+ ************************* Build train_dataloader successful!
26
+ ************************* Before build_inf_evaluators
27
+ 10/02 [16:28:57] WARNING | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No warnings.py:109
28
+ device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
29
+ warnings.warn( # warn only once
30
+
31
+ ************************* Build evaluators successful!
32
+ ************************* Early exit flags: early_exit=False
33
+ ************************* Initialize model successful!
34
+ ***** state_dict_path: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924/model.pt
35
+ ***** Load checkpoint successful!
36
+ missing keys: ['action_head.state_proj.weight', 'action_head.state_proj.bias', 'action_head.action_in_proj.weight', 'action_head.action_in_proj.bias', 'action_head.action_time_in.weight', 'action_head.action_time_in.bias', 'action_head.action_time_out.weight', 'action_head.action_time_out.bias', 'action_head.memory_proj.weight', 'action_head.memory_proj.bias', 'action_head.gemma.model.layers.0.self_attn.q_proj.weight', 'action_head.gemma.model.layers.0.self_attn.k_proj.weight', 'action_head.gemma.model.layers.0.self_attn.v_proj.weight', 'action_head.gemma.model.layers.0.self_attn.o_proj.weight', 'action_head.gemma.model.layers.0.mlp.gate_proj.weight', 'action_head.gemma.model.layers.0.mlp.up_proj.weight', 'action_head.gemma.model.layers.0.mlp.down_proj.weight', 'action_head.gemma.model.layers.0.input_layernorm.weight', 'action_head.gemma.model.layers.0.post_attention_layernorm.weight', 'action_head.gemma.model.layers.1.self_attn.q_proj.weight', 'action_head.gemma.model.layers.1.self_attn.k_proj.weight', 'action_head.gemma.model.layers.1.self_attn.v_proj.weight', 'action_head.gemma.model.layers.1.self_attn.o_proj.weight', 'action_head.gemma.model.layers.1.mlp.gate_proj.weight', 'action_head.gemma.model.layers.1.mlp.up_proj.weight', 'action_head.gemma.model.layers.1.mlp.down_proj.weight', 'action_head.gemma.model.layers.1.input_layernorm.weight', 'action_head.gemma.model.layers.1.post_attention_layernorm.weight', 'action_head.gemma.model.layers.2.self_attn.q_proj.weight', 'action_head.gemma.model.layers.2.self_attn.k_proj.weight', 'action_head.gemma.model.layers.2.self_attn.v_proj.weight', 'action_head.gemma.model.layers.2.self_attn.o_proj.weight', 'action_head.gemma.model.layers.2.mlp.gate_proj.weight', 'action_head.gemma.model.layers.2.mlp.up_proj.weight', 'action_head.gemma.model.layers.2.mlp.down_proj.weight', 'action_head.gemma.model.layers.2.input_layernorm.weight', 'action_head.gemma.model.layers.2.post_attention_layernorm.weight', 'action_head.gemma.model.layers.3.self_attn.q_proj.weight', 'action_head.gemma.model.layers.3.self_attn.k_proj.weight', 'action_head.gemma.model.layers.3.self_attn.v_proj.weight', 'action_head.gemma.model.layers.3.self_attn.o_proj.weight', 'action_head.gemma.model.layers.3.mlp.gate_proj.weight', 'action_head.gemma.model.layers.3.mlp.up_proj.weight', 'action_head.gemma.model.layers.3.mlp.down_proj.weight', 'action_head.gemma.model.layers.3.input_layernorm.weight', 'action_head.gemma.model.layers.3.post_attention_layernorm.weight', 'action_head.gemma.model.layers.4.self_attn.q_proj.weight', 'action_head.gemma.model.layers.4.self_attn.k_proj.weight', 'action_head.gemma.model.layers.4.self_attn.v_proj.weight', 'action_head.gemma.model.layers.4.self_attn.o_proj.weight', 'action_head.gemma.model.layers.4.mlp.gate_proj.weight', 'action_head.gemma.model.layers.4.mlp.up_proj.weight', 'action_head.gemma.model.layers.4.mlp.down_proj.weight', 'action_head.gemma.model.layers.4.input_layernorm.weight', 'action_head.gemma.model.layers.4.post_attention_layernorm.weight', 'action_head.gemma.model.layers.5.self_attn.q_proj.weight', 'action_head.gemma.model.layers.5.self_attn.k_proj.weight', 'action_head.gemma.model.layers.5.self_attn.v_proj.weight', 'action_head.gemma.model.layers.5.self_attn.o_proj.weight', 'action_head.gemma.model.layers.5.mlp.gate_proj.weight', 'action_head.gemma.model.layers.5.mlp.up_proj.weight', 'action_head.gemma.model.layers.5.mlp.down_proj.weight', 'action_head.gemma.model.layers.5.input_layernorm.weight', 'action_head.gemma.model.layers.5.post_attention_layernorm.weight', 'action_head.gemma.model.layers.6.self_attn.q_proj.weight', 'action_head.gemma.model.layers.6.self_attn.k_proj.weight', 'action_head.gemma.model.layers.6.self_attn.v_proj.weight', 'action_head.gemma.model.layers.6.self_attn.o_proj.weight', 'action_head.gemma.model.layers.6.mlp.gate_proj.weight', 'action_head.gemma.model.layers.6.mlp.up_proj.weight', 'action_head.gemma.model.layers.6.mlp.down_proj.weight', 'action_head.gemma.model.layers.6.input_layernorm.weight', 'action_head.gemma.model.layers.6.post_attention_
37
+ unexpected keys: []
38
+ ************************* Initialize model successful!
39
+ ************************* LoRA flags: use_lora=True, lora_llm=False, lora_vit=False, lora_connector=False
40
+ ************************* Before add lora to model
41
+ ************************* Before FSDP model wrapping
42
+ ************************* FSDP model wrapping successful!
43
+ ************************* Before building optimizer and scheduler
44
+ ************* Before get lora params
45
+ ************* After get lora params successfully
46
+ 10/02 [16:30:16] INFO | >> Constructing optimizer with 2 param groups optim.py:1283
47
+ **************************************************
48
+ After building optimizer and scheduler and model, before training, peak GPU memory (MB): 36856
49
+ ************************* VLATrainer initialized successfully!
50
+ ************************* Before trainer.fit()
51
+ Pre-train system metrics
52
+ System/Peak GPU Memory (MB)=36,856
53
+ WARNING | >> /vast/users/xiaodan/zhangjian/A1/olmo/data/collator.py:200: UserWarning: To copy construct from a tensor, it is recommended to use warnings.py:109
54
+ sourceTensor.detach().clone() or sourceTensor.detach().clone().requires_grad_(True), rather than torch.tensor(sourceTensor).
55
+ timestep_list = [torch.tensor(ex["timestep"], dtype=torch.int64) for ex in batch]
56
+
57
+ 10/02 [16:30:23] WARNING | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/nn/modules/module.py:967: UserWarning: The .grad attribute warnings.py:109
58
+ of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed
59
+ want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor
60
+ by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered
61
+ internally at /pytorch/build/aten/src/ATen/core/TensorBody.h:489.)
62
+ param_grad = param.grad
63
+
64
+ [step=1/500000]
65
+ train/ActionNoiseL2Loss=1.834
66
+ throughput/total_tokens=192,000
67
+ System/Peak GPU Memory (MB)=39,644
68
+ [step=2/500000]
69
+ train/ActionNoiseL2Loss=1.807
70
+ throughput/total_tokens=384,000
71
+ throughput/device/tokens_per_second=1,142
72
+ throughput/device/batches_per_second=0.0476
73
+ System/Peak GPU Memory (MB)=46,466
74
+ [step=3/500000]
75
+ train/ActionNoiseL2Loss=1.699
76
+ throughput/total_tokens=576,000
77
+ throughput/device/tokens_per_second=975.1
78
+ throughput/device/batches_per_second=0.0406
79
+ [step=4/500000]
80
+ train/ActionNoiseL2Loss=1.790
81
+ throughput/total_tokens=768,000
82
+ throughput/device/tokens_per_second=878.3
83
+ throughput/device/batches_per_second=0.0366
84
+ [step=5/500000]
85
+ train/ActionNoiseL2Loss=1.693
86
+ throughput/total_tokens=960,000
87
+ throughput/device/tokens_per_second=830.7
88
+ throughput/device/batches_per_second=0.0346
89
+ [step=6/500000]
90
+ train/ActionNoiseL2Loss=1.678
91
+ throughput/total_tokens=1,152,000
92
+ throughput/device/tokens_per_second=801.1
93
+ throughput/device/batches_per_second=0.0334
94
+ [step=7/500000]
95
+ train/ActionNoiseL2Loss=1.561
96
+ throughput/total_tokens=1,344,000
97
+ throughput/device/tokens_per_second=779.7
98
+ throughput/device/batches_per_second=0.0325
glue/wandb/wandb/run-20251002_162842-zmotbaex/files/requirements.txt ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ai2-molmo==0.0.0
2
+ astunparse==1.6.3
3
+ flatbuffers==25.2.10
4
+ gast==0.6.0
5
+ google-pasta==0.2.0
6
+ h5py==3.14.0
7
+ libclang==18.1.1
8
+ Markdown==3.9
9
+ namex==0.1.0
10
+ opt_einsum==3.4.0
11
+ optree==0.17.0
12
+ tensorboard-data-server==0.7.2
13
+ tensorflow-io-gcs-filesystem==0.37.1
14
+ termcolor==3.1.0
15
+ Werkzeug==3.1.3
16
+ Brotli==1.1.0
17
+ Farama-Notifications==0.0.4
18
+ MarkupSafe==2.1.5
19
+ PyYAML==6.0.2
20
+ absl-py==2.3.1
21
+ accelerate==1.10.1
22
+ ai2-molmo==0.0.0
23
+ aiofiles==24.1.0
24
+ aiohappyeyeballs==2.6.1
25
+ aiohttp==3.12.15
26
+ aiosignal==1.4.0
27
+ annotated-types==0.7.0
28
+ antlr4-python3-runtime==4.9.3
29
+ anyio==4.10.0
30
+ array_record==0.8.1
31
+ async-timeout==5.0.1
32
+ attrs==25.3.0
33
+ av==15.1.0
34
+ backports.tarfile==1.2.0
35
+ beaker-gantry==3.2.0
36
+ beaker-py==2.5.0
37
+ black==23.12.1
38
+ blinker==1.9.0
39
+ boltons==25.0.0
40
+ boto3==1.40.33
41
+ botocore==1.40.33
42
+ build==1.3.0
43
+ cached_path==1.7.3
44
+ cached-property==2.0.1
45
+ cachetools==5.5.2
46
+ certifi==2025.8.3
47
+ cffi==2.0.0
48
+ charset-normalizer==3.4.3
49
+ click==8.2.1
50
+ click-help-colors==0.9.4
51
+ click-option-group==0.5.7
52
+ cloudpickle==3.1.1
53
+ cmake==4.1.0
54
+ contourpy==1.3.2
55
+ cryptography==46.0.1
56
+ cycler==0.12.1
57
+ dataclass-extensions==0.2.3
58
+ datasets==3.6.0
59
+ decorator==5.2.1
60
+ deepdiff==8.6.1
61
+ diffusers==0.35.1
62
+ dill==0.3.8
63
+ distro==1.9.0
64
+ dlimp==0.0.1
65
+ dm-tree==0.1.9
66
+ docutils==0.22.1
67
+ draccus==0.10.0
68
+ editdistance==0.8.1
69
+ einops==0.8.1
70
+ einops-exts==0.0.4
71
+ et_xmlfile==2.0.0
72
+ etils==1.13.0
73
+ evdev==1.9.2
74
+ exceptiongroup==1.3.0
75
+ face==24.0.0
76
+ fastapi==0.116.2
77
+ ffmpy==0.6.1
78
+ fiddle==0.3.0
79
+ filelock==3.13.1
80
+ Flask==3.1.2
81
+ fonttools==4.60.0
82
+ frozenlist==1.7.0
83
+ fsspec==2023.9.2
84
+ ftfy==6.3.1
85
+ gcsfs==2023.9.2
86
+ gitdb==4.0.12
87
+ GitPython==3.1.45
88
+ glom==24.11.0
89
+ google-api-core==2.25.1
90
+ google-auth==2.40.3
91
+ google-auth-oauthlib==1.2.2
92
+ google-cloud-core==2.4.3
93
+ google-cloud-storage==2.19.0
94
+ google-crc32c==1.7.1
95
+ google-resumable-media==2.7.2
96
+ googleapis-common-protos==1.70.0
97
+ gradio==5.46.0
98
+ gradio_client==1.13.0
99
+ graphviz==0.21
100
+ groovy==0.1.2
101
+ grpcio==1.75.0
102
+ gymnasium==0.29.1
103
+ h11==0.16.0
104
+ hf_transfer==0.1.9
105
+ hf-xet==1.1.10
106
+ httpcore==1.0.9
107
+ httpx==0.28.1
108
+ huggingface-hub==0.35.0
109
+ id==1.5.0
110
+ idna==3.10
111
+ imageio==2.37.0
112
+ imageio-ffmpeg==0.6.0
113
+ importlib_metadata==8.7.0
114
+ importlib_resources==6.5.2
115
+ iniconfig==2.1.0
116
+ inquirerpy==0.3.4
117
+ isort==5.12.0
118
+ itsdangerous==2.2.0
119
+ jaraco.classes==3.4.0
120
+ jaraco.context==6.0.1
121
+ jaraco.functools==4.3.0
122
+ jeepney==0.9.0
123
+ Jinja2==3.1.4
124
+ jiter==0.11.0
125
+ jmespath==1.0.1
126
+ joblib==1.5.2
127
+ jsonlines==4.0.0
128
+ keras==2.15.0
129
+ keyring==25.6.0
130
+ kiwisolver==1.4.9
131
+ latex2sympy2_extended==1.10.2
132
+ lerobot==0.3.4
133
+ Levenshtein==0.27.1
134
+ libcst==1.8.4
135
+ lightning-utilities==0.15.2
136
+ markdown-it-py==4.0.0
137
+ math-verify==0.8.0
138
+ matplotlib==3.10.6
139
+ mdurl==0.1.2
140
+ mergedeep==1.3.4
141
+ ml-dtypes==0.2.0
142
+ ml_dtypes==0.5.3
143
+ more-itertools==10.8.0
144
+ mpmath==1.3.0
145
+ msgspec==0.19.0
146
+ multidict==6.6.4
147
+ multiprocess==0.70.16
148
+ mypy==1.3.0
149
+ mypy_extensions==1.1.0
150
+ necessary==0.4.3
151
+ networkx==3.3
152
+ nh3==0.3.0
153
+ nltk==3.9.1
154
+ numpy==1.26.4
155
+ oauthlib==3.3.1
156
+ omegaconf==2.3.0
157
+ openai==1.108.0
158
+ opencv-python-headless==4.12.0.88
159
+ OpenEXR==3.4.0
160
+ openpyxl==3.1.5
161
+ orderly-set==5.5.0
162
+ orjson==3.11.3
163
+ packaging==25.0
164
+ pandas==2.3.2
165
+ pathspec==0.12.1
166
+ petname==2.6
167
+ pfzy==0.3.4
168
+ pillow==11.0.0
169
+ pip==25.2
170
+ platformdirs==4.4.0
171
+ pluggy==1.6.0
172
+ promise==2.3
173
+ prompt_toolkit==3.0.52
174
+ propcache==0.3.2
175
+ proto-plus==1.26.1
176
+ protobuf==4.21.12
177
+ protobuf==6.32.1
178
+ psutil==7.1.0
179
+ pyarrow==21.0.0
180
+ pyasn1==0.6.1
181
+ pyasn1_modules==0.4.2
182
+ pycparser==2.23
183
+ pydantic==2.11.9
184
+ pydantic_core==2.33.2
185
+ pydub==0.25.1
186
+ Pygments==2.19.2
187
+ pynput==1.8.1
188
+ pyparsing==3.2.4
189
+ pyproject_hooks==1.2.0
190
+ pyserial==3.5
191
+ pytest==8.4.2
192
+ pytest-sphinx==0.6.3
193
+ python-dateutil==2.9.0.post0
194
+ python-Levenshtein==0.27.1
195
+ python-multipart==0.0.20
196
+ python-xlib==0.33
197
+ pytorch-triton-rocm==3.4.0
198
+ pytz==2025.2
199
+ pyyaml-include==1.4.1
200
+ RapidFuzz==3.14.1
201
+ readme_renderer==44.0
202
+ regex==2025.9.1
203
+ requests==2.32.5
204
+ requests-oauthlib==2.0.0
205
+ requests-toolbelt==1.0.0
206
+ requirements-parser==0.13.0
207
+ rerun-sdk==0.22.1
208
+ rfc3986==2.0.0
209
+ rich==13.9.4
210
+ rsa==4.9.1
211
+ ruff==0.13.0
212
+ s3transfer==0.14.0
213
+ safehttpx==0.1.6
214
+ safetensors==0.6.2
215
+ scikit-learn==1.7.2
216
+ scipy==1.15.3
217
+ SecretStorage==3.4.0
218
+ semantic-version==2.10.0
219
+ sentencepiece==0.2.1
220
+ sentry-sdk==2.38.0
221
+ setuptools==78.1.1
222
+ shellingham==1.5.4
223
+ six==1.17.0
224
+ smart_open==7.3.1
225
+ smashed==0.21.5
226
+ smmap==5.0.2
227
+ sniffio==1.3.1
228
+ starlette==0.48.0
229
+ sympy==1.13.3
230
+ tensorboard==2.15.2
231
+ tensorboard==2.19.0
232
+ tensorflow==2.15.0
233
+ tensorflow-addons==0.23.0
234
+ tensorflow-datasets==4.9.3
235
+ tensorflow-estimator==2.15.0
236
+ tensorflow-graphics==2021.12.3
237
+ tensorflow-metadata==1.17.2
238
+ threadpoolctl==3.6.0
239
+ timm==1.0.19
240
+ tokenizers==0.22.0
241
+ toml==0.10.2
242
+ tomli==2.2.1
243
+ tomlkit==0.13.3
244
+ torch==2.8.0+rocm6.4
245
+ torchcodec==0.5
246
+ torchmetrics==1.8.2
247
+ torchvision==0.23.0+rocm6.4
248
+ tqdm==4.67.1
249
+ transformers==4.56.1
250
+ trimesh==4.8.2
251
+ trouting==0.3.3
252
+ twine==6.2.0
253
+ typeguard==2.13.3
254
+ typer==0.17.4
255
+ typing_extensions==4.15.0
256
+ typing-inspect==0.9.0
257
+ typing-inspection==0.4.1
258
+ tzdata==2025.2
259
+ urllib3==2.5.0
260
+ uvicorn==0.35.0
261
+ wandb==0.21.4
262
+ wcwidth==0.2.13
263
+ websockets==15.0.1
264
+ wheel==0.45.1
265
+ wrapt==1.14.2
266
+ xxhash==3.5.0
267
+ yarl==1.20.1
268
+ zipp==3.23.0
269
+ lerobot==0.3.4
270
+ minLoRA==0.1.0
271
+ autocommand==2.2.2
272
+ backports.tarfile==1.2.0
273
+ importlib_metadata==8.0.0
274
+ inflect==7.3.1
275
+ jaraco.collections==5.1.0
276
+ jaraco.context==5.3.0
277
+ jaraco.functools==4.0.1
278
+ jaraco.text==3.12.1
279
+ more-itertools==10.3.0
280
+ packaging==24.2
281
+ platformdirs==4.2.2
282
+ tomli==2.0.1
283
+ typeguard==4.3.0
284
+ typing_extensions==4.12.2
285
+ wheel==0.45.1
286
+ zipp==3.19.2
glue/wandb/wandb/run-20251002_162842-zmotbaex/files/wandb-metadata.json ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.18",
4
+ "startedAt": "2025-10-02T16:28:42.692996Z",
5
+ "args": [
6
+ "qwen2_7b",
7
+ "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/glue",
8
+ "--vision_backbone",
9
+ "openai",
10
+ "--action_head",
11
+ "flow_matching",
12
+ "--seq_len",
13
+ "1600",
14
+ "--ft_llm",
15
+ "--checkpoint",
16
+ "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
17
+ "--device_train_microbatch_size",
18
+ "16",
19
+ "--global_batch_size",
20
+ "126",
21
+ "--dataset",
22
+ "vla_dataset_realworld",
23
+ "--llm_learning_rate",
24
+ "5e-5",
25
+ "--wandb_entity",
26
+ "henryeap",
27
+ "--wandb_project",
28
+ "a1-realworld",
29
+ "--wandb_run_name",
30
+ "glue",
31
+ "--real_world_vla_config_path",
32
+ "vla_config_realworld/vla_config_glue.yaml",
33
+ "--save_overwrite"
34
+ ],
35
+ "program": "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
36
+ "codePath": "launch_scripts/train_vla.py",
37
+ "codePathLocal": "launch_scripts/train_vla.py",
38
+ "git": {
39
+ "remote": "https://github.com/Spatialtemporal-AI/A1.git",
40
+ "commit": "5071f59d87c6a976691323cbac66d7a988b0b4e7"
41
+ },
42
+ "email": "ihenrykwok@outlook.com",
43
+ "root": "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/glue/wandb",
44
+ "host": "auh7-1b-gpu-260",
45
+ "executable": "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
46
+ "cpu_count": 64,
47
+ "cpu_count_logical": 128,
48
+ "gpu": "Instinct MI210",
49
+ "gpu_count": 8,
50
+ "disk": {
51
+ "/": {
52
+ "total": "470343073792",
53
+ "used": "56243372032"
54
+ }
55
+ },
56
+ "memory": {
57
+ "total": "2434606956544"
58
+ },
59
+ "gpu_amd": [
60
+ {
61
+ "id": "6",
62
+ "uniqueId": "0x2d75dae36f0dc353",
63
+ "vbiosVersion": "113-D67301V-073",
64
+ "performanceLevel": "auto",
65
+ "maxPower": "300.0",
66
+ "series": "Instinct MI210",
67
+ "model": "0x740f",
68
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
69
+ "sku": "D67301V",
70
+ "sclkRange": "500Mhz - 1700Mhz",
71
+ "mclkRange": "400Mhz - 1600Mhz"
72
+ },
73
+ {
74
+ "id": "1",
75
+ "uniqueId": "0xe35cdba2e3fafd21",
76
+ "vbiosVersion": "113-D67301V-073",
77
+ "performanceLevel": "auto",
78
+ "maxPower": "300.0",
79
+ "series": "Instinct MI210",
80
+ "model": "0x740f",
81
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
82
+ "sku": "D67301V",
83
+ "sclkRange": "500Mhz - 1700Mhz",
84
+ "mclkRange": "400Mhz - 1600Mhz"
85
+ },
86
+ {
87
+ "id": "0",
88
+ "uniqueId": "0x4213cc9eeeefc98d",
89
+ "vbiosVersion": "113-D67301V-073",
90
+ "performanceLevel": "auto",
91
+ "maxPower": "300.0",
92
+ "series": "Instinct MI210",
93
+ "model": "0x740f",
94
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
95
+ "sku": "D67301V",
96
+ "sclkRange": "500Mhz - 1700Mhz",
97
+ "mclkRange": "400Mhz - 1600Mhz"
98
+ },
99
+ {
100
+ "id": "7",
101
+ "uniqueId": "0x702e8efb76b00c21",
102
+ "vbiosVersion": "113-D67301V-073",
103
+ "performanceLevel": "auto",
104
+ "maxPower": "300.0",
105
+ "series": "Instinct MI210",
106
+ "model": "0x740f",
107
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
108
+ "sku": "D67301V",
109
+ "sclkRange": "500Mhz - 1700Mhz",
110
+ "mclkRange": "400Mhz - 1600Mhz"
111
+ },
112
+ {
113
+ "id": "4",
114
+ "uniqueId": "0x4493708eee1ee737",
115
+ "vbiosVersion": "113-D67301V-073",
116
+ "performanceLevel": "auto",
117
+ "maxPower": "300.0",
118
+ "series": "Instinct MI210",
119
+ "model": "0x740f",
120
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
121
+ "sku": "D67301V",
122
+ "sclkRange": "500Mhz - 1700Mhz",
123
+ "mclkRange": "400Mhz - 1600Mhz"
124
+ },
125
+ {
126
+ "id": "2",
127
+ "uniqueId": "0x9815965a899d8053",
128
+ "vbiosVersion": "113-D67301V-073",
129
+ "performanceLevel": "auto",
130
+ "maxPower": "300.0",
131
+ "series": "Instinct MI210",
132
+ "model": "0x740f",
133
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
134
+ "sku": "D67301V",
135
+ "sclkRange": "500Mhz - 1700Mhz",
136
+ "mclkRange": "400Mhz - 1600Mhz"
137
+ },
138
+ {
139
+ "id": "3",
140
+ "uniqueId": "0xd7a6e11358a6574d",
141
+ "vbiosVersion": "113-D67301V-073",
142
+ "performanceLevel": "auto",
143
+ "maxPower": "300.0",
144
+ "series": "Instinct MI210",
145
+ "model": "0x740f",
146
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
147
+ "sku": "D67301V",
148
+ "sclkRange": "500Mhz - 1700Mhz",
149
+ "mclkRange": "400Mhz - 1600Mhz"
150
+ },
151
+ {
152
+ "id": "5",
153
+ "uniqueId": "0xd79d4a081e34548d",
154
+ "vbiosVersion": "113-D67301V-073",
155
+ "performanceLevel": "auto",
156
+ "maxPower": "300.0",
157
+ "series": "Instinct MI210",
158
+ "model": "0x740f",
159
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
160
+ "sku": "D67301V",
161
+ "sclkRange": "500Mhz - 1700Mhz",
162
+ "mclkRange": "400Mhz - 1600Mhz"
163
+ }
164
+ ],
165
+ "slurm": {
166
+ "cluster_name": "ai-04r",
167
+ "conf": "/etc/slurm/slurm.conf",
168
+ "cpus_on_node": "128",
169
+ "gpus_on_node": "8",
170
+ "gtids": "0",
171
+ "job_account": "faculty-acc",
172
+ "job_cpus_per_node": "128",
173
+ "job_end_time": "1759681678",
174
+ "job_gid": "2000",
175
+ "job_gpus": "0,1,2,3,4,5,6,7",
176
+ "job_id": "2259",
177
+ "job_name": "mh_glue_flow_matching",
178
+ "job_nodelist": "auh7-1b-gpu-260",
179
+ "job_num_nodes": "1",
180
+ "job_partition": "faculty",
181
+ "job_qos": "xdqos",
182
+ "job_start_time": "1759422478",
183
+ "job_uid": "2013",
184
+ "job_user": "xiaodan",
185
+ "jobid": "2259",
186
+ "localid": "0",
187
+ "nnodes": "1",
188
+ "nodeid": "0",
189
+ "nodelist": "auh7-1b-gpu-260",
190
+ "nprocs": "1",
191
+ "ntasks": "1",
192
+ "ntasks_per_node": "1",
193
+ "oom_kill_step": "0",
194
+ "prio_process": "0",
195
+ "procid": "0",
196
+ "submit_dir": "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
197
+ "submit_host": "auh-1b-cpu-login-001",
198
+ "task_pid": "2571821",
199
+ "tasks_per_node": "1",
200
+ "topology_addr": "auh7-1b-gpu-260",
201
+ "topology_addr_pattern": "node"
202
+ },
203
+ "writerId": "mkb2blj4w2h2y7he4b0dlo7b5hd3p38x"
204
+ }
glue/wandb/wandb/run-20251002_162842-zmotbaex/logs/debug-core.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-10-02T16:28:42.743912932Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpsh8i0z2c/port-2572010.txt","pid":2572010,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-10-02T16:28:42.744707722Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":2572010}
3
+ {"time":"2025-10-02T16:28:42.744690061Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2572010-2572176-2097911213/socket","Net":"unix"}}
4
+ {"time":"2025-10-02T16:28:42.92711156Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-10-02T16:28:42.933538707Z","level":"INFO","msg":"handleInformInit: received","streamId":"zmotbaex","id":"1(@)"}
6
+ {"time":"2025-10-02T16:28:43.973148537Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"zmotbaex","id":"1(@)"}
glue/wandb/wandb/run-20251002_162842-zmotbaex/logs/debug-internal.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-10-02T16:28:42.93546825Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
2
+ {"time":"2025-10-02T16:28:43.973096137Z","level":"INFO","msg":"stream: created new stream","id":"zmotbaex"}
3
+ {"time":"2025-10-02T16:28:43.973143067Z","level":"INFO","msg":"stream: started","id":"zmotbaex"}
4
+ {"time":"2025-10-02T16:28:43.973152847Z","level":"INFO","msg":"writer: started","stream_id":"zmotbaex"}
5
+ {"time":"2025-10-02T16:28:43.973166287Z","level":"INFO","msg":"sender: started","stream_id":"zmotbaex"}
6
+ {"time":"2025-10-02T16:28:43.973210878Z","level":"INFO","msg":"handler: started","stream_id":"zmotbaex"}
glue/wandb/wandb/run-20251002_162842-zmotbaex/logs/debug.log ADDED
File without changes
glue/wandb/wandb/run-20251002_162842-zmotbaex/run-zmotbaex.wandb ADDED
Binary file (65.5 kB). View file
 
glue/wandb/wandb/run-20251002_162844-bzkyoc0w/files/output.log ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb: Detected [openai] in use.
2
+ wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
3
+ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
4
+ 10/02 [16:28:46] WARNING | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No warnings.py:109
5
+ device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
6
+ warnings.warn( # warn only once
7
+
8
+ ****** vla_cfg: {'datasets': {'rlds': {'name': None, 'path': None, 'weight': 1.0, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Glue', 8, 'bounds']], 'open-source-real-world': {'rlds': {'name': 'a1_real_world', 'path': '/vast/users/xiaodan/zhangjian/datasets/OXE', 'weight': 8, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [], 'agibot': {'path': None, 'weight': 8, 'action_proprio_normalization_type': None}}}, 'model': {'action_head': {'action_dim': 16, 'proprio_dim': 16, 'num_actions_chunk': 8, 'action_tokens_mapping': {'left_end_effector': 8, 'right_end_effector': 8}, 'use_left_eef': True, 'use_mobile_base': False}}}
9
+ ****** Skip RLDS main; path not found: None
10
+ ****** start build LeRobot main...
11
+ build_tokenizer, cache_dir None tokenizer_dir None
12
+ 10/02 [16:28:51] INFO | >> Padding tokenizer with 418 tokens tokenizer.py:130
13
+ INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:435
14
+ ****** before LeRobot dataset...
15
+ ****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Glue
16
+ ****** length of the dataset: 10316
17
+ ****** Skip RLDS open-source-real-world; mixture 'a1_real_world' not found under: /vast/users/xiaodan/zhangjian/datasets/OXE
18
+ ****** Expect one of: []
19
+ ****** path: None
20
+ ****** Skip AgiBotWorld-Alpha open-source-real-world; path not found: None
21
+ ****** After build vla train dataset...
22
+ ****** iterable_sources: [<olmo.data.dataset.IterableDatasetWrapper object at 0x7f8509571300>]
23
+ ****** Before build mixed iterable dataset...
24
+ ****** Build vla train dataloader successfully!
25
+ ************************* Build train_dataloader successful!
26
+ ************************* Before build_inf_evaluators
27
+ 10/02 [16:28:57] WARNING | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No warnings.py:109
28
+ device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
29
+ warnings.warn( # warn only once
30
+
31
+ ************************* Build evaluators successful!
32
+ ************************* Early exit flags: early_exit=False
33
+ ************************* Initialize model successful!
34
+ ***** state_dict_path: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924/model.pt
35
+ ***** Load checkpoint successful!
36
+ missing keys: ['action_head.model.layer_norm1.weight', 'action_head.model.layer_norm1.bias', 'action_head.model.fc1.weight', 'action_head.model.fc1.bias', 'action_head.model.mlp_resnet_blocks.0.ffn.0.weight', 'action_head.model.mlp_resnet_blocks.0.ffn.0.bias', 'action_head.model.mlp_resnet_blocks.0.ffn.1.weight', 'action_head.model.mlp_resnet_blocks.0.ffn.1.bias', 'action_head.model.mlp_resnet_blocks.1.ffn.0.weight', 'action_head.model.mlp_resnet_blocks.1.ffn.0.bias', 'action_head.model.mlp_resnet_blocks.1.ffn.1.weight', 'action_head.model.mlp_resnet_blocks.1.ffn.1.bias', 'action_head.model.layer_norm2.weight', 'action_head.model.layer_norm2.bias', 'action_head.model.fc2.weight', 'action_head.model.fc2.bias', 'proprio_projector.fc1.weight', 'proprio_projector.fc1.bias', 'proprio_projector.fc2.weight', 'proprio_projector.fc2.bias']
37
+ unexpected keys: []
38
+ ************************* Initialize model successful!
39
+ ************************* LoRA flags: use_lora=True, lora_llm=False, lora_vit=False, lora_connector=False
40
+ ************************* Before add lora to model
41
+ ************************* Before FSDP model wrapping
42
+ ************************* FSDP model wrapping successful!
43
+ ************************* Before building optimizer and scheduler
44
+ ************* Before get lora params
45
+ ************* After get lora params successfully
46
+ 10/02 [16:30:26] INFO | >> Constructing optimizer with 2 param groups optim.py:1283
47
+ **************************************************
48
+ After building optimizer and scheduler and model, before training, peak GPU memory (MB): 35614
49
+ ************************* VLATrainer initialized successfully!
50
+ ************************* Before trainer.fit()
51
+ Pre-train system metrics
52
+ System/Peak GPU Memory (MB)=35,614
53
+ 10/02 [16:30:27] WARNING | >> /vast/users/xiaodan/zhangjian/A1/olmo/data/collator.py:200: UserWarning: To copy construct from a tensor, it is recommended to use warnings.py:109
54
+ sourceTensor.detach().clone() or sourceTensor.detach().clone().requires_grad_(True), rather than torch.tensor(sourceTensor).
55
+ timestep_list = [torch.tensor(ex["timestep"], dtype=torch.int64) for ex in batch]
56
+
57
+ [step=1/500000]
58
+ train/ActionL1Loss=0.6604
59
+ throughput/total_tokens=192,000
60
+ System/Peak GPU Memory (MB)=40,144
61
+ [step=2/500000]
62
+ train/ActionL1Loss=0.6686
63
+ throughput/total_tokens=384,000
64
+ throughput/device/tokens_per_second=1,212
65
+ throughput/device/batches_per_second=0.0505
66
+ System/Peak GPU Memory (MB)=46,917
67
+ [step=3/500000]
68
+ train/ActionL1Loss=0.6331
69
+ throughput/total_tokens=576,000
70
+ throughput/device/tokens_per_second=1,187
71
+ throughput/device/batches_per_second=0.0495
72
+ [step=4/500000]
73
+ train/ActionL1Loss=0.6222
74
+ throughput/total_tokens=768,000
75
+ throughput/device/tokens_per_second=1,177
76
+ throughput/device/batches_per_second=0.0491
77
+ [step=5/500000]
78
+ train/ActionL1Loss=0.5780
79
+ throughput/total_tokens=960,000
80
+ throughput/device/tokens_per_second=1,176
81
+ throughput/device/batches_per_second=0.0490
82
+ [step=6/500000]
83
+ train/ActionL1Loss=0.5804
84
+ throughput/total_tokens=1,152,000
85
+ throughput/device/tokens_per_second=1,177
86
+ throughput/device/batches_per_second=0.0490
87
+ [step=7/500000]
88
+ train/ActionL1Loss=0.4998
89
+ throughput/total_tokens=1,344,000
90
+ throughput/device/tokens_per_second=1,177
91
+ throughput/device/batches_per_second=0.0490
92
+ [step=8/500000]
93
+ train/ActionL1Loss=0.5153
94
+ throughput/total_tokens=1,536,000
95
+ throughput/device/tokens_per_second=1,177
96
+ throughput/device/batches_per_second=0.0491
97
+ [step=9/500000]
98
+ train/ActionL1Loss=0.5447
99
+ throughput/total_tokens=1,728,000
100
+ throughput/device/tokens_per_second=1,177
101
+ throughput/device/batches_per_second=0.0491
102
+ [step=10/500000]
103
+ train/ActionL1Loss=0.4229
104
+ throughput/total_tokens=1,920,000
105
+ throughput/device/tokens_per_second=1,177
106
+ throughput/device/batches_per_second=0.0491
107
+ System/Peak GPU Memory (MB)=46,917
glue/wandb/wandb/run-20251002_162844-bzkyoc0w/files/requirements.txt ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ai2-molmo==0.0.0
2
+ astunparse==1.6.3
3
+ flatbuffers==25.2.10
4
+ gast==0.6.0
5
+ google-pasta==0.2.0
6
+ h5py==3.14.0
7
+ libclang==18.1.1
8
+ Markdown==3.9
9
+ namex==0.1.0
10
+ opt_einsum==3.4.0
11
+ optree==0.17.0
12
+ tensorboard-data-server==0.7.2
13
+ tensorflow-io-gcs-filesystem==0.37.1
14
+ termcolor==3.1.0
15
+ Werkzeug==3.1.3
16
+ Brotli==1.1.0
17
+ Farama-Notifications==0.0.4
18
+ MarkupSafe==2.1.5
19
+ PyYAML==6.0.2
20
+ absl-py==2.3.1
21
+ accelerate==1.10.1
22
+ ai2-molmo==0.0.0
23
+ aiofiles==24.1.0
24
+ aiohappyeyeballs==2.6.1
25
+ aiohttp==3.12.15
26
+ aiosignal==1.4.0
27
+ annotated-types==0.7.0
28
+ antlr4-python3-runtime==4.9.3
29
+ anyio==4.10.0
30
+ array_record==0.8.1
31
+ async-timeout==5.0.1
32
+ attrs==25.3.0
33
+ av==15.1.0
34
+ backports.tarfile==1.2.0
35
+ beaker-gantry==3.2.0
36
+ beaker-py==2.5.0
37
+ black==23.12.1
38
+ blinker==1.9.0
39
+ boltons==25.0.0
40
+ boto3==1.40.33
41
+ botocore==1.40.33
42
+ build==1.3.0
43
+ cached_path==1.7.3
44
+ cached-property==2.0.1
45
+ cachetools==5.5.2
46
+ certifi==2025.8.3
47
+ cffi==2.0.0
48
+ charset-normalizer==3.4.3
49
+ click==8.2.1
50
+ click-help-colors==0.9.4
51
+ click-option-group==0.5.7
52
+ cloudpickle==3.1.1
53
+ cmake==4.1.0
54
+ contourpy==1.3.2
55
+ cryptography==46.0.1
56
+ cycler==0.12.1
57
+ dataclass-extensions==0.2.3
58
+ datasets==3.6.0
59
+ decorator==5.2.1
60
+ deepdiff==8.6.1
61
+ diffusers==0.35.1
62
+ dill==0.3.8
63
+ distro==1.9.0
64
+ dlimp==0.0.1
65
+ dm-tree==0.1.9
66
+ docutils==0.22.1
67
+ draccus==0.10.0
68
+ editdistance==0.8.1
69
+ einops==0.8.1
70
+ einops-exts==0.0.4
71
+ et_xmlfile==2.0.0
72
+ etils==1.13.0
73
+ evdev==1.9.2
74
+ exceptiongroup==1.3.0
75
+ face==24.0.0
76
+ fastapi==0.116.2
77
+ ffmpy==0.6.1
78
+ fiddle==0.3.0
79
+ filelock==3.13.1
80
+ Flask==3.1.2
81
+ fonttools==4.60.0
82
+ frozenlist==1.7.0
83
+ fsspec==2023.9.2
84
+ ftfy==6.3.1
85
+ gcsfs==2023.9.2
86
+ gitdb==4.0.12
87
+ GitPython==3.1.45
88
+ glom==24.11.0
89
+ google-api-core==2.25.1
90
+ google-auth==2.40.3
91
+ google-auth-oauthlib==1.2.2
92
+ google-cloud-core==2.4.3
93
+ google-cloud-storage==2.19.0
94
+ google-crc32c==1.7.1
95
+ google-resumable-media==2.7.2
96
+ googleapis-common-protos==1.70.0
97
+ gradio==5.46.0
98
+ gradio_client==1.13.0
99
+ graphviz==0.21
100
+ groovy==0.1.2
101
+ grpcio==1.75.0
102
+ gymnasium==0.29.1
103
+ h11==0.16.0
104
+ hf_transfer==0.1.9
105
+ hf-xet==1.1.10
106
+ httpcore==1.0.9
107
+ httpx==0.28.1
108
+ huggingface-hub==0.35.0
109
+ id==1.5.0
110
+ idna==3.10
111
+ imageio==2.37.0
112
+ imageio-ffmpeg==0.6.0
113
+ importlib_metadata==8.7.0
114
+ importlib_resources==6.5.2
115
+ iniconfig==2.1.0
116
+ inquirerpy==0.3.4
117
+ isort==5.12.0
118
+ itsdangerous==2.2.0
119
+ jaraco.classes==3.4.0
120
+ jaraco.context==6.0.1
121
+ jaraco.functools==4.3.0
122
+ jeepney==0.9.0
123
+ Jinja2==3.1.4
124
+ jiter==0.11.0
125
+ jmespath==1.0.1
126
+ joblib==1.5.2
127
+ jsonlines==4.0.0
128
+ keras==2.15.0
129
+ keyring==25.6.0
130
+ kiwisolver==1.4.9
131
+ latex2sympy2_extended==1.10.2
132
+ lerobot==0.3.4
133
+ Levenshtein==0.27.1
134
+ libcst==1.8.4
135
+ lightning-utilities==0.15.2
136
+ markdown-it-py==4.0.0
137
+ math-verify==0.8.0
138
+ matplotlib==3.10.6
139
+ mdurl==0.1.2
140
+ mergedeep==1.3.4
141
+ ml-dtypes==0.2.0
142
+ ml_dtypes==0.5.3
143
+ more-itertools==10.8.0
144
+ mpmath==1.3.0
145
+ msgspec==0.19.0
146
+ multidict==6.6.4
147
+ multiprocess==0.70.16
148
+ mypy==1.3.0
149
+ mypy_extensions==1.1.0
150
+ necessary==0.4.3
151
+ networkx==3.3
152
+ nh3==0.3.0
153
+ nltk==3.9.1
154
+ numpy==1.26.4
155
+ oauthlib==3.3.1
156
+ omegaconf==2.3.0
157
+ openai==1.108.0
158
+ opencv-python-headless==4.12.0.88
159
+ OpenEXR==3.4.0
160
+ openpyxl==3.1.5
161
+ orderly-set==5.5.0
162
+ orjson==3.11.3
163
+ packaging==25.0
164
+ pandas==2.3.2
165
+ pathspec==0.12.1
166
+ petname==2.6
167
+ pfzy==0.3.4
168
+ pillow==11.0.0
169
+ pip==25.2
170
+ platformdirs==4.4.0
171
+ pluggy==1.6.0
172
+ promise==2.3
173
+ prompt_toolkit==3.0.52
174
+ propcache==0.3.2
175
+ proto-plus==1.26.1
176
+ protobuf==4.21.12
177
+ protobuf==6.32.1
178
+ psutil==7.1.0
179
+ pyarrow==21.0.0
180
+ pyasn1==0.6.1
181
+ pyasn1_modules==0.4.2
182
+ pycparser==2.23
183
+ pydantic==2.11.9
184
+ pydantic_core==2.33.2
185
+ pydub==0.25.1
186
+ Pygments==2.19.2
187
+ pynput==1.8.1
188
+ pyparsing==3.2.4
189
+ pyproject_hooks==1.2.0
190
+ pyserial==3.5
191
+ pytest==8.4.2
192
+ pytest-sphinx==0.6.3
193
+ python-dateutil==2.9.0.post0
194
+ python-Levenshtein==0.27.1
195
+ python-multipart==0.0.20
196
+ python-xlib==0.33
197
+ pytorch-triton-rocm==3.4.0
198
+ pytz==2025.2
199
+ pyyaml-include==1.4.1
200
+ RapidFuzz==3.14.1
201
+ readme_renderer==44.0
202
+ regex==2025.9.1
203
+ requests==2.32.5
204
+ requests-oauthlib==2.0.0
205
+ requests-toolbelt==1.0.0
206
+ requirements-parser==0.13.0
207
+ rerun-sdk==0.22.1
208
+ rfc3986==2.0.0
209
+ rich==13.9.4
210
+ rsa==4.9.1
211
+ ruff==0.13.0
212
+ s3transfer==0.14.0
213
+ safehttpx==0.1.6
214
+ safetensors==0.6.2
215
+ scikit-learn==1.7.2
216
+ scipy==1.15.3
217
+ SecretStorage==3.4.0
218
+ semantic-version==2.10.0
219
+ sentencepiece==0.2.1
220
+ sentry-sdk==2.38.0
221
+ setuptools==78.1.1
222
+ shellingham==1.5.4
223
+ six==1.17.0
224
+ smart_open==7.3.1
225
+ smashed==0.21.5
226
+ smmap==5.0.2
227
+ sniffio==1.3.1
228
+ starlette==0.48.0
229
+ sympy==1.13.3
230
+ tensorboard==2.15.2
231
+ tensorboard==2.19.0
232
+ tensorflow==2.15.0
233
+ tensorflow-addons==0.23.0
234
+ tensorflow-datasets==4.9.3
235
+ tensorflow-estimator==2.15.0
236
+ tensorflow-graphics==2021.12.3
237
+ tensorflow-metadata==1.17.2
238
+ threadpoolctl==3.6.0
239
+ timm==1.0.19
240
+ tokenizers==0.22.0
241
+ toml==0.10.2
242
+ tomli==2.2.1
243
+ tomlkit==0.13.3
244
+ torch==2.8.0+rocm6.4
245
+ torchcodec==0.5
246
+ torchmetrics==1.8.2
247
+ torchvision==0.23.0+rocm6.4
248
+ tqdm==4.67.1
249
+ transformers==4.56.1
250
+ trimesh==4.8.2
251
+ trouting==0.3.3
252
+ twine==6.2.0
253
+ typeguard==2.13.3
254
+ typer==0.17.4
255
+ typing_extensions==4.15.0
256
+ typing-inspect==0.9.0
257
+ typing-inspection==0.4.1
258
+ tzdata==2025.2
259
+ urllib3==2.5.0
260
+ uvicorn==0.35.0
261
+ wandb==0.21.4
262
+ wcwidth==0.2.13
263
+ websockets==15.0.1
264
+ wheel==0.45.1
265
+ wrapt==1.14.2
266
+ xxhash==3.5.0
267
+ yarl==1.20.1
268
+ zipp==3.23.0
269
+ lerobot==0.3.4
270
+ minLoRA==0.1.0
271
+ autocommand==2.2.2
272
+ backports.tarfile==1.2.0
273
+ importlib_metadata==8.0.0
274
+ inflect==7.3.1
275
+ jaraco.collections==5.1.0
276
+ jaraco.context==5.3.0
277
+ jaraco.functools==4.0.1
278
+ jaraco.text==3.12.1
279
+ more-itertools==10.3.0
280
+ packaging==24.2
281
+ platformdirs==4.2.2
282
+ tomli==2.0.1
283
+ typeguard==4.3.0
284
+ typing_extensions==4.12.2
285
+ wheel==0.45.1
286
+ zipp==3.19.2
glue/wandb/wandb/run-20251002_162844-bzkyoc0w/files/wandb-metadata.json ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.18",
4
+ "startedAt": "2025-10-02T16:28:44.296863Z",
5
+ "args": [
6
+ "qwen2_7b",
7
+ "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/glue",
8
+ "--vision_backbone",
9
+ "openai",
10
+ "--action_head",
11
+ "l1_regression",
12
+ "--seq_len",
13
+ "1600",
14
+ "--ft_llm",
15
+ "--checkpoint",
16
+ "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
17
+ "--device_train_microbatch_size",
18
+ "16",
19
+ "--global_batch_size",
20
+ "126",
21
+ "--dataset",
22
+ "vla_dataset_realworld",
23
+ "--llm_learning_rate",
24
+ "5e-5",
25
+ "--wandb_entity",
26
+ "henryeap",
27
+ "--wandb_project",
28
+ "a1-realworld",
29
+ "--wandb_run_name",
30
+ "glue",
31
+ "--real_world_vla_config_path",
32
+ "vla_config_realworld/vla_config_glue.yaml",
33
+ "--save_overwrite"
34
+ ],
35
+ "program": "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
36
+ "codePath": "launch_scripts/train_vla.py",
37
+ "codePathLocal": "launch_scripts/train_vla.py",
38
+ "git": {
39
+ "remote": "https://github.com/Spatialtemporal-AI/A1.git",
40
+ "commit": "5071f59d87c6a976691323cbac66d7a988b0b4e7"
41
+ },
42
+ "email": "ihenrykwok@outlook.com",
43
+ "root": "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/glue/wandb",
44
+ "host": "auh7-1b-gpu-282",
45
+ "executable": "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
46
+ "cpu_count": 64,
47
+ "cpu_count_logical": 128,
48
+ "gpu": "Instinct MI210",
49
+ "gpu_count": 8,
50
+ "disk": {
51
+ "/": {
52
+ "total": "470343073792",
53
+ "used": "50534686720"
54
+ }
55
+ },
56
+ "memory": {
57
+ "total": "2434606956544"
58
+ },
59
+ "gpu_amd": [
60
+ {
61
+ "id": "1",
62
+ "uniqueId": "0x63f0fe2c43bc1640",
63
+ "vbiosVersion": "113-D67301V-073",
64
+ "performanceLevel": "auto",
65
+ "maxPower": "300.0",
66
+ "series": "Instinct MI210",
67
+ "model": "0x740f",
68
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
69
+ "sku": "D67301V",
70
+ "sclkRange": "500Mhz - 1700Mhz",
71
+ "mclkRange": "400Mhz - 1600Mhz"
72
+ },
73
+ {
74
+ "id": "2",
75
+ "uniqueId": "0x492f172b602a22b5",
76
+ "vbiosVersion": "113-D67301V-073",
77
+ "performanceLevel": "auto",
78
+ "maxPower": "300.0",
79
+ "series": "Instinct MI210",
80
+ "model": "0x740f",
81
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
82
+ "sku": "D67301V",
83
+ "sclkRange": "500Mhz - 1700Mhz",
84
+ "mclkRange": "400Mhz - 1600Mhz"
85
+ },
86
+ {
87
+ "id": "5",
88
+ "uniqueId": "0xcec181d5e2ce525",
89
+ "vbiosVersion": "113-D67301V-073",
90
+ "performanceLevel": "auto",
91
+ "maxPower": "300.0",
92
+ "series": "Instinct MI210",
93
+ "model": "0x740f",
94
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
95
+ "sku": "D67301V",
96
+ "sclkRange": "500Mhz - 1700Mhz",
97
+ "mclkRange": "400Mhz - 1600Mhz"
98
+ },
99
+ {
100
+ "id": "6",
101
+ "uniqueId": "0xdaf531ba129c665e",
102
+ "vbiosVersion": "113-D67301V-073",
103
+ "performanceLevel": "auto",
104
+ "maxPower": "300.0",
105
+ "series": "Instinct MI210",
106
+ "model": "0x740f",
107
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
108
+ "sku": "D67301V",
109
+ "sclkRange": "500Mhz - 1700Mhz",
110
+ "mclkRange": "400Mhz - 1600Mhz"
111
+ },
112
+ {
113
+ "id": "7",
114
+ "uniqueId": "0x14a7fc56ac2e5e42",
115
+ "vbiosVersion": "113-D67301V-073",
116
+ "performanceLevel": "auto",
117
+ "maxPower": "300.0",
118
+ "series": "Instinct MI210",
119
+ "model": "0x740f",
120
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
121
+ "sku": "D67301V",
122
+ "sclkRange": "500Mhz - 1700Mhz",
123
+ "mclkRange": "400Mhz - 1600Mhz"
124
+ },
125
+ {
126
+ "id": "3",
127
+ "uniqueId": "0x1af77dc455975108",
128
+ "vbiosVersion": "113-D67301V-073",
129
+ "performanceLevel": "auto",
130
+ "maxPower": "300.0",
131
+ "series": "Instinct MI210",
132
+ "model": "0x740f",
133
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
134
+ "sku": "D67301V",
135
+ "sclkRange": "500Mhz - 1700Mhz",
136
+ "mclkRange": "400Mhz - 1600Mhz"
137
+ },
138
+ {
139
+ "id": "4",
140
+ "uniqueId": "0xb66ccb62112b0571",
141
+ "vbiosVersion": "113-D67301V-073",
142
+ "performanceLevel": "auto",
143
+ "maxPower": "300.0",
144
+ "series": "Instinct MI210",
145
+ "model": "0x740f",
146
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
147
+ "sku": "D67301V",
148
+ "sclkRange": "500Mhz - 1700Mhz",
149
+ "mclkRange": "400Mhz - 1600Mhz"
150
+ },
151
+ {
152
+ "id": "0",
153
+ "uniqueId": "0x2e7a3afcefcaca4b",
154
+ "vbiosVersion": "113-D67301V-073",
155
+ "performanceLevel": "auto",
156
+ "maxPower": "300.0",
157
+ "series": "Instinct MI210",
158
+ "model": "0x740f",
159
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
160
+ "sku": "D67301V",
161
+ "sclkRange": "500Mhz - 1700Mhz",
162
+ "mclkRange": "400Mhz - 1600Mhz"
163
+ }
164
+ ],
165
+ "slurm": {
166
+ "cluster_name": "ai-04r",
167
+ "conf": "/etc/slurm/slurm.conf",
168
+ "cpus_on_node": "128",
169
+ "gpus_on_node": "8",
170
+ "gtids": "0",
171
+ "job_account": "faculty-acc",
172
+ "job_cpus_per_node": "128",
173
+ "job_end_time": "1759681678",
174
+ "job_gid": "2000",
175
+ "job_gpus": "0,1,2,3,4,5,6,7",
176
+ "job_id": "2260",
177
+ "job_name": "mh_glue_l1_regression",
178
+ "job_nodelist": "auh7-1b-gpu-282",
179
+ "job_num_nodes": "1",
180
+ "job_partition": "faculty",
181
+ "job_qos": "xdqos",
182
+ "job_start_time": "1759422478",
183
+ "job_uid": "2013",
184
+ "job_user": "xiaodan",
185
+ "jobid": "2260",
186
+ "localid": "0",
187
+ "nnodes": "1",
188
+ "nodeid": "0",
189
+ "nodelist": "auh7-1b-gpu-282",
190
+ "nprocs": "1",
191
+ "ntasks": "1",
192
+ "ntasks_per_node": "1",
193
+ "oom_kill_step": "0",
194
+ "prio_process": "0",
195
+ "procid": "0",
196
+ "submit_dir": "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
197
+ "submit_host": "auh-1b-cpu-login-001",
198
+ "task_pid": "3777115",
199
+ "tasks_per_node": "1",
200
+ "topology_addr": "auh7-1b-gpu-282",
201
+ "topology_addr_pattern": "node"
202
+ },
203
+ "writerId": "065brkivq78b6pyeoha2ces6xhhdu5dv"
204
+ }
glue/wandb/wandb/run-20251002_162844-bzkyoc0w/logs/debug-core.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-10-02T16:28:44.546350644Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpduepwsd2/port-3777304.txt","pid":3777304,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-10-02T16:28:44.547929797Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":3777304}
3
+ {"time":"2025-10-02T16:28:44.549056273Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-3777304-3777474-3243164877/socket","Net":"unix"}}
4
+ {"time":"2025-10-02T16:28:44.556761474Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-10-02T16:28:44.572324418Z","level":"INFO","msg":"handleInformInit: received","streamId":"bzkyoc0w","id":"1(@)"}
6
+ {"time":"2025-10-02T16:28:45.769139765Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"bzkyoc0w","id":"1(@)"}
glue/wandb/wandb/run-20251002_162844-bzkyoc0w/logs/debug-internal.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-10-02T16:28:44.574145675Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
2
+ {"time":"2025-10-02T16:28:45.769098374Z","level":"INFO","msg":"stream: created new stream","id":"bzkyoc0w"}
3
+ {"time":"2025-10-02T16:28:45.769135025Z","level":"INFO","msg":"stream: started","id":"bzkyoc0w"}
4
+ {"time":"2025-10-02T16:28:45.769167365Z","level":"INFO","msg":"handler: started","stream_id":"bzkyoc0w"}
5
+ {"time":"2025-10-02T16:28:45.769158745Z","level":"INFO","msg":"writer: started","stream_id":"bzkyoc0w"}
6
+ {"time":"2025-10-02T16:28:45.769191036Z","level":"INFO","msg":"sender: started","stream_id":"bzkyoc0w"}
glue/wandb/wandb/run-20251002_162844-bzkyoc0w/logs/debug.log ADDED
File without changes
glue/wandb/wandb/run-20251002_162844-bzkyoc0w/run-bzkyoc0w.wandb ADDED
Binary file (65.5 kB). View file
 
glue_flow_matching/step12000-unsharded/config.yaml ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: glue_20251002_163658
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 3584
7
+ n_heads: 28
8
+ n_kv_heads: 4
9
+ qkv_bias: true
10
+ clip_qkv: null
11
+ n_layers: 28
12
+ mlp_ratio: 4
13
+ mlp_hidden_size: 37888
14
+ activation_type: swiglu
15
+ block_type: sequential
16
+ block_group_size: 1
17
+ rope: true
18
+ rope_full_precision: true
19
+ rope_theta: 1000000.0
20
+ vision_backbone:
21
+ image_model_type: openai
22
+ image_default_input_size:
23
+ - 336
24
+ - 336
25
+ image_patch_size: 14
26
+ image_pos_patch_size: 14
27
+ image_emb_dim: 1024
28
+ image_num_heads: 16
29
+ image_num_key_value_heads: 16
30
+ image_num_layers: 23
31
+ image_head_dim: 64
32
+ image_mlp_dim: 4096
33
+ image_mlp_activations: quick_gelu
34
+ image_dropout_rate: 0.0
35
+ image_num_pos: 577
36
+ image_norm_eps: 1.0e-05
37
+ attention_dropout: 0.0
38
+ residual_dropout: 0.0
39
+ initializer_range: 0.02
40
+ fsdp_wrap: false
41
+ resize_mode: default
42
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
43
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
44
+ low_cpu_fsdp: true
45
+ attention_type: sdpa
46
+ float32_attention: true
47
+ attention_dropout: 0.0
48
+ attention_layer_norm: false
49
+ residual_dropout: 0.1
50
+ response_residual_dropout: 0.0
51
+ embedding_dropout: 0.0
52
+ layer_norm_type: rms
53
+ layer_norm_with_affine: true
54
+ layer_norm_eps: 1.0e-06
55
+ attention_layer_norm_with_affine: true
56
+ max_sequence_length: 4096
57
+ max_position_embeddings: null
58
+ include_bias: false
59
+ bias_for_layer_norm: null
60
+ scale_logits: false
61
+ vocab_size: 152064
62
+ embedding_size: 152064
63
+ ff_out_size: null
64
+ additional_vocab_size: 128
65
+ new_embedding_init_range: 0.02
66
+ weight_tying: false
67
+ init_device: null
68
+ init_fn: normal
69
+ init_std: 0.02
70
+ init_cutoff_factor: null
71
+ norm_after: false
72
+ precision: amp_bf16
73
+ max_crops: 12
74
+ crop_mode: overlap-and-resize-c2
75
+ use_col_tokens: true
76
+ prompt_type: uber_model
77
+ system_prompt_kind: demo_or_style
78
+ message_formatting: role
79
+ always_start_with_space: true
80
+ multi_annotation_weighting: root_subsegments
81
+ default_inference_len: 65
82
+ overlap_margins:
83
+ - 4
84
+ - 4
85
+ pad_value: 0.0
86
+ image_padding_embed: pad_and_partial_pad
87
+ fix_image_padding: true
88
+ vit_layers:
89
+ - -2
90
+ - -9
91
+ image_pooling_h: 2
92
+ image_pooling_w: 2
93
+ image_pooling_2d: attention_meanq
94
+ image_projector: mlp
95
+ image_feature_dropout: 0.0
96
+ initializer_range: 0.02
97
+ normalize_input_embeds: false
98
+ use_position_ids: true
99
+ head_dim: null
100
+ action_tokenizer:
101
+ identifier: physical-intelligence/fast
102
+ tokenizer_dir: null
103
+ action_dim: 7
104
+ horizon: 8
105
+ tokenizer:
106
+ identifier: Qwen/Qwen2-7B
107
+ tokenizer_dir: null
108
+ pad_tokenizer: true
109
+ moe_num_experts: 8
110
+ moe_top_k: 2
111
+ moe_mlp_impl: sparse
112
+ moe_log_expert_assignment: false
113
+ moe_shared_expert: false
114
+ moe_lbl_in_fp32: false
115
+ moe_interleave: false
116
+ moe_loss_weight: 0.1
117
+ moe_zloss_weight: null
118
+ moe_dropless: true
119
+ moe_capacity_factor: 1.25
120
+ action_head: flow_matching
121
+ num_diffusion_steps: 1000
122
+ num_diffusion_inference_steps: 30
123
+ use_proprio: true
124
+ action_head_dit_hidden_size: 1152
125
+ action_head_dit_depth: 28
126
+ action_head_dit_num_heads: 16
127
+ llm_causal_attention: false
128
+ action_use_left_eef: true
129
+ action_use_mobile_base: false
130
+ allow_resume: false
131
+ ft_llm: true
132
+ ft_vit: false
133
+ ft_connector: false
134
+ ft_embedding: lm_head
135
+ lora: false
136
+ use_lora: true
137
+ lora_rank: 8
138
+ lora_llm: false
139
+ lora_vit: false
140
+ lora_connector: false
141
+ early_exit: false
142
+ train_exit_random_layer: false
143
+ optimizer:
144
+ name: adamw
145
+ learning_rate: 0.0001
146
+ weight_decay: 0.01
147
+ betas:
148
+ - 0.9
149
+ - 0.95
150
+ eps: 1.0e-05
151
+ connector_learning_rate: 0.0002
152
+ vit_learning_rate: 6.0e-06
153
+ llm_learning_rate: 5.0e-05
154
+ connector_weight_decay: 0.0
155
+ vit_weight_decay: 0.0
156
+ llm_weight_decay: 0.0
157
+ connector_betas:
158
+ - 0.9
159
+ - 0.95
160
+ vit_betas:
161
+ - 0.9
162
+ - 0.95
163
+ llm_betas:
164
+ - 0.9
165
+ - 0.95
166
+ connector_eps: 1.0e-06
167
+ vit_eps: 1.0e-06
168
+ llm_eps: 1.0e-06
169
+ metrics_log_interval: 20
170
+ scheduler:
171
+ name: multimodal
172
+ units: steps
173
+ t_warmup: 100
174
+ t_max: null
175
+ alpha_f: 0.1
176
+ connector_t_warmup: 200
177
+ vit_t_warmup: 2000
178
+ llm_t_warmup: 2000
179
+ grad_clip_warmup_steps: null
180
+ grad_clip_warmup_factor: null
181
+ warmup_min_lr: 0.0
182
+ data:
183
+ dataset: vla_dataset_realworld
184
+ mixture: null
185
+ root_size_mixture: null
186
+ split: train
187
+ seed: 95818
188
+ shuffle_messages: false
189
+ pad: to_max
190
+ sequence_length: 1600
191
+ shuffle: true
192
+ for_inference: false
193
+ multi_modal: torch
194
+ num_workers: 0
195
+ drop_last: true
196
+ pin_memory: true
197
+ prefetch_factor: null
198
+ persistent_workers: false
199
+ timeout: 0
200
+ rlds_dataset_name: libero_4_task_suites_no_noops
201
+ rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Glue
202
+ use_wrist_image: true
203
+ use_proprio: true
204
+ rlds_shuffle_buffer_size: 100000
205
+ rlds_traj_threads: 8
206
+ rlds_read_threads: 8
207
+ lerobot_episode_index_start: null
208
+ lerobot_episode_index_end: null
209
+ restore_dataloader: true
210
+ fast_forward_batches: null
211
+ evaluators:
212
+ - label: val
213
+ data:
214
+ dataset: vla_dataset_realworld
215
+ mixture: null
216
+ root_size_mixture: null
217
+ split: validation
218
+ seed: null
219
+ shuffle_messages: false
220
+ pad: to_max
221
+ sequence_length: 1600
222
+ shuffle: false
223
+ for_inference: false
224
+ multi_modal: torch
225
+ num_workers: 0
226
+ drop_last: true
227
+ pin_memory: true
228
+ prefetch_factor: null
229
+ persistent_workers: true
230
+ timeout: 0
231
+ rlds_dataset_name: libero_4_task_suites_no_noops
232
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
233
+ use_wrist_image: true
234
+ use_proprio: true
235
+ rlds_shuffle_buffer_size: 256000
236
+ rlds_traj_threads: 8
237
+ rlds_read_threads: 8
238
+ lerobot_episode_index_start: 353
239
+ lerobot_episode_index_end: 765
240
+ device_eval_batch_size: null
241
+ subset_num_batches: 64
242
+ max_examples: null
243
+ max_new_tokens: 448
244
+ mm_evaluator: null
245
+ save_dir: null
246
+ save_to_checkpoint_dir: false
247
+ eval_name: null
248
+ skip_if_metrics_cached: true
249
+ eval_interval: 0
250
+ inf_eval_interval: -1
251
+ inf_evaluators: []
252
+ save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/glue_flow_matching
253
+ remote_save_folder: null
254
+ canceled_check_interval: 50
255
+ save_interval: 500
256
+ save_interval_unsharded: 500
257
+ save_interval_ephemeral: null
258
+ save_interval_action_head: 500
259
+ save_num_checkpoints_to_keep: 1
260
+ save_num_unsharded_checkpoints_to_keep: 1
261
+ save_num_action_head_checkpoints_to_keep: 2
262
+ save_overwrite: true
263
+ force_save_unsharded: false
264
+ no_pre_train_checkpoint: true
265
+ initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
266
+ load_model_config: null
267
+ checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
268
+ load_path: null
269
+ load_path_sharded_checkpointer: null
270
+ reset_optimizer_state: false
271
+ reset_trainer_state: false
272
+ save_dataloader_state: false
273
+ reset_dataloader_state: false
274
+ keep_lr_on_load: true
275
+ sharded_checkpointer: torch_legacy
276
+ max_duration: 500000
277
+ global_train_batch_size: 126
278
+ device_train_batch_size: 15
279
+ device_train_microbatch_size: 16
280
+ device_eval_batch_size: 4
281
+ eval_subset_num_batches: -1
282
+ eval_on_load: false
283
+ device_inf_eval_batch_size: 16
284
+ inf_eval_subset_num_batches: -1
285
+ device_train_grad_accum: 0
286
+ max_grad_norm: 1.0
287
+ multi_component_grad_norm: true
288
+ batch_divisor: global_batch
289
+ max_grad_norm_ratio: null
290
+ precision: amp_bf16
291
+ wandb:
292
+ project: a1-realworld
293
+ entity: henryeap
294
+ group: null
295
+ name: glue_20251002_163658
296
+ tags:
297
+ - watching
298
+ log_artifacts: false
299
+ rank_zero_only: true
300
+ log_interval: 1
301
+ speed_monitor:
302
+ window_size: 20
303
+ gpu_flops_available: null
304
+ console_log_interval: 1
305
+ gen1_gc_interval: 1
306
+ compile: null
307
+ fsdp:
308
+ use_orig_params: true
309
+ sharding_strategy: FULL_SHARD
310
+ wrapping_strategy: by_block_and_size
311
+ precision: float
312
+ hybrid_sharding_num_model_replicas: null
313
+ softmax_auxiliary_loss: true
314
+ softmax_auxiliary_loss_scale: 0.0001
315
+ time_limit: null
316
+ extra_steps_after_cancel: 10
317
+ python_profiling: false
318
+ torch_profiling: false
319
+ stop_at: 500000
320
+ stop_after: null
321
+ activation_checkpointing: whole_layer
322
+ fused_loss: null
glue_flow_matching/step12000/config.yaml ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: glue_20251002_163658
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 3584
7
+ n_heads: 28
8
+ n_kv_heads: 4
9
+ qkv_bias: true
10
+ clip_qkv: null
11
+ n_layers: 28
12
+ mlp_ratio: 4
13
+ mlp_hidden_size: 37888
14
+ activation_type: swiglu
15
+ block_type: sequential
16
+ block_group_size: 1
17
+ rope: true
18
+ rope_full_precision: true
19
+ rope_theta: 1000000.0
20
+ vision_backbone:
21
+ image_model_type: openai
22
+ image_default_input_size:
23
+ - 336
24
+ - 336
25
+ image_patch_size: 14
26
+ image_pos_patch_size: 14
27
+ image_emb_dim: 1024
28
+ image_num_heads: 16
29
+ image_num_key_value_heads: 16
30
+ image_num_layers: 23
31
+ image_head_dim: 64
32
+ image_mlp_dim: 4096
33
+ image_mlp_activations: quick_gelu
34
+ image_dropout_rate: 0.0
35
+ image_num_pos: 577
36
+ image_norm_eps: 1.0e-05
37
+ attention_dropout: 0.0
38
+ residual_dropout: 0.0
39
+ initializer_range: 0.02
40
+ fsdp_wrap: false
41
+ resize_mode: default
42
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
43
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
44
+ low_cpu_fsdp: true
45
+ attention_type: sdpa
46
+ float32_attention: true
47
+ attention_dropout: 0.0
48
+ attention_layer_norm: false
49
+ residual_dropout: 0.1
50
+ response_residual_dropout: 0.0
51
+ embedding_dropout: 0.0
52
+ layer_norm_type: rms
53
+ layer_norm_with_affine: true
54
+ layer_norm_eps: 1.0e-06
55
+ attention_layer_norm_with_affine: true
56
+ max_sequence_length: 4096
57
+ max_position_embeddings: null
58
+ include_bias: false
59
+ bias_for_layer_norm: null
60
+ scale_logits: false
61
+ vocab_size: 152064
62
+ embedding_size: 152064
63
+ ff_out_size: null
64
+ additional_vocab_size: 128
65
+ new_embedding_init_range: 0.02
66
+ weight_tying: false
67
+ init_device: null
68
+ init_fn: normal
69
+ init_std: 0.02
70
+ init_cutoff_factor: null
71
+ norm_after: false
72
+ precision: amp_bf16
73
+ max_crops: 12
74
+ crop_mode: overlap-and-resize-c2
75
+ use_col_tokens: true
76
+ prompt_type: uber_model
77
+ system_prompt_kind: demo_or_style
78
+ message_formatting: role
79
+ always_start_with_space: true
80
+ multi_annotation_weighting: root_subsegments
81
+ default_inference_len: 65
82
+ overlap_margins:
83
+ - 4
84
+ - 4
85
+ pad_value: 0.0
86
+ image_padding_embed: pad_and_partial_pad
87
+ fix_image_padding: true
88
+ vit_layers:
89
+ - -2
90
+ - -9
91
+ image_pooling_h: 2
92
+ image_pooling_w: 2
93
+ image_pooling_2d: attention_meanq
94
+ image_projector: mlp
95
+ image_feature_dropout: 0.0
96
+ initializer_range: 0.02
97
+ normalize_input_embeds: false
98
+ use_position_ids: true
99
+ head_dim: null
100
+ action_tokenizer:
101
+ identifier: physical-intelligence/fast
102
+ tokenizer_dir: null
103
+ action_dim: 7
104
+ horizon: 8
105
+ tokenizer:
106
+ identifier: Qwen/Qwen2-7B
107
+ tokenizer_dir: null
108
+ pad_tokenizer: true
109
+ moe_num_experts: 8
110
+ moe_top_k: 2
111
+ moe_mlp_impl: sparse
112
+ moe_log_expert_assignment: false
113
+ moe_shared_expert: false
114
+ moe_lbl_in_fp32: false
115
+ moe_interleave: false
116
+ moe_loss_weight: 0.1
117
+ moe_zloss_weight: null
118
+ moe_dropless: true
119
+ moe_capacity_factor: 1.25
120
+ action_head: flow_matching
121
+ num_diffusion_steps: 1000
122
+ num_diffusion_inference_steps: 30
123
+ use_proprio: true
124
+ action_head_dit_hidden_size: 1152
125
+ action_head_dit_depth: 28
126
+ action_head_dit_num_heads: 16
127
+ llm_causal_attention: false
128
+ action_use_left_eef: true
129
+ action_use_mobile_base: false
130
+ allow_resume: false
131
+ ft_llm: true
132
+ ft_vit: false
133
+ ft_connector: false
134
+ ft_embedding: lm_head
135
+ lora: false
136
+ use_lora: true
137
+ lora_rank: 8
138
+ lora_llm: false
139
+ lora_vit: false
140
+ lora_connector: false
141
+ early_exit: false
142
+ train_exit_random_layer: false
143
+ optimizer:
144
+ name: adamw
145
+ learning_rate: 0.0001
146
+ weight_decay: 0.01
147
+ betas:
148
+ - 0.9
149
+ - 0.95
150
+ eps: 1.0e-05
151
+ connector_learning_rate: 0.0002
152
+ vit_learning_rate: 6.0e-06
153
+ llm_learning_rate: 5.0e-05
154
+ connector_weight_decay: 0.0
155
+ vit_weight_decay: 0.0
156
+ llm_weight_decay: 0.0
157
+ connector_betas:
158
+ - 0.9
159
+ - 0.95
160
+ vit_betas:
161
+ - 0.9
162
+ - 0.95
163
+ llm_betas:
164
+ - 0.9
165
+ - 0.95
166
+ connector_eps: 1.0e-06
167
+ vit_eps: 1.0e-06
168
+ llm_eps: 1.0e-06
169
+ metrics_log_interval: 20
170
+ scheduler:
171
+ name: multimodal
172
+ units: steps
173
+ t_warmup: 100
174
+ t_max: null
175
+ alpha_f: 0.1
176
+ connector_t_warmup: 200
177
+ vit_t_warmup: 2000
178
+ llm_t_warmup: 2000
179
+ grad_clip_warmup_steps: null
180
+ grad_clip_warmup_factor: null
181
+ warmup_min_lr: 0.0
182
+ data:
183
+ dataset: vla_dataset_realworld
184
+ mixture: null
185
+ root_size_mixture: null
186
+ split: train
187
+ seed: 95818
188
+ shuffle_messages: false
189
+ pad: to_max
190
+ sequence_length: 1600
191
+ shuffle: true
192
+ for_inference: false
193
+ multi_modal: torch
194
+ num_workers: 0
195
+ drop_last: true
196
+ pin_memory: true
197
+ prefetch_factor: null
198
+ persistent_workers: false
199
+ timeout: 0
200
+ rlds_dataset_name: libero_4_task_suites_no_noops
201
+ rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Glue
202
+ use_wrist_image: true
203
+ use_proprio: true
204
+ rlds_shuffle_buffer_size: 100000
205
+ rlds_traj_threads: 8
206
+ rlds_read_threads: 8
207
+ lerobot_episode_index_start: null
208
+ lerobot_episode_index_end: null
209
+ restore_dataloader: true
210
+ fast_forward_batches: null
211
+ evaluators:
212
+ - label: val
213
+ data:
214
+ dataset: vla_dataset_realworld
215
+ mixture: null
216
+ root_size_mixture: null
217
+ split: validation
218
+ seed: null
219
+ shuffle_messages: false
220
+ pad: to_max
221
+ sequence_length: 1600
222
+ shuffle: false
223
+ for_inference: false
224
+ multi_modal: torch
225
+ num_workers: 0
226
+ drop_last: true
227
+ pin_memory: true
228
+ prefetch_factor: null
229
+ persistent_workers: true
230
+ timeout: 0
231
+ rlds_dataset_name: libero_4_task_suites_no_noops
232
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
233
+ use_wrist_image: true
234
+ use_proprio: true
235
+ rlds_shuffle_buffer_size: 256000
236
+ rlds_traj_threads: 8
237
+ rlds_read_threads: 8
238
+ lerobot_episode_index_start: 353
239
+ lerobot_episode_index_end: 765
240
+ device_eval_batch_size: null
241
+ subset_num_batches: 64
242
+ max_examples: null
243
+ max_new_tokens: 448
244
+ mm_evaluator: null
245
+ save_dir: null
246
+ save_to_checkpoint_dir: false
247
+ eval_name: null
248
+ skip_if_metrics_cached: true
249
+ eval_interval: 0
250
+ inf_eval_interval: -1
251
+ inf_evaluators: []
252
+ save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/glue_flow_matching
253
+ remote_save_folder: null
254
+ canceled_check_interval: 50
255
+ save_interval: 500
256
+ save_interval_unsharded: 500
257
+ save_interval_ephemeral: null
258
+ save_interval_action_head: 500
259
+ save_num_checkpoints_to_keep: 1
260
+ save_num_unsharded_checkpoints_to_keep: 1
261
+ save_num_action_head_checkpoints_to_keep: 2
262
+ save_overwrite: true
263
+ force_save_unsharded: false
264
+ no_pre_train_checkpoint: true
265
+ initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
266
+ load_model_config: null
267
+ checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
268
+ load_path: null
269
+ load_path_sharded_checkpointer: null
270
+ reset_optimizer_state: false
271
+ reset_trainer_state: false
272
+ save_dataloader_state: false
273
+ reset_dataloader_state: false
274
+ keep_lr_on_load: true
275
+ sharded_checkpointer: torch_legacy
276
+ max_duration: 500000
277
+ global_train_batch_size: 126
278
+ device_train_batch_size: 15
279
+ device_train_microbatch_size: 16
280
+ device_eval_batch_size: 4
281
+ eval_subset_num_batches: -1
282
+ eval_on_load: false
283
+ device_inf_eval_batch_size: 16
284
+ inf_eval_subset_num_batches: -1
285
+ device_train_grad_accum: 0
286
+ max_grad_norm: 1.0
287
+ multi_component_grad_norm: true
288
+ batch_divisor: global_batch
289
+ max_grad_norm_ratio: null
290
+ precision: amp_bf16
291
+ wandb:
292
+ project: a1-realworld
293
+ entity: henryeap
294
+ group: null
295
+ name: glue_20251002_163658
296
+ tags:
297
+ - watching
298
+ log_artifacts: false
299
+ rank_zero_only: true
300
+ log_interval: 1
301
+ speed_monitor:
302
+ window_size: 20
303
+ gpu_flops_available: null
304
+ console_log_interval: 1
305
+ gen1_gc_interval: 1
306
+ compile: null
307
+ fsdp:
308
+ use_orig_params: true
309
+ sharding_strategy: FULL_SHARD
310
+ wrapping_strategy: by_block_and_size
311
+ precision: float
312
+ hybrid_sharding_num_model_replicas: null
313
+ softmax_auxiliary_loss: true
314
+ softmax_auxiliary_loss_scale: 0.0001
315
+ time_limit: null
316
+ extra_steps_after_cancel: 10
317
+ python_profiling: false
318
+ torch_profiling: false
319
+ stop_at: 500000
320
+ stop_after: null
321
+ activation_checkpointing: whole_layer
322
+ fused_loss: null
glue_flow_matching/wandb/wandb/debug-internal.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-10-02T16:37:29.207693207Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
2
+ {"time":"2025-10-02T16:37:30.340213302Z","level":"INFO","msg":"stream: created new stream","id":"tmwli25x"}
3
+ {"time":"2025-10-02T16:37:30.340254592Z","level":"INFO","msg":"stream: started","id":"tmwli25x"}
4
+ {"time":"2025-10-02T16:37:30.340274553Z","level":"INFO","msg":"writer: started","stream_id":"tmwli25x"}
5
+ {"time":"2025-10-02T16:37:30.340287593Z","level":"INFO","msg":"handler: started","stream_id":"tmwli25x"}
6
+ {"time":"2025-10-02T16:37:30.340319673Z","level":"INFO","msg":"sender: started","stream_id":"tmwli25x"}
glue_flow_matching/wandb/wandb/debug.log ADDED
File without changes
glue_flow_matching/wandb/wandb/run-20251002_163728-tmwli25x/files/output.log ADDED
The diff for this file is too large to render. See raw diff
 
glue_flow_matching/wandb/wandb/run-20251002_163728-tmwli25x/files/requirements.txt ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ai2-molmo==0.0.0
2
+ astunparse==1.6.3
3
+ flatbuffers==25.2.10
4
+ gast==0.6.0
5
+ google-pasta==0.2.0
6
+ h5py==3.14.0
7
+ libclang==18.1.1
8
+ Markdown==3.9
9
+ namex==0.1.0
10
+ opt_einsum==3.4.0
11
+ optree==0.17.0
12
+ tensorboard-data-server==0.7.2
13
+ tensorflow-io-gcs-filesystem==0.37.1
14
+ termcolor==3.1.0
15
+ Werkzeug==3.1.3
16
+ Brotli==1.1.0
17
+ Farama-Notifications==0.0.4
18
+ MarkupSafe==2.1.5
19
+ PyYAML==6.0.2
20
+ absl-py==2.3.1
21
+ accelerate==1.10.1
22
+ ai2-molmo==0.0.0
23
+ aiofiles==24.1.0
24
+ aiohappyeyeballs==2.6.1
25
+ aiohttp==3.12.15
26
+ aiosignal==1.4.0
27
+ annotated-types==0.7.0
28
+ antlr4-python3-runtime==4.9.3
29
+ anyio==4.10.0
30
+ array_record==0.8.1
31
+ async-timeout==5.0.1
32
+ attrs==25.3.0
33
+ av==15.1.0
34
+ backports.tarfile==1.2.0
35
+ beaker-gantry==3.2.0
36
+ beaker-py==2.5.0
37
+ black==23.12.1
38
+ blinker==1.9.0
39
+ boltons==25.0.0
40
+ boto3==1.40.33
41
+ botocore==1.40.33
42
+ build==1.3.0
43
+ cached_path==1.7.3
44
+ cached-property==2.0.1
45
+ cachetools==5.5.2
46
+ certifi==2025.8.3
47
+ cffi==2.0.0
48
+ charset-normalizer==3.4.3
49
+ click==8.2.1
50
+ click-help-colors==0.9.4
51
+ click-option-group==0.5.7
52
+ cloudpickle==3.1.1
53
+ cmake==4.1.0
54
+ contourpy==1.3.2
55
+ cryptography==46.0.1
56
+ cycler==0.12.1
57
+ dataclass-extensions==0.2.3
58
+ datasets==3.6.0
59
+ decorator==5.2.1
60
+ deepdiff==8.6.1
61
+ diffusers==0.35.1
62
+ dill==0.3.8
63
+ distro==1.9.0
64
+ dlimp==0.0.1
65
+ dm-tree==0.1.9
66
+ docutils==0.22.1
67
+ draccus==0.10.0
68
+ editdistance==0.8.1
69
+ einops==0.8.1
70
+ einops-exts==0.0.4
71
+ et_xmlfile==2.0.0
72
+ etils==1.13.0
73
+ evdev==1.9.2
74
+ exceptiongroup==1.3.0
75
+ face==24.0.0
76
+ fastapi==0.116.2
77
+ ffmpy==0.6.1
78
+ fiddle==0.3.0
79
+ filelock==3.13.1
80
+ Flask==3.1.2
81
+ fonttools==4.60.0
82
+ frozenlist==1.7.0
83
+ fsspec==2023.9.2
84
+ ftfy==6.3.1
85
+ gcsfs==2023.9.2
86
+ gitdb==4.0.12
87
+ GitPython==3.1.45
88
+ glom==24.11.0
89
+ google-api-core==2.25.1
90
+ google-auth==2.40.3
91
+ google-auth-oauthlib==1.2.2
92
+ google-cloud-core==2.4.3
93
+ google-cloud-storage==2.19.0
94
+ google-crc32c==1.7.1
95
+ google-resumable-media==2.7.2
96
+ googleapis-common-protos==1.70.0
97
+ gradio==5.46.0
98
+ gradio_client==1.13.0
99
+ graphviz==0.21
100
+ groovy==0.1.2
101
+ grpcio==1.75.0
102
+ gymnasium==0.29.1
103
+ h11==0.16.0
104
+ hf_transfer==0.1.9
105
+ hf-xet==1.1.10
106
+ httpcore==1.0.9
107
+ httpx==0.28.1
108
+ huggingface-hub==0.35.0
109
+ id==1.5.0
110
+ idna==3.10
111
+ imageio==2.37.0
112
+ imageio-ffmpeg==0.6.0
113
+ importlib_metadata==8.7.0
114
+ importlib_resources==6.5.2
115
+ iniconfig==2.1.0
116
+ inquirerpy==0.3.4
117
+ isort==5.12.0
118
+ itsdangerous==2.2.0
119
+ jaraco.classes==3.4.0
120
+ jaraco.context==6.0.1
121
+ jaraco.functools==4.3.0
122
+ jeepney==0.9.0
123
+ Jinja2==3.1.4
124
+ jiter==0.11.0
125
+ jmespath==1.0.1
126
+ joblib==1.5.2
127
+ jsonlines==4.0.0
128
+ keras==2.15.0
129
+ keyring==25.6.0
130
+ kiwisolver==1.4.9
131
+ latex2sympy2_extended==1.10.2
132
+ lerobot==0.3.4
133
+ Levenshtein==0.27.1
134
+ libcst==1.8.4
135
+ lightning-utilities==0.15.2
136
+ markdown-it-py==4.0.0
137
+ math-verify==0.8.0
138
+ matplotlib==3.10.6
139
+ mdurl==0.1.2
140
+ mergedeep==1.3.4
141
+ ml-dtypes==0.2.0
142
+ ml_dtypes==0.5.3
143
+ more-itertools==10.8.0
144
+ mpmath==1.3.0
145
+ msgspec==0.19.0
146
+ multidict==6.6.4
147
+ multiprocess==0.70.16
148
+ mypy==1.3.0
149
+ mypy_extensions==1.1.0
150
+ necessary==0.4.3
151
+ networkx==3.3
152
+ nh3==0.3.0
153
+ nltk==3.9.1
154
+ numpy==1.26.4
155
+ oauthlib==3.3.1
156
+ omegaconf==2.3.0
157
+ openai==1.108.0
158
+ opencv-python-headless==4.12.0.88
159
+ OpenEXR==3.4.0
160
+ openpyxl==3.1.5
161
+ orderly-set==5.5.0
162
+ orjson==3.11.3
163
+ packaging==25.0
164
+ pandas==2.3.2
165
+ pathspec==0.12.1
166
+ petname==2.6
167
+ pfzy==0.3.4
168
+ pillow==11.0.0
169
+ pip==25.2
170
+ platformdirs==4.4.0
171
+ pluggy==1.6.0
172
+ promise==2.3
173
+ prompt_toolkit==3.0.52
174
+ propcache==0.3.2
175
+ proto-plus==1.26.1
176
+ protobuf==4.21.12
177
+ protobuf==6.32.1
178
+ psutil==7.1.0
179
+ pyarrow==21.0.0
180
+ pyasn1==0.6.1
181
+ pyasn1_modules==0.4.2
182
+ pycparser==2.23
183
+ pydantic==2.11.9
184
+ pydantic_core==2.33.2
185
+ pydub==0.25.1
186
+ Pygments==2.19.2
187
+ pynput==1.8.1
188
+ pyparsing==3.2.4
189
+ pyproject_hooks==1.2.0
190
+ pyserial==3.5
191
+ pytest==8.4.2
192
+ pytest-sphinx==0.6.3
193
+ python-dateutil==2.9.0.post0
194
+ python-Levenshtein==0.27.1
195
+ python-multipart==0.0.20
196
+ python-xlib==0.33
197
+ pytorch-triton-rocm==3.4.0
198
+ pytz==2025.2
199
+ pyyaml-include==1.4.1
200
+ RapidFuzz==3.14.1
201
+ readme_renderer==44.0
202
+ regex==2025.9.1
203
+ requests==2.32.5
204
+ requests-oauthlib==2.0.0
205
+ requests-toolbelt==1.0.0
206
+ requirements-parser==0.13.0
207
+ rerun-sdk==0.22.1
208
+ rfc3986==2.0.0
209
+ rich==13.9.4
210
+ rsa==4.9.1
211
+ ruff==0.13.0
212
+ s3transfer==0.14.0
213
+ safehttpx==0.1.6
214
+ safetensors==0.6.2
215
+ scikit-learn==1.7.2
216
+ scipy==1.15.3
217
+ SecretStorage==3.4.0
218
+ semantic-version==2.10.0
219
+ sentencepiece==0.2.1
220
+ sentry-sdk==2.38.0
221
+ setuptools==78.1.1
222
+ shellingham==1.5.4
223
+ six==1.17.0
224
+ smart_open==7.3.1
225
+ smashed==0.21.5
226
+ smmap==5.0.2
227
+ sniffio==1.3.1
228
+ starlette==0.48.0
229
+ sympy==1.13.3
230
+ tensorboard==2.15.2
231
+ tensorboard==2.19.0
232
+ tensorflow==2.15.0
233
+ tensorflow-addons==0.23.0
234
+ tensorflow-datasets==4.9.3
235
+ tensorflow-estimator==2.15.0
236
+ tensorflow-graphics==2021.12.3
237
+ tensorflow-metadata==1.17.2
238
+ threadpoolctl==3.6.0
239
+ timm==1.0.19
240
+ tokenizers==0.22.0
241
+ toml==0.10.2
242
+ tomli==2.2.1
243
+ tomlkit==0.13.3
244
+ torch==2.8.0+rocm6.4
245
+ torchcodec==0.5
246
+ torchmetrics==1.8.2
247
+ torchvision==0.23.0+rocm6.4
248
+ tqdm==4.67.1
249
+ transformers==4.56.1
250
+ trimesh==4.8.2
251
+ trouting==0.3.3
252
+ twine==6.2.0
253
+ typeguard==2.13.3
254
+ typer==0.17.4
255
+ typing_extensions==4.15.0
256
+ typing-inspect==0.9.0
257
+ typing-inspection==0.4.1
258
+ tzdata==2025.2
259
+ urllib3==2.5.0
260
+ uvicorn==0.35.0
261
+ wandb==0.21.4
262
+ wcwidth==0.2.13
263
+ websockets==15.0.1
264
+ wheel==0.45.1
265
+ wrapt==1.14.2
266
+ xxhash==3.5.0
267
+ yarl==1.20.1
268
+ zipp==3.23.0
269
+ lerobot==0.3.4
270
+ minLoRA==0.1.0
271
+ autocommand==2.2.2
272
+ backports.tarfile==1.2.0
273
+ importlib_metadata==8.0.0
274
+ inflect==7.3.1
275
+ jaraco.collections==5.1.0
276
+ jaraco.context==5.3.0
277
+ jaraco.functools==4.0.1
278
+ jaraco.text==3.12.1
279
+ more-itertools==10.3.0
280
+ packaging==24.2
281
+ platformdirs==4.2.2
282
+ tomli==2.0.1
283
+ typeguard==4.3.0
284
+ typing_extensions==4.12.2
285
+ wheel==0.45.1
286
+ zipp==3.19.2
glue_flow_matching/wandb/wandb/run-20251002_163728-tmwli25x/files/wandb-metadata.json ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.18",
4
+ "startedAt": "2025-10-02T16:37:28.959576Z",
5
+ "args": [
6
+ "qwen2_7b",
7
+ "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/glue_flow_matching",
8
+ "--vision_backbone",
9
+ "openai",
10
+ "--action_head",
11
+ "flow_matching",
12
+ "--seq_len",
13
+ "1600",
14
+ "--ft_llm",
15
+ "--checkpoint",
16
+ "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
17
+ "--device_train_microbatch_size",
18
+ "16",
19
+ "--global_batch_size",
20
+ "126",
21
+ "--dataset",
22
+ "vla_dataset_realworld",
23
+ "--llm_learning_rate",
24
+ "5e-5",
25
+ "--wandb_entity",
26
+ "henryeap",
27
+ "--wandb_project",
28
+ "a1-realworld",
29
+ "--wandb_run_name",
30
+ "glue",
31
+ "--real_world_vla_config_path",
32
+ "vla_config_realworld/vla_config_glue.yaml",
33
+ "--save_overwrite"
34
+ ],
35
+ "program": "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
36
+ "codePath": "launch_scripts/train_vla.py",
37
+ "codePathLocal": "launch_scripts/train_vla.py",
38
+ "git": {
39
+ "remote": "https://github.com/Spatialtemporal-AI/A1.git",
40
+ "commit": "5071f59d87c6a976691323cbac66d7a988b0b4e7"
41
+ },
42
+ "email": "ihenrykwok@outlook.com",
43
+ "root": "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/glue_flow_matching/wandb",
44
+ "host": "auh7-1b-gpu-260",
45
+ "executable": "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
46
+ "cpu_count": 64,
47
+ "cpu_count_logical": 128,
48
+ "gpu": "Instinct MI210",
49
+ "gpu_count": 8,
50
+ "disk": {
51
+ "/": {
52
+ "total": "470343073792",
53
+ "used": "56243757056"
54
+ }
55
+ },
56
+ "memory": {
57
+ "total": "2434606956544"
58
+ },
59
+ "gpu_amd": [
60
+ {
61
+ "id": "2",
62
+ "uniqueId": "0x9815965a899d8053",
63
+ "vbiosVersion": "113-D67301V-073",
64
+ "performanceLevel": "auto",
65
+ "maxPower": "300.0",
66
+ "series": "Instinct MI210",
67
+ "model": "0x740f",
68
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
69
+ "sku": "D67301V",
70
+ "sclkRange": "500Mhz - 1700Mhz",
71
+ "mclkRange": "400Mhz - 1600Mhz"
72
+ },
73
+ {
74
+ "id": "6",
75
+ "uniqueId": "0x2d75dae36f0dc353",
76
+ "vbiosVersion": "113-D67301V-073",
77
+ "performanceLevel": "auto",
78
+ "maxPower": "300.0",
79
+ "series": "Instinct MI210",
80
+ "model": "0x740f",
81
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
82
+ "sku": "D67301V",
83
+ "sclkRange": "500Mhz - 1700Mhz",
84
+ "mclkRange": "400Mhz - 1600Mhz"
85
+ },
86
+ {
87
+ "id": "7",
88
+ "uniqueId": "0x702e8efb76b00c21",
89
+ "vbiosVersion": "113-D67301V-073",
90
+ "performanceLevel": "auto",
91
+ "maxPower": "300.0",
92
+ "series": "Instinct MI210",
93
+ "model": "0x740f",
94
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
95
+ "sku": "D67301V",
96
+ "sclkRange": "500Mhz - 1700Mhz",
97
+ "mclkRange": "400Mhz - 1600Mhz"
98
+ },
99
+ {
100
+ "id": "5",
101
+ "uniqueId": "0xd79d4a081e34548d",
102
+ "vbiosVersion": "113-D67301V-073",
103
+ "performanceLevel": "auto",
104
+ "maxPower": "300.0",
105
+ "series": "Instinct MI210",
106
+ "model": "0x740f",
107
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
108
+ "sku": "D67301V",
109
+ "sclkRange": "500Mhz - 1700Mhz",
110
+ "mclkRange": "400Mhz - 1600Mhz"
111
+ },
112
+ {
113
+ "id": "1",
114
+ "uniqueId": "0xe35cdba2e3fafd21",
115
+ "vbiosVersion": "113-D67301V-073",
116
+ "performanceLevel": "auto",
117
+ "maxPower": "300.0",
118
+ "series": "Instinct MI210",
119
+ "model": "0x740f",
120
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
121
+ "sku": "D67301V",
122
+ "sclkRange": "500Mhz - 1700Mhz",
123
+ "mclkRange": "400Mhz - 1600Mhz"
124
+ },
125
+ {
126
+ "id": "3",
127
+ "uniqueId": "0xd7a6e11358a6574d",
128
+ "vbiosVersion": "113-D67301V-073",
129
+ "performanceLevel": "auto",
130
+ "maxPower": "300.0",
131
+ "series": "Instinct MI210",
132
+ "model": "0x740f",
133
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
134
+ "sku": "D67301V",
135
+ "sclkRange": "500Mhz - 1700Mhz",
136
+ "mclkRange": "400Mhz - 1600Mhz"
137
+ },
138
+ {
139
+ "id": "4",
140
+ "uniqueId": "0x4493708eee1ee737",
141
+ "vbiosVersion": "113-D67301V-073",
142
+ "performanceLevel": "auto",
143
+ "maxPower": "300.0",
144
+ "series": "Instinct MI210",
145
+ "model": "0x740f",
146
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
147
+ "sku": "D67301V",
148
+ "sclkRange": "500Mhz - 1700Mhz",
149
+ "mclkRange": "400Mhz - 1600Mhz"
150
+ },
151
+ {
152
+ "id": "0",
153
+ "uniqueId": "0x4213cc9eeeefc98d",
154
+ "vbiosVersion": "113-D67301V-073",
155
+ "performanceLevel": "auto",
156
+ "maxPower": "300.0",
157
+ "series": "Instinct MI210",
158
+ "model": "0x740f",
159
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
160
+ "sku": "D67301V",
161
+ "sclkRange": "500Mhz - 1700Mhz",
162
+ "mclkRange": "400Mhz - 1600Mhz"
163
+ }
164
+ ],
165
+ "slurm": {
166
+ "cluster_name": "ai-04r",
167
+ "conf": "/etc/slurm/slurm.conf",
168
+ "cpus_on_node": "128",
169
+ "gpus_on_node": "8",
170
+ "gtids": "0",
171
+ "job_account": "faculty-acc",
172
+ "job_cpus_per_node": "128",
173
+ "job_end_time": "1759682204",
174
+ "job_gid": "2000",
175
+ "job_gpus": "0,1,2,3,4,5,6,7",
176
+ "job_id": "2277",
177
+ "job_name": "mh_glue_flow_matching",
178
+ "job_nodelist": "auh7-1b-gpu-260",
179
+ "job_num_nodes": "1",
180
+ "job_partition": "faculty",
181
+ "job_qos": "xdqos",
182
+ "job_start_time": "1759423004",
183
+ "job_uid": "2013",
184
+ "job_user": "xiaodan",
185
+ "jobid": "2277",
186
+ "localid": "0",
187
+ "nnodes": "1",
188
+ "nodeid": "0",
189
+ "nodelist": "auh7-1b-gpu-260",
190
+ "nprocs": "1",
191
+ "ntasks": "1",
192
+ "ntasks_per_node": "1",
193
+ "oom_kill_step": "0",
194
+ "prio_process": "0",
195
+ "procid": "0",
196
+ "submit_dir": "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
197
+ "submit_host": "auh-1b-cpu-login-001",
198
+ "task_pid": "2574600",
199
+ "tasks_per_node": "1",
200
+ "topology_addr": "auh7-1b-gpu-260",
201
+ "topology_addr_pattern": "node"
202
+ },
203
+ "writerId": "bkls6pwuvb5z6spobbikig7vp96dw2y9"
204
+ }
glue_flow_matching/wandb/wandb/run-20251002_163728-tmwli25x/logs/debug-core.log ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-10-02T16:37:29.014245666Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpusl77j3_/port-2574789.txt","pid":2574789,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-10-02T16:37:29.016062848Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":2574789}
3
+ {"time":"2025-10-02T16:37:29.016006867Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2574789-2574955-4107859315/socket","Net":"unix"}}
4
+ {"time":"2025-10-02T16:37:29.198285234Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-10-02T16:37:29.205734663Z","level":"INFO","msg":"handleInformInit: received","streamId":"tmwli25x","id":"1(@)"}
6
+ {"time":"2025-10-02T16:37:30.340260012Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"tmwli25x","id":"1(@)"}
7
+ .txt","pid":3780083,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
8
+ {"time":"2025-10-02T16:37:29.282444644Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":3780083}
9
+ {"time":"2025-10-02T16:37:29.282434424Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-3780083-3780247-3093816148/socket","Net":"unix"}}
10
+ {"time":"2025-10-02T16:37:29.458879988Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
11
+ {"time":"2025-10-02T16:37:29.465619355Z","level":"INFO","msg":"handleInformInit: received","streamId":"7ovz4jzt","id":"1(@)"}
12
+ {"time":"2025-10-02T16:37:30.493288413Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"7ovz4jzt","id":"1(@)"}
glue_flow_matching/wandb/wandb/run-20251002_163728-tmwli25x/logs/debug-internal.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-10-02T16:37:29.207693207Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
2
+ {"time":"2025-10-02T16:37:30.340213302Z","level":"INFO","msg":"stream: created new stream","id":"tmwli25x"}
3
+ {"time":"2025-10-02T16:37:30.340254592Z","level":"INFO","msg":"stream: started","id":"tmwli25x"}
4
+ {"time":"2025-10-02T16:37:30.340274553Z","level":"INFO","msg":"writer: started","stream_id":"tmwli25x"}
5
+ {"time":"2025-10-02T16:37:30.340287593Z","level":"INFO","msg":"handler: started","stream_id":"tmwli25x"}
6
+ {"time":"2025-10-02T16:37:30.340319673Z","level":"INFO","msg":"sender: started","stream_id":"tmwli25x"}
glue_flow_matching/wandb/wandb/run-20251002_163728-tmwli25x/logs/debug.log ADDED
File without changes
glue_l1_regression/step12000-unsharded/config.yaml ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: glue_20251002_163658
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 3584
7
+ n_heads: 28
8
+ n_kv_heads: 4
9
+ qkv_bias: true
10
+ clip_qkv: null
11
+ n_layers: 28
12
+ mlp_ratio: 4
13
+ mlp_hidden_size: 37888
14
+ activation_type: swiglu
15
+ block_type: sequential
16
+ block_group_size: 1
17
+ rope: true
18
+ rope_full_precision: true
19
+ rope_theta: 1000000.0
20
+ vision_backbone:
21
+ image_model_type: openai
22
+ image_default_input_size:
23
+ - 336
24
+ - 336
25
+ image_patch_size: 14
26
+ image_pos_patch_size: 14
27
+ image_emb_dim: 1024
28
+ image_num_heads: 16
29
+ image_num_key_value_heads: 16
30
+ image_num_layers: 23
31
+ image_head_dim: 64
32
+ image_mlp_dim: 4096
33
+ image_mlp_activations: quick_gelu
34
+ image_dropout_rate: 0.0
35
+ image_num_pos: 577
36
+ image_norm_eps: 1.0e-05
37
+ attention_dropout: 0.0
38
+ residual_dropout: 0.0
39
+ initializer_range: 0.02
40
+ fsdp_wrap: false
41
+ resize_mode: default
42
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
43
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
44
+ low_cpu_fsdp: true
45
+ attention_type: sdpa
46
+ float32_attention: true
47
+ attention_dropout: 0.0
48
+ attention_layer_norm: false
49
+ residual_dropout: 0.1
50
+ response_residual_dropout: 0.0
51
+ embedding_dropout: 0.0
52
+ layer_norm_type: rms
53
+ layer_norm_with_affine: true
54
+ layer_norm_eps: 1.0e-06
55
+ attention_layer_norm_with_affine: true
56
+ max_sequence_length: 4096
57
+ max_position_embeddings: null
58
+ include_bias: false
59
+ bias_for_layer_norm: null
60
+ scale_logits: false
61
+ vocab_size: 152064
62
+ embedding_size: 152064
63
+ ff_out_size: null
64
+ additional_vocab_size: 128
65
+ new_embedding_init_range: 0.02
66
+ weight_tying: false
67
+ init_device: null
68
+ init_fn: normal
69
+ init_std: 0.02
70
+ init_cutoff_factor: null
71
+ norm_after: false
72
+ precision: amp_bf16
73
+ max_crops: 12
74
+ crop_mode: overlap-and-resize-c2
75
+ use_col_tokens: true
76
+ prompt_type: uber_model
77
+ system_prompt_kind: demo_or_style
78
+ message_formatting: role
79
+ always_start_with_space: true
80
+ multi_annotation_weighting: root_subsegments
81
+ default_inference_len: 65
82
+ overlap_margins:
83
+ - 4
84
+ - 4
85
+ pad_value: 0.0
86
+ image_padding_embed: pad_and_partial_pad
87
+ fix_image_padding: true
88
+ vit_layers:
89
+ - -2
90
+ - -9
91
+ image_pooling_h: 2
92
+ image_pooling_w: 2
93
+ image_pooling_2d: attention_meanq
94
+ image_projector: mlp
95
+ image_feature_dropout: 0.0
96
+ initializer_range: 0.02
97
+ normalize_input_embeds: false
98
+ use_position_ids: true
99
+ head_dim: null
100
+ action_tokenizer:
101
+ identifier: physical-intelligence/fast
102
+ tokenizer_dir: null
103
+ action_dim: 7
104
+ horizon: 8
105
+ tokenizer:
106
+ identifier: Qwen/Qwen2-7B
107
+ tokenizer_dir: null
108
+ pad_tokenizer: true
109
+ moe_num_experts: 8
110
+ moe_top_k: 2
111
+ moe_mlp_impl: sparse
112
+ moe_log_expert_assignment: false
113
+ moe_shared_expert: false
114
+ moe_lbl_in_fp32: false
115
+ moe_interleave: false
116
+ moe_loss_weight: 0.1
117
+ moe_zloss_weight: null
118
+ moe_dropless: true
119
+ moe_capacity_factor: 1.25
120
+ action_head: l1_regression
121
+ num_diffusion_steps: 1000
122
+ num_diffusion_inference_steps: 30
123
+ use_proprio: true
124
+ action_head_dit_hidden_size: 1152
125
+ action_head_dit_depth: 28
126
+ action_head_dit_num_heads: 16
127
+ llm_causal_attention: false
128
+ action_use_left_eef: true
129
+ action_use_mobile_base: false
130
+ allow_resume: false
131
+ ft_llm: true
132
+ ft_vit: false
133
+ ft_connector: false
134
+ ft_embedding: lm_head
135
+ lora: false
136
+ use_lora: true
137
+ lora_rank: 8
138
+ lora_llm: false
139
+ lora_vit: false
140
+ lora_connector: false
141
+ early_exit: false
142
+ train_exit_random_layer: false
143
+ optimizer:
144
+ name: adamw
145
+ learning_rate: 0.0001
146
+ weight_decay: 0.01
147
+ betas:
148
+ - 0.9
149
+ - 0.95
150
+ eps: 1.0e-05
151
+ connector_learning_rate: 0.0002
152
+ vit_learning_rate: 6.0e-06
153
+ llm_learning_rate: 5.0e-05
154
+ connector_weight_decay: 0.0
155
+ vit_weight_decay: 0.0
156
+ llm_weight_decay: 0.0
157
+ connector_betas:
158
+ - 0.9
159
+ - 0.95
160
+ vit_betas:
161
+ - 0.9
162
+ - 0.95
163
+ llm_betas:
164
+ - 0.9
165
+ - 0.95
166
+ connector_eps: 1.0e-06
167
+ vit_eps: 1.0e-06
168
+ llm_eps: 1.0e-06
169
+ metrics_log_interval: 20
170
+ scheduler:
171
+ name: multimodal
172
+ units: steps
173
+ t_warmup: 100
174
+ t_max: null
175
+ alpha_f: 0.1
176
+ connector_t_warmup: 200
177
+ vit_t_warmup: 2000
178
+ llm_t_warmup: 2000
179
+ grad_clip_warmup_steps: null
180
+ grad_clip_warmup_factor: null
181
+ warmup_min_lr: 0.0
182
+ data:
183
+ dataset: vla_dataset_realworld
184
+ mixture: null
185
+ root_size_mixture: null
186
+ split: train
187
+ seed: 95818
188
+ shuffle_messages: false
189
+ pad: to_max
190
+ sequence_length: 1600
191
+ shuffle: true
192
+ for_inference: false
193
+ multi_modal: torch
194
+ num_workers: 0
195
+ drop_last: true
196
+ pin_memory: true
197
+ prefetch_factor: null
198
+ persistent_workers: false
199
+ timeout: 0
200
+ rlds_dataset_name: libero_4_task_suites_no_noops
201
+ rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Glue
202
+ use_wrist_image: true
203
+ use_proprio: true
204
+ rlds_shuffle_buffer_size: 100000
205
+ rlds_traj_threads: 8
206
+ rlds_read_threads: 8
207
+ lerobot_episode_index_start: null
208
+ lerobot_episode_index_end: null
209
+ restore_dataloader: true
210
+ fast_forward_batches: null
211
+ evaluators:
212
+ - label: val
213
+ data:
214
+ dataset: vla_dataset_realworld
215
+ mixture: null
216
+ root_size_mixture: null
217
+ split: validation
218
+ seed: null
219
+ shuffle_messages: false
220
+ pad: to_max
221
+ sequence_length: 1600
222
+ shuffle: false
223
+ for_inference: false
224
+ multi_modal: torch
225
+ num_workers: 0
226
+ drop_last: true
227
+ pin_memory: true
228
+ prefetch_factor: null
229
+ persistent_workers: true
230
+ timeout: 0
231
+ rlds_dataset_name: libero_4_task_suites_no_noops
232
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
233
+ use_wrist_image: true
234
+ use_proprio: true
235
+ rlds_shuffle_buffer_size: 256000
236
+ rlds_traj_threads: 8
237
+ rlds_read_threads: 8
238
+ lerobot_episode_index_start: 353
239
+ lerobot_episode_index_end: 765
240
+ device_eval_batch_size: null
241
+ subset_num_batches: 64
242
+ max_examples: null
243
+ max_new_tokens: 448
244
+ mm_evaluator: null
245
+ save_dir: null
246
+ save_to_checkpoint_dir: false
247
+ eval_name: null
248
+ skip_if_metrics_cached: true
249
+ eval_interval: 0
250
+ inf_eval_interval: -1
251
+ inf_evaluators: []
252
+ save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/glue_l1_regression
253
+ remote_save_folder: null
254
+ canceled_check_interval: 50
255
+ save_interval: 500
256
+ save_interval_unsharded: 500
257
+ save_interval_ephemeral: null
258
+ save_interval_action_head: 500
259
+ save_num_checkpoints_to_keep: 1
260
+ save_num_unsharded_checkpoints_to_keep: 1
261
+ save_num_action_head_checkpoints_to_keep: 2
262
+ save_overwrite: true
263
+ force_save_unsharded: false
264
+ no_pre_train_checkpoint: true
265
+ initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
266
+ load_model_config: null
267
+ checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
268
+ load_path: null
269
+ load_path_sharded_checkpointer: null
270
+ reset_optimizer_state: false
271
+ reset_trainer_state: false
272
+ save_dataloader_state: false
273
+ reset_dataloader_state: false
274
+ keep_lr_on_load: true
275
+ sharded_checkpointer: torch_legacy
276
+ max_duration: 500000
277
+ global_train_batch_size: 126
278
+ device_train_batch_size: 15
279
+ device_train_microbatch_size: 16
280
+ device_eval_batch_size: 4
281
+ eval_subset_num_batches: -1
282
+ eval_on_load: false
283
+ device_inf_eval_batch_size: 16
284
+ inf_eval_subset_num_batches: -1
285
+ device_train_grad_accum: 0
286
+ max_grad_norm: 1.0
287
+ multi_component_grad_norm: true
288
+ batch_divisor: global_batch
289
+ max_grad_norm_ratio: null
290
+ precision: amp_bf16
291
+ wandb:
292
+ project: a1-realworld
293
+ entity: henryeap
294
+ group: null
295
+ name: glue_20251002_163658
296
+ tags:
297
+ - watching
298
+ log_artifacts: false
299
+ rank_zero_only: true
300
+ log_interval: 1
301
+ speed_monitor:
302
+ window_size: 20
303
+ gpu_flops_available: null
304
+ console_log_interval: 1
305
+ gen1_gc_interval: 1
306
+ compile: null
307
+ fsdp:
308
+ use_orig_params: true
309
+ sharding_strategy: FULL_SHARD
310
+ wrapping_strategy: by_block_and_size
311
+ precision: float
312
+ hybrid_sharding_num_model_replicas: null
313
+ softmax_auxiliary_loss: true
314
+ softmax_auxiliary_loss_scale: 0.0001
315
+ time_limit: null
316
+ extra_steps_after_cancel: 10
317
+ python_profiling: false
318
+ torch_profiling: false
319
+ stop_at: 500000
320
+ stop_after: null
321
+ activation_checkpointing: whole_layer
322
+ fused_loss: null
glue_l1_regression/wandb/wandb/debug-internal.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-10-02T16:37:29.467576263Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
2
+ {"time":"2025-10-02T16:37:30.493240032Z","level":"INFO","msg":"stream: created new stream","id":"7ovz4jzt"}
3
+ {"time":"2025-10-02T16:37:30.493282902Z","level":"INFO","msg":"stream: started","id":"7ovz4jzt"}
4
+ {"time":"2025-10-02T16:37:30.493310273Z","level":"INFO","msg":"writer: started","stream_id":"7ovz4jzt"}
5
+ {"time":"2025-10-02T16:37:30.493324013Z","level":"INFO","msg":"sender: started","stream_id":"7ovz4jzt"}
6
+ {"time":"2025-10-02T16:37:30.493358514Z","level":"INFO","msg":"handler: started","stream_id":"7ovz4jzt"}
glue_l1_regression/wandb/wandb/debug.log ADDED
File without changes
glue_l1_regression/wandb/wandb/run-20251002_163729-7ovz4jzt/files/output.log ADDED
The diff for this file is too large to render. See raw diff
 
glue_l1_regression/wandb/wandb/run-20251002_163729-7ovz4jzt/files/requirements.txt ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ai2-molmo==0.0.0
2
+ astunparse==1.6.3
3
+ flatbuffers==25.2.10
4
+ gast==0.6.0
5
+ google-pasta==0.2.0
6
+ h5py==3.14.0
7
+ libclang==18.1.1
8
+ Markdown==3.9
9
+ namex==0.1.0
10
+ opt_einsum==3.4.0
11
+ optree==0.17.0
12
+ tensorboard-data-server==0.7.2
13
+ tensorflow-io-gcs-filesystem==0.37.1
14
+ termcolor==3.1.0
15
+ Werkzeug==3.1.3
16
+ Brotli==1.1.0
17
+ Farama-Notifications==0.0.4
18
+ MarkupSafe==2.1.5
19
+ PyYAML==6.0.2
20
+ absl-py==2.3.1
21
+ accelerate==1.10.1
22
+ ai2-molmo==0.0.0
23
+ aiofiles==24.1.0
24
+ aiohappyeyeballs==2.6.1
25
+ aiohttp==3.12.15
26
+ aiosignal==1.4.0
27
+ annotated-types==0.7.0
28
+ antlr4-python3-runtime==4.9.3
29
+ anyio==4.10.0
30
+ array_record==0.8.1
31
+ async-timeout==5.0.1
32
+ attrs==25.3.0
33
+ av==15.1.0
34
+ backports.tarfile==1.2.0
35
+ beaker-gantry==3.2.0
36
+ beaker-py==2.5.0
37
+ black==23.12.1
38
+ blinker==1.9.0
39
+ boltons==25.0.0
40
+ boto3==1.40.33
41
+ botocore==1.40.33
42
+ build==1.3.0
43
+ cached_path==1.7.3
44
+ cached-property==2.0.1
45
+ cachetools==5.5.2
46
+ certifi==2025.8.3
47
+ cffi==2.0.0
48
+ charset-normalizer==3.4.3
49
+ click==8.2.1
50
+ click-help-colors==0.9.4
51
+ click-option-group==0.5.7
52
+ cloudpickle==3.1.1
53
+ cmake==4.1.0
54
+ contourpy==1.3.2
55
+ cryptography==46.0.1
56
+ cycler==0.12.1
57
+ dataclass-extensions==0.2.3
58
+ datasets==3.6.0
59
+ decorator==5.2.1
60
+ deepdiff==8.6.1
61
+ diffusers==0.35.1
62
+ dill==0.3.8
63
+ distro==1.9.0
64
+ dlimp==0.0.1
65
+ dm-tree==0.1.9
66
+ docutils==0.22.1
67
+ draccus==0.10.0
68
+ editdistance==0.8.1
69
+ einops==0.8.1
70
+ einops-exts==0.0.4
71
+ et_xmlfile==2.0.0
72
+ etils==1.13.0
73
+ evdev==1.9.2
74
+ exceptiongroup==1.3.0
75
+ face==24.0.0
76
+ fastapi==0.116.2
77
+ ffmpy==0.6.1
78
+ fiddle==0.3.0
79
+ filelock==3.13.1
80
+ Flask==3.1.2
81
+ fonttools==4.60.0
82
+ frozenlist==1.7.0
83
+ fsspec==2023.9.2
84
+ ftfy==6.3.1
85
+ gcsfs==2023.9.2
86
+ gitdb==4.0.12
87
+ GitPython==3.1.45
88
+ glom==24.11.0
89
+ google-api-core==2.25.1
90
+ google-auth==2.40.3
91
+ google-auth-oauthlib==1.2.2
92
+ google-cloud-core==2.4.3
93
+ google-cloud-storage==2.19.0
94
+ google-crc32c==1.7.1
95
+ google-resumable-media==2.7.2
96
+ googleapis-common-protos==1.70.0
97
+ gradio==5.46.0
98
+ gradio_client==1.13.0
99
+ graphviz==0.21
100
+ groovy==0.1.2
101
+ grpcio==1.75.0
102
+ gymnasium==0.29.1
103
+ h11==0.16.0
104
+ hf_transfer==0.1.9
105
+ hf-xet==1.1.10
106
+ httpcore==1.0.9
107
+ httpx==0.28.1
108
+ huggingface-hub==0.35.0
109
+ id==1.5.0
110
+ idna==3.10
111
+ imageio==2.37.0
112
+ imageio-ffmpeg==0.6.0
113
+ importlib_metadata==8.7.0
114
+ importlib_resources==6.5.2
115
+ iniconfig==2.1.0
116
+ inquirerpy==0.3.4
117
+ isort==5.12.0
118
+ itsdangerous==2.2.0
119
+ jaraco.classes==3.4.0
120
+ jaraco.context==6.0.1
121
+ jaraco.functools==4.3.0
122
+ jeepney==0.9.0
123
+ Jinja2==3.1.4
124
+ jiter==0.11.0
125
+ jmespath==1.0.1
126
+ joblib==1.5.2
127
+ jsonlines==4.0.0
128
+ keras==2.15.0
129
+ keyring==25.6.0
130
+ kiwisolver==1.4.9
131
+ latex2sympy2_extended==1.10.2
132
+ lerobot==0.3.4
133
+ Levenshtein==0.27.1
134
+ libcst==1.8.4
135
+ lightning-utilities==0.15.2
136
+ markdown-it-py==4.0.0
137
+ math-verify==0.8.0
138
+ matplotlib==3.10.6
139
+ mdurl==0.1.2
140
+ mergedeep==1.3.4
141
+ ml-dtypes==0.2.0
142
+ ml_dtypes==0.5.3
143
+ more-itertools==10.8.0
144
+ mpmath==1.3.0
145
+ msgspec==0.19.0
146
+ multidict==6.6.4
147
+ multiprocess==0.70.16
148
+ mypy==1.3.0
149
+ mypy_extensions==1.1.0
150
+ necessary==0.4.3
151
+ networkx==3.3
152
+ nh3==0.3.0
153
+ nltk==3.9.1
154
+ numpy==1.26.4
155
+ oauthlib==3.3.1
156
+ omegaconf==2.3.0
157
+ openai==1.108.0
158
+ opencv-python-headless==4.12.0.88
159
+ OpenEXR==3.4.0
160
+ openpyxl==3.1.5
161
+ orderly-set==5.5.0
162
+ orjson==3.11.3
163
+ packaging==25.0
164
+ pandas==2.3.2
165
+ pathspec==0.12.1
166
+ petname==2.6
167
+ pfzy==0.3.4
168
+ pillow==11.0.0
169
+ pip==25.2
170
+ platformdirs==4.4.0
171
+ pluggy==1.6.0
172
+ promise==2.3
173
+ prompt_toolkit==3.0.52
174
+ propcache==0.3.2
175
+ proto-plus==1.26.1
176
+ protobuf==4.21.12
177
+ protobuf==6.32.1
178
+ psutil==7.1.0
179
+ pyarrow==21.0.0
180
+ pyasn1==0.6.1
181
+ pyasn1_modules==0.4.2
182
+ pycparser==2.23
183
+ pydantic==2.11.9
184
+ pydantic_core==2.33.2
185
+ pydub==0.25.1
186
+ Pygments==2.19.2
187
+ pynput==1.8.1
188
+ pyparsing==3.2.4
189
+ pyproject_hooks==1.2.0
190
+ pyserial==3.5
191
+ pytest==8.4.2
192
+ pytest-sphinx==0.6.3
193
+ python-dateutil==2.9.0.post0
194
+ python-Levenshtein==0.27.1
195
+ python-multipart==0.0.20
196
+ python-xlib==0.33
197
+ pytorch-triton-rocm==3.4.0
198
+ pytz==2025.2
199
+ pyyaml-include==1.4.1
200
+ RapidFuzz==3.14.1
201
+ readme_renderer==44.0
202
+ regex==2025.9.1
203
+ requests==2.32.5
204
+ requests-oauthlib==2.0.0
205
+ requests-toolbelt==1.0.0
206
+ requirements-parser==0.13.0
207
+ rerun-sdk==0.22.1
208
+ rfc3986==2.0.0
209
+ rich==13.9.4
210
+ rsa==4.9.1
211
+ ruff==0.13.0
212
+ s3transfer==0.14.0
213
+ safehttpx==0.1.6
214
+ safetensors==0.6.2
215
+ scikit-learn==1.7.2
216
+ scipy==1.15.3
217
+ SecretStorage==3.4.0
218
+ semantic-version==2.10.0
219
+ sentencepiece==0.2.1
220
+ sentry-sdk==2.38.0
221
+ setuptools==78.1.1
222
+ shellingham==1.5.4
223
+ six==1.17.0
224
+ smart_open==7.3.1
225
+ smashed==0.21.5
226
+ smmap==5.0.2
227
+ sniffio==1.3.1
228
+ starlette==0.48.0
229
+ sympy==1.13.3
230
+ tensorboard==2.15.2
231
+ tensorboard==2.19.0
232
+ tensorflow==2.15.0
233
+ tensorflow-addons==0.23.0
234
+ tensorflow-datasets==4.9.3
235
+ tensorflow-estimator==2.15.0
236
+ tensorflow-graphics==2021.12.3
237
+ tensorflow-metadata==1.17.2
238
+ threadpoolctl==3.6.0
239
+ timm==1.0.19
240
+ tokenizers==0.22.0
241
+ toml==0.10.2
242
+ tomli==2.2.1
243
+ tomlkit==0.13.3
244
+ torch==2.8.0+rocm6.4
245
+ torchcodec==0.5
246
+ torchmetrics==1.8.2
247
+ torchvision==0.23.0+rocm6.4
248
+ tqdm==4.67.1
249
+ transformers==4.56.1
250
+ trimesh==4.8.2
251
+ trouting==0.3.3
252
+ twine==6.2.0
253
+ typeguard==2.13.3
254
+ typer==0.17.4
255
+ typing_extensions==4.15.0
256
+ typing-inspect==0.9.0
257
+ typing-inspection==0.4.1
258
+ tzdata==2025.2
259
+ urllib3==2.5.0
260
+ uvicorn==0.35.0
261
+ wandb==0.21.4
262
+ wcwidth==0.2.13
263
+ websockets==15.0.1
264
+ wheel==0.45.1
265
+ wrapt==1.14.2
266
+ xxhash==3.5.0
267
+ yarl==1.20.1
268
+ zipp==3.23.0
269
+ lerobot==0.3.4
270
+ minLoRA==0.1.0
271
+ autocommand==2.2.2
272
+ backports.tarfile==1.2.0
273
+ importlib_metadata==8.0.0
274
+ inflect==7.3.1
275
+ jaraco.collections==5.1.0
276
+ jaraco.context==5.3.0
277
+ jaraco.functools==4.0.1
278
+ jaraco.text==3.12.1
279
+ more-itertools==10.3.0
280
+ packaging==24.2
281
+ platformdirs==4.2.2
282
+ tomli==2.0.1
283
+ typeguard==4.3.0
284
+ typing_extensions==4.12.2
285
+ wheel==0.45.1
286
+ zipp==3.19.2
glue_l1_regression/wandb/wandb/run-20251002_163729-7ovz4jzt/files/wandb-metadata.json ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.18",
4
+ "startedAt": "2025-10-02T16:37:29.221472Z",
5
+ "args": [
6
+ "qwen2_7b",
7
+ "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/glue_l1_regression",
8
+ "--vision_backbone",
9
+ "openai",
10
+ "--action_head",
11
+ "l1_regression",
12
+ "--seq_len",
13
+ "1600",
14
+ "--ft_llm",
15
+ "--checkpoint",
16
+ "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
17
+ "--device_train_microbatch_size",
18
+ "16",
19
+ "--global_batch_size",
20
+ "126",
21
+ "--dataset",
22
+ "vla_dataset_realworld",
23
+ "--llm_learning_rate",
24
+ "5e-5",
25
+ "--wandb_entity",
26
+ "henryeap",
27
+ "--wandb_project",
28
+ "a1-realworld",
29
+ "--wandb_run_name",
30
+ "glue",
31
+ "--real_world_vla_config_path",
32
+ "vla_config_realworld/vla_config_glue.yaml",
33
+ "--save_overwrite"
34
+ ],
35
+ "program": "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
36
+ "codePath": "launch_scripts/train_vla.py",
37
+ "codePathLocal": "launch_scripts/train_vla.py",
38
+ "git": {
39
+ "remote": "https://github.com/Spatialtemporal-AI/A1.git",
40
+ "commit": "5071f59d87c6a976691323cbac66d7a988b0b4e7"
41
+ },
42
+ "email": "ihenrykwok@outlook.com",
43
+ "root": "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/glue_l1_regression/wandb",
44
+ "host": "auh7-1b-gpu-282",
45
+ "executable": "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
46
+ "cpu_count": 64,
47
+ "cpu_count_logical": 128,
48
+ "gpu": "Instinct MI210",
49
+ "gpu_count": 8,
50
+ "disk": {
51
+ "/": {
52
+ "total": "470343073792",
53
+ "used": "50535075840"
54
+ }
55
+ },
56
+ "memory": {
57
+ "total": "2434606956544"
58
+ },
59
+ "gpu_amd": [
60
+ {
61
+ "id": "2",
62
+ "uniqueId": "0x492f172b602a22b5",
63
+ "vbiosVersion": "113-D67301V-073",
64
+ "performanceLevel": "auto",
65
+ "maxPower": "300.0",
66
+ "series": "Instinct MI210",
67
+ "model": "0x740f",
68
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
69
+ "sku": "D67301V",
70
+ "sclkRange": "500Mhz - 1700Mhz",
71
+ "mclkRange": "400Mhz - 1600Mhz"
72
+ },
73
+ {
74
+ "id": "3",
75
+ "uniqueId": "0x1af77dc455975108",
76
+ "vbiosVersion": "113-D67301V-073",
77
+ "performanceLevel": "auto",
78
+ "maxPower": "300.0",
79
+ "series": "Instinct MI210",
80
+ "model": "0x740f",
81
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
82
+ "sku": "D67301V",
83
+ "sclkRange": "500Mhz - 1700Mhz",
84
+ "mclkRange": "400Mhz - 1600Mhz"
85
+ },
86
+ {
87
+ "id": "7",
88
+ "uniqueId": "0x14a7fc56ac2e5e42",
89
+ "vbiosVersion": "113-D67301V-073",
90
+ "performanceLevel": "auto",
91
+ "maxPower": "300.0",
92
+ "series": "Instinct MI210",
93
+ "model": "0x740f",
94
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
95
+ "sku": "D67301V",
96
+ "sclkRange": "500Mhz - 1700Mhz",
97
+ "mclkRange": "400Mhz - 1600Mhz"
98
+ },
99
+ {
100
+ "id": "0",
101
+ "uniqueId": "0x2e7a3afcefcaca4b",
102
+ "vbiosVersion": "113-D67301V-073",
103
+ "performanceLevel": "auto",
104
+ "maxPower": "300.0",
105
+ "series": "Instinct MI210",
106
+ "model": "0x740f",
107
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
108
+ "sku": "D67301V",
109
+ "sclkRange": "500Mhz - 1700Mhz",
110
+ "mclkRange": "400Mhz - 1600Mhz"
111
+ },
112
+ {
113
+ "id": "4",
114
+ "uniqueId": "0xb66ccb62112b0571",
115
+ "vbiosVersion": "113-D67301V-073",
116
+ "performanceLevel": "auto",
117
+ "maxPower": "300.0",
118
+ "series": "Instinct MI210",
119
+ "model": "0x740f",
120
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
121
+ "sku": "D67301V",
122
+ "sclkRange": "500Mhz - 1700Mhz",
123
+ "mclkRange": "400Mhz - 1600Mhz"
124
+ },
125
+ {
126
+ "id": "1",
127
+ "uniqueId": "0x63f0fe2c43bc1640",
128
+ "vbiosVersion": "113-D67301V-073",
129
+ "performanceLevel": "auto",
130
+ "maxPower": "300.0",
131
+ "series": "Instinct MI210",
132
+ "model": "0x740f",
133
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
134
+ "sku": "D67301V",
135
+ "sclkRange": "500Mhz - 1700Mhz",
136
+ "mclkRange": "400Mhz - 1600Mhz"
137
+ },
138
+ {
139
+ "id": "5",
140
+ "uniqueId": "0xcec181d5e2ce525",
141
+ "vbiosVersion": "113-D67301V-073",
142
+ "performanceLevel": "auto",
143
+ "maxPower": "300.0",
144
+ "series": "Instinct MI210",
145
+ "model": "0x740f",
146
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
147
+ "sku": "D67301V",
148
+ "sclkRange": "500Mhz - 1700Mhz",
149
+ "mclkRange": "400Mhz - 1600Mhz"
150
+ },
151
+ {
152
+ "id": "6",
153
+ "uniqueId": "0xdaf531ba129c665e",
154
+ "vbiosVersion": "113-D67301V-073",
155
+ "performanceLevel": "auto",
156
+ "maxPower": "300.0",
157
+ "series": "Instinct MI210",
158
+ "model": "0x740f",
159
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
160
+ "sku": "D67301V",
161
+ "sclkRange": "500Mhz - 1700Mhz",
162
+ "mclkRange": "400Mhz - 1600Mhz"
163
+ }
164
+ ],
165
+ "slurm": {
166
+ "cluster_name": "ai-04r",
167
+ "conf": "/etc/slurm/slurm.conf",
168
+ "cpus_on_node": "128",
169
+ "gpus_on_node": "8",
170
+ "gtids": "0",
171
+ "job_account": "faculty-acc",
172
+ "job_cpus_per_node": "128",
173
+ "job_end_time": "1759682204",
174
+ "job_gid": "2000",
175
+ "job_gpus": "0,1,2,3,4,5,6,7",
176
+ "job_id": "2278",
177
+ "job_name": "mh_glue_l1_regression",
178
+ "job_nodelist": "auh7-1b-gpu-282",
179
+ "job_num_nodes": "1",
180
+ "job_partition": "faculty",
181
+ "job_qos": "xdqos",
182
+ "job_start_time": "1759423004",
183
+ "job_uid": "2013",
184
+ "job_user": "xiaodan",
185
+ "jobid": "2278",
186
+ "localid": "0",
187
+ "nnodes": "1",
188
+ "nodeid": "0",
189
+ "nodelist": "auh7-1b-gpu-282",
190
+ "nprocs": "1",
191
+ "ntasks": "1",
192
+ "ntasks_per_node": "1",
193
+ "oom_kill_step": "0",
194
+ "prio_process": "0",
195
+ "procid": "0",
196
+ "submit_dir": "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
197
+ "submit_host": "auh-1b-cpu-login-001",
198
+ "task_pid": "3779894",
199
+ "tasks_per_node": "1",
200
+ "topology_addr": "auh7-1b-gpu-282",
201
+ "topology_addr_pattern": "node"
202
+ },
203
+ "writerId": "5tngekiy76vcgqn1s8m1wijc9isok5tn"
204
+ }
glue_l1_regression/wandb/wandb/run-20251002_163729-7ovz4jzt/logs/debug-core.log ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-10-02T16:37:29.014245666Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpusl77j3_/port-2574789.txt","pid":2574789,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-10-02T16:37:29.016062848Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":2574789}
3
+ {"time":"2025-10-02T16:37:29.016006867Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2574789-2574955-4107859315/socket","Net":"unix"}}
4
+ {"time":"2025-10-02T16:37:29.198285234Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-10-02T16:37:29.205734663Z","level":"INFO","msg":"handleInformInit: received","streamId":"tmwli25x","id":"1(@)"}
6
+ {"time":"2025-10-02T16:37:30.340260012Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"tmwli25x","id":"1(@)"}
7
+ .txt","pid":3780083,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
8
+ {"time":"2025-10-02T16:37:29.282444644Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":3780083}
9
+ {"time":"2025-10-02T16:37:29.282434424Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-3780083-3780247-3093816148/socket","Net":"unix"}}
10
+ {"time":"2025-10-02T16:37:29.458879988Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
11
+ {"time":"2025-10-02T16:37:29.465619355Z","level":"INFO","msg":"handleInformInit: received","streamId":"7ovz4jzt","id":"1(@)"}
12
+ {"time":"2025-10-02T16:37:30.493288413Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"7ovz4jzt","id":"1(@)"}
glue_l1_regression/wandb/wandb/run-20251002_163729-7ovz4jzt/logs/debug-internal.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-10-02T16:37:29.467576263Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
2
+ {"time":"2025-10-02T16:37:30.493240032Z","level":"INFO","msg":"stream: created new stream","id":"7ovz4jzt"}
3
+ {"time":"2025-10-02T16:37:30.493282902Z","level":"INFO","msg":"stream: started","id":"7ovz4jzt"}
4
+ {"time":"2025-10-02T16:37:30.493310273Z","level":"INFO","msg":"writer: started","stream_id":"7ovz4jzt"}
5
+ {"time":"2025-10-02T16:37:30.493324013Z","level":"INFO","msg":"sender: started","stream_id":"7ovz4jzt"}
6
+ {"time":"2025-10-02T16:37:30.493358514Z","level":"INFO","msg":"handler: started","stream_id":"7ovz4jzt"}
glue_l1_regression/wandb/wandb/run-20251002_163729-7ovz4jzt/logs/debug.log ADDED
File without changes
pen_flow_matching/wandb/wandb/run-20251011_163844-a381qnn9/files/output.log ADDED
The diff for this file is too large to render. See raw diff
 
pen_flow_matching/wandb/wandb/run-20251011_163844-a381qnn9/files/requirements.txt ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ai2-molmo==0.0.0
2
+ astunparse==1.6.3
3
+ flatbuffers==25.2.10
4
+ gast==0.6.0
5
+ google-pasta==0.2.0
6
+ h5py==3.14.0
7
+ libclang==18.1.1
8
+ Markdown==3.9
9
+ namex==0.1.0
10
+ opt_einsum==3.4.0
11
+ optree==0.17.0
12
+ tensorboard-data-server==0.7.2
13
+ tensorflow-io-gcs-filesystem==0.37.1
14
+ termcolor==3.1.0
15
+ Werkzeug==3.1.3
16
+ Brotli==1.1.0
17
+ Farama-Notifications==0.0.4
18
+ MarkupSafe==2.1.5
19
+ PyYAML==6.0.2
20
+ absl-py==2.3.1
21
+ accelerate==1.10.1
22
+ ai2-molmo==0.0.0
23
+ aiofiles==24.1.0
24
+ aiohappyeyeballs==2.6.1
25
+ aiohttp==3.12.15
26
+ aiosignal==1.4.0
27
+ annotated-types==0.7.0
28
+ antlr4-python3-runtime==4.9.3
29
+ anyio==4.10.0
30
+ array_record==0.8.1
31
+ async-timeout==5.0.1
32
+ attrs==25.3.0
33
+ av==15.1.0
34
+ backports.tarfile==1.2.0
35
+ beaker-gantry==3.2.0
36
+ beaker-py==2.5.0
37
+ black==23.12.1
38
+ blinker==1.9.0
39
+ boltons==25.0.0
40
+ boto3==1.40.33
41
+ botocore==1.40.33
42
+ build==1.3.0
43
+ cached_path==1.7.3
44
+ cached-property==2.0.1
45
+ cachetools==5.5.2
46
+ certifi==2025.8.3
47
+ cffi==2.0.0
48
+ charset-normalizer==3.4.3
49
+ click==8.2.1
50
+ click-help-colors==0.9.4
51
+ click-option-group==0.5.7
52
+ cloudpickle==3.1.1
53
+ cmake==4.1.0
54
+ contourpy==1.3.2
55
+ cryptography==46.0.1
56
+ cycler==0.12.1
57
+ dataclass-extensions==0.2.3
58
+ datasets==3.6.0
59
+ decorator==5.2.1
60
+ deepdiff==8.6.1
61
+ diffusers==0.35.1
62
+ dill==0.3.8
63
+ distro==1.9.0
64
+ dlimp==0.0.1
65
+ dm-tree==0.1.9
66
+ docutils==0.22.1
67
+ draccus==0.10.0
68
+ editdistance==0.8.1
69
+ einops==0.8.1
70
+ einops-exts==0.0.4
71
+ et_xmlfile==2.0.0
72
+ etils==1.13.0
73
+ evdev==1.9.2
74
+ exceptiongroup==1.3.0
75
+ face==24.0.0
76
+ fastapi==0.116.2
77
+ ffmpy==0.6.1
78
+ fiddle==0.3.0
79
+ filelock==3.13.1
80
+ Flask==3.1.2
81
+ fonttools==4.60.0
82
+ frozenlist==1.7.0
83
+ fsspec==2023.9.2
84
+ ftfy==6.3.1
85
+ gcsfs==2023.9.2
86
+ gitdb==4.0.12
87
+ GitPython==3.1.45
88
+ glom==24.11.0
89
+ google-api-core==2.25.1
90
+ google-auth==2.40.3
91
+ google-auth-oauthlib==1.2.2
92
+ google-cloud-core==2.4.3
93
+ google-cloud-storage==2.19.0
94
+ google-crc32c==1.7.1
95
+ google-resumable-media==2.7.2
96
+ googleapis-common-protos==1.70.0
97
+ gradio==5.46.0
98
+ gradio_client==1.13.0
99
+ graphviz==0.21
100
+ groovy==0.1.2
101
+ grpcio==1.75.0
102
+ gymnasium==0.29.1
103
+ h11==0.16.0
104
+ hf_transfer==0.1.9
105
+ hf-xet==1.1.10
106
+ httpcore==1.0.9
107
+ httpx==0.28.1
108
+ huggingface-hub==0.35.0
109
+ id==1.5.0
110
+ idna==3.10
111
+ imageio==2.37.0
112
+ imageio-ffmpeg==0.6.0
113
+ importlib_metadata==8.7.0
114
+ importlib_resources==6.5.2
115
+ iniconfig==2.1.0
116
+ inquirerpy==0.3.4
117
+ isort==5.12.0
118
+ itsdangerous==2.2.0
119
+ jaraco.classes==3.4.0
120
+ jaraco.context==6.0.1
121
+ jaraco.functools==4.3.0
122
+ jeepney==0.9.0
123
+ Jinja2==3.1.4
124
+ jiter==0.11.0
125
+ jmespath==1.0.1
126
+ joblib==1.5.2
127
+ jsonlines==4.0.0
128
+ keras==2.15.0
129
+ keyring==25.6.0
130
+ kiwisolver==1.4.9
131
+ latex2sympy2_extended==1.10.2
132
+ lerobot==0.3.4
133
+ Levenshtein==0.27.1
134
+ libcst==1.8.4
135
+ lightning-utilities==0.15.2
136
+ markdown-it-py==4.0.0
137
+ math-verify==0.8.0
138
+ matplotlib==3.10.6
139
+ mdurl==0.1.2
140
+ mergedeep==1.3.4
141
+ ml-dtypes==0.2.0
142
+ ml_dtypes==0.5.3
143
+ more-itertools==10.8.0
144
+ mpmath==1.3.0
145
+ msgspec==0.19.0
146
+ multidict==6.6.4
147
+ multiprocess==0.70.16
148
+ mypy==1.3.0
149
+ mypy_extensions==1.1.0
150
+ necessary==0.4.3
151
+ networkx==3.3
152
+ nh3==0.3.0
153
+ nltk==3.9.1
154
+ numpy==1.26.4
155
+ oauthlib==3.3.1
156
+ omegaconf==2.3.0
157
+ openai==1.108.0
158
+ opencv-python-headless==4.12.0.88
159
+ OpenEXR==3.4.0
160
+ openpyxl==3.1.5
161
+ orderly-set==5.5.0
162
+ orjson==3.11.3
163
+ packaging==25.0
164
+ pandas==2.3.2
165
+ pathspec==0.12.1
166
+ petname==2.6
167
+ pfzy==0.3.4
168
+ pillow==11.0.0
169
+ pip==25.2
170
+ platformdirs==4.4.0
171
+ pluggy==1.6.0
172
+ promise==2.3
173
+ prompt_toolkit==3.0.52
174
+ propcache==0.3.2
175
+ proto-plus==1.26.1
176
+ protobuf==4.21.12
177
+ protobuf==6.32.1
178
+ psutil==7.1.0
179
+ pyarrow==21.0.0
180
+ pyasn1==0.6.1
181
+ pyasn1_modules==0.4.2
182
+ pycparser==2.23
183
+ pydantic==2.11.9
184
+ pydantic_core==2.33.2
185
+ pydub==0.25.1
186
+ Pygments==2.19.2
187
+ pynput==1.8.1
188
+ pyparsing==3.2.4
189
+ pyproject_hooks==1.2.0
190
+ pyserial==3.5
191
+ pytest==8.4.2
192
+ pytest-sphinx==0.6.3
193
+ python-dateutil==2.9.0.post0
194
+ python-Levenshtein==0.27.1
195
+ python-multipart==0.0.20
196
+ python-xlib==0.33
197
+ pytorch-triton-rocm==3.4.0
198
+ pytz==2025.2
199
+ pyyaml-include==1.4.1
200
+ RapidFuzz==3.14.1
201
+ readme_renderer==44.0
202
+ regex==2025.9.1
203
+ requests==2.32.5
204
+ requests-oauthlib==2.0.0
205
+ requests-toolbelt==1.0.0
206
+ requirements-parser==0.13.0
207
+ rerun-sdk==0.22.1
208
+ rfc3986==2.0.0
209
+ rich==13.9.4
210
+ rsa==4.9.1
211
+ ruff==0.13.0
212
+ s3transfer==0.14.0
213
+ safehttpx==0.1.6
214
+ safetensors==0.6.2
215
+ scikit-learn==1.7.2
216
+ scipy==1.15.3
217
+ SecretStorage==3.4.0
218
+ semantic-version==2.10.0
219
+ sentencepiece==0.2.1
220
+ sentry-sdk==2.38.0
221
+ setuptools==78.1.1
222
+ shellingham==1.5.4
223
+ six==1.17.0
224
+ smart_open==7.3.1
225
+ smashed==0.21.5
226
+ smmap==5.0.2
227
+ sniffio==1.3.1
228
+ starlette==0.48.0
229
+ sympy==1.13.3
230
+ tensorboard==2.15.2
231
+ tensorboard==2.19.0
232
+ tensorflow==2.15.0
233
+ tensorflow-addons==0.23.0
234
+ tensorflow-datasets==4.9.3
235
+ tensorflow-estimator==2.15.0
236
+ tensorflow-graphics==2021.12.3
237
+ tensorflow-metadata==1.17.2
238
+ threadpoolctl==3.6.0
239
+ timm==1.0.19
240
+ tokenizers==0.22.0
241
+ toml==0.10.2
242
+ tomli==2.2.1
243
+ tomlkit==0.13.3
244
+ torch==2.8.0+rocm6.4
245
+ torchcodec==0.5
246
+ torchmetrics==1.8.2
247
+ torchvision==0.23.0+rocm6.4
248
+ tqdm==4.67.1
249
+ transformers==4.56.1
250
+ trimesh==4.8.2
251
+ trouting==0.3.3
252
+ twine==6.2.0
253
+ typeguard==2.13.3
254
+ typer==0.17.4
255
+ typing_extensions==4.15.0
256
+ typing-inspect==0.9.0
257
+ typing-inspection==0.4.1
258
+ tzdata==2025.2
259
+ urllib3==2.5.0
260
+ uvicorn==0.35.0
261
+ wandb==0.21.4
262
+ wcwidth==0.2.13
263
+ websockets==15.0.1
264
+ wheel==0.45.1
265
+ wrapt==1.14.2
266
+ xxhash==3.5.0
267
+ yarl==1.20.1
268
+ zipp==3.23.0
269
+ lerobot==0.3.4
270
+ minLoRA==0.1.0
271
+ autocommand==2.2.2
272
+ backports.tarfile==1.2.0
273
+ importlib_metadata==8.0.0
274
+ inflect==7.3.1
275
+ jaraco.collections==5.1.0
276
+ jaraco.context==5.3.0
277
+ jaraco.functools==4.0.1
278
+ jaraco.text==3.12.1
279
+ more-itertools==10.3.0
280
+ packaging==24.2
281
+ platformdirs==4.2.2
282
+ tomli==2.0.1
283
+ typeguard==4.3.0
284
+ typing_extensions==4.12.2
285
+ wheel==0.45.1
286
+ zipp==3.19.2
pen_flow_matching/wandb/wandb/run-20251011_163844-a381qnn9/files/wandb-metadata.json ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.18",
4
+ "startedAt": "2025-10-11T16:38:44.830364Z",
5
+ "args": [
6
+ "qwen2_7b",
7
+ "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/pen_flow_matching",
8
+ "--vision_backbone",
9
+ "openai",
10
+ "--action_head",
11
+ "flow_matching",
12
+ "--seq_len",
13
+ "1600",
14
+ "--ft_llm",
15
+ "--checkpoint",
16
+ "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
17
+ "--device_train_microbatch_size",
18
+ "16",
19
+ "--global_batch_size",
20
+ "126",
21
+ "--dataset",
22
+ "vla_dataset_realworld",
23
+ "--llm_learning_rate",
24
+ "5e-5",
25
+ "--wandb_entity",
26
+ "henryeap",
27
+ "--wandb_project",
28
+ "a1-realworld",
29
+ "--wandb_run_name",
30
+ "pen",
31
+ "--real_world_vla_config_path",
32
+ "vla_config_realworld/vla_config_pen.yaml",
33
+ "--save_overwrite"
34
+ ],
35
+ "program": "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
36
+ "codePath": "launch_scripts/train_vla.py",
37
+ "codePathLocal": "launch_scripts/train_vla.py",
38
+ "git": {
39
+ "remote": "https://github.com/Spatialtemporal-AI/A1.git",
40
+ "commit": "7c171df5d31577ede69d05172c2bc62d42ef3e3d"
41
+ },
42
+ "email": "ihenrykwok@outlook.com",
43
+ "root": "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/pen_flow_matching/wandb",
44
+ "host": "auh7-1b-gpu-253",
45
+ "executable": "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
46
+ "cpu_count": 64,
47
+ "cpu_count_logical": 128,
48
+ "gpu": "Instinct MI210",
49
+ "gpu_count": 8,
50
+ "disk": {
51
+ "/": {
52
+ "total": "3778763694080",
53
+ "used": "50589351936"
54
+ }
55
+ },
56
+ "memory": {
57
+ "total": "2434606940160"
58
+ },
59
+ "gpu_amd": [
60
+ {
61
+ "id": "7",
62
+ "uniqueId": "0x79f34beb0df1642b",
63
+ "vbiosVersion": "113-D67301V-073",
64
+ "performanceLevel": "auto",
65
+ "maxPower": "300.0",
66
+ "series": "Instinct MI210",
67
+ "model": "0x740f",
68
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
69
+ "sku": "D67301V",
70
+ "sclkRange": "500Mhz - 1700Mhz",
71
+ "mclkRange": "400Mhz - 1600Mhz"
72
+ },
73
+ {
74
+ "id": "1",
75
+ "uniqueId": "0xde7b3a3b0e7b52be",
76
+ "vbiosVersion": "113-D67301V-073",
77
+ "performanceLevel": "auto",
78
+ "maxPower": "300.0",
79
+ "series": "Instinct MI210",
80
+ "model": "0x740f",
81
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
82
+ "sku": "D67301V",
83
+ "sclkRange": "500Mhz - 1700Mhz",
84
+ "mclkRange": "400Mhz - 1600Mhz"
85
+ },
86
+ {
87
+ "id": "6",
88
+ "uniqueId": "0xe08249923e0a99ae",
89
+ "vbiosVersion": "113-D67301V-073",
90
+ "performanceLevel": "auto",
91
+ "maxPower": "300.0",
92
+ "series": "Instinct MI210",
93
+ "model": "0x740f",
94
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
95
+ "sku": "D67301V",
96
+ "sclkRange": "500Mhz - 1700Mhz",
97
+ "mclkRange": "400Mhz - 1600Mhz"
98
+ },
99
+ {
100
+ "id": "0",
101
+ "uniqueId": "0x6e39dcc60a37a155",
102
+ "vbiosVersion": "113-D67301V-073",
103
+ "performanceLevel": "auto",
104
+ "maxPower": "300.0",
105
+ "series": "Instinct MI210",
106
+ "model": "0x740f",
107
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
108
+ "sku": "D67301V",
109
+ "sclkRange": "500Mhz - 1700Mhz",
110
+ "mclkRange": "400Mhz - 1600Mhz"
111
+ },
112
+ {
113
+ "id": "5",
114
+ "uniqueId": "0xc01c66958a593461",
115
+ "vbiosVersion": "113-D67301V-073",
116
+ "performanceLevel": "auto",
117
+ "maxPower": "300.0",
118
+ "series": "Instinct MI210",
119
+ "model": "0x740f",
120
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
121
+ "sku": "D67301V",
122
+ "sclkRange": "500Mhz - 1700Mhz",
123
+ "mclkRange": "400Mhz - 1600Mhz"
124
+ },
125
+ {
126
+ "id": "3",
127
+ "uniqueId": "0x7d5ed86cf6c4a80a",
128
+ "vbiosVersion": "113-D67301V-073",
129
+ "performanceLevel": "auto",
130
+ "maxPower": "300.0",
131
+ "series": "Instinct MI210",
132
+ "model": "0x740f",
133
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
134
+ "sku": "D67301V",
135
+ "sclkRange": "500Mhz - 1700Mhz",
136
+ "mclkRange": "400Mhz - 1600Mhz"
137
+ },
138
+ {
139
+ "id": "4",
140
+ "uniqueId": "0x22f4e1ec1e4766a1",
141
+ "vbiosVersion": "113-D67301V-073",
142
+ "performanceLevel": "auto",
143
+ "maxPower": "300.0",
144
+ "series": "Instinct MI210",
145
+ "model": "0x740f",
146
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
147
+ "sku": "D67301V",
148
+ "sclkRange": "500Mhz - 1700Mhz",
149
+ "mclkRange": "400Mhz - 1600Mhz"
150
+ },
151
+ {
152
+ "id": "2",
153
+ "uniqueId": "0xe24ebd0f35014c51",
154
+ "vbiosVersion": "113-D67301V-073",
155
+ "performanceLevel": "auto",
156
+ "maxPower": "300.0",
157
+ "series": "Instinct MI210",
158
+ "model": "0x740f",
159
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
160
+ "sku": "D67301V",
161
+ "sclkRange": "500Mhz - 1700Mhz",
162
+ "mclkRange": "400Mhz - 1600Mhz"
163
+ }
164
+ ],
165
+ "slurm": {
166
+ "cluster_name": "ai-04r",
167
+ "conf": "/etc/slurm/slurm.conf",
168
+ "cpus_on_node": "128",
169
+ "gpus_on_node": "8",
170
+ "gtids": "0",
171
+ "job_account": "faculty-acc",
172
+ "job_cpus_per_node": "128",
173
+ "job_end_time": "1760459846",
174
+ "job_gid": "2000",
175
+ "job_gpus": "0,1,2,3,4,5,6,7",
176
+ "job_id": "2287",
177
+ "job_name": "mh_pen_flow_matching",
178
+ "job_nodelist": "auh7-1b-gpu-253",
179
+ "job_num_nodes": "1",
180
+ "job_partition": "faculty",
181
+ "job_qos": "xdqos",
182
+ "job_start_time": "1760200646",
183
+ "job_uid": "2013",
184
+ "job_user": "xiaodan",
185
+ "jobid": "2287",
186
+ "localid": "0",
187
+ "nnodes": "1",
188
+ "nodeid": "0",
189
+ "nodelist": "auh7-1b-gpu-253",
190
+ "nprocs": "1",
191
+ "ntasks": "1",
192
+ "ntasks_per_node": "1",
193
+ "oom_kill_step": "0",
194
+ "prio_process": "0",
195
+ "procid": "0",
196
+ "submit_dir": "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
197
+ "submit_host": "auh-1b-cpu-login-001",
198
+ "task_pid": "411389",
199
+ "tasks_per_node": "1",
200
+ "topology_addr": "auh7-1b-gpu-253",
201
+ "topology_addr_pattern": "node"
202
+ },
203
+ "writerId": "c87r2obvd7qa3blbvewjvdehottl5rrr"
204
+ }
pen_flow_matching/wandb/wandb/run-20251011_163844-a381qnn9/logs/debug-core.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-10-11T16:38:45.197330387Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpygij4s57/port-579278.txt","pid":579278,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-10-11T16:38:45.198122306Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":579278}
3
+ {"time":"2025-10-11T16:38:45.199702094Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-579278-579454-1269370264/socket","Net":"unix"}}
4
+ {"time":"2025-10-11T16:38:45.353337002Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-10-11T16:38:45.368049369Z","level":"INFO","msg":"handleInformInit: received","streamId":"qzez8pv7","id":"1(@)"}
6
+ {"time":"2025-10-11T16:38:46.624632949Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"qzez8pv7","id":"1(@)"}
pen_flow_matching/wandb/wandb/run-20251011_163844-a381qnn9/logs/debug-internal.log ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-10-11T16:38:45.301569164Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
2
+ {"time":"2025-10-11T16:38:46.459201786Z","level":"INFO","msg":"stream: created new stream","id":"a381qnn9"}
3
+ {"time":"2025-10-11T16:38:46.459237957Z","level":"INFO","msg":"stream: started","id":"a381qnn9"}
4
+ {"time":"2025-10-11T16:38:46.459266458Z","level":"INFO","msg":"handler: started","stream_id":"a381qnn9"}
5
+ {"time":"2025-10-11T16:38:46.459291898Z","level":"INFO","msg":"sender: started","stream_id":"a381qnn9"}
6
+ {"time":"2025-10-11T16:38:46.459287598Z","level":"INFO","msg":"writer: started","stream_id":"a381qnn9"}
7
+ {"time":"2025-10-12T06:42:47.897888022Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/henryeap/a1-realworld/a381qnn9/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
8
+ {"time":"2025-10-12T14:34:32.120286068Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/henryeap/a1-realworld/a381qnn9/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
wandb/wandb/debug.log ADDED
File without changes
wandb/wandb/run-20251002_155015-xojint20/logs/debug.log ADDED
File without changes