Henryeahhh committed on
Commit
90c97df
·
verified ·
1 Parent(s): 4e9de94

Add files using upload-large-folder tool

Browse files
Files changed (50) hide show
  1. all_flow_matching/config.yaml +322 -0
  2. all_flow_matching/upload.sh +5 -0
  3. all_l1/config.yaml +322 -0
  4. cleandesk50_flow_matching/config.yaml +322 -0
  5. cleandesk50_l1_regression/config.yaml +322 -0
  6. cleandesk_flow_matching/config.yaml +322 -0
  7. cleandesk_l1_regression/config.yaml +322 -0
  8. config.yaml +322 -0
  9. eraser_flow_matching/config.yaml +322 -0
  10. eraser_l1_regression/config.yaml +322 -0
  11. glue/config.yaml +322 -0
  12. glue_flow_matching/config.yaml +322 -0
  13. glue_l1_regression/config.yaml +322 -0
  14. pen_flow_matching/config.yaml +322 -0
  15. wandb/wandb/run-20251002_150921-kqbx0cjv/files/output.log +390 -0
  16. wandb/wandb/run-20251002_150921-kqbx0cjv/logs/debug-core.log +6 -0
  17. wandb/wandb/run-20251002_154526-bw81vbs0/files/wandb-metadata.json +204 -0
  18. wandb/wandb/run-20251002_154526-bw81vbs0/run-bw81vbs0.wandb +0 -0
  19. wandb/wandb/run-20251002_155015-xojint20/files/wandb-metadata.json +204 -0
  20. wandb/wandb/run-20251002_155441-70dhy5dq/files/requirements.txt +286 -0
  21. wandb/wandb/run-20251002_155441-70dhy5dq/files/wandb-metadata.json +204 -0
  22. wandb/wandb/run-20251002_155441-70dhy5dq/logs/debug-core.log +6 -0
  23. wandb/wandb/run-20251002_155441-70dhy5dq/logs/debug.log +0 -0
  24. wandb/wandb/run-20251002_155442-6v8q0jgn/files/output.log +314 -0
  25. wandb/wandb/run-20251002_155442-6v8q0jgn/files/wandb-metadata.json +204 -0
  26. wandb/wandb/run-20251002_155442-6v8q0jgn/logs/debug-core.log +6 -0
  27. wandb/wandb/run-20251002_155442-6v8q0jgn/logs/debug-internal.log +6 -0
  28. wandb/wandb/run-20251002_155442-6v8q0jgn/logs/debug.log +0 -0
  29. wipe/config.yaml +322 -0
  30. wipe/wandb/wandb/debug-internal.log +6 -0
  31. wipe/wandb/wandb/run-20251002_163436-itiyfljc/files/wandb-metadata.json +204 -0
  32. wipe/wandb/wandb/run-20251002_163436-itiyfljc/logs/debug-internal.log +6 -0
  33. wipe/wandb/wandb/run-20251002_163436-itiyfljc/run-itiyfljc.wandb +0 -0
  34. wipe_flow_matching/config.yaml +322 -0
  35. wipe_flow_matching/step12000-unsharded/config.yaml +322 -0
  36. wipe_flow_matching/step12000/config.yaml +322 -0
  37. wipe_flow_matching/wandb/wandb/debug-internal.log +10 -0
  38. wipe_flow_matching/wandb/wandb/debug.log +0 -0
  39. wipe_flow_matching/wandb/wandb/run-20251005_163812-0cfqmuqw/files/wandb-metadata.json +204 -0
  40. wipe_flow_matching/wandb/wandb/run-20251005_163812-0cfqmuqw/logs/debug-core.log +6 -0
  41. wipe_l1_regression/config.yaml +322 -0
  42. wipe_l1_regression/step12000-unsharded/config.yaml +322 -0
  43. wipe_l1_regression/step12000/config.yaml +322 -0
  44. wipe_l1_regression/wandb/wandb/debug-internal.log +10 -0
  45. wipe_l1_regression/wandb/wandb/debug.log +0 -0
  46. wipe_l1_regression/wandb/wandb/run-20251005_163743-a1znetn8/files/requirements.txt +286 -0
  47. wipe_l1_regression/wandb/wandb/run-20251005_163743-a1znetn8/files/wandb-metadata.json +204 -0
  48. wipe_l1_regression/wandb/wandb/run-20251005_163743-a1znetn8/logs/debug-core.log +6 -0
  49. wipe_l1_regression/wandb/wandb/run-20251005_163743-a1znetn8/logs/debug-internal.log +10 -0
  50. wipe_l1_regression/wandb/wandb/run-20251005_163743-a1znetn8/logs/debug.log +0 -0
all_flow_matching/config.yaml ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: all_20251002_164508
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 3584
7
+ n_heads: 28
8
+ n_kv_heads: 4
9
+ qkv_bias: true
10
+ clip_qkv: null
11
+ n_layers: 28
12
+ mlp_ratio: 4
13
+ mlp_hidden_size: 37888
14
+ activation_type: swiglu
15
+ block_type: sequential
16
+ block_group_size: 1
17
+ rope: true
18
+ rope_full_precision: true
19
+ rope_theta: 1000000.0
20
+ vision_backbone:
21
+ image_model_type: openai
22
+ image_default_input_size:
23
+ - 336
24
+ - 336
25
+ image_patch_size: 14
26
+ image_pos_patch_size: 14
27
+ image_emb_dim: 1024
28
+ image_num_heads: 16
29
+ image_num_key_value_heads: 16
30
+ image_num_layers: 23
31
+ image_head_dim: 64
32
+ image_mlp_dim: 4096
33
+ image_mlp_activations: quick_gelu
34
+ image_dropout_rate: 0.0
35
+ image_num_pos: 577
36
+ image_norm_eps: 1.0e-05
37
+ attention_dropout: 0.0
38
+ residual_dropout: 0.0
39
+ initializer_range: 0.02
40
+ fsdp_wrap: false
41
+ resize_mode: default
42
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
43
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
44
+ low_cpu_fsdp: true
45
+ attention_type: sdpa
46
+ float32_attention: true
47
+ attention_dropout: 0.0
48
+ attention_layer_norm: false
49
+ residual_dropout: 0.1
50
+ response_residual_dropout: 0.0
51
+ embedding_dropout: 0.0
52
+ layer_norm_type: rms
53
+ layer_norm_with_affine: true
54
+ layer_norm_eps: 1.0e-06
55
+ attention_layer_norm_with_affine: true
56
+ max_sequence_length: 4096
57
+ max_position_embeddings: null
58
+ include_bias: false
59
+ bias_for_layer_norm: null
60
+ scale_logits: false
61
+ vocab_size: 152064
62
+ embedding_size: 152064
63
+ ff_out_size: null
64
+ additional_vocab_size: 128
65
+ new_embedding_init_range: 0.02
66
+ weight_tying: false
67
+ init_device: null
68
+ init_fn: normal
69
+ init_std: 0.02
70
+ init_cutoff_factor: null
71
+ norm_after: false
72
+ precision: amp_bf16
73
+ max_crops: 12
74
+ crop_mode: overlap-and-resize-c2
75
+ use_col_tokens: true
76
+ prompt_type: uber_model
77
+ system_prompt_kind: demo_or_style
78
+ message_formatting: role
79
+ always_start_with_space: true
80
+ multi_annotation_weighting: root_subsegments
81
+ default_inference_len: 65
82
+ overlap_margins:
83
+ - 4
84
+ - 4
85
+ pad_value: 0.0
86
+ image_padding_embed: pad_and_partial_pad
87
+ fix_image_padding: true
88
+ vit_layers:
89
+ - -2
90
+ - -9
91
+ image_pooling_h: 2
92
+ image_pooling_w: 2
93
+ image_pooling_2d: attention_meanq
94
+ image_projector: mlp
95
+ image_feature_dropout: 0.0
96
+ initializer_range: 0.02
97
+ normalize_input_embeds: false
98
+ use_position_ids: true
99
+ head_dim: null
100
+ action_tokenizer:
101
+ identifier: physical-intelligence/fast
102
+ tokenizer_dir: null
103
+ action_dim: 7
104
+ horizon: 8
105
+ tokenizer:
106
+ identifier: Qwen/Qwen2-7B
107
+ tokenizer_dir: null
108
+ pad_tokenizer: true
109
+ moe_num_experts: 8
110
+ moe_top_k: 2
111
+ moe_mlp_impl: sparse
112
+ moe_log_expert_assignment: false
113
+ moe_shared_expert: false
114
+ moe_lbl_in_fp32: false
115
+ moe_interleave: false
116
+ moe_loss_weight: 0.1
117
+ moe_zloss_weight: null
118
+ moe_dropless: true
119
+ moe_capacity_factor: 1.25
120
+ action_head: flow_matching
121
+ num_diffusion_steps: 1000
122
+ num_diffusion_inference_steps: 30
123
+ use_proprio: true
124
+ action_head_dit_hidden_size: 1152
125
+ action_head_dit_depth: 28
126
+ action_head_dit_num_heads: 16
127
+ llm_causal_attention: false
128
+ action_use_left_eef: true
129
+ action_use_mobile_base: false
130
+ allow_resume: false
131
+ ft_llm: true
132
+ ft_vit: false
133
+ ft_connector: false
134
+ ft_embedding: lm_head
135
+ lora: false
136
+ use_lora: true
137
+ lora_rank: 8
138
+ lora_llm: false
139
+ lora_vit: false
140
+ lora_connector: false
141
+ early_exit: false
142
+ train_exit_random_layer: false
143
+ optimizer:
144
+ name: adamw
145
+ learning_rate: 0.0001
146
+ weight_decay: 0.01
147
+ betas:
148
+ - 0.9
149
+ - 0.95
150
+ eps: 1.0e-05
151
+ connector_learning_rate: 0.0002
152
+ vit_learning_rate: 6.0e-06
153
+ llm_learning_rate: 5.0e-05
154
+ connector_weight_decay: 0.0
155
+ vit_weight_decay: 0.0
156
+ llm_weight_decay: 0.0
157
+ connector_betas:
158
+ - 0.9
159
+ - 0.95
160
+ vit_betas:
161
+ - 0.9
162
+ - 0.95
163
+ llm_betas:
164
+ - 0.9
165
+ - 0.95
166
+ connector_eps: 1.0e-06
167
+ vit_eps: 1.0e-06
168
+ llm_eps: 1.0e-06
169
+ metrics_log_interval: 20
170
+ scheduler:
171
+ name: multimodal
172
+ units: steps
173
+ t_warmup: 100
174
+ t_max: null
175
+ alpha_f: 0.1
176
+ connector_t_warmup: 200
177
+ vit_t_warmup: 2000
178
+ llm_t_warmup: 2000
179
+ grad_clip_warmup_steps: null
180
+ grad_clip_warmup_factor: null
181
+ warmup_min_lr: 0.0
182
+ data:
183
+ dataset: vla_dataset_realworld
184
+ mixture: null
185
+ root_size_mixture: null
186
+ split: train
187
+ seed: 95818
188
+ shuffle_messages: false
189
+ pad: to_max
190
+ sequence_length: 1600
191
+ shuffle: true
192
+ for_inference: false
193
+ multi_modal: torch
194
+ num_workers: 0
195
+ drop_last: true
196
+ pin_memory: true
197
+ prefetch_factor: null
198
+ persistent_workers: false
199
+ timeout: 0
200
+ rlds_dataset_name: libero_4_task_suites_no_noops
201
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
202
+ use_wrist_image: true
203
+ use_proprio: true
204
+ rlds_shuffle_buffer_size: 100000
205
+ rlds_traj_threads: 8
206
+ rlds_read_threads: 8
207
+ lerobot_episode_index_start: null
208
+ lerobot_episode_index_end: null
209
+ restore_dataloader: true
210
+ fast_forward_batches: null
211
+ evaluators:
212
+ - label: val
213
+ data:
214
+ dataset: vla_dataset_realworld
215
+ mixture: null
216
+ root_size_mixture: null
217
+ split: validation
218
+ seed: null
219
+ shuffle_messages: false
220
+ pad: to_max
221
+ sequence_length: 1600
222
+ shuffle: false
223
+ for_inference: false
224
+ multi_modal: torch
225
+ num_workers: 0
226
+ drop_last: true
227
+ pin_memory: true
228
+ prefetch_factor: null
229
+ persistent_workers: true
230
+ timeout: 0
231
+ rlds_dataset_name: libero_4_task_suites_no_noops
232
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
233
+ use_wrist_image: true
234
+ use_proprio: true
235
+ rlds_shuffle_buffer_size: 256000
236
+ rlds_traj_threads: 8
237
+ rlds_read_threads: 8
238
+ lerobot_episode_index_start: 353
239
+ lerobot_episode_index_end: 765
240
+ device_eval_batch_size: null
241
+ subset_num_batches: 64
242
+ max_examples: null
243
+ max_new_tokens: 448
244
+ mm_evaluator: null
245
+ save_dir: null
246
+ save_to_checkpoint_dir: false
247
+ eval_name: null
248
+ skip_if_metrics_cached: true
249
+ eval_interval: 0
250
+ inf_eval_interval: -1
251
+ inf_evaluators: []
252
+ save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/all_flow_matching
253
+ remote_save_folder: null
254
+ canceled_check_interval: 50
255
+ save_interval: 500
256
+ save_interval_unsharded: 500
257
+ save_interval_ephemeral: null
258
+ save_interval_action_head: 500
259
+ save_num_checkpoints_to_keep: 1
260
+ save_num_unsharded_checkpoints_to_keep: 1
261
+ save_num_action_head_checkpoints_to_keep: 2
262
+ save_overwrite: true
263
+ force_save_unsharded: false
264
+ no_pre_train_checkpoint: true
265
+ initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
266
+ load_model_config: null
267
+ checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
268
+ load_path: null
269
+ load_path_sharded_checkpointer: null
270
+ reset_optimizer_state: false
271
+ reset_trainer_state: false
272
+ save_dataloader_state: false
273
+ reset_dataloader_state: false
274
+ keep_lr_on_load: true
275
+ sharded_checkpointer: torch_legacy
276
+ max_duration: 500000
277
+ global_train_batch_size: 126
278
+ device_train_batch_size: 15
279
+ device_train_microbatch_size: 16
280
+ device_eval_batch_size: 4
281
+ eval_subset_num_batches: -1
282
+ eval_on_load: false
283
+ device_inf_eval_batch_size: 16
284
+ inf_eval_subset_num_batches: -1
285
+ device_train_grad_accum: 0
286
+ max_grad_norm: 1.0
287
+ multi_component_grad_norm: true
288
+ batch_divisor: global_batch
289
+ max_grad_norm_ratio: null
290
+ precision: amp_bf16
291
+ wandb:
292
+ project: a1-realworld
293
+ entity: henryeap
294
+ group: null
295
+ name: all_20251002_164508
296
+ tags:
297
+ - watching
298
+ log_artifacts: false
299
+ rank_zero_only: true
300
+ log_interval: 1
301
+ speed_monitor:
302
+ window_size: 20
303
+ gpu_flops_available: null
304
+ console_log_interval: 1
305
+ gen1_gc_interval: 1
306
+ compile: null
307
+ fsdp:
308
+ use_orig_params: true
309
+ sharding_strategy: FULL_SHARD
310
+ wrapping_strategy: by_block_and_size
311
+ precision: float
312
+ hybrid_sharding_num_model_replicas: null
313
+ softmax_auxiliary_loss: true
314
+ softmax_auxiliary_loss_scale: 0.0001
315
+ time_limit: null
316
+ extra_steps_after_cancel: 10
317
+ python_profiling: false
318
+ torch_profiling: false
319
+ stop_at: 500000
320
+ stop_after: null
321
+ activation_checkpointing: whole_layer
322
+ fused_loss: null
all_flow_matching/upload.sh ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ hf auth login
2
+ huggingface-cli upload-large-folder spatialtemporal-ai/Lerobot_Glue_best ./glue_best --repo-type=model
3
+
4
+ hf auth logout
5
+
all_l1/config.yaml ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: realworld_20250930_131219
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 3584
7
+ n_heads: 28
8
+ n_kv_heads: 4
9
+ qkv_bias: true
10
+ clip_qkv: null
11
+ n_layers: 28
12
+ mlp_ratio: 4
13
+ mlp_hidden_size: 37888
14
+ activation_type: swiglu
15
+ block_type: sequential
16
+ block_group_size: 1
17
+ rope: true
18
+ rope_full_precision: true
19
+ rope_theta: 1000000.0
20
+ vision_backbone:
21
+ image_model_type: openai
22
+ image_default_input_size:
23
+ - 336
24
+ - 336
25
+ image_patch_size: 14
26
+ image_pos_patch_size: 14
27
+ image_emb_dim: 1024
28
+ image_num_heads: 16
29
+ image_num_key_value_heads: 16
30
+ image_num_layers: 23
31
+ image_head_dim: 64
32
+ image_mlp_dim: 4096
33
+ image_mlp_activations: quick_gelu
34
+ image_dropout_rate: 0.0
35
+ image_num_pos: 577
36
+ image_norm_eps: 1.0e-05
37
+ attention_dropout: 0.0
38
+ residual_dropout: 0.0
39
+ initializer_range: 0.02
40
+ fsdp_wrap: false
41
+ resize_mode: default
42
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
43
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
44
+ low_cpu_fsdp: true
45
+ attention_type: sdpa
46
+ float32_attention: true
47
+ attention_dropout: 0.0
48
+ attention_layer_norm: false
49
+ residual_dropout: 0.1
50
+ response_residual_dropout: 0.0
51
+ embedding_dropout: 0.0
52
+ layer_norm_type: rms
53
+ layer_norm_with_affine: true
54
+ layer_norm_eps: 1.0e-06
55
+ attention_layer_norm_with_affine: true
56
+ max_sequence_length: 4096
57
+ max_position_embeddings: null
58
+ include_bias: false
59
+ bias_for_layer_norm: null
60
+ scale_logits: false
61
+ vocab_size: 152064
62
+ embedding_size: 152064
63
+ ff_out_size: 0
64
+ additional_vocab_size: 128
65
+ new_embedding_init_range: 0.02
66
+ weight_tying: false
67
+ init_device: null
68
+ init_fn: normal
69
+ init_std: 0.02
70
+ init_cutoff_factor: null
71
+ norm_after: false
72
+ precision: amp_bf16
73
+ max_crops: 12
74
+ crop_mode: overlap-and-resize-c2
75
+ use_col_tokens: true
76
+ prompt_type: uber_model
77
+ system_prompt_kind: demo_or_style
78
+ message_formatting: role
79
+ always_start_with_space: true
80
+ multi_annotation_weighting: root_subsegments
81
+ default_inference_len: 65
82
+ overlap_margins:
83
+ - 4
84
+ - 4
85
+ pad_value: 0.0
86
+ image_padding_embed: pad_and_partial_pad
87
+ fix_image_padding: true
88
+ vit_layers:
89
+ - -2
90
+ - -9
91
+ image_pooling_h: 2
92
+ image_pooling_w: 2
93
+ image_pooling_2d: attention_meanq
94
+ image_projector: mlp
95
+ image_feature_dropout: 0.0
96
+ initializer_range: 0.02
97
+ normalize_input_embeds: false
98
+ use_position_ids: true
99
+ head_dim: null
100
+ action_tokenizer:
101
+ identifier: physical-intelligence/fast
102
+ tokenizer_dir: null
103
+ action_dim: 7
104
+ horizon: 8
105
+ tokenizer:
106
+ identifier: Qwen/Qwen2-7B
107
+ tokenizer_dir: null
108
+ pad_tokenizer: true
109
+ moe_num_experts: 8
110
+ moe_top_k: 2
111
+ moe_mlp_impl: sparse
112
+ moe_log_expert_assignment: false
113
+ moe_shared_expert: false
114
+ moe_lbl_in_fp32: false
115
+ moe_interleave: false
116
+ moe_loss_weight: 0.1
117
+ moe_zloss_weight: null
118
+ moe_dropless: true
119
+ moe_capacity_factor: 1.25
120
+ action_head: l1_regression
121
+ num_diffusion_steps: 1000
122
+ num_diffusion_inference_steps: 30
123
+ use_proprio: true
124
+ action_head_dit_hidden_size: 1152
125
+ action_head_dit_depth: 28
126
+ action_head_dit_num_heads: 16
127
+ llm_causal_attention: false
128
+ action_use_left_eef: true
129
+ action_use_mobile_base: false
130
+ allow_resume: false
131
+ ft_llm: true
132
+ ft_vit: false
133
+ ft_connector: false
134
+ ft_embedding: lm_head
135
+ lora: false
136
+ use_lora: true
137
+ lora_rank: 8
138
+ lora_llm: false
139
+ lora_vit: false
140
+ lora_connector: false
141
+ early_exit: false
142
+ train_exit_random_layer: false
143
+ optimizer:
144
+ name: adamw
145
+ learning_rate: 0.0001
146
+ weight_decay: 0.01
147
+ betas:
148
+ - 0.9
149
+ - 0.95
150
+ eps: 1.0e-05
151
+ connector_learning_rate: 0.0002
152
+ vit_learning_rate: 6.0e-06
153
+ llm_learning_rate: 5.0e-05
154
+ connector_weight_decay: 0.0
155
+ vit_weight_decay: 0.0
156
+ llm_weight_decay: 0.0
157
+ connector_betas:
158
+ - 0.9
159
+ - 0.95
160
+ vit_betas:
161
+ - 0.9
162
+ - 0.95
163
+ llm_betas:
164
+ - 0.9
165
+ - 0.95
166
+ connector_eps: 1.0e-06
167
+ vit_eps: 1.0e-06
168
+ llm_eps: 1.0e-06
169
+ metrics_log_interval: 20
170
+ scheduler:
171
+ name: multimodal
172
+ units: steps
173
+ t_warmup: 100
174
+ t_max: null
175
+ alpha_f: 0.1
176
+ connector_t_warmup: 200
177
+ vit_t_warmup: 2000
178
+ llm_t_warmup: 2000
179
+ grad_clip_warmup_steps: null
180
+ grad_clip_warmup_factor: null
181
+ warmup_min_lr: 0.0
182
+ data:
183
+ dataset: vla_dataset_realworld
184
+ mixture: null
185
+ root_size_mixture: null
186
+ split: train
187
+ seed: 95818
188
+ shuffle_messages: false
189
+ pad: to_max
190
+ sequence_length: 1600
191
+ shuffle: true
192
+ for_inference: false
193
+ multi_modal: torch
194
+ num_workers: 0
195
+ drop_last: true
196
+ pin_memory: true
197
+ prefetch_factor: null
198
+ persistent_workers: false
199
+ timeout: 0
200
+ rlds_dataset_name: libero_4_task_suites_no_noops
201
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
202
+ use_wrist_image: true
203
+ use_proprio: true
204
+ rlds_shuffle_buffer_size: 100000
205
+ rlds_traj_threads: 8
206
+ rlds_read_threads: 8
207
+ lerobot_episode_index_start: null
208
+ lerobot_episode_index_end: null
209
+ restore_dataloader: true
210
+ fast_forward_batches: null
211
+ evaluators:
212
+ - label: val
213
+ data:
214
+ dataset: vla_dataset_realworld
215
+ mixture: null
216
+ root_size_mixture: null
217
+ split: validation
218
+ seed: null
219
+ shuffle_messages: false
220
+ pad: to_max
221
+ sequence_length: 1600
222
+ shuffle: false
223
+ for_inference: false
224
+ multi_modal: torch
225
+ num_workers: 0
226
+ drop_last: true
227
+ pin_memory: true
228
+ prefetch_factor: null
229
+ persistent_workers: true
230
+ timeout: 0
231
+ rlds_dataset_name: libero_4_task_suites_no_noops
232
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
233
+ use_wrist_image: true
234
+ use_proprio: true
235
+ rlds_shuffle_buffer_size: 256000
236
+ rlds_traj_threads: 8
237
+ rlds_read_threads: 8
238
+ lerobot_episode_index_start: 353
239
+ lerobot_episode_index_end: 765
240
+ device_eval_batch_size: null
241
+ subset_num_batches: 64
242
+ max_examples: null
243
+ max_new_tokens: 448
244
+ mm_evaluator: null
245
+ save_dir: null
246
+ save_to_checkpoint_dir: false
247
+ eval_name: null
248
+ skip_if_metrics_cached: true
249
+ eval_interval: 0
250
+ inf_eval_interval: -1
251
+ inf_evaluators: []
252
+ save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/all_l1
253
+ remote_save_folder: null
254
+ canceled_check_interval: 50
255
+ save_interval: 500
256
+ save_interval_unsharded: 500
257
+ save_interval_ephemeral: null
258
+ save_interval_action_head: 500
259
+ save_num_checkpoints_to_keep: 1
260
+ save_num_unsharded_checkpoints_to_keep: 1
261
+ save_num_action_head_checkpoints_to_keep: 2
262
+ save_overwrite: true
263
+ force_save_unsharded: false
264
+ no_pre_train_checkpoint: true
265
+ initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
266
+ load_model_config: null
267
+ checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
268
+ load_path: null
269
+ load_path_sharded_checkpointer: null
270
+ reset_optimizer_state: false
271
+ reset_trainer_state: false
272
+ save_dataloader_state: false
273
+ reset_dataloader_state: false
274
+ keep_lr_on_load: true
275
+ sharded_checkpointer: torch_legacy
276
+ max_duration: 500000
277
+ global_train_batch_size: 126
278
+ device_train_batch_size: 15
279
+ device_train_microbatch_size: 16
280
+ device_eval_batch_size: 4
281
+ eval_subset_num_batches: -1
282
+ eval_on_load: false
283
+ device_inf_eval_batch_size: 16
284
+ inf_eval_subset_num_batches: -1
285
+ device_train_grad_accum: 0
286
+ max_grad_norm: 1.0
287
+ multi_component_grad_norm: true
288
+ batch_divisor: global_batch
289
+ max_grad_norm_ratio: null
290
+ precision: amp_bf16
291
+ wandb:
292
+ project: a1-realworld
293
+ entity: henryeap
294
+ group: null
295
+ name: realworld_20250930_131219
296
+ tags:
297
+ - watching
298
+ log_artifacts: false
299
+ rank_zero_only: true
300
+ log_interval: 1
301
+ speed_monitor:
302
+ window_size: 20
303
+ gpu_flops_available: null
304
+ console_log_interval: 1
305
+ gen1_gc_interval: 1
306
+ compile: null
307
+ fsdp:
308
+ use_orig_params: true
309
+ sharding_strategy: FULL_SHARD
310
+ wrapping_strategy: by_block_and_size
311
+ precision: float
312
+ hybrid_sharding_num_model_replicas: null
313
+ softmax_auxiliary_loss: true
314
+ softmax_auxiliary_loss_scale: 0.0001
315
+ time_limit: null
316
+ extra_steps_after_cancel: 10
317
+ python_profiling: false
318
+ torch_profiling: false
319
+ stop_at: 500000
320
+ stop_after: null
321
+ activation_checkpointing: whole_layer
322
+ fused_loss: null
cleandesk50_flow_matching/config.yaml ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: cleandesk50_20251008_163755
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 3584
7
+ n_heads: 28
8
+ n_kv_heads: 4
9
+ qkv_bias: true
10
+ clip_qkv: null
11
+ n_layers: 28
12
+ mlp_ratio: 4
13
+ mlp_hidden_size: 37888
14
+ activation_type: swiglu
15
+ block_type: sequential
16
+ block_group_size: 1
17
+ rope: true
18
+ rope_full_precision: true
19
+ rope_theta: 1000000.0
20
+ vision_backbone:
21
+ image_model_type: openai
22
+ image_default_input_size:
23
+ - 336
24
+ - 336
25
+ image_patch_size: 14
26
+ image_pos_patch_size: 14
27
+ image_emb_dim: 1024
28
+ image_num_heads: 16
29
+ image_num_key_value_heads: 16
30
+ image_num_layers: 23
31
+ image_head_dim: 64
32
+ image_mlp_dim: 4096
33
+ image_mlp_activations: quick_gelu
34
+ image_dropout_rate: 0.0
35
+ image_num_pos: 577
36
+ image_norm_eps: 1.0e-05
37
+ attention_dropout: 0.0
38
+ residual_dropout: 0.0
39
+ initializer_range: 0.02
40
+ fsdp_wrap: false
41
+ resize_mode: default
42
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
43
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
44
+ low_cpu_fsdp: true
45
+ attention_type: sdpa
46
+ float32_attention: true
47
+ attention_dropout: 0.0
48
+ attention_layer_norm: false
49
+ residual_dropout: 0.1
50
+ response_residual_dropout: 0.0
51
+ embedding_dropout: 0.0
52
+ layer_norm_type: rms
53
+ layer_norm_with_affine: true
54
+ layer_norm_eps: 1.0e-06
55
+ attention_layer_norm_with_affine: true
56
+ max_sequence_length: 4096
57
+ max_position_embeddings: null
58
+ include_bias: false
59
+ bias_for_layer_norm: null
60
+ scale_logits: false
61
+ vocab_size: 152064
62
+ embedding_size: 152064
63
+ ff_out_size: null
64
+ additional_vocab_size: 128
65
+ new_embedding_init_range: 0.02
66
+ weight_tying: false
67
+ init_device: null
68
+ init_fn: normal
69
+ init_std: 0.02
70
+ init_cutoff_factor: null
71
+ norm_after: false
72
+ precision: amp_bf16
73
+ max_crops: 12
74
+ crop_mode: overlap-and-resize-c2
75
+ use_col_tokens: true
76
+ prompt_type: uber_model
77
+ system_prompt_kind: demo_or_style
78
+ message_formatting: role
79
+ always_start_with_space: true
80
+ multi_annotation_weighting: root_subsegments
81
+ default_inference_len: 65
82
+ overlap_margins:
83
+ - 4
84
+ - 4
85
+ pad_value: 0.0
86
+ image_padding_embed: pad_and_partial_pad
87
+ fix_image_padding: true
88
+ vit_layers:
89
+ - -2
90
+ - -9
91
+ image_pooling_h: 2
92
+ image_pooling_w: 2
93
+ image_pooling_2d: attention_meanq
94
+ image_projector: mlp
95
+ image_feature_dropout: 0.0
96
+ initializer_range: 0.02
97
+ normalize_input_embeds: false
98
+ use_position_ids: true
99
+ head_dim: null
100
+ action_tokenizer:
101
+ identifier: physical-intelligence/fast
102
+ tokenizer_dir: null
103
+ action_dim: 7
104
+ horizon: 8
105
+ tokenizer:
106
+ identifier: Qwen/Qwen2-7B
107
+ tokenizer_dir: null
108
+ pad_tokenizer: true
109
+ moe_num_experts: 8
110
+ moe_top_k: 2
111
+ moe_mlp_impl: sparse
112
+ moe_log_expert_assignment: false
113
+ moe_shared_expert: false
114
+ moe_lbl_in_fp32: false
115
+ moe_interleave: false
116
+ moe_loss_weight: 0.1
117
+ moe_zloss_weight: null
118
+ moe_dropless: true
119
+ moe_capacity_factor: 1.25
120
+ action_head: flow_matching
121
+ num_diffusion_steps: 1000
122
+ num_diffusion_inference_steps: 30
123
+ use_proprio: true
124
+ action_head_dit_hidden_size: 1152
125
+ action_head_dit_depth: 28
126
+ action_head_dit_num_heads: 16
127
+ llm_causal_attention: false
128
+ action_use_left_eef: true
129
+ action_use_mobile_base: false
130
+ allow_resume: false
131
+ ft_llm: true
132
+ ft_vit: false
133
+ ft_connector: false
134
+ ft_embedding: lm_head
135
+ lora: false
136
+ use_lora: false
137
+ lora_rank: 8
138
+ lora_llm: false
139
+ lora_vit: false
140
+ lora_connector: false
141
+ early_exit: false
142
+ train_exit_random_layer: false
143
+ optimizer:
144
+ name: adamw
145
+ learning_rate: 0.0001
146
+ weight_decay: 0.01
147
+ betas:
148
+ - 0.9
149
+ - 0.95
150
+ eps: 1.0e-05
151
+ connector_learning_rate: 0.0002
152
+ vit_learning_rate: 6.0e-06
153
+ llm_learning_rate: 5.0e-05
154
+ connector_weight_decay: 0.0
155
+ vit_weight_decay: 0.0
156
+ llm_weight_decay: 0.0
157
+ connector_betas:
158
+ - 0.9
159
+ - 0.95
160
+ vit_betas:
161
+ - 0.9
162
+ - 0.95
163
+ llm_betas:
164
+ - 0.9
165
+ - 0.95
166
+ connector_eps: 1.0e-06
167
+ vit_eps: 1.0e-06
168
+ llm_eps: 1.0e-06
169
+ metrics_log_interval: 20
170
+ scheduler:
171
+ name: multimodal
172
+ units: steps
173
+ t_warmup: 100
174
+ t_max: null
175
+ alpha_f: 0.1
176
+ connector_t_warmup: 200
177
+ vit_t_warmup: 2000
178
+ llm_t_warmup: 2000
179
+ grad_clip_warmup_steps: null
180
+ grad_clip_warmup_factor: null
181
+ warmup_min_lr: 0.0
182
+ data:
183
+ dataset: vla_dataset_realworld
184
+ mixture: null
185
+ root_size_mixture: null
186
+ split: train
187
+ seed: 95818
188
+ shuffle_messages: false
189
+ pad: to_max
190
+ sequence_length: 1600
191
+ shuffle: true
192
+ for_inference: false
193
+ multi_modal: torch
194
+ num_workers: 0
195
+ drop_last: true
196
+ pin_memory: true
197
+ prefetch_factor: null
198
+ persistent_workers: false
199
+ timeout: 0
200
+ rlds_dataset_name: libero_4_task_suites_no_noops
201
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
202
+ use_wrist_image: true
203
+ use_proprio: true
204
+ rlds_shuffle_buffer_size: 100000
205
+ rlds_traj_threads: 8
206
+ rlds_read_threads: 8
207
+ lerobot_episode_index_start: null
208
+ lerobot_episode_index_end: null
209
+ restore_dataloader: true
210
+ fast_forward_batches: null
211
+ evaluators:
212
+ - label: val
213
+ data:
214
+ dataset: vla_dataset_realworld
215
+ mixture: null
216
+ root_size_mixture: null
217
+ split: validation
218
+ seed: null
219
+ shuffle_messages: false
220
+ pad: to_max
221
+ sequence_length: 1600
222
+ shuffle: false
223
+ for_inference: false
224
+ multi_modal: torch
225
+ num_workers: 0
226
+ drop_last: true
227
+ pin_memory: true
228
+ prefetch_factor: null
229
+ persistent_workers: true
230
+ timeout: 0
231
+ rlds_dataset_name: libero_4_task_suites_no_noops
232
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
233
+ use_wrist_image: true
234
+ use_proprio: true
235
+ rlds_shuffle_buffer_size: 256000
236
+ rlds_traj_threads: 8
237
+ rlds_read_threads: 8
238
+ lerobot_episode_index_start: 353
239
+ lerobot_episode_index_end: 765
240
+ device_eval_batch_size: null
241
+ subset_num_batches: 64
242
+ max_examples: null
243
+ max_new_tokens: 448
244
+ mm_evaluator: null
245
+ save_dir: null
246
+ save_to_checkpoint_dir: false
247
+ eval_name: null
248
+ skip_if_metrics_cached: true
249
+ eval_interval: 0
250
+ inf_eval_interval: -1
251
+ inf_evaluators: []
252
+ save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/cleandesk50_flow_matching
253
+ remote_save_folder: null
254
+ canceled_check_interval: 50
255
+ save_interval: 500
256
+ save_interval_unsharded: 500
257
+ save_interval_ephemeral: null
258
+ save_interval_action_head: 500
259
+ save_num_checkpoints_to_keep: 1
260
+ save_num_unsharded_checkpoints_to_keep: 1
261
+ save_num_action_head_checkpoints_to_keep: 2
262
+ save_overwrite: true
263
+ force_save_unsharded: false
264
+ no_pre_train_checkpoint: true
265
+ initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
266
+ load_model_config: null
267
+ checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
268
+ load_path: null
269
+ load_path_sharded_checkpointer: null
270
+ reset_optimizer_state: false
271
+ reset_trainer_state: false
272
+ save_dataloader_state: false
273
+ reset_dataloader_state: false
274
+ keep_lr_on_load: true
275
+ sharded_checkpointer: torch_legacy
276
+ max_duration: 500000
277
+ global_train_batch_size: 126
278
+ device_train_batch_size: 15
279
+ device_train_microbatch_size: 16
280
+ device_eval_batch_size: 4
281
+ eval_subset_num_batches: -1
282
+ eval_on_load: false
283
+ device_inf_eval_batch_size: 16
284
+ inf_eval_subset_num_batches: -1
285
+ device_train_grad_accum: 0
286
+ max_grad_norm: 1.0
287
+ multi_component_grad_norm: true
288
+ batch_divisor: global_batch
289
+ max_grad_norm_ratio: null
290
+ precision: amp_bf16
291
+ wandb:
292
+ project: a1-realworld
293
+ entity: henryeap
294
+ group: null
295
+ name: cleandesk50_20251008_163755
296
+ tags:
297
+ - watching
298
+ log_artifacts: false
299
+ rank_zero_only: true
300
+ log_interval: 1
301
+ speed_monitor:
302
+ window_size: 20
303
+ gpu_flops_available: null
304
+ console_log_interval: 1
305
+ gen1_gc_interval: 1
306
+ compile: null
307
+ fsdp:
308
+ use_orig_params: true
309
+ sharding_strategy: FULL_SHARD
310
+ wrapping_strategy: by_block_and_size
311
+ precision: float
312
+ hybrid_sharding_num_model_replicas: null
313
+ softmax_auxiliary_loss: true
314
+ softmax_auxiliary_loss_scale: 0.0001
315
+ time_limit: null
316
+ extra_steps_after_cancel: 10
317
+ python_profiling: false
318
+ torch_profiling: false
319
+ stop_at: 500000
320
+ stop_after: null
321
+ activation_checkpointing: whole_layer
322
+ fused_loss: null
cleandesk50_l1_regression/config.yaml ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: cleandesk50_20251008_163748
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 3584
7
+ n_heads: 28
8
+ n_kv_heads: 4
9
+ qkv_bias: true
10
+ clip_qkv: null
11
+ n_layers: 28
12
+ mlp_ratio: 4
13
+ mlp_hidden_size: 37888
14
+ activation_type: swiglu
15
+ block_type: sequential
16
+ block_group_size: 1
17
+ rope: true
18
+ rope_full_precision: true
19
+ rope_theta: 1000000.0
20
+ vision_backbone:
21
+ image_model_type: openai
22
+ image_default_input_size:
23
+ - 336
24
+ - 336
25
+ image_patch_size: 14
26
+ image_pos_patch_size: 14
27
+ image_emb_dim: 1024
28
+ image_num_heads: 16
29
+ image_num_key_value_heads: 16
30
+ image_num_layers: 23
31
+ image_head_dim: 64
32
+ image_mlp_dim: 4096
33
+ image_mlp_activations: quick_gelu
34
+ image_dropout_rate: 0.0
35
+ image_num_pos: 577
36
+ image_norm_eps: 1.0e-05
37
+ attention_dropout: 0.0
38
+ residual_dropout: 0.0
39
+ initializer_range: 0.02
40
+ fsdp_wrap: false
41
+ resize_mode: default
42
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
43
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
44
+ low_cpu_fsdp: true
45
+ attention_type: sdpa
46
+ float32_attention: true
47
+ attention_dropout: 0.0
48
+ attention_layer_norm: false
49
+ residual_dropout: 0.1
50
+ response_residual_dropout: 0.0
51
+ embedding_dropout: 0.0
52
+ layer_norm_type: rms
53
+ layer_norm_with_affine: true
54
+ layer_norm_eps: 1.0e-06
55
+ attention_layer_norm_with_affine: true
56
+ max_sequence_length: 4096
57
+ max_position_embeddings: null
58
+ include_bias: false
59
+ bias_for_layer_norm: null
60
+ scale_logits: false
61
+ vocab_size: 152064
62
+ embedding_size: 152064
63
+ ff_out_size: null
64
+ additional_vocab_size: 128
65
+ new_embedding_init_range: 0.02
66
+ weight_tying: false
67
+ init_device: null
68
+ init_fn: normal
69
+ init_std: 0.02
70
+ init_cutoff_factor: null
71
+ norm_after: false
72
+ precision: amp_bf16
73
+ max_crops: 12
74
+ crop_mode: overlap-and-resize-c2
75
+ use_col_tokens: true
76
+ prompt_type: uber_model
77
+ system_prompt_kind: demo_or_style
78
+ message_formatting: role
79
+ always_start_with_space: true
80
+ multi_annotation_weighting: root_subsegments
81
+ default_inference_len: 65
82
+ overlap_margins:
83
+ - 4
84
+ - 4
85
+ pad_value: 0.0
86
+ image_padding_embed: pad_and_partial_pad
87
+ fix_image_padding: true
88
+ vit_layers:
89
+ - -2
90
+ - -9
91
+ image_pooling_h: 2
92
+ image_pooling_w: 2
93
+ image_pooling_2d: attention_meanq
94
+ image_projector: mlp
95
+ image_feature_dropout: 0.0
96
+ initializer_range: 0.02
97
+ normalize_input_embeds: false
98
+ use_position_ids: true
99
+ head_dim: null
100
+ action_tokenizer:
101
+ identifier: physical-intelligence/fast
102
+ tokenizer_dir: null
103
+ action_dim: 7
104
+ horizon: 8
105
+ tokenizer:
106
+ identifier: Qwen/Qwen2-7B
107
+ tokenizer_dir: null
108
+ pad_tokenizer: true
109
+ moe_num_experts: 8
110
+ moe_top_k: 2
111
+ moe_mlp_impl: sparse
112
+ moe_log_expert_assignment: false
113
+ moe_shared_expert: false
114
+ moe_lbl_in_fp32: false
115
+ moe_interleave: false
116
+ moe_loss_weight: 0.1
117
+ moe_zloss_weight: null
118
+ moe_dropless: true
119
+ moe_capacity_factor: 1.25
120
+ action_head: l1_regression
121
+ num_diffusion_steps: 1000
122
+ num_diffusion_inference_steps: 30
123
+ use_proprio: true
124
+ action_head_dit_hidden_size: 1152
125
+ action_head_dit_depth: 28
126
+ action_head_dit_num_heads: 16
127
+ llm_causal_attention: false
128
+ action_use_left_eef: true
129
+ action_use_mobile_base: false
130
+ allow_resume: false
131
+ ft_llm: true
132
+ ft_vit: false
133
+ ft_connector: false
134
+ ft_embedding: lm_head
135
+ lora: false
136
+ use_lora: false
137
+ lora_rank: 8
138
+ lora_llm: false
139
+ lora_vit: false
140
+ lora_connector: false
141
+ early_exit: false
142
+ train_exit_random_layer: false
143
+ optimizer:
144
+ name: adamw
145
+ learning_rate: 0.0001
146
+ weight_decay: 0.01
147
+ betas:
148
+ - 0.9
149
+ - 0.95
150
+ eps: 1.0e-05
151
+ connector_learning_rate: 0.0002
152
+ vit_learning_rate: 6.0e-06
153
+ llm_learning_rate: 5.0e-05
154
+ connector_weight_decay: 0.0
155
+ vit_weight_decay: 0.0
156
+ llm_weight_decay: 0.0
157
+ connector_betas:
158
+ - 0.9
159
+ - 0.95
160
+ vit_betas:
161
+ - 0.9
162
+ - 0.95
163
+ llm_betas:
164
+ - 0.9
165
+ - 0.95
166
+ connector_eps: 1.0e-06
167
+ vit_eps: 1.0e-06
168
+ llm_eps: 1.0e-06
169
+ metrics_log_interval: 20
170
+ scheduler:
171
+ name: multimodal
172
+ units: steps
173
+ t_warmup: 100
174
+ t_max: null
175
+ alpha_f: 0.1
176
+ connector_t_warmup: 200
177
+ vit_t_warmup: 2000
178
+ llm_t_warmup: 2000
179
+ grad_clip_warmup_steps: null
180
+ grad_clip_warmup_factor: null
181
+ warmup_min_lr: 0.0
182
+ data:
183
+ dataset: vla_dataset_realworld
184
+ mixture: null
185
+ root_size_mixture: null
186
+ split: train
187
+ seed: 95818
188
+ shuffle_messages: false
189
+ pad: to_max
190
+ sequence_length: 1600
191
+ shuffle: true
192
+ for_inference: false
193
+ multi_modal: torch
194
+ num_workers: 0
195
+ drop_last: true
196
+ pin_memory: true
197
+ prefetch_factor: null
198
+ persistent_workers: false
199
+ timeout: 0
200
+ rlds_dataset_name: libero_4_task_suites_no_noops
201
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
202
+ use_wrist_image: true
203
+ use_proprio: true
204
+ rlds_shuffle_buffer_size: 100000
205
+ rlds_traj_threads: 8
206
+ rlds_read_threads: 8
207
+ lerobot_episode_index_start: null
208
+ lerobot_episode_index_end: null
209
+ restore_dataloader: true
210
+ fast_forward_batches: null
211
+ evaluators:
212
+ - label: val
213
+ data:
214
+ dataset: vla_dataset_realworld
215
+ mixture: null
216
+ root_size_mixture: null
217
+ split: validation
218
+ seed: null
219
+ shuffle_messages: false
220
+ pad: to_max
221
+ sequence_length: 1600
222
+ shuffle: false
223
+ for_inference: false
224
+ multi_modal: torch
225
+ num_workers: 0
226
+ drop_last: true
227
+ pin_memory: true
228
+ prefetch_factor: null
229
+ persistent_workers: true
230
+ timeout: 0
231
+ rlds_dataset_name: libero_4_task_suites_no_noops
232
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
233
+ use_wrist_image: true
234
+ use_proprio: true
235
+ rlds_shuffle_buffer_size: 256000
236
+ rlds_traj_threads: 8
237
+ rlds_read_threads: 8
238
+ lerobot_episode_index_start: 353
239
+ lerobot_episode_index_end: 765
240
+ device_eval_batch_size: null
241
+ subset_num_batches: 64
242
+ max_examples: null
243
+ max_new_tokens: 448
244
+ mm_evaluator: null
245
+ save_dir: null
246
+ save_to_checkpoint_dir: false
247
+ eval_name: null
248
+ skip_if_metrics_cached: true
249
+ eval_interval: 0
250
+ inf_eval_interval: -1
251
+ inf_evaluators: []
252
+ save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/cleandesk50_l1_regression
253
+ remote_save_folder: null
254
+ canceled_check_interval: 50
255
+ save_interval: 500
256
+ save_interval_unsharded: 500
257
+ save_interval_ephemeral: null
258
+ save_interval_action_head: 500
259
+ save_num_checkpoints_to_keep: 1
260
+ save_num_unsharded_checkpoints_to_keep: 1
261
+ save_num_action_head_checkpoints_to_keep: 2
262
+ save_overwrite: true
263
+ force_save_unsharded: false
264
+ no_pre_train_checkpoint: true
265
+ initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
266
+ load_model_config: null
267
+ checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
268
+ load_path: null
269
+ load_path_sharded_checkpointer: null
270
+ reset_optimizer_state: false
271
+ reset_trainer_state: false
272
+ save_dataloader_state: false
273
+ reset_dataloader_state: false
274
+ keep_lr_on_load: true
275
+ sharded_checkpointer: torch_legacy
276
+ max_duration: 500000
277
+ global_train_batch_size: 126
278
+ device_train_batch_size: 15
279
+ device_train_microbatch_size: 16
280
+ device_eval_batch_size: 4
281
+ eval_subset_num_batches: -1
282
+ eval_on_load: false
283
+ device_inf_eval_batch_size: 16
284
+ inf_eval_subset_num_batches: -1
285
+ device_train_grad_accum: 0
286
+ max_grad_norm: 1.0
287
+ multi_component_grad_norm: true
288
+ batch_divisor: global_batch
289
+ max_grad_norm_ratio: null
290
+ precision: amp_bf16
291
+ wandb:
292
+ project: a1-realworld
293
+ entity: henryeap
294
+ group: null
295
+ name: cleandesk50_20251008_163748
296
+ tags:
297
+ - watching
298
+ log_artifacts: false
299
+ rank_zero_only: true
300
+ log_interval: 1
301
+ speed_monitor:
302
+ window_size: 20
303
+ gpu_flops_available: null
304
+ console_log_interval: 1
305
+ gen1_gc_interval: 1
306
+ compile: null
307
+ fsdp:
308
+ use_orig_params: true
309
+ sharding_strategy: FULL_SHARD
310
+ wrapping_strategy: by_block_and_size
311
+ precision: float
312
+ hybrid_sharding_num_model_replicas: null
313
+ softmax_auxiliary_loss: true
314
+ softmax_auxiliary_loss_scale: 0.0001
315
+ time_limit: null
316
+ extra_steps_after_cancel: 10
317
+ python_profiling: false
318
+ torch_profiling: false
319
+ stop_at: 500000
320
+ stop_after: null
321
+ activation_checkpointing: whole_layer
322
+ fused_loss: null
cleandesk_flow_matching/config.yaml ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: cleandesk_20251005_163721
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 3584
7
+ n_heads: 28
8
+ n_kv_heads: 4
9
+ qkv_bias: true
10
+ clip_qkv: null
11
+ n_layers: 28
12
+ mlp_ratio: 4
13
+ mlp_hidden_size: 37888
14
+ activation_type: swiglu
15
+ block_type: sequential
16
+ block_group_size: 1
17
+ rope: true
18
+ rope_full_precision: true
19
+ rope_theta: 1000000.0
20
+ vision_backbone:
21
+ image_model_type: openai
22
+ image_default_input_size:
23
+ - 336
24
+ - 336
25
+ image_patch_size: 14
26
+ image_pos_patch_size: 14
27
+ image_emb_dim: 1024
28
+ image_num_heads: 16
29
+ image_num_key_value_heads: 16
30
+ image_num_layers: 23
31
+ image_head_dim: 64
32
+ image_mlp_dim: 4096
33
+ image_mlp_activations: quick_gelu
34
+ image_dropout_rate: 0.0
35
+ image_num_pos: 577
36
+ image_norm_eps: 1.0e-05
37
+ attention_dropout: 0.0
38
+ residual_dropout: 0.0
39
+ initializer_range: 0.02
40
+ fsdp_wrap: false
41
+ resize_mode: default
42
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
43
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
44
+ low_cpu_fsdp: true
45
+ attention_type: sdpa
46
+ float32_attention: true
47
+ attention_dropout: 0.0
48
+ attention_layer_norm: false
49
+ residual_dropout: 0.1
50
+ response_residual_dropout: 0.0
51
+ embedding_dropout: 0.0
52
+ layer_norm_type: rms
53
+ layer_norm_with_affine: true
54
+ layer_norm_eps: 1.0e-06
55
+ attention_layer_norm_with_affine: true
56
+ max_sequence_length: 4096
57
+ max_position_embeddings: null
58
+ include_bias: false
59
+ bias_for_layer_norm: null
60
+ scale_logits: false
61
+ vocab_size: 152064
62
+ embedding_size: 152064
63
+ ff_out_size: null
64
+ additional_vocab_size: 128
65
+ new_embedding_init_range: 0.02
66
+ weight_tying: false
67
+ init_device: null
68
+ init_fn: normal
69
+ init_std: 0.02
70
+ init_cutoff_factor: null
71
+ norm_after: false
72
+ precision: amp_bf16
73
+ max_crops: 12
74
+ crop_mode: overlap-and-resize-c2
75
+ use_col_tokens: true
76
+ prompt_type: uber_model
77
+ system_prompt_kind: demo_or_style
78
+ message_formatting: role
79
+ always_start_with_space: true
80
+ multi_annotation_weighting: root_subsegments
81
+ default_inference_len: 65
82
+ overlap_margins:
83
+ - 4
84
+ - 4
85
+ pad_value: 0.0
86
+ image_padding_embed: pad_and_partial_pad
87
+ fix_image_padding: true
88
+ vit_layers:
89
+ - -2
90
+ - -9
91
+ image_pooling_h: 2
92
+ image_pooling_w: 2
93
+ image_pooling_2d: attention_meanq
94
+ image_projector: mlp
95
+ image_feature_dropout: 0.0
96
+ initializer_range: 0.02
97
+ normalize_input_embeds: false
98
+ use_position_ids: true
99
+ head_dim: null
100
+ action_tokenizer:
101
+ identifier: physical-intelligence/fast
102
+ tokenizer_dir: null
103
+ action_dim: 7
104
+ horizon: 8
105
+ tokenizer:
106
+ identifier: Qwen/Qwen2-7B
107
+ tokenizer_dir: null
108
+ pad_tokenizer: true
109
+ moe_num_experts: 8
110
+ moe_top_k: 2
111
+ moe_mlp_impl: sparse
112
+ moe_log_expert_assignment: false
113
+ moe_shared_expert: false
114
+ moe_lbl_in_fp32: false
115
+ moe_interleave: false
116
+ moe_loss_weight: 0.1
117
+ moe_zloss_weight: null
118
+ moe_dropless: true
119
+ moe_capacity_factor: 1.25
120
+ action_head: flow_matching
121
+ num_diffusion_steps: 1000
122
+ num_diffusion_inference_steps: 30
123
+ use_proprio: true
124
+ action_head_dit_hidden_size: 1152
125
+ action_head_dit_depth: 28
126
+ action_head_dit_num_heads: 16
127
+ llm_causal_attention: false
128
+ action_use_left_eef: true
129
+ action_use_mobile_base: false
130
+ allow_resume: false
131
+ ft_llm: true
132
+ ft_vit: false
133
+ ft_connector: false
134
+ ft_embedding: lm_head
135
+ lora: false
136
+ use_lora: true
137
+ lora_rank: 8
138
+ lora_llm: false
139
+ lora_vit: false
140
+ lora_connector: false
141
+ early_exit: false
142
+ train_exit_random_layer: false
143
+ optimizer:
144
+ name: adamw
145
+ learning_rate: 0.0001
146
+ weight_decay: 0.01
147
+ betas:
148
+ - 0.9
149
+ - 0.95
150
+ eps: 1.0e-05
151
+ connector_learning_rate: 0.0002
152
+ vit_learning_rate: 6.0e-06
153
+ llm_learning_rate: 5.0e-05
154
+ connector_weight_decay: 0.0
155
+ vit_weight_decay: 0.0
156
+ llm_weight_decay: 0.0
157
+ connector_betas:
158
+ - 0.9
159
+ - 0.95
160
+ vit_betas:
161
+ - 0.9
162
+ - 0.95
163
+ llm_betas:
164
+ - 0.9
165
+ - 0.95
166
+ connector_eps: 1.0e-06
167
+ vit_eps: 1.0e-06
168
+ llm_eps: 1.0e-06
169
+ metrics_log_interval: 20
170
+ scheduler:
171
+ name: multimodal
172
+ units: steps
173
+ t_warmup: 100
174
+ t_max: null
175
+ alpha_f: 0.1
176
+ connector_t_warmup: 200
177
+ vit_t_warmup: 2000
178
+ llm_t_warmup: 2000
179
+ grad_clip_warmup_steps: null
180
+ grad_clip_warmup_factor: null
181
+ warmup_min_lr: 0.0
182
+ data:
183
+ dataset: vla_dataset_realworld
184
+ mixture: null
185
+ root_size_mixture: null
186
+ split: train
187
+ seed: 95818
188
+ shuffle_messages: false
189
+ pad: to_max
190
+ sequence_length: 1600
191
+ shuffle: true
192
+ for_inference: false
193
+ multi_modal: torch
194
+ num_workers: 0
195
+ drop_last: true
196
+ pin_memory: true
197
+ prefetch_factor: null
198
+ persistent_workers: false
199
+ timeout: 0
200
+ rlds_dataset_name: libero_4_task_suites_no_noops
201
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
202
+ use_wrist_image: true
203
+ use_proprio: true
204
+ rlds_shuffle_buffer_size: 100000
205
+ rlds_traj_threads: 8
206
+ rlds_read_threads: 8
207
+ lerobot_episode_index_start: null
208
+ lerobot_episode_index_end: null
209
+ restore_dataloader: true
210
+ fast_forward_batches: null
211
+ evaluators:
212
+ - label: val
213
+ data:
214
+ dataset: vla_dataset_realworld
215
+ mixture: null
216
+ root_size_mixture: null
217
+ split: validation
218
+ seed: null
219
+ shuffle_messages: false
220
+ pad: to_max
221
+ sequence_length: 1600
222
+ shuffle: false
223
+ for_inference: false
224
+ multi_modal: torch
225
+ num_workers: 0
226
+ drop_last: true
227
+ pin_memory: true
228
+ prefetch_factor: null
229
+ persistent_workers: true
230
+ timeout: 0
231
+ rlds_dataset_name: libero_4_task_suites_no_noops
232
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
233
+ use_wrist_image: true
234
+ use_proprio: true
235
+ rlds_shuffle_buffer_size: 256000
236
+ rlds_traj_threads: 8
237
+ rlds_read_threads: 8
238
+ lerobot_episode_index_start: 353
239
+ lerobot_episode_index_end: 765
240
+ device_eval_batch_size: null
241
+ subset_num_batches: 64
242
+ max_examples: null
243
+ max_new_tokens: 448
244
+ mm_evaluator: null
245
+ save_dir: null
246
+ save_to_checkpoint_dir: false
247
+ eval_name: null
248
+ skip_if_metrics_cached: true
249
+ eval_interval: 0
250
+ inf_eval_interval: -1
251
+ inf_evaluators: []
252
+ save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/cleandesk_flow_matching
253
+ remote_save_folder: null
254
+ canceled_check_interval: 50
255
+ save_interval: 500
256
+ save_interval_unsharded: 500
257
+ save_interval_ephemeral: null
258
+ save_interval_action_head: 500
259
+ save_num_checkpoints_to_keep: 1
260
+ save_num_unsharded_checkpoints_to_keep: 1
261
+ save_num_action_head_checkpoints_to_keep: 2
262
+ save_overwrite: true
263
+ force_save_unsharded: false
264
+ no_pre_train_checkpoint: true
265
+ initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
266
+ load_model_config: null
267
+ checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
268
+ load_path: null
269
+ load_path_sharded_checkpointer: null
270
+ reset_optimizer_state: false
271
+ reset_trainer_state: false
272
+ save_dataloader_state: false
273
+ reset_dataloader_state: false
274
+ keep_lr_on_load: true
275
+ sharded_checkpointer: torch_legacy
276
+ max_duration: 500000
277
+ global_train_batch_size: 126
278
+ device_train_batch_size: 15
279
+ device_train_microbatch_size: 16
280
+ device_eval_batch_size: 4
281
+ eval_subset_num_batches: -1
282
+ eval_on_load: false
283
+ device_inf_eval_batch_size: 16
284
+ inf_eval_subset_num_batches: -1
285
+ device_train_grad_accum: 0
286
+ max_grad_norm: 1.0
287
+ multi_component_grad_norm: true
288
+ batch_divisor: global_batch
289
+ max_grad_norm_ratio: null
290
+ precision: amp_bf16
291
+ wandb:
292
+ project: a1-realworld
293
+ entity: henryeap
294
+ group: null
295
+ name: cleandesk_20251005_163721
296
+ tags:
297
+ - watching
298
+ log_artifacts: false
299
+ rank_zero_only: true
300
+ log_interval: 1
301
+ speed_monitor:
302
+ window_size: 20
303
+ gpu_flops_available: null
304
+ console_log_interval: 1
305
+ gen1_gc_interval: 1
306
+ compile: null
307
+ fsdp:
308
+ use_orig_params: true
309
+ sharding_strategy: FULL_SHARD
310
+ wrapping_strategy: by_block_and_size
311
+ precision: float
312
+ hybrid_sharding_num_model_replicas: null
313
+ softmax_auxiliary_loss: true
314
+ softmax_auxiliary_loss_scale: 0.0001
315
+ time_limit: null
316
+ extra_steps_after_cancel: 10
317
+ python_profiling: false
318
+ torch_profiling: false
319
+ stop_at: 500000
320
+ stop_after: null
321
+ activation_checkpointing: whole_layer
322
+ fused_loss: null
cleandesk_l1_regression/config.yaml ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: cleandesk_20251008_163754
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 3584
7
+ n_heads: 28
8
+ n_kv_heads: 4
9
+ qkv_bias: true
10
+ clip_qkv: null
11
+ n_layers: 28
12
+ mlp_ratio: 4
13
+ mlp_hidden_size: 37888
14
+ activation_type: swiglu
15
+ block_type: sequential
16
+ block_group_size: 1
17
+ rope: true
18
+ rope_full_precision: true
19
+ rope_theta: 1000000.0
20
+ vision_backbone:
21
+ image_model_type: openai
22
+ image_default_input_size:
23
+ - 336
24
+ - 336
25
+ image_patch_size: 14
26
+ image_pos_patch_size: 14
27
+ image_emb_dim: 1024
28
+ image_num_heads: 16
29
+ image_num_key_value_heads: 16
30
+ image_num_layers: 23
31
+ image_head_dim: 64
32
+ image_mlp_dim: 4096
33
+ image_mlp_activations: quick_gelu
34
+ image_dropout_rate: 0.0
35
+ image_num_pos: 577
36
+ image_norm_eps: 1.0e-05
37
+ attention_dropout: 0.0
38
+ residual_dropout: 0.0
39
+ initializer_range: 0.02
40
+ fsdp_wrap: false
41
+ resize_mode: default
42
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
43
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
44
+ low_cpu_fsdp: true
45
+ attention_type: sdpa
46
+ float32_attention: true
47
+ attention_dropout: 0.0
48
+ attention_layer_norm: false
49
+ residual_dropout: 0.1
50
+ response_residual_dropout: 0.0
51
+ embedding_dropout: 0.0
52
+ layer_norm_type: rms
53
+ layer_norm_with_affine: true
54
+ layer_norm_eps: 1.0e-06
55
+ attention_layer_norm_with_affine: true
56
+ max_sequence_length: 4096
57
+ max_position_embeddings: null
58
+ include_bias: false
59
+ bias_for_layer_norm: null
60
+ scale_logits: false
61
+ vocab_size: 152064
62
+ embedding_size: 152064
63
+ ff_out_size: null
64
+ additional_vocab_size: 128
65
+ new_embedding_init_range: 0.02
66
+ weight_tying: false
67
+ init_device: null
68
+ init_fn: normal
69
+ init_std: 0.02
70
+ init_cutoff_factor: null
71
+ norm_after: false
72
+ precision: amp_bf16
73
+ max_crops: 12
74
+ crop_mode: overlap-and-resize-c2
75
+ use_col_tokens: true
76
+ prompt_type: uber_model
77
+ system_prompt_kind: demo_or_style
78
+ message_formatting: role
79
+ always_start_with_space: true
80
+ multi_annotation_weighting: root_subsegments
81
+ default_inference_len: 65
82
+ overlap_margins:
83
+ - 4
84
+ - 4
85
+ pad_value: 0.0
86
+ image_padding_embed: pad_and_partial_pad
87
+ fix_image_padding: true
88
+ vit_layers:
89
+ - -2
90
+ - -9
91
+ image_pooling_h: 2
92
+ image_pooling_w: 2
93
+ image_pooling_2d: attention_meanq
94
+ image_projector: mlp
95
+ image_feature_dropout: 0.0
96
+ initializer_range: 0.02
97
+ normalize_input_embeds: false
98
+ use_position_ids: true
99
+ head_dim: null
100
+ action_tokenizer:
101
+ identifier: physical-intelligence/fast
102
+ tokenizer_dir: null
103
+ action_dim: 7
104
+ horizon: 8
105
+ tokenizer:
106
+ identifier: Qwen/Qwen2-7B
107
+ tokenizer_dir: null
108
+ pad_tokenizer: true
109
+ moe_num_experts: 8
110
+ moe_top_k: 2
111
+ moe_mlp_impl: sparse
112
+ moe_log_expert_assignment: false
113
+ moe_shared_expert: false
114
+ moe_lbl_in_fp32: false
115
+ moe_interleave: false
116
+ moe_loss_weight: 0.1
117
+ moe_zloss_weight: null
118
+ moe_dropless: true
119
+ moe_capacity_factor: 1.25
120
+ action_head: l1_regression
121
+ num_diffusion_steps: 1000
122
+ num_diffusion_inference_steps: 30
123
+ use_proprio: true
124
+ action_head_dit_hidden_size: 1152
125
+ action_head_dit_depth: 28
126
+ action_head_dit_num_heads: 16
127
+ llm_causal_attention: false
128
+ action_use_left_eef: true
129
+ action_use_mobile_base: false
130
+ allow_resume: false
131
+ ft_llm: true
132
+ ft_vit: false
133
+ ft_connector: false
134
+ ft_embedding: lm_head
135
+ lora: false
136
+ use_lora: false
137
+ lora_rank: 8
138
+ lora_llm: false
139
+ lora_vit: false
140
+ lora_connector: false
141
+ early_exit: false
142
+ train_exit_random_layer: false
143
+ optimizer:
144
+ name: adamw
145
+ learning_rate: 0.0001
146
+ weight_decay: 0.01
147
+ betas:
148
+ - 0.9
149
+ - 0.95
150
+ eps: 1.0e-05
151
+ connector_learning_rate: 0.0002
152
+ vit_learning_rate: 6.0e-06
153
+ llm_learning_rate: 5.0e-05
154
+ connector_weight_decay: 0.0
155
+ vit_weight_decay: 0.0
156
+ llm_weight_decay: 0.0
157
+ connector_betas:
158
+ - 0.9
159
+ - 0.95
160
+ vit_betas:
161
+ - 0.9
162
+ - 0.95
163
+ llm_betas:
164
+ - 0.9
165
+ - 0.95
166
+ connector_eps: 1.0e-06
167
+ vit_eps: 1.0e-06
168
+ llm_eps: 1.0e-06
169
+ metrics_log_interval: 20
170
+ scheduler:
171
+ name: multimodal
172
+ units: steps
173
+ t_warmup: 100
174
+ t_max: null
175
+ alpha_f: 0.1
176
+ connector_t_warmup: 200
177
+ vit_t_warmup: 2000
178
+ llm_t_warmup: 2000
179
+ grad_clip_warmup_steps: null
180
+ grad_clip_warmup_factor: null
181
+ warmup_min_lr: 0.0
182
+ data:
183
+ dataset: vla_dataset_realworld
184
+ mixture: null
185
+ root_size_mixture: null
186
+ split: train
187
+ seed: 95818
188
+ shuffle_messages: false
189
+ pad: to_max
190
+ sequence_length: 1600
191
+ shuffle: true
192
+ for_inference: false
193
+ multi_modal: torch
194
+ num_workers: 0
195
+ drop_last: true
196
+ pin_memory: true
197
+ prefetch_factor: null
198
+ persistent_workers: false
199
+ timeout: 0
200
+ rlds_dataset_name: libero_4_task_suites_no_noops
201
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
202
+ use_wrist_image: true
203
+ use_proprio: true
204
+ rlds_shuffle_buffer_size: 100000
205
+ rlds_traj_threads: 8
206
+ rlds_read_threads: 8
207
+ lerobot_episode_index_start: null
208
+ lerobot_episode_index_end: null
209
+ restore_dataloader: true
210
+ fast_forward_batches: null
211
+ evaluators:
212
+ - label: val
213
+ data:
214
+ dataset: vla_dataset_realworld
215
+ mixture: null
216
+ root_size_mixture: null
217
+ split: validation
218
+ seed: null
219
+ shuffle_messages: false
220
+ pad: to_max
221
+ sequence_length: 1600
222
+ shuffle: false
223
+ for_inference: false
224
+ multi_modal: torch
225
+ num_workers: 0
226
+ drop_last: true
227
+ pin_memory: true
228
+ prefetch_factor: null
229
+ persistent_workers: true
230
+ timeout: 0
231
+ rlds_dataset_name: libero_4_task_suites_no_noops
232
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
233
+ use_wrist_image: true
234
+ use_proprio: true
235
+ rlds_shuffle_buffer_size: 256000
236
+ rlds_traj_threads: 8
237
+ rlds_read_threads: 8
238
+ lerobot_episode_index_start: 353
239
+ lerobot_episode_index_end: 765
240
+ device_eval_batch_size: null
241
+ subset_num_batches: 64
242
+ max_examples: null
243
+ max_new_tokens: 448
244
+ mm_evaluator: null
245
+ save_dir: null
246
+ save_to_checkpoint_dir: false
247
+ eval_name: null
248
+ skip_if_metrics_cached: true
249
+ eval_interval: 0
250
+ inf_eval_interval: -1
251
+ inf_evaluators: []
252
+ save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/cleandesk_l1_regression
253
+ remote_save_folder: null
254
+ canceled_check_interval: 50
255
+ save_interval: 500
256
+ save_interval_unsharded: 500
257
+ save_interval_ephemeral: null
258
+ save_interval_action_head: 500
259
+ save_num_checkpoints_to_keep: 1
260
+ save_num_unsharded_checkpoints_to_keep: 1
261
+ save_num_action_head_checkpoints_to_keep: 2
262
+ save_overwrite: true
263
+ force_save_unsharded: false
264
+ no_pre_train_checkpoint: true
265
+ initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
266
+ load_model_config: null
267
+ checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
268
+ load_path: null
269
+ load_path_sharded_checkpointer: null
270
+ reset_optimizer_state: false
271
+ reset_trainer_state: false
272
+ save_dataloader_state: false
273
+ reset_dataloader_state: false
274
+ keep_lr_on_load: true
275
+ sharded_checkpointer: torch_legacy
276
+ max_duration: 500000
277
+ global_train_batch_size: 126
278
+ device_train_batch_size: 15
279
+ device_train_microbatch_size: 16
280
+ device_eval_batch_size: 4
281
+ eval_subset_num_batches: -1
282
+ eval_on_load: false
283
+ device_inf_eval_batch_size: 16
284
+ inf_eval_subset_num_batches: -1
285
+ device_train_grad_accum: 0
286
+ max_grad_norm: 1.0
287
+ multi_component_grad_norm: true
288
+ batch_divisor: global_batch
289
+ max_grad_norm_ratio: null
290
+ precision: amp_bf16
291
+ wandb:
292
+ project: a1-realworld
293
+ entity: henryeap
294
+ group: null
295
+ name: cleandesk_20251008_163754
296
+ tags:
297
+ - watching
298
+ log_artifacts: false
299
+ rank_zero_only: true
300
+ log_interval: 1
301
+ speed_monitor:
302
+ window_size: 20
303
+ gpu_flops_available: null
304
+ console_log_interval: 1
305
+ gen1_gc_interval: 1
306
+ compile: null
307
+ fsdp:
308
+ use_orig_params: true
309
+ sharding_strategy: FULL_SHARD
310
+ wrapping_strategy: by_block_and_size
311
+ precision: float
312
+ hybrid_sharding_num_model_replicas: null
313
+ softmax_auxiliary_loss: true
314
+ softmax_auxiliary_loss_scale: 0.0001
315
+ time_limit: null
316
+ extra_steps_after_cancel: 10
317
+ python_profiling: false
318
+ torch_profiling: false
319
+ stop_at: 500000
320
+ stop_after: null
321
+ activation_checkpointing: whole_layer
322
+ fused_loss: null
config.yaml ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: glue_20251002_155411
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 3584
7
+ n_heads: 28
8
+ n_kv_heads: 4
9
+ qkv_bias: true
10
+ clip_qkv: null
11
+ n_layers: 28
12
+ mlp_ratio: 4
13
+ mlp_hidden_size: 37888
14
+ activation_type: swiglu
15
+ block_type: sequential
16
+ block_group_size: 1
17
+ rope: true
18
+ rope_full_precision: true
19
+ rope_theta: 1000000.0
20
+ vision_backbone:
21
+ image_model_type: openai
22
+ image_default_input_size:
23
+ - 336
24
+ - 336
25
+ image_patch_size: 14
26
+ image_pos_patch_size: 14
27
+ image_emb_dim: 1024
28
+ image_num_heads: 16
29
+ image_num_key_value_heads: 16
30
+ image_num_layers: 23
31
+ image_head_dim: 64
32
+ image_mlp_dim: 4096
33
+ image_mlp_activations: quick_gelu
34
+ image_dropout_rate: 0.0
35
+ image_num_pos: 577
36
+ image_norm_eps: 1.0e-05
37
+ attention_dropout: 0.0
38
+ residual_dropout: 0.0
39
+ initializer_range: 0.02
40
+ fsdp_wrap: false
41
+ resize_mode: default
42
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
43
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
44
+ low_cpu_fsdp: true
45
+ attention_type: sdpa
46
+ float32_attention: true
47
+ attention_dropout: 0.0
48
+ attention_layer_norm: false
49
+ residual_dropout: 0.1
50
+ response_residual_dropout: 0.0
51
+ embedding_dropout: 0.0
52
+ layer_norm_type: rms
53
+ layer_norm_with_affine: true
54
+ layer_norm_eps: 1.0e-06
55
+ attention_layer_norm_with_affine: true
56
+ max_sequence_length: 4096
57
+ max_position_embeddings: null
58
+ include_bias: false
59
+ bias_for_layer_norm: null
60
+ scale_logits: false
61
+ vocab_size: 152064
62
+ embedding_size: 152064
63
+ ff_out_size: null
64
+ additional_vocab_size: 128
65
+ new_embedding_init_range: 0.02
66
+ weight_tying: false
67
+ init_device: null
68
+ init_fn: normal
69
+ init_std: 0.02
70
+ init_cutoff_factor: null
71
+ norm_after: false
72
+ precision: amp_bf16
73
+ max_crops: 12
74
+ crop_mode: overlap-and-resize-c2
75
+ use_col_tokens: true
76
+ prompt_type: uber_model
77
+ system_prompt_kind: demo_or_style
78
+ message_formatting: role
79
+ always_start_with_space: true
80
+ multi_annotation_weighting: root_subsegments
81
+ default_inference_len: 65
82
+ overlap_margins:
83
+ - 4
84
+ - 4
85
+ pad_value: 0.0
86
+ image_padding_embed: pad_and_partial_pad
87
+ fix_image_padding: true
88
+ vit_layers:
89
+ - -2
90
+ - -9
91
+ image_pooling_h: 2
92
+ image_pooling_w: 2
93
+ image_pooling_2d: attention_meanq
94
+ image_projector: mlp
95
+ image_feature_dropout: 0.0
96
+ initializer_range: 0.02
97
+ normalize_input_embeds: false
98
+ use_position_ids: true
99
+ head_dim: null
100
+ action_tokenizer:
101
+ identifier: physical-intelligence/fast
102
+ tokenizer_dir: null
103
+ action_dim: 7
104
+ horizon: 8
105
+ tokenizer:
106
+ identifier: Qwen/Qwen2-7B
107
+ tokenizer_dir: null
108
+ pad_tokenizer: true
109
+ moe_num_experts: 8
110
+ moe_top_k: 2
111
+ moe_mlp_impl: sparse
112
+ moe_log_expert_assignment: false
113
+ moe_shared_expert: false
114
+ moe_lbl_in_fp32: false
115
+ moe_interleave: false
116
+ moe_loss_weight: 0.1
117
+ moe_zloss_weight: null
118
+ moe_dropless: true
119
+ moe_capacity_factor: 1.25
120
+ action_head: l1_regression
121
+ num_diffusion_steps: 1000
122
+ num_diffusion_inference_steps: 30
123
+ use_proprio: true
124
+ action_head_dit_hidden_size: 1152
125
+ action_head_dit_depth: 28
126
+ action_head_dit_num_heads: 16
127
+ llm_causal_attention: false
128
+ action_use_left_eef: true
129
+ action_use_mobile_base: false
130
+ allow_resume: false
131
+ ft_llm: true
132
+ ft_vit: false
133
+ ft_connector: false
134
+ ft_embedding: lm_head
135
+ lora: false
136
+ use_lora: true
137
+ lora_rank: 8
138
+ lora_llm: false
139
+ lora_vit: false
140
+ lora_connector: false
141
+ early_exit: false
142
+ train_exit_random_layer: false
143
+ optimizer:
144
+ name: adamw
145
+ learning_rate: 0.0001
146
+ weight_decay: 0.01
147
+ betas:
148
+ - 0.9
149
+ - 0.95
150
+ eps: 1.0e-05
151
+ connector_learning_rate: 0.0002
152
+ vit_learning_rate: 6.0e-06
153
+ llm_learning_rate: 5.0e-05
154
+ connector_weight_decay: 0.0
155
+ vit_weight_decay: 0.0
156
+ llm_weight_decay: 0.0
157
+ connector_betas:
158
+ - 0.9
159
+ - 0.95
160
+ vit_betas:
161
+ - 0.9
162
+ - 0.95
163
+ llm_betas:
164
+ - 0.9
165
+ - 0.95
166
+ connector_eps: 1.0e-06
167
+ vit_eps: 1.0e-06
168
+ llm_eps: 1.0e-06
169
+ metrics_log_interval: 20
170
+ scheduler:
171
+ name: multimodal
172
+ units: steps
173
+ t_warmup: 100
174
+ t_max: null
175
+ alpha_f: 0.1
176
+ connector_t_warmup: 200
177
+ vit_t_warmup: 2000
178
+ llm_t_warmup: 2000
179
+ grad_clip_warmup_steps: null
180
+ grad_clip_warmup_factor: null
181
+ warmup_min_lr: 0.0
182
+ data:
183
+ dataset: vla_dataset_realworld
184
+ mixture: null
185
+ root_size_mixture: null
186
+ split: train
187
+ seed: 95818
188
+ shuffle_messages: false
189
+ pad: to_max
190
+ sequence_length: 1600
191
+ shuffle: true
192
+ for_inference: false
193
+ multi_modal: torch
194
+ num_workers: 0
195
+ drop_last: true
196
+ pin_memory: true
197
+ prefetch_factor: null
198
+ persistent_workers: false
199
+ timeout: 0
200
+ rlds_dataset_name: libero_4_task_suites_no_noops
201
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
202
+ use_wrist_image: true
203
+ use_proprio: true
204
+ rlds_shuffle_buffer_size: 100000
205
+ rlds_traj_threads: 8
206
+ rlds_read_threads: 8
207
+ lerobot_episode_index_start: null
208
+ lerobot_episode_index_end: null
209
+ restore_dataloader: true
210
+ fast_forward_batches: null
211
+ evaluators:
212
+ - label: val
213
+ data:
214
+ dataset: vla_dataset_realworld
215
+ mixture: null
216
+ root_size_mixture: null
217
+ split: validation
218
+ seed: null
219
+ shuffle_messages: false
220
+ pad: to_max
221
+ sequence_length: 1600
222
+ shuffle: false
223
+ for_inference: false
224
+ multi_modal: torch
225
+ num_workers: 0
226
+ drop_last: true
227
+ pin_memory: true
228
+ prefetch_factor: null
229
+ persistent_workers: true
230
+ timeout: 0
231
+ rlds_dataset_name: libero_4_task_suites_no_noops
232
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
233
+ use_wrist_image: true
234
+ use_proprio: true
235
+ rlds_shuffle_buffer_size: 256000
236
+ rlds_traj_threads: 8
237
+ rlds_read_threads: 8
238
+ lerobot_episode_index_start: 353
239
+ lerobot_episode_index_end: 765
240
+ device_eval_batch_size: null
241
+ subset_num_batches: 64
242
+ max_examples: null
243
+ max_new_tokens: 448
244
+ mm_evaluator: null
245
+ save_dir: null
246
+ save_to_checkpoint_dir: false
247
+ eval_name: null
248
+ skip_if_metrics_cached: true
249
+ eval_interval: 0
250
+ inf_eval_interval: -1
251
+ inf_evaluators: []
252
+ save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/
253
+ remote_save_folder: null
254
+ canceled_check_interval: 50
255
+ save_interval: 500
256
+ save_interval_unsharded: 500
257
+ save_interval_ephemeral: null
258
+ save_interval_action_head: 500
259
+ save_num_checkpoints_to_keep: 1
260
+ save_num_unsharded_checkpoints_to_keep: 1
261
+ save_num_action_head_checkpoints_to_keep: 2
262
+ save_overwrite: true
263
+ force_save_unsharded: false
264
+ no_pre_train_checkpoint: true
265
+ initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
266
+ load_model_config: null
267
+ checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
268
+ load_path: null
269
+ load_path_sharded_checkpointer: null
270
+ reset_optimizer_state: false
271
+ reset_trainer_state: false
272
+ save_dataloader_state: false
273
+ reset_dataloader_state: false
274
+ keep_lr_on_load: true
275
+ sharded_checkpointer: torch_legacy
276
+ max_duration: 500000
277
+ global_train_batch_size: 126
278
+ device_train_batch_size: 15
279
+ device_train_microbatch_size: 16
280
+ device_eval_batch_size: 4
281
+ eval_subset_num_batches: -1
282
+ eval_on_load: false
283
+ device_inf_eval_batch_size: 16
284
+ inf_eval_subset_num_batches: -1
285
+ device_train_grad_accum: 0
286
+ max_grad_norm: 1.0
287
+ multi_component_grad_norm: true
288
+ batch_divisor: global_batch
289
+ max_grad_norm_ratio: null
290
+ precision: amp_bf16
291
+ wandb:
292
+ project: a1-realworld
293
+ entity: henryeap
294
+ group: null
295
+ name: glue_20251002_155411
296
+ tags:
297
+ - watching
298
+ log_artifacts: false
299
+ rank_zero_only: true
300
+ log_interval: 1
301
+ speed_monitor:
302
+ window_size: 20
303
+ gpu_flops_available: null
304
+ console_log_interval: 1
305
+ gen1_gc_interval: 1
306
+ compile: null
307
+ fsdp:
308
+ use_orig_params: true
309
+ sharding_strategy: FULL_SHARD
310
+ wrapping_strategy: by_block_and_size
311
+ precision: float
312
+ hybrid_sharding_num_model_replicas: null
313
+ softmax_auxiliary_loss: true
314
+ softmax_auxiliary_loss_scale: 0.0001
315
+ time_limit: null
316
+ extra_steps_after_cancel: 10
317
+ python_profiling: false
318
+ torch_profiling: false
319
+ stop_at: 500000
320
+ stop_after: null
321
+ activation_checkpointing: whole_layer
322
+ fused_loss: null
eraser_flow_matching/config.yaml ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: eraser_20251011_163756
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 3584
7
+ n_heads: 28
8
+ n_kv_heads: 4
9
+ qkv_bias: true
10
+ clip_qkv: null
11
+ n_layers: 28
12
+ mlp_ratio: 4
13
+ mlp_hidden_size: 37888
14
+ activation_type: swiglu
15
+ block_type: sequential
16
+ block_group_size: 1
17
+ rope: true
18
+ rope_full_precision: true
19
+ rope_theta: 1000000.0
20
+ vision_backbone:
21
+ image_model_type: openai
22
+ image_default_input_size:
23
+ - 336
24
+ - 336
25
+ image_patch_size: 14
26
+ image_pos_patch_size: 14
27
+ image_emb_dim: 1024
28
+ image_num_heads: 16
29
+ image_num_key_value_heads: 16
30
+ image_num_layers: 23
31
+ image_head_dim: 64
32
+ image_mlp_dim: 4096
33
+ image_mlp_activations: quick_gelu
34
+ image_dropout_rate: 0.0
35
+ image_num_pos: 577
36
+ image_norm_eps: 1.0e-05
37
+ attention_dropout: 0.0
38
+ residual_dropout: 0.0
39
+ initializer_range: 0.02
40
+ fsdp_wrap: false
41
+ resize_mode: default
42
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
43
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
44
+ low_cpu_fsdp: true
45
+ attention_type: sdpa
46
+ float32_attention: true
47
+ attention_dropout: 0.0
48
+ attention_layer_norm: false
49
+ residual_dropout: 0.1
50
+ response_residual_dropout: 0.0
51
+ embedding_dropout: 0.0
52
+ layer_norm_type: rms
53
+ layer_norm_with_affine: true
54
+ layer_norm_eps: 1.0e-06
55
+ attention_layer_norm_with_affine: true
56
+ max_sequence_length: 4096
57
+ max_position_embeddings: null
58
+ include_bias: false
59
+ bias_for_layer_norm: null
60
+ scale_logits: false
61
+ vocab_size: 152064
62
+ embedding_size: 152064
63
+ ff_out_size: null
64
+ additional_vocab_size: 128
65
+ new_embedding_init_range: 0.02
66
+ weight_tying: false
67
+ init_device: null
68
+ init_fn: normal
69
+ init_std: 0.02
70
+ init_cutoff_factor: null
71
+ norm_after: false
72
+ precision: amp_bf16
73
+ max_crops: 12
74
+ crop_mode: overlap-and-resize-c2
75
+ use_col_tokens: true
76
+ prompt_type: uber_model
77
+ system_prompt_kind: demo_or_style
78
+ message_formatting: role
79
+ always_start_with_space: true
80
+ multi_annotation_weighting: root_subsegments
81
+ default_inference_len: 65
82
+ overlap_margins:
83
+ - 4
84
+ - 4
85
+ pad_value: 0.0
86
+ image_padding_embed: pad_and_partial_pad
87
+ fix_image_padding: true
88
+ vit_layers:
89
+ - -2
90
+ - -9
91
+ image_pooling_h: 2
92
+ image_pooling_w: 2
93
+ image_pooling_2d: attention_meanq
94
+ image_projector: mlp
95
+ image_feature_dropout: 0.0
96
+ initializer_range: 0.02
97
+ normalize_input_embeds: false
98
+ use_position_ids: true
99
+ head_dim: null
100
+ action_tokenizer:
101
+ identifier: physical-intelligence/fast
102
+ tokenizer_dir: null
103
+ action_dim: 7
104
+ horizon: 8
105
+ tokenizer:
106
+ identifier: Qwen/Qwen2-7B
107
+ tokenizer_dir: null
108
+ pad_tokenizer: true
109
+ moe_num_experts: 8
110
+ moe_top_k: 2
111
+ moe_mlp_impl: sparse
112
+ moe_log_expert_assignment: false
113
+ moe_shared_expert: false
114
+ moe_lbl_in_fp32: false
115
+ moe_interleave: false
116
+ moe_loss_weight: 0.1
117
+ moe_zloss_weight: null
118
+ moe_dropless: true
119
+ moe_capacity_factor: 1.25
120
+ action_head: flow_matching
121
+ num_diffusion_steps: 1000
122
+ num_diffusion_inference_steps: 30
123
+ use_proprio: true
124
+ action_head_dit_hidden_size: 1152
125
+ action_head_dit_depth: 28
126
+ action_head_dit_num_heads: 16
127
+ llm_causal_attention: false
128
+ action_use_left_eef: true
129
+ action_use_mobile_base: false
130
+ allow_resume: false
131
+ ft_llm: true
132
+ ft_vit: false
133
+ ft_connector: false
134
+ ft_embedding: lm_head
135
+ lora: false
136
+ use_lora: false
137
+ lora_rank: 8
138
+ lora_llm: false
139
+ lora_vit: false
140
+ lora_connector: false
141
+ early_exit: false
142
+ train_exit_random_layer: false
143
+ optimizer:
144
+ name: adamw
145
+ learning_rate: 0.0001
146
+ weight_decay: 0.01
147
+ betas:
148
+ - 0.9
149
+ - 0.95
150
+ eps: 1.0e-05
151
+ connector_learning_rate: 0.0002
152
+ vit_learning_rate: 6.0e-06
153
+ llm_learning_rate: 5.0e-05
154
+ connector_weight_decay: 0.0
155
+ vit_weight_decay: 0.0
156
+ llm_weight_decay: 0.0
157
+ connector_betas:
158
+ - 0.9
159
+ - 0.95
160
+ vit_betas:
161
+ - 0.9
162
+ - 0.95
163
+ llm_betas:
164
+ - 0.9
165
+ - 0.95
166
+ connector_eps: 1.0e-06
167
+ vit_eps: 1.0e-06
168
+ llm_eps: 1.0e-06
169
+ metrics_log_interval: 20
170
+ scheduler:
171
+ name: multimodal
172
+ units: steps
173
+ t_warmup: 100
174
+ t_max: null
175
+ alpha_f: 0.1
176
+ connector_t_warmup: 200
177
+ vit_t_warmup: 2000
178
+ llm_t_warmup: 2000
179
+ grad_clip_warmup_steps: null
180
+ grad_clip_warmup_factor: null
181
+ warmup_min_lr: 0.0
182
+ data:
183
+ dataset: vla_dataset_realworld
184
+ mixture: null
185
+ root_size_mixture: null
186
+ split: train
187
+ seed: 95818
188
+ shuffle_messages: false
189
+ pad: to_max
190
+ sequence_length: 1600
191
+ shuffle: true
192
+ for_inference: false
193
+ multi_modal: torch
194
+ num_workers: 0
195
+ drop_last: true
196
+ pin_memory: true
197
+ prefetch_factor: null
198
+ persistent_workers: false
199
+ timeout: 0
200
+ rlds_dataset_name: libero_4_task_suites_no_noops
201
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
202
+ use_wrist_image: true
203
+ use_proprio: true
204
+ rlds_shuffle_buffer_size: 100000
205
+ rlds_traj_threads: 8
206
+ rlds_read_threads: 8
207
+ lerobot_episode_index_start: null
208
+ lerobot_episode_index_end: null
209
+ restore_dataloader: true
210
+ fast_forward_batches: null
211
+ evaluators:
212
+ - label: val
213
+ data:
214
+ dataset: vla_dataset_realworld
215
+ mixture: null
216
+ root_size_mixture: null
217
+ split: validation
218
+ seed: null
219
+ shuffle_messages: false
220
+ pad: to_max
221
+ sequence_length: 1600
222
+ shuffle: false
223
+ for_inference: false
224
+ multi_modal: torch
225
+ num_workers: 0
226
+ drop_last: true
227
+ pin_memory: true
228
+ prefetch_factor: null
229
+ persistent_workers: true
230
+ timeout: 0
231
+ rlds_dataset_name: libero_4_task_suites_no_noops
232
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
233
+ use_wrist_image: true
234
+ use_proprio: true
235
+ rlds_shuffle_buffer_size: 256000
236
+ rlds_traj_threads: 8
237
+ rlds_read_threads: 8
238
+ lerobot_episode_index_start: 353
239
+ lerobot_episode_index_end: 765
240
+ device_eval_batch_size: null
241
+ subset_num_batches: 64
242
+ max_examples: null
243
+ max_new_tokens: 448
244
+ mm_evaluator: null
245
+ save_dir: null
246
+ save_to_checkpoint_dir: false
247
+ eval_name: null
248
+ skip_if_metrics_cached: true
249
+ eval_interval: 0
250
+ inf_eval_interval: -1
251
+ inf_evaluators: []
252
+ save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/eraser_flow_matching
253
+ remote_save_folder: null
254
+ canceled_check_interval: 50
255
+ save_interval: 500
256
+ save_interval_unsharded: 500
257
+ save_interval_ephemeral: null
258
+ save_interval_action_head: 500
259
+ save_num_checkpoints_to_keep: 1
260
+ save_num_unsharded_checkpoints_to_keep: 1
261
+ save_num_action_head_checkpoints_to_keep: 2
262
+ save_overwrite: true
263
+ force_save_unsharded: false
264
+ no_pre_train_checkpoint: true
265
+ initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
266
+ load_model_config: null
267
+ checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
268
+ load_path: null
269
+ load_path_sharded_checkpointer: null
270
+ reset_optimizer_state: false
271
+ reset_trainer_state: false
272
+ save_dataloader_state: false
273
+ reset_dataloader_state: false
274
+ keep_lr_on_load: true
275
+ sharded_checkpointer: torch_legacy
276
+ max_duration: 500000
277
+ global_train_batch_size: 126
278
+ device_train_batch_size: 15
279
+ device_train_microbatch_size: 16
280
+ device_eval_batch_size: 4
281
+ eval_subset_num_batches: -1
282
+ eval_on_load: false
283
+ device_inf_eval_batch_size: 16
284
+ inf_eval_subset_num_batches: -1
285
+ device_train_grad_accum: 0
286
+ max_grad_norm: 1.0
287
+ multi_component_grad_norm: true
288
+ batch_divisor: global_batch
289
+ max_grad_norm_ratio: null
290
+ precision: amp_bf16
291
+ wandb:
292
+ project: a1-realworld
293
+ entity: henryeap
294
+ group: null
295
+ name: eraser_20251011_163756
296
+ tags:
297
+ - watching
298
+ log_artifacts: false
299
+ rank_zero_only: true
300
+ log_interval: 1
301
+ speed_monitor:
302
+ window_size: 20
303
+ gpu_flops_available: null
304
+ console_log_interval: 1
305
+ gen1_gc_interval: 1
306
+ compile: null
307
+ fsdp:
308
+ use_orig_params: true
309
+ sharding_strategy: FULL_SHARD
310
+ wrapping_strategy: by_block_and_size
311
+ precision: float
312
+ hybrid_sharding_num_model_replicas: null
313
+ softmax_auxiliary_loss: true
314
+ softmax_auxiliary_loss_scale: 0.0001
315
+ time_limit: null
316
+ extra_steps_after_cancel: 10
317
+ python_profiling: false
318
+ torch_profiling: false
319
+ stop_at: 500000
320
+ stop_after: null
321
+ activation_checkpointing: whole_layer
322
+ fused_loss: null
eraser_l1_regression/config.yaml ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: eraser_20251011_163803
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 3584
7
+ n_heads: 28
8
+ n_kv_heads: 4
9
+ qkv_bias: true
10
+ clip_qkv: null
11
+ n_layers: 28
12
+ mlp_ratio: 4
13
+ mlp_hidden_size: 37888
14
+ activation_type: swiglu
15
+ block_type: sequential
16
+ block_group_size: 1
17
+ rope: true
18
+ rope_full_precision: true
19
+ rope_theta: 1000000.0
20
+ vision_backbone:
21
+ image_model_type: openai
22
+ image_default_input_size:
23
+ - 336
24
+ - 336
25
+ image_patch_size: 14
26
+ image_pos_patch_size: 14
27
+ image_emb_dim: 1024
28
+ image_num_heads: 16
29
+ image_num_key_value_heads: 16
30
+ image_num_layers: 23
31
+ image_head_dim: 64
32
+ image_mlp_dim: 4096
33
+ image_mlp_activations: quick_gelu
34
+ image_dropout_rate: 0.0
35
+ image_num_pos: 577
36
+ image_norm_eps: 1.0e-05
37
+ attention_dropout: 0.0
38
+ residual_dropout: 0.0
39
+ initializer_range: 0.02
40
+ fsdp_wrap: false
41
+ resize_mode: default
42
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
43
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
44
+ low_cpu_fsdp: true
45
+ attention_type: sdpa
46
+ float32_attention: true
47
+ attention_dropout: 0.0
48
+ attention_layer_norm: false
49
+ residual_dropout: 0.1
50
+ response_residual_dropout: 0.0
51
+ embedding_dropout: 0.0
52
+ layer_norm_type: rms
53
+ layer_norm_with_affine: true
54
+ layer_norm_eps: 1.0e-06
55
+ attention_layer_norm_with_affine: true
56
+ max_sequence_length: 4096
57
+ max_position_embeddings: null
58
+ include_bias: false
59
+ bias_for_layer_norm: null
60
+ scale_logits: false
61
+ vocab_size: 152064
62
+ embedding_size: 152064
63
+ ff_out_size: null
64
+ additional_vocab_size: 128
65
+ new_embedding_init_range: 0.02
66
+ weight_tying: false
67
+ init_device: null
68
+ init_fn: normal
69
+ init_std: 0.02
70
+ init_cutoff_factor: null
71
+ norm_after: false
72
+ precision: amp_bf16
73
+ max_crops: 12
74
+ crop_mode: overlap-and-resize-c2
75
+ use_col_tokens: true
76
+ prompt_type: uber_model
77
+ system_prompt_kind: demo_or_style
78
+ message_formatting: role
79
+ always_start_with_space: true
80
+ multi_annotation_weighting: root_subsegments
81
+ default_inference_len: 65
82
+ overlap_margins:
83
+ - 4
84
+ - 4
85
+ pad_value: 0.0
86
+ image_padding_embed: pad_and_partial_pad
87
+ fix_image_padding: true
88
+ vit_layers:
89
+ - -2
90
+ - -9
91
+ image_pooling_h: 2
92
+ image_pooling_w: 2
93
+ image_pooling_2d: attention_meanq
94
+ image_projector: mlp
95
+ image_feature_dropout: 0.0
96
+ initializer_range: 0.02
97
+ normalize_input_embeds: false
98
+ use_position_ids: true
99
+ head_dim: null
100
+ action_tokenizer:
101
+ identifier: physical-intelligence/fast
102
+ tokenizer_dir: null
103
+ action_dim: 7
104
+ horizon: 8
105
+ tokenizer:
106
+ identifier: Qwen/Qwen2-7B
107
+ tokenizer_dir: null
108
+ pad_tokenizer: true
109
+ moe_num_experts: 8
110
+ moe_top_k: 2
111
+ moe_mlp_impl: sparse
112
+ moe_log_expert_assignment: false
113
+ moe_shared_expert: false
114
+ moe_lbl_in_fp32: false
115
+ moe_interleave: false
116
+ moe_loss_weight: 0.1
117
+ moe_zloss_weight: null
118
+ moe_dropless: true
119
+ moe_capacity_factor: 1.25
120
+ action_head: l1_regression
121
+ num_diffusion_steps: 1000
122
+ num_diffusion_inference_steps: 30
123
+ use_proprio: true
124
+ action_head_dit_hidden_size: 1152
125
+ action_head_dit_depth: 28
126
+ action_head_dit_num_heads: 16
127
+ llm_causal_attention: false
128
+ action_use_left_eef: true
129
+ action_use_mobile_base: false
130
+ allow_resume: false
131
+ ft_llm: true
132
+ ft_vit: false
133
+ ft_connector: false
134
+ ft_embedding: lm_head
135
+ lora: false
136
+ use_lora: false
137
+ lora_rank: 8
138
+ lora_llm: false
139
+ lora_vit: false
140
+ lora_connector: false
141
+ early_exit: false
142
+ train_exit_random_layer: false
143
+ optimizer:
144
+ name: adamw
145
+ learning_rate: 0.0001
146
+ weight_decay: 0.01
147
+ betas:
148
+ - 0.9
149
+ - 0.95
150
+ eps: 1.0e-05
151
+ connector_learning_rate: 0.0002
152
+ vit_learning_rate: 6.0e-06
153
+ llm_learning_rate: 5.0e-05
154
+ connector_weight_decay: 0.0
155
+ vit_weight_decay: 0.0
156
+ llm_weight_decay: 0.0
157
+ connector_betas:
158
+ - 0.9
159
+ - 0.95
160
+ vit_betas:
161
+ - 0.9
162
+ - 0.95
163
+ llm_betas:
164
+ - 0.9
165
+ - 0.95
166
+ connector_eps: 1.0e-06
167
+ vit_eps: 1.0e-06
168
+ llm_eps: 1.0e-06
169
+ metrics_log_interval: 20
170
+ scheduler:
171
+ name: multimodal
172
+ units: steps
173
+ t_warmup: 100
174
+ t_max: null
175
+ alpha_f: 0.1
176
+ connector_t_warmup: 200
177
+ vit_t_warmup: 2000
178
+ llm_t_warmup: 2000
179
+ grad_clip_warmup_steps: null
180
+ grad_clip_warmup_factor: null
181
+ warmup_min_lr: 0.0
182
+ data:
183
+ dataset: vla_dataset_realworld
184
+ mixture: null
185
+ root_size_mixture: null
186
+ split: train
187
+ seed: 95818
188
+ shuffle_messages: false
189
+ pad: to_max
190
+ sequence_length: 1600
191
+ shuffle: true
192
+ for_inference: false
193
+ multi_modal: torch
194
+ num_workers: 0
195
+ drop_last: true
196
+ pin_memory: true
197
+ prefetch_factor: null
198
+ persistent_workers: false
199
+ timeout: 0
200
+ rlds_dataset_name: libero_4_task_suites_no_noops
201
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
202
+ use_wrist_image: true
203
+ use_proprio: true
204
+ rlds_shuffle_buffer_size: 100000
205
+ rlds_traj_threads: 8
206
+ rlds_read_threads: 8
207
+ lerobot_episode_index_start: null
208
+ lerobot_episode_index_end: null
209
+ restore_dataloader: true
210
+ fast_forward_batches: null
211
+ evaluators:
212
+ - label: val
213
+ data:
214
+ dataset: vla_dataset_realworld
215
+ mixture: null
216
+ root_size_mixture: null
217
+ split: validation
218
+ seed: null
219
+ shuffle_messages: false
220
+ pad: to_max
221
+ sequence_length: 1600
222
+ shuffle: false
223
+ for_inference: false
224
+ multi_modal: torch
225
+ num_workers: 0
226
+ drop_last: true
227
+ pin_memory: true
228
+ prefetch_factor: null
229
+ persistent_workers: true
230
+ timeout: 0
231
+ rlds_dataset_name: libero_4_task_suites_no_noops
232
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
233
+ use_wrist_image: true
234
+ use_proprio: true
235
+ rlds_shuffle_buffer_size: 256000
236
+ rlds_traj_threads: 8
237
+ rlds_read_threads: 8
238
+ lerobot_episode_index_start: 353
239
+ lerobot_episode_index_end: 765
240
+ device_eval_batch_size: null
241
+ subset_num_batches: 64
242
+ max_examples: null
243
+ max_new_tokens: 448
244
+ mm_evaluator: null
245
+ save_dir: null
246
+ save_to_checkpoint_dir: false
247
+ eval_name: null
248
+ skip_if_metrics_cached: true
249
+ eval_interval: 0
250
+ inf_eval_interval: -1
251
+ inf_evaluators: []
252
+ save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/eraser_l1_regression
253
+ remote_save_folder: null
254
+ canceled_check_interval: 50
255
+ save_interval: 500
256
+ save_interval_unsharded: 500
257
+ save_interval_ephemeral: null
258
+ save_interval_action_head: 500
259
+ save_num_checkpoints_to_keep: 1
260
+ save_num_unsharded_checkpoints_to_keep: 1
261
+ save_num_action_head_checkpoints_to_keep: 2
262
+ save_overwrite: true
263
+ force_save_unsharded: false
264
+ no_pre_train_checkpoint: true
265
+ initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
266
+ load_model_config: null
267
+ checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
268
+ load_path: null
269
+ load_path_sharded_checkpointer: null
270
+ reset_optimizer_state: false
271
+ reset_trainer_state: false
272
+ save_dataloader_state: false
273
+ reset_dataloader_state: false
274
+ keep_lr_on_load: true
275
+ sharded_checkpointer: torch_legacy
276
+ max_duration: 500000
277
+ global_train_batch_size: 126
278
+ device_train_batch_size: 15
279
+ device_train_microbatch_size: 16
280
+ device_eval_batch_size: 4
281
+ eval_subset_num_batches: -1
282
+ eval_on_load: false
283
+ device_inf_eval_batch_size: 16
284
+ inf_eval_subset_num_batches: -1
285
+ device_train_grad_accum: 0
286
+ max_grad_norm: 1.0
287
+ multi_component_grad_norm: true
288
+ batch_divisor: global_batch
289
+ max_grad_norm_ratio: null
290
+ precision: amp_bf16
291
+ wandb:
292
+ project: a1-realworld
293
+ entity: henryeap
294
+ group: null
295
+ name: eraser_20251011_163803
296
+ tags:
297
+ - watching
298
+ log_artifacts: false
299
+ rank_zero_only: true
300
+ log_interval: 1
301
+ speed_monitor:
302
+ window_size: 20
303
+ gpu_flops_available: null
304
+ console_log_interval: 1
305
+ gen1_gc_interval: 1
306
+ compile: null
307
+ fsdp:
308
+ use_orig_params: true
309
+ sharding_strategy: FULL_SHARD
310
+ wrapping_strategy: by_block_and_size
311
+ precision: float
312
+ hybrid_sharding_num_model_replicas: null
313
+ softmax_auxiliary_loss: true
314
+ softmax_auxiliary_loss_scale: 0.0001
315
+ time_limit: null
316
+ extra_steps_after_cancel: 10
317
+ python_profiling: false
318
+ torch_profiling: false
319
+ stop_at: 500000
320
+ stop_after: null
321
+ activation_checkpointing: whole_layer
322
+ fused_loss: null
glue/config.yaml ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: glue_20251002_162813
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 3584
7
+ n_heads: 28
8
+ n_kv_heads: 4
9
+ qkv_bias: true
10
+ clip_qkv: null
11
+ n_layers: 28
12
+ mlp_ratio: 4
13
+ mlp_hidden_size: 37888
14
+ activation_type: swiglu
15
+ block_type: sequential
16
+ block_group_size: 1
17
+ rope: true
18
+ rope_full_precision: true
19
+ rope_theta: 1000000.0
20
+ vision_backbone:
21
+ image_model_type: openai
22
+ image_default_input_size:
23
+ - 336
24
+ - 336
25
+ image_patch_size: 14
26
+ image_pos_patch_size: 14
27
+ image_emb_dim: 1024
28
+ image_num_heads: 16
29
+ image_num_key_value_heads: 16
30
+ image_num_layers: 23
31
+ image_head_dim: 64
32
+ image_mlp_dim: 4096
33
+ image_mlp_activations: quick_gelu
34
+ image_dropout_rate: 0.0
35
+ image_num_pos: 577
36
+ image_norm_eps: 1.0e-05
37
+ attention_dropout: 0.0
38
+ residual_dropout: 0.0
39
+ initializer_range: 0.02
40
+ fsdp_wrap: false
41
+ resize_mode: default
42
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
43
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
44
+ low_cpu_fsdp: true
45
+ attention_type: sdpa
46
+ float32_attention: true
47
+ attention_dropout: 0.0
48
+ attention_layer_norm: false
49
+ residual_dropout: 0.1
50
+ response_residual_dropout: 0.0
51
+ embedding_dropout: 0.0
52
+ layer_norm_type: rms
53
+ layer_norm_with_affine: true
54
+ layer_norm_eps: 1.0e-06
55
+ attention_layer_norm_with_affine: true
56
+ max_sequence_length: 4096
57
+ max_position_embeddings: null
58
+ include_bias: false
59
+ bias_for_layer_norm: null
60
+ scale_logits: false
61
+ vocab_size: 152064
62
+ embedding_size: 152064
63
+ ff_out_size: null
64
+ additional_vocab_size: 128
65
+ new_embedding_init_range: 0.02
66
+ weight_tying: false
67
+ init_device: null
68
+ init_fn: normal
69
+ init_std: 0.02
70
+ init_cutoff_factor: null
71
+ norm_after: false
72
+ precision: amp_bf16
73
+ max_crops: 12
74
+ crop_mode: overlap-and-resize-c2
75
+ use_col_tokens: true
76
+ prompt_type: uber_model
77
+ system_prompt_kind: demo_or_style
78
+ message_formatting: role
79
+ always_start_with_space: true
80
+ multi_annotation_weighting: root_subsegments
81
+ default_inference_len: 65
82
+ overlap_margins:
83
+ - 4
84
+ - 4
85
+ pad_value: 0.0
86
+ image_padding_embed: pad_and_partial_pad
87
+ fix_image_padding: true
88
+ vit_layers:
89
+ - -2
90
+ - -9
91
+ image_pooling_h: 2
92
+ image_pooling_w: 2
93
+ image_pooling_2d: attention_meanq
94
+ image_projector: mlp
95
+ image_feature_dropout: 0.0
96
+ initializer_range: 0.02
97
+ normalize_input_embeds: false
98
+ use_position_ids: true
99
+ head_dim: null
100
+ action_tokenizer:
101
+ identifier: physical-intelligence/fast
102
+ tokenizer_dir: null
103
+ action_dim: 7
104
+ horizon: 8
105
+ tokenizer:
106
+ identifier: Qwen/Qwen2-7B
107
+ tokenizer_dir: null
108
+ pad_tokenizer: true
109
+ moe_num_experts: 8
110
+ moe_top_k: 2
111
+ moe_mlp_impl: sparse
112
+ moe_log_expert_assignment: false
113
+ moe_shared_expert: false
114
+ moe_lbl_in_fp32: false
115
+ moe_interleave: false
116
+ moe_loss_weight: 0.1
117
+ moe_zloss_weight: null
118
+ moe_dropless: true
119
+ moe_capacity_factor: 1.25
120
+ action_head: l1_regression
121
+ num_diffusion_steps: 1000
122
+ num_diffusion_inference_steps: 30
123
+ use_proprio: true
124
+ action_head_dit_hidden_size: 1152
125
+ action_head_dit_depth: 28
126
+ action_head_dit_num_heads: 16
127
+ llm_causal_attention: false
128
+ action_use_left_eef: true
129
+ action_use_mobile_base: false
130
+ allow_resume: false
131
+ ft_llm: true
132
+ ft_vit: false
133
+ ft_connector: false
134
+ ft_embedding: lm_head
135
+ lora: false
136
+ use_lora: true
137
+ lora_rank: 8
138
+ lora_llm: false
139
+ lora_vit: false
140
+ lora_connector: false
141
+ early_exit: false
142
+ train_exit_random_layer: false
143
+ optimizer:
144
+ name: adamw
145
+ learning_rate: 0.0001
146
+ weight_decay: 0.01
147
+ betas:
148
+ - 0.9
149
+ - 0.95
150
+ eps: 1.0e-05
151
+ connector_learning_rate: 0.0002
152
+ vit_learning_rate: 6.0e-06
153
+ llm_learning_rate: 5.0e-05
154
+ connector_weight_decay: 0.0
155
+ vit_weight_decay: 0.0
156
+ llm_weight_decay: 0.0
157
+ connector_betas:
158
+ - 0.9
159
+ - 0.95
160
+ vit_betas:
161
+ - 0.9
162
+ - 0.95
163
+ llm_betas:
164
+ - 0.9
165
+ - 0.95
166
+ connector_eps: 1.0e-06
167
+ vit_eps: 1.0e-06
168
+ llm_eps: 1.0e-06
169
+ metrics_log_interval: 20
170
+ scheduler:
171
+ name: multimodal
172
+ units: steps
173
+ t_warmup: 100
174
+ t_max: null
175
+ alpha_f: 0.1
176
+ connector_t_warmup: 200
177
+ vit_t_warmup: 2000
178
+ llm_t_warmup: 2000
179
+ grad_clip_warmup_steps: null
180
+ grad_clip_warmup_factor: null
181
+ warmup_min_lr: 0.0
182
+ data:
183
+ dataset: vla_dataset_realworld
184
+ mixture: null
185
+ root_size_mixture: null
186
+ split: train
187
+ seed: 95818
188
+ shuffle_messages: false
189
+ pad: to_max
190
+ sequence_length: 1600
191
+ shuffle: true
192
+ for_inference: false
193
+ multi_modal: torch
194
+ num_workers: 0
195
+ drop_last: true
196
+ pin_memory: true
197
+ prefetch_factor: null
198
+ persistent_workers: false
199
+ timeout: 0
200
+ rlds_dataset_name: libero_4_task_suites_no_noops
201
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
202
+ use_wrist_image: true
203
+ use_proprio: true
204
+ rlds_shuffle_buffer_size: 100000
205
+ rlds_traj_threads: 8
206
+ rlds_read_threads: 8
207
+ lerobot_episode_index_start: null
208
+ lerobot_episode_index_end: null
209
+ restore_dataloader: true
210
+ fast_forward_batches: null
211
+ evaluators:
212
+ - label: val
213
+ data:
214
+ dataset: vla_dataset_realworld
215
+ mixture: null
216
+ root_size_mixture: null
217
+ split: validation
218
+ seed: null
219
+ shuffle_messages: false
220
+ pad: to_max
221
+ sequence_length: 1600
222
+ shuffle: false
223
+ for_inference: false
224
+ multi_modal: torch
225
+ num_workers: 0
226
+ drop_last: true
227
+ pin_memory: true
228
+ prefetch_factor: null
229
+ persistent_workers: true
230
+ timeout: 0
231
+ rlds_dataset_name: libero_4_task_suites_no_noops
232
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
233
+ use_wrist_image: true
234
+ use_proprio: true
235
+ rlds_shuffle_buffer_size: 256000
236
+ rlds_traj_threads: 8
237
+ rlds_read_threads: 8
238
+ lerobot_episode_index_start: 353
239
+ lerobot_episode_index_end: 765
240
+ device_eval_batch_size: null
241
+ subset_num_batches: 64
242
+ max_examples: null
243
+ max_new_tokens: 448
244
+ mm_evaluator: null
245
+ save_dir: null
246
+ save_to_checkpoint_dir: false
247
+ eval_name: null
248
+ skip_if_metrics_cached: true
249
+ eval_interval: 0
250
+ inf_eval_interval: -1
251
+ inf_evaluators: []
252
+ save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/glue
253
+ remote_save_folder: null
254
+ canceled_check_interval: 50
255
+ save_interval: 500
256
+ save_interval_unsharded: 500
257
+ save_interval_ephemeral: null
258
+ save_interval_action_head: 500
259
+ save_num_checkpoints_to_keep: 1
260
+ save_num_unsharded_checkpoints_to_keep: 1
261
+ save_num_action_head_checkpoints_to_keep: 2
262
+ save_overwrite: true
263
+ force_save_unsharded: false
264
+ no_pre_train_checkpoint: true
265
+ initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
266
+ load_model_config: null
267
+ checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
268
+ load_path: null
269
+ load_path_sharded_checkpointer: null
270
+ reset_optimizer_state: false
271
+ reset_trainer_state: false
272
+ save_dataloader_state: false
273
+ reset_dataloader_state: false
274
+ keep_lr_on_load: true
275
+ sharded_checkpointer: torch_legacy
276
+ max_duration: 500000
277
+ global_train_batch_size: 126
278
+ device_train_batch_size: 15
279
+ device_train_microbatch_size: 16
280
+ device_eval_batch_size: 4
281
+ eval_subset_num_batches: -1
282
+ eval_on_load: false
283
+ device_inf_eval_batch_size: 16
284
+ inf_eval_subset_num_batches: -1
285
+ device_train_grad_accum: 0
286
+ max_grad_norm: 1.0
287
+ multi_component_grad_norm: true
288
+ batch_divisor: global_batch
289
+ max_grad_norm_ratio: null
290
+ precision: amp_bf16
291
+ wandb:
292
+ project: a1-realworld
293
+ entity: henryeap
294
+ group: null
295
+ name: glue_20251002_162813
296
+ tags:
297
+ - watching
298
+ log_artifacts: false
299
+ rank_zero_only: true
300
+ log_interval: 1
301
+ speed_monitor:
302
+ window_size: 20
303
+ gpu_flops_available: null
304
+ console_log_interval: 1
305
+ gen1_gc_interval: 1
306
+ compile: null
307
+ fsdp:
308
+ use_orig_params: true
309
+ sharding_strategy: FULL_SHARD
310
+ wrapping_strategy: by_block_and_size
311
+ precision: float
312
+ hybrid_sharding_num_model_replicas: null
313
+ softmax_auxiliary_loss: true
314
+ softmax_auxiliary_loss_scale: 0.0001
315
+ time_limit: null
316
+ extra_steps_after_cancel: 10
317
+ python_profiling: false
318
+ torch_profiling: false
319
+ stop_at: 500000
320
+ stop_after: null
321
+ activation_checkpointing: whole_layer
322
+ fused_loss: null
glue_flow_matching/config.yaml ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: glue_20251002_163658
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 3584
7
+ n_heads: 28
8
+ n_kv_heads: 4
9
+ qkv_bias: true
10
+ clip_qkv: null
11
+ n_layers: 28
12
+ mlp_ratio: 4
13
+ mlp_hidden_size: 37888
14
+ activation_type: swiglu
15
+ block_type: sequential
16
+ block_group_size: 1
17
+ rope: true
18
+ rope_full_precision: true
19
+ rope_theta: 1000000.0
20
+ vision_backbone:
21
+ image_model_type: openai
22
+ image_default_input_size:
23
+ - 336
24
+ - 336
25
+ image_patch_size: 14
26
+ image_pos_patch_size: 14
27
+ image_emb_dim: 1024
28
+ image_num_heads: 16
29
+ image_num_key_value_heads: 16
30
+ image_num_layers: 23
31
+ image_head_dim: 64
32
+ image_mlp_dim: 4096
33
+ image_mlp_activations: quick_gelu
34
+ image_dropout_rate: 0.0
35
+ image_num_pos: 577
36
+ image_norm_eps: 1.0e-05
37
+ attention_dropout: 0.0
38
+ residual_dropout: 0.0
39
+ initializer_range: 0.02
40
+ fsdp_wrap: false
41
+ resize_mode: default
42
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
43
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
44
+ low_cpu_fsdp: true
45
+ attention_type: sdpa
46
+ float32_attention: true
47
+ attention_dropout: 0.0
48
+ attention_layer_norm: false
49
+ residual_dropout: 0.1
50
+ response_residual_dropout: 0.0
51
+ embedding_dropout: 0.0
52
+ layer_norm_type: rms
53
+ layer_norm_with_affine: true
54
+ layer_norm_eps: 1.0e-06
55
+ attention_layer_norm_with_affine: true
56
+ max_sequence_length: 4096
57
+ max_position_embeddings: null
58
+ include_bias: false
59
+ bias_for_layer_norm: null
60
+ scale_logits: false
61
+ vocab_size: 152064
62
+ embedding_size: 152064
63
+ ff_out_size: null
64
+ additional_vocab_size: 128
65
+ new_embedding_init_range: 0.02
66
+ weight_tying: false
67
+ init_device: null
68
+ init_fn: normal
69
+ init_std: 0.02
70
+ init_cutoff_factor: null
71
+ norm_after: false
72
+ precision: amp_bf16
73
+ max_crops: 12
74
+ crop_mode: overlap-and-resize-c2
75
+ use_col_tokens: true
76
+ prompt_type: uber_model
77
+ system_prompt_kind: demo_or_style
78
+ message_formatting: role
79
+ always_start_with_space: true
80
+ multi_annotation_weighting: root_subsegments
81
+ default_inference_len: 65
82
+ overlap_margins:
83
+ - 4
84
+ - 4
85
+ pad_value: 0.0
86
+ image_padding_embed: pad_and_partial_pad
87
+ fix_image_padding: true
88
+ vit_layers:
89
+ - -2
90
+ - -9
91
+ image_pooling_h: 2
92
+ image_pooling_w: 2
93
+ image_pooling_2d: attention_meanq
94
+ image_projector: mlp
95
+ image_feature_dropout: 0.0
96
+ initializer_range: 0.02
97
+ normalize_input_embeds: false
98
+ use_position_ids: true
99
+ head_dim: null
100
+ action_tokenizer:
101
+ identifier: physical-intelligence/fast
102
+ tokenizer_dir: null
103
+ action_dim: 7
104
+ horizon: 8
105
+ tokenizer:
106
+ identifier: Qwen/Qwen2-7B
107
+ tokenizer_dir: null
108
+ pad_tokenizer: true
109
+ moe_num_experts: 8
110
+ moe_top_k: 2
111
+ moe_mlp_impl: sparse
112
+ moe_log_expert_assignment: false
113
+ moe_shared_expert: false
114
+ moe_lbl_in_fp32: false
115
+ moe_interleave: false
116
+ moe_loss_weight: 0.1
117
+ moe_zloss_weight: null
118
+ moe_dropless: true
119
+ moe_capacity_factor: 1.25
120
+ action_head: flow_matching
121
+ num_diffusion_steps: 1000
122
+ num_diffusion_inference_steps: 30
123
+ use_proprio: true
124
+ action_head_dit_hidden_size: 1152
125
+ action_head_dit_depth: 28
126
+ action_head_dit_num_heads: 16
127
+ llm_causal_attention: false
128
+ action_use_left_eef: true
129
+ action_use_mobile_base: false
130
+ allow_resume: false
131
+ ft_llm: true
132
+ ft_vit: false
133
+ ft_connector: false
134
+ ft_embedding: lm_head
135
+ lora: false
136
+ use_lora: true
137
+ lora_rank: 8
138
+ lora_llm: false
139
+ lora_vit: false
140
+ lora_connector: false
141
+ early_exit: false
142
+ train_exit_random_layer: false
143
+ optimizer:
144
+ name: adamw
145
+ learning_rate: 0.0001
146
+ weight_decay: 0.01
147
+ betas:
148
+ - 0.9
149
+ - 0.95
150
+ eps: 1.0e-05
151
+ connector_learning_rate: 0.0002
152
+ vit_learning_rate: 6.0e-06
153
+ llm_learning_rate: 5.0e-05
154
+ connector_weight_decay: 0.0
155
+ vit_weight_decay: 0.0
156
+ llm_weight_decay: 0.0
157
+ connector_betas:
158
+ - 0.9
159
+ - 0.95
160
+ vit_betas:
161
+ - 0.9
162
+ - 0.95
163
+ llm_betas:
164
+ - 0.9
165
+ - 0.95
166
+ connector_eps: 1.0e-06
167
+ vit_eps: 1.0e-06
168
+ llm_eps: 1.0e-06
169
+ metrics_log_interval: 20
170
+ scheduler:
171
+ name: multimodal
172
+ units: steps
173
+ t_warmup: 100
174
+ t_max: null
175
+ alpha_f: 0.1
176
+ connector_t_warmup: 200
177
+ vit_t_warmup: 2000
178
+ llm_t_warmup: 2000
179
+ grad_clip_warmup_steps: null
180
+ grad_clip_warmup_factor: null
181
+ warmup_min_lr: 0.0
182
+ data:
183
+ dataset: vla_dataset_realworld
184
+ mixture: null
185
+ root_size_mixture: null
186
+ split: train
187
+ seed: 95818
188
+ shuffle_messages: false
189
+ pad: to_max
190
+ sequence_length: 1600
191
+ shuffle: true
192
+ for_inference: false
193
+ multi_modal: torch
194
+ num_workers: 0
195
+ drop_last: true
196
+ pin_memory: true
197
+ prefetch_factor: null
198
+ persistent_workers: false
199
+ timeout: 0
200
+ rlds_dataset_name: libero_4_task_suites_no_noops
201
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
202
+ use_wrist_image: true
203
+ use_proprio: true
204
+ rlds_shuffle_buffer_size: 100000
205
+ rlds_traj_threads: 8
206
+ rlds_read_threads: 8
207
+ lerobot_episode_index_start: null
208
+ lerobot_episode_index_end: null
209
+ restore_dataloader: true
210
+ fast_forward_batches: null
211
+ evaluators:
212
+ - label: val
213
+ data:
214
+ dataset: vla_dataset_realworld
215
+ mixture: null
216
+ root_size_mixture: null
217
+ split: validation
218
+ seed: null
219
+ shuffle_messages: false
220
+ pad: to_max
221
+ sequence_length: 1600
222
+ shuffle: false
223
+ for_inference: false
224
+ multi_modal: torch
225
+ num_workers: 0
226
+ drop_last: true
227
+ pin_memory: true
228
+ prefetch_factor: null
229
+ persistent_workers: true
230
+ timeout: 0
231
+ rlds_dataset_name: libero_4_task_suites_no_noops
232
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
233
+ use_wrist_image: true
234
+ use_proprio: true
235
+ rlds_shuffle_buffer_size: 256000
236
+ rlds_traj_threads: 8
237
+ rlds_read_threads: 8
238
+ lerobot_episode_index_start: 353
239
+ lerobot_episode_index_end: 765
240
+ device_eval_batch_size: null
241
+ subset_num_batches: 64
242
+ max_examples: null
243
+ max_new_tokens: 448
244
+ mm_evaluator: null
245
+ save_dir: null
246
+ save_to_checkpoint_dir: false
247
+ eval_name: null
248
+ skip_if_metrics_cached: true
249
+ eval_interval: 0
250
+ inf_eval_interval: -1
251
+ inf_evaluators: []
252
+ save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/glue_flow_matching
253
+ remote_save_folder: null
254
+ canceled_check_interval: 50
255
+ save_interval: 500
256
+ save_interval_unsharded: 500
257
+ save_interval_ephemeral: null
258
+ save_interval_action_head: 500
259
+ save_num_checkpoints_to_keep: 1
260
+ save_num_unsharded_checkpoints_to_keep: 1
261
+ save_num_action_head_checkpoints_to_keep: 2
262
+ save_overwrite: true
263
+ force_save_unsharded: false
264
+ no_pre_train_checkpoint: true
265
+ initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
266
+ load_model_config: null
267
+ checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
268
+ load_path: null
269
+ load_path_sharded_checkpointer: null
270
+ reset_optimizer_state: false
271
+ reset_trainer_state: false
272
+ save_dataloader_state: false
273
+ reset_dataloader_state: false
274
+ keep_lr_on_load: true
275
+ sharded_checkpointer: torch_legacy
276
+ max_duration: 500000
277
+ global_train_batch_size: 126
278
+ device_train_batch_size: 15
279
+ device_train_microbatch_size: 16
280
+ device_eval_batch_size: 4
281
+ eval_subset_num_batches: -1
282
+ eval_on_load: false
283
+ device_inf_eval_batch_size: 16
284
+ inf_eval_subset_num_batches: -1
285
+ device_train_grad_accum: 0
286
+ max_grad_norm: 1.0
287
+ multi_component_grad_norm: true
288
+ batch_divisor: global_batch
289
+ max_grad_norm_ratio: null
290
+ precision: amp_bf16
291
+ wandb:
292
+ project: a1-realworld
293
+ entity: henryeap
294
+ group: null
295
+ name: glue_20251002_163658
296
+ tags:
297
+ - watching
298
+ log_artifacts: false
299
+ rank_zero_only: true
300
+ log_interval: 1
301
+ speed_monitor:
302
+ window_size: 20
303
+ gpu_flops_available: null
304
+ console_log_interval: 1
305
+ gen1_gc_interval: 1
306
+ compile: null
307
+ fsdp:
308
+ use_orig_params: true
309
+ sharding_strategy: FULL_SHARD
310
+ wrapping_strategy: by_block_and_size
311
+ precision: float
312
+ hybrid_sharding_num_model_replicas: null
313
+ softmax_auxiliary_loss: true
314
+ softmax_auxiliary_loss_scale: 0.0001
315
+ time_limit: null
316
+ extra_steps_after_cancel: 10
317
+ python_profiling: false
318
+ torch_profiling: false
319
+ stop_at: 500000
320
+ stop_after: null
321
+ activation_checkpointing: whole_layer
322
+ fused_loss: null
glue_l1_regression/config.yaml ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: glue_20251002_163658
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 3584
7
+ n_heads: 28
8
+ n_kv_heads: 4
9
+ qkv_bias: true
10
+ clip_qkv: null
11
+ n_layers: 28
12
+ mlp_ratio: 4
13
+ mlp_hidden_size: 37888
14
+ activation_type: swiglu
15
+ block_type: sequential
16
+ block_group_size: 1
17
+ rope: true
18
+ rope_full_precision: true
19
+ rope_theta: 1000000.0
20
+ vision_backbone:
21
+ image_model_type: openai
22
+ image_default_input_size:
23
+ - 336
24
+ - 336
25
+ image_patch_size: 14
26
+ image_pos_patch_size: 14
27
+ image_emb_dim: 1024
28
+ image_num_heads: 16
29
+ image_num_key_value_heads: 16
30
+ image_num_layers: 23
31
+ image_head_dim: 64
32
+ image_mlp_dim: 4096
33
+ image_mlp_activations: quick_gelu
34
+ image_dropout_rate: 0.0
35
+ image_num_pos: 577
36
+ image_norm_eps: 1.0e-05
37
+ attention_dropout: 0.0
38
+ residual_dropout: 0.0
39
+ initializer_range: 0.02
40
+ fsdp_wrap: false
41
+ resize_mode: default
42
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
43
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
44
+ low_cpu_fsdp: true
45
+ attention_type: sdpa
46
+ float32_attention: true
47
+ attention_dropout: 0.0
48
+ attention_layer_norm: false
49
+ residual_dropout: 0.1
50
+ response_residual_dropout: 0.0
51
+ embedding_dropout: 0.0
52
+ layer_norm_type: rms
53
+ layer_norm_with_affine: true
54
+ layer_norm_eps: 1.0e-06
55
+ attention_layer_norm_with_affine: true
56
+ max_sequence_length: 4096
57
+ max_position_embeddings: null
58
+ include_bias: false
59
+ bias_for_layer_norm: null
60
+ scale_logits: false
61
+ vocab_size: 152064
62
+ embedding_size: 152064
63
+ ff_out_size: null
64
+ additional_vocab_size: 128
65
+ new_embedding_init_range: 0.02
66
+ weight_tying: false
67
+ init_device: null
68
+ init_fn: normal
69
+ init_std: 0.02
70
+ init_cutoff_factor: null
71
+ norm_after: false
72
+ precision: amp_bf16
73
+ max_crops: 12
74
+ crop_mode: overlap-and-resize-c2
75
+ use_col_tokens: true
76
+ prompt_type: uber_model
77
+ system_prompt_kind: demo_or_style
78
+ message_formatting: role
79
+ always_start_with_space: true
80
+ multi_annotation_weighting: root_subsegments
81
+ default_inference_len: 65
82
+ overlap_margins:
83
+ - 4
84
+ - 4
85
+ pad_value: 0.0
86
+ image_padding_embed: pad_and_partial_pad
87
+ fix_image_padding: true
88
+ vit_layers:
89
+ - -2
90
+ - -9
91
+ image_pooling_h: 2
92
+ image_pooling_w: 2
93
+ image_pooling_2d: attention_meanq
94
+ image_projector: mlp
95
+ image_feature_dropout: 0.0
96
+ initializer_range: 0.02
97
+ normalize_input_embeds: false
98
+ use_position_ids: true
99
+ head_dim: null
100
+ action_tokenizer:
101
+ identifier: physical-intelligence/fast
102
+ tokenizer_dir: null
103
+ action_dim: 7
104
+ horizon: 8
105
+ tokenizer:
106
+ identifier: Qwen/Qwen2-7B
107
+ tokenizer_dir: null
108
+ pad_tokenizer: true
109
+ moe_num_experts: 8
110
+ moe_top_k: 2
111
+ moe_mlp_impl: sparse
112
+ moe_log_expert_assignment: false
113
+ moe_shared_expert: false
114
+ moe_lbl_in_fp32: false
115
+ moe_interleave: false
116
+ moe_loss_weight: 0.1
117
+ moe_zloss_weight: null
118
+ moe_dropless: true
119
+ moe_capacity_factor: 1.25
120
+ action_head: l1_regression
121
+ num_diffusion_steps: 1000
122
+ num_diffusion_inference_steps: 30
123
+ use_proprio: true
124
+ action_head_dit_hidden_size: 1152
125
+ action_head_dit_depth: 28
126
+ action_head_dit_num_heads: 16
127
+ llm_causal_attention: false
128
+ action_use_left_eef: true
129
+ action_use_mobile_base: false
130
+ allow_resume: false
131
+ ft_llm: true
132
+ ft_vit: false
133
+ ft_connector: false
134
+ ft_embedding: lm_head
135
+ lora: false
136
+ use_lora: true
137
+ lora_rank: 8
138
+ lora_llm: false
139
+ lora_vit: false
140
+ lora_connector: false
141
+ early_exit: false
142
+ train_exit_random_layer: false
143
+ optimizer:
144
+ name: adamw
145
+ learning_rate: 0.0001
146
+ weight_decay: 0.01
147
+ betas:
148
+ - 0.9
149
+ - 0.95
150
+ eps: 1.0e-05
151
+ connector_learning_rate: 0.0002
152
+ vit_learning_rate: 6.0e-06
153
+ llm_learning_rate: 5.0e-05
154
+ connector_weight_decay: 0.0
155
+ vit_weight_decay: 0.0
156
+ llm_weight_decay: 0.0
157
+ connector_betas:
158
+ - 0.9
159
+ - 0.95
160
+ vit_betas:
161
+ - 0.9
162
+ - 0.95
163
+ llm_betas:
164
+ - 0.9
165
+ - 0.95
166
+ connector_eps: 1.0e-06
167
+ vit_eps: 1.0e-06
168
+ llm_eps: 1.0e-06
169
+ metrics_log_interval: 20
170
+ scheduler:
171
+ name: multimodal
172
+ units: steps
173
+ t_warmup: 100
174
+ t_max: null
175
+ alpha_f: 0.1
176
+ connector_t_warmup: 200
177
+ vit_t_warmup: 2000
178
+ llm_t_warmup: 2000
179
+ grad_clip_warmup_steps: null
180
+ grad_clip_warmup_factor: null
181
+ warmup_min_lr: 0.0
182
+ data:
183
+ dataset: vla_dataset_realworld
184
+ mixture: null
185
+ root_size_mixture: null
186
+ split: train
187
+ seed: 95818
188
+ shuffle_messages: false
189
+ pad: to_max
190
+ sequence_length: 1600
191
+ shuffle: true
192
+ for_inference: false
193
+ multi_modal: torch
194
+ num_workers: 0
195
+ drop_last: true
196
+ pin_memory: true
197
+ prefetch_factor: null
198
+ persistent_workers: false
199
+ timeout: 0
200
+ rlds_dataset_name: libero_4_task_suites_no_noops
201
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
202
+ use_wrist_image: true
203
+ use_proprio: true
204
+ rlds_shuffle_buffer_size: 100000
205
+ rlds_traj_threads: 8
206
+ rlds_read_threads: 8
207
+ lerobot_episode_index_start: null
208
+ lerobot_episode_index_end: null
209
+ restore_dataloader: true
210
+ fast_forward_batches: null
211
+ evaluators:
212
+ - label: val
213
+ data:
214
+ dataset: vla_dataset_realworld
215
+ mixture: null
216
+ root_size_mixture: null
217
+ split: validation
218
+ seed: null
219
+ shuffle_messages: false
220
+ pad: to_max
221
+ sequence_length: 1600
222
+ shuffle: false
223
+ for_inference: false
224
+ multi_modal: torch
225
+ num_workers: 0
226
+ drop_last: true
227
+ pin_memory: true
228
+ prefetch_factor: null
229
+ persistent_workers: true
230
+ timeout: 0
231
+ rlds_dataset_name: libero_4_task_suites_no_noops
232
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
233
+ use_wrist_image: true
234
+ use_proprio: true
235
+ rlds_shuffle_buffer_size: 256000
236
+ rlds_traj_threads: 8
237
+ rlds_read_threads: 8
238
+ lerobot_episode_index_start: 353
239
+ lerobot_episode_index_end: 765
240
+ device_eval_batch_size: null
241
+ subset_num_batches: 64
242
+ max_examples: null
243
+ max_new_tokens: 448
244
+ mm_evaluator: null
245
+ save_dir: null
246
+ save_to_checkpoint_dir: false
247
+ eval_name: null
248
+ skip_if_metrics_cached: true
249
+ eval_interval: 0
250
+ inf_eval_interval: -1
251
+ inf_evaluators: []
252
+ save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/glue_l1_regression
253
+ remote_save_folder: null
254
+ canceled_check_interval: 50
255
+ save_interval: 500
256
+ save_interval_unsharded: 500
257
+ save_interval_ephemeral: null
258
+ save_interval_action_head: 500
259
+ save_num_checkpoints_to_keep: 1
260
+ save_num_unsharded_checkpoints_to_keep: 1
261
+ save_num_action_head_checkpoints_to_keep: 2
262
+ save_overwrite: true
263
+ force_save_unsharded: false
264
+ no_pre_train_checkpoint: true
265
+ initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
266
+ load_model_config: null
267
+ checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
268
+ load_path: null
269
+ load_path_sharded_checkpointer: null
270
+ reset_optimizer_state: false
271
+ reset_trainer_state: false
272
+ save_dataloader_state: false
273
+ reset_dataloader_state: false
274
+ keep_lr_on_load: true
275
+ sharded_checkpointer: torch_legacy
276
+ max_duration: 500000
277
+ global_train_batch_size: 126
278
+ device_train_batch_size: 15
279
+ device_train_microbatch_size: 16
280
+ device_eval_batch_size: 4
281
+ eval_subset_num_batches: -1
282
+ eval_on_load: false
283
+ device_inf_eval_batch_size: 16
284
+ inf_eval_subset_num_batches: -1
285
+ device_train_grad_accum: 0
286
+ max_grad_norm: 1.0
287
+ multi_component_grad_norm: true
288
+ batch_divisor: global_batch
289
+ max_grad_norm_ratio: null
290
+ precision: amp_bf16
291
+ wandb:
292
+ project: a1-realworld
293
+ entity: henryeap
294
+ group: null
295
+ name: glue_20251002_163658
296
+ tags:
297
+ - watching
298
+ log_artifacts: false
299
+ rank_zero_only: true
300
+ log_interval: 1
301
+ speed_monitor:
302
+ window_size: 20
303
+ gpu_flops_available: null
304
+ console_log_interval: 1
305
+ gen1_gc_interval: 1
306
+ compile: null
307
+ fsdp:
308
+ use_orig_params: true
309
+ sharding_strategy: FULL_SHARD
310
+ wrapping_strategy: by_block_and_size
311
+ precision: float
312
+ hybrid_sharding_num_model_replicas: null
313
+ softmax_auxiliary_loss: true
314
+ softmax_auxiliary_loss_scale: 0.0001
315
+ time_limit: null
316
+ extra_steps_after_cancel: 10
317
+ python_profiling: false
318
+ torch_profiling: false
319
+ stop_at: 500000
320
+ stop_after: null
321
+ activation_checkpointing: whole_layer
322
+ fused_loss: null
pen_flow_matching/config.yaml ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: pen_20251011_163803
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 3584
7
+ n_heads: 28
8
+ n_kv_heads: 4
9
+ qkv_bias: true
10
+ clip_qkv: null
11
+ n_layers: 28
12
+ mlp_ratio: 4
13
+ mlp_hidden_size: 37888
14
+ activation_type: swiglu
15
+ block_type: sequential
16
+ block_group_size: 1
17
+ rope: true
18
+ rope_full_precision: true
19
+ rope_theta: 1000000.0
20
+ vision_backbone:
21
+ image_model_type: openai
22
+ image_default_input_size:
23
+ - 336
24
+ - 336
25
+ image_patch_size: 14
26
+ image_pos_patch_size: 14
27
+ image_emb_dim: 1024
28
+ image_num_heads: 16
29
+ image_num_key_value_heads: 16
30
+ image_num_layers: 23
31
+ image_head_dim: 64
32
+ image_mlp_dim: 4096
33
+ image_mlp_activations: quick_gelu
34
+ image_dropout_rate: 0.0
35
+ image_num_pos: 577
36
+ image_norm_eps: 1.0e-05
37
+ attention_dropout: 0.0
38
+ residual_dropout: 0.0
39
+ initializer_range: 0.02
40
+ fsdp_wrap: false
41
+ resize_mode: default
42
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
43
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
44
+ low_cpu_fsdp: true
45
+ attention_type: sdpa
46
+ float32_attention: true
47
+ attention_dropout: 0.0
48
+ attention_layer_norm: false
49
+ residual_dropout: 0.1
50
+ response_residual_dropout: 0.0
51
+ embedding_dropout: 0.0
52
+ layer_norm_type: rms
53
+ layer_norm_with_affine: true
54
+ layer_norm_eps: 1.0e-06
55
+ attention_layer_norm_with_affine: true
56
+ max_sequence_length: 4096
57
+ max_position_embeddings: null
58
+ include_bias: false
59
+ bias_for_layer_norm: null
60
+ scale_logits: false
61
+ vocab_size: 152064
62
+ embedding_size: 152064
63
+ ff_out_size: null
64
+ additional_vocab_size: 128
65
+ new_embedding_init_range: 0.02
66
+ weight_tying: false
67
+ init_device: null
68
+ init_fn: normal
69
+ init_std: 0.02
70
+ init_cutoff_factor: null
71
+ norm_after: false
72
+ precision: amp_bf16
73
+ max_crops: 12
74
+ crop_mode: overlap-and-resize-c2
75
+ use_col_tokens: true
76
+ prompt_type: uber_model
77
+ system_prompt_kind: demo_or_style
78
+ message_formatting: role
79
+ always_start_with_space: true
80
+ multi_annotation_weighting: root_subsegments
81
+ default_inference_len: 65
82
+ overlap_margins:
83
+ - 4
84
+ - 4
85
+ pad_value: 0.0
86
+ image_padding_embed: pad_and_partial_pad
87
+ fix_image_padding: true
88
+ vit_layers:
89
+ - -2
90
+ - -9
91
+ image_pooling_h: 2
92
+ image_pooling_w: 2
93
+ image_pooling_2d: attention_meanq
94
+ image_projector: mlp
95
+ image_feature_dropout: 0.0
96
+ initializer_range: 0.02
97
+ normalize_input_embeds: false
98
+ use_position_ids: true
99
+ head_dim: null
100
+ action_tokenizer:
101
+ identifier: physical-intelligence/fast
102
+ tokenizer_dir: null
103
+ action_dim: 7
104
+ horizon: 8
105
+ tokenizer:
106
+ identifier: Qwen/Qwen2-7B
107
+ tokenizer_dir: null
108
+ pad_tokenizer: true
109
+ moe_num_experts: 8
110
+ moe_top_k: 2
111
+ moe_mlp_impl: sparse
112
+ moe_log_expert_assignment: false
113
+ moe_shared_expert: false
114
+ moe_lbl_in_fp32: false
115
+ moe_interleave: false
116
+ moe_loss_weight: 0.1
117
+ moe_zloss_weight: null
118
+ moe_dropless: true
119
+ moe_capacity_factor: 1.25
120
+ action_head: flow_matching
121
+ num_diffusion_steps: 1000
122
+ num_diffusion_inference_steps: 30
123
+ use_proprio: true
124
+ action_head_dit_hidden_size: 1152
125
+ action_head_dit_depth: 28
126
+ action_head_dit_num_heads: 16
127
+ llm_causal_attention: false
128
+ action_use_left_eef: true
129
+ action_use_mobile_base: false
130
+ allow_resume: false
131
+ ft_llm: true
132
+ ft_vit: false
133
+ ft_connector: false
134
+ ft_embedding: lm_head
135
+ lora: false
136
+ use_lora: false
137
+ lora_rank: 8
138
+ lora_llm: false
139
+ lora_vit: false
140
+ lora_connector: false
141
+ early_exit: false
142
+ train_exit_random_layer: false
143
+ optimizer:
144
+ name: adamw
145
+ learning_rate: 0.0001
146
+ weight_decay: 0.01
147
+ betas:
148
+ - 0.9
149
+ - 0.95
150
+ eps: 1.0e-05
151
+ connector_learning_rate: 0.0002
152
+ vit_learning_rate: 6.0e-06
153
+ llm_learning_rate: 5.0e-05
154
+ connector_weight_decay: 0.0
155
+ vit_weight_decay: 0.0
156
+ llm_weight_decay: 0.0
157
+ connector_betas:
158
+ - 0.9
159
+ - 0.95
160
+ vit_betas:
161
+ - 0.9
162
+ - 0.95
163
+ llm_betas:
164
+ - 0.9
165
+ - 0.95
166
+ connector_eps: 1.0e-06
167
+ vit_eps: 1.0e-06
168
+ llm_eps: 1.0e-06
169
+ metrics_log_interval: 20
170
+ scheduler:
171
+ name: multimodal
172
+ units: steps
173
+ t_warmup: 100
174
+ t_max: null
175
+ alpha_f: 0.1
176
+ connector_t_warmup: 200
177
+ vit_t_warmup: 2000
178
+ llm_t_warmup: 2000
179
+ grad_clip_warmup_steps: null
180
+ grad_clip_warmup_factor: null
181
+ warmup_min_lr: 0.0
182
+ data:
183
+ dataset: vla_dataset_realworld
184
+ mixture: null
185
+ root_size_mixture: null
186
+ split: train
187
+ seed: 95818
188
+ shuffle_messages: false
189
+ pad: to_max
190
+ sequence_length: 1600
191
+ shuffle: true
192
+ for_inference: false
193
+ multi_modal: torch
194
+ num_workers: 0
195
+ drop_last: true
196
+ pin_memory: true
197
+ prefetch_factor: null
198
+ persistent_workers: false
199
+ timeout: 0
200
+ rlds_dataset_name: libero_4_task_suites_no_noops
201
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
202
+ use_wrist_image: true
203
+ use_proprio: true
204
+ rlds_shuffle_buffer_size: 100000
205
+ rlds_traj_threads: 8
206
+ rlds_read_threads: 8
207
+ lerobot_episode_index_start: null
208
+ lerobot_episode_index_end: null
209
+ restore_dataloader: true
210
+ fast_forward_batches: null
211
+ evaluators:
212
+ - label: val
213
+ data:
214
+ dataset: vla_dataset_realworld
215
+ mixture: null
216
+ root_size_mixture: null
217
+ split: validation
218
+ seed: null
219
+ shuffle_messages: false
220
+ pad: to_max
221
+ sequence_length: 1600
222
+ shuffle: false
223
+ for_inference: false
224
+ multi_modal: torch
225
+ num_workers: 0
226
+ drop_last: true
227
+ pin_memory: true
228
+ prefetch_factor: null
229
+ persistent_workers: true
230
+ timeout: 0
231
+ rlds_dataset_name: libero_4_task_suites_no_noops
232
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
233
+ use_wrist_image: true
234
+ use_proprio: true
235
+ rlds_shuffle_buffer_size: 256000
236
+ rlds_traj_threads: 8
237
+ rlds_read_threads: 8
238
+ lerobot_episode_index_start: 353
239
+ lerobot_episode_index_end: 765
240
+ device_eval_batch_size: null
241
+ subset_num_batches: 64
242
+ max_examples: null
243
+ max_new_tokens: 448
244
+ mm_evaluator: null
245
+ save_dir: null
246
+ save_to_checkpoint_dir: false
247
+ eval_name: null
248
+ skip_if_metrics_cached: true
249
+ eval_interval: 0
250
+ inf_eval_interval: -1
251
+ inf_evaluators: []
252
+ save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/pen_flow_matching
253
+ remote_save_folder: null
254
+ canceled_check_interval: 50
255
+ save_interval: 500
256
+ save_interval_unsharded: 500
257
+ save_interval_ephemeral: null
258
+ save_interval_action_head: 500
259
+ save_num_checkpoints_to_keep: 1
260
+ save_num_unsharded_checkpoints_to_keep: 1
261
+ save_num_action_head_checkpoints_to_keep: 2
262
+ save_overwrite: true
263
+ force_save_unsharded: false
264
+ no_pre_train_checkpoint: true
265
+ initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
266
+ load_model_config: null
267
+ checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
268
+ load_path: null
269
+ load_path_sharded_checkpointer: null
270
+ reset_optimizer_state: false
271
+ reset_trainer_state: false
272
+ save_dataloader_state: false
273
+ reset_dataloader_state: false
274
+ keep_lr_on_load: true
275
+ sharded_checkpointer: torch_legacy
276
+ max_duration: 500000
277
+ global_train_batch_size: 126
278
+ device_train_batch_size: 15
279
+ device_train_microbatch_size: 16
280
+ device_eval_batch_size: 4
281
+ eval_subset_num_batches: -1
282
+ eval_on_load: false
283
+ device_inf_eval_batch_size: 16
284
+ inf_eval_subset_num_batches: -1
285
+ device_train_grad_accum: 0
286
+ max_grad_norm: 1.0
287
+ multi_component_grad_norm: true
288
+ batch_divisor: global_batch
289
+ max_grad_norm_ratio: null
290
+ precision: amp_bf16
291
+ wandb:
292
+ project: a1-realworld
293
+ entity: henryeap
294
+ group: null
295
+ name: pen_20251011_163803
296
+ tags:
297
+ - watching
298
+ log_artifacts: false
299
+ rank_zero_only: true
300
+ log_interval: 1
301
+ speed_monitor:
302
+ window_size: 20
303
+ gpu_flops_available: null
304
+ console_log_interval: 1
305
+ gen1_gc_interval: 1
306
+ compile: null
307
+ fsdp:
308
+ use_orig_params: true
309
+ sharding_strategy: FULL_SHARD
310
+ wrapping_strategy: by_block_and_size
311
+ precision: float
312
+ hybrid_sharding_num_model_replicas: null
313
+ softmax_auxiliary_loss: true
314
+ softmax_auxiliary_loss_scale: 0.0001
315
+ time_limit: null
316
+ extra_steps_after_cancel: 10
317
+ python_profiling: false
318
+ torch_profiling: false
319
+ stop_at: 500000
320
+ stop_after: null
321
+ activation_checkpointing: whole_layer
322
+ fused_loss: null
wandb/wandb/run-20251002_150921-kqbx0cjv/files/output.log ADDED
@@ -0,0 +1,390 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb: Detected [openai] in use.
2
+ wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
3
+ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
4
+ 10/02 [15:09:23] WARNING | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No warnings.py:109
5
+ device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
6
+ warnings.warn( # warn only once
7
+
8
+ ****** vla_cfg: {'datasets': {'rlds': {'name': None, 'path': None, 'weight': 1.0, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_CleanDesk', 8, 'bounds']], 'open-source-real-world': {'rlds': {'name': 'a1_real_world', 'path': '/vast/users/xiaodan/zhangjian/datasets/OXE', 'weight': 8, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [], 'agibot': {'path': None, 'weight': 8, 'action_proprio_normalization_type': None}}}, 'model': {'action_head': {'action_dim': 16, 'proprio_dim': 16, 'num_actions_chunk': 8, 'action_tokens_mapping': {'left_end_effector': 8, 'right_end_effector': 8}, 'use_left_eef': True, 'use_mobile_base': False}}}
9
+ ****** Skip RLDS main; path not found: None
10
+ ****** start build LeRobot main...
11
+ build_tokenizer, cache_dir None tokenizer_dir None
12
+ 10/02 [15:09:30] INFO | >> Padding tokenizer with 418 tokens tokenizer.py:130
13
+ INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:435
14
+ ****** before LeRobot dataset...
15
+ ****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_CleanDesk
16
+ ****** length of the dataset: 72641
17
+ ****** Skip RLDS open-source-real-world; mixture 'a1_real_world' not found under: /vast/users/xiaodan/zhangjian/datasets/OXE
18
+ ****** Expect one of: []
19
+ ****** path: None
20
+ ****** Skip AgiBotWorld-Alpha open-source-real-world; path not found: None
21
+ ****** After build vla train dataset...
22
+ ****** iterable_sources: [<olmo.data.dataset.IterableDatasetWrapper object at 0x7faa4f997a30>]
23
+ ****** Before build mixed iterable dataset...
24
+ ****** Build vla train dataloader successfully!
25
+ ************************* Build train_dataloader successful!
26
+ ************************* Before build_inf_evaluators
27
+ 10/02 [15:09:39] WARNING | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No warnings.py:109
28
+ device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
29
+ warnings.warn( # warn only once
30
+
31
+ ************************* Build evaluators successful!
32
+ ************************* Early exit flags: early_exit=False
33
+ ************************* Initialize model successful!
34
+ ***** state_dict_path: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924/model.pt
35
+ ***** Load checkpoint successful!
36
+ missing keys: ['action_head.model.layer_norm1.weight', 'action_head.model.layer_norm1.bias', 'action_head.model.fc1.weight', 'action_head.model.fc1.bias', 'action_head.model.mlp_resnet_blocks.0.ffn.0.weight', 'action_head.model.mlp_resnet_blocks.0.ffn.0.bias', 'action_head.model.mlp_resnet_blocks.0.ffn.1.weight', 'action_head.model.mlp_resnet_blocks.0.ffn.1.bias', 'action_head.model.mlp_resnet_blocks.1.ffn.0.weight', 'action_head.model.mlp_resnet_blocks.1.ffn.0.bias', 'action_head.model.mlp_resnet_blocks.1.ffn.1.weight', 'action_head.model.mlp_resnet_blocks.1.ffn.1.bias', 'action_head.model.layer_norm2.weight', 'action_head.model.layer_norm2.bias', 'action_head.model.fc2.weight', 'action_head.model.fc2.bias', 'proprio_projector.fc1.weight', 'proprio_projector.fc1.bias', 'proprio_projector.fc2.weight', 'proprio_projector.fc2.bias']
37
+ unexpected keys: []
38
+ ************************* Initialize model successful!
39
+ ************************* LoRA flags: use_lora=True, lora_llm=False, lora_vit=False, lora_connector=False
40
+ ************************* Before add lora to model
41
+ ************************* Before FSDP model wrapping
42
+ ************************* FSDP model wrapping successful!
43
+ ************************* Before building optimizer and scheduler
44
+ ************* Before get lora params
45
+ ************* After get lora params successfully
46
+ 10/02 [15:11:05] INFO | >> Constructing optimizer with 2 param groups optim.py:1283
47
+ **************************************************
48
+ After building optimizer and scheduler and model, before training, peak GPU memory (MB): 35614
49
+ ************************* VLATrainer initialized successfully!
50
+ ************************* Before trainer.fit()
51
+ Pre-train system metrics
52
+ System/Peak GPU Memory (MB)=35,614
53
+ 10/02 [15:11:06] WARNING | >> /vast/users/xiaodan/zhangjian/A1/olmo/data/collator.py:200: UserWarning: To copy construct from a tensor, it is recommended to use warnings.py:109
54
+ sourceTensor.detach().clone() or sourceTensor.detach().clone().requires_grad_(True), rather than torch.tensor(sourceTensor).
55
+ timestep_list = [torch.tensor(ex["timestep"], dtype=torch.int64) for ex in batch]
56
+
57
+ [step=1/500000]
58
+ train/ActionL1Loss=0.5663
59
+ throughput/total_tokens=192,000
60
+ System/Peak GPU Memory (MB)=40,144
61
+ [step=2/500000]
62
+ train/ActionL1Loss=0.5589
63
+ throughput/total_tokens=384,000
64
+ throughput/device/tokens_per_second=1,185
65
+ throughput/device/batches_per_second=0.0494
66
+ System/Peak GPU Memory (MB)=46,917
67
+ [step=3/500000]
68
+ train/ActionL1Loss=0.5206
69
+ throughput/total_tokens=576,000
70
+ throughput/device/tokens_per_second=1,171
71
+ throughput/device/batches_per_second=0.0488
72
+ [step=4/500000]
73
+ train/ActionL1Loss=0.4852
74
+ throughput/total_tokens=768,000
75
+ throughput/device/tokens_per_second=1,173
76
+ throughput/device/batches_per_second=0.0489
77
+ [step=5/500000]
78
+ train/ActionL1Loss=0.4320
79
+ throughput/total_tokens=960,000
80
+ throughput/device/tokens_per_second=1,173
81
+ throughput/device/batches_per_second=0.0489
82
+ [step=6/500000]
83
+ train/ActionL1Loss=0.4672
84
+ throughput/total_tokens=1,152,000
85
+ throughput/device/tokens_per_second=1,173
86
+ throughput/device/batches_per_second=0.0489
87
+ [step=7/500000]
88
+ train/ActionL1Loss=0.3926
89
+ throughput/total_tokens=1,344,000
90
+ throughput/device/tokens_per_second=1,173
91
+ throughput/device/batches_per_second=0.0489
92
+ [step=8/500000]
93
+ train/ActionL1Loss=0.4395
94
+ throughput/total_tokens=1,536,000
95
+ throughput/device/tokens_per_second=1,173
96
+ throughput/device/batches_per_second=0.0489
97
+ [step=9/500000]
98
+ train/ActionL1Loss=0.4654
99
+ throughput/total_tokens=1,728,000
100
+ throughput/device/tokens_per_second=1,173
101
+ throughput/device/batches_per_second=0.0489
102
+ [step=10/500000]
103
+ train/ActionL1Loss=0.3900
104
+ throughput/total_tokens=1,920,000
105
+ throughput/device/tokens_per_second=1,173
106
+ throughput/device/batches_per_second=0.0489
107
+ System/Peak GPU Memory (MB)=46,917
108
+ [step=11/500000]
109
+ train/ActionL1Loss=0.3751
110
+ throughput/total_tokens=2,112,000
111
+ throughput/device/tokens_per_second=1,172
112
+ throughput/device/batches_per_second=0.0489
113
+ [step=12/500000]
114
+ train/ActionL1Loss=0.3996
115
+ throughput/total_tokens=2,304,000
116
+ throughput/device/tokens_per_second=1,170
117
+ throughput/device/batches_per_second=0.0488
118
+ [step=13/500000]
119
+ train/ActionL1Loss=0.3628
120
+ throughput/total_tokens=2,496,000
121
+ throughput/device/tokens_per_second=1,169
122
+ throughput/device/batches_per_second=0.0487
123
+ [step=14/500000]
124
+ train/ActionL1Loss=0.3743
125
+ throughput/total_tokens=2,688,000
126
+ throughput/device/tokens_per_second=1,168
127
+ throughput/device/batches_per_second=0.0487
128
+ [step=15/500000]
129
+ train/ActionL1Loss=0.3542
130
+ throughput/total_tokens=2,880,000
131
+ throughput/device/tokens_per_second=1,168
132
+ throughput/device/batches_per_second=0.0487
133
+ [step=16/500000]
134
+ train/ActionL1Loss=0.3885
135
+ throughput/total_tokens=3,072,000
136
+ throughput/device/tokens_per_second=1,168
137
+ throughput/device/batches_per_second=0.0487
138
+ [step=17/500000]
139
+ train/ActionL1Loss=0.3967
140
+ throughput/total_tokens=3,264,000
141
+ throughput/device/tokens_per_second=1,168
142
+ throughput/device/batches_per_second=0.0487
143
+ [step=18/500000]
144
+ train/ActionL1Loss=0.4508
145
+ throughput/total_tokens=3,456,000
146
+ throughput/device/tokens_per_second=1,169
147
+ throughput/device/batches_per_second=0.0487
148
+ [step=19/500000]
149
+ train/ActionL1Loss=0.4414
150
+ throughput/total_tokens=3,648,000
151
+ throughput/device/tokens_per_second=1,169
152
+ throughput/device/batches_per_second=0.0487
153
+ [step=20/500000]
154
+ optim/total_grad_norm=31.97
155
+ train/ActionL1Loss=0.3768
156
+ throughput/total_tokens=3,840,000
157
+ throughput/device/tokens_per_second=1,168
158
+ throughput/device/batches_per_second=0.0487
159
+ System/Peak GPU Memory (MB)=46,917
160
+ [step=21/500000]
161
+ train/ActionL1Loss=0.3586
162
+ throughput/total_tokens=4,032,000
163
+ throughput/device/tokens_per_second=1,169
164
+ throughput/device/batches_per_second=0.0487
165
+ [step=22/500000]
166
+ train/ActionL1Loss=0.3712
167
+ throughput/total_tokens=4,224,000
168
+ throughput/device/tokens_per_second=1,170
169
+ throughput/device/batches_per_second=0.0488
170
+ [step=23/500000]
171
+ train/ActionL1Loss=0.3941
172
+ throughput/total_tokens=4,416,000
173
+ throughput/device/tokens_per_second=1,171
174
+ throughput/device/batches_per_second=0.0488
175
+ [step=24/500000]
176
+ train/ActionL1Loss=0.4223
177
+ throughput/total_tokens=4,608,000
178
+ throughput/device/tokens_per_second=1,171
179
+ throughput/device/batches_per_second=0.0488
180
+ [step=25/500000]
181
+ train/ActionL1Loss=0.4184
182
+ throughput/total_tokens=4,800,000
183
+ throughput/device/tokens_per_second=1,171
184
+ throughput/device/batches_per_second=0.0488
185
+ [step=26/500000]
186
+ train/ActionL1Loss=0.3437
187
+ throughput/total_tokens=4,992,000
188
+ throughput/device/tokens_per_second=1,171
189
+ throughput/device/batches_per_second=0.0488
190
+ [step=27/500000]
191
+ train/ActionL1Loss=0.3695
192
+ throughput/total_tokens=5,184,000
193
+ throughput/device/tokens_per_second=1,171
194
+ throughput/device/batches_per_second=0.0488
195
+ [step=28/500000]
196
+ train/ActionL1Loss=0.3300
197
+ throughput/total_tokens=5,376,000
198
+ throughput/device/tokens_per_second=1,171
199
+ throughput/device/batches_per_second=0.0488
200
+ [step=29/500000]
201
+ train/ActionL1Loss=0.4344
202
+ throughput/total_tokens=5,568,000
203
+ throughput/device/tokens_per_second=1,171
204
+ throughput/device/batches_per_second=0.0488
205
+ [step=30/500000]
206
+ train/ActionL1Loss=0.4002
207
+ throughput/total_tokens=5,760,000
208
+ throughput/device/tokens_per_second=1,171
209
+ throughput/device/batches_per_second=0.0488
210
+ System/Peak GPU Memory (MB)=46,917
211
+ [step=31/500000]
212
+ train/ActionL1Loss=0.3070
213
+ throughput/total_tokens=5,952,000
214
+ throughput/device/tokens_per_second=1,172
215
+ throughput/device/batches_per_second=0.0488
216
+ [step=32/500000]
217
+ train/ActionL1Loss=0.3657
218
+ throughput/total_tokens=6,144,000
219
+ throughput/device/tokens_per_second=1,172
220
+ throughput/device/batches_per_second=0.0489
221
+ [step=33/500000]
222
+ train/ActionL1Loss=0.3855
223
+ throughput/total_tokens=6,336,000
224
+ throughput/device/tokens_per_second=1,174
225
+ throughput/device/batches_per_second=0.0489
226
+ [step=34/500000]
227
+ train/ActionL1Loss=0.4027
228
+ throughput/total_tokens=6,528,000
229
+ throughput/device/tokens_per_second=1,175
230
+ throughput/device/batches_per_second=0.0490
231
+ [step=35/500000]
232
+ train/ActionL1Loss=0.2975
233
+ throughput/total_tokens=6,720,000
234
+ throughput/device/tokens_per_second=1,175
235
+ throughput/device/batches_per_second=0.0490
236
+ [step=36/500000]
237
+ train/ActionL1Loss=0.4002
238
+ throughput/total_tokens=6,912,000
239
+ throughput/device/tokens_per_second=1,175
240
+ throughput/device/batches_per_second=0.0490
241
+ [step=37/500000]
242
+ train/ActionL1Loss=0.3601
243
+ throughput/total_tokens=7,104,000
244
+ throughput/device/tokens_per_second=1,175
245
+ throughput/device/batches_per_second=0.0490
246
+ [step=38/500000]
247
+ train/ActionL1Loss=0.4267
248
+ throughput/total_tokens=7,296,000
249
+ throughput/device/tokens_per_second=1,175
250
+ throughput/device/batches_per_second=0.0490
251
+ [step=39/500000]
252
+ train/ActionL1Loss=0.3714
253
+ throughput/total_tokens=7,488,000
254
+ throughput/device/tokens_per_second=1,175
255
+ throughput/device/batches_per_second=0.0490
256
+ [step=40/500000]
257
+ optim/total_grad_norm=20.27
258
+ train/ActionL1Loss=0.3428
259
+ throughput/total_tokens=7,680,000
260
+ throughput/device/tokens_per_second=1,175
261
+ throughput/device/batches_per_second=0.0490
262
+ System/Peak GPU Memory (MB)=46,917
263
+ [step=41/500000]
264
+ train/ActionL1Loss=0.4135
265
+ throughput/total_tokens=7,872,000
266
+ throughput/device/tokens_per_second=1,174
267
+ throughput/device/batches_per_second=0.0490
268
+ [step=42/500000]
269
+ train/ActionL1Loss=0.3713
270
+ throughput/total_tokens=8,064,000
271
+ throughput/device/tokens_per_second=1,174
272
+ throughput/device/batches_per_second=0.0489
273
+ [step=43/500000]
274
+ train/ActionL1Loss=0.3708
275
+ throughput/total_tokens=8,256,000
276
+ throughput/device/tokens_per_second=1,174
277
+ throughput/device/batches_per_second=0.0489
278
+ [step=44/500000]
279
+ train/ActionL1Loss=0.4028
280
+ throughput/total_tokens=8,448,000
281
+ throughput/device/tokens_per_second=1,174
282
+ throughput/device/batches_per_second=0.0489
283
+ [step=45/500000]
284
+ train/ActionL1Loss=0.3508
285
+ throughput/total_tokens=8,640,000
286
+ throughput/device/tokens_per_second=1,174
287
+ throughput/device/batches_per_second=0.0489
288
+ [step=46/500000]
289
+ train/ActionL1Loss=0.3318
290
+ throughput/total_tokens=8,832,000
291
+ throughput/device/tokens_per_second=1,174
292
+ throughput/device/batches_per_second=0.0489
293
+ [step=47/500000]
294
+ train/ActionL1Loss=0.3590
295
+ throughput/total_tokens=9,024,000
296
+ throughput/device/tokens_per_second=1,174
297
+ throughput/device/batches_per_second=0.0489
298
+ [step=48/500000]
299
+ train/ActionL1Loss=0.3704
300
+ throughput/total_tokens=9,216,000
301
+ throughput/device/tokens_per_second=1,174
302
+ throughput/device/batches_per_second=0.0489
303
+ [step=49/500000]
304
+ train/ActionL1Loss=0.3401
305
+ throughput/total_tokens=9,408,000
306
+ throughput/device/tokens_per_second=1,174
307
+ throughput/device/batches_per_second=0.0489
308
+ [step=50/500000]
309
+ train/ActionL1Loss=0.4467
310
+ throughput/total_tokens=9,600,000
311
+ throughput/device/tokens_per_second=1,174
312
+ throughput/device/batches_per_second=0.0489
313
+ System/Peak GPU Memory (MB)=46,917
314
+ [step=51/500000]
315
+ train/ActionL1Loss=0.4312
316
+ throughput/total_tokens=9,792,000
317
+ throughput/device/tokens_per_second=1,172
318
+ throughput/device/batches_per_second=0.0488
319
+ [step=52/500000]
320
+ train/ActionL1Loss=0.3493
321
+ throughput/total_tokens=9,984,000
322
+ throughput/device/tokens_per_second=1,171
323
+ throughput/device/batches_per_second=0.0488
324
+ [step=53/500000]
325
+ train/ActionL1Loss=0.4043
326
+ throughput/total_tokens=10,176,000
327
+ throughput/device/tokens_per_second=1,172
328
+ throughput/device/batches_per_second=0.0488
329
+ [step=54/500000]
330
+ train/ActionL1Loss=0.4185
331
+ throughput/total_tokens=10,368,000
332
+ throughput/device/tokens_per_second=1,172
333
+ throughput/device/batches_per_second=0.0488
334
+ [step=55/500000]
335
+ train/ActionL1Loss=0.4030
336
+ throughput/total_tokens=10,560,000
337
+ throughput/device/tokens_per_second=1,171
338
+ throughput/device/batches_per_second=0.0488
339
+ [step=56/500000]
340
+ train/ActionL1Loss=0.4105
341
+ throughput/total_tokens=10,752,000
342
+ throughput/device/tokens_per_second=1,171
343
+ throughput/device/batches_per_second=0.0488
344
+ [step=57/500000]
345
+ train/ActionL1Loss=0.3801
346
+ throughput/total_tokens=10,944,000
347
+ throughput/device/tokens_per_second=1,172
348
+ throughput/device/batches_per_second=0.0488
349
+ [step=58/500000]
350
+ train/ActionL1Loss=0.3240
351
+ throughput/total_tokens=11,136,000
352
+ throughput/device/tokens_per_second=1,171
353
+ throughput/device/batches_per_second=0.0488
354
+ [step=59/500000]
355
+ train/ActionL1Loss=0.4480
356
+ throughput/total_tokens=11,328,000
357
+ throughput/device/tokens_per_second=1,171
358
+ throughput/device/batches_per_second=0.0488
359
+ [step=60/500000]
360
+ optim/total_grad_norm=22.23
361
+ train/ActionL1Loss=0.2945
362
+ throughput/total_tokens=11,520,000
363
+ throughput/device/tokens_per_second=1,172
364
+ throughput/device/batches_per_second=0.0488
365
+ System/Peak GPU Memory (MB)=46,917
366
+ [step=61/500000]
367
+ train/ActionL1Loss=0.4101
368
+ throughput/total_tokens=11,712,000
369
+ throughput/device/tokens_per_second=1,172
370
+ throughput/device/batches_per_second=0.0488
371
+ [step=62/500000]
372
+ train/ActionL1Loss=0.4025
373
+ throughput/total_tokens=11,904,000
374
+ throughput/device/tokens_per_second=1,172
375
+ throughput/device/batches_per_second=0.0488
376
+ [step=63/500000]
377
+ train/ActionL1Loss=0.4508
378
+ throughput/total_tokens=12,096,000
379
+ throughput/device/tokens_per_second=1,172
380
+ throughput/device/batches_per_second=0.0489
381
+ [step=64/500000]
382
+ train/ActionL1Loss=0.3416
383
+ throughput/total_tokens=12,288,000
384
+ throughput/device/tokens_per_second=1,172
385
+ throughput/device/batches_per_second=0.0489
386
+ [step=65/500000]
387
+ train/ActionL1Loss=0.3825
388
+ throughput/total_tokens=12,480,000
389
+ throughput/device/tokens_per_second=1,172
390
+ throughput/device/batches_per_second=0.0489
wandb/wandb/run-20251002_150921-kqbx0cjv/logs/debug-core.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-10-02T15:09:21.488298147Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpdb_ho7_w/port-1805179.txt","pid":1805179,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-10-02T15:09:21.489895431Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":1805179}
3
+ {"time":"2025-10-02T15:09:21.490851516Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-1805179-1805352-669910572/socket","Net":"unix"}}
4
+ {"time":"2025-10-02T15:09:21.492979899Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-10-02T15:09:21.505095966Z","level":"INFO","msg":"handleInformInit: received","streamId":"kqbx0cjv","id":"1(@)"}
6
+ {"time":"2025-10-02T15:09:22.667598354Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"kqbx0cjv","id":"1(@)"}
wandb/wandb/run-20251002_154526-bw81vbs0/files/wandb-metadata.json ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.18",
4
+ "startedAt": "2025-10-02T15:45:26.956450Z",
5
+ "args": [
6
+ "qwen2_7b",
7
+ "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/",
8
+ "--vision_backbone",
9
+ "openai",
10
+ "--action_head",
11
+ "l1_regression",
12
+ "--seq_len",
13
+ "1600",
14
+ "--ft_llm",
15
+ "--checkpoint",
16
+ "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
17
+ "--device_train_microbatch_size",
18
+ "16",
19
+ "--global_batch_size",
20
+ "126",
21
+ "--dataset",
22
+ "vla_dataset_realworld",
23
+ "--llm_learning_rate",
24
+ "5e-5",
25
+ "--wandb_entity",
26
+ "henryeap",
27
+ "--wandb_project",
28
+ "a1-realworld",
29
+ "--wandb_run_name",
30
+ "wipe",
31
+ "--real_world_vla_config_path",
32
+ "vla_config_realworld/vla_config_wipe.yaml",
33
+ "--save_overwrite"
34
+ ],
35
+ "program": "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
36
+ "codePath": "launch_scripts/train_vla.py",
37
+ "codePathLocal": "launch_scripts/train_vla.py",
38
+ "git": {
39
+ "remote": "https://github.com/Spatialtemporal-AI/A1.git",
40
+ "commit": "5071f59d87c6a976691323cbac66d7a988b0b4e7"
41
+ },
42
+ "email": "ihenrykwok@outlook.com",
43
+ "root": "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wandb",
44
+ "host": "auh7-1b-gpu-260",
45
+ "executable": "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
46
+ "cpu_count": 64,
47
+ "cpu_count_logical": 128,
48
+ "gpu": "Instinct MI210",
49
+ "gpu_count": 8,
50
+ "disk": {
51
+ "/": {
52
+ "total": "470343073792",
53
+ "used": "56241807360"
54
+ }
55
+ },
56
+ "memory": {
57
+ "total": "2434606956544"
58
+ },
59
+ "gpu_amd": [
60
+ {
61
+ "id": "2",
62
+ "uniqueId": "0x9815965a899d8053",
63
+ "vbiosVersion": "113-D67301V-073",
64
+ "performanceLevel": "auto",
65
+ "maxPower": "300.0",
66
+ "series": "Instinct MI210",
67
+ "model": "0x740f",
68
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
69
+ "sku": "D67301V",
70
+ "sclkRange": "500Mhz - 1700Mhz",
71
+ "mclkRange": "400Mhz - 1600Mhz"
72
+ },
73
+ {
74
+ "id": "7",
75
+ "uniqueId": "0x702e8efb76b00c21",
76
+ "vbiosVersion": "113-D67301V-073",
77
+ "performanceLevel": "auto",
78
+ "maxPower": "300.0",
79
+ "series": "Instinct MI210",
80
+ "model": "0x740f",
81
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
82
+ "sku": "D67301V",
83
+ "sclkRange": "500Mhz - 1700Mhz",
84
+ "mclkRange": "400Mhz - 1600Mhz"
85
+ },
86
+ {
87
+ "id": "3",
88
+ "uniqueId": "0xd7a6e11358a6574d",
89
+ "vbiosVersion": "113-D67301V-073",
90
+ "performanceLevel": "auto",
91
+ "maxPower": "300.0",
92
+ "series": "Instinct MI210",
93
+ "model": "0x740f",
94
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
95
+ "sku": "D67301V",
96
+ "sclkRange": "500Mhz - 1700Mhz",
97
+ "mclkRange": "400Mhz - 1600Mhz"
98
+ },
99
+ {
100
+ "id": "1",
101
+ "uniqueId": "0xe35cdba2e3fafd21",
102
+ "vbiosVersion": "113-D67301V-073",
103
+ "performanceLevel": "auto",
104
+ "maxPower": "300.0",
105
+ "series": "Instinct MI210",
106
+ "model": "0x740f",
107
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
108
+ "sku": "D67301V",
109
+ "sclkRange": "500Mhz - 1700Mhz",
110
+ "mclkRange": "400Mhz - 1600Mhz"
111
+ },
112
+ {
113
+ "id": "4",
114
+ "uniqueId": "0x4493708eee1ee737",
115
+ "vbiosVersion": "113-D67301V-073",
116
+ "performanceLevel": "auto",
117
+ "maxPower": "300.0",
118
+ "series": "Instinct MI210",
119
+ "model": "0x740f",
120
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
121
+ "sku": "D67301V",
122
+ "sclkRange": "500Mhz - 1700Mhz",
123
+ "mclkRange": "400Mhz - 1600Mhz"
124
+ },
125
+ {
126
+ "id": "0",
127
+ "uniqueId": "0x4213cc9eeeefc98d",
128
+ "vbiosVersion": "113-D67301V-073",
129
+ "performanceLevel": "auto",
130
+ "maxPower": "300.0",
131
+ "series": "Instinct MI210",
132
+ "model": "0x740f",
133
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
134
+ "sku": "D67301V",
135
+ "sclkRange": "500Mhz - 1700Mhz",
136
+ "mclkRange": "400Mhz - 1600Mhz"
137
+ },
138
+ {
139
+ "id": "6",
140
+ "uniqueId": "0x2d75dae36f0dc353",
141
+ "vbiosVersion": "113-D67301V-073",
142
+ "performanceLevel": "auto",
143
+ "maxPower": "300.0",
144
+ "series": "Instinct MI210",
145
+ "model": "0x740f",
146
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
147
+ "sku": "D67301V",
148
+ "sclkRange": "500Mhz - 1700Mhz",
149
+ "mclkRange": "400Mhz - 1600Mhz"
150
+ },
151
+ {
152
+ "id": "5",
153
+ "uniqueId": "0xd79d4a081e34548d",
154
+ "vbiosVersion": "113-D67301V-073",
155
+ "performanceLevel": "auto",
156
+ "maxPower": "300.0",
157
+ "series": "Instinct MI210",
158
+ "model": "0x740f",
159
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
160
+ "sku": "D67301V",
161
+ "sclkRange": "500Mhz - 1700Mhz",
162
+ "mclkRange": "400Mhz - 1600Mhz"
163
+ }
164
+ ],
165
+ "slurm": {
166
+ "cluster_name": "ai-04r",
167
+ "conf": "/etc/slurm/slurm.conf",
168
+ "cpus_on_node": "128",
169
+ "gpus_on_node": "8",
170
+ "gtids": "0",
171
+ "job_account": "faculty-acc",
172
+ "job_cpus_per_node": "128",
173
+ "job_end_time": "1759679082",
174
+ "job_gid": "2000",
175
+ "job_gpus": "0,1,2,3,4,5,6,7",
176
+ "job_id": "2231",
177
+ "job_name": "mh_wipe",
178
+ "job_nodelist": "auh7-1b-gpu-260",
179
+ "job_num_nodes": "1",
180
+ "job_partition": "faculty",
181
+ "job_qos": "xdqos",
182
+ "job_start_time": "1759419882",
183
+ "job_uid": "2013",
184
+ "job_user": "xiaodan",
185
+ "jobid": "2231",
186
+ "localid": "0",
187
+ "nnodes": "1",
188
+ "nodeid": "0",
189
+ "nodelist": "auh7-1b-gpu-260",
190
+ "nprocs": "1",
191
+ "ntasks": "1",
192
+ "ntasks_per_node": "1",
193
+ "oom_kill_step": "0",
194
+ "prio_process": "0",
195
+ "procid": "0",
196
+ "submit_dir": "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
197
+ "submit_host": "auh-1b-cpu-login-001",
198
+ "task_pid": "2561154",
199
+ "tasks_per_node": "1",
200
+ "topology_addr": "auh7-1b-gpu-260",
201
+ "topology_addr_pattern": "node"
202
+ },
203
+ "writerId": "objruxls4ndcc2m3d5i0bpx0ttt9cswe"
204
+ }
wandb/wandb/run-20251002_154526-bw81vbs0/run-bw81vbs0.wandb ADDED
Binary file (32.8 kB). View file
 
wandb/wandb/run-20251002_155015-xojint20/files/wandb-metadata.json ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.18",
4
+ "startedAt": "2025-10-02T15:50:15.612316Z",
5
+ "args": [
6
+ "qwen2_7b",
7
+ "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/",
8
+ "--vision_backbone",
9
+ "openai",
10
+ "--action_head",
11
+ "flow_matching",
12
+ "--seq_len",
13
+ "1600",
14
+ "--ft_llm",
15
+ "--checkpoint",
16
+ "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
17
+ "--device_train_microbatch_size",
18
+ "16",
19
+ "--global_batch_size",
20
+ "126",
21
+ "--dataset",
22
+ "vla_dataset_realworld",
23
+ "--llm_learning_rate",
24
+ "5e-5",
25
+ "--wandb_entity",
26
+ "henryeap",
27
+ "--wandb_project",
28
+ "a1-realworld",
29
+ "--wandb_run_name",
30
+ "wipe",
31
+ "--real_world_vla_config_path",
32
+ "vla_config_realworld/vla_config_wipe.yaml",
33
+ "--save_overwrite"
34
+ ],
35
+ "program": "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
36
+ "codePath": "launch_scripts/train_vla.py",
37
+ "codePathLocal": "launch_scripts/train_vla.py",
38
+ "git": {
39
+ "remote": "https://github.com/Spatialtemporal-AI/A1.git",
40
+ "commit": "5071f59d87c6a976691323cbac66d7a988b0b4e7"
41
+ },
42
+ "email": "ihenrykwok@outlook.com",
43
+ "root": "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wandb",
44
+ "host": "auh7-1b-gpu-260",
45
+ "executable": "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
46
+ "cpu_count": 64,
47
+ "cpu_count_logical": 128,
48
+ "gpu": "Instinct MI210",
49
+ "gpu_count": 8,
50
+ "disk": {
51
+ "/": {
52
+ "total": "470343073792",
53
+ "used": "56242147328"
54
+ }
55
+ },
56
+ "memory": {
57
+ "total": "2434606956544"
58
+ },
59
+ "gpu_amd": [
60
+ {
61
+ "id": "5",
62
+ "uniqueId": "0xd79d4a081e34548d",
63
+ "vbiosVersion": "113-D67301V-073",
64
+ "performanceLevel": "auto",
65
+ "maxPower": "300.0",
66
+ "series": "Instinct MI210",
67
+ "model": "0x740f",
68
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
69
+ "sku": "D67301V",
70
+ "sclkRange": "500Mhz - 1700Mhz",
71
+ "mclkRange": "400Mhz - 1600Mhz"
72
+ },
73
+ {
74
+ "id": "4",
75
+ "uniqueId": "0x4493708eee1ee737",
76
+ "vbiosVersion": "113-D67301V-073",
77
+ "performanceLevel": "auto",
78
+ "maxPower": "300.0",
79
+ "series": "Instinct MI210",
80
+ "model": "0x740f",
81
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
82
+ "sku": "D67301V",
83
+ "sclkRange": "500Mhz - 1700Mhz",
84
+ "mclkRange": "400Mhz - 1600Mhz"
85
+ },
86
+ {
87
+ "id": "1",
88
+ "uniqueId": "0xe35cdba2e3fafd21",
89
+ "vbiosVersion": "113-D67301V-073",
90
+ "performanceLevel": "auto",
91
+ "maxPower": "300.0",
92
+ "series": "Instinct MI210",
93
+ "model": "0x740f",
94
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
95
+ "sku": "D67301V",
96
+ "sclkRange": "500Mhz - 1700Mhz",
97
+ "mclkRange": "400Mhz - 1600Mhz"
98
+ },
99
+ {
100
+ "id": "6",
101
+ "uniqueId": "0x2d75dae36f0dc353",
102
+ "vbiosVersion": "113-D67301V-073",
103
+ "performanceLevel": "auto",
104
+ "maxPower": "300.0",
105
+ "series": "Instinct MI210",
106
+ "model": "0x740f",
107
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
108
+ "sku": "D67301V",
109
+ "sclkRange": "500Mhz - 1700Mhz",
110
+ "mclkRange": "400Mhz - 1600Mhz"
111
+ },
112
+ {
113
+ "id": "0",
114
+ "uniqueId": "0x4213cc9eeeefc98d",
115
+ "vbiosVersion": "113-D67301V-073",
116
+ "performanceLevel": "auto",
117
+ "maxPower": "300.0",
118
+ "series": "Instinct MI210",
119
+ "model": "0x740f",
120
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
121
+ "sku": "D67301V",
122
+ "sclkRange": "500Mhz - 1700Mhz",
123
+ "mclkRange": "400Mhz - 1600Mhz"
124
+ },
125
+ {
126
+ "id": "2",
127
+ "uniqueId": "0x9815965a899d8053",
128
+ "vbiosVersion": "113-D67301V-073",
129
+ "performanceLevel": "auto",
130
+ "maxPower": "300.0",
131
+ "series": "Instinct MI210",
132
+ "model": "0x740f",
133
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
134
+ "sku": "D67301V",
135
+ "sclkRange": "500Mhz - 1700Mhz",
136
+ "mclkRange": "400Mhz - 1600Mhz"
137
+ },
138
+ {
139
+ "id": "3",
140
+ "uniqueId": "0xd7a6e11358a6574d",
141
+ "vbiosVersion": "113-D67301V-073",
142
+ "performanceLevel": "auto",
143
+ "maxPower": "300.0",
144
+ "series": "Instinct MI210",
145
+ "model": "0x740f",
146
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
147
+ "sku": "D67301V",
148
+ "sclkRange": "500Mhz - 1700Mhz",
149
+ "mclkRange": "400Mhz - 1600Mhz"
150
+ },
151
+ {
152
+ "id": "7",
153
+ "uniqueId": "0x702e8efb76b00c21",
154
+ "vbiosVersion": "113-D67301V-073",
155
+ "performanceLevel": "auto",
156
+ "maxPower": "300.0",
157
+ "series": "Instinct MI210",
158
+ "model": "0x740f",
159
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
160
+ "sku": "D67301V",
161
+ "sclkRange": "500Mhz - 1700Mhz",
162
+ "mclkRange": "400Mhz - 1600Mhz"
163
+ }
164
+ ],
165
+ "slurm": {
166
+ "cluster_name": "ai-04r",
167
+ "conf": "/etc/slurm/slurm.conf",
168
+ "cpus_on_node": "128",
169
+ "gpus_on_node": "8",
170
+ "gtids": "0",
171
+ "job_account": "faculty-acc",
172
+ "job_cpus_per_node": "128",
173
+ "job_end_time": "1759679370",
174
+ "job_gid": "2000",
175
+ "job_gpus": "0,1,2,3,4,5,6,7",
176
+ "job_id": "2232",
177
+ "job_name": "mh_wipe_flow_matching",
178
+ "job_nodelist": "auh7-1b-gpu-260",
179
+ "job_num_nodes": "1",
180
+ "job_partition": "faculty",
181
+ "job_qos": "xdqos",
182
+ "job_start_time": "1759420170",
183
+ "job_uid": "2013",
184
+ "job_user": "xiaodan",
185
+ "jobid": "2232",
186
+ "localid": "0",
187
+ "nnodes": "1",
188
+ "nodeid": "0",
189
+ "nodelist": "auh7-1b-gpu-260",
190
+ "nprocs": "1",
191
+ "ntasks": "1",
192
+ "ntasks_per_node": "1",
193
+ "oom_kill_step": "0",
194
+ "prio_process": "0",
195
+ "procid": "0",
196
+ "submit_dir": "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
197
+ "submit_host": "auh-1b-cpu-login-001",
198
+ "task_pid": "2563631",
199
+ "tasks_per_node": "1",
200
+ "topology_addr": "auh7-1b-gpu-260",
201
+ "topology_addr_pattern": "node"
202
+ },
203
+ "writerId": "dta64te2cmxj20iztgvki6h4mul24fyy"
204
+ }
wandb/wandb/run-20251002_155441-70dhy5dq/files/requirements.txt ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ai2-molmo==0.0.0
2
+ astunparse==1.6.3
3
+ flatbuffers==25.2.10
4
+ gast==0.6.0
5
+ google-pasta==0.2.0
6
+ h5py==3.14.0
7
+ libclang==18.1.1
8
+ Markdown==3.9
9
+ namex==0.1.0
10
+ opt_einsum==3.4.0
11
+ optree==0.17.0
12
+ tensorboard-data-server==0.7.2
13
+ tensorflow-io-gcs-filesystem==0.37.1
14
+ termcolor==3.1.0
15
+ Werkzeug==3.1.3
16
+ Brotli==1.1.0
17
+ Farama-Notifications==0.0.4
18
+ MarkupSafe==2.1.5
19
+ PyYAML==6.0.2
20
+ absl-py==2.3.1
21
+ accelerate==1.10.1
22
+ ai2-molmo==0.0.0
23
+ aiofiles==24.1.0
24
+ aiohappyeyeballs==2.6.1
25
+ aiohttp==3.12.15
26
+ aiosignal==1.4.0
27
+ annotated-types==0.7.0
28
+ antlr4-python3-runtime==4.9.3
29
+ anyio==4.10.0
30
+ array_record==0.8.1
31
+ async-timeout==5.0.1
32
+ attrs==25.3.0
33
+ av==15.1.0
34
+ backports.tarfile==1.2.0
35
+ beaker-gantry==3.2.0
36
+ beaker-py==2.5.0
37
+ black==23.12.1
38
+ blinker==1.9.0
39
+ boltons==25.0.0
40
+ boto3==1.40.33
41
+ botocore==1.40.33
42
+ build==1.3.0
43
+ cached_path==1.7.3
44
+ cached-property==2.0.1
45
+ cachetools==5.5.2
46
+ certifi==2025.8.3
47
+ cffi==2.0.0
48
+ charset-normalizer==3.4.3
49
+ click==8.2.1
50
+ click-help-colors==0.9.4
51
+ click-option-group==0.5.7
52
+ cloudpickle==3.1.1
53
+ cmake==4.1.0
54
+ contourpy==1.3.2
55
+ cryptography==46.0.1
56
+ cycler==0.12.1
57
+ dataclass-extensions==0.2.3
58
+ datasets==3.6.0
59
+ decorator==5.2.1
60
+ deepdiff==8.6.1
61
+ diffusers==0.35.1
62
+ dill==0.3.8
63
+ distro==1.9.0
64
+ dlimp==0.0.1
65
+ dm-tree==0.1.9
66
+ docutils==0.22.1
67
+ draccus==0.10.0
68
+ editdistance==0.8.1
69
+ einops==0.8.1
70
+ einops-exts==0.0.4
71
+ et_xmlfile==2.0.0
72
+ etils==1.13.0
73
+ evdev==1.9.2
74
+ exceptiongroup==1.3.0
75
+ face==24.0.0
76
+ fastapi==0.116.2
77
+ ffmpy==0.6.1
78
+ fiddle==0.3.0
79
+ filelock==3.13.1
80
+ Flask==3.1.2
81
+ fonttools==4.60.0
82
+ frozenlist==1.7.0
83
+ fsspec==2023.9.2
84
+ ftfy==6.3.1
85
+ gcsfs==2023.9.2
86
+ gitdb==4.0.12
87
+ GitPython==3.1.45
88
+ glom==24.11.0
89
+ google-api-core==2.25.1
90
+ google-auth==2.40.3
91
+ google-auth-oauthlib==1.2.2
92
+ google-cloud-core==2.4.3
93
+ google-cloud-storage==2.19.0
94
+ google-crc32c==1.7.1
95
+ google-resumable-media==2.7.2
96
+ googleapis-common-protos==1.70.0
97
+ gradio==5.46.0
98
+ gradio_client==1.13.0
99
+ graphviz==0.21
100
+ groovy==0.1.2
101
+ grpcio==1.75.0
102
+ gymnasium==0.29.1
103
+ h11==0.16.0
104
+ hf_transfer==0.1.9
105
+ hf-xet==1.1.10
106
+ httpcore==1.0.9
107
+ httpx==0.28.1
108
+ huggingface-hub==0.35.0
109
+ id==1.5.0
110
+ idna==3.10
111
+ imageio==2.37.0
112
+ imageio-ffmpeg==0.6.0
113
+ importlib_metadata==8.7.0
114
+ importlib_resources==6.5.2
115
+ iniconfig==2.1.0
116
+ inquirerpy==0.3.4
117
+ isort==5.12.0
118
+ itsdangerous==2.2.0
119
+ jaraco.classes==3.4.0
120
+ jaraco.context==6.0.1
121
+ jaraco.functools==4.3.0
122
+ jeepney==0.9.0
123
+ Jinja2==3.1.4
124
+ jiter==0.11.0
125
+ jmespath==1.0.1
126
+ joblib==1.5.2
127
+ jsonlines==4.0.0
128
+ keras==2.15.0
129
+ keyring==25.6.0
130
+ kiwisolver==1.4.9
131
+ latex2sympy2_extended==1.10.2
132
+ lerobot==0.3.4
133
+ Levenshtein==0.27.1
134
+ libcst==1.8.4
135
+ lightning-utilities==0.15.2
136
+ markdown-it-py==4.0.0
137
+ math-verify==0.8.0
138
+ matplotlib==3.10.6
139
+ mdurl==0.1.2
140
+ mergedeep==1.3.4
141
+ ml-dtypes==0.2.0
142
+ ml_dtypes==0.5.3
143
+ more-itertools==10.8.0
144
+ mpmath==1.3.0
145
+ msgspec==0.19.0
146
+ multidict==6.6.4
147
+ multiprocess==0.70.16
148
+ mypy==1.3.0
149
+ mypy_extensions==1.1.0
150
+ necessary==0.4.3
151
+ networkx==3.3
152
+ nh3==0.3.0
153
+ nltk==3.9.1
154
+ numpy==1.26.4
155
+ oauthlib==3.3.1
156
+ omegaconf==2.3.0
157
+ openai==1.108.0
158
+ opencv-python-headless==4.12.0.88
159
+ OpenEXR==3.4.0
160
+ openpyxl==3.1.5
161
+ orderly-set==5.5.0
162
+ orjson==3.11.3
163
+ packaging==25.0
164
+ pandas==2.3.2
165
+ pathspec==0.12.1
166
+ petname==2.6
167
+ pfzy==0.3.4
168
+ pillow==11.0.0
169
+ pip==25.2
170
+ platformdirs==4.4.0
171
+ pluggy==1.6.0
172
+ promise==2.3
173
+ prompt_toolkit==3.0.52
174
+ propcache==0.3.2
175
+ proto-plus==1.26.1
176
+ protobuf==4.21.12
177
+ protobuf==6.32.1
178
+ psutil==7.1.0
179
+ pyarrow==21.0.0
180
+ pyasn1==0.6.1
181
+ pyasn1_modules==0.4.2
182
+ pycparser==2.23
183
+ pydantic==2.11.9
184
+ pydantic_core==2.33.2
185
+ pydub==0.25.1
186
+ Pygments==2.19.2
187
+ pynput==1.8.1
188
+ pyparsing==3.2.4
189
+ pyproject_hooks==1.2.0
190
+ pyserial==3.5
191
+ pytest==8.4.2
192
+ pytest-sphinx==0.6.3
193
+ python-dateutil==2.9.0.post0
194
+ python-Levenshtein==0.27.1
195
+ python-multipart==0.0.20
196
+ python-xlib==0.33
197
+ pytorch-triton-rocm==3.4.0
198
+ pytz==2025.2
199
+ pyyaml-include==1.4.1
200
+ RapidFuzz==3.14.1
201
+ readme_renderer==44.0
202
+ regex==2025.9.1
203
+ requests==2.32.5
204
+ requests-oauthlib==2.0.0
205
+ requests-toolbelt==1.0.0
206
+ requirements-parser==0.13.0
207
+ rerun-sdk==0.22.1
208
+ rfc3986==2.0.0
209
+ rich==13.9.4
210
+ rsa==4.9.1
211
+ ruff==0.13.0
212
+ s3transfer==0.14.0
213
+ safehttpx==0.1.6
214
+ safetensors==0.6.2
215
+ scikit-learn==1.7.2
216
+ scipy==1.15.3
217
+ SecretStorage==3.4.0
218
+ semantic-version==2.10.0
219
+ sentencepiece==0.2.1
220
+ sentry-sdk==2.38.0
221
+ setuptools==78.1.1
222
+ shellingham==1.5.4
223
+ six==1.17.0
224
+ smart_open==7.3.1
225
+ smashed==0.21.5
226
+ smmap==5.0.2
227
+ sniffio==1.3.1
228
+ starlette==0.48.0
229
+ sympy==1.13.3
230
+ tensorboard==2.15.2
231
+ tensorboard==2.19.0
232
+ tensorflow==2.15.0
233
+ tensorflow-addons==0.23.0
234
+ tensorflow-datasets==4.9.3
235
+ tensorflow-estimator==2.15.0
236
+ tensorflow-graphics==2021.12.3
237
+ tensorflow-metadata==1.17.2
238
+ threadpoolctl==3.6.0
239
+ timm==1.0.19
240
+ tokenizers==0.22.0
241
+ toml==0.10.2
242
+ tomli==2.2.1
243
+ tomlkit==0.13.3
244
+ torch==2.8.0+rocm6.4
245
+ torchcodec==0.5
246
+ torchmetrics==1.8.2
247
+ torchvision==0.23.0+rocm6.4
248
+ tqdm==4.67.1
249
+ transformers==4.56.1
250
+ trimesh==4.8.2
251
+ trouting==0.3.3
252
+ twine==6.2.0
253
+ typeguard==2.13.3
254
+ typer==0.17.4
255
+ typing_extensions==4.15.0
256
+ typing-inspect==0.9.0
257
+ typing-inspection==0.4.1
258
+ tzdata==2025.2
259
+ urllib3==2.5.0
260
+ uvicorn==0.35.0
261
+ wandb==0.21.4
262
+ wcwidth==0.2.13
263
+ websockets==15.0.1
264
+ wheel==0.45.1
265
+ wrapt==1.14.2
266
+ xxhash==3.5.0
267
+ yarl==1.20.1
268
+ zipp==3.23.0
269
+ lerobot==0.3.4
270
+ minLoRA==0.1.0
271
+ autocommand==2.2.2
272
+ backports.tarfile==1.2.0
273
+ importlib_metadata==8.0.0
274
+ inflect==7.3.1
275
+ jaraco.collections==5.1.0
276
+ jaraco.context==5.3.0
277
+ jaraco.functools==4.0.1
278
+ jaraco.text==3.12.1
279
+ more-itertools==10.3.0
280
+ packaging==24.2
281
+ platformdirs==4.2.2
282
+ tomli==2.0.1
283
+ typeguard==4.3.0
284
+ typing_extensions==4.12.2
285
+ wheel==0.45.1
286
+ zipp==3.19.2
wandb/wandb/run-20251002_155441-70dhy5dq/files/wandb-metadata.json ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.18",
4
+ "startedAt": "2025-10-02T15:54:41.904163Z",
5
+ "args": [
6
+ "qwen2_7b",
7
+ "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/",
8
+ "--vision_backbone",
9
+ "openai",
10
+ "--action_head",
11
+ "l1_regression",
12
+ "--seq_len",
13
+ "1600",
14
+ "--ft_llm",
15
+ "--checkpoint",
16
+ "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
17
+ "--device_train_microbatch_size",
18
+ "16",
19
+ "--global_batch_size",
20
+ "126",
21
+ "--dataset",
22
+ "vla_dataset_realworld",
23
+ "--llm_learning_rate",
24
+ "5e-5",
25
+ "--wandb_entity",
26
+ "henryeap",
27
+ "--wandb_project",
28
+ "a1-realworld",
29
+ "--wandb_run_name",
30
+ "glue",
31
+ "--real_world_vla_config_path",
32
+ "vla_config_realworld/vla_config_glue.yaml",
33
+ "--save_overwrite"
34
+ ],
35
+ "program": "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
36
+ "codePath": "launch_scripts/train_vla.py",
37
+ "codePathLocal": "launch_scripts/train_vla.py",
38
+ "git": {
39
+ "remote": "https://github.com/Spatialtemporal-AI/A1.git",
40
+ "commit": "5071f59d87c6a976691323cbac66d7a988b0b4e7"
41
+ },
42
+ "email": "ihenrykwok@outlook.com",
43
+ "root": "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wandb",
44
+ "host": "auh7-1b-gpu-293",
45
+ "executable": "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
46
+ "cpu_count": 64,
47
+ "cpu_count_logical": 128,
48
+ "gpu": "Instinct MI210",
49
+ "gpu_count": 8,
50
+ "disk": {
51
+ "/": {
52
+ "total": "470343073792",
53
+ "used": "50659602432"
54
+ }
55
+ },
56
+ "memory": {
57
+ "total": "2434606936064"
58
+ },
59
+ "gpu_amd": [
60
+ {
61
+ "id": "2",
62
+ "uniqueId": "0xd3246a860ff61784",
63
+ "vbiosVersion": "113-D67301V-073",
64
+ "performanceLevel": "auto",
65
+ "maxPower": "300.0",
66
+ "series": "Instinct MI210",
67
+ "model": "0x740f",
68
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
69
+ "sku": "D67301V",
70
+ "sclkRange": "500Mhz - 1700Mhz",
71
+ "mclkRange": "400Mhz - 1600Mhz"
72
+ },
73
+ {
74
+ "id": "6",
75
+ "uniqueId": "0xa307dde62eec0d7d",
76
+ "vbiosVersion": "113-D67301V-073",
77
+ "performanceLevel": "auto",
78
+ "maxPower": "300.0",
79
+ "series": "Instinct MI210",
80
+ "model": "0x740f",
81
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
82
+ "sku": "D67301V",
83
+ "sclkRange": "500Mhz - 1700Mhz",
84
+ "mclkRange": "400Mhz - 1600Mhz"
85
+ },
86
+ {
87
+ "id": "4",
88
+ "uniqueId": "0xd8fa68fa19711efd",
89
+ "vbiosVersion": "113-D67301V-073",
90
+ "performanceLevel": "auto",
91
+ "maxPower": "300.0",
92
+ "series": "Instinct MI210",
93
+ "model": "0x740f",
94
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
95
+ "sku": "D67301V",
96
+ "sclkRange": "500Mhz - 1700Mhz",
97
+ "mclkRange": "400Mhz - 1600Mhz"
98
+ },
99
+ {
100
+ "id": "7",
101
+ "uniqueId": "0xba4e7044cb7e770",
102
+ "vbiosVersion": "113-D67301V-073",
103
+ "performanceLevel": "auto",
104
+ "maxPower": "300.0",
105
+ "series": "Instinct MI210",
106
+ "model": "0x740f",
107
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
108
+ "sku": "D67301V",
109
+ "sclkRange": "500Mhz - 1700Mhz",
110
+ "mclkRange": "400Mhz - 1600Mhz"
111
+ },
112
+ {
113
+ "id": "3",
114
+ "uniqueId": "0x36cd9caedcbd1661",
115
+ "vbiosVersion": "113-D67301V-073",
116
+ "performanceLevel": "auto",
117
+ "maxPower": "300.0",
118
+ "series": "Instinct MI210",
119
+ "model": "0x740f",
120
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
121
+ "sku": "D67301V",
122
+ "sclkRange": "500Mhz - 1700Mhz",
123
+ "mclkRange": "400Mhz - 1600Mhz"
124
+ },
125
+ {
126
+ "id": "0",
127
+ "uniqueId": "0x5ad6d84cdd116aca",
128
+ "vbiosVersion": "113-D67301V-073",
129
+ "performanceLevel": "auto",
130
+ "maxPower": "300.0",
131
+ "series": "Instinct MI210",
132
+ "model": "0x740f",
133
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
134
+ "sku": "D67301V",
135
+ "sclkRange": "500Mhz - 1700Mhz",
136
+ "mclkRange": "400Mhz - 1600Mhz"
137
+ },
138
+ {
139
+ "id": "1",
140
+ "uniqueId": "0xbd5d0be0d2a8e2aa",
141
+ "vbiosVersion": "113-D67301V-073",
142
+ "performanceLevel": "auto",
143
+ "maxPower": "300.0",
144
+ "series": "Instinct MI210",
145
+ "model": "0x740f",
146
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
147
+ "sku": "D67301V",
148
+ "sclkRange": "500Mhz - 1700Mhz",
149
+ "mclkRange": "400Mhz - 1600Mhz"
150
+ },
151
+ {
152
+ "id": "5",
153
+ "uniqueId": "0x8c18f9eeeea22bf2",
154
+ "vbiosVersion": "113-D67301V-073",
155
+ "performanceLevel": "auto",
156
+ "maxPower": "300.0",
157
+ "series": "Instinct MI210",
158
+ "model": "0x740f",
159
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
160
+ "sku": "D67301V",
161
+ "sclkRange": "500Mhz - 1700Mhz",
162
+ "mclkRange": "400Mhz - 1600Mhz"
163
+ }
164
+ ],
165
+ "slurm": {
166
+ "cluster_name": "ai-04r",
167
+ "conf": "/etc/slurm/slurm.conf",
168
+ "cpus_on_node": "128",
169
+ "gpus_on_node": "8",
170
+ "gtids": "0",
171
+ "job_account": "faculty-acc",
172
+ "job_cpus_per_node": "128",
173
+ "job_end_time": "1759679637",
174
+ "job_gid": "2000",
175
+ "job_gpus": "0,1,2,3,4,5,6,7",
176
+ "job_id": "2235",
177
+ "job_name": "mh_glue_l1_regression",
178
+ "job_nodelist": "auh7-1b-gpu-293",
179
+ "job_num_nodes": "1",
180
+ "job_partition": "faculty",
181
+ "job_qos": "xdqos",
182
+ "job_start_time": "1759420437",
183
+ "job_uid": "2013",
184
+ "job_user": "xiaodan",
185
+ "jobid": "2235",
186
+ "localid": "0",
187
+ "nnodes": "1",
188
+ "nodeid": "0",
189
+ "nodelist": "auh7-1b-gpu-293",
190
+ "nprocs": "1",
191
+ "ntasks": "1",
192
+ "ntasks_per_node": "1",
193
+ "oom_kill_step": "0",
194
+ "prio_process": "0",
195
+ "procid": "0",
196
+ "submit_dir": "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
197
+ "submit_host": "auh-1b-cpu-login-001",
198
+ "task_pid": "1811465",
199
+ "tasks_per_node": "1",
200
+ "topology_addr": "auh7-1b-gpu-293",
201
+ "topology_addr_pattern": "node"
202
+ },
203
+ "writerId": "79ch7p9c1j6zdjyu7l2owvuh1v64fp2u"
204
+ }
wandb/wandb/run-20251002_155441-70dhy5dq/logs/debug-core.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-10-02T15:54:41.960056364Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpbo30i4ac/port-1811654.txt","pid":1811654,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-10-02T15:54:41.960588222Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":1811654}
3
+ {"time":"2025-10-02T15:54:41.960569302Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-1811654-1811823-2274215804/socket","Net":"unix"}}
4
+ {"time":"2025-10-02T15:54:42.143689402Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-10-02T15:54:42.151758187Z","level":"INFO","msg":"handleInformInit: received","streamId":"70dhy5dq","id":"1(@)"}
6
+ {"time":"2025-10-02T15:54:43.180651706Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"70dhy5dq","id":"1(@)"}
wandb/wandb/run-20251002_155441-70dhy5dq/logs/debug.log ADDED
File without changes
wandb/wandb/run-20251002_155442-6v8q0jgn/files/output.log ADDED
@@ -0,0 +1,314 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb: Detected [openai] in use.
2
+ wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
3
+ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
4
+ 10/02 [15:54:43] WARNING | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No warnings.py:109
5
+ device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
6
+ warnings.warn( # warn only once
7
+
8
+ ****** vla_cfg: {'datasets': {'rlds': {'name': None, 'path': None, 'weight': 1.0, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Glue', 8, 'bounds']], 'open-source-real-world': {'rlds': {'name': 'a1_real_world', 'path': '/vast/users/xiaodan/zhangjian/datasets/OXE', 'weight': 8, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [], 'agibot': {'path': None, 'weight': 8, 'action_proprio_normalization_type': None}}}, 'model': {'action_head': {'action_dim': 16, 'proprio_dim': 16, 'num_actions_chunk': 8, 'action_tokens_mapping': {'left_end_effector': 8, 'right_end_effector': 8}, 'use_left_eef': True, 'use_mobile_base': False}}}
9
+ ****** Skip RLDS main; path not found: None
10
+ ****** start build LeRobot main...
11
+ build_tokenizer, cache_dir None tokenizer_dir None
12
+ 10/02 [15:54:49] INFO | >> Padding tokenizer with 418 tokens tokenizer.py:130
13
+ 10/02 [15:54:50] INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:435
14
+ ****** before LeRobot dataset...
15
+ ****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Glue
16
+ ****** length of the dataset: 10316
17
+ ****** Skip RLDS open-source-real-world; mixture 'a1_real_world' not found under: /vast/users/xiaodan/zhangjian/datasets/OXE
18
+ ****** Expect one of: []
19
+ ****** path: None
20
+ ****** Skip AgiBotWorld-Alpha open-source-real-world; path not found: None
21
+ ****** After build vla train dataset...
22
+ ****** iterable_sources: [<olmo.data.dataset.IterableDatasetWrapper object at 0x7f71bbbb8100>]
23
+ ****** Before build mixed iterable dataset...
24
+ ****** Build vla train dataloader successfully!
25
+ ************************* Build train_dataloader successful!
26
+ ************************* Before build_inf_evaluators
27
+ 10/02 [15:54:56] WARNING | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No warnings.py:109
28
+ device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
29
+ warnings.warn( # warn only once
30
+
31
+ ************************* Build evaluators successful!
32
+ ************************* Early exit flags: early_exit=False
33
+ ************************* Initialize model successful!
34
+ ***** state_dict_path: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924/model.pt
35
+ ***** Load checkpoint successful!
36
+ missing keys: ['action_head.state_proj.weight', 'action_head.state_proj.bias', 'action_head.action_in_proj.weight', 'action_head.action_in_proj.bias', 'action_head.action_time_in.weight', 'action_head.action_time_in.bias', 'action_head.action_time_out.weight', 'action_head.action_time_out.bias', 'action_head.memory_proj.weight', 'action_head.memory_proj.bias', 'action_head.gemma.model.layers.0.self_attn.q_proj.weight', 'action_head.gemma.model.layers.0.self_attn.k_proj.weight', 'action_head.gemma.model.layers.0.self_attn.v_proj.weight', 'action_head.gemma.model.layers.0.self_attn.o_proj.weight', 'action_head.gemma.model.layers.0.mlp.gate_proj.weight', 'action_head.gemma.model.layers.0.mlp.up_proj.weight', 'action_head.gemma.model.layers.0.mlp.down_proj.weight', 'action_head.gemma.model.layers.0.input_layernorm.weight', 'action_head.gemma.model.layers.0.post_attention_layernorm.weight', 'action_head.gemma.model.layers.1.self_attn.q_proj.weight', 'action_head.gemma.model.layers.1.self_attn.k_proj.weight', 'action_head.gemma.model.layers.1.self_attn.v_proj.weight', 'action_head.gemma.model.layers.1.self_attn.o_proj.weight', 'action_head.gemma.model.layers.1.mlp.gate_proj.weight', 'action_head.gemma.model.layers.1.mlp.up_proj.weight', 'action_head.gemma.model.layers.1.mlp.down_proj.weight', 'action_head.gemma.model.layers.1.input_layernorm.weight', 'action_head.gemma.model.layers.1.post_attention_layernorm.weight', 'action_head.gemma.model.layers.2.self_attn.q_proj.weight', 'action_head.gemma.model.layers.2.self_attn.k_proj.weight', 'action_head.gemma.model.layers.2.self_attn.v_proj.weight', 'action_head.gemma.model.layers.2.self_attn.o_proj.weight', 'action_head.gemma.model.layers.2.mlp.gate_proj.weight', 'action_head.gemma.model.layers.2.mlp.up_proj.weight', 'action_head.gemma.model.layers.2.mlp.down_proj.weight', 'action_head.gemma.model.layers.2.input_layernorm.weight', 'action_head.gemma.model.layers.2.post_attention_layernorm.weight', 
'action_head.gemma.model.layers.3.self_attn.q_proj.weight', 'action_head.gemma.model.layers.3.self_attn.k_proj.weight', 'action_head.gemma.model.layers.3.self_attn.v_proj.weight', 'action_head.gemma.model.layers.3.self_attn.o_proj.weight', 'action_head.gemma.model.layers.3.mlp.gate_proj.weight', 'action_head.gemma.model.layers.3.mlp.up_proj.weight', 'action_head.gemma.model.layers.3.mlp.down_proj.weight', 'action_head.gemma.model.layers.3.input_layernorm.weight', 'action_head.gemma.model.layers.3.post_attention_layernorm.weight', 'action_head.gemma.model.layers.4.self_attn.q_proj.weight', 'action_head.gemma.model.layers.4.self_attn.k_proj.weight', 'action_head.gemma.model.layers.4.self_attn.v_proj.weight', 'action_head.gemma.model.layers.4.self_attn.o_proj.weight', 'action_head.gemma.model.layers.4.mlp.gate_proj.weight', 'action_head.gemma.model.layers.4.mlp.up_proj.weight', 'action_head.gemma.model.layers.4.mlp.down_proj.weight', 'action_head.gemma.model.layers.4.input_layernorm.weight', 'action_head.gemma.model.layers.4.post_attention_layernorm.weight', 'action_head.gemma.model.layers.5.self_attn.q_proj.weight', 'action_head.gemma.model.layers.5.self_attn.k_proj.weight', 'action_head.gemma.model.layers.5.self_attn.v_proj.weight', 'action_head.gemma.model.layers.5.self_attn.o_proj.weight', 'action_head.gemma.model.layers.5.mlp.gate_proj.weight', 'action_head.gemma.model.layers.5.mlp.up_proj.weight', 'action_head.gemma.model.layers.5.mlp.down_proj.weight', 'action_head.gemma.model.layers.5.input_layernorm.weight', 'action_head.gemma.model.layers.5.post_attention_layernorm.weight', 'action_head.gemma.model.layers.6.self_attn.q_proj.weight', 'action_head.gemma.model.layers.6.self_attn.k_proj.weight', 'action_head.gemma.model.layers.6.self_attn.v_proj.weight', 'action_head.gemma.model.layers.6.self_attn.o_proj.weight', 'action_head.gemma.model.layers.6.mlp.gate_proj.weight', 'action_head.gemma.model.layers.6.mlp.up_proj.weight', 
'action_head.gemma.model.layers.6.mlp.down_proj.weight', 'action_head.gemma.model.layers.6.input_layernorm.weight', 'action_head.gemma.model.layers.6.post_attention_
37
+ unexpected keys: []
38
+ ************************* Initialize model successful!
39
+ ************************* LoRA flags: use_lora=True, lora_llm=False, lora_vit=False, lora_connector=False
40
+ ************************* Before add lora to model
41
+ ************************* Before FSDP model wrapping
42
+ ************************* FSDP model wrapping successful!
43
+ ************************* Before building optimizer and scheduler
44
+ ************* Before get lora params
45
+ ************* After get lora params successfully
46
+ 10/02 [15:56:15] INFO | >> Constructing optimizer with 2 param groups optim.py:1283
47
+ **************************************************
48
+ After building optimizer and scheduler and model, before training, peak GPU memory (MB): 36856
49
+ ************************* VLATrainer initialized successfully!
50
+ ************************* Before trainer.fit()
51
+ Pre-train system metrics
52
+ System/Peak GPU Memory (MB)=36,856
53
+ 10/02 [15:56:16] WARNING | >> /vast/users/xiaodan/zhangjian/A1/olmo/data/collator.py:200: UserWarning: To copy construct from a tensor, it is recommended to use warnings.py:109
54
+ sourceTensor.detach().clone() or sourceTensor.detach().clone().requires_grad_(True), rather than torch.tensor(sourceTensor).
55
+ timestep_list = [torch.tensor(ex["timestep"], dtype=torch.int64) for ex in batch]
56
+
57
+ 10/02 [15:56:23] WARNING | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/nn/modules/module.py:967: UserWarning: The .grad attribute warnings.py:109
58
+ of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed
59
+ want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor
60
+ by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered
61
+ internally at /pytorch/build/aten/src/ATen/core/TensorBody.h:489.)
62
+ param_grad = param.grad
63
+
64
+ [step=1/500000]
65
+ train/ActionNoiseL2Loss=1.834
66
+ throughput/total_tokens=192,000
67
+ System/Peak GPU Memory (MB)=39,644
68
+ [step=2/500000]
69
+ train/ActionNoiseL2Loss=1.807
70
+ throughput/total_tokens=384,000
71
+ throughput/device/tokens_per_second=1,196
72
+ throughput/device/batches_per_second=0.0499
73
+ System/Peak GPU Memory (MB)=46,466
74
+ [step=3/500000]
75
+ train/ActionNoiseL2Loss=1.699
76
+ throughput/total_tokens=576,000
77
+ throughput/device/tokens_per_second=1,175
78
+ throughput/device/batches_per_second=0.0490
79
+ [step=4/500000]
80
+ train/ActionNoiseL2Loss=1.790
81
+ throughput/total_tokens=768,000
82
+ throughput/device/tokens_per_second=1,167
83
+ throughput/device/batches_per_second=0.0487
84
+ [step=5/500000]
85
+ train/ActionNoiseL2Loss=1.693
86
+ throughput/total_tokens=960,000
87
+ throughput/device/tokens_per_second=1,164
88
+ throughput/device/batches_per_second=0.0485
89
+ [step=6/500000]
90
+ train/ActionNoiseL2Loss=1.679
91
+ throughput/total_tokens=1,152,000
92
+ throughput/device/tokens_per_second=1,161
93
+ throughput/device/batches_per_second=0.0484
94
+ [step=7/500000]
95
+ train/ActionNoiseL2Loss=1.560
96
+ throughput/total_tokens=1,344,000
97
+ throughput/device/tokens_per_second=1,159
98
+ throughput/device/batches_per_second=0.0483
99
+ [step=8/500000]
100
+ train/ActionNoiseL2Loss=1.603
101
+ throughput/total_tokens=1,536,000
102
+ throughput/device/tokens_per_second=1,158
103
+ throughput/device/batches_per_second=0.0483
104
+ [step=9/500000]
105
+ train/ActionNoiseL2Loss=1.556
106
+ throughput/total_tokens=1,728,000
107
+ throughput/device/tokens_per_second=1,158
108
+ throughput/device/batches_per_second=0.0483
109
+ [step=10/500000]
110
+ train/ActionNoiseL2Loss=1.506
111
+ throughput/total_tokens=1,920,000
112
+ throughput/device/tokens_per_second=1,157
113
+ throughput/device/batches_per_second=0.0482
114
+ System/Peak GPU Memory (MB)=46,466
115
+ [step=11/500000]
116
+ train/ActionNoiseL2Loss=1.537
117
+ throughput/total_tokens=2,112,000
118
+ throughput/device/tokens_per_second=1,156
119
+ throughput/device/batches_per_second=0.0482
120
+ [step=12/500000]
121
+ train/ActionNoiseL2Loss=1.279
122
+ throughput/total_tokens=2,304,000
123
+ throughput/device/tokens_per_second=1,155
124
+ throughput/device/batches_per_second=0.0482
125
+ [step=13/500000]
126
+ train/ActionNoiseL2Loss=1.477
127
+ throughput/total_tokens=2,496,000
128
+ throughput/device/tokens_per_second=1,154
129
+ throughput/device/batches_per_second=0.0481
130
+ [step=14/500000]
131
+ train/ActionNoiseL2Loss=1.432
132
+ throughput/total_tokens=2,688,000
133
+ throughput/device/tokens_per_second=1,153
134
+ throughput/device/batches_per_second=0.0481
135
+ [step=15/500000]
136
+ train/ActionNoiseL2Loss=1.288
137
+ throughput/total_tokens=2,880,000
138
+ throughput/device/tokens_per_second=1,153
139
+ throughput/device/batches_per_second=0.0480
140
+ [step=16/500000]
141
+ train/ActionNoiseL2Loss=1.371
142
+ throughput/total_tokens=3,072,000
143
+ throughput/device/tokens_per_second=1,152
144
+ throughput/device/batches_per_second=0.0480
145
+ [step=17/500000]
146
+ train/ActionNoiseL2Loss=1.352
147
+ throughput/total_tokens=3,264,000
148
+ throughput/device/tokens_per_second=1,151
149
+ throughput/device/batches_per_second=0.0480
150
+ [step=18/500000]
151
+ train/ActionNoiseL2Loss=1.520
152
+ throughput/total_tokens=3,456,000
153
+ throughput/device/tokens_per_second=1,151
154
+ throughput/device/batches_per_second=0.0480
155
+ [step=19/500000]
156
+ train/ActionNoiseL2Loss=1.305
157
+ throughput/total_tokens=3,648,000
158
+ throughput/device/tokens_per_second=1,151
159
+ throughput/device/batches_per_second=0.0480
160
+ [step=20/500000]
161
+ optim/total_grad_norm=16.52
162
+ train/ActionNoiseL2Loss=1.322
163
+ throughput/total_tokens=3,840,000
164
+ throughput/device/tokens_per_second=1,149
165
+ throughput/device/batches_per_second=0.0479
166
+ System/Peak GPU Memory (MB)=46,466
167
+ [step=21/500000]
168
+ train/ActionNoiseL2Loss=1.404
169
+ throughput/total_tokens=4,032,000
170
+ throughput/device/tokens_per_second=1,149
171
+ throughput/device/batches_per_second=0.0479
172
+ [step=22/500000]
173
+ train/ActionNoiseL2Loss=1.266
174
+ throughput/total_tokens=4,224,000
175
+ throughput/device/tokens_per_second=1,149
176
+ throughput/device/batches_per_second=0.0479
177
+ [step=23/500000]
178
+ train/ActionNoiseL2Loss=1.394
179
+ throughput/total_tokens=4,416,000
180
+ throughput/device/tokens_per_second=1,149
181
+ throughput/device/batches_per_second=0.0479
182
+ [step=24/500000]
183
+ train/ActionNoiseL2Loss=1.259
184
+ throughput/total_tokens=4,608,000
185
+ throughput/device/tokens_per_second=1,148
186
+ throughput/device/batches_per_second=0.0479
187
+ [step=25/500000]
188
+ train/ActionNoiseL2Loss=1.191
189
+ throughput/total_tokens=4,800,000
190
+ throughput/device/tokens_per_second=1,148
191
+ throughput/device/batches_per_second=0.0479
192
+ [step=26/500000]
193
+ train/ActionNoiseL2Loss=1.317
194
+ throughput/total_tokens=4,992,000
195
+ throughput/device/tokens_per_second=1,148
196
+ throughput/device/batches_per_second=0.0479
197
+ [step=27/500000]
198
+ train/ActionNoiseL2Loss=1.215
199
+ throughput/total_tokens=5,184,000
200
+ throughput/device/tokens_per_second=1,148
201
+ throughput/device/batches_per_second=0.0479
202
+ [step=28/500000]
203
+ train/ActionNoiseL2Loss=1.260
204
+ throughput/total_tokens=5,376,000
205
+ throughput/device/tokens_per_second=1,148
206
+ throughput/device/batches_per_second=0.0478
207
+ [step=29/500000]
208
+ train/ActionNoiseL2Loss=1.132
209
+ throughput/total_tokens=5,568,000
210
+ throughput/device/tokens_per_second=1,148
211
+ throughput/device/batches_per_second=0.0478
212
+ [step=30/500000]
213
+ train/ActionNoiseL2Loss=1.241
214
+ throughput/total_tokens=5,760,000
215
+ throughput/device/tokens_per_second=1,148
216
+ throughput/device/batches_per_second=0.0478
217
+ System/Peak GPU Memory (MB)=46,466
218
+ [step=31/500000]
219
+ train/ActionNoiseL2Loss=1.084
220
+ throughput/total_tokens=5,952,000
221
+ throughput/device/tokens_per_second=1,147
222
+ throughput/device/batches_per_second=0.0478
223
+ [step=32/500000]
224
+ train/ActionNoiseL2Loss=1.049
225
+ throughput/total_tokens=6,144,000
226
+ throughput/device/tokens_per_second=1,147
227
+ throughput/device/batches_per_second=0.0478
228
+ [step=33/500000]
229
+ train/ActionNoiseL2Loss=1.266
230
+ throughput/total_tokens=6,336,000
231
+ throughput/device/tokens_per_second=1,148
232
+ throughput/device/batches_per_second=0.0478
233
+ [step=34/500000]
234
+ train/ActionNoiseL2Loss=1.018
235
+ throughput/total_tokens=6,528,000
236
+ throughput/device/tokens_per_second=1,148
237
+ throughput/device/batches_per_second=0.0479
238
+ [step=35/500000]
239
+ train/ActionNoiseL2Loss=1.012
240
+ throughput/total_tokens=6,720,000
241
+ throughput/device/tokens_per_second=1,148
242
+ throughput/device/batches_per_second=0.0479
243
+ [step=36/500000]
244
+ train/ActionNoiseL2Loss=1.101
245
+ throughput/total_tokens=6,912,000
246
+ throughput/device/tokens_per_second=1,149
247
+ throughput/device/batches_per_second=0.0479
248
+ [step=37/500000]
249
+ train/ActionNoiseL2Loss=1.093
250
+ throughput/total_tokens=7,104,000
251
+ throughput/device/tokens_per_second=1,149
252
+ throughput/device/batches_per_second=0.0479
253
+ [step=38/500000]
254
+ train/ActionNoiseL2Loss=1.153
255
+ throughput/total_tokens=7,296,000
256
+ throughput/device/tokens_per_second=1,149
257
+ throughput/device/batches_per_second=0.0479
258
+ [step=39/500000]
259
+ train/ActionNoiseL2Loss=0.9454
260
+ throughput/total_tokens=7,488,000
261
+ throughput/device/tokens_per_second=1,149
262
+ throughput/device/batches_per_second=0.0479
263
+ [step=40/500000]
264
+ optim/total_grad_norm=63.39
265
+ train/ActionNoiseL2Loss=1.099
266
+ throughput/total_tokens=7,680,000
267
+ throughput/device/tokens_per_second=1,149
268
+ throughput/device/batches_per_second=0.0479
269
+ System/Peak GPU Memory (MB)=46,466
270
+ [step=41/500000]
271
+ train/ActionNoiseL2Loss=0.9066
272
+ throughput/total_tokens=7,872,000
273
+ throughput/device/tokens_per_second=1,149
274
+ throughput/device/batches_per_second=0.0479
275
+ [step=42/500000]
276
+ train/ActionNoiseL2Loss=1.033
277
+ throughput/total_tokens=8,064,000
278
+ throughput/device/tokens_per_second=1,150
279
+ throughput/device/batches_per_second=0.0479
280
+ [step=43/500000]
281
+ train/ActionNoiseL2Loss=0.9956
282
+ throughput/total_tokens=8,256,000
283
+ throughput/device/tokens_per_second=1,150
284
+ throughput/device/batches_per_second=0.0479
285
+ [step=44/500000]
286
+ train/ActionNoiseL2Loss=1.186
287
+ throughput/total_tokens=8,448,000
288
+ throughput/device/tokens_per_second=1,150
289
+ throughput/device/batches_per_second=0.0479
290
+ [step=45/500000]
291
+ train/ActionNoiseL2Loss=1.020
292
+ throughput/total_tokens=8,640,000
293
+ throughput/device/tokens_per_second=1,149
294
+ throughput/device/batches_per_second=0.0479
295
+ [step=46/500000]
296
+ train/ActionNoiseL2Loss=0.9211
297
+ throughput/total_tokens=8,832,000
298
+ throughput/device/tokens_per_second=1,150
299
+ throughput/device/batches_per_second=0.0479
300
+ [step=47/500000]
301
+ train/ActionNoiseL2Loss=0.9811
302
+ throughput/total_tokens=9,024,000
303
+ throughput/device/tokens_per_second=1,149
304
+ throughput/device/batches_per_second=0.0479
305
+ [step=48/500000]
306
+ train/ActionNoiseL2Loss=0.9845
307
+ throughput/total_tokens=9,216,000
308
+ throughput/device/tokens_per_second=1,149
309
+ throughput/device/batches_per_second=0.0479
310
+ [step=49/500000]
311
+ train/ActionNoiseL2Loss=0.9234
312
+ throughput/total_tokens=9,408,000
313
+ throughput/device/tokens_per_second=1,149
314
+ throughput/device/batches_per_second=0.0479
wandb/wandb/run-20251002_155442-6v8q0jgn/files/wandb-metadata.json ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.18",
4
+ "startedAt": "2025-10-02T15:54:42.003061Z",
5
+ "args": [
6
+ "qwen2_7b",
7
+ "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/",
8
+ "--vision_backbone",
9
+ "openai",
10
+ "--action_head",
11
+ "flow_matching",
12
+ "--seq_len",
13
+ "1600",
14
+ "--ft_llm",
15
+ "--checkpoint",
16
+ "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
17
+ "--device_train_microbatch_size",
18
+ "16",
19
+ "--global_batch_size",
20
+ "126",
21
+ "--dataset",
22
+ "vla_dataset_realworld",
23
+ "--llm_learning_rate",
24
+ "5e-5",
25
+ "--wandb_entity",
26
+ "henryeap",
27
+ "--wandb_project",
28
+ "a1-realworld",
29
+ "--wandb_run_name",
30
+ "glue",
31
+ "--real_world_vla_config_path",
32
+ "vla_config_realworld/vla_config_glue.yaml",
33
+ "--save_overwrite"
34
+ ],
35
+ "program": "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
36
+ "codePath": "launch_scripts/train_vla.py",
37
+ "codePathLocal": "launch_scripts/train_vla.py",
38
+ "git": {
39
+ "remote": "https://github.com/Spatialtemporal-AI/A1.git",
40
+ "commit": "5071f59d87c6a976691323cbac66d7a988b0b4e7"
41
+ },
42
+ "email": "ihenrykwok@outlook.com",
43
+ "root": "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wandb",
44
+ "host": "auh7-1b-gpu-260",
45
+ "executable": "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
46
+ "cpu_count": 64,
47
+ "cpu_count_logical": 128,
48
+ "gpu": "Instinct MI210",
49
+ "gpu_count": 8,
50
+ "disk": {
51
+ "/": {
52
+ "total": "470343073792",
53
+ "used": "56242470912"
54
+ }
55
+ },
56
+ "memory": {
57
+ "total": "2434606956544"
58
+ },
59
+ "gpu_amd": [
60
+ {
61
+ "id": "6",
62
+ "uniqueId": "0x2d75dae36f0dc353",
63
+ "vbiosVersion": "113-D67301V-073",
64
+ "performanceLevel": "auto",
65
+ "maxPower": "300.0",
66
+ "series": "Instinct MI210",
67
+ "model": "0x740f",
68
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
69
+ "sku": "D67301V",
70
+ "sclkRange": "500Mhz - 1700Mhz",
71
+ "mclkRange": "400Mhz - 1600Mhz"
72
+ },
73
+ {
74
+ "id": "3",
75
+ "uniqueId": "0xd7a6e11358a6574d",
76
+ "vbiosVersion": "113-D67301V-073",
77
+ "performanceLevel": "auto",
78
+ "maxPower": "300.0",
79
+ "series": "Instinct MI210",
80
+ "model": "0x740f",
81
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
82
+ "sku": "D67301V",
83
+ "sclkRange": "500Mhz - 1700Mhz",
84
+ "mclkRange": "400Mhz - 1600Mhz"
85
+ },
86
+ {
87
+ "id": "0",
88
+ "uniqueId": "0x4213cc9eeeefc98d",
89
+ "vbiosVersion": "113-D67301V-073",
90
+ "performanceLevel": "auto",
91
+ "maxPower": "300.0",
92
+ "series": "Instinct MI210",
93
+ "model": "0x740f",
94
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
95
+ "sku": "D67301V",
96
+ "sclkRange": "500Mhz - 1700Mhz",
97
+ "mclkRange": "400Mhz - 1600Mhz"
98
+ },
99
+ {
100
+ "id": "1",
101
+ "uniqueId": "0xe35cdba2e3fafd21",
102
+ "vbiosVersion": "113-D67301V-073",
103
+ "performanceLevel": "auto",
104
+ "maxPower": "300.0",
105
+ "series": "Instinct MI210",
106
+ "model": "0x740f",
107
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
108
+ "sku": "D67301V",
109
+ "sclkRange": "500Mhz - 1700Mhz",
110
+ "mclkRange": "400Mhz - 1600Mhz"
111
+ },
112
+ {
113
+ "id": "7",
114
+ "uniqueId": "0x702e8efb76b00c21",
115
+ "vbiosVersion": "113-D67301V-073",
116
+ "performanceLevel": "auto",
117
+ "maxPower": "300.0",
118
+ "series": "Instinct MI210",
119
+ "model": "0x740f",
120
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
121
+ "sku": "D67301V",
122
+ "sclkRange": "500Mhz - 1700Mhz",
123
+ "mclkRange": "400Mhz - 1600Mhz"
124
+ },
125
+ {
126
+ "id": "4",
127
+ "uniqueId": "0x4493708eee1ee737",
128
+ "vbiosVersion": "113-D67301V-073",
129
+ "performanceLevel": "auto",
130
+ "maxPower": "300.0",
131
+ "series": "Instinct MI210",
132
+ "model": "0x740f",
133
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
134
+ "sku": "D67301V",
135
+ "sclkRange": "500Mhz - 1700Mhz",
136
+ "mclkRange": "400Mhz - 1600Mhz"
137
+ },
138
+ {
139
+ "id": "5",
140
+ "uniqueId": "0xd79d4a081e34548d",
141
+ "vbiosVersion": "113-D67301V-073",
142
+ "performanceLevel": "auto",
143
+ "maxPower": "300.0",
144
+ "series": "Instinct MI210",
145
+ "model": "0x740f",
146
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
147
+ "sku": "D67301V",
148
+ "sclkRange": "500Mhz - 1700Mhz",
149
+ "mclkRange": "400Mhz - 1600Mhz"
150
+ },
151
+ {
152
+ "id": "2",
153
+ "uniqueId": "0x9815965a899d8053",
154
+ "vbiosVersion": "113-D67301V-073",
155
+ "performanceLevel": "auto",
156
+ "maxPower": "300.0",
157
+ "series": "Instinct MI210",
158
+ "model": "0x740f",
159
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
160
+ "sku": "D67301V",
161
+ "sclkRange": "500Mhz - 1700Mhz",
162
+ "mclkRange": "400Mhz - 1600Mhz"
163
+ }
164
+ ],
165
+ "slurm": {
166
+ "cluster_name": "ai-04r",
167
+ "conf": "/etc/slurm/slurm.conf",
168
+ "cpus_on_node": "128",
169
+ "gpus_on_node": "8",
170
+ "gtids": "0",
171
+ "job_account": "faculty-acc",
172
+ "job_cpus_per_node": "128",
173
+ "job_end_time": "1759679637",
174
+ "job_gid": "2000",
175
+ "job_gpus": "0,1,2,3,4,5,6,7",
176
+ "job_id": "2234",
177
+ "job_name": "mh_glue_flow_matching",
178
+ "job_nodelist": "auh7-1b-gpu-260",
179
+ "job_num_nodes": "1",
180
+ "job_partition": "faculty",
181
+ "job_qos": "xdqos",
182
+ "job_start_time": "1759420437",
183
+ "job_uid": "2013",
184
+ "job_user": "xiaodan",
185
+ "jobid": "2234",
186
+ "localid": "0",
187
+ "nnodes": "1",
188
+ "nodeid": "0",
189
+ "nodelist": "auh7-1b-gpu-260",
190
+ "nprocs": "1",
191
+ "ntasks": "1",
192
+ "ntasks_per_node": "1",
193
+ "oom_kill_step": "0",
194
+ "prio_process": "0",
195
+ "procid": "0",
196
+ "submit_dir": "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
197
+ "submit_host": "auh-1b-cpu-login-001",
198
+ "task_pid": "2565886",
199
+ "tasks_per_node": "1",
200
+ "topology_addr": "auh7-1b-gpu-260",
201
+ "topology_addr_pattern": "node"
202
+ },
203
+ "writerId": "1x8epr6rdu28pcmllq7snrfdls3nek8y"
204
+ }
wandb/wandb/run-20251002_155442-6v8q0jgn/logs/debug-core.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-10-02T15:54:42.055940338Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpwa4j5m09/port-2566075.txt","pid":2566075,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-10-02T15:54:42.056549445Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":2566075}
3
+ {"time":"2025-10-02T15:54:42.056519645Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2566075-2566240-670263594/socket","Net":"unix"}}
4
+ {"time":"2025-10-02T15:54:42.238288115Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-10-02T15:54:42.245089407Z","level":"INFO","msg":"handleInformInit: received","streamId":"6v8q0jgn","id":"1(@)"}
6
+ {"time":"2025-10-02T15:54:43.370000731Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"6v8q0jgn","id":"1(@)"}
wandb/wandb/run-20251002_155442-6v8q0jgn/logs/debug-internal.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-10-02T15:54:42.24698282Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
2
+ {"time":"2025-10-02T15:54:43.36994658Z","level":"INFO","msg":"stream: created new stream","id":"6v8q0jgn"}
3
+ {"time":"2025-10-02T15:54:43.369995201Z","level":"INFO","msg":"stream: started","id":"6v8q0jgn"}
4
+ {"time":"2025-10-02T15:54:43.370003131Z","level":"INFO","msg":"writer: started","stream_id":"6v8q0jgn"}
5
+ {"time":"2025-10-02T15:54:43.370010001Z","level":"INFO","msg":"handler: started","stream_id":"6v8q0jgn"}
6
+ {"time":"2025-10-02T15:54:43.370045482Z","level":"INFO","msg":"sender: started","stream_id":"6v8q0jgn"}
wandb/wandb/run-20251002_155442-6v8q0jgn/logs/debug.log ADDED
File without changes
wipe/config.yaml ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: wipe_20251002_163406
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 3584
7
+ n_heads: 28
8
+ n_kv_heads: 4
9
+ qkv_bias: true
10
+ clip_qkv: null
11
+ n_layers: 28
12
+ mlp_ratio: 4
13
+ mlp_hidden_size: 37888
14
+ activation_type: swiglu
15
+ block_type: sequential
16
+ block_group_size: 1
17
+ rope: true
18
+ rope_full_precision: true
19
+ rope_theta: 1000000.0
20
+ vision_backbone:
21
+ image_model_type: openai
22
+ image_default_input_size:
23
+ - 336
24
+ - 336
25
+ image_patch_size: 14
26
+ image_pos_patch_size: 14
27
+ image_emb_dim: 1024
28
+ image_num_heads: 16
29
+ image_num_key_value_heads: 16
30
+ image_num_layers: 23
31
+ image_head_dim: 64
32
+ image_mlp_dim: 4096
33
+ image_mlp_activations: quick_gelu
34
+ image_dropout_rate: 0.0
35
+ image_num_pos: 577
36
+ image_norm_eps: 1.0e-05
37
+ attention_dropout: 0.0
38
+ residual_dropout: 0.0
39
+ initializer_range: 0.02
40
+ fsdp_wrap: false
41
+ resize_mode: default
42
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
43
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
44
+ low_cpu_fsdp: true
45
+ attention_type: sdpa
46
+ float32_attention: true
47
+ attention_dropout: 0.0
48
+ attention_layer_norm: false
49
+ residual_dropout: 0.1
50
+ response_residual_dropout: 0.0
51
+ embedding_dropout: 0.0
52
+ layer_norm_type: rms
53
+ layer_norm_with_affine: true
54
+ layer_norm_eps: 1.0e-06
55
+ attention_layer_norm_with_affine: true
56
+ max_sequence_length: 4096
57
+ max_position_embeddings: null
58
+ include_bias: false
59
+ bias_for_layer_norm: null
60
+ scale_logits: false
61
+ vocab_size: 152064
62
+ embedding_size: 152064
63
+ ff_out_size: null
64
+ additional_vocab_size: 128
65
+ new_embedding_init_range: 0.02
66
+ weight_tying: false
67
+ init_device: null
68
+ init_fn: normal
69
+ init_std: 0.02
70
+ init_cutoff_factor: null
71
+ norm_after: false
72
+ precision: amp_bf16
73
+ max_crops: 12
74
+ crop_mode: overlap-and-resize-c2
75
+ use_col_tokens: true
76
+ prompt_type: uber_model
77
+ system_prompt_kind: demo_or_style
78
+ message_formatting: role
79
+ always_start_with_space: true
80
+ multi_annotation_weighting: root_subsegments
81
+ default_inference_len: 65
82
+ overlap_margins:
83
+ - 4
84
+ - 4
85
+ pad_value: 0.0
86
+ image_padding_embed: pad_and_partial_pad
87
+ fix_image_padding: true
88
+ vit_layers:
89
+ - -2
90
+ - -9
91
+ image_pooling_h: 2
92
+ image_pooling_w: 2
93
+ image_pooling_2d: attention_meanq
94
+ image_projector: mlp
95
+ image_feature_dropout: 0.0
96
+ initializer_range: 0.02
97
+ normalize_input_embeds: false
98
+ use_position_ids: true
99
+ head_dim: null
100
+ action_tokenizer:
101
+ identifier: physical-intelligence/fast
102
+ tokenizer_dir: null
103
+ action_dim: 7
104
+ horizon: 8
105
+ tokenizer:
106
+ identifier: Qwen/Qwen2-7B
107
+ tokenizer_dir: null
108
+ pad_tokenizer: true
109
+ moe_num_experts: 8
110
+ moe_top_k: 2
111
+ moe_mlp_impl: sparse
112
+ moe_log_expert_assignment: false
113
+ moe_shared_expert: false
114
+ moe_lbl_in_fp32: false
115
+ moe_interleave: false
116
+ moe_loss_weight: 0.1
117
+ moe_zloss_weight: null
118
+ moe_dropless: true
119
+ moe_capacity_factor: 1.25
120
+ action_head: flow_matching
121
+ num_diffusion_steps: 1000
122
+ num_diffusion_inference_steps: 30
123
+ use_proprio: true
124
+ action_head_dit_hidden_size: 1152
125
+ action_head_dit_depth: 28
126
+ action_head_dit_num_heads: 16
127
+ llm_causal_attention: false
128
+ action_use_left_eef: true
129
+ action_use_mobile_base: false
130
+ allow_resume: false
131
+ ft_llm: true
132
+ ft_vit: false
133
+ ft_connector: false
134
+ ft_embedding: lm_head
135
+ lora: false
136
+ use_lora: true
137
+ lora_rank: 8
138
+ lora_llm: false
139
+ lora_vit: false
140
+ lora_connector: false
141
+ early_exit: false
142
+ train_exit_random_layer: false
143
+ optimizer:
144
+ name: adamw
145
+ learning_rate: 0.0001
146
+ weight_decay: 0.01
147
+ betas:
148
+ - 0.9
149
+ - 0.95
150
+ eps: 1.0e-05
151
+ connector_learning_rate: 0.0002
152
+ vit_learning_rate: 6.0e-06
153
+ llm_learning_rate: 5.0e-05
154
+ connector_weight_decay: 0.0
155
+ vit_weight_decay: 0.0
156
+ llm_weight_decay: 0.0
157
+ connector_betas:
158
+ - 0.9
159
+ - 0.95
160
+ vit_betas:
161
+ - 0.9
162
+ - 0.95
163
+ llm_betas:
164
+ - 0.9
165
+ - 0.95
166
+ connector_eps: 1.0e-06
167
+ vit_eps: 1.0e-06
168
+ llm_eps: 1.0e-06
169
+ metrics_log_interval: 20
170
+ scheduler:
171
+ name: multimodal
172
+ units: steps
173
+ t_warmup: 100
174
+ t_max: null
175
+ alpha_f: 0.1
176
+ connector_t_warmup: 200
177
+ vit_t_warmup: 2000
178
+ llm_t_warmup: 2000
179
+ grad_clip_warmup_steps: null
180
+ grad_clip_warmup_factor: null
181
+ warmup_min_lr: 0.0
182
+ data:
183
+ dataset: vla_dataset_realworld
184
+ mixture: null
185
+ root_size_mixture: null
186
+ split: train
187
+ seed: 95818
188
+ shuffle_messages: false
189
+ pad: to_max
190
+ sequence_length: 1600
191
+ shuffle: true
192
+ for_inference: false
193
+ multi_modal: torch
194
+ num_workers: 0
195
+ drop_last: true
196
+ pin_memory: true
197
+ prefetch_factor: null
198
+ persistent_workers: false
199
+ timeout: 0
200
+ rlds_dataset_name: libero_4_task_suites_no_noops
201
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
202
+ use_wrist_image: true
203
+ use_proprio: true
204
+ rlds_shuffle_buffer_size: 100000
205
+ rlds_traj_threads: 8
206
+ rlds_read_threads: 8
207
+ lerobot_episode_index_start: null
208
+ lerobot_episode_index_end: null
209
+ restore_dataloader: true
210
+ fast_forward_batches: null
211
+ evaluators:
212
+ - label: val
213
+ data:
214
+ dataset: vla_dataset_realworld
215
+ mixture: null
216
+ root_size_mixture: null
217
+ split: validation
218
+ seed: null
219
+ shuffle_messages: false
220
+ pad: to_max
221
+ sequence_length: 1600
222
+ shuffle: false
223
+ for_inference: false
224
+ multi_modal: torch
225
+ num_workers: 0
226
+ drop_last: true
227
+ pin_memory: true
228
+ prefetch_factor: null
229
+ persistent_workers: true
230
+ timeout: 0
231
+ rlds_dataset_name: libero_4_task_suites_no_noops
232
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
233
+ use_wrist_image: true
234
+ use_proprio: true
235
+ rlds_shuffle_buffer_size: 256000
236
+ rlds_traj_threads: 8
237
+ rlds_read_threads: 8
238
+ lerobot_episode_index_start: 353
239
+ lerobot_episode_index_end: 765
240
+ device_eval_batch_size: null
241
+ subset_num_batches: 64
242
+ max_examples: null
243
+ max_new_tokens: 448
244
+ mm_evaluator: null
245
+ save_dir: null
246
+ save_to_checkpoint_dir: false
247
+ eval_name: null
248
+ skip_if_metrics_cached: true
249
+ eval_interval: 0
250
+ inf_eval_interval: -1
251
+ inf_evaluators: []
252
+ save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wipe
253
+ remote_save_folder: null
254
+ canceled_check_interval: 50
255
+ save_interval: 500
256
+ save_interval_unsharded: 500
257
+ save_interval_ephemeral: null
258
+ save_interval_action_head: 500
259
+ save_num_checkpoints_to_keep: 1
260
+ save_num_unsharded_checkpoints_to_keep: 1
261
+ save_num_action_head_checkpoints_to_keep: 2
262
+ save_overwrite: true
263
+ force_save_unsharded: false
264
+ no_pre_train_checkpoint: true
265
+ initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
266
+ load_model_config: null
267
+ checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
268
+ load_path: null
269
+ load_path_sharded_checkpointer: null
270
+ reset_optimizer_state: false
271
+ reset_trainer_state: false
272
+ save_dataloader_state: false
273
+ reset_dataloader_state: false
274
+ keep_lr_on_load: true
275
+ sharded_checkpointer: torch_legacy
276
+ max_duration: 500000
277
+ global_train_batch_size: 126
278
+ device_train_batch_size: 15
279
+ device_train_microbatch_size: 16
280
+ device_eval_batch_size: 4
281
+ eval_subset_num_batches: -1
282
+ eval_on_load: false
283
+ device_inf_eval_batch_size: 16
284
+ inf_eval_subset_num_batches: -1
285
+ device_train_grad_accum: 0
286
+ max_grad_norm: 1.0
287
+ multi_component_grad_norm: true
288
+ batch_divisor: global_batch
289
+ max_grad_norm_ratio: null
290
+ precision: amp_bf16
291
+ wandb:
292
+ project: a1-realworld
293
+ entity: henryeap
294
+ group: null
295
+ name: wipe_20251002_163406
296
+ tags:
297
+ - watching
298
+ log_artifacts: false
299
+ rank_zero_only: true
300
+ log_interval: 1
301
+ speed_monitor:
302
+ window_size: 20
303
+ gpu_flops_available: null
304
+ console_log_interval: 1
305
+ gen1_gc_interval: 1
306
+ compile: null
307
+ fsdp:
308
+ use_orig_params: true
309
+ sharding_strategy: FULL_SHARD
310
+ wrapping_strategy: by_block_and_size
311
+ precision: float
312
+ hybrid_sharding_num_model_replicas: null
313
+ softmax_auxiliary_loss: true
314
+ softmax_auxiliary_loss_scale: 0.0001
315
+ time_limit: null
316
+ extra_steps_after_cancel: 10
317
+ python_profiling: false
318
+ torch_profiling: false
319
+ stop_at: 500000
320
+ stop_after: null
321
+ activation_checkpointing: whole_layer
322
+ fused_loss: null
wipe/wandb/wandb/debug-internal.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-10-02T16:34:36.620221893Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
2
+ {"time":"2025-10-02T16:34:37.749739233Z","level":"INFO","msg":"stream: created new stream","id":"itiyfljc"}
3
+ {"time":"2025-10-02T16:34:37.749792274Z","level":"INFO","msg":"stream: started","id":"itiyfljc"}
4
+ {"time":"2025-10-02T16:34:37.749802594Z","level":"INFO","msg":"writer: started","stream_id":"itiyfljc"}
5
+ {"time":"2025-10-02T16:34:37.749817664Z","level":"INFO","msg":"handler: started","stream_id":"itiyfljc"}
6
+ {"time":"2025-10-02T16:34:37.749829744Z","level":"INFO","msg":"sender: started","stream_id":"itiyfljc"}
wipe/wandb/wandb/run-20251002_163436-itiyfljc/files/wandb-metadata.json ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.18",
4
+ "startedAt": "2025-10-02T16:34:36.375177Z",
5
+ "args": [
6
+ "qwen2_7b",
7
+ "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wipe",
8
+ "--vision_backbone",
9
+ "openai",
10
+ "--action_head",
11
+ "flow_matching",
12
+ "--seq_len",
13
+ "1600",
14
+ "--ft_llm",
15
+ "--checkpoint",
16
+ "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
17
+ "--device_train_microbatch_size",
18
+ "16",
19
+ "--global_batch_size",
20
+ "126",
21
+ "--dataset",
22
+ "vla_dataset_realworld",
23
+ "--llm_learning_rate",
24
+ "5e-5",
25
+ "--wandb_entity",
26
+ "henryeap",
27
+ "--wandb_project",
28
+ "a1-realworld",
29
+ "--wandb_run_name",
30
+ "wipe",
31
+ "--real_world_vla_config_path",
32
+ "vla_config_realworld/vla_config_wipe.yaml",
33
+ "--save_overwrite"
34
+ ],
35
+ "program": "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
36
+ "codePath": "launch_scripts/train_vla.py",
37
+ "codePathLocal": "launch_scripts/train_vla.py",
38
+ "git": {
39
+ "remote": "https://github.com/Spatialtemporal-AI/A1.git",
40
+ "commit": "5071f59d87c6a976691323cbac66d7a988b0b4e7"
41
+ },
42
+ "email": "ihenrykwok@outlook.com",
43
+ "root": "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wipe/wandb",
44
+ "host": "auh7-1b-gpu-293",
45
+ "executable": "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
46
+ "cpu_count": 64,
47
+ "cpu_count_logical": 128,
48
+ "gpu": "Instinct MI210",
49
+ "gpu_count": 8,
50
+ "disk": {
51
+ "/": {
52
+ "total": "470343073792",
53
+ "used": "50660397056"
54
+ }
55
+ },
56
+ "memory": {
57
+ "total": "2434606936064"
58
+ },
59
+ "gpu_amd": [
60
+ {
61
+ "id": "3",
62
+ "uniqueId": "0x36cd9caedcbd1661",
63
+ "vbiosVersion": "113-D67301V-073",
64
+ "performanceLevel": "auto",
65
+ "maxPower": "300.0",
66
+ "series": "Instinct MI210",
67
+ "model": "0x740f",
68
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
69
+ "sku": "D67301V",
70
+ "sclkRange": "500Mhz - 1700Mhz",
71
+ "mclkRange": "400Mhz - 1600Mhz"
72
+ },
73
+ {
74
+ "id": "0",
75
+ "uniqueId": "0x5ad6d84cdd116aca",
76
+ "vbiosVersion": "113-D67301V-073",
77
+ "performanceLevel": "auto",
78
+ "maxPower": "300.0",
79
+ "series": "Instinct MI210",
80
+ "model": "0x740f",
81
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
82
+ "sku": "D67301V",
83
+ "sclkRange": "500Mhz - 1700Mhz",
84
+ "mclkRange": "400Mhz - 1600Mhz"
85
+ },
86
+ {
87
+ "id": "6",
88
+ "uniqueId": "0xa307dde62eec0d7d",
89
+ "vbiosVersion": "113-D67301V-073",
90
+ "performanceLevel": "auto",
91
+ "maxPower": "300.0",
92
+ "series": "Instinct MI210",
93
+ "model": "0x740f",
94
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
95
+ "sku": "D67301V",
96
+ "sclkRange": "500Mhz - 1700Mhz",
97
+ "mclkRange": "400Mhz - 1600Mhz"
98
+ },
99
+ {
100
+ "id": "1",
101
+ "uniqueId": "0xbd5d0be0d2a8e2aa",
102
+ "vbiosVersion": "113-D67301V-073",
103
+ "performanceLevel": "auto",
104
+ "maxPower": "300.0",
105
+ "series": "Instinct MI210",
106
+ "model": "0x740f",
107
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
108
+ "sku": "D67301V",
109
+ "sclkRange": "500Mhz - 1700Mhz",
110
+ "mclkRange": "400Mhz - 1600Mhz"
111
+ },
112
+ {
113
+ "id": "7",
114
+ "uniqueId": "0xba4e7044cb7e770",
115
+ "vbiosVersion": "113-D67301V-073",
116
+ "performanceLevel": "auto",
117
+ "maxPower": "300.0",
118
+ "series": "Instinct MI210",
119
+ "model": "0x740f",
120
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
121
+ "sku": "D67301V",
122
+ "sclkRange": "500Mhz - 1700Mhz",
123
+ "mclkRange": "400Mhz - 1600Mhz"
124
+ },
125
+ {
126
+ "id": "2",
127
+ "uniqueId": "0xd3246a860ff61784",
128
+ "vbiosVersion": "113-D67301V-073",
129
+ "performanceLevel": "auto",
130
+ "maxPower": "300.0",
131
+ "series": "Instinct MI210",
132
+ "model": "0x740f",
133
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
134
+ "sku": "D67301V",
135
+ "sclkRange": "500Mhz - 1700Mhz",
136
+ "mclkRange": "400Mhz - 1600Mhz"
137
+ },
138
+ {
139
+ "id": "4",
140
+ "uniqueId": "0xd8fa68fa19711efd",
141
+ "vbiosVersion": "113-D67301V-073",
142
+ "performanceLevel": "auto",
143
+ "maxPower": "300.0",
144
+ "series": "Instinct MI210",
145
+ "model": "0x740f",
146
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
147
+ "sku": "D67301V",
148
+ "sclkRange": "500Mhz - 1700Mhz",
149
+ "mclkRange": "400Mhz - 1600Mhz"
150
+ },
151
+ {
152
+ "id": "5",
153
+ "uniqueId": "0x8c18f9eeeea22bf2",
154
+ "vbiosVersion": "113-D67301V-073",
155
+ "performanceLevel": "auto",
156
+ "maxPower": "300.0",
157
+ "series": "Instinct MI210",
158
+ "model": "0x740f",
159
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
160
+ "sku": "D67301V",
161
+ "sclkRange": "500Mhz - 1700Mhz",
162
+ "mclkRange": "400Mhz - 1600Mhz"
163
+ }
164
+ ],
165
+ "slurm": {
166
+ "cluster_name": "ai-04r",
167
+ "conf": "/etc/slurm/slurm.conf",
168
+ "cpus_on_node": "128",
169
+ "gpus_on_node": "8",
170
+ "gtids": "0",
171
+ "job_account": "faculty-acc",
172
+ "job_cpus_per_node": "128",
173
+ "job_end_time": "1759682032",
174
+ "job_gid": "2000",
175
+ "job_gpus": "0,1,2,3,4,5,6,7",
176
+ "job_id": "2261",
177
+ "job_name": "mh_wipe_flow_matching",
178
+ "job_nodelist": "auh7-1b-gpu-293",
179
+ "job_num_nodes": "1",
180
+ "job_partition": "faculty",
181
+ "job_qos": "xdqos",
182
+ "job_start_time": "1759422832",
183
+ "job_uid": "2013",
184
+ "job_user": "xiaodan",
185
+ "jobid": "2261",
186
+ "localid": "0",
187
+ "nnodes": "1",
188
+ "nodeid": "0",
189
+ "nodelist": "auh7-1b-gpu-293",
190
+ "nprocs": "1",
191
+ "ntasks": "1",
192
+ "ntasks_per_node": "1",
193
+ "oom_kill_step": "0",
194
+ "prio_process": "0",
195
+ "procid": "0",
196
+ "submit_dir": "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
197
+ "submit_host": "auh-1b-cpu-login-001",
198
+ "task_pid": "1816946",
199
+ "tasks_per_node": "1",
200
+ "topology_addr": "auh7-1b-gpu-293",
201
+ "topology_addr_pattern": "node"
202
+ },
203
+ "writerId": "e61jhvldaqba9uvqusim29dt4x4fm38h"
204
+ }
wipe/wandb/wandb/run-20251002_163436-itiyfljc/logs/debug-internal.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-10-02T16:34:36.620221893Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
2
+ {"time":"2025-10-02T16:34:37.749739233Z","level":"INFO","msg":"stream: created new stream","id":"itiyfljc"}
3
+ {"time":"2025-10-02T16:34:37.749792274Z","level":"INFO","msg":"stream: started","id":"itiyfljc"}
4
+ {"time":"2025-10-02T16:34:37.749802594Z","level":"INFO","msg":"writer: started","stream_id":"itiyfljc"}
5
+ {"time":"2025-10-02T16:34:37.749817664Z","level":"INFO","msg":"handler: started","stream_id":"itiyfljc"}
6
+ {"time":"2025-10-02T16:34:37.749829744Z","level":"INFO","msg":"sender: started","stream_id":"itiyfljc"}
wipe/wandb/wandb/run-20251002_163436-itiyfljc/run-itiyfljc.wandb ADDED
File without changes
wipe_flow_matching/config.yaml ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: wipe_20251005_163733
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 3584
7
+ n_heads: 28
8
+ n_kv_heads: 4
9
+ qkv_bias: true
10
+ clip_qkv: null
11
+ n_layers: 28
12
+ mlp_ratio: 4
13
+ mlp_hidden_size: 37888
14
+ activation_type: swiglu
15
+ block_type: sequential
16
+ block_group_size: 1
17
+ rope: true
18
+ rope_full_precision: true
19
+ rope_theta: 1000000.0
20
+ vision_backbone:
21
+ image_model_type: openai
22
+ image_default_input_size:
23
+ - 336
24
+ - 336
25
+ image_patch_size: 14
26
+ image_pos_patch_size: 14
27
+ image_emb_dim: 1024
28
+ image_num_heads: 16
29
+ image_num_key_value_heads: 16
30
+ image_num_layers: 23
31
+ image_head_dim: 64
32
+ image_mlp_dim: 4096
33
+ image_mlp_activations: quick_gelu
34
+ image_dropout_rate: 0.0
35
+ image_num_pos: 577
36
+ image_norm_eps: 1.0e-05
37
+ attention_dropout: 0.0
38
+ residual_dropout: 0.0
39
+ initializer_range: 0.02
40
+ fsdp_wrap: false
41
+ resize_mode: default
42
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
43
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
44
+ low_cpu_fsdp: true
45
+ attention_type: sdpa
46
+ float32_attention: true
47
+ attention_dropout: 0.0
48
+ attention_layer_norm: false
49
+ residual_dropout: 0.1
50
+ response_residual_dropout: 0.0
51
+ embedding_dropout: 0.0
52
+ layer_norm_type: rms
53
+ layer_norm_with_affine: true
54
+ layer_norm_eps: 1.0e-06
55
+ attention_layer_norm_with_affine: true
56
+ max_sequence_length: 4096
57
+ max_position_embeddings: null
58
+ include_bias: false
59
+ bias_for_layer_norm: null
60
+ scale_logits: false
61
+ vocab_size: 152064
62
+ embedding_size: 152064
63
+ ff_out_size: null
64
+ additional_vocab_size: 128
65
+ new_embedding_init_range: 0.02
66
+ weight_tying: false
67
+ init_device: null
68
+ init_fn: normal
69
+ init_std: 0.02
70
+ init_cutoff_factor: null
71
+ norm_after: false
72
+ precision: amp_bf16
73
+ max_crops: 12
74
+ crop_mode: overlap-and-resize-c2
75
+ use_col_tokens: true
76
+ prompt_type: uber_model
77
+ system_prompt_kind: demo_or_style
78
+ message_formatting: role
79
+ always_start_with_space: true
80
+ multi_annotation_weighting: root_subsegments
81
+ default_inference_len: 65
82
+ overlap_margins:
83
+ - 4
84
+ - 4
85
+ pad_value: 0.0
86
+ image_padding_embed: pad_and_partial_pad
87
+ fix_image_padding: true
88
+ vit_layers:
89
+ - -2
90
+ - -9
91
+ image_pooling_h: 2
92
+ image_pooling_w: 2
93
+ image_pooling_2d: attention_meanq
94
+ image_projector: mlp
95
+ image_feature_dropout: 0.0
96
+ initializer_range: 0.02
97
+ normalize_input_embeds: false
98
+ use_position_ids: true
99
+ head_dim: null
100
+ action_tokenizer:
101
+ identifier: physical-intelligence/fast
102
+ tokenizer_dir: null
103
+ action_dim: 7
104
+ horizon: 8
105
+ tokenizer:
106
+ identifier: Qwen/Qwen2-7B
107
+ tokenizer_dir: null
108
+ pad_tokenizer: true
109
+ moe_num_experts: 8
110
+ moe_top_k: 2
111
+ moe_mlp_impl: sparse
112
+ moe_log_expert_assignment: false
113
+ moe_shared_expert: false
114
+ moe_lbl_in_fp32: false
115
+ moe_interleave: false
116
+ moe_loss_weight: 0.1
117
+ moe_zloss_weight: null
118
+ moe_dropless: true
119
+ moe_capacity_factor: 1.25
120
+ action_head: flow_matching
121
+ num_diffusion_steps: 1000
122
+ num_diffusion_inference_steps: 30
123
+ use_proprio: true
124
+ action_head_dit_hidden_size: 1152
125
+ action_head_dit_depth: 28
126
+ action_head_dit_num_heads: 16
127
+ llm_causal_attention: false
128
+ action_use_left_eef: true
129
+ action_use_mobile_base: false
130
+ allow_resume: false
131
+ ft_llm: true
132
+ ft_vit: false
133
+ ft_connector: false
134
+ ft_embedding: lm_head
135
+ lora: false
136
+ use_lora: true
137
+ lora_rank: 8
138
+ lora_llm: false
139
+ lora_vit: false
140
+ lora_connector: false
141
+ early_exit: false
142
+ train_exit_random_layer: false
143
+ optimizer:
144
+ name: adamw
145
+ learning_rate: 0.0001
146
+ weight_decay: 0.01
147
+ betas:
148
+ - 0.9
149
+ - 0.95
150
+ eps: 1.0e-05
151
+ connector_learning_rate: 0.0002
152
+ vit_learning_rate: 6.0e-06
153
+ llm_learning_rate: 5.0e-05
154
+ connector_weight_decay: 0.0
155
+ vit_weight_decay: 0.0
156
+ llm_weight_decay: 0.0
157
+ connector_betas:
158
+ - 0.9
159
+ - 0.95
160
+ vit_betas:
161
+ - 0.9
162
+ - 0.95
163
+ llm_betas:
164
+ - 0.9
165
+ - 0.95
166
+ connector_eps: 1.0e-06
167
+ vit_eps: 1.0e-06
168
+ llm_eps: 1.0e-06
169
+ metrics_log_interval: 20
170
+ scheduler:
171
+ name: multimodal
172
+ units: steps
173
+ t_warmup: 100
174
+ t_max: null
175
+ alpha_f: 0.1
176
+ connector_t_warmup: 200
177
+ vit_t_warmup: 2000
178
+ llm_t_warmup: 2000
179
+ grad_clip_warmup_steps: null
180
+ grad_clip_warmup_factor: null
181
+ warmup_min_lr: 0.0
182
+ data:
183
+ dataset: vla_dataset_realworld
184
+ mixture: null
185
+ root_size_mixture: null
186
+ split: train
187
+ seed: 95818
188
+ shuffle_messages: false
189
+ pad: to_max
190
+ sequence_length: 1600
191
+ shuffle: true
192
+ for_inference: false
193
+ multi_modal: torch
194
+ num_workers: 0
195
+ drop_last: true
196
+ pin_memory: true
197
+ prefetch_factor: null
198
+ persistent_workers: false
199
+ timeout: 0
200
+ rlds_dataset_name: libero_4_task_suites_no_noops
201
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
202
+ use_wrist_image: true
203
+ use_proprio: true
204
+ rlds_shuffle_buffer_size: 100000
205
+ rlds_traj_threads: 8
206
+ rlds_read_threads: 8
207
+ lerobot_episode_index_start: null
208
+ lerobot_episode_index_end: null
209
+ restore_dataloader: true
210
+ fast_forward_batches: null
211
+ evaluators:
212
+ - label: val
213
+ data:
214
+ dataset: vla_dataset_realworld
215
+ mixture: null
216
+ root_size_mixture: null
217
+ split: validation
218
+ seed: null
219
+ shuffle_messages: false
220
+ pad: to_max
221
+ sequence_length: 1600
222
+ shuffle: false
223
+ for_inference: false
224
+ multi_modal: torch
225
+ num_workers: 0
226
+ drop_last: true
227
+ pin_memory: true
228
+ prefetch_factor: null
229
+ persistent_workers: true
230
+ timeout: 0
231
+ rlds_dataset_name: libero_4_task_suites_no_noops
232
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
233
+ use_wrist_image: true
234
+ use_proprio: true
235
+ rlds_shuffle_buffer_size: 256000
236
+ rlds_traj_threads: 8
237
+ rlds_read_threads: 8
238
+ lerobot_episode_index_start: 353
239
+ lerobot_episode_index_end: 765
240
+ device_eval_batch_size: null
241
+ subset_num_batches: 64
242
+ max_examples: null
243
+ max_new_tokens: 448
244
+ mm_evaluator: null
245
+ save_dir: null
246
+ save_to_checkpoint_dir: false
247
+ eval_name: null
248
+ skip_if_metrics_cached: true
249
+ eval_interval: 0
250
+ inf_eval_interval: -1
251
+ inf_evaluators: []
252
+ save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wipe_flow_matching
253
+ remote_save_folder: null
254
+ canceled_check_interval: 50
255
+ save_interval: 500
256
+ save_interval_unsharded: 500
257
+ save_interval_ephemeral: null
258
+ save_interval_action_head: 500
259
+ save_num_checkpoints_to_keep: 1
260
+ save_num_unsharded_checkpoints_to_keep: 1
261
+ save_num_action_head_checkpoints_to_keep: 2
262
+ save_overwrite: true
263
+ force_save_unsharded: false
264
+ no_pre_train_checkpoint: true
265
+ initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
266
+ load_model_config: null
267
+ checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
268
+ load_path: null
269
+ load_path_sharded_checkpointer: null
270
+ reset_optimizer_state: false
271
+ reset_trainer_state: false
272
+ save_dataloader_state: false
273
+ reset_dataloader_state: false
274
+ keep_lr_on_load: true
275
+ sharded_checkpointer: torch_legacy
276
+ max_duration: 500000
277
+ global_train_batch_size: 126
278
+ device_train_batch_size: 15
279
+ device_train_microbatch_size: 16
280
+ device_eval_batch_size: 4
281
+ eval_subset_num_batches: -1
282
+ eval_on_load: false
283
+ device_inf_eval_batch_size: 16
284
+ inf_eval_subset_num_batches: -1
285
+ device_train_grad_accum: 0
286
+ max_grad_norm: 1.0
287
+ multi_component_grad_norm: true
288
+ batch_divisor: global_batch
289
+ max_grad_norm_ratio: null
290
+ precision: amp_bf16
291
+ wandb:
292
+ project: a1-realworld
293
+ entity: henryeap
294
+ group: null
295
+ name: wipe_20251005_163733
296
+ tags:
297
+ - watching
298
+ log_artifacts: false
299
+ rank_zero_only: true
300
+ log_interval: 1
301
+ speed_monitor:
302
+ window_size: 20
303
+ gpu_flops_available: null
304
+ console_log_interval: 1
305
+ gen1_gc_interval: 1
306
+ compile: null
307
+ fsdp:
308
+ use_orig_params: true
309
+ sharding_strategy: FULL_SHARD
310
+ wrapping_strategy: by_block_and_size
311
+ precision: float
312
+ hybrid_sharding_num_model_replicas: null
313
+ softmax_auxiliary_loss: true
314
+ softmax_auxiliary_loss_scale: 0.0001
315
+ time_limit: null
316
+ extra_steps_after_cancel: 10
317
+ python_profiling: false
318
+ torch_profiling: false
319
+ stop_at: 500000
320
+ stop_after: null
321
+ activation_checkpointing: whole_layer
322
+ fused_loss: null
wipe_flow_matching/step12000-unsharded/config.yaml ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: wipe_20251005_163733
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 3584
7
+ n_heads: 28
8
+ n_kv_heads: 4
9
+ qkv_bias: true
10
+ clip_qkv: null
11
+ n_layers: 28
12
+ mlp_ratio: 4
13
+ mlp_hidden_size: 37888
14
+ activation_type: swiglu
15
+ block_type: sequential
16
+ block_group_size: 1
17
+ rope: true
18
+ rope_full_precision: true
19
+ rope_theta: 1000000.0
20
+ vision_backbone:
21
+ image_model_type: openai
22
+ image_default_input_size:
23
+ - 336
24
+ - 336
25
+ image_patch_size: 14
26
+ image_pos_patch_size: 14
27
+ image_emb_dim: 1024
28
+ image_num_heads: 16
29
+ image_num_key_value_heads: 16
30
+ image_num_layers: 23
31
+ image_head_dim: 64
32
+ image_mlp_dim: 4096
33
+ image_mlp_activations: quick_gelu
34
+ image_dropout_rate: 0.0
35
+ image_num_pos: 577
36
+ image_norm_eps: 1.0e-05
37
+ attention_dropout: 0.0
38
+ residual_dropout: 0.0
39
+ initializer_range: 0.02
40
+ fsdp_wrap: false
41
+ resize_mode: default
42
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
43
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
44
+ low_cpu_fsdp: true
45
+ attention_type: sdpa
46
+ float32_attention: true
47
+ attention_dropout: 0.0
48
+ attention_layer_norm: false
49
+ residual_dropout: 0.1
50
+ response_residual_dropout: 0.0
51
+ embedding_dropout: 0.0
52
+ layer_norm_type: rms
53
+ layer_norm_with_affine: true
54
+ layer_norm_eps: 1.0e-06
55
+ attention_layer_norm_with_affine: true
56
+ max_sequence_length: 4096
57
+ max_position_embeddings: null
58
+ include_bias: false
59
+ bias_for_layer_norm: null
60
+ scale_logits: false
61
+ vocab_size: 152064
62
+ embedding_size: 152064
63
+ ff_out_size: null
64
+ additional_vocab_size: 128
65
+ new_embedding_init_range: 0.02
66
+ weight_tying: false
67
+ init_device: null
68
+ init_fn: normal
69
+ init_std: 0.02
70
+ init_cutoff_factor: null
71
+ norm_after: false
72
+ precision: amp_bf16
73
+ max_crops: 12
74
+ crop_mode: overlap-and-resize-c2
75
+ use_col_tokens: true
76
+ prompt_type: uber_model
77
+ system_prompt_kind: demo_or_style
78
+ message_formatting: role
79
+ always_start_with_space: true
80
+ multi_annotation_weighting: root_subsegments
81
+ default_inference_len: 65
82
+ overlap_margins:
83
+ - 4
84
+ - 4
85
+ pad_value: 0.0
86
+ image_padding_embed: pad_and_partial_pad
87
+ fix_image_padding: true
88
+ vit_layers:
89
+ - -2
90
+ - -9
91
+ image_pooling_h: 2
92
+ image_pooling_w: 2
93
+ image_pooling_2d: attention_meanq
94
+ image_projector: mlp
95
+ image_feature_dropout: 0.0
96
+ initializer_range: 0.02
97
+ normalize_input_embeds: false
98
+ use_position_ids: true
99
+ head_dim: null
100
+ action_tokenizer:
101
+ identifier: physical-intelligence/fast
102
+ tokenizer_dir: null
103
+ action_dim: 7
104
+ horizon: 8
105
+ tokenizer:
106
+ identifier: Qwen/Qwen2-7B
107
+ tokenizer_dir: null
108
+ pad_tokenizer: true
109
+ moe_num_experts: 8
110
+ moe_top_k: 2
111
+ moe_mlp_impl: sparse
112
+ moe_log_expert_assignment: false
113
+ moe_shared_expert: false
114
+ moe_lbl_in_fp32: false
115
+ moe_interleave: false
116
+ moe_loss_weight: 0.1
117
+ moe_zloss_weight: null
118
+ moe_dropless: true
119
+ moe_capacity_factor: 1.25
120
+ action_head: flow_matching
121
+ num_diffusion_steps: 1000
122
+ num_diffusion_inference_steps: 30
123
+ use_proprio: true
124
+ action_head_dit_hidden_size: 1152
125
+ action_head_dit_depth: 28
126
+ action_head_dit_num_heads: 16
127
+ llm_causal_attention: false
128
+ action_use_left_eef: true
129
+ action_use_mobile_base: false
130
+ allow_resume: false
131
+ ft_llm: true
132
+ ft_vit: false
133
+ ft_connector: false
134
+ ft_embedding: lm_head
135
+ lora: false
136
+ use_lora: true
137
+ lora_rank: 8
138
+ lora_llm: false
139
+ lora_vit: false
140
+ lora_connector: false
141
+ early_exit: false
142
+ train_exit_random_layer: false
143
+ optimizer:
144
+ name: adamw
145
+ learning_rate: 0.0001
146
+ weight_decay: 0.01
147
+ betas:
148
+ - 0.9
149
+ - 0.95
150
+ eps: 1.0e-05
151
+ connector_learning_rate: 0.0002
152
+ vit_learning_rate: 6.0e-06
153
+ llm_learning_rate: 5.0e-05
154
+ connector_weight_decay: 0.0
155
+ vit_weight_decay: 0.0
156
+ llm_weight_decay: 0.0
157
+ connector_betas:
158
+ - 0.9
159
+ - 0.95
160
+ vit_betas:
161
+ - 0.9
162
+ - 0.95
163
+ llm_betas:
164
+ - 0.9
165
+ - 0.95
166
+ connector_eps: 1.0e-06
167
+ vit_eps: 1.0e-06
168
+ llm_eps: 1.0e-06
169
+ metrics_log_interval: 20
170
+ scheduler:
171
+ name: multimodal
172
+ units: steps
173
+ t_warmup: 100
174
+ t_max: null
175
+ alpha_f: 0.1
176
+ connector_t_warmup: 200
177
+ vit_t_warmup: 2000
178
+ llm_t_warmup: 2000
179
+ grad_clip_warmup_steps: null
180
+ grad_clip_warmup_factor: null
181
+ warmup_min_lr: 0.0
182
+ data:
183
+ dataset: vla_dataset_realworld
184
+ mixture: null
185
+ root_size_mixture: null
186
+ split: train
187
+ seed: 95818
188
+ shuffle_messages: false
189
+ pad: to_max
190
+ sequence_length: 1600
191
+ shuffle: true
192
+ for_inference: false
193
+ multi_modal: torch
194
+ num_workers: 0
195
+ drop_last: true
196
+ pin_memory: true
197
+ prefetch_factor: null
198
+ persistent_workers: false
199
+ timeout: 0
200
+ rlds_dataset_name: libero_4_task_suites_no_noops
201
+ rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Wipe
202
+ use_wrist_image: true
203
+ use_proprio: true
204
+ rlds_shuffle_buffer_size: 100000
205
+ rlds_traj_threads: 8
206
+ rlds_read_threads: 8
207
+ lerobot_episode_index_start: null
208
+ lerobot_episode_index_end: null
209
+ restore_dataloader: true
210
+ fast_forward_batches: null
211
+ evaluators:
212
+ - label: val
213
+ data:
214
+ dataset: vla_dataset_realworld
215
+ mixture: null
216
+ root_size_mixture: null
217
+ split: validation
218
+ seed: null
219
+ shuffle_messages: false
220
+ pad: to_max
221
+ sequence_length: 1600
222
+ shuffle: false
223
+ for_inference: false
224
+ multi_modal: torch
225
+ num_workers: 0
226
+ drop_last: true
227
+ pin_memory: true
228
+ prefetch_factor: null
229
+ persistent_workers: true
230
+ timeout: 0
231
+ rlds_dataset_name: libero_4_task_suites_no_noops
232
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
233
+ use_wrist_image: true
234
+ use_proprio: true
235
+ rlds_shuffle_buffer_size: 256000
236
+ rlds_traj_threads: 8
237
+ rlds_read_threads: 8
238
+ lerobot_episode_index_start: 353
239
+ lerobot_episode_index_end: 765
240
+ device_eval_batch_size: null
241
+ subset_num_batches: 64
242
+ max_examples: null
243
+ max_new_tokens: 448
244
+ mm_evaluator: null
245
+ save_dir: null
246
+ save_to_checkpoint_dir: false
247
+ eval_name: null
248
+ skip_if_metrics_cached: true
249
+ eval_interval: 0
250
+ inf_eval_interval: -1
251
+ inf_evaluators: []
252
+ save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wipe_flow_matching
253
+ remote_save_folder: null
254
+ canceled_check_interval: 50
255
+ save_interval: 500
256
+ save_interval_unsharded: 500
257
+ save_interval_ephemeral: null
258
+ save_interval_action_head: 500
259
+ save_num_checkpoints_to_keep: 1
260
+ save_num_unsharded_checkpoints_to_keep: 1
261
+ save_num_action_head_checkpoints_to_keep: 2
262
+ save_overwrite: true
263
+ force_save_unsharded: false
264
+ no_pre_train_checkpoint: true
265
+ initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
266
+ load_model_config: null
267
+ checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
268
+ load_path: null
269
+ load_path_sharded_checkpointer: null
270
+ reset_optimizer_state: false
271
+ reset_trainer_state: false
272
+ save_dataloader_state: false
273
+ reset_dataloader_state: false
274
+ keep_lr_on_load: true
275
+ sharded_checkpointer: torch_legacy
276
+ max_duration: 500000
277
+ global_train_batch_size: 126
278
+ device_train_batch_size: 15
279
+ device_train_microbatch_size: 16
280
+ device_eval_batch_size: 4
281
+ eval_subset_num_batches: -1
282
+ eval_on_load: false
283
+ device_inf_eval_batch_size: 16
284
+ inf_eval_subset_num_batches: -1
285
+ device_train_grad_accum: 0
286
+ max_grad_norm: 1.0
287
+ multi_component_grad_norm: true
288
+ batch_divisor: global_batch
289
+ max_grad_norm_ratio: null
290
+ precision: amp_bf16
291
+ wandb:
292
+ project: a1-realworld
293
+ entity: henryeap
294
+ group: null
295
+ name: wipe_20251005_163733
296
+ tags:
297
+ - watching
298
+ log_artifacts: false
299
+ rank_zero_only: true
300
+ log_interval: 1
301
+ speed_monitor:
302
+ window_size: 20
303
+ gpu_flops_available: null
304
+ console_log_interval: 1
305
+ gen1_gc_interval: 1
306
+ compile: null
307
+ fsdp:
308
+ use_orig_params: true
309
+ sharding_strategy: FULL_SHARD
310
+ wrapping_strategy: by_block_and_size
311
+ precision: float
312
+ hybrid_sharding_num_model_replicas: null
313
+ softmax_auxiliary_loss: true
314
+ softmax_auxiliary_loss_scale: 0.0001
315
+ time_limit: null
316
+ extra_steps_after_cancel: 10
317
+ python_profiling: false
318
+ torch_profiling: false
319
+ stop_at: 500000
320
+ stop_after: null
321
+ activation_checkpointing: whole_layer
322
+ fused_loss: null
wipe_flow_matching/step12000/config.yaml ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: wipe_20251005_163733
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 3584
7
+ n_heads: 28
8
+ n_kv_heads: 4
9
+ qkv_bias: true
10
+ clip_qkv: null
11
+ n_layers: 28
12
+ mlp_ratio: 4
13
+ mlp_hidden_size: 37888
14
+ activation_type: swiglu
15
+ block_type: sequential
16
+ block_group_size: 1
17
+ rope: true
18
+ rope_full_precision: true
19
+ rope_theta: 1000000.0
20
+ vision_backbone:
21
+ image_model_type: openai
22
+ image_default_input_size:
23
+ - 336
24
+ - 336
25
+ image_patch_size: 14
26
+ image_pos_patch_size: 14
27
+ image_emb_dim: 1024
28
+ image_num_heads: 16
29
+ image_num_key_value_heads: 16
30
+ image_num_layers: 23
31
+ image_head_dim: 64
32
+ image_mlp_dim: 4096
33
+ image_mlp_activations: quick_gelu
34
+ image_dropout_rate: 0.0
35
+ image_num_pos: 577
36
+ image_norm_eps: 1.0e-05
37
+ attention_dropout: 0.0
38
+ residual_dropout: 0.0
39
+ initializer_range: 0.02
40
+ fsdp_wrap: false
41
+ resize_mode: default
42
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
43
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
44
+ low_cpu_fsdp: true
45
+ attention_type: sdpa
46
+ float32_attention: true
47
+ attention_dropout: 0.0
48
+ attention_layer_norm: false
49
+ residual_dropout: 0.1
50
+ response_residual_dropout: 0.0
51
+ embedding_dropout: 0.0
52
+ layer_norm_type: rms
53
+ layer_norm_with_affine: true
54
+ layer_norm_eps: 1.0e-06
55
+ attention_layer_norm_with_affine: true
56
+ max_sequence_length: 4096
57
+ max_position_embeddings: null
58
+ include_bias: false
59
+ bias_for_layer_norm: null
60
+ scale_logits: false
61
+ vocab_size: 152064
62
+ embedding_size: 152064
63
+ ff_out_size: null
64
+ additional_vocab_size: 128
65
+ new_embedding_init_range: 0.02
66
+ weight_tying: false
67
+ init_device: null
68
+ init_fn: normal
69
+ init_std: 0.02
70
+ init_cutoff_factor: null
71
+ norm_after: false
72
+ precision: amp_bf16
73
+ max_crops: 12
74
+ crop_mode: overlap-and-resize-c2
75
+ use_col_tokens: true
76
+ prompt_type: uber_model
77
+ system_prompt_kind: demo_or_style
78
+ message_formatting: role
79
+ always_start_with_space: true
80
+ multi_annotation_weighting: root_subsegments
81
+ default_inference_len: 65
82
+ overlap_margins:
83
+ - 4
84
+ - 4
85
+ pad_value: 0.0
86
+ image_padding_embed: pad_and_partial_pad
87
+ fix_image_padding: true
88
+ vit_layers:
89
+ - -2
90
+ - -9
91
+ image_pooling_h: 2
92
+ image_pooling_w: 2
93
+ image_pooling_2d: attention_meanq
94
+ image_projector: mlp
95
+ image_feature_dropout: 0.0
96
+ initializer_range: 0.02
97
+ normalize_input_embeds: false
98
+ use_position_ids: true
99
+ head_dim: null
100
+ action_tokenizer:
101
+ identifier: physical-intelligence/fast
102
+ tokenizer_dir: null
103
+ action_dim: 7
104
+ horizon: 8
105
+ tokenizer:
106
+ identifier: Qwen/Qwen2-7B
107
+ tokenizer_dir: null
108
+ pad_tokenizer: true
109
+ moe_num_experts: 8
110
+ moe_top_k: 2
111
+ moe_mlp_impl: sparse
112
+ moe_log_expert_assignment: false
113
+ moe_shared_expert: false
114
+ moe_lbl_in_fp32: false
115
+ moe_interleave: false
116
+ moe_loss_weight: 0.1
117
+ moe_zloss_weight: null
118
+ moe_dropless: true
119
+ moe_capacity_factor: 1.25
120
+ action_head: flow_matching
121
+ num_diffusion_steps: 1000
122
+ num_diffusion_inference_steps: 30
123
+ use_proprio: true
124
+ action_head_dit_hidden_size: 1152
125
+ action_head_dit_depth: 28
126
+ action_head_dit_num_heads: 16
127
+ llm_causal_attention: false
128
+ action_use_left_eef: true
129
+ action_use_mobile_base: false
130
+ allow_resume: false
131
+ ft_llm: true
132
+ ft_vit: false
133
+ ft_connector: false
134
+ ft_embedding: lm_head
135
+ lora: false
136
+ use_lora: true
137
+ lora_rank: 8
138
+ lora_llm: false
139
+ lora_vit: false
140
+ lora_connector: false
141
+ early_exit: false
142
+ train_exit_random_layer: false
143
+ optimizer:
144
+ name: adamw
145
+ learning_rate: 0.0001
146
+ weight_decay: 0.01
147
+ betas:
148
+ - 0.9
149
+ - 0.95
150
+ eps: 1.0e-05
151
+ connector_learning_rate: 0.0002
152
+ vit_learning_rate: 6.0e-06
153
+ llm_learning_rate: 5.0e-05
154
+ connector_weight_decay: 0.0
155
+ vit_weight_decay: 0.0
156
+ llm_weight_decay: 0.0
157
+ connector_betas:
158
+ - 0.9
159
+ - 0.95
160
+ vit_betas:
161
+ - 0.9
162
+ - 0.95
163
+ llm_betas:
164
+ - 0.9
165
+ - 0.95
166
+ connector_eps: 1.0e-06
167
+ vit_eps: 1.0e-06
168
+ llm_eps: 1.0e-06
169
+ metrics_log_interval: 20
170
+ scheduler:
171
+ name: multimodal
172
+ units: steps
173
+ t_warmup: 100
174
+ t_max: null
175
+ alpha_f: 0.1
176
+ connector_t_warmup: 200
177
+ vit_t_warmup: 2000
178
+ llm_t_warmup: 2000
179
+ grad_clip_warmup_steps: null
180
+ grad_clip_warmup_factor: null
181
+ warmup_min_lr: 0.0
182
+ data:
183
+ dataset: vla_dataset_realworld
184
+ mixture: null
185
+ root_size_mixture: null
186
+ split: train
187
+ seed: 95818
188
+ shuffle_messages: false
189
+ pad: to_max
190
+ sequence_length: 1600
191
+ shuffle: true
192
+ for_inference: false
193
+ multi_modal: torch
194
+ num_workers: 0
195
+ drop_last: true
196
+ pin_memory: true
197
+ prefetch_factor: null
198
+ persistent_workers: false
199
+ timeout: 0
200
+ rlds_dataset_name: libero_4_task_suites_no_noops
201
+ rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Wipe
202
+ use_wrist_image: true
203
+ use_proprio: true
204
+ rlds_shuffle_buffer_size: 100000
205
+ rlds_traj_threads: 8
206
+ rlds_read_threads: 8
207
+ lerobot_episode_index_start: null
208
+ lerobot_episode_index_end: null
209
+ restore_dataloader: true
210
+ fast_forward_batches: null
211
+ evaluators:
212
+ - label: val
213
+ data:
214
+ dataset: vla_dataset_realworld
215
+ mixture: null
216
+ root_size_mixture: null
217
+ split: validation
218
+ seed: null
219
+ shuffle_messages: false
220
+ pad: to_max
221
+ sequence_length: 1600
222
+ shuffle: false
223
+ for_inference: false
224
+ multi_modal: torch
225
+ num_workers: 0
226
+ drop_last: true
227
+ pin_memory: true
228
+ prefetch_factor: null
229
+ persistent_workers: true
230
+ timeout: 0
231
+ rlds_dataset_name: libero_4_task_suites_no_noops
232
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
233
+ use_wrist_image: true
234
+ use_proprio: true
235
+ rlds_shuffle_buffer_size: 256000
236
+ rlds_traj_threads: 8
237
+ rlds_read_threads: 8
238
+ lerobot_episode_index_start: 353
239
+ lerobot_episode_index_end: 765
240
+ device_eval_batch_size: null
241
+ subset_num_batches: 64
242
+ max_examples: null
243
+ max_new_tokens: 448
244
+ mm_evaluator: null
245
+ save_dir: null
246
+ save_to_checkpoint_dir: false
247
+ eval_name: null
248
+ skip_if_metrics_cached: true
249
+ eval_interval: 0
250
+ inf_eval_interval: -1
251
+ inf_evaluators: []
252
+ save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wipe_flow_matching
253
+ remote_save_folder: null
254
+ canceled_check_interval: 50
255
+ save_interval: 500
256
+ save_interval_unsharded: 500
257
+ save_interval_ephemeral: null
258
+ save_interval_action_head: 500
259
+ save_num_checkpoints_to_keep: 1
260
+ save_num_unsharded_checkpoints_to_keep: 1
261
+ save_num_action_head_checkpoints_to_keep: 2
262
+ save_overwrite: true
263
+ force_save_unsharded: false
264
+ no_pre_train_checkpoint: true
265
+ initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
266
+ load_model_config: null
267
+ checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
268
+ load_path: null
269
+ load_path_sharded_checkpointer: null
270
+ reset_optimizer_state: false
271
+ reset_trainer_state: false
272
+ save_dataloader_state: false
273
+ reset_dataloader_state: false
274
+ keep_lr_on_load: true
275
+ sharded_checkpointer: torch_legacy
276
+ max_duration: 500000
277
+ global_train_batch_size: 126
278
+ device_train_batch_size: 15
279
+ device_train_microbatch_size: 16
280
+ device_eval_batch_size: 4
281
+ eval_subset_num_batches: -1
282
+ eval_on_load: false
283
+ device_inf_eval_batch_size: 16
284
+ inf_eval_subset_num_batches: -1
285
+ device_train_grad_accum: 0
286
+ max_grad_norm: 1.0
287
+ multi_component_grad_norm: true
288
+ batch_divisor: global_batch
289
+ max_grad_norm_ratio: null
290
+ precision: amp_bf16
291
+ wandb:
292
+ project: a1-realworld
293
+ entity: henryeap
294
+ group: null
295
+ name: wipe_20251005_163733
296
+ tags:
297
+ - watching
298
+ log_artifacts: false
299
+ rank_zero_only: true
300
+ log_interval: 1
301
+ speed_monitor:
302
+ window_size: 20
303
+ gpu_flops_available: null
304
+ console_log_interval: 1
305
+ gen1_gc_interval: 1
306
+ compile: null
307
+ fsdp:
308
+ use_orig_params: true
309
+ sharding_strategy: FULL_SHARD
310
+ wrapping_strategy: by_block_and_size
311
+ precision: float
312
+ hybrid_sharding_num_model_replicas: null
313
+ softmax_auxiliary_loss: true
314
+ softmax_auxiliary_loss_scale: 0.0001
315
+ time_limit: null
316
+ extra_steps_after_cancel: 10
317
+ python_profiling: false
318
+ torch_profiling: false
319
+ stop_at: 500000
320
+ stop_after: null
321
+ activation_checkpointing: whole_layer
322
+ fused_loss: null
wipe_flow_matching/wandb/wandb/debug-internal.log ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-10-05T16:38:13.19911913Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
2
+ {"time":"2025-10-05T16:38:14.385618537Z","level":"INFO","msg":"stream: created new stream","id":"0cfqmuqw"}
3
+ {"time":"2025-10-05T16:38:14.385648767Z","level":"INFO","msg":"stream: started","id":"0cfqmuqw"}
4
+ {"time":"2025-10-05T16:38:14.385660457Z","level":"INFO","msg":"handler: started","stream_id":"0cfqmuqw"}
5
+ {"time":"2025-10-05T16:38:14.385655167Z","level":"INFO","msg":"writer: started","stream_id":"0cfqmuqw"}
6
+ {"time":"2025-10-05T16:38:14.385680798Z","level":"INFO","msg":"sender: started","stream_id":"0cfqmuqw"}
7
+ {"time":"2025-10-06T16:34:15.587824169Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/henryeap/a1-realworld/0cfqmuqw/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
8
+ {"time":"2025-10-06T18:35:03.703248769Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/henryeap/a1-realworld/0cfqmuqw/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
9
+ {"time":"2025-10-06T20:02:36.97363154Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
10
+ {"time":"2025-10-07T05:02:26.79910172Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
wipe_flow_matching/wandb/wandb/debug.log ADDED
File without changes
wipe_flow_matching/wandb/wandb/run-20251005_163812-0cfqmuqw/files/wandb-metadata.json ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.18",
4
+ "startedAt": "2025-10-05T16:38:12.937946Z",
5
+ "args": [
6
+ "qwen2_7b",
7
+ "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wipe_flow_matching",
8
+ "--vision_backbone",
9
+ "openai",
10
+ "--action_head",
11
+ "flow_matching",
12
+ "--seq_len",
13
+ "1600",
14
+ "--ft_llm",
15
+ "--checkpoint",
16
+ "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
17
+ "--device_train_microbatch_size",
18
+ "16",
19
+ "--global_batch_size",
20
+ "126",
21
+ "--dataset",
22
+ "vla_dataset_realworld",
23
+ "--llm_learning_rate",
24
+ "5e-5",
25
+ "--wandb_entity",
26
+ "henryeap",
27
+ "--wandb_project",
28
+ "a1-realworld",
29
+ "--wandb_run_name",
30
+ "wipe",
31
+ "--real_world_vla_config_path",
32
+ "vla_config_realworld/vla_config_wipe.yaml",
33
+ "--save_overwrite"
34
+ ],
35
+ "program": "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
36
+ "codePath": "launch_scripts/train_vla.py",
37
+ "codePathLocal": "launch_scripts/train_vla.py",
38
+ "git": {
39
+ "remote": "https://github.com/Spatialtemporal-AI/A1.git",
40
+ "commit": "5071f59d87c6a976691323cbac66d7a988b0b4e7"
41
+ },
42
+ "email": "ihenrykwok@outlook.com",
43
+ "root": "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wipe_flow_matching/wandb",
44
+ "host": "auh7-1b-gpu-268",
45
+ "executable": "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
46
+ "cpu_count": 64,
47
+ "cpu_count_logical": 128,
48
+ "gpu": "Instinct MI210",
49
+ "gpu_count": 8,
50
+ "disk": {
51
+ "/": {
52
+ "total": "470343073792",
53
+ "used": "50524278784"
54
+ }
55
+ },
56
+ "memory": {
57
+ "total": "2434606911488"
58
+ },
59
+ "gpu_amd": [
60
+ {
61
+ "id": "2",
62
+ "uniqueId": "0x4e23787acbcc959c",
63
+ "vbiosVersion": "113-D67301V-073",
64
+ "performanceLevel": "auto",
65
+ "maxPower": "300.0",
66
+ "series": "Instinct MI210",
67
+ "model": "0x740f",
68
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
69
+ "sku": "D67301V",
70
+ "sclkRange": "500Mhz - 1700Mhz",
71
+ "mclkRange": "400Mhz - 1600Mhz"
72
+ },
73
+ {
74
+ "id": "4",
75
+ "uniqueId": "0x21097ed02658304",
76
+ "vbiosVersion": "113-D67301V-073",
77
+ "performanceLevel": "auto",
78
+ "maxPower": "300.0",
79
+ "series": "Instinct MI210",
80
+ "model": "0x740f",
81
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
82
+ "sku": "D67301V",
83
+ "sclkRange": "500Mhz - 1700Mhz",
84
+ "mclkRange": "400Mhz - 1600Mhz"
85
+ },
86
+ {
87
+ "id": "0",
88
+ "uniqueId": "0x5222e4ce7a335651",
89
+ "vbiosVersion": "113-D67301V-073",
90
+ "performanceLevel": "auto",
91
+ "maxPower": "300.0",
92
+ "series": "Instinct MI210",
93
+ "model": "0x740f",
94
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
95
+ "sku": "D67301V",
96
+ "sclkRange": "500Mhz - 1700Mhz",
97
+ "mclkRange": "400Mhz - 1600Mhz"
98
+ },
99
+ {
100
+ "id": "6",
101
+ "uniqueId": "0x6ea319284113b182",
102
+ "vbiosVersion": "113-D67301V-073",
103
+ "performanceLevel": "auto",
104
+ "maxPower": "300.0",
105
+ "series": "Instinct MI210",
106
+ "model": "0x740f",
107
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
108
+ "sku": "D67301V",
109
+ "sclkRange": "500Mhz - 1700Mhz",
110
+ "mclkRange": "400Mhz - 1600Mhz"
111
+ },
112
+ {
113
+ "id": "3",
114
+ "uniqueId": "0x3974e08aaf22dd9e",
115
+ "vbiosVersion": "113-D67301V-073",
116
+ "performanceLevel": "auto",
117
+ "maxPower": "300.0",
118
+ "series": "Instinct MI210",
119
+ "model": "0x740f",
120
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
121
+ "sku": "D67301V",
122
+ "sclkRange": "500Mhz - 1700Mhz",
123
+ "mclkRange": "400Mhz - 1600Mhz"
124
+ },
125
+ {
126
+ "id": "5",
127
+ "uniqueId": "0x88a800d44035c135",
128
+ "vbiosVersion": "113-D67301V-073",
129
+ "performanceLevel": "auto",
130
+ "maxPower": "300.0",
131
+ "series": "Instinct MI210",
132
+ "model": "0x740f",
133
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
134
+ "sku": "D67301V",
135
+ "sclkRange": "500Mhz - 1700Mhz",
136
+ "mclkRange": "400Mhz - 1600Mhz"
137
+ },
138
+ {
139
+ "id": "7",
140
+ "uniqueId": "0x9c1461c3fb78979f",
141
+ "vbiosVersion": "113-D67301V-073",
142
+ "performanceLevel": "auto",
143
+ "maxPower": "300.0",
144
+ "series": "Instinct MI210",
145
+ "model": "0x740f",
146
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
147
+ "sku": "D67301V",
148
+ "sclkRange": "500Mhz - 1700Mhz",
149
+ "mclkRange": "400Mhz - 1600Mhz"
150
+ },
151
+ {
152
+ "id": "1",
153
+ "uniqueId": "0x47f0c4894158743b",
154
+ "vbiosVersion": "113-D67301V-073",
155
+ "performanceLevel": "auto",
156
+ "maxPower": "300.0",
157
+ "series": "Instinct MI210",
158
+ "model": "0x740f",
159
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
160
+ "sku": "D67301V",
161
+ "sclkRange": "500Mhz - 1700Mhz",
162
+ "mclkRange": "400Mhz - 1600Mhz"
163
+ }
164
+ ],
165
+ "slurm": {
166
+ "cluster_name": "ai-04r",
167
+ "conf": "/etc/slurm/slurm.conf",
168
+ "cpus_on_node": "128",
169
+ "gpus_on_node": "8",
170
+ "gtids": "0",
171
+ "job_account": "faculty-acc",
172
+ "job_cpus_per_node": "128",
173
+ "job_end_time": "1759941420",
174
+ "job_gid": "2000",
175
+ "job_gpus": "0,1,2,3,4,5,6,7",
176
+ "job_id": "2279",
177
+ "job_name": "mh_wipe_flow_matching",
178
+ "job_nodelist": "auh7-1b-gpu-268",
179
+ "job_num_nodes": "1",
180
+ "job_partition": "faculty",
181
+ "job_qos": "xdqos",
182
+ "job_start_time": "1759682220",
183
+ "job_uid": "2013",
184
+ "job_user": "xiaodan",
185
+ "jobid": "2279",
186
+ "localid": "0",
187
+ "nnodes": "1",
188
+ "nodeid": "0",
189
+ "nodelist": "auh7-1b-gpu-268",
190
+ "nprocs": "1",
191
+ "ntasks": "1",
192
+ "ntasks_per_node": "1",
193
+ "oom_kill_step": "0",
194
+ "prio_process": "0",
195
+ "procid": "0",
196
+ "submit_dir": "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
197
+ "submit_host": "auh-1b-cpu-login-001",
198
+ "task_pid": "1295134",
199
+ "tasks_per_node": "1",
200
+ "topology_addr": "auh7-1b-gpu-268",
201
+ "topology_addr_pattern": "node"
202
+ },
203
+ "writerId": "z62bo323zzwiyws743dfdlddov1q0sqt"
204
+ }
wipe_flow_matching/wandb/wandb/run-20251005_163812-0cfqmuqw/logs/debug-core.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-10-05T16:38:13.182675593Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp9aw022y7/port-1295327.txt","pid":1295327,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-10-05T16:38:13.18385037Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":1295327}
3
+ {"time":"2025-10-05T16:38:13.185026547Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-1295327-1295508-4047313830/socket","Net":"unix"}}
4
+ {"time":"2025-10-05T16:38:13.18868387Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-10-05T16:38:13.197275504Z","level":"INFO","msg":"handleInformInit: received","streamId":"0cfqmuqw","id":"1(@)"}
6
+ {"time":"2025-10-05T16:38:14.385653777Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"0cfqmuqw","id":"1(@)"}
wipe_l1_regression/config.yaml ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: wipe_20251005_163714
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 3584
7
+ n_heads: 28
8
+ n_kv_heads: 4
9
+ qkv_bias: true
10
+ clip_qkv: null
11
+ n_layers: 28
12
+ mlp_ratio: 4
13
+ mlp_hidden_size: 37888
14
+ activation_type: swiglu
15
+ block_type: sequential
16
+ block_group_size: 1
17
+ rope: true
18
+ rope_full_precision: true
19
+ rope_theta: 1000000.0
20
+ vision_backbone:
21
+ image_model_type: openai
22
+ image_default_input_size:
23
+ - 336
24
+ - 336
25
+ image_patch_size: 14
26
+ image_pos_patch_size: 14
27
+ image_emb_dim: 1024
28
+ image_num_heads: 16
29
+ image_num_key_value_heads: 16
30
+ image_num_layers: 23
31
+ image_head_dim: 64
32
+ image_mlp_dim: 4096
33
+ image_mlp_activations: quick_gelu
34
+ image_dropout_rate: 0.0
35
+ image_num_pos: 577
36
+ image_norm_eps: 1.0e-05
37
+ attention_dropout: 0.0
38
+ residual_dropout: 0.0
39
+ initializer_range: 0.02
40
+ fsdp_wrap: false
41
+ resize_mode: default
42
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
43
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
44
+ low_cpu_fsdp: true
45
+ attention_type: sdpa
46
+ float32_attention: true
47
+ attention_dropout: 0.0
48
+ attention_layer_norm: false
49
+ residual_dropout: 0.1
50
+ response_residual_dropout: 0.0
51
+ embedding_dropout: 0.0
52
+ layer_norm_type: rms
53
+ layer_norm_with_affine: true
54
+ layer_norm_eps: 1.0e-06
55
+ attention_layer_norm_with_affine: true
56
+ max_sequence_length: 4096
57
+ max_position_embeddings: null
58
+ include_bias: false
59
+ bias_for_layer_norm: null
60
+ scale_logits: false
61
+ vocab_size: 152064
62
+ embedding_size: 152064
63
+ ff_out_size: null
64
+ additional_vocab_size: 128
65
+ new_embedding_init_range: 0.02
66
+ weight_tying: false
67
+ init_device: null
68
+ init_fn: normal
69
+ init_std: 0.02
70
+ init_cutoff_factor: null
71
+ norm_after: false
72
+ precision: amp_bf16
73
+ max_crops: 12
74
+ crop_mode: overlap-and-resize-c2
75
+ use_col_tokens: true
76
+ prompt_type: uber_model
77
+ system_prompt_kind: demo_or_style
78
+ message_formatting: role
79
+ always_start_with_space: true
80
+ multi_annotation_weighting: root_subsegments
81
+ default_inference_len: 65
82
+ overlap_margins:
83
+ - 4
84
+ - 4
85
+ pad_value: 0.0
86
+ image_padding_embed: pad_and_partial_pad
87
+ fix_image_padding: true
88
+ vit_layers:
89
+ - -2
90
+ - -9
91
+ image_pooling_h: 2
92
+ image_pooling_w: 2
93
+ image_pooling_2d: attention_meanq
94
+ image_projector: mlp
95
+ image_feature_dropout: 0.0
96
+ initializer_range: 0.02
97
+ normalize_input_embeds: false
98
+ use_position_ids: true
99
+ head_dim: null
100
+ action_tokenizer:
101
+ identifier: physical-intelligence/fast
102
+ tokenizer_dir: null
103
+ action_dim: 7
104
+ horizon: 8
105
+ tokenizer:
106
+ identifier: Qwen/Qwen2-7B
107
+ tokenizer_dir: null
108
+ pad_tokenizer: true
109
+ moe_num_experts: 8
110
+ moe_top_k: 2
111
+ moe_mlp_impl: sparse
112
+ moe_log_expert_assignment: false
113
+ moe_shared_expert: false
114
+ moe_lbl_in_fp32: false
115
+ moe_interleave: false
116
+ moe_loss_weight: 0.1
117
+ moe_zloss_weight: null
118
+ moe_dropless: true
119
+ moe_capacity_factor: 1.25
120
+ action_head: l1_regression
121
+ num_diffusion_steps: 1000
122
+ num_diffusion_inference_steps: 30
123
+ use_proprio: true
124
+ action_head_dit_hidden_size: 1152
125
+ action_head_dit_depth: 28
126
+ action_head_dit_num_heads: 16
127
+ llm_causal_attention: false
128
+ action_use_left_eef: true
129
+ action_use_mobile_base: false
130
+ allow_resume: false
131
+ ft_llm: true
132
+ ft_vit: false
133
+ ft_connector: false
134
+ ft_embedding: lm_head
135
+ lora: false
136
+ use_lora: true
137
+ lora_rank: 8
138
+ lora_llm: false
139
+ lora_vit: false
140
+ lora_connector: false
141
+ early_exit: false
142
+ train_exit_random_layer: false
143
+ optimizer:
144
+ name: adamw
145
+ learning_rate: 0.0001
146
+ weight_decay: 0.01
147
+ betas:
148
+ - 0.9
149
+ - 0.95
150
+ eps: 1.0e-05
151
+ connector_learning_rate: 0.0002
152
+ vit_learning_rate: 6.0e-06
153
+ llm_learning_rate: 5.0e-05
154
+ connector_weight_decay: 0.0
155
+ vit_weight_decay: 0.0
156
+ llm_weight_decay: 0.0
157
+ connector_betas:
158
+ - 0.9
159
+ - 0.95
160
+ vit_betas:
161
+ - 0.9
162
+ - 0.95
163
+ llm_betas:
164
+ - 0.9
165
+ - 0.95
166
+ connector_eps: 1.0e-06
167
+ vit_eps: 1.0e-06
168
+ llm_eps: 1.0e-06
169
+ metrics_log_interval: 20
170
+ scheduler:
171
+ name: multimodal
172
+ units: steps
173
+ t_warmup: 100
174
+ t_max: null
175
+ alpha_f: 0.1
176
+ connector_t_warmup: 200
177
+ vit_t_warmup: 2000
178
+ llm_t_warmup: 2000
179
+ grad_clip_warmup_steps: null
180
+ grad_clip_warmup_factor: null
181
+ warmup_min_lr: 0.0
182
+ data:
183
+ dataset: vla_dataset_realworld
184
+ mixture: null
185
+ root_size_mixture: null
186
+ split: train
187
+ seed: 95818
188
+ shuffle_messages: false
189
+ pad: to_max
190
+ sequence_length: 1600
191
+ shuffle: true
192
+ for_inference: false
193
+ multi_modal: torch
194
+ num_workers: 0
195
+ drop_last: true
196
+ pin_memory: true
197
+ prefetch_factor: null
198
+ persistent_workers: false
199
+ timeout: 0
200
+ rlds_dataset_name: libero_4_task_suites_no_noops
201
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
202
+ use_wrist_image: true
203
+ use_proprio: true
204
+ rlds_shuffle_buffer_size: 100000
205
+ rlds_traj_threads: 8
206
+ rlds_read_threads: 8
207
+ lerobot_episode_index_start: null
208
+ lerobot_episode_index_end: null
209
+ restore_dataloader: true
210
+ fast_forward_batches: null
211
+ evaluators:
212
+ - label: val
213
+ data:
214
+ dataset: vla_dataset_realworld
215
+ mixture: null
216
+ root_size_mixture: null
217
+ split: validation
218
+ seed: null
219
+ shuffle_messages: false
220
+ pad: to_max
221
+ sequence_length: 1600
222
+ shuffle: false
223
+ for_inference: false
224
+ multi_modal: torch
225
+ num_workers: 0
226
+ drop_last: true
227
+ pin_memory: true
228
+ prefetch_factor: null
229
+ persistent_workers: true
230
+ timeout: 0
231
+ rlds_dataset_name: libero_4_task_suites_no_noops
232
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
233
+ use_wrist_image: true
234
+ use_proprio: true
235
+ rlds_shuffle_buffer_size: 256000
236
+ rlds_traj_threads: 8
237
+ rlds_read_threads: 8
238
+ lerobot_episode_index_start: 353
239
+ lerobot_episode_index_end: 765
240
+ device_eval_batch_size: null
241
+ subset_num_batches: 64
242
+ max_examples: null
243
+ max_new_tokens: 448
244
+ mm_evaluator: null
245
+ save_dir: null
246
+ save_to_checkpoint_dir: false
247
+ eval_name: null
248
+ skip_if_metrics_cached: true
249
+ eval_interval: 0
250
+ inf_eval_interval: -1
251
+ inf_evaluators: []
252
+ save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wipe_l1_regression
253
+ remote_save_folder: null
254
+ canceled_check_interval: 50
255
+ save_interval: 500
256
+ save_interval_unsharded: 500
257
+ save_interval_ephemeral: null
258
+ save_interval_action_head: 500
259
+ save_num_checkpoints_to_keep: 1
260
+ save_num_unsharded_checkpoints_to_keep: 1
261
+ save_num_action_head_checkpoints_to_keep: 2
262
+ save_overwrite: true
263
+ force_save_unsharded: false
264
+ no_pre_train_checkpoint: true
265
+ initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
266
+ load_model_config: null
267
+ checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
268
+ load_path: null
269
+ load_path_sharded_checkpointer: null
270
+ reset_optimizer_state: false
271
+ reset_trainer_state: false
272
+ save_dataloader_state: false
273
+ reset_dataloader_state: false
274
+ keep_lr_on_load: true
275
+ sharded_checkpointer: torch_legacy
276
+ max_duration: 500000
277
+ global_train_batch_size: 126
278
+ device_train_batch_size: 15
279
+ device_train_microbatch_size: 16
280
+ device_eval_batch_size: 4
281
+ eval_subset_num_batches: -1
282
+ eval_on_load: false
283
+ device_inf_eval_batch_size: 16
284
+ inf_eval_subset_num_batches: -1
285
+ device_train_grad_accum: 0
286
+ max_grad_norm: 1.0
287
+ multi_component_grad_norm: true
288
+ batch_divisor: global_batch
289
+ max_grad_norm_ratio: null
290
+ precision: amp_bf16
291
+ wandb:
292
+ project: a1-realworld
293
+ entity: henryeap
294
+ group: null
295
+ name: wipe_20251005_163714
296
+ tags:
297
+ - watching
298
+ log_artifacts: false
299
+ rank_zero_only: true
300
+ log_interval: 1
301
+ speed_monitor:
302
+ window_size: 20
303
+ gpu_flops_available: null
304
+ console_log_interval: 1
305
+ gen1_gc_interval: 1
306
+ compile: null
307
+ fsdp:
308
+ use_orig_params: true
309
+ sharding_strategy: FULL_SHARD
310
+ wrapping_strategy: by_block_and_size
311
+ precision: float
312
+ hybrid_sharding_num_model_replicas: null
313
+ softmax_auxiliary_loss: true
314
+ softmax_auxiliary_loss_scale: 0.0001
315
+ time_limit: null
316
+ extra_steps_after_cancel: 10
317
+ python_profiling: false
318
+ torch_profiling: false
319
+ stop_at: 500000
320
+ stop_after: null
321
+ activation_checkpointing: whole_layer
322
+ fused_loss: null
wipe_l1_regression/step12000-unsharded/config.yaml ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: wipe_20251005_163714
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 3584
7
+ n_heads: 28
8
+ n_kv_heads: 4
9
+ qkv_bias: true
10
+ clip_qkv: null
11
+ n_layers: 28
12
+ mlp_ratio: 4
13
+ mlp_hidden_size: 37888
14
+ activation_type: swiglu
15
+ block_type: sequential
16
+ block_group_size: 1
17
+ rope: true
18
+ rope_full_precision: true
19
+ rope_theta: 1000000.0
20
+ vision_backbone:
21
+ image_model_type: openai
22
+ image_default_input_size:
23
+ - 336
24
+ - 336
25
+ image_patch_size: 14
26
+ image_pos_patch_size: 14
27
+ image_emb_dim: 1024
28
+ image_num_heads: 16
29
+ image_num_key_value_heads: 16
30
+ image_num_layers: 23
31
+ image_head_dim: 64
32
+ image_mlp_dim: 4096
33
+ image_mlp_activations: quick_gelu
34
+ image_dropout_rate: 0.0
35
+ image_num_pos: 577
36
+ image_norm_eps: 1.0e-05
37
+ attention_dropout: 0.0
38
+ residual_dropout: 0.0
39
+ initializer_range: 0.02
40
+ fsdp_wrap: false
41
+ resize_mode: default
42
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
43
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
44
+ low_cpu_fsdp: true
45
+ attention_type: sdpa
46
+ float32_attention: true
47
+ attention_dropout: 0.0
48
+ attention_layer_norm: false
49
+ residual_dropout: 0.1
50
+ response_residual_dropout: 0.0
51
+ embedding_dropout: 0.0
52
+ layer_norm_type: rms
53
+ layer_norm_with_affine: true
54
+ layer_norm_eps: 1.0e-06
55
+ attention_layer_norm_with_affine: true
56
+ max_sequence_length: 4096
57
+ max_position_embeddings: null
58
+ include_bias: false
59
+ bias_for_layer_norm: null
60
+ scale_logits: false
61
+ vocab_size: 152064
62
+ embedding_size: 152064
63
+ ff_out_size: null
64
+ additional_vocab_size: 128
65
+ new_embedding_init_range: 0.02
66
+ weight_tying: false
67
+ init_device: null
68
+ init_fn: normal
69
+ init_std: 0.02
70
+ init_cutoff_factor: null
71
+ norm_after: false
72
+ precision: amp_bf16
73
+ max_crops: 12
74
+ crop_mode: overlap-and-resize-c2
75
+ use_col_tokens: true
76
+ prompt_type: uber_model
77
+ system_prompt_kind: demo_or_style
78
+ message_formatting: role
79
+ always_start_with_space: true
80
+ multi_annotation_weighting: root_subsegments
81
+ default_inference_len: 65
82
+ overlap_margins:
83
+ - 4
84
+ - 4
85
+ pad_value: 0.0
86
+ image_padding_embed: pad_and_partial_pad
87
+ fix_image_padding: true
88
+ vit_layers:
89
+ - -2
90
+ - -9
91
+ image_pooling_h: 2
92
+ image_pooling_w: 2
93
+ image_pooling_2d: attention_meanq
94
+ image_projector: mlp
95
+ image_feature_dropout: 0.0
96
+ initializer_range: 0.02
97
+ normalize_input_embeds: false
98
+ use_position_ids: true
99
+ head_dim: null
100
+ action_tokenizer:
101
+ identifier: physical-intelligence/fast
102
+ tokenizer_dir: null
103
+ action_dim: 7
104
+ horizon: 8
105
+ tokenizer:
106
+ identifier: Qwen/Qwen2-7B
107
+ tokenizer_dir: null
108
+ pad_tokenizer: true
109
+ moe_num_experts: 8
110
+ moe_top_k: 2
111
+ moe_mlp_impl: sparse
112
+ moe_log_expert_assignment: false
113
+ moe_shared_expert: false
114
+ moe_lbl_in_fp32: false
115
+ moe_interleave: false
116
+ moe_loss_weight: 0.1
117
+ moe_zloss_weight: null
118
+ moe_dropless: true
119
+ moe_capacity_factor: 1.25
120
+ action_head: l1_regression
121
+ num_diffusion_steps: 1000
122
+ num_diffusion_inference_steps: 30
123
+ use_proprio: true
124
+ action_head_dit_hidden_size: 1152
125
+ action_head_dit_depth: 28
126
+ action_head_dit_num_heads: 16
127
+ llm_causal_attention: false
128
+ action_use_left_eef: true
129
+ action_use_mobile_base: false
130
+ allow_resume: false
131
+ ft_llm: true
132
+ ft_vit: false
133
+ ft_connector: false
134
+ ft_embedding: lm_head
135
+ lora: false
136
+ use_lora: true
137
+ lora_rank: 8
138
+ lora_llm: false
139
+ lora_vit: false
140
+ lora_connector: false
141
+ early_exit: false
142
+ train_exit_random_layer: false
143
+ optimizer:
144
+ name: adamw
145
+ learning_rate: 0.0001
146
+ weight_decay: 0.01
147
+ betas:
148
+ - 0.9
149
+ - 0.95
150
+ eps: 1.0e-05
151
+ connector_learning_rate: 0.0002
152
+ vit_learning_rate: 6.0e-06
153
+ llm_learning_rate: 5.0e-05
154
+ connector_weight_decay: 0.0
155
+ vit_weight_decay: 0.0
156
+ llm_weight_decay: 0.0
157
+ connector_betas:
158
+ - 0.9
159
+ - 0.95
160
+ vit_betas:
161
+ - 0.9
162
+ - 0.95
163
+ llm_betas:
164
+ - 0.9
165
+ - 0.95
166
+ connector_eps: 1.0e-06
167
+ vit_eps: 1.0e-06
168
+ llm_eps: 1.0e-06
169
+ metrics_log_interval: 20
170
+ scheduler:
171
+ name: multimodal
172
+ units: steps
173
+ t_warmup: 100
174
+ t_max: null
175
+ alpha_f: 0.1
176
+ connector_t_warmup: 200
177
+ vit_t_warmup: 2000
178
+ llm_t_warmup: 2000
179
+ grad_clip_warmup_steps: null
180
+ grad_clip_warmup_factor: null
181
+ warmup_min_lr: 0.0
182
+ data:
183
+ dataset: vla_dataset_realworld
184
+ mixture: null
185
+ root_size_mixture: null
186
+ split: train
187
+ seed: 95818
188
+ shuffle_messages: false
189
+ pad: to_max
190
+ sequence_length: 1600
191
+ shuffle: true
192
+ for_inference: false
193
+ multi_modal: torch
194
+ num_workers: 0
195
+ drop_last: true
196
+ pin_memory: true
197
+ prefetch_factor: null
198
+ persistent_workers: false
199
+ timeout: 0
200
+ rlds_dataset_name: libero_4_task_suites_no_noops
201
+ rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Wipe
202
+ use_wrist_image: true
203
+ use_proprio: true
204
+ rlds_shuffle_buffer_size: 100000
205
+ rlds_traj_threads: 8
206
+ rlds_read_threads: 8
207
+ lerobot_episode_index_start: null
208
+ lerobot_episode_index_end: null
209
+ restore_dataloader: true
210
+ fast_forward_batches: null
211
+ evaluators:
212
+ - label: val
213
+ data:
214
+ dataset: vla_dataset_realworld
215
+ mixture: null
216
+ root_size_mixture: null
217
+ split: validation
218
+ seed: null
219
+ shuffle_messages: false
220
+ pad: to_max
221
+ sequence_length: 1600
222
+ shuffle: false
223
+ for_inference: false
224
+ multi_modal: torch
225
+ num_workers: 0
226
+ drop_last: true
227
+ pin_memory: true
228
+ prefetch_factor: null
229
+ persistent_workers: true
230
+ timeout: 0
231
+ rlds_dataset_name: libero_4_task_suites_no_noops
232
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
233
+ use_wrist_image: true
234
+ use_proprio: true
235
+ rlds_shuffle_buffer_size: 256000
236
+ rlds_traj_threads: 8
237
+ rlds_read_threads: 8
238
+ lerobot_episode_index_start: 353
239
+ lerobot_episode_index_end: 765
240
+ device_eval_batch_size: null
241
+ subset_num_batches: 64
242
+ max_examples: null
243
+ max_new_tokens: 448
244
+ mm_evaluator: null
245
+ save_dir: null
246
+ save_to_checkpoint_dir: false
247
+ eval_name: null
248
+ skip_if_metrics_cached: true
249
+ eval_interval: 0
250
+ inf_eval_interval: -1
251
+ inf_evaluators: []
252
+ save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wipe_l1_regression
253
+ remote_save_folder: null
254
+ canceled_check_interval: 50
255
+ save_interval: 500
256
+ save_interval_unsharded: 500
257
+ save_interval_ephemeral: null
258
+ save_interval_action_head: 500
259
+ save_num_checkpoints_to_keep: 1
260
+ save_num_unsharded_checkpoints_to_keep: 1
261
+ save_num_action_head_checkpoints_to_keep: 2
262
+ save_overwrite: true
263
+ force_save_unsharded: false
264
+ no_pre_train_checkpoint: true
265
+ initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
266
+ load_model_config: null
267
+ checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
268
+ load_path: null
269
+ load_path_sharded_checkpointer: null
270
+ reset_optimizer_state: false
271
+ reset_trainer_state: false
272
+ save_dataloader_state: false
273
+ reset_dataloader_state: false
274
+ keep_lr_on_load: true
275
+ sharded_checkpointer: torch_legacy
276
+ max_duration: 500000
277
+ global_train_batch_size: 126
278
+ device_train_batch_size: 15
279
+ device_train_microbatch_size: 16
280
+ device_eval_batch_size: 4
281
+ eval_subset_num_batches: -1
282
+ eval_on_load: false
283
+ device_inf_eval_batch_size: 16
284
+ inf_eval_subset_num_batches: -1
285
+ device_train_grad_accum: 0
286
+ max_grad_norm: 1.0
287
+ multi_component_grad_norm: true
288
+ batch_divisor: global_batch
289
+ max_grad_norm_ratio: null
290
+ precision: amp_bf16
291
+ wandb:
292
+ project: a1-realworld
293
+ entity: henryeap
294
+ group: null
295
+ name: wipe_20251005_163714
296
+ tags:
297
+ - watching
298
+ log_artifacts: false
299
+ rank_zero_only: true
300
+ log_interval: 1
301
+ speed_monitor:
302
+ window_size: 20
303
+ gpu_flops_available: null
304
+ console_log_interval: 1
305
+ gen1_gc_interval: 1
306
+ compile: null
307
+ fsdp:
308
+ use_orig_params: true
309
+ sharding_strategy: FULL_SHARD
310
+ wrapping_strategy: by_block_and_size
311
+ precision: float
312
+ hybrid_sharding_num_model_replicas: null
313
+ softmax_auxiliary_loss: true
314
+ softmax_auxiliary_loss_scale: 0.0001
315
+ time_limit: null
316
+ extra_steps_after_cancel: 10
317
+ python_profiling: false
318
+ torch_profiling: false
319
+ stop_at: 500000
320
+ stop_after: null
321
+ activation_checkpointing: whole_layer
322
+ fused_loss: null
wipe_l1_regression/step12000/config.yaml ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: wipe_20251005_163714
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 3584
7
+ n_heads: 28
8
+ n_kv_heads: 4
9
+ qkv_bias: true
10
+ clip_qkv: null
11
+ n_layers: 28
12
+ mlp_ratio: 4
13
+ mlp_hidden_size: 37888
14
+ activation_type: swiglu
15
+ block_type: sequential
16
+ block_group_size: 1
17
+ rope: true
18
+ rope_full_precision: true
19
+ rope_theta: 1000000.0
20
+ vision_backbone:
21
+ image_model_type: openai
22
+ image_default_input_size:
23
+ - 336
24
+ - 336
25
+ image_patch_size: 14
26
+ image_pos_patch_size: 14
27
+ image_emb_dim: 1024
28
+ image_num_heads: 16
29
+ image_num_key_value_heads: 16
30
+ image_num_layers: 23
31
+ image_head_dim: 64
32
+ image_mlp_dim: 4096
33
+ image_mlp_activations: quick_gelu
34
+ image_dropout_rate: 0.0
35
+ image_num_pos: 577
36
+ image_norm_eps: 1.0e-05
37
+ attention_dropout: 0.0
38
+ residual_dropout: 0.0
39
+ initializer_range: 0.02
40
+ fsdp_wrap: false
41
+ resize_mode: default
42
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
43
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
44
+ low_cpu_fsdp: true
45
+ attention_type: sdpa
46
+ float32_attention: true
47
+ attention_dropout: 0.0
48
+ attention_layer_norm: false
49
+ residual_dropout: 0.1
50
+ response_residual_dropout: 0.0
51
+ embedding_dropout: 0.0
52
+ layer_norm_type: rms
53
+ layer_norm_with_affine: true
54
+ layer_norm_eps: 1.0e-06
55
+ attention_layer_norm_with_affine: true
56
+ max_sequence_length: 4096
57
+ max_position_embeddings: null
58
+ include_bias: false
59
+ bias_for_layer_norm: null
60
+ scale_logits: false
61
+ vocab_size: 152064
62
+ embedding_size: 152064
63
+ ff_out_size: null
64
+ additional_vocab_size: 128
65
+ new_embedding_init_range: 0.02
66
+ weight_tying: false
67
+ init_device: null
68
+ init_fn: normal
69
+ init_std: 0.02
70
+ init_cutoff_factor: null
71
+ norm_after: false
72
+ precision: amp_bf16
73
+ max_crops: 12
74
+ crop_mode: overlap-and-resize-c2
75
+ use_col_tokens: true
76
+ prompt_type: uber_model
77
+ system_prompt_kind: demo_or_style
78
+ message_formatting: role
79
+ always_start_with_space: true
80
+ multi_annotation_weighting: root_subsegments
81
+ default_inference_len: 65
82
+ overlap_margins:
83
+ - 4
84
+ - 4
85
+ pad_value: 0.0
86
+ image_padding_embed: pad_and_partial_pad
87
+ fix_image_padding: true
88
+ vit_layers:
89
+ - -2
90
+ - -9
91
+ image_pooling_h: 2
92
+ image_pooling_w: 2
93
+ image_pooling_2d: attention_meanq
94
+ image_projector: mlp
95
+ image_feature_dropout: 0.0
96
+ initializer_range: 0.02
97
+ normalize_input_embeds: false
98
+ use_position_ids: true
99
+ head_dim: null
100
+ action_tokenizer:
101
+ identifier: physical-intelligence/fast
102
+ tokenizer_dir: null
103
+ action_dim: 7
104
+ horizon: 8
105
+ tokenizer:
106
+ identifier: Qwen/Qwen2-7B
107
+ tokenizer_dir: null
108
+ pad_tokenizer: true
109
+ moe_num_experts: 8
110
+ moe_top_k: 2
111
+ moe_mlp_impl: sparse
112
+ moe_log_expert_assignment: false
113
+ moe_shared_expert: false
114
+ moe_lbl_in_fp32: false
115
+ moe_interleave: false
116
+ moe_loss_weight: 0.1
117
+ moe_zloss_weight: null
118
+ moe_dropless: true
119
+ moe_capacity_factor: 1.25
120
+ action_head: l1_regression
121
+ num_diffusion_steps: 1000
122
+ num_diffusion_inference_steps: 30
123
+ use_proprio: true
124
+ action_head_dit_hidden_size: 1152
125
+ action_head_dit_depth: 28
126
+ action_head_dit_num_heads: 16
127
+ llm_causal_attention: false
128
+ action_use_left_eef: true
129
+ action_use_mobile_base: false
130
+ allow_resume: false
131
+ ft_llm: true
132
+ ft_vit: false
133
+ ft_connector: false
134
+ ft_embedding: lm_head
135
+ lora: false
136
+ use_lora: true
137
+ lora_rank: 8
138
+ lora_llm: false
139
+ lora_vit: false
140
+ lora_connector: false
141
+ early_exit: false
142
+ train_exit_random_layer: false
143
+ optimizer:
144
+ name: adamw
145
+ learning_rate: 0.0001
146
+ weight_decay: 0.01
147
+ betas:
148
+ - 0.9
149
+ - 0.95
150
+ eps: 1.0e-05
151
+ connector_learning_rate: 0.0002
152
+ vit_learning_rate: 6.0e-06
153
+ llm_learning_rate: 5.0e-05
154
+ connector_weight_decay: 0.0
155
+ vit_weight_decay: 0.0
156
+ llm_weight_decay: 0.0
157
+ connector_betas:
158
+ - 0.9
159
+ - 0.95
160
+ vit_betas:
161
+ - 0.9
162
+ - 0.95
163
+ llm_betas:
164
+ - 0.9
165
+ - 0.95
166
+ connector_eps: 1.0e-06
167
+ vit_eps: 1.0e-06
168
+ llm_eps: 1.0e-06
169
+ metrics_log_interval: 20
170
+ scheduler:
171
+ name: multimodal
172
+ units: steps
173
+ t_warmup: 100
174
+ t_max: null
175
+ alpha_f: 0.1
176
+ connector_t_warmup: 200
177
+ vit_t_warmup: 2000
178
+ llm_t_warmup: 2000
179
+ grad_clip_warmup_steps: null
180
+ grad_clip_warmup_factor: null
181
+ warmup_min_lr: 0.0
182
+ data:
183
+ dataset: vla_dataset_realworld
184
+ mixture: null
185
+ root_size_mixture: null
186
+ split: train
187
+ seed: 95818
188
+ shuffle_messages: false
189
+ pad: to_max
190
+ sequence_length: 1600
191
+ shuffle: true
192
+ for_inference: false
193
+ multi_modal: torch
194
+ num_workers: 0
195
+ drop_last: true
196
+ pin_memory: true
197
+ prefetch_factor: null
198
+ persistent_workers: false
199
+ timeout: 0
200
+ rlds_dataset_name: libero_4_task_suites_no_noops
201
+ rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Wipe
202
+ use_wrist_image: true
203
+ use_proprio: true
204
+ rlds_shuffle_buffer_size: 100000
205
+ rlds_traj_threads: 8
206
+ rlds_read_threads: 8
207
+ lerobot_episode_index_start: null
208
+ lerobot_episode_index_end: null
209
+ restore_dataloader: true
210
+ fast_forward_batches: null
211
+ evaluators:
212
+ - label: val
213
+ data:
214
+ dataset: vla_dataset_realworld
215
+ mixture: null
216
+ root_size_mixture: null
217
+ split: validation
218
+ seed: null
219
+ shuffle_messages: false
220
+ pad: to_max
221
+ sequence_length: 1600
222
+ shuffle: false
223
+ for_inference: false
224
+ multi_modal: torch
225
+ num_workers: 0
226
+ drop_last: true
227
+ pin_memory: true
228
+ prefetch_factor: null
229
+ persistent_workers: true
230
+ timeout: 0
231
+ rlds_dataset_name: libero_4_task_suites_no_noops
232
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
233
+ use_wrist_image: true
234
+ use_proprio: true
235
+ rlds_shuffle_buffer_size: 256000
236
+ rlds_traj_threads: 8
237
+ rlds_read_threads: 8
238
+ lerobot_episode_index_start: 353
239
+ lerobot_episode_index_end: 765
240
+ device_eval_batch_size: null
241
+ subset_num_batches: 64
242
+ max_examples: null
243
+ max_new_tokens: 448
244
+ mm_evaluator: null
245
+ save_dir: null
246
+ save_to_checkpoint_dir: false
247
+ eval_name: null
248
+ skip_if_metrics_cached: true
249
+ eval_interval: 0
250
+ inf_eval_interval: -1
251
+ inf_evaluators: []
252
+ save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wipe_l1_regression
253
+ remote_save_folder: null
254
+ canceled_check_interval: 50
255
+ save_interval: 500
256
+ save_interval_unsharded: 500
257
+ save_interval_ephemeral: null
258
+ save_interval_action_head: 500
259
+ save_num_checkpoints_to_keep: 1
260
+ save_num_unsharded_checkpoints_to_keep: 1
261
+ save_num_action_head_checkpoints_to_keep: 2
262
+ save_overwrite: true
263
+ force_save_unsharded: false
264
+ no_pre_train_checkpoint: true
265
+ initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
266
+ load_model_config: null
267
+ checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
268
+ load_path: null
269
+ load_path_sharded_checkpointer: null
270
+ reset_optimizer_state: false
271
+ reset_trainer_state: false
272
+ save_dataloader_state: false
273
+ reset_dataloader_state: false
274
+ keep_lr_on_load: true
275
+ sharded_checkpointer: torch_legacy
276
+ max_duration: 500000
277
+ global_train_batch_size: 126
278
+ device_train_batch_size: 15
279
+ device_train_microbatch_size: 16
280
+ device_eval_batch_size: 4
281
+ eval_subset_num_batches: -1
282
+ eval_on_load: false
283
+ device_inf_eval_batch_size: 16
284
+ inf_eval_subset_num_batches: -1
285
+ device_train_grad_accum: 0
286
+ max_grad_norm: 1.0
287
+ multi_component_grad_norm: true
288
+ batch_divisor: global_batch
289
+ max_grad_norm_ratio: null
290
+ precision: amp_bf16
291
+ wandb:
292
+ project: a1-realworld
293
+ entity: henryeap
294
+ group: null
295
+ name: wipe_20251005_163714
296
+ tags:
297
+ - watching
298
+ log_artifacts: false
299
+ rank_zero_only: true
300
+ log_interval: 1
301
+ speed_monitor:
302
+ window_size: 20
303
+ gpu_flops_available: null
304
+ console_log_interval: 1
305
+ gen1_gc_interval: 1
306
+ compile: null
307
+ fsdp:
308
+ use_orig_params: true
309
+ sharding_strategy: FULL_SHARD
310
+ wrapping_strategy: by_block_and_size
311
+ precision: float
312
+ hybrid_sharding_num_model_replicas: null
313
+ softmax_auxiliary_loss: true
314
+ softmax_auxiliary_loss_scale: 0.0001
315
+ time_limit: null
316
+ extra_steps_after_cancel: 10
317
+ python_profiling: false
318
+ torch_profiling: false
319
+ stop_at: 500000
320
+ stop_after: null
321
+ activation_checkpointing: whole_layer
322
+ fused_loss: null
wipe_l1_regression/wandb/wandb/debug-internal.log ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-10-05T16:37:44.133320669Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
2
+ {"time":"2025-10-05T16:37:45.160495159Z","level":"INFO","msg":"stream: created new stream","id":"a1znetn8"}
3
+ {"time":"2025-10-05T16:37:45.160546189Z","level":"INFO","msg":"stream: started","id":"a1znetn8"}
4
+ {"time":"2025-10-05T16:37:45.16056551Z","level":"INFO","msg":"handler: started","stream_id":"a1znetn8"}
5
+ {"time":"2025-10-05T16:37:45.1605953Z","level":"INFO","msg":"sender: started","stream_id":"a1znetn8"}
6
+ {"time":"2025-10-05T16:37:45.16057409Z","level":"INFO","msg":"writer: started","stream_id":"a1znetn8"}
7
+ {"time":"2025-10-06T15:55:45.597714896Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
8
+ {"time":"2025-10-06T18:21:33.322202546Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/henryeap/a1-realworld/a1znetn8/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
9
+ {"time":"2025-10-07T13:03:00.740491875Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
10
+ {"time":"2025-10-08T00:38:26.283561572Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
wipe_l1_regression/wandb/wandb/debug.log ADDED
File without changes
wipe_l1_regression/wandb/wandb/run-20251005_163743-a1znetn8/files/requirements.txt ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ai2-molmo==0.0.0
2
+ astunparse==1.6.3
3
+ flatbuffers==25.2.10
4
+ gast==0.6.0
5
+ google-pasta==0.2.0
6
+ h5py==3.14.0
7
+ libclang==18.1.1
8
+ Markdown==3.9
9
+ namex==0.1.0
10
+ opt_einsum==3.4.0
11
+ optree==0.17.0
12
+ tensorboard-data-server==0.7.2
13
+ tensorflow-io-gcs-filesystem==0.37.1
14
+ termcolor==3.1.0
15
+ Werkzeug==3.1.3
16
+ Brotli==1.1.0
17
+ Farama-Notifications==0.0.4
18
+ MarkupSafe==2.1.5
19
+ PyYAML==6.0.2
20
+ absl-py==2.3.1
21
+ accelerate==1.10.1
22
+ ai2-molmo==0.0.0
23
+ aiofiles==24.1.0
24
+ aiohappyeyeballs==2.6.1
25
+ aiohttp==3.12.15
26
+ aiosignal==1.4.0
27
+ annotated-types==0.7.0
28
+ antlr4-python3-runtime==4.9.3
29
+ anyio==4.10.0
30
+ array_record==0.8.1
31
+ async-timeout==5.0.1
32
+ attrs==25.3.0
33
+ av==15.1.0
34
+ backports.tarfile==1.2.0
35
+ beaker-gantry==3.2.0
36
+ beaker-py==2.5.0
37
+ black==23.12.1
38
+ blinker==1.9.0
39
+ boltons==25.0.0
40
+ boto3==1.40.33
41
+ botocore==1.40.33
42
+ build==1.3.0
43
+ cached_path==1.7.3
44
+ cached-property==2.0.1
45
+ cachetools==5.5.2
46
+ certifi==2025.8.3
47
+ cffi==2.0.0
48
+ charset-normalizer==3.4.3
49
+ click==8.2.1
50
+ click-help-colors==0.9.4
51
+ click-option-group==0.5.7
52
+ cloudpickle==3.1.1
53
+ cmake==4.1.0
54
+ contourpy==1.3.2
55
+ cryptography==46.0.1
56
+ cycler==0.12.1
57
+ dataclass-extensions==0.2.3
58
+ datasets==3.6.0
59
+ decorator==5.2.1
60
+ deepdiff==8.6.1
61
+ diffusers==0.35.1
62
+ dill==0.3.8
63
+ distro==1.9.0
64
+ dlimp==0.0.1
65
+ dm-tree==0.1.9
66
+ docutils==0.22.1
67
+ draccus==0.10.0
68
+ editdistance==0.8.1
69
+ einops==0.8.1
70
+ einops-exts==0.0.4
71
+ et_xmlfile==2.0.0
72
+ etils==1.13.0
73
+ evdev==1.9.2
74
+ exceptiongroup==1.3.0
75
+ face==24.0.0
76
+ fastapi==0.116.2
77
+ ffmpy==0.6.1
78
+ fiddle==0.3.0
79
+ filelock==3.13.1
80
+ Flask==3.1.2
81
+ fonttools==4.60.0
82
+ frozenlist==1.7.0
83
+ fsspec==2023.9.2
84
+ ftfy==6.3.1
85
+ gcsfs==2023.9.2
86
+ gitdb==4.0.12
87
+ GitPython==3.1.45
88
+ glom==24.11.0
89
+ google-api-core==2.25.1
90
+ google-auth==2.40.3
91
+ google-auth-oauthlib==1.2.2
92
+ google-cloud-core==2.4.3
93
+ google-cloud-storage==2.19.0
94
+ google-crc32c==1.7.1
95
+ google-resumable-media==2.7.2
96
+ googleapis-common-protos==1.70.0
97
+ gradio==5.46.0
98
+ gradio_client==1.13.0
99
+ graphviz==0.21
100
+ groovy==0.1.2
101
+ grpcio==1.75.0
102
+ gymnasium==0.29.1
103
+ h11==0.16.0
104
+ hf_transfer==0.1.9
105
+ hf-xet==1.1.10
106
+ httpcore==1.0.9
107
+ httpx==0.28.1
108
+ huggingface-hub==0.35.0
109
+ id==1.5.0
110
+ idna==3.10
111
+ imageio==2.37.0
112
+ imageio-ffmpeg==0.6.0
113
+ importlib_metadata==8.7.0
114
+ importlib_resources==6.5.2
115
+ iniconfig==2.1.0
116
+ inquirerpy==0.3.4
117
+ isort==5.12.0
118
+ itsdangerous==2.2.0
119
+ jaraco.classes==3.4.0
120
+ jaraco.context==6.0.1
121
+ jaraco.functools==4.3.0
122
+ jeepney==0.9.0
123
+ Jinja2==3.1.4
124
+ jiter==0.11.0
125
+ jmespath==1.0.1
126
+ joblib==1.5.2
127
+ jsonlines==4.0.0
128
+ keras==2.15.0
129
+ keyring==25.6.0
130
+ kiwisolver==1.4.9
131
+ latex2sympy2_extended==1.10.2
132
+ lerobot==0.3.4
133
+ Levenshtein==0.27.1
134
+ libcst==1.8.4
135
+ lightning-utilities==0.15.2
136
+ markdown-it-py==4.0.0
137
+ math-verify==0.8.0
138
+ matplotlib==3.10.6
139
+ mdurl==0.1.2
140
+ mergedeep==1.3.4
141
+ ml-dtypes==0.2.0
142
+ ml_dtypes==0.5.3
143
+ more-itertools==10.8.0
144
+ mpmath==1.3.0
145
+ msgspec==0.19.0
146
+ multidict==6.6.4
147
+ multiprocess==0.70.16
148
+ mypy==1.3.0
149
+ mypy_extensions==1.1.0
150
+ necessary==0.4.3
151
+ networkx==3.3
152
+ nh3==0.3.0
153
+ nltk==3.9.1
154
+ numpy==1.26.4
155
+ oauthlib==3.3.1
156
+ omegaconf==2.3.0
157
+ openai==1.108.0
158
+ opencv-python-headless==4.12.0.88
159
+ OpenEXR==3.4.0
160
+ openpyxl==3.1.5
161
+ orderly-set==5.5.0
162
+ orjson==3.11.3
163
+ packaging==25.0
164
+ pandas==2.3.2
165
+ pathspec==0.12.1
166
+ petname==2.6
167
+ pfzy==0.3.4
168
+ pillow==11.0.0
169
+ pip==25.2
170
+ platformdirs==4.4.0
171
+ pluggy==1.6.0
172
+ promise==2.3
173
+ prompt_toolkit==3.0.52
174
+ propcache==0.3.2
175
+ proto-plus==1.26.1
176
+ protobuf==4.21.12
177
+ protobuf==6.32.1
178
+ psutil==7.1.0
179
+ pyarrow==21.0.0
180
+ pyasn1==0.6.1
181
+ pyasn1_modules==0.4.2
182
+ pycparser==2.23
183
+ pydantic==2.11.9
184
+ pydantic_core==2.33.2
185
+ pydub==0.25.1
186
+ Pygments==2.19.2
187
+ pynput==1.8.1
188
+ pyparsing==3.2.4
189
+ pyproject_hooks==1.2.0
190
+ pyserial==3.5
191
+ pytest==8.4.2
192
+ pytest-sphinx==0.6.3
193
+ python-dateutil==2.9.0.post0
194
+ python-Levenshtein==0.27.1
195
+ python-multipart==0.0.20
196
+ python-xlib==0.33
197
+ pytorch-triton-rocm==3.4.0
198
+ pytz==2025.2
199
+ pyyaml-include==1.4.1
200
+ RapidFuzz==3.14.1
201
+ readme_renderer==44.0
202
+ regex==2025.9.1
203
+ requests==2.32.5
204
+ requests-oauthlib==2.0.0
205
+ requests-toolbelt==1.0.0
206
+ requirements-parser==0.13.0
207
+ rerun-sdk==0.22.1
208
+ rfc3986==2.0.0
209
+ rich==13.9.4
210
+ rsa==4.9.1
211
+ ruff==0.13.0
212
+ s3transfer==0.14.0
213
+ safehttpx==0.1.6
214
+ safetensors==0.6.2
215
+ scikit-learn==1.7.2
216
+ scipy==1.15.3
217
+ SecretStorage==3.4.0
218
+ semantic-version==2.10.0
219
+ sentencepiece==0.2.1
220
+ sentry-sdk==2.38.0
221
+ setuptools==78.1.1
222
+ shellingham==1.5.4
223
+ six==1.17.0
224
+ smart_open==7.3.1
225
+ smashed==0.21.5
226
+ smmap==5.0.2
227
+ sniffio==1.3.1
228
+ starlette==0.48.0
229
+ sympy==1.13.3
230
+ tensorboard==2.15.2
231
+ tensorboard==2.19.0
232
+ tensorflow==2.15.0
233
+ tensorflow-addons==0.23.0
234
+ tensorflow-datasets==4.9.3
235
+ tensorflow-estimator==2.15.0
236
+ tensorflow-graphics==2021.12.3
237
+ tensorflow-metadata==1.17.2
238
+ threadpoolctl==3.6.0
239
+ timm==1.0.19
240
+ tokenizers==0.22.0
241
+ toml==0.10.2
242
+ tomli==2.2.1
243
+ tomlkit==0.13.3
244
+ torch==2.8.0+rocm6.4
245
+ torchcodec==0.5
246
+ torchmetrics==1.8.2
247
+ torchvision==0.23.0+rocm6.4
248
+ tqdm==4.67.1
249
+ transformers==4.56.1
250
+ trimesh==4.8.2
251
+ trouting==0.3.3
252
+ twine==6.2.0
253
+ typeguard==2.13.3
254
+ typer==0.17.4
255
+ typing_extensions==4.15.0
256
+ typing-inspect==0.9.0
257
+ typing-inspection==0.4.1
258
+ tzdata==2025.2
259
+ urllib3==2.5.0
260
+ uvicorn==0.35.0
261
+ wandb==0.21.4
262
+ wcwidth==0.2.13
263
+ websockets==15.0.1
264
+ wheel==0.45.1
265
+ wrapt==1.14.2
266
+ xxhash==3.5.0
267
+ yarl==1.20.1
268
+ zipp==3.23.0
269
+ lerobot==0.3.4
270
+ minLoRA==0.1.0
271
+ autocommand==2.2.2
272
+ backports.tarfile==1.2.0
273
+ importlib_metadata==8.0.0
274
+ inflect==7.3.1
275
+ jaraco.collections==5.1.0
276
+ jaraco.context==5.3.0
277
+ jaraco.functools==4.0.1
278
+ jaraco.text==3.12.1
279
+ more-itertools==10.3.0
280
+ packaging==24.2
281
+ platformdirs==4.2.2
282
+ tomli==2.0.1
283
+ typeguard==4.3.0
284
+ typing_extensions==4.12.2
285
+ wheel==0.45.1
286
+ zipp==3.19.2
wipe_l1_regression/wandb/wandb/run-20251005_163743-a1znetn8/files/wandb-metadata.json ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.18",
4
+ "startedAt": "2025-10-05T16:37:43.884420Z",
5
+ "args": [
6
+ "qwen2_7b",
7
+ "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wipe_l1_regression",
8
+ "--vision_backbone",
9
+ "openai",
10
+ "--action_head",
11
+ "l1_regression",
12
+ "--seq_len",
13
+ "1600",
14
+ "--ft_llm",
15
+ "--checkpoint",
16
+ "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
17
+ "--device_train_microbatch_size",
18
+ "16",
19
+ "--global_batch_size",
20
+ "126",
21
+ "--dataset",
22
+ "vla_dataset_realworld",
23
+ "--llm_learning_rate",
24
+ "5e-5",
25
+ "--wandb_entity",
26
+ "henryeap",
27
+ "--wandb_project",
28
+ "a1-realworld",
29
+ "--wandb_run_name",
30
+ "wipe",
31
+ "--real_world_vla_config_path",
32
+ "vla_config_realworld/vla_config_wipe.yaml",
33
+ "--save_overwrite"
34
+ ],
35
+ "program": "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
36
+ "codePath": "launch_scripts/train_vla.py",
37
+ "codePathLocal": "launch_scripts/train_vla.py",
38
+ "git": {
39
+ "remote": "https://github.com/Spatialtemporal-AI/A1.git",
40
+ "commit": "5071f59d87c6a976691323cbac66d7a988b0b4e7"
41
+ },
42
+ "email": "ihenrykwok@outlook.com",
43
+ "root": "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wipe_l1_regression/wandb",
44
+ "host": "auh7-1b-gpu-306",
45
+ "executable": "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
46
+ "cpu_count": 64,
47
+ "cpu_count_logical": 128,
48
+ "gpu": "Instinct MI210",
49
+ "gpu_count": 8,
50
+ "disk": {
51
+ "/": {
52
+ "total": "470343073792",
53
+ "used": "50268852224"
54
+ }
55
+ },
56
+ "memory": {
57
+ "total": "2434611519488"
58
+ },
59
+ "gpu_amd": [
60
+ {
61
+ "id": "3",
62
+ "uniqueId": "0x95be8fdc770fcfd7",
63
+ "vbiosVersion": "113-D67301V-073",
64
+ "performanceLevel": "auto",
65
+ "maxPower": "300.0",
66
+ "series": "Instinct MI210",
67
+ "model": "0x740f",
68
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
69
+ "sku": "D67301V",
70
+ "sclkRange": "500Mhz - 1700Mhz",
71
+ "mclkRange": "400Mhz - 1600Mhz"
72
+ },
73
+ {
74
+ "id": "1",
75
+ "uniqueId": "0x27087f06439a527d",
76
+ "vbiosVersion": "113-D67301V-073",
77
+ "performanceLevel": "auto",
78
+ "maxPower": "300.0",
79
+ "series": "Instinct MI210",
80
+ "model": "0x740f",
81
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
82
+ "sku": "D67301V",
83
+ "sclkRange": "500Mhz - 1700Mhz",
84
+ "mclkRange": "400Mhz - 1600Mhz"
85
+ },
86
+ {
87
+ "id": "5",
88
+ "uniqueId": "0x413935505e32b8da",
89
+ "vbiosVersion": "113-D67301V-073",
90
+ "performanceLevel": "auto",
91
+ "maxPower": "300.0",
92
+ "series": "Instinct MI210",
93
+ "model": "0x740f",
94
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
95
+ "sku": "D67301V",
96
+ "sclkRange": "500Mhz - 1700Mhz",
97
+ "mclkRange": "400Mhz - 1600Mhz"
98
+ },
99
+ {
100
+ "id": "7",
101
+ "uniqueId": "0xa0442ab3bdd405c1",
102
+ "vbiosVersion": "113-D67301V-073",
103
+ "performanceLevel": "auto",
104
+ "maxPower": "300.0",
105
+ "series": "Instinct MI210",
106
+ "model": "0x740f",
107
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
108
+ "sku": "D67301V",
109
+ "sclkRange": "500Mhz - 1700Mhz",
110
+ "mclkRange": "400Mhz - 1600Mhz"
111
+ },
112
+ {
113
+ "id": "6",
114
+ "uniqueId": "0x12140cd9e24f12e9",
115
+ "vbiosVersion": "113-D67301V-073",
116
+ "performanceLevel": "auto",
117
+ "maxPower": "300.0",
118
+ "series": "Instinct MI210",
119
+ "model": "0x740f",
120
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
121
+ "sku": "D67301V",
122
+ "sclkRange": "500Mhz - 1700Mhz",
123
+ "mclkRange": "400Mhz - 1600Mhz"
124
+ },
125
+ {
126
+ "id": "0",
127
+ "uniqueId": "0x82728d7f9bd937e4",
128
+ "vbiosVersion": "113-D67301V-073",
129
+ "performanceLevel": "auto",
130
+ "maxPower": "300.0",
131
+ "series": "Instinct MI210",
132
+ "model": "0x740f",
133
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
134
+ "sku": "D67301V",
135
+ "sclkRange": "500Mhz - 1700Mhz",
136
+ "mclkRange": "400Mhz - 1600Mhz"
137
+ },
138
+ {
139
+ "id": "2",
140
+ "uniqueId": "0xaabcddaa244a3d6e",
141
+ "vbiosVersion": "113-D67301V-073",
142
+ "performanceLevel": "auto",
143
+ "maxPower": "300.0",
144
+ "series": "Instinct MI210",
145
+ "model": "0x740f",
146
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
147
+ "sku": "D67301V",
148
+ "sclkRange": "500Mhz - 1700Mhz",
149
+ "mclkRange": "400Mhz - 1600Mhz"
150
+ },
151
+ {
152
+ "id": "4",
153
+ "uniqueId": "0x24ee801b7c402006",
154
+ "vbiosVersion": "113-D67301V-073",
155
+ "performanceLevel": "auto",
156
+ "maxPower": "300.0",
157
+ "series": "Instinct MI210",
158
+ "model": "0x740f",
159
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
160
+ "sku": "D67301V",
161
+ "sclkRange": "500Mhz - 1700Mhz",
162
+ "mclkRange": "400Mhz - 1600Mhz"
163
+ }
164
+ ],
165
+ "slurm": {
166
+ "cluster_name": "ai-04r",
167
+ "conf": "/etc/slurm/slurm.conf",
168
+ "cpus_on_node": "128",
169
+ "gpus_on_node": "8",
170
+ "gtids": "0",
171
+ "job_account": "faculty-acc",
172
+ "job_cpus_per_node": "128",
173
+ "job_end_time": "1759941420",
174
+ "job_gid": "2000",
175
+ "job_gpus": "0,1,2,3,4,5,6,7",
176
+ "job_id": "2280",
177
+ "job_name": "mh_wipe_l1_regression",
178
+ "job_nodelist": "auh7-1b-gpu-306",
179
+ "job_num_nodes": "1",
180
+ "job_partition": "faculty",
181
+ "job_qos": "xdqos",
182
+ "job_start_time": "1759682220",
183
+ "job_uid": "2013",
184
+ "job_user": "xiaodan",
185
+ "jobid": "2280",
186
+ "localid": "0",
187
+ "nnodes": "1",
188
+ "nodeid": "0",
189
+ "nodelist": "auh7-1b-gpu-306",
190
+ "nprocs": "1",
191
+ "ntasks": "1",
192
+ "ntasks_per_node": "1",
193
+ "oom_kill_step": "0",
194
+ "prio_process": "0",
195
+ "procid": "0",
196
+ "submit_dir": "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
197
+ "submit_host": "auh-1b-cpu-login-001",
198
+ "task_pid": "1826176",
199
+ "tasks_per_node": "1",
200
+ "topology_addr": "auh7-1b-gpu-306",
201
+ "topology_addr_pattern": "node"
202
+ },
203
+ "writerId": "bdwjpywyhycxf8g9ov01yp1e5him8k31"
204
+ }
wipe_l1_regression/wandb/wandb/run-20251005_163743-a1znetn8/logs/debug-core.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-10-05T16:37:43.93852383Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp_xq7pohw/port-1826365.txt","pid":1826365,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-10-05T16:37:43.939021639Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":1826365}
3
+ {"time":"2025-10-05T16:37:43.939000279Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-1826365-1826537-524537524/socket","Net":"unix"}}
4
+ {"time":"2025-10-05T16:37:44.124047526Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-10-05T16:37:44.131326394Z","level":"INFO","msg":"handleInformInit: received","streamId":"a1znetn8","id":"1(@)"}
6
+ {"time":"2025-10-05T16:37:45.16055242Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"a1znetn8","id":"1(@)"}
wipe_l1_regression/wandb/wandb/run-20251005_163743-a1znetn8/logs/debug-internal.log ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-10-05T16:37:44.133320669Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
2
+ {"time":"2025-10-05T16:37:45.160495159Z","level":"INFO","msg":"stream: created new stream","id":"a1znetn8"}
3
+ {"time":"2025-10-05T16:37:45.160546189Z","level":"INFO","msg":"stream: started","id":"a1znetn8"}
4
+ {"time":"2025-10-05T16:37:45.16056551Z","level":"INFO","msg":"handler: started","stream_id":"a1znetn8"}
5
+ {"time":"2025-10-05T16:37:45.1605953Z","level":"INFO","msg":"sender: started","stream_id":"a1znetn8"}
6
+ {"time":"2025-10-05T16:37:45.16057409Z","level":"INFO","msg":"writer: started","stream_id":"a1znetn8"}
7
+ {"time":"2025-10-06T15:55:45.597714896Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
8
+ {"time":"2025-10-06T18:21:33.322202546Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/henryeap/a1-realworld/a1znetn8/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
9
+ {"time":"2025-10-07T13:03:00.740491875Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
10
+ {"time":"2025-10-08T00:38:26.283561572Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
wipe_l1_regression/wandb/wandb/run-20251005_163743-a1znetn8/logs/debug.log ADDED
File without changes