Henryeahhh commited on
Commit
4919132
·
verified ·
1 Parent(s): 90c97df

Add files using upload-large-folder tool

Browse files
Files changed (50) hide show
  1. glue_l1_regression/step12000/config.yaml +322 -0
  2. pen_flow_matching/step11500-action-head/metadata.pt +3 -0
  3. pen_flow_matching/step12000-action-head/metadata.pt +3 -0
  4. pen_flow_matching/step12000-unsharded/config.yaml +322 -0
  5. pen_flow_matching/step12000/config.yaml +322 -0
  6. pen_flow_matching/wandb/wandb/debug-internal.log +8 -0
  7. pen_flow_matching/wandb/wandb/debug.log +0 -0
  8. pen_flow_matching/wandb/wandb/run-20251011_163844-a381qnn9/logs/debug.log +0 -0
  9. wandb/wandb/debug-internal.log +6 -0
  10. wandb/wandb/run-20251002_150921-kqbx0cjv/files/requirements.txt +286 -0
  11. wandb/wandb/run-20251002_150921-kqbx0cjv/files/wandb-metadata.json +204 -0
  12. wandb/wandb/run-20251002_150921-kqbx0cjv/logs/debug-internal.log +6 -0
  13. wandb/wandb/run-20251002_150921-kqbx0cjv/logs/debug.log +0 -0
  14. wandb/wandb/run-20251002_151047-gal9lnsm/files/output.log +365 -0
  15. wandb/wandb/run-20251002_151047-gal9lnsm/files/requirements.txt +286 -0
  16. wandb/wandb/run-20251002_151047-gal9lnsm/files/wandb-metadata.json +203 -0
  17. wandb/wandb/run-20251002_151047-gal9lnsm/logs/debug-core.log +6 -0
  18. wandb/wandb/run-20251002_151047-gal9lnsm/logs/debug-internal.log +6 -0
  19. wandb/wandb/run-20251002_151047-gal9lnsm/logs/debug.log +0 -0
  20. wandb/wandb/run-20251002_154526-bw81vbs0/files/output.log +81 -0
  21. wandb/wandb/run-20251002_154526-bw81vbs0/files/requirements.txt +286 -0
  22. wandb/wandb/run-20251002_154526-bw81vbs0/logs/debug-core.log +6 -0
  23. wandb/wandb/run-20251002_154526-bw81vbs0/logs/debug-internal.log +6 -0
  24. wandb/wandb/run-20251002_154526-bw81vbs0/logs/debug.log +0 -0
  25. wandb/wandb/run-20251002_155015-xojint20/files/output.log +88 -0
  26. wandb/wandb/run-20251002_155015-xojint20/files/requirements.txt +286 -0
  27. wandb/wandb/run-20251002_155015-xojint20/logs/debug-core.log +6 -0
  28. wandb/wandb/run-20251002_155015-xojint20/logs/debug-internal.log +6 -0
  29. wandb/wandb/run-20251002_155015-xojint20/run-xojint20.wandb +0 -0
  30. wandb/wandb/run-20251002_155441-70dhy5dq/files/output.log +318 -0
  31. wandb/wandb/run-20251002_155441-70dhy5dq/logs/debug-internal.log +6 -0
  32. wandb/wandb/run-20251002_155442-6v8q0jgn/files/requirements.txt +286 -0
  33. wipe/wandb/wandb/debug.log +0 -0
  34. wipe/wandb/wandb/run-20251002_163436-itiyfljc/files/output.log +15 -0
  35. wipe/wandb/wandb/run-20251002_163436-itiyfljc/files/requirements.txt +286 -0
  36. wipe/wandb/wandb/run-20251002_163436-itiyfljc/logs/debug-core.log +6 -0
  37. wipe/wandb/wandb/run-20251002_163436-itiyfljc/logs/debug.log +0 -0
  38. wipe_flow_matching/step11500-action-head/metadata.pt +3 -0
  39. wipe_flow_matching/step12000-action-head/metadata.pt +3 -0
  40. wipe_flow_matching/step12000-unsharded/lora.pt +3 -0
  41. wipe_flow_matching/step12000-unsharded/train.pt +3 -0
  42. wipe_flow_matching/wandb/wandb/run-20251005_163812-0cfqmuqw/files/output.log +0 -0
  43. wipe_flow_matching/wandb/wandb/run-20251005_163812-0cfqmuqw/files/requirements.txt +286 -0
  44. wipe_flow_matching/wandb/wandb/run-20251005_163812-0cfqmuqw/logs/debug-internal.log +10 -0
  45. wipe_flow_matching/wandb/wandb/run-20251005_163812-0cfqmuqw/logs/debug.log +0 -0
  46. wipe_l1_regression/step11500-action-head/metadata.pt +3 -0
  47. wipe_l1_regression/step12000-action-head/metadata.pt +3 -0
  48. wipe_l1_regression/step12000-unsharded/lora.pt +3 -0
  49. wipe_l1_regression/step12000-unsharded/train.pt +3 -0
  50. wipe_l1_regression/wandb/wandb/run-20251005_163743-a1znetn8/files/output.log +0 -0
glue_l1_regression/step12000/config.yaml ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: glue_20251002_163658
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 3584
7
+ n_heads: 28
8
+ n_kv_heads: 4
9
+ qkv_bias: true
10
+ clip_qkv: null
11
+ n_layers: 28
12
+ mlp_ratio: 4
13
+ mlp_hidden_size: 37888
14
+ activation_type: swiglu
15
+ block_type: sequential
16
+ block_group_size: 1
17
+ rope: true
18
+ rope_full_precision: true
19
+ rope_theta: 1000000.0
20
+ vision_backbone:
21
+ image_model_type: openai
22
+ image_default_input_size:
23
+ - 336
24
+ - 336
25
+ image_patch_size: 14
26
+ image_pos_patch_size: 14
27
+ image_emb_dim: 1024
28
+ image_num_heads: 16
29
+ image_num_key_value_heads: 16
30
+ image_num_layers: 23
31
+ image_head_dim: 64
32
+ image_mlp_dim: 4096
33
+ image_mlp_activations: quick_gelu
34
+ image_dropout_rate: 0.0
35
+ image_num_pos: 577
36
+ image_norm_eps: 1.0e-05
37
+ attention_dropout: 0.0
38
+ residual_dropout: 0.0
39
+ initializer_range: 0.02
40
+ fsdp_wrap: false
41
+ resize_mode: default
42
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
43
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
44
+ low_cpu_fsdp: true
45
+ attention_type: sdpa
46
+ float32_attention: true
47
+ attention_dropout: 0.0
48
+ attention_layer_norm: false
49
+ residual_dropout: 0.1
50
+ response_residual_dropout: 0.0
51
+ embedding_dropout: 0.0
52
+ layer_norm_type: rms
53
+ layer_norm_with_affine: true
54
+ layer_norm_eps: 1.0e-06
55
+ attention_layer_norm_with_affine: true
56
+ max_sequence_length: 4096
57
+ max_position_embeddings: null
58
+ include_bias: false
59
+ bias_for_layer_norm: null
60
+ scale_logits: false
61
+ vocab_size: 152064
62
+ embedding_size: 152064
63
+ ff_out_size: null
64
+ additional_vocab_size: 128
65
+ new_embedding_init_range: 0.02
66
+ weight_tying: false
67
+ init_device: null
68
+ init_fn: normal
69
+ init_std: 0.02
70
+ init_cutoff_factor: null
71
+ norm_after: false
72
+ precision: amp_bf16
73
+ max_crops: 12
74
+ crop_mode: overlap-and-resize-c2
75
+ use_col_tokens: true
76
+ prompt_type: uber_model
77
+ system_prompt_kind: demo_or_style
78
+ message_formatting: role
79
+ always_start_with_space: true
80
+ multi_annotation_weighting: root_subsegments
81
+ default_inference_len: 65
82
+ overlap_margins:
83
+ - 4
84
+ - 4
85
+ pad_value: 0.0
86
+ image_padding_embed: pad_and_partial_pad
87
+ fix_image_padding: true
88
+ vit_layers:
89
+ - -2
90
+ - -9
91
+ image_pooling_h: 2
92
+ image_pooling_w: 2
93
+ image_pooling_2d: attention_meanq
94
+ image_projector: mlp
95
+ image_feature_dropout: 0.0
96
+ initializer_range: 0.02
97
+ normalize_input_embeds: false
98
+ use_position_ids: true
99
+ head_dim: null
100
+ action_tokenizer:
101
+ identifier: physical-intelligence/fast
102
+ tokenizer_dir: null
103
+ action_dim: 7
104
+ horizon: 8
105
+ tokenizer:
106
+ identifier: Qwen/Qwen2-7B
107
+ tokenizer_dir: null
108
+ pad_tokenizer: true
109
+ moe_num_experts: 8
110
+ moe_top_k: 2
111
+ moe_mlp_impl: sparse
112
+ moe_log_expert_assignment: false
113
+ moe_shared_expert: false
114
+ moe_lbl_in_fp32: false
115
+ moe_interleave: false
116
+ moe_loss_weight: 0.1
117
+ moe_zloss_weight: null
118
+ moe_dropless: true
119
+ moe_capacity_factor: 1.25
120
+ action_head: l1_regression
121
+ num_diffusion_steps: 1000
122
+ num_diffusion_inference_steps: 30
123
+ use_proprio: true
124
+ action_head_dit_hidden_size: 1152
125
+ action_head_dit_depth: 28
126
+ action_head_dit_num_heads: 16
127
+ llm_causal_attention: false
128
+ action_use_left_eef: true
129
+ action_use_mobile_base: false
130
+ allow_resume: false
131
+ ft_llm: true
132
+ ft_vit: false
133
+ ft_connector: false
134
+ ft_embedding: lm_head
135
+ lora: false
136
+ use_lora: true
137
+ lora_rank: 8
138
+ lora_llm: false
139
+ lora_vit: false
140
+ lora_connector: false
141
+ early_exit: false
142
+ train_exit_random_layer: false
143
+ optimizer:
144
+ name: adamw
145
+ learning_rate: 0.0001
146
+ weight_decay: 0.01
147
+ betas:
148
+ - 0.9
149
+ - 0.95
150
+ eps: 1.0e-05
151
+ connector_learning_rate: 0.0002
152
+ vit_learning_rate: 6.0e-06
153
+ llm_learning_rate: 5.0e-05
154
+ connector_weight_decay: 0.0
155
+ vit_weight_decay: 0.0
156
+ llm_weight_decay: 0.0
157
+ connector_betas:
158
+ - 0.9
159
+ - 0.95
160
+ vit_betas:
161
+ - 0.9
162
+ - 0.95
163
+ llm_betas:
164
+ - 0.9
165
+ - 0.95
166
+ connector_eps: 1.0e-06
167
+ vit_eps: 1.0e-06
168
+ llm_eps: 1.0e-06
169
+ metrics_log_interval: 20
170
+ scheduler:
171
+ name: multimodal
172
+ units: steps
173
+ t_warmup: 100
174
+ t_max: null
175
+ alpha_f: 0.1
176
+ connector_t_warmup: 200
177
+ vit_t_warmup: 2000
178
+ llm_t_warmup: 2000
179
+ grad_clip_warmup_steps: null
180
+ grad_clip_warmup_factor: null
181
+ warmup_min_lr: 0.0
182
+ data:
183
+ dataset: vla_dataset_realworld
184
+ mixture: null
185
+ root_size_mixture: null
186
+ split: train
187
+ seed: 95818
188
+ shuffle_messages: false
189
+ pad: to_max
190
+ sequence_length: 1600
191
+ shuffle: true
192
+ for_inference: false
193
+ multi_modal: torch
194
+ num_workers: 0
195
+ drop_last: true
196
+ pin_memory: true
197
+ prefetch_factor: null
198
+ persistent_workers: false
199
+ timeout: 0
200
+ rlds_dataset_name: libero_4_task_suites_no_noops
201
+ rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Glue
202
+ use_wrist_image: true
203
+ use_proprio: true
204
+ rlds_shuffle_buffer_size: 100000
205
+ rlds_traj_threads: 8
206
+ rlds_read_threads: 8
207
+ lerobot_episode_index_start: null
208
+ lerobot_episode_index_end: null
209
+ restore_dataloader: true
210
+ fast_forward_batches: null
211
+ evaluators:
212
+ - label: val
213
+ data:
214
+ dataset: vla_dataset_realworld
215
+ mixture: null
216
+ root_size_mixture: null
217
+ split: validation
218
+ seed: null
219
+ shuffle_messages: false
220
+ pad: to_max
221
+ sequence_length: 1600
222
+ shuffle: false
223
+ for_inference: false
224
+ multi_modal: torch
225
+ num_workers: 0
226
+ drop_last: true
227
+ pin_memory: true
228
+ prefetch_factor: null
229
+ persistent_workers: true
230
+ timeout: 0
231
+ rlds_dataset_name: libero_4_task_suites_no_noops
232
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
233
+ use_wrist_image: true
234
+ use_proprio: true
235
+ rlds_shuffle_buffer_size: 256000
236
+ rlds_traj_threads: 8
237
+ rlds_read_threads: 8
238
+ lerobot_episode_index_start: 353
239
+ lerobot_episode_index_end: 765
240
+ device_eval_batch_size: null
241
+ subset_num_batches: 64
242
+ max_examples: null
243
+ max_new_tokens: 448
244
+ mm_evaluator: null
245
+ save_dir: null
246
+ save_to_checkpoint_dir: false
247
+ eval_name: null
248
+ skip_if_metrics_cached: true
249
+ eval_interval: 0
250
+ inf_eval_interval: -1
251
+ inf_evaluators: []
252
+ save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/glue_l1_regression
253
+ remote_save_folder: null
254
+ canceled_check_interval: 50
255
+ save_interval: 500
256
+ save_interval_unsharded: 500
257
+ save_interval_ephemeral: null
258
+ save_interval_action_head: 500
259
+ save_num_checkpoints_to_keep: 1
260
+ save_num_unsharded_checkpoints_to_keep: 1
261
+ save_num_action_head_checkpoints_to_keep: 2
262
+ save_overwrite: true
263
+ force_save_unsharded: false
264
+ no_pre_train_checkpoint: true
265
+ initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
266
+ load_model_config: null
267
+ checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
268
+ load_path: null
269
+ load_path_sharded_checkpointer: null
270
+ reset_optimizer_state: false
271
+ reset_trainer_state: false
272
+ save_dataloader_state: false
273
+ reset_dataloader_state: false
274
+ keep_lr_on_load: true
275
+ sharded_checkpointer: torch_legacy
276
+ max_duration: 500000
277
+ global_train_batch_size: 126
278
+ device_train_batch_size: 15
279
+ device_train_microbatch_size: 16
280
+ device_eval_batch_size: 4
281
+ eval_subset_num_batches: -1
282
+ eval_on_load: false
283
+ device_inf_eval_batch_size: 16
284
+ inf_eval_subset_num_batches: -1
285
+ device_train_grad_accum: 0
286
+ max_grad_norm: 1.0
287
+ multi_component_grad_norm: true
288
+ batch_divisor: global_batch
289
+ max_grad_norm_ratio: null
290
+ precision: amp_bf16
291
+ wandb:
292
+ project: a1-realworld
293
+ entity: henryeap
294
+ group: null
295
+ name: glue_20251002_163658
296
+ tags:
297
+ - watching
298
+ log_artifacts: false
299
+ rank_zero_only: true
300
+ log_interval: 1
301
+ speed_monitor:
302
+ window_size: 20
303
+ gpu_flops_available: null
304
+ console_log_interval: 1
305
+ gen1_gc_interval: 1
306
+ compile: null
307
+ fsdp:
308
+ use_orig_params: true
309
+ sharding_strategy: FULL_SHARD
310
+ wrapping_strategy: by_block_and_size
311
+ precision: float
312
+ hybrid_sharding_num_model_replicas: null
313
+ softmax_auxiliary_loss: true
314
+ softmax_auxiliary_loss_scale: 0.0001
315
+ time_limit: null
316
+ extra_steps_after_cancel: 10
317
+ python_profiling: false
318
+ torch_profiling: false
319
+ stop_at: 500000
320
+ stop_after: null
321
+ activation_checkpointing: whole_layer
322
+ fused_loss: null
pen_flow_matching/step11500-action-head/metadata.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fb72b6306ce04d1beb20bb289509f00c39a40845ff7c4b36bf4deb4e83fe82a
3
+ size 1331
pen_flow_matching/step12000-action-head/metadata.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:995307502120af3866f237cd0bc484fc848a652539d28e53cbea882abc16ba6b
3
+ size 1331
pen_flow_matching/step12000-unsharded/config.yaml ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: pen_20251011_163803
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 3584
7
+ n_heads: 28
8
+ n_kv_heads: 4
9
+ qkv_bias: true
10
+ clip_qkv: null
11
+ n_layers: 28
12
+ mlp_ratio: 4
13
+ mlp_hidden_size: 37888
14
+ activation_type: swiglu
15
+ block_type: sequential
16
+ block_group_size: 1
17
+ rope: true
18
+ rope_full_precision: true
19
+ rope_theta: 1000000.0
20
+ vision_backbone:
21
+ image_model_type: openai
22
+ image_default_input_size:
23
+ - 336
24
+ - 336
25
+ image_patch_size: 14
26
+ image_pos_patch_size: 14
27
+ image_emb_dim: 1024
28
+ image_num_heads: 16
29
+ image_num_key_value_heads: 16
30
+ image_num_layers: 23
31
+ image_head_dim: 64
32
+ image_mlp_dim: 4096
33
+ image_mlp_activations: quick_gelu
34
+ image_dropout_rate: 0.0
35
+ image_num_pos: 577
36
+ image_norm_eps: 1.0e-05
37
+ attention_dropout: 0.0
38
+ residual_dropout: 0.0
39
+ initializer_range: 0.02
40
+ fsdp_wrap: false
41
+ resize_mode: default
42
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
43
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
44
+ low_cpu_fsdp: true
45
+ attention_type: sdpa
46
+ float32_attention: true
47
+ attention_dropout: 0.0
48
+ attention_layer_norm: false
49
+ residual_dropout: 0.1
50
+ response_residual_dropout: 0.0
51
+ embedding_dropout: 0.0
52
+ layer_norm_type: rms
53
+ layer_norm_with_affine: true
54
+ layer_norm_eps: 1.0e-06
55
+ attention_layer_norm_with_affine: true
56
+ max_sequence_length: 4096
57
+ max_position_embeddings: null
58
+ include_bias: false
59
+ bias_for_layer_norm: null
60
+ scale_logits: false
61
+ vocab_size: 152064
62
+ embedding_size: 152064
63
+ ff_out_size: null
64
+ additional_vocab_size: 128
65
+ new_embedding_init_range: 0.02
66
+ weight_tying: false
67
+ init_device: null
68
+ init_fn: normal
69
+ init_std: 0.02
70
+ init_cutoff_factor: null
71
+ norm_after: false
72
+ precision: amp_bf16
73
+ max_crops: 12
74
+ crop_mode: overlap-and-resize-c2
75
+ use_col_tokens: true
76
+ prompt_type: uber_model
77
+ system_prompt_kind: demo_or_style
78
+ message_formatting: role
79
+ always_start_with_space: true
80
+ multi_annotation_weighting: root_subsegments
81
+ default_inference_len: 65
82
+ overlap_margins:
83
+ - 4
84
+ - 4
85
+ pad_value: 0.0
86
+ image_padding_embed: pad_and_partial_pad
87
+ fix_image_padding: true
88
+ vit_layers:
89
+ - -2
90
+ - -9
91
+ image_pooling_h: 2
92
+ image_pooling_w: 2
93
+ image_pooling_2d: attention_meanq
94
+ image_projector: mlp
95
+ image_feature_dropout: 0.0
96
+ initializer_range: 0.02
97
+ normalize_input_embeds: false
98
+ use_position_ids: true
99
+ head_dim: null
100
+ action_tokenizer:
101
+ identifier: physical-intelligence/fast
102
+ tokenizer_dir: null
103
+ action_dim: 7
104
+ horizon: 8
105
+ tokenizer:
106
+ identifier: Qwen/Qwen2-7B
107
+ tokenizer_dir: null
108
+ pad_tokenizer: true
109
+ moe_num_experts: 8
110
+ moe_top_k: 2
111
+ moe_mlp_impl: sparse
112
+ moe_log_expert_assignment: false
113
+ moe_shared_expert: false
114
+ moe_lbl_in_fp32: false
115
+ moe_interleave: false
116
+ moe_loss_weight: 0.1
117
+ moe_zloss_weight: null
118
+ moe_dropless: true
119
+ moe_capacity_factor: 1.25
120
+ action_head: flow_matching
121
+ num_diffusion_steps: 1000
122
+ num_diffusion_inference_steps: 30
123
+ use_proprio: true
124
+ action_head_dit_hidden_size: 1152
125
+ action_head_dit_depth: 28
126
+ action_head_dit_num_heads: 16
127
+ llm_causal_attention: false
128
+ action_use_left_eef: true
129
+ action_use_mobile_base: false
130
+ allow_resume: false
131
+ ft_llm: true
132
+ ft_vit: false
133
+ ft_connector: false
134
+ ft_embedding: lm_head
135
+ lora: false
136
+ use_lora: false
137
+ lora_rank: 8
138
+ lora_llm: false
139
+ lora_vit: false
140
+ lora_connector: false
141
+ early_exit: false
142
+ train_exit_random_layer: false
143
+ optimizer:
144
+ name: adamw
145
+ learning_rate: 0.0001
146
+ weight_decay: 0.01
147
+ betas:
148
+ - 0.9
149
+ - 0.95
150
+ eps: 1.0e-05
151
+ connector_learning_rate: 0.0002
152
+ vit_learning_rate: 6.0e-06
153
+ llm_learning_rate: 5.0e-05
154
+ connector_weight_decay: 0.0
155
+ vit_weight_decay: 0.0
156
+ llm_weight_decay: 0.0
157
+ connector_betas:
158
+ - 0.9
159
+ - 0.95
160
+ vit_betas:
161
+ - 0.9
162
+ - 0.95
163
+ llm_betas:
164
+ - 0.9
165
+ - 0.95
166
+ connector_eps: 1.0e-06
167
+ vit_eps: 1.0e-06
168
+ llm_eps: 1.0e-06
169
+ metrics_log_interval: 20
170
+ scheduler:
171
+ name: multimodal
172
+ units: steps
173
+ t_warmup: 100
174
+ t_max: null
175
+ alpha_f: 0.1
176
+ connector_t_warmup: 200
177
+ vit_t_warmup: 2000
178
+ llm_t_warmup: 2000
179
+ grad_clip_warmup_steps: null
180
+ grad_clip_warmup_factor: null
181
+ warmup_min_lr: 0.0
182
+ data:
183
+ dataset: vla_dataset_realworld
184
+ mixture: null
185
+ root_size_mixture: null
186
+ split: train
187
+ seed: 95818
188
+ shuffle_messages: false
189
+ pad: to_max
190
+ sequence_length: 1600
191
+ shuffle: true
192
+ for_inference: false
193
+ multi_modal: torch
194
+ num_workers: 0
195
+ drop_last: true
196
+ pin_memory: true
197
+ prefetch_factor: null
198
+ persistent_workers: false
199
+ timeout: 0
200
+ rlds_dataset_name: a1_real_world
201
+ rlds_data_root_dir: /vast/users/xiaodan/zhangjian/datasets/OXE
202
+ use_wrist_image: true
203
+ use_proprio: true
204
+ rlds_shuffle_buffer_size: 100000
205
+ rlds_traj_threads: 8
206
+ rlds_read_threads: 8
207
+ lerobot_episode_index_start: null
208
+ lerobot_episode_index_end: null
209
+ restore_dataloader: true
210
+ fast_forward_batches: null
211
+ evaluators:
212
+ - label: val
213
+ data:
214
+ dataset: vla_dataset_realworld
215
+ mixture: null
216
+ root_size_mixture: null
217
+ split: validation
218
+ seed: null
219
+ shuffle_messages: false
220
+ pad: to_max
221
+ sequence_length: 1600
222
+ shuffle: false
223
+ for_inference: false
224
+ multi_modal: torch
225
+ num_workers: 0
226
+ drop_last: true
227
+ pin_memory: true
228
+ prefetch_factor: null
229
+ persistent_workers: true
230
+ timeout: 0
231
+ rlds_dataset_name: libero_4_task_suites_no_noops
232
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
233
+ use_wrist_image: true
234
+ use_proprio: true
235
+ rlds_shuffle_buffer_size: 256000
236
+ rlds_traj_threads: 8
237
+ rlds_read_threads: 8
238
+ lerobot_episode_index_start: 353
239
+ lerobot_episode_index_end: 765
240
+ device_eval_batch_size: null
241
+ subset_num_batches: 64
242
+ max_examples: null
243
+ max_new_tokens: 448
244
+ mm_evaluator: null
245
+ save_dir: null
246
+ save_to_checkpoint_dir: false
247
+ eval_name: null
248
+ skip_if_metrics_cached: true
249
+ eval_interval: 0
250
+ inf_eval_interval: -1
251
+ inf_evaluators: []
252
+ save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/pen_flow_matching
253
+ remote_save_folder: null
254
+ canceled_check_interval: 50
255
+ save_interval: 500
256
+ save_interval_unsharded: 500
257
+ save_interval_ephemeral: null
258
+ save_interval_action_head: 500
259
+ save_num_checkpoints_to_keep: 1
260
+ save_num_unsharded_checkpoints_to_keep: 1
261
+ save_num_action_head_checkpoints_to_keep: 2
262
+ save_overwrite: true
263
+ force_save_unsharded: false
264
+ no_pre_train_checkpoint: true
265
+ initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
266
+ load_model_config: null
267
+ checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
268
+ load_path: null
269
+ load_path_sharded_checkpointer: null
270
+ reset_optimizer_state: false
271
+ reset_trainer_state: false
272
+ save_dataloader_state: false
273
+ reset_dataloader_state: false
274
+ keep_lr_on_load: true
275
+ sharded_checkpointer: torch_legacy
276
+ max_duration: 500000
277
+ global_train_batch_size: 126
278
+ device_train_batch_size: 15
279
+ device_train_microbatch_size: 16
280
+ device_eval_batch_size: 4
281
+ eval_subset_num_batches: -1
282
+ eval_on_load: false
283
+ device_inf_eval_batch_size: 16
284
+ inf_eval_subset_num_batches: -1
285
+ device_train_grad_accum: 0
286
+ max_grad_norm: 1.0
287
+ multi_component_grad_norm: true
288
+ batch_divisor: global_batch
289
+ max_grad_norm_ratio: null
290
+ precision: amp_bf16
291
+ wandb:
292
+ project: a1-realworld
293
+ entity: henryeap
294
+ group: null
295
+ name: pen_20251011_163803
296
+ tags:
297
+ - watching
298
+ log_artifacts: false
299
+ rank_zero_only: true
300
+ log_interval: 1
301
+ speed_monitor:
302
+ window_size: 20
303
+ gpu_flops_available: null
304
+ console_log_interval: 1
305
+ gen1_gc_interval: 1
306
+ compile: null
307
+ fsdp:
308
+ use_orig_params: true
309
+ sharding_strategy: FULL_SHARD
310
+ wrapping_strategy: by_block_and_size
311
+ precision: float
312
+ hybrid_sharding_num_model_replicas: null
313
+ softmax_auxiliary_loss: true
314
+ softmax_auxiliary_loss_scale: 0.0001
315
+ time_limit: null
316
+ extra_steps_after_cancel: 10
317
+ python_profiling: false
318
+ torch_profiling: false
319
+ stop_at: 500000
320
+ stop_after: null
321
+ activation_checkpointing: whole_layer
322
+ fused_loss: null
pen_flow_matching/step12000/config.yaml ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: pen_20251011_163803
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 3584
7
+ n_heads: 28
8
+ n_kv_heads: 4
9
+ qkv_bias: true
10
+ clip_qkv: null
11
+ n_layers: 28
12
+ mlp_ratio: 4
13
+ mlp_hidden_size: 37888
14
+ activation_type: swiglu
15
+ block_type: sequential
16
+ block_group_size: 1
17
+ rope: true
18
+ rope_full_precision: true
19
+ rope_theta: 1000000.0
20
+ vision_backbone:
21
+ image_model_type: openai
22
+ image_default_input_size:
23
+ - 336
24
+ - 336
25
+ image_patch_size: 14
26
+ image_pos_patch_size: 14
27
+ image_emb_dim: 1024
28
+ image_num_heads: 16
29
+ image_num_key_value_heads: 16
30
+ image_num_layers: 23
31
+ image_head_dim: 64
32
+ image_mlp_dim: 4096
33
+ image_mlp_activations: quick_gelu
34
+ image_dropout_rate: 0.0
35
+ image_num_pos: 577
36
+ image_norm_eps: 1.0e-05
37
+ attention_dropout: 0.0
38
+ residual_dropout: 0.0
39
+ initializer_range: 0.02
40
+ fsdp_wrap: false
41
+ resize_mode: default
42
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
43
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
44
+ low_cpu_fsdp: true
45
+ attention_type: sdpa
46
+ float32_attention: true
47
+ attention_dropout: 0.0
48
+ attention_layer_norm: false
49
+ residual_dropout: 0.1
50
+ response_residual_dropout: 0.0
51
+ embedding_dropout: 0.0
52
+ layer_norm_type: rms
53
+ layer_norm_with_affine: true
54
+ layer_norm_eps: 1.0e-06
55
+ attention_layer_norm_with_affine: true
56
+ max_sequence_length: 4096
57
+ max_position_embeddings: null
58
+ include_bias: false
59
+ bias_for_layer_norm: null
60
+ scale_logits: false
61
+ vocab_size: 152064
62
+ embedding_size: 152064
63
+ ff_out_size: null
64
+ additional_vocab_size: 128
65
+ new_embedding_init_range: 0.02
66
+ weight_tying: false
67
+ init_device: null
68
+ init_fn: normal
69
+ init_std: 0.02
70
+ init_cutoff_factor: null
71
+ norm_after: false
72
+ precision: amp_bf16
73
+ max_crops: 12
74
+ crop_mode: overlap-and-resize-c2
75
+ use_col_tokens: true
76
+ prompt_type: uber_model
77
+ system_prompt_kind: demo_or_style
78
+ message_formatting: role
79
+ always_start_with_space: true
80
+ multi_annotation_weighting: root_subsegments
81
+ default_inference_len: 65
82
+ overlap_margins:
83
+ - 4
84
+ - 4
85
+ pad_value: 0.0
86
+ image_padding_embed: pad_and_partial_pad
87
+ fix_image_padding: true
88
+ vit_layers:
89
+ - -2
90
+ - -9
91
+ image_pooling_h: 2
92
+ image_pooling_w: 2
93
+ image_pooling_2d: attention_meanq
94
+ image_projector: mlp
95
+ image_feature_dropout: 0.0
96
+ initializer_range: 0.02
97
+ normalize_input_embeds: false
98
+ use_position_ids: true
99
+ head_dim: null
100
+ action_tokenizer:
101
+ identifier: physical-intelligence/fast
102
+ tokenizer_dir: null
103
+ action_dim: 7
104
+ horizon: 8
105
+ tokenizer:
106
+ identifier: Qwen/Qwen2-7B
107
+ tokenizer_dir: null
108
+ pad_tokenizer: true
109
+ moe_num_experts: 8
110
+ moe_top_k: 2
111
+ moe_mlp_impl: sparse
112
+ moe_log_expert_assignment: false
113
+ moe_shared_expert: false
114
+ moe_lbl_in_fp32: false
115
+ moe_interleave: false
116
+ moe_loss_weight: 0.1
117
+ moe_zloss_weight: null
118
+ moe_dropless: true
119
+ moe_capacity_factor: 1.25
120
+ action_head: flow_matching
121
+ num_diffusion_steps: 1000
122
+ num_diffusion_inference_steps: 30
123
+ use_proprio: true
124
+ action_head_dit_hidden_size: 1152
125
+ action_head_dit_depth: 28
126
+ action_head_dit_num_heads: 16
127
+ llm_causal_attention: false
128
+ action_use_left_eef: true
129
+ action_use_mobile_base: false
130
+ allow_resume: false
131
+ ft_llm: true
132
+ ft_vit: false
133
+ ft_connector: false
134
+ ft_embedding: lm_head
135
+ lora: false
136
+ use_lora: false
137
+ lora_rank: 8
138
+ lora_llm: false
139
+ lora_vit: false
140
+ lora_connector: false
141
+ early_exit: false
142
+ train_exit_random_layer: false
143
+ optimizer:
144
+ name: adamw
145
+ learning_rate: 0.0001
146
+ weight_decay: 0.01
147
+ betas:
148
+ - 0.9
149
+ - 0.95
150
+ eps: 1.0e-05
151
+ connector_learning_rate: 0.0002
152
+ vit_learning_rate: 6.0e-06
153
+ llm_learning_rate: 5.0e-05
154
+ connector_weight_decay: 0.0
155
+ vit_weight_decay: 0.0
156
+ llm_weight_decay: 0.0
157
+ connector_betas:
158
+ - 0.9
159
+ - 0.95
160
+ vit_betas:
161
+ - 0.9
162
+ - 0.95
163
+ llm_betas:
164
+ - 0.9
165
+ - 0.95
166
+ connector_eps: 1.0e-06
167
+ vit_eps: 1.0e-06
168
+ llm_eps: 1.0e-06
169
+ metrics_log_interval: 20
170
+ scheduler:
171
+ name: multimodal
172
+ units: steps
173
+ t_warmup: 100
174
+ t_max: null
175
+ alpha_f: 0.1
176
+ connector_t_warmup: 200
177
+ vit_t_warmup: 2000
178
+ llm_t_warmup: 2000
179
+ grad_clip_warmup_steps: null
180
+ grad_clip_warmup_factor: null
181
+ warmup_min_lr: 0.0
182
+ data:
183
+ dataset: vla_dataset_realworld
184
+ mixture: null
185
+ root_size_mixture: null
186
+ split: train
187
+ seed: 95818
188
+ shuffle_messages: false
189
+ pad: to_max
190
+ sequence_length: 1600
191
+ shuffle: true
192
+ for_inference: false
193
+ multi_modal: torch
194
+ num_workers: 0
195
+ drop_last: true
196
+ pin_memory: true
197
+ prefetch_factor: null
198
+ persistent_workers: false
199
+ timeout: 0
200
+ rlds_dataset_name: a1_real_world
201
+ rlds_data_root_dir: /vast/users/xiaodan/zhangjian/datasets/OXE
202
+ use_wrist_image: true
203
+ use_proprio: true
204
+ rlds_shuffle_buffer_size: 100000
205
+ rlds_traj_threads: 8
206
+ rlds_read_threads: 8
207
+ lerobot_episode_index_start: null
208
+ lerobot_episode_index_end: null
209
+ restore_dataloader: true
210
+ fast_forward_batches: null
211
+ evaluators:
212
+ - label: val
213
+ data:
214
+ dataset: vla_dataset_realworld
215
+ mixture: null
216
+ root_size_mixture: null
217
+ split: validation
218
+ seed: null
219
+ shuffle_messages: false
220
+ pad: to_max
221
+ sequence_length: 1600
222
+ shuffle: false
223
+ for_inference: false
224
+ multi_modal: torch
225
+ num_workers: 0
226
+ drop_last: true
227
+ pin_memory: true
228
+ prefetch_factor: null
229
+ persistent_workers: true
230
+ timeout: 0
231
+ rlds_dataset_name: libero_4_task_suites_no_noops
232
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
233
+ use_wrist_image: true
234
+ use_proprio: true
235
+ rlds_shuffle_buffer_size: 256000
236
+ rlds_traj_threads: 8
237
+ rlds_read_threads: 8
238
+ lerobot_episode_index_start: 353
239
+ lerobot_episode_index_end: 765
240
+ device_eval_batch_size: null
241
+ subset_num_batches: 64
242
+ max_examples: null
243
+ max_new_tokens: 448
244
+ mm_evaluator: null
245
+ save_dir: null
246
+ save_to_checkpoint_dir: false
247
+ eval_name: null
248
+ skip_if_metrics_cached: true
249
+ eval_interval: 0
250
+ inf_eval_interval: -1
251
+ inf_evaluators: []
252
+ save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/pen_flow_matching
253
+ remote_save_folder: null
254
+ canceled_check_interval: 50
255
+ save_interval: 500
256
+ save_interval_unsharded: 500
257
+ save_interval_ephemeral: null
258
+ save_interval_action_head: 500
259
+ save_num_checkpoints_to_keep: 1
260
+ save_num_unsharded_checkpoints_to_keep: 1
261
+ save_num_action_head_checkpoints_to_keep: 2
262
+ save_overwrite: true
263
+ force_save_unsharded: false
264
+ no_pre_train_checkpoint: true
265
+ initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
266
+ load_model_config: null
267
+ checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
268
+ load_path: null
269
+ load_path_sharded_checkpointer: null
270
+ reset_optimizer_state: false
271
+ reset_trainer_state: false
272
+ save_dataloader_state: false
273
+ reset_dataloader_state: false
274
+ keep_lr_on_load: true
275
+ sharded_checkpointer: torch_legacy
276
+ max_duration: 500000
277
+ global_train_batch_size: 126
278
+ device_train_batch_size: 15
279
+ device_train_microbatch_size: 16
280
+ device_eval_batch_size: 4
281
+ eval_subset_num_batches: -1
282
+ eval_on_load: false
283
+ device_inf_eval_batch_size: 16
284
+ inf_eval_subset_num_batches: -1
285
+ device_train_grad_accum: 0
286
+ max_grad_norm: 1.0
287
+ multi_component_grad_norm: true
288
+ batch_divisor: global_batch
289
+ max_grad_norm_ratio: null
290
+ precision: amp_bf16
291
+ wandb:
292
+ project: a1-realworld
293
+ entity: henryeap
294
+ group: null
295
+ name: pen_20251011_163803
296
+ tags:
297
+ - watching
298
+ log_artifacts: false
299
+ rank_zero_only: true
300
+ log_interval: 1
301
+ speed_monitor:
302
+ window_size: 20
303
+ gpu_flops_available: null
304
+ console_log_interval: 1
305
+ gen1_gc_interval: 1
306
+ compile: null
307
+ fsdp:
308
+ use_orig_params: true
309
+ sharding_strategy: FULL_SHARD
310
+ wrapping_strategy: by_block_and_size
311
+ precision: float
312
+ hybrid_sharding_num_model_replicas: null
313
+ softmax_auxiliary_loss: true
314
+ softmax_auxiliary_loss_scale: 0.0001
315
+ time_limit: null
316
+ extra_steps_after_cancel: 10
317
+ python_profiling: false
318
+ torch_profiling: false
319
+ stop_at: 500000
320
+ stop_after: null
321
+ activation_checkpointing: whole_layer
322
+ fused_loss: null
pen_flow_matching/wandb/wandb/debug-internal.log ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-10-11T16:38:45.301569164Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
2
+ {"time":"2025-10-11T16:38:46.459201786Z","level":"INFO","msg":"stream: created new stream","id":"a381qnn9"}
3
+ {"time":"2025-10-11T16:38:46.459237957Z","level":"INFO","msg":"stream: started","id":"a381qnn9"}
4
+ {"time":"2025-10-11T16:38:46.459266458Z","level":"INFO","msg":"handler: started","stream_id":"a381qnn9"}
5
+ {"time":"2025-10-11T16:38:46.459291898Z","level":"INFO","msg":"sender: started","stream_id":"a381qnn9"}
6
+ {"time":"2025-10-11T16:38:46.459287598Z","level":"INFO","msg":"writer: started","stream_id":"a381qnn9"}
7
+ {"time":"2025-10-12T06:42:47.897888022Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/henryeap/a1-realworld/a381qnn9/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
8
+ {"time":"2025-10-12T14:34:32.120286068Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/henryeap/a1-realworld/a381qnn9/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
pen_flow_matching/wandb/wandb/debug.log ADDED
File without changes
pen_flow_matching/wandb/wandb/run-20251011_163844-a381qnn9/logs/debug.log ADDED
File without changes
wandb/wandb/debug-internal.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-10-02T15:54:42.154138214Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
2
+ {"time":"2025-10-02T15:54:43.180595015Z","level":"INFO","msg":"stream: created new stream","id":"70dhy5dq"}
3
+ {"time":"2025-10-02T15:54:43.180644946Z","level":"INFO","msg":"stream: started","id":"70dhy5dq"}
4
+ {"time":"2025-10-02T15:54:43.180663737Z","level":"INFO","msg":"sender: started","stream_id":"70dhy5dq"}
5
+ {"time":"2025-10-02T15:54:43.180659826Z","level":"INFO","msg":"writer: started","stream_id":"70dhy5dq"}
6
+ {"time":"2025-10-02T15:54:43.180682767Z","level":"INFO","msg":"handler: started","stream_id":"70dhy5dq"}
wandb/wandb/run-20251002_150921-kqbx0cjv/files/requirements.txt ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ai2-molmo==0.0.0
2
+ astunparse==1.6.3
3
+ flatbuffers==25.2.10
4
+ gast==0.6.0
5
+ google-pasta==0.2.0
6
+ h5py==3.14.0
7
+ libclang==18.1.1
8
+ Markdown==3.9
9
+ namex==0.1.0
10
+ opt_einsum==3.4.0
11
+ optree==0.17.0
12
+ tensorboard-data-server==0.7.2
13
+ tensorflow-io-gcs-filesystem==0.37.1
14
+ termcolor==3.1.0
15
+ Werkzeug==3.1.3
16
+ Brotli==1.1.0
17
+ Farama-Notifications==0.0.4
18
+ MarkupSafe==2.1.5
19
+ PyYAML==6.0.2
20
+ absl-py==2.3.1
21
+ accelerate==1.10.1
22
+ ai2-molmo==0.0.0
23
+ aiofiles==24.1.0
24
+ aiohappyeyeballs==2.6.1
25
+ aiohttp==3.12.15
26
+ aiosignal==1.4.0
27
+ annotated-types==0.7.0
28
+ antlr4-python3-runtime==4.9.3
29
+ anyio==4.10.0
30
+ array_record==0.8.1
31
+ async-timeout==5.0.1
32
+ attrs==25.3.0
33
+ av==15.1.0
34
+ backports.tarfile==1.2.0
35
+ beaker-gantry==3.2.0
36
+ beaker-py==2.5.0
37
+ black==23.12.1
38
+ blinker==1.9.0
39
+ boltons==25.0.0
40
+ boto3==1.40.33
41
+ botocore==1.40.33
42
+ build==1.3.0
43
+ cached_path==1.7.3
44
+ cached-property==2.0.1
45
+ cachetools==5.5.2
46
+ certifi==2025.8.3
47
+ cffi==2.0.0
48
+ charset-normalizer==3.4.3
49
+ click==8.2.1
50
+ click-help-colors==0.9.4
51
+ click-option-group==0.5.7
52
+ cloudpickle==3.1.1
53
+ cmake==4.1.0
54
+ contourpy==1.3.2
55
+ cryptography==46.0.1
56
+ cycler==0.12.1
57
+ dataclass-extensions==0.2.3
58
+ datasets==3.6.0
59
+ decorator==5.2.1
60
+ deepdiff==8.6.1
61
+ diffusers==0.35.1
62
+ dill==0.3.8
63
+ distro==1.9.0
64
+ dlimp==0.0.1
65
+ dm-tree==0.1.9
66
+ docutils==0.22.1
67
+ draccus==0.10.0
68
+ editdistance==0.8.1
69
+ einops==0.8.1
70
+ einops-exts==0.0.4
71
+ et_xmlfile==2.0.0
72
+ etils==1.13.0
73
+ evdev==1.9.2
74
+ exceptiongroup==1.3.0
75
+ face==24.0.0
76
+ fastapi==0.116.2
77
+ ffmpy==0.6.1
78
+ fiddle==0.3.0
79
+ filelock==3.13.1
80
+ Flask==3.1.2
81
+ fonttools==4.60.0
82
+ frozenlist==1.7.0
83
+ fsspec==2023.9.2
84
+ ftfy==6.3.1
85
+ gcsfs==2023.9.2
86
+ gitdb==4.0.12
87
+ GitPython==3.1.45
88
+ glom==24.11.0
89
+ google-api-core==2.25.1
90
+ google-auth==2.40.3
91
+ google-auth-oauthlib==1.2.2
92
+ google-cloud-core==2.4.3
93
+ google-cloud-storage==2.19.0
94
+ google-crc32c==1.7.1
95
+ google-resumable-media==2.7.2
96
+ googleapis-common-protos==1.70.0
97
+ gradio==5.46.0
98
+ gradio_client==1.13.0
99
+ graphviz==0.21
100
+ groovy==0.1.2
101
+ grpcio==1.75.0
102
+ gymnasium==0.29.1
103
+ h11==0.16.0
104
+ hf_transfer==0.1.9
105
+ hf-xet==1.1.10
106
+ httpcore==1.0.9
107
+ httpx==0.28.1
108
+ huggingface-hub==0.35.0
109
+ id==1.5.0
110
+ idna==3.10
111
+ imageio==2.37.0
112
+ imageio-ffmpeg==0.6.0
113
+ importlib_metadata==8.7.0
114
+ importlib_resources==6.5.2
115
+ iniconfig==2.1.0
116
+ inquirerpy==0.3.4
117
+ isort==5.12.0
118
+ itsdangerous==2.2.0
119
+ jaraco.classes==3.4.0
120
+ jaraco.context==6.0.1
121
+ jaraco.functools==4.3.0
122
+ jeepney==0.9.0
123
+ Jinja2==3.1.4
124
+ jiter==0.11.0
125
+ jmespath==1.0.1
126
+ joblib==1.5.2
127
+ jsonlines==4.0.0
128
+ keras==2.15.0
129
+ keyring==25.6.0
130
+ kiwisolver==1.4.9
131
+ latex2sympy2_extended==1.10.2
132
+ lerobot==0.3.4
133
+ Levenshtein==0.27.1
134
+ libcst==1.8.4
135
+ lightning-utilities==0.15.2
136
+ markdown-it-py==4.0.0
137
+ math-verify==0.8.0
138
+ matplotlib==3.10.6
139
+ mdurl==0.1.2
140
+ mergedeep==1.3.4
141
+ ml-dtypes==0.2.0
142
+ ml_dtypes==0.5.3
143
+ more-itertools==10.8.0
144
+ mpmath==1.3.0
145
+ msgspec==0.19.0
146
+ multidict==6.6.4
147
+ multiprocess==0.70.16
148
+ mypy==1.3.0
149
+ mypy_extensions==1.1.0
150
+ necessary==0.4.3
151
+ networkx==3.3
152
+ nh3==0.3.0
153
+ nltk==3.9.1
154
+ numpy==1.26.4
155
+ oauthlib==3.3.1
156
+ omegaconf==2.3.0
157
+ openai==1.108.0
158
+ opencv-python-headless==4.12.0.88
159
+ OpenEXR==3.4.0
160
+ openpyxl==3.1.5
161
+ orderly-set==5.5.0
162
+ orjson==3.11.3
163
+ packaging==25.0
164
+ pandas==2.3.2
165
+ pathspec==0.12.1
166
+ petname==2.6
167
+ pfzy==0.3.4
168
+ pillow==11.0.0
169
+ pip==25.2
170
+ platformdirs==4.4.0
171
+ pluggy==1.6.0
172
+ promise==2.3
173
+ prompt_toolkit==3.0.52
174
+ propcache==0.3.2
175
+ proto-plus==1.26.1
176
+ protobuf==4.21.12
177
+ protobuf==6.32.1
178
+ psutil==7.1.0
179
+ pyarrow==21.0.0
180
+ pyasn1==0.6.1
181
+ pyasn1_modules==0.4.2
182
+ pycparser==2.23
183
+ pydantic==2.11.9
184
+ pydantic_core==2.33.2
185
+ pydub==0.25.1
186
+ Pygments==2.19.2
187
+ pynput==1.8.1
188
+ pyparsing==3.2.4
189
+ pyproject_hooks==1.2.0
190
+ pyserial==3.5
191
+ pytest==8.4.2
192
+ pytest-sphinx==0.6.3
193
+ python-dateutil==2.9.0.post0
194
+ python-Levenshtein==0.27.1
195
+ python-multipart==0.0.20
196
+ python-xlib==0.33
197
+ pytorch-triton-rocm==3.4.0
198
+ pytz==2025.2
199
+ pyyaml-include==1.4.1
200
+ RapidFuzz==3.14.1
201
+ readme_renderer==44.0
202
+ regex==2025.9.1
203
+ requests==2.32.5
204
+ requests-oauthlib==2.0.0
205
+ requests-toolbelt==1.0.0
206
+ requirements-parser==0.13.0
207
+ rerun-sdk==0.22.1
208
+ rfc3986==2.0.0
209
+ rich==13.9.4
210
+ rsa==4.9.1
211
+ ruff==0.13.0
212
+ s3transfer==0.14.0
213
+ safehttpx==0.1.6
214
+ safetensors==0.6.2
215
+ scikit-learn==1.7.2
216
+ scipy==1.15.3
217
+ SecretStorage==3.4.0
218
+ semantic-version==2.10.0
219
+ sentencepiece==0.2.1
220
+ sentry-sdk==2.38.0
221
+ setuptools==78.1.1
222
+ shellingham==1.5.4
223
+ six==1.17.0
224
+ smart_open==7.3.1
225
+ smashed==0.21.5
226
+ smmap==5.0.2
227
+ sniffio==1.3.1
228
+ starlette==0.48.0
229
+ sympy==1.13.3
230
+ tensorboard==2.15.2
231
+ tensorboard==2.19.0
232
+ tensorflow==2.15.0
233
+ tensorflow-addons==0.23.0
234
+ tensorflow-datasets==4.9.3
235
+ tensorflow-estimator==2.15.0
236
+ tensorflow-graphics==2021.12.3
237
+ tensorflow-metadata==1.17.2
238
+ threadpoolctl==3.6.0
239
+ timm==1.0.19
240
+ tokenizers==0.22.0
241
+ toml==0.10.2
242
+ tomli==2.2.1
243
+ tomlkit==0.13.3
244
+ torch==2.8.0+rocm6.4
245
+ torchcodec==0.5
246
+ torchmetrics==1.8.2
247
+ torchvision==0.23.0+rocm6.4
248
+ tqdm==4.67.1
249
+ transformers==4.56.1
250
+ trimesh==4.8.2
251
+ trouting==0.3.3
252
+ twine==6.2.0
253
+ typeguard==2.13.3
254
+ typer==0.17.4
255
+ typing_extensions==4.15.0
256
+ typing-inspect==0.9.0
257
+ typing-inspection==0.4.1
258
+ tzdata==2025.2
259
+ urllib3==2.5.0
260
+ uvicorn==0.35.0
261
+ wandb==0.21.4
262
+ wcwidth==0.2.13
263
+ websockets==15.0.1
264
+ wheel==0.45.1
265
+ wrapt==1.14.2
266
+ xxhash==3.5.0
267
+ yarl==1.20.1
268
+ zipp==3.23.0
269
+ lerobot==0.3.4
270
+ minLoRA==0.1.0
271
+ autocommand==2.2.2
272
+ backports.tarfile==1.2.0
273
+ importlib_metadata==8.0.0
274
+ inflect==7.3.1
275
+ jaraco.collections==5.1.0
276
+ jaraco.context==5.3.0
277
+ jaraco.functools==4.0.1
278
+ jaraco.text==3.12.1
279
+ more-itertools==10.3.0
280
+ packaging==24.2
281
+ platformdirs==4.2.2
282
+ tomli==2.0.1
283
+ typeguard==4.3.0
284
+ typing_extensions==4.12.2
285
+ wheel==0.45.1
286
+ zipp==3.19.2
wandb/wandb/run-20251002_150921-kqbx0cjv/files/wandb-metadata.json ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.18",
4
+ "startedAt": "2025-10-02T15:09:21.237465Z",
5
+ "args": [
6
+ "qwen2_7b",
7
+ "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/",
8
+ "--vision_backbone",
9
+ "openai",
10
+ "--action_head",
11
+ "l1_regression",
12
+ "--seq_len",
13
+ "1600",
14
+ "--ft_llm",
15
+ "--checkpoint",
16
+ "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
17
+ "--device_train_microbatch_size",
18
+ "16",
19
+ "--global_batch_size",
20
+ "126",
21
+ "--dataset",
22
+ "vla_dataset_realworld",
23
+ "--llm_learning_rate",
24
+ "5e-5",
25
+ "--wandb_entity",
26
+ "henryeap",
27
+ "--wandb_project",
28
+ "a1-realworld",
29
+ "--wandb_run_name",
30
+ "realworld",
31
+ "--real_world_vla_config_path",
32
+ "vla_config_realworld/vla_config_cleandesk.yaml",
33
+ "--save_overwrite"
34
+ ],
35
+ "program": "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
36
+ "codePath": "launch_scripts/train_vla.py",
37
+ "codePathLocal": "launch_scripts/train_vla.py",
38
+ "git": {
39
+ "remote": "https://github.com/Spatialtemporal-AI/A1.git",
40
+ "commit": "5071f59d87c6a976691323cbac66d7a988b0b4e7"
41
+ },
42
+ "email": "ihenrykwok@outlook.com",
43
+ "root": "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wandb",
44
+ "host": "auh7-1b-gpu-293",
45
+ "executable": "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
46
+ "cpu_count": 64,
47
+ "cpu_count_logical": 128,
48
+ "gpu": "Instinct MI210",
49
+ "gpu_count": 8,
50
+ "disk": {
51
+ "/": {
52
+ "total": "470343073792",
53
+ "used": "50658734080"
54
+ }
55
+ },
56
+ "memory": {
57
+ "total": "2434606936064"
58
+ },
59
+ "gpu_amd": [
60
+ {
61
+ "id": "6",
62
+ "uniqueId": "0xa307dde62eec0d7d",
63
+ "vbiosVersion": "113-D67301V-073",
64
+ "performanceLevel": "auto",
65
+ "maxPower": "300.0",
66
+ "series": "Instinct MI210",
67
+ "model": "0x740f",
68
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
69
+ "sku": "D67301V",
70
+ "sclkRange": "500Mhz - 1700Mhz",
71
+ "mclkRange": "400Mhz - 1600Mhz"
72
+ },
73
+ {
74
+ "id": "4",
75
+ "uniqueId": "0xd8fa68fa19711efd",
76
+ "vbiosVersion": "113-D67301V-073",
77
+ "performanceLevel": "auto",
78
+ "maxPower": "300.0",
79
+ "series": "Instinct MI210",
80
+ "model": "0x740f",
81
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
82
+ "sku": "D67301V",
83
+ "sclkRange": "500Mhz - 1700Mhz",
84
+ "mclkRange": "400Mhz - 1600Mhz"
85
+ },
86
+ {
87
+ "id": "3",
88
+ "uniqueId": "0x36cd9caedcbd1661",
89
+ "vbiosVersion": "113-D67301V-073",
90
+ "performanceLevel": "auto",
91
+ "maxPower": "300.0",
92
+ "series": "Instinct MI210",
93
+ "model": "0x740f",
94
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
95
+ "sku": "D67301V",
96
+ "sclkRange": "500Mhz - 1700Mhz",
97
+ "mclkRange": "400Mhz - 1600Mhz"
98
+ },
99
+ {
100
+ "id": "7",
101
+ "uniqueId": "0xba4e7044cb7e770",
102
+ "vbiosVersion": "113-D67301V-073",
103
+ "performanceLevel": "auto",
104
+ "maxPower": "300.0",
105
+ "series": "Instinct MI210",
106
+ "model": "0x740f",
107
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
108
+ "sku": "D67301V",
109
+ "sclkRange": "500Mhz - 1700Mhz",
110
+ "mclkRange": "400Mhz - 1600Mhz"
111
+ },
112
+ {
113
+ "id": "1",
114
+ "uniqueId": "0xbd5d0be0d2a8e2aa",
115
+ "vbiosVersion": "113-D67301V-073",
116
+ "performanceLevel": "auto",
117
+ "maxPower": "300.0",
118
+ "series": "Instinct MI210",
119
+ "model": "0x740f",
120
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
121
+ "sku": "D67301V",
122
+ "sclkRange": "500Mhz - 1700Mhz",
123
+ "mclkRange": "400Mhz - 1600Mhz"
124
+ },
125
+ {
126
+ "id": "0",
127
+ "uniqueId": "0x5ad6d84cdd116aca",
128
+ "vbiosVersion": "113-D67301V-073",
129
+ "performanceLevel": "auto",
130
+ "maxPower": "300.0",
131
+ "series": "Instinct MI210",
132
+ "model": "0x740f",
133
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
134
+ "sku": "D67301V",
135
+ "sclkRange": "500Mhz - 1700Mhz",
136
+ "mclkRange": "400Mhz - 1600Mhz"
137
+ },
138
+ {
139
+ "id": "2",
140
+ "uniqueId": "0xd3246a860ff61784",
141
+ "vbiosVersion": "113-D67301V-073",
142
+ "performanceLevel": "auto",
143
+ "maxPower": "300.0",
144
+ "series": "Instinct MI210",
145
+ "model": "0x740f",
146
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
147
+ "sku": "D67301V",
148
+ "sclkRange": "500Mhz - 1700Mhz",
149
+ "mclkRange": "400Mhz - 1600Mhz"
150
+ },
151
+ {
152
+ "id": "5",
153
+ "uniqueId": "0x8c18f9eeeea22bf2",
154
+ "vbiosVersion": "113-D67301V-073",
155
+ "performanceLevel": "auto",
156
+ "maxPower": "300.0",
157
+ "series": "Instinct MI210",
158
+ "model": "0x740f",
159
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
160
+ "sku": "D67301V",
161
+ "sclkRange": "500Mhz - 1700Mhz",
162
+ "mclkRange": "400Mhz - 1600Mhz"
163
+ }
164
+ ],
165
+ "slurm": {
166
+ "cluster_name": "ai-04r",
167
+ "conf": "/etc/slurm/slurm.conf",
168
+ "cpus_on_node": "128",
169
+ "gpus_on_node": "8",
170
+ "gtids": "0",
171
+ "job_account": "faculty-acc",
172
+ "job_cpus_per_node": "128",
173
+ "job_end_time": "1759676881",
174
+ "job_gid": "2000",
175
+ "job_gpus": "0,1,2,3,4,5,6,7",
176
+ "job_id": "2221",
177
+ "job_name": "mh_cleandesk",
178
+ "job_nodelist": "auh7-1b-gpu-293",
179
+ "job_num_nodes": "1",
180
+ "job_partition": "faculty",
181
+ "job_qos": "xdqos",
182
+ "job_start_time": "1759417681",
183
+ "job_uid": "2013",
184
+ "job_user": "xiaodan",
185
+ "jobid": "2221",
186
+ "localid": "0",
187
+ "nnodes": "1",
188
+ "nodeid": "0",
189
+ "nodelist": "auh7-1b-gpu-293",
190
+ "nprocs": "1",
191
+ "ntasks": "1",
192
+ "ntasks_per_node": "1",
193
+ "oom_kill_step": "0",
194
+ "prio_process": "0",
195
+ "procid": "0",
196
+ "submit_dir": "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
197
+ "submit_host": "auh-1b-cpu-login-001",
198
+ "task_pid": "1804994",
199
+ "tasks_per_node": "1",
200
+ "topology_addr": "auh7-1b-gpu-293",
201
+ "topology_addr_pattern": "node"
202
+ },
203
+ "writerId": "z2ddwaxyl0hxhxvll2z1wkcfd4ygtgyd"
204
+ }
wandb/wandb/run-20251002_150921-kqbx0cjv/logs/debug-internal.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-10-02T15:09:21.507261489Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
2
+ {"time":"2025-10-02T15:09:22.667529923Z","level":"INFO","msg":"stream: created new stream","id":"kqbx0cjv"}
3
+ {"time":"2025-10-02T15:09:22.667591843Z","level":"INFO","msg":"stream: started","id":"kqbx0cjv"}
4
+ {"time":"2025-10-02T15:09:22.667623244Z","level":"INFO","msg":"writer: started","stream_id":"kqbx0cjv"}
5
+ {"time":"2025-10-02T15:09:22.667639754Z","level":"INFO","msg":"handler: started","stream_id":"kqbx0cjv"}
6
+ {"time":"2025-10-02T15:09:22.667666975Z","level":"INFO","msg":"sender: started","stream_id":"kqbx0cjv"}
wandb/wandb/run-20251002_150921-kqbx0cjv/logs/debug.log ADDED
File without changes
wandb/wandb/run-20251002_151047-gal9lnsm/files/output.log ADDED
@@ -0,0 +1,365 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb: Detected [openai] in use.
2
+ wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
3
+ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
4
+ 10/02 [15:10:54] WARNING | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No warnings.py:109
5
+ device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
6
+ warnings.warn( # warn only once
7
+
8
+ ****** vla_cfg: {'datasets': {'rlds': {'name': None, 'path': None, 'weight': 1.0, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_CleanDesk50', 8, 'bounds']], 'open-source-real-world': {'rlds': {'name': 'a1_real_world', 'path': '/vast/users/xiaodan/zhangjian/datasets/OXE', 'weight': 8, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [], 'agibot': {'path': None, 'weight': 8, 'action_proprio_normalization_type': None}}}, 'model': {'action_head': {'action_dim': 16, 'proprio_dim': 16, 'num_actions_chunk': 8, 'action_tokens_mapping': {'left_end_effector': 8, 'right_end_effector': 8}, 'use_left_eef': True, 'use_mobile_base': False}}}
9
+ ****** Skip RLDS main; path not found: None
10
+ ****** start build LeRobot main...
11
+ build_tokenizer, cache_dir None tokenizer_dir None
12
+ 10/02 [15:11:06] INFO | >> Padding tokenizer with 418 tokens tokenizer.py:130
13
+ INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:435
14
+ ****** before LeRobot dataset...
15
+ ****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_CleanDesk50
16
+ ****** length of the dataset: 27906
17
+ ****** Skip RLDS open-source-real-world; mixture 'a1_real_world' not found under: /vast/users/xiaodan/zhangjian/datasets/OXE
18
+ ****** Expect one of: []
19
+ ****** path: None
20
+ ****** Skip AgiBotWorld-Alpha open-source-real-world; path not found: None
21
+ ****** After build vla train dataset...
22
+ ****** iterable_sources: [<olmo.data.dataset.IterableDatasetWrapper object at 0x7f11812bff40>]
23
+ ****** Before build mixed iterable dataset...
24
+ ****** Build vla train dataloader successfully!
25
+ ************************* Build train_dataloader successful!
26
+ ************************* Before build_inf_evaluators
27
+ 10/02 [15:11:14] WARNING | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No warnings.py:109
28
+ device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
29
+ warnings.warn( # warn only once
30
+
31
+ ************************* Build evaluators successful!
32
+ ************************* Early exit flags: early_exit=False
33
+ ************************* Initialize model successful!
34
+ ***** state_dict_path: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924/model.pt
35
+ ***** Load checkpoint successful!
36
+ missing keys: ['action_head.model.layer_norm1.weight', 'action_head.model.layer_norm1.bias', 'action_head.model.fc1.weight', 'action_head.model.fc1.bias', 'action_head.model.mlp_resnet_blocks.0.ffn.0.weight', 'action_head.model.mlp_resnet_blocks.0.ffn.0.bias', 'action_head.model.mlp_resnet_blocks.0.ffn.1.weight', 'action_head.model.mlp_resnet_blocks.0.ffn.1.bias', 'action_head.model.mlp_resnet_blocks.1.ffn.0.weight', 'action_head.model.mlp_resnet_blocks.1.ffn.0.bias', 'action_head.model.mlp_resnet_blocks.1.ffn.1.weight', 'action_head.model.mlp_resnet_blocks.1.ffn.1.bias', 'action_head.model.layer_norm2.weight', 'action_head.model.layer_norm2.bias', 'action_head.model.fc2.weight', 'action_head.model.fc2.bias', 'proprio_projector.fc1.weight', 'proprio_projector.fc1.bias', 'proprio_projector.fc2.weight', 'proprio_projector.fc2.bias']
37
+ unexpected keys: []
38
+ ************************* Initialize model successful!
39
+ ************************* LoRA flags: use_lora=True, lora_llm=False, lora_vit=False, lora_connector=False
40
+ ************************* Before add lora to model
41
+ ************************* Before FSDP model wrapping
42
+ ************************* FSDP model wrapping successful!
43
+ ************************* Before building optimizer and scheduler
44
+ ************* Before get lora params
45
+ ************* After get lora params successfully
46
+ 10/02 [15:12:41] INFO | >> Constructing optimizer with 2 param groups optim.py:1283
47
+ **************************************************
48
+ After building optimizer and scheduler and model, before training, peak GPU memory (MB): 35614
49
+ ************************* VLATrainer initialized successfully!
50
+ ************************* Before trainer.fit()
51
+ Pre-train system metrics
52
+ System/Peak GPU Memory (MB)=35,614
53
+ 10/02 [15:12:42] WARNING | >> /vast/users/xiaodan/zhangjian/A1/olmo/data/collator.py:200: UserWarning: To copy construct from a tensor, it is recommended to use warnings.py:109
54
+ sourceTensor.detach().clone() or sourceTensor.detach().clone().requires_grad_(True), rather than torch.tensor(sourceTensor).
55
+ timestep_list = [torch.tensor(ex["timestep"], dtype=torch.int64) for ex in batch]
56
+
57
+ [step=1/500000]
58
+ train/ActionL1Loss=0.6062
59
+ throughput/total_tokens=192,000
60
+ System/Peak GPU Memory (MB)=40,144
61
+ [step=2/500000]
62
+ train/ActionL1Loss=0.6043
63
+ throughput/total_tokens=384,000
64
+ throughput/device/tokens_per_second=1,201
65
+ throughput/device/batches_per_second=0.0501
66
+ System/Peak GPU Memory (MB)=46,917
67
+ [step=3/500000]
68
+ train/ActionL1Loss=0.5778
69
+ throughput/total_tokens=576,000
70
+ throughput/device/tokens_per_second=1,169
71
+ throughput/device/batches_per_second=0.0487
72
+ [step=4/500000]
73
+ train/ActionL1Loss=0.5434
74
+ throughput/total_tokens=768,000
75
+ throughput/device/tokens_per_second=1,155
76
+ throughput/device/batches_per_second=0.0481
77
+ [step=5/500000]
78
+ train/ActionL1Loss=0.5383
79
+ throughput/total_tokens=960,000
80
+ throughput/device/tokens_per_second=1,153
81
+ throughput/device/batches_per_second=0.0481
82
+ [step=6/500000]
83
+ train/ActionL1Loss=0.5146
84
+ throughput/total_tokens=1,152,000
85
+ throughput/device/tokens_per_second=1,152
86
+ throughput/device/batches_per_second=0.0480
87
+ [step=7/500000]
88
+ train/ActionL1Loss=0.4823
89
+ throughput/total_tokens=1,344,000
90
+ throughput/device/tokens_per_second=1,152
91
+ throughput/device/batches_per_second=0.0480
92
+ [step=8/500000]
93
+ train/ActionL1Loss=0.4415
94
+ throughput/total_tokens=1,536,000
95
+ throughput/device/tokens_per_second=1,153
96
+ throughput/device/batches_per_second=0.0481
97
+ [step=9/500000]
98
+ train/ActionL1Loss=0.4776
99
+ throughput/total_tokens=1,728,000
100
+ throughput/device/tokens_per_second=1,155
101
+ throughput/device/batches_per_second=0.0481
102
+ [step=10/500000]
103
+ train/ActionL1Loss=0.4819
104
+ throughput/total_tokens=1,920,000
105
+ throughput/device/tokens_per_second=1,156
106
+ throughput/device/batches_per_second=0.0482
107
+ System/Peak GPU Memory (MB)=46,917
108
+ [step=11/500000]
109
+ train/ActionL1Loss=0.4335
110
+ throughput/total_tokens=2,112,000
111
+ throughput/device/tokens_per_second=1,157
112
+ throughput/device/batches_per_second=0.0482
113
+ [step=12/500000]
114
+ train/ActionL1Loss=0.4216
115
+ throughput/total_tokens=2,304,000
116
+ throughput/device/tokens_per_second=1,158
117
+ throughput/device/batches_per_second=0.0483
118
+ [step=13/500000]
119
+ train/ActionL1Loss=0.3851
120
+ throughput/total_tokens=2,496,000
121
+ throughput/device/tokens_per_second=1,159
122
+ throughput/device/batches_per_second=0.0483
123
+ [step=14/500000]
124
+ train/ActionL1Loss=0.3569
125
+ throughput/total_tokens=2,688,000
126
+ throughput/device/tokens_per_second=1,160
127
+ throughput/device/batches_per_second=0.0483
128
+ [step=15/500000]
129
+ train/ActionL1Loss=0.4119
130
+ throughput/total_tokens=2,880,000
131
+ throughput/device/tokens_per_second=1,160
132
+ throughput/device/batches_per_second=0.0484
133
+ [step=16/500000]
134
+ train/ActionL1Loss=0.4318
135
+ throughput/total_tokens=3,072,000
136
+ throughput/device/tokens_per_second=1,161
137
+ throughput/device/batches_per_second=0.0484
138
+ [step=17/500000]
139
+ train/ActionL1Loss=0.3860
140
+ throughput/total_tokens=3,264,000
141
+ throughput/device/tokens_per_second=1,161
142
+ throughput/device/batches_per_second=0.0484
143
+ [step=18/500000]
144
+ train/ActionL1Loss=0.3929
145
+ throughput/total_tokens=3,456,000
146
+ throughput/device/tokens_per_second=1,162
147
+ throughput/device/batches_per_second=0.0484
148
+ [step=19/500000]
149
+ train/ActionL1Loss=0.3802
150
+ throughput/total_tokens=3,648,000
151
+ throughput/device/tokens_per_second=1,162
152
+ throughput/device/batches_per_second=0.0484
153
+ [step=20/500000]
154
+ optim/total_grad_norm=29.43
155
+ train/ActionL1Loss=0.3528
156
+ throughput/total_tokens=3,840,000
157
+ throughput/device/tokens_per_second=1,162
158
+ throughput/device/batches_per_second=0.0484
159
+ System/Peak GPU Memory (MB)=46,917
160
+ [step=21/500000]
161
+ train/ActionL1Loss=0.3761
162
+ throughput/total_tokens=4,032,000
163
+ throughput/device/tokens_per_second=1,162
164
+ throughput/device/batches_per_second=0.0484
165
+ [step=22/500000]
166
+ train/ActionL1Loss=0.3916
167
+ throughput/total_tokens=4,224,000
168
+ throughput/device/tokens_per_second=1,162
169
+ throughput/device/batches_per_second=0.0484
170
+ [step=23/500000]
171
+ train/ActionL1Loss=0.3271
172
+ throughput/total_tokens=4,416,000
173
+ throughput/device/tokens_per_second=1,164
174
+ throughput/device/batches_per_second=0.0485
175
+ [step=24/500000]
176
+ train/ActionL1Loss=0.3833
177
+ throughput/total_tokens=4,608,000
178
+ throughput/device/tokens_per_second=1,166
179
+ throughput/device/batches_per_second=0.0486
180
+ [step=25/500000]
181
+ train/ActionL1Loss=0.3419
182
+ throughput/total_tokens=4,800,000
183
+ throughput/device/tokens_per_second=1,167
184
+ throughput/device/batches_per_second=0.0487
185
+ [step=26/500000]
186
+ train/ActionL1Loss=0.3660
187
+ throughput/total_tokens=4,992,000
188
+ throughput/device/tokens_per_second=1,168
189
+ throughput/device/batches_per_second=0.0487
190
+ [step=27/500000]
191
+ train/ActionL1Loss=0.3771
192
+ throughput/total_tokens=5,184,000
193
+ throughput/device/tokens_per_second=1,169
194
+ throughput/device/batches_per_second=0.0487
195
+ [step=28/500000]
196
+ train/ActionL1Loss=0.3350
197
+ throughput/total_tokens=5,376,000
198
+ throughput/device/tokens_per_second=1,170
199
+ throughput/device/batches_per_second=0.0488
200
+ [step=29/500000]
201
+ train/ActionL1Loss=0.4330
202
+ throughput/total_tokens=5,568,000
203
+ throughput/device/tokens_per_second=1,170
204
+ throughput/device/batches_per_second=0.0488
205
+ [step=30/500000]
206
+ train/ActionL1Loss=0.3133
207
+ throughput/total_tokens=5,760,000
208
+ throughput/device/tokens_per_second=1,170
209
+ throughput/device/batches_per_second=0.0488
210
+ System/Peak GPU Memory (MB)=46,917
211
+ [step=31/500000]
212
+ train/ActionL1Loss=0.3785
213
+ throughput/total_tokens=5,952,000
214
+ throughput/device/tokens_per_second=1,170
215
+ throughput/device/batches_per_second=0.0488
216
+ [step=32/500000]
217
+ train/ActionL1Loss=0.3183
218
+ throughput/total_tokens=6,144,000
219
+ throughput/device/tokens_per_second=1,170
220
+ throughput/device/batches_per_second=0.0488
221
+ [step=33/500000]
222
+ train/ActionL1Loss=0.3700
223
+ throughput/total_tokens=6,336,000
224
+ throughput/device/tokens_per_second=1,170
225
+ throughput/device/batches_per_second=0.0488
226
+ [step=34/500000]
227
+ train/ActionL1Loss=0.3268
228
+ throughput/total_tokens=6,528,000
229
+ throughput/device/tokens_per_second=1,170
230
+ throughput/device/batches_per_second=0.0488
231
+ [step=35/500000]
232
+ train/ActionL1Loss=0.3539
233
+ throughput/total_tokens=6,720,000
234
+ throughput/device/tokens_per_second=1,170
235
+ throughput/device/batches_per_second=0.0488
236
+ [step=36/500000]
237
+ train/ActionL1Loss=0.3596
238
+ throughput/total_tokens=6,912,000
239
+ throughput/device/tokens_per_second=1,170
240
+ throughput/device/batches_per_second=0.0488
241
+ [step=37/500000]
242
+ train/ActionL1Loss=0.3529
243
+ throughput/total_tokens=7,104,000
244
+ throughput/device/tokens_per_second=1,170
245
+ throughput/device/batches_per_second=0.0488
246
+ [step=38/500000]
247
+ train/ActionL1Loss=0.3620
248
+ throughput/total_tokens=7,296,000
249
+ throughput/device/tokens_per_second=1,170
250
+ throughput/device/batches_per_second=0.0488
251
+ [step=39/500000]
252
+ train/ActionL1Loss=0.3647
253
+ throughput/total_tokens=7,488,000
254
+ throughput/device/tokens_per_second=1,170
255
+ throughput/device/batches_per_second=0.0488
256
+ [step=40/500000]
257
+ optim/total_grad_norm=21.94
258
+ train/ActionL1Loss=0.3782
259
+ throughput/total_tokens=7,680,000
260
+ throughput/device/tokens_per_second=1,170
261
+ throughput/device/batches_per_second=0.0488
262
+ System/Peak GPU Memory (MB)=46,917
263
+ [step=41/500000]
264
+ train/ActionL1Loss=0.3259
265
+ throughput/total_tokens=7,872,000
266
+ throughput/device/tokens_per_second=1,170
267
+ throughput/device/batches_per_second=0.0488
268
+ [step=42/500000]
269
+ train/ActionL1Loss=0.3395
270
+ throughput/total_tokens=8,064,000
271
+ throughput/device/tokens_per_second=1,169
272
+ throughput/device/batches_per_second=0.0487
273
+ [step=43/500000]
274
+ train/ActionL1Loss=0.3244
275
+ throughput/total_tokens=8,256,000
276
+ throughput/device/tokens_per_second=1,168
277
+ throughput/device/batches_per_second=0.0487
278
+ [step=44/500000]
279
+ train/ActionL1Loss=0.3537
280
+ throughput/total_tokens=8,448,000
281
+ throughput/device/tokens_per_second=1,167
282
+ throughput/device/batches_per_second=0.0486
283
+ [step=45/500000]
284
+ train/ActionL1Loss=0.3423
285
+ throughput/total_tokens=8,640,000
286
+ throughput/device/tokens_per_second=1,167
287
+ throughput/device/batches_per_second=0.0486
288
+ [step=46/500000]
289
+ train/ActionL1Loss=0.3216
290
+ throughput/total_tokens=8,832,000
291
+ throughput/device/tokens_per_second=1,167
292
+ throughput/device/batches_per_second=0.0487
293
+ [step=47/500000]
294
+ train/ActionL1Loss=0.3626
295
+ throughput/total_tokens=9,024,000
296
+ throughput/device/tokens_per_second=1,167
297
+ throughput/device/batches_per_second=0.0487
298
+ [step=48/500000]
299
+ train/ActionL1Loss=0.3210
300
+ throughput/total_tokens=9,216,000
301
+ throughput/device/tokens_per_second=1,168
302
+ throughput/device/batches_per_second=0.0487
303
+ [step=49/500000]
304
+ train/ActionL1Loss=0.3180
305
+ throughput/total_tokens=9,408,000
306
+ throughput/device/tokens_per_second=1,168
307
+ throughput/device/batches_per_second=0.0487
308
+ [step=50/500000]
309
+ train/ActionL1Loss=0.3499
310
+ throughput/total_tokens=9,600,000
311
+ throughput/device/tokens_per_second=1,169
312
+ throughput/device/batches_per_second=0.0487
313
+ System/Peak GPU Memory (MB)=46,917
314
+ [step=51/500000]
315
+ train/ActionL1Loss=0.3278
316
+ throughput/total_tokens=9,792,000
317
+ throughput/device/tokens_per_second=1,166
318
+ throughput/device/batches_per_second=0.0486
319
+ [step=52/500000]
320
+ train/ActionL1Loss=0.3730
321
+ throughput/total_tokens=9,984,000
322
+ throughput/device/tokens_per_second=1,166
323
+ throughput/device/batches_per_second=0.0486
324
+ [step=53/500000]
325
+ train/ActionL1Loss=0.3430
326
+ throughput/total_tokens=10,176,000
327
+ throughput/device/tokens_per_second=1,166
328
+ throughput/device/batches_per_second=0.0486
329
+ [step=54/500000]
330
+ train/ActionL1Loss=0.3628
331
+ throughput/total_tokens=10,368,000
332
+ throughput/device/tokens_per_second=1,166
333
+ throughput/device/batches_per_second=0.0486
334
+ [step=55/500000]
335
+ train/ActionL1Loss=0.3139
336
+ throughput/total_tokens=10,560,000
337
+ throughput/device/tokens_per_second=1,166
338
+ throughput/device/batches_per_second=0.0486
339
+ [step=56/500000]
340
+ train/ActionL1Loss=0.3347
341
+ throughput/total_tokens=10,752,000
342
+ throughput/device/tokens_per_second=1,166
343
+ throughput/device/batches_per_second=0.0486
344
+ [step=57/500000]
345
+ train/ActionL1Loss=0.3825
346
+ throughput/total_tokens=10,944,000
347
+ throughput/device/tokens_per_second=1,165
348
+ throughput/device/batches_per_second=0.0486
349
+ [step=58/500000]
350
+ train/ActionL1Loss=0.3657
351
+ throughput/total_tokens=11,136,000
352
+ throughput/device/tokens_per_second=1,165
353
+ throughput/device/batches_per_second=0.0486
354
+ [step=59/500000]
355
+ train/ActionL1Loss=0.3329
356
+ throughput/total_tokens=11,328,000
357
+ throughput/device/tokens_per_second=1,165
358
+ throughput/device/batches_per_second=0.0486
359
+ [step=60/500000]
360
+ optim/total_grad_norm=28.20
361
+ train/ActionL1Loss=0.3882
362
+ throughput/total_tokens=11,520,000
363
+ throughput/device/tokens_per_second=1,165
364
+ throughput/device/batches_per_second=0.0485
365
+ System/Peak GPU Memory (MB)=46,917
wandb/wandb/run-20251002_151047-gal9lnsm/files/requirements.txt ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ai2-molmo==0.0.0
2
+ astunparse==1.6.3
3
+ flatbuffers==25.2.10
4
+ gast==0.6.0
5
+ google-pasta==0.2.0
6
+ h5py==3.14.0
7
+ libclang==18.1.1
8
+ Markdown==3.9
9
+ namex==0.1.0
10
+ opt_einsum==3.4.0
11
+ optree==0.17.0
12
+ tensorboard-data-server==0.7.2
13
+ tensorflow-io-gcs-filesystem==0.37.1
14
+ termcolor==3.1.0
15
+ Werkzeug==3.1.3
16
+ Brotli==1.1.0
17
+ Farama-Notifications==0.0.4
18
+ MarkupSafe==2.1.5
19
+ PyYAML==6.0.2
20
+ absl-py==2.3.1
21
+ accelerate==1.10.1
22
+ ai2-molmo==0.0.0
23
+ aiofiles==24.1.0
24
+ aiohappyeyeballs==2.6.1
25
+ aiohttp==3.12.15
26
+ aiosignal==1.4.0
27
+ annotated-types==0.7.0
28
+ antlr4-python3-runtime==4.9.3
29
+ anyio==4.10.0
30
+ array_record==0.8.1
31
+ async-timeout==5.0.1
32
+ attrs==25.3.0
33
+ av==15.1.0
34
+ backports.tarfile==1.2.0
35
+ beaker-gantry==3.2.0
36
+ beaker-py==2.5.0
37
+ black==23.12.1
38
+ blinker==1.9.0
39
+ boltons==25.0.0
40
+ boto3==1.40.33
41
+ botocore==1.40.33
42
+ build==1.3.0
43
+ cached_path==1.7.3
44
+ cached-property==2.0.1
45
+ cachetools==5.5.2
46
+ certifi==2025.8.3
47
+ cffi==2.0.0
48
+ charset-normalizer==3.4.3
49
+ click==8.2.1
50
+ click-help-colors==0.9.4
51
+ click-option-group==0.5.7
52
+ cloudpickle==3.1.1
53
+ cmake==4.1.0
54
+ contourpy==1.3.2
55
+ cryptography==46.0.1
56
+ cycler==0.12.1
57
+ dataclass-extensions==0.2.3
58
+ datasets==3.6.0
59
+ decorator==5.2.1
60
+ deepdiff==8.6.1
61
+ diffusers==0.35.1
62
+ dill==0.3.8
63
+ distro==1.9.0
64
+ dlimp==0.0.1
65
+ dm-tree==0.1.9
66
+ docutils==0.22.1
67
+ draccus==0.10.0
68
+ editdistance==0.8.1
69
+ einops==0.8.1
70
+ einops-exts==0.0.4
71
+ et_xmlfile==2.0.0
72
+ etils==1.13.0
73
+ evdev==1.9.2
74
+ exceptiongroup==1.3.0
75
+ face==24.0.0
76
+ fastapi==0.116.2
77
+ ffmpy==0.6.1
78
+ fiddle==0.3.0
79
+ filelock==3.13.1
80
+ Flask==3.1.2
81
+ fonttools==4.60.0
82
+ frozenlist==1.7.0
83
+ fsspec==2023.9.2
84
+ ftfy==6.3.1
85
+ gcsfs==2023.9.2
86
+ gitdb==4.0.12
87
+ GitPython==3.1.45
88
+ glom==24.11.0
89
+ google-api-core==2.25.1
90
+ google-auth==2.40.3
91
+ google-auth-oauthlib==1.2.2
92
+ google-cloud-core==2.4.3
93
+ google-cloud-storage==2.19.0
94
+ google-crc32c==1.7.1
95
+ google-resumable-media==2.7.2
96
+ googleapis-common-protos==1.70.0
97
+ gradio==5.46.0
98
+ gradio_client==1.13.0
99
+ graphviz==0.21
100
+ groovy==0.1.2
101
+ grpcio==1.75.0
102
+ gymnasium==0.29.1
103
+ h11==0.16.0
104
+ hf_transfer==0.1.9
105
+ hf-xet==1.1.10
106
+ httpcore==1.0.9
107
+ httpx==0.28.1
108
+ huggingface-hub==0.35.0
109
+ id==1.5.0
110
+ idna==3.10
111
+ imageio==2.37.0
112
+ imageio-ffmpeg==0.6.0
113
+ importlib_metadata==8.7.0
114
+ importlib_resources==6.5.2
115
+ iniconfig==2.1.0
116
+ inquirerpy==0.3.4
117
+ isort==5.12.0
118
+ itsdangerous==2.2.0
119
+ jaraco.classes==3.4.0
120
+ jaraco.context==6.0.1
121
+ jaraco.functools==4.3.0
122
+ jeepney==0.9.0
123
+ Jinja2==3.1.4
124
+ jiter==0.11.0
125
+ jmespath==1.0.1
126
+ joblib==1.5.2
127
+ jsonlines==4.0.0
128
+ keras==2.15.0
129
+ keyring==25.6.0
130
+ kiwisolver==1.4.9
131
+ latex2sympy2_extended==1.10.2
132
+ lerobot==0.3.4
133
+ Levenshtein==0.27.1
134
+ libcst==1.8.4
135
+ lightning-utilities==0.15.2
136
+ markdown-it-py==4.0.0
137
+ math-verify==0.8.0
138
+ matplotlib==3.10.6
139
+ mdurl==0.1.2
140
+ mergedeep==1.3.4
141
+ ml-dtypes==0.2.0
142
+ ml_dtypes==0.5.3
143
+ more-itertools==10.8.0
144
+ mpmath==1.3.0
145
+ msgspec==0.19.0
146
+ multidict==6.6.4
147
+ multiprocess==0.70.16
148
+ mypy==1.3.0
149
+ mypy_extensions==1.1.0
150
+ necessary==0.4.3
151
+ networkx==3.3
152
+ nh3==0.3.0
153
+ nltk==3.9.1
154
+ numpy==1.26.4
155
+ oauthlib==3.3.1
156
+ omegaconf==2.3.0
157
+ openai==1.108.0
158
+ opencv-python-headless==4.12.0.88
159
+ OpenEXR==3.4.0
160
+ openpyxl==3.1.5
161
+ orderly-set==5.5.0
162
+ orjson==3.11.3
163
+ packaging==25.0
164
+ pandas==2.3.2
165
+ pathspec==0.12.1
166
+ petname==2.6
167
+ pfzy==0.3.4
168
+ pillow==11.0.0
169
+ pip==25.2
170
+ platformdirs==4.4.0
171
+ pluggy==1.6.0
172
+ promise==2.3
173
+ prompt_toolkit==3.0.52
174
+ propcache==0.3.2
175
+ proto-plus==1.26.1
176
+ protobuf==4.21.12
177
+ protobuf==6.32.1
178
+ psutil==7.1.0
179
+ pyarrow==21.0.0
180
+ pyasn1==0.6.1
181
+ pyasn1_modules==0.4.2
182
+ pycparser==2.23
183
+ pydantic==2.11.9
184
+ pydantic_core==2.33.2
185
+ pydub==0.25.1
186
+ Pygments==2.19.2
187
+ pynput==1.8.1
188
+ pyparsing==3.2.4
189
+ pyproject_hooks==1.2.0
190
+ pyserial==3.5
191
+ pytest==8.4.2
192
+ pytest-sphinx==0.6.3
193
+ python-dateutil==2.9.0.post0
194
+ python-Levenshtein==0.27.1
195
+ python-multipart==0.0.20
196
+ python-xlib==0.33
197
+ pytorch-triton-rocm==3.4.0
198
+ pytz==2025.2
199
+ pyyaml-include==1.4.1
200
+ RapidFuzz==3.14.1
201
+ readme_renderer==44.0
202
+ regex==2025.9.1
203
+ requests==2.32.5
204
+ requests-oauthlib==2.0.0
205
+ requests-toolbelt==1.0.0
206
+ requirements-parser==0.13.0
207
+ rerun-sdk==0.22.1
208
+ rfc3986==2.0.0
209
+ rich==13.9.4
210
+ rsa==4.9.1
211
+ ruff==0.13.0
212
+ s3transfer==0.14.0
213
+ safehttpx==0.1.6
214
+ safetensors==0.6.2
215
+ scikit-learn==1.7.2
216
+ scipy==1.15.3
217
+ SecretStorage==3.4.0
218
+ semantic-version==2.10.0
219
+ sentencepiece==0.2.1
220
+ sentry-sdk==2.38.0
221
+ setuptools==78.1.1
222
+ shellingham==1.5.4
223
+ six==1.17.0
224
+ smart_open==7.3.1
225
+ smashed==0.21.5
226
+ smmap==5.0.2
227
+ sniffio==1.3.1
228
+ starlette==0.48.0
229
+ sympy==1.13.3
230
+ tensorboard==2.15.2
231
+ tensorboard==2.19.0
232
+ tensorflow==2.15.0
233
+ tensorflow-addons==0.23.0
234
+ tensorflow-datasets==4.9.3
235
+ tensorflow-estimator==2.15.0
236
+ tensorflow-graphics==2021.12.3
237
+ tensorflow-metadata==1.17.2
238
+ threadpoolctl==3.6.0
239
+ timm==1.0.19
240
+ tokenizers==0.22.0
241
+ toml==0.10.2
242
+ tomli==2.2.1
243
+ tomlkit==0.13.3
244
+ torch==2.8.0+rocm6.4
245
+ torchcodec==0.5
246
+ torchmetrics==1.8.2
247
+ torchvision==0.23.0+rocm6.4
248
+ tqdm==4.67.1
249
+ transformers==4.56.1
250
+ trimesh==4.8.2
251
+ trouting==0.3.3
252
+ twine==6.2.0
253
+ typeguard==2.13.3
254
+ typer==0.17.4
255
+ typing_extensions==4.15.0
256
+ typing-inspect==0.9.0
257
+ typing-inspection==0.4.1
258
+ tzdata==2025.2
259
+ urllib3==2.5.0
260
+ uvicorn==0.35.0
261
+ wandb==0.21.4
262
+ wcwidth==0.2.13
263
+ websockets==15.0.1
264
+ wheel==0.45.1
265
+ wrapt==1.14.2
266
+ xxhash==3.5.0
267
+ yarl==1.20.1
268
+ zipp==3.23.0
269
+ lerobot==0.3.4
270
+ minLoRA==0.1.0
271
+ autocommand==2.2.2
272
+ backports.tarfile==1.2.0
273
+ importlib_metadata==8.0.0
274
+ inflect==7.3.1
275
+ jaraco.collections==5.1.0
276
+ jaraco.context==5.3.0
277
+ jaraco.functools==4.0.1
278
+ jaraco.text==3.12.1
279
+ more-itertools==10.3.0
280
+ packaging==24.2
281
+ platformdirs==4.2.2
282
+ tomli==2.0.1
283
+ typeguard==4.3.0
284
+ typing_extensions==4.12.2
285
+ wheel==0.45.1
286
+ zipp==3.19.2
wandb/wandb/run-20251002_151047-gal9lnsm/files/wandb-metadata.json ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.18",
4
+ "startedAt": "2025-10-02T15:10:47.778990Z",
5
+ "args": [
6
+ "qwen2_7b",
7
+ "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/",
8
+ "--vision_backbone",
9
+ "openai",
10
+ "--action_head",
11
+ "l1_regression",
12
+ "--seq_len",
13
+ "1600",
14
+ "--ft_llm",
15
+ "--checkpoint",
16
+ "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
17
+ "--device_train_microbatch_size",
18
+ "16",
19
+ "--global_batch_size",
20
+ "126",
21
+ "--dataset",
22
+ "vla_dataset_realworld",
23
+ "--llm_learning_rate",
24
+ "5e-5",
25
+ "--wandb_entity",
26
+ "henryeap",
27
+ "--wandb_project",
28
+ "a1-realworld",
29
+ "--wandb_run_name",
30
+ "realworld",
31
+ "--real_world_vla_config_path",
32
+ "vla_config_realworld/vla_config_cleandesk50.yaml",
33
+ "--save_overwrite"
34
+ ],
35
+ "program": "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
36
+ "codePath": "launch_scripts/train_vla.py",
37
+ "codePathLocal": "launch_scripts/train_vla.py",
38
+ "git": {
39
+ "remote": "https://github.com/Spatialtemporal-AI/A1.git",
40
+ "commit": "5071f59d87c6a976691323cbac66d7a988b0b4e7"
41
+ },
42
+ "root": "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wandb",
43
+ "host": "auh7-1b-gpu-260",
44
+ "executable": "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
45
+ "cpu_count": 64,
46
+ "cpu_count_logical": 128,
47
+ "gpu": "Instinct MI210",
48
+ "gpu_count": 8,
49
+ "disk": {
50
+ "/": {
51
+ "total": "470343073792",
52
+ "used": "56241074176"
53
+ }
54
+ },
55
+ "memory": {
56
+ "total": "2434606956544"
57
+ },
58
+ "gpu_amd": [
59
+ {
60
+ "id": "2",
61
+ "uniqueId": "0x9815965a899d8053",
62
+ "vbiosVersion": "113-D67301V-073",
63
+ "performanceLevel": "auto",
64
+ "maxPower": "300.0",
65
+ "series": "Instinct MI210",
66
+ "model": "0x740f",
67
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
68
+ "sku": "D67301V",
69
+ "sclkRange": "500Mhz - 1700Mhz",
70
+ "mclkRange": "400Mhz - 1600Mhz"
71
+ },
72
+ {
73
+ "id": "0",
74
+ "uniqueId": "0x4213cc9eeeefc98d",
75
+ "vbiosVersion": "113-D67301V-073",
76
+ "performanceLevel": "auto",
77
+ "maxPower": "300.0",
78
+ "series": "Instinct MI210",
79
+ "model": "0x740f",
80
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
81
+ "sku": "D67301V",
82
+ "sclkRange": "500Mhz - 1700Mhz",
83
+ "mclkRange": "400Mhz - 1600Mhz"
84
+ },
85
+ {
86
+ "id": "5",
87
+ "uniqueId": "0xd79d4a081e34548d",
88
+ "vbiosVersion": "113-D67301V-073",
89
+ "performanceLevel": "auto",
90
+ "maxPower": "300.0",
91
+ "series": "Instinct MI210",
92
+ "model": "0x740f",
93
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
94
+ "sku": "D67301V",
95
+ "sclkRange": "500Mhz - 1700Mhz",
96
+ "mclkRange": "400Mhz - 1600Mhz"
97
+ },
98
+ {
99
+ "id": "3",
100
+ "uniqueId": "0xd7a6e11358a6574d",
101
+ "vbiosVersion": "113-D67301V-073",
102
+ "performanceLevel": "auto",
103
+ "maxPower": "300.0",
104
+ "series": "Instinct MI210",
105
+ "model": "0x740f",
106
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
107
+ "sku": "D67301V",
108
+ "sclkRange": "500Mhz - 1700Mhz",
109
+ "mclkRange": "400Mhz - 1600Mhz"
110
+ },
111
+ {
112
+ "id": "6",
113
+ "uniqueId": "0x2d75dae36f0dc353",
114
+ "vbiosVersion": "113-D67301V-073",
115
+ "performanceLevel": "auto",
116
+ "maxPower": "300.0",
117
+ "series": "Instinct MI210",
118
+ "model": "0x740f",
119
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
120
+ "sku": "D67301V",
121
+ "sclkRange": "500Mhz - 1700Mhz",
122
+ "mclkRange": "400Mhz - 1600Mhz"
123
+ },
124
+ {
125
+ "id": "7",
126
+ "uniqueId": "0x702e8efb76b00c21",
127
+ "vbiosVersion": "113-D67301V-073",
128
+ "performanceLevel": "auto",
129
+ "maxPower": "300.0",
130
+ "series": "Instinct MI210",
131
+ "model": "0x740f",
132
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
133
+ "sku": "D67301V",
134
+ "sclkRange": "500Mhz - 1700Mhz",
135
+ "mclkRange": "400Mhz - 1600Mhz"
136
+ },
137
+ {
138
+ "id": "4",
139
+ "uniqueId": "0x4493708eee1ee737",
140
+ "vbiosVersion": "113-D67301V-073",
141
+ "performanceLevel": "auto",
142
+ "maxPower": "300.0",
143
+ "series": "Instinct MI210",
144
+ "model": "0x740f",
145
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
146
+ "sku": "D67301V",
147
+ "sclkRange": "500Mhz - 1700Mhz",
148
+ "mclkRange": "400Mhz - 1600Mhz"
149
+ },
150
+ {
151
+ "id": "1",
152
+ "uniqueId": "0xe35cdba2e3fafd21",
153
+ "vbiosVersion": "113-D67301V-073",
154
+ "performanceLevel": "auto",
155
+ "maxPower": "300.0",
156
+ "series": "Instinct MI210",
157
+ "model": "0x740f",
158
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
159
+ "sku": "D67301V",
160
+ "sclkRange": "500Mhz - 1700Mhz",
161
+ "mclkRange": "400Mhz - 1600Mhz"
162
+ }
163
+ ],
164
+ "slurm": {
165
+ "cluster_name": "ai-04r",
166
+ "conf": "/etc/slurm/slurm.conf",
167
+ "cpus_on_node": "128",
168
+ "gpus_on_node": "8",
169
+ "gtids": "0",
170
+ "job_account": "faculty-acc",
171
+ "job_cpus_per_node": "128",
172
+ "job_end_time": "1759676993",
173
+ "job_gid": "2000",
174
+ "job_gpus": "0,1,2,3,4,5,6,7",
175
+ "job_id": "2222",
176
+ "job_name": "mh_cleandesk50",
177
+ "job_nodelist": "auh7-1b-gpu-260",
178
+ "job_num_nodes": "1",
179
+ "job_partition": "faculty",
180
+ "job_qos": "xdqos",
181
+ "job_start_time": "1759417793",
182
+ "job_uid": "2013",
183
+ "job_user": "xiaodan",
184
+ "jobid": "2222",
185
+ "localid": "0",
186
+ "nnodes": "1",
187
+ "nodeid": "0",
188
+ "nodelist": "auh7-1b-gpu-260",
189
+ "nprocs": "1",
190
+ "ntasks": "1",
191
+ "ntasks_per_node": "1",
192
+ "oom_kill_step": "0",
193
+ "prio_process": "0",
194
+ "procid": "0",
195
+ "submit_dir": "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
196
+ "submit_host": "auh-1b-cpu-login-001",
197
+ "task_pid": "2555521",
198
+ "tasks_per_node": "1",
199
+ "topology_addr": "auh7-1b-gpu-260",
200
+ "topology_addr_pattern": "node"
201
+ },
202
+ "writerId": "nw73z7xb5cgzo0hg2igu85u5fde2wemd"
203
+ }
wandb/wandb/run-20251002_151047-gal9lnsm/logs/debug-core.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-10-02T15:10:47.93095326Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpvxt8f_1d/port-2555704.txt","pid":2555704,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-10-02T15:10:47.932417878Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":2555704}
3
+ {"time":"2025-10-02T15:10:47.931479477Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2555704-2555872-3886299646/socket","Net":"unix"}}
4
+ {"time":"2025-10-02T15:10:48.03182349Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-10-02T15:10:48.038756993Z","level":"INFO","msg":"handleInformInit: received","streamId":"gal9lnsm","id":"1(@)"}
6
+ {"time":"2025-10-02T15:10:53.841859694Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"gal9lnsm","id":"1(@)"}
wandb/wandb/run-20251002_151047-gal9lnsm/logs/debug-internal.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-10-02T15:10:48.040587486Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
2
+ {"time":"2025-10-02T15:10:53.84149929Z","level":"INFO","msg":"stream: created new stream","id":"gal9lnsm"}
3
+ {"time":"2025-10-02T15:10:53.841853494Z","level":"INFO","msg":"stream: started","id":"gal9lnsm"}
4
+ {"time":"2025-10-02T15:10:53.841880754Z","level":"INFO","msg":"sender: started","stream_id":"gal9lnsm"}
5
+ {"time":"2025-10-02T15:10:53.841894045Z","level":"INFO","msg":"writer: started","stream_id":"gal9lnsm"}
6
+ {"time":"2025-10-02T15:10:53.842927647Z","level":"INFO","msg":"handler: started","stream_id":"gal9lnsm"}
wandb/wandb/run-20251002_151047-gal9lnsm/logs/debug.log ADDED
File without changes
wandb/wandb/run-20251002_154526-bw81vbs0/files/output.log ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb: Detected [openai] in use.
2
+ wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
3
+ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
4
+ 10/02 [15:45:28] WARNING | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No warnings.py:109
5
+ device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
6
+ warnings.warn( # warn only once
7
+
8
+ ****** vla_cfg: {'datasets': {'rlds': {'name': None, 'path': None, 'weight': 1.0, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Wipe', 8, 'bounds']], 'open-source-real-world': {'rlds': {'name': 'a1_real_world', 'path': '/vast/users/xiaodan/zhangjian/datasets/OXE', 'weight': 8, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [], 'agibot': {'path': None, 'weight': 8, 'action_proprio_normalization_type': None}}}, 'model': {'action_head': {'action_dim': 16, 'proprio_dim': 16, 'num_actions_chunk': 8, 'action_tokens_mapping': {'left_end_effector': 8, 'right_end_effector': 8}, 'use_left_eef': True, 'use_mobile_base': False}}}
9
+ ****** Skip RLDS main; path not found: None
10
+ ****** start build LeRobot main...
11
+ build_tokenizer, cache_dir None tokenizer_dir None
12
+ 10/02 [15:45:35] INFO | >> Padding tokenizer with 418 tokens tokenizer.py:130
13
+ 10/02 [15:45:36] INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:435
14
+ ****** before LeRobot dataset...
15
+ ****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Wipe
16
+ ****** length of the dataset: 18397
17
+ ****** Skip RLDS open-source-real-world; mixture 'a1_real_world' not found under: /vast/users/xiaodan/zhangjian/datasets/OXE
18
+ ****** Expect one of: []
19
+ ****** path: None
20
+ ****** Skip AgiBotWorld-Alpha open-source-real-world; path not found: None
21
+ ****** After build vla train dataset...
22
+ ****** iterable_sources: [<olmo.data.dataset.IterableDatasetWrapper object at 0x7f5ce66e1750>]
23
+ ****** Before build mixed iterable dataset...
24
+ ****** Build vla train dataloader successfully!
25
+ ************************* Build train_dataloader successful!
26
+ ************************* Before build_inf_evaluators
27
+ 10/02 [15:45:42] WARNING | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No warnings.py:109
28
+ device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
29
+ warnings.warn( # warn only once
30
+
31
+ ************************* Build evaluators successful!
32
+ ************************* Early exit flags: early_exit=False
33
+ ************************* Initialize model successful!
34
+ ***** state_dict_path: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924/model.pt
35
+ ***** Load checkpoint successful!
36
+ missing keys: ['action_head.model.layer_norm1.weight', 'action_head.model.layer_norm1.bias', 'action_head.model.fc1.weight', 'action_head.model.fc1.bias', 'action_head.model.mlp_resnet_blocks.0.ffn.0.weight', 'action_head.model.mlp_resnet_blocks.0.ffn.0.bias', 'action_head.model.mlp_resnet_blocks.0.ffn.1.weight', 'action_head.model.mlp_resnet_blocks.0.ffn.1.bias', 'action_head.model.mlp_resnet_blocks.1.ffn.0.weight', 'action_head.model.mlp_resnet_blocks.1.ffn.0.bias', 'action_head.model.mlp_resnet_blocks.1.ffn.1.weight', 'action_head.model.mlp_resnet_blocks.1.ffn.1.bias', 'action_head.model.layer_norm2.weight', 'action_head.model.layer_norm2.bias', 'action_head.model.fc2.weight', 'action_head.model.fc2.bias', 'proprio_projector.fc1.weight', 'proprio_projector.fc1.bias', 'proprio_projector.fc2.weight', 'proprio_projector.fc2.bias']
37
+ unexpected keys: []
38
+ ************************* Initialize model successful!
39
+ ************************* LoRA flags: use_lora=True, lora_llm=False, lora_vit=False, lora_connector=False
40
+ ************************* Before add lora to model
41
+ ************************* Before FSDP model wrapping
42
+ ************************* FSDP model wrapping successful!
43
+ ************************* Before building optimizer and scheduler
44
+ ************* Before get lora params
45
+ ************* After get lora params successfully
46
+ 10/02 [15:46:56] INFO | >> Constructing optimizer with 2 param groups optim.py:1283
47
+ **************************************************
48
+ After building optimizer and scheduler and model, before training, peak GPU memory (MB): 35614
49
+ ************************* VLATrainer initialized successfully!
50
+ ************************* Before trainer.fit()
51
+ Pre-train system metrics
52
+ System/Peak GPU Memory (MB)=35,614
53
+ 10/02 [15:46:57] WARNING | >> /vast/users/xiaodan/zhangjian/A1/olmo/data/collator.py:200: UserWarning: To copy construct from a tensor, it is recommended to use warnings.py:109
54
+ sourceTensor.detach().clone() or sourceTensor.detach().clone().requires_grad_(True), rather than torch.tensor(sourceTensor).
55
+ timestep_list = [torch.tensor(ex["timestep"], dtype=torch.int64) for ex in batch]
56
+
57
+ [step=1/500000]
58
+ train/ActionL1Loss=0.5548
59
+ throughput/total_tokens=192,000
60
+ System/Peak GPU Memory (MB)=40,144
61
+ [step=2/500000]
62
+ train/ActionL1Loss=0.6130
63
+ throughput/total_tokens=384,000
64
+ throughput/device/tokens_per_second=1,170
65
+ throughput/device/batches_per_second=0.0488
66
+ System/Peak GPU Memory (MB)=46,917
67
+ [step=3/500000]
68
+ train/ActionL1Loss=0.6006
69
+ throughput/total_tokens=576,000
70
+ throughput/device/tokens_per_second=999.5
71
+ throughput/device/batches_per_second=0.0416
72
+ [step=4/500000]
73
+ train/ActionL1Loss=0.5381
74
+ throughput/total_tokens=768,000
75
+ throughput/device/tokens_per_second=898.3
76
+ throughput/device/batches_per_second=0.0374
77
+ [step=5/500000]
78
+ train/ActionL1Loss=0.4982
79
+ throughput/total_tokens=960,000
80
+ throughput/device/tokens_per_second=851.4
81
+ throughput/device/batches_per_second=0.0355
wandb/wandb/run-20251002_154526-bw81vbs0/files/requirements.txt ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ai2-molmo==0.0.0
2
+ astunparse==1.6.3
3
+ flatbuffers==25.2.10
4
+ gast==0.6.0
5
+ google-pasta==0.2.0
6
+ h5py==3.14.0
7
+ libclang==18.1.1
8
+ Markdown==3.9
9
+ namex==0.1.0
10
+ opt_einsum==3.4.0
11
+ optree==0.17.0
12
+ tensorboard-data-server==0.7.2
13
+ tensorflow-io-gcs-filesystem==0.37.1
14
+ termcolor==3.1.0
15
+ Werkzeug==3.1.3
16
+ Brotli==1.1.0
17
+ Farama-Notifications==0.0.4
18
+ MarkupSafe==2.1.5
19
+ PyYAML==6.0.2
20
+ absl-py==2.3.1
21
+ accelerate==1.10.1
22
+ ai2-molmo==0.0.0
23
+ aiofiles==24.1.0
24
+ aiohappyeyeballs==2.6.1
25
+ aiohttp==3.12.15
26
+ aiosignal==1.4.0
27
+ annotated-types==0.7.0
28
+ antlr4-python3-runtime==4.9.3
29
+ anyio==4.10.0
30
+ array_record==0.8.1
31
+ async-timeout==5.0.1
32
+ attrs==25.3.0
33
+ av==15.1.0
34
+ backports.tarfile==1.2.0
35
+ beaker-gantry==3.2.0
36
+ beaker-py==2.5.0
37
+ black==23.12.1
38
+ blinker==1.9.0
39
+ boltons==25.0.0
40
+ boto3==1.40.33
41
+ botocore==1.40.33
42
+ build==1.3.0
43
+ cached_path==1.7.3
44
+ cached-property==2.0.1
45
+ cachetools==5.5.2
46
+ certifi==2025.8.3
47
+ cffi==2.0.0
48
+ charset-normalizer==3.4.3
49
+ click==8.2.1
50
+ click-help-colors==0.9.4
51
+ click-option-group==0.5.7
52
+ cloudpickle==3.1.1
53
+ cmake==4.1.0
54
+ contourpy==1.3.2
55
+ cryptography==46.0.1
56
+ cycler==0.12.1
57
+ dataclass-extensions==0.2.3
58
+ datasets==3.6.0
59
+ decorator==5.2.1
60
+ deepdiff==8.6.1
61
+ diffusers==0.35.1
62
+ dill==0.3.8
63
+ distro==1.9.0
64
+ dlimp==0.0.1
65
+ dm-tree==0.1.9
66
+ docutils==0.22.1
67
+ draccus==0.10.0
68
+ editdistance==0.8.1
69
+ einops==0.8.1
70
+ einops-exts==0.0.4
71
+ et_xmlfile==2.0.0
72
+ etils==1.13.0
73
+ evdev==1.9.2
74
+ exceptiongroup==1.3.0
75
+ face==24.0.0
76
+ fastapi==0.116.2
77
+ ffmpy==0.6.1
78
+ fiddle==0.3.0
79
+ filelock==3.13.1
80
+ Flask==3.1.2
81
+ fonttools==4.60.0
82
+ frozenlist==1.7.0
83
+ fsspec==2023.9.2
84
+ ftfy==6.3.1
85
+ gcsfs==2023.9.2
86
+ gitdb==4.0.12
87
+ GitPython==3.1.45
88
+ glom==24.11.0
89
+ google-api-core==2.25.1
90
+ google-auth==2.40.3
91
+ google-auth-oauthlib==1.2.2
92
+ google-cloud-core==2.4.3
93
+ google-cloud-storage==2.19.0
94
+ google-crc32c==1.7.1
95
+ google-resumable-media==2.7.2
96
+ googleapis-common-protos==1.70.0
97
+ gradio==5.46.0
98
+ gradio_client==1.13.0
99
+ graphviz==0.21
100
+ groovy==0.1.2
101
+ grpcio==1.75.0
102
+ gymnasium==0.29.1
103
+ h11==0.16.0
104
+ hf_transfer==0.1.9
105
+ hf-xet==1.1.10
106
+ httpcore==1.0.9
107
+ httpx==0.28.1
108
+ huggingface-hub==0.35.0
109
+ id==1.5.0
110
+ idna==3.10
111
+ imageio==2.37.0
112
+ imageio-ffmpeg==0.6.0
113
+ importlib_metadata==8.7.0
114
+ importlib_resources==6.5.2
115
+ iniconfig==2.1.0
116
+ inquirerpy==0.3.4
117
+ isort==5.12.0
118
+ itsdangerous==2.2.0
119
+ jaraco.classes==3.4.0
120
+ jaraco.context==6.0.1
121
+ jaraco.functools==4.3.0
122
+ jeepney==0.9.0
123
+ Jinja2==3.1.4
124
+ jiter==0.11.0
125
+ jmespath==1.0.1
126
+ joblib==1.5.2
127
+ jsonlines==4.0.0
128
+ keras==2.15.0
129
+ keyring==25.6.0
130
+ kiwisolver==1.4.9
131
+ latex2sympy2_extended==1.10.2
132
+ lerobot==0.3.4
133
+ Levenshtein==0.27.1
134
+ libcst==1.8.4
135
+ lightning-utilities==0.15.2
136
+ markdown-it-py==4.0.0
137
+ math-verify==0.8.0
138
+ matplotlib==3.10.6
139
+ mdurl==0.1.2
140
+ mergedeep==1.3.4
141
+ ml-dtypes==0.2.0
142
+ ml_dtypes==0.5.3
143
+ more-itertools==10.8.0
144
+ mpmath==1.3.0
145
+ msgspec==0.19.0
146
+ multidict==6.6.4
147
+ multiprocess==0.70.16
148
+ mypy==1.3.0
149
+ mypy_extensions==1.1.0
150
+ necessary==0.4.3
151
+ networkx==3.3
152
+ nh3==0.3.0
153
+ nltk==3.9.1
154
+ numpy==1.26.4
155
+ oauthlib==3.3.1
156
+ omegaconf==2.3.0
157
+ openai==1.108.0
158
+ opencv-python-headless==4.12.0.88
159
+ OpenEXR==3.4.0
160
+ openpyxl==3.1.5
161
+ orderly-set==5.5.0
162
+ orjson==3.11.3
163
+ packaging==25.0
164
+ pandas==2.3.2
165
+ pathspec==0.12.1
166
+ petname==2.6
167
+ pfzy==0.3.4
168
+ pillow==11.0.0
169
+ pip==25.2
170
+ platformdirs==4.4.0
171
+ pluggy==1.6.0
172
+ promise==2.3
173
+ prompt_toolkit==3.0.52
174
+ propcache==0.3.2
175
+ proto-plus==1.26.1
176
+ protobuf==4.21.12
177
+ protobuf==6.32.1
178
+ psutil==7.1.0
179
+ pyarrow==21.0.0
180
+ pyasn1==0.6.1
181
+ pyasn1_modules==0.4.2
182
+ pycparser==2.23
183
+ pydantic==2.11.9
184
+ pydantic_core==2.33.2
185
+ pydub==0.25.1
186
+ Pygments==2.19.2
187
+ pynput==1.8.1
188
+ pyparsing==3.2.4
189
+ pyproject_hooks==1.2.0
190
+ pyserial==3.5
191
+ pytest==8.4.2
192
+ pytest-sphinx==0.6.3
193
+ python-dateutil==2.9.0.post0
194
+ python-Levenshtein==0.27.1
195
+ python-multipart==0.0.20
196
+ python-xlib==0.33
197
+ pytorch-triton-rocm==3.4.0
198
+ pytz==2025.2
199
+ pyyaml-include==1.4.1
200
+ RapidFuzz==3.14.1
201
+ readme_renderer==44.0
202
+ regex==2025.9.1
203
+ requests==2.32.5
204
+ requests-oauthlib==2.0.0
205
+ requests-toolbelt==1.0.0
206
+ requirements-parser==0.13.0
207
+ rerun-sdk==0.22.1
208
+ rfc3986==2.0.0
209
+ rich==13.9.4
210
+ rsa==4.9.1
211
+ ruff==0.13.0
212
+ s3transfer==0.14.0
213
+ safehttpx==0.1.6
214
+ safetensors==0.6.2
215
+ scikit-learn==1.7.2
216
+ scipy==1.15.3
217
+ SecretStorage==3.4.0
218
+ semantic-version==2.10.0
219
+ sentencepiece==0.2.1
220
+ sentry-sdk==2.38.0
221
+ setuptools==78.1.1
222
+ shellingham==1.5.4
223
+ six==1.17.0
224
+ smart_open==7.3.1
225
+ smashed==0.21.5
226
+ smmap==5.0.2
227
+ sniffio==1.3.1
228
+ starlette==0.48.0
229
+ sympy==1.13.3
230
+ tensorboard==2.15.2
231
+ tensorboard==2.19.0
232
+ tensorflow==2.15.0
233
+ tensorflow-addons==0.23.0
234
+ tensorflow-datasets==4.9.3
235
+ tensorflow-estimator==2.15.0
236
+ tensorflow-graphics==2021.12.3
237
+ tensorflow-metadata==1.17.2
238
+ threadpoolctl==3.6.0
239
+ timm==1.0.19
240
+ tokenizers==0.22.0
241
+ toml==0.10.2
242
+ tomli==2.2.1
243
+ tomlkit==0.13.3
244
+ torch==2.8.0+rocm6.4
245
+ torchcodec==0.5
246
+ torchmetrics==1.8.2
247
+ torchvision==0.23.0+rocm6.4
248
+ tqdm==4.67.1
249
+ transformers==4.56.1
250
+ trimesh==4.8.2
251
+ trouting==0.3.3
252
+ twine==6.2.0
253
+ typeguard==2.13.3
254
+ typer==0.17.4
255
+ typing_extensions==4.15.0
256
+ typing-inspect==0.9.0
257
+ typing-inspection==0.4.1
258
+ tzdata==2025.2
259
+ urllib3==2.5.0
260
+ uvicorn==0.35.0
261
+ wandb==0.21.4
262
+ wcwidth==0.2.13
263
+ websockets==15.0.1
264
+ wheel==0.45.1
265
+ wrapt==1.14.2
266
+ xxhash==3.5.0
267
+ yarl==1.20.1
268
+ zipp==3.23.0
269
+ lerobot==0.3.4
270
+ minLoRA==0.1.0
271
+ autocommand==2.2.2
272
+ backports.tarfile==1.2.0
273
+ importlib_metadata==8.0.0
274
+ inflect==7.3.1
275
+ jaraco.collections==5.1.0
276
+ jaraco.context==5.3.0
277
+ jaraco.functools==4.0.1
278
+ jaraco.text==3.12.1
279
+ more-itertools==10.3.0
280
+ packaging==24.2
281
+ platformdirs==4.2.2
282
+ tomli==2.0.1
283
+ typeguard==4.3.0
284
+ typing_extensions==4.12.2
285
+ wheel==0.45.1
286
+ zipp==3.19.2
wandb/wandb/run-20251002_154526-bw81vbs0/logs/debug-core.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-10-02T15:45:27.013808824Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpr0uhv_6g/port-2561337.txt","pid":2561337,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-10-02T15:45:27.014628014Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":2561337}
3
+ {"time":"2025-10-02T15:45:27.014614203Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2561337-2561508-3882934790/socket","Net":"unix"}}
4
+ {"time":"2025-10-02T15:45:27.192771801Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-10-02T15:45:27.199344999Z","level":"INFO","msg":"handleInformInit: received","streamId":"bw81vbs0","id":"1(@)"}
6
+ {"time":"2025-10-02T15:45:28.225110984Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"bw81vbs0","id":"1(@)"}
wandb/wandb/run-20251002_154526-bw81vbs0/logs/debug-internal.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-10-02T15:45:27.201294023Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
2
+ {"time":"2025-10-02T15:45:28.225059133Z","level":"INFO","msg":"stream: created new stream","id":"bw81vbs0"}
3
+ {"time":"2025-10-02T15:45:28.225105354Z","level":"INFO","msg":"stream: started","id":"bw81vbs0"}
4
+ {"time":"2025-10-02T15:45:28.225129364Z","level":"INFO","msg":"writer: started","stream_id":"bw81vbs0"}
5
+ {"time":"2025-10-02T15:45:28.225137364Z","level":"INFO","msg":"sender: started","stream_id":"bw81vbs0"}
6
+ {"time":"2025-10-02T15:45:28.225195135Z","level":"INFO","msg":"handler: started","stream_id":"bw81vbs0"}
wandb/wandb/run-20251002_154526-bw81vbs0/logs/debug.log ADDED
File without changes
wandb/wandb/run-20251002_155015-xojint20/files/output.log ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb: Detected [openai] in use.
2
+ wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
3
+ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
4
+ 10/02 [15:50:17] WARNING | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No warnings.py:109
5
+ device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
6
+ warnings.warn( # warn only once
7
+
8
+ ****** vla_cfg: {'datasets': {'rlds': {'name': None, 'path': None, 'weight': 1.0, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Wipe', 8, 'bounds']], 'open-source-real-world': {'rlds': {'name': 'a1_real_world', 'path': '/vast/users/xiaodan/zhangjian/datasets/OXE', 'weight': 8, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [], 'agibot': {'path': None, 'weight': 8, 'action_proprio_normalization_type': None}}}, 'model': {'action_head': {'action_dim': 16, 'proprio_dim': 16, 'num_actions_chunk': 8, 'action_tokens_mapping': {'left_end_effector': 8, 'right_end_effector': 8}, 'use_left_eef': True, 'use_mobile_base': False}}}
9
+ ****** Skip RLDS main; path not found: None
10
+ ****** start build LeRobot main...
11
+ build_tokenizer, cache_dir None tokenizer_dir None
12
+ 10/02 [15:50:19] INFO | >> Padding tokenizer with 418 tokens tokenizer.py:130
13
+ 10/02 [15:50:20] INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:435
14
+ ****** before LeRobot dataset...
15
+ ****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Wipe
16
+ ****** length of the dataset: 18397
17
+ ****** Skip RLDS open-source-real-world; mixture 'a1_real_world' not found under: /vast/users/xiaodan/zhangjian/datasets/OXE
18
+ ****** Expect one of: []
19
+ ****** path: None
20
+ ****** Skip AgiBotWorld-Alpha open-source-real-world; path not found: None
21
+ ****** After build vla train dataset...
22
+ ****** iterable_sources: [<olmo.data.dataset.IterableDatasetWrapper object at 0x7f8994997820>]
23
+ ****** Before build mixed iterable dataset...
24
+ ****** Build vla train dataloader successfully!
25
+ ************************* Build train_dataloader successful!
26
+ ************************* Before build_inf_evaluators
27
+ 10/02 [15:50:26] WARNING | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No warnings.py:109
28
+ device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
29
+ warnings.warn( # warn only once
30
+
31
+ ************************* Build evaluators successful!
32
+ ************************* Early exit flags: early_exit=False
33
+ ************************* Initialize model successful!
34
+ ***** state_dict_path: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924/model.pt
35
+ ***** Load checkpoint successful!
36
+ missing keys: ['action_head.state_proj.weight', 'action_head.state_proj.bias', 'action_head.action_in_proj.weight', 'action_head.action_in_proj.bias', 'action_head.action_time_in.weight', 'action_head.action_time_in.bias', 'action_head.action_time_out.weight', 'action_head.action_time_out.bias', 'action_head.memory_proj.weight', 'action_head.memory_proj.bias', 'action_head.gemma.model.layers.0.self_attn.q_proj.weight', 'action_head.gemma.model.layers.0.self_attn.k_proj.weight', 'action_head.gemma.model.layers.0.self_attn.v_proj.weight', 'action_head.gemma.model.layers.0.self_attn.o_proj.weight', 'action_head.gemma.model.layers.0.mlp.gate_proj.weight', 'action_head.gemma.model.layers.0.mlp.up_proj.weight', 'action_head.gemma.model.layers.0.mlp.down_proj.weight', 'action_head.gemma.model.layers.0.input_layernorm.weight', 'action_head.gemma.model.layers.0.post_attention_layernorm.weight', 'action_head.gemma.model.layers.1.self_attn.q_proj.weight', 'action_head.gemma.model.layers.1.self_attn.k_proj.weight', 'action_head.gemma.model.layers.1.self_attn.v_proj.weight', 'action_head.gemma.model.layers.1.self_attn.o_proj.weight', 'action_head.gemma.model.layers.1.mlp.gate_proj.weight', 'action_head.gemma.model.layers.1.mlp.up_proj.weight', 'action_head.gemma.model.layers.1.mlp.down_proj.weight', 'action_head.gemma.model.layers.1.input_layernorm.weight', 'action_head.gemma.model.layers.1.post_attention_layernorm.weight', 'action_head.gemma.model.layers.2.self_attn.q_proj.weight', 'action_head.gemma.model.layers.2.self_attn.k_proj.weight', 'action_head.gemma.model.layers.2.self_attn.v_proj.weight', 'action_head.gemma.model.layers.2.self_attn.o_proj.weight', 'action_head.gemma.model.layers.2.mlp.gate_proj.weight', 'action_head.gemma.model.layers.2.mlp.up_proj.weight', 'action_head.gemma.model.layers.2.mlp.down_proj.weight', 'action_head.gemma.model.layers.2.input_layernorm.weight', 'action_head.gemma.model.layers.2.post_attention_layernorm.weight', 'action_head.gemma.model.layers.3.self_attn.q_proj.weight', 'action_head.gemma.model.layers.3.self_attn.k_proj.weight', 'action_head.gemma.model.layers.3.self_attn.v_proj.weight', 'action_head.gemma.model.layers.3.self_attn.o_proj.weight', 'action_head.gemma.model.layers.3.mlp.gate_proj.weight', 'action_head.gemma.model.layers.3.mlp.up_proj.weight', 'action_head.gemma.model.layers.3.mlp.down_proj.weight', 'action_head.gemma.model.layers.3.input_layernorm.weight', 'action_head.gemma.model.layers.3.post_attention_layernorm.weight', 'action_head.gemma.model.layers.4.self_attn.q_proj.weight', 'action_head.gemma.model.layers.4.self_attn.k_proj.weight', 'action_head.gemma.model.layers.4.self_attn.v_proj.weight', 'action_head.gemma.model.layers.4.self_attn.o_proj.weight', 'action_head.gemma.model.layers.4.mlp.gate_proj.weight', 'action_head.gemma.model.layers.4.mlp.up_proj.weight', 'action_head.gemma.model.layers.4.mlp.down_proj.weight', 'action_head.gemma.model.layers.4.input_layernorm.weight', 'action_head.gemma.model.layers.4.post_attention_layernorm.weight', 'action_head.gemma.model.layers.5.self_attn.q_proj.weight', 'action_head.gemma.model.layers.5.self_attn.k_proj.weight', 'action_head.gemma.model.layers.5.self_attn.v_proj.weight', 'action_head.gemma.model.layers.5.self_attn.o_proj.weight', 'action_head.gemma.model.layers.5.mlp.gate_proj.weight', 'action_head.gemma.model.layers.5.mlp.up_proj.weight', 'action_head.gemma.model.layers.5.mlp.down_proj.weight', 'action_head.gemma.model.layers.5.input_layernorm.weight', 'action_head.gemma.model.layers.5.post_attention_layernorm.weight', 'action_head.gemma.model.layers.6.self_attn.q_proj.weight', 'action_head.gemma.model.layers.6.self_attn.k_proj.weight', 'action_head.gemma.model.layers.6.self_attn.v_proj.weight', 'action_head.gemma.model.layers.6.self_attn.o_proj.weight', 'action_head.gemma.model.layers.6.mlp.gate_proj.weight', 'action_head.gemma.model.layers.6.mlp.up_proj.weight', 'action_head.gemma.model.layers.6.mlp.down_proj.weight', 'action_head.gemma.model.layers.6.input_layernorm.weight', 'action_head.gemma.model.layers.6.post_attention_
37
+ unexpected keys: []
38
+ ************************* Initialize model successful!
39
+ ************************* LoRA flags: use_lora=True, lora_llm=False, lora_vit=False, lora_connector=False
40
+ ************************* Before add lora to model
41
+ ************************* Before FSDP model wrapping
42
+ ************************* FSDP model wrapping successful!
43
+ ************************* Before building optimizer and scheduler
44
+ ************* Before get lora params
45
+ ************* After get lora params successfully
46
+ 10/02 [15:51:44] INFO | >> Constructing optimizer with 2 param groups optim.py:1283
47
+ **************************************************
48
+ After building optimizer and scheduler and model, before training, peak GPU memory (MB): 36856
49
+ ************************* VLATrainer initialized successfully!
50
+ ************************* Before trainer.fit()
51
+ Pre-train system metrics
52
+ System/Peak GPU Memory (MB)=36,856
53
+ 10/02 [15:51:45] WARNING | >> /vast/users/xiaodan/zhangjian/A1/olmo/data/collator.py:200: UserWarning: To copy construct from a tensor, it is recommended to use warnings.py:109
54
+ sourceTensor.detach().clone() or sourceTensor.detach().clone().requires_grad_(True), rather than torch.tensor(sourceTensor).
55
+ timestep_list = [torch.tensor(ex["timestep"], dtype=torch.int64) for ex in batch]
56
+
57
+ 10/02 [15:51:52] WARNING | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/nn/modules/module.py:967: UserWarning: The .grad attribute warnings.py:109
58
+ of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed
59
+ want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor
60
+ by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered
61
+ internally at /pytorch/build/aten/src/ATen/core/TensorBody.h:489.)
62
+ param_grad = param.grad
63
+
64
+ [step=1/500000]
65
+ train/ActionNoiseL2Loss=1.632
66
+ throughput/total_tokens=192,000
67
+ System/Peak GPU Memory (MB)=39,644
68
+ [step=2/500000]
69
+ train/ActionNoiseL2Loss=1.683
70
+ throughput/total_tokens=384,000
71
+ throughput/device/tokens_per_second=1,194
72
+ throughput/device/batches_per_second=0.0498
73
+ System/Peak GPU Memory (MB)=46,466
74
+ [step=3/500000]
75
+ train/ActionNoiseL2Loss=1.640
76
+ throughput/total_tokens=576,000
77
+ throughput/device/tokens_per_second=1,176
78
+ throughput/device/batches_per_second=0.0490
79
+ [step=4/500000]
80
+ train/ActionNoiseL2Loss=1.547
81
+ throughput/total_tokens=768,000
82
+ throughput/device/tokens_per_second=1,171
83
+ throughput/device/batches_per_second=0.0488
84
+ [step=5/500000]
85
+ train/ActionNoiseL2Loss=1.508
86
+ throughput/total_tokens=960,000
87
+ throughput/device/tokens_per_second=1,168
88
+ throughput/device/batches_per_second=0.0487
wandb/wandb/run-20251002_155015-xojint20/files/requirements.txt ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ai2-molmo==0.0.0
2
+ astunparse==1.6.3
3
+ flatbuffers==25.2.10
4
+ gast==0.6.0
5
+ google-pasta==0.2.0
6
+ h5py==3.14.0
7
+ libclang==18.1.1
8
+ Markdown==3.9
9
+ namex==0.1.0
10
+ opt_einsum==3.4.0
11
+ optree==0.17.0
12
+ tensorboard-data-server==0.7.2
13
+ tensorflow-io-gcs-filesystem==0.37.1
14
+ termcolor==3.1.0
15
+ Werkzeug==3.1.3
16
+ Brotli==1.1.0
17
+ Farama-Notifications==0.0.4
18
+ MarkupSafe==2.1.5
19
+ PyYAML==6.0.2
20
+ absl-py==2.3.1
21
+ accelerate==1.10.1
22
+ ai2-molmo==0.0.0
23
+ aiofiles==24.1.0
24
+ aiohappyeyeballs==2.6.1
25
+ aiohttp==3.12.15
26
+ aiosignal==1.4.0
27
+ annotated-types==0.7.0
28
+ antlr4-python3-runtime==4.9.3
29
+ anyio==4.10.0
30
+ array_record==0.8.1
31
+ async-timeout==5.0.1
32
+ attrs==25.3.0
33
+ av==15.1.0
34
+ backports.tarfile==1.2.0
35
+ beaker-gantry==3.2.0
36
+ beaker-py==2.5.0
37
+ black==23.12.1
38
+ blinker==1.9.0
39
+ boltons==25.0.0
40
+ boto3==1.40.33
41
+ botocore==1.40.33
42
+ build==1.3.0
43
+ cached_path==1.7.3
44
+ cached-property==2.0.1
45
+ cachetools==5.5.2
46
+ certifi==2025.8.3
47
+ cffi==2.0.0
48
+ charset-normalizer==3.4.3
49
+ click==8.2.1
50
+ click-help-colors==0.9.4
51
+ click-option-group==0.5.7
52
+ cloudpickle==3.1.1
53
+ cmake==4.1.0
54
+ contourpy==1.3.2
55
+ cryptography==46.0.1
56
+ cycler==0.12.1
57
+ dataclass-extensions==0.2.3
58
+ datasets==3.6.0
59
+ decorator==5.2.1
60
+ deepdiff==8.6.1
61
+ diffusers==0.35.1
62
+ dill==0.3.8
63
+ distro==1.9.0
64
+ dlimp==0.0.1
65
+ dm-tree==0.1.9
66
+ docutils==0.22.1
67
+ draccus==0.10.0
68
+ editdistance==0.8.1
69
+ einops==0.8.1
70
+ einops-exts==0.0.4
71
+ et_xmlfile==2.0.0
72
+ etils==1.13.0
73
+ evdev==1.9.2
74
+ exceptiongroup==1.3.0
75
+ face==24.0.0
76
+ fastapi==0.116.2
77
+ ffmpy==0.6.1
78
+ fiddle==0.3.0
79
+ filelock==3.13.1
80
+ Flask==3.1.2
81
+ fonttools==4.60.0
82
+ frozenlist==1.7.0
83
+ fsspec==2023.9.2
84
+ ftfy==6.3.1
85
+ gcsfs==2023.9.2
86
+ gitdb==4.0.12
87
+ GitPython==3.1.45
88
+ glom==24.11.0
89
+ google-api-core==2.25.1
90
+ google-auth==2.40.3
91
+ google-auth-oauthlib==1.2.2
92
+ google-cloud-core==2.4.3
93
+ google-cloud-storage==2.19.0
94
+ google-crc32c==1.7.1
95
+ google-resumable-media==2.7.2
96
+ googleapis-common-protos==1.70.0
97
+ gradio==5.46.0
98
+ gradio_client==1.13.0
99
+ graphviz==0.21
100
+ groovy==0.1.2
101
+ grpcio==1.75.0
102
+ gymnasium==0.29.1
103
+ h11==0.16.0
104
+ hf_transfer==0.1.9
105
+ hf-xet==1.1.10
106
+ httpcore==1.0.9
107
+ httpx==0.28.1
108
+ huggingface-hub==0.35.0
109
+ id==1.5.0
110
+ idna==3.10
111
+ imageio==2.37.0
112
+ imageio-ffmpeg==0.6.0
113
+ importlib_metadata==8.7.0
114
+ importlib_resources==6.5.2
115
+ iniconfig==2.1.0
116
+ inquirerpy==0.3.4
117
+ isort==5.12.0
118
+ itsdangerous==2.2.0
119
+ jaraco.classes==3.4.0
120
+ jaraco.context==6.0.1
121
+ jaraco.functools==4.3.0
122
+ jeepney==0.9.0
123
+ Jinja2==3.1.4
124
+ jiter==0.11.0
125
+ jmespath==1.0.1
126
+ joblib==1.5.2
127
+ jsonlines==4.0.0
128
+ keras==2.15.0
129
+ keyring==25.6.0
130
+ kiwisolver==1.4.9
131
+ latex2sympy2_extended==1.10.2
132
+ lerobot==0.3.4
133
+ Levenshtein==0.27.1
134
+ libcst==1.8.4
135
+ lightning-utilities==0.15.2
136
+ markdown-it-py==4.0.0
137
+ math-verify==0.8.0
138
+ matplotlib==3.10.6
139
+ mdurl==0.1.2
140
+ mergedeep==1.3.4
141
+ ml-dtypes==0.2.0
142
+ ml_dtypes==0.5.3
143
+ more-itertools==10.8.0
144
+ mpmath==1.3.0
145
+ msgspec==0.19.0
146
+ multidict==6.6.4
147
+ multiprocess==0.70.16
148
+ mypy==1.3.0
149
+ mypy_extensions==1.1.0
150
+ necessary==0.4.3
151
+ networkx==3.3
152
+ nh3==0.3.0
153
+ nltk==3.9.1
154
+ numpy==1.26.4
155
+ oauthlib==3.3.1
156
+ omegaconf==2.3.0
157
+ openai==1.108.0
158
+ opencv-python-headless==4.12.0.88
159
+ OpenEXR==3.4.0
160
+ openpyxl==3.1.5
161
+ orderly-set==5.5.0
162
+ orjson==3.11.3
163
+ packaging==25.0
164
+ pandas==2.3.2
165
+ pathspec==0.12.1
166
+ petname==2.6
167
+ pfzy==0.3.4
168
+ pillow==11.0.0
169
+ pip==25.2
170
+ platformdirs==4.4.0
171
+ pluggy==1.6.0
172
+ promise==2.3
173
+ prompt_toolkit==3.0.52
174
+ propcache==0.3.2
175
+ proto-plus==1.26.1
176
+ protobuf==4.21.12
177
+ protobuf==6.32.1
178
+ psutil==7.1.0
179
+ pyarrow==21.0.0
180
+ pyasn1==0.6.1
181
+ pyasn1_modules==0.4.2
182
+ pycparser==2.23
183
+ pydantic==2.11.9
184
+ pydantic_core==2.33.2
185
+ pydub==0.25.1
186
+ Pygments==2.19.2
187
+ pynput==1.8.1
188
+ pyparsing==3.2.4
189
+ pyproject_hooks==1.2.0
190
+ pyserial==3.5
191
+ pytest==8.4.2
192
+ pytest-sphinx==0.6.3
193
+ python-dateutil==2.9.0.post0
194
+ python-Levenshtein==0.27.1
195
+ python-multipart==0.0.20
196
+ python-xlib==0.33
197
+ pytorch-triton-rocm==3.4.0
198
+ pytz==2025.2
199
+ pyyaml-include==1.4.1
200
+ RapidFuzz==3.14.1
201
+ readme_renderer==44.0
202
+ regex==2025.9.1
203
+ requests==2.32.5
204
+ requests-oauthlib==2.0.0
205
+ requests-toolbelt==1.0.0
206
+ requirements-parser==0.13.0
207
+ rerun-sdk==0.22.1
208
+ rfc3986==2.0.0
209
+ rich==13.9.4
210
+ rsa==4.9.1
211
+ ruff==0.13.0
212
+ s3transfer==0.14.0
213
+ safehttpx==0.1.6
214
+ safetensors==0.6.2
215
+ scikit-learn==1.7.2
216
+ scipy==1.15.3
217
+ SecretStorage==3.4.0
218
+ semantic-version==2.10.0
219
+ sentencepiece==0.2.1
220
+ sentry-sdk==2.38.0
221
+ setuptools==78.1.1
222
+ shellingham==1.5.4
223
+ six==1.17.0
224
+ smart_open==7.3.1
225
+ smashed==0.21.5
226
+ smmap==5.0.2
227
+ sniffio==1.3.1
228
+ starlette==0.48.0
229
+ sympy==1.13.3
230
+ tensorboard==2.15.2
231
+ tensorboard==2.19.0
232
+ tensorflow==2.15.0
233
+ tensorflow-addons==0.23.0
234
+ tensorflow-datasets==4.9.3
235
+ tensorflow-estimator==2.15.0
236
+ tensorflow-graphics==2021.12.3
237
+ tensorflow-metadata==1.17.2
238
+ threadpoolctl==3.6.0
239
+ timm==1.0.19
240
+ tokenizers==0.22.0
241
+ toml==0.10.2
242
+ tomli==2.2.1
243
+ tomlkit==0.13.3
244
+ torch==2.8.0+rocm6.4
245
+ torchcodec==0.5
246
+ torchmetrics==1.8.2
247
+ torchvision==0.23.0+rocm6.4
248
+ tqdm==4.67.1
249
+ transformers==4.56.1
250
+ trimesh==4.8.2
251
+ trouting==0.3.3
252
+ twine==6.2.0
253
+ typeguard==2.13.3
254
+ typer==0.17.4
255
+ typing_extensions==4.15.0
256
+ typing-inspect==0.9.0
257
+ typing-inspection==0.4.1
258
+ tzdata==2025.2
259
+ urllib3==2.5.0
260
+ uvicorn==0.35.0
261
+ wandb==0.21.4
262
+ wcwidth==0.2.13
263
+ websockets==15.0.1
264
+ wheel==0.45.1
265
+ wrapt==1.14.2
266
+ xxhash==3.5.0
267
+ yarl==1.20.1
268
+ zipp==3.23.0
269
+ lerobot==0.3.4
270
+ minLoRA==0.1.0
271
+ autocommand==2.2.2
272
+ backports.tarfile==1.2.0
273
+ importlib_metadata==8.0.0
274
+ inflect==7.3.1
275
+ jaraco.collections==5.1.0
276
+ jaraco.context==5.3.0
277
+ jaraco.functools==4.0.1
278
+ jaraco.text==3.12.1
279
+ more-itertools==10.3.0
280
+ packaging==24.2
281
+ platformdirs==4.2.2
282
+ tomli==2.0.1
283
+ typeguard==4.3.0
284
+ typing_extensions==4.12.2
285
+ wheel==0.45.1
286
+ zipp==3.19.2
wandb/wandb/run-20251002_155015-xojint20/logs/debug-core.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-10-02T15:50:15.670289561Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpzs22pa0_/port-2563820.txt","pid":2563820,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-10-02T15:50:15.670880688Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":2563820}
3
+ {"time":"2025-10-02T15:50:15.670869618Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2563820-2563984-1205114273/socket","Net":"unix"}}
4
+ {"time":"2025-10-02T15:50:15.848134405Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-10-02T15:50:15.855061477Z","level":"INFO","msg":"handleInformInit: received","streamId":"xojint20","id":"1(@)"}
6
+ {"time":"2025-10-02T15:50:17.025989793Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"xojint20","id":"1(@)"}
wandb/wandb/run-20251002_155015-xojint20/logs/debug-internal.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-10-02T15:50:15.85696447Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
2
+ {"time":"2025-10-02T15:50:17.025925463Z","level":"INFO","msg":"stream: created new stream","id":"xojint20"}
3
+ {"time":"2025-10-02T15:50:17.025984043Z","level":"INFO","msg":"stream: started","id":"xojint20"}
4
+ {"time":"2025-10-02T15:50:17.026005264Z","level":"INFO","msg":"sender: started","stream_id":"xojint20"}
5
+ {"time":"2025-10-02T15:50:17.026005514Z","level":"INFO","msg":"writer: started","stream_id":"xojint20"}
6
+ {"time":"2025-10-02T15:50:17.026057304Z","level":"INFO","msg":"handler: started","stream_id":"xojint20"}
wandb/wandb/run-20251002_155015-xojint20/run-xojint20.wandb ADDED
Binary file (65.5 kB). View file
 
wandb/wandb/run-20251002_155441-70dhy5dq/files/output.log ADDED
@@ -0,0 +1,318 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb: Detected [openai] in use.
2
+ wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
3
+ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
4
+ 10/02 [15:54:43] WARNING | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No warnings.py:109
5
+ device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
6
+ warnings.warn( # warn only once
7
+
8
+ ****** vla_cfg: {'datasets': {'rlds': {'name': None, 'path': None, 'weight': 1.0, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Glue', 8, 'bounds']], 'open-source-real-world': {'rlds': {'name': 'a1_real_world', 'path': '/vast/users/xiaodan/zhangjian/datasets/OXE', 'weight': 8, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [], 'agibot': {'path': None, 'weight': 8, 'action_proprio_normalization_type': None}}}, 'model': {'action_head': {'action_dim': 16, 'proprio_dim': 16, 'num_actions_chunk': 8, 'action_tokens_mapping': {'left_end_effector': 8, 'right_end_effector': 8}, 'use_left_eef': True, 'use_mobile_base': False}}}
9
+ ****** Skip RLDS main; path not found: None
10
+ ****** start build LeRobot main...
11
+ build_tokenizer, cache_dir None tokenizer_dir None
12
+ 10/02 [15:54:49] INFO | >> Padding tokenizer with 418 tokens tokenizer.py:130
13
+ 10/02 [15:54:50] INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:435
14
+ ****** before LeRobot dataset...
15
+ ****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Glue
16
+ ****** length of the dataset: 10316
17
+ ****** Skip RLDS open-source-real-world; mixture 'a1_real_world' not found under: /vast/users/xiaodan/zhangjian/datasets/OXE
18
+ ****** Expect one of: []
19
+ ****** path: None
20
+ ****** Skip AgiBotWorld-Alpha open-source-real-world; path not found: None
21
+ ****** After build vla train dataset...
22
+ ****** iterable_sources: [<olmo.data.dataset.IterableDatasetWrapper object at 0x7f57cc61bdc0>]
23
+ ****** Before build mixed iterable dataset...
24
+ ****** Build vla train dataloader successfully!
25
+ ************************* Build train_dataloader successful!
26
+ ************************* Before build_inf_evaluators
27
+ 10/02 [15:54:55] WARNING | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No warnings.py:109
28
+ device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
29
+ warnings.warn( # warn only once
30
+
31
+ ************************* Build evaluators successful!
32
+ ************************* Early exit flags: early_exit=False
33
+ ************************* Initialize model successful!
34
+ ***** state_dict_path: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924/model.pt
35
+ ***** Load checkpoint successful!
36
+ missing keys: ['action_head.model.layer_norm1.weight', 'action_head.model.layer_norm1.bias', 'action_head.model.fc1.weight', 'action_head.model.fc1.bias', 'action_head.model.mlp_resnet_blocks.0.ffn.0.weight', 'action_head.model.mlp_resnet_blocks.0.ffn.0.bias', 'action_head.model.mlp_resnet_blocks.0.ffn.1.weight', 'action_head.model.mlp_resnet_blocks.0.ffn.1.bias', 'action_head.model.mlp_resnet_blocks.1.ffn.0.weight', 'action_head.model.mlp_resnet_blocks.1.ffn.0.bias', 'action_head.model.mlp_resnet_blocks.1.ffn.1.weight', 'action_head.model.mlp_resnet_blocks.1.ffn.1.bias', 'action_head.model.layer_norm2.weight', 'action_head.model.layer_norm2.bias', 'action_head.model.fc2.weight', 'action_head.model.fc2.bias', 'proprio_projector.fc1.weight', 'proprio_projector.fc1.bias', 'proprio_projector.fc2.weight', 'proprio_projector.fc2.bias']
37
+ unexpected keys: []
38
+ ************************* Initialize model successful!
39
+ ************************* LoRA flags: use_lora=True, lora_llm=False, lora_vit=False, lora_connector=False
40
+ ************************* Before add lora to model
41
+ ************************* Before FSDP model wrapping
42
+ ************************* FSDP model wrapping successful!
43
+ ************************* Before building optimizer and scheduler
44
+ ************* Before get lora params
45
+ ************* After get lora params successfully
46
+ 10/02 [15:56:08] INFO | >> Constructing optimizer with 2 param groups optim.py:1283
47
+ **************************************************
48
+ After building optimizer and scheduler and model, before training, peak GPU memory (MB): 35614
49
+ ************************* VLATrainer initialized successfully!
50
+ ************************* Before trainer.fit()
51
+ Pre-train system metrics
52
+ System/Peak GPU Memory (MB)=35,614
53
+ WARNING | >> /vast/users/xiaodan/zhangjian/A1/olmo/data/collator.py:200: UserWarning: To copy construct from a tensor, it is recommended to use warnings.py:109
54
+ sourceTensor.detach().clone() or sourceTensor.detach().clone().requires_grad_(True), rather than torch.tensor(sourceTensor).
55
+ timestep_list = [torch.tensor(ex["timestep"], dtype=torch.int64) for ex in batch]
56
+
57
+ [step=1/500000]
58
+ train/ActionL1Loss=0.6604
59
+ throughput/total_tokens=192,000
60
+ System/Peak GPU Memory (MB)=40,144
61
+ [step=2/500000]
62
+ train/ActionL1Loss=0.6686
63
+ throughput/total_tokens=384,000
64
+ throughput/device/tokens_per_second=1,220
65
+ throughput/device/batches_per_second=0.0508
66
+ System/Peak GPU Memory (MB)=46,917
67
+ [step=3/500000]
68
+ train/ActionL1Loss=0.6322
69
+ throughput/total_tokens=576,000
70
+ throughput/device/tokens_per_second=1,199
71
+ throughput/device/batches_per_second=0.0500
72
+ [step=4/500000]
73
+ train/ActionL1Loss=0.6230
74
+ throughput/total_tokens=768,000
75
+ throughput/device/tokens_per_second=1,190
76
+ throughput/device/batches_per_second=0.0496
77
+ [step=5/500000]
78
+ train/ActionL1Loss=0.5815
79
+ throughput/total_tokens=960,000
80
+ throughput/device/tokens_per_second=1,187
81
+ throughput/device/batches_per_second=0.0495
82
+ [step=6/500000]
83
+ train/ActionL1Loss=0.5807
84
+ throughput/total_tokens=1,152,000
85
+ throughput/device/tokens_per_second=1,185
86
+ throughput/device/batches_per_second=0.0494
87
+ [step=7/500000]
88
+ train/ActionL1Loss=0.5010
89
+ throughput/total_tokens=1,344,000
90
+ throughput/device/tokens_per_second=1,184
91
+ throughput/device/batches_per_second=0.0493
92
+ [step=8/500000]
93
+ train/ActionL1Loss=0.5155
94
+ throughput/total_tokens=1,536,000
95
+ throughput/device/tokens_per_second=1,183
96
+ throughput/device/batches_per_second=0.0493
97
+ [step=9/500000]
98
+ train/ActionL1Loss=0.5458
99
+ throughput/total_tokens=1,728,000
100
+ throughput/device/tokens_per_second=1,182
101
+ throughput/device/batches_per_second=0.0493
102
+ [step=10/500000]
103
+ train/ActionL1Loss=0.4240
104
+ throughput/total_tokens=1,920,000
105
+ throughput/device/tokens_per_second=1,182
106
+ throughput/device/batches_per_second=0.0493
107
+ System/Peak GPU Memory (MB)=46,917
108
+ [step=11/500000]
109
+ train/ActionL1Loss=0.4684
110
+ throughput/total_tokens=2,112,000
111
+ throughput/device/tokens_per_second=1,181
112
+ throughput/device/batches_per_second=0.0492
113
+ [step=12/500000]
114
+ train/ActionL1Loss=0.4157
115
+ throughput/total_tokens=2,304,000
116
+ throughput/device/tokens_per_second=1,181
117
+ throughput/device/batches_per_second=0.0492
118
+ [step=13/500000]
119
+ train/ActionL1Loss=0.5035
120
+ throughput/total_tokens=2,496,000
121
+ throughput/device/tokens_per_second=1,181
122
+ throughput/device/batches_per_second=0.0492
123
+ [step=14/500000]
124
+ train/ActionL1Loss=0.4165
125
+ throughput/total_tokens=2,688,000
126
+ throughput/device/tokens_per_second=1,181
127
+ throughput/device/batches_per_second=0.0492
128
+ [step=15/500000]
129
+ train/ActionL1Loss=0.3336
130
+ throughput/total_tokens=2,880,000
131
+ throughput/device/tokens_per_second=1,181
132
+ throughput/device/batches_per_second=0.0492
133
+ [step=16/500000]
134
+ train/ActionL1Loss=0.4032
135
+ throughput/total_tokens=3,072,000
136
+ throughput/device/tokens_per_second=1,181
137
+ throughput/device/batches_per_second=0.0492
138
+ [step=17/500000]
139
+ train/ActionL1Loss=0.4553
140
+ throughput/total_tokens=3,264,000
141
+ throughput/device/tokens_per_second=1,180
142
+ throughput/device/batches_per_second=0.0492
143
+ [step=18/500000]
144
+ train/ActionL1Loss=0.5436
145
+ throughput/total_tokens=3,456,000
146
+ throughput/device/tokens_per_second=1,180
147
+ throughput/device/batches_per_second=0.0492
148
+ [step=19/500000]
149
+ train/ActionL1Loss=0.3642
150
+ throughput/total_tokens=3,648,000
151
+ throughput/device/tokens_per_second=1,180
152
+ throughput/device/batches_per_second=0.0492
153
+ [step=20/500000]
154
+ optim/total_grad_norm=21.01
155
+ train/ActionL1Loss=0.4468
156
+ throughput/total_tokens=3,840,000
157
+ throughput/device/tokens_per_second=1,180
158
+ throughput/device/batches_per_second=0.0492
159
+ System/Peak GPU Memory (MB)=46,917
160
+ [step=21/500000]
161
+ train/ActionL1Loss=0.4660
162
+ throughput/total_tokens=4,032,000
163
+ throughput/device/tokens_per_second=1,179
164
+ throughput/device/batches_per_second=0.0492
165
+ [step=22/500000]
166
+ train/ActionL1Loss=0.3718
167
+ throughput/total_tokens=4,224,000
168
+ throughput/device/tokens_per_second=1,179
169
+ throughput/device/batches_per_second=0.0492
170
+ [step=23/500000]
171
+ train/ActionL1Loss=0.4880
172
+ throughput/total_tokens=4,416,000
173
+ throughput/device/tokens_per_second=1,180
174
+ throughput/device/batches_per_second=0.0492
175
+ [step=24/500000]
176
+ train/ActionL1Loss=0.4259
177
+ throughput/total_tokens=4,608,000
178
+ throughput/device/tokens_per_second=1,180
179
+ throughput/device/batches_per_second=0.0492
180
+ [step=25/500000]
181
+ train/ActionL1Loss=0.4473
182
+ throughput/total_tokens=4,800,000
183
+ throughput/device/tokens_per_second=1,180
184
+ throughput/device/batches_per_second=0.0492
185
+ [step=26/500000]
186
+ train/ActionL1Loss=0.4736
187
+ throughput/total_tokens=4,992,000
188
+ throughput/device/tokens_per_second=1,180
189
+ throughput/device/batches_per_second=0.0492
190
+ [step=27/500000]
191
+ train/ActionL1Loss=0.4105
192
+ throughput/total_tokens=5,184,000
193
+ throughput/device/tokens_per_second=1,180
194
+ throughput/device/batches_per_second=0.0492
195
+ [step=28/500000]
196
+ train/ActionL1Loss=0.4386
197
+ throughput/total_tokens=5,376,000
198
+ throughput/device/tokens_per_second=1,180
199
+ throughput/device/batches_per_second=0.0492
200
+ [step=29/500000]
201
+ train/ActionL1Loss=0.4463
202
+ throughput/total_tokens=5,568,000
203
+ throughput/device/tokens_per_second=1,180
204
+ throughput/device/batches_per_second=0.0492
205
+ [step=30/500000]
206
+ train/ActionL1Loss=0.4582
207
+ throughput/total_tokens=5,760,000
208
+ throughput/device/tokens_per_second=1,180
209
+ throughput/device/batches_per_second=0.0492
210
+ System/Peak GPU Memory (MB)=46,917
211
+ [step=31/500000]
212
+ train/ActionL1Loss=0.3000
213
+ throughput/total_tokens=5,952,000
214
+ throughput/device/tokens_per_second=1,180
215
+ throughput/device/batches_per_second=0.0492
216
+ [step=32/500000]
217
+ train/ActionL1Loss=0.4196
218
+ throughput/total_tokens=6,144,000
219
+ throughput/device/tokens_per_second=1,180
220
+ throughput/device/batches_per_second=0.0492
221
+ [step=33/500000]
222
+ train/ActionL1Loss=0.4201
223
+ throughput/total_tokens=6,336,000
224
+ throughput/device/tokens_per_second=1,181
225
+ throughput/device/batches_per_second=0.0492
226
+ [step=34/500000]
227
+ train/ActionL1Loss=0.3680
228
+ throughput/total_tokens=6,528,000
229
+ throughput/device/tokens_per_second=1,181
230
+ throughput/device/batches_per_second=0.0492
231
+ [step=35/500000]
232
+ train/ActionL1Loss=0.3642
233
+ throughput/total_tokens=6,720,000
234
+ throughput/device/tokens_per_second=1,181
235
+ throughput/device/batches_per_second=0.0492
236
+ [step=36/500000]
237
+ train/ActionL1Loss=0.4062
238
+ throughput/total_tokens=6,912,000
239
+ throughput/device/tokens_per_second=1,181
240
+ throughput/device/batches_per_second=0.0492
241
+ [step=37/500000]
242
+ train/ActionL1Loss=0.4864
243
+ throughput/total_tokens=7,104,000
244
+ throughput/device/tokens_per_second=1,181
245
+ throughput/device/batches_per_second=0.0492
246
+ [step=38/500000]
247
+ train/ActionL1Loss=0.4030
248
+ throughput/total_tokens=7,296,000
249
+ throughput/device/tokens_per_second=1,181
250
+ throughput/device/batches_per_second=0.0492
251
+ [step=39/500000]
252
+ train/ActionL1Loss=0.3131
253
+ throughput/total_tokens=7,488,000
254
+ throughput/device/tokens_per_second=1,181
255
+ throughput/device/batches_per_second=0.0492
256
+ [step=40/500000]
257
+ optim/total_grad_norm=17.23
258
+ train/ActionL1Loss=0.4256
259
+ throughput/total_tokens=7,680,000
260
+ throughput/device/tokens_per_second=1,181
261
+ throughput/device/batches_per_second=0.0492
262
+ System/Peak GPU Memory (MB)=46,917
263
+ [step=41/500000]
264
+ train/ActionL1Loss=0.3575
265
+ throughput/total_tokens=7,872,000
266
+ throughput/device/tokens_per_second=1,181
267
+ throughput/device/batches_per_second=0.0492
268
+ [step=42/500000]
269
+ train/ActionL1Loss=0.4358
270
+ throughput/total_tokens=8,064,000
271
+ throughput/device/tokens_per_second=1,181
272
+ throughput/device/batches_per_second=0.0492
273
+ [step=43/500000]
274
+ train/ActionL1Loss=0.2869
275
+ throughput/total_tokens=8,256,000
276
+ throughput/device/tokens_per_second=1,181
277
+ throughput/device/batches_per_second=0.0492
278
+ [step=44/500000]
279
+ train/ActionL1Loss=0.4891
280
+ throughput/total_tokens=8,448,000
281
+ throughput/device/tokens_per_second=1,182
282
+ throughput/device/batches_per_second=0.0493
283
+ [step=45/500000]
284
+ train/ActionL1Loss=0.3633
285
+ throughput/total_tokens=8,640,000
286
+ throughput/device/tokens_per_second=1,182
287
+ throughput/device/batches_per_second=0.0493
288
+ [step=46/500000]
289
+ train/ActionL1Loss=0.3974
290
+ throughput/total_tokens=8,832,000
291
+ throughput/device/tokens_per_second=1,181
292
+ throughput/device/batches_per_second=0.0492
293
+ [step=47/500000]
294
+ train/ActionL1Loss=0.3156
295
+ throughput/total_tokens=9,024,000
296
+ throughput/device/tokens_per_second=1,181
297
+ throughput/device/batches_per_second=0.0492
298
+ [step=48/500000]
299
+ train/ActionL1Loss=0.4408
300
+ throughput/total_tokens=9,216,000
301
+ throughput/device/tokens_per_second=1,181
302
+ throughput/device/batches_per_second=0.0492
303
+ [step=49/500000]
304
+ train/ActionL1Loss=0.3966
305
+ throughput/total_tokens=9,408,000
306
+ throughput/device/tokens_per_second=1,181
307
+ throughput/device/batches_per_second=0.0492
308
+ [step=50/500000]
309
+ train/ActionL1Loss=0.3903
310
+ throughput/total_tokens=9,600,000
311
+ throughput/device/tokens_per_second=1,181
312
+ throughput/device/batches_per_second=0.0492
313
+ System/Peak GPU Memory (MB)=46,917
314
+ [step=51/500000]
315
+ train/ActionL1Loss=0.2963
316
+ throughput/total_tokens=9,792,000
317
+ throughput/device/tokens_per_second=1,177
318
+ throughput/device/batches_per_second=0.0491
wandb/wandb/run-20251002_155441-70dhy5dq/logs/debug-internal.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-10-02T15:54:42.154138214Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
2
+ {"time":"2025-10-02T15:54:43.180595015Z","level":"INFO","msg":"stream: created new stream","id":"70dhy5dq"}
3
+ {"time":"2025-10-02T15:54:43.180644946Z","level":"INFO","msg":"stream: started","id":"70dhy5dq"}
4
+ {"time":"2025-10-02T15:54:43.180663737Z","level":"INFO","msg":"sender: started","stream_id":"70dhy5dq"}
5
+ {"time":"2025-10-02T15:54:43.180659826Z","level":"INFO","msg":"writer: started","stream_id":"70dhy5dq"}
6
+ {"time":"2025-10-02T15:54:43.180682767Z","level":"INFO","msg":"handler: started","stream_id":"70dhy5dq"}
wandb/wandb/run-20251002_155442-6v8q0jgn/files/requirements.txt ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ai2-molmo==0.0.0
2
+ astunparse==1.6.3
3
+ flatbuffers==25.2.10
4
+ gast==0.6.0
5
+ google-pasta==0.2.0
6
+ h5py==3.14.0
7
+ libclang==18.1.1
8
+ Markdown==3.9
9
+ namex==0.1.0
10
+ opt_einsum==3.4.0
11
+ optree==0.17.0
12
+ tensorboard-data-server==0.7.2
13
+ tensorflow-io-gcs-filesystem==0.37.1
14
+ termcolor==3.1.0
15
+ Werkzeug==3.1.3
16
+ Brotli==1.1.0
17
+ Farama-Notifications==0.0.4
18
+ MarkupSafe==2.1.5
19
+ PyYAML==6.0.2
20
+ absl-py==2.3.1
21
+ accelerate==1.10.1
22
+ ai2-molmo==0.0.0
23
+ aiofiles==24.1.0
24
+ aiohappyeyeballs==2.6.1
25
+ aiohttp==3.12.15
26
+ aiosignal==1.4.0
27
+ annotated-types==0.7.0
28
+ antlr4-python3-runtime==4.9.3
29
+ anyio==4.10.0
30
+ array_record==0.8.1
31
+ async-timeout==5.0.1
32
+ attrs==25.3.0
33
+ av==15.1.0
34
+ backports.tarfile==1.2.0
35
+ beaker-gantry==3.2.0
36
+ beaker-py==2.5.0
37
+ black==23.12.1
38
+ blinker==1.9.0
39
+ boltons==25.0.0
40
+ boto3==1.40.33
41
+ botocore==1.40.33
42
+ build==1.3.0
43
+ cached_path==1.7.3
44
+ cached-property==2.0.1
45
+ cachetools==5.5.2
46
+ certifi==2025.8.3
47
+ cffi==2.0.0
48
+ charset-normalizer==3.4.3
49
+ click==8.2.1
50
+ click-help-colors==0.9.4
51
+ click-option-group==0.5.7
52
+ cloudpickle==3.1.1
53
+ cmake==4.1.0
54
+ contourpy==1.3.2
55
+ cryptography==46.0.1
56
+ cycler==0.12.1
57
+ dataclass-extensions==0.2.3
58
+ datasets==3.6.0
59
+ decorator==5.2.1
60
+ deepdiff==8.6.1
61
+ diffusers==0.35.1
62
+ dill==0.3.8
63
+ distro==1.9.0
64
+ dlimp==0.0.1
65
+ dm-tree==0.1.9
66
+ docutils==0.22.1
67
+ draccus==0.10.0
68
+ editdistance==0.8.1
69
+ einops==0.8.1
70
+ einops-exts==0.0.4
71
+ et_xmlfile==2.0.0
72
+ etils==1.13.0
73
+ evdev==1.9.2
74
+ exceptiongroup==1.3.0
75
+ face==24.0.0
76
+ fastapi==0.116.2
77
+ ffmpy==0.6.1
78
+ fiddle==0.3.0
79
+ filelock==3.13.1
80
+ Flask==3.1.2
81
+ fonttools==4.60.0
82
+ frozenlist==1.7.0
83
+ fsspec==2023.9.2
84
+ ftfy==6.3.1
85
+ gcsfs==2023.9.2
86
+ gitdb==4.0.12
87
+ GitPython==3.1.45
88
+ glom==24.11.0
89
+ google-api-core==2.25.1
90
+ google-auth==2.40.3
91
+ google-auth-oauthlib==1.2.2
92
+ google-cloud-core==2.4.3
93
+ google-cloud-storage==2.19.0
94
+ google-crc32c==1.7.1
95
+ google-resumable-media==2.7.2
96
+ googleapis-common-protos==1.70.0
97
+ gradio==5.46.0
98
+ gradio_client==1.13.0
99
+ graphviz==0.21
100
+ groovy==0.1.2
101
+ grpcio==1.75.0
102
+ gymnasium==0.29.1
103
+ h11==0.16.0
104
+ hf_transfer==0.1.9
105
+ hf-xet==1.1.10
106
+ httpcore==1.0.9
107
+ httpx==0.28.1
108
+ huggingface-hub==0.35.0
109
+ id==1.5.0
110
+ idna==3.10
111
+ imageio==2.37.0
112
+ imageio-ffmpeg==0.6.0
113
+ importlib_metadata==8.7.0
114
+ importlib_resources==6.5.2
115
+ iniconfig==2.1.0
116
+ inquirerpy==0.3.4
117
+ isort==5.12.0
118
+ itsdangerous==2.2.0
119
+ jaraco.classes==3.4.0
120
+ jaraco.context==6.0.1
121
+ jaraco.functools==4.3.0
122
+ jeepney==0.9.0
123
+ Jinja2==3.1.4
124
+ jiter==0.11.0
125
+ jmespath==1.0.1
126
+ joblib==1.5.2
127
+ jsonlines==4.0.0
128
+ keras==2.15.0
129
+ keyring==25.6.0
130
+ kiwisolver==1.4.9
131
+ latex2sympy2_extended==1.10.2
132
+ lerobot==0.3.4
133
+ Levenshtein==0.27.1
134
+ libcst==1.8.4
135
+ lightning-utilities==0.15.2
136
+ markdown-it-py==4.0.0
137
+ math-verify==0.8.0
138
+ matplotlib==3.10.6
139
+ mdurl==0.1.2
140
+ mergedeep==1.3.4
141
+ ml-dtypes==0.2.0
142
+ ml_dtypes==0.5.3
143
+ more-itertools==10.8.0
144
+ mpmath==1.3.0
145
+ msgspec==0.19.0
146
+ multidict==6.6.4
147
+ multiprocess==0.70.16
148
+ mypy==1.3.0
149
+ mypy_extensions==1.1.0
150
+ necessary==0.4.3
151
+ networkx==3.3
152
+ nh3==0.3.0
153
+ nltk==3.9.1
154
+ numpy==1.26.4
155
+ oauthlib==3.3.1
156
+ omegaconf==2.3.0
157
+ openai==1.108.0
158
+ opencv-python-headless==4.12.0.88
159
+ OpenEXR==3.4.0
160
+ openpyxl==3.1.5
161
+ orderly-set==5.5.0
162
+ orjson==3.11.3
163
+ packaging==25.0
164
+ pandas==2.3.2
165
+ pathspec==0.12.1
166
+ petname==2.6
167
+ pfzy==0.3.4
168
+ pillow==11.0.0
169
+ pip==25.2
170
+ platformdirs==4.4.0
171
+ pluggy==1.6.0
172
+ promise==2.3
173
+ prompt_toolkit==3.0.52
174
+ propcache==0.3.2
175
+ proto-plus==1.26.1
176
+ protobuf==4.21.12
177
+ protobuf==6.32.1
178
+ psutil==7.1.0
179
+ pyarrow==21.0.0
180
+ pyasn1==0.6.1
181
+ pyasn1_modules==0.4.2
182
+ pycparser==2.23
183
+ pydantic==2.11.9
184
+ pydantic_core==2.33.2
185
+ pydub==0.25.1
186
+ Pygments==2.19.2
187
+ pynput==1.8.1
188
+ pyparsing==3.2.4
189
+ pyproject_hooks==1.2.0
190
+ pyserial==3.5
191
+ pytest==8.4.2
192
+ pytest-sphinx==0.6.3
193
+ python-dateutil==2.9.0.post0
194
+ python-Levenshtein==0.27.1
195
+ python-multipart==0.0.20
196
+ python-xlib==0.33
197
+ pytorch-triton-rocm==3.4.0
198
+ pytz==2025.2
199
+ pyyaml-include==1.4.1
200
+ RapidFuzz==3.14.1
201
+ readme_renderer==44.0
202
+ regex==2025.9.1
203
+ requests==2.32.5
204
+ requests-oauthlib==2.0.0
205
+ requests-toolbelt==1.0.0
206
+ requirements-parser==0.13.0
207
+ rerun-sdk==0.22.1
208
+ rfc3986==2.0.0
209
+ rich==13.9.4
210
+ rsa==4.9.1
211
+ ruff==0.13.0
212
+ s3transfer==0.14.0
213
+ safehttpx==0.1.6
214
+ safetensors==0.6.2
215
+ scikit-learn==1.7.2
216
+ scipy==1.15.3
217
+ SecretStorage==3.4.0
218
+ semantic-version==2.10.0
219
+ sentencepiece==0.2.1
220
+ sentry-sdk==2.38.0
221
+ setuptools==78.1.1
222
+ shellingham==1.5.4
223
+ six==1.17.0
224
+ smart_open==7.3.1
225
+ smashed==0.21.5
226
+ smmap==5.0.2
227
+ sniffio==1.3.1
228
+ starlette==0.48.0
229
+ sympy==1.13.3
230
+ tensorboard==2.15.2
231
+ tensorboard==2.19.0
232
+ tensorflow==2.15.0
233
+ tensorflow-addons==0.23.0
234
+ tensorflow-datasets==4.9.3
235
+ tensorflow-estimator==2.15.0
236
+ tensorflow-graphics==2021.12.3
237
+ tensorflow-metadata==1.17.2
238
+ threadpoolctl==3.6.0
239
+ timm==1.0.19
240
+ tokenizers==0.22.0
241
+ toml==0.10.2
242
+ tomli==2.2.1
243
+ tomlkit==0.13.3
244
+ torch==2.8.0+rocm6.4
245
+ torchcodec==0.5
246
+ torchmetrics==1.8.2
247
+ torchvision==0.23.0+rocm6.4
248
+ tqdm==4.67.1
249
+ transformers==4.56.1
250
+ trimesh==4.8.2
251
+ trouting==0.3.3
252
+ twine==6.2.0
253
+ typeguard==2.13.3
254
+ typer==0.17.4
255
+ typing_extensions==4.15.0
256
+ typing-inspect==0.9.0
257
+ typing-inspection==0.4.1
258
+ tzdata==2025.2
259
+ urllib3==2.5.0
260
+ uvicorn==0.35.0
261
+ wandb==0.21.4
262
+ wcwidth==0.2.13
263
+ websockets==15.0.1
264
+ wheel==0.45.1
265
+ wrapt==1.14.2
266
+ xxhash==3.5.0
267
+ yarl==1.20.1
268
+ zipp==3.23.0
269
+ lerobot==0.3.4
270
+ minLoRA==0.1.0
271
+ autocommand==2.2.2
272
+ backports.tarfile==1.2.0
273
+ importlib_metadata==8.0.0
274
+ inflect==7.3.1
275
+ jaraco.collections==5.1.0
276
+ jaraco.context==5.3.0
277
+ jaraco.functools==4.0.1
278
+ jaraco.text==3.12.1
279
+ more-itertools==10.3.0
280
+ packaging==24.2
281
+ platformdirs==4.2.2
282
+ tomli==2.0.1
283
+ typeguard==4.3.0
284
+ typing_extensions==4.12.2
285
+ wheel==0.45.1
286
+ zipp==3.19.2
wipe/wandb/wandb/debug.log ADDED
File without changes
wipe/wandb/wandb/run-20251002_163436-itiyfljc/files/output.log ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb: Detected [openai] in use.
2
+ wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
3
+ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
4
+ 10/02 [16:34:38] WARNING | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No warnings.py:109
5
+ device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
6
+ warnings.warn( # warn only once
7
+
8
+ ****** vla_cfg: {'datasets': {'rlds': {'name': None, 'path': None, 'weight': 1.0, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Wipe', 8, 'bounds']], 'open-source-real-world': {'rlds': {'name': 'a1_real_world', 'path': '/vast/users/xiaodan/zhangjian/datasets/OXE', 'weight': 8, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [], 'agibot': {'path': None, 'weight': 8, 'action_proprio_normalization_type': None}}}, 'model': {'action_head': {'action_dim': 16, 'proprio_dim': 16, 'num_actions_chunk': 8, 'action_tokens_mapping': {'left_end_effector': 8, 'right_end_effector': 8}, 'use_left_eef': True, 'use_mobile_base': False}}}
9
+ ****** Skip RLDS main; path not found: None
10
+ ****** start build LeRobot main...
11
+ build_tokenizer, cache_dir None tokenizer_dir None
12
+ 10/02 [16:34:45] INFO | >> Padding tokenizer with 418 tokens tokenizer.py:130
13
+ INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:435
14
+ ****** before LeRobot dataset...
15
+ ****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Wipe
wipe/wandb/wandb/run-20251002_163436-itiyfljc/files/requirements.txt ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ai2-molmo==0.0.0
2
+ astunparse==1.6.3
3
+ flatbuffers==25.2.10
4
+ gast==0.6.0
5
+ google-pasta==0.2.0
6
+ h5py==3.14.0
7
+ libclang==18.1.1
8
+ Markdown==3.9
9
+ namex==0.1.0
10
+ opt_einsum==3.4.0
11
+ optree==0.17.0
12
+ tensorboard-data-server==0.7.2
13
+ tensorflow-io-gcs-filesystem==0.37.1
14
+ termcolor==3.1.0
15
+ Werkzeug==3.1.3
16
+ Brotli==1.1.0
17
+ Farama-Notifications==0.0.4
18
+ MarkupSafe==2.1.5
19
+ PyYAML==6.0.2
20
+ absl-py==2.3.1
21
+ accelerate==1.10.1
22
+ ai2-molmo==0.0.0
23
+ aiofiles==24.1.0
24
+ aiohappyeyeballs==2.6.1
25
+ aiohttp==3.12.15
26
+ aiosignal==1.4.0
27
+ annotated-types==0.7.0
28
+ antlr4-python3-runtime==4.9.3
29
+ anyio==4.10.0
30
+ array_record==0.8.1
31
+ async-timeout==5.0.1
32
+ attrs==25.3.0
33
+ av==15.1.0
34
+ backports.tarfile==1.2.0
35
+ beaker-gantry==3.2.0
36
+ beaker-py==2.5.0
37
+ black==23.12.1
38
+ blinker==1.9.0
39
+ boltons==25.0.0
40
+ boto3==1.40.33
41
+ botocore==1.40.33
42
+ build==1.3.0
43
+ cached_path==1.7.3
44
+ cached-property==2.0.1
45
+ cachetools==5.5.2
46
+ certifi==2025.8.3
47
+ cffi==2.0.0
48
+ charset-normalizer==3.4.3
49
+ click==8.2.1
50
+ click-help-colors==0.9.4
51
+ click-option-group==0.5.7
52
+ cloudpickle==3.1.1
53
+ cmake==4.1.0
54
+ contourpy==1.3.2
55
+ cryptography==46.0.1
56
+ cycler==0.12.1
57
+ dataclass-extensions==0.2.3
58
+ datasets==3.6.0
59
+ decorator==5.2.1
60
+ deepdiff==8.6.1
61
+ diffusers==0.35.1
62
+ dill==0.3.8
63
+ distro==1.9.0
64
+ dlimp==0.0.1
65
+ dm-tree==0.1.9
66
+ docutils==0.22.1
67
+ draccus==0.10.0
68
+ editdistance==0.8.1
69
+ einops==0.8.1
70
+ einops-exts==0.0.4
71
+ et_xmlfile==2.0.0
72
+ etils==1.13.0
73
+ evdev==1.9.2
74
+ exceptiongroup==1.3.0
75
+ face==24.0.0
76
+ fastapi==0.116.2
77
+ ffmpy==0.6.1
78
+ fiddle==0.3.0
79
+ filelock==3.13.1
80
+ Flask==3.1.2
81
+ fonttools==4.60.0
82
+ frozenlist==1.7.0
83
+ fsspec==2023.9.2
84
+ ftfy==6.3.1
85
+ gcsfs==2023.9.2
86
+ gitdb==4.0.12
87
+ GitPython==3.1.45
88
+ glom==24.11.0
89
+ google-api-core==2.25.1
90
+ google-auth==2.40.3
91
+ google-auth-oauthlib==1.2.2
92
+ google-cloud-core==2.4.3
93
+ google-cloud-storage==2.19.0
94
+ google-crc32c==1.7.1
95
+ google-resumable-media==2.7.2
96
+ googleapis-common-protos==1.70.0
97
+ gradio==5.46.0
98
+ gradio_client==1.13.0
99
+ graphviz==0.21
100
+ groovy==0.1.2
101
+ grpcio==1.75.0
102
+ gymnasium==0.29.1
103
+ h11==0.16.0
104
+ hf_transfer==0.1.9
105
+ hf-xet==1.1.10
106
+ httpcore==1.0.9
107
+ httpx==0.28.1
108
+ huggingface-hub==0.35.0
109
+ id==1.5.0
110
+ idna==3.10
111
+ imageio==2.37.0
112
+ imageio-ffmpeg==0.6.0
113
+ importlib_metadata==8.7.0
114
+ importlib_resources==6.5.2
115
+ iniconfig==2.1.0
116
+ inquirerpy==0.3.4
117
+ isort==5.12.0
118
+ itsdangerous==2.2.0
119
+ jaraco.classes==3.4.0
120
+ jaraco.context==6.0.1
121
+ jaraco.functools==4.3.0
122
+ jeepney==0.9.0
123
+ Jinja2==3.1.4
124
+ jiter==0.11.0
125
+ jmespath==1.0.1
126
+ joblib==1.5.2
127
+ jsonlines==4.0.0
128
+ keras==2.15.0
129
+ keyring==25.6.0
130
+ kiwisolver==1.4.9
131
+ latex2sympy2_extended==1.10.2
132
+ lerobot==0.3.4
133
+ Levenshtein==0.27.1
134
+ libcst==1.8.4
135
+ lightning-utilities==0.15.2
136
+ markdown-it-py==4.0.0
137
+ math-verify==0.8.0
138
+ matplotlib==3.10.6
139
+ mdurl==0.1.2
140
+ mergedeep==1.3.4
141
+ ml-dtypes==0.2.0
142
+ ml_dtypes==0.5.3
143
+ more-itertools==10.8.0
144
+ mpmath==1.3.0
145
+ msgspec==0.19.0
146
+ multidict==6.6.4
147
+ multiprocess==0.70.16
148
+ mypy==1.3.0
149
+ mypy_extensions==1.1.0
150
+ necessary==0.4.3
151
+ networkx==3.3
152
+ nh3==0.3.0
153
+ nltk==3.9.1
154
+ numpy==1.26.4
155
+ oauthlib==3.3.1
156
+ omegaconf==2.3.0
157
+ openai==1.108.0
158
+ opencv-python-headless==4.12.0.88
159
+ OpenEXR==3.4.0
160
+ openpyxl==3.1.5
161
+ orderly-set==5.5.0
162
+ orjson==3.11.3
163
+ packaging==25.0
164
+ pandas==2.3.2
165
+ pathspec==0.12.1
166
+ petname==2.6
167
+ pfzy==0.3.4
168
+ pillow==11.0.0
169
+ pip==25.2
170
+ platformdirs==4.4.0
171
+ pluggy==1.6.0
172
+ promise==2.3
173
+ prompt_toolkit==3.0.52
174
+ propcache==0.3.2
175
+ proto-plus==1.26.1
176
+ protobuf==4.21.12
177
+ protobuf==6.32.1
178
+ psutil==7.1.0
179
+ pyarrow==21.0.0
180
+ pyasn1==0.6.1
181
+ pyasn1_modules==0.4.2
182
+ pycparser==2.23
183
+ pydantic==2.11.9
184
+ pydantic_core==2.33.2
185
+ pydub==0.25.1
186
+ Pygments==2.19.2
187
+ pynput==1.8.1
188
+ pyparsing==3.2.4
189
+ pyproject_hooks==1.2.0
190
+ pyserial==3.5
191
+ pytest==8.4.2
192
+ pytest-sphinx==0.6.3
193
+ python-dateutil==2.9.0.post0
194
+ python-Levenshtein==0.27.1
195
+ python-multipart==0.0.20
196
+ python-xlib==0.33
197
+ pytorch-triton-rocm==3.4.0
198
+ pytz==2025.2
199
+ pyyaml-include==1.4.1
200
+ RapidFuzz==3.14.1
201
+ readme_renderer==44.0
202
+ regex==2025.9.1
203
+ requests==2.32.5
204
+ requests-oauthlib==2.0.0
205
+ requests-toolbelt==1.0.0
206
+ requirements-parser==0.13.0
207
+ rerun-sdk==0.22.1
208
+ rfc3986==2.0.0
209
+ rich==13.9.4
210
+ rsa==4.9.1
211
+ ruff==0.13.0
212
+ s3transfer==0.14.0
213
+ safehttpx==0.1.6
214
+ safetensors==0.6.2
215
+ scikit-learn==1.7.2
216
+ scipy==1.15.3
217
+ SecretStorage==3.4.0
218
+ semantic-version==2.10.0
219
+ sentencepiece==0.2.1
220
+ sentry-sdk==2.38.0
221
+ setuptools==78.1.1
222
+ shellingham==1.5.4
223
+ six==1.17.0
224
+ smart_open==7.3.1
225
+ smashed==0.21.5
226
+ smmap==5.0.2
227
+ sniffio==1.3.1
228
+ starlette==0.48.0
229
+ sympy==1.13.3
230
+ tensorboard==2.15.2
231
+ tensorboard==2.19.0
232
+ tensorflow==2.15.0
233
+ tensorflow-addons==0.23.0
234
+ tensorflow-datasets==4.9.3
235
+ tensorflow-estimator==2.15.0
236
+ tensorflow-graphics==2021.12.3
237
+ tensorflow-metadata==1.17.2
238
+ threadpoolctl==3.6.0
239
+ timm==1.0.19
240
+ tokenizers==0.22.0
241
+ toml==0.10.2
242
+ tomli==2.2.1
243
+ tomlkit==0.13.3
244
+ torch==2.8.0+rocm6.4
245
+ torchcodec==0.5
246
+ torchmetrics==1.8.2
247
+ torchvision==0.23.0+rocm6.4
248
+ tqdm==4.67.1
249
+ transformers==4.56.1
250
+ trimesh==4.8.2
251
+ trouting==0.3.3
252
+ twine==6.2.0
253
+ typeguard==2.13.3
254
+ typer==0.17.4
255
+ typing_extensions==4.15.0
256
+ typing-inspect==0.9.0
257
+ typing-inspection==0.4.1
258
+ tzdata==2025.2
259
+ urllib3==2.5.0
260
+ uvicorn==0.35.0
261
+ wandb==0.21.4
262
+ wcwidth==0.2.13
263
+ websockets==15.0.1
264
+ wheel==0.45.1
265
+ wrapt==1.14.2
266
+ xxhash==3.5.0
267
+ yarl==1.20.1
268
+ zipp==3.23.0
269
+ lerobot==0.3.4
270
+ minLoRA==0.1.0
271
+ autocommand==2.2.2
272
+ backports.tarfile==1.2.0
273
+ importlib_metadata==8.0.0
274
+ inflect==7.3.1
275
+ jaraco.collections==5.1.0
276
+ jaraco.context==5.3.0
277
+ jaraco.functools==4.0.1
278
+ jaraco.text==3.12.1
279
+ more-itertools==10.3.0
280
+ packaging==24.2
281
+ platformdirs==4.2.2
282
+ tomli==2.0.1
283
+ typeguard==4.3.0
284
+ typing_extensions==4.12.2
285
+ wheel==0.45.1
286
+ zipp==3.19.2
wipe/wandb/wandb/run-20251002_163436-itiyfljc/logs/debug-core.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-10-02T16:34:36.433466086Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmptn5gtmeu/port-1817135.txt","pid":1817135,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-10-02T16:34:36.434959359Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":1817135}
3
+ {"time":"2025-10-02T16:34:36.434944369Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-1817135-1817304-2550672707/socket","Net":"unix"}}
4
+ {"time":"2025-10-02T16:34:36.610751367Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-10-02T16:34:36.618123521Z","level":"INFO","msg":"handleInformInit: received","streamId":"itiyfljc","id":"1(@)"}
6
+ {"time":"2025-10-02T16:34:37.749798524Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"itiyfljc","id":"1(@)"}
wipe/wandb/wandb/run-20251002_163436-itiyfljc/logs/debug.log ADDED
File without changes
wipe_flow_matching/step11500-action-head/metadata.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fb72b6306ce04d1beb20bb289509f00c39a40845ff7c4b36bf4deb4e83fe82a
3
+ size 1331
wipe_flow_matching/step12000-action-head/metadata.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:995307502120af3866f237cd0bc484fc848a652539d28e53cbea882abc16ba6b
3
+ size 1331
wipe_flow_matching/step12000-unsharded/lora.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b09055f15a54dd092b4dd30833406731057005822da0c55c16231cf2e68f7f6
3
+ size 1243
wipe_flow_matching/step12000-unsharded/train.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd8bfbb0ee49ab78063ef6cfdd404afa5cc66b67c8d3c5bb7cd6db0cb4c048d5
3
+ size 15061
wipe_flow_matching/wandb/wandb/run-20251005_163812-0cfqmuqw/files/output.log ADDED
The diff for this file is too large to render. See raw diff
 
wipe_flow_matching/wandb/wandb/run-20251005_163812-0cfqmuqw/files/requirements.txt ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ai2-molmo==0.0.0
2
+ astunparse==1.6.3
3
+ flatbuffers==25.2.10
4
+ gast==0.6.0
5
+ google-pasta==0.2.0
6
+ h5py==3.14.0
7
+ libclang==18.1.1
8
+ Markdown==3.9
9
+ namex==0.1.0
10
+ opt_einsum==3.4.0
11
+ optree==0.17.0
12
+ tensorboard-data-server==0.7.2
13
+ tensorflow-io-gcs-filesystem==0.37.1
14
+ termcolor==3.1.0
15
+ Werkzeug==3.1.3
16
+ Brotli==1.1.0
17
+ Farama-Notifications==0.0.4
18
+ MarkupSafe==2.1.5
19
+ PyYAML==6.0.2
20
+ absl-py==2.3.1
21
+ accelerate==1.10.1
22
+ ai2-molmo==0.0.0
23
+ aiofiles==24.1.0
24
+ aiohappyeyeballs==2.6.1
25
+ aiohttp==3.12.15
26
+ aiosignal==1.4.0
27
+ annotated-types==0.7.0
28
+ antlr4-python3-runtime==4.9.3
29
+ anyio==4.10.0
30
+ array_record==0.8.1
31
+ async-timeout==5.0.1
32
+ attrs==25.3.0
33
+ av==15.1.0
34
+ backports.tarfile==1.2.0
35
+ beaker-gantry==3.2.0
36
+ beaker-py==2.5.0
37
+ black==23.12.1
38
+ blinker==1.9.0
39
+ boltons==25.0.0
40
+ boto3==1.40.33
41
+ botocore==1.40.33
42
+ build==1.3.0
43
+ cached_path==1.7.3
44
+ cached-property==2.0.1
45
+ cachetools==5.5.2
46
+ certifi==2025.8.3
47
+ cffi==2.0.0
48
+ charset-normalizer==3.4.3
49
+ click==8.2.1
50
+ click-help-colors==0.9.4
51
+ click-option-group==0.5.7
52
+ cloudpickle==3.1.1
53
+ cmake==4.1.0
54
+ contourpy==1.3.2
55
+ cryptography==46.0.1
56
+ cycler==0.12.1
57
+ dataclass-extensions==0.2.3
58
+ datasets==3.6.0
59
+ decorator==5.2.1
60
+ deepdiff==8.6.1
61
+ diffusers==0.35.1
62
+ dill==0.3.8
63
+ distro==1.9.0
64
+ dlimp==0.0.1
65
+ dm-tree==0.1.9
66
+ docutils==0.22.1
67
+ draccus==0.10.0
68
+ editdistance==0.8.1
69
+ einops==0.8.1
70
+ einops-exts==0.0.4
71
+ et_xmlfile==2.0.0
72
+ etils==1.13.0
73
+ evdev==1.9.2
74
+ exceptiongroup==1.3.0
75
+ face==24.0.0
76
+ fastapi==0.116.2
77
+ ffmpy==0.6.1
78
+ fiddle==0.3.0
79
+ filelock==3.13.1
80
+ Flask==3.1.2
81
+ fonttools==4.60.0
82
+ frozenlist==1.7.0
83
+ fsspec==2023.9.2
84
+ ftfy==6.3.1
85
+ gcsfs==2023.9.2
86
+ gitdb==4.0.12
87
+ GitPython==3.1.45
88
+ glom==24.11.0
89
+ google-api-core==2.25.1
90
+ google-auth==2.40.3
91
+ google-auth-oauthlib==1.2.2
92
+ google-cloud-core==2.4.3
93
+ google-cloud-storage==2.19.0
94
+ google-crc32c==1.7.1
95
+ google-resumable-media==2.7.2
96
+ googleapis-common-protos==1.70.0
97
+ gradio==5.46.0
98
+ gradio_client==1.13.0
99
+ graphviz==0.21
100
+ groovy==0.1.2
101
+ grpcio==1.75.0
102
+ gymnasium==0.29.1
103
+ h11==0.16.0
104
+ hf_transfer==0.1.9
105
+ hf-xet==1.1.10
106
+ httpcore==1.0.9
107
+ httpx==0.28.1
108
+ huggingface-hub==0.35.0
109
+ id==1.5.0
110
+ idna==3.10
111
+ imageio==2.37.0
112
+ imageio-ffmpeg==0.6.0
113
+ importlib_metadata==8.7.0
114
+ importlib_resources==6.5.2
115
+ iniconfig==2.1.0
116
+ inquirerpy==0.3.4
117
+ isort==5.12.0
118
+ itsdangerous==2.2.0
119
+ jaraco.classes==3.4.0
120
+ jaraco.context==6.0.1
121
+ jaraco.functools==4.3.0
122
+ jeepney==0.9.0
123
+ Jinja2==3.1.4
124
+ jiter==0.11.0
125
+ jmespath==1.0.1
126
+ joblib==1.5.2
127
+ jsonlines==4.0.0
128
+ keras==2.15.0
129
+ keyring==25.6.0
130
+ kiwisolver==1.4.9
131
+ latex2sympy2_extended==1.10.2
132
+ lerobot==0.3.4
133
+ Levenshtein==0.27.1
134
+ libcst==1.8.4
135
+ lightning-utilities==0.15.2
136
+ markdown-it-py==4.0.0
137
+ math-verify==0.8.0
138
+ matplotlib==3.10.6
139
+ mdurl==0.1.2
140
+ mergedeep==1.3.4
141
+ ml-dtypes==0.2.0
142
+ ml_dtypes==0.5.3
143
+ more-itertools==10.8.0
144
+ mpmath==1.3.0
145
+ msgspec==0.19.0
146
+ multidict==6.6.4
147
+ multiprocess==0.70.16
148
+ mypy==1.3.0
149
+ mypy_extensions==1.1.0
150
+ necessary==0.4.3
151
+ networkx==3.3
152
+ nh3==0.3.0
153
+ nltk==3.9.1
154
+ numpy==1.26.4
155
+ oauthlib==3.3.1
156
+ omegaconf==2.3.0
157
+ openai==1.108.0
158
+ opencv-python-headless==4.12.0.88
159
+ OpenEXR==3.4.0
160
+ openpyxl==3.1.5
161
+ orderly-set==5.5.0
162
+ orjson==3.11.3
163
+ packaging==25.0
164
+ pandas==2.3.2
165
+ pathspec==0.12.1
166
+ petname==2.6
167
+ pfzy==0.3.4
168
+ pillow==11.0.0
169
+ pip==25.2
170
+ platformdirs==4.4.0
171
+ pluggy==1.6.0
172
+ promise==2.3
173
+ prompt_toolkit==3.0.52
174
+ propcache==0.3.2
175
+ proto-plus==1.26.1
176
+ protobuf==4.21.12
177
+ protobuf==6.32.1
178
+ psutil==7.1.0
179
+ pyarrow==21.0.0
180
+ pyasn1==0.6.1
181
+ pyasn1_modules==0.4.2
182
+ pycparser==2.23
183
+ pydantic==2.11.9
184
+ pydantic_core==2.33.2
185
+ pydub==0.25.1
186
+ Pygments==2.19.2
187
+ pynput==1.8.1
188
+ pyparsing==3.2.4
189
+ pyproject_hooks==1.2.0
190
+ pyserial==3.5
191
+ pytest==8.4.2
192
+ pytest-sphinx==0.6.3
193
+ python-dateutil==2.9.0.post0
194
+ python-Levenshtein==0.27.1
195
+ python-multipart==0.0.20
196
+ python-xlib==0.33
197
+ pytorch-triton-rocm==3.4.0
198
+ pytz==2025.2
199
+ pyyaml-include==1.4.1
200
+ RapidFuzz==3.14.1
201
+ readme_renderer==44.0
202
+ regex==2025.9.1
203
+ requests==2.32.5
204
+ requests-oauthlib==2.0.0
205
+ requests-toolbelt==1.0.0
206
+ requirements-parser==0.13.0
207
+ rerun-sdk==0.22.1
208
+ rfc3986==2.0.0
209
+ rich==13.9.4
210
+ rsa==4.9.1
211
+ ruff==0.13.0
212
+ s3transfer==0.14.0
213
+ safehttpx==0.1.6
214
+ safetensors==0.6.2
215
+ scikit-learn==1.7.2
216
+ scipy==1.15.3
217
+ SecretStorage==3.4.0
218
+ semantic-version==2.10.0
219
+ sentencepiece==0.2.1
220
+ sentry-sdk==2.38.0
221
+ setuptools==78.1.1
222
+ shellingham==1.5.4
223
+ six==1.17.0
224
+ smart_open==7.3.1
225
+ smashed==0.21.5
226
+ smmap==5.0.2
227
+ sniffio==1.3.1
228
+ starlette==0.48.0
229
+ sympy==1.13.3
230
+ tensorboard==2.15.2
231
+ tensorboard==2.19.0
232
+ tensorflow==2.15.0
233
+ tensorflow-addons==0.23.0
234
+ tensorflow-datasets==4.9.3
235
+ tensorflow-estimator==2.15.0
236
+ tensorflow-graphics==2021.12.3
237
+ tensorflow-metadata==1.17.2
238
+ threadpoolctl==3.6.0
239
+ timm==1.0.19
240
+ tokenizers==0.22.0
241
+ toml==0.10.2
242
+ tomli==2.2.1
243
+ tomlkit==0.13.3
244
+ torch==2.8.0+rocm6.4
245
+ torchcodec==0.5
246
+ torchmetrics==1.8.2
247
+ torchvision==0.23.0+rocm6.4
248
+ tqdm==4.67.1
249
+ transformers==4.56.1
250
+ trimesh==4.8.2
251
+ trouting==0.3.3
252
+ twine==6.2.0
253
+ typeguard==2.13.3
254
+ typer==0.17.4
255
+ typing_extensions==4.15.0
256
+ typing-inspect==0.9.0
257
+ typing-inspection==0.4.1
258
+ tzdata==2025.2
259
+ urllib3==2.5.0
260
+ uvicorn==0.35.0
261
+ wandb==0.21.4
262
+ wcwidth==0.2.13
263
+ websockets==15.0.1
264
+ wheel==0.45.1
265
+ wrapt==1.14.2
266
+ xxhash==3.5.0
267
+ yarl==1.20.1
268
+ zipp==3.23.0
269
+ lerobot==0.3.4
270
+ minLoRA==0.1.0
271
+ autocommand==2.2.2
272
+ backports.tarfile==1.2.0
273
+ importlib_metadata==8.0.0
274
+ inflect==7.3.1
275
+ jaraco.collections==5.1.0
276
+ jaraco.context==5.3.0
277
+ jaraco.functools==4.0.1
278
+ jaraco.text==3.12.1
279
+ more-itertools==10.3.0
280
+ packaging==24.2
281
+ platformdirs==4.2.2
282
+ tomli==2.0.1
283
+ typeguard==4.3.0
284
+ typing_extensions==4.12.2
285
+ wheel==0.45.1
286
+ zipp==3.19.2
wipe_flow_matching/wandb/wandb/run-20251005_163812-0cfqmuqw/logs/debug-internal.log ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-10-05T16:38:13.19911913Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
2
+ {"time":"2025-10-05T16:38:14.385618537Z","level":"INFO","msg":"stream: created new stream","id":"0cfqmuqw"}
3
+ {"time":"2025-10-05T16:38:14.385648767Z","level":"INFO","msg":"stream: started","id":"0cfqmuqw"}
4
+ {"time":"2025-10-05T16:38:14.385660457Z","level":"INFO","msg":"handler: started","stream_id":"0cfqmuqw"}
5
+ {"time":"2025-10-05T16:38:14.385655167Z","level":"INFO","msg":"writer: started","stream_id":"0cfqmuqw"}
6
+ {"time":"2025-10-05T16:38:14.385680798Z","level":"INFO","msg":"sender: started","stream_id":"0cfqmuqw"}
7
+ {"time":"2025-10-06T16:34:15.587824169Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/henryeap/a1-realworld/0cfqmuqw/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
8
+ {"time":"2025-10-06T18:35:03.703248769Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/henryeap/a1-realworld/0cfqmuqw/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
9
+ {"time":"2025-10-06T20:02:36.97363154Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
10
+ {"time":"2025-10-07T05:02:26.79910172Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
wipe_flow_matching/wandb/wandb/run-20251005_163812-0cfqmuqw/logs/debug.log ADDED
File without changes
wipe_l1_regression/step11500-action-head/metadata.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f3468869fca8c228cf940661de292bc786f2b18fa96d39a892606183f8dd9c0
3
+ size 1331
wipe_l1_regression/step12000-action-head/metadata.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:620f952d81c645b6e811733acfcda83cb57d683353c484746edb322f5094c21e
3
+ size 1331
wipe_l1_regression/step12000-unsharded/lora.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b09055f15a54dd092b4dd30833406731057005822da0c55c16231cf2e68f7f6
3
+ size 1243
wipe_l1_regression/step12000-unsharded/train.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:724fab83190f8cbcb009297c5aa7582489c7d894b16beebfd03fdc7f888a2ce8
3
+ size 15061
wipe_l1_regression/wandb/wandb/run-20251005_163743-a1znetn8/files/output.log ADDED
The diff for this file is too large to render. See raw diff