jianzhang96 commited on
Commit
9cb2a50
·
verified ·
1 Parent(s): 7870e6c

Upload folder using huggingface_hub

Browse files
xiaoyu/dobot_pour_water_full-cook_vegetable_full-crop_tidy_up_the_desk-pealing_fruit-crop_stack_cups_Molmo-7B-D-0924_openai_seq1000_flow_matching-pvf-2d_attn_mask_three_images_resized336_overlap-and-resize-c2_5_proprio_ft_ah_fullyft_llm_bs240/step7000-unsharded/config.yaml ADDED
@@ -0,0 +1,332 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: dobot_pour_water_full-cook_vegetable_full-crop_tidy_up_the_desk-pealing_fruit-crop_stack_cups_Molmo-7B-D-0924_openai_seq1000_flow_matching-pvf-2d_attn_mask_three_images_resized336_overlap-and-resize-c2_5_proprio_ft_ah_fullyft_llm_bs240_20251127_133646
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 3584
7
+ n_heads: 28
8
+ n_kv_heads: 4
9
+ qkv_bias: true
10
+ clip_qkv: null
11
+ n_layers: 28
12
+ mlp_ratio: 4
13
+ mlp_hidden_size: 37888
14
+ activation_type: swiglu
15
+ block_type: sequential
16
+ block_group_size: 1
17
+ rope: true
18
+ rope_full_precision: true
19
+ rope_theta: 1000000.0
20
+ vision_backbone:
21
+ image_model_type: openai
22
+ image_default_input_size:
23
+ - 336
24
+ - 336
25
+ image_patch_size: 14
26
+ image_pos_patch_size: 14
27
+ image_emb_dim: 1024
28
+ image_num_heads: 16
29
+ image_num_key_value_heads: 16
30
+ image_num_layers: 23
31
+ image_head_dim: 64
32
+ image_mlp_dim: 4096
33
+ image_mlp_activations: quick_gelu
34
+ image_dropout_rate: 0.0
35
+ image_num_pos: 577
36
+ image_norm_eps: 1.0e-05
37
+ attention_dropout: 0.0
38
+ residual_dropout: 0.0
39
+ initializer_range: 0.02
40
+ fsdp_wrap: false
41
+ resize_mode: default
42
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
43
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
44
+ low_cpu_fsdp: true
45
+ attention_type: sdpa
46
+ float32_attention: true
47
+ attention_dropout: 0.0
48
+ attention_layer_norm: false
49
+ residual_dropout: 0.1
50
+ response_residual_dropout: 0.0
51
+ embedding_dropout: 0.0
52
+ layer_norm_type: rms
53
+ layer_norm_with_affine: true
54
+ layer_norm_eps: 1.0e-06
55
+ attention_layer_norm_with_affine: true
56
+ max_sequence_length: 4096
57
+ max_position_embeddings: null
58
+ include_bias: false
59
+ bias_for_layer_norm: null
60
+ scale_logits: false
61
+ vocab_size: 152064
62
+ embedding_size: 152064
63
+ ff_out_size: null
64
+ additional_vocab_size: 128
65
+ new_embedding_init_range: 0.02
66
+ weight_tying: false
67
+ init_device: null
68
+ init_fn: normal
69
+ init_std: 0.02
70
+ init_cutoff_factor: null
71
+ norm_after: false
72
+ precision: amp_bf16
73
+ max_crops: 5
74
+ crop_mode: overlap-and-resize-c2
75
+ use_col_tokens: true
76
+ prompt_type: uber_model
77
+ system_prompt_kind: demo_or_style
78
+ message_formatting: role
79
+ always_start_with_space: true
80
+ multi_annotation_weighting: root_subsegments
81
+ default_inference_len: 65
82
+ overlap_margins:
83
+ - 4
84
+ - 4
85
+ pad_value: 0.0
86
+ image_padding_embed: pad_and_partial_pad
87
+ fix_image_padding: true
88
+ vit_layers:
89
+ - -2
90
+ - -9
91
+ image_pooling_h: 2
92
+ image_pooling_w: 2
93
+ image_pooling_2d: attention_meanq
94
+ image_projector: mlp
95
+ image_feature_dropout: 0.0
96
+ initializer_range: 0.02
97
+ normalize_input_embeds: false
98
+ use_position_ids: true
99
+ head_dim: null
100
+ action_tokenizer:
101
+ identifier: physical-intelligence/fast
102
+ tokenizer_dir: null
103
+ tokenizer:
104
+ identifier: Qwen/Qwen2-7B
105
+ tokenizer_dir: null
106
+ pad_tokenizer: true
107
+ moe_num_experts: 8
108
+ moe_top_k: 2
109
+ moe_mlp_impl: sparse
110
+ moe_log_expert_assignment: false
111
+ moe_shared_expert: false
112
+ moe_lbl_in_fp32: false
113
+ moe_interleave: false
114
+ moe_loss_weight: 0.1
115
+ moe_zloss_weight: null
116
+ moe_dropless: true
117
+ moe_capacity_factor: 1.25
118
+ action_head: flow_matching
119
+ action_dim: 14
120
+ fixed_action_dim: 14
121
+ right_end_effector_dim: 7
122
+ left_end_effector_dim: 7
123
+ mobile_base_dim: 3
124
+ num_actions_chunk: 16
125
+ proprio_dim: 14
126
+ num_diffusion_steps: 1000
127
+ num_diffusion_inference_steps: 30
128
+ use_proprio: true
129
+ action_head_dit_hidden_size: 1152
130
+ action_head_dit_depth: 28
131
+ action_head_dit_num_heads: 16
132
+ action_head_flow_matching_dim: 1024
133
+ action_head_flow_matching_layers: 28
134
+ action_head_flow_matching_heads: 8
135
+ action_head_flow_matching_intermediate_size: 2048
136
+ action_head_flow_matching_pvf_function: 2d_attn_mask
137
+ llm_causal_attention: false
138
+ action_use_left_eef: true
139
+ action_use_mobile_base: false
140
+ allow_resume: true
141
+ ft_llm: true
142
+ ft_vit: false
143
+ ft_connector: false
144
+ ft_embedding: lm_head
145
+ lora: false
146
+ use_lora: false
147
+ lora_rank: 8
148
+ lora_llm: false
149
+ lora_vit: false
150
+ lora_connector: false
151
+ early_exit: false
152
+ train_exit_random_layer: false
153
+ optimizer:
154
+ name: adamw
155
+ learning_rate: 0.0001
156
+ weight_decay: 0.01
157
+ betas:
158
+ - 0.9
159
+ - 0.95
160
+ eps: 1.0e-05
161
+ connector_learning_rate: 0.0002
162
+ vit_learning_rate: 6.0e-06
163
+ llm_learning_rate: 5.0e-05
164
+ connector_weight_decay: 0.0
165
+ vit_weight_decay: 0.0
166
+ llm_weight_decay: 0.0
167
+ connector_betas:
168
+ - 0.9
169
+ - 0.95
170
+ vit_betas:
171
+ - 0.9
172
+ - 0.95
173
+ llm_betas:
174
+ - 0.9
175
+ - 0.95
176
+ connector_eps: 1.0e-06
177
+ vit_eps: 1.0e-06
178
+ llm_eps: 1.0e-06
179
+ metrics_log_interval: 20
180
+ scheduler:
181
+ name: multimodal
182
+ units: steps
183
+ t_warmup: 100
184
+ t_max: null
185
+ alpha_f: 0.1
186
+ connector_t_warmup: 200
187
+ vit_t_warmup: 2000
188
+ llm_t_warmup: 2000
189
+ grad_clip_warmup_steps: null
190
+ grad_clip_warmup_factor: null
191
+ warmup_min_lr: 0.0
192
+ data:
193
+ dataset: vla_dataset_realmachine
194
+ mixture: null
195
+ root_size_mixture: null
196
+ split: train
197
+ seed: 95818
198
+ shuffle_messages: false
199
+ pad: to_max
200
+ sequence_length: 1000
201
+ shuffle: true
202
+ for_inference: false
203
+ multi_modal: torch
204
+ num_workers: 2
205
+ drop_last: true
206
+ pin_memory: true
207
+ prefetch_factor: null
208
+ persistent_workers: false
209
+ timeout: 0
210
+ rlds_dataset_name: libero_4_task_suites_no_noops
211
+ rlds_data_root_dir: /vast/users/xiaodan/zhangjian/HuggingFace/dataset/Dobot-Xtrainer/LeRobotDatasetV2.1_dobot_crop_grab_pass_place
212
+ use_wrist_image: true
213
+ use_proprio: true
214
+ rlds_shuffle_buffer_size: 100000
215
+ rlds_traj_threads: 2
216
+ rlds_read_threads: 2
217
+ lerobot_episode_index_start: null
218
+ lerobot_episode_index_end: null
219
+ restore_dataloader: true
220
+ fast_forward_batches: null
221
+ evaluators:
222
+ - label: val
223
+ data:
224
+ dataset: vla_dataset_realmachine
225
+ mixture: null
226
+ root_size_mixture: null
227
+ split: validation
228
+ seed: null
229
+ shuffle_messages: false
230
+ pad: to_max
231
+ sequence_length: 1000
232
+ shuffle: false
233
+ for_inference: false
234
+ multi_modal: torch
235
+ num_workers: 0
236
+ drop_last: true
237
+ pin_memory: true
238
+ prefetch_factor: null
239
+ persistent_workers: true
240
+ timeout: 0
241
+ rlds_dataset_name: libero_4_task_suites_no_noops
242
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
243
+ use_wrist_image: true
244
+ use_proprio: true
245
+ rlds_shuffle_buffer_size: 256000
246
+ rlds_traj_threads: 8
247
+ rlds_read_threads: 8
248
+ lerobot_episode_index_start: 353
249
+ lerobot_episode_index_end: 765
250
+ device_eval_batch_size: null
251
+ subset_num_batches: 64
252
+ max_examples: null
253
+ max_new_tokens: 448
254
+ mm_evaluator: null
255
+ save_dir: null
256
+ save_to_checkpoint_dir: false
257
+ eval_name: null
258
+ skip_if_metrics_cached: true
259
+ eval_interval: 0
260
+ inf_eval_interval: -1
261
+ inf_evaluators: []
262
+ save_folder: /vast/users/xiaodan/zhangjian/checkpoints_real/dobot_pour_water_full-cook_vegetable_full-crop_tidy_up_the_desk-pealing_fruit-crop_stack_cups_Molmo-7B-D-0924_openai_seq1000_flow_matching-pvf-2d_attn_mask_three_images_resized336_overlap-and-resize-c2_5_proprio_ft_ah_fullyft_llm_bs240
263
+ remote_save_folder: null
264
+ canceled_check_interval: 50
265
+ save_interval: 500
266
+ save_interval_unsharded: 500
267
+ save_interval_ephemeral: null
268
+ save_interval_action_head: 500
269
+ save_num_checkpoints_to_keep: 1
270
+ save_num_unsharded_checkpoints_to_keep: 1
271
+ save_num_action_head_checkpoints_to_keep: 2
272
+ save_overwrite: true
273
+ force_save_unsharded: false
274
+ no_pre_train_checkpoint: true
275
+ initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
276
+ load_model_config: null
277
+ checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
278
+ load_path: null
279
+ load_path_sharded_checkpointer: null
280
+ reset_optimizer_state: false
281
+ reset_trainer_state: false
282
+ save_dataloader_state: false
283
+ reset_dataloader_state: false
284
+ keep_lr_on_load: true
285
+ sharded_checkpointer: torch_legacy
286
+ max_duration: 500000
287
+ global_train_batch_size: 240
288
+ device_train_batch_size: 30
289
+ device_train_microbatch_size: 30
290
+ device_eval_batch_size: 4
291
+ eval_subset_num_batches: -1
292
+ eval_on_load: false
293
+ device_inf_eval_batch_size: 16
294
+ inf_eval_subset_num_batches: -1
295
+ device_train_grad_accum: 1
296
+ max_grad_norm: 1.0
297
+ multi_component_grad_norm: true
298
+ batch_divisor: global_batch
299
+ max_grad_norm_ratio: null
300
+ precision: amp_bf16
301
+ wandb:
302
+ project: a1-vla-realmachine
303
+ entity: demo0
304
+ group: null
305
+ name: dobot_pour_water_full-cook_vegetable_full-crop_tidy_up_the_desk-pealing_fruit-crop_stack_cups_Molmo-7B-D-0924_openai_seq1000_flow_matching-pvf-2d_attn_mask_three_images_resized336_overlap-and-resize-c2_5_proprio_ft_ah_fullyft_llm_bs240_20251127_133646
306
+ tags:
307
+ - watching
308
+ log_artifacts: false
309
+ rank_zero_only: true
310
+ log_interval: 1
311
+ speed_monitor:
312
+ window_size: 20
313
+ gpu_flops_available: null
314
+ console_log_interval: 1
315
+ gen1_gc_interval: 1
316
+ compile: null
317
+ fsdp:
318
+ use_orig_params: true
319
+ sharding_strategy: FULL_SHARD
320
+ wrapping_strategy: by_block_and_size
321
+ precision: float
322
+ hybrid_sharding_num_model_replicas: null
323
+ softmax_auxiliary_loss: true
324
+ softmax_auxiliary_loss_scale: 0.0001
325
+ time_limit: null
326
+ extra_steps_after_cancel: 10
327
+ python_profiling: false
328
+ torch_profiling: false
329
+ stop_at: 500000
330
+ stop_after: null
331
+ activation_checkpointing: whole_layer
332
+ fused_loss: null
xiaoyu/dobot_pour_water_full-cook_vegetable_full-crop_tidy_up_the_desk-pealing_fruit-crop_stack_cups_Molmo-7B-D-0924_openai_seq1000_flow_matching-pvf-2d_attn_mask_three_images_resized336_overlap-and-resize-c2_5_proprio_ft_ah_fullyft_llm_bs240/step7000-unsharded/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7349a1ef169c085f2800171f385271d8ae75f27dbbf78d18e94d19aae38b6525
3
+ size 33841343207
xiaoyu/dobot_pour_water_full-cook_vegetable_full-crop_tidy_up_the_desk-pealing_fruit-crop_stack_cups_Molmo-7B-D-0924_openai_seq1000_flow_matching-pvf-2d_attn_mask_three_images_resized336_overlap-and-resize-c2_5_proprio_ft_ah_fullyft_llm_bs240/step7000-unsharded/train.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68327995a8648e0e54e8ca775240cc0afa5421892cc459fedf59e08adfc8e89c
3
+ size 15189