JianZhangAI committed on
Commit
f56d49d
·
1 Parent(s): c7bb048

add steps24000

Browse files
libero_4_molmo-7b-d_clip_l1_regression_wrist_proprio_ft_ah_fully_ft_llm_bs240/step24000-unsharded/config.yaml ADDED
@@ -0,0 +1,316 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: libero_4_molmo-7b-d_clip_l1_regression_wrist_proprio_ft_ah_fully_ft_llm_bs240_20250929_170947
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 3584
7
+ n_heads: 28
8
+ n_kv_heads: 4
9
+ qkv_bias: true
10
+ clip_qkv: null
11
+ n_layers: 28
12
+ mlp_ratio: 4
13
+ mlp_hidden_size: 37888
14
+ activation_type: swiglu
15
+ block_type: sequential
16
+ block_group_size: 1
17
+ rope: true
18
+ rope_full_precision: true
19
+ rope_theta: 1000000.0
20
+ vision_backbone:
21
+ image_model_type: openai
22
+ image_default_input_size:
23
+ - 336
24
+ - 336
25
+ image_patch_size: 14
26
+ image_pos_patch_size: 14
27
+ image_emb_dim: 1024
28
+ image_num_heads: 16
29
+ image_num_key_value_heads: 16
30
+ image_num_layers: 23
31
+ image_head_dim: 64
32
+ image_mlp_dim: 4096
33
+ image_mlp_activations: quick_gelu
34
+ image_dropout_rate: 0.0
35
+ image_num_pos: 577
36
+ image_norm_eps: 1.0e-05
37
+ attention_dropout: 0.0
38
+ residual_dropout: 0.0
39
+ initializer_range: 0.02
40
+ fsdp_wrap: false
41
+ resize_mode: default
42
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
43
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
44
+ low_cpu_fsdp: false
45
+ attention_type: sdpa
46
+ float32_attention: true
47
+ attention_dropout: 0.0
48
+ attention_layer_norm: false
49
+ residual_dropout: 0.1
50
+ response_residual_dropout: 0.0
51
+ embedding_dropout: 0.0
52
+ layer_norm_type: rms
53
+ layer_norm_with_affine: true
54
+ layer_norm_eps: 1.0e-06
55
+ attention_layer_norm_with_affine: true
56
+ max_sequence_length: 4096
57
+ max_position_embeddings: null
58
+ include_bias: false
59
+ bias_for_layer_norm: null
60
+ scale_logits: false
61
+ vocab_size: 152064
62
+ embedding_size: 152064
63
+ additional_vocab_size: 128
64
+ new_embedding_init_range: 0.02
65
+ weight_tying: false
66
+ init_device: null
67
+ init_fn: normal
68
+ init_std: 0.02
69
+ init_cutoff_factor: null
70
+ norm_after: false
71
+ precision: amp_bf16
72
+ max_crops: 12
73
+ crop_mode: overlap-and-resize-c2
74
+ use_col_tokens: true
75
+ prompt_type: uber_model
76
+ system_prompt_kind: demo_or_style
77
+ message_formatting: role
78
+ always_start_with_space: true
79
+ multi_annotation_weighting: root_subsegments
80
+ default_inference_len: 65
81
+ overlap_margins:
82
+ - 4
83
+ - 4
84
+ pad_value: 0.0
85
+ image_padding_embed: pad_and_partial_pad
86
+ fix_image_padding: true
87
+ vit_layers:
88
+ - -2
89
+ - -9
90
+ image_pooling_h: 2
91
+ image_pooling_w: 2
92
+ image_pooling_2d: attention_meanq
93
+ image_projector: mlp
94
+ image_feature_dropout: 0.0
95
+ initializer_range: 0.02
96
+ normalize_input_embeds: false
97
+ use_position_ids: true
98
+ head_dim: null
99
+ tokenizer:
100
+ identifier: Qwen/Qwen2-7B
101
+ tokenizer_dir: null
102
+ pad_tokenizer: true
103
+ moe_num_experts: 8
104
+ moe_top_k: 2
105
+ moe_mlp_impl: sparse
106
+ moe_log_expert_assignment: false
107
+ moe_shared_expert: false
108
+ moe_lbl_in_fp32: false
109
+ moe_interleave: false
110
+ moe_loss_weight: 0.1
111
+ moe_zloss_weight: null
112
+ moe_dropless: true
113
+ moe_capacity_factor: 1.25
114
+ action_head: l1_regression
115
+ num_diffusion_steps: 1000
116
+ num_diffusion_inference_steps: 30
117
+ use_proprio: true
118
+ action_head_dit_hidden_size: 1152
119
+ action_head_dit_depth: 28
120
+ action_head_dit_num_heads: 16
121
+ llm_causal_attention: false
122
+ action_use_left_eef: false
123
+ action_use_mobile_base: false
124
+ allow_resume: true
125
+ ft_llm: true
126
+ ft_vit: false
127
+ ft_connector: false
128
+ ft_embedding: lm_head
129
+ lora: false
130
+ use_lora: false
131
+ lora_rank: 8
132
+ lora_llm: false
133
+ lora_vit: false
134
+ lora_connector: false
135
+ early_exit: false
136
+ train_exit_random_layer: false
137
+ optimizer:
138
+ name: adamw
139
+ learning_rate: 0.0001
140
+ weight_decay: 0.01
141
+ betas:
142
+ - 0.9
143
+ - 0.95
144
+ eps: 1.0e-05
145
+ connector_learning_rate: 0.0002
146
+ vit_learning_rate: 6.0e-06
147
+ llm_learning_rate: 5.0e-05
148
+ connector_weight_decay: 0.0
149
+ vit_weight_decay: 0.0
150
+ llm_weight_decay: 0.0
151
+ connector_betas:
152
+ - 0.9
153
+ - 0.95
154
+ vit_betas:
155
+ - 0.9
156
+ - 0.95
157
+ llm_betas:
158
+ - 0.9
159
+ - 0.95
160
+ connector_eps: 1.0e-06
161
+ vit_eps: 1.0e-06
162
+ llm_eps: 1.0e-06
163
+ metrics_log_interval: 20
164
+ scheduler:
165
+ name: multimodal
166
+ units: steps
167
+ t_warmup: 100
168
+ t_max: null
169
+ alpha_f: 0.1
170
+ connector_t_warmup: 200
171
+ vit_t_warmup: 2000
172
+ llm_t_warmup: 2000
173
+ grad_clip_warmup_steps: null
174
+ grad_clip_warmup_factor: null
175
+ warmup_min_lr: 0.0
176
+ data:
177
+ dataset: vla_dataset_simulation
178
+ mixture: null
179
+ root_size_mixture: null
180
+ split: train
181
+ seed: 95818
182
+ shuffle_messages: false
183
+ pad: to_max
184
+ sequence_length: 768
185
+ shuffle: true
186
+ for_inference: false
187
+ multi_modal: torch
188
+ num_workers: 0
189
+ drop_last: true
190
+ pin_memory: true
191
+ prefetch_factor: null
192
+ persistent_workers: false
193
+ timeout: 0
194
+ rlds_dataset_name: libero_4_task_suites_no_noops
195
+ rlds_data_root_dir: /vast/users/meng.cao/datasets/modified_libero_rlds
196
+ use_wrist_image: true
197
+ use_proprio: true
198
+ rlds_shuffle_buffer_size: 100000
199
+ rlds_traj_threads: 8
200
+ rlds_read_threads: 8
201
+ lerobot_episode_index_start: null
202
+ lerobot_episode_index_end: null
203
+ restore_dataloader: true
204
+ fast_forward_batches: null
205
+ evaluators:
206
+ - label: val
207
+ data:
208
+ dataset: vla_dataset_simulation
209
+ mixture: null
210
+ root_size_mixture: null
211
+ split: validation
212
+ seed: null
213
+ shuffle_messages: false
214
+ pad: to_max
215
+ sequence_length: 768
216
+ shuffle: false
217
+ for_inference: false
218
+ multi_modal: torch
219
+ num_workers: 0
220
+ drop_last: true
221
+ pin_memory: true
222
+ prefetch_factor: null
223
+ persistent_workers: true
224
+ timeout: 0
225
+ rlds_dataset_name: libero_4_task_suites_no_noops
226
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
227
+ use_wrist_image: true
228
+ use_proprio: true
229
+ rlds_shuffle_buffer_size: 256000
230
+ rlds_traj_threads: 8
231
+ rlds_read_threads: 8
232
+ lerobot_episode_index_start: 353
233
+ lerobot_episode_index_end: 765
234
+ device_eval_batch_size: null
235
+ subset_num_batches: 64
236
+ max_examples: null
237
+ max_new_tokens: 448
238
+ mm_evaluator: null
239
+ save_dir: null
240
+ save_to_checkpoint_dir: false
241
+ eval_name: null
242
+ skip_if_metrics_cached: true
243
+ eval_interval: 0
244
+ inf_eval_interval: -1
245
+ inf_evaluators: []
246
+ save_folder: /vast/users/meng.cao/checkpoints/libero_4_molmo-7b-d_clip_l1_regression_wrist_proprio_ft_ah_fully_ft_llm_bs240
247
+ remote_save_folder: null
248
+ canceled_check_interval: 50
249
+ save_interval: 500
250
+ save_interval_unsharded: 500
251
+ save_interval_ephemeral: null
252
+ save_interval_action_head: 500
253
+ save_num_checkpoints_to_keep: 1
254
+ save_num_unsharded_checkpoints_to_keep: 1
255
+ save_num_action_head_checkpoints_to_keep: 2
256
+ save_overwrite: true
257
+ force_save_unsharded: false
258
+ no_pre_train_checkpoint: true
259
+ initial_model_checkpoint: /vast/users/meng.cao/molmo_data/Molmo-7B-D-0924
260
+ load_model_config: null
261
+ checkpoint_dir: /vast/users/meng.cao/molmo_data/Molmo-7B-D-0924
262
+ load_path: /vast/users/meng.cao/checkpoints/libero_4_molmo-7b-d_clip_l1_regression_wrist_proprio_ft_ah_fully_ft_llm_bs240/step12500
263
+ load_path_sharded_checkpointer: null
264
+ reset_optimizer_state: false
265
+ reset_trainer_state: false
266
+ save_dataloader_state: false
267
+ reset_dataloader_state: false
268
+ keep_lr_on_load: true
269
+ sharded_checkpointer: torch_legacy
270
+ max_duration: 500000
271
+ global_train_batch_size: 240
272
+ device_train_batch_size: 30
273
+ device_train_microbatch_size: 30
274
+ device_eval_batch_size: 4
275
+ eval_subset_num_batches: -1
276
+ eval_on_load: false
277
+ device_inf_eval_batch_size: 16
278
+ inf_eval_subset_num_batches: -1
279
+ device_train_grad_accum: 1
280
+ max_grad_norm: 1.0
281
+ multi_component_grad_norm: true
282
+ batch_divisor: global_batch
283
+ max_grad_norm_ratio: null
284
+ precision: amp_bf16
285
+ wandb:
286
+ project: a1-vla-meng
287
+ entity: demo0
288
+ group: null
289
+ name: libero_4_molmo-7b-d_clip_l1_regression_wrist_proprio_ft_ah_fully_ft_llm_bs240_20250929_170947
290
+ tags:
291
+ - watching
292
+ log_artifacts: false
293
+ rank_zero_only: true
294
+ log_interval: 1
295
+ speed_monitor:
296
+ window_size: 20
297
+ gpu_flops_available: null
298
+ console_log_interval: 1
299
+ gen1_gc_interval: 1
300
+ compile: null
301
+ fsdp:
302
+ use_orig_params: true
303
+ sharding_strategy: FULL_SHARD
304
+ wrapping_strategy: by_block_and_size
305
+ precision: float
306
+ hybrid_sharding_num_model_replicas: null
307
+ softmax_auxiliary_loss: true
308
+ softmax_auxiliary_loss_scale: 0.0001
309
+ time_limit: null
310
+ extra_steps_after_cancel: 10
311
+ python_profiling: false
312
+ torch_profiling: false
313
+ stop_at: 500000
314
+ stop_after: null
315
+ activation_checkpointing: whole_layer
316
+ fused_loss: null
libero_4_molmo-7b-d_clip_l1_regression_wrist_proprio_ft_ah_fully_ft_llm_bs240/step24000-unsharded/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eaf0c9e4ba1688466a29c64206fb0c6964f0a863387f19c2e12768df1ff83096
3
+ size 32598769367