JianZhangAI committed on
Commit
255fd7c
·
verified ·
1 Parent(s): 8ea7e6d

Upload folder using huggingface_hub

Browse files
Molmo-7B-10131629-5000/config.yaml ADDED
@@ -0,0 +1,344 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: multitask_train
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 3584
7
+ n_heads: 28
8
+ n_kv_heads: 4
9
+ qkv_bias: true
10
+ clip_qkv: null
11
+ n_layers: 28
12
+ mlp_ratio: 4
13
+ mlp_hidden_size: 37888
14
+ activation_type: swiglu
15
+ block_type: sequential
16
+ block_group_size: 1
17
+ rope: true
18
+ rope_full_precision: true
19
+ rope_theta: 1000000.0
20
+ vision_backbone:
21
+ image_model_type: openai
22
+ image_default_input_size:
23
+ - 336
24
+ - 336
25
+ image_patch_size: 14
26
+ image_pos_patch_size: 14
27
+ image_emb_dim: 1024
28
+ image_num_heads: 16
29
+ image_num_key_value_heads: 16
30
+ image_num_layers: 23
31
+ image_head_dim: 64
32
+ image_mlp_dim: 4096
33
+ image_mlp_activations: quick_gelu
34
+ image_dropout_rate: 0.0
35
+ image_num_pos: 577
36
+ image_norm_eps: 1.0e-05
37
+ attention_dropout: 0.0
38
+ residual_dropout: 0.0
39
+ initializer_range: 0.02
40
+ fsdp_wrap: false
41
+ resize_mode: default
42
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
43
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
44
+ low_cpu_fsdp: true
45
+ attention_type: sdpa
46
+ float32_attention: true
47
+ attention_dropout: 0.0
48
+ attention_layer_norm: false
49
+ residual_dropout: 0.1
50
+ response_residual_dropout: 0.0
51
+ embedding_dropout: 0.0
52
+ layer_norm_type: rms
53
+ layer_norm_with_affine: true
54
+ layer_norm_eps: 1.0e-06
55
+ attention_layer_norm_with_affine: true
56
+ max_sequence_length: 4096
57
+ max_position_embeddings: null
58
+ include_bias: false
59
+ bias_for_layer_norm: null
60
+ scale_logits: false
61
+ vocab_size: 152064
62
+ embedding_size: 152064
63
+ ff_out_size: 152192
64
+ additional_vocab_size: 128
65
+ new_embedding_init_range: 0.02
66
+ weight_tying: false
67
+ init_device: cpu
68
+ init_fn: normal
69
+ init_std: 0.02
70
+ init_cutoff_factor: null
71
+ norm_after: false
72
+ precision: amp_bf16
73
+ max_crops: 12
74
+ crop_mode: overlap-and-resize-c2
75
+ use_col_tokens: true
76
+ prompt_type: uber_model
77
+ system_prompt_kind: demo_or_style
78
+ message_formatting: role
79
+ always_start_with_space: true
80
+ multi_annotation_weighting: root_subsegments
81
+ default_inference_len: 128
82
+ overlap_margins:
83
+ - 4
84
+ - 4
85
+ pad_value: 0.0
86
+ image_padding_embed: pad_and_partial_pad
87
+ fix_image_padding: true
88
+ vit_layers:
89
+ - -2
90
+ - -9
91
+ image_pooling_h: 2
92
+ image_pooling_w: 2
93
+ image_pooling_2d: attention_meanq
94
+ image_projector: mlp
95
+ image_feature_dropout: 0.0
96
+ initializer_range: 0.02
97
+ normalize_input_embeds: false
98
+ use_position_ids: true
99
+ head_dim: null
100
+ action_tokenizer:
101
+ identifier: physical-intelligence/fast
102
+ tokenizer_dir: null
103
+ action_dim: 7
104
+ num_actions_chunk: 8
105
+ tokenizer:
106
+ identifier: Qwen/Qwen2-7B
107
+ tokenizer_dir: null
108
+ pad_tokenizer: true
109
+ moe_num_experts: 8
110
+ moe_top_k: 2
111
+ moe_mlp_impl: sparse
112
+ moe_log_expert_assignment: false
113
+ moe_shared_expert: false
114
+ moe_lbl_in_fp32: false
115
+ moe_interleave: false
116
+ moe_loss_weight: 0.1
117
+ moe_zloss_weight: null
118
+ moe_dropless: true
119
+ moe_capacity_factor: 1.25
120
+ action_head: l1_regression
121
+ num_diffusion_steps: 1000
122
+ num_diffusion_inference_steps: 30
123
+ use_proprio: false
124
+ action_head_dit_hidden_size: 1024
125
+ action_head_dit_depth: 14
126
+ action_head_dit_num_heads: 16
127
+ llm_causal_attention: false
128
+ action_use_left_eef: false
129
+ action_use_mobile_base: false
130
+ allow_resume: true
131
+ ft_llm: true
132
+ ft_vit: true
133
+ ft_connector: true
134
+ ft_embedding: lm_head
135
+ lora: false
136
+ use_lora: false
137
+ lora_rank: 32
138
+ lora_llm: false
139
+ lora_vit: false
140
+ lora_connector: false
141
+ optimizer:
142
+ name: adamw
143
+ learning_rate: 0.0001
144
+ weight_decay: 0.01
145
+ betas:
146
+ - 0.9
147
+ - 0.95
148
+ eps: 1.0e-05
149
+ connector_learning_rate: 0.0002
150
+ vit_learning_rate: 6.0e-06
151
+ llm_learning_rate: 2.0e-05
152
+ connector_weight_decay: 0.0
153
+ vit_weight_decay: 0.0
154
+ llm_weight_decay: 0.0
155
+ connector_betas:
156
+ - 0.9
157
+ - 0.95
158
+ vit_betas:
159
+ - 0.9
160
+ - 0.95
161
+ llm_betas:
162
+ - 0.9
163
+ - 0.95
164
+ connector_eps: 1.0e-06
165
+ vit_eps: 1.0e-06
166
+ llm_eps: 1.0e-06
167
+ metrics_log_interval: 20
168
+ scheduler:
169
+ name: multimodal
170
+ units: steps
171
+ t_warmup: 100
172
+ t_max: null
173
+ alpha_f: 0.1
174
+ connector_t_warmup: 200
175
+ vit_t_warmup: 1000
176
+ llm_t_warmup: 1000
177
+ grad_clip_warmup_steps: null
178
+ grad_clip_warmup_factor: null
179
+ warmup_min_lr: 0.0
180
+ data:
181
+ dataset: null
182
+ mixture: null
183
+ root_size_mixture:
184
+ - rate: 0.125
185
+ mixture:
186
+ sr_planning: 500000.0
187
+ robovqa: 300000.0
188
+ - rate: 0.125
189
+ mixture:
190
+ pixmo_ask_model_anything: null
191
+ pixmo_cap: null
192
+ pixmo_points: null
193
+ pixmo_count: null
194
+ blip_laion_cc: null
195
+ - rate: 0.125
196
+ mixture:
197
+ sr_affordance: null
198
+ - rate: 0.125
199
+ mixture:
200
+ sr_trajectory: null
201
+ - rate: 0.5
202
+ mixture:
203
+ oxe_magic_soup_plus_minus_A1: null
204
+ split: train
205
+ seed: 50189
206
+ shuffle_messages: true
207
+ pad: to_max
208
+ sequence_length: 2304
209
+ shuffle: true
210
+ for_inference: false
211
+ multi_modal: torch
212
+ num_workers: 0
213
+ drop_last: true
214
+ pin_memory: true
215
+ prefetch_factor: null
216
+ persistent_workers: false
217
+ timeout: 0
218
+ rlds_dataset_name: ''
219
+ rlds_data_root_dir: null
220
+ use_wrist_image: false
221
+ use_proprio: false
222
+ restore_dataloader: true
223
+ fast_forward_batches: null
224
+ evaluators: []
225
+ eval_interval: 12000
226
+ inf_eval_interval: 12000
227
+ inf_evaluators:
228
+ - label: robovqa
229
+ data:
230
+ dataset: robovqa
231
+ mixture: null
232
+ root_size_mixture: null
233
+ split: validation
234
+ seed: null
235
+ shuffle_messages: true
236
+ pad: to_max
237
+ sequence_length: 1792
238
+ shuffle: true
239
+ for_inference: true
240
+ multi_modal: torch
241
+ num_workers: 0
242
+ drop_last: true
243
+ pin_memory: true
244
+ prefetch_factor: null
245
+ persistent_workers: true
246
+ timeout: 0
247
+ rlds_dataset_name: ''
248
+ rlds_data_root_dir: null
249
+ use_wrist_image: false
250
+ use_proprio: false
251
+ device_eval_batch_size: null
252
+ subset_num_batches: null
253
+ max_examples: 2048
254
+ max_new_tokens: 128
255
+ mm_evaluator:
256
+ n_to_log: 0
257
+ num_wandb_examples: 32
258
+ save_predictions: null
259
+ save_tokens: false
260
+ vqa_eval: robovqa_score
261
+ multi_threshold_box_eval: false
262
+ coordinate_eval: false
263
+ pointing_eval: false
264
+ count_eval: false
265
+ point_count_eval: false
266
+ trajectory_eval: false
267
+ android_eval: false
268
+ clock_eval: false
269
+ clock_bench_eval: false
270
+ math_vista_eval: false
271
+ action_eval: false
272
+ save_dir: null
273
+ save_to_checkpoint_dir: false
274
+ eval_name: null
275
+ skip_if_metrics_cached: true
276
+ save_folder: /vast/users/xiaodan/zhangkaidong/A1/model/checkpoints/10131629
277
+ remote_save_folder: null
278
+ canceled_check_interval: 50
279
+ save_interval: 500
280
+ save_interval_unsharded: 32000
281
+ save_interval_ephemeral: null
282
+ save_interval_action_head: null
283
+ save_num_checkpoints_to_keep: 1
284
+ save_num_unsharded_checkpoints_to_keep: -1
285
+ save_num_action_head_checkpoints_to_keep: -1
286
+ save_overwrite: true
287
+ force_save_unsharded: false
288
+ no_pre_train_checkpoint: true
289
+ initial_model_checkpoint: /vast/users/xiaodan/zhangkaidong/A1/model/MolmoE-7B-10061402-3000
290
+ load_model_config: null
291
+ load_path: null
292
+ load_path_sharded_checkpointer: null
293
+ reset_optimizer_state: false
294
+ reset_trainer_state: false
295
+ save_dataloader_state: false
296
+ reset_dataloader_state: false
297
+ sharded_checkpointer: torch_legacy
298
+ max_duration: 32000
299
+ global_train_batch_size: 256
300
+ device_train_batch_size: 32
301
+ device_train_microbatch_size: 4
302
+ device_eval_batch_size: 32
303
+ eval_subset_num_batches: 8
304
+ eval_on_load: false
305
+ device_inf_eval_batch_size: 4
306
+ inf_eval_subset_num_batches: -1
307
+ device_train_grad_accum: 8
308
+ max_grad_norm: 1.0
309
+ multi_component_grad_norm: true
310
+ batch_divisor: global_batch
311
+ max_grad_norm_ratio: null
312
+ precision: amp_bf16
313
+ wandb:
314
+ project: molmo_training
315
+ entity: ''
316
+ group: null
317
+ name: MolmoE-7B-A1-pretrain-A1
318
+ tags:
319
+ - watching
320
+ log_artifacts: false
321
+ rank_zero_only: true
322
+ log_interval: 20
323
+ speed_monitor:
324
+ window_size: 20
325
+ gpu_flops_available: null
326
+ console_log_interval: 1
327
+ gen1_gc_interval: 1
328
+ compile: null
329
+ fsdp:
330
+ use_orig_params: true
331
+ sharding_strategy: FULL_SHARD
332
+ wrapping_strategy: by_block_and_size
333
+ precision: pure
334
+ hybrid_sharding_num_model_replicas: null
335
+ softmax_auxiliary_loss: true
336
+ softmax_auxiliary_loss_scale: 0.0001
337
+ time_limit: null
338
+ extra_steps_after_cancel: 10
339
+ python_profiling: false
340
+ torch_profiling: false
341
+ stop_at: 32000
342
+ stop_after: null
343
+ activation_checkpointing: whole_layer
344
+ fused_loss: null
Molmo-7B-10131629-5000/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1859ce065348151318d9b847981b27dcf6f764188725dc45bf7e0e9e7657f133
3
+ size 32086136618