Robotics
English
MobileManipulation
snehal-allenai committed on
Commit
0786434
·
verified ·
1 Parent(s): 7c45429

Upload MolmoBot RBY1 Multitask weights (step74000 unsharded)

Browse files
Files changed (2) hide show
  1. config.yaml +696 -0
  2. model.pt +3 -0
config.yaml ADDED
@@ -0,0 +1,696 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: molmoflow-rby1-multitask-16node-03-05-23-31-19_bs_1024_dbs_8_step_100000_llmlr_1e-5
2
+ model:
3
+ model_name: molmoact
4
+ data_formatter:
5
+ prompt_templates: uber_model_v2
6
+ message_format: qwen3
7
+ system_prompt: demo_or_style_v2
8
+ always_start_with_space: false
9
+ default_inference_len: 65
10
+ select_answer: best
11
+ debug: false
12
+ image_last: false
13
+ format_message_list: null
14
+ p_one_message: 0.0
15
+ eval_system_prompt_mapping: null
16
+ p_choice_content_in_mc: 1.0
17
+ template_video_mc_questions: true
18
+ pointing_format: html-v2
19
+ points_decimal_places: 1
20
+ use_seperate_non_pointing_qa_style: false
21
+ timestamp_mode: 50-percent-seconds
22
+ output_timestamp_mode: seconds
23
+ seconds_decimal_places: 1
24
+ p_multi_point_all_image: 0.5
25
+ use_seperate_count_without_pointing_style: false
26
+ sample_random_initial_point: true
27
+ llm:
28
+ d_model: 2560
29
+ n_heads: 32
30
+ n_kv_heads: 8
31
+ head_dim: 128
32
+ qkv_bias: false
33
+ clip_qkv: null
34
+ n_layers: 36
35
+ mlp_ratio: 4
36
+ mlp_hidden_size: 19456
37
+ activation_type: swiglu
38
+ block_type: sequential
39
+ rope: true
40
+ rope_full_precision: true
41
+ rope_theta: 5000000.0
42
+ rope_type: default
43
+ rope_factor: null
44
+ rope_high_freq_factor: null
45
+ rope_low_freq_factor: null
46
+ rope_original_max_position_embeddings: null
47
+ rope_attention_factor: null
48
+ rope_beta_fast: null
49
+ rope_beta_slow: null
50
+ rope_mscale: null
51
+ rope_mscale_all_dim: null
52
+ rope_truncate: null
53
+ attention_type: sdpa
54
+ full_attention_layers: null
55
+ sliding_attention_rope_scaling: false
56
+ float32_attention: true
57
+ attention_dropout: 0.0
58
+ attention_layer_norm: true
59
+ attention_layer_norm_type: qwen3
60
+ residual_dropout: 0.1
61
+ response_residual_dropout: 0.0
62
+ layer_norm_type: rms
63
+ layer_norm_with_affine: true
64
+ layer_norm_eps: 1.0e-06
65
+ attention_layer_norm_with_affine: true
66
+ max_sequence_length: 8192
67
+ max_position_embeddings: null
68
+ include_bias: false
69
+ bias_for_layer_norm: null
70
+ norm_after: false
71
+ moe_num_experts: 8
72
+ moe_top_k: 2
73
+ moe_mlp_impl: sparse
74
+ moe_log_expert_assignment: false
75
+ moe_shared_expert: false
76
+ moe_lbl_in_fp32: false
77
+ moe_interleave: false
78
+ moe_loss_weight: 0.1
79
+ moe_zloss_weight: null
80
+ moe_dropless: true
81
+ moe_capacity_factor: 1.25
82
+ embedding_dropout: 0.0
83
+ scale_logits: false
84
+ vocab_size: 151936
85
+ additional_vocab_size: 128
86
+ weight_tying: true
87
+ embedding_size: 151936
88
+ use_position_ids: true
89
+ tokenizer:
90
+ identifier: Qwen/Qwen3-4B-Instruct-2507
91
+ tokenizer_dir: null
92
+ init_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen3-4b-instruct.pt
93
+ init_incremental: null
94
+ new_embedding_init_range: 0.02
95
+ initializer_range: 0.02
96
+ normalize_input_embeds: false
97
+ activation_checkpoint: whole_layer
98
+ compile: blocks
99
+ fix_pad_tokenizer: false
100
+ init_std: 0.02
101
+ init_fn: normal
102
+ init_cutoff_factor: null
103
+ vision_backbone:
104
+ vit:
105
+ image_model_type: siglip
106
+ image_default_input_size:
107
+ - 378
108
+ - 378
109
+ image_patch_size: 14
110
+ image_pos_patch_size: 14
111
+ image_emb_dim: 1152
112
+ image_num_heads: 16
113
+ image_num_key_value_heads: 16
114
+ image_num_layers: 27
115
+ image_head_dim: 72
116
+ image_mlp_dim: 4304
117
+ image_mlp_activations: gelu_pytorch_tanh
118
+ image_dropout_rate: 0.0
119
+ image_num_pos: 729
120
+ image_norm_eps: 1.0e-06
121
+ attention_dropout: 0.0
122
+ residual_dropout: 0.0
123
+ initializer_range: 0.02
124
+ float32_attention: true
125
+ attention_type: sdpa
126
+ sdpa_backend: all
127
+ activation_checkpointing: true
128
+ init_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/siglip2-so400m-14-384.pt
129
+ resize_mode: siglip
130
+ pad_value: 0.0
131
+ normalize: siglip
132
+ image_pooling_2d: attention_meanq
133
+ pooling_attention_mask: true
134
+ image_projector: mlp
135
+ image_padding_embed: null
136
+ vit_layers:
137
+ - -3
138
+ - -9
139
+ skip_unused_layers: true
140
+ use_deepstack: false
141
+ share_connector: false
142
+ image_feature_dropout: 0.0
143
+ connector_activation_checkpointing: true
144
+ compile_vit: blocks
145
+ pool_size_embeds: null
146
+ compile_connector: null
147
+ normalize_on_gpu: true
148
+ use_image_augmentation: true
149
+ use_resize_bottleneck: false
150
+ mm_preprocessor:
151
+ max_answer_len: null
152
+ last_message_loss_only: false
153
+ max_text_tokens: null
154
+ loss_token_weighting: root_subsegments_root_tokens
155
+ max_frames: 1
156
+ frame_sample_mode: uniform_last_frame
157
+ candidate_sampling_fps:
158
+ - 0.25
159
+ - 0.5
160
+ - 1.0
161
+ - 2.0
162
+ - 4.0
163
+ - 6.0
164
+ - 8.0
165
+ - 16.0
166
+ cache_videos: true
167
+ loading_method: torchcodec_exact
168
+ max_fps:
169
+ - 2.0
170
+ time_sampling: true
171
+ time_mode: per-frame-compact
172
+ subtitle_mode: frame_1
173
+ max_crops: 1
174
+ overlap_margins:
175
+ - 4.0
176
+ - 4.0
177
+ use_col_tokens: false
178
+ periodic_high_res_frame: null
179
+ high_low_train_mode: local_rnd
180
+ high_res_frame_sample_options: null
181
+ periodic_sample_rate_training:
182
+ 4:
183
+ - 0.9
184
+ - 0.03
185
+ - 0.03
186
+ - 0.04
187
+ 3:
188
+ - 0.6
189
+ - 0.2
190
+ - 0.2
191
+ skip_low_res_in_high_low: false
192
+ pooling_w: 3
193
+ pooling_h: 3
194
+ high_res_pooling_w: null
195
+ high_res_pooling_h: null
196
+ query_based_resolution_selection: false
197
+ max_queries_for_resolution_selection: 8
198
+ use_frame_special_tokens: true
199
+ frame_sel_clip_identifier: google/siglip2-so400m-patch14-384
200
+ image_padding_mask: false
201
+ max_subtitle_tokens: null
202
+ image:
203
+ crop_mode: resize
204
+ use_col_tokens: true
205
+ max_crops: 8
206
+ high_res_max_crops: 24
207
+ p_high_res: 0.0
208
+ pooling_w: 2
209
+ pooling_h: 2
210
+ overlap_margins:
211
+ - 4
212
+ - 4
213
+ max_images: 5
214
+ max_multi_image_crops: 8
215
+ multi_image_pooling_w: 2
216
+ multi_image_pooling_h: 2
217
+ use_single_crop_col_tokens: false
218
+ use_single_crop_start_token: true
219
+ topk: null
220
+ prune_from_frame: 0
221
+ bi_directional_attn: image_tokens
222
+ shared_low_high_embedding: true
223
+ debug: null
224
+ cp_enabled: false
225
+ apply_cp_to_vision_backbone: false
226
+ action_dim: 20
227
+ action_horizon: 16
228
+ n_action_steps: 8
229
+ n_obs_steps: 1
230
+ action_expert:
231
+ max_horizon: 32
232
+ action_dim: 20
233
+ hidden_size: 768
234
+ num_layers: 36
235
+ num_heads: 8
236
+ mlp_ratio: 4.0
237
+ timestep_embed_dim: 256
238
+ dropout: 0.0
239
+ attn_dropout: 0.0
240
+ context_layer_norm: true
241
+ action_expert_layer_mode: per_layer
242
+ flow_matching_num_steps: 10
243
+ flow_matching_cutoff: 0.999
244
+ flow_matching_beta_alpha: 1.0
245
+ flow_matching_beta_beta: 1.5
246
+ num_flow_timestamps: 8
247
+ same_noise_per_time: false
248
+ robot_preprocessor:
249
+ stats_by_repo:
250
+ synthmanip:
251
+ observation.state:
252
+ min:
253
+ - -4.904874324798584
254
+ - -4.564780235290527
255
+ - -3.5160739421844482
256
+ - -2.356419563293457
257
+ - -0.47234979271888733
258
+ - -2.0865397453308105
259
+ - -3.343071222305298
260
+ - -5.8824052810668945
261
+ - -1.7488995790481567
262
+ - -2.967109203338623
263
+ - -0.11299018561840057
264
+ - -2.3546268939971924
265
+ - -3.1416664123535156
266
+ - -2.0946199893951416
267
+ - -3.2890703678131104
268
+ - -6.282893657684326
269
+ - -1.7483078241348267
270
+ - -2.967064142227173
271
+ - -0.12049419432878494
272
+ - -1.778153419494629
273
+ - -1.7587945461273193
274
+ - -1.5871200561523438
275
+ max:
276
+ - 17.08185577392578
277
+ - 33.73189163208008
278
+ - 3.2411913871765137
279
+ - 2.356658697128296
280
+ - 3.1416971683502197
281
+ - 2.1008245944976807
282
+ - 0.07229717075824738
283
+ - 6.270575523376465
284
+ - 2.0102994441986084
285
+ - 2.9668161869049072
286
+ - 0.021467044949531555
287
+ - 2.3977394104003906
288
+ - 0.34489157795906067
289
+ - 2.0900635719299316
290
+ - 0.07242166996002197
291
+ - 6.27663516998291
292
+ - 2.0076160430908203
293
+ - 2.9636759757995605
294
+ - 0.04509617015719414
295
+ - 0.919683575630188
296
+ - 1.6717331409454346
297
+ - 1.1039749383926392
298
+ action:
299
+ q01:
300
+ - -0.04400388523936272
301
+ - -0.044572047889232635
302
+ - -0.05000000074505806
303
+ - -0.05000000074505806
304
+ - -0.037506889551877975
305
+ - -0.03562070056796074
306
+ - -0.05000000074505806
307
+ - -0.05000000074505806
308
+ - -0.04800133779644966
309
+ - -0.05000000074505806
310
+ - -100.0
311
+ - -0.05000000074505806
312
+ - -0.05000000074505806
313
+ - -0.04927435144782066
314
+ - -0.05000000074505806
315
+ - -0.05000000074505806
316
+ - -0.0456085205078125
317
+ - -0.05000000074505806
318
+ - -100.0
319
+ - -0.025820335373282433
320
+ q99:
321
+ - 0.04579437896609306
322
+ - 0.04565873369574547
323
+ - 0.05000000074505806
324
+ - 0.05000000074505806
325
+ - 0.05000000074505806
326
+ - 0.03847877308726311
327
+ - 0.05000000074505806
328
+ - 0.05000000074505806
329
+ - 0.05000000074505806
330
+ - 0.05000000074505806
331
+ - 100.0
332
+ - 0.05000000074505806
333
+ - 0.03608553484082222
334
+ - 0.04896605759859085
335
+ - 0.05000000074505806
336
+ - 0.05000000074505806
337
+ - 0.05000000074505806
338
+ - 0.05000000074505806
339
+ - 100.0
340
+ - 0.7379999756813049
341
+ default_repo_id: synthmanip
342
+ action_key: action
343
+ state_keys:
344
+ - observation.state
345
+ action_norm_mode: quantiles
346
+ state_norm_mode: min_max
347
+ robot_postprocessor:
348
+ stats_by_repo:
349
+ synthmanip:
350
+ observation.state:
351
+ min:
352
+ - -4.904874324798584
353
+ - -4.564780235290527
354
+ - -3.5160739421844482
355
+ - -2.356419563293457
356
+ - -0.47234979271888733
357
+ - -2.0865397453308105
358
+ - -3.343071222305298
359
+ - -5.8824052810668945
360
+ - -1.7488995790481567
361
+ - -2.967109203338623
362
+ - -0.11299018561840057
363
+ - -2.3546268939971924
364
+ - -3.1416664123535156
365
+ - -2.0946199893951416
366
+ - -3.2890703678131104
367
+ - -6.282893657684326
368
+ - -1.7483078241348267
369
+ - -2.967064142227173
370
+ - -0.12049419432878494
371
+ - -1.778153419494629
372
+ - -1.7587945461273193
373
+ - -1.5871200561523438
374
+ max:
375
+ - 17.08185577392578
376
+ - 33.73189163208008
377
+ - 3.2411913871765137
378
+ - 2.356658697128296
379
+ - 3.1416971683502197
380
+ - 2.1008245944976807
381
+ - 0.07229717075824738
382
+ - 6.270575523376465
383
+ - 2.0102994441986084
384
+ - 2.9668161869049072
385
+ - 0.021467044949531555
386
+ - 2.3977394104003906
387
+ - 0.34489157795906067
388
+ - 2.0900635719299316
389
+ - 0.07242166996002197
390
+ - 6.27663516998291
391
+ - 2.0076160430908203
392
+ - 2.9636759757995605
393
+ - 0.04509617015719414
394
+ - 0.919683575630188
395
+ - 1.6717331409454346
396
+ - 1.1039749383926392
397
+ action:
398
+ q01:
399
+ - -0.04400388523936272
400
+ - -0.044572047889232635
401
+ - -0.05000000074505806
402
+ - -0.05000000074505806
403
+ - -0.037506889551877975
404
+ - -0.03562070056796074
405
+ - -0.05000000074505806
406
+ - -0.05000000074505806
407
+ - -0.04800133779644966
408
+ - -0.05000000074505806
409
+ - -100.0
410
+ - -0.05000000074505806
411
+ - -0.05000000074505806
412
+ - -0.04927435144782066
413
+ - -0.05000000074505806
414
+ - -0.05000000074505806
415
+ - -0.0456085205078125
416
+ - -0.05000000074505806
417
+ - -100.0
418
+ - -0.025820335373282433
419
+ q99:
420
+ - 0.04579437896609306
421
+ - 0.04565873369574547
422
+ - 0.05000000074505806
423
+ - 0.05000000074505806
424
+ - 0.05000000074505806
425
+ - 0.03847877308726311
426
+ - 0.05000000074505806
427
+ - 0.05000000074505806
428
+ - 0.05000000074505806
429
+ - 0.05000000074505806
430
+ - 100.0
431
+ - 0.05000000074505806
432
+ - 0.03608553484082222
433
+ - 0.04896605759859085
434
+ - 0.05000000074505806
435
+ - 0.05000000074505806
436
+ - 0.05000000074505806
437
+ - 0.05000000074505806
438
+ - 100.0
439
+ - 0.7379999756813049
440
+ default_repo_id: synthmanip
441
+ action_key: action
442
+ state_keys:
443
+ - observation.state
444
+ action_norm_mode: quantiles
445
+ state_norm_mode: min_max
446
+ parallelism:
447
+ data_parallel_replicate_degree: 1
448
+ enable_compiled_autograd: false
449
+ data_parallel_shard_degree: -1
450
+ fsdp_reshard_after_forward: default
451
+ context_parallel_config:
452
+ degree: 1
453
+ attention_type: ulysses
454
+ load_balancer: ulysses
455
+ head_stride: 1
456
+ tensor_parallel_config:
457
+ degree: 1
458
+ enable_async: false
459
+ data_parallel_config:
460
+ name: fsdp
461
+ param_dtype: null
462
+ reduce_dtype: float32
463
+ num_replicas: null
464
+ shard_degree: null
465
+ wrapping_strategy: full
466
+ prefetch_factor: 0
467
+ context_parallel_rotate_method: allgather
468
+ seed: 6198
469
+ epoch: null
470
+ dry_run: false
471
+ ft_llm: true
472
+ ft_vit: false
473
+ ft_connector: false
474
+ ft_embedding: lm_head
475
+ optimizer:
476
+ name: adamw
477
+ learning_rate: 0.0001
478
+ weight_decay: 0.01
479
+ betas:
480
+ - 0.9
481
+ - 0.95
482
+ eps: 1.0e-05
483
+ connector_learning_rate: 5.0e-06
484
+ vit_learning_rate: 5.0e-06
485
+ llm_learning_rate: 1.0e-05
486
+ frame_selector_learning_rate: 0.0001
487
+ temporal_token_scorer_learning_rate: 0.0001
488
+ action_expert_learning_rate: 0.0001
489
+ connector_weight_decay: 0.0
490
+ vit_weight_decay: 0.0
491
+ llm_weight_decay: 0.0
492
+ frame_selector_weight_decay: 0.01
493
+ temporal_token_scorer_weight_decay: 0.01
494
+ action_expert_weight_decay: 0.0
495
+ connector_betas:
496
+ - 0.9
497
+ - 0.95
498
+ vit_betas:
499
+ - 0.9
500
+ - 0.95
501
+ llm_betas:
502
+ - 0.9
503
+ - 0.95
504
+ frame_selector_betas:
505
+ - 0.9
506
+ - 0.95
507
+ temporal_token_scorer_betas:
508
+ - 0.9
509
+ - 0.95
510
+ action_expert_betas:
511
+ - 0.9
512
+ - 0.95
513
+ connector_eps: 1.0e-06
514
+ vit_eps: 1.0e-06
515
+ llm_eps: 1.0e-06
516
+ frame_selector_eps: 1.0e-06
517
+ temporal_token_scorer_eps: 1.0e-06
518
+ action_expert_eps: 1.0e-06
519
+ metrics_log_interval: -1
520
+ scheduler:
521
+ name: multimodal
522
+ units: steps
523
+ t_warmup: 100
524
+ t_max: null
525
+ alpha_f: 0.1
526
+ connector_t_warmup: 200
527
+ vit_t_warmup: 200
528
+ llm_t_warmup: 2000
529
+ frame_selector_t_warmup: 200
530
+ temporal_token_scorer_t_warmup: 200
531
+ action_expert_t_warmup: 200
532
+ grad_clip_warmup_steps: null
533
+ grad_clip_warmup_factor: null
534
+ warmup_min_lr: 0.0
535
+ data:
536
+ dataset: null
537
+ mixture:
538
+ synthmanip/task_0: 1.0
539
+ synthmanip/task_1: 1.0
540
+ synthmanip/task_2: 1.0
541
+ synthmanip/task_3: 1.0
542
+ synthmanip/task_4: 1.0
543
+ synthmanip/task_5: 1.0
544
+ synthmanip/task_6: 1.0
545
+ synthmanip/task_7: 1.0
546
+ synthmanip/task_8: 1.0
547
+ synthmanip/task_9: 1.0
548
+ root_size_mixture: null
549
+ kwargs_mixture: null
550
+ split: train
551
+ seed: 50189
552
+ pad: to_max
553
+ sequence_length: 1024
554
+ max_text_seq_len: null
555
+ shuffle: true
556
+ start_index: 0
557
+ packing: null
558
+ enable_variable_sized_token_pooling: true
559
+ num_workers: 4
560
+ drop_last: true
561
+ pin_memory: true
562
+ prefetch_factor: 4
563
+ persistent_workers: false
564
+ timeout: 300
565
+ action_data: null
566
+ action_loader_rate: null
567
+ action_batch_interval: 1
568
+ restore_dataloader: true
569
+ fast_forward_batches: null
570
+ evaluators: []
571
+ eval_interval: 0
572
+ inf_evaluators: []
573
+ inf_eval_interval: 1000
574
+ eval_on_last_step: true
575
+ eval_on_load: false
576
+ eval_on: []
577
+ save_folder: /weka/oe-training-default/snehalj/synthmanip_checkpoints/molmoflow-rby1-multitask-16node-03-05-23-31-19_bs_1024_dbs_8_step_100000_llmlr_1e-5
578
+ checkpointer_config:
579
+ save_thread_count: null
580
+ load_thread_count: null
581
+ pre_download: false
582
+ work_dir: null
583
+ throttle_uploads: false
584
+ canceled_check_interval: 50
585
+ save_interval: 2000
586
+ save_at: null
587
+ save_final_optim: false
588
+ save_num_checkpoints_to_keep: 3
589
+ checkpoint_retention_frequency: 10000
590
+ save_final_unsharded_checkpoint: false
591
+ save_interval_ephemeral: null
592
+ save_overwrite: true
593
+ load_path: null
594
+ reset_optimizer_state: false
595
+ reset_trainer_state: false
596
+ initial_model_checkpoint: /weka/oe-training-default/hqfang/molmo2_checkpoints/4b-cp/step2000-unsharded/
597
+ allow_resume: true
598
+ max_duration: 100000
599
+ global_train_batch_size: 1024
600
+ device_train_microbatch_size: 8
601
+ max_grad_norm: 1.0
602
+ multi_component_grad_norm: true
603
+ batch_divisor: global_batch
604
+ max_grad_norm_ratio: null
605
+ precision: amp_bf16
606
+ wandb:
607
+ project: whirl-molmoflow-rby1
608
+ entity: prior-ai2
609
+ group: null
610
+ name: molmoflow-rby1-multitask-16node-03-05-23-31-19_bs_1024_dbs_8_step_100000_llmlr_1e-5
611
+ tags:
612
+ - watching
613
+ log_artifacts: false
614
+ rank_zero_only: true
615
+ log_interval: 20
616
+ allow_resume: true
617
+ finish_on_sigterm: true
618
+ beaker_log_interval: 50
619
+ speed_monitor:
620
+ window_size: 20
621
+ gpu_flops_available: null
622
+ console_log_interval: 20
623
+ enable_timing_logs: false
624
+ gen1_gc_interval: 1
625
+ compile:
626
+ mode: default
627
+ fullgraph: false
628
+ dynamic: false
629
+ backend: inductor
630
+ activation_checkpointing: true
631
+ fsdp:
632
+ fsdp2: true
633
+ precision: pure
634
+ use_orig_params: true
635
+ wrapping_strategy: null
636
+ sharding_strategy: FULL_SHARD
637
+ hybrid_sharding_num_model_replicas: null
638
+ softmax_auxiliary_loss: false
639
+ softmax_auxiliary_loss_scale: 0.0001
640
+ response_logits_only: true
641
+ saliency_score_loss_wt: null
642
+ frame_score_loss_wt: null
643
+ frame_score_loss_type: mse
644
+ frame_score_loss_target: 0.7
645
+ time_limit: null
646
+ extra_steps_after_cancel: 0
647
+ python_profiling: false
648
+ torch_profiling: false
649
+ stop_at: 100000
650
+ stop_after: null
651
+ fused_loss: false
652
+ compile_loss: true
653
+ runtime_data:
654
+ args: launch_scripts/train_synthmanip.py /weka/oe-training-default/hqfang/molmo2_checkpoints/4b-cp/step2000-unsharded/
655
+ --data_paths /weka/prior/datasets/robomolmo/feb12_franka_and_rby1/DoorOpeningDataGenConfig
656
+ /weka/prior/datasets/robomolmo/feb15_franka_and_rby1/DoorOpeningDataGenConfig
657
+ /weka/prior/datasets/robomolmo/feb12_franka_and_rby1/RBY1PickDataGenConfig /weka/prior/datasets/robomolmo/feb15_franka_and_rby1/RBY1PickDataGenConfig
658
+ /weka/prior/datasets/robomolmo/feb21_franka_and_rby1/RBY1PickDataGenConfig /weka/prior/datasets/robomolmo/feb12_franka_and_rby1/RBY1PickAndPlaceDataGenConfig
659
+ /weka/prior/datasets/robomolmo/feb15_franka_and_rby1/RBY1PickAndPlaceDataGenConfig
660
+ /weka/prior/datasets/robomolmo/feb21_franka_and_rby1/RBY1PickAndPlaceDataGenConfig
661
+ /weka/prior/datasets/robomolmo/feb23_open_datagen/RBY1OpenDataGenConfig /weka/prior/datasets/robomolmo/feb23_open_datagen_obja/RBY1OpenDataGenConfig
662
+ --no_val --dataset_sample_rates 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 --stats_path=/weka/prior/datasets/robomolmo/rby1_multitask_norm_stats.yaml
663
+ --action_preset RBY1_multitask --camera_preset RBY1_full_with_head_gopro --wandb.name=molmoflow-rby1-multitask-16node-03-05-23-31-19_bs_1024_dbs_8_step_100000_llmlr_1e-5
664
+ --wandb.entity=prior-ai2 --wandb.project=whirl-molmoflow-rby1 --seq_len=1024 --max_duration=100000
665
+ --device_batch_size=8 --global_batch_size=1024 --log_interval=20 --model.mm_preprocessor.use_frame_special_tokens=True
666
+ --model.mm_preprocessor.max_subtitle_tokens=null --data.num_workers=4 --prefetch_factor=4
667
+ --save_interval=2000 --save_num_checkpoints_to_keep=3 --checkpoint_retention_frequency=10000
668
+ --save_folder=/weka/oe-training-default/snehalj/synthmanip_checkpoints/molmoflow-rby1-multitask-16node-03-05-23-31-19_bs_1024_dbs_8_step_100000_llmlr_1e-5
669
+ --exp_name=molmoflow-rby1-multitask-16node-03-05-23-31-19_bs_1024_dbs_8_step_100000_llmlr_1e-5
670
+ --data.packing=null --model.mm_preprocessor.image.max_images=5 --model.mm_preprocessor.image.crop_mode=resize
671
+ --model.mm_preprocessor.max_frames=1 --model.same_noise_per_time=False --model.num_flow_timestamps=8
672
+ --use_point_prompts_per_dataset 1 1 0 0 0 0 0 0 1 1 --randomize_prompts --point_prompt_camera=head_camera
673
+ --max_points_in_conditioning_frame=1 --conditioning_frame=random_first_10 --cameras_to_warp
674
+ head_camera --img_aug --ft_llm=True --scheduler.llm_t_warmup=2000 --optimizer.llm_learning_rate=1e-5
675
+ hostname: jupiter-cs-aus-147.reviz.ai2.in
676
+ date: 03/05/2026, 22:36
677
+ world_size: 128
678
+ resuming_from: null
679
+ beaker_experiment_id: 01KK0212A2CKWNFJEJHT7AZMW5
680
+ beaker_experiment_url: https://beaker.org/ex/01KK0212A2CKWNFJEJHT7AZMW5
681
+ wandb_id: t57qc9vl
682
+ wandb_url: https://wandb.ai/prior-ai2/whirl-molmoflow-rby1/runs/t57qc9vl
683
+ distributed_eval_enabled: false
684
+ distributed_eval_benchmark_path: /weka/oe/rohunt/robo-bench/FrankaPickandPlaceDroidBench_5ep_json_benchmark
685
+ distributed_eval_config_cls: launch_scripts.synthvla.configure_mujoco_thor:FrankaState8ClampConfig
686
+ distributed_eval_task_horizon: 300
687
+ distributed_eval_num_worker_jobs: 1
688
+ distributed_eval_wandb_project: mjthor-online-eval
689
+ distributed_eval_workspace: ai2/robo-molmo
690
+ distributed_eval_clusters:
691
+ - ai2/saturn
692
+ - ai2/neptune
693
+ - ai2/rhea
694
+ - ai2/ceres
695
+ distributed_eval_priority: high
696
+ distributed_eval_preemptible: true
model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:009fa4dc60493da7b4aad9ffa58481fdd87298c2c11d95f6d472b67b56789a3d
3
+ size 19992232602