| run_id: 0906_bestvla_retrain_lr_v2 |
| run_root_dir: ./results/Checkpoints |
| seed: 42 |
| trackers: |
| - jsonl |
| - wandb |
| wandb_entity: jinhuiye |
| wandb_project: InternVLA |
| is_debug: false |
| framework: |
| framework_py: InternVLA-M1 |
| qwenvl: |
| base_vlm: /mnt/phwfile/efm_t/zhuyangkun_tmp_need_del/exp/exp_08_09/manip_sys2_qwen25_3b_onevision_molmo_a0all_refsp20/checkpoint-20000 |
| attn_implementation: flash_attention_2 |
| vl_hidden_dim: 2048 |
| dino: |
| dino_backbone: dinov2_vits14 |
| layer_qformer: |
| qformer_end_layer: 37 |
| qformer_start_layer: 36 |
| num_query_tokens: 64 |
| input_dim: 2048 |
| output_dim: 768 | # NOTE(review): key was misspelled "ouptput_dim" — if the loader reads the misspelled key, update the loader too |
| grad_scale: 0.5 |
| action_model: |
| action_model_type: DiT-B |
| action_hidden_dim: 768 |
| action_dim: 7 |
| use_ema: false |
| future_action_window_size: 15 |
| past_action_window_size: 0 |
| repeated_diffusion_steps: 8 |
| reduce_in_full_precision: true |
| datasets: |
| vlm_data: |
| dataformat: llava_json |
| dataset_use: ao_droid_data,ao_droid_molmo_sam2,ao_hoi4d_data,ao_maniskills,ao_hoi4d_frame_data,pixmo_point,refspatial_sim%10,xudong_spatial_interact%10,xudong_invalid_task%10,xudong_task_onlyaction%10,xudong_task_cot_cap_resp_act%10,gsys2_14kv2_gd_coco_rule%10,gsys2_14kv2_obj_attr%10,gsys2_14kv2_obj_nearby%10,gsys2_14kv2_obj_senmatic%10,gsys2_14kv2_action_plan%10,asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_neg_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en,robo_point_refobj,robo_point_refregion,roborefit,ao_droid_data,ao_droid_molmo_sam2,ao_hoi4d_data,ao_hoi4d_frame_data,ao_maniskills,molmo_traj_auxiliary_trace,molmo_traj_bridge_dataset,molmo_traj_bc_z,molmo_traj_fractal |
| eval_dataset: aokvqa_cauldron_llava_format |
| data_flatten: false |
| base_interval: 2 |
| max_pixels: 12845056 |
| min_pixels: 3136 |
| model_max_length: 2048 |
| model_type: qwen2.5vl |
| per_device_batch_size: 2 |
| vla_data: |
| dataset_py: rlds_datasets |
| data_root_dir: playground/Datasets/OXE_openvla |
| data_mix: bridge_rt_1 |
| default_image_resolution: |
| - 3 |
| - 224 |
| - 224 |
| shuffle_buffer_size: 250000 |
| image_aug: true |
| per_device_batch_size: 16 |
| load_all_data_for_training: true |
| trainer: |
| epochs: 100 |
| max_train_steps: 100000 |
| num_warmup_steps: 5000 |
| save_interval: 5000 |
| eval_interval: 100 |
| learning_rate: |
| base: 4.0e-05 |
| qwen_vl_interface: 1.0e-05 |
| action_model: 0.0001 |
| lr_scheduler_type: cosine_with_min_lr |
| scheduler_specific_kwargs: |
| min_lr: 5.0e-07 |
| freeze_modules: null |
| loss_scale: |
| vla: 1.0 |
| vlm: 0.1 |
| max_grad_norm: 1.0 |
| warmup_ratio: 0.1 |
| weight_decay: 0.0 |
| logging_frequency: 10 |
| gradient_clipping: 1.0 |
| gradient_accumulation_steps: 1 |
| optimizer: |
| name: AdamW |
| betas: |
| - 0.9 |
| - 0.95 |
| eps: 1.0e-08 |
| weight_decay: 1.0e-08 |
| is_resume: false |
| resume_epoch: null |
| resume_step: null |
| enable_gradient_checkpointing: true |
| enable_mixed_precision_training: true |
| is_resume: false | # NOTE(review): duplicate of the earlier "is_resume: false" key — same value, but remove one to avoid YAML duplicate-key warnings |
| output_dir: ./results/Checkpoints/0906_internvla_m1 |
|
|