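# Source: config.yaml, revision d1cdf6c (verified), 3.15 kB.
# Last change: "Update config.yaml", uploaded by Jinhuiye.
# (The web-view labels "raw" and "history blame" are page navigation, not configuration.)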
# Run identification, checkpoint root, seed, and experiment-tracking settings.
run_id: '0906_bestvla_retrain_lr_v2'
run_root_dir: './results/Checkpoints'
seed: 42
# Tracker names.
trackers: [jsonl, wandb]
wandb_entity: 'jinhuiye'
wandb_project: 'InternVLA'
is_debug: false
# Model definition: vision-language backbone, DINO encoder, layer-wise
# Q-Former, and action model.
# NOTE(review): the nesting of this section was restored from a copy whose
# indentation had been stripped; confirm the section boundaries against the
# code that reads this file.
framework:
  framework_py: InternVLA-M1
  qwenvl:
    # Absolute, machine-specific checkpoint path.
    base_vlm: /mnt/phwfile/efm_t/zhuyangkun_tmp_need_del/exp/exp_08_09/manip_sys2_qwen25_3b_onevision_molmo_a0all_refsp20/checkpoint-20000
    attn_implementation: flash_attention_2
    vl_hidden_dim: 2048
  dino:
    dino_backbone: dinov2_vits14
  layer_qformer:
    qformer_end_layer: 37
    qformer_start_layer: 36
    num_query_tokens: 64
    # Same value as qwenvl.vl_hidden_dim (2048).
    input_dim: 2048
    # NOTE(review): the key is spelled `ouptput_dim` (sic). It is left
    # unchanged because the consuming code may look it up by this exact
    # name; confirm before renaming. Same value as
    # action_model.action_hidden_dim (768).
    ouptput_dim: 768
    grad_scale: 0.5
  action_model:
    action_model_type: DiT-B
    action_hidden_dim: 768
    action_dim: 7
    use_ema: false
    future_action_window_size: 15
    past_action_window_size: 0
    repeated_diffusion_steps: 8
  # NOTE(review): placed directly under `framework`; confirm it is not meant
  # to be a child of `action_model`.
  reduce_in_full_precision: true
# Dataset configuration, split into a vision-language stream (`vlm_data`)
# and a robot-action stream (`vla_data`). Each stream carries its own
# `per_device_batch_size`; the two must stay in separate mappings, otherwise
# they collide as duplicate keys and only the last value survives.
# NOTE(review): the nesting of this section was restored from a copy whose
# indentation had been stripped; confirm the section boundaries against the
# code that reads this file.
datasets:
  vlm_data:
    dataformat: llava_json
    # Comma-separated dataset names; a `%N` suffix appears on some entries.
    # NOTE(review): ao_droid_data, ao_droid_molmo_sam2, ao_hoi4d_data,
    # ao_hoi4d_frame_data and ao_maniskills are each listed twice. Confirm
    # whether the repetition is deliberate (e.g. up-weighting) or accidental.
    dataset_use: ao_droid_data,ao_droid_molmo_sam2,ao_hoi4d_data,ao_maniskills,ao_hoi4d_frame_data,pixmo_point,refspatial_sim%10,xudong_spatial_interact%10,xudong_invalid_task%10,xudong_task_onlyaction%10,xudong_task_cot_cap_resp_act%10,gsys2_14kv2_gd_coco_rule%10,gsys2_14kv2_obj_attr%10,gsys2_14kv2_obj_nearby%10,gsys2_14kv2_obj_senmatic%10,gsys2_14kv2_action_plan%10,asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_neg_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en,robo_point_refobj,robo_point_refregion,roborefit,ao_droid_data,ao_droid_molmo_sam2,ao_hoi4d_data,ao_hoi4d_frame_data,ao_maniskills,molmo_traj_auxiliary_trace,molmo_traj_bridge_dataset,molmo_traj_bc_z,molmo_traj_fractal
    eval_dataset: aokvqa_cauldron_llava_format
    data_flatten: false
    base_interval: 2
    max_pixels: 12845056
    min_pixels: 3136
    model_max_length: 2048
    model_type: qwen2.5vl
    per_device_batch_size: 2
  vla_data:
    dataset_py: rlds_datasets
    data_root_dir: playground/Datasets/OXE_openvla
    data_mix: bridge_rt_1
    default_image_resolution: [3, 224, 224]
    shuffle_buffer_size: 250000
    image_aug: true
    per_device_batch_size: 16
    # NOTE(review): placed under `vla_data` because it follows that stream's
    # keys; confirm it is not a `datasets`-level setting.
    load_all_data_for_training: true
# Training-loop configuration: schedule, per-module learning rates, loss
# weights, optimizer, resume state, and memory/precision switches.
# NOTE(review): the nesting of this section was restored from a copy whose
# indentation had been stripped; confirm the section boundaries against the
# code that reads this file.
trainer:
  epochs: 100
  max_train_steps: 100000
  # NOTE(review): `num_warmup_steps` (5000) and `warmup_ratio` (0.1, i.e.
  # 10000 of max_train_steps) are both set and disagree; confirm which one
  # the scheduler reads.
  num_warmup_steps: 5000
  save_interval: 5000
  eval_interval: 100
  # Learning rates keyed by name; `base` sits alongside two named entries.
  learning_rate:
    base: 4.0e-05
    qwen_vl_interface: 1.0e-05
    action_model: 0.0001
  lr_scheduler_type: cosine_with_min_lr
  scheduler_specific_kwargs:
    min_lr: 5.0e-07
  freeze_modules: null
  loss_scale:
    vla: 1.0
    vlm: 0.1
  # NOTE(review): `max_grad_norm` and `gradient_clipping` are both 1.0;
  # confirm which one is actually consumed.
  max_grad_norm: 1.0
  warmup_ratio: 0.1
  # NOTE(review): differs from optimizer.weight_decay (1.0e-08); confirm
  # which value takes effect.
  weight_decay: 0.0
  logging_frequency: 10
  gradient_clipping: 1.0
  gradient_accumulation_steps: 1
  optimizer:
    name: AdamW
    betas: [0.9, 0.95]
    eps: 1.0e-08
    weight_decay: 1.0e-08
  is_resume: false
  resume_epoch: null
  resume_step: null
  # NOTE(review): the two switches below are placed under `trainer` because
  # they follow the resume keys; confirm they are not top-level settings.
  enable_gradient_checkpointing: true
  enable_mixed_precision_training: true
# NOTE(review): `is_resume` also appears among the trainer settings earlier
# in this file (same value, false); confirm which occurrence the training
# script reads.
is_resume: false
# NOTE(review): the last path component (0906_internvla_m1) does not match
# run_id (0906_bestvla_retrain_lr_v2); confirm this is intentional.
output_dir: './results/Checkpoints/0906_internvla_m1'