---
# Training configuration: PUMA (Dynamic_VLA) on RoboTwin dynamic 35-task data.
# NOTE(review): the source file arrived whitespace-flattened onto one line
# (invalid YAML). The nesting below is reconstructed from the alphabetical
# key order of the original dump; placements that alphabetical order alone
# could not settle are marked — confirm against the consuming loader's schema.

datasets:
  vla_data:
    data_mix: robotwin_dynamic_task
    data_root_dir: ./data/robotwin/dynamic-35tasks-clean-level1
    dataset_py: lerobot_datasets
    # History-flow sub-pipeline (64x64 frames, "flow" mode).
    history_flow:
      cpu_worker_num: 12
      history_image_size:
        - 64
        - 64
      history_mode: flow
    # NOTE(review): the four keys below are placed at vla_data level (dataset /
    # dataloader settings), not under history_flow — alphabetical order permits
    # either; verify against the loader.
    image_size:
      - 224
      - 224
    num_workers: 8
    per_device_batch_size: 8
    video_backend: torchvision_av

framework:
  action_model:
    action_dim: 14
    action_hidden_dim: 2560
    action_model_type: MLP
    future_action_window_size: 15
    past_action_window_size: 0
  # history_flow_stage sorts after past_action_window_size, so it belongs to
  # framework, not action_model.
  history_flow_stage: stage2
  name: PUMA
  qwenvl:
    attn_implementation: sdpa
    base_vlm: ./playground/Pretrained_models/Qwen3-VL-4B-Instruct-Action
  world_model:
    dino_backbone: dinov2_vitb14
    enabled: true
    feature_loss: cosine
    future_view_index: 0
    # Grounded-SAM2 grounding settings (GroundingDINO + SAM 2.1 checkpoints).
    grounding:
      box_threshold: 0.35
      grounding_dino_checkpoint: ./playground/Pretrained_models/grounded_sam2/groundingdino_swint_ogc.pth
      grounding_dino_config: ./playground/Pretrained_models/grounded_sam2/GroundingDINO_SwinT_OGC.py
      max_boxes: 1
      multimask_output: false
      sam2_checkpoint: ./playground/Pretrained_models/grounded_sam2/sam2.1_hiera_large.pt
      sam2_model_config: configs/sam2.1/sam2.1_hiera_l.yaml
      text_threshold: 0.25
      video_prompt: mask
    # grounding_mode sorts after video_prompt, so it belongs to world_model,
    # not grounding.
    grounding_mode: image
    loss_weight: 0.05
    supervision: per_frame
    world_query_num: 4

output_dir: ./result/output/Dynamic_VLA/20260301-qwenoft-robotwin_dynamic_task-qwenaction-world-query-flow-stage2-h4s4f4s4-h64w64-dynamic-35task
run_id: puma-domino-dynamic-35task
run_root_dir: ./result/output/Dynamic_VLA
seed: 42

trainer:
  eval_interval: 1000
  freeze_modules: null
  gradient_accumulation_steps: 1
  gradient_clipping: 1.0
  is_resume: false
  # Per-module learning rates.
  learning_rate:
    action_model: 0.0001
    base: 1.0e-05
    qwen_vl_interface: 1.0e-05
  logging_frequency: 100
  lr_scheduler_type: cosine_with_min_lr
  max_train_steps: 100000
  num_warmup_steps: 5000
  optimizer:
    betas:
      - 0.9
      - 0.95
    eps: 1.0e-08
    weight_decay: 1.0e-08
  save_interval: 10000
  scheduler_specific_kwargs:
    min_lr: 5.0e-07
  # NOTE(review): wandb_* placed at trainer level (not inside
  # scheduler_specific_kwargs) based on semantics — confirm.
  wandb_entity: heng_
  wandb_project: Dynamic_VLA