---
# Training configuration for the PUMA Dynamic-VLA run
# (RoboTwin 35-task dynamic benchmark, stage-2 history-flow training).
#
# NOTE(review): this file was reconstructed from a whitespace-mangled dump.
# Every key/value pair is preserved verbatim, but the nesting of some
# sections (e.g. `grounding` under `framework`, the trailing `output_dir`
# under `trainer`) was inferred from key semantics — confirm against the
# consuming config schema.

# --- run bookkeeping / experiment tracking ---
run_id: puma-domino-dynamic-35task
run_root_dir: ./result/output/Dynamic_VLA
seed: 42
trackers:
  - jsonl
  - wandb
wandb_entity: heng_
wandb_project: Dynamic_VLA
is_debug: false

# --- model / framework ---
framework:
  name: PUMA
  history_flow_stage: stage2

  # Vision-language backbone.
  qwenvl:
    base_vlm: ./playground/Pretrained_models/Qwen3-VL-4B-Instruct-Action
    attn_implementation: sdpa
    vl_hidden_dim: 2560

  # Action head on top of the VLM.
  action_model:
    action_model_type: MLP
    action_hidden_dim: 2560
    action_dim: 14
    state_dim: 14
    future_action_window_size: 15
    past_action_window_size: 0

  # Auxiliary world-model supervision.
  world_model:
    enabled: true
    world_query_num: 4
    loss_weight: 0.05
    supervision: per_frame
    feature_loss: cosine
    grounding_mode: image
    future_view_index: 0
    dino_backbone: dinov2_vitb14
    # Quoted defensively: the value starts with non-alphanumeric characters.
    world_token: '<|world|>'

  # Grounded-SAM2 pipeline used to produce grounding masks.
  grounding:
    sam2_model_config: configs/sam2.1/sam2.1_hiera_l.yaml
    sam2_checkpoint: ./playground/Pretrained_models/grounded_sam2/sam2.1_hiera_large.pt
    grounding_dino_config: ./playground/Pretrained_models/grounded_sam2/GroundingDINO_SwinT_OGC.py
    grounding_dino_checkpoint: ./playground/Pretrained_models/grounded_sam2/groundingdino_swint_ogc.pth
    box_threshold: 0.35
    text_threshold: 0.25
    multimask_output: false
    max_boxes: 1
    video_prompt: mask
    cache:
      enabled: true
      read: true
      write: true
      dirname: grounding_cache
      version: v1
    debug:
      enabled: false
      output_dir: ./grounding_output
      include_box: true
      include_mask: true

# --- datasets ---
datasets:
  vla_data:
    dataset_py: lerobot_datasets
    num_workers: 8
    data_root_dir: ./data/robotwin/dynamic-35tasks-clean-level1
    data_mix: robotwin_dynamic_task
    action_type: abs_qpos
    # Presumably (channels, height, width) — TODO confirm with the dataset loader.
    default_image_resolution:
      - 3
      - 224
      - 224
    per_device_batch_size: 8
    load_all_data_for_training: true
    obs:
      - image_0
    image_size:
      - 224
      - 224
    video_backend: torchvision_av
    include_state: false
    future_k: 4
    future_stride: 4
    history_k: 4
    history_stride: 4
    history_mode: flow
    history_image_size:
      - 64
      - 64
    history_flow:
      compute_size:
        - 64
        - 64
      cpu_worker_num: 12
      cache:
        enabled: true
        read: true
        write: true
        dirname: history_flow_cache
        version: v1

# --- training loop / optimization ---
trainer:
  epochs: 100
  max_train_steps: 100000
  num_warmup_steps: 5000
  save_interval: 10000
  eval_interval: 1000
  # Per-module learning rates.
  learning_rate:
    base: 1.0e-05
    qwen_vl_interface: 1.0e-05
    action_model: 0.0001
  lr_scheduler_type: cosine_with_min_lr
  scheduler_specific_kwargs:
    min_lr: 5.0e-07
  freeze_modules: null
  loss_scale:
    vla: 1.0
    vlm: 0.0
  repeated_diffusion_steps: 4
  max_grad_norm: 1.0
  warmup_ratio: 0.1
  weight_decay: 0.0
  logging_frequency: 100
  gradient_clipping: 1.0
  gradient_accumulation_steps: 1
  optimizer:
    name: AdamW
    betas:
      - 0.9
      - 0.95
    eps: 1.0e-08
    weight_decay: 1.0e-08
  is_resume: false
  resume_epoch: null
  resume_step: null
  enable_gradient_checkpointing: true
  enable_mixed_precision_training: true
  output_dir: ./result/output/Dynamic_VLA/20260301-qwenoft-robotwin_dynamic_task-qwenaction-world-query-flow-stage2-h4s4f4s4-h64w64-dynamic-35task