| run_id: puma-domino-dynamic-35task |
| run_root_dir: ./result/output/Dynamic_VLA |
| seed: 42 |
| trackers: |
| - jsonl |
| - wandb |
| wandb_entity: heng_ |
| wandb_project: Dynamic_VLA |
| is_debug: false |
| framework: |
| name: PUMA |
| history_flow_stage: stage2 |
| qwenvl: |
| base_vlm: ./playground/Pretrained_models/Qwen3-VL-4B-Instruct-Action |
| attn_implementation: sdpa |
| vl_hidden_dim: 2560 |
| action_model: |
| action_model_type: MLP |
| action_hidden_dim: 2560 |
| action_dim: 14 |
| state_dim: 14 |
| future_action_window_size: 15 |
| past_action_window_size: 0 |
| world_model: |
| enabled: true |
| world_query_num: 4 |
| loss_weight: 0.05 |
| supervision: per_frame |
| feature_loss: cosine |
| grounding_mode: image |
| future_view_index: 0 |
| dino_backbone: dinov2_vitb14 |
| world_token: <|world|> |
| grounding: |
| sam2_model_config: configs/sam2.1/sam2.1_hiera_l.yaml |
| sam2_checkpoint: ./playground/Pretrained_models/grounded_sam2/sam2.1_hiera_large.pt |
| grounding_dino_config: ./playground/Pretrained_models/grounded_sam2/GroundingDINO_SwinT_OGC.py |
| grounding_dino_checkpoint: ./playground/Pretrained_models/grounded_sam2/groundingdino_swint_ogc.pth |
| box_threshold: 0.35 |
| text_threshold: 0.25 |
| multimask_output: false |
| max_boxes: 1 |
| video_prompt: mask |
| cache: |
| enabled: true |
| read: true |
| write: true |
| dirname: grounding_cache |
| version: v1 |
| debug: |
| enabled: false |
| output_dir: ./grounding_output |
| include_box: true |
| include_mask: true |
| datasets: |
| vla_data: |
| dataset_py: lerobot_datasets |
| num_workers: 8 |
| data_root_dir: ./data/robotwin/dynamic-35tasks-clean-level1 |
| data_mix: robotwin_dynamic_task |
| action_type: abs_qpos |
| default_image_resolution: |
| - 3 |
| - 224 |
| - 224 |
| per_device_batch_size: 8 |
| load_all_data_for_training: true |
| obs: |
| - image_0 |
| image_size: |
| - 224 |
| - 224 |
| video_backend: torchvision_av |
| include_state: false |
| future_k: 4 |
| future_stride: 4 |
| history_k: 4 |
| history_stride: 4 |
| history_mode: flow |
| history_image_size: |
| - 64 |
| - 64 |
| history_flow: |
| compute_size: |
| - 64 |
| - 64 |
| cpu_worker_num: 12 |
| cache: |
| enabled: true |
| read: true |
| write: true |
| dirname: history_flow_cache |
| version: v1 |
| trainer: |
| epochs: 100 |
| max_train_steps: 100000 |
| num_warmup_steps: 5000 |
| save_interval: 10000 |
| eval_interval: 1000 |
| learning_rate: |
| base: 1.0e-05 |
| qwen_vl_interface: 1.0e-05 |
| action_model: 0.0001 |
| lr_scheduler_type: cosine_with_min_lr |
| scheduler_specific_kwargs: |
| min_lr: 5.0e-07 |
| freeze_modules: null |
| loss_scale: |
| vla: 1.0 |
| vlm: 0.0 |
| repeated_diffusion_steps: 4 |
| max_grad_norm: 1.0 |
| warmup_ratio: 0.1 |
| weight_decay: 0.0 |
| logging_frequency: 100 |
| gradient_clipping: 1.0 |
| gradient_accumulation_steps: 1 |
| optimizer: |
| name: AdamW |
| betas: |
| - 0.9 |
| - 0.95 |
| eps: 1.0e-08 |
| weight_decay: 1.0e-08 |
| is_resume: false |
| resume_epoch: null |
| resume_step: null |
| enable_gradient_checkpointing: true |
| enable_mixed_precision_training: true |
| output_dir: ./result/output/Dynamic_VLA/20260301-qwenoft-robotwin_dynamic_task-qwenaction-world-query-flow-stage2-h4s4f4s4-h64w64-dynamic-35task |
|
|