---
# PUMA / Dynamic-VLA training configuration (history-flow, stage 2)
# for the RoboTwin dynamic 35-task mix.
#
# NOTE(review): this file arrived with all indentation stripped (plus a
# file-size banner and a blame gutter fused onto the first line). The
# nesting below was reconstructed from key semantics; verify section
# membership against the consuming code — in particular `grounding`
# (placed under `framework`) and the trailing `output_dir` (placed at
# top level, alongside `run_root_dir`).

# --- Run bookkeeping --------------------------------------------------
run_id: puma-domino-dynamic-35task
run_root_dir: ./result/output/Dynamic_VLA
seed: 42
trackers:
  - jsonl
  - wandb
wandb_entity: heng_
wandb_project: Dynamic_VLA
is_debug: false

# --- Model / framework ------------------------------------------------
framework:
  name: PUMA
  history_flow_stage: stage2
  qwenvl:
    base_vlm: ./playground/Pretrained_models/Qwen3-VL-4B-Instruct-Action
    attn_implementation: sdpa
    vl_hidden_dim: 2560
  action_model:
    action_model_type: MLP
    action_hidden_dim: 2560
    action_dim: 14
    state_dim: 14
    future_action_window_size: 15
    past_action_window_size: 0
  world_model:
    enabled: true
    world_query_num: 4
    loss_weight: 0.05
    supervision: per_frame
    feature_loss: cosine
    grounding_mode: image
    future_view_index: 0
    dino_backbone: dinov2_vitb14
    # Quoted defensively so the special token stays a literal string.
    world_token: "<|world|>"
  # Grounded-SAM2 assets used for grounding; presumably consumed by the
  # world-model grounding path — TODO confirm nesting against the loader.
  grounding:
    sam2_model_config: configs/sam2.1/sam2.1_hiera_l.yaml
    sam2_checkpoint: ./playground/Pretrained_models/grounded_sam2/sam2.1_hiera_large.pt
    grounding_dino_config: ./playground/Pretrained_models/grounded_sam2/GroundingDINO_SwinT_OGC.py
    grounding_dino_checkpoint: ./playground/Pretrained_models/grounded_sam2/groundingdino_swint_ogc.pth
    box_threshold: 0.35
    text_threshold: 0.25
    multimask_output: false
    max_boxes: 1
    video_prompt: mask
    cache:
      enabled: true
      read: true
      write: true
      dirname: grounding_cache
      version: v1
    debug:
      enabled: false
      output_dir: ./grounding_output
      include_box: true
      include_mask: true

# --- Data -------------------------------------------------------------
datasets:
  vla_data:
    dataset_py: lerobot_datasets
    num_workers: 8
    data_root_dir: ./data/robotwin/dynamic-35tasks-clean-level1
    data_mix: robotwin_dynamic_task
    action_type: abs_qpos
    default_image_resolution:
      - 3
      - 224
      - 224
    per_device_batch_size: 8
    load_all_data_for_training: true
    obs:
      - image_0
    image_size:
      - 224
      - 224
    video_backend: torchvision_av
    include_state: false
    future_k: 4
    future_stride: 4
    history_k: 4
    history_stride: 4
    history_mode: flow
    history_image_size:
      - 64
      - 64
    history_flow:
      compute_size:
        - 64
        - 64
      cpu_worker_num: 12
      cache:
        enabled: true
        read: true
        write: true
        dirname: history_flow_cache
        version: v1

# --- Optimization / training loop --------------------------------------
trainer:
  epochs: 100
  max_train_steps: 100000
  num_warmup_steps: 5000
  save_interval: 10000
  eval_interval: 1000
  # Per-module learning rates.
  learning_rate:
    base: 1.0e-05
    qwen_vl_interface: 1.0e-05
    action_model: 0.0001
  lr_scheduler_type: cosine_with_min_lr
  scheduler_specific_kwargs:
    min_lr: 5.0e-07
  freeze_modules: null
  loss_scale:
    vla: 1.0
    vlm: 0.0
  repeated_diffusion_steps: 4
  # NOTE(review): both `max_grad_norm` and `gradient_clipping` are set
  # (both 1.0), and both `num_warmup_steps` and `warmup_ratio` are set —
  # confirm which of each pair the trainer actually reads.
  max_grad_norm: 1.0
  warmup_ratio: 0.1
  weight_decay: 0.0
  logging_frequency: 100
  gradient_clipping: 1.0
  gradient_accumulation_steps: 1
  optimizer:
    name: AdamW
    betas:
      - 0.9
      - 0.95
    eps: 1.0e-08
    weight_decay: 1.0e-08
  is_resume: false
  resume_epoch: null
  resume_step: null
  enable_gradient_checkpointing: true
  enable_mixed_precision_training: true

output_dir: ./result/output/Dynamic_VLA/20260301-qwenoft-robotwin_dynamic_task-qwenaction-world-query-flow-stage2-h4s4f4s4-h64w64-dynamic-35task