# PUMA / config.full.yaml
# Provenance: uploaded by HENGFANG ("Add files using upload-large-folder tool",
# commit 45bc8f7, verified)
run_id: puma-domino-dynamic-35task
run_root_dir: ./result/output/Dynamic_VLA
seed: 42
trackers:
- jsonl
- wandb
wandb_entity: heng_
wandb_project: Dynamic_VLA
is_debug: false
framework:
name: PUMA
history_flow_stage: stage2
qwenvl:
base_vlm: ./playground/Pretrained_models/Qwen3-VL-4B-Instruct-Action
attn_implementation: sdpa
vl_hidden_dim: 2560
action_model:
action_model_type: MLP
action_hidden_dim: 2560
action_dim: 14
state_dim: 14
future_action_window_size: 15
past_action_window_size: 0
world_model:
enabled: true
world_query_num: 4
loss_weight: 0.05
supervision: per_frame
feature_loss: cosine
grounding_mode: image
future_view_index: 0
dino_backbone: dinov2_vitb14
world_token: <|world|>
grounding:
sam2_model_config: configs/sam2.1/sam2.1_hiera_l.yaml
sam2_checkpoint: ./playground/Pretrained_models/grounded_sam2/sam2.1_hiera_large.pt
grounding_dino_config: ./playground/Pretrained_models/grounded_sam2/GroundingDINO_SwinT_OGC.py
grounding_dino_checkpoint: ./playground/Pretrained_models/grounded_sam2/groundingdino_swint_ogc.pth
box_threshold: 0.35
text_threshold: 0.25
multimask_output: false
max_boxes: 1
video_prompt: mask
cache:
enabled: true
read: true
write: true
dirname: grounding_cache
version: v1
debug:
enabled: false
output_dir: ./grounding_output
include_box: true
include_mask: true
datasets:
vla_data:
dataset_py: lerobot_datasets
num_workers: 8
data_root_dir: ./data/robotwin/dynamic-35tasks-clean-level1
data_mix: robotwin_dynamic_task
action_type: abs_qpos
default_image_resolution:
- 3
- 224
- 224
per_device_batch_size: 8
load_all_data_for_training: true
obs:
- image_0
image_size:
- 224
- 224
video_backend: torchvision_av
include_state: false
future_k: 4
future_stride: 4
history_k: 4
history_stride: 4
history_mode: flow
history_image_size:
- 64
- 64
history_flow:
compute_size:
- 64
- 64
cpu_worker_num: 12
cache:
enabled: true
read: true
write: true
dirname: history_flow_cache
version: v1
trainer:
epochs: 100
max_train_steps: 100000
num_warmup_steps: 5000
save_interval: 10000
eval_interval: 1000
learning_rate:
base: 1.0e-05
qwen_vl_interface: 1.0e-05
action_model: 0.0001
lr_scheduler_type: cosine_with_min_lr
scheduler_specific_kwargs:
min_lr: 5.0e-07
freeze_modules: null
loss_scale:
vla: 1.0
vlm: 0.0
repeated_diffusion_steps: 4
max_grad_norm: 1.0
warmup_ratio: 0.1
weight_decay: 0.0
logging_frequency: 100
gradient_clipping: 1.0
gradient_accumulation_steps: 1
optimizer:
name: AdamW
betas:
- 0.9
- 0.95
eps: 1.0e-08
weight_decay: 1.0e-08
is_resume: false
resume_epoch: null
resume_step: null
enable_gradient_checkpointing: true
enable_mixed_precision_training: true
output_dir: ./result/output/Dynamic_VLA/20260301-qwenoft-robotwin_dynamic_task-qwenaction-world-query-flow-stage2-h4s4f4s4-h64w64-dynamic-35task