---
# Training configuration for a QwenGR00T VLA run (Qwen3-VL backbone + DiT
# action head) on the bridge_rt_1 data mix.
# NOTE(review): indentation was reconstructed from the flattened dump using the
# alphabetical key order of each mapping — verify nesting against the loader.

datasets:
  vla_data:
    # Chain-of-thought prompt template; {instruction} is filled in at runtime.
    # Folded scalar (>-) yields the same single-line string as the original.
    CoT_prompt: >-
      Your task is {instruction}. To identify the key objects for your task.
      Locate their bounding boxes in [x1,y1,x2,y2] format.
    data_mix: bridge_rt_1
    data_root_dir: ./playground/Datasets/OXE_LEROBOT
    dataset_py: lerobot_datasets
    # Input image resolution (height, width).
    image_size:
      - 224
      - 224
    per_device_batch_size: 8
    video_backend: torchvision_av

framework:
  action_model:
    action_dim: 7
    action_horizon: 16
    action_model_type: DiT-B
    add_pos_embed: true
    # Diffusion transformer settings for the action head.
    diffusion_model_cfg:
      cross_attention_dim: 4096
      dropout: 0.2
      final_dropout: true
      interleave_self_attention: true
      norm_type: ada_norm
      num_layers: 16
      output_dim: 1024
      positional_embeddings: null
    future_action_window_size: 15
    hidden_size: 1024
    max_seq_len: 1024
    # Beta-distribution noise-schedule parameters for diffusion training.
    noise_beta_alpha: 1.5
    noise_beta_beta: 1.0
    noise_s: 0.999
    num_inference_timesteps: 4
    num_target_vision_tokens: 32
    num_timestep_buckets: 1000
    past_action_window_size: 0
    state_dim: 7
  name: QwenGR00T
  qwenvl:
    # Local path to the pretrained VLM — machine-specific; verify it exists.
    base_vlm: /mnt/18T/starVLAproject/Qwen3-VL-8B-Instruct

# Run bookkeeping: output_dir is run_root_dir + run_id.
output_dir: /starvla/Checkpoints/qwen3vl_bridge_rt1_QwenGR00T_2node_0203_1256
run_id: qwen3vl_bridge_rt1_QwenGR00T_2node_0203_1256
run_root_dir: /starvla/Checkpoints
seed: 42

trainer:
  eval_interval: 500
  freeze_modules: true
  gradient_accumulation_steps: 1
  gradient_clipping: 1.0
  is_resume: false
  # Per-module learning rates (action head trains 10x faster than the VLM).
  learning_rate:
    action_model: 0.0001
    base: 1.0e-05
    qwen_vl_interface: 1.0e-05
  logging_frequency: 50
  lr_scheduler_type: cosine_with_min_lr
  max_train_steps: 100000
  num_warmup_steps: 10000
  optimizer:
    # AdamW-style moment coefficients.
    betas:
      - 0.9
      - 0.95
    eps: 1.0e-08
    weight_decay: 1.0e-08
  repeated_diffusion_steps: 4
  save_interval: 10000
  scheduler_specific_kwargs:
    min_lr: 5.0e-07
  wandb_entity: xiguapi
  wandb_project: Qwen3VL_Bridge_RT1_QwenGR00T