Wayer2
/

LDA-pretrain

Model card Files Files and versions

Wayer2 committed on May 26

Commit

84bfd24

·

verified ·

1 Parent(s): a48237e

Create config.yaml

Files changed (1) hide show

config.yaml +90 -0

config.yaml ADDED Viewed

	@@ -0,0 +1,90 @@

+datasets:
+  vla_data:
+    CoT_prompt: Your task is {instruction}. To identify the key objects for your task.
+      Locate their bounding boxes in [x1,y1,x2,y2] format.
+    data_mix: all_dataset
+    data_root_dir: /mnt/project
+    dataset_py: lerobot_datasets
+    delete_pause_frame: false
+    image_size:
+    - 224
+    - 224
+    lerobot_version: v3.0
+    per_device_batch_size: 48
+    training_task_weights:
+    - 1
+    - 1
+    - 1
+    - 1
+    use_delta_action: true
+framework:
+  action_model:
+    action_dim: 138
+    action_horizon: 16
+    action_model_type: DiT-L
+    add_pos_embed: true
+    diffusion_model_cfg:
+      cross_attention_dim: 2560
+      dropout: 0.2
+      final_dropout: true
+      interleave_self_attention: true
+      norm_type: ada_norm
+      num_layers: 16
+      output_dim: 2560
+      positional_embeddings: null
+    future_action_window_size: 15
+    future_obs_index: 5
+    hidden_size: 2560
+    max_num_embodiments: 32
+    max_seq_len: 1024
+    noise_beta_alpha: 1.5
+    noise_beta_beta: 1.0
+    noise_s: 0.999
+    num_inference_timesteps: 4
+    num_target_vision_tokens: 32
+    num_timestep_buckets: 1000
+    num_views: 1
+    obs_horizon: 2
+    obs_loss_weight: 1.0
+    only_policy: false
+    only_wo_video_gen: false
+    past_action_window_size: 0
+    policy_and_video_gen: false
+    state_dim: null
+    vision_encoder_path: pretrained
+    vision_encoder_size: s
+    vision_encoder_type: dinov3
+  name: QwenMMDiT
+  qwenvl:
+    base_vlm: pretrained/vlm/Qwen3-VL-4B-Instruct
+output_dir: checkpoints/lda/pretrain
+run_id: lda-pretrain
+run_root_dir: checkpoints/lda
+seed: 42
+trainer:
+  eval_interval: 1000
+  freeze_modules: qwen_vl_interface,action_model.vision_encoder
+  gradient_accumulation_steps: 1
+  gradient_clipping: 1.0
+  is_resume: false
+  learning_rate:
+    action_model: 0.0001
+    base: 4.0e-05
+    qwen_vl_interface: 1.0e-05
+  logging_frequency: 100
+  lr_scheduler_type: cosine_with_min_lr
+  max_train_steps: 400000
+  num_warmup_steps: 5000
+  optimizer:
+    betas:
+    - 0.9
+    - 0.95
+    eps: 1.0e-08
+    weight_decay: 1.0e-08
+  pretrained_checkpoint: null
+  repeated_diffusion_steps: 1
+  save_interval: 10000
+  scheduler_specific_kwargs:
+    min_lr: 5.0e-07
+wandb_entity: Personal
+wandb_project: lda