Wayer2 committed on
Commit
84bfd24
·
verified ·
1 Parent(s): a48237e

Create config.yaml

Browse files
Files changed (1) hide show
  1. config.yaml +90 -0
config.yaml ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ datasets:
2
+ vla_data:
3
+ CoT_prompt: Your task is {instruction}. To identify the key objects for your task.
4
+ Locate their bounding boxes in [x1,y1,x2,y2] format.
5
+ data_mix: all_dataset
6
+ data_root_dir: /mnt/project
7
+ dataset_py: lerobot_datasets
8
+ delete_pause_frame: false
9
+ image_size:
10
+ - 224
11
+ - 224
12
+ lerobot_version: v3.0
13
+ per_device_batch_size: 48
14
+ training_task_weights:
15
+ - 1
16
+ - 1
17
+ - 1
18
+ - 1
19
+ use_delta_action: true
20
+ framework:
21
+ action_model:
22
+ action_dim: 138
23
+ action_horizon: 16
24
+ action_model_type: DiT-L
25
+ add_pos_embed: true
26
+ diffusion_model_cfg:
27
+ cross_attention_dim: 2560
28
+ dropout: 0.2
29
+ final_dropout: true
30
+ interleave_self_attention: true
31
+ norm_type: ada_norm
32
+ num_layers: 16
33
+ output_dim: 2560
34
+ positional_embeddings: null
35
+ future_action_window_size: 15
36
+ future_obs_index: 5
37
+ hidden_size: 2560
38
+ max_num_embodiments: 32
39
+ max_seq_len: 1024
40
+ noise_beta_alpha: 1.5
41
+ noise_beta_beta: 1.0
42
+ noise_s: 0.999
43
+ num_inference_timesteps: 4
44
+ num_target_vision_tokens: 32
45
+ num_timestep_buckets: 1000
46
+ num_views: 1
47
+ obs_horizon: 2
48
+ obs_loss_weight: 1.0
49
+ only_policy: false
50
+ only_wo_video_gen: false
51
+ past_action_window_size: 0
52
+ policy_and_video_gen: false
53
+ state_dim: null
54
+ vision_encoder_path: pretrained
55
+ vision_encoder_size: s
56
+ vision_encoder_type: dinov3
57
+ name: QwenMMDiT
58
+ qwenvl:
59
+ base_vlm: pretrained/vlm/Qwen3-VL-4B-Instruct
60
+ output_dir: checkpoints/lda/pretrain
61
+ run_id: lda-pretrain
62
+ run_root_dir: checkpoints/lda
63
+ seed: 42
64
+ trainer:
65
+ eval_interval: 1000
66
+ freeze_modules: qwen_vl_interface,action_model.vision_encoder
67
+ gradient_accumulation_steps: 1
68
+ gradient_clipping: 1.0
69
+ is_resume: false
70
+ learning_rate:
71
+ action_model: 0.0001
72
+ base: 4.0e-05
73
+ qwen_vl_interface: 1.0e-05
74
+ logging_frequency: 100
75
+ lr_scheduler_type: cosine_with_min_lr
76
+ max_train_steps: 400000
77
+ num_warmup_steps: 5000
78
+ optimizer:
79
+ betas:
80
+ - 0.9
81
+ - 0.95
82
+ eps: 1.0e-08
83
+ weight_decay: 1.0e-08
84
+ pretrained_checkpoint: null
85
+ repeated_diffusion_steps: 1
86
+ save_interval: 10000
87
+ scheduler_specific_kwargs:
88
+ min_lr: 5.0e-07
89
+ wandb_entity: Personal
90
+ wandb_project: lda