HQJ2221 committed on
Commit
9fc0e4c
·
verified ·
1 Parent(s): ddb65a7

Upload folder using huggingface_hub

Browse files
checkpoints/steps_10000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:787d22d53e65466541e9497d2c5cc14a3f4a20ffa332ed79e162918b94f6ed43
3
+ size 10443815904
checkpoints/steps_15000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dffb6edde8e7ecb2575424b894445832b9c007d57d0955d5358f182dec8b2aa
3
+ size 10443815904
checkpoints/steps_5000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1739326b0c0b9c572279c7f6c3515de8f90748658cc9f6c28cb91b12ca7041af
3
+ size 10443814850
config.full.yaml CHANGED
@@ -1,12 +1,10 @@
1
- run_id: 0508_1407_CL_Task1_robotwin_cpd_stage1
2
- run_root_dir: /mnt/data/sunxiaoquan/starVLA_ckpts
3
  seed: 42
4
- trackers:
5
- - jsonl
6
- - wandb
7
- wandb_entity: sunxiaoquan_2002-huazhong-university-of-science-and-tech
8
- wandb_project: starVLA-CL
9
  is_debug: false
 
10
  framework:
11
  name: QwenGR00T
12
  qwenvl:
@@ -15,14 +13,13 @@ framework:
15
  vl_hidden_dim: 2048
16
  action_model:
17
  action_model_type: DiT-B
 
18
  hidden_size: 1024
19
  add_pos_embed: true
20
  max_seq_len: 1024
21
  action_dim: 14
22
  state_dim: 14
23
- future_action_window_size: 15
24
  action_horizon: 16
25
- past_action_window_size: 0
26
  repeated_diffusion_steps: 8
27
  noise_beta_alpha: 1.5
28
  noise_beta_beta: 1.0
@@ -31,57 +28,54 @@ framework:
31
  num_inference_timesteps: 4
32
  num_target_vision_tokens: 32
33
  diffusion_model_cfg:
34
- cross_attention_dim: 2048
35
  dropout: 0.2
36
  final_dropout: true
37
  interleave_self_attention: true
38
  norm_type: ada_norm
39
- num_layers: 16
40
  output_dim: 2560
41
  positional_embeddings: null
42
- obs_image_size:
43
- - 224
44
- - 224
 
 
 
 
 
45
  datasets:
46
  vlm_data:
47
  dataset_py: vlm_datasets
48
  dataformat: llava_json
49
- dataset_use: asv2_conversation_en,coco_internvl_longcap_en,vqav2_en
50
  eval_dataset: aokvqa_cauldron_llava_format
51
  data_flatten: false
52
  base_interval: 2
53
  max_pixels: 50176
54
  min_pixels: 784
55
  model_max_length: 2048
56
- model_type: qwen3vl
57
  per_device_batch_size: 4
58
  vla_data:
59
  dataset_py: lerobot_datasets
60
- data_root_dir: /mnt/data/szeluresearch/datasets/robotwin_clean/
 
61
  data_mix: robotwin_cpd_stage1
62
  action_type: abs_qpos
63
- action_horizon: 8
64
- default_image_resolution:
65
- - 3
66
- - 224
67
- - 224
68
- per_device_batch_size: 8
69
- load_all_data_for_training: false
70
- obs:
71
- - image_0
72
- image_size:
73
  - 224
74
  - 224
75
- num_workers: 8
76
  video_backend: pyav
77
- debug_dataloader: false
78
- data_timeout_sec: 120
79
  trainer:
80
- epochs: 100
81
- max_train_steps: 40000
82
- num_warmup_steps: 2000
83
- save_interval: 2000
84
- eval_interval: 1000
85
  learning_rate:
86
  base: 1.0e-05
87
  qwen_vl_interface: 1.0e-05
@@ -89,18 +83,15 @@ trainer:
89
  lr_scheduler_type: cosine_with_min_lr
90
  scheduler_specific_kwargs:
91
  min_lr: 5.0e-07
92
- freeze_modules: null
93
  loss_scale:
94
  vla: 1.0
95
  vlm: 0.1
96
  max_grad_norm: 1.0
97
- warmup_ratio: 0.1
98
  weight_decay: 0.0
99
  logging_frequency: 100
100
  gradient_clipping: 1.0
101
  gradient_accumulation_steps: 1
102
- debug_data_fetch: false
103
- data_fetch_warn_sec: 5
104
  optimizer:
105
  name: AdamW
106
  betas:
@@ -108,8 +99,5 @@ trainer:
108
  - 0.95
109
  eps: 1.0e-08
110
  weight_decay: 1.0e-08
111
- is_resume: false
112
- resume_epoch: null
113
- resume_step: null
114
- enable_gradient_checkpointing: true
115
- enable_mixed_precision_training: true
 
1
+ run_id: 0518_robotwin_qwengr00t
2
+ run_root_dir: ./playground/Checkpoints
3
  seed: 42
4
+ wandb_entity: liberoVLA
5
+ wandb_project: starVLA
 
 
 
6
  is_debug: false
7
+ version_id: '0.21'
8
  framework:
9
  name: QwenGR00T
10
  qwenvl:
 
13
  vl_hidden_dim: 2048
14
  action_model:
15
  action_model_type: DiT-B
16
+ action_hidden_dim: 1024
17
  hidden_size: 1024
18
  add_pos_embed: true
19
  max_seq_len: 1024
20
  action_dim: 14
21
  state_dim: 14
 
22
  action_horizon: 16
 
23
  repeated_diffusion_steps: 8
24
  noise_beta_alpha: 1.5
25
  noise_beta_beta: 1.0
 
28
  num_inference_timesteps: 4
29
  num_target_vision_tokens: 32
30
  diffusion_model_cfg:
31
+ cross_attention_dim: 2560
32
  dropout: 0.2
33
  final_dropout: true
34
  interleave_self_attention: true
35
  norm_type: ada_norm
36
+ num_layers: 12
37
  output_dim: 2560
38
  positional_embeddings: null
39
+ dit_block_name: SwitchTransformerBlock
40
+ num_experts: 4
41
+ capacity_factor: 1.0
42
+ use_aux_loss: false
43
+ future_action_window_size: 15
44
+ past_action_window_size: 0
45
+ dino:
46
+ dino_backbone: dinov2_vits14
47
  datasets:
48
  vlm_data:
49
  dataset_py: vlm_datasets
50
  dataformat: llava_json
51
+ dataset_use: asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en
52
  eval_dataset: aokvqa_cauldron_llava_format
53
  data_flatten: false
54
  base_interval: 2
55
  max_pixels: 50176
56
  min_pixels: 784
57
  model_max_length: 2048
58
+ model_type: qwen2.5vl
59
  per_device_batch_size: 4
60
  vla_data:
61
  dataset_py: lerobot_datasets
62
+ include_state: false
63
+ data_root_dir: /mnt/data/szeluresearch/datasets/robotwin_clean
64
  data_mix: robotwin_cpd_stage1
65
  action_type: abs_qpos
66
+ action_mode: abs
67
+ sequential_step_sampling: false
68
+ per_device_batch_size: 16
69
+ load_all_data_for_training: true
70
+ obs_image_size:
 
 
 
 
 
71
  - 224
72
  - 224
 
73
  video_backend: pyav
 
 
74
  trainer:
75
+ max_train_steps: 30000
76
+ num_warmup_steps: 5000
77
+ save_interval: 5000
78
+ eval_interval: 200
 
79
  learning_rate:
80
  base: 1.0e-05
81
  qwen_vl_interface: 1.0e-05
 
83
  lr_scheduler_type: cosine_with_min_lr
84
  scheduler_specific_kwargs:
85
  min_lr: 5.0e-07
86
+ freeze_modules: true
87
  loss_scale:
88
  vla: 1.0
89
  vlm: 0.1
90
  max_grad_norm: 1.0
 
91
  weight_decay: 0.0
92
  logging_frequency: 100
93
  gradient_clipping: 1.0
94
  gradient_accumulation_steps: 1
 
 
95
  optimizer:
96
  name: AdamW
97
  betas:
 
99
  - 0.95
100
  eps: 1.0e-08
101
  weight_decay: 1.0e-08
102
+ config_yaml: ./examples/Robotwin/train_files/starvla_robotwin_cl.yaml
103
+ output_dir: ./playground/Checkpoints/0518_robotwin_qwengr00t
 
 
 
config.yaml CHANGED
@@ -1,9 +1,14 @@
1
  datasets:
2
  vla_data:
 
3
  data_mix: robotwin_cpd_stage1
4
- data_root_dir: /mnt/data/szeluresearch/datasets/robotwin_clean/
5
  dataset_py: lerobot_datasets
6
- per_device_batch_size: 8
 
 
 
 
7
  video_backend: pyav
8
  framework:
9
  action_model:
@@ -12,14 +17,18 @@ framework:
12
  action_model_type: DiT-B
13
  add_pos_embed: true
14
  diffusion_model_cfg:
 
15
  cross_attention_dim: 2560
 
16
  dropout: 0.2
17
  final_dropout: true
18
  interleave_self_attention: true
19
  norm_type: ada_norm
20
- num_layers: 16
 
21
  output_dim: 2560
22
  positional_embeddings: null
 
23
  hidden_size: 1024
24
  max_seq_len: 1024
25
  noise_beta_alpha: 1.5
@@ -31,41 +40,33 @@ framework:
31
  repeated_diffusion_steps: 8
32
  state_dim: 14
33
  name: QwenGR00T
34
- obs_image_size:
35
- - 224
36
- - 224
37
  qwenvl:
38
  attn_implementation: flash_attention_2
39
  base_vlm: /mnt/data/szeluresearch/models/Qwen3-VL-4B-Instruct
40
- is_debug: false
41
- output_dir: /mnt/data/sunxiaoquan/starVLA_ckpts/0508_1407_CL_Task1_robotwin_cpd_stage1
42
- run_id: 0508_1407_CL_Task1_robotwin_cpd_stage1
43
- run_root_dir: /mnt/data/sunxiaoquan/starVLA_ckpts
44
  seed: 42
45
  trainer:
46
- data_fetch_warn_sec: 5
47
- debug_data_fetch: false
48
- eval_interval: 1000
49
- freeze_modules: null
50
- gradient_accumulation_steps: 1
51
  gradient_clipping: 1.0
52
- is_resume: false
53
  learning_rate:
54
  action_model: 0.0001
55
  base: 1.0e-05
56
  qwen_vl_interface: 1.0e-05
57
  logging_frequency: 100
58
  lr_scheduler_type: cosine_with_min_lr
59
- max_train_steps: 40000
60
- num_warmup_steps: 2000
61
  optimizer:
62
  betas:
63
  - 0.9
64
  - 0.95
65
  eps: 1.0e-08
66
  weight_decay: 1.0e-08
67
- save_interval: 2000
68
  scheduler_specific_kwargs:
69
  min_lr: 5.0e-07
70
- wandb_entity: sunxiaoquan_2002-huazhong-university-of-science-and-tech
71
- wandb_project: starVLA-CL
 
1
  datasets:
2
  vla_data:
3
+ action_mode: abs
4
  data_mix: robotwin_cpd_stage1
5
+ data_root_dir: /mnt/data/szeluresearch/datasets/robotwin_clean
6
  dataset_py: lerobot_datasets
7
+ obs_image_size:
8
+ - 224
9
+ - 224
10
+ per_device_batch_size: 16
11
+ sequential_step_sampling: false
12
  video_backend: pyav
13
  framework:
14
  action_model:
 
17
  action_model_type: DiT-B
18
  add_pos_embed: true
19
  diffusion_model_cfg:
20
+ capacity_factor: 1.0
21
  cross_attention_dim: 2560
22
+ dit_block_name: SwitchTransformerBlock
23
  dropout: 0.2
24
  final_dropout: true
25
  interleave_self_attention: true
26
  norm_type: ada_norm
27
+ num_experts: 4
28
+ num_layers: 12
29
  output_dim: 2560
30
  positional_embeddings: null
31
+ use_aux_loss: false
32
  hidden_size: 1024
33
  max_seq_len: 1024
34
  noise_beta_alpha: 1.5
 
40
  repeated_diffusion_steps: 8
41
  state_dim: 14
42
  name: QwenGR00T
 
 
 
43
  qwenvl:
44
  attn_implementation: flash_attention_2
45
  base_vlm: /mnt/data/szeluresearch/models/Qwen3-VL-4B-Instruct
46
+ output_dir: ./playground/Checkpoints/0518_robotwin_qwengr00t
47
+ run_id: 0518_robotwin_qwengr00t
48
+ run_root_dir: ./playground/Checkpoints
 
49
  seed: 42
50
  trainer:
51
+ eval_interval: 200
52
+ freeze_modules: true
 
 
 
53
  gradient_clipping: 1.0
 
54
  learning_rate:
55
  action_model: 0.0001
56
  base: 1.0e-05
57
  qwen_vl_interface: 1.0e-05
58
  logging_frequency: 100
59
  lr_scheduler_type: cosine_with_min_lr
60
+ max_train_steps: 30000
61
+ num_warmup_steps: 5000
62
  optimizer:
63
  betas:
64
  - 0.9
65
  - 0.95
66
  eps: 1.0e-08
67
  weight_decay: 1.0e-08
68
+ save_interval: 5000
69
  scheduler_specific_kwargs:
70
  min_lr: 5.0e-07
71
+ wandb_entity: liberoVLA
72
+ wandb_project: starVLA
dataset_statistics.json CHANGED
@@ -2,71 +2,71 @@
2
  "new_embodiment": {
3
  "action": {
4
  "mean": [
5
- -0.24294417202472687,
6
- 0.8235027074813843,
7
- 0.6636196792125701,
8
- -0.42967215776443485,
9
- -0.01669255435699597,
10
- -0.07209479324519634,
11
- 0.25053981244564055,
12
- 0.9234951853752137,
13
- 0.777768361568451,
14
- -0.5246866762638092,
15
- 0.023360290518030527,
16
- 0.10283591970801353,
17
- 0.7604784965515138,
18
- 0.7394176125526428
19
  ],
20
  "std": [
21
- 0.3155222021365239,
22
- 0.9752432164625877,
23
- 0.8226579490139962,
24
- 0.6093221529544477,
25
- 0.06312679560101302,
26
- 0.4851952257590841,
27
- 0.30253945413550004,
28
- 0.99396639820614,
29
- 0.85937879866739,
30
- 0.61281245962018,
31
- 0.07908774774998067,
32
- 0.3755971994670261,
33
- 0.4139703513995478,
34
- 0.4240937035239007
35
  ],
36
  "max": [
37
  0.01999334618449211,
38
- 2.7223196029663086,
39
- 2.904675006866455,
40
  1.528359055519104,
41
- 0.19349990785121918,
42
  1.2732691764831543,
43
- 1.0510598421096802,
44
- 2.672729253768921,
45
- 2.771620273590088,
46
  0.9248310327529907,
47
  0.710218071937561,
48
- 1.0608506202697754,
49
  1.0,
50
  1.0
51
  ],
52
  "min": [
53
- -1.0357567071914673,
54
  -5.257390398583084e-07,
55
  -2.296771708643064e-05,
56
- -1.8775614500045776,
57
  -0.6543047428131104,
58
  -5.5696635246276855,
59
- -0.0013324067695066333,
60
  -0.004139999859035015,
61
  -2.81171942333458e-05,
62
- -1.871181845664978,
63
- -0.14516803622245789,
64
  -1.1869020462036133,
65
  0.0,
66
  0.0
67
  ],
68
  "q01": [
69
- -0.9719842076301575,
70
  -5.257390398583084e-07,
71
  -2.296771708643064e-05,
72
  -1.841589093208313,
@@ -76,7 +76,7 @@
76
  0.0,
77
  -2.81171942333458e-05,
78
  -1.8467556238174438,
79
- -0.11184768699109554,
80
  -1.094338297843933,
81
  0.0,
82
  0.0
@@ -87,13 +87,13 @@
87
  2.520941734313965,
88
  1.3011630082130432,
89
  0.14792361631989484,
90
- 1.0777379810810088,
91
- 1.0109164714813232,
92
- 2.555085277557373,
93
- 2.5070955753326416,
94
  0.8670489948987962,
95
  0.6077348476648332,
96
- 1.011330008506775,
97
  1.0,
98
  1.0
99
  ],
@@ -116,71 +116,71 @@
116
  },
117
  "state": {
118
  "mean": [
119
- -0.24117434024810794,
120
- 0.8168358325958253,
121
- 0.6583515405654908,
122
- -0.4264527797698975,
123
- -0.016312663888675163,
124
- -0.07071333620697261,
125
- 0.24852103888988497,
126
- 0.9150182008743287,
127
- 0.7705055177211761,
128
- -0.5195455700159074,
129
- 0.023118514509405937,
130
- 0.10122086703777314,
131
- 0.7641711592674256,
132
- 0.7438219666481019
133
  ],
134
  "std": [
135
- 0.3153467097269625,
136
- 0.9744780775362232,
137
- 0.8211567370036635,
138
- 0.6067185206649752,
139
- 0.06236580800754508,
140
- 0.4844782331695077,
141
- 0.30256716125241684,
142
- 0.9936103058734853,
143
- 0.8582700577812189,
144
- 0.6108722308122556,
145
- 0.07796632979349367,
146
- 0.3748863363076043,
147
- 0.41162238868969653,
148
- 0.4215817545812199
149
  ],
150
  "max": [
151
  0.01999334618449211,
152
- 2.7223196029663086,
153
- 2.904675006866455,
154
  1.528359055519104,
155
- 0.19349990785121918,
156
  1.2732691764831543,
157
- 1.0510598421096802,
158
- 2.672729253768921,
159
- 2.771620273590088,
160
  0.9248310327529907,
161
  0.710218071937561,
162
- 1.0608506202697754,
163
  1.0,
164
  1.0
165
  ],
166
  "min": [
167
- -1.0357567071914673,
168
  -5.257390398583084e-07,
169
  -2.296771708643064e-05,
170
- -1.8775614500045776,
171
  -0.6543047428131104,
172
  -5.5696635246276855,
173
- -0.0013324067695066333,
174
  -0.004139999859035015,
175
  -2.81171942333458e-05,
176
- -1.871181845664978,
177
- -0.14516803622245789,
178
  -1.1869020462036133,
179
  0.0,
180
  0.0
181
  ],
182
  "q01": [
183
- -0.9719842076301575,
184
  -5.257390398583084e-07,
185
  -2.296771708643064e-05,
186
  -1.841589093208313,
@@ -190,7 +190,7 @@
190
  0.0,
191
  -2.81171942333458e-05,
192
  -1.8467556238174438,
193
- -0.11172551922500133,
194
  -1.094338297843933,
195
  0.0,
196
  0.0
@@ -201,18 +201,18 @@
201
  2.520941734313965,
202
  1.3011630082130432,
203
  0.14792361631989484,
204
- 1.0777379810810088,
205
- 1.0109164714813232,
206
- 2.555085277557373,
207
- 2.5070955753326416,
208
  0.8670489948987962,
209
  0.6024519658088687,
210
- 1.011330008506775,
211
  1.0,
212
  1.0
213
  ]
214
  },
215
- "num_transitions": 44018,
216
  "num_trajectories": 250
217
  }
218
  }
 
2
  "new_embodiment": {
3
  "action": {
4
  "mean": [
5
+ -0.23787280023097992,
6
+ 0.8526207566261292,
7
+ 0.6795735061168672,
8
+ -0.44741070866584776,
9
+ -0.005170448598801159,
10
+ -0.018425086373463267,
11
+ 0.24551648199558257,
12
+ 0.9419304490089416,
13
+ 0.7844514012336732,
14
+ -0.5366295397281646,
15
+ 0.014951886003836989,
16
+ 0.03796352967619896,
17
+ 0.7625027775764466,
18
+ 0.7451230764389039
19
  ],
20
  "std": [
21
+ 0.3156150277603005,
22
+ 0.9830999567243299,
23
+ 0.8229020640521909,
24
+ 0.6150442169166925,
25
+ 0.050956237157350745,
26
+ 0.49809455264644775,
27
+ 0.3012233751046266,
28
+ 1.0008338366289953,
29
+ 0.8609136208311007,
30
+ 0.6170888964271559,
31
+ 0.07131806276214984,
32
+ 0.391638029005536,
33
+ 0.4115661151835227,
34
+ 0.4197361308352289
35
  ],
36
  "max": [
37
  0.01999334618449211,
38
+ 3.172096014022827,
39
+ 3.52662992477417,
40
  1.528359055519104,
41
+ 0.19807769358158112,
42
  1.2732691764831543,
43
+ 1.0653150081634521,
44
+ 3.1910557746887207,
45
+ 3.568510055541992,
46
  0.9248310327529907,
47
  0.710218071937561,
48
+ 1.0686789751052856,
49
  1.0,
50
  1.0
51
  ],
52
  "min": [
53
+ -1.05232834815979,
54
  -5.257390398583084e-07,
55
  -2.296771708643064e-05,
56
+ -1.8949426412582397,
57
  -0.6543047428131104,
58
  -5.5696635246276855,
59
+ -0.0014313478022813797,
60
  -0.004139999859035015,
61
  -2.81171942333458e-05,
62
+ -1.9440162181854248,
63
+ -0.06945601850748062,
64
  -1.1869020462036133,
65
  0.0,
66
  0.0
67
  ],
68
  "q01": [
69
+ -1.0045776522159577,
70
  -5.257390398583084e-07,
71
  -2.296771708643064e-05,
72
  -1.841589093208313,
 
76
  0.0,
77
  -2.81171942333458e-05,
78
  -1.8467556238174438,
79
+ -0.04064673036336899,
80
  -1.094338297843933,
81
  0.0,
82
  0.0
 
87
  2.520941734313965,
88
  1.3011630082130432,
89
  0.14792361631989484,
90
+ 1.124965066909793,
91
+ 1.0279909372329712,
92
+ 2.5950185012817384,
93
+ 2.571608304977417,
94
  0.8670489948987962,
95
  0.6077348476648332,
96
+ 1.0272053480148315,
97
  1.0,
98
  1.0
99
  ],
 
116
  },
117
  "state": {
118
  "mean": [
119
+ -0.23635829389095306,
120
+ 0.8473392486572265,
121
+ 0.6753941893577576,
122
+ -0.4448261618614197,
123
+ -0.004914032170199789,
124
+ -0.01733654490672052,
125
+ 0.24353725612163546,
126
+ 0.9348157644271852,
127
+ 0.7781344711780549,
128
+ -0.5321321994066239,
129
+ 0.014636812207754703,
130
+ 0.036555251106619835,
131
+ 0.7653858661651612,
132
+ 0.7485770821571351
133
  ],
134
  "std": [
135
+ 0.31539701010848675,
136
+ 0.9829226342610844,
137
+ 0.821799121807512,
138
+ 0.6126238229705022,
139
+ 0.050130152566908955,
140
+ 0.49730232398025426,
141
+ 0.3009576595752512,
142
+ 1.0006900816161217,
143
+ 0.85964843804075,
144
+ 0.6151594157918544,
145
+ 0.07011113725937866,
146
+ 0.3905743726724527,
147
+ 0.4097133234868026,
148
+ 0.4177009120286524
149
  ],
150
  "max": [
151
  0.01999334618449211,
152
+ 3.172096014022827,
153
+ 3.52662992477417,
154
  1.528359055519104,
155
+ 0.19807769358158112,
156
  1.2732691764831543,
157
+ 1.0653150081634521,
158
+ 3.1910557746887207,
159
+ 3.568510055541992,
160
  0.9248310327529907,
161
  0.710218071937561,
162
+ 1.0686789751052856,
163
  1.0,
164
  1.0
165
  ],
166
  "min": [
167
+ -1.05232834815979,
168
  -5.257390398583084e-07,
169
  -2.296771708643064e-05,
170
+ -1.8949426412582397,
171
  -0.6543047428131104,
172
  -5.5696635246276855,
173
+ -0.0014313478022813797,
174
  -0.004139999859035015,
175
  -2.81171942333458e-05,
176
+ -1.9440162181854248,
177
+ -0.06945601850748062,
178
  -1.1869020462036133,
179
  0.0,
180
  0.0
181
  ],
182
  "q01": [
183
+ -1.0045776522159577,
184
  -5.257390398583084e-07,
185
  -2.296771708643064e-05,
186
  -1.841589093208313,
 
190
  0.0,
191
  -2.81171942333458e-05,
192
  -1.8467556238174438,
193
+ -0.04064673036336899,
194
  -1.094338297843933,
195
  0.0,
196
  0.0
 
201
  2.520941734313965,
202
  1.3011630082130432,
203
  0.14792361631989484,
204
+ 1.124965066909793,
205
+ 1.0279909372329712,
206
+ 2.5895396542549136,
207
+ 2.5667774224281317,
208
  0.8670489948987962,
209
  0.6024519658088687,
210
+ 1.0272053480148315,
211
  1.0,
212
  1.0
213
  ]
214
  },
215
+ "num_transitions": 61506,
216
  "num_trajectories": 250
217
  }
218
  }
run_robotwin_cl.sh ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ export CUDA_VISIBLE_DEVICES=0,1,2,3
2
+ # export NCCL_SOCKET_IFNAME=bond0
3
+ # export NCCL_IB_HCA=mlx5_2,mlx5_3
4
+ #######################################
5
+ DEVICE=gpu # WARNING: cpu or gpu
6
+ #######################################
7
+ # used to surface errors when communication stalls (e.g. while saving checkpoints)
8
+ export NCCL_BLOCKING_WAIT=1
9
+ export NCCL_ASYNC_ERROR_HANDLING=1
10
+ export NCCL_TIMEOUT=10000 # timeout set to 10000 s, roughly 2.8 hours (unit: seconds)
11
+ export NCCL_SOCKET_TIMEOUT_MS=360000
12
+ export WANDB_API_KEY=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
13
+ ###########################################################################################
14
+ # === Please modify the following paths according to your environment ===
15
+ Framework_name=QwenGR00T
16
+ freeze_module_list=''
17
+ base_vlm=/mnt/data/szeluresearch/models/Qwen3-VL-4B-Instruct
18
+ config_yaml=./examples/Robotwin/train_files/starvla_robotwin_cl.yaml
19
+ robotwin_data_root=/mnt/data/szeluresearch/datasets/robotwin_clean
20
+ data_mix=robotwin_cpd_stage1
21
+ run_root_dir=./playground/Checkpoints
22
+ run_id=$(date +'%m%d')_robotwin_qwengr00t
23
+ # === End of environment variable configuration ===
24
+ ###########################################################################################
25
+
26
+ # NOTE: robotwin is a special case. Never forget to set action_horizon and vla_data.per_device_batch_size to the same value.
27
+
28
+ # export WANDB_MODE=disabled
29
+
30
+ output_dir=${run_root_dir}/${run_id}
31
+ mkdir -p ${output_dir}
32
+ # copy this script to the output dir
33
+ cp $0 ${output_dir}/
34
+
35
+ log_dir="./logs/training/$(date +'%Y%m%d')"
36
+ mkdir -p "$log_dir"
37
+ log_file="${log_dir}/$(date +'%H%M').log"
38
+ exec > "$log_file" 2>&1
39
+ # exec 2>&1
40
+
41
+ source /root/miniconda3/etc/profile.d/conda.sh
42
+ conda activate starVLA
43
+ conda info --envs
44
+
45
+ if [[ "$DEVICE" = "gpu" ]]; then
46
+ echo "Running on GPU"
47
+ num_processes=${NUM_PROCESSES:-$(nvidia-smi -L | wc -l)}
48
+ attn_implementation="flash_attention_2"
49
+ else
50
+ echo "Running on CPU"
51
+ num_processes=1
52
+ attn_implementation="eager"
53
+ fi
54
+
55
+ accelerate launch \
56
+ --config_file starVLA/config/deepseeds/deepspeed_zero2.yaml \
57
+ --num_processes ${num_processes} \
58
+ starVLA/training/train_starvla.py \
59
+ --config_yaml ${config_yaml} \
60
+ --framework.name ${Framework_name} \
61
+ --framework.qwenvl.base_vlm ${base_vlm} \
62
+ --framework.qwenvl.attn_implementation ${attn_implementation}\
63
+ --datasets.vla_data.per_device_batch_size 16 \
64
+ --datasets.vla_data.data_root_dir ${robotwin_data_root}\
65
+ --datasets.vla_data.data_mix ${data_mix} \
66
+ --trainer.freeze_modules ${freeze_module_list} \
67
+ --trainer.max_train_steps 30000 \
68
+ --trainer.save_interval 5000 \
69
+ --trainer.logging_frequency 100 \
70
+ --trainer.eval_interval 200 \
71
+ --run_root_dir ${run_root_dir} \
72
+ --run_id ${run_id} \
73
+ --wandb_project starVLA \
74
+ --wandb_entity liberoVLA \
75
+ # --is_debug True
76
+
77
+
78
+
79
+ ##### Multi-Server Multi-GPU training script #####
80
+ # accelerate launch \
81
+ # --config_file starVLA/config/deepseeds/deepspeed_zero2.yaml \
82
+ # --main_process_ip $MASTER_ADDR \
83
+ # --main_process_port $MASTER_PORT \
84
+ # --machine_rank $SLURM_PROCID \
85
+ # --num_machines $SLURM_NNODES \
86
+ # --num_processes=${TOTAL_GPUS} \
87
+ # starVLA/training/train_starvla.py \
88
+ # --config_yaml ${config_yaml} \
89
+ # --framework.name ${Framework_name} \
90
+ # --framework.qwenvl.base_vlm ${base_vlm} \
91
+ # --run_root_dir ${run_root_dir} \
92
+ # --run_id ${run_id} \
93
+ # --wandb_project your_project \
94
+ # --wandb_entity your_name
95
+ ##### Multi-Server Multi-GPU training script #####
summary.jsonl CHANGED
@@ -1,20 +1,4 @@
1
- {"steps": 2000}
2
- {"steps": 4000}
3
- {"steps": 6000}
4
- {"steps": 8000}
5
  {"steps": 10000}
6
- {"steps": 12000}
7
- {"steps": 14000}
8
- {"steps": 16000}
9
- {"steps": 18000}
10
  {"steps": 20000}
11
- {"steps": 22000}
12
- {"steps": 24000}
13
- {"steps": 26000}
14
- {"steps": 28000}
15
- {"steps": 30000}
16
- {"steps": 32000}
17
- {"steps": 34000}
18
- {"steps": 36000}
19
- {"steps": 38000}
20
- {"steps": 40000}
 
1
+ {"steps": 5000}
 
 
 
2
  {"steps": 10000}
3
+ {"steps": 15000}
 
 
 
4
  {"steps": 20000}