LinhanWang committed on
Commit
a35bc3c
·
verified ·
1 Parent(s): 64d05fb

Upload folder using huggingface_hub

Browse files
0430_oxe_bridge_rt_1_QwenPI_v3_lerobot_v3_2b/checkpoints/steps_10000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9496c65b8eb0d70c10b93569b93e0521bd6095ed2d1dc53111f7db65c639b24
3
+ size 5839880837
0430_oxe_bridge_rt_1_QwenPI_v3_lerobot_v3_2b/checkpoints/steps_20000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73876f0f06e505bed7c983dbe77492ff23aaf4ba853faf62b36a9d08b9c617ed
3
+ size 5839880837
0430_oxe_bridge_rt_1_QwenPI_v3_lerobot_v3_2b/checkpoints/steps_30000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2499194e6defb11fbb2e5938afd9c86a746d00c9839295d5921d16e31cda190
3
+ size 5839880837
0430_oxe_bridge_rt_1_QwenPI_v3_lerobot_v3_2b/checkpoints/steps_40000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea9ae08db17b255b36f808fc4e0a14b18a4170b1d6668fdfffb7ede959c83cfb
3
+ size 5839880837
0430_oxe_bridge_rt_1_QwenPI_v3_lerobot_v3_2b/checkpoints/steps_50000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abe57f67d3bf12cd37fb4f7173583eb8802b444acc85c4207ed1de004b3ef96b
3
+ size 5839880837
0430_oxe_bridge_rt_1_QwenPI_v3_lerobot_v3_2b/checkpoints/steps_60000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97188527806b5ce695a38bef3f7f7e5230a87eca3c25c5303b0c25c3480f2bf1
3
+ size 5839880837
0430_oxe_bridge_rt_1_QwenPI_v3_lerobot_v3_2b/config.full.yaml ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ datasets:
2
+ vla_data:
3
+ CoT_prompt: Your task is {instruction}. To identify the key objects for your task.
4
+ Locate their bounding boxes in [x1,y1,x2,y2] format.
5
+ data_mix: bridge_rt_1
6
+ data_root_dir: /home/linhan/data/lerobot_v3
7
+ dataset_py: lerobot_datasets
8
+ lerobot_version: v3.0
9
+ image_size:
10
+ - 224
11
+ - 224
12
+ per_device_batch_size: 16
13
+ video_backend: torchvision_av
14
+ framework:
15
+ name: QwenPI_v3
16
+ qwenvl:
17
+ base_vlm: Qwen/Qwen3-VL-2B-Instruct
18
+ attn_implementation: sdpa
19
+ vl_hidden_dim: 2048
20
+ num_vl_layers: 28
21
+ action_model:
22
+ action_model_type: LayerwiseFM
23
+ action_dim: 7
24
+ state_dim: 7
25
+ action_horizon: 16
26
+ repeated_diffusion_steps: 2
27
+ num_inference_timesteps: 4
28
+ add_pos_embed: true
29
+ max_seq_len: 1024
30
+ num_target_vision_tokens: 32
31
+ noise_beta_alpha: 1.5
32
+ noise_beta_beta: 1.0
33
+ noise_s: 0.999
34
+ num_timestep_buckets: 1000
35
+ diffusion_model_cfg:
36
+ action_dit_hidden_dim: 1024
37
+ dropout: 0.2
38
+ final_dropout: true
39
+ interleave_self_attention: true
40
+ norm_type: ada_norm
41
+ positional_embeddings: null
42
+ attention_head_dim: 64
43
+ cross_attention_dim: 1024
44
+ input_embedding_dim: 1024
45
+ num_attention_heads: 16
46
+ num_layers: 28
47
+ output_dim: 1024
48
+ future_action_window_size: 15
49
+ past_action_window_size: 0
50
+ obs_image_size: null
51
+ run_id: 0430_oxe_bridge_rt_1_QwenPI_v3_lerobot_v3_2b
52
+ run_root_dir: /home/linhan/gdrive/data/starVLA/results/Checkpoints
53
+ wandb_dir: ./results/wandb_local
54
+ seed: 42
55
+ trainer:
56
+ eval_interval: 1000
57
+ freeze_modules: true
58
+ gradient_clipping: 1.0
59
+ is_resume: true
60
+ learning_rate:
61
+ action_model: 0.0001
62
+ base: 1.0e-05
63
+ qwen_vl_interface: 1.0e-05
64
+ logging_frequency: 100
65
+ lr_scheduler_type: cosine_with_min_lr
66
+ max_train_steps: 60000
67
+ scheduler_total_steps: 100000
68
+ num_warmup_steps: 5000
69
+ optimizer:
70
+ betas:
71
+ - 0.9
72
+ - 0.95
73
+ eps: 1.0e-08
74
+ weight_decay: 1.0e-08
75
+ save_interval: 10000
76
+ scheduler_specific_kwargs:
77
+ min_lr: 5.0e-07
78
+ wandb_entity: linhan2-virginia-tech
79
+ wandb_project: starVLA_simplerEnv
80
+ is_debug: false
81
+ version_id: '0.21'
82
+ config_yaml: ./examples/SimplerEnv/train_files/config_2b.yaml
83
+ output_dir: /home/linhan/gdrive/data/starVLA/results/Checkpoints/0430_oxe_bridge_rt_1_QwenPI_v3_lerobot_v3_2b
0430_oxe_bridge_rt_1_QwenPI_v3_lerobot_v3_2b/config.yaml ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ datasets:
2
+ vla_data:
3
+ CoT_prompt: Your task is {instruction}. To identify the key objects for your task.
4
+ Locate their bounding boxes in [x1,y1,x2,y2] format.
5
+ data_mix: bridge_rt_1
6
+ data_root_dir: /home/linhan/data/lerobot_v3
7
+ dataset_py: lerobot_datasets
8
+ image_size:
9
+ - 224
10
+ - 224
11
+ lerobot_version: v3.0
12
+ per_device_batch_size: 16
13
+ video_backend: torchvision_av
14
+ framework:
15
+ action_model:
16
+ action_dim: 7
17
+ action_horizon: 16
18
+ add_pos_embed: true
19
+ diffusion_model_cfg:
20
+ action_dit_hidden_dim: 1024
21
+ attention_head_dim: 64
22
+ cross_attention_dim: 1024
23
+ dropout: 0.2
24
+ final_dropout: true
25
+ input_embedding_dim: 1024
26
+ interleave_self_attention: true
27
+ norm_type: ada_norm
28
+ num_attention_heads: 16
29
+ num_layers: 28
30
+ output_dim: 1024
31
+ positional_embeddings: null
32
+ max_seq_len: 1024
33
+ noise_beta_alpha: 1.5
34
+ noise_beta_beta: 1.0
35
+ noise_s: 0.999
36
+ num_inference_timesteps: 4
37
+ num_target_vision_tokens: 32
38
+ num_timestep_buckets: 1000
39
+ state_dim: 7
40
+ name: QwenPI_v3
41
+ qwenvl:
42
+ attn_implementation: sdpa
43
+ base_vlm: Qwen/Qwen3-VL-2B-Instruct
44
+ num_vl_layers: 28
45
+ vl_hidden_dim: 2048
46
+ output_dir: /home/linhan/gdrive/data/starVLA/results/Checkpoints/0430_oxe_bridge_rt_1_QwenPI_v3_lerobot_v3_2b
47
+ run_id: 0430_oxe_bridge_rt_1_QwenPI_v3_lerobot_v3_2b
48
+ run_root_dir: /home/linhan/gdrive/data/starVLA/results/Checkpoints
49
+ seed: 42
50
+ trainer:
51
+ eval_interval: 1000
52
+ freeze_modules: true
53
+ is_resume: true
54
+ learning_rate:
55
+ action_model: 0.0001
56
+ base: 1.0e-05
57
+ qwen_vl_interface: 1.0e-05
58
+ logging_frequency: 100
59
+ lr_scheduler_type: cosine_with_min_lr
60
+ max_train_steps: 60000
61
+ num_warmup_steps: 5000
62
+ optimizer:
63
+ betas:
64
+ - 0.9
65
+ - 0.95
66
+ eps: 1.0e-08
67
+ weight_decay: 1.0e-08
68
+ save_interval: 10000
69
+ scheduler_specific_kwargs:
70
+ min_lr: 5.0e-07
71
+ scheduler_total_steps: 100000
72
+ wandb_dir: ./results/wandb_local
73
+ wandb_entity: linhan2-virginia-tech
74
+ wandb_project: starVLA_simplerEnv
0430_oxe_bridge_rt_1_QwenPI_v3_lerobot_v3_2b/dataset_statistics.json ADDED
@@ -0,0 +1,264 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "oxe_bridge": {
3
+ "action": {
4
+ "mean": [
5
+ 0.00011365935642970726,
6
+ 6.556109292432666e-05,
7
+ -6.320965621853247e-05,
8
+ -7.205350266303867e-05,
9
+ -0.00019515305757522583,
10
+ 0.0001203165520564653,
11
+ 0.28829458355903625
12
+ ],
13
+ "std": [
14
+ 0.006909770731857718,
15
+ 0.009684093232753703,
16
+ 0.00896290498528129,
17
+ 0.02012105293057885,
18
+ 0.021582655517558648,
19
+ 0.054723342223234994,
20
+ 0.4543627821514982
21
+ ],
22
+ "max": [
23
+ 0.41691166162490845,
24
+ 0.25864794850349426,
25
+ 0.21218234300613403,
26
+ 3.122201919555664,
27
+ 1.8618112802505493,
28
+ 6.272472858428955,
29
+ 1.0
30
+ ],
31
+ "min": [
32
+ -0.4007510244846344,
33
+ -0.13874775171279907,
34
+ -0.22553899884223938,
35
+ -3.2010786533355713,
36
+ -1.8618112802505493,
37
+ -6.279075622558594,
38
+ 0.0
39
+ ],
40
+ "q01": [
41
+ -0.02875255048274994,
42
+ -0.04170213546603918,
43
+ -0.026096721179783344,
44
+ -0.08052874729037285,
45
+ -0.09249906800687313,
46
+ -0.20738555490970612,
47
+ 0.0
48
+ ],
49
+ "q99": [
50
+ 0.028306663036346436,
51
+ 0.04089853074401617,
52
+ 0.0401805154979229,
53
+ 0.08173403143882751,
54
+ 0.07760760560631752,
55
+ 0.2038465365767479,
56
+ 1.0
57
+ ],
58
+ "mask": [
59
+ true,
60
+ true,
61
+ true,
62
+ true,
63
+ true,
64
+ true,
65
+ false
66
+ ]
67
+ },
68
+ "state": {
69
+ "mean": [
70
+ 0.15471743047237396,
71
+ 0.015362550504505634,
72
+ 0.03222028166055679,
73
+ 0.0032453201711177826,
74
+ -0.038600146770477295,
75
+ 0.05382946878671646,
76
+ 0.0,
77
+ 0.35406652092933655
78
+ ],
79
+ "std": [
80
+ 0.1605359274864927,
81
+ 0.06677912092232112,
82
+ 0.048657150951527554,
83
+ 0.09275336958940797,
84
+ 0.12566984746914736,
85
+ 0.41222738578801793,
86
+ 0.0,
87
+ 0.4330223535305803
88
+ ],
89
+ "max": [
90
+ 0.5862360596656799,
91
+ 0.4034728705883026,
92
+ 0.3568263053894043,
93
+ 1.3517684936523438,
94
+ 1.570796251296997,
95
+ 3.141204357147217,
96
+ 0.0,
97
+ 1.1121242046356201
98
+ ],
99
+ "min": [
100
+ -0.04167502000927925,
101
+ -0.3563207685947418,
102
+ -0.15537554025650024,
103
+ -3.141592502593994,
104
+ -1.4992541074752808,
105
+ -3.14153790473938,
106
+ 0.0,
107
+ 0.04637829214334488
108
+ ],
109
+ "q01": [
110
+ 0.17102622985839844,
111
+ -0.1698118858039379,
112
+ -0.055632163770496845,
113
+ -0.36493172496557236,
114
+ -0.541871190071106,
115
+ -1.354275494813919,
116
+ 0.0,
117
+ 0.052190229296684265
118
+ ],
119
+ "q99": [
120
+ 0.45322076976299286,
121
+ 0.2354845181107521,
122
+ 0.19489620998501778,
123
+ 0.3780156075954437,
124
+ 0.27568644285202026,
125
+ 1.8500566184520721,
126
+ 0.0,
127
+ 1.0105689764022827
128
+ ]
129
+ },
130
+ "num_transitions": 1305714,
131
+ "num_trajectories": 53192
132
+ },
133
+ "oxe_rt1": {
134
+ "action": {
135
+ "mean": [
136
+ 0.0034937341697514057,
137
+ 0.0031329537741839886,
138
+ -0.006312565412372351,
139
+ 0.021666014567017555,
140
+ -0.0028780826833099127,
141
+ 0.0004565482959151268,
142
+ 0.26771023869514465
143
+ ],
144
+ "std": [
145
+ 0.049064808343918714,
146
+ 0.042298366060685565,
147
+ 0.05237615259817309,
148
+ 0.11248596239589767,
149
+ 0.09312947515099047,
150
+ 0.10319085665374131,
151
+ 0.4418433071282603
152
+ ],
153
+ "max": [
154
+ 2.9984593391418457,
155
+ 22.09052848815918,
156
+ 2.7507524490356445,
157
+ 1.570636510848999,
158
+ 1.5321086645126343,
159
+ 1.5691522359848022,
160
+ 1.0
161
+ ],
162
+ "min": [
163
+ -2.0204520225524902,
164
+ -5.497899532318115,
165
+ -2.031663417816162,
166
+ -1.569917917251587,
167
+ -1.569892168045044,
168
+ -1.570419430732727,
169
+ 0.0
170
+ ],
171
+ "q01": [
172
+ -0.22453527510166169,
173
+ -0.14820013284683228,
174
+ -0.231589707583189,
175
+ -0.3517994859814644,
176
+ -0.4193011274933815,
177
+ -0.43643461108207704,
178
+ 0.0
179
+ ],
180
+ "q99": [
181
+ 0.17824687153100965,
182
+ 0.14938379630446405,
183
+ 0.21842354819178575,
184
+ 0.5892666035890578,
185
+ 0.35272657424211445,
186
+ 0.44796681255102094,
187
+ 1.0
188
+ ],
189
+ "mask": [
190
+ true,
191
+ true,
192
+ true,
193
+ true,
194
+ true,
195
+ true,
196
+ false
197
+ ]
198
+ },
199
+ "state": {
200
+ "mean": [
201
+ 0.27994900941848755,
202
+ -0.04167056083679199,
203
+ 0.38855159282684326,
204
+ 0.21306754648685455,
205
+ -0.12402169406414032,
206
+ 0.24757109582424164,
207
+ 0.046330440789461136,
208
+ 0.10487455129623413
209
+ ],
210
+ "std": [
211
+ 0.2934279783952629,
212
+ 0.09174009346250792,
213
+ 0.42569889727742133,
214
+ 0.3861620331714997,
215
+ 0.3831465513973279,
216
+ 0.444337737260646,
217
+ 0.12639291844222938,
218
+ 0.22122610279191582
219
+ ],
220
+ "max": [
221
+ 1.0534898042678833,
222
+ 0.48018959164619446,
223
+ 1.6896663904190063,
224
+ 1.0,
225
+ 0.9999993443489075,
226
+ 0.9999874830245972,
227
+ 0.9554369449615479,
228
+ 0.9914546012878418
229
+ ],
230
+ "min": [
231
+ -0.4436439275741577,
232
+ -0.9970501065254211,
233
+ -0.006579156965017319,
234
+ 0.0,
235
+ -0.8643477559089661,
236
+ -0.7079970240592957,
237
+ -0.7688722014427185,
238
+ -0.4999994933605194
239
+ ],
240
+ "q01": [
241
+ 0.32481380939483645,
242
+ -0.28334290891885755,
243
+ 0.14107070609927178,
244
+ 0.0,
245
+ -0.686474204659462,
246
+ -0.6808923494815826,
247
+ -0.36045596331357954,
248
+ -0.454380963742733
249
+ ],
250
+ "q99": [
251
+ 0.8750156319141384,
252
+ 0.21247054174542404,
253
+ 1.0727112340927123,
254
+ 1.0,
255
+ 0.9377871316671368,
256
+ 0.9563051050901409,
257
+ 0.45990042358636823,
258
+ 0.7216041100025177
259
+ ]
260
+ },
261
+ "num_transitions": 3786152,
262
+ "num_trajectories": 87212
263
+ }
264
+ }
0430_oxe_bridge_rt_1_QwenPI_v3_lerobot_v3_2b/final_model/pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b4cda2a83b5d08539a2a3a897e73113e31257a5b24cfeae37c2056db6172cdc
3
+ size 5839866853
0430_oxe_bridge_rt_1_QwenPI_v3_lerobot_v3_2b/summary.jsonl ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"steps": 10000}
2
+ {"steps": 10000}
3
+ {"steps": 10000}
4
+ {"steps": 20000}
5
+ {"steps": 20000}
6
+ {"steps": 20000}
7
+ {"steps": 30000}
8
+ {"steps": 30000}
9
+ {"steps": 30000}
10
+ {"steps": 40000}
11
+ {"steps": 40000}
12
+ {"steps": 40000}
13
+ {"steps": 50000}
14
+ {"steps": 50000}
15
+ {"steps": 50000}
16
+ {"steps": 60000}