bambooboom committed on
Commit
0beff28
·
verified ·
1 Parent(s): fc35745

Add files using upload-large-folder tool

Browse files
Files changed (3) hide show
  1. config.json +173 -0
  2. model.safetensors +3 -0
  3. train_config.json +289 -0
config.json ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "type": "xr1_stage2",
3
+ "n_obs_steps": 2,
4
+ "action_sample_factor": 1,
5
+ "normalization_mapping": {
6
+ "VISUAL": "IDENTITY",
7
+ "STATE": "MEAN_STD",
8
+ "ACTION": "MEAN_STD"
9
+ },
10
+ "input_features": {
11
+ "observation.state": {
12
+ "type": "STATE",
13
+ "shape": [
14
+ 32
15
+ ]
16
+ },
17
+ "observation.images.image_0": {
18
+ "type": "VISUAL",
19
+ "shape": [
20
+ 224,
21
+ 3,
22
+ 224
23
+ ]
24
+ },
25
+ "observation.images.image_1": {
26
+ "type": "VISUAL",
27
+ "shape": [
28
+ 224,
29
+ 3,
30
+ 224
31
+ ]
32
+ },
33
+ "observation.images.image_2": {
34
+ "type": "VISUAL",
35
+ "shape": [
36
+ 224,
37
+ 3,
38
+ 224
39
+ ]
40
+ },
41
+ "observation.images.image_3": {
42
+ "type": "VISUAL",
43
+ "shape": [
44
+ 224,
45
+ 3,
46
+ 224
47
+ ]
48
+ },
49
+ "observation.images.image_wrist_0": {
50
+ "type": "VISUAL",
51
+ "shape": [
52
+ 224,
53
+ 3,
54
+ 224
55
+ ]
56
+ },
57
+ "observation.images.image_wrist_1": {
58
+ "type": "VISUAL",
59
+ "shape": [
60
+ 224,
61
+ 3,
62
+ 224
63
+ ]
64
+ },
65
+ "observation.state_is_pad": {
66
+ "type": "STATE",
67
+ "shape": [
68
+ 2
69
+ ]
70
+ },
71
+ "observation.images.image_0_is_pad": {
72
+ "type": "STATE",
73
+ "shape": [
74
+ 2
75
+ ]
76
+ },
77
+ "observation.images.image_1_is_pad": {
78
+ "type": "STATE",
79
+ "shape": [
80
+ 2
81
+ ]
82
+ },
83
+ "observation.images.image_2_is_pad": {
84
+ "type": "STATE",
85
+ "shape": [
86
+ 2
87
+ ]
88
+ },
89
+ "observation.images.image_3_is_pad": {
90
+ "type": "STATE",
91
+ "shape": [
92
+ 2
93
+ ]
94
+ },
95
+ "observation.images.image_wrist_0_is_pad": {
96
+ "type": "STATE",
97
+ "shape": [
98
+ 2
99
+ ]
100
+ },
101
+ "observation.images.image_wrist_1_is_pad": {
102
+ "type": "STATE",
103
+ "shape": [
104
+ 2
105
+ ]
106
+ }
107
+ },
108
+ "output_features": {
109
+ "action": {
110
+ "type": "ACTION",
111
+ "shape": [
112
+ 32
113
+ ]
114
+ },
115
+ "action_is_pad": {
116
+ "type": "ACTION",
117
+ "shape": [
118
+ 50
119
+ ]
120
+ }
121
+ },
122
+ "stage1_pretrained_path": "/media/jushen/bamboo-fan/Save/crossvla/stage1_4dataset_10_8_bs12_kl_cb256_0528_stat/checkpoints/275000/pretrained_model/",
123
+ "stage2_pretrained_path": "None",
124
+ "stage2_latent_image_token_check": false,
125
+ "dataset_stats_generate": true,
126
+ "heterogeneous": true,
127
+ "split_dataset": true,
128
+ "real_robot_dev": false,
129
+ "image_interval_steps": 50,
130
+ "action_latent_token_num": 13,
131
+ "mformer_hidden_size": 768,
132
+ "decoder_hidden_size": 768,
133
+ "codebook_embed_dim": 256,
134
+ "codebook_k_size": 256,
135
+ "action_chunk_size": 50,
136
+ "chunk_size": 50,
137
+ "n_action_steps": 50,
138
+ "resampler": true,
139
+ "resampler_dim": 2048,
140
+ "resampler_depth": 3,
141
+ "resampler_dim_head": 128,
142
+ "resampler_heads": 4,
143
+ "resampler_num_media_embeds": 1,
144
+ "resampler_num_latents": 9,
145
+ "max_state_dim": 32,
146
+ "max_action_dim": 32,
147
+ "resize_imgs_with_padding": [
148
+ 224,
149
+ 224
150
+ ],
151
+ "empty_cameras": 0,
152
+ "adapt_to_pi_aloha": false,
153
+ "use_delta_joint_actions_aloha": false,
154
+ "tokenizer_max_length": 48,
155
+ "proj_width": 1024,
156
+ "num_steps": 10,
157
+ "use_cache": true,
158
+ "attention_implementation": "eager",
159
+ "freeze_vision_encoder": false,
160
+ "freeze_language_encoder": true,
161
+ "train_expert_only": false,
162
+ "train_state_proj": true,
163
+ "optimizer_lr": 0.0001,
164
+ "optimizer_betas": [
165
+ 0.9,
166
+ 0.95
167
+ ],
168
+ "optimizer_eps": 1e-08,
169
+ "optimizer_weight_decay": 1e-10,
170
+ "scheduler_warmup_steps": 5000,
171
+ "scheduler_decay_steps": 300000,
172
+ "scheduler_decay_lr": 1e-06
173
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ad846e3601d2449bfd9038e88a779c3b9e72947f69e956e761c74f58a3c95ce
3
+ size 16423335872
train_config.json ADDED
@@ -0,0 +1,289 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": {
3
+ "select_dataset": "HumanoidStation_in_jushen",
4
+ "repo_ids": null,
5
+ "roots": null,
6
+ "repo_id": null,
7
+ "root": null,
8
+ "episodes": null,
9
+ "image_transforms": {
10
+ "enable": false,
11
+ "max_num_transforms": 3,
12
+ "random_order": false,
13
+ "tfs": {
14
+ "brightness": {
15
+ "weight": 1.0,
16
+ "type": "ColorJitter",
17
+ "kwargs": {
18
+ "brightness": [
19
+ 0.8,
20
+ 1.2
21
+ ]
22
+ }
23
+ },
24
+ "contrast": {
25
+ "weight": 1.0,
26
+ "type": "ColorJitter",
27
+ "kwargs": {
28
+ "contrast": [
29
+ 0.8,
30
+ 1.2
31
+ ]
32
+ }
33
+ },
34
+ "saturation": {
35
+ "weight": 1.0,
36
+ "type": "ColorJitter",
37
+ "kwargs": {
38
+ "saturation": [
39
+ 0.5,
40
+ 1.5
41
+ ]
42
+ }
43
+ },
44
+ "hue": {
45
+ "weight": 1.0,
46
+ "type": "ColorJitter",
47
+ "kwargs": {
48
+ "hue": [
49
+ -0.05,
50
+ 0.05
51
+ ]
52
+ }
53
+ },
54
+ "sharpness": {
55
+ "weight": 1.0,
56
+ "type": "SharpnessJitter",
57
+ "kwargs": {
58
+ "sharpness": [
59
+ 0.5,
60
+ 1.5
61
+ ]
62
+ }
63
+ }
64
+ }
65
+ },
66
+ "local_files_only": false,
67
+ "use_imagenet_stats": true,
68
+ "video_backend": "pyav"
69
+ },
70
+ "env": null,
71
+ "policy": {
72
+ "type": "crossvla_stage2_plus",
73
+ "n_obs_steps": 2,
74
+ "action_sample_factor": 1,
75
+ "normalization_mapping": {
76
+ "VISUAL": "IDENTITY",
77
+ "STATE": "MEAN_STD",
78
+ "ACTION": "MEAN_STD"
79
+ },
80
+ "input_features": {
81
+ "observation.state": {
82
+ "type": "STATE",
83
+ "shape": [
84
+ 32
85
+ ]
86
+ },
87
+ "observation.images.image_0": {
88
+ "type": "VISUAL",
89
+ "shape": [
90
+ 224,
91
+ 3,
92
+ 224
93
+ ]
94
+ },
95
+ "observation.images.image_1": {
96
+ "type": "VISUAL",
97
+ "shape": [
98
+ 224,
99
+ 3,
100
+ 224
101
+ ]
102
+ },
103
+ "observation.images.image_2": {
104
+ "type": "VISUAL",
105
+ "shape": [
106
+ 224,
107
+ 3,
108
+ 224
109
+ ]
110
+ },
111
+ "observation.images.image_3": {
112
+ "type": "VISUAL",
113
+ "shape": [
114
+ 224,
115
+ 3,
116
+ 224
117
+ ]
118
+ },
119
+ "observation.images.image_wrist_0": {
120
+ "type": "VISUAL",
121
+ "shape": [
122
+ 224,
123
+ 3,
124
+ 224
125
+ ]
126
+ },
127
+ "observation.images.image_wrist_1": {
128
+ "type": "VISUAL",
129
+ "shape": [
130
+ 224,
131
+ 3,
132
+ 224
133
+ ]
134
+ },
135
+ "observation.state_is_pad": {
136
+ "type": "STATE",
137
+ "shape": [
138
+ 2
139
+ ]
140
+ },
141
+ "observation.images.image_0_is_pad": {
142
+ "type": "STATE",
143
+ "shape": [
144
+ 2
145
+ ]
146
+ },
147
+ "observation.images.image_1_is_pad": {
148
+ "type": "STATE",
149
+ "shape": [
150
+ 2
151
+ ]
152
+ },
153
+ "observation.images.image_2_is_pad": {
154
+ "type": "STATE",
155
+ "shape": [
156
+ 2
157
+ ]
158
+ },
159
+ "observation.images.image_3_is_pad": {
160
+ "type": "STATE",
161
+ "shape": [
162
+ 2
163
+ ]
164
+ },
165
+ "observation.images.image_wrist_0_is_pad": {
166
+ "type": "STATE",
167
+ "shape": [
168
+ 2
169
+ ]
170
+ },
171
+ "observation.images.image_wrist_1_is_pad": {
172
+ "type": "STATE",
173
+ "shape": [
174
+ 2
175
+ ]
176
+ }
177
+ },
178
+ "output_features": {
179
+ "action": {
180
+ "type": "ACTION",
181
+ "shape": [
182
+ 32
183
+ ]
184
+ },
185
+ "action_is_pad": {
186
+ "type": "ACTION",
187
+ "shape": [
188
+ 50
189
+ ]
190
+ }
191
+ },
192
+ "stage1_pretrained_path": "/media/jushen/bamboo-fan/Save/crossvla/stage1_4dataset_10_8_bs12_kl_cb256_0528_stat/checkpoints/275000/pretrained_model/",
193
+ "stage2_pretrained_path": "None",
194
+ "stage2_latent_image_token_check": false,
195
+ "dataset_stats_generate": true,
196
+ "heterogeneous": true,
197
+ "split_dataset": true,
198
+ "real_robot_dev": false,
199
+ "image_interval_steps": 50,
200
+ "action_latent_token_num": 13,
201
+ "mformer_hidden_size": 768,
202
+ "decoder_hidden_size": 768,
203
+ "codebook_embed_dim": 256,
204
+ "codebook_k_size": 256,
205
+ "action_chunk_size": 50,
206
+ "chunk_size": 50,
207
+ "n_action_steps": 50,
208
+ "resampler": true,
209
+ "resampler_dim": 2048,
210
+ "resampler_depth": 3,
211
+ "resampler_dim_head": 128,
212
+ "resampler_heads": 4,
213
+ "resampler_num_media_embeds": 1,
214
+ "resampler_num_latents": 9,
215
+ "max_state_dim": 32,
216
+ "max_action_dim": 32,
217
+ "resize_imgs_with_padding": [
218
+ 224,
219
+ 224
220
+ ],
221
+ "empty_cameras": 0,
222
+ "adapt_to_pi_aloha": false,
223
+ "use_delta_joint_actions_aloha": false,
224
+ "tokenizer_max_length": 48,
225
+ "proj_width": 1024,
226
+ "num_steps": 10,
227
+ "use_cache": true,
228
+ "attention_implementation": "eager",
229
+ "freeze_vision_encoder": false,
230
+ "freeze_language_encoder": true,
231
+ "train_expert_only": false,
232
+ "train_state_proj": true,
233
+ "optimizer_lr": 0.0001,
234
+ "optimizer_betas": [
235
+ 0.9,
236
+ 0.95
237
+ ],
238
+ "optimizer_eps": 1e-08,
239
+ "optimizer_weight_decay": 1e-10,
240
+ "scheduler_warmup_steps": 5000,
241
+ "scheduler_decay_steps": 300000,
242
+ "scheduler_decay_lr": 1e-06
243
+ },
244
+ "output_dir": "/media/jushen/bamboo-fan/Save/crossvla/dev/stage2_dev_pretrain_10_8_bs8_stage1_275k",
245
+ "job_name": "stage2_dev_pretrain_10_8_bs8_stage1_275k",
246
+ "resume": true,
247
+ "device": "cuda",
248
+ "use_amp": false,
249
+ "seed": 1000,
250
+ "num_workers": 4,
251
+ "batch_size": 8,
252
+ "gradient_accumulation_steps": 1,
253
+ "steps": 300000,
254
+ "eval_freq": 20000,
255
+ "log_freq": 200,
256
+ "save_checkpoint": true,
257
+ "save_freq": 10000,
258
+ "use_policy_training_preset": true,
259
+ "optimizer": {
260
+ "type": "adamw",
261
+ "lr": 0.0001,
262
+ "weight_decay": 1e-10,
263
+ "grad_clip_norm": 10.0,
264
+ "betas": [
265
+ 0.9,
266
+ 0.95
267
+ ],
268
+ "eps": 1e-08
269
+ },
270
+ "scheduler": {
271
+ "type": "cosine_decay_with_warmup",
272
+ "num_warmup_steps": 5000,
273
+ "num_decay_steps": 300000,
274
+ "peak_lr": 0.0001,
275
+ "decay_lr": 1e-06
276
+ },
277
+ "eval": {
278
+ "n_episodes": 50,
279
+ "batch_size": 50,
280
+ "use_async_envs": false
281
+ },
282
+ "wandb": {
283
+ "enable": true,
284
+ "disable_artifact": true,
285
+ "project": "crossvla_dev",
286
+ "entity": null,
287
+ "notes": null
288
+ }
289
+ }