Sylvest committed on
Commit
6f95f8e
·
verified ·
1 Parent(s): 631fe5e

Upload folder using huggingface_hub

Browse files
ft_fast_xarm/config.yaml ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ TAG: finetune
2
+ LOG_DIR: tensorboard_logs
3
+ vla_path: paligemma-3b-pt-224
4
+ data_root_dir: /inspire/hdd/project/embodied-multimodality/public/syfei/data/xarm_data_rlds_mix_new
5
+ dataset_name: xarm_new
6
+ run_root_dir: /inspire/hdd/project/embodied-multimodality/public/syfei/data/xarm_data/AR-VLA/runs/realworld
7
+ adapter_tmp_dir: adapter_tmp_weights
8
+ ckpt: /inspire/hdd/global_user/gongjingjing-25039/sdzhang/model/pi0fast_base.pt
9
+ use_lora: false
10
+ lora_rank: 32
11
+ lora_dropout: 0.0
12
+ use_quantization: false
13
+ enable_bf16: true
14
+ model_param_to_bf16: false
15
+ vla_training_strategy: vla-full-train
16
+ weight_decay: 1.0e-10
17
+ batch_size: 16
18
+ grad_accumulation_steps: 1
19
+ learning_rate: 2.5e-05
20
+ warmup_steps: 1000
21
+ lr_scheduler_type: cosine
22
+ image_aug: true
23
+ max_steps: 30000
24
+ save_steps: 5000
25
+ log_steps: 100
26
+ use_torch_compile: false
27
+ use_8bit_optimizer: true
28
+ wandb_project: sii-realworld
29
+ wandb_entity: joey-zh
30
+ wandb: true
31
+ use_ema: false
32
+ ema:
33
+ update_after_step: 0
34
+ power: 0.67
35
+ DATASET:
36
+ shard_dataset: true
37
+ share_datasets_statistics: true
38
+ window_size: 1
39
+ future_action_window_size: 9
40
+ camera_views:
41
+ - primary
42
+ - wrist
43
+ shuffle_buffer_size: 100000
44
+ aug_instruction: false
45
+ action_proprio_normalization_type: q99
46
+ load_depth: false
47
+ image_augment_kwargs:
48
+ primary:
49
+ random_rotation:
50
+ - 5.0
51
+ random_resized_crop:
52
+ scale:
53
+ - 0.9
54
+ - 0.9
55
+ ratio:
56
+ - 1.0
57
+ - 1.0
58
+ random_brightness:
59
+ - 0.2
60
+ random_contrast:
61
+ - 0.8
62
+ - 1.2
63
+ random_saturation:
64
+ - 0.8
65
+ - 1.2
66
+ random_hue:
67
+ - 0.05
68
+ augment_order:
69
+ - random_rotation
70
+ - random_resized_crop
71
+ - random_brightness
72
+ - random_contrast
73
+ - random_saturation
74
+ - random_hue
75
+ secondary:
76
+ random_resized_crop:
77
+ scale:
78
+ - 0.9
79
+ - 0.9
80
+ ratio:
81
+ - 1.0
82
+ - 1.0
83
+ random_brightness:
84
+ - 0.2
85
+ random_contrast:
86
+ - 0.8
87
+ - 1.2
88
+ random_saturation:
89
+ - 0.8
90
+ - 1.2
91
+ random_hue:
92
+ - 0.05
93
+ augment_order:
94
+ - random_resized_crop
95
+ - random_brightness
96
+ - random_contrast
97
+ - random_saturation
98
+ - random_hue
99
+ wrist:
100
+ random_resized_crop:
101
+ scale:
102
+ - 0.9
103
+ - 0.9
104
+ ratio:
105
+ - 1.0
106
+ - 1.0
107
+ random_brightness:
108
+ - 0.2
109
+ random_contrast:
110
+ - 0.8
111
+ - 1.2
112
+ random_saturation:
113
+ - 0.8
114
+ - 1.2
115
+ random_hue:
116
+ - 0.05
117
+ augment_order:
118
+ - random_resized_crop
119
+ - random_brightness
120
+ - random_contrast
121
+ - random_saturation
122
+ - random_hue
123
+ MODEL:
124
+ name: vla.galaxea_FAST.GalaxeaFAST
125
+ model_name: openpi_pytorch.vla.pifast.PiFAST
126
+ action_tokenizer: vla.tokenizer.FAST.FASTActionTokenizer
127
+ AT_CONFIG:
128
+ load_dir: /inspire/hdd/global_user/gongjingjing-25039/sdzhang/codes/AR-VLA/runs/fast_tokenizer/sii_xarm
129
+ use_extra_tokens: false
130
+ vla_name: paligemma-3b-pt-224
131
+ load_inside: true
132
+ pretrained_model_path: /inspire/hdd/global_user/gongjingjing-25039/sdzhang/model/paligemma-3b-pt-224/
133
+ input_ids: true
134
+ action_expert_only: false
135
+ image_token_index: 257152
136
+ vocab_size: 257216
137
+ pad_token_id: 0
138
+ cond_steps: 1
139
+ horizon_steps: 10
140
+ action_dim: 7
141
+ proprio_dim: 7
142
+ max_text_tokens: 50
143
+ max_seq_len: 562
144
+ max_image_text_tokens: 562
145
+ position_ids_type: pi0fast-navie
146
+ flow_sampling: beta
147
+ num_inference_steps: 10
148
+ final_action_clip_value: 1.0
149
+ use_fp32_eval: true
150
+ action_expert_adaptive_mode: null
151
+ num_input_images: 2
152
+ use_lm_head: true
153
+ discrete_action: true
154
+ continuous_action: false
155
+ fm_weight: 0.0
156
+ ce_weight: 1.0
157
+ vision:
158
+ name: allen_model.paligemma.siglip.SiglipVisionModel
159
+ hidden_size: 1152
160
+ intermediate_size: 4304
161
+ num_hidden_layers: 27
162
+ num_attention_heads: 16
163
+ num_channels: 3
164
+ image_size: 224
165
+ patch_size: 14
166
+ layer_norm_eps: 1.0e-06
167
+ attention_dropout: 0.0
168
+ num_image_tokens: 256
169
+ lora:
170
+ r: 32
171
+ dropout: 0.0
172
+ use_quantize: false
173
+ use_lora: false
174
+ vision_projector:
175
+ name: allen_model.paligemma.siglip.PaliGemmaMultiModalProjector
176
+ vision_config:
177
+ hidden_size: 1152
178
+ projection_dim: 2048
179
+ lora:
180
+ r: 32
181
+ dropout: 0.0
182
+ use_quantize: false
183
+ use_lora: false
184
+ joint:
185
+ name: allen_model.vla.joint_model.JointModel
186
+ action_expert_adaptive_mode: null
187
+ mixture:
188
+ vlm:
189
+ hidden_size: 2048
190
+ intermediate_size: 16384
191
+ use_final_norm: true
192
+ cache: true
193
+ use_quantize: false
194
+ use_lora: false
195
+ adaptive_mode: null
196
+ proprio:
197
+ hidden_size: 1024
198
+ intermediate_size: 4096
199
+ use_final_norm: true
200
+ cache: true
201
+ use_quantize: false
202
+ use_lora: false
203
+ adaptive_mode: null
204
+ action:
205
+ hidden_size: 1024
206
+ intermediate_size: 4096
207
+ use_final_norm: true
208
+ cache: false
209
+ use_quantize: false
210
+ use_lora: false
211
+ adaptive_mode: null
212
+ time_hidden_size: 256
213
+ lora:
214
+ r: 32
215
+ dropout: 0.0
216
+ num_hidden_layers: 18
217
+ num_attention_heads: 8
218
+ num_key_value_heads: 1
219
+ head_dim: 256
220
+ max_position_embeddings: 8192
221
+ rms_norm_eps: 1.0e-06
222
+ rope_theta: 10000.0
223
+ attention_bias: false
224
+ attention_dropout: 0.0
225
+ pad_token_id: 0
226
+ model_family: galaxea_zero
227
+ hf_token: .hf_token
228
+ seed: 7
229
+ EVALUATION:
230
+ pretrained_checkpoint: runs/debug_pi_paligemma_full_bridge--0111_065718/model.pt
231
+ load_ema_weights: true
232
+ load_in_8bit: false
233
+ load_in_4bit: false
234
+ center_crop: true
235
+ unnorm_key: vlabench_primitive
236
+ num_trials_per_task: 50
237
+ replan_steps: 5
238
+ visulization: true
239
+ metrics:
240
+ - success_rate
241
+ - intention_score
242
+ - progress_score
243
+ run_id_note: null
244
+ local_log_dir: ./experiments/logs
245
+ use_wandb: false
246
+ seed: 7
247
+ config: vla/config/sii_realworld/xarm/ft_fast_xarm.yml
ft_fast_xarm/dataset_statistics.json ADDED
@@ -0,0 +1,260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "xarm_new": {
3
+ "action": {
4
+ "mean": [
5
+ -2.8300490157562308e-05,
6
+ -5.62300301680807e-05,
7
+ -8.593673555878922e-05,
8
+ 0.00018637969333212823,
9
+ 4.825706491828896e-05,
10
+ -0.00013363653852138668,
11
+ 0.22720520198345184
12
+ ],
13
+ "std": [
14
+ 0.0035002445802092552,
15
+ 0.007190553471446037,
16
+ 0.004537362605333328,
17
+ 0.017881762236356735,
18
+ 0.011786483228206635,
19
+ 0.021039394661784172,
20
+ 0.28757816553115845
21
+ ],
22
+ "max": [
23
+ 0.023313581943511963,
24
+ 0.035652413964271545,
25
+ 0.03438103199005127,
26
+ 1.2487223148345947,
27
+ 0.11948871612548828,
28
+ 0.20714592933654785,
29
+ 1.0049999952316284
30
+ ],
31
+ "min": [
32
+ -0.029834240674972534,
33
+ -0.03374001383781433,
34
+ -0.022577375173568726,
35
+ -0.1317591667175293,
36
+ -0.09295320510864258,
37
+ -1.2283718585968018,
38
+ -0.05249999836087227
39
+ ],
40
+ "q01": [
41
+ -0.009777107238769532,
42
+ -0.01890090763568878,
43
+ -0.011157331764698028,
44
+ -0.04856846809387207,
45
+ -0.03454499244689942,
46
+ -0.052733421325683594,
47
+ -0.04500000178813934
48
+ ],
49
+ "q99": [
50
+ 0.009487972259521487,
51
+ 0.019688914418220536,
52
+ 0.01317290723323823,
53
+ 0.04660269737243665,
54
+ 0.03451104164123539,
55
+ 0.061779155731201324,
56
+ 1.003749966621399
57
+ ],
58
+ "mask": [
59
+ true,
60
+ true,
61
+ true,
62
+ true,
63
+ true,
64
+ true,
65
+ false
66
+ ]
67
+ },
68
+ "proprio": {
69
+ "mean": [
70
+ 0.4625341594219208,
71
+ -0.007863887585699558,
72
+ 0.10055938363075256,
73
+ -0.1393374651670456,
74
+ -0.02694776840507984,
75
+ -0.11506758630275726,
76
+ 0.22718924283981323
77
+ ],
78
+ "std": [
79
+ 0.06788577139377594,
80
+ 0.15849925577640533,
81
+ 0.08051803708076477,
82
+ 2.9829659461975098,
83
+ 0.17507970333099365,
84
+ 0.3508768379688263,
85
+ 0.28758174180984497
86
+ ],
87
+ "max": [
88
+ 0.6804928183555603,
89
+ 0.4843595027923584,
90
+ 0.476290762424469,
91
+ 3.141592264175415,
92
+ 1.0127925872802734,
93
+ 2.7465152740478516,
94
+ 1.0049999952316284
95
+ ],
96
+ "min": [
97
+ 0.15062777698040009,
98
+ -0.46927616000175476,
99
+ -0.023890115320682526,
100
+ -3.1415903568267822,
101
+ -1.5469555854797363,
102
+ -2.308297872543335,
103
+ -0.05249999836087227
104
+ ],
105
+ "q01": [
106
+ 0.2680466890335083,
107
+ -0.3713310194015503,
108
+ 0.0027664874494075773,
109
+ -3.1384404373168944,
110
+ -0.44922237992286684,
111
+ -1.1490735149383544,
112
+ -0.04500000178813934
113
+ ],
114
+ "q99": [
115
+ 0.6170708632469177,
116
+ 0.33816394925117543,
117
+ 0.3088523066043854,
118
+ 3.1376943969726563,
119
+ 0.49070507526397794,
120
+ 0.5776546812057496,
121
+ 1.003749966621399
122
+ ]
123
+ },
124
+ "num_transitions": 47789,
125
+ "num_trajectories": 115,
126
+ "num_train_transitions": 44050,
127
+ "num_train_trajectories": 109,
128
+ "num_val_transitions": 3739,
129
+ "num_val_trajectories": 6
130
+ },
131
+ "__total__": {
132
+ "action": {
133
+ "min": [
134
+ -0.029834240674972534,
135
+ -0.03374001383781433,
136
+ -0.022577375173568726,
137
+ -0.1317591667175293,
138
+ -0.09295320510864258,
139
+ -1.2283718585968018,
140
+ -0.05249999836087227
141
+ ],
142
+ "max": [
143
+ 0.023313581943511963,
144
+ 0.035652413964271545,
145
+ 0.03438103199005127,
146
+ 1.2487223148345947,
147
+ 0.11948871612548828,
148
+ 0.20714592933654785,
149
+ 1.0049999952316284
150
+ ],
151
+ "q01": [
152
+ -0.009777107238769532,
153
+ -0.01890090763568878,
154
+ -0.011157331764698028,
155
+ -0.04856846809387207,
156
+ -0.03454499244689942,
157
+ -0.052733421325683594,
158
+ -0.04500000178813934
159
+ ],
160
+ "q99": [
161
+ 0.009487972259521487,
162
+ 0.019688914418220536,
163
+ 0.01317290723323823,
164
+ 0.04660269737243665,
165
+ 0.03451104164123539,
166
+ 0.061779155731201324,
167
+ 1.003749966621399
168
+ ],
169
+ "mean": [
170
+ -2.8300490157562308e-05,
171
+ -5.62300301680807e-05,
172
+ -8.593673555878922e-05,
173
+ 0.00018637969333212823,
174
+ 4.825706491828896e-05,
175
+ -0.00013363653852138668,
176
+ 0.22720520198345184
177
+ ],
178
+ "std": [
179
+ 0.0035002445802092552,
180
+ 0.007190553471446038,
181
+ 0.004537362605333328,
182
+ 0.017881762236356735,
183
+ 0.011786483228206635,
184
+ 0.021039394661784172,
185
+ 0.28757816553115845
186
+ ],
187
+ "mask": [
188
+ true,
189
+ true,
190
+ true,
191
+ true,
192
+ true,
193
+ true,
194
+ false
195
+ ]
196
+ },
197
+ "proprio": {
198
+ "min": [
199
+ 0.15062777698040009,
200
+ -0.46927616000175476,
201
+ -0.023890115320682526,
202
+ -3.1415903568267822,
203
+ -1.5469555854797363,
204
+ -2.308297872543335,
205
+ -0.05249999836087227
206
+ ],
207
+ "max": [
208
+ 0.6804928183555603,
209
+ 0.4843595027923584,
210
+ 0.476290762424469,
211
+ 3.141592264175415,
212
+ 1.0127925872802734,
213
+ 2.7465152740478516,
214
+ 1.0049999952316284
215
+ ],
216
+ "q01": [
217
+ 0.2680466890335083,
218
+ -0.3713310194015503,
219
+ 0.0027664874494075773,
220
+ -3.1384404373168944,
221
+ -0.44922237992286684,
222
+ -1.1490735149383544,
223
+ -0.04500000178813934
224
+ ],
225
+ "q99": [
226
+ 0.6170708632469177,
227
+ 0.33816394925117543,
228
+ 0.3088523066043854,
229
+ 3.1376943969726563,
230
+ 0.49070507526397794,
231
+ 0.5776546812057496,
232
+ 1.003749966621399
233
+ ],
234
+ "mean": [
235
+ 0.4625341594219208,
236
+ -0.007863887585699558,
237
+ 0.10055938363075256,
238
+ -0.1393374651670456,
239
+ -0.02694776840507984,
240
+ -0.11506758630275726,
241
+ 0.22718924283981323
242
+ ],
243
+ "std": [
244
+ 0.06788577139377594,
245
+ 0.15849925577640533,
246
+ 0.08051803708076477,
247
+ 2.9829659461975098,
248
+ 0.17507970333099365,
249
+ 0.3508768379688263,
250
+ 0.28758174180984497
251
+ ]
252
+ },
253
+ "num_transitions": 47789,
254
+ "num_trajectories": 115,
255
+ "num_train_transitions": 44050,
256
+ "num_train_trajectories": 109,
257
+ "num_val_transitions": 3739,
258
+ "num_val_trajectories": 6
259
+ }
260
+ }
ft_fast_xarm/model_30000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c01336d178483966d501b69d6cff9faae125a10be10005c7eb6a83875f231377
3
+ size 12952808950