Safetensors
English
jie530 committed on
Commit
a4ea721
·
verified ·
1 Parent(s): 141d8d9

Upload folder using huggingface_hub

Browse files
Files changed (20) hide show
  1. psi0/simple-checkpoints/g1wholebodybendpick-v0.simple.flow1000.cosine.lr1.0e-04.b256.gpus8.2603151312/argv.txt +53 -0
  2. psi0/simple-checkpoints/g1wholebodybendpick-v0.simple.flow1000.cosine.lr1.0e-04.b256.gpus8.2603151312/checkpoints/ckpt_40000/model.safetensors +3 -0
  3. psi0/simple-checkpoints/g1wholebodybendpick-v0.simple.flow1000.cosine.lr1.0e-04.b256.gpus8.2603151312/envs.txt +17 -0
  4. psi0/simple-checkpoints/g1wholebodybendpick-v0.simple.flow1000.cosine.lr1.0e-04.b256.gpus8.2603151312/run_config.json +347 -0
  5. psi0/simple-checkpoints/g1wholebodyhandover-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604071507/argv.txt +53 -0
  6. psi0/simple-checkpoints/g1wholebodyhandover-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604071507/checkpoints/ckpt_40000/model.safetensors +3 -0
  7. psi0/simple-checkpoints/g1wholebodyhandover-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604071507/envs.txt +17 -0
  8. psi0/simple-checkpoints/g1wholebodyhandover-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604071507/run_config.json +348 -0
  9. psi0/simple-checkpoints/g1wholebodylocomotionpickbetweentablesteleop-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604081126/argv.txt +53 -0
  10. psi0/simple-checkpoints/g1wholebodylocomotionpickbetweentablesteleop-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604081126/checkpoints/ckpt_40000/model.safetensors +3 -0
  11. psi0/simple-checkpoints/g1wholebodylocomotionpickbetweentablesteleop-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604081126/envs.txt +17 -0
  12. psi0/simple-checkpoints/g1wholebodylocomotionpickbetweentablesteleop-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604081126/run_config.json +348 -0
  13. psi0/simple-checkpoints/g1wholebodytabletopgrasp-v0.simple.flow1000.cosine.lr1.0e-04.b128.gpus8.2603181503/argv.txt +53 -0
  14. psi0/simple-checkpoints/g1wholebodytabletopgrasp-v0.simple.flow1000.cosine.lr1.0e-04.b128.gpus8.2603181503/checkpoints/ckpt_40000/model.safetensors +3 -0
  15. psi0/simple-checkpoints/g1wholebodytabletopgrasp-v0.simple.flow1000.cosine.lr1.0e-04.b128.gpus8.2603181503/envs.txt +17 -0
  16. psi0/simple-checkpoints/g1wholebodytabletopgrasp-v0.simple.flow1000.cosine.lr1.0e-04.b128.gpus8.2603181503/run_config.json +348 -0
  17. psi0/simple-checkpoints/g1wholebodyxmovebendpickteleop-v0.simple.flow1000.cosine.lr1.0e-04.b112.gpus7.2604100422/argv.txt +53 -0
  18. psi0/simple-checkpoints/g1wholebodyxmovebendpickteleop-v0.simple.flow1000.cosine.lr1.0e-04.b112.gpus7.2604100422/checkpoints/ckpt_40000/model.safetensors +3 -0
  19. psi0/simple-checkpoints/g1wholebodyxmovebendpickteleop-v0.simple.flow1000.cosine.lr1.0e-04.b112.gpus7.2604100422/envs.txt +17 -0
  20. psi0/simple-checkpoints/g1wholebodyxmovebendpickteleop-v0.simple.flow1000.cosine.lr1.0e-04.b112.gpus7.2604100422/run_config.json +348 -0
psi0/simple-checkpoints/g1wholebodybendpick-v0.simple.flow1000.cosine.lr1.0e-04.b256.gpus8.2603151312/argv.txt ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ scripts/train.py
2
+ finetune_simple_psi0_config
3
+ --seed=292285
4
+ --exp=g1wholebodybendpick-v0
5
+ --train.name=finetune
6
+ --train.data_parallel=ddp
7
+ --train.mixed_precision=bf16
8
+ --train.train_batch_size=32
9
+ --train.max_checkpoints_to_keep=5
10
+ --train.gradient_accumulation_steps=1
11
+ --train.learning_rate=1e-4
12
+ --train.max_training_steps=40000
13
+ --train.warmup_ratio=None
14
+ --train.warmup_steps=1000
15
+ --train.checkpointing_steps=5000
16
+ --train.validation_steps=500
17
+ --train.val_num_batches=20
18
+ --train.max_grad_norm=1.0
19
+ --train.lr_scheduler_type=cosine
20
+ --train.lr_scheduler_kwargs.weight_decay=1e-6
21
+ --train.lr_scheduler_kwargs.betas 0.95 0.999
22
+ --log.report_to=wandb
23
+ --data.root_dir=/data/jliu/data
24
+ --data.train-repo-ids=G1WholebodyBendPick-v0
25
+ --data.transform.repack.pad-action-dim=36
26
+ --data.transform.repack.pad-state-dim=36
27
+ --data.transform.field.stat-path=meta/stats_psi0.json
28
+ --data.transform.field.stat-action-key=action
29
+ --data.transform.field.stat-state-key=states
30
+ --data.transform.field.action_norm_type=bounds
31
+ --data.transform.field.no-use-norm-mask
32
+ --data.transform.field.normalize-state
33
+ --data.transform.field.pad-action-dim=36
34
+ --data.transform.field.pad-state-dim=36
35
+ --data.transform.model.img-aug
36
+ --data.transform.model.resize.size 180 320
37
+ --data.transform.model.center_crop.size 180 320
38
+ --model.model_name_or_path=/hfm/cache/checkpoints/hfm.pre.fast.mixed.1by1.2601091803.ckpt30k
39
+ --model.pretrained-action-header-path=/hfm/cache/checkpoints/postpre.1by130k.pad36.mixed.2601131206.ckpt34k
40
+ --model.noise-scheduler=flow
41
+ --model.train-diffusion-steps=1000
42
+ --model.n_conditions=0
43
+ --model.action-chunk-size=30
44
+ --model.action-dim=36
45
+ --model.action-exec-horizon=30
46
+ --model.observation-horizon=1
47
+ --model.odim=36
48
+ --model.view_feature_dim=2048
49
+ --model.no-tune-vlm
50
+ --model.no-use_film
51
+ --model.no-combined_temb
52
+ --model.rtc
53
+ --model.max-delay=8
psi0/simple-checkpoints/g1wholebodybendpick-v0.simple.flow1000.cosine.lr1.0e-04.b256.gpus8.2603151312/checkpoints/ckpt_40000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9237021ab453b839b14a1e3bcd680102579aaa829d60e6bf91b0f702d653e879
3
+ size 6253648840
psi0/simple-checkpoints/g1wholebodybendpick-v0.simple.flow1000.cosine.lr1.0e-04.b256.gpus8.2603151312/envs.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ OMP_NUM_THREADS=32
2
+ HF_HOME=/data/cache
3
+ TORCH_HOME=/data/cache
4
+ HF_TOKEN=hf_...TiKa
5
+ HF_LEROBOT_HOME=/data/data/lerobot
6
+ WE_HOME=Not Set
7
+ DATA_HOME=/data/data
8
+ UV_CACHE_DIR=/data/cache
9
+ WANDB_API_KEY=90e...5c06
10
+ PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
11
+ CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
12
+ WORLD_SIZE=8
13
+ LOCAL_WORLD_SIZE=8
14
+ RANK=0
15
+ LOCAL_RANK=0
16
+ MASTER_ADDR=127.0.0.1
17
+ MASTER_PORT=29508
psi0/simple-checkpoints/g1wholebodybendpick-v0.simple.flow1000.cosine.lr1.0e-04.b256.gpus8.2603151312/run_config.json ADDED
@@ -0,0 +1,347 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "exp": "g1wholebodybendpick-v0",
3
+ "seed": 292285,
4
+ "auto_tag_run": false,
5
+ "eval": false,
6
+ "debug": false,
7
+ "timestamp": "2603151312",
8
+ "log": {
9
+ "logging_dir": "logs",
10
+ "report_to": "wandb",
11
+ "log_freq": 100
12
+ },
13
+ "wandb": {
14
+ "project": "psi",
15
+ "entity": "jliu530-soochow-university",
16
+ "group": "finetune",
17
+ "id": "rs0xceim",
18
+ "name": "g1wholebodybendpick-v0.simple.flow1000.cosine.lr1.0e-04.b256.gpus8.2603151312",
19
+ "resume": "allow"
20
+ },
21
+ "train": {
22
+ "num_workers": 8,
23
+ "overfit_single_batch": false,
24
+ "name": "finetune",
25
+ "resume_from_checkpoint": null,
26
+ "skip_resumed_steps": false,
27
+ "hf_token": ".hf_token",
28
+ "lora": false,
29
+ "output_dir": ".runs",
30
+ "gradient_accumulation_steps": 1,
31
+ "mixed_precision": "bf16",
32
+ "max_grad_norm": 1.0,
33
+ "train_batch_size": 32,
34
+ "val_batch_size": 16,
35
+ "val_num_batches": 20,
36
+ "checkpointing_steps": 5000,
37
+ "max_checkpoints_to_keep": 5,
38
+ "validation_steps": 500,
39
+ "learning_rate": 0.0001,
40
+ "lr_scheduler_type": "cosine",
41
+ "lr_scheduler_kwargs": {
42
+ "betas": [
43
+ 0.95,
44
+ 0.999
45
+ ],
46
+ "weight_decay": 1e-6,
47
+ "eps": 1e-8
48
+ },
49
+ "scheduler_specific_kwargs": {},
50
+ "data_parallel": "ddp",
51
+ "sharding_strategy": "full-shard",
52
+ "deepspeed_config": "/data/jliu/psi/src/InternVLA/config/deepseeds/zero3.json",
53
+ "enable_gradient_checkpointing": true,
54
+ "enable_mixed_precision_training": true,
55
+ "reduce_in_full_precision": true,
56
+ "max_training_steps": 40000,
57
+ "num_train_epochs": null,
58
+ "warmup_steps": 1000,
59
+ "warmup_ratio": null
60
+ },
61
+ "data": {
62
+ "transform": {
63
+ "repack": {
64
+ "dataset_name": "simple",
65
+ "num_past_frames": 0,
66
+ "action_chunk_size": 30,
67
+ "pad_action_dim": 36,
68
+ "pad_state_dim": 36
69
+ },
70
+ "model": {
71
+ "resize": {
72
+ "size": [
73
+ 180,
74
+ 320
75
+ ]
76
+ },
77
+ "center_crop": {
78
+ "size": [
79
+ 180,
80
+ 320
81
+ ]
82
+ },
83
+ "color_jitter": {
84
+ "brightness": 0.2,
85
+ "contrast": [
86
+ 0.8,
87
+ 1.2
88
+ ],
89
+ "saturation": [
90
+ 0.8,
91
+ 1.2
92
+ ],
93
+ "hue": 0.05
94
+ },
95
+ "gaussian_noise": {
96
+ "mean": 0.0,
97
+ "std": 3.0,
98
+ "prob_skip": 0.1
99
+ },
100
+ "img_aug": true,
101
+ "adaptive_resize": false,
102
+ "img_sizes": {
103
+ "egodex": [
104
+ 270,
105
+ 480
106
+ ],
107
+ "he": [
108
+ 240,
109
+ 320
110
+ ]
111
+ }
112
+ },
113
+ "field": {
114
+ "stat_path": "meta/stats_psi0.json",
115
+ "action_norm_type": "bounds",
116
+ "stat_action_key": "action",
117
+ "stat_state_key": "states",
118
+ "use_norm_mask": false,
119
+ "action_norm_masks": [
120
+ true,
121
+ true,
122
+ true,
123
+ true,
124
+ true,
125
+ true,
126
+ false
127
+ ],
128
+ "action_min": [
129
+ -0.13059291243553162,
130
+ -0.09108058363199234,
131
+ -0.0024844733998179436,
132
+ -0.20733775198459625,
133
+ -0.15850023925304413,
134
+ -0.17450474202632904,
135
+ -0.2997315526008606,
136
+ -0.015391111373901367,
137
+ -0.34571564197540283,
138
+ -0.4991437792778015,
139
+ 0.0,
140
+ 0.0,
141
+ 0.0,
142
+ 0.0,
143
+ -0.1015840545296669,
144
+ -0.06647031009197235,
145
+ -0.16578954458236694,
146
+ -0.14477218687534332,
147
+ -0.3665394186973572,
148
+ -0.28364259004592896,
149
+ -0.1775387078523636,
150
+ -0.48419490456581116,
151
+ -0.7551082968711853,
152
+ -0.2692946195602417,
153
+ -0.03164339065551758,
154
+ -0.00003876500704791397,
155
+ -0.3909206688404083,
156
+ 0.0,
157
+ -0.04351663216948509,
158
+ -0.014203650876879692,
159
+ -0.049649015069007874,
160
+ 0.44999998807907104,
161
+ 0.0,
162
+ 0.0,
163
+ 0.0,
164
+ 0.0
165
+ ],
166
+ "action_max": [
167
+ 0.08620641380548477,
168
+ 0.13058121502399445,
169
+ 0.22948147356510162,
170
+ 0.020551620051264763,
171
+ 0.005824880674481392,
172
+ 0.010019193403422832,
173
+ 8.43817247186962e-7,
174
+ 0.39566752314567566,
175
+ 0.0,
176
+ 0.0,
177
+ 0.4860266447067261,
178
+ 1.0467392206192017,
179
+ 0.6470075845718384,
180
+ 0.8298009037971497,
181
+ 0.03516175225377083,
182
+ 0.11019192636013031,
183
+ 0.04779902100563049,
184
+ 0.12850724160671234,
185
+ 0.000038688118365826085,
186
+ 0.0012142359046265483,
187
+ 0.000033343669201713055,
188
+ 0.002679983852431178,
189
+ 0.00041063950629904866,
190
+ 0.1973484456539154,
191
+ 0.2633756697177887,
192
+ 0.34943076968193054,
193
+ 0.0012102096807211637,
194
+ 0.8342975974082947,
195
+ 0.31870752573013306,
196
+ 0.45533719658851624,
197
+ 0.15729404985904694,
198
+ 0.75,
199
+ 0.0,
200
+ 0.0,
201
+ 0.0,
202
+ 0.0
203
+ ],
204
+ "state_min": [
205
+ -0.13899999856948853,
206
+ -0.09099991619586945,
207
+ -5.989517215532203e-11,
208
+ -0.20900000631809235,
209
+ -0.1589999943971634,
210
+ -0.210999995470047,
211
+ -0.3009999990463257,
212
+ -0.01600000075995922,
213
+ -0.1860000044107437,
214
+ -0.6940000057220459,
215
+ 0.0,
216
+ 0.0,
217
+ 0.0,
218
+ 0.0,
219
+ -0.1019991859793663,
220
+ -0.06899992376565933,
221
+ -0.16899999976158142,
222
+ -0.14499999582767487,
223
+ -0.3709999918937683,
224
+ -0.28700000047683716,
225
+ -0.17800045013427734,
226
+ -0.4869999885559082,
227
+ -0.7599999904632568,
228
+ -0.27300000190734863,
229
+ -0.029999999329447746,
230
+ 0.0,
231
+ -0.39100033044815063,
232
+ -0.0010000000474974513,
233
+ 0.0,
234
+ -0.15000000596046448,
235
+ 0.0,
236
+ 0.44999998807907104,
237
+ 0.0,
238
+ 0.0,
239
+ 0.0,
240
+ 0.0
241
+ ],
242
+ "state_max": [
243
+ 0.0860000029206276,
244
+ 0.2720000147819519,
245
+ 0.23100000619888306,
246
+ 0.0,
247
+ 6.510182259944486e-8,
248
+ 0.0,
249
+ 0.0,
250
+ 0.5550000071525574,
251
+ 0.02100004442036152,
252
+ 0.0,
253
+ 0.5429999828338623,
254
+ 1.13100004196167,
255
+ 0.5770000219345093,
256
+ 0.9580000042915344,
257
+ 0.07141251862049103,
258
+ 0.10899999737739563,
259
+ 0.04699999839067459,
260
+ 0.13600000739097595,
261
+ 0.0,
262
+ 0.003000000026077032,
263
+ 0.0,
264
+ 0.009999999776482582,
265
+ 0.0020000000949949026,
266
+ 0.2029999941587448,
267
+ 0.2759999930858612,
268
+ 0.3499999940395355,
269
+ 0.003000000026077032,
270
+ 0.8370000123977661,
271
+ 0.0,
272
+ 0.0,
273
+ 0.0,
274
+ 0.75,
275
+ 0.0,
276
+ 0.0,
277
+ 0.0,
278
+ 0.0
279
+ ],
280
+ "normalize_state": true,
281
+ "pad_action_dim": 36,
282
+ "pad_state_dim": 36
283
+ }
284
+ },
285
+ "root_dir": "/data/jliu/data",
286
+ "train_repo_ids": [
287
+ "G1WholebodyBendPick-v0"
288
+ ],
289
+ "val_repo_ids": [
290
+ "G1WholebodyBendPick-v0"
291
+ ]
292
+ },
293
+ "model": {
294
+ "resnet_store_path": null,
295
+ "pretrained_action_header_path": "/hfm/cache/checkpoints/postpre.1by130k.pad36.mixed.2601131206.ckpt34k",
296
+ "rtc": true,
297
+ "max_delay": 8,
298
+ "action_dim": 36,
299
+ "action_chunk_size": 30,
300
+ "action_exec_horizon": 30,
301
+ "observation_horizon": 1,
302
+ "img_chunk": 1,
303
+ "n_cams": 1,
304
+ "use_obs": "add_token",
305
+ "dropout": 0.1,
306
+ "noise_scheduler": "flow",
307
+ "train_diffusion_steps": 1000,
308
+ "eval_diffusion_steps": 10,
309
+ "share_cam_features": false,
310
+ "early_fusion": false,
311
+ "odim": 36,
312
+ "n_conditions": 0,
313
+ "token_fusion": "concat",
314
+ "loss_w": [
315
+ 0.1,
316
+ 0.2,
317
+ 0.1
318
+ ],
319
+ "time_dim": 256,
320
+ "hidden_dim": 1536,
321
+ "num_blocks": 6,
322
+ "dim_feedforward": 2048,
323
+ "nhead": 24,
324
+ "activation": "gelu",
325
+ "view_feature_dim": 2048,
326
+ "use_film": false,
327
+ "combined_temb": false,
328
+ "use_dit": false,
329
+ "weight_decay": 0.01,
330
+ "model_name_or_path": "/hfm/cache/checkpoints/hfm.pre.fast.mixed.1by1.2601091803.ckpt30k",
331
+ "vlm_ckpt_step": null,
332
+ "tune_vlm": false,
333
+ "tune_mm_llm": false,
334
+ "tune_mm_vision": false,
335
+ "tune_mm_mlp": false,
336
+ "gradient_checkpointing": true,
337
+ "lang_backbone_lr": 0.00001,
338
+ "mm_projector_lr": 0.00001,
339
+ "vision_tower_lr": 1e-6,
340
+ "optim": "adamw_torch",
341
+ "model_max_length": 4096,
342
+ "data_flatten": true,
343
+ "data_packing": true,
344
+ "max_pixels": 451584,
345
+ "min_pixels": 12544
346
+ }
347
+ }
psi0/simple-checkpoints/g1wholebodyhandover-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604071507/argv.txt ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ scripts/train.py
2
+ finetune_simple_psi0_config
3
+ --seed=292285
4
+ --exp=g1wholebodyhandover-v0
5
+ --train.name=finetune
6
+ --train.data_parallel=ddp
7
+ --train.mixed_precision=bf16
8
+ --train.train_batch_size=16
9
+ --train.max_checkpoints_to_keep=5
10
+ --train.gradient_accumulation_steps=1
11
+ --train.learning_rate=1e-4
12
+ --train.max_training_steps=40000
13
+ --train.warmup_ratio=None
14
+ --train.warmup_steps=1000
15
+ --train.checkpointing_steps=10000
16
+ --train.validation_steps=500
17
+ --train.val_num_batches=20
18
+ --train.max_grad_norm=1.0
19
+ --train.lr_scheduler_type=cosine
20
+ --train.lr_scheduler_kwargs.weight_decay=1e-6
21
+ --train.lr_scheduler_kwargs.betas 0.95 0.999
22
+ --log.report_to=wandb
23
+ --data.root_dir=/data/jliu/data
24
+ --data.train-repo-ids=G1WholebodyHandover-v0
25
+ --data.transform.repack.pad-action-dim=36
26
+ --data.transform.repack.pad-state-dim=36
27
+ --data.transform.field.stat-path=meta/stats_psi0.json
28
+ --data.transform.field.stat-action-key=action
29
+ --data.transform.field.stat-state-key=states
30
+ --data.transform.field.action_norm_type=bounds
31
+ --data.transform.field.no-use-norm-mask
32
+ --data.transform.field.normalize-state
33
+ --data.transform.field.pad-action-dim=36
34
+ --data.transform.field.pad-state-dim=36
35
+ --data.transform.model.img-aug
36
+ --data.transform.model.resize.size 180 320
37
+ --data.transform.model.center_crop.size 180 320
38
+ --model.model_name_or_path=/hfm/cache/checkpoints/psi0/pre.fast.1by1.2601091803.ckpt.ego200k.he30k
39
+ --model.pretrained-action-header-path=/hfm/cache/checkpoints/psi0/postpre.1by1.pad36.2601131206.ckpt.he30k
40
+ --model.noise-scheduler=flow
41
+ --model.train-diffusion-steps=1000
42
+ --model.n_conditions=0
43
+ --model.action-chunk-size=30
44
+ --model.action-dim=36
45
+ --model.action-exec-horizon=30
46
+ --model.observation-horizon=1
47
+ --model.odim=36
48
+ --model.view_feature_dim=2048
49
+ --model.no-tune-vlm
50
+ --model.no-use_film
51
+ --model.no-combined_temb
52
+ --model.rtc
53
+ --model.max-delay=8
psi0/simple-checkpoints/g1wholebodyhandover-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604071507/checkpoints/ckpt_40000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:085c4b290a2f39ff0f75b0d72dd23bc4d59d48198122723e7a5eb6f27a706a0f
3
+ size 6253648840
psi0/simple-checkpoints/g1wholebodyhandover-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604071507/envs.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ OMP_NUM_THREADS=32
2
+ HF_HOME=/data/cache
3
+ TORCH_HOME=/data/cache
4
+ HF_TOKEN=hf_...TiKa
5
+ HF_LEROBOT_HOME=/data/data/lerobot
6
+ WE_HOME=Not Set
7
+ DATA_HOME=/data/data
8
+ UV_CACHE_DIR=/data/cache
9
+ WANDB_API_KEY=90e...5c06
10
+ PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
11
+ CUDA_VISIBLE_DEVICES=4,5,6,7
12
+ WORLD_SIZE=4
13
+ LOCAL_WORLD_SIZE=4
14
+ RANK=0
15
+ LOCAL_RANK=0
16
+ MASTER_ADDR=127.0.0.1
17
+ MASTER_PORT=29509
psi0/simple-checkpoints/g1wholebodyhandover-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604071507/run_config.json ADDED
@@ -0,0 +1,348 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "exp": "g1wholebodyhandover-v0",
3
+ "seed": 292285,
4
+ "auto_tag_run": false,
5
+ "eval": false,
6
+ "debug": false,
7
+ "timestamp": "2604071507",
8
+ "log": {
9
+ "logging_dir": "logs",
10
+ "report_to": "wandb",
11
+ "log_freq": 100
12
+ },
13
+ "wandb": {
14
+ "project": "psi",
15
+ "entity": "jliu530-soochow-university",
16
+ "group": "finetune",
17
+ "id": "0etggzyx",
18
+ "name": "g1wholebodyhandover-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604071507",
19
+ "resume": "allow"
20
+ },
21
+ "train": {
22
+ "num_workers": 8,
23
+ "overfit_single_batch": false,
24
+ "name": "finetune",
25
+ "resume_from_checkpoint": null,
26
+ "skip_resumed_steps": false,
27
+ "hf_token": ".hf_token",
28
+ "lora": false,
29
+ "output_dir": ".runs",
30
+ "gradient_accumulation_steps": 1,
31
+ "mixed_precision": "bf16",
32
+ "max_grad_norm": 1.0,
33
+ "optimizer_foreach": null,
34
+ "train_batch_size": 16,
35
+ "val_batch_size": 16,
36
+ "val_num_batches": 20,
37
+ "checkpointing_steps": 10000,
38
+ "max_checkpoints_to_keep": 5,
39
+ "validation_steps": 500,
40
+ "learning_rate": 0.0001,
41
+ "lr_scheduler_type": "cosine",
42
+ "lr_scheduler_kwargs": {
43
+ "betas": [
44
+ 0.95,
45
+ 0.999
46
+ ],
47
+ "weight_decay": 1e-6,
48
+ "eps": 1e-8
49
+ },
50
+ "scheduler_specific_kwargs": {},
51
+ "data_parallel": "ddp",
52
+ "sharding_strategy": "full-shard",
53
+ "deepspeed_config": "/data/jliu/psi/src/InternVLA/config/deepseeds/zero3.json",
54
+ "enable_gradient_checkpointing": true,
55
+ "enable_mixed_precision_training": true,
56
+ "reduce_in_full_precision": true,
57
+ "max_training_steps": 40000,
58
+ "num_train_epochs": null,
59
+ "warmup_steps": 1000,
60
+ "warmup_ratio": null
61
+ },
62
+ "data": {
63
+ "transform": {
64
+ "repack": {
65
+ "dataset_name": "simple",
66
+ "num_past_frames": 0,
67
+ "action_chunk_size": 30,
68
+ "pad_action_dim": 36,
69
+ "pad_state_dim": 36
70
+ },
71
+ "model": {
72
+ "resize": {
73
+ "size": [
74
+ 180,
75
+ 320
76
+ ]
77
+ },
78
+ "center_crop": {
79
+ "size": [
80
+ 180,
81
+ 320
82
+ ]
83
+ },
84
+ "color_jitter": {
85
+ "brightness": 0.2,
86
+ "contrast": [
87
+ 0.8,
88
+ 1.2
89
+ ],
90
+ "saturation": [
91
+ 0.8,
92
+ 1.2
93
+ ],
94
+ "hue": 0.05
95
+ },
96
+ "gaussian_noise": {
97
+ "mean": 0.0,
98
+ "std": 3.0,
99
+ "prob_skip": 0.1
100
+ },
101
+ "img_aug": true,
102
+ "adaptive_resize": false,
103
+ "img_sizes": {
104
+ "egodex": [
105
+ 270,
106
+ 480
107
+ ],
108
+ "he": [
109
+ 240,
110
+ 320
111
+ ]
112
+ }
113
+ },
114
+ "field": {
115
+ "stat_path": "meta/stats_psi0.json",
116
+ "action_norm_type": "bounds",
117
+ "stat_action_key": "action",
118
+ "stat_state_key": "states",
119
+ "use_norm_mask": false,
120
+ "action_norm_masks": [
121
+ true,
122
+ true,
123
+ true,
124
+ true,
125
+ true,
126
+ true,
127
+ false
128
+ ],
129
+ "action_min": [
130
+ -0.5,
131
+ -1.1095792134107943e-16,
132
+ -1.1095792134107943e-16,
133
+ -1.5,
134
+ -1.5,
135
+ -1.5,
136
+ -1.5,
137
+ -0.5,
138
+ -0.699999988079071,
139
+ -0.699999988079071,
140
+ -2.2166350627321588e-16,
141
+ -2.2166350627321588e-16,
142
+ 0.0,
143
+ -2.2166350627321588e-16,
144
+ -0.47567468881607056,
145
+ 0.1900009959936142,
146
+ -0.512170135974884,
147
+ -0.6265152096748352,
148
+ -0.5008617043495178,
149
+ -0.8220608830451965,
150
+ -0.9223371148109436,
151
+ -0.49507391452789307,
152
+ -0.3437551259994507,
153
+ -0.6871383190155029,
154
+ -0.7637607455253601,
155
+ -0.7568023204803467,
156
+ -0.576077401638031,
157
+ -0.4588268995285034,
158
+ -0.13876836001873016,
159
+ -0.10360867530107498,
160
+ -0.47856518626213074,
161
+ 0.7400000095367432,
162
+ -0.5,
163
+ -0.5,
164
+ -0.26161932945251465,
165
+ -0.06718750298023224
166
+ ],
167
+ "action_max": [
168
+ 0.5,
169
+ 0.699999988079071,
170
+ 0.699999988079071,
171
+ 0.0,
172
+ 2.2146225653890418e-16,
173
+ 2.2146225653890418e-16,
174
+ 2.2146225653890418e-16,
175
+ 1.2266071310501902e-19,
176
+ 1.1078670818917075e-16,
177
+ 1.1078670818917075e-16,
178
+ 1.5,
179
+ 1.5,
180
+ 0.6000000238418579,
181
+ 1.5,
182
+ 0.2472410947084427,
183
+ 0.7092280983924866,
184
+ 1.2571598291397095,
185
+ 0.42311304807662964,
186
+ 0.8564174771308899,
187
+ 0.5002086162567139,
188
+ 0.5172277092933655,
189
+ 0.16140148043632507,
190
+ -0.1900009959936142,
191
+ 0.5362864136695862,
192
+ 0.5715147256851196,
193
+ 0.5002322196960449,
194
+ 0.566592276096344,
195
+ 0.6392397880554199,
196
+ 0.1580466777086258,
197
+ 0.2233395129442215,
198
+ 0.2582152187824249,
199
+ 0.7400000095367432,
200
+ 0.5,
201
+ 0.5,
202
+ 0.3454970121383667,
203
+ 0.2899305522441864
204
+ ],
205
+ "state_min": [
206
+ -0.5564982891082764,
207
+ -0.48307520151138306,
208
+ -0.0005447770818136632,
209
+ -0.8388738632202148,
210
+ -1.3970016241073608,
211
+ -0.8296014666557312,
212
+ -1.4599460363388062,
213
+ -0.5806806683540344,
214
+ -0.5149835348129272,
215
+ -0.6775947213172913,
216
+ -0.001480442238971591,
217
+ -0.0002713006397243589,
218
+ -0.000914653530344367,
219
+ -0.00019419840828049928,
220
+ -0.4206617772579193,
221
+ 0.13972464203834534,
222
+ -0.546251654624939,
223
+ -0.5596316456794739,
224
+ -0.4764360189437866,
225
+ -0.7253566384315491,
226
+ -0.9443663954734802,
227
+ -0.4381798803806305,
228
+ -0.3338131606578827,
229
+ -0.667724072933197,
230
+ -0.6881827116012573,
231
+ -0.7544379830360413,
232
+ -0.5189417600631714,
233
+ -0.4484957158565521,
234
+ -0.13709338009357452,
235
+ -0.07360810041427612,
236
+ -0.4748336970806122,
237
+ 0.7400000095367432,
238
+ 0.0,
239
+ 0.0,
240
+ 0.0,
241
+ 0.0
242
+ ],
243
+ "state_max": [
244
+ 0.43566983938217163,
245
+ 0.3739710748195648,
246
+ 0.6575677990913391,
247
+ 0.004060761071741581,
248
+ 0.0005700877518393099,
249
+ 0.0004725759499706328,
250
+ 0.00010080631182063371,
251
+ 0.00001310737025050912,
252
+ 0.21882089972496033,
253
+ 0.0005271440604701638,
254
+ 0.530737042427063,
255
+ 1.4406861066818237,
256
+ 1.4605127573013306,
257
+ 1.4595911502838135,
258
+ 0.2663630545139313,
259
+ 0.657910943031311,
260
+ 1.2515853643417358,
261
+ 0.502498209476471,
262
+ 0.8292973637580872,
263
+ 0.5248894095420837,
264
+ 0.4653257131576538,
265
+ 0.18638382852077484,
266
+ -0.16696421802043915,
267
+ 0.49318820238113403,
268
+ 0.6363148093223572,
269
+ 0.45773962140083313,
270
+ 0.6238265037536621,
271
+ 0.653800904750824,
272
+ 0.1436084657907486,
273
+ 0.25937986373901367,
274
+ 0.26422709226608276,
275
+ 0.7400000095367432,
276
+ 0.0,
277
+ 0.0,
278
+ 0.0,
279
+ 0.0
280
+ ],
281
+ "normalize_state": true,
282
+ "pad_action_dim": 36,
283
+ "pad_state_dim": 36
284
+ }
285
+ },
286
+ "root_dir": "/data/jliu/data",
287
+ "train_repo_ids": [
288
+ "G1WholebodyHandover-v0"
289
+ ],
290
+ "val_repo_ids": [
291
+ "G1WholebodyHandover-v0"
292
+ ]
293
+ },
294
+ "model": {
295
+ "resnet_store_path": null,
296
+ "pretrained_action_header_path": "/hfm/cache/checkpoints/psi0/postpre.1by1.pad36.2601131206.ckpt.he30k",
297
+ "rtc": true,
298
+ "max_delay": 8,
299
+ "action_dim": 36,
300
+ "action_chunk_size": 30,
301
+ "action_exec_horizon": 30,
302
+ "observation_horizon": 1,
303
+ "img_chunk": 1,
304
+ "n_cams": 1,
305
+ "use_obs": "add_token",
306
+ "dropout": 0.1,
307
+ "noise_scheduler": "flow",
308
+ "train_diffusion_steps": 1000,
309
+ "eval_diffusion_steps": 10,
310
+ "share_cam_features": false,
311
+ "early_fusion": false,
312
+ "odim": 36,
313
+ "n_conditions": 0,
314
+ "token_fusion": "concat",
315
+ "loss_w": [
316
+ 0.1,
317
+ 0.2,
318
+ 0.1
319
+ ],
320
+ "time_dim": 256,
321
+ "hidden_dim": 1536,
322
+ "num_blocks": 6,
323
+ "dim_feedforward": 2048,
324
+ "nhead": 24,
325
+ "activation": "gelu",
326
+ "view_feature_dim": 2048,
327
+ "use_film": false,
328
+ "combined_temb": false,
329
+ "use_dit": false,
330
+ "weight_decay": 0.01,
331
+ "model_name_or_path": "/hfm/cache/checkpoints/psi0/pre.fast.1by1.2601091803.ckpt.ego200k.he30k",
332
+ "vlm_ckpt_step": null,
333
+ "tune_vlm": false,
334
+ "tune_mm_llm": false,
335
+ "tune_mm_vision": false,
336
+ "tune_mm_mlp": false,
337
+ "gradient_checkpointing": true,
338
+ "lang_backbone_lr": 0.00001,
339
+ "mm_projector_lr": 0.00001,
340
+ "vision_tower_lr": 1e-6,
341
+ "optim": "adamw_torch",
342
+ "model_max_length": 4096,
343
+ "data_flatten": true,
344
+ "data_packing": true,
345
+ "max_pixels": 451584,
346
+ "min_pixels": 12544
347
+ }
348
+ }
psi0/simple-checkpoints/g1wholebodylocomotionpickbetweentablesteleop-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604081126/argv.txt ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ scripts/train.py
2
+ finetune_simple_psi0_config
3
+ --seed=292285
4
+ --exp=g1wholebodylocomotionpickbetweentablesteleop-v0
5
+ --train.name=finetune
6
+ --train.data_parallel=ddp
7
+ --train.mixed_precision=bf16
8
+ --train.train_batch_size=16
9
+ --train.max_checkpoints_to_keep=5
10
+ --train.gradient_accumulation_steps=1
11
+ --train.learning_rate=1e-4
12
+ --train.max_training_steps=40000
13
+ --train.warmup_ratio=None
14
+ --train.warmup_steps=1000
15
+ --train.checkpointing_steps=10000
16
+ --train.validation_steps=500
17
+ --train.val_num_batches=20
18
+ --train.max_grad_norm=1.0
19
+ --train.lr_scheduler_type=cosine
20
+ --train.lr_scheduler_kwargs.weight_decay=1e-6
21
+ --train.lr_scheduler_kwargs.betas 0.95 0.999
22
+ --log.report_to=wandb
23
+ --data.root_dir=/data/jliu/data
24
+ --data.train-repo-ids=G1WholebodyLocomotionPickBetweenTablesTeleop-v0
25
+ --data.transform.repack.pad-action-dim=36
26
+ --data.transform.repack.pad-state-dim=36
27
+ --data.transform.field.stat-path=meta/stats_psi0.json
28
+ --data.transform.field.stat-action-key=action
29
+ --data.transform.field.stat-state-key=states
30
+ --data.transform.field.action_norm_type=bounds
31
+ --data.transform.field.no-use-norm-mask
32
+ --data.transform.field.normalize-state
33
+ --data.transform.field.pad-action-dim=36
34
+ --data.transform.field.pad-state-dim=36
35
+ --data.transform.model.img-aug
36
+ --data.transform.model.resize.size 180 320
37
+ --data.transform.model.center_crop.size 180 320
38
+ --model.model_name_or_path=/hfm/cache/checkpoints/psi0/pre.fast.1by1.2601091803.ckpt.ego200k.he30k
39
+ --model.pretrained-action-header-path=/hfm/cache/checkpoints/psi0/postpre.1by1.pad36.2601131206.ckpt.he30k
40
+ --model.noise-scheduler=flow
41
+ --model.train-diffusion-steps=1000
42
+ --model.n_conditions=0
43
+ --model.action-chunk-size=30
44
+ --model.action-dim=36
45
+ --model.action-exec-horizon=30
46
+ --model.observation-horizon=1
47
+ --model.odim=36
48
+ --model.view_feature_dim=2048
49
+ --model.no-tune-vlm
50
+ --model.no-use_film
51
+ --model.no-combined_temb
52
+ --model.rtc
53
+ --model.max-delay=8
psi0/simple-checkpoints/g1wholebodylocomotionpickbetweentablesteleop-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604081126/checkpoints/ckpt_40000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c69c2ae866cdff4c51aee3aa2948b6400e8f001e73d38cbff2b364e41b4cf07b
3
+ size 6253648840
psi0/simple-checkpoints/g1wholebodylocomotionpickbetweentablesteleop-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604081126/envs.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ OMP_NUM_THREADS=32
2
+ HF_HOME=/data/cache
3
+ TORCH_HOME=/data/cache
4
+ HF_TOKEN=hf_...TiKa
5
+ HF_LEROBOT_HOME=/data/data/lerobot
6
+ WE_HOME=Not Set
7
+ DATA_HOME=/data/data
8
+ UV_CACHE_DIR=/data/cache
9
+ WANDB_API_KEY=90e...5c06
10
+ PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
11
+ CUDA_VISIBLE_DEVICES=3,4,5,7
12
+ WORLD_SIZE=4
13
+ LOCAL_WORLD_SIZE=4
14
+ RANK=0
15
+ LOCAL_RANK=0
16
+ MASTER_ADDR=127.0.0.1
17
+ MASTER_PORT=29509
psi0/simple-checkpoints/g1wholebodylocomotionpickbetweentablesteleop-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604081126/run_config.json ADDED
@@ -0,0 +1,348 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "exp": "g1wholebodylocomotionpickbetweentablesteleop-v0",
3
+ "seed": 292285,
4
+ "auto_tag_run": false,
5
+ "eval": false,
6
+ "debug": false,
7
+ "timestamp": "2604081126",
8
+ "log": {
9
+ "logging_dir": "logs",
10
+ "report_to": "wandb",
11
+ "log_freq": 100
12
+ },
13
+ "wandb": {
14
+ "project": "psi",
15
+ "entity": "jliu530-soochow-university",
16
+ "group": "finetune",
17
+ "id": "wmpgbes7",
18
+ "name": "g1wholebodylocomotionpickbetweentablesteleop-v0.simple.flow1000.cosine.lr1.0e-04.b64.gpus4.2604081126",
19
+ "resume": "allow"
20
+ },
21
+ "train": {
22
+ "num_workers": 8,
23
+ "overfit_single_batch": false,
24
+ "name": "finetune",
25
+ "resume_from_checkpoint": null,
26
+ "skip_resumed_steps": false,
27
+ "hf_token": ".hf_token",
28
+ "lora": false,
29
+ "output_dir": ".runs",
30
+ "gradient_accumulation_steps": 1,
31
+ "mixed_precision": "bf16",
32
+ "max_grad_norm": 1.0,
33
+ "optimizer_foreach": null,
34
+ "train_batch_size": 16,
35
+ "val_batch_size": 16,
36
+ "val_num_batches": 20,
37
+ "checkpointing_steps": 10000,
38
+ "max_checkpoints_to_keep": 5,
39
+ "validation_steps": 500,
40
+ "learning_rate": 0.0001,
41
+ "lr_scheduler_type": "cosine",
42
+ "lr_scheduler_kwargs": {
43
+ "betas": [
44
+ 0.95,
45
+ 0.999
46
+ ],
47
+ "weight_decay": 1e-6,
48
+ "eps": 1e-8
49
+ },
50
+ "scheduler_specific_kwargs": {},
51
+ "data_parallel": "ddp",
52
+ "sharding_strategy": "full-shard",
53
+ "deepspeed_config": "/data/jliu/psi/src/InternVLA/config/deepseeds/zero3.json",
54
+ "enable_gradient_checkpointing": true,
55
+ "enable_mixed_precision_training": true,
56
+ "reduce_in_full_precision": true,
57
+ "max_training_steps": 40000,
58
+ "num_train_epochs": null,
59
+ "warmup_steps": 1000,
60
+ "warmup_ratio": null
61
+ },
62
+ "data": {
63
+ "transform": {
64
+ "repack": {
65
+ "dataset_name": "simple",
66
+ "num_past_frames": 0,
67
+ "action_chunk_size": 30,
68
+ "pad_action_dim": 36,
69
+ "pad_state_dim": 36
70
+ },
71
+ "model": {
72
+ "resize": {
73
+ "size": [
74
+ 180,
75
+ 320
76
+ ]
77
+ },
78
+ "center_crop": {
79
+ "size": [
80
+ 180,
81
+ 320
82
+ ]
83
+ },
84
+ "color_jitter": {
85
+ "brightness": 0.2,
86
+ "contrast": [
87
+ 0.8,
88
+ 1.2
89
+ ],
90
+ "saturation": [
91
+ 0.8,
92
+ 1.2
93
+ ],
94
+ "hue": 0.05
95
+ },
96
+ "gaussian_noise": {
97
+ "mean": 0.0,
98
+ "std": 3.0,
99
+ "prob_skip": 0.1
100
+ },
101
+ "img_aug": true,
102
+ "adaptive_resize": false,
103
+ "img_sizes": {
104
+ "egodex": [
105
+ 270,
106
+ 480
107
+ ],
108
+ "he": [
109
+ 240,
110
+ 320
111
+ ]
112
+ }
113
+ },
114
+ "field": {
115
+ "stat_path": "meta/stats_psi0.json",
116
+ "action_norm_type": "bounds",
117
+ "stat_action_key": "action",
118
+ "stat_state_key": "states",
119
+ "use_norm_mask": false,
120
+ "action_norm_masks": [
121
+ true,
122
+ true,
123
+ true,
124
+ true,
125
+ true,
126
+ true,
127
+ false
128
+ ],
129
+ "action_min": [
130
+ 0.0,
131
+ 0.0,
132
+ 0.0,
133
+ -1.5,
134
+ -1.5,
135
+ -0.6000000238418579,
136
+ -1.5,
137
+ -0.5,
138
+ -0.699999988079071,
139
+ -0.699999988079071,
140
+ -2.216935230032842e-16,
141
+ -2.216935230032842e-16,
142
+ -4.0845591349633594e-18,
143
+ -2.216935230032842e-16,
144
+ -0.4883034825325012,
145
+ 0.1900009959936142,
146
+ -0.5470856428146362,
147
+ -0.34318920969963074,
148
+ -0.35952919721603394,
149
+ -0.35302427411079407,
150
+ -0.4469815790653229,
151
+ -0.6371198296546936,
152
+ -0.7683824300765991,
153
+ -1.0653810501098633,
154
+ -0.8479154706001282,
155
+ -1.0297260284423828,
156
+ -0.42936205863952637,
157
+ -0.5147944092750549,
158
+ -0.16820405423641205,
159
+ -0.045328833162784576,
160
+ -0.13282617926597595,
161
+ 0.7400000095367432,
162
+ -0.5,
163
+ -0.5,
164
+ -1.0,
165
+ -3.138223648071289
166
+ ],
167
+ "action_max": [
168
+ 0.5,
169
+ 0.699999988079071,
170
+ 0.699999988079071,
171
+ 0.0,
172
+ 0.0,
173
+ 0.0,
174
+ 0.0,
175
+ 6.930528109384597e-19,
176
+ 1.108467615016421e-16,
177
+ 1.108467615016421e-16,
178
+ 1.5,
179
+ 1.5,
180
+ 1.0,
181
+ 1.5,
182
+ 0.2557959258556366,
183
+ 0.35884979367256165,
184
+ 0.5090755820274353,
185
+ 0.19132143259048462,
186
+ 0.21249642968177795,
187
+ 0.3992660641670227,
188
+ 0.4283020794391632,
189
+ 0.1457289606332779,
190
+ -0.1900009959936142,
191
+ 0.6150448322296143,
192
+ 0.35468167066574097,
193
+ 0.8703295588493347,
194
+ 0.7531875371932983,
195
+ 0.971237301826477,
196
+ 0.13985762000083923,
197
+ 0.15686897933483124,
198
+ 0.4661160111427307,
199
+ 0.7400000095367432,
200
+ 0.5,
201
+ 0.5,
202
+ 1.0,
203
+ 3.1414895057678223
204
+ ],
205
+ "state_min": [
206
+ -0.02442001923918724,
207
+ -0.0517612099647522,
208
+ -0.0006534014828503132,
209
+ -0.5095356106758118,
210
+ -1.323034405708313,
211
+ -1.3221508264541626,
212
+ -1.3230019807815552,
213
+ -0.5770347714424133,
214
+ -0.4338151812553406,
215
+ -0.6721642017364502,
216
+ -0.0017213862156495452,
217
+ -7.534810038123396e-7,
218
+ -0.001927333534695208,
219
+ -1.075333216249419e-6,
220
+ -0.43650975823402405,
221
+ 0.15721464157104492,
222
+ -0.5489339232444763,
223
+ -0.2632291913032532,
224
+ -0.3508843183517456,
225
+ -0.23784859478473663,
226
+ -0.4281824827194214,
227
+ -0.5803383588790894,
228
+ -0.7118590474128723,
229
+ -1.0344431400299072,
230
+ -0.7932196259498596,
231
+ -1.0205217599868774,
232
+ -0.3445618450641632,
233
+ -0.5986371040344238,
234
+ -0.13537253439426422,
235
+ -0.0017330688424408436,
236
+ -0.1421850621700287,
237
+ 0.7400000095367432,
238
+ 0.0,
239
+ 0.0,
240
+ 0.0,
241
+ 0.0
242
+ ],
243
+ "state_max": [
244
+ 0.47981399297714233,
245
+ 0.6772664189338684,
246
+ 0.6746510863304138,
247
+ 0.0010172375477850437,
248
+ 0.0007091082516126335,
249
+ 0.001881288131698966,
250
+ 0.0011398319620639086,
251
+ 6.141255539660051e-7,
252
+ 0.3043450713157654,
253
+ 6.343479981296696e-7,
254
+ 0.6933000087738037,
255
+ 1.4612544775009155,
256
+ 1.4651201963424683,
257
+ 1.4609057903289795,
258
+ 0.2809508740901947,
259
+ 0.34028318524360657,
260
+ 0.47627460956573486,
261
+ 0.26476219296455383,
262
+ 0.20825636386871338,
263
+ 0.4566418528556824,
264
+ 0.42864030599594116,
265
+ 0.1656116098165512,
266
+ -0.1549365073442459,
267
+ 0.5154499411582947,
268
+ 0.4242899715900421,
269
+ 0.8548054695129395,
270
+ 0.8040095567703247,
271
+ 0.9811649322509766,
272
+ 0.136736661195755,
273
+ 0.195722296833992,
274
+ 0.45781663060188293,
275
+ 0.7400000095367432,
276
+ 0.0,
277
+ 0.0,
278
+ 0.0,
279
+ 0.0
280
+ ],
281
+ "normalize_state": true,
282
+ "pad_action_dim": 36,
283
+ "pad_state_dim": 36
284
+ }
285
+ },
286
+ "root_dir": "/data/jliu/data",
287
+ "train_repo_ids": [
288
+ "G1WholebodyLocomotionPickBetweenTablesTeleop-v0"
289
+ ],
290
+ "val_repo_ids": [
291
+ "G1WholebodyLocomotionPickBetweenTablesTeleop-v0"
292
+ ]
293
+ },
294
+ "model": {
295
+ "resnet_store_path": null,
296
+ "pretrained_action_header_path": "/hfm/cache/checkpoints/psi0/postpre.1by1.pad36.2601131206.ckpt.he30k",
297
+ "rtc": true,
298
+ "max_delay": 8,
299
+ "action_dim": 36,
300
+ "action_chunk_size": 30,
301
+ "action_exec_horizon": 30,
302
+ "observation_horizon": 1,
303
+ "img_chunk": 1,
304
+ "n_cams": 1,
305
+ "use_obs": "add_token",
306
+ "dropout": 0.1,
307
+ "noise_scheduler": "flow",
308
+ "train_diffusion_steps": 1000,
309
+ "eval_diffusion_steps": 10,
310
+ "share_cam_features": false,
311
+ "early_fusion": false,
312
+ "odim": 36,
313
+ "n_conditions": 0,
314
+ "token_fusion": "concat",
315
+ "loss_w": [
316
+ 0.1,
317
+ 0.2,
318
+ 0.1
319
+ ],
320
+ "time_dim": 256,
321
+ "hidden_dim": 1536,
322
+ "num_blocks": 6,
323
+ "dim_feedforward": 2048,
324
+ "nhead": 24,
325
+ "activation": "gelu",
326
+ "view_feature_dim": 2048,
327
+ "use_film": false,
328
+ "combined_temb": false,
329
+ "use_dit": false,
330
+ "weight_decay": 0.01,
331
+ "model_name_or_path": "/hfm/cache/checkpoints/psi0/pre.fast.1by1.2601091803.ckpt.ego200k.he30k",
332
+ "vlm_ckpt_step": null,
333
+ "tune_vlm": false,
334
+ "tune_mm_llm": false,
335
+ "tune_mm_vision": false,
336
+ "tune_mm_mlp": false,
337
+ "gradient_checkpointing": true,
338
+ "lang_backbone_lr": 0.00001,
339
+ "mm_projector_lr": 0.00001,
340
+ "vision_tower_lr": 1e-6,
341
+ "optim": "adamw_torch",
342
+ "model_max_length": 4096,
343
+ "data_flatten": true,
344
+ "data_packing": true,
345
+ "max_pixels": 451584,
346
+ "min_pixels": 12544
347
+ }
348
+ }
psi0/simple-checkpoints/g1wholebodytabletopgrasp-v0.simple.flow1000.cosine.lr1.0e-04.b128.gpus8.2603181503/argv.txt ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ scripts/train.py
2
+ finetune_simple_psi0_config
3
+ --seed=292285
4
+ --exp=g1wholebodytabletopgrasp-v0
5
+ --train.name=finetune
6
+ --train.data_parallel=ddp
7
+ --train.mixed_precision=bf16
8
+ --train.train_batch_size=16
9
+ --train.max_checkpoints_to_keep=5
10
+ --train.gradient_accumulation_steps=1
11
+ --train.learning_rate=1e-4
12
+ --train.max_training_steps=40000
13
+ --train.warmup_ratio=None
14
+ --train.warmup_steps=1000
15
+ --train.checkpointing_steps=10000
16
+ --train.validation_steps=500
17
+ --train.val_num_batches=20
18
+ --train.max_grad_norm=1.0
19
+ --train.lr_scheduler_type=cosine
20
+ --train.lr_scheduler_kwargs.weight_decay=1e-6
21
+ --train.lr_scheduler_kwargs.betas 0.95 0.999
22
+ --log.report_to=wandb
23
+ --data.root_dir=/data/jliu/data
24
+ --data.train-repo-ids=G1WholebodyTabletopGrasp-v0
25
+ --data.transform.repack.pad-action-dim=36
26
+ --data.transform.repack.pad-state-dim=36
27
+ --data.transform.field.stat-path=meta/stats_psi0.json
28
+ --data.transform.field.stat-action-key=action
29
+ --data.transform.field.stat-state-key=states
30
+ --data.transform.field.action_norm_type=bounds
31
+ --data.transform.field.no-use-norm-mask
32
+ --data.transform.field.normalize-state
33
+ --data.transform.field.pad-action-dim=36
34
+ --data.transform.field.pad-state-dim=36
35
+ --data.transform.model.img-aug
36
+ --data.transform.model.resize.size 180 320
37
+ --data.transform.model.center_crop.size 180 320
38
+ --model.model_name_or_path=/hfm/cache/checkpoints/psi0/pre.fast.1by1.2601091803.ckpt.ego200k.he30k
39
+ --model.pretrained-action-header-path=/hfm/cache/checkpoints/psi0/postpre.1by1.pad36.2601131206.ckpt.he30k
40
+ --model.noise-scheduler=flow
41
+ --model.train-diffusion-steps=1000
42
+ --model.n_conditions=0
43
+ --model.action-chunk-size=30
44
+ --model.action-dim=36
45
+ --model.action-exec-horizon=30
46
+ --model.observation-horizon=1
47
+ --model.odim=36
48
+ --model.view_feature_dim=2048
49
+ --model.no-tune-vlm
50
+ --model.no-use_film
51
+ --model.no-combined_temb
52
+ --model.rtc
53
+ --model.max-delay=8
psi0/simple-checkpoints/g1wholebodytabletopgrasp-v0.simple.flow1000.cosine.lr1.0e-04.b128.gpus8.2603181503/checkpoints/ckpt_40000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72f07e71e1a50bc943b9c4cba47051d7eb8f1d86ca81f3ea0ade7cc8dbad5458
3
+ size 6253648840
psi0/simple-checkpoints/g1wholebodytabletopgrasp-v0.simple.flow1000.cosine.lr1.0e-04.b128.gpus8.2603181503/envs.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ OMP_NUM_THREADS=32
2
+ HF_HOME=/data/cache
3
+ TORCH_HOME=/data/cache
4
+ HF_TOKEN=hf_...TiKa
5
+ HF_LEROBOT_HOME=/data/data/lerobot
6
+ WE_HOME=Not Set
7
+ DATA_HOME=/data/data
8
+ UV_CACHE_DIR=/data/cache
9
+ WANDB_API_KEY=90e...5c06
10
+ PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
11
+ CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
12
+ WORLD_SIZE=8
13
+ LOCAL_WORLD_SIZE=8
14
+ RANK=0
15
+ LOCAL_RANK=0
16
+ MASTER_ADDR=127.0.0.1
17
+ MASTER_PORT=29500
psi0/simple-checkpoints/g1wholebodytabletopgrasp-v0.simple.flow1000.cosine.lr1.0e-04.b128.gpus8.2603181503/run_config.json ADDED
@@ -0,0 +1,348 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "exp": "g1wholebodytabletopgrasp-v0",
3
+ "seed": 292285,
4
+ "auto_tag_run": false,
5
+ "eval": false,
6
+ "debug": false,
7
+ "timestamp": "2603181503",
8
+ "log": {
9
+ "logging_dir": "logs",
10
+ "report_to": "wandb",
11
+ "log_freq": 100
12
+ },
13
+ "wandb": {
14
+ "project": "psi",
15
+ "entity": "jliu530-soochow-university",
16
+ "group": "finetune",
17
+ "id": "424b3khl",
18
+ "name": "g1wholebodytabletopgrasp-v0.simple.flow1000.cosine.lr1.0e-04.b128.gpus8.2603181503",
19
+ "resume": "allow"
20
+ },
21
+ "train": {
22
+ "num_workers": 8,
23
+ "overfit_single_batch": false,
24
+ "name": "finetune",
25
+ "resume_from_checkpoint": null,
26
+ "skip_resumed_steps": false,
27
+ "hf_token": ".hf_token",
28
+ "lora": false,
29
+ "output_dir": ".runs",
30
+ "gradient_accumulation_steps": 1,
31
+ "mixed_precision": "bf16",
32
+ "max_grad_norm": 1.0,
33
+ "optimizer_foreach": null,
34
+ "train_batch_size": 16,
35
+ "val_batch_size": 16,
36
+ "val_num_batches": 20,
37
+ "checkpointing_steps": 10000,
38
+ "max_checkpoints_to_keep": 5,
39
+ "validation_steps": 500,
40
+ "learning_rate": 0.0001,
41
+ "lr_scheduler_type": "cosine",
42
+ "lr_scheduler_kwargs": {
43
+ "betas": [
44
+ 0.95,
45
+ 0.999
46
+ ],
47
+ "weight_decay": 1e-6,
48
+ "eps": 1e-8
49
+ },
50
+ "scheduler_specific_kwargs": {},
51
+ "data_parallel": "ddp",
52
+ "sharding_strategy": "full-shard",
53
+ "deepspeed_config": "/data/jliu/psi/src/InternVLA/config/deepseeds/zero3.json",
54
+ "enable_gradient_checkpointing": true,
55
+ "enable_mixed_precision_training": true,
56
+ "reduce_in_full_precision": true,
57
+ "max_training_steps": 40000,
58
+ "num_train_epochs": null,
59
+ "warmup_steps": 1000,
60
+ "warmup_ratio": null
61
+ },
62
+ "data": {
63
+ "transform": {
64
+ "repack": {
65
+ "dataset_name": "simple",
66
+ "num_past_frames": 0,
67
+ "action_chunk_size": 30,
68
+ "pad_action_dim": 36,
69
+ "pad_state_dim": 36
70
+ },
71
+ "model": {
72
+ "resize": {
73
+ "size": [
74
+ 180,
75
+ 320
76
+ ]
77
+ },
78
+ "center_crop": {
79
+ "size": [
80
+ 180,
81
+ 320
82
+ ]
83
+ },
84
+ "color_jitter": {
85
+ "brightness": 0.2,
86
+ "contrast": [
87
+ 0.8,
88
+ 1.2
89
+ ],
90
+ "saturation": [
91
+ 0.8,
92
+ 1.2
93
+ ],
94
+ "hue": 0.05
95
+ },
96
+ "gaussian_noise": {
97
+ "mean": 0.0,
98
+ "std": 3.0,
99
+ "prob_skip": 0.1
100
+ },
101
+ "img_aug": true,
102
+ "adaptive_resize": false,
103
+ "img_sizes": {
104
+ "egodex": [
105
+ 270,
106
+ 480
107
+ ],
108
+ "he": [
109
+ 240,
110
+ 320
111
+ ]
112
+ }
113
+ },
114
+ "field": {
115
+ "stat_path": "meta/stats_psi0.json",
116
+ "action_norm_type": "bounds",
117
+ "stat_action_key": "action",
118
+ "stat_state_key": "states",
119
+ "use_norm_mask": false,
120
+ "action_norm_masks": [
121
+ true,
122
+ true,
123
+ true,
124
+ true,
125
+ true,
126
+ true,
127
+ false
128
+ ],
129
+ "action_min": [
130
+ -0.21088384091854095,
131
+ -0.11738907545804977,
132
+ -0.013445371761918068,
133
+ -0.5775371193885803,
134
+ -0.30140629410743713,
135
+ -0.3430681824684143,
136
+ -0.37358492612838745,
137
+ -0.007524379529058933,
138
+ -0.01704181358218193,
139
+ -0.6932834386825562,
140
+ 0.0,
141
+ 0.0,
142
+ 0.0,
143
+ 0.0,
144
+ -0.022009270265698433,
145
+ -0.04607510566711426,
146
+ -0.24804681539535522,
147
+ -0.005683199502527714,
148
+ -0.25139108300209045,
149
+ -0.05519897863268852,
150
+ -0.04459292069077492,
151
+ -1.0918865203857422,
152
+ -0.3572312593460083,
153
+ -0.7850697636604309,
154
+ 0.0,
155
+ -1.4517900943756104,
156
+ -0.5649155378341675,
157
+ -0.4592915177345276,
158
+ -0.08972926437854767,
159
+ -0.1079544723033905,
160
+ -0.04311269149184227,
161
+ 0.75,
162
+ 0.0,
163
+ 0.0,
164
+ 0.0,
165
+ 0.0
166
+ ],
167
+ "action_max": [
168
+ 0.21513332426548004,
169
+ 0.21692107617855072,
170
+ 0.3652719259262085,
171
+ 0.07139641791582108,
172
+ 0.015001054853200912,
173
+ 0.03918211907148361,
174
+ 0.03575323149561882,
175
+ 0.6107784509658813,
176
+ 0.31583136320114136,
177
+ 0.0,
178
+ 0.6836385726928711,
179
+ 1.4285058975219727,
180
+ 0.8524638414382935,
181
+ 1.7429704666137695,
182
+ 0.00735096400603652,
183
+ 0.25089067220687866,
184
+ 0.04510946571826935,
185
+ 0.017853474244475365,
186
+ 0.046191196888685226,
187
+ 0.0031940839253365993,
188
+ 0.24179035425186157,
189
+ 0.0037467884831130505,
190
+ 0.0002910589682869613,
191
+ 0.7421935796737671,
192
+ 1.6607650518417358,
193
+ 0.0,
194
+ 0.3736472725868225,
195
+ 0.30042290687561035,
196
+ 0.009443609043955803,
197
+ 0.11625207960605621,
198
+ 0.013617209158837795,
199
+ 0.75,
200
+ 0.0,
201
+ 0.0,
202
+ 0.0,
203
+ 0.0
204
+ ],
205
+ "state_min": [
206
+ -0.210999995470047,
207
+ -0.11699992418289185,
208
+ -1.1206404693098193e-8,
209
+ -0.5680000185966492,
210
+ -0.29699981212615967,
211
+ -0.3440000116825104,
212
+ -0.37400001287460327,
213
+ -0.00800000037997961,
214
+ -0.004000000189989805,
215
+ -0.6610000133514404,
216
+ 0.0,
217
+ 0.0,
218
+ 0.0,
219
+ 0.0,
220
+ -0.017999978736042976,
221
+ -0.04700015112757683,
222
+ -0.24899962544441223,
223
+ -0.0069999597035348415,
224
+ -0.2510001063346863,
225
+ -0.053999971598386765,
226
+ -0.045001156628131866,
227
+ -1.0800000429153442,
228
+ -0.3590024709701538,
229
+ -0.781000018119812,
230
+ -0.006000000052154064,
231
+ -1.4559999704360962,
232
+ -0.5649999976158142,
233
+ -0.46299999952316284,
234
+ 0.0,
235
+ -0.15000000596046448,
236
+ 0.0,
237
+ 0.75,
238
+ 0.0,
239
+ 0.0,
240
+ 0.0,
241
+ 0.0
242
+ ],
243
+ "state_max": [
244
+ 0.2149999588727951,
245
+ 0.21600016951560974,
246
+ 0.36500000953674316,
247
+ 0.0,
248
+ 4.046002644031432e-9,
249
+ 0.0,
250
+ 1.0771045513835453e-11,
251
+ 0.609000027179718,
252
+ 0.3600001633167267,
253
+ 1.1693318297152644e-13,
254
+ 0.5249999761581421,
255
+ 1.3730000257492065,
256
+ 0.8119999766349792,
257
+ 1.7453292608261108,
258
+ 0.014000464230775833,
259
+ 0.2500004470348358,
260
+ 0.04610275477170944,
261
+ 0.020999999716877937,
262
+ 0.04600704088807106,
263
+ 0.006000000052154064,
264
+ 0.24200008809566498,
265
+ 0.012999767437577248,
266
+ 0.003000000026077032,
267
+ 0.7419999837875366,
268
+ 1.6640000343322754,
269
+ 6.225707238627365e-6,
270
+ 0.37400001287460327,
271
+ 0.300999253988266,
272
+ 0.0,
273
+ 0.0,
274
+ 0.0,
275
+ 0.75,
276
+ 0.0,
277
+ 0.0,
278
+ 0.0,
279
+ 0.0
280
+ ],
281
+ "normalize_state": true,
282
+ "pad_action_dim": 36,
283
+ "pad_state_dim": 36
284
+ }
285
+ },
286
+ "root_dir": "/data/jliu/data",
287
+ "train_repo_ids": [
288
+ "G1WholebodyTabletopGrasp-v0"
289
+ ],
290
+ "val_repo_ids": [
291
+ "G1WholebodyTabletopGrasp-v0"
292
+ ]
293
+ },
294
+ "model": {
295
+ "resnet_store_path": null,
296
+ "pretrained_action_header_path": "/hfm/cache/checkpoints/psi0/postpre.1by1.pad36.2601131206.ckpt.he30k",
297
+ "rtc": true,
298
+ "max_delay": 8,
299
+ "action_dim": 36,
300
+ "action_chunk_size": 30,
301
+ "action_exec_horizon": 30,
302
+ "observation_horizon": 1,
303
+ "img_chunk": 1,
304
+ "n_cams": 1,
305
+ "use_obs": "add_token",
306
+ "dropout": 0.1,
307
+ "noise_scheduler": "flow",
308
+ "train_diffusion_steps": 1000,
309
+ "eval_diffusion_steps": 10,
310
+ "share_cam_features": false,
311
+ "early_fusion": false,
312
+ "odim": 36,
313
+ "n_conditions": 0,
314
+ "token_fusion": "concat",
315
+ "loss_w": [
316
+ 0.1,
317
+ 0.2,
318
+ 0.1
319
+ ],
320
+ "time_dim": 256,
321
+ "hidden_dim": 1536,
322
+ "num_blocks": 6,
323
+ "dim_feedforward": 2048,
324
+ "nhead": 24,
325
+ "activation": "gelu",
326
+ "view_feature_dim": 2048,
327
+ "use_film": false,
328
+ "combined_temb": false,
329
+ "use_dit": false,
330
+ "weight_decay": 0.01,
331
+ "model_name_or_path": "/hfm/cache/checkpoints/psi0/pre.fast.1by1.2601091803.ckpt.ego200k.he30k",
332
+ "vlm_ckpt_step": null,
333
+ "tune_vlm": false,
334
+ "tune_mm_llm": false,
335
+ "tune_mm_vision": false,
336
+ "tune_mm_mlp": false,
337
+ "gradient_checkpointing": true,
338
+ "lang_backbone_lr": 0.00001,
339
+ "mm_projector_lr": 0.00001,
340
+ "vision_tower_lr": 1e-6,
341
+ "optim": "adamw_torch",
342
+ "model_max_length": 4096,
343
+ "data_flatten": true,
344
+ "data_packing": true,
345
+ "max_pixels": 451584,
346
+ "min_pixels": 12544
347
+ }
348
+ }
psi0/simple-checkpoints/g1wholebodyxmovebendpickteleop-v0.simple.flow1000.cosine.lr1.0e-04.b112.gpus7.2604100422/argv.txt ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ scripts/train.py
2
+ finetune_simple_psi0_config
3
+ --seed=292285
4
+ --exp=g1wholebodyxmovebendpickteleop-v0
5
+ --train.name=finetune
6
+ --train.data_parallel=ddp
7
+ --train.mixed_precision=bf16
8
+ --train.train_batch_size=16
9
+ --train.max_checkpoints_to_keep=5
10
+ --train.gradient_accumulation_steps=1
11
+ --train.learning_rate=1e-4
12
+ --train.max_training_steps=40000
13
+ --train.warmup_ratio=None
14
+ --train.warmup_steps=1000
15
+ --train.checkpointing_steps=10000
16
+ --train.validation_steps=500
17
+ --train.val_num_batches=20
18
+ --train.max_grad_norm=1.0
19
+ --train.lr_scheduler_type=cosine
20
+ --train.lr_scheduler_kwargs.weight_decay=1e-6
21
+ --train.lr_scheduler_kwargs.betas 0.95 0.999
22
+ --log.report_to=wandb
23
+ --data.root_dir=/data/jliu/data
24
+ --data.train-repo-ids=G1WholebodyXMoveBendPickTeleop-v0
25
+ --data.transform.repack.pad-action-dim=36
26
+ --data.transform.repack.pad-state-dim=36
27
+ --data.transform.field.stat-path=meta/stats_psi0.json
28
+ --data.transform.field.stat-action-key=action
29
+ --data.transform.field.stat-state-key=states
30
+ --data.transform.field.action_norm_type=bounds
31
+ --data.transform.field.no-use-norm-mask
32
+ --data.transform.field.normalize-state
33
+ --data.transform.field.pad-action-dim=36
34
+ --data.transform.field.pad-state-dim=36
35
+ --data.transform.model.img-aug
36
+ --data.transform.model.resize.size 180 320
37
+ --data.transform.model.center_crop.size 180 320
38
+ --model.model_name_or_path=/hfm/cache/checkpoints/psi0/pre.fast.1by1.2601091803.ckpt.ego200k.he30k
39
+ --model.pretrained-action-header-path=/hfm/cache/checkpoints/psi0/postpre.1by1.pad36.2601131206.ckpt.he30k
40
+ --model.noise-scheduler=flow
41
+ --model.train-diffusion-steps=1000
42
+ --model.n_conditions=0
43
+ --model.action-chunk-size=30
44
+ --model.action-dim=36
45
+ --model.action-exec-horizon=30
46
+ --model.observation-horizon=1
47
+ --model.odim=36
48
+ --model.view_feature_dim=2048
49
+ --model.no-tune-vlm
50
+ --model.no-use_film
51
+ --model.no-combined_temb
52
+ --model.rtc
53
+ --model.max-delay=8
psi0/simple-checkpoints/g1wholebodyxmovebendpickteleop-v0.simple.flow1000.cosine.lr1.0e-04.b112.gpus7.2604100422/checkpoints/ckpt_40000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b8a6ab62c8defd7099f834b0e7827a7ac868206dfab5f1083adf2719ccb77bc
3
+ size 6253648840
psi0/simple-checkpoints/g1wholebodyxmovebendpickteleop-v0.simple.flow1000.cosine.lr1.0e-04.b112.gpus7.2604100422/envs.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ OMP_NUM_THREADS=32
2
+ HF_HOME=/data/cache
3
+ TORCH_HOME=/data/cache
4
+ HF_TOKEN=hf_...TiKa
5
+ HF_LEROBOT_HOME=/data/data/lerobot
6
+ WE_HOME=Not Set
7
+ DATA_HOME=/data/data
8
+ UV_CACHE_DIR=/data/cache
9
+ WANDB_API_KEY=90e...5c06
10
+ PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
11
+ CUDA_VISIBLE_DEVICES=0,2,3,4,5,6,7
12
+ WORLD_SIZE=7
13
+ LOCAL_WORLD_SIZE=7
14
+ RANK=0
15
+ LOCAL_RANK=0
16
+ MASTER_ADDR=127.0.0.1
17
+ MASTER_PORT=29509
psi0/simple-checkpoints/g1wholebodyxmovebendpickteleop-v0.simple.flow1000.cosine.lr1.0e-04.b112.gpus7.2604100422/run_config.json ADDED
@@ -0,0 +1,348 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "exp": "g1wholebodyxmovebendpickteleop-v0",
3
+ "seed": 292285,
4
+ "auto_tag_run": false,
5
+ "eval": false,
6
+ "debug": false,
7
+ "timestamp": "2604100422",
8
+ "log": {
9
+ "logging_dir": "logs",
10
+ "report_to": "wandb",
11
+ "log_freq": 100
12
+ },
13
+ "wandb": {
14
+ "project": "psi",
15
+ "entity": "jliu530-soochow-university",
16
+ "group": "finetune",
17
+ "id": "1jvqo3pw",
18
+ "name": "g1wholebodyxmovebendpickteleop-v0.simple.flow1000.cosine.lr1.0e-04.b112.gpus7.2604100422",
19
+ "resume": "allow"
20
+ },
21
+ "train": {
22
+ "num_workers": 8,
23
+ "overfit_single_batch": false,
24
+ "name": "finetune",
25
+ "resume_from_checkpoint": null,
26
+ "skip_resumed_steps": false,
27
+ "hf_token": ".hf_token",
28
+ "lora": false,
29
+ "output_dir": ".runs",
30
+ "gradient_accumulation_steps": 1,
31
+ "mixed_precision": "bf16",
32
+ "max_grad_norm": 1.0,
33
+ "optimizer_foreach": null,
34
+ "train_batch_size": 16,
35
+ "val_batch_size": 16,
36
+ "val_num_batches": 20,
37
+ "checkpointing_steps": 10000,
38
+ "max_checkpoints_to_keep": 5,
39
+ "validation_steps": 500,
40
+ "learning_rate": 0.0001,
41
+ "lr_scheduler_type": "cosine",
42
+ "lr_scheduler_kwargs": {
43
+ "betas": [
44
+ 0.95,
45
+ 0.999
46
+ ],
47
+ "weight_decay": 1e-6,
48
+ "eps": 1e-8
49
+ },
50
+ "scheduler_specific_kwargs": {},
51
+ "data_parallel": "ddp",
52
+ "sharding_strategy": "full-shard",
53
+ "deepspeed_config": "/data/jliu/psi/src/InternVLA/config/deepseeds/zero3.json",
54
+ "enable_gradient_checkpointing": true,
55
+ "enable_mixed_precision_training": true,
56
+ "reduce_in_full_precision": true,
57
+ "max_training_steps": 40000,
58
+ "num_train_epochs": null,
59
+ "warmup_steps": 1000,
60
+ "warmup_ratio": null
61
+ },
62
+ "data": {
63
+ "transform": {
64
+ "repack": {
65
+ "dataset_name": "simple",
66
+ "num_past_frames": 0,
67
+ "action_chunk_size": 30,
68
+ "pad_action_dim": 36,
69
+ "pad_state_dim": 36
70
+ },
71
+ "model": {
72
+ "resize": {
73
+ "size": [
74
+ 180,
75
+ 320
76
+ ]
77
+ },
78
+ "center_crop": {
79
+ "size": [
80
+ 180,
81
+ 320
82
+ ]
83
+ },
84
+ "color_jitter": {
85
+ "brightness": 0.2,
86
+ "contrast": [
87
+ 0.8,
88
+ 1.2
89
+ ],
90
+ "saturation": [
91
+ 0.8,
92
+ 1.2
93
+ ],
94
+ "hue": 0.05
95
+ },
96
+ "gaussian_noise": {
97
+ "mean": 0.0,
98
+ "std": 3.0,
99
+ "prob_skip": 0.1
100
+ },
101
+ "img_aug": true,
102
+ "adaptive_resize": false,
103
+ "img_sizes": {
104
+ "egodex": [
105
+ 270,
106
+ 480
107
+ ],
108
+ "he": [
109
+ 240,
110
+ 320
111
+ ]
112
+ }
113
+ },
114
+ "field": {
115
+ "stat_path": "meta/stats_psi0.json",
116
+ "action_norm_type": "bounds",
117
+ "stat_action_key": "action",
118
+ "stat_state_key": "states",
119
+ "use_norm_mask": false,
120
+ "action_norm_masks": [
121
+ true,
122
+ true,
123
+ true,
124
+ true,
125
+ true,
126
+ true,
127
+ false
128
+ ],
129
+ "action_min": [
130
+ 0.0,
131
+ 0.0,
132
+ 0.0,
133
+ 0.0,
134
+ 0.0,
135
+ 0.0,
136
+ 0.0,
137
+ -0.5,
138
+ -0.699999988079071,
139
+ -0.699999988079071,
140
+ -1.8059087783367424e-18,
141
+ -1.8059087783367424e-18,
142
+ -2.3222253007177214e-19,
143
+ -1.8059087783367424e-18,
144
+ -0.3314070701599121,
145
+ 0.1900009959936142,
146
+ -0.8766500353813171,
147
+ -0.12303244322538376,
148
+ -0.4908517599105835,
149
+ -0.2786784768104553,
150
+ -0.022629141807556152,
151
+ -0.6784858703613281,
152
+ -0.5865002870559692,
153
+ -0.645729660987854,
154
+ -0.3608185946941376,
155
+ -0.15172408521175385,
156
+ -0.4648345112800598,
157
+ -0.2964947521686554,
158
+ -0.10700750350952148,
159
+ -0.21067920327186584,
160
+ -0.08102670311927795,
161
+ 0.44999998807907104,
162
+ -0.5,
163
+ -0.26561295986175537,
164
+ -0.11697302013635635,
165
+ 0.0
166
+ ],
167
+ "action_max": [
168
+ 0.0,
169
+ 0.0,
170
+ 0.0,
171
+ 0.0,
172
+ 0.0,
173
+ 0.0,
174
+ 0.0,
175
+ 2.189282125137906e-19,
176
+ 3.6880364598559585e-19,
177
+ 3.6880364598559585e-19,
178
+ 1.5,
179
+ 1.5,
180
+ 0.6000000238418579,
181
+ 1.5,
182
+ 0.06430592387914658,
183
+ 0.2996276319026947,
184
+ 0.5128592252731323,
185
+ 0.3017215132713318,
186
+ -0.007464056834578514,
187
+ 0.21968720853328705,
188
+ 0.827497124671936,
189
+ 0.044904597103595734,
190
+ -0.1900009959936142,
191
+ 0.413065105676651,
192
+ 0.41873428225517273,
193
+ 0.6618388891220093,
194
+ 0.4026392698287964,
195
+ 0.8194853663444519,
196
+ 0.12383800745010376,
197
+ 0.16346246004104614,
198
+ 0.15494900941848755,
199
+ 0.7400000095367432,
200
+ 0.5,
201
+ 0.21786384284496307,
202
+ 0.1749052256345749,
203
+ 0.0
204
+ ],
205
+ "state_min": [
206
+ -0.00044060105574317276,
207
+ -0.029227260500192642,
208
+ -0.0007062808726914227,
209
+ -0.006396367214620113,
210
+ -0.034731876105070114,
211
+ -0.00020073111227247864,
212
+ -8.215621392082539e-7,
213
+ -0.5499086976051331,
214
+ -0.5100165009498596,
215
+ -0.613179087638855,
216
+ -0.0030598489101976156,
217
+ -0.0002515389060135931,
218
+ -0.00361030176281929,
219
+ -0.003131122561171651,
220
+ -0.30267173051834106,
221
+ 0.162300705909729,
222
+ -0.8084174394607544,
223
+ -0.053157128393650055,
224
+ -0.48188674449920654,
225
+ -0.28324440121650696,
226
+ -0.02153456024825573,
227
+ -0.559512734413147,
228
+ -0.4063037037849426,
229
+ -0.625334620475769,
230
+ -0.17857033014297485,
231
+ -0.14080968499183655,
232
+ -0.3861367404460907,
233
+ -0.2920348048210144,
234
+ -0.0902835875749588,
235
+ -0.1666938215494156,
236
+ -0.07615894079208374,
237
+ 0.44999998807907104,
238
+ 0.0,
239
+ 0.0,
240
+ 0.0,
241
+ 0.0
242
+ ],
243
+ "state_max": [
244
+ 0.013749510049819946,
245
+ 0.0003444451722316444,
246
+ 5.732499630539678e-6,
247
+ 0.0019246992887929082,
248
+ 0.0014607172925025225,
249
+ 0.0007710650679655373,
250
+ 0.0006001993897370994,
251
+ 4.888642592959513e-7,
252
+ 0.06670719385147095,
253
+ 1.4086220971876173e-6,
254
+ 0.43387407064437866,
255
+ 1.2414171695709229,
256
+ 0.6964682936668396,
257
+ 1.2072811126708984,
258
+ 0.0866343304514885,
259
+ 0.2651435434818268,
260
+ 0.49075624346733093,
261
+ 0.34916067123413086,
262
+ -0.0007500328356400132,
263
+ 0.2507650554180145,
264
+ 0.9099032282829285,
265
+ 0.07794909924268723,
266
+ -0.15903376042842865,
267
+ 0.29115578532218933,
268
+ 0.48632845282554626,
269
+ 0.4680853486061096,
270
+ 0.40000519156455994,
271
+ 0.7901750206947327,
272
+ 0.11165501922369003,
273
+ 0.1871986985206604,
274
+ 0.15685616433620453,
275
+ 0.7400000095367432,
276
+ 0.0,
277
+ 0.0,
278
+ 0.0,
279
+ 0.0
280
+ ],
281
+ "normalize_state": true,
282
+ "pad_action_dim": 36,
283
+ "pad_state_dim": 36
284
+ }
285
+ },
286
+ "root_dir": "/data/jliu/data",
287
+ "train_repo_ids": [
288
+ "G1WholebodyXMoveBendPickTeleop-v0"
289
+ ],
290
+ "val_repo_ids": [
291
+ "G1WholebodyXMoveBendPickTeleop-v0"
292
+ ]
293
+ },
294
+ "model": {
295
+ "resnet_store_path": null,
296
+ "pretrained_action_header_path": "/hfm/cache/checkpoints/psi0/postpre.1by1.pad36.2601131206.ckpt.he30k",
297
+ "rtc": true,
298
+ "max_delay": 8,
299
+ "action_dim": 36,
300
+ "action_chunk_size": 30,
301
+ "action_exec_horizon": 30,
302
+ "observation_horizon": 1,
303
+ "img_chunk": 1,
304
+ "n_cams": 1,
305
+ "use_obs": "add_token",
306
+ "dropout": 0.1,
307
+ "noise_scheduler": "flow",
308
+ "train_diffusion_steps": 1000,
309
+ "eval_diffusion_steps": 10,
310
+ "share_cam_features": false,
311
+ "early_fusion": false,
312
+ "odim": 36,
313
+ "n_conditions": 0,
314
+ "token_fusion": "concat",
315
+ "loss_w": [
316
+ 0.1,
317
+ 0.2,
318
+ 0.1
319
+ ],
320
+ "time_dim": 256,
321
+ "hidden_dim": 1536,
322
+ "num_blocks": 6,
323
+ "dim_feedforward": 2048,
324
+ "nhead": 24,
325
+ "activation": "gelu",
326
+ "view_feature_dim": 2048,
327
+ "use_film": false,
328
+ "combined_temb": false,
329
+ "use_dit": false,
330
+ "weight_decay": 0.01,
331
+ "model_name_or_path": "/hfm/cache/checkpoints/psi0/pre.fast.1by1.2601091803.ckpt.ego200k.he30k",
332
+ "vlm_ckpt_step": null,
333
+ "tune_vlm": false,
334
+ "tune_mm_llm": false,
335
+ "tune_mm_vision": false,
336
+ "tune_mm_mlp": false,
337
+ "gradient_checkpointing": true,
338
+ "lang_backbone_lr": 0.00001,
339
+ "mm_projector_lr": 0.00001,
340
+ "vision_tower_lr": 1e-6,
341
+ "optim": "adamw_torch",
342
+ "model_max_length": 4096,
343
+ "data_flatten": true,
344
+ "data_packing": true,
345
+ "max_pixels": 451584,
346
+ "min_pixels": 12544
347
+ }
348
+ }