lixiang-95 commited on
Commit
29beccd
·
verified ·
1 Parent(s): 00464c4

Initial model upload

Browse files
config.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "action_dim": 32,
3
+ "action_head_cfg": {
4
+ "action_dim": 32,
5
+ "action_horizon": 16,
6
+ "add_pos_embed": true,
7
+ "backbone_embedding_dim": 2048,
8
+ "diffusion_model_cfg": {
9
+ "attention_head_dim": 48,
10
+ "cross_attention_dim": 2048,
11
+ "dropout": 0.2,
12
+ "final_dropout": true,
13
+ "interleave_self_attention": true,
14
+ "norm_type": "ada_norm",
15
+ "num_attention_heads": 32,
16
+ "num_layers": 16,
17
+ "output_dim": 1024,
18
+ "positional_embeddings": null
19
+ },
20
+ "hidden_size": 1024,
21
+ "input_embedding_dim": 1536,
22
+ "max_action_dim": 32,
23
+ "max_state_dim": 64,
24
+ "model_dtype": "float32",
25
+ "noise_beta_alpha": 1.5,
26
+ "noise_beta_beta": 1.0,
27
+ "noise_s": 0.999,
28
+ "num_inference_timesteps": 4,
29
+ "num_target_vision_tokens": 32,
30
+ "num_timestep_buckets": 1000,
31
+ "tune_diffusion_model": true,
32
+ "tune_projector": true,
33
+ "use_vlln": true,
34
+ "vl_self_attention_cfg": {
35
+ "attention_head_dim": 64,
36
+ "dropout": 0.2,
37
+ "final_dropout": true,
38
+ "num_attention_heads": 32,
39
+ "num_layers": 4,
40
+ "positional_embeddings": null
41
+ }
42
+ },
43
+ "action_horizon": 16,
44
+ "architectures": [
45
+ "GR00T_N1_5"
46
+ ],
47
+ "attn_implementation": null,
48
+ "backbone_cfg": {
49
+ "eagle_path": "NVEagle/eagle_er-qwen3_1_7B-Siglip2_400M_stage1_5_128gpu_er_v7_1mlp_nops",
50
+ "load_bf16": false,
51
+ "project_to_dim": null,
52
+ "reproject_vision": false,
53
+ "select_layer": 12,
54
+ "tune_llm": false,
55
+ "tune_visual": true,
56
+ "use_flash_attention": true
57
+ },
58
+ "compute_dtype": "bfloat16",
59
+ "hidden_size": 2048,
60
+ "model_dtype": "float32",
61
+ "model_type": "gr00t_n1_5",
62
+ "torch_dtype": "bfloat16",
63
+ "transformers_version": "4.51.3"
64
+ }
experiment_cfg/metadata.json ADDED
@@ -0,0 +1,433 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "libero_franka": {
3
+ "statistics": {
4
+ "state": {
5
+ "x": {
6
+ "max": [
7
+ 0.14580604434013367
8
+ ],
9
+ "min": [
10
+ -0.1765444278717041
11
+ ],
12
+ "mean": [
13
+ -0.02999030612409115
14
+ ],
15
+ "std": [
16
+ 0.06694897264242172
17
+ ],
18
+ "q01": [
19
+ -0.14911890715360643
20
+ ],
21
+ "q99": [
22
+ 0.09063626825809479
23
+ ]
24
+ },
25
+ "y": {
26
+ "max": [
27
+ 0.33216384053230286
28
+ ],
29
+ "min": [
30
+ -0.29457300901412964
31
+ ],
32
+ "mean": [
33
+ -0.007947085425257683
34
+ ],
35
+ "std": [
36
+ 0.17608462274074554
37
+ ],
38
+ "q01": [
39
+ -0.25978428691625594
40
+ ],
41
+ "q99": [
42
+ 0.29066365867853167
43
+ ]
44
+ },
45
+ "z": {
46
+ "max": [
47
+ 0.3857804834842682
48
+ ],
49
+ "min": [
50
+ 0.008128180168569088
51
+ ],
52
+ "mean": [
53
+ 0.20293472707271576
54
+ ],
55
+ "std": [
56
+ 0.07807064801454544
57
+ ],
58
+ "q01": [
59
+ 0.009925739830359817
60
+ ],
61
+ "q99": [
62
+ 0.3370887073874472
63
+ ]
64
+ },
65
+ "roll": {
66
+ "max": [
67
+ 3.4003844261169434
68
+ ],
69
+ "min": [
70
+ 2.2890501022338867
71
+ ],
72
+ "mean": [
73
+ 3.1086409091949463
74
+ ],
75
+ "std": [
76
+ 0.0868484303355217
77
+ ],
78
+ "q01": [
79
+ 2.7545341420173646
80
+ ],
81
+ "q99": [
82
+ 3.2611824750900267
83
+ ]
84
+ },
85
+ "pitch": {
86
+ "max": [
87
+ 0.7954911589622498
88
+ ],
89
+ "min": [
90
+ -1.883241891860962
91
+ ],
92
+ "mean": [
93
+ -0.21404768526554108
94
+ ],
95
+ "std": [
96
+ 0.33540457487106323
97
+ ],
98
+ "q01": [
99
+ -1.3996034812927245
100
+ ],
101
+ "q99": [
102
+ 0.32092821151018125
103
+ ]
104
+ },
105
+ "yaw": {
106
+ "max": [
107
+ 0.6642207503318787
108
+ ],
109
+ "min": [
110
+ -1.0600427389144897
111
+ ],
112
+ "mean": [
113
+ -0.11307074874639511
114
+ ],
115
+ "std": [
116
+ 0.20728276669979095
117
+ ],
118
+ "q01": [
119
+ -0.6867720144987106
120
+ ],
121
+ "q99": [
122
+ 0.4037663781642913
123
+ ]
124
+ },
125
+ "gripper": {
126
+ "max": [
127
+ 0.04104341194033623,
128
+ -0.00018117300351150334
129
+ ],
130
+ "min": [
131
+ 0.0006495157140307128,
132
+ -0.041782498359680176
133
+ ],
134
+ "mean": [
135
+ 0.029380427673459053,
136
+ -0.030556727200746536
137
+ ],
138
+ "std": [
139
+ 0.00956575945019722,
140
+ 0.009197483770549297
141
+ ],
142
+ "q01": [
143
+ 0.008197814421728254,
144
+ -0.04015838988125324
145
+ ],
146
+ "q99": [
147
+ 0.039891827926039694,
148
+ -0.009106044843792932
149
+ ]
150
+ }
151
+ },
152
+ "action": {
153
+ "x": {
154
+ "max": [
155
+ 0.9375
156
+ ],
157
+ "min": [
158
+ -0.8839285969734192
159
+ ],
160
+ "mean": [
161
+ 0.07096529006958008
162
+ ],
163
+ "std": [
164
+ 0.2681235373020172
165
+ ],
166
+ "q01": [
167
+ -0.5383928418159485
168
+ ],
169
+ "q99": [
170
+ 0.8464285731315613
171
+ ]
172
+ },
173
+ "y": {
174
+ "max": [
175
+ 0.8919642567634583
176
+ ],
177
+ "min": [
178
+ -0.9375
179
+ ],
180
+ "mean": [
181
+ 0.13498851656913757
182
+ ],
183
+ "std": [
184
+ 0.43846824765205383
185
+ ],
186
+ "q01": [
187
+ -0.8758928775787354
188
+ ],
189
+ "q99": [
190
+ 0.84375
191
+ ]
192
+ },
193
+ "z": {
194
+ "max": [
195
+ 0.9375
196
+ ],
197
+ "min": [
198
+ -0.9375
199
+ ],
200
+ "mean": [
201
+ -0.04601382836699486
202
+ ],
203
+ "std": [
204
+ 0.4474974274635315
205
+ ],
206
+ "q01": [
207
+ -0.9375
208
+ ],
209
+ "q99": [
210
+ 0.9375
211
+ ]
212
+ },
213
+ "roll": {
214
+ "max": [
215
+ 0.17678570747375488
216
+ ],
217
+ "min": [
218
+ -0.15000000596046448
219
+ ],
220
+ "mean": [
221
+ 0.00123520044144243
222
+ ],
223
+ "std": [
224
+ 0.024446550756692886
225
+ ],
226
+ "q01": [
227
+ -0.06964285671710968
228
+ ],
229
+ "q99": [
230
+ 0.08142857253551483
231
+ ]
232
+ },
233
+ "pitch": {
234
+ "max": [
235
+ 0.35035714507102966
236
+ ],
237
+ "min": [
238
+ -0.29035714268684387
239
+ ],
240
+ "mean": [
241
+ 0.006998839322477579
242
+ ],
243
+ "std": [
244
+ 0.049355510622262955
245
+ ],
246
+ "q01": [
247
+ -0.11678571254014969
248
+ ],
249
+ "q99": [
250
+ 0.14892856776714325
251
+ ]
252
+ },
253
+ "yaw": {
254
+ "max": [
255
+ 0.1810714304447174
256
+ ],
257
+ "min": [
258
+ -0.32892856001853943
259
+ ],
260
+ "mean": [
261
+ -0.015027612447738647
262
+ ],
263
+ "std": [
264
+ 0.042107198387384415
265
+ ],
266
+ "q01": [
267
+ -0.15964286029338837
268
+ ],
269
+ "q99": [
270
+ 0.0867857113480568
271
+ ]
272
+ },
273
+ "gripper": {
274
+ "max": [
275
+ 1.0
276
+ ],
277
+ "min": [
278
+ 0.0
279
+ ],
280
+ "mean": [
281
+ 0.46428999304771423
282
+ ],
283
+ "std": [
284
+ 0.49879148602485657
285
+ ],
286
+ "q01": [
287
+ 0.0
288
+ ],
289
+ "q99": [
290
+ 1.0
291
+ ]
292
+ }
293
+ }
294
+ },
295
+ "modalities": {
296
+ "video": {
297
+ "image": {
298
+ "resolution": [
299
+ 256,
300
+ 256
301
+ ],
302
+ "channels": 3,
303
+ "fps": 20.0
304
+ },
305
+ "wrist_image": {
306
+ "resolution": [
307
+ 256,
308
+ 256
309
+ ],
310
+ "channels": 3,
311
+ "fps": 20.0
312
+ }
313
+ },
314
+ "state": {
315
+ "x": {
316
+ "absolute": true,
317
+ "rotation_type": null,
318
+ "shape": [
319
+ 1
320
+ ],
321
+ "continuous": true
322
+ },
323
+ "y": {
324
+ "absolute": true,
325
+ "rotation_type": null,
326
+ "shape": [
327
+ 1
328
+ ],
329
+ "continuous": true
330
+ },
331
+ "z": {
332
+ "absolute": true,
333
+ "rotation_type": null,
334
+ "shape": [
335
+ 1
336
+ ],
337
+ "continuous": true
338
+ },
339
+ "roll": {
340
+ "absolute": true,
341
+ "rotation_type": null,
342
+ "shape": [
343
+ 1
344
+ ],
345
+ "continuous": true
346
+ },
347
+ "pitch": {
348
+ "absolute": true,
349
+ "rotation_type": null,
350
+ "shape": [
351
+ 1
352
+ ],
353
+ "continuous": true
354
+ },
355
+ "yaw": {
356
+ "absolute": true,
357
+ "rotation_type": null,
358
+ "shape": [
359
+ 1
360
+ ],
361
+ "continuous": true
362
+ },
363
+ "gripper": {
364
+ "absolute": true,
365
+ "rotation_type": null,
366
+ "shape": [
367
+ 2
368
+ ],
369
+ "continuous": true
370
+ }
371
+ },
372
+ "action": {
373
+ "x": {
374
+ "absolute": true,
375
+ "rotation_type": null,
376
+ "shape": [
377
+ 1
378
+ ],
379
+ "continuous": true
380
+ },
381
+ "y": {
382
+ "absolute": true,
383
+ "rotation_type": null,
384
+ "shape": [
385
+ 1
386
+ ],
387
+ "continuous": true
388
+ },
389
+ "z": {
390
+ "absolute": true,
391
+ "rotation_type": null,
392
+ "shape": [
393
+ 1
394
+ ],
395
+ "continuous": true
396
+ },
397
+ "roll": {
398
+ "absolute": true,
399
+ "rotation_type": null,
400
+ "shape": [
401
+ 1
402
+ ],
403
+ "continuous": true
404
+ },
405
+ "pitch": {
406
+ "absolute": true,
407
+ "rotation_type": null,
408
+ "shape": [
409
+ 1
410
+ ],
411
+ "continuous": true
412
+ },
413
+ "yaw": {
414
+ "absolute": true,
415
+ "rotation_type": null,
416
+ "shape": [
417
+ 1
418
+ ],
419
+ "continuous": true
420
+ },
421
+ "gripper": {
422
+ "absolute": true,
423
+ "rotation_type": null,
424
+ "shape": [
425
+ 1
426
+ ],
427
+ "continuous": true
428
+ }
429
+ }
430
+ },
431
+ "embodiment_tag": "libero_franka"
432
+ }
433
+ }
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:636066fca40fbd69b0b2c4d160ee0f9656c565c161d5f1c5b3a1acfe05944dae
3
+ size 4999367032
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4917b473d89d5a31af802fe4e40e0dbe73866a938b9cd39fd1bbb9b6d55eec0
3
+ size 2586705312
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
3
+ size 1064
trainer_state.json ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 3.0303030303030303,
6
+ "eval_steps": 500,
7
+ "global_step": 200,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.15151515151515152,
14
+ "grad_norm": 1.0779532194137573,
15
+ "learning_rate": 9e-05,
16
+ "loss": 0.5809,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.30303030303030304,
21
+ "grad_norm": 1.257666826248169,
22
+ "learning_rate": 9.944739353007344e-05,
23
+ "loss": 0.2898,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.45454545454545453,
28
+ "grad_norm": 1.1526721715927124,
29
+ "learning_rate": 9.755282581475769e-05,
30
+ "loss": 0.1944,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 0.6060606060606061,
35
+ "grad_norm": 0.740468442440033,
36
+ "learning_rate": 9.43611409721806e-05,
37
+ "loss": 0.1489,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 0.7575757575757576,
42
+ "grad_norm": 0.5122345089912415,
43
+ "learning_rate": 8.995939984474624e-05,
44
+ "loss": 0.1241,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 0.9090909090909091,
49
+ "grad_norm": 0.3294401168823242,
50
+ "learning_rate": 8.44676704559283e-05,
51
+ "loss": 0.1162,
52
+ "step": 60
53
+ },
54
+ {
55
+ "epoch": 1.0606060606060606,
56
+ "grad_norm": 0.3713489770889282,
57
+ "learning_rate": 7.803575286758364e-05,
58
+ "loss": 0.105,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 1.2121212121212122,
63
+ "grad_norm": 0.38999056816101074,
64
+ "learning_rate": 7.083909302476453e-05,
65
+ "loss": 0.102,
66
+ "step": 80
67
+ },
68
+ {
69
+ "epoch": 1.3636363636363638,
70
+ "grad_norm": 0.313165545463562,
71
+ "learning_rate": 6.307399704769099e-05,
72
+ "loss": 0.0917,
73
+ "step": 90
74
+ },
75
+ {
76
+ "epoch": 1.5151515151515151,
77
+ "grad_norm": 0.2271764725446701,
78
+ "learning_rate": 5.495227651252315e-05,
79
+ "loss": 0.0896,
80
+ "step": 100
81
+ },
82
+ {
83
+ "epoch": 1.6666666666666665,
84
+ "grad_norm": 0.2582443356513977,
85
+ "learning_rate": 4.669547078371504e-05,
86
+ "loss": 0.0906,
87
+ "step": 110
88
+ },
89
+ {
90
+ "epoch": 1.8181818181818183,
91
+ "grad_norm": 0.28969135880470276,
92
+ "learning_rate": 3.852880399766243e-05,
93
+ "loss": 0.0898,
94
+ "step": 120
95
+ },
96
+ {
97
+ "epoch": 1.9696969696969697,
98
+ "grad_norm": 0.21474944055080414,
99
+ "learning_rate": 3.0675041535377405e-05,
100
+ "loss": 0.0858,
101
+ "step": 130
102
+ },
103
+ {
104
+ "epoch": 2.121212121212121,
105
+ "grad_norm": 0.17501652240753174,
106
+ "learning_rate": 2.3348413563600325e-05,
107
+ "loss": 0.0827,
108
+ "step": 140
109
+ },
110
+ {
111
+ "epoch": 2.2727272727272725,
112
+ "grad_norm": 0.17531634867191315,
113
+ "learning_rate": 1.6748771394307585e-05,
114
+ "loss": 0.0835,
115
+ "step": 150
116
+ },
117
+ {
118
+ "epoch": 2.4242424242424243,
119
+ "grad_norm": 0.1865883618593216,
120
+ "learning_rate": 1.1056136061894384e-05,
121
+ "loss": 0.0795,
122
+ "step": 160
123
+ },
124
+ {
125
+ "epoch": 2.5757575757575757,
126
+ "grad_norm": 0.23194383084774017,
127
+ "learning_rate": 6.425787818636131e-06,
128
+ "loss": 0.0871,
129
+ "step": 170
130
+ },
131
+ {
132
+ "epoch": 2.7272727272727275,
133
+ "grad_norm": 0.15936115384101868,
134
+ "learning_rate": 2.9840304941919415e-06,
135
+ "loss": 0.0838,
136
+ "step": 180
137
+ },
138
+ {
139
+ "epoch": 2.878787878787879,
140
+ "grad_norm": 0.18423740565776825,
141
+ "learning_rate": 8.247462563808817e-07,
142
+ "loss": 0.0791,
143
+ "step": 190
144
+ },
145
+ {
146
+ "epoch": 3.0303030303030303,
147
+ "grad_norm": 0.13376280665397644,
148
+ "learning_rate": 6.834750376549792e-09,
149
+ "loss": 0.0796,
150
+ "step": 200
151
+ }
152
+ ],
153
+ "logging_steps": 10,
154
+ "max_steps": 200,
155
+ "num_input_tokens_seen": 0,
156
+ "num_train_epochs": 4,
157
+ "save_steps": 40,
158
+ "stateful_callbacks": {
159
+ "TrainerControl": {
160
+ "args": {
161
+ "should_epoch_stop": false,
162
+ "should_evaluate": false,
163
+ "should_log": false,
164
+ "should_save": true,
165
+ "should_training_stop": true
166
+ },
167
+ "attributes": {}
168
+ }
169
+ },
170
+ "total_flos": 0.0,
171
+ "train_batch_size": 128,
172
+ "trial_name": null,
173
+ "trial_params": null
174
+ }