Initial model upload

Browse files

Files changed (7) hide show

config.json +64 -0
experiment_cfg/metadata.json +433 -0
model-00001-of-00002.safetensors +3 -0
model-00002-of-00002.safetensors +3 -0
model.safetensors.index.json +0 -0
scheduler.pt +3 -0
trainer_state.json +118 -0

config.json ADDED Viewed

	@@ -0,0 +1,64 @@

+{
+  "action_dim": 32,
+  "action_head_cfg": {
+    "action_dim": 32,
+    "action_horizon": 16,
+    "add_pos_embed": true,
+    "backbone_embedding_dim": 2048,
+    "diffusion_model_cfg": {
+      "attention_head_dim": 48,
+      "cross_attention_dim": 2048,
+      "dropout": 0.2,
+      "final_dropout": true,
+      "interleave_self_attention": true,
+      "norm_type": "ada_norm",
+      "num_attention_heads": 32,
+      "num_layers": 16,
+      "output_dim": 1024,
+      "positional_embeddings": null
+    },
+    "hidden_size": 1024,
+    "input_embedding_dim": 1536,
+    "max_action_dim": 32,
+    "max_state_dim": 64,
+    "model_dtype": "float32",
+    "noise_beta_alpha": 1.5,
+    "noise_beta_beta": 1.0,
+    "noise_s": 0.999,
+    "num_inference_timesteps": 4,
+    "num_target_vision_tokens": 32,
+    "num_timestep_buckets": 1000,
+    "tune_diffusion_model": true,
+    "tune_projector": true,
+    "use_vlln": true,
+    "vl_self_attention_cfg": {
+      "attention_head_dim": 64,
+      "dropout": 0.2,
+      "final_dropout": true,
+      "num_attention_heads": 32,
+      "num_layers": 4,
+      "positional_embeddings": null
+    }
+  },
+  "action_horizon": 16,
+  "architectures": [
+    "GR00T_N1_5"
+  ],
+  "attn_implementation": null,
+  "backbone_cfg": {
+    "eagle_path": "NVEagle/eagle_er-qwen3_1_7B-Siglip2_400M_stage1_5_128gpu_er_v7_1mlp_nops",
+    "load_bf16": false,
+    "project_to_dim": null,
+    "reproject_vision": false,
+    "select_layer": 12,
+    "tune_llm": false,
+    "tune_visual": true,
+    "use_flash_attention": true
+  },
+  "compute_dtype": "bfloat16",
+  "hidden_size": 2048,
+  "model_dtype": "float32",
+  "model_type": "gr00t_n1_5",
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.51.3"
+}

experiment_cfg/metadata.json ADDED Viewed

	@@ -0,0 +1,433 @@

+{
+    "libero_franka": {
+        "statistics": {
+            "state": {
+                "x": {
+                    "max": [
+                        0.13579000532627106
+                    ],
+                    "min": [
+                        -0.46141114830970764
+                    ],
+                    "mean": [
+                        -0.09923473745584488
+                    ],
+                    "std": [
+                        0.11653962731361389
+                    ],
+                    "q01": [
+                        -0.42401049643754957
+                    ],
+                    "q99": [
+                        0.08990443304181095
+                    ]
+                },
+                "y": {
+                    "max": [
+                        0.33316105604171753
+                    ],
+                    "min": [
+                        -0.30129560828208923
+                    ],
+                    "mean": [
+                        0.013597904704511166
+                    ],
+                    "std": [
+                        0.11478105187416077
+                    ],
+                    "q01": [
+                        -0.27338370531797407
+                    ],
+                    "q99": [
+                        0.26473945528268716
+                    ]
+                },
+                "z": {
+                    "max": [
+                        1.3660105466842651
+                    ],
+                    "min": [
+                        0.9083037972450256
+                    ],
+                    "mean": [
+                        1.0694637298583984
+                    ],
+                    "std": [
+                        0.10487838834524155
+                    ],
+                    "q01": [
+                        0.911226047873497
+                    ],
+                    "q99": [
+                        1.2910678112506866
+                    ]
+                },
+                "roll": {
+                    "max": [
+                        3.473310708999634
+                    ],
+                    "min": [
+                        0.35277295112609863
+                    ],
+                    "mean": [
+                        2.82898211479187
+                    ],
+                    "std": [
+                        0.5570293664932251
+                    ],
+                    "q01": [
+                        1.3085840785503386
+                    ],
+                    "q99": [
+                        3.2425890421867365
+                    ]
+                },
+                "pitch": {
+                    "max": [
+                        2.6688623428344727
+                    ],
+                    "min": [
+                        -1.4858465194702148
+                    ],
+                    "mean": [
+                        0.30799180269241333
+                    ],
+                    "std": [
+                        0.7221656441688538
+                    ],
+                    "q01": [
+                        -0.691297555565834
+                    ],
+                    "q99": [
+                        2.3376442337036116
+                    ]
+                },
+                "yaw": {
+                    "max": [
+                        0.8255361318588257
+                    ],
+                    "min": [
+                        -1.5227035284042358
+                    ],
+                    "mean": [
+                        -0.274286687374115
+                    ],
+                    "std": [
+                        0.36479514837265015
+                    ],
+                    "q01": [
+                        -1.130668159723282
+                    ],
+                    "q99": [
+                        0.4659483411908149
+                    ]
+                },
+                "gripper": {
+                    "max": [
+                        0.04233968257904053,
+                        0.0010111660230904818
+                    ],
+                    "min": [
+                        -0.0013586411951109767,
+                        -0.042040832340717316
+                    ],
+                    "mean": [
+                        0.028092455118894577,
+                        -0.027339335530996323
+                    ],
+                    "std": [
+                        0.01507475133985281,
+                        0.014990941621363163
+                    ],
+                    "q01": [
+                        0.0016738151130266487,
+                        -0.040336399003863335
+                    ],
+                    "q99": [
+                        0.040610933862626555,
+                        -0.0015016929572448147
+                    ]
+                }
+            },
+            "action": {
+                "x": {
+                    "max": [
+                        0.9375
+                    ],
+                    "min": [
+                        -0.9375
+                    ],
+                    "mean": [
+                        0.04721052572131157
+                    ],
+                    "std": [
+                        0.3968801498413086
+                    ],
+                    "q01": [
+                        -0.8785714507102966
+                    ],
+                    "q99": [
+                        0.9375
+                    ]
+                },
+                "y": {
+                    "max": [
+                        0.9375
+                    ],
+                    "min": [
+                        -0.9375
+                    ],
+                    "mean": [
+                        0.028835246339440346
+                    ],
+                    "std": [
+                        0.3473387360572815
+                    ],
+                    "q01": [
+                        -0.7553571462631226
+                    ],
+                    "q99": [
+                        0.9107142686843872
+                    ]
+                },
+                "z": {
+                    "max": [
+                        0.9375
+                    ],
+                    "min": [
+                        -0.9375
+                    ],
+                    "mean": [
+                        -0.1485840231180191
+                    ],
+                    "std": [
+                        0.49239858984947205
+                    ],
+                    "q01": [
+                        -0.9375
+                    ],
+                    "q99": [
+                        0.9375
+                    ]
+                },
+                "roll": {
+                    "max": [
+                        0.3557142913341522
+                    ],
+                    "min": [
+                        -0.2582142949104309
+                    ],
+                    "mean": [
+                        -0.0025010062381625175
+                    ],
+                    "std": [
+                        0.055331431329250336
+                    ],
+                    "q01": [
+                        -0.1510714292526245
+                    ],
+                    "q99": [
+                        0.20357142388820648
+                    ]
+                },
+                "pitch": {
+                    "max": [
+                        0.375
+                    ],
+                    "min": [
+                        -0.375
+                    ],
+                    "mean": [
+                        0.026408178731799126
+                    ],
+                    "std": [
+                        0.07844757288694382
+                    ],
+                    "q01": [
+                        -0.1639285683631897
+                    ],
+                    "q99": [
+                        0.26357144117355347
+                    ]
+                },
+                "yaw": {
+                    "max": [
+                        0.375
+                    ],
+                    "min": [
+                        -0.2871428430080414
+                    ],
+                    "mean": [
+                        0.027379808947443962
+                    ],
+                    "std": [
+                        0.10008802264928818
+                    ],
+                    "q01": [
+                        -0.13777500048279764
+                    ],
+                    "q99": [
+                        0.375
+                    ]
+                },
+                "gripper": {
+                    "max": [
+                        1.0
+                    ],
+                    "min": [
+                        0.0
+                    ],
+                    "mean": [
+                        0.6299911737442017
+                    ],
+                    "std": [
+                        0.48270025849342346
+                    ],
+                    "q01": [
+                        0.0
+                    ],
+                    "q99": [
+                        1.0
+                    ]
+                }
+            }
+        },
+        "modalities": {
+            "video": {
+                "image": {
+                    "resolution": [
+                        256,
+                        256
+                    ],
+                    "channels": 3,
+                    "fps": 20.0
+                },
+                "wrist_image": {
+                    "resolution": [
+                        256,
+                        256
+                    ],
+                    "channels": 3,
+                    "fps": 20.0
+                }
+            },
+            "state": {
+                "x": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "y": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "z": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "roll": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "pitch": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "yaw": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "gripper": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        2
+                    ],
+                    "continuous": true
+                }
+            },
+            "action": {
+                "x": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "y": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "z": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "roll": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "pitch": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "yaw": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "gripper": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                }
+            }
+        },
+        "embodiment_tag": "libero_franka"
+    }
+}

model-00001-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:55779e30362f6e34ba39c0ce9d2ac0919423c8047840ae160fd85122c4596b9f
+size 4999367032

model-00002-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:85ed0529a6209d2e77a6016b65e41ac871469afef53bb1e7a1d571225345e48c
+size 2586705312

model.safetensors.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:478e83c7632ed85eeb43c4adcffb356f6787ad4b114eba1103e1cde5f0e935e9
+size 1064

trainer_state.json ADDED Viewed

	@@ -0,0 +1,118 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.3529411764705883,
+  "eval_steps": 500,
+  "global_step": 120,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.19607843137254902,
+      "grad_norm": 2.844466209411621,
+      "learning_rate": 9e-05,
+      "loss": 0.5843,
+      "step": 10
+    },
+    {
+      "epoch": 0.39215686274509803,
+      "grad_norm": 1.278521180152893,
+      "learning_rate": 9.944739353007344e-05,
+      "loss": 0.2849,
+      "step": 20
+    },
+    {
+      "epoch": 0.5882352941176471,
+      "grad_norm": 0.7505949139595032,
+      "learning_rate": 9.755282581475769e-05,
+      "loss": 0.2106,
+      "step": 30
+    },
+    {
+      "epoch": 0.7843137254901961,
+      "grad_norm": 0.5767074227333069,
+      "learning_rate": 9.43611409721806e-05,
+      "loss": 0.169,
+      "step": 40
+    },
+    {
+      "epoch": 0.9803921568627451,
+      "grad_norm": 0.4589107632637024,
+      "learning_rate": 8.995939984474624e-05,
+      "loss": 0.1408,
+      "step": 50
+    },
+    {
+      "epoch": 1.1764705882352942,
+      "grad_norm": 0.5532191395759583,
+      "learning_rate": 8.44676704559283e-05,
+      "loss": 0.1278,
+      "step": 60
+    },
+    {
+      "epoch": 1.3725490196078431,
+      "grad_norm": 0.38863489031791687,
+      "learning_rate": 7.803575286758364e-05,
+      "loss": 0.1163,
+      "step": 70
+    },
+    {
+      "epoch": 1.5686274509803921,
+      "grad_norm": 0.43584027886390686,
+      "learning_rate": 7.083909302476453e-05,
+      "loss": 0.1131,
+      "step": 80
+    },
+    {
+      "epoch": 1.7647058823529411,
+      "grad_norm": 0.4503878951072693,
+      "learning_rate": 6.307399704769099e-05,
+      "loss": 0.1024,
+      "step": 90
+    },
+    {
+      "epoch": 1.9607843137254903,
+      "grad_norm": 0.4599202871322632,
+      "learning_rate": 5.495227651252315e-05,
+      "loss": 0.1015,
+      "step": 100
+    },
+    {
+      "epoch": 2.156862745098039,
+      "grad_norm": 0.15444983541965485,
+      "learning_rate": 4.669547078371504e-05,
+      "loss": 0.097,
+      "step": 110
+    },
+    {
+      "epoch": 2.3529411764705883,
+      "grad_norm": 0.2130330502986908,
+      "learning_rate": 3.852880399766243e-05,
+      "loss": 0.0958,
+      "step": 120
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 200,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 40,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 128,
+  "trial_name": null,
+  "trial_params": null
+}