Training completed - 1000 steps

Browse files

Files changed (4) hide show

.gitattributes +1 -0
config.json +68 -62
model.safetensors +3 -0
train_config.json +177 -0

.gitattributes CHANGED Viewed

@@ -3,3 +3,4 @@ experiment_cfg/* filter=lfs diff=lfs merge=lfs -text
 runs/* filter=lfs diff=lfs merge=lfs -text
 model-00002-of-00002.safetensors filter=lfs diff=lfs merge=lfs -text
 model-00001-of-00002.safetensors filter=lfs diff=lfs merge=lfs -text

 runs/* filter=lfs diff=lfs merge=lfs -text
 model-00002-of-00002.safetensors filter=lfs diff=lfs merge=lfs -text
 model-00001-of-00002.safetensors filter=lfs diff=lfs merge=lfs -text
+model.safetensors filter=lfs diff=lfs merge=lfs -text

config.json CHANGED Viewed

@@ -1,65 +1,71 @@
 {
-  "_name_or_path": "/root/.cache/huggingface/hub/models--nvidia--GR00T-N1-2B/snapshots/32e1fd2507f7739fad443e6b449c8188e0e02fcb",
-  "action_dim": 32,
-  "action_head_cfg": {
-    "action_dim": 32,
-    "action_horizon": 16,
-    "add_pos_embed": true,
-    "diffusion_model_cfg": {
-      "attention_head_dim": 48,
-      "dropout": 0.2,
-      "final_dropout": true,
-      "interleave_self_attention": true,
-      "norm_type": "ada_norm",
-      "num_attention_heads": 32,
-      "num_layers": 16,
-      "output_dim": 1024,
-      "positional_embeddings": null
     },
-    "freeze_decode_layer": false,
-    "hidden_size": 1024,
-    "input_embedding_dim": 1536,
-    "load_pretrained_det_decode_layer_path": null,
-    "max_action_dim": 32,
-    "max_state_dim": 64,
-    "model_dtype": "float32",
-    "noise_beta_alpha": 1.5,
-    "noise_beta_beta": 1.0,
-    "noise_s": 0.999,
-    "num_inference_timesteps": 16,
-    "num_timestep_buckets": 1000,
-    "tune_diffusion_model": true,
-    "tune_projector": true
-  },
-  "action_horizon": 16,
-  "architectures": [
-    "GR00T_N1"
-  ],
-  "attn_implementation": null,
-  "backbone_cfg": {
-    "allow_reshape_visual": true,
-    "load_pretrained_det_eagle_path": null,
-    "model_name": "$GR00T_BACKBONE_PATH/eagle2_hg_model",
-    "processor_cfg": {
-      "max_input_tiles": 1,
-      "model_path": "$GR00T_BACKBONE_PATH/eagle2_hg_model",
-      "model_spec": {
-        "num_image_token": 64,
-        "template": "qwen2-chat"
-      }
     },
-    "projector_dim": 2048,
-    "remove_llm": false,
-    "reproject_vision": false,
-    "scale_image_resolution": 1,
-    "select_layer": 12,
-    "tune_llm": false,
-    "tune_visual": true
-  },
-  "compute_dtype": "bfloat16",
-  "hidden_size": 1536,
-  "model_dtype": "float32",
-  "model_type": "gr00t_n1",
-  "torch_dtype": "float32",
-  "transformers_version": "4.45.2"
-}

 {
+    "type": "act",
+    "n_obs_steps": 1,
+    "normalization_mapping": {
+        "VISUAL": "MEAN_STD",
+        "STATE": "MEAN_STD",
+        "ACTION": "MEAN_STD"
     },
+    "input_features": {
+        "observation.state": {
+            "type": "STATE",
+            "shape": [
+                6
+            ]
+        },
+        "observation.images.laptop": {
+            "type": "VISUAL",
+            "shape": [
+                3,
+                480,
+                640
+            ]
+        },
+        "observation.images.phone": {
+            "type": "VISUAL",
+            "shape": [
+                3,
+                480,
+                640
+            ]
+        }
     },
+    "output_features": {
+        "action": {
+            "type": "ACTION",
+            "shape": [
+                6
+            ]
+        }
+    },
+    "device": "cuda",
+    "use_amp": false,
+    "push_to_hub": false,
+    "repo_id": "Ofiroz91/groot-test-1",
+    "private": null,
+    "tags": [
+        "cubix"
+    ],
+    "license": null,
+    "chunk_size": 100,
+    "n_action_steps": 100,
+    "vision_backbone": "resnet18",
+    "pretrained_backbone_weights": "ResNet18_Weights.IMAGENET1K_V1",
+    "replace_final_stride_with_dilation": false,
+    "pre_norm": false,
+    "dim_model": 512,
+    "n_heads": 8,
+    "dim_feedforward": 3200,
+    "feedforward_activation": "relu",
+    "n_encoder_layers": 4,
+    "n_decoder_layers": 1,
+    "use_vae": true,
+    "latent_dim": 32,
+    "n_vae_encoder_layers": 4,
+    "temporal_ensemble_coeff": null,
+    "dropout": 0.1,
+    "kl_weight": 10.0,
+    "optimizer_lr": 1e-05,
+    "optimizer_weight_decay": 0.0001,
+    "optimizer_lr_backbone": 1e-05
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4284af35abfb3e40f562e2e1e24adc2313164beb4cbe436a39defe00a5a3b211
+size 206701072

train_config.json ADDED Viewed

	@@ -0,0 +1,177 @@

+{
+    "dataset": {
+        "repo_id": "Ofiroz91/eval_so_100_cube2bowl",
+        "root": null,
+        "episodes": null,
+        "image_transforms": {
+            "enable": false,
+            "max_num_transforms": 3,
+            "random_order": false,
+            "tfs": {
+                "brightness": {
+                    "weight": 1.0,
+                    "type": "ColorJitter",
+                    "kwargs": {
+                        "brightness": [
+                            0.8,
+                            1.2
+                        ]
+                    }
+                },
+                "contrast": {
+                    "weight": 1.0,
+                    "type": "ColorJitter",
+                    "kwargs": {
+                        "contrast": [
+                            0.8,
+                            1.2
+                        ]
+                    }
+                },
+                "saturation": {
+                    "weight": 1.0,
+                    "type": "ColorJitter",
+                    "kwargs": {
+                        "saturation": [
+                            0.5,
+                            1.5
+                        ]
+                    }
+                },
+                "hue": {
+                    "weight": 1.0,
+                    "type": "ColorJitter",
+                    "kwargs": {
+                        "hue": [
+                            -0.05,
+                            0.05
+                        ]
+                    }
+                },
+                "sharpness": {
+                    "weight": 1.0,
+                    "type": "SharpnessJitter",
+                    "kwargs": {
+                        "sharpness": [
+                            0.5,
+                            1.5
+                        ]
+                    }
+                }
+            }
+        },
+        "revision": null,
+        "use_imagenet_stats": true,
+        "video_backend": "torchcodec"
+    },
+    "env": null,
+    "policy": {
+        "type": "act",
+        "n_obs_steps": 1,
+        "normalization_mapping": {
+            "VISUAL": "MEAN_STD",
+            "STATE": "MEAN_STD",
+            "ACTION": "MEAN_STD"
+        },
+        "input_features": {
+            "observation.state": {
+                "type": "STATE",
+                "shape": [
+                    6
+                ]
+            },
+            "observation.images.laptop": {
+                "type": "VISUAL",
+                "shape": [
+                    3,
+                    480,
+                    640
+                ]
+            },
+            "observation.images.phone": {
+                "type": "VISUAL",
+                "shape": [
+                    3,
+                    480,
+                    640
+                ]
+            }
+        },
+        "output_features": {
+            "action": {
+                "type": "ACTION",
+                "shape": [
+                    6
+                ]
+            }
+        },
+        "device": "cuda",
+        "use_amp": false,
+        "push_to_hub": false,
+        "repo_id": "Ofiroz91/groot-test-1",
+        "private": null,
+        "tags": [
+            "cubix"
+        ],
+        "license": null,
+        "chunk_size": 100,
+        "n_action_steps": 100,
+        "vision_backbone": "resnet18",
+        "pretrained_backbone_weights": "ResNet18_Weights.IMAGENET1K_V1",
+        "replace_final_stride_with_dilation": false,
+        "pre_norm": false,
+        "dim_model": 512,
+        "n_heads": 8,
+        "dim_feedforward": 3200,
+        "feedforward_activation": "relu",
+        "n_encoder_layers": 4,
+        "n_decoder_layers": 1,
+        "use_vae": true,
+        "latent_dim": 32,
+        "n_vae_encoder_layers": 4,
+        "temporal_ensemble_coeff": null,
+        "dropout": 0.1,
+        "kl_weight": 10.0,
+        "optimizer_lr": 1e-05,
+        "optimizer_weight_decay": 0.0001,
+        "optimizer_lr_backbone": 1e-05
+    },
+    "output_dir": "/tmp/lerobot_training_1752880447268_d0991f10",
+    "job_name": "testing trainging",
+    "resume": false,
+    "seed": 1000,
+    "num_workers": 4,
+    "batch_size": 4,
+    "steps": 1000,
+    "eval_freq": 100,
+    "log_freq": 200,
+    "save_checkpoint": true,
+    "save_freq": 20000,
+    "use_policy_training_preset": true,
+    "optimizer": {
+        "type": "adamw",
+        "lr": 1e-05,
+        "weight_decay": 0.0001,
+        "grad_clip_norm": 10.0,
+        "betas": [
+            0.9,
+            0.999
+        ],
+        "eps": 1e-08
+    },
+    "scheduler": null,
+    "eval": {
+        "n_episodes": 50,
+        "batch_size": 50,
+        "use_async_envs": false
+    },
+    "wandb": {
+        "enable": false,
+        "disable_artifact": false,
+        "project": "lerobot",
+        "entity": null,
+        "notes": null,
+        "run_id": null,
+        "mode": null
+    }
+}