Upload policy weights, train config and readme

Browse files

Files changed (4) hide show

README.md +62 -0
config.json +4 -4
model.safetensors +2 -2
train_config.json +7 -7

README.md ADDED Viewed

	@@ -0,0 +1,62 @@

+---
+datasets: romanrab/battery_box_right_arm_150
+library_name: lerobot
+license: apache-2.0
+model_name: diffusion
+pipeline_tag: robotics
+tags:
+- lerobot
+- robotics
+- diffusion
+---
+# Model Card for diffusion
+<!-- Provide a quick summary of what the model is/does. -->
+[Diffusion Policy](https://huggingface.co/papers/2303.04137) treats visuomotor control as a generative diffusion process, producing smooth, multi-step action trajectories that excel at contact-rich manipulation.
+This policy has been trained and pushed to the Hub using [LeRobot](https://github.com/huggingface/lerobot).
+See the full documentation at [LeRobot Docs](https://huggingface.co/docs/lerobot/index).
+---
+## How to Get Started with the Model
+For a complete walkthrough, see the [training guide](https://huggingface.co/docs/lerobot/il_robots#train-a-policy).
+Below is a short version of how to train the policy and run inference/evaluation:
+### Train from scratch
+```bash
+python -m lerobot.scripts.train \
+  --dataset.repo_id=${HF_USER}/<dataset> \
+  --policy.type=diffusion \
+  --output_dir=outputs/train/<desired_policy_repo_id> \
+  --job_name=lerobot_training \
+  --policy.device=cuda \
+  --policy.repo_id=${HF_USER}/<desired_policy_repo_id> \
+  --wandb.enable=true
+```
+_Writes checkpoints to `outputs/train/<desired_policy_repo_id>/checkpoints/`._
+### Evaluate the policy/run inference
+```bash
+python -m lerobot.record \
+  --robot.type=so100_follower \
+  --dataset.repo_id=<hf_user>/eval_<dataset> \
+  --policy.path=<hf_user>/<desired_policy_repo_id> \
+  --episodes=10
+```
+Prefix the dataset repo with **eval\_** and supply `--policy.path` pointing to a local or hub checkpoint.
+---
+## Model Details
+- **License:** apache-2.0

config.json CHANGED Viewed

@@ -50,8 +50,8 @@
     "drop_n_last_frames": 7,
     "vision_backbone": "resnet18",
     "crop_shape": [
-        84,
-        84
     ],
     "crop_is_random": true,
     "pretrained_backbone_weights": null,
@@ -59,9 +59,9 @@
     "spatial_softmax_num_keypoints": 32,
     "use_separate_rgb_encoder_per_camera": true,
     "down_dims": [
         512,
-        1024,
-        2048
     ],
     "kernel_size": 5,
     "n_groups": 8,

     "drop_n_last_frames": 7,
     "vision_backbone": "resnet18",
     "crop_shape": [
+        64,
+        64
     ],
     "crop_is_random": true,
     "pretrained_backbone_weights": null,
     "spatial_softmax_num_keypoints": 32,
     "use_separate_rgb_encoder_per_camera": true,
     "down_dims": [
+        256,
         512,
+        1024
     ],
     "kernel_size": 5,
     "n_groups": 8,

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6924ce9e8318f2a5d3f8b49ca9dfcbdc22d7ab8ef69bcd1a471f3b61d9979fb4
-size 1114998460

 version https://git-lfs.github.com/spec/v1
+oid sha256:4475cdbac7d779f85360ea6911e1913870aaf1bb58dfd35f90cad32e55170955
+size 358774204

train_config.json CHANGED Viewed

@@ -117,8 +117,8 @@
         "drop_n_last_frames": 7,
         "vision_backbone": "resnet18",
         "crop_shape": [
-            84,
-            84
         ],
         "crop_is_random": true,
         "pretrained_backbone_weights": null,
@@ -126,9 +126,9 @@
         "spatial_softmax_num_keypoints": 32,
         "use_separate_rgb_encoder_per_camera": true,
         "down_dims": [
             512,
-            1024,
-            2048
         ],
         "kernel_size": 5,
         "n_groups": 8,
@@ -159,12 +159,12 @@
     "resume": false,
     "seed": 1000,
     "num_workers": 4,
-    "batch_size": 64,
-    "steps": 50000,
     "eval_freq": 20000,
     "log_freq": 100,
     "save_checkpoint": true,
-    "save_freq": 5000,
     "use_policy_training_preset": true,
     "optimizer": {
         "type": "adam",

         "drop_n_last_frames": 7,
         "vision_backbone": "resnet18",
         "crop_shape": [
+            64,
+            64
         ],
         "crop_is_random": true,
         "pretrained_backbone_weights": null,
         "spatial_softmax_num_keypoints": 32,
         "use_separate_rgb_encoder_per_camera": true,
         "down_dims": [
+            256,
             512,
+            1024
         ],
         "kernel_size": 5,
         "n_groups": 8,
     "resume": false,
     "seed": 1000,
     "num_workers": 4,
+    "batch_size": 32,
+    "steps": 15000,
     "eval_freq": 20000,
     "log_freq": 100,
     "save_checkpoint": true,
+    "save_freq": 2500,
     "use_policy_training_preset": true,
     "optimizer": {
         "type": "adam",