Upload folder using huggingface_hub

Browse files

Files changed (6) hide show

.gitattributes +1 -0
README.md +30 -0
config.json +85 -0
model.safetensors +3 -0
policy_postprocessor.json +31 -0
policy_preprocessor.json +86 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,30 @@

+---
+license: apache-2.0
+library_name: lerobot
+pipeline_tag: robotics
+---
+## Pi0 pretrained model
+This repository contains the model described in [π_0: A Vision-Language-Action Flow Model for General Robot Control](https://huggingface.co/papers/2410.24164).
+See the [Twitter thread](https://x.com/RemiCadene/status/1886823939856589296) and [blog post](https://huggingface.co/blog/pi0) for more information about its integration into [LeRobot](https://github.com/huggingface/lerobot).
+## Usage
+You can download and use this model with:
+```python
+policy = Pi0Policy.from_pretrained("lerobot/pi0")
+action = policy.select_action(batch)
+```
+## Fine-tuning
+You can easily fine-tune it on your own dataset. For instance, on @dana_55517's [dataset](https://huggingface.co/spaces/lerobot/visualize_dataset?dataset=danaaubakirova%2Fkoch_test&episode=0):
+```bash
+python lerobot/scripts/train.py \
+--policy.path=lerobot/pi0 \
+--dataset.repo_id=danaaubakirova/koch_test
+```
+See the [code](https://github.com/huggingface/lerobot/blob/main/lerobot/common/policies/pi0/modeling_pi0.py) for implementation details.

config.json ADDED Viewed

	@@ -0,0 +1,85 @@

+{
+    "type": "pi0",
+    "n_obs_steps": 1,
+    "input_features": {
+        "observation.state": {
+            "type": "STATE",
+            "shape": [
+                6
+            ]
+        },
+        "observation.images.camera0": {
+            "type": "VISUAL",
+            "shape": [
+                3,
+                480,
+                640
+            ]
+        },
+        "observation.images.camera1": {
+            "type": "VISUAL",
+            "shape": [
+                3,
+                480,
+                640
+            ]
+        },
+        "observation.images.camera2": {
+            "type": "VISUAL",
+            "shape": [
+                3,
+                480,
+                640
+            ]
+        }
+    },
+    "output_features": {
+        "action": {
+            "type": "ACTION",
+            "shape": [
+                6
+            ]
+        }
+    },
+    "device": "cpu",
+    "use_amp": false,
+    "push_to_hub": true,
+    "repo_id": null,
+    "private": null,
+    "tags": null,
+    "license": null,
+    "chunk_size": 50,
+    "n_action_steps": 50,
+    "normalization_mapping": {
+        "VISUAL": "IDENTITY",
+        "STATE": "MEAN_STD",
+        "ACTION": "MEAN_STD"
+    },
+    "max_state_dim": 32,
+    "max_action_dim": 32,
+    "resize_imgs_with_padding": [
+        224,
+        224
+    ],
+    "empty_cameras": 0,
+    "adapt_to_pi_aloha": false,
+    "use_delta_joint_actions_aloha": false,
+    "tokenizer_max_length": 48,
+    "proj_width": 1024,
+    "num_steps": 10,
+    "use_cache": true,
+    "attention_implementation": "eager",
+    "freeze_vision_encoder": true,
+    "train_expert_only": false,
+    "train_state_proj": true,
+    "optimizer_lr": 2.5e-05,
+    "optimizer_betas": [
+        0.9,
+        0.95
+    ],
+    "optimizer_eps": 1e-08,
+    "optimizer_weight_decay": 1e-10,
+    "scheduler_warmup_steps": 1000,
+    "scheduler_decay_steps": 30000,
+    "scheduler_decay_lr": 2.5e-06
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:766fa4f2a981da1e46b0be7ecb6055c4780117c6e8aef8a9f36ecdfd0b1c5da8
+size 136

policy_postprocessor.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "name": "policy_postprocessor",
+  "steps": [
+    {
+      "registry_name": "unnormalizer_processor",
+      "config": {
+        "eps": 1e-08,
+        "features": {
+          "action": {
+            "type": "ACTION",
+            "shape": [
+              6
+            ]
+          }
+        },
+        "norm_map": {
+          "VISUAL": "IDENTITY",
+          "STATE": "MEAN_STD",
+          "ACTION": "MEAN_STD"
+        }
+      }
+    },
+    {
+      "registry_name": "device_processor",
+      "config": {
+        "device": "cpu",
+        "float_dtype": null
+      }
+    }
+  ]
+}

policy_preprocessor.json ADDED Viewed

	@@ -0,0 +1,86 @@

+{
+  "name": "policy_preprocessor",
+  "steps": [
+    {
+      "registry_name": "rename_observations_processor",
+      "config": {
+        "rename_map": {}
+      }
+    },
+    {
+      "registry_name": "to_batch_processor",
+      "config": {}
+    },
+    {
+      "registry_name": "pi0_new_line_processor",
+      "config": {}
+    },
+    {
+      "registry_name": "tokenizer_processor",
+      "config": {
+        "max_length": 48,
+        "task_key": "task",
+        "padding_side": "right",
+        "padding": "max_length",
+        "truncation": true,
+        "tokenizer_name": "google/paligemma-3b-pt-224"
+      }
+    },
+    {
+      "registry_name": "device_processor",
+      "config": {
+        "device": "cpu",
+        "float_dtype": null
+      }
+    },
+    {
+      "registry_name": "normalizer_processor",
+      "config": {
+        "eps": 1e-08,
+        "features": {
+          "observation.state": {
+            "type": "STATE",
+            "shape": [
+              6
+            ]
+          },
+          "observation.images.camera0": {
+            "type": "VISUAL",
+            "shape": [
+              3,
+              480,
+              640
+            ]
+          },
+          "observation.images.camera1": {
+            "type": "VISUAL",
+            "shape": [
+              3,
+              480,
+              640
+            ]
+          },
+          "observation.images.camera2": {
+            "type": "VISUAL",
+            "shape": [
+              3,
+              480,
+              640
+            ]
+          },
+          "action": {
+            "type": "ACTION",
+            "shape": [
+              6
+            ]
+          }
+        },
+        "norm_map": {
+          "VISUAL": "IDENTITY",
+          "STATE": "MEAN_STD",
+          "ACTION": "MEAN_STD"
+        }
+      }
+    }
+  ]
+}