jcoholich commited on
Commit
ea391ed
·
verified ·
1 Parent(s): 20e09a2

Add files using upload-large-folder tool

Browse files
.gitattributes CHANGED
@@ -1,35 +1 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
  *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  *.safetensors filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
README.md ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ library_name: lerobot
5
+ pipeline_tag: robotics
6
+ tags:
7
+ - vision-language-action
8
+ - imitation-learning
9
+ - lerobot
10
+ inference: false
11
+ license: gemma
12
+ ---
13
+
14
+ # π₀.₅ (Pi05) (LeRobot)
15
+
16
+ π₀.₅ is a Vision-Language-Action (VLA) model with open-world generalization from Physical Intelligence, co-trained on robot demonstrations and large-scale multimodal data to execute long-horizon tasks in unseen real-world environments.
17
+
18
+ **Note:** This model currently supports only the flow-matching action head for π₀.₅ training and inference.
19
+ Other components from the original work (e.g., subtask prediction, action tokenization, or RL) were not released upstream and are not included here, though the LeRobot team is actively working to support them.
20
+
21
+ **Original paper:** π0.5: A Vision-Language-Action Model with Open-World Generalization
22
+ **Reference implementation:** https://github.com/Physical-Intelligence/openpi
23
+ **LeRobot implementation:** Follows the original reference code for compatibility.
24
+
25
+
26
+ ## Model description
27
+
28
+ - **Inputs:** images (multi-view), proprio/state, optional language instruction
29
+ - **Outputs:** continuous actions
30
+ - **Training objective:** flow matching
31
+ - **Action representation:** continuous
32
+ - **Intended use:** Base model to fine-tune on your specific use case
33
+
34
+
35
+ ## Quick start (inference on a real batch)
36
+
37
+ ### Installation
38
+
39
+ ```bash
40
+ pip install "lerobot[pi]@git+https://github.com/huggingface/lerobot.git"
41
+ ```
42
+ For full installation details (including optional video dependencies such as ffmpeg for torchcodec), see the official documentation: https://huggingface.co/docs/lerobot/installation
43
+
44
+ ### Load model + dataset, run `select_action`
45
+
46
+ ```python
47
+ import torch
48
+ from lerobot.datasets.lerobot_dataset import LeRobotDataset
49
+ from lerobot.policies.factory import make_pre_post_processors
50
+
51
+ # Swap this import per-policy
52
+ from lerobot.policies.pi05 import PI05Policy
53
+
54
+ # load a policy
55
+ model_id = "lerobot/pi05_base" # <- swap checkpoint
56
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
57
+
58
+ policy = PI05Policy.from_pretrained(model_id).to(device).eval()
59
+
60
+ preprocess, postprocess = make_pre_post_processors(
61
+ policy.config,
62
+ model_id,
63
+ preprocessor_overrides={"device_processor": {"device": str(device)}},
64
+ )
65
+ # load a lerobotdataset (we will replace with a simpler dataset)
66
+ dataset = LeRobotDataset("lerobot/libero")
67
+
68
+ # pick an episode
69
+ episode_index = 0
70
+
71
+ # each episode corresponds to a contiguous range of frame indices
72
+ from_idx = dataset.meta.episodes["dataset_from_index"][episode_index]
73
+ to_idx = dataset.meta.episodes["dataset_to_index"][episode_index]
74
+
75
+ # get a single frame from that episode (e.g. the first frame)
76
+ frame_index = from_idx
77
+ frame = dict(dataset[frame_index])
78
+
79
+ batch = preprocess(frame)
80
+ with torch.inference_mode():
81
+ pred_action = policy.select_action(batch)
82
+ # use your policy postprocessor; this post-processes the action,
83
+ # e.g. unnormalizing the actions, detokenizing them, etc.
84
+ pred_action = postprocess(pred_action)
85
+ ```
86
+
87
+
88
+ ## Training step (loss + backward)
89
+
90
+ If you’re training / fine-tuning, you typically call `forward(...)` to get a loss and then:
91
+
92
+ ```python
93
+ policy.train()
94
+ batch = dict(dataset[0])
95
+ batch = preprocess(batch)
96
+
97
+ loss, outputs = policy.forward(batch)
98
+ loss.backward()
99
+
100
+ ```
101
+
102
+ > Notes:
103
+ >
104
+ > - Some policies expose `policy(**batch)` or return a dict; keep this snippet aligned with the policy API.
105
+ > - Use your trainer script (`lerobot-train`) for full training loops.
106
+
107
+
108
+ ## How to train / fine-tune
109
+
110
+ ```bash
111
+ lerobot-train \
112
+ --dataset.repo_id=${HF_USER}/<dataset> \
113
+ --output_dir=./outputs/[RUN_NAME] \
114
+ --job_name=[RUN_NAME] \
115
+ --policy.repo_id=${HF_USER}/<desired_policy_repo_id> \
116
+ --policy.path=lerobot/[BASE_CHECKPOINT] \
117
+ --policy.dtype=bfloat16 \
118
+ --policy.device=cuda \
119
+ --steps=100000 \
120
+ --batch_size=4
121
+ ```
122
+
123
+ Add policy-specific flags below:
124
+
125
+ - `--policy.chunk_size=...`
126
+ - `--policy.n_action_steps=...`
127
+ - `--policy.max_action_tokens=...`
128
+ - `--policy.gradient_checkpointing=true`
129
+
130
+
131
+ ## Real-World Inference & Evaluation
132
+
133
+ You can use the `record` script from [**`lerobot-record`**](https://github.com/huggingface/lerobot/blob/main/src/lerobot/scripts/lerobot_record.py) with a policy checkpoint as input, to run inference and evaluate your policy.
134
+
135
+ For instance, run the following command to run inference and record 10 evaluation episodes:
136
+
137
+ ```
138
+ lerobot-record \
139
+ --robot.type=so100_follower \
140
+ --robot.port=/dev/ttyACM1 \
141
+ --robot.cameras="{ up: {type: opencv, index_or_path: /dev/video10, width: 640, height: 480, fps: 30}, side: {type: intelrealsense, serial_number_or_name: 233522074606, width: 640, height: 480, fps: 30}}" \
142
+ --robot.id=my_awesome_follower_arm \
143
+ --display_data=false \
144
+ --dataset.repo_id=${HF_USER}/eval_so100 \
145
+ --dataset.single_task="Put lego brick into the transparent box" \
146
+ --policy.path=${HF_USER}/my_policy
147
+ # Teleoperation is optional if you want to teleoperate in between episodes:
148
+ # --teleop.type=so100_leader \
149
+ # --teleop.port=/dev/ttyACM0 \
150
+ # --teleop.id=my_awesome_leader_arm
151
+ ```
config.json ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "type": "pi05",
3
+ "n_obs_steps": 1,
4
+ "input_features": {
5
+ "observation.images.base_0_rgb": {
6
+ "type": "VISUAL",
7
+ "shape": [
8
+ 3,
9
+ 224,
10
+ 224
11
+ ]
12
+ },
13
+ "observation.images.left_wrist_0_rgb": {
14
+ "type": "VISUAL",
15
+ "shape": [
16
+ 3,
17
+ 224,
18
+ 224
19
+ ]
20
+ },
21
+ "observation.images.right_wrist_0_rgb": {
22
+ "type": "VISUAL",
23
+ "shape": [
24
+ 3,
25
+ 224,
26
+ 224
27
+ ]
28
+ },
29
+ "observation.state": {
30
+ "type": "STATE",
31
+ "shape": [
32
+ 32
33
+ ]
34
+ }
35
+ },
36
+ "output_features": {
37
+ "action": {
38
+ "type": "ACTION",
39
+ "shape": [
40
+ 32
41
+ ]
42
+ }
43
+ },
44
+ "device": "mps",
45
+ "use_amp": false,
46
+ "push_to_hub": true,
47
+ "repo_id": null,
48
+ "private": null,
49
+ "tags": null,
50
+ "license": null,
51
+ "paligemma_variant": "gemma_2b",
52
+ "action_expert_variant": "gemma_300m",
53
+ "dtype": "bfloat16",
54
+ "chunk_size": 50,
55
+ "n_action_steps": 50,
56
+ "max_action_dim": 32,
57
+ "max_state_dim": 32,
58
+ "num_inference_steps": 10,
59
+ "time_sampling_beta_alpha": 1.5,
60
+ "time_sampling_beta_beta": 1.0,
61
+ "min_period": 0.004,
62
+ "max_period": 4.0,
63
+ "image_resolution": [
64
+ 224,
65
+ 224
66
+ ],
67
+ "gradient_checkpointing": false,
68
+ "compile_model": false,
69
+ "compile_mode": "max-autotune",
70
+ "optimizer_lr": 2.5e-05,
71
+ "optimizer_betas": [
72
+ 0.9,
73
+ 0.95
74
+ ],
75
+ "optimizer_eps": 1e-08,
76
+ "optimizer_weight_decay": 0.01,
77
+ "optimizer_grad_clip_norm": 1.0,
78
+ "scheduler_warmup_steps": 1000,
79
+ "scheduler_decay_steps": 30000,
80
+ "scheduler_decay_lr": 2.5e-06,
81
+ "tokenizer_max_length": 200
82
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9e97ac12eda0cba1636497390dd8e8b7bb8e7436c854bd91e55f1e89ee2bbad
3
+ size 7233650408
policy_postprocessor.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "policy_postprocessor",
3
+ "steps": [
4
+ {
5
+ "registry_name": "unnormalizer_processor",
6
+ "config": {
7
+ "eps": 1e-08,
8
+ "features": {},
9
+ "norm_map": {
10
+ "VISUAL": "IDENTITY",
11
+ "STATE": "QUANTILES",
12
+ "ACTION": "QUANTILES"
13
+ }
14
+ }
15
+ },
16
+ {
17
+ "registry_name": "device_processor",
18
+ "config": {
19
+ "device": "cpu",
20
+ "float_dtype": null
21
+ }
22
+ }
23
+ ]
24
+ }
policy_preprocessor.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "policy_preprocessor",
3
+ "steps": [
4
+ {
5
+ "registry_name": "rename_observations_processor",
6
+ "config": {
7
+ "rename_map": {}
8
+ }
9
+ },
10
+ {
11
+ "registry_name": "to_batch_processor",
12
+ "config": {}
13
+ },
14
+ {
15
+ "registry_name": "normalizer_processor",
16
+ "config": {
17
+ "eps": 1e-08,
18
+ "features": {},
19
+ "norm_map": {
20
+ "VISUAL": "IDENTITY",
21
+ "STATE": "QUANTILES",
22
+ "ACTION": "QUANTILES"
23
+ }
24
+ }
25
+ },
26
+ {
27
+ "registry_name": "pi05_prepare_state_tokenizer_processor_step",
28
+ "config": {}
29
+ },
30
+ {
31
+ "registry_name": "tokenizer_processor",
32
+ "config": {
33
+ "max_length": 200,
34
+ "task_key": "task",
35
+ "padding_side": "right",
36
+ "padding": "max_length",
37
+ "truncation": true,
38
+ "tokenizer_name": "google/paligemma-3b-pt-224"
39
+ }
40
+ },
41
+ {
42
+ "registry_name": "device_processor",
43
+ "config": {
44
+ "device": "cpu",
45
+ "float_dtype": null
46
+ }
47
+ }
48
+ ]
49
+ }