jcoholich commited on
Commit
ea391ed
·
verified ·
1 Parent(s): 20e09a2

Add files using upload-large-folder tool

Browse files
.gitattributes CHANGED
@@ -1,35 +1 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
  *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  *.safetensors filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
README.md ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ library_name: lerobot
5
+ pipeline_tag: robotics
6
+ tags:
7
+ - vision-language-action
8
+ - imitation-learning
9
+ - lerobot
10
+ inference: false
11
+ license: gemma
12
+ ---
13
+
14
+ # π₀.₅ (Pi05) (LeRobot)
15
+
16
+ π₀.₅ is a Vision-Language-Action (VLA) model with open-world generalization from Physical Intelligence, co-trained on robot demonstrations and large-scale multimodal data to execute long-horizon tasks in unseen real-world environments.
17
+
18
+ **Note:** This model currently supports only the flow-matching action head for π₀.₅ training and inference.
19
+ Other components from the original work (e.g., subtask prediction, action tokenization, or RL) were not released upstream and are not included here, though the LeRobot team is actively working to support them.
20
+
21
+ **Original paper:** π0.5: A Vision-Language-Action Model with Open-World Generalization
22
+ **Reference implementation:** https://github.com/Physical-Intelligence/openpi
23
+ **LeRobot implementation:** Follows the original reference code for compatibility.
24
+
25
+
26
+ ## Model description
27
+
28
+ - **Inputs:** images (multi-view), proprio/state, optional language instruction
29
+ - **Outputs:** continuous actions
30
+ - **Training objective:** flow matching
31
+ - **Action representation:** continuous
32
+ - **Intended use:** Base model to fine-tune on your specific use case
33
+
34
+
35
+ ## Quick start (inference on a real batch)
36
+
37
+ ### Installation
38
+
39
+ ```bash
40
+ pip install "lerobot[pi]@git+https://github.com/huggingface/lerobot.git"
41
+ ```
42
+ For full installation details (including optional video dependencies such as ffmpeg for torchcodec), see the official documentation: https://huggingface.co/docs/lerobot/installation
43
+
44
+ ### Load model + dataset, run `select_action`
45
+
46
+ ```python
47
+ import torch
48
+ from lerobot.datasets.lerobot_dataset import LeRobotDataset
49
+ from lerobot.policies.factory import make_pre_post_processors
50
+
51
+ # Swap this import per-policy
52
+ from lerobot.policies.pi05 import PI05Policy
53
+
54
+ # load a policy
55
+ model_id = "lerobot/pi05_base" # <- swap checkpoint
56
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
57
+
58
+ policy = PI05Policy.from_pretrained(model_id).to(device).eval()
59
+
60
+ preprocess, postprocess = make_pre_post_processors(
61
+ policy.config,
62
+ model_id,
63
+ preprocessor_overrides={"device_processor": {"device": str(device)}},
64
+ )
65
+ # load a lerobotdataset (we will replace with a simpler dataset)
66
+ dataset = LeRobotDataset("lerobot/libero")
67
+
68
+ # pick an episode
69
+ episode_index = 0
70
+
71
+ # each episode corresponds to a contiguous range of frame indices
72
+ from_idx = dataset.meta.episodes["dataset_from_index"][episode_index]
73
+ to_idx = dataset.meta.episodes["dataset_to_index"][episode_index]
74
+
75
+ # get a single frame from that episode (e.g. the first frame)
76
+ frame_index = from_idx
77
+ frame = dict(dataset[frame_index])
78
+
79
+ batch = preprocess(frame)
80
+ with torch.inference_mode():
81
+ pred_action = policy.select_action(batch)
82
+ # use your policy postprocessor; this post-processes the action,
83
+ # e.g. unnormalizing the actions, detokenizing them, etc.
84
+ pred_action = postprocess(pred_action)
85
+ ```
86
+
87
+
88
+ ## Training step (loss + backward)
89
+
90
+ If you’re training / fine-tuning, you typically call `forward(...)` to get a loss and then:
91
+
92
+ ```python
93
+ policy.train()
94
+ batch = dict(dataset[0])
95
+ batch = preprocess(batch)
96
+
97
+ loss, outputs = policy.forward(batch)
98
+ loss.backward()
99
+
100
+ ```
101
+
102
+ > Notes:
103
+ >
104
+ > - Some policies expose `policy(**batch)` or return a dict; keep this snippet aligned with the policy API.
105
+ > - Use your trainer script (`lerobot-train`) for full training loops.
106
+
107
+
108
+ ## How to train / fine-tune
109
+
110
+ ```bash
111
+ lerobot-train \
112
+ --dataset.repo_id=${HF_USER}/<dataset> \
113
+ --output_dir=./outputs/[RUN_NAME] \
114
+ --job_name=[RUN_NAME] \
115
+ --policy.repo_id=${HF_USER}/<desired_policy_repo_id> \
116
+ --policy.path=lerobot/[BASE_CHECKPOINT] \
117
+ --policy.dtype=bfloat16 \
118
+ --policy.device=cuda \
119
+ --steps=100000 \
120
+ --batch_size=4
121
+ ```
122
+
123
+ Add policy-specific flags below:
124
+
125
+ - `--policy.chunk_size=...`
126
+ - `--policy.n_action_steps=...`
127
+ - `--policy.max_action_tokens=...`
128
+ - `--policy.gradient_checkpointing=true`
129
+
130
+
131
+ ## Real-World Inference & Evaluation
132
+
133
+ You can use the `record` script from [**`lerobot-record`**](https://github.com/huggingface/lerobot/blob/main/src/lerobot/scripts/lerobot_record.py) with a policy checkpoint as input, to run inference and evaluate your policy.
134
+
135
+ For instance, run the following command to run inference and record 10 evaluation episodes:
136
+
137
+ ```
138
+ lerobot-record \
139
+ --robot.type=so100_follower \
140
+ --robot.port=/dev/ttyACM1 \
141
+ --robot.cameras="{ up: {type: opencv, index_or_path: /dev/video10, width: 640, height: 480, fps: 30}, side: {type: intelrealsense, serial_number_or_name: 233522074606, width: 640, height: 480, fps: 30}}" \
142
+ --robot.id=my_awesome_follower_arm \
143
+ --display_data=false \
144
+ --dataset.repo_id=${HF_USER}/eval_so100 \
145
+ --dataset.single_task="Put lego brick into the transparent box" \
146
+ --policy.path=${HF_USER}/my_policy
147
+ # Teleoperation is optional if you want to teleoperate in between episodes:
148
+ # --teleop.type=so100_leader \
149
+ # --teleop.port=/dev/ttyACM0 \
150
+ # --teleop.id=my_awesome_leader_arm
151
+ ```
config.json ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "type": "pi05",
3
+ "n_obs_steps": 1,
4
+ "input_features": {
5
+ "observation.images.base_0_rgb": {
6
+ "type": "VISUAL",
7
+ "shape": [
8
+ 3,
9
+ 224,
10
+ 224
11
+ ]
12
+ },
13
+ "observation.images.left_wrist_0_rgb": {
14
+ "type": "VISUAL",
15
+ "shape": [
16
+ 3,
17
+ 224,
18
+ 224
19
+ ]
20
+ },
21
+ "observation.images.right_wrist_0_rgb": {
22
+ "type": "VISUAL",
23
+ "shape": [
24
+ 3,
25
+ 224,
26
+ 224
27
+ ]
28
+ },
29
+ "observation.state": {
30
+ "type": "STATE",
31
+ "shape": [
32
+ 32
33
+ ]
34
+ }
35
+ },
36
+ "output_features": {
37
+ "action": {
38
+ "type": "ACTION",
39
+ "shape": [
40
+ 32
41
+ ]
42
+ }
43
+ },
44
+ "device": "mps",
45
+ "use_amp": false,
46
+ "push_to_hub": true,
47
+ "repo_id": null,
48
+ "private": null,
49
+ "tags": null,
50
+ "license": null,
51
+ "paligemma_variant": "gemma_2b",
52
+ "action_expert_variant": "gemma_300m",
53
+ "dtype": "bfloat16",
54
+ "chunk_size": 50,
55
+ "n_action_steps": 50,
56
+ "max_action_dim": 32,
57
+ "max_state_dim": 32,
58
+ "num_inference_steps": 10,
59
+ "time_sampling_beta_alpha": 1.5,
60
+ "time_sampling_beta_beta": 1.0,
61
+ "min_period": 0.004,
62
+ "max_period": 4.0,
63
+ "image_resolution": [
64
+ 224,
65
+ 224
66
+ ],
67
+ "gradient_checkpointing": false,
68
+ "compile_model": false,
69
+ "compile_mode": "max-autotune",
70
+ "optimizer_lr": 2.5e-05,
71
+ "optimizer_betas": [
72
+ 0.9,
73
+ 0.95
74
+ ],
75
+ "optimizer_eps": 1e-08,
76
+ "optimizer_weight_decay": 0.01,
77
+ "optimizer_grad_clip_norm": 1.0,
78
+ "scheduler_warmup_steps": 1000,
79
+ "scheduler_decay_steps": 30000,
80
+ "scheduler_decay_lr": 2.5e-06,
81
+ "tokenizer_max_length": 200
82
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9e97ac12eda0cba1636497390dd8e8b7bb8e7436c854bd91e55f1e89ee2bbad
3
+ size 7233650408
policy_postprocessor.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "policy_postprocessor",
3
+ "steps": [
4
+ {
5
+ "registry_name": "unnormalizer_processor",
6
+ "config": {
7
+ "eps": 1e-08,
8
+ "features": {},
9
+ "norm_map": {
10
+ "VISUAL": "IDENTITY",
11
+ "STATE": "QUANTILES",
12
+ "ACTION": "QUANTILES"
13
+ }
14
+ }
15
+ },
16
+ {
17
+ "registry_name": "device_processor",
18
+ "config": {
19
+ "device": "cpu",
20
+ "float_dtype": null
21
+ }
22
+ }
23
+ ]
24
+ }
policy_preprocessor.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "policy_preprocessor",
3
+ "steps": [
4
+ {
5
+ "registry_name": "rename_observations_processor",
6
+ "config": {
7
+ "rename_map": {}
8
+ }
9
+ },
10
+ {
11
+ "registry_name": "to_batch_processor",
12
+ "config": {}
13
+ },
14
+ {
15
+ "registry_name": "normalizer_processor",
16
+ "config": {
17
+ "eps": 1e-08,
18
+ "features": {},
19
+ "norm_map": {
20
+ "VISUAL": "IDENTITY",
21
+ "STATE": "QUANTILES",
22
+ "ACTION": "QUANTILES"
23
+ }
24
+ }
25
+ },
26
+ {
27
+ "registry_name": "pi05_prepare_state_tokenizer_processor_step",
28
+ "config": {}
29
+ },
30
+ {
31
+ "registry_name": "tokenizer_processor",
32
+ "config": {
33
+ "max_length": 200,
34
+ "task_key": "task",
35
+ "padding_side": "right",
36
+ "padding": "max_length",
37
+ "truncation": true,
38
+ "tokenizer_name": "google/paligemma-3b-pt-224"
39
+ }
40
+ },
41
+ {
42
+ "registry_name": "device_processor",
43
+ "config": {
44
+ "device": "cpu",
45
+ "float_dtype": null
46
+ }
47
+ }
48
+ ]
49
+ }