joaoocruz00 committed
Commit 4c94f9c · verified · 1 Parent(s): fc1c46c

Add files using upload-large-folder tool

This view is limited to 50 files because the commit contains too many changes. See the raw diff for the full set.
Files changed (50)
  1. .cache/calibration/aloha_default/left_follower.json +68 -0
  2. .cache/calibration/aloha_default/left_leader.json +68 -0
  3. .cache/calibration/aloha_default/right_follower.json +68 -0
  4. .cache/calibration/aloha_default/right_leader.json +68 -0
  5. .dockerignore +160 -0
  6. .gitattributes +19 -34
  7. .github/ISSUE_TEMPLATE/bug-report.yml +68 -0
  8. .github/PULL_REQUEST_TEMPLATE.md +34 -0
  9. .github/workflows/build-docker-images.yml +135 -0
  10. .github/workflows/nightly-tests.yml +93 -0
  11. .github/workflows/quality.yml +72 -0
  12. .github/workflows/test-docker-build.yml +82 -0
  13. .github/workflows/test.yml +150 -0
  14. .github/workflows/trufflehog.yml +35 -0
  15. .gitignore +173 -0
  16. .pre-commit-config.yaml +74 -0
  17. CODE_OF_CONDUCT.md +133 -0
  18. CONTRIBUTING.md +308 -0
  19. LICENSE +507 -0
  20. Makefile +142 -0
  21. README.md +389 -0
  22. benchmarks/video/README.md +271 -0
  23. benchmarks/video/capture_camera_feed.py +90 -0
  24. benchmarks/video/run_video_benchmark.py +490 -0
  25. docker/lerobot-cpu/Dockerfile +29 -0
  26. docker/lerobot-gpu-dev/Dockerfile +68 -0
  27. docker/lerobot-gpu/Dockerfile +24 -0
  28. examples/10_use_so100.md +621 -0
  29. examples/11_use_lekiwi.md +585 -0
  30. examples/11_use_moss.md +335 -0
  31. examples/1_load_lerobot_dataset.py +148 -0
  32. examples/2_evaluate_pretrained_policy.py +139 -0
  33. examples/3_train_policy.py +120 -0
  34. examples/4_train_policy_with_script.md +274 -0
  35. examples/7_get_started_with_real_robot.md +1012 -0
  36. examples/8_use_stretch.md +161 -0
  37. examples/9_use_aloha.md +181 -0
  38. examples/advanced/1_add_image_transforms.py +67 -0
  39. examples/advanced/2_calculate_validation_loss.py +104 -0
  40. examples/port_datasets/pusht_zarr.py +243 -0
  41. lerobot/__init__.py +217 -0
  42. lerobot/__version__.py +23 -0
  43. lerobot/common/constants.py +45 -0
  44. lerobot/common/datasets/backward_compatibility.py +68 -0
  45. lerobot/common/datasets/card_template.md +27 -0
  46. lerobot/common/datasets/compute_stats.py +176 -0
  47. lerobot/common/datasets/factory.py +118 -0
  48. lerobot/common/datasets/image_writer.py +178 -0
  49. lerobot/common/datasets/lerobot_dataset.py +1217 -0
  50. lerobot/common/datasets/online_buffer.py +384 -0
.cache/calibration/aloha_default/left_follower.json ADDED
@@ -0,0 +1,68 @@
+ {
+ "homing_offset": [
+ 2048,
+ 3072,
+ 3072,
+ -1024,
+ -1024,
+ 2048,
+ -2048,
+ 2048,
+ -2048
+ ],
+ "drive_mode": [
+ 1,
+ 1,
+ 1,
+ 0,
+ 0,
+ 1,
+ 0,
+ 1,
+ 0
+ ],
+ "start_pos": [
+ 2015,
+ 3058,
+ 3061,
+ 1071,
+ 1071,
+ 2035,
+ 2152,
+ 2029,
+ 2499
+ ],
+ "end_pos": [
+ -1008,
+ -1963,
+ -1966,
+ 2141,
+ 2143,
+ -971,
+ 3043,
+ -1077,
+ 3144
+ ],
+ "calib_mode": [
+ "DEGREE",
+ "DEGREE",
+ "DEGREE",
+ "DEGREE",
+ "DEGREE",
+ "DEGREE",
+ "DEGREE",
+ "DEGREE",
+ "LINEAR"
+ ],
+ "motor_names": [
+ "waist",
+ "shoulder",
+ "shoulder_shadow",
+ "elbow",
+ "elbow_shadow",
+ "forearm_roll",
+ "wrist_angle",
+ "wrist_rotate",
+ "gripper"
+ ]
+ }
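This calibration file (and the three that follow) uses the same schema: six parallel arrays with one entry per motor of the arm. As a rough illustration only, and not LeRobot's own calibration-loading code, a few lines of Python are enough to inspect one of these files (the path is the one added in this commit):

```python
import json
from pathlib import Path

# Path added by this commit; adjust if your cache lives elsewhere.
calib_path = Path(".cache/calibration/aloha_default/left_follower.json")

with open(calib_path) as f:
    calib = json.load(f)

# The six keys are parallel arrays indexed by motor, so zipping them
# yields one record per joint of the follower arm.
for name, offset, drive, start, end, mode in zip(
    calib["motor_names"],
    calib["homing_offset"],
    calib["drive_mode"],
    calib["start_pos"],
    calib["end_pos"],
    calib["calib_mode"],
):
    print(f"{name:16s} mode={mode:6s} drive={drive} homing={offset:6d} range=({start}, {end})")
```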
.cache/calibration/aloha_default/left_leader.json ADDED
@@ -0,0 +1,68 @@
+ {
+ "homing_offset": [
+ 2048,
+ 3072,
+ 3072,
+ -1024,
+ -1024,
+ 2048,
+ -2048,
+ 2048,
+ -1024
+ ],
+ "drive_mode": [
+ 1,
+ 1,
+ 1,
+ 0,
+ 0,
+ 1,
+ 0,
+ 1,
+ 0
+ ],
+ "start_pos": [
+ 2035,
+ 3024,
+ 3019,
+ 979,
+ 981,
+ 1982,
+ 2166,
+ 2124,
+ 1968
+ ],
+ "end_pos": [
+ -990,
+ -2017,
+ -2015,
+ 2078,
+ 2076,
+ -1030,
+ 3117,
+ -1016,
+ 2556
+ ],
+ "calib_mode": [
+ "DEGREE",
+ "DEGREE",
+ "DEGREE",
+ "DEGREE",
+ "DEGREE",
+ "DEGREE",
+ "DEGREE",
+ "DEGREE",
+ "LINEAR"
+ ],
+ "motor_names": [
+ "waist",
+ "shoulder",
+ "shoulder_shadow",
+ "elbow",
+ "elbow_shadow",
+ "forearm_roll",
+ "wrist_angle",
+ "wrist_rotate",
+ "gripper"
+ ]
+ }
.cache/calibration/aloha_default/right_follower.json ADDED
@@ -0,0 +1,68 @@
+ {
+ "homing_offset": [
+ 2048,
+ 3072,
+ 3072,
+ -1024,
+ -1024,
+ 2048,
+ -2048,
+ 2048,
+ -2048
+ ],
+ "drive_mode": [
+ 1,
+ 1,
+ 1,
+ 0,
+ 0,
+ 1,
+ 0,
+ 1,
+ 0
+ ],
+ "start_pos": [
+ 2056,
+ 2895,
+ 2896,
+ 1191,
+ 1190,
+ 2018,
+ 2051,
+ 2056,
+ 2509
+ ],
+ "end_pos": [
+ -1040,
+ -2004,
+ -2006,
+ 2126,
+ 2127,
+ -1010,
+ 3050,
+ -1117,
+ 3143
+ ],
+ "calib_mode": [
+ "DEGREE",
+ "DEGREE",
+ "DEGREE",
+ "DEGREE",
+ "DEGREE",
+ "DEGREE",
+ "DEGREE",
+ "DEGREE",
+ "LINEAR"
+ ],
+ "motor_names": [
+ "waist",
+ "shoulder",
+ "shoulder_shadow",
+ "elbow",
+ "elbow_shadow",
+ "forearm_roll",
+ "wrist_angle",
+ "wrist_rotate",
+ "gripper"
+ ]
+ }
.cache/calibration/aloha_default/right_leader.json ADDED
@@ -0,0 +1,68 @@
+ {
+ "homing_offset": [
+ 2048,
+ 3072,
+ 3072,
+ -1024,
+ -1024,
+ 2048,
+ -2048,
+ 2048,
+ -2048
+ ],
+ "drive_mode": [
+ 1,
+ 1,
+ 1,
+ 0,
+ 0,
+ 1,
+ 0,
+ 1,
+ 0
+ ],
+ "start_pos": [
+ 2068,
+ 3034,
+ 3030,
+ 1038,
+ 1041,
+ 1991,
+ 1948,
+ 2090,
+ 1985
+ ],
+ "end_pos": [
+ -1025,
+ -2014,
+ -2015,
+ 2058,
+ 2060,
+ -955,
+ 3091,
+ -940,
+ 2576
+ ],
+ "calib_mode": [
+ "DEGREE",
+ "DEGREE",
+ "DEGREE",
+ "DEGREE",
+ "DEGREE",
+ "DEGREE",
+ "DEGREE",
+ "DEGREE",
+ "LINEAR"
+ ],
+ "motor_names": [
+ "waist",
+ "shoulder",
+ "shoulder_shadow",
+ "elbow",
+ "elbow_shadow",
+ "forearm_roll",
+ "wrist_angle",
+ "wrist_rotate",
+ "gripper"
+ ]
+ }
.dockerignore ADDED
@@ -0,0 +1,160 @@
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ # Misc
+ .git
+ tmp
+ wandb
+ data
+ outputs
+ .vscode
+ rl
+ media
+
+
+ # Logging
+ logs
+
+ # HPC
+ nautilus/*.yaml
+ *.key
+
+ # Slurm
+ sbatch*.sh
+
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ pip-wheel-metadata/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ !tests/artifacts
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+
+ # Ignore .cache except calibration
+ .cache/*
+ !.cache/calibration/
+ !.cache/calibration/**
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
.gitattributes CHANGED
@@ -1,35 +1,20 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ *.memmap filter=lfs diff=lfs merge=lfs -text
+ *.stl filter=lfs diff=lfs merge=lfs -text
  *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.mp4 filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.json !text !filter !merge !diff
.github/ISSUE_TEMPLATE/bug-report.yml ADDED
@@ -0,0 +1,68 @@
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ name: "\U0001F41B Bug Report"
+ description: Submit a bug report to help us improve LeRobot
+ body:
+ - type: markdown
+ attributes:
+ value: |
+ Thanks for taking the time to submit a bug report! 🐛
+ If this is not a bug related to the LeRobot library directly, but instead a general question about your code or the library specifically please use our [discord](https://discord.gg/s3KuuzsPFb).
+
+ - type: textarea
+ id: system-info
+ attributes:
+ label: System Info
+ description: If needed, you can share your lerobot configuration with us by running `python -m lerobot.scripts.display_sys_info` and copy-pasting its outputs below
+ render: Shell
+ placeholder: lerobot version, OS, python version, numpy version, torch version, and lerobot's configuration
+ validations:
+ required: true
+
+ - type: checkboxes
+ id: information-scripts-examples
+ attributes:
+ label: Information
+ description: 'The problem arises when using:'
+ options:
+ - label: "One of the scripts in the examples/ folder of LeRobot"
+ - label: "My own task or dataset (give details below)"
+
+ - type: textarea
+ id: reproduction
+ validations:
+ required: true
+ attributes:
+ label: Reproduction
+ description: |
+ If needed, provide a simple code sample that reproduces the problem you ran into. It can be a Colab link or just a code snippet.
+ Sharing error messages or stack traces could be useful as well!
+ Important! Use code tags to correctly format your code. See https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting
+ Try to avoid screenshots, as they are hard to read and don't allow copy-and-pasting.
+
+ placeholder: |
+ Steps to reproduce the behavior:
+
+ 1.
+ 2.
+ 3.
+
+ - type: textarea
+ id: expected-behavior
+ validations:
+ required: true
+ attributes:
+ label: Expected behavior
+ description: "A clear and concise description of what you would expect to happen."
.github/PULL_REQUEST_TEMPLATE.md ADDED
@@ -0,0 +1,34 @@
+ ## What this does
+ Explain what this PR does. Feel free to tag your PR with the appropriate label(s).
+
+ Examples:
+ | Title | Label |
+ |----------------------|-----------------|
+ | Fixes #[issue] | (🐛 Bug) |
+ | Adds new dataset | (🗃️ Dataset) |
+ | Optimizes something | (⚡️ Performance) |
+
+ ## How it was tested
+ Explain/show how you tested your changes.
+
+ Examples:
+ - Added `test_something` in `tests/test_stuff.py`.
+ - Added `new_feature` and checked that training converges with policy X on dataset/environment Y.
+ - Optimized `some_function`, it now runs X times faster than previously.
+
+ ## How to checkout & try? (for the reviewer)
+ Provide a simple way for the reviewer to try out your changes.
+
+ Examples:
+ ```bash
+ pytest -sx tests/test_stuff.py::test_something
+ ```
+ ```bash
+ python lerobot/scripts/train.py --some.option=true
+ ```
+
+ ## SECTION TO REMOVE BEFORE SUBMITTING YOUR PR
+ **Note**: Anyone in the community is free to review the PR once the tests have passed. Feel free to tag
+ members/contributors who may be interested in your PR. Try to avoid tagging more than 3 people.
+
+ **Note**: Before submitting this PR, please read the [contributor guideline](https://github.com/huggingface/lerobot/blob/main/CONTRIBUTING.md#submitting-a-pull-request-pr).
.github/workflows/build-docker-images.yml ADDED
@@ -0,0 +1,135 @@
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ # Inspired by
+ # https://github.com/huggingface/peft/blob/main/.github/workflows/build_docker_images.yml
+ name: Builds
+
+ on:
+ workflow_dispatch:
+ workflow_call:
+ schedule:
+ - cron: "0 1 * * *"
+
+ permissions: {}
+
+ env:
+ PYTHON_VERSION: "3.10"
+
+ jobs:
+ latest-cpu:
+ name: CPU
+ runs-on:
+ group: aws-general-8-plus
+ steps:
+ - name: Install Git LFS
+ run: |
+ sudo apt-get update
+ sudo apt-get install git-lfs
+ git lfs install
+
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v3
+ with:
+ cache-binary: false
+
+ - name: Check out code
+ uses: actions/checkout@v4
+ with:
+ lfs: true
+ persist-credentials: false
+
+ - name: Login to DockerHub
+ uses: docker/login-action@v3
+ with:
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
+ password: ${{ secrets.DOCKERHUB_PASSWORD }}
+
+ - name: Build and Push CPU
+ uses: docker/build-push-action@v5
+ with:
+ context: .
+ file: ./docker/lerobot-cpu/Dockerfile
+ push: true
+ tags: huggingface/lerobot-cpu
+ build-args: PYTHON_VERSION=${{ env.PYTHON_VERSION }}
+
+
+ latest-cuda:
+ name: GPU
+ runs-on:
+ group: aws-general-8-plus
+ steps:
+ - name: Install Git LFS
+ run: |
+ sudo apt-get update
+ sudo apt-get install git-lfs
+ git lfs install
+
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v3
+ with:
+ cache-binary: false
+
+ - name: Check out code
+ uses: actions/checkout@v4
+ with:
+ lfs: true
+ persist-credentials: false
+
+ - name: Login to DockerHub
+ uses: docker/login-action@v3
+ with:
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
+ password: ${{ secrets.DOCKERHUB_PASSWORD }}
+
+ - name: Build and Push GPU
+ uses: docker/build-push-action@v5
+ with:
+ context: .
+ file: ./docker/lerobot-gpu/Dockerfile
+ push: true
+ tags: huggingface/lerobot-gpu
+ build-args: PYTHON_VERSION=${{ env.PYTHON_VERSION }}
+
+
+ latest-cuda-dev:
+ name: GPU Dev
+ runs-on:
+ group: aws-general-8-plus
+ steps:
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v3
+ with:
+ cache-binary: false
+
+ - name: Check out code
+ uses: actions/checkout@v4
+ with:
+ persist-credentials: false
+
+ - name: Login to DockerHub
+ uses: docker/login-action@v3
+ with:
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
+ password: ${{ secrets.DOCKERHUB_PASSWORD }}
+
+ - name: Build and Push GPU dev
+ uses: docker/build-push-action@v5
+ with:
+ context: .
+ file: ./docker/lerobot-gpu-dev/Dockerfile
+ push: true
+ tags: huggingface/lerobot-gpu:dev
+ build-args: PYTHON_VERSION=${{ env.PYTHON_VERSION }}
.github/workflows/nightly-tests.yml ADDED
@@ -0,0 +1,93 @@
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ # Inspired by
+ # https://github.com/huggingface/peft/blob/main/.github/workflows/nightly.yml
+ name: Nightly
+
+ on:
+ workflow_dispatch:
+ schedule:
+ - cron: "0 2 * * *"
+
+ permissions: {}
+
+ # env:
+ # SLACK_API_TOKEN: ${{ secrets.SLACK_API_TOKEN }}
+ jobs:
+ run_all_tests_cpu:
+ name: CPU
+ strategy:
+ fail-fast: false
+ runs-on:
+ group: aws-general-8-plus
+ container:
+ image: huggingface/lerobot-cpu:latest
+ options: --shm-size "16gb"
+ credentials:
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
+ password: ${{ secrets.DOCKERHUB_PASSWORD }}
+ defaults:
+ run:
+ shell: bash
+ working-directory: /lerobot
+ steps:
+ - name: Tests
+ run: pytest -v --cov=./lerobot --disable-warnings tests
+
+ - name: Tests end-to-end
+ run: make test-end-to-end
+
+
+ run_all_tests_single_gpu:
+ name: GPU
+ strategy:
+ fail-fast: false
+ runs-on:
+ group: aws-g6-4xlarge-plus
+ env:
+ CUDA_VISIBLE_DEVICES: "0"
+ TEST_TYPE: "single_gpu"
+ container:
+ image: huggingface/lerobot-gpu:latest
+ options: --gpus all --shm-size "16gb"
+ credentials:
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
+ password: ${{ secrets.DOCKERHUB_PASSWORD }}
+ defaults:
+ run:
+ shell: bash
+ working-directory: /lerobot
+ steps:
+ - name: Nvidia-smi
+ run: nvidia-smi
+
+ - name: Test
+ run: pytest -v --cov=./lerobot --cov-report=xml --disable-warnings tests
+ # TODO(aliberts): Link with HF Codecov account
+ # - name: Upload coverage reports to Codecov with GitHub Action
+ # uses: codecov/codecov-action@v4
+ # with:
+ # files: ./coverage.xml
+ # verbose: true
+ - name: Tests end-to-end
+ env:
+ DEVICE: cuda
+ run: make test-end-to-end
+
+ # - name: Generate Report
+ # if: always()
+ # run: |
+ # pip install slack_sdk tabulate
+ # python scripts/log_reports.py >> $GITHUB_STEP_SUMMARY
.github/workflows/quality.yml ADDED
@@ -0,0 +1,72 @@
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ name: Quality
+
+ on:
+ workflow_dispatch:
+ workflow_call:
+ pull_request:
+ push:
+ branches:
+ - main
+
+ permissions: {}
+
+ env:
+ PYTHON_VERSION: "3.10"
+
+ jobs:
+ style:
+ name: Style
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout Repository
+ uses: actions/checkout@v4
+ with:
+ persist-credentials: false
+
+ - name: Set up Python
+ uses: actions/setup-python@v4
+ with:
+ python-version: ${{ env.PYTHON_VERSION }}
+
+ - name: Get Ruff Version from pre-commit-config.yaml
+ id: get-ruff-version
+ run: |
+ RUFF_VERSION=$(awk '/repo: https:\/\/github.com\/astral-sh\/ruff-pre-commit/{flag=1;next}/rev:/{if(flag){print $2;exit}}' .pre-commit-config.yaml)
+ echo "ruff_version=${RUFF_VERSION}" >> $GITHUB_OUTPUT
+
+ - name: Install Ruff
+ env:
+ RUFF_VERSION: ${{ steps.get-ruff-version.outputs.ruff_version }}
+ run: python -m pip install "ruff==${RUFF_VERSION}"
+
+ - name: Ruff check
+ run: ruff check --output-format=github
+
+ - name: Ruff format
+ run: ruff format --diff
+
+ typos:
+ name: Typos
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout Repository
+ uses: actions/checkout@v4
+ with:
+ persist-credentials: false
+
+ - name: typos-action
+ uses: crate-ci/typos@v1.29.10
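The `Get Ruff Version from pre-commit-config.yaml` step above keeps CI pinned to the same Ruff release as the pre-commit hook by scraping the `rev:` that follows the `ruff-pre-commit` repo entry. As a rough, stdlib-only illustration of what that awk one-liner does (the workflow itself uses awk, not this code):

```python
from pathlib import Path

# Mirrors the awk extraction in quality.yml: find the ruff-pre-commit repo
# entry in .pre-commit-config.yaml and return the rev that follows it.
def ruff_rev(config_path: str = ".pre-commit-config.yaml") -> str | None:
    in_ruff_repo = False
    for line in Path(config_path).read_text().splitlines():
        stripped = line.strip()
        if stripped.startswith("- repo:") and "astral-sh/ruff-pre-commit" in stripped:
            in_ruff_repo = True
        elif in_ruff_repo and stripped.startswith("rev:"):
            return stripped.split(":", 1)[1].strip()
    return None

print(ruff_rev())  # e.g. "v0.9.10" with the config added in this commit
```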
.github/workflows/test-docker-build.yml ADDED
@@ -0,0 +1,82 @@
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ # Inspired by
+ # https://github.com/huggingface/peft/blob/main/.github/workflows/test-docker-build.yml
+ name: Test Dockerfiles
+
+ on:
+ pull_request:
+ paths:
+ # Run only when DockerFile files are modified
+ - "docker/**"
+
+ permissions: {}
+
+ env:
+ PYTHON_VERSION: "3.10"
+
+ jobs:
+ get_changed_files:
+ name: Detect modified Dockerfiles
+ runs-on: ubuntu-latest
+ outputs:
+ matrix: ${{ steps.set-matrix.outputs.matrix }}
+ steps:
+ - name: Check out code
+ uses: actions/checkout@v4
+ with:
+ persist-credentials: false
+
+ - name: Get changed files
+ id: changed-files
+ uses: tj-actions/changed-files@3f54ebb830831fc121d3263c1857cfbdc310cdb9 #v42
+ with:
+ files: docker/**
+ json: "true"
+
+ - name: Run step if only the files listed above change # zizmor: ignore[template-injection]
+ if: steps.changed-files.outputs.any_changed == 'true'
+ id: set-matrix
+ run: |
+ echo "matrix=${{ steps.changed-files.outputs.all_changed_files}}" >> $GITHUB_OUTPUT
+
+ build_modified_dockerfiles:
+ name: Build modified Docker images
+ needs: get_changed_files
+ runs-on:
+ group: aws-general-8-plus
+ if: needs.get_changed_files.outputs.matrix != ''
+ strategy:
+ fail-fast: false
+ matrix:
+ docker-file: ${{ fromJson(needs.get_changed_files.outputs.matrix) }}
+ steps:
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v3
+ with:
+ cache-binary: false
+
+ - name: Check out code
+ uses: actions/checkout@v4
+ with:
+ persist-credentials: false
+
+ - name: Build Docker image
+ uses: docker/build-push-action@v5
+ with:
+ file: ${{ matrix.docker-file }}
+ context: .
+ push: False
+ build-args: PYTHON_VERSION=${{ env.PYTHON_VERSION }}
.github/workflows/test.yml ADDED
@@ -0,0 +1,150 @@
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ name: Tests
+
+ on:
+ pull_request:
+ paths:
+ - "lerobot/**"
+ - "tests/**"
+ - "examples/**"
+ - ".github/**"
+ - "pyproject.toml"
+ - ".pre-commit-config.yaml"
+ - "Makefile"
+ - ".cache/**"
+ push:
+ branches:
+ - main
+ paths:
+ - "lerobot/**"
+ - "tests/**"
+ - "examples/**"
+ - ".github/**"
+ - "pyproject.toml"
+ - ".pre-commit-config.yaml"
+ - "Makefile"
+ - ".cache/**"
+
+ permissions: {}
+
+ env:
+ UV_VERSION: "0.6.0"
+
+ jobs:
+ pytest:
+ name: Pytest
+ runs-on: ubuntu-latest
+ env:
+ MUJOCO_GL: egl
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ lfs: true # Ensure LFS files are pulled
+ persist-credentials: false
+
+ - name: Install apt dependencies
+ # portaudio19-dev is needed to install pyaudio
+ run: |
+ sudo apt-get update && \
+ sudo apt-get install -y libegl1-mesa-dev ffmpeg portaudio19-dev
+
+ - name: Install uv and python
+ uses: astral-sh/setup-uv@v5
+ with:
+ enable-cache: true
+ version: ${{ env.UV_VERSION }}
+ python-version: "3.10"
+
+ - name: Install lerobot (all extras)
+ run: uv sync --all-extras
+
+ - name: Test with pytest
+ run: |
+ uv run pytest tests -v --cov=./lerobot --durations=0 \
+ -W ignore::DeprecationWarning:imageio_ffmpeg._utils:7 \
+ -W ignore::UserWarning:torch.utils.data.dataloader:558 \
+ -W ignore::UserWarning:gymnasium.utils.env_checker:247 \
+ && rm -rf tests/outputs outputs
+
+ pytest-minimal:
+ name: Pytest (minimal install)
+ runs-on: ubuntu-latest
+ env:
+ MUJOCO_GL: egl
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ lfs: true # Ensure LFS files are pulled
+ persist-credentials: false
+
+ - name: Install apt dependencies
+ run: sudo apt-get update && sudo apt-get install -y ffmpeg
+
+ - name: Install uv and python
+ uses: astral-sh/setup-uv@v5
+ with:
+ enable-cache: true
+ version: ${{ env.UV_VERSION }}
+ python-version: "3.10"
+
+ - name: Install lerobot
+ run: uv sync --extra "test"
+
+ - name: Test with pytest
+ run: |
+ uv run pytest tests -v --cov=./lerobot --durations=0 \
+ -W ignore::DeprecationWarning:imageio_ffmpeg._utils:7 \
+ -W ignore::UserWarning:torch.utils.data.dataloader:558 \
+ -W ignore::UserWarning:gymnasium.utils.env_checker:247 \
+ && rm -rf tests/outputs outputs
+
+ end-to-end:
+ name: End-to-end
+ runs-on: ubuntu-latest
+ env:
+ MUJOCO_GL: egl
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ lfs: true # Ensure LFS files are pulled
+ persist-credentials: false
+
+ - name: Install apt dependencies
+ # portaudio19-dev is needed to install pyaudio
+ run: |
+ sudo apt-get update && \
+ sudo apt-get install -y libegl1-mesa-dev ffmpeg portaudio19-dev
+
+ - name: Install uv and python
+ uses: astral-sh/setup-uv@v5
+ with:
+ enable-cache: true
+ version: ${{ env.UV_VERSION }}
+ python-version: "3.10"
+
+ - name: Install lerobot (all extras)
+ run: |
+ uv venv
+ uv sync --all-extras
+
+ - name: venv
+ run: |
+ echo "PYTHON_PATH=${{ github.workspace }}/.venv/bin/python" >> $GITHUB_ENV
+
+ - name: Test end-to-end
+ run: |
+ make test-end-to-end \
+ && rm -rf outputs
.github/workflows/trufflehog.yml ADDED
@@ -0,0 +1,35 @@
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ on:
+ push:
+
+ name: Secret Leaks
+
+ permissions: {}
+
+ jobs:
+ trufflehog:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+ persist-credentials: false
+
+ - name: Secret Scanning
+ uses: trufflesecurity/trufflehog@main
+ with:
+ extra_args: --only-verified
.gitignore ADDED
@@ -0,0 +1,173 @@
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ # Logging
+ logs
+ tmp
+ wandb
+
+ # Data
+ data
+ outputs
+
+ # Apple
+ .DS_Store
+
+ # VS Code
+ .vscode
+
+ # HPC
+ nautilus/*.yaml
+ *.key
+
+ # Slurm
+ sbatch*.sh
+
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ pip-wheel-metadata/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # uv/poetry lock files
+ poetry.lock
+ uv.lock
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ !tests/artifacts
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+
+ # Ignore .cache except calibration
+ .cache/*
+ !.cache/calibration/
+ !.cache/calibration/**
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ .python-version
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
.pre-commit-config.yaml ADDED
@@ -0,0 +1,74 @@
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ exclude: "tests/artifacts/.*\\.safetensors$"
+ default_language_version:
+ python: python3.10
+ repos:
+ ##### Meta #####
+ - repo: meta
+ hooks:
+ - id: check-useless-excludes
+ - id: check-hooks-apply
+
+
+ ##### Style / Misc. #####
+ - repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v5.0.0
+ hooks:
+ - id: check-added-large-files
+ - id: debug-statements
+ - id: check-merge-conflict
+ - id: check-case-conflict
+ - id: check-yaml
+ - id: check-toml
+ - id: end-of-file-fixer
+ - id: trailing-whitespace
+
+ - repo: https://github.com/crate-ci/typos
+ rev: v1.30.2
+ hooks:
+ - id: typos
+ args: [--force-exclude]
+
+ - repo: https://github.com/asottile/pyupgrade
+ rev: v3.19.1
+ hooks:
+ - id: pyupgrade
+
+ - repo: https://github.com/astral-sh/ruff-pre-commit
+ rev: v0.9.10
+ hooks:
+ - id: ruff
+ args: [--fix]
+ - id: ruff-format
+
+
+ ##### Security #####
+ - repo: https://github.com/gitleaks/gitleaks
+ rev: v8.24.0
+ hooks:
+ - id: gitleaks
+
+ - repo: https://github.com/woodruffw/zizmor-pre-commit
+ rev: v1.4.1
+ hooks:
+ - id: zizmor
+
+ - repo: https://github.com/PyCQA/bandit
+ rev: 1.8.3
+ hooks:
+ - id: bandit
+ args: ["-c", "pyproject.toml"]
+ additional_dependencies: ["bandit[toml]"]
CODE_OF_CONDUCT.md ADDED
@@ -0,0 +1,133 @@
+
+ # Contributor Covenant Code of Conduct
+
+ ## Our Pledge
+
+ We as members, contributors, and leaders pledge to make participation in our
+ community a harassment-free experience for everyone, regardless of age, body
+ size, visible or invisible disability, ethnicity, sex characteristics, gender
+ identity and expression, level of experience, education, socio-economic status,
+ nationality, personal appearance, race, caste, color, religion, or sexual
+ identity and orientation.
+
+ We pledge to act and interact in ways that contribute to an open, welcoming,
+ diverse, inclusive, and healthy community.
+
+ ## Our Standards
+
+ Examples of behavior that contributes to a positive environment for our
+ community include:
+
+ * Demonstrating empathy and kindness toward other people
+ * Being respectful of differing opinions, viewpoints, and experiences
+ * Giving and gracefully accepting constructive feedback
+ * Accepting responsibility and apologizing to those affected by our mistakes,
+ and learning from the experience
+ * Focusing on what is best not just for us as individuals, but for the overall
+ community
+
+ Examples of unacceptable behavior include:
+
+ * The use of sexualized language or imagery, and sexual attention or advances of
+ any kind
+ * Trolling, insulting or derogatory comments, and personal or political attacks
+ * Public or private harassment
+ * Publishing others' private information, such as a physical or email address,
+ without their explicit permission
+ * Other conduct which could reasonably be considered inappropriate in a
+ professional setting
+
+ ## Enforcement Responsibilities
+
+ Community leaders are responsible for clarifying and enforcing our standards of
+ acceptable behavior and will take appropriate and fair corrective action in
+ response to any behavior that they deem inappropriate, threatening, offensive,
+ or harmful.
+
+ Community leaders have the right and responsibility to remove, edit, or reject
+ comments, commits, code, wiki edits, issues, and other contributions that are
+ not aligned to this Code of Conduct, and will communicate reasons for moderation
+ decisions when appropriate.
+
+ ## Scope
+
+ This Code of Conduct applies within all community spaces, and also applies when
+ an individual is officially representing the community in public spaces.
+ Examples of representing our community include using an official email address,
+ posting via an official social media account, or acting as an appointed
+ representative at an online or offline event.
+
+ ## Enforcement
+
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be
+ reported to the community leaders responsible for enforcement at
+ [feedback@huggingface.co](mailto:feedback@huggingface.co).
+ All complaints will be reviewed and investigated promptly and fairly.
+
+ All community leaders are obligated to respect the privacy and security of the
+ reporter of any incident.
+
+ ## Enforcement Guidelines
+
+ Community leaders will follow these Community Impact Guidelines in determining
+ the consequences for any action they deem in violation of this Code of Conduct:
+
+ ### 1. Correction
+
+ **Community Impact**: Use of inappropriate language or other behavior deemed
+ unprofessional or unwelcome in the community.
+
+ **Consequence**: A private, written warning from community leaders, providing
+ clarity around the nature of the violation and an explanation of why the
+ behavior was inappropriate. A public apology may be requested.
+
+ ### 2. Warning
+
+ **Community Impact**: A violation through a single incident or series of
+ actions.
+
+ **Consequence**: A warning with consequences for continued behavior. No
+ interaction with the people involved, including unsolicited interaction with
+ those enforcing the Code of Conduct, for a specified period of time. This
+ includes avoiding interactions in community spaces as well as external channels
+ like social media. Violating these terms may lead to a temporary or permanent
+ ban.
+
+ ### 3. Temporary Ban
+
+ **Community Impact**: A serious violation of community standards, including
+ sustained inappropriate behavior.
+
+ **Consequence**: A temporary ban from any sort of interaction or public
+ communication with the community for a specified period of time. No public or
+ private interaction with the people involved, including unsolicited interaction
+ with those enforcing the Code of Conduct, is allowed during this period.
+ Violating these terms may lead to a permanent ban.
+
+ ### 4. Permanent Ban
+
+ **Community Impact**: Demonstrating a pattern of violation of community
+ standards, including sustained inappropriate behavior, harassment of an
+ individual, or aggression toward or disparagement of classes of individuals.
+
+ **Consequence**: A permanent ban from any sort of public interaction within the
+ community.
+
+ ## Attribution
+
+ This Code of Conduct is adapted from the [Contributor Covenant][homepage],
+ version 2.1, available at
+ [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
+
+ Community Impact Guidelines were inspired by
+ [Mozilla's code of conduct enforcement ladder][Mozilla CoC].
+
+ For answers to common questions about this code of conduct, see the FAQ at
+ [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
+ [https://www.contributor-covenant.org/translations][translations].
+
+ [homepage]: https://www.contributor-covenant.org
+ [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
+ [Mozilla CoC]: https://github.com/mozilla/diversity
+ [FAQ]: https://www.contributor-covenant.org/faq
+ [translations]: https://www.contributor-covenant.org/translations
CONTRIBUTING.md ADDED
@@ -0,0 +1,308 @@
1
+ # How to contribute to 🤗 LeRobot?
2
+
3
+ Everyone is welcome to contribute, and we value everybody's contribution. Code
4
+ is thus not the only way to help the community. Answering questions, helping
5
+ others, reaching out and improving the documentations are immensely valuable to
6
+ the community.
7
+
8
+ It also helps us if you spread the word: reference the library from blog posts
9
+ on the awesome projects it made possible, shout out on Twitter when it has
10
+ helped you, or simply ⭐️ the repo to say "thank you".
11
+
12
+ Whichever way you choose to contribute, please be mindful to respect our
13
+ [code of conduct](https://github.com/huggingface/lerobot/blob/main/CODE_OF_CONDUCT.md).
14
+
15
+ ## You can contribute in so many ways!
16
+
17
+ Some of the ways you can contribute to 🤗 LeRobot:
18
+ * Fixing outstanding issues with the existing code.
19
+ * Implementing new models, datasets or simulation environments.
20
+ * Contributing to the examples or to the documentation.
21
+ * Submitting issues related to bugs or desired new features.
22
+
23
+ Following the guides below, feel free to open issues and PRs and to coordinate your efforts with the community on our [Discord Channel](https://discord.gg/VjFz58wn3R). For specific inquiries, reach out to [Remi Cadene](mailto:remi.cadene@huggingface.co).
24
+
25
+ If you are not sure how to contribute or want to know the next features we working on, look on this project page: [LeRobot TODO](https://github.com/orgs/huggingface/projects/46)
26
+
27
+ ## Submitting a new issue or feature request
28
+
29
+ Do your best to follow these guidelines when submitting an issue or a feature
30
+ request. It will make it easier for us to come back to you quickly and with good
31
+ feedback.
32
+
33
+ ### Did you find a bug?
34
+
35
+ The 🤗 LeRobot library is robust and reliable thanks to the users who notify us of
36
+ the problems they encounter. So thank you for reporting an issue.
37
+
38
+ First, we would really appreciate it if you could **make sure the bug was not
39
+ already reported** (use the search bar on Github under Issues).
40
+
41
+ Did not find it? :( So we can act quickly on it, please follow these steps:
42
+
43
+ * Include your **OS type and version**, the versions of **Python** and **PyTorch**.
44
+ * A short, self-contained, code snippet that allows us to reproduce the bug in
45
+ less than 30s.
46
+ * The full traceback if an exception is raised.
47
+ * Attach any other additional information, like screenshots, you think may help.
48
+
49
+ ### Do you want a new feature?
50
+
51
+ A good feature request addresses the following points:
52
+
53
+ 1. Motivation first:
54
+ * Is it related to a problem/frustration with the library? If so, please explain
55
+ why. Providing a code snippet that demonstrates the problem is best.
56
+ * Is it related to something you would need for a project? We'd love to hear
57
+ about it!
58
+ * Is it something you worked on and think could benefit the community?
59
+ Awesome! Tell us what problem it solved for you.
60
+ 2. Write a *paragraph* describing the feature.
61
+ 3. Provide a **code snippet** that demonstrates its future use.
62
+ 4. In case this is related to a paper, please attach a link.
63
+ 5. Attach any additional information (drawings, screenshots, etc.) you think may help.
64
+
65
+ If your issue is well written we're already 80% of the way there by the time you
66
+ post it.
67
+
68
+ ## Adding new policies, datasets or environments
69
+
70
+ Look at our implementations for [datasets](./lerobot/common/datasets/), [policies](./lerobot/common/policies/),
71
+ environments ([aloha](https://github.com/huggingface/gym-aloha),
72
+ [xarm](https://github.com/huggingface/gym-xarm),
73
+ [pusht](https://github.com/huggingface/gym-pusht))
74
+ and follow the same api design.
75
+
76
+ When implementing a new dataset loadable with LeRobotDataset follow these steps:
77
+ - Update `available_datasets_per_env` in `lerobot/__init__.py`
78
+
79
+ When implementing a new environment (e.g. `gym_aloha`), follow these steps:
80
+ - Update `available_tasks_per_env` and `available_datasets_per_env` in `lerobot/__init__.py`
81
+
82
+ When implementing a new policy class (e.g. `DiffusionPolicy`) follow these steps:
83
+ - Update `available_policies` and `available_policies_per_env`, in `lerobot/__init__.py`
84
+ - Set the required `name` class attribute.
85
+ - Update variables in `tests/test_available.py` by importing your new Policy class
86
+
87
+ ## Submitting a pull request (PR)
88
+
89
+ Before writing code, we strongly advise you to search through the existing PRs or
90
+ issues to make sure that nobody is already working on the same thing. If you are
91
+ unsure, it is always a good idea to open an issue to get some feedback.
92
+
93
+ You will need basic `git` proficiency to be able to contribute to
94
+ 🤗 LeRobot. `git` is not the easiest tool to use but it has the greatest
95
+ manual. Type `git --help` in a shell and enjoy. If you prefer books, [Pro
96
+ Git](https://git-scm.com/book/en/v2) is a very good reference.
97
+
98
+ Follow these steps to start contributing:
99
+
100
+ 1. Fork the [repository](https://github.com/huggingface/lerobot) by
101
+ clicking on the 'Fork' button on the repository's page. This creates a copy of the code
102
+ under your GitHub user account.
103
+
104
+ 2. Clone your fork to your local disk, and add the base repository as a remote. The following command
105
+ assumes you have your public SSH key uploaded to GitHub. See the following guide for more
106
+ [information](https://docs.github.com/en/repositories/creating-and-managing-repositories/cloning-a-repository).
107
+
108
+ ```bash
109
+ git clone git@github.com:<your Github handle>/lerobot.git
110
+ cd lerobot
111
+ git remote add upstream https://github.com/huggingface/lerobot.git
112
+ ```
113
+
114
+ 3. Create a new branch to hold your development changes, and do this for every new PR you work on.
115
+
116
+ Start by synchronizing your `main` branch with the `upstream/main` branch (more details in the [GitHub Docs](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/syncing-a-fork)):
117
+
118
+ ```bash
119
+ git checkout main
120
+ git fetch upstream
121
+ git rebase upstream/main
122
+ ```
123
+
124
+ Once your `main` branch is synchronized, create a new branch from it:
125
+
126
+ ```bash
127
+ git checkout -b a-descriptive-name-for-my-changes
128
+ ```
129
+
130
+ 🚨 **Do not** work on the `main` branch.
131
+
132
+ 4. for development, we advise to use a tool like `poetry` or `uv` instead of just `pip` to easily track our dependencies.
133
+ Follow the instructions to [install poetry](https://python-poetry.org/docs/#installation) (use a version >=2.1.0) or to [install uv](https://docs.astral.sh/uv/getting-started/installation/#installation-methods) if you don't have one of them already.
134
+
135
+ Set up a development environment with conda or miniconda:
136
+ ```bash
137
+ conda create -y -n lerobot-dev python=3.10 && conda activate lerobot-dev
138
+ ```
139
+
140
+ If you're using `uv`, it can manage python versions so you can instead do:
141
+ ```bash
142
+ uv venv --python 3.10 && source .venv/bin/activate
143
+ ```
144
+
145
+ To develop on 🤗 LeRobot, you will at least need to install the `dev` and `test` extras dependencies along with the core library:
146
+
147
+ using `poetry`
148
+ ```bash
149
+ poetry sync --extras "dev test"
150
+ ```
151
+
152
+ using `uv`
153
+ ```bash
154
+ uv sync --extra dev --extra test
155
+ ```
156
+
157
+ You can also install the project with all its dependencies (including environments):
158
+
159
+ using `poetry`
160
+ ```bash
161
+ poetry sync --all-extras
162
+ ```
163
+
164
+ using `uv`
165
+ ```bash
166
+ uv sync --all-extras
167
+ ```
168
+
169
+ > **Note:** If you don't install simulation environments with `--all-extras`, the tests that require them will be skipped when running the pytest suite locally. However, they *will* be tested in the CI. In general, we advise you to install everything and test locally before pushing.
170
+
171
+ Whichever command you chose to install the project (e.g. `poetry sync --all-extras`), you should run it again when pulling code with an updated version of `pyproject.toml` and `poetry.lock` in order to synchronize your virtual environment with the new dependencies.
172
+
173
+ The equivalent of `pip install some-package`, would just be:
174
+
175
+ using `poetry`
176
+ ```bash
177
+ poetry add some-package
178
+ ```
179
+
180
+ using `uv`
181
+ ```bash
182
+ uv add some-package
183
+ ```
184
+
185
+ When making changes to the dependency sections of `pyproject.toml`, run the command below that matches your tool to lock dependencies.
186
+ using `poetry`
187
+ ```bash
188
+ poetry lock
189
+ ```
190
+
191
+ using `uv`
192
+ ```bash
193
+ uv lock
194
+ ```
195
+
196
+
197
+ 5. Develop the features on your branch.
198
+
199
+ As you work on the features, you should make sure that the test suite
200
+ passes. You should run the tests impacted by your changes like this (see
201
+ below for an explanation regarding the environment variable):
202
+
203
+ ```bash
204
+ pytest tests/<TEST_TO_RUN>.py
205
+ ```
206
+
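+ For instance, to run only the dataset tests verbosely and stop at the first failure (the file name below is just an example; pick the test module that covers your change):
+ ```bash
+ python -m pytest -sv -x tests/test_datasets.py
+ ```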
207
+ 6. Follow our style.
208
+
209
+ `lerobot` relies on `ruff` to format its source code
210
+ consistently. Set up [`pre-commit`](https://pre-commit.com/) to run these checks
211
+ automatically as Git commit hooks.
212
+
213
+ Install `pre-commit` hooks:
214
+ ```bash
215
+ pre-commit install
216
+ ```
217
+
218
+ You can run these hooks manually on staged files whenever you need to with:
219
+ ```bash
220
+ pre-commit
221
+ ```
222
+
223
+ Once you're happy with your changes, add changed files using `git add` and
224
+ make a commit with `git commit` to record your changes locally:
225
+
226
+ ```bash
227
+ git add modified_file.py
228
+ git commit
229
+ ```
230
+
231
+ Note: if you have already committed changes with incorrect formatting, you can fix them with:
232
+ ```bash
233
+ pre-commit run --all-files
234
+ ```
235
+
236
+ Please write [good commit messages](https://chris.beams.io/posts/git-commit/).
237
+
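+ As a rough template (the subject line and issue number below are purely illustrative), a good commit message has a short imperative subject line and a body explaining the why:
+ ```
+ Fix dataset timestamp rounding in LeRobotDataset
+
+ Explain what the change does and why it is needed, wrapped at
+ roughly 72 characters. Reference related issues, e.g. "Fixes #1234".
+ ```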
238
+ It is a good idea to sync your copy of the code with the original
239
+ repository regularly. This way you can quickly account for changes:
240
+
241
+ ```bash
242
+ git fetch upstream
243
+ git rebase upstream/main
244
+ ```
245
+
246
+ Push the changes to your account using:
247
+
248
+ ```bash
249
+ git push -u origin a-descriptive-name-for-my-changes
250
+ ```
251
+
252
+ 7. Once you are satisfied (**and the checklist below is happy too**), go to the
253
+ webpage of your fork on GitHub. Click on 'Pull request' to send your changes
254
+ to the project maintainers for review.
255
+
256
+ 8. It's ok if maintainers ask you for changes. It happens to core contributors
257
+ too! So everyone can see the changes in the Pull request, work in your local
258
+ branch and push the changes to your fork. They will automatically appear in
259
+ the pull request.
260
+
261
+
262
+ ### Checklist
263
+
264
+ 1. The title of your pull request should be a summary of its contribution;
265
+ 2. If your pull request addresses an issue, please mention the issue number in
266
+ the pull request description to make sure they are linked (and people
267
+ consulting the issue know you are working on it);
268
+ 3. To indicate a work in progress, please prefix the title with `[WIP]`, or preferably mark
269
+ the PR as a draft PR. These are useful to avoid duplicated work, and to differentiate
270
+ it from PRs ready to be merged;
271
+ 4. Make sure existing tests pass;
272
+ <!-- 5. Add high-coverage tests. No quality testing = no merge.
273
+
274
+ See an example of a good PR here: https://github.com/huggingface/lerobot/pull/ -->
275
+
276
+ ### Tests
277
+
278
+ An extensive test suite is included to test the library behavior and several examples. Library tests can be found in the [tests folder](https://github.com/huggingface/lerobot/tree/main/tests).
279
+
280
+ Install [git lfs](https://git-lfs.com/) to retrieve test artifacts (if you don't have it already).
281
+
282
+ On Mac:
283
+ ```bash
284
+ brew install git-lfs
285
+ git lfs install
286
+ ```
287
+
288
+ On Ubuntu:
289
+ ```bash
290
+ sudo apt-get install git-lfs
291
+ git lfs install
292
+ ```
293
+
294
+ Pull the artifacts if they're not already in [tests/artifacts](tests/artifacts):
295
+ ```bash
296
+ git lfs pull
297
+ ```
298
+
299
+ We use `pytest` to run the tests. From the root of the
300
+ repository, here's how to run tests with `pytest` for the library:
301
+
302
+ ```bash
303
+ python -m pytest -sv ./tests
304
+ ```
305
+
306
+
307
+ You can specify a smaller set of tests to check only the feature
308
+ you're working on.
LICENSE ADDED
@@ -0,0 +1,507 @@
1
+ Copyright 2024 The Hugging Face team. All rights reserved.
2
+
3
+ Apache License
4
+ Version 2.0, January 2004
5
+ http://www.apache.org/licenses/
6
+
7
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
8
+
9
+ 1. Definitions.
10
+
11
+ "License" shall mean the terms and conditions for use, reproduction,
12
+ and distribution as defined by Sections 1 through 9 of this document.
13
+
14
+ "Licensor" shall mean the copyright owner or entity authorized by
15
+ the copyright owner that is granting the License.
16
+
17
+ "Legal Entity" shall mean the union of the acting entity and all
18
+ other entities that control, are controlled by, or are under common
19
+ control with that entity. For the purposes of this definition,
20
+ "control" means (i) the power, direct or indirect, to cause the
21
+ direction or management of such entity, whether by contract or
22
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
23
+ outstanding shares, or (iii) beneficial ownership of such entity.
24
+
25
+ "You" (or "Your") shall mean an individual or Legal Entity
26
+ exercising permissions granted by this License.
27
+
28
+ "Source" form shall mean the preferred form for making modifications,
29
+ including but not limited to software source code, documentation
30
+ source, and configuration files.
31
+
32
+ "Object" form shall mean any form resulting from mechanical
33
+ transformation or translation of a Source form, including but
34
+ not limited to compiled object code, generated documentation,
35
+ and conversions to other media types.
36
+
37
+ "Work" shall mean the work of authorship, whether in Source or
38
+ Object form, made available under the License, as indicated by a
39
+ copyright notice that is included in or attached to the work
40
+ (an example is provided in the Appendix below).
41
+
42
+ "Derivative Works" shall mean any work, whether in Source or Object
43
+ form, that is based on (or derived from) the Work and for which the
44
+ editorial revisions, annotations, elaborations, or other modifications
45
+ represent, as a whole, an original work of authorship. For the purposes
46
+ of this License, Derivative Works shall not include works that remain
47
+ separable from, or merely link (or bind by name) to the interfaces of,
48
+ the Work and Derivative Works thereof.
49
+
50
+ "Contribution" shall mean any work of authorship, including
51
+ the original version of the Work and any modifications or additions
52
+ to that Work or Derivative Works thereof, that is intentionally
53
+ submitted to Licensor for inclusion in the Work by the copyright owner
54
+ or by an individual or Legal Entity authorized to submit on behalf of
55
+ the copyright owner. For the purposes of this definition, "submitted"
56
+ means any form of electronic, verbal, or written communication sent
57
+ to the Licensor or its representatives, including but not limited to
58
+ communication on electronic mailing lists, source code control systems,
59
+ and issue tracking systems that are managed by, or on behalf of, the
60
+ Licensor for the purpose of discussing and improving the Work, but
61
+ excluding communication that is conspicuously marked or otherwise
62
+ designated in writing by the copyright owner as "Not a Contribution."
63
+
64
+ "Contributor" shall mean Licensor and any individual or Legal Entity
65
+ on behalf of whom a Contribution has been received by Licensor and
66
+ subsequently incorporated within the Work.
67
+
68
+ 2. Grant of Copyright License. Subject to the terms and conditions of
69
+ this License, each Contributor hereby grants to You a perpetual,
70
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
71
+ copyright license to reproduce, prepare Derivative Works of,
72
+ publicly display, publicly perform, sublicense, and distribute the
73
+ Work and such Derivative Works in Source or Object form.
74
+
75
+ 3. Grant of Patent License. Subject to the terms and conditions of
76
+ this License, each Contributor hereby grants to You a perpetual,
77
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
78
+ (except as stated in this section) patent license to make, have made,
79
+ use, offer to sell, sell, import, and otherwise transfer the Work,
80
+ where such license applies only to those patent claims licensable
81
+ by such Contributor that are necessarily infringed by their
82
+ Contribution(s) alone or by combination of their Contribution(s)
83
+ with the Work to which such Contribution(s) was submitted. If You
84
+ institute patent litigation against any entity (including a
85
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
86
+ or a Contribution incorporated within the Work constitutes direct
87
+ or contributory patent infringement, then any patent licenses
88
+ granted to You under this License for that Work shall terminate
89
+ as of the date such litigation is filed.
90
+
91
+ 4. Redistribution. You may reproduce and distribute copies of the
92
+ Work or Derivative Works thereof in any medium, with or without
93
+ modifications, and in Source or Object form, provided that You
94
+ meet the following conditions:
95
+
96
+ (a) You must give any other recipients of the Work or
97
+ Derivative Works a copy of this License; and
98
+
99
+ (b) You must cause any modified files to carry prominent notices
100
+ stating that You changed the files; and
101
+
102
+ (c) You must retain, in the Source form of any Derivative Works
103
+ that You distribute, all copyright, patent, trademark, and
104
+ attribution notices from the Source form of the Work,
105
+ excluding those notices that do not pertain to any part of
106
+ the Derivative Works; and
107
+
108
+ (d) If the Work includes a "NOTICE" text file as part of its
109
+ distribution, then any Derivative Works that You distribute must
110
+ include a readable copy of the attribution notices contained
111
+ within such NOTICE file, excluding those notices that do not
112
+ pertain to any part of the Derivative Works, in at least one
113
+ of the following places: within a NOTICE text file distributed
114
+ as part of the Derivative Works; within the Source form or
115
+ documentation, if provided along with the Derivative Works; or,
116
+ within a display generated by the Derivative Works, if and
117
+ wherever such third-party notices normally appear. The contents
118
+ of the NOTICE file are for informational purposes only and
119
+ do not modify the License. You may add Your own attribution
120
+ notices within Derivative Works that You distribute, alongside
121
+ or as an addendum to the NOTICE text from the Work, provided
122
+ that such additional attribution notices cannot be construed
123
+ as modifying the License.
124
+
125
+ You may add Your own copyright statement to Your modifications and
126
+ may provide additional or different license terms and conditions
127
+ for use, reproduction, or distribution of Your modifications, or
128
+ for any such Derivative Works as a whole, provided Your use,
129
+ reproduction, and distribution of the Work otherwise complies with
130
+ the conditions stated in this License.
131
+
132
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
133
+ any Contribution intentionally submitted for inclusion in the Work
134
+ by You to the Licensor shall be under the terms and conditions of
135
+ this License, without any additional terms or conditions.
136
+ Notwithstanding the above, nothing herein shall supersede or modify
137
+ the terms of any separate license agreement you may have executed
138
+ with Licensor regarding such Contributions.
139
+
140
+ 6. Trademarks. This License does not grant permission to use the trade
141
+ names, trademarks, service marks, or product names of the Licensor,
142
+ except as required for reasonable and customary use in describing the
143
+ origin of the Work and reproducing the content of the NOTICE file.
144
+
145
+ 7. Disclaimer of Warranty. Unless required by applicable law or
146
+ agreed to in writing, Licensor provides the Work (and each
147
+ Contributor provides its Contributions) on an "AS IS" BASIS,
148
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
149
+ implied, including, without limitation, any warranties or conditions
150
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
151
+ PARTICULAR PURPOSE. You are solely responsible for determining the
152
+ appropriateness of using or redistributing the Work and assume any
153
+ risks associated with Your exercise of permissions under this License.
154
+
155
+ 8. Limitation of Liability. In no event and under no legal theory,
156
+ whether in tort (including negligence), contract, or otherwise,
157
+ unless required by applicable law (such as deliberate and grossly
158
+ negligent acts) or agreed to in writing, shall any Contributor be
159
+ liable to You for damages, including any direct, indirect, special,
160
+ incidental, or consequential damages of any character arising as a
161
+ result of this License or out of the use or inability to use the
162
+ Work (including but not limited to damages for loss of goodwill,
163
+ work stoppage, computer failure or malfunction, or any and all
164
+ other commercial damages or losses), even if such Contributor
165
+ has been advised of the possibility of such damages.
166
+
167
+ 9. Accepting Warranty or Additional Liability. While redistributing
168
+ the Work or Derivative Works thereof, You may choose to offer,
169
+ and charge a fee for, acceptance of support, warranty, indemnity,
170
+ or other liability obligations and/or rights consistent with this
171
+ License. However, in accepting such obligations, You may act only
172
+ on Your own behalf and on Your sole responsibility, not on behalf
173
+ of any other Contributor, and only if You agree to indemnify,
174
+ defend, and hold each Contributor harmless for any liability
175
+ incurred by, or claims asserted against, such Contributor by reason
176
+ of your accepting any such warranty or additional liability.
177
+
178
+ END OF TERMS AND CONDITIONS
179
+
180
+ APPENDIX: How to apply the Apache License to your work.
181
+
182
+ To apply the Apache License to your work, attach the following
183
+ boilerplate notice, with the fields enclosed by brackets "[]"
184
+ replaced with your own identifying information. (Don't include
185
+ the brackets!) The text should be enclosed in the appropriate
186
+ comment syntax for the file format. We also recommend that a
187
+ file or class name and description of purpose be included on the
188
+ same "printed page" as the copyright notice for easier
189
+ identification within third-party archives.
190
+
191
+ Copyright [yyyy] [name of copyright owner]
192
+
193
+ Licensed under the Apache License, Version 2.0 (the "License");
194
+ you may not use this file except in compliance with the License.
195
+ You may obtain a copy of the License at
196
+
197
+ http://www.apache.org/licenses/LICENSE-2.0
198
+
199
+ Unless required by applicable law or agreed to in writing, software
200
+ distributed under the License is distributed on an "AS IS" BASIS,
201
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
202
+ See the License for the specific language governing permissions and
203
+ limitations under the License.
204
+
205
+
206
+ ## Some of lerobot's code is derived from Diffusion Policy, which is subject to the following copyright notice:
207
+
208
+ MIT License
209
+
210
+ Copyright (c) 2023 Columbia Artificial Intelligence and Robotics Lab
211
+
212
+ Permission is hereby granted, free of charge, to any person obtaining a copy
213
+ of this software and associated documentation files (the "Software"), to deal
214
+ in the Software without restriction, including without limitation the rights
215
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
216
+ copies of the Software, and to permit persons to whom the Software is
217
+ furnished to do so, subject to the following conditions:
218
+
219
+ The above copyright notice and this permission notice shall be included in all
220
+ copies or substantial portions of the Software.
221
+
222
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
223
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
224
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
225
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
226
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
227
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
228
+ SOFTWARE.
229
+
230
+
231
+ ## Some of lerobot's code is derived from FOWM, which is subject to the following copyright notice:
232
+
233
+ MIT License
234
+
235
+ Copyright (c) 2023 Yunhai Feng
236
+
237
+ Permission is hereby granted, free of charge, to any person obtaining a copy
238
+ of this software and associated documentation files (the "Software"), to deal
239
+ in the Software without restriction, including without limitation the rights
240
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
241
+ copies of the Software, and to permit persons to whom the Software is
242
+ furnished to do so, subject to the following conditions:
243
+
244
+ The above copyright notice and this permission notice shall be included in all
245
+ copies or substantial portions of the Software.
246
+
247
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
248
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
249
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
250
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
251
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
252
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
253
+ SOFTWARE.
254
+
255
+
256
+ ## Some of lerobot's code is derived from simxarm, which is subject to the following copyright notice:
257
+
258
+ MIT License
259
+
260
+ Copyright (c) 2023 Nicklas Hansen & Yanjie Ze
261
+
262
+ Permission is hereby granted, free of charge, to any person obtaining a copy
263
+ of this software and associated documentation files (the "Software"), to deal
264
+ in the Software without restriction, including without limitation the rights
265
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
266
+ copies of the Software, and to permit persons to whom the Software is
267
+ furnished to do so, subject to the following conditions:
268
+
269
+ The above copyright notice and this permission notice shall be included in all
270
+ copies or substantial portions of the Software.
271
+
272
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
273
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
274
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
275
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
276
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
277
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
278
+ SOFTWARE.
279
+
280
+
281
+ ## Some of lerobot's code is derived from ALOHA, which is subject to the following copyright notice:
282
+
283
+ MIT License
284
+
285
+ Copyright (c) 2023 Tony Z. Zhao
286
+
287
+ Permission is hereby granted, free of charge, to any person obtaining a copy
288
+ of this software and associated documentation files (the "Software"), to deal
289
+ in the Software without restriction, including without limitation the rights
290
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
291
+ copies of the Software, and to permit persons to whom the Software is
292
+ furnished to do so, subject to the following conditions:
293
+
294
+ The above copyright notice and this permission notice shall be included in all
295
+ copies or substantial portions of the Software.
296
+
297
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
298
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
299
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
300
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
301
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
302
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
303
+ SOFTWARE.
304
+
305
+ ## Some of lerobot's code is derived from DETR, which is subject to the following copyright notice:
306
+
307
+ Apache License
308
+ Version 2.0, January 2004
309
+ http://www.apache.org/licenses/
310
+
311
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
312
+
313
+ 1. Definitions.
314
+
315
+ "License" shall mean the terms and conditions for use, reproduction,
316
+ and distribution as defined by Sections 1 through 9 of this document.
317
+
318
+ "Licensor" shall mean the copyright owner or entity authorized by
319
+ the copyright owner that is granting the License.
320
+
321
+ "Legal Entity" shall mean the union of the acting entity and all
322
+ other entities that control, are controlled by, or are under common
323
+ control with that entity. For the purposes of this definition,
324
+ "control" means (i) the power, direct or indirect, to cause the
325
+ direction or management of such entity, whether by contract or
326
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
327
+ outstanding shares, or (iii) beneficial ownership of such entity.
328
+
329
+ "You" (or "Your") shall mean an individual or Legal Entity
330
+ exercising permissions granted by this License.
331
+
332
+ "Source" form shall mean the preferred form for making modifications,
333
+ including but not limited to software source code, documentation
334
+ source, and configuration files.
335
+
336
+ "Object" form shall mean any form resulting from mechanical
337
+ transformation or translation of a Source form, including but
338
+ not limited to compiled object code, generated documentation,
339
+ and conversions to other media types.
340
+
341
+ "Work" shall mean the work of authorship, whether in Source or
342
+ Object form, made available under the License, as indicated by a
343
+ copyright notice that is included in or attached to the work
344
+ (an example is provided in the Appendix below).
345
+
346
+ "Derivative Works" shall mean any work, whether in Source or Object
347
+ form, that is based on (or derived from) the Work and for which the
348
+ editorial revisions, annotations, elaborations, or other modifications
349
+ represent, as a whole, an original work of authorship. For the purposes
350
+ of this License, Derivative Works shall not include works that remain
351
+ separable from, or merely link (or bind by name) to the interfaces of,
352
+ the Work and Derivative Works thereof.
353
+
354
+ "Contribution" shall mean any work of authorship, including
355
+ the original version of the Work and any modifications or additions
356
+ to that Work or Derivative Works thereof, that is intentionally
357
+ submitted to Licensor for inclusion in the Work by the copyright owner
358
+ or by an individual or Legal Entity authorized to submit on behalf of
359
+ the copyright owner. For the purposes of this definition, "submitted"
360
+ means any form of electronic, verbal, or written communication sent
361
+ to the Licensor or its representatives, including but not limited to
362
+ communication on electronic mailing lists, source code control systems,
363
+ and issue tracking systems that are managed by, or on behalf of, the
364
+ Licensor for the purpose of discussing and improving the Work, but
365
+ excluding communication that is conspicuously marked or otherwise
366
+ designated in writing by the copyright owner as "Not a Contribution."
367
+
368
+ "Contributor" shall mean Licensor and any individual or Legal Entity
369
+ on behalf of whom a Contribution has been received by Licensor and
370
+ subsequently incorporated within the Work.
371
+
372
+ 2. Grant of Copyright License. Subject to the terms and conditions of
373
+ this License, each Contributor hereby grants to You a perpetual,
374
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
375
+ copyright license to reproduce, prepare Derivative Works of,
376
+ publicly display, publicly perform, sublicense, and distribute the
377
+ Work and such Derivative Works in Source or Object form.
378
+
379
+ 3. Grant of Patent License. Subject to the terms and conditions of
380
+ this License, each Contributor hereby grants to You a perpetual,
381
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
382
+ (except as stated in this section) patent license to make, have made,
383
+ use, offer to sell, sell, import, and otherwise transfer the Work,
384
+ where such license applies only to those patent claims licensable
385
+ by such Contributor that are necessarily infringed by their
386
+ Contribution(s) alone or by combination of their Contribution(s)
387
+ with the Work to which such Contribution(s) was submitted. If You
388
+ institute patent litigation against any entity (including a
389
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
390
+ or a Contribution incorporated within the Work constitutes direct
391
+ or contributory patent infringement, then any patent licenses
392
+ granted to You under this License for that Work shall terminate
393
+ as of the date such litigation is filed.
394
+
395
+ 4. Redistribution. You may reproduce and distribute copies of the
396
+ Work or Derivative Works thereof in any medium, with or without
397
+ modifications, and in Source or Object form, provided that You
398
+ meet the following conditions:
399
+
400
+ (a) You must give any other recipients of the Work or
401
+ Derivative Works a copy of this License; and
402
+
403
+ (b) You must cause any modified files to carry prominent notices
404
+ stating that You changed the files; and
405
+
406
+ (c) You must retain, in the Source form of any Derivative Works
407
+ that You distribute, all copyright, patent, trademark, and
408
+ attribution notices from the Source form of the Work,
409
+ excluding those notices that do not pertain to any part of
410
+ the Derivative Works; and
411
+
412
+ (d) If the Work includes a "NOTICE" text file as part of its
413
+ distribution, then any Derivative Works that You distribute must
414
+ include a readable copy of the attribution notices contained
415
+ within such NOTICE file, excluding those notices that do not
416
+ pertain to any part of the Derivative Works, in at least one
417
+ of the following places: within a NOTICE text file distributed
418
+ as part of the Derivative Works; within the Source form or
419
+ documentation, if provided along with the Derivative Works; or,
420
+ within a display generated by the Derivative Works, if and
421
+ wherever such third-party notices normally appear. The contents
422
+ of the NOTICE file are for informational purposes only and
423
+ do not modify the License. You may add Your own attribution
424
+ notices within Derivative Works that You distribute, alongside
425
+ or as an addendum to the NOTICE text from the Work, provided
426
+ that such additional attribution notices cannot be construed
427
+ as modifying the License.
428
+
429
+ You may add Your own copyright statement to Your modifications and
430
+ may provide additional or different license terms and conditions
431
+ for use, reproduction, or distribution of Your modifications, or
432
+ for any such Derivative Works as a whole, provided Your use,
433
+ reproduction, and distribution of the Work otherwise complies with
434
+ the conditions stated in this License.
435
+
436
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
437
+ any Contribution intentionally submitted for inclusion in the Work
438
+ by You to the Licensor shall be under the terms and conditions of
439
+ this License, without any additional terms or conditions.
440
+ Notwithstanding the above, nothing herein shall supersede or modify
441
+ the terms of any separate license agreement you may have executed
442
+ with Licensor regarding such Contributions.
443
+
444
+ 6. Trademarks. This License does not grant permission to use the trade
445
+ names, trademarks, service marks, or product names of the Licensor,
446
+ except as required for reasonable and customary use in describing the
447
+ origin of the Work and reproducing the content of the NOTICE file.
448
+
449
+ 7. Disclaimer of Warranty. Unless required by applicable law or
450
+ agreed to in writing, Licensor provides the Work (and each
451
+ Contributor provides its Contributions) on an "AS IS" BASIS,
452
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
453
+ implied, including, without limitation, any warranties or conditions
454
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
455
+ PARTICULAR PURPOSE. You are solely responsible for determining the
456
+ appropriateness of using or redistributing the Work and assume any
457
+ risks associated with Your exercise of permissions under this License.
458
+
459
+ 8. Limitation of Liability. In no event and under no legal theory,
460
+ whether in tort (including negligence), contract, or otherwise,
461
+ unless required by applicable law (such as deliberate and grossly
462
+ negligent acts) or agreed to in writing, shall any Contributor be
463
+ liable to You for damages, including any direct, indirect, special,
464
+ incidental, or consequential damages of any character arising as a
465
+ result of this License or out of the use or inability to use the
466
+ Work (including but not limited to damages for loss of goodwill,
467
+ work stoppage, computer failure or malfunction, or any and all
468
+ other commercial damages or losses), even if such Contributor
469
+ has been advised of the possibility of such damages.
470
+
471
+ 9. Accepting Warranty or Additional Liability. While redistributing
472
+ the Work or Derivative Works thereof, You may choose to offer,
473
+ and charge a fee for, acceptance of support, warranty, indemnity,
474
+ or other liability obligations and/or rights consistent with this
475
+ License. However, in accepting such obligations, You may act only
476
+ on Your own behalf and on Your sole responsibility, not on behalf
477
+ of any other Contributor, and only if You agree to indemnify,
478
+ defend, and hold each Contributor harmless for any liability
479
+ incurred by, or claims asserted against, such Contributor by reason
480
+ of your accepting any such warranty or additional liability.
481
+
482
+ END OF TERMS AND CONDITIONS
483
+
484
+ APPENDIX: How to apply the Apache License to your work.
485
+
486
+ To apply the Apache License to your work, attach the following
487
+ boilerplate notice, with the fields enclosed by brackets "[]"
488
+ replaced with your own identifying information. (Don't include
489
+ the brackets!) The text should be enclosed in the appropriate
490
+ comment syntax for the file format. We also recommend that a
491
+ file or class name and description of purpose be included on the
492
+ same "printed page" as the copyright notice for easier
493
+ identification within third-party archives.
494
+
495
+ Copyright 2020 - present, Facebook, Inc
496
+
497
+ Licensed under the Apache License, Version 2.0 (the "License");
498
+ you may not use this file except in compliance with the License.
499
+ You may obtain a copy of the License at
500
+
501
+ http://www.apache.org/licenses/LICENSE-2.0
502
+
503
+ Unless required by applicable law or agreed to in writing, software
504
+ distributed under the License is distributed on an "AS IS" BASIS,
505
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
506
+ See the License for the specific language governing permissions and
507
+ limitations under the License.
Makefile ADDED
@@ -0,0 +1,142 @@
1
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ .PHONY: tests
16
+
17
+ PYTHON_PATH := $(shell which python)
18
+
19
+ # If uv is installed and a virtual environment exists, use it
20
+ UV_CHECK := $(shell command -v uv)
21
+ ifneq ($(UV_CHECK),)
22
+ PYTHON_PATH := $(shell .venv/bin/python)
23
+ endif
24
+
25
+ export PATH := $(dir $(PYTHON_PATH)):$(PATH)
26
+
27
+ DEVICE ?= cpu
28
+
29
+ build-cpu:
30
+ docker build -t lerobot:latest -f docker/lerobot-cpu/Dockerfile .
31
+
32
+ build-gpu:
33
+ docker build -t lerobot:latest -f docker/lerobot-gpu/Dockerfile .
34
+
35
+ test-end-to-end:
36
+ ${MAKE} DEVICE=$(DEVICE) test-act-ete-train
37
+ ${MAKE} DEVICE=$(DEVICE) test-act-ete-train-resume
38
+ ${MAKE} DEVICE=$(DEVICE) test-act-ete-eval
39
+ ${MAKE} DEVICE=$(DEVICE) test-diffusion-ete-train
40
+ ${MAKE} DEVICE=$(DEVICE) test-diffusion-ete-eval
41
+ ${MAKE} DEVICE=$(DEVICE) test-tdmpc-ete-train
42
+ ${MAKE} DEVICE=$(DEVICE) test-tdmpc-ete-eval
43
+
44
+ test-act-ete-train:
45
+ python lerobot/scripts/train.py \
46
+ --policy.type=act \
47
+ --policy.dim_model=64 \
48
+ --policy.n_action_steps=20 \
49
+ --policy.chunk_size=20 \
50
+ --policy.device=$(DEVICE) \
51
+ --env.type=aloha \
52
+ --env.episode_length=5 \
53
+ --dataset.repo_id=lerobot/aloha_sim_transfer_cube_human \
54
+ --dataset.image_transforms.enable=true \
55
+ --dataset.episodes="[0]" \
56
+ --batch_size=2 \
57
+ --steps=4 \
58
+ --eval_freq=2 \
59
+ --eval.n_episodes=1 \
60
+ --eval.batch_size=1 \
61
+ --save_freq=2 \
62
+ --save_checkpoint=true \
63
+ --log_freq=1 \
64
+ --wandb.enable=false \
65
+ --output_dir=tests/outputs/act/
66
+
67
+ test-act-ete-train-resume:
68
+ python lerobot/scripts/train.py \
69
+ --config_path=tests/outputs/act/checkpoints/000002/pretrained_model/train_config.json \
70
+ --resume=true
71
+
72
+ test-act-ete-eval:
73
+ python lerobot/scripts/eval.py \
74
+ --policy.path=tests/outputs/act/checkpoints/000004/pretrained_model \
75
+ --policy.device=$(DEVICE) \
76
+ --env.type=aloha \
77
+ --env.episode_length=5 \
78
+ --eval.n_episodes=1 \
79
+ --eval.batch_size=1
80
+
81
+ test-diffusion-ete-train:
82
+ python lerobot/scripts/train.py \
83
+ --policy.type=diffusion \
84
+ --policy.down_dims='[64,128,256]' \
85
+ --policy.diffusion_step_embed_dim=32 \
86
+ --policy.num_inference_steps=10 \
87
+ --policy.device=$(DEVICE) \
88
+ --env.type=pusht \
89
+ --env.episode_length=5 \
90
+ --dataset.repo_id=lerobot/pusht \
91
+ --dataset.image_transforms.enable=true \
92
+ --dataset.episodes="[0]" \
93
+ --batch_size=2 \
94
+ --steps=2 \
95
+ --eval_freq=2 \
96
+ --eval.n_episodes=1 \
97
+ --eval.batch_size=1 \
98
+ --save_checkpoint=true \
99
+ --save_freq=2 \
100
+ --log_freq=1 \
101
+ --wandb.enable=false \
102
+ --output_dir=tests/outputs/diffusion/
103
+
104
+ test-diffusion-ete-eval:
105
+ python lerobot/scripts/eval.py \
106
+ --policy.path=tests/outputs/diffusion/checkpoints/000002/pretrained_model \
107
+ --policy.device=$(DEVICE) \
108
+ --env.type=pusht \
109
+ --env.episode_length=5 \
110
+ --eval.n_episodes=1 \
111
+ --eval.batch_size=1
112
+
113
+ test-tdmpc-ete-train:
114
+ python lerobot/scripts/train.py \
115
+ --policy.type=tdmpc \
116
+ --policy.device=$(DEVICE) \
117
+ --env.type=xarm \
118
+ --env.task=XarmLift-v0 \
119
+ --env.episode_length=5 \
120
+ --dataset.repo_id=lerobot/xarm_lift_medium \
121
+ --dataset.image_transforms.enable=true \
122
+ --dataset.episodes="[0]" \
123
+ --batch_size=2 \
124
+ --steps=2 \
125
+ --eval_freq=2 \
126
+ --eval.n_episodes=1 \
127
+ --eval.batch_size=1 \
128
+ --save_checkpoint=true \
129
+ --save_freq=2 \
130
+ --log_freq=1 \
131
+ --wandb.enable=false \
132
+ --output_dir=tests/outputs/tdmpc/
133
+
134
+ test-tdmpc-ete-eval:
135
+ python lerobot/scripts/eval.py \
136
+ --policy.path=tests/outputs/tdmpc/checkpoints/000002/pretrained_model \
137
+ --policy.device=$(DEVICE) \
138
+ --env.type=xarm \
139
+ --env.episode_length=5 \
140
+ --env.task=XarmLift-v0 \
141
+ --eval.n_episodes=1 \
142
+ --eval.batch_size=1
README.md ADDED
@@ -0,0 +1,389 @@
1
+ <p align="center">
2
+ <picture>
3
+ <source media="(prefers-color-scheme: dark)" srcset="media/lerobot-logo-thumbnail.png">
4
+ <source media="(prefers-color-scheme: light)" srcset="media/lerobot-logo-thumbnail.png">
5
+ <img alt="LeRobot, Hugging Face Robotics Library" src="media/lerobot-logo-thumbnail.png" style="max-width: 100%;">
6
+ </picture>
7
+ <br/>
8
+ <br/>
9
+ </p>
10
+
11
+ <div align="center">
12
+
13
+ [![Tests](https://github.com/huggingface/lerobot/actions/workflows/nightly-tests.yml/badge.svg?branch=main)](https://github.com/huggingface/lerobot/actions/workflows/nightly-tests.yml?query=branch%3Amain)
14
+ [![Coverage](https://codecov.io/gh/huggingface/lerobot/branch/main/graph/badge.svg?token=TODO)](https://codecov.io/gh/huggingface/lerobot)
15
+ [![Python versions](https://img.shields.io/pypi/pyversions/lerobot)](https://www.python.org/downloads/)
16
+ [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/huggingface/lerobot/blob/main/LICENSE)
17
+ [![Status](https://img.shields.io/pypi/status/lerobot)](https://pypi.org/project/lerobot/)
18
+ [![Version](https://img.shields.io/pypi/v/lerobot)](https://pypi.org/project/lerobot/)
19
+ [![Examples](https://img.shields.io/badge/Examples-green.svg)](https://github.com/huggingface/lerobot/tree/main/examples)
20
+ [![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-v2.1%20adopted-ff69b4.svg)](https://github.com/huggingface/lerobot/blob/main/CODE_OF_CONDUCT.md)
21
+ [![Discord](https://dcbadge.vercel.app/api/server/C5P34WJ68S?style=flat)](https://discord.gg/s3KuuzsPFb)
22
+
23
+ </div>
24
+
25
+ <h2 align="center">
26
+ <p><a href="https://github.com/huggingface/lerobot/blob/main/examples/10_use_so100.md">
27
+ Build Your Own SO-100 Robot!</a></p>
28
+ </h2>
29
+
30
+ <div align="center">
31
+ <img src="media/so100/leader_follower.webp?raw=true" alt="SO-100 leader and follower arms" title="SO-100 leader and follower arms" width="50%">
32
+
33
+ <p><strong>Meet the SO-100 – Just $110 per arm!</strong></p>
34
+ <p>Train it in minutes with a few simple moves on your laptop.</p>
35
+ <p>Then sit back and watch your creation act autonomously! 🤯</p>
36
+
37
+ <p><a href="https://github.com/huggingface/lerobot/blob/main/examples/10_use_so100.md">
38
+ Get the full SO-100 tutorial here.</a></p>
39
+
40
+ <p>Want to take it to the next level? Make your SO-100 mobile by building LeKiwi!</p>
41
+ <p>Check out the <a href="https://github.com/huggingface/lerobot/blob/main/examples/11_use_lekiwi.md">LeKiwi tutorial</a> and bring your robot to life on wheels.</p>
42
+
43
+ <img src="media/lekiwi/kiwi.webp?raw=true" alt="LeKiwi mobile robot" title="LeKiwi mobile robot" width="50%">
44
+ </div>
45
+
46
+ <br/>
47
+
48
+ <h3 align="center">
49
+ <p>LeRobot: State-of-the-art AI for real-world robotics</p>
50
+ </h3>
51
+
52
+ ---
53
+
54
+
55
+ 🤗 LeRobot aims to provide models, datasets, and tools for real-world robotics in PyTorch. The goal is to lower the barrier to entry to robotics so that everyone can contribute and benefit from sharing datasets and pretrained models.
56
+
57
+ 🤗 LeRobot contains state-of-the-art approaches that have been shown to transfer to the real world, with a focus on imitation learning and reinforcement learning.
58
+
59
+ 🤗 LeRobot already provides a set of pretrained models, datasets with human collected demonstrations, and simulation environments to get started without assembling a robot. In the coming weeks, the plan is to add more and more support for real-world robotics on the most affordable and capable robots out there.
60
+
61
+ 🤗 LeRobot hosts pretrained models and datasets on this Hugging Face community page: [huggingface.co/lerobot](https://huggingface.co/lerobot)
62
+
63
+ #### Examples of pretrained models on simulation environments
64
+
65
+ <table>
66
+ <tr>
67
+ <td><img src="media/gym/aloha_act.gif" width="100%" alt="ACT policy on ALOHA env"/></td>
68
+ <td><img src="media/gym/simxarm_tdmpc.gif" width="100%" alt="TDMPC policy on SimXArm env"/></td>
69
+ <td><img src="media/gym/pusht_diffusion.gif" width="100%" alt="Diffusion policy on PushT env"/></td>
70
+ </tr>
71
+ <tr>
72
+ <td align="center">ACT policy on ALOHA env</td>
73
+ <td align="center">TDMPC policy on SimXArm env</td>
74
+ <td align="center">Diffusion policy on PushT env</td>
75
+ </tr>
76
+ </table>
77
+
78
+ ### Acknowledgment
79
+
80
+ - Thanks to Tony Zhao, Zipeng Fu and colleagues for open sourcing ACT policy, ALOHA environments and datasets. Ours are adapted from [ALOHA](https://tonyzhaozh.github.io/aloha) and [Mobile ALOHA](https://mobile-aloha.github.io).
81
+ - Thanks to Cheng Chi, Zhenjia Xu and colleagues for open sourcing Diffusion policy, Pusht environment and datasets, as well as UMI datasets. Ours are adapted from [Diffusion Policy](https://diffusion-policy.cs.columbia.edu) and [UMI Gripper](https://umi-gripper.github.io).
82
+ - Thanks to Nicklas Hansen, Yunhai Feng and colleagues for open sourcing TDMPC policy, Simxarm environments and datasets. Ours are adapted from [TDMPC](https://github.com/nicklashansen/tdmpc) and [FOWM](https://www.yunhaifeng.com/FOWM).
83
+ - Thanks to Antonio Loquercio and Ashish Kumar for their early support.
84
+ - Thanks to [Seungjae (Jay) Lee](https://sjlee.cc/), [Mahi Shafiullah](https://mahis.life/) and colleagues for open sourcing [VQ-BeT](https://sjlee.cc/vq-bet/) policy and helping us adapt the codebase to our repository. The policy is adapted from [VQ-BeT repo](https://github.com/jayLEE0301/vq_bet_official).
85
+
86
+
87
+ ## Installation
88
+
89
+ Download our source code:
90
+ ```bash
91
+ git clone https://github.com/huggingface/lerobot.git
92
+ cd lerobot
93
+ ```
94
+
95
+ Create a virtual environment with Python 3.10 and activate it, e.g. with [`miniconda`](https://docs.anaconda.com/free/miniconda/index.html):
96
+ ```bash
97
+ conda create -y -n lerobot python=3.10
98
+ conda activate lerobot
99
+ ```
100
+
101
+ Install 🤗 LeRobot:
102
+ ```bash
103
+ pip install -e .
104
+ ```
105
+
106
+ > **NOTE:** Depending on your platform, if you encounter any build errors during this step
107
+ you may need to install `cmake` and `build-essential` to build some of our dependencies.
108
+ On Linux: `sudo apt-get install cmake build-essential`
109
+
110
+ For simulations, 🤗 LeRobot comes with gymnasium environments that can be installed as extras:
111
+ - [aloha](https://github.com/huggingface/gym-aloha)
112
+ - [xarm](https://github.com/huggingface/gym-xarm)
113
+ - [pusht](https://github.com/huggingface/gym-pusht)
114
+
115
+ For instance, to install 🤗 LeRobot with aloha and pusht, use:
116
+ ```bash
117
+ pip install -e ".[aloha, pusht]"
118
+ ```
119
+
120
+ To use [Weights and Biases](https://docs.wandb.ai/quickstart) for experiment tracking, log in with
121
+ ```bash
122
+ wandb login
123
+ ```
124
+
125
+ (note: you will also need to enable WandB in the configuration. See below.)
126
+
127
+ ## Walkthrough
128
+
129
+ ```
130
+ .
131
+ ├── examples # contains demonstration examples, start here to learn about LeRobot
132
+ | └── advanced # contains even more examples for those who have mastered the basics
133
+ ├── lerobot
134
+ | ├── configs # contains config classes with all options that you can override in the command line
135
+ | ├── common # contains classes and utilities
136
+ | | ├── datasets # various datasets of human demonstrations: aloha, pusht, xarm
137
+ | | ├── envs # various sim environments: aloha, pusht, xarm
138
+ | | ├── policies # various policies: act, diffusion, tdmpc
139
+ | | ├── robot_devices # various real devices: dynamixel motors, opencv cameras, koch robots
140
+ | | └── utils # various utilities
141
+ | └── scripts # contains functions to execute via command line
142
+ | ├── eval.py # load policy and evaluate it on an environment
143
+ | ├── train.py # train a policy via imitation learning and/or reinforcement learning
144
+ | ├── control_robot.py # teleoperate a real robot, record data, run a policy
145
+ | ├── push_dataset_to_hub.py # convert your dataset into LeRobot dataset format and upload it to the Hugging Face hub
146
+ | └── visualize_dataset.py # load a dataset and render its demonstrations
147
+ ├── outputs # contains results of scripts execution: logs, videos, model checkpoints
148
+ └── tests # contains pytest utilities for continuous integration
149
+ ```
150
+
151
+ ### Visualize datasets
152
+
153
+ Check out [example 1](./examples/1_load_lerobot_dataset.py) that illustrates how to use our dataset class which automatically downloads data from the Hugging Face hub.
154
+
155
+ You can also locally visualize episodes from a dataset on the hub by executing our script from the command line:
156
+ ```bash
157
+ python lerobot/scripts/visualize_dataset.py \
158
+ --repo-id lerobot/pusht \
159
+ --episode-index 0
160
+ ```
161
+
162
+ or from a dataset in a local folder with the `root` option and the `--local-files-only` flag (in the following case the dataset will be searched for in `./my_local_data_dir/lerobot/pusht`):
163
+ ```bash
164
+ python lerobot/scripts/visualize_dataset.py \
165
+ --repo-id lerobot/pusht \
166
+ --root ./my_local_data_dir \
167
+ --local-files-only 1 \
168
+ --episode-index 0
169
+ ```
170
+
171
+
172
+ It will open `rerun.io` and display the camera streams, robot states and actions, like this:
173
+
174
+ https://github-production-user-asset-6210df.s3.amazonaws.com/4681518/328035972-fd46b787-b532-47e2-bb6f-fd536a55a7ed.mov?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAVCODYLSA53PQK4ZA%2F20240505%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20240505T172924Z&X-Amz-Expires=300&X-Amz-Signature=d680b26c532eeaf80740f08af3320d22ad0b8a4e4da1bcc4f33142c15b509eda&X-Amz-SignedHeaders=host&actor_id=24889239&key_id=0&repo_id=748713144
175
+
176
+
177
+ Our script can also visualize datasets stored on a remote server. See `python lerobot/scripts/visualize_dataset.py --help` for more instructions.
178
+
179
+ ### The `LeRobotDataset` format
180
+
181
+ A dataset in `LeRobotDataset` format is very simple to use. It can be loaded from a repository on the Hugging Face hub or a local folder simply with e.g. `dataset = LeRobotDataset("lerobot/aloha_static_coffee")` and can be indexed into like any Hugging Face and PyTorch dataset. For instance `dataset[0]` will retrieve a single temporal frame from the dataset containing observation(s) and an action as PyTorch tensors ready to be fed to a model.
182
+
183
+ A specificity of `LeRobotDataset` is that, rather than retrieving a single frame by its index, we can retrieve several frames based on their temporal relationship with the indexed frame, by setting `delta_timestamps` to a list of relative times with respect to the indexed frame. For example, with `delta_timestamps = {"observation.image": [-1, -0.5, -0.2, 0]}` one can retrieve, for a given index, 4 frames: 3 "previous" frames 1 second, 0.5 seconds, and 0.2 seconds before the indexed frame, and the indexed frame itself (corresponding to the 0 entry). See example [1_load_lerobot_dataset.py](examples/1_load_lerobot_dataset.py) for more details on `delta_timestamps`.
184
+
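+ For instance, a minimal sketch of loading a dataset with `delta_timestamps` (the image key below is the one used by the pusht dataset; other datasets use different keys):
+ ```python
+ from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
+
+ # For every indexed frame, also retrieve the frames 1s, 0.5s and 0.2s before it.
+ delta_timestamps = {"observation.image": [-1, -0.5, -0.2, 0]}
+ dataset = LeRobotDataset("lerobot/pusht", delta_timestamps=delta_timestamps)
+
+ item = dataset[0]
+ print(item["observation.image"].shape)  # 4 stacked frames: (4, c, h, w)
+ ```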
185
+ Under the hood, the `LeRobotDataset` format makes use of several ways to serialize data, which can be useful to understand if you plan to work more closely with this format. We tried to make a flexible yet simple dataset format that covers most types of features and specificities present in reinforcement learning and robotics, in simulation and in the real world, with a focus on cameras and robot states, but easily extended to other types of sensory inputs as long as they can be represented by a tensor.
186
+
187
+ Here are the important details and the internal structure of a typical `LeRobotDataset` instantiated with `dataset = LeRobotDataset("lerobot/aloha_static_coffee")`. The exact features change from dataset to dataset, but not the main aspects:
188
+
189
+ ```
190
+ dataset attributes:
191
+ ├ hf_dataset: a Hugging Face dataset (backed by Arrow/parquet). Typical features example:
192
+ │ ├ observation.images.cam_high (VideoFrame):
193
+ │ │ VideoFrame = {'path': path to a mp4 video, 'timestamp' (float32): timestamp in the video}
194
+ │ ├ observation.state (list of float32): positions of the arm's joints (for instance)
195
+ │ ... (more observations)
196
+ │ ├ action (list of float32): goal positions of the arm's joints (for instance)
197
+ │ ├ episode_index (int64): index of the episode for this sample
198
+ │ ├ frame_index (int64): index of the frame for this sample in the episode; starts at 0 for each episode
199
+ │ ├ timestamp (float32): timestamp in the episode
200
+ │ ├ next.done (bool): indicates the end of an episode; True for the last frame in each episode
201
+ │ └ index (int64): general index in the whole dataset
202
+ ├ episode_data_index: contains 2 tensors with the start and end indices of each episode
203
+ │ ├ from (1D int64 tensor): first frame index for each episode — shape (num episodes,) starts with 0
204
+ │ └ to: (1D int64 tensor): last frame index for each episode — shape (num episodes,)
205
+ ├ stats: a dictionary of statistics (max, mean, min, std) for each feature in the dataset, for instance
206
+ │ ├ observation.images.cam_high: {'max': tensor with same number of dimensions (e.g. `(c, 1, 1)` for images, `(c,)` for states), etc.}
207
+ │ ...
208
+ ├ info: a dictionary of metadata on the dataset
209
+ │ ├ codebase_version (str): this is to keep track of the codebase version the dataset was created with
210
+ │ ├ fps (float): frame per second the dataset is recorded/synchronized to
211
+ │ ├ video (bool): indicates if frames are encoded in mp4 video files to save space or stored as png files
212
+ │ └ encoding (dict): if video, this documents the main options that were used with ffmpeg to encode the videos
213
+ ├ videos_dir (Path): where the mp4 videos or png images are stored/accessed
214
+ └ camera_keys (list of string): the keys to access camera features in the item returned by the dataset (e.g. `["observation.images.cam_high", ...]`)
215
+ ```
216
+
217
+ A `LeRobotDataset` is serialized using several widespread file formats for each of its parts, namely:
218
+ - hf_dataset stored using Hugging Face datasets library serialization to parquet
219
+ - videos are stored in mp4 format to save space
220
+ - metadata are stored in plain json/jsonl files
221
+
222
+ Datasets can be uploaded/downloaded to and from the Hugging Face hub seamlessly. To work on a local dataset, you can specify its location with the `root` argument if it's not in the default `~/.cache/huggingface/lerobot` location.
223
+
224
+ ### Evaluate a pretrained policy
225
+
226
+ Check out [example 2](./examples/2_evaluate_pretrained_policy.py) that illustrates how to download a pretrained policy from Hugging Face hub, and run an evaluation on its corresponding environment.
227
+
228
+ We also provide a more capable script to parallelize the evaluation over multiple environments during the same rollout. Here is an example with a pretrained model hosted on [lerobot/diffusion_pusht](https://huggingface.co/lerobot/diffusion_pusht):
229
+ ```bash
230
+ python lerobot/scripts/eval.py \
231
+ --policy.path=lerobot/diffusion_pusht \
232
+ --env.type=pusht \
233
+ --eval.batch_size=10 \
234
+ --eval.n_episodes=10 \
235
+ --policy.use_amp=false \
236
+ --policy.device=cuda
237
+ ```
238
+
239
+ Note: After training your own policy, you can re-evaluate the checkpoints with:
240
+
241
+ ```bash
242
+ python lerobot/scripts/eval.py --policy.path={OUTPUT_DIR}/checkpoints/last/pretrained_model
243
+ ```
244
+
245
+ See `python lerobot/scripts/eval.py --help` for more instructions.
246
+
247
+ ### Train your own policy
248
+
249
+ Check out [example 3](./examples/3_train_policy.py) that illustrates how to train a model using our core library in Python, and [example 4](./examples/4_train_policy_with_script.md) that shows how to use our training script from the command line.
250
+
251
+ To use wandb for logging training and evaluation curves, make sure you've run `wandb login` as a one-time setup step. Then, when launching a training run, enable WandB in the configuration by adding `--wandb.enable=true`, as in the sketch below.
252
+
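+ A minimal sketch of such a command (the policy, dataset and environment flags mirror the ones used in this repo's Makefile test targets and are only an example):
+ ```bash
+ python lerobot/scripts/train.py \
+   --policy.type=act \
+   --dataset.repo_id=lerobot/aloha_sim_transfer_cube_human \
+   --env.type=aloha \
+   --wandb.enable=true
+ ```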
253
+ A link to the wandb logs for the run will also show up in yellow in your terminal. Here is an example of what they look like in your browser. Please also check [here](./examples/4_train_policy_with_script.md#typical-logs-and-metrics) for an explanation of some commonly used metrics in the logs.
254
+
255
+ ![](media/wandb.png)
256
+
257
+ Note: For efficiency, during training every checkpoint is evaluated on a low number of episodes. You may use `--eval.n_episodes=500` to evaluate on more episodes than the default. Or, after training, you may want to re-evaluate your best checkpoints on more episodes or change the evaluation settings. See `python lerobot/scripts/eval.py --help` for more instructions.
258
+
259
+ #### Reproduce state-of-the-art (SOTA)
260
+
261
+ We provide some pretrained policies on our [hub page](https://huggingface.co/lerobot) that can achieve state-of-the-art performance.
262
+ You can reproduce their training by loading the config from their run. Simply running:
263
+ ```bash
264
+ python lerobot/scripts/train.py --config_path=lerobot/diffusion_pusht
265
+ ```
266
+ reproduces SOTA results for Diffusion Policy on the PushT task.
267
+
268
+ ## Contribute
269
+
270
+ If you would like to contribute to 🤗 LeRobot, please check out our [contribution guide](https://github.com/huggingface/lerobot/blob/main/CONTRIBUTING.md).
271
+
272
+ <!-- ### Add a new dataset
273
+
274
+ To add a dataset to the hub, you need to login using a write-access token, which can be generated from the [Hugging Face settings](https://huggingface.co/settings/tokens):
275
+ ```bash
276
+ huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
277
+ ```
278
+
279
+ Then point to your raw dataset folder (e.g. `data/aloha_static_pingpong_test_raw`), and push your dataset to the hub with:
280
+ ```bash
281
+ python lerobot/scripts/push_dataset_to_hub.py \
282
+ --raw-dir data/aloha_static_pingpong_test_raw \
283
+ --out-dir data \
284
+ --repo-id lerobot/aloha_static_pingpong_test \
285
+ --raw-format aloha_hdf5
286
+ ```
287
+
288
+ See `python lerobot/scripts/push_dataset_to_hub.py --help` for more instructions.
289
+
290
+ If your dataset format is not supported, implement your own in `lerobot/common/datasets/push_dataset_to_hub/${raw_format}_format.py` by copying examples like [pusht_zarr](https://github.com/huggingface/lerobot/blob/main/lerobot/common/datasets/push_dataset_to_hub/pusht_zarr_format.py), [umi_zarr](https://github.com/huggingface/lerobot/blob/main/lerobot/common/datasets/push_dataset_to_hub/umi_zarr_format.py), [aloha_hdf5](https://github.com/huggingface/lerobot/blob/main/lerobot/common/datasets/push_dataset_to_hub/aloha_hdf5_format.py), or [xarm_pkl](https://github.com/huggingface/lerobot/blob/main/lerobot/common/datasets/push_dataset_to_hub/xarm_pkl_format.py). -->
291
+
292
+
293
+ ### Add a pretrained policy
294
+
295
+ Once you have trained a policy you may upload it to the Hugging Face hub using a hub id that looks like `${hf_user}/${repo_name}` (e.g. [lerobot/diffusion_pusht](https://huggingface.co/lerobot/diffusion_pusht)).
296
+
297
+ You first need to find the checkpoint folder located inside your experiment directory (e.g. `outputs/train/2024-05-05/20-21-12_aloha_act_default/checkpoints/002500`). Within that there is a `pretrained_model` directory which should contain:
298
+ - `config.json`: A serialized version of the policy configuration (following the policy's dataclass config).
299
+ - `model.safetensors`: A set of `torch.nn.Module` parameters, saved in [Hugging Face Safetensors](https://huggingface.co/docs/safetensors/index) format.
300
+ - `train_config.json`: A consolidated configuration containing all parameters used for training. The policy configuration should match `config.json` exactly. This is useful for anyone who wants to evaluate your policy or for reproducibility.
301
+
302
+ To upload these to the hub, run the following:
303
+ ```bash
304
+ huggingface-cli upload ${hf_user}/${repo_name} path/to/pretrained_model
305
+ ```
306
+
307
+ See [eval.py](https://github.com/huggingface/lerobot/blob/main/lerobot/scripts/eval.py) for an example of how other people may use your policy.
308
+
309
+
310
+ ### Improve your code with profiling
311
+
312
+ An example of a code snippet to profile the evaluation of a policy:
313
+ ```python
314
+ import torch
+ from torch.profiler import profile, record_function, ProfilerActivity
315
+
316
+ def trace_handler(prof):
317
+ prof.export_chrome_trace(f"tmp/trace_schedule_{prof.step_num}.json")
318
+
319
+ with profile(
320
+ activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
321
+ schedule=torch.profiler.schedule(
322
+ wait=2,
323
+ warmup=2,
324
+ active=3,
325
+ ),
326
+ on_trace_ready=trace_handler
327
+ ) as prof:
328
+ with record_function("eval_policy"):
329
+ for i in range(num_episodes):
330
+ prof.step()
331
+ # insert code to profile, potentially whole body of eval_policy function
332
+ ```
333
+
334
+ ## Citation
335
+
336
+ If you want, you can cite this work with:
337
+ ```bibtex
338
+ @misc{cadene2024lerobot,
339
+ author = {Cadene, Remi and Alibert, Simon and Soare, Alexander and Gallouedec, Quentin and Zouitine, Adil and Wolf, Thomas},
340
+ title = {LeRobot: State-of-the-art Machine Learning for Real-World Robotics in Pytorch},
341
+ howpublished = "\url{https://github.com/huggingface/lerobot}",
342
+ year = {2024}
343
+ }
344
+ ```
345
+
346
+ Additionally, if you are using any of the particular policy architectures, pretrained models, or datasets, it is recommended to cite the original authors of the work as they appear below:
347
+
348
+ - [Diffusion Policy](https://diffusion-policy.cs.columbia.edu)
349
+ ```bibtex
350
+ @article{chi2024diffusionpolicy,
351
+ author = {Cheng Chi and Zhenjia Xu and Siyuan Feng and Eric Cousineau and Yilun Du and Benjamin Burchfiel and Russ Tedrake and Shuran Song},
352
+ title ={Diffusion Policy: Visuomotor Policy Learning via Action Diffusion},
353
+ journal = {The International Journal of Robotics Research},
354
+ year = {2024},
355
+ }
356
+ ```
357
+ - [ACT or ALOHA](https://tonyzhaozh.github.io/aloha)
358
+ ```bibtex
359
+ @article{zhao2023learning,
360
+ title={Learning fine-grained bimanual manipulation with low-cost hardware},
361
+ author={Zhao, Tony Z and Kumar, Vikash and Levine, Sergey and Finn, Chelsea},
362
+ journal={arXiv preprint arXiv:2304.13705},
363
+ year={2023}
364
+ }
365
+ ```
366
+
367
+ - [TDMPC](https://www.nicklashansen.com/td-mpc/)
368
+
369
+ ```bibtex
370
+ @inproceedings{Hansen2022tdmpc,
371
+ title={Temporal Difference Learning for Model Predictive Control},
372
+ author={Nicklas Hansen and Xiaolong Wang and Hao Su},
373
+ booktitle={ICML},
374
+ year={2022}
375
+ }
376
+ ```
377
+
378
+ - [VQ-BeT](https://sjlee.cc/vq-bet/)
379
+ ```bibtex
380
+ @article{lee2024behavior,
381
+ title={Behavior generation with latent actions},
382
+ author={Lee, Seungjae and Wang, Yibin and Etukuru, Haritheja and Kim, H Jin and Shafiullah, Nur Muhammad Mahi and Pinto, Lerrel},
383
+ journal={arXiv preprint arXiv:2403.03181},
384
+ year={2024}
385
+ }
386
+ ```
387
+ ## Star History
388
+
389
+ [![Star History Chart](https://api.star-history.com/svg?repos=huggingface/lerobot&type=Timeline)](https://star-history.com/#huggingface/lerobot&Timeline)
benchmarks/video/README.md ADDED
@@ -0,0 +1,271 @@
1
+ # Video benchmark
2
+
3
+
4
+ ## Questions
5
+ What is the optimal trade-off between:
6
+ - maximizing loading time with random access,
7
+ - minimizing memory space on disk,
8
+ - maximizing success rate of policies,
9
+ - compatibility across devices/platforms for decoding videos (e.g. video players, web browsers).
10
+
11
+ How to encode videos?
12
+ - Which video codec (`-vcodec`) to use? h264, h265, AV1?
13
+ - What pixel format to use (`-pix_fmt`)? `yuv444p` or `yuv420p`?
14
+ - How much compression (`-crf`)? No compression with `0`, intermediate compression with `25` or extreme with `50+`?
15
+ - Which frequency to choose for key frames (`-g`)? A key frame every `10` frames?
16
+
17
+ How to decode videos?
18
+ - Which `decoder`? `torchvision`, `torchaudio`, `ffmpegio`, `decord`, or `nvc`?
19
+ - What scenarios to use for requesting timestamps during the benchmark? (`timestamps_mode`)
20
+
21
+
22
+ ## Variables
23
+ **Image content & size**
24
+ We don't expect the same optimal settings for a dataset of images from a simulation as for real-world images captured in an apartment, in a factory, outdoors, or with lots of moving objects in the scene, etc. Similarly, loading times might not vary linearly with the image size (resolution).
25
+ For these reasons, we run this benchmark on four representative datasets:
26
+ - `lerobot/pusht_image`: (96 x 96 pixels) simulation with simple geometric shapes, fixed camera.
27
+ - `aliberts/aloha_mobile_shrimp_image`: (480 x 640 pixels) real-world indoor, moving camera.
28
+ - `aliberts/paris_street`: (720 x 1280 pixels) real-world outdoor, moving camera.
29
+ - `aliberts/kitchen`: (1080 x 1920 pixels) real-world indoor, fixed camera.
30
+
31
+ Note: The datasets used for this benchmark need to be image datasets, not video datasets.
32
+
33
+ **Data augmentations**
34
+ We might revisit this benchmark and find better settings if we train our policies with various data augmentations to make them more robust (e.g. robust to color changes, compression, etc.).
35
+
36
+ ### Encoding parameters
37
+ | parameter | values |
38
+ |-------------|--------------------------------------------------------------|
39
+ | **vcodec** | `libx264`, `libx265`, `libsvtav1` |
40
+ | **pix_fmt** | `yuv444p`, `yuv420p` |
41
+ | **g** | `1`, `2`, `3`, `4`, `5`, `6`, `10`, `15`, `20`, `40`, `None` |
42
+ | **crf** | `0`, `5`, `10`, `15`, `20`, `25`, `30`, `40`, `50`, `None` |
43
+
44
+ Note that the `crf` value might be interpreted differently by different video codecs. In other words, the same value used with one codec doesn't necessarily translate into the same compression level with another codec. In fact, the default value (`None`) isn't the same amongst the different video codecs. Importantly, the same is true for many other ffmpeg arguments, such as `g`, which specifies the frequency of the key frames (an example invocation is sketched below the documentation links).
45
+
46
+ For a comprehensive list and documentation of these parameters, see the ffmpeg documentation depending on the video codec used:
47
+ - h264: https://trac.ffmpeg.org/wiki/Encode/H.264
48
+ - h265: https://trac.ffmpeg.org/wiki/Encode/H.265
49
+ - AV1: https://trac.ffmpeg.org/wiki/Encode/AV1
50
+
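+ For illustration, here is roughly how these parameters map onto an ffmpeg invocation (a sketch; the input pattern, frame rate, and output path are placeholders):
+ ```bash
+ ffmpeg -f image2 -framerate 30 -i frame_%06d.png \
+     -vcodec libx264 -pix_fmt yuv444p -g 2 -crf 25 \
+     output.mp4
+ ```
+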
51
+ ### Decoding parameters
52
+ **Decoder**
53
+ We tested two video decoding backends from torchvision:
54
+ - `pyav`
55
+ - `video_reader` (requires to build torchvision from source)
56
+
57
+ **Requested timestamps**
58
+ Given the way video decoding works, once a keyframe has been loaded, the decoding of subsequent frames is fast.
59
+ This is of course affected by the `-g` parameter used during encoding, which specifies the frequency of the keyframes. Since our typical robotics policies might request only a few timestamps at random locations, we want to replicate these use cases with the following scenarios:
60
+ - `1_frame`: 1 frame,
61
+ - `2_frames`: 2 consecutive frames (e.g. `[t, t + 1 / fps]`),
62
+ - `6_frames`: 6 consecutive frames (e.g. `[t + i / fps for i in range(6)]`)
63
+
64
+ Note that this differs significantly from a typical use case like watching a movie, in which every frame is loaded sequentially from the beginning to the end and it's acceptable to have big values for `-g`.
65
+
66
+ Additionally, because some policies might request single timestamps that are a few frames apart, we also have the following scenario:
67
+ - `2_frames_4_space`: 2 frames with 4 consecutive frames of spacing in between (e.g `[t, t + 5 / fps]`),
68
+
69
+ However, due to how video decoding is implemented with `pyav`, we don't have access to an accurate seek so in practice this scenario is essentially the same as `6_frames` since all 6 frames between `t` and `t + 5 / fps` will be decoded.
70
+
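+ To make these scenarios concrete, here is a small sketch of the timestamps each mode requests, for an arbitrary starting time `t` (values are illustrative):
+ ```python
+ fps = 30
+ t = 2.0  # arbitrary starting timestamp, in seconds
+
+ scenarios = {
+     "1_frame": [t],
+     "2_frames": [t, t + 1 / fps],
+     "2_frames_4_space": [t, t + 5 / fps],
+     "6_frames": [t + i / fps for i in range(6)],
+ }
+ ```
+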
71
+
72
+ ## Metrics
73
+ **Data compression ratio (lower is better)**
74
+ `video_images_size_ratio` is the ratio of the memory space on disk taken by the encoded video over the memory space taken by the original images. For instance, `video_images_size_ratio=25%` means that the video takes 4 times less memory space on disk compared to the original images.
75
+
76
+ **Loading time ratio (lower is better)**
77
+ `video_images_load_time_ratio` is the ratio of the time it takes to decode frames from the video at given timestamps over the time it takes to load the exact same original images. Lower is better. For instance, `video_images_load_time_ratio=200%` means that decoding from video is 2 times slower than loading the original images.
78
+
79
+ **Average Mean Square Error (lower is better)**
80
+ `avg_mse` is the average mean square error between each decoded frame and its corresponding original image over all requested timestamps, normalized by the number of pixels in the image so that values remain comparable across different image sizes.
81
+
82
+ **Average Peak Signal to Noise Ratio (higher is better)**
83
+ `avg_psnr` measures the ratio between the maximum possible power of a signal and the power of corrupting noise that affects the fidelity of its representation. Higher PSNR indicates better quality.
84
+
85
+ **Average Structural Similarity Index Measure (higher is better)**
86
+ `avg_ssim` evaluates the perceived quality of images by comparing luminance, contrast, and structure. SSIM values range from -1 to 1, where 1 indicates perfect similarity.
87
+
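+ As a minimal sketch of how these metrics can be computed (using the same `skimage.metrics` functions as the benchmark script; the arrays below are synthetic placeholders):
+ ```python
+ import numpy as np
+ from skimage.metrics import mean_squared_error, peak_signal_noise_ratio, structural_similarity
+
+ # Two float images in [0, 1] with channel-first layout (c, h, w)
+ original = np.random.rand(3, 96, 96).astype(np.float32)
+ decoded = np.clip(original + np.random.normal(0, 0.01, original.shape), 0, 1)
+
+ mse = mean_squared_error(original, decoded)
+ psnr = peak_signal_noise_ratio(original, decoded, data_range=1.0)
+ ssim = structural_similarity(original, decoded, data_range=1.0, channel_axis=0)
+ print(f"mse={mse:.2e} psnr={psnr:.2f} ssim={ssim:.2%}")
+ ```
+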
88
+ One aspect that can't be measured here with those metrics is the compatibility of the encoding across platforms, in particular in web browsers, for visualization purposes.
89
+ h264, h265 and AV1 are all commonly used codecs and should not pose an issue. However, the chroma subsampling (`pix_fmt`) format might affect compatibility:
90
+ - `yuv420p` is more widely supported across various platforms, including web browsers.
91
+ - `yuv444p` offers higher color fidelity but might not be supported as broadly.
92
+
93
+
94
+ <!-- **Loss of a pretrained policy (higher is better)** (not available)
95
+ `loss_pretrained` is the result of evaluating with the selected encoding/decoding settings a policy pretrained on original images. It is easier to understand than `avg_l2_error`.
96
+
97
+ **Success rate after retraining (higher is better)** (not available)
98
+ `success_rate` is the result of training and evaluating a policy with the selected encoding/decoding settings. It is the most difficult metric to get but also the very best. -->
99
+
100
+
101
+ ## How the benchmark works
102
+ The benchmark evaluates both encoding and decoding of video frames on the first episode of each dataset.
103
+
104
+ **Encoding:** for each `vcodec` and `pix_fmt` pair, we use a default value for `g` and `crf` upon which we change a single value (either `g` or `crf`) to one of the specified values (we don't test every combination of those as this would be computationally too heavy).
105
+ This gives a unique set of encoding parameters which is used to encode the episode.
106
+
107
+ **Decoding:** Then, for each of those unique encodings, we iterate through every combination of the decoding parameters `backend` and `timestamps_mode`. For each of them, we record the metrics of a number of samples (given by `--num-samples`). This is parallelized for efficiency, and the number of worker threads can be controlled with `--num-workers`. Ideally, it's best to have a `--num-samples` that is divisible by `--num-workers`.
108
+
109
+ Intermediate results are saved in CSV tables for each `vcodec` and `pix_fmt` combination.
110
+ These are then all concatenated to a single table ready for analysis.
111
+
112
+ ## Caveats
113
+ We tried to measure the most impactful parameters for both encoding and decoding. However, for computational reasons we can't test out every combination.
114
+
115
+ Additional encoding parameters exist that are not included in this benchmark. In particular:
116
+ - `-preset`, which allows selecting encoding presets. A preset is a collection of options that provides a certain trade-off between encoding speed and compression ratio. When this parameter is left unspecified, it defaults to `medium` for libx264 and libx265 and to `8` for libsvtav1.
117
+ - `-tune`, which allows optimizing the encoding for certain aspects (e.g. film quality, fast decoding, etc.).
118
+
119
+ See the documentation mentioned above for more detailed info on these settings and for a more comprehensive list of other parameters.
120
+
121
+ Similarly on the decoding side, other decoders exist but are not implemented in our current benchmark. To name a few:
122
+ - `torchaudio`
123
+ - `ffmpegio`
124
+ - `decord`
125
+ - `nvc`
126
+
127
+ Note as well that since we are mostly interested in the performance at decoding time (also because encoding is done only once before uploading a dataset), we did not measure encoding times nor have any metrics regarding encoding.
128
+ However, besides the necessity to build ffmpeg from source, encoding did not pose any issue and it didn't take a significant amount of time during this benchmark.
129
+
130
+
131
+ ## Install
132
+ Building ffmpeg from source is required to include libx265 and libaom/libsvtav1 (av1) video codecs ([compilation guide](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu)).
133
+
134
+ **Note:** While you still need to build torchvision with a conda-installed `ffmpeg<4.3` to use the `video_reader` decoder (as described in [#220](https://github.com/huggingface/lerobot/pull/220)), you also need another ffmpeg version, custom-built with all the video codecs, for encoding. For the benchmark script to use that custom version, prepend the command with `PATH="$HOME/bin:$PATH"`, which is where ffmpeg should be built.
135
+
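+ For example (assuming the custom-built ffmpeg was installed under `$HOME/bin`, as in the compilation guide):
+ ```bash
+ PATH="$HOME/bin:$PATH" python benchmarks/video/run_video_benchmark.py --output-dir outputs/video_benchmark
+ ```
+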
136
+
137
+ ## Adding a video decoder
138
+ Right now, we're only benchmarking the two video decoders available with torchvision: `pyav` and `video_reader`.
139
+ You can easily add a new decoder to benchmark by adding it to this function in the script:
140
+ ```diff
141
+ def decode_video_frames(
142
+ video_path: str,
143
+ timestamps: list[float],
144
+ tolerance_s: float,
145
+ backend: str,
146
+ ) -> torch.Tensor:
147
+ if backend in ["pyav", "video_reader"]:
148
+ return decode_video_frames_torchvision(
149
+ video_path, timestamps, tolerance_s, backend
150
+ )
151
+ + elif backend == "your_decoder":
152
+ + return your_decoder_function(
153
+ + video_path, timestamps, tolerance_s, backend
154
+ + )
155
+ else:
156
+ raise NotImplementedError(backend)
157
+ ```
158
+
159
+
160
+ ## Example
161
+ For a quick run, you can try these parameters:
162
+ ```bash
163
+ python benchmarks/video/run_video_benchmark.py \
164
+ --output-dir outputs/video_benchmark \
165
+ --repo-ids \
166
+ lerobot/pusht_image \
167
+ aliberts/aloha_mobile_shrimp_image \
168
+ --vcodec libx264 libx265 \
169
+ --pix-fmt yuv444p yuv420p \
170
+ --g 2 20 None \
171
+ --crf 10 40 None \
172
+ --timestamps-modes 1_frame 2_frames \
173
+ --backends pyav video_reader \
174
+ --num-samples 5 \
175
+ --num-workers 5 \
176
+ --save-frames 0
177
+ ```
178
+
179
+
180
+ ## Results
181
+
182
+ ### Reproduce
183
+ We ran the benchmark with the following parameters:
184
+ ```bash
185
+ # h264 and h265 encodings
186
+ python benchmarks/video/run_video_benchmark.py \
187
+ --output-dir outputs/video_benchmark \
188
+ --repo-ids \
189
+ lerobot/pusht_image \
190
+ aliberts/aloha_mobile_shrimp_image \
191
+ aliberts/paris_street \
192
+ aliberts/kitchen \
193
+ --vcodec libx264 libx265 \
194
+ --pix-fmt yuv444p yuv420p \
195
+ --g 1 2 3 4 5 6 10 15 20 40 None \
196
+ --crf 0 5 10 15 20 25 30 40 50 None \
197
+ --timestamps-modes 1_frame 2_frames 6_frames \
198
+ --backends pyav video_reader \
199
+ --num-samples 50 \
200
+ --num-workers 5 \
201
+ --save-frames 1
202
+
203
+ # av1 encoding (only compatible with yuv420p and pyav decoder)
204
+ python benchmarks/video/run_video_benchmark.py \
205
+ --output-dir outputs/video_benchmark \
206
+ --repo-ids \
207
+ lerobot/pusht_image \
208
+ aliberts/aloha_mobile_shrimp_image \
209
+ aliberts/paris_street \
210
+ aliberts/kitchen \
211
+ --vcodec libsvtav1 \
212
+ --pix-fmt yuv420p \
213
+ --g 1 2 3 4 5 6 10 15 20 40 None \
214
+ --crf 0 5 10 15 20 25 30 40 50 None \
215
+ --timestamps-modes 1_frame 2_frames 6_frames \
216
+ --backends pyav \
217
+ --num-samples 50 \
218
+ --num-workers 5 \
219
+ --save-frames 1
220
+ ```
221
+
222
+ The full results are available [here](https://docs.google.com/spreadsheets/d/1OYJB43Qu8fC26k_OyoMFgGBBKfQRCi4BIuYitQnq3sw/edit?usp=sharing)
223
+
224
+
225
+ ### Parameters selected for LeRobotDataset
226
+ Considering these results, we chose what we think is the best set of encoding parameters:
227
+ - vcodec: `libsvtav1`
228
+ - pix-fmt: `yuv420p`
229
+ - g: `2`
230
+ - crf: `30`
231
+
232
+ Since we're using av1 encoding, we're choosing the `pyav` decoder as `video_reader` does not support it (and `pyav` doesn't require a custom build of `torchvision`).
233
+
234
+ ### Summary
235
+
236
+ These tables show the results for `g=2` and `crf=30`, using `timestamps-modes=6_frames` and `backend=pyav`.
237
+
238
+ | video_images_size_ratio | vcodec | pix_fmt | | | |
239
+ |------------------------------------|------------|---------|-----------|-----------|-----------|
240
+ | | libx264 | | libx265 | | libsvtav1 |
241
+ | repo_id | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p |
242
+ | lerobot/pusht_image | **16.97%** | 17.58% | 18.57% | 18.86% | 22.06% |
243
+ | aliberts/aloha_mobile_shrimp_image | 2.14% | 2.11% | 1.38% | **1.37%** | 5.59% |
244
+ | aliberts/paris_street | 2.12% | 2.13% | **1.54%** | **1.54%** | 4.43% |
245
+ | aliberts/kitchen | 1.40% | 1.39% | **1.00%** | **1.00%** | 2.52% |
246
+
247
+ | video_images_load_time_ratio | vcodec | pix_fmt | | | |
248
+ |------------------------------------|---------|---------|----------|---------|-----------|
249
+ | | libx264 | | libx265 | | libsvtav1 |
250
+ | repo_id | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p |
251
+ | lerobot/pusht_image | 6.45 | 5.19 | **1.90** | 2.12 | 2.47 |
252
+ | aliberts/aloha_mobile_shrimp_image | 11.80 | 7.92 | 0.71 | 0.85 | **0.48** |
253
+ | aliberts/paris_street | 2.21 | 2.05 | 0.36 | 0.49 | **0.30** |
254
+ | aliberts/kitchen | 1.46 | 1.46 | 0.28 | 0.51 | **0.26** |
255
+
256
+ | | | vcodec | pix_fmt | | | |
257
+ |------------------------------------|----------|----------|--------------|----------|-----------|--------------|
258
+ | | | libx264 | | libx265 | | libsvtav1 |
259
+ | repo_id | metric | yuv420p | yuv444p | yuv420p | yuv444p | yuv420p |
260
+ | lerobot/pusht_image | avg_mse | 2.90E-04 | **2.03E-04** | 3.13E-04 | 2.29E-04 | 2.19E-04 |
261
+ | | avg_psnr | 35.44 | 37.07 | 35.49 | **37.30** | 37.20 |
262
+ | | avg_ssim | 98.28% | **98.85%** | 98.31% | 98.84% | 98.72% |
263
+ | aliberts/aloha_mobile_shrimp_image | avg_mse | 2.76E-04 | 2.59E-04 | 3.17E-04 | 3.06E-04 | **1.30E-04** |
264
+ | | avg_psnr | 35.91 | 36.21 | 35.88 | 36.09 | **40.17** |
265
+ | | avg_ssim | 95.19% | 95.18% | 95.00% | 95.05% | **97.73%** |
266
+ | aliberts/paris_street | avg_mse | 6.89E-04 | 6.70E-04 | 4.03E-03 | 4.02E-03 | **3.09E-04** |
267
+ | | avg_psnr | 33.48 | 33.68 | 32.05 | 32.15 | **35.40** |
268
+ | | avg_ssim | 93.76% | 93.75% | 89.46% | 89.46% | **95.46%** |
269
+ | aliberts/kitchen | avg_mse | 2.50E-04 | 2.24E-04 | 4.28E-04 | 4.18E-04 | **1.53E-04** |
270
+ | | avg_psnr | 36.73 | 37.33 | 36.56 | 36.75 | **39.12** |
271
+ | | avg_ssim | 95.47% | 95.58% | 95.52% | 95.53% | **96.82%** |
benchmarks/video/capture_camera_feed.py ADDED
@@ -0,0 +1,90 @@
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ """Capture video feed from a camera as raw images."""
17
+
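+ # Example invocation (a sketch; the flags correspond to the argparse options defined below):
+ #   python benchmarks/video/capture_camera_feed.py --fps 30 --width 1280 --height 720
+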
18
+ import argparse
19
+ import datetime as dt
20
+ from pathlib import Path
21
+
22
+ import cv2
23
+
24
+
25
+ def display_and_save_video_stream(output_dir: Path, fps: int, width: int, height: int):
26
+ now = dt.datetime.now()
27
+ capture_dir = output_dir / f"{now:%Y-%m-%d}" / f"{now:%H-%M-%S}"
28
+ if not capture_dir.exists():
29
+ capture_dir.mkdir(parents=True, exist_ok=True)
30
+
31
+ # Opens the default webcam
32
+ cap = cv2.VideoCapture(0)
33
+ if not cap.isOpened():
34
+ print("Error: Could not open video stream.")
35
+ return
36
+
37
+ cap.set(cv2.CAP_PROP_FPS, fps)
38
+ cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
39
+ cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
40
+
41
+ frame_index = 0
42
+ while True:
43
+ ret, frame = cap.read()
44
+
45
+ if not ret:
46
+ print("Error: Could not read frame.")
47
+ break
48
+
49
+ cv2.imshow("Video Stream", frame)
50
+ cv2.imwrite(str(capture_dir / f"frame_{frame_index:06d}.png"), frame)
51
+ frame_index += 1
52
+
53
+ # Break the loop on 'q' key press
54
+ if cv2.waitKey(1) & 0xFF == ord("q"):
55
+ break
56
+
57
+ # Release the capture and destroy all windows
58
+ cap.release()
59
+ cv2.destroyAllWindows()
60
+
61
+
62
+ if __name__ == "__main__":
63
+ parser = argparse.ArgumentParser()
64
+
65
+ parser.add_argument(
66
+ "--output-dir",
67
+ type=Path,
68
+ default=Path("outputs/cam_capture/"),
69
+ help="Directory where the capture images are written. A subfolder named with the current date & time will be created inside it for each capture.",
70
+ )
71
+ parser.add_argument(
72
+ "--fps",
73
+ type=int,
74
+ default=30,
75
+ help="Frames Per Second of the capture.",
76
+ )
77
+ parser.add_argument(
78
+ "--width",
79
+ type=int,
80
+ default=1280,
81
+ help="Width of the captured images.",
82
+ )
83
+ parser.add_argument(
84
+ "--height",
85
+ type=int,
86
+ default=720,
87
+ help="Height of the captured images.",
88
+ )
89
+ args = parser.parse_args()
90
+ display_and_save_video_stream(**vars(args))
benchmarks/video/run_video_benchmark.py ADDED
@@ -0,0 +1,490 @@
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ """Assess the performance of video decoding in various configurations.
17
+
18
+ This script will benchmark different video encoding and decoding parameters.
19
+ See the provided README.md or run `python benchmarks/video/run_video_benchmark.py --help` for usage info.
20
+ """
21
+
22
+ import argparse
23
+ import datetime as dt
24
+ import random
25
+ import shutil
26
+ from collections import OrderedDict
27
+ from concurrent.futures import ThreadPoolExecutor, as_completed
28
+ from pathlib import Path
29
+
30
+ import einops
31
+ import numpy as np
32
+ import pandas as pd
33
+ import PIL
34
+ import torch
35
+ from skimage.metrics import mean_squared_error, peak_signal_noise_ratio, structural_similarity
36
+ from tqdm import tqdm
37
+
38
+ from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
39
+ from lerobot.common.datasets.video_utils import (
40
+ decode_video_frames_torchvision,
41
+ encode_video_frames,
42
+ )
43
+ from lerobot.common.utils.benchmark import TimeBenchmark
44
+
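+ # Default encoding parameters. For each benchmarked configuration, `vcodec` and `pix_fmt`
+ # are set explicitly and a single value (either `g` or `crf`) is varied from these defaults
+ # (see the benchmark README for details).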
45
+ BASE_ENCODING = OrderedDict(
46
+ [
47
+ ("vcodec", "libx264"),
48
+ ("pix_fmt", "yuv444p"),
49
+ ("g", 2),
50
+ ("crf", None),
51
+ # TODO(aliberts): Add fastdecode
52
+ # ("fastdecode", 0),
53
+ ]
54
+ )
55
+
56
+
57
+ # TODO(rcadene, aliberts): move to `utils.py` folder when we want to refactor
58
+ def parse_int_or_none(value) -> int | None:
59
+ if value.lower() == "none":
60
+ return None
61
+ try:
62
+ return int(value)
63
+ except ValueError as e:
64
+ raise argparse.ArgumentTypeError(f"Invalid int or None: {value}") from e
65
+
66
+
67
+ def check_datasets_formats(repo_ids: list) -> None:
68
+ for repo_id in repo_ids:
69
+ dataset = LeRobotDataset(repo_id)
70
+ if len(dataset.meta.video_keys) > 0:
71
+ raise ValueError(
72
+ f"Use only image dataset for running this benchmark. Video dataset provided: {repo_id}"
73
+ )
74
+
75
+
76
+ def get_directory_size(directory: Path) -> int:
77
+ total_size = 0
78
+ for item in directory.rglob("*"):
79
+ if item.is_file():
80
+ total_size += item.stat().st_size
81
+ return total_size
82
+
83
+
84
+ def load_original_frames(imgs_dir: Path, timestamps: list[float], fps: int) -> torch.Tensor:
85
+ frames = []
86
+ for ts in timestamps:
87
+ idx = int(ts * fps)
88
+ frame = PIL.Image.open(imgs_dir / f"frame_{idx:06d}.png")
89
+ frame = torch.from_numpy(np.array(frame))
90
+ frame = frame.type(torch.float32) / 255
91
+ frame = einops.rearrange(frame, "h w c -> c h w")
92
+ frames.append(frame)
93
+ return torch.stack(frames)
94
+
95
+
96
+ def save_decoded_frames(
97
+ imgs_dir: Path, save_dir: Path, frames: torch.Tensor, timestamps: list[float], fps: int
98
+ ) -> None:
99
+ if save_dir.exists() and len(list(save_dir.glob("frame_*.png"))) == len(timestamps):
100
+ return
101
+
102
+ save_dir.mkdir(parents=True, exist_ok=True)
103
+ for i, ts in enumerate(timestamps):
104
+ idx = int(ts * fps)
105
+ frame_hwc = (frames[i].permute((1, 2, 0)) * 255).type(torch.uint8).cpu().numpy()
106
+ PIL.Image.fromarray(frame_hwc).save(save_dir / f"frame_{idx:06d}_decoded.png")
107
+ shutil.copyfile(imgs_dir / f"frame_{idx:06d}.png", save_dir / f"frame_{idx:06d}_original.png")
108
+
109
+
110
+ def save_first_episode(imgs_dir: Path, dataset: LeRobotDataset) -> None:
111
+ ep_num_images = dataset.episode_data_index["to"][0].item()
112
+ if imgs_dir.exists() and len(list(imgs_dir.glob("frame_*.png"))) == ep_num_images:
113
+ return
114
+
115
+ imgs_dir.mkdir(parents=True, exist_ok=True)
116
+ hf_dataset = dataset.hf_dataset.with_format(None)
117
+
118
+ # We only save images from the first camera
119
+ img_keys = [key for key in hf_dataset.features if key.startswith("observation.image")]
120
+ imgs_dataset = hf_dataset.select_columns(img_keys[0])
121
+
122
+ for i, item in enumerate(
123
+ tqdm(imgs_dataset, desc=f"saving {dataset.repo_id} first episode images", leave=False)
124
+ ):
125
+ img = item[img_keys[0]]
126
+ img.save(str(imgs_dir / f"frame_{i:06d}.png"), quality=100)
127
+
128
+ if i >= ep_num_images - 1:
129
+ break
130
+
131
+
132
+ def sample_timestamps(timestamps_mode: str, ep_num_images: int, fps: int) -> list[float]:
133
+ # Start at 5 to allow for 2_frames_4_space and 6_frames
134
+ idx = random.randint(5, ep_num_images - 1)
135
+ match timestamps_mode:
136
+ case "1_frame":
137
+ frame_indexes = [idx]
138
+ case "2_frames":
139
+ frame_indexes = [idx - 1, idx]
140
+ case "2_frames_4_space":
141
+ frame_indexes = [idx - 5, idx]
142
+ case "6_frames":
143
+ frame_indexes = [idx - i for i in range(6)][::-1]
144
+ case _:
145
+ raise ValueError(timestamps_mode)
146
+
147
+ return [idx / fps for idx in frame_indexes]
148
+
149
+
150
+ def decode_video_frames(
151
+ video_path: str,
152
+ timestamps: list[float],
153
+ tolerance_s: float,
154
+ backend: str,
155
+ ) -> torch.Tensor:
156
+ if backend in ["pyav", "video_reader"]:
157
+ return decode_video_frames_torchvision(video_path, timestamps, tolerance_s, backend)
158
+ else:
159
+ raise NotImplementedError(backend)
160
+
161
+
162
+ def benchmark_decoding(
163
+ imgs_dir: Path,
164
+ video_path: Path,
165
+ timestamps_mode: str,
166
+ backend: str,
167
+ ep_num_images: int,
168
+ fps: int,
169
+ num_samples: int = 50,
170
+ num_workers: int = 4,
171
+ save_frames: bool = False,
172
+ ) -> dict:
173
+ def process_sample(sample: int):
174
+ time_benchmark = TimeBenchmark()
175
+ timestamps = sample_timestamps(timestamps_mode, ep_num_images, fps)
176
+ num_frames = len(timestamps)
177
+ result = {
178
+ "psnr_values": [],
179
+ "ssim_values": [],
180
+ "mse_values": [],
181
+ }
182
+
183
+ with time_benchmark:
184
+ frames = decode_video_frames(video_path, timestamps=timestamps, tolerance_s=5e-1, backend=backend)
185
+ result["load_time_video_ms"] = time_benchmark.result_ms / num_frames
186
+
187
+ with time_benchmark:
188
+ original_frames = load_original_frames(imgs_dir, timestamps, fps)
189
+ result["load_time_images_ms"] = time_benchmark.result_ms / num_frames
190
+
191
+ frames_np, original_frames_np = frames.numpy(), original_frames.numpy()
192
+ for i in range(num_frames):
193
+ result["mse_values"].append(mean_squared_error(original_frames_np[i], frames_np[i]))
194
+ result["psnr_values"].append(
195
+ peak_signal_noise_ratio(original_frames_np[i], frames_np[i], data_range=1.0)
196
+ )
197
+ result["ssim_values"].append(
198
+ structural_similarity(original_frames_np[i], frames_np[i], data_range=1.0, channel_axis=0)
199
+ )
200
+
201
+ if save_frames and sample == 0:
202
+ save_dir = video_path.with_suffix("") / f"{timestamps_mode}_{backend}"
203
+ save_decoded_frames(imgs_dir, save_dir, frames, timestamps, fps)
204
+
205
+ return result
206
+
207
+ load_times_video_ms = []
208
+ load_times_images_ms = []
209
+ mse_values = []
210
+ psnr_values = []
211
+ ssim_values = []
212
+
213
+ # A sample is a single set of decoded frames specified by timestamps_mode (e.g. a single frame, 2 frames, etc.).
214
+ # For each sample, we record metrics (loading time and quality metrics) which are then averaged over all samples.
215
+ # As these samples are independent, we run them in parallel threads to speed up the benchmark.
216
+ with ThreadPoolExecutor(max_workers=num_workers) as executor:
217
+ futures = [executor.submit(process_sample, i) for i in range(num_samples)]
218
+ for future in tqdm(as_completed(futures), total=num_samples, desc="samples", leave=False):
219
+ result = future.result()
220
+ load_times_video_ms.append(result["load_time_video_ms"])
221
+ load_times_images_ms.append(result["load_time_images_ms"])
222
+ psnr_values.extend(result["psnr_values"])
223
+ ssim_values.extend(result["ssim_values"])
224
+ mse_values.extend(result["mse_values"])
225
+
226
+ avg_load_time_video_ms = float(np.array(load_times_video_ms).mean())
227
+ avg_load_time_images_ms = float(np.array(load_times_images_ms).mean())
228
+ video_images_load_time_ratio = avg_load_time_video_ms / avg_load_time_images_ms
229
+
230
+ return {
231
+ "avg_load_time_video_ms": avg_load_time_video_ms,
232
+ "avg_load_time_images_ms": avg_load_time_images_ms,
233
+ "video_images_load_time_ratio": video_images_load_time_ratio,
234
+ "avg_mse": float(np.mean(mse_values)),
235
+ "avg_psnr": float(np.mean(psnr_values)),
236
+ "avg_ssim": float(np.mean(ssim_values)),
237
+ }
238
+
239
+
240
+ def benchmark_encoding_decoding(
241
+ dataset: LeRobotDataset,
242
+ video_path: Path,
243
+ imgs_dir: Path,
244
+ encoding_cfg: dict,
245
+ decoding_cfg: dict,
246
+ num_samples: int,
247
+ num_workers: int,
248
+ save_frames: bool,
249
+ overwrite: bool = False,
250
+ seed: int = 1337,
251
+ ) -> list[dict]:
252
+ fps = dataset.fps
253
+
254
+ if overwrite or not video_path.is_file():
255
+ tqdm.write(f"encoding {video_path}")
256
+ encode_video_frames(
257
+ imgs_dir=imgs_dir,
258
+ video_path=video_path,
259
+ fps=fps,
260
+ vcodec=encoding_cfg["vcodec"],
261
+ pix_fmt=encoding_cfg["pix_fmt"],
262
+ g=encoding_cfg.get("g"),
263
+ crf=encoding_cfg.get("crf"),
264
+ # fast_decode=encoding_cfg.get("fastdecode"),
265
+ overwrite=True,
266
+ )
267
+
268
+ ep_num_images = dataset.episode_data_index["to"][0].item()
269
+ width, height = tuple(dataset[0][dataset.meta.camera_keys[0]].shape[-2:])
270
+ num_pixels = width * height
271
+ video_size_bytes = video_path.stat().st_size
272
+ images_size_bytes = get_directory_size(imgs_dir)
273
+ video_images_size_ratio = video_size_bytes / images_size_bytes
274
+
275
+ random.seed(seed)
276
+ benchmark_table = []
277
+ for timestamps_mode in tqdm(
278
+ decoding_cfg["timestamps_modes"], desc="decodings (timestamps_modes)", leave=False
279
+ ):
280
+ for backend in tqdm(decoding_cfg["backends"], desc="decodings (backends)", leave=False):
281
+ benchmark_row = benchmark_decoding(
282
+ imgs_dir,
283
+ video_path,
284
+ timestamps_mode,
285
+ backend,
286
+ ep_num_images,
287
+ fps,
288
+ num_samples,
289
+ num_workers,
290
+ save_frames,
291
+ )
292
+ benchmark_row.update(
293
+ **{
294
+ "repo_id": dataset.repo_id,
295
+ "resolution": f"{width} x {height}",
296
+ "num_pixels": num_pixels,
297
+ "video_size_bytes": video_size_bytes,
298
+ "images_size_bytes": images_size_bytes,
299
+ "video_images_size_ratio": video_images_size_ratio,
300
+ "timestamps_mode": timestamps_mode,
301
+ "backend": backend,
302
+ },
303
+ **encoding_cfg,
304
+ )
305
+ benchmark_table.append(benchmark_row)
306
+
307
+ return benchmark_table
308
+
309
+
310
+ def main(
311
+ output_dir: Path,
312
+ repo_ids: list[str],
313
+ vcodec: list[str],
314
+ pix_fmt: list[str],
315
+ g: list[int],
316
+ crf: list[int],
317
+ # fastdecode: list[int],
318
+ timestamps_modes: list[str],
319
+ backends: list[str],
320
+ num_samples: int,
321
+ num_workers: int,
322
+ save_frames: bool,
323
+ ):
324
+ check_datasets_formats(repo_ids)
325
+ encoding_benchmarks = {
326
+ "g": g,
327
+ "crf": crf,
328
+ # "fastdecode": fastdecode,
329
+ }
330
+ decoding_benchmarks = {
331
+ "timestamps_modes": timestamps_modes,
332
+ "backends": backends,
333
+ }
334
+ headers = ["repo_id", "resolution", "num_pixels"]
335
+ headers += list(BASE_ENCODING.keys())
336
+ headers += [
337
+ "timestamps_mode",
338
+ "backend",
339
+ "video_size_bytes",
340
+ "images_size_bytes",
341
+ "video_images_size_ratio",
342
+ "avg_load_time_video_ms",
343
+ "avg_load_time_images_ms",
344
+ "video_images_load_time_ratio",
345
+ "avg_mse",
346
+ "avg_psnr",
347
+ "avg_ssim",
348
+ ]
349
+ file_paths = []
350
+ for video_codec in tqdm(vcodec, desc="encodings (vcodec)"):
351
+ for pixel_format in tqdm(pix_fmt, desc="encodings (pix_fmt)", leave=False):
352
+ benchmark_table = []
353
+ for repo_id in tqdm(repo_ids, desc="encodings (datasets)", leave=False):
354
+ dataset = LeRobotDataset(repo_id)
355
+ imgs_dir = output_dir / "images" / dataset.repo_id.replace("/", "_")
356
+ # We only use the first episode
357
+ save_first_episode(imgs_dir, dataset)
358
+ for key, values in tqdm(encoding_benchmarks.items(), desc="encodings (g, crf)", leave=False):
359
+ for value in tqdm(values, desc=f"encodings ({key})", leave=False):
360
+ encoding_cfg = BASE_ENCODING.copy()
361
+ encoding_cfg["vcodec"] = video_codec
362
+ encoding_cfg["pix_fmt"] = pixel_format
363
+ encoding_cfg[key] = value
364
+ args_path = Path("_".join(str(value) for value in encoding_cfg.values()))
365
+ video_path = output_dir / "videos" / args_path / f"{repo_id.replace('/', '_')}.mp4"
366
+ benchmark_table += benchmark_encoding_decoding(
367
+ dataset,
368
+ video_path,
369
+ imgs_dir,
370
+ encoding_cfg,
371
+ decoding_benchmarks,
372
+ num_samples,
373
+ num_workers,
374
+ save_frames,
375
+ )
376
+
377
+ # Save intermediate results
378
+ benchmark_df = pd.DataFrame(benchmark_table, columns=headers)
379
+ now = dt.datetime.now()
380
+ csv_path = (
381
+ output_dir
382
+ / f"{now:%Y-%m-%d}_{now:%H-%M-%S}_{video_codec}_{pixel_format}_{num_samples}-samples.csv"
383
+ )
384
+ benchmark_df.to_csv(csv_path, header=True, index=False)
385
+ file_paths.append(csv_path)
386
+ del benchmark_df
387
+
388
+ # Concatenate all results
389
+ df_list = [pd.read_csv(csv_path) for csv_path in file_paths]
390
+ concatenated_df = pd.concat(df_list, ignore_index=True)
391
+ concatenated_path = output_dir / f"{now:%Y-%m-%d}_{now:%H-%M-%S}_all_{num_samples}-samples.csv"
392
+ concatenated_df.to_csv(concatenated_path, header=True, index=False)
393
+
394
+
395
+ if __name__ == "__main__":
396
+ parser = argparse.ArgumentParser()
397
+ parser.add_argument(
398
+ "--output-dir",
399
+ type=Path,
400
+ default=Path("outputs/video_benchmark"),
401
+ help="Directory where the video benchmark outputs are written.",
402
+ )
403
+ parser.add_argument(
404
+ "--repo-ids",
405
+ type=str,
406
+ nargs="*",
407
+ default=[
408
+ "lerobot/pusht_image",
409
+ "aliberts/aloha_mobile_shrimp_image",
410
+ "aliberts/paris_street",
411
+ "aliberts/kitchen",
412
+ ],
413
+ help="Datasets repo-ids to test against. First episodes only are used. Must be images.",
414
+ )
415
+ parser.add_argument(
416
+ "--vcodec",
417
+ type=str,
418
+ nargs="*",
419
+ default=["libx264", "libx265", "libsvtav1"],
420
+ help="Video codecs to be tested",
421
+ )
422
+ parser.add_argument(
423
+ "--pix-fmt",
424
+ type=str,
425
+ nargs="*",
426
+ default=["yuv444p", "yuv420p"],
427
+ help="Pixel formats (chroma subsampling) to be tested",
428
+ )
429
+ parser.add_argument(
430
+ "--g",
431
+ type=parse_int_or_none,
432
+ nargs="*",
433
+ default=[1, 2, 3, 4, 5, 6, 10, 15, 20, 40, 100, None],
434
+ help="Group of pictures sizes to be tested.",
435
+ )
436
+ parser.add_argument(
437
+ "--crf",
438
+ type=parse_int_or_none,
439
+ nargs="*",
440
+ default=[0, 5, 10, 15, 20, 25, 30, 40, 50, None],
441
+ help="Constant rate factors to be tested.",
442
+ )
443
+ # parser.add_argument(
444
+ # "--fastdecode",
445
+ # type=int,
446
+ # nargs="*",
447
+ # default=[0, 1],
448
+ # help="Use the fastdecode tuning option. 0 disables it. "
449
+ # "For libx264 and libx265, only 1 is possible. "
450
+ # "For libsvtav1, 1, 2 or 3 are possible values with a higher number meaning a faster decoding optimization",
451
+ # )
452
+ parser.add_argument(
453
+ "--timestamps-modes",
454
+ type=str,
455
+ nargs="*",
456
+ default=[
457
+ "1_frame",
458
+ "2_frames",
459
+ "2_frames_4_space",
460
+ "6_frames",
461
+ ],
462
+ help="Timestamps scenarios to be tested.",
463
+ )
464
+ parser.add_argument(
465
+ "--backends",
466
+ type=str,
467
+ nargs="*",
468
+ default=["pyav", "video_reader"],
469
+ help="Torchvision decoding backend to be tested.",
470
+ )
471
+ parser.add_argument(
472
+ "--num-samples",
473
+ type=int,
474
+ default=50,
475
+ help="Number of samples for each encoding x decoding config.",
476
+ )
477
+ parser.add_argument(
478
+ "--num-workers",
479
+ type=int,
480
+ default=10,
481
+ help="Number of worker threads for parallelized sample processing.",
482
+ )
483
+ parser.add_argument(
484
+ "--save-frames",
485
+ type=int,
486
+ default=0,
487
+ help="Whether to save decoded frames or not. Enter a non-zero number for true.",
488
+ )
489
+ args = parser.parse_args()
490
+ main(**vars(args))
docker/lerobot-cpu/Dockerfile ADDED
@@ -0,0 +1,29 @@
1
+ # Configure image
2
+ ARG PYTHON_VERSION=3.10
3
+ FROM python:${PYTHON_VERSION}-slim
4
+
5
+ # Configure environment variables
6
+ ARG PYTHON_VERSION
7
+ ENV DEBIAN_FRONTEND=noninteractive
8
+ ENV MUJOCO_GL="egl"
9
+ ENV PATH="/opt/venv/bin:$PATH"
10
+
11
+ # Install dependencies and set up Python in a single layer
12
+ RUN apt-get update && apt-get install -y --no-install-recommends \
13
+ build-essential cmake git \
14
+ libglib2.0-0 libgl1-mesa-glx libegl1-mesa ffmpeg \
15
+ speech-dispatcher libgeos-dev \
16
+ && ln -s /usr/bin/python${PYTHON_VERSION} /usr/bin/python \
17
+ && python -m venv /opt/venv \
18
+ && apt-get clean && rm -rf /var/lib/apt/lists/* \
19
+ && echo "source /opt/venv/bin/activate" >> /root/.bashrc
20
+
21
+ # Clone repository and install LeRobot in a single layer
22
+ COPY . /lerobot
23
+ WORKDIR /lerobot
24
+ RUN /opt/venv/bin/pip install --upgrade --no-cache-dir pip \
25
+ && /opt/venv/bin/pip install --no-cache-dir ".[test, aloha, xarm, pusht, dynamixel]" \
26
+ --extra-index-url https://download.pytorch.org/whl/cpu
27
+
28
+ # Execute in bash shell rather than python
29
+ CMD ["/bin/bash"]
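+
+ # Example build/run from the repository root (a sketch; the image tag is arbitrary):
+ #   docker build -t lerobot-cpu -f docker/lerobot-cpu/Dockerfile .
+ #   docker run -it --rm lerobot-cpu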
docker/lerobot-gpu-dev/Dockerfile ADDED
@@ -0,0 +1,68 @@
1
+ FROM nvidia/cuda:12.2.2-devel-ubuntu22.04
2
+
3
+ # Configure image
4
+ ARG PYTHON_VERSION=3.10
5
+ ARG DEBIAN_FRONTEND=noninteractive
6
+
7
+ # Install apt dependencies
8
+ RUN apt-get update && apt-get install -y --no-install-recommends \
9
+ build-essential cmake \
10
+ git git-lfs openssh-client \
11
+ nano vim less util-linux tree \
12
+ htop atop nvtop \
13
+ sed gawk grep curl wget zip unzip \
14
+ tcpdump sysstat screen tmux \
15
+ libglib2.0-0 libgl1-mesa-glx libegl1-mesa \
16
+ speech-dispatcher portaudio19-dev libgeos-dev \
17
+ python${PYTHON_VERSION} python${PYTHON_VERSION}-venv \
18
+ && apt-get clean && rm -rf /var/lib/apt/lists/*
19
+
20
+ # Install ffmpeg build dependencies. See:
21
+ # https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu
22
+ # TODO(aliberts): create image to build dependencies from source instead
23
+ RUN apt-get update && apt-get install -y --no-install-recommends \
24
+ autoconf automake yasm \
25
+ libass-dev \
26
+ libfreetype6-dev \
27
+ libgnutls28-dev \
28
+ libunistring-dev \
29
+ libmp3lame-dev \
30
+ libtool \
31
+ libvorbis-dev \
32
+ meson \
33
+ ninja-build \
34
+ pkg-config \
35
+ texinfo \
36
+ yasm \
37
+ zlib1g-dev \
38
+ nasm \
39
+ libx264-dev \
40
+ libx265-dev libnuma-dev \
41
+ libvpx-dev \
42
+ libfdk-aac-dev \
43
+ libopus-dev \
44
+ libsvtav1-dev libsvtav1enc-dev libsvtav1dec-dev \
45
+ libdav1d-dev
46
+
47
+ # Install gh cli tool
48
+ RUN (type -p wget >/dev/null || (apt update && apt-get install wget -y)) \
49
+ && mkdir -p -m 755 /etc/apt/keyrings \
50
+ && wget -qO- https://cli.github.com/packages/githubcli-archive-keyring.gpg | tee /etc/apt/keyrings/githubcli-archive-keyring.gpg > /dev/null \
51
+ && chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg \
52
+ && echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | tee /etc/apt/sources.list.d/github-cli.list > /dev/null \
53
+ && apt update \
54
+ && apt install gh -y \
55
+ && apt clean && rm -rf /var/lib/apt/lists/*
56
+
57
+ # Setup `python`
58
+ RUN ln -s /usr/bin/python3 /usr/bin/python
59
+
60
+ # Install poetry
61
+ RUN curl -sSL https://install.python-poetry.org | python -
62
+ ENV PATH="/root/.local/bin:$PATH"
63
+ RUN echo 'if [ "$HOME" != "/root" ]; then ln -sf /root/.local/bin/poetry $HOME/.local/bin/poetry; fi' >> /root/.bashrc
64
+ RUN poetry config virtualenvs.create false
65
+ RUN poetry config virtualenvs.in-project true
66
+
67
+ # Set EGL as the rendering backend for MuJoCo
68
+ ENV MUJOCO_GL="egl"
docker/lerobot-gpu/Dockerfile ADDED
@@ -0,0 +1,24 @@
1
+ FROM nvidia/cuda:12.4.1-base-ubuntu22.04
2
+
3
+ # Configure environment variables
4
+ ARG PYTHON_VERSION=3.10
5
+ ENV DEBIAN_FRONTEND=noninteractive
6
+ ENV MUJOCO_GL="egl"
7
+ ENV PATH="/opt/venv/bin:$PATH"
8
+
9
+ # Install dependencies and set up Python in a single layer
10
+ RUN apt-get update && apt-get install -y --no-install-recommends \
11
+ build-essential cmake git \
12
+ libglib2.0-0 libgl1-mesa-glx libegl1-mesa ffmpeg \
13
+ speech-dispatcher libgeos-dev \
14
+ python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv \
15
+ && ln -s /usr/bin/python${PYTHON_VERSION} /usr/bin/python \
16
+ && python -m venv /opt/venv \
17
+ && apt-get clean && rm -rf /var/lib/apt/lists/* \
18
+ && echo "source /opt/venv/bin/activate" >> /root/.bashrc
19
+
20
+ # Clone repository and install LeRobot in a single layer
21
+ COPY . /lerobot
22
+ WORKDIR /lerobot
23
+ RUN /opt/venv/bin/pip install --upgrade --no-cache-dir pip \
24
+ && /opt/venv/bin/pip install --no-cache-dir ".[test, aloha, xarm, pusht, dynamixel]"
examples/10_use_so100.md ADDED
@@ -0,0 +1,621 @@
1
+ # Using the [SO-100](https://github.com/TheRobotStudio/SO-ARM100) with LeRobot
2
+
3
+ ## Table of Contents
4
+
5
+ - [A. Source the parts](#a-source-the-parts)
6
+ - [B. Install LeRobot](#b-install-lerobot)
7
+ - [C. Configure the Motors](#c-configure-the-motors)
8
+ - [D. Step-by-Step Assembly Instructions](#d-step-by-step-assembly-instructions)
9
+ - [E. Calibrate](#e-calibrate)
10
+ - [F. Teleoperate](#f-teleoperate)
11
+ - [G. Record a dataset](#g-record-a-dataset)
12
+ - [H. Visualize a dataset](#h-visualize-a-dataset)
13
+ - [I. Replay an episode](#i-replay-an-episode)
14
+ - [J. Train a policy](#j-train-a-policy)
15
+ - [K. Evaluate your policy](#k-evaluate-your-policy)
16
+ - [L. More Information](#l-more-information)
17
+
18
+ ## A. Source the parts
19
+
20
+ Follow this [README](https://github.com/TheRobotStudio/SO-ARM100). It contains the bill of materials, with a link to source the parts, as well as the instructions to 3D print the parts,
21
+ and advice if it's your first time printing or if you don't own a 3D printer.
22
+
23
+ Before assembling, you will first need to configure your motors. To this end, we provide a nice script, so let's first install LeRobot. After configuration, we will also guide you through assembly.
24
+
25
+ ## B. Install LeRobot
26
+
27
+ > [!TIP]
28
+ > We use the Command Prompt (cmd) quite a lot. If you are not comfortable using the cmd or want to brush up using the command line you can have a look here: [Command line crash course](https://developer.mozilla.org/en-US/docs/Learn_web_development/Getting_started/Environment_setup/Command_line)
29
+
30
+ On your computer:
31
+
32
+ #### 1. [Install Miniconda](https://docs.anaconda.com/miniconda/install/#quick-command-line-install):
33
+
34
+ #### 2. Restart shell
35
+ Copy and paste into your shell: `source ~/.bashrc`, or for Mac: `source ~/.bash_profile`, or `source ~/.zshrc` if you're using zsh.
36
+
37
+ #### 3. Create and activate a fresh conda environment for lerobot
38
+
39
+ <details>
40
+ <summary><strong>Video install instructions</strong></summary>
41
+
42
+ <video src="https://github.com/user-attachments/assets/17172d3b-3b64-4b80-9cf1-b2b7c5cbd236"></video>
43
+
44
+ </details>
45
+
46
+ ```bash
47
+ conda create -y -n lerobot python=3.10
48
+ ```
49
+
50
+ Then activate your conda environment (do this each time you open a shell to use lerobot!):
51
+ ```bash
52
+ conda activate lerobot
53
+ ```
54
+
55
+ #### 4. Clone LeRobot:
56
+ ```bash
57
+ git clone https://github.com/huggingface/lerobot.git ~/lerobot
58
+ ```
59
+
60
+ #### 5. Install LeRobot with dependencies for the feetech motors:
61
+ ```bash
62
+ cd ~/lerobot && pip install -e ".[feetech]"
63
+ ```
64
+
65
+ *EXTRA: For Linux only (not Mac)*: install extra dependencies for recording datasets:
66
+ ```bash
67
+ conda install -y -c conda-forge ffmpeg
68
+ pip uninstall -y opencv-python
69
+ conda install -y -c conda-forge "opencv>=4.10.0"
70
+ ```
71
+ Great :hugs:! You are now done installing LeRobot and we can begin assembling the SO100 arms :robot:.
72
+ Whenever you want to use LeRobot, go to the `~/lerobot` folder where we installed it and run the commands from there.
73
+
74
+ ## C. Configure the motors
75
+
76
+ > [!NOTE]
77
+ > Throughout this tutorial you will find videos on how to do the steps; the full video tutorial can be found here: [assembly video](https://www.youtube.com/watch?v=FioA2oeFZ5I).
78
+
79
+ ### 1. Find the USB ports associated to each arm
80
+
81
+ Designate one bus servo adapter and 6 motors for your leader arm, and similarly the other bus servo adapter and 6 motors for the follower arm. It's convenient to label them and write on each motor whether it's for the follower (`F`) or the leader (`L`), along with its ID from 1 to 6 (F1...F6 and L1...L6).
82
+
83
+ #### a. Run the script to find port
84
+
85
+ <details>
86
+ <summary><strong>Video finding port</strong></summary>
87
+ <video src="https://github.com/user-attachments/assets/4a21a14d-2046-4805-93c4-ee97a30ba33f"></video>
88
+ <video src="https://github.com/user-attachments/assets/1cc3aecf-c16d-4ff9-aec7-8c175afbbce2"></video>
89
+ </details>
90
+
91
+ To find the port for each bus servo adapter, run the utility script:
92
+ ```bash
93
+ python lerobot/scripts/find_motors_bus_port.py
94
+ ```
95
+
96
+ #### b. Example outputs
97
+
98
+ Example output when identifying the leader arm's port (e.g., `/dev/tty.usbmodem575E0031751` on Mac, or possibly `/dev/ttyACM0` on Linux):
99
+ ```
100
+ Finding all available ports for the MotorBus.
101
+ ['/dev/tty.usbmodem575E0032081', '/dev/tty.usbmodem575E0031751']
102
+ Remove the usb cable from your MotorsBus and press Enter when done.
103
+
104
+ [...Disconnect leader arm and press Enter...]
105
+
106
+ The port of this MotorsBus is /dev/tty.usbmodem575E0031751
107
+ Reconnect the usb cable.
108
+ ```
109
+ Example output when identifying the follower arm's port (e.g., `/dev/tty.usbmodem575E0032081`, or possibly `/dev/ttyACM1` on Linux):
110
+ ```
111
+ Finding all available ports for the MotorBus.
112
+ ['/dev/tty.usbmodem575E0032081', '/dev/tty.usbmodem575E0031751']
113
+ Remove the usb cable from your MotorsBus and press Enter when done.
114
+
115
+ [...Disconnect follower arm and press Enter...]
116
+
117
+ The port of this MotorsBus is /dev/tty.usbmodem575E0032081
118
+ Reconnect the usb cable.
119
+ ```
120
+
121
+ #### c. Troubleshooting
122
+ On Linux, you might need to give access to the USB ports by running:
123
+ ```bash
124
+ sudo chmod 666 /dev/ttyACM0
125
+ sudo chmod 666 /dev/ttyACM1
126
+ ```
127
+
128
+ #### d. Update config file
129
+
130
+ IMPORTANTLY: Now that you have your ports, update the **port** default values of [`SO100RobotConfig`](../lerobot/common/robot_devices/robots/configs.py). You will find something like:
131
+ ```python
132
+ @RobotConfig.register_subclass("so100")
133
+ @dataclass
134
+ class So100RobotConfig(ManipulatorRobotConfig):
135
+ calibration_dir: str = ".cache/calibration/so100"
136
+ # `max_relative_target` limits the magnitude of the relative positional target vector for safety purposes.
137
+ # Set this to a positive scalar to have the same value for all motors, or a list that is the same length as
138
+ # the number of motors in your follower arms.
139
+ max_relative_target: int | None = None
140
+
141
+ leader_arms: dict[str, MotorsBusConfig] = field(
142
+ default_factory=lambda: {
143
+ "main": FeetechMotorsBusConfig(
144
+ port="/dev/tty.usbmodem58760431091", <-- UPDATE HERE
145
+ motors={
146
+ # name: (index, model)
147
+ "shoulder_pan": [1, "sts3215"],
148
+ "shoulder_lift": [2, "sts3215"],
149
+ "elbow_flex": [3, "sts3215"],
150
+ "wrist_flex": [4, "sts3215"],
151
+ "wrist_roll": [5, "sts3215"],
152
+ "gripper": [6, "sts3215"],
153
+ },
154
+ ),
155
+ }
156
+ )
157
+
158
+ follower_arms: dict[str, MotorsBusConfig] = field(
159
+ default_factory=lambda: {
160
+ "main": FeetechMotorsBusConfig(
161
+ port="/dev/tty.usbmodem585A0076891", <-- UPDATE HERE
162
+ motors={
163
+ # name: (index, model)
164
+ "shoulder_pan": [1, "sts3215"],
165
+ "shoulder_lift": [2, "sts3215"],
166
+ "elbow_flex": [3, "sts3215"],
167
+ "wrist_flex": [4, "sts3215"],
168
+ "wrist_roll": [5, "sts3215"],
169
+ "gripper": [6, "sts3215"],
170
+ },
171
+ ),
172
+ }
173
+ )
174
+ ```
175
+
176
+ ### 2. Assembling the Base
177
+ Let's begin by assembling the follower arm base.
178
+
179
+ #### a. Set IDs for all 12 motors
180
+
181
+ <details>
182
+ <summary><strong>Video configuring motor</strong></summary>
183
+ <video src="https://github.com/user-attachments/assets/ef9b3317-2e11-4858-b9d3-f0a02fb48ecf"></video>
184
+ <video src="https://github.com/user-attachments/assets/f36b5ed5-c803-4ebe-8947-b39278776a0d"></video>
185
+ </details>
186
+
187
+ Plug in your first motor F1 and run this script to set its ID to 1. It will also set its present position to 2048, so expect your motor to rotate. Replace the text after `--port` with the port of your follower control board and run this command in your terminal:
188
+ ```bash
189
+ python lerobot/scripts/configure_motor.py \
190
+ --port /dev/tty.usbmodem58760432961 \
191
+ --brand feetech \
192
+ --model sts3215 \
193
+ --baudrate 1000000 \
194
+ --ID 1
195
+ ```
196
+
197
+ > [!NOTE]
198
+ > These motors are currently limited: they can only take values between 0 and 4096, which corresponds to one full turn, and they cannot rotate beyond that. 2048 is the middle of this range, so we can move -2048 steps (180 degrees anticlockwise) or +2048 steps (180 degrees clockwise) before hitting the limit. The configuration step also sets the homing offset to 0, so that if you misassembled the arm, you can always update the homing offset to compensate for a shift of up to ± 2048 steps (± 180 degrees).
199
+
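+ To make the step/degree relationship concrete, here is a small sketch (plain arithmetic only, no LeRobot API involved):
+ ```python
+ # One full turn is 4096 steps, so one step is 360 / 4096 ≈ 0.088 degrees.
+ STEPS_PER_TURN = 4096
+
+ def steps_to_degrees(steps: int) -> float:
+     return steps * 360 / STEPS_PER_TURN
+
+ print(steps_to_degrees(2048))   # 180.0 -> half a turn clockwise from the configured middle
+ print(steps_to_degrees(-2048))  # -180.0 -> half a turn anticlockwise
+ ```
+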
200
+ Then unplug your motor and plug the second motor and set its ID to 2.
201
+ ```bash
202
+ python lerobot/scripts/configure_motor.py \
203
+ --port /dev/tty.usbmodem58760432961 \
204
+ --brand feetech \
205
+ --model sts3215 \
206
+ --baudrate 1000000 \
207
+ --ID 2
208
+ ```
209
+
210
+ Redo the process for all your motors until ID 6. Do the same for the 6 motors of the leader arm.
211
+
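+ If you prefer, this repetition can be scripted. Below is a small, hypothetical helper (not part of LeRobot) that simply calls `configure_motor.py` once per ID and waits for you to swap motors in between; replace the port with your own control board port:
+ ```python
+ # Hypothetical convenience sketch: configure IDs 1..6, one motor plugged in at a time.
+ import subprocess
+
+ PORT = "/dev/tty.usbmodem58760432961"  # replace with your control board port
+
+ for motor_id in range(1, 7):
+     input(f"Plug in ONLY the motor that should get ID {motor_id}, then press Enter...")
+     subprocess.run(
+         [
+             "python", "lerobot/scripts/configure_motor.py",
+             "--port", PORT,
+             "--brand", "feetech",
+             "--model", "sts3215",
+             "--baudrate", "1000000",
+             "--ID", str(motor_id),
+         ],
+         check=True,
+     )
+ ```
+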
212
+
213
+ #### b. Remove the gears of the 6 leader motors
214
+
215
+ <details>
216
+ <summary><strong>Video removing gears</strong></summary>
217
+
218
+ <video src="https://github.com/user-attachments/assets/0c95b88c-5b85-413d-ba19-aee2f864f2a7"></video>
219
+
220
+ </details>
221
+
222
+
223
+ Follow the video to remove the gears from the motors of the leader arm. With the gears removed, only the motor's position encoder is used, which reduces friction and makes the leader arm easier to operate.
224
+
225
+ ## D. Step-by-Step Assembly Instructions
226
+
227
+ **Step 1: Clean Parts**
228
+ - Remove all support material from the 3D-printed parts.
229
+ ---
230
+
231
+ ### Additional Guidance
232
+
233
+ <details>
234
+ <summary><strong>Video assembling arms</strong></summary>
235
+
236
+ <video src="https://github.com/user-attachments/assets/488a39de-0189-4461-9de3-05b015f90cca"></video>
237
+
238
+ </details>
239
+
240
+ **Note:**
241
+ This video provides visual guidance for assembling the arms, but it doesn't specify when or how to do the wiring. Inserting the cables beforehand is much easier than doing it afterward. The first arm may take a bit more than 1 hour to assemble, but once you get used to it, you can assemble the second arm in under 1 hour.
242
+
243
+ ---
244
+
245
+ ### First Motor
246
+
247
+ **Step 2: Insert Wires**
248
+ - Insert two wires into the first motor.
249
+
250
+ <img src="../media/tutorial/img1.jpg" style="height:300px;">
251
+
252
+ **Step 3: Install in Base**
253
+ - Place the first motor into the base.
254
+
255
+ <img src="../media/tutorial/img2.jpg" style="height:300px;">
256
+
257
+ **Step 4: Secure Motor**
258
+ - Fasten the motor with 4 screws: two from the bottom and two from the top.
259
+
260
+ **Step 5: Attach Motor Holder**
261
+ - Slide over the first motor holder and fasten it using two screws (one on each side).
262
+
263
+ <img src="../media/tutorial/img4.jpg" style="height:300px;">
264
+
265
+ **Step 6: Attach Motor Horns**
266
+ - Install both motor horns, securing the top horn with a screw. Try not to move the motor position when attaching the motor horn, especially for the leader arms, where we removed the gears.
267
+
268
+ <img src="../media/tutorial/img5.jpg" style="height:300px;">
269
+ <details>
270
+ <summary><strong>Video adding motor horn</strong></summary>
271
+ <video src="https://github.com/user-attachments/assets/ef3391a4-ad05-4100-b2bd-1699bf86c969"></video>
272
+ </details>
273
+
274
+ **Step 7: Attach Shoulder Part**
275
+ - Route one wire to the back of the robot and the other to the left, i.e. towards you in the photo (see photo).
276
+ - Attach the shoulder part.
277
+
278
+ <img src="../media/tutorial/img6.jpg" style="height:300px;">
279
+
280
+ **Step 8: Secure Shoulder**
281
+ - Tighten the shoulder part with 4 screws on top and 4 on the bottom
282
+ *(access bottom holes by turning the shoulder).*
283
+
284
+ ---
285
+
286
+ ### Second Motor Assembly
287
+
288
+ **Step 9: Install Motor 2**
289
+ - Slide the second motor in from the top and link the wire from motor 1 to motor 2.
290
+
291
+ <img src="../media/tutorial/img8.jpg" style="height:300px;">
292
+
293
+ **Step 10: Attach Shoulder Holder**
294
+ - Add the shoulder motor holder.
295
+ - Ensure the wire from motor 1 to motor 2 goes behind the holder while the other wire is routed upward (see photo).
296
+ - This part can be tight to assemble; you can use a workbench as shown in the image, or a similar setup, to push the part around the motor.
297
+
298
+ <div style="display: flex;">
299
+ <img src="../media/tutorial/img9.jpg" style="height:250px;">
300
+ <img src="../media/tutorial/img10.jpg" style="height:250px;">
301
+ <img src="../media/tutorial/img12.jpg" style="height:250px;">
302
+ </div>
303
+
304
+ **Step 11: Secure Motor 2**
305
+ - Fasten the second motor with 4 screws.
306
+
307
+ **Step 12: Attach Motor Horn**
308
+ - Attach both motor horns to motor 2, again using the horn screw.
309
+
310
+ **Step 13: Attach Base**
311
+ - Install the base attachment using 2 screws.
312
+
313
+ <img src="../media/tutorial/img11.jpg" style="height:300px;">
314
+
315
+ **Step 14: Attach Upper Arm**
316
+ - Attach the upper arm with 4 screws on each side.
317
+
318
+ <img src="../media/tutorial/img13.jpg" style="height:300px;">
319
+
320
+ ---
321
+
322
+ ### Third Motor Assembly
323
+
324
+ **Step 15: Install Motor 3**
325
+ - Route the motor cable from motor 2 through the cable holder to motor 3, then secure motor 3 with 4 screws.
326
+
327
+ **Step 16: Attach Motor Horn**
328
+ - Attach both motor horns to motor 3 and secure one again with a horn screw.
329
+
330
+ <img src="../media/tutorial/img14.jpg" style="height:300px;">
331
+
332
+ **Step 17: Attach Forearm**
333
+ - Connect the forearm to motor 3 using 4 screws on each side.
334
+
335
+ <img src="../media/tutorial/img15.jpg" style="height:300px;">
336
+
337
+ ---
338
+
339
+ ### Fourth Motor Assembly
340
+
341
+ **Step 18: Install Motor 4**
342
+ - Slide in motor 4, attach the cable from motor 3, and secure the cable in its holder with a screw.
343
+
344
+ <div style="display: flex;">
345
+ <img src="../media/tutorial/img16.jpg" style="height:300px;">
346
+ <img src="../media/tutorial/img19.jpg" style="height:300px;">
347
+ </div>
348
+
349
+ **Step 19: Attach Motor Holder 4**
350
+ - Install the fourth motor holder (a tight fit). Ensure one wire is routed upward and the wire from motor 3 is routed downward (see photo).
351
+
352
+ <img src="../media/tutorial/img17.jpg" style="height:300px;">
353
+
354
+ **Step 20: Secure Motor 4 & Attach Horn**
355
+ - Fasten motor 4 with 4 screws and attach its motor horns, securing one of them with a horn screw.
356
+
357
+ <img src="../media/tutorial/img18.jpg" style="height:300px;">
358
+
359
+ ---
360
+
361
+ ### Wrist Assembly
362
+
363
+ **Step 21: Install Motor 5**
364
+ - Insert motor 5 into the wrist holder and secure it with 2 front screws.
365
+
366
+ <img src="../media/tutorial/img20.jpg" style="height:300px;">
367
+
368
+ **Step 22: Attach Wrist**
369
+ - Connect the wire from motor 4 to motor 5, and already insert the other wire for the gripper at this stage.
370
+ - Secure the wrist to motor 4 using 4 screws on both sides.
371
+
372
+ <img src="../media/tutorial/img22.jpg" style="height:300px;">
373
+
374
+ **Step 23: Attach Wrist Horn**
375
+ - Install only one motor horn on the wrist motor and secure it with a horn screw.
376
+
377
+ <img src="../media/tutorial/img23.jpg" style="height:300px;">
378
+
379
+ ---
380
+
381
+ ### Follower Configuration
382
+
383
+ **Step 24: Attach Gripper**
384
+ - Attach the gripper to motor 5.
385
+
386
+ <img src="../media/tutorial/img24.jpg" style="height:300px;">
387
+
388
+ **Step 25: Install Gripper Motor**
389
+ - Insert the gripper motor, connect the motor wire from motor 5 to motor 6, and secure it with 3 screws on each side.
390
+
391
+ <img src="../media/tutorial/img25.jpg" style="height:300px;">
392
+
393
+ **Step 26: Attach Gripper Horn & Claw**
394
+ - Attach the motor horns, again using a horn screw.
395
+ - Install the gripper claw and secure it with 4 screws on both sides.
396
+
397
+ <img src="../media/tutorial/img26.jpg" style="height:300px;">
398
+
399
+ **Step 27: Mount Controller**
400
+ - Attach the motor controller on the back.
401
+
402
+ <div style="display: flex;">
403
+ <img src="../media/tutorial/img27.jpg" style="height:300px;">
404
+ <img src="../media/tutorial/img28.jpg" style="height:300px;">
405
+ </div>
406
+
407
+ *Assembly complete – proceed to Leader arm assembly.*
408
+
409
+ ---
410
+
411
+ ### Leader Configuration
412
+
413
+ For the leader configuration, perform **Steps 1–23**. Make sure you have removed the gears from these motors.
414
+
415
+ **Step 24: Attach Leader Holder**
416
+ - Mount the leader holder onto the wrist and secure it with a screw.
417
+
418
+ <img src="../media/tutorial/img29.jpg" style="height:300px;">
419
+
420
+ **Step 25: Attach Handle**
421
+ - Attach the handle to motor 5 using 4 screws.
422
+
423
+ <img src="../media/tutorial/img30.jpg" style="height:300px;">
424
+
425
+ **Step 26: Install Gripper Motor**
426
+ - Insert the gripper motor, secure it with 3 screws on each side, attach a motor horn using a horn screw, and connect the motor wire.
427
+
428
+ <img src="../media/tutorial/img31.jpg" style="height:300px;">
429
+
430
+ **Step 27: Attach Trigger**
431
+ - Attach the follower trigger with 4 screws.
432
+
433
+ <img src="../media/tutorial/img32.jpg" style="height:300px;">
434
+
435
+ **Step 28: Mount Controller**
436
+ - Attach the motor controller on the back.
437
+
438
+ <div style="display: flex;">
439
+ <img src="../media/tutorial/img27.jpg" style="height:300px;">
440
+ <img src="../media/tutorial/img28.jpg" style="height:300px;">
441
+ </div>
442
+
443
+ *Assembly complete – proceed to calibration.*
444
+
445
+
446
+ ## E. Calibrate
447
+
448
+ Next, you'll need to calibrate your SO-100 robot to ensure that the leader and follower arms have the same position values when they are in the same physical position. This calibration is essential because it allows a neural network trained on one SO-100 robot to work on another.
449
+
450
+ #### a. Manual calibration of follower arm
451
+
452
+ > [!IMPORTANT]
453
+ > Contrary to step 6 of the [assembly video](https://youtu.be/FioA2oeFZ5I?t=724), which illustrates the auto calibration, we will do a manual calibration of the follower arm for now.
454
+
455
+ You will need to move the follower arm to these positions sequentially:
456
+
457
+ | 1. Zero position | 2. Rotated position | 3. Rest position |
458
+ | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ |
459
+ | <img src="../media/so100/follower_zero.webp?raw=true" alt="SO-100 follower arm zero position" title="SO-100 follower arm zero position" style="width:100%;"> | <img src="../media/so100/follower_rotated.webp?raw=true" alt="SO-100 follower arm rotated position" title="SO-100 follower arm rotated position" style="width:100%;"> | <img src="../media/so100/follower_rest.webp?raw=true" alt="SO-100 follower arm rest position" title="SO-100 follower arm rest position" style="width:100%;"> |
460
+
461
+ Make sure both arms are connected and run this script to launch manual calibration:
462
+ ```bash
463
+ python lerobot/scripts/control_robot.py \
464
+ --robot.type=so100 \
465
+ --robot.cameras='{}' \
466
+ --control.type=calibrate \
467
+ --control.arms='["main_follower"]'
468
+ ```
469
+
470
+ #### b. Manual calibration of leader arm
471
+ Follow step 6 of the [assembly video](https://youtu.be/FioA2oeFZ5I?t=724) which illustrates the manual calibration. You will need to move the leader arm to these positions sequentially:
472
+
473
+ | 1. Zero position | 2. Rotated position | 3. Rest position |
474
+ | ------------------------------------------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ |
475
+ | <img src="../media/so100/leader_zero.webp?raw=true" alt="SO-100 leader arm zero position" title="SO-100 leader arm zero position" style="width:100%;"> | <img src="../media/so100/leader_rotated.webp?raw=true" alt="SO-100 leader arm rotated position" title="SO-100 leader arm rotated position" style="width:100%;"> | <img src="../media/so100/leader_rest.webp?raw=true" alt="SO-100 leader arm rest position" title="SO-100 leader arm rest position" style="width:100%;"> |
476
+
477
+ Run this script to launch manual calibration:
478
+ ```bash
479
+ python lerobot/scripts/control_robot.py \
480
+ --robot.type=so100 \
481
+ --robot.cameras='{}' \
482
+ --control.type=calibrate \
483
+ --control.arms='["main_leader"]'
484
+ ```
485
+
486
+ ## F. Teleoperate
487
+
488
+ **Simple teleop**
489
+ Then you are ready to teleoperate your robot! Run this simple script (it won't connect to or display the cameras):
490
+ ```bash
491
+ python lerobot/scripts/control_robot.py \
492
+ --robot.type=so100 \
493
+ --robot.cameras='{}' \
494
+ --control.type=teleoperate
495
+ ```
496
+
497
+
498
+ #### a. Teleop with displaying cameras
499
+ Follow [this guide to set up your cameras](https://github.com/huggingface/lerobot/blob/main/examples/7_get_started_with_real_robot.md#c-add-your-cameras-with-opencvcamera). You will then be able to display the cameras on your computer while teleoperating by running the following command. This is useful to prepare your setup before recording your first dataset.
500
+ ```bash
501
+ python lerobot/scripts/control_robot.py \
502
+ --robot.type=so100 \
503
+ --control.type=teleoperate
504
+ ```
505
+
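+ Before teleoperating with cameras, you can quickly check that a given camera index opens at all. This is a generic OpenCV snippet (not a LeRobot API); adjust the index to your setup:
+ ```python
+ # Quick sanity check that camera index 0 opens and returns a frame.
+ import cv2
+
+ cap = cv2.VideoCapture(0)  # try 1, 2, ... for your other cameras
+ ok, frame = cap.read()
+ print("opened:", cap.isOpened(), "| got frame:", ok, "| shape:", frame.shape if ok else None)
+ cap.release()
+ ```
+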
506
+ ## G. Record a dataset
507
+
508
+ Once you're familiar with teleoperation, you can record your first dataset with SO-100.
509
+
510
+ If you want to use the Hugging Face hub features for uploading your dataset and you haven't previously done it, make sure you've logged in using a write-access token, which can be generated from the [Hugging Face settings](https://huggingface.co/settings/tokens):
511
+ ```bash
512
+ huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
513
+ ```
514
+
515
+ Store your Hugging Face repository name in a variable to run these commands:
516
+ ```bash
517
+ HF_USER=$(huggingface-cli whoami | head -n 1)
518
+ echo $HF_USER
519
+ ```
520
+
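+ If the shell pipeline above doesn't print the username you expect, you can also check it through the `huggingface_hub` Python API (installed alongside LeRobot), assuming you are already logged in:
+ ```python
+ # Print the username associated with your saved Hugging Face token.
+ from huggingface_hub import HfApi
+
+ print(HfApi().whoami()["name"])
+ ```
+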
521
+ Record 2 episodes and upload your dataset to the hub:
522
+ ```bash
523
+ python lerobot/scripts/control_robot.py \
524
+ --robot.type=so100 \
525
+ --control.type=record \
526
+ --control.fps=30 \
527
+ --control.single_task="Grasp a lego block and put it in the bin." \
528
+ --control.repo_id=${HF_USER}/so100_test \
529
+ --control.tags='["so100","tutorial"]' \
530
+ --control.warmup_time_s=5 \
531
+ --control.episode_time_s=30 \
532
+ --control.reset_time_s=30 \
533
+ --control.num_episodes=2 \
534
+ --control.push_to_hub=true
535
+ ```
536
+
537
+ Note: You can resume recording by adding `--control.resume=true`.
538
+
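+ As a rough estimate of how long such a session takes, the warmup runs once and each episode is followed by a reset period (this ignores time spent in prompts or re-recording episodes):
+ ```python
+ # Rough duration estimate for the recording command above.
+ warmup_time_s, episode_time_s, reset_time_s, num_episodes = 5, 30, 30, 2
+
+ total_s = warmup_time_s + num_episodes * (episode_time_s + reset_time_s)
+ print(total_s)  # 125 seconds, i.e. a bit over 2 minutes
+ ```
+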
539
+ ## H. Visualize a dataset
540
+
541
+ If you uploaded your dataset to the hub with `--control.push_to_hub=true`, you can [visualize your dataset online](https://huggingface.co/spaces/lerobot/visualize_dataset) by copy-pasting your repo id, given by:
542
+ ```bash
543
+ echo ${HF_USER}/so100_test
544
+ ```
545
+
546
+ If you didn't upload to the hub (using `--control.push_to_hub=false`), you can still visualize the dataset locally (a window will open in your browser at `http://127.0.0.1:9090` with the visualization tool):
547
+ ```bash
548
+ python lerobot/scripts/visualize_dataset_html.py \
549
+ --repo-id ${HF_USER}/so100_test \
550
+ --local-files-only 1
551
+ ```
552
+
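+ You can also inspect the recorded dataset programmatically with the `LeRobotDataset` class (see `examples/1_load_lerobot_dataset.py` for a full walkthrough). A minimal sketch; attribute names may differ slightly between LeRobot versions:
+ ```python
+ # Load the recorded dataset and print a few basic stats.
+ from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
+
+ dataset = LeRobotDataset("YOUR_HF_USER/so100_test")  # same repo id as above
+ print("episodes:", dataset.num_episodes)
+ print("frames:", dataset.num_frames)
+ print("fps:", dataset.fps)
+ ```
+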
553
+ ## I. Replay an episode
554
+
555
+ Now try to replay the first episode on your robot:
556
+ ```bash
557
+ python lerobot/scripts/control_robot.py \
558
+ --robot.type=so100 \
559
+ --control.type=replay \
560
+ --control.fps=30 \
561
+ --control.repo_id=${HF_USER}/so100_test \
562
+ --control.episode=0
563
+ ```
564
+
565
+ ## J. Train a policy
566
+
567
+ To train a policy to control your robot, use the [`python lerobot/scripts/train.py`](../lerobot/scripts/train.py) script. A few arguments are required. Here is an example command:
568
+ ```bash
569
+ python lerobot/scripts/train.py \
570
+ --dataset.repo_id=${HF_USER}/so100_test \
571
+ --policy.type=act \
572
+ --output_dir=outputs/train/act_so100_test \
573
+ --job_name=act_so100_test \
574
+ --policy.device=cuda \
575
+ --wandb.enable=true
576
+ ```
577
+
578
+ Let's explain it:
579
+ 1. We provided the dataset as argument with `--dataset.repo_id=${HF_USER}/so100_test`.
580
+ 2. We provided the policy with `policy.type=act`. This loads configurations from [`configuration_act.py`](../lerobot/common/policies/act/configuration_act.py). Importantly, this policy will automatically adapt to the number of motor states, motor actions and cameras of your robot (e.g. `laptop` and `phone`) which have been saved in your dataset.
581
+ 3. We provided `policy.device=cuda` since we are training on an Nvidia GPU, but you could use `policy.device=mps` to train on Apple silicon.
582
+ 4. We provided `wandb.enable=true` to use [Weights and Biases](https://docs.wandb.ai/quickstart) for visualizing training plots. This is optional, but if you use it, make sure you are logged in by running `wandb login`.
583
+
584
+ Training should take several hours. You will find checkpoints in `outputs/train/act_so100_test/checkpoints`.
585
+
586
+ To resume training from a checkpoint, here is an example command to resume from the `last` checkpoint of the `act_so100_test` policy:
587
+ ```bash
588
+ python lerobot/scripts/train.py \
589
+ --config_path=outputs/train/act_so100_test/checkpoints/last/pretrained_model/train_config.json \
590
+ --resume=true
591
+ ```
592
+
593
+ ## K. Evaluate your policy
594
+
595
+ You can use the `record` function from [`lerobot/scripts/control_robot.py`](../lerobot/scripts/control_robot.py) but with a policy checkpoint as input. For instance, run this command to record 10 evaluation episodes:
596
+ ```bash
597
+ python lerobot/scripts/control_robot.py \
598
+ --robot.type=so100 \
599
+ --control.type=record \
600
+ --control.fps=30 \
601
+ --control.single_task="Grasp a lego block and put it in the bin." \
602
+ --control.repo_id=${HF_USER}/eval_act_so100_test \
603
+ --control.tags='["tutorial"]' \
604
+ --control.warmup_time_s=5 \
605
+ --control.episode_time_s=30 \
606
+ --control.reset_time_s=30 \
607
+ --control.num_episodes=10 \
608
+ --control.push_to_hub=true \
609
+ --control.policy.path=outputs/train/act_so100_test/checkpoints/last/pretrained_model
610
+ ```
611
+
612
+ As you can see, it's almost the same command as previously used to record your training dataset. Two things changed:
613
+ 1. There is an additional `--control.policy.path` argument which indicates the path to your policy checkpoint (e.g. `outputs/train/act_so100_test/checkpoints/last/pretrained_model`). You can also use the model repository if you uploaded a model checkpoint to the hub (e.g. `${HF_USER}/act_so100_test`).
614
+ 2. The dataset name begins with `eval` to reflect that you are running inference (e.g. `${HF_USER}/eval_act_so100_test`).
615
+
616
+ ## L. More Information
617
+
618
+ Follow this [previous tutorial](https://github.com/huggingface/lerobot/blob/main/examples/7_get_started_with_real_robot.md#4-train-a-policy-on-your-data) for a more in-depth guide to controlling real robots with LeRobot.
619
+
620
+ > [!TIP]
621
+ > If you have any questions or need help, please reach out on [Discord](https://discord.com/invite/s3KuuzsPFb) in the channel [`#so100-arm`](https://discord.com/channels/1216765309076115607/1237741463832363039).
examples/11_use_lekiwi.md ADDED
@@ -0,0 +1,585 @@
1
+ # Using the [LeKiwi](https://github.com/SIGRobotics-UIUC/LeKiwi) Robot with LeRobot
2
+
3
+ ## Table of Contents
4
+
5
+ - [A. Source the parts](#a-source-the-parts)
6
+ - [B. Install software Pi](#b-install-software-on-pi)
7
+ - [C. Setup LeRobot laptop/pc](#c-install-lerobot-on-laptop)
8
+ - [D. Assemble the arms](#d-assembly)
9
+ - [E. Calibrate](#e-calibration)
10
+ - [F. Teleoperate](#f-teleoperate)
11
+ - [G. Record a dataset](#g-record-a-dataset)
12
+ - [H. Visualize a dataset](#h-visualize-a-dataset)
13
+ - [I. Replay an episode](#i-replay-an-episode)
14
+ - [J. Train a policy](#j-train-a-policy)
15
+ - [K. Evaluate your policy](#k-evaluate-your-policy)
16
+
17
+ > [!TIP]
18
+ > If you have any questions or need help, please reach out on [Discord](https://discord.com/invite/s3KuuzsPFb) in the channel [`#mobile-so-100-arm`](https://discord.com/channels/1216765309076115607/1318390825528332371).
19
+
20
+ ## A. Source the parts
21
+
22
+ Follow this [README](https://github.com/SIGRobotics-UIUC/LeKiwi). It contains the bill of materials, with a link to source the parts, as well as the instructions to 3D print the parts, and advice if it's your first time printing or if you don't own a 3D printer.
23
+
24
+ Before assembling, you will first need to configure your motors. To this end, we provide a nice script, so let's first install LeRobot. After configuration, we will also guide you through assembly.
25
+
26
+ ### Wired version
27
+ If you have the **wired** LeKiwi version, you can skip installing the Raspberry Pi and setting up SSH. You can also run all commands directly on your PC, for both the LeKiwi scripts and the leader arm scripts used for teleoperation.
28
+
29
+ ## B. Install software on Pi
30
+ Now we have to set up the remote PC that will run on the LeKiwi robot. This is normally a Raspberry Pi, but it can be any PC that runs on 5V and has enough USB ports (2 or more) for the cameras and the motor control board.
31
+
32
+ ### Install OS
33
+ For setting up the Raspberry Pi and its SD card, see: [Setup PI](https://www.raspberrypi.com/documentation/computers/getting-started.html). It explains how to download the [Imager](https://www.raspberrypi.com/software/) to install Raspberry Pi OS or Ubuntu.
34
+
35
+ ### Setup SSH
36
+ After setting up your Pi, you should enable and set up [SSH](https://www.raspberrypi.com/news/coding-on-raspberry-pi-remotely-with-visual-studio-code/) (Secure Shell Protocol) so you can log in to the Pi from your laptop without needing a screen, keyboard and mouse attached to the Pi. A great tutorial on how to do this can be found [here](https://www.raspberrypi.com/documentation/computers/remote-access.html#ssh). You can log in to your Pi from your Command Prompt (cmd), or, if you use VSCode, with [this](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-ssh) extension.
37
+
38
+ ### Install LeRobot
39
+
40
+ On your Raspberry Pi:
41
+
42
+ #### 1. [Install Miniconda](https://docs.anaconda.com/miniconda/install/#quick-command-line-install):
43
+
44
+ #### 2. Restart shell
45
+ Copy-paste this in your shell: `source ~/.bashrc`, or on Mac: `source ~/.bash_profile`, or `source ~/.zshrc` if you're using zsh.
46
+
47
+ #### 3. Create and activate a fresh conda environment for lerobot
48
+
49
+ <details>
50
+ <summary><strong>Video install instructions</strong></summary>
51
+
52
+ <video src="https://github.com/user-attachments/assets/17172d3b-3b64-4b80-9cf1-b2b7c5cbd236"></video>
53
+
54
+ </details>
55
+
56
+ ```bash
57
+ conda create -y -n lerobot python=3.10
58
+ ```
59
+
60
+ Then activate your conda environment (do this each time you open a shell to use lerobot!):
61
+ ```bash
62
+ conda activate lerobot
63
+ ```
64
+
65
+ #### 4. Clone LeRobot:
66
+ ```bash
67
+ git clone https://github.com/huggingface/lerobot.git ~/lerobot
68
+ ```
69
+
70
+ #### 5. Install LeRobot with dependencies for the feetech motors:
71
+ ```bash
72
+ cd ~/lerobot && pip install -e ".[feetech]"
73
+ ```
74
+
75
+ ## C. Install LeRobot on laptop
76
+ If you have already installed LeRobot on your laptop you can skip this step; otherwise, please follow along as we repeat the same steps we did on the Pi.
77
+
78
+ > [!TIP]
79
+ > We use the Command Prompt (cmd) quite a lot. If you are not comfortable using the cmd or want to brush up using the command line you can have a look here: [Command line crash course](https://developer.mozilla.org/en-US/docs/Learn_web_development/Getting_started/Environment_setup/Command_line)
80
+
81
+ On your computer:
82
+
83
+ #### 1. [Install Miniconda](https://docs.anaconda.com/miniconda/install/#quick-command-line-install):
84
+
85
+ #### 2. Restart shell
86
+ Copy-paste this in your shell: `source ~/.bashrc`, or on Mac: `source ~/.bash_profile`, or `source ~/.zshrc` if you're using zsh.
87
+
88
+ #### 3. Create and activate a fresh conda environment for lerobot
89
+
90
+ <details>
91
+ <summary><strong>Video install instructions</strong></summary>
92
+
93
+ <video src="https://github.com/user-attachments/assets/17172d3b-3b64-4b80-9cf1-b2b7c5cbd236"></video>
94
+
95
+ </details>
96
+
97
+ ```bash
98
+ conda create -y -n lerobot python=3.10
99
+ ```
100
+
101
+ Then activate your conda environment (do this each time you open a shell to use lerobot!):
102
+ ```bash
103
+ conda activate lerobot
104
+ ```
105
+
106
+ #### 4. Clone LeRobot:
107
+ ```bash
108
+ git clone https://github.com/huggingface/lerobot.git ~/lerobot
109
+ ```
110
+
111
+ #### 5. Install LeRobot with dependencies for the feetech motors:
112
+ ```bash
113
+ cd ~/lerobot && pip install -e ".[feetech]"
114
+ ```
115
+
116
+ *EXTRA: For Linux only (not Mac)*: install extra dependencies for recording datasets:
117
+ ```bash
118
+ conda install -y -c conda-forge ffmpeg
119
+ pip uninstall -y opencv-python
120
+ conda install -y -c conda-forge "opencv>=4.10.0"
121
+ ```
122
+ Great :hugs:! You are now done installing LeRobot and we can begin assembling the SO100 arms and Mobile base :robot:.
123
+ Every time you now want to use LeRobot you can go to the `~/lerobot` folder where we installed LeRobot and run one of the commands.
124
+
125
+ # D. Assembly
126
+
127
+ First we will assemble the two SO100 arms: one to attach to the mobile base and one for teleoperation. Then we will assemble the mobile base.
128
+
129
+ ## SO100 Arms
130
+ ### Configure motors
131
+ The instructions for configuring the motors can be found [here](https://github.com/huggingface/lerobot/blob/main/examples/10_use_so100.md#c-configure-the-motors) in step C of the SO100 tutorial. Besides the IDs for the arm motors, we also need to set the motor IDs for the mobile base, and these need to be in a specific order to work. Below is an image of the motor IDs and motor mounting positions for the mobile base. Note that we only use one motor control board on LeKiwi, so the motor IDs for the wheels are 7, 8 and 9.
132
+
133
+ <img src="../media/lekiwi/motor_ids.webp?raw=true" alt="Motor ID's for mobile robot" title="Motor ID's for mobile robot" width="60%">
134
+
135
+ ### Assemble arms
136
+ [Assemble arms instruction](https://github.com/huggingface/lerobot/blob/main/examples/10_use_so100.md#d-assemble-the-arms)
137
+
138
+ ## Mobile base (LeKiwi)
139
+ [Assemble LeKiwi](https://github.com/SIGRobotics-UIUC/LeKiwi)
140
+
141
+ ### Update config
142
+ The config file on the LeKiwi (LeRobot on the Raspberry Pi) and on the laptop should be the same. First, find the IP address of the Raspberry Pi of the mobile manipulator; this is the same IP address you use for SSH. We also need the USB port of the leader arm's control board on the laptop and the port of the control board on LeKiwi. We can find these ports with the following script.
143
+
144
+ #### a. Run the script to find port
145
+
146
+ <details>
147
+ <summary><strong>Video finding port</strong></summary>
148
+ <video src="https://github.com/user-attachments/assets/4a21a14d-2046-4805-93c4-ee97a30ba33f"></video>
149
+ <video src="https://github.com/user-attachments/assets/1cc3aecf-c16d-4ff9-aec7-8c175afbbce2"></video>
150
+ </details>
151
+
152
+ To find the port for each bus servo adapter, run the utility script:
153
+ ```bash
154
+ python lerobot/scripts/find_motors_bus_port.py
155
+ ```
156
+
157
+ #### b. Example outputs
158
+
159
+ Example output when identifying the leader arm's port (e.g., `/dev/tty.usbmodem575E0031751` on Mac, or possibly `/dev/ttyACM0` on Linux):
160
+ ```
161
+ Finding all available ports for the MotorBus.
162
+ ['/dev/tty.usbmodem575E0032081', '/dev/tty.usbmodem575E0031751']
163
+ Remove the usb cable from your DynamixelMotorsBus and press Enter when done.
164
+
165
+ [...Disconnect leader arm and press Enter...]
166
+
167
+ The port of this DynamixelMotorsBus is /dev/tty.usbmodem575E0031751
168
+ Reconnect the usb cable.
169
+ ```
170
+ Example output when identifying the follower arm's port (e.g., `/dev/tty.usbmodem575E0032081`, or possibly `/dev/ttyACM1` on Linux):
171
+ ```
172
+ Finding all available ports for the MotorBus.
173
+ ['/dev/tty.usbmodem575E0032081', '/dev/tty.usbmodem575E0031751']
174
+ Remove the usb cable from your DynamixelMotorsBus and press Enter when done.
175
+
176
+ [...Disconnect follower arm and press Enter...]
177
+
178
+ The port of this DynamixelMotorsBus is /dev/tty.usbmodem575E0032081
179
+ Reconnect the usb cable.
180
+ ```
181
+
182
+ #### c. Troubleshooting
183
+ On Linux, you might need to give access to the USB ports by running:
184
+ ```bash
185
+ sudo chmod 666 /dev/ttyACM0
186
+ sudo chmod 666 /dev/ttyACM1
187
+ ```
188
+
189
+ #### d. Update config file
190
+
191
+ **Important**: Now that you have the ports of the leader and follower arms and the IP address of the mobile SO-100, update the **ip** in the network configuration, the **port** in `leader_arms` and the **port** in `follower_arms` (the control board on LeKiwi) in the [`LeKiwiRobotConfig`](../lerobot/common/robot_devices/robots/configs.py) file. You will find something like:
192
+ ```python
193
+ @RobotConfig.register_subclass("lekiwi")
194
+ @dataclass
195
+ class LeKiwiRobotConfig(RobotConfig):
196
+ # `max_relative_target` limits the magnitude of the relative positional target vector for safety purposes.
197
+ # Set this to a positive scalar to have the same value for all motors, or a list that is the same length as
198
+ # the number of motors in your follower arms.
199
+ max_relative_target: int | None = None
200
+
201
+ # Network Configuration
202
+ ip: str = "172.17.133.91"
203
+ port: int = 5555
204
+ video_port: int = 5556
205
+
206
+ cameras: dict[str, CameraConfig] = field(
207
+ default_factory=lambda: {
208
+ "mobile": OpenCVCameraConfig(camera_index="/dev/video0", fps=30, width=640, height=480),
209
+ "mobile2": OpenCVCameraConfig(camera_index="/dev/video2", fps=30, width=640, height=480),
210
+ }
211
+ )
212
+
213
+ calibration_dir: str = ".cache/calibration/lekiwi"
214
+
215
+ leader_arms: dict[str, MotorsBusConfig] = field(
216
+ default_factory=lambda: {
217
+ "main": FeetechMotorsBusConfig(
218
+ port="/dev/tty.usbmodem585A0077581",
219
+ motors={
220
+ # name: (index, model)
221
+ "shoulder_pan": [1, "sts3215"],
222
+ "shoulder_lift": [2, "sts3215"],
223
+ "elbow_flex": [3, "sts3215"],
224
+ "wrist_flex": [4, "sts3215"],
225
+ "wrist_roll": [5, "sts3215"],
226
+ "gripper": [6, "sts3215"],
227
+ },
228
+ ),
229
+ }
230
+ )
231
+
232
+ follower_arms: dict[str, MotorsBusConfig] = field(
233
+ default_factory=lambda: {
234
+ "main": FeetechMotorsBusConfig(
235
+ port="/dev/ttyACM0",
236
+ motors={
237
+ # name: (index, model)
238
+ "shoulder_pan": [1, "sts3215"],
239
+ "shoulder_lift": [2, "sts3215"],
240
+ "elbow_flex": [3, "sts3215"],
241
+ "wrist_flex": [4, "sts3215"],
242
+ "wrist_roll": [5, "sts3215"],
243
+ "gripper": [6, "sts3215"],
244
+ "left_wheel": (7, "sts3215"),
245
+ "back_wheel": (8, "sts3215"),
246
+ "right_wheel": (9, "sts3215"),
247
+ },
248
+ ),
249
+ }
250
+ )
251
+
252
+ teleop_keys: dict[str, str] = field(
253
+ default_factory=lambda: {
254
+ # Movement
255
+ "forward": "w",
256
+ "backward": "s",
257
+ "left": "a",
258
+ "right": "d",
259
+ "rotate_left": "z",
260
+ "rotate_right": "x",
261
+ # Speed control
262
+ "speed_up": "r",
263
+ "speed_down": "f",
264
+ # quit teleop
265
+ "quit": "q",
266
+ }
267
+ )
268
+
269
+ mock: bool = False
270
+ ```
271
+
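+ The `camera_index` values such as `/dev/video0` above depend on your Pi. A quick, standard-library-only way to see which video device nodes exist on the Pi (this only lists device nodes; it doesn't verify they are working cameras):
+ ```python
+ # List video device nodes on the Pi (Linux only).
+ import glob
+
+ print(sorted(glob.glob("/dev/video*")))
+ ```
+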
272
+ ## Wired version
273
+
274
+ For the wired LeKiwi version, the configured IP address should refer to your own laptop (127.0.0.1), because in this case both the leader arm and LeKiwi are connected to your laptop. Below is an example configuration for this wired setup:
275
+ ```python
276
+ @RobotConfig.register_subclass("lekiwi")
277
+ @dataclass
278
+ class LeKiwiRobotConfig(RobotConfig):
279
+ # `max_relative_target` limits the magnitude of the relative positional target vector for safety purposes.
280
+ # Set this to a positive scalar to have the same value for all motors, or a list that is the same length as
281
+ # the number of motors in your follower arms.
282
+ max_relative_target: int | None = None
283
+
284
+ # Network Configuration
285
+ ip: str = "127.0.0.1"
286
+ port: int = 5555
287
+ video_port: int = 5556
288
+
289
+ cameras: dict[str, CameraConfig] = field(
290
+ default_factory=lambda: {
291
+ "front": OpenCVCameraConfig(
292
+ camera_index=0, fps=30, width=640, height=480, rotation=90
293
+ ),
294
+ "wrist": OpenCVCameraConfig(
295
+ camera_index=1, fps=30, width=640, height=480, rotation=180
296
+ ),
297
+ }
298
+ )
299
+
300
+ calibration_dir: str = ".cache/calibration/lekiwi"
301
+
302
+ leader_arms: dict[str, MotorsBusConfig] = field(
303
+ default_factory=lambda: {
304
+ "main": FeetechMotorsBusConfig(
305
+ port="/dev/tty.usbmodem585A0077581",
306
+ motors={
307
+ # name: (index, model)
308
+ "shoulder_pan": [1, "sts3215"],
309
+ "shoulder_lift": [2, "sts3215"],
310
+ "elbow_flex": [3, "sts3215"],
311
+ "wrist_flex": [4, "sts3215"],
312
+ "wrist_roll": [5, "sts3215"],
313
+ "gripper": [6, "sts3215"],
314
+ },
315
+ ),
316
+ }
317
+ )
318
+
319
+ follower_arms: dict[str, MotorsBusConfig] = field(
320
+ default_factory=lambda: {
321
+ "main": FeetechMotorsBusConfig(
322
+ port="/dev/tty.usbmodem58760431061",
323
+ motors={
324
+ # name: (index, model)
325
+ "shoulder_pan": [1, "sts3215"],
326
+ "shoulder_lift": [2, "sts3215"],
327
+ "elbow_flex": [3, "sts3215"],
328
+ "wrist_flex": [4, "sts3215"],
329
+ "wrist_roll": [5, "sts3215"],
330
+ "gripper": [6, "sts3215"],
331
+ "left_wheel": (7, "sts3215"),
332
+ "back_wheel": (8, "sts3215"),
333
+ "right_wheel": (9, "sts3215"),
334
+ },
335
+ ),
336
+ }
337
+ )
338
+
339
+ teleop_keys: dict[str, str] = field(
340
+ default_factory=lambda: {
341
+ # Movement
342
+ "forward": "w",
343
+ "backward": "s",
344
+ "left": "a",
345
+ "right": "d",
346
+ "rotate_left": "z",
347
+ "rotate_right": "x",
348
+ # Speed control
349
+ "speed_up": "r",
350
+ "speed_down": "f",
351
+ # quit teleop
352
+ "quit": "q",
353
+ }
354
+ )
355
+
356
+ mock: bool = False
357
+ ```
358
+
359
+ # E. Calibration
360
+ Now we have to calibrate the leader arm and the follower arm. The wheel motors don't have to be calibrated.
361
+
362
+
363
+ ### Calibrate follower arm (on mobile base)
364
+ > [!IMPORTANT]
365
+ > Contrary to step 6 of the [assembly video](https://youtu.be/FioA2oeFZ5I?t=724), which illustrates the auto calibration, we will do a manual calibration of the follower arm for now.
366
+
367
+ You will need to move the follower arm to these positions sequentially:
368
+
369
+ | 1. Zero position | 2. Rotated position | 3. Rest position |
370
+ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- |
371
+ | <img src="../media/lekiwi/mobile_calib_zero.webp?raw=true" alt="SO-100 follower arm zero position" title="SO-100 follower arm zero position" style="width:100%;"> | <img src="../media/lekiwi/mobile_calib_rotated.webp?raw=true" alt="SO-100 follower arm rotated position" title="SO-100 follower arm rotated position" style="width:100%;"> | <img src="../media/lekiwi/mobile_calib_rest.webp?raw=true" alt="SO-100 follower arm rest position" title="SO-100 follower arm rest position" style="width:100%;"> |
372
+
373
+ Make sure the arm is connected to the Raspberry Pi and run this script (on the Raspberry Pi) to launch manual calibration:
374
+ ```bash
375
+ python lerobot/scripts/control_robot.py \
376
+ --robot.type=lekiwi \
377
+ --robot.cameras='{}' \
378
+ --control.type=calibrate \
379
+ --control.arms='["main_follower"]'
380
+ ```
381
+
382
+ ### Wired version
383
+ If you have the **wired** LeKiwi version please run all commands including this calibration command on your laptop.
384
+
385
+ ### Calibrate leader arm
386
+ Then calibrate the leader arm (which is attached to the laptop/PC). You will need to move the leader arm to these positions sequentially:
387
+
388
+ | 1. Zero position | 2. Rotated position | 3. Rest position |
389
+ | ------------------------------------------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ |
390
+ | <img src="../media/so100/leader_zero.webp?raw=true" alt="SO-100 leader arm zero position" title="SO-100 leader arm zero position" style="width:100%;"> | <img src="../media/so100/leader_rotated.webp?raw=true" alt="SO-100 leader arm rotated position" title="SO-100 leader arm rotated position" style="width:100%;"> | <img src="../media/so100/leader_rest.webp?raw=true" alt="SO-100 leader arm rest position" title="SO-100 leader arm rest position" style="width:100%;"> |
391
+
392
+ Run this script (on your laptop/pc) to launch manual calibration:
393
+ ```bash
394
+ python lerobot/scripts/control_robot.py \
395
+ --robot.type=lekiwi \
396
+ --robot.cameras='{}' \
397
+ --control.type=calibrate \
398
+ --control.arms='["main_leader"]'
399
+ ```
400
+
401
+ # F. Teleoperate
402
+ To teleoperate, SSH into your Raspberry Pi, run `conda activate lerobot`, and then run this script:
403
+ ```bash
404
+ python lerobot/scripts/control_robot.py \
405
+ --robot.type=lekiwi \
406
+ --control.type=remote_robot
407
+ ```
408
+
409
+ Then on your laptop, also run `conda activate lerobot` and this script:
410
+ ```bash
411
+ python lerobot/scripts/control_robot.py \
412
+ --robot.type=lekiwi \
413
+ --control.type=teleoperate \
414
+ --control.fps=30
415
+ ```
416
+
417
+ On your laptop you should see something like this: ```[INFO] Connected to remote robot at tcp://172.17.133.91:5555 and video stream at tcp://172.17.133.91:5556.``` Now you can move the leader arm and use the keyboard (w,a,s,d) to drive forward, left, backward and right, use (z,x) to turn left or right, and use (r,f) to increase or decrease the speed of the mobile robot. There are three speed modes, see the table below:
418
+ | Speed Mode | Linear Speed (m/s) | Rotation Speed (deg/s) |
419
+ | ---------- | ------------------ | ---------------------- |
420
+ | Fast | 0.4 | 90 |
421
+ | Medium | 0.25 | 60 |
422
+ | Slow | 0.1 | 30 |
423
+
424
+
425
+ | Key | Action |
426
+ | --- | -------------- |
427
+ | W | Move forward |
428
+ | A | Move left |
429
+ | S | Move backward |
430
+ | D | Move right |
431
+ | Z | Turn left |
432
+ | X | Turn right |
433
+ | R | Increase speed |
434
+ | F | Decrease speed |
435
+
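+ As a sketch of the presets cycled with the R and F keys, each speed mode pairs a linear speed with a rotation speed (the values below simply mirror the table above; the actual mapping lives in the LeKiwi robot code):
+ ```python
+ # Speed presets cycled with the R / F keys (values mirror the table above).
+ SPEED_MODES = {
+     "fast":   {"linear_m_s": 0.4,  "rotation_deg_s": 90},
+     "medium": {"linear_m_s": 0.25, "rotation_deg_s": 60},
+     "slow":   {"linear_m_s": 0.1,  "rotation_deg_s": 30},
+ }
+ print(SPEED_MODES["medium"])
+ ```
+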
436
+ > [!TIP]
437
+ > If you use a different keyboard you can change the keys for each command in the [`LeKiwiRobotConfig`](../lerobot/common/robot_devices/robots/configs.py).
438
+
439
+ ### Wired version
440
+ If you have the **wired** LeKiwi version please run all commands including both these teleoperation commands on your laptop.
441
+
442
+ ## Troubleshoot communication
443
+
444
+ If you are having trouble connecting to the Mobile SO100, follow these steps to diagnose and resolve the issue.
445
+
446
+ ### 1. Verify IP Address Configuration
447
+ Make sure that the correct IP for the Pi is set in the configuration file. To check the Raspberry Pi's IP address, run (on the Pi command line):
448
+ ```bash
449
+ hostname -I
450
+ ```
451
+
452
+ ### 2. Check if Pi is reachable from laptop/pc
453
+ Try pinging the Raspberry Pi from your laptop:
454
+ ```bash
455
+ ping <your_pi_ip_address>
456
+ ```
457
+
458
+ If the ping fails:
459
+ - Ensure the Pi is powered on and connected to the same network.
460
+ - Check if SSH is enabled on the Pi.
461
+
462
+ ### 3. Try SSH connection
463
+ If you can't SSH into the Pi, it might not be properly connected. Use:
464
+ ```bash
465
+ ssh <your_pi_user_name>@<your_pi_ip_address>
466
+ ```
467
+ If you get a connection error:
468
+ - Ensure SSH is enabled on the Pi by running:
469
+ ```bash
470
+ sudo raspi-config
471
+ ```
472
+ Then navigate to: **Interfacing Options -> SSH** and enable it.
473
+
474
+ ### 4. Same config file
475
+ Make sure the configuration file on both your laptop/pc and the Raspberry Pi is the same.
476
+
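+ ### 5. Check the robot ports from the laptop
+ As an extra check while the `remote_robot` script is running on the Pi, you can test from your laptop whether the command and video ports from the config (5555 and 5556 above) accept TCP connections. A minimal standard-library sketch; replace the IP with your Pi's address:
+ ```python
+ # Check whether the LeKiwi ports accept TCP connections from the laptop.
+ import socket
+
+ PI_IP = "172.17.133.91"  # replace with your Pi's IP address
+
+ for port in (5555, 5556):
+     with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+         s.settimeout(2)
+         result = s.connect_ex((PI_IP, port))
+         print(f"port {port}:", "open" if result == 0 else f"not reachable (errno {result})")
+ ```
+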
477
+ # G. Record a dataset
478
+ Once you're familiar with teleoperation, you can record your first dataset with LeKiwi.
479
+
480
+ To start the program on LeKiwi, SSH into your Raspberry Pi, and run `conda activate lerobot` and this script:
481
+ ```bash
482
+ python lerobot/scripts/control_robot.py \
483
+ --robot.type=lekiwi \
484
+ --control.type=remote_robot
485
+ ```
486
+
487
+ If you want to use the Hugging Face hub features for uploading your dataset and you haven't previously done it, make sure you've logged in using a write-access token, which can be generated from the [Hugging Face settings](https://huggingface.co/settings/tokens):
488
+ ```bash
489
+ huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
490
+ ```
491
+
492
+ Store your Hugging Face repository name in a variable to run these commands:
493
+ ```bash
494
+ HF_USER=$(huggingface-cli whoami | head -n 1)
495
+ echo $HF_USER
496
+ ```
497
+ Then, on your laptop, run this command to record 2 episodes and upload your dataset to the hub:
498
+ ```bash
499
+ python lerobot/scripts/control_robot.py \
500
+ --robot.type=lekiwi \
501
+ --control.type=record \
502
+ --control.fps=30 \
503
+ --control.single_task="Grasp a lego block and put it in the bin." \
504
+ --control.repo_id=${HF_USER}/lekiwi_test \
505
+ --control.tags='["tutorial"]' \
506
+ --control.warmup_time_s=5 \
507
+ --control.episode_time_s=30 \
508
+ --control.reset_time_s=30 \
509
+ --control.num_episodes=2 \
510
+ --control.push_to_hub=true
511
+ ```
512
+
513
+ Note: You can resume recording by adding `--control.resume=true`.
514
+
515
+ ### Wired version
516
+ If you have the **wired** LeKiwi version please run all commands including both these record dataset commands on your laptop.
517
+
518
+ # H. Visualize a dataset
519
+
520
+ If you uploaded your dataset to the hub with `--control.push_to_hub=true`, you can [visualize your dataset online](https://huggingface.co/spaces/lerobot/visualize_dataset) by copy-pasting your repo id, given by:
521
+ ```bash
522
+ echo ${HF_USER}/lekiwi_test
523
+ ```
524
+
525
+ If you didn't upload to the hub (using `--control.push_to_hub=false`), you can still visualize the dataset locally (a window will open in your browser at `http://127.0.0.1:9090` with the visualization tool):
526
+ ```bash
527
+ python lerobot/scripts/visualize_dataset_html.py \
528
+ --repo-id ${HF_USER}/lekiwi_test \
529
+ --local-files-only 1
530
+ ```
531
+
532
+ # I. Replay an episode
533
+ Now try to replay the first episode on your robot:
534
+ ```bash
535
+ python lerobot/scripts/control_robot.py \
536
+ --robot.type=lekiwi \
537
+ --control.type=replay \
538
+ --control.fps=30 \
539
+ --control.repo_id=${HF_USER}/lekiwi_test \
540
+ --control.episode=0
541
+ ```
542
+
543
+ # J. Train a policy
544
+
545
+ To train a policy to control your robot, use the [`python lerobot/scripts/train.py`](../lerobot/scripts/train.py) script. A few arguments are required. Here is an example command:
546
+ ```bash
547
+ python lerobot/scripts/train.py \
548
+ --dataset.repo_id=${HF_USER}/lekiwi_test \
549
+ --policy.type=act \
550
+ --output_dir=outputs/train/act_lekiwi_test \
551
+ --job_name=act_lekiwi_test \
552
+ --policy.device=cuda \
553
+ --wandb.enable=true
554
+ ```
555
+
556
+ Let's explain it:
557
+ 1. We provided the dataset as argument with `--dataset.repo_id=${HF_USER}/lekiwi_test`.
558
+ 2. We provided the policy with `policy.type=act`. This loads configurations from [`configuration_act.py`](../lerobot/common/policies/act/configuration_act.py). Importantly, this policy will automatically adapt to the number of motor states, motor actions and cameras of your robot (e.g. `laptop` and `phone`) which have been saved in your dataset.
559
+ 3. We provided `policy.device=cuda` since we are training on an Nvidia GPU, but you could use `policy.device=mps` to train on Apple silicon.
560
+ 4. We provided `wandb.enable=true` to use [Weights and Biases](https://docs.wandb.ai/quickstart) for visualizing training plots. This is optional, but if you use it, make sure you are logged in by running `wandb login`.
561
+
562
+ Training should take several hours. You will find checkpoints in `outputs/train/act_lekiwi_test/checkpoints`.
563
+
564
+ # K. Evaluate your policy
565
+
566
+ You can use the `record` function from [`lerobot/scripts/control_robot.py`](../lerobot/scripts/control_robot.py) but with a policy checkpoint as input. For instance, run this command to record 10 evaluation episodes:
567
+ ```bash
568
+ python lerobot/scripts/control_robot.py \
569
+ --robot.type=lekiwi \
570
+ --control.type=record \
571
+ --control.fps=30 \
572
+ --control.single_task="Drive to the red block and pick it up" \
573
+ --control.repo_id=${HF_USER}/eval_act_lekiwi_test \
574
+ --control.tags='["tutorial"]' \
575
+ --control.warmup_time_s=5 \
576
+ --control.episode_time_s=30 \
577
+ --control.reset_time_s=30 \
578
+ --control.num_episodes=10 \
579
+ --control.push_to_hub=true \
580
+ --control.policy.path=outputs/train/act_lekiwi_test/checkpoints/last/pretrained_model
581
+ ```
582
+
583
+ As you can see, it's almost the same command as previously used to record your training dataset. Two things changed:
584
+ 1. There is an additional `--control.policy.path` argument which indicates the path to your policy checkpoint (e.g. `outputs/train/act_lekiwi_test/checkpoints/last/pretrained_model`). You can also use the model repository if you uploaded a model checkpoint to the hub (e.g. `${HF_USER}/act_lekiwi_test`).
585
+ 2. The dataset name begins with `eval` to reflect that you are running inference (e.g. `${HF_USER}/eval_act_lekiwi_test`).
examples/11_use_moss.md ADDED
@@ -0,0 +1,335 @@
1
+ This tutorial explains how to use [Moss v1](https://github.com/jess-moss/moss-robot-arms) with LeRobot.
2
+
3
+ ## Source the parts
4
+
5
+ Follow this [README](https://github.com/jess-moss/moss-robot-arms). It contains the bill of materials, with links to source the parts, as well as the instructions to 3D print the parts, and advice if it's your first time printing or if you don't already own a 3D printer.
6
+
7
+ **Important**: Before assembling, you will first need to configure your motors. To this end, we provide a nice script, so let's first install LeRobot. After configuration, we will also guide you through assembly.
8
+
9
+ ## Install LeRobot
10
+
11
+ On your computer:
12
+
13
+ 1. [Install Miniconda](https://docs.anaconda.com/miniconda/#quick-command-line-install):
14
+ ```bash
15
+ mkdir -p ~/miniconda3
16
+ wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh
17
+ bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3
18
+ rm ~/miniconda3/miniconda.sh
19
+ ~/miniconda3/bin/conda init bash
20
+ ```
21
+
22
+ 2. Restart shell or `source ~/.bashrc`
23
+
24
+ 3. Create and activate a fresh conda environment for lerobot
25
+ ```bash
26
+ conda create -y -n lerobot python=3.10 && conda activate lerobot
27
+ ```
28
+
29
+ 4. Clone LeRobot:
30
+ ```bash
31
+ git clone https://github.com/huggingface/lerobot.git ~/lerobot
32
+ ```
33
+
34
+ 5. Install LeRobot with dependencies for the feetech motors:
35
+ ```bash
36
+ cd ~/lerobot && pip install -e ".[feetech]"
37
+ ```
38
+
39
+ For Linux only (not Mac), install extra dependencies for recording datasets:
40
+ ```bash
41
+ conda install -y -c conda-forge ffmpeg
42
+ pip uninstall -y opencv-python
43
+ conda install -y -c conda-forge "opencv>=4.10.0"
44
+ ```
45
+
46
+ ## Configure the motors
47
+
48
+ Follow step 1 of the [assembly video](https://www.youtube.com/watch?v=DA91NJOtMic), which illustrates the use of our scripts below.
49
+
50
+ **Find the USB ports associated with your arms**
51
+ To find the correct ports for each arm, run the utility script twice:
52
+ ```bash
53
+ python lerobot/scripts/find_motors_bus_port.py
54
+ ```
55
+
56
+ Example output when identifying the leader arm's port (e.g., `/dev/tty.usbmodem575E0031751` on Mac, or possibly `/dev/ttyACM0` on Linux):
57
+ ```
58
+ Finding all available ports for the MotorBus.
59
+ ['/dev/tty.usbmodem575E0032081', '/dev/tty.usbmodem575E0031751']
60
+ Remove the usb cable from your DynamixelMotorsBus and press Enter when done.
61
+
62
+ [...Disconnect leader arm and press Enter...]
63
+
64
+ The port of this DynamixelMotorsBus is /dev/tty.usbmodem575E0031751
65
+ Reconnect the usb cable.
66
+ ```
67
+
68
+ Example output when identifying the follower arm's port (e.g., `/dev/tty.usbmodem575E0032081`, or possibly `/dev/ttyACM1` on Linux):
69
+ ```
70
+ Finding all available ports for the MotorBus.
71
+ ['/dev/tty.usbmodem575E0032081', '/dev/tty.usbmodem575E0031751']
72
+ Remove the usb cable from your DynamixelMotorsBus and press Enter when done.
73
+
74
+ [...Disconnect follower arm and press Enter...]
75
+
76
+ The port of this DynamixelMotorsBus is /dev/tty.usbmodem575E0032081
77
+ Reconnect the usb cable.
78
+ ```
79
+
80
+ Troubleshooting: On Linux, you might need to give access to the USB ports by running:
81
+ ```bash
82
+ sudo chmod 666 /dev/ttyACM0
83
+ sudo chmod 666 /dev/ttyACM1
84
+ ```
85
+
86
+ #### Update config file
87
+
88
+ **Important**: Now that you have your ports, update the **port** default values of [`MossRobotConfig`](../lerobot/common/robot_devices/robots/configs.py). You will find something like:
89
+ ```python
90
+ @RobotConfig.register_subclass("moss")
91
+ @dataclass
92
+ class MossRobotConfig(ManipulatorRobotConfig):
93
+ calibration_dir: str = ".cache/calibration/moss"
94
+ # `max_relative_target` limits the magnitude of the relative positional target vector for safety purposes.
95
+ # Set this to a positive scalar to have the same value for all motors, or a list that is the same length as
96
+ # the number of motors in your follower arms.
97
+ max_relative_target: int | None = None
98
+
99
+ leader_arms: dict[str, MotorsBusConfig] = field(
100
+ default_factory=lambda: {
101
+ "main": FeetechMotorsBusConfig(
102
+ port="/dev/tty.usbmodem58760431091", <-- UPDATE HERE
103
+ motors={
104
+ # name: (index, model)
105
+ "shoulder_pan": [1, "sts3215"],
106
+ "shoulder_lift": [2, "sts3215"],
107
+ "elbow_flex": [3, "sts3215"],
108
+ "wrist_flex": [4, "sts3215"],
109
+ "wrist_roll": [5, "sts3215"],
110
+ "gripper": [6, "sts3215"],
111
+ },
112
+ ),
113
+ }
114
+ )
115
+
116
+ follower_arms: dict[str, MotorsBusConfig] = field(
117
+ default_factory=lambda: {
118
+ "main": FeetechMotorsBusConfig(
119
+ port="/dev/tty.usbmodem585A0076891", <-- UPDATE HERE
120
+ motors={
121
+ # name: (index, model)
122
+ "shoulder_pan": [1, "sts3215"],
123
+ "shoulder_lift": [2, "sts3215"],
124
+ "elbow_flex": [3, "sts3215"],
125
+ "wrist_flex": [4, "sts3215"],
126
+ "wrist_roll": [5, "sts3215"],
127
+ "gripper": [6, "sts3215"],
128
+ },
129
+ ),
130
+ }
131
+ )
132
+ ```
133
+
134
+ **Configure your motors**
135
+ Plug in your first motor and run this script to set its ID to 1. It will also set its present position to 2048, so expect your motor to rotate:
136
+ ```bash
137
+ python lerobot/scripts/configure_motor.py \
138
+ --port /dev/tty.usbmodem58760432961 \
139
+ --brand feetech \
140
+ --model sts3215 \
141
+ --baudrate 1000000 \
142
+ --ID 1
143
+ ```
144
+
145
+ Note: These motors are currently limited: they can only take values between 0 and 4096, which corresponds to one full turn, and they cannot rotate beyond that. 2048 is the middle of this range, so we can move -2048 steps (180 degrees anticlockwise) or +2048 steps (180 degrees clockwise) before hitting the limit. The configuration step also sets the homing offset to 0, so that if you misassembled the arm, you can always update the homing offset to compensate for a shift of up to ± 2048 steps (± 180 degrees).
146
+
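+ To make the numbers above concrete, here is a small, purely illustrative sketch (the constant and function names are made up and not part of the LeRobot API) of how raw steps relate to degrees for these 0-4096 step motors:
+ ```python
+ STEPS_PER_TURN = 4096  # 4096 steps correspond to one full turn (360 degrees)
+ 
+ def steps_to_degrees(steps: int) -> float:
+     return steps * 360 / STEPS_PER_TURN
+ 
+ def degrees_to_steps(degrees: float) -> int:
+     return round(degrees * STEPS_PER_TURN / 360)
+ 
+ print(steps_to_degrees(2048))  # 180.0 -> half a turn away from the configured position
+ print(degrees_to_steps(-90))   # -1024 -> homing offset compensating a quarter-turn misassembly
+ ```
+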
147
+ Then unplug your motor and plug the second motor and set its ID to 2.
148
+ ```bash
149
+ python lerobot/scripts/configure_motor.py \
150
+ --port /dev/tty.usbmodem58760432961 \
151
+ --brand feetech \
152
+ --model sts3215 \
153
+ --baudrate 1000000 \
154
+ --ID 2
155
+ ```
156
+
157
+ Redo the process for all your motors until ID 6. Do the same for the 6 motors of the leader arm.
158
+
159
+ **Remove the gears of the 6 leader motors**
160
+ Follow step 2 of the [assembly video](https://www.youtube.com/watch?v=DA91NJOtMic). You need to remove the gears from the motors of the leader arm. As a result, only the motors' position encoders are used, and the reduced friction makes the leader arm easier to operate.
161
+
162
+ **Add motor horn to the motors**
163
+ Follow step 3 of the [assembly video](https://www.youtube.com/watch?v=DA91NJOtMic). For Moss v1, you need to align the holes on the motor horn with the motor spline so that they sit at approximately the 3, 6, 9 and 12 o'clock positions.
164
+ Try to avoid rotating the motor while doing so, to keep the position 2048 set during configuration. This is especially tricky for the leader motors, as they are more sensitive without the gears, but it's ok if they end up slightly rotated.
165
+
166
+ ## Assemble the arms
167
+
168
+ Follow step 4 of the [assembly video](https://www.youtube.com/watch?v=DA91NJOtMic). The first arm should take a bit more than 1 hour to assemble, but once you get used to it, you can assemble the second arm in under 1 hour.
169
+
170
+ ## Calibrate
171
+
172
+ Next, you'll need to calibrate your Moss v1 robot to ensure that the leader and follower arms have the same position values when they are in the same physical position. This calibration is essential because it allows a neural network trained on one Moss v1 robot to work on another.
173
+
174
+ **Manual calibration of follower arm**
175
+ /!\ Contrary to step 6 of the [assembly video](https://www.youtube.com/watch?v=DA91NJOtMic), which illustrates the auto calibration, we will actually do a manual calibration of the follower for now.
176
+
177
+ You will need to move the follower arm to these positions sequentially:
178
+
179
+ | 1. Zero position | 2. Rotated position | 3. Rest position |
180
+ | ------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------- |
181
+ | <img src="../media/moss/follower_zero.webp?raw=true" alt="Moss v1 follower arm zero position" title="Moss v1 follower arm zero position" style="width:100%;"> | <img src="../media/moss/follower_rotated.webp?raw=true" alt="Moss v1 follower arm rotated position" title="Moss v1 follower arm rotated position" style="width:100%;"> | <img src="../media/moss/follower_rest.webp?raw=true" alt="Moss v1 follower arm rest position" title="Moss v1 follower arm rest position" style="width:100%;"> |
182
+
183
+ Make sure both arms are connected and run this script to launch manual calibration:
184
+ ```bash
185
+ python lerobot/scripts/control_robot.py \
186
+ --robot.type=moss \
187
+ --robot.cameras='{}' \
188
+ --control.type=calibrate \
189
+ --control.arms='["main_follower"]'
190
+ ```
191
+
192
+ **Manual calibration of leader arm**
193
+ Follow step 6 of the [assembly video](https://www.youtube.com/watch?v=DA91NJOtMic) which illustrates the manual calibration. You will need to move the leader arm to these positions sequentially:
194
+
195
+ | 1. Zero position | 2. Rotated position | 3. Rest position |
196
+ | ------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- |
197
+ | <img src="../media/moss/leader_zero.webp?raw=true" alt="Moss v1 leader arm zero position" title="Moss v1 leader arm zero position" style="width:100%;"> | <img src="../media/moss/leader_rotated.webp?raw=true" alt="Moss v1 leader arm rotated position" title="Moss v1 leader arm rotated position" style="width:100%;"> | <img src="../media/moss/leader_rest.webp?raw=true" alt="Moss v1 leader arm rest position" title="Moss v1 leader arm rest position" style="width:100%;"> |
198
+
199
+ Run this script to launch manual calibration:
200
+ ```bash
201
+ python lerobot/scripts/control_robot.py \
202
+ --robot.type=moss \
203
+ --robot.cameras='{}' \
204
+ --control.type=calibrate \
205
+ --control.arms='["main_leader"]'
206
+ ```
207
+
208
+ ## Teleoperate
209
+
210
+ **Simple teleop**
211
+ Then you are ready to teleoperate your robot! Run this simple script (it won't connect to or display the cameras):
212
+ ```bash
213
+ python lerobot/scripts/control_robot.py \
214
+ --robot.type=moss \
215
+ --robot.cameras='{}' \
216
+ --control.type=teleoperate
217
+ ```
218
+
219
+
220
+ **Teleop with displaying cameras**
221
+ Follow [this guide to set up your cameras](https://github.com/huggingface/lerobot/blob/main/examples/7_get_started_with_real_robot.md#c-add-your-cameras-with-opencvcamera). Then you will be able to display the cameras on your computer while you are teleoperating, by running the following command. This is useful to prepare your setup before recording your first dataset.
222
+ ```bash
223
+ python lerobot/scripts/control_robot.py \
224
+ --robot.type=moss \
225
+ --control.type=teleoperate
226
+ ```
227
+
228
+ ## Record a dataset
229
+
230
+ Once you're familiar with teleoperation, you can record your first dataset with Moss v1.
231
+
232
+ If you want to use the Hugging Face hub features for uploading your dataset and you haven't previously done it, make sure you've logged in using a write-access token, which can be generated from the [Hugging Face settings](https://huggingface.co/settings/tokens):
233
+ ```bash
234
+ huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
235
+ ```
236
+
237
+ Store your Hugging Face repository name in a variable to run these commands:
238
+ ```bash
239
+ HF_USER=$(huggingface-cli whoami | head -n 1)
240
+ echo $HF_USER
241
+ ```
242
+
243
+ Record 2 episodes and upload your dataset to the hub:
244
+ ```bash
245
+ python lerobot/scripts/control_robot.py \
246
+ --robot.type=moss \
247
+ --control.type=record \
248
+ --control.fps=30 \
249
+ --control.single_task="Grasp a lego block and put it in the bin." \
250
+ --control.repo_id=${HF_USER}/moss_test \
251
+ --control.tags='["moss","tutorial"]' \
252
+ --control.warmup_time_s=5 \
253
+ --control.episode_time_s=30 \
254
+ --control.reset_time_s=30 \
255
+ --control.num_episodes=2 \
256
+ --control.push_to_hub=true
257
+ ```
258
+
259
+ Note: You can resume recording by adding `--control.resume=true`.
260
+
261
+ ## Visualize a dataset
262
+
263
+ If you uploaded your dataset to the hub with `--control.push_to_hub=true`, you can [visualize your dataset online](https://huggingface.co/spaces/lerobot/visualize_dataset) by copy pasting your repo id given by:
264
+ ```bash
265
+ echo ${HF_USER}/moss_test
266
+ ```
267
+
268
+ If you didn't upload it (i.e. you recorded with `--control.push_to_hub=false`), you can still visualize your dataset locally with:
269
+ ```bash
270
+ python lerobot/scripts/visualize_dataset_html.py \
271
+ --repo-id ${HF_USER}/moss_test \
272
+ --local-files-only 1
273
+ ```
274
+
275
+ ## Replay an episode
276
+
277
+ Now try to replay the first episode on your robot:
278
+ ```bash
279
+ python lerobot/scripts/control_robot.py \
280
+ --robot.type=moss \
281
+ --control.type=replay \
282
+ --control.fps=30 \
283
+ --control.repo_id=${HF_USER}/moss_test \
284
+ --control.episode=0
285
+ ```
286
+
287
+ ## Train a policy
288
+
289
+ To train a policy to control your robot, use the [`python lerobot/scripts/train.py`](../lerobot/scripts/train.py) script. A few arguments are required. Here is an example command:
290
+ ```bash
291
+ python lerobot/scripts/train.py \
292
+ --dataset.repo_id=${HF_USER}/moss_test \
293
+ --policy.type=act \
294
+ --output_dir=outputs/train/act_moss_test \
295
+ --job_name=act_moss_test \
296
+ --policy.device=cuda \
297
+ --wandb.enable=true
298
+ ```
299
+
300
+ Let's explain it:
301
+ 1. We provided the dataset as argument with `--dataset.repo_id=${HF_USER}/moss_test`.
302
+ 2. We provided the policy with `policy.type=act`. This loads configurations from [`configuration_act.py`](../lerobot/common/policies/act/configuration_act.py). Importantly, this policy will automatically adapt to the number of motor states, motor actions and cameras of your robot (e.g. `laptop` and `phone`) which have been saved in your dataset.
303
+ 3. We provided `policy.device=cuda` since we are training on an Nvidia GPU, but you could use `policy.device=mps` to train on Apple silicon.
304
+ 4. We provided `wandb.enable=true` to use [Weights and Biases](https://docs.wandb.ai/quickstart) for visualizing training plots. This is optional, but if you use it, make sure you are logged in by running `wandb login`.
305
+
306
+ Training should take several hours. You will find checkpoints in `outputs/train/act_moss_test/checkpoints`.
307
+
308
+ ## Evaluate your policy
309
+
310
+ You can use the `record` function from [`lerobot/scripts/control_robot.py`](../lerobot/scripts/control_robot.py) but with a policy checkpoint as input. For instance, run this command to record 10 evaluation episodes:
311
+ ```bash
312
+ python lerobot/scripts/control_robot.py \
313
+ --robot.type=moss \
314
+ --control.type=record \
315
+ --control.fps=30 \
316
+ --control.single_task="Grasp a lego block and put it in the bin." \
317
+ --control.repo_id=${HF_USER}/eval_act_moss_test \
318
+ --control.tags='["tutorial"]' \
319
+ --control.warmup_time_s=5 \
320
+ --control.episode_time_s=30 \
321
+ --control.reset_time_s=30 \
322
+ --control.num_episodes=10 \
323
+ --control.push_to_hub=true \
324
+ --control.policy.path=outputs/train/act_moss_test/checkpoints/last/pretrained_model
325
+ ```
326
+
327
+ As you can see, it's almost the same command as previously used to record your training dataset. Two things changed:
328
+ 1. There is an additional `--control.policy.path` argument which indicates the path to your policy checkpoint (e.g. `outputs/train/act_moss_test/checkpoints/last/pretrained_model`). You can also use the model repository if you uploaded a model checkpoint to the hub (e.g. `${HF_USER}/act_moss_test`).
329
+ 2. The name of the dataset begins with `eval` to reflect that you are running inference (e.g. `${HF_USER}/eval_act_moss_test`).
330
+
331
+ ## More
332
+
333
+ See this [previous tutorial](https://github.com/huggingface/lerobot/blob/main/examples/7_get_started_with_real_robot.md#4-train-a-policy-on-your-data) for a more in-depth walkthrough of controlling real robots with LeRobot.
334
+
335
+ If you have any questions or need help, please reach out on Discord in the channel [`#moss-arm`](https://discord.com/channels/1216765309076115607/1275374638985252925).
examples/1_load_lerobot_dataset.py ADDED
@@ -0,0 +1,148 @@
1
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """
16
+ This script demonstrates the use of the `LeRobotDataset` class for handling and processing robotic datasets from Hugging Face.
17
+ It illustrates how to load datasets, manipulate them, and apply transformations suitable for machine learning tasks in PyTorch.
18
+
19
+ Features included in this script:
20
+ - Viewing a dataset's metadata and exploring its properties.
21
+ - Loading an existing dataset from the hub or a subset of it.
22
+ - Accessing frames by episode number.
23
+ - Using advanced dataset features like timestamp-based frame selection.
24
+ - Demonstrating compatibility with PyTorch DataLoader for batch processing.
25
+
26
+ The script ends with examples of how to batch process data using PyTorch's DataLoader.
27
+ """
28
+
29
+ from pprint import pprint
30
+
31
+ import torch
32
+ from huggingface_hub import HfApi
33
+
34
+ import lerobot
35
+ from lerobot.common.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
36
+
37
+ # We ported a number of existing datasets ourselves, use this to see the list:
38
+ print("List of available datasets:")
39
+ pprint(lerobot.available_datasets)
40
+
41
+ # You can also browse through the datasets created/ported by the community on the hub using the hub api:
42
+ hub_api = HfApi()
43
+ repo_ids = [info.id for info in hub_api.list_datasets(task_categories="robotics", tags=["LeRobot"])]
44
+ pprint(repo_ids)
45
+
46
+ # Or simply explore them in your web browser directly at:
47
+ # https://huggingface.co/datasets?other=LeRobot
48
+
49
+ # Let's take this one for this example
50
+ repo_id = "lerobot/aloha_mobile_cabinet"
51
+ # We can have a look and fetch its metadata to know more about it:
52
+ ds_meta = LeRobotDatasetMetadata(repo_id)
53
+
54
+ # By instantiating just this class, you can quickly access useful information about the content and the
55
+ # structure of the dataset without downloading the actual data yet (only metadata files — which are
56
+ # lightweight).
57
+ print(f"Total number of episodes: {ds_meta.total_episodes}")
58
+ print(f"Average number of frames per episode: {ds_meta.total_frames / ds_meta.total_episodes:.3f}")
59
+ print(f"Frames per second used during data collection: {ds_meta.fps}")
60
+ print(f"Robot type: {ds_meta.robot_type}")
61
+ print(f"keys to access images from cameras: {ds_meta.camera_keys=}\n")
62
+
63
+ print("Tasks:")
64
+ print(ds_meta.tasks)
65
+ print("Features:")
66
+ pprint(ds_meta.features)
67
+
68
+ # You can also get a short summary by simply printing the object:
69
+ print(ds_meta)
70
+
71
+ # You can then load the actual dataset from the hub.
72
+ # Either load any subset of episodes:
73
+ dataset = LeRobotDataset(repo_id, episodes=[0, 10, 11, 23])
74
+
75
+ # And see how many frames you have:
76
+ print(f"Selected episodes: {dataset.episodes}")
77
+ print(f"Number of episodes selected: {dataset.num_episodes}")
78
+ print(f"Number of frames selected: {dataset.num_frames}")
79
+
80
+ # Or simply load the entire dataset:
81
+ dataset = LeRobotDataset(repo_id)
82
+ print(f"Number of episodes selected: {dataset.num_episodes}")
83
+ print(f"Number of frames selected: {dataset.num_frames}")
84
+
85
+ # The previous metadata class is contained in the 'meta' attribute of the dataset:
86
+ print(dataset.meta)
87
+
88
+ # LeRobotDataset actually wraps an underlying Hugging Face dataset
89
+ # (see https://huggingface.co/docs/datasets for more information).
90
+ print(dataset.hf_dataset)
91
+
92
+ # LeRobotDataset also subclasses PyTorch's Dataset, so you can do everything you know and love from working
93
+ # with the latter, like iterating through the dataset.
94
+ # The __getitem__ iterates over the frames of the dataset. Since our datasets are also structured by
95
+ # episodes, you can access the frame indices of any episode using the episode_data_index. Here, we access
96
+ # frame indices associated to the first episode:
97
+ episode_index = 0
98
+ from_idx = dataset.episode_data_index["from"][episode_index].item()
99
+ to_idx = dataset.episode_data_index["to"][episode_index].item()
100
+
101
+ # Then we grab all the image frames from the first camera:
102
+ camera_key = dataset.meta.camera_keys[0]
103
+ frames = [dataset[idx][camera_key] for idx in range(from_idx, to_idx)]
104
+
105
+ # The objects returned by the dataset are all torch.Tensors
106
+ print(type(frames[0]))
107
+ print(frames[0].shape)
108
+
109
+ # Since we're using PyTorch, the shape follows the PyTorch channel-first convention (c, h, w).
110
+ # We can compare this shape with the information available for that feature
111
+ pprint(dataset.features[camera_key])
112
+ # In particular:
113
+ print(dataset.features[camera_key]["shape"])
114
+ # The shape is in (h, w, c) which is a more universal format.
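+ # (Illustrative addition, not part of the original example.) Assuming the metadata stores the
+ # shape as (h, w, c) as described above, both conventions describe the same image once the
+ # dimensions are reordered:
+ c, h, w = frames[0].shape
+ assert tuple(dataset.features[camera_key]["shape"]) == (h, w, c)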
115
+
116
+ # For many machine learning applications we need to load the history of past observations or trajectories of
117
+ # future actions. Our datasets can load previous and future frames for each key/modality, using timestamp
118
+ # differences with the current loaded frame. For instance:
119
+ delta_timestamps = {
120
+ # loads 4 images: 1 second before current frame, 500 ms before, 200 ms before, and current frame
121
+ camera_key: [-1, -0.5, -0.20, 0],
122
+ # loads 6 state vectors: 1.5 seconds before, 1 second before, ... 200 ms, 100 ms, and current frame
123
+ "observation.state": [-1.5, -1, -0.5, -0.20, -0.10, 0],
124
+ # loads 64 action vectors: current frame, 1 frame in the future, 2 frames, ... 63 frames in the future
125
+ "action": [t / dataset.fps for t in range(64)],
126
+ }
127
+ # Note that in any case, these delta_timestamps values need to be multiples of (1/fps) so that added to any
128
+ # timestamp, you still get a valid timestamp.
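+ # (Illustrative sanity check, not part of the original example.) Every requested offset should
+ # be a multiple of one frame period (1 / fps), up to floating point tolerance:
+ for key, offsets in delta_timestamps.items():
+     for t in offsets:
+         assert abs(t * dataset.fps - round(t * dataset.fps)) < 1e-6, f"{key}: {t} is not a multiple of 1/fps"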
129
+
130
+ dataset = LeRobotDataset(repo_id, delta_timestamps=delta_timestamps)
131
+ print(f"\n{dataset[0][camera_key].shape=}") # (4, c, h, w)
132
+ print(f"{dataset[0]['observation.state'].shape=}") # (6, c)
133
+ print(f"{dataset[0]['action'].shape=}\n") # (64, c)
134
+
135
+ # Finally, our datasets are fully compatible with PyTorch dataloaders and samplers because they are just
136
+ # PyTorch datasets.
137
+ dataloader = torch.utils.data.DataLoader(
138
+ dataset,
139
+ num_workers=0,
140
+ batch_size=32,
141
+ shuffle=True,
142
+ )
143
+
144
+ for batch in dataloader:
145
+ print(f"{batch[camera_key].shape=}") # (32, 4, c, h, w)
146
+ print(f"{batch['observation.state'].shape=}") # (32, 6, c)
147
+ print(f"{batch['action'].shape=}") # (32, 64, c)
148
+ break
examples/2_evaluate_pretrained_policy.py ADDED
@@ -0,0 +1,139 @@
1
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """
16
+ This script demonstrates how to evaluate a pretrained policy from the HuggingFace Hub or from your local
17
+ training outputs directory. In the latter case, you might want to run examples/3_train_policy.py first.
18
+
19
+ It requires the installation of the 'gym_pusht' simulation environment. Install it by running:
20
+ ```bash
21
+ pip install -e ".[pusht]"
22
+ ```
23
+ """
24
+
25
+ from pathlib import Path
26
+
27
+ import gym_pusht # noqa: F401
28
+ import gymnasium as gym
29
+ import imageio
30
+ import numpy
31
+ import torch
32
+
33
+ from lerobot.common.policies.diffusion.modeling_diffusion import DiffusionPolicy
34
+
35
+ # Create a directory to store the video of the evaluation
36
+ output_directory = Path("outputs/eval/example_pusht_diffusion")
37
+ output_directory.mkdir(parents=True, exist_ok=True)
38
+
39
+ # Select your device
40
+ device = "cuda"
41
+
42
+ # Provide the [hugging face repo id](https://huggingface.co/lerobot/diffusion_pusht):
43
+ pretrained_policy_path = "lerobot/diffusion_pusht"
44
+ # OR a path to a local outputs/train folder.
45
+ # pretrained_policy_path = Path("outputs/train/example_pusht_diffusion")
46
+
47
+ policy = DiffusionPolicy.from_pretrained(pretrained_policy_path)
48
+
49
+ # Initialize evaluation environment to render two observation types:
50
+ # an image of the scene and state/position of the agent. The environment
51
+ # also automatically stops running after 300 interactions/steps.
52
+ env = gym.make(
53
+ "gym_pusht/PushT-v0",
54
+ obs_type="pixels_agent_pos",
55
+ max_episode_steps=300,
56
+ )
57
+
58
+ # We can verify that the shapes of the features expected by the policy match the ones from the observations
59
+ # produced by the environment
60
+ print(policy.config.input_features)
61
+ print(env.observation_space)
62
+
63
+ # Similarly, we can check that the actions produced by the policy will match the actions expected by the
64
+ # environment
65
+ print(policy.config.output_features)
66
+ print(env.action_space)
67
+
68
+ # Reset the policy and environments to prepare for rollout
69
+ policy.reset()
70
+ numpy_observation, info = env.reset(seed=42)
71
+
72
+ # Prepare to collect every rewards and all the frames of the episode,
73
+ # from initial state to final state.
74
+ rewards = []
75
+ frames = []
76
+
77
+ # Render frame of the initial state
78
+ frames.append(env.render())
79
+
80
+ step = 0
81
+ done = False
82
+ while not done:
83
+ # Prepare observation for the policy running in Pytorch
84
+ state = torch.from_numpy(numpy_observation["agent_pos"])
85
+ image = torch.from_numpy(numpy_observation["pixels"])
86
+
87
+ # Convert to float32, with the image going from channel-last in [0, 255]
88
+ # to channel-first in [0, 1]
89
+ state = state.to(torch.float32)
90
+ image = image.to(torch.float32) / 255
91
+ image = image.permute(2, 0, 1)
92
+
93
+ # Send data tensors from CPU to GPU
94
+ state = state.to(device, non_blocking=True)
95
+ image = image.to(device, non_blocking=True)
96
+
97
+ # Add extra (empty) batch dimension, required to forward the policy
98
+ state = state.unsqueeze(0)
99
+ image = image.unsqueeze(0)
100
+
101
+ # Create the policy input dictionary
102
+ observation = {
103
+ "observation.state": state,
104
+ "observation.image": image,
105
+ }
106
+
107
+ # Predict the next action with respect to the current observation
108
+ with torch.inference_mode():
109
+ action = policy.select_action(observation)
110
+
111
+ # Prepare the action for the environment
112
+ numpy_action = action.squeeze(0).to("cpu").numpy()
113
+
114
+ # Step through the environment and receive a new observation
115
+ numpy_observation, reward, terminated, truncated, info = env.step(numpy_action)
116
+ print(f"{step=} {reward=} {terminated=}")
117
+
118
+ # Keep track of all the rewards and frames
119
+ rewards.append(reward)
120
+ frames.append(env.render())
121
+
122
+ # The rollout is considered done when the success state is reached (i.e. terminated is True),
123
+ # or the maximum number of iterations is reached (i.e. truncated is True)
124
+ done = terminated | truncated | done
125
+ step += 1
126
+
127
+ if terminated:
128
+ print("Success!")
129
+ else:
130
+ print("Failure!")
131
+
132
+ # Get the speed of environment (i.e. its number of frames per second).
133
+ fps = env.metadata["render_fps"]
134
+
135
+ # Encode all frames into a mp4 video.
136
+ video_path = output_directory / "rollout.mp4"
137
+ imageio.mimsave(str(video_path), numpy.stack(frames), fps=fps)
138
+
139
+ print(f"Video of the evaluation is available in '{video_path}'.")
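+
+ # (Illustrative addition, not part of the original example.) The rewards collected during the
+ # rollout can also be aggregated into a single score, which is handy when comparing policies:
+ print(f"Cumulative reward of the rollout: {sum(rewards):.3f}")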
examples/3_train_policy.py ADDED
@@ -0,0 +1,120 @@
1
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """This script demonstrates how to train Diffusion Policy on the PushT environment.
16
+
17
+ Once you have trained a model with this script, you can try to evaluate it on
18
+ examples/2_evaluate_pretrained_policy.py
19
+ """
20
+
21
+ from pathlib import Path
22
+
23
+ import torch
24
+
25
+ from lerobot.common.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
26
+ from lerobot.common.datasets.utils import dataset_to_policy_features
27
+ from lerobot.common.policies.diffusion.configuration_diffusion import DiffusionConfig
28
+ from lerobot.common.policies.diffusion.modeling_diffusion import DiffusionPolicy
29
+ from lerobot.configs.types import FeatureType
30
+
31
+
32
+ def main():
33
+ # Create a directory to store the training checkpoint.
34
+ output_directory = Path("outputs/train/example_pusht_diffusion")
35
+ output_directory.mkdir(parents=True, exist_ok=True)
36
+
37
+ # Select your device
38
+ device = torch.device("cuda")
39
+
40
+ # Number of offline training steps (we'll only do offline training for this example.)
41
+ # Adjust as you prefer. 5000 steps are needed to get something worth evaluating.
42
+ training_steps = 5000
43
+ log_freq = 1
44
+
45
+ # When starting from scratch (i.e. not from a pretrained policy), we need to specify 2 things before
46
+ # creating the policy:
47
+ # - input/output shapes: to properly size the policy
48
+ # - dataset stats: for normalization and denormalization of input/outputs
49
+ dataset_metadata = LeRobotDatasetMetadata("lerobot/pusht")
50
+ features = dataset_to_policy_features(dataset_metadata.features)
51
+ output_features = {key: ft for key, ft in features.items() if ft.type is FeatureType.ACTION}
52
+ input_features = {key: ft for key, ft in features.items() if key not in output_features}
53
+
54
+ # Policies are initialized with a configuration class, in this case `DiffusionConfig`. For this example,
55
+ # we'll just use the defaults and so no arguments other than input/output features need to be passed.
56
+ cfg = DiffusionConfig(input_features=input_features, output_features=output_features)
57
+
58
+ # We can now instantiate our policy with this config and the dataset stats.
59
+ policy = DiffusionPolicy(cfg, dataset_stats=dataset_metadata.stats)
60
+ policy.train()
61
+ policy.to(device)
62
+
63
+ # Another policy-dataset interaction is with the delta_timestamps. Each policy expects a given number of frames,
64
+ # which can differ for inputs, outputs and rewards (if there are some).
65
+ delta_timestamps = {
66
+ "observation.image": [i / dataset_metadata.fps for i in cfg.observation_delta_indices],
67
+ "observation.state": [i / dataset_metadata.fps for i in cfg.observation_delta_indices],
68
+ "action": [i / dataset_metadata.fps for i in cfg.action_delta_indices],
69
+ }
70
+
71
+ # In this case with the standard configuration for Diffusion Policy, it is equivalent to this:
72
+ delta_timestamps = {
73
+ # Load the previous image and state at -0.1 seconds before current frame,
74
+ # then load current image and state corresponding to 0.0 second.
75
+ "observation.image": [-0.1, 0.0],
76
+ "observation.state": [-0.1, 0.0],
77
+ # Load the previous action (-0.1), the next action to be executed (0.0),
78
+ # and 14 future actions with a 0.1 seconds spacing. All these actions will be
79
+ # used to supervise the policy.
80
+ "action": [-0.1, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4],
81
+ }
82
+
83
+ # We can then instantiate the dataset with these delta_timestamps configuration.
84
+ dataset = LeRobotDataset("lerobot/pusht", delta_timestamps=delta_timestamps)
85
+
86
+ # Then we create our optimizer and dataloader for offline training.
87
+ optimizer = torch.optim.Adam(policy.parameters(), lr=1e-4)
88
+ dataloader = torch.utils.data.DataLoader(
89
+ dataset,
90
+ num_workers=4,
91
+ batch_size=64,
92
+ shuffle=True,
93
+ pin_memory=device.type != "cpu",
94
+ drop_last=True,
95
+ )
96
+
97
+ # Run training loop.
98
+ step = 0
99
+ done = False
100
+ while not done:
101
+ for batch in dataloader:
102
+ batch = {k: (v.to(device) if isinstance(v, torch.Tensor) else v) for k, v in batch.items()}
103
+ loss, _ = policy.forward(batch)
104
+ loss.backward()
105
+ optimizer.step()
106
+ optimizer.zero_grad()
107
+
108
+ if step % log_freq == 0:
109
+ print(f"step: {step} loss: {loss.item():.3f}")
110
+ step += 1
111
+ if step >= training_steps:
112
+ done = True
113
+ break
114
+
115
+ # Save a policy checkpoint.
116
+ policy.save_pretrained(output_directory)
117
+
118
+
119
+ if __name__ == "__main__":
120
+ main()
examples/4_train_policy_with_script.md ADDED
@@ -0,0 +1,274 @@
1
+ This tutorial will explain the training script, how to use it, and particularly how to configure everything needed for the training run.
2
+ > **Note:** The following assumes you're running these commands on a machine equipped with a CUDA GPU. If you don't have one (or if you're using a Mac), you can add `--policy.device=cpu` (respectively `--policy.device=mps`). However, be advised that the code executes much more slowly on CPU.
3
+
4
+
5
+ ## The training script
6
+
7
+ LeRobot offers a training script at [`lerobot/scripts/train.py`](../../lerobot/scripts/train.py). At a high level it does the following:
8
+
9
+ - Initializes/loads a configuration for the following steps.
10
+ - Instantiates a dataset.
11
+ - (Optional) Instantiates a simulation environment corresponding to that dataset.
12
+ - Instantiates a policy.
13
+ - Runs a standard training loop with forward pass, backward pass, optimization step, and occasional logging, evaluation (of the policy on the environment), and checkpointing.
14
+
15
+ ## Overview of the configuration system
16
+
17
+ In the training script, the main function `train` expects a `TrainPipelineConfig` object:
18
+ ```python
19
+ # train.py
20
+ @parser.wrap()
21
+ def train(cfg: TrainPipelineConfig):
22
+ ```
23
+
24
+ You can inspect the `TrainPipelineConfig` defined in [`lerobot/configs/train.py`](../../lerobot/configs/train.py) (which is heavily commented and meant to be a reference to understand any option)
25
+
26
+ When running the script, inputs from the command line are parsed thanks to the `@parser.wrap()` decorator, and an instance of this class is automatically generated. Under the hood, this is done with [Draccus](https://github.com/dlwh/draccus), a tool dedicated to this purpose. If you're familiar with Hydra, Draccus can similarly load configurations from config files (.json, .yaml) and also override their values through command line inputs. Unlike Hydra, these configurations are pre-defined in the code through dataclasses rather than being defined entirely in config files. This allows for more rigorous serialization/deserialization and typing, and lets you manipulate configurations as objects directly in the code rather than as dictionaries or namespaces (which enables nice IDE features such as autocomplete, jump-to-def, etc.).
27
+
28
+ Let's have a look at a simplified example. Amongst others, the training config has the following attributes:
29
+ ```python
30
+ @dataclass
31
+ class TrainPipelineConfig:
32
+ dataset: DatasetConfig
33
+ env: envs.EnvConfig | None = None
34
+ policy: PreTrainedConfig | None = None
35
+ ```
36
+ in which `DatasetConfig` for example is defined as such:
37
+ ```python
38
+ @dataclass
39
+ class DatasetConfig:
40
+ repo_id: str
41
+ episodes: list[int] | None = None
42
+ video_backend: str = "pyav"
43
+ ```
44
+
45
+ This creates a hierarchical relationship where, for example assuming we have a `cfg` instance of `TrainPipelineConfig`, we can access the `repo_id` value with `cfg.dataset.repo_id`.
46
+ From the command line, we can specify this value using a very similar syntax: `--dataset.repo_id=repo/id`.
47
+
48
+ By default, every field takes the default value specified in the dataclass. If a field doesn't have a default value, it needs to be specified either from the command line or from a config file, whose path is also given on the command line (more on this below). In the example above, the `dataset` field doesn't have a default value, which means it must be specified.
49
+
50
+
51
+ ## Specifying values from the CLI
52
+
53
+ Let's say that we want to train [Diffusion Policy](../../lerobot/common/policies/diffusion) on the [pusht](https://huggingface.co/datasets/lerobot/pusht) dataset, using the [gym_pusht](https://github.com/huggingface/gym-pusht) environment for evaluation. The command to do so would look like this:
54
+ ```bash
55
+ python lerobot/scripts/train.py \
56
+ --dataset.repo_id=lerobot/pusht \
57
+ --policy.type=diffusion \
58
+ --env.type=pusht
59
+ ```
60
+
61
+ Let's break this down:
62
+ - To specify the dataset, we just need to specify its `repo_id` on the hub which is the only required argument in the `DatasetConfig`. The rest of the fields have default values and in this case we are fine with those so we can just add the option `--dataset.repo_id=lerobot/pusht`.
63
+ - To specify the policy, we can just select diffusion policy using `--policy` appended with `.type`. Here, `.type` is a special argument which allows us to select config classes inheriting from `draccus.ChoiceRegistry` and that have been decorated with the `register_subclass()` method. To have a better explanation of this feature, have a look at this [Draccus demo](https://github.com/dlwh/draccus?tab=readme-ov-file#more-flexible-configuration-with-choice-types). In our code, we use this mechanism mainly to select policies, environments, robots, and some other components like optimizers. The policies available to select are located in [lerobot/common/policies](../../lerobot/common/policies)
64
+ - Similarly, we select the environment with `--env.type=pusht`. The different environment configs are available in [`lerobot/common/envs/configs.py`](../../lerobot/common/envs/configs.py)
65
+
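+ The `.type` mechanism mentioned in the list above boils down to registering config dataclasses against a short name. As a rough, illustrative sketch of that pattern (the class and field names below are made up, not the actual LeRobot configs):
+ ```python
+ from dataclasses import dataclass
+ 
+ import draccus
+ 
+ 
+ @dataclass
+ class MyPolicyConfig(draccus.ChoiceRegistry):
+     pass
+ 
+ 
+ @MyPolicyConfig.register_subclass("diffusion")
+ @dataclass
+ class MyDiffusionConfig(MyPolicyConfig):
+     num_inference_steps: int = 100
+ ```
+ With such a registry in place, a CLI value like `--policy.type=diffusion` tells the parser which registered subclass to instantiate for the `policy` field.
+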
66
+ Let's see another example. Let's say you've been training [ACT](../../lerobot/common/policies/act) on [lerobot/aloha_sim_insertion_human](https://huggingface.co/datasets/lerobot/aloha_sim_insertion_human) using the [gym-aloha](https://github.com/huggingface/gym-aloha) environment for evaluation with:
67
+ ```bash
68
+ python lerobot/scripts/train.py \
69
+ --policy.type=act \
70
+ --dataset.repo_id=lerobot/aloha_sim_insertion_human \
71
+ --env.type=aloha \
72
+ --output_dir=outputs/train/act_aloha_insertion
73
+ ```
74
+ > Notice we added `--output_dir` to explicitly tell where to write outputs from this run (checkpoints, training state, configs etc.). This is not mandatory and if you don't specify it, a default directory will be created from the current date and time, env.type and policy.type. This will typically look like `outputs/train/2025-01-24/16-10-05_aloha_act`.
75
+
76
+ We now want to train a different policy for aloha on another task. We'll change the dataset and use [lerobot/aloha_sim_transfer_cube_human](https://huggingface.co/datasets/lerobot/aloha_sim_transfer_cube_human) instead. Of course, we also need to change the task of the environment as well to match this other task.
77
+ Looking at the [`AlohaEnv`](../../lerobot/common/envs/configs.py) config, the task is `"AlohaInsertion-v0"` by default, which corresponds to the task we trained on in the command above. The [gym-aloha](https://github.com/huggingface/gym-aloha?tab=readme-ov-file#description) environment also has the `AlohaTransferCube-v0` task which corresponds to this other task we want to train on. Putting this together, we can train this new policy on this different task using:
78
+ ```bash
79
+ python lerobot/scripts/train.py \
80
+ --policy.type=act \
81
+ --dataset.repo_id=lerobot/aloha_sim_transfer_cube_human \
82
+ --env.type=aloha \
83
+ --env.task=AlohaTransferCube-v0 \
84
+ --output_dir=outputs/train/act_aloha_transfer
85
+ ```
86
+
87
+ ## Loading from a config file
88
+
89
+ Now, let's assume that we want to reproduce the run just above. That run has produced a `train_config.json` file in its checkpoints, which serializes the `TrainPipelineConfig` instance it used:
90
+ ```json
91
+ {
92
+ "dataset": {
93
+ "repo_id": "lerobot/aloha_sim_transfer_cube_human",
94
+ "episodes": null,
95
+ ...
96
+ },
97
+ "env": {
98
+ "type": "aloha",
99
+ "task": "AlohaTransferCube-v0",
100
+ "fps": 50,
101
+ ...
102
+ },
103
+ "policy": {
104
+ "type": "act",
105
+ "n_obs_steps": 1,
106
+ ...
107
+ },
108
+ ...
109
+ }
110
+ ```
111
+
112
+ We can then simply load the config values from this file using:
113
+ ```bash
114
+ python lerobot/scripts/train.py \
115
+ --config_path=outputs/train/act_aloha_transfer/checkpoints/last/pretrained_model/ \
116
+ --output_dir=outputs/train/act_aloha_transfer_2
117
+ ```
118
+ `--config_path` is also a special argument which allows you to initialize the config from a local config file. It can point to a directory that contains `train_config.json` or directly to the config file itself.
119
+
120
+ Similarly to Hydra, we can still override some parameters in the CLI if we want to, e.g.:
121
+ ```bash
122
+ python lerobot/scripts/train.py \
123
+ --config_path=outputs/train/act_aloha_transfer/checkpoints/last/pretrained_model/ \
124
+ --output_dir=outputs/train/act_aloha_transfer_2 \
125
+ --policy.n_action_steps=80
126
+ ```
127
+ > Note: While `--output_dir` is not required in general, in this case we need to specify it since it will otherwise take the value from the `train_config.json` (which is `outputs/train/act_aloha_transfer`). In order to prevent accidental deletion of previous run checkpoints, we raise an error if you're trying to write in an existing directory. This is not the case when resuming a run, which is what you'll learn next.
128
+
129
+ `--config_path` can also accept the repo_id of a repo on the hub that contains a `train_config.json` file, e.g. running:
130
+ ```bash
131
+ python lerobot/scripts/train.py --config_path=lerobot/diffusion_pusht
132
+ ```
133
+ will start a training run with the same configuration used for training [lerobot/diffusion_pusht](https://huggingface.co/lerobot/diffusion_pusht)
134
+
135
+
136
+ ## Resume training
137
+
138
+ Being able to resume a training run is important in case it crashes or is aborted for any reason. We'll demonstrate how to do that here.
139
+
140
+ Let's reuse the command from the previous run and add a few more options:
141
+ ```bash
142
+ python lerobot/scripts/train.py \
143
+ --policy.type=act \
144
+ --dataset.repo_id=lerobot/aloha_sim_transfer_cube_human \
145
+ --env.type=aloha \
146
+ --env.task=AlohaTransferCube-v0 \
147
+ --log_freq=25 \
148
+ --save_freq=100 \
149
+ --output_dir=outputs/train/run_resumption
150
+ ```
151
+
152
+ Here we've taken care to set up the log frequency and checkpointing frequency to low numbers so we can showcase resumption. You should be able to see some logging and have a first checkpoint within 1 minute (depending on hardware). Wait for the first checkpoint to happen, you should see a line that looks like this in your terminal:
153
+ ```
154
+ INFO 2025-01-24 16:10:56 ts/train.py:263 Checkpoint policy after step 100
155
+ ```
156
+ Now let's simulate a crash by killing the process (hit `ctrl`+`c`). We can then simply resume this run from the last checkpoint available with:
157
+ ```bash
158
+ python lerobot/scripts/train.py \
159
+ --config_path=outputs/train/run_resumption/checkpoints/last/pretrained_model/ \
160
+ --resume=true
161
+ ```
162
+ You should see from the logging that your training picks up from where it left off.
163
+
164
+ Another reason for which you might want to resume a run is simply to extend training and add more training steps. The number of training steps is set by the option `--steps`, which is 100 000 by default.
165
+ You could double the number of steps of the previous run with:
166
+ ```bash
167
+ python lerobot/scripts/train.py \
168
+ --config_path=outputs/train/run_resumption/checkpoints/last/pretrained_model/ \
169
+ --resume=true \
170
+ --steps=200000
171
+ ```
172
+
173
+ ## Outputs of a run
174
+ In the output directory, there will be a folder called `checkpoints` with the following structure:
175
+ ```bash
176
+ outputs/train/run_resumption/checkpoints
177
+ ├── 000100 # checkpoint_dir for training step 100
178
+ │ ├── pretrained_model/
179
+ │ │ ├── config.json # policy config
180
+ │ │ ├── model.safetensors # policy weights
181
+ │ │ └── train_config.json # train config
182
+ │ └── training_state/
183
+ │ ├── optimizer_param_groups.json # optimizer param groups
184
+ │ ├── optimizer_state.safetensors # optimizer state
185
+ │ ├── rng_state.safetensors # rng states
186
+ │ ├── scheduler_state.json # scheduler state
187
+ │ └── training_step.json # training step
188
+ ├── 000200
189
+ └── last -> 000200 # symlink to the last available checkpoint
190
+ ```
191
+
192
+ ## Fine-tuning a pre-trained policy
193
+
194
+ In addition to the features currently in Draccus, we've added a special `.path` argument for the policy, which allows you to load a policy as you would with `PreTrainedPolicy.from_pretrained()`. In that case, `path` can be a local directory that contains a checkpoint or a repo_id pointing to a pretrained policy on the hub.
195
+
196
+ For example, we could fine-tune a [policy pre-trained on the aloha transfer task](https://huggingface.co/lerobot/act_aloha_sim_transfer_cube_human) on the aloha insertion task. We can achieve this with:
197
+ ```bash
198
+ python lerobot/scripts/train.py \
199
+ --policy.path=lerobot/act_aloha_sim_transfer_cube_human \
200
+ --dataset.repo_id=lerobot/aloha_sim_insertion_human \
201
+ --env.type=aloha \
202
+ --env.task=AlohaInsertion-v0
203
+ ```
204
+
205
+ When doing so, keep in mind that the features of the fine-tuning dataset would have to match the input/output features of the pretrained policy.
206
+
207
+ ## Typical logs and metrics
208
+
209
+ When you start the training process, you will first see your full configuration being printed in the terminal. You can check it to make sure that you configured your run correctly. The final configuration will also be saved with the checkpoint.
210
+
211
+ After that, you will see training log like this one:
212
+ ```
213
+ INFO 2024-08-14 13:35:12 ts/train.py:192 step:0 smpl:64 ep:1 epch:0.00 loss:1.112 grdn:15.387 lr:2.0e-07 updt_s:1.738 data_s:4.774
214
+ ```
215
+ or evaluation log:
216
+ ```
217
+ INFO 2024-08-14 13:38:45 ts/train.py:226 step:100 smpl:6K ep:52 epch:0.25 ∑rwrd:20.693 success:0.0% eval_s:120.266
218
+ ```
219
+
220
+ These logs will also be saved in wandb if `wandb.enable` is set to `true`. Here are the meaning of some abbreviations:
221
+ - `smpl`: number of samples seen during training.
222
+ - `ep`: number of episodes seen during training. An episode contains multiple samples in a complete manipulation task.
223
+ - `epch`: number of times all unique samples have been seen (epoch); see the sketch after this list.
224
+ - `grdn`: gradient norm.
225
+ - `∑rwrd`: sum of rewards over an evaluation episode, averaged across all evaluation episodes.
226
+ - `success`: average success rate of eval episodes. Reward and success are usually different except in the sparse reward setting, where reward=1 only when the task is completed successfully.
227
+ - `eval_s`: time to evaluate the policy in the environment, in seconds.
228
+ - `updt_s`: time to update the network parameters, in seconds.
229
+ - `data_s`: time to load a batch of data, in seconds.
230
+
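+ As a rough, illustrative sketch of how these counters relate to each other (the numbers below are hypothetical, and the training script's exact bookkeeping may differ slightly):
+ ```python
+ # Hypothetical values, for illustration only.
+ batch_size = 64
+ dataset_num_frames = 25_000           # number of unique samples in the training dataset
+ 
+ step = 100                            # number of optimization steps so far
+ smpl = step * batch_size              # samples seen so far: 6_400 (logged as "6K")
+ epch = smpl / dataset_num_frames      # fraction of the dataset seen: 0.256 epochs
+ print(f"step:{step} smpl:{smpl} epch:{epch:.2f}")
+ ```
+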
231
+ Some metrics are useful for initial performance profiling. For example, if you find the current GPU utilization is low via the `nvidia-smi` command and `data_s` sometimes is too high, you may need to modify batch size or number of dataloading workers to accelerate dataloading. We also recommend [pytorch profiler](https://github.com/huggingface/lerobot?tab=readme-ov-file#improve-your-code-with-profiling) for detailed performance probing.
232
+
233
+ ## In short
234
+
235
+ We'll summarize here the main use cases to remember from this tutorial.
236
+
237
+ #### Train a policy from scratch – CLI
238
+ ```bash
239
+ python lerobot/scripts/train.py \
240
+ --policy.type=act \ # <- select 'act' policy
241
+ --env.type=pusht \ # <- select 'pusht' environment
242
+ --dataset.repo_id=lerobot/pusht # <- train on this dataset
243
+ ```
244
+
245
+ #### Train a policy from scratch - config file + CLI
246
+ ```bash
247
+ python lerobot/scripts/train.py \
248
+ --config_path=path/to/pretrained_model \ # <- can also be a repo_id
249
+ --policy.n_action_steps=80 # <- you may still override values
250
+ ```
251
+
252
+ #### Resume/continue a training run
253
+ ```bash
254
+ python lerobot/scripts/train.py \
255
+ --config_path=checkpoint/pretrained_model/ \
256
+ --resume=true \
257
+ --steps=200000 # <- you can change some training parameters
258
+ ```
259
+
260
+ #### Fine-tuning
261
+ ```bash
262
+ python lerobot/scripts/train.py \
263
+ --policy.path=lerobot/act_aloha_sim_transfer_cube_human \ # <- can also be a local path to a checkpoint
264
+ --dataset.repo_id=lerobot/aloha_sim_insertion_human \
265
+ --env.type=aloha \
266
+ --env.task=AlohaInsertion-v0
267
+ ```
268
+
269
+ ---
270
+
271
+ Now that you know the basics of how to train a policy, you might want to know how to apply this knowledge to actual robots, or how to record your own datasets and train policies on your specific task.
272
+ If that's the case, head over to the next tutorial [`7_get_started_with_real_robot.md`](./7_get_started_with_real_robot.md).
273
+
274
+ Or in the meantime, happy training! 🤗
examples/7_get_started_with_real_robot.md ADDED
@@ -0,0 +1,1012 @@
1
+ # Getting Started with Real-World Robots
2
+
3
+ This tutorial will guide you through the process of setting up and training a neural network to autonomously control a real robot.
4
+
5
+ **What You'll Learn:**
6
+ 1. How to order and assemble your robot.
7
+ 2. How to connect, configure, and calibrate your robot.
8
+ 3. How to record and visualize your dataset.
9
+ 4. How to train a policy using your data and prepare it for evaluation.
10
+ 5. How to evaluate your policy and visualize the results.
11
+
12
+ By following these steps, you'll be able to replicate tasks like picking up a Lego block and placing it in a bin with a high success rate, as demonstrated in [this video](https://x.com/RemiCadene/status/1814680760592572934).
13
+
14
+ This tutorial is specifically made for the affordable [Koch v1.1](https://github.com/jess-moss/koch-v1-1) robot, but it contains additional information to be easily adapted to various types of robots like [Aloha bimanual robot](https://aloha-2.github.io) by changing some configurations. The Koch v1.1 consists of a leader arm and a follower arm, each with 6 motors. It can work with one or several cameras to record the scene, which serve as visual sensors for the robot.
15
+
16
+ During the data collection phase, you will control the follower arm by moving the leader arm. This process is known as "teleoperation." This technique is used to collect robot trajectories. Afterward, you'll train a neural network to imitate these trajectories and deploy the network to enable your robot to operate autonomously.
17
+
18
+ If you encounter any issues at any step of the tutorial, feel free to seek help on [Discord](https://discord.com/invite/s3KuuzsPFb) or don't hesitate to iterate with us on the tutorial by creating issues or pull requests. Thanks!
19
+
20
+ ## 1. Order and Assemble your Koch v1.1
21
+
22
+ Follow the sourcing and assembling instructions provided on the [Koch v1.1 Github page](https://github.com/jess-moss/koch-v1-1). This will guide you through setting up both the follower and leader arms, as shown in the image below.
23
+
24
+ <div style="text-align:center;">
25
+ <img src="../media/tutorial/koch_v1_1_leader_follower.webp?raw=true" alt="Koch v1.1 leader and follower arms" title="Koch v1.1 leader and follower arms" width="50%">
26
+ </div>
27
+
28
+ For a visual walkthrough of the assembly process, you can refer to [this video tutorial](https://youtu.be/8nQIg9BwwTk).
29
+
30
+ ## 2. Configure motors, calibrate arms, teleoperate your Koch v1.1
31
+
32
+ First, install the additional dependencies required for robots built with dynamixel motors like Koch v1.1 by running one of the following commands (make sure gcc is installed).
33
+
34
+ Using `pip`:
35
+ ```bash
36
+ pip install -e ".[dynamixel]"
37
+ ```
38
+
39
+ Using `poetry`:
40
+ ```bash
41
+ poetry sync --extras "dynamixel"
42
+ ```
43
+
44
+ Using `uv`:
45
+ ```bash
46
+ uv sync --extra "dynamixel"
47
+ ```
48
+
49
+ /!\ For Linux only, ffmpeg and opencv currently require a conda install. Run this exact sequence of commands:
50
+ ```bash
51
+ conda install -c conda-forge ffmpeg
52
+ pip uninstall opencv-python
53
+ conda install -c conda-forge "opencv>=4.10.0"
54
+ ```
55
+
56
+ You are now ready to plug the 5V power supply to the motor bus of the leader arm (the smaller one) since all its motors only require 5V.
57
+
58
+ Then plug the 12V power supply to the motor bus of the follower arm. It has two motors that need 12V, and the rest will be powered with 5V through the voltage convertor.
59
+
60
+ Finally, connect both arms to your computer via USB. Note that the USB doesn't provide any power, and both arms need to be plugged in with their associated power supply to be detected by your computer.
61
+
62
+ Now you are ready to configure your motors for the first time, as detailed in the sections below. In the upcoming sections, you'll learn about our classes and functions by running some python code in an interactive session, or by copy-pasting it in a python file.
63
+
64
+ If you have already configured your motors the first time, you can streamline the process by directly running the teleoperate script (which is detailed further in the tutorial):
65
+ ```bash
66
+ python lerobot/scripts/control_robot.py \
67
+ --robot.type=koch \
68
+ --control.type=teleoperate
69
+ ```
70
+
71
+ It will automatically:
72
+ 1. Identify any missing calibrations and initiate the calibration procedure.
73
+ 2. Connect the robot and start teleoperation.
74
+
75
+ ### a. Control your motors with DynamixelMotorsBus
76
+
77
+ You can use the [`DynamixelMotorsBus`](../lerobot/common/robot_devices/motors/dynamixel.py) to communicate with the motors connected as a chain to the corresponding USB bus. This class leverages the Python [Dynamixel SDK](https://emanual.robotis.com/docs/en/software/dynamixel/dynamixel_sdk/sample_code/python_read_write_protocol_2_0/#python-read-write-protocol-20) to facilitate reading from and writing to the motors.
78
+
79
+ **First Configuration of your motors**
80
+
81
+ You will need to unplug each motor in turn and run a command to identify the motor. The motor will save its own identification, so you only need to do this once. Start by unplugging all of the motors.
82
+
83
+ Do the Leader arm first, as all of its motors are of the same type. Plug in your first motor on your leader arm and run this script to set its ID to 1.
84
+ ```bash
85
+ python lerobot/scripts/configure_motor.py \
86
+ --port /dev/tty.usbmodem58760432961 \
87
+ --brand dynamixel \
88
+ --model xl330-m288 \
89
+ --baudrate 1000000 \
90
+ --ID 1
91
+ ```
92
+
93
+ Then unplug your first motor and plug the second motor and set its ID to 2.
94
+ ```bash
95
+ python lerobot/scripts/configure_motor.py \
96
+ --port /dev/tty.usbmodem58760432961 \
97
+ --brand dynamixel \
98
+ --model xl330-m288 \
99
+ --baudrate 1000000 \
100
+ --ID 2
101
+ ```
102
+
103
+ Redo the process for all your motors until ID 6.
104
+
105
+ The process for the follower arm is almost the same, but the follower arm has two types of motors. For the first two motors, make sure you set the model to `xl430-w250`. _Important: configuring follower motors requires plugging and unplugging power. Make sure you use the 5V power for the XL330s and the 12V power for the XL430s!_
106
+
107
+ After all of your motors are configured properly, you're ready to plug them all together in a daisy-chain as shown in the original video.
108
+
109
+ **Instantiate the DynamixelMotorsBus**
110
+
111
+ To begin, create two instances of the [`DynamixelMotorsBus`](../lerobot/common/robot_devices/motors/dynamixel.py), one for each arm, using their corresponding USB ports (e.g. `DynamixelMotorsBus(port="/dev/tty.usbmodem575E0031751")`).
112
+
113
+ To find the correct ports for each arm, run the utility script twice:
114
+ ```bash
115
+ python lerobot/scripts/find_motors_bus_port.py
116
+ ```
117
+
118
+ Example output when identifying the leader arm's port (e.g., `/dev/tty.usbmodem575E0031751` on Mac, or possibly `/dev/ttyACM0` on Linux):
119
+ ```
120
+ Finding all available ports for the MotorBus.
121
+ ['/dev/tty.usbmodem575E0032081', '/dev/tty.usbmodem575E0031751']
122
+ Remove the usb cable from your DynamixelMotorsBus and press Enter when done.
123
+
124
+ [...Disconnect leader arm and press Enter...]
125
+
126
+ The port of this DynamixelMotorsBus is /dev/tty.usbmodem575E0031751
127
+ Reconnect the usb cable.
128
+ ```
129
+
130
+ Example output when identifying the follower arm's port (e.g., `/dev/tty.usbmodem575E0032081`, or possibly `/dev/ttyACM1` on Linux):
131
+ ```
132
+ Finding all available ports for the MotorBus.
133
+ ['/dev/tty.usbmodem575E0032081', '/dev/tty.usbmodem575E0031751']
134
+ Remove the usb cable from your DynamixelMotorsBus and press Enter when done.
135
+
136
+ [...Disconnect follower arm and press Enter...]
137
+
138
+ The port of this DynamixelMotorsBus is /dev/tty.usbmodem575E0032081
139
+ Reconnect the usb cable.
140
+ ```
141
+
142
+ Troubleshooting: On Linux, you might need to give access to the USB ports by running this command with your ports:
143
+ ```bash
144
+ sudo chmod 666 /dev/tty.usbmodem575E0032081
145
+ sudo chmod 666 /dev/tty.usbmodem575E0031751
146
+ ```
147
+
148
+ *Listing and Configuring Motors*
149
+
150
+ Next, you'll need to list the motors for each arm, including their name, index, and model. Initially, each motor is assigned the factory default index `1`. Since each motor requires a unique index to function correctly when connected in a chain on a common bus, you'll need to assign different indices. It's recommended to use an ascending index order, starting from `1` (e.g., `1, 2, 3, 4, 5, 6`). These indices will be saved in the persistent memory of each motor during the first connection.
151
+
152
+ To assign indices to the motors, run this code in an interactive Python session. Replace the `port` values with the ones you identified earlier:
153
+ ```python
154
+ from lerobot.common.robot_devices.motors.configs import DynamixelMotorsBusConfig
155
+ from lerobot.common.robot_devices.motors.dynamixel import DynamixelMotorsBus
156
+
157
+ leader_config = DynamixelMotorsBusConfig(
158
+ port="/dev/tty.usbmodem575E0031751",
159
+ motors={
160
+ # name: (index, model)
161
+ "shoulder_pan": (1, "xl330-m077"),
162
+ "shoulder_lift": (2, "xl330-m077"),
163
+ "elbow_flex": (3, "xl330-m077"),
164
+ "wrist_flex": (4, "xl330-m077"),
165
+ "wrist_roll": (5, "xl330-m077"),
166
+ "gripper": (6, "xl330-m077"),
167
+ },
168
+ )
169
+
170
+ follower_config = DynamixelMotorsBusConfig(
171
+ port="/dev/tty.usbmodem575E0032081",
172
+ motors={
173
+ # name: (index, model)
174
+ "shoulder_pan": (1, "xl430-w250"),
175
+ "shoulder_lift": (2, "xl430-w250"),
176
+ "elbow_flex": (3, "xl330-m288"),
177
+ "wrist_flex": (4, "xl330-m288"),
178
+ "wrist_roll": (5, "xl330-m288"),
179
+ "gripper": (6, "xl330-m288"),
180
+ },
181
+ )
182
+
183
+ leader_arm = DynamixelMotorsBus(leader_config)
184
+ follower_arm = DynamixelMotorsBus(follower_config)
185
+ ```
186
+
187
+ IMPORTANTLY: Now that you have your ports, update [`KochRobotConfig`](../lerobot/common/robot_devices/robots/configs.py). You will find something like:
188
+ ```python
189
+ @RobotConfig.register_subclass("koch")
190
+ @dataclass
191
+ class KochRobotConfig(ManipulatorRobotConfig):
192
+ calibration_dir: str = ".cache/calibration/koch"
193
+ # `max_relative_target` limits the magnitude of the relative positional target vector for safety purposes.
194
+ # Set this to a positive scalar to have the same value for all motors, or a list that is the same length as
195
+ # the number of motors in your follower arms.
196
+ max_relative_target: int | None = None
197
+
198
+ leader_arms: dict[str, MotorsBusConfig] = field(
199
+ default_factory=lambda: {
200
+ "main": DynamixelMotorsBusConfig(
201
+ port="/dev/tty.usbmodem585A0085511", <-- UPDATE HERE
202
+ motors={
203
+ # name: (index, model)
204
+ "shoulder_pan": [1, "xl330-m077"],
205
+ "shoulder_lift": [2, "xl330-m077"],
206
+ "elbow_flex": [3, "xl330-m077"],
207
+ "wrist_flex": [4, "xl330-m077"],
208
+ "wrist_roll": [5, "xl330-m077"],
209
+ "gripper": [6, "xl330-m077"],
210
+ },
211
+ ),
212
+ }
213
+ )
214
+
215
+ follower_arms: dict[str, MotorsBusConfig] = field(
216
+ default_factory=lambda: {
217
+ "main": DynamixelMotorsBusConfig(
218
+ port="/dev/tty.usbmodem585A0076891", <-- UPDATE HERE
219
+ motors={
220
+ # name: (index, model)
221
+ "shoulder_pan": [1, "xl430-w250"],
222
+ "shoulder_lift": [2, "xl430-w250"],
223
+ "elbow_flex": [3, "xl330-m288"],
224
+ "wrist_flex": [4, "xl330-m288"],
225
+ "wrist_roll": [5, "xl330-m288"],
226
+ "gripper": [6, "xl330-m288"],
227
+ },
228
+ ),
229
+ }
230
+ )
231
+ ```
232
+
233
+ **Connect and Configure your Motors**
234
+
235
+ Before you can start using your motors, you'll need to configure them to ensure proper communication. When you first connect the motors, the [`DynamixelMotorsBus`](../lerobot/common/robot_devices/motors/dynamixel.py) automatically detects any mismatch between the current motor indices (factory set to `1`) and the specified indices (e.g., `1, 2, 3, 4, 5, 6`). This triggers a configuration procedure that requires you to unplug the power cord and motors, then reconnect each motor sequentially, starting from the one closest to the bus.
236
+
237
+ For a visual guide, refer to the [video tutorial of the configuration procedure](https://youtu.be/U78QQ9wCdpY).
238
+
239
+ To connect and configure the leader arm, run the following code in the same Python interactive session as earlier in the tutorial:
240
+ ```python
241
+ leader_arm.connect()
242
+ ```
243
+
244
+ When you connect the leader arm for the first time, you might see an output similar to this:
245
+ ```
246
+ Read failed due to communication error on port /dev/tty.usbmodem575E0032081 for group_key ID_shoulder_pan_shoulder_lift_elbow_flex_wrist_flex_wrist_roll_gripper: [TxRxResult] There is no status packet!
247
+
248
+ /!\ A configuration issue has been detected with your motors:
249
+ If this is the first time you are using these motors, press enter to configure your motors... but before verify that all the cables are connected the proper way. If you find an issue, before making a modification, kill the python process, unplug the power cord to not damage the motors, rewire correctly, then plug the power again and relaunch the script.
250
+
251
+ Motor indices detected: {9600: [1]}
252
+
253
+ 1. Unplug the power cord
254
+ 2. Plug/unplug minimal number of cables to only have the first 1 motor(s) (['shoulder_pan']) connected.
255
+ 3. Re-plug the power cord
256
+ Press Enter to continue...
257
+
258
+ *Follow the procedure*
259
+
260
+ Setting expected motor indices: [1, 2, 3, 4, 5, 6]
261
+ ```
262
+
263
+ Once the leader arm is configured, repeat the process for the follower arm by running:
264
+ ```python
265
+ follower_arm.connect()
266
+ ```
267
+
268
+ Congratulations! Both arms are now properly configured and connected. You won't need to go through the configuration procedure again in the future.
269
+
270
+ **Troubleshooting**:
271
+
272
+ If the configuration process fails, you may need to perform it via the Dynamixel Wizard instead.
273
+
274
+ Known failure modes:
275
+ - Calling `arm.connect()` raises `OSError: No motor found, but one new motor expected. Verify power cord is plugged in and retry` on Ubuntu 22.
276
+
277
+ Steps:
278
+ 1. Visit https://emanual.robotis.com/docs/en/software/dynamixel/dynamixel_wizard2/#connect-dynamixel.
279
+ 2. Follow the software installation instructions in section 3 of the web page.
280
+ 3. Launch the software.
281
+ 4. Configure the device scanning options in the menu under `Tools` > `Options` > `Scan`. Check only Protocol 2.0, select only the USB port identifier of interest, select all baudrates, and set the ID range to `[0, 10]`. _While this step is not strictly necessary, it greatly speeds up scanning_.
282
+ 5. For each motor in turn:
283
+ - Disconnect the power to the driver board.
284
+ - Connect **only** the motor of interest to the driver board, making sure to disconnect it from any other motors.
285
+ - Reconnect the power to the driver board.
286
+ - From the software menu select `Device` > `Scan` and let the scan run. A device should appear.
287
+ - If the device has an asterisk (*) near it, it means the firmware is indeed outdated. From the software menu, select `Tools` > `Firmware Update`. Follow the prompts.
288
+ - The main panel should have a table with various parameters of the device (refer to the web page, section 5). Select the row with `ID`, and then set the desired ID on the bottom right panel by selecting and clicking `Save`.
289
+ - Just like you did with the ID, also set the `Baud Rate` to 1 Mbps.
290
+ 6. Check everything has been done right:
291
+ - Rewire the arms in their final configuration and power both of them.
292
+ - Scan for devices. All 12 motors should appear.
293
+ - Select the motors one by one and move the arm. Check that the graphical indicator near the top right shows the movement.
294
+
295
+ **Note:** There is a common issue with the Dynamixel XL430-W250 motors where the motors become undiscoverable after upgrading their firmware from the Mac and Windows Dynamixel Wizard 2 applications. When this occurs, a firmware recovery is required (select `DYNAMIXEL Firmware Recovery` and follow the prompts). There are two known workarounds to conduct this firmware reset:
296
+ 1) Install the Dynamixel Wizard on a linux machine and complete the firmware recovery
297
+ 2) Use the Dynamixel U2D2 in order to perform the reset with Windows or Mac. This U2D2 can be purchased [here](https://www.robotis.us/u2d2/).
298
+ For either solution, open DYNAMIXEL Wizard 2.0 and select the appropriate port. You will likely be unable to see the motor in the GUI at this time. Select `Firmware Recovery`, carefully choose the correct model, and wait for the process to complete. Finally, re-scan to confirm the firmware recovery was successful.
299
+
300
+ **Read and Write with DynamixelMotorsBus**
301
+
302
+ To get familiar with how `DynamixelMotorsBus` communicates with the motors, you can start by reading data from them. Copy-paste this code into the same interactive Python session:
303
+ ```python
304
+ leader_pos = leader_arm.read("Present_Position")
305
+ follower_pos = follower_arm.read("Present_Position")
306
+ print(leader_pos)
307
+ print(follower_pos)
308
+ ```
309
+
310
+ Expected output might look like:
311
+ ```
312
+ array([2054, 523, 3071, 1831, 3049, 2441], dtype=int32)
313
+ array([2003, 1601, 56, 2152, 3101, 2283], dtype=int32)
314
+ ```
315
+
316
+ Try moving the arms to various positions and observe how the values change.
317
+
318
+ Now let's try to enable torque in the follower arm by copy pasting this code:
319
+ ```python
320
+ from lerobot.common.robot_devices.motors.dynamixel import TorqueMode
321
+
322
+ follower_arm.write("Torque_Enable", TorqueMode.ENABLED.value)
323
+ ```
324
+
325
+ With torque enabled, the follower arm will be locked in its current position. Do not attempt to manually move the arm while torque is enabled, as this could damage the motors.
326
+
327
+ Now, to get more familiar with reading and writing, let's move the arm programmatically by copy-pasting the following example code:
328
+ ```python
329
+ # Get the current position
330
+ position = follower_arm.read("Present_Position")
331
+
332
+ # Update first motor (shoulder_pan) position by +10 steps
333
+ position[0] += 10
334
+ follower_arm.write("Goal_Position", position)
335
+
336
+ # Update all motors position by -30 steps
337
+ position -= 30
338
+ follower_arm.write("Goal_Position", position)
339
+
340
+ # Update gripper by +30 steps
341
+ position[-1] += 30
342
+ follower_arm.write("Goal_Position", position[-1], "gripper")
343
+ ```
344
+
345
+ When you're done playing, you can try to disable the torque, but make sure you hold your robot so that it doesn't fall:
346
+ ```python
347
+ follower_arm.write("Torque_Enable", TorqueMode.DISABLED.value)
348
+ ```
349
+
350
+ Finally, disconnect the arms:
351
+ ```python
352
+ leader_arm.disconnect()
353
+ follower_arm.disconnect()
354
+ ```
355
+
356
+ Alternatively, you can unplug the power cord, which will automatically disable torque and disconnect the motors.
357
+
358
+ */!\ Warning*: These motors tend to overheat, especially under torque or if left plugged in for too long. Unplug after use.
359
+
360
+ ### b. Teleoperate your Koch v1.1 with ManipulatorRobot
361
+
362
+ **Instantiate the ManipulatorRobot**
363
+
364
+ Before you can teleoperate your robot, you need to instantiate the [`ManipulatorRobot`](../lerobot/common/robot_devices/robots/manipulator.py) using the previously defined `leader_config` and `follower_config`.
365
+
366
+ For the Koch v1.1 robot, we only have one leader, so we refer to it as `"main"` and define it as `leader_arms={"main": leader_config}`. We do the same for the follower arm. For other robots (like the Aloha), which may have two pairs of leader and follower arms, you would define them like this: `leader_arms={"left": left_leader_config, "right": right_leader_config}`. The same applies to the follower arms.
367
+
368
+
369
+ Run the following code to instantiate your manipulator robot:
370
+ ```python
371
+ from lerobot.common.robot_devices.robots.configs import KochRobotConfig
372
+ from lerobot.common.robot_devices.robots.manipulator import ManipulatorRobot
373
+
374
+ robot_config = KochRobotConfig(
375
+ leader_arms={"main": leader_config},
376
+ follower_arms={"main": follower_config},
377
+ cameras={}, # We don't use any camera for now
378
+ )
379
+ robot = ManipulatorRobot(robot_config)
380
+ ```
381
+
382
+ The `KochRobotConfig` sets the robot-specific settings and calibration process. For instance, it enables torque on the gripper of the leader Koch v1.1 arm and holds it at a 40 degree angle so it can be used as a trigger.
383
+
384
+ For the [Aloha bimanual robot](https://aloha-2.github.io), we would use `AlohaRobotConfig` to set different settings such as a secondary ID for shadow joints (shoulder, elbow). Specific to Aloha, LeRobot comes with default calibration files stored in `.cache/calibration/aloha_default`. Assuming the motors have been properly assembled, no manual calibration step is expected for Aloha.
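+
+ As a rough sketch (assuming `AlohaRobotConfig` is defined next to `KochRobotConfig` in `configs.py` and that its defaults match your wiring), instantiating an Aloha robot would follow the same pattern:
+ ```python
+ from lerobot.common.robot_devices.robots.configs import AlohaRobotConfig
+ from lerobot.common.robot_devices.robots.manipulator import ManipulatorRobot
+
+ # Ports, motor models and shadow-joint IDs come from the config defaults;
+ # calibration files are read from .cache/calibration/aloha_default.
+ aloha = ManipulatorRobot(AlohaRobotConfig(cameras={}))
+ aloha.connect()
+ ```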
385
+
386
+ **Calibrate and Connect the ManipulatorRobot**
387
+
388
+ Next, you'll need to calibrate your Koch robot to ensure that the leader and follower arms have the same position values when they are in the same physical position. This calibration is essential because it allows a neural network trained on one Koch robot to work on another.
389
+
390
+ When you connect your robot for the first time, the [`ManipulatorRobot`](../lerobot/common/robot_devices/robots/manipulator.py) will detect if the calibration file is missing and trigger the calibration procedure. During this process, you will be guided to move each arm to three different positions.
391
+
392
+ Here are the positions you'll move the follower arm to:
393
+
394
+ | 1. Zero position | 2. Rotated position | 3. Rest position |
395
+ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- |
396
+ | <img src="../media/koch/follower_zero.webp?raw=true" alt="Koch v1.1 follower arm zero position" title="Koch v1.1 follower arm zero position" style="width:100%;"> | <img src="../media/koch/follower_rotated.webp?raw=true" alt="Koch v1.1 follower arm rotated position" title="Koch v1.1 follower arm rotated position" style="width:100%;"> | <img src="../media/koch/follower_rest.webp?raw=true" alt="Koch v1.1 follower arm rest position" title="Koch v1.1 follower arm rest position" style="width:100%;"> |
397
+
398
+ And here are the corresponding positions for the leader arm:
399
+
400
+ | 1. Zero position | 2. Rotated position | 3. Rest position |
401
+ | ----------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------- |
402
+ | <img src="../media/koch/leader_zero.webp?raw=true" alt="Koch v1.1 leader arm zero position" title="Koch v1.1 leader arm zero position" style="width:100%;"> | <img src="../media/koch/leader_rotated.webp?raw=true" alt="Koch v1.1 leader arm rotated position" title="Koch v1.1 leader arm rotated position" style="width:100%;"> | <img src="../media/koch/leader_rest.webp?raw=true" alt="Koch v1.1 leader arm rest position" title="Koch v1.1 leader arm rest position" style="width:100%;"> |
403
+
404
+ You can watch a [video tutorial of the calibration procedure](https://youtu.be/8drnU9uRY24) for more details.
405
+
406
+ During calibration, we count the number of full 360-degree rotations your motors have made since they were first used. That's why we ask you to move to this arbitrary "zero" position. We don't actually "set" the zero position, so you don't need to be accurate. After calculating these "offsets" to shift the motor values around 0, we need to assess the rotation direction of each motor, which might differ. That's why we ask you to rotate all motors to roughly 90 degrees, to measure whether the values changed negatively or positively.
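+
+ To make this concrete, here is a purely illustrative sketch (not the actual LeRobot calibration code, and the readings are made-up example values) of what the per-motor math boils down to, assuming a 4096-step encoder:
+ ```python
+ STEPS_PER_TURN = 4096  # raw encoder steps per full motor revolution
+
+ raw_zero = 2035     # example reading captured at the guided "zero" pose
+ raw_rotated = 971   # example reading captured at the ~90 degree "rotated" pose
+
+ homing_offset = -raw_zero                        # shifts the zero-pose reading to ~0
+ drive_mode = 1 if raw_rotated < raw_zero else 0  # 1 means this motor counts "backwards"
+
+ direction = -1 if drive_mode else 1
+ degrees_at_rotated = direction * (raw_rotated + homing_offset) * 360 / STEPS_PER_TURN
+ print(degrees_at_rotated)  # ~93.5 degrees for these made-up readings
+ ```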
407
+
408
+ Finally, the rest position ensures that the follower and leader arms are roughly aligned after calibration, preventing sudden movements that could damage the motors when starting teleoperation.
409
+
410
+ Importantly, once calibrated, all Koch robots will move to the same positions (e.g. zero and rotated position) when commanded.
411
+
412
+ Run the following code to calibrate and connect your robot:
413
+ ```python
414
+ robot.connect()
415
+ ```
416
+
417
+ The output will look like this:
418
+ ```
419
+ Connecting main follower arm
420
+ Connecting main leader arm
421
+
422
+ Missing calibration file '.cache/calibration/koch/main_follower.json'
423
+ Running calibration of koch main follower...
424
+ Move arm to zero position
425
+ [...]
426
+ Move arm to rotated position
427
+ [...]
428
+ Move arm to rest position
429
+ [...]
430
+ Calibration is done! Saving calibration file '.cache/calibration/koch/main_follower.json'
431
+
432
+ Missing calibration file '.cache/calibration/koch/main_leader.json'
433
+ Running calibration of koch main leader...
434
+ Move arm to zero position
435
+ [...]
436
+ Move arm to rotated position
437
+ [...]
438
+ Move arm to rest position
439
+ [...]
440
+ Calibration is done! Saving calibration file '.cache/calibration/koch/main_leader.json'
441
+ ```
442
+
443
+ *Verifying Calibration*
444
+
445
+ Once calibration is complete, you can check the positions of the leader and follower arms to ensure they match. If the calibration was successful, the positions should be very similar.
446
+
447
+ Run this code to get the positions in degrees:
448
+ ```python
449
+ leader_pos = robot.leader_arms["main"].read("Present_Position")
450
+ follower_pos = robot.follower_arms["main"].read("Present_Position")
451
+
452
+ print(leader_pos)
453
+ print(follower_pos)
454
+ ```
455
+
456
+ Example output:
457
+ ```
458
+ array([-0.43945312, 133.94531, 179.82422, -18.984375, -1.9335938, 34.541016], dtype=float32)
459
+ array([-0.58723712, 131.72314, 174.98743, -16.872612, 0.786213, 35.271973], dtype=float32)
460
+ ```
461
+
462
+ These values are in degrees, which makes them easier to interpret and debug. The zero position used during calibration should roughly correspond to 0 degrees for each motor, and the rotated position should roughly correspond to 90 degrees for each motor.
463
+
464
+ **Teleoperate your Koch v1.1**
465
+
466
+ You can easily teleoperate your robot by reading the positions from the leader arm and sending them as goal positions to the follower arm.
467
+
468
+ To teleoperate your robot for 30 seconds at a frequency of approximately 200Hz, run the following code:
469
+ ```python
470
+ import tqdm
471
+ seconds = 30
472
+ frequency = 200
473
+ for _ in tqdm.tqdm(range(seconds*frequency)):
474
+ leader_pos = robot.leader_arms["main"].read("Present_Position")
475
+ robot.follower_arms["main"].write("Goal_Position", leader_pos)
476
+ ```
477
+
478
+ *Using `teleop_step` for Teleoperation*
479
+
480
+ Alternatively, you can teleoperate the robot using the `teleop_step` method from [`ManipulatorRobot`](../lerobot/common/robot_devices/robots/manipulator.py).
481
+
482
+ Run this code to teleoperate:
483
+ ```python
484
+ for _ in tqdm.tqdm(range(seconds*frequency)):
485
+ robot.teleop_step()
486
+ ```
487
+
488
+ *Recording data during Teleoperation*
489
+
490
+ Teleoperation is particularly useful for recording data. You can use `teleop_step(record_data=True)` to return both the follower arm's position as `"observation.state"` and the leader arm's position as `"action"`. This function also converts the numpy arrays into PyTorch tensors. If you're working with a robot that has two leader and two follower arms (like the Aloha), the positions are concatenated.
491
+
492
+ Run the following code to see how slowly moving the leader arm affects the observation and action:
493
+ ```python
494
+ leader_pos = robot.leader_arms["main"].read("Present_Position")
495
+ follower_pos = robot.follower_arms["main"].read("Present_Position")
496
+ observation, action = robot.teleop_step(record_data=True)
497
+
498
+ print(follower_pos)
499
+ print(observation)
500
+ print(leader_pos)
501
+ print(action)
502
+ ```
503
+
504
+ Expected output:
505
+ ```
506
+ array([7.8223, 131.1328, 165.5859, -23.4668, -0.9668, 32.4316], dtype=float32)
507
+ {'observation.state': tensor([7.8223, 131.1328, 165.5859, -23.4668, -0.9668, 32.4316])}
508
+ array([3.4277, 134.1211, 179.8242, -18.5449, -1.5820, 34.7168], dtype=float32)
509
+ {'action': tensor([3.4277, 134.1211, 179.8242, -18.5449, -1.5820, 34.7168])}
510
+ ```
511
+
512
+ *Asynchronous Frame Recording*
513
+
514
+ Additionally, `teleop_step` can asynchronously record frames from multiple cameras and include them in the observation dictionary as `"observation.images.CAMERA_NAME"`. This feature will be covered in more detail in the next section.
515
+
516
+ *Disconnecting the Robot*
517
+
518
+ When you're finished, make sure to disconnect your robot by running:
519
+ ```python
520
+ robot.disconnect()
521
+ ```
522
+
523
+ Alternatively, you can unplug the power cord, which will also disable torque.
524
+
525
+ */!\ Warning*: These motors tend to overheat, especially under torque or if left plugged in for too long. Unplug after use.
526
+
527
+ ### c. Add your cameras with OpenCVCamera
528
+
529
+ **(Optional) Use your phone as camera on Linux**
530
+
531
+ If you want to use your phone as a camera on Linux, follow these steps to set up a virtual camera:
532
+
533
+ 1. *Install `v4l2loopback-dkms` and `v4l-utils`*. Those packages are required to create virtual camera devices (`v4l2loopback`) and verify their settings with the `v4l2-ctl` utility from `v4l-utils`. Install them using:
534
+ ```bash
535
+ sudo apt install v4l2loopback-dkms v4l-utils
536
+ ```
537
+ 2. *Install [DroidCam](https://droidcam.app) on your phone*. This app is available for both iOS and Android.
538
+ 3. *Install [OBS Studio](https://obsproject.com)*. This software will help you manage the camera feed. Install it using [Flatpak](https://flatpak.org):
539
+ ```bash
540
+ flatpak install flathub com.obsproject.Studio
541
+ ```
542
+ 4. *Install the DroidCam OBS plugin*. This plugin integrates DroidCam with OBS Studio. Install it with:
543
+ ```bash
544
+ flatpak install flathub com.obsproject.Studio.Plugin.DroidCam
545
+ ```
546
+ 5. *Start OBS Studio*. Launch with:
547
+ ```bash
548
+ flatpak run com.obsproject.Studio
549
+ ```
550
+ 6. *Add your phone as a source*. Follow the instructions [here](https://droidcam.app/obs/usage). Be sure to set the resolution to `640x480`.
551
+ 7. *Adjust resolution settings*. In OBS Studio, go to `File > Settings > Video`. Change the `Base(Canvas) Resolution` and the `Output(Scaled) Resolution` to `640x480` by manually typing it in.
552
+ 8. *Start virtual camera*. In OBS Studio, follow the instructions [here](https://obsproject.com/kb/virtual-camera-guide).
553
+ 9. *Verify the virtual camera setup*. Use `v4l2-ctl` to list the devices:
554
+ ```bash
555
+ v4l2-ctl --list-devices
556
+ ```
557
+ You should see an entry like:
558
+ ```
559
+ VirtualCam (platform:v4l2loopback-000):
560
+ /dev/video1
561
+ ```
562
+ 10. *Check the camera resolution*. Use `v4l2-ctl` to ensure that the virtual camera output resolution is `640x480`. Change `/dev/video1` to the port of your virtual camera from the output of `v4l2-ctl --list-devices`.
563
+ ```bash
564
+ v4l2-ctl -d /dev/video1 --get-fmt-video
565
+ ```
566
+ You should see an entry like:
567
+ ```
568
+ >>> Format Video Capture:
569
+ >>> Width/Height : 640/480
570
+ >>> Pixel Format : 'YUYV' (YUYV 4:2:2)
571
+ ```
572
+
573
+ Troubleshooting: If the resolution is not correct you will have to delete the Virtual Camera port and try again as it cannot be changed.
574
+
575
+ If everything is set up correctly, you can proceed with the rest of the tutorial.
576
+
577
+ **(Optional) Use your iPhone as a camera on MacOS**
578
+
579
+ To use your iPhone as a camera on macOS, enable the Continuity Camera feature:
580
+ - Ensure your Mac is running macOS 13 or later, and your iPhone is on iOS 16 or later.
581
+ - Sign in both devices with the same Apple ID.
582
+ - Connect your devices with a USB cable or turn on Wi-Fi and Bluetooth for a wireless connection.
583
+
584
+ For more details, visit [Apple support](https://support.apple.com/en-gb/guide/mac-help/mchl77879b8a/mac).
585
+
586
+ Your iPhone should be detected automatically when running the camera setup script in the next section.
587
+
588
+ **Instantiate an OpenCVCamera**
589
+
590
+ The [`OpenCVCamera`](../lerobot/common/robot_devices/cameras/opencv.py) class allows you to efficiently record frames from most cameras using the [`opencv2`](https://docs.opencv.org) library. For more details on compatibility, see [Video I/O with OpenCV Overview](https://docs.opencv.org/4.x/d0/da7/videoio_overview.html).
591
+
592
+ To instantiate an [`OpenCVCamera`](../lerobot/common/robot_devices/cameras/opencv.py), you need a camera index (e.g. `OpenCVCamera(camera_index=0)`). When you only have one camera, like a laptop webcam, the camera index is usually `0`, but it might differ, and it might change if you reboot your computer or re-plug your camera. This behavior depends on your operating system.
593
+
594
+ To find the camera indices, run the following utility script, which will save a few frames from each detected camera:
595
+ ```bash
596
+ python lerobot/common/robot_devices/cameras/opencv.py \
597
+ --images-dir outputs/images_from_opencv_cameras
598
+ ```
599
+
600
+ The output will look something like this if you have two cameras connected:
601
+ ```
602
+ Mac or Windows detected. Finding available camera indices through scanning all indices from 0 to 60
603
+ [...]
604
+ Camera found at index 0
605
+ Camera found at index 1
606
+ [...]
607
+ Connecting cameras
608
+ OpenCVCamera(0, fps=30.0, width=1920.0, height=1080.0, color_mode=rgb)
609
+ OpenCVCamera(1, fps=24.0, width=1920.0, height=1080.0, color_mode=rgb)
610
+ Saving images to outputs/images_from_opencv_cameras
611
+ Frame: 0000 Latency (ms): 39.52
612
+ [...]
613
+ Frame: 0046 Latency (ms): 40.07
614
+ Images have been saved to outputs/images_from_opencv_cameras
615
+ ```
616
+
617
+ Check the saved images in `outputs/images_from_opencv_cameras` to identify which camera index corresponds to which physical camera (e.g. `0` for `camera_00` or `1` for `camera_01`):
618
+ ```
619
+ camera_00_frame_000000.png
620
+ [...]
621
+ camera_00_frame_000047.png
622
+ camera_01_frame_000000.png
623
+ [...]
624
+ camera_01_frame_000047.png
625
+ ```
626
+
627
+ Note: Some cameras may take a few seconds to warm up, and the first frame might be black or green.
628
+
629
+ Finally, run this code to instantiate and connect your camera:
630
+ ```python
631
+ from lerobot.common.robot_devices.cameras.configs import OpenCVCameraConfig
632
+ from lerobot.common.robot_devices.cameras.opencv import OpenCVCamera
633
+
634
+ config = OpenCVCameraConfig(camera_index=0)
635
+ camera = OpenCVCamera(config)
636
+ camera.connect()
637
+ color_image = camera.read()
638
+
639
+ print(color_image.shape)
640
+ print(color_image.dtype)
641
+ ```
642
+
643
+ Expected output for a laptop camera on MacBookPro:
644
+ ```
645
+ (1080, 1920, 3)
646
+ uint8
647
+ ```
648
+
649
+ Or like this if you followed our tutorial to set up a virtual camera:
650
+ ```
651
+ (480, 640, 3)
652
+ uint8
653
+ ```
654
+
655
+ With certain cameras, you can also specify additional parameters like frame rate, resolution, and color mode during instantiation. For instance:
656
+ ```python
657
+ config = OpenCVCameraConfig(camera_index=0, fps=30, width=640, height=480)
658
+ ```
659
+
660
+ If the provided arguments are not compatible with the camera, an exception will be raised.
661
+
662
+ *Disconnecting the camera*
663
+
664
+ When you're done using the camera, disconnect it by running:
665
+ ```python
666
+ camera.disconnect()
667
+ ```
668
+
669
+ **Instantiate your robot with cameras**
670
+
671
+ Additionally, you can set up your robot to work with your cameras.
672
+
673
+ Modify the following Python code with the appropriate camera names and configurations:
674
+ ```python
675
+ robot = ManipulatorRobot(
676
+ KochRobotConfig(
677
+ leader_arms={"main": leader_arm},
678
+ follower_arms={"main": follower_arm},
679
+ calibration_dir=".cache/calibration/koch",
680
+ cameras={
681
+ "laptop": OpenCVCameraConfig(0, fps=30, width=640, height=480),
682
+ "phone": OpenCVCameraConfig(1, fps=30, width=640, height=480),
683
+ },
684
+ )
685
+ )
686
+ robot.connect()
687
+ ```
688
+
689
+ As a result, `teleop_step(record_data=True)` will return a frame for each camera, following the PyTorch "channel first" convention, but images are kept in `uint8` with pixel values in the range [0, 255] so they are easy to save.
690
+
691
+ Modify this code with the names of your cameras and run it:
692
+ ```python
693
+ observation, action = robot.teleop_step(record_data=True)
694
+ print(observation["observation.images.laptop"].shape)
695
+ print(observation["observation.images.phone"].shape)
696
+ print(observation["observation.images.laptop"].min().item())
697
+ print(observation["observation.images.laptop"].max().item())
698
+ ```
699
+
700
+ The output should look like this:
701
+ ```
702
+ torch.Size([3, 480, 640])
703
+ torch.Size([3, 480, 640])
704
+ 0
705
+ 255
706
+ ```
707
+
708
+ ### d. Use `control_robot.py` and our `teleoperate` function
709
+
710
+ Instead of manually running the python code in a terminal window, you can use [`lerobot/scripts/control_robot.py`](../lerobot/scripts/control_robot.py) to instantiate your robot by providing the robot configurations via command line and control your robot with various modes as explained next.
711
+
712
+ Try running this code to teleoperate your robot (if you don't have a camera, keep reading):
713
+ ```bash
714
+ python lerobot/scripts/control_robot.py \
715
+ --robot.type=koch \
716
+ --control.type=teleoperate
717
+ ```
718
+
719
+ You will see a lot of lines appearing like this one:
720
+ ```
721
+ INFO 2024-08-10 11:15:03 ol_robot.py:209 dt: 5.12 (195.1hz) dtRlead: 4.93 (203.0hz) dtWfoll: 0.19 (5239.0hz)
722
+ ```
723
+
724
+ It contains
725
+ - `2024-08-10 11:15:03` which is the date and time of the call to the print function.
726
+ - `ol_robot.py:209` which is the end of the file name and the line number where the print function is called (`lerobot/scripts/control_robot.py` line `209`).
727
+ - `dt: 5.12 (195.1hz)` which is the "delta time" or the number of milliseconds spent between the previous call to `robot.teleop_step()` and the current one, associated with the frequency (5.12 ms is roughly 195 Hz; see the quick conversion example after this list); note that you can control the maximum frequency by adding fps as argument such as `--fps 30`.
728
+ - `dtRlead: 4.93 (203.0hz)` which is the number of milliseconds it took to read the position of the leader arm using `leader_arm.read("Present_Position")`.
729
+ - `dtWfoll: 0.19 (5239.0hz)` which is the number of milliseconds it took to set a new goal position for the follower arm using `follower_arm.write("Goal_position", leader_pos)`; note that writing is done asynchronously, so it takes less time than reading.
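+
+ As a quick sanity check of these numbers, each frequency is simply the inverse of the corresponding delta time (the displayed dt is rounded, hence small discrepancies):
+ ```python
+ dt_ms = 5.12            # milliseconds between two teleop steps
+ freq_hz = 1000 / dt_ms  # ~195.3 Hz, in line with the ~195.1hz shown in the log
+ print(f"{freq_hz:.1f} Hz")
+ ```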
730
+
731
+ Importantly: If you don't have any camera, you can remove them dynamically with this [draccus](https://github.com/dlwh/draccus) syntax `--robot.cameras='{}'`:
732
+ ```bash
733
+ python lerobot/scripts/control_robot.py \
734
+ --robot.type=koch \
735
+ --robot.cameras='{}' \
736
+ --control.type=teleoperate
737
+ ```
738
+
739
+ We advise creating a new YAML config file when the command becomes too long.
740
+
741
+ ## 3. Record your Dataset and Visualize it
742
+
743
+ Using what you've learned previously, you can now easily record a dataset of states and actions for one episode. You can use `busy_wait` to control the speed of teleoperation and record at a fixed `fps` (frames per second).
744
+
745
+ Try this code to record 30 seconds at 60 fps:
746
+ ```python
747
+ import time
748
+ from lerobot.scripts.control_robot import busy_wait
749
+
750
+ record_time_s = 30
751
+ fps = 60
752
+
753
+ states = []
754
+ actions = []
755
+ for _ in range(record_time_s * fps):
756
+ start_time = time.perf_counter()
757
+ observation, action = robot.teleop_step(record_data=True)
758
+
759
+ states.append(observation["observation.state"])
760
+ actions.append(action["action"])
761
+
762
+ dt_s = time.perf_counter() - start_time
763
+ busy_wait(1 / fps - dt_s)
764
+
765
+ # Note that observation and action are available in RAM, but
766
+ # you could potentially store them on disk with pickle/hdf5 or
767
+ # our optimized format `LeRobotDataset`. More on this next.
768
+ ```
769
+
770
+ Importantly, many utilities are still missing. For instance, if you have cameras, you will need to save the images to disk in order not to run out of RAM, and to do so in threads so that saving doesn't slow down communication with your robot (a rough illustration of this threaded idea is sketched below). Also, you will need to store your data in a format optimized for training and web sharing like [`LeRobotDataset`](../lerobot/common/datasets/lerobot_dataset.py). More on this in the next section.
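+
+ As a rough illustration of the threaded idea (this is not LeRobot's actual image writer; the directory name and helper below are made up for the example), you could offload image saving to a small thread pool so disk I/O doesn't stall the control loop:
+ ```python
+ from concurrent.futures import ThreadPoolExecutor
+ from pathlib import Path
+
+ from PIL import Image
+
+ images_dir = Path("outputs/manual_record")
+ images_dir.mkdir(parents=True, exist_ok=True)
+ executor = ThreadPoolExecutor(max_workers=4)
+
+ def save_frame(frame, camera_name, frame_index):
+     # `frame` is a HxWx3 uint8 array, e.g. what `camera.read()` returns
+     Image.fromarray(frame).save(images_dir / f"{camera_name}_{frame_index:06d}.png")
+
+ # Inside a recording loop, submit instead of blocking on the save:
+ #     executor.submit(save_frame, camera.read(), "laptop", i)
+ # and call executor.shutdown(wait=True) once recording is done.
+ ```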
771
+
772
+ ### a. Use the `record` function
773
+
774
+ You can use the `record` function from [`lerobot/scripts/control_robot.py`](../lerobot/scripts/control_robot.py) to achieve efficient data recording. It encompasses many recording utilities:
775
+ 1. Frames from cameras are saved on disk in threads, and encoded into videos at the end of each episode recording.
776
+ 2. Video streams from cameras are displayed in a window so that you can verify them.
777
+ 3. Data is stored with [`LeRobotDataset`](../lerobot/common/datasets/lerobot_dataset.py) format which is pushed to your Hugging Face page (unless `--control.push_to_hub=false` is provided).
778
+ 4. Checkpoints are done during recording, so if any issue occurs, you can resume recording by re-running the same command again with `--control.resume=true`. You will need to manually delete the dataset directory if you want to start recording from scratch.
779
+ 5. Set the flow of data recording using command line arguments:
780
+ - `--control.warmup_time_s=10` defines the number of seconds before starting data collection. It allows the robot devices to warmup and synchronize (10 seconds by default).
781
+ - `--control.episode_time_s=60` defines the number of seconds for data recording for each episode (60 seconds by default).
782
+ - `--control.reset_time_s=60` defines the number of seconds for resetting the environment after each episode (60 seconds by default).
783
+ - `--control.num_episodes=50` defines the number of episodes to record (50 by default).
784
+ 6. Control the flow during data recording using keyboard keys:
785
+ - Press the right arrow `->` at any time during episode recording to stop early and go to resetting. Do the same during resetting to stop early and go to the next episode recording.
786
+ - Press the left arrow `<-` at any time during episode recording or resetting to stop early, cancel the current episode, and re-record it.
787
+ - Press escape `ESC` at any time during episode recording to end the session early and go straight to video encoding and dataset uploading.
788
+ 7. Similarly to `teleoperate`, you can also use the command line to override anything.
789
+
790
+ Before trying `record`, if you want to push your dataset to the hub, make sure you've logged in using a write-access token, which can be generated from the [Hugging Face settings](https://huggingface.co/settings/tokens):
791
+ ```bash
792
+ huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
793
+ ```
794
+ Also, store your Hugging Face repository name in a variable (e.g. `cadene` or `lerobot`). For instance, run this to use your Hugging Face user name as repository:
795
+ ```bash
796
+ HF_USER=$(huggingface-cli whoami | head -n 1)
797
+ echo $HF_USER
798
+ ```
799
+ If you don't want to push to hub, use `--control.push_to_hub=false`.
800
+
801
+ Now run this to record 2 episodes:
802
+ ```bash
803
+ python lerobot/scripts/control_robot.py \
804
+ --robot.type=koch \
805
+ --control.type=record \
806
+ --control.single_task="Grasp a lego block and put it in the bin." \
807
+ --control.fps=30 \
808
+ --control.repo_id=${HF_USER}/koch_test \
809
+ --control.tags='["tutorial"]' \
810
+ --control.warmup_time_s=5 \
811
+ --control.episode_time_s=30 \
812
+ --control.reset_time_s=30 \
813
+ --control.num_episodes=2 \
814
+ --control.push_to_hub=true
815
+ ```
816
+
817
+
818
+ This will write your dataset locally to `~/.cache/huggingface/lerobot/{repo-id}` (e.g. `~/.cache/huggingface/lerobot/cadene/koch_test`) and push it to the hub at `https://huggingface.co/datasets/{HF_USER}/{repo-id}`. Your dataset will be automatically tagged with `LeRobot` for the community to find it easily, and you can also add custom tags (in this case `tutorial` for example).
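+
+ Once recording is done, you can sanity-check the result by loading it back (a sketch, assuming the `LeRobotDataset` loading API shown in the earlier dataset examples; the keys match what `teleop_step` produced):
+ ```python
+ from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
+
+ dataset = LeRobotDataset("cadene/koch_test")  # replace cadene with your ${HF_USER}
+ print(len(dataset))                # total number of recorded frames
+ frame = dataset[0]
+ print(frame["observation.state"])  # follower arm positions
+ print(frame["action"])             # leader arm goal positions
+ ```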
819
+
820
+ You can look for other LeRobot datasets on the hub by searching for `LeRobot` tags: https://huggingface.co/datasets?other=LeRobot
821
+
822
+ You will see a lot of lines appearing like this one:
823
+ ```
824
+ INFO 2024-08-10 15:02:58 ol_robot.py:219 dt:33.34 (30.0hz) dtRlead: 5.06 (197.5hz) dtWfoll: 0.25 (3963.7hz) dtRfoll: 6.22 (160.7hz) dtRlaptop: 32.57 (30.7hz) dtRphone: 33.84 (29.5hz)
825
+ ```
826
+ It contains:
827
+ - `2024-08-10 15:02:58` which is the date and time of the call to the print function,
828
+ - `ol_robot.py:219` which is the end of the file name and the line number where the print function is called (`lerobot/scripts/control_robot.py` line `219`).
829
+ - `dt:33.34 (30.0hz)` which is the "delta time" or the number of milliseconds spent between the previous call to `robot.teleop_step(record_data=True)` and the current one, associated with the frequency (33.34 ms equals 30.0 Hz) ; note that we use `--fps 30` so we expect 30.0 Hz ; when a step takes more time, the line appears in yellow.
830
+ - `dtRlead: 5.06 (197.5hz)` which is the delta time of reading the present position of the leader arm.
831
+ - `dtWfoll: 0.25 (3963.7hz)` which is the delta time of writing the goal position on the follower arm ; writing is asynchronous so it takes less time than reading.
832
+ - `dtRfoll: 6.22 (160.7hz)` which is the delta time of reading the present position on the follower arm.
833
+ - `dtRlaptop: 32.57 (30.7hz)` which is the delta time of capturing an image from the laptop camera in the thread running asynchronously.
834
+ - `dtRphone: 33.84 (29.5hz)` which is the delta time of capturing an image from the phone camera in the thread running asynchronously.
835
+
836
+ Troubleshooting:
837
+ - On Linux, if you encounter a hanging issue when using cameras, uninstall opencv and re-install it with conda:
838
+ ```bash
839
+ pip uninstall opencv-python
840
+ conda install -c conda-forge opencv=4.10.0
841
+ ```
842
+ - On Linux, if you encounter any issue during video encoding with `ffmpeg: unknown encoder libsvtav1`, you can:
843
+ - install with conda-forge by running `conda install -c conda-forge ffmpeg` (it should be compiled with `libsvtav1`),
844
+ - or, install [Homebrew](https://brew.sh) and run `brew install ffmpeg` (it should be compiled with `libsvtav1`),
845
+ - or, install [ffmpeg build dependencies](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu#GettheDependencies) and [compile ffmpeg from source with libsvtav1](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu#libsvtav1),
846
+ - and, in all cases, check that the ffmpeg binary used by your install is the expected one with `which ffmpeg`.
847
+ - On Linux, if the left and right arrow keys and escape key don't have any effect during data recording, make sure you've set the `$DISPLAY` environment variable. See [pynput limitations](https://pynput.readthedocs.io/en/latest/limitations.html#linux).
848
+
849
+ At the end of data recording, your dataset will be uploaded to your Hugging Face page (e.g. https://huggingface.co/datasets/cadene/koch_test), whose URL you can obtain by running:
850
+ ```bash
851
+ echo https://huggingface.co/datasets/${HF_USER}/koch_test
852
+ ```
853
+
854
+ ### b. Advice for recording dataset
855
+
856
+ Once you're comfortable with data recording, it's time to create a larger dataset for training. A good starting task is grasping an object at different locations and placing it in a bin. We suggest recording at least 50 episodes, with 10 episodes per location. Keep the cameras fixed and maintain consistent grasping behavior throughout the recordings.
857
+
858
+ In the following sections, you’ll train your neural network. After achieving reliable grasping performance, you can start introducing more variations during data collection, such as additional grasp locations, different grasping techniques, and altering camera positions.
859
+
860
+ Avoid adding too much variation too quickly, as it may hinder your results.
861
+
862
+ In the coming months, we plan to release a foundational model for robotics. We anticipate that fine-tuning this model will enhance generalization, reducing the need for strict consistency during data collection.
863
+
864
+ ### c. Visualize all episodes
865
+
866
+ You can visualize your dataset by running:
867
+ ```bash
868
+ python lerobot/scripts/visualize_dataset_html.py \
869
+ --repo-id ${HF_USER}/koch_test
870
+ ```
871
+
872
+ Note: You might need to add `--local-files-only 1` if your dataset was not uploaded to the Hugging Face hub.
873
+
874
+ This will launch a local web server that looks like this:
875
+ <div style="text-align:center;">
876
+ <img src="../media/tutorial/visualize_dataset_html.webp?raw=true" alt="Koch v1.1 leader and follower arms" title="Koch v1.1 leader and follower arms" width="100%">
877
+ </div>
878
+
879
+ ### d. Replay episode on your robot with the `replay` function
880
+
881
+ A useful feature of [`lerobot/scripts/control_robot.py`](../lerobot/scripts/control_robot.py) is the `replay` function, which allows you to replay on your robot any episode that you've recorded, or any episode from another dataset out there. This function helps you test the repeatability of your robot's actions and assess transferability across robots of the same model.
882
+
883
+ To replay the first episode of the dataset you just recorded, run the following command:
884
+ ```bash
885
+ python lerobot/scripts/control_robot.py \
886
+ --robot.type=koch \
887
+ --control.type=replay \
888
+ --control.fps=30 \
889
+ --control.repo_id=${HF_USER}/koch_test \
890
+ --control.episode=0
891
+ ```
892
+
893
+ Your robot should replicate movements similar to those you recorded. For example, check out [this video](https://x.com/RemiCadene/status/1793654950905680090) where we use `replay` on an Aloha robot from [Trossen Robotics](https://www.trossenrobotics.com).
894
+
895
+ ## 4. Train a policy on your data
896
+
897
+ ### a. Use the `train` script
898
+
899
+ To train a policy to control your robot, use the [`python lerobot/scripts/train.py`](../lerobot/scripts/train.py) script. A few arguments are required. Here is an example command:
900
+ ```bash
901
+ python lerobot/scripts/train.py \
902
+ --dataset.repo_id=${HF_USER}/koch_test \
903
+ --policy.type=act \
904
+ --output_dir=outputs/train/act_koch_test \
905
+ --job_name=act_koch_test \
906
+ --policy.device=cuda \
907
+ --wandb.enable=true
908
+ ```
909
+
910
+ Let's explain it:
911
+ 1. We provided the dataset as argument with `--dataset.repo_id=${HF_USER}/koch_test`.
912
+ 2. We provided the policy with `policy.type=act`. This loads configurations from [`configuration_act.py`](../lerobot/common/policies/act/configuration_act.py). Importantly, this policy will automatically adapt to the number of motor states, motor actions and cameras of your robot (e.g. `laptop` and `phone`) which have been saved in your dataset.
913
+ 3. We provided `policy.device=cuda` since we are training on an Nvidia GPU, but you could use `policy.device=mps` to train on Apple silicon.
914
+ 4. We provided `wandb.enable=true` to use [Weights and Biases](https://docs.wandb.ai/quickstart) for visualizing training plots. This is optional but if you use it, make sure you are logged in by running `wandb login`.
915
+
916
+ For more information on the `train` script see the previous tutorial: [`examples/4_train_policy_with_script.md`](../examples/4_train_policy_with_script.md)
917
+
918
+ ### b. (Optional) Upload policy checkpoints to the hub
919
+
920
+ Once training is done, upload the latest checkpoint with:
921
+ ```bash
922
+ huggingface-cli upload ${HF_USER}/act_koch_test \
923
+ outputs/train/act_koch_test/checkpoints/last/pretrained_model
924
+ ```
925
+
926
+ You can also upload intermediate checkpoints with:
927
+ ```bash
928
+ CKPT=010000
929
+ huggingface-cli upload ${HF_USER}/act_koch_test_${CKPT} \
930
+ outputs/train/act_koch_test/checkpoints/${CKPT}/pretrained_model
931
+ ```
932
+
933
+ ## 5. Evaluate your policy
934
+
935
+ Now that you have a policy checkpoint, you can easily control your robot with it using methods from [`ManipulatorRobot`](../lerobot/common/robot_devices/robots/manipulator.py) and the policy.
936
+
937
+ Try this code for running inference for 60 seconds at 30 fps:
938
+ ```python
939
+ import time
+
+ import torch
+
+ from lerobot.common.policies.act.modeling_act import ACTPolicy
+ from lerobot.scripts.control_robot import busy_wait
940
+
941
+ inference_time_s = 60
942
+ fps = 30
943
+ device = "cuda" # TODO: On Mac, use "mps" or "cpu"
944
+
945
+ ckpt_path = "outputs/train/act_koch_test/checkpoints/last/pretrained_model"
946
+ policy = ACTPolicy.from_pretrained(ckpt_path)
947
+ policy.to(device)
948
+
949
+ for _ in range(inference_time_s * fps):
950
+ start_time = time.perf_counter()
951
+
952
+ # Read the follower state and access the frames from the cameras
953
+ observation = robot.capture_observation()
954
+
955
+ # Convert to pytorch format: channel first and float32 in [0,1]
956
+ # with batch dimension
957
+ for name in observation:
958
+ if "image" in name:
959
+ observation[name] = observation[name].type(torch.float32) / 255
960
+ observation[name] = observation[name].permute(2, 0, 1).contiguous()
961
+ observation[name] = observation[name].unsqueeze(0)
962
+ observation[name] = observation[name].to(device)
963
+
964
+ # Compute the next action with the policy
965
+ # based on the current observation
966
+ action = policy.select_action(observation)
967
+ # Remove batch dimension
968
+ action = action.squeeze(0)
969
+ # Move to cpu, if not already the case
970
+ action = action.to("cpu")
971
+ # Order the robot to move
972
+ robot.send_action(action)
973
+
974
+ dt_s = time.perf_counter() - start_time
975
+ busy_wait(1 / fps - dt_s)
976
+ ```
977
+
978
+ ### a. Use our `record` function
979
+
980
+ Ideally, when controlling your robot with your neural network, you would want to record evaluation episodes and to be able to visualize them later on, or even train on them like in Reinforcement Learning. This pretty much corresponds to recording a new dataset but with a neural network providing the actions instead of teleoperation.
981
+
982
+ To this end, you can use the `record` function from [`lerobot/scripts/control_robot.py`](../lerobot/scripts/control_robot.py) but with a policy checkpoint as input. For instance, run this command to record 10 evaluation episodes:
983
+ ```bash
984
+ python lerobot/scripts/control_robot.py \
985
+ --robot.type=koch \
986
+ --control.type=record \
987
+ --control.fps=30 \
988
+ --control.repo_id=${HF_USER}/eval_act_koch_test \
989
+ --control.tags='["tutorial"]' \
990
+ --control.warmup_time_s=5 \
991
+ --control.episode_time_s=30 \
992
+ --control.reset_time_s=30 \
993
+ --control.num_episodes=10 \
994
+ --control.push_to_hub=true \
995
+ --control.policy.path=outputs/train/act_koch_test/checkpoints/last/pretrained_model
996
+ ```
997
+
998
+ As you can see, it's almost the same command as previously used to record your training dataset. Two things changed:
999
+ 1. There is an additional `--control.policy.path` argument which indicates the path to your policy checkpoint (e.g. `outputs/train/act_koch_test/checkpoints/last/pretrained_model`). You can also use the model repository if you uploaded a model checkpoint to the hub (e.g. `${HF_USER}/act_koch_test`).
1000
+ 2. The name of the dataset begins with `eval` to reflect that you are running inference (e.g. `${HF_USER}/eval_act_koch_test`).
1001
+
1002
+ ### b. Visualize evaluation afterwards
1003
+
1004
+ You can then visualize your evaluation dataset by running the same command as before but with the new inference dataset as argument:
1005
+ ```bash
1006
+ python lerobot/scripts/visualize_dataset.py \
1007
+ --repo-id ${HF_USER}/eval_act_koch_test
1008
+ ```
1009
+
1010
+ ## 6. Next step
1011
+
1012
+ Join our [Discord](https://discord.com/invite/s3KuuzsPFb) to collaborate on data collection and help us train a fully open-source foundational model for robotics!
examples/8_use_stretch.md ADDED
@@ -0,0 +1,161 @@
1
+ This tutorial explains how to use [Stretch 3](https://hello-robot.com/stretch-3-product) with LeRobot.
2
+
3
+ ## Setup
4
+
5
+ Familiarize yourself with Stretch by following its [tutorials](https://docs.hello-robot.com/0.3/getting_started/hello_robot/) (recommended).
6
+
7
+ To use LeRobot on Stretch, 3 options are available:
8
+ - [tethered setup](https://docs.hello-robot.com/0.3/getting_started/connecting_to_stretch/#tethered-setup)
9
+ - [untethered setup](https://docs.hello-robot.com/0.3/getting_started/connecting_to_stretch/#untethered-setup)
10
+ - ssh directly into Stretch (you will first need to install and configure openssh-server on stretch using one of the two above setups)
11
+
12
+
13
+ ## Install LeRobot
14
+
15
+ On Stretch's CLI, follow these steps:
16
+
17
+ 1. [Install Miniconda](https://docs.anaconda.com/miniconda/#quick-command-line-install):
18
+ ```bash
19
+ mkdir -p ~/miniconda3
20
+ wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh
21
+ bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3
22
+ rm ~/miniconda3/miniconda.sh
23
+ ~/miniconda3/bin/conda init bash
24
+ ```
25
+
26
+ 2. Comment out these lines in `~/.profile` (they can mess up the paths used by conda, and `~/.local/bin` should already be in your PATH):
27
+ ```
28
+ # set PATH so it includes user's private bin if it exists
29
+ if [ -d "$HOME/.local/bin" ] ; then
30
+ PATH="$HOME/.local/bin:$PATH"
31
+ fi
32
+ ```
33
+
34
+ 3. Restart shell or `source ~/.bashrc`
35
+
36
+ 4. Create and activate a fresh conda environment for lerobot
37
+ ```bash
38
+ conda create -y -n lerobot python=3.10 && conda activate lerobot
39
+ ```
40
+
41
+ 5. Clone LeRobot:
42
+ ```bash
43
+ git clone https://github.com/huggingface/lerobot.git ~/lerobot
44
+ ```
45
+
46
+ 6. Install LeRobot with stretch dependencies:
47
+ ```bash
48
+ cd ~/lerobot && pip install -e ".[stretch]"
49
+ ```
50
+
51
+ > **Note:** If you get this message, you can ignore it: `ERROR: pip's dependency resolver does not currently take into account all the packages that are installed.`
52
+
53
+ For Linux only (not Mac), install extra dependencies for recording datasets:
54
+ ```bash
55
+ conda install -y -c conda-forge ffmpeg
56
+ pip uninstall -y opencv-python
57
+ conda install -y -c conda-forge "opencv>=4.10.0"
58
+ ```
59
+
60
+ 7. Run a [system check](https://docs.hello-robot.com/0.3/getting_started/stretch_hardware_overview/#system-check) to make sure your robot is ready:
61
+ ```bash
62
+ stretch_system_check.py
63
+ ```
64
+
65
+ > **Note:** You may need to free the "robot process" after booting Stretch by running `stretch_free_robot_process.py`. For more info, see Stretch's [doc](https://docs.hello-robot.com/0.3/getting_started/stretch_hardware_overview/#turning-off-gamepad-teleoperation).
66
+
67
+ You should get something like this:
68
+ ```bash
69
+ For use with S T R E T C H (R) from Hello Robot Inc.
70
+ ---------------------------------------------------------------------
71
+
72
+ Model = Stretch 3
73
+ Tool = DexWrist 3 w/ Gripper
74
+ Serial Number = stretch-se3-3054
75
+
76
+ ---- Checking Hardware ----
77
+ [Pass] Comms are ready
78
+ [Pass] Actuators are ready
79
+ [Warn] Sensors not ready (IMU AZ = -10.19 out of range -10.1 to -9.5)
80
+ [Pass] Battery voltage is 13.6 V
81
+
82
+ ---- Checking Software ----
83
+ [Pass] Ubuntu 22.04 is ready
84
+ [Pass] All APT pkgs are setup correctly
85
+ [Pass] Firmware is up-to-date
86
+ [Pass] Python pkgs are up-to-date
87
+ [Pass] ROS2 Humble is ready
88
+ ```
89
+
90
+ ## Teleoperate, record a dataset and run a policy
91
+
92
+ **Calibrate (Optional)**
93
+ Before operating Stretch, you need to [home](https://docs.hello-robot.com/0.3/getting_started/stretch_hardware_overview/#homing) it first. Be mindful about giving Stretch some space as this procedure will move the robot's arm and gripper. Now run this command:
94
+ ```bash
95
+ python lerobot/scripts/control_robot.py \
96
+ --robot.type=stretch \
97
+ --control.type=calibrate
98
+ ```
99
+ This is equivalent to running `stretch_robot_home.py`
100
+
101
+ > **Note:** If you run any of the LeRobot scripts below and Stretch is not properly homed, it will automatically home/calibrate first.
102
+
103
+ **Teleoperate**
104
+ Before trying teleoperation, you need to activate the gamepad controller by pressing the middle button. For more info, see Stretch's [doc](https://docs.hello-robot.com/0.3/getting_started/hello_robot/#gamepad-teleoperation).
105
+
106
+ Now try out teleoperation (see above documentation to learn about the gamepad controls):
107
+ ```bash
108
+ python lerobot/scripts/control_robot.py \
109
+ --robot.type=stretch \
110
+ --control.type=teleoperate
111
+ ```
112
+ This is essentially the same as running `stretch_gamepad_teleop.py`
113
+
114
+ **Record a dataset**
115
+ Once you're familiar with the gamepad controls and after a bit of practice, you can try to record your first dataset with Stretch.
116
+
117
+ If you want to use the Hugging Face hub features for uploading your dataset and you haven't previously done it, make sure you've logged in using a write-access token, which can be generated from the [Hugging Face settings](https://huggingface.co/settings/tokens):
118
+ ```bash
119
+ huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
120
+ ```
121
+
122
+ Store your Hugging Face repository name in a variable to run these commands:
123
+ ```bash
124
+ HF_USER=$(huggingface-cli whoami | head -n 1)
125
+ echo $HF_USER
126
+ ```
127
+
128
+ Record two episodes:
129
+ ```bash
130
+ python lerobot/scripts/control_robot.py \
131
+ --robot.type=stretch \
132
+ --control.type=record \
133
+ --control.fps=30 \
134
+ --control.single_task="Grasp a lego block and put it in the bin." \
135
+ --control.repo_id=${HF_USER}/stretch_test \
136
+ --control.tags='["tutorial"]' \
137
+ --control.warmup_time_s=5 \
138
+ --control.episode_time_s=30 \
139
+ --control.reset_time_s=30 \
140
+ --control.num_episodes=2 \
141
+ --control.push_to_hub=true
142
+ ```
143
+
144
+ > **Note:** If you're using ssh to connect to Stretch and run this script, you won't be able to visualize its camera feeds (though they will still be recorded). To see the camera streams, use a [tethered](https://docs.hello-robot.com/0.3/getting_started/connecting_to_stretch/#tethered-setup) or [untethered setup](https://docs.hello-robot.com/0.3/getting_started/connecting_to_stretch/#untethered-setup).
145
+
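+ Before replaying, you can also sanity-check what was recorded. Below is a minimal sketch using `LeRobotDataset`; it assumes the dataset above is available on the hub or in your local cache, and the attribute names follow the dataset examples shipped in this repository:
+ ```python
+ from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
+
+ # Same repo id as used for recording (replace YOUR_HF_USER with your Hugging Face user).
+ dataset = LeRobotDataset("YOUR_HF_USER/stretch_test")
+
+ print(f"Number of frames: {len(dataset)}")
+ print(f"Camera keys: {dataset.meta.camera_keys}")
+
+ # Index of the first frame of the first episode, and the keys stored for that frame.
+ first_idx = dataset.episode_data_index["from"][0].item()
+ print(dataset[first_idx].keys())
+ ```
+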
146
+ **Replay an episode**
147
+ Now try to replay this episode (make sure the robot's initial position is the same):
148
+ ```bash
149
+ python lerobot/scripts/control_robot.py \
150
+ --robot.type=stretch \
151
+ --control.type=replay \
152
+ --control.fps=30 \
153
+ --control.repo_id=${HF_USER}/stretch_test \
154
+ --control.episode=0
155
+ ```
156
+
157
+ Follow [previous tutorial](https://github.com/huggingface/lerobot/blob/main/examples/7_get_started_with_real_robot.md#4-train-a-policy-on-your-data) to train a policy on your data and run inference on your robot. You will need to adapt the code for Stretch.
158
+
159
+ > TODO(rcadene, aliberts): Add already setup environment and policy yaml configuration files
160
+
161
+ If you need help, please reach out on Discord in the channel `#stretch3-mobile-arm`.
examples/9_use_aloha.md ADDED
@@ -0,0 +1,181 @@
1
+ This tutorial explains how to use [Aloha and Aloha 2 stationary](https://www.trossenrobotics.com/aloha-stationary) with LeRobot.
2
+
3
+ ## Setup
4
+
5
+ Follow the [documentation from Trossen Robotics](https://docs.trossenrobotics.com/aloha_docs/2.0/getting_started/stationary/hardware_setup.html) for setting up the hardware and plugging the 4 arms and 4 cameras into your computer.
6
+
7
+
8
+ ## Install LeRobot
9
+
10
+ On your computer:
11
+
12
+ 1. [Install Miniconda](https://docs.anaconda.com/miniconda/#quick-command-line-install):
13
+ ```bash
14
+ mkdir -p ~/miniconda3
15
+ wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh
16
+ bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3
17
+ rm ~/miniconda3/miniconda.sh
18
+ ~/miniconda3/bin/conda init bash
19
+ ```
20
+
21
+ 2. Restart shell or `source ~/.bashrc`
22
+
23
+ 3. Create and activate a fresh conda environment for lerobot
24
+ ```bash
25
+ conda create -y -n lerobot python=3.10 && conda activate lerobot
26
+ ```
27
+
28
+ 4. Clone LeRobot:
29
+ ```bash
30
+ git clone https://github.com/huggingface/lerobot.git ~/lerobot
31
+ ```
32
+
33
+ 5. Install LeRobot with dependencies for the Aloha motors (dynamixel) and cameras (intelrealsense):
34
+ ```bash
35
+ cd ~/lerobot && pip install -e ".[dynamixel, intelrealsense]"
36
+ ```
37
+
38
+ For Linux only (not Mac), install extra dependencies for recording datasets:
39
+ ```bash
40
+ conda install -y -c conda-forge ffmpeg
41
+ pip uninstall -y opencv-python
42
+ conda install -y -c conda-forge "opencv>=4.10.0"
43
+ ```
44
+
45
+ ## Teleoperate
46
+
47
+ **/!\ FOR SAFETY, READ THIS /!\**
48
+ Teleoperation consists of manually operating the leader arms to move the follower arms. Importantly:
49
+ 1. Make sure your leader arms are in the same position as the follower arms, so that the follower arms don't move too fast to match the leader arms,
50
+ 2. Our code assumes that your robot has been assembled following Trossen Robotics instructions. This allows us to skip calibration, as we use the pre-defined calibration files in `.cache/calibration/aloha_default`. If you replace a motor, make sure you follow the exact instructions from Trossen Robotics.
51
+
52
+ By running the following code, you can start your first **SAFE** teleoperation:
53
+ ```bash
54
+ python lerobot/scripts/control_robot.py \
55
+ --robot.type=aloha \
56
+ --robot.max_relative_target=5 \
57
+ --control.type=teleoperate
58
+ ```
59
+
60
+ By adding `--robot.max_relative_target=5`, we override the default value for `max_relative_target` defined in [`AlohaRobotConfig`](lerobot/common/robot_devices/robots/configs.py). Setting it to `5` limits the magnitude of each relative movement for more safety, at the cost of smoothness (a minimal sketch of how such a cap works is shown after the next command). When you feel confident, you can disable this limit by adding `--robot.max_relative_target=null` to the command line:
61
+ ```bash
62
+ python lerobot/scripts/control_robot.py \
63
+ --robot.type=aloha \
64
+ --robot.max_relative_target=null \
65
+ --control.type=teleoperate
66
+ ```
67
+
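+ To build an intuition for what this safety cap does, here is a minimal, illustrative sketch (not the actual LeRobot implementation; the function name is made up) of how a relative-target limit can be enforced before goal positions are sent to the motors:
+ ```python
+ import numpy as np
+
+ def clamp_relative_target(current_pos: np.ndarray, goal_pos: np.ndarray, max_relative_target: float | None) -> np.ndarray:
+     """Limit how far the commanded goal may deviate from the current motor positions."""
+     if max_relative_target is None:
+         return goal_pos  # no cap: pass the goal through unchanged
+     delta = np.clip(goal_pos - current_pos, -max_relative_target, max_relative_target)
+     return current_pos + delta
+
+ # A goal 40 steps away gets capped to 5 steps away from the current position.
+ print(clamp_relative_target(np.array([2048.0]), np.array([2088.0]), 5))  # [2053.]
+ ```
+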
68
+ ## Record a dataset
69
+
70
+ Once you're familiar with teleoperation, you can record your first dataset with Aloha.
71
+
72
+ If you want to use the Hugging Face hub features for uploading your dataset and you haven't previously done it, make sure you've logged in using a write-access token, which can be generated from the [Hugging Face settings](https://huggingface.co/settings/tokens):
73
+ ```bash
74
+ huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
75
+ ```
76
+
77
+ Store your Hugging Face repository name in a variable to run these commands:
78
+ ```bash
79
+ HF_USER=$(huggingface-cli whoami | head -n 1)
80
+ echo $HF_USER
81
+ ```
82
+
83
+ Record 2 episodes and upload your dataset to the hub:
84
+ ```bash
85
+ python lerobot/scripts/control_robot.py \
86
+ --robot.type=aloha \
87
+ --robot.max_relative_target=null \
88
+ --control.type=record \
89
+ --control.fps=30 \
90
+ --control.single_task="Grasp a lego block and put it in the bin." \
91
+ --control.repo_id=${HF_USER}/aloha_test \
92
+ --control.tags='["tutorial"]' \
93
+ --control.warmup_time_s=5 \
94
+ --control.episode_time_s=30 \
95
+ --control.reset_time_s=30 \
96
+ --control.num_episodes=2 \
97
+ --control.push_to_hub=true
98
+ ```
99
+
100
+ ## Visualize a dataset
101
+
102
+ If you uploaded your dataset to the hub with `--control.push_to_hub=true`, you can [visualize your dataset online](https://huggingface.co/spaces/lerobot/visualize_dataset) by copy-pasting your repo id given by:
103
+ ```bash
104
+ echo ${HF_USER}/aloha_test
105
+ ```
106
+
107
+ If you didn't upload the dataset (i.e. you used `--control.push_to_hub=false`), you can still visualize it locally with:
108
+ ```bash
109
+ python lerobot/scripts/visualize_dataset_html.py \
110
+ --repo-id ${HF_USER}/aloha_test
111
+ ```
112
+
113
+ ## Replay an episode
114
+
115
+ **/!\ FOR SAFETY, READ THIS /!\**
116
+ Replay consists of automatically replaying the sequence of actions (i.e. goal positions for your motors) recorded in a given dataset episode. Make sure the current initial position of your robot is similar to the one in your episode, so that your follower arms don't move too fast to reach the first goal positions. For safety, you might want to add `--robot.max_relative_target=5` to your command line as explained above.
117
+
118
+ Now try to replay the first episode on your robot:
119
+ ```bash
120
+ python lerobot/scripts/control_robot.py \
121
+ --robot.type=aloha \
122
+ --robot.max_relative_target=null \
123
+ --control.type=replay \
124
+ --control.fps=30 \
125
+ --control.repo_id=${HF_USER}/aloha_test \
126
+ --control.episode=0
127
+ ```
128
+
129
+ ## Train a policy
130
+
131
+ To train a policy to control your robot, use the [`python lerobot/scripts/train.py`](../lerobot/scripts/train.py) script. A few arguments are required. Here is an example command:
132
+ ```bash
133
+ python lerobot/scripts/train.py \
134
+ --dataset.repo_id=${HF_USER}/aloha_test \
135
+ --policy.type=act \
136
+ --output_dir=outputs/train/act_aloha_test \
137
+ --job_name=act_aloha_test \
138
+ --policy.device=cuda \
139
+ --wandb.enable=true
140
+ ```
141
+
142
+ Let's explain it:
143
+ 1. We provided the dataset as argument with `--dataset.repo_id=${HF_USER}/aloha_test`.
144
+ 2. We provided the policy with `policy.type=act`. This loads configurations from [`configuration_act.py`](../lerobot/common/policies/act/configuration_act.py). Importantly, this policy automatically adapts to the number of motor states, motor actions and cameras of your robot which have been saved in your dataset.
145
+ 3. We provided `policy.device=cuda` since we are training on an Nvidia GPU, but you could use `policy.device=mps` to train on Apple silicon.
146
+ 4. We provided `wandb.enable=true` to use [Weights and Biases](https://docs.wandb.ai/quickstart) for visualizing training plots. This is optional but if you use it, make sure you are logged in by running `wandb login`.
147
+
148
+ For more information on the `train` script see the previous tutorial: [`examples/4_train_policy_with_script.md`](../examples/4_train_policy_with_script.md)
149
+
150
+ Training should take several hours. You will find checkpoints in `outputs/train/act_aloha_test/checkpoints`.
151
+
152
+ ## Evaluate your policy
153
+
154
+ You can use the `record` function from [`lerobot/scripts/control_robot.py`](../lerobot/scripts/control_robot.py) but with a policy checkpoint as input. For instance, run this command to record 10 evaluation episodes:
155
+ ```bash
156
+ python lerobot/scripts/control_robot.py \
157
+ --robot.type=aloha \
158
+ --control.type=record \
159
+ --control.fps=30 \
160
+ --control.single_task="Grasp a lego block and put it in the bin." \
161
+ --control.repo_id=${HF_USER}/eval_act_aloha_test \
162
+ --control.tags='["tutorial"]' \
163
+ --control.warmup_time_s=5 \
164
+ --control.episode_time_s=30 \
165
+ --control.reset_time_s=30 \
166
+ --control.num_episodes=10 \
167
+ --control.push_to_hub=true \
168
+ --control.policy.path=outputs/train/act_aloha_test/checkpoints/last/pretrained_model \
169
+ --control.num_image_writer_processes=1
170
+ ```
171
+
172
+ As you can see, it's almost the same command as the one previously used to record your training dataset. Three things changed:
173
+ 1. There is an additional `--control.policy.path` argument which indicates the path to your policy checkpoint (e.g. `outputs/train/act_aloha_test/checkpoints/last/pretrained_model`). You can also use the model repository if you uploaded a model checkpoint to the hub (e.g. `${HF_USER}/act_aloha_test`). The sketch after this list shows how to load this checkpoint directly in Python as a quick sanity check.
174
+ 2. The dataset name begins with `eval` to reflect that you are running inference (e.g. `${HF_USER}/eval_act_aloha_test`).
175
+ 3. We use `--control.num_image_writer_processes=1` instead of the default value (`0`). On our computer, using a dedicated process to write images from the 4 cameras to disk allowed us to reach a constant 30 fps during inference. Feel free to explore different values for `--control.num_image_writer_processes`.
176
+
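+ As a quick sanity check of a checkpoint outside of the recording loop, you can load it directly in Python. This is a minimal sketch; it assumes the ACT policy class is named `ACTPolicy` and lives in `modeling_act.py`, mirroring `DiffusionPolicy` in `modeling_diffusion.py`:
+ ```python
+ from lerobot.common.policies.act.modeling_act import ACTPolicy  # assumed module/class name
+
+ # Load the last checkpoint produced by the training run above.
+ policy = ACTPolicy.from_pretrained("outputs/train/act_aloha_test/checkpoints/last/pretrained_model")
+ policy.eval()
+ policy.to("cuda")  # or "mps" / "cpu" depending on your hardware
+
+ # Print the number of trainable parameters as a basic sanity check.
+ print(sum(p.numel() for p in policy.parameters() if p.requires_grad))
+ ```
+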
177
+ ## More
178
+
179
+ Follow this [previous tutorial](https://github.com/huggingface/lerobot/blob/main/examples/7_get_started_with_real_robot.md#4-train-a-policy-on-your-data) for a more in-depth explanation.
180
+
181
+ If you have any question or need help, please reach out on Discord in the channel `#aloha-arm`.
examples/advanced/1_add_image_transforms.py ADDED
@@ -0,0 +1,67 @@
1
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """
16
+ This script demonstrates how to use torchvision's image transforms with LeRobotDataset for data
17
+ augmentation purposes. The transformations are passed to the dataset as an argument upon creation, and
18
+ transforms are applied to the observation images before they are returned in the dataset's __getitem__.
19
+ """
20
+
21
+ from pathlib import Path
22
+
23
+ from torchvision.transforms import ToPILImage, v2
24
+
25
+ from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
26
+
27
+ dataset_repo_id = "lerobot/aloha_static_screw_driver"
28
+
29
+ # Create a LeRobotDataset with no transformations
30
+ dataset = LeRobotDataset(dataset_repo_id, episodes=[0])
31
+ # This is equivalent to `dataset = LeRobotDataset(dataset_repo_id, image_transforms=None)`
32
+
33
+ # Get the index of the first observation in the first episode
34
+ first_idx = dataset.episode_data_index["from"][0].item()
35
+
36
+ # Get the frame corresponding to the first camera
37
+ frame = dataset[first_idx][dataset.meta.camera_keys[0]]
38
+
39
+
40
+ # Define the transformations
41
+ transforms = v2.Compose(
42
+ [
43
+ v2.ColorJitter(brightness=(0.5, 1.5)),
44
+ v2.ColorJitter(contrast=(0.5, 1.5)),
45
+ v2.ColorJitter(hue=(-0.1, 0.1)),
46
+ v2.RandomAdjustSharpness(sharpness_factor=2, p=1),
47
+ ]
48
+ )
49
+
50
+ # Create another LeRobotDataset with the defined transformations
51
+ transformed_dataset = LeRobotDataset(dataset_repo_id, episodes=[0], image_transforms=transforms)
52
+
53
+ # Get a frame from the transformed dataset
54
+ transformed_frame = transformed_dataset[first_idx][transformed_dataset.meta.camera_keys[0]]
55
+
56
+ # Create a directory to store output images
57
+ output_dir = Path("outputs/image_transforms")
58
+ output_dir.mkdir(parents=True, exist_ok=True)
59
+
60
+ # Save the original frame
61
+ to_pil = ToPILImage()
62
+ to_pil(frame).save(output_dir / "original_frame.png", quality=100)
63
+ print(f"Original frame saved to {output_dir / 'original_frame.png'}.")
64
+
65
+ # Save the transformed frame
66
+ to_pil(transformed_frame).save(output_dir / "transformed_frame.png", quality=100)
67
+ print(f"Transformed frame saved to {output_dir / 'transformed_frame.png'}.")
examples/advanced/2_calculate_validation_loss.py ADDED
@@ -0,0 +1,104 @@
1
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """This script demonstrates how to slice a dataset and calculate the loss on a subset of the data.
16
+
17
+ This technique can be useful for debugging and testing purposes, as well as identifying whether a policy
18
+ is learning effectively.
19
+
20
+ Furthermore, relying on validation loss to evaluate performance is generally not considered a good practice,
21
+ especially in the context of imitation learning. The most reliable approach is to evaluate the policy directly
22
+ on the target environment, whether that be in simulation or the real world.
23
+ """
24
+
25
+ import math
26
+
27
+ import torch
28
+
29
+ from lerobot.common.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata
30
+ from lerobot.common.policies.diffusion.modeling_diffusion import DiffusionPolicy
31
+
32
+
33
+ def main():
34
+ device = torch.device("cuda")
35
+
36
+ # Download the diffusion policy for pusht environment
37
+ pretrained_policy_path = "lerobot/diffusion_pusht"
38
+ # OR uncomment the following to evaluate a policy from the local outputs/train folder.
39
+ # pretrained_policy_path = Path("outputs/train/example_pusht_diffusion")
40
+
41
+ policy = DiffusionPolicy.from_pretrained(pretrained_policy_path)
42
+ policy.eval()
43
+ policy.to(device)
44
+
45
+ # Set up the dataset.
46
+ delta_timestamps = {
47
+ # Load the previous image and state at -0.1 seconds before current frame,
48
+ # then load current image and state corresponding to 0.0 second.
49
+ "observation.image": [-0.1, 0.0],
50
+ "observation.state": [-0.1, 0.0],
51
+ # Load the previous action (-0.1), the next action to be executed (0.0),
52
+ # and 14 future actions with a 0.1 seconds spacing. All these actions will be
53
+ # used to calculate the loss.
54
+ "action": [-0.1, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4],
55
+ }
56
+
57
+ # Load the last 10% of episodes of the dataset as a validation set.
58
+ # - Load dataset metadata
59
+ dataset_metadata = LeRobotDatasetMetadata("lerobot/pusht")
60
+ # - Calculate train and val episodes
61
+ total_episodes = dataset_metadata.total_episodes
62
+ episodes = list(range(dataset_metadata.total_episodes))
63
+ num_train_episodes = math.floor(total_episodes * 90 / 100)
64
+ train_episodes = episodes[:num_train_episodes]
65
+ val_episodes = episodes[num_train_episodes:]
66
+ print(f"Number of episodes in full dataset: {total_episodes}")
67
+ print(f"Number of episodes in training dataset (90% subset): {len(train_episodes)}")
68
+ print(f"Number of episodes in validation dataset (10% subset): {len(val_episodes)}")
69
+ # - Load train and val datasets
70
+ train_dataset = LeRobotDataset(
71
+ "lerobot/pusht", episodes=train_episodes, delta_timestamps=delta_timestamps
72
+ )
73
+ val_dataset = LeRobotDataset("lerobot/pusht", episodes=val_episodes, delta_timestamps=delta_timestamps)
74
+ print(f"Number of frames in training dataset (90% subset): {len(train_dataset)}")
75
+ print(f"Number of frames in validation dataset (10% subset): {len(val_dataset)}")
76
+
77
+ # Create dataloader for evaluation.
78
+ val_dataloader = torch.utils.data.DataLoader(
79
+ val_dataset,
80
+ num_workers=4,
81
+ batch_size=64,
82
+ shuffle=False,
83
+ pin_memory=device != torch.device("cpu"),
84
+ drop_last=False,
85
+ )
86
+
87
+ # Run validation loop.
88
+ loss_cumsum = 0
89
+ n_examples_evaluated = 0
90
+ for batch in val_dataloader:
91
+ batch = {k: v.to(device, non_blocking=True) for k, v in batch.items()}
92
+ loss, _ = policy.forward(batch)
93
+
94
+ loss_cumsum += loss.item()
95
+ n_examples_evaluated += batch["index"].shape[0]
96
+
97
+ # Calculate the average loss over the validation set.
98
+ average_loss = loss_cumsum / n_examples_evaluated
99
+
100
+ print(f"Average loss on validation set: {average_loss:.4f}")
101
+
102
+
103
+ if __name__ == "__main__":
104
+ main()
examples/port_datasets/pusht_zarr.py ADDED
@@ -0,0 +1,243 @@
1
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import shutil
16
+ from pathlib import Path
17
+
18
+ import numpy as np
19
+ from huggingface_hub import HfApi
20
+
21
+ from lerobot.common.constants import HF_LEROBOT_HOME
22
+ from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset
23
+ from lerobot.common.datasets.push_dataset_to_hub._download_raw import download_raw
24
+
25
+ PUSHT_TASK = "Push the T-shaped blue block onto the T-shaped green target surface."
26
+ PUSHT_FEATURES = {
27
+ "observation.state": {
28
+ "dtype": "float32",
29
+ "shape": (2,),
30
+ "names": {
31
+ "axes": ["x", "y"],
32
+ },
33
+ },
34
+ "action": {
35
+ "dtype": "float32",
36
+ "shape": (2,),
37
+ "names": {
38
+ "axes": ["x", "y"],
39
+ },
40
+ },
41
+ "next.reward": {
42
+ "dtype": "float32",
43
+ "shape": (1,),
44
+ "names": None,
45
+ },
46
+ "next.success": {
47
+ "dtype": "bool",
48
+ "shape": (1,),
49
+ "names": None,
50
+ },
51
+ "observation.environment_state": {
52
+ "dtype": "float32",
53
+ "shape": (16,),
54
+ "names": [
55
+ "keypoints",
56
+ ],
57
+ },
58
+ "observation.image": {
59
+ "dtype": None,
60
+ "shape": (3, 96, 96),
61
+ "names": [
62
+ "channels",
63
+ "height",
64
+ "width",
65
+ ],
66
+ },
67
+ }
68
+
69
+
70
+ def build_features(mode: str) -> dict:
71
+ features = PUSHT_FEATURES
72
+ if mode == "keypoints":
73
+ features.pop("observation.image")
74
+ else:
75
+ features.pop("observation.environment_state")
76
+ features["observation.image"]["dtype"] = mode
77
+
78
+ return features
79
+
80
+
81
+ def load_raw_dataset(zarr_path: Path):
82
+ try:
83
+ from lerobot.common.datasets.push_dataset_to_hub._diffusion_policy_replay_buffer import (
84
+ ReplayBuffer as DiffusionPolicyReplayBuffer,
85
+ )
86
+ except ModuleNotFoundError as e:
87
+ print("`gym_pusht` is not installed. Please install it with `pip install 'lerobot[gym_pusht]'`")
88
+ raise e
89
+
90
+ zarr_data = DiffusionPolicyReplayBuffer.copy_from_path(zarr_path)
91
+ return zarr_data
92
+
93
+
94
+ def calculate_coverage(zarr_data):
95
+ try:
96
+ import pymunk
97
+ from gym_pusht.envs.pusht import PushTEnv, pymunk_to_shapely
98
+ except ModuleNotFoundError as e:
99
+ print("`gym_pusht` is not installed. Please install it with `pip install 'lerobot[gym_pusht]'`")
100
+ raise e
101
+
102
+ block_pos = zarr_data["state"][:, 2:4]
103
+ block_angle = zarr_data["state"][:, 4]
104
+
105
+ num_frames = len(block_pos)
106
+
107
+ coverage = np.zeros((num_frames,), dtype=np.float32)
108
+ # 8 keypoints with 2 coords each
109
+ keypoints = np.zeros((num_frames, 16), dtype=np.float32)
110
+
111
+ # Set x, y, theta (in radians)
112
+ goal_pos_angle = np.array([256, 256, np.pi / 4])
113
+ goal_body = PushTEnv.get_goal_pose_body(goal_pos_angle)
114
+
115
+ for i in range(num_frames):
116
+ space = pymunk.Space()
117
+ space.gravity = 0, 0
118
+ space.damping = 0
119
+
120
+ # Add walls.
121
+ walls = [
122
+ PushTEnv.add_segment(space, (5, 506), (5, 5), 2),
123
+ PushTEnv.add_segment(space, (5, 5), (506, 5), 2),
124
+ PushTEnv.add_segment(space, (506, 5), (506, 506), 2),
125
+ PushTEnv.add_segment(space, (5, 506), (506, 506), 2),
126
+ ]
127
+ space.add(*walls)
128
+
129
+ block_body, block_shapes = PushTEnv.add_tee(space, block_pos[i].tolist(), block_angle[i].item())
130
+ goal_geom = pymunk_to_shapely(goal_body, block_body.shapes)
131
+ block_geom = pymunk_to_shapely(block_body, block_body.shapes)
132
+ intersection_area = goal_geom.intersection(block_geom).area
133
+ goal_area = goal_geom.area
134
+ coverage[i] = intersection_area / goal_area
135
+ keypoints[i] = PushTEnv.get_keypoints(block_shapes).flatten()
136
+
137
+ return coverage, keypoints
138
+
139
+
140
+ def calculate_success(coverage: float, success_threshold: float):
141
+ return coverage > success_threshold
142
+
143
+
144
+ def calculate_reward(coverage: float, success_threshold: float):
145
+ return np.clip(coverage / success_threshold, 0, 1)
146
+
147
+
148
+ def main(raw_dir: Path, repo_id: str, mode: str = "video", push_to_hub: bool = True):
149
+ if mode not in ["video", "image", "keypoints"]:
150
+ raise ValueError(mode)
151
+
152
+ if (HF_LEROBOT_HOME / repo_id).exists():
153
+ shutil.rmtree(HF_LEROBOT_HOME / repo_id)
154
+
155
+ if not raw_dir.exists():
156
+ download_raw(raw_dir, repo_id="lerobot-raw/pusht_raw")
157
+
158
+ zarr_data = load_raw_dataset(zarr_path=raw_dir / "pusht_cchi_v7_replay.zarr")
159
+
160
+ env_state = zarr_data["state"][:]
161
+ agent_pos = env_state[:, :2]
162
+
163
+ action = zarr_data["action"][:]
164
+ image = zarr_data["img"] # (b, h, w, c)
165
+
166
+ if image.dtype == np.float32 and image.max() == np.float32(255):
167
+ # HACK: images are loaded as float32 but they actually encode uint8 data
168
+ image = image.astype(np.uint8)
169
+
170
+ episode_data_index = {
171
+ "from": np.concatenate(([0], zarr_data.meta["episode_ends"][:-1])),
172
+ "to": zarr_data.meta["episode_ends"],
173
+ }
174
+
175
+ # Calculate success and reward based on the overlapping area
176
+ # of the T-object and the T-area.
177
+ coverage, keypoints = calculate_coverage(zarr_data)
178
+ success = calculate_success(coverage, success_threshold=0.95)
179
+ reward = calculate_reward(coverage, success_threshold=0.95)
180
+
181
+ features = build_features(mode)
182
+ dataset = LeRobotDataset.create(
183
+ repo_id=repo_id,
184
+ fps=10,
185
+ robot_type="2d pointer",
186
+ features=features,
187
+ image_writer_threads=4,
188
+ )
189
+ episodes = range(len(episode_data_index["from"]))
190
+ for ep_idx in episodes:
191
+ from_idx = episode_data_index["from"][ep_idx]
192
+ to_idx = episode_data_index["to"][ep_idx]
193
+ num_frames = to_idx - from_idx
194
+
195
+ for frame_idx in range(num_frames):
196
+ i = from_idx + frame_idx
197
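+ # (frame_idx < num_frames - 1) is 1 for every frame except the last one of the episode,
+ # so `next.reward`/`next.success` below are taken from the following timestep, except at the episode end.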
+ idx = i + (frame_idx < num_frames - 1)
198
+ frame = {
199
+ "action": action[i],
200
+ # Shift reward and success by +1 until the last item of the episode
201
+ "next.reward": reward[idx : idx + 1],
202
+ "next.success": success[idx : idx + 1],
203
+ "task": PUSHT_TASK,
204
+ }
205
+
206
+ frame["observation.state"] = agent_pos[i]
207
+
208
+ if mode == "keypoints":
209
+ frame["observation.environment_state"] = keypoints[i]
210
+ else:
211
+ frame["observation.image"] = image[i]
212
+
213
+ dataset.add_frame(frame)
214
+
215
+ dataset.save_episode()
216
+
217
+ if push_to_hub:
218
+ dataset.push_to_hub()
219
+ hub_api = HfApi()
220
+ hub_api.create_tag(repo_id, tag=CODEBASE_VERSION, repo_type="dataset")
221
+
222
+
223
+ if __name__ == "__main__":
224
+ # To try this script, modify the repo id with your own Hugging Face user (e.g. cadene/pusht)
225
+ repo_id = "lerobot/pusht"
226
+
227
+ modes = ["video", "image", "keypoints"]
228
+ # Uncomment if you want to try with a specific mode
229
+ # modes = ["video"]
230
+ # modes = ["image"]
231
+ # modes = ["keypoints"]
232
+
233
+ raw_dir = Path("data/lerobot-raw/pusht_raw")
234
+ for mode in modes:
235
+ if mode in ["image", "keypoints"]:
236
+ repo_id += f"_{mode}"
237
+
238
+ # download and load raw dataset, create LeRobotDataset, populate it, push to hub
239
+ main(raw_dir, repo_id=repo_id, mode=mode)
240
+
241
+ # Uncomment if you want to load the local dataset and explore it
242
+ # dataset = LeRobotDataset(repo_id=repo_id)
243
+ # breakpoint()
lerobot/__init__.py ADDED
@@ -0,0 +1,217 @@
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ """
17
+ This file contains lists of available environments, datasets and policies to reflect the current state of the LeRobot library.
18
+ We do not want to import all the dependencies, but instead we keep it lightweight to ensure fast access to these variables.
19
+
20
+ Example:
21
+ ```python
22
+ import lerobot
23
+ print(lerobot.available_envs)
24
+ print(lerobot.available_tasks_per_env)
25
+ print(lerobot.available_datasets)
26
+ print(lerobot.available_datasets_per_env)
27
+ print(lerobot.available_real_world_datasets)
28
+ print(lerobot.available_policies)
29
+ print(lerobot.available_policies_per_env)
30
+ print(lerobot.available_robots)
31
+ print(lerobot.available_cameras)
32
+ print(lerobot.available_motors)
33
+ ```
34
+
35
+ When implementing a new dataset loadable with LeRobotDataset follow these steps:
36
+ - Update `available_datasets_per_env` in `lerobot/__init__.py`
37
+
38
+ When implementing a new environment (e.g. `gym_aloha`), follow these steps:
39
+ - Update `available_tasks_per_env` and `available_datasets_per_env` in `lerobot/__init__.py`
40
+
41
+ When implementing a new policy class (e.g. `DiffusionPolicy`) follow these steps:
42
+ - Update `available_policies` and `available_policies_per_env`, in `lerobot/__init__.py`
43
+ - Set the required `name` class attribute.
44
+ - Update variables in `tests/test_available.py` by importing your new Policy class
45
+ """
46
+
47
+ import itertools
48
+
49
+ from lerobot.__version__ import __version__ # noqa: F401
50
+
51
+ # TODO(rcadene): Improve policies and envs. As of now, an item in `available_policies`
52
+ # refers to a yaml file AND a modeling name. Same for `available_envs` which refers to
53
+ # a yaml file AND a environment name. The difference should be more obvious.
54
+ available_tasks_per_env = {
55
+ "aloha": [
56
+ "AlohaInsertion-v0",
57
+ "AlohaTransferCube-v0",
58
+ ],
59
+ "pusht": ["PushT-v0"],
60
+ "xarm": ["XarmLift-v0"],
61
+ }
62
+ available_envs = list(available_tasks_per_env.keys())
63
+
64
+ available_datasets_per_env = {
65
+ "aloha": [
66
+ "lerobot/aloha_sim_insertion_human",
67
+ "lerobot/aloha_sim_insertion_scripted",
68
+ "lerobot/aloha_sim_transfer_cube_human",
69
+ "lerobot/aloha_sim_transfer_cube_scripted",
70
+ "lerobot/aloha_sim_insertion_human_image",
71
+ "lerobot/aloha_sim_insertion_scripted_image",
72
+ "lerobot/aloha_sim_transfer_cube_human_image",
73
+ "lerobot/aloha_sim_transfer_cube_scripted_image",
74
+ ],
75
+ # TODO(alexander-soare): Add "lerobot/pusht_keypoints". Right now we can't because this is too tightly
76
+ # coupled with tests.
77
+ "pusht": ["lerobot/pusht", "lerobot/pusht_image"],
78
+ "xarm": [
79
+ "lerobot/xarm_lift_medium",
80
+ "lerobot/xarm_lift_medium_replay",
81
+ "lerobot/xarm_push_medium",
82
+ "lerobot/xarm_push_medium_replay",
83
+ "lerobot/xarm_lift_medium_image",
84
+ "lerobot/xarm_lift_medium_replay_image",
85
+ "lerobot/xarm_push_medium_image",
86
+ "lerobot/xarm_push_medium_replay_image",
87
+ ],
88
+ }
89
+
90
+ available_real_world_datasets = [
91
+ "lerobot/aloha_mobile_cabinet",
92
+ "lerobot/aloha_mobile_chair",
93
+ "lerobot/aloha_mobile_elevator",
94
+ "lerobot/aloha_mobile_shrimp",
95
+ "lerobot/aloha_mobile_wash_pan",
96
+ "lerobot/aloha_mobile_wipe_wine",
97
+ "lerobot/aloha_static_battery",
98
+ "lerobot/aloha_static_candy",
99
+ "lerobot/aloha_static_coffee",
100
+ "lerobot/aloha_static_coffee_new",
101
+ "lerobot/aloha_static_cups_open",
102
+ "lerobot/aloha_static_fork_pick_up",
103
+ "lerobot/aloha_static_pingpong_test",
104
+ "lerobot/aloha_static_pro_pencil",
105
+ "lerobot/aloha_static_screw_driver",
106
+ "lerobot/aloha_static_tape",
107
+ "lerobot/aloha_static_thread_velcro",
108
+ "lerobot/aloha_static_towel",
109
+ "lerobot/aloha_static_vinh_cup",
110
+ "lerobot/aloha_static_vinh_cup_left",
111
+ "lerobot/aloha_static_ziploc_slide",
112
+ "lerobot/umi_cup_in_the_wild",
113
+ "lerobot/unitreeh1_fold_clothes",
114
+ "lerobot/unitreeh1_rearrange_objects",
115
+ "lerobot/unitreeh1_two_robot_greeting",
116
+ "lerobot/unitreeh1_warehouse",
117
+ "lerobot/nyu_rot_dataset",
118
+ "lerobot/utokyo_saytap",
119
+ "lerobot/imperialcollege_sawyer_wrist_cam",
120
+ "lerobot/utokyo_xarm_bimanual",
121
+ "lerobot/tokyo_u_lsmo",
122
+ "lerobot/utokyo_pr2_opening_fridge",
123
+ "lerobot/cmu_franka_exploration_dataset",
124
+ "lerobot/cmu_stretch",
125
+ "lerobot/asu_table_top",
126
+ "lerobot/utokyo_pr2_tabletop_manipulation",
127
+ "lerobot/utokyo_xarm_pick_and_place",
128
+ "lerobot/ucsd_kitchen_dataset",
129
+ "lerobot/austin_buds_dataset",
130
+ "lerobot/dlr_sara_grid_clamp",
131
+ "lerobot/conq_hose_manipulation",
132
+ "lerobot/columbia_cairlab_pusht_real",
133
+ "lerobot/dlr_sara_pour",
134
+ "lerobot/dlr_edan_shared_control",
135
+ "lerobot/ucsd_pick_and_place_dataset",
136
+ "lerobot/berkeley_cable_routing",
137
+ "lerobot/nyu_franka_play_dataset",
138
+ "lerobot/austin_sirius_dataset",
139
+ "lerobot/cmu_play_fusion",
140
+ "lerobot/berkeley_gnm_sac_son",
141
+ "lerobot/nyu_door_opening_surprising_effectiveness",
142
+ "lerobot/berkeley_fanuc_manipulation",
143
+ "lerobot/jaco_play",
144
+ "lerobot/viola",
145
+ "lerobot/kaist_nonprehensile",
146
+ "lerobot/berkeley_mvp",
147
+ "lerobot/uiuc_d3field",
148
+ "lerobot/berkeley_gnm_recon",
149
+ "lerobot/austin_sailor_dataset",
150
+ "lerobot/utaustin_mutex",
151
+ "lerobot/roboturk",
152
+ "lerobot/stanford_hydra_dataset",
153
+ "lerobot/berkeley_autolab_ur5",
154
+ "lerobot/stanford_robocook",
155
+ "lerobot/toto",
156
+ "lerobot/fmb",
157
+ "lerobot/droid_100",
158
+ "lerobot/berkeley_rpt",
159
+ "lerobot/stanford_kuka_multimodal_dataset",
160
+ "lerobot/iamlab_cmu_pickup_insert",
161
+ "lerobot/taco_play",
162
+ "lerobot/berkeley_gnm_cory_hall",
163
+ "lerobot/usc_cloth_sim",
164
+ ]
165
+
166
+ available_datasets = sorted(
167
+ set(itertools.chain(*available_datasets_per_env.values(), available_real_world_datasets))
168
+ )
169
+
170
+ # lists all available policies from `lerobot/common/policies`
171
+ available_policies = [
172
+ "act",
173
+ "diffusion",
174
+ "tdmpc",
175
+ "vqbet",
176
+ ]
177
+
178
+ # lists all available robots from `lerobot/common/robot_devices/robots`
179
+ available_robots = [
180
+ "koch",
181
+ "koch_bimanual",
182
+ "aloha",
183
+ "so100",
184
+ "moss",
185
+ ]
186
+
187
+ # lists all available cameras from `lerobot/common/robot_devices/cameras`
188
+ available_cameras = [
189
+ "opencv",
190
+ "intelrealsense",
191
+ ]
192
+
193
+ # lists all available motors from `lerobot/common/robot_devices/motors`
194
+ available_motors = [
195
+ "dynamixel",
196
+ "feetech",
197
+ ]
198
+
199
+ # keys and values refer to yaml files
200
+ available_policies_per_env = {
201
+ "aloha": ["act"],
202
+ "pusht": ["diffusion", "vqbet"],
203
+ "xarm": ["tdmpc"],
204
+ "koch_real": ["act_koch_real"],
205
+ "aloha_real": ["act_aloha_real"],
206
+ }
207
+
208
+ env_task_pairs = [(env, task) for env, tasks in available_tasks_per_env.items() for task in tasks]
209
+ env_dataset_pairs = [
210
+ (env, dataset) for env, datasets in available_datasets_per_env.items() for dataset in datasets
211
+ ]
212
+ env_dataset_policy_triplets = [
213
+ (env, dataset, policy)
214
+ for env, datasets in available_datasets_per_env.items()
215
+ for dataset in datasets
216
+ for policy in available_policies_per_env[env]
217
+ ]
lerobot/__version__.py ADDED
@@ -0,0 +1,23 @@
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ """To enable `lerobot.__version__`"""
17
+
18
+ from importlib.metadata import PackageNotFoundError, version
19
+
20
+ try:
21
+ __version__ = version("lerobot")
22
+ except PackageNotFoundError:
23
+ __version__ = "unknown"
lerobot/common/constants.py ADDED
@@ -0,0 +1,45 @@
1
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # keys
15
+ import os
16
+ from pathlib import Path
17
+
18
+ from huggingface_hub.constants import HF_HOME
19
+
20
+ OBS_ENV = "observation.environment_state"
21
+ OBS_ROBOT = "observation.state"
22
+ OBS_IMAGE = "observation.image"
23
+ OBS_IMAGES = "observation.images"
24
+ ACTION = "action"
25
+
26
+ # files & directories
27
+ CHECKPOINTS_DIR = "checkpoints"
28
+ LAST_CHECKPOINT_LINK = "last"
29
+ PRETRAINED_MODEL_DIR = "pretrained_model"
30
+ TRAINING_STATE_DIR = "training_state"
31
+ RNG_STATE = "rng_state.safetensors"
32
+ TRAINING_STEP = "training_step.json"
33
+ OPTIMIZER_STATE = "optimizer_state.safetensors"
34
+ OPTIMIZER_PARAM_GROUPS = "optimizer_param_groups.json"
35
+ SCHEDULER_STATE = "scheduler_state.json"
36
+
37
+ # cache dir
38
+ default_cache_path = Path(HF_HOME) / "lerobot"
39
+ HF_LEROBOT_HOME = Path(os.getenv("HF_LEROBOT_HOME", default_cache_path)).expanduser()
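+ # Typically resolves to `~/.cache/huggingface/lerobot`, since `HF_HOME` defaults to `~/.cache/huggingface`.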
40
+
41
+ if "LEROBOT_HOME" in os.environ:
42
+ raise ValueError(
43
+ f"You have a 'LEROBOT_HOME' environment variable set to '{os.getenv('LEROBOT_HOME')}'.\n"
44
+ "'LEROBOT_HOME' is deprecated, please use 'HF_LEROBOT_HOME' instead."
45
+ )
lerobot/common/datasets/backward_compatibility.py ADDED
1
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import packaging.version
16
+
17
+ V2_MESSAGE = """
18
+ The dataset you requested ({repo_id}) is in {version} format.
19
+
20
+ We introduced a new format since v2.0 which is not backward compatible with v1.x.
21
+ Please, use our conversion script. Modify the following command with your own task description:
22
+ ```
23
+ python lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py \\
24
+ --repo-id {repo_id} \\
25
+ --single-task "TASK DESCRIPTION." # <---- /!\\ Replace TASK DESCRIPTION /!\\
26
+ ```
27
+
28
+ A few examples to replace TASK DESCRIPTION: "Pick up the blue cube and place it into the bin.", "Insert the
29
+ peg into the socket.", "Slide open the ziploc bag.", "Take the elevator to the 1st floor.", "Open the top
30
+ cabinet, store the pot inside it then close the cabinet.", "Push the T-shaped block onto the T-shaped
31
+ target.", "Grab the spray paint on the shelf and place it in the bin on top of the robot dog.", "Fold the
32
+ sweatshirt.", ...
33
+
34
+ If you encounter a problem, contact LeRobot maintainers on [Discord](https://discord.com/invite/s3KuuzsPFb)
35
+ or open an [issue on GitHub](https://github.com/huggingface/lerobot/issues/new/choose).
36
+ """
37
+
38
+ V21_MESSAGE = """
39
+ The dataset you requested ({repo_id}) is in {version} format.
40
+ While the current version of LeRobot is backward-compatible with it, the version of your dataset still uses global
41
+ stats instead of per-episode stats. Update your dataset stats to the new format using this command:
42
+ ```
43
+ python lerobot/common/datasets/v21/convert_dataset_v20_to_v21.py --repo-id={repo_id}
44
+ ```
45
+
46
+ If you encounter a problem, contact LeRobot maintainers on [Discord](https://discord.com/invite/s3KuuzsPFb)
47
+ or open an [issue on GitHub](https://github.com/huggingface/lerobot/issues/new/choose).
48
+ """
49
+
50
+ FUTURE_MESSAGE = """
51
+ The dataset you requested ({repo_id}) is only available in {version} format.
52
+ As we cannot ensure forward compatibility with it, please update your current version of lerobot.
53
+ """
54
+
55
+
56
+ class CompatibilityError(Exception): ...
57
+
58
+
59
+ class BackwardCompatibilityError(CompatibilityError):
60
+ def __init__(self, repo_id: str, version: packaging.version.Version):
61
+ message = V2_MESSAGE.format(repo_id=repo_id, version=version)
62
+ super().__init__(message)
63
+
64
+
65
+ class ForwardCompatibilityError(CompatibilityError):
66
+ def __init__(self, repo_id: str, version: packaging.version.Version):
67
+ message = FUTURE_MESSAGE.format(repo_id=repo_id, version=version)
68
+ super().__init__(message)
lerobot/common/datasets/card_template.md ADDED
@@ -0,0 +1,27 @@
1
+ ---
2
+ # For reference on dataset card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/datasetcard.md?plain=1
3
+ # Doc / guide: https://huggingface.co/docs/hub/datasets-cards
4
+ {{ card_data }}
5
+ ---
6
+
7
+ This dataset was created using [LeRobot](https://github.com/huggingface/lerobot).
8
+
9
+ ## Dataset Description
10
+
11
+ {{ dataset_description | default("", true) }}
12
+
13
+ - **Homepage:** {{ url | default("[More Information Needed]", true)}}
14
+ - **Paper:** {{ paper | default("[More Information Needed]", true)}}
15
+ - **License:** {{ license | default("[More Information Needed]", true)}}
16
+
17
+ ## Dataset Structure
18
+
19
+ {{ dataset_structure | default("[More Information Needed]", true)}}
20
+
21
+ ## Citation
22
+
23
+ **BibTeX:**
24
+
25
+ ```bibtex
26
+ {{ citation_bibtex | default("[More Information Needed]", true)}}
27
+ ```
lerobot/common/datasets/compute_stats.py ADDED
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ import numpy as np
17
+
18
+ from lerobot.common.datasets.utils import load_image_as_numpy
19
+
20
+
21
+ def estimate_num_samples(
22
+ dataset_len: int, min_num_samples: int = 100, max_num_samples: int = 10_000, power: float = 0.75
23
+ ) -> int:
24
+ """Heuristic to estimate the number of samples based on dataset size.
25
+ The power controls the sample growth relative to dataset size.
26
+ Lower the power for less number of samples.
27
+
28
+ For default arguments, we have:
29
+ - from 1 to ~500, num_samples=100
30
+ - at 1000, num_samples=177
31
+ - at 2000, num_samples=299
32
+ - at 5000, num_samples=594
33
+ - at 10000, num_samples=1000
34
+ - at 20000, num_samples=1681
35
+ """
36
+ if dataset_len < min_num_samples:
37
+ min_num_samples = dataset_len
38
+ return max(min_num_samples, min(int(dataset_len**power), max_num_samples))
39
+
40
+
41
+ def sample_indices(data_len: int) -> list[int]:
42
+ num_samples = estimate_num_samples(data_len)
43
+ return np.round(np.linspace(0, data_len - 1, num_samples)).astype(int).tolist()
44
+
45
+
46
+ def auto_downsample_height_width(img: np.ndarray, target_size: int = 150, max_size_threshold: int = 300):
47
+ _, height, width = img.shape
48
+
49
+ if max(width, height) < max_size_threshold:
50
+ # no downsampling needed
51
+ return img
52
+
53
+ downsample_factor = int(width / target_size) if width > height else int(height / target_size)
54
+ return img[:, ::downsample_factor, ::downsample_factor]
55
+
56
+
57
+ def sample_images(image_paths: list[str]) -> np.ndarray:
58
+ sampled_indices = sample_indices(len(image_paths))
59
+
60
+ images = None
61
+ for i, idx in enumerate(sampled_indices):
62
+ path = image_paths[idx]
63
+ # we load as uint8 to reduce memory usage
64
+ img = load_image_as_numpy(path, dtype=np.uint8, channel_first=True)
65
+ img = auto_downsample_height_width(img)
66
+
67
+ if images is None:
68
+ images = np.empty((len(sampled_indices), *img.shape), dtype=np.uint8)
69
+
70
+ images[i] = img
71
+
72
+ return images
73
+
74
+
75
+ def get_feature_stats(array: np.ndarray, axis: tuple, keepdims: bool) -> dict[str, np.ndarray]:
76
+ return {
77
+ "min": np.min(array, axis=axis, keepdims=keepdims),
78
+ "max": np.max(array, axis=axis, keepdims=keepdims),
79
+ "mean": np.mean(array, axis=axis, keepdims=keepdims),
80
+ "std": np.std(array, axis=axis, keepdims=keepdims),
81
+ "count": np.array([len(array)]),
82
+ }
83
+
84
+
85
+ def compute_episode_stats(episode_data: dict[str, list[str] | np.ndarray], features: dict) -> dict:
86
+ ep_stats = {}
87
+ for key, data in episode_data.items():
88
+ if features[key]["dtype"] == "string":
89
+ continue # HACK: we should receive np.arrays of strings
90
+ elif features[key]["dtype"] in ["image", "video"]:
91
+ ep_ft_array = sample_images(data) # data is a list of image paths
92
+ axes_to_reduce = (0, 2, 3) # keep channel dim
93
+ keepdims = True
94
+ else:
95
+ ep_ft_array = data # data is already a np.ndarray
96
+ axes_to_reduce = 0 # compute stats over the first axis
97
+ keepdims = data.ndim == 1 # keep as np.array
98
+
99
+ ep_stats[key] = get_feature_stats(ep_ft_array, axis=axes_to_reduce, keepdims=keepdims)
100
+
101
+ # finally, we normalize and remove batch dim for images
102
+ if features[key]["dtype"] in ["image", "video"]:
103
+ ep_stats[key] = {
104
+ k: v if k == "count" else np.squeeze(v / 255.0, axis=0) for k, v in ep_stats[key].items()
105
+ }
106
+
107
+ return ep_stats
108
+
109
+
110
+ def _assert_type_and_shape(stats_list: list[dict[str, dict]]):
111
+ for i in range(len(stats_list)):
112
+ for fkey in stats_list[i]:
113
+ for k, v in stats_list[i][fkey].items():
114
+ if not isinstance(v, np.ndarray):
115
+ raise ValueError(
116
+ f"Stats must be composed of numpy array, but key '{k}' of feature '{fkey}' is of type '{type(v)}' instead."
117
+ )
118
+ if v.ndim == 0:
119
+ raise ValueError("Number of dimensions must be at least 1, and is 0 instead.")
120
+ if k == "count" and v.shape != (1,):
121
+ raise ValueError(f"Shape of 'count' must be (1), but is {v.shape} instead.")
122
+ if "image" in fkey and k != "count" and v.shape != (3, 1, 1):
123
+ raise ValueError(f"Shape of '{k}' must be (3,1,1), but is {v.shape} instead.")
124
+
125
+
126
+ def aggregate_feature_stats(stats_ft_list: list[dict[str, dict]]) -> dict[str, dict[str, np.ndarray]]:
127
+ """Aggregates stats for a single feature."""
128
+ means = np.stack([s["mean"] for s in stats_ft_list])
129
+ variances = np.stack([s["std"] ** 2 for s in stats_ft_list])
130
+ counts = np.stack([s["count"] for s in stats_ft_list])
131
+ total_count = counts.sum(axis=0)
132
+
133
+ # Prepare weighted mean by matching number of dimensions
134
+ while counts.ndim < means.ndim:
135
+ counts = np.expand_dims(counts, axis=-1)
136
+
137
+ # Compute the weighted mean
138
+ weighted_means = means * counts
139
+ total_mean = weighted_means.sum(axis=0) / total_count
140
+
141
+ # Compute the variance using the parallel algorithm
142
+ delta_means = means - total_mean
143
+ weighted_variances = (variances + delta_means**2) * counts
144
+ total_variance = weighted_variances.sum(axis=0) / total_count
145
+
146
+ return {
147
+ "min": np.min(np.stack([s["min"] for s in stats_ft_list]), axis=0),
148
+ "max": np.max(np.stack([s["max"] for s in stats_ft_list]), axis=0),
149
+ "mean": total_mean,
150
+ "std": np.sqrt(total_variance),
151
+ "count": total_count,
152
+ }
153
+
154
+
155
+ def aggregate_stats(stats_list: list[dict[str, dict]]) -> dict[str, dict[str, np.ndarray]]:
156
+ """Aggregate stats from multiple compute_stats outputs into a single set of stats.
157
+
158
+ The final stats will have the union of all data keys from each of the stats dicts.
159
+
160
+ For instance:
161
+ - new_min = min(min_dataset_0, min_dataset_1, ...)
162
+ - new_max = max(max_dataset_0, max_dataset_1, ...)
163
+ - new_mean = (mean of all data, weighted by counts)
164
+ - new_std = (std of all data)
165
+ """
166
+
167
+ _assert_type_and_shape(stats_list)
168
+
169
+ data_keys = {key for stats in stats_list for key in stats}
170
+ aggregated_stats = {key: {} for key in data_keys}
171
+
172
+ for key in data_keys:
173
+ stats_with_key = [stats[key] for stats in stats_list if key in stats]
174
+ aggregated_stats[key] = aggregate_feature_stats(stats_with_key)
175
+
176
+ return aggregated_stats
lerobot/common/datasets/factory.py ADDED
@@ -0,0 +1,118 @@
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ import logging
17
+ from pprint import pformat
18
+
19
+ import torch
20
+
21
+ from lerobot.common.datasets.lerobot_dataset import (
22
+ LeRobotDataset,
23
+ LeRobotDatasetMetadata,
24
+ MultiLeRobotDataset,
25
+ )
26
+ from lerobot.common.datasets.transforms import ImageTransforms
27
+ from lerobot.configs.policies import PreTrainedConfig
28
+ from lerobot.configs.train import TrainPipelineConfig
29
+
30
+ IMAGENET_STATS = {
31
+ "mean": [[[0.485]], [[0.456]], [[0.406]]], # (c,1,1)
32
+ "std": [[[0.229]], [[0.224]], [[0.225]]], # (c,1,1)
33
+ }
34
+
35
+
36
+ def resolve_delta_timestamps(
37
+ cfg: PreTrainedConfig, ds_meta: LeRobotDatasetMetadata
38
+ ) -> dict[str, list] | None:
39
+ """Resolves delta_timestamps by reading from the 'delta_indices' properties of the PreTrainedConfig.
40
+
41
+ Args:
42
+ cfg (PreTrainedConfig): The PreTrainedConfig to read delta_indices from.
43
+ ds_meta (LeRobotDatasetMetadata): The dataset from which features and fps are used to build
44
+ delta_timestamps against.
45
+
46
+ Returns:
47
+ dict[str, list] | None: A dictionary of delta_timestamps, e.g.:
48
+ {
49
+ "observation.state": [-0.04, -0.02, 0]
50
+ "observation.action": [-0.02, 0, 0.02]
51
+ }
52
+ returns `None` if the resulting dict is empty.
53
+ """
54
+ delta_timestamps = {}
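+ # Convert frame offsets (delta indices) into seconds by dividing by the dataset fps,
+ # e.g. index -1 at fps=50 becomes -0.02 s.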
55
+ for key in ds_meta.features:
56
+ if key == "next.reward" and cfg.reward_delta_indices is not None:
57
+ delta_timestamps[key] = [i / ds_meta.fps for i in cfg.reward_delta_indices]
58
+ if key == "action" and cfg.action_delta_indices is not None:
59
+ delta_timestamps[key] = [i / ds_meta.fps for i in cfg.action_delta_indices]
60
+ if key.startswith("observation.") and cfg.observation_delta_indices is not None:
61
+ delta_timestamps[key] = [i / ds_meta.fps for i in cfg.observation_delta_indices]
62
+
63
+ if len(delta_timestamps) == 0:
64
+ delta_timestamps = None
65
+
66
+ return delta_timestamps
67
+
68
+
69
+ def make_dataset(cfg: TrainPipelineConfig) -> LeRobotDataset | MultiLeRobotDataset:
70
+ """Handles the logic of setting up delta timestamps and image transforms before creating a dataset.
71
+
72
+ Args:
73
+ cfg (TrainPipelineConfig): A TrainPipelineConfig config which contains a DatasetConfig and a PreTrainedConfig.
74
+
75
+ Raises:
76
+ NotImplementedError: The MultiLeRobotDataset is currently deactivated.
77
+
78
+ Returns:
79
+ LeRobotDataset | MultiLeRobotDataset
80
+ """
81
+ image_transforms = (
82
+ ImageTransforms(cfg.dataset.image_transforms) if cfg.dataset.image_transforms.enable else None
83
+ )
84
+
85
+ if isinstance(cfg.dataset.repo_id, str):
86
+ ds_meta = LeRobotDatasetMetadata(
87
+ cfg.dataset.repo_id, root=cfg.dataset.root, revision=cfg.dataset.revision
88
+ )
89
+ delta_timestamps = resolve_delta_timestamps(cfg.policy, ds_meta)
90
+ dataset = LeRobotDataset(
91
+ cfg.dataset.repo_id,
92
+ root=cfg.dataset.root,
93
+ episodes=cfg.dataset.episodes,
94
+ delta_timestamps=delta_timestamps,
95
+ image_transforms=image_transforms,
96
+ revision=cfg.dataset.revision,
97
+ video_backend=cfg.dataset.video_backend,
98
+ )
99
+ else:
100
+ raise NotImplementedError("The MultiLeRobotDataset isn't supported for now.")
101
+ dataset = MultiLeRobotDataset(
102
+ cfg.dataset.repo_id,
103
+ # TODO(aliberts): add proper support for multi dataset
104
+ # delta_timestamps=delta_timestamps,
105
+ image_transforms=image_transforms,
106
+ video_backend=cfg.dataset.video_backend,
107
+ )
108
+ logging.info(
109
+ "Multiple datasets were provided. Applied the following index mapping to the provided datasets: "
110
+ f"{pformat(dataset.repo_id_to_index, indent=2)}"
111
+ )
112
+
113
+ if cfg.dataset.use_imagenet_stats:
114
+ for key in dataset.meta.camera_keys:
115
+ for stats_type, stats in IMAGENET_STATS.items():
116
+ dataset.meta.stats[key][stats_type] = torch.tensor(stats, dtype=torch.float32)
117
+
118
+ return dataset
lerobot/common/datasets/image_writer.py ADDED
@@ -0,0 +1,178 @@
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ import multiprocessing
17
+ import queue
18
+ import threading
19
+ from pathlib import Path
20
+
21
+ import numpy as np
22
+ import PIL.Image
23
+ import torch
24
+
25
+
26
+ def safe_stop_image_writer(func):
27
+ def wrapper(*args, **kwargs):
28
+ try:
29
+ return func(*args, **kwargs)
30
+ except Exception as e:
31
+ dataset = kwargs.get("dataset")
32
+ image_writer = getattr(dataset, "image_writer", None) if dataset else None
33
+ if image_writer is not None:
34
+ print("Waiting for image writer to terminate...")
35
+ image_writer.stop()
36
+ raise e
37
+
38
+ return wrapper
39
+
40
+
41
+ def image_array_to_pil_image(image_array: np.ndarray, range_check: bool = True) -> PIL.Image.Image:
42
+ # TODO(aliberts): handle 1 channel and 4 for depth images
43
+ if image_array.ndim != 3:
44
+ raise ValueError(f"The array has {image_array.ndim} dimensions, but 3 is expected for an image.")
45
+
46
+ if image_array.shape[0] == 3:
47
+ # Transpose from pytorch convention (C, H, W) to (H, W, C)
48
+ image_array = image_array.transpose(1, 2, 0)
49
+
50
+ elif image_array.shape[-1] != 3:
51
+ raise NotImplementedError(
52
+ f"The image has {image_array.shape[-1]} channels, but 3 is required for now."
53
+ )
54
+
55
+ if image_array.dtype != np.uint8:
56
+ if range_check:
57
+ max_ = image_array.max().item()
58
+ min_ = image_array.min().item()
59
+ if max_ > 1.0 or min_ < 0.0:
60
+ raise ValueError(
61
+ "The image data type is float, which requires values in the range [0.0, 1.0]. "
62
+ f"However, the provided range is [{min_}, {max_}]. Please adjust the range or "
63
+ "provide a uint8 image with values in the range [0, 255]."
64
+ )
65
+
66
+ image_array = (image_array * 255).astype(np.uint8)
67
+
68
+ return PIL.Image.fromarray(image_array)
69
+
70
+
71
+ def write_image(image: np.ndarray | PIL.Image.Image, fpath: Path):
72
+ try:
73
+ if isinstance(image, np.ndarray):
74
+ img = image_array_to_pil_image(image)
75
+ elif isinstance(image, PIL.Image.Image):
76
+ img = image
77
+ else:
78
+ raise TypeError(f"Unsupported image type: {type(image)}")
79
+ img.save(fpath)
80
+ except Exception as e:
81
+ print(f"Error writing image {fpath}: {e}")
82
+
83
+
84
+ def worker_thread_loop(queue: queue.Queue):
85
+ while True:
86
+ item = queue.get()
87
+ if item is None:
88
+ queue.task_done()
89
+ break
90
+ image_array, fpath = item
91
+ write_image(image_array, fpath)
92
+ queue.task_done()
93
+
94
+
95
+ def worker_process(queue: queue.Queue, num_threads: int):
96
+ threads = []
97
+ for _ in range(num_threads):
98
+ t = threading.Thread(target=worker_thread_loop, args=(queue,))
99
+ t.daemon = True
100
+ t.start()
101
+ threads.append(t)
102
+ for t in threads:
103
+ t.join()
104
+
105
+
106
+ class AsyncImageWriter:
107
+ """
108
+ This class abstracts away the initialisation of processes and/or threads to
109
+ save images on disk asynchronously, which is critical to control a robot and record data
110
+ at a high frame rate.
111
+
112
+ When `num_processes=0`, it creates a thread pool of size `num_threads`.
113
+ When `num_processes>0`, it creates a pool of `num_processes` processes, where each subprocess starts
114
+ its own thread pool of size `num_threads`.
115
+
116
+ The optimal number of processes and threads depends on your computer's capabilities.
117
+ We advise using 4 threads per camera with 0 processes. If the fps is not stable, try increasing or lowering
118
+ the number of threads. If it is still not stable, try using 1 subprocess, or more.
119
+ """
120
+
121
+ def __init__(self, num_processes: int = 0, num_threads: int = 1):
122
+ self.num_processes = num_processes
123
+ self.num_threads = num_threads
124
+ self.queue = None
125
+ self.threads = []
126
+ self.processes = []
127
+ self._stopped = False
128
+
129
+ if num_threads <= 0 and num_processes <= 0:
130
+ raise ValueError("Number of threads and processes must be greater than zero.")
131
+
132
+ if self.num_processes == 0:
133
+ # Use threading
134
+ self.queue = queue.Queue()
135
+ for _ in range(self.num_threads):
136
+ t = threading.Thread(target=worker_thread_loop, args=(self.queue,))
137
+ t.daemon = True
138
+ t.start()
139
+ self.threads.append(t)
140
+ else:
141
+ # Use multiprocessing
142
+ self.queue = multiprocessing.JoinableQueue()
143
+ for _ in range(self.num_processes):
144
+ p = multiprocessing.Process(target=worker_process, args=(self.queue, self.num_threads))
145
+ p.daemon = True
146
+ p.start()
147
+ self.processes.append(p)
148
+
149
+ def save_image(self, image: torch.Tensor | np.ndarray | PIL.Image.Image, fpath: Path):
150
+ if isinstance(image, torch.Tensor):
151
+ # Convert tensor to numpy array to minimize main process time
152
+ image = image.cpu().numpy()
153
+ self.queue.put((image, fpath))
154
+
155
+ def wait_until_done(self):
156
+ self.queue.join()
157
+
158
+ def stop(self):
159
+ if self._stopped:
160
+ return
161
+
162
+ if self.num_processes == 0:
163
+ for _ in self.threads:
164
+ self.queue.put(None)
165
+ for t in self.threads:
166
+ t.join()
167
+ else:
168
+ num_nones = self.num_processes * self.num_threads
169
+ for _ in range(num_nones):
170
+ self.queue.put(None)
171
+ for p in self.processes:
172
+ p.join()
173
+ if p.is_alive():
174
+ p.terminate()
175
+ self.queue.close()
176
+ self.queue.join_thread()
177
+
178
+ self._stopped = True
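A minimal sketch of the AsyncImageWriter above with the advised thread-only setup; the frame contents and output directory are illustrative.

import numpy as np
from pathlib import Path

from lerobot.common.datasets.image_writer import AsyncImageWriter

out_dir = Path("/tmp/frames")
out_dir.mkdir(parents=True, exist_ok=True)

writer = AsyncImageWriter(num_processes=0, num_threads=4)  # thread pool only
for i in range(10):
    frame = np.zeros((480, 640, 3), dtype=np.uint8)  # placeholder (H, W, C) uint8 image
    writer.save_image(frame, out_dir / f"frame_{i:06d}.png")
writer.wait_until_done()  # block until every queued image is written
writer.stop()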
lerobot/common/datasets/lerobot_dataset.py ADDED
@@ -0,0 +1,1217 @@
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright 2024 The HuggingFace Inc. team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ import contextlib
17
+ import logging
18
+ import shutil
19
+ from pathlib import Path
20
+ from typing import Callable
21
+
22
+ import datasets
23
+ import numpy as np
24
+ import packaging.version
25
+ import PIL.Image
26
+ import torch
27
+ import torch.utils
28
+ from datasets import concatenate_datasets, load_dataset
29
+ from huggingface_hub import HfApi, snapshot_download
30
+ from huggingface_hub.constants import REPOCARD_NAME
31
+ from huggingface_hub.errors import RevisionNotFoundError
32
+
33
+ from lerobot.common.constants import HF_LEROBOT_HOME
34
+ from lerobot.common.datasets.compute_stats import aggregate_stats, compute_episode_stats
35
+ from lerobot.common.datasets.image_writer import AsyncImageWriter, write_image
36
+ from lerobot.common.datasets.utils import (
37
+ DEFAULT_FEATURES,
38
+ DEFAULT_IMAGE_PATH,
39
+ INFO_PATH,
40
+ TASKS_PATH,
41
+ append_jsonlines,
42
+ backward_compatible_episodes_stats,
43
+ check_delta_timestamps,
44
+ check_timestamps_sync,
45
+ check_version_compatibility,
46
+ create_empty_dataset_info,
47
+ create_lerobot_dataset_card,
48
+ embed_images,
49
+ get_delta_indices,
50
+ get_episode_data_index,
51
+ get_features_from_robot,
52
+ get_hf_features_from_features,
53
+ get_safe_version,
54
+ hf_transform_to_torch,
55
+ is_valid_version,
56
+ load_episodes,
57
+ load_episodes_stats,
58
+ load_info,
59
+ load_stats,
60
+ load_tasks,
61
+ validate_episode_buffer,
62
+ validate_frame,
63
+ write_episode,
64
+ write_episode_stats,
65
+ write_info,
66
+ write_json,
67
+ )
68
+ from lerobot.common.datasets.video_utils import (
69
+ VideoFrame,
70
+ decode_video_frames,
71
+ encode_video_frames,
72
+ get_safe_default_codec,
73
+ get_video_info,
74
+ )
75
+ from lerobot.common.robot_devices.robots.utils import Robot
76
+
77
+ CODEBASE_VERSION = "v2.1"
78
+
79
+
80
+ class LeRobotDatasetMetadata:
81
+ def __init__(
82
+ self,
83
+ repo_id: str,
84
+ root: str | Path | None = None,
85
+ revision: str | None = None,
86
+ force_cache_sync: bool = False,
87
+ ):
88
+ self.repo_id = repo_id
89
+ self.revision = revision if revision else CODEBASE_VERSION
90
+ self.root = Path(root) if root is not None else HF_LEROBOT_HOME / repo_id
91
+
92
+ try:
93
+ if force_cache_sync:
94
+ raise FileNotFoundError
95
+ self.load_metadata()
96
+ except (FileNotFoundError, NotADirectoryError):
97
+ if is_valid_version(self.revision):
98
+ self.revision = get_safe_version(self.repo_id, self.revision)
99
+
100
+ (self.root / "meta").mkdir(exist_ok=True, parents=True)
101
+ self.pull_from_repo(allow_patterns="meta/")
102
+ self.load_metadata()
103
+
104
+ def load_metadata(self):
105
+ self.info = load_info(self.root)
106
+ check_version_compatibility(self.repo_id, self._version, CODEBASE_VERSION)
107
+ self.tasks, self.task_to_task_index = load_tasks(self.root)
108
+ self.episodes = load_episodes(self.root)
109
+ if self._version < packaging.version.parse("v2.1"):
110
+ self.stats = load_stats(self.root)
111
+ self.episodes_stats = backward_compatible_episodes_stats(self.stats, self.episodes)
112
+ else:
113
+ self.episodes_stats = load_episodes_stats(self.root)
114
+ self.stats = aggregate_stats(list(self.episodes_stats.values()))
115
+
116
+ def pull_from_repo(
117
+ self,
118
+ allow_patterns: list[str] | str | None = None,
119
+ ignore_patterns: list[str] | str | None = None,
120
+ ) -> None:
121
+ snapshot_download(
122
+ self.repo_id,
123
+ repo_type="dataset",
124
+ revision=self.revision,
125
+ local_dir=self.root,
126
+ allow_patterns=allow_patterns,
127
+ ignore_patterns=ignore_patterns,
128
+ )
129
+
130
+ @property
131
+ def _version(self) -> packaging.version.Version:
132
+ """Codebase version used to create this dataset."""
133
+ return packaging.version.parse(self.info["codebase_version"])
134
+
135
+ def get_data_file_path(self, ep_index: int) -> Path:
136
+ ep_chunk = self.get_episode_chunk(ep_index)
137
+ fpath = self.data_path.format(episode_chunk=ep_chunk, episode_index=ep_index)
138
+ return Path(fpath)
139
+
140
+ def get_video_file_path(self, ep_index: int, vid_key: str) -> Path:
141
+ ep_chunk = self.get_episode_chunk(ep_index)
142
+ fpath = self.video_path.format(episode_chunk=ep_chunk, video_key=vid_key, episode_index=ep_index)
143
+ return Path(fpath)
144
+
145
+ def get_episode_chunk(self, ep_index: int) -> int:
146
+ return ep_index // self.chunks_size
147
+
148
+ @property
149
+ def data_path(self) -> str:
150
+ """Formattable string for the parquet files."""
151
+ return self.info["data_path"]
152
+
153
+ @property
154
+ def video_path(self) -> str | None:
155
+ """Formattable string for the video files."""
156
+ return self.info["video_path"]
157
+
158
+ @property
159
+ def robot_type(self) -> str | None:
160
+ """Robot type used in recording this dataset."""
161
+ return self.info["robot_type"]
162
+
163
+ @property
164
+ def fps(self) -> int:
165
+ """Frames per second used during data collection."""
166
+ return self.info["fps"]
167
+
168
+ @property
169
+ def features(self) -> dict[str, dict]:
170
+ """All features contained in the dataset."""
171
+ return self.info["features"]
172
+
173
+ @property
174
+ def image_keys(self) -> list[str]:
175
+ """Keys to access visual modalities stored as images."""
176
+ return [key for key, ft in self.features.items() if ft["dtype"] == "image"]
177
+
178
+ @property
179
+ def video_keys(self) -> list[str]:
180
+ """Keys to access visual modalities stored as videos."""
181
+ return [key for key, ft in self.features.items() if ft["dtype"] == "video"]
182
+
183
+ @property
184
+ def camera_keys(self) -> list[str]:
185
+ """Keys to access visual modalities (regardless of their storage method)."""
186
+ return [key for key, ft in self.features.items() if ft["dtype"] in ["video", "image"]]
187
+
188
+ @property
189
+ def names(self) -> dict[str, list | dict]:
190
+ """Names of the various dimensions of vector modalities."""
191
+ return {key: ft["names"] for key, ft in self.features.items()}
192
+
193
+ @property
194
+ def shapes(self) -> dict:
195
+ """Shapes for the different features."""
196
+ return {key: tuple(ft["shape"]) for key, ft in self.features.items()}
197
+
198
+ @property
199
+ def total_episodes(self) -> int:
200
+ """Total number of episodes available."""
201
+ return self.info["total_episodes"]
202
+
203
+ @property
204
+ def total_frames(self) -> int:
205
+ """Total number of frames saved in this dataset."""
206
+ return self.info["total_frames"]
207
+
208
+ @property
209
+ def total_tasks(self) -> int:
210
+ """Total number of different tasks performed in this dataset."""
211
+ return self.info["total_tasks"]
212
+
213
+ @property
214
+ def total_chunks(self) -> int:
215
+ """Total number of chunks (groups of episodes)."""
216
+ return self.info["total_chunks"]
217
+
218
+ @property
219
+ def chunks_size(self) -> int:
220
+ """Max number of episodes per chunk."""
221
+ return self.info["chunks_size"]
222
+
223
+ def get_task_index(self, task: str) -> int | None:
224
+ """
225
+ Given a task in natural language, returns its task_index if the task already exists in the dataset,
226
+ otherwise returns None.
227
+ """
228
+ return self.task_to_task_index.get(task, None)
229
+
230
+ def add_task(self, task: str):
231
+ """
232
+ Given a task in natural language, add it to the dictionary of tasks.
233
+ """
234
+ if task in self.task_to_task_index:
235
+ raise ValueError(f"The task '{task}' already exists and can't be added twice.")
236
+
237
+ task_index = self.info["total_tasks"]
238
+ self.task_to_task_index[task] = task_index
239
+ self.tasks[task_index] = task
240
+ self.info["total_tasks"] += 1
241
+
242
+ task_dict = {
243
+ "task_index": task_index,
244
+ "task": task,
245
+ }
246
+ append_jsonlines(task_dict, self.root / TASKS_PATH)
247
+
248
+ def save_episode(
249
+ self,
250
+ episode_index: int,
251
+ episode_length: int,
252
+ episode_tasks: list[str],
253
+ episode_stats: dict[str, dict],
254
+ ) -> None:
255
+ self.info["total_episodes"] += 1
256
+ self.info["total_frames"] += episode_length
257
+
258
+ chunk = self.get_episode_chunk(episode_index)
259
+ if chunk >= self.total_chunks:
260
+ self.info["total_chunks"] += 1
261
+
262
+ self.info["splits"] = {"train": f"0:{self.info['total_episodes']}"}
263
+ self.info["total_videos"] += len(self.video_keys)
264
+ if len(self.video_keys) > 0:
265
+ self.update_video_info()
266
+
267
+ write_info(self.info, self.root)
268
+
269
+ episode_dict = {
270
+ "episode_index": episode_index,
271
+ "tasks": episode_tasks,
272
+ "length": episode_length,
273
+ }
274
+ self.episodes[episode_index] = episode_dict
275
+ write_episode(episode_dict, self.root)
276
+
277
+ self.episodes_stats[episode_index] = episode_stats
278
+ self.stats = aggregate_stats([self.stats, episode_stats]) if self.stats else episode_stats
279
+ write_episode_stats(episode_index, episode_stats, self.root)
280
+
281
+ def update_video_info(self) -> None:
282
+ """
283
+ Warning: this function writes info from first episode videos, implicitly assuming that all videos have
284
+ been encoded the same way. Also, this means it assumes the first episode exists.
285
+ """
286
+ for key in self.video_keys:
287
+ if not self.features[key].get("info", None):
288
+ video_path = self.root / self.get_video_file_path(ep_index=0, vid_key=key)
289
+ self.info["features"][key]["info"] = get_video_info(video_path)
290
+
291
+ def __repr__(self):
292
+ feature_keys = list(self.features)
293
+ return (
294
+ f"{self.__class__.__name__}({{\n"
295
+ f" Repository ID: '{self.repo_id}',\n"
296
+ f" Total episodes: '{self.total_episodes}',\n"
297
+ f" Total frames: '{self.total_frames}',\n"
298
+ f" Features: '{feature_keys}',\n"
299
+ "})',\n"
300
+ )
301
+
302
+ @classmethod
303
+ def create(
304
+ cls,
305
+ repo_id: str,
306
+ fps: int,
307
+ root: str | Path | None = None,
308
+ robot: Robot | None = None,
309
+ robot_type: str | None = None,
310
+ features: dict | None = None,
311
+ use_videos: bool = True,
312
+ ) -> "LeRobotDatasetMetadata":
313
+ """Creates metadata for a LeRobotDataset."""
314
+ obj = cls.__new__(cls)
315
+ obj.repo_id = repo_id
316
+ obj.root = Path(root) if root is not None else HF_LEROBOT_HOME / repo_id
317
+
318
+ obj.root.mkdir(parents=True, exist_ok=False)
319
+
320
+ if robot is not None:
321
+ features = get_features_from_robot(robot, use_videos)
322
+ robot_type = robot.robot_type
323
+ if not all(cam.fps == fps for cam in robot.cameras.values()):
324
+ logging.warning(
325
+ f"Some cameras in your {robot.robot_type} robot don't have an fps matching the fps of your dataset."
326
+ "In this case, frames from lower fps cameras will be repeated to fill in the blanks."
327
+ )
328
+ elif features is None:
329
+ raise ValueError(
330
+ "Dataset features must either come from a Robot or explicitly passed upon creation."
331
+ )
332
+ else:
333
+ # TODO(aliberts, rcadene): implement sanity check for features
334
+ features = {**features, **DEFAULT_FEATURES}
335
+
336
+ # check if none of the features contains a "/" in their names,
337
+ # as this would break the dict flattening in the stats computation, which uses '/' as separator
338
+ for key in features:
339
+ if "/" in key:
340
+ raise ValueError(f"Feature names should not contain '/'. Found '/' in feature '{key}'.")
341
+
342
+ features = {**features, **DEFAULT_FEATURES}
343
+
344
+ obj.tasks, obj.task_to_task_index = {}, {}
345
+ obj.episodes_stats, obj.stats, obj.episodes = {}, {}, {}
346
+ obj.info = create_empty_dataset_info(CODEBASE_VERSION, fps, robot_type, features, use_videos)
347
+ if len(obj.video_keys) > 0 and not use_videos:
348
+ raise ValueError()
349
+ write_json(obj.info, obj.root / INFO_PATH)
350
+ obj.revision = None
351
+ return obj
352
+
353
+
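A minimal sketch of inspecting a dataset through LeRobotDatasetMetadata alone, which only pulls the meta/ files; the repo_id and feature key are illustrative.

from lerobot.common.datasets.lerobot_dataset import LeRobotDatasetMetadata

meta = LeRobotDatasetMetadata("lerobot/pusht")  # downloads meta/ only, not data or videos
print(meta.fps, meta.total_episodes, meta.total_frames)
print(meta.camera_keys)       # visual modalities, whether stored as images or videos
print(meta.shapes["action"])  # per-feature shape taken from info.json ("action" is an example key)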
354
+ class LeRobotDataset(torch.utils.data.Dataset):
355
+ def __init__(
356
+ self,
357
+ repo_id: str,
358
+ root: str | Path | None = None,
359
+ episodes: list[int] | None = None,
360
+ image_transforms: Callable | None = None,
361
+ delta_timestamps: dict[list[float]] | None = None,
362
+ tolerance_s: float = 1e-4,
363
+ revision: str | None = None,
364
+ force_cache_sync: bool = False,
365
+ download_videos: bool = True,
366
+ video_backend: str | None = None,
367
+ ):
368
+ """
369
+ 2 modes are available for instantiating this class, depending on 2 different use cases:
370
+
371
+ 1. Your dataset already exists:
372
+ - On your local disk in the 'root' folder. This is typically the case when you recorded your
373
+ dataset locally and you may or may not have pushed it to the hub yet. Instantiating this class
374
+ with 'root' will load your dataset directly from disk. This can happen while you're offline (no
375
+ internet connection).
376
+
377
+ - On the Hugging Face Hub at the address https://huggingface.co/datasets/{repo_id} and not on
378
+ your local disk in the 'root' folder. Instantiating this class with this 'repo_id' will download
379
+ the dataset from that address and load it, provided your dataset is compliant with
380
+ codebase_version v2.0. If your dataset has been created before this new format, you will be
381
+ prompted to convert it using our conversion script from v1.6 to v2.0, which you can find at
382
+ lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py.
383
+
384
+
385
+ 2. Your dataset doesn't already exist (either on local disk or on the Hub): you can create an empty
386
+ LeRobotDataset with the 'create' classmethod. This can be used for recording a dataset or to port an
387
+ existing dataset to the LeRobotDataset format.
388
+
389
+
390
+ In terms of files, LeRobotDataset encapsulates 3 main things:
391
+ - metadata:
392
+ - info contains various information about the dataset like shapes, keys, fps etc.
393
+ - stats stores the dataset statistics of the different modalities for normalization
394
+ - tasks contains the prompts for each task of the dataset, which can be used for
395
+ task-conditioned training.
396
+ - hf_dataset (from datasets.Dataset), which will read any values from parquet files.
397
+ - videos (optional) from which frames are loaded to be synchronous with data from parquet files.
398
+
399
+ A typical LeRobotDataset looks like this from its root path:
400
+ .
401
+ ├── data
402
+ │ ├── chunk-000
403
+ │ │ ├── episode_000000.parquet
404
+ │ │ ├── episode_000001.parquet
405
+ │ │ ├── episode_000002.parquet
406
+ │ │ └── ...
407
+ │ ├── chunk-001
408
+ │ │ ├── episode_001000.parquet
409
+ │ │ ├── episode_001001.parquet
410
+ │ │ ├── episode_001002.parquet
411
+ │ │ └── ...
412
+ │ └── ...
413
+ ├── meta
414
+ │ ├── episodes.jsonl
415
+ │ ├── info.json
416
+ │ ├── stats.json
417
+ │ └── tasks.jsonl
418
+ └── videos
419
+ ├── chunk-000
420
+ │ ├── observation.images.laptop
421
+ │ │ ├── episode_000000.mp4
422
+ │ │ ├── episode_000001.mp4
423
+ │ │ ├── episode_000002.mp4
424
+ │ │ └── ...
425
+ │ ├── observation.images.phone
426
+ │ │ ├── episode_000000.mp4
427
+ │ │ ├── episode_000001.mp4
428
+ │ │ ├── episode_000002.mp4
429
+ │ │ └── ...
430
+ ├── chunk-001
431
+ └── ...
432
+
433
+ Note that this file-based structure is designed to be as versatile as possible. The files are split by
434
+ episodes which allows a more granular control over which episodes one wants to use and download. The
435
+ structure of the dataset is entirely described in the info.json file, which can be easily downloaded
436
+ or viewed directly on the hub before downloading any actual data. The types of files used are very
437
+ simple and do not require complex tools to be read: it only uses .parquet, .json and .mp4 files (and .md
438
+ for the README).
439
+
440
+ Args:
441
+ repo_id (str): This is the repo id that will be used to fetch the dataset. Locally, the dataset
442
+ will be stored under root/repo_id.
443
+ root (Path | None, optional): Local directory to use for downloading/writing files. You can also
444
+ set the LEROBOT_HOME environment variable to point to a different location. Defaults to
445
+ '~/.cache/huggingface/lerobot'.
446
+ episodes (list[int] | None, optional): If specified, this will only load episodes specified by
447
+ their episode_index in this list. Defaults to None.
448
+ image_transforms (Callable | None, optional): You can pass standard v2 image transforms from
449
+ torchvision.transforms.v2 here which will be applied to visual modalities (whether they come
450
+ from videos or images). Defaults to None.
451
+ delta_timestamps (dict[list[float]] | None, optional): _description_. Defaults to None.
452
+ tolerance_s (float, optional): Tolerance in seconds used to ensure data timestamps are actually in
453
+ sync with the fps value. It is used at the init of the dataset to make sure that each
454
+ timestamp is separated from the next by 1/fps +/- tolerance_s. This also applies to frames
455
+ decoded from video files. It is also used to check that `delta_timestamps` (when provided) are
456
+ multiples of 1/fps. Defaults to 1e-4.
457
+ revision (str, optional): An optional Git revision id which can be a branch name, a tag, or a
458
+ commit hash. Defaults to current codebase version tag.
459
+ force_cache_sync (bool, optional): Flag to sync and refresh local files first. If False and files
460
+ are already present in the local cache, loading will be faster. However, files loaded might not
461
+ be in sync with the version on the hub, especially if you specified 'revision'. Defaults to
462
+ False.
463
+ download_videos (bool, optional): Flag to download the videos. Note that when set to True but the
464
+ video files are already present on local disk, they won't be downloaded again. Defaults to
465
+ True.
466
+ video_backend (str | None, optional): Video backend to use for decoding videos. Defaults to torchcodec when available on the platform; otherwise, defaults to 'pyav'.
467
+ You can also use the 'pyav' decoder used by Torchvision, which used to be the default option, or 'video_reader' which is another decoder of Torchvision.
468
+ """
469
+ super().__init__()
470
+ self.repo_id = repo_id
471
+ self.root = Path(root) if root else HF_LEROBOT_HOME / repo_id
472
+ self.image_transforms = image_transforms
473
+ self.delta_timestamps = delta_timestamps
474
+ self.episodes = episodes
475
+ self.tolerance_s = tolerance_s
476
+ self.revision = revision if revision else CODEBASE_VERSION
477
+ self.video_backend = video_backend if video_backend else get_safe_default_codec()
478
+ self.delta_indices = None
479
+
480
+ # Unused attributes
481
+ self.image_writer = None
482
+ self.episode_buffer = None
483
+
484
+ self.root.mkdir(exist_ok=True, parents=True)
485
+
486
+ # Load metadata
487
+ self.meta = LeRobotDatasetMetadata(
488
+ self.repo_id, self.root, self.revision, force_cache_sync=force_cache_sync
489
+ )
490
+ if self.episodes is not None and self.meta._version >= packaging.version.parse("v2.1"):
491
+ episodes_stats = [self.meta.episodes_stats[ep_idx] for ep_idx in self.episodes]
492
+ self.stats = aggregate_stats(episodes_stats)
493
+
494
+ # Load actual data
495
+ try:
496
+ if force_cache_sync:
497
+ raise FileNotFoundError
498
+ assert all((self.root / fpath).is_file() for fpath in self.get_episodes_file_paths())
499
+ self.hf_dataset = self.load_hf_dataset()
500
+ except (AssertionError, FileNotFoundError, NotADirectoryError):
501
+ self.revision = get_safe_version(self.repo_id, self.revision)
502
+ self.download_episodes(download_videos)
503
+ self.hf_dataset = self.load_hf_dataset()
504
+
505
+ self.episode_data_index = get_episode_data_index(self.meta.episodes, self.episodes)
506
+
507
+ # Check timestamps
508
+ timestamps = torch.stack(self.hf_dataset["timestamp"]).numpy()
509
+ episode_indices = torch.stack(self.hf_dataset["episode_index"]).numpy()
510
+ ep_data_index_np = {k: t.numpy() for k, t in self.episode_data_index.items()}
511
+ check_timestamps_sync(timestamps, episode_indices, ep_data_index_np, self.fps, self.tolerance_s)
512
+
513
+ # Setup delta_indices
514
+ if self.delta_timestamps is not None:
515
+ check_delta_timestamps(self.delta_timestamps, self.fps, self.tolerance_s)
516
+ self.delta_indices = get_delta_indices(self.delta_timestamps, self.fps)
517
+
518
+ def push_to_hub(
519
+ self,
520
+ branch: str | None = None,
521
+ tags: list | None = None,
522
+ license: str | None = "apache-2.0",
523
+ tag_version: bool = True,
524
+ push_videos: bool = True,
525
+ private: bool = False,
526
+ allow_patterns: list[str] | str | None = None,
527
+ upload_large_folder: bool = False,
528
+ **card_kwargs,
529
+ ) -> None:
530
+ ignore_patterns = ["images/"]
531
+ if not push_videos:
532
+ ignore_patterns.append("videos/")
533
+
534
+ hub_api = HfApi()
535
+ hub_api.create_repo(
536
+ repo_id=self.repo_id,
537
+ private=private,
538
+ repo_type="dataset",
539
+ exist_ok=True,
540
+ )
541
+ if branch:
542
+ hub_api.create_branch(
543
+ repo_id=self.repo_id,
544
+ branch=branch,
545
+ revision=self.revision,
546
+ repo_type="dataset",
547
+ exist_ok=True,
548
+ )
549
+
550
+ upload_kwargs = {
551
+ "repo_id": self.repo_id,
552
+ "folder_path": self.root,
553
+ "repo_type": "dataset",
554
+ "revision": branch,
555
+ "allow_patterns": allow_patterns,
556
+ "ignore_patterns": ignore_patterns,
557
+ }
558
+ if upload_large_folder:
559
+ hub_api.upload_large_folder(**upload_kwargs)
560
+ else:
561
+ hub_api.upload_folder(**upload_kwargs)
562
+
563
+ if not hub_api.file_exists(self.repo_id, REPOCARD_NAME, repo_type="dataset", revision=branch):
564
+ card = create_lerobot_dataset_card(
565
+ tags=tags, dataset_info=self.meta.info, license=license, **card_kwargs
566
+ )
567
+ card.push_to_hub(repo_id=self.repo_id, repo_type="dataset", revision=branch)
568
+
569
+ if tag_version:
570
+ with contextlib.suppress(RevisionNotFoundError):
571
+ hub_api.delete_tag(self.repo_id, tag=CODEBASE_VERSION, repo_type="dataset")
572
+ hub_api.create_tag(self.repo_id, tag=CODEBASE_VERSION, revision=branch, repo_type="dataset")
573
+
574
+ def pull_from_repo(
575
+ self,
576
+ allow_patterns: list[str] | str | None = None,
577
+ ignore_patterns: list[str] | str | None = None,
578
+ ) -> None:
579
+ snapshot_download(
580
+ self.repo_id,
581
+ repo_type="dataset",
582
+ revision=self.revision,
583
+ local_dir=self.root,
584
+ allow_patterns=allow_patterns,
585
+ ignore_patterns=ignore_patterns,
586
+ )
587
+
588
+ def download_episodes(self, download_videos: bool = True) -> None:
589
+ """Downloads the dataset from the given 'repo_id' at the provided version. If 'episodes' is given, this
590
+ will only download those episodes (selected by their episode_index). If 'episodes' is None, the whole
591
+ dataset will be downloaded. Thanks to the behavior of snapshot_download, if the files are already present
592
+ in 'local_dir', they won't be downloaded again.
593
+ """
594
+ # TODO(rcadene, aliberts): implement faster transfer
595
+ # https://huggingface.co/docs/huggingface_hub/en/guides/download#faster-downloads
596
+ files = None
597
+ ignore_patterns = None if download_videos else "videos/"
598
+ if self.episodes is not None:
599
+ files = self.get_episodes_file_paths()
600
+
601
+ self.pull_from_repo(allow_patterns=files, ignore_patterns=ignore_patterns)
602
+
603
+ def get_episodes_file_paths(self) -> list[Path]:
604
+ episodes = self.episodes if self.episodes is not None else list(range(self.meta.total_episodes))
605
+ fpaths = [str(self.meta.get_data_file_path(ep_idx)) for ep_idx in episodes]
606
+ if len(self.meta.video_keys) > 0:
607
+ video_files = [
608
+ str(self.meta.get_video_file_path(ep_idx, vid_key))
609
+ for vid_key in self.meta.video_keys
610
+ for ep_idx in episodes
611
+ ]
612
+ fpaths += video_files
613
+
614
+ return fpaths
615
+
616
+ def load_hf_dataset(self) -> datasets.Dataset:
617
+ """hf_dataset contains all the observations, states, actions, rewards, etc."""
618
+ if self.episodes is None:
619
+ path = str(self.root / "data")
620
+ hf_dataset = load_dataset("parquet", data_dir=path, split="train")
621
+ else:
622
+ files = [str(self.root / self.meta.get_data_file_path(ep_idx)) for ep_idx in self.episodes]
623
+ hf_dataset = load_dataset("parquet", data_files=files, split="train")
624
+
625
+ # TODO(aliberts): hf_dataset.set_format("torch")
626
+ hf_dataset.set_transform(hf_transform_to_torch)
627
+ return hf_dataset
628
+
629
+ def create_hf_dataset(self) -> datasets.Dataset:
630
+ features = get_hf_features_from_features(self.features)
631
+ ft_dict = {col: [] for col in features}
632
+ hf_dataset = datasets.Dataset.from_dict(ft_dict, features=features, split="train")
633
+
634
+ # TODO(aliberts): hf_dataset.set_format("torch")
635
+ hf_dataset.set_transform(hf_transform_to_torch)
636
+ return hf_dataset
637
+
638
+ @property
639
+ def fps(self) -> int:
640
+ """Frames per second used during data collection."""
641
+ return self.meta.fps
642
+
643
+ @property
644
+ def num_frames(self) -> int:
645
+ """Number of frames in selected episodes."""
646
+ return len(self.hf_dataset) if self.hf_dataset is not None else self.meta.total_frames
647
+
648
+ @property
649
+ def num_episodes(self) -> int:
650
+ """Number of episodes selected."""
651
+ return len(self.episodes) if self.episodes is not None else self.meta.total_episodes
652
+
653
+ @property
654
+ def features(self) -> dict[str, dict]:
655
+ return self.meta.features
656
+
657
+ @property
658
+ def hf_features(self) -> datasets.Features:
659
+ """Features of the hf_dataset."""
660
+ if self.hf_dataset is not None:
661
+ return self.hf_dataset.features
662
+ else:
663
+ return get_hf_features_from_features(self.features)
664
+
665
+ def _get_query_indices(self, idx: int, ep_idx: int) -> tuple[dict[str, list[int | bool]]]:
666
+ ep_start = self.episode_data_index["from"][ep_idx]
667
+ ep_end = self.episode_data_index["to"][ep_idx]
668
+ query_indices = {
669
+ key: [max(ep_start.item(), min(ep_end.item() - 1, idx + delta)) for delta in delta_idx]
670
+ for key, delta_idx in self.delta_indices.items()
671
+ }
672
+ padding = { # Pad values outside of current episode range
673
+ f"{key}_is_pad": torch.BoolTensor(
674
+ [(idx + delta < ep_start.item()) | (idx + delta >= ep_end.item()) for delta in delta_idx]
675
+ )
676
+ for key, delta_idx in self.delta_indices.items()
677
+ }
678
+ return query_indices, padding
679
+
680
+ def _get_query_timestamps(
681
+ self,
682
+ current_ts: float,
683
+ query_indices: dict[str, list[int]] | None = None,
684
+ ) -> dict[str, list[float]]:
685
+ query_timestamps = {}
686
+ for key in self.meta.video_keys:
687
+ if query_indices is not None and key in query_indices:
688
+ timestamps = self.hf_dataset.select(query_indices[key])["timestamp"]
689
+ query_timestamps[key] = torch.stack(timestamps).tolist()
690
+ else:
691
+ query_timestamps[key] = [current_ts]
692
+
693
+ return query_timestamps
694
+
695
+ def _query_hf_dataset(self, query_indices: dict[str, list[int]]) -> dict:
696
+ return {
697
+ key: torch.stack(self.hf_dataset.select(q_idx)[key])
698
+ for key, q_idx in query_indices.items()
699
+ if key not in self.meta.video_keys
700
+ }
701
+
702
+ def _query_videos(self, query_timestamps: dict[str, list[float]], ep_idx: int) -> dict[str, torch.Tensor]:
703
+ """Note: When using data workers (e.g. DataLoader with num_workers>0), do not call this function
704
+ in the main process (e.g. by using a second Dataloader with num_workers=0). It will result in a
705
+ Segmentation Fault. This probably happens because a memory reference to the video loader is created in
706
+ the main process and a subprocess fails to access it.
707
+ """
708
+ item = {}
709
+ for vid_key, query_ts in query_timestamps.items():
710
+ video_path = self.root / self.meta.get_video_file_path(ep_idx, vid_key)
711
+ frames = decode_video_frames(video_path, query_ts, self.tolerance_s, self.video_backend)
712
+ item[vid_key] = frames.squeeze(0)
713
+
714
+ return item
715
+
716
+ def _add_padding_keys(self, item: dict, padding: dict[str, list[bool]]) -> dict:
717
+ for key, val in padding.items():
718
+ item[key] = torch.BoolTensor(val)
719
+ return item
720
+
721
+ def __len__(self):
722
+ return self.num_frames
723
+
724
+ def __getitem__(self, idx) -> dict:
725
+ item = self.hf_dataset[idx]
726
+ ep_idx = item["episode_index"].item()
727
+
728
+ query_indices = None
729
+ if self.delta_indices is not None:
730
+ query_indices, padding = self._get_query_indices(idx, ep_idx)
731
+ query_result = self._query_hf_dataset(query_indices)
732
+ item = {**item, **padding}
733
+ for key, val in query_result.items():
734
+ item[key] = val
735
+
736
+ if len(self.meta.video_keys) > 0:
737
+ current_ts = item["timestamp"].item()
738
+ query_timestamps = self._get_query_timestamps(current_ts, query_indices)
739
+ video_frames = self._query_videos(query_timestamps, ep_idx)
740
+ item = {**video_frames, **item}
741
+
742
+ if self.image_transforms is not None:
743
+ image_keys = self.meta.camera_keys
744
+ for cam in image_keys:
745
+ item[cam] = self.image_transforms(item[cam])
746
+
747
+ # Add task as a string
748
+ task_idx = item["task_index"].item()
749
+ item["task"] = self.meta.tasks[task_idx]
750
+
751
+ return item
752
+
753
+ def __repr__(self):
754
+ feature_keys = list(self.features)
755
+ return (
756
+ f"{self.__class__.__name__}({{\n"
757
+ f" Repository ID: '{self.repo_id}',\n"
758
+ f" Number of selected episodes: '{self.num_episodes}',\n"
759
+ f" Number of selected samples: '{self.num_frames}',\n"
760
+ f" Features: '{feature_keys}',\n"
761
+ "})',\n"
762
+ )
763
+
764
+ def create_episode_buffer(self, episode_index: int | None = None) -> dict:
765
+ current_ep_idx = self.meta.total_episodes if episode_index is None else episode_index
766
+ ep_buffer = {}
767
+ # size and task are special cases that are not in self.features
768
+ ep_buffer["size"] = 0
769
+ ep_buffer["task"] = []
770
+ for key in self.features:
771
+ ep_buffer[key] = current_ep_idx if key == "episode_index" else []
772
+ return ep_buffer
773
+
774
+ def _get_image_file_path(self, episode_index: int, image_key: str, frame_index: int) -> Path:
775
+ fpath = DEFAULT_IMAGE_PATH.format(
776
+ image_key=image_key, episode_index=episode_index, frame_index=frame_index
777
+ )
778
+ return self.root / fpath
779
+
780
+ def _save_image(self, image: torch.Tensor | np.ndarray | PIL.Image.Image, fpath: Path) -> None:
781
+ if self.image_writer is None:
782
+ if isinstance(image, torch.Tensor):
783
+ image = image.cpu().numpy()
784
+ write_image(image, fpath)
785
+ else:
786
+ self.image_writer.save_image(image=image, fpath=fpath)
787
+
788
+ def add_frame(self, frame: dict) -> None:
789
+ """
790
+ This function only adds the frame to the episode_buffer. Apart from images — which are written in a
791
+ temporary directory — nothing is written to disk. To save those frames, the 'save_episode()' method
792
+ then needs to be called.
793
+ """
794
+ # Convert torch to numpy if needed
795
+ for name in frame:
796
+ if isinstance(frame[name], torch.Tensor):
797
+ frame[name] = frame[name].numpy()
798
+
799
+ validate_frame(frame, self.features)
800
+
801
+ if self.episode_buffer is None:
802
+ self.episode_buffer = self.create_episode_buffer()
803
+
804
+ # Automatically add frame_index and timestamp to episode buffer
805
+ frame_index = self.episode_buffer["size"]
806
+ timestamp = frame.pop("timestamp") if "timestamp" in frame else frame_index / self.fps
807
+ self.episode_buffer["frame_index"].append(frame_index)
808
+ self.episode_buffer["timestamp"].append(timestamp)
809
+
810
+ # Add frame features to episode_buffer
811
+ for key in frame:
812
+ if key == "task":
813
+ # Note: we associate the task in natural language to its task index during `save_episode`
814
+ self.episode_buffer["task"].append(frame["task"])
815
+ continue
816
+
817
+ if key not in self.features:
818
+ raise ValueError(
819
+ f"An element of the frame is not in the features. '{key}' not in '{self.features.keys()}'."
820
+ )
821
+
822
+ if self.features[key]["dtype"] in ["image", "video"]:
823
+ img_path = self._get_image_file_path(
824
+ episode_index=self.episode_buffer["episode_index"], image_key=key, frame_index=frame_index
825
+ )
826
+ if frame_index == 0:
827
+ img_path.parent.mkdir(parents=True, exist_ok=True)
828
+ self._save_image(frame[key], img_path)
829
+ self.episode_buffer[key].append(str(img_path))
830
+ else:
831
+ self.episode_buffer[key].append(frame[key])
832
+
833
+ self.episode_buffer["size"] += 1
834
+
835
+ def save_episode(self, episode_data: dict | None = None) -> None:
836
+ """
837
+ This will save to disk the current episode in self.episode_buffer.
838
+
839
+ Args:
840
+ episode_data (dict | None, optional): Dict containing the episode data to save. If None, this will
841
+ save the current episode in self.episode_buffer, which is filled with 'add_frame'. Defaults to
842
+ None.
843
+ """
844
+ if not episode_data:
845
+ episode_buffer = self.episode_buffer
846
+
847
+ validate_episode_buffer(episode_buffer, self.meta.total_episodes, self.features)
848
+
849
+ # size and task are special cases that won't be added to hf_dataset
850
+ episode_length = episode_buffer.pop("size")
851
+ tasks = episode_buffer.pop("task")
852
+ episode_tasks = list(set(tasks))
853
+ episode_index = episode_buffer["episode_index"]
854
+
855
+ episode_buffer["index"] = np.arange(self.meta.total_frames, self.meta.total_frames + episode_length)
856
+ episode_buffer["episode_index"] = np.full((episode_length,), episode_index)
857
+
858
+ # Add new tasks to the tasks dictionary
859
+ for task in episode_tasks:
860
+ task_index = self.meta.get_task_index(task)
861
+ if task_index is None:
862
+ self.meta.add_task(task)
863
+
864
+ # Given tasks in natural language, find their corresponding task indices
865
+ episode_buffer["task_index"] = np.array([self.meta.get_task_index(task) for task in tasks])
866
+
867
+ for key, ft in self.features.items():
868
+ # index, episode_index, task_index are already processed above, and image and video
869
+ # are processed separately by storing image path and frame info as meta data
870
+ if key in ["index", "episode_index", "task_index"] or ft["dtype"] in ["image", "video"]:
871
+ continue
872
+ episode_buffer[key] = np.stack(episode_buffer[key])
873
+
874
+ self._wait_image_writer()
875
+ self._save_episode_table(episode_buffer, episode_index)
876
+ ep_stats = compute_episode_stats(episode_buffer, self.features)
877
+
878
+ if len(self.meta.video_keys) > 0:
879
+ video_paths = self.encode_episode_videos(episode_index)
880
+ for key in self.meta.video_keys:
881
+ episode_buffer[key] = video_paths[key]
882
+
883
+ # `meta.save_episode` must be executed after encoding the videos
884
+ self.meta.save_episode(episode_index, episode_length, episode_tasks, ep_stats)
885
+
886
+ ep_data_index = get_episode_data_index(self.meta.episodes, [episode_index])
887
+ ep_data_index_np = {k: t.numpy() for k, t in ep_data_index.items()}
888
+ check_timestamps_sync(
889
+ episode_buffer["timestamp"],
890
+ episode_buffer["episode_index"],
891
+ ep_data_index_np,
892
+ self.fps,
893
+ self.tolerance_s,
894
+ )
895
+
896
+ video_files = list(self.root.rglob("*.mp4"))
897
+ assert len(video_files) == self.num_episodes * len(self.meta.video_keys)
898
+
899
+ parquet_files = list(self.root.rglob("*.parquet"))
900
+ assert len(parquet_files) == self.num_episodes
901
+
902
+ # delete images
903
+ img_dir = self.root / "images"
904
+ if img_dir.is_dir():
905
+ shutil.rmtree(self.root / "images")
906
+
907
+ if not episode_data: # Reset the buffer
908
+ self.episode_buffer = self.create_episode_buffer()
909
+
910
+ def _save_episode_table(self, episode_buffer: dict, episode_index: int) -> None:
911
+ episode_dict = {key: episode_buffer[key] for key in self.hf_features}
912
+ ep_dataset = datasets.Dataset.from_dict(episode_dict, features=self.hf_features, split="train")
913
+ ep_dataset = embed_images(ep_dataset)
914
+ self.hf_dataset = concatenate_datasets([self.hf_dataset, ep_dataset])
915
+ self.hf_dataset.set_transform(hf_transform_to_torch)
916
+ ep_data_path = self.root / self.meta.get_data_file_path(ep_index=episode_index)
917
+ ep_data_path.parent.mkdir(parents=True, exist_ok=True)
918
+ ep_dataset.to_parquet(ep_data_path)
919
+
920
+ def clear_episode_buffer(self) -> None:
921
+ episode_index = self.episode_buffer["episode_index"]
922
+ if self.image_writer is not None:
923
+ for cam_key in self.meta.camera_keys:
924
+ img_dir = self._get_image_file_path(
925
+ episode_index=episode_index, image_key=cam_key, frame_index=0
926
+ ).parent
927
+ if img_dir.is_dir():
928
+ shutil.rmtree(img_dir)
929
+
930
+ # Reset the buffer
931
+ self.episode_buffer = self.create_episode_buffer()
932
+
933
+ def start_image_writer(self, num_processes: int = 0, num_threads: int = 4) -> None:
934
+ if isinstance(self.image_writer, AsyncImageWriter):
935
+ logging.warning(
936
+ "You are starting a new AsyncImageWriter that is replacing an already existing one in the dataset."
937
+ )
938
+
939
+ self.image_writer = AsyncImageWriter(
940
+ num_processes=num_processes,
941
+ num_threads=num_threads,
942
+ )
943
+
944
+ def stop_image_writer(self) -> None:
945
+ """
946
+ Whenever wrapping this dataset inside a parallelized DataLoader, this needs to be called first to
947
+ remove the image_writer in order for the LeRobotDataset object to be pickleable and parallelized.
948
+ """
949
+ if self.image_writer is not None:
950
+ self.image_writer.stop()
951
+ self.image_writer = None
952
+
953
+ def _wait_image_writer(self) -> None:
954
+ """Wait for asynchronous image writer to finish."""
955
+ if self.image_writer is not None:
956
+ self.image_writer.wait_until_done()
957
+
958
+ def encode_videos(self) -> None:
959
+ """
960
+ Use ffmpeg to convert frames stored as png into mp4 videos.
961
+ Note: `encode_video_frames` is a blocking call. Making it asynchronous shouldn't speedup encoding,
962
+ since video encoding with ffmpeg is already using multithreading.
963
+ """
964
+ for ep_idx in range(self.meta.total_episodes):
965
+ self.encode_episode_videos(ep_idx)
966
+
967
+ def encode_episode_videos(self, episode_index: int) -> dict:
968
+ """
969
+ Use ffmpeg to convert frames stored as png into mp4 videos.
970
+ Note: `encode_video_frames` is a blocking call. Making it asynchronous shouldn't speedup encoding,
971
+ since video encoding with ffmpeg is already using multithreading.
972
+ """
973
+ video_paths = {}
974
+ for key in self.meta.video_keys:
975
+ video_path = self.root / self.meta.get_video_file_path(episode_index, key)
976
+ video_paths[key] = str(video_path)
977
+ if video_path.is_file():
978
+ # Skip if video is already encoded. Could be the case when resuming data recording.
979
+ continue
980
+ img_dir = self._get_image_file_path(
981
+ episode_index=episode_index, image_key=key, frame_index=0
982
+ ).parent
983
+ encode_video_frames(img_dir, video_path, self.fps, overwrite=True)
984
+
985
+ return video_paths
986
+
987
+ @classmethod
988
+ def create(
989
+ cls,
990
+ repo_id: str,
991
+ fps: int,
992
+ root: str | Path | None = None,
993
+ robot: Robot | None = None,
994
+ robot_type: str | None = None,
995
+ features: dict | None = None,
996
+ use_videos: bool = True,
997
+ tolerance_s: float = 1e-4,
998
+ image_writer_processes: int = 0,
999
+ image_writer_threads: int = 0,
1000
+ video_backend: str | None = None,
1001
+ ) -> "LeRobotDataset":
1002
+ """Create a LeRobot Dataset from scratch in order to record data."""
1003
+ obj = cls.__new__(cls)
1004
+ obj.meta = LeRobotDatasetMetadata.create(
1005
+ repo_id=repo_id,
1006
+ fps=fps,
1007
+ root=root,
1008
+ robot=robot,
1009
+ robot_type=robot_type,
1010
+ features=features,
1011
+ use_videos=use_videos,
1012
+ )
1013
+ obj.repo_id = obj.meta.repo_id
1014
+ obj.root = obj.meta.root
1015
+ obj.revision = None
1016
+ obj.tolerance_s = tolerance_s
1017
+ obj.image_writer = None
1018
+
1019
+ if image_writer_processes or image_writer_threads:
1020
+ obj.start_image_writer(image_writer_processes, image_writer_threads)
1021
+
1022
+ # TODO(aliberts, rcadene, alexander-soare): Merge this with OnlineBuffer/DataBuffer
1023
+ obj.episode_buffer = obj.create_episode_buffer()
1024
+
1025
+ obj.episodes = None
1026
+ obj.hf_dataset = obj.create_hf_dataset()
1027
+ obj.image_transforms = None
1028
+ obj.delta_timestamps = None
1029
+ obj.delta_indices = None
1030
+ obj.episode_data_index = None
1031
+ obj.video_backend = video_backend if video_backend is not None else get_safe_default_codec()
1032
+ return obj
1033
+
1034
+
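A minimal sketch of the recording workflow exposed by the class above (create, add_frame, save_episode), using a hand-written feature spec instead of a Robot and skipping video encoding; the repo_id, fps and shapes are illustrative.

import numpy as np

from lerobot.common.datasets.lerobot_dataset import LeRobotDataset

features = {
    "observation.state": {"dtype": "float32", "shape": (6,), "names": None},
    "action": {"dtype": "float32", "shape": (6,), "names": None},
}
dataset = LeRobotDataset.create("my_user/my_dataset", fps=30, features=features, use_videos=False)

for _ in range(2):  # two short episodes
    for _ in range(30):
        dataset.add_frame(
            {
                "observation.state": np.zeros(6, dtype=np.float32),
                "action": np.zeros(6, dtype=np.float32),
                "task": "example task",
            }
        )
    dataset.save_episode()
# dataset.push_to_hub()  # optionally upload the recorded dataset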
1035
+ class MultiLeRobotDataset(torch.utils.data.Dataset):
1036
+ """A dataset consisting of multiple underlying `LeRobotDataset`s.
1037
+
1038
+ The underlying `LeRobotDataset`s are effectively concatenated, and this class adopts much of the API
1039
+ structure of `LeRobotDataset`.
1040
+ """
1041
+
1042
+ def __init__(
1043
+ self,
1044
+ repo_ids: list[str],
1045
+ root: str | Path | None = None,
1046
+ episodes: dict | None = None,
1047
+ image_transforms: Callable | None = None,
1048
+ delta_timestamps: dict[list[float]] | None = None,
1049
+ tolerances_s: dict | None = None,
1050
+ download_videos: bool = True,
1051
+ video_backend: str | None = None,
1052
+ ):
1053
+ super().__init__()
1054
+ self.repo_ids = repo_ids
1055
+ self.root = Path(root) if root else HF_LEROBOT_HOME
1056
+ self.tolerances_s = tolerances_s if tolerances_s else {repo_id: 1e-4 for repo_id in repo_ids}
1057
+ # Construct the underlying datasets passing everything but `transform` and `delta_timestamps` which
1058
+ # are handled by this class.
1059
+ self._datasets = [
1060
+ LeRobotDataset(
1061
+ repo_id,
1062
+ root=self.root / repo_id,
1063
+ episodes=episodes[repo_id] if episodes else None,
1064
+ image_transforms=image_transforms,
1065
+ delta_timestamps=delta_timestamps,
1066
+ tolerance_s=self.tolerances_s[repo_id],
1067
+ download_videos=download_videos,
1068
+ video_backend=video_backend,
1069
+ )
1070
+ for repo_id in repo_ids
1071
+ ]
1072
+
1073
+ # Disable any data keys that are not common across all of the datasets. Note: we may relax this
1074
+ # restriction in future iterations of this class. For now, this is necessary at least for being able
1075
+ # to use PyTorch's default DataLoader collate function.
1076
+ self.disabled_features = set()
1077
+ intersection_features = set(self._datasets[0].features)
1078
+ for ds in self._datasets:
1079
+ intersection_features.intersection_update(ds.features)
1080
+ if len(intersection_features) == 0:
1081
+ raise RuntimeError(
1082
+ "Multiple datasets were provided but they had no keys common to all of them. "
1083
+ "The multi-dataset functionality currently only keeps common keys."
1084
+ )
1085
+ for repo_id, ds in zip(self.repo_ids, self._datasets, strict=True):
1086
+ extra_keys = set(ds.features).difference(intersection_features)
1087
+ logging.warning(
1088
+ f"keys {extra_keys} of {repo_id} were disabled as they are not contained in all the "
1089
+ "other datasets."
1090
+ )
1091
+ self.disabled_features.update(extra_keys)
1092
+
1093
+ self.image_transforms = image_transforms
1094
+ self.delta_timestamps = delta_timestamps
1095
+ # TODO(rcadene, aliberts): We should not perform this aggregation for datasets
1096
+ # with multiple robots of different ranges. Instead we should have one normalization
1097
+ # per robot.
1098
+ self.stats = aggregate_stats([dataset.meta.stats for dataset in self._datasets])
1099
+
1100
+ @property
1101
+ def repo_id_to_index(self):
1102
+ """Return a mapping from dataset repo_id to a dataset index automatically created by this class.
1103
+
1104
+ This index is incorporated as a data key in the dictionary returned by `__getitem__`.
1105
+ """
1106
+ return {repo_id: i for i, repo_id in enumerate(self.repo_ids)}
1107
+
1108
+ @property
1109
+ def repo_index_to_id(self):
1110
+ """Return the inverse mapping if repo_id_to_index."""
1111
+ return {v: k for k, v in self.repo_id_to_index.items()}
1112
+
1113
+ @property
1114
+ def fps(self) -> int:
1115
+ """Frames per second used during data collection.
1116
+
1117
+ NOTE: For now, this relies on a check in __init__ to make sure all sub-datasets have the same info.
1118
+ """
1119
+ return self._datasets[0].meta.info["fps"]
1120
+
1121
+ @property
1122
+ def video(self) -> bool:
1123
+ """Returns True if this dataset loads video frames from mp4 files.
1124
+
1125
+ Returns False if it only loads images from png files.
1126
+
1127
+ NOTE: For now, this relies on a check in __init__ to make sure all sub-datasets have the same info.
1128
+ """
1129
+ return self._datasets[0].meta.info.get("video", False)
1130
+
1131
+ @property
1132
+ def features(self) -> datasets.Features:
1133
+ features = {}
1134
+ for dataset in self._datasets:
1135
+ features.update({k: v for k, v in dataset.hf_features.items() if k not in self.disabled_features})
1136
+ return features
1137
+
1138
+ @property
1139
+ def camera_keys(self) -> list[str]:
1140
+ """Keys to access image and video stream from cameras."""
1141
+ keys = []
1142
+ for key, feats in self.features.items():
1143
+ if isinstance(feats, (datasets.Image, VideoFrame)):
1144
+ keys.append(key)
1145
+ return keys
1146
+
1147
+ @property
1148
+ def video_frame_keys(self) -> list[str]:
1149
+ """Keys to access video frames that requires to be decoded into images.
1150
+
1151
+ Note: It is empty if the dataset contains images only,
1152
+ or equal to `self.cameras` if the dataset contains videos only,
1153
+ or can even be a subset of `self.cameras` in a case of a mixed image/video dataset.
1154
+ """
1155
+ video_frame_keys = []
1156
+ for key, feats in self.features.items():
1157
+ if isinstance(feats, VideoFrame):
1158
+ video_frame_keys.append(key)
1159
+ return video_frame_keys
1160
+
1161
+ @property
1162
+ def num_frames(self) -> int:
1163
+ """Number of samples/frames."""
1164
+ return sum(d.num_frames for d in self._datasets)
1165
+
1166
+ @property
1167
+ def num_episodes(self) -> int:
1168
+ """Number of episodes."""
1169
+ return sum(d.num_episodes for d in self._datasets)
1170
+
1171
+ @property
1172
+ def tolerance_s(self) -> float:
1173
+ """Tolerance in seconds used to discard loaded frames when their timestamps
1174
+ are not close enough to the requested frames. It is only used when `delta_timestamps`
1175
+ is provided or when loading video frames from mp4 files.
1176
+ """
1177
+ # 1e-4 to account for possible numerical error
1178
+ return 1 / self.fps - 1e-4
1179
+
1180
+ def __len__(self):
1181
+ return self.num_frames
1182
+
1183
+ def __getitem__(self, idx: int) -> dict[str, torch.Tensor]:
1184
+ if idx >= len(self):
1185
+ raise IndexError(f"Index {idx} out of bounds.")
1186
+ # Determine which dataset to get an item from based on the index.
1187
+ start_idx = 0
1188
+ dataset_idx = 0
1189
+ for dataset in self._datasets:
1190
+ if idx >= start_idx + dataset.num_frames:
1191
+ start_idx += dataset.num_frames
1192
+ dataset_idx += 1
1193
+ continue
1194
+ break
1195
+ else:
1196
+ raise AssertionError("We expect the loop to break out as long as the index is within bounds.")
1197
+ item = self._datasets[dataset_idx][idx - start_idx]
1198
+ item["dataset_index"] = torch.tensor(dataset_idx)
1199
+ for data_key in self.disabled_features:
1200
+ if data_key in item:
1201
+ del item[data_key]
1202
+
1203
+ return item
1204
+
1205
+ def __repr__(self):
1206
+ return (
1207
+ f"{self.__class__.__name__}(\n"
1208
+ f" Repository IDs: '{self.repo_ids}',\n"
1209
+ f" Number of Samples: {self.num_frames},\n"
1210
+ f" Number of Episodes: {self.num_episodes},\n"
1211
+ f" Type: {'video (.mp4)' if self.video else 'image (.png)'},\n"
1212
+ f" Recorded Frames per Second: {self.fps},\n"
1213
+ f" Camera Keys: {self.camera_keys},\n"
1214
+ f" Video Frame Keys: {self.video_frame_keys if self.video else 'N/A'},\n"
1215
+ f" Transformations: {self.image_transforms},\n"
1216
+ f")"
1217
+ )
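To make the index routing in `MultiLeRobotDataset.__getitem__` above concrete, here is a small standalone sketch (not part of the library) of how a global frame index is mapped to a sub-dataset and a local index. The dataset sizes are made up for the example.

```python
# Hypothetical sub-dataset sizes (num_frames of each dataset in the collection).
sizes = [100, 250, 80]

def locate(idx: int) -> tuple[int, int]:
    """Map a global frame index to (dataset_idx, local_idx), mirroring __getitem__ above."""
    start_idx = 0
    for dataset_idx, num_frames in enumerate(sizes):
        if idx < start_idx + num_frames:
            return dataset_idx, idx - start_idx
        start_idx += num_frames
    raise IndexError(f"Index {idx} out of bounds.")

assert locate(0) == (0, 0)      # first frame of the first dataset
assert locate(120) == (1, 20)   # falls in the second dataset
assert locate(429) == (2, 79)   # last frame of the third dataset
```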
lerobot/common/datasets/online_buffer.py ADDED
@@ -0,0 +1,384 @@
#!/usr/bin/env python

# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""An online buffer for the online training loop in train.py.

Note to maintainers: This duplicates some logic from LeRobotDataset and EpisodeAwareSampler. We should
consider converging to one approach. Here we have opted to use numpy.memmap to back the data buffer. It's
much faster than using HuggingFace Datasets, as there is no conversion to an intermediate non-Python
object. It also supports in-place slicing and mutation, which is very handy for a dynamic buffer.
"""

import os
from pathlib import Path
from typing import Any

import numpy as np
import torch

from lerobot.common.datasets.lerobot_dataset import LeRobotDataset


def _make_memmap_safe(**kwargs) -> np.memmap:
    """Make a numpy memmap with checks on available disk space first.

    Expected kwargs are: "filename", "dtype" (must be np.dtype), "mode" and "shape".

    For information on dtypes:
    https://numpy.org/doc/stable/reference/arrays.dtypes.html#arrays-dtypes-constructing
    """
    if kwargs["mode"].startswith("w"):
        required_space = kwargs["dtype"].itemsize * np.prod(kwargs["shape"])  # bytes
        stats = os.statvfs(Path(kwargs["filename"]).parent)
        available_space = stats.f_bavail * stats.f_frsize  # bytes
        if required_space >= available_space * 0.8:
            raise RuntimeError(
                f"You're about to take up {required_space} of {available_space} bytes available."
            )
    return np.memmap(**kwargs)


class OnlineBuffer(torch.utils.data.Dataset):
    """FIFO data buffer for the online training loop in train.py.

    Follows the protocol of LeRobotDataset as much as is required for it to be used by the online training
    loop in the same way that a LeRobotDataset would be.

    The underlying data structure has data inserted in a circular fashion: always insert after the last
    index, and when the end is reached, wrap around to the start.

    The data is stored in a numpy memmap.
    """

    NEXT_INDEX_KEY = "_next_index"
    OCCUPANCY_MASK_KEY = "_occupancy_mask"
    INDEX_KEY = "index"
    FRAME_INDEX_KEY = "frame_index"
    EPISODE_INDEX_KEY = "episode_index"
    TIMESTAMP_KEY = "timestamp"
    IS_PAD_POSTFIX = "_is_pad"

    def __init__(
        self,
        write_dir: str | Path,
        data_spec: dict[str, Any] | None,
        buffer_capacity: int | None,
        fps: float | None = None,
        delta_timestamps: dict[str, list[float]] | dict[str, np.ndarray] | None = None,
    ):
        """
        The online buffer can be created from scratch, or an existing buffer can be loaded by passing a
        `write_dir` associated with it.

        Args:
            write_dir: Where to keep the numpy memmap files. One memmap file is stored per data key.
                Note that if the files already exist, they are opened in read-write mode (used for
                training resumption).
            data_spec: A mapping from data key to data specification, like {data_key: {"shape": tuple[int],
                "dtype": np.dtype}}. This should include all the data that you wish to record into the
                buffer, but note that "index", "frame_index", "episode_index" and "timestamp" are already
                accounted for by this class, so you don't need to include them.
            buffer_capacity: The maximum number of frames to store in the buffer. Be aware of your
                system's available disk space when choosing this.
            fps: Same as the fps concept in LeRobotDataset. It needs to be provided here for the
                delta_timestamps logic. You can pass None if you are not using delta_timestamps.
            delta_timestamps: Same as the delta_timestamps concept in LeRobotDataset. This is internally
                converted to dict[str, np.ndarray] for optimization purposes.
        """
        self.set_delta_timestamps(delta_timestamps)
        self._fps = fps
        # Tolerance in seconds used to discard loaded frames when their timestamps are not close enough to
        # the requested frames. It is only used when `delta_timestamps` is provided.
        # Minus 1e-4 to account for possible numerical error.
        self.tolerance_s = 1 / self.fps - 1e-4 if fps is not None else None
        self._buffer_capacity = buffer_capacity
        data_spec = self._make_data_spec(data_spec, buffer_capacity)
        Path(write_dir).mkdir(parents=True, exist_ok=True)
        self._data = {}
        for k, v in data_spec.items():
            self._data[k] = _make_memmap_safe(
                filename=Path(write_dir) / k,
                dtype=v["dtype"] if v is not None else None,
                mode="r+" if (Path(write_dir) / k).exists() else "w+",
                shape=tuple(v["shape"]) if v is not None else None,
            )

    @property
    def delta_timestamps(self) -> dict[str, np.ndarray] | None:
        return self._delta_timestamps

    def set_delta_timestamps(self, value: dict[str, list[float]] | None):
        """Set delta_timestamps, converting the values to numpy arrays.

        The conversion is an optimization for __getitem__: the loop there is much slower if the values
        have to be converted to numpy arrays on every access.
        """
        if value is not None:
            self._delta_timestamps = {k: np.array(v) for k, v in value.items()}
        else:
            self._delta_timestamps = None

    def _make_data_spec(self, data_spec: dict[str, Any], buffer_capacity: int) -> dict[str, dict[str, Any]]:
        """Makes the data spec for np.memmap."""
        if any(k.startswith("_") for k in data_spec):
            raise ValueError(
                "data_spec keys should not start with '_'. This prefix is reserved for internal logic."
            )
        preset_keys = {
            OnlineBuffer.INDEX_KEY,
            OnlineBuffer.FRAME_INDEX_KEY,
            OnlineBuffer.EPISODE_INDEX_KEY,
            OnlineBuffer.TIMESTAMP_KEY,
        }
        if len(intersection := set(data_spec).intersection(preset_keys)) > 0:
            raise ValueError(
                f"data_spec should not contain any of {preset_keys} as these are handled internally. "
                f"The provided data_spec has {intersection}."
            )
        complete_data_spec = {
            # _next_index will be a pointer to the next index that we should start filling from when we
            # add more data.
            OnlineBuffer.NEXT_INDEX_KEY: {"dtype": np.dtype("int64"), "shape": ()},
            # Since the memmap is initialized with all-zeros, this keeps track of which indices are
            # occupied with real data rather than the dummy initialization.
            OnlineBuffer.OCCUPANCY_MASK_KEY: {"dtype": np.dtype("?"), "shape": (buffer_capacity,)},
            OnlineBuffer.INDEX_KEY: {"dtype": np.dtype("int64"), "shape": (buffer_capacity,)},
            OnlineBuffer.FRAME_INDEX_KEY: {"dtype": np.dtype("int64"), "shape": (buffer_capacity,)},
            OnlineBuffer.EPISODE_INDEX_KEY: {"dtype": np.dtype("int64"), "shape": (buffer_capacity,)},
            OnlineBuffer.TIMESTAMP_KEY: {"dtype": np.dtype("float64"), "shape": (buffer_capacity,)},
        }
        for k, v in data_spec.items():
            complete_data_spec[k] = {"dtype": v["dtype"], "shape": (buffer_capacity, *v["shape"])}
        return complete_data_spec

    def add_data(self, data: dict[str, np.ndarray]):
        """Add new data to the buffer, which could potentially mean shifting old data out.

        The new data should contain all the frames (in order) of any number of episodes. The indices should
        start from 0 (note to the developer: this can easily be generalized). See the `rollout` and
        `eval_policy` functions in `eval.py` for more information on how the data is constructed.

        Shifts the incoming data index and episode_index to continue on from the last frame. Note that this
        is done in place!
        """
        if len(missing_keys := (set(self.data_keys).difference(set(data)))) > 0:
            raise ValueError(f"Missing data keys: {missing_keys}")
        new_data_length = len(data[self.data_keys[0]])
        if not all(len(data[k]) == new_data_length for k in self.data_keys):
            raise ValueError("All data items should have the same length")

        next_index = self._data[OnlineBuffer.NEXT_INDEX_KEY]

        # Sanity check to make sure that the new data indices start from 0.
        assert data[OnlineBuffer.EPISODE_INDEX_KEY][0].item() == 0
        assert data[OnlineBuffer.INDEX_KEY][0].item() == 0

        # Shift the incoming indices if necessary.
        if self.num_frames > 0:
            last_episode_index = self._data[OnlineBuffer.EPISODE_INDEX_KEY][next_index - 1]
            last_data_index = self._data[OnlineBuffer.INDEX_KEY][next_index - 1]
            data[OnlineBuffer.EPISODE_INDEX_KEY] += last_episode_index + 1
            data[OnlineBuffer.INDEX_KEY] += last_data_index + 1

        # Insert the new data starting from next_index. It may be necessary to wrap around to the start.
        n_surplus = max(0, new_data_length - (self._buffer_capacity - next_index))
        for k in self.data_keys:
            if n_surplus == 0:
                slc = slice(next_index, next_index + new_data_length)
                self._data[k][slc] = data[k]
                self._data[OnlineBuffer.OCCUPANCY_MASK_KEY][slc] = True
            else:
                self._data[k][next_index:] = data[k][:-n_surplus]
                self._data[OnlineBuffer.OCCUPANCY_MASK_KEY][next_index:] = True
                self._data[k][:n_surplus] = data[k][-n_surplus:]
        if n_surplus == 0:
            self._data[OnlineBuffer.NEXT_INDEX_KEY] = next_index + new_data_length
        else:
            self._data[OnlineBuffer.NEXT_INDEX_KEY] = n_surplus

    @property
    def data_keys(self) -> list[str]:
        keys = set(self._data)
        keys.remove(OnlineBuffer.OCCUPANCY_MASK_KEY)
        keys.remove(OnlineBuffer.NEXT_INDEX_KEY)
        return sorted(keys)

    @property
    def fps(self) -> float | None:
        return self._fps

    @property
    def num_episodes(self) -> int:
        return len(
            np.unique(self._data[OnlineBuffer.EPISODE_INDEX_KEY][self._data[OnlineBuffer.OCCUPANCY_MASK_KEY]])
        )

    @property
    def num_frames(self) -> int:
        return np.count_nonzero(self._data[OnlineBuffer.OCCUPANCY_MASK_KEY])

    def __len__(self):
        return self.num_frames

    def _item_to_tensors(self, item: dict) -> dict:
        item_ = {}
        for k, v in item.items():
            if isinstance(v, torch.Tensor):
                item_[k] = v
            elif isinstance(v, np.ndarray):
                item_[k] = torch.from_numpy(v)
            else:
                item_[k] = torch.tensor(v)
        return item_

    def __getitem__(self, idx: int) -> dict[str, torch.Tensor]:
        if idx >= len(self) or idx < -len(self):
            raise IndexError

        item = {k: v[idx] for k, v in self._data.items() if not k.startswith("_")}

        if self.delta_timestamps is None:
            return self._item_to_tensors(item)

        episode_index = item[OnlineBuffer.EPISODE_INDEX_KEY]
        current_ts = item[OnlineBuffer.TIMESTAMP_KEY]
        episode_data_indices = np.where(
            np.bitwise_and(
                self._data[OnlineBuffer.EPISODE_INDEX_KEY] == episode_index,
                self._data[OnlineBuffer.OCCUPANCY_MASK_KEY],
            )
        )[0]
        episode_timestamps = self._data[OnlineBuffer.TIMESTAMP_KEY][episode_data_indices]

        for data_key in self.delta_timestamps:
            # Note: The logic in this loop is copied from `load_previous_and_future_frames`.
            # Get timestamps used as query to retrieve data of previous/future frames.
            query_ts = current_ts + self.delta_timestamps[data_key]

            # Compute distances between each query timestamp and all timestamps of all the frames
            # belonging to the episode.
            dist = np.abs(query_ts[:, None] - episode_timestamps[None, :])
            argmin_ = np.argmin(dist, axis=1)
            min_ = dist[np.arange(dist.shape[0]), argmin_]

            is_pad = min_ > self.tolerance_s

            # Check that the query timestamps which violate the tolerance all fall outside the episode range.
            assert (
                (query_ts[is_pad] < episode_timestamps[0]) | (episode_timestamps[-1] < query_ts[is_pad])
            ).all(), (
                f"One or several timestamps unexpectedly violate the tolerance ({min_} > {self.tolerance_s=}"
                ") inside the episode range."
            )

            # Load frames for this data key.
            item[data_key] = self._data[data_key][episode_data_indices[argmin_]]

            item[f"{data_key}{OnlineBuffer.IS_PAD_POSTFIX}"] = is_pad

        return self._item_to_tensors(item)

    def get_data_by_key(self, key: str) -> torch.Tensor:
        """Returns all data for a given data key as a Tensor."""
        return torch.from_numpy(self._data[key][self._data[OnlineBuffer.OCCUPANCY_MASK_KEY]])
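For orientation, here is a minimal usage sketch of `OnlineBuffer`. It is not part of the repository: the write directory, data key, state dimension, fps and delta_timestamps are made up, and it assumes a POSIX filesystem because `_make_memmap_safe` relies on `os.statvfs`.

```python
import tempfile

import numpy as np

from lerobot.common.datasets.online_buffer import OnlineBuffer

fps = 10
num_frames = 5
buffer = OnlineBuffer(
    write_dir=tempfile.mkdtemp(),  # hypothetical location for the memmap files
    data_spec={"observation.state": {"shape": (2,), "dtype": np.dtype("float32")}},
    buffer_capacity=100,
    fps=fps,
    delta_timestamps={"observation.state": [-0.1, 0.0]},
)

# add_data expects whole episodes whose "index" and "episode_index" start at 0;
# the buffer shifts them to continue from whatever it already contains.
buffer.add_data(
    {
        "index": np.arange(num_frames),
        "frame_index": np.arange(num_frames),
        "episode_index": np.zeros(num_frames, dtype=np.int64),
        "timestamp": np.arange(num_frames) / fps,
        "observation.state": np.random.randn(num_frames, 2).astype(np.float32),
    }
)

print(len(buffer), buffer.num_episodes)   # 5 1
item = buffer[0]
print(item["observation.state"].shape)    # torch.Size([2, 2]): one row per delta timestamp
print(item["observation.state_is_pad"])   # tensor([ True, False]): the -0.1 query precedes the episode
```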


def compute_sampler_weights(
    offline_dataset: LeRobotDataset,
    offline_drop_n_last_frames: int = 0,
    online_dataset: OnlineBuffer | None = None,
    online_sampling_ratio: float | None = None,
    online_drop_n_last_frames: int = 0,
) -> torch.Tensor:
    """Compute the sampling weights for the online training dataloader in train.py.

    Args:
        offline_dataset: The LeRobotDataset used for offline pre-training.
        offline_drop_n_last_frames: Number of frames to drop from the end of each offline dataset episode.
        online_dataset: The OnlineBuffer used in online training.
        online_sampling_ratio: The proportion of data that should be sampled from the online dataset. If an
            online dataset is provided, this value must also be provided.
        online_drop_n_last_frames: See `offline_drop_n_last_frames`. This is the same, but for the online
            dataset.
    Returns:
        Tensor of weights for [offline_dataset; online_dataset], normalized to 1.

    Notes to maintainers:
        - This duplicates some logic from EpisodeAwareSampler. We should consider converging to one approach.
        - When used with `torch.utils.data.WeightedRandomSampler`, it could completely replace
          `EpisodeAwareSampler` as the online dataset related arguments are optional. The only missing
          feature is the ability to turn shuffling off.
        - Options `drop_first_n_frames` and `episode_indices_to_use` can be added easily. They were not
          included here to avoid adding complexity.
    """
    if len(offline_dataset) == 0 and (online_dataset is None or len(online_dataset) == 0):
        raise ValueError("At least one of `offline_dataset` or `online_dataset` should contain data.")
    if (online_dataset is None) ^ (online_sampling_ratio is None):
        raise ValueError(
            "`online_dataset` and `online_sampling_ratio` must be provided together or not at all."
        )
    offline_sampling_ratio = 0 if online_sampling_ratio is None else 1 - online_sampling_ratio

    weights = []

    if len(offline_dataset) > 0:
        offline_data_mask_indices = []
        for start_index, end_index in zip(
            offline_dataset.episode_data_index["from"],
            offline_dataset.episode_data_index["to"],
            strict=True,
        ):
            offline_data_mask_indices.extend(
                range(start_index.item(), end_index.item() - offline_drop_n_last_frames)
            )
        offline_data_mask = torch.zeros(len(offline_dataset), dtype=torch.bool)
        offline_data_mask[torch.tensor(offline_data_mask_indices)] = True
        weights.append(
            torch.full(
                size=(len(offline_dataset),),
                fill_value=offline_sampling_ratio / offline_data_mask.sum(),
            )
            * offline_data_mask
        )

    if online_dataset is not None and len(online_dataset) > 0:
        online_data_mask_indices = []
        episode_indices = online_dataset.get_data_by_key("episode_index")
        for episode_idx in torch.unique(episode_indices):
            where_episode = torch.where(episode_indices == episode_idx)
            start_index = where_episode[0][0]
            end_index = where_episode[0][-1] + 1
            online_data_mask_indices.extend(
                range(start_index.item(), end_index.item() - online_drop_n_last_frames)
            )
        online_data_mask = torch.zeros(len(online_dataset), dtype=torch.bool)
        online_data_mask[torch.tensor(online_data_mask_indices)] = True
        weights.append(
            torch.full(
                size=(len(online_dataset),),
                fill_value=online_sampling_ratio / online_data_mask.sum(),
            )
            * online_data_mask
        )

    weights = torch.cat(weights)

    if weights.sum() == 0:
        weights += 1 / len(weights)
    else:
        weights /= weights.sum()

    return weights
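As a worked example of the weighting scheme implemented by `compute_sampler_weights`, the snippet below reproduces the arithmetic by hand for made-up sizes (an offline dataset of 6 frames with its last 2 frames dropped, an online buffer of 4 frames with its last frame dropped, `online_sampling_ratio=0.5`) and feeds the result to `torch.utils.data.WeightedRandomSampler`, as the docstring suggests. It does not call the function itself, since that requires a real `LeRobotDataset`.

```python
import torch

online_sampling_ratio = 0.5
offline_mask = torch.tensor([1, 1, 1, 1, 0, 0], dtype=torch.bool)  # offline_drop_n_last_frames=2
online_mask = torch.tensor([1, 1, 1, 0], dtype=torch.bool)         # online_drop_n_last_frames=1

weights = torch.cat(
    [
        (1 - online_sampling_ratio) / offline_mask.sum() * offline_mask,  # 0.5 / 4 per eligible offline frame
        online_sampling_ratio / online_mask.sum() * online_mask,          # 0.5 / 3 per eligible online frame
    ]
)
weights /= weights.sum()
# tensor([0.1250, 0.1250, 0.1250, 0.1250, 0.0000, 0.0000, 0.1667, 0.1667, 0.1667, 0.0000])

sampler = torch.utils.data.WeightedRandomSampler(weights, num_samples=len(weights), replacement=True)
```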