Buckets:

rtrm's picture
download
raw
32.6 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Imitation Learning in Sim&quot;,&quot;local&quot;:&quot;imitation-learning-in-sim&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Installation&quot;,&quot;local&quot;:&quot;installation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Teleoperate and Record a Dataset&quot;,&quot;local&quot;:&quot;teleoperate-and-record-a-dataset&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Visualize a dataset&quot;,&quot;local&quot;:&quot;visualize-a-dataset&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Train a policy&quot;,&quot;local&quot;:&quot;train-a-policy&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Train using Collab&quot;,&quot;local&quot;:&quot;train-using-collab&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;Upload policy checkpoints&quot;,&quot;local&quot;:&quot;upload-policy-checkpoints&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Evaluate your policy in Sim&quot;,&quot;local&quot;:&quot;evaluate-your-policy-in-sim&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/lerobot/pr_1713/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/lerobot/pr_1713/en/_app/immutable/entry/start.ff6c7d92.js">
<link rel="modulepreload" href="/docs/lerobot/pr_1713/en/_app/immutable/chunks/scheduler.f6b352c8.js">
<link rel="modulepreload" href="/docs/lerobot/pr_1713/en/_app/immutable/chunks/singletons.8fa76063.js">
<link rel="modulepreload" href="/docs/lerobot/pr_1713/en/_app/immutable/chunks/index.26cf6c5a.js">
<link rel="modulepreload" href="/docs/lerobot/pr_1713/en/_app/immutable/chunks/paths.e7fcefe1.js">
<link rel="modulepreload" href="/docs/lerobot/pr_1713/en/_app/immutable/entry/app.492c5e10.js">
<link rel="modulepreload" href="/docs/lerobot/pr_1713/en/_app/immutable/chunks/index.b90df637.js">
<link rel="modulepreload" href="/docs/lerobot/pr_1713/en/_app/immutable/nodes/0.5be955ad.js">
<link rel="modulepreload" href="/docs/lerobot/pr_1713/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/lerobot/pr_1713/en/_app/immutable/nodes/11.25291f4d.js">
<link rel="modulepreload" href="/docs/lerobot/pr_1713/en/_app/immutable/chunks/Tip.366d2e6e.js">
<link rel="modulepreload" href="/docs/lerobot/pr_1713/en/_app/immutable/chunks/CodeBlock.e5718f9d.js">
<link rel="modulepreload" href="/docs/lerobot/pr_1713/en/_app/immutable/chunks/getInferenceSnippets.00196ff1.js">
<link rel="modulepreload" href="/docs/lerobot/pr_1713/en/_app/immutable/chunks/HfOption.6047630d.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Imitation Learning in Sim&quot;,&quot;local&quot;:&quot;imitation-learning-in-sim&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Installation&quot;,&quot;local&quot;:&quot;installation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Teleoperate and Record a Dataset&quot;,&quot;local&quot;:&quot;teleoperate-and-record-a-dataset&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Visualize a dataset&quot;,&quot;local&quot;:&quot;visualize-a-dataset&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Train a policy&quot;,&quot;local&quot;:&quot;train-a-policy&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Train using Collab&quot;,&quot;local&quot;:&quot;train-using-collab&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;Upload policy checkpoints&quot;,&quot;local&quot;:&quot;upload-policy-checkpoints&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Evaluate your policy in Sim&quot;,&quot;local&quot;:&quot;evaluate-your-policy-in-sim&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="imitation-learning-in-sim" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#imitation-learning-in-sim"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 
1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Imitation Learning in Sim</span></h1> <p data-svelte-h="svelte-1d6gfr3">This tutorial will explain how to train a neural network to control a robot in simulation with imitation learning.</p> <p data-svelte-h="svelte-xo8ua7"><strong>You’ll learn:</strong></p> <ol data-svelte-h="svelte-gdnn74"><li>How to record a dataset in simulation with <a href="https://github.com/huggingface/gym-hil" rel="nofollow">gym-hil</a> and visualize the dataset.</li> <li>How to train a policy using your data.</li> <li>How to evaluate your policy in simulation and visualize the results.</li></ol> <p data-svelte-h="svelte-pnxe2d">For the simulation environment we use the same <a href="https://github.com/huggingface/gym-hil" rel="nofollow">repo</a> that is also being used by the Human-In-the-Loop (HIL) reinforcement learning algorithm.
This environment is based on <a href="https://mujoco.org" rel="nofollow">MuJoCo</a> and allows you to record datasets in LeRobotDataset format.
Teleoperation is easiest with a controller like the Logitech F710, but you can also use your keyboard if you are up for the challenge.</p> <h2 class="relative group"><a id="installation" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#installation"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Installation</span></h2> <p data-svelte-h="svelte-vhckiq">First, install the <code>gym_hil</code> package within the LeRobot environment, go to your LeRobot folder and run this command:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path 
d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pip install -e <span class="hljs-string">&quot;.[hilserl]&quot;</span><!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="teleoperate-and-record-a-dataset" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#teleoperate-and-record-a-dataset"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Teleoperate and Record a Dataset</span></h2> <p data-svelte-h="svelte-1xnuqd9">To use <code>gym_hil</code> with LeRobot, you need to use a configuration file. 
An example config file can be found <a href="https://huggingface.co/datasets/aractingi/lerobot-example-config-files/blob/main/env_config_gym_hil_il.json" rel="nofollow">here</a>.</p> <p data-svelte-h="svelte-1yqaaj0">To teleoperate and collect a dataset, modify this config file: add your <code>repo_id</code> (for example <code>&quot;repo_id&quot;: &quot;il_gym&quot;,</code>) and <code>&quot;num_episodes&quot;: 30,</code> and make sure you set <code>mode</code> to <code>record</code>: <code>&quot;mode&quot;: &quot;record&quot;</code>.</p> <p data-svelte-h="svelte-6a79y4">If you do not have an Nvidia GPU, also change the <code>&quot;device&quot;: &quot;cuda&quot;</code> parameter in the config file (for example to <code>mps</code> for MacOS).</p> <p data-svelte-h="svelte-1jc2n8g">By default the config file assumes you use a controller. To use your keyboard, please change the environment specified at <code>&quot;task&quot;</code> in the config file and set it to <code>&quot;PandaPickCubeKeyboard-v0&quot;</code>.</p> <p data-svelte-h="svelte-1wx09im">Then we can run this command to start:</p> <div class="flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd border-gray-800 bg-black dark:bg-gray-700 text-white">Linux </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">MacOS </div></div> <div class="language-select"><div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" 
preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->python -m lerobot.scripts.rl.gym_manipulator --config_path path/to/env_config_gym_hil_il.json<!-- HTML_TAG_END --></pre></div> </div> <p data-svelte-h="svelte-1fcnh3f">Once rendered you can teleoperate the robot with the gamepad or keyboard, below you can find the gamepad/keyboard controls.</p> <p data-svelte-h="svelte-gyrmwb">Note that to teleoperate the robot you have to hold the “Human Take Over Pause Policy” Button <code>RB</code> to enable control!</p> <p data-svelte-h="svelte-1sznzvk"><strong>Gamepad Controls</strong></p> <p align="center" data-svelte-h="svelte-l7xw97"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/lerobot/gamepad_guide.jpg?raw=true" alt="Figure shows the control mappings on a Logitech gamepad." 
title="Gamepad Control Mapping" width="100%"></p> <p align="center" data-svelte-h="svelte-i8xgvo"><i>Gamepad button mapping for robot control and episode management</i></p> <p data-svelte-h="svelte-l6ly5c"><strong>Keyboard controls</strong></p> <p data-svelte-h="svelte-u52c17">For keyboard controls use the <code>spacebar</code> to enable control and the following keys to move the robot:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --> Arrow keys: Move <span class="hljs-keyword">in</span> X-Y plane
Shift and Shift_R: Move <span class="hljs-keyword">in</span> Z axis
Right Ctrl and Left Ctrl: Open and close gripper
ESC: Exit<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="visualize-a-dataset" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#visualize-a-dataset"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Visualize a dataset</span></h2> <p data-svelte-h="svelte-vdprhj">If you uploaded your dataset to the hub you can <a href="https://huggingface.co/spaces/lerobot/visualize_dataset" rel="nofollow">visualize your dataset online</a> by copy pasting your repo id.</p> <p align="center" data-svelte-h="svelte-s1gjtk"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/lerobot/dataset_visualizer_sim.png" alt="Figure shows the dataset visualizer" title="Dataset visualization" width="100%"></p> <p align="center" data-svelte-h="svelte-dnkx88"><i>Dataset visualizer</i></p> <h2 class="relative group"><a id="train-a-policy" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#train-a-policy"><span><svg class="" xmlns="http://www.w3.org/2000/svg" 
xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Train a policy</span></h2> <p data-svelte-h="svelte-1ce314c">To train a policy to control your robot, use the <a href="https://github.com/huggingface/lerobot/blob/main/src/lerobot/scripts/train.py" rel="nofollow"><code>lerobot-train</code></a> script. A few arguments are required. Here is an example command:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div 
class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->lerobot-train \
--dataset.repo_id=<span class="hljs-variable">${HF_USER}</span>/il_gym \
--policy.type=act \
--output_dir=outputs/train/il_sim_test \
--job_name=il_sim_test \
--policy.device=cuda \
--wandb.enable=<span class="hljs-literal">true</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1aio4w2">Let’s explain the command:</p> <ol data-svelte-h="svelte-v0pokl"><li>We provided the dataset as an argument with <code>--dataset.repo_id=${HF_USER}/il_gym</code>.</li> <li>We provided the policy with <code>policy.type=act</code>. This loads configurations from <a href="https://github.com/huggingface/lerobot/blob/main/src/lerobot/policies/act/configuration_act.py" rel="nofollow"><code>configuration_act.py</code></a>. Importantly, this policy will automatically adapt to the number of motor states, motor actions and cameras of your robot (e.g. <code>laptop</code> and <code>phone</code>) which have been saved in your dataset.</li> <li>We provided <code>policy.device=cuda</code> since we are training on an Nvidia GPU, but you could use <code>policy.device=mps</code> to train on Apple silicon.</li> <li>We provided <code>wandb.enable=true</code> to use <a href="https://docs.wandb.ai/quickstart" rel="nofollow">Weights and Biases</a> for visualizing training plots. This is optional but if you use it, make sure you are logged in by running <code>wandb login</code>.</li></ol> <p data-svelte-h="svelte-1xiuroi">Training can take several hours depending on your hardware; 100k steps (which is the default) will take about 1h on an Nvidia A100. 
You will find checkpoints in <code>outputs/train/il_sim_test/checkpoints</code>.</p> <h4 class="relative group"><a id="train-using-collab" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#train-using-collab"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Train using Colab</span></h4> <p data-svelte-h="svelte-i80u5e">If your local computer doesn’t have a powerful GPU, you could utilize Google Colab to train your model by following the <a href="./notebooks#training-act">ACT training notebook</a>.</p> <h4 class="relative group"><a id="upload-policy-checkpoints" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#upload-policy-checkpoints"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 
28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Upload policy checkpoints</span></h4> <p data-svelte-h="svelte-15j4w0e">Once training is done, upload the latest checkpoint with:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->huggingface-cli upload <span class="hljs-variable">${HF_USER}</span>/il_sim_test \
outputs/train/il_sim_test/checkpoints/last/pretrained_model<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-ex9l9h">You can also upload intermediate checkpoints with:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->CKPT=010000
huggingface-cli upload <span class="hljs-variable">${HF_USER}</span>/il_sim_test<span class="hljs-variable">${CKPT}</span> \
outputs/train/il_sim_test/checkpoints/<span class="hljs-variable">${CKPT}</span>/pretrained_model<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="evaluate-your-policy-in-sim" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#evaluate-your-policy-in-sim"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Evaluate your policy in Sim</span></h2> <p data-svelte-h="svelte-1ixoihb">To evaluate your policy we have to use the config file that can be found <a href="https://huggingface.co/datasets/aractingi/lerobot-example-config-files/blob/main/eval_config_gym_hil.json" rel="nofollow">here</a>.</p> <p data-svelte-h="svelte-4z1icg">Make sure to replace the <code>repo_id</code> with the dataset you trained on, for example <code>pepijn223/il_sim_dataset</code> and replace the <code>pretrained_policy_name_or_path</code> with your model id, for example <code>pepijn223/il_sim_model</code></p> <p data-svelte-h="svelte-k5n1oj">Then you can run this command to visualize your trained policy</p> <div class="flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"><div class="flex items-center border rounded-lg px-1.5 py-1 
leading-none select-none text-smd border-gray-800 bg-black dark:bg-gray-700 text-white">Linux </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">MacOS </div></div> <div class="language-select"><div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->python -m lerobot.scripts.rl.eval_policy --config_path=path/to/eval_config_gym_hil.json<!-- HTML_TAG_END --></pre></div> </div> <div class="course-tip course-tip-orange bg-gradient-to-br dark:bg-gradient-to-r before:border-orange-500 dark:before:border-orange-800 from-orange-50 dark:from-gray-900 to-white dark:to-gray-950 border border-orange-50 text-orange-700 dark:text-gray-400"><p data-svelte-h="svelte-ujkl83">While the main workflow of 
training ACT in simulation is straightforward, there is significant room for exploring how to set up the task, define the initial state of the environment, and determine the type of data required during collection to learn the most effective policy. If your trained policy doesn’t perform well, investigate the quality of the dataset it was trained on using our visualizers, as well as the action values and various hyperparameters related to ACT and the simulation.</p></div> <p data-svelte-h="svelte-mwj09h">Congrats 🎉, you have finished this tutorial. If you want to continue with using LeRobot in simulation follow this <a href="https://huggingface.co/docs/lerobot/hilserl_sim" rel="nofollow">Tutorial on reinforcement learning in sim with HIL-SERL</a></p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-aht5b3">If you have any questions or need help, please reach out on <a href="https://discord.com/invite/s3KuuzsPFb" rel="nofollow">Discord</a>.</p></div> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/lerobot/blob/main/docs/source/il_sim.mdx" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>
<script>
// Client-side bootstrap for this prerendered page: publish the path settings
// on a global, load the two entry modules, then start the app on the element
// that contains this <script> tag.
{
// Assigned without var/let/const, so this is not block-scoped; the name is
// reachable from outside this block.
__sveltekit_1l4d05w = {
// `assets` and `base` share the same URL prefix for this docs build.
assets: "/docs/lerobot/pr_1713/en",
base: "/docs/lerobot/pr_1713/en",
env: {}
};
// Read synchronously, before any async work: document.currentScript only
// refers to this script while it is executing, not inside the .then() below.
const element = document.currentScript.parentElement;
// Two entries, the same count as `node_ids` below.
// NOTE(review): presumably one data slot per node id — confirm against the kit runtime.
const data = [null,null];
// Fetch both entry modules in parallel; the callback runs once both resolve.
Promise.all([
import("/docs/lerobot/pr_1713/en/_app/immutable/entry/start.ff6c7d92.js"),
import("/docs/lerobot/pr_1713/en/_app/immutable/entry/app.492c5e10.js")
]).then(([kit, app]) => {
// Start the app from the `start` entry, passing the `app` module, the target
// element captured above, and the initial options.
kit.start(app, element, {
node_ids: [0, 11],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
32.6 kB
·
Xet hash:
1cea643cfbd7d65b31e13b15ea883efd141a5e5b356a94669411b4f94adb3871

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.