Buckets:

HuggingFaceDocBuilder's picture
download
raw
32.2 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;GR00T N1.5 Policy&quot;,&quot;local&quot;:&quot;gr00t-n15-policy&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Model Overview&quot;,&quot;local&quot;:&quot;model-overview&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Installation Requirements&quot;,&quot;local&quot;:&quot;installation-requirements&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Usage&quot;,&quot;local&quot;:&quot;usage&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Training&quot;,&quot;local&quot;:&quot;training&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Training Command Example&quot;,&quot;local&quot;:&quot;training-command-example&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Performance Results&quot;,&quot;local&quot;:&quot;performance-results&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Libero Benchmark Results&quot;,&quot;local&quot;:&quot;libero-benchmark-results&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Evaluate in your hardware setup&quot;,&quot;local&quot;:&quot;evaluate-in-your-hardware-setup&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;License&quot;,&quot;local&quot;:&quot;license&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/lerobot/pr_3313/en/_app/immutable/assets/0.e3b0c442.css" rel="stylesheet">
<link rel="modulepreload" href="/docs/lerobot/pr_3313/en/_app/immutable/entry/start.d3f1c0f3.js">
<link rel="modulepreload" href="/docs/lerobot/pr_3313/en/_app/immutable/chunks/scheduler.eb244325.js">
<link rel="modulepreload" href="/docs/lerobot/pr_3313/en/_app/immutable/chunks/singletons.1f33814c.js">
<link rel="modulepreload" href="/docs/lerobot/pr_3313/en/_app/immutable/chunks/index.3c23fb4b.js">
<link rel="modulepreload" href="/docs/lerobot/pr_3313/en/_app/immutable/chunks/paths.17f05d75.js">
<link rel="modulepreload" href="/docs/lerobot/pr_3313/en/_app/immutable/entry/app.04bb7687.js">
<link rel="modulepreload" href="/docs/lerobot/pr_3313/en/_app/immutable/chunks/preload-helper.b00aacbc.js">
<link rel="modulepreload" href="/docs/lerobot/pr_3313/en/_app/immutable/chunks/index.3fe63ad3.js">
<link rel="modulepreload" href="/docs/lerobot/pr_3313/en/_app/immutable/nodes/0.07fbe93e.js">
<link rel="modulepreload" href="/docs/lerobot/pr_3313/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/lerobot/pr_3313/en/_app/immutable/nodes/19.88dbe673.js">
<link rel="modulepreload" href="/docs/lerobot/pr_3313/en/_app/immutable/chunks/CopyLLMTxtMenu.d0c64540.js">
<link rel="modulepreload" href="/docs/lerobot/pr_3313/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.6453902c.js">
<link rel="modulepreload" href="/docs/lerobot/pr_3313/en/_app/immutable/chunks/CodeBlock.48dd2cc2.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;GR00T N1.5 Policy&quot;,&quot;local&quot;:&quot;gr00t-n15-policy&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Model Overview&quot;,&quot;local&quot;:&quot;model-overview&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Installation Requirements&quot;,&quot;local&quot;:&quot;installation-requirements&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Usage&quot;,&quot;local&quot;:&quot;usage&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Training&quot;,&quot;local&quot;:&quot;training&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Training Command Example&quot;,&quot;local&quot;:&quot;training-command-example&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Performance Results&quot;,&quot;local&quot;:&quot;performance-results&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Libero Benchmark Results&quot;,&quot;local&quot;:&quot;libero-benchmark-results&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Evaluate in your hardware setup&quot;,&quot;local&quot;:&quot;evaluate-in-your-hardware-setup&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;License&quot;,&quot;local&quot;:&quot;license&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 
border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="gr00t-n15-policy" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#gr00t-n15-policy"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 
256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>GR00T N1.5 Policy</span></h1> <p data-svelte-h="svelte-u3b5xr">GR00T N1.5 is an open foundation model from NVIDIA designed for generalized humanoid robot reasoning and skills. It is a cross-embodiment model that accepts multimodal input, including language and images, to perform manipulation tasks in diverse environments.</p> <p data-svelte-h="svelte-1h50ax8">This document outlines the specifics of its integration and usage within the LeRobot framework.</p> <h2 class="relative group"><a id="model-overview" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#model-overview"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" 
fill="currentColor"></path></svg></span></a> <span>Model Overview</span></h2> <p data-svelte-h="svelte-z34azi">NVIDIA Isaac GR00T N1.5 is an upgraded version of the GR00T N1 foundation model. It is built to improve generalization and language-following abilities for humanoid robots.</p> <p data-svelte-h="svelte-jic8xd">Developers and researchers can post-train GR00T N1.5 with their own real or synthetic data to adapt it for specific humanoid robots or tasks.</p> <p data-svelte-h="svelte-dv0kor">GR00T N1.5 (specifically the GR00T-N1.5-3B model) is built using pre-trained vision and language encoders. It utilizes a flow matching action transformer to model a chunk of actions, conditioned on vision, language, and proprioception.</p> <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/lerobot/lerobot-groot-paper1%20(1).png" alt="An overview of GR00T" width="80%"> <p data-svelte-h="svelte-1c3javj">Its strong performance comes from being trained on an expansive and diverse humanoid dataset, which includes:</p> <ul data-svelte-h="svelte-chcqqn"><li>Real captured data from robots.</li> <li>Synthetic data generated using NVIDIA Isaac GR00T Blueprint.</li> <li>Internet-scale video data.</li></ul> <p data-svelte-h="svelte-i0d9s4">This approach allows the model to be highly adaptable through post-training for specific embodiments, tasks, and environments.</p> <h2 class="relative group"><a id="installation-requirements" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#installation-requirements"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 
8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Installation Requirements</span></h2> <p data-svelte-h="svelte-1wphqd1">As of today, GR00T N1.5 requires Flash Attention for its internal workings.</p> <p data-svelte-h="svelte-1t68h2m">We are working on making this optional, but in the meantime an extra installation step is required and the policy can only be used on CUDA-enabled devices.</p> <ol data-svelte-h="svelte-fcwuiu"><li>Follow the Environment Setup section of our <a href="./installation">Installation Guide</a>. <strong>Attention:</strong> don’t install <code>lerobot</code> in this step.</li> <li>Install <a href="https://github.com/Dao-AILab/flash-attention" rel="nofollow">Flash Attention</a> by running:</li></ol> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded 
font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># Check https://pytorch.org/get-started/locally/ for your system</span>
pip install <span class="hljs-string">&quot;torch&gt;=2.2.1,&lt;2.8.0&quot;</span> <span class="hljs-string">&quot;torchvision&gt;=0.21.0,&lt;0.23.0&quot;</span> <span class="hljs-comment"># --index-url https://download.pytorch.org/whl/cu1XX</span>
pip install ninja <span class="hljs-string">&quot;packaging&gt;=24.2,&lt;26.0&quot;</span> <span class="hljs-comment"># flash attention dependencies</span>
pip install <span class="hljs-string">&quot;flash-attn&gt;=2.5.9,&lt;3.0.0&quot;</span> --no-build-isolation
python -c <span class="hljs-string">&quot;import flash_attn; print(f&#x27;Flash Attention {flash_attn.__version__} imported successfully&#x27;)&quot;</span><!-- HTML_TAG_END --></pre></div> <ol start="3" data-svelte-h="svelte-1yb31p"><li>Install LeRobot by running:</li></ol> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pip install lerobot[groot]<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="usage" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#usage"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 
88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Usage</span></h2> <p data-svelte-h="svelte-bxh6zc">To use GR00T in your LeRobot configuration, specify the policy type as:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->policy.<span class="hljs-built_in">type</span>=groot<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a 
id="training" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#training"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Training</span></h2> <h3 class="relative group"><a id="training-command-example" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#training-command-example"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" 
fill="currentColor"></path></svg></span></a> <span>Training Command Example</span></h3> <p data-svelte-h="svelte-l8fsf1">Here’s a complete training command for finetuning the base GR00T model on your own dataset:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># Using a multi-GPU setup</span>
accelerate launch \
--multi_gpu \
--num_processes=<span class="hljs-variable">$NUM_GPUS</span> \
$(<span class="hljs-built_in">which</span> lerobot-train) \
--output_dir=<span class="hljs-variable">$OUTPUT_DIR</span> \
--save_checkpoint=<span class="hljs-literal">true</span> \
--batch_size=<span class="hljs-variable">$BATCH_SIZE</span> \
--steps=<span class="hljs-variable">$NUM_STEPS</span> \
--save_freq=<span class="hljs-variable">$SAVE_FREQ</span> \
--log_freq=<span class="hljs-variable">$LOG_FREQ</span> \
--policy.push_to_hub=<span class="hljs-literal">true</span> \
--policy.type=groot \
--policy.repo_id=<span class="hljs-variable">$REPO_ID</span> \
--policy.tune_diffusion_model=<span class="hljs-literal">false</span> \
--dataset.repo_id=<span class="hljs-variable">$DATASET_ID</span> \
--wandb.enable=<span class="hljs-literal">true</span> \
--wandb.disable_artifact=<span class="hljs-literal">true</span> \
--job_name=<span class="hljs-variable">$JOB_NAME</span><!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="performance-results" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#performance-results"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Performance Results</span></h2> <h3 class="relative group"><a id="libero-benchmark-results" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#libero-benchmark-results"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 
11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Libero Benchmark Results</span></h3> <blockquote class="note" data-svelte-h="svelte-cevxlx"><p>Follow our instructions for Libero usage: <a href="./libero">Libero</a></p></blockquote> <p data-svelte-h="svelte-1a7d6z9">GR00T has demonstrated strong performance on the Libero benchmark suite. To compare and test its LeRobot implementation, we finetuned the GR00T N1.5 model for 30k steps on the Libero dataset and compared the results to the GR00T reference results.</p> <table data-svelte-h="svelte-1xohst4"><thead><tr><th>Benchmark</th> <th>LeRobot Implementation</th> <th>GR00T Reference</th></tr></thead> <tbody><tr><td><strong>Libero Spatial</strong></td> <td>82.0%</td> <td>92.0%</td></tr> <tr><td><strong>Libero Object</strong></td> <td>99.0%</td> <td>92.0%</td></tr> <tr><td><strong>Libero Long</strong></td> <td>82.0%</td> <td>76.0%</td></tr> <tr><td><strong>Average</strong></td> <td>87.0%</td> <td>87.0%</td></tr></tbody></table> <p data-svelte-h="svelte-1s3prlo">These results demonstrate GR00T’s strong generalization capabilities across diverse robotic manipulation tasks. 
To reproduce these results, you can follow the instructions in the <a href="https://huggingface.co/docs/lerobot/libero" rel="nofollow">Libero</a> section.</p> <h3 class="relative group"><a id="evaluate-in-your-hardware-setup" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#evaluate-in-your-hardware-setup"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Evaluate in your hardware setup</span></h3> <p data-svelte-h="svelte-punrny">Once you have trained your model using your parameters you can run inference in your downstream task. Follow the instructions in <a href="./il_robots">Imitation Learning for Robots</a>. 
For example:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->lerobot-record \
--robot.type=bi_so_follower \
--robot.left_arm_port=/dev/ttyACM1 \
--robot.right_arm_port=/dev/ttyACM0 \
--robot.id=bimanual_follower \
--robot.cameras=<span class="hljs-string">&#x27;{ right: {&quot;type&quot;: &quot;opencv&quot;, &quot;index_or_path&quot;: 0, &quot;width&quot;: 640, &quot;height&quot;: 480, &quot;fps&quot;: 30},
left: {&quot;type&quot;: &quot;opencv&quot;, &quot;index_or_path&quot;: 2, &quot;width&quot;: 640, &quot;height&quot;: 480, &quot;fps&quot;: 30},
top: {&quot;type&quot;: &quot;opencv&quot;, &quot;index_or_path&quot;: 4, &quot;width&quot;: 640, &quot;height&quot;: 480, &quot;fps&quot;: 30},
}&#x27;</span> \
--display_data=<span class="hljs-literal">true</span> \
--dataset.repo_id=&lt;user&gt;/eval_groot-bimanual \
--dataset.num_episodes=10 \
--dataset.single_task=<span class="hljs-string">&quot;Grab and handover the red cube to the other arm&quot;</span> \
--dataset.streaming_encoding=<span class="hljs-literal">true</span> \
--dataset.encoder_threads=2 \
--policy.path=&lt;user&gt;/groot-bimanual \
--dataset.episode_time_s=30 \
--dataset.reset_time_s=10
<span class="hljs-comment"># --policy.path points to your trained model.</span>
<span class="hljs-comment"># Optionally append to the command above: --dataset.vcodec=auto</span><!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="license" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#license"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>License</span></h2> <p data-svelte-h="svelte-111dz0v">This model follows NVIDIA’s proprietary license, consistent with the original <a href="https://github.com/NVIDIA/Isaac-GR00T" rel="nofollow">GR00T repository</a>. 
Future versions (starting from N1.7) will follow <strong>Apache 2.0 License</strong>.</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/lerobot/blob/main/docs/source/groot.mdx" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p>
<script>
{
	// Client-side bootstrap for this documentation page: publish the path
	// configuration, then load the two entry modules and start the app.

	// Assigned without a declaration on purpose: in this classic (non-module)
	// script that creates a global. Presumably the imported runtime reads it
	// by this exact name; verify before renaming or declaring it.
	__sveltekit_9kza6s = {
		assets: "/docs/lerobot/pr_3313/en",
		base: "/docs/lerobot/pr_3313/en",
		env: {}
	};

	// Resolve the mount target now, while this script is the one executing;
	// document.currentScript is no longer this element inside async callbacks.
	const mountTarget = document.currentScript.parentElement;
	const serverData = [null, null];

	// Options handed to kit.start once both modules are available.
	const startOptions = {
		node_ids: [0, 19],
		data: serverData,
		form: null,
		error: null
	};

	// Kick off both dynamic imports before waiting on either of them.
	const kitModule = import("/docs/lerobot/pr_3313/en/_app/immutable/entry/start.d3f1c0f3.js");
	const appModule = import("/docs/lerobot/pr_3313/en/_app/immutable/entry/app.04bb7687.js");

	Promise.all([kitModule, appModule]).then((loaded) => {
		const kit = loaded[0];
		const app = loaded[1];
		kit.start(app, mountTarget, startOptions);
	});
}
</script>

Xet Storage Details

Size:
32.2 kB
·
Xet hash:
fd1a1b9504214136d83c8188564d017ed02037fa165f62de134b941adc095b83

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.