Buckets:

HuggingFaceDocBuilder's picture
download
raw
40.9 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Asynchronous Inference&quot;,&quot;local&quot;:&quot;asynchronous-inference&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Getting started with async inference&quot;,&quot;local&quot;:&quot;getting-started-with-async-inference&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Done! You should see your robot moving around by now 😉&quot;,&quot;local&quot;:&quot;done-you-should-see-your-robot-moving-around-by-now-&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Async vs. synchronous inference&quot;,&quot;local&quot;:&quot;async-vs-synchronous-inference&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Start the Policy Server&quot;,&quot;local&quot;:&quot;start-the-policy-server&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Launch the Robot Client&quot;,&quot;local&quot;:&quot;launch-the-robot-client&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Tuning async inference for your setup&quot;,&quot;local&quot;:&quot;tuning-async-inference-for-your-setup&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Conclusion&quot;,&quot;local&quot;:&quot;conclusion&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/lerobot/pr_3313/en/_app/immutable/assets/0.e3b0c442.css" rel="preload" as="style">
<link rel="modulepreload" href="/docs/lerobot/pr_3313/en/_app/immutable/entry/start.d3f1c0f3.js">
<link rel="modulepreload" href="/docs/lerobot/pr_3313/en/_app/immutable/chunks/scheduler.eb244325.js">
<link rel="modulepreload" href="/docs/lerobot/pr_3313/en/_app/immutable/chunks/singletons.1f33814c.js">
<link rel="modulepreload" href="/docs/lerobot/pr_3313/en/_app/immutable/chunks/index.3c23fb4b.js">
<link rel="modulepreload" href="/docs/lerobot/pr_3313/en/_app/immutable/chunks/paths.17f05d75.js">
<link rel="modulepreload" href="/docs/lerobot/pr_3313/en/_app/immutable/entry/app.04bb7687.js">
<link rel="modulepreload" href="/docs/lerobot/pr_3313/en/_app/immutable/chunks/preload-helper.b00aacbc.js">
<link rel="modulepreload" href="/docs/lerobot/pr_3313/en/_app/immutable/chunks/index.3fe63ad3.js">
<link rel="modulepreload" href="/docs/lerobot/pr_3313/en/_app/immutable/nodes/0.07fbe93e.js">
<link rel="modulepreload" href="/docs/lerobot/pr_3313/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/lerobot/pr_3313/en/_app/immutable/nodes/5.e7164e54.js">
<link rel="modulepreload" href="/docs/lerobot/pr_3313/en/_app/immutable/chunks/Tip.8a9a4ce7.js">
<link rel="modulepreload" href="/docs/lerobot/pr_3313/en/_app/immutable/chunks/CopyLLMTxtMenu.d0c64540.js">
<link rel="modulepreload" href="/docs/lerobot/pr_3313/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.6453902c.js">
<link rel="modulepreload" href="/docs/lerobot/pr_3313/en/_app/immutable/chunks/CodeBlock.48dd2cc2.js">
<link rel="modulepreload" href="/docs/lerobot/pr_3313/en/_app/immutable/chunks/HfOption.0914e9f3.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Asynchronous Inference&quot;,&quot;local&quot;:&quot;asynchronous-inference&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Getting started with async inference&quot;,&quot;local&quot;:&quot;getting-started-with-async-inference&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Done! You should see your robot moving around by now 😉&quot;,&quot;local&quot;:&quot;done-you-should-see-your-robot-moving-around-by-now-&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Async vs. synchronous inference&quot;,&quot;local&quot;:&quot;async-vs-synchronous-inference&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Start the Policy Server&quot;,&quot;local&quot;:&quot;start-the-policy-server&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Launch the Robot Client&quot;,&quot;local&quot;:&quot;launch-the-robot-client&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Tuning async inference for your setup&quot;,&quot;local&quot;:&quot;tuning-async-inference-for-your-setup&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Conclusion&quot;,&quot;local&quot;:&quot;conclusion&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner 
dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="asynchronous-inference" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#asynchronous-inference"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 
1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Asynchronous Inference</span></h1> <p data-svelte-h="svelte-1grquz4">With our <a href="https://huggingface.co/papers/2506.01844" rel="nofollow">SmolVLA</a> we introduced a new way to run inference on real-world robots, <strong>decoupling action prediction from action execution</strong>.
In this tutorial, we’ll show how to run asynchronous inference (<em>async inference</em>) with a finetuned version of SmolVLA, as well as with all the other policies supported by LeRobot.
<strong>Try async inference with all the policies</strong> supported by LeRobot!</p> <p data-svelte-h="svelte-qtpxdt"><strong>What you’ll learn:</strong></p> <ol data-svelte-h="svelte-1rnlps9"><li>Why asynchronous inference matters and how it compares to the more traditional sequential inference.</li> <li>How to spin up a <code>PolicyServer</code> and connect a <code>RobotClient</code> from the same machine, and even over the network.</li> <li>How to tune key parameters (<code>actions_per_chunk</code>, <code>chunk_size_threshold</code>) for your robot and policy.</li></ol> <p data-svelte-h="svelte-1gwyflc">If you get stuck, hop into our <a href="https://discord.gg/s3KuuzsPFb" rel="nofollow">Discord community</a>!</p> <p data-svelte-h="svelte-z33upu">In a nutshell: with <em>async inference</em>, your robot keeps acting while the policy server is already busy computing the next chunk of actions—eliminating “wait-for-inference” lags and unlocking smoother, more reactive behaviours.
This is fundamentally different from synchronous inference (sync), where the robot stays idle while the policy computes the next chunk of actions.</p> <hr> <h2 class="relative group"><a id="getting-started-with-async-inference" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#getting-started-with-async-inference"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Getting started with async inference</span></h2> <p data-svelte-h="svelte-dc7wnn">You can read more information on asynchronous inference in our <a href="https://huggingface.co/blog/async-robot-inference" rel="nofollow">blogpost</a>. 
This guide is designed to help you quickly set up and run asynchronous inference in your environment.</p> <p data-svelte-h="svelte-lllvpj">First, install <code>lerobot</code> with the <code>async</code> tag, to install the extra dependencies required to run async inference.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pip install -e &quot;.[async]&quot;<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-o9nz2v">Then, spin up a policy server (in one terminal, or in a separate machine) specifying the host address and port for the client to connect to.
You can spin up a policy server running:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->python -m lerobot.async_inference.policy_server \
--host=127.0.0.1 \
--port=8080<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1s4pcoy">This will start a policy server listening on <code>127.0.0.1:8080</code> (<code>localhost</code>, port 8080). At this stage, the policy server is empty, as all information related to which policy to run and with which parameters are specified during the first handshake with the client. Spin up a client with:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->python -m lerobot.async_inference.robot_client \
--server_address=127.0.0.1:8080 \ # SERVER: the host address and port of the policy server
--robot.type=so100_follower \ # ROBOT: your robot type
--robot.port=/dev/tty.usbmodem585A0076841 \ # ROBOT: your robot port
--robot.id=follower_so100 \ # ROBOT: your robot id, to load calibration file
--robot.cameras=&quot;{ laptop: {type: opencv, index_or_path: 0, width: 1920, height: 1080, fps: 30}, phone: {type: opencv, index_or_path: 0, width: 1920, height: 1080, fps: 30}}&quot; \ # POLICY: the cameras used to acquire frames, with keys matching the keys expected by the policy
--task=&quot;dummy&quot; \ # POLICY: The task to run the policy on (`Fold my t-shirt`). Not necessarily defined for all policies, such as `act`
--policy_type=your_policy_type \ # POLICY: the type of policy to run (smolvla, act, etc)
--pretrained_name_or_path=user/model \ # POLICY: the model name/path on server to the checkpoint to run (e.g., lerobot/smolvla_base)
--policy_device=mps \ # POLICY: the device to run the policy on, on the server (cuda, mps, xpu, cpu)
--actions_per_chunk=50 \ # POLICY: the number of actions to output at once
--chunk_size_threshold=0.5 \ # CLIENT: the threshold for the chunk size before sending a new observation to the server
--aggregate_fn_name=weighted_average \ # CLIENT: the function to aggregate actions on overlapping portions
--debug_visualize_queue_size=True # CLIENT: whether to visualize the queue size at runtime<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1h4pflk">In summary, you need to specify instructions for:</p> <ul data-svelte-h="svelte-glkyog"><li><code>SERVER</code>: the address and port of the policy server</li> <li><code>ROBOT</code>: the type of robot to connect to, the port to connect to, and the local <code>id</code> of the robot</li> <li><code>POLICY</code>: the type of policy to run, and the model name/path on server to the checkpoint to run. You also need to specify which device the server should use, and how many actions to output at once (capped at the policy max actions value).</li> <li><code>CLIENT</code>: the threshold for the chunk size before sending a new observation to the server, and the function to aggregate actions on overlapping portions. Optionally, you can also visualize the queue size at runtime, to help you tune the <code>CLIENT</code> parameters.</li></ul> <p data-svelte-h="svelte-mjg4q5">Importantly,</p> <ul data-svelte-h="svelte-1notb6i"><li><code>actions_per_chunk</code> and <code>chunk_size_threshold</code> are key parameters to tune for your setup.</li> <li><code>aggregate_fn_name</code> is the function to aggregate actions on overlapping portions. 
You can either add a new one to a registry of functions, or add your own in <code>robot_client.py</code> (see <a href="NOTE:addlinktoLOC">here</a>)</li> <li><code>debug_visualize_queue_size</code> is a useful tool to tune the <code>CLIENT</code> parameters.</li></ul> <h2 class="relative group"><a id="done-you-should-see-your-robot-moving-around-by-now-" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#done-you-should-see-your-robot-moving-around-by-now-"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Done! 
You should see your robot moving around by now 😉</span></h2> <h2 class="relative group"><a id="async-vs-synchronous-inference" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#async-vs-synchronous-inference"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Async vs. synchronous inference</span></h2> <p data-svelte-h="svelte-q4wltw">Synchronous inference relies on interleaving action chunk prediction and action execution. This inherently results in <em>idle frames</em>, frames where the robot sits idle while awaiting the policy’s output: a new action chunk.
In turn, inference is plagued by evident real-time lags, where the robot simply stops acting due to the lack of available actions.
With robotics models increasing in size, this problem risks becoming only more severe.</p> <p align="center" data-svelte-h="svelte-10cfi8e"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/async-inference/sync.png" alt="Diagram of synchronous inference, where the robot idles while the policy computes the next chunk of actions" width="80%"></p> <p align="center" data-svelte-h="svelte-nqo5ib"><i>Synchronous inference</i> makes the robot idle while the policy is
computing the next chunk of actions.</p> <p data-svelte-h="svelte-11p5v55">To overcome this, we design async inference, a paradigm where action planning and execution are decoupled, resulting in (1) higher adaptability and, most importantly, (2) no idle frames.
Crucially, with async inference, the next action chunk is computed <em>before</em> the current one is exhausted, resulting in no idleness.
Higher adaptability is ensured by aggregating the different action chunks on overlapping portions, obtaining an up-to-date plan and a tighter control loop.</p> <p align="center" data-svelte-h="svelte-a8x2pl"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/async-inference/async.png" alt="Diagram of asynchronous inference, where the next action chunk is computed before the current one is exhausted" width="80%"></p> <p align="center" data-svelte-h="svelte-ev3be1"><i>Asynchronous inference</i> results in no idleness because the next chunk is
computed before the current chunk is exhausted.</p> <hr> <h2 class="relative group"><a id="start-the-policy-server" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#start-the-policy-server"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Start the Policy Server</span></h2> <p data-svelte-h="svelte-xnp609">Policy servers are wrappers around a <code>PreTrainedPolicy</code> interfacing them with observations coming from a robot client.
Policy servers are initialized as empty containers which are populated with the requested policy specified in the initial handshake between the robot client and the policy server.
As such, spinning up a policy server is as easy as specifying the host address and port. If you’re running the policy server on the same machine as the robot client, you can use <code>localhost</code> as the host address.</p> <div class="flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd border-gray-800 bg-black dark:bg-gray-700 text-white">Command </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">API example </div></div> <div class="language-select"><div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->python -m lerobot.async_inference.policy_server \
--host=127.0.0.1 \
--port=8080<!-- HTML_TAG_END --></pre></div> </div> <p data-svelte-h="svelte-1bimxlr">This listens on <code>localhost:8080</code> for an incoming connection from the associated <code>RobotClient</code>, which will communicate which policy to run during the first client-server handshake.</p> <hr> <h2 class="relative group"><a id="launch-the-robot-client" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#launch-the-robot-client"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Launch the Robot Client</span></h2> <p data-svelte-h="svelte-ov7xtn"><code>RobotClient</code> is a wrapper around a <code>Robot</code> instance that connects it to the (possibly remote) <code>PolicyServer</code>.
The <code>RobotClient</code> streams observations to the <code>PolicyServer</code>, and receives action chunks obtained running inference on the server (which we assume to have better computational resources than the robot controller).</p> <div class="flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd border-gray-800 bg-black dark:bg-gray-700 text-white">Command </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">API example </div></div> <div class="language-select"><div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->python -m lerobot.async_inference.robot_client \
--server_address=127.0.0.1:8080 \ <span class="hljs-comment"># SERVER: the host address and port of the policy server</span>
--robot.type=so100_follower \ <span class="hljs-comment"># ROBOT: your robot type</span>
--robot.port=/dev/tty.usbmodem585A0076841 \ <span class="hljs-comment"># ROBOT: your robot port</span>
--robot.id=follower_so100 \ <span class="hljs-comment"># ROBOT: your robot id, to load calibration file</span>
--robot.cameras=<span class="hljs-string">&quot;{ laptop: {type: opencv, index_or_path: 0, width: 1920, height: 1080, fps: 30}, phone: {type: opencv, index_or_path: 0, width: 1920, height: 1080, fps: 30}}&quot;</span> \ <span class="hljs-comment"># POLICY: the cameras used to acquire frames, with keys matching the keys expected by the policy</span>
--task=<span class="hljs-string">&quot;dummy&quot;</span> \ <span class="hljs-comment"># POLICY: The task to run the policy on (`Fold my t-shirt`). Not necessarily defined for all policies, such as `act`</span>
--policy_type=your_policy_type \ <span class="hljs-comment"># POLICY: the type of policy to run (smolvla, act, etc)</span>
--pretrained_name_or_path=user/model \ <span class="hljs-comment"># POLICY: the model name/path on server to the checkpoint to run (e.g., lerobot/smolvla_base)</span>
--policy_device=mps \ <span class="hljs-comment"># POLICY: the device to run the policy on, on the server</span>
--actions_per_chunk=50 \ <span class="hljs-comment"># POLICY: the number of actions to output at once</span>
--chunk_size_threshold=0.5 \ <span class="hljs-comment"># CLIENT: the threshold for the chunk size before sending a new observation to the server</span>
--aggregate_fn_name=weighted_average \ <span class="hljs-comment"># CLIENT: the function to aggregate actions on overlapping portions</span>
--debug_visualize_queue_size=True <span class="hljs-comment"># CLIENT: whether to visualize the queue size at runtime</span><!-- HTML_TAG_END --></pre></div> </div> <p data-svelte-h="svelte-1txlh3i">The following two parameters are key in every setup:</p> <table data-svelte-h="svelte-zlc5ky"><thead><tr><th>Hyperparameter</th> <th>Default</th> <th>What it does</th></tr></thead> <tbody><tr><td><code>actions_per_chunk</code></td> <td>50</td> <td>How many actions the policy outputs at once. Typical values: 10-50.</td></tr> <tr><td><code>chunk_size_threshold</code></td> <td>0.5</td> <td>When the fraction of the queue that is still full drops to this threshold or below (e.g., ≤ 50% full for 0.5), the client sends a fresh observation.
Value in [0, 1].</td></tr></tbody></table> <blockquote class="tip">Different values of <code>actions_per_chunk</code> and <code>chunk_size_threshold</code> do result
in different behaviours.</blockquote> <p data-svelte-h="svelte-1wcr4ad">On the one hand, increasing the value of <code>actions_per_chunk</code> will result in reducing the likelihood of ending up with no actions to execute, as more actions will be available when the new chunk is computed.
However, larger values of <code>actions_per_chunk</code> might also result in less precise actions, due to the compounding errors consequent to predicting actions over longer timespans.</p> <p data-svelte-h="svelte-1e8rfth">On the other hand, increasing the value of <code>chunk_size_threshold</code> will result in sending observations to the <code>PolicyServer</code> for inference more often, resulting in a larger number of updated action chunks that overlap on significant portions. This results in high adaptability, in the limit predicting one action chunk for each observation, which is in turn only marginally consumed while a new one is produced.
This option does also put more pressure on the inference pipeline, as a consequence of the many requests. Conversely, values of <code>chunk_size_threshold</code> close to 0.0 collapse to the synchronous edge case, whereby new observations are only sent out whenever the current chunk is exhausted.</p> <p data-svelte-h="svelte-7n3gup">We found the default values of <code>actions_per_chunk</code> and <code>chunk_size_threshold</code> to work well in the experiments we developed for the <a href="https://huggingface.co/papers/2506.01844" rel="nofollow">SmolVLA paper</a>, but recommend experimenting with different values to find the best fit for your setup.</p> <h3 class="relative group"><a id="tuning-async-inference-for-your-setup" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tuning-async-inference-for-your-setup"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Tuning async inference for your setup</span></h3> <ol data-svelte-h="svelte-1fwwdbp"><li><strong>Choose your computational resources carefully.</strong> <a href="https://huggingface.co/lerobot/pi0" rel="nofollow">PI0</a> occupies 14GB of memory at inference time, while 
<a href="https://huggingface.co/lerobot/smolvla_base" rel="nofollow">SmolVLA</a> requires only ~2GB. You should identify the best computational resource for your use case keeping in mind smaller policies require less computational resources. The combination of policy and device used (CPU-intensive, using MPS, or the number of CUDA cores on a given NVIDIA GPU) directly impacts the average inference latency you should expect.</li> <li><strong>Adjust your <code>fps</code> based on inference latency.</strong> While the server generates a new action chunk, the client is not idle and is stepping through its current action queue. If the two processes happen at fundamentally different speeds, the client might end up with an empty queue. As such, you should reduce your fps if you consistently run out of actions in queue.</li> <li><strong>Adjust <code>chunk_size_threshold</code></strong>.
<ul><li>Values closer to <code>0.0</code> result in almost sequential behavior. Values closer to <code>1.0</code> send an observation at every step (more bandwidth, relies on a good world model).</li> <li>We found values around 0.5-0.6 to work well. If you want to tweak this, spin up a <code>RobotClient</code> setting the <code>--debug_visualize_queue_size</code> to <code>True</code>. This will plot the action queue size evolution at runtime, and you can use it to find the value of <code>chunk_size_threshold</code> that works best for your setup.</li></ul></li></ol> <p align="center" data-svelte-h="svelte-1rfo7vb"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/async-inference/queues.png" alt="Action queue size over time for several values of chunk_size_threshold" width="80%"></p> <p align="center" data-svelte-h="svelte-9t9eym"><i>The action queue size is plotted at runtime when the
<code>--debug_visualize_queue_size</code> flag is passed, for various levels of
<code>chunk_size_threshold</code> (<code>g</code> in the SmolVLA paper).</i></p> <hr> <h2 class="relative group"><a id="conclusion" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#conclusion"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Conclusion</span></h2> <p data-svelte-h="svelte-2fpn4u">Asynchronous inference represents a significant advancement in real-time robotics control, addressing the fundamental challenge of inference latency that has long plagued robotics applications. 
Through this tutorial, you’ve learned how to implement a complete async inference pipeline that eliminates idle frames and enables smoother, more reactive robot behaviors.</p> <p data-svelte-h="svelte-10mg9xa"><strong>Key Takeaways:</strong></p> <ul data-svelte-h="svelte-loclkx"><li><strong>Paradigm Shift</strong>: Async inference decouples action prediction from execution, allowing robots to continue acting while new action chunks are computed in parallel</li> <li><strong>Performance Benefits</strong>: Eliminates “wait-for-inference” lags that are inherent in synchronous approaches, becoming increasingly important as policy models grow larger</li> <li><strong>Flexible Architecture</strong>: The server-client design enables distributed computing, where inference can run on powerful remote hardware while maintaining real-time robot control</li> <li><strong>Tunable Parameters</strong>: Success depends on properly configuring <code>actions_per_chunk</code> and <code>chunk_size_threshold</code> for your specific hardware, policy, and task requirements</li> <li><strong>Universal Compatibility</strong>: Works with all LeRobot-supported policies, from lightweight ACT models to vision-language models like SmolVLA</li></ul> <p data-svelte-h="svelte-kqkix9">Start experimenting with the default parameters, monitor your action queue sizes, and iteratively refine your setup to achieve optimal performance for your specific use case.
If you want to discuss this further, hop into our <a href="https://discord.gg/s3KuuzsPFb" rel="nofollow">Discord community</a>, or open an issue on our <a href="https://github.com/huggingface/lerobot/issues" rel="nofollow">GitHub repository</a>.</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/lerobot/blob/main/docs/source/async.mdx" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p>
<script>
{
// Page bootstrap: publish path settings on a global, then load the two entry
// modules in parallel and pass the app module plus this script's parent
// element to the imported `start` function.
// NOTE(review): this looks like SvelteKit-generated hydration code; if so it
// should be regenerated by the doc build rather than edited by hand — confirm.

// Assigned without a declaration keyword, so in this classic (non-module)
// script it becomes a property of the global object. It must be set before
// the imports below are evaluated if those modules read it — presumably they
// do; verify against the entry modules.
__sveltekit_9kza6s = {
assets: "/docs/lerobot/pr_3313/en",
base: "/docs/lerobot/pr_3313/en",
env: {}
};
// `document.currentScript` is only set while this script runs synchronously,
// so the parent element is captured here, before the asynchronous imports.
const element = document.currentScript.parentElement;
// Two null entries, forwarded unchanged as `data` below.
// NOTE(review): presumably one slot per entry in `node_ids` — confirm.
const data = [null,null];
// Dynamic imports run in parallel; the array order fixes the destructuring
// order in the callback (`kit` = start module, `app` = app module).
Promise.all([
import("/docs/lerobot/pr_3313/en/_app/immutable/entry/start.d3f1c0f3.js"),
import("/docs/lerobot/pr_3313/en/_app/immutable/entry/app.04bb7687.js")
]).then(([kit, app]) => {
// No rejection handler is attached: a failed import surfaces as an unhandled
// promise rejection and `start` is never called.
kit.start(app, element, {
node_ids: [0, 5],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
40.9 kB
·
Xet hash:
d90c9d181be09ef01009e8a6055011b52321b13bfb04236aea0f14b5a4a9471f

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.