update
Browse files- app/scripts/latex-importer/mdx-converter.mjs +130 -0
- app/scripts/latex-importer/output/main.md +0 -0
- app/scripts/latex-importer/output/main.mdx +27 -42
- app/scripts/latex-importer/reference-preprocessor.mjs +2 -2
- app/src/components/Hero.astro +18 -3
- app/src/content/article.mdx +27 -42
- app/src/content/assets/lerobot-logo-thumbnail.png +3 -0
- app/src/content/embeds/{banner2.html → banner.html} +0 -0
- app/src/styles/_base.css +2 -2
app/scripts/latex-importer/mdx-converter.mjs
CHANGED
|
@@ -416,6 +416,133 @@ function transformStyledSpans(content) {
|
|
| 416 |
return content;
|
| 417 |
}
|
| 418 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 419 |
/**
|
| 420 |
* Transform reference links to proper Astro internal links
|
| 421 |
* @param {string} content - MDX content
|
|
@@ -825,6 +952,9 @@ function processMdxContent(content, latexContent = '') {
|
|
| 825 |
processedContent = convertSubfiguresToMultiImage(processedContent);
|
| 826 |
processedContent = transformImages(processedContent);
|
| 827 |
processedContent = transformStyledSpans(processedContent);
|
|
|
|
|
|
|
|
|
|
| 828 |
processedContent = transformReferenceLinks(processedContent);
|
| 829 |
processedContent = fixHtmlEscaping(processedContent);
|
| 830 |
processedContent = cleanHighlightNumbering(processedContent);
|
|
|
|
| 416 |
return content;
|
| 417 |
}
|
| 418 |
|
| 419 |
+
/**
 * Transform epigraph divs to Quote components.
 *
 * Scans the MDX content for `<div class="epigraph">...</div>` blocks, extracts
 * the quote text (the first *emphasized* span) and the author (the last
 * non-empty line that is neither emphasized nor a link), and rewrites the div
 * as a `<Quote source="...">` component. Unparseable epigraphs are left intact.
 *
 * @param {string} content - MDX content
 * @returns {string} - Content with Quote components
 */
function transformEpigraphs(content) {
  console.log('  💬 Transforming epigraphs to Quote components...');

  let epigraphsConverted = 0;

  // Pattern to match epigraph divs: <div class="epigraph">...</div>
  // [\s\S] (instead of .) lets the body span multiple lines.
  content = content.replace(
    /<div class="epigraph">([\s\S]*?)<\/div>/g,
    // NOTE: parameter renamed from `content` to `innerContent` to avoid
    // shadowing the outer `content` argument.
    (match, innerContent) => {
      // Extract quote text (between asterisks) and author (last non-empty line)
      const lines = innerContent.trim().split('\n').map(line => line.trim()).filter(line => line);

      let quoteText = '';
      let author = '';

      // Find quote text (between asterisks)
      const quoteMatch = innerContent.match(/\*([^*]+)\*/);
      if (quoteMatch) {
        quoteText = quoteMatch[1].trim();
      }

      // Find author (usually the last non-empty line that's not the quote)
      const lastLine = lines[lines.length - 1];
      if (lastLine && !lastLine.includes('*') && !lastLine.includes('[')) {
        author = lastLine;
      }

      if (quoteText && author) {
        epigraphsConverted++;

        // Clean the quote text: strip tags and collapse whitespace. Quotes are
        // left as-is — the text is emitted as a JSX child, where a raw `"` is
        // valid (backslash-escaping here would leak literal `\` into output).
        const cleanQuoteText = quoteText
          .replace(/<[^>]*>/g, '')
          .replace(/\s+/g, ' ')
          .trim();

        // Clean the author text. Since it lands inside a JSX attribute value,
        // escape `"` as `&quot;` (a backslash escape is invalid JSX and would
        // terminate the attribute early).
        const cleanAuthor = author
          .replace(/<[^>]*>/g, '')
          .replace(/\s+/g, ' ')
          .replace(/"/g, '&quot;')
          .trim();

        // Mark Quote component as used
        usedComponents.add('Quote');

        return `<Quote source="${cleanAuthor}">
${cleanQuoteText}
</Quote>`;
      }

      return match; // Return original if we can't parse it
    }
  );

  if (epigraphsConverted > 0) {
    console.log(`    ✅ Converted ${epigraphsConverted} epigraph(s) to Quote component(s)`);
  } else {
    console.log('    ℹ️  No epigraphs found');
  }

  return content;
}
|
| 489 |
+
|
| 490 |
+
/**
 * Transform highlight spans to mark tags.
 *
 * Rewrites every `<span class="highlight">...</span>` in the MDX content as a
 * semantic `<mark>...</mark>` tag, preserving the inner text verbatim.
 *
 * @param {string} content - MDX content
 * @returns {string} - Content with mark tags instead of highlight spans
 */
function transformHighlightSpans(content) {
  console.log('  🎯 Transforming highlight spans to mark tags...');

  let highlightsConverted = 0;

  // Transform <span class="highlight">...</span> to <mark>...</mark>.
  // [\s\S]*? (rather than .*?) also matches spans whose text wraps across
  // lines, which `.` would miss since it does not match newlines.
  content = content.replace(
    /<span class="highlight">([\s\S]*?)<\/span>/g,
    (match, text) => {
      highlightsConverted++;
      return `<mark>${text}</mark>`;
    }
  );

  if (highlightsConverted > 0) {
    console.log(`    ✅ Converted ${highlightsConverted} highlight span(s) to mark tag(s)`);
  } else {
    console.log('    ℹ️  No highlight spans found');
  }

  return content;
}
|
| 517 |
+
|
| 518 |
+
/**
 * Fix escaped mark tags.
 *
 * Earlier pipeline stages may escape angle brackets, producing
 * `\<mark\>...\</mark\>`; this restores them to real `<mark>...</mark>` tags.
 *
 * @param {string} content - MDX content
 * @returns {string} - Content with unescaped mark tags
 */
function fixEscapedMarkTags(content) {
  console.log('  🎯 Fixing escaped mark tags...');

  let fixedCount = 0;

  // Fix escaped mark tags: \<mark\>...\</mark\> -> <mark>...</mark>.
  // [\s\S]*? (rather than .*?) also repairs tags whose inner text spans
  // multiple lines, which `.` would miss since it does not match newlines.
  content = content.replace(
    /\\<mark\\>([\s\S]*?)\\<\/mark\\>/g,
    (match, text) => {
      fixedCount++;
      return `<mark>${text}</mark>`;
    }
  );

  if (fixedCount > 0) {
    console.log(`    ✅ Fixed ${fixedCount} escaped mark tag(s)`);
  } else {
    console.log('    ℹ️  No escaped mark tags found');
  }

  return content;
}
|
| 545 |
+
|
| 546 |
/**
|
| 547 |
* Transform reference links to proper Astro internal links
|
| 548 |
* @param {string} content - MDX content
|
|
|
|
| 952 |
processedContent = convertSubfiguresToMultiImage(processedContent);
|
| 953 |
processedContent = transformImages(processedContent);
|
| 954 |
processedContent = transformStyledSpans(processedContent);
|
| 955 |
+
processedContent = transformHighlightSpans(processedContent);
|
| 956 |
+
processedContent = fixEscapedMarkTags(processedContent);
|
| 957 |
+
processedContent = transformEpigraphs(processedContent);
|
| 958 |
processedContent = transformReferenceLinks(processedContent);
|
| 959 |
processedContent = fixHtmlEscaping(processedContent);
|
| 960 |
processedContent = cleanHighlightNumbering(processedContent);
|
app/scripts/latex-importer/output/main.md
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
app/scripts/latex-importer/output/main.mdx
CHANGED
|
@@ -20,6 +20,7 @@ tableOfContentsAutoCollapse: true
|
|
| 20 |
|
| 21 |
import MultiImage from '../components/MultiImage.astro';
|
| 22 |
import ResponsiveImage from '../components/ResponsiveImage.astro';
|
|
|
|
| 23 |
import ch2_planar_manipulator_free from './assets/image/figures/ch2/ch2-planar-manipulator-free.png';
|
| 24 |
import ch2_planar_manipulator_floor from './assets/image/figures/ch2/ch2-planar-manipulator-floor.png';
|
| 25 |
import ch2_planar_manipulator_floor_shelf from './assets/image/figures/ch2/ch2-planar-manipulator-floor-shelf.png';
|
|
@@ -101,7 +102,7 @@ The frontier of robotics research is indeed increasingly moving away from classi
|
|
| 101 |
|
| 102 |
Moreover, since end-to-end learning on ever-growing collections of text and image data has historically been at the core of the development of *foundation models* capable of semantic reasoning across multiple modalities (images, text, audio, etc.), deriving robotics methods grounded in learning appears particularly consequential, especially as the number of openly available datasets continues to grow.
|
| 103 |
|
| 104 |
-
Robotics is, at its core, an inherently multidisciplinary field, requiring a wide range of expertise in both *software* and *hardware*. The integration of learning-based techniques further broadens this spectrum of skills, raising the bar for both research and practical applications. `lerobot` is an open-source library designed to integrate end-to-end with the entire robotics stack. With a strong focus on accessible, real-world robots <
|
| 105 |
|
| 106 |
This tutorial serves the double purpose of providing useful references for the Science behind--and practical use of--common robot learning techniques. To this aim, we strive to provide a rigorous yet concise overview of the core concepts behind the techniques presented, paired with practical examples of how to use such techniques concretely, with code examples in `lerobot`, for researchers and practitioners interested in the field of robot learning. This tutorial is structured as follows:
|
| 107 |
|
|
@@ -281,13 +282,9 @@ for epoch in range(num_epochs):
|
|
| 281 |
## Classical Robotics
|
| 282 |
|
| 283 |
<span id="classical" style="position: absolute;"></span>
|
| 284 |
-
<
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
Sun Tzu
|
| 289 |
-
|
| 290 |
-
</div>
|
| 291 |
<div class="callout">
|
| 292 |
|
| 293 |
TL;DR Learning-based approaches to robotics are motivated by the need to (1) generalize across tasks and embodiments (2) reduce dependency on human expertise (3) leverage historical trends on the production of data--all traditionally overlooked by dynamics-based techniques.
|
|
@@ -308,9 +305,9 @@ TL;DR Learning-based approaches to robotics are motivated by the need to (1) gen
|
|
| 308 |
<figcaption>Overview of methods to generate motion (clearly non-exhaustive, see @bekrisStateRobotMotion2024). The different methods can be grouped based on whether they explicitly (<em>dynamics-based</em>) or implicitly (<em>learning-based</em>) model robot-environment interactions.</figcaption>
|
| 309 |
</figure>
|
| 310 |
|
| 311 |
-
Robotics is concerned with producing artificial motion in the physical world in useful, reliable and safe fashion. Thus, robotics is an inherently multi-disciplinar domain: producing autonomous motion in the physical world requires, to the very least, interfacing different software (motion planners) and hardware (motion executioners) components. Further, knowledge of mechanical, electrical, and software engineering, as well as rigid-body mechanics and control theory have therefore proven quintessential in robotics since the field first developed in the 1950s. More recently, Machine Learning (ML) has also proved effective in robotics, complementing these more traditional disciplines @connellRobotLearning1993. As a direct consequence of its multi-disciplinar nature, robotics has developed as a rather wide array of methods, all concerned with the main purpose of <
|
| 312 |
|
| 313 |
-
Methods to produce robotics motion range from traditional *explicit* models--<
|
| 314 |
|
| 315 |
### Different Types of Motion
|
| 316 |
|
|
@@ -477,26 +474,22 @@ Despite the last 60+ years of robotics research, autonomous robots are still lar
|
|
| 477 |
<figcaption>Dynamics-based approaches to robotics suffer from several limitations: (1) orchestrating multiple components poses integration challenges; (2) the need to develop custom processing pipelines for the sensing modalities and tasks considered hinders scalability; (3) simplified analytical models of physical phenomena (here friction at the gripper; credits to @antonovaReinforcementLearningPivoting2017) limit real-world performance. Lastly, (4) dynamics-based methods overlook trends in the availability and growth of robotics data.</figcaption>
|
| 478 |
</figure>
|
| 479 |
|
| 480 |
-
Dynamics-based robotics pipelines have historically been <
|
| 481 |
|
| 482 |
-
Moreover, classical planners operate on compact, assumed-sufficient state representations; extending them to reason directly over raw, heterogeneous and noisy data streams is non-trivial. This results in a <
|
| 483 |
|
| 484 |
-
Setting aside integration and scalability challenges: developing accurate modeling of contact, friction, and compliance for complicated systems remains difficult. Rigid-body approximations are often insufficient in the presence of deformable objects, and <
|
| 485 |
|
| 486 |
-
Lastly, dynamics-based methods (naturally) overlook the rather recent <
|
| 487 |
|
| 488 |
Taken together, these limitations (Figure <a href="#classical-limitations" data-reference-type="ref" data-reference="classical-limitations">[classical-limitations]</a>) motivate the exploration of learning-based approaches that can (1) integrate perception and control more tightly, (2) adapt across tasks and embodiments with reduced expert modeling interventions and (3) scale gracefully in performance as more robotics data becomes available.
|
| 489 |
|
| 490 |
## Robot (Reinforcement) Learning
|
| 491 |
|
| 492 |
<span id="learning-rl" style="position: absolute;"></span>
|
| 493 |
-
<
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
Richard Sutton
|
| 498 |
-
|
| 499 |
-
</div>
|
| 500 |
<div class="callout">
|
| 501 |
|
| 502 |
TL;DR The need for expensive, high-fidelity simulators can be obviated learning from real-world data, using sample-efficient algorithms that can safely train directly on hardware.
|
|
@@ -516,7 +509,7 @@ TL;DR The need for expensive, high-fidelity simulators can be obviated learning
|
|
| 516 |
|
| 517 |
Learning-based techniques for robotics naturally address the limitations presented in Section <a href="#classical" data-reference-type="ref" data-reference="classical">[classical]</a> (Figure <a href="#robot-learning-upsides" data-reference-type="ref" data-reference="robot-learning-upsides">[robot-learning-upsides]</a>). In particular, learning-based techniques typically rely on monolithich prediction-to-action pipelines (*visuomotor policies*) which do directly map sensorimotor inputs to predicted actions, streamlining control policies by removing the need to interface multiple components. Mapping sensory inputs to actions also makes it possible to incorporate diverse input modalities, leveraging the automatic feature extraction capabilities of modern learning systems. Moreover, learning-based approaches can, in principle, bypass explicit modeling altogether and instead rely solely on interaction data--an advantage that proves transformative when dynamics are difficult to model or entirely unknown. Lastly, learning for robotics (*robot learning*) is naturally well posed to leverage the growing amount of robotics data openly available, just as computer vision and natural language processing did historically benefit from large-scale corpora of data, in great part overlooked by dynamics-based approaches.
|
| 518 |
|
| 519 |
-
Being a field at its relative nascent stages, no prevalent technique(s) proves distinctly better than any other in the domain of robot learning. Still, two major classes of methods gained prominence- <
|
| 520 |
|
| 521 |
<div class="wrapfigure">
|
| 522 |
|
|
@@ -653,7 +646,7 @@ Popular approaches to continuous state and action space--such as those studied w
|
|
| 653 |
|
| 654 |
Streamlined end-to-end control pipelines, data-driven feature extraction and a disregard for explicit modeling in favor of interaction data are all features of RL for robotics. However, RL still suffers from limitations concerning safety and learning efficiency, particularly pressing for real-world robotics applications.
|
| 655 |
|
| 656 |
-
First, especially early in training, <
|
| 657 |
|
| 658 |
<figure>
|
| 659 |
<ResponsiveImage
|
|
@@ -685,7 +678,7 @@ While effective in transfering policies across the reality gap in real-world rob
|
|
| 685 |
|
| 686 |
Selecting the dynamics distribution $\Xi$ is also non-trivial. On the one hand, distributions with low entropy might risk to cause failure at transfer time, due to the limited robustness induced over the course of training. On the other hand, excessive randomization may cause over-regularization and hinder performance @margolisRapidLocomotionReinforcement2022. Consequently, the research community investigated approaches to automatically select the randomization distribution $\Xi$, using signals from the training process or tuning it to reproduce observed real-world trajectories. @akkayaSolvingRubiksCube2019 use a parametric uniform distribution $\mathcal U(a, b)$ as $\Xi$, widening the bounds $a, b$ as training progresses and the agent’s performance improves (AutoDR). While effective, AutoDR requires significant tuning--the bounds are widened by a fixed, pre-specified amount $\Delta$ along--and may disregard data when performance *does not* improve after a distribution update @tiboniDomainRandomizationEntropy2024. @tiboniDomainRandomizationEntropy2024 propose a similar method to AutoDR (DORAEMON) to evolve $\Xi$ based on the training signal, but with the key difference of explicitly maximizing the entropy of a parametric Beta distribution--inherently more flexible than uniform distributions--with learned updates instead of fixed $\Delta$. In this, DORAEMON proves particularly effective at dynamically increasing the entropy levels of the training distribution by employing an outer-loop max-entropy objective, tackled under performance constraints in the inner-loop RL problem. Other approaches to automatically perform DR consist in specifically tuning $\Xi$ to align as much as possible the simulation and real-world domains. 
For instance, @chebotarClosingSimtorealLoop2019 interleave in-simulation policy training with repeated real-world policy rollouts used to adjust $\Xi$ based on real-world data, while @tiboniDROPOSimtoRealTransfer2023 leverage a single, pre-collected set of real-world trajectories and tune $\Xi$ under a simple likelihood objective.
|
| 687 |
|
| 688 |
-
While DR has shown promise, it does not address the main limitation that, even under the assumption that an ideal distribution $\Xi$ was available, many robotics problems <
|
| 689 |
|
| 690 |
A perhaps more fundamental limitation of RL for robotics is the general unavailability of complicated tasks’ *dense* reward function, the design of which is essentially based on human expertise, ingenuity and trial-and-error. In practice, *sparse* reward functions can be used to conclude whether one specific goal has been attained--*has this t-shirt been correctly folded?*--but unfortunately result in more challenging learning. As a result, despite notable successes, deploying RL directly on real-world robots at scale remains challenging.
|
| 691 |
|
|
@@ -739,7 +732,7 @@ Provably, eq. <a href="#deterministic-pg" data-reference-type="ref" data-refere
|
|
| 739 |
```
|
| 740 |
Similarly to DQN, DDPG also employs the same replay buffer mechanism, reusing past transitions over training for increased sample efficiency and estimating the loss function via MC-estimates.
|
| 741 |
|
| 742 |
-
Soft Actor-Critic (SAC) @haarnojaSoftActorCriticOffPolicy2018 is a derivation of DDPG in the max-entropy (MaxEnt) RL framework, in which RL agents are tasked with <
|
| 743 |
<span id="J-soft" style="position: absolute;">
|
| 744 |
</span>
|
| 745 |
|
|
@@ -1065,13 +1058,9 @@ Advances in learning to act from potentially large corpora of human demonstratio
|
|
| 1065 |
## Robot (Imitation) Learning
|
| 1066 |
|
| 1067 |
<span id="learning-imitation" style="position: absolute;"></span>
|
| 1068 |
-
<
|
| 1069 |
-
|
| 1070 |
-
|
| 1071 |
-
|
| 1072 |
-
Norbert Wiener
|
| 1073 |
-
|
| 1074 |
-
</div>
|
| 1075 |
<div class="callout">
|
| 1076 |
|
| 1077 |
TL;DR Behavioral Cloning provides a natural platform to learn from real-world interactions without the need to design any reward function, and generative models prove more effective than point-wise policies at dealing with multimodal demonstration datasets.
|
|
@@ -1089,7 +1078,7 @@ TL;DR Behavioral Cloning provides a natural platform to learn from real-world in
|
|
| 1089 |
<figcaption>(A) Average (with standard deviation) evolution of the actuation levels over the first 5 recorded episodes in <a href="lerobot/svla_so101_pickplace" class="uri">lerobot/svla_so101_pickplace</a>. Proprioperceptive states prove invaluable in determining the robot’s state during an episode. (B) Camera frames are also recorded alongside measurements on the robot’s state, capturing information about the robot’s interaction with its environment.</figcaption>
|
| 1090 |
</figure>
|
| 1091 |
|
| 1092 |
-
Learning from human demonstrations provides a pragmatic alternative to the RL pipeline discussed in Section <a href="#learning-rl" data-reference-type="ref" data-reference="learning-rl">[learning-rl]</a>. Indeed, especially in real-world robotics, online exploration is typically <
|
| 1093 |
|
| 1094 |
Formally, let $\mathcal D = \{ \tau^{(i)} \}_{i=1}^N$ be a set of expert trajectories, with $\tau^{(i)} = \{(o_t^{(i)}, a_t^{(i)})\}_{t=0}^{T_i}$ representing the $i$-th length-$T_i$ trajectory in $\mathcal D$, $o_t \in \mathcal O$ denoting observations (e.g., images and proprioception altogether), and $a_t \in \mathcal A$ the expert actions. Typically, observations $o \in \mathcal O$ consist of both image and proprioperceptive information, while actions $a \in \mathcal A$ represent control specifications for the robot to execute, e.g. a joint configuration. Note that differently from Section <a href="#learning-rl" data-reference-type="ref" data-reference="learning-rl">[learning-rl]</a>, in the imitation learning context $\mathcal D$ denotes an offline dataset collecting $N$ length-$T_i$ reward-free (expert) human trajectories $\tau^{(i)}$, and *not* the environment dynamics. Similarily, in this section $\tau^{(i)}$ represent a length-$T_i$ trajectory of observation-action pairs, which crucially *omits entirely any reward* information. Figure <a href="#ch4-bc-trajectories" data-reference-type="ref" data-reference="ch4-bc-trajectories">[ch4-bc-trajectories]</a> graphically shows trajectories in terms of the average evolution of the actuation on the 6 joints of a teleoperated SO-100 manipulator. Notice how proprioperceptive states are captured jointly with camera frames over the course of the recorded episodes, providing a unified high-frame rate collection of both image and joint teleoperation data. Figure <a href="#ch4-observation-action-mapping" data-reference-type="ref" data-reference="ch4-observation-action-mapping">[ch4-observation-action-mapping]</a> shows $(o_t, a_t)$-pairs for the same dataset, with the actions performed by the human expert illustrated alongside the corresponding observation. 
In principle, (expert) trajectories $\tau^{(i)}$ can have different lengths since demonstrations might exhibit multi-modal strategies to attain the same goal, resulting in multiple, different behaviors.
|
| 1095 |
|
|
@@ -1947,13 +1936,9 @@ for epoch in range(num_epochs):
|
|
| 1947 |
## Generalist Robot Policies
|
| 1948 |
|
| 1949 |
<span id="learning-foundation" style="position: absolute;"></span>
|
| 1950 |
-
<
|
| 1951 |
-
|
| 1952 |
-
|
| 1953 |
-
|
| 1954 |
-
Robert A. Heinlein
|
| 1955 |
-
|
| 1956 |
-
</div>
|
| 1957 |
<div class="callout">
|
| 1958 |
|
| 1959 |
TL;DR Openly available, large-scale datasets and the development of stable-to-train, expressive and efficient architectures fostered research on the development of generalist robot policies that can operate across embodiment and tasks.
|
|
@@ -1994,7 +1979,7 @@ Driven by the goal of developing generalist robot policies, the research communi
|
|
| 1994 |
|
| 1995 |
In a follow-up work, the same group of authors propose a modified method to learn generalist models, leveraging (1) a more powerful architecture and (2) scaling up the dataset used . In RT-2, @brohanRT2VisionLanguageActionModels2023 propose inheriting internet-scale semantic knowledge from large-scale multi-modal datasets to learn a single, *unified model* for robotics control. Such a model, termed *Vision-Language-Action* (VLA) in the original RT-2 paper, effectively casts robot control as a language-modeling problem, and in particular as a Visual Question-Answering (VQ) task, in which the output token space used to represent *textual tokens* is shared with the *8-bits tokens* used to represent the 256 ($2^8$) actuation levels of a 6-dof robot. In their work, @brohanRT2VisionLanguageActionModels2023 propose co-fine-tuning large-scale VLMs such as PaLIX @chenPaLIXScalingMultilingual2023 or PaLM-E @driessPaLMEEmbodiedMultimodal2023 on a mix of (1) web and (2) robotics data, complementing VQtraining with robotics-specific signal, and learning to directly output robot actions in a shared token space for visual and language inputs. In their work, the authors claim using large models trained on internet-scale data as backbones for VLAs allows models to tap into the rich semantic knowledge embedded in the VLM’s parameters, interpreting instructions and unseen objects by connecting them to concepts acquired while pre-training. For instance, @brohanRT2VisionLanguageActionModels2023 show that while RT-2 has never been explicitly trained to repurpose tools for a *hammering* task, it can still combine its semantic understanding of images, so that when asked which object between (1) a piece of paper, (2) a pair of headphones or (3) a rock may be used instead of a hammer, it correctly answers (3).
|
| 1996 |
|
| 1997 |
-
Traditionally, research efforts revolved around not only training models, but also proposing datasets for the community, a costly and time-consuming process. Due to the aforementioned embodiment gap, the data used in research efforts in robot learning have traditionally proved rather fragmented, tailored to the specific task considered by the specific group of researchers who collected it, which ultimately hindered integration. The Open X-Embodiment project @oneillOpenXEmbodimentRobotic2025 was a landmark collaboration effort to address data fragmentation, by curating the aggregation of 60 *existing* robotics datasets from 22 different robot embodiments and 21 institutions across the world, and resulted in a total 1.4M of cross-embodiments, cross-tasks, openly-available trajectories. Besides the contribution of an aggregate, large scale dataset, @oneillOpenXEmbodimentRobotic2025 also demonstrated significant positive transfer *across tasks and embodiments*, showing that <
|
| 1998 |
|
| 1999 |
Despite these advancements, the success of large, proprietary models like RT-1 and RT-2, highlighted a growing accessibility gap in robotics research, as training and deploying large-scale robotics foundation models requires computational resources simply unattainable for most research institutions. The OpenVLA project @kimOpenVLAOpenSourceVisionLanguageAction2024 emerged in direct contrast to traditionally closed-source efforts to develop VLAs. In particular, @kimOpenVLAOpenSourceVisionLanguageAction2024 trained OpenVLA by exclusively leveraging openly available data (970k+ trajectories from the Open-X dataset), and openly shared their training recipes alongside the model weights. Architecturally, OpenVLA integrates a pre-trained vision encoder to project visual tokens into the embedding space of the Llama2-7B @touvronLlama2Open2023 language-model backbone. The language model backbone is then used to predict *discrete action tokens* over 256 activation levels.
|
| 2000 |
|
|
@@ -2244,7 +2229,7 @@ for epoch in range(num_epochs):
|
|
| 2244 |
|
| 2245 |
<span id="conclusions" style="position: absolute;"></span>
|
| 2246 |
|
| 2247 |
-
This tutorial has charted the paradigmatic shift transforming robotics, tracing the <
|
| 2248 |
|
| 2249 |
Our exploration traced a clear trajectory of progress, beginning with Reinforcement Learning (RL). While RL offers a powerful paradigm for learning through interaction, its application in robotics is complicated by challenges such as sample inefficiency, safety concerns in real-world training, and the complexities of reward design. We saw how modern approaches like HIL-SERL make real-world RL more feasible by incorporating training-time human guidance, datasets of previously collected data as well as learned reward classifiers.
|
| 2250 |
|
|
|
|
| 20 |
|
| 21 |
import MultiImage from '../components/MultiImage.astro';
|
| 22 |
import ResponsiveImage from '../components/ResponsiveImage.astro';
|
| 23 |
+
import Quote from '../components/Quote.astro';
|
| 24 |
import ch2_planar_manipulator_free from './assets/image/figures/ch2/ch2-planar-manipulator-free.png';
|
| 25 |
import ch2_planar_manipulator_floor from './assets/image/figures/ch2/ch2-planar-manipulator-floor.png';
|
| 26 |
import ch2_planar_manipulator_floor_shelf from './assets/image/figures/ch2/ch2-planar-manipulator-floor-shelf.png';
|
|
|
|
| 102 |
|
| 103 |
Moreover, since end-to-end learning on ever-growing collections of text and image data has historically been at the core of the development of *foundation models* capable of semantic reasoning across multiple modalities (images, text, audio, etc.), deriving robotics methods grounded in learning appears particularly consequential, especially as the number of openly available datasets continues to grow.
|
| 104 |
|
| 105 |
+
Robotics is, at its core, an inherently multidisciplinary field, requiring a wide range of expertise in both *software* and *hardware*. The integration of learning-based techniques further broadens this spectrum of skills, raising the bar for both research and practical applications. `lerobot` is an open-source library designed to integrate end-to-end with the entire robotics stack. With a strong focus on accessible, real-world robots, <mark>(1) `lerobot` supports many, openly available, robotic platforms</mark> for manipulation, locomotion and even whole-body control. `lerobot` also implements a <mark>(2) unified, low-level approach to reading/writing robot configurations</mark> to extend support for other robot platforms with relatively low effort. The library introduces `LeRobotDataset`, <mark>(3) a native robotics dataset format</mark> currently being used by the community to efficiently record and share datasets. `lerobot` also supports many state-of-the-art (SOTA) algorithms in robot learning--mainly based on Reinforcement Learning (RL) and Behavioral Cloning (BC) techniques--with efficient implementations in PyTorch, and extended support for experimentation and experiment tracking. Lastly, `lerobot` defines a custom, optimized inference stack for robotic policies decoupling action planning from action execution, proving effective in guaranteeing more adaptability at runtime.
|
| 106 |
|
| 107 |
This tutorial serves the double purpose of providing useful references for the Science behind--and practical use of--common robot learning techniques. To this aim, we strive to provide a rigorous yet concise overview of the core concepts behind the techniques presented, paired with practical examples of how to use such techniques concretely, with code examples in `lerobot`, for researchers and practitioners interested in the field of robot learning. This tutorial is structured as follows:
|
| 108 |
|
|
|
|
| 282 |
## Classical Robotics
|
| 283 |
|
| 284 |
<span id="classical" style="position: absolute;"></span>
|
| 285 |
+
<Quote source="Sun Tzu">
|
| 286 |
+
Know your enemy
|
| 287 |
+
</Quote>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 288 |
<div class="callout">
|
| 289 |
|
| 290 |
TL;DR Learning-based approaches to robotics are motivated by the need to (1) generalize across tasks and embodiments (2) reduce dependency on human expertise (3) leverage historical trends on the production of data--all traditionally overlooked by dynamics-based techniques.
|
|
|
|
| 305 |
<figcaption>Overview of methods to generate motion (clearly non-exhaustive, see @bekrisStateRobotMotion2024). The different methods can be grouped based on whether they explicitly (<em>dynamics-based</em>) or implicitly (<em>learning-based</em>) model robot-environment interactions.</figcaption>
|
| 306 |
</figure>
|
| 307 |
|
| 308 |
+
Robotics is concerned with producing artificial motion in the physical world in a useful, reliable and safe fashion. Thus, robotics is an inherently multidisciplinary domain: producing autonomous motion in the physical world requires, at the very least, interfacing different software (motion planners) and hardware (motion executors) components. Further, knowledge of mechanical, electrical, and software engineering, as well as rigid-body mechanics and control theory have therefore proven quintessential in robotics since the field first developed in the 1950s. More recently, Machine Learning (ML) has also proved effective in robotics, complementing these more traditional disciplines @connellRobotLearning1993. As a direct consequence of its multidisciplinary nature, robotics has developed as a rather wide array of methods, all concerned with the main purpose of <mark>producing artificial motion in the physical world</mark>.
|
| 309 |
|
| 310 |
+
Methods to produce robotics motion range from traditional *explicit* models--<mark>dynamics-based</mark>[^1] methods, leveraging precise descriptions of the mechanics of robots’ rigid bodies and their interactions with eventual obstacles in the environment--to *implicit* models--<mark>learning-based</mark> methods, treating artificial motion as a statistical pattern to learn given multiple sensorimotor readings @agrawalComputationalSensorimotorLearning, @bekrisStateRobotMotion2024. A variety of methods have been developed between these two extrema. For instance, @hansenTemporalDifferenceLearning2022 show how learning-based systems can benefit from information on the physics of problems, complementing a traditional learning method such as Temporal Difference (TD)-learning @suttonReinforcementLearningIntroduction2018 with Model-Predictive Control (MPC). Conversely, as explicit models may be relying on assumptions proving overly simplistic--or even unrealistic--in practice, learning can prove effective to improve modeling of complex phenomena or complement perception @mccormacSemanticFusionDense3D2016. Such examples aim at demonstrating the richness of approaches to robotics, and Figure <a href="#generating-motion-atlas" data-reference-type="ref" data-reference="generating-motion-atlas">[generating-motion-atlas]</a> graphically illustrates some of the most relevant techniques. Such a list is clearly far from being exhaustive, and we refer to @bekrisStateRobotMotion2024 for a more comprehensive overview of both general and application-specific methods for motion generation. In this section, we wish to introduce the inherent benefits of <mark>learning-based approaches to robotics</mark>--the core focus of this tutorial.
|
| 311 |
|
| 312 |
### Different Types of Motion
|
| 313 |
|
|
|
|
| 474 |
<figcaption>Dynamics-based approaches to robotics suffer from several limitations: (1) orchestrating multiple components poses integration challenges; (2) the need to develop custom processing pipelines for the sensing modalities and tasks considered hinders scalability; (3) simplified analytical models of physical phenomena (here friction at the gripper; credits to @antonovaReinforcementLearningPivoting2017) limit real-world performance. Lastly, (4) dynamics-based methods overlook trends in the availability and growth of robotics data.</figcaption>
|
| 475 |
</figure>
|
| 476 |
|
| 477 |
+
Dynamics-based robotics pipelines have historically been <mark>developed sequentially, engineering the different blocks</mark> now within most architectures for specific purposes. That is, sensing, state estimation, mapping, planning, (diff-)IK, and low-level control have been traditionally developed as distinct modules with fixed interfaces. Pipelining these specific modules proved error-prone, and brittleness emerges--alongside compounding errors--whenever changes occur (e.g., changes in lighting for sensing, occlusion/failure of sensors, control failures). Adapting such a stack to new tasks or robotic platforms often entails re-specifying objectives, constraints, and heuristics at multiple stages, incurring significant engineering overhead.
|
| 478 |
|
| 479 |
+
Moreover, classical planners operate on compact, assumed-sufficient state representations; extending them to reason directly over raw, heterogeneous and noisy data streams is non-trivial. This results in a <mark>limited scalability to multimodal data and multitask settings</mark>, as incorporating high-dimensional perceptual inputs (RGB, depth, tactile, audio) traditionally required extensive engineering efforts to extract meaningful features for control. Also, the large number of tasks, coupled with the adoption of *per-task* planners, goal parameterizations, and safety constraints, results in an explosion in design and validation options, with little opportunity to reuse solutions across tasks.
|
| 480 |
|
| 481 |
+
Setting aside integration and scalability challenges: developing accurate modeling of contact, friction, and compliance for complicated systems remains difficult. Rigid-body approximations are often insufficient in the presence of deformable objects, and <mark>relying on approximated models hinders real-world applicability</mark> of the methods developed. In the case of complex, time-dependent and/or non-linear dynamics, even moderate mismatches in parameters, unmodeled evolutions, or grasp-induced couplings can qualitatively affect the observed dynamics.
|
| 482 |
|
| 483 |
+
Lastly, dynamics-based methods (naturally) overlook the rather recent <mark>increase in availability of openly-available robotics datasets</mark>. The curation of academic datasets by large centralized groups of human experts in robotics @oneillOpenXEmbodimentRobotic2025, @khazatskyDROIDLargeScaleInTheWild2025 is now increasingly complemented by a <mark>growing number of robotics datasets contributed in a decentralized fashion</mark> by individuals with varied expertise. If not tangentially, dynamics-based approaches are not posed to maximally benefit from this trend, which holds the premise of allowing generalization in the space of tasks and embodiments, like data was the cornerstone for advancements in vision @alayracFlamingoVisualLanguage2022 and natural-language understanding @brownLanguageModelsAre2020.
|
| 484 |
|
| 485 |
Taken together, these limitations (Figure <a href="#classical-limitations" data-reference-type="ref" data-reference="classical-limitations">[classical-limitations]</a>) motivate the exploration of learning-based approaches that can (1) integrate perception and control more tightly, (2) adapt across tasks and embodiments with reduced expert modeling interventions and (3) scale gracefully in performance as more robotics data becomes available.
|
| 486 |
|
| 487 |
## Robot (Reinforcement) Learning
|
| 488 |
|
| 489 |
<span id="learning-rl" style="position: absolute;"></span>
|
| 490 |
+
<Quote source="Richard Sutton">
|
| 491 |
+
Approximate the solution, not the problem
|
| 492 |
+
</Quote>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 493 |
<div class="callout">
|
| 494 |
|
| 495 |
TL;DR The need for expensive, high-fidelity simulators can be obviated by learning from real-world data, using sample-efficient algorithms that can safely train directly on hardware.
|
|
|
|
| 509 |
|
| 510 |
Learning-based techniques for robotics naturally address the limitations presented in Section <a href="#classical" data-reference-type="ref" data-reference="classical">[classical]</a> (Figure <a href="#robot-learning-upsides" data-reference-type="ref" data-reference="robot-learning-upsides">[robot-learning-upsides]</a>). In particular, learning-based techniques typically rely on monolithic prediction-to-action pipelines (*visuomotor policies*) which directly map sensorimotor inputs to predicted actions, streamlining control policies by removing the need to interface multiple components. Mapping sensory inputs to actions also makes it possible to incorporate diverse input modalities, leveraging the automatic feature extraction capabilities of modern learning systems. Moreover, learning-based approaches can, in principle, bypass explicit modeling altogether and instead rely solely on interaction data--an advantage that proves transformative when dynamics are difficult to model or entirely unknown. Lastly, learning for robotics (*robot learning*) is naturally well posed to leverage the growing amount of robotics data openly available, just as computer vision and natural language processing did historically benefit from large-scale corpora of data, in great part overlooked by dynamics-based approaches.
|
| 511 |
|
| 512 |
+
Being a field in its relatively nascent stages, no prevalent technique(s) proves distinctly better than any other in the domain of robot learning. Still, two major classes of methods gained prominence: <mark>Reinforcement Learning (RL)</mark> and <mark>Behavioral Cloning (BC)</mark> (Figure <a href="#robot-learning-atlas" data-reference-type="ref" data-reference="robot-learning-atlas">[robot-learning-atlas]</a>). In this section, we provide a conceptual overview of applications of RL to robotics, as well as introduce practical examples of how to use RL within `lerobot`. We then introduce the major limitations RL suffers from, to introduce BC techniques in Section <a href="#learning-imitation" data-reference-type="ref" data-reference="learning-imitation">[learning-imitation]</a> and Section <a href="#learning-foundation" data-reference-type="ref" data-reference="learning-foundation">[learning-foundation]</a>.
|
| 513 |
|
| 514 |
<div class="wrapfigure">
|
| 515 |
|
|
|
|
| 646 |
|
| 647 |
Streamlined end-to-end control pipelines, data-driven feature extraction and a disregard for explicit modeling in favor of interaction data are all features of RL for robotics. However, RL still suffers from limitations concerning safety and learning efficiency, particularly pressing for real-world robotics applications.
|
| 648 |
|
| 649 |
+
First, especially early in training, <mark>actions are typically explorative, and thus may be erratic</mark>. On physical systems, untrained policies may command high velocities, self-colliding configurations, or torques exceeding joint limits, leading to wear and potential hardware damage. Mitigating these risks requires external safeguards (e.g., watchdogs, safety monitors, emergency stops), often incurring a high degree of human supervision. Further, in the typical episodic setting considered in most robotics problems, experimentation is substantially slowed down by the need to manually reset the environment over the course of training, a time-consuming and error-prone process. Second, learning efficiently remains problematic in RL, <mark>limiting the applicability of RL in real-world robotics due to consequently prohibitive timescales of training</mark>. Even strong algorithms such as SAC @haarnojaSoftActorCriticOffPolicy2018 typically require a large number of transitions $\{ (s_t, a_t, r_t, s_{t+1})\}_{t=1}^N$. On real-world hardware, generating this data is time-consuming.
|
| 650 |
|
| 651 |
<figure>
|
| 652 |
<ResponsiveImage
|
|
|
|
| 678 |
|
| 679 |
Selecting the dynamics distribution $\Xi$ is also non-trivial. On the one hand, distributions with low entropy might risk to cause failure at transfer time, due to the limited robustness induced over the course of training. On the other hand, excessive randomization may cause over-regularization and hinder performance @margolisRapidLocomotionReinforcement2022. Consequently, the research community investigated approaches to automatically select the randomization distribution $\Xi$, using signals from the training process or tuning it to reproduce observed real-world trajectories. @akkayaSolvingRubiksCube2019 use a parametric uniform distribution $\mathcal U(a, b)$ as $\Xi$, widening the bounds $a, b$ as training progresses and the agent’s performance improves (AutoDR). While effective, AutoDR requires significant tuning--the bounds are widened by a fixed, pre-specified amount $\Delta$ along--and may disregard data when performance *does not* improve after a distribution update @tiboniDomainRandomizationEntropy2024. @tiboniDomainRandomizationEntropy2024 propose a similar method to AutoDR (DORAEMON) to evolve $\Xi$ based on the training signal, but with the key difference of explicitly maximizing the entropy of a parametric Beta distribution--inherently more flexible than uniform distributions--with learned updates instead of fixed $\Delta$. In this, DORAEMON proves particularly effective at dynamically increasing the entropy levels of the training distribution by employing an outer-loop max-entropy objective, tackled under performance constraints in the inner-loop RL problem. Other approaches to automatically perform DR consist in specifically tuning $\Xi$ to align as much as possible the simulation and real-world domains. 
For instance, @chebotarClosingSimtorealLoop2019 interleave in-simulation policy training with repeated real-world policy rollouts used to adjust $\Xi$ based on real-world data, while @tiboniDROPOSimtoRealTransfer2023 leverage a single, pre-collected set of real-world trajectories and tune $\Xi$ under a simple likelihood objective.
|
| 680 |
|
| 681 |
+
While DR has shown promise, it does not address the main limitation that, even under the assumption that an ideal distribution $\Xi$ was available, many robotics problems <mark>cannot be simulated with high-enough fidelity under practical computational constraints</mark>. Simulating contact-rich manipulation of possibly deformable or soft materials--i.e., *folding a piece of clothing*--can prove time-intensive, limiting the benefits of in-simulation training.
|
| 682 |
|
| 683 |
A perhaps more fundamental limitation of RL for robotics is the general unavailability of complicated tasks’ *dense* reward function, the design of which is essentially based on human expertise, ingenuity and trial-and-error. In practice, *sparse* reward functions can be used to conclude whether one specific goal has been attained--*has this t-shirt been correctly folded?*--but unfortunately lead to more challenging learning. As a result, despite notable successes, deploying RL directly on real-world robots at scale remains challenging.
|
| 684 |
|
|
|
|
| 732 |
```
|
| 733 |
Similarly to DQN, DDPG also employs the same replay buffer mechanism, reusing past transitions over training for increased sample efficiency and estimating the loss function via MC-estimates.
|
| 734 |
|
| 735 |
+
Soft Actor-Critic (SAC) @haarnojaSoftActorCriticOffPolicy2018 is a derivation of DDPG in the max-entropy (MaxEnt) RL framework, in which RL agents are tasked with <mark>maximizing the discounted cumulative reward, while acting as randomly as possible</mark>. MaxEnt RL @haarnojaReinforcementLearningDeep2017b has proven particularly robust thanks to the development of diverse behaviors, incentivized by its entropy-regularization formulation. In that, MaxEnt revisits the RL objective $J (\pi)$ to specifically account for the policy entropy $\mathcal H(\pi (\bullet \vert s_t))$,
|
| 736 |
<span id="J-soft" style="position: absolute;">
|
| 737 |
</span>
|
| 738 |
|
|
|
|
| 1058 |
## Robot (Imitation) Learning
|
| 1059 |
|
| 1060 |
<span id="learning-imitation" style="position: absolute;"></span>
|
| 1061 |
+
<Quote source="Norbert Wiener">
|
| 1062 |
+
The best material model for a cat is another, or preferably the same cat
|
| 1063 |
+
</Quote>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1064 |
<div class="callout">
|
| 1065 |
|
| 1066 |
TL;DR Behavioral Cloning provides a natural platform to learn from real-world interactions without the need to design any reward function, and generative models prove more effective than point-wise policies at dealing with multimodal demonstration datasets.
|
|
|
|
| 1078 |
<figcaption>(A) Average (with standard deviation) evolution of the actuation levels over the first 5 recorded episodes in <a href="lerobot/svla_so101_pickplace" class="uri">lerobot/svla_so101_pickplace</a>. Proprioperceptive states prove invaluable in determining the robot’s state during an episode. (B) Camera frames are also recorded alongside measurements on the robot’s state, capturing information about the robot’s interaction with its environment.</figcaption>
|
| 1079 |
</figure>
|
| 1080 |
|
| 1081 |
+
Learning from human demonstrations provides a pragmatic alternative to the RL pipeline discussed in Section <a href="#learning-rl" data-reference-type="ref" data-reference="learning-rl">[learning-rl]</a>. Indeed, especially in real-world robotics, online exploration is typically <mark>costly and potentially unsafe</mark>, and designing (dense) reward signals is a <mark>brittle and task-specific</mark> process. Further, even success detection itself often requires bespoke instrumentation, while episodic training demands reliable resets--all factors complicating training RL algorithms on hardware at scale. Behavioral Cloning (BC) sidesteps these constraints by <mark>casting control as an imitation learning problem</mark>, leveraging previously collected expert demonstrations to anchor the learned autonomous behavior. Most notably, by *learning-to-imitate*, autonomous systems naturally adhere to the objectives, preferences, and success criteria implicitly encoded in the data, which reduces early-stage exploratory failures and obviates hand-crafted reward shaping altogether.
|
| 1082 |
|
| 1083 |
Formally, let $\mathcal D = \{ \tau^{(i)} \}_{i=1}^N$ be a set of expert trajectories, with $\tau^{(i)} = \{(o_t^{(i)}, a_t^{(i)})\}_{t=0}^{T_i}$ representing the $i$-th length-$T_i$ trajectory in $\mathcal D$, $o_t \in \mathcal O$ denoting observations (e.g., images and proprioception altogether), and $a_t \in \mathcal A$ the expert actions. Typically, observations $o \in \mathcal O$ consist of both image and proprioperceptive information, while actions $a \in \mathcal A$ represent control specifications for the robot to execute, e.g. a joint configuration. Note that differently from Section <a href="#learning-rl" data-reference-type="ref" data-reference="learning-rl">[learning-rl]</a>, in the imitation learning context $\mathcal D$ denotes an offline dataset collecting $N$ length-$T_i$ reward-free (expert) human trajectories $\tau^{(i)}$, and *not* the environment dynamics. Similarily, in this section $\tau^{(i)}$ represent a length-$T_i$ trajectory of observation-action pairs, which crucially *omits entirely any reward* information. Figure <a href="#ch4-bc-trajectories" data-reference-type="ref" data-reference="ch4-bc-trajectories">[ch4-bc-trajectories]</a> graphically shows trajectories in terms of the average evolution of the actuation on the 6 joints of a teleoperated SO-100 manipulator. Notice how proprioperceptive states are captured jointly with camera frames over the course of the recorded episodes, providing a unified high-frame rate collection of both image and joint teleoperation data. Figure <a href="#ch4-observation-action-mapping" data-reference-type="ref" data-reference="ch4-observation-action-mapping">[ch4-observation-action-mapping]</a> shows $(o_t, a_t)$-pairs for the same dataset, with the actions performed by the human expert illustrated alongside the corresponding observation. 
In principle, (expert) trajectories $\tau^{(i)}$ can have different lengths since demonstrations might exhibit multi-modal strategies to attain the same goal, resulting in multiple, different behaviors.
|
| 1084 |
|
|
|
|
| 1936 |
## Generalist Robot Policies
|
| 1937 |
|
| 1938 |
<span id="learning-foundation" style="position: absolute;"></span>
|
| 1939 |
+
<Quote source="Robert A. Heinlein">
|
| 1940 |
+
Specialization is for insects
|
| 1941 |
+
</Quote>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1942 |
<div class="callout">
|
| 1943 |
|
| 1944 |
TL;DR Openly available, large-scale datasets and the development of stable-to-train, expressive and efficient architectures fostered research on the development of generalist robot policies that can operate across embodiment and tasks.
|
|
|
|
| 1979 |
|
| 1980 |
In a follow-up work, the same group of authors propose a modified method to learn generalist models, leveraging (1) a more powerful architecture and (2) scaling up the dataset used. In RT-2, @brohanRT2VisionLanguageActionModels2023 propose inheriting internet-scale semantic knowledge from large-scale multi-modal datasets to learn a single, *unified model* for robotics control. Such a model, termed *Vision-Language-Action* (VLA) in the original RT-2 paper, effectively casts robot control as a language-modeling problem, and in particular as a Visual Question-Answering (VQA) task, in which the output token space used to represent *textual tokens* is shared with the *8-bit tokens* used to represent the 256 ($2^8$) actuation levels of a 6-dof robot. In their work, @brohanRT2VisionLanguageActionModels2023 propose co-fine-tuning large-scale VLMs such as PaLIX @chenPaLIXScalingMultilingual2023 or PaLM-E @driessPaLMEEmbodiedMultimodal2023 on a mix of (1) web and (2) robotics data, complementing VQA training with robotics-specific signal, and learning to directly output robot actions in a shared token space for visual and language inputs. In their work, the authors claim using large models trained on internet-scale data as backbones for VLAs allows models to tap into the rich semantic knowledge embedded in the VLM’s parameters, interpreting instructions and unseen objects by connecting them to concepts acquired while pre-training. For instance, @brohanRT2VisionLanguageActionModels2023 show that while RT-2 has never been explicitly trained to repurpose tools for a *hammering* task, it can still combine its semantic understanding of images, so that when asked which object between (1) a piece of paper, (2) a pair of headphones or (3) a rock may be used instead of a hammer, it correctly answers (3).
|
| 1981 |
|
| 1982 |
+
Traditionally, research efforts revolved around not only training models, but also proposing datasets for the community, a costly and time-consuming process. Due to the aforementioned embodiment gap, the data used in research efforts in robot learning have traditionally proved rather fragmented, tailored to the specific task considered by the specific group of researchers who collected it, which ultimately hindered integration. The Open X-Embodiment project @oneillOpenXEmbodimentRobotic2025 was a landmark collaboration effort to address data fragmentation, by curating the aggregation of 60 *existing* robotics datasets from 22 different robot embodiments and 21 institutions across the world, and resulted in a total of 1.4M cross-embodiment, cross-task, openly-available trajectories. Besides the contribution of an aggregate, large scale dataset, @oneillOpenXEmbodimentRobotic2025 also demonstrated significant positive transfer *across tasks and embodiments*, showing that <mark>a single model trained on multi-embodiment data can outperform specialist models</mark> trained on their respective single-embodiment datasets. The Distributed Robot Interaction Dataset (DROID) @khazatskyDROIDLargeScaleInTheWild2025 represents another significant step towards addressing the problem of scarce and disaggregated data in robot learning, providing a unique dataset consisting of 75k+ human demonstrations collected in realistic (*in-the-wild*) manipulation settings, providing another cornerstone for building general-purpose robot policies. Recently, foundational datasets curated through large, centralized efforts are increasingly complemented by decentralized, community-driven contributions of robotics data. 
Software libraries like `lerobot` have been instrumental in enabling decentralized collection of large amounts of data, providing the infrastructure for researchers and practitioners to easily contribute trajectories from a wide range of embodiments, democratizing data access via distributed collection.
|
| 1983 |
|
| 1984 |
Despite these advancements, the success of large, proprietary models like RT-1 and RT-2 highlighted a growing accessibility gap in robotics research, as training and deploying large-scale robotics foundation models requires computational resources simply unattainable for most research institutions. The OpenVLA project @kimOpenVLAOpenSourceVisionLanguageAction2024 emerged in direct contrast to traditionally closed-source efforts to develop VLAs. In particular, @kimOpenVLAOpenSourceVisionLanguageAction2024 trained OpenVLA by exclusively leveraging openly available data (970k+ trajectories from the Open-X dataset), and openly shared their training recipes alongside the model weights. Architecturally, OpenVLA integrates a pre-trained vision encoder to project visual tokens into the embedding space of the Llama2-7B @touvronLlama2Open2023 language-model backbone. The language model backbone is then used to predict *discrete action tokens* over 256 activation levels.
|
| 1985 |
|
|
|
|
| 2229 |
|
| 2230 |
<span id="conclusions" style="position: absolute;"></span>
|
| 2231 |
|
| 2232 |
+
This tutorial has charted the paradigmatic shift transforming robotics, tracing the <mark>evolution of robotics from structured, model-based methods to the dynamic, data-driven approaches that define modern robot learning</mark>. We began by examining the limitations of traditional dynamics-based control, namely its brittleness and significant engineering overhead, which motivate the adoption of more flexible, learning-based alternatives. Unlike scalable, data-driven techniques, conventional explicit models demand extensive human expertise, hindering wider accessibility and scalability of robotics.
|
| 2233 |
|
| 2234 |
Our exploration traced a clear trajectory of progress, beginning with Reinforcement Learning (RL). While RL offers a powerful paradigm for learning through interaction, its application in robotics is complicated by challenges such as sample inefficiency, safety concerns in real-world training, and the complexities of reward design. We saw how modern approaches like HIL-SERL make real-world RL more feasible by incorporating training-time human guidance, datasets of previously collected data as well as learned reward classifiers.
|
| 2235 |
|
app/scripts/latex-importer/reference-preprocessor.mjs
CHANGED
|
@@ -121,10 +121,10 @@ function convertHighlightCommands(content) {
|
|
| 121 |
let processedContent = content;
|
| 122 |
let highlightsConverted = 0;
|
| 123 |
|
| 124 |
-
// Replace \highlight{...} with <
|
| 125 |
processedContent = processedContent.replace(/\\highlight\{([^}]+)\}/g, (match, text) => {
|
| 126 |
highlightsConverted++;
|
| 127 |
-
return `<
|
| 128 |
});
|
| 129 |
|
| 130 |
return { content: processedContent, highlightsConverted };
|
|
|
|
| 121 |
let processedContent = content;
|
| 122 |
let highlightsConverted = 0;
|
| 123 |
|
| 124 |
+
// Replace \highlight{...} with <mark>...</mark>
|
| 125 |
processedContent = processedContent.replace(/\\highlight\{([^}]+)\}/g, (match, text) => {
|
| 126 |
highlightsConverted++;
|
| 127 |
+
return `<mark>${text}</mark>`;
|
| 128 |
});
|
| 129 |
|
| 130 |
return { content: processedContent, highlightsConverted };
|
app/src/components/Hero.astro
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
---
|
| 2 |
import HtmlEmbed from "./HtmlEmbed.astro";
|
|
|
|
| 3 |
|
| 4 |
interface Props {
|
| 5 |
title: string; // may contain HTML (e.g., <br/>)
|
|
@@ -37,6 +38,7 @@ function normalizeAuthors(
|
|
| 37 |
url?: string;
|
| 38 |
link?: string;
|
| 39 |
affiliationIndices?: number[];
|
|
|
|
| 40 |
}
|
| 41 |
>,
|
| 42 |
): Author[] {
|
|
@@ -47,8 +49,11 @@ function normalizeAuthors(
|
|
| 47 |
}
|
| 48 |
const name = (a?.name ?? "").toString();
|
| 49 |
const url = (a?.url ?? a?.link) as string | undefined;
|
|
|
|
| 50 |
const affiliationIndices = Array.isArray((a as any)?.affiliationIndices)
|
| 51 |
? (a as any).affiliationIndices
|
|
|
|
|
|
|
| 52 |
: undefined;
|
| 53 |
return { name, url, affiliationIndices } as Author;
|
| 54 |
})
|
|
@@ -69,9 +74,9 @@ for (const author of normalizedAuthors) {
|
|
| 69 |
}
|
| 70 |
}
|
| 71 |
}
|
| 72 |
-
const shouldShowAffiliationSupers = authorAffiliationIndexSet.size > 1;
|
| 73 |
const hasMultipleAffiliations =
|
| 74 |
Array.isArray(affiliations) && affiliations.length > 1;
|
|
|
|
| 75 |
|
| 76 |
function stripHtml(text: string): string {
|
| 77 |
return String(text || "").replace(/<[^>]*>/g, "");
|
|
@@ -96,7 +101,12 @@ const pdfFilename = `${slugify(pdfBase)}.pdf`;
|
|
| 96 |
<section class="hero">
|
| 97 |
<h1 class="hero-title" set:html={title} />
|
| 98 |
<div class="hero-banner">
|
| 99 |
-
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
{description && <p class="hero-desc">{description}</p>}
|
| 101 |
</div>
|
| 102 |
</section>
|
|
@@ -362,6 +372,10 @@ const pdfFilename = `${slugify(pdfBase)}.pdf`;
|
|
| 362 |
max-width: 980px;
|
| 363 |
margin: 0 auto;
|
| 364 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 365 |
.hero-desc {
|
| 366 |
color: var(--muted-color);
|
| 367 |
font-style: italic;
|
|
@@ -404,7 +418,8 @@ const pdfFilename = `${slugify(pdfBase)}.pdf`;
|
|
| 404 |
display: flex;
|
| 405 |
flex-direction: column;
|
| 406 |
gap: 8px;
|
| 407 |
-
|
|
|
|
| 408 |
}
|
| 409 |
.meta-container-cell h3 {
|
| 410 |
margin: 0;
|
|
|
|
| 1 |
---
|
| 2 |
import HtmlEmbed from "./HtmlEmbed.astro";
|
| 3 |
+
import Image from "./Image.astro";
|
| 4 |
|
| 5 |
interface Props {
|
| 6 |
title: string; // may contain HTML (e.g., <br/>)
|
|
|
|
| 38 |
url?: string;
|
| 39 |
link?: string;
|
| 40 |
affiliationIndices?: number[];
|
| 41 |
+
affiliations?: number[];
|
| 42 |
}
|
| 43 |
>,
|
| 44 |
): Author[] {
|
|
|
|
| 49 |
}
|
| 50 |
const name = (a?.name ?? "").toString();
|
| 51 |
const url = (a?.url ?? a?.link) as string | undefined;
|
| 52 |
+
// Support both 'affiliationIndices' and 'affiliations' as property names
|
| 53 |
const affiliationIndices = Array.isArray((a as any)?.affiliationIndices)
|
| 54 |
? (a as any).affiliationIndices
|
| 55 |
+
: Array.isArray((a as any)?.affiliations)
|
| 56 |
+
? (a as any).affiliations
|
| 57 |
: undefined;
|
| 58 |
return { name, url, affiliationIndices } as Author;
|
| 59 |
})
|
|
|
|
| 74 |
}
|
| 75 |
}
|
| 76 |
}
|
|
|
|
| 77 |
const hasMultipleAffiliations =
|
| 78 |
Array.isArray(affiliations) && affiliations.length > 1;
|
| 79 |
+
const shouldShowAffiliationSupers = hasMultipleAffiliations && authorAffiliationIndexSet.size > 0;
|
| 80 |
|
| 81 |
function stripHtml(text: string): string {
|
| 82 |
return String(text || "").replace(/<[^>]*>/g, "");
|
|
|
|
| 101 |
<section class="hero">
|
| 102 |
<h1 class="hero-title" set:html={title} />
|
| 103 |
<div class="hero-banner">
|
| 104 |
+
<Image
|
| 105 |
+
src="/src/content/assets/lerobot-logo-thumbnail.png"
|
| 106 |
+
alt="LeRobot Logo"
|
| 107 |
+
width={400}
|
| 108 |
+
height={200}
|
| 109 |
+
/>
|
| 110 |
{description && <p class="hero-desc">{description}</p>}
|
| 111 |
</div>
|
| 112 |
</section>
|
|
|
|
| 372 |
max-width: 980px;
|
| 373 |
margin: 0 auto;
|
| 374 |
}
|
| 375 |
+
.hero-banner img {
|
| 376 |
+
width: 100%;
|
| 377 |
+
height: auto;
|
| 378 |
+
}
|
| 379 |
.hero-desc {
|
| 380 |
color: var(--muted-color);
|
| 381 |
font-style: italic;
|
|
|
|
| 418 |
display: flex;
|
| 419 |
flex-direction: column;
|
| 420 |
gap: 8px;
|
| 421 |
+
flex: 1;
|
| 422 |
+
min-width: 0;
|
| 423 |
}
|
| 424 |
.meta-container-cell h3 {
|
| 425 |
margin: 0;
|
app/src/content/article.mdx
CHANGED
|
@@ -20,6 +20,7 @@ tableOfContentsAutoCollapse: true
|
|
| 20 |
|
| 21 |
import MultiImage from '../components/MultiImage.astro';
|
| 22 |
import ResponsiveImage from '../components/ResponsiveImage.astro';
|
|
|
|
| 23 |
import ch2_planar_manipulator_free from './assets/image/figures/ch2/ch2-planar-manipulator-free.png';
|
| 24 |
import ch2_planar_manipulator_floor from './assets/image/figures/ch2/ch2-planar-manipulator-floor.png';
|
| 25 |
import ch2_planar_manipulator_floor_shelf from './assets/image/figures/ch2/ch2-planar-manipulator-floor-shelf.png';
|
|
@@ -101,7 +102,7 @@ The frontier of robotics research is indeed increasingly moving away from classi
|
|
| 101 |
|
| 102 |
Moreover, since end-to-end learning on ever-growing collections of text and image data has historically been at the core of the development of *foundation models* capable of semantic reasoning across multiple modalities (images, text, audio, etc.), deriving robotics methods grounded in learning appears particularly consequential, especially as the number of openly available datasets continues to grow.
|
| 103 |
|
| 104 |
-
Robotics is, at its core, an inherently multidisciplinary field, requiring a wide range of expertise in both *software* and *hardware*. The integration of learning-based techniques further broadens this spectrum of skills, raising the bar for both research and practical applications. `lerobot` is an open-source library designed to integrate end-to-end with the entire robotics stack. With a strong focus on accessible, real-world robots <
|
| 105 |
|
| 106 |
This tutorial serves the double purpose of providing useful references for the Science behind--and practical use of--common robot learning techniques. To this aim, we strive to provide a rigorous yet concise overview of the core concepts behind the techniques presented, paired with practical examples of how to use such techniques concretely, with code examples in `lerobot`, for researchers and practitioners interested in the field of robot learning. This tutorial is structured as follows:
|
| 107 |
|
|
@@ -281,13 +282,9 @@ for epoch in range(num_epochs):
|
|
| 281 |
## Classical Robotics
|
| 282 |
|
| 283 |
<span id="classical" style="position: absolute;"></span>
|
| 284 |
-
<
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
Sun Tzu
|
| 289 |
-
|
| 290 |
-
</div>
|
| 291 |
<div class="callout">
|
| 292 |
|
| 293 |
TL;DR Learning-based approaches to robotics are motivated by the need to (1) generalize across tasks and embodiments (2) reduce dependency on human expertise (3) leverage historical trends on the production of data--all traditionally overlooked by dynamics-based techniques.
|
|
@@ -308,9 +305,9 @@ TL;DR Learning-based approaches to robotics are motivated by the need to (1) gen
|
|
| 308 |
<figcaption>Overview of methods to generate motion (clearly non-exhaustive, see @bekrisStateRobotMotion2024). The different methods can be grouped based on whether they explicitly (<em>dynamics-based</em>) or implicitly (<em>learning-based</em>) model robot-environment interactions.</figcaption>
|
| 309 |
</figure>
|
| 310 |
|
| 311 |
-
Robotics is concerned with producing artificial motion in the physical world in useful, reliable and safe fashion. Thus, robotics is an inherently multidisciplinary domain: producing autonomous motion in the physical world requires, at the very least, interfacing different software (motion planners) and hardware (motion executioners) components. Further, knowledge of mechanical, electrical, and software engineering, as well as rigid-body mechanics and control theory have therefore proven quintessential in robotics since the field first developed in the 1950s. More recently, Machine Learning (ML) has also proved effective in robotics, complementing these more traditional disciplines @connellRobotLearning1993. As a direct consequence of its multidisciplinary nature, robotics has developed as a rather wide array of methods, all concerned with the main purpose of <
|
| 312 |
|
| 313 |
-
Methods to produce robotics motion range from traditional *explicit* models--<
|
| 314 |
|
| 315 |
### Different Types of Motion
|
| 316 |
|
|
@@ -477,26 +474,22 @@ Despite the last 60+ years of robotics research, autonomous robots are still lar
|
|
| 477 |
<figcaption>Dynamics-based approaches to robotics suffer from several limitations: (1) orchestrating multiple components poses integration challenges; (2) the need to develop custom processing pipelines for the sensing modalities and tasks considered hinders scalability; (3) simplified analytical models of physical phenomena (here friction at the gripper; credits to @antonovaReinforcementLearningPivoting2017) limit real-world performance. Lastly, (4) dynamics-based methods overlook trends in the availability and growth of robotics data.</figcaption>
|
| 478 |
</figure>
|
| 479 |
|
| 480 |
-
Dynamics-based robotics pipelines have historically been <
|
| 481 |
|
| 482 |
-
Moreover, classical planners operate on compact, assumed-sufficient state representations; extending them to reason directly over raw, heterogeneous and noisy data streams is non-trivial. This results in a <
|
| 483 |
|
| 484 |
-
Setting aside integration and scalability challenges: developing accurate modeling of contact, friction, and compliance for complicated systems remains difficult. Rigid-body approximations are often insufficient in the presence of deformable objects, and <
|
| 485 |
|
| 486 |
-
Lastly, dynamics-based methods (naturally) overlook the rather recent <
|
| 487 |
|
| 488 |
Taken together, these limitations (Figure <a href="#classical-limitations" data-reference-type="ref" data-reference="classical-limitations">[classical-limitations]</a>) motivate the exploration of learning-based approaches that can (1) integrate perception and control more tightly, (2) adapt across tasks and embodiments with reduced expert modeling interventions and (3) scale gracefully in performance as more robotics data becomes available.
|
| 489 |
|
| 490 |
## Robot (Reinforcement) Learning
|
| 491 |
|
| 492 |
<span id="learning-rl" style="position: absolute;"></span>
|
| 493 |
-
<
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
Richard Sutton
|
| 498 |
-
|
| 499 |
-
</div>
|
| 500 |
<div class="callout">
|
| 501 |
|
| 502 |
TL;DR The need for expensive, high-fidelity simulators can be obviated learning from real-world data, using sample-efficient algorithms that can safely train directly on hardware.
|
|
@@ -516,7 +509,7 @@ TL;DR The need for expensive, high-fidelity simulators can be obviated learning
|
|
| 516 |
|
| 517 |
Learning-based techniques for robotics naturally address the limitations presented in Section <a href="#classical" data-reference-type="ref" data-reference="classical">[classical]</a> (Figure <a href="#robot-learning-upsides" data-reference-type="ref" data-reference="robot-learning-upsides">[robot-learning-upsides]</a>). In particular, learning-based techniques typically rely on monolithich prediction-to-action pipelines (*visuomotor policies*) which do directly map sensorimotor inputs to predicted actions, streamlining control policies by removing the need to interface multiple components. Mapping sensory inputs to actions also makes it possible to incorporate diverse input modalities, leveraging the automatic feature extraction capabilities of modern learning systems. Moreover, learning-based approaches can, in principle, bypass explicit modeling altogether and instead rely solely on interaction data--an advantage that proves transformative when dynamics are difficult to model or entirely unknown. Lastly, learning for robotics (*robot learning*) is naturally well posed to leverage the growing amount of robotics data openly available, just as computer vision and natural language processing did historically benefit from large-scale corpora of data, in great part overlooked by dynamics-based approaches.
|
| 518 |
|
| 519 |
-
Being a field at its relative nascent stages, no prevalent technique(s) proves distinctly better than any other in the domain of robot learning. Still, two major classes of methods gained prominence- <
|
| 520 |
|
| 521 |
<div class="wrapfigure">
|
| 522 |
|
|
@@ -653,7 +646,7 @@ Popular approaches to continuous state and action space--such as those studied w
|
|
| 653 |
|
| 654 |
Streamlined end-to-end control pipelines, data-driven feature extraction and a disregard for explicit modeling in favor of interaction data are all features of RL for robotics. However, RL still suffers from limitations concerning safety and learning efficiency, particularly pressing for real-world robotics applications.
|
| 655 |
|
| 656 |
-
First, especially early in training, <
|
| 657 |
|
| 658 |
<figure>
|
| 659 |
<ResponsiveImage
|
|
@@ -685,7 +678,7 @@ While effective in transfering policies across the reality gap in real-world rob
|
|
| 685 |
|
| 686 |
Selecting the dynamics distribution $\Xi$ is also non-trivial. On the one hand, distributions with low entropy might risk to cause failure at transfer time, due to the limited robustness induced over the course of training. On the other hand, excessive randomization may cause over-regularization and hinder performance @margolisRapidLocomotionReinforcement2022. Consequently, the research community investigated approaches to automatically select the randomization distribution $\Xi$, using signals from the training process or tuning it to reproduce observed real-world trajectories. @akkayaSolvingRubiksCube2019 use a parametric uniform distribution $\mathcal U(a, b)$ as $\Xi$, widening the bounds $a, b$ as training progresses and the agent’s performance improves (AutoDR). While effective, AutoDR requires significant tuning--the bounds are widened by a fixed, pre-specified amount $\Delta$ along--and may disregard data when performance *does not* improve after a distribution update @tiboniDomainRandomizationEntropy2024. @tiboniDomainRandomizationEntropy2024 propose a similar method to AutoDR (DORAEMON) to evolve $\Xi$ based on the training signal, but with the key difference of explicitly maximizing the entropy of a parametric Beta distribution--inherently more flexible than uniform distributions--with learned updates instead of fixed $\Delta$. In this, DORAEMON proves particularly effective at dynamically increasing the entropy levels of the training distribution by employing an outer-loop max-entropy objective, tackled under performance constraints in the inner-loop RL problem. Other approaches to automatically perform DR consist in specifically tuning $\Xi$ to align as much as possible the simulation and real-world domains. 
For instance, @chebotarClosingSimtorealLoop2019 interleave in-simulation policy training with repeated real-world policy rollouts used to adjust $\Xi$ based on real-world data, while @tiboniDROPOSimtoRealTransfer2023 leverage a single, pre-collected set of real-world trajectories and tune $\Xi$ under a simple likelihood objective.
|
| 687 |
|
| 688 |
-
While DR has shown promise, it does not address the main limitation that, even under the assumption that an ideal distribution $\Xi$ was available, many robotics problems <
|
| 689 |
|
| 690 |
A perhaps more fundamental limitation of RL for robotics is the general unavailability of complicated tasks’ *dense* reward function, the design of which is essentially based on human expertise, ingenuity and trial-and-error. In practice, *sparse* reward functions can be used to conclude whether one specific goal has been attained--*has this t-shirt been correctly folded?*--but unfortunately result in more challenging learning. As a result, despite notable successes, deploying RL directly on real-world robots at scale remains challenging.
|
| 691 |
|
|
@@ -739,7 +732,7 @@ Provably, eq. <a href="#deterministic-pg" data-reference-type="ref" data-refere
|
|
| 739 |
```
|
| 740 |
Similarly to DQN, DDPG also employs the same replay buffer mechanism, reusing past transitions over training for increased sample efficiency and estimating the loss function via MC-estimates.
|
| 741 |
|
| 742 |
-
Soft Actor-Critic (SAC) @haarnojaSoftActorCriticOffPolicy2018 is a derivation of DDPG in the max-entropy (MaxEnt) RL framework, in which RL agents are tasked with <
|
| 743 |
<span id="J-soft" style="position: absolute;">
|
| 744 |
</span>
|
| 745 |
|
|
@@ -1065,13 +1058,9 @@ Advances in learning to act from potentially large corpora of human demonstratio
|
|
| 1065 |
## Robot (Imitation) Learning
|
| 1066 |
|
| 1067 |
<span id="learning-imitation" style="position: absolute;"></span>
|
| 1068 |
-
<
|
| 1069 |
-
|
| 1070 |
-
|
| 1071 |
-
|
| 1072 |
-
Norbert Wiener
|
| 1073 |
-
|
| 1074 |
-
</div>
|
| 1075 |
<div class="callout">
|
| 1076 |
|
| 1077 |
TL;DR Behavioral Cloning provides a natural platform to learn from real-world interactions without the need to design any reward function, and generative models prove more effective than point-wise policies at dealing with multimodal demonstration datasets.
|
|
@@ -1089,7 +1078,7 @@ TL;DR Behavioral Cloning provides a natural platform to learn from real-world in
|
|
| 1089 |
<figcaption>(A) Average (with standard deviation) evolution of the actuation levels over the first 5 recorded episodes in <a href="lerobot/svla_so101_pickplace" class="uri">lerobot/svla_so101_pickplace</a>. Proprioperceptive states prove invaluable in determining the robot’s state during an episode. (B) Camera frames are also recorded alongside measurements on the robot’s state, capturing information about the robot’s interaction with its environment.</figcaption>
|
| 1090 |
</figure>
|
| 1091 |
|
| 1092 |
-
Learning from human demonstrations provides a pragmatic alternative to the RL pipeline discussed in Section <a href="#learning-rl" data-reference-type="ref" data-reference="learning-rl">[learning-rl]</a>. Indeed, especially in real-world robotics, online exploration is typically <
|
| 1093 |
|
| 1094 |
Formally, let $\mathcal D = \{ \tau^{(i)} \}_{i=1}^N$ be a set of expert trajectories, with $\tau^{(i)} = \{(o_t^{(i)}, a_t^{(i)})\}_{t=0}^{T_i}$ representing the $i$-th length-$T_i$ trajectory in $\mathcal D$, $o_t \in \mathcal O$ denoting observations (e.g., images and proprioception altogether), and $a_t \in \mathcal A$ the expert actions. Typically, observations $o \in \mathcal O$ consist of both image and proprioperceptive information, while actions $a \in \mathcal A$ represent control specifications for the robot to execute, e.g. a joint configuration. Note that differently from Section <a href="#learning-rl" data-reference-type="ref" data-reference="learning-rl">[learning-rl]</a>, in the imitation learning context $\mathcal D$ denotes an offline dataset collecting $N$ length-$T_i$ reward-free (expert) human trajectories $\tau^{(i)}$, and *not* the environment dynamics. Similarily, in this section $\tau^{(i)}$ represent a length-$T_i$ trajectory of observation-action pairs, which crucially *omits entirely any reward* information. Figure <a href="#ch4-bc-trajectories" data-reference-type="ref" data-reference="ch4-bc-trajectories">[ch4-bc-trajectories]</a> graphically shows trajectories in terms of the average evolution of the actuation on the 6 joints of a teleoperated SO-100 manipulator. Notice how proprioperceptive states are captured jointly with camera frames over the course of the recorded episodes, providing a unified high-frame rate collection of both image and joint teleoperation data. Figure <a href="#ch4-observation-action-mapping" data-reference-type="ref" data-reference="ch4-observation-action-mapping">[ch4-observation-action-mapping]</a> shows $(o_t, a_t)$-pairs for the same dataset, with the actions performed by the human expert illustrated alongside the corresponding observation. 
In principle, (expert) trajectories $\tau^{(i)}$ can have different lengths since demonstrations might exhibit multi-modal strategies to attain the same goal, resulting in multiple, different behaviors.
|
| 1095 |
|
|
@@ -1947,13 +1936,9 @@ for epoch in range(num_epochs):
|
|
| 1947 |
## Generalist Robot Policies
|
| 1948 |
|
| 1949 |
<span id="learning-foundation" style="position: absolute;"></span>
|
| 1950 |
-
<
|
| 1951 |
-
|
| 1952 |
-
|
| 1953 |
-
|
| 1954 |
-
Robert A. Heinlein
|
| 1955 |
-
|
| 1956 |
-
</div>
|
| 1957 |
<div class="callout">
|
| 1958 |
|
| 1959 |
TL;DR Openly available, large-scale datasets and the development of stable-to-train, expressive and efficient architectures fostered research on the development of generalist robot policies that can operate across embodiment and tasks.
|
|
@@ -1994,7 +1979,7 @@ Driven by the goal of developing generalist robot policies, the research communi
|
|
| 1994 |
|
| 1995 |
In a follow-up work, the same group of authors propose a modified method to learn generalist models, leveraging (1) a more powerful architecture and (2) scaling up the dataset used . In RT-2, @brohanRT2VisionLanguageActionModels2023 propose inheriting internet-scale semantic knowledge from large-scale multi-modal datasets to learn a single, *unified model* for robotics control. Such a model, termed *Vision-Language-Action* (VLA) in the original RT-2 paper, effectively casts robot control as a language-modeling problem, and in particular as a Visual Question-Answering (VQ) task, in which the output token space used to represent *textual tokens* is shared with the *8-bits tokens* used to represent the 256 ($2^8$) actuation levels of a 6-dof robot. In their work, @brohanRT2VisionLanguageActionModels2023 propose co-fine-tuning large-scale VLMs such as PaLIX @chenPaLIXScalingMultilingual2023 or PaLM-E @driessPaLMEEmbodiedMultimodal2023 on a mix of (1) web and (2) robotics data, complementing VQtraining with robotics-specific signal, and learning to directly output robot actions in a shared token space for visual and language inputs. In their work, the authors claim using large models trained on internet-scale data as backbones for VLAs allows models to tap into the rich semantic knowledge embedded in the VLM’s parameters, interpreting instructions and unseen objects by connecting them to concepts acquired while pre-training. For instance, @brohanRT2VisionLanguageActionModels2023 show that while RT-2 has never been explicitly trained to repurpose tools for a *hammering* task, it can still combine its semantic understanding of images, so that when asked which object between (1) a piece of paper, (2) a pair of headphones or (3) a rock may be used instead of a hammer, it correctly answers (3).
|
| 1996 |
|
| 1997 |
-
Traditionally, research efforts revolved around not only training models, but also proposing datasets for the community, a costly and time-consuming process. Due to the aforementioned embodiment gap, the data used in research efforts in robot learning have traditionally proved rather fragmented, tailored to the specific task considered by the specific group of researchers who collected it, which ultimately hindered integration. The Open X-Embodiment project @oneillOpenXEmbodimentRobotic2025 was a landmark collaboration effort to address data fragmentation, by curating the aggregation of 60 *existing* robotics datasets from 22 different robot embodiments and 21 institutions across the world, and resulted in a total 1.4M of cross-embodiments, cross-tasks, openly-available trajectories. Besides the contribution of an aggregate, large scale dataset, @oneillOpenXEmbodimentRobotic2025 also demonstrated significant positive transfer *across tasks and embodiments*, showing that <
|
| 1998 |
|
| 1999 |
Despite these advancements, the success of large, proprietary models like RT-1 and RT-2, highlighted a growing accessibility gap in robotics research, as training and deploying large-scale robotics foundation models requires computational resources simply unattainable for most research institutions. The OpenVLA project @kimOpenVLAOpenSourceVisionLanguageAction2024 emerged in direct contrast to traditionally closed-source efforts to develop VLAs. In particular, @kimOpenVLAOpenSourceVisionLanguageAction2024 trained OpenVLA by exclusively leveraging openly available data (970k+ trajectories from the Open-X dataset), and openly shared their training recipes alongside the model weights. Architecturally, OpenVLA integrates a pre-trained vision encoder to project visual tokens into the embedding space of the Llama2-7B @touvronLlama2Open2023 language-model backbone. The language model backbone is then used to predict *discrete action tokens* over 256 activation levels.
|
| 2000 |
|
|
@@ -2244,7 +2229,7 @@ for epoch in range(num_epochs):
|
|
| 2244 |
|
| 2245 |
<span id="conclusions" style="position: absolute;"></span>
|
| 2246 |
|
| 2247 |
-
This tutorial has charted the paradigmatic shift transforming robotics, tracing the <
|
| 2248 |
|
| 2249 |
Our exploration traced a clear trajectory of progress, beginning with Reinforcement Learning (RL). While RL offers a powerful paradigm for learning through interaction, its application in robotics is complicated by challenges such as sample inefficiency, safety concerns in real-world training, and the complexities of reward design. We saw how modern approaches like HIL-SERL make real-world RL more feasible by incorporating training-time human guidance, datasets of previously collected data as well as learned reward classifiers.
|
| 2250 |
|
|
|
|
| 20 |
|
| 21 |
import MultiImage from '../components/MultiImage.astro';
|
| 22 |
import ResponsiveImage from '../components/ResponsiveImage.astro';
|
| 23 |
+
import Quote from '../components/Quote.astro';
|
| 24 |
import ch2_planar_manipulator_free from './assets/image/figures/ch2/ch2-planar-manipulator-free.png';
|
| 25 |
import ch2_planar_manipulator_floor from './assets/image/figures/ch2/ch2-planar-manipulator-floor.png';
|
| 26 |
import ch2_planar_manipulator_floor_shelf from './assets/image/figures/ch2/ch2-planar-manipulator-floor-shelf.png';
|
|
|
|
| 102 |
|
| 103 |
Moreover, since end-to-end learning on ever-growing collections of text and image data has historically been at the core of the development of *foundation models* capable of semantic reasoning across multiple modalities (images, text, audio, etc.), deriving robotics methods grounded in learning appears particularly consequential, especially as the number of openly available datasets continues to grow.
|
| 104 |
|
| 105 |
+
Robotics is, at its core, an inherently multidisciplinary field, requiring a wide range of expertise in both *software* and *hardware*. The integration of learning-based techniques further broadens this spectrum of skills, raising the bar for both research and practical applications. `lerobot` is an open-source library designed to integrate end-to-end with the entire robotics stack. With a strong focus on accessible, real-world robots, <mark>(1) `lerobot` supports many, openly available, robotic platforms</mark> for manipulation, locomotion and even whole-body control. `lerobot` also implements a <mark>(2) unified, low-level approach to reading/writing robot configurations</mark> to extend support for other robot platforms with relatively low effort. The library introduces `LeRobotDataset`, <mark>(3) a native robotics dataset format</mark> currently being used by the community to efficiently record and share datasets. `lerobot` also supports many state-of-the-art (SOTA) algorithms in robot learning--mainly based on Reinforcement Learning (RL) and Behavioral Cloning (BC) techniques--with efficient implementations in PyTorch, and extended support for experimentation and experiment tracking. Lastly, `lerobot` defines a custom, optimized inference stack for robotic policies decoupling action planning from action execution, proving effective in guaranteeing more adaptability at runtime.
|
| 106 |
|
| 107 |
This tutorial serves the double purpose of providing useful references for the Science behind--and practical use of--common robot learning techniques. To this aim, we strive to provide a rigorous yet concise overview of the core concepts behind the techniques presented, paired with practical examples of how to use such techniques concretely, with code examples in `lerobot`, for researchers and practitioners interested in the field of robot learning. This tutorial is structured as follows:
|
| 108 |
|
|
|
|
| 282 |
## Classical Robotics
|
| 283 |
|
| 284 |
<span id="classical" style="position: absolute;"></span>
|
| 285 |
+
<Quote source="Sun Tzu">
|
| 286 |
+
Know your enemy
|
| 287 |
+
</Quote>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 288 |
<div class="callout">
|
| 289 |
|
| 290 |
TL;DR Learning-based approaches to robotics are motivated by the need to (1) generalize across tasks and embodiments (2) reduce dependency on human expertise (3) leverage historical trends on the production of data--all traditionally overlooked by dynamics-based techniques.
|
|
|
|
| 305 |
<figcaption>Overview of methods to generate motion (clearly non-exhaustive, see @bekrisStateRobotMotion2024). The different methods can be grouped based on whether they explicitly (<em>dynamics-based</em>) or implicitly (<em>learning-based</em>) model robot-environment interactions.</figcaption>
|
| 306 |
</figure>
|
| 307 |
|
| 308 |
+
Robotics is concerned with producing artificial motion in the physical world in a useful, reliable and safe fashion. Thus, robotics is an inherently multi-disciplinary domain: producing autonomous motion in the physical world requires, at the very least, interfacing different software (motion planners) and hardware (motion executioners) components. Further, knowledge of mechanical, electrical, and software engineering, as well as rigid-body mechanics and control theory have therefore proven quintessential in robotics since the field first developed in the 1950s. More recently, Machine Learning (ML) has also proved effective in robotics, complementing these more traditional disciplines @connellRobotLearning1993. As a direct consequence of its multi-disciplinary nature, robotics has developed as a rather wide array of methods, all concerned with the main purpose of <mark>producing artificial motion in the physical world</mark>.
|
| 309 |
|
| 310 |
+
Methods to produce robotics motion range from traditional *explicit* models--<mark>dynamics-based</mark>[^1] methods, leveraging precise descriptions of the mechanics of robots’ rigid bodies and their interactions with eventual obstacles in the environment--to *implicit* models--<mark>learning-based</mark> methods, treating artificial motion as a statistical pattern to learn given multiple sensorimotor readings @agrawalComputationalSensorimotorLearning, @bekrisStateRobotMotion2024. A variety of methods have been developed between these two extrema. For instance, @hansenTemporalDifferenceLearning2022 show how learning-based systems can benefit from information on the physics of problems, complementing a traditional learning method such as Temporal Difference (TD)-learning @suttonReinforcementLearningIntroduction2018 with Model-Predictive Control (MPC). Conversely, as explicit models may be relying on assumptions proving overly simplistic--or even unrealistic--in practice, learning can prove effective to improve modeling of complex phenomena or complement perception @mccormacSemanticFusionDense3D2016. Such examples aim at demonstrating the richness of approaches to robotics, and Figure <a href="#generating-motion-atlas" data-reference-type="ref" data-reference="generating-motion-atlas">[generating-motion-atlas]</a> graphically illustrates some of the most relevant techniques. Such a list is clearly far from being exhaustive, and we refer to @bekrisStateRobotMotion2024 for a more comprehensive overview of both general and application-specific methods for motion generation. In this section, we wish to introduce the inherent benefits of <mark>learning-based approaches to robotics</mark>--the core focus of this tutorial.
|
| 311 |
|
| 312 |
### Different Types of Motion
|
| 313 |
|
|
|
|
| 474 |
<figcaption>Dynamics-based approaches to robotics suffer from several limitations: (1) orchestrating multiple components poses integration challenges; (2) the need to develop custom processing pipelines for the sensing modalities and tasks considered hinders scalability; (3) simplified analytical models of physical phenomena (here friction at the gripper; credits to @antonovaReinforcementLearningPivoting2017) limit real-world performance. Lastly, (4) dynamics-based methods overlook trends in the availability and growth of robotics data.</figcaption>
|
| 475 |
</figure>
|
| 476 |
|
| 477 |
+
Dynamics-based robotics pipelines have historically been <mark>developed sequentially, engineering the different blocks</mark> now within most architectures for specific purposes. That is, sensing, state estimation, mapping, planning, (diff-)IK, and low-level control have been traditionally developed as distinct modules with fixed interfaces. Pipelining these specific modules proved error-prone, and brittleness emerges--alongside compounding errors--whenever changes occur (e.g., changes in lighting for sensing, occlusion/failure of sensors, control failures). Adapting such a stack to new tasks or robotic platforms often entails re-specifying objectives, constraints, and heuristics at multiple stages, incurring significant engineering overhead.
|
| 478 |
|
| 479 |
+
Moreover, classical planners operate on compact, assumed-sufficient state representations; extending them to reason directly over raw, heterogeneous and noisy data streams is non-trivial. This results in a <mark>limited scalability to multimodal data and multitask settings</mark>, as incorporating high-dimensional perceptual inputs (RGB, depth, tactile, audio) traditionally required extensive engineering efforts to extract meaningful features for control. Also, the large number of tasks, coupled with the adoption of *per-task* planners, goal parameterizations, and safety constraints, results in an explosion in design and validation options, with little opportunity to reuse solutions across tasks.
|
| 480 |
|
| 481 |
+
Setting aside integration and scalability challenges: developing accurate modeling of contact, friction, and compliance for complicated systems remains difficult. Rigid-body approximations are often insufficient in the presence of deformable objects, and <mark>relying on approximated models hinders real-world applicability</mark> of the methods developed. In the case of complex, time-dependent and/or non-linear dynamics, even moderate mismatches in parameters, unmodeled evolutions, or grasp-induced couplings can qualitatively affect the observed dynamics.
|
| 482 |
|
| 483 |
+
Lastly, dynamics-based methods (naturally) overlook the rather recent <mark>increase in availability of openly-available robotics datasets</mark>. The curation of academic datasets by large centralized groups of human experts in robotics @oneillOpenXEmbodimentRobotic2025, @khazatskyDROIDLargeScaleInTheWild2025 is now increasingly complemented by a <mark>growing number of robotics datasets contributed in a decentralized fashion</mark> by individuals with varied expertise. If not tangentially, dynamics-based approaches are not poised to maximally benefit from this trend, which holds the premise of allowing generalization in the space of tasks and embodiments, like data was the cornerstone for advancements in vision @alayracFlamingoVisualLanguage2022 and natural-language understanding @brownLanguageModelsAre2020.
|
| 484 |
|
| 485 |
Taken together, these limitations (Figure <a href="#classical-limitations" data-reference-type="ref" data-reference="classical-limitations">[classical-limitations]</a>) motivate the exploration of learning-based approaches that can (1) integrate perception and control more tightly, (2) adapt across tasks and embodiments with reduced expert modeling interventions and (3) scale gracefully in performance as more robotics data becomes available.
|
| 486 |
|
| 487 |
## Robot (Reinforcement) Learning
|
| 488 |
|
| 489 |
<span id="learning-rl" style="position: absolute;"></span>
|
| 490 |
+
<Quote source="Richard Sutton">
|
| 491 |
+
Approximate the solution, not the problem
|
| 492 |
+
</Quote>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 493 |
<div class="callout">
|
| 494 |
|
| 495 |
TL;DR The need for expensive, high-fidelity simulators can be obviated learning from real-world data, using sample-efficient algorithms that can safely train directly on hardware.
|
|
|
|
| 509 |
|
| 510 |
Learning-based techniques for robotics naturally address the limitations presented in Section <a href="#classical" data-reference-type="ref" data-reference="classical">[classical]</a> (Figure <a href="#robot-learning-upsides" data-reference-type="ref" data-reference="robot-learning-upsides">[robot-learning-upsides]</a>). In particular, learning-based techniques typically rely on monolithic prediction-to-action pipelines (*visuomotor policies*) which directly map sensorimotor inputs to predicted actions, streamlining control policies by removing the need to interface multiple components. Mapping sensory inputs to actions also makes it possible to incorporate diverse input modalities, leveraging the automatic feature extraction capabilities of modern learning systems. Moreover, learning-based approaches can, in principle, bypass explicit modeling altogether and instead rely solely on interaction data--an advantage that proves transformative when dynamics are difficult to model or entirely unknown. Lastly, learning for robotics (*robot learning*) is naturally well posed to leverage the growing amount of robotics data openly available, just as computer vision and natural language processing did historically benefit from large-scale corpora of data, in great part overlooked by dynamics-based approaches.
|
| 511 |
|
| 512 |
+
Being a field at a relatively nascent stage, no prevalent technique(s) proves distinctly better than any other in the domain of robot learning. Still, two major classes of methods gained prominence: <mark>Reinforcement Learning (RL)</mark> and <mark>Behavioral Cloning (BC)</mark> (Figure <a href="#robot-learning-atlas" data-reference-type="ref" data-reference="robot-learning-atlas">[robot-learning-atlas]</a>). In this section, we provide a conceptual overview of applications of RL to robotics, as well as introduce practical examples of how to use RL within `lerobot`. We then introduce the major limitations RL suffers from, to introduce BC techniques in Section <a href="#learning-imitation" data-reference-type="ref" data-reference="learning-imitation">[learning-imitation]</a> and Section <a href="#learning-foundation" data-reference-type="ref" data-reference="learning-foundation">[learning-foundation]</a>.
|
| 513 |
|
| 514 |
<div class="wrapfigure">
|
| 515 |
|
|
|
|
| 646 |
|
| 647 |
Streamlined end-to-end control pipelines, data-driven feature extraction and a disregard for explicit modeling in favor of interaction data are all features of RL for robotics. However, RL still suffers from limitations concerning safety and learning efficiency, particularly pressing for real-world robotics applications.
|
| 648 |
|
| 649 |
+
First, especially early in training, <mark>actions are typically explorative, and thus may be erratic</mark>. On physical systems, untrained policies may command high velocities, self-colliding configurations, or torques exceeding joint limits, leading to wear and potential hardware damage. Mitigating these risks requires external safeguards (e.g., watchdogs, safety monitors, emergency stops), often incurring a high degree of human supervision. Further, in the typical episodic setting considered in most robotics problems, experimentation is substantially slowed down by the need to manually reset the environment over the course of training, a time-consuming and error-prone process. Second, learning efficiently remains problematic in RL, <mark>limiting the applicability of RL in real-world robotics due to consequently prohibitive timescales of training</mark>. Even strong algorithms such as SAC @haarnojaSoftActorCriticOffPolicy2018 typically require a large number of transitions $\{ (s_t, a_t, r_t, s_{t+1})\}_{t=1}^N$. On real-world hardware, generating this data is time-consuming.
|
| 650 |
|
| 651 |
<figure>
|
| 652 |
<ResponsiveImage
|
|
|
|
| 678 |
|
| 679 |
Selecting the dynamics distribution $\Xi$ is also non-trivial. On the one hand, distributions with low entropy might risk to cause failure at transfer time, due to the limited robustness induced over the course of training. On the other hand, excessive randomization may cause over-regularization and hinder performance @margolisRapidLocomotionReinforcement2022. Consequently, the research community investigated approaches to automatically select the randomization distribution $\Xi$, using signals from the training process or tuning it to reproduce observed real-world trajectories. @akkayaSolvingRubiksCube2019 use a parametric uniform distribution $\mathcal U(a, b)$ as $\Xi$, widening the bounds $a, b$ as training progresses and the agent’s performance improves (AutoDR). While effective, AutoDR requires significant tuning--the bounds are widened by a fixed, pre-specified amount $\Delta$ along--and may disregard data when performance *does not* improve after a distribution update @tiboniDomainRandomizationEntropy2024. @tiboniDomainRandomizationEntropy2024 propose a similar method to AutoDR (DORAEMON) to evolve $\Xi$ based on the training signal, but with the key difference of explicitly maximizing the entropy of a parametric Beta distribution--inherently more flexible than uniform distributions--with learned updates instead of fixed $\Delta$. In this, DORAEMON proves particularly effective at dynamically increasing the entropy levels of the training distribution by employing an outer-loop max-entropy objective, tackled under performance constraints in the inner-loop RL problem. Other approaches to automatically perform DR consist in specifically tuning $\Xi$ to align as much as possible the simulation and real-world domains. 
For instance, @chebotarClosingSimtorealLoop2019 interleave in-simulation policy training with repeated real-world policy rollouts used to adjust $\Xi$ based on real-world data, while @tiboniDROPOSimtoRealTransfer2023 leverage a single, pre-collected set of real-world trajectories and tune $\Xi$ under a simple likelihood objective.
|
| 680 |
|
| 681 |
+
While DR has shown promise, it does not address the main limitation that, even under the assumption that an ideal distribution $\Xi$ was available, many robotics problems <mark>cannot be simulated with high-enough fidelity under practical computational constraints</mark>. Simulating contact-rich manipulation of possibly deformable or soft materials--i.e., *folding a piece of clothing*--can prove time-intensive, limiting the benefits of in-simulation training.
|
| 682 |
|
| 683 |
A perhaps more fundamental limitation of RL for robotics is the general unavailability of complicated tasks’ *dense* reward function, the design of which is essentially based on human expertise, ingenuity and trial-and-error. In practice, *sparse* reward functions can be used to conclude whether one specific goal has been attained--*has this t-shirt been correctly folded?*--but unfortunately incur more challenging learning. As a result, despite notable successes, deploying RL directly on real-world robots at scale remains challenging.
|
| 684 |
|
|
|
|
| 732 |
```
|
| 733 |
Similarly to DQN, DDPG also employs the same replay buffer mechanism, reusing past transitions over training for increased sample efficiency and estimating the loss function via MC-estimates.
|
| 734 |
|
| 735 |
+
Soft Actor-Critic (SAC) @haarnojaSoftActorCriticOffPolicy2018 is a derivation of DDPG in the max-entropy (MaxEnt) RL framework, in which RL agents are tasked with <mark>maximizing the discounted cumulative reward, while acting as randomly as possible</mark>. MaxEnt RL @haarnojaReinforcementLearningDeep2017b has proven particularly robust thanks to the development of diverse behaviors, incentivized by its entropy-regularization formulation. In that, MaxEnt revisits the RL objective $J (\pi)$ to specifically account for the policy entropy $\mathcal H(\pi (\bullet \vert s_t))$,
|
| 736 |
<span id="J-soft" style="position: absolute;">
|
| 737 |
</span>
|
| 738 |
|
|
|
|
| 1058 |
## Robot (Imitation) Learning
|
| 1059 |
|
| 1060 |
<span id="learning-imitation" style="position: absolute;"></span>
|
| 1061 |
+
<Quote source="Norbert Wiener">
|
| 1062 |
+
The best material model for a cat is another, or preferably the same cat
|
| 1063 |
+
</Quote>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1064 |
<div class="callout">
|
| 1065 |
|
| 1066 |
TL;DR Behavioral Cloning provides a natural platform to learn from real-world interactions without the need to design any reward function, and generative models prove more effective than point-wise policies at dealing with multimodal demonstration datasets.
|
|
|
|
| 1078 |
<figcaption>(A) Average (with standard deviation) evolution of the actuation levels over the first 5 recorded episodes in <a href="lerobot/svla_so101_pickplace" class="uri">lerobot/svla_so101_pickplace</a>. Proprioperceptive states prove invaluable to determine the robot’s state during an episode. (B) Camera frames are also recorded alongside measurements on the robot’s state, capturing information about the robot’s interaction with its environment.</figcaption>
|
| 1079 |
</figure>
|
| 1080 |
|
| 1081 |
+
Learning from human demonstrations provides a pragmatic alternative to the RL pipeline discussed in Section <a href="#learning-rl" data-reference-type="ref" data-reference="learning-rl">[learning-rl]</a>. Indeed, especially in real-world robotics, online exploration is typically <mark>costly and potentially unsafe</mark>, and designing (dense) reward signals is a <mark>brittle and task-specific</mark> process. Further, even success detection itself often requires bespoke instrumentation, while episodic training demands reliable resets--all factors complicating training RL algorithms on hardware at scale. Behavioral Cloning (BC) sidesteps these constraints by <mark>casting control as an imitation learning problem</mark>, leveraging previously collected expert demonstrations to anchor the learned autonomous behavior. Most notably, by *learning-to-imitate*, autonomous systems naturally adhere to the objectives, preferences, and success criteria implicitly encoded in the data, which reduces early-stage exploratory failures and obviates hand-crafted reward shaping altogether.
|
| 1082 |
|
| 1083 |
Formally, let $\mathcal D = \{ \tau^{(i)} \}_{i=1}^N$ be a set of expert trajectories, with $\tau^{(i)} = \{(o_t^{(i)}, a_t^{(i)})\}_{t=0}^{T_i}$ representing the $i$-th length-$T_i$ trajectory in $\mathcal D$, $o_t \in \mathcal O$ denoting observations (e.g., images and proprioception altogether), and $a_t \in \mathcal A$ the expert actions. Typically, observations $o \in \mathcal O$ consist of both image and proprioperceptive information, while actions $a \in \mathcal A$ represent control specifications for the robot to execute, e.g. a joint configuration. Note that differently from Section <a href="#learning-rl" data-reference-type="ref" data-reference="learning-rl">[learning-rl]</a>, in the imitation learning context $\mathcal D$ denotes an offline dataset collecting $N$ length-$T_i$ reward-free (expert) human trajectories $\tau^{(i)}$, and *not* the environment dynamics. Similarly, in this section $\tau^{(i)}$ represents a length-$T_i$ trajectory of observation-action pairs, which crucially *omits entirely any reward* information. Figure <a href="#ch4-bc-trajectories" data-reference-type="ref" data-reference="ch4-bc-trajectories">[ch4-bc-trajectories]</a> graphically shows trajectories in terms of the average evolution of the actuation on the 6 joints of a teleoperated SO-100 manipulator. Notice how proprioperceptive states are captured jointly with camera frames over the course of the recorded episodes, providing a unified high-frame rate collection of both image and joint teleoperation data. Figure <a href="#ch4-observation-action-mapping" data-reference-type="ref" data-reference="ch4-observation-action-mapping">[ch4-observation-action-mapping]</a> shows $(o_t, a_t)$-pairs for the same dataset, with the actions performed by the human expert illustrated alongside the corresponding observation. 
In principle, (expert) trajectories $\tau^{(i)}$ can have different lengths since demonstrations might exhibit multi-modal strategies to attain the same goal, resulting in multiple, different behaviors.
|
| 1084 |
|
|
|
|
| 1936 |
## Generalist Robot Policies
|
| 1937 |
|
| 1938 |
<span id="learning-foundation" style="position: absolute;"></span>
|
| 1939 |
+
<Quote source="Robert A. Heinlein">
|
| 1940 |
+
Specialization is for insects
|
| 1941 |
+
</Quote>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1942 |
<div class="callout">
|
| 1943 |
|
| 1944 |
TL;DR Openly available, large-scale datasets and the development of stable-to-train, expressive and efficient architectures fostered research on the development of generalist robot policies that can operate across embodiment and tasks.
|
|
|
|
| 1979 |
|
| 1980 |
In a follow-up work, the same group of authors propose a modified method to learn generalist models, leveraging (1) a more powerful architecture and (2) scaling up the dataset used. In RT-2, @brohanRT2VisionLanguageActionModels2023 propose inheriting internet-scale semantic knowledge from large-scale multi-modal datasets to learn a single, *unified model* for robotics control. Such a model, termed *Vision-Language-Action* (VLA) in the original RT-2 paper, effectively casts robot control as a language-modeling problem, and in particular as a Visual Question-Answering (VQ) task, in which the output token space used to represent *textual tokens* is shared with the *8-bit tokens* used to represent the 256 ($2^8$) actuation levels of a 6-dof robot. In their work, @brohanRT2VisionLanguageActionModels2023 propose co-fine-tuning large-scale VLMs such as PaLIX @chenPaLIXScalingMultilingual2023 or PaLM-E @driessPaLMEEmbodiedMultimodal2023 on a mix of (1) web and (2) robotics data, complementing VQ training with robotics-specific signal, and learning to directly output robot actions in a shared token space for visual and language inputs. In their work, the authors claim using large models trained on internet-scale data as backbones for VLAs allows models to tap into the rich semantic knowledge embedded in the VLM’s parameters, interpreting instructions and unseen objects by connecting them to concepts acquired while pre-training. For instance, @brohanRT2VisionLanguageActionModels2023 show that while RT-2 has never been explicitly trained to repurpose tools for a *hammering* task, it can still combine its semantic understanding of images, so that when asked which object between (1) a piece of paper, (2) a pair of headphones or (3) a rock may be used instead of a hammer, it correctly answers (3).
|
| 1981 |
|
| 1982 |
+
Traditionally, research efforts revolved around not only training models, but also proposing datasets for the community, a costly and time-consuming process. Due to the aforementioned embodiment gap, the data used in research efforts in robot learning have traditionally proved rather fragmented, tailored to the specific task considered by the specific group of researchers who collected it, which ultimately hindered integration. The Open X-Embodiment project @oneillOpenXEmbodimentRobotic2025 was a landmark collaboration effort to address data fragmentation, by curating the aggregation of 60 *existing* robotics datasets from 22 different robot embodiments and 21 institutions across the world, and resulted in a total of 1.4M cross-embodiment, cross-task, openly-available trajectories. Besides the contribution of an aggregate, large scale dataset, @oneillOpenXEmbodimentRobotic2025 also demonstrated significant positive transfer *across tasks and embodiments*, showing that <mark>a single model trained on multi-embodiment data can outperform specialist models</mark> trained on their respective single-embodiment datasets. The Distributed Robot Interaction Dataset (DROID) @khazatskyDROIDLargeScaleInTheWild2025 represents another significant step towards addressing the problem of scarce and disaggregated data in robot learning, providing a unique dataset consisting of 75k+ human demonstrations collected in realistic (*in-the-wild*) manipulation settings, providing another cornerstone for building general-purpose robot policies. Recently, foundational datasets curated through large, centralized efforts, are increasingly complemented by decentralized, community-driven contributions of robotics data. 
Software libraries like `lerobot` have been instrumental in enabling decentralized collection of large amounts of data, providing the infrastructure for researchers and practitioners to easily contribute trajectories from a wide range of embodiments, democratizing data access via distributed collection.
|
| 1983 |
|
| 1984 |
Despite these advancements, the success of large, proprietary models like RT-1 and RT-2 highlighted a growing accessibility gap in robotics research, as training and deploying large-scale robotics foundation models requires computational resources simply unattainable for most research institutions. The OpenVLA project @kimOpenVLAOpenSourceVisionLanguageAction2024 emerged in direct contrast to traditionally closed-source efforts to develop VLAs. In particular, @kimOpenVLAOpenSourceVisionLanguageAction2024 trained OpenVLA by exclusively leveraging openly available data (970k+ trajectories from the Open-X dataset), and openly shared their training recipes alongside the model weights. Architecturally, OpenVLA integrates a pre-trained vision encoder to project visual tokens into the embedding space of the Llama2-7B @touvronLlama2Open2023 language-model backbone. The language model backbone is then used to predict *discrete action tokens* over 256 activation levels.
|
| 1985 |
|
|
|
|
| 2229 |
|
| 2230 |
<span id="conclusions" style="position: absolute;"></span>
|
| 2231 |
|
| 2232 |
+
This tutorial has charted the paradigmatic shift transforming robotics, tracing the <mark>evolution of robotics from structured, model-based methods to the dynamic, data-driven approaches that define modern robot learning</mark>. We began by examining the limitations of traditional dynamics-based control, namely its brittleness and significant engineering overhead, which motivate the adoption of more flexible, learning-based alternatives. Unlike scalable, data-driven techniques, conventional explicit models demand extensive human expertise, hindering wider accessibility and scalability of robotics.
|
| 2233 |
|
| 2234 |
Our exploration traced a clear trajectory of progress, beginning with Reinforcement Learning (RL). While RL offers a powerful paradigm for learning through interaction, its application in robotics is complicated by challenges such as sample inefficiency, safety concerns in real-world training, and the complexities of reward design. We saw how modern approaches like HIL-SERL make real-world RL more feasible by incorporating training-time human guidance, datasets of previously collected data as well as learned reward classifiers.
|
| 2235 |
|
app/src/content/assets/lerobot-logo-thumbnail.png
ADDED
|
|
Git LFS Details
|
app/src/content/embeds/{banner2.html → banner.html}
RENAMED
|
File without changes
|
app/src/styles/_base.css
CHANGED
|
@@ -126,8 +126,8 @@ html {
|
|
| 126 |
}
|
| 127 |
|
| 128 |
.content-grid main mark {
|
| 129 |
-
background-color: color-mix(in srgb, var(--primary-color, #007AFF)
|
| 130 |
-
border: 1px solid color-mix(in srgb, var(--primary-color)
|
| 131 |
color: inherit;
|
| 132 |
padding: 4px 6px;
|
| 133 |
border-radius: 4px;
|
|
|
|
| 126 |
}
|
| 127 |
|
| 128 |
.content-grid main mark {
|
| 129 |
+
background-color: color-mix(in srgb, var(--primary-color, #007AFF) 30%, transparent);
|
| 130 |
+
border: 1px solid color-mix(in srgb, var(--primary-color) 38%, transparent);
|
| 131 |
color: inherit;
|
| 132 |
padding: 4px 6px;
|
| 133 |
border-radius: 4px;
|