tfrere HF Staff commited on
Commit
2b50d2d
·
1 Parent(s): 20810cd
app/scripts/latex-importer/mdx-converter.mjs CHANGED
@@ -416,6 +416,133 @@ function transformStyledSpans(content) {
416
  return content;
417
  }
418
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
419
  /**
420
  * Transform reference links to proper Astro internal links
421
  * @param {string} content - MDX content
@@ -825,6 +952,9 @@ function processMdxContent(content, latexContent = '') {
825
  processedContent = convertSubfiguresToMultiImage(processedContent);
826
  processedContent = transformImages(processedContent);
827
  processedContent = transformStyledSpans(processedContent);
 
 
 
828
  processedContent = transformReferenceLinks(processedContent);
829
  processedContent = fixHtmlEscaping(processedContent);
830
  processedContent = cleanHighlightNumbering(processedContent);
 
416
  return content;
417
  }
418
 
419
/**
 * Transform epigraph divs to Quote components.
 * @param {string} content - MDX content
 * @returns {string} - Content with Quote components
 */
function transformEpigraphs(content) {
  console.log(' 💬 Transforming epigraphs to Quote components...');

  let epigraphsConverted = 0;

  // Pattern to match epigraph divs: <div class="epigraph">...</div>
  // FIX: the replace callback's parameter used to be named `content`,
  // shadowing the outer `content` being rebuilt — renamed to `inner`.
  content = content.replace(
    /<div class="epigraph">([\s\S]*?)<\/div>/g,
    (match, inner) => {
      // Split the div body into trimmed, non-empty lines.
      const lines = inner
        .trim()
        .split('\n')
        .map((line) => line.trim())
        .filter((line) => line);

      let quoteText = '';
      let author = '';

      // Quote text is the first emphasized (*...*) run.
      const quoteMatch = inner.match(/\*([^*]+)\*/);
      if (quoteMatch) {
        quoteText = quoteMatch[1].trim();
      }

      // Author is usually the last non-empty line that is neither the
      // quote itself (contains '*') nor a link/citation (contains '[').
      const lastLine = lines[lines.length - 1];
      if (lastLine && !lastLine.includes('*') && !lastLine.includes('[')) {
        author = lastLine;
      }

      if (quoteText && author) {
        epigraphsConverted++;

        // Strip inline HTML, collapse whitespace and escape double quotes
        // so the text is safe inside a JSX attribute / component body.
        const cleanQuoteText = quoteText
          .replace(/<[^>]*>/g, '')
          .replace(/\s+/g, ' ')
          .replace(/"/g, '\\"')
          .trim();

        const cleanAuthor = author
          .replace(/<[^>]*>/g, '')
          .replace(/\s+/g, ' ')
          .replace(/"/g, '\\"')
          .trim();

        // Mark Quote component as used so its import gets emitted.
        usedComponents.add('Quote');

        return `<Quote source="${cleanAuthor}">
${cleanQuoteText}
</Quote>`;
      }

      return match; // Return original if we can't parse it
    }
  );

  if (epigraphsConverted > 0) {
    console.log(` ✅ Converted ${epigraphsConverted} epigraph(s) to Quote component(s)`);
  } else {
    console.log(' ℹ️ No epigraphs found');
  }

  return content;
}
489
+
490
/**
 * Transform highlight spans to mark tags.
 * @param {string} content - MDX content
 * @returns {string} - Content with mark tags instead of highlight spans
 */
function transformHighlightSpans(content) {
  console.log(' 🎯 Transforming highlight spans to mark tags...');

  let highlightsConverted = 0;

  // Transform <span class="highlight">...</span> to <mark>...</mark>.
  // FIX: use [\s\S]*? instead of .*? so spans whose text wraps across
  // lines are also converted ('.' does not match newlines), matching the
  // multi-line handling already used by the epigraph transform.
  content = content.replace(
    /<span class="highlight">([\s\S]*?)<\/span>/g,
    (match, text) => {
      highlightsConverted++;
      return `<mark>${text}</mark>`;
    }
  );

  if (highlightsConverted > 0) {
    console.log(` ✅ Converted ${highlightsConverted} highlight span(s) to mark tag(s)`);
  } else {
    console.log(' ℹ️ No highlight spans found');
  }

  return content;
}
517
+
518
/**
 * Fix escaped mark tags.
 * @param {string} content - MDX content
 * @returns {string} - Content with unescaped mark tags
 */
function fixEscapedMarkTags(content) {
  console.log(' 🎯 Fixing escaped mark tags...');

  // Count repaired tags so the summary log below can report them.
  let fixedCount = 0;

  // Fix escaped mark tags: \<mark\>...\</mark\> -> <mark>...</mark>
  const escapedMarkPattern = /\\<mark\\>(.*?)\\<\/mark\\>/g;
  content = content.replace(escapedMarkPattern, (_full, inner) => {
    fixedCount += 1;
    return `<mark>${inner}</mark>`;
  });

  const summary =
    fixedCount > 0
      ? ` ✅ Fixed ${fixedCount} escaped mark tag(s)`
      : ' ℹ️ No escaped mark tags found';
  console.log(summary);

  return content;
}
545
+
546
  /**
547
  * Transform reference links to proper Astro internal links
548
  * @param {string} content - MDX content
 
952
  processedContent = convertSubfiguresToMultiImage(processedContent);
953
  processedContent = transformImages(processedContent);
954
  processedContent = transformStyledSpans(processedContent);
955
+ processedContent = transformHighlightSpans(processedContent);
956
+ processedContent = fixEscapedMarkTags(processedContent);
957
+ processedContent = transformEpigraphs(processedContent);
958
  processedContent = transformReferenceLinks(processedContent);
959
  processedContent = fixHtmlEscaping(processedContent);
960
  processedContent = cleanHighlightNumbering(processedContent);
app/scripts/latex-importer/output/main.md CHANGED
The diff for this file is too large to render. See raw diff
 
app/scripts/latex-importer/output/main.mdx CHANGED
@@ -20,6 +20,7 @@ tableOfContentsAutoCollapse: true
20
 
21
  import MultiImage from '../components/MultiImage.astro';
22
  import ResponsiveImage from '../components/ResponsiveImage.astro';
 
23
  import ch2_planar_manipulator_free from './assets/image/figures/ch2/ch2-planar-manipulator-free.png';
24
  import ch2_planar_manipulator_floor from './assets/image/figures/ch2/ch2-planar-manipulator-floor.png';
25
  import ch2_planar_manipulator_floor_shelf from './assets/image/figures/ch2/ch2-planar-manipulator-floor-shelf.png';
@@ -101,7 +102,7 @@ The frontier of robotics research is indeed increasingly moving away from classi
101
 
102
  Moreover, since end-to-end learning on ever-growing collections of text and image data has historically been at the core of the development of *foundation models* capable of semantic reasoning across multiple modalities (images, text, audio, etc.), deriving robotics methods grounded in learning appears particularly consequential, especially as the number of openly available datasets continues to grow.
103
 
104
- Robotics is, at its core, an inherently multidisciplinary field, requiring a wide range of expertise in both *software* and *hardware*. The integration of learning-based techniques further broadens this spectrum of skills, raising the bar for both research and practical applications. `lerobot` is an open-source library designed to integrate end-to-end with the entire robotics stack. With a strong focus on accessible, real-world robots <span class="highlight">`lerobot` supports many, openly available, robotic platforms</span> for manipulation, locomotion and even whole-body control. `lerobot`also implements a <span class="highlight">unified, low-level approach to reading/writing robot configurations</span> to extend support for other robot platforms with relatively low effort. The library introduces `LeRobotDataset`, <span class="highlight">a native robotics dataset’s format</span> currently being used by the community to efficiently record and share datasets. `lerobot` also supports many state-of-the-art (SOTA) algorithms in robot learning--mainly based on Reinforcement Learning (RL) and Behavioral Cloning (BC) techniques--with efficient implementations in Pytorch, and extended support to experimentation and experiments tracking. Lastly, `lerobot` defines a custom, optimized inference stack for robotic policies decoupling action planning from action execution, proving effective in guaranteeing more adaptability at runtime.
105
 
106
This tutorial serves the double purpose of providing useful references for the Science behind--and practical use of--common robot learning techniques. To this aim, we strive to provide a rigorous yet concise overview of the core concepts behind the techniques presented, paired with practical examples of how to use such techniques concretely, with code examples in `lerobot`, for researchers and practitioners interested in the field of robot learning. This tutorial is structured as follows:
107
 
@@ -281,13 +282,9 @@ for epoch in range(num_epochs):
281
  ## Classical Robotics
282
 
283
  <span id="classical" style="position: absolute;"></span>
284
- <div class="epigraph">
285
-
286
- *Know your enemy* \[...\]
287
-
288
- Sun Tzu
289
-
290
- </div>
291
  <div class="callout">
292
 
293
  TL;DR Learning-based approaches to robotics are motivated by the need to (1) generalize across tasks and embodiments (2) reduce dependency on human expertise (3) leverage historical trends on the production of data--all traditionally overlooked by dynamics-based techniques.
@@ -308,9 +305,9 @@ TL;DR Learning-based approaches to robotics are motivated by the need to (1) gen
308
<figcaption>Overview of methods to generate motion (clearly non-exhaustive, see @bekrisStateRobotMotion2024). The different methods can be grouped based on whether they explicitly (<em>dynamics-based</em>) or implicitly (<em>learning-based</em>) model robot-environment interactions.</figcaption>
309
  </figure>
310
 
311
- Robotics is concerned with producing artificial motion in the physical world in useful, reliable and safe fashion. Thus, robotics is an inherently multi-disciplinar domain: producing autonomous motion in the physical world requires, to the very least, interfacing different software (motion planners) and hardware (motion executioners) components. Further, knowledge of mechanical, electrical, and software engineering, as well as rigid-body mechanics and control theory have therefore proven quintessential in robotics since the field first developed in the 1950s. More recently, Machine Learning (ML) has also proved effective in robotics, complementing these more traditional disciplines @connellRobotLearning1993. As a direct consequence of its multi-disciplinar nature, robotics has developed as a rather wide array of methods, all concerned with the main purpose of <span class="highlight">producing artificial motion in the physical world</span>.
312
 
313
- Methods to produce robotics motion range from traditional *explicit* models--<span class="highlight">dynamics-based</span>[^1] methods, leveraging precise descriptions of the mechanics of robots’ rigid bodies and their interactions with eventual obstacles in the environment--to *implicit* models--<span class="highlight">learning-based</span> methods, treating artificial motion as a statistical pattern to learn given multiple sensorimotor readings @agrawalComputationalSensorimotorLearning, @bekrisStateRobotMotion2024. A variety of methods have been developed between these two extrema. For instance,  @hansenTemporalDifferenceLearning2022 show how learning-based systems can benefit from information on the physics of problems, complementing a traditional learning method such as Temporal Difference (TD)-learning @suttonReinforcementLearningIntroduction2018 with Model-Predictive Control (MPC). Conversely, as explicit models may be relying on assumptions proving overly simplistic--or even unrealistic--in practice, learning can prove effective to improve modeling of complex phenomena or complement perception @mccormacSemanticFusionDense3D2016. Such examples aim at demonstrating the richness of approaches to robotics, and Figure <a href="#generating-motion-atlas" data-reference-type="ref" data-reference="generating-motion-atlas">[generating-motion-atlas]</a> graphically illustrates some of the most relevant techniques. Such a list is clearly far from being exhaustive, and we refer to @bekrisStateRobotMotion2024 for a more comprehensive overview of both general and application-specific methods for motion generation. In this section, we wish to introduce the inherent benefits of <span class="highlight">learning-based approaches to robotics</span>--the core focus on this tutorial.
314
 
315
  ### Different Types of Motion
316
 
@@ -477,26 +474,22 @@ Despite the last 60+ years of robotics research, autonomous robots are still lar
477
  <figcaption>Dynamics-based approaches to robotics suffer from several limitations: (1) orchestrating multiple components poses integration challenges; (2) the need to develop custom processing pipelines for the sensing modalities and tasks considered hinders scalability; (3) simplified analytical models of physical phenomena (here friction at the gripper; credits to @antonovaReinforcementLearningPivoting2017) limit real-world performance. Lastly, (4) dynamics-based methods overlook trends in the availability and growth of robotics data.</figcaption>
478
  </figure>
479
 
480
- Dynamics-based robotics pipelines have historically been <span class="highlight">developed sequentially, engineering the different blocks</span> now within most architectures for specific purposes. That is, sensing, state estimation, mapping, planning, (diff-)IK, and low-level control have been traditionally developed as distinct modules with fixed interfaces. Pipelining these specific modules proved error-prone, and brittleness emerges--alongside compounding errors--whenever changes incur (e.g., changes in lighting for sensing, occlusion/failure of sensors, control failures). Adapting such a stack to new tasks or robotic platforms often entails re-specifying objectives, constraints, and heuristics at multiple stages, incurring significant engineering overhead.
481
 
482
- Moreover, classical planners operate on compact, assumed-sufficient state representations; extending them to reason directly over raw, heterogeneous and noisy data streams is non-trivial. This results in a <span class="highlight">limited scalability to multimodal data and multitask settings</span>, as incorporating high-dimensional perceptual inputs (RGB, depth, tactile, audio) traditionally required extensive engineering efforts to extract meaningful features for control. Also, the large number of tasks, coupled with the adoption of *per-task* planners, goal parameterizations, and safety constraints, results in an explosion in design and validation options, with little opportunity to reuse solutions across tasks.
483
 
484
- Setting aside integration and scalability challenges: developing accurate modeling of contact, friction, and compliance for complicated systems remains difficult. Rigid-body approximations are often insufficient in the presence of deformable objects, and <span class="highlight">relying on approximated models hinders real-world applicability</span> of the methods developed. In the case of complex, time-dependent and/or non-linear dynamics, even moderate mismatches in parameters, unmodeled evolutions, or grasp-induced couplings can qualitatively affect the observed dynamics.
485
 
486
- Lastly, dynamics-based methods (naturally) overlook the rather recent <span class="highlight">increase in availability of openly-available robotics datasets</span>. The curation of academic datasets by large centralized groups of human experts in robotics @oneillOpenXEmbodimentRobotic2025, @khazatskyDROIDLargeScaleInTheWild2025 is now increasingly complemented by a <span class="highlight">growing number of robotics datasets contributed in a decentralized fashion</span> by individuals with varied expertise. If not tangentially, dynamics-based approaches are not posed to maximally benefit from this trend, which holds the premise of allowing generalization in the space of tasks and embodiments, like data was the cornerstone for advancements in vision @alayracFlamingoVisualLanguage2022 and natural-language understanding @brownLanguageModelsAre2020.
487
 
488
  Taken together, these limitations (Figure <a href="#classical-limitations" data-reference-type="ref" data-reference="classical-limitations">[classical-limitations]</a>) motivate the exploration of learning-based approaches that can (1) integrate perception and control more tightly, (2) adapt across tasks and embodiments with reduced expert modeling interventions and (3) scale gracefully in performance as more robotics data becomes available.
489
 
490
  ## Robot (Reinforcement) Learning
491
 
492
  <span id="learning-rl" style="position: absolute;"></span>
493
- <div class="epigraph">
494
-
495
- *Approximate the solution, not the problem* \[...\]
496
-
497
- Richard Sutton
498
-
499
- </div>
500
  <div class="callout">
501
 
502
  TL;DR The need for expensive, high-fidelity simulators can be obviated learning from real-world data, using sample-efficient algorithms that can safely train directly on hardware.
@@ -516,7 +509,7 @@ TL;DR The need for expensive, high-fidelity simulators can be obviated learning
516
 
517
  Learning-based techniques for robotics naturally address the limitations presented in Section <a href="#classical" data-reference-type="ref" data-reference="classical">[classical]</a> (Figure <a href="#robot-learning-upsides" data-reference-type="ref" data-reference="robot-learning-upsides">[robot-learning-upsides]</a>). In particular, learning-based techniques typically rely on monolithich prediction-to-action pipelines (*visuomotor policies*) which do directly map sensorimotor inputs to predicted actions, streamlining control policies by removing the need to interface multiple components. Mapping sensory inputs to actions also makes it possible to incorporate diverse input modalities, leveraging the automatic feature extraction capabilities of modern learning systems. Moreover, learning-based approaches can, in principle, bypass explicit modeling altogether and instead rely solely on interaction data--an advantage that proves transformative when dynamics are difficult to model or entirely unknown. Lastly, learning for robotics (*robot learning*) is naturally well posed to leverage the growing amount of robotics data openly available, just as computer vision and natural language processing did historically benefit from large-scale corpora of data, in great part overlooked by dynamics-based approaches.
518
 
519
- Being a field at its relative nascent stages, no prevalent technique(s) proves distinctly better than any other in the domain of robot learning. Still, two major classes of methods gained prominence- <span class="highlight">Reinforcement Learning (RL)</span> and <span class="highlight">Behavioral Cloning (BC)</span> (Figure <a href="#robot-learning-atlas" data-reference-type="ref" data-reference="robot-learning-atlas">[robot-learning-atlas]</a>). In this section, we provide a conceptual overview of applications of RL to robotics, as well as introduce practical examples of how to use RL within `lerobot`. We then introduce the major limitations RL suffers from, to introduce BC techniques in Section <a href="#learning-imitation" data-reference-type="ref" data-reference="learning-imitation">[learning-imitation]</a> and Section sec-learning-foundation.
520
 
521
  <div class="wrapfigure">
522
 
@@ -653,7 +646,7 @@ Popular approaches to continuous state and action space--such as those studied w
653
 
654
  Streamlined end-to-end control pipelines, data-driven feature extraction and a disregard for explicit modeling in favor of interaction data are all features of RL for robotics. However, RL still suffers from limitations concerning safety and learning efficiency, particularly pressing for real-world robotics applications.
655
 
656
First, especially early in training, <span class="highlight">actions are typically explorative, and thus may be erratic</span>. On physical systems, untrained policies may command high velocities, self-colliding configurations, or torques exceeding joint limits, leading to wear and potential hardware damage. Mitigating these risks requires external safeguards (e.g., watchdogs, safety monitors, emergency stops), often incurring a high degree of human supervision. Further, in the typical episodic setting considered in most robotics problems, experimentation is substantially slowed down by the need to manually reset the environment over the course of training, a time-consuming and error-prone process. Second, learning efficiently remains problematic in RL, <span class="highlight">limiting the applicability of RL in real-world robotics due to consequently prohibitive timescales of training</span>. Even strong algorithms such as SAC @haarnojaSoftActorCriticOffPolicy2018 typically require a large number of transitions $\{ (s_t, a_t, r_t, s_{t+1})\}_{t=1}^N$. On real-world hardware, generating this data is time-consuming.
657
 
658
  <figure>
659
  <ResponsiveImage
@@ -685,7 +678,7 @@ While effective in transfering policies across the reality gap in real-world rob
685
 
686
  Selecting the dynamics distribution $\Xi$ is also non-trivial. On the one hand, distributions with low entropy might risk to cause failure at transfer time, due to the limited robustness induced over the course of training. On the other hand, excessive randomization may cause over-regularization and hinder performance @margolisRapidLocomotionReinforcement2022. Consequently, the research community investigated approaches to automatically select the randomization distribution $\Xi$, using signals from the training process or tuning it to reproduce observed real-world trajectories. @akkayaSolvingRubiksCube2019 use a parametric uniform distribution $\mathcal U(a, b)$ as $\Xi$, widening the bounds $a, b$ as training progresses and the agent’s performance improves (AutoDR). While effective, AutoDR requires significant tuning--the bounds are widened by a fixed, pre-specified amount $\Delta$ along--and may disregard data when performance *does not* improve after a distribution update @tiboniDomainRandomizationEntropy2024. @tiboniDomainRandomizationEntropy2024 propose a similar method to AutoDR (DORAEMON) to evolve $\Xi$ based on the training signal, but with the key difference of explicitly maximizing the entropy of a parametric Beta distribution--inherently more flexible than uniform distributions--with learned updates instead of fixed $\Delta$. In this, DORAEMON proves particularly effective at dynamically increasing the entropy levels of the training distribution by employing an outer-loop max-entropy objective, tackled under performance constraints in the inner-loop RL problem. Other approaches to automatically perform DR consist in specifically tuning $\Xi$ to align as much as possible the simulation and real-world domains. 
For instance, @chebotarClosingSimtorealLoop2019 interleave in-simulation policy training with repeated real-world policy rollouts used to adjust $\Xi$ based on real-world data, while @tiboniDROPOSimtoRealTransfer2023 leverage a single, pre-collected set of real-world trajectories and tune $\Xi$ under a simple likelihood objective.
687
 
688
- While DR has shown promise, it does not address the main limitation that, even under the assumption that an ideal distribution $\Xi$ was available, many robotics problems <span class="highlight">cannot be simulated with high-enough fidelity under practical computational constraints</span>. Simulating contact-rich manipulation of possibly deformable or soft materials--i.e., *folding a piece of clothing*--can prove time-intensive, limiting the benefits of in-simulation training.
689
 
690
  A perhaps more foundamental limitation of RL for robotics is the general unavailability of complicated tasks’ *dense* reward function, the design of which is essentially based on human expertise, ingenuity and trial-and-error. In practice, *sparse* reward functions can be used to conclude whether one specific goal has been attained--*has this t-shirt been correctly folded?*--but unfortunately incur in more challenging learning. As a result, despite notable successes, deploying RL directly on real-world robots at scale remains challenging.
691
 
@@ -739,7 +732,7 @@ Provably, eq. <a href="#deterministic-pg" data-reference-type="ref" data-refere
739
  ```
740
  Similarily to DQN, DDPG also employs the same replay buffer mechanism, reusing past transitions over training for increased sample efficiency and estimate the loss function via MC-estimates.
741
 
742
- Soft Actor-Critic (SAC) @haarnojaSoftActorCriticOffPolicy2018 is a derivation of DDPG in the max-entropy (MaxEnt) RL framework, in which RL agents are tasked with <span class="highlight">maximizing the discounted cumulative reward, while acting as randomly as possible</span>. MaxEnt RL @haarnojaReinforcementLearningDeep2017b has proven particularly robust thanks to the development of diverse behaviors, incentivized by its entropy-regularization formulation. In that, MaxEnt revisits the RL objective $J (\pi)$ to specifically account for the policy entropy $\mathcal H(\pi (\bullet \vert s_t))$,
743
  <span id="J-soft" style="position: absolute;">
744
  </span>
745
 
@@ -1065,13 +1058,9 @@ Advances in learning to act from potentially large corpora of human demonstratio
1065
  ## Robot (Imitation) Learning
1066
 
1067
  <span id="learning-imitation" style="position: absolute;"></span>
1068
- <div class="epigraph">
1069
-
1070
- *The best material model for a cat is another, or preferably the same cat*
1071
-
1072
- Norbert Wiener
1073
-
1074
- </div>
1075
  <div class="callout">
1076
 
1077
  TL;DR Behavioral Cloning provides a natural platform to learn from real-world interactions without the need to design any reward function, and generative models prove more effective than point-wise policies at dealing with multimodal demonstration datasets.
@@ -1089,7 +1078,7 @@ TL;DR Behavioral Cloning provides a natural platform to learn from real-world in
1089
  <figcaption>(A) Average (with standard deviation) evolution of the actuation levels over the first 5 recorded episodes in <a href="lerobot/svla_so101_pickplace" class="uri">lerobot/svla_so101_pickplace</a>. Proprioperceptive states provide invaluable to determine the robot’s state during an episode. (B) Camera frames are also recorded alongside measurements on the robot’s state, capturing information about the robot’s interaction with its environment.</figcaption>
1090
  </figure>
1091
 
1092
- Learning from human demonstrations provides a pragmatic alternative to the RL pipeline discussed in Section <a href="#learning-rl" data-reference-type="ref" data-reference="learning-rl">[learning-rl]</a>. Indeed, especially in real-world robotics, online exploration is typically <span class="highlight">costly and potentially unsafe</span>, and designing (dense) reward signals is a <span class="highlight">brittle and task-specific</span> process. Further, even success detection itself often requires bespoke instrumentation, while episodic training demands reliable resets--all factors complicating training RL algorithms on hardware at scale. Behavioral Cloning (BC) sidesteps these constraints by <span class="highlight">casting control an imitation learning problem</span>, leveraging previously collected expert demonstrations to anchor the learned autonomous behavior. Most notably, by *learning-to-imitate*, autonomous systems naturally adhere to the objectives, preferences, and success criteria implicitly encoded in the data, which reduces early-stage exploratory failures and obviates hand-crafted reward shaping altogether.
1093
 
1094
  Formally, let $\mathcal D = \{ \tau^{(i)} \}_{i=1}^N$ be a set of expert trajectories, with $\tau^{(i)} = \{(o_t^{(i)}, a_t^{(i)})\}_{t=0}^{T_i}$ representing the $i$-th length-$T_i$ trajectory in $\mathcal D$, $o_t \in \mathcal O$ denoting observations (e.g., images and proprioception altogether), and $a_t \in \mathcal A$ the expert actions. Typically, observations $o \in \mathcal O$ consist of both image and proprioperceptive information, while actions $a \in \mathcal A$ represent control specifications for the robot to execute, e.g. a joint configuration. Note that differently from Section <a href="#learning-rl" data-reference-type="ref" data-reference="learning-rl">[learning-rl]</a>, in the imitation learning context $\mathcal D$ denotes an offline dataset collecting $N$ length-$T_i$ reward-free (expert) human trajectories $\tau^{(i)}$, and *not* the environment dynamics. Similarily, in this section $\tau^{(i)}$ represent a length-$T_i$ trajectory of observation-action pairs, which crucially *omits entirely any reward* information. Figure <a href="#ch4-bc-trajectories" data-reference-type="ref" data-reference="ch4-bc-trajectories">[ch4-bc-trajectories]</a> graphically shows trajectories in terms of the average evolution of the actuation on the 6 joints of a teleoperated SO-100 manipulator. Notice how proprioperceptive states are captured jointly with camera frames over the course of the recorded episodes, providing a unified high-frame rate collection of both image and joint teleoperation data. Figure <a href="#ch4-observation-action-mapping" data-reference-type="ref" data-reference="ch4-observation-action-mapping">[ch4-observation-action-mapping]</a> shows $(o_t, a_t)$-pairs for the same dataset, with the actions performed by the human expert illustrated alongside the corresponding observation. 
In principle, (expert) trajectories $\tau^{(i)}$ can have different lengths since demonstrations might exhibit multi-modal strategies to attain the same goal, resulting in multiple, different behaviors.
1095
 
@@ -1947,13 +1936,9 @@ for epoch in range(num_epochs):
1947
  ## Generalist Robot Policies
1948
 
1949
  <span id="learning-foundation" style="position: absolute;"></span>
1950
- <div class="epigraph">
1951
-
1952
- *Specialization is for insects*
1953
-
1954
- Robert A. Heinlein
1955
-
1956
- </div>
1957
  <div class="callout">
1958
 
1959
  TL;DR Openly available, large-scale datasets and the development of stable-to-train, expressive and efficient architectures fostered research on the development of generalist robot policies that can operate across embodiment and tasks.
@@ -1994,7 +1979,7 @@ Driven by the goal of developing generalist robot policies, the research communi
1994
 
1995
  In a follow-up work, the same group of authors propose a modified method to learn generalist models, leveraging (1) a more powerful architecture and (2) scaling up the dataset used . In RT-2, @brohanRT2VisionLanguageActionModels2023 propose inheriting internet-scale semantic knowledge from large-scale multi-modal datasets to learn a single, *unified model* for robotics control. Such a model, termed *Vision-Language-Action* (VLA) in the original RT-2 paper, effectively casts robot control as a language-modeling problem, and in particular as a Visual Question-Answering (VQ) task, in which the output token space used to represent *textual tokens* is shared with the *8-bits tokens* used to represent the 256 ($2^8$) actuation levels of a 6-dof robot. In their work, @brohanRT2VisionLanguageActionModels2023 propose co-fine-tuning large-scale VLMs such as PaLIX @chenPaLIXScalingMultilingual2023 or PaLM-E @driessPaLMEEmbodiedMultimodal2023 on a mix of (1) web and (2) robotics data, complementing VQtraining with robotics-specific signal, and learning to directly output robot actions in a shared token space for visual and language inputs. In their work, the authors claim using large models trained on internet-scale data as backbones for VLAs allows models to tap into the rich semantic knowledge embedded in the VLM’s parameters, interpreting instructions and unseen objects by connecting them to concepts acquired while pre-training. For instance, @brohanRT2VisionLanguageActionModels2023 show that while RT-2 has never been explicitly trained to repurpose tools for a *hammering* task, it can still combine its semantic understanding of images, so that when asked which object between (1) a piece of paper, (2) a pair of headphones or (3) a rock may be used instead of a hammer, it correctly answers (3).
1996
 
1997
- Traditionally, research efforts revolved around not only training models, but also proposing datasets for the community, a costly and time-consuming process. Due to the aforementioned embodiment gap, the data used in research efforts in robot learning have traditionally proved rather fragmented, tailored to the specific task considered by the specific group of researchers who collected it, which ultimately hindered integration. The Open X-Embodiment project @oneillOpenXEmbodimentRobotic2025 was a landmark collaboration effort to address data fragmentation, by curating the aggregation of 60 *existing* robotics datasets from 22 different robot embodiments and 21 institutions across the world, and resulted in a total 1.4M of cross-embodiments, cross-tasks, openly-available trajectories. Besides the contribution of an aggregate, large scale dataset, @oneillOpenXEmbodimentRobotic2025 also demonstrated significant positive transfer *across tasks and embodiments*, showing that <span class="highlight">a single model trained on multi-embodiment data can outperform specialist models</span> trained on their respective single-embodiment datasets. The Distributed Robot Interaction Dataset (DROID) @khazatskyDROIDLargeScaleInTheWild2025 represents another significant step towards addressing the problem of scarse and disaggregated data in robot learning, providing a unique dataset consisting of 75k+ human demonstrations collected in realistic (*in-the-wild*) manipulation settings, providing another cornerstone for building general-purpose robot policies. Recently, foundational datasets curated through large, centralized efforts, are increasingly complemented by decentralized, community-driven contributions of robotics data. 
Software libraries like `lerobot` have been instrumental in enabling decentralized collection of large amounts of data, providing the infrastructure for researchers and practitioners to easily contribute trajectories from a wide range of embodiments, democratizing data access via distributed collection.
1998
 
1999
  Despite these advancements, the success of large, proprietary models like RT-1 and RT-2, highlighted a growing accessibility gap in robotics research, as training and deploying large-scale robotics foundation models requires computational resources simply unattainable for most research institutions. The OpenVLA project @kimOpenVLAOpenSourceVisionLanguageAction2024 emerged in direct contrast to traditionally closed-source efforts to develop VLAs. In particular, @kimOpenVLAOpenSourceVisionLanguageAction2024 trained OpenVLA by exclusively leveraging openly available data (970k+ trajectories from the Open-X dataset), and openly shared their training recipes alongside the model weights. Architecturally, OpenVLA integrates a pre-trained vision encoder to project visual tokens into the embedding space of the Llama2-7B @touvronLlama2Open2023 language-model backbone. The language model backbone is then used to predict *discrete action tokens* over 256 activation levels.
2000
 
@@ -2244,7 +2229,7 @@ for epoch in range(num_epochs):
2244
 
2245
  <span id="conclusions" style="position: absolute;"></span>
2246
 
2247
- This tutorial has charted the paradigmatic shift transforming robotics, tracing the <span class="highlight">evolution of robotics from structured, model-based methods to the dynamic, data-driven approaches that define modern robot learning</span>. We began by examining the limitations of traditional dynamics-based control, namely its brittleness and significant engineering overhead, which motivate the adoption of more flexible, learning-based alternatives. Unlike scalable, data-driven techniques, conventional explicit models demand extensive human expertise, hindering wider accessibility and scalability of robotics.
2248
 
2249
  Our exploration traced a clear trajectory of progress, beginning with Reinforcement Learning (RL). While RL offers a powerful paradigm for learning through interaction, its application in robotics is complicated by challenges such as sample inefficiency, safety concerns in real-world training, and the complexities of reward design. We saw how modern approaches like HIL-SERL make real-world RL more feasible by incorporating training-time human guidance, datasets of previously collected data as well as learned reward classifiers.
2250
 
 
20
 
21
  import MultiImage from '../components/MultiImage.astro';
22
  import ResponsiveImage from '../components/ResponsiveImage.astro';
23
+ import Quote from '../components/Quote.astro';
24
  import ch2_planar_manipulator_free from './assets/image/figures/ch2/ch2-planar-manipulator-free.png';
25
  import ch2_planar_manipulator_floor from './assets/image/figures/ch2/ch2-planar-manipulator-floor.png';
26
  import ch2_planar_manipulator_floor_shelf from './assets/image/figures/ch2/ch2-planar-manipulator-floor-shelf.png';
 
102
 
103
  Moreover, since end-to-end learning on ever-growing collections of text and image data has historically been at the core of the development of *foundation models* capable of semantic reasoning across multiple modalities (images, text, audio, etc.), deriving robotics methods grounded in learning appears particularly consequential, especially as the number of openly available datasets continues to grow.
104
 
105
+ Robotics is, at its core, an inherently multidisciplinary field, requiring a wide range of expertise in both *software* and *hardware*. The integration of learning-based techniques further broadens this spectrum of skills, raising the bar for both research and practical applications. `lerobot` is an open-source library designed to integrate end-to-end with the entire robotics stack. With a strong focus on accessible, real-world robots, <mark>(1) `lerobot` supports many, openly available, robotic platforms</mark> for manipulation, locomotion and even whole-body control. `lerobot` also implements a <mark>(2) unified, low-level approach to reading/writing robot configurations</mark> to extend support for other robot platforms with relatively low effort. The library introduces `LeRobotDataset`, <mark>(3) a native robotics dataset format</mark> currently being used by the community to efficiently record and share datasets. `lerobot` also supports many state-of-the-art (SOTA) algorithms in robot learning--mainly based on Reinforcement Learning (RL) and Behavioral Cloning (BC) techniques--with efficient implementations in PyTorch, and extended support to experimentation and experiment tracking. Lastly, `lerobot` defines a custom, optimized inference stack for robotic policies decoupling action planning from action execution, proving effective in guaranteeing more adaptability at runtime.
106
 
107
  This tutorial serves the double purpose of providing useful references for the Science behind--and practical use of--common robot learning techniques. To this aim, we strive to provide a rigorous yet concise overview of the core concepts behind the techniques presented, paired with practical examples of how to use such techniques concretely, with code examples in `lerobot`, for researchers and practitioners interested in the field of robot learning. This tutorial is structured as follows:
108
 
 
282
  ## Classical Robotics
283
 
284
  <span id="classical" style="position: absolute;"></span>
285
+ <Quote source="Sun Tzu">
286
+ Know your enemy
287
+ </Quote>
 
 
 
 
288
  <div class="callout">
289
 
290
  TL;DR Learning-based approaches to robotics are motivated by the need to (1) generalize across tasks and embodiments (2) reduce dependency on human expertise (3) leverage historical trends on the production of data--all traditionally overlooked by dynamics-based techniques.
 
305
  <figcaption>Overview of methods to generate motion (clearly non-exhaustive, see @bekrisStateRobotMotion2024). The different methods can be grouped based on whether they explicitly (<em>dynamics-based</em>) or implicitly (<em>learning-based</em>) model robot-environment interactions.</figcaption>
306
  </figure>
307
 
308
+ Robotics is concerned with producing artificial motion in the physical world in a useful, reliable and safe fashion. Thus, robotics is an inherently multidisciplinary domain: producing autonomous motion in the physical world requires, at the very least, interfacing different software (motion planners) and hardware (motion executioners) components. Further, knowledge of mechanical, electrical, and software engineering, as well as rigid-body mechanics and control theory have therefore proven quintessential in robotics since the field first developed in the 1950s. More recently, Machine Learning (ML) has also proved effective in robotics, complementing these more traditional disciplines @connellRobotLearning1993. As a direct consequence of its multidisciplinary nature, robotics has developed as a rather wide array of methods, all concerned with the main purpose of <mark>producing artificial motion in the physical world</mark>.
309
 
310
+ Methods to produce robotics motion range from traditional *explicit* models--<mark>dynamics-based</mark>[^1] methods, leveraging precise descriptions of the mechanics of robots’ rigid bodies and their interactions with eventual obstacles in the environment--to *implicit* models--<mark>learning-based</mark> methods, treating artificial motion as a statistical pattern to learn given multiple sensorimotor readings @agrawalComputationalSensorimotorLearning, @bekrisStateRobotMotion2024. A variety of methods have been developed between these two extrema. For instance, @hansenTemporalDifferenceLearning2022 show how learning-based systems can benefit from information on the physics of problems, complementing a traditional learning method such as Temporal Difference (TD)-learning @suttonReinforcementLearningIntroduction2018 with Model-Predictive Control (MPC). Conversely, as explicit models may be relying on assumptions proving overly simplistic--or even unrealistic--in practice, learning can prove effective to improve modeling of complex phenomena or complement perception @mccormacSemanticFusionDense3D2016. Such examples aim at demonstrating the richness of approaches to robotics, and Figure <a href="#generating-motion-atlas" data-reference-type="ref" data-reference="generating-motion-atlas">[generating-motion-atlas]</a> graphically illustrates some of the most relevant techniques. Such a list is clearly far from being exhaustive, and we refer to @bekrisStateRobotMotion2024 for a more comprehensive overview of both general and application-specific methods for motion generation. In this section, we wish to introduce the inherent benefits of <mark>learning-based approaches to robotics</mark>--the core focus of this tutorial.
311
 
312
  ### Different Types of Motion
313
 
 
474
  <figcaption>Dynamics-based approaches to robotics suffer from several limitations: (1) orchestrating multiple components poses integration challenges; (2) the need to develop custom processing pipelines for the sensing modalities and tasks considered hinders scalability; (3) simplified analytical models of physical phenomena (here friction at the gripper; credits to @antonovaReinforcementLearningPivoting2017) limit real-world performance. Lastly, (4) dynamics-based methods overlook trends in the availability and growth of robotics data.</figcaption>
475
  </figure>
476
 
477
+ Dynamics-based robotics pipelines have historically been <mark>developed sequentially, engineering the different blocks</mark> now within most architectures for specific purposes. That is, sensing, state estimation, mapping, planning, (diff-)IK, and low-level control have been traditionally developed as distinct modules with fixed interfaces. Pipelining these specific modules proved error-prone, and brittleness emerges--alongside compounding errors--whenever changes occur (e.g., changes in lighting for sensing, occlusion/failure of sensors, control failures). Adapting such a stack to new tasks or robotic platforms often entails re-specifying objectives, constraints, and heuristics at multiple stages, incurring significant engineering overhead.
478
 
479
+ Moreover, classical planners operate on compact, assumed-sufficient state representations; extending them to reason directly over raw, heterogeneous and noisy data streams is non-trivial. This results in a <mark>limited scalability to multimodal data and multitask settings</mark>, as incorporating high-dimensional perceptual inputs (RGB, depth, tactile, audio) traditionally required extensive engineering efforts to extract meaningful features for control. Also, the large number of tasks, coupled with the adoption of *per-task* planners, goal parameterizations, and safety constraints, results in an explosion in design and validation options, with little opportunity to reuse solutions across tasks.
480
 
481
+ Setting aside integration and scalability challenges: developing accurate modeling of contact, friction, and compliance for complicated systems remains difficult. Rigid-body approximations are often insufficient in the presence of deformable objects, and <mark>relying on approximated models hinders real-world applicability</mark> of the methods developed. In the case of complex, time-dependent and/or non-linear dynamics, even moderate mismatches in parameters, unmodeled evolutions, or grasp-induced couplings can qualitatively affect the observed dynamics.
482
 
483
+ Lastly, dynamics-based methods (naturally) overlook the rather recent <mark>increase in availability of openly-available robotics datasets</mark>. The curation of academic datasets by large centralized groups of human experts in robotics @oneillOpenXEmbodimentRobotic2025, @khazatskyDROIDLargeScaleInTheWild2025 is now increasingly complemented by a <mark>growing number of robotics datasets contributed in a decentralized fashion</mark> by individuals with varied expertise. If not tangentially, dynamics-based approaches are not poised to maximally benefit from this trend, which holds the promise of allowing generalization in the space of tasks and embodiments, just as data was the cornerstone for advancements in vision @alayracFlamingoVisualLanguage2022 and natural-language understanding @brownLanguageModelsAre2020.
484
 
485
  Taken together, these limitations (Figure <a href="#classical-limitations" data-reference-type="ref" data-reference="classical-limitations">[classical-limitations]</a>) motivate the exploration of learning-based approaches that can (1) integrate perception and control more tightly, (2) adapt across tasks and embodiments with reduced expert modeling interventions and (3) scale gracefully in performance as more robotics data becomes available.
486
 
487
  ## Robot (Reinforcement) Learning
488
 
489
  <span id="learning-rl" style="position: absolute;"></span>
490
+ <Quote source="Richard Sutton">
491
+ Approximate the solution, not the problem
492
+ </Quote>
 
 
 
 
493
  <div class="callout">
494
 
495
  TL;DR The need for expensive, high-fidelity simulators can be obviated learning from real-world data, using sample-efficient algorithms that can safely train directly on hardware.
 
509
 
510
  Learning-based techniques for robotics naturally address the limitations presented in Section <a href="#classical" data-reference-type="ref" data-reference="classical">[classical]</a> (Figure <a href="#robot-learning-upsides" data-reference-type="ref" data-reference="robot-learning-upsides">[robot-learning-upsides]</a>). In particular, learning-based techniques typically rely on monolithic prediction-to-action pipelines (*visuomotor policies*) which directly map sensorimotor inputs to predicted actions, streamlining control policies by removing the need to interface multiple components. Mapping sensory inputs to actions also makes it possible to incorporate diverse input modalities, leveraging the automatic feature extraction capabilities of modern learning systems. Moreover, learning-based approaches can, in principle, bypass explicit modeling altogether and instead rely solely on interaction data--an advantage that proves transformative when dynamics are difficult to model or entirely unknown. Lastly, learning for robotics (*robot learning*) is naturally well posed to leverage the growing amount of robotics data openly available, just as computer vision and natural language processing did historically benefit from large-scale corpora of data, in great part overlooked by dynamics-based approaches.
511
 
512
+ Being a field at a relatively nascent stage, no prevalent technique(s) proves distinctly better than any other in the domain of robot learning. Still, two major classes of methods gained prominence: <mark>Reinforcement Learning (RL)</mark> and <mark>Behavioral Cloning (BC)</mark> (Figure <a href="#robot-learning-atlas" data-reference-type="ref" data-reference="robot-learning-atlas">[robot-learning-atlas]</a>). In this section, we provide a conceptual overview of applications of RL to robotics, as well as introduce practical examples of how to use RL within `lerobot`. We then introduce the major limitations RL suffers from, to introduce BC techniques in Section <a href="#learning-imitation" data-reference-type="ref" data-reference="learning-imitation">[learning-imitation]</a> and Section <a href="#learning-foundation" data-reference-type="ref" data-reference="learning-foundation">[learning-foundation]</a>.
513
 
514
  <div class="wrapfigure">
515
 
 
646
 
647
  Streamlined end-to-end control pipelines, data-driven feature extraction and a disregard for explicit modeling in favor of interaction data are all features of RL for robotics. However, RL still suffers from limitations concerning safety and learning efficiency, particularly pressing for real-world robotics applications.
648
 
649
+ First, especially early in training, <mark>actions are typically explorative, and thus may be erratic</mark>. On physical systems, untrained policies may command high velocities, self-colliding configurations, or torques exceeding joint limits, leading to wear and potential hardware damage. Mitigating these risks requires external safeguards (e.g., watchdogs, safety monitors, emergency stops), often incurring a high degree of human supervision. Further, in the typical episodic setting considered in most robotics problems, experimentation is substantially slowed down by the need to manually reset the environment over the course of training, a time-consuming and error-prone process. Second, learning efficiently remains problematic in RL, <mark>limiting the applicability of RL in real-world robotics due to consequently prohibitive timescales of training</mark>. Even strong algorithms such as SAC @haarnojaSoftActorCriticOffPolicy2018 typically require a large number of transitions $\{ (s_t, a_t, r_t, s_{t+1})\}_{t=1}^N$. On real-world hardware, generating this data is time-consuming.
650
 
651
  <figure>
652
  <ResponsiveImage
 
678
 
679
  Selecting the dynamics distribution $\Xi$ is also non-trivial. On the one hand, distributions with low entropy might risk to cause failure at transfer time, due to the limited robustness induced over the course of training. On the other hand, excessive randomization may cause over-regularization and hinder performance @margolisRapidLocomotionReinforcement2022. Consequently, the research community investigated approaches to automatically select the randomization distribution $\Xi$, using signals from the training process or tuning it to reproduce observed real-world trajectories. @akkayaSolvingRubiksCube2019 use a parametric uniform distribution $\mathcal U(a, b)$ as $\Xi$, widening the bounds $a, b$ as training progresses and the agent’s performance improves (AutoDR). While effective, AutoDR requires significant tuning--the bounds are widened by a fixed, pre-specified amount $\Delta$ along--and may disregard data when performance *does not* improve after a distribution update @tiboniDomainRandomizationEntropy2024. @tiboniDomainRandomizationEntropy2024 propose a similar method to AutoDR (DORAEMON) to evolve $\Xi$ based on the training signal, but with the key difference of explicitly maximizing the entropy of a parametric Beta distribution--inherently more flexible than uniform distributions--with learned updates instead of fixed $\Delta$. In this, DORAEMON proves particularly effective at dynamically increasing the entropy levels of the training distribution by employing an outer-loop max-entropy objective, tackled under performance constraints in the inner-loop RL problem. Other approaches to automatically perform DR consist in specifically tuning $\Xi$ to align as much as possible the simulation and real-world domains. 
For instance, @chebotarClosingSimtorealLoop2019 interleave in-simulation policy training with repeated real-world policy rollouts used to adjust $\Xi$ based on real-world data, while @tiboniDROPOSimtoRealTransfer2023 leverage a single, pre-collected set of real-world trajectories and tune $\Xi$ under a simple likelihood objective.
680
 
681
+ While DR has shown promise, it does not address the main limitation that, even under the assumption that an ideal distribution $\Xi$ was available, many robotics problems <mark>cannot be simulated with high-enough fidelity under practical computational constraints</mark>. Simulating contact-rich manipulation of possibly deformable or soft materials--i.e., *folding a piece of clothing*--can prove time-intensive, limiting the benefits of in-simulation training.
682
 
683
  A perhaps more fundamental limitation of RL for robotics is the general unavailability of complicated tasks’ *dense* reward function, the design of which is essentially based on human expertise, ingenuity and trial-and-error. In practice, *sparse* reward functions can be used to conclude whether one specific goal has been attained--*has this t-shirt been correctly folded?*--but unfortunately result in more challenging learning. As a result, despite notable successes, deploying RL directly on real-world robots at scale remains challenging.
684
 
 
732
  ```
733
  Similarly to DQN, DDPG also employs the same replay buffer mechanism, reusing past transitions over training for increased sample efficiency and estimating the loss function via MC-estimates.
734
 
735
+ Soft Actor-Critic (SAC) @haarnojaSoftActorCriticOffPolicy2018 is a derivation of DDPG in the max-entropy (MaxEnt) RL framework, in which RL agents are tasked with <mark>maximizing the discounted cumulative reward, while acting as randomly as possible</mark>. MaxEnt RL @haarnojaReinforcementLearningDeep2017b has proven particularly robust thanks to the development of diverse behaviors, incentivized by its entropy-regularization formulation. In that, MaxEnt revisits the RL objective $J (\pi)$ to specifically account for the policy entropy $\mathcal H(\pi (\bullet \vert s_t))$,
736
  <span id="J-soft" style="position: absolute;">
737
  </span>
738
 
 
1058
  ## Robot (Imitation) Learning
1059
 
1060
  <span id="learning-imitation" style="position: absolute;"></span>
1061
+ <Quote source="Norbert Wiener">
1062
+ The best material model for a cat is another, or preferably the same cat
1063
+ </Quote>
 
 
 
 
1064
  <div class="callout">
1065
 
1066
  TL;DR Behavioral Cloning provides a natural platform to learn from real-world interactions without the need to design any reward function, and generative models prove more effective than point-wise policies at dealing with multimodal demonstration datasets.
 
1078
  <figcaption>(A) Average (with standard deviation) evolution of the actuation levels over the first 5 recorded episodes in <a href="lerobot/svla_so101_pickplace" class="uri">lerobot/svla_so101_pickplace</a>. Proprioperceptive states prove invaluable in determining the robot’s state during an episode. (B) Camera frames are also recorded alongside measurements on the robot’s state, capturing information about the robot’s interaction with its environment.</figcaption>
1079
  </figure>
1080
 
1081
+ Learning from human demonstrations provides a pragmatic alternative to the RL pipeline discussed in Section <a href="#learning-rl" data-reference-type="ref" data-reference="learning-rl">[learning-rl]</a>. Indeed, especially in real-world robotics, online exploration is typically <mark>costly and potentially unsafe</mark>, and designing (dense) reward signals is a <mark>brittle and task-specific</mark> process. Further, even success detection itself often requires bespoke instrumentation, while episodic training demands reliable resets--all factors complicating training RL algorithms on hardware at scale. Behavioral Cloning (BC) sidesteps these constraints by <mark>casting control as an imitation learning problem</mark>, leveraging previously collected expert demonstrations to anchor the learned autonomous behavior. Most notably, by *learning-to-imitate*, autonomous systems naturally adhere to the objectives, preferences, and success criteria implicitly encoded in the data, which reduces early-stage exploratory failures and obviates hand-crafted reward shaping altogether.
1082
 
1083
  Formally, let $\mathcal D = \{ \tau^{(i)} \}_{i=1}^N$ be a set of expert trajectories, with $\tau^{(i)} = \{(o_t^{(i)}, a_t^{(i)})\}_{t=0}^{T_i}$ representing the $i$-th length-$T_i$ trajectory in $\mathcal D$, $o_t \in \mathcal O$ denoting observations (e.g., images and proprioception altogether), and $a_t \in \mathcal A$ the expert actions. Typically, observations $o \in \mathcal O$ consist of both image and proprioperceptive information, while actions $a \in \mathcal A$ represent control specifications for the robot to execute, e.g. a joint configuration. Note that differently from Section <a href="#learning-rl" data-reference-type="ref" data-reference="learning-rl">[learning-rl]</a>, in the imitation learning context $\mathcal D$ denotes an offline dataset collecting $N$ length-$T_i$ reward-free (expert) human trajectories $\tau^{(i)}$, and *not* the environment dynamics. Similarly, in this section $\tau^{(i)}$ represents a length-$T_i$ trajectory of observation-action pairs, which crucially *omits entirely any reward* information. Figure <a href="#ch4-bc-trajectories" data-reference-type="ref" data-reference="ch4-bc-trajectories">[ch4-bc-trajectories]</a> graphically shows trajectories in terms of the average evolution of the actuation on the 6 joints of a teleoperated SO-100 manipulator. Notice how proprioperceptive states are captured jointly with camera frames over the course of the recorded episodes, providing a unified high-frame rate collection of both image and joint teleoperation data. Figure <a href="#ch4-observation-action-mapping" data-reference-type="ref" data-reference="ch4-observation-action-mapping">[ch4-observation-action-mapping]</a> shows $(o_t, a_t)$-pairs for the same dataset, with the actions performed by the human expert illustrated alongside the corresponding observation. 
In principle, (expert) trajectories $\tau^{(i)}$ can have different lengths since demonstrations might exhibit multi-modal strategies to attain the same goal, resulting in multiple, different behaviors.
1084
 
 
1936
  ## Generalist Robot Policies
1937
 
1938
  <span id="learning-foundation" style="position: absolute;"></span>
1939
+ <Quote source="Robert A. Heinlein">
1940
+ Specialization is for insects
1941
+ </Quote>
 
 
 
 
1942
  <div class="callout">
1943
 
1944
  TL;DR Openly available, large-scale datasets and the development of stable-to-train, expressive and efficient architectures fostered research on the development of generalist robot policies that can operate across embodiment and tasks.
 
1979
 
1980
  In a follow-up work, the same group of authors propose a modified method to learn generalist models, leveraging (1) a more powerful architecture and (2) scaling up the dataset used. In RT-2, @brohanRT2VisionLanguageActionModels2023 propose inheriting internet-scale semantic knowledge from large-scale multi-modal datasets to learn a single, *unified model* for robotics control. Such a model, termed *Vision-Language-Action* (VLA) in the original RT-2 paper, effectively casts robot control as a language-modeling problem, and in particular as a Visual Question-Answering (VQA) task, in which the output token space used to represent *textual tokens* is shared with the *8-bit tokens* used to represent the 256 ($2^8$) actuation levels of a 6-dof robot. In their work, @brohanRT2VisionLanguageActionModels2023 propose co-fine-tuning large-scale VLMs such as PaLIX @chenPaLIXScalingMultilingual2023 or PaLM-E @driessPaLMEEmbodiedMultimodal2023 on a mix of (1) web and (2) robotics data, complementing VQA training with robotics-specific signal, and learning to directly output robot actions in a shared token space for visual and language inputs. In their work, the authors claim using large models trained on internet-scale data as backbones for VLAs allows models to tap into the rich semantic knowledge embedded in the VLM’s parameters, interpreting instructions and unseen objects by connecting them to concepts acquired while pre-training. For instance, @brohanRT2VisionLanguageActionModels2023 show that while RT-2 has never been explicitly trained to repurpose tools for a *hammering* task, it can still combine its semantic understanding of images, so that when asked which object between (1) a piece of paper, (2) a pair of headphones or (3) a rock may be used instead of a hammer, it correctly answers (3).
1981
 
1982
+ Traditionally, research efforts revolved around not only training models, but also proposing datasets for the community, a costly and time-consuming process. Due to the aforementioned embodiment gap, the data used in research efforts in robot learning have traditionally proved rather fragmented, tailored to the specific task considered by the specific group of researchers who collected it, which ultimately hindered integration. The Open X-Embodiment project @oneillOpenXEmbodimentRobotic2025 was a landmark collaboration effort to address data fragmentation, by curating the aggregation of 60 *existing* robotics datasets from 22 different robot embodiments and 21 institutions across the world, and resulted in a total of 1.4M cross-embodiment, cross-task, openly-available trajectories. Besides the contribution of an aggregate, large scale dataset, @oneillOpenXEmbodimentRobotic2025 also demonstrated significant positive transfer *across tasks and embodiments*, showing that <mark>a single model trained on multi-embodiment data can outperform specialist models</mark> trained on their respective single-embodiment datasets. The Distributed Robot Interaction Dataset (DROID) @khazatskyDROIDLargeScaleInTheWild2025 represents another significant step towards addressing the problem of scarce and disaggregated data in robot learning, providing a unique dataset consisting of 75k+ human demonstrations collected in realistic (*in-the-wild*) manipulation settings, providing another cornerstone for building general-purpose robot policies. Recently, foundational datasets curated through large, centralized efforts, are increasingly complemented by decentralized, community-driven contributions of robotics data. 
Software libraries like `lerobot` have been instrumental in enabling decentralized collection of large amounts of data, providing the infrastructure for researchers and practitioners to easily contribute trajectories from a wide range of embodiments, democratizing data access via distributed collection.
1983
 
1984
  Despite these advancements, the success of large, proprietary models like RT-1 and RT-2, highlighted a growing accessibility gap in robotics research, as training and deploying large-scale robotics foundation models requires computational resources simply unattainable for most research institutions. The OpenVLA project @kimOpenVLAOpenSourceVisionLanguageAction2024 emerged in direct contrast to traditionally closed-source efforts to develop VLAs. In particular, @kimOpenVLAOpenSourceVisionLanguageAction2024 trained OpenVLA by exclusively leveraging openly available data (970k+ trajectories from the Open-X dataset), and openly shared their training recipes alongside the model weights. Architecturally, OpenVLA integrates a pre-trained vision encoder to project visual tokens into the embedding space of the Llama2-7B @touvronLlama2Open2023 language-model backbone. The language model backbone is then used to predict *discrete action tokens* over 256 actuation levels.
1985
 
 
2229
 
2230
  <span id="conclusions" style="position: absolute;"></span>
2231
 
2232
+ This tutorial has charted the paradigmatic shift transforming robotics, tracing the <mark>evolution of robotics from structured, model-based methods to the dynamic, data-driven approaches that define modern robot learning</mark>. We began by examining the limitations of traditional dynamics-based control, namely its brittleness and significant engineering overhead, which motivate the adoption of more flexible, learning-based alternatives. Unlike scalable, data-driven techniques, conventional explicit models demand extensive human expertise, hindering wider accessibility and scalability of robotics.
2233
 
2234
  Our exploration traced a clear trajectory of progress, beginning with Reinforcement Learning (RL). While RL offers a powerful paradigm for learning through interaction, its application in robotics is complicated by challenges such as sample inefficiency, safety concerns in real-world training, and the complexities of reward design. We saw how modern approaches like HIL-SERL make real-world RL more feasible by incorporating training-time human guidance, datasets of previously collected data as well as learned reward classifiers.
2235
 
app/scripts/latex-importer/reference-preprocessor.mjs CHANGED
@@ -121,10 +121,10 @@ function convertHighlightCommands(content) {
121
  let processedContent = content;
122
  let highlightsConverted = 0;
123
 
124
- // Replace \highlight{...} with <span class="highlight">...</span>
125
  processedContent = processedContent.replace(/\\highlight\{([^}]+)\}/g, (match, text) => {
126
  highlightsConverted++;
127
- return `<span class="highlight">${text}</span>`;
128
  });
129
 
130
  return { content: processedContent, highlightsConverted };
 
121
  let processedContent = content;
122
  let highlightsConverted = 0;
123
 
124
+ // Replace \highlight{...} with <mark>...</mark>
125
  processedContent = processedContent.replace(/\\highlight\{([^}]+)\}/g, (match, text) => {
126
  highlightsConverted++;
127
+ return `<mark>${text}</mark>`;
128
  });
129
 
130
  return { content: processedContent, highlightsConverted };
app/src/components/Hero.astro CHANGED
@@ -1,5 +1,6 @@
1
  ---
2
  import HtmlEmbed from "./HtmlEmbed.astro";
 
3
 
4
  interface Props {
5
  title: string; // may contain HTML (e.g., <br/>)
@@ -37,6 +38,7 @@ function normalizeAuthors(
37
  url?: string;
38
  link?: string;
39
  affiliationIndices?: number[];
 
40
  }
41
  >,
42
  ): Author[] {
@@ -47,8 +49,11 @@ function normalizeAuthors(
47
  }
48
  const name = (a?.name ?? "").toString();
49
  const url = (a?.url ?? a?.link) as string | undefined;
 
50
  const affiliationIndices = Array.isArray((a as any)?.affiliationIndices)
51
  ? (a as any).affiliationIndices
 
 
52
  : undefined;
53
  return { name, url, affiliationIndices } as Author;
54
  })
@@ -69,9 +74,9 @@ for (const author of normalizedAuthors) {
69
  }
70
  }
71
  }
72
- const shouldShowAffiliationSupers = authorAffiliationIndexSet.size > 1;
73
  const hasMultipleAffiliations =
74
  Array.isArray(affiliations) && affiliations.length > 1;
 
75
 
76
  function stripHtml(text: string): string {
77
  return String(text || "").replace(/<[^>]*>/g, "");
@@ -96,7 +101,12 @@ const pdfFilename = `${slugify(pdfBase)}.pdf`;
96
  <section class="hero">
97
  <h1 class="hero-title" set:html={title} />
98
  <div class="hero-banner">
99
- <HtmlEmbed src="banner.html" frameless />
 
 
 
 
 
100
  {description && <p class="hero-desc">{description}</p>}
101
  </div>
102
  </section>
@@ -362,6 +372,10 @@ const pdfFilename = `${slugify(pdfBase)}.pdf`;
362
  max-width: 980px;
363
  margin: 0 auto;
364
  }
 
 
 
 
365
  .hero-desc {
366
  color: var(--muted-color);
367
  font-style: italic;
@@ -404,7 +418,8 @@ const pdfFilename = `${slugify(pdfBase)}.pdf`;
404
  display: flex;
405
  flex-direction: column;
406
  gap: 8px;
407
- max-width: 250px;
 
408
  }
409
  .meta-container-cell h3 {
410
  margin: 0;
 
1
  ---
2
  import HtmlEmbed from "./HtmlEmbed.astro";
3
+ import Image from "./Image.astro";
4
 
5
  interface Props {
6
  title: string; // may contain HTML (e.g., <br/>)
 
38
  url?: string;
39
  link?: string;
40
  affiliationIndices?: number[];
41
+ affiliations?: number[];
42
  }
43
  >,
44
  ): Author[] {
 
49
  }
50
  const name = (a?.name ?? "").toString();
51
  const url = (a?.url ?? a?.link) as string | undefined;
52
+ // Support both 'affiliationIndices' and 'affiliations' as property names
53
  const affiliationIndices = Array.isArray((a as any)?.affiliationIndices)
54
  ? (a as any).affiliationIndices
55
+ : Array.isArray((a as any)?.affiliations)
56
+ ? (a as any).affiliations
57
  : undefined;
58
  return { name, url, affiliationIndices } as Author;
59
  })
 
74
  }
75
  }
76
  }
 
77
  const hasMultipleAffiliations =
78
  Array.isArray(affiliations) && affiliations.length > 1;
79
+ const shouldShowAffiliationSupers = hasMultipleAffiliations && authorAffiliationIndexSet.size > 0;
80
 
81
  function stripHtml(text: string): string {
82
  return String(text || "").replace(/<[^>]*>/g, "");
 
101
  <section class="hero">
102
  <h1 class="hero-title" set:html={title} />
103
  <div class="hero-banner">
104
+ <Image
105
+ src="/src/content/assets/lerobot-logo-thumbnail.png"
106
+ alt="LeRobot Logo"
107
+ width={400}
108
+ height={200}
109
+ />
110
  {description && <p class="hero-desc">{description}</p>}
111
  </div>
112
  </section>
 
372
  max-width: 980px;
373
  margin: 0 auto;
374
  }
375
+ .hero-banner img {
376
+ width: 100%;
377
+ height: auto;
378
+ }
379
  .hero-desc {
380
  color: var(--muted-color);
381
  font-style: italic;
 
418
  display: flex;
419
  flex-direction: column;
420
  gap: 8px;
421
+ flex: 1;
422
+ min-width: 0;
423
  }
424
  .meta-container-cell h3 {
425
  margin: 0;
app/src/content/article.mdx CHANGED
@@ -20,6 +20,7 @@ tableOfContentsAutoCollapse: true
20
 
21
  import MultiImage from '../components/MultiImage.astro';
22
  import ResponsiveImage from '../components/ResponsiveImage.astro';
 
23
  import ch2_planar_manipulator_free from './assets/image/figures/ch2/ch2-planar-manipulator-free.png';
24
  import ch2_planar_manipulator_floor from './assets/image/figures/ch2/ch2-planar-manipulator-floor.png';
25
  import ch2_planar_manipulator_floor_shelf from './assets/image/figures/ch2/ch2-planar-manipulator-floor-shelf.png';
@@ -101,7 +102,7 @@ The frontier of robotics research is indeed increasingly moving away from classi
101
 
102
  Moreover, since end-to-end learning on ever-growing collections of text and image data has historically been at the core of the development of *foundation models* capable of semantic reasoning across multiple modalities (images, text, audio, etc.), deriving robotics methods grounded in learning appears particularly consequential, especially as the number of openly available datasets continues to grow.
103
 
104
- Robotics is, at its core, an inherently multidisciplinary field, requiring a wide range of expertise in both *software* and *hardware*. The integration of learning-based techniques further broadens this spectrum of skills, raising the bar for both research and practical applications. `lerobot` is an open-source library designed to integrate end-to-end with the entire robotics stack. With a strong focus on accessible, real-world robots <span class="highlight">`lerobot` supports many, openly available, robotic platforms</span> for manipulation, locomotion and even whole-body control. `lerobot`also implements a <span class="highlight">unified, low-level approach to reading/writing robot configurations</span> to extend support for other robot platforms with relatively low effort. The library introduces `LeRobotDataset`, <span class="highlight">a native robotics dataset’s format</span> currently being used by the community to efficiently record and share datasets. `lerobot` also supports many state-of-the-art (SOTA) algorithms in robot learning--mainly based on Reinforcement Learning (RL) and Behavioral Cloning (BC) techniques--with efficient implementations in Pytorch, and extended support to experimentation and experiments tracking. Lastly, `lerobot` defines a custom, optimized inference stack for robotic policies decoupling action planning from action execution, proving effective in guaranteeing more adaptability at runtime.
105
 
106
  This tutorial serves the double purpose of providing useful references for the Science behind--and practical use of--common robot learning techniques. To this aim, we strive to provide a rigorous yet concise overview of the core concepts behind the techniques presented, paired with practical examples of how to use such techniques concretely, with code examples in `lerobot`, for researchers and practitioners interested in the field of robot learning. This tutorial is structured as follows:
107
 
@@ -281,13 +282,9 @@ for epoch in range(num_epochs):
281
  ## Classical Robotics
282
 
283
  <span id="classical" style="position: absolute;"></span>
284
- <div class="epigraph">
285
-
286
- *Know your enemy* \[...\]
287
-
288
- Sun Tzu
289
-
290
- </div>
291
  <div class="callout">
292
 
293
  TL;DR Learning-based approaches to robotics are motivated by the need to (1) generalize across tasks and embodiments (2) reduce dependency on human expertise (3) leverage historical trends on the production of data--all traditionally overlooked by dynamics-based techniques.
@@ -308,9 +305,9 @@ TL;DR Learning-based approaches to robotics are motivated by the need to (1) gen
308
  <figcaption>Overview of methods to generate motion (clearly non-exhaustive, see @bekrisStateRobotMotion2024). The different methods can be grouped based on whether they explicitly (<em>dynamics-based</em>) or implicitly (<em>learning-based</em>) model robot-environment interactions.</figcaption>
309
  </figure>
310
 
311
- Robotics is concerned with producing artificial motion in the physical world in useful, reliable and safe fashion. Thus, robotics is an inherently multi-disciplinar domain: producing autonomous motion in the physical world requires, to the very least, interfacing different software (motion planners) and hardware (motion executioners) components. Further, knowledge of mechanical, electrical, and software engineering, as well as rigid-body mechanics and control theory have therefore proven quintessential in robotics since the field first developed in the 1950s. More recently, Machine Learning (ML) has also proved effective in robotics, complementing these more traditional disciplines @connellRobotLearning1993. As a direct consequence of its multi-disciplinar nature, robotics has developed as a rather wide array of methods, all concerned with the main purpose of <span class="highlight">producing artificial motion in the physical world</span>.
312
 
313
- Methods to produce robotics motion range from traditional *explicit* models--<span class="highlight">dynamics-based</span>[^1] methods, leveraging precise descriptions of the mechanics of robots’ rigid bodies and their interactions with eventual obstacles in the environment--to *implicit* models--<span class="highlight">learning-based</span> methods, treating artificial motion as a statistical pattern to learn given multiple sensorimotor readings @agrawalComputationalSensorimotorLearning, @bekrisStateRobotMotion2024. A variety of methods have been developed between these two extrema. For instance,  @hansenTemporalDifferenceLearning2022 show how learning-based systems can benefit from information on the physics of problems, complementing a traditional learning method such as Temporal Difference (TD)-learning @suttonReinforcementLearningIntroduction2018 with Model-Predictive Control (MPC). Conversely, as explicit models may be relying on assumptions proving overly simplistic--or even unrealistic--in practice, learning can prove effective to improve modeling of complex phenomena or complement perception @mccormacSemanticFusionDense3D2016. Such examples aim at demonstrating the richness of approaches to robotics, and Figure <a href="#generating-motion-atlas" data-reference-type="ref" data-reference="generating-motion-atlas">[generating-motion-atlas]</a> graphically illustrates some of the most relevant techniques. Such a list is clearly far from being exhaustive, and we refer to @bekrisStateRobotMotion2024 for a more comprehensive overview of both general and application-specific methods for motion generation. In this section, we wish to introduce the inherent benefits of <span class="highlight">learning-based approaches to robotics</span>--the core focus on this tutorial.
314
 
315
  ### Different Types of Motion
316
 
@@ -477,26 +474,22 @@ Despite the last 60+ years of robotics research, autonomous robots are still lar
477
  <figcaption>Dynamics-based approaches to robotics suffer from several limitations: (1) orchestrating multiple components poses integration challenges; (2) the need to develop custom processing pipelines for the sensing modalities and tasks considered hinders scalability; (3) simplified analytical models of physical phenomena (here friction at the gripper; credits to @antonovaReinforcementLearningPivoting2017) limit real-world performance. Lastly, (4) dynamics-based methods overlook trends in the availability and growth of robotics data.</figcaption>
478
  </figure>
479
 
480
- Dynamics-based robotics pipelines have historically been <span class="highlight">developed sequentially, engineering the different blocks</span> now within most architectures for specific purposes. That is, sensing, state estimation, mapping, planning, (diff-)IK, and low-level control have been traditionally developed as distinct modules with fixed interfaces. Pipelining these specific modules proved error-prone, and brittleness emerges--alongside compounding errors--whenever changes incur (e.g., changes in lighting for sensing, occlusion/failure of sensors, control failures). Adapting such a stack to new tasks or robotic platforms often entails re-specifying objectives, constraints, and heuristics at multiple stages, incurring significant engineering overhead.
481
 
482
- Moreover, classical planners operate on compact, assumed-sufficient state representations; extending them to reason directly over raw, heterogeneous and noisy data streams is non-trivial. This results in a <span class="highlight">limited scalability to multimodal data and multitask settings</span>, as incorporating high-dimensional perceptual inputs (RGB, depth, tactile, audio) traditionally required extensive engineering efforts to extract meaningful features for control. Also, the large number of tasks, coupled with the adoption of *per-task* planners, goal parameterizations, and safety constraints, results in an explosion in design and validation options, with little opportunity to reuse solutions across tasks.
483
 
484
- Setting aside integration and scalability challenges: developing accurate modeling of contact, friction, and compliance for complicated systems remains difficult. Rigid-body approximations are often insufficient in the presence of deformable objects, and <span class="highlight">relying on approximated models hinders real-world applicability</span> of the methods developed. In the case of complex, time-dependent and/or non-linear dynamics, even moderate mismatches in parameters, unmodeled evolutions, or grasp-induced couplings can qualitatively affect the observed dynamics.
485
 
486
- Lastly, dynamics-based methods (naturally) overlook the rather recent <span class="highlight">increase in availability of openly-available robotics datasets</span>. The curation of academic datasets by large centralized groups of human experts in robotics @oneillOpenXEmbodimentRobotic2025, @khazatskyDROIDLargeScaleInTheWild2025 is now increasingly complemented by a <span class="highlight">growing number of robotics datasets contributed in a decentralized fashion</span> by individuals with varied expertise. If not tangentially, dynamics-based approaches are not posed to maximally benefit from this trend, which holds the premise of allowing generalization in the space of tasks and embodiments, like data was the cornerstone for advancements in vision @alayracFlamingoVisualLanguage2022 and natural-language understanding @brownLanguageModelsAre2020.
487
 
488
  Taken together, these limitations (Figure <a href="#classical-limitations" data-reference-type="ref" data-reference="classical-limitations">[classical-limitations]</a>) motivate the exploration of learning-based approaches that can (1) integrate perception and control more tightly, (2) adapt across tasks and embodiments with reduced expert modeling interventions and (3) scale gracefully in performance as more robotics data becomes available.
489
 
490
  ## Robot (Reinforcement) Learning
491
 
492
  <span id="learning-rl" style="position: absolute;"></span>
493
- <div class="epigraph">
494
-
495
- *Approximate the solution, not the problem* \[...\]
496
-
497
- Richard Sutton
498
-
499
- </div>
500
  <div class="callout">
501
 
502
  TL;DR The need for expensive, high-fidelity simulators can be obviated learning from real-world data, using sample-efficient algorithms that can safely train directly on hardware.
@@ -516,7 +509,7 @@ TL;DR The need for expensive, high-fidelity simulators can be obviated learning
516
 
517
  Learning-based techniques for robotics naturally address the limitations presented in Section <a href="#classical" data-reference-type="ref" data-reference="classical">[classical]</a> (Figure <a href="#robot-learning-upsides" data-reference-type="ref" data-reference="robot-learning-upsides">[robot-learning-upsides]</a>). In particular, learning-based techniques typically rely on monolithic prediction-to-action pipelines (*visuomotor policies*) which directly map sensorimotor inputs to predicted actions, streamlining control policies by removing the need to interface multiple components. Mapping sensory inputs to actions also makes it possible to incorporate diverse input modalities, leveraging the automatic feature extraction capabilities of modern learning systems. Moreover, learning-based approaches can, in principle, bypass explicit modeling altogether and instead rely solely on interaction data--an advantage that proves transformative when dynamics are difficult to model or entirely unknown. Lastly, learning for robotics (*robot learning*) is naturally well posed to leverage the growing amount of robotics data openly available, just as computer vision and natural language processing did historically benefit from large-scale corpora of data, in great part overlooked by dynamics-based approaches.
518
 
519
- Being a field at its relative nascent stages, no prevalent technique(s) proves distinctly better than any other in the domain of robot learning. Still, two major classes of methods gained prominence- <span class="highlight">Reinforcement Learning (RL)</span> and <span class="highlight">Behavioral Cloning (BC)</span> (Figure <a href="#robot-learning-atlas" data-reference-type="ref" data-reference="robot-learning-atlas">[robot-learning-atlas]</a>). In this section, we provide a conceptual overview of applications of RL to robotics, as well as introduce practical examples of how to use RL within `lerobot`. We then introduce the major limitations RL suffers from, to introduce BC techniques in Section <a href="#learning-imitation" data-reference-type="ref" data-reference="learning-imitation">[learning-imitation]</a> and Section sec-learning-foundation.
520
 
521
  <div class="wrapfigure">
522
 
@@ -653,7 +646,7 @@ Popular approaches to continuous state and action space--such as those studied w
653
 
654
  Streamlined end-to-end control pipelines, data-driven feature extraction and a disregard for explicit modeling in favor of interaction data are all features of RL for robotics. However, RL still suffers from limitations concerning safety and learning efficiency, particularly pressing for real-world robotics applications.
655
 
656
- First, especially early in training, <span class="highlight">actions are typically explorative, and thus may be erratic</span>. On physical systems, untrained policies may command high velocities, self-colliding configurations, or torques exceeding joint limits, leading to wear and potential hardware damage. Mitigating these risks requires external safeguards (e.g., watchdogs, safety monitors, emergency stops), often incurring a high degree of human supervision. Further, in the typical episodic setting considered in most robotics problems, experimentation is substantially slowed down by the need to manually reset the environment over the course of training, a time-consuming and error-prone process. Second, learning efficiently remains problematic in RL, <span class="highlight">limiting the applicability of RL in real-world robotics due to consequently prohibitive timescales of training</span>. Even strong algorithms such as SAC @haarnojaSoftActorCriticOffPolicy2018 typically require a large number of transitions $\{ (s_t, a_t, r_t, s_{t+1})\}_{t=1}^N$. On real-world hardware, generating this data is time-consuming.
657
 
658
  <figure>
659
  <ResponsiveImage
@@ -685,7 +678,7 @@ While effective in transfering policies across the reality gap in real-world rob
685
 
686
  Selecting the dynamics distribution $\Xi$ is also non-trivial. On the one hand, distributions with low entropy might risk to cause failure at transfer time, due to the limited robustness induced over the course of training. On the other hand, excessive randomization may cause over-regularization and hinder performance @margolisRapidLocomotionReinforcement2022. Consequently, the research community investigated approaches to automatically select the randomization distribution $\Xi$, using signals from the training process or tuning it to reproduce observed real-world trajectories. @akkayaSolvingRubiksCube2019 use a parametric uniform distribution $\mathcal U(a, b)$ as $\Xi$, widening the bounds $a, b$ as training progresses and the agent’s performance improves (AutoDR). While effective, AutoDR requires significant tuning--the bounds are widened by a fixed, pre-specified amount $\Delta$ along--and may disregard data when performance *does not* improve after a distribution update @tiboniDomainRandomizationEntropy2024. @tiboniDomainRandomizationEntropy2024 propose a similar method to AutoDR (DORAEMON) to evolve $\Xi$ based on the training signal, but with the key difference of explicitly maximizing the entropy of a parametric Beta distribution--inherently more flexible than uniform distributions--with learned updates instead of fixed $\Delta$. In this, DORAEMON proves particularly effective at dynamically increasing the entropy levels of the training distribution by employing an outer-loop max-entropy objective, tackled under performance constraints in the inner-loop RL problem. Other approaches to automatically perform DR consist in specifically tuning $\Xi$ to align as much as possible the simulation and real-world domains. 
For instance, @chebotarClosingSimtorealLoop2019 interleave in-simulation policy training with repeated real-world policy rollouts used to adjust $\Xi$ based on real-world data, while @tiboniDROPOSimtoRealTransfer2023 leverage a single, pre-collected set of real-world trajectories and tune $\Xi$ under a simple likelihood objective.
687
 
688
- While DR has shown promise, it does not address the main limitation that, even under the assumption that an ideal distribution $\Xi$ was available, many robotics problems <span class="highlight">cannot be simulated with high-enough fidelity under practical computational constraints</span>. Simulating contact-rich manipulation of possibly deformable or soft materials--i.e., *folding a piece of clothing*--can prove time-intensive, limiting the benefits of in-simulation training.
689
 
690
  A perhaps more fundamental limitation of RL for robotics is the general unavailability of complicated tasks’ *dense* reward function, the design of which is essentially based on human expertise, ingenuity and trial-and-error. In practice, *sparse* reward functions can be used to conclude whether one specific goal has been attained--*has this t-shirt been correctly folded?*--but unfortunately incur more challenging learning. As a result, despite notable successes, deploying RL directly on real-world robots at scale remains challenging.
691
 
@@ -739,7 +732,7 @@ Provably, eq. <a href="#deterministic-pg" data-reference-type="ref" data-refere
739
  ```
740
  Similarly to DQN, DDPG also employs the same replay buffer mechanism, reusing past transitions over training for increased sample efficiency and estimating the loss function via MC-estimates.
741
 
742
- Soft Actor-Critic (SAC) @haarnojaSoftActorCriticOffPolicy2018 is a derivation of DDPG in the max-entropy (MaxEnt) RL framework, in which RL agents are tasked with <span class="highlight">maximizing the discounted cumulative reward, while acting as randomly as possible</span>. MaxEnt RL @haarnojaReinforcementLearningDeep2017b has proven particularly robust thanks to the development of diverse behaviors, incentivized by its entropy-regularization formulation. In that, MaxEnt revisits the RL objective $J (\pi)$ to specifically account for the policy entropy $\mathcal H(\pi (\bullet \vert s_t))$,
743
  <span id="J-soft" style="position: absolute;">
744
  </span>
745
 
@@ -1065,13 +1058,9 @@ Advances in learning to act from potentially large corpora of human demonstratio
1065
  ## Robot (Imitation) Learning
1066
 
1067
  <span id="learning-imitation" style="position: absolute;"></span>
1068
- <div class="epigraph">
1069
-
1070
- *The best material model for a cat is another, or preferably the same cat*
1071
-
1072
- Norbert Wiener
1073
-
1074
- </div>
1075
  <div class="callout">
1076
 
1077
  TL;DR Behavioral Cloning provides a natural platform to learn from real-world interactions without the need to design any reward function, and generative models prove more effective than point-wise policies at dealing with multimodal demonstration datasets.
@@ -1089,7 +1078,7 @@ TL;DR Behavioral Cloning provides a natural platform to learn from real-world in
1089
  <figcaption>(A) Average (with standard deviation) evolution of the actuation levels over the first 5 recorded episodes in <a href="lerobot/svla_so101_pickplace" class="uri">lerobot/svla_so101_pickplace</a>. Proprioperceptive states prove invaluable in determining the robot’s state during an episode. (B) Camera frames are also recorded alongside measurements on the robot’s state, capturing information about the robot’s interaction with its environment.</figcaption>
1090
  </figure>
1091
 
1092
- Learning from human demonstrations provides a pragmatic alternative to the RL pipeline discussed in Section <a href="#learning-rl" data-reference-type="ref" data-reference="learning-rl">[learning-rl]</a>. Indeed, especially in real-world robotics, online exploration is typically <span class="highlight">costly and potentially unsafe</span>, and designing (dense) reward signals is a <span class="highlight">brittle and task-specific</span> process. Further, even success detection itself often requires bespoke instrumentation, while episodic training demands reliable resets--all factors complicating training RL algorithms on hardware at scale. Behavioral Cloning (BC) sidesteps these constraints by <span class="highlight">casting control as an imitation learning problem</span>, leveraging previously collected expert demonstrations to anchor the learned autonomous behavior. Most notably, by *learning-to-imitate*, autonomous systems naturally adhere to the objectives, preferences, and success criteria implicitly encoded in the data, which reduces early-stage exploratory failures and obviates hand-crafted reward shaping altogether.
1093
 
1094
  Formally, let $\mathcal D = \{ \tau^{(i)} \}_{i=1}^N$ be a set of expert trajectories, with $\tau^{(i)} = \{(o_t^{(i)}, a_t^{(i)})\}_{t=0}^{T_i}$ representing the $i$-th length-$T_i$ trajectory in $\mathcal D$, $o_t \in \mathcal O$ denoting observations (e.g., images and proprioception altogether), and $a_t \in \mathcal A$ the expert actions. Typically, observations $o \in \mathcal O$ consist of both image and proprioperceptive information, while actions $a \in \mathcal A$ represent control specifications for the robot to execute, e.g. a joint configuration. Note that differently from Section <a href="#learning-rl" data-reference-type="ref" data-reference="learning-rl">[learning-rl]</a>, in the imitation learning context $\mathcal D$ denotes an offline dataset collecting $N$ length-$T_i$ reward-free (expert) human trajectories $\tau^{(i)}$, and *not* the environment dynamics. Similarly, in this section $\tau^{(i)}$ represents a length-$T_i$ trajectory of observation-action pairs, which crucially *omits entirely any reward* information. Figure <a href="#ch4-bc-trajectories" data-reference-type="ref" data-reference="ch4-bc-trajectories">[ch4-bc-trajectories]</a> graphically shows trajectories in terms of the average evolution of the actuation on the 6 joints of a teleoperated SO-100 manipulator. Notice how proprioperceptive states are captured jointly with camera frames over the course of the recorded episodes, providing a unified high-frame rate collection of both image and joint teleoperation data. Figure <a href="#ch4-observation-action-mapping" data-reference-type="ref" data-reference="ch4-observation-action-mapping">[ch4-observation-action-mapping]</a> shows $(o_t, a_t)$-pairs for the same dataset, with the actions performed by the human expert illustrated alongside the corresponding observation. 
In principle, (expert) trajectories $\tau^{(i)}$ can have different lengths since demonstrations might exhibit multi-modal strategies to attain the same goal, resulting in multiple, different behaviors.
1095
 
@@ -1947,13 +1936,9 @@ for epoch in range(num_epochs):
1947
  ## Generalist Robot Policies
1948
 
1949
  <span id="learning-foundation" style="position: absolute;"></span>
1950
- <div class="epigraph">
1951
-
1952
- *Specialization is for insects*
1953
-
1954
- Robert A. Heinlein
1955
-
1956
- </div>
1957
  <div class="callout">
1958
 
1959
  TL;DR Openly available, large-scale datasets and the development of stable-to-train, expressive and efficient architectures fostered research on the development of generalist robot policies that can operate across embodiment and tasks.
@@ -1994,7 +1979,7 @@ Driven by the goal of developing generalist robot policies, the research communi
1994
 
1995
  In a follow-up work, the same group of authors propose a modified method to learn generalist models, leveraging (1) a more powerful architecture and (2) scaling up the dataset used. In RT-2, @brohanRT2VisionLanguageActionModels2023 propose inheriting internet-scale semantic knowledge from large-scale multi-modal datasets to learn a single, *unified model* for robotics control. Such a model, termed *Vision-Language-Action* (VLA) in the original RT-2 paper, effectively casts robot control as a language-modeling problem, and in particular as a Visual Question-Answering (VQ) task, in which the output token space used to represent *textual tokens* is shared with the *8-bits tokens* used to represent the 256 ($2^8$) actuation levels of a 6-dof robot. In their work, @brohanRT2VisionLanguageActionModels2023 propose co-fine-tuning large-scale VLMs such as PaLIX @chenPaLIXScalingMultilingual2023 or PaLM-E @driessPaLMEEmbodiedMultimodal2023 on a mix of (1) web and (2) robotics data, complementing VQ training with robotics-specific signal, and learning to directly output robot actions in a shared token space for visual and language inputs. In their work, the authors claim using large models trained on internet-scale data as backbones for VLAs allows models to tap into the rich semantic knowledge embedded in the VLM’s parameters, interpreting instructions and unseen objects by connecting them to concepts acquired while pre-training. For instance, @brohanRT2VisionLanguageActionModels2023 show that while RT-2 has never been explicitly trained to repurpose tools for a *hammering* task, it can still combine its semantic understanding of images, so that when asked which object between (1) a piece of paper, (2) a pair of headphones or (3) a rock may be used instead of a hammer, it correctly answers (3).
1996
 
1997
- Traditionally, research efforts revolved around not only training models, but also proposing datasets for the community, a costly and time-consuming process. Due to the aforementioned embodiment gap, the data used in research efforts in robot learning have traditionally proved rather fragmented, tailored to the specific task considered by the specific group of researchers who collected it, which ultimately hindered integration. The Open X-Embodiment project @oneillOpenXEmbodimentRobotic2025 was a landmark collaboration effort to address data fragmentation, by curating the aggregation of 60 *existing* robotics datasets from 22 different robot embodiments and 21 institutions across the world, and resulted in a total 1.4M of cross-embodiments, cross-tasks, openly-available trajectories. Besides the contribution of an aggregate, large scale dataset, @oneillOpenXEmbodimentRobotic2025 also demonstrated significant positive transfer *across tasks and embodiments*, showing that <span class="highlight">a single model trained on multi-embodiment data can outperform specialist models</span> trained on their respective single-embodiment datasets. The Distributed Robot Interaction Dataset (DROID) @khazatskyDROIDLargeScaleInTheWild2025 represents another significant step towards addressing the problem of scarse and disaggregated data in robot learning, providing a unique dataset consisting of 75k+ human demonstrations collected in realistic (*in-the-wild*) manipulation settings, providing another cornerstone for building general-purpose robot policies. Recently, foundational datasets curated through large, centralized efforts, are increasingly complemented by decentralized, community-driven contributions of robotics data. 
Software libraries like `lerobot` have been instrumental in enabling decentralized collection of large amounts of data, providing the infrastructure for researchers and practitioners to easily contribute trajectories from a wide range of embodiments, democratizing data access via distributed collection.
1998
 
1999
  Despite these advancements, the success of large, proprietary models like RT-1 and RT-2, highlighted a growing accessibility gap in robotics research, as training and deploying large-scale robotics foundation models requires computational resources simply unattainable for most research institutions. The OpenVLA project @kimOpenVLAOpenSourceVisionLanguageAction2024 emerged in direct contrast to traditionally closed-source efforts to develop VLAs. In particular, @kimOpenVLAOpenSourceVisionLanguageAction2024 trained OpenVLA by exclusively leveraging openly available data (970k+ trajectories from the Open-X dataset), and openly shared their training recipes alongside the model weights. Architecturally, OpenVLA integrates a pre-trained vision encoder to project visual tokens into the embedding space of the Llama2-7B @touvronLlama2Open2023 language-model backbone. The language model backbone is then used to predict *discrete action tokens* over 256 activation levels.
2000
 
@@ -2244,7 +2229,7 @@ for epoch in range(num_epochs):
2244
 
2245
  <span id="conclusions" style="position: absolute;"></span>
2246
 
2247
- This tutorial has charted the paradigmatic shift transforming robotics, tracing the <span class="highlight">evolution of robotics from structured, model-based methods to the dynamic, data-driven approaches that define modern robot learning</span>. We began by examining the limitations of traditional dynamics-based control, namely its brittleness and significant engineering overhead, which motivate the adoption of more flexible, learning-based alternatives. Unlike scalable, data-driven techniques, conventional explicit models demand extensive human expertise, hindering wider accessibility and scalability of robotics.
2248
 
2249
  Our exploration traced a clear trajectory of progress, beginning with Reinforcement Learning (RL). While RL offers a powerful paradigm for learning through interaction, its application in robotics is complicated by challenges such as sample inefficiency, safety concerns in real-world training, and the complexities of reward design. We saw how modern approaches like HIL-SERL make real-world RL more feasible by incorporating training-time human guidance, datasets of previously collected data as well as learned reward classifiers.
2250
 
 
20
 
21
  import MultiImage from '../components/MultiImage.astro';
22
  import ResponsiveImage from '../components/ResponsiveImage.astro';
23
+ import Quote from '../components/Quote.astro';
24
  import ch2_planar_manipulator_free from './assets/image/figures/ch2/ch2-planar-manipulator-free.png';
25
  import ch2_planar_manipulator_floor from './assets/image/figures/ch2/ch2-planar-manipulator-floor.png';
26
  import ch2_planar_manipulator_floor_shelf from './assets/image/figures/ch2/ch2-planar-manipulator-floor-shelf.png';
 
102
 
103
  Moreover, since end-to-end learning on ever-growing collections of text and image data has historically been at the core of the development of *foundation models* capable of semantic reasoning across multiple modalities (images, text, audio, etc.), deriving robotics methods grounded in learning appears particularly consequential, especially as the number of openly available datasets continues to grow.
104
 
105
+ Robotics is, at its core, an inherently multidisciplinary field, requiring a wide range of expertise in both *software* and *hardware*. The integration of learning-based techniques further broadens this spectrum of skills, raising the bar for both research and practical applications. `lerobot` is an open-source library designed to integrate end-to-end with the entire robotics stack. With a strong focus on accessible, real-world robots <mark>(1) `lerobot` supports many, openly available, robotic platforms</mark> for manipulation, locomotion and even whole-body control. `lerobot` also implements a <mark>(2) unified, low-level approach to reading/writing robot configurations</mark> to extend support for other robot platforms with relatively low effort. The library introduces `LeRobotDataset`, <mark>(3) a native robotics dataset format</mark> currently being used by the community to efficiently record and share datasets. `lerobot` also supports many state-of-the-art (SOTA) algorithms in robot learning--mainly based on Reinforcement Learning (RL) and Behavioral Cloning (BC) techniques--with efficient implementations in PyTorch, and extended support for experimentation and experiment tracking. Lastly, `lerobot` defines a custom, optimized inference stack for robotic policies decoupling action planning from action execution, proving effective in guaranteeing more adaptability at runtime.
106
 
107
  This tutorial serves the double purpose of providing useful references for the Science behind--and practical use of--common robot learning techniques. To this aim, we strive to provide a rigorous yet concise overview of the core concepts behind the techniques presented, paired with practical examples of how to use such techniques concretely, with code examples in `lerobot`, for researchers and practitioners interested in the field of robot learning. This tutorial is structured as follows:
108
 
 
282
  ## Classical Robotics
283
 
284
  <span id="classical" style="position: absolute;"></span>
285
+ <Quote source="Sun Tzu">
286
+ Know your enemy
287
+ </Quote>
 
 
 
 
288
  <div class="callout">
289
 
290
  TL;DR Learning-based approaches to robotics are motivated by the need to (1) generalize across tasks and embodiments (2) reduce dependency on human expertise (3) leverage historical trends on the production of data--all traditionally overlooked by dynamics-based techniques.
 
305
  <figcaption>Overview of methods to generate motion (clearly non-exhaustive, see @bekrisStateRobotMotion2024). The different methods can be grouped based on whether they explicitly (<em>dynamics-based</em>) or implicitly (<em>learning-based</em>) model robot-environment interactions.</figcaption>
306
  </figure>
307
 
308
+ Robotics is concerned with producing artificial motion in the physical world in a useful, reliable and safe fashion. Thus, robotics is an inherently multidisciplinary domain: producing autonomous motion in the physical world requires, at the very least, interfacing different software (motion planners) and hardware (motion executors) components. Further, knowledge of mechanical, electrical, and software engineering, as well as rigid-body mechanics and control theory have therefore proven quintessential in robotics since the field first developed in the 1950s. More recently, Machine Learning (ML) has also proved effective in robotics, complementing these more traditional disciplines @connellRobotLearning1993. As a direct consequence of its multidisciplinary nature, robotics has developed as a rather wide array of methods, all concerned with the main purpose of <mark>producing artificial motion in the physical world</mark>.
309
 
310
+ Methods to produce robotics motion range from traditional *explicit* models--<mark>dynamics-based</mark>[^1] methods, leveraging precise descriptions of the mechanics of robots’ rigid bodies and their interactions with eventual obstacles in the environment--to *implicit* models--<mark>learning-based</mark> methods, treating artificial motion as a statistical pattern to learn given multiple sensorimotor readings @agrawalComputationalSensorimotorLearning, @bekrisStateRobotMotion2024. A variety of methods have been developed between these two extrema. For instance, @hansenTemporalDifferenceLearning2022 show how learning-based systems can benefit from information on the physics of problems, complementing a traditional learning method such as Temporal Difference (TD)-learning @suttonReinforcementLearningIntroduction2018 with Model-Predictive Control (MPC). Conversely, as explicit models may be relying on assumptions proving overly simplistic--or even unrealistic--in practice, learning can prove effective to improve modeling of complex phenomena or complement perception @mccormacSemanticFusionDense3D2016. Such examples aim at demonstrating the richness of approaches to robotics, and Figure <a href="#generating-motion-atlas" data-reference-type="ref" data-reference="generating-motion-atlas">[generating-motion-atlas]</a> graphically illustrates some of the most relevant techniques. Such a list is clearly far from being exhaustive, and we refer to @bekrisStateRobotMotion2024 for a more comprehensive overview of both general and application-specific methods for motion generation. In this section, we wish to introduce the inherent benefits of <mark>learning-based approaches to robotics</mark>--the core focus of this tutorial.
311
 
312
  ### Different Types of Motion
313
 
 
474
  <figcaption>Dynamics-based approaches to robotics suffer from several limitations: (1) orchestrating multiple components poses integration challenges; (2) the need to develop custom processing pipelines for the sensing modalities and tasks considered hinders scalability; (3) simplified analytical models of physical phenomena (here friction at the gripper; credits to @antonovaReinforcementLearningPivoting2017) limit real-world performance. Lastly, (4) dynamics-based methods overlook trends in the availability and growth of robotics data.</figcaption>
475
  </figure>
476
 
477
+ Dynamics-based robotics pipelines have historically been <mark>developed sequentially, engineering the different blocks</mark> now within most architectures for specific purposes. That is, sensing, state estimation, mapping, planning, (diff-)IK, and low-level control have been traditionally developed as distinct modules with fixed interfaces. Pipelining these specific modules proved error-prone, and brittleness emerges--alongside compounding errors--whenever changes occur (e.g., changes in lighting for sensing, occlusion/failure of sensors, control failures). Adapting such a stack to new tasks or robotic platforms often entails re-specifying objectives, constraints, and heuristics at multiple stages, incurring significant engineering overhead.
478
 
479
+ Moreover, classical planners operate on compact, assumed-sufficient state representations; extending them to reason directly over raw, heterogeneous and noisy data streams is non-trivial. This results in a <mark>limited scalability to multimodal data and multitask settings</mark>, as incorporating high-dimensional perceptual inputs (RGB, depth, tactile, audio) traditionally required extensive engineering efforts to extract meaningful features for control. Also, the large number of tasks, coupled with the adoption of *per-task* planners, goal parameterizations, and safety constraints, results in an explosion in design and validation options, with little opportunity to reuse solutions across tasks.
480
 
481
+ Setting aside integration and scalability challenges: developing accurate modeling of contact, friction, and compliance for complicated systems remains difficult. Rigid-body approximations are often insufficient in the presence of deformable objects, and <mark>relying on approximated models hinders real-world applicability</mark> of the methods developed. In the case of complex, time-dependent and/or non-linear dynamics, even moderate mismatches in parameters, unmodeled evolutions, or grasp-induced couplings can qualitatively affect the observed dynamics.
482
 
483
+ Lastly, dynamics-based methods (naturally) overlook the rather recent <mark>increase in availability of openly-available robotics datasets</mark>. The curation of academic datasets by large centralized groups of human experts in robotics @oneillOpenXEmbodimentRobotic2025, @khazatskyDROIDLargeScaleInTheWild2025 is now increasingly complemented by a <mark>growing number of robotics datasets contributed in a decentralized fashion</mark> by individuals with varied expertise. If not tangentially, dynamics-based approaches are not posed to maximally benefit from this trend, which holds the premise of allowing generalization in the space of tasks and embodiments, like data was the cornerstone for advancements in vision @alayracFlamingoVisualLanguage2022 and natural-language understanding @brownLanguageModelsAre2020.
484
 
485
  Taken together, these limitations (Figure <a href="#classical-limitations" data-reference-type="ref" data-reference="classical-limitations">[classical-limitations]</a>) motivate the exploration of learning-based approaches that can (1) integrate perception and control more tightly, (2) adapt across tasks and embodiments with reduced expert modeling interventions and (3) scale gracefully in performance as more robotics data becomes available.
486
 
487
  ## Robot (Reinforcement) Learning
488
 
489
  <span id="learning-rl" style="position: absolute;"></span>
490
+ <Quote source="Richard Sutton">
491
+ Approximate the solution, not the problem
492
+ </Quote>
 
 
 
 
493
  <div class="callout">
494
 
495
  TL;DR The need for expensive, high-fidelity simulators can be obviated learning from real-world data, using sample-efficient algorithms that can safely train directly on hardware.
 
509
 
510
  Learning-based techniques for robotics naturally address the limitations presented in Section <a href="#classical" data-reference-type="ref" data-reference="classical">[classical]</a> (Figure <a href="#robot-learning-upsides" data-reference-type="ref" data-reference="robot-learning-upsides">[robot-learning-upsides]</a>). In particular, learning-based techniques typically rely on monolithic prediction-to-action pipelines (*visuomotor policies*) which directly map sensorimotor inputs to predicted actions, streamlining control policies by removing the need to interface multiple components. Mapping sensory inputs to actions also makes it possible to incorporate diverse input modalities, leveraging the automatic feature extraction capabilities of modern learning systems. Moreover, learning-based approaches can, in principle, bypass explicit modeling altogether and instead rely solely on interaction data--an advantage that proves transformative when dynamics are difficult to model or entirely unknown. Lastly, learning for robotics (*robot learning*) is naturally well posed to leverage the growing amount of robotics data openly available, just as computer vision and natural language processing did historically benefit from large-scale corpora of data, in great part overlooked by dynamics-based approaches.
511
 
512
+ Being a field in its relatively nascent stages, no prevalent technique(s) proves distinctly better than any other in the domain of robot learning. Still, two major classes of methods gained prominence: <mark>Reinforcement Learning (RL)</mark> and <mark>Behavioral Cloning (BC)</mark> (Figure <a href="#robot-learning-atlas" data-reference-type="ref" data-reference="robot-learning-atlas">[robot-learning-atlas]</a>). In this section, we provide a conceptual overview of applications of RL to robotics, as well as introduce practical examples of how to use RL within `lerobot`. We then introduce the major limitations RL suffers from, to introduce BC techniques in Section <a href="#learning-imitation" data-reference-type="ref" data-reference="learning-imitation">[learning-imitation]</a> and Section <a href="#learning-foundation" data-reference-type="ref" data-reference="learning-foundation">[learning-foundation]</a>.
513
 
514
  <div class="wrapfigure">
515
 
 
646
 
647
  Streamlined end-to-end control pipelines, data-driven feature extraction and a disregard for explicit modeling in favor of interaction data are all features of RL for robotics. However, RL still suffers from limitations concerning safety and learning efficiency, particularly pressing for real-world robotics applications.
648
 
649
+ First, especially early in training, <mark>actions are typically explorative, and thus may be erratic</mark>. On physical systems, untrained policies may command high velocities, self-colliding configurations, or torques exceeding joint limits, leading to wear and potential hardware damage. Mitigating these risks requires external safeguards (e.g., watchdogs, safety monitors, emergency stops), often incurring a high degree of human supervision. Further, in the typical episodic setting considered in most robotics problems, experimentation is substantially slowed down by the need to manually reset the environment over the course of training, a time-consuming and error-prone process. Second, learning efficiently remains problematic in RL, <mark>limiting the applicability of RL in real-world robotics due to consequently prohibitive timescales of training</mark>. Even strong algorithms such as SAC @haarnojaSoftActorCriticOffPolicy2018 typically require a large number of transitions $\{ (s_t, a_t, r_t, s_{t+1})\}_{t=1}^N$. On real-world hardware, generating this data is time-consuming.
650
 
651
  <figure>
652
  <ResponsiveImage
 
678
 
679
  Selecting the dynamics distribution $\Xi$ is also non-trivial. On the one hand, distributions with low entropy might risk to cause failure at transfer time, due to the limited robustness induced over the course of training. On the other hand, excessive randomization may cause over-regularization and hinder performance @margolisRapidLocomotionReinforcement2022. Consequently, the research community investigated approaches to automatically select the randomization distribution $\Xi$, using signals from the training process or tuning it to reproduce observed real-world trajectories. @akkayaSolvingRubiksCube2019 use a parametric uniform distribution $\mathcal U(a, b)$ as $\Xi$, widening the bounds $a, b$ as training progresses and the agent’s performance improves (AutoDR). While effective, AutoDR requires significant tuning--the bounds are widened by a fixed, pre-specified amount $\Delta$ along--and may disregard data when performance *does not* improve after a distribution update @tiboniDomainRandomizationEntropy2024. @tiboniDomainRandomizationEntropy2024 propose a similar method to AutoDR (DORAEMON) to evolve $\Xi$ based on the training signal, but with the key difference of explicitly maximizing the entropy of a parametric Beta distribution--inherently more flexible than uniform distributions--with learned updates instead of fixed $\Delta$. In this, DORAEMON proves particularly effective at dynamically increasing the entropy levels of the training distribution by employing an outer-loop max-entropy objective, tackled under performance constraints in the inner-loop RL problem. Other approaches to automatically perform DR consist in specifically tuning $\Xi$ to align as much as possible the simulation and real-world domains. 
For instance, @chebotarClosingSimtorealLoop2019 interleave in-simulation policy training with repeated real-world policy rollouts used to adjust $\Xi$ based on real-world data, while @tiboniDROPOSimtoRealTransfer2023 leverage a single, pre-collected set of real-world trajectories and tune $\Xi$ under a simple likelihood objective.
680
 
681
+ While DR has shown promise, it does not address the main limitation that, even under the assumption that an ideal distribution $\Xi$ was available, many robotics problems <mark>cannot be simulated with high-enough fidelity under practical computational constraints</mark>. Simulating contact-rich manipulation of possibly deformable or soft materials--i.e., *folding a piece of clothing*--can prove time-intensive, limiting the benefits of in-simulation training.
682
 
683
  A perhaps more fundamental limitation of RL for robotics is the general unavailability of complicated tasks’ *dense* reward function, the design of which is essentially based on human expertise, ingenuity and trial-and-error. In practice, *sparse* reward functions can be used to conclude whether one specific goal has been attained--*has this t-shirt been correctly folded?*--but unfortunately result in more challenging learning. As a result, despite notable successes, deploying RL directly on real-world robots at scale remains challenging.
684
 
 
732
  ```
733
  Similarly to DQN, DDPG also employs the same replay buffer mechanism, reusing past transitions over training for increased sample efficiency and estimating the loss function via MC-estimates.
734
 
735
+ Soft Actor-Critic (SAC) @haarnojaSoftActorCriticOffPolicy2018 is a derivation of DDPG in the max-entropy (MaxEnt) RL framework, in which RL agents are tasked with <mark>maximizing the discounted cumulative reward, while acting as randomly as possible</mark>. MaxEnt RL @haarnojaReinforcementLearningDeep2017b has proven particularly robust thanks to the development of diverse behaviors, incentivized by its entropy-regularization formulation. In that, MaxEnt revisits the RL objective $J (\pi)$ to specifically account for the policy entropy $\mathcal H(\pi (\bullet \vert s_t))$,
736
  <span id="J-soft" style="position: absolute;">
737
  </span>
738
 
 
1058
  ## Robot (Imitation) Learning
1059
 
1060
  <span id="learning-imitation" style="position: absolute;"></span>
1061
+ <Quote source="Norbert Wiener">
1062
+ The best material model for a cat is another, or preferably the same cat
1063
+ </Quote>
 
 
 
 
1064
  <div class="callout">
1065
 
1066
  TL;DR Behavioral Cloning provides a natural platform to learn from real-world interactions without the need to design any reward function, and generative models prove more effective than point-wise policies at dealing with multimodal demonstration datasets.
 
1078
  <figcaption>(A) Average (with standard deviation) evolution of the actuation levels over the first 5 recorded episodes in <a href="lerobot/svla_so101_pickplace" class="uri">lerobot/svla_so101_pickplace</a>. Proprioperceptive states prove invaluable in determining the robot’s state during an episode. (B) Camera frames are also recorded alongside measurements on the robot’s state, capturing information about the robot’s interaction with its environment.</figcaption>
1079
  </figure>
1080
 
1081
+ Learning from human demonstrations provides a pragmatic alternative to the RL pipeline discussed in Section <a href="#learning-rl" data-reference-type="ref" data-reference="learning-rl">[learning-rl]</a>. Indeed, especially in real-world robotics, online exploration is typically <mark>costly and potentially unsafe</mark>, and designing (dense) reward signals is a <mark>brittle and task-specific</mark> process. Further, even success detection itself often requires bespoke instrumentation, while episodic training demands reliable resets--all factors complicating training RL algorithms on hardware at scale. Behavioral Cloning (BC) sidesteps these constraints by <mark>casting control as an imitation learning problem</mark>, leveraging previously collected expert demonstrations to anchor the learned autonomous behavior. Most notably, by *learning-to-imitate*, autonomous systems naturally adhere to the objectives, preferences, and success criteria implicitly encoded in the data, which reduces early-stage exploratory failures and obviates hand-crafted reward shaping altogether.
1082
 
1083
  Formally, let $\mathcal D = \{ \tau^{(i)} \}_{i=1}^N$ be a set of expert trajectories, with $\tau^{(i)} = \{(o_t^{(i)}, a_t^{(i)})\}_{t=0}^{T_i}$ representing the $i$-th length-$T_i$ trajectory in $\mathcal D$, $o_t \in \mathcal O$ denoting observations (e.g., images and proprioception altogether), and $a_t \in \mathcal A$ the expert actions. Typically, observations $o \in \mathcal O$ consist of both image and proprioperceptive information, while actions $a \in \mathcal A$ represent control specifications for the robot to execute, e.g. a joint configuration. Note that differently from Section <a href="#learning-rl" data-reference-type="ref" data-reference="learning-rl">[learning-rl]</a>, in the imitation learning context $\mathcal D$ denotes an offline dataset collecting $N$ length-$T_i$ reward-free (expert) human trajectories $\tau^{(i)}$, and *not* the environment dynamics. Similarly, in this section $\tau^{(i)}$ represents a length-$T_i$ trajectory of observation-action pairs, which crucially *omits entirely any reward* information. Figure <a href="#ch4-bc-trajectories" data-reference-type="ref" data-reference="ch4-bc-trajectories">[ch4-bc-trajectories]</a> graphically shows trajectories in terms of the average evolution of the actuation on the 6 joints of a teleoperated SO-100 manipulator. Notice how proprioperceptive states are captured jointly with camera frames over the course of the recorded episodes, providing a unified high-frame rate collection of both image and joint teleoperation data. Figure <a href="#ch4-observation-action-mapping" data-reference-type="ref" data-reference="ch4-observation-action-mapping">[ch4-observation-action-mapping]</a> shows $(o_t, a_t)$-pairs for the same dataset, with the actions performed by the human expert illustrated alongside the corresponding observation. 
In principle, (expert) trajectories $\tau^{(i)}$ can have different lengths since demonstrations might exhibit multi-modal strategies to attain the same goal, resulting in multiple, different behaviors.
1084
 
 
1936
  ## Generalist Robot Policies
1937
 
1938
  <span id="learning-foundation" style="position: absolute;"></span>
1939
+ <Quote source="Robert A. Heinlein">
1940
+ Specialization is for insects
1941
+ </Quote>
 
 
 
 
1942
  <div class="callout">
1943
 
1944
  TL;DR Openly available, large-scale datasets and the development of stable-to-train, expressive and efficient architectures fostered research on the development of generalist robot policies that can operate across embodiment and tasks.
 
1979
 
1980
  In a follow-up work, the same group of authors propose a modified method to learn generalist models, leveraging (1) a more powerful architecture and (2) scaling up the dataset used. In RT-2, @brohanRT2VisionLanguageActionModels2023 propose inheriting internet-scale semantic knowledge from large-scale multi-modal datasets to learn a single, *unified model* for robotics control. Such a model, termed *Vision-Language-Action* (VLA) in the original RT-2 paper, effectively casts robot control as a language-modeling problem, and in particular as a Visual Question-Answering (VQA) task, in which the output token space used to represent *textual tokens* is shared with the *8-bit tokens* used to represent the 256 ($2^8$) actuation levels of a 6-dof robot. In their work, @brohanRT2VisionLanguageActionModels2023 propose co-fine-tuning large-scale VLMs such as PaLIX @chenPaLIXScalingMultilingual2023 or PaLM-E @driessPaLMEEmbodiedMultimodal2023 on a mix of (1) web and (2) robotics data, complementing VQA training with robotics-specific signal, and learning to directly output robot actions in a shared token space for visual and language inputs. In their work, the authors claim using large models trained on internet-scale data as backbones for VLAs allows models to tap into the rich semantic knowledge embedded in the VLM’s parameters, interpreting instructions and unseen objects by connecting them to concepts acquired while pre-training. For instance, @brohanRT2VisionLanguageActionModels2023 show that while RT-2 has never been explicitly trained to repurpose tools for a *hammering* task, it can still combine its semantic understanding of images, so that when asked which object between (1) a piece of paper, (2) a pair of headphones or (3) a rock may be used instead of a hammer, it correctly answers (3).
1981
 
1982
+ Traditionally, research efforts revolved around not only training models, but also proposing datasets for the community, a costly and time-consuming process. Due to the aforementioned embodiment gap, the data used in research efforts in robot learning have traditionally proved rather fragmented, tailored to the specific task considered by the specific group of researchers who collected it, which ultimately hindered integration. The Open X-Embodiment project @oneillOpenXEmbodimentRobotic2025 was a landmark collaboration effort to address data fragmentation, by curating the aggregation of 60 *existing* robotics datasets from 22 different robot embodiments and 21 institutions across the world, and resulted in a total 1.4M of cross-embodiments, cross-tasks, openly-available trajectories. Besides the contribution of an aggregate, large scale dataset, @oneillOpenXEmbodimentRobotic2025 also demonstrated significant positive transfer *across tasks and embodiments*, showing that <mark>a single model trained on multi-embodiment data can outperform specialist models</mark> trained on their respective single-embodiment datasets. The Distributed Robot Interaction Dataset (DROID) @khazatskyDROIDLargeScaleInTheWild2025 represents another significant step towards addressing the problem of scarce and disaggregated data in robot learning, providing a unique dataset consisting of 75k+ human demonstrations collected in realistic (*in-the-wild*) manipulation settings, providing another cornerstone for building general-purpose robot policies. Recently, foundational datasets curated through large, centralized efforts, are increasingly complemented by decentralized, community-driven contributions of robotics data. 
Software libraries like `lerobot` have been instrumental in enabling decentralized collection of large amounts of data, providing the infrastructure for researchers and practitioners to easily contribute trajectories from a wide range of embodiments, democratizing data access via distributed collection.
1983
 
1984
  Despite these advancements, the success of large, proprietary models like RT-1 and RT-2, highlighted a growing accessibility gap in robotics research, as training and deploying large-scale robotics foundation models requires computational resources simply unattainable for most research institutions. The OpenVLA project @kimOpenVLAOpenSourceVisionLanguageAction2024 emerged in direct contrast to traditionally closed-source efforts to develop VLAs. In particular, @kimOpenVLAOpenSourceVisionLanguageAction2024 trained OpenVLA by exclusively leveraging openly available data (970k+ trajectories from the Open-X dataset), and openly shared their training recipes alongside the model weights. Architecturally, OpenVLA integrates a pre-trained vision encoder to project visual tokens into the embedding space of the Llama2-7B @touvronLlama2Open2023 language-model backbone. The language model backbone is then used to predict *discrete action tokens* over 256 activation levels.
1985
 
 
2229
 
2230
  <span id="conclusions" style="position: absolute;"></span>
2231
 
2232
+ This tutorial has charted the paradigmatic shift transforming robotics, tracing the <mark>evolution of robotics from structured, model-based methods to the dynamic, data-driven approaches that define modern robot learning</mark>. We began by examining the limitations of traditional dynamics-based control, namely its brittleness and significant engineering overhead, which motivate the adoption of more flexible, learning-based alternatives. Unlike scalable, data-driven techniques, conventional explicit models demand extensive human expertise, hindering wider accessibility and scalability of robotics.
2233
 
2234
  Our exploration traced a clear trajectory of progress, beginning with Reinforcement Learning (RL). While RL offers a powerful paradigm for learning through interaction, its application in robotics is complicated by challenges such as sample inefficiency, safety concerns in real-world training, and the complexities of reward design. We saw how modern approaches like HIL-SERL make real-world RL more feasible by incorporating training-time human guidance, datasets of previously collected data as well as learned reward classifiers.
2235
 
app/src/content/assets/lerobot-logo-thumbnail.png ADDED

Git LFS Details

  • SHA256: 72ee48061c2528eb9f6a1f163622d4805476a52c813bd03f2f32e32d89afd63e
  • Pointer size: 131 Bytes
  • Size of remote file: 164 kB
app/src/content/embeds/{banner2.html → banner.html} RENAMED
File without changes
app/src/styles/_base.css CHANGED
@@ -126,8 +126,8 @@ html {
126
  }
127
 
128
  .content-grid main mark {
129
- background-color: color-mix(in srgb, var(--primary-color, #007AFF) 10%, transparent);
130
- border: 1px solid color-mix(in srgb, var(--primary-color) 18%, transparent);
131
  color: inherit;
132
  padding: 4px 6px;
133
  border-radius: 4px;
 
126
  }
127
 
128
  .content-grid main mark {
129
+ background-color: color-mix(in srgb, var(--primary-color, #007AFF) 30%, transparent);
130
+ border: 1px solid color-mix(in srgb, var(--primary-color) 38%, transparent);
131
  color: inherit;
132
  padding: 4px 6px;
133
  border-radius: 4px;