Spaces:
Running
Running
Add action representation diagram explaining relative vs delta vs absolute
Browse files
Adapted from UMI paper (Chi et al., 2024). Shows why relative trajectory
avoids error accumulation and calibration requirements.
Made-with: Cursor
app/src/content/chapters/folding/08-ablations.mdx
CHANGED
|
@@ -139,6 +139,15 @@ We hypothesise that the root cause is the difference in **multi-modality** betwe
|
|
| 139 |
|
| 140 |
#### 2. Relative actions improve performance consistently
|
| 141 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
Comparing π0.5 without relative actions (1.2: 20% total SR, 40% L1) to π0.5 with relative actions and quantile normalization (1.3: 35% total SR, 70% L1), and then to the full combination in 1.7 (40% total SR, 80% L1), shows that training with relative actions consistently improves performance. The trend is clear and shows up in every comparison we made.
|
| 143 |
|
| 144 |
The effect size doesn't separate cleanly at 20 rollouts, but the direction is consistent. **Caveat:** π0.5 is likely pretrained with relative actions, so 1.3 and 1.7 fine-tune in a regime consistent with pretraining, while 1.2 fine-tunes against it.
|
|
|
|
| 139 |
|
| 140 |
#### 2. Relative actions improve performance consistently
|
| 141 |
|
| 142 |
+
We use **relative trajectory** actions as defined by [UMI](https://arxiv.org/abs/2402.10329): each action in the chunk is an offset from the robot's current state at prediction time, not from the previous action. This avoids error accumulation (unlike true delta) and doesn't require a global coordinate frame (unlike absolute). LeRobot uses absolute actions by default — switching to relative trajectory was one of our key improvements.
|
| 143 |
+
|
| 144 |
+
<HtmlEmbed
|
| 145 |
+
id="action-representations"
|
| 146 |
+
src="folding/action-representations.html"
|
| 147 |
+
title="Action Representations"
|
| 148 |
+
desc="Relative trajectory (blue) references all actions to the current state. Delta (yellow) chains each action to the previous one, accumulating error. Absolute (red) requires a global coordinate frame. Diagram adapted from UMI (Chi et al., 2024)."
|
| 149 |
+
/>
|
| 150 |
+
|
| 151 |
Comparing π0.5 without relative actions (1.2: 20% total SR, 40% L1) to π0.5 with relative actions and quantile normalization (1.3: 35% total SR, 70% L1), and then to the full combination in 1.7 (40% total SR, 80% L1), shows that training with relative actions consistently improves performance. The trend is clear and shows up in every comparison we made.
|
| 152 |
|
| 153 |
The effect size doesn't separate cleanly at 20 rollouts, but the direction is consistent. **Caveat:** π0.5 is likely pretrained with relative actions, so 1.3 and 1.7 fine-tune in a regime consistent with pretraining, while 1.2 fine-tunes against it.
|
app/src/content/embeds/folding/action-representations.html
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8"/>
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
|
| 6 |
+
<style>
|
| 7 |
+
* { box-sizing: border-box; margin: 0; padding: 0; }
|
| 8 |
+
body { background: transparent; font-family: system-ui, sans-serif; color: #e8eaf0; }
|
| 9 |
+
svg text { font-family: system-ui, sans-serif; }
|
| 10 |
+
</style>
|
| 11 |
+
</head>
|
| 12 |
+
<body>
|
| 13 |
+
<svg id="action-rep" style="overflow:visible"></svg>
|
| 14 |
+
<script>
|
| 15 |
+
function _initActionRep() {
|
| 16 |
+
const svgEl = document.getElementById('action-rep');
|
| 17 |
+
const W = Math.min(svgEl.parentElement.clientWidth || 700, 760);
|
| 18 |
+
const H = 320;
|
| 19 |
+
svgEl.setAttribute('width', W);
|
| 20 |
+
svgEl.setAttribute('height', H);
|
| 21 |
+
|
| 22 |
+
const svg = d3.select('#action-rep').attr('width', W).attr('height', H);
|
| 23 |
+
svg.selectAll('*').remove();
|
| 24 |
+
|
| 25 |
+
const m = { top: 36, right: 30, bottom: 44, left: 50 };
|
| 26 |
+
const w = W - m.left - m.right;
|
| 27 |
+
const h = H - m.top - m.bottom;
|
| 28 |
+
const g = svg.append('g').attr('transform', `translate(${m.left},${m.top})`);
|
| 29 |
+
|
| 30 |
+
const GRID = '#2a2d3a';
|
| 31 |
+
const SUB = '#8b8fa8';
|
| 32 |
+
|
| 33 |
+
// Pose trajectory (ground truth)
|
| 34 |
+
const poses = [
|
| 35 |
+
{t:0, p:0.5}, {t:1, p:1.2}, {t:2, p:1.8}, {t:3, p:2.6},
|
| 36 |
+
{t:4, p:3.0}, {t:5, p:3.9}, {t:6, p:4.3}, {t:7, p:5.2}, {t:8, p:5.8}
|
| 37 |
+
];
|
| 38 |
+
|
| 39 |
+
const x = d3.scaleLinear().domain([0, 8]).range([0, w]);
|
| 40 |
+
const y = d3.scaleLinear().domain([0, 6.5]).range([h, 0]);
|
| 41 |
+
|
| 42 |
+
// Grid
|
| 43 |
+
g.append('g').selectAll('line').data(y.ticks(5)).join('line')
|
| 44 |
+
.attr('x1', 0).attr('x2', w).attr('y1', d => y(d)).attr('y2', d => y(d))
|
| 45 |
+
.attr('stroke', GRID).attr('stroke-dasharray', '3,3');
|
| 46 |
+
|
| 47 |
+
// Inference boundaries
|
| 48 |
+
[0, 4].forEach(t => {
|
| 49 |
+
g.append('line').attr('x1', x(t)).attr('x2', x(t)).attr('y1', -20).attr('y2', h + 8)
|
| 50 |
+
.attr('stroke', '#555').attr('stroke-dasharray', '6,3').attr('stroke-width', 1);
|
| 51 |
+
g.append('text').attr('x', x(t) + 4).attr('y', -8)
|
| 52 |
+
.attr('fill', SUB).attr('font-size', 9)
|
| 53 |
+
.text(t === 0 ? 'Inference at t=0' : 'Inference at t=4');
|
| 54 |
+
});
|
| 55 |
+
|
| 56 |
+
// Axes
|
| 57 |
+
g.append('g').attr('transform', `translate(0,${h})`).call(d3.axisBottom(x).ticks(8).tickSize(0))
|
| 58 |
+
.call(gg => { gg.select('.domain').attr('stroke', GRID); gg.selectAll('text').attr('fill', SUB).attr('font-size', 10); });
|
| 59 |
+
g.append('g').call(d3.axisLeft(y).ticks(5).tickSize(0))
|
| 60 |
+
.call(gg => { gg.select('.domain').attr('stroke', GRID); gg.selectAll('text').attr('fill', SUB).attr('font-size', 10); });
|
| 61 |
+
|
| 62 |
+
g.append('text').attr('x', w / 2).attr('y', h + 36).attr('text-anchor', 'middle')
|
| 63 |
+
.attr('fill', SUB).attr('font-size', 11).text('Time');
|
| 64 |
+
g.append('text').attr('x', -h / 2).attr('y', -36).attr('text-anchor', 'middle')
|
| 65 |
+
.attr('transform', 'rotate(-90)').attr('fill', SUB).attr('font-size', 11).text('Pose');
|
| 66 |
+
|
| 67 |
+
// Colors
|
| 68 |
+
const COL_REL = '#3b82f6';
|
| 69 |
+
const COL_DELTA = '#f59e0b';
|
| 70 |
+
const COL_ABS = '#ef4444';
|
| 71 |
+
|
| 72 |
+
// --- ABSOLUTE: red dots at each pose ---
|
| 73 |
+
poses.forEach(d => {
|
| 74 |
+
g.append('circle').attr('cx', x(d.t)).attr('cy', y(d.p)).attr('r', 5)
|
| 75 |
+
.attr('fill', COL_ABS).attr('stroke', '#1a1d27').attr('stroke-width', 1.5);
|
| 76 |
+
});
|
| 77 |
+
|
| 78 |
+
// --- RELATIVE: arrows from current state to each future action in chunk ---
|
| 79 |
+
// Chunk 1: from state at t=0 to actions 1..3
|
| 80 |
+
const chunk1Origin = poses[0];
|
| 81 |
+
for (let i = 1; i <= 3; i++) {
|
| 82 |
+
drawArrow(g, x(chunk1Origin.t), y(chunk1Origin.p), x(poses[i].t), y(poses[i].p), COL_REL, 1.5, 0.7);
|
| 83 |
+
}
|
| 84 |
+
// Chunk 2: from state at t=4 to actions 5..7
|
| 85 |
+
const chunk2Origin = poses[4];
|
| 86 |
+
for (let i = 5; i <= 7; i++) {
|
| 87 |
+
drawArrow(g, x(chunk2Origin.t), y(chunk2Origin.p), x(poses[i].t), y(poses[i].p), COL_REL, 1.5, 0.7);
|
| 88 |
+
}
|
| 89 |
+
|
| 90 |
+
// --- DELTA: arrows from each action to the next ---
|
| 91 |
+
for (let i = 0; i < poses.length - 1; i++) {
|
| 92 |
+
drawArrow(g, x(poses[i].t), y(poses[i].p), x(poses[i + 1].t), y(poses[i + 1].p), COL_DELTA, 1.2, 0.45);
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
+
// Legend
|
| 96 |
+
const legY = -24;
|
| 97 |
+
const items = [
|
| 98 |
+
{ color: COL_REL, label: 'Relative trajectory (used here)', style: 'arrow' },
|
| 99 |
+
{ color: COL_DELTA, label: 'Delta (accumulates error)', style: 'arrow' },
|
| 100 |
+
{ color: COL_ABS, label: 'Absolute (needs calibration)', style: 'dot' },
|
| 101 |
+
];
|
| 102 |
+
|
| 103 |
+
let lx = w - 10;
|
| 104 |
+
items.slice().reverse().forEach(item => {
|
| 105 |
+
const textEl = g.append('text').attr('y', legY + 4).attr('fill', item.color).attr('font-size', 10).attr('font-weight', 500).text(item.label);
|
| 106 |
+
const tw = textEl.node().getComputedTextLength();
|
| 107 |
+
textEl.attr('x', lx - tw);
|
| 108 |
+
if (item.style === 'dot') {
|
| 109 |
+
g.append('circle').attr('cx', lx - tw - 10).attr('cy', legY + 1).attr('r', 4)
|
| 110 |
+
.attr('fill', item.color).attr('stroke', '#1a1d27').attr('stroke-width', 1);
|
| 111 |
+
} else {
|
| 112 |
+
g.append('line').attr('x1', lx - tw - 20).attr('x2', lx - tw - 6)
|
| 113 |
+
.attr('y1', legY + 1).attr('y2', legY + 1)
|
| 114 |
+
.attr('stroke', item.color).attr('stroke-width', 2);
|
| 115 |
+
g.append('polygon')
|
| 116 |
+
.attr('points', `${lx - tw - 6},${legY - 2} ${lx - tw - 6},${legY + 4} ${lx - tw - 2},${legY + 1}`)
|
| 117 |
+
.attr('fill', item.color);
|
| 118 |
+
}
|
| 119 |
+
lx = lx - tw - 28;
|
| 120 |
+
});
|
| 121 |
+
|
| 122 |
+
/**
 * Draws a straight arrow (shaft + triangular head) from (x1, y1) towards (x2, y2).
 *
 * The head tip stops 2px short of the end point and the shaft ends at the base
 * of the head. Nothing is drawn when the start and end points coincide (or a
 * coordinate is not a finite number), because the direction would be undefined
 * and NaN coordinates would otherwise be written into the SVG.
 *
 * @param {object} parent - Selection-like object exposing a chainable
 *   `append(tag)` / `attr(name, value)` API (a d3 selection in this file).
 * @param {number} x1 - Start x, in the parent's coordinate system.
 * @param {number} y1 - Start y.
 * @param {number} x2 - End x.
 * @param {number} y2 - End y.
 * @param {string} color - Stroke colour of the shaft and fill colour of the head.
 * @param {number} width - Stroke width of the shaft.
 * @param {number} opacity - Opacity applied to both shaft and head.
 * @returns {void}
 */
function drawArrow(parent, x1, y1, x2, y2, color, width, opacity) {
  const HEAD_LENGTH = 6; // distance from head base to head tip
  const HEAD_HALF_WIDTH = 3; // half of the head's base width
  const TIP_GAP = 2; // gap left between the tip and the end point

  const dx = x2 - x1;
  const dy = y2 - y1;
  const len = Math.hypot(dx, dy);

  // Zero-length (or NaN) arrows have no direction; dividing by `len` would
  // yield NaN for every derived coordinate, so skip drawing instead.
  if (!(len > 0) || !Number.isFinite(len)) {
    return;
  }

  // Unit vector along the arrow.
  const ux = dx / len;
  const uy = dy / len;

  const tipX = x2 - ux * TIP_GAP;
  const tipY = y2 - uy * TIP_GAP;
  const baseX = tipX - ux * HEAD_LENGTH;
  const baseY = tipY - uy * HEAD_LENGTH;

  // Vector perpendicular to the arrow, used to spread the two base corners.
  const perpX = -uy * HEAD_HALF_WIDTH;
  const perpY = ux * HEAD_HALF_WIDTH;

  parent.append('line')
    .attr('x1', x1).attr('y1', y1).attr('x2', baseX).attr('y2', baseY)
    .attr('stroke', color).attr('stroke-width', width).attr('opacity', opacity);

  parent.append('polygon')
    .attr('points', `${tipX},${tipY} ${baseX + perpX},${baseY + perpY} ${baseX - perpX},${baseY - perpY}`)
    .attr('fill', color).attr('opacity', opacity);
}
|
| 138 |
+
}
|
| 139 |
+
|
| 140 |
+
/**
 * Redraws the action-representation diagram.
 *
 * Does nothing while the global `d3` is not yet defined, so a resize event
 * that fires before the CDN script has finished loading cannot reach
 * `_initActionRep` and throw a ReferenceError on `d3`.
 */
function render() {
  if (typeof d3 === 'undefined') {
    return;
  }
  _initActionRep();
}

// Draw immediately when d3 is already available on the page; otherwise inject
// the CDN script and draw once it has loaded.
if (typeof d3 !== 'undefined') {
  render();
} else {
  const d3Script = document.createElement('script');
  d3Script.src = 'https://cdnjs.cloudflare.com/ajax/libs/d3/7.9.0/d3.min.js';
  d3Script.onload = render;
  document.head.appendChild(d3Script);
}

// The SVG width is derived from the parent element's width, so redraw on resize.
window.addEventListener('resize', render);
|
| 153 |
+
</script>
|
| 154 |
+
</body>
|
| 155 |
+
</html>
|