Spaces:
Running
Running
| <html lang="en"> | |
| <head> | |
| <meta charset="UTF-8"/> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"/> | |
| <style> | |
| :root { | |
| --bg: transparent; | |
| --text: #e8eaf0; | |
| --subtext: #8b8fa8; | |
| --border: #2a2d3a; | |
| --chart-bg: #1a1d27; | |
| --btn-active-bg: #252835; | |
| --btn-active-text: #e8eaf0; | |
| --btn-active-border: #4a4d5a; | |
| } | |
| * { box-sizing: border-box; margin: 0; padding: 0; } | |
| body { | |
| background: var(--bg); | |
| font-family: system-ui, sans-serif; | |
| color: var(--text); | |
| padding: 0; | |
| min-height: 100vh; | |
| } | |
| .container { | |
| max-width: 960px; | |
| margin: 0 auto; | |
| padding: 28px 24px 36px; | |
| } | |
| .tab-row { | |
| display: flex; | |
| gap: 4px; | |
| margin-bottom: 20px; | |
| border-bottom: 1px solid var(--border); | |
| padding-bottom: 0; | |
| } | |
| .tab { | |
| padding: 7px 16px; | |
| border: 1px solid transparent; | |
| border-bottom: none; | |
| background: none; | |
| color: var(--subtext); | |
| cursor: pointer; | |
| font-family: inherit; | |
| font-size: 13px; | |
| letter-spacing: 0.05em; | |
| text-transform: uppercase; | |
| border-radius: 4px 4px 0 0; | |
| transition: all 0.15s; | |
| position: relative; | |
| bottom: -1px; | |
| } | |
| .tab:hover { color: var(--text); background: var(--chart-bg); } | |
| .tab.active { | |
| color: var(--text); | |
| background: var(--chart-bg); | |
| border-color: var(--border); | |
| border-bottom-color: var(--chart-bg); | |
| } | |
| .panel { display: none; } | |
| .panel.active { display: block; } | |
| .chart-wrap { | |
| background: var(--chart-bg); | |
| border: 1px solid var(--border); | |
| border-radius: 6px; | |
| padding: 20px 20px 12px; | |
| } | |
| .chart-header { | |
| display: flex; | |
| justify-content: space-between; | |
| align-items: center; | |
| flex-wrap: wrap; | |
| gap: 8px; | |
| margin-bottom: 16px; | |
| } | |
| .chart-title { | |
| font-size: 13px; | |
| text-transform: uppercase; | |
| letter-spacing: 0.08em; | |
| color: var(--subtext); | |
| } | |
| .mode-toggle { | |
| display: flex; | |
| gap: 0; | |
| } | |
| .mode-btn { | |
| padding: 5px 14px; | |
| font-size: 12px; | |
| font-family: inherit; | |
| cursor: pointer; | |
| border: 1px solid var(--border); | |
| background: none; | |
| color: var(--subtext); | |
| transition: all 0.15s; | |
| letter-spacing: 0.04em; | |
| } | |
| .mode-btn:first-child { border-radius: 4px 0 0 4px; } | |
| .mode-btn:last-child { border-radius: 0 4px 4px 0; border-left: none; } | |
| .mode-btn.active { background: var(--btn-active-bg); color: var(--btn-active-text); border-color: var(--btn-active-border); } | |
| .mode-btn:hover:not(.active) { color: var(--text); } | |
| .legend { | |
| display: flex; | |
| flex-wrap: wrap; | |
| gap: 10px 18px; | |
| margin-top: 14px; | |
| padding-top: 12px; | |
| border-top: 1px solid var(--border); | |
| } | |
| .legend-item { | |
| display: flex; | |
| align-items: center; | |
| gap: 6px; | |
| font-size: 12px; | |
| color: var(--subtext); | |
| letter-spacing: 0.03em; | |
| } | |
| .legend-swatch { | |
| width: 10px; | |
| height: 10px; | |
| border-radius: 2px; | |
| flex-shrink: 0; | |
| } | |
| .series-labels { | |
| display: flex; | |
| gap: 16px; | |
| margin-bottom: 14px; | |
| } | |
| .series-badge { | |
| display: flex; | |
| align-items: center; | |
| gap: 6px; | |
| font-size: 12px; | |
| letter-spacing: 0.05em; | |
| text-transform: uppercase; | |
| color: var(--subtext); | |
| } | |
| .series-badge span { | |
| width: 24px; | |
| height: 3px; | |
| border-radius: 2px; | |
| display: inline-block; | |
| } | |
| svg text { font-family: system-ui, sans-serif; } | |
| .note { | |
| font-size: 12px; | |
| color: var(--subtext); | |
| margin-top: 10px; | |
| line-height: 1.6; | |
| letter-spacing: 0.02em; | |
| } | |
| .insight-box { | |
| background: #12151f; | |
| border: 1px solid #2a2d3a; | |
| border-left: 3px solid #f7934f; | |
| border-radius: 4px; | |
| padding: 10px 14px; | |
| margin-bottom: 16px; | |
| font-size: 12px; | |
| color: #8b8fa8; | |
| line-height: 1.6; | |
| } | |
| .insight-box strong { color: #e8eaf0; } | |
| .filter-toggle { | |
| font-size: 12px; color: var(--subtext); cursor: pointer; border: 1px solid var(--border); | |
| background: none; padding: 4px 12px; border-radius: 14px; transition: all .15s; | |
| user-select: none; display: inline-flex; align-items: center; gap: 4px; font-family: inherit; | |
| } | |
| .filter-toggle:hover { color: var(--text); } | |
| /* Active ("showing all") state: the background tint is the same orange as the border and text (#f7934f = 247,147,79) at 12% opacity. */ | |
| .filter-toggle.showing-all { background: rgba(247,147,79,0.12); border-color: #f7934f; color: #f7934f; } | |
| </style> | |
| </head> | |
| <body> | |
| <div class="container"> | |
| <div class="tab-row" style="justify-content:space-between;align-items:center"> | |
| <div style="display:flex;gap:4px"> | |
| <button class="tab active" onclick="showTab('l1')">Level 1: Failure point</button> | |
| <button class="tab" onclick="showTab('l2')">Level 2: Failure point</button> | |
| </div> | |
| <button class="filter-toggle" id="fa-filter-btn" onclick="window._faToggleFilter()">Show all 11</button> | |
| </div> | |
| <!-- LEVEL 2 PANEL --> | |
| <div class="panel" id="panel-l2"> | |
| <div class="insight-box"> | |
| <strong>Initial training (1.x):</strong> nearly all level 2 failures occur at Unfold (the robot never gets past step 1). | |
| <strong>Fine-tuned (2.x):</strong> Unfold failures collapse (2.5: 0%), but late-stage failures (Fold 3, Rotation) emerge: the model now reliably unfolds but precision degrades at the end. | |
| </div> | |
| <div class="chart-wrap"> | |
| <div class="chart-header"> | |
| <div class="chart-title">Where does the robot fail? Level 2 failed rollouts by subtask</div> | |
| <div class="mode-toggle"> | |
| <button class="mode-btn active" id="mode-l2-abs" onclick="setMode('l2','abs')">Counts</button> | |
| <button class="mode-btn" id="mode-l2-pct" onclick="setMode('l2','pct')">Percentage</button> | |
| </div> | |
| </div> | |
| <svg id="chart-l2" width="100%" height="320" style="overflow:visible"></svg> | |
| <div class="legend" id="legend-l2"></div> | |
| </div> | |
| <p class="note">Each bar = one experiment, showing how its failed Level 2 rollouts distribute across subtasks. Only failed rollouts shown; successful rollouts are excluded. Toggle "Percentage" to compare failure distributions regardless of total failure count.</p> | |
| </div> | |
| <!-- LEVEL 1 PANEL --> | |
| <div class="panel active" id="panel-l1"> | |
| <div class="insight-box"> | |
| <strong>Level 1 failures</strong> are more distributed since unfolding is given. Initial-training failures concentrate at Fold 2 and Fold 4 (mid-task precision). Fine-tuning nearly eliminates failures entirely; only 2.3 (mirroring) and 2.4 (chunk=45) regress significantly. | |
| </div> | |
| <div class="chart-wrap"> | |
| <div class="chart-header"> | |
| <div class="chart-title">Where does the robot fail? Level 1 failed rollouts by subtask</div> | |
| <div class="mode-toggle"> | |
| <button class="mode-btn active" id="mode-l1-abs" onclick="setMode('l1','abs')">Counts</button> | |
| <button class="mode-btn" id="mode-l1-pct" onclick="setMode('l1','pct')">Percentage</button> | |
| </div> | |
| </div> | |
| <svg id="chart-l1" width="100%" height="320" style="overflow:visible"></svg> | |
| <div class="legend" id="legend-l1"></div> | |
| </div> | |
| <p class="note">Level 1 begins with the shirt already laid flat, so "Unfold" is not a failure point. Toggle "Percentage" to compare where each experiment struggles, independent of how many total failures it has.</p> | |
| </div> | |
| </div> | |
| <script> | |
| function _initFailureAnalysis() { | |
| const EXPERIMENTS = [ /* one entry per bar, in x-axis order; `series` decides the side of the series divider and the tick-label colour */ | |
| { id:'1.1 π0', series:1 }, | |
| { id:'1.2 π0.5', series:1 }, | |
| { id:'1.3 Relative', series:1 }, | |
| { id:'1.4 RABC low', series:1 }, | |
| { id:'1.5 RABC high', series:1 }, | |
| { id:'1.7 Rel+RABC', series:1 }, | |
| { id:'2.1 HQ', series:2 }, | |
| { id:'2.2 HQ+RABC+Rel', series:2 }, | |
| { id:'2.3 HQ+mirror', series:2 }, | |
| { id:'2.4 HQ chunk45', series:2 }, | |
| { id:'2.5 HQ+RABC+Rel★', series:2 }, | |
| ]; | |
| const L2_FAILURES = { /* Level 2: failed-rollout counts per experiment id, keyed by subtask; subtasks not listed are read as 0 when charted */ | |
| '1.1 π0': { 'Unfold':10 }, | |
| '1.2 π0.5': { 'Unfold':9, 'Rotation':1 }, | |
| '1.3 Relative': { 'Unfold':10 }, | |
| '1.4 RABC low': { 'Unfold':10 }, | |
| '1.5 RABC high': { 'Unfold':9, 'Fold 1':1 }, | |
| '1.7 Rel+RABC': { 'Unfold':8, 'Fold 3':1, 'Rotation':1 }, | |
| '2.1 HQ': { 'Unfold':8, 'Rotation':1 }, | |
| '2.2 HQ+RABC+Rel': { 'Unfold':4, 'Rotation':1 }, | |
| '2.3 HQ+mirror': { 'Unfold':8, 'Fold 1':1 }, | |
| '2.4 HQ chunk45': { 'Unfold':9, 'Fold 3':1 }, | |
| '2.5 HQ+RABC+Rel★': { 'Unfold':2 }, | |
| }; | |
| const L1_FAILURES = { /* Level 1: failed-rollout counts per experiment id, keyed by subtask (same layout as the Level 2 table) */ | |
| '1.1 π0': { 'Fold 2':1 }, | |
| '1.2 π0.5': { 'Rotation':4, 'Fold 4':2, 'Fold 2':1 }, | |
| '1.3 Relative': { 'Rotation':1, 'Fold 4':1 }, | |
| '1.4 RABC low': { 'Rotation':2, 'Fold 3':1, 'Fold 4':2, 'Fold 2':3 }, | |
| '1.5 RABC high': { 'Fold 3':2, 'Fold 2':6, 'Fold 1':1 }, | |
| '1.7 Rel+RABC': { 'Fold 4':1, 'Fold 2':1, 'Rotation':1 }, | |
| '2.1 HQ': { 'Fold 2':1, 'Fold 4':1 }, | |
| '2.2 HQ+RABC+Rel': { 'Fold 2':1 }, | |
| '2.3 HQ+mirror': { 'Fold 1':3, 'Fold 4':3, 'Fold 3':3 }, | |
| '2.4 HQ chunk45': { 'Rotation':2, 'Fold 4':3, 'Fold 3':1 }, | |
| '2.5 HQ+RABC+Rel★': {}, /* no failures recorded; the chart labels this bar "✓ 0 failures" */ | |
| }; | |
| const SUBTASKS_L2 = ['Unfold','Fold 1','Fold 2','Fold 3','Fold 4','Rotation']; /* stacking and legend order for the Level 2 chart */ | |
| const SUBTASKS_L1 = ['Fold 1','Fold 2','Fold 3','Fold 4','Rotation']; /* Level 1 chart has no 'Unfold' segment */ | |
| const COLORS = { /* fill colour of each subtask's bar segment and legend swatch */ | |
| 'Unfold': '#ef4444', | |
| 'Fold 1': '#f97316', | |
| 'Fold 2': '#eab308', | |
| 'Fold 3': '#84cc16', | |
| 'Fold 4': '#22d3ee', | |
| 'Rotation': '#818cf8', | |
| }; | |
| const HIDDEN_BY_DEFAULT = new Set(['1.4 RABC low','1.5 RABC high','2.3 HQ+mirror','2.4 HQ chunk45']); /* experiment ids left out of the charts until the filter button switches to "show all" */ | |
| let faShowAll = false; /* true while every experiment is shown; flipped by the filter button */ | |
| window._faToggleFilter = function() { | |
| faShowAll = !faShowAll; | |
| const btn = document.getElementById("fa-filter-btn"); | |
| btn.textContent = faShowAll ? "Key experiments" : "Show all 11"; | |
| btn.classList.toggle("showing-all", faShowAll); | |
| rendered.l1 = false; | |
| rendered.l2 = false; | |
| renderTab(document.querySelector('.panel.active').id.replace('panel-','')); | |
| }; | |
| function getVisibleExperiments() { | |
| return faShowAll ? EXPERIMENTS : EXPERIMENTS.filter(e => !HIDDEN_BY_DEFAULT.has(e.id)); | |
| } | |
| const modes = { l2: 'abs', l1: 'abs' }; | |
| function setMode(level, mode) { | |
| modes[level] = mode; | |
| document.getElementById(`mode-${level}-abs`).classList.toggle('active', mode === 'abs'); | |
| document.getElementById(`mode-${level}-pct`).classList.toggle('active', mode === 'pct'); | |
| // Force re-render | |
| rendered[level] = false; | |
| renderTab(level); | |
| } | |
| function cssVar(name) { | |
| return getComputedStyle(document.documentElement).getPropertyValue(name).trim(); | |
| } | |
| function buildStackedBar(svgId, legendId, data, subtasks, experiments, normalize) { | |
| const textColor = cssVar('--text'); | |
| const subtextColor = cssVar('--subtext'); | |
| const borderColor = cssVar('--border'); | |
| const svgEl = document.getElementById(svgId); | |
| const W = svgEl.parentElement.clientWidth - 40; | |
| const H = 340; | |
| const margin = { top: 30, right: 16, bottom: 80, left: 70 }; | |
| const innerW = W - margin.left - margin.right; | |
| const innerH = H - margin.top - margin.bottom; | |
| svgEl.setAttribute('viewBox', `0 0 ${W} ${H}`); | |
| svgEl.setAttribute('height', H); | |
| const svg = d3.select(`#${svgId}`) | |
| .attr('viewBox', `0 0 ${W} ${H}`) | |
| .attr('height', H); | |
| svg.selectAll('*').remove(); | |
| const g = svg.append('g') | |
| .attr('transform', `translate(${margin.left},${margin.top})`); | |
| const expIds = experiments.map(a => a.id); | |
| const stackData = expIds.map(id => { | |
| const row = { id }; | |
| subtasks.forEach(s => { row[s] = (data[id] && data[id][s]) || 0; }); | |
| row._total = subtasks.reduce((sum, s) => sum + row[s], 0); | |
| return row; | |
| }); | |
| let displayData; | |
| if (normalize) { | |
| displayData = stackData.map(row => { | |
| const out = { id: row.id, _total: row._total }; | |
| subtasks.forEach(s => { | |
| out[s] = row._total > 0 ? (row[s] / row._total) * 100 : 0; | |
| }); | |
| out._displayTotal = row._total > 0 ? 100 : 0; | |
| return out; | |
| }); | |
| } else { | |
| displayData = stackData.map(row => ({ ...row, _displayTotal: row._total })); | |
| } | |
| const maxVal = normalize ? 100 : (d3.max(displayData, d => d._displayTotal) || 10); | |
| const x = d3.scaleBand().domain(expIds).range([0, innerW]).padding(0.28); | |
| const y = d3.scaleLinear().domain([0, maxVal]).range([innerH, 0]).nice(); | |
| const stack = d3.stack().keys(subtasks)(displayData); | |
| // Grid lines | |
| g.append('g').attr('class', 'grid') | |
| .call(d3.axisLeft(y).tickSize(-innerW).tickFormat('').ticks(5)) | |
| .call(gg => { | |
| gg.select('.domain').remove(); | |
| gg.selectAll('line').attr('stroke', borderColor).attr('stroke-dasharray', '3,3'); | |
| }); | |
| // Stacked bars | |
| const layer = g.selectAll('.layer').data(stack).join('g') | |
| .attr('class', 'layer').attr('fill', d => COLORS[d.key] || '#666'); | |
| layer.selectAll('rect').data(d => d).join('rect') | |
| .attr('x', d => x(d.data.id)) | |
| .attr('y', d => y(d[1])) | |
| .attr('height', d => Math.max(0, y(d[0]) - y(d[1]))) | |
| .attr('width', x.bandwidth()) | |
| .attr('rx', 2) | |
| .attr('opacity', 0.88); | |
| // Labels on top | |
| g.selectAll('.bar-label').data(displayData).join('text') | |
| .attr('class', 'bar-label') | |
| .attr('x', d => x(d.id) + x.bandwidth() / 2) | |
| .attr('y', d => d._total === 0 ? y(0) - 4 : y(d._displayTotal) - 5) | |
| .attr('text-anchor', 'middle') | |
| .attr('fill', d => d._total === 0 ? borderColor : subtextColor) | |
| .attr('font-size', '11') | |
| .text(d => { | |
| if (d._total === 0) return '✓ 0 failures'; | |
| return normalize ? `n=${d._total}` : d._total; | |
| }); | |
| // Series divider line | |
| const s1Last = experiments.filter(a => a.series === 1).pop().id; | |
| const s2First = experiments.filter(a => a.series === 2)[0]?.id; | |
| if (s1Last && s2First) { | |
| const xDiv = x(s1Last) + x.bandwidth() + x.step() * 0.14; | |
| g.append('line') | |
| .attr('x1', xDiv).attr('x2', xDiv) | |
| .attr('y1', -22).attr('y2', innerH + 4) | |
| .attr('stroke', borderColor).attr('stroke-width', 1).attr('stroke-dasharray', '4,3'); | |
| g.append('text').attr('x', xDiv - 6).attr('y', -18).attr('text-anchor', 'end') | |
| .attr('fill', '#f7934f').attr('font-size', '10').attr('letter-spacing', '0.06em').text('SERIES 1'); | |
| if (s2First) { | |
| g.append('text').attr('x', xDiv + 6).attr('y', -18).attr('text-anchor', 'start') | |
| .attr('fill', '#4dc98a').attr('font-size', '10').attr('letter-spacing', '0.06em').text('SERIES 2'); | |
| } | |
| } | |
| // Axes | |
| g.append('g') | |
| .call(d3.axisLeft(y).ticks(5).tickSize(4).tickFormat(d => normalize ? d + '%' : d)) | |
| .call(gg => { | |
| gg.select('.domain').attr('stroke', borderColor); | |
| gg.selectAll('text').attr('fill', subtextColor).attr('font-size', '11'); | |
| gg.selectAll('line').attr('stroke', borderColor); | |
| }); | |
| g.append('g').attr('transform', `translate(0,${innerH})`) | |
| .call(d3.axisBottom(x).tickSize(0)) | |
| .call(gg => { | |
| gg.select('.domain').attr('stroke', borderColor); | |
| gg.selectAll('text') | |
| .attr('fill', d => { | |
| const a = experiments.find(a => a.id === d); | |
| return a?.series === 2 ? '#4dc98a' : '#f7934f'; | |
| }) | |
| .attr('font-size', '11') | |
| .attr('transform', 'rotate(-40)') | |
| .attr('text-anchor', 'end') | |
| .attr('dx', '-0.5em') | |
| .attr('dy', '0.3em'); | |
| }); | |
| // Y axis label | |
| svg.append('text').attr('transform', 'rotate(-90)') | |
| .attr('x', -(margin.top + innerH / 2)).attr('y', 10).attr('text-anchor', 'middle') | |
| .attr('fill', subtextColor).attr('font-size', '11') | |
| .text(normalize ? 'Failure distribution (%)' : 'Failed rollouts (n)'); | |
| // Legend | |
| const legendEl = document.getElementById(legendId); | |
| legendEl.innerHTML = subtasks.map(s => ` | |
| <div class="legend-item"> | |
| <div class="legend-swatch" style="background:${COLORS[s]}"></div> | |
| <span>${s}</span> | |
| </div> | |
| `).join(''); | |
| } | |
| const rendered = { l2: false, l1: false }; | |
| function renderTab(id) { | |
| if (rendered[id]) return; | |
| rendered[id] = true; | |
| const normalize = modes[id] === 'pct'; | |
| const visExps = getVisibleExperiments(); | |
| if (id === 'l2') buildStackedBar('chart-l2', 'legend-l2', L2_FAILURES, SUBTASKS_L2, visExps, normalize); | |
| if (id === 'l1') buildStackedBar('chart-l1', 'legend-l1', L1_FAILURES, SUBTASKS_L1, visExps, normalize); | |
| } | |
| function showTab(id) { | |
| document.querySelectorAll('.panel').forEach(p => p.classList.remove('active')); | |
| document.querySelectorAll('.tab').forEach(t => t.classList.remove('active')); | |
| document.getElementById('panel-' + id).classList.add('active'); | |
| document.querySelectorAll('.tab').forEach(t => { | |
| if (t.getAttribute('onclick').includes("'" + id + "'")) t.classList.add('active'); | |
| }); | |
| renderTab(id); | |
| } | |
| window.showTab = showTab; | |
| window.setMode = setMode; | |
| renderTab('l1'); | |
| } | |
| if (typeof d3 !== "undefined") { | |
| _initFailureAnalysis(); | |
| } else { | |
| var s = document.createElement("script"); | |
| s.src = "https://cdnjs.cloudflare.com/ajax/libs/d3/7.9.0/d3.min.js"; | |
| s.onload = _initFailureAnalysis; | |
| document.head.appendChild(s); | |
| } | |
| </script> | |
| </body> | |
| </html> | |