Spaces:
Running
Running
| <html lang="en"> | |
| <head> | |
| <meta charset="UTF-8"/> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"/> | |
| <style> | |
| :root { --bg: transparent; --text: #e8eaf0; --subtext: #8b8fa8; --grid: #2a2d3a; --border: #2a2d3a; } | |
| * { box-sizing: border-box; margin: 0; padding: 0; } | |
| body { background: var(--bg); font-family: system-ui, sans-serif; color: var(--text); } | |
| .axis text { fill: var(--subtext); font-size: 13px; } | |
| .axis line, .axis path { stroke: var(--grid); } | |
| .grid line { stroke: var(--grid); stroke-dasharray: 3,3; } | |
| .tooltip { | |
| position: absolute; background: #1a1d27; border: 1px solid var(--border); | |
| border-radius: 8px; padding: 10px 14px; pointer-events: none; | |
| opacity: 0; transition: opacity .15s; z-index: 10; min-width: 220px; | |
| box-shadow: 0 4px 16px rgba(0,0,0,.4); font-size: 13px; | |
| } | |
| .tooltip strong { display: block; margin-bottom: 5px; } | |
| .tooltip-row { display: flex; justify-content: space-between; gap: 12px; margin-top: 3px; font-size: 12px; color: var(--subtext); } | |
| .tooltip-row span:last-child { color: var(--text); font-weight: 600; } | |
| .lollipop-dot { cursor: pointer; transition: r .12s; } | |
| </style> | |
| </head> | |
| <body> | |
| <div style="position:relative"> | |
| <svg id="ts-chart" style="overflow:visible"></svg> | |
| <div class="tooltip" id="ts-tooltip"></div> | |
| </div> | |
| <script> | |
/**
 * Draws the "total score" lollipop chart into `#ts-chart` and redraws it on
 * every window resize.
 *
 * Expects the global `d3` to be defined and the `#ts-chart` / `#ts-tooltip`
 * elements to exist in the document when it is called. Takes no arguments and
 * returns nothing; its only effects are DOM updates and one `resize` listener.
 */
function _initTotalScore() {
  // Denominator shown in the tooltip; every `pct` below equals
  // score / 1500 * 100 rounded to one decimal.
  const MAX_SCORE = 1500;

  const raw = [
    { label: "1.1 π0", series: "1", score: 440, pct: 29.3, total_sr: 40 },
    { label: "1.2 π0.5", series: "1", score: 480, pct: 32.0, total_sr: 20 },
    { label: "1.3 Relative", series: "1", score: 460, pct: 30.7, total_sr: 35 },
    { label: "1.4 RABC low", series: "1", score: 330, pct: 22.0, total_sr: 15 },
    { label: "1.5 RABC high", series: "1", score: 170, pct: 11.3, total_sr: 0 },
    { label: "1.7 Rel+RABC", series: "1", score: 600, pct: 40.0, total_sr: 40 },
    { label: "2.1 HQ", series: "2", score: 620, pct: 41.3, total_sr: 40 },
    { label: "2.2 HQ+RABC+Rel", series: "2", score: 1090, pct: 72.7, total_sr: 75 },
    { label: "2.3 HQ+mirror", series: "2", score: 310, pct: 20.7, total_sr: 5 },
    { label: "2.4 HQ chunk45", series: "2", score: 460, pct: 30.7, total_sr: 20 },
    { label: "2.5 HQ+RABC+Rel★", series: "2", score: 1300, pct: 86.7, total_sr: 90 },
  ];

  // Per-experiment metadata keyed by the exact `label` used in `raw`.
  // Only `note` is rendered (in the tooltip); `desc` is currently unused.
  // Declared before render() so nothing closes over an uninitialised binding.
  // NOTE(review): the 2.4 entry says chunk=45 in `desc` but chunk=50 in `note`
  // — confirm which value is correct.
  const EXPERIMENTS = {
    "1.1 π0": { desc: "π0 · all data · 200k steps · MEAN_STD", note: "Base pi0 policy trained from scratch on the full dataset." },
    "1.2 π0.5": { desc: "π0.5 · all data · 200k steps · MEAN_STD", note: "Upgraded to pi0.5 architecture, same data and steps." },
    "1.3 Relative": { desc: "π0.5 · all data · 200k steps · Relative Actions · QUANTILES", note: "Adds Relative Actions on top of 1.2, expressing actions relative to current state." },
    "1.4 RABC low": { desc: "π0.5 · all data · 200k steps · RABC κ=0.01", note: "Selective Action Reward Model with low κ (≈ mean threshold, not very selective)." },
    "1.5 RABC high": { desc: "π0.5 · all data · 200k steps · RABC κ=0.0215", note: "SARM with κ = mean + ½ std, more selective filtering than 1.4." },
    "1.7 Rel+RABC": { desc: "π0.5 · all data · 200k steps · Relative Actions + RABC κ=0.0215 · QUANTILES", note: "Best of initial training. Base checkpoint for 2.5." },
    "2.1 HQ": { desc: "π0.5 · HQ data · 100k steps · fine-tune from 1.3", note: "Fine-tunes 1.3 on curated high-quality data only." },
    "2.2 HQ+RABC+Rel": { desc: "π0.5 · HQ data · 100k steps · fine-tune from 1.3 + RABC κ=0.0265 + Relative Actions", note: "Adds RABC on high-quality fine-tune from 1.3." },
    "2.3 HQ+mirror": { desc: "π0.5 · HQ + mirrored · 100k steps · fine-tune from 1.3 + Relative Actions + mirroring", note: "Augments the high-quality dataset with mirrored trajectories." },
    "2.4 HQ chunk45": { desc: "π0.5 · HQ data · 100k steps · fine-tune from 1.3 · chunk=45", note: "Explores chunked action prediction (chunk=50, RTC size=50, execution horizon=35)." },
    "2.5 HQ+RABC+Rel★": { desc: "π0.5 · HQ data · 100k steps · fine-tune from 1.7 + RABC κ=0.0265 + Relative Actions (best)", note: "Top performer. Best overall result." },
  };

  // Sort highest → lowest score % (on a copy, so `raw` keeps its order).
  const data = [...raw].sort((a, b) => b.pct - a.pct);

  const seriesColor = s => (s === "2" ? "#f7934f" : "#4f8ef7");

  // Performance-based color: red → yellow → green over 0–100 %.
  const perfColor = d3.scaleSequential().domain([0, 100])
    .interpolator(d3.interpolateRgbBasis(["#f87171", "#fbbf24", "#4dc98a"]));

  const margin = { top: 28, right: 20, bottom: 80, left: 80 };
  const svg = d3.select("#ts-chart");
  const container = svg.node().parentElement;
  const tooltip = d3.select("#ts-tooltip");

  // Tooltip markup for one experiment: title, note, then the three metric rows.
  const tooltipHtml = d => `
<strong>Experiment ${d.label} <small style="color:${seriesColor(d.series)}">(Series ${d.series})</small></strong>
<div style="margin-top:6px;padding-top:6px;border-top:1px solid #2a2d3a;font-size:11px;color:#8b8fa8;line-height:1.5">${(EXPERIMENTS[d.label] || {}).note || ""}</div>
<div class="tooltip-row"><span>Score</span><span>${d.score} / ${MAX_SCORE}</span></div>
<div class="tooltip-row"><span>Score %</span><span>${d.pct}%</span></div>
<div class="tooltip-row"><span>Total SR</span><span>${d.total_sr}%</span></div>
`;

  // Rebuilds the whole chart for the container's current width.
  function render() {
    svg.selectAll("*").remove();
    // The dots just removed can no longer fire `mouseleave`, so a tooltip that
    // was showing would otherwise stay on screen after a resize.
    tooltip.style("opacity", 0);

    const W = container.clientWidth;
    const H = Math.max(290, Math.min(380, W * 0.47));
    const w = W - margin.left - margin.right;
    const h = H - margin.top - margin.bottom;
    svg.attr("width", W).attr("height", H);

    // Container hidden or narrower than the margins: there is no plot area,
    // and a non-positive range would give the band scale an inverted layout.
    if (w <= 0) return;

    const g = svg.append("g").attr("transform", `translate(${margin.left},${margin.top})`);
    const x = d3.scaleBand().domain(data.map(d => d.label)).range([0, w]).padding(0.3);
    const y = d3.scaleLinear().domain([0, 100]).range([h, 0]);

    // These depend only on the band width, so compute them once per render.
    const band = x.bandwidth();
    const centerX = d => x(d.label) + band / 2;
    const r = Math.max(7, Math.min(11, band * 0.38));
    const valueFontSize = Math.max(8, Math.min(11, band * 0.26));

    // Horizontal grid lines
    g.append("g").attr("class", "grid").selectAll("line")
      .data([25, 50, 75, 100]).join("line")
      .attr("x1", 0).attr("x2", w).attr("y1", d => y(d)).attr("y2", d => y(d));

    // 50% reference line
    g.append("line").attr("stroke", "#fbbf24").attr("stroke-dasharray", "5,3").attr("stroke-width", 1.5).attr("opacity", 0.6)
      .attr("x1", 0).attr("x2", w).attr("y1", y(50)).attr("y2", y(50));
    g.append("text").attr("x", w + 3).attr("y", y(50) + 4).attr("fill", "#fbbf24").attr("font-size", 11).text("50%");

    // Bottom axis: no domain line, labels rotated so they do not collide.
    g.append("g").attr("class", "axis").attr("transform", `translate(0,${h})`)
      .call(d3.axisBottom(x).tickSize(0))
      .call(axis => {
        axis.select(".domain").remove();
        axis.selectAll("text")
          .attr("transform", "rotate(-40)").attr("text-anchor", "end")
          .attr("dx", "-0.5em").attr("dy", "0.3em").attr("font-size", 11);
      });

    // Left axis: percentage ticks only, no domain or tick lines.
    g.append("g").attr("class", "axis")
      .call(d3.axisLeft(y).ticks(5).tickFormat(d => d + "%").tickSize(0))
      .call(axis => axis.select(".domain").remove())
      .call(axis => axis.selectAll(".tick line").remove());

    // Series pip under labels
    data.forEach(d => {
      g.append("rect")
        .attr("x", x(d.label)).attr("width", band)
        .attr("y", h + 60).attr("height", 4).attr("rx", 2)
        .attr("fill", seriesColor(d.series)).attr("opacity", 0.8);
    });

    // Stems (drawn before the dots so the dots sit on top)
    data.forEach(d => {
      const cx = centerX(d);
      g.append("line")
        .attr("x1", cx).attr("x2", cx)
        .attr("y1", y(0)).attr("y2", y(d.pct))
        .attr("stroke", perfColor(d.pct)).attr("stroke-width", 2).attr("opacity", 0.55);
    });

    // Dots + labels
    data.forEach(d => {
      const cx = centerX(d);
      const cy = y(d.pct);

      g.append("circle").attr("class", "lollipop-dot")
        .attr("cx", cx).attr("cy", cy).attr("r", r)
        .attr("fill", perfColor(d.pct)).attr("stroke", "#1a1d27").attr("stroke-width", 2)
        .on("mousemove", function (event) {
          d3.select(this).attr("r", r + 2);
          tooltip.style("opacity", 1).html(tooltipHtml(d));

          // Pointer position relative to the positioned container.
          const box = container.getBoundingClientRect();
          const ex = event.clientX - box.left;
          const ey = event.clientY - box.top;

          // Clamp with the tooltip's real width (CSS min-width is 220px, and
          // the content can make it wider) so it never leaves the container.
          const tipWidth = tooltip.node().offsetWidth;
          const left = Math.max(0, Math.min(ex + 12, W - tipWidth));
          const top = Math.max(ey - 90, 0);
          tooltip.style("left", `${left}px`).style("top", `${top}px`);
        })
        .on("mouseleave", function () {
          d3.select(this).attr("r", r);
          tooltip.style("opacity", 0);
        });

      g.append("text")
        .attr("x", cx).attr("y", cy - r - 4).attr("text-anchor", "middle")
        .attr("fill", "#e8eaf0")
        .attr("font-size", valueFontSize)
        .attr("font-weight", "600")
        .text(d.pct + "%");
    });

    // Highlight best experiment (first entry, because `data` is sorted by pct).
    const best = data[0];
    if (best) {
      const bestX = centerX(best);
      const bestY = y(best.pct);
      g.append("line").attr("x1", bestX).attr("x2", bestX).attr("y1", bestY - 16).attr("y2", -8)
        .attr("stroke", "#4dc98a").attr("stroke-width", 1).attr("stroke-dasharray", "2,2").attr("opacity", 0.5);
      g.append("text").attr("x", bestX).attr("y", -12).attr("text-anchor", "middle")
        .attr("fill", "#4dc98a").attr("font-size", 11).attr("font-weight", "600").text("★ best");
    }

    g.append("text").attr("x", w).attr("y", -12).attr("text-anchor", "end")
      .attr("fill", "#8b8fa8").attr("font-size", 12)
      .text("sorted: highest → lowest score %");
  }

  render();
  window.addEventListener("resize", render);
}
// Start the chart right away when d3 is already on the page; otherwise load
// d3 7.9.0 from cdnjs and start the chart once that script has loaded.
if (typeof d3 !== "undefined") {
  _initTotalScore();
} else {
  // Block-scoped, so the loader does not leave a global `s` behind.
  const d3Script = document.createElement("script");
  d3Script.src = "https://cdnjs.cloudflare.com/ajax/libs/d3/7.9.0/d3.min.js";
  d3Script.onload = _initTotalScore;
  // Without this, a blocked or failed CDN request leaves an empty chart with
  // no hint as to why.
  d3Script.onerror = () => {
    console.error(`Failed to load d3 from ${d3Script.src}; total score chart was not rendered.`);
  };
  document.head.appendChild(d3Script);
}
| </script> | |
| </body> | |
| </html> | |