robot-folding / app /src /content /embeds /folding /statistical-analysis.html
pepijn223's picture
pepijn223 HF Staff
docs(folding): fix STEP citation and spell out Toyota Research Institute
170e1d4 unverified
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8"/>
<meta name="viewport" content="width=device-width,initial-scale=1"/>
<style>
*{box-sizing:border-box;margin:0;padding:0}
body{background:transparent;font-family:system-ui,sans-serif;color:#e8eaf0}
.wrap{max-width:980px;margin:0 auto;padding:20px 20px 36px}
.card{background:#1a1d27;border:1px solid #2a2d3a;border-radius:6px;overflow:visible;margin-bottom:12px}
.card-head{padding:9px 14px;border-bottom:1px solid #2a2d3a;font-size:12px;text-transform:uppercase;letter-spacing:.07em;color:#8b8fa8;display:flex;justify-content:space-between;align-items:center;flex-wrap:wrap;gap:8px}
.chart-area{padding:16px 16px 10px}
svg text{font-family:system-ui,sans-serif}
.insight{background:#12151f;border:1px solid #2a2d3a;border-left:3px solid #f7934f;border-radius:4px;padding:9px 13px;margin-bottom:12px;font-size:12px;color:#8b8fa8;line-height:1.7}
.insight strong{color:#e8eaf0}
.insight.blue{border-left-color:#60a5fa}
.insight.green{border-left-color:#4dc98a}
.insight.purple{border-left-color:#818cf8}
.legend{display:flex;gap:12px;flex-wrap:wrap;padding:8px 14px;border-top:1px solid #2a2d3a}
.li{display:flex;align-items:center;gap:6px;font-size:12px;color:#8b8fa8}
.lsw{width:14px;height:3px;border-radius:1px}
.ctrl-btn{padding:5px 12px;background:#1a1d27;border:1px solid #2a2d3a;border-radius:4px;color:#8b8fa8;font-family:inherit;font-size:11px;cursor:pointer;letter-spacing:.04em;transition:all .15s}
.ctrl-btn:hover,.ctrl-btn.active{color:#e8eaf0;border-color:#4a4d5a;background:#252835}
.note{font-size:12px;color:#555e7a;margin-top:10px;line-height:1.6}
@media(prefers-color-scheme:dark){.note{color:#c8cad8}}
.tooltip{position:fixed;background:#252835;border:1px solid #3a3d4a;border-radius:4px;padding:8px 11px;font-size:11px;color:#e8eaf0;pointer-events:none;opacity:0;transition:opacity .1s;z-index:100;line-height:1.7;max-width:240px}
</style>
</head>
<body>
<div class="wrap">
<div class="insight purple">
<strong>What this shows:</strong> Each violin is the Bayesian posterior distribution over a policy's true success rate, given the observed rollouts. Wide = high uncertainty; narrow = high certainty. <strong>CLD (compact letter display) letters above</strong> summarise which policies are statistically separable (<a href="https://arxiv.org/abs/2503.10966" style="color:#818cf8">STEP</a>, Bonferroni-corrected). Shared letter → not significantly different.
</div>
<div class="card">
<div class="card-head">
<span>Posterior success rate distributions: Beta(1+k, 1+n-k)</span>
<div style="display:flex;gap:6px">
<button class="ctrl-btn active" id="v-total" onclick="setVLevel('total')">Total</button>
<button class="ctrl-btn" id="v-l1" onclick="setVLevel('L1')">Level 1</button>
<button class="ctrl-btn" id="v-l2" onclick="setVLevel('L2')">Level 2</button>
</div>
</div>
<div class="chart-area" style="overflow:visible"><svg id="svg-violin" width="100%" height="500" style="overflow:visible"></svg></div>
<div class="legend">
<div class="li"><div class="lsw" style="background:#f7934f"></div>Initial (1.x)</div>
<div class="li"><div class="lsw" style="background:#4dc98a"></div>Fine-tuned (2.x)</div>
<div class="li" style="font-size:10px;color:#8b8fa8">Bold letter = CLD group (STEP, Bonferroni α=0.10/55, n<sub>max</sub>=50)</div>
</div>
</div>
<p class="note">Violins represent posterior uncertainty, not confidence intervals. Two overlapping violins can still be statistically distinct. <a href="https://arxiv.org/abs/2503.10966" style="color:#555e7a">STEP</a> sequential test with Bonferroni correction for 55 pairwise comparisons (α=0.10, per-pair α≈0.0018, n<sub>max</sub>=50).</p>
</div>
<div class="tooltip" id="tooltip"></div>
<script>
function _initStatAnalysis() {
// ── DATA ──────────────────────────────────────────────────────────────────────
// Experiment ids. Each id is a key of DATA and of every per-level table in CLD;
// drawViolin sorts a copy of this list by success rate before plotting.
const EXPS = ['1.1 π0','1.2 π0.5','1.3 Relative','1.4 RABC low','1.5 RABC high','1.7 Rel+RABC','2.1 HQ','2.2 HQ+RABC+Rel','2.3 HQ+mirror','2.4 HQ chunk45','2.5 HQ+RABC+Rel★'];
// Rollout counts per experiment. total / L1 / L2 are [k, n] pairs that drawViolin
// labels as "k/n" successes and turns into a Beta(1+k, 1+n-k) posterior;
// series (1 or 2) picks the colour through seriesColor.
const DATA = {
'1.1 π0': {total:[8,20], L1:[8,10], L2:[0,10], series:1},
'1.2 π0.5': {total:[4,20], L1:[4,10], L2:[0,10], series:1},
'1.3 Relative': {total:[7,20], L1:[7,10], L2:[0,10], series:1},
'1.4 RABC low': {total:[3,20], L1:[3,10], L2:[0,10], series:1},
'1.5 RABC high': {total:[0,20], L1:[0,10], L2:[0,10], series:1},
'1.7 Rel+RABC': {total:[8,20], L1:[8,10], L2:[0,10], series:1},
'2.1 HQ': {total:[8,20], L1:[7,10], L2:[1,10], series:2},
'2.2 HQ+RABC+Rel': {total:[15,20], L1:[10,10], L2:[5,10], series:2},
'2.3 HQ+mirror': {total:[1,20], L1:[0,10], L2:[1,10], series:2},
'2.4 HQ chunk45': {total:[4,20], L1:[4,10], L2:[0,10], series:2},
'2.5 HQ+RABC+Rel★': {total:[18,20], L1:[10,10], L2:[8,10], series:2},
};
// CLD assignments (STEP sequential test, two-sided, Bonferroni α=0.10/55, n_max=50)
// Keyed by level (total / L1 / L2), then by experiment id. The value is the letter
// string drawViolin prints above that experiment's violin and shows in the tooltip.
// These letters are hard-coded results; nothing in this script recomputes them from DATA.
const CLD = {
total: {'2.5 HQ+RABC+Rel★':'a','2.2 HQ+RABC+Rel':'ab','1.1 π0':'bc','1.7 Rel+RABC':'bc','2.1 HQ':'bc','1.3 Relative':'bc','1.2 π0.5':'cd','2.4 HQ chunk45':'cd','1.4 RABC low':'cd','2.3 HQ+mirror':'cd','1.5 RABC high':'d'},
L1: {'2.2 HQ+RABC+Rel':'a','2.5 HQ+RABC+Rel★':'a','1.1 π0':'a','1.7 Rel+RABC':'a','1.3 Relative':'a','2.1 HQ':'a','1.2 π0.5':'ab','2.4 HQ chunk45':'ab','1.4 RABC low':'ab','2.3 HQ+mirror':'b','1.5 RABC high':'b'},
L2: {'2.5 HQ+RABC+Rel★':'a','2.2 HQ+RABC+Rel':'ab','2.1 HQ':'ab','2.3 HQ+mirror':'ab','1.1 π0':'b','1.2 π0.5':'b','1.3 Relative':'b','1.4 RABC low':'b','1.5 RABC high':'b','1.7 Rel+RABC':'b','2.4 HQ chunk45':'b'},
};
// ── BETA DISTRIBUTION PDF ────────────────────────────────────────────────────
/**
 * Probability density of the Beta(a, b) distribution at x.
 *
 * @param {number} x - Point at which to evaluate the density.
 * @param {number} a - First shape parameter (> 0).
 * @param {number} b - Second shape parameter (> 0).
 * @returns {number} The density; 0 outside [0, 1]. At the boundaries the
 *   analytic limit is returned: at x = 0 it is b when a === 1, 0 when a > 1 and
 *   Infinity when a < 1 (mirrored at x = 1 with a and b swapped).
 */
function betaPdf(x, a, b){
  // Outside the support the density is zero.
  if(x<0||x>1) return 0;
  // The log-space formula below cannot be used at the boundaries:
  // (a-1)*log(0) is 0 * -Infinity = NaN when a === 1, yet the true density
  // there is finite and non-zero (e.g. the 0/n and n/n posteriors).
  if(x===0){
    if(a<1) return Infinity;
    return a===1 ? b : 0;
  }
  if(x===1){
    if(b<1) return Infinity;
    return b===1 ? a : 0;
  }
  // Log computation for numerical stability
  const logB = lgamma(a)+lgamma(b)-lgamma(a+b);
  return Math.exp((a-1)*Math.log(x)+(b-1)*Math.log(1-x)-logB);
}
/**
 * Natural log of |Gamma(x)| using the Lanczos approximation (g = 7, 9 terms).
 *
 * @param {number} x - Real argument.
 * @returns {number} log|Gamma(x)|. Arguments below 0.5 go through the
 *   reflection formula Gamma(x) * Gamma(1 - x) = pi / sin(pi * x).
 */
function lgamma(x){
  if(x<0.5){
    // Take |sin| so negative non-integer arguments (where sin(pi*x) < 0)
    // yield log|Gamma(x)| instead of NaN from the log of a negative number.
    return Math.log(Math.PI/Math.abs(Math.sin(Math.PI*x)))-lgamma(1-x);
  }
  // The Lanczos series is written for Gamma(z + 1), hence the shift by one.
  const z=x-1;
  const c=[676.5203681218851,-1259.1392167224028,771.32342877765313,-176.61502916214059,
    12.507343278686905,-0.13857109526572012,9.9843695780195716e-6,1.5056327351493116e-7];
  let series=0.99999999999980993;
  for(let i=0;i<c.length;i++) series+=c[i]/(z+i+1);
  // t = z + g + 0.5 with g = 7.
  const t=z+7.5;
  return 0.5*Math.log(2*Math.PI)+(z+0.5)*Math.log(t)-t+Math.log(series);
}
/**
 * Evaluate the Beta(1+k, 1+n-k) density on an evenly spaced grid over [0, 1].
 *
 * @param {number} k - Number of successes.
 * @param {number} n - Number of trials.
 * @param {number} [nSamples=200] - Number of grid intervals (nSamples + 1 points).
 * @returns {{xs:number[], ys:number[], alpha:number, beta:number}} Grid points,
 *   the density at each point, and the two shape parameters that were used.
 */
function betaSamples(k, n, nSamples=200){
  const alpha = 1 + k;
  const beta = 1 + (n - k);
  const xs = Array.from({length: nSamples + 1}, (_, idx) => idx / nSamples);
  const ys = xs.map((pt) => betaPdf(pt, alpha, beta));
  return {xs, ys, alpha, beta};
}
// ── COLOURS ───────────────────────────────────────────────────────────────────
// Series accent colours (same hex values as the legend swatches).
const C1='#f7934f';
const C2='#4dc98a';
// Shared chrome colours used by the chart code.
const BORDER='#2a2d3a';
const SUB='#8b8fa8';
const CARD='#1a1d27';
/**
 * Colour for an experiment series.
 * @param {number} s - Series number from DATA.
 * @returns {string} C1 when s is exactly 1, C2 for anything else.
 */
function seriesColor(s){
  if(s===1){
    return C1;
  }
  return C2;
}
// ── ① VIOLIN PLOTS ───────────────────────────────────────────────────────────
// Level currently plotted; one of the per-experiment keys in DATA ('total', 'L1', 'L2').
let vLevel='total';
/**
 * Switch the plotted level, update which button is highlighted, and redraw.
 * @param {string} lv - 'total', 'L1' or 'L2', as passed by the buttons' onclick handlers.
 */
function setVLevel(lv){
  vLevel=lv;
  // Button ids are lower-case ('v-l1', 'v-l2') while the level keys are 'L1'/'L2'.
  // Compare case-insensitively, otherwise the Level 1 / Level 2 buttons never
  // receive the 'active' class.
  const activeId=`v-${String(lv).toLowerCase()}`;
  ['v-total','v-l1','v-l2'].forEach(id=>{
    document.getElementById(id).classList.toggle('active',id===activeId);
  });
  drawViolin();
}
/**
 * Redraw the violin chart for the current `vLevel` into #svg-violin.
 *
 * Clears the SVG, then for every experiment draws a mirrored Beta-posterior
 * density (the violin), a horizontal tick at the posterior mean, the k/n label
 * and CLD letter above the plot area, and a transparent hit-box that drives the
 * #tooltip element. Experiments run left to right by descending observed
 * success rate. Uses the global `d3` plus EXPS, DATA, CLD, vLevel, betaSamples
 * and seriesColor from the enclosing scope.
 */
function drawViolin(){
  const svgEl=document.getElementById('svg-violin');
  const H=500;
  const m={top:50,right:16,bottom:80,left:70};
  // 32 matches the 16px left + 16px right padding of the .chart-area parent.
  // Clamp so a zero-width (e.g. not yet laid out) container cannot produce a
  // negative inner width, which would give negative band widths below.
  const W=Math.max(svgEl.parentElement.clientWidth-32,m.left+m.right);
  const iW=W-m.left-m.right;
  const iH=H-m.top-m.bottom;
  const svg=d3.select(svgEl).attr('viewBox',`0 0 ${W} ${H}`).attr('height',H);
  svg.selectAll('*').remove();
  const g=svg.append('g').attr('transform',`translate(${m.left},${m.top})`);

  // Sort by SR descending for this level
  const sorted=EXPS.slice().sort((a,b)=>{
    const [ka,na]=DATA[a][vLevel];
    const [kb,nb]=DATA[b][vLevel];
    return kb/nb-ka/na;
  });
  const x=d3.scaleBand().domain(sorted).range([0,iW]).padding(0.15);
  // Domain tops out at 1.05 so a violin peaking at 100% keeps a little headroom.
  const y=d3.scaleLinear().domain([0,1.05]).range([iH,0]);

  // Grid
  g.append('g').call(d3.axisLeft(y).tickSize(-iW).tickFormat('').ticks(5))
    .call(gg=>{gg.select('.domain').remove();gg.selectAll('line').attr('stroke',BORDER).attr('stroke-dasharray','3,3');});
  // 50% line
  g.append('line').attr('x1',0).attr('x2',iW).attr('y1',y(0.5)).attr('y2',y(0.5))
    .attr('stroke','#3a3d4a').attr('stroke-dasharray','4,3').attr('opacity',.6);
  g.append('text').attr('x',iW-2).attr('y',y(0.5)-4).attr('text-anchor','end')
    .attr('fill','#3a3d4a').attr('font-size',10).text('50%');

  const tooltip=document.getElementById('tooltip');
  // Fall back to the 'total' letters if the current level has no CLD table.
  const cldForLevel=CLD[vLevel]||CLD.total;

  sorted.forEach(expId=>{
    const [k,n]=DATA[expId][vLevel];
    const c=seriesColor(DATA[expId].series);
    const bx=x(expId);
    const bw=x.bandwidth();
    const cx=bx+bw/2;
    const {xs,ys,alpha,beta}=betaSamples(k,n,300);
    // Each violin is scaled to its own peak, so widths show shape, not absolute density.
    const maxY=Math.max(...ys);
    const halfWidth=bw/2*0.9;

    // Violin outline: up the right-hand side, then back down the mirrored left side.
    const rightSide=xs.map((xi,i)=>`${cx+ys[i]/maxY*halfWidth} ${y(xi)}`);
    const leftSide=xs.map((xi,i)=>`${cx-ys[i]/maxY*halfWidth} ${y(xi)}`).reverse();
    const d=`M ${rightSide.concat(leftSide).join(' L ')} Z`;
    g.append('path').attr('d',d).attr('fill',c).attr('opacity',.25)
      .attr('stroke',c).attr('stroke-width',.5).attr('stroke-opacity',.5);

    // Short horizontal tick at the posterior mean alpha/(alpha+beta)
    const mean=alpha/(alpha+beta);
    g.append('line').attr('x1',cx-halfWidth*0.5).attr('x2',cx+halfWidth*0.5)
      .attr('y1',y(mean)).attr('y2',y(mean)).attr('stroke',c).attr('stroke-width',2).attr('opacity',.9);

    // CLD letter
    const cldLetter=cldForLevel[expId]||'?';
    g.append('text').attr('x',cx).attr('y',-8).attr('text-anchor','middle')
      .attr('fill',c).attr('font-size',13).attr('font-weight','bold').text(cldLetter);
    // SR fraction label
    g.append('text').attr('x',cx).attr('y',-22).attr('text-anchor','middle')
      .attr('fill',SUB).attr('font-size',11).text(`${k}/${n}`);

    // Hover: a full-height transparent rect so the whole band is a hover target.
    const hitbox=g.append('rect').attr('x',bx).attr('y',0).attr('width',bw).attr('height',iH)
      .attr('fill','transparent').style('cursor','default');
    hitbox.on('mouseover',(evt)=>{
      tooltip.style.opacity='1';
      tooltip.innerHTML=`<strong>Experiment ${expId}</strong> (Series ${DATA[expId].series})<br/>Successes: ${k}/${n}<br/>Posterior mean: ${(mean*100).toFixed(1)}%<br/>CLD group: <strong>${cldLetter}</strong><br/><span style="font-size:8px;color:#555e7a">Policies sharing letter '${cldLetter}' are<br/>not significantly different</span>`;
      tooltip.style.left=(evt.clientX+12)+'px';
      tooltip.style.top=(evt.clientY-8)+'px';
    }).on('mouseout',()=>{tooltip.style.opacity='0';});
  });

  // Axes
  g.append('g').attr('transform',`translate(0,${iH})`)
    .call(d3.axisBottom(x).tickSize(0))
    .call(gg=>{gg.select('.domain').attr('stroke',BORDER);gg.selectAll('text').attr('fill',d=>seriesColor(DATA[d].series)).attr('font-size',11).attr('transform','rotate(-40)').attr('text-anchor','end').attr('dx','-0.5em').attr('dy','0.3em');});
  g.append('g').call(d3.axisLeft(y).ticks(5).tickFormat(d=>Math.round(d*100)+'%').tickSize(3))
    .call(gg=>{gg.select('.domain').attr('stroke',BORDER);gg.selectAll('text').attr('fill',SUB).attr('font-size',11);gg.selectAll('line').attr('stroke',BORDER);});
  g.append('text').attr('transform','rotate(-90)').attr('x',-iH/2).attr('y',-30).attr('text-anchor','middle')
    .attr('fill',SUB).attr('font-size',11).text('Posterior success rate');
}
// The buttons call setVLevel from inline onclick attributes, which resolve names
// on window, so the function has to be exposed there.
window.setVLevel = setVLevel;
drawViolin();
// Debounced redraw: cancel the pending timer on every resize event so a burst of
// events (dragging the window edge) triggers one redraw 100 ms after the last
// event instead of one redraw per event.
let resizeTimer;
window.addEventListener('resize',()=>{
  clearTimeout(resizeTimer);
  resizeTimer=setTimeout(drawViolin,100);
});
}
// Start immediately when d3 is already defined on the page; otherwise load it
// from the CDN and start once the script has loaded.
if (typeof d3 !== "undefined") {
  _initStatAnalysis();
} else {
  // const keeps the element block-scoped; the previous `var s` created a
  // global `s` on the page.
  const d3Script = document.createElement("script");
  d3Script.src = "https://cdnjs.cloudflare.com/ajax/libs/d3/7.9.0/d3.min.js";
  d3Script.onload = _initStatAnalysis;
  // Report a failed load instead of leaving an empty chart with no explanation.
  d3Script.onerror = () => {
    console.error(`statistical-analysis: failed to load d3 from ${d3Script.src}`);
  };
  document.head.appendChild(d3Script);
}
</script>
</body>
</html>