Spaces:
Running
Running
| <html lang="en"> | |
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<!-- A document title is required; the text mirrors the chart heading in the card. -->
<title>Posterior success rate distributions</title>
<style>
/* Reset and page frame. The body background is transparent so the host page shows through. */
*{box-sizing:border-box;margin:0;padding:0}
body{background:transparent;font-family:system-ui,sans-serif;color:#e8eaf0}
.wrap{max-width:980px;margin:0 auto;padding:20px 20px 36px}
/* Card that holds the chart, its header row and the legend. */
.card{background:#1a1d27;border:1px solid #2a2d3a;border-radius:6px;overflow:visible;margin-bottom:12px}
.card-head{padding:9px 14px;border-bottom:1px solid #2a2d3a;font-size:12px;text-transform:uppercase;letter-spacing:.07em;color:#8b8fa8;display:flex;justify-content:space-between;align-items:center;flex-wrap:wrap;gap:8px}
/* The script subtracts this 16px left + 16px right padding when sizing the SVG. */
.chart-area{padding:16px 16px 10px}
svg text{font-family:system-ui,sans-serif}
/* Call-out boxes; the colour modifier classes only change the left accent border. */
.insight{background:#12151f;border:1px solid #2a2d3a;border-left:3px solid #f7934f;border-radius:4px;padding:9px 13px;margin-bottom:12px;font-size:12px;color:#8b8fa8;line-height:1.7}
.insight strong{color:#e8eaf0}
.insight.blue{border-left-color:#60a5fa}
.insight.green{border-left-color:#4dc98a}
.insight.purple{border-left-color:#818cf8}
/* Legend row: .li is one entry, .lsw is its colour swatch. */
.legend{display:flex;gap:12px;flex-wrap:wrap;padding:8px 14px;border-top:1px solid #2a2d3a}
.li{display:flex;align-items:center;gap:6px;font-size:12px;color:#8b8fa8}
.lsw{width:14px;height:3px;border-radius:1px}
/* Level toggle buttons; .active is toggled from script. */
.ctrl-btn{padding:5px 12px;background:#1a1d27;border:1px solid #2a2d3a;border-radius:4px;color:#8b8fa8;font-family:inherit;font-size:11px;cursor:pointer;letter-spacing:.04em;transition:all .15s}
.ctrl-btn:hover,.ctrl-btn.active{color:#e8eaf0;border-color:#4a4d5a;background:#252835}
/* Footnote under the card; lighter text when the OS colour scheme is dark. */
.note{font-size:12px;color:#555e7a;margin-top:10px;line-height:1.6}
@media(prefers-color-scheme:dark){.note{color:#c8cad8}}
/* Hover tooltip: fixed to the viewport, hidden via opacity, never intercepts the pointer. */
.tooltip{position:fixed;background:#252835;border:1px solid #3a3d4a;border-radius:4px;padding:8px 11px;font-size:11px;color:#e8eaf0;pointer-events:none;opacity:0;transition:opacity .1s;z-index:100;line-height:1.7;max-width:240px}
</style>
</head>
| <body> | |
<div class="wrap">
  <!-- Explanatory call-out shown above the chart. -->
  <div class="insight purple">
    <strong>What this shows:</strong> Each violin is the Bayesian posterior distribution over a policy's true success rate, given the observed rollouts. Wide = high uncertainty; narrow = high certainty. <strong>CLD letters above</strong> summarise which policies are statistically separable (<a href="https://arxiv.org/abs/2503.10966" style="color:#818cf8">STEP</a>, Bonferroni-corrected). Shared letter → not significantly different.
  </div>
  <div class="card">
    <div class="card-head">
      <span>Posterior success rate distributions: Beta(1+k, 1+n-k)</span>
      <!-- Level toggles. The inline handlers call window.setVLevel, which the script exports;
           the ids are looked up by the script to move the .active class. -->
      <div style="display:flex;gap:6px">
        <button type="button" class="ctrl-btn active" id="v-total" onclick="setVLevel('total')">Total</button>
        <button type="button" class="ctrl-btn" id="v-l1" onclick="setVLevel('L1')">Level 1</button>
        <button type="button" class="ctrl-btn" id="v-l2" onclick="setVLevel('L2')">Level 2</button>
      </div>
    </div>
    <!-- The script clears and redraws every child of this SVG, so the accessible name is an
         attribute rather than a <title> child element. -->
    <div class="chart-area" style="overflow:visible"><svg id="svg-violin" role="img" aria-label="Violin plot of the posterior success rate distribution for each policy" width="100%" height="500" style="overflow:visible"></svg></div>
    <div class="legend">
      <div class="li"><div class="lsw" style="background:#f7934f"></div>Initial (1.x)</div>
      <div class="li"><div class="lsw" style="background:#4dc98a"></div>Fine-tuned (2.x)</div>
      <div class="li" style="font-size:10px;color:#8b8fa8">Bold letter = CLD group (STEP, Bonferroni α=0.10/55, n<sub>max</sub>=50)</div>
    </div>
  </div>
  <!-- "<" in running text is escaped so it cannot be read as the start of a tag. -->
  <p class="note">Violins represent posterior uncertainty, not confidence intervals. Two overlapping violins can still be statistically distinct. <a href="https://arxiv.org/abs/2503.10966" style="color:#555e7a">STEP</a> sequential test with Bonferroni correction for 55 pairwise comparisons (α=0.10, per-pair α&lt;0.0018, n<sub>max</sub>=50).</p>
</div>
<!-- Filled and positioned by the chart's hover handlers. -->
<div class="tooltip" id="tooltip"></div>
| <script> | |
| function _initStatAnalysis() { | |
// ── DATA ──────────────────────────────────────────────────────────────────────
// Experiment ids in their canonical order; each id is a key of DATA and of every CLD table.
const EXPS = [
  '1.1 π0',
  '1.2 π0.5',
  '1.3 Relative',
  '1.4 RABC low',
  '1.5 RABC high',
  '1.7 Rel+RABC',
  '2.1 HQ',
  '2.2 HQ+RABC+Rel',
  '2.3 HQ+mirror',
  '2.4 HQ chunk45',
  '2.5 HQ+RABC+Rel★',
];
// Rollout outcomes per experiment. Each level is a [successes, trials] pair that the chart
// reads as [k, n]; `series` selects the colour (1 = initial 1.x runs, 2 = fine-tuned 2.x runs).
const DATA = {
  '1.1 π0':           { total: [8, 20],  L1: [8, 10],  L2: [0, 10], series: 1 },
  '1.2 π0.5':         { total: [4, 20],  L1: [4, 10],  L2: [0, 10], series: 1 },
  '1.3 Relative':     { total: [7, 20],  L1: [7, 10],  L2: [0, 10], series: 1 },
  '1.4 RABC low':     { total: [3, 20],  L1: [3, 10],  L2: [0, 10], series: 1 },
  '1.5 RABC high':    { total: [0, 20],  L1: [0, 10],  L2: [0, 10], series: 1 },
  '1.7 Rel+RABC':     { total: [8, 20],  L1: [8, 10],  L2: [0, 10], series: 1 },
  '2.1 HQ':           { total: [8, 20],  L1: [7, 10],  L2: [1, 10], series: 2 },
  '2.2 HQ+RABC+Rel':  { total: [15, 20], L1: [10, 10], L2: [5, 10], series: 2 },
  '2.3 HQ+mirror':    { total: [1, 20],  L1: [0, 10],  L2: [1, 10], series: 2 },
  '2.4 HQ chunk45':   { total: [4, 20],  L1: [4, 10],  L2: [0, 10], series: 2 },
  '2.5 HQ+RABC+Rel★': { total: [18, 20], L1: [10, 10], L2: [8, 10], series: 2 },
};
// Compact-letter-display groups per level
// (STEP sequential test, two-sided, Bonferroni α=0.10/55, n_max=50).
// Experiments that share a letter are not significantly different from each other.
const CLD = {
  total: {
    '2.5 HQ+RABC+Rel★': 'a',
    '2.2 HQ+RABC+Rel': 'ab',
    '1.1 π0': 'bc',
    '1.7 Rel+RABC': 'bc',
    '2.1 HQ': 'bc',
    '1.3 Relative': 'bc',
    '1.2 π0.5': 'cd',
    '2.4 HQ chunk45': 'cd',
    '1.4 RABC low': 'cd',
    '2.3 HQ+mirror': 'cd',
    '1.5 RABC high': 'd',
  },
  L1: {
    '2.2 HQ+RABC+Rel': 'a',
    '2.5 HQ+RABC+Rel★': 'a',
    '1.1 π0': 'a',
    '1.7 Rel+RABC': 'a',
    '1.3 Relative': 'a',
    '2.1 HQ': 'a',
    '1.2 π0.5': 'ab',
    '2.4 HQ chunk45': 'ab',
    '1.4 RABC low': 'ab',
    '2.3 HQ+mirror': 'b',
    '1.5 RABC high': 'b',
  },
  L2: {
    '2.5 HQ+RABC+Rel★': 'a',
    '2.2 HQ+RABC+Rel': 'ab',
    '2.1 HQ': 'ab',
    '2.3 HQ+mirror': 'ab',
    '1.1 π0': 'b',
    '1.2 π0.5': 'b',
    '1.3 Relative': 'b',
    '1.4 RABC low': 'b',
    '1.5 RABC high': 'b',
    '1.7 Rel+RABC': 'b',
    '2.4 HQ chunk45': 'b',
  },
};
// ── BETA DISTRIBUTION PDF ────────────────────────────────────────────────────
/**
 * Probability density of the Beta(a, b) distribution at x.
 *
 * @param {number} x  Point at which to evaluate the density.
 * @param {number} a  First shape parameter (> 0).
 * @param {number} b  Second shape parameter (> 0).
 * @returns {number}  The density; 0 outside [0, 1].
 *
 * The endpoints are handled analytically because the log formula would evaluate
 * 0 * log(0) there. At x = 0 the density is b when a = 1, 0 when a > 1 and unbounded
 * (Infinity) when a < 1; x = 1 is the mirror case with a and b swapped. This matters for
 * all-failure (k = 0) and all-success (k = n) posteriors, whose mode sits on the boundary.
 */
function betaPdf(x, a, b){
  if(x<0||x>1) return 0;
  if(x===0) return a<1 ? Infinity : (a===1 ? b : 0);
  if(x===1) return b<1 ? Infinity : (b===1 ? a : 0);
  // Work in log space so large shape parameters do not overflow.
  const logB = lgamma(a)+lgamma(b)-lgamma(a+b);
  return Math.exp((a-1)*Math.log(x)+(b-1)*Math.log(1-x)-logB);
}
/**
 * Natural logarithm of the gamma function, ln Γ(x).
 *
 * Uses the Lanczos approximation (a leading constant plus eight partial-fraction terms).
 * Arguments below 0.5 are mapped onto the other half of the real line with the reflection
 * formula Γ(x)·Γ(1−x) = π / sin(πx).
 *
 * @param {number} x  Argument.
 * @returns {number}  ln Γ(x).
 */
function lgamma(x){
  if(x<0.5){
    return Math.log(Math.PI/Math.sin(Math.PI*x))-lgamma(1-x);
  }
  const coeffs=[676.5203681218851,-1259.1392167224028,771.32342877765313,-176.61502916214059,
    12.507343278686905,-0.13857109526572012,9.9843695780195716e-6,1.5056327351493116e-7];
  const z=x-1;
  // Accumulate the series in coefficient order, starting from the leading constant.
  let series=0.99999999999980993;
  coeffs.forEach((coef,idx)=>{ series+=coef/(z+idx+1); });
  const t=z+8-0.5;
  return 0.5*Math.log(2*Math.PI)+(z+0.5)*Math.log(t)-t+Math.log(series);
}
/**
 * Sample the Beta(1+k, 1+n−k) density on an evenly spaced grid over [0, 1].
 *
 * @param {number} k               Observed successes.
 * @param {number} n               Observed trials.
 * @param {number} [nSamples=200]  Number of grid intervals; the grid has nSamples + 1 points.
 * @returns {{xs:number[], ys:number[], alpha:number, beta:number}}
 *          Grid positions, the density at each position, and the two shape parameters.
 */
function betaSamples(k, n, nSamples=200){
  const alpha=1+k;
  const beta=1+(n-k);
  const xs=Array.from({length:nSamples+1},(_,idx)=>idx/nSamples);
  const ys=xs.map(pos=>betaPdf(pos,alpha,beta));
  return {xs, ys, alpha, beta};
}
// ── COLOURS ───────────────────────────────────────────────────────────────────
const C1='#f7934f';      // series 1 accent
const C2='#4dc98a';      // accent for every other series
const BORDER='#2a2d3a';  // grid lines and axis strokes
const SUB='#8b8fa8';     // secondary text
const CARD='#1a1d27';    // card background
/** Return the accent colour for a series number: C1 for exactly 1, C2 for anything else. */
function seriesColor(s){
  if(s===1){
    return C1;
  }
  return C2;
}
// ── ① VIOLIN PLOTS ───────────────────────────────────────────────────────────
// Level currently shown by the chart: 'total', 'L1' or 'L2' (the keys used in DATA and CLD).
let vLevel='total';
/**
 * Switch the chart to another level, move the `.active` class to the matching toggle
 * button, and redraw.
 *
 * @param {string} lv  Level key: 'total', 'L1' or 'L2'.
 */
function setVLevel(lv){
  vLevel=lv;
  // Button ids are lower-case ('v-l1') while the level keys are not ('L1'), so normalise the
  // case before comparing; otherwise the Level 1 / Level 2 buttons are never highlighted.
  const activeId=`v-${String(lv).toLowerCase()}`;
  ['v-total','v-l1','v-l2'].forEach(id=>{
    document.getElementById(id).classList.toggle('active',id===activeId);
  });
  drawViolin();
}
/**
 * Draw the violin chart for the currently selected level (`vLevel`) into #svg-violin.
 *
 * Experiments are ordered by observed success rate, highest first. For each one the
 * Beta(1+k, 1+n−k) posterior density is sampled on [0, 1] and drawn as a mirrored outline
 * whose widest point spans 90% of its band. A horizontal tick marks the posterior mean, and
 * the k/n fraction and the CLD letter are printed above the plot area. Hovering a band shows
 * a tooltip that follows the pointer.
 *
 * The SVG is cleared and rebuilt on every call, so the function can be called again after a
 * level change or a resize.
 */
function drawViolin(){
  const svgEl=document.getElementById('svg-violin');
  const H=500;
  const m={top:50,right:16,bottom:80,left:70};
  // clientWidth includes the 16px left + 16px right padding of .chart-area.
  const W=svgEl.parentElement.clientWidth-32;
  const iW=W-m.left-m.right, iH=H-m.top-m.bottom;
  // A hidden or very narrow container leaves no room for the plot; a non-positive width
  // would produce an invalid viewBox and an inverted band scale, so skip this draw.
  if(iW<=0) return;
  const svg=d3.select(svgEl).attr('viewBox',`0 0 ${W} ${H}`).attr('height',H);
  svg.selectAll('*').remove();
  const g=svg.append('g').attr('transform',`translate(${m.left},${m.top})`);
  // Sort by observed success rate, descending, for this level
  const sorted=EXPS.slice().sort((a,b)=>{
    const [ka,na]=DATA[a][vLevel], [kb,nb]=DATA[b][vLevel];
    return kb/nb - ka/na;
  });
  const x=d3.scaleBand().domain(sorted).range([0,iW]).padding(0.15);
  // Domain runs slightly past 1 so a density that peaks at 100% is not drawn on the top edge.
  const y=d3.scaleLinear().domain([0,1.05]).range([iH,0]);
  // Grid: an unlabeled left axis whose ticks are stretched across the plot
  g.append('g').call(d3.axisLeft(y).tickSize(-iW).tickFormat('').ticks(5))
    .call(gg=>{gg.select('.domain').remove();gg.selectAll('line').attr('stroke',BORDER).attr('stroke-dasharray','3,3')});
  // 50% reference line
  g.append('line').attr('x1',0).attr('x2',iW).attr('y1',y(0.5)).attr('y2',y(0.5))
    .attr('stroke','#3a3d4a').attr('stroke-dasharray','4,3').attr('opacity',.6);
  g.append('text').attr('x',iW-2).attr('y',y(0.5)-4).attr('text-anchor','end')
    .attr('fill','#3a3d4a').attr('font-size',10).text('50%');
  const tooltip=document.getElementById('tooltip');
  // The tooltip is position:fixed, so viewport (client) coordinates are the right frame.
  const placeTooltip=(evt)=>{
    tooltip.style.left=(evt.clientX+12)+'px';
    tooltip.style.top=(evt.clientY-8)+'px';
  };
  sorted.forEach(expId=>{
    const [k,n]=DATA[expId][vLevel];
    const c=seriesColor(DATA[expId].series);
    const bx=x(expId), bw=x.bandwidth();
    const cx=bx+bw/2;
    const {xs,ys,alpha,beta}=betaSamples(k,n,300);
    // Each violin is normalised to its own peak, so widths are comparable only within a violin.
    const maxY=Math.max(...ys);
    const halfWidth=bw/2*0.9;
    // Violin outline: up the right-hand side, then back down the mirrored left-hand side.
    const offsets=ys.map(yi=>yi/maxY*halfWidth);
    const rightSide=xs.map((xi,i)=>`${cx+offsets[i]} ${y(xi)}`);
    const leftSide=xs.map((xi,i)=>`${cx-offsets[i]} ${y(xi)}`).reverse();
    const d='M '+rightSide.concat(leftSide).join(' L ')+' Z';
    g.append('path').attr('d',d).attr('fill',c).attr('opacity',.25)
      .attr('stroke',c).attr('stroke-width',.5).attr('stroke-opacity',.5);
    // Short horizontal tick at the posterior mean alpha / (alpha + beta)
    const mean=alpha/(alpha+beta);
    g.append('line').attr('x1',cx-halfWidth*0.5).attr('x2',cx+halfWidth*0.5)
      .attr('y1',y(mean)).attr('y2',y(mean)).attr('stroke',c).attr('stroke-width',2).attr('opacity',.9);
    // CLD letter; falls back to the 'total' table, then to '?', if no entry exists
    const cldLetter=(CLD[vLevel]||CLD.total)[expId]||'?';
    g.append('text').attr('x',cx).attr('y',-8).attr('text-anchor','middle')
      .attr('fill',c).attr('font-size',13).attr('font-weight','bold').text(cldLetter);
    // Success fraction label
    g.append('text').attr('x',cx).attr('y',-22).attr('text-anchor','middle')
      .attr('fill',SUB).attr('font-size',11).text(`${k}/${n}`);
    // Hover target: a transparent rectangle covering the whole band
    const hitbox=g.append('rect').attr('x',bx).attr('y',0).attr('width',bw).attr('height',iH)
      .attr('fill','transparent').style('cursor','default');
    hitbox.on('mouseover',(evt)=>{
      tooltip.style.opacity='1';
      // Every interpolated value comes from the constants in this script, not from user input.
      tooltip.innerHTML=`<strong>Experiment ${expId}</strong> (Series ${DATA[expId].series})<br/>Successes: ${k}/${n}<br/>Posterior mean: ${(mean*100).toFixed(1)}%<br/>CLD group: <strong>${cldLetter}</strong><br/><span style="font-size:8px;color:#555e7a">Policies sharing letter '${cldLetter}' are<br/>not significantly different</span>`;
      placeTooltip(evt);
    }).on('mousemove',placeTooltip)
      .on('mouseout',()=>{tooltip.style.opacity='0';});
  });
  // Axes
  g.append('g').attr('transform',`translate(0,${iH})`)
    .call(d3.axisBottom(x).tickSize(0))
    .call(gg=>{gg.select('.domain').attr('stroke',BORDER);gg.selectAll('text').attr('fill',d=>seriesColor(DATA[d].series)).attr('font-size',11).attr('transform','rotate(-40)').attr('text-anchor','end').attr('dx','-0.5em').attr('dy','0.3em')});
  g.append('g').call(d3.axisLeft(y).ticks(5).tickFormat(d=>Math.round(d*100)+'%').tickSize(3))
    .call(gg=>{gg.select('.domain').attr('stroke',BORDER);gg.selectAll('text').attr('fill',SUB).attr('font-size',11);gg.selectAll('line').attr('stroke',BORDER)});
  g.append('text').attr('transform','rotate(-90)').attr('x',-iH/2).attr('y',-30).attr('text-anchor','middle')
    .attr('fill',SUB).attr('font-size',11).text('Posterior success rate');
}
// The toggle buttons use inline onclick attributes, which look names up on window, so the
// handler has to be exported from this function's scope.
window.setVLevel = setVLevel;
drawViolin();
// Debounced redraw: a resize drag fires many events, so cancel the pending redraw each time
// and draw only once, 100ms after the last event.
let resizeTimer;
window.addEventListener('resize',()=>{
  clearTimeout(resizeTimer);
  resizeTimer=setTimeout(drawViolin,100);
});
| } | |
// Bootstrap: start immediately when d3 is already on the page, otherwise load it from the
// CDN first. Wrapped in a function so the loader element does not become a global.
(function () {
  if (typeof d3 !== "undefined") {
    _initStatAnalysis();
    return;
  }
  const loader = document.createElement("script");
  loader.src = "https://cdnjs.cloudflare.com/ajax/libs/d3/7.9.0/d3.min.js";
  loader.onload = _initStatAnalysis;
  // Without this the chart area would just stay empty with no hint as to why.
  loader.onerror = function () {
    console.error("Could not load d3 from " + loader.src + "; the violin chart was not rendered.");
  };
  document.head.appendChild(loader);
})();
| </script> | |
| </body> | |
| </html> | |