File size: 12,876 Bytes
fb7e94b
 
 
 
 
 
 
 
 
 
5f268a7
f0f3d44
fb7e94b
 
 
f0f3d44
fb7e94b
 
 
 
 
 
f0f3d44
fb7e94b
 
f0f3d44
fb7e94b
 
f0f3d44
a72b4f6
f0f3d44
fb7e94b
 
 
 
 
170e1d4
fb7e94b
 
 
f0f3d44
fb7e94b
 
 
 
 
 
5f268a7
fb7e94b
f0f3d44
 
32be670
fb7e94b
 
170e1d4
fb7e94b
 
 
 
 
 
 
9a36854
fb7e94b
5f268a7
 
9a36854
5f268a7
 
9a36854
5f268a7
9a36854
5f268a7
 
9a36854
fb7e94b
 
32be670
fb7e94b
32be670
f0f3d44
32be670
fb7e94b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5f268a7
 
fb7e94b
 
5f268a7
 
fb7e94b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f0f3d44
fb7e94b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f0f3d44
fb7e94b
 
 
f0f3d44
fb7e94b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f0f3d44
fb7e94b
f0f3d44
fb7e94b
f0f3d44
fb7e94b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8"/>
<meta name="viewport" content="width=device-width,initial-scale=1"/>
<style>
*{box-sizing:border-box;margin:0;padding:0}
body{background:transparent;font-family:system-ui,sans-serif;color:#e8eaf0}
.wrap{max-width:980px;margin:0 auto;padding:20px 20px 36px}

.card{background:#1a1d27;border:1px solid #2a2d3a;border-radius:6px;overflow:visible;margin-bottom:12px}
.card-head{padding:9px 14px;border-bottom:1px solid #2a2d3a;font-size:12px;text-transform:uppercase;letter-spacing:.07em;color:#8b8fa8;display:flex;justify-content:space-between;align-items:center;flex-wrap:wrap;gap:8px}
.chart-area{padding:16px 16px 10px}
svg text{font-family:system-ui,sans-serif}

.insight{background:#12151f;border:1px solid #2a2d3a;border-left:3px solid #f7934f;border-radius:4px;padding:9px 13px;margin-bottom:12px;font-size:12px;color:#8b8fa8;line-height:1.7}
.insight strong{color:#e8eaf0}
.insight.blue{border-left-color:#60a5fa}
.insight.green{border-left-color:#4dc98a}
.insight.purple{border-left-color:#818cf8}

.legend{display:flex;gap:12px;flex-wrap:wrap;padding:8px 14px;border-top:1px solid #2a2d3a}
.li{display:flex;align-items:center;gap:6px;font-size:12px;color:#8b8fa8}
.lsw{width:14px;height:3px;border-radius:1px}

.ctrl-btn{padding:5px 12px;background:#1a1d27;border:1px solid #2a2d3a;border-radius:4px;color:#8b8fa8;font-family:inherit;font-size:11px;cursor:pointer;letter-spacing:.04em;transition:all .15s}
.ctrl-btn:hover,.ctrl-btn.active{color:#e8eaf0;border-color:#4a4d5a;background:#252835}

.note{font-size:12px;color:#555e7a;margin-top:10px;line-height:1.6}
@media(prefers-color-scheme:dark){.note{color:#c8cad8}}
.tooltip{position:fixed;background:#252835;border:1px solid #3a3d4a;border-radius:4px;padding:8px 11px;font-size:11px;color:#e8eaf0;pointer-events:none;opacity:0;transition:opacity .1s;z-index:100;line-height:1.7;max-width:240px}
</style>
</head>
<body>
<div class="wrap">
<div class="insight purple">
  <strong>What this shows:</strong> Each violin is the Bayesian posterior distribution over a policy's true success rate, given the observed rollouts. Wide = high uncertainty; narrow = high certainty. <strong>CLD letters above</strong> summarise which policies are statistically separable (<a href="https://arxiv.org/abs/2503.10966" style="color:#818cf8">STEP</a>, Bonferroni-corrected). Shared letter → not significantly different.
</div>
<div class="card">
  <div class="card-head">
    <span>Posterior success rate distributions: Beta(1+k, 1+n-k)</span>
    <div style="display:flex;gap:6px">
      <button class="ctrl-btn active" id="v-total" onclick="setVLevel('total')">Total</button>
      <button class="ctrl-btn" id="v-l1" onclick="setVLevel('L1')">Level 1</button>
      <button class="ctrl-btn" id="v-l2" onclick="setVLevel('L2')">Level 2</button>
    </div>
  </div>
  <div class="chart-area" style="overflow:visible"><svg id="svg-violin" width="100%" height="500" style="overflow:visible"></svg></div>
  <div class="legend">
    <div class="li"><div class="lsw" style="background:#f7934f"></div>Initial (1.x)</div>
    <div class="li"><div class="lsw" style="background:#4dc98a"></div>Fine-tuned (2.x)</div>
    <div class="li" style="font-size:10px;color:#8b8fa8">Bold letter = CLD group (STEP, Bonferroni α=0.10/55, n<sub>max</sub>=50)</div>
  </div>
</div>
<p class="note">Violins represent posterior uncertainty, not confidence intervals. Two overlapping violins can still be statistically distinct. <a href="https://arxiv.org/abs/2503.10966" style="color:#555e7a">STEP</a> sequential test with Bonferroni correction for 55 pairwise comparisons (α=0.10, per-pair α&lt;0.0018, n<sub>max</sub>=50).</p>
</div>

<div class="tooltip" id="tooltip"></div>

<script>
function _initStatAnalysis() {
// ── DATA ──────────────────────────────────────────────────────────────────────
// Experiment ids. Each id is a key of DATA and of every CLD level, and is also the
// label drawn on the x axis and in the tooltip, so the three tables must stay in sync.
const EXPS = ['1.1 π0','1.2 π0.5','1.3 Relative','1.4 RABC low','1.5 RABC high','1.7 Rel+RABC','2.1 HQ','2.2 HQ+RABC+Rel','2.3 HQ+mirror','2.4 HQ chunk45','2.5 HQ+RABC+Rel★'];

// Observed rollouts per experiment as [successes, rollouts] for each level
// (total = L1 + L2). `series` selects the colour: 1 = Initial (1.x), 2 = Fine-tuned (2.x).
const DATA = {
  '1.1 π0': {total:[8,20],  L1:[8,10],  L2:[0,10],  series:1},
  '1.2 π0.5': {total:[4,20],  L1:[4,10],  L2:[0,10],  series:1},
  '1.3 Relative': {total:[7,20],  L1:[7,10],  L2:[0,10],  series:1},
  '1.4 RABC low': {total:[3,20],  L1:[3,10],  L2:[0,10],  series:1},
  '1.5 RABC high': {total:[0,20],  L1:[0,10],  L2:[0,10],  series:1},
  '1.7 Rel+RABC': {total:[8,20],  L1:[8,10],  L2:[0,10],  series:1},
  '2.1 HQ': {total:[8,20],  L1:[7,10],  L2:[1,10],  series:2},
  '2.2 HQ+RABC+Rel': {total:[15,20], L1:[10,10], L2:[5,10],  series:2},
  '2.3 HQ+mirror': {total:[1,20],  L1:[0,10],  L2:[1,10],  series:2},
  '2.4 HQ chunk45': {total:[4,20],  L1:[4,10],  L2:[0,10],  series:2},
  '2.5 HQ+RABC+Rel★': {total:[18,20], L1:[10,10], L2:[8,10],  series:2},
};

// CLD assignments (STEP sequential test, two-sided, Bonferroni α=0.10/55, n_max=50).
// Per level, maps experiment id -> compact-letter-display group; experiments that
// share a letter are not significantly different.
const CLD = {
  total: {'2.5 HQ+RABC+Rel★':'a','2.2 HQ+RABC+Rel':'ab','1.1 π0':'bc','1.7 Rel+RABC':'bc','2.1 HQ':'bc','1.3 Relative':'bc','1.2 π0.5':'cd','2.4 HQ chunk45':'cd','1.4 RABC low':'cd','2.3 HQ+mirror':'cd','1.5 RABC high':'d'},
  L1:   {'2.2 HQ+RABC+Rel':'a','2.5 HQ+RABC+Rel★':'a','1.1 π0':'a','1.7 Rel+RABC':'a','1.3 Relative':'a','2.1 HQ':'a','1.2 π0.5':'ab','2.4 HQ chunk45':'ab','1.4 RABC low':'ab','2.3 HQ+mirror':'b','1.5 RABC high':'b'},
  L2:   {'2.5 HQ+RABC+Rel★':'a','2.2 HQ+RABC+Rel':'ab','2.1 HQ':'ab','2.3 HQ+mirror':'ab','1.1 π0':'b','1.2 π0.5':'b','1.3 Relative':'b','1.4 RABC low':'b','1.5 RABC high':'b','1.7 Rel+RABC':'b','2.4 HQ chunk45':'b'},
};

// ── BETA DISTRIBUTION PDF ────────────────────────────────────────────────────
/**
 * Probability density of the Beta(a, b) distribution at x.
 *
 * @param {number} x - Point at which to evaluate the density.
 * @param {number} a - First shape parameter (alpha).
 * @param {number} b - Second shape parameter (beta).
 * @returns {number} The density; 0 outside [0, 1].
 */
function betaPdf(x, a, b){
  // Outside the support the density is zero.
  if(x<0||x>1) return 0;
  // Endpoints are handled explicitly because the log form below cannot evaluate
  // them. With a shape parameter of exactly 1 the density there is finite and
  // non-zero (Beta(1,b) has density b at 0; Beta(a,1) has density a at 1), which is
  // the mode of a 0/n or n/n posterior. For a shape parameter > 1 the density is 0.
  // For a shape parameter < 1 the true density diverges; 0 is returned (as before)
  // so that callers normalising by the maximum keep working with finite values.
  if(x===0) return a===1 ? b : 0;
  if(x===1) return b===1 ? a : 0;
  // Log computation for numerical stability
  const logB = lgamma(a)+lgamma(b)-lgamma(a+b);
  return Math.exp((a-1)*Math.log(x)+(b-1)*Math.log(1-x)-logB);
}

/**
 * Natural log of |Gamma(x)| via the Lanczos approximation (g = 7, 8 series terms).
 *
 * @param {number} x - Real argument.
 * @returns {number} log|Gamma(x)|; Infinity at x = 0.
 */
function lgamma(x){
  // Reflection formula for x < 0.5: Gamma(x) * Gamma(1-x) = PI / sin(PI*x).
  // The absolute value is required because the right-hand side is negative wherever
  // Gamma(x) is negative (e.g. -1 < x < 0); without it Math.log would return NaN.
  if(x<0.5) return Math.log(Math.abs(Math.PI/Math.sin(Math.PI*x)))-lgamma(1-x);
  const z=x-1;
  const coeffs=[676.5203681218851,-1259.1392167224028,771.32342877765313,-176.61502916214059,
                12.507343278686905,-0.13857109526572012,9.9843695780195716e-6,1.5056327351493116e-7];
  // Lanczos series: c0 + sum_i c_i / (z + i)
  let series=0.99999999999980993;
  for(let i=0;i<coeffs.length;i++) series+=coeffs[i]/(z+i+1);
  // t = z + g + 0.5 with g = 7
  const t=z+8-0.5;
  return 0.5*Math.log(2*Math.PI)+(z+0.5)*Math.log(t)-t+Math.log(series);
}

// Evaluate the Beta(1+k, 1+n-k) density on an evenly spaced grid of nSamples+1
// points covering [0, 1].
// Returns the grid (xs), the densities (ys) and the two shape parameters.
function betaSamples(k, n, nSamples=200){
  const alpha=k+1;
  const beta=(n-k)+1;
  const xs=Array.from({length:nSamples+1},(_,step)=>step/nSamples);
  const ys=xs.map(point=>betaPdf(point,alpha,beta));
  return {xs, ys, alpha, beta};
}

// ── COLOURS ───────────────────────────────────────────────────────────────────
// Series colours: C1 for series 1, C2 for everything else.
const C1='#f7934f';
const C2='#4dc98a';
// Shared chart chrome colours.
const BORDER='#2a2d3a';
const SUB='#8b8fa8';
const CARD='#1a1d27';

// Map a series number to its colour; only the number 1 selects C1.
function seriesColor(s){
  if(s===1) return C1;
  return C2;
}

// ── ① VIOLIN PLOTS ───────────────────────────────────────────────────────────
// Level currently shown by the chart: 'total', 'L1' or 'L2' (keys of DATA / CLD).
let vLevel='total';

/**
 * Switch the chart to another level, highlight the matching toolbar button and redraw.
 *
 * @param {string} lv - Level key: 'total', 'L1' or 'L2'.
 */
function setVLevel(lv){
  vLevel=lv;
  // Button ids are lowercase ('v-l1', 'v-l2') while the level keys are not
  // ('L1', 'L2'), so normalise the case before comparing; otherwise no button
  // is marked active for Level 1 / Level 2.
  const activeId=`v-${String(lv).toLowerCase()}`;
  ['v-total','v-l1','v-l2'].forEach(id=>document.getElementById(id).classList.toggle('active',id===activeId));
  drawViolin();
}

/**
 * Render the violin chart for the currently selected level (`vLevel`) into #svg-violin.
 *
 * The SVG is cleared and rebuilt on every call. Experiments are ordered by observed
 * success rate (descending). For each one this draws the Beta(1+k, 1+n-k) posterior
 * density as a mirrored outline, a tick at the posterior mean, the k/n label, the CLD
 * letter, and a transparent hit box that drives the shared #tooltip element.
 */
function drawViolin(){
  const svgEl=document.getElementById('svg-violin');
  // 32 = left + right padding of the surrounding .chart-area (16px each).
  const W=svgEl.parentElement.clientWidth-32, H=500;
  const m={top:50,right:16,bottom:80,left:70};
  const iW=W-m.left-m.right, iH=H-m.top-m.bottom;
  const svg=d3.select(svgEl).attr('viewBox',`0 0 ${W} ${H}`).attr('height',H);
  svg.selectAll('*').remove();
  const g=svg.append('g').attr('transform',`translate(${m.left},${m.top})`);

  // Sort by SR descending for this level
  const sorted=EXPS.slice().sort((a,b)=>{
    const [ka,na]=DATA[a][vLevel], [kb,nb]=DATA[b][vLevel];
    return kb/nb - ka/na;
  });

  const x=d3.scaleBand().domain(sorted).range([0,iW]).padding(0.15);
  // Domain runs slightly past 100% so a density that peaks at 1 is not flush with the top edge.
  const y=d3.scaleLinear().domain([0,1.05]).range([iH,0]);

  // Grid
  g.append('g').call(d3.axisLeft(y).tickSize(-iW).tickFormat('').ticks(5))
   .call(gg=>{gg.select('.domain').remove();gg.selectAll('line').attr('stroke',BORDER).attr('stroke-dasharray','3,3')});

  // 50% line
  g.append('line').attr('x1',0).attr('x2',iW).attr('y1',y(0.5)).attr('y2',y(0.5))
   .attr('stroke','#3a3d4a').attr('stroke-dasharray','4,3').attr('opacity',.6);
  g.append('text').attr('x',iW-2).attr('y',y(0.5)-4).attr('text-anchor','end')
   .attr('fill','#3a3d4a').attr('font-size',10).text('50%');

  const tooltip=document.getElementById('tooltip');

  sorted.forEach(expId=>{
    const [k,n]=DATA[expId][vLevel];
    const c=seriesColor(DATA[expId].series);
    const bx=x(expId), bw=x.bandwidth();
    const cx=bx+bw/2;

    const {xs,ys}=betaSamples(k,n,300);
    // Each violin is scaled by its own peak density, so widths are comparable
    // within a violin but not between violins.
    const maxY=Math.max(...ys);
    const halfWidth=bw/2*0.9;

    // Violin path: success rate runs along the y axis, density is the horizontal
    // offset from the band centre. Trace the right edge upwards, then the mirrored
    // left edge back down, and close the outline.
    const rightEdge=xs.map((xi,i)=>`${cx+ys[i]/maxY*halfWidth} ${y(xi)}`);
    const leftEdge=xs.map((xi,i)=>`${cx-ys[i]/maxY*halfWidth} ${y(xi)}`).reverse();
    const d=`M ${rightEdge.concat(leftEdge).join(' L ')} Z`;
    g.append('path').attr('d',d).attr('fill',c).attr('opacity',.25).attr('stroke',c).attr('stroke-width',.5).attr('stroke-opacity',.5);

    // Posterior mean of Beta(alpha, beta)
    const alpha=1+k, beta=1+(n-k);
    const mean=alpha/(alpha+beta);

    // Short horizontal tick at the posterior mean
    g.append('line').attr('x1',cx-halfWidth*0.5).attr('x2',cx+halfWidth*0.5)
     .attr('y1',y(mean)).attr('y2',y(mean)).attr('stroke',c).attr('stroke-width',2).attr('opacity',.9);

    // CLD letter (falls back to the 'total' table for an unknown level, '?' for an unknown id)
    const cldLetter=(CLD[vLevel]||CLD.total)[expId]||'?';
    g.append('text').attr('x',cx).attr('y',-8).attr('text-anchor','middle')
     .attr('fill',c).attr('font-size',13).attr('font-weight','bold').text(cldLetter);

    // SR fraction label
    g.append('text').attr('x',cx).attr('y',-22).attr('text-anchor','middle')
     .attr('fill',SUB).attr('font-size',11).text(`${k}/${n}`);

    // Hover: a transparent rect over the whole band shows the tooltip next to the
    // pointer position at the moment it enters the band.
    const hitbox=g.append('rect').attr('x',bx).attr('y',0).attr('width',bw).attr('height',iH)
     .attr('fill','transparent').style('cursor','default');
    hitbox.on('mouseover',(evt)=>{
      tooltip.style.opacity='1';
      tooltip.innerHTML=`<strong>Experiment ${expId}</strong> (Series ${DATA[expId].series})<br/>Successes: ${k}/${n}<br/>Posterior mean: ${(mean*100).toFixed(1)}%<br/>CLD group: <strong>${cldLetter}</strong><br/><span style="font-size:8px;color:#555e7a">Policies sharing letter '${cldLetter}' are<br/>not significantly different</span>`;
      tooltip.style.left=(evt.clientX+12)+'px';
      tooltip.style.top=(evt.clientY-8)+'px';
    }).on('mouseout',()=>tooltip.style.opacity='0');
  });

  // Axes
  g.append('g').attr('transform',`translate(0,${iH})`)
   .call(d3.axisBottom(x).tickSize(0))
   .call(gg=>{gg.select('.domain').attr('stroke',BORDER);gg.selectAll('text').attr('fill',d=>seriesColor(DATA[d].series)).attr('font-size',11).attr('transform','rotate(-40)').attr('text-anchor','end').attr('dx','-0.5em').attr('dy','0.3em')});
  g.append('g').call(d3.axisLeft(y).ticks(5).tickFormat(d=>Math.round(d*100)+'%').tickSize(3))
   .call(gg=>{gg.select('.domain').attr('stroke',BORDER);gg.selectAll('text').attr('fill',SUB).attr('font-size',11);gg.selectAll('line').attr('stroke',BORDER)});
  g.append('text').attr('transform','rotate(-90)').attr('x',-iH/2).attr('y',-30).attr('text-anchor','middle')
   .attr('fill',SUB).attr('font-size',11).text('Posterior success rate');
}

// The toolbar buttons call setVLevel(...) from inline onclick attributes, which
// resolve names on the global object, so the handler is exported from this closure.
window.setVLevel = setVLevel;

drawViolin();

// Debounced redraw: cancel the pending timer on every resize event so that a
// drag-resize triggers a single redraw 100 ms after the last event instead of
// one full redraw per event.
let resizeTimer;
window.addEventListener('resize',()=>{
  clearTimeout(resizeTimer);
  resizeTimer=setTimeout(drawViolin,100);
});

}

// Start immediately when d3 is already available on the page; otherwise load
// d3 7.9.0 from cdnjs and start once that script has loaded.
if (typeof d3 !== "undefined") {
  _initStatAnalysis();
} else {
  // Block-scoped on purpose: the loader element must not leak as a global.
  const d3Script = document.createElement("script");
  d3Script.src = "https://cdnjs.cloudflare.com/ajax/libs/d3/7.9.0/d3.min.js";
  d3Script.onload = _initStatAnalysis;
  // Without d3 the chart cannot render; report it instead of failing silently.
  d3Script.onerror = () => {
    console.error(`Failed to load d3 from ${d3Script.src}; the violin chart was not rendered.`);
  };
  document.head.appendChild(d3Script);
}
</script>
</body>
</html>