tfrere HF Staff committed on
Commit
ea8f121
·
1 Parent(s): 1b8f2e6
app/src/content/embeds/d3-po-beta-ablation.html ADDED
@@ -0,0 +1,538 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div class="d3-apo-beta-ablation"></div>
2
+ <style>
3
+ .d3-apo-beta-ablation {
4
+ width: 100%;
5
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
6
+ position: relative;
7
+ --axis-color: var(--text-color, #333);
8
+ --tick-color: var(--muted-color, #666);
9
+ --grid-color: rgba(0,0,0,.08);
10
+ }
11
+ [data-theme="dark"] .d3-apo-beta-ablation {
12
+ --axis-color: var(--text-color, #ccc);
13
+ --tick-color: var(--muted-color, #999);
14
+ --grid-color: rgba(255,255,255,.10);
15
+ }
16
+ .d3-apo-beta-ablation svg {
17
+ display: block;
18
+ overflow: visible;
19
+ }
20
+ .d3-apo-beta-ablation .axes path,
21
+ .d3-apo-beta-ablation .axes line {
22
+ stroke: var(--axis-color);
23
+ shape-rendering: crispEdges;
24
+ }
25
+ .d3-apo-beta-ablation .axes text {
26
+ fill: var(--tick-color);
27
+ font-size: 11px;
28
+ }
29
+ .d3-apo-beta-ablation .grid line {
30
+ stroke: var(--grid-color);
31
+ stroke-dasharray: 2,2;
32
+ shape-rendering: crispEdges;
33
+ }
34
+ .d3-apo-beta-ablation .axis-label {
35
+ fill: var(--text-color);
36
+ font-size: 12px;
37
+ font-weight: 600;
38
+ }
39
+ .d3-apo-beta-ablation .line-think {
40
+ fill: none;
41
+ stroke-width: 2.5;
42
+ stroke-linecap: round;
43
+ stroke-linejoin: round;
44
+ }
45
+ .d3-apo-beta-ablation .line-no-think {
46
+ fill: none;
47
+ stroke-width: 2.5;
48
+ stroke-linecap: round;
49
+ stroke-linejoin: round;
50
+ }
51
+ .d3-apo-beta-ablation .reference-line {
52
+ fill: none;
53
+ stroke-width: 1.5;
54
+ stroke-dasharray: 5, 5;
55
+ opacity: 0.4;
56
+ }
57
+ .d3-apo-beta-ablation .dot {
58
+ stroke: var(--surface-bg);
59
+ stroke-width: 2;
60
+ }
61
+ .d3-apo-beta-ablation .header {
62
+ display: flex;
63
+ align-items: flex-start;
64
+ justify-content: space-between;
65
+ gap: 16px;
66
+ margin-top: 16px;
67
+ flex-wrap: wrap;
68
+ }
69
+ .d3-apo-beta-ablation .legend {
70
+ display: flex;
71
+ flex-direction: column;
72
+ align-items: flex-start;
73
+ gap: 6px;
74
+ }
75
+ .d3-apo-beta-ablation .legend-title {
76
+ font-size: 12px;
77
+ font-weight: 700;
78
+ color: var(--text-color);
79
+ }
80
+ .d3-apo-beta-ablation .legend .items {
81
+ display: flex;
82
+ flex-wrap: wrap;
83
+ gap: 8px 14px;
84
+ }
85
+ .d3-apo-beta-ablation .legend .item {
86
+ display: inline-flex;
87
+ align-items: center;
88
+ gap: 6px;
89
+ white-space: nowrap;
90
+ font-size: 12px;
91
+ color: var(--text-color);
92
+ }
93
+ .d3-apo-beta-ablation .legend .swatch {
94
+ width: 14px;
95
+ height: 14px;
96
+ border-radius: 3px;
97
+ border: 1px solid var(--border-color);
98
+ }
99
+ .d3-apo-beta-ablation .legend .swatch-line {
100
+ width: 20px;
101
+ height: 2px;
102
+ border: none;
103
+ }
104
+ .d3-apo-beta-ablation .legend .swatch-dashed {
105
+ width: 20px;
106
+ height: 2px;
107
+ border: none;
108
+ background: repeating-linear-gradient(
109
+ to right,
110
+ var(--text-color) 0,
111
+ var(--text-color) 4px,
112
+ transparent 4px,
113
+ transparent 8px
114
+ );
115
+ }
116
+ .d3-apo-beta-ablation .controls {
117
+ display: flex;
118
+ gap: 16px;
119
+ align-items: flex-start;
120
+ justify-content: flex-end;
121
+ flex-wrap: wrap;
122
+ }
123
+ .d3-apo-beta-ablation .control-group {
124
+ display: flex;
125
+ flex-direction: column;
126
+ align-items: flex-start;
127
+ gap: 6px;
128
+ }
129
+ .d3-apo-beta-ablation .controls label {
130
+ font-size: 12px;
131
+ font-weight: 700;
132
+ color: var(--text-color);
133
+ }
134
+ .d3-apo-beta-ablation .controls select {
135
+ font-size: 12px;
136
+ padding: 8px 28px 8px 10px;
137
+ border: 1px solid var(--border-color);
138
+ border-radius: 8px;
139
+ background: var(--surface-bg);
140
+ color: var(--text-color);
141
+ cursor: pointer;
142
+ appearance: none;
143
+ background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='12' height='12' viewBox='0 0 12 12'%3E%3Cpath fill='%23666' d='M6 9L1 4h10z'/%3E%3C/svg%3E");
144
+ background-repeat: no-repeat;
145
+ background-position: right 8px center;
146
+ }
147
+ .d3-apo-beta-ablation .controls select:focus {
148
+ outline: 2px solid var(--primary-color);
149
+ outline-offset: 2px;
150
+ }
151
+ .d3-apo-beta-ablation .d3-tooltip {
152
+ position: absolute;
153
+ background: var(--surface-bg);
154
+ border: 1px solid var(--border-color);
155
+ border-radius: 8px;
156
+ padding: 12px;
157
+ pointer-events: none;
158
+ opacity: 0;
159
+ transition: opacity 0.2s;
160
+ box-shadow: 0 2px 8px rgba(0,0,0,0.15);
161
+ font-size: 12px;
162
+ z-index: 1000;
163
+ }
164
+ .d3-apo-beta-ablation .tooltip-title {
165
+ font-weight: 700;
166
+ margin-bottom: 8px;
167
+ color: var(--text-color);
168
+ }
169
+ .d3-apo-beta-ablation .tooltip-item {
170
+ display: flex;
171
+ align-items: center;
172
+ gap: 8px;
173
+ margin: 4px 0;
174
+ color: var(--text-color);
175
+ }
176
+ .d3-apo-beta-ablation .tooltip-color {
177
+ width: 12px;
178
+ height: 12px;
179
+ border-radius: 2px;
180
+ }
181
+ </style>
182
+ <script>
183
+ (() => {
184
/**
 * Invoke `cb` once the global d3 library is usable.
 *
 * If `window.d3.select` is already a function, `cb` runs immediately and its
 * result is returned. Otherwise a `<script id="d3-cdn-script">` pointing at
 * the jsDelivr d3@7 bundle is reused (or created and appended to `<head>`),
 * and `cb` runs from that script's `load` event.
 *
 * @param {Function} cb - Called with no arguments once d3 is available.
 * @returns {*} The result of `cb()` when d3 was already loaded, else undefined.
 */
const ensureD3 = (cb) => {
  const isD3Ready = () => Boolean(window.d3) && typeof window.d3.select === 'function';
  if (isD3Ready()) return cb();

  // The script tag is looked up by id so an existing loader tag is reused
  // instead of injecting a second copy of the library.
  let s = document.getElementById('d3-cdn-script');
  if (!s) {
    s = document.createElement('script');
    s.id = 'd3-cdn-script';
    s.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(s);
  }

  const onReady = () => {
    if (isD3Ready()) cb();
  };
  s.addEventListener('load', onReady, { once: true });

  // Without this, a blocked or failed CDN request leaves the embed blank
  // with no diagnostic at all.
  s.addEventListener(
    'error',
    () => {
      console.error('[d3-apo-beta-ablation] failed to load d3 from', s.src);
    },
    { once: true }
  );
};
197
+
198
+ const bootstrap = () => {
199
+ const scriptEl = document.currentScript;
200
+ let container = scriptEl ? scriptEl.previousElementSibling : null;
201
+ if (!(container && container.classList && container.classList.contains('d3-apo-beta-ablation'))) {
202
+ const candidates = Array.from(document.querySelectorAll('.d3-apo-beta-ablation'))
203
+ .filter((el) => !(el.dataset && el.dataset.mounted === 'true'));
204
+ container = candidates[candidates.length - 1] || null;
205
+ }
206
+ if (!container) return;
207
+ if (container.dataset) {
208
+ if (container.dataset.mounted === 'true') return;
209
+ container.dataset.mounted = 'true';
210
+ }
211
+
212
+ // Data embedded inline
213
+ const data = [{"system_prompt":"/think","Evaluation":"AIME25","Score":45.47,"Beta":0.05},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":7.92,"Beta":0.05},{"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":42.49,"Beta":0.05},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":32.58,"Beta":0.05},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":69.88,"Beta":0.05},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":74.46,"Beta":0.05},{"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":23.76,"Beta":0.05},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":13.86,"Beta":0.05},{"system_prompt":"/think","Evaluation":"Average","Score":45.4,"Beta":0.05},{"system_prompt":"/no_think","Evaluation":"Average","Score":32.205,"Beta":0.05},{"system_prompt":"/think","Evaluation":"AIME25","Score":43.28,"Beta":0.01},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":6.77,"Beta":0.01},{"system_prompt":"/think","Evaluation":"AIME25","Score":46.82,"Beta":0.1},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":8.02,"Beta":0.1},{"system_prompt":"/think","Evaluation":"AIME25","Score":44.95,"Beta":0.2},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":7.6,"Beta":0.2},{"system_prompt":"/think","Evaluation":"AIME25","Score":47.45,"Beta":0.5},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":7.29,"Beta":0.5},{"system_prompt":"/think","Evaluation":"AIME25","Score":47.45,"Beta":0.75},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":7.29,"Beta":0.75},{"system_prompt":"/think","Evaluation":"AIME25","Score":45.36,"Beta":0.99},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":8.8,"Beta":0.99},{"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":39.77,"Beta":0.01},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":32.95,"Beta":0.01},{"system_prompt":"/think","Evaluation":"GPQA 
Diamond","Score":44.38,"Beta":0.1},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":33.27,"Beta":0.1},{"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":42.05,"Beta":0.2},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":31.57,"Beta":0.2},{"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":41.54,"Beta":0.5},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":31.63,"Beta":0.5},{"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":41.54,"Beta":0.75},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":31.63,"Beta":0.75},{"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":42.36,"Beta":0.99},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":30.43,"Beta":0.99},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":69.04,"Beta":0.01},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":77.02,"Beta":0.01},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":71.47,"Beta":0.1},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":75.36,"Beta":0.1},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":71.55,"Beta":0.2},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":75.33,"Beta":0.2},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":73.58,"Beta":0.5},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":75.91,"Beta":0.5},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":73.58,"Beta":0.75},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":75.91,"Beta":0.75},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":69.13,"Beta":0.99},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":75.41,"Beta":0.99},{"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":12.87,"Beta":0.01},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":10.89,"Beta":0.01},{"system_prompt":"/think","Evaluation":"LiveCodeBench 
v4","Score":29.7,"Beta":0.1},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":16.83,"Beta":0.1},{"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":26.73,"Beta":0.2},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":16.83,"Beta":0.2},{"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":28.71,"Beta":0.5},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":11.88,"Beta":0.5},{"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":28.71,"Beta":0.75},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":11.88,"Beta":0.75},{"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":34.65,"Beta":0.99},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":19.8,"Beta":0.99},{"system_prompt":"/think","Evaluation":"Average","Score":41.24,"Beta":0.01},{"system_prompt":"/no_think","Evaluation":"Average","Score":31.9075,"Beta":0.01},{"system_prompt":"/think","Evaluation":"Average","Score":48.0925,"Beta":0.1},{"system_prompt":"/no_think","Evaluation":"Average","Score":33.37,"Beta":0.1},{"system_prompt":"/think","Evaluation":"Average","Score":46.32,"Beta":0.2},{"system_prompt":"/no_think","Evaluation":"Average","Score":32.8325,"Beta":0.2},{"system_prompt":"/think","Evaluation":"Average","Score":47.82,"Beta":0.5},{"system_prompt":"/no_think","Evaluation":"Average","Score":31.6775,"Beta":0.5},{"system_prompt":"/think","Evaluation":"Average","Score":47.82,"Beta":0.75},{"system_prompt":"/no_think","Evaluation":"Average","Score":31.6775,"Beta":0.75},{"system_prompt":"/think","Evaluation":"Average","Score":47.875,"Beta":0.99},{"system_prompt":"/no_think","Evaluation":"Average","Score":33.61,"Beta":0.99}];
214
+ const sftData = [{"system_prompt":"/think","Evaluation":"AIME25","Score":36.56},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":4.01},{"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":42.23},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":30.43},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":70.03},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":67.29},{"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":36.63},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":12.87},{"system_prompt":"/think","Evaluation":"Average","Score":46.3625},{"system_prompt":"/no_think","Evaluation":"Average","Score":28.65}];
215
+
216
+ // Get colors from ColorPalettes or fallback
217
/**
 * Resolve the two series colours.
 *
 * Asks `window.ColorPalettes.getColors('categorical', 2)` when that function
 * exists; otherwise uses the built-in defaults. Each colour falls back to
 * its default on its own, so a short or missing palette never yields an
 * `undefined` stroke/fill.
 *
 * @returns {{think: string, noThink: string}} Colours for the two series.
 */
const getColors = () => {
  const fallback = { think: '#E377C2', noThink: '#7FC97F' };
  const palettes = window.ColorPalettes;
  if (!palettes || typeof palettes.getColors !== 'function') {
    return fallback;
  }
  const palette = palettes.getColors('categorical', 2) || [];
  return {
    think: palette[0] || fallback.think,
    noThink: palette[1] || fallback.noThink,
  };
};
224
+
225
+ let colors = getColors();
226
+
227
+ // Set up dimensions
228
+ const margin = { top: 16, right: 28, bottom: 56, left: 64 };
229
+
230
+ // Create SVG
231
+ const svg = d3.select(container).append('svg').attr('width', '100%').style('display', 'block');
232
+ const g = svg.append('g');
233
+
234
+ // Tooltip
235
+ container.style.position = container.style.position || 'relative';
236
+ let tip = container.querySelector('.d3-tooltip');
237
+ let tipInner;
238
+ if (!tip) {
239
+ tip = document.createElement('div');
240
+ tip.className = 'd3-tooltip';
241
+ Object.assign(tip.style, {
242
+ position: 'absolute',
243
+ top: '0px',
244
+ left: '0px',
245
+ transform: 'translate(-9999px, -9999px)',
246
+ pointerEvents: 'none',
247
+ padding: '8px 10px',
248
+ borderRadius: '8px',
249
+ fontSize: '12px',
250
+ lineHeight: '1.35',
251
+ border: '1px solid var(--border-color)',
252
+ background: 'var(--surface-bg)',
253
+ color: 'var(--text-color)',
254
+ boxShadow: '0 4px 24px rgba(0,0,0,.18)',
255
+ opacity: '0',
256
+ transition: 'opacity .12s ease',
257
+ zIndex: '1000'
258
+ });
259
+ tipInner = document.createElement('div');
260
+ tipInner.className = 'd3-tooltip__inner';
261
+ tipInner.style.textAlign = 'left';
262
+ tip.appendChild(tipInner);
263
+ container.appendChild(tip);
264
+ } else {
265
+ tipInner = tip.querySelector('.d3-tooltip__inner') || tip;
266
+ }
267
+
268
/**
 * Fill the tooltip with `html` and reveal it next to the pointer.
 *
 * The pointer position is measured relative to `container`, and the tooltip
 * is shifted 12px right and 12px down from it.
 *
 * @param {string} html - Markup assigned to the tooltip's inner element.
 * @param {Event} event - Pointer event passed on to `d3.pointer`.
 */
const showTooltip = (html, event) => {
  const POINTER_GAP_PX = 12;
  tipInner.innerHTML = html;
  const pointerPos = d3.pointer(event, container);
  const left = pointerPos[0] + POINTER_GAP_PX;
  const top = pointerPos[1] + POINTER_GAP_PX;
  tip.style.transform = `translate(${left}px, ${top}px)`;
  tip.style.opacity = '1';
};
275
+
276
/**
 * Fade the tooltip out, then park it off-screen after 120ms.
 *
 * The delayed move is skipped if the tooltip has been shown again in the
 * meantime. Otherwise leaving one dot and entering another within 120ms
 * would move the freshly shown tooltip off-screen.
 */
const hideTooltip = () => {
  tip.style.opacity = '0';
  setTimeout(() => {
    // showTooltip sets opacity back to '1'; only park a tooltip that is still hidden.
    if (tip.style.opacity === '0') {
      tip.style.transform = 'translate(-9999px, -9999px)';
    }
  }, 120);
};
282
+
283
+ // Get unique evaluations
284
+ const evaluations = [...new Set(data.map(d => d.Evaluation))];
285
+
286
+ // Create header with legend and controls
287
+ const header = d3.select(container).append('div').attr('class', 'header');
288
+
289
+ const legend = header.append('div').attr('class', 'legend');
290
+ legend.append('div').attr('class', 'legend-title').text('Legend');
291
+ const legendItems = legend.append('div').attr('class', 'items');
292
+
293
+ const controls = header.append('div').attr('class', 'controls');
294
+ const controlGroup = controls.append('div').attr('class', 'control-group');
295
+ controlGroup.append('label').attr('for', 'metric-select-beta').text('Metric');
296
+ const select = controlGroup.append('select').attr('id', 'metric-select-beta');
297
+
298
+ // Populate dropdown
299
+ select.selectAll('option')
300
+ .data(evaluations)
301
+ .enter()
302
+ .append('option')
303
+ .text(d => d)
304
+ .attr('value', d => d);
305
+
306
+ // Build legend
307
/**
 * Rebuild the legend: one solid swatch per series, coloured from the
 * current `colors`, plus a dashed swatch for the SFT checkpoint.
 * Existing legend entries are cleared first.
 */
const buildLegend = () => {
  legendItems.html('');

  const entries = [
    { swatchClass: 'swatch-line', label: '/think', colored: true, fill: colors.think },
    { swatchClass: 'swatch-line', label: '/no_think', colored: true, fill: colors.noThink },
    // The dashed swatch gets its look from CSS alone, so no inline background.
    { swatchClass: 'swatch-dashed', label: 'SFT checkpoint', colored: false },
  ];

  entries.forEach((entry) => {
    const row = legendItems.append('span').attr('class', 'item');
    const swatch = row.append('span').attr('class', entry.swatchClass);
    if (entry.colored) {
      swatch.style('background', entry.fill);
    }
    row.append('span').text(entry.label);
  });
};
322
+
323
+ buildLegend();
324
+
325
+ // Update chart function
326
/**
 * Redraw the whole chart for one evaluation.
 *
 * Clears the plot group, resizes the SVG to the container's current width,
 * then draws grid, axes, axis labels, the dashed SFT reference lines, and
 * the /think and /no_think series (line plus hoverable dots) over a log
 * Beta axis and a linear score axis.
 *
 * @param {string} evaluation - Value matched against each row's `Evaluation`.
 */
function updateChart(evaluation) {
  const rows = data.filter(d => d.Evaluation === evaluation);
  const byBeta = (a, b) => a.Beta - b.Beta;
  const thinkData = rows.filter(d => d.system_prompt === "/think").sort(byBeta);
  const noThinkData = rows.filter(d => d.system_prompt === "/no_think").sort(byBeta);

  const sftThink = sftData.find(d => d.Evaluation === evaluation && d.system_prompt === "/think");
  const sftNoThink = sftData.find(d => d.Evaluation === evaluation && d.system_prompt === "/no_think");

  // Each call rebuilds the plot from scratch.
  g.selectAll("*").remove();

  const width = container.clientWidth || 800;
  const height = Math.max(320, Math.round(width / 2.5));
  // Clamp so a container narrower than the margins cannot invert the scales.
  const plotWidth = Math.max(0, width - margin.left - margin.right);
  const plotHeight = Math.max(0, height - margin.top - margin.bottom);

  svg.attr('width', width).attr('height', height);
  g.attr('transform', `translate(${margin.left},${margin.top})`);

  // No rows for this evaluation: min/max would be undefined and the log
  // scale would emit NaN coordinates, so leave the plot empty instead.
  if (rows.length === 0) return;

  // Scales
  const xScale = d3.scaleLog()
    .domain([d3.min(rows, d => d.Beta), d3.max(rows, d => d.Beta)])
    .range([0, plotWidth]);

  const scores = rows.map(d => d.Score);
  if (sftThink) scores.push(sftThink.Score);
  if (sftNoThink) scores.push(sftNoThink.Score);

  const yScale = d3.scaleLinear()
    .domain([0, d3.max(scores) * 1.1])
    .range([plotHeight, 0]);

  // Grid
  g.append("g")
    .attr("class", "grid")
    .attr("transform", `translate(0,${plotHeight})`)
    .call(d3.axisBottom(xScale).tickSize(-plotHeight).tickFormat("").tickSizeOuter(0));

  g.append("g")
    .attr("class", "grid")
    .call(d3.axisLeft(yScale).tickSize(-plotWidth).tickFormat("").tickSizeOuter(0));

  // Axes: force the theme variables onto ticks and domain line.
  const styleAxis = (gAxis) => {
    gAxis.selectAll(".tick line").attr("stroke", "var(--axis-color)").style("opacity", 1);
    gAxis.selectAll(".tick text").attr("fill", "var(--tick-color)").style("opacity", 1);
    gAxis.select(".domain").attr("stroke", "var(--axis-color)");
  };

  const tickValues = [0.01, 0.05, 0.1, 0.5, 1.0];
  g.append("g")
    .attr("class", "axes")
    .attr("transform", `translate(0,${plotHeight})`)
    .call(d3.axisBottom(xScale).tickValues(tickValues).tickFormat(d3.format(".2f")).tickSizeOuter(0))
    .call(styleAxis);

  g.append("g")
    .attr("class", "axes")
    .call(d3.axisLeft(yScale).ticks(6).tickSizeOuter(0))
    .call(styleAxis);

  // Axis labels
  g.append("text")
    .attr("class", "axis-label")
    .attr("text-anchor", "middle")
    .attr("x", plotWidth / 2)
    .attr("y", plotHeight + 40)
    .text("Beta");

  g.append("text")
    .attr("class", "axis-label")
    .attr("text-anchor", "middle")
    .attr("transform", "rotate(-90)")
    .attr("y", -45)
    .attr("x", -plotHeight / 2)
    .text("Score (%)");

  // Dashed horizontal line at an SFT score, drawn only when that score exists.
  const drawReference = (sftRow, stroke) => {
    if (!sftRow) return;
    const y = yScale(sftRow.Score);
    g.append("line")
      .attr("class", "reference-line")
      .style("stroke", stroke)
      .attr("x1", 0)
      .attr("x2", plotWidth)
      .attr("y1", y)
      .attr("y2", y);
  };
  drawReference(sftThink, colors.think);
  drawReference(sftNoThink, colors.noThink);

  // Series lines
  const line = d3.line()
    .x(d => xScale(d.Beta))
    .y(d => yScale(d.Score));

  g.append("path")
    .datum(thinkData)
    .attr("class", "line-think")
    .style("stroke", colors.think)
    .attr("d", line);

  g.append("path")
    .datum(noThinkData)
    .attr("class", "line-no-think")
    .style("stroke", colors.noThink)
    .attr("d", line);

  // Tooltip markup shared by both series. A row is omitted when that
  // series has no point at the hovered Beta.
  const tooltipRow = (row, label, color) => (row ? `
    <div class="tooltip-item">
      <div class="tooltip-color" style="background-color: ${color};"></div>
      <span>${label}: ${row.Score.toFixed(2)}%</span>
    </div>` : '');

  const tooltipHtml = (beta, thinkRow, noThinkRow) => `
    <div class="tooltip-title">Beta ${beta.toFixed(2)}</div>
    ${tooltipRow(thinkRow, '/think', colors.think)}
    ${tooltipRow(noThinkRow, '/no_think', colors.noThink)}
  `;

  const atBeta = (series, beta) => series.find(item => item.Beta === beta);

  // One circle per point. `describe` maps the hovered datum to tooltip HTML.
  const drawDots = (series, seriesClass, fill, describe) => {
    g.selectAll(`.${seriesClass}`)
      .data(series)
      .enter()
      .append("circle")
      .attr("class", `dot ${seriesClass}`)
      .style("fill", fill)
      .attr("cx", d => xScale(d.Beta))
      .attr("cy", d => yScale(d.Score))
      .attr("r", 4)
      .on("mouseenter", (event, d) => showTooltip(describe(d), event))
      .on("mouseleave", hideTooltip);
  };

  drawDots(thinkData, "dot-think", colors.think,
    d => tooltipHtml(d.Beta, d, atBeta(noThinkData, d.Beta)));
  drawDots(noThinkData, "dot-no-think", colors.noThink,
    d => tooltipHtml(d.Beta, atBeta(thinkData, d.Beta), d));
}
500
+
501
+ // Set default value to "Average" if it exists
502
+ const defaultEval = evaluations.includes("Average") ? "Average" : evaluations[0];
503
+ select.property("value", defaultEval);
504
+
505
+ // Initial chart
506
+ updateChart(defaultEval);
507
+
508
+ // Update on dropdown change
509
+ select.on("change", function() {
510
+ updateChart(this.value);
511
+ });
512
+
513
+ // Resize handling
514
+ const rerender = () => updateChart(select.property("value"));
515
+ if (window.ResizeObserver) {
516
+ const ro = new ResizeObserver(() => rerender());
517
+ ro.observe(container);
518
+ } else {
519
+ window.addEventListener('resize', rerender);
520
+ }
521
+
522
+ // Listen for ColorPalettes changes
523
+ if (window.ColorPalettes && typeof window.ColorPalettes.addListener === 'function') {
524
+ window.ColorPalettes.addListener(() => {
525
+ colors = getColors();
526
+ buildLegend();
527
+ updateChart(select.property("value"));
528
+ });
529
+ }
530
+ };
531
+
532
+ if (document.readyState === 'loading') {
533
+ document.addEventListener('DOMContentLoaded', () => ensureD3(bootstrap), { once: true });
534
+ } else {
535
+ ensureD3(bootstrap);
536
+ }
537
+ })();
538
+ </script>
app/src/content/embeds/d3-po-loss-ablations.html ADDED
@@ -0,0 +1,527 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div class="d3-dpo-ablations"></div>
2
+ <style>
3
+ .d3-dpo-ablations {
4
+ width: 100%;
5
+ position: relative;
6
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
7
+ }
8
+ .d3-dpo-ablations svg {
9
+ display: block;
10
+ width: 100%;
11
+ }
12
+ .d3-dpo-ablations .bar {
13
+ stroke: none;
14
+ }
15
+ .d3-dpo-ablations .axes path,
16
+ .d3-dpo-ablations .axes line {
17
+ stroke: var(--axis-color, var(--text-color));
18
+ }
19
+ .d3-dpo-ablations .axes text {
20
+ fill: var(--tick-color, var(--muted-color));
21
+ font-size: 11px;
22
+ }
23
+ .d3-dpo-ablations .grid line {
24
+ stroke: var(--grid-color, rgba(0,0,0,.08));
25
+ }
26
+ .d3-dpo-ablations .d3-tooltip {
27
+ position: absolute;
28
+ top: 0;
29
+ left: 0;
30
+ transform: translate(-9999px, -9999px);
31
+ pointer-events: none;
32
+ padding: 8px 10px;
33
+ border-radius: 8px;
34
+ font-size: 12px;
35
+ line-height: 1.35;
36
+ border: 1px solid var(--border-color);
37
+ background: var(--surface-bg);
38
+ color: var(--text-color);
39
+ box-shadow: 0 4px 24px rgba(0,0,0,.18);
40
+ opacity: 0;
41
+ transition: opacity .12s ease;
42
+ }
43
+ .d3-dpo-ablations .d3-tooltip__inner {
44
+ text-align: left;
45
+ }
46
+ .d3-dpo-ablations .legend {
47
+ display: flex;
48
+ flex-direction: column;
49
+ align-items: flex-start;
50
+ gap: 6px;
51
+ margin-top: 16px;
52
+ }
53
+ .d3-dpo-ablations .legend-title {
54
+ font-size: 12px;
55
+ font-weight: 700;
56
+ color: var(--text-color);
57
+ }
58
+ .d3-dpo-ablations .legend .items {
59
+ display: flex;
60
+ flex-wrap: wrap;
61
+ gap: 8px 14px;
62
+ }
63
+ .d3-dpo-ablations .legend .item {
64
+ display: inline-flex;
65
+ align-items: center;
66
+ gap: 6px;
67
+ white-space: nowrap;
68
+ font-size: 12px;
69
+ color: var(--text-color);
70
+ }
71
+ .d3-dpo-ablations .legend .swatch {
72
+ width: 14px;
73
+ height: 14px;
74
+ border-radius: 3px;
75
+ border: 1px solid var(--border-color);
76
+ }
77
+ .d3-dpo-ablations .controls {
78
+ display: flex;
79
+ gap: 16px;
80
+ align-items: center;
81
+ justify-content: flex-end;
82
+ flex-wrap: wrap;
83
+ margin-top: 8px;
84
+ }
85
+ .d3-dpo-ablations .control-group {
86
+ display: flex;
87
+ flex-direction: column;
88
+ align-items: flex-start;
89
+ gap: 6px;
90
+ }
91
+ .d3-dpo-ablations .controls label {
92
+ font-size: 12px;
93
+ font-weight: 700;
94
+ color: var(--text-color);
95
+ }
96
+ .d3-dpo-ablations .controls select {
97
+ font-size: 12px;
98
+ padding: 8px 28px 8px 10px;
99
+ border: 1px solid var(--border-color);
100
+ border-radius: 8px;
101
+ background: var(--surface-bg);
102
+ color: var(--text-color);
103
+ cursor: pointer;
104
+ }
105
+ .d3-dpo-ablations .checkbox-group {
106
+ display: flex;
107
+ align-items: center;
108
+ gap: 6px;
109
+ }
110
+ .d3-dpo-ablations .checkbox-group input[type="checkbox"] {
111
+ width: 16px;
112
+ height: 16px;
113
+ cursor: pointer;
114
+ }
115
+ .d3-dpo-ablations .checkbox-group label {
116
+ font-size: 12px;
117
+ color: var(--text-color);
118
+ cursor: pointer;
119
+ font-weight: 400;
120
+ }
121
+ </style>
122
+ <script>
123
+ (() => {
124
/**
 * Invoke `cb` once the global d3 library is usable.
 *
 * If `window.d3.select` is already a function, `cb` runs immediately and its
 * result is returned. Otherwise a `<script id="d3-cdn-script">` pointing at
 * the jsDelivr d3@7 bundle is reused (or created and appended to `<head>`),
 * and `cb` runs from that script's `load` event.
 *
 * @param {Function} cb - Called with no arguments once d3 is available.
 * @returns {*} The result of `cb()` when d3 was already loaded, else undefined.
 */
const ensureD3 = (cb) => {
  const isD3Ready = () => Boolean(window.d3) && typeof window.d3.select === 'function';
  if (isD3Ready()) return cb();

  // The script tag is looked up by id so an existing loader tag is reused
  // instead of injecting a second copy of the library.
  let s = document.getElementById('d3-cdn-script');
  if (!s) {
    s = document.createElement('script');
    s.id = 'd3-cdn-script';
    s.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(s);
  }

  const onReady = () => {
    if (isD3Ready()) cb();
  };
  s.addEventListener('load', onReady, { once: true });

  // Without this, a blocked or failed CDN request leaves the embed blank
  // with no diagnostic at all.
  s.addEventListener(
    'error',
    () => {
      console.error('[d3-dpo-ablations] failed to load d3 from', s.src);
    },
    { once: true }
  );
};
139
+
140
+ const bootstrap = () => {
141
+ const scriptEl = document.currentScript;
142
+ let container = scriptEl ? scriptEl.previousElementSibling : null;
143
+ if (!(container && container.classList && container.classList.contains('d3-dpo-ablations'))) {
144
+ const candidates = Array.from(document.querySelectorAll('.d3-dpo-ablations'))
145
+ .filter((el) => !(el.dataset && el.dataset.mounted === 'true'));
146
+ container = candidates[candidates.length - 1] || null;
147
+ }
148
+ if (!container) return;
149
+ if (container.dataset) {
150
+ if (container.dataset.mounted === 'true') return;
151
+ container.dataset.mounted = 'true';
152
+ }
153
+
154
+ // Tooltip
155
+ container.style.position = container.style.position || 'relative';
156
+ let tip = container.querySelector('.d3-tooltip');
157
+ let tipInner;
158
+ if (!tip) {
159
+ tip = document.createElement('div');
160
+ tip.className = 'd3-tooltip';
161
+ tipInner = document.createElement('div');
162
+ tipInner.className = 'd3-tooltip__inner';
163
+ tip.appendChild(tipInner);
164
+ container.appendChild(tip);
165
+ } else {
166
+ tipInner = tip.querySelector('.d3-tooltip__inner') || tip;
167
+ }
168
+
169
/**
 * Fill the tooltip with `html`, make it visible, and move it next to the
 * pointer: 12px to the right of and 12px above the position measured
 * relative to `container`.
 *
 * @param {string} html - Markup assigned to the tooltip's inner element.
 * @param {Event} event - Pointer event passed on to `d3.pointer`.
 */
const showTooltip = (html, event) => {
  const POINTER_GAP_PX = 12;
  tipInner.innerHTML = html;
  tip.style.opacity = '1';
  const pointerPos = d3.pointer(event, container);
  const left = pointerPos[0] + POINTER_GAP_PX;
  const top = pointerPos[1] - POINTER_GAP_PX;
  tip.style.transform = `translate(${left}px, ${top}px)`;
};
175
+
176
/**
 * Fade the tooltip out, then park it off-screen after 120ms.
 *
 * The delayed move is skipped if the tooltip has been shown again in the
 * meantime. Otherwise moving quickly from one mark to the next would move
 * the freshly shown tooltip off-screen.
 */
const hideTooltip = () => {
  tip.style.opacity = '0';
  setTimeout(() => {
    // showTooltip sets opacity back to '1'; only park a tooltip that is still hidden.
    if (tip.style.opacity === '0') {
      tip.style.transform = 'translate(-9999px, -9999px)';
    }
  }, 120);
};
182
+
183
+ // SVG scaffolding
184
+ const svg = d3.select(container).append('svg').attr('width', '100%').style('display', 'block');
185
+ const gRoot = svg.append('g');
186
+
187
+ let width = 800, height = 400;
188
+ const margin = { top: 16, right: 16, bottom: 80, left: 60 };
189
+
190
+ // Data loading
191
+ const CSV_PATHS = [
192
+ '/data/apo/smollm3_dpo_ablations.csv',
193
+ './assets/data/apo/smollm3_dpo_ablations.csv',
194
+ '../assets/data/apo/smollm3_dpo_ablations.csv',
195
+ '../../assets/data/apo/smollm3_dpo_ablations.csv'
196
+ ];
197
+
198
/**
 * Tries each candidate URL in order and resolves with the body text of the first
 * one that answers with an OK status.
 * @param {string[]} paths - Candidate URLs, requested one after another.
 * @returns {Promise<string>} Text of the first successfully fetched resource.
 * @throws {Error} 'CSV not found' when no candidate succeeds; its `cause` holds the
 *   last failure seen (network error or HTTP status) to help debugging.
 */
const fetchFirstAvailable = async (paths) => {
  let lastFailure = null;
  for (const candidate of paths) {
    try {
      const response = await fetch(candidate, { cache: 'no-cache' });
      if (response.ok) return await response.text();
      lastFailure = new Error(`HTTP ${response.status} for ${candidate}`);
    } catch (err) {
      // Best effort: remember why this candidate failed and try the next one.
      lastFailure = err;
    }
  }
  const notFound = new Error('CSV not found');
  if (lastFailure) notFound.cause = lastFailure;
  throw notFound;
};
207
+
208
+ fetchFirstAvailable(CSV_PATHS)
209
+ .then((csvText) => {
210
+ const data = d3.csvParse(csvText);
211
+
212
+ // Get unique methods and benchmarks
213
+ const methods = ['SFT', 'DPO', 'IPO', 'APO-zero', 'APO-down', 'DiscoPOP'];
214
+ const benchmarks = ['aime25', 'gpqa_d', 'ifeval', 'lcb_v4'];
215
+ const benchmarkNames = {
216
+ 'aime25': 'AIME 2025',
217
+ 'gpqa_d': 'GPQA Diamond',
218
+ 'ifeval': 'IFEval',
219
+ 'lcb_v4': 'LiveCodeBench v4'
220
+ };
221
+
222
+ let selectedMode = 'no_think';
223
+ let selectedBenchmark = 'ifeval';
224
+ let showDelta = false;
225
+
226
+ // Get colors
227
+ const colors = window.ColorPalettes
228
+ ? window.ColorPalettes.getColors('categorical', methods.length)
229
+ : ['#4e79a7', '#f28e2c', '#e15759', '#76b7b2', '#59a14f', '#edc949'];
230
+
231
// Create controls: reasoning-mode select, benchmark select and the "delta vs SFT"
// checkbox. Each control updates its state variable and triggers a re-render.
const controls = document.createElement('div');
controls.className = 'controls';

// Suffix shared by this chart's control ids. Date.now() alone is not unique: if two
// chart instances are created within the same millisecond they would get identical
// ids and a <label for> could end up pointing at the other chart's control.
const controlUid = `${Date.now()}-${Math.random().toString(36).slice(2, 10)}`;

// Reasoning mode select
const modeGroup = document.createElement('div');
modeGroup.className = 'control-group';

const modeLabel = document.createElement('label');
modeLabel.textContent = 'Reasoning mode';
modeLabel.setAttribute('for', `mode-select-${controlUid}`);

const modeSelect = document.createElement('select');
modeSelect.id = modeLabel.getAttribute('for');

const thinkOption = document.createElement('option');
thinkOption.value = 'think';
thinkOption.textContent = '/think';

const noThinkOption = document.createElement('option');
noThinkOption.value = 'no_think';
noThinkOption.textContent = '/no_think';
noThinkOption.selected = true;

modeSelect.appendChild(thinkOption);
modeSelect.appendChild(noThinkOption);

modeSelect.addEventListener('change', (e) => {
  selectedMode = e.target.value;
  render();
});

modeGroup.appendChild(modeLabel);
modeGroup.appendChild(modeSelect);

// Benchmark select
const benchmarkGroup = document.createElement('div');
benchmarkGroup.className = 'control-group';

const benchmarkLabel = document.createElement('label');
benchmarkLabel.textContent = 'Benchmark';
benchmarkLabel.setAttribute('for', `benchmark-select-${controlUid}`);

const benchmarkSelect = document.createElement('select');
benchmarkSelect.id = benchmarkLabel.getAttribute('for');

benchmarks.forEach(bench => {
  const option = document.createElement('option');
  option.value = bench;
  option.textContent = benchmarkNames[bench];
  // Pre-select whatever the state variable currently holds so the UI and the
  // first render cannot disagree.
  if (bench === selectedBenchmark) option.selected = true;
  benchmarkSelect.appendChild(option);
});

benchmarkSelect.addEventListener('change', (e) => {
  selectedBenchmark = e.target.value;
  render();
});

benchmarkGroup.appendChild(benchmarkLabel);
benchmarkGroup.appendChild(benchmarkSelect);

// Delta checkbox
const deltaGroup = document.createElement('div');
deltaGroup.className = 'control-group';

const deltaCheckboxGroup = document.createElement('div');
deltaCheckboxGroup.className = 'checkbox-group';

const deltaCheckbox = document.createElement('input');
deltaCheckbox.type = 'checkbox';
deltaCheckbox.id = `delta-checkbox-${controlUid}`;

const deltaLabel = document.createElement('label');
deltaLabel.textContent = 'Show Δ vs SFT';
deltaLabel.setAttribute('for', deltaCheckbox.id);

deltaCheckbox.addEventListener('change', (e) => {
  showDelta = e.target.checked;
  render();
});

deltaCheckboxGroup.appendChild(deltaCheckbox);
deltaCheckboxGroup.appendChild(deltaLabel);
deltaGroup.appendChild(deltaCheckboxGroup);

controls.appendChild(modeGroup);
controls.appendChild(benchmarkGroup);
controls.appendChild(deltaGroup);
container.appendChild(controls);
321
+
322
+ // Create legend
323
+ const legend = document.createElement('div');
324
+ legend.className = 'legend';
325
+
326
+ const legendTitle = document.createElement('div');
327
+ legendTitle.className = 'legend-title';
328
+ legendTitle.textContent = 'Legend';
329
+
330
+ const legendItems = document.createElement('div');
331
+ legendItems.className = 'items';
332
+
333
+ methods.forEach((method, idx) => {
334
+ const item = document.createElement('span');
335
+ item.className = 'item';
336
+
337
+ const swatch = document.createElement('span');
338
+ swatch.className = 'swatch';
339
+ swatch.style.background = colors[idx];
340
+
341
+ const text = document.createElement('span');
342
+ text.textContent = method;
343
+
344
+ item.appendChild(swatch);
345
+ item.appendChild(text);
346
+ legendItems.appendChild(item);
347
+ });
348
+
349
+ legend.appendChild(legendTitle);
350
+ legend.appendChild(legendItems);
351
+ container.appendChild(legend);
352
+
353
/**
 * Recomputes the chart size from the container's current width, applies it to the
 * SVG and to the root group's margin offset, and reports the drawable area.
 * Updates the enclosing `width` and `height` variables as a side effect.
 * @returns {{innerWidth: number, innerHeight: number}} Plot area inside the margins.
 */
function updateSize() {
  const measuredWidth = container.clientWidth;
  // A zero/unknown width (e.g. container not laid out yet) falls back to 800px.
  width = measuredWidth ? measuredWidth : 800;
  // 2.5:1 aspect ratio, but never shorter than 360px.
  height = Math.max(360, Math.round(width / 2.5));

  svg.attr('width', width);
  svg.attr('height', height);
  gRoot.attr('transform', `translate(${margin.left},${margin.top})`);

  const plotWidth = width - margin.left - margin.right;
  const plotHeight = height - margin.top - margin.bottom;
  return { innerWidth: plotWidth, innerHeight: plotHeight };
}
363
+
364
/**
 * Rebuilds the bar chart from scratch for the current UI state
 * (`selectedMode`, `selectedBenchmark`, `showDelta`) and container width.
 * Everything inside `gRoot` is removed and drawn again on each call.
 */
function render() {
  const { innerWidth, innerHeight } = updateSize();

  // Clear previous
  gRoot.selectAll('*').remove();

  // Rows that belong to the selected reasoning mode
  const modeRows = data.filter(d => d['reasoning mode'] === selectedMode);

  // Score of one method for the selected benchmark. A missing row or a cell that
  // does not parse to a finite number counts as 0, so NaN never reaches the
  // scales, the bar geometry or the tooltip text.
  const scoreOf = (method) => {
    const row = modeRows.find(d => d.Method === method);
    const parsed = row ? parseFloat(row[selectedBenchmark]) : 0;
    return Number.isFinite(parsed) ? parsed : 0;
  };

  // One entry per method; `value` is what gets plotted, `absoluteValue` is kept
  // for the tooltip even when deltas are shown.
  let chartData = methods.map(method => {
    const absoluteValue = scoreOf(method);
    return { method, absoluteValue, value: absoluteValue };
  });

  // SFT is the baseline every delta is measured against
  const sftValue = chartData.find(d => d.method === 'SFT').absoluteValue;

  // Calculate delta if checkbox is checked
  if (showDelta) {
    chartData = chartData.map(d => ({
      ...d,
      value: d.method === 'SFT' ? 0 : d.absoluteValue - sftValue
    }));
  }

  // Separate SFT and sort others by descending value
  const sftData = chartData.find(d => d.method === 'SFT');
  const otherData = chartData.filter(d => d.method !== 'SFT')
    .sort((a, b) => b.value - a.value);

  // SFT goes first in absolute mode and is dropped in delta mode (its delta is 0)
  chartData = showDelta ? otherData : [sftData, ...otherData];

  // Get ordered methods for the x-axis
  const orderedMethods = chartData.map(d => d.method);

  // Scales
  const xScale = d3.scaleBand()
    .domain(orderedMethods)
    .range([0, innerWidth])
    .padding(0.3);

  // Y domain: [0, max] in absolute mode; in delta mode it also reaches below zero
  // and the top is at least as far from zero as the most negative delta.
  const yMin = showDelta ? Math.min(0, d3.min(chartData, d => d.value)) : 0;
  const rawYMax = showDelta
    ? Math.max(Math.abs(yMin), d3.max(chartData, d => d.value)) * 1.1
    : d3.max(chartData, d => d.value) * 1.1;
  // When every plotted value is 0 the domain would collapse to a single point;
  // fall back to an arbitrary positive top so the axis stays meaningful.
  const yMax = rawYMax > 0 ? rawYMax : 1;

  const yScale = d3.scaleLinear()
    .domain(showDelta ? [yMin * 1.1, yMax] : [0, yMax])
    .range([innerHeight, 0])
    .nice();

  // Grid
  gRoot.append('g')
    .attr('class', 'grid')
    .call(
      d3.axisLeft(yScale)
        .ticks(6)
        .tickSize(-innerWidth)
        .tickFormat('')
    )
    .call(g => g.select('.domain').remove());

  // Colors follow the original method order, so a method keeps its color no
  // matter where sorting places its bar.
  const colorMap = {};
  methods.forEach((method, idx) => {
    colorMap[method] = colors[idx];
  });

  // Add zero line if showing delta
  if (showDelta) {
    gRoot.append('line')
      .attr('x1', 0)
      .attr('x2', innerWidth)
      .attr('y1', yScale(0))
      .attr('y2', yScale(0))
      .attr('stroke', 'var(--text-color)')
      .attr('stroke-width', 2)
      .attr('stroke-dasharray', '4,4')
      .attr('opacity', 0.5);
  }

  // Bars: positive values grow upwards from the zero line, negative ones downwards
  gRoot.selectAll('rect.bar')
    .data(chartData)
    .join('rect')
    .attr('class', 'bar')
    .attr('x', d => xScale(d.method))
    .attr('y', d => d.value >= 0 ? yScale(d.value) : yScale(0))
    .attr('width', xScale.bandwidth())
    .attr('height', d => Math.abs(yScale(d.value) - yScale(0)))
    .attr('fill', d => colorMap[d.method])
    .attr('opacity', 0.85)
    .on('mouseenter', (event, d) => {
      const deltaText = showDelta && d.method !== 'SFT'
        ? `Δ: ${d.value >= 0 ? '+' : ''}${d.value.toFixed(2)}%`
        : '';
      const absoluteText = `${benchmarkNames[selectedBenchmark]}: ${d.absoluteValue.toFixed(2)}%`;
      const html = `<strong>${d.method}</strong><br/>${absoluteText}${deltaText ? '<br/>' + deltaText : ''}`;
      showTooltip(html, event);
    })
    .on('mouseleave', hideTooltip);

  // X axis
  const xAxis = gRoot.append('g')
    .attr('class', 'axes')
    .attr('transform', `translate(0,${innerHeight})`)
    .call(d3.axisBottom(xScale));

  xAxis.select('.domain').remove();

  xAxis.selectAll('text')
    .style('text-anchor', 'middle');

  // Y axis
  const yAxis = gRoot.append('g')
    .attr('class', 'axes')
    .call(d3.axisLeft(yScale).ticks(6));

  yAxis.select('.domain').remove();

  // Y axis label
  gRoot.append('text')
    .attr('class', 'axes')
    .attr('transform', 'rotate(-90)')
    .attr('x', -innerHeight / 2)
    .attr('y', -45)
    .attr('text-anchor', 'middle')
    .style('font-size', '12px')
    .style('fill', 'var(--text-color)')
    .text(showDelta ? 'Δ Score vs SFT (%)' : 'Score (%)');
}
503
+
504
+ render();
505
+
506
+ if (window.ResizeObserver) {
507
+ const ro = new ResizeObserver(() => render());
508
+ ro.observe(container);
509
+ } else {
510
+ window.addEventListener('resize', render);
511
+ }
512
+ })
513
+ .catch((err) => {
514
+ const pre = document.createElement('pre');
515
+ pre.textContent = 'Error loading data: ' + err.message;
516
+ pre.style.cssText = 'color:red;font-size:12px;padding:12px;margin:0;';
517
+ container.appendChild(pre);
518
+ });
519
+ };
520
+
521
+ if (document.readyState === 'loading') {
522
+ document.addEventListener('DOMContentLoaded', () => ensureD3(bootstrap), { once: true });
523
+ } else {
524
+ ensureD3(bootstrap);
525
+ }
526
+ })();
527
+ </script>
app/src/content/embeds/d3-po-lr-ablation.html ADDED
@@ -0,0 +1,538 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div class="d3-apo-lr-ablation"></div>
2
+ <style>
3
+ .d3-apo-lr-ablation {
4
+ width: 100%;
5
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
6
+ position: relative;
7
+ --axis-color: var(--text-color, #333);
8
+ --tick-color: var(--muted-color, #666);
9
+ --grid-color: rgba(0,0,0,.08);
10
+ }
11
+ [data-theme="dark"] .d3-apo-lr-ablation {
12
+ --axis-color: var(--text-color, #ccc);
13
+ --tick-color: var(--muted-color, #999);
14
+ --grid-color: rgba(255,255,255,.10);
15
+ }
16
+ .d3-apo-lr-ablation svg {
17
+ display: block;
18
+ overflow: visible;
19
+ }
20
+ .d3-apo-lr-ablation .axes path,
21
+ .d3-apo-lr-ablation .axes line {
22
+ stroke: var(--axis-color);
23
+ shape-rendering: crispEdges;
24
+ }
25
+ .d3-apo-lr-ablation .axes text {
26
+ fill: var(--tick-color);
27
+ font-size: 11px;
28
+ }
29
+ .d3-apo-lr-ablation .grid line {
30
+ stroke: var(--grid-color);
31
+ stroke-dasharray: 2,2;
32
+ shape-rendering: crispEdges;
33
+ }
34
+ .d3-apo-lr-ablation .axis-label {
35
+ fill: var(--text-color);
36
+ font-size: 12px;
37
+ font-weight: 600;
38
+ }
39
+ .d3-apo-lr-ablation .line-think {
40
+ fill: none;
41
+ stroke-width: 2.5;
42
+ stroke-linecap: round;
43
+ stroke-linejoin: round;
44
+ }
45
+ .d3-apo-lr-ablation .line-no-think {
46
+ fill: none;
47
+ stroke-width: 2.5;
48
+ stroke-linecap: round;
49
+ stroke-linejoin: round;
50
+ }
51
+ .d3-apo-lr-ablation .reference-line {
52
+ fill: none;
53
+ stroke-width: 1.5;
54
+ stroke-dasharray: 5, 5;
55
+ opacity: 0.4;
56
+ }
57
+ .d3-apo-lr-ablation .dot {
58
+ stroke: var(--surface-bg);
59
+ stroke-width: 2;
60
+ }
61
+ .d3-apo-lr-ablation .header {
62
+ display: flex;
63
+ align-items: flex-start;
64
+ justify-content: space-between;
65
+ gap: 16px;
66
+ margin-top: 16px;
67
+ flex-wrap: wrap;
68
+ }
69
+ .d3-apo-lr-ablation .legend {
70
+ display: flex;
71
+ flex-direction: column;
72
+ align-items: flex-start;
73
+ gap: 6px;
74
+ }
75
+ .d3-apo-lr-ablation .legend-title {
76
+ font-size: 12px;
77
+ font-weight: 700;
78
+ color: var(--text-color);
79
+ }
80
+ .d3-apo-lr-ablation .legend .items {
81
+ display: flex;
82
+ flex-wrap: wrap;
83
+ gap: 8px 14px;
84
+ }
85
+ .d3-apo-lr-ablation .legend .item {
86
+ display: inline-flex;
87
+ align-items: center;
88
+ gap: 6px;
89
+ white-space: nowrap;
90
+ font-size: 12px;
91
+ color: var(--text-color);
92
+ }
93
+ .d3-apo-lr-ablation .legend .swatch {
94
+ width: 14px;
95
+ height: 14px;
96
+ border-radius: 3px;
97
+ border: 1px solid var(--border-color);
98
+ }
99
+ .d3-apo-lr-ablation .legend .swatch-line {
100
+ width: 20px;
101
+ height: 2px;
102
+ border: none;
103
+ }
104
+ .d3-apo-lr-ablation .legend .swatch-dashed {
105
+ width: 20px;
106
+ height: 2px;
107
+ border: none;
108
+ background: repeating-linear-gradient(
109
+ to right,
110
+ var(--text-color) 0,
111
+ var(--text-color) 4px,
112
+ transparent 4px,
113
+ transparent 8px
114
+ );
115
+ }
116
+ .d3-apo-lr-ablation .controls {
117
+ display: flex;
118
+ gap: 16px;
119
+ align-items: flex-start;
120
+ justify-content: flex-end;
121
+ flex-wrap: wrap;
122
+ }
123
+ .d3-apo-lr-ablation .control-group {
124
+ display: flex;
125
+ flex-direction: column;
126
+ align-items: flex-start;
127
+ gap: 6px;
128
+ }
129
+ .d3-apo-lr-ablation .controls label {
130
+ font-size: 12px;
131
+ font-weight: 700;
132
+ color: var(--text-color);
133
+ }
134
+ .d3-apo-lr-ablation .controls select {
135
+ font-size: 12px;
136
+ padding: 8px 28px 8px 10px;
137
+ border: 1px solid var(--border-color);
138
+ border-radius: 8px;
139
+ background: var(--surface-bg);
140
+ color: var(--text-color);
141
+ cursor: pointer;
142
+ appearance: none;
143
+ background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='12' height='12' viewBox='0 0 12 12'%3E%3Cpath fill='%23666' d='M6 9L1 4h10z'/%3E%3C/svg%3E");
144
+ background-repeat: no-repeat;
145
+ background-position: right 8px center;
146
+ }
147
+ .d3-apo-lr-ablation .controls select:focus {
148
+ outline: 2px solid var(--primary-color);
149
+ outline-offset: 2px;
150
+ }
151
+ .d3-apo-lr-ablation .d3-tooltip {
152
+ position: absolute;
153
+ background: var(--surface-bg);
154
+ border: 1px solid var(--border-color);
155
+ border-radius: 8px;
156
+ padding: 12px;
157
+ pointer-events: none;
158
+ opacity: 0;
159
+ transition: opacity 0.2s;
160
+ box-shadow: 0 2px 8px rgba(0,0,0,0.15);
161
+ font-size: 12px;
162
+ z-index: 1000;
163
+ }
164
+ .d3-apo-lr-ablation .tooltip-title {
165
+ font-weight: 700;
166
+ margin-bottom: 8px;
167
+ color: var(--text-color);
168
+ }
169
+ .d3-apo-lr-ablation .tooltip-item {
170
+ display: flex;
171
+ align-items: center;
172
+ gap: 8px;
173
+ margin: 4px 0;
174
+ color: var(--text-color);
175
+ }
176
+ .d3-apo-lr-ablation .tooltip-color {
177
+ width: 12px;
178
+ height: 12px;
179
+ border-radius: 2px;
180
+ }
181
+ </style>
182
+ <script>
183
+ (() => {
184
/**
 * Runs `cb` once D3 is available on `window`. If it is not there yet, a single
 * shared CDN <script id="d3-cdn-script"> is created (or reused when one already
 * exists) and `cb` is invoked from its `load` event.
 * @param {Function} cb - Callback to run when `window.d3.select` is a function.
 * @returns {*} The callback's return value when D3 was already loaded, otherwise undefined.
 */
const ensureD3 = (cb) => {
  const d3IsUsable = () => Boolean(window.d3) && typeof window.d3.select === 'function';
  if (d3IsUsable()) return cb();

  const SCRIPT_ID = 'd3-cdn-script';
  let loader = document.getElementById(SCRIPT_ID);
  if (!loader) {
    loader = document.createElement('script');
    loader.id = SCRIPT_ID;
    loader.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(loader);
  }

  const runWhenUsable = () => {
    if (d3IsUsable()) cb();
  };
  loader.addEventListener('load', runWhenUsable, { once: true });
  if (window.d3) runWhenUsable();
};
197
+
198
+ const bootstrap = () => {
199
+ const scriptEl = document.currentScript;
200
+ let container = scriptEl ? scriptEl.previousElementSibling : null;
201
+ if (!(container && container.classList && container.classList.contains('d3-apo-lr-ablation'))) {
202
+ const candidates = Array.from(document.querySelectorAll('.d3-apo-lr-ablation'))
203
+ .filter((el) => !(el.dataset && el.dataset.mounted === 'true'));
204
+ container = candidates[candidates.length - 1] || null;
205
+ }
206
+ if (!container) return;
207
+ if (container.dataset) {
208
+ if (container.dataset.mounted === 'true') return;
209
+ container.dataset.mounted = 'true';
210
+ }
211
+
212
+ // Data embedded inline
213
+ const data = [{"Learning rate":0.00001,"system_prompt":"/think","Evaluation":"AIME25","Score":29.9},{"Learning rate":0.00001,"system_prompt":"/no_think","Evaluation":"AIME25","Score":4.9},{"Learning rate":0.000005,"system_prompt":"/think","Evaluation":"AIME25","Score":36.46},{"Learning rate":0.000001,"system_prompt":"/think","Evaluation":"AIME25","Score":45.47},{"Learning rate":0.000001,"system_prompt":"/no_think","Evaluation":"AIME25","Score":7.92},{"Learning rate":0.0000005,"system_prompt":"/think","Evaluation":"AIME25","Score":48.7},{"Learning rate":0.0000005,"system_prompt":"/no_think","Evaluation":"AIME25","Score":7.5},{"Learning rate":0.0000001,"system_prompt":"/think","Evaluation":"AIME25","Score":48.59},{"Learning rate":0.0000001,"system_prompt":"/no_think","Evaluation":"AIME25","Score":9.27},{"Learning rate":0.00001,"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":34.28},{"Learning rate":0.00001,"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":26.64},{"Learning rate":0.000005,"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":36.93},{"Learning rate":0.000005,"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":29.04},{"Learning rate":0.000001,"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":42.49},{"Learning rate":0.000001,"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":32.58},{"Learning rate":0.0000005,"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":44.7},{"Learning rate":0.0000005,"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":32.77},{"Learning rate":0.0000001,"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":44.44},{"Learning rate":0.0000001,"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":32.2},{"Learning rate":0.00001,"system_prompt":"/think","Evaluation":"IF-Eval","Score":69.72},{"Learning rate":0.00001,"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":70.86},{"Learning 
rate":0.000005,"system_prompt":"/think","Evaluation":"IF-Eval","Score":70.97},{"Learning rate":0.000005,"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":75.46},{"Learning rate":0.000001,"system_prompt":"/think","Evaluation":"IF-Eval","Score":69.88},{"Learning rate":0.000001,"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":74.46},{"Learning rate":0.0000005,"system_prompt":"/think","Evaluation":"IF-Eval","Score":72.61},{"Learning rate":0.0000005,"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":77.13},{"Learning rate":0.0000001,"system_prompt":"/think","Evaluation":"IF-Eval","Score":73.86},{"Learning rate":0.0000001,"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":77.0},{"Learning rate":0.00001,"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":13.86},{"Learning rate":0.00001,"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":11.88},{"Learning rate":0.000005,"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":21.78},{"Learning rate":0.000005,"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":16.83},{"Learning rate":0.000001,"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":23.76},{"Learning rate":0.000001,"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":13.86},{"Learning rate":0.0000005,"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":25.74},{"Learning rate":0.0000005,"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":12.87},{"Learning rate":0.0000001,"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":31.68},{"Learning rate":0.0000001,"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":11.88},{"Learning rate":0.00001,"system_prompt":"/think","Evaluation":"Average","Score":36.94},{"Learning rate":0.00001,"system_prompt":"/no_think","Evaluation":"Average","Score":28.57},{"Learning rate":0.000005,"system_prompt":"/think","Evaluation":"Average","Score":41.535},{"Learning 
rate":0.000005,"system_prompt":"/no_think","Evaluation":"Average","Score":40.4433333333},{"Learning rate":0.000001,"system_prompt":"/think","Evaluation":"Average","Score":45.4},{"Learning rate":0.000001,"system_prompt":"/no_think","Evaluation":"Average","Score":32.205},{"Learning rate":0.0000005,"system_prompt":"/think","Evaluation":"Average","Score":47.9375},{"Learning rate":0.0000005,"system_prompt":"/no_think","Evaluation":"Average","Score":32.5675},{"Learning rate":0.0000001,"system_prompt":"/think","Evaluation":"Average","Score":49.6425},{"Learning rate":0.0000001,"system_prompt":"/no_think","Evaluation":"Average","Score":32.5875}];
214
+ const sftData = [{"system_prompt":"/think","Evaluation":"AIME25","Score":36.56},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":4.01},{"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":42.23},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":30.43},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":70.03},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":67.29},{"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":36.63},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":12.87},{"system_prompt":"/think","Evaluation":"Average","Score":46.3625},{"system_prompt":"/no_think","Evaluation":"Average","Score":28.65}];
215
+
216
/**
 * Resolves the two series colors. Uses the first two entries returned by
 * `window.ColorPalettes.getColors('categorical', 2)` when that function exists,
 * and a fixed fallback pair otherwise.
 * @returns {{think: string, noThink: string}} Colors for the /think and /no_think series.
 */
const getColors = () => {
  const palettes = window.ColorPalettes;
  const paletteAvailable = Boolean(palettes) && typeof palettes.getColors === 'function';
  if (!paletteAvailable) {
    return { think: '#E377C2', noThink: '#7FC97F' };
  }
  const palette = palettes.getColors('categorical', 2);
  return { think: palette[0], noThink: palette[1] };
};
224
+
225
+ let colors = getColors();
226
+
227
+ // Set up dimensions
228
+ const margin = { top: 16, right: 28, bottom: 56, left: 64 };
229
+
230
+ // Create SVG
231
+ const svg = d3.select(container).append('svg').attr('width', '100%').style('display', 'block');
232
+ const g = svg.append('g');
233
+
234
+ // Tooltip
235
+ container.style.position = container.style.position || 'relative';
236
+ let tip = container.querySelector('.d3-tooltip');
237
+ let tipInner;
238
+ if (!tip) {
239
+ tip = document.createElement('div');
240
+ tip.className = 'd3-tooltip';
241
+ Object.assign(tip.style, {
242
+ position: 'absolute',
243
+ top: '0px',
244
+ left: '0px',
245
+ transform: 'translate(-9999px, -9999px)',
246
+ pointerEvents: 'none',
247
+ padding: '8px 10px',
248
+ borderRadius: '8px',
249
+ fontSize: '12px',
250
+ lineHeight: '1.35',
251
+ border: '1px solid var(--border-color)',
252
+ background: 'var(--surface-bg)',
253
+ color: 'var(--text-color)',
254
+ boxShadow: '0 4px 24px rgba(0,0,0,.18)',
255
+ opacity: '0',
256
+ transition: 'opacity .12s ease',
257
+ zIndex: '1000'
258
+ });
259
+ tipInner = document.createElement('div');
260
+ tipInner.className = 'd3-tooltip__inner';
261
+ tipInner.style.textAlign = 'left';
262
+ tip.appendChild(tipInner);
263
+ container.appendChild(tip);
264
+ } else {
265
+ tipInner = tip.querySelector('.d3-tooltip__inner') || tip;
266
+ }
267
+
268
// Handle of the pending "park off-screen" timer started by hideTooltip, or null.
let tipHideTimer = null;

/**
 * Fills the tooltip with `html`, positions it 12px right of and below the pointer
 * (relative to `container`) and makes it visible.
 * @param {string} html - Markup assigned to the tooltip's innerHTML (built by the caller).
 * @param {Event} event - Pointer event used to locate the tooltip.
 */
const showTooltip = (html, event) => {
  // A hide scheduled by an earlier mouseleave must not fire after we show again,
  // otherwise it would move the now-visible tooltip off-screen.
  if (tipHideTimer !== null) {
    clearTimeout(tipHideTimer);
    tipHideTimer = null;
  }
  tipInner.innerHTML = html;
  const [mx, my] = d3.pointer(event, container);
  const offsetX = 12;
  const offsetY = 12;
  tip.style.transform = `translate(${mx + offsetX}px, ${my + offsetY}px)`;
  tip.style.opacity = '1';
};

/**
 * Fades the tooltip out immediately and, 120 ms later, parks it off-screen.
 * The delayed step is cancelled if showTooltip runs in the meantime.
 */
const hideTooltip = () => {
  tip.style.opacity = '0';
  if (tipHideTimer !== null) clearTimeout(tipHideTimer);
  tipHideTimer = setTimeout(() => {
    tipHideTimer = null;
    tip.style.transform = 'translate(-9999px, -9999px)';
  }, 120);
};
282
+
283
+ // Get unique evaluations
284
+ const evaluations = [...new Set(data.map(d => d.Evaluation))];
285
+
286
+ // Create header with legend and controls
287
+ const header = d3.select(container).append('div').attr('class', 'header');
288
+
289
+ const legend = header.append('div').attr('class', 'legend');
290
+ legend.append('div').attr('class', 'legend-title').text('Legend');
291
+ const legendItems = legend.append('div').attr('class', 'items');
292
+
293
+ const controls = header.append('div').attr('class', 'controls');
294
+ const controlGroup = controls.append('div').attr('class', 'control-group');
295
+ controlGroup.append('label').attr('for', 'metric-select-lr').text('Metric');
296
+ const select = controlGroup.append('select').attr('id', 'metric-select-lr');
297
+
298
+ // Populate dropdown
299
+ select.selectAll('option')
300
+ .data(evaluations)
301
+ .enter()
302
+ .append('option')
303
+ .text(d => d)
304
+ .attr('value', d => d);
305
+
306
/**
 * Rebuilds the legend from scratch: empties `legendItems`, then adds one entry per
 * series (/think, /no_think, colored with the current `colors`) and one entry for
 * the dashed SFT reference line.
 */
const buildLegend = () => {
  legendItems.html('');

  const entries = [
    { label: '/think', swatchClass: 'swatch-line', colored: true, background: colors.think },
    { label: '/no_think', swatchClass: 'swatch-line', colored: true, background: colors.noThink },
    // The dashed swatch is drawn by its CSS class, so it gets no inline color.
    { label: 'SFT checkpoint', swatchClass: 'swatch-dashed', colored: false, background: undefined },
  ];

  for (const entry of entries) {
    const item = legendItems.append('span').attr('class', 'item');
    const swatch = item.append('span').attr('class', entry.swatchClass);
    if (entry.colored) {
      swatch.style('background', entry.background);
    }
    item.append('span').text(entry.label);
  }
};
322
+
323
+ buildLegend();
324
+
325
/**
 * Redraws the line chart for one evaluation: score versus learning rate (log
 * x-axis) for the /think and /no_think series, plus dashed horizontal reference
 * lines for the matching SFT scores. Everything inside `g` is removed and rebuilt.
 * @param {string} evaluation - Value of the `Evaluation` field to plot.
 */
function updateChart(evaluation) {
  const byLearningRate = (a, b) => a["Learning rate"] - b["Learning rate"];
  const filtered = data.filter(d => d.Evaluation === evaluation);
  const thinkData = filtered.filter(d => d.system_prompt === "/think").sort(byLearningRate);
  const noThinkData = filtered.filter(d => d.system_prompt === "/no_think").sort(byLearningRate);

  g.selectAll("*").remove();

  const sftThink = sftData.find(d => d.Evaluation === evaluation && d.system_prompt === "/think");
  const sftNoThink = sftData.find(d => d.Evaluation === evaluation && d.system_prompt === "/no_think");

  const width = container.clientWidth || 800;
  const height = Math.max(320, Math.round(width / 2.5));
  const innerWidth = width - margin.left - margin.right;
  const innerHeight = height - margin.top - margin.bottom;

  svg.attr('width', width).attr('height', height);
  g.attr('transform', `translate(${margin.left},${margin.top})`);

  // Scales
  const xScale = d3.scaleLog()
    .domain([d3.min(filtered, d => d["Learning rate"]), d3.max(filtered, d => d["Learning rate"])])
    .range([0, innerWidth]);

  // The y domain must also contain the SFT reference scores
  const allScores = filtered.map(d => d.Score);
  if (sftThink) allScores.push(sftThink.Score);
  if (sftNoThink) allScores.push(sftNoThink.Score);
  const maxScore = d3.max(allScores);

  const yScale = d3.scaleLinear()
    .domain([0, maxScore * 1.1])
    .range([innerHeight, 0]);

  // Grid
  g.append("g")
    .attr("class", "grid")
    .attr("transform", `translate(0,${innerHeight})`)
    .call(d3.axisBottom(xScale).tickSize(-innerHeight).tickFormat("").tickSizeOuter(0));

  g.append("g")
    .attr("class", "grid")
    .call(d3.axisLeft(yScale).tickSize(-innerWidth).tickFormat("").tickSizeOuter(0));

  // Axes. X ticks sit exactly on the learning rates present in the plotted data,
  // so no tick can land outside the data-derived log domain.
  const tickValues = [...new Set(filtered.map(d => d["Learning rate"]))];
  const styleAxis = (gAxis) => {
    gAxis.selectAll(".tick line").attr("stroke", "var(--axis-color)").style("opacity", 1);
    gAxis.selectAll(".tick text").attr("fill", "var(--tick-color)").style("opacity", 1);
    gAxis.select(".domain").attr("stroke", "var(--axis-color)");
  };

  g.append("g")
    .attr("class", "axes")
    .attr("transform", `translate(0,${innerHeight})`)
    .call(d3.axisBottom(xScale).tickValues(tickValues).tickFormat(d3.format(".0e")).tickSizeOuter(0))
    .call(styleAxis);

  g.append("g")
    .attr("class", "axes")
    .call(d3.axisLeft(yScale).ticks(6).tickSizeOuter(0))
    .call(styleAxis);

  // Axis labels
  g.append("text")
    .attr("class", "axis-label")
    .attr("text-anchor", "middle")
    .attr("x", innerWidth / 2)
    .attr("y", innerHeight + 40)
    .text("Learning rate");

  g.append("text")
    .attr("class", "axis-label")
    .attr("text-anchor", "middle")
    .attr("transform", "rotate(-90)")
    .attr("y", -45)
    .attr("x", -innerHeight / 2)
    .text("Score (%)");

  // Line generator
  const line = d3.line()
    .x(d => xScale(d["Learning rate"]))
    .y(d => yScale(d.Score));

  // Draws a dashed horizontal line at an SFT score; skipped when there is none.
  const drawReference = (sftPoint, color) => {
    if (!sftPoint) return;
    g.append("line")
      .attr("class", "reference-line")
      .style("stroke", color)
      .attr("x1", 0)
      .attr("x2", innerWidth)
      .attr("y1", yScale(sftPoint.Score))
      .attr("y2", yScale(sftPoint.Score));
  };

  // Draws the polyline of one series.
  const drawLine = (series, className, color) => {
    g.append("path")
      .datum(series)
      .attr("class", className)
      .style("stroke", color)
      .attr("d", line);
  };

  // One tooltip row (color chip + "label: score%"), or '' when the point is missing.
  const tooltipRow = (color, label, point) => (point
    ? `<div class="tooltip-item">`
      + `<div class="tooltip-color" style="background-color: ${color};"></div>`
      + `<span>${label}: ${point.Score.toFixed(2)}%</span>`
      + `</div>`
    : '');

  // Tooltip body for one learning rate; /think is always listed before /no_think.
  const tooltipHtml = (learningRate, thinkPoint, noThinkPoint) =>
    `<div class="tooltip-title">LR ${learningRate.toExponential(0)}</div>`
    + tooltipRow(colors.think, '/think', thinkPoint)
    + tooltipRow(colors.noThink, '/no_think', noThinkPoint);

  // Point of `series` measured at `learningRate`, if any.
  const pointAt = (series, learningRate) =>
    series.find(item => item["Learning rate"] === learningRate);

  // Draws the hoverable dots of one series inside their own group, so the data
  // join of the second series can never pick up circles of the first one.
  // `toPair(d)` returns [thinkPoint, noThinkPoint] for the hovered datum.
  const drawDots = (series, color, toPair) => {
    g.append("g")
      .selectAll("circle")
      .data(series)
      .enter()
      .append("circle")
      .attr("class", "dot")
      .style("fill", color)
      .attr("cx", d => xScale(d["Learning rate"]))
      .attr("cy", d => yScale(d.Score))
      .attr("r", 4)
      .on("mouseenter", (event, d) => {
        const [thinkPoint, noThinkPoint] = toPair(d);
        showTooltip(tooltipHtml(d["Learning rate"], thinkPoint, noThinkPoint), event);
      })
      .on("mouseleave", hideTooltip);
  };

  // Reference lines
  drawReference(sftThink, colors.think);
  drawReference(sftNoThink, colors.noThink);

  // Lines
  drawLine(thinkData, "line-think", colors.think);
  drawLine(noThinkData, "line-no-think", colors.noThink);

  // Dots for /think, then /no_think; each tooltip also shows the other series'
  // score at the same learning rate when it exists.
  drawDots(thinkData, colors.think, d => [d, pointAt(noThinkData, d["Learning rate"])]);
  drawDots(noThinkData, colors.noThink, d => [pointAt(thinkData, d["Learning rate"]), d]);
}
500
+
501
+ // Set default value to "Average" if it exists
502
+ const defaultEval = evaluations.includes("Average") ? "Average" : evaluations[0];
503
+ select.property("value", defaultEval);
504
+
505
+ // Initial chart
506
+ updateChart(defaultEval);
507
+
508
+ // Update on dropdown change
509
+ select.on("change", function() {
510
+ updateChart(this.value);
511
+ });
512
+
513
+ // Resize handling
514
+ const rerender = () => updateChart(select.property("value"));
515
+ if (window.ResizeObserver) {
516
+ const ro = new ResizeObserver(() => rerender());
517
+ ro.observe(container);
518
+ } else {
519
+ window.addEventListener('resize', rerender);
520
+ }
521
+
522
+ // Listen for ColorPalettes changes
523
+ if (window.ColorPalettes && typeof window.ColorPalettes.addListener === 'function') {
524
+ window.ColorPalettes.addListener(() => {
525
+ colors = getColors();
526
+ buildLegend();
527
+ updateChart(select.property("value"));
528
+ });
529
+ }
530
+ };
531
+
532
+ if (document.readyState === 'loading') {
533
+ document.addEventListener('DOMContentLoaded', () => ensureD3(bootstrap), { once: true });
534
+ } else {
535
+ ensureD3(bootstrap);
536
+ }
537
+ })();
538
+ </script>
app/src/content/embeds/d3-po-size-ablation.html ADDED
@@ -0,0 +1,537 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div class="d3-apo-size-ablation"></div>
2
+ <style>
3
+ .d3-apo-size-ablation {
4
+ width: 100%;
5
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
6
+ position: relative;
7
+ --axis-color: var(--text-color, #333);
8
+ --tick-color: var(--muted-color, #666);
9
+ --grid-color: rgba(0,0,0,.08);
10
+ }
11
+ [data-theme="dark"] .d3-apo-size-ablation {
12
+ --axis-color: var(--text-color, #ccc);
13
+ --tick-color: var(--muted-color, #999);
14
+ --grid-color: rgba(255,255,255,.10);
15
+ }
16
+ .d3-apo-size-ablation svg {
17
+ display: block;
18
+ overflow: visible;
19
+ }
20
+ .d3-apo-size-ablation .axes path,
21
+ .d3-apo-size-ablation .axes line {
22
+ stroke: var(--axis-color);
23
+ shape-rendering: crispEdges;
24
+ }
25
+ .d3-apo-size-ablation .axes text {
26
+ fill: var(--tick-color);
27
+ font-size: 11px;
28
+ }
29
+ .d3-apo-size-ablation .grid line {
30
+ stroke: var(--grid-color);
31
+ stroke-dasharray: 2,2;
32
+ shape-rendering: crispEdges;
33
+ }
34
+ .d3-apo-size-ablation .axis-label {
35
+ fill: var(--text-color);
36
+ font-size: 12px;
37
+ font-weight: 600;
38
+ }
39
+ .d3-apo-size-ablation .line-think {
40
+ fill: none;
41
+ stroke-width: 2.5;
42
+ stroke-linecap: round;
43
+ stroke-linejoin: round;
44
+ }
45
+ .d3-apo-size-ablation .line-no-think {
46
+ fill: none;
47
+ stroke-width: 2.5;
48
+ stroke-linecap: round;
49
+ stroke-linejoin: round;
50
+ }
51
+ .d3-apo-size-ablation .reference-line {
52
+ fill: none;
53
+ stroke-width: 1.5;
54
+ stroke-dasharray: 5, 5;
55
+ opacity: 0.4;
56
+ }
57
+ .d3-apo-size-ablation .dot {
58
+ stroke: var(--surface-bg);
59
+ stroke-width: 2;
60
+ }
61
+ .d3-apo-size-ablation .header {
62
+ display: flex;
63
+ align-items: flex-start;
64
+ justify-content: space-between;
65
+ gap: 16px;
66
+ margin-top: 16px;
67
+ flex-wrap: wrap;
68
+ }
69
+ .d3-apo-size-ablation .legend {
70
+ display: flex;
71
+ flex-direction: column;
72
+ align-items: flex-start;
73
+ gap: 6px;
74
+ }
75
+ .d3-apo-size-ablation .legend-title {
76
+ font-size: 12px;
77
+ font-weight: 700;
78
+ color: var(--text-color);
79
+ }
80
+ .d3-apo-size-ablation .legend .items {
81
+ display: flex;
82
+ flex-wrap: wrap;
83
+ gap: 8px 14px;
84
+ }
85
+ .d3-apo-size-ablation .legend .item {
86
+ display: inline-flex;
87
+ align-items: center;
88
+ gap: 6px;
89
+ white-space: nowrap;
90
+ font-size: 12px;
91
+ color: var(--text-color);
92
+ }
93
+ .d3-apo-size-ablation .legend .swatch {
94
+ width: 14px;
95
+ height: 14px;
96
+ border-radius: 3px;
97
+ border: 1px solid var(--border-color);
98
+ }
99
+ .d3-apo-size-ablation .legend .swatch-line {
100
+ width: 20px;
101
+ height: 2px;
102
+ border: none;
103
+ }
104
+ .d3-apo-size-ablation .legend .swatch-dashed {
105
+ width: 20px;
106
+ height: 2px;
107
+ border: none;
108
+ background: repeating-linear-gradient(
109
+ to right,
110
+ var(--text-color) 0,
111
+ var(--text-color) 4px,
112
+ transparent 4px,
113
+ transparent 8px
114
+ );
115
+ }
116
+ .d3-apo-size-ablation .controls {
117
+ display: flex;
118
+ gap: 16px;
119
+ align-items: flex-start;
120
+ justify-content: flex-end;
121
+ flex-wrap: wrap;
122
+ }
123
+ .d3-apo-size-ablation .control-group {
124
+ display: flex;
125
+ flex-direction: column;
126
+ align-items: flex-start;
127
+ gap: 6px;
128
+ }
129
+ .d3-apo-size-ablation .controls label {
130
+ font-size: 12px;
131
+ font-weight: 700;
132
+ color: var(--text-color);
133
+ }
134
+ .d3-apo-size-ablation .controls select {
135
+ font-size: 12px;
136
+ padding: 8px 28px 8px 10px;
137
+ border: 1px solid var(--border-color);
138
+ border-radius: 8px;
139
+ background: var(--surface-bg);
140
+ color: var(--text-color);
141
+ cursor: pointer;
142
+ appearance: none;
143
+ background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='12' height='12' viewBox='0 0 12 12'%3E%3Cpath fill='%23666' d='M6 9L1 4h10z'/%3E%3C/svg%3E");
144
+ background-repeat: no-repeat;
145
+ background-position: right 8px center;
146
+ }
147
+ .d3-apo-size-ablation .controls select:focus {
148
+ outline: 2px solid var(--primary-color);
149
+ outline-offset: 2px;
150
+ }
151
+ .d3-apo-size-ablation .d3-tooltip {
152
+ position: absolute;
153
+ background: var(--surface-bg);
154
+ border: 1px solid var(--border-color);
155
+ border-radius: 8px;
156
+ padding: 12px;
157
+ pointer-events: none;
158
+ opacity: 0;
159
+ transition: opacity 0.2s;
160
+ box-shadow: 0 2px 8px rgba(0,0,0,0.15);
161
+ font-size: 12px;
162
+ z-index: 1000;
163
+ }
164
+ .d3-apo-size-ablation .tooltip-title {
165
+ font-weight: 700;
166
+ margin-bottom: 8px;
167
+ color: var(--text-color);
168
+ }
169
+ .d3-apo-size-ablation .tooltip-item {
170
+ display: flex;
171
+ align-items: center;
172
+ gap: 8px;
173
+ margin: 4px 0;
174
+ color: var(--text-color);
175
+ }
176
+ .d3-apo-size-ablation .tooltip-color {
177
+ width: 12px;
178
+ height: 12px;
179
+ border-radius: 2px;
180
+ }
181
+ </style>
182
+ <script>
183
+ (() => {
184
// Invoke `cb` once D3 is usable. If D3 is not on the page yet, reuse (or inject)
// the shared <script id="d3-cdn-script"> tag and call `cb` from its load event.
const ensureD3 = (cb) => {
  const d3IsUsable = () => Boolean(window.d3 && typeof window.d3.select === 'function');
  if (d3IsUsable()) return cb();

  let loader = document.getElementById('d3-cdn-script');
  if (!loader) {
    loader = document.createElement('script');
    loader.id = 'd3-cdn-script';
    loader.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(loader);
  }

  const onReady = () => {
    if (d3IsUsable()) cb();
  };
  loader.addEventListener('load', onReady, { once: true });
  if (window.d3) onReady();
};
197
+
198
+ const bootstrap = () => {
199
+ const scriptEl = document.currentScript;
200
+ let container = scriptEl ? scriptEl.previousElementSibling : null;
201
+ if (!(container && container.classList && container.classList.contains('d3-apo-size-ablation'))) {
202
+ const candidates = Array.from(document.querySelectorAll('.d3-apo-size-ablation'))
203
+ .filter((el) => !(el.dataset && el.dataset.mounted === 'true'));
204
+ container = candidates[candidates.length - 1] || null;
205
+ }
206
+ if (!container) return;
207
+ if (container.dataset) {
208
+ if (container.dataset.mounted === 'true') return;
209
+ container.dataset.mounted = 'true';
210
+ }
211
+
212
+ // Data embedded inline
213
+ const data = [{"system_prompt":"/think","Evaluation":"AIME25","Score":45.47,"Data Subset":169346},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":7.92,"Data Subset":169346},{"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":42.49,"Data Subset":169346},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":32.58,"Data Subset":169346},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":69.88,"Data Subset":169346},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":74.46,"Data Subset":169346},{"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":23.76,"Data Subset":169346},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":13.86,"Data Subset":169346},{"system_prompt":"/think","Evaluation":"Average","Score":45.4,"Data Subset":169346},{"system_prompt":"/no_think","Evaluation":"Average","Score":32.205,"Data Subset":169346},{"system_prompt":"/think","Evaluation":"AIME25","Score":42.4,"Data Subset":1693},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":4.43,"Data Subset":1693},{"system_prompt":"/think","Evaluation":"AIME25","Score":45.36,"Data Subset":8467},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":8.23,"Data Subset":8467},{"system_prompt":"/think","Evaluation":"AIME25","Score":48.54,"Data Subset":16934},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":8.54,"Data Subset":16934},{"system_prompt":"/think","Evaluation":"AIME25","Score":48.65,"Data Subset":42336},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":7.4,"Data Subset":42336},{"system_prompt":"/think","Evaluation":"AIME25","Score":48.65,"Data Subset":84673},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":6.93,"Data Subset":84673},{"system_prompt":"/think","Evaluation":"AIME25","Score":45.16,"Data Subset":127009},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":7.97,"Data Subset":127009},{"system_prompt":"/think","Evaluation":"AIME25","Score":43.18,"Data 
Subset":338692},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":5.94,"Data Subset":338692},{"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":43.75,"Data Subset":1693},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":32.32,"Data Subset":1693},{"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":43.94,"Data Subset":8467},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":31.76,"Data Subset":8467},{"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":45.39,"Data Subset":16934},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":31.19,"Data Subset":16934},{"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":42.55,"Data Subset":42336},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":31.69,"Data Subset":42336},{"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":44.51,"Data Subset":84673},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":33.84,"Data Subset":84673},{"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":43.12,"Data Subset":127009},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":31.12,"Data Subset":127009},{"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":43.12,"Data Subset":338692},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":30.3,"Data Subset":338692},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":71.13,"Data Subset":1693},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":70.93,"Data Subset":1693},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":71.97,"Data Subset":8467},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":74.24,"Data Subset":8467},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":75.99,"Data Subset":16934},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":77.05,"Data Subset":16934},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":74.66,"Data 
Subset":42336},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":78.93,"Data Subset":42336},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":73.22,"Data Subset":84673},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":78.24,"Data Subset":84673},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":72.09,"Data Subset":127009},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":78.27,"Data Subset":127009},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":69.23,"Data Subset":338692},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":76.14,"Data Subset":338692},{"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":41.58,"Data Subset":1693},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":11.88,"Data Subset":1693},{"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":32.67,"Data Subset":8467},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":9.9,"Data Subset":8467},{"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":32.67,"Data Subset":16934},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":9.9,"Data Subset":16934},{"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":32.67,"Data Subset":42336},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":11.88,"Data Subset":42336},{"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":29.7,"Data Subset":84673},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":15.84,"Data Subset":84673},{"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":25.74,"Data Subset":127009},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":13.86,"Data Subset":127009},{"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":25.74,"Data Subset":338692},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":14.85,"Data 
Subset":338692},{"system_prompt":"/think","Evaluation":"Average","Score":49.715,"Data Subset":1693},{"system_prompt":"/no_think","Evaluation":"Average","Score":29.89,"Data Subset":1693},{"system_prompt":"/think","Evaluation":"Average","Score":48.485,"Data Subset":8467},{"system_prompt":"/no_think","Evaluation":"Average","Score":31.0325,"Data Subset":8467},{"system_prompt":"/think","Evaluation":"Average","Score":50.6475,"Data Subset":16934},{"system_prompt":"/no_think","Evaluation":"Average","Score":31.67,"Data Subset":16934},{"system_prompt":"/think","Evaluation":"Average","Score":49.6325,"Data Subset":42336},{"system_prompt":"/no_think","Evaluation":"Average","Score":32.475,"Data Subset":42336},{"system_prompt":"/think","Evaluation":"Average","Score":49.02,"Data Subset":84673},{"system_prompt":"/no_think","Evaluation":"Average","Score":33.7125,"Data Subset":84673},{"system_prompt":"/think","Evaluation":"Average","Score":46.5275,"Data Subset":127009},{"system_prompt":"/no_think","Evaluation":"Average","Score":32.805,"Data Subset":127009},{"system_prompt":"/think","Evaluation":"Average","Score":45.3175,"Data Subset":338692},{"system_prompt":"/no_think","Evaluation":"Average","Score":31.8075,"Data Subset":338692}];
214
+ const sftData = [{"system_prompt":"/think","Evaluation":"AIME25","Score":36.56},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":4.01},{"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":42.23},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":30.43},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":70.03},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":67.29},{"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":36.63},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":12.87},{"system_prompt":"/think","Evaluation":"Average","Score":46.3625},{"system_prompt":"/no_think","Evaluation":"Average","Score":28.65}];
215
+
216
// Resolve the two series colours.
// Uses window.ColorPalettes.getColors('categorical', 2) when that helper exists.
// Any colour the helper does not supply (missing helper, empty result, fewer
// than two entries) falls back to the built-in default for that series, so a
// stroke/fill is never set to `undefined`.
// Returns { think, noThink }.
const getColors = () => {
  const defaults = { think: '#E377C2', noThink: '#7FC97F' };
  const palettes = window.ColorPalettes;
  if (!palettes || typeof palettes.getColors !== 'function') {
    return defaults;
  }
  const palette = palettes.getColors('categorical', 2);
  const first = palette ? palette[0] : undefined;
  const second = palette ? palette[1] : undefined;
  return {
    think: first || defaults.think,
    noThink: second || defaults.noThink,
  };
};
224
+
225
+ let colors = getColors();
226
+
227
+ // Set up dimensions
228
+ const margin = { top: 16, right: 28, bottom: 56, left: 64 };
229
+
230
+ // Create SVG
231
+ const svg = d3.select(container).append('svg').attr('width', '100%').style('display', 'block');
232
+ const g = svg.append('g');
233
+
234
// Tooltip: reuse a .d3-tooltip already inside the container, or build one that
// starts transparent and parked off-screen. `tipInner` is the node that receives
// the tooltip markup.
container.style.position = container.style.position || 'relative';
let tip = container.querySelector('.d3-tooltip');
let tipInner;
if (tip) {
  tipInner = tip.querySelector('.d3-tooltip__inner') || tip;
} else {
  tip = document.createElement('div');
  tip.className = 'd3-tooltip';
  const initialStyle = {
    position: 'absolute',
    top: '0px',
    left: '0px',
    transform: 'translate(-9999px, -9999px)',
    pointerEvents: 'none',
    padding: '8px 10px',
    borderRadius: '8px',
    fontSize: '12px',
    lineHeight: '1.35',
    border: '1px solid var(--border-color)',
    background: 'var(--surface-bg)',
    color: 'var(--text-color)',
    boxShadow: '0 4px 24px rgba(0,0,0,.18)',
    opacity: '0',
    transition: 'opacity .12s ease',
    zIndex: '1000'
  };
  for (const [property, value] of Object.entries(initialStyle)) {
    tip.style[property] = value;
  }

  tipInner = document.createElement('div');
  tipInner.className = 'd3-tooltip__inner';
  tipInner.style.textAlign = 'left';
  tip.appendChild(tipInner);
  container.appendChild(tip);
}
267
+
268
// Put `html` into the tooltip and show it 12px right of and below the pointer,
// using pointer coordinates relative to the container.
const showTooltip = (html, event) => {
  const OFFSET_PX = 12;
  tipInner.innerHTML = html;
  const [pointerX, pointerY] = d3.pointer(event, container);
  const left = pointerX + OFFSET_PX;
  const top = pointerY + OFFSET_PX;
  tip.style.transform = `translate(${left}px, ${top}px)`;
  tip.style.opacity = '1';
};
275
+
276
// Fade the tooltip out, then park it off-screen once the fade has had time to run.
// The off-screen move is skipped when the tooltip has been shown again in the
// meantime (opacity no longer '0'); otherwise moving quickly from one dot to the
// next would make the freshly shown tooltip jump away 120ms later.
const hideTooltip = () => {
  tip.style.opacity = '0';
  setTimeout(() => {
    if (tip.style.opacity !== '0') return;
    tip.style.transform = 'translate(-9999px, -9999px)';
  }, 120);
};
282
+
283
// Distinct evaluation names, in the order they first appear in the data.
const evaluations = Array.from(new Set(data.map((row) => row.Evaluation)));

// Header row under the chart: legend on one side, metric selector on the other.
const header = d3.select(container)
  .append('div')
  .attr('class', 'header');

const legend = header.append('div').attr('class', 'legend');
legend.append('div')
  .attr('class', 'legend-title')
  .text('Legend');
const legendItems = legend.append('div').attr('class', 'items');

const controls = header.append('div').attr('class', 'controls');
const controlGroup = controls.append('div').attr('class', 'control-group');
controlGroup.append('label')
  .attr('for', 'metric-select-size')
  .text('Metric');
const select = controlGroup.append('select').attr('id', 'metric-select-size');

// One <option> per evaluation; label and value are both the evaluation name.
select.selectAll('option')
  .data(evaluations)
  .enter()
  .append('option')
  .attr('value', (name) => name)
  .text((name) => name);
305
+
306
// Rebuild the legend entries from scratch: one solid swatch per series in the
// current colours, plus a dashed swatch for the SFT checkpoint reference.
const buildLegend = () => {
  legendItems.html('');

  const entries = [
    { label: '/think', swatchClass: 'swatch-line', colorKey: 'think' },
    { label: '/no_think', swatchClass: 'swatch-line', colorKey: 'noThink' },
    { label: 'SFT checkpoint', swatchClass: 'swatch-dashed', colorKey: null },
  ];

  for (const { label, swatchClass, colorKey } of entries) {
    const row = legendItems.append('span').attr('class', 'item');
    const swatch = row.append('span').attr('class', swatchClass);
    if (colorKey !== null) {
      swatch.style('background', colors[colorKey]);
    }
    row.append('span').text(label);
  }
};
322
+
323
+ buildLegend();
324
+
325
// Update chart function.
// Clears the plot group and redraws it for one evaluation: log-scaled dataset
// size on x, score on y, one line + dots per system prompt, and dashed
// horizontal reference lines for the SFT checkpoint scores when available.
//
// evaluation: value of the "Evaluation" field to plot (one of the dropdown options).
//
// Reads data, sftData, colors, margin, container, svg and g from the enclosing
// scope and draws into `g`; returns nothing.
function updateChart(evaluation) {
  const SIZE_KEY = 'Data Subset';
  const ascendingBySize = (a, b) => a[SIZE_KEY] - b[SIZE_KEY];

  // filter() returns fresh arrays, so the in-place sort never reorders `data`.
  const filtered = data.filter((d) => d.Evaluation === evaluation);
  const thinkData = filtered.filter((d) => d.system_prompt === '/think').sort(ascendingBySize);
  const noThinkData = filtered.filter((d) => d.system_prompt === '/no_think').sort(ascendingBySize);

  g.selectAll('*').remove();

  // No rows for this evaluation: d3.min/d3.max would be undefined and every
  // scale would produce NaN, so leave the plot empty instead.
  if (filtered.length === 0) return;

  const sftThink = sftData.find((d) => d.Evaluation === evaluation && d.system_prompt === '/think');
  const sftNoThink = sftData.find((d) => d.Evaluation === evaluation && d.system_prompt === '/no_think');

  const width = container.clientWidth || 800;
  const height = Math.max(320, Math.round(width / 2.5));
  // Clamp so a container narrower than the side margins cannot invert the x range.
  const innerWidth = Math.max(0, width - margin.left - margin.right);
  const innerHeight = height - margin.top - margin.bottom;

  svg.attr('width', width).attr('height', height);
  g.attr('transform', `translate(${margin.left},${margin.top})`);

  // Scales
  const xScale = d3.scaleLog()
    .domain([d3.min(filtered, (d) => d[SIZE_KEY]), d3.max(filtered, (d) => d[SIZE_KEY])])
    .range([0, innerWidth]);

  // The y domain must also cover the SFT reference scores so their lines stay visible.
  const allScores = filtered.map((d) => d.Score);
  if (sftThink) allScores.push(sftThink.Score);
  if (sftNoThink) allScores.push(sftNoThink.Score);
  const maxScore = d3.max(allScores);

  const yScale = d3.scaleLinear()
    .domain([0, maxScore * 1.1])
    .range([innerHeight, 0]);

  // Grid: axes drawn with full-length ticks and no labels.
  g.append('g')
    .attr('class', 'grid')
    .attr('transform', `translate(0,${innerHeight})`)
    .call(d3.axisBottom(xScale).tickSize(-innerHeight).tickFormat('').tickSizeOuter(0));

  g.append('g')
    .attr('class', 'grid')
    .call(d3.axisLeft(yScale).tickSize(-innerWidth).tickFormat('').tickSizeOuter(0));

  // Axes: apply the themed colours directly on the generated tick/domain nodes.
  const styleAxis = (axisGroup) => {
    axisGroup.selectAll('.tick line').attr('stroke', 'var(--axis-color)').style('opacity', 1);
    axisGroup.selectAll('.tick text').attr('fill', 'var(--tick-color)').style('opacity', 1);
    axisGroup.select('.domain').attr('stroke', 'var(--axis-color)');
  };

  g.append('g')
    .attr('class', 'axes')
    .attr('transform', `translate(0,${innerHeight})`)
    .call(d3.axisBottom(xScale).ticks(5).tickSizeOuter(0))
    .call(styleAxis);

  g.append('g')
    .attr('class', 'axes')
    .call(d3.axisLeft(yScale).ticks(6).tickSizeOuter(0))
    .call(styleAxis);

  // Axis labels
  g.append('text')
    .attr('class', 'axis-label')
    .attr('text-anchor', 'middle')
    .attr('x', innerWidth / 2)
    .attr('y', innerHeight + 40)
    .text('Dataset size');

  g.append('text')
    .attr('class', 'axis-label')
    .attr('text-anchor', 'middle')
    .attr('transform', 'rotate(-90)')
    .attr('y', -45)
    .attr('x', -innerHeight / 2)
    .text('Score (%)');

  // Reference lines: one dashed horizontal line per available SFT score.
  const drawReference = (reference, stroke) => {
    if (!reference) return;
    const y = yScale(reference.Score);
    g.append('line')
      .attr('class', 'reference-line')
      .style('stroke', stroke)
      .attr('x1', 0)
      .attr('x2', innerWidth)
      .attr('y1', y)
      .attr('y2', y);
  };
  drawReference(sftThink, colors.think);
  drawReference(sftNoThink, colors.noThink);

  // Lines
  const line = d3.line()
    .x((d) => xScale(d[SIZE_KEY]))
    .y((d) => yScale(d.Score));

  const drawLine = (rows, cssClass, stroke) => {
    g.append('path')
      .datum(rows)
      .attr('class', cssClass)
      .style('stroke', stroke)
      .attr('d', line);
  };
  drawLine(thinkData, 'line-think', colors.think);
  drawLine(noThinkData, 'line-no-think', colors.noThink);

  // Tooltip markup: title with the dataset size, then one row per series that
  // has a value at that size (/think first, /no_think second).
  const tooltipRow = (label, color, score) => `
    <div class="tooltip-item">
      <div class="tooltip-color" style="background-color: ${color};"></div>
      <span>${label}: ${score.toFixed(2)}%</span>
    </div>`;

  const tooltipHtml = (size, think, noThink) => `
    <div class="tooltip-title">Dataset Size ${size.toLocaleString()}</div>
    ${think ? tooltipRow('/think', colors.think, think.Score) : ''}
    ${noThink ? tooltipRow('/no_think', colors.noThink, noThink.Score) : ''}
  `;

  const sameSizeIn = (rows, d) => rows.find((item) => item[SIZE_KEY] === d[SIZE_KEY]);

  // Dots: each circle carries the shared `dot` class plus its series class, so
  // the selector used for the data join actually matches what gets created.
  const drawDots = (rows, seriesClass, fill, describe) => {
    g.selectAll(`.${seriesClass}`)
      .data(rows)
      .enter()
      .append('circle')
      .attr('class', `dot ${seriesClass}`)
      .style('fill', fill)
      .attr('cx', (d) => xScale(d[SIZE_KEY]))
      .attr('cy', (d) => yScale(d.Score))
      .attr('r', 4)
      .on('mouseenter', (event, d) => showTooltip(describe(d), event))
      .on('mouseleave', hideTooltip);
  };

  // Dots for /think: the hovered point supplies the /think row.
  drawDots(thinkData, 'dot-think', colors.think,
    (d) => tooltipHtml(d[SIZE_KEY], d, sameSizeIn(noThinkData, d)));

  // Dots for /no_think: the hovered point supplies the /no_think row.
  drawDots(noThinkData, 'dot-no-think', colors.noThink,
    (d) => tooltipHtml(d[SIZE_KEY], sameSizeIn(thinkData, d), d));
}
499
+
500
// Open on the "Average" metric when the data has one; otherwise on the first metric found.
const defaultEval = evaluations.indexOf("Average") === -1 ? evaluations[0] : "Average";
select.property("value", defaultEval);

// First paint.
updateChart(defaultEval);

// Redraw whenever another metric is picked; `this` is the <select> element.
select.on("change", function onMetricChange() {
  updateChart(this.value);
});

// Redraw for the currently selected metric when the container is resized
// (falls back to window resize events when ResizeObserver is missing).
const rerender = () => updateChart(select.property("value"));
if (!window.ResizeObserver) {
  window.addEventListener('resize', rerender);
} else {
  const ro = new ResizeObserver(() => rerender());
  ro.observe(container);
}

// When the shared palette changes, refresh the colours, the legend and the plot.
if (window.ColorPalettes && typeof window.ColorPalettes.addListener === 'function') {
  window.ColorPalettes.addListener(() => {
    colors = getColors();
    buildLegend();
    updateChart(select.property("value"));
  });
}
529
+ };
530
+
531
// Start immediately when the DOM is already parsed; otherwise wait for DOMContentLoaded.
if (document.readyState !== 'loading') {
  ensureD3(bootstrap);
} else {
  document.addEventListener('DOMContentLoaded', () => ensureD3(bootstrap), { once: true });
}
536
+ })();
537
+ </script>
app/src/content/embeds/d3-rl-aime25.html ADDED
@@ -0,0 +1,536 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div class="d3-grpo-aime25"></div>
2
+ <style>
3
+ .d3-grpo-aime25 {
4
+ width: 100%;
5
+ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
6
+ position: relative;
7
+ }
8
+
9
+ .d3-grpo-aime25 svg {
10
+ display: block;
11
+ width: 100%;
12
+ }
13
+
14
+ .d3-grpo-aime25 .axis path {
15
+ stroke: none;
16
+ }
17
+
18
+ .d3-grpo-aime25 .axis line {
19
+ stroke: var(--axis-color);
20
+ shape-rendering: crispEdges;
21
+ }
22
+
23
+ .d3-grpo-aime25 .axis text {
24
+ fill: var(--tick-color);
25
+ font-size: 11px;
26
+ }
27
+
28
+ .d3-grpo-aime25 .grid line {
29
+ stroke: var(--grid-color);
30
+ stroke-dasharray: 2,2;
31
+ }
32
+
33
+ .d3-grpo-aime25 .line {
34
+ fill: none;
35
+ stroke-width: 2.5;
36
+ stroke-linejoin: round;
37
+ stroke-linecap: round;
38
+ }
39
+
40
+ .d3-grpo-aime25 .axis-label {
41
+ fill: var(--text-color);
42
+ font-size: 12px;
43
+ font-weight: 600;
44
+ }
45
+
46
+ .d3-grpo-aime25 .header {
47
+ display: flex;
48
+ align-items: center;
49
+ justify-content: space-between;
50
+ flex-wrap: wrap;
51
+ gap: 16px;
52
+ margin-top: 12px;
53
+ padding-top: 12px;
54
+ border-top: 1px solid var(--border-color);
55
+ }
56
+
57
+ .d3-grpo-aime25 .legend {
58
+ display: flex;
59
+ flex-direction: column;
60
+ align-items: flex-start;
61
+ gap: 6px;
62
+ }
63
+
64
+ .d3-grpo-aime25 .legend-title {
65
+ font-size: 12px;
66
+ font-weight: 700;
67
+ color: var(--text-color);
68
+ }
69
+
70
+ .d3-grpo-aime25 .legend .items {
71
+ display: flex;
72
+ flex-wrap: wrap;
73
+ gap: 8px 14px;
74
+ }
75
+
76
+ .d3-grpo-aime25 .legend .item {
77
+ display: inline-flex;
78
+ align-items: center;
79
+ gap: 6px;
80
+ white-space: nowrap;
81
+ font-size: 12px;
82
+ color: var(--text-color);
83
+ cursor: pointer;
84
+ user-select: none;
85
+ opacity: 1;
86
+ transition: opacity 0.2s ease;
87
+ }
88
+
89
+ .d3-grpo-aime25 .legend .item.dimmed {
90
+ opacity: 0.3;
91
+ }
92
+
93
+ .d3-grpo-aime25 .legend .swatch {
94
+ width: 14px;
95
+ height: 14px;
96
+ border-radius: 3px;
97
+ border: 1px solid var(--border-color);
98
+ }
99
+
100
+ .d3-grpo-aime25 .controls {
101
+ display: flex;
102
+ gap: 16px;
103
+ align-items: center;
104
+ justify-content: flex-end;
105
+ flex-wrap: wrap;
106
+ }
107
+
108
+ .d3-grpo-aime25 .controls .control-group {
109
+ display: flex;
110
+ flex-direction: column;
111
+ align-items: flex-start;
112
+ gap: 6px;
113
+ }
114
+
115
+ .d3-grpo-aime25 .controls label {
116
+ font-size: 12px;
117
+ font-weight: 700;
118
+ color: var(--text-color);
119
+ }
120
+
121
+ .d3-grpo-aime25 .controls .toggle-group {
122
+ display: flex;
123
+ gap: 8px;
124
+ align-items: center;
125
+ }
126
+
127
+ .d3-grpo-aime25 .controls .toggle-btn {
128
+ padding: 6px 12px;
129
+ font-size: 12px;
130
+ border: 1px solid var(--border-color);
131
+ border-radius: 8px;
132
+ background: var(--surface-bg);
133
+ color: var(--text-color);
134
+ cursor: pointer;
135
+ transition: all 0.2s ease;
136
+ }
137
+
138
+ .d3-grpo-aime25 .controls .toggle-btn:hover {
139
+ background: var(--primary-color);
140
+ color: white;
141
+ border-color: var(--primary-color);
142
+ }
143
+
144
+ .d3-grpo-aime25 .controls .toggle-btn.active {
145
+ background: var(--primary-color);
146
+ color: white;
147
+ border-color: var(--primary-color);
148
+ }
149
+ </style>
150
+ <script>
151
+ (() => {
152
// Call `cb` as soon as D3 can be used. When D3 is absent, the shared
// <script id="d3-cdn-script"> loader is reused or created, and `cb` is
// triggered from that script's load event.
const ensureD3 = (cb) => {
  const d3Ready = () => Boolean(window.d3 && typeof window.d3.select === 'function');
  if (d3Ready()) return cb();

  const SCRIPT_ID = 'd3-cdn-script';
  let scriptTag = document.getElementById(SCRIPT_ID);
  if (!scriptTag) {
    scriptTag = document.createElement('script');
    scriptTag.id = SCRIPT_ID;
    scriptTag.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(scriptTag);
  }

  const onReady = () => {
    if (d3Ready()) cb();
  };
  scriptTag.addEventListener('load', onReady, { once: true });
  if (window.d3) onReady();
};
167
+
168
+ const bootstrap = () => {
169
+ const scriptEl = document.currentScript;
170
+ let container = scriptEl ? scriptEl.previousElementSibling : null;
171
+ if (!(container && container.classList && container.classList.contains('d3-grpo-aime25'))) {
172
+ const candidates = Array.from(document.querySelectorAll('.d3-grpo-aime25'))
173
+ .filter((el) => !(el.dataset && el.dataset.mounted === 'true'));
174
+ container = candidates[candidates.length - 1] || null;
175
+ }
176
+ if (!container) return;
177
+ if (container.dataset) {
178
+ if (container.dataset.mounted === 'true') return;
179
+ container.dataset.mounted = 'true';
180
+ }
181
+
182
+ // Data loading configuration
183
+ let mountEl = container;
184
+ while (mountEl && !mountEl.getAttribute?.('data-datafiles')) {
185
+ mountEl = mountEl.parentElement;
186
+ }
187
+ let providedData = null;
188
+ try {
189
+ const attr = mountEl && mountEl.getAttribute ? mountEl.getAttribute('data-datafiles') : null;
190
+ if (attr && attr.trim()) {
191
+ providedData = attr.trim().startsWith('[') ? JSON.parse(attr) : attr.trim();
192
+ }
193
+ } catch (_) {}
194
+
195
+ const DEFAULT_CSV = '/data/grpo/aime25_perf.csv';
196
/**
 * Resolve a data-file reference to a site path.
 *
 * Non-string or empty inputs are returned unchanged; strings that already
 * start with `/` are kept as-is; anything else is prefixed with `/data/`.
 *
 * @param {*} p - Candidate path.
 * @returns {*} The normalized path, or the input itself when it is not a non-empty string.
 */
const ensureDataPrefix = (p) => {
  const isNonEmptyString = typeof p === 'string' && p.length > 0;
  if (!isNonEmptyString) {
    return p;
  }
  return p.charAt(0) === '/' ? p : `/data/${p}`;
};
201
/**
 * Turn a `data-datafiles` value into a list of normalized paths.
 *
 * @param {*} inp - An array of paths, a single path string, or anything else.
 * @returns {Array|null} Each entry passed through `ensureDataPrefix`, or `null`
 *   when the input is neither an array nor a string.
 */
const normalizeInput = (inp) => {
  if (Array.isArray(inp)) {
    return inp.map(ensureDataPrefix);
  }
  if (typeof inp === 'string') {
    return [ensureDataPrefix(inp)];
  }
  return null;
};
204
+
205
+ const CSV_PATHS = Array.isArray(providedData)
206
+ ? normalizeInput(providedData)
207
+ : (typeof providedData === 'string' ? normalizeInput(providedData) || [DEFAULT_CSV] : [
208
+ DEFAULT_CSV,
209
+ './assets/data/grpo/aime25_perf.csv',
210
+ '../assets/data/grpo/aime25_perf.csv',
211
+ '../../assets/data/grpo/aime25_perf.csv'
212
+ ]);
213
+
214
/**
 * Fetch candidate URLs one after another and return the body text of the
 * first response whose `ok` flag is set.
 *
 * @param {Iterable<string>} paths - Candidate URLs, tried in order.
 * @returns {Promise<string>} Text of the first successful response.
 * @throws {Error} When every candidate fails; the message lists each path
 *   with its HTTP status or the thrown error's message.
 */
const fetchFirstAvailable = async (paths) => {
  const failures = [];
  for (const candidate of paths) {
    try {
      const response = await fetch(candidate, { cache: 'no-cache' });
      if (!response.ok) {
        failures.push(`${candidate}: ${response.status}`);
        continue;
      }
      return await response.text();
    } catch (err) {
      failures.push(`${candidate}: ${err.message}`);
    }
  }
  throw new Error(`CSV not found. Tried:\n${failures.join('\n')}`);
};
227
+
228
+ // Tooltip setup
229
+ container.style.position = container.style.position || 'relative';
230
+ let tip = container.querySelector('.d3-tooltip');
231
+ let tipInner;
232
+ if (!tip) {
233
+ tip = document.createElement('div');
234
+ tip.className = 'd3-tooltip';
235
+ Object.assign(tip.style, {
236
+ position: 'absolute',
237
+ top: '0px',
238
+ left: '0px',
239
+ transform: 'translate(-9999px, -9999px)',
240
+ pointerEvents: 'none',
241
+ padding: '8px 10px',
242
+ borderRadius: '8px',
243
+ fontSize: '12px',
244
+ lineHeight: '1.35',
245
+ border: '1px solid var(--border-color)',
246
+ background: 'var(--surface-bg)',
247
+ color: 'var(--text-color)',
248
+ boxShadow: '0 4px 24px rgba(0,0,0,.18)',
249
+ opacity: '0',
250
+ transition: 'opacity .12s ease',
251
+ zIndex: '1000'
252
+ });
253
+ tipInner = document.createElement('div');
254
+ tipInner.className = 'd3-tooltip__inner';
255
+ tipInner.style.textAlign = 'left';
256
+ tip.appendChild(tipInner);
257
+ container.appendChild(tip);
258
+ } else {
259
+ tipInner = tip.querySelector('.d3-tooltip__inner') || tip;
260
+ }
261
+
262
+ // SVG setup
263
+ const svg = d3.select(container).append('svg').attr('width', '100%').style('display', 'block');
264
+ const gRoot = svg.append('g');
265
+ const gGrid = gRoot.append('g').attr('class', 'grid');
266
+ const gLines = gRoot.append('g').attr('class', 'lines');
267
+ const gAxes = gRoot.append('g').attr('class', 'axes');
268
+
269
+ // State
270
+ let width = 800, height = 400;
271
+ const margin = { top: 16, right: 28, bottom: 56, left: 64 };
272
+ let series = [];
273
+ let hiddenSeries = new Set();
274
+
275
+ // Color setup
276
/**
 * Pick the colors used for the series.
 *
 * Delegates to `window.ColorPalettes.getColors('categorical', count)` when that
 * helper is present; otherwise returns a fixed six-color fallback list.
 *
 * @param {number} count - Number of colors requested from the palette helper.
 * @returns {*} Whatever the palette helper returns, or the fallback array.
 */
const getColors = (count) => {
  const FALLBACK_COLORS = ['#4E79A7', '#F28E2B', '#E15759', '#76B7B2', '#59A14F', '#EDC948'];
  const paletteAvailable = Boolean(window.ColorPalettes && window.ColorPalettes.getColors);
  return paletteAvailable
    ? window.ColorPalettes.getColors('categorical', count)
    : FALLBACK_COLORS;
};
282
+
283
/**
 * Parse a wide CSV (one `step` column plus one column per series) and store
 * the result in the enclosing `series` variable.
 *
 * Each series is `{ name, points }`, where `points` holds `{ step, value }`
 * pairs. Rows whose step or value is missing, blank, or non-numeric are
 * skipped for that series, so a blank cell is not plotted as 0.
 *
 * @param {string} csvText - Raw CSV text.
 * @throws {Error} When the CSV has no data rows.
 */
function parseData(csvText) {
  const rows = d3.csvParse(csvText);
  if (rows.length === 0) {
    // Previously this surfaced as an opaque TypeError from Object.keys(undefined).
    throw new Error('CSV contains no data rows');
  }

  // Prefer the header list csvParse attaches to its result; fall back to the first row's keys.
  const columns = rows.columns || Object.keys(rows[0]);
  const headers = columns.filter((h) => h !== 'step');

  // Unary plus turns '' into 0, so blank cells are mapped to NaN explicitly.
  const toNumber = (raw) => (raw == null || String(raw).trim() === '' ? NaN : +raw);

  series = headers.map((header) => ({
    name: header,
    points: rows
      .map((row) => ({ step: toNumber(row.step), value: toNumber(row[header]) }))
      .filter((p) => !Number.isNaN(p.step) && !Number.isNaN(p.value)),
  }));
}
304
+
305
/**
 * Recompute the chart dimensions from the container's current width, apply
 * them to the SVG, and position the root group at the top-left margin.
 *
 * Updates the enclosing `width` / `height` variables. Height follows a 2.5:1
 * aspect ratio with a 320px floor; width falls back to 800 when the container
 * reports 0.
 *
 * @returns {{innerWidth: number, innerHeight: number}} Drawable area inside the margins.
 */
function updateSize() {
  const { top, right, bottom, left } = margin;

  width = container.clientWidth || 800;
  height = Math.max(320, Math.round(width / 2.5));

  svg
    .attr('width', width)
    .attr('height', height);
  gRoot.attr('transform', `translate(${left},${top})`);

  const innerWidth = width - left - right;
  const innerHeight = height - top - bottom;
  return { innerWidth, innerHeight };
}
315
+
316
/**
 * Redraw the chart for the current container size and legend selection.
 *
 * Resizes the SVG, rebuilds the scales from the visible series, redraws the
 * grid, lines, axes and axis labels, and (re)binds the tooltip handlers.
 *
 * Fixes relative to the previous version:
 *  - when every series is hidden, stale lines and tooltip handlers are removed
 *    instead of being left on screen;
 *  - the tooltip reports the data point nearest to the cursor (it used to
 *    report the right-hand neighbour and could never show the first point);
 *  - y-domain padding is sign-aware, so negative values are not clipped;
 *  - series names are HTML-escaped before being written via `innerHTML`.
 */
function render() {
  const { innerWidth, innerHeight } = updateSize();
  if (series.length === 0) return;

  const hideTooltip = () => {
    tip.style.opacity = '0';
    tip.style.transform = 'translate(-9999px, -9999px)';
  };

  // Filter visible series
  const visibleSeries = series.filter((s) => !hiddenSeries.has(s.name));
  if (visibleSeries.length === 0) {
    // Nothing to plot: drop previously drawn paths and detach the tooltip.
    gLines.selectAll('.line').remove();
    svg.on('mousemove', null).on('mouseleave', null);
    hideTooltip();
    return;
  }

  const allPoints = visibleSeries.flatMap((s) => s.points);

  // Scales
  const xScale = d3.scaleLinear()
    .domain([0, d3.max(allPoints, (d) => d.step) || 1])
    .range([0, innerWidth])
    .nice();

  // d3.min / d3.max return undefined for an empty input; fall back to [0, 1].
  const minVal = d3.min(allPoints, (d) => d.value) ?? 0;
  const maxVal = d3.max(allPoints, (d) => d.value) ?? 1;
  // Pad 5% away from the data on both sides regardless of sign.
  const padDown = (v) => (v >= 0 ? v * 0.95 : v * 1.05);
  const padUp = (v) => (v >= 0 ? v * 1.05 : v * 0.95);
  const yScale = d3.scaleLinear()
    .domain([padDown(minVal), padUp(maxVal)])
    .range([innerHeight, 0]);

  // Grid
  gGrid.selectAll('.grid-y').data([0])
    .join('g')
    .attr('class', 'grid grid-y')
    .call(d3.axisLeft(yScale)
      .tickSize(-innerWidth)
      .tickFormat(''))
    .call((g) => g.select('.domain').remove());

  // Colors are assigned by position in the full series list so that hiding a
  // series does not shift the colors of the remaining ones.
  const colors = getColors(series.length);
  const colorScale = (name) => {
    const idx = series.findIndex((s) => s.name === name);
    return colors[idx % colors.length];
  };

  // Line generator
  const line = d3.line()
    .x((d) => xScale(d.step))
    .y((d) => yScale(d.value))
    .curve(d3.curveMonotoneX);

  // Render lines
  gLines.selectAll('.line')
    .data(visibleSeries, (d) => d.name)
    .join('path')
    .attr('class', 'line')
    .attr('d', (d) => line(d.points))
    .attr('stroke', (d) => colorScale(d.name));

  // Axes
  gAxes.selectAll('.x-axis').data([0])
    .join('g')
    .attr('class', 'x-axis axis')
    .attr('transform', `translate(0,${innerHeight})`)
    .call(d3.axisBottom(xScale).ticks(Math.min(10, Math.floor(innerWidth / 80))));

  gAxes.selectAll('.y-axis').data([0])
    .join('g')
    .attr('class', 'y-axis axis')
    .call(d3.axisLeft(yScale).ticks(8));

  // Axis labels
  gAxes.selectAll('.x-label').data([0])
    .join('text')
    .attr('class', 'x-label axis-label')
    .attr('text-anchor', 'middle')
    .attr('x', innerWidth / 2)
    .attr('y', innerHeight + 45)
    .text('Training step');

  gAxes.selectAll('.y-label').data([0])
    .join('text')
    .attr('class', 'y-label axis-label')
    .attr('text-anchor', 'middle')
    .attr('transform', `translate(-48,${innerHeight / 2}) rotate(-90)`)
    .text('AIME 2025 Score (%)');

  // Tooltip interactions
  const HTML_ESCAPES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  const escapeHtml = (text) => String(text).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);
  // `center` yields the index of the point closest to the probed step
  // (assumes each series' points are ordered by step, as bisection requires).
  const nearestIndex = d3.bisector((d) => d.step).center;

  svg.on('mousemove', (event) => {
    const [mx] = d3.pointer(event, gRoot.node());
    const step = xScale.invert(mx);

    let tooltipHtml = `<strong>Step: ${Math.round(step)}</strong><br/>`;

    visibleSeries.forEach((s) => {
      const pts = s.points;
      if (pts.length === 0) return;
      // Only report a value while the cursor is inside the series' own step range.
      if (step < pts[0].step || step > pts[pts.length - 1].step) return;
      const p = pts[nearestIndex(pts, step)];
      const color = colorScale(s.name);
      tooltipHtml += `<div style="margin-top:4px"><span style="color:${color}">●</span> ${escapeHtml(s.name)}: ${p.value.toFixed(2)}%</div>`;
    });

    tipInner.innerHTML = tooltipHtml;
    const tipBounds = tip.getBoundingClientRect();
    const [px, py] = d3.pointer(event, container);

    let tipX = px + 12;
    let tipY = py - 12;

    // Flip to the other side of the cursor when the tooltip would overflow.
    if (tipX + tipBounds.width > width - 10) {
      tipX = px - tipBounds.width - 12;
    }
    if (tipY - tipBounds.height < 10) {
      tipY = py + 20;
    }

    tip.style.transform = `translate(${tipX}px, ${tipY}px)`;
    tip.style.opacity = '1';
  });

  svg.on('mouseleave', hideTooltip);
}
439
+
440
/**
 * Build (or rebuild) the clickable legend below the chart.
 *
 * Reuses the `.header`, `.legend`, `.legend-title` and `.items` elements when
 * they already exist inside the container, creating them otherwise. One item
 * is emitted per series; clicking an item toggles that series in
 * `hiddenSeries` and then rebuilds the legend and redraws the chart.
 */
function makeLegend() {
  // Return the first descendant matching `.className`, creating a <div> for it if absent.
  const ensureDiv = (parent, className) => {
    const found = parent.querySelector(`.${className}`);
    if (found) return found;
    const created = document.createElement('div');
    created.className = className;
    parent.appendChild(created);
    return created;
  };

  const header = ensureDiv(container, 'header');
  const legend = ensureDiv(header, 'legend');
  const title = ensureDiv(legend, 'legend-title');
  title.textContent = 'Overlong Penalty';
  const items = ensureDiv(legend, 'items');

  const palette = getColors(series.length);
  const toggleSeries = (name) => {
    if (hiddenSeries.has(name)) {
      hiddenSeries.delete(name);
    } else {
      hiddenSeries.add(name);
    }
    makeLegend();
    render();
  };

  items.innerHTML = '';
  series.forEach((entry, index) => {
    const swatch = document.createElement('span');
    swatch.className = 'swatch';
    swatch.style.background = palette[index % palette.length];

    const label = document.createElement('span');
    label.textContent = entry.name;

    const item = document.createElement('span');
    item.className = 'item';
    if (hiddenSeries.has(entry.name)) {
      item.classList.add('dimmed');
    }
    item.appendChild(swatch);
    item.appendChild(label);
    item.addEventListener('click', () => toggleSeries(entry.name));

    items.appendChild(item);
  });
}
504
+
505
+ // Load data and initialize
506
+ fetchFirstAvailable(CSV_PATHS)
507
+ .then(csvText => {
508
+ parseData(csvText);
509
+ makeLegend();
510
+ render();
511
+
512
+ // Responsiveness
513
+ if (window.ResizeObserver) {
514
+ const ro = new ResizeObserver(() => render());
515
+ ro.observe(container);
516
+ } else {
517
+ window.addEventListener('resize', render);
518
+ }
519
+ })
520
+ .catch(err => {
521
+ const pre = document.createElement('pre');
522
+ pre.style.color = '#f44336';
523
+ pre.style.fontSize = '12px';
524
+ pre.style.padding = '12px';
525
+ pre.textContent = `Error loading data: ${err.message}`;
526
+ container.appendChild(pre);
527
+ });
528
+ };
529
+
530
+ if (document.readyState === 'loading') {
531
+ document.addEventListener('DOMContentLoaded', () => ensureD3(bootstrap), { once: true });
532
+ } else {
533
+ ensureD3(bootstrap);
534
+ }
535
+ })();
536
+ </script>
app/src/content/embeds/d3-rl-full-length.html ADDED
@@ -0,0 +1,734 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div class="d3-grpo-full-length"></div>
2
+ <style>
3
+ .d3-grpo-full-length {
4
+ width: 100%;
5
+ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
6
+ position: relative;
7
+ }
8
+
9
+ .d3-grpo-full-length svg {
10
+ display: block;
11
+ width: 100%;
12
+ }
13
+
14
+ .d3-grpo-full-length .axis path {
15
+ stroke: none;
16
+ }
17
+
18
+ .d3-grpo-full-length .axis line {
19
+ stroke: var(--axis-color);
20
+ shape-rendering: crispEdges;
21
+ }
22
+
23
+ .d3-grpo-full-length .axis text {
24
+ fill: var(--tick-color);
25
+ font-size: 11px;
26
+ }
27
+
28
+ .d3-grpo-full-length .grid line {
29
+ stroke: var(--grid-color);
30
+ stroke-dasharray: 2,2;
31
+ }
32
+
33
+ .d3-grpo-full-length .confidence-band {
34
+ opacity: 0.15;
35
+ }
36
+
37
+ .d3-grpo-full-length .line {
38
+ fill: none;
39
+ stroke-width: 2;
40
+ stroke-linejoin: round;
41
+ stroke-linecap: round;
42
+ }
43
+
44
+ .d3-grpo-full-length .axis-label {
45
+ fill: var(--text-color);
46
+ font-size: 12px;
47
+ font-weight: 600;
48
+ }
49
+
50
+ .d3-grpo-full-length .header {
51
+ display: flex;
52
+ align-items: center;
53
+ justify-content: space-between;
54
+ flex-wrap: wrap;
55
+ gap: 16px;
56
+ margin-top: 12px;
57
+ padding-top: 12px;
58
+ border-top: 1px solid var(--border-color);
59
+ }
60
+
61
+ .d3-grpo-full-length .legend {
62
+ display: flex;
63
+ flex-direction: column;
64
+ align-items: flex-start;
65
+ gap: 6px;
66
+ }
67
+
68
+ .d3-grpo-full-length .legend-title {
69
+ font-size: 12px;
70
+ font-weight: 700;
71
+ color: var(--text-color);
72
+ }
73
+
74
+ .d3-grpo-full-length .legend .items {
75
+ display: flex;
76
+ flex-wrap: wrap;
77
+ gap: 8px 14px;
78
+ }
79
+
80
+ .d3-grpo-full-length .legend .item {
81
+ display: inline-flex;
82
+ align-items: center;
83
+ gap: 6px;
84
+ white-space: nowrap;
85
+ font-size: 12px;
86
+ color: var(--text-color);
87
+ cursor: pointer;
88
+ user-select: none;
89
+ opacity: 1;
90
+ transition: opacity 0.2s ease;
91
+ }
92
+
93
+ .d3-grpo-full-length .legend .item.dimmed {
94
+ opacity: 0.3;
95
+ }
96
+
97
+ .d3-grpo-full-length .legend .swatch {
98
+ width: 14px;
99
+ height: 14px;
100
+ border-radius: 3px;
101
+ border: 1px solid var(--border-color);
102
+ }
103
+
104
+ .d3-grpo-full-length .controls {
105
+ display: flex;
106
+ gap: 16px;
107
+ align-items: center;
108
+ justify-content: flex-end;
109
+ flex-wrap: wrap;
110
+ }
111
+
112
+ .d3-grpo-full-length .controls .control-group {
113
+ display: flex;
114
+ flex-direction: column;
115
+ align-items: flex-start;
116
+ gap: 6px;
117
+ }
118
+
119
+ .d3-grpo-full-length .controls label {
120
+ font-size: 12px;
121
+ font-weight: 700;
122
+ color: var(--text-color);
123
+ }
124
+
125
+ .d3-grpo-full-length .controls .toggle-group {
126
+ display: flex;
127
+ gap: 8px;
128
+ align-items: center;
129
+ }
130
+
131
+ .d3-grpo-full-length .controls .toggle-btn {
132
+ padding: 6px 12px;
133
+ font-size: 12px;
134
+ border: 1px solid var(--border-color);
135
+ border-radius: 8px;
136
+ background: var(--surface-bg);
137
+ color: var(--text-color);
138
+ cursor: pointer;
139
+ transition: all 0.2s ease;
140
+ }
141
+
142
+ .d3-grpo-full-length .controls .toggle-btn:hover {
143
+ background: var(--primary-color);
144
+ color: white;
145
+ border-color: var(--primary-color);
146
+ }
147
+
148
+ .d3-grpo-full-length .controls .toggle-btn.active {
149
+ background: var(--primary-color);
150
+ color: white;
151
+ border-color: var(--primary-color);
152
+ }
153
+ </style>
154
+ <script>
155
+ (() => {
156
/**
 * Invoke `cb` once a usable `window.d3` (one exposing `select`) is present.
 *
 * If d3 is already usable, `cb` runs synchronously and its result is returned.
 * Otherwise the shared `<script id="d3-cdn-script">` tag is reused (or created
 * and appended to `<head>`), and `cb` is deferred to that tag's `load` event.
 *
 * @param {Function} cb - Callback to run when d3 is available.
 * @returns {*} The result of `cb` on the synchronous path, otherwise `undefined`.
 */
const ensureD3 = (cb) => {
  const hasD3 = () => Boolean(window.d3) && typeof window.d3.select === 'function';
  if (hasD3()) {
    return cb();
  }

  let loader = document.getElementById('d3-cdn-script');
  if (loader === null || loader === undefined) {
    loader = document.createElement('script');
    loader.id = 'd3-cdn-script';
    loader.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(loader);
  }

  const fireIfReady = () => {
    if (hasD3()) {
      cb();
    }
  };
  loader.addEventListener('load', fireIfReady, { once: true });
  // A d3 global may have appeared in the meantime; re-check right away.
  if (window.d3) {
    fireIfReady();
  }
  return undefined;
};
171
+
172
+ const bootstrap = () => {
173
+ const scriptEl = document.currentScript;
174
+ let container = scriptEl ? scriptEl.previousElementSibling : null;
175
+ if (!(container && container.classList && container.classList.contains('d3-grpo-full-length'))) {
176
+ const candidates = Array.from(document.querySelectorAll('.d3-grpo-full-length'))
177
+ .filter((el) => !(el.dataset && el.dataset.mounted === 'true'));
178
+ container = candidates[candidates.length - 1] || null;
179
+ }
180
+ if (!container) return;
181
+ if (container.dataset) {
182
+ if (container.dataset.mounted === 'true') return;
183
+ container.dataset.mounted = 'true';
184
+ }
185
+
186
+ // Data loading configuration
187
+ let mountEl = container;
188
+ while (mountEl && !mountEl.getAttribute?.('data-datafiles')) {
189
+ mountEl = mountEl.parentElement;
190
+ }
191
+ let providedData = null;
192
+ try {
193
+ const attr = mountEl && mountEl.getAttribute ? mountEl.getAttribute('data-datafiles') : null;
194
+ if (attr && attr.trim()) {
195
+ providedData = attr.trim().startsWith('[') ? JSON.parse(attr) : attr.trim();
196
+ }
197
+ } catch (_) {}
198
+
199
+ const DEFAULT_CSV = '/data/grpo/rl_reward_curves.csv';
200
/**
 * Resolve a data-file reference to a site path.
 *
 * Non-string or empty inputs are returned unchanged; strings that already
 * start with `/` are treated as absolute and kept as-is; anything else is
 * prefixed with `/data/`.
 *
 * @param {*} p - Candidate path.
 * @returns {*} The normalized path, or the input itself when it is not a non-empty string.
 */
const ensureDataPrefix = (p) => {
  const isNonEmptyString = typeof p === 'string' && p.length > 0;
  if (!isNonEmptyString) {
    return p;
  }
  const alreadyAbsolute = p.charAt(0) === '/';
  return alreadyAbsolute ? p : `/data/${p}`;
};
207
/**
 * Turn a `data-datafiles` value into a list of normalized paths.
 *
 * @param {*} inp - An array of paths, a single path string, or anything else.
 * @returns {Array|null} Each entry passed through `ensureDataPrefix`, or `null`
 *   when the input is neither an array nor a string.
 */
const normalizeInput = (inp) => {
  if (Array.isArray(inp)) {
    return inp.map(ensureDataPrefix);
  }
  return typeof inp === 'string' ? [ensureDataPrefix(inp)] : null;
};
210
+
211
+ const CSV_PATHS = Array.isArray(providedData)
212
+ ? normalizeInput(providedData)
213
+ : (typeof providedData === 'string' ? normalizeInput(providedData) || [DEFAULT_CSV] : [
214
+ DEFAULT_CSV,
215
+ './assets/data/grpo/rl_reward_curves.csv',
216
+ '../assets/data/grpo/rl_reward_curves.csv',
217
+ '../../assets/data/grpo/rl_reward_curves.csv'
218
+ ]);
219
+
220
/**
 * Fetch candidate URLs one after another and return the body text of the
 * first response whose `ok` flag is set.
 *
 * @param {Iterable<string>} paths - Candidate URLs, tried in order.
 * @returns {Promise<string>} Text of the first successful response.
 * @throws {Error} When every candidate fails; the message lists each path
 *   with its HTTP status or the thrown error's message.
 */
const fetchFirstAvailable = async (paths) => {
  const attempts = [];
  for (const url of paths) {
    try {
      const res = await fetch(url, { cache: 'no-cache' });
      if (res.ok) {
        const body = await res.text();
        return body;
      }
      attempts.push(`${url}: ${res.status}`);
    } catch (failure) {
      attempts.push(`${url}: ${failure.message}`);
    }
  }
  const report = attempts.join('\n');
  throw new Error(`CSV not found. Tried:\n${report}`);
};
233
+
234
+ // Tooltip setup
235
+ container.style.position = container.style.position || 'relative';
236
+ let tip = container.querySelector('.d3-tooltip');
237
+ let tipInner;
238
+ if (!tip) {
239
+ tip = document.createElement('div');
240
+ tip.className = 'd3-tooltip';
241
+ Object.assign(tip.style, {
242
+ position: 'absolute',
243
+ top: '0px',
244
+ left: '0px',
245
+ transform: 'translate(-9999px, -9999px)',
246
+ pointerEvents: 'none',
247
+ padding: '8px 10px',
248
+ borderRadius: '8px',
249
+ fontSize: '12px',
250
+ lineHeight: '1.35',
251
+ border: '1px solid var(--border-color)',
252
+ background: 'var(--surface-bg)',
253
+ color: 'var(--text-color)',
254
+ boxShadow: '0 4px 24px rgba(0,0,0,.18)',
255
+ opacity: '0',
256
+ transition: 'opacity .12s ease',
257
+ zIndex: '1000'
258
+ });
259
+ tipInner = document.createElement('div');
260
+ tipInner.className = 'd3-tooltip__inner';
261
+ tipInner.style.textAlign = 'left';
262
+ tip.appendChild(tipInner);
263
+ container.appendChild(tip);
264
+ } else {
265
+ tipInner = tip.querySelector('.d3-tooltip__inner') || tip;
266
+ }
267
+
268
+ // SVG setup
269
+ const svg = d3.select(container).append('svg').attr('width', '100%').style('display', 'block');
270
+ const gRoot = svg.append('g');
271
+ const gGrid = gRoot.append('g').attr('class', 'grid');
272
+ const gBands = gRoot.append('g').attr('class', 'bands');
273
+ const gLines = gRoot.append('g').attr('class', 'lines');
274
+ const gAxes = gRoot.append('g').attr('class', 'axes');
275
+
276
+ // State
277
+ let width = 800, height = 400;
278
+ const margin = { top: 16, right: 28, bottom: 56, left: 64 };
279
+ let rawData = {}; // Store both datasets
280
+ let series = [];
281
+ let hiddenSeries = new Set();
282
+ let showRunningAverage = true;
283
+ let currentMetric = 'reward'; // 'reward' or 'length'
284
+ const RUNNING_AVG_WINDOW = 50; // steps
285
+
286
+ // Color setup
287
/**
 * Pick the colors used for the series.
 *
 * Delegates to `window.ColorPalettes.getColors('categorical', count)` when that
 * helper is present; otherwise returns a fixed six-color fallback list.
 *
 * @param {number} count - Number of colors requested from the palette helper.
 * @returns {*} Whatever the palette helper returns, or the fallback array.
 */
const getColors = (count) => {
  const palettes = window.ColorPalettes;
  if (!palettes || !palettes.getColors) {
    // Fallback colors
    return ['#4E79A7', '#F28E2B', '#E15759', '#76B7B2', '#59A14F', '#EDC948'];
  }
  return palettes.getColors('categorical', count);
};
294
+
295
+ // Calculate running average based on step window
296
/**
 * Smooth a series with a trailing window measured in steps.
 *
 * For every input point, the points whose `step` lies in
 * `[point.step - windowSize, point.step]` are averaged (via `d3.mean`) for
 * `mean`, `min` and `max` independently.
 *
 * @param {Array<{step: number, mean: number, min: number, max: number}>} points - Raw points.
 * @param {number} windowSize - Width of the trailing window, in steps.
 * @returns {Array<{step: number, mean: number, min: number, max: number}>}
 *   One smoothed point per input point that has at least one point in its window.
 */
function calculateRunningAverage(points, windowSize) {
  if (points.length === 0) {
    return [];
  }

  return points.reduce((smoothed, { step }) => {
    const lowerBound = step - windowSize;
    const neighbours = points.filter((p) => p.step >= lowerBound && p.step <= step);
    if (neighbours.length > 0) {
      smoothed.push({
        step,
        mean: d3.mean(neighbours, (p) => p.mean),
        min: d3.mean(neighbours, (p) => p.min),
        max: d3.mean(neighbours, (p) => p.max),
      });
    }
    return smoothed;
  }, []);
}
323
+
324
/**
 * Parse an exported metrics CSV and store one entry per run in the enclosing
 * `series` variable.
 *
 * A run is detected from a column named `<run> - <metric>`; its companion
 * columns `<run> - <metric>__MIN` and `<run> - <metric>__MAX` supply the band
 * values. The x value comes from the `train/global_step` column.
 *
 * Fixes relative to the previous version:
 *  - the run name is everything before the trailing ` - <metric>`, so run
 *    names that themselves contain ` - `, `MIN` or `MAX` are handled;
 *  - only columns that END with the metric are treated as mean columns;
 *  - an empty CSV raises a descriptive error instead of a TypeError;
 *  - blank cells are treated as missing rather than as 0.
 *
 * @param {string} csvText - Raw CSV text.
 * @param {string} metricType - `'reward'` selects `train/reward`; any other
 *   value selects `train/completions/mean_terminated_length`.
 * @throws {Error} When the CSV has no data rows.
 */
function parseData(csvText, metricType) {
  const rows = d3.csvParse(csvText);
  if (rows.length === 0) {
    throw new Error('CSV contains no data rows');
  }

  // Determine metric column suffix based on type
  const metricSuffix = metricType === 'reward'
    ? 'train/reward'
    : 'train/completions/mean_terminated_length';
  const columnTail = ` - ${metricSuffix}`;

  // Prefer the header list csvParse attaches to its result; fall back to the first row's keys.
  const headers = rows.columns || Object.keys(rows[0]);
  const runNames = headers
    .filter((h) => h.length > columnTail.length && h.endsWith(columnTail))
    .map((h) => h.slice(0, -columnTail.length));

  // For v18.00, just use a simple label
  const displayNameMap = {
    'grpo-SmollM3-3B-GRPO-no-think-v18.00': 'No Penalty'
  };

  // Unary plus turns '' into 0, so blank cells are mapped to NaN explicitly.
  const toNumber = (raw) => (raw == null || String(raw).trim() === '' ? NaN : +raw);

  // Build series data using train/global_step for x-axis
  series = runNames.map((runName) => {
    const meanCol = `${runName}${columnTail}`;
    const minCol = `${meanCol}__MIN`;
    const maxCol = `${meanCol}__MAX`;

    const points = rows
      .map((row) => ({
        step: toNumber(row['train/global_step']),
        mean: toNumber(row[meanCol]),
        min: toNumber(row[minCol]),
        max: toNumber(row[maxCol])
      }))
      .filter((p) => !Number.isNaN(p.step) && !Number.isNaN(p.mean));

    return {
      name: displayNameMap[runName] || runName,
      fullName: runName,
      points,
      runningAvgPoints: calculateRunningAverage(points, RUNNING_AVG_WINDOW)
    };
  });
}
378
+
379
/**
 * Recompute the chart dimensions from the container's current width, apply
 * them to the SVG, and position the root group at the top-left margin.
 *
 * Updates the enclosing `width` / `height` variables. Height follows a 2.5:1
 * aspect ratio with a 320px floor; width falls back to 800 when the container
 * reports 0.
 *
 * @returns {{innerWidth: number, innerHeight: number}} Drawable area inside the margins.
 */
function updateSize() {
  const MIN_HEIGHT = 320;
  const ASPECT_RATIO = 2.5;

  width = container.clientWidth || 800;
  height = Math.max(MIN_HEIGHT, Math.round(width / ASPECT_RATIO));

  svg
    .attr('width', width)
    .attr('height', height);
  gRoot.attr('transform', `translate(${margin.left},${margin.top})`);

  const horizontalMargins = margin.left + margin.right;
  const verticalMargins = margin.top + margin.bottom;
  return {
    innerWidth: width - horizontalMargins,
    innerHeight: height - verticalMargins
  };
}
389
+
390
/**
 * Redraw the chart for the current container size, metric, smoothing toggle
 * and legend selection.
 *
 * Resizes the SVG, rebuilds the scales from the visible series (raw or
 * running-average points depending on `showRunningAverage`), redraws the grid,
 * lines, axes and axis labels, and (re)binds the tooltip handlers.
 *
 * Fixes relative to the previous version:
 *  - when every series is hidden, stale lines and tooltip handlers are removed
 *    instead of being left on screen;
 *  - the tooltip reports the data point nearest to the cursor (it used to
 *    report the right-hand neighbour and could never show the first point);
 *  - y-domain padding is sign-aware, so negative values are not clipped;
 *  - series names are HTML-escaped before being written via `innerHTML`;
 *  - unused locals were dropped.
 */
function render() {
  const { innerWidth, innerHeight } = updateSize();
  if (series.length === 0) return;

  const hideTooltip = () => {
    tip.style.opacity = '0';
    tip.style.transform = 'translate(-9999px, -9999px)';
  };

  // Filter visible series
  const visibleSeries = series.filter((s) => !hiddenSeries.has(s.name));
  if (visibleSeries.length === 0) {
    // Nothing to plot: drop previously drawn paths and detach the tooltip.
    gLines.selectAll('.line').remove();
    svg.on('mousemove', null).on('mouseleave', null);
    hideTooltip();
    return;
  }

  // Select which points to use based on running average toggle
  const pointsOf = (s) => (showRunningAverage ? s.runningAvgPoints : s.points);

  // Get all points for domain calculation
  const allPoints = visibleSeries.flatMap((s) => pointsOf(s));

  // Scales - calculate from data
  const xScale = d3.scaleLinear()
    .domain([0, d3.max(allPoints, (d) => d.step) || 1])
    .range([0, innerWidth])
    .nice();

  // d3.min / d3.max return undefined for an empty input; fall back to [0, 1].
  const minVal = d3.min(allPoints, (d) => d.mean) ?? 0;
  const maxVal = d3.max(allPoints, (d) => d.mean) ?? 1;
  // Pad 5% away from the data on both sides regardless of sign.
  const padDown = (v) => (v >= 0 ? v * 0.95 : v * 1.05);
  const padUp = (v) => (v >= 0 ? v * 1.05 : v * 0.95);
  const yScale = d3.scaleLinear()
    .domain([padDown(minVal), padUp(maxVal)])
    .range([innerHeight, 0]);

  // Grid
  gGrid.selectAll('.grid-y').data([0])
    .join('g')
    .attr('class', 'grid grid-y')
    .call(d3.axisLeft(yScale)
      .tickSize(-innerWidth)
      .tickFormat(''))
    .call((g) => g.select('.domain').remove());

  // Colors are assigned by position in the full series list so that hiding a
  // series does not shift the colors of the remaining ones.
  const colors = getColors(series.length);
  const colorScale = (name) => {
    const idx = series.findIndex((s) => s.name === name);
    return colors[idx % colors.length];
  };

  // Line generator
  const line = d3.line()
    .x((d) => xScale(d.step))
    .y((d) => yScale(d.mean))
    .curve(d3.curveMonotoneX);

  // Render lines
  gLines.selectAll('.line')
    .data(visibleSeries, (d) => d.name)
    .join('path')
    .attr('class', 'line')
    .attr('d', (d) => line(pointsOf(d)))
    .attr('stroke', (d) => colorScale(d.name));

  // Axes
  gAxes.selectAll('.x-axis').data([0])
    .join('g')
    .attr('class', 'x-axis axis')
    .attr('transform', `translate(0,${innerHeight})`)
    .call(d3.axisBottom(xScale).ticks(Math.min(10, Math.floor(innerWidth / 80))));

  gAxes.selectAll('.y-axis').data([0])
    .join('g')
    .attr('class', 'y-axis axis')
    .call(d3.axisLeft(yScale).ticks(8));

  // Axis labels
  gAxes.selectAll('.x-label').data([0])
    .join('text')
    .attr('class', 'x-label axis-label')
    .attr('text-anchor', 'middle')
    .attr('x', innerWidth / 2)
    .attr('y', innerHeight + 45)
    .text('Training step');

  gAxes.selectAll('.y-label').data([0])
    .join('text')
    .attr('class', 'y-label axis-label')
    .attr('text-anchor', 'middle')
    .attr('transform', `translate(-48,${innerHeight / 2}) rotate(-90)`)
    .text(currentMetric === 'reward' ? 'Reward' : 'Mean Terminated Length');

  // Tooltip interactions
  const HTML_ESCAPES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  const escapeHtml = (text) => String(text).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);
  // `center` yields the index of the point closest to the probed step
  // (assumes each series' points are ordered by step, as bisection requires).
  const nearestIndex = d3.bisector((d) => d.step).center;

  svg.on('mousemove', (event) => {
    const [mx] = d3.pointer(event, gRoot.node());
    const step = xScale.invert(mx);

    let tooltipHtml = `<strong>Step: ${Math.round(step)}</strong>`;
    if (showRunningAverage) {
      tooltipHtml += ` <span style="font-weight:normal;font-size:11px">(${RUNNING_AVG_WINDOW}-step avg)</span>`;
    }
    tooltipHtml += `<br/>`;

    visibleSeries.forEach((s) => {
      const pts = pointsOf(s);
      if (pts.length === 0) return;
      // Only report a value while the cursor is inside the series' own step range.
      if (step < pts[0].step || step > pts[pts.length - 1].step) return;
      const p = pts[nearestIndex(pts, step)];
      const color = colorScale(s.name);
      const valueStr = currentMetric === 'reward'
        ? `${(p.mean * 100).toFixed(1)}%`
        : `${p.mean.toFixed(1)} tokens`;
      tooltipHtml += `<div style="margin-top:4px"><span style="color:${color}">●</span> ${escapeHtml(s.name)}: ${valueStr}</div>`;
    });

    tipInner.innerHTML = tooltipHtml;
    const tipBounds = tip.getBoundingClientRect();
    const [px, py] = d3.pointer(event, container);

    let tipX = px + 12;
    let tipY = py - 12;

    // Flip to the other side of the cursor when the tooltip would overflow.
    if (tipX + tipBounds.width > width - 10) {
      tipX = px - tipBounds.width - 12;
    }
    if (tipY - tipBounds.height < 10) {
      tipY = py + 20;
    }

    tip.style.transform = `translate(${tipX}px, ${tipY}px)`;
    tip.style.opacity = '1';
  });

  svg.on('mouseleave', hideTooltip);
}
530
+
531
/**
 * Build (or rebuild) the clickable legend below the chart.
 *
 * Reuses the `.header`, `.legend`, `.legend-title` and `.items` elements when
 * they already exist inside the container, creating them otherwise. One item
 * is emitted per series; clicking an item toggles that series in
 * `hiddenSeries` and then rebuilds the legend and redraws the chart.
 */
function makeLegend() {
  // Return the first descendant matching `.className`, creating a <div> for it if absent.
  const ensureDiv = (parent, className) => {
    const found = parent.querySelector(`.${className}`);
    if (found) return found;
    const created = document.createElement('div');
    created.className = className;
    parent.appendChild(created);
    return created;
  };

  const header = ensureDiv(container, 'header');
  const legend = ensureDiv(header, 'legend');
  const title = ensureDiv(legend, 'legend-title');
  title.textContent = 'Configuration';
  const items = ensureDiv(legend, 'items');

  const palette = getColors(series.length);
  const toggleSeries = (name) => {
    if (hiddenSeries.has(name)) {
      hiddenSeries.delete(name);
    } else {
      hiddenSeries.add(name);
    }
    makeLegend();
    render();
  };

  items.innerHTML = '';
  series.forEach((entry, index) => {
    const swatch = document.createElement('span');
    swatch.className = 'swatch';
    swatch.style.background = palette[index % palette.length];

    const label = document.createElement('span');
    label.textContent = entry.name;

    const item = document.createElement('span');
    item.className = 'item';
    if (hiddenSeries.has(entry.name)) {
      item.classList.add('dimmed');
    }
    item.appendChild(swatch);
    item.appendChild(label);
    item.addEventListener('click', () => toggleSeries(entry.name));

    items.appendChild(item);
  });
}
595
+
596
/**
 * Build (or rebuild) the control strip next to the legend.
 *
 * Emits two groups inside `.header > .controls`:
 *  - "Metric": Reward / Length buttons; choosing the inactive one re-parses the
 *    stored CSV for that metric and refreshes controls, legend and chart;
 *  - "Display": a button that flips `showRunningAverage` and refreshes the
 *    controls and chart.
 */
function makeControls() {
  // Return the first descendant matching `.className`, creating a <div> for it if absent.
  const ensureDiv = (parent, className) => {
    const found = parent.querySelector(`.${className}`);
    if (found) return found;
    const created = document.createElement('div');
    created.className = className;
    parent.appendChild(created);
    return created;
  };

  // Create a labelled control group holding the given toggle buttons and attach it to `parent`.
  const appendGroup = (parent, labelText, buttons) => {
    const group = document.createElement('div');
    group.className = 'control-group';

    const label = document.createElement('label');
    label.textContent = labelText;
    group.appendChild(label);

    const toggles = document.createElement('div');
    toggles.className = 'toggle-group';
    buttons.forEach((btn) => toggles.appendChild(btn));

    group.appendChild(toggles);
    parent.appendChild(group);
  };

  const makeToggle = (text, isActive, onClick) => {
    const btn = document.createElement('button');
    btn.className = 'toggle-btn' + (isActive ? ' active' : '');
    btn.textContent = text;
    btn.addEventListener('click', onClick);
    return btn;
  };

  // Switch the plotted metric; a click on the already-active metric is a no-op.
  const switchMetric = (metric) => {
    if (currentMetric === metric) return;
    currentMetric = metric;
    parseData(rawData[metric], metric);
    makeControls();
    makeLegend();
    render();
  };

  const header = ensureDiv(container, 'header');
  const controls = ensureDiv(header, 'controls');
  controls.innerHTML = '';

  // Metric selection group
  appendGroup(controls, 'Metric', [
    makeToggle('Reward', currentMetric === 'reward', () => switchMetric('reward')),
    makeToggle('Length', currentMetric === 'length', () => switchMetric('length'))
  ]);

  // Display options group
  appendGroup(controls, 'Display', [
    makeToggle(`Running Avg (${RUNNING_AVG_WINDOW} steps)`, showRunningAverage, () => {
      showRunningAverage = !showRunningAverage;
      makeControls();
      render();
    })
  ]);
}
679
+
680
+ // Load both datasets
681
+ const REWARD_PATHS = [
682
+ '/data/grpo/rl_reward_full_length.csv',
683
+ './assets/data/grpo/rl_reward_full_length.csv',
684
+ '../assets/data/grpo/rl_reward_full_length.csv',
685
+ '../../assets/data/grpo/rl_reward_full_length.csv'
686
+ ];
687
+
688
+ const LENGTH_PATHS = [
689
+ '/data/grpo/rl_mean_terminated_length_full_length.csv',
690
+ './assets/data/grpo/rl_mean_terminated_length_full_length.csv',
691
+ '../assets/data/grpo/rl_mean_terminated_length_full_length.csv',
692
+ '../../assets/data/grpo/rl_mean_terminated_length_full_length.csv'
693
+ ];
694
+
695
+ Promise.all([
696
+ fetchFirstAvailable(REWARD_PATHS),
697
+ fetchFirstAvailable(LENGTH_PATHS)
698
+ ])
699
+ .then(([rewardCsvText, lengthCsvText]) => {
700
+ // Store both datasets
701
+ rawData.reward = rewardCsvText;
702
+ rawData.length = lengthCsvText;
703
+
704
+ // Initialize with reward data
705
+ parseData(rewardCsvText, 'reward');
706
+ makeLegend();
707
+ makeControls();
708
+ render();
709
+
710
+ // Responsiveness
711
+ if (window.ResizeObserver) {
712
+ const ro = new ResizeObserver(() => render());
713
+ ro.observe(container);
714
+ } else {
715
+ window.addEventListener('resize', render);
716
+ }
717
+ })
718
+ .catch(err => {
719
+ const pre = document.createElement('pre');
720
+ pre.style.color = '#f44336';
721
+ pre.style.fontSize = '12px';
722
+ pre.style.padding = '12px';
723
+ pre.textContent = `Error loading data: ${err.message}`;
724
+ container.appendChild(pre);
725
+ });
726
+ };
727
+
728
+ if (document.readyState === 'loading') {
729
+ document.addEventListener('DOMContentLoaded', () => ensureD3(bootstrap), { once: true });
730
+ } else {
731
+ ensureD3(bootstrap);
732
+ }
733
+ })();
734
+ </script>
app/src/content/embeds/d3-rl-reward-curves.html ADDED
@@ -0,0 +1,770 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div class="d3-grpo-reward-curves"></div>
2
+ <style>
3
+ .d3-grpo-reward-curves {
4
+ width: 100%;
5
+ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
6
+ position: relative;
7
+ }
8
+
9
+ .d3-grpo-reward-curves svg {
10
+ display: block;
11
+ width: 100%;
12
+ }
13
+
14
+ .d3-grpo-reward-curves .axis path {
15
+ stroke: none;
16
+ }
17
+
18
+ .d3-grpo-reward-curves .axis line {
19
+ stroke: var(--axis-color);
20
+ shape-rendering: crispEdges;
21
+ }
22
+
23
+ .d3-grpo-reward-curves .axis text {
24
+ fill: var(--tick-color);
25
+ font-size: 11px;
26
+ }
27
+
28
+ .d3-grpo-reward-curves .grid line {
29
+ stroke: var(--grid-color);
30
+ stroke-dasharray: 2,2;
31
+ }
32
+
33
+ .d3-grpo-reward-curves .confidence-band {
34
+ opacity: 0.15;
35
+ }
36
+
37
+ .d3-grpo-reward-curves .line {
38
+ fill: none;
39
+ stroke-width: 2;
40
+ stroke-linejoin: round;
41
+ stroke-linecap: round;
42
+ }
43
+
44
+ .d3-grpo-reward-curves .axis-label {
45
+ fill: var(--text-color);
46
+ font-size: 12px;
47
+ font-weight: 600;
48
+ }
49
+
50
+ .d3-grpo-reward-curves .header {
51
+ display: flex;
52
+ align-items: center;
53
+ justify-content: space-between;
54
+ flex-wrap: wrap;
55
+ gap: 16px;
56
+ margin-top: 12px;
57
+ padding-top: 12px;
58
+ border-top: 1px solid var(--border-color);
59
+ }
60
+
61
+ .d3-grpo-reward-curves .legend {
62
+ display: flex;
63
+ flex-direction: column;
64
+ align-items: flex-start;
65
+ gap: 6px;
66
+ }
67
+
68
+ .d3-grpo-reward-curves .legend-title {
69
+ font-size: 12px;
70
+ font-weight: 700;
71
+ color: var(--text-color);
72
+ }
73
+
74
+ .d3-grpo-reward-curves .legend .items {
75
+ display: flex;
76
+ flex-wrap: wrap;
77
+ gap: 8px 14px;
78
+ }
79
+
80
+ .d3-grpo-reward-curves .legend .item {
81
+ display: inline-flex;
82
+ align-items: center;
83
+ gap: 6px;
84
+ white-space: nowrap;
85
+ font-size: 12px;
86
+ color: var(--text-color);
87
+ cursor: pointer;
88
+ user-select: none;
89
+ opacity: 1;
90
+ transition: opacity 0.2s ease;
91
+ }
92
+
93
+ .d3-grpo-reward-curves .legend .item.dimmed {
94
+ opacity: 0.3;
95
+ }
96
+
97
+ .d3-grpo-reward-curves .legend .swatch {
98
+ width: 14px;
99
+ height: 14px;
100
+ border-radius: 3px;
101
+ border: 1px solid var(--border-color);
102
+ }
103
+
104
+ .d3-grpo-reward-curves .controls {
105
+ display: flex;
106
+ gap: 16px;
107
+ align-items: center;
108
+ justify-content: flex-end;
109
+ flex-wrap: wrap;
110
+ }
111
+
112
+ .d3-grpo-reward-curves .controls .control-group {
113
+ display: flex;
114
+ flex-direction: column;
115
+ align-items: flex-start;
116
+ gap: 6px;
117
+ }
118
+
119
+ .d3-grpo-reward-curves .controls label {
120
+ font-size: 12px;
121
+ font-weight: 700;
122
+ color: var(--text-color);
123
+ }
124
+
125
+ .d3-grpo-reward-curves .controls .toggle-group {
126
+ display: flex;
127
+ gap: 8px;
128
+ align-items: center;
129
+ }
130
+
131
+ .d3-grpo-reward-curves .controls .toggle-btn {
132
+ padding: 6px 12px;
133
+ font-size: 12px;
134
+ border: 1px solid var(--border-color);
135
+ border-radius: 8px;
136
+ background: var(--surface-bg);
137
+ color: var(--text-color);
138
+ cursor: pointer;
139
+ transition: all 0.2s ease;
140
+ }
141
+
142
+ .d3-grpo-reward-curves .controls .toggle-btn:hover {
143
+ background: var(--primary-color);
144
+ color: white;
145
+ border-color: var(--primary-color);
146
+ }
147
+
148
+ .d3-grpo-reward-curves .controls .toggle-btn.active {
149
+ background: var(--primary-color);
150
+ color: white;
151
+ border-color: var(--primary-color);
152
+ }
153
+ </style>
154
+ <script>
155
+ (() => {
156
/**
 * Invokes `cb` as soon as D3 is usable on `window`.
 *
 * If D3 is already present, `cb` runs synchronously and its return value is
 * passed through. Otherwise the shared `<script id="d3-cdn-script">` tag is
 * reused (or injected into <head> when absent) and `cb` runs after it loads.
 *
 * @param {Function} cb - Callback to run once `window.d3.select` exists.
 * @returns {*} The result of `cb()` on the synchronous path, else undefined.
 */
const ensureD3 = (cb) => {
  const isD3Ready = () => Boolean(window.d3) && typeof window.d3.select === 'function';
  if (isD3Ready()) return cb();

  let s = document.getElementById('d3-cdn-script');
  if (!s) {
    s = document.createElement('script');
    s.id = 'd3-cdn-script';
    s.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(s);
  }

  // Guard so the callback can never fire twice (load event + immediate check).
  let invoked = false;
  const onReady = () => {
    if (invoked || !isD3Ready()) return;
    invoked = true;
    cb();
  };

  s.addEventListener('load', onReady, { once: true });
  // Surface CDN failures instead of leaving an empty chart with no diagnostics.
  s.addEventListener('error', () => {
    console.error(`Failed to load D3 from ${s.src}`);
  }, { once: true });
  if (window.d3) onReady();
};
171
+
172
+ const bootstrap = () => {
173
+ const scriptEl = document.currentScript;
174
+ let container = scriptEl ? scriptEl.previousElementSibling : null;
175
+ if (!(container && container.classList && container.classList.contains('d3-grpo-reward-curves'))) {
176
+ const candidates = Array.from(document.querySelectorAll('.d3-grpo-reward-curves'))
177
+ .filter((el) => !(el.dataset && el.dataset.mounted === 'true'));
178
+ container = candidates[candidates.length - 1] || null;
179
+ }
180
+ if (!container) return;
181
+ if (container.dataset) {
182
+ if (container.dataset.mounted === 'true') return;
183
+ container.dataset.mounted = 'true';
184
+ }
185
+
186
+ // Data loading configuration
187
+ let mountEl = container;
188
+ while (mountEl && !mountEl.getAttribute?.('data-datafiles')) {
189
+ mountEl = mountEl.parentElement;
190
+ }
191
+ let providedData = null;
192
+ try {
193
+ const attr = mountEl && mountEl.getAttribute ? mountEl.getAttribute('data-datafiles') : null;
194
+ if (attr && attr.trim()) {
195
+ providedData = attr.trim().startsWith('[') ? JSON.parse(attr) : attr.trim();
196
+ }
197
+ } catch (_) {}
198
+
199
+ const DEFAULT_CSV = '/data/grpo/rl_reward_curves.csv';
200
/**
 * Resolves a data-file reference to a fetchable path.
 *
 * - Non-strings and empty strings are returned unchanged.
 * - Paths starting with `/` are returned unchanged.
 * - URLs with an explicit scheme (`https://…`, `data:…`) are returned
 *   unchanged; prefixing them would produce an unusable `/data/https://…`.
 * - Anything else is treated as relative to `/data/`.
 *
 * @param {*} p - Candidate path.
 * @returns {*} The resolved path, or `p` itself when it is not a non-empty string.
 */
const ensureDataPrefix = (p) => {
  if (typeof p !== 'string' || !p) return p;
  if (p.startsWith('/')) return p;
  // RFC 3986 scheme: a letter followed by letters, digits, "+", "." or "-", then ":".
  if (/^[a-z][a-z0-9+.-]*:/i.test(p)) return p;
  return `/data/${p}`;
};
207
/**
 * Normalizes a `data-datafiles` value into a list of resolved paths.
 *
 * @param {*} inp - An array of paths, a single path string, or anything else.
 * @returns {Array|null} Paths passed through `ensureDataPrefix`, or null when
 *   `inp` is neither an array nor a string.
 */
const normalizeInput = (inp) => {
  if (Array.isArray(inp)) {
    return inp.map(ensureDataPrefix);
  }
  if (typeof inp === 'string') {
    return [ensureDataPrefix(inp)];
  }
  return null;
};
210
+
211
+ const CSV_PATHS = Array.isArray(providedData)
212
+ ? normalizeInput(providedData)
213
+ : (typeof providedData === 'string' ? normalizeInput(providedData) || [DEFAULT_CSV] : [
214
+ DEFAULT_CSV,
215
+ './assets/data/grpo/rl_reward_curves.csv',
216
+ '../assets/data/grpo/rl_reward_curves.csv',
217
+ '../../assets/data/grpo/rl_reward_curves.csv'
218
+ ]);
219
+
220
/**
 * Tries each candidate path in order and resolves with the body text of the
 * first one that answers with an OK response.
 *
 * @param {Iterable<string>} paths - Candidate URLs, tried sequentially.
 * @returns {Promise<string>} Response text of the first successful fetch.
 * @throws {Error} When every candidate fails; the message lists each path
 *   with its HTTP status or the error message from the failed request.
 */
const fetchFirstAvailable = async (paths) => {
  // Resolves to { text } on success, or { reason } describing the failure.
  const attempt = async (url) => {
    try {
      const response = await fetch(url, { cache: 'no-cache' });
      return response.ok
        ? { text: await response.text() }
        : { reason: `${url}: ${response.status}` };
    } catch (err) {
      return { reason: `${url}: ${err.message}` };
    }
  };

  const failures = [];
  for (const url of paths) {
    const outcome = await attempt(url);
    if ('text' in outcome) return outcome.text;
    failures.push(outcome.reason);
  }
  throw new Error(`CSV not found. Tried:\n${failures.join('\n')}`);
};
233
+
234
+ // Tooltip setup
235
+ container.style.position = container.style.position || 'relative';
236
+ let tip = container.querySelector('.d3-tooltip');
237
+ let tipInner;
238
+ if (!tip) {
239
+ tip = document.createElement('div');
240
+ tip.className = 'd3-tooltip';
241
+ Object.assign(tip.style, {
242
+ position: 'absolute',
243
+ top: '0px',
244
+ left: '0px',
245
+ transform: 'translate(-9999px, -9999px)',
246
+ pointerEvents: 'none',
247
+ padding: '8px 10px',
248
+ borderRadius: '8px',
249
+ fontSize: '12px',
250
+ lineHeight: '1.35',
251
+ border: '1px solid var(--border-color)',
252
+ background: 'var(--surface-bg)',
253
+ color: 'var(--text-color)',
254
+ boxShadow: '0 4px 24px rgba(0,0,0,.18)',
255
+ opacity: '0',
256
+ transition: 'opacity .12s ease',
257
+ zIndex: '1000'
258
+ });
259
+ tipInner = document.createElement('div');
260
+ tipInner.className = 'd3-tooltip__inner';
261
+ tipInner.style.textAlign = 'left';
262
+ tip.appendChild(tipInner);
263
+ container.appendChild(tip);
264
+ } else {
265
+ tipInner = tip.querySelector('.d3-tooltip__inner') || tip;
266
+ }
267
+
268
+ // SVG setup
269
+ const svg = d3.select(container).append('svg').attr('width', '100%').style('display', 'block');
270
+ const gRoot = svg.append('g');
271
+ const gGrid = gRoot.append('g').attr('class', 'grid');
272
+ const gBands = gRoot.append('g').attr('class', 'bands');
273
+ const gLines = gRoot.append('g').attr('class', 'lines');
274
+ const gAxes = gRoot.append('g').attr('class', 'axes');
275
+
276
+ // State
277
+ let width = 800, height = 400;
278
+ const margin = { top: 16, right: 28, bottom: 56, left: 64 };
279
+ let rawData = {}; // Store both datasets
280
+ let series = [];
281
+ let hiddenSeries = new Set();
282
+ let showRunningAverage = true;
283
+ let currentMetric = 'reward'; // 'reward' or 'length'
284
+ const RUNNING_AVG_WINDOW = 50; // steps
285
+
286
+ // Color setup
287
/**
 * Returns the categorical colors used for the series.
 *
 * @param {number} count - Number of colors requested from the shared palette.
 * @returns {string[]} Colors from `window.ColorPalettes` when that helper is
 *   present; otherwise a fixed six-color fallback list (regardless of `count`).
 */
const getColors = (count) => {
  const palettes = window.ColorPalettes;
  if (!(palettes && palettes.getColors)) {
    // Fallback colors
    return ['#4E79A7', '#F28E2B', '#E15759', '#76B7B2', '#59A14F', '#EDC948'];
  }
  return palettes.getColors('categorical', count);
};
294
+
295
+ // Calculate running average based on step window
296
/**
 * Computes a trailing running average over a step window.
 *
 * For every input point, averages `mean`, `min` and `max` over all points
 * whose step lies in `[step - windowSize, step]` (both ends inclusive).
 * Missing or NaN values are skipped; a field with no usable value in the
 * window comes out as `undefined`.
 *
 * When the input is ordered by ascending numeric step (the usual case for a
 * training log) a sliding window is used, so each point only visits its own
 * window instead of rescanning the whole array. Any other input falls back to
 * a full scan per point, which yields the same values.
 *
 * @param {Array<{step: number, mean: number, min: number, max: number}>} points
 * @param {number} windowSize - Window width, in steps.
 * @returns {Array<{step: number, mean: number, min: number, max: number}>}
 *   One averaged point per input point that has a non-empty window.
 */
function calculateRunningAverage(points, windowSize) {
  if (points.length === 0) return [];

  // Mean of `key` across `items`, ignoring null/undefined/NaN entries.
  const meanOf = (items, key) => {
    let sum = 0;
    let count = 0;
    for (const item of items) {
      const raw = item[key];
      if (raw == null) continue;
      const value = +raw;
      if (value >= value) { // false only for NaN
        sum += value;
        count += 1;
      }
    }
    return count > 0 ? sum / count : undefined;
  };

  const summarize = (step, windowPoints) => ({
    step,
    mean: meanOf(windowPoints, 'mean'),
    min: meanOf(windowPoints, 'min'),
    max: meanOf(windowPoints, 'max')
  });

  // The sliding window is only valid for finite numeric steps in ascending
  // order and a non-negative window; otherwise use the generic scan.
  const canSlide = windowSize >= 0 && points.every((p, i) =>
    Number.isFinite(p.step) && (i === 0 || p.step >= points[i - 1].step));

  const avgPoints = [];

  if (canSlide) {
    let lo = 0; // first index inside the window
    let hi = 0; // last index inside the window (includes later points with the same step)
    for (let i = 0; i < points.length; i++) {
      const currentStep = points[i].step;
      const minStep = currentStep - windowSize;
      while (points[lo].step < minStep) lo++;
      while (hi + 1 < points.length && points[hi + 1].step <= currentStep) hi++;
      avgPoints.push(summarize(currentStep, points.slice(lo, hi + 1)));
    }
    return avgPoints;
  }

  for (const point of points) {
    const currentStep = point.step;
    const minStep = currentStep - windowSize;
    const windowPoints = points.filter(p => p.step >= minStep && p.step <= currentStep);
    if (windowPoints.length > 0) {
      avgPoints.push(summarize(currentStep, windowPoints));
    }
  }
  return avgPoints;
}
323
+
324
/**
 * Parses an exported metrics CSV and rebuilds the shared `series` array.
 *
 * Each run contributes three columns: `<run> - <metric>`, plus the same name
 * suffixed with `__MIN` and `__MAX`. The x value comes from the
 * `train/global_step` column. Rows with an empty step or empty mean cell are
 * dropped for that run.
 *
 * A CSV with a header but no data rows, or with no matching columns, yields
 * an empty `series` instead of throwing.
 *
 * @param {string} csvText - Raw CSV content.
 * @param {string} metricType - `'reward'` selects the reward column; any other
 *   value selects the mean-terminated-length column.
 * @returns {void} Results are written to the enclosing `series` variable.
 */
function parseData(csvText, metricType) {
  const rows = d3.csvParse(csvText);

  // Determine metric column suffix based on type
  const metricSuffix = metricType === 'reward'
    ? 'train/rewards/strip_reasoning_accuracy_reward/mean'
    : 'train/completions/mean_terminated_length';
  const columnTail = ` - ${metricSuffix}`;

  // d3.csvParse exposes the header as `rows.columns`; reading it there avoids
  // dereferencing rows[0], which does not exist for a header-only file.
  const headers = rows.columns ?? (rows.length > 0 ? Object.keys(rows[0]) : []);

  // A mean column ends exactly with the metric name. The __MIN/__MAX variants
  // do not, so they are excluded without rejecting run names that merely
  // contain "MIN"/"MAX", and the run name is everything before the tail even
  // if it contains " - " itself.
  const runNames = headers
    .filter(h => h.endsWith(columnTail))
    .map(h => h.slice(0, -columnTail.length));

  // Mapping of v27.x to overlong penalty ranges
  const penaltyRangeMap = {
    'v27.00': '1.5-2k',
    'v27.01': '2-2.5k',
    'v27.02': '2.5-3k',
    'v27.03': '3-3.5k',
    'v27.04': '3.5-4k',
    'v27.05': '4-4.5k'
  };

  // Build series data using train/global_step for x-axis
  series = runNames.map(runName => {
    const meanCol = `${runName}${columnTail}`;
    const minCol = `${meanCol}__MIN`;
    const maxCol = `${meanCol}__MAX`;

    const points = rows
      .filter(row => row['train/global_step'] && row[meanCol])
      .map(row => ({
        step: +row['train/global_step'],
        mean: +row[meanCol],
        min: +row[minCol],
        max: +row[maxCol]
      }))
      .filter(p => !Number.isNaN(p.step) && !Number.isNaN(p.mean));

    const runningAvgPoints = calculateRunningAverage(points, RUNNING_AVG_WINDOW);

    // Extract version from run name; fall back to the raw run name.
    const versionMatch = runName.match(/v(\d+\.\d+)/);
    const version = versionMatch ? `v${versionMatch[1]}` : runName;

    return {
      name: penaltyRangeMap[version] || version, // legend/tooltip label
      version: version, // Keep original version for sorting
      fullName: runName,
      points,
      runningAvgPoints
    };
  });

  // Sort series by version number to get correct order; runs without a
  // recognizable version sort first (treated as 0).
  const getVersionNum = (s) => {
    const match = s.version?.match(/v(\d+)\.(\d+)/);
    return match ? parseFloat(`${match[1]}.${match[2]}`) : 0;
  };
  series.sort((a, b) => getVersionNum(a) - getVersionNum(b));
}
397
+
398
/**
 * Re-measures the container and resizes the SVG to match.
 *
 * Width follows the container (800 when it reports no width); height keeps a
 * 2.5:1 ratio with a 320px floor. Updates the shared `width`/`height`
 * variables and re-applies the margin offset to the root group.
 *
 * @returns {{innerWidth: number, innerHeight: number}} Plot area inside the margins.
 */
function updateSize() {
  const measuredWidth = container.clientWidth;
  width = measuredWidth || 800;

  const proportionalHeight = Math.round(width / 2.5);
  height = proportionalHeight < 320 ? 320 : proportionalHeight;

  svg.attr('width', width);
  svg.attr('height', height);
  gRoot.attr('transform', `translate(${margin.left},${margin.top})`);

  const innerWidth = width - margin.left - margin.right;
  const innerHeight = height - margin.top - margin.bottom;
  return { innerWidth, innerHeight };
}
408
+
409
/**
 * Redraws the whole chart for the current state (`series`, `hiddenSeries`,
 * `currentMetric`, `showRunningAverage`) at the container's current size:
 * grid, one line per visible series, axes, axis labels, and the hover tooltip.
 *
 * Safe to call repeatedly; every element is created through a data join, so
 * existing nodes are updated in place and obsolete ones are removed.
 *
 * @returns {void}
 */
function render() {
  const { innerWidth, innerHeight } = updateSize();
  if (series.length === 0) return;

  const MAX_STEPS = 500; // x-axis is capped at this many training steps
  const REWARD_DOMAIN = [0.55, 0.9]; // fixed y-range for the reward metric
  const isReward = currentMetric === 'reward';

  // No early return when every series is hidden: the joins below must still
  // run with empty data so previously drawn lines are removed and the tooltip
  // handler stops reporting series that are no longer shown.
  const visibleSeries = series.filter(s => !hiddenSeries.has(s.name));

  // Raw or running-average points, clipped to the x cap and, for rewards,
  // to the fixed y-range.
  const getFilteredPoints = (s) => {
    const pts = showRunningAverage ? s.runningAvgPoints : s.points;
    if (!isReward) return pts.filter(p => p.step <= MAX_STEPS);
    const [minReward, maxReward] = REWARD_DOMAIN;
    return pts.filter(p => p.step <= MAX_STEPS && p.mean >= minReward && p.mean <= maxReward);
  };

  // Filter once per render; reused for the domain, the paths and every mousemove.
  const filteredBySeries = new Map(visibleSeries.map(s => [s, getFilteredPoints(s)]));
  const allPoints = [...filteredBySeries.values()].flat();

  // Scales
  const xScale = d3.scaleLinear()
    .domain([0, MAX_STEPS])
    .range([0, innerWidth])
    .nice();

  let yDomain = REWARD_DOMAIN;
  if (!isReward) {
    // For length, derive the domain from the data with 5% padding.
    const minVal = d3.min(allPoints, d => d.mean);
    const maxVal = d3.max(allPoints, d => d.mean);
    // d3.min/d3.max return undefined for empty input; avoid a NaN domain.
    yDomain = (minVal === undefined || maxVal === undefined)
      ? [0, 1]
      : [minVal * 0.95, maxVal * 1.05];
  }

  const yScale = d3.scaleLinear()
    .domain(yDomain)
    .range([innerHeight, 0]);

  // Grid
  gGrid.selectAll('.grid-y').data([0])
    .join('g')
    .attr('class', 'grid grid-y')
    .call(d3.axisLeft(yScale)
      .tickSize(-innerWidth)
      .tickFormat('')
    )
    .call(g => g.select('.domain').remove());

  // Colors are assigned by position in the full series list so a series
  // keeps its color when others are hidden.
  const colors = getColors(series.length);
  const colorScale = (name) => {
    const idx = series.findIndex(s => s.name === name);
    return colors[idx % colors.length];
  };

  // Line generator
  const line = d3.line()
    .x(d => xScale(d.step))
    .y(d => yScale(d.mean))
    .curve(d3.curveMonotoneX);

  // Render lines
  gLines.selectAll('.line')
    .data(visibleSeries, d => d.name)
    .join('path')
    .attr('class', 'line')
    .attr('d', d => line(filteredBySeries.get(d)))
    .attr('stroke', d => colorScale(d.name));

  // Axes
  gAxes.selectAll('.x-axis').data([0])
    .join('g')
    .attr('class', 'x-axis axis')
    .attr('transform', `translate(0,${innerHeight})`)
    .call(d3.axisBottom(xScale).ticks(Math.min(10, Math.floor(innerWidth / 80))));

  gAxes.selectAll('.y-axis').data([0])
    .join('g')
    .attr('class', 'y-axis axis')
    .call(d3.axisLeft(yScale).ticks(8));

  // Axis labels
  gAxes.selectAll('.x-label').data([0])
    .join('text')
    .attr('class', 'x-label axis-label')
    .attr('text-anchor', 'middle')
    .attr('x', innerWidth / 2)
    .attr('y', innerHeight + 45)
    .text('Training step');

  gAxes.selectAll('.y-label').data([0])
    .join('text')
    .attr('class', 'y-label axis-label')
    .attr('text-anchor', 'middle')
    .attr('transform', `translate(-48,${innerHeight / 2}) rotate(-90)`)
    .text(isReward ? 'Reward' : 'Mean Terminated Length');

  // Tooltip interactions
  const bisect = d3.bisector(d => d.step).left;

  svg.on('mousemove', function(event) {
    const [mx] = d3.pointer(event, gRoot.node());
    const step = xScale.invert(mx);

    let tooltipHtml = `<strong>Step: ${Math.round(step)}</strong>`;
    if (showRunningAverage) {
      tooltipHtml += ` <span style="font-weight:normal;font-size:11px">(${RUNNING_AVG_WINDOW}-step avg)</span>`;
    }
    tooltipHtml += `<br/>`;

    visibleSeries.forEach(s => {
      const points = filteredBySeries.get(s);
      if (points.length === 0) return;
      // Only report a series while the cursor is inside its data range.
      if (step < points[0].step || step > points[points.length - 1].step) return;

      // bisect gives the first point at or after the cursor; compare it with
      // its left neighbour and report whichever is closer.
      const idx = bisect(points, step);
      const right = points[idx];
      const left = points[idx - 1];
      const p = left && (step - left.step) < (right.step - step) ? left : right;

      const color = colorScale(s.name);
      const valueStr = isReward
        ? `${(p.mean * 100).toFixed(1)}%`
        : `${p.mean.toFixed(1)} tokens`;
      // NOTE(review): s.name is derived from CSV column headers and inserted as HTML.
      tooltipHtml += `<div style="margin-top:4px"><span style="color:${color}">●</span> ${s.name}: ${valueStr}</div>`;
    });

    tipInner.innerHTML = tooltipHtml;
    const tipBounds = tip.getBoundingClientRect();
    const [px, py] = d3.pointer(event, container);

    let tipX = px + 12;
    let tipY = py - 12;

    // Flip to the left of the cursor when the tooltip would overflow the chart.
    if (tipX + tipBounds.width > width - 10) {
      tipX = px - tipBounds.width - 12;
    }
    if (tipY - tipBounds.height < 10) {
      tipY = py + 20;
    }

    tip.style.transform = `translate(${tipX}px, ${tipY}px)`;
    tip.style.opacity = '1';
  });

  svg.on('mouseleave', () => {
    tip.style.opacity = '0';
    tip.style.transform = 'translate(-9999px, -9999px)';
  });
}
566
+
567
/**
 * Builds (or rebuilds) the legend inside the chart header.
 *
 * The header, legend, title and item wrapper are created on first use and
 * reused afterwards; the items themselves are regenerated on every call.
 * Clicking an item toggles that series in `hiddenSeries`, then refreshes the
 * legend and the chart.
 *
 * @returns {void}
 */
function makeLegend() {
  // Returns the first descendant of `parent` with `className`, creating and
  // appending a <div> when none exists.
  const ensureChild = (parent, className) => {
    let el = parent.querySelector(`.${className}`);
    if (!el) {
      el = document.createElement('div');
      el.className = className;
      parent.appendChild(el);
    }
    return el;
  };

  const header = ensureChild(container, 'header');
  const legend = ensureChild(header, 'legend');

  const title = ensureChild(legend, 'legend-title');
  title.textContent = 'Overlong Penalty';

  const items = ensureChild(legend, 'items');
  const palette = getColors(series.length);

  items.innerHTML = '';
  for (const [index, entry] of series.entries()) {
    const item = document.createElement('span');
    item.className = 'item';
    if (hiddenSeries.has(entry.name)) {
      item.classList.add('dimmed');
    }

    const swatch = document.createElement('span');
    swatch.className = 'swatch';
    swatch.style.background = palette[index % palette.length];

    const label = document.createElement('span');
    label.textContent = entry.name;

    item.appendChild(swatch);
    item.appendChild(label);
    items.appendChild(item);

    // Toggle visibility of this series, then redraw legend and chart.
    item.addEventListener('click', () => {
      const wasHidden = hiddenSeries.has(entry.name);
      if (wasHidden) {
        hiddenSeries.delete(entry.name);
      } else {
        hiddenSeries.add(entry.name);
      }
      makeLegend();
      render();
    });
  }
}
631
+
632
/**
 * Builds (or rebuilds) the control panel inside the chart header:
 * a "Metric" group (Reward / Length) and a "Display" group (running average).
 *
 * Switching metric re-parses the matching raw CSV and refreshes controls,
 * legend and chart. Toggling the running average refreshes controls and chart.
 *
 * @returns {void}
 */
function makeControls() {
  // Returns the first descendant of `parent` with `className`, creating and
  // appending a <div> when none exists.
  const ensureChild = (parent, className) => {
    let el = parent.querySelector(`.${className}`);
    if (!el) {
      el = document.createElement('div');
      el.className = className;
      parent.appendChild(el);
    }
    return el;
  };

  // Creates a labelled control group plus the (still detached) button row for it.
  const buildGroup = (labelText) => {
    const group = document.createElement('div');
    group.className = 'control-group';

    const label = document.createElement('label');
    label.textContent = labelText;
    group.appendChild(label);

    const toggles = document.createElement('div');
    toggles.className = 'toggle-group';
    return { group, toggles };
  };

  // Creates one toggle button, marked active when `isActive` is true.
  const buildToggle = (text, isActive, onClick) => {
    const btn = document.createElement('button');
    btn.className = `toggle-btn${isActive ? ' active' : ''}`;
    btn.textContent = text;
    btn.addEventListener('click', onClick);
    return btn;
  };

  // Switches the displayed metric; a click on the already-active metric is a no-op.
  const switchMetric = (metric) => {
    if (currentMetric === metric) return;
    currentMetric = metric;
    parseData(rawData[metric], metric);
    makeControls();
    makeLegend();
    render();
  };

  const header = ensureChild(container, 'header');
  const controls = ensureChild(header, 'controls');
  controls.innerHTML = '';

  // Metric selection group
  const metric = buildGroup('Metric');
  const rewardBtn = buildToggle('Reward', currentMetric === 'reward', () => switchMetric('reward'));
  const lengthBtn = buildToggle('Length', currentMetric === 'length', () => switchMetric('length'));
  metric.toggles.appendChild(rewardBtn);
  metric.toggles.appendChild(lengthBtn);
  metric.group.appendChild(metric.toggles);
  controls.appendChild(metric.group);

  // Display options group
  const display = buildGroup('Display');
  const runningAvgBtn = buildToggle(
    `Running Avg (${RUNNING_AVG_WINDOW} steps)`,
    showRunningAverage,
    () => {
      showRunningAverage = !showRunningAverage;
      makeControls();
      render();
    }
  );
  display.toggles.appendChild(runningAvgBtn);
  display.group.appendChild(display.toggles);
  controls.appendChild(display.group);
}
715
+
716
+ // Load both datasets
717
+ const REWARD_PATHS = [
718
+ '/data/grpo/rl_reward_curves.csv',
719
+ './assets/data/grpo/rl_reward_curves.csv',
720
+ '../assets/data/grpo/rl_reward_curves.csv',
721
+ '../../assets/data/grpo/rl_reward_curves.csv'
722
+ ];
723
+
724
+ const LENGTH_PATHS = [
725
+ '/data/grpo/rl_mean_teminated_lengths.csv',
726
+ './assets/data/grpo/rl_mean_teminated_lengths.csv',
727
+ '../assets/data/grpo/rl_mean_teminated_lengths.csv',
728
+ '../../assets/data/grpo/rl_mean_teminated_lengths.csv'
729
+ ];
730
+
731
+ Promise.all([
732
+ fetchFirstAvailable(REWARD_PATHS),
733
+ fetchFirstAvailable(LENGTH_PATHS)
734
+ ])
735
+ .then(([rewardCsvText, lengthCsvText]) => {
736
+ // Store both datasets
737
+ rawData.reward = rewardCsvText;
738
+ rawData.length = lengthCsvText;
739
+
740
+ // Initialize with reward data
741
+ parseData(rewardCsvText, 'reward');
742
+ makeLegend();
743
+ makeControls();
744
+ render();
745
+
746
+ // Responsiveness
747
+ if (window.ResizeObserver) {
748
+ const ro = new ResizeObserver(() => render());
749
+ ro.observe(container);
750
+ } else {
751
+ window.addEventListener('resize', render);
752
+ }
753
+ })
754
+ .catch(err => {
755
+ const pre = document.createElement('pre');
756
+ pre.style.color = '#f44336';
757
+ pre.style.fontSize = '12px';
758
+ pre.style.padding = '12px';
759
+ pre.textContent = `Error loading data: ${err.message}`;
760
+ container.appendChild(pre);
761
+ });
762
+ };
763
+
764
+ if (document.readyState === 'loading') {
765
+ document.addEventListener('DOMContentLoaded', () => ensureD3(bootstrap), { once: true });
766
+ } else {
767
+ ensureD3(bootstrap);
768
+ }
769
+ })();
770
+ </script>
app/src/content/embeds/d3-rl-token-comparison.html ADDED
@@ -0,0 +1,504 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div class="d3-grpo-token-comparison"></div>
2
+ <style>
3
+ .d3-grpo-token-comparison {
4
+ width: 100%;
5
+ position: relative;
6
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
7
+ }
8
+ .d3-grpo-token-comparison svg {
9
+ display: block;
10
+ width: 100%;
11
+ }
12
+ .d3-grpo-token-comparison .bar {
13
+ stroke: none;
14
+ }
15
+ .d3-grpo-token-comparison .axes path,
16
+ .d3-grpo-token-comparison .axes line {
17
+ stroke: var(--axis-color, var(--text-color));
18
+ }
19
+ .d3-grpo-token-comparison .axes text {
20
+ fill: var(--tick-color, var(--muted-color));
21
+ font-size: 11px;
22
+ }
23
+ .d3-grpo-token-comparison .grid line {
24
+ stroke: var(--grid-color, rgba(0,0,0,.08));
25
+ }
26
+ .d3-grpo-token-comparison .chart-title {
27
+ font-size: 13px;
28
+ font-weight: 600;
29
+ fill: var(--text-color);
30
+ }
31
+ .d3-grpo-token-comparison .d3-tooltip {
32
+ position: absolute;
33
+ top: 0;
34
+ left: 0;
35
+ transform: translate(-9999px, -9999px);
36
+ pointer-events: none;
37
+ padding: 8px 10px;
38
+ border-radius: 8px;
39
+ font-size: 12px;
40
+ line-height: 1.35;
41
+ border: 1px solid var(--border-color);
42
+ background: var(--surface-bg);
43
+ color: var(--text-color);
44
+ box-shadow: 0 4px 24px rgba(0,0,0,.18);
45
+ opacity: 0;
46
+ transition: opacity .12s ease;
47
+ }
48
+ .d3-grpo-token-comparison .d3-tooltip__inner {
49
+ text-align: left;
50
+ }
51
+ .d3-grpo-token-comparison .legend {
52
+ display: flex;
53
+ flex-direction: column;
54
+ align-items: flex-start;
55
+ gap: 6px;
56
+ margin-top: 16px;
57
+ }
58
+ .d3-grpo-token-comparison .legend-title {
59
+ font-size: 12px;
60
+ font-weight: 700;
61
+ color: var(--text-color);
62
+ }
63
+ .d3-grpo-token-comparison .legend .items {
64
+ display: flex;
65
+ flex-wrap: wrap;
66
+ gap: 8px 14px;
67
+ }
68
+ .d3-grpo-token-comparison .legend .item {
69
+ display: inline-flex;
70
+ align-items: center;
71
+ gap: 6px;
72
+ white-space: nowrap;
73
+ font-size: 12px;
74
+ color: var(--text-color);
75
+ }
76
+ .d3-grpo-token-comparison .legend .swatch {
77
+ width: 14px;
78
+ height: 14px;
79
+ border-radius: 3px;
80
+ border: 1px solid var(--border-color);
81
+ }
82
+ .d3-grpo-token-comparison .controls {
83
+ display: flex;
84
+ gap: 16px;
85
+ align-items: center;
86
+ justify-content: flex-end;
87
+ flex-wrap: wrap;
88
+ margin-top: 8px;
89
+ }
90
+ .d3-grpo-token-comparison .control-group {
91
+ display: flex;
92
+ flex-direction: column;
93
+ align-items: flex-start;
94
+ gap: 6px;
95
+ }
96
+ .d3-grpo-token-comparison .controls label {
97
+ font-size: 12px;
98
+ font-weight: 700;
99
+ color: var(--text-color);
100
+ }
101
+ .d3-grpo-token-comparison .controls select {
102
+ font-size: 12px;
103
+ padding: 8px 28px 8px 10px;
104
+ border: 1px solid var(--border-color);
105
+ border-radius: 8px;
106
+ background: var(--surface-bg);
107
+ color: var(--text-color);
108
+ cursor: pointer;
109
+ }
110
+ .d3-grpo-token-comparison .slider-container {
111
+ display: flex;
112
+ flex-direction: column;
113
+ gap: 8px;
114
+ min-width: 300px;
115
+ }
116
+ .d3-grpo-token-comparison .slider-row {
117
+ display: flex;
118
+ align-items: center;
119
+ gap: 12px;
120
+ }
121
+ .d3-grpo-token-comparison input[type="range"] {
122
+ flex: 1;
123
+ height: 6px;
124
+ border-radius: 3px;
125
+ background: var(--border-color);
126
+ outline: none;
127
+ -webkit-appearance: none;
128
+ cursor: pointer;
129
+ }
130
+ .d3-grpo-token-comparison input[type="range"]::-webkit-slider-thumb {
131
+ -webkit-appearance: none;
132
+ appearance: none;
133
+ width: 16px;
134
+ height: 16px;
135
+ border-radius: 50%;
136
+ background: var(--primary-color);
137
+ cursor: pointer;
138
+ }
139
+ .d3-grpo-token-comparison input[type="range"]::-moz-range-thumb {
140
+ width: 16px;
141
+ height: 16px;
142
+ border-radius: 50%;
143
+ background: var(--primary-color);
144
+ cursor: pointer;
145
+ border: none;
146
+ }
147
+ .d3-grpo-token-comparison .slider-value {
148
+ font-size: 12px;
149
+ font-weight: 600;
150
+ color: var(--text-color);
151
+ min-width: 60px;
152
+ text-align: right;
153
+ }
154
+ </style>
155
+ <script>
156
+ (() => {
157
/**
 * Invokes `cb` as soon as D3 is usable on `window`.
 *
 * If D3 is already present, `cb` runs synchronously and its return value is
 * passed through. Otherwise the shared `<script id="d3-cdn-script">` tag is
 * reused (or injected into <head> when absent) and `cb` runs after it loads.
 *
 * @param {Function} cb - Callback to run once `window.d3.select` exists.
 * @returns {*} The result of `cb()` on the synchronous path, else undefined.
 */
const ensureD3 = (cb) => {
  const isD3Ready = () => Boolean(window.d3) && typeof window.d3.select === 'function';
  if (isD3Ready()) return cb();

  let s = document.getElementById('d3-cdn-script');
  if (!s) {
    s = document.createElement('script');
    s.id = 'd3-cdn-script';
    s.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(s);
  }

  // Guard so the callback can never fire twice (load event + immediate check).
  let invoked = false;
  const onReady = () => {
    if (invoked || !isD3Ready()) return;
    invoked = true;
    cb();
  };

  s.addEventListener('load', onReady, { once: true });
  // Surface CDN failures instead of leaving an empty chart with no diagnostics.
  s.addEventListener('error', () => {
    console.error(`Failed to load D3 from ${s.src}`);
  }, { once: true });
  if (window.d3) onReady();
};
172
+
173
+ const bootstrap = () => {
174
+ const scriptEl = document.currentScript;
175
+ let container = scriptEl ? scriptEl.previousElementSibling : null;
176
+ if (!(container && container.classList && container.classList.contains('d3-grpo-token-comparison'))) {
177
+ const candidates = Array.from(document.querySelectorAll('.d3-grpo-token-comparison'))
178
+ .filter((el) => !(el.dataset && el.dataset.mounted === 'true'));
179
+ container = candidates[candidates.length - 1] || null;
180
+ }
181
+ if (!container) return;
182
+ if (container.dataset) {
183
+ if (container.dataset.mounted === 'true') return;
184
+ container.dataset.mounted = 'true';
185
+ }
186
+
187
+ // Tooltip
188
+ container.style.position = container.style.position || 'relative';
189
+ let tip = container.querySelector('.d3-tooltip');
190
+ let tipInner;
191
+ if (!tip) {
192
+ tip = document.createElement('div');
193
+ tip.className = 'd3-tooltip';
194
+ tipInner = document.createElement('div');
195
+ tipInner.className = 'd3-tooltip__inner';
196
+ tip.appendChild(tipInner);
197
+ container.appendChild(tip);
198
+ } else {
199
+ tipInner = tip.querySelector('.d3-tooltip__inner') || tip;
200
+ }
201
+
202
// Handle of the pending "park off-screen" timeout started by hideTooltip, or null.
let tooltipHideTimer = null;

/**
 * Fill the tooltip with `html`, make it visible and place it next to the pointer.
 *
 * @param {string} html - Markup written into the tooltip via innerHTML.
 * @param {Event} event - Pointer event; coordinates are taken relative to `container`.
 */
const showTooltip = (html, event) => {
  // Cancel a pending hide. Without this, moving quickly from one bar to the
  // next lets the stale timeout push the freshly shown tooltip off-screen.
  if (tooltipHideTimer !== null) {
    clearTimeout(tooltipHideTimer);
    tooltipHideTimer = null;
  }
  tipInner.innerHTML = html;
  tip.style.opacity = '1';
  const [mx, my] = d3.pointer(event, container);
  tip.style.transform = `translate(${mx + 12}px, ${my - 12}px)`;
};

/**
 * Fade the tooltip out, then move it off-screen after 120 ms.
 */
const hideTooltip = () => {
  tip.style.opacity = '0';
  if (tooltipHideTimer !== null) clearTimeout(tooltipHideTimer);
  tooltipHideTimer = setTimeout(() => {
    tooltipHideTimer = null;
    tip.style.transform = 'translate(-9999px, -9999px)';
  }, 120);
};
215
+
216
+ // SVG scaffolding
217
+ const svg = d3.select(container).append('svg').attr('width', '100%').style('display', 'block');
218
+ const gRoot = svg.append('g');
219
+
220
+ let width = 800, height = 400;
221
+ const margin = { top: 40, right: 16, bottom: 56, left: 60 };
222
+
223
+ // Dataset configurations
224
+ const datasetConfigs = [
225
+ {
226
+ file: 'HuggingFaceH4_details_HuggingFaceH4__SmolLM3-3B-GRPO-no-think_v27_00-step-000000400_aime25_2025-10-16T01-18-56.json',
227
+ name: '1.5-2k',
228
+ id: 'grpo-1.5-2k'
229
+ },
230
+ {
231
+ file: 'HuggingFaceH4_details_HuggingFaceH4__SmolLM3-3B-GRPO-no-think_v27_01-step-000000400_aime25_2025-10-16T01-22-56.json',
232
+ name: '2-2.5k',
233
+ id: 'grpo-2-2.5k'
234
+ },
235
+ {
236
+ file: 'HuggingFaceH4_details_HuggingFaceH4__SmolLM3-3B-GRPO-no-think_v27_02-step-000000400_aime25_2025-10-16T01-23-56.json',
237
+ name: '2.5-3k',
238
+ id: 'grpo-2.5-3k'
239
+ },
240
+ {
241
+ file: 'HuggingFaceH4_details_HuggingFaceH4__SmolLM3-3B-GRPO-no-think_v27_03-step-000000400_aime25_2025-10-16T01-58-31.json',
242
+ name: '3-3.5k',
243
+ id: 'grpo-3-3.5k'
244
+ },
245
+ {
246
+ file: 'HuggingFaceH4_details_HuggingFaceH4__SmolLM3-3B-GRPO-no-think_v27_04-step-000000400_aime25_2025-10-16T04-55-04.json',
247
+ name: '3.5-4k',
248
+ id: 'grpo-3.5-4k'
249
+ },
250
+ {
251
+ file: 'HuggingFaceH4_details_HuggingFaceH4__SmolLM3-3B-GRPO-no-think_v27_05-step-000000400_aime25_2025-10-16T06-12-05.json',
252
+ name: '4-4.5k',
253
+ id: 'grpo-4-4.5k'
254
+ }
255
+ ];
256
+
257
/**
 * Fetch and parse a histogram JSON file, trying each candidate location in order.
 *
 * The first location that answers with an OK response and parseable JSON wins.
 * Failures at individual locations are tolerated so the remaining ones are
 * still tried.
 *
 * @param {string} filename - File name appended to each candidate directory.
 * @returns {Promise<*>} The parsed JSON payload.
 * @throws {Error} `JSON not found: <filename>` when every location fails; the
 *   last failure encountered is attached as `cause`.
 */
const fetchFirstAvailable = async (filename) => {
  const paths = [
    `/data/grpo/histograms/${filename}`,
    `./assets/data/grpo/histograms/${filename}`,
    `../assets/data/grpo/histograms/${filename}`,
    `../../assets/data/grpo/histograms/${filename}`
  ];

  let lastError = null;
  for (const p of paths) {
    try {
      const r = await fetch(p, { cache: 'no-cache' });
      if (r.ok) return await r.json();
      lastError = new Error(`HTTP ${r.status} for ${p}`);
    } catch (e) {
      // Best effort: remember why this location failed and try the next one.
      lastError = e;
    }
  }

  const notFound = new Error(`JSON not found: ${filename}`);
  if (lastError) notFound.cause = lastError;
  throw notFound;
};
273
+
274
+ // Load baseline (APO No-Think)
275
+ const baselineFile = 'HuggingFaceH4_details_HuggingFaceH4__SmolLM3-3B-APO-no-think_main_aime25_2025-10-02T13-20-35.json';
276
+
277
+ fetchFirstAvailable(baselineFile)
278
+ .then(async (baselineData) => {
279
+ // Load all comparison datasets
280
+ const comparisonData = await Promise.all(
281
+ datasetConfigs.map(async (config) => ({
282
+ ...config,
283
+ data: await fetchFirstAvailable(config.file)
284
+ }))
285
+ );
286
+
287
+ // Get colors
288
+ const colors = window.ColorPalettes
289
+ ? window.ColorPalettes.getColors('categorical', 2)
290
+ : ['#4e79a7', '#f28e2c'];
291
+
292
+ let selectedComparison = comparisonData[0];
293
+ let currentIndex = 0;
294
+
295
+ // Create controls
296
+ const controls = document.createElement('div');
297
+ controls.className = 'controls';
298
+
299
+ const sliderContainer = document.createElement('div');
300
+ sliderContainer.className = 'slider-container';
301
+
302
+ const label = document.createElement('label');
303
+ label.textContent = 'Overlong Penalty';
304
+ label.style.fontSize = '12px';
305
+ label.style.fontWeight = '700';
306
+ label.style.color = 'var(--text-color)';
307
+
308
+ const sliderRow = document.createElement('div');
309
+ sliderRow.className = 'slider-row';
310
+
311
+ const slider = document.createElement('input');
312
+ slider.type = 'range';
313
+ slider.min = '0';
314
+ slider.max = String(comparisonData.length - 1);
315
+ slider.value = '0';
316
+ slider.step = '1';
317
+
318
+ const sliderValue = document.createElement('span');
319
+ sliderValue.className = 'slider-value';
320
+ sliderValue.textContent = comparisonData[0].name;
321
+
322
+ slider.addEventListener('input', (e) => {
323
+ currentIndex = parseInt(e.target.value);
324
+ selectedComparison = comparisonData[currentIndex];
325
+ sliderValue.textContent = selectedComparison.name;
326
+ render();
327
+ });
328
+
329
+ sliderRow.appendChild(slider);
330
+ sliderRow.appendChild(sliderValue);
331
+ sliderContainer.appendChild(label);
332
+ sliderContainer.appendChild(sliderRow);
333
+ controls.appendChild(sliderContainer);
334
+ container.appendChild(controls);
335
+
336
+ // Create legend
337
+ const legend = document.createElement('div');
338
+ legend.className = 'legend';
339
+
340
+ const legendTitle = document.createElement('div');
341
+ legendTitle.className = 'legend-title';
342
+ legendTitle.textContent = 'Legend';
343
+
344
+ const legendItems = document.createElement('div');
345
+ legendItems.className = 'items';
346
+
347
+ ['APO No-Think (Baseline)', 'GRPO on Math with Overlong Penalty'].forEach((name, idx) => {
348
+ const item = document.createElement('span');
349
+ item.className = 'item';
350
+
351
+ const swatch = document.createElement('span');
352
+ swatch.className = 'swatch';
353
+ swatch.style.background = colors[idx];
354
+
355
+ const text = document.createElement('span');
356
+ text.textContent = name;
357
+
358
+ item.appendChild(swatch);
359
+ item.appendChild(text);
360
+ legendItems.appendChild(item);
361
+ });
362
+
363
+ legend.appendChild(legendTitle);
364
+ legend.appendChild(legendItems);
365
+ container.appendChild(legend);
366
+
367
+ function updateSize() {
368
+ width = container.clientWidth || 800;
369
+ height = Math.max(400, Math.round(width / 2.2));
370
+ svg.attr('width', width).attr('height', height);
371
+ gRoot.attr('transform', `translate(${margin.left},${margin.top})`);
372
+ return {
373
+ innerWidth: width - margin.left - margin.right,
374
+ innerHeight: height - margin.top - margin.bottom
375
+ };
376
+ }
377
+
378
+ function render() {
379
+ const { innerWidth, innerHeight } = updateSize();
380
+
381
+ // Clear previous
382
+ gRoot.selectAll('*').remove();
383
+
384
+ const datasets = [
385
+ { name: 'APO No-Think (Baseline)', data: baselineData, idx: 0 },
386
+ { name: selectedComparison.name, data: selectedComparison.data, idx: 1 }
387
+ ];
388
+
389
+ // Use 0-4k for both to keep scales comparable
390
+ const xDomain = [0, 4000];
391
+
392
+ // Create bins for both datasets
393
+ const allBins = datasets.map(ds => {
394
+ const tokens = ds.data.token_counts;
395
+ const bins = d3.bin()
396
+ .domain(xDomain)
397
+ .thresholds(30)(tokens);
398
+ return { ...ds, bins };
399
+ });
400
+
401
+ // Find max frequency across both datasets for shared y-scale
402
+ const maxFreq = d3.max(allBins.flatMap(d => d.bins.map(b => b.length)));
403
+
404
+ const xScale = d3.scaleLinear()
405
+ .domain(xDomain)
406
+ .range([0, innerWidth]);
407
+
408
+ const yScale = d3.scaleLinear()
409
+ .domain([0, maxFreq])
410
+ .range([innerHeight, 0])
411
+ .nice();
412
+
413
+ // Grid
414
+ gRoot.append('g')
415
+ .attr('class', 'grid')
416
+ .call(
417
+ d3.axisLeft(yScale)
418
+ .ticks(5)
419
+ .tickSize(-innerWidth)
420
+ .tickFormat('')
421
+ )
422
+ .call(g => g.select('.domain').remove());
423
+
424
+ // Draw histograms (comparison first, baseline second so baseline is on top)
425
+ allBins.reverse().forEach(({ name, bins, idx, data }) => {
426
+ gRoot.selectAll(`rect.bar-${idx}`)
427
+ .data(bins)
428
+ .join('rect')
429
+ .attr('class', `bar bar-${idx}`)
430
+ .attr('x', d => xScale(d.x0))
431
+ .attr('y', d => yScale(d.length))
432
+ .attr('width', d => Math.max(1, xScale(d.x1) - xScale(d.x0) - 1))
433
+ .attr('height', d => Math.max(0, innerHeight - yScale(d.length)))
434
+ .attr('fill', colors[idx])
435
+ .attr('opacity', 0.6)
436
+ .on('mouseenter', (event, d) => {
437
+ const stats = data.statistics;
438
+ const html = `<strong>${name}</strong><br/>Tokens: ${d.x0.toFixed(0)} - ${d.x1.toFixed(0)}<br/>Count: ${d.length}<br/>Mean: ${stats.mean.toFixed(0)} | Median: ${stats.median.toFixed(0)}`;
439
+ showTooltip(html, event);
440
+ })
441
+ .on('mouseleave', hideTooltip);
442
+ });
443
+
444
+ // X axis
445
+ const xAxis = gRoot.append('g')
446
+ .attr('class', 'axes')
447
+ .attr('transform', `translate(0,${innerHeight})`)
448
+ .call(d3.axisBottom(xScale).ticks(8).tickFormat(d3.format(',d')));
449
+
450
+ xAxis.select('.domain').remove();
451
+
452
+ // Y axis
453
+ const yAxis = gRoot.append('g')
454
+ .attr('class', 'axes')
455
+ .call(d3.axisLeft(yScale).ticks(6));
456
+
457
+ yAxis.select('.domain').remove();
458
+
459
+ // X axis label
460
+ gRoot.append('text')
461
+ .attr('class', 'axes')
462
+ .attr('x', innerWidth / 2)
463
+ .attr('y', innerHeight + 40)
464
+ .attr('text-anchor', 'middle')
465
+ .style('font-size', '12px')
466
+ .style('fill', 'var(--text-color)')
467
+ .text('Token count');
468
+
469
+ // Y axis label
470
+ gRoot.append('text')
471
+ .attr('class', 'axes')
472
+ .attr('transform', 'rotate(-90)')
473
+ .attr('x', -innerHeight / 2)
474
+ .attr('y', -45)
475
+ .attr('text-anchor', 'middle')
476
+ .style('font-size', '12px')
477
+ .style('fill', 'var(--text-color)')
478
+ .text('Frequency');
479
+ }
480
+
481
+ render();
482
+
483
+ if (window.ResizeObserver) {
484
+ const ro = new ResizeObserver(() => render());
485
+ ro.observe(container);
486
+ } else {
487
+ window.addEventListener('resize', render);
488
+ }
489
+ })
490
+ .catch((err) => {
491
+ const pre = document.createElement('pre');
492
+ pre.textContent = 'Error loading data: ' + err.message;
493
+ pre.style.cssText = 'color:red;font-size:12px;padding:12px;margin:0;';
494
+ container.appendChild(pre);
495
+ });
496
+ };
497
+
498
+ if (document.readyState === 'loading') {
499
+ document.addEventListener('DOMContentLoaded', () => ensureD3(bootstrap), { once: true });
500
+ } else {
501
+ ensureD3(bootstrap);
502
+ }
503
+ })();
504
+ </script>
app/src/content/embeds/d3-rl-token-histogram.html ADDED
@@ -0,0 +1,321 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div class="d3-grpo-token-histogram"></div>
2
+ <style>
3
+ .d3-grpo-token-histogram {
4
+ width: 100%;
5
+ position: relative;
6
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
7
+ }
8
+ .d3-grpo-token-histogram svg {
9
+ display: block;
10
+ width: 100%;
11
+ }
12
+ .d3-grpo-token-histogram .bar {
13
+ stroke: none;
14
+ }
15
+ .d3-grpo-token-histogram .axes path,
16
+ .d3-grpo-token-histogram .axes line {
17
+ stroke: var(--axis-color, var(--text-color));
18
+ }
19
+ .d3-grpo-token-histogram .axes text {
20
+ fill: var(--tick-color, var(--muted-color));
21
+ font-size: 11px;
22
+ }
23
+ .d3-grpo-token-histogram .grid line {
24
+ stroke: var(--grid-color, rgba(0,0,0,.08));
25
+ }
26
+ .d3-grpo-token-histogram .chart-title {
27
+ font-size: 13px;
28
+ font-weight: 600;
29
+ fill: var(--text-color);
30
+ }
31
+ .d3-grpo-token-histogram .d3-tooltip {
32
+ position: absolute;
33
+ top: 0;
34
+ left: 0;
35
+ transform: translate(-9999px, -9999px);
36
+ pointer-events: none;
37
+ padding: 8px 10px;
38
+ border-radius: 8px;
39
+ font-size: 12px;
40
+ line-height: 1.35;
41
+ border: 1px solid var(--border-color);
42
+ background: var(--surface-bg);
43
+ color: var(--text-color);
44
+ box-shadow: 0 4px 24px rgba(0,0,0,.18);
45
+ opacity: 0;
46
+ transition: opacity .12s ease;
47
+ }
48
+ .d3-grpo-token-histogram .d3-tooltip__inner {
49
+ text-align: left;
50
+ }
51
+ </style>
52
+ <script>
53
+ (() => {
54
/**
 * Run `cb` once the global D3 library is usable.
 *
 * When `window.d3.select` already exists, `cb` is invoked synchronously and its
 * return value is passed through. Otherwise the shared
 * `<script id="d3-cdn-script">` tag is reused (or created and appended to
 * `document.head`) and `cb` runs when that tag fires `load`.
 *
 * @param {Function} cb - Callback to invoke once `window.d3` is ready.
 * @returns {*} The result of `cb()` on the synchronous path, otherwise undefined.
 */
const ensureD3 = (cb) => {
  const isReady = () => Boolean(window.d3) && typeof window.d3.select === 'function';
  if (isReady()) return cb();

  let s = document.getElementById('d3-cdn-script');
  if (!s) {
    s = document.createElement('script');
    s.id = 'd3-cdn-script';
    s.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(s);
  }

  const onReady = () => {
    s.removeEventListener('error', onError);
    if (isReady()) cb();
  };
  // Surface a failed CDN load instead of leaving the embed silently empty.
  const onError = () => {
    s.removeEventListener('load', onReady);
    console.error(`Failed to load D3 from ${s.src}`);
  };
  s.addEventListener('load', onReady, { once: true });
  s.addEventListener('error', onError, { once: true });
};
69
+
70
+ const bootstrap = () => {
71
+ const scriptEl = document.currentScript;
72
+ let container = scriptEl ? scriptEl.previousElementSibling : null;
73
+ if (!(container && container.classList && container.classList.contains('d3-grpo-token-histogram'))) {
74
+ const candidates = Array.from(document.querySelectorAll('.d3-grpo-token-histogram'))
75
+ .filter((el) => !(el.dataset && el.dataset.mounted === 'true'));
76
+ container = candidates[candidates.length - 1] || null;
77
+ }
78
+ if (!container) return;
79
+ if (container.dataset) {
80
+ if (container.dataset.mounted === 'true') return;
81
+ container.dataset.mounted = 'true';
82
+ }
83
+
84
+ // Tooltip
85
+ container.style.position = container.style.position || 'relative';
86
+ let tip = container.querySelector('.d3-tooltip');
87
+ let tipInner;
88
+ if (!tip) {
89
+ tip = document.createElement('div');
90
+ tip.className = 'd3-tooltip';
91
+ tipInner = document.createElement('div');
92
+ tipInner.className = 'd3-tooltip__inner';
93
+ tip.appendChild(tipInner);
94
+ container.appendChild(tip);
95
+ } else {
96
+ tipInner = tip.querySelector('.d3-tooltip__inner') || tip;
97
+ }
98
+
99
// Handle of the pending "park off-screen" timeout started by hideTooltip, or null.
let tooltipHideTimer = null;

/**
 * Fill the tooltip with `html`, make it visible and place it next to the pointer.
 *
 * @param {string} html - Markup written into the tooltip via innerHTML.
 * @param {Event} event - Pointer event; coordinates are taken relative to `container`.
 */
const showTooltip = (html, event) => {
  // Cancel a pending hide. Without this, moving quickly from one bar to the
  // next lets the stale timeout push the freshly shown tooltip off-screen.
  if (tooltipHideTimer !== null) {
    clearTimeout(tooltipHideTimer);
    tooltipHideTimer = null;
  }
  tipInner.innerHTML = html;
  tip.style.opacity = '1';
  const [mx, my] = d3.pointer(event, container);
  tip.style.transform = `translate(${mx + 12}px, ${my - 12}px)`;
};

/**
 * Fade the tooltip out, then move it off-screen after 120 ms (the same
 * duration as the `.12s` opacity transition declared for `.d3-tooltip`).
 */
const hideTooltip = () => {
  tip.style.opacity = '0';
  if (tooltipHideTimer !== null) clearTimeout(tooltipHideTimer);
  tooltipHideTimer = setTimeout(() => {
    tooltipHideTimer = null;
    tip.style.transform = 'translate(-9999px, -9999px)';
  }, 120);
};
112
+
113
+ // SVG scaffolding
114
+ const svg = d3.select(container).append('svg').attr('width', '100%').style('display', 'block');
115
+ const gRoot = svg.append('g');
116
+
117
+ let width = 800, height = 400;
118
+ const margin = { top: 40, right: 16, bottom: 56, left: 60 };
119
+
120
+ // Data loading
121
+ const JSON_PATHS = [
122
+ '/data/grpo/histograms/HuggingFaceH4_details_HuggingFaceH4__SmolLM3-3B-APO-no-think_main_aime25_2025-10-02T13-20-35.json',
123
+ './assets/data/grpo/histograms/HuggingFaceH4_details_HuggingFaceH4__SmolLM3-3B-APO-no-think_main_aime25_2025-10-02T13-20-35.json',
124
+ '../assets/data/grpo/histograms/HuggingFaceH4_details_HuggingFaceH4__SmolLM3-3B-APO-no-think_main_aime25_2025-10-02T13-20-35.json',
125
+ '../../assets/data/grpo/histograms/HuggingFaceH4_details_HuggingFaceH4__SmolLM3-3B-APO-no-think_main_aime25_2025-10-02T13-20-35.json'
126
+ ];
127
+
128
+ const JSON_PATHS_THINK = [
129
+ '/data/grpo/histograms/HuggingFaceH4_details_HuggingFaceH4__SmolLM3-3B-APO-think_main_aime25_2025-10-17T09-02-45.json',
130
+ './assets/data/grpo/histograms/HuggingFaceH4_details_HuggingFaceH4__SmolLM3-3B-APO-think_main_aime25_2025-10-17T09-02-45.json',
131
+ '../assets/data/grpo/histograms/HuggingFaceH4_details_HuggingFaceH4__SmolLM3-3B-APO-think_main_aime25_2025-10-17T09-02-45.json',
132
+ '../../assets/data/grpo/histograms/HuggingFaceH4_details_HuggingFaceH4__SmolLM3-3B-APO-think_main_aime25_2025-10-17T09-02-45.json'
133
+ ];
134
+
135
/**
 * Fetch and parse JSON from the first candidate URL that responds successfully.
 *
 * Candidates are tried in order. A failure at one location (network error,
 * non-OK status, unparseable body) is tolerated so the next one is attempted.
 *
 * @param {string[]} paths - Candidate URLs, in priority order.
 * @returns {Promise<*>} The parsed JSON payload.
 * @throws {Error} `JSON not found` when no candidate succeeds; the last failure
 *   encountered (if any) is attached as `cause`.
 */
const fetchFirstAvailable = async (paths) => {
  let lastError = null;
  for (const p of paths) {
    try {
      const r = await fetch(p, { cache: 'no-cache' });
      if (r.ok) return await r.json();
      lastError = new Error(`HTTP ${r.status} for ${p}`);
    } catch (e) {
      // Best effort: remember why this location failed and try the next one.
      lastError = e;
    }
  }
  const notFound = new Error('JSON not found');
  if (lastError) notFound.cause = lastError;
  throw notFound;
};
144
+
145
+ Promise.all([
146
+ fetchFirstAvailable(JSON_PATHS),
147
+ fetchFirstAvailable(JSON_PATHS_THINK)
148
+ ])
149
+ .then(([dataNoThink, dataThink]) => {
150
+ const datasets = [
151
+ { name: 'No-Think', data: dataNoThink, title: 'APO No-Think' },
152
+ { name: 'Think', data: dataThink, title: 'APO Think' }
153
+ ];
154
+
155
+ // Get colors
156
+ const colors = window.ColorPalettes
157
+ ? window.ColorPalettes.getColors('categorical', 2)
158
+ : ['#4e79a7', '#f28e2c'];
159
+
160
+ function updateSize() {
161
+ width = container.clientWidth || 800;
162
+ height = Math.max(400, Math.round(width / 2.2));
163
+ svg.attr('width', width).attr('height', height);
164
+ gRoot.attr('transform', `translate(${margin.left},${margin.top})`);
165
+ return {
166
+ innerWidth: width - margin.left - margin.right,
167
+ innerHeight: height - margin.top - margin.bottom
168
+ };
169
+ }
170
+
171
+ function render() {
172
+ const { innerWidth, innerHeight } = updateSize();
173
+
174
+ // Clear previous
175
+ gRoot.selectAll('*').remove();
176
+
177
+ // Calculate histogram bins for each dataset
178
+ const chartWidth = (innerWidth - 40) / 2;
179
+ const histograms = datasets.map((ds, idx) => {
180
+ const tokens = ds.data.token_counts;
181
+ const xOffset = idx * (chartWidth + 40);
182
+
183
+ // Use independent x-axis ranges optimized for each dataset
184
+ // No-Think: most data is 0-4k range (median 723)
185
+ // Think: data is spread around median 16,769
186
+ const xDomain = idx === 0
187
+ ? [0, 4000] // No-Think: focus on 0-4k range
188
+ : [0, 32000]; // Think: show full range
189
+
190
+ // Create bins
191
+ const bins = d3.bin()
192
+ .domain(xDomain)
193
+ .thresholds(30)(tokens);
194
+
195
+ const xScale = d3.scaleLinear()
196
+ .domain(xDomain)
197
+ .range([0, chartWidth]);
198
+
199
+ const yScale = d3.scaleLinear()
200
+ .domain([0, d3.max(bins, d => d.length)])
201
+ .range([innerHeight, 0])
202
+ .nice();
203
+
204
+ return { ds, bins, xScale, yScale, xOffset, idx };
205
+ });
206
+
207
+ // Draw each histogram
208
+ histograms.forEach(({ ds, bins, xScale, yScale, xOffset, idx }) => {
209
+ const g = gRoot.append('g').attr('transform', `translate(${xOffset},0)`);
210
+
211
+ // Title
212
+ g.append('text')
213
+ .attr('class', 'chart-title')
214
+ .attr('x', chartWidth / 2)
215
+ .attr('y', -12)
216
+ .attr('text-anchor', 'middle')
217
+ .text(ds.title);
218
+
219
+ // Bars
220
+ g.selectAll('rect.bar')
221
+ .data(bins)
222
+ .join('rect')
223
+ .attr('class', 'bar')
224
+ .attr('x', d => xScale(d.x0))
225
+ .attr('y', d => yScale(d.length))
226
+ .attr('width', d => Math.max(1, xScale(d.x1) - xScale(d.x0) - 1))
227
+ .attr('height', d => Math.max(0, innerHeight - yScale(d.length)))
228
+ .attr('fill', colors[idx])
229
+ .attr('opacity', 0.8)
230
+ .on('mouseenter', (event, d) => {
231
+ const html = `<strong>Tokens: ${d.x0.toFixed(0)} - ${d.x1.toFixed(0)}</strong><br/>Count: ${d.length}`;
232
+ showTooltip(html, event);
233
+ })
234
+ .on('mouseleave', hideTooltip);
235
+
236
+ // X axis
237
+ const xAxis = g.append('g')
238
+ .attr('class', 'axes')
239
+ .attr('transform', `translate(0,${innerHeight})`)
240
+ .call(d3.axisBottom(xScale).ticks(5).tickFormat(d3.format(',d')));
241
+
242
+ xAxis.select('.domain').remove();
243
+
244
+ // Y axis
245
+ const yAxis = g.append('g')
246
+ .attr('class', 'axes')
247
+ .call(d3.axisLeft(yScale).ticks(5));
248
+
249
+ yAxis.select('.domain').remove();
250
+
251
+ // Grid
252
+ g.append('g')
253
+ .attr('class', 'grid')
254
+ .call(
255
+ d3.axisLeft(yScale)
256
+ .ticks(5)
257
+ .tickSize(-chartWidth)
258
+ .tickFormat('')
259
+ )
260
+ .call(g => g.select('.domain').remove());
261
+
262
+ // X axis label
263
+ g.append('text')
264
+ .attr('class', 'axes')
265
+ .attr('x', chartWidth / 2)
266
+ .attr('y', innerHeight + 40)
267
+ .attr('text-anchor', 'middle')
268
+ .style('font-size', '12px')
269
+ .style('fill', 'var(--text-color)')
270
+ .text('Token count');
271
+
272
+ // Y axis label (only for left chart)
273
+ if (idx === 0) {
274
+ g.append('text')
275
+ .attr('class', 'axes')
276
+ .attr('transform', 'rotate(-90)')
277
+ .attr('x', -innerHeight / 2)
278
+ .attr('y', -45)
279
+ .attr('text-anchor', 'middle')
280
+ .style('font-size', '12px')
281
+ .style('fill', 'var(--text-color)')
282
+ .text('Frequency');
283
+ }
284
+
285
+ // Statistics text
286
+ const stats = ds.data.statistics;
287
+ const statsText = `Mean: ${stats.mean.toFixed(0)} | Median: ${stats.median.toFixed(0)}`;
288
+ g.append('text')
289
+ .attr('x', chartWidth / 2)
290
+ .attr('y', innerHeight + 54)
291
+ .attr('text-anchor', 'middle')
292
+ .style('font-size', '11px')
293
+ .style('fill', 'var(--muted-color)')
294
+ .text(statsText);
295
+ });
296
+ }
297
+
298
+ render();
299
+
300
+ if (window.ResizeObserver) {
301
+ const ro = new ResizeObserver(() => render());
302
+ ro.observe(container);
303
+ } else {
304
+ window.addEventListener('resize', render);
305
+ }
306
+ })
307
+ .catch((err) => {
308
+ const pre = document.createElement('pre');
309
+ pre.textContent = 'Error loading data: ' + err.message;
310
+ pre.style.cssText = 'color:red;font-size:12px;padding:12px;margin:0;';
311
+ container.appendChild(pre);
312
+ });
313
+ };
314
+
315
+ if (document.readyState === 'loading') {
316
+ document.addEventListener('DOMContentLoaded', () => ensureD3(bootstrap), { once: true });
317
+ } else {
318
+ ensureD3(bootstrap);
319
+ }
320
+ })();
321
+ </script>