Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
update
Browse files- app/src/content/embeds/d3-po-beta-ablation.html +538 -0
- app/src/content/embeds/d3-po-loss-ablations.html +527 -0
- app/src/content/embeds/d3-po-lr-ablation.html +538 -0
- app/src/content/embeds/d3-po-size-ablation.html +537 -0
- app/src/content/embeds/d3-rl-aime25.html +536 -0
- app/src/content/embeds/d3-rl-full-length.html +734 -0
- app/src/content/embeds/d3-rl-reward-curves.html +770 -0
- app/src/content/embeds/d3-rl-token-comparison.html +504 -0
- app/src/content/embeds/d3-rl-token-histogram.html +321 -0
app/src/content/embeds/d3-po-beta-ablation.html
ADDED
|
@@ -0,0 +1,538 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<div class="d3-apo-beta-ablation"></div>
|
| 2 |
+
<style>
|
| 3 |
+
.d3-apo-beta-ablation {
|
| 4 |
+
width: 100%;
|
| 5 |
+
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
|
| 6 |
+
position: relative;
|
| 7 |
+
--axis-color: var(--text-color, #333);
|
| 8 |
+
--tick-color: var(--muted-color, #666);
|
| 9 |
+
--grid-color: rgba(0,0,0,.08);
|
| 10 |
+
}
|
| 11 |
+
[data-theme="dark"] .d3-apo-beta-ablation {
|
| 12 |
+
--axis-color: var(--text-color, #ccc);
|
| 13 |
+
--tick-color: var(--muted-color, #999);
|
| 14 |
+
--grid-color: rgba(255,255,255,.10);
|
| 15 |
+
}
|
| 16 |
+
.d3-apo-beta-ablation svg {
|
| 17 |
+
display: block;
|
| 18 |
+
overflow: visible;
|
| 19 |
+
}
|
| 20 |
+
.d3-apo-beta-ablation .axes path,
|
| 21 |
+
.d3-apo-beta-ablation .axes line {
|
| 22 |
+
stroke: var(--axis-color);
|
| 23 |
+
shape-rendering: crispEdges;
|
| 24 |
+
}
|
| 25 |
+
.d3-apo-beta-ablation .axes text {
|
| 26 |
+
fill: var(--tick-color);
|
| 27 |
+
font-size: 11px;
|
| 28 |
+
}
|
| 29 |
+
.d3-apo-beta-ablation .grid line {
|
| 30 |
+
stroke: var(--grid-color);
|
| 31 |
+
stroke-dasharray: 2,2;
|
| 32 |
+
shape-rendering: crispEdges;
|
| 33 |
+
}
|
| 34 |
+
.d3-apo-beta-ablation .axis-label {
|
| 35 |
+
fill: var(--text-color);
|
| 36 |
+
font-size: 12px;
|
| 37 |
+
font-weight: 600;
|
| 38 |
+
}
|
| 39 |
+
.d3-apo-beta-ablation .line-think {
|
| 40 |
+
fill: none;
|
| 41 |
+
stroke-width: 2.5;
|
| 42 |
+
stroke-linecap: round;
|
| 43 |
+
stroke-linejoin: round;
|
| 44 |
+
}
|
| 45 |
+
.d3-apo-beta-ablation .line-no-think {
|
| 46 |
+
fill: none;
|
| 47 |
+
stroke-width: 2.5;
|
| 48 |
+
stroke-linecap: round;
|
| 49 |
+
stroke-linejoin: round;
|
| 50 |
+
}
|
| 51 |
+
.d3-apo-beta-ablation .reference-line {
|
| 52 |
+
fill: none;
|
| 53 |
+
stroke-width: 1.5;
|
| 54 |
+
stroke-dasharray: 5, 5;
|
| 55 |
+
opacity: 0.4;
|
| 56 |
+
}
|
| 57 |
+
.d3-apo-beta-ablation .dot {
|
| 58 |
+
stroke: var(--surface-bg);
|
| 59 |
+
stroke-width: 2;
|
| 60 |
+
}
|
| 61 |
+
.d3-apo-beta-ablation .header {
|
| 62 |
+
display: flex;
|
| 63 |
+
align-items: flex-start;
|
| 64 |
+
justify-content: space-between;
|
| 65 |
+
gap: 16px;
|
| 66 |
+
margin-top: 16px;
|
| 67 |
+
flex-wrap: wrap;
|
| 68 |
+
}
|
| 69 |
+
.d3-apo-beta-ablation .legend {
|
| 70 |
+
display: flex;
|
| 71 |
+
flex-direction: column;
|
| 72 |
+
align-items: flex-start;
|
| 73 |
+
gap: 6px;
|
| 74 |
+
}
|
| 75 |
+
.d3-apo-beta-ablation .legend-title {
|
| 76 |
+
font-size: 12px;
|
| 77 |
+
font-weight: 700;
|
| 78 |
+
color: var(--text-color);
|
| 79 |
+
}
|
| 80 |
+
.d3-apo-beta-ablation .legend .items {
|
| 81 |
+
display: flex;
|
| 82 |
+
flex-wrap: wrap;
|
| 83 |
+
gap: 8px 14px;
|
| 84 |
+
}
|
| 85 |
+
.d3-apo-beta-ablation .legend .item {
|
| 86 |
+
display: inline-flex;
|
| 87 |
+
align-items: center;
|
| 88 |
+
gap: 6px;
|
| 89 |
+
white-space: nowrap;
|
| 90 |
+
font-size: 12px;
|
| 91 |
+
color: var(--text-color);
|
| 92 |
+
}
|
| 93 |
+
.d3-apo-beta-ablation .legend .swatch {
|
| 94 |
+
width: 14px;
|
| 95 |
+
height: 14px;
|
| 96 |
+
border-radius: 3px;
|
| 97 |
+
border: 1px solid var(--border-color);
|
| 98 |
+
}
|
| 99 |
+
.d3-apo-beta-ablation .legend .swatch-line {
|
| 100 |
+
width: 20px;
|
| 101 |
+
height: 2px;
|
| 102 |
+
border: none;
|
| 103 |
+
}
|
| 104 |
+
.d3-apo-beta-ablation .legend .swatch-dashed {
|
| 105 |
+
width: 20px;
|
| 106 |
+
height: 2px;
|
| 107 |
+
border: none;
|
| 108 |
+
background: repeating-linear-gradient(
|
| 109 |
+
to right,
|
| 110 |
+
var(--text-color) 0,
|
| 111 |
+
var(--text-color) 4px,
|
| 112 |
+
transparent 4px,
|
| 113 |
+
transparent 8px
|
| 114 |
+
);
|
| 115 |
+
}
|
| 116 |
+
.d3-apo-beta-ablation .controls {
|
| 117 |
+
display: flex;
|
| 118 |
+
gap: 16px;
|
| 119 |
+
align-items: flex-start;
|
| 120 |
+
justify-content: flex-end;
|
| 121 |
+
flex-wrap: wrap;
|
| 122 |
+
}
|
| 123 |
+
.d3-apo-beta-ablation .control-group {
|
| 124 |
+
display: flex;
|
| 125 |
+
flex-direction: column;
|
| 126 |
+
align-items: flex-start;
|
| 127 |
+
gap: 6px;
|
| 128 |
+
}
|
| 129 |
+
.d3-apo-beta-ablation .controls label {
|
| 130 |
+
font-size: 12px;
|
| 131 |
+
font-weight: 700;
|
| 132 |
+
color: var(--text-color);
|
| 133 |
+
}
|
| 134 |
+
.d3-apo-beta-ablation .controls select {
|
| 135 |
+
font-size: 12px;
|
| 136 |
+
padding: 8px 28px 8px 10px;
|
| 137 |
+
border: 1px solid var(--border-color);
|
| 138 |
+
border-radius: 8px;
|
| 139 |
+
background: var(--surface-bg);
|
| 140 |
+
color: var(--text-color);
|
| 141 |
+
cursor: pointer;
|
| 142 |
+
appearance: none;
|
| 143 |
+
background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='12' height='12' viewBox='0 0 12 12'%3E%3Cpath fill='%23666' d='M6 9L1 4h10z'/%3E%3C/svg%3E");
|
| 144 |
+
background-repeat: no-repeat;
|
| 145 |
+
background-position: right 8px center;
|
| 146 |
+
}
|
| 147 |
+
.d3-apo-beta-ablation .controls select:focus {
|
| 148 |
+
outline: 2px solid var(--primary-color);
|
| 149 |
+
outline-offset: 2px;
|
| 150 |
+
}
|
| 151 |
+
.d3-apo-beta-ablation .d3-tooltip {
|
| 152 |
+
position: absolute;
|
| 153 |
+
background: var(--surface-bg);
|
| 154 |
+
border: 1px solid var(--border-color);
|
| 155 |
+
border-radius: 8px;
|
| 156 |
+
padding: 12px;
|
| 157 |
+
pointer-events: none;
|
| 158 |
+
opacity: 0;
|
| 159 |
+
transition: opacity 0.2s;
|
| 160 |
+
box-shadow: 0 2px 8px rgba(0,0,0,0.15);
|
| 161 |
+
font-size: 12px;
|
| 162 |
+
z-index: 1000;
|
| 163 |
+
}
|
| 164 |
+
.d3-apo-beta-ablation .tooltip-title {
|
| 165 |
+
font-weight: 700;
|
| 166 |
+
margin-bottom: 8px;
|
| 167 |
+
color: var(--text-color);
|
| 168 |
+
}
|
| 169 |
+
.d3-apo-beta-ablation .tooltip-item {
|
| 170 |
+
display: flex;
|
| 171 |
+
align-items: center;
|
| 172 |
+
gap: 8px;
|
| 173 |
+
margin: 4px 0;
|
| 174 |
+
color: var(--text-color);
|
| 175 |
+
}
|
| 176 |
+
.d3-apo-beta-ablation .tooltip-color {
|
| 177 |
+
width: 12px;
|
| 178 |
+
height: 12px;
|
| 179 |
+
border-radius: 2px;
|
| 180 |
+
}
|
| 181 |
+
</style>
|
| 182 |
+
<script>
|
| 183 |
+
(() => {
|
| 184 |
+
/**
 * Runs `cb` once the global D3 library is usable, loading it from the CDN if needed.
 *
 * If `window.d3.select` is already a function, `cb` is invoked synchronously and
 * its result is returned. Otherwise the shared `<script id="d3-cdn-script">` tag is
 * reused (or created and appended to `<head>`), and `cb` runs when that tag fires
 * `load` and D3 is then available.
 *
 * @param {Function} cb - Callback to invoke once D3 is available.
 * @returns {*} The result of `cb` when D3 was already loaded, otherwise `undefined`.
 */
const ensureD3 = (cb) => {
  const hasD3 = () => Boolean(window.d3) && typeof window.d3.select === 'function';
  if (hasD3()) return cb();

  const SCRIPT_ID = 'd3-cdn-script';
  let s = document.getElementById(SCRIPT_ID);
  if (!s) {
    s = document.createElement('script');
    s.id = SCRIPT_ID;
    s.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(s);
  }

  // D3 is known to be unavailable at this point, so the only way forward is the
  // tag's `load` event; re-check availability there before running the callback.
  s.addEventListener('load', () => { if (hasD3()) cb(); }, { once: true });

  // Surface a failed CDN load instead of leaving an empty chart with no explanation.
  s.addEventListener('error', () => {
    console.error(`[d3-embed] Failed to load D3 from ${s.src}; chart not rendered.`);
  }, { once: true });
};
|
| 197 |
+
|
| 198 |
+
const bootstrap = () => {
|
| 199 |
+
const scriptEl = document.currentScript;
|
| 200 |
+
let container = scriptEl ? scriptEl.previousElementSibling : null;
|
| 201 |
+
if (!(container && container.classList && container.classList.contains('d3-apo-beta-ablation'))) {
|
| 202 |
+
const candidates = Array.from(document.querySelectorAll('.d3-apo-beta-ablation'))
|
| 203 |
+
.filter((el) => !(el.dataset && el.dataset.mounted === 'true'));
|
| 204 |
+
container = candidates[candidates.length - 1] || null;
|
| 205 |
+
}
|
| 206 |
+
if (!container) return;
|
| 207 |
+
if (container.dataset) {
|
| 208 |
+
if (container.dataset.mounted === 'true') return;
|
| 209 |
+
container.dataset.mounted = 'true';
|
| 210 |
+
}
|
| 211 |
+
|
| 212 |
+
// Data embedded inline
|
| 213 |
+
const data = [{"system_prompt":"/think","Evaluation":"AIME25","Score":45.47,"Beta":0.05},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":7.92,"Beta":0.05},{"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":42.49,"Beta":0.05},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":32.58,"Beta":0.05},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":69.88,"Beta":0.05},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":74.46,"Beta":0.05},{"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":23.76,"Beta":0.05},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":13.86,"Beta":0.05},{"system_prompt":"/think","Evaluation":"Average","Score":45.4,"Beta":0.05},{"system_prompt":"/no_think","Evaluation":"Average","Score":32.205,"Beta":0.05},{"system_prompt":"/think","Evaluation":"AIME25","Score":43.28,"Beta":0.01},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":6.77,"Beta":0.01},{"system_prompt":"/think","Evaluation":"AIME25","Score":46.82,"Beta":0.1},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":8.02,"Beta":0.1},{"system_prompt":"/think","Evaluation":"AIME25","Score":44.95,"Beta":0.2},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":7.6,"Beta":0.2},{"system_prompt":"/think","Evaluation":"AIME25","Score":47.45,"Beta":0.5},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":7.29,"Beta":0.5},{"system_prompt":"/think","Evaluation":"AIME25","Score":47.45,"Beta":0.75},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":7.29,"Beta":0.75},{"system_prompt":"/think","Evaluation":"AIME25","Score":45.36,"Beta":0.99},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":8.8,"Beta":0.99},{"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":39.77,"Beta":0.01},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":32.95,"Beta":0.01},{"system_prompt":"/think","Evaluation":"GPQA 
Diamond","Score":44.38,"Beta":0.1},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":33.27,"Beta":0.1},{"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":42.05,"Beta":0.2},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":31.57,"Beta":0.2},{"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":41.54,"Beta":0.5},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":31.63,"Beta":0.5},{"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":41.54,"Beta":0.75},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":31.63,"Beta":0.75},{"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":42.36,"Beta":0.99},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":30.43,"Beta":0.99},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":69.04,"Beta":0.01},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":77.02,"Beta":0.01},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":71.47,"Beta":0.1},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":75.36,"Beta":0.1},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":71.55,"Beta":0.2},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":75.33,"Beta":0.2},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":73.58,"Beta":0.5},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":75.91,"Beta":0.5},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":73.58,"Beta":0.75},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":75.91,"Beta":0.75},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":69.13,"Beta":0.99},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":75.41,"Beta":0.99},{"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":12.87,"Beta":0.01},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":10.89,"Beta":0.01},{"system_prompt":"/think","Evaluation":"LiveCodeBench 
v4","Score":29.7,"Beta":0.1},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":16.83,"Beta":0.1},{"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":26.73,"Beta":0.2},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":16.83,"Beta":0.2},{"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":28.71,"Beta":0.5},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":11.88,"Beta":0.5},{"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":28.71,"Beta":0.75},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":11.88,"Beta":0.75},{"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":34.65,"Beta":0.99},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":19.8,"Beta":0.99},{"system_prompt":"/think","Evaluation":"Average","Score":41.24,"Beta":0.01},{"system_prompt":"/no_think","Evaluation":"Average","Score":31.9075,"Beta":0.01},{"system_prompt":"/think","Evaluation":"Average","Score":48.0925,"Beta":0.1},{"system_prompt":"/no_think","Evaluation":"Average","Score":33.37,"Beta":0.1},{"system_prompt":"/think","Evaluation":"Average","Score":46.32,"Beta":0.2},{"system_prompt":"/no_think","Evaluation":"Average","Score":32.8325,"Beta":0.2},{"system_prompt":"/think","Evaluation":"Average","Score":47.82,"Beta":0.5},{"system_prompt":"/no_think","Evaluation":"Average","Score":31.6775,"Beta":0.5},{"system_prompt":"/think","Evaluation":"Average","Score":47.82,"Beta":0.75},{"system_prompt":"/no_think","Evaluation":"Average","Score":31.6775,"Beta":0.75},{"system_prompt":"/think","Evaluation":"Average","Score":47.875,"Beta":0.99},{"system_prompt":"/no_think","Evaluation":"Average","Score":33.61,"Beta":0.99}];
|
| 214 |
+
const sftData = [{"system_prompt":"/think","Evaluation":"AIME25","Score":36.56},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":4.01},{"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":42.23},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":30.43},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":70.03},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":67.29},{"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":36.63},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":12.87},{"system_prompt":"/think","Evaluation":"Average","Score":46.3625},{"system_prompt":"/no_think","Evaluation":"Average","Score":28.65}];
|
| 215 |
+
|
| 216 |
+
// Get colors from ColorPalettes or fallback
|
| 217 |
+
/**
 * Resolves the colors used for the two plotted series.
 *
 * Asks `window.ColorPalettes.getColors('categorical', 2)` when that function
 * exists; otherwise uses the built-in defaults. Each color falls back
 * individually, so a palette that returns nothing (or fewer than two entries)
 * still yields a usable pair instead of `undefined` strokes.
 *
 * @returns {{think: string, noThink: string}} Colors for the `/think` and `/no_think` series.
 */
const getColors = () => {
  const fallback = { think: '#E377C2', noThink: '#7FC97F' };
  const palettes = window.ColorPalettes;
  if (!palettes || typeof palettes.getColors !== 'function') return fallback;

  const palette = palettes.getColors('categorical', 2) || [];
  return {
    think: palette[0] || fallback.think,
    noThink: palette[1] || fallback.noThink,
  };
};
|
| 224 |
+
|
| 225 |
+
let colors = getColors();
|
| 226 |
+
|
| 227 |
+
// Set up dimensions
|
| 228 |
+
const margin = { top: 16, right: 28, bottom: 56, left: 64 };
|
| 229 |
+
|
| 230 |
+
// Create SVG
|
| 231 |
+
const svg = d3.select(container).append('svg').attr('width', '100%').style('display', 'block');
|
| 232 |
+
const g = svg.append('g');
|
| 233 |
+
|
| 234 |
+
// Tooltip
|
| 235 |
+
container.style.position = container.style.position || 'relative';
|
| 236 |
+
let tip = container.querySelector('.d3-tooltip');
|
| 237 |
+
let tipInner;
|
| 238 |
+
if (!tip) {
|
| 239 |
+
tip = document.createElement('div');
|
| 240 |
+
tip.className = 'd3-tooltip';
|
| 241 |
+
Object.assign(tip.style, {
|
| 242 |
+
position: 'absolute',
|
| 243 |
+
top: '0px',
|
| 244 |
+
left: '0px',
|
| 245 |
+
transform: 'translate(-9999px, -9999px)',
|
| 246 |
+
pointerEvents: 'none',
|
| 247 |
+
padding: '8px 10px',
|
| 248 |
+
borderRadius: '8px',
|
| 249 |
+
fontSize: '12px',
|
| 250 |
+
lineHeight: '1.35',
|
| 251 |
+
border: '1px solid var(--border-color)',
|
| 252 |
+
background: 'var(--surface-bg)',
|
| 253 |
+
color: 'var(--text-color)',
|
| 254 |
+
boxShadow: '0 4px 24px rgba(0,0,0,.18)',
|
| 255 |
+
opacity: '0',
|
| 256 |
+
transition: 'opacity .12s ease',
|
| 257 |
+
zIndex: '1000'
|
| 258 |
+
});
|
| 259 |
+
tipInner = document.createElement('div');
|
| 260 |
+
tipInner.className = 'd3-tooltip__inner';
|
| 261 |
+
tipInner.style.textAlign = 'left';
|
| 262 |
+
tip.appendChild(tipInner);
|
| 263 |
+
container.appendChild(tip);
|
| 264 |
+
} else {
|
| 265 |
+
tipInner = tip.querySelector('.d3-tooltip__inner') || tip;
|
| 266 |
+
}
|
| 267 |
+
|
| 268 |
+
// Id of the pending "park off-screen" timer scheduled by hideTooltip, or null.
let hideTimer = null;

/**
 * Fills the tooltip with `html` and shows it next to the pointer.
 *
 * The position is the pointer location relative to `container` (via
 * `d3.pointer`) plus a 12px offset on each axis. Any pending hide timer is
 * cancelled first; otherwise a hide issued just before this call would move the
 * freshly shown tooltip off-screen ~120ms later.
 *
 * @param {string} html - Markup assigned to the tooltip's inner element.
 * @param {Event} event - Pointer event used to locate the tooltip.
 */
const showTooltip = (html, event) => {
  if (hideTimer !== null) {
    clearTimeout(hideTimer);
    hideTimer = null;
  }
  tipInner.innerHTML = html;
  const [mx, my] = d3.pointer(event, container);
  const offsetX = 12;
  const offsetY = 12;
  tip.style.transform = `translate(${mx + offsetX}px, ${my + offsetY}px)`;
  tip.style.opacity = '1';
};

/**
 * Fades the tooltip out, then parks it off-screen after 120ms.
 *
 * Only one park timer is kept alive at a time, and showTooltip cancels it, so a
 * stale timer can never displace a tooltip that has since been shown again.
 */
const hideTooltip = () => {
  tip.style.opacity = '0';
  if (hideTimer !== null) clearTimeout(hideTimer);
  hideTimer = setTimeout(() => {
    hideTimer = null;
    tip.style.transform = 'translate(-9999px, -9999px)';
  }, 120);
};
|
| 282 |
+
|
| 283 |
+
// Get unique evaluations
|
| 284 |
+
const evaluations = [...new Set(data.map(d => d.Evaluation))];
|
| 285 |
+
|
| 286 |
+
// Create header with legend and controls
|
| 287 |
+
const header = d3.select(container).append('div').attr('class', 'header');
|
| 288 |
+
|
| 289 |
+
const legend = header.append('div').attr('class', 'legend');
|
| 290 |
+
legend.append('div').attr('class', 'legend-title').text('Legend');
|
| 291 |
+
const legendItems = legend.append('div').attr('class', 'items');
|
| 292 |
+
|
| 293 |
+
const controls = header.append('div').attr('class', 'controls');
|
| 294 |
+
const controlGroup = controls.append('div').attr('class', 'control-group');
|
| 295 |
+
controlGroup.append('label').attr('for', 'metric-select-beta').text('Metric');
|
| 296 |
+
const select = controlGroup.append('select').attr('id', 'metric-select-beta');
|
| 297 |
+
|
| 298 |
+
// Populate dropdown
|
| 299 |
+
select.selectAll('option')
|
| 300 |
+
.data(evaluations)
|
| 301 |
+
.enter()
|
| 302 |
+
.append('option')
|
| 303 |
+
.text(d => d)
|
| 304 |
+
.attr('value', d => d);
|
| 305 |
+
|
| 306 |
+
// Build legend
|
| 307 |
+
/**
 * Rebuilds the legend entries inside `legendItems`.
 *
 * Clears the current entries, then appends one `.item` per row: a swatch span
 * followed by a label span. The two series rows get a solid swatch whose
 * background comes from the current `colors`; the SFT row uses the dashed
 * swatch, which takes no inline background.
 */
const buildLegend = () => {
  // [swatch class, label text, key into `colors` or null when no inline color is set]
  const rows = [
    ['swatch-line', '/think', 'think'],
    ['swatch-line', '/no_think', 'noThink'],
    ['swatch-dashed', 'SFT checkpoint', null],
  ];

  legendItems.html('');

  for (const [swatchClass, label, colorKey] of rows) {
    const entry = legendItems.append('span').attr('class', 'item');
    const swatch = entry.append('span').attr('class', swatchClass);
    if (colorKey !== null) {
      swatch.style('background', colors[colorKey]);
    }
    entry.append('span').text(label);
  }
};
|
| 322 |
+
|
| 323 |
+
buildLegend();
|
| 324 |
+
|
| 325 |
+
// Update chart function
|
| 326 |
+
/**
 * Redraws the whole chart for one evaluation.
 *
 * Clears the plot group, then draws (in this z-order) the grid, both axes, the
 * axis labels, one dashed horizontal SFT reference line per series that has an
 * SFT entry, one line per series, and one dot per data point with a hover
 * tooltip. The x axis is a log scale over Beta; the y axis is linear from 0 to
 * 110% of the largest score shown (including SFT scores).
 *
 * If no rows match `evaluation`, the plot is left empty: a log scale over an
 * empty extent would otherwise produce NaN coordinates for every element.
 *
 * @param {string} evaluation - Value of the `Evaluation` field to plot.
 */
function updateChart(evaluation) {
  const filtered = data.filter((d) => d.Evaluation === evaluation);
  const byBeta = (a, b) => a.Beta - b.Beta;
  const pointsFor = (prompt) => filtered.filter((d) => d.system_prompt === prompt).sort(byBeta);
  const sftFor = (prompt) =>
    sftData.find((d) => d.Evaluation === evaluation && d.system_prompt === prompt);

  // "/think" comes first so draw order and tooltip row order are the same for both series.
  const series = [
    { key: 'think', label: '/think', color: colors.think, points: pointsFor('/think'), sft: sftFor('/think') },
    { key: 'no-think', label: '/no_think', color: colors.noThink, points: pointsFor('/no_think'), sft: sftFor('/no_think') },
  ];

  g.selectAll('*').remove();

  // Nothing to plot for this evaluation; avoid building scales over an empty extent.
  if (filtered.length === 0) return;

  const width = container.clientWidth || 800;
  const height = Math.max(320, Math.round(width / 2.5));
  // Clamp so a container narrower than the margins cannot yield a negative plot width.
  const innerWidth = Math.max(0, width - margin.left - margin.right);
  const innerHeight = height - margin.top - margin.bottom;

  svg.attr('width', width).attr('height', height);
  g.attr('transform', `translate(${margin.left},${margin.top})`);

  // Scales
  const xScale = d3.scaleLog()
    .domain([d3.min(filtered, (d) => d.Beta), d3.max(filtered, (d) => d.Beta)])
    .range([0, innerWidth]);

  const allScores = filtered.map((d) => d.Score);
  for (const s of series) {
    if (s.sft) allScores.push(s.sft.Score);
  }
  const maxScore = d3.max(allScores);

  const yScale = d3.scaleLinear()
    .domain([0, maxScore * 1.1])
    .range([innerHeight, 0]);

  // Grid: axes with full-length ticks and no labels.
  g.append('g')
    .attr('class', 'grid')
    .attr('transform', `translate(0,${innerHeight})`)
    .call(d3.axisBottom(xScale).tickSize(-innerHeight).tickFormat('').tickSizeOuter(0));

  g.append('g')
    .attr('class', 'grid')
    .call(d3.axisLeft(yScale).tickSize(-innerWidth).tickFormat('').tickSizeOuter(0));

  // Applies the theme-driven colors to an axis rendered by d3.
  const styleAxis = (gAxis) => {
    gAxis.selectAll('.tick line').attr('stroke', 'var(--axis-color)').style('opacity', 1);
    gAxis.selectAll('.tick text').attr('fill', 'var(--tick-color)').style('opacity', 1);
    gAxis.select('.domain').attr('stroke', 'var(--axis-color)');
  };

  // Axes
  const tickValues = [0.01, 0.05, 0.1, 0.5, 1.0];
  g.append('g')
    .attr('class', 'axes')
    .attr('transform', `translate(0,${innerHeight})`)
    .call(d3.axisBottom(xScale).tickValues(tickValues).tickFormat(d3.format('.2f')).tickSizeOuter(0))
    .call(styleAxis);

  g.append('g')
    .attr('class', 'axes')
    .call(d3.axisLeft(yScale).ticks(6).tickSizeOuter(0))
    .call(styleAxis);

  // Axis labels
  g.append('text')
    .attr('class', 'axis-label')
    .attr('text-anchor', 'middle')
    .attr('x', innerWidth / 2)
    .attr('y', innerHeight + 40)
    .text('Beta');

  g.append('text')
    .attr('class', 'axis-label')
    .attr('text-anchor', 'middle')
    .attr('transform', 'rotate(-90)')
    .attr('y', -45)
    .attr('x', -innerHeight / 2)
    .text('Score (%)');

  // Line generator
  const line = d3.line()
    .x((d) => xScale(d.Beta))
    .y((d) => yScale(d.Score));

  // Builds the tooltip for a hovered point: one row per series that has a point
  // at the same Beta. The hovered series always uses the hovered datum itself.
  const tooltipHtml = (hovered, d) => {
    const rows = series
      .map((s) => ({
        s,
        point: s === hovered ? d : s.points.find((item) => item.Beta === d.Beta),
      }))
      .filter(({ point }) => point)
      .map(({ s, point }) => `
        <div class="tooltip-item">
          <div class="tooltip-color" style="background-color: ${s.color};"></div>
          <span>${s.label}: ${point.Score.toFixed(2)}%</span>
        </div>`)
      .join('');
    return `
        <div class="tooltip-title">Beta ${d.Beta.toFixed(2)}</div>${rows}
      `;
  };

  // Reference lines (SFT checkpoint score per series, when available)
  for (const s of series) {
    if (!s.sft) continue;
    const y = yScale(s.sft.Score);
    g.append('line')
      .attr('class', 'reference-line')
      .style('stroke', s.color)
      .attr('x1', 0)
      .attr('x2', innerWidth)
      .attr('y1', y)
      .attr('y2', y);
  }

  // Lines
  for (const s of series) {
    g.append('path')
      .datum(s.points)
      .attr('class', `line-${s.key}`)
      .style('stroke', s.color)
      .attr('d', line);
  }

  // Dots. Each circle carries a per-series class so the join selector matches
  // what is actually appended (the shared `dot` class keeps the existing CSS).
  for (const s of series) {
    g.selectAll(`.dot-${s.key}`)
      .data(s.points)
      .enter()
      .append('circle')
      .attr('class', `dot dot-${s.key}`)
      .style('fill', s.color)
      .attr('cx', (d) => xScale(d.Beta))
      .attr('cy', (d) => yScale(d.Score))
      .attr('r', 4)
      .on('mouseenter', (event, d) => {
        showTooltip(tooltipHtml(s, d), event);
      })
      .on('mouseleave', hideTooltip);
  }
}
|
| 500 |
+
|
| 501 |
+
// Set default value to "Average" if it exists
|
| 502 |
+
const defaultEval = evaluations.includes("Average") ? "Average" : evaluations[0];
|
| 503 |
+
select.property("value", defaultEval);
|
| 504 |
+
|
| 505 |
+
// Initial chart
|
| 506 |
+
updateChart(defaultEval);
|
| 507 |
+
|
| 508 |
+
// Update on dropdown change
|
| 509 |
+
select.on("change", function() {
|
| 510 |
+
updateChart(this.value);
|
| 511 |
+
});
|
| 512 |
+
|
| 513 |
+
// Resize handling
|
| 514 |
+
const rerender = () => updateChart(select.property("value"));
|
| 515 |
+
if (window.ResizeObserver) {
|
| 516 |
+
const ro = new ResizeObserver(() => rerender());
|
| 517 |
+
ro.observe(container);
|
| 518 |
+
} else {
|
| 519 |
+
window.addEventListener('resize', rerender);
|
| 520 |
+
}
|
| 521 |
+
|
| 522 |
+
// Listen for ColorPalettes changes
|
| 523 |
+
if (window.ColorPalettes && typeof window.ColorPalettes.addListener === 'function') {
|
| 524 |
+
window.ColorPalettes.addListener(() => {
|
| 525 |
+
colors = getColors();
|
| 526 |
+
buildLegend();
|
| 527 |
+
updateChart(select.property("value"));
|
| 528 |
+
});
|
| 529 |
+
}
|
| 530 |
+
};
|
| 531 |
+
|
| 532 |
+
if (document.readyState === 'loading') {
|
| 533 |
+
document.addEventListener('DOMContentLoaded', () => ensureD3(bootstrap), { once: true });
|
| 534 |
+
} else {
|
| 535 |
+
ensureD3(bootstrap);
|
| 536 |
+
}
|
| 537 |
+
})();
|
| 538 |
+
</script>
|
app/src/content/embeds/d3-po-loss-ablations.html
ADDED
|
@@ -0,0 +1,527 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<div class="d3-dpo-ablations"></div>
|
| 2 |
+
<style>
|
| 3 |
+
.d3-dpo-ablations {
|
| 4 |
+
width: 100%;
|
| 5 |
+
position: relative;
|
| 6 |
+
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
|
| 7 |
+
}
|
| 8 |
+
.d3-dpo-ablations svg {
|
| 9 |
+
display: block;
|
| 10 |
+
width: 100%;
|
| 11 |
+
}
|
| 12 |
+
.d3-dpo-ablations .bar {
|
| 13 |
+
stroke: none;
|
| 14 |
+
}
|
| 15 |
+
.d3-dpo-ablations .axes path,
|
| 16 |
+
.d3-dpo-ablations .axes line {
|
| 17 |
+
stroke: var(--axis-color, var(--text-color));
|
| 18 |
+
}
|
| 19 |
+
.d3-dpo-ablations .axes text {
|
| 20 |
+
fill: var(--tick-color, var(--muted-color));
|
| 21 |
+
font-size: 11px;
|
| 22 |
+
}
|
| 23 |
+
.d3-dpo-ablations .grid line {
|
| 24 |
+
stroke: var(--grid-color, rgba(0,0,0,.08));
|
| 25 |
+
}
|
| 26 |
+
.d3-dpo-ablations .d3-tooltip {
|
| 27 |
+
position: absolute;
|
| 28 |
+
top: 0;
|
| 29 |
+
left: 0;
|
| 30 |
+
transform: translate(-9999px, -9999px);
|
| 31 |
+
pointer-events: none;
|
| 32 |
+
padding: 8px 10px;
|
| 33 |
+
border-radius: 8px;
|
| 34 |
+
font-size: 12px;
|
| 35 |
+
line-height: 1.35;
|
| 36 |
+
border: 1px solid var(--border-color);
|
| 37 |
+
background: var(--surface-bg);
|
| 38 |
+
color: var(--text-color);
|
| 39 |
+
box-shadow: 0 4px 24px rgba(0,0,0,.18);
|
| 40 |
+
opacity: 0;
|
| 41 |
+
transition: opacity .12s ease;
|
| 42 |
+
}
|
| 43 |
+
.d3-dpo-ablations .d3-tooltip__inner {
|
| 44 |
+
text-align: left;
|
| 45 |
+
}
|
| 46 |
+
.d3-dpo-ablations .legend {
|
| 47 |
+
display: flex;
|
| 48 |
+
flex-direction: column;
|
| 49 |
+
align-items: flex-start;
|
| 50 |
+
gap: 6px;
|
| 51 |
+
margin-top: 16px;
|
| 52 |
+
}
|
| 53 |
+
.d3-dpo-ablations .legend-title {
|
| 54 |
+
font-size: 12px;
|
| 55 |
+
font-weight: 700;
|
| 56 |
+
color: var(--text-color);
|
| 57 |
+
}
|
| 58 |
+
.d3-dpo-ablations .legend .items {
|
| 59 |
+
display: flex;
|
| 60 |
+
flex-wrap: wrap;
|
| 61 |
+
gap: 8px 14px;
|
| 62 |
+
}
|
| 63 |
+
.d3-dpo-ablations .legend .item {
|
| 64 |
+
display: inline-flex;
|
| 65 |
+
align-items: center;
|
| 66 |
+
gap: 6px;
|
| 67 |
+
white-space: nowrap;
|
| 68 |
+
font-size: 12px;
|
| 69 |
+
color: var(--text-color);
|
| 70 |
+
}
|
| 71 |
+
.d3-dpo-ablations .legend .swatch {
|
| 72 |
+
width: 14px;
|
| 73 |
+
height: 14px;
|
| 74 |
+
border-radius: 3px;
|
| 75 |
+
border: 1px solid var(--border-color);
|
| 76 |
+
}
|
| 77 |
+
.d3-dpo-ablations .controls {
|
| 78 |
+
display: flex;
|
| 79 |
+
gap: 16px;
|
| 80 |
+
align-items: center;
|
| 81 |
+
justify-content: flex-end;
|
| 82 |
+
flex-wrap: wrap;
|
| 83 |
+
margin-top: 8px;
|
| 84 |
+
}
|
| 85 |
+
.d3-dpo-ablations .control-group {
|
| 86 |
+
display: flex;
|
| 87 |
+
flex-direction: column;
|
| 88 |
+
align-items: flex-start;
|
| 89 |
+
gap: 6px;
|
| 90 |
+
}
|
| 91 |
+
.d3-dpo-ablations .controls label {
|
| 92 |
+
font-size: 12px;
|
| 93 |
+
font-weight: 700;
|
| 94 |
+
color: var(--text-color);
|
| 95 |
+
}
|
| 96 |
+
.d3-dpo-ablations .controls select {
|
| 97 |
+
font-size: 12px;
|
| 98 |
+
padding: 8px 28px 8px 10px;
|
| 99 |
+
border: 1px solid var(--border-color);
|
| 100 |
+
border-radius: 8px;
|
| 101 |
+
background: var(--surface-bg);
|
| 102 |
+
color: var(--text-color);
|
| 103 |
+
cursor: pointer;
|
| 104 |
+
}
|
| 105 |
+
.d3-dpo-ablations .checkbox-group {
|
| 106 |
+
display: flex;
|
| 107 |
+
align-items: center;
|
| 108 |
+
gap: 6px;
|
| 109 |
+
}
|
| 110 |
+
.d3-dpo-ablations .checkbox-group input[type="checkbox"] {
|
| 111 |
+
width: 16px;
|
| 112 |
+
height: 16px;
|
| 113 |
+
cursor: pointer;
|
| 114 |
+
}
|
| 115 |
+
.d3-dpo-ablations .checkbox-group label {
|
| 116 |
+
font-size: 12px;
|
| 117 |
+
color: var(--text-color);
|
| 118 |
+
cursor: pointer;
|
| 119 |
+
font-weight: 400;
|
| 120 |
+
}
|
| 121 |
+
</style>
|
| 122 |
+
<script>
|
| 123 |
+
(() => {
|
| 124 |
+
/**
 * Runs `cb` once the global D3 library is usable.
 *
 * When `window.d3.select` is already a function, `cb` is invoked right away
 * and its return value is passed through. Otherwise the shared
 * `#d3-cdn-script` loader tag is reused (or created and appended to
 * `<head>`), and `cb` runs from that tag's `load` event, provided D3 is
 * actually usable at that point.
 *
 * @param {Function} cb - Callback to run when D3 is available.
 * @returns {*} Result of `cb()` on the synchronous path, otherwise `undefined`.
 */
const ensureD3 = (cb) => {
  const d3IsUsable = () => Boolean(window.d3) && typeof window.d3.select === 'function';

  if (d3IsUsable()) return cb();

  const loaderId = 'd3-cdn-script';
  let loaderTag = document.getElementById(loaderId);
  if (!loaderTag) {
    // First embed on the page: inject the CDN script so later embeds can share it.
    loaderTag = document.createElement('script');
    loaderTag.id = loaderId;
    loaderTag.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(loaderTag);
  }

  const runWhenUsable = () => {
    if (d3IsUsable()) cb();
  };

  loaderTag.addEventListener('load', runWhenUsable, { once: true });
  if (window.d3) runWhenUsable();
};
|
| 139 |
+
|
| 140 |
+
const bootstrap = () => {
|
| 141 |
+
const scriptEl = document.currentScript;
|
| 142 |
+
let container = scriptEl ? scriptEl.previousElementSibling : null;
|
| 143 |
+
if (!(container && container.classList && container.classList.contains('d3-dpo-ablations'))) {
|
| 144 |
+
const candidates = Array.from(document.querySelectorAll('.d3-dpo-ablations'))
|
| 145 |
+
.filter((el) => !(el.dataset && el.dataset.mounted === 'true'));
|
| 146 |
+
container = candidates[candidates.length - 1] || null;
|
| 147 |
+
}
|
| 148 |
+
if (!container) return;
|
| 149 |
+
if (container.dataset) {
|
| 150 |
+
if (container.dataset.mounted === 'true') return;
|
| 151 |
+
container.dataset.mounted = 'true';
|
| 152 |
+
}
|
| 153 |
+
|
| 154 |
+
// Tooltip
|
| 155 |
+
container.style.position = container.style.position || 'relative';
|
| 156 |
+
let tip = container.querySelector('.d3-tooltip');
|
| 157 |
+
let tipInner;
|
| 158 |
+
if (!tip) {
|
| 159 |
+
tip = document.createElement('div');
|
| 160 |
+
tip.className = 'd3-tooltip';
|
| 161 |
+
tipInner = document.createElement('div');
|
| 162 |
+
tipInner.className = 'd3-tooltip__inner';
|
| 163 |
+
tip.appendChild(tipInner);
|
| 164 |
+
container.appendChild(tip);
|
| 165 |
+
} else {
|
| 166 |
+
tipInner = tip.querySelector('.d3-tooltip__inner') || tip;
|
| 167 |
+
}
|
| 168 |
+
|
| 169 |
+
// Handle of the pending "park the tooltip off-screen" timer, or null when
// none is scheduled. Shared by showTooltip and hideTooltip so a new hover can
// cancel a hide that is still in flight.
let tooltipHideTimer = null;

/**
 * Fills the tooltip with `html`, makes it visible and positions it slightly
 * right of / above the pointer (coordinates relative to `container`).
 *
 * Cancels any pending hide timer first: without that, moving quickly from one
 * bar to the next let the previous bar's delayed hide fire after this call
 * and push the freshly shown tooltip off-screen.
 *
 * @param {string} html - Markup assigned to the tooltip's inner element.
 * @param {Event} event - Pointer event used to compute the position.
 */
const showTooltip = (html, event) => {
  if (tooltipHideTimer !== null) {
    clearTimeout(tooltipHideTimer);
    tooltipHideTimer = null;
  }
  tipInner.innerHTML = html;
  tip.style.opacity = '1';
  const [mx, my] = d3.pointer(event, container);
  tip.style.transform = `translate(${mx + 12}px, ${my - 12}px)`;
};

/**
 * Fades the tooltip out immediately and, after 120 ms, moves it far
 * off-screen. The delay matches the `.12s` opacity transition declared for
 * `.d3-tooltip` in this embed's stylesheet.
 */
const hideTooltip = () => {
  tip.style.opacity = '0';
  if (tooltipHideTimer !== null) clearTimeout(tooltipHideTimer);
  tooltipHideTimer = setTimeout(() => {
    tooltipHideTimer = null;
    tip.style.transform = 'translate(-9999px, -9999px)';
  }, 120);
};
|
| 182 |
+
|
| 183 |
+
// SVG scaffolding
|
| 184 |
+
const svg = d3.select(container).append('svg').attr('width', '100%').style('display', 'block');
|
| 185 |
+
const gRoot = svg.append('g');
|
| 186 |
+
|
| 187 |
+
let width = 800, height = 400;
|
| 188 |
+
const margin = { top: 16, right: 16, bottom: 80, left: 60 };
|
| 189 |
+
|
| 190 |
+
// Data loading
|
| 191 |
+
const CSV_PATHS = [
|
| 192 |
+
'/data/apo/smollm3_dpo_ablations.csv',
|
| 193 |
+
'./assets/data/apo/smollm3_dpo_ablations.csv',
|
| 194 |
+
'../assets/data/apo/smollm3_dpo_ablations.csv',
|
| 195 |
+
'../../assets/data/apo/smollm3_dpo_ablations.csv'
|
| 196 |
+
];
|
| 197 |
+
|
| 198 |
+
/**
 * Tries each candidate URL in order and resolves with the body text of the
 * first one that answers with an OK status.
 *
 * Individual failures (network errors, non-OK responses, body read errors)
 * are expected while probing and do not abort the loop. The most recent one
 * is kept and attached as `cause` to the final error so that a total failure
 * can still be diagnosed.
 *
 * @param {Iterable<string>} paths - Candidate URLs, most likely first.
 * @returns {Promise<string>} Text of the first successfully fetched resource.
 * @throws {Error} `'CSV not found'` when no candidate could be loaded.
 */
const fetchFirstAvailable = async (paths) => {
  let lastFailure = null;
  for (const p of paths) {
    try {
      const r = await fetch(p, { cache: 'no-cache' });
      if (r.ok) return await r.text();
      lastFailure = new Error(`HTTP ${r.status} for ${p}`);
    } catch (e) {
      // Best-effort probing: remember why this candidate failed and move on.
      lastFailure = e;
    }
  }
  const notFound = new Error('CSV not found');
  if (lastFailure) notFound.cause = lastFailure;
  throw notFound;
};
|
| 207 |
+
|
| 208 |
+
fetchFirstAvailable(CSV_PATHS)
|
| 209 |
+
.then((csvText) => {
|
| 210 |
+
const data = d3.csvParse(csvText);
|
| 211 |
+
|
| 212 |
+
// Get unique methods and benchmarks
|
| 213 |
+
const methods = ['SFT', 'DPO', 'IPO', 'APO-zero', 'APO-down', 'DiscoPOP'];
|
| 214 |
+
const benchmarks = ['aime25', 'gpqa_d', 'ifeval', 'lcb_v4'];
|
| 215 |
+
const benchmarkNames = {
|
| 216 |
+
'aime25': 'AIME 2025',
|
| 217 |
+
'gpqa_d': 'GPQA Diamond',
|
| 218 |
+
'ifeval': 'IFEval',
|
| 219 |
+
'lcb_v4': 'LiveCodeBench v4'
|
| 220 |
+
};
|
| 221 |
+
|
| 222 |
+
let selectedMode = 'no_think';
|
| 223 |
+
let selectedBenchmark = 'ifeval';
|
| 224 |
+
let showDelta = false;
|
| 225 |
+
|
| 226 |
+
// Get colors
|
| 227 |
+
const colors = window.ColorPalettes
|
| 228 |
+
? window.ColorPalettes.getColors('categorical', methods.length)
|
| 229 |
+
: ['#4e79a7', '#f28e2c', '#e15759', '#76b7b2', '#59a14f', '#edc949'];
|
| 230 |
+
|
| 231 |
+
// Create controls
|
| 232 |
+
const controls = document.createElement('div');
|
| 233 |
+
controls.className = 'controls';
|
| 234 |
+
|
| 235 |
+
// Reasoning mode select
|
| 236 |
+
const modeGroup = document.createElement('div');
|
| 237 |
+
modeGroup.className = 'control-group';
|
| 238 |
+
|
| 239 |
+
const modeLabel = document.createElement('label');
|
| 240 |
+
modeLabel.textContent = 'Reasoning mode';
|
| 241 |
+
modeLabel.setAttribute('for', 'mode-select-' + Date.now());
|
| 242 |
+
|
| 243 |
+
const modeSelect = document.createElement('select');
|
| 244 |
+
modeSelect.id = modeLabel.getAttribute('for');
|
| 245 |
+
|
| 246 |
+
const thinkOption = document.createElement('option');
|
| 247 |
+
thinkOption.value = 'think';
|
| 248 |
+
thinkOption.textContent = '/think';
|
| 249 |
+
|
| 250 |
+
const noThinkOption = document.createElement('option');
|
| 251 |
+
noThinkOption.value = 'no_think';
|
| 252 |
+
noThinkOption.textContent = '/no_think';
|
| 253 |
+
noThinkOption.selected = true;
|
| 254 |
+
|
| 255 |
+
modeSelect.appendChild(thinkOption);
|
| 256 |
+
modeSelect.appendChild(noThinkOption);
|
| 257 |
+
|
| 258 |
+
modeSelect.addEventListener('change', (e) => {
|
| 259 |
+
selectedMode = e.target.value;
|
| 260 |
+
render();
|
| 261 |
+
});
|
| 262 |
+
|
| 263 |
+
modeGroup.appendChild(modeLabel);
|
| 264 |
+
modeGroup.appendChild(modeSelect);
|
| 265 |
+
|
| 266 |
+
// Benchmark select
|
| 267 |
+
const benchmarkGroup = document.createElement('div');
|
| 268 |
+
benchmarkGroup.className = 'control-group';
|
| 269 |
+
|
| 270 |
+
const benchmarkLabel = document.createElement('label');
|
| 271 |
+
benchmarkLabel.textContent = 'Benchmark';
|
| 272 |
+
benchmarkLabel.setAttribute('for', 'benchmark-select-' + Date.now());
|
| 273 |
+
|
| 274 |
+
const benchmarkSelect = document.createElement('select');
|
| 275 |
+
benchmarkSelect.id = benchmarkLabel.getAttribute('for');
|
| 276 |
+
|
| 277 |
+
benchmarks.forEach(bench => {
|
| 278 |
+
const option = document.createElement('option');
|
| 279 |
+
option.value = bench;
|
| 280 |
+
option.textContent = benchmarkNames[bench];
|
| 281 |
+
if (bench === 'ifeval') option.selected = true;
|
| 282 |
+
benchmarkSelect.appendChild(option);
|
| 283 |
+
});
|
| 284 |
+
|
| 285 |
+
benchmarkSelect.addEventListener('change', (e) => {
|
| 286 |
+
selectedBenchmark = e.target.value;
|
| 287 |
+
render();
|
| 288 |
+
});
|
| 289 |
+
|
| 290 |
+
benchmarkGroup.appendChild(benchmarkLabel);
|
| 291 |
+
benchmarkGroup.appendChild(benchmarkSelect);
|
| 292 |
+
|
| 293 |
+
// Delta checkbox
|
| 294 |
+
const deltaGroup = document.createElement('div');
|
| 295 |
+
deltaGroup.className = 'control-group';
|
| 296 |
+
|
| 297 |
+
const deltaCheckboxGroup = document.createElement('div');
|
| 298 |
+
deltaCheckboxGroup.className = 'checkbox-group';
|
| 299 |
+
|
| 300 |
+
const deltaCheckbox = document.createElement('input');
|
| 301 |
+
deltaCheckbox.type = 'checkbox';
|
| 302 |
+
deltaCheckbox.id = 'delta-checkbox-' + Date.now();
|
| 303 |
+
|
| 304 |
+
const deltaLabel = document.createElement('label');
|
| 305 |
+
deltaLabel.textContent = 'Show Δ vs SFT';
|
| 306 |
+
deltaLabel.setAttribute('for', deltaCheckbox.id);
|
| 307 |
+
|
| 308 |
+
deltaCheckbox.addEventListener('change', (e) => {
|
| 309 |
+
showDelta = e.target.checked;
|
| 310 |
+
render();
|
| 311 |
+
});
|
| 312 |
+
|
| 313 |
+
deltaCheckboxGroup.appendChild(deltaCheckbox);
|
| 314 |
+
deltaCheckboxGroup.appendChild(deltaLabel);
|
| 315 |
+
deltaGroup.appendChild(deltaCheckboxGroup);
|
| 316 |
+
|
| 317 |
+
controls.appendChild(modeGroup);
|
| 318 |
+
controls.appendChild(benchmarkGroup);
|
| 319 |
+
controls.appendChild(deltaGroup);
|
| 320 |
+
container.appendChild(controls);
|
| 321 |
+
|
| 322 |
+
// Create legend
|
| 323 |
+
const legend = document.createElement('div');
|
| 324 |
+
legend.className = 'legend';
|
| 325 |
+
|
| 326 |
+
const legendTitle = document.createElement('div');
|
| 327 |
+
legendTitle.className = 'legend-title';
|
| 328 |
+
legendTitle.textContent = 'Legend';
|
| 329 |
+
|
| 330 |
+
const legendItems = document.createElement('div');
|
| 331 |
+
legendItems.className = 'items';
|
| 332 |
+
|
| 333 |
+
methods.forEach((method, idx) => {
|
| 334 |
+
const item = document.createElement('span');
|
| 335 |
+
item.className = 'item';
|
| 336 |
+
|
| 337 |
+
const swatch = document.createElement('span');
|
| 338 |
+
swatch.className = 'swatch';
|
| 339 |
+
swatch.style.background = colors[idx];
|
| 340 |
+
|
| 341 |
+
const text = document.createElement('span');
|
| 342 |
+
text.textContent = method;
|
| 343 |
+
|
| 344 |
+
item.appendChild(swatch);
|
| 345 |
+
item.appendChild(text);
|
| 346 |
+
legendItems.appendChild(item);
|
| 347 |
+
});
|
| 348 |
+
|
| 349 |
+
legend.appendChild(legendTitle);
|
| 350 |
+
legend.appendChild(legendItems);
|
| 351 |
+
container.appendChild(legend);
|
| 352 |
+
|
| 353 |
+
/**
 * Re-measures the container and resizes the SVG to match.
 *
 * Updates the enclosing `width` / `height` variables: width follows the
 * container's `clientWidth` (800 when that is 0 or unavailable), height is
 * width / 2.5 rounded, but never below 360. Also re-applies the margin
 * translation to the root group.
 *
 * @returns {{innerWidth: number, innerHeight: number}} Plot area size, i.e.
 *   the SVG size minus the margins.
 */
function updateSize() {
  const { top, right, bottom, left } = margin;

  width = container.clientWidth || 800;
  height = Math.max(Math.round(width / 2.5), 360);

  svg.attr('width', width);
  svg.attr('height', height);
  gRoot.attr('transform', `translate(${left},${top})`);

  const plotWidth = width - left - right;
  const plotHeight = height - top - bottom;
  return { innerWidth: plotWidth, innerHeight: plotHeight };
}
|
| 363 |
+
|
| 364 |
+
/**
 * Redraws the bar chart from scratch for the current control state
 * (`selectedMode`, `selectedBenchmark`, `showDelta`).
 *
 * One bar is drawn per method. In absolute mode SFT is placed first and the
 * remaining methods follow in descending score order; in delta mode SFT is
 * dropped and every other bar shows its difference to the SFT score.
 *
 * Cells that are blank or not numeric are treated as 0, the same fallback
 * already used when a method has no row at all, so a single bad cell cannot
 * inject NaN into the bar geometry, the ordering or the tooltip text.
 */
function render() {
  const { innerWidth, innerHeight } = updateSize();

  // Clear previous
  gRoot.selectAll('*').remove();

  // Filter data for selected mode
  const filteredData = data.filter(d => d['reasoning mode'] === selectedMode);

  // Create data for chart
  let chartData = methods.map(method => {
    const row = filteredData.find(d => d.Method === method);
    const parsed = row ? parseFloat(row[selectedBenchmark]) : 0;
    // parseFloat gives NaN for blank/non-numeric cells; fall back to 0 like a missing row.
    const absoluteValue = Number.isFinite(parsed) ? parsed : 0;
    return {
      method,
      absoluteValue,
      value: absoluteValue
    };
  });

  // Get SFT baseline value
  const sftValue = chartData.find(d => d.method === 'SFT').absoluteValue;

  // Calculate delta if checkbox is checked
  if (showDelta) {
    chartData = chartData.map(d => ({
      ...d,
      value: d.method === 'SFT' ? 0 : d.absoluteValue - sftValue
    }));
  }

  // Separate SFT and sort others by descending value
  const sftData = chartData.find(d => d.method === 'SFT');
  const otherData = chartData.filter(d => d.method !== 'SFT')
    .sort((a, b) => b.value - a.value);

  // Recombine with SFT first (unless showing delta)
  chartData = showDelta ? otherData : [sftData, ...otherData];

  // Get ordered methods for color mapping and x-axis
  const orderedMethods = chartData.map(d => d.method);

  // Scales
  const xScale = d3.scaleBand()
    .domain(orderedMethods)
    .range([0, innerWidth])
    .padding(0.3);

  // Set y-axis domain based on whether we're showing delta
  const yMin = showDelta ? Math.min(0, d3.min(chartData, d => d.value)) : 0;
  const rawYMax = showDelta
    ? Math.max(Math.abs(yMin), d3.max(chartData, d => d.value)) * 1.1
    : d3.max(chartData, d => d.value) * 1.1;
  // When every value is 0 the domain would collapse to [0, 0]; keep a
  // non-empty domain so the scale and axis stay well-defined.
  const yMax = rawYMax > 0 ? rawYMax : 1;

  const yScale = d3.scaleLinear()
    .domain(showDelta ? [yMin * 1.1, yMax] : [0, yMax])
    .range([innerHeight, 0])
    .nice();

  // Grid
  gRoot.append('g')
    .attr('class', 'grid')
    .call(
      d3.axisLeft(yScale)
        .ticks(6)
        .tickSize(-innerWidth)
        .tickFormat('')
    )
    .call(g => g.select('.domain').remove());

  // Create color map based on original method order, so each method keeps
  // the colour shown in the legend regardless of the sorted bar order.
  const colorMap = {};
  methods.forEach((method, idx) => {
    colorMap[method] = colors[idx];
  });

  // Add zero line if showing delta
  if (showDelta) {
    gRoot.append('line')
      .attr('x1', 0)
      .attr('x2', innerWidth)
      .attr('y1', yScale(0))
      .attr('y2', yScale(0))
      .attr('stroke', 'var(--text-color)')
      .attr('stroke-width', 2)
      .attr('stroke-dasharray', '4,4')
      .attr('opacity', 0.5);
  }

  // Bars: positive values grow upward from the zero line, negative ones hang below it.
  gRoot.selectAll('rect.bar')
    .data(chartData)
    .join('rect')
    .attr('class', 'bar')
    .attr('x', d => xScale(d.method))
    .attr('y', d => d.value >= 0 ? yScale(d.value) : yScale(0))
    .attr('width', xScale.bandwidth())
    .attr('height', d => Math.abs(yScale(d.value) - yScale(0)))
    .attr('fill', d => colorMap[d.method])
    .attr('opacity', 0.85)
    .on('mouseenter', (event, d) => {
      const deltaText = showDelta && d.method !== 'SFT'
        ? `Δ: ${d.value >= 0 ? '+' : ''}${d.value.toFixed(2)}%`
        : '';
      const absoluteText = `${benchmarkNames[selectedBenchmark]}: ${d.absoluteValue.toFixed(2)}%`;
      const html = `<strong>${d.method}</strong><br/>${absoluteText}${deltaText ? '<br/>' + deltaText : ''}`;
      showTooltip(html, event);
    })
    .on('mouseleave', hideTooltip);

  // X axis
  const xAxis = gRoot.append('g')
    .attr('class', 'axes')
    .attr('transform', `translate(0,${innerHeight})`)
    .call(d3.axisBottom(xScale));

  xAxis.select('.domain').remove();

  xAxis.selectAll('text')
    .style('text-anchor', 'middle');

  // Y axis
  const yAxis = gRoot.append('g')
    .attr('class', 'axes')
    .call(d3.axisLeft(yScale).ticks(6));

  yAxis.select('.domain').remove();

  // Y axis label
  gRoot.append('text')
    .attr('class', 'axes')
    .attr('transform', 'rotate(-90)')
    .attr('x', -innerHeight / 2)
    .attr('y', -45)
    .attr('text-anchor', 'middle')
    .style('font-size', '12px')
    .style('fill', 'var(--text-color)')
    .text(showDelta ? 'Δ Score vs SFT (%)' : 'Score (%)');
}
|
| 503 |
+
|
| 504 |
+
render();
|
| 505 |
+
|
| 506 |
+
if (window.ResizeObserver) {
|
| 507 |
+
const ro = new ResizeObserver(() => render());
|
| 508 |
+
ro.observe(container);
|
| 509 |
+
} else {
|
| 510 |
+
window.addEventListener('resize', render);
|
| 511 |
+
}
|
| 512 |
+
})
|
| 513 |
+
.catch((err) => {
|
| 514 |
+
const pre = document.createElement('pre');
|
| 515 |
+
pre.textContent = 'Error loading data: ' + err.message;
|
| 516 |
+
pre.style.cssText = 'color:red;font-size:12px;padding:12px;margin:0;';
|
| 517 |
+
container.appendChild(pre);
|
| 518 |
+
});
|
| 519 |
+
};
|
| 520 |
+
|
| 521 |
+
// Kick off the embed: mount immediately when the DOM is already parsed,
// otherwise wait (once) for DOMContentLoaded. Either way D3 is ensured first.
const startEmbed = () => ensureD3(bootstrap);
if (document.readyState !== 'loading') {
  startEmbed();
} else {
  document.addEventListener('DOMContentLoaded', () => startEmbed(), { once: true });
}
|
| 526 |
+
})();
|
| 527 |
+
</script>
|
app/src/content/embeds/d3-po-lr-ablation.html
ADDED
|
@@ -0,0 +1,538 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<div class="d3-apo-lr-ablation"></div>
|
| 2 |
+
<style>
|
| 3 |
+
.d3-apo-lr-ablation {
|
| 4 |
+
width: 100%;
|
| 5 |
+
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
|
| 6 |
+
position: relative;
|
| 7 |
+
--axis-color: var(--text-color, #333);
|
| 8 |
+
--tick-color: var(--muted-color, #666);
|
| 9 |
+
--grid-color: rgba(0,0,0,.08);
|
| 10 |
+
}
|
| 11 |
+
[data-theme="dark"] .d3-apo-lr-ablation {
|
| 12 |
+
--axis-color: var(--text-color, #ccc);
|
| 13 |
+
--tick-color: var(--muted-color, #999);
|
| 14 |
+
--grid-color: rgba(255,255,255,.10);
|
| 15 |
+
}
|
| 16 |
+
.d3-apo-lr-ablation svg {
|
| 17 |
+
display: block;
|
| 18 |
+
overflow: visible;
|
| 19 |
+
}
|
| 20 |
+
.d3-apo-lr-ablation .axes path,
|
| 21 |
+
.d3-apo-lr-ablation .axes line {
|
| 22 |
+
stroke: var(--axis-color);
|
| 23 |
+
shape-rendering: crispEdges;
|
| 24 |
+
}
|
| 25 |
+
.d3-apo-lr-ablation .axes text {
|
| 26 |
+
fill: var(--tick-color);
|
| 27 |
+
font-size: 11px;
|
| 28 |
+
}
|
| 29 |
+
.d3-apo-lr-ablation .grid line {
|
| 30 |
+
stroke: var(--grid-color);
|
| 31 |
+
stroke-dasharray: 2,2;
|
| 32 |
+
shape-rendering: crispEdges;
|
| 33 |
+
}
|
| 34 |
+
.d3-apo-lr-ablation .axis-label {
|
| 35 |
+
fill: var(--text-color);
|
| 36 |
+
font-size: 12px;
|
| 37 |
+
font-weight: 600;
|
| 38 |
+
}
|
| 39 |
+
.d3-apo-lr-ablation .line-think {
|
| 40 |
+
fill: none;
|
| 41 |
+
stroke-width: 2.5;
|
| 42 |
+
stroke-linecap: round;
|
| 43 |
+
stroke-linejoin: round;
|
| 44 |
+
}
|
| 45 |
+
.d3-apo-lr-ablation .line-no-think {
|
| 46 |
+
fill: none;
|
| 47 |
+
stroke-width: 2.5;
|
| 48 |
+
stroke-linecap: round;
|
| 49 |
+
stroke-linejoin: round;
|
| 50 |
+
}
|
| 51 |
+
.d3-apo-lr-ablation .reference-line {
|
| 52 |
+
fill: none;
|
| 53 |
+
stroke-width: 1.5;
|
| 54 |
+
stroke-dasharray: 5, 5;
|
| 55 |
+
opacity: 0.4;
|
| 56 |
+
}
|
| 57 |
+
.d3-apo-lr-ablation .dot {
|
| 58 |
+
stroke: var(--surface-bg);
|
| 59 |
+
stroke-width: 2;
|
| 60 |
+
}
|
| 61 |
+
.d3-apo-lr-ablation .header {
|
| 62 |
+
display: flex;
|
| 63 |
+
align-items: flex-start;
|
| 64 |
+
justify-content: space-between;
|
| 65 |
+
gap: 16px;
|
| 66 |
+
margin-top: 16px;
|
| 67 |
+
flex-wrap: wrap;
|
| 68 |
+
}
|
| 69 |
+
.d3-apo-lr-ablation .legend {
|
| 70 |
+
display: flex;
|
| 71 |
+
flex-direction: column;
|
| 72 |
+
align-items: flex-start;
|
| 73 |
+
gap: 6px;
|
| 74 |
+
}
|
| 75 |
+
.d3-apo-lr-ablation .legend-title {
|
| 76 |
+
font-size: 12px;
|
| 77 |
+
font-weight: 700;
|
| 78 |
+
color: var(--text-color);
|
| 79 |
+
}
|
| 80 |
+
.d3-apo-lr-ablation .legend .items {
|
| 81 |
+
display: flex;
|
| 82 |
+
flex-wrap: wrap;
|
| 83 |
+
gap: 8px 14px;
|
| 84 |
+
}
|
| 85 |
+
.d3-apo-lr-ablation .legend .item {
|
| 86 |
+
display: inline-flex;
|
| 87 |
+
align-items: center;
|
| 88 |
+
gap: 6px;
|
| 89 |
+
white-space: nowrap;
|
| 90 |
+
font-size: 12px;
|
| 91 |
+
color: var(--text-color);
|
| 92 |
+
}
|
| 93 |
+
.d3-apo-lr-ablation .legend .swatch {
|
| 94 |
+
width: 14px;
|
| 95 |
+
height: 14px;
|
| 96 |
+
border-radius: 3px;
|
| 97 |
+
border: 1px solid var(--border-color);
|
| 98 |
+
}
|
| 99 |
+
.d3-apo-lr-ablation .legend .swatch-line {
|
| 100 |
+
width: 20px;
|
| 101 |
+
height: 2px;
|
| 102 |
+
border: none;
|
| 103 |
+
}
|
| 104 |
+
.d3-apo-lr-ablation .legend .swatch-dashed {
|
| 105 |
+
width: 20px;
|
| 106 |
+
height: 2px;
|
| 107 |
+
border: none;
|
| 108 |
+
background: repeating-linear-gradient(
|
| 109 |
+
to right,
|
| 110 |
+
var(--text-color) 0,
|
| 111 |
+
var(--text-color) 4px,
|
| 112 |
+
transparent 4px,
|
| 113 |
+
transparent 8px
|
| 114 |
+
);
|
| 115 |
+
}
|
| 116 |
+
.d3-apo-lr-ablation .controls {
|
| 117 |
+
display: flex;
|
| 118 |
+
gap: 16px;
|
| 119 |
+
align-items: flex-start;
|
| 120 |
+
justify-content: flex-end;
|
| 121 |
+
flex-wrap: wrap;
|
| 122 |
+
}
|
| 123 |
+
.d3-apo-lr-ablation .control-group {
|
| 124 |
+
display: flex;
|
| 125 |
+
flex-direction: column;
|
| 126 |
+
align-items: flex-start;
|
| 127 |
+
gap: 6px;
|
| 128 |
+
}
|
| 129 |
+
.d3-apo-lr-ablation .controls label {
|
| 130 |
+
font-size: 12px;
|
| 131 |
+
font-weight: 700;
|
| 132 |
+
color: var(--text-color);
|
| 133 |
+
}
|
| 134 |
+
.d3-apo-lr-ablation .controls select {
|
| 135 |
+
font-size: 12px;
|
| 136 |
+
padding: 8px 28px 8px 10px;
|
| 137 |
+
border: 1px solid var(--border-color);
|
| 138 |
+
border-radius: 8px;
|
| 139 |
+
background: var(--surface-bg);
|
| 140 |
+
color: var(--text-color);
|
| 141 |
+
cursor: pointer;
|
| 142 |
+
appearance: none;
|
| 143 |
+
background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='12' height='12' viewBox='0 0 12 12'%3E%3Cpath fill='%23666' d='M6 9L1 4h10z'/%3E%3C/svg%3E");
|
| 144 |
+
background-repeat: no-repeat;
|
| 145 |
+
background-position: right 8px center;
|
| 146 |
+
}
|
| 147 |
+
.d3-apo-lr-ablation .controls select:focus {
|
| 148 |
+
outline: 2px solid var(--primary-color);
|
| 149 |
+
outline-offset: 2px;
|
| 150 |
+
}
|
| 151 |
+
.d3-apo-lr-ablation .d3-tooltip {
|
| 152 |
+
position: absolute;
|
| 153 |
+
background: var(--surface-bg);
|
| 154 |
+
border: 1px solid var(--border-color);
|
| 155 |
+
border-radius: 8px;
|
| 156 |
+
padding: 12px;
|
| 157 |
+
pointer-events: none;
|
| 158 |
+
opacity: 0;
|
| 159 |
+
transition: opacity 0.2s;
|
| 160 |
+
box-shadow: 0 2px 8px rgba(0,0,0,0.15);
|
| 161 |
+
font-size: 12px;
|
| 162 |
+
z-index: 1000;
|
| 163 |
+
}
|
| 164 |
+
.d3-apo-lr-ablation .tooltip-title {
|
| 165 |
+
font-weight: 700;
|
| 166 |
+
margin-bottom: 8px;
|
| 167 |
+
color: var(--text-color);
|
| 168 |
+
}
|
| 169 |
+
.d3-apo-lr-ablation .tooltip-item {
|
| 170 |
+
display: flex;
|
| 171 |
+
align-items: center;
|
| 172 |
+
gap: 8px;
|
| 173 |
+
margin: 4px 0;
|
| 174 |
+
color: var(--text-color);
|
| 175 |
+
}
|
| 176 |
+
.d3-apo-lr-ablation .tooltip-color {
|
| 177 |
+
width: 12px;
|
| 178 |
+
height: 12px;
|
| 179 |
+
border-radius: 2px;
|
| 180 |
+
}
|
| 181 |
+
</style>
|
| 182 |
+
<script>
|
| 183 |
+
(() => {
|
| 184 |
+
/**
 * Runs `cb` once a usable D3 build is available on `window`.
 *
 * When `window.d3.select` already exists the callback runs synchronously.
 * Otherwise the shared `<script id="d3-cdn-script">` tag is reused (or
 * created and appended to `<head>`) and the callback is deferred until that
 * tag fires `load`.
 *
 * @param {Function} cb - Called with no arguments once D3 is usable.
 * @returns {*} The return value of `cb` on the synchronous path, otherwise
 *   `undefined`.
 */
const ensureD3 = (cb) => {
  const hasD3 = () => Boolean(window.d3) && typeof window.d3.select === 'function';
  if (hasD3()) return cb();

  const SCRIPT_ID = 'd3-cdn-script';
  let s = document.getElementById(SCRIPT_ID);
  if (!s) {
    s = document.createElement('script');
    s.id = SCRIPT_ID;
    s.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(s);
  }

  // Re-check on load: the tag may have been created by another embed and
  // could in principle load something that is not a usable D3 build.
  const onReady = () => {
    if (hasD3()) cb();
  };
  // Without this a blocked or failed CDN request leaves the chart blank with
  // no hint as to why.
  const onError = () => {
    console.error(`Failed to load D3 from ${s.src}; chart will not render.`);
  };
  s.addEventListener('load', onReady, { once: true });
  s.addEventListener('error', onError, { once: true });
};
|
| 197 |
+
|
| 198 |
+
const bootstrap = () => {
|
| 199 |
+
const scriptEl = document.currentScript;
|
| 200 |
+
let container = scriptEl ? scriptEl.previousElementSibling : null;
|
| 201 |
+
if (!(container && container.classList && container.classList.contains('d3-apo-lr-ablation'))) {
|
| 202 |
+
const candidates = Array.from(document.querySelectorAll('.d3-apo-lr-ablation'))
|
| 203 |
+
.filter((el) => !(el.dataset && el.dataset.mounted === 'true'));
|
| 204 |
+
container = candidates[candidates.length - 1] || null;
|
| 205 |
+
}
|
| 206 |
+
if (!container) return;
|
| 207 |
+
if (container.dataset) {
|
| 208 |
+
if (container.dataset.mounted === 'true') return;
|
| 209 |
+
container.dataset.mounted = 'true';
|
| 210 |
+
}
|
| 211 |
+
|
| 212 |
+
// Data embedded inline
|
| 213 |
+
const data = [{"Learning rate":0.00001,"system_prompt":"/think","Evaluation":"AIME25","Score":29.9},{"Learning rate":0.00001,"system_prompt":"/no_think","Evaluation":"AIME25","Score":4.9},{"Learning rate":0.000005,"system_prompt":"/think","Evaluation":"AIME25","Score":36.46},{"Learning rate":0.000001,"system_prompt":"/think","Evaluation":"AIME25","Score":45.47},{"Learning rate":0.000001,"system_prompt":"/no_think","Evaluation":"AIME25","Score":7.92},{"Learning rate":0.0000005,"system_prompt":"/think","Evaluation":"AIME25","Score":48.7},{"Learning rate":0.0000005,"system_prompt":"/no_think","Evaluation":"AIME25","Score":7.5},{"Learning rate":0.0000001,"system_prompt":"/think","Evaluation":"AIME25","Score":48.59},{"Learning rate":0.0000001,"system_prompt":"/no_think","Evaluation":"AIME25","Score":9.27},{"Learning rate":0.00001,"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":34.28},{"Learning rate":0.00001,"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":26.64},{"Learning rate":0.000005,"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":36.93},{"Learning rate":0.000005,"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":29.04},{"Learning rate":0.000001,"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":42.49},{"Learning rate":0.000001,"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":32.58},{"Learning rate":0.0000005,"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":44.7},{"Learning rate":0.0000005,"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":32.77},{"Learning rate":0.0000001,"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":44.44},{"Learning rate":0.0000001,"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":32.2},{"Learning rate":0.00001,"system_prompt":"/think","Evaluation":"IF-Eval","Score":69.72},{"Learning rate":0.00001,"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":70.86},{"Learning 
rate":0.000005,"system_prompt":"/think","Evaluation":"IF-Eval","Score":70.97},{"Learning rate":0.000005,"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":75.46},{"Learning rate":0.000001,"system_prompt":"/think","Evaluation":"IF-Eval","Score":69.88},{"Learning rate":0.000001,"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":74.46},{"Learning rate":0.0000005,"system_prompt":"/think","Evaluation":"IF-Eval","Score":72.61},{"Learning rate":0.0000005,"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":77.13},{"Learning rate":0.0000001,"system_prompt":"/think","Evaluation":"IF-Eval","Score":73.86},{"Learning rate":0.0000001,"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":77.0},{"Learning rate":0.00001,"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":13.86},{"Learning rate":0.00001,"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":11.88},{"Learning rate":0.000005,"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":21.78},{"Learning rate":0.000005,"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":16.83},{"Learning rate":0.000001,"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":23.76},{"Learning rate":0.000001,"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":13.86},{"Learning rate":0.0000005,"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":25.74},{"Learning rate":0.0000005,"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":12.87},{"Learning rate":0.0000001,"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":31.68},{"Learning rate":0.0000001,"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":11.88},{"Learning rate":0.00001,"system_prompt":"/think","Evaluation":"Average","Score":36.94},{"Learning rate":0.00001,"system_prompt":"/no_think","Evaluation":"Average","Score":28.57},{"Learning rate":0.000005,"system_prompt":"/think","Evaluation":"Average","Score":41.535},{"Learning 
rate":0.000005,"system_prompt":"/no_think","Evaluation":"Average","Score":40.4433333333},{"Learning rate":0.000001,"system_prompt":"/think","Evaluation":"Average","Score":45.4},{"Learning rate":0.000001,"system_prompt":"/no_think","Evaluation":"Average","Score":32.205},{"Learning rate":0.0000005,"system_prompt":"/think","Evaluation":"Average","Score":47.9375},{"Learning rate":0.0000005,"system_prompt":"/no_think","Evaluation":"Average","Score":32.5675},{"Learning rate":0.0000001,"system_prompt":"/think","Evaluation":"Average","Score":49.6425},{"Learning rate":0.0000001,"system_prompt":"/no_think","Evaluation":"Average","Score":32.5875}];
|
| 214 |
+
const sftData = [{"system_prompt":"/think","Evaluation":"AIME25","Score":36.56},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":4.01},{"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":42.23},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":30.43},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":70.03},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":67.29},{"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":36.63},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":12.87},{"system_prompt":"/think","Evaluation":"Average","Score":46.3625},{"system_prompt":"/no_think","Evaluation":"Average","Score":28.65}];
|
| 215 |
+
|
| 216 |
+
/**
 * Resolves the two series colours.
 *
 * Asks `window.ColorPalettes.getColors('categorical', 2)` when that function
 * exists; any colour it does not supply falls back to the built-in default so
 * a short or empty palette never yields an `undefined` stroke or fill.
 *
 * @returns {{think: string, noThink: string}} Colours for the `/think` and
 *   `/no_think` series.
 */
const getColors = () => {
  const fallback = { think: '#E377C2', noThink: '#7FC97F' };
  const palettes = window.ColorPalettes;
  if (!palettes || typeof palettes.getColors !== 'function') {
    return fallback;
  }
  const picked = palettes.getColors('categorical', 2) || [];
  return {
    think: picked[0] || fallback.think,
    noThink: picked[1] || fallback.noThink,
  };
};
|
| 224 |
+
|
| 225 |
+
// Series colours; kept in a `let` because they are reassigned on palette changes.
let colors = getColors();

// Space (px) reserved around the plot area for axes and labels.
const margin = { top: 16, right: 28, bottom: 56, left: 64 };

// Root <svg> and the group that every chart element is drawn into.
const svg = d3.select(container)
  .append('svg')
  .attr('width', '100%')
  .style('display', 'block');
const g = svg.append('g');

// Tooltip: absolutely positioned inside the host, so give the host an inline
// position when it has none.
if (!container.style.position) {
  container.style.position = 'relative';
}
let tip = container.querySelector('.d3-tooltip');
let tipInner;
if (tip) {
  // Reuse a tooltip that already lives in the host.
  tipInner = tip.querySelector('.d3-tooltip__inner') || tip;
} else {
  tip = document.createElement('div');
  tip.className = 'd3-tooltip';
  // Starts transparent and parked far off-screen until the first hover.
  const tipStyle = {
    position: 'absolute',
    top: '0px',
    left: '0px',
    transform: 'translate(-9999px, -9999px)',
    pointerEvents: 'none',
    padding: '8px 10px',
    borderRadius: '8px',
    fontSize: '12px',
    lineHeight: '1.35',
    border: '1px solid var(--border-color)',
    background: 'var(--surface-bg)',
    color: 'var(--text-color)',
    boxShadow: '0 4px 24px rgba(0,0,0,.18)',
    opacity: '0',
    transition: 'opacity .12s ease',
    zIndex: '1000',
  };
  Object.entries(tipStyle).forEach(([prop, value]) => {
    tip.style[prop] = value;
  });

  tipInner = document.createElement('div');
  tipInner.className = 'd3-tooltip__inner';
  tipInner.style.textAlign = 'left';
  tip.appendChild(tipInner);
  container.appendChild(tip);
}
|
| 267 |
+
|
| 268 |
+
/**
 * Fills the tooltip with `html` and shows it just below-right of the pointer.
 *
 * @param {string} html - Markup assigned to the tooltip's inner element.
 * @param {Event} event - Pointer event; its position is resolved relative to
 *   the chart container via `d3.pointer`.
 */
const showTooltip = (html, event) => {
  const CURSOR_GAP = 12; // px between the pointer and the tooltip corner
  tipInner.innerHTML = html;
  const pointer = d3.pointer(event, container);
  const left = pointer[0] + CURSOR_GAP;
  const top = pointer[1] + CURSOR_GAP;
  tip.style.transform = `translate(${left}px, ${top}px)`;
  tip.style.opacity = '1';
};
|
| 275 |
+
|
| 276 |
+
/**
 * Fades the tooltip out, then parks it off-screen once the fade has had time
 * to run (120 ms, matching the inline `opacity .12s` transition applied when
 * the tooltip element is created).
 */
const hideTooltip = () => {
  tip.style.opacity = '0';
  setTimeout(() => {
    // Moving quickly from one dot to the next re-shows the tooltip before
    // this timer fires; parking it then would hide a visible tooltip.
    if (tip.style.opacity !== '0') return;
    tip.style.transform = 'translate(-9999px, -9999px)';
  }, 120);
};
|
| 282 |
+
|
| 283 |
+
// Metrics offered in the dropdown, in the order they first appear in the data.
const evaluations = Array.from(new Set(data.map((row) => row.Evaluation)));

// Header strip: legend first, then the metric picker.
const appendDiv = (parent, className) => parent.append('div').attr('class', className);

const header = appendDiv(d3.select(container), 'header');

const legend = appendDiv(header, 'legend');
appendDiv(legend, 'legend-title').text('Legend');
const legendItems = appendDiv(legend, 'items');

const controls = appendDiv(header, 'controls');
const controlGroup = appendDiv(controls, 'control-group');
const METRIC_SELECT_ID = 'metric-select-lr';
controlGroup.append('label').attr('for', METRIC_SELECT_ID).text('Metric');
const select = controlGroup.append('select').attr('id', METRIC_SELECT_ID);

// One <option> per metric.
select.selectAll('option')
  .data(evaluations)
  .enter()
  .append('option')
  .text((name) => name)
  .attr('value', (name) => name);
|
| 305 |
+
|
| 306 |
+
/**
 * Rebuilds the legend entries from the current `colors`: one solid swatch per
 * series plus a dashed swatch for the SFT checkpoint reference.
 */
const buildLegend = () => {
  legendItems.html('');

  const entries = [
    { label: '/think', swatchClass: 'swatch-line', tinted: true, fill: colors.think },
    { label: '/no_think', swatchClass: 'swatch-line', tinted: true, fill: colors.noThink },
    // The dashed swatch is coloured by the stylesheet, not inline.
    { label: 'SFT checkpoint', swatchClass: 'swatch-dashed', tinted: false, fill: undefined },
  ];

  entries.forEach((entry) => {
    const row = legendItems.append('span').attr('class', 'item');
    const swatch = row.append('span').attr('class', entry.swatchClass);
    if (entry.tinted) {
      swatch.style('background', entry.fill);
    }
    row.append('span').text(entry.label);
  });
};
|
| 322 |
+
|
| 323 |
+
buildLegend();
|
| 324 |
+
|
| 325 |
+
/**
 * Redraws the whole chart for one evaluation metric.
 *
 * Clears the plot group, then draws (in this order) the grid, both axes, the
 * axis labels, the dashed SFT reference lines, one line per system prompt and
 * the hoverable dots. Layout is derived from the container's current width,
 * so the same function serves as the resize handler.
 *
 * @param {string} evaluation - Value of the `Evaluation` field to plot. A
 *   value with no matching rows just clears the plot.
 * @returns {void}
 */
function updateChart(evaluation) {
  const LR_KEY = 'Learning rate';
  const THINK = '/think';
  const NO_THINK = '/no_think';

  const filtered = data.filter((d) => d.Evaluation === evaluation);
  // `filter` returns a fresh array, so sorting never reorders `data` itself.
  const seriesFor = (prompt) => filtered
    .filter((d) => d.system_prompt === prompt)
    .sort((a, b) => a[LR_KEY] - b[LR_KEY]);
  const thinkData = seriesFor(THINK);
  const noThinkData = seriesFor(NO_THINK);

  g.selectAll('*').remove();

  // With no rows the scale domains would be undefined/NaN and every
  // coordinate invalid, so leave the plot empty instead.
  if (filtered.length === 0) return;

  const sftFor = (prompt) => sftData.find(
    (d) => d.Evaluation === evaluation && d.system_prompt === prompt,
  );
  const sftThink = sftFor(THINK);
  const sftNoThink = sftFor(NO_THINK);

  // Layout: width follows the host, height keeps a 2.5:1 ratio (min 320px).
  const width = container.clientWidth || 800;
  const height = Math.max(320, Math.round(width / 2.5));
  // Clamp so a host narrower than the margins cannot yield a negative range.
  const innerWidth = Math.max(0, width - margin.left - margin.right);
  const innerHeight = height - margin.top - margin.bottom;

  svg.attr('width', width).attr('height', height);
  g.attr('transform', `translate(${margin.left},${margin.top})`);

  // Scales
  const xScale = d3.scaleLog()
    .domain([d3.min(filtered, (d) => d[LR_KEY]), d3.max(filtered, (d) => d[LR_KEY])])
    .range([0, innerWidth]);

  // The SFT baselines take part in the y-extent so their lines stay in view.
  const allScores = filtered.map((d) => d.Score);
  if (sftThink) allScores.push(sftThink.Score);
  if (sftNoThink) allScores.push(sftNoThink.Score);
  const yScale = d3.scaleLinear()
    .domain([0, d3.max(allScores) * 1.1])
    .range([innerHeight, 0]);

  // Grid: axes with full-length ticks and no labels.
  g.append('g')
    .attr('class', 'grid')
    .attr('transform', `translate(0,${innerHeight})`)
    .call(d3.axisBottom(xScale).tickSize(-innerHeight).tickFormat('').tickSizeOuter(0));

  g.append('g')
    .attr('class', 'grid')
    .call(d3.axisLeft(yScale).tickSize(-innerWidth).tickFormat('').tickSizeOuter(0));

  // Axes
  const styleAxis = (gAxis) => {
    gAxis.selectAll('.tick line').attr('stroke', 'var(--axis-color)').style('opacity', 1);
    gAxis.selectAll('.tick text').attr('fill', 'var(--tick-color)').style('opacity', 1);
    gAxis.select('.domain').attr('stroke', 'var(--axis-color)');
  };
  const tickValues = [1e-7, 5e-7, 1e-6, 5e-6, 1e-5];

  g.append('g')
    .attr('class', 'axes')
    .attr('transform', `translate(0,${innerHeight})`)
    .call(d3.axisBottom(xScale).tickValues(tickValues).tickFormat(d3.format('.0e')).tickSizeOuter(0))
    .call(styleAxis);

  g.append('g')
    .attr('class', 'axes')
    .call(d3.axisLeft(yScale).ticks(6).tickSizeOuter(0))
    .call(styleAxis);

  // Axis labels
  g.append('text')
    .attr('class', 'axis-label')
    .attr('text-anchor', 'middle')
    .attr('x', innerWidth / 2)
    .attr('y', innerHeight + 40)
    .text('Learning rate');

  g.append('text')
    .attr('class', 'axis-label')
    .attr('text-anchor', 'middle')
    .attr('transform', 'rotate(-90)')
    .attr('y', -45)
    .attr('x', -innerHeight / 2)
    .text('Score (%)');

  // Reference lines: one horizontal dashed line per available SFT baseline.
  const drawReference = (sft, stroke) => {
    if (!sft) return;
    const y = yScale(sft.Score);
    g.append('line')
      .attr('class', 'reference-line')
      .style('stroke', stroke)
      .attr('x1', 0)
      .attr('x2', innerWidth)
      .attr('y1', y)
      .attr('y2', y);
  };
  drawReference(sftThink, colors.think);
  drawReference(sftNoThink, colors.noThink);

  // Lines
  const line = d3.line()
    .x((d) => xScale(d[LR_KEY]))
    .y((d) => yScale(d.Score));
  const drawLine = (points, className, stroke) => {
    g.append('path')
      .datum(points)
      .attr('class', className)
      .style('stroke', stroke)
      .attr('d', line);
  };
  drawLine(thinkData, 'line-think', colors.think);
  drawLine(noThinkData, 'line-no-think', colors.noThink);

  // Tooltip markup: always lists /think first, then /no_think, skipping a
  // series that has no point at the hovered learning rate.
  const tooltipRow = (label, color, point) => (point
    ? `<div class="tooltip-item"><div class="tooltip-color" style="background-color: ${color};"></div><span>${label}: ${point.Score.toFixed(2)}%</span></div>`
    : '');
  const tooltipHtml = (hovered) => {
    const sameRate = (item) => item[LR_KEY] === hovered[LR_KEY];
    const thinkPoint = hovered.system_prompt === THINK ? hovered : thinkData.find(sameRate);
    const noThinkPoint = hovered.system_prompt === NO_THINK ? hovered : noThinkData.find(sameRate);
    return [
      `<div class="tooltip-title">LR ${hovered[LR_KEY].toExponential(0)}</div>`,
      tooltipRow(THINK, colors.think, thinkPoint),
      tooltipRow(NO_THINK, colors.noThink, noThinkPoint),
    ].join('');
  };

  // Dots: the series class is applied alongside `dot` so the join selector
  // matches the elements it creates.
  const drawDots = (points, seriesClass, fill) => {
    g.selectAll(`.${seriesClass}`)
      .data(points)
      .enter()
      .append('circle')
      .attr('class', `dot ${seriesClass}`)
      .style('fill', fill)
      .attr('cx', (d) => xScale(d[LR_KEY]))
      .attr('cy', (d) => yScale(d.Score))
      .attr('r', 4)
      .on('mouseenter', (event, d) => showTooltip(tooltipHtml(d), event))
      .on('mouseleave', hideTooltip);
  };
  drawDots(thinkData, 'dot-think', colors.think);
  drawDots(noThinkData, 'dot-no-think', colors.noThink);
}
|
| 500 |
+
|
| 501 |
+
// Start on the "Average" metric when the data has one, else on the first metric.
const defaultEval = evaluations.includes('Average') ? 'Average' : evaluations[0];
select.property('value', defaultEval);

// Initial chart
updateChart(defaultEval);

// Redraw for whatever metric the dropdown currently shows. Used for dropdown
// changes, resizes and palette changes alike.
const rerender = () => updateChart(select.property('value'));

// Update on dropdown change
select.on('change', rerender);

// Resize handling: observe the host when possible, else fall back to window resizes.
if (!window.ResizeObserver) {
  window.addEventListener('resize', rerender);
} else {
  const ro = new ResizeObserver(() => {
    rerender();
  });
  ro.observe(container);
}

// Listen for ColorPalettes changes: refresh colours, legend and chart.
const palettes = window.ColorPalettes;
if (palettes && typeof palettes.addListener === 'function') {
  palettes.addListener(() => {
    colors = getColors();
    buildLegend();
    rerender();
  });
}
|
| 530 |
+
};
|
| 531 |
+
|
| 532 |
+
// Mount immediately when the document is already parsed; otherwise wait for
// DOMContentLoaded so the host element is guaranteed to exist.
const start = () => {
  ensureD3(bootstrap);
};
if (document.readyState !== 'loading') {
  start();
} else {
  document.addEventListener('DOMContentLoaded', start, { once: true });
}
|
| 537 |
+
})();
|
| 538 |
+
</script>
|
app/src/content/embeds/d3-po-size-ablation.html
ADDED
|
@@ -0,0 +1,537 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<div class="d3-apo-size-ablation"></div>
|
| 2 |
+
<style>
|
| 3 |
+
.d3-apo-size-ablation {
|
| 4 |
+
width: 100%;
|
| 5 |
+
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
|
| 6 |
+
position: relative;
|
| 7 |
+
--axis-color: var(--text-color, #333);
|
| 8 |
+
--tick-color: var(--muted-color, #666);
|
| 9 |
+
--grid-color: rgba(0,0,0,.08);
|
| 10 |
+
}
|
| 11 |
+
[data-theme="dark"] .d3-apo-size-ablation {
|
| 12 |
+
--axis-color: var(--text-color, #ccc);
|
| 13 |
+
--tick-color: var(--muted-color, #999);
|
| 14 |
+
--grid-color: rgba(255,255,255,.10);
|
| 15 |
+
}
|
| 16 |
+
.d3-apo-size-ablation svg {
|
| 17 |
+
display: block;
|
| 18 |
+
overflow: visible;
|
| 19 |
+
}
|
| 20 |
+
.d3-apo-size-ablation .axes path,
|
| 21 |
+
.d3-apo-size-ablation .axes line {
|
| 22 |
+
stroke: var(--axis-color);
|
| 23 |
+
shape-rendering: crispEdges;
|
| 24 |
+
}
|
| 25 |
+
.d3-apo-size-ablation .axes text {
|
| 26 |
+
fill: var(--tick-color);
|
| 27 |
+
font-size: 11px;
|
| 28 |
+
}
|
| 29 |
+
.d3-apo-size-ablation .grid line {
|
| 30 |
+
stroke: var(--grid-color);
|
| 31 |
+
stroke-dasharray: 2,2;
|
| 32 |
+
shape-rendering: crispEdges;
|
| 33 |
+
}
|
| 34 |
+
.d3-apo-size-ablation .axis-label {
|
| 35 |
+
fill: var(--text-color);
|
| 36 |
+
font-size: 12px;
|
| 37 |
+
font-weight: 600;
|
| 38 |
+
}
|
| 39 |
+
.d3-apo-size-ablation .line-think {
|
| 40 |
+
fill: none;
|
| 41 |
+
stroke-width: 2.5;
|
| 42 |
+
stroke-linecap: round;
|
| 43 |
+
stroke-linejoin: round;
|
| 44 |
+
}
|
| 45 |
+
.d3-apo-size-ablation .line-no-think {
|
| 46 |
+
fill: none;
|
| 47 |
+
stroke-width: 2.5;
|
| 48 |
+
stroke-linecap: round;
|
| 49 |
+
stroke-linejoin: round;
|
| 50 |
+
}
|
| 51 |
+
.d3-apo-size-ablation .reference-line {
|
| 52 |
+
fill: none;
|
| 53 |
+
stroke-width: 1.5;
|
| 54 |
+
stroke-dasharray: 5, 5;
|
| 55 |
+
opacity: 0.4;
|
| 56 |
+
}
|
| 57 |
+
.d3-apo-size-ablation .dot {
|
| 58 |
+
stroke: var(--surface-bg);
|
| 59 |
+
stroke-width: 2;
|
| 60 |
+
}
|
| 61 |
+
.d3-apo-size-ablation .header {
|
| 62 |
+
display: flex;
|
| 63 |
+
align-items: flex-start;
|
| 64 |
+
justify-content: space-between;
|
| 65 |
+
gap: 16px;
|
| 66 |
+
margin-top: 16px;
|
| 67 |
+
flex-wrap: wrap;
|
| 68 |
+
}
|
| 69 |
+
.d3-apo-size-ablation .legend {
|
| 70 |
+
display: flex;
|
| 71 |
+
flex-direction: column;
|
| 72 |
+
align-items: flex-start;
|
| 73 |
+
gap: 6px;
|
| 74 |
+
}
|
| 75 |
+
.d3-apo-size-ablation .legend-title {
|
| 76 |
+
font-size: 12px;
|
| 77 |
+
font-weight: 700;
|
| 78 |
+
color: var(--text-color);
|
| 79 |
+
}
|
| 80 |
+
.d3-apo-size-ablation .legend .items {
|
| 81 |
+
display: flex;
|
| 82 |
+
flex-wrap: wrap;
|
| 83 |
+
gap: 8px 14px;
|
| 84 |
+
}
|
| 85 |
+
.d3-apo-size-ablation .legend .item {
|
| 86 |
+
display: inline-flex;
|
| 87 |
+
align-items: center;
|
| 88 |
+
gap: 6px;
|
| 89 |
+
white-space: nowrap;
|
| 90 |
+
font-size: 12px;
|
| 91 |
+
color: var(--text-color);
|
| 92 |
+
}
|
| 93 |
+
.d3-apo-size-ablation .legend .swatch {
|
| 94 |
+
width: 14px;
|
| 95 |
+
height: 14px;
|
| 96 |
+
border-radius: 3px;
|
| 97 |
+
border: 1px solid var(--border-color);
|
| 98 |
+
}
|
| 99 |
+
.d3-apo-size-ablation .legend .swatch-line {
|
| 100 |
+
width: 20px;
|
| 101 |
+
height: 2px;
|
| 102 |
+
border: none;
|
| 103 |
+
}
|
| 104 |
+
.d3-apo-size-ablation .legend .swatch-dashed {
|
| 105 |
+
width: 20px;
|
| 106 |
+
height: 2px;
|
| 107 |
+
border: none;
|
| 108 |
+
background: repeating-linear-gradient(
|
| 109 |
+
to right,
|
| 110 |
+
var(--text-color) 0,
|
| 111 |
+
var(--text-color) 4px,
|
| 112 |
+
transparent 4px,
|
| 113 |
+
transparent 8px
|
| 114 |
+
);
|
| 115 |
+
}
|
| 116 |
+
.d3-apo-size-ablation .controls {
|
| 117 |
+
display: flex;
|
| 118 |
+
gap: 16px;
|
| 119 |
+
align-items: flex-start;
|
| 120 |
+
justify-content: flex-end;
|
| 121 |
+
flex-wrap: wrap;
|
| 122 |
+
}
|
| 123 |
+
.d3-apo-size-ablation .control-group {
|
| 124 |
+
display: flex;
|
| 125 |
+
flex-direction: column;
|
| 126 |
+
align-items: flex-start;
|
| 127 |
+
gap: 6px;
|
| 128 |
+
}
|
| 129 |
+
.d3-apo-size-ablation .controls label {
|
| 130 |
+
font-size: 12px;
|
| 131 |
+
font-weight: 700;
|
| 132 |
+
color: var(--text-color);
|
| 133 |
+
}
|
| 134 |
+
.d3-apo-size-ablation .controls select {
|
| 135 |
+
font-size: 12px;
|
| 136 |
+
padding: 8px 28px 8px 10px;
|
| 137 |
+
border: 1px solid var(--border-color);
|
| 138 |
+
border-radius: 8px;
|
| 139 |
+
background: var(--surface-bg);
|
| 140 |
+
color: var(--text-color);
|
| 141 |
+
cursor: pointer;
|
| 142 |
+
appearance: none;
|
| 143 |
+
background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='12' height='12' viewBox='0 0 12 12'%3E%3Cpath fill='%23666' d='M6 9L1 4h10z'/%3E%3C/svg%3E");
|
| 144 |
+
background-repeat: no-repeat;
|
| 145 |
+
background-position: right 8px center;
|
| 146 |
+
}
|
| 147 |
+
.d3-apo-size-ablation .controls select:focus {
|
| 148 |
+
outline: 2px solid var(--primary-color);
|
| 149 |
+
outline-offset: 2px;
|
| 150 |
+
}
|
| 151 |
+
.d3-apo-size-ablation .d3-tooltip {
|
| 152 |
+
position: absolute;
|
| 153 |
+
background: var(--surface-bg);
|
| 154 |
+
border: 1px solid var(--border-color);
|
| 155 |
+
border-radius: 8px;
|
| 156 |
+
padding: 12px;
|
| 157 |
+
pointer-events: none;
|
| 158 |
+
opacity: 0;
|
| 159 |
+
transition: opacity 0.2s;
|
| 160 |
+
box-shadow: 0 2px 8px rgba(0,0,0,0.15);
|
| 161 |
+
font-size: 12px;
|
| 162 |
+
z-index: 1000;
|
| 163 |
+
}
|
| 164 |
+
.d3-apo-size-ablation .tooltip-title {
|
| 165 |
+
font-weight: 700;
|
| 166 |
+
margin-bottom: 8px;
|
| 167 |
+
color: var(--text-color);
|
| 168 |
+
}
|
| 169 |
+
.d3-apo-size-ablation .tooltip-item {
|
| 170 |
+
display: flex;
|
| 171 |
+
align-items: center;
|
| 172 |
+
gap: 8px;
|
| 173 |
+
margin: 4px 0;
|
| 174 |
+
color: var(--text-color);
|
| 175 |
+
}
|
| 176 |
+
.d3-apo-size-ablation .tooltip-color {
|
| 177 |
+
width: 12px;
|
| 178 |
+
height: 12px;
|
| 179 |
+
border-radius: 2px;
|
| 180 |
+
}
|
| 181 |
+
</style>
|
| 182 |
+
<script>
|
| 183 |
+
(() => {
|
| 184 |
+
const ensureD3 = (cb) => {
|
| 185 |
+
if (window.d3 && typeof window.d3.select === 'function') return cb();
|
| 186 |
+
let s = document.getElementById('d3-cdn-script');
|
| 187 |
+
if (!s) {
|
| 188 |
+
s = document.createElement('script');
|
| 189 |
+
s.id = 'd3-cdn-script';
|
| 190 |
+
s.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
|
| 191 |
+
document.head.appendChild(s);
|
| 192 |
+
}
|
| 193 |
+
const onReady = () => { if (window.d3 && typeof window.d3.select === 'function') cb(); };
|
| 194 |
+
s.addEventListener('load', onReady, { once: true });
|
| 195 |
+
if (window.d3) onReady();
|
| 196 |
+
};
|
| 197 |
+
|
| 198 |
+
const bootstrap = () => {
|
| 199 |
+
const scriptEl = document.currentScript;
|
| 200 |
+
let container = scriptEl ? scriptEl.previousElementSibling : null;
|
| 201 |
+
if (!(container && container.classList && container.classList.contains('d3-apo-size-ablation'))) {
|
| 202 |
+
const candidates = Array.from(document.querySelectorAll('.d3-apo-size-ablation'))
|
| 203 |
+
.filter((el) => !(el.dataset && el.dataset.mounted === 'true'));
|
| 204 |
+
container = candidates[candidates.length - 1] || null;
|
| 205 |
+
}
|
| 206 |
+
if (!container) return;
|
| 207 |
+
if (container.dataset) {
|
| 208 |
+
if (container.dataset.mounted === 'true') return;
|
| 209 |
+
container.dataset.mounted = 'true';
|
| 210 |
+
}
|
| 211 |
+
|
| 212 |
+
// Data embedded inline
|
| 213 |
+
const data = [{"system_prompt":"/think","Evaluation":"AIME25","Score":45.47,"Data Subset":169346},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":7.92,"Data Subset":169346},{"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":42.49,"Data Subset":169346},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":32.58,"Data Subset":169346},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":69.88,"Data Subset":169346},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":74.46,"Data Subset":169346},{"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":23.76,"Data Subset":169346},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":13.86,"Data Subset":169346},{"system_prompt":"/think","Evaluation":"Average","Score":45.4,"Data Subset":169346},{"system_prompt":"/no_think","Evaluation":"Average","Score":32.205,"Data Subset":169346},{"system_prompt":"/think","Evaluation":"AIME25","Score":42.4,"Data Subset":1693},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":4.43,"Data Subset":1693},{"system_prompt":"/think","Evaluation":"AIME25","Score":45.36,"Data Subset":8467},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":8.23,"Data Subset":8467},{"system_prompt":"/think","Evaluation":"AIME25","Score":48.54,"Data Subset":16934},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":8.54,"Data Subset":16934},{"system_prompt":"/think","Evaluation":"AIME25","Score":48.65,"Data Subset":42336},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":7.4,"Data Subset":42336},{"system_prompt":"/think","Evaluation":"AIME25","Score":48.65,"Data Subset":84673},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":6.93,"Data Subset":84673},{"system_prompt":"/think","Evaluation":"AIME25","Score":45.16,"Data Subset":127009},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":7.97,"Data Subset":127009},{"system_prompt":"/think","Evaluation":"AIME25","Score":43.18,"Data 
Subset":338692},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":5.94,"Data Subset":338692},{"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":43.75,"Data Subset":1693},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":32.32,"Data Subset":1693},{"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":43.94,"Data Subset":8467},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":31.76,"Data Subset":8467},{"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":45.39,"Data Subset":16934},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":31.19,"Data Subset":16934},{"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":42.55,"Data Subset":42336},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":31.69,"Data Subset":42336},{"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":44.51,"Data Subset":84673},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":33.84,"Data Subset":84673},{"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":43.12,"Data Subset":127009},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":31.12,"Data Subset":127009},{"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":43.12,"Data Subset":338692},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":30.3,"Data Subset":338692},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":71.13,"Data Subset":1693},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":70.93,"Data Subset":1693},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":71.97,"Data Subset":8467},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":74.24,"Data Subset":8467},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":75.99,"Data Subset":16934},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":77.05,"Data Subset":16934},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":74.66,"Data 
Subset":42336},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":78.93,"Data Subset":42336},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":73.22,"Data Subset":84673},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":78.24,"Data Subset":84673},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":72.09,"Data Subset":127009},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":78.27,"Data Subset":127009},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":69.23,"Data Subset":338692},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":76.14,"Data Subset":338692},{"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":41.58,"Data Subset":1693},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":11.88,"Data Subset":1693},{"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":32.67,"Data Subset":8467},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":9.9,"Data Subset":8467},{"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":32.67,"Data Subset":16934},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":9.9,"Data Subset":16934},{"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":32.67,"Data Subset":42336},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":11.88,"Data Subset":42336},{"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":29.7,"Data Subset":84673},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":15.84,"Data Subset":84673},{"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":25.74,"Data Subset":127009},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":13.86,"Data Subset":127009},{"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":25.74,"Data Subset":338692},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":14.85,"Data 
Subset":338692},{"system_prompt":"/think","Evaluation":"Average","Score":49.715,"Data Subset":1693},{"system_prompt":"/no_think","Evaluation":"Average","Score":29.89,"Data Subset":1693},{"system_prompt":"/think","Evaluation":"Average","Score":48.485,"Data Subset":8467},{"system_prompt":"/no_think","Evaluation":"Average","Score":31.0325,"Data Subset":8467},{"system_prompt":"/think","Evaluation":"Average","Score":50.6475,"Data Subset":16934},{"system_prompt":"/no_think","Evaluation":"Average","Score":31.67,"Data Subset":16934},{"system_prompt":"/think","Evaluation":"Average","Score":49.6325,"Data Subset":42336},{"system_prompt":"/no_think","Evaluation":"Average","Score":32.475,"Data Subset":42336},{"system_prompt":"/think","Evaluation":"Average","Score":49.02,"Data Subset":84673},{"system_prompt":"/no_think","Evaluation":"Average","Score":33.7125,"Data Subset":84673},{"system_prompt":"/think","Evaluation":"Average","Score":46.5275,"Data Subset":127009},{"system_prompt":"/no_think","Evaluation":"Average","Score":32.805,"Data Subset":127009},{"system_prompt":"/think","Evaluation":"Average","Score":45.3175,"Data Subset":338692},{"system_prompt":"/no_think","Evaluation":"Average","Score":31.8075,"Data Subset":338692}];
|
| 214 |
+
const sftData = [{"system_prompt":"/think","Evaluation":"AIME25","Score":36.56},{"system_prompt":"/no_think","Evaluation":"AIME25","Score":4.01},{"system_prompt":"/think","Evaluation":"GPQA Diamond","Score":42.23},{"system_prompt":"/no_think","Evaluation":"GPQA Diamond","Score":30.43},{"system_prompt":"/think","Evaluation":"IF-Eval","Score":70.03},{"system_prompt":"/no_think","Evaluation":"IF-Eval","Score":67.29},{"system_prompt":"/think","Evaluation":"LiveCodeBench v4","Score":36.63},{"system_prompt":"/no_think","Evaluation":"LiveCodeBench v4","Score":12.87},{"system_prompt":"/think","Evaluation":"Average","Score":46.3625},{"system_prompt":"/no_think","Evaluation":"Average","Score":28.65}];
|
| 215 |
+
|
| 216 |
+
// Get colors from ColorPalettes or fallback
|
| 217 |
+
/**
 * Resolve the two series colours.
 *
 * Asks `window.ColorPalettes.getColors('categorical', 2)` when that function exists.
 * Any colour the palette does not supply falls back to the built-in default, so a
 * missing or short palette result can never produce an `undefined` stroke/fill.
 *
 * @returns {{think: string, noThink: string}} Colours for the /think and /no_think series.
 */
const getColors = () => {
  const fallback = { think: '#E377C2', noThink: '#7FC97F' };
  const palettes = window.ColorPalettes;
  if (!palettes || typeof palettes.getColors !== 'function') return fallback;

  // Tolerate a null/undefined result instead of throwing on `[0]`.
  const picked = palettes.getColors('categorical', 2) || [];
  return {
    think: picked[0] || fallback.think,
    noThink: picked[1] || fallback.noThink,
  };
};
|
| 224 |
+
|
| 225 |
+
let colors = getColors();
|
| 226 |
+
|
| 227 |
+
// Set up dimensions
|
| 228 |
+
const margin = { top: 16, right: 28, bottom: 56, left: 64 };
|
| 229 |
+
|
| 230 |
+
// Create SVG
|
| 231 |
+
const svg = d3.select(container).append('svg').attr('width', '100%').style('display', 'block');
|
| 232 |
+
const g = svg.append('g');
|
| 233 |
+
|
| 234 |
+
// Tooltip
|
| 235 |
+
container.style.position = container.style.position || 'relative';
|
| 236 |
+
let tip = container.querySelector('.d3-tooltip');
|
| 237 |
+
let tipInner;
|
| 238 |
+
if (!tip) {
|
| 239 |
+
tip = document.createElement('div');
|
| 240 |
+
tip.className = 'd3-tooltip';
|
| 241 |
+
Object.assign(tip.style, {
|
| 242 |
+
position: 'absolute',
|
| 243 |
+
top: '0px',
|
| 244 |
+
left: '0px',
|
| 245 |
+
transform: 'translate(-9999px, -9999px)',
|
| 246 |
+
pointerEvents: 'none',
|
| 247 |
+
padding: '8px 10px',
|
| 248 |
+
borderRadius: '8px',
|
| 249 |
+
fontSize: '12px',
|
| 250 |
+
lineHeight: '1.35',
|
| 251 |
+
border: '1px solid var(--border-color)',
|
| 252 |
+
background: 'var(--surface-bg)',
|
| 253 |
+
color: 'var(--text-color)',
|
| 254 |
+
boxShadow: '0 4px 24px rgba(0,0,0,.18)',
|
| 255 |
+
opacity: '0',
|
| 256 |
+
transition: 'opacity .12s ease',
|
| 257 |
+
zIndex: '1000'
|
| 258 |
+
});
|
| 259 |
+
tipInner = document.createElement('div');
|
| 260 |
+
tipInner.className = 'd3-tooltip__inner';
|
| 261 |
+
tipInner.style.textAlign = 'left';
|
| 262 |
+
tip.appendChild(tipInner);
|
| 263 |
+
container.appendChild(tip);
|
| 264 |
+
} else {
|
| 265 |
+
tipInner = tip.querySelector('.d3-tooltip__inner') || tip;
|
| 266 |
+
}
|
| 267 |
+
|
| 268 |
+
/**
 * Fill the tooltip with `html` and make it visible next to the pointer.
 *
 * @param {string} html - Markup written into the tooltip's inner element.
 * @param {Event} event - Pointer event; its position is measured relative to `container`.
 */
const showTooltip = (html, event) => {
  // Shift the tooltip away from the pointer by the same amount on both axes.
  const pointerGap = 12;

  tipInner.innerHTML = html;

  const pointerPos = d3.pointer(event, container);
  const left = pointerPos[0] + pointerGap;
  const top = pointerPos[1] + pointerGap;

  tip.style.transform = `translate(${left}px, ${top}px)`;
  tip.style.opacity = '1';
};
|
| 275 |
+
|
| 276 |
+
/**
 * Fade the tooltip out, then move it off-screen after 120 ms (the tooltip's
 * opacity transition is declared as `.12s`).
 */
const hideTooltip = () => {
  tip.style.opacity = '0';
  setTimeout(() => {
    // The pointer may have reached another dot during the fade, in which case the
    // tooltip is visible again; moving it now would make that fresh tooltip vanish.
    if (tip.style.opacity !== '0') return;
    tip.style.transform = 'translate(-9999px, -9999px)';
  }, 120);
};
|
| 282 |
+
|
| 283 |
+
// Get unique evaluations
|
| 284 |
+
const evaluations = [...new Set(data.map(d => d.Evaluation))];
|
| 285 |
+
|
| 286 |
+
// Create header with legend and controls
|
| 287 |
+
const header = d3.select(container).append('div').attr('class', 'header');
|
| 288 |
+
|
| 289 |
+
const legend = header.append('div').attr('class', 'legend');
|
| 290 |
+
legend.append('div').attr('class', 'legend-title').text('Legend');
|
| 291 |
+
const legendItems = legend.append('div').attr('class', 'items');
|
| 292 |
+
|
| 293 |
+
const controls = header.append('div').attr('class', 'controls');
|
| 294 |
+
const controlGroup = controls.append('div').attr('class', 'control-group');
|
| 295 |
+
controlGroup.append('label').attr('for', 'metric-select-size').text('Metric');
|
| 296 |
+
const select = controlGroup.append('select').attr('id', 'metric-select-size');
|
| 297 |
+
|
| 298 |
+
// Populate dropdown
|
| 299 |
+
select.selectAll('option')
|
| 300 |
+
.data(evaluations)
|
| 301 |
+
.enter()
|
| 302 |
+
.append('option')
|
| 303 |
+
.text(d => d)
|
| 304 |
+
.attr('value', d => d);
|
| 305 |
+
|
| 306 |
+
// Build legend
|
| 307 |
+
/**
 * Rebuild the legend from scratch: one entry per series plus the SFT reference entry.
 * Series swatches take their background from the current `colors`; the SFT swatch
 * gets only its class.
 */
const buildLegend = () => {
  // [label, swatch class, key into `colors` (null = no inline background)]
  const entries = [
    ['/think', 'swatch-line', 'think'],
    ['/no_think', 'swatch-line', 'noThink'],
    ['SFT checkpoint', 'swatch-dashed', null],
  ];

  legendItems.html('');

  for (const [label, swatchClass, colorKey] of entries) {
    const row = legendItems.append('span').attr('class', 'item');
    const swatch = row.append('span').attr('class', swatchClass);
    if (colorKey) {
      swatch.style('background', colors[colorKey]);
    }
    row.append('span').text(label);
  }
};
|
| 322 |
+
|
| 323 |
+
buildLegend();
|
| 324 |
+
|
| 325 |
+
// Update chart function
|
| 326 |
+
/**
 * Redraw the whole chart for one evaluation metric.
 *
 * Clears the plot group, recomputes the size from the container's current width,
 * then draws (in this z-order) grid, axes, axis labels, the dashed SFT reference
 * lines, the two series lines and finally the hoverable dots.
 *
 * @param {string} evaluation - Value of the `Evaluation` field to plot.
 */
function updateChart(evaluation) {
  const SIZE_KEY = 'Data Subset';
  const rows = data.filter((d) => d.Evaluation === evaluation);

  // Both series are described once so that lines, reference lines, dots and
  // tooltips are produced by the same code path.
  const series = [
    { prompt: '/think', color: colors.think, lineClass: 'line-think', dotClass: 'dot-think' },
    { prompt: '/no_think', color: colors.noThink, lineClass: 'line-no-think', dotClass: 'dot-no-think' },
  ].map((s) => ({
    ...s,
    points: rows
      .filter((d) => d.system_prompt === s.prompt)
      .sort((a, b) => a[SIZE_KEY] - b[SIZE_KEY]),
    sft: sftData.find((d) => d.Evaluation === evaluation && d.system_prompt === s.prompt),
  }));

  g.selectAll('*').remove();

  const width = container.clientWidth || 800;
  const height = Math.max(320, Math.round(width / 2.5));
  const innerWidth = width - margin.left - margin.right;
  const innerHeight = height - margin.top - margin.bottom;

  svg.attr('width', width).attr('height', height);
  g.attr('transform', `translate(${margin.left},${margin.top})`);

  // Scales
  const xScale = d3.scaleLog()
    .domain(d3.extent(rows, (d) => d[SIZE_KEY]))
    .range([0, innerWidth]);

  // The SFT baselines take part in the y-domain so their reference lines stay visible.
  const scores = rows.map((d) => d.Score);
  series.forEach((s) => {
    if (s.sft) scores.push(s.sft.Score);
  });
  const yScale = d3.scaleLinear()
    .domain([0, d3.max(scores) * 1.1])
    .range([innerHeight, 0]);

  // Grid
  g.append('g')
    .attr('class', 'grid')
    .attr('transform', `translate(0,${innerHeight})`)
    .call(d3.axisBottom(xScale).tickSize(-innerHeight).tickFormat('').tickSizeOuter(0));

  g.append('g')
    .attr('class', 'grid')
    .call(d3.axisLeft(yScale).tickSize(-innerWidth).tickFormat('').tickSizeOuter(0));

  // Axes
  const styleAxis = (gAxis) => {
    gAxis.selectAll('.tick line').attr('stroke', 'var(--axis-color)').style('opacity', 1);
    gAxis.selectAll('.tick text').attr('fill', 'var(--tick-color)').style('opacity', 1);
    gAxis.select('.domain').attr('stroke', 'var(--axis-color)');
  };

  g.append('g')
    .attr('class', 'axes')
    .attr('transform', `translate(0,${innerHeight})`)
    .call(d3.axisBottom(xScale).ticks(5).tickSizeOuter(0))
    .call(styleAxis);

  g.append('g')
    .attr('class', 'axes')
    .call(d3.axisLeft(yScale).ticks(6).tickSizeOuter(0))
    .call(styleAxis);

  // Axis labels
  g.append('text')
    .attr('class', 'axis-label')
    .attr('text-anchor', 'middle')
    .attr('x', innerWidth / 2)
    .attr('y', innerHeight + 40)
    .text('Dataset size');

  g.append('text')
    .attr('class', 'axis-label')
    .attr('text-anchor', 'middle')
    .attr('transform', 'rotate(-90)')
    .attr('y', -45)
    .attr('x', -innerHeight / 2)
    .text('Score (%)');

  // Reference lines (SFT checkpoint score per series)
  series.forEach(({ sft, color }) => {
    if (!sft) return;
    const y = yScale(sft.Score);
    g.append('line')
      .attr('class', 'reference-line')
      .style('stroke', color)
      .attr('x1', 0)
      .attr('x2', innerWidth)
      .attr('y1', y)
      .attr('y2', y);
  });

  // Lines
  const line = d3.line()
    .x((d) => xScale(d[SIZE_KEY]))
    .y((d) => yScale(d.Score));

  series.forEach(({ points, lineClass, color }) => {
    g.append('path')
      .datum(points)
      .attr('class', lineClass)
      .style('stroke', color)
      .attr('d', line);
  });

  // Tooltip: always lists /think first, then /no_think, for the hovered dataset size.
  const tooltipHtml = (hovered, d) => {
    const items = series.map((s) => {
      const match = s === hovered ? d : s.points.find((p) => p[SIZE_KEY] === d[SIZE_KEY]);
      if (!match) return '';
      return `
        <div class="tooltip-item">
          <div class="tooltip-color" style="background-color: ${s.color};"></div>
          <span>${s.prompt}: ${match.Score.toFixed(2)}%</span>
        </div>`;
    }).join('');
    return `
      <div class="tooltip-title">Dataset Size ${d[SIZE_KEY].toLocaleString()}</div>${items}
    `;
  };

  // Dots. Each circle keeps the shared `dot` class and also gets its series class,
  // so the class used in the data join really exists on the elements.
  series.forEach((s) => {
    g.selectAll(`.${s.dotClass}`)
      .data(s.points)
      .enter()
      .append('circle')
      .attr('class', `dot ${s.dotClass}`)
      .style('fill', s.color)
      .attr('cx', (d) => xScale(d[SIZE_KEY]))
      .attr('cy', (d) => yScale(d.Score))
      .attr('r', 4)
      .on('mouseenter', (event, d) => showTooltip(tooltipHtml(s, d), event))
      .on('mouseleave', hideTooltip);
  });
}
|
| 499 |
+
|
| 500 |
+
// Set default value to "Average" if it exists
|
| 501 |
+
const defaultEval = evaluations.includes("Average") ? "Average" : evaluations[0];
|
| 502 |
+
select.property("value", defaultEval);
|
| 503 |
+
|
| 504 |
+
// Initial chart
|
| 505 |
+
updateChart(defaultEval);
|
| 506 |
+
|
| 507 |
+
// Update on dropdown change
|
| 508 |
+
select.on("change", function() {
|
| 509 |
+
updateChart(this.value);
|
| 510 |
+
});
|
| 511 |
+
|
| 512 |
+
// Resize handling
|
| 513 |
+
const rerender = () => updateChart(select.property("value"));
|
| 514 |
+
if (window.ResizeObserver) {
|
| 515 |
+
const ro = new ResizeObserver(() => rerender());
|
| 516 |
+
ro.observe(container);
|
| 517 |
+
} else {
|
| 518 |
+
window.addEventListener('resize', rerender);
|
| 519 |
+
}
|
| 520 |
+
|
| 521 |
+
// Listen for ColorPalettes changes
|
| 522 |
+
if (window.ColorPalettes && typeof window.ColorPalettes.addListener === 'function') {
|
| 523 |
+
window.ColorPalettes.addListener(() => {
|
| 524 |
+
colors = getColors();
|
| 525 |
+
buildLegend();
|
| 526 |
+
updateChart(select.property("value"));
|
| 527 |
+
});
|
| 528 |
+
}
|
| 529 |
+
};
|
| 530 |
+
|
| 531 |
+
if (document.readyState === 'loading') {
|
| 532 |
+
document.addEventListener('DOMContentLoaded', () => ensureD3(bootstrap), { once: true });
|
| 533 |
+
} else {
|
| 534 |
+
ensureD3(bootstrap);
|
| 535 |
+
}
|
| 536 |
+
})();
|
| 537 |
+
</script>
|
app/src/content/embeds/d3-rl-aime25.html
ADDED
|
@@ -0,0 +1,536 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<div class="d3-grpo-aime25"></div>
|
| 2 |
+
<style>
|
| 3 |
+
.d3-grpo-aime25 {
|
| 4 |
+
width: 100%;
|
| 5 |
+
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
|
| 6 |
+
position: relative;
|
| 7 |
+
}
|
| 8 |
+
|
| 9 |
+
.d3-grpo-aime25 svg {
|
| 10 |
+
display: block;
|
| 11 |
+
width: 100%;
|
| 12 |
+
}
|
| 13 |
+
|
| 14 |
+
.d3-grpo-aime25 .axis path {
|
| 15 |
+
stroke: none;
|
| 16 |
+
}
|
| 17 |
+
|
| 18 |
+
.d3-grpo-aime25 .axis line {
|
| 19 |
+
stroke: var(--axis-color);
|
| 20 |
+
shape-rendering: crispEdges;
|
| 21 |
+
}
|
| 22 |
+
|
| 23 |
+
.d3-grpo-aime25 .axis text {
|
| 24 |
+
fill: var(--tick-color);
|
| 25 |
+
font-size: 11px;
|
| 26 |
+
}
|
| 27 |
+
|
| 28 |
+
.d3-grpo-aime25 .grid line {
|
| 29 |
+
stroke: var(--grid-color);
|
| 30 |
+
stroke-dasharray: 2,2;
|
| 31 |
+
}
|
| 32 |
+
|
| 33 |
+
.d3-grpo-aime25 .line {
|
| 34 |
+
fill: none;
|
| 35 |
+
stroke-width: 2.5;
|
| 36 |
+
stroke-linejoin: round;
|
| 37 |
+
stroke-linecap: round;
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
.d3-grpo-aime25 .axis-label {
|
| 41 |
+
fill: var(--text-color);
|
| 42 |
+
font-size: 12px;
|
| 43 |
+
font-weight: 600;
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
+
.d3-grpo-aime25 .header {
|
| 47 |
+
display: flex;
|
| 48 |
+
align-items: center;
|
| 49 |
+
justify-content: space-between;
|
| 50 |
+
flex-wrap: wrap;
|
| 51 |
+
gap: 16px;
|
| 52 |
+
margin-top: 12px;
|
| 53 |
+
padding-top: 12px;
|
| 54 |
+
border-top: 1px solid var(--border-color);
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
+
.d3-grpo-aime25 .legend {
|
| 58 |
+
display: flex;
|
| 59 |
+
flex-direction: column;
|
| 60 |
+
align-items: flex-start;
|
| 61 |
+
gap: 6px;
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
.d3-grpo-aime25 .legend-title {
|
| 65 |
+
font-size: 12px;
|
| 66 |
+
font-weight: 700;
|
| 67 |
+
color: var(--text-color);
|
| 68 |
+
}
|
| 69 |
+
|
| 70 |
+
.d3-grpo-aime25 .legend .items {
|
| 71 |
+
display: flex;
|
| 72 |
+
flex-wrap: wrap;
|
| 73 |
+
gap: 8px 14px;
|
| 74 |
+
}
|
| 75 |
+
|
| 76 |
+
.d3-grpo-aime25 .legend .item {
|
| 77 |
+
display: inline-flex;
|
| 78 |
+
align-items: center;
|
| 79 |
+
gap: 6px;
|
| 80 |
+
white-space: nowrap;
|
| 81 |
+
font-size: 12px;
|
| 82 |
+
color: var(--text-color);
|
| 83 |
+
cursor: pointer;
|
| 84 |
+
user-select: none;
|
| 85 |
+
opacity: 1;
|
| 86 |
+
transition: opacity 0.2s ease;
|
| 87 |
+
}
|
| 88 |
+
|
| 89 |
+
.d3-grpo-aime25 .legend .item.dimmed {
|
| 90 |
+
opacity: 0.3;
|
| 91 |
+
}
|
| 92 |
+
|
| 93 |
+
.d3-grpo-aime25 .legend .swatch {
|
| 94 |
+
width: 14px;
|
| 95 |
+
height: 14px;
|
| 96 |
+
border-radius: 3px;
|
| 97 |
+
border: 1px solid var(--border-color);
|
| 98 |
+
}
|
| 99 |
+
|
| 100 |
+
.d3-grpo-aime25 .controls {
|
| 101 |
+
display: flex;
|
| 102 |
+
gap: 16px;
|
| 103 |
+
align-items: center;
|
| 104 |
+
justify-content: flex-end;
|
| 105 |
+
flex-wrap: wrap;
|
| 106 |
+
}
|
| 107 |
+
|
| 108 |
+
.d3-grpo-aime25 .controls .control-group {
|
| 109 |
+
display: flex;
|
| 110 |
+
flex-direction: column;
|
| 111 |
+
align-items: flex-start;
|
| 112 |
+
gap: 6px;
|
| 113 |
+
}
|
| 114 |
+
|
| 115 |
+
.d3-grpo-aime25 .controls label {
|
| 116 |
+
font-size: 12px;
|
| 117 |
+
font-weight: 700;
|
| 118 |
+
color: var(--text-color);
|
| 119 |
+
}
|
| 120 |
+
|
| 121 |
+
.d3-grpo-aime25 .controls .toggle-group {
|
| 122 |
+
display: flex;
|
| 123 |
+
gap: 8px;
|
| 124 |
+
align-items: center;
|
| 125 |
+
}
|
| 126 |
+
|
| 127 |
+
.d3-grpo-aime25 .controls .toggle-btn {
|
| 128 |
+
padding: 6px 12px;
|
| 129 |
+
font-size: 12px;
|
| 130 |
+
border: 1px solid var(--border-color);
|
| 131 |
+
border-radius: 8px;
|
| 132 |
+
background: var(--surface-bg);
|
| 133 |
+
color: var(--text-color);
|
| 134 |
+
cursor: pointer;
|
| 135 |
+
transition: all 0.2s ease;
|
| 136 |
+
}
|
| 137 |
+
|
| 138 |
+
.d3-grpo-aime25 .controls .toggle-btn:hover {
|
| 139 |
+
background: var(--primary-color);
|
| 140 |
+
color: white;
|
| 141 |
+
border-color: var(--primary-color);
|
| 142 |
+
}
|
| 143 |
+
|
| 144 |
+
.d3-grpo-aime25 .controls .toggle-btn.active {
|
| 145 |
+
background: var(--primary-color);
|
| 146 |
+
color: white;
|
| 147 |
+
border-color: var(--primary-color);
|
| 148 |
+
}
|
| 149 |
+
</style>
|
| 150 |
+
<script>
|
| 151 |
+
(() => {
|
| 152 |
+
/**
 * Run `cb` once the global D3 library is usable, injecting the CDN script if needed.
 *
 * The script tag carries the id `d3-cdn-script`; if a tag with that id already
 * exists it is reused instead of adding a second one.
 *
 * @param {Function} cb - Called with no arguments once `window.d3.select` is a function.
 * @returns {*} The result of `cb()` when D3 is already available, otherwise undefined.
 */
const ensureD3 = (cb) => {
  const isD3Ready = () => Boolean(window.d3) && typeof window.d3.select === 'function';
  if (isD3Ready()) return cb();

  let s = document.getElementById('d3-cdn-script');
  if (!s) {
    s = document.createElement('script');
    s.id = 'd3-cdn-script';
    s.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(s);
  }

  // Re-check inside the handler: the load event alone does not prove that the
  // script defined a usable `window.d3`.
  s.addEventListener('load', () => {
    if (isD3Ready()) cb();
  }, { once: true });

  // Without this, a blocked or failed CDN request leaves an empty chart and no hint why.
  s.addEventListener('error', () => {
    console.error(`Failed to load D3 from ${s.src}; the chart cannot be rendered.`);
  }, { once: true });
};
|
| 167 |
+
|
| 168 |
+
const bootstrap = () => {
|
| 169 |
+
const scriptEl = document.currentScript;
|
| 170 |
+
let container = scriptEl ? scriptEl.previousElementSibling : null;
|
| 171 |
+
if (!(container && container.classList && container.classList.contains('d3-grpo-aime25'))) {
|
| 172 |
+
const candidates = Array.from(document.querySelectorAll('.d3-grpo-aime25'))
|
| 173 |
+
.filter((el) => !(el.dataset && el.dataset.mounted === 'true'));
|
| 174 |
+
container = candidates[candidates.length - 1] || null;
|
| 175 |
+
}
|
| 176 |
+
if (!container) return;
|
| 177 |
+
if (container.dataset) {
|
| 178 |
+
if (container.dataset.mounted === 'true') return;
|
| 179 |
+
container.dataset.mounted = 'true';
|
| 180 |
+
}
|
| 181 |
+
|
| 182 |
+
// Data loading configuration
|
| 183 |
+
let mountEl = container;
|
| 184 |
+
while (mountEl && !mountEl.getAttribute?.('data-datafiles')) {
|
| 185 |
+
mountEl = mountEl.parentElement;
|
| 186 |
+
}
|
| 187 |
+
let providedData = null;
|
| 188 |
+
try {
|
| 189 |
+
const attr = mountEl && mountEl.getAttribute ? mountEl.getAttribute('data-datafiles') : null;
|
| 190 |
+
if (attr && attr.trim()) {
|
| 191 |
+
providedData = attr.trim().startsWith('[') ? JSON.parse(attr) : attr.trim();
|
| 192 |
+
}
|
| 193 |
+
} catch (_) {}
|
| 194 |
+
|
| 195 |
+
const DEFAULT_CSV = '/data/grpo/aime25_perf.csv';
|
| 196 |
+
/**
 * Turn a data-file reference into a fetchable location.
 *
 * - Non-strings and empty strings are returned unchanged.
 * - Root-relative paths (leading `/`) are returned unchanged.
 * - Absolute URLs (`scheme://…`) are returned unchanged; prefixing them would
 *   produce an unusable `/data/https://…` path.
 * - Everything else is treated as relative to `/data/`.
 *
 * @param {*} p - Path or URL as supplied by the embedding page.
 * @returns {*} The resolved path, or `p` itself when it is not a non-empty string.
 */
const ensureDataPrefix = (p) => {
  if (typeof p !== 'string' || !p) return p;
  if (p.startsWith('/')) return p;
  if (/^[a-z][a-z0-9+.-]*:\/\//i.test(p)) return p;
  return `/data/${p}`;
};
|
| 201 |
+
/**
 * Coerce the user-supplied data-file setting into a list of resolved paths.
 *
 * @param {*} inp - An array of paths, a single path string, or anything else.
 * @returns {Array|null} Each path passed through `ensureDataPrefix`, or null when
 *   `inp` is neither an array nor a string.
 */
const normalizeInput = (inp) => {
  if (Array.isArray(inp)) {
    return inp.map(ensureDataPrefix);
  }
  if (typeof inp === 'string') {
    return [ensureDataPrefix(inp)];
  }
  return null;
};
|
| 204 |
+
|
| 205 |
+
const CSV_PATHS = Array.isArray(providedData)
|
| 206 |
+
? normalizeInput(providedData)
|
| 207 |
+
: (typeof providedData === 'string' ? normalizeInput(providedData) || [DEFAULT_CSV] : [
|
| 208 |
+
DEFAULT_CSV,
|
| 209 |
+
'./assets/data/grpo/aime25_perf.csv',
|
| 210 |
+
'../assets/data/grpo/aime25_perf.csv',
|
| 211 |
+
'../../assets/data/grpo/aime25_perf.csv'
|
| 212 |
+
]);
|
| 213 |
+
|
| 214 |
+
/**
 * Try each candidate path in order and return the body of the first one that loads.
 *
 * A candidate counts as failed when the request rejects, the response is not `ok`,
 * or reading the body rejects; the reason is recorded and the next candidate is tried.
 *
 * @param {Iterable<string>} paths - Candidate locations, in priority order.
 * @returns {Promise<string>} Text of the first successful response.
 * @throws {Error} When every candidate fails; the message lists each path and its reason.
 */
const fetchFirstAvailable = async (paths) => {
  const failures = [];

  for (const candidate of paths) {
    let reason;
    try {
      const response = await fetch(candidate, { cache: 'no-cache' });
      if (response.ok) {
        // Awaited inside the try so that a failed body read falls through to the next path.
        const body = await response.text();
        return body;
      }
      reason = response.status;
    } catch (err) {
      reason = err.message;
    }
    failures.push(`${candidate}: ${reason}`);
  }

  throw new Error(`CSV not found. Tried:\n${failures.join('\n')}`);
};
|
| 227 |
+
|
| 228 |
+
// Tooltip setup
|
| 229 |
+
container.style.position = container.style.position || 'relative';
|
| 230 |
+
let tip = container.querySelector('.d3-tooltip');
|
| 231 |
+
let tipInner;
|
| 232 |
+
if (!tip) {
|
| 233 |
+
tip = document.createElement('div');
|
| 234 |
+
tip.className = 'd3-tooltip';
|
| 235 |
+
Object.assign(tip.style, {
|
| 236 |
+
position: 'absolute',
|
| 237 |
+
top: '0px',
|
| 238 |
+
left: '0px',
|
| 239 |
+
transform: 'translate(-9999px, -9999px)',
|
| 240 |
+
pointerEvents: 'none',
|
| 241 |
+
padding: '8px 10px',
|
| 242 |
+
borderRadius: '8px',
|
| 243 |
+
fontSize: '12px',
|
| 244 |
+
lineHeight: '1.35',
|
| 245 |
+
border: '1px solid var(--border-color)',
|
| 246 |
+
background: 'var(--surface-bg)',
|
| 247 |
+
color: 'var(--text-color)',
|
| 248 |
+
boxShadow: '0 4px 24px rgba(0,0,0,.18)',
|
| 249 |
+
opacity: '0',
|
| 250 |
+
transition: 'opacity .12s ease',
|
| 251 |
+
zIndex: '1000'
|
| 252 |
+
});
|
| 253 |
+
tipInner = document.createElement('div');
|
| 254 |
+
tipInner.className = 'd3-tooltip__inner';
|
| 255 |
+
tipInner.style.textAlign = 'left';
|
| 256 |
+
tip.appendChild(tipInner);
|
| 257 |
+
container.appendChild(tip);
|
| 258 |
+
} else {
|
| 259 |
+
tipInner = tip.querySelector('.d3-tooltip__inner') || tip;
|
| 260 |
+
}
|
| 261 |
+
|
| 262 |
+
// SVG setup
|
| 263 |
+
const svg = d3.select(container).append('svg').attr('width', '100%').style('display', 'block');
|
| 264 |
+
const gRoot = svg.append('g');
|
| 265 |
+
const gGrid = gRoot.append('g').attr('class', 'grid');
|
| 266 |
+
const gLines = gRoot.append('g').attr('class', 'lines');
|
| 267 |
+
const gAxes = gRoot.append('g').attr('class', 'axes');
|
| 268 |
+
|
| 269 |
+
// State
|
| 270 |
+
let width = 800, height = 400;
|
| 271 |
+
const margin = { top: 16, right: 28, bottom: 56, left: 64 };
|
| 272 |
+
let series = [];
|
| 273 |
+
let hiddenSeries = new Set();
|
| 274 |
+
|
| 275 |
+
// Color setup
|
| 276 |
+
// Return the categorical palette for `count` series. Delegates to the
// page-level `window.ColorPalettes` helper when it is present; otherwise
// falls back to a fixed six-colour list (callers wrap around with modulo).
const getColors = (count) => {
  const palettes = window.ColorPalettes;
  const hasPaletteHelper = Boolean(palettes && palettes.getColors);
  if (!hasPaletteHelper) {
    return ['#4E79A7', '#F28E2B', '#E15759', '#76B7B2', '#59A14F', '#EDC948'];
  }
  return palettes.getColors('categorical', count);
};
|
| 282 |
+
|
| 283 |
+
/**
 * Parse a wide-format CSV into the shared `series` array.
 *
 * The CSV is expected to have a `step` column plus one column per series;
 * every non-`step` header becomes a series `{ name, points }` where each
 * point is `{ step, value }`.
 *
 * Cells that are missing or blank are treated as "no data" and dropped,
 * rather than being coerced to 0 by unary plus and plotted as real values.
 *
 * @param {string} csvText - Raw CSV text.
 * @throws {Error} When the CSV has no data rows (surfaced by the caller's
 *   `.catch` as a readable message instead of an opaque TypeError).
 */
function parseData(csvText) {
  const rows = d3.csvParse(csvText);
  if (rows.length === 0) {
    throw new Error('CSV contains no data rows');
  }

  // `+''` is 0, so blank cells must be mapped to NaN explicitly.
  const toNumber = (raw) => (raw == null || String(raw).trim() === '' ? NaN : +raw);

  // Every column except 'step' is a series.
  const headers = Object.keys(rows[0]).filter(h => h !== 'step');

  series = headers.map(header => ({
    name: header,
    points: rows
      .map(row => ({
        step: toNumber(row.step),
        value: toNumber(row[header])
      }))
      .filter(p => !Number.isNaN(p.step) && !Number.isNaN(p.value))
  }));
}
|
| 304 |
+
|
| 305 |
+
// Re-measure the container, resize the <svg> to match, and report the
// drawable area left once the margins are subtracted.
// Width falls back to 800 when the container reports no width; height keeps
// a 2.5:1 aspect ratio with a floor of 320.
function updateSize() {
  const measuredWidth = container.clientWidth;
  width = measuredWidth || 800;
  height = Math.max(320, Math.round(width / 2.5));

  svg.attr('width', width);
  svg.attr('height', height);

  const { top, right, bottom, left } = margin;
  gRoot.attr('transform', `translate(${left},${top})`);

  const innerWidth = width - left - right;
  const innerHeight = height - top - bottom;
  return { innerWidth, innerHeight };
}
|
| 315 |
+
|
| 316 |
+
/**
 * Redraw the chart for the current container size and legend state.
 *
 * Reads the shared `series` / `hiddenSeries` state, rebuilds scales, grid,
 * lines, axes and axis labels, and (re)binds the tooltip handlers on the
 * <svg>. Safe to call repeatedly (resize, legend toggles).
 *
 * Behaviour notes:
 *  - If every series is hidden, the previously drawn lines are removed and
 *    the tooltip is disabled, so the chart does not keep showing stale data.
 *  - The tooltip reports, for each visible series, the data point whose step
 *    is nearest to the cursor (including the first and last points).
 *  - Tooltip content is built with DOM nodes / textContent, so series names
 *    coming from CSV headers are never interpreted as HTML.
 */
function render() {
  const { innerWidth, innerHeight } = updateSize();
  if (series.length === 0) return;

  const hideTooltip = () => {
    tip.style.opacity = '0';
    tip.style.transform = 'translate(-9999px, -9999px)';
  };

  // Filter visible series
  const visibleSeries = series.filter(s => !hiddenSeries.has(s.name));
  if (visibleSeries.length === 0) {
    // Nothing left to plot: clear what the previous render drew instead of
    // leaving the last-hidden series on screen. Passing null to
    // selection.on removes the listener.
    gLines.selectAll('.line').remove();
    svg.on('mousemove', null).on('mouseleave', null);
    hideTooltip();
    return;
  }

  // Get all points
  const allPoints = visibleSeries.flatMap(s => s.points);

  // Scales
  const xScale = d3.scaleLinear()
    .domain([0, d3.max(allPoints, d => d.step) || 1])
    .range([0, innerWidth])
    .nice();

  const minVal = d3.min(allPoints, d => d.value);
  const maxVal = d3.max(allPoints, d => d.value);
  // 5% multiplicative padding; NOTE(review): this only pads outward for
  // non-negative values — confirm the plotted metric is never negative.
  const yScale = d3.scaleLinear()
    .domain([minVal * 0.95, maxVal * 1.05])
    .range([innerHeight, 0]);

  // Grid: a left axis with full-width ticks and no labels/domain line.
  gGrid.selectAll('.grid-y').data([0])
    .join('g')
    .attr('class', 'grid grid-y')
    .call(d3.axisLeft(yScale)
      .tickSize(-innerWidth)
      .tickFormat('')
    )
    .call(g => g.select('.domain').remove());

  // Colors are indexed by position in the full `series` list so a series
  // keeps its colour when others are hidden.
  const colors = getColors(series.length);
  const colorScale = (name) => {
    const idx = series.findIndex(s => s.name === name);
    return colors[idx % colors.length];
  };

  // Line generator
  const line = d3.line()
    .x(d => xScale(d.step))
    .y(d => yScale(d.value))
    .curve(d3.curveMonotoneX);

  // Render lines (keyed by name so hidden series exit cleanly)
  gLines.selectAll('.line')
    .data(visibleSeries, d => d.name)
    .join('path')
    .attr('class', 'line')
    .attr('d', d => line(d.points))
    .attr('stroke', d => colorScale(d.name));

  // Axes
  gAxes.selectAll('.x-axis').data([0])
    .join('g')
    .attr('class', 'x-axis axis')
    .attr('transform', `translate(0,${innerHeight})`)
    .call(d3.axisBottom(xScale).ticks(Math.min(10, Math.floor(innerWidth / 80))));

  gAxes.selectAll('.y-axis').data([0])
    .join('g')
    .attr('class', 'y-axis axis')
    .call(d3.axisLeft(yScale).ticks(8));

  // Axis labels
  gAxes.selectAll('.x-label').data([0])
    .join('text')
    .attr('class', 'x-label axis-label')
    .attr('text-anchor', 'middle')
    .attr('x', innerWidth / 2)
    .attr('y', innerHeight + 45)
    .text('Training step');

  gAxes.selectAll('.y-label').data([0])
    .join('text')
    .attr('class', 'y-label axis-label')
    .attr('text-anchor', 'middle')
    .attr('transform', `translate(-48,${innerHeight / 2}) rotate(-90)`)
    .text('AIME 2025 Score (%)');

  // Tooltip interactions
  const bisect = d3.bisector(d => d.step).left;

  // Point whose step is closest to `step`; null for an empty series.
  // Assumes `points` is sorted by ascending step (bisection requires it) —
  // TODO confirm the CSV rows are always in step order.
  const nearestPoint = (points, step) => {
    if (points.length === 0) return null;
    const idx = bisect(points, step);
    if (idx <= 0) return points[0];
    if (idx >= points.length) return points[points.length - 1];
    const before = points[idx - 1];
    const after = points[idx];
    return (step - before.step) <= (after.step - step) ? before : after;
  };

  svg.on('mousemove', (event) => {
    const [mx] = d3.pointer(event, gRoot.node());
    const step = xScale.invert(mx);

    // Build the tooltip from DOM nodes so CSV-provided names stay plain text.
    const heading = document.createElement('strong');
    heading.textContent = `Step: ${Math.round(step)}`;
    const nodes = [heading, document.createElement('br')];

    visibleSeries.forEach(s => {
      const p = nearestPoint(s.points, step);
      if (!p) return;

      const row = document.createElement('div');
      row.style.marginTop = '4px';

      const dot = document.createElement('span');
      dot.style.color = colorScale(s.name);
      dot.textContent = '●';

      row.appendChild(dot);
      row.appendChild(document.createTextNode(` ${s.name}: ${p.value.toFixed(2)}%`));
      nodes.push(row);
    });

    tipInner.textContent = '';
    nodes.forEach(node => tipInner.appendChild(node));

    const tipBounds = tip.getBoundingClientRect();
    const [px, py] = d3.pointer(event, container);

    let tipX = px + 12;
    let tipY = py - 12;

    // Flip to the left of the cursor when the tooltip would overflow the
    // chart's right edge.
    if (tipX + tipBounds.width > width - 10) {
      tipX = px - tipBounds.width - 12;
    }
    if (tipY - tipBounds.height < 10) {
      tipY = py + 20;
    }

    tip.style.transform = `translate(${tipX}px, ${tipY}px)`;
    tip.style.opacity = '1';
  });

  svg.on('mouseleave', hideTooltip);
}
|
| 439 |
+
|
| 440 |
+
// Build (or rebuild) the legend under the chart: a `.header > .legend`
// wrapper with a title and one clickable item per series. Clicking an item
// toggles that series in `hiddenSeries`, then rebuilds the legend and
// redraws the chart.
function makeLegend() {
  // First descendant of `parent` carrying `className`, created as a <div>
  // and appended when none exists yet.
  const ensureDiv = (parent, className) => {
    const existing = parent.querySelector(`.${className}`);
    if (existing) return existing;
    const created = document.createElement('div');
    created.className = className;
    parent.appendChild(created);
    return created;
  };

  const header = ensureDiv(container, 'header');
  const legend = ensureDiv(header, 'legend');
  const title = ensureDiv(legend, 'legend-title');
  title.textContent = 'Overlong Penalty';
  const items = ensureDiv(legend, 'items');

  const palette = getColors(series.length);

  // Items are rebuilt from scratch on every call.
  items.innerHTML = '';
  for (const [index, entry] of series.entries()) {
    const swatch = document.createElement('span');
    swatch.className = 'swatch';
    swatch.style.background = palette[index % palette.length];

    const label = document.createElement('span');
    label.textContent = entry.name;

    const item = document.createElement('span');
    item.className = 'item';
    if (hiddenSeries.has(entry.name)) {
      item.classList.add('dimmed');
    }
    item.appendChild(swatch);
    item.appendChild(label);
    items.appendChild(item);

    item.addEventListener('click', () => {
      // Set#delete reports whether the name was present, which makes this a
      // toggle: remove if hidden, otherwise hide.
      if (!hiddenSeries.delete(entry.name)) {
        hiddenSeries.add(entry.name);
      }
      makeLegend();
      render();
    });
  }
}
|
| 504 |
+
|
| 505 |
+
// Load data and initialize
|
| 506 |
+
fetchFirstAvailable(CSV_PATHS)
|
| 507 |
+
.then(csvText => {
|
| 508 |
+
parseData(csvText);
|
| 509 |
+
makeLegend();
|
| 510 |
+
render();
|
| 511 |
+
|
| 512 |
+
// Responsiveness
|
| 513 |
+
if (window.ResizeObserver) {
|
| 514 |
+
const ro = new ResizeObserver(() => render());
|
| 515 |
+
ro.observe(container);
|
| 516 |
+
} else {
|
| 517 |
+
window.addEventListener('resize', render);
|
| 518 |
+
}
|
| 519 |
+
})
|
| 520 |
+
.catch(err => {
|
| 521 |
+
const pre = document.createElement('pre');
|
| 522 |
+
pre.style.color = '#f44336';
|
| 523 |
+
pre.style.fontSize = '12px';
|
| 524 |
+
pre.style.padding = '12px';
|
| 525 |
+
pre.textContent = `Error loading data: ${err.message}`;
|
| 526 |
+
container.appendChild(pre);
|
| 527 |
+
});
|
| 528 |
+
};
|
| 529 |
+
|
| 530 |
+
if (document.readyState === 'loading') {
|
| 531 |
+
document.addEventListener('DOMContentLoaded', () => ensureD3(bootstrap), { once: true });
|
| 532 |
+
} else {
|
| 533 |
+
ensureD3(bootstrap);
|
| 534 |
+
}
|
| 535 |
+
})();
|
| 536 |
+
</script>
|
app/src/content/embeds/d3-rl-full-length.html
ADDED
|
@@ -0,0 +1,734 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<div class="d3-grpo-full-length"></div>
|
| 2 |
+
<style>
|
| 3 |
+
.d3-grpo-full-length {
|
| 4 |
+
width: 100%;
|
| 5 |
+
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
|
| 6 |
+
position: relative;
|
| 7 |
+
}
|
| 8 |
+
|
| 9 |
+
.d3-grpo-full-length svg {
|
| 10 |
+
display: block;
|
| 11 |
+
width: 100%;
|
| 12 |
+
}
|
| 13 |
+
|
| 14 |
+
.d3-grpo-full-length .axis path {
|
| 15 |
+
stroke: none;
|
| 16 |
+
}
|
| 17 |
+
|
| 18 |
+
.d3-grpo-full-length .axis line {
|
| 19 |
+
stroke: var(--axis-color);
|
| 20 |
+
shape-rendering: crispEdges;
|
| 21 |
+
}
|
| 22 |
+
|
| 23 |
+
.d3-grpo-full-length .axis text {
|
| 24 |
+
fill: var(--tick-color);
|
| 25 |
+
font-size: 11px;
|
| 26 |
+
}
|
| 27 |
+
|
| 28 |
+
.d3-grpo-full-length .grid line {
|
| 29 |
+
stroke: var(--grid-color);
|
| 30 |
+
stroke-dasharray: 2,2;
|
| 31 |
+
}
|
| 32 |
+
|
| 33 |
+
.d3-grpo-full-length .confidence-band {
|
| 34 |
+
opacity: 0.15;
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
+
.d3-grpo-full-length .line {
|
| 38 |
+
fill: none;
|
| 39 |
+
stroke-width: 2;
|
| 40 |
+
stroke-linejoin: round;
|
| 41 |
+
stroke-linecap: round;
|
| 42 |
+
}
|
| 43 |
+
|
| 44 |
+
.d3-grpo-full-length .axis-label {
|
| 45 |
+
fill: var(--text-color);
|
| 46 |
+
font-size: 12px;
|
| 47 |
+
font-weight: 600;
|
| 48 |
+
}
|
| 49 |
+
|
| 50 |
+
.d3-grpo-full-length .header {
|
| 51 |
+
display: flex;
|
| 52 |
+
align-items: center;
|
| 53 |
+
justify-content: space-between;
|
| 54 |
+
flex-wrap: wrap;
|
| 55 |
+
gap: 16px;
|
| 56 |
+
margin-top: 12px;
|
| 57 |
+
padding-top: 12px;
|
| 58 |
+
border-top: 1px solid var(--border-color);
|
| 59 |
+
}
|
| 60 |
+
|
| 61 |
+
.d3-grpo-full-length .legend {
|
| 62 |
+
display: flex;
|
| 63 |
+
flex-direction: column;
|
| 64 |
+
align-items: flex-start;
|
| 65 |
+
gap: 6px;
|
| 66 |
+
}
|
| 67 |
+
|
| 68 |
+
.d3-grpo-full-length .legend-title {
|
| 69 |
+
font-size: 12px;
|
| 70 |
+
font-weight: 700;
|
| 71 |
+
color: var(--text-color);
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
.d3-grpo-full-length .legend .items {
|
| 75 |
+
display: flex;
|
| 76 |
+
flex-wrap: wrap;
|
| 77 |
+
gap: 8px 14px;
|
| 78 |
+
}
|
| 79 |
+
|
| 80 |
+
.d3-grpo-full-length .legend .item {
|
| 81 |
+
display: inline-flex;
|
| 82 |
+
align-items: center;
|
| 83 |
+
gap: 6px;
|
| 84 |
+
white-space: nowrap;
|
| 85 |
+
font-size: 12px;
|
| 86 |
+
color: var(--text-color);
|
| 87 |
+
cursor: pointer;
|
| 88 |
+
user-select: none;
|
| 89 |
+
opacity: 1;
|
| 90 |
+
transition: opacity 0.2s ease;
|
| 91 |
+
}
|
| 92 |
+
|
| 93 |
+
.d3-grpo-full-length .legend .item.dimmed {
|
| 94 |
+
opacity: 0.3;
|
| 95 |
+
}
|
| 96 |
+
|
| 97 |
+
.d3-grpo-full-length .legend .swatch {
|
| 98 |
+
width: 14px;
|
| 99 |
+
height: 14px;
|
| 100 |
+
border-radius: 3px;
|
| 101 |
+
border: 1px solid var(--border-color);
|
| 102 |
+
}
|
| 103 |
+
|
| 104 |
+
.d3-grpo-full-length .controls {
|
| 105 |
+
display: flex;
|
| 106 |
+
gap: 16px;
|
| 107 |
+
align-items: center;
|
| 108 |
+
justify-content: flex-end;
|
| 109 |
+
flex-wrap: wrap;
|
| 110 |
+
}
|
| 111 |
+
|
| 112 |
+
.d3-grpo-full-length .controls .control-group {
|
| 113 |
+
display: flex;
|
| 114 |
+
flex-direction: column;
|
| 115 |
+
align-items: flex-start;
|
| 116 |
+
gap: 6px;
|
| 117 |
+
}
|
| 118 |
+
|
| 119 |
+
.d3-grpo-full-length .controls label {
|
| 120 |
+
font-size: 12px;
|
| 121 |
+
font-weight: 700;
|
| 122 |
+
color: var(--text-color);
|
| 123 |
+
}
|
| 124 |
+
|
| 125 |
+
.d3-grpo-full-length .controls .toggle-group {
|
| 126 |
+
display: flex;
|
| 127 |
+
gap: 8px;
|
| 128 |
+
align-items: center;
|
| 129 |
+
}
|
| 130 |
+
|
| 131 |
+
.d3-grpo-full-length .controls .toggle-btn {
|
| 132 |
+
padding: 6px 12px;
|
| 133 |
+
font-size: 12px;
|
| 134 |
+
border: 1px solid var(--border-color);
|
| 135 |
+
border-radius: 8px;
|
| 136 |
+
background: var(--surface-bg);
|
| 137 |
+
color: var(--text-color);
|
| 138 |
+
cursor: pointer;
|
| 139 |
+
transition: all 0.2s ease;
|
| 140 |
+
}
|
| 141 |
+
|
| 142 |
+
.d3-grpo-full-length .controls .toggle-btn:hover {
|
| 143 |
+
background: var(--primary-color);
|
| 144 |
+
color: white;
|
| 145 |
+
border-color: var(--primary-color);
|
| 146 |
+
}
|
| 147 |
+
|
| 148 |
+
.d3-grpo-full-length .controls .toggle-btn.active {
|
| 149 |
+
background: var(--primary-color);
|
| 150 |
+
color: white;
|
| 151 |
+
border-color: var(--primary-color);
|
| 152 |
+
}
|
| 153 |
+
</style>
|
| 154 |
+
<script>
|
| 155 |
+
(() => {
|
| 156 |
+
/**
 * Invoke `cb` once a usable global `d3` is available.
 *
 * If `window.d3` already exposes `select`, `cb` runs synchronously and its
 * result is returned. Otherwise a single shared <script id="d3-cdn-script">
 * pointing at the jsDelivr d3@7 bundle is created (or reused if another
 * embed already added it) and `cb` runs from its `load` event.
 *
 * A load failure is reported on the console; previously it left the embed
 * blank with no diagnostic at all.
 *
 * @param {Function} cb - Callback to run when d3 is ready.
 */
const ensureD3 = (cb) => {
  const hasD3 = () => Boolean(window.d3) && typeof window.d3.select === 'function';
  if (hasD3()) return cb();

  let s = document.getElementById('d3-cdn-script');
  if (!s) {
    s = document.createElement('script');
    s.id = 'd3-cdn-script';
    s.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(s);
  }

  s.addEventListener('load', () => {
    if (hasD3()) cb();
  }, { once: true });

  s.addEventListener('error', () => {
    console.error(`Failed to load d3 from ${s.src}; chart will not render.`);
  }, { once: true });
};
|
| 171 |
+
|
| 172 |
+
const bootstrap = () => {
|
| 173 |
+
const scriptEl = document.currentScript;
|
| 174 |
+
let container = scriptEl ? scriptEl.previousElementSibling : null;
|
| 175 |
+
if (!(container && container.classList && container.classList.contains('d3-grpo-full-length'))) {
|
| 176 |
+
const candidates = Array.from(document.querySelectorAll('.d3-grpo-full-length'))
|
| 177 |
+
.filter((el) => !(el.dataset && el.dataset.mounted === 'true'));
|
| 178 |
+
container = candidates[candidates.length - 1] || null;
|
| 179 |
+
}
|
| 180 |
+
if (!container) return;
|
| 181 |
+
if (container.dataset) {
|
| 182 |
+
if (container.dataset.mounted === 'true') return;
|
| 183 |
+
container.dataset.mounted = 'true';
|
| 184 |
+
}
|
| 185 |
+
|
| 186 |
+
// Data loading configuration
|
| 187 |
+
let mountEl = container;
|
| 188 |
+
while (mountEl && !mountEl.getAttribute?.('data-datafiles')) {
|
| 189 |
+
mountEl = mountEl.parentElement;
|
| 190 |
+
}
|
| 191 |
+
let providedData = null;
|
| 192 |
+
try {
|
| 193 |
+
const attr = mountEl && mountEl.getAttribute ? mountEl.getAttribute('data-datafiles') : null;
|
| 194 |
+
if (attr && attr.trim()) {
|
| 195 |
+
providedData = attr.trim().startsWith('[') ? JSON.parse(attr) : attr.trim();
|
| 196 |
+
}
|
| 197 |
+
} catch (_) {}
|
| 198 |
+
|
| 199 |
+
const DEFAULT_CSV = '/data/grpo/rl_reward_curves.csv';
|
| 200 |
+
// Resolve a data-file reference against the site's /data/ root.
// Anything that is not a non-empty string is returned unchanged; a leading
// "/" marks the path as already absolute.
const ensureDataPrefix = (p) => {
  const isUsablePath = typeof p === 'string' && p.length > 0;
  if (!isUsablePath) return p;
  return p.startsWith('/') ? p : `/data/${p}`;
};
|
| 207 |
+
// Normalise the `data-datafiles` value to a list of candidate paths:
// arrays are prefixed element-wise, a lone string becomes a single-entry
// list, and every other input produces null.
const normalizeInput = (inp) => {
  if (Array.isArray(inp)) return inp.map(ensureDataPrefix);
  if (typeof inp === 'string') return [ensureDataPrefix(inp)];
  return null;
};
|
| 210 |
+
|
| 211 |
+
const CSV_PATHS = Array.isArray(providedData)
|
| 212 |
+
? normalizeInput(providedData)
|
| 213 |
+
: (typeof providedData === 'string' ? normalizeInput(providedData) || [DEFAULT_CSV] : [
|
| 214 |
+
DEFAULT_CSV,
|
| 215 |
+
'./assets/data/grpo/rl_reward_curves.csv',
|
| 216 |
+
'../assets/data/grpo/rl_reward_curves.csv',
|
| 217 |
+
'../../assets/data/grpo/rl_reward_curves.csv'
|
| 218 |
+
]);
|
| 219 |
+
|
| 220 |
+
// Walk the candidate paths in order and resolve with the text of the first
// response whose status is OK. Each failed attempt (network error or non-OK
// status) is recorded, and the final rejection lists them all.
const fetchFirstAvailable = async (paths) => {
  const failures = [];
  for (const candidate of paths) {
    try {
      const response = await fetch(candidate, { cache: 'no-cache' });
      if (!response.ok) {
        failures.push(`${candidate}: ${response.status}`);
        continue;
      }
      // Kept inside the try: a failing body read moves on to the next path.
      return await response.text();
    } catch (cause) {
      failures.push(`${candidate}: ${cause.message}`);
    }
  }
  throw new Error(`CSV not found. Tried:\n${failures.join('\n')}`);
};
|
| 233 |
+
|
| 234 |
+
// Tooltip setup
|
| 235 |
+
container.style.position = container.style.position || 'relative';
|
| 236 |
+
let tip = container.querySelector('.d3-tooltip');
|
| 237 |
+
let tipInner;
|
| 238 |
+
if (!tip) {
|
| 239 |
+
tip = document.createElement('div');
|
| 240 |
+
tip.className = 'd3-tooltip';
|
| 241 |
+
Object.assign(tip.style, {
|
| 242 |
+
position: 'absolute',
|
| 243 |
+
top: '0px',
|
| 244 |
+
left: '0px',
|
| 245 |
+
transform: 'translate(-9999px, -9999px)',
|
| 246 |
+
pointerEvents: 'none',
|
| 247 |
+
padding: '8px 10px',
|
| 248 |
+
borderRadius: '8px',
|
| 249 |
+
fontSize: '12px',
|
| 250 |
+
lineHeight: '1.35',
|
| 251 |
+
border: '1px solid var(--border-color)',
|
| 252 |
+
background: 'var(--surface-bg)',
|
| 253 |
+
color: 'var(--text-color)',
|
| 254 |
+
boxShadow: '0 4px 24px rgba(0,0,0,.18)',
|
| 255 |
+
opacity: '0',
|
| 256 |
+
transition: 'opacity .12s ease',
|
| 257 |
+
zIndex: '1000'
|
| 258 |
+
});
|
| 259 |
+
tipInner = document.createElement('div');
|
| 260 |
+
tipInner.className = 'd3-tooltip__inner';
|
| 261 |
+
tipInner.style.textAlign = 'left';
|
| 262 |
+
tip.appendChild(tipInner);
|
| 263 |
+
container.appendChild(tip);
|
| 264 |
+
} else {
|
| 265 |
+
tipInner = tip.querySelector('.d3-tooltip__inner') || tip;
|
| 266 |
+
}
|
| 267 |
+
|
| 268 |
+
// SVG setup
|
| 269 |
+
const svg = d3.select(container).append('svg').attr('width', '100%').style('display', 'block');
|
| 270 |
+
const gRoot = svg.append('g');
|
| 271 |
+
const gGrid = gRoot.append('g').attr('class', 'grid');
|
| 272 |
+
const gBands = gRoot.append('g').attr('class', 'bands');
|
| 273 |
+
const gLines = gRoot.append('g').attr('class', 'lines');
|
| 274 |
+
const gAxes = gRoot.append('g').attr('class', 'axes');
|
| 275 |
+
|
| 276 |
+
// State
|
| 277 |
+
let width = 800, height = 400;
|
| 278 |
+
const margin = { top: 16, right: 28, bottom: 56, left: 64 };
|
| 279 |
+
let rawData = {}; // Store both datasets
|
| 280 |
+
let series = [];
|
| 281 |
+
let hiddenSeries = new Set();
|
| 282 |
+
let showRunningAverage = true;
|
| 283 |
+
let currentMetric = 'reward'; // 'reward' or 'length'
|
| 284 |
+
const RUNNING_AVG_WINDOW = 50; // steps
|
| 285 |
+
|
| 286 |
+
// Color setup
|
| 287 |
+
// Categorical palette for `count` series: use the page-level
// `window.ColorPalettes` helper when available, otherwise a fixed
// six-colour fallback list.
const getColors = (count) => {
  const palettes = window.ColorPalettes;
  const hasPaletteHelper = Boolean(palettes && palettes.getColors);
  if (!hasPaletteHelper) {
    // Fallback colors
    return ['#4E79A7', '#F28E2B', '#E15759', '#76B7B2', '#59A14F', '#EDC948'];
  }
  return palettes.getColors('categorical', count);
};
|
| 294 |
+
|
| 295 |
+
// Calculate running average based on step window
|
| 296 |
+
// Trailing running average over a window measured in steps (not in number
// of samples). For each point, every point whose step lies in
// [step - windowSize, step] contributes; `mean`, `min` and `max` are each
// averaged independently with d3.mean. Output keeps the input order and
// the original step values.
function calculateRunningAverage(points, windowSize) {
  if (points.length === 0) return [];

  return points.flatMap(({ step: currentStep }) => {
    const lowerBound = currentStep - windowSize;
    const inWindow = points.filter(p => p.step >= lowerBound && p.step <= currentStep);

    // A point with an empty window contributes nothing to the output.
    if (inWindow.length === 0) return [];

    const smoothed = {
      step: currentStep,
      mean: d3.mean(inWindow, p => p.mean),
      min: d3.mean(inWindow, p => p.min),
      max: d3.mean(inWindow, p => p.max)
    };
    return [smoothed];
  });
}
|
| 323 |
+
|
| 324 |
+
/**
 * Parse an exported metrics CSV into the shared `series` array.
 *
 * Each run contributes three columns named `<run> - <metric>`,
 * `<run> - <metric>__MIN` and `<run> - <metric>__MAX`; the x value comes
 * from the `train/global_step` column. Every run becomes
 * `{ name, fullName, points, runningAvgPoints }`.
 *
 * Fixes over the previous version:
 *  - Run columns are matched by exact `" - <metric>"` suffix, so unrelated
 *    columns that merely contain the metric name (e.g. a `..._std` column)
 *    no longer create ghost series, and run names containing "MIN", "MAX"
 *    or " - " are handled correctly.
 *  - Blank min/max cells become NaN (ignored by d3.mean) instead of 0.
 *  - A CSV without data rows raises a readable error.
 *
 * @param {string} csvText - Raw CSV text.
 * @param {string} metricType - 'reward' selects `train/reward`; any other
 *   value selects `train/completions/mean_terminated_length`.
 * @throws {Error} When the CSV has no data rows.
 */
function parseData(csvText, metricType) {
  const rows = d3.csvParse(csvText);
  if (rows.length === 0) {
    throw new Error('CSV contains no data rows');
  }

  // Determine metric column suffix based on type
  const metricSuffix = metricType === 'reward'
    ? 'train/reward'
    : 'train/completions/mean_terminated_length';
  const columnSuffix = ` - ${metricSuffix}`;

  // `+''` is 0, so blank or absent cells must be mapped to NaN explicitly.
  const toNumber = (raw) => (raw == null || String(raw).trim() === '' ? NaN : +raw);

  // The mean column is the one ending exactly in the suffix; the __MIN and
  // __MAX companions end differently and are therefore skipped here.
  const runNames = Object.keys(rows[0])
    .filter(h => h.endsWith(columnSuffix))
    .map(h => h.slice(0, -columnSuffix.length));

  // For v18.00, just use a simple label
  const displayNameMap = {
    'grpo-SmollM3-3B-GRPO-no-think-v18.00': 'No Penalty'
  };

  // Build series data using train/global_step for x-axis
  series = runNames.map(runName => {
    const meanCol = `${runName}${columnSuffix}`;
    const minCol = `${meanCol}__MIN`;
    const maxCol = `${meanCol}__MAX`;

    const points = rows
      // Rows where this run has no step or no mean value carry no data.
      .filter(row => row['train/global_step'] && row[meanCol])
      .map(row => ({
        step: toNumber(row['train/global_step']),
        mean: toNumber(row[meanCol]),
        min: toNumber(row[minCol]),
        max: toNumber(row[maxCol])
      }))
      .filter(p => !Number.isNaN(p.step) && !Number.isNaN(p.mean));

    return {
      name: displayNameMap[runName] || runName,
      fullName: runName,
      points,
      runningAvgPoints: calculateRunningAverage(points, RUNNING_AVG_WINDOW)
    };
  });
}
|
| 378 |
+
|
| 379 |
+
// Re-measure the container, size the <svg> accordingly, and return the
// plot area that remains inside the margins.
// Width defaults to 800 when the container reports none; height follows a
// 2.5:1 aspect ratio but never drops below 320.
function updateSize() {
  const measuredWidth = container.clientWidth;
  width = measuredWidth || 800;
  height = Math.max(320, Math.round(width / 2.5));

  svg.attr('width', width);
  svg.attr('height', height);

  const { top, right, bottom, left } = margin;
  gRoot.attr('transform', `translate(${left},${top})`);

  const innerWidth = width - left - right;
  const innerHeight = height - top - bottom;
  return { innerWidth, innerHeight };
}
|
| 389 |
+
|
| 390 |
+
/**
 * Redraw the chart for the current state at the container's current size.
 *
 * Reads the enclosing `series`, `hiddenSeries`, `showRunningAverage` and
 * `currentMetric`; draws grid, lines, axes and axis labels; and (re)binds the
 * tooltip handlers on `svg`.
 *
 * When there is nothing to plot (e.g. every series is toggled off in the
 * legend) the previously drawn lines are removed and the tooltip is disabled,
 * instead of leaving the stale drawing on screen.
 */
function render() {
  const { innerWidth, innerHeight } = updateSize();
  if (series.length === 0) return;

  const hideTooltip = () => {
    tip.style.opacity = '0';
    tip.style.transform = 'translate(-9999px, -9999px)';
  };

  // Filter visible series
  const visibleSeries = series.filter(s => !hiddenSeries.has(s.name));

  // Select which points to use based on running average toggle
  const getPoints = (s) => (showRunningAverage ? s.runningAvgPoints : s.points);

  // No filtering for full-length data
  const getFilteredPoints = (s) => getPoints(s);

  // Get all points for domain calculation
  const allPoints = visibleSeries.flatMap(s => getFilteredPoints(s));

  if (allPoints.length === 0) {
    // Nothing to plot: clear stale lines and stop showing tooltips. This also
    // avoids building a NaN y-domain from d3.min/d3.max of an empty array.
    gLines.selectAll('.line').remove();
    svg.on('mousemove', null);
    hideTooltip();
    return;
  }

  // Scales - calculate from data
  const xScale = d3.scaleLinear()
    .domain([0, d3.max(allPoints, d => d.step) || 1])
    .range([0, innerWidth])
    .nice();

  // Set y-axis domain based on current metric - calculate from data.
  // 5% headroom on each side; the padding always moves away from the data,
  // including when a bound is negative.
  const minVal = d3.min(allPoints, d => d.mean);
  const maxVal = d3.max(allPoints, d => d.mean);
  let yLow = minVal >= 0 ? minVal * 0.95 : minVal * 1.05;
  let yHigh = maxVal >= 0 ? maxVal * 1.05 : maxVal * 0.95;
  if (yLow === yHigh) {
    // Degenerate case (all values are 0): give the scale a non-empty extent.
    yLow -= 1;
    yHigh += 1;
  }
  const yDomain = [yLow, yHigh];

  const yScale = d3.scaleLinear()
    .domain(yDomain)
    .range([innerHeight, 0]);

  // Grid
  gGrid.selectAll('.grid-y').data([0])
    .join('g')
    .attr('class', 'grid grid-y')
    .call(d3.axisLeft(yScale)
      .tickSize(-innerWidth)
      .tickFormat('')
    )
    .call(g => g.select('.domain').remove());

  // Colors: indexed by position in the full series list so a series keeps
  // its color when others are hidden.
  const colors = getColors(series.length);
  const colorScale = (name) => {
    const idx = series.findIndex(s => s.name === name);
    return colors[idx % colors.length];
  };

  // Line generator
  const line = d3.line()
    .x(d => xScale(d.step))
    .y(d => yScale(d.mean))
    .curve(d3.curveMonotoneX);

  // Render lines
  gLines.selectAll('.line')
    .data(visibleSeries, d => d.name)
    .join('path')
    .attr('class', 'line')
    .attr('d', d => line(getFilteredPoints(d)))
    .attr('stroke', d => colorScale(d.name));

  // Axes
  gAxes.selectAll('.x-axis').data([0])
    .join('g')
    .attr('class', 'x-axis axis')
    .attr('transform', `translate(0,${innerHeight})`)
    .call(d3.axisBottom(xScale).ticks(Math.min(10, Math.floor(innerWidth / 80))));

  gAxes.selectAll('.y-axis').data([0])
    .join('g')
    .attr('class', 'y-axis axis')
    .call(d3.axisLeft(yScale).ticks(8));

  // Axis labels
  gAxes.selectAll('.x-label').data([0])
    .join('text')
    .attr('class', 'x-label axis-label')
    .attr('text-anchor', 'middle')
    .attr('x', innerWidth / 2)
    .attr('y', innerHeight + 45)
    .text('Training step');

  gAxes.selectAll('.y-label').data([0])
    .join('text')
    .attr('class', 'y-label axis-label')
    .attr('text-anchor', 'middle')
    .attr('transform', `translate(-48,${innerHeight / 2}) rotate(-90)`)
    .text(currentMetric === 'reward' ? 'Reward' : 'Mean Terminated Length');

  // Tooltip interactions (bisect assumes each series is sorted by step)
  const bisect = d3.bisector(d => d.step).left;

  svg.on('mousemove', function(event) {
    const [mx] = d3.pointer(event, gRoot.node());
    if (mx < 0 || mx > innerWidth) {
      // Pointer is over the margins, outside the plotted x-range.
      hideTooltip();
      return;
    }
    const step = xScale.invert(mx);

    let tooltipHtml = `<strong>Step: ${Math.round(step)}</strong>`;
    if (showRunningAverage) {
      tooltipHtml += ` <span style="font-weight:normal;font-size:11px">(${RUNNING_AVG_WINDOW}-step avg)</span>`;
    }
    tooltipHtml += `<br/>`;

    visibleSeries.forEach(s => {
      const points = getFilteredPoints(s);
      // `idx` is the first point at or after the cursor; report whichever of
      // that point and its predecessor is closer to the cursor.
      const idx = bisect(points, step);
      const before = points[idx - 1];
      const after = points[idx];
      let p = after ?? before;
      if (before && after && step - before.step <= after.step - step) {
        p = before;
      }
      if (!p) return; // series has no points

      const color = colorScale(s.name);
      const valueStr = currentMetric === 'reward'
        ? `${(p.mean * 100).toFixed(1)}%`
        : `${p.mean.toFixed(1)} tokens`;
      tooltipHtml += `<div style="margin-top:4px"><span style="color:${color}">●</span> ${s.name}: ${valueStr}</div>`;
    });

    tipInner.innerHTML = tooltipHtml;
    const tipBounds = tip.getBoundingClientRect();
    const [px, py] = d3.pointer(event, container);

    let tipX = px + 12;
    let tipY = py - 12;

    // Flip to the left of the cursor when the tooltip would overflow the chart
    if (tipX + tipBounds.width > width - 10) {
      tipX = px - tipBounds.width - 12;
    }
    if (tipY - tipBounds.height < 10) {
      tipY = py + 20;
    }

    tip.style.transform = `translate(${tipX}px, ${tipY}px)`;
    tip.style.opacity = '1';
  });

  svg.on('mouseleave', hideTooltip);
}
|
| 530 |
+
|
| 531 |
+
/**
 * Build (or rebuild) the clickable legend inside the container's `.header`.
 *
 * Missing wrapper elements (`.header`, `.legend`, `.legend-title`, `.items`)
 * are created on demand; the item list is always regenerated from `series`.
 * Clicking an item toggles that series in `hiddenSeries`, then rebuilds the
 * legend and redraws the chart.
 */
function makeLegend() {
  // Return the `.className` child of `parent`, appending a new <div> if absent.
  const ensureDiv = (parent, className) => {
    let node = parent.querySelector(`.${className}`);
    if (!node) {
      node = document.createElement('div');
      node.className = className;
      parent.appendChild(node);
    }
    return node;
  };

  const header = ensureDiv(container, 'header');
  const legend = ensureDiv(header, 'legend');

  const title = ensureDiv(legend, 'legend-title');
  title.textContent = 'Configuration';

  const items = ensureDiv(legend, 'items');

  const palette = getColors(series.length);

  items.innerHTML = '';
  series.forEach((entry, index) => {
    const swatch = document.createElement('span');
    swatch.className = 'swatch';
    swatch.style.background = palette[index % palette.length];

    const label = document.createElement('span');
    label.textContent = entry.name;

    const item = document.createElement('span');
    // Hidden series are rendered dimmed
    item.className = hiddenSeries.has(entry.name) ? 'item dimmed' : 'item';
    item.appendChild(swatch);
    item.appendChild(label);
    items.appendChild(item);

    item.addEventListener('click', () => {
      // Toggle membership: delete() reports whether the name was present.
      if (!hiddenSeries.delete(entry.name)) {
        hiddenSeries.add(entry.name);
      }
      makeLegend();
      render();
    });
  });
}
|
| 595 |
+
|
| 596 |
+
/**
 * Build (or rebuild) the control panel inside the container's `.header`:
 * a "Metric" group (Reward / Length) and a "Display" group (running-average
 * toggle). The panel is regenerated from scratch on every call so the
 * `active` classes always reflect `currentMetric` / `showRunningAverage`.
 */
function makeControls() {
  let header = container.querySelector('.header');
  if (!header) {
    header = document.createElement('div');
    header.className = 'header';
    container.appendChild(header);
  }

  let controls = header.querySelector('.controls');
  if (!controls) {
    controls = document.createElement('div');
    controls.className = 'controls';
    header.appendChild(controls);
  }

  controls.innerHTML = '';

  // One toggle button; `isActive` adds the `active` class.
  const toggleButton = (text, isActive, onClick) => {
    const btn = document.createElement('button');
    btn.className = isActive ? 'toggle-btn active' : 'toggle-btn';
    btn.textContent = text;
    btn.addEventListener('click', () => onClick());
    return btn;
  };

  // A labelled group: <div.control-group><label/><div.toggle-group>…buttons
  const controlGroup = (labelText, buttons) => {
    const group = document.createElement('div');
    group.className = 'control-group';

    const label = document.createElement('label');
    label.textContent = labelText;
    group.appendChild(label);

    const toggles = document.createElement('div');
    toggles.className = 'toggle-group';
    buttons.forEach((btn) => toggles.appendChild(btn));
    group.appendChild(toggles);

    return group;
  };

  // Switch the plotted metric; a click on the already-active metric is a no-op.
  const switchMetric = (metric) => {
    if (currentMetric === metric) return;
    currentMetric = metric;
    parseData(rawData[metric], metric);
    makeControls();
    makeLegend();
    render();
  };

  // Metric selection group
  controls.appendChild(controlGroup('Metric', [
    toggleButton('Reward', currentMetric === 'reward', () => switchMetric('reward')),
    toggleButton('Length', currentMetric === 'length', () => switchMetric('length')),
  ]));

  // Display options group
  controls.appendChild(controlGroup('Display', [
    toggleButton(`Running Avg (${RUNNING_AVG_WINDOW} steps)`, showRunningAverage, () => {
      showRunningAverage = !showRunningAverage;
      makeControls();
      render();
    }),
  ]));
}
|
| 679 |
+
|
| 680 |
+
// Load both datasets. Each list holds candidate URLs for the same CSV, tried
// in order by fetchFirstAvailable until one responds successfully.
const REWARD_PATHS = [
  '/data/grpo/rl_reward_full_length.csv',
  './assets/data/grpo/rl_reward_full_length.csv',
  '../assets/data/grpo/rl_reward_full_length.csv',
  '../../assets/data/grpo/rl_reward_full_length.csv'
];

const LENGTH_PATHS = [
  '/data/grpo/rl_mean_terminated_length_full_length.csv',
  './assets/data/grpo/rl_mean_terminated_length_full_length.csv',
  '../assets/data/grpo/rl_mean_terminated_length_full_length.csv',
  '../../assets/data/grpo/rl_mean_terminated_length_full_length.csv'
];

Promise.all([REWARD_PATHS, LENGTH_PATHS].map((paths) => fetchFirstAvailable(paths)))
  .then((texts) => {
    const [rewardCsvText, lengthCsvText] = texts;

    // Keep both raw CSV texts so the metric toggle can re-parse without refetching
    rawData.reward = rewardCsvText;
    rawData.length = lengthCsvText;

    // The chart starts on the reward metric
    parseData(rewardCsvText, 'reward');
    makeLegend();
    makeControls();
    render();

    // Responsiveness: redraw when the container (or, as a fallback, the window) resizes
    if (!window.ResizeObserver) {
      window.addEventListener('resize', render);
      return;
    }
    const ro = new ResizeObserver(() => render());
    ro.observe(container);
  })
  .catch((err) => {
    // Covers both fetch failures and errors thrown while parsing/rendering above
    const pre = document.createElement('pre');
    Object.assign(pre.style, {
      color: '#f44336',
      fontSize: '12px',
      padding: '12px'
    });
    pre.textContent = `Error loading data: ${err.message}`;
    container.appendChild(pre);
  });
|
| 726 |
+
};
|
| 727 |
+
|
| 728 |
+
// Start immediately when the document has already been parsed; otherwise wait
// (once) for DOMContentLoaded. Either way d3 is ensured before bootstrap runs.
if (document.readyState !== 'loading') {
  ensureD3(bootstrap);
} else {
  document.addEventListener('DOMContentLoaded', () => {
    ensureD3(bootstrap);
  }, { once: true });
}
|
| 733 |
+
})();
|
| 734 |
+
</script>
|
app/src/content/embeds/d3-rl-reward-curves.html
ADDED
|
@@ -0,0 +1,770 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<div class="d3-grpo-reward-curves"></div>
|
| 2 |
+
<style>
  /* ---- Chart root ---- */
  .d3-grpo-reward-curves {
    position: relative;
    width: 100%;
    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
  }

  .d3-grpo-reward-curves svg {
    display: block;
    width: 100%;
  }

  /* ---- Axes and grid (colors come from the host page's CSS variables) ---- */
  .d3-grpo-reward-curves .axis path {
    stroke: none;
  }

  .d3-grpo-reward-curves .axis line {
    stroke: var(--axis-color);
    shape-rendering: crispEdges;
  }

  .d3-grpo-reward-curves .axis text {
    fill: var(--tick-color);
    font-size: 11px;
  }

  .d3-grpo-reward-curves .grid line {
    stroke: var(--grid-color);
    stroke-dasharray: 2,2;
  }

  .d3-grpo-reward-curves .axis-label {
    fill: var(--text-color);
    font-size: 12px;
    font-weight: 600;
  }

  /* ---- Series ---- */
  .d3-grpo-reward-curves .confidence-band {
    opacity: 0.15;
  }

  .d3-grpo-reward-curves .line {
    fill: none;
    stroke-width: 2;
    stroke-linejoin: round;
    stroke-linecap: round;
  }

  /* ---- Header row below the chart: legend on one side, controls on the other ---- */
  .d3-grpo-reward-curves .header {
    display: flex;
    flex-wrap: wrap;
    align-items: center;
    justify-content: space-between;
    gap: 16px;
    margin-top: 12px;
    padding-top: 12px;
    border-top: 1px solid var(--border-color);
  }

  /* ---- Legend ---- */
  .d3-grpo-reward-curves .legend {
    display: flex;
    flex-direction: column;
    align-items: flex-start;
    gap: 6px;
  }

  .d3-grpo-reward-curves .legend-title {
    font-size: 12px;
    font-weight: 700;
    color: var(--text-color);
  }

  .d3-grpo-reward-curves .legend .items {
    display: flex;
    flex-wrap: wrap;
    gap: 8px 14px;
  }

  /* Legend entries are clickable toggles */
  .d3-grpo-reward-curves .legend .item {
    display: inline-flex;
    align-items: center;
    gap: 6px;
    white-space: nowrap;
    font-size: 12px;
    color: var(--text-color);
    cursor: pointer;
    user-select: none;
    opacity: 1;
    transition: opacity 0.2s ease;
  }

  /* Entry of a series that is currently hidden */
  .d3-grpo-reward-curves .legend .item.dimmed {
    opacity: 0.3;
  }

  .d3-grpo-reward-curves .legend .swatch {
    width: 14px;
    height: 14px;
    border: 1px solid var(--border-color);
    border-radius: 3px;
  }

  /* ---- Controls ---- */
  .d3-grpo-reward-curves .controls {
    display: flex;
    flex-wrap: wrap;
    align-items: center;
    justify-content: flex-end;
    gap: 16px;
  }

  .d3-grpo-reward-curves .controls .control-group {
    display: flex;
    flex-direction: column;
    align-items: flex-start;
    gap: 6px;
  }

  .d3-grpo-reward-curves .controls label {
    font-size: 12px;
    font-weight: 700;
    color: var(--text-color);
  }

  .d3-grpo-reward-curves .controls .toggle-group {
    display: flex;
    align-items: center;
    gap: 8px;
  }

  .d3-grpo-reward-curves .controls .toggle-btn {
    padding: 6px 12px;
    font-size: 12px;
    color: var(--text-color);
    background: var(--surface-bg);
    border: 1px solid var(--border-color);
    border-radius: 8px;
    cursor: pointer;
    transition: all 0.2s ease;
  }

  /* Hovered and selected buttons share the same highlighted look */
  .d3-grpo-reward-curves .controls .toggle-btn:hover,
  .d3-grpo-reward-curves .controls .toggle-btn.active {
    color: white;
    background: var(--primary-color);
    border-color: var(--primary-color);
  }
</style>
|
| 154 |
+
<script>
|
| 155 |
+
(() => {
|
| 156 |
+
// Invoke `cb` once a usable global d3 (one exposing `select`) is available.
// If d3 is already usable, `cb` runs synchronously and its result is returned.
// Otherwise a single shared <script id="d3-cdn-script"> is injected (or reused
// when another embed already added it) and `cb` runs from its `load` event.
const ensureD3 = (cb) => {
  const d3Usable = () => Boolean(window.d3) && typeof window.d3.select === 'function';

  if (d3Usable()) return cb();

  let tag = document.getElementById('d3-cdn-script');
  if (!tag) {
    tag = document.createElement('script');
    tag.id = 'd3-cdn-script';
    tag.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(tag);
  }

  const runIfUsable = () => {
    if (d3Usable()) cb();
  };

  tag.addEventListener('load', runIfUsable, { once: true });
  if (window.d3) runIfUsable();
};
|
| 171 |
+
|
| 172 |
+
const bootstrap = () => {
|
| 173 |
+
const scriptEl = document.currentScript;
|
| 174 |
+
let container = scriptEl ? scriptEl.previousElementSibling : null;
|
| 175 |
+
if (!(container && container.classList && container.classList.contains('d3-grpo-reward-curves'))) {
|
| 176 |
+
const candidates = Array.from(document.querySelectorAll('.d3-grpo-reward-curves'))
|
| 177 |
+
.filter((el) => !(el.dataset && el.dataset.mounted === 'true'));
|
| 178 |
+
container = candidates[candidates.length - 1] || null;
|
| 179 |
+
}
|
| 180 |
+
if (!container) return;
|
| 181 |
+
if (container.dataset) {
|
| 182 |
+
if (container.dataset.mounted === 'true') return;
|
| 183 |
+
container.dataset.mounted = 'true';
|
| 184 |
+
}
|
| 185 |
+
|
| 186 |
+
// Data loading configuration.
// The CSV location(s) can be overridden with a `data-datafiles` attribute on
// the container or any of its ancestors: either a single path, or a JSON
// array of paths (detected by a leading '[').
let mountEl = container;
while (mountEl && !mountEl.getAttribute?.('data-datafiles')) {
  mountEl = mountEl.parentElement;
}
let providedData = null;
try {
  const attr = mountEl && mountEl.getAttribute ? mountEl.getAttribute('data-datafiles') : null;
  if (attr && attr.trim()) {
    providedData = attr.trim().startsWith('[') ? JSON.parse(attr) : attr.trim();
  }
} catch (err) {
  // Still best-effort (the defaults below are used), but report the problem
  // instead of hiding a malformed attribute from whoever wrote it.
  console.warn('[d3-grpo-reward-curves] Ignoring invalid data-datafiles attribute:', err);
}

const DEFAULT_CSV = '/data/grpo/rl_reward_curves.csv';

// Paths starting with '/' are used as-is; anything else is resolved under /data/.
const ensureDataPrefix = (p) => {
  if (typeof p !== 'string' || !p) return p;
  if (p.startsWith('/')) return p;
  return `/data/${p}`;
};

// Array -> prefixed array, string -> one-element array, anything else -> null.
const normalizeInput = (inp) => Array.isArray(inp)
  ? inp.map(ensureDataPrefix)
  : (typeof inp === 'string' ? [ensureDataPrefix(inp)] : null);

// Candidate URLs in priority order. Caller-provided paths win; entries that
// are not non-empty strings are dropped, and when nothing usable remains
// (no attribute, empty array, ...) the built-in locations are used.
const CSV_PATHS = (() => {
  const requested = (normalizeInput(providedData) || [])
    .filter((p) => typeof p === 'string' && p !== '');
  if (requested.length > 0) return requested;
  return [
    DEFAULT_CSV,
    './assets/data/grpo/rl_reward_curves.csv',
    '../assets/data/grpo/rl_reward_curves.csv',
    '../../assets/data/grpo/rl_reward_curves.csv'
  ];
})();
|
| 219 |
+
|
| 220 |
+
// Try each URL in order and resolve with the body text of the first one that
// answers with an OK status. Network errors and non-OK statuses are collected;
// if every candidate fails, reject with an Error listing all attempts.
const fetchFirstAvailable = async (paths) => {
  const failures = [];

  // Resolve to { text } on success or { reason } on a non-OK response.
  const attempt = async (url) => {
    const response = await fetch(url, { cache: 'no-cache' });
    if (!response.ok) {
      return { reason: `${url}: ${response.status}` };
    }
    return { text: await response.text() };
  };

  for (const url of paths) {
    try {
      const outcome = await attempt(url);
      if ('text' in outcome) return outcome.text;
      failures.push(outcome.reason);
    } catch (err) {
      failures.push(`${url}: ${err.message}`);
    }
  }

  throw new Error(`CSV not found. Tried:\n${failures.join('\n')}`);
};
|
| 233 |
+
|
| 234 |
+
// Tooltip setup.
// The tooltip is absolutely positioned inside the container, so the container
// must establish a positioning context. An existing `.d3-tooltip` is reused;
// otherwise one is created, parked off-screen and fully transparent.
container.style.position = container.style.position || 'relative';
let tip = container.querySelector('.d3-tooltip');
let tipInner;
if (tip) {
  // Reuse: write into the inner wrapper when present, else into the tooltip itself
  tipInner = tip.querySelector('.d3-tooltip__inner') || tip;
} else {
  const tooltipStyle = {
    position: 'absolute',
    top: '0px',
    left: '0px',
    transform: 'translate(-9999px, -9999px)',
    pointerEvents: 'none',
    padding: '8px 10px',
    borderRadius: '8px',
    fontSize: '12px',
    lineHeight: '1.35',
    border: '1px solid var(--border-color)',
    background: 'var(--surface-bg)',
    color: 'var(--text-color)',
    boxShadow: '0 4px 24px rgba(0,0,0,.18)',
    opacity: '0',
    transition: 'opacity .12s ease',
    zIndex: '1000'
  };

  tip = document.createElement('div');
  tip.className = 'd3-tooltip';
  Object.assign(tip.style, tooltipStyle);

  tipInner = document.createElement('div');
  tipInner.className = 'd3-tooltip__inner';
  tipInner.style.textAlign = 'left';

  tip.appendChild(tipInner);
  container.appendChild(tip);
}
|
| 267 |
+
|
| 268 |
+
// SVG setup: one root group (translated by the margins on every resize) with
// stacked layers. Append order is paint order: grid, bands, lines, then axes.
const svg = d3.select(container).append('svg');
svg.attr('width', '100%').style('display', 'block');
const gRoot = svg.append('g');
const gGrid = gRoot.append('g').attr('class', 'grid');
const gBands = gRoot.append('g').attr('class', 'bands');
const gLines = gRoot.append('g').attr('class', 'lines');
const gAxes = gRoot.append('g').attr('class', 'axes');

// State
let width = 800;   // outer SVG size, recomputed by updateSize()
let height = 400;
const margin = { top: 16, right: 28, bottom: 56, left: 64 };
let rawData = {};                 // raw CSV text per metric
let series = [];                  // parsed series for the current metric
let hiddenSeries = new Set();     // names of series toggled off in the legend
let showRunningAverage = true;
let currentMetric = 'reward';     // 'reward' or 'length'
const RUNNING_AVG_WINDOW = 50;    // steps
|
| 285 |
+
|
| 286 |
+
// Color setup.
// Use the page-wide categorical palette when `window.ColorPalettes` provides
// one; otherwise fall back to a fixed six-color list (callers index it modulo
// its length, so `count` may exceed six).
const getColors = (count) => {
  const palettes = window.ColorPalettes;
  if (!palettes || !palettes.getColors) {
    // Fallback colors
    return ['#4E79A7', '#F28E2B', '#E15759', '#76B7B2', '#59A14F', '#EDC948'];
  }
  return palettes.getColors('categorical', count);
};
|
| 294 |
+
|
| 295 |
+
// Calculate a trailing running average over a step-based window.
// For every input point the output holds one point at the same `step` whose
// mean/min/max are the d3.mean of those fields over all input points with
// step in [step - windowSize, step] (both ends inclusive). Membership is
// decided by step value, not by array position.
function calculateRunningAverage(points, windowSize) {
  if (points.length === 0) return [];

  const smoothed = [];
  for (const anchor of points) {
    const upperStep = anchor.step;
    const lowerStep = upperStep - windowSize;

    // All points whose step falls inside the trailing window
    const inWindow = points.filter(({ step }) => step >= lowerStep && step <= upperStep);
    if (inWindow.length === 0) continue;

    smoothed.push({
      step: upperStep,
      mean: d3.mean(inWindow, (p) => p.mean),
      min: d3.mean(inWindow, (p) => p.min),
      max: d3.mean(inWindow, (p) => p.max)
    });
  }

  return smoothed;
}
|
| 323 |
+
|
| 324 |
+
/**
 * Parse a CSV export into the enclosing `series` array for one metric.
 *
 * The CSV is expected to have a `train/global_step` column and, per run,
 * the columns `<run> - <metric>`, `<run> - <metric>__MIN` and
 * `<run> - <metric>__MAX`.
 *
 * Each resulting series has: `name` (legend label), `version` (e.g. "v27.03",
 * or the run name when no version is found), `fullName` (run name), `points`
 * ({ step, mean, min, max }) and `runningAvgPoints`.
 *
 * A CSV without data rows yields an empty `series` instead of throwing.
 *
 * @param {string} csvText - Raw CSV text.
 * @param {string} metricType - 'reward' selects the reward column; any other
 *   value selects the mean-terminated-length column.
 */
function parseData(csvText, metricType) {
  const rows = d3.csvParse(csvText);

  if (rows.length === 0) {
    // No data rows: there is no first row to read the headers from.
    series = [];
    return;
  }

  // Determine metric column suffix based on type
  const metricSuffix = metricType === 'reward'
    ? 'train/rewards/strip_reasoning_accuracy_reward/mean'
    : 'train/completions/mean_terminated_length';

  // A run's mean column is exactly `<run> - <metricSuffix>`. Matching on the
  // end of the header excludes the `__MIN` / `__MAX` companions without
  // rejecting run names that happen to contain "MIN"/"MAX", and slicing the
  // suffix off keeps run names that themselves contain " - " intact.
  const meanColSuffix = ` - ${metricSuffix}`;
  const runNames = Object.keys(rows[0])
    .filter(h => h.endsWith(meanColSuffix))
    .map(h => h.slice(0, -meanColSuffix.length));

  // Mapping of v27.x to overlong penalty ranges
  const penaltyRangeMap = {
    'v27.00': '1.5-2k',
    'v27.01': '2-2.5k',
    'v27.02': '2.5-3k',
    'v27.03': '3-3.5k',
    'v27.04': '3.5-4k',
    'v27.05': '4-4.5k'
  };

  // Build series data using train/global_step for x-axis
  series = runNames.map(runName => {
    const meanCol = `${runName} - ${metricSuffix}`;
    const minCol = `${meanCol}__MIN`;
    const maxCol = `${meanCol}__MAX`;

    const points = rows
      // Skip rows where this run has no value at that step (empty cells)
      .filter(row => row['train/global_step'] && row[meanCol])
      .map(row => ({
        step: +row['train/global_step'],
        mean: +row[meanCol],
        min: +row[minCol],
        max: +row[maxCol]
      }))
      .filter(p => !isNaN(p.step) && !isNaN(p.mean));

    // Calculate running average
    const runningAvgPoints = calculateRunningAverage(points, RUNNING_AVG_WINDOW);

    // Extract version from run name
    const versionMatch = runName.match(/v(\d+\.\d+)/);
    const version = versionMatch ? `v${versionMatch[1]}` : runName;

    // Map to penalty range label
    const displayName = penaltyRangeMap[version] || version;

    return {
      name: displayName,
      version: version, // Keep original version for sorting
      fullName: runName,
      points,
      runningAvgPoints
    };
  });

  // Sort series by version number to get correct order. The version is read
  // as a decimal number (v27.03 -> 27.03); runs without a version sort first.
  series.sort((a, b) => {
    const getVersionNum = (v) => {
      const match = v.version?.match(/v(\d+)\.(\d+)/);
      return match ? parseFloat(`${match[1]}.${match[2]}`) : 0;
    };
    return getVersionNum(a) - getVersionNum(b);
  });
}
|
| 397 |
+
|
| 398 |
+
/**
 * Recompute the chart size from the container and apply it to the SVG.
 *
 * Width follows the container (800 when it reports no width); height keeps a
 * 2.5:1 aspect ratio but never drops below 320px. Updates the enclosing
 * `width` / `height` and returns the drawable area inside the margins.
 *
 * @returns {{innerWidth: number, innerHeight: number}}
 */
function updateSize() {
  width = container.clientWidth || 800;
  height = Math.max(320, Math.round(width / 2.5));

  svg.attr('width', width);
  svg.attr('height', height);
  gRoot.attr('transform', `translate(${margin.left},${margin.top})`);

  const { top, right, bottom, left } = margin;
  const innerWidth = width - left - right;
  const innerHeight = height - top - bottom;
  return { innerWidth, innerHeight };
}
|
| 408 |
+
|
| 409 |
+
/**
 * Redraw the chart for the current metric, smoothing mode and legend state.
 *
 * Resizes the SVG, rebuilds the scales, grid, series paths, axes and axis
 * labels, and (re)binds the tooltip mouse handlers on the SVG. Returns early,
 * without touching the existing drawing, when there are no series or when
 * every series is hidden.
 */
function render() {
  const { innerWidth, innerHeight } = updateSize();
  if (series.length === 0) return;

  // Filter visible series
  const visibleSeries = series.filter(s => !hiddenSeries.has(s.name));
  if (visibleSeries.length === 0) return;

  // Plot window: x is capped at MAX_STEPS; the reward view is additionally
  // clipped to a fixed [MIN_REWARD, MAX_REWARD] band (also used as y-domain).
  const MAX_STEPS = 500;
  const MIN_REWARD = 0.55;
  const MAX_REWARD = 0.9;

  // Select which points to use based on running average toggle
  const getPoints = (s) => showRunningAverage ? s.runningAvgPoints : s.points;

  const getFilteredPoints = (s) => {
    const pts = getPoints(s);
    if (currentMetric === 'reward') {
      return pts.filter(p => p.step <= MAX_STEPS && p.mean >= MIN_REWARD && p.mean <= MAX_REWARD);
    }
    // For length, no y-axis filtering
    return pts.filter(p => p.step <= MAX_STEPS);
  };

  // Filter once per render; the domain, the paths and the tooltip all reuse
  // the same arrays instead of re-filtering on every mousemove.
  const filteredBySeries = new Map(visibleSeries.map(s => [s, getFilteredPoints(s)]));
  const pointsFor = (s) => filteredBySeries.get(s);

  // Get all points for domain calculation
  const allPoints = visibleSeries.flatMap(s => pointsFor(s));

  // Scales
  const xScale = d3.scaleLinear()
    .domain([0, MAX_STEPS])
    .range([0, innerWidth])
    .nice();

  // Set y-axis domain based on current metric
  let yDomain;
  if (currentMetric === 'reward') {
    yDomain = [MIN_REWARD, MAX_REWARD];
  } else {
    // For length, calculate from data. d3.extent yields [undefined, undefined]
    // when nothing survived filtering; fall back to a unit domain so the
    // scale never receives NaN bounds.
    const [minVal, maxVal] = d3.extent(allPoints, d => d.mean);
    yDomain = minVal === undefined ? [0, 1] : [minVal * 0.95, maxVal * 1.05];
  }

  const yScale = d3.scaleLinear()
    .domain(yDomain)
    .range([innerHeight, 0]);

  // Grid
  gGrid.selectAll('.grid-y').data([0])
    .join('g')
    .attr('class', 'grid grid-y')
    .call(d3.axisLeft(yScale)
      .tickSize(-innerWidth)
      .tickFormat('')
    )
    .call(g => g.select('.domain').remove());

  // Colors: indexed by position in the full series list so a series keeps
  // its color when others are hidden.
  const colors = getColors(series.length);
  const colorScale = (name) => {
    const idx = series.findIndex(s => s.name === name);
    return colors[idx % colors.length];
  };

  // Line generator
  const line = d3.line()
    .x(d => xScale(d.step))
    .y(d => yScale(d.mean))
    .curve(d3.curveMonotoneX);

  // Render lines
  gLines.selectAll('.line')
    .data(visibleSeries, d => d.name)
    .join('path')
    .attr('class', 'line')
    .attr('d', d => line(pointsFor(d)))
    .attr('stroke', d => colorScale(d.name));

  // Axes
  gAxes.selectAll('.x-axis').data([0])
    .join('g')
    .attr('class', 'x-axis axis')
    .attr('transform', `translate(0,${innerHeight})`)
    .call(d3.axisBottom(xScale).ticks(Math.min(10, Math.floor(innerWidth / 80))));

  gAxes.selectAll('.y-axis').data([0])
    .join('g')
    .attr('class', 'y-axis axis')
    .call(d3.axisLeft(yScale).ticks(8));

  // Axis labels
  gAxes.selectAll('.x-label').data([0])
    .join('text')
    .attr('class', 'x-label axis-label')
    .attr('text-anchor', 'middle')
    .attr('x', innerWidth / 2)
    .attr('y', innerHeight + 45)
    .text('Training step');

  gAxes.selectAll('.y-label').data([0])
    .join('text')
    .attr('class', 'y-label axis-label')
    .attr('text-anchor', 'middle')
    .attr('transform', `translate(-48,${innerHeight / 2}) rotate(-90)`)
    .text(currentMetric === 'reward' ? 'Reward' : 'Mean Terminated Length');

  // Tooltip interactions
  const bisect = d3.bisector(d => d.step).left;

  // Closest point (by step) to the cursor, or null when the cursor lies
  // outside the series' own step range. `bisect` returns the insertion index,
  // i.e. the first point at or to the right of `step`, so the left neighbour
  // has to be compared as well to find the nearest one.
  // Relies on `points` being ordered by step, as the bisector itself does.
  const nearestPoint = (points, step) => {
    if (points.length === 0) return null;
    const first = points[0];
    const last = points[points.length - 1];
    if (step < first.step || step > last.step) return null;
    const idx = bisect(points, step);
    if (idx === 0) return first;
    const before = points[idx - 1];
    const after = points[idx];
    return step - before.step <= after.step - step ? before : after;
  };

  svg.on('mousemove', function(event) {
    const [mx] = d3.pointer(event, gRoot.node());
    const step = xScale.invert(mx);

    let tooltipHtml = `<strong>Step: ${Math.round(step)}</strong>`;
    if (showRunningAverage) {
      tooltipHtml += ` <span style="font-weight:normal;font-size:11px">(${RUNNING_AVG_WINDOW}-step avg)</span>`;
    }
    tooltipHtml += `<br/>`;

    visibleSeries.forEach(s => {
      const p = nearestPoint(pointsFor(s), step);
      if (p) {
        const color = colorScale(s.name);
        const valueStr = currentMetric === 'reward'
          ? `${(p.mean * 100).toFixed(1)}%`
          : `${p.mean.toFixed(1)} tokens`;
        tooltipHtml += `<div style="margin-top:4px"><span style="color:${color}">●</span> ${s.name}: ${valueStr}</div>`;
      }
    });

    tipInner.innerHTML = tooltipHtml;
    const tipBounds = tip.getBoundingClientRect();
    const [px, py] = d3.pointer(event, container);

    let tipX = px + 12;
    let tipY = py - 12;

    // Flip to the left of the cursor when the tooltip would overflow the
    // right edge, and push it below the cursor near the top edge.
    if (tipX + tipBounds.width > width - 10) {
      tipX = px - tipBounds.width - 12;
    }
    if (tipY - tipBounds.height < 10) {
      tipY = py + 20;
    }

    tip.style.transform = `translate(${tipX}px, ${tipY}px)`;
    tip.style.opacity = '1';
  });

  svg.on('mouseleave', () => {
    tip.style.opacity = '0';
    tip.style.transform = 'translate(-9999px, -9999px)';
  });
}
|
| 566 |
+
|
| 567 |
+
/**
 * Build (or rebuild) the clickable legend inside the container's header.
 *
 * Missing wrapper elements (`.header`, `.legend`, `.legend-title`, `.items`)
 * are created on demand and reused on later calls; the item list itself is
 * regenerated from `series` every time. Clicking an item toggles that series
 * in `hiddenSeries` and redraws both the legend and the chart.
 */
function makeLegend() {
  // Return the first `.className` descendant of `parent`, creating and
  // appending a <div> with that class when none exists yet.
  const ensureDiv = (parent, className) => {
    const found = parent.querySelector(`.${className}`);
    if (found) return found;
    const created = document.createElement('div');
    created.className = className;
    parent.appendChild(created);
    return created;
  };

  const header = ensureDiv(container, 'header');
  const legend = ensureDiv(header, 'legend');

  const title = ensureDiv(legend, 'legend-title');
  title.textContent = 'Overlong Penalty';

  const items = ensureDiv(legend, 'items');

  const palette = getColors(series.length);

  items.innerHTML = '';
  series.forEach((entry, index) => {
    const item = document.createElement('span');
    item.className = hiddenSeries.has(entry.name) ? 'item dimmed' : 'item';

    const swatch = document.createElement('span');
    swatch.className = 'swatch';
    swatch.style.background = palette[index % palette.length];

    const label = document.createElement('span');
    label.textContent = entry.name;

    item.appendChild(swatch);
    item.appendChild(label);
    items.appendChild(item);

    item.addEventListener('click', () => {
      // Toggle visibility: drop the name if it was hidden, otherwise hide it.
      const wasHidden = hiddenSeries.has(entry.name);
      if (wasHidden) {
        hiddenSeries.delete(entry.name);
      } else {
        hiddenSeries.add(entry.name);
      }
      makeLegend();
      render();
    });
  });
}
|
| 631 |
+
|
| 632 |
+
/**
 * Build (or rebuild) the control strip inside the container's header:
 * a "Metric" group (Reward / Length) and a "Display" group (running average).
 *
 * The `.header` and `.controls` wrappers are created on demand; the contents
 * of `.controls` are regenerated on every call so the `active` classes always
 * reflect `currentMetric` and `showRunningAverage`.
 */
function makeControls() {
  // Return the first `.className` descendant of `parent`, creating and
  // appending a <div> with that class when none exists yet.
  const ensureDiv = (parent, className) => {
    const found = parent.querySelector(`.${className}`);
    if (found) return found;
    const created = document.createElement('div');
    created.className = className;
    parent.appendChild(created);
    return created;
  };

  // Create one toggle button; `active` decides whether it is highlighted.
  const makeToggle = (text, active, onClick) => {
    const btn = document.createElement('button');
    btn.className = active ? 'toggle-btn active' : 'toggle-btn';
    btn.textContent = text;
    btn.addEventListener('click', () => onClick());
    return btn;
  };

  // Append a labelled group holding the given toggle buttons to `controls`.
  const addGroup = (labelText, buttons) => {
    const group = document.createElement('div');
    group.className = 'control-group';

    const label = document.createElement('label');
    label.textContent = labelText;
    group.appendChild(label);

    const toggles = document.createElement('div');
    toggles.className = 'toggle-group';
    buttons.forEach((btn) => toggles.appendChild(btn));

    group.appendChild(toggles);
    controls.appendChild(group);
  };

  // Switch the plotted metric: re-parse the matching raw CSV and redraw
  // everything. A click on the already-active metric is a no-op.
  const switchMetric = (metric) => {
    if (currentMetric === metric) return;
    currentMetric = metric;
    parseData(rawData[metric], metric);
    makeControls();
    makeLegend();
    render();
  };

  const header = ensureDiv(container, 'header');
  const controls = ensureDiv(header, 'controls');
  controls.innerHTML = '';

  // Metric selection group
  addGroup('Metric', [
    makeToggle('Reward', currentMetric === 'reward', () => switchMetric('reward')),
    makeToggle('Length', currentMetric === 'length', () => switchMetric('length'))
  ]);

  // Display options group
  addGroup('Display', [
    makeToggle(`Running Avg (${RUNNING_AVG_WINDOW} steps)`, showRunningAverage, () => {
      showRunningAverage = !showRunningAverage;
      makeControls();
      render();
    })
  ]);
}
|
| 715 |
+
|
| 716 |
+
// Load both datasets. Each list holds candidate locations for the same CSV,
// handed to fetchFirstAvailable as a group.
const REWARD_PATHS = [
  '/data/grpo/rl_reward_curves.csv',
  './assets/data/grpo/rl_reward_curves.csv',
  '../assets/data/grpo/rl_reward_curves.csv',
  '../../assets/data/grpo/rl_reward_curves.csv'
];

const LENGTH_PATHS = [
  '/data/grpo/rl_mean_teminated_lengths.csv',
  './assets/data/grpo/rl_mean_teminated_lengths.csv',
  '../assets/data/grpo/rl_mean_teminated_lengths.csv',
  '../../assets/data/grpo/rl_mean_teminated_lengths.csv'
];

const rewardRequest = fetchFirstAvailable(REWARD_PATHS);
const lengthRequest = fetchFirstAvailable(LENGTH_PATHS);

Promise.all([rewardRequest, lengthRequest])
  .then((csvTexts) => {
    const rewardCsvText = csvTexts[0];
    const lengthCsvText = csvTexts[1];

    // Keep both raw CSV texts so the metric toggle can re-parse on demand.
    rawData.reward = rewardCsvText;
    rawData.length = lengthCsvText;

    // The chart starts on the reward metric.
    parseData(rewardCsvText, 'reward');
    makeLegend();
    makeControls();
    render();

    // Responsiveness: redraw on container resize where ResizeObserver is
    // available, otherwise on window resize.
    if (!window.ResizeObserver) {
      window.addEventListener('resize', render);
      return;
    }
    const observer = new ResizeObserver(() => render());
    observer.observe(container);
  })
  .catch((err) => {
    // Surface the failure inside the embed itself.
    const errorBox = document.createElement('pre');
    errorBox.style.color = '#f44336';
    errorBox.style.fontSize = '12px';
    errorBox.style.padding = '12px';
    errorBox.textContent = `Error loading data: ${err.message}`;
    container.appendChild(errorBox);
  });
|
| 762 |
+
};
|
| 763 |
+
|
| 764 |
+
// Start immediately when the document has already been parsed; otherwise
// wait for DOMContentLoaded (once) before loading D3 and bootstrapping.
if (document.readyState !== 'loading') {
  ensureD3(bootstrap);
} else {
  const onDomReady = () => ensureD3(bootstrap);
  document.addEventListener('DOMContentLoaded', onDomReady, { once: true });
}
|
| 769 |
+
})();
|
| 770 |
+
</script>
|
app/src/content/embeds/d3-rl-token-comparison.html
ADDED
|
@@ -0,0 +1,504 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<div class="d3-grpo-token-comparison"></div>
|
| 2 |
+
<style>
|
| 3 |
+
.d3-grpo-token-comparison {
|
| 4 |
+
width: 100%;
|
| 5 |
+
position: relative;
|
| 6 |
+
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
|
| 7 |
+
}
|
| 8 |
+
.d3-grpo-token-comparison svg {
|
| 9 |
+
display: block;
|
| 10 |
+
width: 100%;
|
| 11 |
+
}
|
| 12 |
+
.d3-grpo-token-comparison .bar {
|
| 13 |
+
stroke: none;
|
| 14 |
+
}
|
| 15 |
+
.d3-grpo-token-comparison .axes path,
|
| 16 |
+
.d3-grpo-token-comparison .axes line {
|
| 17 |
+
stroke: var(--axis-color, var(--text-color));
|
| 18 |
+
}
|
| 19 |
+
.d3-grpo-token-comparison .axes text {
|
| 20 |
+
fill: var(--tick-color, var(--muted-color));
|
| 21 |
+
font-size: 11px;
|
| 22 |
+
}
|
| 23 |
+
.d3-grpo-token-comparison .grid line {
|
| 24 |
+
stroke: var(--grid-color, rgba(0,0,0,.08));
|
| 25 |
+
}
|
| 26 |
+
.d3-grpo-token-comparison .chart-title {
|
| 27 |
+
font-size: 13px;
|
| 28 |
+
font-weight: 600;
|
| 29 |
+
fill: var(--text-color);
|
| 30 |
+
}
|
| 31 |
+
.d3-grpo-token-comparison .d3-tooltip {
|
| 32 |
+
position: absolute;
|
| 33 |
+
top: 0;
|
| 34 |
+
left: 0;
|
| 35 |
+
transform: translate(-9999px, -9999px);
|
| 36 |
+
pointer-events: none;
|
| 37 |
+
padding: 8px 10px;
|
| 38 |
+
border-radius: 8px;
|
| 39 |
+
font-size: 12px;
|
| 40 |
+
line-height: 1.35;
|
| 41 |
+
border: 1px solid var(--border-color);
|
| 42 |
+
background: var(--surface-bg);
|
| 43 |
+
color: var(--text-color);
|
| 44 |
+
box-shadow: 0 4px 24px rgba(0,0,0,.18);
|
| 45 |
+
opacity: 0;
|
| 46 |
+
transition: opacity .12s ease;
|
| 47 |
+
}
|
| 48 |
+
.d3-grpo-token-comparison .d3-tooltip__inner {
|
| 49 |
+
text-align: left;
|
| 50 |
+
}
|
| 51 |
+
.d3-grpo-token-comparison .legend {
|
| 52 |
+
display: flex;
|
| 53 |
+
flex-direction: column;
|
| 54 |
+
align-items: flex-start;
|
| 55 |
+
gap: 6px;
|
| 56 |
+
margin-top: 16px;
|
| 57 |
+
}
|
| 58 |
+
.d3-grpo-token-comparison .legend-title {
|
| 59 |
+
font-size: 12px;
|
| 60 |
+
font-weight: 700;
|
| 61 |
+
color: var(--text-color);
|
| 62 |
+
}
|
| 63 |
+
.d3-grpo-token-comparison .legend .items {
|
| 64 |
+
display: flex;
|
| 65 |
+
flex-wrap: wrap;
|
| 66 |
+
gap: 8px 14px;
|
| 67 |
+
}
|
| 68 |
+
.d3-grpo-token-comparison .legend .item {
|
| 69 |
+
display: inline-flex;
|
| 70 |
+
align-items: center;
|
| 71 |
+
gap: 6px;
|
| 72 |
+
white-space: nowrap;
|
| 73 |
+
font-size: 12px;
|
| 74 |
+
color: var(--text-color);
|
| 75 |
+
}
|
| 76 |
+
.d3-grpo-token-comparison .legend .swatch {
|
| 77 |
+
width: 14px;
|
| 78 |
+
height: 14px;
|
| 79 |
+
border-radius: 3px;
|
| 80 |
+
border: 1px solid var(--border-color);
|
| 81 |
+
}
|
| 82 |
+
.d3-grpo-token-comparison .controls {
|
| 83 |
+
display: flex;
|
| 84 |
+
gap: 16px;
|
| 85 |
+
align-items: center;
|
| 86 |
+
justify-content: flex-end;
|
| 87 |
+
flex-wrap: wrap;
|
| 88 |
+
margin-top: 8px;
|
| 89 |
+
}
|
| 90 |
+
.d3-grpo-token-comparison .control-group {
|
| 91 |
+
display: flex;
|
| 92 |
+
flex-direction: column;
|
| 93 |
+
align-items: flex-start;
|
| 94 |
+
gap: 6px;
|
| 95 |
+
}
|
| 96 |
+
.d3-grpo-token-comparison .controls label {
|
| 97 |
+
font-size: 12px;
|
| 98 |
+
font-weight: 700;
|
| 99 |
+
color: var(--text-color);
|
| 100 |
+
}
|
| 101 |
+
.d3-grpo-token-comparison .controls select {
|
| 102 |
+
font-size: 12px;
|
| 103 |
+
padding: 8px 28px 8px 10px;
|
| 104 |
+
border: 1px solid var(--border-color);
|
| 105 |
+
border-radius: 8px;
|
| 106 |
+
background: var(--surface-bg);
|
| 107 |
+
color: var(--text-color);
|
| 108 |
+
cursor: pointer;
|
| 109 |
+
}
|
| 110 |
+
.d3-grpo-token-comparison .slider-container {
|
| 111 |
+
display: flex;
|
| 112 |
+
flex-direction: column;
|
| 113 |
+
gap: 8px;
|
| 114 |
+
min-width: 300px;
|
| 115 |
+
}
|
| 116 |
+
.d3-grpo-token-comparison .slider-row {
|
| 117 |
+
display: flex;
|
| 118 |
+
align-items: center;
|
| 119 |
+
gap: 12px;
|
| 120 |
+
}
|
| 121 |
+
.d3-grpo-token-comparison input[type="range"] {
|
| 122 |
+
flex: 1;
|
| 123 |
+
height: 6px;
|
| 124 |
+
border-radius: 3px;
|
| 125 |
+
background: var(--border-color);
|
| 126 |
+
outline: none;
|
| 127 |
+
-webkit-appearance: none;
|
| 128 |
+
cursor: pointer;
|
| 129 |
+
}
|
| 130 |
+
.d3-grpo-token-comparison input[type="range"]::-webkit-slider-thumb {
|
| 131 |
+
-webkit-appearance: none;
|
| 132 |
+
appearance: none;
|
| 133 |
+
width: 16px;
|
| 134 |
+
height: 16px;
|
| 135 |
+
border-radius: 50%;
|
| 136 |
+
background: var(--primary-color);
|
| 137 |
+
cursor: pointer;
|
| 138 |
+
}
|
| 139 |
+
.d3-grpo-token-comparison input[type="range"]::-moz-range-thumb {
|
| 140 |
+
width: 16px;
|
| 141 |
+
height: 16px;
|
| 142 |
+
border-radius: 50%;
|
| 143 |
+
background: var(--primary-color);
|
| 144 |
+
cursor: pointer;
|
| 145 |
+
border: none;
|
| 146 |
+
}
|
| 147 |
+
.d3-grpo-token-comparison .slider-value {
|
| 148 |
+
font-size: 12px;
|
| 149 |
+
font-weight: 600;
|
| 150 |
+
color: var(--text-color);
|
| 151 |
+
min-width: 60px;
|
| 152 |
+
text-align: right;
|
| 153 |
+
}
|
| 154 |
+
</style>
|
| 155 |
+
<script>
|
| 156 |
+
(() => {
|
| 157 |
+
/**
 * Run `cb` once D3 is usable, injecting the CDN build when necessary.
 *
 * If `window.d3.select` already exists, `cb` is invoked synchronously and its
 * result returned. Otherwise a single shared <script id="d3-cdn-script"> is
 * created (or reused if present) and `cb` is deferred to its `load` event.
 *
 * @param {Function} cb Callback to invoke when D3 is available.
 */
const ensureD3 = (cb) => {
  const isD3Usable = () => Boolean(window.d3) && typeof window.d3.select === 'function';

  if (isD3Usable()) return cb();

  let scriptTag = document.getElementById('d3-cdn-script');
  if (!scriptTag) {
    // First embed on the page to need D3: add the shared loader tag.
    scriptTag = document.createElement('script');
    scriptTag.id = 'd3-cdn-script';
    scriptTag.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(scriptTag);
  }

  const runIfUsable = () => {
    if (isD3Usable()) cb();
  };

  scriptTag.addEventListener('load', runIfUsable, { once: true });
  if (window.d3) runIfUsable();
};
|
| 172 |
+
|
| 173 |
+
const bootstrap = () => {
|
| 174 |
+
const scriptEl = document.currentScript;
|
| 175 |
+
let container = scriptEl ? scriptEl.previousElementSibling : null;
|
| 176 |
+
if (!(container && container.classList && container.classList.contains('d3-grpo-token-comparison'))) {
|
| 177 |
+
const candidates = Array.from(document.querySelectorAll('.d3-grpo-token-comparison'))
|
| 178 |
+
.filter((el) => !(el.dataset && el.dataset.mounted === 'true'));
|
| 179 |
+
container = candidates[candidates.length - 1] || null;
|
| 180 |
+
}
|
| 181 |
+
if (!container) return;
|
| 182 |
+
if (container.dataset) {
|
| 183 |
+
if (container.dataset.mounted === 'true') return;
|
| 184 |
+
container.dataset.mounted = 'true';
|
| 185 |
+
}
|
| 186 |
+
|
| 187 |
+
// Tooltip
|
| 188 |
+
container.style.position = container.style.position || 'relative';
|
| 189 |
+
let tip = container.querySelector('.d3-tooltip');
|
| 190 |
+
let tipInner;
|
| 191 |
+
if (!tip) {
|
| 192 |
+
tip = document.createElement('div');
|
| 193 |
+
tip.className = 'd3-tooltip';
|
| 194 |
+
tipInner = document.createElement('div');
|
| 195 |
+
tipInner.className = 'd3-tooltip__inner';
|
| 196 |
+
tip.appendChild(tipInner);
|
| 197 |
+
container.appendChild(tip);
|
| 198 |
+
} else {
|
| 199 |
+
tipInner = tip.querySelector('.d3-tooltip__inner') || tip;
|
| 200 |
+
}
|
| 201 |
+
|
| 202 |
+
/**
 * Fill the tooltip with `html` and place it next to the pointer.
 *
 * @param {string} html  Markup assigned to the tooltip's inner element.
 * @param {Event}  event Pointer event; its position is resolved relative to
 *   the chart container.
 */
const showTooltip = (html, event) => {
  tipInner.innerHTML = html;
  tip.style.opacity = '1';

  const pointerPos = d3.pointer(event, container);
  // Offset 12px right of and 12px above the pointer.
  const left = pointerPos[0] + 12;
  const top = pointerPos[1] - 12;
  tip.style.transform = `translate(${left}px, ${top}px)`;
};
|
| 208 |
+
|
| 209 |
+
/**
 * Fade the tooltip out, then park it off-screen.
 *
 * The move is delayed by 120 ms, matching the `.12s` opacity transition
 * declared for `.d3-tooltip`, so the fade is visible before the element is
 * moved away.
 */
const hideTooltip = () => {
  tip.style.opacity = '0';
  setTimeout(() => {
    // The tooltip may have been shown again during the delay (pointer moved
    // straight onto another bar). Only park it if it is still hidden;
    // otherwise this stale timer would yank a visible tooltip off-screen.
    if (tip.style.opacity === '0') {
      tip.style.transform = 'translate(-9999px, -9999px)';
    }
  }, 120);
};
|
| 215 |
+
|
| 216 |
+
// SVG scaffolding
|
| 217 |
+
const svg = d3.select(container).append('svg').attr('width', '100%').style('display', 'block');
|
| 218 |
+
const gRoot = svg.append('g');
|
| 219 |
+
|
| 220 |
+
let width = 800, height = 400;
|
| 221 |
+
const margin = { top: 40, right: 16, bottom: 56, left: 60 };
|
| 222 |
+
|
| 223 |
+
// Dataset configurations
|
| 224 |
+
const datasetConfigs = [
|
| 225 |
+
{
|
| 226 |
+
file: 'HuggingFaceH4_details_HuggingFaceH4__SmolLM3-3B-GRPO-no-think_v27_00-step-000000400_aime25_2025-10-16T01-18-56.json',
|
| 227 |
+
name: '1.5-2k',
|
| 228 |
+
id: 'grpo-1.5-2k'
|
| 229 |
+
},
|
| 230 |
+
{
|
| 231 |
+
file: 'HuggingFaceH4_details_HuggingFaceH4__SmolLM3-3B-GRPO-no-think_v27_01-step-000000400_aime25_2025-10-16T01-22-56.json',
|
| 232 |
+
name: '2-2.5k',
|
| 233 |
+
id: 'grpo-2-2.5k'
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
file: 'HuggingFaceH4_details_HuggingFaceH4__SmolLM3-3B-GRPO-no-think_v27_02-step-000000400_aime25_2025-10-16T01-23-56.json',
|
| 237 |
+
name: '2.5-3k',
|
| 238 |
+
id: 'grpo-2.5-3k'
|
| 239 |
+
},
|
| 240 |
+
{
|
| 241 |
+
file: 'HuggingFaceH4_details_HuggingFaceH4__SmolLM3-3B-GRPO-no-think_v27_03-step-000000400_aime25_2025-10-16T01-58-31.json',
|
| 242 |
+
name: '3-3.5k',
|
| 243 |
+
id: 'grpo-3-3.5k'
|
| 244 |
+
},
|
| 245 |
+
{
|
| 246 |
+
file: 'HuggingFaceH4_details_HuggingFaceH4__SmolLM3-3B-GRPO-no-think_v27_04-step-000000400_aime25_2025-10-16T04-55-04.json',
|
| 247 |
+
name: '3.5-4k',
|
| 248 |
+
id: 'grpo-3.5-4k'
|
| 249 |
+
},
|
| 250 |
+
{
|
| 251 |
+
file: 'HuggingFaceH4_details_HuggingFaceH4__SmolLM3-3B-GRPO-no-think_v27_05-step-000000400_aime25_2025-10-16T06-12-05.json',
|
| 252 |
+
name: '4-4.5k',
|
| 253 |
+
id: 'grpo-4-4.5k'
|
| 254 |
+
}
|
| 255 |
+
];
|
| 256 |
+
|
| 257 |
+
/**
 * Fetch and parse a histogram JSON file, trying several candidate locations.
 *
 * The candidate URLs are tried in order; the first response with an OK status
 * whose body parses as JSON wins. Failures (network errors, non-OK statuses,
 * unparsable bodies) deliberately fall through to the next candidate.
 *
 * @param {string} filename File name under `grpo/histograms/`.
 * @returns {Promise<any>} The parsed JSON document.
 * @throws {Error} `JSON not found: <filename>` when every candidate fails;
 *   the most recent failure is attached as `cause` to aid debugging.
 */
const fetchFirstAvailable = async (filename) => {
  const paths = [
    `/data/grpo/histograms/${filename}`,
    `./assets/data/grpo/histograms/${filename}`,
    `../assets/data/grpo/histograms/${filename}`,
    `../../assets/data/grpo/histograms/${filename}`
  ];

  // Remember why the latest candidate failed instead of discarding it.
  let lastFailure = null;

  for (const p of paths) {
    try {
      const r = await fetch(p, { cache: 'no-cache' });
      if (r.ok) return await r.json();
      lastFailure = new Error(`HTTP ${r.status} for ${p}`);
    } catch (e) {
      // Best-effort: keep going, but retain the reason.
      lastFailure = e;
    }
  }
  throw new Error(`JSON not found: ${filename}`, { cause: lastFailure });
};
|
| 273 |
+
|
| 274 |
+
// Load baseline (APO No-Think)
|
| 275 |
+
const baselineFile = 'HuggingFaceH4_details_HuggingFaceH4__SmolLM3-3B-APO-no-think_main_aime25_2025-10-02T13-20-35.json';
|
| 276 |
+
|
| 277 |
+
fetchFirstAvailable(baselineFile)
|
| 278 |
+
.then(async (baselineData) => {
|
| 279 |
+
// Load all comparison datasets
|
| 280 |
+
const comparisonData = await Promise.all(
|
| 281 |
+
datasetConfigs.map(async (config) => ({
|
| 282 |
+
...config,
|
| 283 |
+
data: await fetchFirstAvailable(config.file)
|
| 284 |
+
}))
|
| 285 |
+
);
|
| 286 |
+
|
| 287 |
+
// Get colors
|
| 288 |
+
const colors = window.ColorPalettes
|
| 289 |
+
? window.ColorPalettes.getColors('categorical', 2)
|
| 290 |
+
: ['#4e79a7', '#f28e2c'];
|
| 291 |
+
|
| 292 |
+
let selectedComparison = comparisonData[0];
|
| 293 |
+
let currentIndex = 0;
|
| 294 |
+
|
| 295 |
+
// Create controls
|
| 296 |
+
const controls = document.createElement('div');
|
| 297 |
+
controls.className = 'controls';
|
| 298 |
+
|
| 299 |
+
const sliderContainer = document.createElement('div');
|
| 300 |
+
sliderContainer.className = 'slider-container';
|
| 301 |
+
|
| 302 |
+
const label = document.createElement('label');
|
| 303 |
+
label.textContent = 'Overlong Penalty';
|
| 304 |
+
label.style.fontSize = '12px';
|
| 305 |
+
label.style.fontWeight = '700';
|
| 306 |
+
label.style.color = 'var(--text-color)';
|
| 307 |
+
|
| 308 |
+
const sliderRow = document.createElement('div');
|
| 309 |
+
sliderRow.className = 'slider-row';
|
| 310 |
+
|
| 311 |
+
const slider = document.createElement('input');
|
| 312 |
+
slider.type = 'range';
|
| 313 |
+
slider.min = '0';
|
| 314 |
+
slider.max = String(comparisonData.length - 1);
|
| 315 |
+
slider.value = '0';
|
| 316 |
+
slider.step = '1';
|
| 317 |
+
|
| 318 |
+
const sliderValue = document.createElement('span');
|
| 319 |
+
sliderValue.className = 'slider-value';
|
| 320 |
+
sliderValue.textContent = comparisonData[0].name;
|
| 321 |
+
|
| 322 |
+
slider.addEventListener('input', (e) => {
|
| 323 |
+
currentIndex = parseInt(e.target.value);
|
| 324 |
+
selectedComparison = comparisonData[currentIndex];
|
| 325 |
+
sliderValue.textContent = selectedComparison.name;
|
| 326 |
+
render();
|
| 327 |
+
});
|
| 328 |
+
|
| 329 |
+
sliderRow.appendChild(slider);
|
| 330 |
+
sliderRow.appendChild(sliderValue);
|
| 331 |
+
sliderContainer.appendChild(label);
|
| 332 |
+
sliderContainer.appendChild(sliderRow);
|
| 333 |
+
controls.appendChild(sliderContainer);
|
| 334 |
+
container.appendChild(controls);
|
| 335 |
+
|
| 336 |
+
// Create legend
|
| 337 |
+
const legend = document.createElement('div');
|
| 338 |
+
legend.className = 'legend';
|
| 339 |
+
|
| 340 |
+
const legendTitle = document.createElement('div');
|
| 341 |
+
legendTitle.className = 'legend-title';
|
| 342 |
+
legendTitle.textContent = 'Legend';
|
| 343 |
+
|
| 344 |
+
const legendItems = document.createElement('div');
|
| 345 |
+
legendItems.className = 'items';
|
| 346 |
+
|
| 347 |
+
['APO No-Think (Baseline)', 'GRPO on Math with Overlong Penalty'].forEach((name, idx) => {
|
| 348 |
+
const item = document.createElement('span');
|
| 349 |
+
item.className = 'item';
|
| 350 |
+
|
| 351 |
+
const swatch = document.createElement('span');
|
| 352 |
+
swatch.className = 'swatch';
|
| 353 |
+
swatch.style.background = colors[idx];
|
| 354 |
+
|
| 355 |
+
const text = document.createElement('span');
|
| 356 |
+
text.textContent = name;
|
| 357 |
+
|
| 358 |
+
item.appendChild(swatch);
|
| 359 |
+
item.appendChild(text);
|
| 360 |
+
legendItems.appendChild(item);
|
| 361 |
+
});
|
| 362 |
+
|
| 363 |
+
legend.appendChild(legendTitle);
|
| 364 |
+
legend.appendChild(legendItems);
|
| 365 |
+
container.appendChild(legend);
|
| 366 |
+
|
| 367 |
+
function updateSize() {
|
| 368 |
+
width = container.clientWidth || 800;
|
| 369 |
+
height = Math.max(400, Math.round(width / 2.2));
|
| 370 |
+
svg.attr('width', width).attr('height', height);
|
| 371 |
+
gRoot.attr('transform', `translate(${margin.left},${margin.top})`);
|
| 372 |
+
return {
|
| 373 |
+
innerWidth: width - margin.left - margin.right,
|
| 374 |
+
innerHeight: height - margin.top - margin.bottom
|
| 375 |
+
};
|
| 376 |
+
}
|
| 377 |
+
|
| 378 |
+
/**
 * Redraws the overlaid token-count histograms for the baseline run and the
 * currently selected comparison run.
 *
 * Reads `baselineData`, `selectedComparison`, `colors`, `gRoot`, `showTooltip`
 * and `hideTooltip` from the enclosing scope, re-measures the chart through
 * `updateSize()`, wipes everything under `gRoot`, and rebuilds grid, bars,
 * axes and axis labels from scratch. It is called on first paint and on every
 * container resize.
 */
function render() {
  const { innerWidth, innerHeight } = updateSize();

  // Clear previous
  gRoot.selectAll('*').remove();

  const datasets = [
    { name: 'APO No-Think (Baseline)', data: baselineData, idx: 0 },
    { name: selectedComparison.name, data: selectedComparison.data, idx: 1 }
  ];

  // Use 0-4k for both to keep scales comparable
  const xDomain = [0, 4000];

  // Same bin generator for both datasets so their bars line up bin-for-bin.
  const binTokens = d3.bin()
    .domain(xDomain)
    .thresholds(30);

  const allBins = datasets.map(ds => ({ ...ds, bins: binTokens(ds.data.token_counts) }));

  // Shared y-scale: tallest bin across both datasets. Fall back to 1 when no
  // token lands inside xDomain (max is 0 or undefined); a [0, 0] domain is
  // degenerate and would not place zero-count bars on the baseline.
  const maxFreq = d3.max(allBins.flatMap(d => d.bins.map(b => b.length))) || 1;

  const xScale = d3.scaleLinear()
    .domain(xDomain)
    .range([0, innerWidth]);

  const yScale = d3.scaleLinear()
    .domain([0, maxFreq])
    .range([innerHeight, 0])
    .nice();

  // Grid (drawn first so it sits behind the bars)
  gRoot.append('g')
    .attr('class', 'grid')
    .call(
      d3.axisLeft(yScale)
        .ticks(5)
        .tickSize(-innerWidth)
        .tickFormat('')
    )
    .call(sel => sel.select('.domain').remove());

  // Draw histograms (comparison first, baseline second so baseline is on top).
  // Iterate over a reversed copy so `allBins` keeps its dataset order.
  [...allBins].reverse().forEach(({ name, bins, idx, data }) => {
    gRoot.selectAll(`rect.bar-${idx}`)
      .data(bins)
      .join('rect')
      .attr('class', `bar bar-${idx}`)
      .attr('x', d => xScale(d.x0))
      .attr('y', d => yScale(d.length))
      // Leave a 1px gap between neighbouring bars, but never collapse below 1px.
      .attr('width', d => Math.max(1, xScale(d.x1) - xScale(d.x0) - 1))
      .attr('height', d => Math.max(0, innerHeight - yScale(d.length)))
      .attr('fill', colors[idx])
      .attr('opacity', 0.6)
      .on('mouseenter', (event, d) => {
        const stats = data.statistics;
        const html = `<strong>${name}</strong><br/>Tokens: ${d.x0.toFixed(0)} - ${d.x1.toFixed(0)}<br/>Count: ${d.length}<br/>Mean: ${stats.mean.toFixed(0)} | Median: ${stats.median.toFixed(0)}`;
        showTooltip(html, event);
      })
      .on('mouseleave', hideTooltip);
  });

  // X axis
  const xAxis = gRoot.append('g')
    .attr('class', 'axes')
    .attr('transform', `translate(0,${innerHeight})`)
    .call(d3.axisBottom(xScale).ticks(8).tickFormat(d3.format(',d')));

  xAxis.select('.domain').remove();

  // Y axis
  const yAxis = gRoot.append('g')
    .attr('class', 'axes')
    .call(d3.axisLeft(yScale).ticks(6));

  yAxis.select('.domain').remove();

  // X axis label
  gRoot.append('text')
    .attr('class', 'axes')
    .attr('x', innerWidth / 2)
    .attr('y', innerHeight + 40)
    .attr('text-anchor', 'middle')
    .style('font-size', '12px')
    .style('fill', 'var(--text-color)')
    .text('Token count');

  // Y axis label (rotated, so x/y are expressed in the rotated frame)
  gRoot.append('text')
    .attr('class', 'axes')
    .attr('transform', 'rotate(-90)')
    .attr('x', -innerHeight / 2)
    .attr('y', -45)
    .attr('text-anchor', 'middle')
    .style('font-size', '12px')
    .style('fill', 'var(--text-color)')
    .text('Frequency');
}
|
| 480 |
+
|
| 481 |
+
render();
|
| 482 |
+
|
| 483 |
+
if (window.ResizeObserver) {
|
| 484 |
+
const ro = new ResizeObserver(() => render());
|
| 485 |
+
ro.observe(container);
|
| 486 |
+
} else {
|
| 487 |
+
window.addEventListener('resize', render);
|
| 488 |
+
}
|
| 489 |
+
})
|
| 490 |
+
.catch((err) => {
|
| 491 |
+
const pre = document.createElement('pre');
|
| 492 |
+
pre.textContent = 'Error loading data: ' + err.message;
|
| 493 |
+
pre.style.cssText = 'color:red;font-size:12px;padding:12px;margin:0;';
|
| 494 |
+
container.appendChild(pre);
|
| 495 |
+
});
|
| 496 |
+
};
|
| 497 |
+
|
| 498 |
+
if (document.readyState === 'loading') {
|
| 499 |
+
document.addEventListener('DOMContentLoaded', () => ensureD3(bootstrap), { once: true });
|
| 500 |
+
} else {
|
| 501 |
+
ensureD3(bootstrap);
|
| 502 |
+
}
|
| 503 |
+
})();
|
| 504 |
+
</script>
|
app/src/content/embeds/d3-rl-token-histogram.html
ADDED
|
@@ -0,0 +1,321 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<div class="d3-grpo-token-histogram"></div>
|
| 2 |
+
<style>
  /* Root of the embed: positioned so the absolutely placed tooltip is anchored to it. */
  .d3-grpo-token-histogram {
    position: relative;
    width: 100%;
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
  }

  /* The chart SVG fills the container width. */
  .d3-grpo-token-histogram svg {
    display: block;
    width: 100%;
  }

  /* Histogram bars carry fill only. */
  .d3-grpo-token-histogram .bar {
    stroke: none;
  }

  /* Axis strokes follow the theme's axis colour, falling back to the text colour. */
  .d3-grpo-token-histogram .axes path,
  .d3-grpo-token-histogram .axes line {
    stroke: var(--axis-color, var(--text-color));
  }

  /* Tick labels. */
  .d3-grpo-token-histogram .axes text {
    font-size: 11px;
    fill: var(--tick-color, var(--muted-color));
  }

  /* Horizontal grid lines. */
  .d3-grpo-token-histogram .grid line {
    stroke: var(--grid-color, rgba(0,0,0,.08));
  }

  /* Per-panel heading rendered as SVG text. */
  .d3-grpo-token-histogram .chart-title {
    fill: var(--text-color);
    font-size: 13px;
    font-weight: 600;
  }

  /* Tooltip: kept off-screen and transparent until the script positions it. */
  .d3-grpo-token-histogram .d3-tooltip {
    position: absolute;
    top: 0;
    left: 0;
    transform: translate(-9999px, -9999px);
    opacity: 0;
    transition: opacity .12s ease;
    pointer-events: none;
    padding: 8px 10px;
    border: 1px solid var(--border-color);
    border-radius: 8px;
    background: var(--surface-bg);
    color: var(--text-color);
    box-shadow: 0 4px 24px rgba(0,0,0,.18);
    font-size: 12px;
    line-height: 1.35;
  }

  .d3-grpo-token-histogram .d3-tooltip__inner {
    text-align: left;
  }
</style>
|
| 52 |
+
<script>
|
| 53 |
+
(() => {
|
| 54 |
+
/**
 * Runs `cb` once a usable D3 build is available on `window`.
 *
 * If `window.d3.select` is already a function the callback runs immediately
 * and its return value is passed through. Otherwise the shared CDN <script>
 * (id `d3-cdn-script`) is looked up, created and appended to <head> if absent,
 * and `cb` is deferred to that script's `load` event.
 *
 * @param {Function} cb - invoked with no arguments once D3 is usable.
 */
const ensureD3 = (cb) => {
  const SCRIPT_ID = 'd3-cdn-script';
  const isD3Usable = () => Boolean(window.d3) && typeof window.d3.select === 'function';

  if (isD3Usable()) {
    return cb();
  }

  // Reuse the loader tag if another embed on the page already injected it.
  let loaderTag = document.getElementById(SCRIPT_ID);
  if (!loaderTag) {
    loaderTag = Object.assign(document.createElement('script'), {
      id: SCRIPT_ID,
      src: 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js',
    });
    document.head.appendChild(loaderTag);
  }

  const runWhenUsable = () => {
    if (isD3Usable()) {
      cb();
    }
  };

  loaderTag.addEventListener('load', runWhenUsable, { once: true });

  // Re-check in case `window.d3` appeared while the listener was being wired.
  if (window.d3) {
    runWhenUsable();
  }
};
|
| 69 |
+
|
| 70 |
+
const bootstrap = () => {
|
| 71 |
+
const scriptEl = document.currentScript;
|
| 72 |
+
let container = scriptEl ? scriptEl.previousElementSibling : null;
|
| 73 |
+
if (!(container && container.classList && container.classList.contains('d3-grpo-token-histogram'))) {
|
| 74 |
+
const candidates = Array.from(document.querySelectorAll('.d3-grpo-token-histogram'))
|
| 75 |
+
.filter((el) => !(el.dataset && el.dataset.mounted === 'true'));
|
| 76 |
+
container = candidates[candidates.length - 1] || null;
|
| 77 |
+
}
|
| 78 |
+
if (!container) return;
|
| 79 |
+
if (container.dataset) {
|
| 80 |
+
if (container.dataset.mounted === 'true') return;
|
| 81 |
+
container.dataset.mounted = 'true';
|
| 82 |
+
}
|
| 83 |
+
|
| 84 |
+
// Tooltip
|
| 85 |
+
container.style.position = container.style.position || 'relative';
|
| 86 |
+
let tip = container.querySelector('.d3-tooltip');
|
| 87 |
+
let tipInner;
|
| 88 |
+
if (!tip) {
|
| 89 |
+
tip = document.createElement('div');
|
| 90 |
+
tip.className = 'd3-tooltip';
|
| 91 |
+
tipInner = document.createElement('div');
|
| 92 |
+
tipInner.className = 'd3-tooltip__inner';
|
| 93 |
+
tip.appendChild(tipInner);
|
| 94 |
+
container.appendChild(tip);
|
| 95 |
+
} else {
|
| 96 |
+
tipInner = tip.querySelector('.d3-tooltip__inner') || tip;
|
| 97 |
+
}
|
| 98 |
+
|
| 99 |
+
// Handle of the pending "park off-screen" timer scheduled by hideTooltip, or
// null when none is pending. Shared so showTooltip can cancel it.
let tooltipHideTimer = null;

/**
 * Shows the tooltip next to the pointer.
 *
 * Cancels any pending hide timer first: without that, moving from one bar
 * straight onto the next lets the previous bar's delayed hide fire and push
 * the freshly shown tooltip off-screen.
 *
 * @param {string} html - markup written into the tooltip body via innerHTML.
 * @param {Event} event - pointer event used to position the tooltip
 *   relative to `container`.
 */
const showTooltip = (html, event) => {
  if (tooltipHideTimer !== null) {
    clearTimeout(tooltipHideTimer);
    tooltipHideTimer = null;
  }
  tipInner.innerHTML = html;
  tip.style.opacity = '1';
  const [mx, my] = d3.pointer(event, container);
  // Offset so the tooltip does not sit directly under the cursor.
  tip.style.transform = `translate(${mx + 12}px, ${my - 12}px)`;
};

/**
 * Fades the tooltip out, then parks it off-screen after 120 ms.
 * Only one park timer is ever pending.
 */
const hideTooltip = () => {
  tip.style.opacity = '0';
  if (tooltipHideTimer !== null) {
    clearTimeout(tooltipHideTimer);
  }
  tooltipHideTimer = setTimeout(() => {
    tooltipHideTimer = null;
    tip.style.transform = 'translate(-9999px, -9999px)';
  }, 120);
};
|
| 112 |
+
|
| 113 |
+
// SVG scaffolding
|
| 114 |
+
const svg = d3.select(container).append('svg').attr('width', '100%').style('display', 'block');
|
| 115 |
+
const gRoot = svg.append('g');
|
| 116 |
+
|
| 117 |
+
let width = 800, height = 400;
|
| 118 |
+
const margin = { top: 40, right: 16, bottom: 56, left: 60 };
|
| 119 |
+
|
| 120 |
+
// Data loading
|
| 121 |
+
const JSON_PATHS = [
|
| 122 |
+
'/data/grpo/histograms/HuggingFaceH4_details_HuggingFaceH4__SmolLM3-3B-APO-no-think_main_aime25_2025-10-02T13-20-35.json',
|
| 123 |
+
'./assets/data/grpo/histograms/HuggingFaceH4_details_HuggingFaceH4__SmolLM3-3B-APO-no-think_main_aime25_2025-10-02T13-20-35.json',
|
| 124 |
+
'../assets/data/grpo/histograms/HuggingFaceH4_details_HuggingFaceH4__SmolLM3-3B-APO-no-think_main_aime25_2025-10-02T13-20-35.json',
|
| 125 |
+
'../../assets/data/grpo/histograms/HuggingFaceH4_details_HuggingFaceH4__SmolLM3-3B-APO-no-think_main_aime25_2025-10-02T13-20-35.json'
|
| 126 |
+
];
|
| 127 |
+
|
| 128 |
+
const JSON_PATHS_THINK = [
|
| 129 |
+
'/data/grpo/histograms/HuggingFaceH4_details_HuggingFaceH4__SmolLM3-3B-APO-think_main_aime25_2025-10-17T09-02-45.json',
|
| 130 |
+
'./assets/data/grpo/histograms/HuggingFaceH4_details_HuggingFaceH4__SmolLM3-3B-APO-think_main_aime25_2025-10-17T09-02-45.json',
|
| 131 |
+
'../assets/data/grpo/histograms/HuggingFaceH4_details_HuggingFaceH4__SmolLM3-3B-APO-think_main_aime25_2025-10-17T09-02-45.json',
|
| 132 |
+
'../../assets/data/grpo/histograms/HuggingFaceH4_details_HuggingFaceH4__SmolLM3-3B-APO-think_main_aime25_2025-10-17T09-02-45.json'
|
| 133 |
+
];
|
| 134 |
+
|
| 135 |
+
/**
 * Tries each candidate URL in order and resolves with the parsed JSON of the
 * first one that responds with an OK status and a parseable body.
 *
 * Failures (network errors, non-OK statuses, invalid JSON) are deliberately
 * non-fatal so the next candidate is attempted, but the most recent failure
 * is remembered and attached as `cause` on the final error instead of being
 * discarded.
 *
 * @param {string[]} paths - candidate URLs, most preferred first.
 * @returns {Promise<any>} parsed JSON payload of the first usable candidate.
 * @throws {Error} 'JSON not found' when no candidate could be loaded;
 *   `error.cause` holds the last failure, if any.
 */
const fetchFirstAvailable = async (paths) => {
  let lastFailure;
  for (const p of paths) {
    try {
      const r = await fetch(p, { cache: 'no-cache' });
      if (r.ok) return await r.json();
      lastFailure = new Error(`HTTP ${r.status} for ${p}`);
    } catch (e) {
      // Best effort: remember why this candidate failed and move on.
      lastFailure = e;
    }
  }
  throw new Error('JSON not found', { cause: lastFailure });
};
|
| 144 |
+
|
| 145 |
+
Promise.all([
|
| 146 |
+
fetchFirstAvailable(JSON_PATHS),
|
| 147 |
+
fetchFirstAvailable(JSON_PATHS_THINK)
|
| 148 |
+
])
|
| 149 |
+
.then(([dataNoThink, dataThink]) => {
|
| 150 |
+
const datasets = [
|
| 151 |
+
{ name: 'No-Think', data: dataNoThink, title: 'APO No-Think' },
|
| 152 |
+
{ name: 'Think', data: dataThink, title: 'APO Think' }
|
| 153 |
+
];
|
| 154 |
+
|
| 155 |
+
// Get colors
|
| 156 |
+
const colors = window.ColorPalettes
|
| 157 |
+
? window.ColorPalettes.getColors('categorical', 2)
|
| 158 |
+
: ['#4e79a7', '#f28e2c'];
|
| 159 |
+
|
| 160 |
+
/**
 * Re-measures the container and resizes the SVG to match.
 *
 * Updates the enclosing-scope `width` / `height`, applies them to `svg`,
 * shifts `gRoot` by the top/left margins, and reports the drawable area.
 *
 * @returns {{innerWidth: number, innerHeight: number}} plot area size after
 *   subtracting the margins.
 */
function updateSize() {
  const measuredWidth = container.clientWidth;

  // Fall back to 800px when the container reports no width.
  width = measuredWidth || 800;
  // Height follows the width / 2.2 ratio but never drops below 400px.
  height = Math.max(400, Math.round(width / 2.2));

  svg.attr('width', width);
  svg.attr('height', height);

  const plotOrigin = `translate(${margin.left},${margin.top})`;
  gRoot.attr('transform', plotOrigin);

  const innerWidth = width - margin.left - margin.right;
  const innerHeight = height - margin.top - margin.bottom;
  return { innerWidth, innerHeight };
}
|
| 170 |
+
|
| 171 |
+
/**
 * Redraws the two side-by-side token-count histograms (No-Think on the left,
 * Think on the right).
 *
 * Reads `datasets`, `colors`, `gRoot`, `showTooltip` and `hideTooltip` from
 * the enclosing scope, re-measures through `updateSize()`, wipes everything
 * under `gRoot`, and rebuilds each panel: title, grid, bars, axes, axis labels
 * and a mean/median caption. Called on first paint and on every resize.
 */
function render() {
  const { innerWidth, innerHeight } = updateSize();

  // Clear previous
  gRoot.selectAll('*').remove();

  // Horizontal gap between the two panels, in pixels.
  const PANEL_GAP = 40;

  // Each panel takes half of the remaining width. Clamp at 0 so a very narrow
  // container cannot yield a negative (mirrored) x-range.
  const chartWidth = Math.max(0, (innerWidth - PANEL_GAP) / 2);

  // Calculate histogram bins and scales for each dataset
  const histograms = datasets.map((ds, idx) => {
    const tokens = ds.data.token_counts;
    const xOffset = idx * (chartWidth + PANEL_GAP);

    // Use independent x-axis ranges optimized for each dataset
    // No-Think: most data is 0-4k range (median 723)
    // Think: data is spread around median 16,769
    const xDomain = idx === 0
      ? [0, 4000]    // No-Think: focus on 0-4k range
      : [0, 32000];  // Think: show full range

    // Create bins
    const bins = d3.bin()
      .domain(xDomain)
      .thresholds(30)(tokens);

    const xScale = d3.scaleLinear()
      .domain(xDomain)
      .range([0, chartWidth]);

    // Fall back to 1 when no token lands inside xDomain (max is 0 or
    // undefined); a [0, 0] domain is degenerate and would not place
    // zero-count bars on the baseline.
    const maxCount = d3.max(bins, d => d.length) || 1;

    const yScale = d3.scaleLinear()
      .domain([0, maxCount])
      .range([innerHeight, 0])
      .nice();

    return { ds, bins, xScale, yScale, xOffset, idx };
  });

  // Draw each histogram
  histograms.forEach(({ ds, bins, xScale, yScale, xOffset, idx }) => {
    const g = gRoot.append('g').attr('transform', `translate(${xOffset},0)`);

    // Title
    g.append('text')
      .attr('class', 'chart-title')
      .attr('x', chartWidth / 2)
      .attr('y', -12)
      .attr('text-anchor', 'middle')
      .text(ds.title);

    // Grid (drawn before the bars so the lines sit behind them, matching the
    // overlay comparison chart)
    g.append('g')
      .attr('class', 'grid')
      .call(
        d3.axisLeft(yScale)
          .ticks(5)
          .tickSize(-chartWidth)
          .tickFormat('')
      )
      .call(sel => sel.select('.domain').remove());

    // Bars
    g.selectAll('rect.bar')
      .data(bins)
      .join('rect')
      .attr('class', 'bar')
      .attr('x', d => xScale(d.x0))
      .attr('y', d => yScale(d.length))
      // Leave a 1px gap between neighbouring bars, but never collapse below 1px.
      .attr('width', d => Math.max(1, xScale(d.x1) - xScale(d.x0) - 1))
      .attr('height', d => Math.max(0, innerHeight - yScale(d.length)))
      .attr('fill', colors[idx])
      .attr('opacity', 0.8)
      .on('mouseenter', (event, d) => {
        const html = `<strong>Tokens: ${d.x0.toFixed(0)} - ${d.x1.toFixed(0)}</strong><br/>Count: ${d.length}`;
        showTooltip(html, event);
      })
      .on('mouseleave', hideTooltip);

    // X axis
    const xAxis = g.append('g')
      .attr('class', 'axes')
      .attr('transform', `translate(0,${innerHeight})`)
      .call(d3.axisBottom(xScale).ticks(5).tickFormat(d3.format(',d')));

    xAxis.select('.domain').remove();

    // Y axis
    const yAxis = g.append('g')
      .attr('class', 'axes')
      .call(d3.axisLeft(yScale).ticks(5));

    yAxis.select('.domain').remove();

    // X axis label
    g.append('text')
      .attr('class', 'axes')
      .attr('x', chartWidth / 2)
      .attr('y', innerHeight + 40)
      .attr('text-anchor', 'middle')
      .style('font-size', '12px')
      .style('fill', 'var(--text-color)')
      .text('Token count');

    // Y axis label (only for left chart)
    if (idx === 0) {
      g.append('text')
        .attr('class', 'axes')
        .attr('transform', 'rotate(-90)')
        .attr('x', -innerHeight / 2)
        .attr('y', -45)
        .attr('text-anchor', 'middle')
        .style('font-size', '12px')
        .style('fill', 'var(--text-color)')
        .text('Frequency');
    }

    // Statistics text
    const stats = ds.data.statistics;
    const statsText = `Mean: ${stats.mean.toFixed(0)} | Median: ${stats.median.toFixed(0)}`;
    g.append('text')
      .attr('x', chartWidth / 2)
      .attr('y', innerHeight + 54)
      .attr('text-anchor', 'middle')
      .style('font-size', '11px')
      .style('fill', 'var(--muted-color)')
      .text(statsText);
  });
}
|
| 297 |
+
|
| 298 |
+
render();
|
| 299 |
+
|
| 300 |
+
if (window.ResizeObserver) {
|
| 301 |
+
const ro = new ResizeObserver(() => render());
|
| 302 |
+
ro.observe(container);
|
| 303 |
+
} else {
|
| 304 |
+
window.addEventListener('resize', render);
|
| 305 |
+
}
|
| 306 |
+
})
|
| 307 |
+
.catch((err) => {
|
| 308 |
+
const pre = document.createElement('pre');
|
| 309 |
+
pre.textContent = 'Error loading data: ' + err.message;
|
| 310 |
+
pre.style.cssText = 'color:red;font-size:12px;padding:12px;margin:0;';
|
| 311 |
+
container.appendChild(pre);
|
| 312 |
+
});
|
| 313 |
+
};
|
| 314 |
+
|
| 315 |
+
if (document.readyState === 'loading') {
|
| 316 |
+
document.addEventListener('DOMContentLoaded', () => ensureD3(bootstrap), { once: true });
|
| 317 |
+
} else {
|
| 318 |
+
ensureD3(bootstrap);
|
| 319 |
+
}
|
| 320 |
+
})();
|
| 321 |
+
</script>
|