lewtun HF Staff committed on
Commit
f9f1452
·
1 Parent(s): a977468

Polish RL

Browse files
app/src/content/embeds/d3-grpo-aime25.html CHANGED
@@ -27,7 +27,7 @@
27
 
28
  .d3-grpo-aime25 .grid line {
29
  stroke: var(--grid-color);
30
- stroke-dasharray: 2, 2;
31
  }
32
 
33
  .d3-grpo-aime25 .line {
@@ -190,9 +190,9 @@
190
  if (attr && attr.trim()) {
191
  providedData = attr.trim().startsWith('[') ? JSON.parse(attr) : attr.trim();
192
  }
193
- } catch (_) { }
194
 
195
- const DEFAULT_CSV = '/data/aime25_perf.csv';
196
  const ensureDataPrefix = (p) => {
197
  if (typeof p !== 'string' || !p) return p;
198
  if (p.startsWith('/')) return p;
@@ -205,11 +205,11 @@
205
  const CSV_PATHS = Array.isArray(providedData)
206
  ? normalizeInput(providedData)
207
  : (typeof providedData === 'string' ? normalizeInput(providedData) || [DEFAULT_CSV] : [
208
- DEFAULT_CSV,
209
- './assets/data/aime25_perf.csv',
210
- '../assets/data/aime25_perf.csv',
211
- '../../assets/data/aime25_perf.csv'
212
- ]);
213
 
214
  const fetchFirstAvailable = async (paths) => {
215
  const errors = [];
@@ -398,7 +398,7 @@
398
  // Tooltip interactions
399
  const bisect = d3.bisector(d => d.step).left;
400
 
401
- svg.on('mousemove', function (event) {
402
  const [mx] = d3.pointer(event, gRoot.node());
403
  const step = xScale.invert(mx);
404
 
@@ -456,10 +456,10 @@
456
  if (!title) {
457
  title = document.createElement('div');
458
  title.className = 'legend-title';
459
- title.textContent = 'Overlong Penalty';
460
  legend.appendChild(title);
461
  } else {
462
- title.textContent = 'Overlong Penalty';
463
  }
464
 
465
  let items = legend.querySelector('.items');
@@ -533,4 +533,4 @@
533
  ensureD3(bootstrap);
534
  }
535
  })();
536
- </script>
 
27
 
28
  .d3-grpo-aime25 .grid line {
29
  stroke: var(--grid-color);
30
+ stroke-dasharray: 2,2;
31
  }
32
 
33
  .d3-grpo-aime25 .line {
 
190
  if (attr && attr.trim()) {
191
  providedData = attr.trim().startsWith('[') ? JSON.parse(attr) : attr.trim();
192
  }
193
+ } catch (_) {}
194
 
195
+ const DEFAULT_CSV = '/data/grpo/aime25_perf.csv';
196
  const ensureDataPrefix = (p) => {
197
  if (typeof p !== 'string' || !p) return p;
198
  if (p.startsWith('/')) return p;
 
205
  const CSV_PATHS = Array.isArray(providedData)
206
  ? normalizeInput(providedData)
207
  : (typeof providedData === 'string' ? normalizeInput(providedData) || [DEFAULT_CSV] : [
208
+ DEFAULT_CSV,
209
+ './assets/data/grpo/aime25_perf.csv',
210
+ '../assets/data/grpo/aime25_perf.csv',
211
+ '../../assets/data/grpo/aime25_perf.csv'
212
+ ]);
213
 
214
  const fetchFirstAvailable = async (paths) => {
215
  const errors = [];
 
398
  // Tooltip interactions
399
  const bisect = d3.bisector(d => d.step).left;
400
 
401
+ svg.on('mousemove', function(event) {
402
  const [mx] = d3.pointer(event, gRoot.node());
403
  const step = xScale.invert(mx);
404
 
 
456
  if (!title) {
457
  title = document.createElement('div');
458
  title.className = 'legend-title';
459
+ title.innerHTML = 'Overlong penalty (L<sub>cache</sub> - L<sub>max</sub>)';
460
  legend.appendChild(title);
461
  } else {
462
+ title.innerHTML = 'Overlong penalty (L<sub>cache</sub> - L<sub>max</sub>)';
463
  }
464
 
465
  let items = legend.querySelector('.items');
 
533
  ensureD3(bootstrap);
534
  }
535
  })();
536
+ </script>
app/src/content/embeds/d3-grpo-reward-curves.html CHANGED
@@ -27,7 +27,7 @@
27
 
28
  .d3-grpo-reward-curves .grid line {
29
  stroke: var(--grid-color);
30
- stroke-dasharray: 2, 2;
31
  }
32
 
33
  .d3-grpo-reward-curves .confidence-band {
@@ -194,9 +194,9 @@
194
  if (attr && attr.trim()) {
195
  providedData = attr.trim().startsWith('[') ? JSON.parse(attr) : attr.trim();
196
  }
197
- } catch (_) { }
198
 
199
- const DEFAULT_CSV = '/data/rl_reward_curves.csv';
200
  const ensureDataPrefix = (p) => {
201
  if (typeof p !== 'string' || !p) return p;
202
  // If it starts with /, it's already absolute
@@ -211,11 +211,11 @@
211
  const CSV_PATHS = Array.isArray(providedData)
212
  ? normalizeInput(providedData)
213
  : (typeof providedData === 'string' ? normalizeInput(providedData) || [DEFAULT_CSV] : [
214
- DEFAULT_CSV,
215
- './assets/data/rl_reward_curves.csv',
216
- '../assets/data/rl_reward_curves.csv',
217
- '../../assets/data/rl_reward_curves.csv'
218
- ]);
219
 
220
  const fetchFirstAvailable = async (paths) => {
221
  const errors = [];
@@ -517,7 +517,7 @@
517
  // Tooltip interactions
518
  const bisect = d3.bisector(d => d.step).left;
519
 
520
- svg.on('mousemove', function (event) {
521
  const [mx] = d3.pointer(event, gRoot.node());
522
  const step = xScale.invert(mx);
523
 
@@ -583,10 +583,10 @@
583
  if (!title) {
584
  title = document.createElement('div');
585
  title.className = 'legend-title';
586
- title.textContent = 'Overlong Penalty';
587
  legend.appendChild(title);
588
  } else {
589
- title.textContent = 'Overlong Penalty';
590
  }
591
 
592
  let items = legend.querySelector('.items');
@@ -687,45 +687,21 @@
687
  metricToggleGroup.appendChild(lengthBtn);
688
  metricGroup.appendChild(metricToggleGroup);
689
  controls.appendChild(metricGroup);
690
-
691
- // Display options group
692
- const displayGroup = document.createElement('div');
693
- displayGroup.className = 'control-group';
694
-
695
- const displayLabel = document.createElement('label');
696
- displayLabel.textContent = 'Display';
697
- displayGroup.appendChild(displayLabel);
698
-
699
- const displayToggleGroup = document.createElement('div');
700
- displayToggleGroup.className = 'toggle-group';
701
-
702
- const runningAvgBtn = document.createElement('button');
703
- runningAvgBtn.className = 'toggle-btn' + (showRunningAverage ? ' active' : '');
704
- runningAvgBtn.textContent = `Running Avg (${RUNNING_AVG_WINDOW} steps)`;
705
- runningAvgBtn.addEventListener('click', () => {
706
- showRunningAverage = !showRunningAverage;
707
- makeControls();
708
- render();
709
- });
710
-
711
- displayToggleGroup.appendChild(runningAvgBtn);
712
- displayGroup.appendChild(displayToggleGroup);
713
- controls.appendChild(displayGroup);
714
  }
715
 
716
  // Load both datasets
717
  const REWARD_PATHS = [
718
- '/data/rl_reward_curves.csv',
719
- './assets/data/rl_reward_curves.csv',
720
- '../assets/data/rl_reward_curves.csv',
721
- '../../assets/data/rl_reward_curves.csv'
722
  ];
723
 
724
  const LENGTH_PATHS = [
725
- '/data/rl_mean_teminated_lengths.csv',
726
- './assets/data/rl_mean_teminated_lengths.csv',
727
- '../assets/data/rl_mean_teminated_lengths.csv',
728
- '../../assets/data/rl_mean_teminated_lengths.csv'
729
  ];
730
 
731
  Promise.all([
@@ -767,4 +743,4 @@
767
  ensureD3(bootstrap);
768
  }
769
  })();
770
- </script>
 
27
 
28
  .d3-grpo-reward-curves .grid line {
29
  stroke: var(--grid-color);
30
+ stroke-dasharray: 2,2;
31
  }
32
 
33
  .d3-grpo-reward-curves .confidence-band {
 
194
  if (attr && attr.trim()) {
195
  providedData = attr.trim().startsWith('[') ? JSON.parse(attr) : attr.trim();
196
  }
197
+ } catch (_) {}
198
 
199
+ const DEFAULT_CSV = '/data/grpo/rl_reward_curves.csv';
200
  const ensureDataPrefix = (p) => {
201
  if (typeof p !== 'string' || !p) return p;
202
  // If it starts with /, it's already absolute
 
211
  const CSV_PATHS = Array.isArray(providedData)
212
  ? normalizeInput(providedData)
213
  : (typeof providedData === 'string' ? normalizeInput(providedData) || [DEFAULT_CSV] : [
214
+ DEFAULT_CSV,
215
+ './assets/data/grpo/rl_reward_curves.csv',
216
+ '../assets/data/grpo/rl_reward_curves.csv',
217
+ '../../assets/data/grpo/rl_reward_curves.csv'
218
+ ]);
219
 
220
  const fetchFirstAvailable = async (paths) => {
221
  const errors = [];
 
517
  // Tooltip interactions
518
  const bisect = d3.bisector(d => d.step).left;
519
 
520
+ svg.on('mousemove', function(event) {
521
  const [mx] = d3.pointer(event, gRoot.node());
522
  const step = xScale.invert(mx);
523
 
 
583
  if (!title) {
584
  title = document.createElement('div');
585
  title.className = 'legend-title';
586
+ title.innerHTML = 'Overlong penalty (L<sub>cache</sub> - L<sub>max</sub>)';
587
  legend.appendChild(title);
588
  } else {
589
+ title.innerHTML = 'Overlong penalty (L<sub>cache</sub> - L<sub>max</sub>)';
590
  }
591
 
592
  let items = legend.querySelector('.items');
 
687
  metricToggleGroup.appendChild(lengthBtn);
688
  metricGroup.appendChild(metricToggleGroup);
689
  controls.appendChild(metricGroup);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
690
  }
691
 
692
  // Load both datasets
693
  const REWARD_PATHS = [
694
+ '/data/grpo/rl_reward_curves.csv',
695
+ './assets/data/grpo/rl_reward_curves.csv',
696
+ '../assets/data/grpo/rl_reward_curves.csv',
697
+ '../../assets/data/grpo/rl_reward_curves.csv'
698
  ];
699
 
700
  const LENGTH_PATHS = [
701
+ '/data/grpo/rl_mean_teminated_lengths.csv',
702
+ './assets/data/grpo/rl_mean_teminated_lengths.csv',
703
+ '../assets/data/grpo/rl_mean_teminated_lengths.csv',
704
+ '../../assets/data/grpo/rl_mean_teminated_lengths.csv'
705
  ];
706
 
707
  Promise.all([
 
743
  ensureD3(bootstrap);
744
  }
745
  })();
746
+ </script>
app/src/content/embeds/d3-grpo-token-comparison.html CHANGED
@@ -5,36 +5,29 @@
5
  position: relative;
6
  font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
7
  }
8
-
9
  .d3-grpo-token-comparison svg {
10
  display: block;
11
  width: 100%;
12
  }
13
-
14
  .d3-grpo-token-comparison .bar {
15
  stroke: none;
16
  }
17
-
18
  .d3-grpo-token-comparison .axes path,
19
  .d3-grpo-token-comparison .axes line {
20
  stroke: var(--axis-color, var(--text-color));
21
  }
22
-
23
  .d3-grpo-token-comparison .axes text {
24
  fill: var(--tick-color, var(--muted-color));
25
  font-size: 11px;
26
  }
27
-
28
  .d3-grpo-token-comparison .grid line {
29
- stroke: var(--grid-color, rgba(0, 0, 0, .08));
30
  }
31
-
32
  .d3-grpo-token-comparison .chart-title {
33
  font-size: 13px;
34
  font-weight: 600;
35
  fill: var(--text-color);
36
  }
37
-
38
  .d3-grpo-token-comparison .d3-tooltip {
39
  position: absolute;
40
  top: 0;
@@ -48,15 +41,13 @@
48
  border: 1px solid var(--border-color);
49
  background: var(--surface-bg);
50
  color: var(--text-color);
51
- box-shadow: 0 4px 24px rgba(0, 0, 0, .18);
52
  opacity: 0;
53
  transition: opacity .12s ease;
54
  }
55
-
56
  .d3-grpo-token-comparison .d3-tooltip__inner {
57
  text-align: left;
58
  }
59
-
60
  .d3-grpo-token-comparison .legend {
61
  display: flex;
62
  flex-direction: column;
@@ -64,19 +55,16 @@
64
  gap: 6px;
65
  margin-top: 16px;
66
  }
67
-
68
  .d3-grpo-token-comparison .legend-title {
69
  font-size: 12px;
70
  font-weight: 700;
71
  color: var(--text-color);
72
  }
73
-
74
  .d3-grpo-token-comparison .legend .items {
75
  display: flex;
76
  flex-wrap: wrap;
77
  gap: 8px 14px;
78
  }
79
-
80
  .d3-grpo-token-comparison .legend .item {
81
  display: inline-flex;
82
  align-items: center;
@@ -85,14 +73,12 @@
85
  font-size: 12px;
86
  color: var(--text-color);
87
  }
88
-
89
  .d3-grpo-token-comparison .legend .swatch {
90
  width: 14px;
91
  height: 14px;
92
  border-radius: 3px;
93
  border: 1px solid var(--border-color);
94
  }
95
-
96
  .d3-grpo-token-comparison .controls {
97
  display: flex;
98
  gap: 16px;
@@ -101,20 +87,17 @@
101
  flex-wrap: wrap;
102
  margin-top: 8px;
103
  }
104
-
105
  .d3-grpo-token-comparison .control-group {
106
  display: flex;
107
  flex-direction: column;
108
  align-items: flex-start;
109
  gap: 6px;
110
  }
111
-
112
  .d3-grpo-token-comparison .controls label {
113
  font-size: 12px;
114
  font-weight: 700;
115
  color: var(--text-color);
116
  }
117
-
118
  .d3-grpo-token-comparison .controls select {
119
  font-size: 12px;
120
  padding: 8px 28px 8px 10px;
@@ -124,20 +107,17 @@
124
  color: var(--text-color);
125
  cursor: pointer;
126
  }
127
-
128
  .d3-grpo-token-comparison .slider-container {
129
  display: flex;
130
  flex-direction: column;
131
  gap: 8px;
132
  min-width: 300px;
133
  }
134
-
135
  .d3-grpo-token-comparison .slider-row {
136
  display: flex;
137
  align-items: center;
138
  gap: 12px;
139
  }
140
-
141
  .d3-grpo-token-comparison input[type="range"] {
142
  flex: 1;
143
  height: 6px;
@@ -147,7 +127,6 @@
147
  -webkit-appearance: none;
148
  cursor: pointer;
149
  }
150
-
151
  .d3-grpo-token-comparison input[type="range"]::-webkit-slider-thumb {
152
  -webkit-appearance: none;
153
  appearance: none;
@@ -157,7 +136,6 @@
157
  background: var(--primary-color);
158
  cursor: pointer;
159
  }
160
-
161
  .d3-grpo-token-comparison input[type="range"]::-moz-range-thumb {
162
  width: 16px;
163
  height: 16px;
@@ -166,7 +144,6 @@
166
  cursor: pointer;
167
  border: none;
168
  }
169
-
170
  .d3-grpo-token-comparison .slider-value {
171
  font-size: 12px;
172
  font-weight: 600;
@@ -279,17 +256,17 @@
279
 
280
  const fetchFirstAvailable = async (filename) => {
281
  const paths = [
282
- `/data/${filename}`,
283
- `./assets/data/${filename}`,
284
- `../assets/data/${filename}`,
285
- `../../assets/data/${filename}`
286
  ];
287
 
288
  for (const p of paths) {
289
  try {
290
  const r = await fetch(p, { cache: 'no-cache' });
291
  if (r.ok) return await r.json();
292
- } catch (e) { }
293
  }
294
  throw new Error(`JSON not found: ${filename}`);
295
  };
@@ -367,7 +344,7 @@
367
  const legendItems = document.createElement('div');
368
  legendItems.className = 'items';
369
 
370
- ['APO No-Think (Baseline)', 'GRPO on Math with Overlong Penalty'].forEach((name, idx) => {
371
  const item = document.createElement('span');
372
  item.className = 'item';
373
 
@@ -524,4 +501,4 @@
524
  ensureD3(bootstrap);
525
  }
526
  })();
527
- </script>
 
5
  position: relative;
6
  font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
7
  }
 
8
  .d3-grpo-token-comparison svg {
9
  display: block;
10
  width: 100%;
11
  }
 
12
  .d3-grpo-token-comparison .bar {
13
  stroke: none;
14
  }
 
15
  .d3-grpo-token-comparison .axes path,
16
  .d3-grpo-token-comparison .axes line {
17
  stroke: var(--axis-color, var(--text-color));
18
  }
 
19
  .d3-grpo-token-comparison .axes text {
20
  fill: var(--tick-color, var(--muted-color));
21
  font-size: 11px;
22
  }
 
23
  .d3-grpo-token-comparison .grid line {
24
+ stroke: var(--grid-color, rgba(0,0,0,.08));
25
  }
 
26
  .d3-grpo-token-comparison .chart-title {
27
  font-size: 13px;
28
  font-weight: 600;
29
  fill: var(--text-color);
30
  }
 
31
  .d3-grpo-token-comparison .d3-tooltip {
32
  position: absolute;
33
  top: 0;
 
41
  border: 1px solid var(--border-color);
42
  background: var(--surface-bg);
43
  color: var(--text-color);
44
+ box-shadow: 0 4px 24px rgba(0,0,0,.18);
45
  opacity: 0;
46
  transition: opacity .12s ease;
47
  }
 
48
  .d3-grpo-token-comparison .d3-tooltip__inner {
49
  text-align: left;
50
  }
 
51
  .d3-grpo-token-comparison .legend {
52
  display: flex;
53
  flex-direction: column;
 
55
  gap: 6px;
56
  margin-top: 16px;
57
  }
 
58
  .d3-grpo-token-comparison .legend-title {
59
  font-size: 12px;
60
  font-weight: 700;
61
  color: var(--text-color);
62
  }
 
63
  .d3-grpo-token-comparison .legend .items {
64
  display: flex;
65
  flex-wrap: wrap;
66
  gap: 8px 14px;
67
  }
 
68
  .d3-grpo-token-comparison .legend .item {
69
  display: inline-flex;
70
  align-items: center;
 
73
  font-size: 12px;
74
  color: var(--text-color);
75
  }
 
76
  .d3-grpo-token-comparison .legend .swatch {
77
  width: 14px;
78
  height: 14px;
79
  border-radius: 3px;
80
  border: 1px solid var(--border-color);
81
  }
 
82
  .d3-grpo-token-comparison .controls {
83
  display: flex;
84
  gap: 16px;
 
87
  flex-wrap: wrap;
88
  margin-top: 8px;
89
  }
 
90
  .d3-grpo-token-comparison .control-group {
91
  display: flex;
92
  flex-direction: column;
93
  align-items: flex-start;
94
  gap: 6px;
95
  }
 
96
  .d3-grpo-token-comparison .controls label {
97
  font-size: 12px;
98
  font-weight: 700;
99
  color: var(--text-color);
100
  }
 
101
  .d3-grpo-token-comparison .controls select {
102
  font-size: 12px;
103
  padding: 8px 28px 8px 10px;
 
107
  color: var(--text-color);
108
  cursor: pointer;
109
  }
 
110
  .d3-grpo-token-comparison .slider-container {
111
  display: flex;
112
  flex-direction: column;
113
  gap: 8px;
114
  min-width: 300px;
115
  }
 
116
  .d3-grpo-token-comparison .slider-row {
117
  display: flex;
118
  align-items: center;
119
  gap: 12px;
120
  }
 
121
  .d3-grpo-token-comparison input[type="range"] {
122
  flex: 1;
123
  height: 6px;
 
127
  -webkit-appearance: none;
128
  cursor: pointer;
129
  }
 
130
  .d3-grpo-token-comparison input[type="range"]::-webkit-slider-thumb {
131
  -webkit-appearance: none;
132
  appearance: none;
 
136
  background: var(--primary-color);
137
  cursor: pointer;
138
  }
 
139
  .d3-grpo-token-comparison input[type="range"]::-moz-range-thumb {
140
  width: 16px;
141
  height: 16px;
 
144
  cursor: pointer;
145
  border: none;
146
  }
 
147
  .d3-grpo-token-comparison .slider-value {
148
  font-size: 12px;
149
  font-weight: 600;
 
256
 
257
  const fetchFirstAvailable = async (filename) => {
258
  const paths = [
259
+ `/data/grpo/histograms/${filename}`,
260
+ `./assets/data/grpo/histograms/${filename}`,
261
+ `../assets/data/grpo/histograms/${filename}`,
262
+ `../../assets/data/grpo/histograms/${filename}`
263
  ];
264
 
265
  for (const p of paths) {
266
  try {
267
  const r = await fetch(p, { cache: 'no-cache' });
268
  if (r.ok) return await r.json();
269
+ } catch (e) {}
270
  }
271
  throw new Error(`JSON not found: ${filename}`);
272
  };
 
344
  const legendItems = document.createElement('div');
345
  legendItems.className = 'items';
346
 
347
+ ['APO No-Think (Baseline)', 'GRPO with overlong penalty'].forEach((name, idx) => {
348
  const item = document.createElement('span');
349
  item.className = 'item';
350
 
 
501
  ensureD3(bootstrap);
502
  }
503
  })();
504
+ </script>