lusxvr committed on
Commit
1d06a79
·
1 Parent(s): 61e07e3
app/src/content/article.mdx CHANGED
@@ -261,7 +261,7 @@ Each of our ablations trains a 450M model with maximal image size of 1536x1536 p
261
  ### How does FineVision compare against the Baselines?
262
  Compared against existing VLM training datasets, FineVision produces significantly higher benchmark ranks than the other options.
263
 
264
- <HtmlEmbed src="against-baselines.html" title="D3 Line" desc="TODO - Average Rank of Models trained on different open source datasets." />
265
 
266
  ### How contaminated are the datasets?
267
  To investigate data leakage from benchmarks into this dataset, we construct a deduplication pipeline based on the sample images. We embed the images of 66 image-test datasets from the lmms-eval framework using the SSCD descriptor, and compute the cosine similarity between our samples and the test-set embeddings. Whenever a sample has a similarity higher than a threshold of 0.95, it is assumed to be a duplicate. While our tests with various thresholds show that this flags some samples that are not actual duplicates (especially when the images are similar but differ in detail, like graphs or tables), we preferred to err on the side of caution. We open-source the deduplication pipeline here as well as the precomputed test-set embeddings here.
 
261
  ### How does FineVision compare against the Baselines?
262
  Compared against existing VLM training datasets, FineVision produces significantly higher benchmark ranks than the other options.
263
 
264
+ <HtmlEmbed src="d3-line.html" title="D3 Line" desc="TODO - Average Rank of Models trained on different open source datasets." />
265
 
266
  ### How contaminated are the datasets?
267
  To investigate data leakage from benchmarks into this dataset, we construct a deduplication pipeline based on the sample images. We embed the images of 66 image-test datasets from the lmms-eval framework using the SSCD descriptor, and compute the cosine similarity between our samples and the test-set embeddings. Whenever a sample has a similarity higher than a threshold of 0.95, it is assumed to be a duplicate. While our tests with various thresholds show that this flags some samples that are not actual duplicates (especially when the images are similar but differ in detail, like graphs or tables), we preferred to err on the side of caution. We open-source the deduplication pipeline here as well as the precomputed test-set embeddings here.
app/src/content/embeds/d3-line.html CHANGED
@@ -146,11 +146,18 @@
146
  controls.appendChild(legendInline);
147
  controls.appendChild(labelMetric);
148
 
149
- // Create SVG
150
  const svg = d3.select(container).append('svg')
151
  .attr('width', '100%')
152
  .style('display', 'block');
153
 
 
 
 
 
 
 
 
154
  // Groups
155
  const gRoot = svg.append('g');
156
  const gGrid = gRoot.append('g').attr('class', 'grid');
@@ -200,16 +207,30 @@
200
  let xScale = d3.scaleLinear();
201
  let yScale = d3.scaleLinear();
202
 
203
- // Line generators
204
- const lineGenSmooth = d3.line()
205
- .curve(d3.curveCatmullRom.alpha(0.05))
206
- .x((d) => xScale(d.step))
207
- .y((d) => yScale(d.value));
208
- const lineGenStep = d3.line()
209
- .curve(d3.curveStepAfter)
210
  .x((d) => xScale(d.step))
211
  .y((d) => yScale(d.value));
212
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
  // Hover elements
214
  const hoverLine = gHover.append('line').attr('stroke-width', 1);
215
 
@@ -222,7 +243,7 @@
222
  const gridColor = isDark ? 'rgba(255,255,255,0.08)' : 'rgba(0,0,0,0.05)';
223
 
224
  width = container.clientWidth || 800;
225
- height = Math.max(260, Math.round(width / 3));
226
  svg.attr('width', width).attr('height', height);
227
 
228
  const innerWidth = width - margin.left - margin.right;
@@ -359,32 +380,104 @@
359
 
360
  const { innerWidth, innerHeight } = updateScales();
361
 
362
- // Bind lines
363
  const series = runs.map((r, i) => ({
364
  run: r,
365
  color: pool[i % pool.length],
 
366
  values: (map[r]||[])
367
  .slice()
368
  .sort((a,b)=>a.step-b.step)
369
  .map(pt => isRankStrict ? { step: pt.step, value: Math.round(pt.value) } : pt)
370
- .filter(pt => !isRankStrict || (pt.step % 1000 === 0))
371
  }));
 
 
372
  const paths = gLines.selectAll('path.run-line').data(series, d=>d.run);
373
- const gen = isRank ? lineGenStep : lineGenSmooth;
374
  paths.enter().append('path').attr('class','run-line').attr('fill','none').attr('stroke-width',2)
375
  .attr('stroke', d=>d.color).attr('opacity',0.9)
376
- .attr('d', d=>gen(d.values))
377
  .merge(paths)
378
  .transition().duration(200)
379
  .attr('stroke', d=>d.color)
380
- .attr('d', d=>gen(d.values));
381
  paths.exit().remove();
382
 
383
- // No point markers for rank metrics (and none for others)
384
  gPoints.selectAll('*').remove();
 
 
 
 
 
 
 
 
 
 
 
 
 
385
 
386
- // Inline legend content (row, right side) compact
387
- legendInline.innerHTML = series.map(s => `<span style="display:inline-flex;align-items:center;gap:6px;white-space:nowrap;"><span style="width:18px;height:10px;background:${s.color};border-radius:3px;display:inline-block"></span><span>${s.run}</span></span>`).join('');
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
388
 
389
  // Hover
390
  const stepSet = new Set(); series.forEach(s=>s.values.forEach(v=>stepSet.add(v.step)));
 
146
  controls.appendChild(legendInline);
147
  controls.appendChild(labelMetric);
148
 
149
+ // Create SVG with marker definitions
150
  const svg = d3.select(container).append('svg')
151
  .attr('width', '100%')
152
  .style('display', 'block');
153
 
154
+ // Add marker definitions for different shapes
155
+ const defs = svg.append('defs');
156
+
157
+ // Academic marker shapes
158
+ const markerShapes = ['circle', 'square', 'triangle', 'diamond', 'inverted-triangle'];
159
+ const markerSize = 8;
160
+
161
  // Groups
162
  const gRoot = svg.append('g');
163
  const gGrid = gRoot.append('g').attr('class', 'grid');
 
207
  let xScale = d3.scaleLinear();
208
  let yScale = d3.scaleLinear();
209
 
210
+ // Line generators - simple linear connections
211
+ const lineGen = d3.line()
 
 
 
 
 
212
  .x((d) => xScale(d.step))
213
  .y((d) => yScale(d.value));
214
 
215
+ // Function to draw different marker shapes
216
+ function drawMarker(selection, shape, size) { // Appends one marker element of the named shape to `selection` (drawn around the local origin) and returns the appended-element selection so callers can chain .attr()/.style().
217
+ const s = size / 2; // half of `size`: used as the circle radius and as the half-extent of the other shapes
218
+ switch (shape) {
219
+ case 'circle':
220
+ return selection.append('circle').attr('r', s);
221
+ case 'square':
222
+ return selection.append('rect').attr('x', -s).attr('y', -s).attr('width', size).attr('height', size); // size x size rect whose top-left is (-s, -s), i.e. centered on the origin
223
+ case 'triangle':
224
+ return selection.append('path').attr('d', `M0,${-s * 1.2} L${s * 1.1},${s * 0.6} L${-s * 1.1},${s * 0.6} Z`); // closed path: apex at y = -1.2*s, base corners at y = +0.6*s
225
+ case 'diamond':
226
+ return selection.append('path').attr('d', `M0,${-s * 1.2} L${s * 1.1},0 L0,${s * 1.2} L${-s * 1.1},0 Z`); // closed path through the four points (0, -1.2*s), (1.1*s, 0), (0, 1.2*s), (-1.1*s, 0)
227
+ case 'inverted-triangle':
228
+ return selection.append('path').attr('d', `M0,${s * 1.2} L${s * 1.1},${-s * 0.6} L${-s * 1.1},${-s * 0.6} Z`); // the 'triangle' path with every y coordinate negated
229
+ default:
230
+ return selection.append('circle').attr('r', s); // any unrecognized shape name falls back to the same circle as 'circle'
231
+ }
232
+ }
233
+
234
  // Hover elements
235
  const hoverLine = gHover.append('line').attr('stroke-width', 1);
236
 
 
243
  const gridColor = isDark ? 'rgba(255,255,255,0.08)' : 'rgba(0,0,0,0.05)';
244
 
245
  width = container.clientWidth || 800;
246
+ height = Math.max(360, Math.round(width / 2.2));
247
  svg.attr('width', width).attr('height', height);
248
 
249
  const innerWidth = width - margin.left - margin.right;
 
380
 
381
  const { innerWidth, innerHeight } = updateScales();
382
 
383
+ // Bind lines and markers
384
  const series = runs.map((r, i) => ({
385
  run: r,
386
  color: pool[i % pool.length],
387
+ marker: markerShapes[i % markerShapes.length],
388
  values: (map[r]||[])
389
  .slice()
390
  .sort((a,b)=>a.step-b.step)
391
  .map(pt => isRankStrict ? { step: pt.step, value: Math.round(pt.value) } : pt)
 
392
  }));
393
+
394
+ // Draw lines
395
  const paths = gLines.selectAll('path.run-line').data(series, d=>d.run);
 
396
  paths.enter().append('path').attr('class','run-line').attr('fill','none').attr('stroke-width',2)
397
  .attr('stroke', d=>d.color).attr('opacity',0.9)
398
+ .attr('d', d=>lineGen(d.values))
399
  .merge(paths)
400
  .transition().duration(200)
401
  .attr('stroke', d=>d.color)
402
+ .attr('d', d=>lineGen(d.values));
403
  paths.exit().remove();
404
 
405
+ // Draw markers for each data point
406
  gPoints.selectAll('*').remove();
407
+ series.forEach((s, seriesIndex) => {
408
+ const pointGroup = gPoints.selectAll(`.points-${seriesIndex}`)
409
+ .data(s.values)
410
+ .join('g')
411
+ .attr('class', `points-${seriesIndex}`)
412
+ .attr('transform', d => `translate(${xScale(d.step)},${yScale(d.value)})`);
413
+
414
+ drawMarker(pointGroup, s.marker, markerSize)
415
+ .attr('fill', s.color)
416
+ .attr('stroke', s.color)
417
+ .attr('stroke-width', 1.5)
418
+ .style('cursor', 'crosshair');
419
+ });
420
 
421
+ // Inline legend content with marker shapes
422
+ legendInline.innerHTML = '';
423
+ series.forEach(s => {
424
+ const legendItem = document.createElement('span');
425
+ legendItem.style.cssText = 'display:inline-flex;align-items:center;gap:6px;white-space:nowrap;';
426
+
427
+ // Create small SVG for marker shape
428
+ const markerSvg = document.createElementNS('http://www.w3.org/2000/svg', 'svg');
429
+ markerSvg.setAttribute('width', '16');
430
+ markerSvg.setAttribute('height', '12');
431
+ markerSvg.style.display = 'inline-block';
432
+
433
+ const g = document.createElementNS('http://www.w3.org/2000/svg', 'g');
434
+ g.setAttribute('transform', 'translate(8,6)');
435
+
436
+ let shape;
437
+ const size = 6;
438
+ const halfSize = size / 2;
439
+ switch(s.marker) {
440
+ case 'circle':
441
+ shape = document.createElementNS('http://www.w3.org/2000/svg', 'circle');
442
+ shape.setAttribute('r', halfSize);
443
+ break;
444
+ case 'square':
445
+ shape = document.createElementNS('http://www.w3.org/2000/svg', 'rect');
446
+ shape.setAttribute('x', -halfSize);
447
+ shape.setAttribute('y', -halfSize);
448
+ shape.setAttribute('width', size);
449
+ shape.setAttribute('height', size);
450
+ break;
451
+ case 'triangle':
452
+ shape = document.createElementNS('http://www.w3.org/2000/svg', 'path');
453
+ shape.setAttribute('d', `M0,${-halfSize * 1.2} L${halfSize * 1.1},${halfSize * 0.6} L${-halfSize * 1.1},${halfSize * 0.6} Z`);
454
+ break;
455
+ case 'diamond':
456
+ shape = document.createElementNS('http://www.w3.org/2000/svg', 'path');
457
+ shape.setAttribute('d', `M0,${-halfSize * 1.2} L${halfSize * 1.1},0 L0,${halfSize * 1.2} L${-halfSize * 1.1},0 Z`);
458
+ break;
459
+ case 'inverted-triangle':
460
+ shape = document.createElementNS('http://www.w3.org/2000/svg', 'path');
461
+ shape.setAttribute('d', `M0,${halfSize * 1.2} L${halfSize * 1.1},${-halfSize * 0.6} L${-halfSize * 1.1},${-halfSize * 0.6} Z`);
462
+ break;
463
+ default:
464
+ shape = document.createElementNS('http://www.w3.org/2000/svg', 'circle');
465
+ shape.setAttribute('r', halfSize);
466
+ }
467
+ shape.setAttribute('fill', s.color);
468
+ shape.setAttribute('stroke', s.color);
469
+ shape.setAttribute('stroke-width', '1');
470
+
471
+ g.appendChild(shape);
472
+ markerSvg.appendChild(g);
473
+
474
+ const label = document.createElement('span');
475
+ label.textContent = s.run;
476
+
477
+ legendItem.appendChild(markerSvg);
478
+ legendItem.appendChild(label);
479
+ legendInline.appendChild(legendItem);
480
+ });
481
 
482
  // Hover
483
  const stepSet = new Set(); series.forEach(s=>s.values.forEach(v=>stepSet.add(v.step)));