finephrase / app /src /content /embeds /experiment-overview.html
joelniklaus's picture
joelniklaus HF Staff
fix dark mode issue
1006dbd
<div class="d3-experiment-overview" style="width:100%;margin:10px 0;aspect-ratio:1/1;min-height:520px;"></div>
<style>
.d3-experiment-overview { position: relative; font-family: system-ui, -apple-system, sans-serif; }
</style>
<script>
(() => {
/**
 * Make sure D3 and the d3-sankey plugin are available on `window.d3`, then call `cb`.
 *
 * Script tags are looked up by id before being created, so several embeds on one
 * page share a single download of each library.
 *
 * @param {Function} cb - Invoked once `window.d3.select` and `window.d3.sankey` are
 *   both functions. Called synchronously (and its result returned) when both are
 *   already present; otherwise called from the plugin script's `load` event.
 * @returns {*} The result of `cb()` on the synchronous path, otherwise `undefined`.
 */
const ensureD3 = (cb) => {
  const hasCore = () => Boolean(window.d3) && typeof window.d3.select === 'function';
  const hasSankey = () => hasCore() && typeof window.d3.sankey === 'function';
  if (hasSankey()) return cb();

  // Reuse the <script> with this id if one is already in the document, else inject it.
  const ensureScript = (id, src) => {
    let el = document.getElementById(id);
    if (!el) {
      el = document.createElement('script');
      el.id = id;
      el.src = src;
      document.head.appendChild(el);
    }
    return el;
  };
  // Without this a failed CDN request leaves the embed blank with no hint why.
  const reportFailure = (el) => {
    el.addEventListener('error', () => {
      console.error(`[d3-experiment-overview] failed to load ${el.src}`);
    }, { once: true });
  };

  const loadSankey = () => {
    if (hasSankey()) return cb();
    const s2 = ensureScript('d3-sankey-cdn', 'https://cdn.jsdelivr.net/npm/d3-sankey@0.12.3/dist/d3-sankey.min.js');
    s2.addEventListener('load', cb, { once: true });
    reportFailure(s2);
  };

  // Core D3 already on the page: only the plugin is missing, so do not inject
  // a second copy of the core library.
  if (hasCore()) { loadSankey(); return; }

  const s = ensureScript('d3-cdn-script', 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js');
  s.addEventListener('load', loadSankey, { once: true });
  reportFailure(s);
};
const bootstrap = () => {
// Locate the chart container. Preferred: the element immediately before this <script>.
// document.currentScript may be null here (e.g. when bootstrap runs from an event
// callback), in which case scriptEl is null and the fallback below is used.
const scriptEl = document.currentScript;
let container = scriptEl ? scriptEl.previousElementSibling : null;
// Fallback: take the last `.d3-experiment-overview` element not yet marked as mounted.
if (!(container && container.classList && container.classList.contains('d3-experiment-overview'))) {
const cs = Array.from(document.querySelectorAll('.d3-experiment-overview'))
.filter(el => !(el.dataset && el.dataset.mounted === 'true'));
container = cs[cs.length - 1] || null;
}
if (!container) return;
// Mount guard: data-mounted="true" marks a container that already has a chart, so a
// second bootstrap call for the same element exits here.
if (container.dataset) {
if (container.dataset.mounted === 'true') return;
container.dataset.mounted = 'true';
}
// Read the data path from the nearest ancestor (or the container itself) carrying a
// non-empty `data-datafiles` attribute.
// NOTE(review): the original comment says this attribute comes from "HtmlEmbed" — confirm against the host component.
let mountEl = container;
while (mountEl && !mountEl.getAttribute?.('data-datafiles')) mountEl = mountEl.parentElement;
const dataAttr = mountEl?.getAttribute?.('data-datafiles');
// An attribute value containing '/' is used as-is; a bare file name is resolved under /data/.
// With no attribute, a fixed list of candidate locations is probed in order.
const dataPaths = dataAttr
? [dataAttr.includes('/') ? dataAttr : `/data/${dataAttr}`]
: ['/data/rephrasing_metadata.json', './assets/data/rephrasing_metadata.json', '../assets/data/rephrasing_metadata.json', '../../assets/data/rephrasing_metadata.json'];
/**
 * Fetch JSON from the first candidate URL that yields a usable document.
 *
 * Candidates are tried strictly in order. A candidate is skipped when the request
 * throws, when the response status is not OK, or when the body is not valid JSON.
 *
 * @param {Iterable<string>} paths - Candidate URLs, most preferred first.
 * @returns {Promise<*>} The parsed JSON of the first usable candidate.
 * @throws {Error} 'Data not found' when every candidate was skipped.
 */
const fetchFirst = async (paths) => {
  for (const p of paths) {
    try {
      const r = await fetch(p, { cache: 'no-cache' });
      // `await` keeps the parse inside this try: a 200 response whose body is not
      // JSON (e.g. an HTML fallback page) must fall through to the next candidate
      // instead of rejecting the whole probe.
      if (r.ok) return await r.json();
    } catch (_) {
      // Deliberate best-effort probing: a failing candidate is expected, move on.
    }
  }
  throw new Error('Data not found');
};
// Load the metadata, then draw. The catch sits after the then(), so it also receives
// exceptions thrown inside buildChart; those are shown with the same
// "Error loading data" prefix, replacing the container's content.
fetchFirst(dataPaths).then(data => buildChart(data)).catch(err => {
container.innerHTML = `<pre style="color:red;padding:12px;">Error loading data: ${err.message}</pre>`;
});
function buildChart(rawData) {
// Map source dataset strings to display names.
// Experiments whose `source_dataset` is not a key here are skipped when links are built.
const sourceMap = {
'fineweb-edu-hq-20BT': 'FW-Edu HQ',
'fineweb-edu-lq-20BT': 'FW-Edu LQ',
'dclm-37BT': 'DCLM',
'cosmopedia-25BT': 'Cosmopedia',
};
// Map prompt paths to display names and categories.
// `name` is the node label; `cat` selects the node colour and the bracket group in the
// prompt column. Experiments whose `prompt` is not a key here are skipped as well.
const promptMap = {
'format/tutorial.md': { name: 'Tutorial', cat: 'Format' },
'format/faq.md': { name: 'FAQ', cat: 'Format' },
'format/math.md': { name: 'Math', cat: 'Format' },
'format/table.md': { name: 'Table', cat: 'Format' },
'format/commentary.md': { name: 'Commentary', cat: 'Format' },
'format/discussion.md': { name: 'Discussion', cat: 'Format' },
'format/article.md': { name: 'Article', cat: 'Format' },
'nemotron/diverse_qa_pairs.md': { name: 'Diverse QA', cat: 'Nemotron' },
'nemotron/knowledge_list.md': { name: 'Knowledge List', cat: 'Nemotron' },
'nemotron/wikipedia_style_rephrasing.md': { name: 'Wikipedia Style', cat: 'Nemotron' },
'nemotron/extract_knowledge.md': { name: 'Extract Knowledge', cat: 'Nemotron' },
'nemotron/distill.md': { name: 'Distill', cat: 'Nemotron' },
'rewire/guided_rewrite_original.md': { name: 'Guided Rewrite', cat: 'REWIRE' },
'rewire/guided_rewrite_improved.md': { name: 'Guided Rewrite+', cat: 'REWIRE' },
};
// Map model IDs to family names.
// Matching is a case-insensitive substring test, tried in the order listed, so every
// family is treated the same way regardless of how the id is capitalised.
// An id that matches no family is returned unchanged; a non-string id (e.g. a missing
// `model` field) is returned as-is instead of throwing.
const modelFamilyMap = (modelId) => {
  if (typeof modelId !== 'string') return modelId;
  const families = [
    ['gemma', 'Gemma'],
    ['qwen', 'Qwen'],
    ['falcon', 'Falcon'],
    ['granite', 'Granite'],
    ['llama', 'Llama'],
    ['smollm', 'SmolLM2'],
  ];
  const lowered = modelId.toLowerCase();
  const match = families.find(([needle]) => lowered.includes(needle));
  return match ? match[1] : modelId;
};
// Build link counts and node names in a single pass over the data.
// Only experiments whose source dataset and prompt are both known contribute, and
// node names are collected from exactly those experiments, so a skipped experiment
// cannot add a node that has no link attached to it.
const key = (a, b) => `${a}|||${b}`;
const linkCounts = new Map(); // key(from, to) -> { from, to, value }
const bump = (from, to) => {
  const k = key(from, to);
  const entry = linkCounts.get(k);
  if (entry) entry.value += 1;
  else linkCounts.set(k, { from, to, value: 1 });
};
const sources = new Set();
const promptCats = new Map(); // prompt display name -> category
const models = new Set();
rawData.forEach((exp) => {
  const src = sourceMap[exp.source_dataset];
  const promptInfo = promptMap[exp.prompt];
  if (!src || !promptInfo) return;
  sources.add(src);
  if (!promptCats.has(promptInfo.name)) promptCats.set(promptInfo.name, promptInfo.cat);
  bump(src, promptInfo.name);
  const family = modelFamilyMap(exp.model);
  // No usable model id: keep the source -> prompt count, but add no model node or link.
  if (!family) return;
  models.add(family);
  bump(promptInfo.name, family);
});
// Build node list: sources, then prompts, then model families, each in order of
// first appearance in the data.
const nodes = [];
sources.forEach((name) => nodes.push({ name, col: 'source' }));
promptCats.forEach((cat, name) => nodes.push({ name, col: 'prompt', cat: cat || 'Other' }));
models.forEach((name) => nodes.push({ name, col: 'model' }));
// Name -> index lookup; the first node with a given name wins.
const nodeIndex = new Map();
nodes.forEach((n, i) => { if (!nodeIndex.has(n.name)) nodeIndex.set(n.name, i); });
// Build links (index-based endpoints, one per distinct from/to pair, in insertion order)
const links = [];
linkCounts.forEach(({ from, to, value }) => {
  const s = nodeIndex.get(from);
  const t = nodeIndex.get(to);
  if (s !== undefined && t !== undefined) links.push({ source: s, target: t, value });
});
// Colour palettes, one per column: sources by display name, prompts by category,
// models by family name.
const sourceColors = {
  'FW-Edu HQ': '#6B8DB5',
  'FW-Edu LQ': '#B58B9B',
  'DCLM': '#7B82C8',
  'Cosmopedia': '#8BA878',
};
const catColors = {
  'Format': '#4EA5B7',
  'Nemotron': '#76b900',
  'REWIRE': '#1877F2',
};
const familyColors = {
  'Gemma': '#5b9bd5',
  'Qwen': '#e07b54',
  'SmolLM2': '#e06b9e',
  'Falcon': '#c9a046',
  'Granite': '#9a8ec2',
  'Llama': '#8bc474',
};
// Resolve a node's colour from its column; anything unrecognised is grey (#888).
const nodeColor = (d) => {
  switch (d.col) {
    case 'source':
      return sourceColors[d.name] || '#888';
    case 'prompt':
      return catColors[d.cat] || '#888';
    case 'model':
      return familyColors[d.name] || '#888';
    default:
      return '#888';
  }
};
// SVG: a single <svg> is appended to the container once. render() below empties and
// refills it on every redraw and overwrites the initial '100%' width with a pixel value.
const svg = d3.select(container).append('svg').attr('width', '100%').style('display', 'block');
// Redraw the whole diagram for the container's current width and the page's current
// theme. Everything inside the <svg> is removed and rebuilt on each call; the tooltip
// <div> lives in the container (not the svg) and is reused across calls.
const render = () => {
// Square canvas: height follows width but never drops below 520px. 800 is the fallback
// when clientWidth is 0 or unavailable.
const width = container.clientWidth || 800;
const height = Math.max(520, width);
svg.attr('width', width).attr('height', height);
svg.selectAll('*').remove();
// Theme is read from data-theme on <html>; anything other than "dark" is treated as light.
const isDark = document.documentElement.getAttribute('data-theme') === 'dark';
const textColor = isDark ? 'rgba(255,255,255,0.78)' : 'rgba(0,0,0,0.68)';
const mutedText = isDark ? 'rgba(255,255,255,0.35)' : 'rgba(0,0,0,0.30)';
const linkOpacity = isDark ? 0.20 : 0.35;
const linkHoverOpacity = isDark ? 0.50 : 0.65;
// Base font size scales with width, clamped to 10..14px.
const fontSize = Math.max(10, Math.min(14, width / 65));
// Margins (left/right/top/bottom) as fractions of the canvas; the top margin leaves room for the column headers.
const ml = width * 0.005, mr = width * 0.01;
const mt = height * 0.04, mb = height * 0.01;
// NOTE(review): nodeSort(null) is presumably meant to keep nodes in the order they were
// built (sources, prompts, models) — confirm against the d3-sankey documentation.
const sankeyGen = d3.sankey()
.nodeId(d => d.index)
.nodeWidth(Math.max(8, width * 0.012))
.nodePadding(Math.max(3, height * 0.012))
.nodeSort(null)
.extent([[ml, mt], [width - mr, height - mb]]);
// The layout is given fresh shallow copies on every render, so the shared `nodes` and
// `links` arrays are never handed to the layout directly. The code below reads
// x0/x1/y0/y1, sourceLinks/targetLinks and width from the returned graph.
const graph = sankeyGen({
nodes: nodes.map((d, i) => ({ ...d, index: i })),
links: links.map(d => ({ ...d }))
});
// Column headers: each is positioned from the first node of its column, with a
// width-based fallback when the column is empty.
const modelNodes = graph.nodes.filter(n => n.col === 'model');
const colLabels = [
{ text: 'Source Dataset', x: graph.nodes.filter(n => n.col === 'source')[0]?.x0 || ml, anchor: 'start' },
{ text: 'Prompt Strategy', x: graph.nodes.filter(n => n.col === 'prompt')[0]?.x1 || width * 0.35, anchor: 'end' },
{ text: 'Model Family', x: (modelNodes[0]?.x1 || width * 0.75), anchor: 'end' },
];
// NOTE(review): 'text-transform' is set with attr() while other CSS-only properties in
// this function use style() — verify that it has the intended effect on the headers.
svg.selectAll('text.col-header')
.data(colLabels).join('text')
.attr('class', 'col-header')
.attr('x', d => d.x).attr('y', mt - 8)
.attr('text-anchor', d => d.anchor)
.attr('fill', mutedText)
.attr('font-size', (fontSize * 1.4) + 'px')
.attr('font-weight', '700')
.attr('font-family', 'system-ui, -apple-system, sans-serif')
.attr('letter-spacing', '0.5px')
.attr('text-transform', 'uppercase')
.text(d => d.text);
// Category brackets for prompts: compute the vertical extent covered by each category's
// nodes, then draw a thin vertical line plus the category name just right of the column.
const catGroups = {};
graph.nodes.filter(n => n.col === 'prompt').forEach(n => {
if (!catGroups[n.cat]) catGroups[n.cat] = { min: Infinity, max: -Infinity };
catGroups[n.cat].min = Math.min(catGroups[n.cat].min, n.y0);
catGroups[n.cat].max = Math.max(catGroups[n.cat].max, n.y1);
});
const bracketX = (graph.nodes.find(n => n.col === 'prompt')?.x1 || 0) + 5;
Object.entries(catGroups).forEach(([cat, { min: y0, max: y1 }]) => {
const midY = (y0 + y1) / 2;
svg.append('line')
.attr('x1', bracketX).attr('x2', bracketX)
.attr('y1', y0 + 2).attr('y2', y1 - 2)
.attr('stroke', catColors[cat]).attr('stroke-width', 1.5)
.attr('stroke-opacity', 0.35).attr('stroke-linecap', 'round');
svg.append('text')
.attr('x', bracketX + 4).attr('y', midY)
.attr('dominant-baseline', 'central')
.attr('fill', catColors[cat]).attr('fill-opacity', 0.45)
.attr('font-size', (fontSize * 1.3) + 'px')
.attr('font-weight', '600')
.attr('font-family', 'system-ui, -apple-system, sans-serif')
.attr('letter-spacing', '0.3px')
.text(cat);
});
// Links: one stroked path per link, coloured by its source node, at least 1px wide.
const gLinks = svg.append('g').attr('class', 'links');
const linkPath = d3.sankeyLinkHorizontal();
const linkEls = gLinks.selectAll('path')
.data(graph.links).join('path')
.attr('d', linkPath)
.attr('fill', 'none')
.attr('stroke', d => nodeColor(d.source))
.attr('stroke-width', d => Math.max(1, d.width))
.attr('stroke-opacity', linkOpacity)
.style('mix-blend-mode', isDark ? 'screen' : 'multiply');
// Nodes: appended after the links group, so the rectangles come later in the document than the link paths.
const gNodes = svg.append('g').attr('class', 'nodes');
const nodeEls = gNodes.selectAll('rect')
.data(graph.nodes).join('rect')
.attr('x', d => d.x0).attr('y', d => d.y0)
.attr('width', d => d.x1 - d.x0)
.attr('height', d => Math.max(1, d.y1 - d.y0))
.attr('fill', d => nodeColor(d))
.attr('fill-opacity', 0.85).attr('rx', 2)
.attr('stroke', d => nodeColor(d))
.attr('stroke-width', 0.5).attr('stroke-opacity', 0.3);
// Node labels (interactive, same hover as node rects).
// Source labels sit to the right of their node; prompt and model labels sit to the left.
const gLabels = svg.append('g').attr('class', 'labels');
graph.nodes.forEach(d => {
const midY = (d.y0 + d.y1) / 2;
const isSource = d.col === 'source';
let labelX, anchor;
if (isSource) { labelX = d.x1 + 5; anchor = 'start'; }
else { labelX = d.x0 - 5; anchor = 'end'; }
// A node's experiment count is the larger of its incoming and outgoing link totals.
const totalIn = (d.targetLinks || []).reduce((s, l) => s + l.value, 0);
const totalOut = (d.sourceLinks || []).reduce((s, l) => s + l.value, 0);
const total = Math.max(totalIn, totalOut);
// The name is nudged upward when a count line is drawn beneath it (total > 1).
gLabels.append('text')
.datum(d)
.attr('class', 'node-label')
.attr('x', labelX).attr('y', midY - (total > 1 ? fontSize * 0.3 : 0))
.attr('text-anchor', anchor).attr('dominant-baseline', 'central')
.attr('fill', textColor)
.attr('font-size', fontSize + 'px').attr('font-weight', '600')
.attr('font-family', 'system-ui, -apple-system, sans-serif')
.style('cursor', 'pointer')
.text(d.name);
if (total > 1) {
gLabels.append('text')
.datum(d)
.attr('class', 'node-label')
.attr('x', labelX).attr('y', midY + fontSize * 0.55)
.attr('text-anchor', anchor).attr('dominant-baseline', 'central')
.attr('fill', mutedText)
.attr('font-size', (fontSize * 0.8) + 'px')
.attr('font-family', 'system-ui, -apple-system, sans-serif')
.style('cursor', 'pointer')
.text(total + ' exp.');
}
});
// Tooltip: created on the first render and found again via querySelector on later
// renders. It is parked far off-screen with opacity 0 while hidden.
container.style.position = container.style.position || 'relative';
let tip = container.querySelector('.d3-tooltip');
let tipInner;
if (!tip) {
tip = document.createElement('div');
tip.className = 'd3-tooltip';
Object.assign(tip.style, {
position: 'absolute', top: '0px', left: '0px',
transform: 'translate(-9999px, -9999px)',
pointerEvents: 'none', padding: '8px 12px', borderRadius: '10px',
fontSize: '12px', lineHeight: '1.4',
border: '1px solid var(--border-color)',
background: 'var(--surface-bg)', color: 'var(--text-color)',
boxShadow: '0 6px 24px rgba(0,0,0,.25)',
opacity: '0', transition: 'opacity .12s ease',
backdropFilter: 'saturate(1.12) blur(8px)',
zIndex: '20', maxWidth: '280px'
});
tipInner = document.createElement('div');
tipInner.className = 'd3-tooltip__inner';
tip.appendChild(tipInner);
container.appendChild(tip);
} else {
tipInner = tip.querySelector('.d3-tooltip__inner') || tip;
}
// Place the tooltip next to the pointer (coordinates relative to the container),
// flipping it to the left / above when it would cross the canvas's right / bottom edge.
const positionTip = (ev) => {
const [mx, my] = d3.pointer(ev, container);
const bw = tip.offsetWidth || 220, bh = tip.offsetHeight || 60;
const ox = (mx + bw + 20 > width) ? -(bw + 12) : 12;
const oy = (my + bh + 20 > height) ? -(bh + 12) : 14;
tip.style.transform = `translate(${Math.round(mx + ox)}px, ${Math.round(my + oy)}px)`;
};
// NOTE(review): `html` is assigned via innerHTML and includes node names taken from the
// loaded data — confirm that data source is trusted.
const showTip = (ev, html) => { tipInner.innerHTML = html; tip.style.opacity = '1'; positionTip(ev); };
const hideTip = () => { tip.style.opacity = '0'; tip.style.transform = 'translate(-9999px, -9999px)'; };
// Interaction: hovering a link emphasises it and dims every other link.
linkEls
.on('mouseenter', function (ev, d) {
linkEls.attr('stroke-opacity', l => l === d ? linkHoverOpacity * 1.5 : linkOpacity * 0.3);
showTip(ev, `<b>${d.source.name}</b> \u2192 <b>${d.target.name}</b><br/><span style="color:var(--muted-color);">${d.value} experiment${d.value > 1 ? 's' : ''}</span>`);
})
.on('mousemove', positionTip)
.on('mouseleave', function () { linkEls.attr('stroke-opacity', linkOpacity); hideTip(); });
// Shared node hover handlers (used by both rects and labels): emphasise the links
// touching the node, dim the rest, and show the node's name, category and count.
const onNodeEnter = function (ev, d) {
const connected = new Set();
(d.sourceLinks || []).forEach(l => connected.add(l.index));
(d.targetLinks || []).forEach(l => connected.add(l.index));
linkEls.attr('stroke-opacity', l => connected.has(l.index) ? linkHoverOpacity : linkOpacity * 0.15);
const totalIn = (d.targetLinks || []).reduce((s, l) => s + l.value, 0);
const totalOut = (d.sourceLinks || []).reduce((s, l) => s + l.value, 0);
const total = Math.max(totalIn, totalOut);
let info = `<b style="font-size:14px;">${d.name}</b>`;
if (d.cat) info += ` <span style="color:${catColors[d.cat]};font-size:12px;">(${d.cat})</span>`;
info += `<br/><span style="color:var(--muted-color);">${total} experiment${total > 1 ? 's' : ''}</span>`;
showTip(ev, info);
};
const onNodeLeave = function () { linkEls.attr('stroke-opacity', linkOpacity); hideTip(); };
nodeEls.style('cursor', 'pointer')
.on('mouseenter', onNodeEnter).on('mousemove', positionTip).on('mouseleave', onNodeLeave);
gLabels.selectAll('.node-label')
.on('mouseenter', onNodeEnter).on('mousemove', positionTip).on('mouseleave', onNodeLeave);
};
// Redraw when the container changes size (ResizeObserver where available, otherwise
// the window resize event) and when the data-theme attribute on <html> changes.
// NOTE(review): neither observer is disconnected anywhere in this function — confirm
// the embed is never removed from a long-lived page.
if (window.ResizeObserver) new ResizeObserver(() => render()).observe(container);
else window.addEventListener('resize', render);
new MutationObserver(() => render()).observe(document.documentElement, { attributes: true, attributeFilter: ['data-theme'] });
// Initial draw.
render();
}
};
// Start immediately when the document is already parsed; otherwise wait for
// DOMContentLoaded (listener removed after its first call).
const start = () => ensureD3(bootstrap);
if (document.readyState !== 'loading') {
  start();
} else {
  document.addEventListener('DOMContentLoaded', start, { once: true });
}
})();
</script>