Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
| <div class="d3-experiment-overview" style="width:100%;margin:10px 0;aspect-ratio:1/1;min-height:520px;"></div> | |
| <style> | |
| .d3-experiment-overview { position: relative; font-family: system-ui, -apple-system, sans-serif; } | |
| </style> | |
| <script> | |
| (() => { | |
/**
 * Make sure D3 and the d3-sankey plugin are available on `window.d3`, then run `cb`.
 *
 * Each library is injected from the jsDelivr CDN at most once per page (an
 * existing `<script>` element is found again through its id), and D3 itself is
 * injected only when it is not already on the page.
 *
 * @param {Function} cb - Invoked once `window.d3.sankey` is a function. When it
 *   runs from a script `load` listener it receives the load event as argument.
 * @returns {*} The return value of `cb` when both libraries are already
 *   present; `undefined` otherwise.
 */
const ensureD3 = (cb) => {
  const hasD3 = () => Boolean(window.d3) && typeof window.d3.select === 'function';
  const hasSankey = () => Boolean(window.d3) && typeof window.d3.sankey === 'function';
  if (hasD3() && hasSankey()) return cb();

  // Reuse the <script> carrying this id when there is one, otherwise inject it.
  const getOrInjectScript = (id, src) => {
    let el = document.getElementById(id);
    if (!el) {
      el = document.createElement('script');
      el.id = id;
      el.src = src;
      document.head.appendChild(el);
    }
    return el;
  };

  // Run `onLoad` once the script has loaded; report a failed load instead of
  // leaving the chart container silently empty.
  const whenLoaded = (el, onLoad) => {
    el.addEventListener('load', onLoad, { once: true });
    el.addEventListener('error', () => {
      console.error(`[d3-experiment-overview] Failed to load ${el.src}`);
    }, { once: true });
  };

  const loadSankey = () => {
    if (hasSankey()) return cb();
    whenLoaded(
      getOrInjectScript('d3-sankey-cdn', 'https://cdn.jsdelivr.net/npm/d3-sankey@0.12.3/dist/d3-sankey.min.js'),
      cb
    );
  };

  // D3 is already on the page: only the plugin is missing, so do not inject
  // a second copy of D3.
  if (hasD3()) { loadSankey(); return; }
  whenLoaded(
    getOrInjectScript('d3-cdn-script', 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js'),
    loadSankey
  );
};
| const bootstrap = () => { | |
| const scriptEl = document.currentScript; | |
| let container = scriptEl ? scriptEl.previousElementSibling : null; | |
| if (!(container && container.classList && container.classList.contains('d3-experiment-overview'))) { | |
| const cs = Array.from(document.querySelectorAll('.d3-experiment-overview')) | |
| .filter(el => !(el.dataset && el.dataset.mounted === 'true')); | |
| container = cs[cs.length - 1] || null; | |
| } | |
| if (!container) return; | |
| if (container.dataset) { | |
| if (container.dataset.mounted === 'true') return; | |
| container.dataset.mounted = 'true'; | |
| } | |
| // Read data path from HtmlEmbed attribute | |
| let mountEl = container; | |
| while (mountEl && !mountEl.getAttribute?.('data-datafiles')) mountEl = mountEl.parentElement; | |
| const dataAttr = mountEl?.getAttribute?.('data-datafiles'); | |
| const dataPaths = dataAttr | |
| ? [dataAttr.includes('/') ? dataAttr : `/data/${dataAttr}`] | |
| : ['/data/rephrasing_metadata.json', './assets/data/rephrasing_metadata.json', '../assets/data/rephrasing_metadata.json', '../../assets/data/rephrasing_metadata.json']; | |
/**
 * Fetch the candidate URLs in order and return the parsed JSON body of the
 * first one that works.
 *
 * A candidate is skipped when the request throws, when the response status is
 * not OK, or when its body is not valid JSON (for example an HTML fallback
 * page served with status 200).
 *
 * @param {string[]} paths - Candidate URLs, tried in order.
 * @returns {Promise<*>} The parsed JSON of the first usable candidate.
 * @throws {Error} 'Data not found' when no candidate yields JSON; the last
 *   failure is attached as `cause`.
 */
const fetchFirst = async (paths) => {
  let lastError;
  for (const p of paths) {
    try {
      const r = await fetch(p, { cache: 'no-cache' });
      // Awaiting here keeps a JSON parse failure inside this try block, so
      // the next candidate is still attempted.
      if (r.ok) return await r.json();
      lastError = new Error(`HTTP ${r.status} for ${p}`);
    } catch (err) {
      lastError = err;
    }
  }
  throw new Error('Data not found', { cause: lastError });
};
// Load the data and draw the chart; a failure in either step is shown inside
// the container instead of the chart.
const showError = (err) => {
  container.innerHTML = `<pre style="color:red;padding:12px;">Error loading data: ${err.message}</pre>`;
};
fetchFirst(dataPaths).then(buildChart).catch(showError);
| function buildChart(rawData) { | |
// Display names for the source dataset identifiers found in the data.
const sourceMap = Object.fromEntries([
  ['fineweb-edu-hq-20BT', 'FW-Edu HQ'],
  ['fineweb-edu-lq-20BT', 'FW-Edu LQ'],
  ['dclm-37BT', 'DCLM'],
  ['cosmopedia-25BT', 'Cosmopedia'],
]);
// Display name and category for each prompt path found in the data.
const promptEntry = (name, cat) => ({ name, cat });
const promptMap = {
  'format/tutorial.md': promptEntry('Tutorial', 'Format'),
  'format/faq.md': promptEntry('FAQ', 'Format'),
  'format/math.md': promptEntry('Math', 'Format'),
  'format/table.md': promptEntry('Table', 'Format'),
  'format/commentary.md': promptEntry('Commentary', 'Format'),
  'format/discussion.md': promptEntry('Discussion', 'Format'),
  'format/article.md': promptEntry('Article', 'Format'),
  'nemotron/diverse_qa_pairs.md': promptEntry('Diverse QA', 'Nemotron'),
  'nemotron/knowledge_list.md': promptEntry('Knowledge List', 'Nemotron'),
  'nemotron/wikipedia_style_rephrasing.md': promptEntry('Wikipedia Style', 'Nemotron'),
  'nemotron/extract_knowledge.md': promptEntry('Extract Knowledge', 'Nemotron'),
  'nemotron/distill.md': promptEntry('Distill', 'Nemotron'),
  'rewire/guided_rewrite_original.md': promptEntry('Guided Rewrite', 'REWIRE'),
  'rewire/guided_rewrite_improved.md': promptEntry('Guided Rewrite+', 'REWIRE'),
};
// Map model IDs to family names
/**
 * Return the display family for a model id.
 *
 * Matching is a case-insensitive substring test; the first family in the list
 * below whose keyword occurs in the id wins.
 *
 * @param {string} modelId - Model identifier as found in the data.
 * @returns {string} The family name, the id itself when no family matches, or
 *   'Unknown' when the id is missing or not a string.
 */
const modelFamilyMap = (modelId) => {
  // A record without a usable model id must not abort the whole chart.
  if (typeof modelId !== 'string' || modelId === '') return 'Unknown';
  const families = [
    ['gemma', 'Gemma'],
    ['qwen', 'Qwen'],
    ['falcon', 'Falcon'],
    ['granite', 'Granite'],
    ['llama', 'Llama'],
    ['smollm', 'SmolLM2'],
  ];
  const id = modelId.toLowerCase();
  const match = families.find(([keyword]) => id.includes(keyword));
  return match ? match[1] : modelId;
};
// Build the Sankey nodes and links from the data.
//
// Only records whose source dataset and prompt are both known contribute.
// Nodes are collected from those same records, so a record that is skipped
// for links cannot leave behind a node without any link. Nodes are identified
// by column + name, so equal names in different columns stay distinct.
const key = (a, b) => `${a}|||${b}`;

// Link key -> { fromId, toId, value }, in order of first appearance.
const linkCounts = new Map();
const countLink = (fromId, toId) => {
  const k = key(fromId, toId);
  const entry = linkCounts.get(k);
  if (entry) entry.value += 1;
  else linkCounts.set(k, { fromId, toId, value: 1 });
};

// Unique names per column, in order of first appearance.
const sourceNames = new Set();
const promptCats = new Map(); // prompt display name -> category
const modelNames = new Set();

rawData.forEach((exp) => {
  const src = sourceMap[exp.source_dataset];
  const promptInfo = promptMap[exp.prompt];
  if (!src || !promptInfo) return;
  sourceNames.add(src);
  if (!promptCats.has(promptInfo.name)) promptCats.set(promptInfo.name, promptInfo.cat || 'Other');
  countLink(key('source', src), key('prompt', promptInfo.name));

  // Resolved only for records that are kept.
  const family = modelFamilyMap(exp.model);
  if (!family) return;
  modelNames.add(family);
  countLink(key('prompt', promptInfo.name), key('model', family));
});

// Build node list: sources first, then prompts, then models.
const nodes = [];
const nodeIndex = new Map(); // node id -> position in `nodes`
const addNode = (node) => {
  nodeIndex.set(key(node.col, node.name), nodes.length);
  nodes.push(node);
};
sourceNames.forEach((name) => addNode({ name, col: 'source' }));
promptCats.forEach((cat, name) => addNode({ name, col: 'prompt', cat }));
modelNames.forEach((name) => addNode({ name, col: 'model' }));

// Build links; source/target are positions in `nodes`.
const links = [];
linkCounts.forEach(({ fromId, toId, value }) => {
  links.push({ source: nodeIndex.get(fromId), target: nodeIndex.get(toId), value });
});
// Colors: one table per column. Prompt nodes are coloured by category,
// source and model nodes by their own name.
const sourceColors = {
  'FW-Edu HQ': '#6B8DB5',
  'FW-Edu LQ': '#B58B9B',
  'DCLM': '#7B82C8',
  'Cosmopedia': '#8BA878',
};
const catColors = {
  'Format': '#4EA5B7',
  'Nemotron': '#76b900',
  'REWIRE': '#1877F2',
};
const familyColors = {
  'Gemma': '#5b9bd5',
  'Qwen': '#e07b54',
  'SmolLM2': '#e06b9e',
  'Falcon': '#c9a046',
  'Granite': '#9a8ec2',
  'Llama': '#8bc474',
};
// Colour of a node; grey when the column or the name/category is not listed.
const nodeColor = (d) => {
  let color;
  switch (d.col) {
    case 'source':
      color = sourceColors[d.name];
      break;
    case 'prompt':
      color = catColors[d.cat];
      break;
    case 'model':
      color = familyColors[d.name];
      break;
    default:
      color = undefined;
  }
  return color || '#888';
};
// SVG
const svg = d3.select(container).append('svg').attr('width', '100%').style('display', 'block');

// Escape text before it is interpolated into tooltip HTML: node names can come
// straight from the data file (model ids that match no known family).
const escapeHtml = (value) => String(value).replace(
  /[&<>"']/g,
  (ch) => ({ '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }[ch])
);
// Experiments through a node: the larger of its incoming and outgoing link sums.
const nodeTotal = (d) => {
  const totalIn = (d.targetLinks || []).reduce((s, l) => s + l.value, 0);
  const totalOut = (d.sourceLinks || []).reduce((s, l) => s + l.value, 0);
  return Math.max(totalIn, totalOut);
};
// "1 experiment", but "0 experiments" / "2 experiments".
const experimentCount = (n) => `${n} experiment${n === 1 ? '' : 's'}`;

/**
 * Draw (or redraw) the whole diagram at the container's current width and for
 * the current theme. Everything inside the <svg> is removed and rebuilt; the
 * tooltip element is created on first use and reused afterwards.
 */
const render = () => {
  const width = container.clientWidth || 800;
  const height = Math.max(520, width);
  svg.attr('width', width).attr('height', height);
  svg.selectAll('*').remove();

  const isDark = document.documentElement.getAttribute('data-theme') === 'dark';
  const textColor = isDark ? 'rgba(255,255,255,0.78)' : 'rgba(0,0,0,0.68)';
  const mutedText = isDark ? 'rgba(255,255,255,0.35)' : 'rgba(0,0,0,0.30)';
  const linkOpacity = isDark ? 0.20 : 0.35;
  const linkHoverOpacity = isDark ? 0.50 : 0.65;
  const fontSize = Math.max(10, Math.min(14, width / 65));
  const ml = width * 0.005, mr = width * 0.01;
  const mt = height * 0.04, mb = height * 0.01;

  const sankeyGen = d3.sankey()
    .nodeId(d => d.index)
    .nodeWidth(Math.max(8, width * 0.012))
    .nodePadding(Math.max(3, height * 0.012))
    .nodeSort(null)
    .extent([[ml, mt], [width - mr, height - mb]]);
  // The layout receives copies, so `nodes` and `links` stay untouched between
  // renders.
  const graph = sankeyGen({
    nodes: nodes.map((d, i) => ({ ...d, index: i })),
    links: links.map(d => ({ ...d }))
  });

  // Column headers
  const modelNodes = graph.nodes.filter(n => n.col === 'model');
  const colLabels = [
    { text: 'Source Dataset', x: graph.nodes.filter(n => n.col === 'source')[0]?.x0 || ml, anchor: 'start' },
    { text: 'Prompt Strategy', x: graph.nodes.filter(n => n.col === 'prompt')[0]?.x1 || width * 0.35, anchor: 'end' },
    { text: 'Model Family', x: (modelNodes[0]?.x1 || width * 0.75), anchor: 'end' },
  ];
  // NOTE(review): `text-transform` is set as an attribute below. It is not one
  // of the SVG presentation attributes, so it probably has no visual effect.
  // Left as is to keep the current rendering; confirm whether upper-case
  // headers were intended.
  svg.selectAll('text.col-header')
    .data(colLabels).join('text')
    .attr('class', 'col-header')
    .attr('x', d => d.x).attr('y', mt - 8)
    .attr('text-anchor', d => d.anchor)
    .attr('fill', mutedText)
    .attr('font-size', (fontSize * 1.4) + 'px')
    .attr('font-weight', '700')
    .attr('font-family', 'system-ui, -apple-system, sans-serif')
    .attr('letter-spacing', '0.5px')
    .attr('text-transform', 'uppercase')
    .text(d => d.text);

  // Category brackets for prompts: vertical extent of each category's nodes.
  const catGroups = {};
  graph.nodes.filter(n => n.col === 'prompt').forEach(n => {
    if (!catGroups[n.cat]) catGroups[n.cat] = { min: Infinity, max: -Infinity };
    catGroups[n.cat].min = Math.min(catGroups[n.cat].min, n.y0);
    catGroups[n.cat].max = Math.max(catGroups[n.cat].max, n.y1);
  });
  const bracketX = (graph.nodes.find(n => n.col === 'prompt')?.x1 || 0) + 5;
  Object.entries(catGroups).forEach(([cat, { min: y0, max: y1 }]) => {
    const midY = (y0 + y1) / 2;
    svg.append('line')
      .attr('x1', bracketX).attr('x2', bracketX)
      .attr('y1', y0 + 2).attr('y2', y1 - 2)
      .attr('stroke', catColors[cat]).attr('stroke-width', 1.5)
      .attr('stroke-opacity', 0.35).attr('stroke-linecap', 'round');
    svg.append('text')
      .attr('x', bracketX + 4).attr('y', midY)
      .attr('dominant-baseline', 'central')
      .attr('fill', catColors[cat]).attr('fill-opacity', 0.45)
      .attr('font-size', (fontSize * 1.3) + 'px')
      .attr('font-weight', '600')
      .attr('font-family', 'system-ui, -apple-system, sans-serif')
      .attr('letter-spacing', '0.3px')
      .text(cat);
  });

  // Links
  const gLinks = svg.append('g').attr('class', 'links');
  const linkPath = d3.sankeyLinkHorizontal();
  const linkEls = gLinks.selectAll('path')
    .data(graph.links).join('path')
    .attr('d', linkPath)
    .attr('fill', 'none')
    .attr('stroke', d => nodeColor(d.source))
    .attr('stroke-width', d => Math.max(1, d.width))
    .attr('stroke-opacity', linkOpacity)
    .style('mix-blend-mode', isDark ? 'screen' : 'multiply');

  // Nodes
  const gNodes = svg.append('g').attr('class', 'nodes');
  const nodeEls = gNodes.selectAll('rect')
    .data(graph.nodes).join('rect')
    .attr('x', d => d.x0).attr('y', d => d.y0)
    .attr('width', d => d.x1 - d.x0)
    .attr('height', d => Math.max(1, d.y1 - d.y0))
    .attr('fill', d => nodeColor(d))
    .attr('fill-opacity', 0.85).attr('rx', 2)
    .attr('stroke', d => nodeColor(d))
    .attr('stroke-width', 0.5).attr('stroke-opacity', 0.3);

  // Node labels (interactive, same hover as node rects)
  const gLabels = svg.append('g').attr('class', 'labels');
  graph.nodes.forEach(d => {
    const midY = (d.y0 + d.y1) / 2;
    const isSource = d.col === 'source';
    // Source labels sit to the right of their node, all others to the left.
    let labelX, anchor;
    if (isSource) { labelX = d.x1 + 5; anchor = 'start'; }
    else { labelX = d.x0 - 5; anchor = 'end'; }
    const total = nodeTotal(d);
    gLabels.append('text')
      .datum(d)
      .attr('class', 'node-label')
      // Shift the name up a little when a count line is drawn below it.
      .attr('x', labelX).attr('y', midY - (total > 1 ? fontSize * 0.3 : 0))
      .attr('text-anchor', anchor).attr('dominant-baseline', 'central')
      .attr('fill', textColor)
      .attr('font-size', fontSize + 'px').attr('font-weight', '600')
      .attr('font-family', 'system-ui, -apple-system, sans-serif')
      .style('cursor', 'pointer')
      .text(d.name);
    if (total > 1) {
      gLabels.append('text')
        .datum(d)
        .attr('class', 'node-label')
        .attr('x', labelX).attr('y', midY + fontSize * 0.55)
        .attr('text-anchor', anchor).attr('dominant-baseline', 'central')
        .attr('fill', mutedText)
        .attr('font-size', (fontSize * 0.8) + 'px')
        .attr('font-family', 'system-ui, -apple-system, sans-serif')
        .style('cursor', 'pointer')
        .text(total + ' exp.');
    }
  });

  // Tooltip (created once per container, reused on later renders)
  container.style.position = container.style.position || 'relative';
  let tip = container.querySelector('.d3-tooltip');
  let tipInner;
  if (!tip) {
    tip = document.createElement('div');
    tip.className = 'd3-tooltip';
    Object.assign(tip.style, {
      position: 'absolute', top: '0px', left: '0px',
      transform: 'translate(-9999px, -9999px)',
      pointerEvents: 'none', padding: '8px 12px', borderRadius: '10px',
      fontSize: '12px', lineHeight: '1.4',
      border: '1px solid var(--border-color)',
      background: 'var(--surface-bg)', color: 'var(--text-color)',
      boxShadow: '0 6px 24px rgba(0,0,0,.25)',
      opacity: '0', transition: 'opacity .12s ease',
      backdropFilter: 'saturate(1.12) blur(8px)',
      zIndex: '20', maxWidth: '280px'
    });
    tipInner = document.createElement('div');
    tipInner.className = 'd3-tooltip__inner';
    tip.appendChild(tipInner);
    container.appendChild(tip);
  } else {
    tipInner = tip.querySelector('.d3-tooltip__inner') || tip;
  }
  // Place the tooltip next to the pointer, flipped to the other side when it
  // would extend past the right or bottom edge of the chart.
  const positionTip = (ev) => {
    const [mx, my] = d3.pointer(ev, container);
    const bw = tip.offsetWidth || 220, bh = tip.offsetHeight || 60;
    const ox = (mx + bw + 20 > width) ? -(bw + 12) : 12;
    const oy = (my + bh + 20 > height) ? -(bh + 12) : 14;
    tip.style.transform = `translate(${Math.round(mx + ox)}px, ${Math.round(my + oy)}px)`;
  };
  const showTip = (ev, html) => { tipInner.innerHTML = html; tip.style.opacity = '1'; positionTip(ev); };
  const hideTip = () => { tip.style.opacity = '0'; tip.style.transform = 'translate(-9999px, -9999px)'; };

  // Interaction
  linkEls
    .on('mouseenter', function (ev, d) {
      linkEls.attr('stroke-opacity', l => l === d ? linkHoverOpacity * 1.5 : linkOpacity * 0.3);
      showTip(ev, `<b>${escapeHtml(d.source.name)}</b> \u2192 <b>${escapeHtml(d.target.name)}</b><br/><span style="color:var(--muted-color);">${experimentCount(d.value)}</span>`);
    })
    .on('mousemove', positionTip)
    .on('mouseleave', function () { linkEls.attr('stroke-opacity', linkOpacity); hideTip(); });

  // Shared node hover handlers (used by both rects and labels)
  const onNodeEnter = function (ev, d) {
    const connected = new Set();
    (d.sourceLinks || []).forEach(l => connected.add(l.index));
    (d.targetLinks || []).forEach(l => connected.add(l.index));
    linkEls.attr('stroke-opacity', l => connected.has(l.index) ? linkHoverOpacity : linkOpacity * 0.15);
    let info = `<b style="font-size:14px;">${escapeHtml(d.name)}</b>`;
    if (d.cat) info += ` <span style="color:${catColors[d.cat]};font-size:12px;">(${d.cat})</span>`;
    info += `<br/><span style="color:var(--muted-color);">${experimentCount(nodeTotal(d))}</span>`;
    showTip(ev, info);
  };
  const onNodeLeave = function () { linkEls.attr('stroke-opacity', linkOpacity); hideTip(); };
  nodeEls.style('cursor', 'pointer')
    .on('mouseenter', onNodeEnter).on('mousemove', positionTip).on('mouseleave', onNodeLeave);
  gLabels.selectAll('.node-label')
    .on('mouseenter', onNodeEnter).on('mousemove', positionTip).on('mouseleave', onNodeLeave);
};
// Redraw when the container changes size (or, without ResizeObserver, when
// the window is resized) and when the page's `data-theme` attribute changes;
// then draw once right away.
const redraw = () => render();
if (window.ResizeObserver) {
  const sizeObserver = new ResizeObserver(redraw);
  sizeObserver.observe(container);
} else {
  window.addEventListener('resize', render);
}
const themeObserver = new MutationObserver(redraw);
themeObserver.observe(document.documentElement, { attributes: true, attributeFilter: ['data-theme'] });
render();
| } | |
| }; | |
// Start right away when the document is already parsed, otherwise wait for
// DOMContentLoaded.
const start = () => ensureD3(bootstrap);
if (document.readyState !== 'loading') {
  start();
} else {
  document.addEventListener('DOMContentLoaded', start, { once: true });
}
| })(); | |
| </script> | |