| <div class="d3-mmlu-heatmap"> |
| <div class="heatmap-container"></div> |
| <div class="legend-container"></div> |
| </div> |
| <style> |
| .d3-mmlu-heatmap { |
| position: relative; |
| margin: 24px 0; |
| } |
| |
| .d3-mmlu-heatmap .heatmap-container { |
| width: 100%; |
| } |
| |
| .d3-mmlu-heatmap .legend-container { |
| margin-top: 8px; |
| padding: 0 8px; |
| } |
| |
| .d3-mmlu-heatmap .legend-title { |
| font-size: 12px; |
| font-weight: 600; |
| color: var(--text-color); |
| margin-bottom: 12px; |
| text-align: center; |
| } |
| |
| .d3-mmlu-heatmap .legend-grid { |
| display: grid; |
| grid-template-columns: 1fr 1fr; |
| gap: 8px 24px; |
| font-size: 11px; |
| color: var(--text-color); |
| } |
| |
| .d3-mmlu-heatmap .legend-column { |
| display: flex; |
| flex-direction: column; |
| gap: 8px; |
| } |
| |
| .d3-mmlu-heatmap .legend-item { |
| display: flex; |
| align-items: flex-start; |
| gap: 8px; |
| } |
| |
| .d3-mmlu-heatmap .legend-label { |
| font-weight: 700; |
| min-width: 20px; |
| } |
| |
| .d3-mmlu-heatmap .legend-text { |
| flex: 1; |
| line-height: 1.4; |
| } |
| |
| .d3-mmlu-heatmap .axis-label { |
| fill: var(--text-color); |
| font-size: 11px; |
| font-weight: 600; |
| } |
| |
| .d3-mmlu-heatmap .cell-text { |
| fill: var(--text-color); |
| font-size: 10px; |
| font-weight: 600; |
| pointer-events: none; |
| } |
| |
| @media (max-width: 768px) { |
| .d3-mmlu-heatmap .legend-grid { |
| grid-template-columns: 1fr; |
| } |
| } |
| </style> |
| <script> |
| (() => { |
| |
/**
 * Invoke `cb` once the D3 global is usable, loading D3 from the CDN if needed.
 *
 * When `window.d3.select` already exists, `cb` is called synchronously and its
 * return value is passed through. Otherwise a single shared
 * `<script id="d3-cdn-script">` tag is reused (or created and appended to
 * `<head>`), and `cb` runs after that tag fires `load`, provided D3 is then
 * present. A failed load is reported through `console.error` so the widget
 * does not stay blank without any diagnostic.
 *
 * @param {Function} cb - Callback to run when D3 is available.
 * @returns {*} The result of `cb()` when D3 was already loaded; otherwise undefined.
 */
const ensureD3 = (cb) => {
  const hasD3 = () => Boolean(window.d3) && typeof window.d3.select === 'function';
  if (hasD3()) return cb();

  let script = document.getElementById('d3-cdn-script');
  if (!script) {
    script = document.createElement('script');
    script.id = 'd3-cdn-script';
    script.src = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
    document.head.appendChild(script);
  }

  // Re-check on load: the tag may belong to another widget, so only run the
  // callback when the global really exposes the D3 API.
  script.addEventListener('load', () => {
    if (hasD3()) cb();
  }, { once: true });

  script.addEventListener('error', () => {
    console.error('[d3-mmlu-heatmap] Failed to load D3 from', script.src);
  }, { once: true });
};
| |
| const bootstrap = () => { |
| const scriptEl = document.currentScript; |
| let container = scriptEl ? scriptEl.previousElementSibling : null; |
| if (!(container && container.classList && container.classList.contains('d3-mmlu-heatmap'))) { |
| const cs = Array.from(document.querySelectorAll('.d3-mmlu-heatmap')).filter(el => !(el.dataset && el.dataset.mounted === 'true')); |
| container = cs[cs.length - 1] || null; |
| } |
| if (!container) return; |
| if (container.dataset) { |
| if (container.dataset.mounted === 'true') return; |
| container.dataset.mounted = 'true'; |
| } |
| |
| |
| container.style.position = container.style.position || 'relative'; |
| let tip = container.querySelector('.d3-tooltip'); |
| let tipInner; |
| if (!tip) { |
| tip = document.createElement('div'); |
| tip.className = 'd3-tooltip'; |
| Object.assign(tip.style, { |
| position: 'absolute', |
| top: '0px', |
| left: '0px', |
| transform: 'translate(-9999px, -9999px)', |
| pointerEvents: 'none', |
| padding: '8px 10px', |
| borderRadius: '8px', |
| fontSize: '12px', |
| lineHeight: '1.35', |
| border: '1px solid var(--border-color)', |
| background: 'var(--surface-bg)', |
| color: 'var(--text-color)', |
| boxShadow: '0 4px 24px rgba(0,0,0,.18)', |
| opacity: '0', |
| transition: 'opacity .12s ease' |
| }); |
| tipInner = document.createElement('div'); |
| tipInner.className = 'd3-tooltip__inner'; |
| tipInner.style.textAlign = 'left'; |
| tip.appendChild(tipInner); |
| container.appendChild(tip); |
| } else { |
| tipInner = tip.querySelector('.d3-tooltip__inner') || tip; |
| } |
| |
| |
| const heatmapContainer = container.querySelector('.heatmap-container'); |
| const svg = d3.select(heatmapContainer).append('svg').attr('width', '100%').style('display', 'block'); |
| const defs = svg.append('defs'); |
| const gRoot = svg.append('g'); |
| const gCells = gRoot.append('g'); |
| const gAxes = gRoot.append('g'); |
| |
| |
// Row labels of the heatmap, in display order (top to bottom).
const models = [
  'Mistral-7B-v0.1',
  'Qwen1.5-7B',
  'gemma-7b',
  'phi-2',
  'DeciLM-7B'
];

// Column definitions. On the chart each column is labelled with a letter
// (A, B, C, ...) and the full text is spelled out in the legend.
const promptFormats = [
  '...? -> choice1/choice2/...',
  'Q:...? A: -> choice1/choice2/...',
  'Question: ...? Answer: -> choice1/choice2/...',
  'Question: ...? Choices: ... Answer: -> choice1/choice2/...',
  'Question: ...? Choices: A. ... Answer: -> choice1/choice2/...',
  'Question: ...? Choices: (A) ... Answer: -> choice1/choice2/...',
  'Question: ...? Choices: A. ... Answer: -> A/B/C/D',
  // The "..." choice placeholder was missing here, unlike every sibling entry.
  'Question: ...? Choices: (A) ... Answer: -> (A)/(B)/(C)/(D)'
];

// matrix[r][c] is the score shown for models[r] under promptFormats[c].
const matrix = [
  [49.0, 50.5, 52.1, 54.5, 56.4, 55.4, 55.5, 57.0],
  [37.6, 41.8, 43.5, 47.9, 50.8, 51.2, 22.9, 47.7],
  [44.6, 48.0, 47.6, 53.5, 54.2, 54.9, 56.4, 50.7],
  [39.1, 44.3, 46.5, 46.1, 47.1, 48.4, 51.7, 45.8],
  [43.6, 48.9, 49.5, 51.0, 51.3, 52.0, 52.8, 52.3]
];
| |
| |
/**
 * Return `count` colours for the heatmap.
 *
 * Prefers the host page's `window.ColorPalettes.getColors('diverging', count)`
 * when that helper exists. Otherwise it samples a built-in piecewise-linear
 * ramp at `count` evenly spaced positions in [0, 1].
 *
 * @param {number} count - Number of colours requested.
 * @returns {string[]} Colours; `rgb(r, g, b)` strings when the built-in ramp is
 *   used. A count of 0 yields an empty array and a count of 1 yields the first
 *   ramp colour (previously this produced `rgb(NaN, NaN, NaN)`).
 */
const getDivergingColors = (count) => {
  try {
    if (window.ColorPalettes && typeof window.ColorPalettes.getColors === 'function') {
      return window.ColorPalettes.getColors('diverging', count);
    }
  } catch (_) {
    // Best effort: any failure in the host palette helper falls back to the
    // built-in ramp below.
  }

  // Colour stops at t = 0, 0.25, 0.5, 0.75 and 1.
  const stops = [
    [75, 0, 130],
    [125, 30, 180],
    [50, 130, 100],
    [100, 200, 50],
    [255, 150, 0]
  ];

  const colorAt = (t) => {
    const segment = t < 0.25 ? 0 : t < 0.5 ? 1 : t < 0.75 ? 2 : 3;
    const local = (t - segment * 0.25) / 0.25;
    const from = stops[segment];
    const to = stops[segment + 1];
    const [r, g, b] = from.map((start, i) => Math.round(start + local * (to[i] - start)));
    return `rgb(${r}, ${g}, ${b})`;
  };

  const colors = [];
  for (let i = 0; i < count; i++) {
    // With a single colour there is no span to divide; pin it to the ramp start.
    const t = count > 1 ? i / (count - 1) : 0;
    colors.push(colorAt(t));
  }
  return colors;
};

// Discrete palette used by the quantile colour scale.
const palette = getDivergingColors(10);
| |
// Last measured container width; refreshed on every updateSize() call.
let width = 900;
// Space reserved around the grid. The left margin is wide because the row
// labels are drawn right-aligned just left of the grid.
const margin = { top: 10, right: 20, bottom: 20, left: 100 };

/**
 * Measure the container and size the SVG for a grid of square cells.
 *
 * The grid uses at most 600 units of the width left after the horizontal
 * margins, divided by the larger of the row and column counts. The SVG
 * viewBox is set to margins + grid + a 15-unit strip for the column labels,
 * and the root group is translated by the left and top margins.
 *
 * @returns {{innerWidth: number, innerHeight: number}} The grid's width, and
 *   the grid's height plus the column-label strip. Neither is negative or NaN,
 *   even for a container narrower than the margins or an empty data set.
 */
function updateSize() {
  width = container.clientWidth || 900;

  const nRows = models.length;
  const nCols = promptFormats.length;
  const maxDim = Math.max(nRows, nCols);
  const labelsHeight = 15;

  // Clamp at zero: a container narrower than the margins would otherwise
  // produce a negative cell size and an inverted grid.
  const innerWidth = Math.max(0, width - margin.left - margin.right);
  const availableSize = Math.min(innerWidth, 600);
  const cellSize = maxDim > 0 ? availableSize / maxDim : 0;
  const gridWidth = cellSize * nCols;
  const gridHeight = cellSize * nRows;

  const actualWidth = margin.left + gridWidth + margin.right;
  const actualHeight = margin.top + gridHeight + labelsHeight + margin.bottom;

  svg
    .attr('viewBox', `0 0 ${actualWidth} ${actualHeight}`)
    .attr('preserveAspectRatio', 'xMidYMin meet')
    .style('width', '100%')
    .style('height', 'auto');

  gRoot.attr('transform', `translate(${margin.left},${margin.top})`);
  return { innerWidth: gridWidth, innerHeight: gridHeight + labelsHeight };
}
| |
/**
 * Build a function mapping a score to a fill colour.
 *
 * Two modes:
 *  - When `palette` is non-empty and the host exposes
 *    `window.ColorPalettes.getColors`, a D3 quantile scale over `palette` is
 *    used, with its domain sampled from the sorted values at eased positions.
 *  - Otherwise the value is normalised to [0, 1] between `minV` and `maxV`
 *    (clamped), eased, and mapped onto a built-in piecewise-linear RGB ramp.
 *
 * @param {number[]} values - All values that will be coloured.
 * @param {number} minV - Lower bound used for the continuous ramp.
 * @param {number} maxV - Upper bound used for the continuous ramp.
 * @returns {(v: number) => string} Colour lookup for a single value.
 */
function getColorScale(values, minV, maxV) {
  // Applies the same power curve separately to each half of [0, 1].
  const easeHalves = (t, exponent) => (t < 0.5
    ? Math.pow(t * 2, exponent) / 2
    : 0.5 + Math.pow((t - 0.5) * 2, exponent) / 2);

  // Colour stops at t = 0, 0.25, 0.5, 0.75 and 1.
  const rampStops = [
    [75, 0, 130],
    [125, 30, 180],
    [50, 130, 100],
    [100, 200, 50],
    [255, 150, 0]
  ];

  // Linear interpolation between the two stops surrounding t.
  const rampColor = (t) => {
    const segment = t < 0.25 ? 0 : t < 0.5 ? 1 : t < 0.75 ? 2 : 3;
    const local = (t - segment * 0.25) / 0.25;
    const from = rampStops[segment];
    const to = rampStops[segment + 1];
    const [r, g, b] = from.map((start, i) => Math.round(start + local * (to[i] - start)));
    return `rgb(${r}, ${g}, ${b})`;
  };

  const hasPalette = palette.length > 0;
  if (hasPalette && window.ColorPalettes && typeof window.ColorPalettes.getColors === 'function') {
    const sorted = [...values].sort((a, b) => a - b);
    const n = sorted.length;

    // Sample 11 domain points from the sorted values at eased positions.
    const quantiles = [];
    for (let i = 0; i <= 10; i++) {
      const idx = Math.floor(easeHalves(i / 10, 1.5) * (n - 1));
      quantiles.push(sorted[Math.min(idx, n - 1)]);
    }
    const scale = d3.scaleQuantile().domain(quantiles).range(palette);
    return (v) => scale(v);
  }

  const linearScale = d3.scaleLinear()
    .domain([minV, maxV])
    .range([0, 1])
    .clamp(true);

  return (v) => rampColor(easeHalves(linearScale(v), 1.8));
}
| |
/**
 * Pick a text colour (white or near-black) that reads well on `bgColor`.
 *
 * Accepted background formats:
 *  - `rgb(...)` / `rgba(...)`, comma- or space-separated, any letter case,
 *    with numeric or percentage channels (alpha is ignored);
 *  - hex `#rgb`, `#rgba`, `#rrggbb`, `#rrggbbaa`.
 * Anything else, including missing or non-numeric channels, yields the dark
 * text colour.
 *
 * @param {*} bgColor - Background colour; coerced with String().
 * @returns {string} '#ffffff' when the weighted brightness is below 0.5,
 *   otherwise '#0e1116'.
 */
function chooseReadableTextColor(bgColor) {
  const DARK_TEXT = '#0e1116';
  const LIGHT_TEXT = '#ffffff';
  try {
    const input = String(bgColor || '');
    let rgb = null;

    const fnMatch = input.match(/rgba?\(([^)]+)\)/i);
    if (fnMatch) {
      rgb = fnMatch[1]
        .split(/[\s,\/]+/)
        .filter((part) => part !== '')
        .slice(0, 3)
        .map((part) => {
          const n = parseFloat(part);
          return part.endsWith('%') ? (n * 255) / 100 : n;
        });
    } else {
      const hexMatch = input.trim().match(/^#([0-9a-f]{3}|[0-9a-f]{4}|[0-9a-f]{6}|[0-9a-f]{8})$/i);
      if (hexMatch) {
        const hex = hexMatch[1];
        // Short forms repeat each digit (#abc -> aa bb cc).
        const pairs = hex.length <= 4 ? [...hex].map((ch) => ch + ch) : hex.match(/../g);
        rgb = pairs.slice(0, 3).map((pair) => Number.parseInt(pair, 16));
      }
    }

    if (!rgb || rgb.length < 3) return DARK_TEXT;
    const [r, g, b] = rgb;
    // A NaN channel makes the comparison false, so the dark colour is used.
    const luminance = (0.299 * r + 0.587 * g + 0.114 * b) / 255;
    return luminance < 0.5 ? LIGHT_TEXT : DARK_TEXT;
  } catch (_) {
    // String() can throw for exotic inputs; treat that as "unknown background".
    return DARK_TEXT;
  }
}
| |
/**
 * Draw (or redraw) the whole widget: heatmap cells with value labels and
 * hover tooltip, column letters under the grid, model names left of the grid,
 * and the HTML legend that spells out each lettered prompt format.
 *
 * Safe to call repeatedly: cells are keyed by row/column and updated in
 * place, while axis labels and the legend are rebuilt on every call.
 */
function render() {
  // updateSize() reports the grid's width (cell size x column count), so the
  // square cell size is recovered by dividing by the number of columns. The
  // earlier division by max(rows, cols) only agreed with updateSize() while
  // there were at least as many columns as rows.
  const { innerWidth: gridWidth } = updateSize();
  const nRows = models.length;
  const nCols = promptFormats.length;
  const cellSize = nCols > 0 ? gridWidth / nCols : 0;
  const gridHeight = cellSize * nRows;

  // Column i is labelled A, B, C, ... on both the axis and the legend.
  const letterFor = (i) => String.fromCharCode(65 + i);

  const x = d3.scaleBand()
    .domain(d3.range(nCols))
    .range([0, gridWidth])
    .paddingInner(0.08);

  const y = d3.scaleBand()
    .domain(d3.range(nRows))
    .range([0, gridHeight])
    .paddingInner(0.08);

  // One datum per cell, in row-major order.
  const flatData = models.flatMap((model, r) =>
    promptFormats.map((format, c) => ({ r, c, value: matrix[r][c], model, format })));
  const values = flatData.map((d) => d.value);
  const colorScale = getColorScale(values, Math.min(...values), Math.max(...values));

  gCells.attr('transform', 'translate(0, 0)');

  // Keyed join: new cells get their rect (with tooltip handlers) and text once;
  // existing cells are reused and stale ones removed.
  const cells = gCells.selectAll('g.cell')
    .data(flatData, (d) => `${d.r}-${d.c}`)
    .join((enter) => {
      const cell = enter.append('g').attr('class', 'cell');

      cell.append('rect')
        .attr('rx', 3)
        .attr('ry', 3)
        .on('mousemove', (event, d) => {
          const [px, py] = d3.pointer(event, container);
          tipInner.innerHTML = `<strong>${d.model}</strong><br/>${d.format}<br/>Score: ${d.value.toFixed(1)}`;
          tip.style.transform = `translate(${px + 10}px, ${py + 10}px)`;
          tip.style.opacity = '1';
        })
        .on('mouseleave', () => {
          tip.style.opacity = '0';
        });

      cell.append('text')
        .attr('class', 'cell-text')
        .attr('text-anchor', 'middle')
        .attr('dominant-baseline', 'middle');

      return cell;
    });

  cells.select('rect')
    .attr('x', (d) => x(d.c))
    .attr('y', (d) => y(d.r))
    .attr('width', Math.max(1, x.bandwidth()))
    .attr('height', Math.max(1, y.bandwidth()))
    .attr('fill', (d) => colorScale(d.value))
    .attr('stroke', 'var(--border-color)')
    .attr('stroke-width', 0.5);

  cells.select('text')
    .attr('x', (d) => x(d.c) + x.bandwidth() / 2)
    .attr('y', (d) => y(d.r) + y.bandwidth() / 2)
    .text((d) => d.value.toFixed(1))
    .style('fill', function (d) {
      // Read the fill the browser actually resolved for the sibling rect so the
      // contrast decision is based on the rendered colour; fall back to the
      // scale's output if the rect cannot be found.
      try {
        const rect = this.parentNode.querySelector('rect');
        const bg = rect ? getComputedStyle(rect).fill : colorScale(d.value);
        return chooseReadableTextColor(bg);
      } catch (_) {
        return '#0e1116';
      }
    });

  // Axis labels are cheap, so they are rebuilt from scratch each time.
  gAxes.selectAll('*').remove();
  gAxes.attr('transform', 'translate(0, 0)');

  // Column letters under the grid.
  gAxes.append('g')
    .selectAll('text')
    .data(promptFormats)
    .join('text')
    .attr('class', 'axis-label')
    .attr('text-anchor', 'middle')
    .attr('x', (_, i) => x(i) + x.bandwidth() / 2)
    .attr('y', gridHeight + 12)
    .text((_, i) => letterFor(i));

  // Model names, right-aligned against the grid's left edge.
  gAxes.append('g')
    .selectAll('text')
    .data(models)
    .join('text')
    .attr('class', 'axis-label')
    .attr('text-anchor', 'end')
    .attr('x', -10)
    .attr('y', (_, i) => y(i) + y.bandwidth() / 2)
    .attr('dominant-baseline', 'middle')
    .text((d) => d);

  // Legend: letter -> full prompt format, laid out in two columns.
  const legendContainer = container.querySelector('.legend-container');
  legendContainer.innerHTML = '';

  const legendTitle = document.createElement('div');
  legendTitle.className = 'legend-title';
  legendTitle.textContent = 'Prompt Formats:';
  legendContainer.appendChild(legendTitle);

  const legendGrid = document.createElement('div');
  legendGrid.className = 'legend-grid';

  const column1 = document.createElement('div');
  column1.className = 'legend-column';

  const column2 = document.createElement('div');
  column2.className = 'legend-column';

  // First half (rounded up) goes left, the remainder right, so the split
  // follows the number of formats instead of a fixed index.
  const splitIndex = Math.ceil(promptFormats.length / 2);

  promptFormats.forEach((format, i) => {
    const item = document.createElement('div');
    item.className = 'legend-item';

    const label = document.createElement('span');
    label.className = 'legend-label';
    label.textContent = `${letterFor(i)}.`;

    const text = document.createElement('span');
    text.className = 'legend-text';
    text.textContent = format;

    item.appendChild(label);
    item.appendChild(text);

    (i < splitIndex ? column1 : column2).appendChild(item);
  });

  legendGrid.appendChild(column1);
  legendGrid.appendChild(column2);

  legendContainer.appendChild(legendGrid);
}
| |
| |
// Initial draw.
render();

// The layout depends only on the container's width. Re-rendering changes the
// SVG's height, so drawing synchronously inside the ResizeObserver callback
// resizes the observed element again within the same delivery. Skip
// notifications where the width is unchanged and defer the actual draw to the
// next animation frame, coalescing bursts of resize events into one render.
let lastWidth = container.clientWidth;
let pendingFrame = 0;
const rerender = () => {
  const currentWidth = container.clientWidth;
  if (currentWidth === lastWidth) return;
  lastWidth = currentWidth;
  if (pendingFrame) cancelAnimationFrame(pendingFrame);
  pendingFrame = requestAnimationFrame(() => {
    pendingFrame = 0;
    render();
  });
};

if (window.ResizeObserver) {
  const ro = new ResizeObserver(rerender);
  ro.observe(container);
} else {
  // Older browsers: fall back to the window resize event.
  window.addEventListener('resize', rerender);
}
| }; |
| |
// Start the widget as soon as the document has been parsed: immediately when
// parsing is already finished, otherwise on DOMContentLoaded (once).
const launch = () => ensureD3(bootstrap);
if (document.readyState !== 'loading') {
  launch();
} else {
  document.addEventListener('DOMContentLoaded', launch, { once: true });
}
| })(); |
| </script> |
|
|