on-policy-distillation / app /src /content /embeds /banner-sequence-alignment-svg.html
tfrere's picture
tfrere HF Staff
update wording
3f44950
raw
history blame
20.1 kB
<!--
  Banner embed: a flex row with two equal-width panels. Each panel holds a small
  title and an empty host <div> (#svg-section1 / #svg-section2) that the inline
  script further down fills with an SVG drawing.
  NOTE(review): --surface-bg, --border-color and --text-color are not defined in
  this file; presumably the host page provides them - confirm.
-->
<div class="sequence-alignment-visualization"
style="width:100%;margin:10px 0;aspect-ratio:3/1;min-height:260px;position:relative;overflow:hidden;background:var(--surface-bg);border-radius:12px;border:1px solid var(--border-color);box-shadow:0 2px 8px rgba(0, 0, 0, 0.08);display:flex;">
<!-- Panel 1: drawn by drawSection1(); the dashed right border separates it from panel 2. -->
<div class="section-container" style="flex:1;position:relative;border-right:1px dashed var(--border-color);padding:20px;">
<div style="position:absolute;top:10px;left:20px;font-weight:600;font-size:12px;color:var(--text-color);">
1. Cross-tokenizer sequence alignment
</div>
<!-- SVG host; the 30px top margin / height reduction leaves room for the absolutely positioned title. -->
<div id="svg-section1" style="width:100%;height:calc(100% - 30px);margin-top:30px;"></div>
</div>
<!-- Panel 2: drawn by drawSection2(). -->
<div class="section-container" style="flex:1;position:relative;padding:20px;">
<div style="position:absolute;top:10px;left:20px;font-weight:600;font-size:12px;color:var(--text-color);">
2. Logprob merging
</div>
<!-- SVG host; same title offset as panel 1. -->
<div id="svg-section2" style="width:100%;height:calc(100% - 30px);margin-top:30px;"></div>
</div>
</div>
<!-- SVG.js 3.2.5 from cdnjs; it provides the global SVG() factory used by the inline script below. -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/svg.js/3.2.5/svg.min.js"></script>
<script>
(function () {
/**
 * Resolve the drawing palette for the theme currently set on <html>.
 *
 * The dark variants are used only when the root element's `data-theme`
 * attribute is exactly "dark"; any other value (or no attribute) selects the
 * light variants.
 *
 * @returns {{originalToken: string, subToken: string, mergedToken: string,
 *            text: string, line: string, plus: string}} CSS colour strings.
 */
const getColors = () => {
  // Each role maps to [dark-theme colour, light-theme colour].
  const palette = {
    originalToken: ['rgba(134, 239, 172, 0.4)', 'rgba(187, 247, 208, 0.7)'],
    subToken: ['rgba(251, 191, 36, 0.7)', 'rgba(253, 224, 71, 0.8)'],
    mergedToken: ['rgba(147, 197, 253, 0.5)', 'rgba(191, 219, 254, 0.7)'],
    text: ['rgba(255, 255, 255, 0.95)', 'rgba(0, 0, 0, 0.9)'],
    line: ['rgba(255, 255, 255, 0.3)', 'rgba(0, 0, 0, 0.35)'],
    plus: ['rgba(255, 255, 255, 0.7)', 'rgba(0, 0, 0, 0.7)'],
  };
  const themeName = document.documentElement.getAttribute('data-theme');
  const variant = themeName === 'dark' ? 0 : 1;

  const resolved = {};
  Object.keys(palette).forEach((role) => {
    resolved[role] = palette[role][variant];
  });
  return resolved;
};
/**
 * Render panel 1 ("Cross-tokenizer sequence alignment") into `#svg-section1`.
 *
 * Three rows are drawn, top to bottom:
 *   - the original words (filled with `colors.originalToken`),
 *   - numbered sub-token boxes centred under each word (`colors.subToken`),
 *   - numbered merged-token boxes centred on the word they align to
 *     (`colors.mergedToken`),
 * plus a dashed vertical guide through the centre of every word.
 *
 * The container is emptied first, so the function can be called repeatedly
 * (resize, theme change). Every size is multiplied by a scale factor derived
 * from the container width. Does nothing if the container element is missing.
 * Takes no arguments and returns nothing.
 */
function drawSection1() {
  const container = document.getElementById('svg-section1');
  if (!container) return;
  container.innerHTML = '';

  const colors = getColors();
  const draw = SVG().addTo(container).size('100%', '100%');
  // clientWidth/clientHeight of 0 fall back to nominal dimensions.
  const width = container.clientWidth || 400;
  const height = container.clientHeight || 220;

  // Unscaled layout width: five word boxes plus the four 20px gaps between them.
  const baseTotalWidth = 130 + 130 + 50 + 100 + 150 + (20 * 4);
  // Shrink to fit, never enlarge. The lower bound matters for containers
  // narrower than the 40px margin: without it the factor goes negative and
  // every rect would be created with a negative width/height.
  const scale = Math.max(0, Math.min(1, (width - 40) / baseTotalWidth));

  const padding = 20 * scale;
  const tokenHeight = 32 * scale;
  const spacing = 20 * scale;
  const subTokenSmallSize = 26 * scale;
  const subTokenLargeWidth = 65 * scale;
  const subTokenLargeHeight = 26 * scale;
  const subTokenGap = 3 * scale;
  const mergedTokenWidth = 52 * scale;
  const mergedTokenGap = 3 * scale;
  const strokeStyle = { color: colors.line, width: 2 * scale };
  const fontFamily = '-apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif';

  // Row centre lines, as fractions of the container height.
  const originalY = height * 0.20;
  const subTokenY = height * 0.50;
  const mergedY = height * 0.78;

  // Draws `content` centred on (cx, cy) in the shared text colour.
  const drawCenteredText = (content, fontSize, fontWeight, cx, cy) => {
    const label = draw.text(content)
      .font({
        family: fontFamily,
        size: fontSize,
        anchor: 'middle',
        weight: fontWeight
      })
      .fill(colors.text);
    label.cx(cx);
    label.cy(cy);
  };

  // Width/height of a sub-token box: 'small' is a square, anything else a wide pill.
  const subTokenWidth = (st) => (st.type === 'small' ? subTokenSmallSize : subTokenLargeWidth);
  const subTokenHeight = (st) => (st.type === 'small' ? subTokenSmallSize : subTokenLargeHeight);

  const originalWords = [
    { text: '<think>', subTokens: [
      { id: 0, type: 'small' },
      { id: 1, type: 'small' },
      { id: 2, type: 'small' }
    ], width: 130 * scale },
    { text: 'Hugging Face', subTokens: [
      { id: 3, type: 'large' }
    ], width: 130 * scale },
    { text: 'is', subTokens: [
      { id: 4, type: 'small' }
    ], width: 50 * scale },
    { text: 'awesome!', subTokens: [
      { id: 5, type: 'large' }
    ], width: 100 * scale },
    { text: '</think>', subTokens: [
      { id: 6, type: 'small' },
      { id: 7, type: 'small' },
      { id: 8, type: 'small' },
      { id: 9, type: 'small' }
    ], width: 150 * scale }
  ];

  // Horizontal centre of every word, reused to align the merged-token row.
  const wordCenters = [];
  let currentX = padding;

  originalWords.forEach((word) => {
    const wordCenterX = currentX + word.width / 2;
    wordCenters.push(wordCenterX);

    // Original word box with its label.
    draw.rect(word.width, tokenHeight)
      .move(currentX, originalY - tokenHeight / 2)
      .radius(12 * scale)
      .fill(colors.originalToken)
      .stroke(strokeStyle);
    drawCenteredText(word.text, 12 * scale, '500', wordCenterX, originalY);

    // Total width of this word's sub-token run (boxes plus inner gaps), so the
    // run can be centred under the word.
    const subTokensTotalWidth = word.subTokens.reduce(
      (sum, st) => sum + subTokenWidth(st),
      0
    ) + Math.max(0, word.subTokens.length - 1) * subTokenGap;

    let subTokenX = currentX + (word.width - subTokensTotalWidth) / 2;
    word.subTokens.forEach((st) => {
      const stWidth = subTokenWidth(st);
      const stHeight = subTokenHeight(st);
      draw.rect(stWidth, stHeight)
        .move(subTokenX, subTokenY - stHeight / 2)
        .radius(7 * scale)
        .fill(colors.subToken)
        .stroke(strokeStyle);
      drawCenteredText(String(st.id), 11 * scale, '600', subTokenX + stWidth / 2, subTokenY);
      subTokenX += stWidth + subTokenGap;
    });

    // Dashed guide from just below the word box to just above the merged row.
    draw.line(
      wordCenterX,
      originalY + tokenHeight / 2 + (5 * scale),
      wordCenterX,
      mergedY - tokenHeight / 2 - (5 * scale)
    ).stroke({ color: colors.line, width: 2 * scale, dasharray: '5,5' });

    currentX += word.width + spacing;
  });

  // Merged tokens (bottom row). A group may hold several boxes side by side.
  const mergedTokenGroups = [
    { ids: [0], alignToWord: 0 }, // Single token "0" under <think>
    { ids: [1, 2], alignToWord: 1 }, // Two tokens "1", "2" under Hugging Face
    { ids: [3], alignToWord: 2 }, // Single token "3" under is
    { ids: [4], alignToWord: 3 }, // Single token "4" under awesome!
    { ids: [5], alignToWord: 4 } // Single token "5" under </think>
  ];

  mergedTokenGroups.forEach((group) => {
    const groupTotalWidth = group.ids.length * mergedTokenWidth
      + (group.ids.length - 1) * mergedTokenGap;
    // Centre on the referenced word, or on an explicit `x` when the group has
    // no word reference (`!= null` also covers a missing `alignToWord`).
    const anchorX = group.alignToWord != null ? wordCenters[group.alignToWord] : group.x;
    const groupX = anchorX - groupTotalWidth / 2;

    group.ids.forEach((id, idIdx) => {
      const tokenX = groupX + idIdx * (mergedTokenWidth + mergedTokenGap);
      draw.rect(mergedTokenWidth, tokenHeight)
        .move(tokenX, mergedY - tokenHeight / 2)
        .radius(10 * scale)
        .fill(colors.mergedToken)
        .stroke(strokeStyle);
      drawCenteredText(String(id), 12 * scale, '600', tokenX + mergedTokenWidth / 2, mergedY);
    });
  });
}
/**
 * Render panel 2 ("Logprob merging") into `#svg-section2`.
 *
 * Top row: one label box per input node, each with a vertical stack of four
 * small squares above it; nodes that share a `groupId` sit close together and
 * are joined by "+" signs. Bottom row: one stack of five squares and a label
 * per output word, horizontally aligned to the centre of an input group (plus
 * an optional offset) and connected to it by a dashed vertical guide.
 *
 * The container is emptied first, so the function can be called repeatedly
 * (resize, theme change). Every size is multiplied by a scale factor derived
 * from the container width. Does nothing if the container element is missing.
 * Takes no arguments and returns nothing.
 */
function drawSection2() {
  const container = document.getElementById('svg-section2');
  if (!container) return;
  container.innerHTML = '';

  const colors = getColors();
  const draw = SVG().addTo(container).size('100%', '100%');
  // clientWidth/clientHeight of 0 fall back to nominal dimensions.
  const width = container.clientWidth || 400;
  const height = container.clientHeight || 220;

  // Unscaled gaps: between different groups, and between nodes of one group.
  const baseGroupSpacing = 20;
  const baseNodeGap = 3;

  // Input nodes (top row). Brackets and slashes are separate nodes that share
  // a groupId with the word they belong to.
  const inputNodes = [
    { text: '<', baseWidth: 22, groupId: 0 },
    { text: 'think', baseWidth: 48, groupId: 0 },
    { text: '>', baseWidth: 22, groupId: 0 },
    { text: 'Hugging Face', baseWidth: 130, groupId: 1 },
    { text: 'is', baseWidth: 50, groupId: 2 },
    { text: 'awesome!', baseWidth: 100, groupId: 3 },
    { text: '<', baseWidth: 22, groupId: 4 },
    { text: '/', baseWidth: 22, groupId: 4 },
    { text: 'think', baseWidth: 48, groupId: 4 },
    { text: '>', baseWidth: 22, groupId: 4 }
  ];

  // True when the node at `idx` is followed by another node of the same group.
  const hasSiblingAfter = (idx) => (
    idx < inputNodes.length - 1 && inputNodes[idx + 1].groupId === inputNodes[idx].groupId
  );

  // Unscaled layout width, derived from the node list so it cannot drift from
  // what is actually drawn: all node widths, plus the small gap after a node
  // followed by a sibling, plus the large gap after the last node of a group.
  const baseTotalWidth = inputNodes.reduce((sum, node, idx) => {
    if (idx === inputNodes.length - 1) return sum + node.baseWidth;
    return sum + node.baseWidth + (hasSiblingAfter(idx) ? baseNodeGap : baseGroupSpacing);
  }, 0);

  // Shrink to fit, never enlarge. The lower bound matters for containers
  // narrower than the 40px margin: without it the factor goes negative and
  // every rect would be created with a negative width/height.
  const scale = Math.max(0, Math.min(1, (width - 40) / baseTotalWidth));

  const padding = 20 * scale;
  const logprobStackHeight = 60 * scale;
  const logprobRectSize = 13 * scale;
  const logprobSpacing = 3 * scale;
  const inputRectsPerStack = 4;
  const outputRectsPerStack = 5;
  const spacing = baseGroupSpacing * scale;
  const nodeGap = baseNodeGap * scale;
  const labelHeight = 26 * scale;
  const thinStroke = { color: colors.line, width: 1.5 * scale };
  const fontFamily = '-apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif';

  // Output words (bottom row). `alignToInput` is an input groupId; `offset`
  // shifts the word sideways from that group's centre.
  const outputGroups = [
    { text: '<think>', alignToInput: 0 },
    { text: 'Hugging', alignToInput: 1, offset: -40 * scale },
    { text: 'Face', alignToInput: 1, offset: 40 * scale },
    { text: 'is', alignToInput: 2 },
    { text: 'awesome!', alignToInput: 3 },
    { text: '</think>', alignToInput: 4 }
  ];

  // Row anchors, as fractions of the container height.
  const inputY = height * 0.20;
  const outputY = height * 0.68;
  const inputLabelY = inputY + logprobStackHeight / 2 + (22 * scale);
  const outputLabelY = outputY + logprobStackHeight / 2 + (35 * scale);

  // Draws `content` centred on (cx, cy).
  const drawCenteredText = (content, fontSize, fontWeight, fill, cx, cy) => {
    const label = draw.text(content)
      .font({
        family: fontFamily,
        size: fontSize,
        anchor: 'middle',
        weight: fontWeight
      })
      .fill(fill);
    label.cx(cx);
    label.cy(cy);
  };

  // Draws `count` squares stacked downwards, horizontally centred on centerX;
  // the first square's top edge is half a nominal stack height above rowY.
  const drawStack = (centerX, rowY, count, fill) => {
    const stackX = centerX - logprobRectSize / 2;
    for (let i = 0; i < count; i++) {
      const rectY = rowY - logprobStackHeight / 2 + i * (logprobRectSize + logprobSpacing);
      draw.rect(logprobRectSize, logprobRectSize)
        .move(stackX, rectY)
        .radius(4 * scale)
        .fill(fill)
        .stroke(thinStroke);
    }
  };

  // groupId -> { start, end }: horizontal extent covered by the group's nodes.
  const groupExtents = new Map();
  let currentX = padding;

  inputNodes.forEach((node, idx) => {
    const nodeWidth = node.baseWidth * scale;
    const nodeCenterX = currentX + nodeWidth / 2;

    const extent = groupExtents.get(node.groupId);
    if (extent) {
      extent.end = currentX + nodeWidth;
    } else {
      groupExtents.set(node.groupId, { start: currentX, end: currentX + nodeWidth });
    }

    drawStack(nodeCenterX, inputY, inputRectsPerStack, colors.subToken);

    const isLastInGroup = !hasSiblingAfter(idx);
    if (!isLastInGroup) {
      // "+" halfway between this stack and the next stack of the same group.
      const nextWidth = inputNodes[idx + 1].baseWidth * scale;
      const nextNodeCenterX = currentX + nodeWidth + nodeGap + (nextWidth / 2);
      drawCenteredText(
        '+', 16 * scale, 'bold', colors.plus,
        (nodeCenterX + nextNodeCenterX) / 2, inputY
      );
    }

    // Label box under the stack.
    draw.rect(nodeWidth, labelHeight)
      .move(currentX, inputLabelY - labelHeight / 2)
      .radius(8 * scale)
      .fill(colors.subToken)
      .stroke(thinStroke);
    drawCenteredText(node.text, 11 * scale, '500', colors.text, nodeCenterX, inputLabelY);

    currentX += nodeWidth + (isLastInGroup ? spacing : nodeGap);
  });

  outputGroups.forEach((output) => {
    const extent = groupExtents.get(output.alignToInput);
    const outputCenterX = (extent.start + extent.end) / 2 + (output.offset || 0);

    // Dashed guide from just below the input label to just above the output stack.
    draw.line(
      outputCenterX,
      inputLabelY + labelHeight / 2 + (3 * scale),
      outputCenterX,
      outputY - logprobStackHeight / 2 - (3 * scale)
    ).stroke({ color: colors.line, width: 2 * scale, dasharray: '5,5' });

    drawStack(outputCenterX, outputY, outputRectsPerStack, colors.mergedToken);

    // Label box sized from the character count of the word.
    const labelWidth = output.text.length * 9 * scale + 15 * scale;
    draw.rect(labelWidth, labelHeight)
      .move(outputCenterX - labelWidth / 2, outputLabelY - labelHeight / 2)
      .radius(8 * scale)
      .fill(colors.mergedToken)
      .stroke(thinStroke);
    drawCenteredText(output.text, 11 * scale, '500', colors.text, outputCenterX, outputLabelY);
  });
}
/**
 * Redraw both panels from scratch.
 *
 * Used as the callback for container resizes and theme changes, and for the
 * initial draw. Those callbacks can fire before (or without) SVG.js being
 * available, in which case drawing would throw a ReferenceError on `SVG()`;
 * the redraw is skipped instead.
 */
function resize() {
  if (typeof SVG === 'undefined') return;
  drawSection1();
  drawSection2();
}
// Theme switches are signalled through the `data-theme` attribute on <html>;
// redraw whenever that attribute (and only that attribute) is mutated so the
// palette is re-read.
const themeWatchOptions = {
  attributes: true,
  attributeFilter: ['data-theme']
};
const observer = new MutationObserver(() => resize());
observer.observe(document.documentElement, themeWatchOptions);
// Initialize: redraw when the embed changes size and perform the first draw
// as soon as SVG.js is available.
const container = document.querySelector('.sequence-alignment-visualization');
if (container) {
  if (window.ResizeObserver) {
    const ro = new ResizeObserver(resize);
    ro.observe(container);
  } else {
    // Fallback for browsers without ResizeObserver: react to window resizes only.
    window.addEventListener('resize', resize);
  }

  // Wait for SVG.js to load. The wait is bounded: if the library never shows
  // up (blocked or failed CDN request) the timer stops after ~30 s instead of
  // rescheduling itself every 50 ms for the lifetime of the page.
  const svgPollIntervalMs = 50;
  const svgPollMaxAttempts = 600;
  let svgPollAttempts = 0;
  const checkSVG = () => {
    if (typeof SVG !== 'undefined') {
      resize();
      return;
    }
    if (svgPollAttempts >= svgPollMaxAttempts) {
      console.warn('sequence-alignment-visualization: SVG.js did not load; diagrams were not drawn.');
      return;
    }
    svgPollAttempts += 1;
    setTimeout(checkSVG, svgPollIntervalMs);
  };
  checkSVG();
}
})();
</script>