on-policy-distillation / app /src /content /embeds /banner-sequence-alignment.html
tfrere's picture
tfrere HF Staff
update post-citation
9ec3e0f
<!-- Banner figure: a flex row holding two equal-width panels. Each panel has an
     absolutely positioned caption and a <canvas> that the inline script below
     looks up by id and paints. -->
<div class="sequence-alignment-visualization"
style="width:100%;margin:10px 0;aspect-ratio:3/1;min-height:260px;position:relative;overflow:hidden;background:var(--surface-bg);border-radius:12px;border:1px solid var(--border-color);box-shadow:0 2px 8px rgba(0, 0, 0, 0.08);display:flex;">
<!-- Panel 1 (left): separated from panel 2 by a dashed right border. -->
<div class="section-container" style="flex:1;position:relative;border-right:1px dashed var(--border-color);padding:20px;">
<div style="position:absolute;top:10px;left:20px;font-weight:600;font-size:12px;color:var(--text-color);">
1. Determine the token merges
</div>
<!-- Painted by drawSection1 via setupCanvas('canvas-section1', ...). -->
<canvas id="canvas-section1" style="width:100%;height:100%;display:block;"></canvas>
</div>
<!-- Panel 2 (right). -->
<div class="section-container" style="flex:1;position:relative;padding:20px;">
<div style="position:absolute;top:10px;left:20px;font-weight:600;font-size:12px;color:var(--text-color);">
2. Add logprob tensors in the merged positions
</div>
<!-- Painted by drawSection2 via setupCanvas('canvas-section2', ...). -->
<canvas id="canvas-section2" style="width:100%;height:100%;display:block;"></canvas>
</div>
</div>
<script>
(() => {
/**
 * Resolve the drawing palette for the currently active theme.
 *
 * The dark variant is chosen only when the root element carries
 * data-theme="dark"; any other value (or a missing attribute) selects the
 * light variant.
 *
 * @returns {{originalToken: string, subToken: string, mergedToken: string,
 *            text: string, line: string, plus: string}} CSS colour strings.
 */
const getColors = () => {
  const variant = document.documentElement.getAttribute('data-theme') === 'dark' ? 0 : 1;
  // Every role maps to a [dark, light] pair.
  const palette = {
    originalToken: ['rgba(134, 239, 172, 0.3)', 'rgba(187, 247, 208, 0.6)'],
    subToken: ['rgba(251, 191, 36, 0.6)', 'rgba(253, 224, 71, 0.7)'],
    mergedToken: ['rgba(147, 197, 253, 0.4)', 'rgba(191, 219, 254, 0.6)'],
    text: ['rgba(255, 255, 255, 0.9)', 'rgba(0, 0, 0, 0.85)'],
    line: ['rgba(255, 255, 255, 0.25)', 'rgba(0, 0, 0, 0.3)'],
    plus: ['rgba(255, 255, 255, 0.6)', 'rgba(0, 0, 0, 0.6)']
  };
  const resolved = {};
  for (const [role, pair] of Object.entries(palette)) {
    resolved[role] = pair[variant];
  }
  return resolved;
};
// Fallback for engines whose 2D context has no roundRect(): appends a
// rounded-rectangle outline to the current path using quadratic corners.
// Installed only when the method is missing, so a native one is never replaced.
if (!CanvasRenderingContext2D.prototype.roundRect) {
  CanvasRenderingContext2D.prototype.roundRect = function (left, top, boxWidth, boxHeight, radius) {
    // Clamp the corner radius so opposite corners never overlap.
    let r = radius;
    if (boxWidth < 2 * r) r = boxWidth / 2;
    if (boxHeight < 2 * r) r = boxHeight / 2;
    const right = left + boxWidth;
    const bottom = top + boxHeight;
    // Walk clockwise starting just after the top-left corner.
    this.moveTo(left + r, top);
    this.lineTo(right - r, top);
    this.quadraticCurveTo(right, top, right, top + r);
    this.lineTo(right, bottom - r);
    this.quadraticCurveTo(right, bottom, right - r, bottom);
    this.lineTo(left + r, bottom);
    this.quadraticCurveTo(left, bottom, left, bottom - r);
    this.lineTo(left, top + r);
    this.quadraticCurveTo(left, top, left + r, top);
  };
}
/**
 * Paint panel 1 ("Determine the token merges").
 *
 * Three rows are drawn: the original words (at 20% of the canvas height), the
 * numbered sub-token chips under each word (45%), and a centred row of five
 * numbered merged tokens (70%). A dashed vertical connector runs from each
 * word box down to the merged row.
 *
 * @param {HTMLCanvasElement} canvas - Target canvas; its current width/height
 *   attributes define the drawing area.
 * @param {{originalToken: string, subToken: string, mergedToken: string,
 *          text: string, line: string}} colors - Fill/stroke styles to use.
 */
const drawSection1 = (canvas, colors) => {
  const FONT_FAMILY = '-apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif';
  const ctx = canvas.getContext('2d');
  const { width, height } = canvas;
  ctx.clearRect(0, 0, width, height);

  // Unscaled layout width: the five word boxes plus four 15px gaps. The
  // layout only ever shrinks to fit (scale is capped at 1).
  const naturalWidth = 180 + 120 + 50 + 100 + 200 + (15 * 4);
  const scale = Math.min(1, (width - 40) / naturalWidth);
  const margin = 20 * scale;
  const boxHeight = 32 * scale;
  const gap = 15 * scale;
  const chipSize = 18 * scale;

  const wordRowY = height * 0.2;
  const chipRowY = height * 0.45;
  const mergedRowY = height * 0.7;

  // Fill one rounded rectangle in the given colour.
  const fillRounded = (x, y, w, h, radius, color) => {
    ctx.fillStyle = color;
    ctx.beginPath();
    ctx.roundRect(x, y, w, h, radius);
    ctx.fill();
  };

  // Draw text centred on (x, y); fontPx is the unscaled font size.
  const drawCenteredText = (content, x, y, fontPx) => {
    ctx.fillStyle = colors.text;
    ctx.font = `${fontPx * scale}px ${FONT_FAMILY}`;
    ctx.textAlign = 'center';
    ctx.textBaseline = 'middle';
    ctx.fillText(content, x, y);
  };

  // Total extent of a row of boxes separated by `gap`.
  const rowSpan = (widths) => widths.reduce((sum, w) => sum + w + gap, -gap);

  // [label, sub-token ids, unscaled box width]
  const wordRow = [
    ['<think>', [0, 1, 2], 180],
    ['Hugging Face', [3], 120],
    ['is', [4], 50],
    ['awesome!', [5], 100],
    ['</think>', [6, 7, 8, 9], 200]
  ].map(([label, subTokenIds, naturalBoxWidth]) => ({
    label,
    subTokenIds,
    boxWidth: naturalBoxWidth * scale
  }));

  let cursorX = margin;
  for (const { label, subTokenIds, boxWidth } of wordRow) {
    const centerX = cursorX + boxWidth / 2;

    // Word box (green) with its label.
    fillRounded(cursorX, wordRowY - boxHeight / 2, boxWidth, boxHeight, 8, colors.originalToken);
    drawCenteredText(label, centerX, wordRowY, 10);

    // Sub-token chips (yellow), evenly distributed across the word's width.
    const chipStep = boxWidth / (subTokenIds.length + 1);
    for (const [slot, subTokenId] of subTokenIds.entries()) {
      const chipX = cursorX + chipStep * (slot + 1) - chipSize / 2;
      fillRounded(chipX, chipRowY - chipSize / 2, chipSize, chipSize, 5, colors.subToken);
      drawCenteredText(String(subTokenId), chipX + chipSize / 2, chipRowY, 9);
    }

    // Dashed connector from the bottom of the word box to the merged row.
    ctx.strokeStyle = colors.line;
    ctx.lineWidth = 1;
    ctx.setLineDash([3, 3]);
    ctx.beginPath();
    ctx.moveTo(centerX, wordRowY + boxHeight / 2);
    ctx.lineTo(centerX, mergedRowY - boxHeight / 2);
    ctx.stroke();
    ctx.setLineDash([]);

    cursorX += boxWidth + gap;
  }

  // Merged tokens (blue), ids 0-4: one per word, i.e. sub-tokens
  // [0,1,2], [3], [4], [5] and [6,7,8,9] respectively. All share one width
  // and the row is centred under the word row.
  const mergedIds = [0, 1, 2, 3, 4];
  const mergedWidth = 60 * scale;
  const wordSpan = rowSpan(wordRow.map((word) => word.boxWidth));
  const mergedSpan = rowSpan(mergedIds.map(() => mergedWidth));
  cursorX = margin + (wordSpan - mergedSpan) / 2;
  for (const mergedId of mergedIds) {
    fillRounded(cursorX, mergedRowY - boxHeight / 2, mergedWidth, boxHeight, 8, colors.mergedToken);
    drawCenteredText(String(mergedId), cursorX + mergedWidth / 2, mergedRowY, 10);
    cursorX += mergedWidth + gap;
  }
};
/**
 * Paint panel 2 ("Add logprob tensors in the merged positions").
 *
 * Top row (20% of the canvas height): for every word, one yellow column of
 * four squares per sub-token, with a '+' between neighbouring columns, and
 * the word label underneath. Bottom row (70%): one blue group of five squares
 * per word plus the same label in the merged colour. A dashed connector links
 * each input label to its output group.
 *
 * @param {HTMLCanvasElement} canvas - Target canvas; its current width/height
 *   attributes define the drawing area.
 * @param {{originalToken: string, subToken: string, mergedToken: string,
 *          text: string, line: string, plus: string}} colors - Styles to use.
 */
const drawSection2 = (canvas, colors) => {
  const ctx = canvas.getContext('2d');
  const width = canvas.width;
  const height = canvas.height;
  ctx.clearRect(0, 0, width, height);

  // Centre every glyph on its anchor, and do it before anything is drawn.
  // setupCanvas re-assigns canvas.width/height ahead of each redraw, which
  // puts the context back in its default state ('alphabetic' baseline), and
  // the first '+' signs are painted before the first word label.
  ctx.textAlign = 'center';
  ctx.textBaseline = 'middle';

  const fontFamily = '-apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif';

  // Calculate scale based on available width (widths + spacings); the layout
  // only ever shrinks, never grows past its natural size.
  const totalContentWidth = 180 + 120 + 50 + 100 + 200 + (20 * 4);
  const scale = Math.min(1, (width - 40) / totalContentWidth);
  const padding = 20 * scale;
  const logprobStackHeight = 60 * scale;
  const logprobRectSize = 12 * scale;
  const logprobSpacing = 3 * scale;
  const rectsPerStack = 4; // Input stacks have 4 rectangles
  const outputRectsPerStack = 5; // Merged output groups have 5 rectangles
  // Length of a run of four squares; used as the horizontal room reserved
  // for each input stack.
  const stackWidth = (logprobRectSize + logprobSpacing) * rectsPerStack - logprobSpacing;
  // Gap between neighbouring stacks. It must shrink with the rest of the
  // layout, otherwise multi-stack groups outgrow their word label at small
  // scales and the '+' no longer sits midway between stacks.
  const stackGap = 8 * scale;
  const stackPitch = stackWidth + stackGap;
  const spacing = 20 * scale;

  const words = [
    { text: '<think>', stacks: 3, width: 180 * scale },
    { text: 'Hugging Face', stacks: 1, width: 120 * scale },
    { text: 'is', stacks: 1, width: 50 * scale },
    { text: 'awesome!', stacks: 1, width: 100 * scale },
    { text: '</think>', stacks: 4, width: 200 * scale }
  ];

  const inputY = height * 0.2;
  const outputY = height * 0.7;

  // Fill one rounded rectangle in the given style.
  const fillRoundedRect = (x, y, w, h, radius, fillStyle) => {
    ctx.fillStyle = fillStyle;
    ctx.beginPath();
    ctx.roundRect(x, y, w, h, radius);
    ctx.fill();
  };

  // Draw a word's pill-shaped label with its text, vertically centred on labelY.
  const drawWordLabel = (word, x, labelY, fillStyle) => {
    fillRoundedRect(x, labelY - (8 * scale), word.width, 16 * scale, 4 * scale, fillStyle);
    ctx.fillStyle = colors.text;
    ctx.font = `${9 * scale}px ${fontFamily}`;
    ctx.fillText(word.text, x + word.width / 2, labelY);
  };

  // Draw input logprob tensors (yellow stacks)
  let currentX = padding;
  words.forEach((word) => {
    const wordCenterX = currentX + word.width / 2;

    // One stack per sub-token; the group is spread symmetrically around the
    // word centre (measured at each stack's left edge).
    for (let s = 0; s < word.stacks; s++) {
      const stackX = wordCenterX - (word.stacks - 1) * stackPitch / 2 + s * stackPitch;
      for (let i = 0; i < rectsPerStack; i++) {
        const rectY = inputY - logprobStackHeight / 2 + i * (logprobRectSize + logprobSpacing);
        fillRoundedRect(stackX, rectY, logprobRectSize, logprobRectSize, 3, colors.subToken);
      }
      // Plus sign halfway to the next stack (except after the last one)
      if (s < word.stacks - 1) {
        ctx.fillStyle = colors.plus;
        ctx.font = `bold ${14 * scale}px ${fontFamily}`;
        ctx.fillText('+', stackX + stackPitch / 2, inputY);
      }
    }

    // Word label below stacks
    const labelY = inputY + logprobStackHeight / 2 + (12 * scale);
    drawWordLabel(word, currentX, labelY, colors.originalToken);

    // Vertical dotted line from the label down to the output row
    ctx.strokeStyle = colors.line;
    ctx.lineWidth = 1;
    ctx.setLineDash([3, 3]);
    ctx.beginPath();
    ctx.moveTo(wordCenterX, labelY + (8 * scale));
    ctx.lineTo(wordCenterX, outputY - logprobStackHeight / 2);
    ctx.stroke();
    ctx.setLineDash([]);

    currentX += word.width + spacing;
  });

  // Draw output logprob tensors (blue) - one merged group per word
  const outputStackWidth = (logprobRectSize + logprobSpacing) * outputRectsPerStack - logprobSpacing;
  currentX = padding;
  words.forEach((word) => {
    const wordCenterX = currentX + word.width / 2;
    const adjustedStackX = wordCenterX - outputStackWidth / 2;
    // Each square is shifted one step in both x and y, so the five squares
    // form a descending diagonal centred horizontally on the word.
    for (let i = 0; i < outputRectsPerStack; i++) {
      const step = i * (logprobRectSize + logprobSpacing);
      const rectY = outputY - logprobStackHeight / 2 + step;
      const rectX = adjustedStackX + step;
      fillRoundedRect(rectX, rectY, logprobRectSize, logprobRectSize, 3, colors.mergedToken);
    }

    // Word label below output stacks
    const labelY = outputY + logprobStackHeight / 2 + (12 * scale);
    drawWordLabel(word, currentX, labelY, colors.mergedToken);

    currentX += word.width + spacing;
  });
};
/**
 * Bind one canvas to its painter and keep it up to date.
 *
 * The canvas bitmap is sized from the surrounding layout and drawFn is called
 * with the canvas and the current palette: once immediately, again whenever
 * the root element's data-theme attribute changes, and again whenever the
 * visualization container resizes (or on window resize where ResizeObserver
 * is unavailable). Does nothing if the container or the canvas is missing.
 *
 * @param {string} canvasId - id of the <canvas> element to manage.
 * @param {(canvas: HTMLCanvasElement, colors: object) => void} drawFn - Painter.
 */
const setupCanvas = (canvasId, drawFn) => {
  const root = document.querySelector('.sequence-alignment-visualization');
  const target = root ? document.getElementById(canvasId) : null;
  if (!root || !target) return;

  const redraw = () => {
    const panel = target.closest('.section-container');
    if (!panel) return;
    const { height: rootHeight } = root.getBoundingClientRect();
    const { width: panelWidth } = panel.getBoundingClientRect();
    // Subtract the panel padding (20px each side) horizontally and 50px of
    // padding/label allowance vertically, with a floor of 100 x 150.
    target.width = Math.max(100, panelWidth - 40);
    target.height = Math.max(150, rootHeight - 50);
    drawFn(target, getColors());
  };

  // Repaint with the new palette when the theme attribute flips.
  const themeWatcher = new MutationObserver(() => redraw());
  themeWatcher.observe(document.documentElement, {
    attributes: true,
    attributeFilter: ['data-theme']
  });

  // Repaint on layout changes.
  if (!window.ResizeObserver) {
    window.addEventListener('resize', redraw);
  } else {
    const sizeWatcher = new ResizeObserver(redraw);
    sizeWatcher.observe(root);
  }

  redraw();
};
// Entry point: attach each canvas to its painter, in document order.
const bootstrap = () => {
  const panels = [
    ['canvas-section1', drawSection1],
    ['canvas-section2', drawSection2]
  ];
  for (const [canvasId, painter] of panels) {
    setupCanvas(canvasId, painter);
  }
};
// Run right away when the DOM is already parsed; otherwise wait (once) for it.
if (document.readyState !== 'loading') {
  bootstrap();
} else {
  document.addEventListener('DOMContentLoaded', bootstrap, { once: true });
}
})();
</script>