Clémentine
Init
ffdff5d
<div class="d3-tokenization">
  <!--
    Static diagram of a tokenization pipeline:
    input text -> tokenizer -> tokens (with IDs) -> language model -> output.
    role="img" + aria-label expose the whole figure to assistive technology as
    a single labelled image instead of a loose sequence of text fragments.
  -->
  <svg viewBox="0 0 800 400" xmlns="http://www.w3.org/2000/svg" role="img"
       aria-label="Tokenization pipeline: the input text &quot;Hello, world!&quot; passes through a tokenizer that splits it into four tokens (Hello, comma, world, exclamation mark) with IDs 5425, 11, 1917 and 0; the tokens feed a language model, which produces an output prediction.">
    <defs>
      <!-- Arrowhead shared by every connector path below -->
      <marker id="arrowhead-tok" markerWidth="10" markerHeight="10" refX="9" refY="3" orient="auto">
        <polygon points="0 0, 10 3, 0 6" fill="currentColor" />
      </marker>
    </defs>

    <!-- Input Text -->
    <rect x="50" y="50" width="200" height="80" rx="5" class="box"/>
    <text x="150" y="75" text-anchor="middle" class="text title">Input Text</text>
    <text x="150" y="100" text-anchor="middle" class="text label">"Hello, world!"</text>

    <!-- Arrow 1: input text -> tokenizer -->
    <path d="M 250 90 L 290 90" class="arrow" marker-end="url(#arrowhead-tok)"/>

    <!-- Tokenizer -->
    <rect x="290" y="60" width="120" height="60" rx="5" class="process"/>
    <text x="350" y="85" text-anchor="middle" class="text title">Tokenizer</text>
    <text x="350" y="105" text-anchor="middle" class="text label" font-size="10">Split into tokens</text>

    <!-- Arrow 2: tokenizer -> tokens -->
    <path d="M 410 90 L 450 90" class="arrow" marker-end="url(#arrowhead-tok)"/>

    <!-- Tokens -->
    <rect x="450" y="30" width="280" height="120" rx="5" class="box"/>
    <text x="590" y="55" text-anchor="middle" class="text title">Tokens</text>

    <!-- Token boxes -->
    <rect x="470" y="70" width="60" height="30" rx="3" class="token-box"/>
    <text x="500" y="90" text-anchor="middle" class="text token">Hello</text>
    <rect x="540" y="70" width="40" height="30" rx="3" class="token-box"/>
    <text x="560" y="90" text-anchor="middle" class="text token">,</text>
    <rect x="590" y="70" width="60" height="30" rx="3" class="token-box"/>
    <text x="620" y="90" text-anchor="middle" class="text token">world</text>
    <rect x="660" y="70" width="40" height="30" rx="3" class="token-box"/>
    <text x="680" y="90" text-anchor="middle" class="text token">!</text>

    <!-- Token IDs, horizontally centred under their token boxes -->
    <text x="500" y="125" text-anchor="middle" class="text token-id">[5425]</text>
    <text x="560" y="125" text-anchor="middle" class="text token-id">[11]</text>
    <text x="620" y="125" text-anchor="middle" class="text token-id">[1917]</text>
    <text x="680" y="125" text-anchor="middle" class="text token-id">[0]</text>

    <!-- Arrow 3: tokens -> model -->
    <path d="M 590 150 L 590 190" class="arrow" marker-end="url(#arrowhead-tok)"/>

    <!-- Model -->
    <rect x="480" y="190" width="220" height="100" rx="5" class="model"/>
    <text x="590" y="215" text-anchor="middle" class="text title">Language Model</text>

    <!-- Model internal representation -->
    <g transform="translate(520, 230)">
      <circle cx="20" cy="15" r="8" class="node-circle"/>
      <circle cx="50" cy="15" r="8" class="node-circle"/>
      <circle cx="80" cy="15" r="8" class="node-circle"/>
      <circle cx="110" cy="15" r="8" class="node-circle"/>
      <circle cx="140" cy="15" r="8" class="node-circle"/>
    </g>
    <!-- The ampersand is escaped so the markup is also valid when parsed as XML -->
    <text x="590" y="275" text-anchor="middle" class="text label" font-size="10">Process &amp; Generate</text>

    <!-- Arrow 4: model -> output -->
    <path d="M 590 290 L 590 330" class="arrow" marker-end="url(#arrowhead-tok)"/>

    <!-- Output -->
    <rect x="490" y="330" width="200" height="50" rx="5" class="box"/>
    <text x="590" y="360" text-anchor="middle" class="text label">Output / Prediction</text>
  </svg>
</div>
<style>
  /*
   * Styles for the tokenization diagram. Every selector is scoped under
   * .d3-tokenization. Theme colours come from custom properties
   * (--surface-bg, --primary-color, --text-color, --muted-color) with
   * literal fallbacks for pages that do not define them.
   */

  /* Layout: the SVG scales to the container width, height follows the viewBox */
  .d3-tokenization {
    position: relative;
    width: 100%;
  }
  .d3-tokenization svg {
    display: block;
    width: 100%;
    height: auto;
  }

  /* Panels */
  .d3-tokenization .box {
    fill: var(--surface-bg, #f0f4ff);
    stroke: var(--primary-color, #4169e1);
    stroke-width: 2;
  }
  .d3-tokenization .process {
    fill: #fff8e1;
    stroke: #ff9800;
    stroke-width: 2;
  }
  .d3-tokenization .model {
    fill: #e8f5e9;
    stroke: #4caf50;
    stroke-width: 2;
  }

  /* Typography */
  .d3-tokenization .text {
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
    fill: var(--text-color, #333);
  }
  .d3-tokenization .title {
    font-size: 14px;
    font-weight: 600;
  }
  /*
   * NOTE: a stylesheet rule takes precedence over SVG presentation
   * attributes, so this size also applies to labels that carry
   * font-size="10" in the markup.
   */
  .d3-tokenization .label {
    font-size: 12px;
  }
  .d3-tokenization .token {
    font-size: 11px;
    font-family: 'Monaco', 'Courier New', monospace;
  }
  .d3-tokenization .token-id {
    font-size: 9px;
    fill: var(--muted-color, #666);
  }

  /* Connectors */
  .d3-tokenization .arrow {
    fill: none;
    stroke: var(--muted-color, #666);
    stroke-width: 2;
  }
  /*
   * The arrowhead polygon is filled with currentColor. Marker content
   * inherits properties from the <marker> element's own ancestors, not from
   * the path that references it, so the colour has to be set on the marker
   * itself for the arrowhead to match the connector stroke.
   */
  .d3-tokenization marker {
    color: var(--muted-color, #666);
  }

  /* Tokens and model nodes */
  .d3-tokenization .token-box {
    fill: white;
    stroke: var(--primary-color, #4169e1);
    stroke-width: 1.5;
  }
  .d3-tokenization .node-circle {
    fill: #81c784;
    opacity: 0.7;
  }

  /* Dark theme: translucent tints replace the light pastel fills */
  [data-theme="dark"] .d3-tokenization .box {
    fill: rgba(65, 105, 225, 0.1);
  }
  [data-theme="dark"] .d3-tokenization .token-box {
    fill: var(--surface-bg, #1a1a1a);
  }
  [data-theme="dark"] .d3-tokenization .process {
    fill: rgba(255, 152, 0, 0.15);
  }
  [data-theme="dark"] .d3-tokenization .model {
    fill: rgba(76, 175, 80, 0.15);
  }
</style>
<script>
(() => {
const bootstrap = () => {
const scriptEl = document.currentScript;
let container = scriptEl ? scriptEl.previousElementSibling : null;
if (!(container && container.classList && container.classList.contains('d3-tokenization'))) {
const candidates = Array.from(document.querySelectorAll('.d3-tokenization'))
.filter((el) => !(el.dataset && el.dataset.mounted === 'true'));
container = candidates[candidates.length - 1] || null;
}
if (!container) return;
if (container.dataset) {
if (container.dataset.mounted === 'true') return;
container.dataset.mounted = 'true';
}
};
if (document.readyState === 'loading') {
document.addEventListener('DOMContentLoaded', bootstrap, { once: true });
} else {
bootstrap();
}
})();
</script>