<!-- Tokenization diagram component: markup, scoped styles and mount guard. -->
<!--
  Static flow diagram of a tokenization pipeline:
  input text → tokenizer → tokens (with IDs) → language model → output.
  All coordinates are in the 800×400 viewBox; the stylesheet scales the
  SVG to the container width.
-->
<div class="d3-tokenization">
  <!-- role="img" + aria-label expose the whole drawing as one described image
       to assistive technology (no ids, so repeated instances cannot collide). -->
  <svg
    viewBox="0 0 800 400"
    xmlns="http://www.w3.org/2000/svg"
    role="img"
    aria-label="Tokenization pipeline: the input text &quot;Hello, world!&quot; goes through a tokenizer that splits it into the tokens Hello, comma, world and exclamation mark, shown with the IDs 5425, 11, 1917 and 0; the tokens feed a language model, which processes them and produces an output prediction."
  >
    <defs>
      <!-- Arrowhead reused by every connector via marker-end. -->
      <marker id="arrowhead-tok" markerWidth="10" markerHeight="10" refX="9" refY="3" orient="auto">
        <polygon points="0 0, 10 3, 0 6" fill="currentColor" />
      </marker>
    </defs>

    <!-- 1. Input text -->
    <rect x="50" y="50" width="200" height="80" rx="5" class="box" />
    <text x="150" y="75" text-anchor="middle" class="text title">Input Text</text>
    <text x="150" y="100" text-anchor="middle" class="text label">"Hello, world!"</text>

    <!-- Connector: input → tokenizer -->
    <path d="M 250 90 L 290 90" class="arrow" marker-end="url(#arrowhead-tok)" />

    <!-- 2. Tokenizer -->
    <rect x="290" y="60" width="120" height="60" rx="5" class="process" />
    <text x="350" y="85" text-anchor="middle" class="text title">Tokenizer</text>
    <text x="350" y="105" text-anchor="middle" class="text label" font-size="10">Split into tokens</text>

    <!-- Connector: tokenizer → tokens -->
    <path d="M 410 90 L 450 90" class="arrow" marker-end="url(#arrowhead-tok)" />

    <!-- 3. Tokens panel -->
    <rect x="450" y="30" width="280" height="120" rx="5" class="box" />
    <text x="590" y="55" text-anchor="middle" class="text title">Tokens</text>

    <!-- Individual token chips -->
    <rect x="470" y="70" width="60" height="30" rx="3" class="token-box" />
    <text x="500" y="90" text-anchor="middle" class="text token">Hello</text>

    <rect x="540" y="70" width="40" height="30" rx="3" class="token-box" />
    <text x="560" y="90" text-anchor="middle" class="text token">,</text>

    <rect x="590" y="70" width="60" height="30" rx="3" class="token-box" />
    <text x="620" y="90" text-anchor="middle" class="text token">world</text>

    <rect x="660" y="70" width="40" height="30" rx="3" class="token-box" />
    <text x="680" y="90" text-anchor="middle" class="text token">!</text>

    <!-- Token IDs, horizontally centred under their chips -->
    <text x="500" y="125" text-anchor="middle" class="text token-id">[5425]</text>
    <text x="560" y="125" text-anchor="middle" class="text token-id">[11]</text>
    <text x="620" y="125" text-anchor="middle" class="text token-id">[1917]</text>
    <text x="680" y="125" text-anchor="middle" class="text token-id">[0]</text>

    <!-- Connector: tokens → model -->
    <path d="M 590 150 L 590 190" class="arrow" marker-end="url(#arrowhead-tok)" />

    <!-- 4. Language model -->
    <rect x="480" y="190" width="220" height="100" rx="5" class="model" />
    <text x="590" y="215" text-anchor="middle" class="text title">Language Model</text>

    <!-- Decorative row of nodes inside the model box -->
    <g transform="translate(520, 230)">
      <circle cx="20" cy="15" r="8" class="node-circle" />
      <circle cx="50" cy="15" r="8" class="node-circle" />
      <circle cx="80" cy="15" r="8" class="node-circle" />
      <circle cx="110" cy="15" r="8" class="node-circle" />
      <circle cx="140" cy="15" r="8" class="node-circle" />
    </g>

    <!-- "&" is escaped so the markup stays well-formed if parsed as XML. -->
    <text x="590" y="275" text-anchor="middle" class="text label" font-size="10">Process &amp; Generate</text>

    <!-- Connector: model → output -->
    <path d="M 590 290 L 590 330" class="arrow" marker-end="url(#arrowhead-tok)" />

    <!-- 5. Output -->
    <rect x="490" y="330" width="200" height="50" rx="5" class="box" />
    <text x="590" y="360" text-anchor="middle" class="text label">Output / Prediction</text>
  </svg>
</div>
<style>
  /*
   * Styles for the tokenization diagram. Every selector is scoped under
   * .d3-tokenization. Theme colours come from custom properties
   * (--surface-bg, --primary-color, --text-color, --muted-color) with
   * literal fallbacks for pages that do not define them.
   */

  /* Layout: the SVG fills the container width and keeps its aspect ratio. */
  .d3-tokenization {
    position: relative;
    width: 100%;
  }

  .d3-tokenization svg {
    display: block;
    width: 100%;
    height: auto;
  }

  /* Box variants: neutral panel, tokenizer step, model step. */
  .d3-tokenization .box {
    fill: var(--surface-bg, #f0f4ff);
    stroke: var(--primary-color, #4169e1);
    stroke-width: 2;
  }

  .d3-tokenization .process {
    fill: #fff8e1;
    stroke: #ff9800;
    stroke-width: 2;
  }

  .d3-tokenization .model {
    fill: #e8f5e9;
    stroke: #4caf50;
    stroke-width: 2;
  }

  /* Typography. */
  .d3-tokenization .text {
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
    fill: var(--text-color, #333);
  }

  .d3-tokenization .title {
    font-size: 14px;
    font-weight: 600;
  }

  .d3-tokenization .label {
    font-size: 12px;
  }

  .d3-tokenization .token {
    font-size: 11px;
    font-family: 'Monaco', 'Courier New', monospace;
  }

  .d3-tokenization .token-id {
    font-size: 9px;
    fill: var(--muted-color, #666);
  }

  /* Connectors. */
  .d3-tokenization .arrow {
    fill: none;
    stroke: var(--muted-color, #666);
    stroke-width: 2;
  }

  /*
   * The arrowhead polygon is filled with currentColor. Marker content
   * inherits from the <marker> element's own ancestors, not from the path
   * that references it, so the colour has to be set on the marker itself
   * for the head to match the connector stroke.
   */
  .d3-tokenization marker {
    color: var(--muted-color, #666);
  }

  /* Token chips and model nodes. */
  .d3-tokenization .token-box {
    fill: white;
    stroke: var(--primary-color, #4169e1);
    stroke-width: 1.5;
  }

  .d3-tokenization .node-circle {
    fill: #81c784;
    opacity: 0.7;
  }

  /* Dark theme: swap the light fills for translucent tints / dark surface. */
  [data-theme="dark"] .d3-tokenization .box {
    fill: rgba(65, 105, 225, 0.1);
  }

  [data-theme="dark"] .d3-tokenization .token-box {
    fill: var(--surface-bg, #1a1a1a);
  }

  [data-theme="dark"] .d3-tokenization .process {
    fill: rgba(255, 152, 0, 0.15);
  }

  [data-theme="dark"] .d3-tokenization .model {
    fill: rgba(76, 175, 80, 0.15);
  }
</style>
<script>
  /*
   * Mount guard for the tokenization diagram.
   *
   * Locates the .d3-tokenization container that belongs to this script and
   * flags it with data-mounted="true" so the same container is never
   * processed twice. The diagram itself is static markup; nothing else is
   * rendered here.
   */
  (() => {
    // document.currentScript is only set while the script element is being
    // executed. It is null inside a DOMContentLoaded callback, so it must be
    // captured now rather than inside bootstrap().
    const scriptEl = document.currentScript;

    // Resolve the container this script instance is responsible for.
    // Returns the element, or null when none can be found.
    const findContainer = () => {
      // Other elements (e.g. the <style> block) can sit between the container
      // and this script, so walk back to the nearest matching sibling instead
      // of checking only the immediate one.
      let node = scriptEl ? scriptEl.previousElementSibling : null;
      while (node) {
        if (node.classList && node.classList.contains('d3-tokenization')) {
          return node;
        }
        node = node.previousElementSibling;
      }

      // Fallback when the script element is unknown or has no matching
      // sibling: take the last container on the page not yet mounted.
      const unmounted = Array.from(document.querySelectorAll('.d3-tokenization'))
        .filter((el) => !(el.dataset && el.dataset.mounted === 'true'));
      return unmounted[unmounted.length - 1] || null;
    };

    // Mark the container as mounted exactly once.
    const bootstrap = () => {
      const container = findContainer();
      if (!container) return;

      if (container.dataset) {
        if (container.dataset.mounted === 'true') return;
        container.dataset.mounted = 'true';
      }
    };

    // Run immediately if the document is already parsed, otherwise wait.
    if (document.readyState === 'loading') {
      document.addEventListener('DOMContentLoaded', bootstrap, { once: true });
    } else {
      bootstrap();
    }
  })();
</script>