Deploy demo @ 04347af
Browse files- README.md +15 -4
- index.html +45 -18
- js/app.js +426 -0
- js/tokenizers.js +18 -0
- js/worker.js +69 -0
- style.css +402 -18
README.md
CHANGED
|
@@ -1,10 +1,21 @@
|
|
| 1 |
---
|
| 2 |
title: Malayalam Tokenizer Comparison
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: static
|
| 7 |
pinned: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
title: Malayalam Tokenizer Comparison
|
| 3 |
+
emoji: 🔤
|
| 4 |
+
colorFrom: green
|
| 5 |
+
colorTo: blue
|
| 6 |
sdk: static
|
| 7 |
pinned: false
|
| 8 |
+
license: mit
|
| 9 |
+
tags:
|
| 10 |
+
- malayalam
|
| 11 |
+
- tokenizer
|
| 12 |
+
- nlp
|
| 13 |
---
|
| 14 |
|
| 15 |
+
# Malayalam Tokenizer Comparison
|
| 16 |
+
|
| 17 |
+
Visualize how different tokenizers split Malayalam text into tokens.
|
| 18 |
+
Compare Malayalam-specific tokenizers (BPE and Unigram) side-by-side with
|
| 19 |
+
popular models like GPT-4, LLaMA, Mistral, and others.
|
| 20 |
+
|
| 21 |
+
Built from [smc/malayalam-tokenizer](https://github.com/smc/malayalam-tokenizer).
|
index.html
CHANGED
|
@@ -1,19 +1,46 @@
|
|
| 1 |
-
<!
|
| 2 |
-
<html>
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
</html>
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>Malayalam Tokenizer Comparison</title>
|
| 7 |
+
<link rel="stylesheet" href="style.css">
|
| 8 |
+
</head>
|
| 9 |
+
<body>
|
| 10 |
+
<header>
|
| 11 |
+
<h1>Malayalam Tokenizer Comparison</h1>
|
| 12 |
+
<p>Visualize how different tokenizers split Malayalam text into tokens.</p>
|
| 13 |
+
</header>
|
| 14 |
+
|
| 15 |
+
<main>
|
| 16 |
+
<section class="input-section">
|
| 17 |
+
<label for="text-input">Input text</label>
|
| 18 |
+
<textarea
|
| 19 |
+
id="text-input"
|
| 20 |
+
rows="4"
|
| 21 |
+
placeholder="Type or paste Malayalam text here…"
|
| 22 |
+
spellcheck="false"
|
| 23 |
+
autocomplete="off"
|
| 24 |
+
>കേരളം ദക്ഷിണേന്ത്യയിലെ ഒരു സംസ്ഥാനമാണ്.</textarea>
|
| 25 |
+
</section>
|
| 26 |
+
|
| 27 |
+
<section class="add-tokenizer-section">
|
| 28 |
+
<select id="tokenizer-select">
|
| 29 |
+
<option value="">— choose a tokenizer —</option>
|
| 30 |
+
</select>
|
| 31 |
+
<button id="add-btn" type="button" disabled>Add tokenizer</button>
|
| 32 |
+
</section>
|
| 33 |
+
|
| 34 |
+
<div id="panels" role="list" aria-label="Tokenizer panels"></div>
|
| 35 |
+
|
| 36 |
+
<p id="empty-state" class="empty-state" aria-live="polite">
|
| 37 |
+
No tokenizers added yet. Choose one from the dropdown above.
|
| 38 |
+
</p>
|
| 39 |
+
</main>
|
| 40 |
+
|
| 41 |
+
<!-- Token-color highlight styles injected by app.js -->
|
| 42 |
+
<style id="highlight-styles"></style>
|
| 43 |
+
|
| 44 |
+
<script type="module" src="js/app.js"></script>
|
| 45 |
+
</body>
|
| 46 |
</html>
|
js/app.js
ADDED
|
@@ -0,0 +1,426 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { TOKENIZER_OPTIONS } from "./tokenizers.js";
|
| 2 |
+
|
| 3 |
+
// ─── Token color palette ──────────────────────────────────────────────────────
|
| 4 |
+
// Reference CSS custom properties so dark-mode overrides apply automatically.
|
| 5 |
+
// The index wraps modulo 12; values are defined in style.css as --tok-N.
|
| 6 |
+
|
| 7 |
+
const NUM_TOKEN_COLORS = 12;
|
| 8 |
+
|
| 9 |
+
function tokenColorVar(index) {
|
| 10 |
+
return `var(--tok-${index % NUM_TOKEN_COLORS})`;
|
| 11 |
+
}
|
| 12 |
+
|
| 13 |
+
const DEBOUNCE_MS = 350;
|
| 14 |
+
|
| 15 |
+
// ─── State ────────────────────────────────────────────────────────────────────
|
| 16 |
+
|
| 17 |
+
/** @type {Map<number, PanelState>} */
|
| 18 |
+
const panels = new Map();
|
| 19 |
+
let panelCounter = 0;
|
| 20 |
+
|
| 21 |
+
/**
|
| 22 |
+
* @typedef {{
|
| 23 |
+
* modelId: string,
|
| 24 |
+
* name: string,
|
| 25 |
+
* worker: Worker,
|
| 26 |
+
* panel: HTMLElement,
|
| 27 |
+
* textMirror: HTMLElement,
|
| 28 |
+
* statsEl: HTMLElement,
|
| 29 |
+
* tbody: HTMLElement,
|
| 30 |
+
* tokenCount: number,
|
| 31 |
+
* }} PanelState
|
| 32 |
+
*/
|
| 33 |
+
|
| 34 |
+
// ─── DOM refs ─────────────────────────────────────────────────────────────────
|
| 35 |
+
|
| 36 |
+
const textInput = /** @type {HTMLTextAreaElement} */ (
|
| 37 |
+
document.getElementById("text-input")
|
| 38 |
+
);
|
| 39 |
+
const selectEl = /** @type {HTMLSelectElement} */ (
|
| 40 |
+
document.getElementById("tokenizer-select")
|
| 41 |
+
);
|
| 42 |
+
const addBtn = /** @type {HTMLButtonElement} */ (
|
| 43 |
+
document.getElementById("add-btn")
|
| 44 |
+
);
|
| 45 |
+
const panelsEl = /** @type {HTMLElement} */ (
|
| 46 |
+
document.getElementById("panels")
|
| 47 |
+
);
|
| 48 |
+
const emptyState = /** @type {HTMLElement} */ (
|
| 49 |
+
document.getElementById("empty-state")
|
| 50 |
+
);
|
| 51 |
+
const highlightStyleEl = /** @type {HTMLStyleElement} */ (
|
| 52 |
+
document.getElementById("highlight-styles")
|
| 53 |
+
);
|
| 54 |
+
|
| 55 |
+
// ─── Populate dropdown ────────────────────────────────────────────────────────
|
| 56 |
+
|
| 57 |
+
for (const [modelId, name] of Object.entries(TOKENIZER_OPTIONS)) {
|
| 58 |
+
if (!modelId) continue;
|
| 59 |
+
const opt = document.createElement("option");
|
| 60 |
+
opt.value = modelId;
|
| 61 |
+
opt.textContent = name;
|
| 62 |
+
selectEl.appendChild(opt);
|
| 63 |
+
}
|
| 64 |
+
|
| 65 |
+
selectEl.addEventListener("change", () => {
|
| 66 |
+
addBtn.disabled = !selectEl.value;
|
| 67 |
+
});
|
| 68 |
+
|
| 69 |
+
addBtn.addEventListener("click", () => {
|
| 70 |
+
const modelId = selectEl.value;
|
| 71 |
+
if (!modelId) return;
|
| 72 |
+
const name = TOKENIZER_OPTIONS[modelId] ?? modelId;
|
| 73 |
+
addPanel(modelId, name);
|
| 74 |
+
selectEl.value = "";
|
| 75 |
+
addBtn.disabled = true;
|
| 76 |
+
});
|
| 77 |
+
|
| 78 |
+
// ─── Text input ───────────────────────────────────────────────────────────────
|
| 79 |
+
|
| 80 |
+
let debounceTimer = null;
|
| 81 |
+
|
| 82 |
+
textInput.addEventListener("input", () => {
|
| 83 |
+
clearTimeout(debounceTimer);
|
| 84 |
+
debounceTimer = setTimeout(retokenizeAll, DEBOUNCE_MS);
|
| 85 |
+
});
|
| 86 |
+
|
| 87 |
+
function retokenizeAll() {
|
| 88 |
+
const text = textInput.value;
|
| 89 |
+
for (const id of panels.keys()) {
|
| 90 |
+
runTokenizer(id, text);
|
| 91 |
+
}
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
// ─── Panel lifecycle ──────────────────────────────────────────────────────────
|
| 95 |
+
|
| 96 |
+
function addPanel(modelId, name) {
|
| 97 |
+
const id = panelCounter++;
|
| 98 |
+
|
| 99 |
+
const panel = document.createElement("div");
|
| 100 |
+
panel.className = "panel loading";
|
| 101 |
+
panel.setAttribute("role", "listitem");
|
| 102 |
+
panel.dataset.panelId = String(id);
|
| 103 |
+
panel.innerHTML = panelTemplate(name, modelId);
|
| 104 |
+
|
| 105 |
+
panel
|
| 106 |
+
.querySelector(".remove-btn")
|
| 107 |
+
.addEventListener("click", () => removePanel(id));
|
| 108 |
+
|
| 109 |
+
panelsEl.appendChild(panel);
|
| 110 |
+
updateEmptyState();
|
| 111 |
+
|
| 112 |
+
const worker = new Worker("js/worker.js", { type: "module" });
|
| 113 |
+
worker.addEventListener("message", (ev) => onWorkerMessage(id, ev.data));
|
| 114 |
+
worker.addEventListener("error", (ev) =>
|
| 115 |
+
onWorkerError(id, ev.message ?? "Worker error"),
|
| 116 |
+
);
|
| 117 |
+
|
| 118 |
+
panels.set(id, {
|
| 119 |
+
modelId,
|
| 120 |
+
name,
|
| 121 |
+
worker,
|
| 122 |
+
panel,
|
| 123 |
+
textMirror: panel.querySelector(".panel-text"),
|
| 124 |
+
statsEl: panel.querySelector(".panel-stats"),
|
| 125 |
+
tbody: panel.querySelector("tbody"),
|
| 126 |
+
tokenCount: 0,
|
| 127 |
+
});
|
| 128 |
+
|
| 129 |
+
runTokenizer(id, textInput.value);
|
| 130 |
+
}
|
| 131 |
+
|
| 132 |
+
function removePanel(id) {
|
| 133 |
+
const state = panels.get(id);
|
| 134 |
+
if (!state) return;
|
| 135 |
+
clearPanelHighlights(id, state.tokenCount);
|
| 136 |
+
state.worker.terminate();
|
| 137 |
+
state.panel.remove();
|
| 138 |
+
panels.delete(id);
|
| 139 |
+
updateEmptyState();
|
| 140 |
+
}
|
| 141 |
+
|
| 142 |
+
function updateEmptyState() {
|
| 143 |
+
emptyState.classList.toggle("hidden", panels.size > 0);
|
| 144 |
+
}
|
| 145 |
+
|
| 146 |
+
function panelTemplate(name, modelId) {
|
| 147 |
+
return `
|
| 148 |
+
<div class="panel-header">
|
| 149 |
+
<a class="panel-title" href="https://huggingface.co/${esc(modelId)}" target="_blank" rel="noopener noreferrer" title="${esc(modelId)}">${esc(name)}</a>
|
| 150 |
+
<button class="remove-btn" type="button" aria-label="Remove ${esc(name)}">Remove</button>
|
| 151 |
+
</div>
|
| 152 |
+
<div class="panel-loading">
|
| 153 |
+
<span class="spinner" aria-hidden="true"></span>
|
| 154 |
+
Loading tokenizer…
|
| 155 |
+
</div>
|
| 156 |
+
<div class="panel-text" aria-label="Tokenized text"></div>
|
| 157 |
+
<div class="panel-stats"></div>
|
| 158 |
+
<div class="panel-table-wrap">
|
| 159 |
+
<table aria-label="Token list">
|
| 160 |
+
<thead>
|
| 161 |
+
<tr>
|
| 162 |
+
<th></th>
|
| 163 |
+
<th>#</th>
|
| 164 |
+
<th>Token</th>
|
| 165 |
+
<th>ID</th>
|
| 166 |
+
</tr>
|
| 167 |
+
</thead>
|
| 168 |
+
<tbody></tbody>
|
| 169 |
+
</table>
|
| 170 |
+
</div>`;
|
| 171 |
+
}
|
| 172 |
+
|
| 173 |
+
// ─── Worker communication ─────────────────────────────────────────────────────
|
| 174 |
+
|
| 175 |
+
function runTokenizer(id, text) {
|
| 176 |
+
const state = panels.get(id);
|
| 177 |
+
if (!state) return;
|
| 178 |
+
state.panel.classList.add("loading");
|
| 179 |
+
state.worker.postMessage({ model_id: state.modelId, text });
|
| 180 |
+
}
|
| 181 |
+
|
| 182 |
+
/**
 * Handle a tokenization result posted by a panel's worker: refresh the
 * mirrored text, repaint the per-token highlights, update the stats bar and
 * rebuild the token table.
 *
 * @param {number} id  Key of the panel in the `panels` map.
 * @param {{ token_ids: number[], decoded: string[], margins: number[] }} data
 *   Payload posted by the worker.
 */
function onWorkerMessage(id, data) {
  const state = panels.get(id);
  // The panel may have been removed while the worker was still busy.
  if (!state) return;

  state.panel.classList.remove("loading");

  const { token_ids, decoded, margins } = data;
  const text = textInput.value;

  // Clear stale highlights using the previous token count before storing the
  // new one.
  clearPanelHighlights(id, state.tokenCount);
  state.tokenCount = token_ids.length;

  // Sync mirror text (single text node — required for Range offsets)
  state.textMirror.textContent = text;

  // Compute per-token character offsets in the original string
  const offsets = computeOffsets(text, decoded, margins ?? []);

  // Paint highlights via CSS Custom Highlight API
  paintHighlights(id, state.textMirror, offsets);

  // Stats bar. Guard both operands: an empty token list for non-empty text
  // would otherwise render "Infinity".
  const ratio =
    text.length > 0 && token_ids.length > 0
      ? (text.length / token_ids.length).toFixed(2)
      : "—";
  state.statsEl.innerHTML =
    `<span>Tokens: <strong>${token_ids.length}</strong></span>` +
    `<span>Chars/token: <strong>${ratio}</strong></span>` +
    `<span>Characters: <strong>${text.length}</strong></span>`;

  // Table
  renderTable(id, state, token_ids, decoded, offsets);
}
|
| 219 |
+
|
| 220 |
+
function onWorkerError(id, msg) {
|
| 221 |
+
const state = panels.get(id);
|
| 222 |
+
if (!state) return;
|
| 223 |
+
state.panel.classList.remove("loading");
|
| 224 |
+
state.textMirror.textContent = `! ${msg}`;
|
| 225 |
+
}
|
| 226 |
+
|
| 227 |
+
// ─── Offset computation ───────────────────────────────────────────────────────
|
| 228 |
+
|
| 229 |
+
/**
|
| 230 |
+
* Walk `decoded` tokens greedily left-to-right against `text`, returning
|
| 231 |
+
* [{start, end}|null] for each token.
|
| 232 |
+
*
|
| 233 |
+
* The HuggingFace worker decodes each token individually. Metaspace converts
|
| 234 |
+
* the leading ▁ (U+2581) back to a space, so `decoded` values should
|
| 235 |
+
* concatenate to the original text (modulo special tokens like <s>, </s>).
|
| 236 |
+
*
|
| 237 |
+
* @param {string} text
|
| 238 |
+
* @param {string[]} decoded
|
| 239 |
+
* @param {number[]} margins — BERT: margin>0 means a word boundary space precedes
|
| 240 |
+
* @returns {Array<{start:number,end:number}|null>}
|
| 241 |
+
*/
|
| 242 |
+
function computeOffsets(text, decoded, margins) {
|
| 243 |
+
const offsets = [];
|
| 244 |
+
let cursor = 0;
|
| 245 |
+
|
| 246 |
+
for (let i = 0; i < decoded.length; i++) {
|
| 247 |
+
const tok = decoded[i] ?? "";
|
| 248 |
+
|
| 249 |
+
if (tok.length === 0) {
|
| 250 |
+
offsets.push(null);
|
| 251 |
+
continue;
|
| 252 |
+
}
|
| 253 |
+
|
| 254 |
+
// BERT word-boundary space: margin > 0 means we should skip a space
|
| 255 |
+
if (margins[i] > 0 && text[cursor] === " ") {
|
| 256 |
+
cursor++;
|
| 257 |
+
}
|
| 258 |
+
|
| 259 |
+
// Greedy forward search from cursor
|
| 260 |
+
const idx = text.indexOf(tok, cursor);
|
| 261 |
+
if (idx === -1) {
|
| 262 |
+
// Special token or mismatch — no visible range
|
| 263 |
+
offsets.push(null);
|
| 264 |
+
continue;
|
| 265 |
+
}
|
| 266 |
+
|
| 267 |
+
offsets.push({ start: idx, end: idx + tok.length });
|
| 268 |
+
cursor = idx + tok.length;
|
| 269 |
+
}
|
| 270 |
+
|
| 271 |
+
return offsets;
|
| 272 |
+
}
|
| 273 |
+
|
| 274 |
+
// ─── CSS Custom Highlight API ─────────────────────────────────────────────────
|
| 275 |
+
|
| 276 |
+
// Track which ::highlight() rules have been injected to avoid duplication
|
| 277 |
+
const injectedRules = new Set();
|
| 278 |
+
|
| 279 |
+
/**
|
| 280 |
+
* Ensure ::highlight(name) rule exists in the shared <style> element.
|
| 281 |
+
* @param {string} name
|
| 282 |
+
* @param {string} bgColor
|
| 283 |
+
*/
|
| 284 |
+
function ensureRule(name, bgColor) {
|
| 285 |
+
if (injectedRules.has(name)) return;
|
| 286 |
+
injectedRules.add(name);
|
| 287 |
+
const sheet = highlightStyleEl.sheet;
|
| 288 |
+
sheet.insertRule(
|
| 289 |
+
`::highlight(${name}) { background-color: ${bgColor}; color: var(--tok-text); }`,
|
| 290 |
+
sheet.cssRules.length,
|
| 291 |
+
);
|
| 292 |
+
}
|
| 293 |
+
|
| 294 |
+
/**
|
| 295 |
+
* Ensure the hover ::highlight() rule exists for a panel.
|
| 296 |
+
* @param {number} panelId
|
| 297 |
+
*/
|
| 298 |
+
function ensureHoverRule(panelId) {
|
| 299 |
+
const name = `p${panelId}-hover`;
|
| 300 |
+
if (injectedRules.has(name)) return;
|
| 301 |
+
injectedRules.add(name);
|
| 302 |
+
const sheet = highlightStyleEl.sheet;
|
| 303 |
+
sheet.insertRule(
|
| 304 |
+
`::highlight(${name}) { outline: 2px solid var(--tok-hover); border-radius: 2px; }`,
|
| 305 |
+
sheet.cssRules.length,
|
| 306 |
+
);
|
| 307 |
+
}
|
| 308 |
+
|
| 309 |
+
/**
|
| 310 |
+
* Register one CSS Highlight per token offset.
|
| 311 |
+
* Name: `p{panelId}-t{tokenIndex}`
|
| 312 |
+
* Color: var(--tok-N) from CSS
|
| 313 |
+
*/
|
| 314 |
+
function paintHighlights(panelId, mirrorEl, offsets) {
|
| 315 |
+
if (!CSS.highlights) return;
|
| 316 |
+
|
| 317 |
+
const textNode = mirrorEl.firstChild;
|
| 318 |
+
if (!textNode) return;
|
| 319 |
+
|
| 320 |
+
for (let i = 0; i < offsets.length; i++) {
|
| 321 |
+
const off = offsets[i];
|
| 322 |
+
if (!off) continue;
|
| 323 |
+
|
| 324 |
+
const name = `p${panelId}-t${i}`;
|
| 325 |
+
|
| 326 |
+
ensureRule(name, tokenColorVar(i));
|
| 327 |
+
|
| 328 |
+
const range = new Range();
|
| 329 |
+
range.setStart(textNode, off.start);
|
| 330 |
+
range.setEnd(textNode, off.end);
|
| 331 |
+
CSS.highlights.set(name, new Highlight(range));
|
| 332 |
+
}
|
| 333 |
+
}
|
| 334 |
+
|
| 335 |
+
/**
|
| 336 |
+
* Remove all named highlights for a panel.
|
| 337 |
+
*/
|
| 338 |
+
function clearPanelHighlights(panelId, tokenCount) {
|
| 339 |
+
if (!CSS.highlights) return;
|
| 340 |
+
for (let i = 0; i < tokenCount; i++) {
|
| 341 |
+
CSS.highlights.delete(`p${panelId}-t${i}`);
|
| 342 |
+
}
|
| 343 |
+
CSS.highlights.delete(`p${panelId}-hover`);
|
| 344 |
+
}
|
| 345 |
+
|
| 346 |
+
// ─── Token table ──────────────────────────────────────────────────────────────
|
| 347 |
+
|
| 348 |
+
/**
|
| 349 |
+
* @param {number} panelId
|
| 350 |
+
* @param {PanelState} state
|
| 351 |
+
* @param {number[]} token_ids
|
| 352 |
+
* @param {string[]} decoded
|
| 353 |
+
* @param {Array<{start:number,end:number}|null>} offsets
|
| 354 |
+
*/
|
| 355 |
+
function renderTable(panelId, state, token_ids, decoded, offsets) {
|
| 356 |
+
const { tbody, textMirror } = state;
|
| 357 |
+
tbody.innerHTML = "";
|
| 358 |
+
|
| 359 |
+
ensureHoverRule(panelId);
|
| 360 |
+
|
| 361 |
+
const fragment = document.createDocumentFragment();
|
| 362 |
+
|
| 363 |
+
for (let i = 0; i < token_ids.length; i++) {
|
| 364 |
+
const color = tokenColorVar(i);
|
| 365 |
+
const off = offsets[i];
|
| 366 |
+
|
| 367 |
+
const tr = document.createElement("tr");
|
| 368 |
+
tr.style.setProperty("--row-color", color);
|
| 369 |
+
tr.dataset.tokenIndex = String(i);
|
| 370 |
+
|
| 371 |
+
// Color swatch
|
| 372 |
+
const tdSwatch = document.createElement("td");
|
| 373 |
+
tdSwatch.className = "color-swatch";
|
| 374 |
+
const swatch = document.createElement("span");
|
| 375 |
+
swatch.style.background = color;
|
| 376 |
+
tdSwatch.appendChild(swatch);
|
| 377 |
+
|
| 378 |
+
// Index
|
| 379 |
+
const tdIdx = document.createElement("td");
|
| 380 |
+
tdIdx.textContent = String(i + 1);
|
| 381 |
+
|
| 382 |
+
// Token text
|
| 383 |
+
const tdTok = document.createElement("td");
|
| 384 |
+
tdTok.className = "tok-cell";
|
| 385 |
+
tdTok.textContent = decoded[i] ?? "";
|
| 386 |
+
|
| 387 |
+
// Token ID
|
| 388 |
+
const tdId = document.createElement("td");
|
| 389 |
+
tdId.className = "id-cell";
|
| 390 |
+
tdId.textContent = String(token_ids[i]);
|
| 391 |
+
|
| 392 |
+
tr.append(tdSwatch, tdIdx, tdTok, tdId);
|
| 393 |
+
|
| 394 |
+
// Hover: show distinct outline highlight on the mirrored text
|
| 395 |
+
if (off) {
|
| 396 |
+
tr.addEventListener("mouseenter", () => {
|
| 397 |
+
tr.classList.add("active");
|
| 398 |
+
if (!CSS.highlights) return;
|
| 399 |
+
const node = textMirror.firstChild;
|
| 400 |
+
if (!node) return;
|
| 401 |
+
const r = new Range();
|
| 402 |
+
r.setStart(node, off.start);
|
| 403 |
+
r.setEnd(node, off.end);
|
| 404 |
+
CSS.highlights.set(`p${panelId}-hover`, new Highlight(r));
|
| 405 |
+
});
|
| 406 |
+
tr.addEventListener("mouseleave", () => {
|
| 407 |
+
tr.classList.remove("active");
|
| 408 |
+
CSS.highlights?.delete(`p${panelId}-hover`);
|
| 409 |
+
});
|
| 410 |
+
}
|
| 411 |
+
|
| 412 |
+
fragment.appendChild(tr);
|
| 413 |
+
}
|
| 414 |
+
|
| 415 |
+
tbody.appendChild(fragment);
|
| 416 |
+
}
|
| 417 |
+
|
| 418 |
+
// ─── Utility ──────────────────────────────────────────────────────────────────
|
| 419 |
+
|
| 420 |
+
/**
 * Escape a value for interpolation into HTML text content or a
 * double-quoted attribute value.
 *
 * `&` is replaced first so the entities produced by the later replacements
 * are not escaped a second time.
 *
 * @param {string} str  Raw text; non-string values are converted with String().
 * @returns {string} The text with `&`, `<`, `>` and `"` replaced by entities.
 */
function esc(str) {
  return String(str)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;");
}
|
js/tokenizers.js
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
 * Tokenizers offered in the dropdown.
 *
 * Keys are model ids (app.js uses them to build huggingface.co links and
 * passes them to the worker as `model_id`); values are the human-readable
 * labels shown to the user. The object is frozen so it cannot be mutated at
 * runtime.
 */
export const TOKENIZER_OPTIONS = Object.freeze({
  "smcproject/malayalam-bpe-tokenizer": "Malayalam BPE tokenizer",
  "smcproject/malayalam-unigram-tokenizer": "Malayalam Unigram tokenizer",
  "Xenova/gpt-4": "gpt-4 / gpt-3.5-turbo / text-embedding-ada-002",
  "Xenova/text-davinci-003": "text-davinci-003 / text-davinci-002",
  "Xenova/gpt-3": "gpt-3",
  "Xenova/grok-1-tokenizer": "Grok-1",
  "Xenova/claude-tokenizer": "Claude",
  "Xenova/mistral-tokenizer-v3": "Mistral v3",
  "Xenova/mistral-tokenizer-v1": "Mistral v1",
  "Xenova/gemma-tokenizer": "Gemma",
  "Xenova/llama-3-tokenizer": "Llama 3",
  "Xenova/llama-tokenizer": "LLaMA / Llama 2",
  "Xenova/c4ai-command-r-v01-tokenizer": "Cohere Command-R",
  "Xenova/t5-small": "T5",
  "Xenova/bert-base-cased": "bert-base-cased",
});
|
js/worker.js
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Although not strictly necessary, we delegate the tokenization to a worker thread to avoid
|
| 2 |
+
// any potential issues with the tokenizer blocking the main thread (especially for large inputs).
|
| 3 |
+
|
| 4 |
+
import { AutoTokenizer } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.2.1";
|
| 5 |
+
|
| 6 |
+
// This is a map of all the tokenizer instances that we have loaded.
|
| 7 |
+
// model_id -> promise that resolves to tokenizer
|
| 8 |
+
const TOKENIZER_MAPPINGS = new Map();
|
| 9 |
+
|
| 10 |
+
/**
 * Load a tokenizer and apply the decoder tweaks needed so that decoding one
 * token at a time yields text suitable for display.
 *
 * @param {string} model_id  Model id passed to AutoTokenizer.from_pretrained.
 * @returns {Promise<object>} Resolves to the (possibly adjusted) tokenizer.
 */
function loadTokenizer(model_id) {
  return AutoTokenizer.from_pretrained(model_id).then((tokenizer) => {
    // "LlamaTokenizerFast" and "LlamaTokenizer" are treated alike.
    const tokenizer_class = (
      tokenizer._tokenizer_config?.tokenizer_class ?? ""
    ).replace(/Fast$/, "");

    switch (tokenizer_class) {
      case "LlamaTokenizer":
      case "Grok1Tokenizer":
        // Drop the trailing decoder, expected to be the StripDecoder:
        // tokenizer.decoder.decoders.at(-1).constructor.name === 'StripDecoder'
        tokenizer.decoder.decoders.pop();
        break;
      case "T5Tokenizer":
        tokenizer.decoder.addPrefixSpace = false;
        break;
    }
    return tokenizer;
  });
}

/**
 * Post-process individually decoded tokens for visualization.
 *
 * @param {string} tokenizerName  `constructor.name` of the tokenizer instance.
 * @param {string[]} decoded  One decoded string per token id.
 * @returns {{ decoded: string[], margins: number[] }}
 *   `margins` is empty except for BERT, where a value > 0 marks a token that
 *   starts a new word (no "##" continuation prefix).
 */
function postProcessDecoded(tokenizerName, decoded) {
  switch (tokenizerName) {
    case "BertTokenizer":
      return {
        margins: decoded.map((x, i) => (i === 0 || x.startsWith("##") ? 0 : 8)),
        // Strip only a leading "##" — the same condition the margins test.
        decoded: decoded.map((x) => x.replace(/^##/, "")),
      };
    case "T5Tokenizer": {
      const result = [...decoded];
      // Remove the prefix space from the first token, but keep a token that
      // is nothing but a single space.
      if (result.length > 0 && result[0] !== " ") {
        result[0] = result[0].replace(/^ /, "");
      }
      return { decoded: result, margins: [] };
    }
    default:
      return { decoded, margins: [] };
  }
}

// Listen for messages from the main thread
self.addEventListener("message", async (event) => {
  const { model_id, text } = event.data;

  try {
    // Only load the tokenizer if it hasn't been loaded yet
    let tokenizerPromise = TOKENIZER_MAPPINGS.get(model_id);
    if (!tokenizerPromise) {
      tokenizerPromise = loadTokenizer(model_id);
      TOKENIZER_MAPPINGS.set(model_id, tokenizerPromise);
      // Do not keep a failed load cached: a later message should retry.
      tokenizerPromise.catch(() => TOKENIZER_MAPPINGS.delete(model_id));
    }

    const tokenizer = await tokenizerPromise;

    // Tokenize the input text
    const token_ids = tokenizer.encode(text);

    // Decode each token id on its own so tokens can be shown individually
    const rawDecoded = token_ids.map((x) => tokenizer.decode([x]));

    // Minor post-processing for visualization purposes
    const { decoded, margins } = postProcessDecoded(
      tokenizer.constructor.name,
      rawDecoded,
    );

    // Send the output back to the main thread
    self.postMessage({
      token_ids,
      decoded,
      margins,
    });
  } catch (err) {
    // A rejection inside an async listener is not an uncaught exception, so
    // re-raise it as one; this is intended to reach the `error` listener the
    // page attaches to the Worker object.
    if (typeof self.reportError === "function") {
      self.reportError(err);
    } else {
      setTimeout(() => {
        throw err;
      });
    }
  }
});
|
style.css
CHANGED
|
@@ -1,28 +1,412 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
body {
|
| 2 |
-
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
}
|
| 5 |
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
}
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
}
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
}
|
| 25 |
|
| 26 |
-
|
| 27 |
-
|
|
|
|
|
|
|
| 28 |
}
|
|
|
|
| 1 |
+
/* ─── Design tokens ─────────────────────────────────────────────────────── */
|
| 2 |
+
|
| 3 |
+
:root {
|
| 4 |
+
color-scheme: light dark;
|
| 5 |
+
--radius: 8px;
|
| 6 |
+
--radius-sm: 4px;
|
| 7 |
+
--font-sans: system-ui, -apple-system, sans-serif;
|
| 8 |
+
--font-mono: ui-monospace, "Cascadia Code", "Fira Code", monospace;
|
| 9 |
+
--shadow:
|
| 10 |
+
0 1px 4px color-mix(in srgb, CanvasText 8%, transparent), 0 2px 12px
|
| 11 |
+
color-mix(in srgb, CanvasText 5%, transparent);
|
| 12 |
+
|
| 13 |
+
/* Foreground color rendered over token highlight backgrounds */
|
| 14 |
+
--tok-text: CanvasText;
|
| 15 |
+
/* Hover outline color for highlighted token ranges */
|
| 16 |
+
--tok-hover: AccentColor;
|
| 17 |
+
|
| 18 |
+
/* Token color palette — 12 distinct hues in oklch, light-mode values.
|
| 19 |
+
Dark-mode overrides follow in the @media block below. */
|
| 20 |
+
--tok-0: oklch(80% 0.13 30);
|
| 21 |
+
--tok-1: oklch(82% 0.13 80);
|
| 22 |
+
--tok-2: oklch(80% 0.13 140);
|
| 23 |
+
--tok-3: oklch(80% 0.13 195);
|
| 24 |
+
--tok-4: oklch(80% 0.13 250);
|
| 25 |
+
--tok-5: oklch(80% 0.13 300);
|
| 26 |
+
--tok-6: oklch(80% 0.13 340);
|
| 27 |
+
--tok-7: oklch(82% 0.13 55);
|
| 28 |
+
--tok-8: oklch(80% 0.13 165);
|
| 29 |
+
--tok-9: oklch(80% 0.13 220);
|
| 30 |
+
--tok-10: oklch(80% 0.13 270);
|
| 31 |
+
--tok-11: oklch(80% 0.13 320);
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
@media (prefers-color-scheme: dark) {
|
| 35 |
+
:root {
|
| 36 |
+
/* Lower lightness and higher chroma so highlights read on dark Canvas */
|
| 37 |
+
--tok-0: oklch(68% 0.16 30);
|
| 38 |
+
--tok-1: oklch(70% 0.16 80);
|
| 39 |
+
--tok-2: oklch(68% 0.16 140);
|
| 40 |
+
--tok-3: oklch(68% 0.16 195);
|
| 41 |
+
--tok-4: oklch(68% 0.16 250);
|
| 42 |
+
--tok-5: oklch(68% 0.16 300);
|
| 43 |
+
--tok-6: oklch(68% 0.16 340);
|
| 44 |
+
--tok-7: oklch(70% 0.16 55);
|
| 45 |
+
--tok-8: oklch(68% 0.16 165);
|
| 46 |
+
--tok-9: oklch(68% 0.16 220);
|
| 47 |
+
--tok-10: oklch(68% 0.16 270);
|
| 48 |
+
--tok-11: oklch(68% 0.16 320);
|
| 49 |
+
}
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
/* ─── Reset ─────────────────────────────────────────────────────────────── */
|
| 53 |
+
|
| 54 |
+
*,
|
| 55 |
+
*::before,
|
| 56 |
+
*::after {
|
| 57 |
+
box-sizing: border-box;
|
| 58 |
+
margin: 0;
|
| 59 |
+
padding: 0;
|
| 60 |
+
}
|
| 61 |
+
|
| 62 |
+
/* ─── Base ──────────────────────────────────────────────────────────────── */
|
| 63 |
+
|
| 64 |
body {
|
| 65 |
+
font-family: var(--font-sans);
|
| 66 |
+
background: Canvas;
|
| 67 |
+
color: CanvasText;
|
| 68 |
+
line-height: 1.6;
|
| 69 |
+
min-height: 100dvh;
|
| 70 |
+
padding-block-end: 4rem;
|
| 71 |
+
}
|
| 72 |
+
|
| 73 |
+
/* ─── Header ────────────────────────────────────────────────────────────── */
|
| 74 |
+
|
| 75 |
+
header {
|
| 76 |
+
background: Canvas;
|
| 77 |
+
padding: 1.25rem 2rem;
|
| 78 |
+
max-width: 1400px;
|
| 79 |
+
margin-inline: auto;
|
| 80 |
+
h1 {
|
| 81 |
+
font-size: 1.35rem;
|
| 82 |
+
font-weight: 700;
|
| 83 |
+
letter-spacing: -0.01em;
|
| 84 |
+
}
|
| 85 |
+
|
| 86 |
+
p {
|
| 87 |
+
font-size: 0.875rem;
|
| 88 |
+
color: GrayText;
|
| 89 |
+
margin-block-start: 0.2rem;
|
| 90 |
+
}
|
| 91 |
}
|
| 92 |
|
| 93 |
+
/* ─── Main layout ───────────────────────────────────────────────────────── */
|
| 94 |
+
|
| 95 |
+
main {
|
| 96 |
+
max-width: 1400px;
|
| 97 |
+
margin-inline: auto;
|
| 98 |
+
padding: 1.5rem 2rem;
|
| 99 |
+
display: grid;
|
| 100 |
+
gap: 1.25rem;
|
| 101 |
}
|
| 102 |
|
| 103 |
+
/* ─── Input section ─────────────────────────────────────────────────────── */
|
| 104 |
+
|
| 105 |
+
.input-section {
|
| 106 |
+
display: grid;
|
| 107 |
+
gap: 0.5rem;
|
| 108 |
+
|
| 109 |
+
label {
|
| 110 |
+
font-size: 0.8rem;
|
| 111 |
+
font-weight: 600;
|
| 112 |
+
text-transform: uppercase;
|
| 113 |
+
letter-spacing: 0.05em;
|
| 114 |
+
color: GrayText;
|
| 115 |
+
}
|
| 116 |
+
|
| 117 |
+
textarea {
|
| 118 |
+
font-family: var(--font-mono);
|
| 119 |
+
font-size: 1.1rem;
|
| 120 |
+
line-height: 1.7;
|
| 121 |
+
background: Field;
|
| 122 |
+
color: FieldText;
|
| 123 |
+
border: 1px solid ButtonBorder;
|
| 124 |
+
border-radius: var(--radius);
|
| 125 |
+
padding: 0.75rem 1rem;
|
| 126 |
+
resize: vertical;
|
| 127 |
+
width: 100%;
|
| 128 |
+
transition: border-color 0.15s;
|
| 129 |
+
|
| 130 |
+
&:focus {
|
| 131 |
+
outline: none;
|
| 132 |
+
border-color: AccentColor;
|
| 133 |
+
box-shadow: 0 0 0 3px color-mix(in srgb, AccentColor 20%, transparent);
|
| 134 |
+
}
|
| 135 |
+
}
|
| 136 |
}
|
| 137 |
|
| 138 |
+
/* ─── Add tokenizer row ─────────────────────────────────────────────────── */
|
| 139 |
+
|
| 140 |
+
.add-tokenizer-section {
|
| 141 |
+
display: flex;
|
| 142 |
+
gap: 0.5rem;
|
| 143 |
+
align-items: center;
|
| 144 |
+
flex-wrap: wrap;
|
| 145 |
+
|
| 146 |
+
select {
|
| 147 |
+
font-family: var(--font-sans);
|
| 148 |
+
font-size: 0.9rem;
|
| 149 |
+
background: Field;
|
| 150 |
+
color: FieldText;
|
| 151 |
+
border: 1px solid ButtonBorder;
|
| 152 |
+
border-radius: var(--radius-sm);
|
| 153 |
+
padding: 0.45rem 0.75rem;
|
| 154 |
+
flex: 1 1 280px;
|
| 155 |
+
max-width: 480px;
|
| 156 |
+
cursor: pointer;
|
| 157 |
+
transition: border-color 0.15s;
|
| 158 |
+
|
| 159 |
+
&:focus {
|
| 160 |
+
outline: none;
|
| 161 |
+
border-color: AccentColor;
|
| 162 |
+
}
|
| 163 |
+
}
|
| 164 |
+
|
| 165 |
+
button {
|
| 166 |
+
font-family: var(--font-sans);
|
| 167 |
+
font-size: 0.9rem;
|
| 168 |
+
font-weight: 600;
|
| 169 |
+
background: AccentColor;
|
| 170 |
+
color: AccentColorText;
|
| 171 |
+
border: none;
|
| 172 |
+
border-radius: var(--radius-sm);
|
| 173 |
+
padding: 0.45rem 1.1rem;
|
| 174 |
+
cursor: pointer;
|
| 175 |
+
transition: opacity 0.15s;
|
| 176 |
+
white-space: nowrap;
|
| 177 |
+
|
| 178 |
+
&:hover:not(:disabled) {
|
| 179 |
+
opacity: 0.85;
|
| 180 |
+
}
|
| 181 |
+
|
| 182 |
+
&:disabled {
|
| 183 |
+
opacity: 0.4;
|
| 184 |
+
cursor: not-allowed;
|
| 185 |
+
}
|
| 186 |
+
}
|
| 187 |
+
}
|
| 188 |
+
|
| 189 |
+
/* ─── Empty state ───────────────────────────────────────────────────────── */
|
| 190 |
+
|
| 191 |
+
.empty-state {
|
| 192 |
+
text-align: center;
|
| 193 |
+
color: GrayText;
|
| 194 |
+
font-size: 0.9rem;
|
| 195 |
+
padding: 2.5rem 1rem;
|
| 196 |
+
border: 1.5px dashed ButtonBorder;
|
| 197 |
+
border-radius: var(--radius);
|
| 198 |
+
background: Canvas;
|
| 199 |
+
|
| 200 |
+
&.hidden {
|
| 201 |
+
display: none;
|
| 202 |
+
}
|
| 203 |
+
}
|
| 204 |
+
|
| 205 |
+
/* ─── Panels grid ───────────────────────────────────────────────────────── */
|
| 206 |
+
|
| 207 |
+
#panels {
|
| 208 |
+
display: grid;
|
| 209 |
+
grid-template-columns: repeat(auto-fit, minmax(340px, 1fr));
|
| 210 |
+
gap: 1rem;
|
| 211 |
+
align-items: start;
|
| 212 |
+
}
|
| 213 |
+
|
| 214 |
+
/* ─── Individual tokenizer panel ────────────────────────────────────────── */
|
| 215 |
+
|
| 216 |
+
.panel {
|
| 217 |
+
background: Canvas;
|
| 218 |
+
border: 1px solid ButtonBorder;
|
| 219 |
+
border-radius: var(--radius);
|
| 220 |
+
box-shadow: var(--shadow);
|
| 221 |
+
display: grid;
|
| 222 |
+
grid-template-rows: auto auto auto 1fr;
|
| 223 |
+
overflow: hidden;
|
| 224 |
+
|
| 225 |
+
/* Panel header bar */
|
| 226 |
+
.panel-header {
|
| 227 |
+
display: flex;
|
| 228 |
+
align-items: center;
|
| 229 |
+
justify-content: space-between;
|
| 230 |
+
gap: 0.5rem;
|
| 231 |
+
padding: 0.65rem 1rem;
|
| 232 |
+
background: ButtonFace;
|
| 233 |
+
border-block-end: 1px solid ButtonBorder;
|
| 234 |
+
|
| 235 |
+
.panel-title {
|
| 236 |
+
font-size: 0.85rem;
|
| 237 |
+
font-weight: 600;
|
| 238 |
+
color: ButtonText;
|
| 239 |
+
text-decoration: none;
|
| 240 |
+
overflow: hidden;
|
| 241 |
+
text-overflow: ellipsis;
|
| 242 |
+
white-space: nowrap;
|
| 243 |
+
}
|
| 244 |
+
|
| 245 |
+
.remove-btn {
|
| 246 |
+
font-size: 0.75rem;
|
| 247 |
+
font-weight: 600;
|
| 248 |
+
color: GrayText;
|
| 249 |
+
background: none;
|
| 250 |
+
border: 1px solid ButtonBorder;
|
| 251 |
+
border-radius: var(--radius-sm);
|
| 252 |
+
padding: 0.2rem 0.5rem;
|
| 253 |
+
cursor: pointer;
|
| 254 |
+
flex-shrink: 0;
|
| 255 |
+
transition:
|
| 256 |
+
color 0.12s,
|
| 257 |
+
border-color 0.12s;
|
| 258 |
+
|
| 259 |
+
&:hover {
|
| 260 |
+
color: LinkText;
|
| 261 |
+
border-color: LinkText;
|
| 262 |
+
}
|
| 263 |
+
}
|
| 264 |
+
}
|
| 265 |
+
|
| 266 |
+
/* Text mirror with highlights */
|
| 267 |
+
.panel-text {
|
| 268 |
+
font-family: var(--font-mono);
|
| 269 |
+
font-size: 1.05rem;
|
| 270 |
+
line-height: 1.8;
|
| 271 |
+
padding: 0.85rem 1rem;
|
| 272 |
+
border-block-end: 1px solid ButtonBorder;
|
| 273 |
+
white-space: pre-wrap;
|
| 274 |
+
word-break: break-all;
|
| 275 |
+
min-height: 3.5rem;
|
| 276 |
+
}
|
| 277 |
+
|
| 278 |
+
/* Stats bar */
|
| 279 |
+
.panel-stats {
|
| 280 |
+
display: flex;
|
| 281 |
+
gap: 1.5rem;
|
| 282 |
+
padding: 0.45rem 1rem;
|
| 283 |
+
background: ButtonFace;
|
| 284 |
+
border-block-end: 1px solid ButtonBorder;
|
| 285 |
+
font-size: 0.78rem;
|
| 286 |
+
color: GrayText;
|
| 287 |
+
flex-wrap: wrap;
|
| 288 |
+
|
| 289 |
+
strong {
|
| 290 |
+
color: CanvasText;
|
| 291 |
+
font-weight: 600;
|
| 292 |
+
}
|
| 293 |
+
}
|
| 294 |
+
|
| 295 |
+
/* Token table */
|
| 296 |
+
.panel-table-wrap {
|
| 297 |
+
overflow-y: auto;
|
| 298 |
+
max-height: 360px;
|
| 299 |
+
}
|
| 300 |
+
|
| 301 |
+
table {
|
| 302 |
+
width: 100%;
|
| 303 |
+
border-collapse: collapse;
|
| 304 |
+
font-size: 0.82rem;
|
| 305 |
+
|
| 306 |
+
thead {
|
| 307 |
+
position: sticky;
|
| 308 |
+
top: 0;
|
| 309 |
+
background: ButtonFace;
|
| 310 |
+
z-index: 1;
|
| 311 |
+
|
| 312 |
+
th {
|
| 313 |
+
text-align: left;
|
| 314 |
+
padding: 0.4rem 0.75rem;
|
| 315 |
+
font-weight: 600;
|
| 316 |
+
font-size: 0.72rem;
|
| 317 |
+
text-transform: uppercase;
|
| 318 |
+
letter-spacing: 0.05em;
|
| 319 |
+
color: GrayText;
|
| 320 |
+
border-block-end: 1px solid ButtonBorder;
|
| 321 |
+
}
|
| 322 |
+
}
|
| 323 |
+
|
| 324 |
+
tbody tr {
|
| 325 |
+
cursor: default;
|
| 326 |
+
transition: background 0.08s;
|
| 327 |
+
|
| 328 |
+
&:hover {
|
| 329 |
+
background: color-mix(in srgb, ButtonFace 60%, Canvas);
|
| 330 |
+
}
|
| 331 |
+
|
| 332 |
+
&.active {
|
| 333 |
+
background: color-mix(
|
| 334 |
+
in srgb,
|
| 335 |
+
var(--row-color, AccentColor) 18%,
|
| 336 |
+
Canvas
|
| 337 |
+
);
|
| 338 |
+
}
|
| 339 |
+
|
| 340 |
+
td {
|
| 341 |
+
padding: 0.3rem 0.75rem;
|
| 342 |
+
border-block-end: 1px solid
|
| 343 |
+
color-mix(in srgb, ButtonBorder 60%, transparent);
|
| 344 |
+
vertical-align: middle;
|
| 345 |
+
|
| 346 |
+
&.tok-cell {
|
| 347 |
+
font-family: var(--font-mono);
|
| 348 |
+
font-size: 0.88rem;
|
| 349 |
+
}
|
| 350 |
+
|
| 351 |
+
&.id-cell {
|
| 352 |
+
font-family: var(--font-mono);
|
| 353 |
+
color: GrayText;
|
| 354 |
+
font-size: 0.8rem;
|
| 355 |
+
text-align: right;
|
| 356 |
+
}
|
| 357 |
+
|
| 358 |
+
&.color-swatch {
|
| 359 |
+
width: 18px;
|
| 360 |
+
padding-inline: 0.5rem;
|
| 361 |
+
|
| 362 |
+
span {
|
| 363 |
+
display: block;
|
| 364 |
+
width: 10px;
|
| 365 |
+
height: 10px;
|
| 366 |
+
border-radius: 50%;
|
| 367 |
+
background: var(--row-color, ButtonBorder);
|
| 368 |
+
}
|
| 369 |
+
}
|
| 370 |
+
}
|
| 371 |
+
}
|
| 372 |
+
}
|
| 373 |
+
|
| 374 |
+
/* Loading state */
|
| 375 |
+
&.loading {
|
| 376 |
+
.panel-text,
|
| 377 |
+
.panel-stats,
|
| 378 |
+
.panel-table-wrap {
|
| 379 |
+
opacity: 0.4;
|
| 380 |
+
pointer-events: none;
|
| 381 |
+
}
|
| 382 |
+
}
|
| 383 |
+
|
| 384 |
+
.panel-loading {
|
| 385 |
+
display: none;
|
| 386 |
+
align-items: center;
|
| 387 |
+
gap: 0.5rem;
|
| 388 |
+
padding: 0.6rem 1rem;
|
| 389 |
+
font-size: 0.8rem;
|
| 390 |
+
color: GrayText;
|
| 391 |
+
|
| 392 |
+
.spinner {
|
| 393 |
+
width: 14px;
|
| 394 |
+
height: 14px;
|
| 395 |
+
border: 2px solid ButtonBorder;
|
| 396 |
+
border-top-color: AccentColor;
|
| 397 |
+
border-radius: 50%;
|
| 398 |
+
animation: spin 0.7s linear infinite;
|
| 399 |
+
flex-shrink: 0;
|
| 400 |
+
}
|
| 401 |
+
}
|
| 402 |
+
|
| 403 |
+
&.loading .panel-loading {
|
| 404 |
+
display: flex;
|
| 405 |
+
}
|
| 406 |
}
|
| 407 |
|
| 408 |
+
@keyframes spin {
|
| 409 |
+
to {
|
| 410 |
+
transform: rotate(360deg);
|
| 411 |
+
}
|
| 412 |
}
|