quickgrid committed on
Commit
6e13729
·
verified ·
1 Parent(s): 087d59b

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +419 -113
index.html CHANGED
@@ -1,159 +1,465 @@
1
  <!DOCTYPE html>
2
- <html lang="en">
3
  <head>
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>Universal Tokenizer Visualizer</title>
 
7
  <script src="https://cdn.tailwindcss.com"></script>
8
- <style>
9
- .token-chip {
10
- display: inline-block;
11
- padding: 2px 4px;
12
- margin: 2px;
13
- border-radius: 4px;
14
- font-family: monospace;
15
- transition: all 0.2s;
16
- cursor: default;
 
 
17
  }
18
- .token-chip:hover {
19
- filter: brightness(0.9);
20
- transform: translateY(-1px);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  }
22
- /* Color palette for tokens */
23
- .color-0 { background-color: #fca5a5; color: #7f1d1d; }
24
- .color-1 { background-color: #fcd34d; color: #78350f; }
25
- .color-2 { background-color: #86efac; color: #064e3b; }
26
- .color-3 { background-color: #93c5fd; color: #1e3a8a; }
27
- .color-4 { background-color: #c4b5fd; color: #4c1d95; }
28
- .color-5 { background-color: #f9a8d4; color: #701a75; }
29
  </style>
30
  </head>
31
- <body class="bg-slate-50 min-h-screen p-4 md:p-8">
32
- <div class="max-w-5xl mx-auto bg-white rounded-xl shadow-lg overflow-hidden border border-slate-200">
33
- <div class="bg-slate-900 p-6 text-white">
34
- <h1 class="text-2xl font-bold">Universal Tokenizer Visualizer</h1>
35
- <p class="text-slate-400 text-sm mt-1">Inspect how models see your text. Runs 100% in-browser.</p>
36
- </div>
37
-
38
- <div class="p-6 space-y-6">
39
- <div class="grid grid-cols-1 md:grid-cols-2 gap-4">
40
- <div>
41
- <label class="block text-sm font-medium text-slate-700 mb-1">Hugging Face Model ID</label>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  <div class="flex gap-2">
43
- <input type="text" id="modelInput" value="deepseek-ai/DeepSeek-V2"
44
- class="flex-1 border rounded-lg px-3 py-2 text-sm focus:ring-2 focus:ring-blue-500 outline-none">
45
- <button id="loadBtn" class="bg-blue-600 text-white px-4 py-2 rounded-lg text-sm font-semibold hover:bg-blue-700 transition">
46
- Load
 
 
 
 
 
 
 
 
 
 
 
 
47
  </button>
48
  </div>
49
- <p class="text-xs text-slate-500 mt-1 italic">Note: Ensure the model has a tokenizer.json file.</p>
50
  </div>
51
- <div class="flex flex-col justify-end">
52
- <div id="status" class="text-sm font-medium text-slate-600 bg-slate-100 p-2 rounded-lg text-center border border-dashed border-slate-300">
53
- Ready to load
54
- </div>
 
 
 
 
55
  </div>
56
  </div>
57
-
58
- <div>
59
- <label class="block text-sm font-medium text-slate-700 mb-1">Input Text</label>
60
- <textarea id="inputText" rows="6"
61
- class="w-full border rounded-xl p-4 text-lg focus:ring-2 focus:ring-blue-500 outline-none"
62
- placeholder="Type something here to see tokens..."></textarea>
 
 
 
 
 
 
 
 
63
  </div>
 
 
 
64
 
65
- <div class="flex gap-4 border-t border-b py-3 text-sm font-mono">
66
- <div>Tokens: <span id="tokenCount" class="font-bold text-blue-600">0</span></div>
67
- <div>Characters: <span id="charCount" class="font-bold text-slate-600">0</span></div>
 
 
 
 
 
 
 
68
  </div>
 
 
 
 
 
 
 
 
 
 
69
 
70
- <div>
71
- <label class="block text-sm font-medium text-slate-700 mb-2">Tokenized Output</label>
72
- <div id="visualizer" class="min-h-[150px] p-4 bg-slate-50 rounded-xl border border-slate-200 leading-relaxed">
73
- </div>
 
 
 
 
 
74
  </div>
 
 
 
 
 
75
 
76
- <div>
77
- <label class="block text-sm font-medium text-slate-700 mb-2">Token IDs</label>
78
- <div id="tokenIds" class="text-xs font-mono p-3 bg-slate-900 text-slate-300 rounded-lg overflow-x-auto whitespace-nowrap">
79
- []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  </div>
81
  </div>
82
- </div>
 
 
 
 
 
 
 
 
 
 
 
 
83
  </div>
84
 
85
  <script type="module">
86
- import { AutoTokenizer } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2';
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
 
88
  let tokenizer = null;
89
- const modelInput = document.getElementById('modelInput');
90
- const loadBtn = document.getElementById('loadBtn');
91
- const status = document.getElementById('status');
92
- const inputText = document.getElementById('inputText');
93
- const visualizer = document.getElementById('visualizer');
94
- const tokenCount = document.getElementById('tokenCount');
95
- const charCount = document.getElementById('charCount');
96
- const tokenIdsDiv = document.getElementById('tokenIds');
97
-
98
- async function loadModel(modelId) {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  try {
100
- status.innerText = "⏳ Loading tokenizer...";
101
- status.className = "text-sm font-medium text-amber-600 bg-amber-50 p-2 rounded-lg text-center border border-amber-200";
102
 
103
- // We specify legacy: false to ensure it looks for tokenizer.json
104
- tokenizer = await AutoTokenizer.from_pretrained(modelId);
 
 
105
 
106
- status.innerText = `✅ Loaded: ${modelId}`;
107
- status.className = "text-sm font-medium text-emerald-600 bg-emerald-50 p-2 rounded-lg text-center border border-emerald-200";
108
- updateTokenization();
109
- } catch (e) {
110
- status.innerText = "❌ Error loading model";
111
- status.className = "text-sm font-medium text-red-600 bg-red-50 p-2 rounded-lg text-center border border-red-200";
112
- console.error(e);
 
 
 
 
 
 
 
113
  }
114
  }
115
 
116
- function updateTokenization() {
 
117
  if (!tokenizer) return;
 
118
 
119
- const text = inputText.value;
120
- if (!text) {
121
- visualizer.innerHTML = "";
122
- tokenCount.innerText = "0";
123
- charCount.innerText = "0";
124
- tokenIdsDiv.innerText = "[]";
 
125
  return;
126
  }
127
 
128
- // Encode text
129
- const tokens = tokenizer.encode(text);
130
- const decodedTokens = tokens.map(id => tokenizer.decode([id]));
131
-
132
- // Update Stats
133
- tokenCount.innerText = tokens.length;
134
- charCount.innerText = text.length;
135
- tokenIdsDiv.innerText = JSON.stringify(tokens);
136
-
137
- // Clear visualizer
138
- visualizer.innerHTML = "";
139
-
140
- // Create Visual Chips
141
- decodedTokens.forEach((token, index) => {
142
- const span = document.createElement('span');
143
- span.className = `token-chip color-${index % 6}`;
144
- // Handle spaces and newlines for visibility
145
- let displayToken = token.replace(/ /g, ' ').replace(/\n/g, '↵\n');
146
- span.innerText = displayToken;
147
- span.title = `ID: ${tokens[index]}`;
148
- visualizer.appendChild(span);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  });
150
  }
151
 
152
- loadBtn.addEventListener('click', () => loadModel(modelInput.value));
153
- inputText.addEventListener('input', updateTokenization);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
 
155
- // Initial Load
156
- loadModel(modelInput.value);
 
 
 
157
  </script>
158
  </body>
159
  </html>
 
1
  <!DOCTYPE html>
2
+ <html lang="en" class="light">
3
  <head>
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>LLM Tokenizer Visualizer</title>
7
+ <!-- Tailwind CSS -->
8
  <script src="https://cdn.tailwindcss.com"></script>
9
+ <script>
10
+ tailwind.config = {
11
+ darkMode: 'class',
12
+ theme: {
13
+ extend: {
14
+ fontFamily: {
15
+ sans: ['Inter', 'sans-serif'],
16
+ mono: ['Fira Code', 'monospace'],
17
+ },
18
+ }
19
+ }
20
  }
21
+ </script>
22
+ <!-- Google Fonts -->
23
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=Fira+Code:wght@400;500&display=swap" rel="stylesheet">
24
+ <!-- Transformers.js -->
25
+ <script type="module">
26
+ import { AutoTokenizer } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.1';
27
+ window.AutoTokenizer = AutoTokenizer;
28
+ </script>
29
+ <style type="text/tailwindcss">
30
+ @layer utilities {
31
+ .content-auto {
32
+ content-visibility: auto;
33
+ }
34
+ .token-box {
35
+ white-space: pre;
36
+ word-break: break-all;
37
+ }
38
  }
 
 
 
 
 
 
 
39
  </style>
40
  </head>
41
+ <body class="bg-gray-50 dark:bg-gray-900 text-gray-900 dark:text-gray-100 min-h-screen transition-colors duration-200">
42
+ <div class="max-w-5xl mx-auto px-4 py-8">
43
+ <!-- Header -->
44
+ <header class="flex justify-between items-center mb-8">
45
+ <div>
46
+ <h1 class="text-3xl font-bold bg-gradient-to-r from-blue-600 to-purple-600 bg-clip-text text-transparent">LLM Tokenizer Visualizer</h1>
47
+ <p class="text-gray-600 dark:text-gray-400 mt-1">Client-side tokenization for any Hugging Face model, powered by Transformers.js</p>
48
+ </div>
49
+ <button id="themeToggle" class="p-2 rounded-full hover:bg-gray-200 dark:hover:bg-gray-800 transition-colors">
50
+ <!-- Sun Icon (for dark mode) -->
51
+ <svg id="sunIcon" class="w-6 h-6 hidden" fill="none" stroke="currentColor" viewBox="0 0 24 24">
52
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 3v1m0 16v1m9-9h-1M4 12H3m15.364 6.364l-.707-.707M6.343 6.343l-.707-.707m12.728 0l-.707.707M6.343 17.657l-.707.707M16 12a4 4 0 11-8 0 4 4 0 018 0z"></path>
53
+ </svg>
54
+ <!-- Moon Icon (for light mode) -->
55
+ <svg id="moonIcon" class="w-6 h-6" fill="none" stroke="currentColor" viewBox="0 0 24 24">
56
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M20.354 15.354A9 9 0 018.646 3.646 9.003 9.003 0 0012 21a9.003 9.003 0 008.354-5.646z"></path>
57
+ </svg>
58
+ </button>
59
+ </header>
60
+
61
+ <!-- Model Config Section -->
62
+ <section class="bg-white dark:bg-gray-800 rounded-xl shadow-sm p-6 mb-6">
63
+ <h2 class="text-lg font-semibold mb-4">Model Configuration</h2>
64
+ <div class="grid grid-cols-1 md:grid-cols-12 gap-4">
65
+ <div class="md:col-span-8">
66
+ <label class="block text-sm font-medium mb-2" for="modelId">Hugging Face Model ID</label>
67
  <div class="flex gap-2">
68
+ <input
69
+ type="text"
70
+ id="modelId"
71
+ value="deepseek-ai/DeepSeek-V4-Pro"
72
+ class="flex-1 px-4 py-2 rounded-lg border border-gray-300 dark:border-gray-600 bg-white dark:bg-gray-700 focus:ring-2 focus:ring-blue-500 focus:border-transparent outline-none transition-all"
73
+ placeholder="e.g. meta-llama/Meta-Llama-3-8B-Instruct"
74
+ >
75
+ <button
76
+ id="loadModelBtn"
77
+ class="px-6 py-2 bg-blue-600 hover:bg-blue-700 text-white rounded-lg font-medium transition-colors flex items-center gap-2 disabled:opacity-50 disabled:cursor-not-allowed"
78
+ >
79
+ <svg id="loadSpinner" class="animate-spin w-4 h-4 hidden" fill="none" viewBox="0 0 24 24">
80
+ <circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle>
81
+ <path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
82
+ </svg>
83
+ <span id="loadBtnText">Load Model</span>
84
  </button>
85
  </div>
 
86
  </div>
87
+ <div class="md:col-span-4">
88
+ <label class="block text-sm font-medium mb-2" for="hfToken">Optional HF Token (for gated models)</label>
89
+ <input
90
+ type="password"
91
+ id="hfToken"
92
+ class="w-full px-4 py-2 rounded-lg border border-gray-300 dark:border-gray-600 bg-white dark:bg-gray-700 focus:ring-2 focus:ring-blue-500 focus:border-transparent outline-none transition-all"
93
+ placeholder="hf_..."
94
+ >
95
  </div>
96
  </div>
97
+ <div class="mt-4">
98
+ <label class="block text-sm font-medium mb-2">Popular Models</label>
99
+ <select
100
+ id="popularModels"
101
+ class="w-full md:w-1/3 px-4 py-2 rounded-lg border border-gray-300 dark:border-gray-600 bg-white dark:bg-gray-700 focus:ring-2 focus:ring-blue-500 focus:border-transparent outline-none transition-all"
102
+ >
103
+ <option value="">Select a popular model...</option>
104
+ <option value="deepseek-ai/DeepSeek-V4-Pro">DeepSeek-V4-Pro</option>
105
+ <option value="meta-llama/Meta-Llama-3-8B-Instruct">Llama 3 8B Instruct</option>
106
+ <option value="mistralai/Mistral-7B-Instruct-v0.3">Mistral 7B v0.3</option>
107
+ <option value="google/gemma-2-2b-it">Gemma 2 2B IT</option>
108
+ <option value="Qwen/Qwen2.5-7B-Instruct">Qwen 2.5 7B Instruct</option>
109
+ <option value="microsoft/Phi-3-mini-4k-instruct">Phi-3 Mini 4K Instruct</option>
110
+ </select>
111
  </div>
112
+ <div id="modelError" class="hidden mt-3 p-3 bg-red-100 dark:bg-red-900/30 text-red-700 dark:text-red-300 rounded-lg text-sm"></div>
113
+ <div id="modelSuccess" class="hidden mt-3 p-3 bg-green-100 dark:bg-green-900/30 text-green-700 dark:text-green-300 rounded-lg text-sm"></div>
114
+ </section>
115
 
116
+ <!-- Text Input Section -->
117
+ <section class="bg-white dark:bg-gray-800 rounded-xl shadow-sm p-6 mb-6">
118
+ <div class="flex justify-between items-center mb-4">
119
+ <h2 class="text-lg font-semibold">Input Text</h2>
120
+ <button
121
+ id="clearTextBtn"
122
+ class="text-sm text-gray-600 dark:text-gray-400 hover:text-gray-900 dark:hover:text-gray-100 transition-colors"
123
+ >
124
+ Clear
125
+ </button>
126
  </div>
127
+ <textarea
128
+ id="textInput"
129
+ rows="4"
130
+ class="w-full px-4 py-3 rounded-lg border border-gray-300 dark:border-gray-600 bg-white dark:bg-gray-700 focus:ring-2 focus:ring-blue-500 focus:border-transparent outline-none transition-all font-mono text-sm resize-none"
131
+ placeholder="Enter text to tokenize..."
132
+ disabled
133
+ >Hello, world! This is a test of the tokenizer. Let's see how many tokens this takes.
134
+ 你好,世界!这是一个分词器测试。
135
+ Special tokens: <|endoftext|> <s> </s></textarea>
136
+ </section>
137
 
138
+ <!-- Stats Section -->
139
+ <section class="grid grid-cols-1 md:grid-cols-3 gap-4 mb-6">
140
+ <div class="bg-white dark:bg-gray-800 rounded-xl shadow-sm p-4">
141
+ <p class="text-sm text-gray-600 dark:text-gray-400">Total Tokens</p>
142
+ <p id="totalTokens" class="text-2xl font-bold mt-1">0</p>
143
+ </div>
144
+ <div class="bg-white dark:bg-gray-800 rounded-xl shadow-sm p-4">
145
+ <p class="text-sm text-gray-600 dark:text-gray-400">Total Characters</p>
146
+ <p id="totalChars" class="text-2xl font-bold mt-1">0</p>
147
  </div>
148
+ <div class="bg-white dark:bg-gray-800 rounded-xl shadow-sm p-4">
149
+ <p class="text-sm text-gray-600 dark:text-gray-400">Tokens per Character</p>
150
+ <p id="tokenRatio" class="text-2xl font-bold mt-1">0.00</p>
151
+ </div>
152
+ </section>
153
 
154
+ <!-- Token Visualization Section -->
155
+ <section class="bg-white dark:bg-gray-800 rounded-xl shadow-sm p-6 mb-6">
156
+ <div class="flex justify-between items-center mb-4">
157
+ <h2 class="text-lg font-semibold">Token Visualization</h2>
158
+ <div class="flex gap-2">
159
+ <button
160
+ id="copyTokensBtn"
161
+ class="px-4 py-2 bg-gray-100 dark:bg-gray-700 hover:bg-gray-200 dark:hover:bg-gray-600 rounded-lg text-sm font-medium transition-colors flex items-center gap-2 disabled:opacity-50"
162
+ disabled
163
+ >
164
+ <svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
165
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M8 16H6a2 2 0 01-2-2V6a2 2 0 012-2h8a2 2 0 012 2v2m-6 12h8a2 2 0 002-2v-8a2 2 0 00-2-2h-8a2 2 0 00-2 2v8a2 2 0 002 2z"></path>
166
+ </svg>
167
+ Copy Tokens
168
+ </button>
169
+ <button
170
+ id="copyIdsBtn"
171
+ class="px-4 py-2 bg-gray-100 dark:bg-gray-700 hover:bg-gray-200 dark:hover:bg-gray-600 rounded-lg text-sm font-medium transition-colors flex items-center gap-2 disabled:opacity-50"
172
+ disabled
173
+ >
174
+ <svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
175
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M8 16H6a2 2 0 01-2-2V6a2 2 0 012-2h8a2 2 0 012 2v2m-6 12h8a2 2 0 002-2v-8a2 2 0 00-2-2h-8a2 2 0 00-2 2v8a2 2 0 002 2z"></path>
176
+ </svg>
177
+ Copy IDs
178
+ </button>
179
  </div>
180
  </div>
181
+ <div
182
+ id="tokenContainer"
183
+ class="min-h-[120px] p-4 bg-gray-50 dark:bg-gray-700/50 rounded-lg overflow-x-auto flex flex-wrap gap-2 items-start content-start"
184
+ >
185
+ <p id="emptyState" class="text-gray-500 dark:text-gray-400 w-full text-center py-8">Load a model and enter text to see tokens</p>
186
+ </div>
187
+ <p class="text-xs text-gray-500 dark:text-gray-400 mt-2">Hover over tokens to see their ID. Special tokens are marked with a dashed border.</p>
188
+ </section>
189
+
190
+ <!-- Footer -->
191
+ <footer class="text-center text-gray-600 dark:text-gray-400 text-sm">
192
+ <p>Powered by <a href="https://huggingface.co/docs/transformers.js" target="_blank" class="text-blue-600 dark:text-blue-400 hover:underline">Transformers.js</a> | Runs entirely in your browser, no server required</p>
193
+ </footer>
194
  </div>
195
 
196
  <script type="module">
197
+ // Wait for Transformers.js to load: poll every 100 ms until window.AutoTokenizer is set, then resolve so the rest of this module can run
198
+ await new Promise(resolve => {
199
+ const checkInterval = setInterval(() => {
200
+ if (window.AutoTokenizer) { clearInterval(checkInterval); resolve(); } // resolve() is required: without it this top-level await never finishes
201
+ }, 100);
202
+ });
203
+
204
+ const { AutoTokenizer } = window;
205
+
206
+ // DOM Elements
207
+ const themeToggle = document.getElementById('themeToggle');
208
+ const sunIcon = document.getElementById('sunIcon');
209
+ const moonIcon = document.getElementById('moonIcon');
210
+ const modelIdInput = document.getElementById('modelId');
211
+ const hfTokenInput = document.getElementById('hfToken');
212
+ const loadModelBtn = document.getElementById('loadModelBtn');
213
+ const loadSpinner = document.getElementById('loadSpinner');
214
+ const loadBtnText = document.getElementById('loadBtnText');
215
+ const popularModelsSelect = document.getElementById('popularModels');
216
+ const modelError = document.getElementById('modelError');
217
+ const modelSuccess = document.getElementById('modelSuccess');
218
+ const textInput = document.getElementById('textInput');
219
+ const clearTextBtn = document.getElementById('clearTextBtn');
220
+ const tokenContainer = document.getElementById('tokenContainer');
221
+ const emptyState = document.getElementById('emptyState');
222
+ const totalTokensEl = document.getElementById('totalTokens');
223
+ const totalCharsEl = document.getElementById('totalChars');
224
+ const tokenRatioEl = document.getElementById('tokenRatio');
225
+ const copyTokensBtn = document.getElementById('copyTokensBtn');
226
+ const copyIdsBtn = document.getElementById('copyIdsBtn');
227
 
228
+ // State
229
  let tokenizer = null;
230
+ let currentTokens = [];
231
+ let currentTokenIds = [];
232
+ let isDark = localStorage.getItem('theme') === 'dark' || (!localStorage.getItem('theme') && window.matchMedia('(prefers-color-scheme: dark)').matches);
233
+
234
+ // Initialize theme
235
+ function initTheme() {
236
+ if (isDark) {
237
+ document.documentElement.classList.add('dark');
238
+ sunIcon.classList.remove('hidden');
239
+ moonIcon.classList.add('hidden');
240
+ } else {
241
+ document.documentElement.classList.remove('dark');
242
+ sunIcon.classList.add('hidden');
243
+ moonIcon.classList.remove('hidden');
244
+ }
245
+ }
246
+ initTheme();
247
+
248
+ // Theme toggle
249
+ themeToggle.addEventListener('click', () => {
250
+ isDark = !isDark;
251
+ localStorage.setItem('theme', isDark ? 'dark' : 'light');
252
+ initTheme();
253
+ // Re-render tokens to update colors
254
+ if (currentTokens.length > 0) renderTokens();
255
+ });
256
+
257
+ // Popular models select
258
+ popularModelsSelect.addEventListener('change', (e) => {
259
+ if (e.target.value) {
260
+ modelIdInput.value = e.target.value;
261
+ }
262
+ });
263
+
264
+ // Load model function: fetch the tokenizer for the ID typed in #modelId, enable the text area on success, and show an error (text area stays disabled) on failure. Button and spinner state are always restored afterwards.
265
+ async function loadModel() {
266
+ if (loadModelBtn.disabled) return; // a load is already in flight; Enter in the model field can still reach here while the button is disabled
267
+ const modelId = modelIdInput.value.trim();
268
+ if (!modelId) {
269
+ showError('Please enter a Hugging Face model ID');
270
+ return;
271
+ }
272
+ // Show loading state
273
+ loadModelBtn.disabled = true;
274
+ loadSpinner.classList.remove('hidden');
275
+ loadBtnText.textContent = 'Loading...';
276
+ hideMessages();
277
+ textInput.disabled = true;
278
+
279
  try {
280
+ const token = hfTokenInput.value.trim() || undefined; // NOTE(review): confirm the pinned Transformers.js release actually reads a `token` option; if it does not, the HF token field has no effect
281
+ tokenizer = await AutoTokenizer.from_pretrained(modelId, { token });
282
 
283
+ // Success
284
+ showSuccess(`Successfully loaded tokenizer for ${modelId}`);
285
+ textInput.disabled = false;
286
+ textInput.focus();
287
 
288
+ // Auto-tokenize existing text
289
+ if (textInput.value.trim()) {
290
+ tokenizeText();
291
+ }
292
+ } catch (err) {
293
+ console.error('Failed to load model:', err);
294
+ showError(`Failed to load model: ${err.message}. Check the model ID and ensure it's public (or provide a HF token for gated models).`);
295
+ tokenizer = null;
296
+ textInput.disabled = true;
297
+ } finally {
298
+ // Reset button state
299
+ loadModelBtn.disabled = false;
300
+ loadSpinner.classList.add('hidden');
301
+ loadBtnText.textContent = 'Load Model';
302
  }
303
  }
304
 
305
+ // Tokenize text function: encode the textarea content into currentTokenIds / currentTokens, then refresh the chips, the stats and the copy buttons. Does nothing until a tokenizer is loaded.
306
+ function tokenizeText() {
307
  if (!tokenizer) return;
308
+ const text = textInput.value;
309
 
310
+ if (!text.trim()) { // empty or whitespace-only input: reset to the empty state
311
+ currentTokens = [];
312
+ currentTokenIds = [];
313
+ renderTokens();
314
+ updateStats();
315
+ copyTokensBtn.disabled = true;
316
+ copyIdsBtn.disabled = true;
317
  return;
318
  }
319
 
320
+ try {
321
+ const ids = tokenizer.encode(text); // array of token ids; calling tokenizer(text) does not provide a `tokens` field, which left currentTokens undefined
322
+ currentTokens = ids.map((id) => tokenizer.decode([id])); // one display string per id (a character split across several tokens may not decode cleanly on its own)
323
+ currentTokenIds = ids;
324
+ renderTokens();
325
+ updateStats();
326
+ copyTokensBtn.disabled = false;
327
+ copyIdsBtn.disabled = false;
328
+ } catch (err) {
329
+ console.error('Tokenization failed:', err);
330
+ showError(`Tokenization failed: ${err.message}`);
331
+ }
332
+ }
333
+
334
+ // Render tokens: rebuild #tokenContainer from currentTokens / currentTokenIds as one coloured chip per token, or show the empty-state hint when there are none
335
+ function renderTokens() {
336
+ tokenContainer.innerHTML = '';
337
+
338
+ if (currentTokens.length === 0) {
339
+ tokenContainer.appendChild(emptyState);
340
+ emptyState.classList.remove('hidden');
341
+ return;
342
+ }
343
+ emptyState.classList.add('hidden');
344
+ const specialIds = new Set(tokenizer?.all_special_ids ?? []); // built once per render; `?.` because a failed reload nulls the tokenizer while old tokens can still be re-rendered by the theme toggle
345
+ const specialTexts = new Set(Object.values(tokenizer?.special_tokens_map ?? {}).flat()); // fallback kept from the earlier string-based check
346
+ currentTokens.forEach((token, index) => {
347
+ const tokenId = currentTokenIds[index];
348
+ // Generate consistent pastel color based on token ID
349
+ const hue = (tokenId * 137) % 360; // 137 is prime for even distribution
350
+ const lightness = isDark ? '80%' : '90%';
351
+ const bgColor = `hsl(${hue}, 70%, ${lightness})`;
352
+ const textColor = '#1f2937'; // same dark text in both themes: the chip background stays light either way
353
+
354
+ const tokenEl = document.createElement('span');
355
+ tokenEl.className = 'token-box px-2 py-1 rounded-md text-sm font-mono cursor-pointer transition-all hover:scale-105 hover:shadow-md';
356
+ tokenEl.style.backgroundColor = bgColor;
357
+ tokenEl.style.color = textColor;
358
+
359
+ // Replace whitespace with visible symbols
360
+ const displayToken = token
361
+ .replace(/\n/g, '↵')
362
+ .replace(/\t/g, '→')
363
+ .replace(/ /g, '·');
364
+ tokenEl.textContent = displayToken;
365
+
366
+ // Show token ID on hover
367
+ tokenEl.title = `Token ID: ${tokenId}\nRaw: ${token.replace(/\n/g, '\\n').replace(/\t/g, '\\t')}`;
368
+
369
+ // Highlight special tokens (matched by id first, by text as a fallback)
370
+ if (specialIds.has(tokenId) || specialTexts.has(token)) {
371
+ tokenEl.classList.add('border', 'border-dashed', 'border-gray-400', 'dark:border-gray-500');
372
+ }
373
+
374
+ tokenContainer.appendChild(tokenEl);
375
  });
376
  }
377
 
378
+ // Update stats
379
+ function updateStats() {
380
+ const totalTokens = currentTokens.length;
381
+ const totalChars = textInput.value.length;
382
+ const ratio = totalChars > 0 ? (totalTokens / totalChars).toFixed(2) : '0.00';
383
+
384
+ totalTokensEl.textContent = totalTokens.toLocaleString();
385
+ totalCharsEl.textContent = totalChars.toLocaleString();
386
+ tokenRatioEl.textContent = ratio;
387
+ }
388
+
389
+ // Copy functions: write `text` to the clipboard and swap the button content to a "Copied!" confirmation for two seconds; a clipboard failure is reported through showError
390
+ async function copyToClipboard(text, btn) {
391
+ try {
392
+ await navigator.clipboard.writeText(text);
393
+ const originalText = btn.dataset.originalHtml ?? (btn.dataset.originalHtml = btn.innerHTML); // remember the real label only once, so a click while "Copied!" is showing cannot capture the temporary markup
394
+ btn.innerHTML = `
395
+ <svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
396
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M5 13l4 4L19 7"></path>
397
+ </svg>
398
+ Copied!
399
+ `;
400
+ setTimeout(() => {
401
+ btn.innerHTML = originalText;
402
+ }, 2000);
403
+ } catch (err) {
404
+ showError('Failed to copy to clipboard');
405
+ }
406
+ }
407
+
408
+ copyTokensBtn.addEventListener('click', () => {
409
+ copyToClipboard(currentTokens.join('\n'), copyTokensBtn);
410
+ });
411
+
412
+ copyIdsBtn.addEventListener('click', () => {
413
+ copyToClipboard(JSON.stringify(currentTokenIds, null, 2), copyIdsBtn);
414
+ });
415
+
416
+ // Clear text
417
+ clearTextBtn.addEventListener('click', () => {
418
+ textInput.value = '';
419
+ tokenizeText();
420
+ });
421
+
422
+ // Helper functions
423
+ function showError(message) {
424
+ modelError.textContent = message;
425
+ modelError.classList.remove('hidden');
426
+ modelSuccess.classList.add('hidden');
427
+ }
428
+
429
+ function showSuccess(message) {
430
+ modelSuccess.textContent = message;
431
+ modelSuccess.classList.remove('hidden');
432
+ modelError.classList.add('hidden');
433
+ }
434
+
435
+ function hideMessages() {
436
+ modelError.classList.add('hidden');
437
+ modelSuccess.classList.add('hidden');
438
+ }
439
+
440
+ // Debounce for text input
441
+ function debounce(func, wait) {
442
+ let timeout;
443
+ return function(...args) {
444
+ clearTimeout(timeout);
445
+ timeout = setTimeout(() => func.apply(this, args), wait);
446
+ };
447
+ }
448
+
449
+ const debouncedTokenize = debounce(tokenizeText, 300);
450
+
451
+ // Event listeners
452
+ loadModelBtn.addEventListener('click', loadModel);
453
+ modelIdInput.addEventListener('keypress', (e) => {
454
+ if (e.key === 'Enter') loadModel();
455
+ });
456
+ textInput.addEventListener('input', debouncedTokenize);
457
 
458
+ // Auto-load default model on page load, after a short delay to let Transformers.js finish initializing
459
+ const scheduleInitialLoad = () => setTimeout(loadModel, 500);
460
+ // This module awaits earlier on, so 'load' may already have fired by the time this line runs; start directly in that case
461
+ if (document.readyState === 'complete') scheduleInitialLoad();
462
+ else window.addEventListener('load', scheduleInitialLoad, { once: true });
463
  </script>
464
  </body>
465
  </html>