Debanjum committed on
Commit
ef61a64
·
0 Parent(s):

Create app to visualize text tokenization by any LLM on HF

Browse files
Files changed (5) hide show
  1. Dockerfile +16 -0
  2. README.md +36 -0
  3. app.py +105 -0
  4. requirements.txt +4 -0
  5. templates/index.html +550 -0
Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10-slim
2
+
3
+ WORKDIR /app
4
+
5
+ # Install dependencies
6
+ COPY requirements.txt .
7
+ RUN pip install --no-cache-dir -r requirements.txt
8
+
9
+ # Copy application code
10
+ COPY . .
11
+
12
+ # Expose port 7860 (HF Spaces default)
13
+ EXPOSE 7860
14
+
15
+ # Run the application
16
+ CMD ["python", "app.py"]
README.md ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: HF Tokenizer Visualizer
3
+ emoji: 🔤
4
+ colorFrom: purple
5
+ colorTo: blue
6
+ sdk: docker
7
+ pinned: false
8
+ license: mit
9
+ ---
10
+
11
+ # HF Tokenizer Visualizer
12
+
13
+ A web app to visualize tokenization from any Hugging Face model. Inspired by OpenAI's tokenizer tool.
14
+
15
+ ## Features
16
+
17
+ - Load any tokenizer from Hugging Face Hub by model ID (e.g., `openai-community/gpt2`)
18
+ - Color-coded token visualization
19
+ - Hover to see token IDs
20
+ - Character and token count statistics
21
+ - Popular model suggestions
22
+
23
+ ## Usage
24
+
25
+ 1. Enter a Hugging Face model ID (e.g., `meta-llama/Llama-2-7b-hf`)
26
+ 2. Type or paste text you want to tokenize
27
+ 3. Click "Tokenize" to see the colorized output
28
+
29
+ ## Local Development
30
+
31
+ ```bash
32
+ pip install -r requirements.txt
33
+ python app.py
34
+ ```
35
+
36
+ Then open http://localhost:7860 in your browser.
app.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Tokenizer Web Application
3
+ A simple webapp to visualize tokenization from any Hugging Face model.
4
+ """
5
+ import os
6
+ from flask import Flask, render_template, request, jsonify
7
+ from transformers import AutoTokenizer
8
+ import hashlib
9
+
10
+ app = Flask(__name__)
11
+
12
+ # Cache for loaded tokenizers
13
+ tokenizer_cache = {}
14
+
15
+
16
def get_color_for_token(token_id: int, total_colors: int = 10) -> str:
    """Pick a pastel highlight color by cycling through a fixed palette.

    Args:
        token_id: Integer key that selects the color. The same key always
            maps to the same color; keys wrap around the palette.
        total_colors: How many palette entries to cycle through. Values
            outside ``1..len(palette)`` are clamped, so the default of 10
            uses the whole palette.

    Returns:
        A hex color string such as ``"#FFEAA7"``.
    """
    colors = (
        "#FFEAA7",  # Yellow
        "#DFE6E9",  # Light gray
        "#A8E6CF",  # Mint green
        "#FDCB82",  # Peach
        "#C3AED6",  # Lavender
        "#FFB3BA",  # Light pink
        "#BAFFC9",  # Light green
        "#BAE1FF",  # Light blue
        "#FFE4E1",  # Misty rose
        "#E0BBE4",  # Plum
    )
    # Previously total_colors was accepted but ignored. Clamp it so a caller
    # can never index past the palette or trigger a modulo by zero.
    palette_size = max(1, min(total_colors, len(colors)))
    return colors[token_id % palette_size]
31
+
32
+
33
def load_tokenizer(model_id: str):
    """Load a tokenizer from Hugging Face, caching it per model ID.

    The first request for a model ID loads the tokenizer with
    ``AutoTokenizer.from_pretrained`` and stores it in ``tokenizer_cache``;
    later requests for the same ID return the cached object.

    Args:
        model_id: Hugging Face model identifier, e.g. ``openai-community/gpt2``.

    Returns:
        The tokenizer object for ``model_id``.

    Raises:
        ValueError: If the tokenizer cannot be loaded. The underlying
            exception is chained as ``__cause__``.
    """
    try:
        return tokenizer_cache[model_id]
    except KeyError:
        pass

    # SECURITY: model_id arrives straight from the HTTP request body.
    # trust_remote_code=True would let any Hub repository run arbitrary Python
    # inside this server process, so it is disabled unless the operator opts
    # in explicitly with TRUST_REMOTE_CODE=1 (or "true"/"yes").
    allow_remote_code = os.environ.get("TRUST_REMOTE_CODE", "").strip().lower() in {
        "1",
        "true",
        "yes",
    }

    try:
        tokenizer = AutoTokenizer.from_pretrained(
            model_id, trust_remote_code=allow_remote_code
        )
    except Exception as e:
        # Chain the cause so the original traceback is kept for debugging.
        raise ValueError(f"Failed to load tokenizer for '{model_id}': {str(e)}") from e

    tokenizer_cache[model_id] = tokenizer
    return tokenizer
42
+
43
+
44
@app.route("/")
def index():
    """Render the ``index.html`` template for the root URL."""
    page = render_template("index.html")
    return page
47
+
48
+
49
@app.route("/tokenize", methods=["POST"])
def tokenize():
    """Tokenize the posted text with the requested model's tokenizer.

    Expects a JSON object with string fields ``model_id`` and ``text``.

    Returns:
        On success, a JSON object with ``tokens`` (a list of
        ``{"id", "text", "color"}`` entries, one per token), ``token_count``
        and ``model_id``.
        On failure, a JSON object ``{"error": ...}`` with status 400 for
        invalid input or a ``ValueError`` (including tokenizer load
        failures), or status 500 for any other exception.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # JSON body, so bad requests get the same JSON error shape as every other
    # validation failure.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    model_id = data.get("model_id", "")
    text = data.get("text", "")
    # Reject e.g. null or numeric fields up front; calling .strip() on them
    # would otherwise raise outside the try block below.
    if not isinstance(model_id, str) or not isinstance(text, str):
        return jsonify({"error": "'model_id' and 'text' must be strings"}), 400

    model_id = model_id.strip()

    if not model_id:
        return jsonify({"error": "Model ID is required"}), 400

    if not text:
        return jsonify({"error": "Text is required"}), 400

    try:
        tokenizer = load_tokenizer(model_id)

        # Tokenize the text
        encoding = tokenizer(text, return_offsets_mapping=False, add_special_tokens=True)
        token_ids = encoding["input_ids"]

        # Decode each ID on its own to get per-token text. The color is keyed
        # on the token's position, so neighbouring tokens get different colors.
        tokens = [
            {
                "id": token_id,
                "text": tokenizer.decode([token_id]),
                "color": get_color_for_token(position),
            }
            for position, token_id in enumerate(token_ids)
        ]

        return jsonify({
            "tokens": tokens,
            "token_count": len(tokens),
            "model_id": model_id,
        })

    except ValueError as e:
        return jsonify({"error": str(e)}), 400
    except Exception as e:
        return jsonify({"error": f"Tokenization failed: {str(e)}"}), 500
88
+
89
+
90
@app.route("/models/suggestions")
def model_suggestions():
    """Serve the hard-coded list of suggested model IDs as a JSON array."""
    return jsonify([
        "qwen/qwen3-4B",
        "google/gemma-3-1b-it",
        "openai/gpt-oss-20b",
        "meta-llama/llama-3.2-3b",
    ])
100
+
101
+
102
if __name__ == "__main__":
    # Port 7860 keeps the app compatible with HF Spaces; the PORT environment
    # variable overrides it when set.
    listen_port = int(os.environ.get("PORT", 7860))
    app.run(host="0.0.0.0", port=listen_port, debug=False)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ flask>=2.0.0
2
+ transformers>=4.30.0
3
+ huggingface_hub
4
+ gunicorn
templates/index.html ADDED
@@ -0,0 +1,550 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>HF Tokenizer Visualizer</title>
7
+ <style>
8
+ * {
9
+ box-sizing: border-box;
10
+ margin: 0;
11
+ padding: 0;
12
+ }
13
+
14
+ body {
15
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, sans-serif;
16
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
17
+ min-height: 100vh;
18
+ padding: 40px 20px;
19
+ }
20
+
21
+ .container {
22
+ max-width: 900px;
23
+ margin: 0 auto;
24
+ }
25
+
26
+ h1 {
27
+ color: white;
28
+ text-align: center;
29
+ margin-bottom: 10px;
30
+ font-size: 2.5rem;
31
+ }
32
+
33
+ .subtitle {
34
+ color: rgba(255, 255, 255, 0.8);
35
+ text-align: center;
36
+ margin-bottom: 30px;
37
+ font-size: 1.1rem;
38
+ }
39
+
40
+ .card {
41
+ background: white;
42
+ border-radius: 16px;
43
+ padding: 30px;
44
+ box-shadow: 0 20px 60px rgba(0, 0, 0, 0.3);
45
+ }
46
+
47
+ .input-group {
48
+ margin-bottom: 20px;
49
+ }
50
+
51
+ label {
52
+ display: block;
53
+ font-weight: 600;
54
+ margin-bottom: 8px;
55
+ color: #333;
56
+ }
57
+
58
+ .model-input-wrapper {
59
+ position: relative;
60
+ }
61
+
62
+ input[type="text"] {
63
+ width: 100%;
64
+ padding: 14px 16px;
65
+ border: 2px solid #e0e0e0;
66
+ border-radius: 10px;
67
+ font-size: 16px;
68
+ transition: border-color 0.2s, box-shadow 0.2s;
69
+ }
70
+
71
+ input[type="text"]:focus {
72
+ outline: none;
73
+ border-color: #667eea;
74
+ box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.2);
75
+ }
76
+
77
+ .suggestions {
78
+ position: absolute;
79
+ top: 100%;
80
+ left: 0;
81
+ right: 0;
82
+ background: white;
83
+ border: 1px solid #e0e0e0;
84
+ border-radius: 10px;
85
+ margin-top: 4px;
86
+ box-shadow: 0 10px 40px rgba(0, 0, 0, 0.1);
87
+ z-index: 100;
88
+ display: none;
89
+ max-height: 250px;
90
+ overflow-y: auto;
91
+ }
92
+
93
+ .suggestions.show {
94
+ display: block;
95
+ }
96
+
97
+ .suggestion-item {
98
+ padding: 12px 16px;
99
+ cursor: pointer;
100
+ transition: background 0.15s;
101
+ border-bottom: 1px solid #f0f0f0;
102
+ }
103
+
104
+ .suggestion-item:last-child {
105
+ border-bottom: none;
106
+ }
107
+
108
+ .suggestion-item:hover {
109
+ background: #f5f5f5;
110
+ }
111
+
112
+ .suggestion-item code {
113
+ background: #e8e8e8;
114
+ padding: 2px 6px;
115
+ border-radius: 4px;
116
+ font-size: 14px;
117
+ }
118
+
119
+ textarea {
120
+ width: 100%;
121
+ padding: 14px 16px;
122
+ border: 2px solid #e0e0e0;
123
+ border-radius: 10px;
124
+ font-size: 16px;
125
+ min-height: 150px;
126
+ resize: vertical;
127
+ font-family: inherit;
128
+ transition: border-color 0.2s, box-shadow 0.2s;
129
+ }
130
+
131
+ textarea:focus {
132
+ outline: none;
133
+ border-color: #667eea;
134
+ box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.2);
135
+ }
136
+
137
+ .btn {
138
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
139
+ color: white;
140
+ border: none;
141
+ padding: 14px 32px;
142
+ font-size: 16px;
143
+ font-weight: 600;
144
+ border-radius: 10px;
145
+ cursor: pointer;
146
+ transition: transform 0.2s, box-shadow 0.2s;
147
+ width: 100%;
148
+ }
149
+
150
+ .btn:hover {
151
+ transform: translateY(-2px);
152
+ box-shadow: 0 10px 30px rgba(102, 126, 234, 0.4);
153
+ }
154
+
155
+ .btn:active {
156
+ transform: translateY(0);
157
+ }
158
+
159
+ .btn:disabled {
160
+ opacity: 0.6;
161
+ cursor: not-allowed;
162
+ transform: none;
163
+ }
164
+
165
+ .results {
166
+ margin-top: 30px;
167
+ padding-top: 30px;
168
+ border-top: 2px solid #f0f0f0;
169
+ }
170
+
171
+ .stats {
172
+ display: flex;
173
+ gap: 20px;
174
+ margin-bottom: 20px;
175
+ flex-wrap: wrap;
176
+ }
177
+
178
+ .stat {
179
+ background: #f8f9fa;
180
+ padding: 12px 20px;
181
+ border-radius: 10px;
182
+ font-size: 14px;
183
+ }
184
+
185
+ .stat-value {
186
+ font-weight: 700;
187
+ color: #667eea;
188
+ font-size: 20px;
189
+ }
190
+
191
+ .stat-label {
192
+ color: #666;
193
+ margin-top: 2px;
194
+ }
195
+
196
+ .tokens-display {
197
+ background: #f8f9fa;
198
+ border-radius: 12px;
199
+ padding: 20px;
200
+ line-height: 2.2;
201
+ min-height: 100px;
202
+ word-wrap: break-word;
203
+ overflow-wrap: break-word;
204
+ white-space: pre-wrap;
205
+ }
206
+
207
+ .token {
208
+ display: inline;
209
+ padding: 4px 2px;
210
+ margin: 2px 0;
211
+ border-radius: 4px;
212
+ font-family: 'SF Mono', Monaco, 'Cascadia Code', monospace;
213
+ font-size: 15px;
214
+ cursor: pointer;
215
+ transition: transform 0.1s, box-shadow 0.1s;
216
+ position: relative;
217
+ }
218
+
219
+ .token:hover {
220
+ box-shadow: 0 2px 8px rgba(0, 0, 0, 0.15);
221
+ z-index: 10;
222
+ }
223
+
224
+ .token-tooltip {
225
+ position: absolute;
226
+ bottom: 100%;
227
+ left: 50%;
228
+ transform: translateX(-50%);
229
+ background: #333;
230
+ color: white;
231
+ padding: 6px 10px;
232
+ border-radius: 6px;
233
+ font-size: 12px;
234
+ white-space: nowrap;
235
+ opacity: 0;
236
+ pointer-events: none;
237
+ transition: opacity 0.2s;
238
+ z-index: 100;
239
+ }
240
+
241
+ .token:hover .token-tooltip {
242
+ opacity: 1;
243
+ }
244
+
245
+ .error {
246
+ background: #fff5f5;
247
+ border: 1px solid #fed7d7;
248
+ color: #c53030;
249
+ padding: 14px 18px;
250
+ border-radius: 10px;
251
+ margin-top: 20px;
252
+ }
253
+
254
+ .loading {
255
+ display: none;
256
+ text-align: center;
257
+ padding: 40px;
258
+ }
259
+
260
+ .loading.show {
261
+ display: block;
262
+ }
263
+
264
+ .spinner {
265
+ width: 40px;
266
+ height: 40px;
267
+ border: 4px solid #f0f0f0;
268
+ border-top-color: #667eea;
269
+ border-radius: 50%;
270
+ animation: spin 0.8s linear infinite;
271
+ margin: 0 auto 10px;
272
+ }
273
+
274
+ @keyframes spin {
275
+ to { transform: rotate(360deg); }
276
+ }
277
+
278
+ .hidden {
279
+ display: none;
280
+ }
281
+
282
+ .token-ids-toggle {
283
+ margin-top: 15px;
284
+ }
285
+
286
+ .token-ids-toggle label {
287
+ display: inline-flex;
288
+ align-items: center;
289
+ gap: 8px;
290
+ cursor: pointer;
291
+ font-weight: normal;
292
+ }
293
+
294
+ .token-ids {
295
+ background: #2d3748;
296
+ color: #e2e8f0;
297
+ padding: 16px;
298
+ border-radius: 10px;
299
+ margin-top: 15px;
300
+ font-family: 'SF Mono', Monaco, monospace;
301
+ font-size: 13px;
302
+ overflow-x: auto;
303
+ white-space: pre-wrap;
304
+ word-break: break-all;
305
+ }
306
+
307
+ footer {
308
+ text-align: center;
309
+ margin-top: 30px;
310
+ color: rgba(255, 255, 255, 0.7);
311
+ font-size: 14px;
312
+ }
313
+
314
+ footer a {
315
+ color: white;
316
+ }
317
+ </style>
318
+ </head>
319
+ <body>
320
+ <div class="container">
321
+ <h1>🔤 HF Tokenizer</h1>
322
+ <p class="subtitle">Visualize tokenization from any Hugging Face model</p>
323
+
324
+ <div class="card">
325
+ <div class="input-group">
326
+ <label for="model-id">Model ID (org/model-name)</label>
327
+ <div class="model-input-wrapper">
328
+ <input
329
+ type="text"
330
+ id="model-id"
331
+ placeholder="e.g., openai-community/gpt2"
332
+ autocomplete="off"
333
+ >
334
+ <div class="suggestions" id="suggestions"></div>
335
+ </div>
336
+ </div>
337
+
338
+ <div class="input-group">
339
+ <label for="text-input">Text to tokenize</label>
340
+ <textarea
341
+ id="text-input"
342
+ placeholder="Enter text to tokenize..."
343
+ >Hello, world! This is a test of the tokenizer visualization tool.</textarea>
344
+ </div>
345
+
346
+ <button class="btn" id="tokenize-btn">Tokenize</button>
347
+
348
+ <div class="loading" id="loading">
349
+ <div class="spinner"></div>
350
+ <p>Loading tokenizer and processing...</p>
351
+ </div>
352
+
353
+ <div class="error hidden" id="error"></div>
354
+
355
+ <div class="results hidden" id="results">
356
+ <div class="stats">
357
+ <div class="stat">
358
+ <div class="stat-value" id="token-count">0</div>
359
+ <div class="stat-label">Tokens</div>
360
+ </div>
361
+ <div class="stat">
362
+ <div class="stat-value" id="char-count">0</div>
363
+ <div class="stat-label">Characters</div>
364
+ </div>
365
+ </div>
366
+
367
+ <label>Tokenized Output <span style="font-weight: normal; color: #888;">(hover for token IDs)</span></label>
368
+ <div class="tokens-display" id="tokens-display"></div>
369
+
370
+ <div class="token-ids-toggle">
371
+ <label>
372
+ <input type="checkbox" id="show-ids"> Show token IDs
373
+ </label>
374
+ </div>
375
+
376
+ <div class="token-ids hidden" id="token-ids"></div>
377
+ </div>
378
+ </div>
379
+
380
+ <footer>
381
+ Powered by 🤗 Hugging Face Transformers
382
+ </footer>
383
+ </div>
384
+
385
+ <script>
386
+ const modelInput = document.getElementById('model-id');
387
+ const textInput = document.getElementById('text-input');
388
+ const tokenizeBtn = document.getElementById('tokenize-btn');
389
+ const suggestionsDiv = document.getElementById('suggestions');
390
+ const loadingDiv = document.getElementById('loading');
391
+ const errorDiv = document.getElementById('error');
392
+ const resultsDiv = document.getElementById('results');
393
+ const tokensDisplay = document.getElementById('tokens-display');
394
+ const tokenCount = document.getElementById('token-count');
395
+ const charCount = document.getElementById('char-count');
396
+ const showIdsCheckbox = document.getElementById('show-ids');
397
+ const tokenIdsDiv = document.getElementById('token-ids');
398
+
399
+ let suggestions = [];
400
+ let currentTokens = [];
401
+
402
+ // Load model suggestions
403
+ fetch('/models/suggestions')
404
+ .then(res => res.json())
405
+ .then(data => {
406
+ suggestions = data;
407
+ });
408
+
409
+ // Model input focus/blur handling
410
+ modelInput.addEventListener('focus', () => {
411
+ if (suggestions.length > 0) {
412
+ showSuggestions(suggestions);
413
+ }
414
+ });
415
+
416
+ modelInput.addEventListener('input', () => {
417
+ const query = modelInput.value.toLowerCase();
418
+ const filtered = suggestions.filter(s => s.toLowerCase().includes(query));
419
+ if (filtered.length > 0 && document.activeElement === modelInput) {
420
+ showSuggestions(filtered);
421
+ } else {
422
+ hideSuggestions();
423
+ }
424
+ });
425
+
426
+ document.addEventListener('click', (e) => {
427
+ if (!modelInput.contains(e.target) && !suggestionsDiv.contains(e.target)) {
428
+ hideSuggestions();
429
+ }
430
+ });
431
+
432
/**
 * Fill the suggestions dropdown with the given model IDs and make it visible.
 *
 * Nodes are created through the DOM API instead of an HTML string, so a model
 * ID containing quotes or angle brackets is shown as text and can neither
 * break the data-model attribute nor inject markup.
 *
 * @param {string[]} items Model IDs to list, in display order.
 */
function showSuggestions(items) {
    const fragment = document.createDocumentFragment();
    for (const item of items) {
        const row = document.createElement('div');
        row.className = 'suggestion-item';
        // Read back by the click handler through item.dataset.model.
        row.dataset.model = item;

        const label = document.createElement('code');
        label.textContent = item;
        row.appendChild(label);

        fragment.appendChild(row);
    }

    suggestionsDiv.innerHTML = '';
    suggestionsDiv.appendChild(fragment);
    suggestionsDiv.classList.add('show');
}
438
+
439
/** Hide the suggestions dropdown by dropping its `show` class. */
function hideSuggestions() {
    suggestionsDiv.classList.toggle('show', false);
}
442
+
443
+ suggestionsDiv.addEventListener('click', (e) => {
444
+ const item = e.target.closest('.suggestion-item');
445
+ if (item) {
446
+ modelInput.value = item.dataset.model;
447
+ hideSuggestions();
448
+ }
449
+ });
450
+
451
+ // Tokenize button click
452
+ tokenizeBtn.addEventListener('click', tokenize);
453
+
454
+ // Keyboard shortcut
455
+ document.addEventListener('keydown', (e) => {
456
+ if ((e.metaKey || e.ctrlKey) && e.key === 'Enter') {
457
+ tokenize();
458
+ }
459
+ });
460
+
461
/**
 * Send the current model ID and text to POST /tokenize and render the result.
 *
 * Shows a validation error when either field is empty. While the request is
 * in flight, the loading indicator is shown and the button is disabled; both
 * are restored whether the request succeeds or fails.
 */
async function tokenize() {
    // The Ctrl/Cmd+Enter shortcut calls this function directly and so bypasses
    // the disabled button; bail out if a request is already in flight.
    if (tokenizeBtn.disabled) {
        return;
    }

    const modelId = modelInput.value.trim();
    const text = textInput.value;

    if (!modelId) {
        showError('Please enter a model ID');
        return;
    }

    if (!text) {
        showError('Please enter some text to tokenize');
        return;
    }

    hideError();
    resultsDiv.classList.add('hidden');
    loadingDiv.classList.add('show');
    tokenizeBtn.disabled = true;

    try {
        const response = await fetch('/tokenize', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ model_id: modelId, text })
        });

        // An error page from a proxy or the framework may not be JSON; fall
        // back to a generic message rather than surfacing a parse error.
        let data = null;
        try {
            data = await response.json();
        } catch (parseError) {
            data = null;
        }

        if (!response.ok || data === null) {
            throw new Error((data && data.error) || 'Tokenization failed');
        }

        currentTokens = data.tokens;
        displayResults(data, text);

    } catch (error) {
        showError(error.message);
    } finally {
        loadingDiv.classList.remove('show');
        tokenizeBtn.disabled = false;
    }
}
503
+
504
/**
 * Render a successful /tokenize response: the counters, the colorized token
 * spans (each with an ID tooltip), and the raw token-ID list.
 *
 * @param {{tokens: {id: number, text: string, color: string}[], token_count: number}} data
 *        Parsed response body.
 * @param {string} text The text that was submitted, used for the character count.
 */
function displayResults(data, text) {
    tokenCount.textContent = data.token_count;
    charCount.textContent = text.length;

    // Display colorized tokens
    tokensDisplay.innerHTML = data.tokens.map((token) => {
        // Escape HTML first, then make whitespace visible.
        let displayText = token.text
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/\n/g, '↵\n')
            .replace(/\t/g, '→')
            .replace(/ /g, '·');

        // Newlines, tabs and spaces already have visible markers from the
        // replacements above, so only an empty token text still needs a
        // placeholder. The previous condition required a non-empty string
        // before applying the '␣' fallback, so it never took effect.
        if (displayText === '') {
            displayText = '␣';
        }

        return `<span class="token" style="background-color: ${token.color}">` +
               `<span class="token-tooltip">ID: ${token.id}</span>` +
               `${displayText}</span>`;
    }).join('');

    // Update token IDs display
    tokenIdsDiv.textContent = `[${data.tokens.map(t => t.id).join(', ')}]`;

    resultsDiv.classList.remove('hidden');
}
534
+
535
/**
 * Show the error banner with the given message.
 *
 * @param {string} message Text to show; assigned via textContent, so it is not parsed as HTML.
 */
function showError(message) {
    errorDiv.classList.toggle('hidden', false);
    errorDiv.textContent = message;
}
539
+
540
/** Hide the error banner by adding its `hidden` class. */
function hideError() {
    errorDiv.classList.toggle('hidden', true);
}
543
+
544
+ // Toggle token IDs
545
+ showIdsCheckbox.addEventListener('change', () => {
546
+ tokenIdsDiv.classList.toggle('hidden', !showIdsCheckbox.checked);
547
+ });
548
+ </script>
549
+ </body>
550
+ </html>