quickgrid committed on
Commit
087d59b
·
verified ·
1 Parent(s): 76938dc

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +145 -1508
index.html CHANGED
@@ -1,1522 +1,159 @@
1
  <!DOCTYPE html>
2
  <html lang="en">
3
  <head>
4
- <meta charset="UTF-8" />
5
- <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
- <title>TokenLens — LLM Tokenizer Playground</title>
7
- <meta name="description" content="Visualize how large language models tokenize text. Powered by Transformers.js, runs entirely in your browser." />
8
- <link rel="preconnect" href="https://fonts.googleapis.com" />
9
- <link href="https://fonts.googleapis.com/css2?family=Bricolage+Grotesque:opsz,wght@12..96,300;12..96,400;12..96,500;12..96,600;12..96,700;12..96,800&family=JetBrains+Mono:wght@300;400;500;700&family=DM+Sans:wght@300;400;500&display=swap" rel="stylesheet" />
10
-
11
- <style>
12
- /* ─── Design Tokens ─────────────────────────────────── */
13
- :root {
14
- --bg: #060b14;
15
- --bg2: #0b1220;
16
- --bg3: #101828;
17
- --bg4: #162035;
18
- --border: #1a2d4a;
19
- --border2: #243d60;
20
- --glow: #1f3d6e;
21
- --text: #dce8f8;
22
- --text2: #7899c0;
23
- --text3: #3d5a80;
24
- --accent: #4d9ef5;
25
- --accent2: #8b6af5;
26
- --green: #34d89a;
27
- --amber: #f5a623;
28
- --red: #f55577;
29
-
30
- /* Token palette — 14 vivid colors for dark bg */
31
- --t0: #ff8080; --t0b: rgba(255,128,128,.18);
32
- --t1: #ffb84d; --t1b: rgba(255,184, 77,.18);
33
- --t2: #ffe066; --t2b: rgba(255,224,102,.18);
34
- --t3: #7aed91; --t3b: rgba(122,237,145,.18);
35
- --t4: #4ddfc0; --t4b: rgba( 77,223,192,.18);
36
- --t5: #56c8f5; --t5b: rgba( 86,200,245,.18);
37
- --t6: #748ef8; --t6b: rgba(116,142,248,.18);
38
- --t7: #c484f8; --t7b: rgba(196,132,248,.18);
39
- --t8: #f57cd4; --t8b: rgba(245,124,212,.18);
40
- --t9: #fa8072; --t9b: rgba(250,128,114,.18);
41
- --t10: #8be08b; --t10b: rgba(139,224,139,.18);
42
- --t11: #f0c040; --t11b: rgba(240,192, 64,.18);
43
- --t12: #60d4e0; --t12b: rgba( 96,212,224,.18);
44
- --t13: #e89060; --t13b: rgba(232,144, 96,.18);
45
- }
46
-
47
- /* ─── Reset ─────────────────────────────────────────── */
48
- *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
49
-
50
- html { scroll-behavior: smooth; }
51
-
52
- body {
53
- background: var(--bg);
54
- color: var(--text);
55
- font-family: 'DM Sans', sans-serif;
56
- min-height: 100vh;
57
- display: flex;
58
- flex-direction: column;
59
- overflow-x: hidden;
60
- }
61
-
62
- /* ─── Background FX ─────────────────────────────────── */
63
- #bg-canvas {
64
- position: fixed;
65
- inset: 0;
66
- pointer-events: none;
67
- z-index: 0;
68
- }
69
- .bg-gradient {
70
- position: fixed;
71
- inset: 0;
72
- pointer-events: none;
73
- z-index: 0;
74
- background:
75
- radial-gradient(ellipse 80% 50% at 20% 10%, rgba(77,158,245,.06) 0%, transparent 70%),
76
- radial-gradient(ellipse 60% 40% at 80% 90%, rgba(139,106,245,.05) 0%, transparent 60%),
77
- radial-gradient(ellipse 40% 30% at 60% 50%, rgba(52,216,154,.03) 0%, transparent 60%);
78
- }
79
- .dot-grid {
80
- position: fixed;
81
- inset: 0;
82
- pointer-events: none;
83
- z-index: 0;
84
- background-image: radial-gradient(circle, rgba(77,158,245,.12) 1px, transparent 1px);
85
- background-size: 36px 36px;
86
- mask-image: radial-gradient(ellipse 100% 100% at 50% 50%, black 30%, transparent 80%);
87
- }
88
-
89
- /* ─── Layout ─────────────────────────────────────────── */
90
- #app {
91
- position: relative;
92
- z-index: 1;
93
- display: flex;
94
- flex-direction: column;
95
- min-height: 100vh;
96
- }
97
-
98
- /* ─── Header ─────────────────────────────────────────── */
99
- header {
100
- display: flex;
101
- align-items: center;
102
- justify-content: space-between;
103
- padding: 0 32px;
104
- height: 64px;
105
- border-bottom: 1px solid var(--border);
106
- background: rgba(6,11,20,.85);
107
- backdrop-filter: blur(20px);
108
- position: sticky;
109
- top: 0;
110
- z-index: 100;
111
- }
112
-
113
- .logo {
114
- display: flex;
115
- align-items: center;
116
- gap: 10px;
117
- text-decoration: none;
118
- color: var(--text);
119
- }
120
- .logo-hex {
121
- width: 34px;
122
- height: 34px;
123
- background: linear-gradient(135deg, var(--accent), var(--accent2));
124
- clip-path: polygon(50% 0%, 93% 25%, 93% 75%, 50% 100%, 7% 75%, 7% 25%);
125
- display: flex;
126
- align-items: center;
127
- justify-content: center;
128
- font-size: 14px;
129
- font-family: 'JetBrains Mono', monospace;
130
- font-weight: 700;
131
- color: white;
132
- }
133
- .logo-name {
134
- font-family: 'Bricolage Grotesque', sans-serif;
135
- font-size: 20px;
136
- font-weight: 700;
137
- letter-spacing: -0.5px;
138
- background: linear-gradient(135deg, #dce8f8 40%, var(--accent));
139
- -webkit-background-clip: text;
140
- -webkit-text-fill-color: transparent;
141
- background-clip: text;
142
- }
143
- .logo-tag {
144
- font-size: 10px;
145
- font-family: 'JetBrains Mono', monospace;
146
- color: var(--text3);
147
- background: var(--bg3);
148
- border: 1px solid var(--border);
149
- padding: 2px 6px;
150
- border-radius: 4px;
151
- letter-spacing: .5px;
152
- }
153
-
154
- .header-right {
155
- display: flex;
156
- align-items: center;
157
- gap: 16px;
158
- }
159
- .header-badge {
160
- display: flex;
161
- align-items: center;
162
- gap: 6px;
163
- font-size: 12px;
164
- color: var(--text2);
165
- font-family: 'JetBrains Mono', monospace;
166
- }
167
- .header-badge .dot {
168
- width: 7px;
169
- height: 7px;
170
- border-radius: 50%;
171
- background: var(--green);
172
- box-shadow: 0 0 8px var(--green);
173
- animation: pulse-dot 2s ease-in-out infinite;
174
- }
175
- @keyframes pulse-dot {
176
- 0%,100% { opacity: 1; }
177
- 50% { opacity: .4; }
178
- }
179
-
180
- /* ─── Model Selector Bar ─────────────────────────────── */
181
- .model-bar {
182
- padding: 16px 32px;
183
- border-bottom: 1px solid var(--border);
184
- background: rgba(11,18,32,.7);
185
- backdrop-filter: blur(12px);
186
- }
187
- .model-bar-label {
188
- font-size: 11px;
189
- font-family: 'JetBrains Mono', monospace;
190
- color: var(--text3);
191
- letter-spacing: 1.5px;
192
- text-transform: uppercase;
193
- margin-bottom: 10px;
194
- }
195
- .model-tabs {
196
- display: flex;
197
- flex-wrap: wrap;
198
- gap: 8px;
199
- align-items: center;
200
- }
201
- .model-tab {
202
- display: flex;
203
- flex-direction: column;
204
- padding: 8px 14px;
205
- border: 1px solid var(--border);
206
- border-radius: 10px;
207
- background: var(--bg2);
208
- cursor: pointer;
209
- transition: all 0.2s ease;
210
- position: relative;
211
- overflow: hidden;
212
- min-width: 110px;
213
- }
214
- .model-tab::before {
215
- content: '';
216
- position: absolute;
217
- inset: 0;
218
- background: linear-gradient(135deg, var(--accent), var(--accent2));
219
- opacity: 0;
220
- transition: opacity 0.2s;
221
- }
222
- .model-tab:hover {
223
- border-color: var(--border2);
224
- transform: translateY(-1px);
225
- }
226
- .model-tab.active {
227
- border-color: var(--accent);
228
- box-shadow: 0 0 0 1px var(--accent), 0 0 20px rgba(77,158,245,.15);
229
- }
230
- .model-tab.active::before { opacity: .08; }
231
- .model-tab-name {
232
- font-family: 'Bricolage Grotesque', sans-serif;
233
- font-size: 13px;
234
- font-weight: 600;
235
- color: var(--text);
236
- position: relative;
237
- }
238
- .model-tab-org {
239
- font-size: 10px;
240
- color: var(--text2);
241
- font-family: 'JetBrains Mono', monospace;
242
- position: relative;
243
- margin-top: 1px;
244
- }
245
- .model-tab-vocab {
246
- font-size: 10px;
247
- color: var(--text3);
248
- font-family: 'JetBrains Mono', monospace;
249
- position: relative;
250
- }
251
- .model-org-dot {
252
- width: 6px;
253
- height: 6px;
254
- border-radius: 50%;
255
- display: inline-block;
256
- margin-right: 4px;
257
- position: relative;
258
- top: -1px;
259
- }
260
-
261
- /* Custom model row */
262
- .custom-model-row {
263
- display: flex;
264
- align-items: center;
265
- gap: 10px;
266
- margin-top: 12px;
267
- }
268
- .custom-model-row label {
269
- font-size: 11px;
270
- color: var(--text2);
271
- font-family: 'JetBrains Mono', monospace;
272
- white-space: nowrap;
273
- }
274
- .custom-input {
275
- flex: 1;
276
- max-width: 380px;
277
- background: var(--bg2);
278
- border: 1px solid var(--border);
279
- border-radius: 8px;
280
- color: var(--text);
281
- font-family: 'JetBrains Mono', monospace;
282
- font-size: 13px;
283
- padding: 7px 12px;
284
- outline: none;
285
- transition: border-color 0.2s;
286
- }
287
- .custom-input:focus { border-color: var(--accent); }
288
- .custom-input::placeholder { color: var(--text3); }
289
- .btn {
290
- padding: 7px 16px;
291
- border-radius: 8px;
292
- border: 1px solid var(--border2);
293
- background: linear-gradient(135deg, rgba(77,158,245,.15), rgba(139,106,245,.15));
294
- color: var(--accent);
295
- font-family: 'DM Sans', sans-serif;
296
- font-size: 13px;
297
- font-weight: 500;
298
- cursor: pointer;
299
- transition: all 0.2s;
300
- white-space: nowrap;
301
- }
302
- .btn:hover {
303
- background: linear-gradient(135deg, rgba(77,158,245,.25), rgba(139,106,245,.25));
304
- border-color: var(--accent);
305
- }
306
- .btn:active { transform: scale(.97); }
307
-
308
- /* ─── Main Split ─────────────────────────────────────── */
309
- main {
310
- flex: 1;
311
- display: grid;
312
- grid-template-columns: 1fr 1fr;
313
- gap: 0;
314
- min-height: 0;
315
- }
316
-
317
- /* ─── Left Panel (Input) ─────────────────────────────── */
318
- .input-panel {
319
- border-right: 1px solid var(--border);
320
- display: flex;
321
- flex-direction: column;
322
- padding: 0;
323
- }
324
- .panel-header {
325
- padding: 16px 24px 12px;
326
- border-bottom: 1px solid var(--border);
327
- display: flex;
328
- align-items: center;
329
- justify-content: space-between;
330
- }
331
- .panel-title {
332
- font-family: 'Bricolage Grotesque', sans-serif;
333
- font-size: 14px;
334
- font-weight: 600;
335
- color: var(--text2);
336
- letter-spacing: .3px;
337
- display: flex;
338
- align-items: center;
339
- gap: 8px;
340
- }
341
- .panel-title-icon {
342
- width: 20px;
343
- height: 20px;
344
- background: var(--bg4);
345
- border: 1px solid var(--border);
346
- border-radius: 5px;
347
- display: flex;
348
- align-items: center;
349
- justify-content: center;
350
- font-size: 11px;
351
- }
352
-
353
- .sample-btns {
354
- display: flex;
355
- gap: 6px;
356
- }
357
- .sample-btn {
358
- font-size: 11px;
359
- padding: 4px 10px;
360
- border-radius: 6px;
361
- border: 1px solid var(--border);
362
- background: var(--bg2);
363
- color: var(--text2);
364
- cursor: pointer;
365
- font-family: 'DM Sans', sans-serif;
366
- transition: all .15s;
367
- }
368
- .sample-btn:hover {
369
- border-color: var(--border2);
370
- color: var(--text);
371
- }
372
-
373
- #input-area {
374
- flex: 1;
375
- width: 100%;
376
- background: transparent;
377
- border: none;
378
- outline: none;
379
- resize: none;
380
- color: var(--text);
381
- font-family: 'DM Sans', sans-serif;
382
- font-size: 15px;
383
- line-height: 1.7;
384
- padding: 20px 24px;
385
- min-height: 220px;
386
- }
387
- #input-area::placeholder { color: var(--text3); }
388
-
389
- .char-counter {
390
- padding: 8px 24px;
391
- border-top: 1px solid var(--border);
392
- font-size: 11px;
393
- font-family: 'JetBrains Mono', monospace;
394
- color: var(--text3);
395
- text-align: right;
396
- }
397
-
398
- /* ─── Right Panel (Output) ───────────────────────────── */
399
- .output-panel {
400
- display: flex;
401
- flex-direction: column;
402
- overflow: hidden;
403
- }
404
-
405
- /* Stats row */
406
- .stats-row {
407
- display: grid;
408
- grid-template-columns: repeat(4, 1fr);
409
- border-bottom: 1px solid var(--border);
410
- }
411
- .stat-card {
412
- padding: 16px 20px;
413
- border-right: 1px solid var(--border);
414
- position: relative;
415
- overflow: hidden;
416
- }
417
- .stat-card:last-child { border-right: none; }
418
- .stat-card::after {
419
- content: '';
420
- position: absolute;
421
- bottom: 0;
422
- left: 0;
423
- right: 0;
424
- height: 2px;
425
- background: linear-gradient(90deg, transparent, var(--accent), transparent);
426
- opacity: 0;
427
- transition: opacity .3s;
428
- }
429
- .stat-card.highlight::after { opacity: 1; }
430
- .stat-label {
431
- font-size: 10px;
432
- font-family: 'JetBrains Mono', monospace;
433
- color: var(--text3);
434
- text-transform: uppercase;
435
- letter-spacing: 1px;
436
- margin-bottom: 6px;
437
- }
438
- .stat-value {
439
- font-family: 'Bricolage Grotesque', sans-serif;
440
- font-size: 26px;
441
- font-weight: 700;
442
- color: var(--text);
443
- line-height: 1;
444
- transition: all .3s;
445
- }
446
- .stat-card:nth-child(1) .stat-value { color: var(--accent); }
447
- .stat-card:nth-child(2) .stat-value { color: var(--green); }
448
- .stat-card:nth-child(3) .stat-value { color: var(--amber); }
449
- .stat-card:nth-child(4) .stat-value { color: var(--accent2); }
450
- .stat-sub {
451
- font-size: 10px;
452
- color: var(--text3);
453
- font-family: 'JetBrains Mono', monospace;
454
- margin-top: 3px;
455
- }
456
-
457
- /* View toggle */
458
- .view-toggle {
459
- display: flex;
460
- padding: 12px 20px;
461
- border-bottom: 1px solid var(--border);
462
- gap: 4px;
463
- align-items: center;
464
- justify-content: space-between;
465
- }
466
- .toggle-group {
467
- display: flex;
468
- gap: 4px;
469
- background: var(--bg2);
470
- border: 1px solid var(--border);
471
- border-radius: 8px;
472
- padding: 3px;
473
- }
474
- .toggle-btn {
475
- padding: 5px 14px;
476
- border-radius: 6px;
477
- border: none;
478
- background: transparent;
479
- color: var(--text2);
480
- font-family: 'DM Sans', sans-serif;
481
- font-size: 12px;
482
- font-weight: 500;
483
- cursor: pointer;
484
- transition: all .15s;
485
- }
486
- .toggle-btn.active {
487
- background: var(--bg4);
488
- color: var(--text);
489
- box-shadow: 0 1px 4px rgba(0,0,0,.3);
490
- }
491
- .special-toggle {
492
- display: flex;
493
- align-items: center;
494
- gap: 8px;
495
- font-size: 12px;
496
- color: var(--text2);
497
- }
498
- .toggle-switch {
499
- width: 32px;
500
- height: 18px;
501
- background: var(--bg4);
502
- border: 1px solid var(--border);
503
- border-radius: 9px;
504
- cursor: pointer;
505
- position: relative;
506
- transition: background .2s;
507
- }
508
- .toggle-switch::after {
509
- content: '';
510
- position: absolute;
511
- width: 12px;
512
- height: 12px;
513
- border-radius: 50%;
514
- background: var(--text3);
515
- top: 2px;
516
- left: 2px;
517
- transition: all .2s;
518
- }
519
- .toggle-switch.on { background: rgba(77,158,245,.3); border-color: var(--accent); }
520
- .toggle-switch.on::after { left: 16px; background: var(--accent); }
521
-
522
- /* Token Display */
523
- .token-display {
524
- flex: 1;
525
- overflow-y: auto;
526
- padding: 20px;
527
- scrollbar-width: thin;
528
- scrollbar-color: var(--border) transparent;
529
- }
530
-
531
- .placeholder-msg {
532
- display: flex;
533
- flex-direction: column;
534
- align-items: center;
535
- justify-content: center;
536
- height: 200px;
537
- gap: 16px;
538
- color: var(--text3);
539
- }
540
- .placeholder-icon {
541
- font-size: 40px;
542
- filter: grayscale(1) opacity(.3);
543
- }
544
- .placeholder-msg p {
545
- font-family: 'JetBrains Mono', monospace;
546
- font-size: 13px;
547
- text-align: center;
548
- }
549
-
550
- /* ─── Token Visualization Views ───────────────────────── */
551
-
552
- /* TEXT VIEW — inline colored token spans */
553
- .token-text-view {
554
- font-family: 'JetBrains Mono', monospace;
555
- font-size: 14px;
556
- line-height: 2.2;
557
- word-break: break-all;
558
- }
559
- .tok {
560
- display: inline;
561
- border-radius: 4px;
562
- padding: 1px 0;
563
- cursor: default;
564
- transition: filter .15s;
565
- position: relative;
566
- }
567
- .tok:hover { filter: brightness(1.3); }
568
- .tok-tooltip {
569
- display: none;
570
- position: absolute;
571
- bottom: 110%;
572
- left: 50%;
573
- transform: translateX(-50%);
574
- background: var(--bg4);
575
- border: 1px solid var(--border2);
576
- border-radius: 6px;
577
- padding: 5px 8px;
578
- font-size: 11px;
579
- white-space: nowrap;
580
- z-index: 50;
581
- pointer-events: none;
582
- box-shadow: 0 4px 20px rgba(0,0,0,.5);
583
- }
584
- .tok:hover .tok-tooltip { display: block; }
585
- .tok-tooltip-id { color: var(--accent); font-weight: 700; }
586
- .tok-tooltip-text { color: var(--text2); }
587
- .tok-space::before { content: '·'; opacity: .3; }
588
- .tok-newline::before { content: '↵'; opacity: .5; }
589
-
590
- /* ID VIEW — grid of token cards */
591
- .token-id-view {
592
- display: flex;
593
- flex-wrap: wrap;
594
- gap: 6px;
595
- }
596
- .tok-id-card {
597
- display: flex;
598
- flex-direction: column;
599
- align-items: center;
600
- border-radius: 8px;
601
- overflow: hidden;
602
- border: 1px solid;
603
- cursor: default;
604
- transition: transform .15s, box-shadow .15s;
605
- min-width: 52px;
606
- }
607
- .tok-id-card:hover {
608
- transform: translateY(-2px);
609
- box-shadow: 0 4px 16px rgba(0,0,0,.4);
610
- }
611
- .tok-id-top {
612
- padding: 3px 6px;
613
- font-family: 'JetBrains Mono', monospace;
614
- font-size: 11px;
615
- font-weight: 500;
616
- width: 100%;
617
- text-align: center;
618
- border-bottom: 1px solid rgba(255,255,255,.08);
619
- }
620
- .tok-id-bottom {
621
- padding: 2px 6px 3px;
622
- font-family: 'JetBrains Mono', monospace;
623
- font-size: 9px;
624
- color: rgba(255,255,255,.4);
625
- width: 100%;
626
- text-align: center;
627
- }
628
-
629
- /* PROBABILITY VIEW placeholder */
630
- .token-split-view {
631
- display: flex;
632
- flex-direction: column;
633
- gap: 3px;
634
- }
635
- .tok-split-row {
636
- display: flex;
637
- align-items: stretch;
638
- border-radius: 6px;
639
- overflow: hidden;
640
- border: 1px solid;
641
- font-family: 'JetBrains Mono', monospace;
642
- font-size: 12px;
643
- }
644
- .tok-split-idx {
645
- width: 38px;
646
- text-align: center;
647
- padding: 5px 4px;
648
- font-size: 10px;
649
- color: rgba(255,255,255,.3);
650
- border-right: 1px solid rgba(255,255,255,.06);
651
- display: flex;
652
- align-items: center;
653
- justify-content: center;
654
- }
655
- .tok-split-text {
656
- flex: 1;
657
- padding: 5px 8px;
658
- font-size: 13px;
659
- }
660
- .tok-split-id {
661
- padding: 5px 8px;
662
- font-size: 11px;
663
- color: rgba(255,255,255,.45);
664
- border-left: 1px solid rgba(255,255,255,.06);
665
- display: flex;
666
- align-items: center;
667
- }
668
-
669
- /* ─── Loading Overlay ────────────────────────────────── */
670
- #loading-overlay {
671
- position: fixed;
672
- inset: 0;
673
- background: rgba(6,11,20,.92);
674
- backdrop-filter: blur(8px);
675
- z-index: 1000;
676
- display: flex;
677
- flex-direction: column;
678
- align-items: center;
679
- justify-content: center;
680
- gap: 24px;
681
- transition: opacity .4s;
682
- }
683
- #loading-overlay.hidden { opacity: 0; pointer-events: none; }
684
-
685
- .loading-spinner {
686
- width: 56px;
687
- height: 56px;
688
- position: relative;
689
- }
690
- .loading-spinner::before,
691
- .loading-spinner::after {
692
- content: '';
693
- position: absolute;
694
- border-radius: 50%;
695
- border: 2px solid transparent;
696
- }
697
- .loading-spinner::before {
698
- inset: 0;
699
- border-top-color: var(--accent);
700
- animation: spin 1s linear infinite;
701
- }
702
- .loading-spinner::after {
703
- inset: 8px;
704
- border-top-color: var(--accent2);
705
- animation: spin .7s linear infinite reverse;
706
- }
707
- @keyframes spin { to { transform: rotate(360deg); } }
708
-
709
- .loading-text {
710
- font-family: 'Bricolage Grotesque', sans-serif;
711
- font-size: 20px;
712
- font-weight: 600;
713
- color: var(--text);
714
- }
715
- .loading-sub {
716
- font-family: 'JetBrains Mono', monospace;
717
- font-size: 12px;
718
- color: var(--text2);
719
- max-width: 360px;
720
- text-align: center;
721
- }
722
- .loading-bar-wrap {
723
- width: 300px;
724
- height: 3px;
725
- background: var(--bg3);
726
- border-radius: 2px;
727
- overflow: hidden;
728
- }
729
- .loading-bar {
730
- height: 100%;
731
- width: 0%;
732
- background: linear-gradient(90deg, var(--accent), var(--accent2));
733
- border-radius: 2px;
734
- transition: width .3s;
735
- }
736
- .loading-file {
737
- font-size: 11px;
738
- font-family: 'JetBrains Mono', monospace;
739
- color: var(--text3);
740
- }
741
-
742
- /* ─── Error Toast ────────────────────────────────────── */
743
- #toast {
744
- position: fixed;
745
- bottom: 24px;
746
- left: 50%;
747
- transform: translateX(-50%) translateY(80px);
748
- background: rgba(245,85,119,.15);
749
- border: 1px solid rgba(245,85,119,.4);
750
- color: var(--red);
751
- padding: 10px 20px;
752
- border-radius: 10px;
753
- font-size: 13px;
754
- font-family: 'JetBrains Mono', monospace;
755
- z-index: 500;
756
- transition: transform .3s;
757
- max-width: 500px;
758
- text-align: center;
759
- }
760
- #toast.show { transform: translateX(-50%) translateY(0); }
761
-
762
- /* ─── Footer ─────────────────────────────────────────── */
763
- footer {
764
- padding: 12px 32px;
765
- border-top: 1px solid var(--border);
766
- display: flex;
767
- align-items: center;
768
- justify-content: space-between;
769
- font-size: 11px;
770
- color: var(--text3);
771
- font-family: 'JetBrains Mono', monospace;
772
- background: rgba(6,11,20,.8);
773
- }
774
- footer a {
775
- color: var(--text2);
776
- text-decoration: none;
777
- transition: color .15s;
778
- }
779
- footer a:hover { color: var(--accent); }
780
-
781
- /* ─── Scrollbar ──────────────────────────────────────── */
782
- ::-webkit-scrollbar { width: 6px; }
783
- ::-webkit-scrollbar-track { background: transparent; }
784
- ::-webkit-scrollbar-thumb { background: var(--border); border-radius: 3px; }
785
- ::-webkit-scrollbar-thumb:hover { background: var(--border2); }
786
-
787
- /* ─── Model color indicator ──────────────────────────── */
788
- .model-indicator {
789
- display: flex;
790
- align-items: center;
791
- gap: 6px;
792
- font-size: 11px;
793
- font-family: 'JetBrains Mono', monospace;
794
- color: var(--text2);
795
- }
796
- .model-indicator-dot {
797
- width: 8px;
798
- height: 8px;
799
- border-radius: 50%;
800
- }
801
-
802
- /* ─── Responsive ─────────────────────────────────────── */
803
- @media (max-width: 900px) {
804
- header { padding: 0 16px; }
805
- .model-bar { padding: 12px 16px; }
806
- main { grid-template-columns: 1fr; }
807
- .input-panel { border-right: none; border-bottom: 1px solid var(--border); }
808
- .stats-row { grid-template-columns: repeat(2, 1fr); }
809
- .stat-card:nth-child(2) { border-right: none; }
810
- footer { flex-direction: column; gap: 4px; text-align: center; }
811
- }
812
-
813
- /* ─── Animations ─────────────────────────────────────── */
814
- @keyframes fadeIn {
815
- from { opacity: 0; transform: translateY(6px); }
816
- to { opacity: 1; transform: translateY(0); }
817
- }
818
- .fade-in {
819
- animation: fadeIn .25s ease forwards;
820
- }
821
- </style>
822
  </head>
823
- <body>
824
-
825
- <!-- Background -->
826
- <div class="bg-gradient"></div>
827
- <div class="dot-grid"></div>
828
-
829
- <div id="app">
830
-
831
- <!-- Header -->
832
- <header>
833
- <div class="logo">
834
- <div class="logo-hex">T</div>
835
- <span class="logo-name">TokenLens</span>
836
- <span class="logo-tag">v1.0</span>
837
- </div>
838
- <div class="header-right">
839
- <div class="header-badge">
840
- <span class="dot"></span>
841
- <span>runs in-browser · no server · no GPU</span>
842
- </div>
843
- </div>
844
- </header>
845
-
846
- <!-- Model Selector Bar -->
847
- <div class="model-bar">
848
- <div class="model-bar-label">▸ select tokenizer</div>
849
- <div class="model-tabs" id="model-tabs">
850
- <!-- populated by JS -->
851
- </div>
852
- <div class="custom-model-row">
853
- <label>HF model id:</label>
854
- <input class="custom-input" id="custom-model-input" type="text"
855
- placeholder="e.g. deepseek-ai/DeepSeek-V4-Pro or Xenova/gpt2" />
856
- <button class="btn" id="custom-model-btn">Load ↗</button>
857
- </div>
858
- </div>
859
-
860
- <!-- Main -->
861
- <main>
862
-
863
- <!-- Left: Input -->
864
- <div class="input-panel">
865
- <div class="panel-header">
866
- <div class="panel-title">
867
- <div class="panel-title-icon">✎</div>
868
- Input Text
869
- </div>
870
- <div class="sample-btns">
871
- <button class="sample-btn" data-sample="poetry">Poetry</button>
872
- <button class="sample-btn" data-sample="code">Code</button>
873
- <button class="sample-btn" data-sample="multilingual">Multi-lingual</button>
874
- <button class="sample-btn" data-sample="numbers">Numbers</button>
875
- <button class="sample-btn" data-sample="clear">Clear</button>
876
  </div>
877
- </div>
878
- <textarea id="input-area"
879
- placeholder="Type or paste text here to see how the tokenizer splits it into tokens…
880
- &#10;&#10;Try some special characters, code snippets, emojis 🦊, or multi-lingual text (日本語, العربية) to see how different models handle them differently."></textarea>
881
- <div class="char-counter"><span id="char-count">0</span> characters</div>
882
- </div>
883
-
884
- <!-- Right: Output -->
885
- <div class="output-panel">
886
 
887
- <!-- Stats -->
888
- <div class="stats-row">
889
- <div class="stat-card" id="sc-tokens">
890
- <div class="stat-label">Tokens</div>
891
- <div class="stat-value" id="stat-tokens">—</div>
892
- <div class="stat-sub" id="stat-model-name">no model loaded</div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
893
  </div>
894
- <div class="stat-card" id="sc-chars">
895
- <div class="stat-label">Characters</div>
896
- <div class="stat-value" id="stat-chars">—</div>
897
- <div class="stat-sub">total input</div>
898
- </div>
899
- <div class="stat-card" id="sc-words">
900
- <div class="stat-label">Words</div>
901
- <div class="stat-value" id="stat-words">—</div>
902
- <div class="stat-sub">approx</div>
903
- </div>
904
- <div class="stat-card" id="sc-ratio">
905
- <div class="stat-label">Chars / Token</div>
906
- <div class="stat-value" id="stat-ratio">—</div>
907
- <div class="stat-sub">efficiency</div>
908
- </div>
909
- </div>
910
-
911
- <!-- View toggle -->
912
- <div class="view-toggle">
913
- <div class="toggle-group">
914
- <button class="toggle-btn active" data-view="text">Text View</button>
915
- <button class="toggle-btn" data-view="ids">ID Grid</button>
916
- <button class="toggle-btn" data-view="list">Token List</button>
917
- </div>
918
- <div class="model-indicator" id="model-indicator">
919
- <div class="model-indicator-dot" id="model-dot" style="background:#3d5a80"></div>
920
- <span id="model-indicator-label">no model</span>
921
- </div>
922
- </div>
923
-
924
- <!-- Token display area -->
925
- <div class="token-display" id="token-display">
926
- <div class="placeholder-msg" id="placeholder">
927
- <div class="placeholder-icon">⬡</div>
928
- <p>Select a model above and type something<br>to see tokenization in action</p>
929
- </div>
930
- </div>
931
-
932
- </div><!-- /output-panel -->
933
- </main>
934
-
935
- <footer>
936
- <span>TokenLens — Powered by <a href="https://github.com/xenova/transformers.js" target="_blank">Transformers.js</a> · Runs entirely in your browser</span>
937
- <span>Hover tokens to see IDs · Add models via custom input above</span>
938
- </footer>
939
-
940
- </div><!-- /app -->
941
-
942
- <!-- Loading Overlay -->
943
- <div id="loading-overlay">
944
- <div class="loading-spinner"></div>
945
- <div class="loading-text" id="loading-title">Loading Tokenizer</div>
946
- <div class="loading-sub" id="loading-sub">Downloading tokenizer files from Hugging Face Hub…<br>This may take a moment on first load. Files are cached in your browser.</div>
947
- <div class="loading-bar-wrap">
948
- <div class="loading-bar" id="loading-bar"></div>
949
- </div>
950
- <div class="loading-file" id="loading-file"></div>
951
- </div>
952
-
953
- <!-- Toast -->
954
- <div id="toast"></div>
955
-
956
- <!-- ─────────────────────────────────────────────────────────
957
- TokenLens Script
958
- ─────────────────────────────────────────────────────────
959
- Architecture:
960
- • Uses @xenova/transformers (Transformers.js v2) via CDN
961
- • Tokenizer files downloaded from HF Hub and cached in IndexedDB
962
- • Extends easily: add entries to MODELS registry
963
- • Supports BPE, WordPiece, SentencePiece, Unigram tokenizers
964
- ─────────────────────────────────────────────────────────── -->
965
- <script type="module">
966
-
967
- import { AutoTokenizer, env }
968
- from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2';
969
-
970
- // ── Config ────────────────────────────────────────────────
971
- env.allowLocalModels = false;
972
- // Use HF CDN for model files
973
- env.useBrowserCache = true;
974
-
975
- // ── Model Registry ─────────────────────────────────────────
976
- // Add any HuggingFace model ID here — tokenizer.json + tokenizer_config.json
977
- // are the only files downloaded (no weights, no GPU needed).
978
- const MODELS = [
979
- {
980
- id: 'Xenova/gpt2',
981
- name: 'GPT-2',
982
- org: 'OpenAI',
983
- color: '#10a37f',
984
- vocab: '50k',
985
- type: 'BPE',
986
- desc: 'Classic GPT-2 BPE tokenizer'
987
- },
988
- {
989
- id: 'Xenova/gpt-4',
990
- name: 'GPT-4',
991
- org: 'OpenAI',
992
- color: '#10a37f',
993
- vocab: '100k',
994
- type: 'tiktoken cl100k',
995
- desc: 'Used by GPT-3.5 & GPT-4'
996
- },
997
- {
998
- id: 'Xenova/llama-tokenizer',
999
- name: 'LLaMA 2',
1000
- org: 'Meta',
1001
- color: '#0466de',
1002
- vocab: '32k',
1003
- type: 'SP-BPE',
1004
- desc: 'SentencePiece BPE — LLaMA / LLaMA-2'
1005
- },
1006
- {
1007
- id: 'Xenova/mistral-tokenizer-v1',
1008
- name: 'Mistral',
1009
- org: 'Mistral AI',
1010
- color: '#ff7722',
1011
- vocab: '32k',
1012
- type: 'SP-BPE',
1013
- desc: 'Mistral 7B v0.1 tokenizer'
1014
- },
1015
- {
1016
- id: 'Xenova/bert-base-uncased',
1017
- name: 'BERT',
1018
- org: 'Google',
1019
- color: '#4285f4',
1020
- vocab: '30k',
1021
- type: 'WordPiece',
1022
- desc: 'BERT-base uncased WordPiece'
1023
- },
1024
- {
1025
- id: 'Xenova/t5-base',
1026
- name: 'T5',
1027
- org: 'Google',
1028
- color: '#34a853',
1029
- vocab: '32k',
1030
- type: 'Unigram',
1031
- desc: 'T5 SentencePiece Unigram'
1032
- },
1033
- {
1034
- id: 'Xenova/claude-tokenizer',
1035
- name: 'Claude',
1036
- org: 'Anthropic',
1037
- color: '#cc785c',
1038
- vocab: '~100k',
1039
- type: 'BPE',
1040
- desc: "Anthropic Claude's tokenizer"
1041
- },
1042
- {
1043
- id: 'Xenova/roberta-base',
1044
- name: 'RoBERTa',
1045
- org: 'Meta',
1046
- color: '#1a73e8',
1047
- vocab: '50k',
1048
- type: 'BPE',
1049
- desc: 'RoBERTa byte-level BPE'
1050
- },
1051
- ];
1052
-
1053
- // ── Token Color Palette ────────────────────────────────────
1054
- const PALETTE = [
1055
- { text: '#ff8080', bg: 'rgba(255,128,128,.18)', border: 'rgba(255,128,128,.35)' },
1056
- { text: '#ffb84d', bg: 'rgba(255,184, 77,.18)', border: 'rgba(255,184, 77,.35)' },
1057
- { text: '#ffe066', bg: 'rgba(255,224,102,.18)', border: 'rgba(255,224,102,.35)' },
1058
- { text: '#7aed91', bg: 'rgba(122,237,145,.18)', border: 'rgba(122,237,145,.35)' },
1059
- { text: '#4ddfc0', bg: 'rgba( 77,223,192,.18)', border: 'rgba( 77,223,192,.35)' },
1060
- { text: '#56c8f5', bg: 'rgba( 86,200,245,.18)', border: 'rgba( 86,200,245,.35)' },
1061
- { text: '#748ef8', bg: 'rgba(116,142,248,.18)', border: 'rgba(116,142,248,.35)' },
1062
- { text: '#c484f8', bg: 'rgba(196,132,248,.18)', border: 'rgba(196,132,248,.35)' },
1063
- { text: '#f57cd4', bg: 'rgba(245,124,212,.18)', border: 'rgba(245,124,212,.35)' },
1064
- { text: '#fa8072', bg: 'rgba(250,128,114,.18)', border: 'rgba(250,128,114,.35)' },
1065
- { text: '#8be08b', bg: 'rgba(139,224,139,.18)', border: 'rgba(139,224,139,.35)' },
1066
- { text: '#f0c040', bg: 'rgba(240,192, 64,.18)', border: 'rgba(240,192, 64,.35)' },
1067
- { text: '#60d4e0', bg: 'rgba( 96,212,224,.18)', border: 'rgba( 96,212,224,.35)' },
1068
- { text: '#e89060', bg: 'rgba(232,144, 96,.18)', border: 'rgba(232,144, 96,.35)' },
1069
- ];
1070
-
1071
- // ── Sample texts ───────────────────────────────────────────
1072
- const SAMPLES = {
1073
- poetry: `Two roads diverged in a yellow wood,
1074
- And sorry I could not travel both
1075
- And be one traveler, long I stood
1076
- And looked down one as far as I could
1077
- To where it bent in the undergrowth;
1078
-
1079
- — Robert Frost, "The Road Not Taken"`,
1080
-
1081
- code: `async function fetchData(url, retries = 3) {
1082
- for (let i = 0; i < retries; i++) {
1083
- try {
1084
- const res = await fetch(url);
1085
- if (!res.ok) throw new Error(\`HTTP \${res.status}\`);
1086
- return await res.json();
1087
- } catch (e) {
1088
- if (i === retries - 1) throw e;
1089
- await new Promise(r => setTimeout(r, 1000 * 2 ** i));
1090
- }
1091
- }
1092
- }`,
1093
-
1094
- multilingual: `English: The quick brown fox jumps over the lazy dog.
1095
- 日本語: 吾輩は猫である。名前はまだない。
1096
- 中文: 春眠不觉晓,处处闻啼鸟。
1097
- العربية: اللغة العربية جميلة ومعبرة.
1098
- Ελληνικά: Η γνώση είναι δύναμη.
1099
- Emoji: 🌍 🦊 ⚡ 🎯 🧬 🤖 🦋`,
1100
-
1101
- numbers: `π ≈ 3.14159265358979323846
1102
- e ≈ 2.71828182845904523536
1103
- φ ≈ 1.61803398874989484820
1104
- 1,000,000 × $42.99 = $42,990,000.00
1105
- 2024-01-15T08:30:00.000Z
1106
- IPv4: 192.168.1.1 | IPv6: ::1`,
1107
-
1108
- clear: ''
1109
- };
1110
-
1111
- // ── State ──────────────────────────────────────────────────
1112
- let activeTokenizer = null;
1113
- let activeModel = null;
1114
- let tokenizerCache = {}; // modelId → tokenizer
1115
- let currentView = 'text';
1116
- let showSpecial = false;
1117
- let debounceTimer = null;
1118
-
1119
- // ── DOM References ─────────────────────────────────────────
1120
- const $overlay = document.getElementById('loading-overlay');
1121
- const $loadTitle = document.getElementById('loading-title');
1122
- const $loadSub = document.getElementById('loading-sub');
1123
- const $loadBar = document.getElementById('loading-bar');
1124
- const $loadFile = document.getElementById('loading-file');
1125
- const $modelTabs = document.getElementById('model-tabs');
1126
- const $input = document.getElementById('input-area');
1127
- const $charCount = document.getElementById('char-count');
1128
- const $display = document.getElementById('token-display');
1129
- const $placeholder = document.getElementById('placeholder');
1130
- const $stTokens = document.getElementById('stat-tokens');
1131
- const $stChars = document.getElementById('stat-chars');
1132
- const $stWords = document.getElementById('stat-words');
1133
- const $stRatio = document.getElementById('stat-ratio');
1134
- const $stModelName = document.getElementById('stat-model-name');
1135
- const $modelDot = document.getElementById('model-dot');
1136
- const $modelLabel = document.getElementById('model-indicator-label');
1137
- const $toast = document.getElementById('toast');
1138
- const $customInput = document.getElementById('custom-model-input');
1139
- const $customBtn = document.getElementById('custom-model-btn');
1140
-
1141
- // ── Utilities ──────────────────────────────────────────────
1142
-
1143
- function showOverlay(title, sub) {
1144
- $loadTitle.textContent = title;
1145
- $loadSub.textContent = sub;
1146
- $loadBar.style.width = '0%';
1147
- $loadFile.textContent = '';
1148
- $overlay.classList.remove('hidden');
1149
- }
1150
-
1151
- function hideOverlay() {
1152
- $overlay.classList.add('hidden');
1153
- }
1154
-
1155
- function showToast(msg, duration = 5000) {
1156
- $toast.textContent = msg;
1157
- $toast.classList.add('show');
1158
- setTimeout(() => $toast.classList.remove('show'), duration);
1159
- }
1160
-
1161
- function setStats(tokens, text) {
1162
- const chars = text.length;
1163
- const words = text.trim() ? text.trim().split(/\s+/).length : 0;
1164
- const ratio = tokens > 0 && chars > 0 ? (chars / tokens).toFixed(2) : '—';
1165
-
1166
- $stTokens.textContent = tokens > 0 ? tokens.toLocaleString() : '—';
1167
- $stChars.textContent = chars > 0 ? chars.toLocaleString() : '—';
1168
- $stWords.textContent = words > 0 ? words.toLocaleString() : '—';
1169
- $stRatio.textContent = ratio;
1170
-
1171
- // Pulse animation
1172
- ['sc-tokens','sc-chars','sc-words','sc-ratio'].forEach(id => {
1173
- const el = document.getElementById(id);
1174
- el.classList.remove('highlight');
1175
- void el.offsetWidth;
1176
- el.classList.add('highlight');
1177
- });
1178
- }
1179
-
1180
- // ── Decode raw token string for display ───────────────────
1181
- // Handles BPE Ġ prefix, SentencePiece ▁ prefix, byte tokens, etc.
1182
- function decodeTokenString(raw) {
1183
- if (!raw) return '';
1184
- // BPE space prefix
1185
- let s = raw.replace(/^Ġ/, ' ').replace(/Ġ/g, ' ');
1186
- // SentencePiece space prefix
1187
- s = s.replace(/^▁/, ' ').replace(/▁/g, ' ');
1188
- // Newline representation
1189
- s = s.replace(/Ċ/g, '\n');
1190
- // Carriage return
1191
- s = s.replace(/\r/g, '');
1192
- // Byte tokens like <0xAB>
1193
- s = s.replace(/<0x([0-9A-Fa-f]{2})>/g, (_, hex) => {
1194
- const code = parseInt(hex, 16);
1195
- return code < 128 ? String.fromCharCode(code) : `[0x${hex}]`;
1196
- });
1197
- return s;
1198
- }
1199
-
1200
- // ── Tokenize ───────────────────────────────────────────────
1201
- async function tokenize(text) {
1202
- if (!activeTokenizer || !text.trim()) {
1203
- $display.innerHTML = '';
1204
- $display.appendChild($placeholder);
1205
- $placeholder.style.display = 'flex';
1206
- setStats(0, text);
1207
- return;
1208
- }
1209
-
1210
- try {
1211
- $placeholder.style.display = 'none';
1212
-
1213
- // Run tokenizer — only tokenize, no special tokens by default
1214
- const encoded = await activeTokenizer(text, {
1215
- add_special_tokens: showSpecial,
1216
- return_offsets_mapping: false,
1217
- });
1218
-
1219
- const ids = Array.from(encoded.input_ids.data);
1220
-
1221
- // Get raw token strings
1222
- let rawTokens;
1223
- try {
1224
- rawTokens = activeTokenizer.model.convert_ids_to_tokens(ids);
1225
- } catch {
1226
- // Fallback: decode each token individually
1227
- rawTokens = await Promise.all(
1228
- ids.map(id => activeTokenizer.decode([id], { skip_special_tokens: false }))
1229
- );
1230
- }
1231
-
1232
- // Pair: { id, raw, display }
1233
- const tokens = ids.map((id, i) => ({
1234
- id,
1235
- raw: rawTokens[i] || '',
1236
- display: decodeTokenString(rawTokens[i] || ''),
1237
- }));
1238
-
1239
- setStats(tokens.length, text);
1240
- renderView(tokens);
1241
-
1242
- } catch (err) {
1243
- console.error('Tokenization error:', err);
1244
- showToast('Tokenization error: ' + err.message);
1245
- }
1246
- }
1247
-
1248
- // ── Render Views ───────────────────────────────────────────
1249
-
1250
- function renderView(tokens) {
1251
- if (currentView === 'text') renderTextView(tokens);
1252
- else if (currentView === 'ids') renderIdView(tokens);
1253
- else if (currentView === 'list') renderListView(tokens);
1254
- }
1255
-
1256
- function renderTextView(tokens) {
1257
- const container = document.createElement('div');
1258
- container.className = 'token-text-view fade-in';
1259
-
1260
- tokens.forEach((tok, i) => {
1261
- const c = PALETTE[i % PALETTE.length];
1262
- const span = document.createElement('span');
1263
- span.className = 'tok';
1264
- span.style.background = c.bg;
1265
- span.style.color = c.text;
1266
- span.style.borderBottom = `2px solid ${c.border}`;
1267
-
1268
- // Display text — handle spaces and newlines visually
1269
- const disp = tok.display;
1270
- if (disp === ' ') {
1271
- span.innerHTML = '&nbsp;';
1272
- } else if (disp === '\n') {
1273
- span.innerHTML = '↵<br>';
1274
- } else if (disp === '\t') {
1275
- span.innerHTML = '→&nbsp;&nbsp;&nbsp;';
1276
- } else {
1277
- span.textContent = disp;
1278
- }
1279
-
1280
- // Tooltip
1281
- const tip = document.createElement('div');
1282
- tip.className = 'tok-tooltip';
1283
-
1284
- const rawEsc = tok.raw
1285
- .replace(/&/g,'&amp;')
1286
- .replace(/</g,'&lt;')
1287
- .replace(/>/g,'&gt;');
1288
-
1289
- tip.innerHTML =
1290
- `<span class="tok-tooltip-id">#${tok.id}</span> · ` +
1291
- `<span class="tok-tooltip-text">${rawEsc || '(empty)'}</span>`;
1292
- span.appendChild(tip);
1293
-
1294
- container.appendChild(span);
1295
- });
1296
-
1297
- $display.innerHTML = '';
1298
- $display.appendChild(container);
1299
- }
1300
-
1301
- function renderIdView(tokens) {
1302
- const container = document.createElement('div');
1303
- container.className = 'token-id-view fade-in';
1304
-
1305
- tokens.forEach((tok, i) => {
1306
- const c = PALETTE[i % PALETTE.length];
1307
- const card = document.createElement('div');
1308
- card.className = 'tok-id-card';
1309
- card.style.background = c.bg;
1310
- card.style.borderColor = c.border;
1311
- card.title = `Raw: ${tok.raw}`;
1312
-
1313
- const top = document.createElement('div');
1314
- top.className = 'tok-id-top';
1315
- top.style.color = c.text;
1316
- top.textContent = tok.id;
1317
-
1318
- const bot = document.createElement('div');
1319
- bot.className = 'tok-id-bottom';
1320
- // Show abbreviated display text
1321
- const label = tok.display.slice(0, 8).replace(/\n/g,'↵').replace(/\t/g,'→');
1322
- bot.textContent = label || '…';
1323
-
1324
- card.appendChild(top);
1325
- card.appendChild(bot);
1326
- container.appendChild(card);
1327
- });
1328
-
1329
- $display.innerHTML = '';
1330
- $display.appendChild(container);
1331
- }
1332
-
1333
- function renderListView(tokens) {
1334
- const container = document.createElement('div');
1335
- container.className = 'token-split-view fade-in';
1336
-
1337
- tokens.forEach((tok, i) => {
1338
- const c = PALETTE[i % PALETTE.length];
1339
- const row = document.createElement('div');
1340
- row.className = 'tok-split-row';
1341
- row.style.background = c.bg;
1342
- row.style.borderColor = c.border;
1343
-
1344
- const idx = document.createElement('div');
1345
- idx.className = 'tok-split-idx';
1346
- idx.textContent = i;
1347
-
1348
- const text = document.createElement('div');
1349
- text.className = 'tok-split-text';
1350
- text.style.color = c.text;
1351
- const disp = tok.display.replace(/\n/g,'↵').replace(/\t/g,'→') || '(empty)';
1352
- text.textContent = disp;
1353
-
1354
- const id = document.createElement('div');
1355
- id.className = 'tok-split-id';
1356
- id.textContent = tok.id;
1357
-
1358
- row.appendChild(idx);
1359
- row.appendChild(text);
1360
- row.appendChild(id);
1361
- container.appendChild(row);
1362
- });
1363
-
1364
- $display.innerHTML = '';
1365
- $display.appendChild(container);
1366
- }
1367
-
1368
- // ── Load Tokenizer ─────────────────────────────────────────
1369
-
1370
- async function loadModel(modelId) {
1371
- if (tokenizerCache[modelId]) {
1372
- activeTokenizer = tokenizerCache[modelId];
1373
- updateModelIndicator(modelId);
1374
- await runTokenize();
1375
- return;
1376
- }
1377
-
1378
- const displayName = modelId.split('/').pop();
1379
- showOverlay(
1380
- `Loading ${displayName}`,
1381
- `Fetching tokenizer.json and tokenizer_config.json from Hugging Face Hub.\nFiles are cached in IndexedDB after first download.`
1382
- );
1383
-
1384
- let lastProgress = 0;
1385
 
1386
- try {
1387
- const tokenizer = await AutoTokenizer.from_pretrained(modelId, {
1388
- progress_callback: (info) => {
1389
- if (info.status === 'downloading') {
1390
- const pct = info.total
1391
- ? Math.round((info.loaded / info.total) * 100)
1392
- : lastProgress;
1393
- $loadBar.style.width = pct + '%';
1394
- $loadFile.textContent = info.file || '';
1395
- lastProgress = pct;
1396
- } else if (info.status === 'done') {
1397
- $loadBar.style.width = '100%';
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1398
  }
1399
- }
1400
- });
1401
-
1402
- tokenizerCache[modelId] = tokenizer;
1403
- activeTokenizer = tokenizer;
1404
- activeModel = modelId;
1405
-
1406
- updateModelIndicator(modelId);
1407
- hideOverlay();
1408
- await runTokenize();
1409
-
1410
- } catch (err) {
1411
- hideOverlay();
1412
- console.error('Failed to load tokenizer:', err);
1413
- showToast(`Failed to load "${modelId}": ${err.message}. Check the model ID and try again.`, 8000);
1414
- }
1415
- }
1416
-
1417
- function updateModelIndicator(modelId) {
1418
- const preset = MODELS.find(m => m.id === modelId);
1419
- const color = preset ? preset.color : '#7899c0';
1420
- const name = modelId.split('/').pop();
1421
- $modelDot.style.background = color;
1422
- $modelDot.style.boxShadow = `0 0 6px ${color}`;
1423
- $modelLabel.textContent = name;
1424
- $stModelName.textContent = preset ? `${preset.org} · ${preset.type} · ${preset.vocab} vocab` : modelId;
1425
- }
1426
 
1427
- // ── Build Model Tabs ───────────────────────────────────────
1428
-
1429
- function buildTabs() {
1430
- $modelTabs.innerHTML = '';
1431
- MODELS.forEach(m => {
1432
- const tab = document.createElement('div');
1433
- tab.className = 'model-tab';
1434
- tab.dataset.id = m.id;
1435
- tab.title = m.desc;
1436
- tab.innerHTML = `
1437
- <div class="model-tab-name">${m.name}</div>
1438
- <div class="model-tab-org">
1439
- <span class="model-org-dot" style="background:${m.color}"></span>${m.org}
1440
- </div>
1441
- <div class="model-tab-vocab">${m.type} · ${m.vocab} vocab</div>
1442
- `;
1443
- tab.addEventListener('click', () => selectTab(m.id));
1444
- $modelTabs.appendChild(tab);
1445
- });
1446
- }
1447
-
1448
- function selectTab(modelId) {
1449
- document.querySelectorAll('.model-tab').forEach(t => {
1450
- t.classList.toggle('active', t.dataset.id === modelId);
1451
- });
1452
- loadModel(modelId);
1453
- }
1454
-
1455
- // ── View Toggle ────────────────────────────────────────────
1456
-
1457
- document.querySelectorAll('.toggle-btn').forEach(btn => {
1458
- btn.addEventListener('click', () => {
1459
- document.querySelectorAll('.toggle-btn').forEach(b => b.classList.remove('active'));
1460
- btn.classList.add('active');
1461
- currentView = btn.dataset.view;
1462
- runTokenize();
1463
- });
1464
- });
1465
-
1466
- // ── Input Handling ─────────────────────────────────────────
1467
-
1468
- async function runTokenize() {
1469
- const text = $input.value;
1470
- $charCount.textContent = text.length;
1471
- await tokenize(text);
1472
- }
1473
-
1474
- $input.addEventListener('input', () => {
1475
- $charCount.textContent = $input.value.length;
1476
- clearTimeout(debounceTimer);
1477
- debounceTimer = setTimeout(runTokenize, 280);
1478
- });
1479
-
1480
- // ── Sample Buttons ─────────────────────────────────────────
1481
-
1482
- document.querySelectorAll('.sample-btn').forEach(btn => {
1483
- btn.addEventListener('click', () => {
1484
- const key = btn.dataset.sample;
1485
- $input.value = SAMPLES[key] ?? '';
1486
- $input.focus();
1487
- runTokenize();
1488
- });
1489
- });
1490
-
1491
- // ── Custom Model ───────────────────────────────────────────
1492
-
1493
- async function loadCustomModel() {
1494
- const id = $customInput.value.trim();
1495
- if (!id) { showToast('Please enter a model ID'); return; }
1496
-
1497
- // Deselect tabs
1498
- document.querySelectorAll('.model-tab').forEach(t => t.classList.remove('active'));
1499
- activeModel = id;
1500
- await loadModel(id);
1501
- }
1502
-
1503
- $customBtn.addEventListener('click', loadCustomModel);
1504
- $customInput.addEventListener('keydown', e => {
1505
- if (e.key === 'Enter') loadCustomModel();
1506
- });
1507
-
1508
- // ── Init ───────────────────────────────────────────────────
1509
-
1510
- buildTabs();
1511
- // Hide overlay on start (no model yet)
1512
- $overlay.classList.add('hidden');
1513
-
1514
- // Default placeholder text
1515
- $input.value = '';
1516
 
1517
- // Auto-select first model
1518
- selectTab(MODELS[0].id);
1519
 
1520
- </script>
 
 
1521
  </body>
1522
  </html>
 
1
  <!DOCTYPE html>
2
  <html lang="en">
3
  <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Universal Tokenizer Visualizer</title>
7
+ <script src="https://cdn.tailwindcss.com"></script>
8
+ <style>
9
+ .token-chip {
10
+ display: inline-block;
11
+ padding: 2px 4px;
12
+ margin: 2px;
13
+ border-radius: 4px;
14
+ font-family: monospace;
15
+ transition: all 0.2s;
16
+ cursor: default;
17
+ }
18
+ .token-chip:hover {
19
+ filter: brightness(0.9);
20
+ transform: translateY(-1px);
21
+ }
22
+ /* Color palette for tokens */
23
+ .color-0 { background-color: #fca5a5; color: #7f1d1d; }
24
+ .color-1 { background-color: #fcd34d; color: #78350f; }
25
+ .color-2 { background-color: #86efac; color: #064e3b; }
26
+ .color-3 { background-color: #93c5fd; color: #1e3a8a; }
27
+ .color-4 { background-color: #c4b5fd; color: #4c1d95; }
28
+ .color-5 { background-color: #f9a8d4; color: #701a75; }
29
+ </style>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  </head>
31
+ <body class="bg-slate-50 min-h-screen p-4 md:p-8">
32
+ <div class="max-w-5xl mx-auto bg-white rounded-xl shadow-lg overflow-hidden border border-slate-200">
33
+ <div class="bg-slate-900 p-6 text-white">
34
+ <h1 class="text-2xl font-bold">Universal Tokenizer Visualizer</h1>
35
+ <p class="text-slate-400 text-sm mt-1">Inspect how models see your text. Runs 100% in-browser.</p>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  </div>
 
 
 
 
 
 
 
 
 
37
 
38
+ <div class="p-6 space-y-6">
39
+ <div class="grid grid-cols-1 md:grid-cols-2 gap-4">
40
+ <div>
41
+ <label class="block text-sm font-medium text-slate-700 mb-1">Hugging Face Model ID</label>
42
+ <div class="flex gap-2">
43
+ <input type="text" id="modelInput" value="deepseek-ai/DeepSeek-V2"
44
+ class="flex-1 border rounded-lg px-3 py-2 text-sm focus:ring-2 focus:ring-blue-500 outline-none">
45
+ <button id="loadBtn" class="bg-blue-600 text-white px-4 py-2 rounded-lg text-sm font-semibold hover:bg-blue-700 transition">
46
+ Load
47
+ </button>
48
+ </div>
49
+ <p class="text-xs text-slate-500 mt-1 italic">Note: Ensure the model has a tokenizer.json file.</p>
50
+ </div>
51
+ <div class="flex flex-col justify-end">
52
+ <div id="status" class="text-sm font-medium text-slate-600 bg-slate-100 p-2 rounded-lg text-center border border-dashed border-slate-300">
53
+ Ready to load
54
+ </div>
55
+ </div>
56
+ </div>
57
+
58
+ <div>
59
+ <label class="block text-sm font-medium text-slate-700 mb-1">Input Text</label>
60
+ <textarea id="inputText" rows="6"
61
+ class="w-full border rounded-xl p-4 text-lg focus:ring-2 focus:ring-blue-500 outline-none"
62
+ placeholder="Type something here to see tokens..."></textarea>
63
+ </div>
64
+
65
+ <div class="flex gap-4 border-t border-b py-3 text-sm font-mono">
66
+ <div>Tokens: <span id="tokenCount" class="font-bold text-blue-600">0</span></div>
67
+ <div>Characters: <span id="charCount" class="font-bold text-slate-600">0</span></div>
68
+ </div>
69
+
70
+ <div>
71
+ <label class="block text-sm font-medium text-slate-700 mb-2">Tokenized Output</label>
72
+ <div id="visualizer" class="min-h-[150px] p-4 bg-slate-50 rounded-xl border border-slate-200 leading-relaxed">
73
+ </div>
74
+ </div>
75
+
76
+ <div>
77
+ <label class="block text-sm font-medium text-slate-700 mb-2">Token IDs</label>
78
+ <div id="tokenIds" class="text-xs font-mono p-3 bg-slate-900 text-slate-300 rounded-lg overflow-x-auto whitespace-nowrap">
79
+ []
80
+ </div>
81
+ </div>
82
  </div>
83
+ </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
+ <script type="module">
86
+ import { AutoTokenizer } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2';
87
+
88
+ let tokenizer = null;
89
+ const modelInput = document.getElementById('modelInput');
90
+ const loadBtn = document.getElementById('loadBtn');
91
+ const status = document.getElementById('status');
92
+ const inputText = document.getElementById('inputText');
93
+ const visualizer = document.getElementById('visualizer');
94
+ const tokenCount = document.getElementById('tokenCount');
95
+ const charCount = document.getElementById('charCount');
96
+ const tokenIdsDiv = document.getElementById('tokenIds');
97
+
98
+ async function loadModel(modelId) {
99
+ try {
100
+ status.innerText = "⏳ Loading tokenizer...";
101
+ status.className = "text-sm font-medium text-amber-600 bg-amber-50 p-2 rounded-lg text-center border border-amber-200";
102
+
103
+ // We specify legacy: false to ensure it looks for tokenizer.json
104
+ tokenizer = await AutoTokenizer.from_pretrained(modelId);
105
+
106
+ status.innerText = `✅ Loaded: ${modelId}`;
107
+ status.className = "text-sm font-medium text-emerald-600 bg-emerald-50 p-2 rounded-lg text-center border border-emerald-200";
108
+ updateTokenization();
109
+ } catch (e) {
110
+ status.innerText = "❌ Error loading model";
111
+ status.className = "text-sm font-medium text-red-600 bg-red-50 p-2 rounded-lg text-center border border-red-200";
112
+ console.error(e);
113
+ }
114
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
116
+ function updateTokenization() {
117
+ if (!tokenizer) return;
118
+
119
+ const text = inputText.value;
120
+ if (!text) {
121
+ visualizer.innerHTML = "";
122
+ tokenCount.innerText = "0";
123
+ charCount.innerText = "0";
124
+ tokenIdsDiv.innerText = "[]";
125
+ return;
126
+ }
127
+
128
+ // Encode text
129
+ const tokens = tokenizer.encode(text);
130
+ const decodedTokens = tokens.map(id => tokenizer.decode([id]));
131
+
132
+ // Update Stats
133
+ tokenCount.innerText = tokens.length;
134
+ charCount.innerText = text.length;
135
+ tokenIdsDiv.innerText = JSON.stringify(tokens);
136
+
137
+ // Clear visualizer
138
+ visualizer.innerHTML = "";
139
+
140
+ // Create Visual Chips
141
+ decodedTokens.forEach((token, index) => {
142
+ const span = document.createElement('span');
143
+ span.className = `token-chip color-${index % 6}`;
144
+ // Handle spaces and newlines for visibility
145
+ let displayToken = token.replace(/ /g, ' ').replace(/\n/g, '↵\n');
146
+ span.innerText = displayToken;
147
+ span.title = `ID: ${tokens[index]}`;
148
+ visualizer.appendChild(span);
149
+ });
150
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
 
152
+ loadBtn.addEventListener('click', () => loadModel(modelInput.value));
153
+ inputText.addEventListener('input', updateTokenization);
154
 
155
+ // Initial Load
156
+ loadModel(modelInput.value);
157
+ </script>
158
  </body>
159
  </html>