EXDai committed on
Commit
ea97b96
·
verified ·
1 Parent(s): 036695c

Upload index.html with huggingface_hub

Browse files
Files changed (1) hide show
  1. index.html +558 -18
index.html CHANGED
@@ -1,19 +1,559 @@
1
- <!doctype html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  </html>
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>LLM Inference — Pipeline Simulator</title>
7
+ <style>
8
+ :root {
9
+ --bg: #0d1117;
10
+ --surface: #161b22;
11
+ --border: #30363d;
12
+ --text: #c9d1d9;
13
+ --dim: #8b949e;
14
+ --accent: #58a6ff;
15
+ --cpu: #f0883e;
16
+ --ram: #3fb950;
17
+ --gpu: #bc8cff;
18
+ --vram: #f778ba;
19
+ --prefill: #da3633;
20
+ --decode: #d29922;
21
+ --active: #58a6ff;
22
+ }
23
+ * { margin: 0; padding: 0; box-sizing: border-box; }
24
+ body {
25
+ background: var(--bg);
26
+ color: var(--text);
27
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', monospace;
28
+ display: flex; justify-content: center; align-items: center;
29
+ min-height: 100vh; padding: 2rem;
30
+ }
31
+ .sim {
32
+ max-width: 820px; width: 100%;
33
+ background: var(--surface);
34
+ border: 1px solid var(--border);
35
+ border-radius: 12px; padding: 2rem;
36
+ }
37
+ h1 { font-size: 1.25rem; font-weight: 600; margin-bottom: 0.25rem; color: var(--text); }
38
+ .subtitle { font-size: 0.8rem; color: var(--dim); margin-bottom: 1.5rem; }
39
+
40
+ /* Pipeline */
41
+ .pipeline {
42
+ display: flex; align-items: center; justify-content: center;
43
+ gap: 0; margin-bottom: 1.5rem; flex-wrap: wrap;
44
+ }
45
+ .stage {
46
+ padding: 0.6rem 1rem; border-radius: 8px;
47
+ border: 2px solid var(--border); background: var(--bg);
48
+ font-size: 0.85rem; font-weight: 600;
49
+ text-align: center; min-width: 80px;
50
+ transition: all 0.3s ease;
51
+ position: relative; z-index: 2;
52
+ }
53
+ .stage.active {
54
+ border-color: var(--active);
55
+ background: #1a2332;
56
+ box-shadow: 0 0 16px rgba(88,166,255,0.25);
57
+ color: var(--active);
58
+ }
59
+ .arrow {
60
+ width: 32px; height: 2px; background: var(--border);
61
+ margin: 0 4px; flex-shrink: 0; position: relative; z-index: 1;
62
+ }
63
+ .arrow::after {
64
+ content: '▸'; position: absolute; right: -6px; top: -8px;
65
+ font-size: 0.7rem; color: var(--border);
66
+ }
67
+
68
+ /* Decode loop indicator */
69
+ .decode-loop {
70
+ display: flex; align-items: center; gap: 6px;
71
+ margin-top: 0.3rem; justify-content: center;
72
+ font-size: 0.7rem; color: var(--dim);
73
+ }
74
+ .loop-arrow {
75
+ width: 20px; height: 2px; background: var(--dim);
76
+ }
77
+ .loop-count {
78
+ padding: 2px 8px; border: 1px solid var(--border);
79
+ border-radius: 4px; font-size: 0.7rem;
80
+ }
81
+ .loop-count.active {
82
+ border-color: var(--decode); color: var(--decode);
83
+ background: rgba(210,153,34,0.1);
84
+ }
85
+
86
+ /* Resources */
87
+ .resources {
88
+ display: grid; grid-template-columns: repeat(4, 1fr);
89
+ gap: 1rem; margin-bottom: 1.5rem;
90
+ }
91
+ .res {
92
+ border: 2px solid var(--border); border-radius: 8px;
93
+ padding: 0.6rem 0.4rem; background: var(--bg); text-align: center;
94
+ transition: all 0.3s ease;
95
+ }
96
+ .res-label {
97
+ font-size: 0.7rem; text-transform: uppercase;
98
+ letter-spacing: 0.05em; margin-bottom: 0.15rem;
99
+ }
100
+ .res-status { font-size: 0.65rem; color: var(--dim); }
101
+ .res.cpu .res-label { color: var(--cpu); }
102
+ .res.ram .res-label { color: var(--ram); }
103
+ .res.gpu .res-label { color: var(--gpu); }
104
+ .res.vram .res-label { color: var(--vram); }
105
+ .res.active.cpu { border-color: var(--cpu); box-shadow: 0 0 10px rgba(240,136,62,0.25); }
106
+ .res.active.ram { border-color: var(--ram); box-shadow: 0 0 10px rgba(63,185,80,0.25); }
107
+ .res.active.gpu { border-color: var(--gpu); box-shadow: 0 0 10px rgba(188,140,255,0.25); }
108
+ .res.active.vram { border-color: var(--vram); box-shadow: 0 0 10px rgba(247,120,186,0.25); }
109
+ .res.active .res-status { color: inherit; }
110
+
111
+ /* Token display */
112
+ .token-area {
113
+ background: var(--bg); border: 1px solid var(--border);
114
+ border-radius: 8px; padding: 1rem; margin-bottom: 1.5rem;
115
+ min-height: 80px;
116
+ }
117
+ .token-label {
118
+ font-size: 0.65rem; text-transform: uppercase;
119
+ letter-spacing: 0.05em; color: var(--dim); margin-bottom: 0.5rem;
120
+ }
121
+ .token-row {
122
+ display: flex; gap: 6px; flex-wrap: wrap; align-items: center;
123
+ }
124
+ .token {
125
+ padding: 4px 10px; border-radius: 6px; font-size: 0.8rem;
126
+ border: 1px solid var(--border); background: var(--surface);
127
+ font-family: monospace; transition: all 0.3s ease;
128
+ }
129
+ .token.prefill { border-color: var(--prefill); background: rgba(218,54,51,0.1); }
130
+ .token.decode { border-color: var(--decode); background: rgba(210,153,34,0.1); }
131
+ .token.new { border-color: var(--active); background: rgba(88,166,255,0.1); animation: pulse 0.6s ease; }
132
+ .token.cached { border-color: var(--ram); opacity: 0.6; font-size: 0.7rem; }
133
+ @keyframes pulse {
134
+ 0%,100% { transform: scale(1); }
135
+ 50% { transform: scale(1.08); }
136
+ }
137
+
138
+ /* KV cache */
139
+ .kvcache {
140
+ background: var(--bg); border: 1px solid var(--border);
141
+ border-radius: 8px; padding: 0.75rem 1rem; margin-bottom: 1.5rem;
142
+ display: flex; align-items: center; gap: 0.75rem;
143
+ }
144
+ .kvcache-label {
145
+ font-size: 0.65rem; text-transform: uppercase;
146
+ letter-spacing: 0.05em; color: var(--dim); white-space: nowrap;
147
+ }
148
+ .kvcache-entries {
149
+ display: flex; gap: 4px; flex-wrap: wrap;
150
+ }
151
+ .kv-cell {
152
+ width: 14px; height: 14px; border-radius: 3px;
153
+ border: 1px solid var(--border); background: var(--surface);
154
+ transition: all 0.3s ease;
155
+ }
156
+ .kv-cell.filled { background: var(--vram); border-color: var(--vram); }
157
+
158
+ /* Status */
159
+ .status {
160
+ font-size: 0.8rem; color: var(--text); margin-bottom: 1rem;
161
+ text-align: center; min-height: 1.2em;
162
+ }
163
+ .status .highlight { color: var(--active); }
164
+
165
+ /* Controls */
166
+ .controls {
167
+ display: flex; gap: 0.75rem; justify-content: center;
168
+ }
169
+ button {
170
+ padding: 0.5rem 1.25rem; border-radius: 6px; border: 1px solid var(--border);
171
+ background: var(--bg); color: var(--text); font-size: 0.8rem;
172
+ cursor: pointer; font-family: inherit; transition: all 0.2s;
173
+ }
174
+ button:hover { border-color: var(--dim); background: #1c2128; }
175
+ button.primary {
176
+ border-color: var(--accent); color: var(--accent);
177
+ background: rgba(88,166,255,0.1);
178
+ }
179
+ button.primary:hover { background: rgba(88,166,255,0.2); }
180
+ button:disabled { opacity: 0.4; cursor: not-allowed; }
181
+
182
+ /* Step indicator dots */
183
+ .steps {
184
+ display: flex; gap: 6px; justify-content: center; margin-bottom: 1rem;
185
+ }
186
+ .step-dot {
187
+ width: 8px; height: 8px; border-radius: 50%;
188
+ background: var(--border); transition: background 0.3s;
189
+ }
190
+ .step-dot.done { background: var(--accent); }
191
+ .step-dot.current { background: var(--accent); box-shadow: 0 0 6px var(--accent); }
192
+ </style>
193
+ </head>
194
+ <body>
195
+ <div class="sim">
196
+ <h1>LLM Inference Pipeline</h1>
197
+ <p class="subtitle">Prompt: <strong>"The cat sat"</strong> — watch how it flows through the system</p>
198
+
199
+ <!-- Step dots -->
200
+ <div class="steps" id="stepDots">
201
+ <div class="step-dot"></div>
202
+ <div class="step-dot"></div>
203
+ <div class="step-dot"></div>
204
+ <div class="step-dot"></div>
205
+ <div class="step-dot"></div>
206
+ </div>
207
+
208
+ <!-- Pipeline stages -->
209
+ <div class="pipeline">
210
+ <div class="stage" id="s_input">📥 Input</div>
211
+ <div class="arrow"></div>
212
+ <div class="stage" id="s_tokenize">🔤 Tokenize</div>
213
+ <div class="arrow"></div>
214
+ <div class="stage" id="s_prefill">⚡ Prefill</div>
215
+ <div class="arrow"></div>
216
+ <div class="stage" id="s_decode">🔄 Decode</div>
217
+ <div class="arrow"></div>
218
+ <div class="stage" id="s_output">📤 Output</div>
219
+ </div>
220
+ <div class="decode-loop" id="loopInfo">
221
+ <span class="loop-arrow"></span>
222
+ <span>loop</span>
223
+ <span class="loop-count" id="loopCount">0 / 3</span>
224
+ </div>
225
+
226
+ <!-- Resources -->
227
+ <div class="resources">
228
+ <div class="res cpu" id="resCpu">
229
+ <div class="res-label">CPU</div>
230
+ <div class="res-status">idle</div>
231
+ </div>
232
+ <div class="res ram" id="resRam">
233
+ <div class="res-label">RAM</div>
234
+ <div class="res-status">idle</div>
235
+ </div>
236
+ <div class="res gpu" id="resGpu">
237
+ <div class="res-label">GPU Compute</div>
238
+ <div class="res-status">idle</div>
239
+ </div>
240
+ <div class="res vram" id="resVram">
241
+ <div class="res-label">VRAM / KV Cache</div>
242
+ <div class="res-status">idle</div>
243
+ </div>
244
+ </div>
245
+
246
+ <!-- Tokens -->
247
+ <div class="token-area">
248
+ <div class="token-label">Tokens</div>
249
+ <div class="token-row" id="tokenRow">
250
+ <span style="color:var(--dim); font-size:0.8rem;">Waiting for input...</span>
251
+ </div>
252
+ </div>
253
+
254
+ <!-- KV Cache -->
255
+ <div class="kvcache">
256
+ <span class="kvcache-label">KV Cache entries:</span>
257
+ <span class="kvcache-entries" id="kvEntries">
258
+ <span style="color:var(--dim); font-size:0.75rem;">empty</span>
259
+ </span>
260
+ </div>
261
+
262
+ <!-- Status -->
263
+ <div class="status" id="status">Click <span class="highlight">Auto</span> to run the full pipeline, or use <span class="highlight">Forward</span> / <span class="highlight">Back</span> to walk through one stage at a time.</div>
264
+
265
+ <!-- Controls -->
266
+ <div class="controls">
267
+ <button class="primary" id="btnAuto" onclick="toggleAuto()">Auto ▶</button>
268
+ <button id="btnBack" onclick="back()">◀ Back</button>
269
+ <button id="btnStep" onclick="step()">Forward ▶</button>
270
+ <button id="btnReset" onclick="reset()">↺ Reset</button>
271
+ </div>
272
+ </div>
273
+
274
+ <script>
275
+ // --- State ---
276
// Prompt tokens (tokenizer id + surface text) shown for "The cat sat".
const TOKENS = [
  { id: 576, text: 'The' },
  { id: 3797, text: ' cat' },
  { id: 7236, text: ' sat' },
];

// Tokens the simulated model emits, in generation order.
const OUTPUT = [
  { id: 389, text: ' on' },
  { id: 278, text: ' the' },
  { id: 3098, text: ' mat' },
];

// Index of the last step on the timeline. Step numbering:
//   0 idle, 1 input, 2 tokenize, 3 prefill,
//   4 .. 3 + OUTPUT.length   one decode step per output token,
//   4 + OUTPUT.length        detokenize / output.
// Derived from OUTPUT so it cannot drift from the data (7 for three tokens).
const TOTAL_STEPS = 4 + OUTPUT.length;

// Index of the step currently rendered.
let currentStep = 0;
// Handle of the pending auto-play setTimeout, or null when auto-play is off.
let autoTimer = null;
// Not read anywhere in this script; reset() zeroes it.
let decodeIdx = 0;
291
+
292
+ // --- DOM refs ---
293
// Look up every element the simulator touches once, keyed by its DOM id,
// so the step functions never query the document again.
const els = (() => {
  const ids = [
    's_input', 's_tokenize', 's_prefill', 's_decode', 's_output',
    'resCpu', 'resRam', 'resGpu', 'resVram',
    'tokenRow', 'kvEntries', 'status',
    'loopCount', 'loopInfo',
    'btnAuto', 'btnStep', 'btnBack',
  ];
  const refs = {};
  for (const id of ids) {
    refs[id] = document.getElementById(id);
  }
  // The step-indicator dots are the children of #stepDots.
  refs.stepDots = document.getElementById('stepDots').children;
  return refs;
})();
313
+
314
/**
 * Remove the `active` class from every cached DOM node.
 * Entries of `els` without a `classList` (such as the step-dot
 * collection) are skipped.
 */
function clearActive() {
  for (const node of Object.values(els)) {
    if (!node || !node.classList) continue;
    node.classList.remove('active');
  }
}
319
+
320
/**
 * Highlight the resource panels named in `names` and mark all others idle.
 *
 * @param {string[]} names - any of 'cpu', 'ram', 'gpu', 'vram'.
 */
function setActiveResources(names) {
  const panels = [
    ['cpu', els.resCpu],
    ['ram', els.resRam],
    ['gpu', els.resGpu],
    ['vram', els.resVram],
  ];
  panels.forEach(([key, panel]) => {
    const isBusy = names.includes(key);
    panel.classList.toggle('active', isBusy);
    panel.querySelector('.res-status').textContent = isBusy ? 'active' : 'idle';
  });
}
332
+
333
/** Replace the status line with the given HTML markup. */
function setStatus(msg) {
  const { status } = els;
  status.innerHTML = msg;
}

/** Replace the contents of the token row with the given HTML markup. */
function setTokens(html) {
  const { tokenRow } = els;
  tokenRow.innerHTML = html;
}

/** Replace the contents of the KV-cache strip with the given HTML markup. */
function setKV(html) {
  const { kvEntries } = els;
  kvEntries.innerHTML = html;
}
336
+
337
/**
 * Update the progress dots: dots before index `n` are marked `done`,
 * the dot at index `n` is marked `current`, and the rest are cleared.
 *
 * @param {number} n - index of the current dot.
 */
function setDots(n) {
  Array.from(els.stepDots).forEach((dot, position) => {
    dot.classList.remove('done', 'current');
    if (position < n) {
      dot.classList.add('done');
    } else if (position === n) {
      dot.classList.add('current');
    }
  });
}
344
+
345
+ // --- Step implementations ---
346
/**
 * Render step 0 (idle): nothing highlighted, no tokens, an empty KV cache,
 * and the decode-loop indicator dimmed and reset to "0 / N".
 */
function doIdle() {
  clearActive();
  setActiveResources([]);
  setTokens('<span style="color:var(--dim); font-size:0.8rem;">Waiting for input...</span>');
  setKV('<span style="color:var(--dim); font-size:0.75rem;">empty</span>');
  setStatus('Click <span class="highlight">Auto</span> to run the full pipeline, or use <span class="highlight">Forward</span> / <span class="highlight">Back</span> to walk through one stage at a time.');

  els.loopInfo.style.opacity = '0.3';
  // doDecodeStep() tints the loop arrow with an inline style that nothing
  // else clears; drop it so idle really restores the stylesheet colour.
  const loopArrow = els.loopInfo.querySelector('.loop-arrow');
  if (loopArrow) {
    loopArrow.style.background = '';
  }
  els.loopCount.textContent = `0 / ${OUTPUT.length}`;
  els.loopCount.classList.remove('active');
  setDots(0);
}
357
+
358
/**
 * Render step 1 (input): the raw prompt string is shown, the Input stage is
 * highlighted and the KV cache is still empty.
 */
function doInput() {
  clearActive();
  els.s_input.classList.add('active');
  // The status text says CPU and RAM hold the prompt, so light up both.
  setActiveResources(['cpu', 'ram']);
  setTokens('<span style="color:var(--text); font-size:1rem; font-style:italic;">"The cat sat"</span>');
  setKV('<span style="color:var(--dim); font-size:0.75rem;">empty</span>');
  setStatus('📥 <span class="highlight">Input</span> — user submits the prompt text.<br><span style="color:var(--dim); font-size:0.75rem;">CPU &amp; RAM hold the raw string. GPU is idle.</span>');

  els.loopInfo.style.opacity = '0.3';
  // Nothing has been generated yet; without this, stepping Back from a
  // later stage leaves that stage's counter on screen.
  els.loopCount.textContent = `0 / ${OUTPUT.length}`;
  setDots(1);
}
368
+
369
/**
 * Render step 2 (tokenize): the prompt is shown as its token IDs, with the
 * CPU and RAM panels active and the KV cache still empty.
 */
function doTokenize() {
  clearActive();
  els.s_tokenize.classList.add('active');
  setActiveResources(['cpu', 'ram']);

  const idChips = TOKENS
    .map((t) => `<span class="token" style="border-color:var(--cpu);">${t.id}</span>`)
    .join('');
  setTokens(`${idChips}<span style="color:var(--dim); margin-left:6px; font-size:0.7rem;">← token IDs</span>`);
  setKV('<span style="color:var(--dim); font-size:0.75rem;">empty</span>');

  // Build the example from the data so the text cannot drift from TOKENS.
  const promptText = TOKENS.map((t) => t.text).join('');
  const idList = TOKENS.map((t) => t.id).join(', ');
  setStatus(`🔤 <span class="highlight">Tokenize</span> — CPU converts text to token IDs: "${promptText}" → [${idList}]<br><span style="color:var(--dim); font-size:0.75rem;">This is a fast CPU-bound step. Tokenizer runs on CPU.</span>`);

  els.loopInfo.style.opacity = '0.3';
  // Nothing has been generated yet; clears a stale count after Back.
  els.loopCount.textContent = `0 / ${OUTPUT.length}`;
  setDots(2);
}
383
+
384
/**
 * Render step 3 (prefill): all prompt tokens are shown going through the
 * model together, the first output token appears, and one KV-cache cell is
 * drawn per prompt token.
 */
function doPrefill() {
  clearActive();
  els.s_prefill.classList.add('active');
  setActiveResources(['gpu', 'vram']);

  const [firstOut] = OUTPUT;
  const promptChips = TOKENS
    .map((t) => `<span class="token prefill">${t.id}<br><small>${t.text}</small></span>`)
    .join('');
  const arrow = '<span style="font-size:1.5rem; margin:0 4px;">→</span>';
  const firstChip = `<span class="token new">${firstOut.id}<br><small>${firstOut.text}</small></span>`;
  setTokens(promptChips + arrow + firstChip);

  // One filled cell per prompt token.
  const kvCells = TOKENS
    .map((_, i) => `<div class="kv-cell filled" title="K${i + 1} V${i + 1}"></div>`)
    .join('');
  setKV(`${kvCells}<span style="color:var(--dim); margin-left:4px; font-size:0.7rem;">${TOKENS.length} entries built</span>`);

  // Counts and the first token are taken from the data, not hard-coded.
  setStatus(`⚡ <span class="highlight">Prefill</span> — all ${TOKENS.length} tokens run through the model <em>simultaneously</em>.<br><span style="color:var(--dim); font-size:0.75rem;">GPU compute spikes. KV cache is built (stored in VRAM). First output token appears: "${firstOut.text}".<br><strong>Compute-bound:</strong> GPU is doing heavy matrix math.</span>`);

  els.loopInfo.style.opacity = '0.3';
  els.loopCount.textContent = `1 / ${OUTPUT.length}`;
  setDots(3);
}
407
+
408
/**
 * Render one decode iteration.
 *
 * Shows the prompt tokens and the outputs before `idx` as cached chips,
 * OUTPUT[idx] as the token being generated, and a KV-cache strip with
 * TOKENS.length + idx cells.
 *
 * @param {number} idx - zero-based index into OUTPUT of the token to show.
 */
function doDecodeStep(idx) {
  clearActive();
  els.s_decode.classList.add('active');
  setActiveResources(['gpu', 'vram']);

  // Everything already processed: the prompt plus earlier outputs.
  const cached = [...TOKENS, ...OUTPUT.slice(0, idx)];
  const current = OUTPUT[idx];

  const cachedChips = cached
    .map((t) => `<span class="token cached">${t.id}</span>`)
    .join('');
  const newChip = `<span class="token new">${current.id}<br><small>${current.text}</small></span>`;
  const hint = '<span style="color:var(--dim); margin-left:8px; font-size:0.7rem;">← reads KV cache</span>';
  setTokens(cachedChips + newChip + hint);

  // One cell per cached token.
  const totalKV = cached.length;
  const kvCells = Array.from(
    { length: totalKV },
    (_, i) => `<div class="kv-cell filled" title="K${i + 1} V${i + 1}"></div>`,
  ).join('');
  setKV(`${kvCells}<span style="color:var(--dim); margin-left:4px; font-size:0.7rem;">${totalKV} entries — growing each step</span>`);

  const tokenNum = idx + 1;
  setStatus(`🔄 <span class="highlight">Decode step ${tokenNum}/${OUTPUT.length}</span> — generating token: <span style="color:var(--active);">"${current.text.trim()}"</span><br><span style="color:var(--dim); font-size:0.75rem;">Only the NEW token needs fresh GPU compute. All previous tokens reuse their KV cache entries.<br><strong>Memory-bound:</strong> GPU spends most time reading the growing KV cache from VRAM.</span>`);

  els.loopInfo.style.opacity = '1';
  els.loopCount.textContent = `${tokenNum} / ${OUTPUT.length}`;
  els.loopCount.classList.add('active');
  els.loopInfo.querySelector('.loop-arrow').style.background = 'var(--decode)';
  setDots(3 + idx);
}
448
+
449
/**
 * Render the final step (detokenize and output): every token ID is shown as
 * cached, followed by the reassembled sentence, with one KV-cache cell per
 * token.
 */
function doDetokenize() {
  clearActive();
  els.s_output.classList.add('active');
  setActiveResources(['cpu']);

  const allTokens = [...TOKENS, ...OUTPUT];
  // Join the token texts rather than hard-coding the sentence.
  const finalText = allTokens.map((t) => t.text).join('');

  const chips = allTokens
    .map((t) => `<span class="token cached">${t.id}</span>`)
    .join('');
  const arrow = '<span style="font-size:1.5rem; margin:0 4px;">→</span>';
  const sentence = `<span style="color:var(--ram); font-size:1rem; font-style:italic;">"${finalText}"</span>`;
  setTokens(chips + arrow + sentence);

  const kvCells = allTokens
    .map((_, i) => `<div class="kv-cell filled" title="K${i + 1} V${i + 1}"></div>`)
    .join('');
  setKV(`${kvCells}<span style="color:var(--dim); margin-left:4px; font-size:0.7rem;">${allTokens.length} total entries (freed after request)</span>`);

  setStatus(`📤 <span class="highlight">Detokenize &amp; Output</span> — CPU converts token IDs back to text.<br><span style="color:var(--dim); font-size:0.75rem;">Final: "${finalText}" — KV cache is freed, GPU goes idle.</span>`);
  els.loopInfo.style.opacity = '0.3';
  setDots(6);
}
473
+
474
+ // --- Step state machine ---
475
/**
 * Advance the walkthrough by one stage (the Forward button).
 * Does nothing while auto-play is running or when already on the last stage.
 */
function step() {
  // Manual stepping is locked out while auto-play owns the timeline.
  if (autoTimer) return;

  // Timeline order: idle, input, tokenize, prefill, one entry per decode
  // step, then detokenize. Index 0 (idle) is only reached through reset/back.
  const decodeStages = OUTPUT.map((_, i) => () => doDecodeStep(i));
  const timeline = [doIdle, doInput, doTokenize, doPrefill, ...decodeStages, doDetokenize];
  const lastIndex = timeline.length - 1;

  if (currentStep < lastIndex) {
    currentStep += 1;
    timeline[currentStep]();
  }

  if (currentStep >= lastIndex) {
    els.btnStep.disabled = true;
  }
  els.btnBack.disabled = currentStep <= 0;
}
493
+
494
/**
 * Move the walkthrough back by one stage (the Back button).
 * Does nothing while auto-play is running or when already at step 0.
 */
function back() {
  if (autoTimer || currentStep <= 0) return;

  const decodeStages = OUTPUT.map((_, i) => () => doDecodeStep(i));
  const timeline = [doIdle, doInput, doTokenize, doPrefill, ...decodeStages, doDetokenize];

  currentStep -= 1;
  const render = timeline[currentStep];
  render();

  // Having stepped back, there is always a stage ahead again.
  els.btnStep.disabled = false;
  els.btnBack.disabled = currentStep <= 0;
}
506
+
507
/**
 * Return the simulator to its initial idle state: stop auto-play, rewind
 * the step counters, repaint the idle view and restore the button states.
 */
function reset() {
  stopAuto();

  currentStep = 0;
  decodeIdx = 0;
  doIdle();

  const { btnStep, btnBack } = els;
  btnStep.disabled = false; // there is a stage ahead
  btnBack.disabled = true; // nothing behind step 0
}
515
+
516
+ // --- Auto play ---
517
/**
 * Start auto-play from the idle state, or stop it if it is already running.
 *
 * Each stage is shown by a chained setTimeout whose handle is kept in
 * `autoTimer`; Forward and Back are disabled while the chain is alive.
 * Delays: 400 ms before the first stage, 1200 ms after an ordinary stage,
 * 1500 ms after prefill and 1800 ms after every decode step.
 */
function toggleAuto() {
  if (autoTimer) {
    stopAuto();
    return;
  }

  reset();
  els.btnAuto.textContent = '⏸ Stop';
  els.btnAuto.classList.remove('primary');
  els.btnStep.disabled = true;
  els.btnBack.disabled = true;

  const decodeActions = OUTPUT.map((_, i) => () => doDecodeStep(i));
  const actions = [doIdle, doInput, doTokenize, doPrefill, ...decodeActions, doDetokenize];
  const prefillIndex = 3;
  const firstDecode = prefillIndex + 1;
  // The final slot is detokenize, so the last decode step sits just before
  // it. The slow delay must include this index too.
  const lastDecode = actions.length - 2;

  let next = 0;
  const tick = () => {
    const shown = next;
    actions[shown]();
    currentStep = shown;
    next += 1;

    // Nothing left to show: leave auto mode right away instead of keeping
    // the buttons locked for one more timer period.
    if (next >= actions.length) {
      stopAuto();
      return;
    }

    // Longer pauses after prefill and decode so the viewer can read them.
    let delay = 1200;
    if (shown === prefillIndex) {
      delay = 1500;
    } else if (shown >= firstDecode && shown <= lastDecode) {
      delay = 1800;
    }
    autoTimer = setTimeout(tick, delay);
  };

  // Small initial delay so the viewer sees the fresh start.
  autoTimer = setTimeout(tick, 400);
}
545
+
546
/**
 * Leave auto-play mode: cancel any pending timer, restore the Auto button
 * to its initial label and style, and re-enable manual stepping.
 */
function stopAuto() {
  if (autoTimer) clearTimeout(autoTimer);
  autoTimer = null;

  // Same label the button has in the page markup.
  els.btnAuto.textContent = 'Auto ▶';
  els.btnAuto.classList.add('primary');

  // Steps are idle, input, tokenize, prefill, one per output token, then
  // detokenize, so the last step index is 4 + OUTPUT.length.
  const lastStep = 4 + OUTPUT.length;
  if (currentStep < lastStep) {
    els.btnStep.disabled = false;
  }
  els.btnBack.disabled = currentStep <= 0;
}
554
+
555
+ // --- Init ---
556
// Paint the idle state on load. There is nothing to go back to at step 0,
// so Back starts disabled, the same state reset() leaves it in.
doIdle();
els.btnBack.disabled = true;
557
+ </script>
558
+ </body>
559
  </html>