NOT-OMEGA committed on
Commit
294736a
·
verified ·
1 Parent(s): aedc5e9

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +41 -95
index.html CHANGED
@@ -199,7 +199,6 @@ canvas#spark { width: 100%; height: 48px; display: block; border-radius: var(--r
199
  <body>
200
  <div class="app">
201
 
202
- <!-- ── Sidebar ── -->
203
  <aside class="sidebar">
204
  <div class="sb-head">
205
  <h2><span>KV</span>Infer</h2>
@@ -207,7 +206,6 @@ canvas#spark { width: 100%; height: 48px; display: block; border-radius: var(--r
207
  </div>
208
  <div class="sb-body">
209
 
210
- <!-- Live stats -->
211
  <div class="card">
212
  <div class="card-title">
213
  <span class="dot idle" id="dot-s"></span>Live Performance
@@ -222,7 +220,6 @@ canvas#spark { width: 100%; height: 48px; display: block; border-radius: var(--r
222
  <span class="sval yellow" id="s-lat">—</span></div>
223
  </div>
224
 
225
- <!-- Sparkline -->
226
  <div class="card">
227
  <div class="card-title">Throughput History (tok/s)</div>
228
  <canvas id="spark"></canvas>
@@ -234,7 +231,6 @@ canvas#spark { width: 100%; height: 48px; display: block; border-radius: var(--r
234
  </div>
235
  </div>
236
 
237
- <!-- Session -->
238
  <div class="card">
239
  <div class="card-title">Session</div>
240
  <div class="srow"><span class="slabel">Turns</span>
@@ -251,14 +247,12 @@ canvas#spark { width: 100%; height: 48px; display: block; border-radius: var(--r
251
  </div>
252
  </div>
253
 
254
- <!-- System prompt -->
255
  <div class="card">
256
  <div class="card-title">System Prompt</div>
257
  <textarea class="sysprompt" id="sysprompt" rows="3"
258
  >You are a helpful, concise, and friendly AI assistant.</textarea>
259
  </div>
260
 
261
- <!-- Params -->
262
  <div class="card">
263
  <div class="card-title">Generation</div>
264
  <div style="display:flex;flex-direction:column;gap:10px;margin-top:2px">
@@ -283,7 +277,6 @@ canvas#spark { width: 100%; height: 48px; display: block; border-radius: var(--r
283
  </div>
284
  </aside>
285
 
286
- <!-- ── Chat ── -->
287
  <main class="chat">
288
  <header class="chat-hdr">
289
  <div class="badge">KVInfer · 152M</div>
@@ -324,7 +317,6 @@ canvas#spark { width: 100%; height: 48px; display: block; border-radius: var(--r
324
 
325
  </div>
326
 
327
- <!-- ── Benchmark modal ── -->
328
  <div id="bov">
329
  <div class="bmod">
330
  <div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:16px">
@@ -350,20 +342,17 @@ let totalToks = 0;
350
  let tpsHist = [];
351
  let peakTps = 0;
352
  let engCache = 0;
353
- // ─────────────────────────────────────────
354
- // Textarea auto-resize
355
- // ─────────────────────────────────────────
356
  const inp = document.getElementById('inp');
357
  inp.addEventListener('input', () => {
358
  inp.style.height = 'auto';
359
  inp.style.height = Math.min(inp.scrollHeight, 120) + 'px';
360
  });
 
361
  function handleKey(e) {
362
  if (e.key==='Enter' && !e.shiftKey) { e.preventDefault(); send(); }
363
  }
364
- // ─────────────────────────────────────────
365
- // UI helpers
366
- // ─────────────────────────────────────────
367
  function setBusy(v) {
368
  busy = v;
369
  document.getElementById('sbtn').disabled = v;
@@ -373,17 +362,21 @@ function setBusy(v) {
373
  });
374
  document.getElementById('hstatus').textContent = v ? 'Generating...' : 'Idle';
375
  }
 
376
  function scrollBot() {
377
  const el = document.getElementById('msgs');
378
  el.scrollTop = el.scrollHeight;
379
  }
 
380
  function hideWelcome() {
381
  const w = document.getElementById('welcome');
382
  if (w) w.remove();
383
  }
 
384
  function esc(s) {
385
  return s.replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/\n/g,'<br>');
386
  }
 
387
  function addUserMsg(text) {
388
  hideWelcome();
389
  const g = document.createElement('div');
@@ -392,22 +385,24 @@ function addUserMsg(text) {
392
  document.getElementById('msgs').appendChild(g);
393
  scrollBot();
394
  }
 
 
395
  function createAssistantSlot() {
396
  const g = document.createElement('div');
397
  g.className = 'mg asst';
 
 
398
  g.innerHTML = `
399
  <div class="mg-role"><div class="mlabel">Model</div></div>
400
  <div class="mg-body">
401
- <div class="bubble" id="bubble"><span class="cursor2"></span></div>
402
- <div class="bmeta" id="bmeta"></div>
403
  </div>`;
404
  document.getElementById('msgs').appendChild(g);
405
  scrollBot();
406
- return document.getElementById('bubble');
407
  }
408
- // ─────────────────────────────────────────
409
- // FIX #5 — TTFT: use explicit null, not falsy check
410
- // ─────────────────────────────────────────
411
  async function send() {
412
  if (busy) return;
413
  const text = inp.value.trim();
@@ -415,11 +410,16 @@ async function send() {
415
  inp.value = ''; inp.style.height = 'auto';
416
  addUserMsg(text);
417
  setBusy(true);
418
- const bubble = createAssistantSlot();
 
 
 
 
419
  let content = '';
420
  let t0 = Date.now();
421
- let firstTokT = null; // ← FIX: explicit null, not undefined
422
  let tokCount = 0;
 
423
  const payload = {
424
  message: text,
425
  session_id: sessId,
@@ -428,6 +428,7 @@ async function send() {
428
  temperature: parseFloat(document.getElementById('p-temp').value),
429
  top_k: parseInt(document.getElementById('p-topk').value),
430
  };
 
431
  try {
432
  const resp = await fetch(`${API}/chat`, {
433
  method: 'POST',
@@ -435,9 +436,11 @@ async function send() {
435
  body: JSON.stringify(payload),
436
  });
437
  if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
 
438
  const reader = resp.body.getReader();
439
  const decoder = new TextDecoder();
440
  let buf = '';
 
441
  while (true) {
442
  const {done, value} = await reader.read();
443
  if (done) break;
@@ -452,11 +455,9 @@ async function send() {
452
  let chunk;
453
  try { chunk = JSON.parse(raw); } catch { continue; }
454
  const now = Date.now();
 
455
  if (chunk.type === 'token') {
456
- // FIX #5 — correct null check
457
- if (firstTokT === null) {
458
- firstTokT = now;
459
- }
460
  content += chunk.text;
461
  tokCount++;
462
  totalToks++;
@@ -470,11 +471,8 @@ async function send() {
470
  const ttft = firstTokT !== null ? (firstTokT - t0) : 0;
471
  const tps = chunk.tps;
472
  const ms = chunk.total_ms;
473
- // Update meta line
474
- document.getElementById('bmeta').innerHTML =
475
- `<b>${tps}</b> tok/s Β· <b>TTFT</b> ${ttft}ms Β· ` +
476
- `<b>${tokCount}</b> tokens Β· <b>${ms.toFixed(0)}ms</b> total`;
477
- // Update sidebar stats
478
  document.getElementById('s-tps').textContent = tps + ' tok/s';
479
  document.getElementById('s-lat').textContent = ms.toFixed(0) + ' ms';
480
  tpsHist.push(tps);
@@ -483,7 +481,7 @@ async function send() {
483
  const avg = (tpsHist.reduce((a,b)=>a+b,0)/tpsHist.length).toFixed(1);
484
  document.getElementById('s-avg').textContent = avg + ' tok/s';
485
  document.getElementById('s-peak').textContent = peakTps.toFixed(1) + ' tok/s';
486
- // FIX #2 indicator — show how many tokens are cached in engine
487
  if (chunk.session_id) {
488
  fetch(`${API}/chat/history?session_id=${chunk.session_id}`)
489
  .then(r => r.json())
@@ -496,12 +494,16 @@ async function send() {
496
  document.getElementById('s-turns').textContent = turnCount;
497
  drawSpark();
498
  } else if (chunk.type === 'error') {
499
- bubble.innerHTML = `<span style="color:var(--red)">Error: ${esc(chunk.message)}</span>`;
500
  }
501
  }
502
  }
503
  } catch (err) {
504
- bubble.innerHTML = `<span style="color:var(--red)">Connection error: ${esc(err.message)}</span>`;
 
 
 
 
505
  } finally {
506
  const cur = bubble.querySelector('.cursor2');
507
  if (cur) cur.remove();
@@ -509,9 +511,7 @@ async function send() {
509
  scrollBot();
510
  }
511
  }
512
- // ─────────────────────────────────────────
513
- // Sparkline
514
- // ─────────────────────────────────────────
515
  function drawSpark() {
516
  const el = document.getElementById('spark');
517
  const ctx = el.getContext('2d');
@@ -537,9 +537,7 @@ function drawSpark() {
537
  ctx.lineTo((d.length-1)*step,H); ctx.lineTo(0,H); ctx.closePath();
538
  ctx.fillStyle=grad; ctx.fill();
539
  }
540
- // ─────────────────────────────────────────
541
- // Clear chat
542
- // ─────────────────────────────────────────
543
  async function clearChat() {
544
  if (busy) return;
545
  await fetch(`${API}/chat/reset`, {
@@ -554,10 +552,7 @@ async function clearChat() {
554
  <h2>KVInfer Studio</h2>
555
  <p>152M Β· GPT-2 Decoder-Only Β· C++ AVX2 + OpenMP Β· Persistent session KV-cache.</p>
556
  <div class="spec-chips">
557
- <span class="chip">152M params</span>
558
- <span class="chip">AVX2 SIMD</span>
559
- <span class="chip">OpenMP</span>
560
- <span class="chip">KV Cache</span>
561
  </div>
562
  </div>`;
563
  ['s-turns','s-totok'].forEach(id => document.getElementById(id).textContent = '0');
@@ -565,56 +560,7 @@ async function clearChat() {
565
  id => document.getElementById(id).textContent = 'β€”');
566
  drawSpark();
567
  }
568
- // ─────────────────────────────────────────
569
- // Benchmark modal
570
- // ─────────────────────────────────────────
571
- function openBench() { document.getElementById('bov').classList.add('on'); }
572
- function closeBench() { document.getElementById('bov').classList.remove('on'); }
573
- async function runBench() {
574
- const btn = document.getElementById('btnbench');
575
- btn.disabled = true; btn.textContent = '⏳ Running...';
576
- try {
577
- const r = await fetch(`${API}/benchmark/run`);
578
- if (!r.ok) throw new Error(`HTTP ${r.status}`);
579
- const d = await r.json();
580
- const rows = d.details.map(x => {
581
- const cls = x.tokens_per_sec>20?'good':x.tokens_per_sec>10?'mid':'bad';
582
- return `<tr>
583
- <td>${x.prompt_preview}</td>
584
- <td class="${cls}">${x.tokens_per_sec}</td>
585
- <td>${x.ttft_ms}</td>
586
- <td>${x.total_ms}</td>
587
- <td>${x.tokens_out}</td></tr>`;
588
- }).join('');
589
- document.getElementById('bcontent').innerHTML = `
590
- <div class="bench-summary">
591
- <div class="bench-stat">
592
- <div class="bval">${d.summary.avg_tps}</div>
593
- <div class="blbl">Avg Throughput (tok/s)</div>
594
- </div>
595
- <div class="bench-stat">
596
- <div class="bval">${d.summary.avg_ttft_ms}</div>
597
- <div class="blbl">Avg TTFT (ms)</div>
598
- </div>
599
- </div>
600
- <table class="btbl">
601
- <thead><tr><th>Prompt</th><th>tok/s</th><th>TTFT</th><th>Total</th><th>Toks</th></tr></thead>
602
- <tbody>${rows}</tbody>
603
- </table>
604
- <div style="margin-top:14px;display:flex;gap:8px">
605
- <button class="btn btn-p btn-sm" onclick="runBench()">β†Ί Rerun</button>
606
- <button class="btn btn-s btn-sm" onclick="closeBench()">Close</button>
607
- </div>`;
608
- } catch(e) {
609
- document.getElementById('bcontent').innerHTML =
610
- `<p style="color:var(--red);margin-bottom:12px">Error: ${e.message}</p>
611
- <button class="btn btn-s btn-sm" onclick="runBench()">Retry</button>`;
612
- }
613
- btn.disabled = false;
614
- }
615
- // ─────────────────────────────────────────
616
- // Poll server metrics every 8s
617
- // ─────────────────────────────────────────
618
  async function pollMetrics() {
619
  try {
620
  const r = await fetch(`${API}/metrics`);
@@ -626,7 +572,7 @@ async function pollMetrics() {
626
  } catch {}
627
  }
628
  pollMetrics();
629
- setInterval(pollMetrics, 8000);
630
  </script>
631
  </body>
632
  </html>
 
199
  <body>
200
  <div class="app">
201
 
 
202
  <aside class="sidebar">
203
  <div class="sb-head">
204
  <h2><span>KV</span>Infer</h2>
 
206
  </div>
207
  <div class="sb-body">
208
 
 
209
  <div class="card">
210
  <div class="card-title">
211
  <span class="dot idle" id="dot-s"></span>Live Performance
 
220
  <span class="sval yellow" id="s-lat">—</span></div>
221
  </div>
222
 
 
223
  <div class="card">
224
  <div class="card-title">Throughput History (tok/s)</div>
225
  <canvas id="spark"></canvas>
 
231
  </div>
232
  </div>
233
 
 
234
  <div class="card">
235
  <div class="card-title">Session</div>
236
  <div class="srow"><span class="slabel">Turns</span>
 
247
  </div>
248
  </div>
249
 
 
250
  <div class="card">
251
  <div class="card-title">System Prompt</div>
252
  <textarea class="sysprompt" id="sysprompt" rows="3"
253
  >You are a helpful, concise, and friendly AI assistant.</textarea>
254
  </div>
255
 
 
256
  <div class="card">
257
  <div class="card-title">Generation</div>
258
  <div style="display:flex;flex-direction:column;gap:10px;margin-top:2px">
 
277
  </div>
278
  </aside>
279
 
 
280
  <main class="chat">
281
  <header class="chat-hdr">
282
  <div class="badge">KVInfer · 152M</div>
 
317
 
318
  </div>
319
 
 
320
  <div id="bov">
321
  <div class="bmod">
322
  <div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:16px">
 
342
  let tpsHist = [];
343
  let peakTps = 0;
344
  let engCache = 0;
345
+
 
 
346
  const inp = document.getElementById('inp');
347
  inp.addEventListener('input', () => {
348
  inp.style.height = 'auto';
349
  inp.style.height = Math.min(inp.scrollHeight, 120) + 'px';
350
  });
351
+
352
  function handleKey(e) {
353
  if (e.key==='Enter' && !e.shiftKey) { e.preventDefault(); send(); }
354
  }
355
+
 
 
356
  function setBusy(v) {
357
  busy = v;
358
  document.getElementById('sbtn').disabled = v;
 
362
  });
363
  document.getElementById('hstatus').textContent = v ? 'Generating...' : 'Idle';
364
  }
365
+
366
  function scrollBot() {
367
  const el = document.getElementById('msgs');
368
  el.scrollTop = el.scrollHeight;
369
  }
370
+
371
  function hideWelcome() {
372
  const w = document.getElementById('welcome');
373
  if (w) w.remove();
374
  }
375
+
376
  function esc(s) {
377
  return s.replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/\n/g,'<br>');
378
  }
379
+
380
  function addUserMsg(text) {
381
  hideWelcome();
382
  const g = document.createElement('div');
 
385
  document.getElementById('msgs').appendChild(g);
386
  scrollBot();
387
  }
388
+
389
+ // ⚠️ FIX: Dynamic IDs added back! No more overlapping text.
390
  function createAssistantSlot() {
391
  const g = document.createElement('div');
392
  g.className = 'mg asst';
393
+ const bid = 'bubble_' + Date.now();
394
+ const mid = 'meta_' + Date.now();
395
  g.innerHTML = `
396
  <div class="mg-role"><div class="mlabel">Model</div></div>
397
  <div class="mg-body">
398
+ <div class="bubble" id="${bid}"><span class="cursor2"></span></div>
399
+ <div class="bmeta" id="${mid}"></div>
400
  </div>`;
401
  document.getElementById('msgs').appendChild(g);
402
  scrollBot();
403
+ return { bubble: document.getElementById(bid), meta: document.getElementById(mid) };
404
  }
405
+
 
 
406
  async function send() {
407
  if (busy) return;
408
  const text = inp.value.trim();
 
410
  inp.value = ''; inp.style.height = 'auto';
411
  addUserMsg(text);
412
  setBusy(true);
413
+
414
+ const slot = createAssistantSlot();
415
+ const bubble = slot.bubble;
416
+ const meta = slot.meta;
417
+
418
  let content = '';
419
  let t0 = Date.now();
420
+ let firstTokT = null;
421
  let tokCount = 0;
422
+
423
  const payload = {
424
  message: text,
425
  session_id: sessId,
 
428
  temperature: parseFloat(document.getElementById('p-temp').value),
429
  top_k: parseInt(document.getElementById('p-topk').value),
430
  };
431
+
432
  try {
433
  const resp = await fetch(`${API}/chat`, {
434
  method: 'POST',
 
436
  body: JSON.stringify(payload),
437
  });
438
  if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
439
+
440
  const reader = resp.body.getReader();
441
  const decoder = new TextDecoder();
442
  let buf = '';
443
+
444
  while (true) {
445
  const {done, value} = await reader.read();
446
  if (done) break;
 
455
  let chunk;
456
  try { chunk = JSON.parse(raw); } catch { continue; }
457
  const now = Date.now();
458
+
459
  if (chunk.type === 'token') {
460
+ if (firstTokT === null) firstTokT = now;
 
 
 
461
  content += chunk.text;
462
  tokCount++;
463
  totalToks++;
 
471
  const ttft = firstTokT !== null ? (firstTokT - t0) : 0;
472
  const tps = chunk.tps;
473
  const ms = chunk.total_ms;
474
+
475
+ meta.innerHTML = `<b>${tps}</b> tok/s Β· <b>TTFT</b> ${ttft}ms Β· <b>${tokCount}</b> tokens Β· <b>${ms.toFixed(0)}ms</b> total`;
 
 
 
476
  document.getElementById('s-tps').textContent = tps + ' tok/s';
477
  document.getElementById('s-lat').textContent = ms.toFixed(0) + ' ms';
478
  tpsHist.push(tps);
 
481
  const avg = (tpsHist.reduce((a,b)=>a+b,0)/tpsHist.length).toFixed(1);
482
  document.getElementById('s-avg').textContent = avg + ' tok/s';
483
  document.getElementById('s-peak').textContent = peakTps.toFixed(1) + ' tok/s';
484
+
485
  if (chunk.session_id) {
486
  fetch(`${API}/chat/history?session_id=${chunk.session_id}`)
487
  .then(r => r.json())
 
494
  document.getElementById('s-turns').textContent = turnCount;
495
  drawSpark();
496
  } else if (chunk.type === 'error') {
497
+ bubble.innerHTML += `<br><br><span style="color:var(--red)">Error: ${esc(chunk.message)}</span>`;
498
  }
499
  }
500
  }
501
  } catch (err) {
502
+ if (tokCount === 0) {
503
+ bubble.innerHTML = `<span style="color:var(--red)">Connection error: ${esc(err.message)}</span>`;
504
+ } else {
505
+ meta.innerHTML += ` <span style="color:var(--amber)">[Stream Interrupted]</span>`;
506
+ }
507
  } finally {
508
  const cur = bubble.querySelector('.cursor2');
509
  if (cur) cur.remove();
 
511
  scrollBot();
512
  }
513
  }
514
+
 
 
515
  function drawSpark() {
516
  const el = document.getElementById('spark');
517
  const ctx = el.getContext('2d');
 
537
  ctx.lineTo((d.length-1)*step,H); ctx.lineTo(0,H); ctx.closePath();
538
  ctx.fillStyle=grad; ctx.fill();
539
  }
540
+
 
 
541
  async function clearChat() {
542
  if (busy) return;
543
  await fetch(`${API}/chat/reset`, {
 
552
  <h2>KVInfer Studio</h2>
553
  <p>152M Β· GPT-2 Decoder-Only Β· C++ AVX2 + OpenMP Β· Persistent session KV-cache.</p>
554
  <div class="spec-chips">
555
+ <span class="chip">152M params</span><span class="chip">AVX2 SIMD</span><span class="chip">OpenMP</span><span class="chip">KV Cache</span>
 
 
 
556
  </div>
557
  </div>`;
558
  ['s-turns','s-totok'].forEach(id => document.getElementById(id).textContent = '0');
 
560
  id => document.getElementById(id).textContent = 'β€”');
561
  drawSpark();
562
  }
563
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
564
  async function pollMetrics() {
565
  try {
566
  const r = await fetch(`${API}/metrics`);
 
572
  } catch {}
573
  }
574
  pollMetrics();
575
+ setInterval(pollMetrics, 5000);
576
  </script>
577
  </body>
578
  </html>