| <!DOCTYPE html> |
| <html lang="en"> |
| <head> |
| <meta charset="UTF-8"/> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"/> |
| <title>KVInfer Studio</title> |
| <link rel="preconnect" href="https://fonts.googleapis.com"> |
| <link href="https://fonts.googleapis.com/css2?family=Space+Mono:wght@400;700&family=Syne:wght@400;600;700;800&display=swap" rel="stylesheet"> |
| <style> |
| *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; } |
| :root { |
| --bg0: #0f0e0c; --bg1: #1a1916; --bg2: #252420; --bg3: #302e28; |
| --amber: #e8a030; --amber2: #f5c060; |
| --green: #5dbd7a; --red: #d95f52; --blue: #5b8dee; |
| --text0: #ffffff; --text1: #e8e4d8; --text2: #c8c4b4; --text3: #8a8478; |
| --radius: 6px; --sidebar: 290px; |
| --mono: 'Space Mono', monospace; |
| --sans: 'Syne', sans-serif; |
| } |
| html, body { height: 100%; background: var(--bg0); color: var(--text0); font-family: var(--mono); font-size: 13px; } |
| .app { display: flex; height: 100vh; overflow: hidden; } |
| |
| |
| .sidebar { width: var(--sidebar); min-width: var(--sidebar); background: var(--bg1); |
| border-right: 1px solid var(--bg3); display: flex; flex-direction: column; overflow: hidden; } |
| |
| .sb-head { padding: 16px 18px 14px; border-bottom: 1px solid var(--bg3); background: var(--bg0); } |
| .sb-head h2 { font-family: var(--sans); font-size: 20px; font-weight: 800; |
| letter-spacing: -0.03em; color: var(--text0); } |
| .sb-head h2 span { color: var(--amber); } |
| .sb-head p { font-size: 10px; color: var(--text2); margin-top: 3px; |
| letter-spacing: 0.08em; text-transform: uppercase; } |
| |
| .sb-body { flex: 1; overflow-y: auto; padding: 10px 12px; |
| scrollbar-width: thin; scrollbar-color: var(--bg3) transparent; } |
| |
| |
| .card { background: var(--bg2); border: 1px solid var(--bg3); border-radius: var(--radius); |
| padding: 11px 13px; margin-bottom: 8px; } |
| .card-title { font-size: 9px; font-weight: 700; color: var(--text2); |
| text-transform: uppercase; letter-spacing: 0.1em; margin-bottom: 9px; |
| display: flex; align-items: center; gap: 6px; } |
| |
| |
| .srow { display: flex; justify-content: space-between; align-items: center; padding: 3.5px 0; } |
| .slabel { color: var(--text2); font-size: 11px; } |
| .sval { font-size: 12px; font-weight: 700; font-variant-numeric: tabular-nums; color: var(--text0); } |
| .green { color: var(--green); } |
| .blue { color: var(--blue); } |
| .yellow { color: var(--amber); } |
| .red { color: var(--red); } |
| |
| |
| .dot { display: inline-block; width: 6px; height: 6px; border-radius: 50%; flex-shrink: 0; } |
| .dot.idle { background: var(--text3); } |
| .dot.live { background: var(--green); animation: pulse 1.4s infinite; } |
| @keyframes pulse { 0%,100%{opacity:1;transform:scale(1)} 50%{opacity:.3;transform:scale(.7)} } |
| |
| |
| canvas#spark { width: 100%; height: 48px; display: block; border-radius: var(--radius); |
| background: var(--bg0); } |
| |
| |
| .sysprompt { width: 100%; background: var(--bg0); border: 1px solid var(--bg3); |
| border-radius: var(--radius); color: var(--text0); font-size: 11px; padding: 8px 10px; |
| resize: vertical; min-height: 56px; font-family: var(--mono); line-height: 1.6; } |
| .sysprompt:focus { outline: none; border-color: var(--amber); } |
| |
| |
| .pgroup { display: flex; flex-direction: column; gap: 4px; } |
| .pgroup label { font-size: 9px; color: var(--text2); text-transform: uppercase; |
| letter-spacing: 0.08em; display: flex; justify-content: space-between; } |
| .pgroup label span { color: var(--amber); font-weight: 700; } |
/* Range sliders in the Generation card.
   The standard `appearance` property and a ::-moz-range-thumb rule are
   declared next to the -webkit- ones so the custom track/thumb styling is
   not limited to WebKit/Blink engines. */
.pgroup input[type=range] {
  width: 100%; -webkit-appearance: none; appearance: none; height: 2px;
  background: var(--bg3); border-radius: 1px; outline: none;
}
.pgroup input[type=range]::-webkit-slider-thumb {
  -webkit-appearance: none; appearance: none; width: 11px; height: 11px; border-radius: 50%;
  background: var(--amber); cursor: pointer; border: 2px solid var(--bg1);
}
.pgroup input[type=range]::-moz-range-thumb {
  width: 11px; height: 11px; border-radius: 50%;
  background: var(--amber); cursor: pointer; border: 2px solid var(--bg1);
}
| |
| |
| .btn { display: inline-flex; align-items: center; gap: 5px; padding: 6px 12px; |
| border-radius: var(--radius); border: 1px solid var(--bg3); cursor: pointer; |
| font-size: 10px; font-weight: 700; font-family: var(--mono); |
| text-transform: uppercase; letter-spacing: 0.05em; transition: all .13s; } |
| .btn-p { background: var(--amber); color: var(--bg0); border-color: var(--amber); } |
| .btn-p:hover { background: var(--amber2); border-color: var(--amber2); } |
| .btn-s { background: var(--bg3); color: var(--text1); } |
| .btn-s:hover { background: #3a3830; color: var(--text0); border-color: var(--text3); } |
| .btn-sm { padding: 5px 9px; font-size: 10px; } |
| .btn-full { width: 100%; justify-content: center; } |
| .btn:disabled { opacity: .35; cursor: not-allowed; } |
| |
| |
| .chat { flex: 1; display: flex; flex-direction: column; overflow: hidden; background: var(--bg0); } |
| .chat-hdr { height: 52px; display: flex; align-items: center; padding: 0 22px; |
| border-bottom: 1px solid var(--bg3); background: var(--bg1); gap: 10px; } |
| .badge { background: rgba(232,160,48,0.12); border: 1px solid rgba(232,160,48,0.3); |
| border-radius: var(--radius); padding: 3px 10px; |
| font-size: 11px; font-weight: 700; color: var(--amber); letter-spacing: 0.05em; |
| font-family: var(--sans); } |
| .chat-hdr h1 { font-family: var(--sans); font-size: 14px; font-weight: 600; color: var(--text0); } |
| .chat-hdr .spc { flex: 1; } |
| .statpill { display: flex; align-items: center; gap: 6px; |
| font-size: 10px; color: var(--text2); letter-spacing: 0.06em; text-transform: uppercase; } |
| |
| |
| .msgs { flex: 1; overflow-y: auto; padding: 0; |
| display: flex; flex-direction: column; scroll-behavior: smooth; |
| scrollbar-width: thin; scrollbar-color: var(--bg3) transparent; } |
| |
| .mg { display: flex; gap: 0; padding: 16px 24px; |
| border-bottom: 1px solid var(--bg3); animation: fadeup 0.18s ease; } |
| .mg:last-child { border-bottom: none; } |
| @keyframes fadeup { from { opacity: 0; transform: translateY(8px); } to { opacity: 1; transform: none; } } |
| |
| .mg-role { width: 64px; flex-shrink: 0; padding-top: 1px; } |
| .mlabel { font-size: 9px; font-weight: 700; letter-spacing: 0.1em; text-transform: uppercase; } |
| .mg.user .mlabel { color: var(--amber); } |
| .mg.asst .mlabel { color: var(--green); } |
| .mg-body { flex: 1; min-width: 0; } |
| |
| .bubble { font-family: var(--mono); font-size: 13px; line-height: 1.75; |
| color: var(--text1); white-space: pre-wrap; word-break: break-word; max-width: 680px; } |
| .mg.user .bubble { color: var(--text2); } |
| |
| .cursor2 { display: inline-block; width: 8px; height: 2px; |
| background: var(--amber); margin-left: 3px; |
| vertical-align: middle; animation: blink .6s infinite; } |
| @keyframes blink { 0%,49%{opacity:1} 50%,100%{opacity:0} } |
| |
| .bmeta { font-size: 10px; color: var(--text3); margin-top: 6px; display: flex; gap: 12px; } |
| .bmeta b { color: var(--amber); } |
| |
| |
| .welcome { flex: 1; display: flex; flex-direction: column; |
| align-items: center; justify-content: center; gap: 14px; |
| text-align: center; padding: 40px; animation: fadeup 0.4s ease; } |
| .welcome .logo { font-family: var(--sans); font-size: 80px; font-weight: 800; |
| color: var(--amber); letter-spacing: -0.06em; line-height: 1; |
| text-shadow: 0 0 80px rgba(232,160,48,0.2); } |
| .welcome h2 { font-family: var(--sans); font-size: 26px; font-weight: 700; |
| color: var(--text0); letter-spacing: -0.02em; } |
| .welcome p { max-width: 420px; line-height: 1.8; font-size: 13px; color: var(--text2); } |
| .spec-chips { display: flex; gap: 7px; flex-wrap: wrap; justify-content: center; margin-top: 4px; } |
| .chip { font-size: 10px; padding: 4px 12px; border: 1px solid var(--bg3); |
| border-radius: 20px; color: var(--text2); letter-spacing: 0.07em; |
| text-transform: uppercase; background: var(--bg2); } |
| |
| |
| .inputbar { padding: 14px 20px; border-top: 1px solid var(--bg3); background: var(--bg1); } |
| .inputwrap { background: var(--bg2); border: 1px solid var(--bg3); |
| border-radius: var(--radius); display: flex; align-items: flex-end; |
| padding: 3px 3px 3px 14px; gap: 5px; transition: border-color .13s; } |
| .inputwrap:focus-within { border-color: var(--amber); } |
| #inp { flex: 1; background: none; border: none; outline: none; |
| color: var(--text0); font-size: 13px; font-family: var(--mono); |
| resize: none; line-height: 1.6; max-height: 120px; padding: 8px 0; } |
| #inp::placeholder { color: var(--text3); } |
| .sbtn { background: var(--amber); border: none; cursor: pointer; |
| width: 34px; height: 34px; border-radius: var(--radius); display: flex; |
| align-items: center; justify-content: center; flex-shrink: 0; |
| transition: .13s; align-self: flex-end; margin-bottom: 3px; } |
| .sbtn:hover { background: var(--amber2); } |
| .sbtn:disabled { opacity: .3; cursor: not-allowed; } |
| .sbtn svg { fill: var(--bg0); } |
| .input-hint { margin-top: 6px; font-size: 10px; color: var(--text3); |
| text-align: right; letter-spacing: 0.04em; } |
| |
| |
| #bov { display: none; position: fixed; inset: 0; background: rgba(10,9,8,.85); |
| z-index: 100; align-items: center; justify-content: center; } |
| #bov.on { display: flex; } |
| .bmod { background: var(--bg1); border: 1px solid var(--bg3); |
| border-radius: 8px; padding: 24px; width: 520px; max-height: 80vh; overflow-y: auto; |
| animation: fadeup 0.18s ease; } |
| .bmod h3 { font-family: var(--sans); font-size: 16px; font-weight: 700; |
| margin-bottom: 16px; color: var(--text0); } |
| .btbl { width: 100%; border-collapse: collapse; font-size: 11px; font-family: var(--mono); } |
| .btbl th, .btbl td { padding: 7px 10px; text-align: left; border-bottom: 1px solid var(--bg3); } |
| .btbl th { color: var(--text3); font-weight: 400; font-size: 9px; |
| text-transform: uppercase; letter-spacing: 0.1em; } |
| .btbl td { color: var(--text2); } |
| .btbl td.good { color: var(--green); } |
| .btbl td.mid { color: var(--amber); } |
| .btbl td.bad { color: var(--red); } |
| .bench-summary { display: grid; grid-template-columns: 1fr 1fr; gap: 1px; |
| background: var(--bg3); border: 1px solid var(--bg3); border-radius: var(--radius); |
| overflow: hidden; margin-bottom: 16px; } |
| .bench-stat { background: var(--bg2); padding: 14px 16px; text-align: center; } |
| .bench-stat .bval { font-family: var(--sans); font-size: 26px; font-weight: 800; |
| color: var(--amber); letter-spacing: -0.03em; } |
| .bench-stat .blbl { font-size: 9px; color: var(--text3); |
| text-transform: uppercase; letter-spacing: 0.1em; margin-top: 2px; } |
| </style> |
| </head> |
| <body> |
| <div class="app"> |
|
|
| <aside class="sidebar"> |
| <div class="sb-head"> |
| <h2><span>KV</span>Infer</h2> |
| <p>152M · GPT-2 · AVX2 + OpenMP · KV-Cache</p> |
| </div> |
| <div class="sb-body"> |
|
|
| <div class="card"> |
| <div class="card-title"> |
| <span class="dot idle" id="dot-s"></span>Live Performance |
| </div> |
| <div class="srow"><span class="slabel">Throughput</span> |
| <span class="sval green" id="s-tps">—</span></div> |
| <div class="srow"><span class="slabel">TTFT</span> |
| <span class="sval blue" id="s-ttft">—</span></div> |
| <div class="srow"><span class="slabel">Tokens out</span> |
| <span class="sval" id="s-toks">—</span></div> |
| <div class="srow"><span class="slabel">Last latency</span> |
| <span class="sval yellow" id="s-lat">—</span></div> |
| </div> |
|
|
| <div class="card"> |
| <div class="card-title">Throughput History (tok/s)</div> |
| <canvas id="spark"></canvas> |
| <div style="margin-top:7px"> |
| <div class="srow"><span class="slabel">Session avg</span> |
| <span class="sval green" id="s-avg">—</span></div> |
| <div class="srow"><span class="slabel">Session peak</span> |
| <span class="sval green" id="s-peak">—</span></div> |
| </div> |
| </div> |
|
|
| <div class="card"> |
| <div class="card-title">Session</div> |
| <div class="srow"><span class="slabel">Turns</span> |
| <span class="sval" id="s-turns">0</span></div> |
| <div class="srow"><span class="slabel">Total tokens</span> |
| <span class="sval" id="s-totok">0</span></div> |
| <div class="srow"><span class="slabel">Tokens in engine</span> |
| <span class="sval blue" id="s-engcache">—</span></div> |
| <div class="srow"><span class="slabel">Server RAM</span> |
| <span class="sval" id="s-ram">—</span></div> |
| <div style="display:flex;gap:6px;margin-top:10px"> |
| <button class="btn btn-s btn-sm btn-full" onclick="clearChat()">↺ Clear</button> |
| <button class="btn btn-p btn-sm btn-full" onclick="openBench()">⊞ Benchmark</button> |
| </div> |
| </div> |
|
|
| <div class="card"> |
| <div class="card-title">System Prompt</div> |
| <textarea class="sysprompt" id="sysprompt" rows="3" |
| >You are a helpful, concise, and friendly AI assistant.</textarea> |
| </div> |
|
|
| <div class="card"> |
| <div class="card-title">Generation</div> |
| <div style="display:flex;flex-direction:column;gap:10px;margin-top:2px"> |
| <div class="pgroup"> |
| <label>Temperature <span id="v-temp">0.70</span></label> |
| <input type="range" id="p-temp" min="0.1" max="2.0" step="0.05" value="0.7" |
| oninput="document.getElementById('v-temp').textContent=parseFloat(this.value).toFixed(2)"> |
| </div> |
| <div class="pgroup"> |
| <label>Top-K <span id="v-topk">40</span></label> |
| <input type="range" id="p-topk" min="1" max="200" step="1" value="40" |
| oninput="document.getElementById('v-topk').textContent=this.value"> |
| </div> |
| <div class="pgroup"> |
| <label>Max tokens <span id="v-maxt">200</span></label> |
| <input type="range" id="p-maxt" min="20" max="500" step="10" value="200" |
| oninput="document.getElementById('v-maxt').textContent=this.value"> |
| </div> |
| </div> |
| </div> |
|
|
| </div> |
| </aside> |
|
|
| <main class="chat"> |
| <header class="chat-hdr"> |
| <div class="badge">KVInfer · 152M</div> |
| <h1>Chat</h1> |
| <div class="spc"></div> |
| <div class="statpill"> |
| <span class="dot idle" id="dot-h"></span> |
| <span id="hstatus">Idle</span> |
| </div> |
| </header> |
|
|
| <div class="msgs" id="msgs"> |
| <div class="welcome" id="welcome"> |
| <div class="logo">KV</div> |
| <h2>KVInfer Studio</h2> |
| <p>152M · GPT-2 Decoder-Only · Custom C++ inference engine with AVX2 SIMD, OpenMP parallelism & persistent session KV-cache.</p> |
| <div class="spec-chips"> |
| <span class="chip">152M params</span> |
| <span class="chip">AVX2 SIMD</span> |
| <span class="chip">OpenMP</span> |
| <span class="chip">KV Cache</span> |
| <span class="chip">Streaming</span> |
| </div> |
| </div> |
| </div> |
|
|
<!-- Message composer. The textarea and the icon-only send button carry
     aria-labels so they have an accessible name; the SVG is decorative. -->
<div class="inputbar">
  <div class="inputwrap">
    <textarea id="inp" rows="1" placeholder="Send a message..." aria-label="Message"
      onkeydown="handleKey(event)"></textarea>
    <button type="button" class="sbtn" id="sbtn" aria-label="Send message" onclick="send()">
      <svg width="15" height="15" viewBox="0 0 24 24" aria-hidden="true" focusable="false"><path d="M2 21l21-9L2 3v7l15 2-15 2v7z"/></svg>
    </button>
  </div>
  <div class="input-hint">Enter to send · Shift+Enter for newline</div>
</div>
| </main> |
|
|
| </div> |
|
|
| <div id="bov"> |
| <div class="bmod"> |
| <div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:16px"> |
| <h3>📊 Quick Benchmark</h3> |
| <button class="btn btn-s btn-sm" onclick="closeBench()">✕</button> |
| </div> |
| <div id="bcontent"> |
| <p style="color:var(--text2);font-size:12px;line-height:1.7;margin-bottom:14px">Runs 5 built-in prompts and measures throughput, TTFT, and per-token latency.</p> |
| <button class="btn btn-p btn-full" style="margin-top:4px" id="btnbench" onclick="runBench()">▶ Run Benchmark</button> |
| </div> |
| </div> |
| </div> |
|
|
| <script> |
| |
| |
| |
| const API = ''; |
| let sessId = crypto.randomUUID(); |
| let busy = false; |
| let turnCount = 0; |
| let totalToks = 0; |
| let tpsHist = []; |
| let peakTps = 0; |
| let engCache = 0; |
| |
// Message textarea: on every edit, collapse it and then grow it to fit its
// content, never taller than 120px.
const inp = document.getElementById('inp');
inp.addEventListener('input', function autosizeInput() {
  const { style } = inp;
  style.height = 'auto';
  style.height = `${Math.min(inp.scrollHeight, 120)}px`;
});
| |
/**
 * Keydown handler for the message box: a plain Enter submits the message,
 * Shift+Enter (and every other key) is left to the browser.
 * @param {KeyboardEvent} e
 */
function handleKey(e) {
  const isPlainEnter = e.key === 'Enter' && !e.shiftKey;
  if (!isPlainEnter) {
    return;
  }
  e.preventDefault();
  send();
}
| |
/**
 * Record the busy flag and mirror it in the UI: the send button is disabled,
 * both status dots switch between "live" and "idle", and the header status
 * text changes.
 * @param {boolean} v - true while a reply is being generated.
 */
function setBusy(v) {
  busy = v;

  const dotClass = v ? 'dot live' : 'dot idle';
  const statusText = v ? 'Generating...' : 'Idle';

  document.getElementById('sbtn').disabled = v;
  for (const dotId of ['dot-s', 'dot-h']) {
    document.getElementById(dotId).className = dotClass;
  }
  document.getElementById('hstatus').textContent = statusText;
}
| |
/** Scroll the message list all the way down. */
function scrollBot() {
  const pane = document.getElementById('msgs');
  const { scrollHeight } = pane;
  pane.scrollTop = scrollHeight;
}
| |
/** Remove the welcome panel from the page if it is still present. */
function hideWelcome() {
  document.getElementById('welcome')?.remove();
}
| |
/**
 * Escape text so it can be assigned to innerHTML without being parsed as
 * markup, and turn newlines into <br>.
 *
 * The ampersand is replaced first so the entities produced by the later
 * replacements are not escaped a second time.
 *
 * @param {*} s - value to escape; null/undefined become the empty string,
 *   anything else is converted with String().
 * @returns {string} HTML-safe text.
 */
function esc(s) {
  return String(s ?? '')
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;')
    .replace(/\n/g, '<br>');
}
| |
/**
 * Append a "You" row containing the escaped message text to the message
 * list, hiding the welcome panel first and scrolling to the bottom afterwards.
 * @param {string} text - the user's message.
 */
function addUserMsg(text) {
  hideWelcome();

  const roleCol = '<div class="mg-role"><div class="mlabel">You</div></div>';
  const bodyCol = `<div class="mg-body"><div class="bubble">${esc(text)}</div></div>`;

  const row = document.createElement('div');
  row.className = 'mg user';
  row.innerHTML = roleCol + bodyCol;

  document.getElementById('msgs').appendChild(row);
  scrollBot();
}
| |
// Sequence number that keeps slot ids distinct even when two slots are
// created within the same millisecond.
let assistantSlotSeq = 0;

/**
 * Append an empty "Model" row (a bubble showing only the blinking cursor,
 * plus an empty stats line) to the message list and scroll to it.
 *
 * The bubble and stats nodes are looked up inside the freshly created row
 * rather than globally by id, so the returned references always belong to
 * this row.
 *
 * @returns {{bubble: Element, meta: Element}} the nodes to stream into.
 */
function createAssistantSlot() {
  const row = document.createElement('div');
  row.className = 'mg asst';

  assistantSlotSeq += 1;
  const suffix = `${Date.now()}_${assistantSlotSeq}`;

  row.innerHTML = `
    <div class="mg-role"><div class="mlabel">Model</div></div>
    <div class="mg-body">
      <div class="bubble" id="bubble_${suffix}"><span class="cursor2"></span></div>
      <div class="bmeta" id="meta_${suffix}"></div>
    </div>`;

  document.getElementById('msgs').appendChild(row);
  scrollBot();

  return {
    bubble: row.querySelector('.bubble'),
    meta: row.querySelector('.bmeta'),
  };
}
| |
/**
 * Send the composer's text to POST {API}/chat and stream the reply into a new
 * assistant row, updating the sidebar statistics as events arrive.
 *
 * The response body is read as newline-separated `data: <json>` lines.
 * Handled event types:
 *   - `token`: append `text` to the bubble and bump the token counters;
 *   - `done` : show final throughput/latency, record the throughput sample,
 *              refresh the engine token count and redraw the sparkline;
 *   - `error`: append the server's message to the bubble in red.
 * A `data: [DONE]` sentinel and lines that are not valid JSON are skipped.
 *
 * Does nothing while a generation is already running or when the input is
 * blank. Network/HTTP failures are reported inside the assistant row; the
 * busy state is always cleared on exit.
 *
 * @returns {Promise<void>}
 */
async function send() {
  if (busy) return;
  const text = inp.value.trim();
  if (!text) return;
  inp.value = '';
  inp.style.height = 'auto';
  addUserMsg(text);
  setBusy(true);

  const { bubble, meta } = createAssistantSlot();

  const setStat = (id, value) => {
    document.getElementById(id).textContent = value;
  };
  // Server-supplied figures are coerced to finite numbers so that a missing
  // or string field cannot abort the stats update, and so that only numbers
  // are interpolated into innerHTML.
  const toNumber = (v) => {
    const n = Number(v);
    return Number.isFinite(n) ? n : 0;
  };

  const t0 = Date.now();
  let content = '';
  let firstTokT = null;
  let tokCount = 0;

  const payload = {
    message: text,
    session_id: sessId,
    system_prompt: document.getElementById('sysprompt').value,
    max_new_tokens: Number.parseInt(document.getElementById('p-maxt').value, 10),
    temperature: Number.parseFloat(document.getElementById('p-temp').value),
    top_k: Number.parseInt(document.getElementById('p-topk').value, 10),
  };

  const onToken = (chunk) => {
    if (firstTokT === null) {
      firstTokT = Date.now();
      setStat('s-ttft', `${firstTokT - t0} ms`);
    }
    content += chunk.text ?? '';
    tokCount += 1;
    totalToks += 1;
    setStat('s-totok', totalToks);
    setStat('s-toks', `${tokCount} tok`);
    bubble.innerHTML = esc(content) + '<span class="cursor2"></span>';
    scrollBot();
  };

  const onDone = (chunk) => {
    bubble.innerHTML = esc(content);
    const ttft = firstTokT !== null ? firstTokT - t0 : 0;
    const tps = toNumber(chunk.tps);
    const ms = toNumber(chunk.total_ms);

    meta.innerHTML = `<b>${tps}</b> tok/s · <b>TTFT</b> ${ttft}ms · <b>${tokCount}</b> tokens · <b>${ms.toFixed(0)}ms</b> total`;
    setStat('s-tps', `${tps} tok/s`);
    setStat('s-lat', `${ms.toFixed(0)} ms`);

    // Keep only the 30 most recent throughput samples.
    tpsHist.push(tps);
    if (tpsHist.length > 30) tpsHist.shift();
    if (tps > peakTps) peakTps = tps;
    const avg = (tpsHist.reduce((a, b) => a + b, 0) / tpsHist.length).toFixed(1);
    setStat('s-avg', `${avg} tok/s`);
    setStat('s-peak', `${peakTps.toFixed(1)} tok/s`);

    if (chunk.session_id) {
      // Best effort: the engine token count is informational only, so a
      // failed lookup is deliberately ignored.
      fetch(`${API}/chat/history?session_id=${encodeURIComponent(chunk.session_id)}`)
        .then((r) => r.json())
        .then((d) => {
          engCache = d.tokens_in_engine || 0;
          setStat('s-engcache', `${engCache} tok`);
        })
        .catch(() => {});
    }
    turnCount += 1;
    setStat('s-turns', turnCount);
    drawSpark();
  };

  const onError = (chunk) => {
    const message = String(chunk.message ?? 'unknown error');
    bubble.innerHTML += `<br><br><span style="color:var(--red)">Error: ${esc(message)}</span>`;
  };

  // Parse one line of the stream and dispatch the event it carries.
  const consumeLine = (rawLine) => {
    const line = rawLine.trim();
    if (!line.startsWith('data:')) return;
    const raw = line.slice(5).trim();
    if (raw === '[DONE]') return;
    let chunk;
    try {
      chunk = JSON.parse(raw);
    } catch {
      return;
    }
    if (chunk === null || typeof chunk !== 'object') return;
    if (chunk.type === 'token') onToken(chunk);
    else if (chunk.type === 'done') onDone(chunk);
    else if (chunk.type === 'error') onError(chunk);
  };

  try {
    const resp = await fetch(`${API}/chat`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload),
    });
    if (!resp.ok) throw new Error(`HTTP ${resp.status}`);

    const reader = resp.body.getReader();
    const decoder = new TextDecoder();
    let buf = '';

    for (;;) {
      const { done, value } = await reader.read();
      // At end of stream, flush the decoder and treat whatever is left in
      // the buffer as a final line even if it has no trailing newline.
      buf += done ? decoder.decode() : decoder.decode(value, { stream: true });
      const lines = buf.split('\n');
      buf = done ? '' : lines.pop();
      for (const rawLine of lines) consumeLine(rawLine);
      if (done) break;
    }
  } catch (err) {
    if (tokCount === 0) {
      bubble.innerHTML = `<span style="color:var(--red)">Connection error: ${esc(String(err?.message ?? err))}</span>`;
    } else {
      meta.innerHTML += ` <span style="color:var(--amber)">[Stream Interrupted]</span>`;
    }
  } finally {
    const cur = bubble.querySelector('.cursor2');
    if (cur) cur.remove();
    setBusy(false);
    scrollBot();
  }
}
| |
/**
 * Redraw the throughput sparkline on the #spark canvas from tpsHist.
 *
 * The backing store is sized to the element's CSS size times the device
 * pixel ratio and the context is scaled by the same ratio, so all drawing
 * below uses CSS-pixel coordinates. With fewer than two samples the canvas
 * is only cleared.
 */
function drawSpark() {
  const canvas = document.getElementById('spark');
  const ctx = canvas.getContext('2d');
  const dpr = window.devicePixelRatio || 1;
  canvas.width = canvas.clientWidth * dpr;
  canvas.height = canvas.clientHeight * dpr;
  ctx.scale(dpr, dpr);

  const W = canvas.clientWidth;
  const H = canvas.clientHeight;
  ctx.clearRect(0, 0, W, H);

  const samples = tpsHist;
  const last = samples.length - 1;
  if (last < 1) {
    return;
  }

  // Vertical scale leaves 15% headroom above the largest sample.
  const mx = Math.max(...samples) * 1.15 || 1;
  const step = W / last;

  const grad = ctx.createLinearGradient(0, 0, 0, H);
  grad.addColorStop(0, 'rgba(232,160,48,.28)');
  grad.addColorStop(1, 'rgba(232,160,48,.02)');

  // Trace and stroke the line through every sample.
  ctx.beginPath();
  for (let i = 0; i <= last; i += 1) {
    const x = i * step;
    const y = H - (samples[i] / mx) * (H - 4) - 2;
    if (i === 0) {
      ctx.moveTo(x, y);
    } else {
      ctx.lineTo(x, y);
    }
  }
  ctx.strokeStyle = '#e8a030';
  ctx.lineWidth = 1.5;
  ctx.stroke();

  // Extend the same path down to the baseline and fill the area under it.
  ctx.lineTo(last * step, H);
  ctx.lineTo(0, H);
  ctx.closePath();
  ctx.fillStyle = grad;
  ctx.fill();
}
| |
/**
 * Start a fresh conversation: ask the server to reset the current session,
 * switch to a new session id, zero the counters, restore the welcome panel
 * and blank the sidebar statistics.
 *
 * The welcome panel written here uses the same text and chips as the panel
 * in the page's initial markup, so the screen looks the same after "Clear"
 * as it does on first load.
 *
 * Ignored while a reply is streaming. The reset request is best effort: a
 * network failure does not prevent the local reset.
 *
 * @returns {Promise<void>}
 */
async function clearChat() {
  if (busy) return;

  await fetch(`${API}/chat/reset`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ session_id: sessId }),
  }).catch(() => {});

  sessId = crypto.randomUUID();
  turnCount = 0;
  totalToks = 0;
  tpsHist = [];
  peakTps = 0;
  engCache = 0;

  document.getElementById('msgs').innerHTML = `
    <div class="welcome" id="welcome">
      <div class="logo">KV</div>
      <h2>KVInfer Studio</h2>
      <p>152M · GPT-2 Decoder-Only · Custom C++ inference engine with AVX2 SIMD, OpenMP parallelism &amp; persistent session KV-cache.</p>
      <div class="spec-chips">
        <span class="chip">152M params</span>
        <span class="chip">AVX2 SIMD</span>
        <span class="chip">OpenMP</span>
        <span class="chip">KV Cache</span>
        <span class="chip">Streaming</span>
      </div>
    </div>`;

  for (const id of ['s-turns', 's-totok']) {
    document.getElementById(id).textContent = '0';
  }
  for (const id of ['s-tps', 's-ttft', 's-lat', 's-avg', 's-peak', 's-toks', 's-engcache']) {
    document.getElementById(id).textContent = '—';
  }
  drawSpark();
}
| |
/**
 * Fetch {API}/metrics once and show system RAM usage (percent) plus process
 * RAM (MB) in the "Server RAM" row. Non-OK responses leave the row
 * unchanged; any error is silently ignored.
 * @returns {Promise<void>}
 */
async function pollMetrics() {
  try {
    const resp = await fetch(`${API}/metrics`);
    if (!resp.ok) {
      return;
    }
    const stats = await resp.json();
    const ramCell = document.getElementById('s-ram');
    const usedPct = stats.system_ram_used_pct.toFixed(0);
    ramCell.textContent = `${usedPct}% (${stats.process_ram_mb} MB)`;
  } catch {}
}
// Fill the "Server RAM" row immediately, then refresh it every 5000 ms.
pollMetrics();
setInterval(() => pollMetrics(), 5000);
| |
| |
| |
| |
| |
/** Show the benchmark overlay by adding the "on" class to #bov. */
function openBench() {
  const overlay = document.getElementById('bov');
  overlay.classList.add('on');
}
| |
/** Hide the benchmark overlay by removing the "on" class from #bov. */
function closeBench() {
  const overlay = document.getElementById('bov');
  overlay.classList.remove('on');
}
| |
/**
 * Run one benchmark prompt against POST {API}/chat in a throw-away session
 * and measure it from the streamed events.
 *
 * @param {string} message - prompt text to send.
 * @returns {Promise<{tokens: number, ttft: number, tps: number}>} number of
 *   `token` events, ms until the first one (0 if none arrived), and the
 *   `tps` reported by the `done` event (0 if absent or not numeric).
 * @throws {Error} on a non-OK HTTP status or when the stream carries an
 *   `error` event.
 */
async function runBenchPrompt(message) {
  const t0 = Date.now();
  let firstTokT = null;
  let tokens = 0;
  let tps = 0;

  const resp = await fetch(`${API}/chat`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      message,
      session_id: crypto.randomUUID(),
      system_prompt: "You are a helpful assistant.",
      max_new_tokens: 50,
      temperature: 0.1,
      top_k: 40,
    }),
  });
  if (!resp.ok) throw new Error(`HTTP ${resp.status}`);

  const reader = resp.body.getReader();
  const decoder = new TextDecoder();
  let buf = '';

  for (;;) {
    const { done, value } = await reader.read();
    // At end of stream, flush the decoder and process a final line even if
    // it has no trailing newline.
    buf += done ? decoder.decode() : decoder.decode(value, { stream: true });
    const lines = buf.split('\n');
    buf = done ? '' : lines.pop();

    for (const rawLine of lines) {
      const line = rawLine.trim();
      if (!line.startsWith('data:')) continue;
      const raw = line.slice(5).trim();
      if (raw === '[DONE]') continue;
      let chunk;
      try {
        chunk = JSON.parse(raw);
      } catch {
        continue;
      }
      if (chunk === null || typeof chunk !== 'object') continue;

      if (chunk.type === 'token') {
        if (firstTokT === null) firstTokT = Date.now();
        tokens += 1;
      } else if (chunk.type === 'done') {
        const reported = Number(chunk.tps);
        tps = Number.isFinite(reported) ? reported : 0;
      } else if (chunk.type === 'error') {
        throw new Error(String(chunk.message ?? 'server error'));
      }
    }
    if (done) break;
  }

  return { tokens, ttft: firstTokT !== null ? firstTokT - t0 : 0, tps };
}

/**
 * Run the five built-in prompts one after another, filling a results table
 * in the benchmark dialog row by row, then show average throughput and TTFT.
 *
 * Failed prompts are shown as "Failed" and excluded from the averages. The
 * run button is put back into the dialog after the table replaces its
 * content, so it stays visible (disabled) during the run and can be used to
 * run the benchmark again afterwards.
 *
 * @returns {Promise<void>}
 */
async function runBench() {
  const btn = document.getElementById('btnbench');
  const bcontent = document.getElementById('bcontent');

  btn.disabled = true;
  btn.textContent = "Running Benchmark...";

  const prompts = [
    "What is the capital of France?",
    "Write a short poem about coding.",
    "Explain quantum computing in simple terms.",
    "What are the benefits of exercise?",
    "Translate 'Hello world' to Spanish.",
  ];

  bcontent.innerHTML = `
    <table class="btbl" style="margin-top: 15px;">
      <thead>
        <tr>
          <th>Prompt</th>
          <th>Tokens</th>
          <th>TTFT (ms)</th>
          <th>TPS (tok/s)</th>
        </tr>
      </thead>
      <tbody id="bench-tbody">
      </tbody>
    </table>
  `;
  // Assigning innerHTML above removed the button from the dialog (it lives
  // inside #bcontent); re-attach the same element below the table.
  bcontent.appendChild(btn);
  const tbody = document.getElementById('bench-tbody');

  let totalTps = 0;
  let totalTtft = 0;
  let succeeded = 0;

  for (let i = 0; i < prompts.length; i++) {
    const tr = document.createElement('tr');
    tr.innerHTML = `<td>Prompt ${i + 1}</td><td colspan="3" style="color:var(--amber)">Testing...</td>`;
    tbody.appendChild(tr);

    try {
      const { tokens, ttft, tps } = await runBenchPrompt(prompts[i]);
      totalTps += tps;
      totalTtft += ttft;
      succeeded += 1;

      tr.innerHTML = `
        <td>Prompt ${i + 1}</td>
        <td>${tokens}</td>
        <td class="${ttft < 800 ? 'good' : 'mid'}">${ttft}</td>
        <td class="good">${tps}</td>
      `;
    } catch (e) {
      tr.innerHTML = `<td>Prompt ${i + 1}</td><td colspan="3" class="bad">Failed</td>`;
    }
  }

  // Average over the prompts that actually completed.
  const avgTps = succeeded > 0 ? (totalTps / succeeded).toFixed(1) : '—';
  const avgTtft = succeeded > 0 ? `${(totalTtft / succeeded).toFixed(0)}ms` : '—';

  const summaryHTML = `
    <div class="bench-summary" style="margin-top: 15px;">
      <div class="bench-stat">
        <div class="bval">${avgTps}</div>
        <div class="blbl">Avg Tok/s</div>
      </div>
      <div class="bench-stat">
        <div class="bval">${avgTtft}</div>
        <div class="blbl">Avg TTFT</div>
      </div>
    </div>
  `;
  bcontent.insertAdjacentHTML('afterbegin', summaryHTML);

  btn.textContent = "▶ Run Benchmark Again";
  btn.disabled = false;
}
| </script> |
| </body> |
| </html> |