| <!DOCTYPE html> |
| <html lang="en"> |
| <head> |
| <meta charset="UTF-8"/> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"/> |
| <title>KVInfer · Llama 1B</title> |
| <!-- Google Fonts serves its stylesheet from fonts.googleapis.com and the font files from fonts.gstatic.com; open both connections early. --> |
| <link rel="preconnect" href="https://fonts.googleapis.com"> |
| <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin> |
| <link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@300;400;500;700&amp;family=Outfit:wght@300;500;700;900&amp;display=swap" rel="stylesheet"> |
| <style> |
| /* Reset: border-box sizing and zero default spacing on every element and its ::before/::after. */ |
| *, |
| *::before, |
| *::after { box-sizing: border-box; margin: 0; padding: 0; } |
| /* Design tokens referenced by the rules below. */ |
| :root { |
|   /* Surfaces, darkest first */ |
|   --bg: #080b0a; |
|   --bg1: #0f1512; |
|   --bg2: #161e1b; |
|   --bg3: #1e2a26; |
|   --bg4: #263530; |
|   /* Accent green in three strengths, plus two translucent tints of it */ |
|   --acc: #00e5a0; |
|   --acc2: #00c484; |
|   --acc3: #009963; |
|   --dim: rgba(0,229,160,0.08); |
|   --dimb: rgba(0,229,160,0.15); |
|   /* Status colours */ |
|   --red: #ff5c5c; |
|   --blue: #4db8ff; |
|   --warn: #ffb347; |
|   /* Text, brightest first */ |
|   --t0: #f0faf6; |
|   --t1: #b8d4ca; |
|   --t2: #6a9a8e; |
|   --t3: #3a5a52; |
|   /* Font stacks */ |
|   --mono: 'JetBrains Mono', monospace; |
|   --sans: 'Outfit', sans-serif; |
|   /* Corner radius and sidebar width */ |
|   --r: 6px; |
|   --sb: 272px; |
| } |
| /* The page itself never scrolls (overflow hidden); monospace 13px is the base type. */ |
| html, |
| body { |
|   height: 100%; |
|   background: var(--bg); |
|   color: var(--t0); |
|   font-family: var(--mono); |
|   font-size: 13px; |
|   overflow: hidden; |
| } |
| |
| |
| /* App shell: a flex row as tall as the viewport. */ |
| .app { display: flex; height: 100vh; } |
| /* Sidebar: fixed-width column (--sb) whose children stack vertically. */ |
| .sb { |
|   width: var(--sb); min-width: var(--sb); |
|   background: var(--bg1); border-right: 1px solid var(--bg4); |
|   display: flex; flex-direction: column; overflow: hidden; |
| } |
| /* Sidebar header: logo tile, product name and tagline. */ |
| .sb-logo { padding: 14px 16px; background: var(--bg); border-bottom: 1px solid var(--bg4); flex-shrink: 0; } |
| .sb-logo-row { display: flex; align-items: center; gap: 8px; } |
| .logo-box { |
|   width: 30px; height: 30px; background: var(--acc); border-radius: 5px; |
|   display: flex; align-items: center; justify-content: center; flex-shrink: 0; |
| } |
| .logo-box svg { fill: var(--bg); } |
| .sb-logo h2 { font-family: var(--sans); font-size: 17px; font-weight: 900; letter-spacing: -0.04em; } |
| .sb-logo h2 em { color: var(--acc); font-style: normal; } |
| .sb-logo p { font-size: 9px; color: var(--t2); margin-top: 3px; letter-spacing: 0.1em; text-transform: uppercase; } |
| /* Tab strip; .on marks the selected tab. */ |
| .sb-tabs { display: flex; border-bottom: 1px solid var(--bg4); flex-shrink: 0; } |
| .sb-tab { |
|   flex: 1; padding: 8px; text-align: center; font-size: 9px; font-weight: 700; |
|   color: var(--t2); text-transform: uppercase; letter-spacing: 0.1em; cursor: pointer; |
|   border-bottom: 2px solid transparent; transition: .15s; background: none; border-top: none; |
|   border-left: none; border-right: none; font-family: var(--mono); |
| } |
| .sb-tab.on { color: var(--acc); border-bottom-color: var(--acc); background: var(--dim); } |
| /* Scrollable content area inside a sidebar panel. */ |
| .sb-body { flex: 1; overflow-y: auto; padding: 10px; scrollbar-width: thin; scrollbar-color: var(--bg4) transparent; } |
| |
| |
| /* Card container and its small uppercase title row. */ |
| .card { |
|   background: var(--bg2); border: 1px solid var(--bg3); border-radius: var(--r); |
|   padding: 10px 12px; margin-bottom: 8px; |
| } |
| .ct { |
|   font-size: 9px; font-weight: 700; color: var(--t2); text-transform: uppercase; |
|   letter-spacing: 0.1em; margin-bottom: 8px; display: flex; align-items: center; gap: 6px; |
| } |
| /* Stat row: label on the left (.sl), value on the right (.sv). */ |
| .sr { display: flex; justify-content: space-between; align-items: center; padding: 3px 0; } |
| .sl { color: var(--t1); font-size: 11px; } |
| .sv { font-size: 12px; font-weight: 700; color: var(--t0); font-variant-numeric: tabular-nums; } |
| /* One-letter colour helpers. */ |
| .g { color: var(--acc); } |
| .b { color: var(--blue); } |
| .y { color: var(--warn); } |
| .r { color: var(--red); } |
| /* Status dot: flat when .off, pulsing accent when .on. */ |
| .dot { display: inline-block; width: 6px; height: 6px; border-radius: 50%; } |
| .dot.off { background: var(--t3); } |
| .dot.on { background: var(--acc); animation: pulse 1.4s infinite; } |
| @keyframes pulse { |
|   0%, 100% { opacity: 1; transform: scale(1); } |
|   50% { opacity: .3; transform: scale(.7); } |
| } |
| /* Sparkline canvas. */ |
| canvas#spark { width: 100%; height: 44px; display: block; border-radius: var(--r); background: var(--bg); } |
| |
| |
| /* Multi-line text field; focus is shown by the accent border instead of the default outline. */ |
| .ta{width:100%;background:var(--bg);border:1px solid var(--bg4);border-radius:var(--r); |
| color:var(--t0);font-size:11px;padding:8px 10px;resize:vertical;min-height:54px; |
| font-family:var(--mono);line-height:1.6} |
| .ta:focus{outline:none;border-color:var(--acc)} |
| /* Parameter group: a label row (name left, current value right) above a range slider. */ |
| .pg{display:flex;flex-direction:column;gap:4px} |
| .pg label{font-size:9px;color:var(--t2);text-transform:uppercase;letter-spacing:0.08em; |
| display:flex;justify-content:space-between} |
| .pg label span{color:var(--acc);font-weight:700} |
| /* Slider track. The standard appearance property accompanies the -webkit- prefixed one. */ |
| .pg input[type=range]{width:100%;-webkit-appearance:none;appearance:none;height:2px; |
| background:var(--bg4);border-radius:1px;outline:none} |
| /* The track removes the default outline, so keyboard focus needs its own visible indicator. */ |
| .pg input[type=range]:focus-visible{outline:1px solid var(--acc);outline-offset:5px} |
| /* Slider thumb. WebKit/Blink and Gecko use different pseudo-elements, and a selector list |
| containing an unknown one is dropped as a whole, so the two rules must stay separate. */ |
| .pg input[type=range]::-webkit-slider-thumb{-webkit-appearance:none;width:11px;height:11px; |
| border-radius:50%;background:var(--acc);cursor:pointer;border:2px solid var(--bg1)} |
| .pg input[type=range]::-moz-range-thumb{width:11px;height:11px;box-sizing:border-box; |
| border-radius:50%;background:var(--acc);cursor:pointer;border:2px solid var(--bg1)} |
| |
| |
| /* Base button, then the primary (.bp) and secondary (.bs) colour variants. */ |
| .btn { |
|   display: inline-flex; align-items: center; gap: 5px; padding: 6px 12px; |
|   border-radius: var(--r); border: 1px solid var(--bg4); cursor: pointer; |
|   font-size: 10px; font-weight: 700; font-family: var(--mono); |
|   text-transform: uppercase; letter-spacing: 0.05em; transition: .13s; |
| } |
| .bp { background: var(--acc); color: var(--bg); border-color: var(--acc); } |
| .bp:hover { background: var(--acc2); } |
| .bs { background: var(--bg3); color: var(--t1); } |
| .bs:hover { background: var(--bg4); } |
| /* Stretch a button across its container. */ |
| .btn.full { width: 100%; justify-content: center; } |
| .btn:disabled { opacity: .3; cursor: not-allowed; } |
| |
| |
| /* Sidebar panels. The benchmark panel starts hidden; the stats panel starts visible. */ |
| #bench-panel { display: none; flex-direction: column; height: 100%; overflow: hidden; } |
| #bench-panel.visible { display: flex; } |
| #stats-panel { display: flex; flex-direction: column; height: 100%; overflow: hidden; } |
| /* Run button above a scrollable results list. */ |
| .bench-run-btn { margin: 10px; flex-shrink: 0; } |
| .bench-results { flex: 1; overflow-y: auto; padding: 0 10px 10px; scrollbar-width: thin; scrollbar-color: var(--bg4) transparent; } |
| /* Two-column summary tiles: big value (.bv) over a small label (.bl). */ |
| .bench-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 6px; margin-bottom: 10px; } |
| .bstat { |
|   background: var(--bg2); border: 1px solid var(--bg3); border-radius: var(--r); |
|   padding: 12px; text-align: center; |
| } |
| .bstat .bv { font-family: var(--sans); font-size: 24px; font-weight: 900; color: var(--acc); letter-spacing: -0.04em; } |
| .bstat .bl { font-size: 9px; color: var(--t2); text-transform: uppercase; letter-spacing: 0.1em; margin-top: 2px; } |
| /* Per-prompt result card. */ |
| .p-card { |
|   background: var(--bg2); border: 1px solid var(--bg3); border-radius: var(--r); |
|   padding: 9px 11px; margin-bottom: 6px; animation: fadeup .2s ease; |
| } |
| .p-card .p-title { |
|   font-size: 10px; color: var(--t2); margin-bottom: 6px; |
|   white-space: nowrap; overflow: hidden; text-overflow: ellipsis; |
| } |
| .p-stats { display: flex; gap: 10px; } |
| .p-stat { display: flex; flex-direction: column; gap: 2px; } |
| .p-stat .pv { font-size: 13px; font-weight: 700; color: var(--acc); font-variant-numeric: tabular-nums; } |
| .p-stat .pk { font-size: 9px; color: var(--t2); text-transform: uppercase; letter-spacing: 0.06em; } |
| /* Thin progress track; the bar's width is set from script and animates. */ |
| .p-progress { height: 2px; background: var(--bg4); border-radius: 1px; margin-top: 7px; overflow: hidden; } |
| .p-bar { |
|   height: 100%; background: linear-gradient(90deg,var(--acc3),var(--acc)); |
|   border-radius: 1px; transition: width .6s ease; width: 0%; |
| } |
| /* In-progress status line with a small spinner. */ |
| .p-status { font-size: 10px; color: var(--warn); display: flex; align-items: center; gap: 5px; } |
| .p-spin { |
|   width: 8px; height: 8px; border: 1px solid var(--warn); border-top-color: transparent; |
|   border-radius: 50%; animation: spin .7s linear infinite; flex-shrink: 0; |
| } |
| @keyframes spin { to { transform: rotate(360deg); } } |
| /* Placeholder shown before any benchmark has run. */ |
| .bench-empty { text-align: center; padding: 32px 16px; color: var(--t2); font-size: 12px; line-height: 1.8; } |
| |
| |
| /* Chat pane: header, scrolling transcript and input bar stacked in a column. */ |
| .chat { flex: 1; display: flex; flex-direction: column; overflow: hidden; } |
| /* Header bar. */ |
| .chat-hdr { |
|   height: 50px; display: flex; align-items: center; padding: 0 20px; |
|   border-bottom: 1px solid var(--bg4); background: var(--bg1); gap: 8px; flex-shrink: 0; |
| } |
| .badge { |
|   background: var(--dim); border: 1px solid rgba(0,229,160,.25); border-radius: var(--r); |
|   padding: 3px 9px; font-size: 10px; font-weight: 700; color: var(--acc); font-family: var(--sans); |
| } |
| .chat-hdr-title { font-family: var(--sans); font-size: 13px; font-weight: 600; color: var(--t1); } |
| /* Flexible spacer that pushes what follows to the right edge. */ |
| .spc { flex: 1; } |
| .hpill { display: flex; align-items: center; gap: 6px; font-size: 10px; color: var(--t2); text-transform: uppercase; letter-spacing: .06em; } |
| /* Transcript: the only scrolling region of the chat pane. */ |
| .msgs { |
|   flex: 1; overflow-y: auto; display: flex; flex-direction: column; |
|   scrollbar-width: thin; scrollbar-color: var(--bg4) transparent; |
| } |
| /* Centered welcome screen. */ |
| .welcome { |
|   flex: 1; display: flex; flex-direction: column; align-items: center; |
|   justify-content: center; gap: 16px; text-align: center; padding: 40px; animation: fadeup .4s ease; |
| } |
| .wlogo { |
|   font-family: var(--sans); font-size: 60px; font-weight: 900; color: var(--acc); |
|   letter-spacing: -0.06em; line-height: 1; text-shadow: 0 0 60px rgba(0,229,160,.18); |
| } |
| .welcome h2 { |
|   font-family: var(--sans); font-size: 22px; font-weight: 700; |
|   color: var(--t0); letter-spacing: -0.02em; |
| } |
| .welcome p { max-width: 400px; line-height: 1.8; font-size: 12px; color: var(--t1); } |
| .chips { display: flex; gap: 6px; flex-wrap: wrap; justify-content: center; } |
| .chip { |
|   font-size: 9px; padding: 4px 10px; border: 1px solid var(--bg4); border-radius: 16px; |
|   color: var(--t2); letter-spacing: .07em; text-transform: uppercase; background: var(--bg2); |
| } |
| |
| |
| /* Message row: narrow role column on the left, body on the right. */ |
| .mg { |
|   display: flex; gap: 0; padding: 14px 20px; border-bottom: 1px solid var(--bg3); |
|   animation: fadeup .18s ease; |
| } |
| .mg:last-child { border-bottom: none; } |
| /* Entrance animation shared by message rows, result cards and the welcome screen. */ |
| @keyframes fadeup { |
|   from { opacity: 0; transform: translateY(6px); } |
|   to { opacity: 1; transform: none; } |
| } |
| .mg-role { width: 60px; flex-shrink: 0; padding-top: 2px; } |
| .mlabel { font-size: 9px; font-weight: 700; letter-spacing: .1em; text-transform: uppercase; } |
| .mg.user .mlabel { color: var(--acc); } |
| .mg.asst .mlabel { color: var(--blue); } |
| .mg-body { flex: 1; min-width: 0; } |
| /* Message text: whitespace is preserved and long words wrap. */ |
| .bubble { |
|   font-size: 13px; line-height: 1.75; color: var(--t1); |
|   white-space: pre-wrap; word-break: break-word; max-width: 660px; |
| } |
| .mg.user .bubble { color: var(--t2); } |
| /* Blinking cursor appended while a reply is streaming. */ |
| .cur { |
|   display: inline-block; width: 7px; height: 2px; background: var(--acc); |
|   margin-left: 3px; vertical-align: middle; animation: blink .6s infinite; |
| } |
| @keyframes blink { |
|   0%, 49% { opacity: 1; } |
|   50%, 100% { opacity: 0; } |
| } |
| /* Per-reply metrics line under the bubble. */ |
| .bmeta { font-size: 10px; color: var(--t3); margin-top: 5px; display: flex; flex-wrap: wrap; gap: 8px; } |
| .bmeta b { color: var(--acc); } |
| |
| |
| /* Composer bar pinned under the transcript. */ |
| .inputbar { padding: 12px 16px; border-top: 1px solid var(--bg4); background: var(--bg1); flex-shrink: 0; } |
| /* Wrapper drawn as the text field; its border turns accent while anything inside has focus. */ |
| .inputwrap { |
|   background: var(--bg2); border: 1px solid var(--bg4); border-radius: var(--r); |
|   display: flex; align-items: flex-end; padding: 3px 3px 3px 12px; gap: 4px; transition: .13s; |
| } |
| .inputwrap:focus-within { border-color: var(--acc); } |
| /* The textarea itself is borderless; it is capped at 110px tall. */ |
| #inp { |
|   flex: 1; background: none; border: none; outline: none; color: var(--t0); |
|   font-size: 13px; font-family: var(--mono); resize: none; line-height: 1.6; |
|   max-height: 110px; padding: 7px 0; |
| } |
| #inp::placeholder { color: var(--t3); } |
| /* Square send button. */ |
| .sbtn { |
|   background: var(--acc); border: none; cursor: pointer; width: 32px; height: 32px; |
|   border-radius: var(--r); display: flex; align-items: center; justify-content: center; |
|   flex-shrink: 0; transition: .13s; align-self: flex-end; margin-bottom: 3px; |
| } |
| .sbtn:hover { background: var(--acc2); } |
| .sbtn:disabled { opacity: .3; cursor: not-allowed; } |
| .sbtn svg { fill: var(--bg); } |
| /* Keyboard hint under the composer. */ |
| .hint { margin-top: 5px; font-size: 10px; color: var(--t3); text-align: right; letter-spacing: .04em; } |
| </style> |
| </head> |
| <body> |
| <div class="app"> |
|
|
| |
| <aside class="sb"> |
| <!-- Sidebar header: logo tile, product name and model tagline. --> |
| <div class="sb-logo"> |
|   <div class="sb-logo-row"> |
|     <div class="logo-box"> |
|       <!-- Decorative mark; the product name follows as text, so hide it from assistive tech. --> |
|       <svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true"><path d="M12 2L2 7l10 5 10-5-10-5zM2 17l10 5 10-5M2 12l10 5 10-5"/></svg> |
|     </div> |
|     <div> |
|       <h2>KV<em>Infer</em></h2> |
|       <p>1B · Llama 3.2 · RoPE · GQA · SwiGLU</p> |
|     </div> |
|   </div> |
| </div> |
|
|
| |
| <!-- Sidebar tabs; each button id is "tab-" + the name passed to switchTab(). --> |
| <div class="sb-tabs"> |
|   <button type="button" class="sb-tab on" id="tab-stats" onclick="switchTab('stats')">Stats</button> |
|   <button type="button" class="sb-tab" id="tab-bench" onclick="switchTab('bench')">Benchmark</button> |
|   <button type="button" class="sb-tab" id="tab-params" onclick="switchTab('params')">Params</button> |
| </div> |
|
|
| |
| <!-- Stats tab. Every value span is filled in from script; "—" means no reading yet. --> |
| <div id="stats-panel"> |
|   <div class="sb-body"> |
|     <!-- Figures for the reply that is streaming or was last completed. --> |
|     <div class="card"> |
|       <div class="ct"><span class="dot off" id="dot-s"></span>Live</div> |
|       <div class="sr"><span class="sl">Throughput</span><span class="sv g" id="s-tps">—</span></div> |
|       <div class="sr"><span class="sl">TTFT</span><span class="sv b" id="s-ttft">—</span></div> |
|       <div class="sr"><span class="sl">Tokens out</span><span class="sv" id="s-toks">—</span></div> |
|       <div class="sr"><span class="sl">Latency</span><span class="sv y" id="s-lat">—</span></div> |
|     </div> |
|     <!-- Sparkline of recent tok/s readings with their average and peak. --> |
|     <div class="card"> |
|       <div class="ct">Throughput History</div> |
|       <canvas id="spark"></canvas> |
|       <div style="margin-top:6px"> |
|         <div class="sr"><span class="sl">Session avg</span><span class="sv g" id="s-avg">—</span></div> |
|         <div class="sr"><span class="sl">Peak</span><span class="sv g" id="s-peak">—</span></div> |
|       </div> |
|     </div> |
|     <!-- Running totals for the current conversation, plus the reset control. --> |
|     <div class="card"> |
|       <div class="ct">Session</div> |
|       <div class="sr"><span class="sl">Turns</span><span class="sv" id="s-turns">0</span></div> |
|       <div class="sr"><span class="sl">Total tokens</span><span class="sv" id="s-totok">0</span></div> |
|       <div class="sr"><span class="sl">KV cache</span><span class="sv b" id="s-kv">—</span></div> |
|       <div class="sr"><span class="sl">Server RAM</span><span class="sv" id="s-ram">—</span></div> |
|       <div style="display:flex;gap:6px;margin-top:8px"> |
|         <button type="button" class="btn bs full" onclick="clearChat()">↺ Reset</button> |
|       </div> |
|     </div> |
|     <!-- Server-side figures refreshed by the metrics poll. --> |
|     <div class="card"> |
|       <div class="ct">Engine</div> |
|       <div class="sr"><span class="sl">Engines ready</span><span class="sv g" id="s-eng">—</span></div> |
|       <div class="sr"><span class="sl">Active sessions</span><span class="sv" id="s-sess">—</span></div> |
|       <div class="sr"><span class="sl">Uptime</span><span class="sv" id="s-uptime">—</span></div> |
|     </div> |
|   </div> |
| </div> |
|
|
| |
| <!-- Benchmark tab: run button above the results list; the note is replaced once a run starts. --> |
| <div id="bench-panel"> |
|   <button type="button" class="btn bp bench-run-btn" id="btnbench" onclick="runBench()">▶ Run Benchmark</button> |
|   <div class="bench-results" id="bench-results"> |
|     <div class="bench-empty" id="bench-empty"> |
|       Run benchmark to see<br>performance metrics |
|     </div> |
|   </div> |
| </div> |
|
|
| |
| <!-- Params tab: system prompt and sampling settings, read each time a message is sent. --> |
| <div id="params-panel" style="display:none;flex:1;overflow-y:auto"> |
|   <div class="sb-body"> |
|     <div class="card"> |
|       <!-- The card title is a plain div, so the field carries its own accessible name. --> |
|       <div class="ct">System Prompt</div> |
|       <textarea class="ta" id="sysprompt" rows="4" aria-label="System prompt">You are a helpful, concise, and friendly AI assistant.</textarea> |
|     </div> |
|     <div class="card"> |
|       <div class="ct">Generation</div> |
|       <div style="display:flex;flex-direction:column;gap:10px;margin-top:2px"> |
|         <!-- Each label is tied to its slider with for/id; the span mirrors the slider's current value. --> |
|         <div class="pg"> |
|           <label for="p-temp">Temperature <span id="v-temp">0.70</span></label> |
|           <input type="range" id="p-temp" min="0.1" max="2.0" step="0.05" value="0.7" |
|           oninput="document.getElementById('v-temp').textContent=parseFloat(this.value).toFixed(2)"> |
|         </div> |
|         <div class="pg"> |
|           <label for="p-topk">Top-K <span id="v-topk">40</span></label> |
|           <input type="range" id="p-topk" min="1" max="200" step="1" value="40" |
|           oninput="document.getElementById('v-topk').textContent=this.value"> |
|         </div> |
|         <div class="pg"> |
|           <label for="p-maxt">Max tokens <span id="v-maxt">256</span></label> |
|           <input type="range" id="p-maxt" min="32" max="500" step="8" value="256" |
|           oninput="document.getElementById('v-maxt').textContent=this.value"> |
|         </div> |
|       </div> |
|     </div> |
|   </div> |
| </div> |
| </aside> |
|
|
| |
| <main class="chat"> |
| <!-- Chat header: model badge, title and the Idle / Generating indicator driven by setBusy(). --> |
| <header class="chat-hdr"> |
|   <div class="badge">Llama 3.2 · 1B</div> |
|   <span class="chat-hdr-title">KVInfer Studio</span> |
|   <div class="spc"></div> |
|   <div class="hpill"> |
|     <span class="dot off" id="dot-h"></span> |
|     <span id="hstatus">Idle</span> |
|   </div> |
| </header> |
|
|
| <!-- Transcript. The welcome block is removed when the first message is added. --> |
| <div class="msgs" id="msgs"> |
|   <div class="welcome" id="welcome"> |
|     <div class="wlogo">1B</div> |
|     <h2>KVInfer Studio</h2> |
|     <p>Fine-tuned Llama 3.2 1B running on a hand-written C++ inference engine — AVX2 SIMD, OpenMP, RoPE, GQA, SwiGLU, persistent KV-cache.</p> |
|     <div class="chips"> |
|       <span class="chip">1B params</span> |
|       <span class="chip">RoPE</span> |
|       <span class="chip">GQA 8 heads</span> |
|       <span class="chip">SwiGLU</span> |
|       <span class="chip">AVX2 SIMD</span> |
|       <span class="chip">KV Cache</span> |
|     </div> |
|   </div> |
| </div> |
|
|
| <!-- Composer. The placeholder is not a label and the send button is icon-only, so both get aria-label. --> |
| <div class="inputbar"> |
|   <div class="inputwrap"> |
|     <textarea id="inp" rows="1" placeholder="Send a message…" aria-label="Message" onkeydown="handleKey(event)"></textarea> |
|     <button type="button" class="sbtn" id="sbtn" aria-label="Send message" onclick="send()"> |
|       <svg width="14" height="14" viewBox="0 0 24 24" aria-hidden="true"><path d="M2 21l21-9L2 3v7l15 2-15 2v7z"/></svg> |
|     </button> |
|   </div> |
|   <div class="hint">Enter ↵ to send · Shift+Enter for newline</div> |
| </div> |
| </main> |
|
|
| </div> |
|
|
| <script> |
| |
| const API = ''; |
| let sessId = crypto.randomUUID(); |
| let busy = false, turns = 0, totalToks = 0; |
| let tpsHist = [], peakTps = 0; |
| |
| |
| function switchTab(name) { |
| ['stats','bench','params'].forEach(t => { |
| const panel = document.getElementById(t === 'stats' ? 'stats-panel' : t + '-panel'); |
| const tab = document.getElementById('tab-' + t); |
| if (t === name) { |
| panel.style.display = (t === 'bench') ? 'flex' : 'block'; |
| tab.classList.add('on'); |
| } else { |
| panel.style.display = 'none'; |
| tab.classList.remove('on'); |
| } |
| }); |
| } |
| |
| |
| const inp = document.getElementById('inp'); |
| inp.addEventListener('input', () => { |
| inp.style.height = 'auto'; |
| inp.style.height = Math.min(inp.scrollHeight, 110) + 'px'; |
| }); |
| function handleKey(e) { |
| if (e.key === 'Enter' && !e.shiftKey) { e.preventDefault(); send(); } |
| } |
| |
| |
| function setBusy(v) { |
| busy = v; |
| document.getElementById('sbtn').disabled = v; |
| ['dot-s','dot-h'].forEach(id => { |
| document.getElementById(id).className = 'dot ' + (v ? 'on' : 'off'); |
| }); |
| document.getElementById('hstatus').textContent = v ? 'Generatingβ¦' : 'Idle'; |
| } |
| |
| function scrollBot() { |
| const el = document.getElementById('msgs'); el.scrollTop = el.scrollHeight; |
| } |
| function hideWelcome() { const w = document.getElementById('welcome'); if (w) w.remove(); } |
| function esc(s) { |
| return s.replace(/&/g,'&').replace(/</g,'<').replace(/>/g,'>').replace(/\n/g,'<br>'); |
| } |
| |
| |
| function addUserMsg(text) { |
| hideWelcome(); |
| const g = document.createElement('div'); g.className = 'mg user'; |
| g.innerHTML = `<div class="mg-role"><div class="mlabel">You</div></div> |
| <div class="mg-body"><div class="bubble">${esc(text)}</div></div>`; |
| document.getElementById('msgs').appendChild(g); scrollBot(); |
| } |
| |
| function createAsstSlot() { |
| const g = document.createElement('div'); g.className = 'mg asst'; |
| const bid = 'b' + Date.now(), mid = 'm' + Date.now(); |
| g.innerHTML = `<div class="mg-role"><div class="mlabel">Model</div></div> |
| <div class="mg-body"> |
| <div class="bubble" id="${bid}"><span class="cur"></span></div> |
| <div class="bmeta" id="${mid}"></div> |
| </div>`; |
| document.getElementById('msgs').appendChild(g); scrollBot(); |
| return { bubble: document.getElementById(bid), meta: document.getElementById(mid) }; |
| } |
| |
| |
| async function send() { |
| if (busy) return; |
| const text = inp.value.trim(); if (!text) return; |
| inp.value = ''; inp.style.height = 'auto'; |
| addUserMsg(text); setBusy(true); |
| |
| const { bubble, meta } = createAsstSlot(); |
| let content = '', t0 = Date.now(), firstTok = null, tokCount = 0; |
| |
| const payload = { |
| message: text, |
| session_id: sessId, |
| system_prompt: document.getElementById('sysprompt').value, |
| max_new_tokens: parseInt(document.getElementById('p-maxt').value), |
| temperature: parseFloat(document.getElementById('p-temp').value), |
| top_k: parseInt(document.getElementById('p-topk').value), |
| }; |
| |
| try { |
| const resp = await fetch(`${API}/chat`, { |
| method: 'POST', headers: {'Content-Type':'application/json'}, |
| body: JSON.stringify(payload), |
| }); |
| if (!resp.ok) throw new Error(`HTTP ${resp.status}`); |
| |
| const reader = resp.body.getReader(), decoder = new TextDecoder(); |
| let buf = ''; |
| while (true) { |
| const { done, value } = await reader.read(); if (done) break; |
| buf += decoder.decode(value, { stream: true }); |
| let nl; |
| while ((nl = buf.indexOf('\n')) !== -1) { |
| const line = buf.slice(0, nl).trim(); buf = buf.slice(nl + 1); |
| if (!line.startsWith('data:')) continue; |
| const raw = line.slice(5).trim(); if (raw === '[DONE]') break; |
| let chunk; try { chunk = JSON.parse(raw); } catch { continue; } |
| |
| if (chunk.type === 'token') { |
| if (firstTok === null) firstTok = Date.now(); |
| content += chunk.text; tokCount++; totalToks++; |
| document.getElementById('s-totok').textContent = totalToks; |
| document.getElementById('s-toks').textContent = tokCount + ' tok'; |
| document.getElementById('s-ttft').textContent = (firstTok - t0) + ' ms'; |
| bubble.innerHTML = esc(content) + '<span class="cur"></span>'; |
| scrollBot(); |
| } else if (chunk.type === 'done') { |
| bubble.innerHTML = esc(content); |
| const ttft = firstTok ? (firstTok - t0) : 0; |
| const tps = chunk.tps, ms = chunk.total_ms; |
| meta.innerHTML = `<b>${tps}</b> tok/s Β· TTFT <b>${ttft}ms</b> Β· <b>${tokCount}</b> tokens Β· <b>${ms.toFixed(0)}ms</b>`; |
| document.getElementById('s-tps').textContent = tps + ' tok/s'; |
| document.getElementById('s-lat').textContent = ms.toFixed(0) + ' ms'; |
| tpsHist.push(tps); if (tpsHist.length > 30) tpsHist.shift(); |
| if (tps > peakTps) peakTps = tps; |
| const avg = (tpsHist.reduce((a,b)=>a+b,0)/tpsHist.length).toFixed(1); |
| document.getElementById('s-avg').textContent = avg + ' tok/s'; |
| document.getElementById('s-peak').textContent = peakTps.toFixed(1) + ' tok/s'; |
| if (chunk.session_id) { |
| fetch(`${API}/chat/history?session_id=${chunk.session_id}`) |
| .then(r=>r.json()).then(d=>{ document.getElementById('s-kv').textContent=(d.tokens_in_engine||0)+' tok'; }).catch(()=>{}); |
| } |
| turns++; document.getElementById('s-turns').textContent = turns; |
| drawSpark(); |
| } else if (chunk.type === 'error') { |
| bubble.innerHTML += `<br><span style="color:var(--red)">β ${esc(chunk.message)}</span>`; |
| } |
| } |
| } |
| } catch (err) { |
| if (tokCount === 0) bubble.innerHTML = `<span style="color:var(--red)">Connection error: ${esc(err.message)}</span>`; |
| else meta.innerHTML += `<span style="color:var(--warn)"> [interrupted]</span>`; |
| } finally { |
| const c = bubble.querySelector('.cur'); if (c) c.remove(); |
| setBusy(false); scrollBot(); |
| } |
| } |
| |
| |
| function drawSpark() { |
| const el = document.getElementById('spark'), ctx = el.getContext('2d'); |
| const dpr = window.devicePixelRatio || 1; |
| el.width = el.clientWidth * dpr; el.height = el.clientHeight * dpr; |
| ctx.scale(dpr, dpr); |
| const W = el.clientWidth, H = el.clientHeight; |
| ctx.clearRect(0, 0, W, H); |
| const d = tpsHist; if (d.length < 2) return; |
| const mx = Math.max(...d) * 1.15 || 1, step = W / (d.length - 1); |
| const grad = ctx.createLinearGradient(0, 0, 0, H); |
| grad.addColorStop(0, 'rgba(0,229,160,.25)'); grad.addColorStop(1, 'rgba(0,229,160,.02)'); |
| ctx.beginPath(); |
| d.forEach((v, i) => { const x = i*step, y = H-(v/mx)*(H-4)-2; i===0?ctx.moveTo(x,y):ctx.lineTo(x,y); }); |
| ctx.strokeStyle = '#00e5a0'; ctx.lineWidth = 1.5; ctx.stroke(); |
| ctx.lineTo((d.length-1)*step, H); ctx.lineTo(0, H); ctx.closePath(); |
| ctx.fillStyle = grad; ctx.fill(); |
| } |
| |
| |
| async function clearChat() { |
| if (busy) return; |
| await fetch(`${API}/chat/reset`, { |
| method:'POST', headers:{'Content-Type':'application/json'}, |
| body: JSON.stringify({ session_id: sessId }), |
| }).catch(()=>{}); |
| sessId = crypto.randomUUID(); |
| turns = 0; totalToks = 0; tpsHist = []; peakTps = 0; |
| document.getElementById('msgs').innerHTML = ` |
| <div class="welcome" id="welcome"> |
| <div class="wlogo">1B</div> |
| <h2>KVInfer Studio</h2> |
| <p>Fine-tuned Llama 3.2 1B β C++ AVX2 + RoPE + GQA + SwiGLU + KV Cache.</p> |
| <div class="chips"> |
| <span class="chip">1B params</span><span class="chip">RoPE</span> |
| <span class="chip">GQA</span><span class="chip">SwiGLU</span><span class="chip">KV Cache</span> |
| </div> |
| </div>`; |
| ['s-turns','s-totok'].forEach(id => document.getElementById(id).textContent = '0'); |
| ['s-tps','s-ttft','s-lat','s-avg','s-peak','s-toks','s-kv'].forEach(id => document.getElementById(id).textContent = 'β'); |
| drawSpark(); |
| } |
| |
| |
| async function pollMetrics() { |
| try { |
| const r = await fetch(`${API}/metrics`); |
| if (!r.ok) return; |
| const d = await r.json(); |
| document.getElementById('s-ram').textContent = d.system_ram_used_pct.toFixed(0) + '% (' + d.process_ram_mb + 'MB)'; |
| document.getElementById('s-eng').textContent = d.engines_ready + ' / ' + (d.n_engines || '?'); |
| document.getElementById('s-sess').textContent = d.active_sessions; |
| document.getElementById('s-uptime').textContent = fmtUptime(d.uptime_s); |
| } catch {} |
| } |
| function fmtUptime(s) { |
| if (s < 60) return s + 's'; |
| if (s < 3600) return Math.floor(s/60) + 'm ' + (s%60) + 's'; |
| return Math.floor(s/3600) + 'h ' + Math.floor((s%3600)/60) + 'm'; |
| } |
| pollMetrics(); setInterval(pollMetrics, 4000); |
| |
| |
| const BENCH_PROMPTS = [ |
| { label: "Capital question", text: "What is the capital of Japan?" }, |
| { label: "Simple math", text: "What is 17 multiplied by 13?" }, |
| { label: "Short poem", text: "Write a 4-line poem about stars." }, |
| { label: "Explain concept", text: "Explain gravity in 2 sentences." }, |
| { label: "Code snippet", text: "Write a Python function to reverse a string." }, |
| ]; |
| |
| let benchRunning = false; |
| |
| async function runBench() { |
| if (benchRunning || busy) return; |
| benchRunning = true; |
| switchTab('bench'); |
| |
| const btn = document.getElementById('btnbench'); |
| const results = document.getElementById('bench-results'); |
| const empty = document.getElementById('bench-empty'); |
| btn.disabled = true; btn.textContent = 'Runningβ¦'; |
| if (empty) empty.remove(); |
| |
| |
| results.innerHTML = ''; |
| |
| |
| const cards = BENCH_PROMPTS.map((p, i) => { |
| const card = document.createElement('div'); card.className = 'p-card'; |
| card.id = 'bcard-' + i; |
| card.innerHTML = ` |
| <div class="p-title">${p.label}</div> |
| <div class="p-status" id="bst-${i}"><span class="p-spin"></span> Runningβ¦</div> |
| <div class="p-progress"><div class="p-bar" id="bbar-${i}"></div></div>`; |
| results.appendChild(card); return card; |
| }); |
| |
| let sumTps = 0, sumTtft = 0, ok = 0; |
| |
| for (let i = 0; i < BENCH_PROMPTS.length; i++) { |
| const p = BENCH_PROMPTS[i]; |
| let t0 = Date.now(), firstTok = null, tokCount = 0, finalTps = 0, finalMs = 0; |
| const bar = document.getElementById('bbar-' + i); |
| const st = document.getElementById('bst-' + i); |
| |
| try { |
| const resp = await fetch(`${API}/chat`, { |
| method: 'POST', headers: {'Content-Type':'application/json'}, |
| body: JSON.stringify({ |
| message: p.text, session_id: crypto.randomUUID(), |
| system_prompt: 'You are a helpful assistant.', |
| max_new_tokens: 80, temperature: 0.1, top_k: 40, |
| }), |
| }); |
| const reader = resp.body.getReader(), decoder = new TextDecoder(); |
| let buf = ''; |
| while (true) { |
| const { done, value } = await reader.read(); if (done) break; |
| buf += decoder.decode(value, { stream: true }); |
| let nl; |
| while ((nl = buf.indexOf('\n')) !== -1) { |
| const line = buf.slice(0, nl).trim(); buf = buf.slice(nl + 1); |
| if (!line.startsWith('data:')) continue; |
| const raw = line.slice(5).trim(); if (raw === '[DONE]') break; |
| let chunk; try { chunk = JSON.parse(raw); } catch { continue; } |
| if (chunk.type === 'token') { |
| if (firstTok === null) firstTok = Date.now(); |
| tokCount++; |
| |
| bar.style.width = Math.min(100, tokCount * 1.25) + '%'; |
| } else if (chunk.type === 'done') { |
| finalTps = chunk.tps; finalMs = chunk.total_ms; |
| } |
| } |
| } |
| const ttft = firstTok ? (firstTok - t0) : 0; |
| sumTps += finalTps; sumTtft += ttft; ok++; |
| bar.style.width = '100%'; |
| st.innerHTML = ` |
| <div class="p-stats"> |
| <div class="p-stat"><div class="pv">${finalTps}</div><div class="pk">tok/s</div></div> |
| <div class="p-stat"><div class="pv">${ttft}</div><div class="pk">ttft ms</div></div> |
| <div class="p-stat"><div class="pv">${tokCount}</div><div class="pk">tokens</div></div> |
| <div class="p-stat"><div class="pv">${finalMs.toFixed(0)}</div><div class="pk">total ms</div></div> |
| </div>`; |
| } catch (e) { |
| st.innerHTML = `<span style="color:var(--red)">β Failed</span>`; |
| bar.style.background = 'var(--red)'; bar.style.width = '100%'; |
| } |
| } |
| |
| |
| if (ok > 0) { |
| const avgTps = (sumTps / ok).toFixed(1); |
| const avgTtft = (sumTtft / ok).toFixed(0); |
| const summary = document.createElement('div'); |
| summary.innerHTML = ` |
| <div class="bench-grid" style="margin-bottom:10px"> |
| <div class="bstat"><div class="bv">${avgTps}</div><div class="bl">Avg tok/s</div></div> |
| <div class="bstat"><div class="bv">${avgTtft}ms</div><div class="bl">Avg TTFT</div></div> |
| </div>`; |
| results.insertBefore(summary, results.firstChild); |
| } |
| |
| btn.textContent = 'βΊ Run Again'; btn.disabled = false; |
| benchRunning = false; |
| } |
| </script> |
| </body> |
| </html> |
|
|