<!--
KVInfer / index.html
NOT-OMEGA's picture
Update index.html
dcd3af5 verified
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
<title>KVInfer Studio</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link href="https://fonts.googleapis.com/css2?family=Space+Mono:wght@400;700&family=Syne:wght@400;600;700;800&display=swap" rel="stylesheet">
<style>
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
:root {
--bg0: #0f0e0c; --bg1: #1a1916; --bg2: #252420; --bg3: #302e28;
--amber: #e8a030; --amber2: #f5c060;
--green: #5dbd7a; --red: #d95f52; --blue: #5b8dee;
--text0: #ffffff; --text1: #e8e4d8; --text2: #c8c4b4; --text3: #8a8478;
--radius: 6px; --sidebar: 290px;
--mono: 'Space Mono', monospace;
--sans: 'Syne', sans-serif;
}
html, body { height: 100%; background: var(--bg0); color: var(--text0); font-family: var(--mono); font-size: 13px; }
.app { display: flex; height: 100vh; overflow: hidden; }
/* ── Sidebar ── */
.sidebar { width: var(--sidebar); min-width: var(--sidebar); background: var(--bg1);
border-right: 1px solid var(--bg3); display: flex; flex-direction: column; overflow: hidden; }
.sb-head { padding: 16px 18px 14px; border-bottom: 1px solid var(--bg3); background: var(--bg0); }
.sb-head h2 { font-family: var(--sans); font-size: 20px; font-weight: 800;
letter-spacing: -0.03em; color: var(--text0); }
.sb-head h2 span { color: var(--amber); }
.sb-head p { font-size: 10px; color: var(--text2); margin-top: 3px;
letter-spacing: 0.08em; text-transform: uppercase; }
.sb-body { flex: 1; overflow-y: auto; padding: 10px 12px;
scrollbar-width: thin; scrollbar-color: var(--bg3) transparent; }
/* Cards */
.card { background: var(--bg2); border: 1px solid var(--bg3); border-radius: var(--radius);
padding: 11px 13px; margin-bottom: 8px; }
.card-title { font-size: 9px; font-weight: 700; color: var(--text2);
text-transform: uppercase; letter-spacing: 0.1em; margin-bottom: 9px;
display: flex; align-items: center; gap: 6px; }
/* Stats */
.srow { display: flex; justify-content: space-between; align-items: center; padding: 3.5px 0; }
.slabel { color: var(--text2); font-size: 11px; }
.sval { font-size: 12px; font-weight: 700; font-variant-numeric: tabular-nums; color: var(--text0); }
.green { color: var(--green); }
.blue { color: var(--blue); }
.yellow { color: var(--amber); }
.red { color: var(--red); }
/* Live dot */
.dot { display: inline-block; width: 6px; height: 6px; border-radius: 50%; flex-shrink: 0; }
.dot.idle { background: var(--text3); }
.dot.live { background: var(--green); animation: pulse 1.4s infinite; }
@keyframes pulse { 0%,100%{opacity:1;transform:scale(1)} 50%{opacity:.3;transform:scale(.7)} }
/* Sparkline */
canvas#spark { width: 100%; height: 48px; display: block; border-radius: var(--radius);
background: var(--bg0); }
/* System prompt */
.sysprompt { width: 100%; background: var(--bg0); border: 1px solid var(--bg3);
border-radius: var(--radius); color: var(--text0); font-size: 11px; padding: 8px 10px;
resize: vertical; min-height: 56px; font-family: var(--mono); line-height: 1.6; }
.sysprompt:focus { outline: none; border-color: var(--amber); }
/* Sliders */
.pgroup { display: flex; flex-direction: column; gap: 4px; }
.pgroup label { font-size: 9px; color: var(--text2); text-transform: uppercase;
letter-spacing: 0.08em; display: flex; justify-content: space-between; }
.pgroup label span { color: var(--amber); font-weight: 700; }
/* Range inputs: flat 2px track with a round amber thumb.
   Standard `appearance` and the -moz- pseudo-elements keep Firefox in line
   with the WebKit styling; :focus-visible restores a keyboard focus cue
   that `outline: none` would otherwise remove. */
.pgroup input[type=range] {
  width: 100%; -webkit-appearance: none; appearance: none; height: 2px;
  background: var(--bg3); border-radius: 1px; outline: none;
}
.pgroup input[type=range]:focus-visible {
  outline: 1px solid var(--amber); outline-offset: 5px;
}
.pgroup input[type=range]::-webkit-slider-thumb {
  -webkit-appearance: none; appearance: none; width: 11px; height: 11px; border-radius: 50%;
  background: var(--amber); cursor: pointer; border: 2px solid var(--bg1);
}
.pgroup input[type=range]::-moz-range-track {
  height: 2px; background: var(--bg3); border-radius: 1px;
}
.pgroup input[type=range]::-moz-range-thumb {
  width: 11px; height: 11px; border-radius: 50%; box-sizing: border-box;
  background: var(--amber); cursor: pointer; border: 2px solid var(--bg1);
}
/* Buttons */
.btn { display: inline-flex; align-items: center; gap: 5px; padding: 6px 12px;
border-radius: var(--radius); border: 1px solid var(--bg3); cursor: pointer;
font-size: 10px; font-weight: 700; font-family: var(--mono);
text-transform: uppercase; letter-spacing: 0.05em; transition: all .13s; }
.btn-p { background: var(--amber); color: var(--bg0); border-color: var(--amber); }
.btn-p:hover { background: var(--amber2); border-color: var(--amber2); }
.btn-s { background: var(--bg3); color: var(--text1); }
.btn-s:hover { background: #3a3830; color: var(--text0); border-color: var(--text3); }
.btn-sm { padding: 5px 9px; font-size: 10px; }
.btn-full { width: 100%; justify-content: center; }
.btn:disabled { opacity: .35; cursor: not-allowed; }
/* ── Chat Main ── */
.chat { flex: 1; display: flex; flex-direction: column; overflow: hidden; background: var(--bg0); }
.chat-hdr { height: 52px; display: flex; align-items: center; padding: 0 22px;
border-bottom: 1px solid var(--bg3); background: var(--bg1); gap: 10px; }
.badge { background: rgba(232,160,48,0.12); border: 1px solid rgba(232,160,48,0.3);
border-radius: var(--radius); padding: 3px 10px;
font-size: 11px; font-weight: 700; color: var(--amber); letter-spacing: 0.05em;
font-family: var(--sans); }
.chat-hdr h1 { font-family: var(--sans); font-size: 14px; font-weight: 600; color: var(--text0); }
.chat-hdr .spc { flex: 1; }
.statpill { display: flex; align-items: center; gap: 6px;
font-size: 10px; color: var(--text2); letter-spacing: 0.06em; text-transform: uppercase; }
/* Messages */
.msgs { flex: 1; overflow-y: auto; padding: 0;
display: flex; flex-direction: column; scroll-behavior: smooth;
scrollbar-width: thin; scrollbar-color: var(--bg3) transparent; }
.mg { display: flex; gap: 0; padding: 16px 24px;
border-bottom: 1px solid var(--bg3); animation: fadeup 0.18s ease; }
.mg:last-child { border-bottom: none; }
@keyframes fadeup { from { opacity: 0; transform: translateY(8px); } to { opacity: 1; transform: none; } }
.mg-role { width: 64px; flex-shrink: 0; padding-top: 1px; }
.mlabel { font-size: 9px; font-weight: 700; letter-spacing: 0.1em; text-transform: uppercase; }
.mg.user .mlabel { color: var(--amber); }
.mg.asst .mlabel { color: var(--green); }
.mg-body { flex: 1; min-width: 0; }
.bubble { font-family: var(--mono); font-size: 13px; line-height: 1.75;
color: var(--text1); white-space: pre-wrap; word-break: break-word; max-width: 680px; }
.mg.user .bubble { color: var(--text2); }
.cursor2 { display: inline-block; width: 8px; height: 2px;
background: var(--amber); margin-left: 3px;
vertical-align: middle; animation: blink .6s infinite; }
@keyframes blink { 0%,49%{opacity:1} 50%,100%{opacity:0} }
.bmeta { font-size: 10px; color: var(--text3); margin-top: 6px; display: flex; gap: 12px; }
.bmeta b { color: var(--amber); }
/* Welcome */
.welcome { flex: 1; display: flex; flex-direction: column;
align-items: center; justify-content: center; gap: 14px;
text-align: center; padding: 40px; animation: fadeup 0.4s ease; }
.welcome .logo { font-family: var(--sans); font-size: 80px; font-weight: 800;
color: var(--amber); letter-spacing: -0.06em; line-height: 1;
text-shadow: 0 0 80px rgba(232,160,48,0.2); }
.welcome h2 { font-family: var(--sans); font-size: 26px; font-weight: 700;
color: var(--text0); letter-spacing: -0.02em; }
.welcome p { max-width: 420px; line-height: 1.8; font-size: 13px; color: var(--text2); }
.spec-chips { display: flex; gap: 7px; flex-wrap: wrap; justify-content: center; margin-top: 4px; }
.chip { font-size: 10px; padding: 4px 12px; border: 1px solid var(--bg3);
border-radius: 20px; color: var(--text2); letter-spacing: 0.07em;
text-transform: uppercase; background: var(--bg2); }
/* Input */
.inputbar { padding: 14px 20px; border-top: 1px solid var(--bg3); background: var(--bg1); }
.inputwrap { background: var(--bg2); border: 1px solid var(--bg3);
border-radius: var(--radius); display: flex; align-items: flex-end;
padding: 3px 3px 3px 14px; gap: 5px; transition: border-color .13s; }
.inputwrap:focus-within { border-color: var(--amber); }
#inp { flex: 1; background: none; border: none; outline: none;
color: var(--text0); font-size: 13px; font-family: var(--mono);
resize: none; line-height: 1.6; max-height: 120px; padding: 8px 0; }
#inp::placeholder { color: var(--text3); }
.sbtn { background: var(--amber); border: none; cursor: pointer;
width: 34px; height: 34px; border-radius: var(--radius); display: flex;
align-items: center; justify-content: center; flex-shrink: 0;
transition: .13s; align-self: flex-end; margin-bottom: 3px; }
.sbtn:hover { background: var(--amber2); }
.sbtn:disabled { opacity: .3; cursor: not-allowed; }
.sbtn svg { fill: var(--bg0); }
.input-hint { margin-top: 6px; font-size: 10px; color: var(--text3);
text-align: right; letter-spacing: 0.04em; }
/* Benchmark modal */
#bov { display: none; position: fixed; inset: 0; background: rgba(10,9,8,.85);
z-index: 100; align-items: center; justify-content: center; }
#bov.on { display: flex; }
.bmod { background: var(--bg1); border: 1px solid var(--bg3);
border-radius: 8px; padding: 24px; width: 520px; max-height: 80vh; overflow-y: auto;
animation: fadeup 0.18s ease; }
.bmod h3 { font-family: var(--sans); font-size: 16px; font-weight: 700;
margin-bottom: 16px; color: var(--text0); }
.btbl { width: 100%; border-collapse: collapse; font-size: 11px; font-family: var(--mono); }
.btbl th, .btbl td { padding: 7px 10px; text-align: left; border-bottom: 1px solid var(--bg3); }
.btbl th { color: var(--text3); font-weight: 400; font-size: 9px;
text-transform: uppercase; letter-spacing: 0.1em; }
.btbl td { color: var(--text2); }
.btbl td.good { color: var(--green); }
.btbl td.mid { color: var(--amber); }
.btbl td.bad { color: var(--red); }
.bench-summary { display: grid; grid-template-columns: 1fr 1fr; gap: 1px;
background: var(--bg3); border: 1px solid var(--bg3); border-radius: var(--radius);
overflow: hidden; margin-bottom: 16px; }
.bench-stat { background: var(--bg2); padding: 14px 16px; text-align: center; }
.bench-stat .bval { font-family: var(--sans); font-size: 26px; font-weight: 800;
color: var(--amber); letter-spacing: -0.03em; }
.bench-stat .blbl { font-size: 9px; color: var(--text3);
text-transform: uppercase; letter-spacing: 0.1em; margin-top: 2px; }
</style>
</head>
<body>
<div class="app">
<aside class="sidebar">
<div class="sb-head">
<h2><span>KV</span>Infer</h2>
<p>152M · GPT-2 · AVX2 + OpenMP · KV-Cache</p>
</div>
<div class="sb-body">
<!-- Live performance card: values are filled in by send() while a reply streams.
     Each value starts as an em dash, the same placeholder clearChat() resets to. -->
<div class="card">
  <div class="card-title">
    <span class="dot idle" id="dot-s"></span>Live Performance
  </div>
  <div class="srow"><span class="slabel">Throughput</span>
    <span class="sval green" id="s-tps">—</span></div>
  <div class="srow"><span class="slabel">TTFT</span>
    <span class="sval blue" id="s-ttft">—</span></div>
  <div class="srow"><span class="slabel">Tokens out</span>
    <span class="sval" id="s-toks">—</span></div>
  <div class="srow"><span class="slabel">Last latency</span>
    <span class="sval yellow" id="s-lat">—</span></div>
</div>
<!-- Throughput history card: sparkline canvas drawn by drawSpark(), plus session
     average/peak. Values start as an em dash, matching what clearChat() resets to. -->
<div class="card">
  <div class="card-title">Throughput History (tok/s)</div>
  <canvas id="spark"></canvas>
  <div style="margin-top:7px">
    <div class="srow"><span class="slabel">Session avg</span>
      <span class="sval green" id="s-avg">—</span></div>
    <div class="srow"><span class="slabel">Session peak</span>
      <span class="sval green" id="s-peak">—</span></div>
  </div>
</div>
<!-- Session card: counters, engine cache size, server RAM, and the Clear/Benchmark actions.
     Values that are only known after a request start as an em dash instead of blank. -->
<div class="card">
  <div class="card-title">Session</div>
  <div class="srow"><span class="slabel">Turns</span>
    <span class="sval" id="s-turns">0</span></div>
  <div class="srow"><span class="slabel">Total tokens</span>
    <span class="sval" id="s-totok">0</span></div>
  <div class="srow"><span class="slabel">Tokens in engine</span>
    <span class="sval blue" id="s-engcache">—</span></div>
  <div class="srow"><span class="slabel">Server RAM</span>
    <span class="sval" id="s-ram">—</span></div>
  <div style="display:flex;gap:6px;margin-top:10px">
    <button class="btn btn-s btn-sm btn-full" type="button" onclick="clearChat()">↺ Clear</button>
    <button class="btn btn-p btn-sm btn-full" type="button" onclick="openBench()">⊞ Benchmark</button>
  </div>
</div>
<!-- System prompt card: the textarea value is sent as system_prompt with every chat request.
     aria-labelledby gives the textarea an accessible name from the card title. -->
<div class="card">
  <div class="card-title" id="sysprompt-title">System Prompt</div>
  <textarea class="sysprompt" id="sysprompt" rows="3" aria-labelledby="sysprompt-title"
  >You are a helpful, concise, and friendly AI assistant.</textarea>
</div>
<!-- Generation card: sampling controls read by send() when building the request payload.
     Each label is bound to its slider with `for`, so the slider has an accessible name
     and clicking the label focuses it. -->
<div class="card">
  <div class="card-title">Generation</div>
  <div style="display:flex;flex-direction:column;gap:10px;margin-top:2px">
    <div class="pgroup">
      <label for="p-temp">Temperature <span id="v-temp">0.70</span></label>
      <input type="range" id="p-temp" min="0.1" max="2.0" step="0.05" value="0.7"
        oninput="document.getElementById('v-temp').textContent=parseFloat(this.value).toFixed(2)">
    </div>
    <div class="pgroup">
      <label for="p-topk">Top-K <span id="v-topk">40</span></label>
      <input type="range" id="p-topk" min="1" max="200" step="1" value="40"
        oninput="document.getElementById('v-topk').textContent=this.value">
    </div>
    <div class="pgroup">
      <label for="p-maxt">Max tokens <span id="v-maxt">200</span></label>
      <input type="range" id="p-maxt" min="20" max="500" step="10" value="200"
        oninput="document.getElementById('v-maxt').textContent=this.value">
    </div>
  </div>
</div>
</div>
</aside>
<main class="chat">
<header class="chat-hdr">
<div class="badge">KVInfer · 152M</div>
<h1>Chat</h1>
<div class="spc"></div>
<div class="statpill">
<span class="dot idle" id="dot-h"></span>
<span id="hstatus">Idle</span>
</div>
</header>
<div class="msgs" id="msgs">
<div class="welcome" id="welcome">
<div class="logo">KV</div>
<h2>KVInfer Studio</h2>
<p>152M · GPT-2 Decoder-Only · Custom C++ inference engine with AVX2 SIMD, OpenMP parallelism &amp; persistent session KV-cache.</p>
<div class="spec-chips">
<span class="chip">152M params</span>
<span class="chip">AVX2 SIMD</span>
<span class="chip">OpenMP</span>
<span class="chip">KV Cache</span>
<span class="chip">Streaming</span>
</div>
</div>
</div>
<!-- Composer: Enter sends, Shift+Enter inserts a newline (see handleKey).
     The textarea and the icon-only send button carry aria-labels because neither
     has visible label text; the SVG is decorative and hidden from assistive tech. -->
<div class="inputbar">
  <div class="inputwrap">
    <textarea id="inp" rows="1" placeholder="Send a message..." aria-label="Message"
      onkeydown="handleKey(event)"></textarea>
    <button class="sbtn" id="sbtn" type="button" aria-label="Send message" onclick="send()">
      <svg width="15" height="15" viewBox="0 0 24 24" aria-hidden="true"><path d="M2 21l21-9L2 3v7l15 2-15 2v7z"/></svg>
    </button>
  </div>
  <div class="input-hint">Enter to send &nbsp;·&nbsp; Shift+Enter for newline</div>
</div>
</main>
</div>
<!-- Benchmark overlay: shown/hidden by toggling the "on" class (openBench / closeBench).
     The close button now has a visible glyph and an accessible name; it was empty before. -->
<div id="bov">
  <div class="bmod">
    <div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:16px">
      <h3>📊 Quick Benchmark</h3>
      <button class="btn btn-s btn-sm" type="button" aria-label="Close benchmark" onclick="closeBench()">✕</button>
    </div>
    <div id="bcontent">
      <p style="color:var(--text2);font-size:12px;line-height:1.7;margin-bottom:14px">Runs 5 built-in prompts and measures throughput, TTFT, and per-token latency.</p>
      <button class="btn btn-p btn-full" style="margin-top:4px" id="btnbench" type="button" onclick="runBench()">▶ Run Benchmark</button>
    </div>
  </div>
</div>
<script>
// ─────────────────────────────────────────
// State
// ─────────────────────────────────────────
// Prefix prepended to every backend path; empty means requests use relative URLs.
const API = '';
// Identifier sent as session_id with chat requests; clearChat() replaces it with a new UUID.
let sessId = crypto.randomUUID();
// True while send() is waiting on / streaming a reply; written only through setBusy().
let busy = false;
// Number of completed turns (incremented when a 'done' chunk arrives).
let turnCount = 0;
// Running count of 'token' chunks received since the last clear.
let totalToks = 0;
// Recent per-turn tok/s values plotted by drawSpark(); send() caps it at 30 entries.
let tpsHist = [];
// Highest tok/s value seen since the last clear.
let peakTps = 0;
// Last tokens_in_engine value returned by /chat/history.
let engCache = 0;
// Composer textarea. On every edit it is collapsed and then re-grown to fit its
// content, capped at 120px (the same limit as the CSS max-height).
const inp = document.getElementById('inp');
inp.addEventListener('input', () => {
  const { style } = inp;
  style.height = 'auto'; // collapse first so scrollHeight reflects the content only
  const wanted = Math.min(inp.scrollHeight, 120);
  style.height = wanted + 'px';
});
/**
 * Keydown handler for the composer: a plain Enter submits the message,
 * while Shift+Enter (and every other key) is left to the browser.
 * @param {KeyboardEvent} e
 */
function handleKey(e) {
  const isPlainEnter = e.key === 'Enter' && !e.shiftKey;
  if (!isPlainEnter) return;
  e.preventDefault(); // stop the newline from being inserted
  send();
}
/**
 * Switches the UI between "generating" and "idle".
 * Records the flag, disables/enables the send button, restyles both status dots
 * and updates the header status text.
 * @param {boolean} v  true while a reply is being generated
 */
function setBusy(v) {
  busy = v;
  document.getElementById('sbtn').disabled = v;
  const dotState = v ? ' live' : ' idle';
  for (const dotId of ['dot-s', 'dot-h']) {
    document.getElementById(dotId).className = 'dot' + dotState;
  }
  const label = v ? 'Generating...' : 'Idle';
  document.getElementById('hstatus').textContent = label;
}
/** Scrolls the message list to its bottom edge so the newest content is visible. */
function scrollBot() {
  const pane = document.getElementById('msgs');
  const bottom = pane.scrollHeight;
  pane.scrollTop = bottom;
}
/** Removes the welcome panel from the message list if it is still present. */
function hideWelcome() {
  document.getElementById('welcome')?.remove();
}
/**
 * Escapes a value for insertion via innerHTML and turns newlines into <br>.
 *
 * Accepts any value: null/undefined become an empty string and other
 * non-strings are stringified, so callers passing an optional field
 * (e.g. a missing error message) no longer hit a TypeError.
 * Quotes are escaped as well so the result is also safe inside attribute values.
 *
 * @param {*} s  text to escape
 * @returns {string} HTML-safe markup
 */
function esc(s) {
  const ENTITIES = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
    '\n': '<br>',
  };
  // Single pass, so already-produced entities are never escaped twice.
  return String(s ?? '').replace(/[&<>"'\n]/g, ch => ENTITIES[ch]);
}
/**
 * Appends a user message row to the chat and scrolls it into view.
 * The welcome panel is removed first; the text is escaped before insertion.
 * @param {string} text  raw message text typed by the user
 */
function addUserMsg(text) {
  hideWelcome();
  const row = Object.assign(document.createElement('div'), {
    className: 'mg user',
    innerHTML: `<div class="mg-role"><div class="mlabel">You</div></div><div class="mg-body"><div class="bubble">${esc(text)}</div></div>`,
  });
  document.getElementById('msgs').appendChild(row);
  scrollBot();
}
/**
 * Appends an empty assistant message row (with a blinking cursor) to the chat
 * and returns handles to the parts that send() fills in.
 *
 * Element ids use a random UUID rather than a millisecond timestamp, so two
 * slots can never share an id, and the nodes are resolved from the new row
 * itself instead of a document-wide id lookup.
 *
 * @returns {{bubble: HTMLElement, meta: HTMLElement}} text bubble and stats line
 */
function createAssistantSlot() {
  const row = document.createElement('div');
  row.className = 'mg asst';
  const uid = crypto.randomUUID();
  row.innerHTML = `
<div class="mg-role"><div class="mlabel">Model</div></div>
<div class="mg-body">
<div class="bubble" id="bubble_${uid}"><span class="cursor2"></span></div>
<div class="bmeta" id="meta_${uid}"></div>
</div>`;
  document.getElementById('msgs').appendChild(row);
  scrollBot();
  return { bubble: row.querySelector('.bubble'), meta: row.querySelector('.bmeta') };
}
/**
 * Sends the composer text to POST /chat and streams the reply into a new
 * assistant row, updating the sidebar statistics as chunks arrive.
 *
 * The response is read as newline-delimited `data:` lines. Each payload is
 * either the literal `[DONE]` or JSON with `type` of:
 *   - 'token' : appends `text` to the bubble and bumps the token counters
 *   - 'done'  : finalises the bubble, records tps / total_ms, refreshes the
 *               engine-cache figure from /chat/history
 *   - 'error' : appends the error message to the bubble
 *
 * Does nothing while another request is in flight or when the input is blank.
 * Network/HTTP failures are rendered in the bubble; they are not rethrown.
 */
async function send() {
  if (busy) return;
  const text = inp.value.trim();
  if (!text) return;
  inp.value = ''; inp.style.height = 'auto';
  addUserMsg(text);
  setBusy(true);
  const { bubble, meta } = createAssistantSlot();
  let content = '';
  const t0 = Date.now();
  let firstTokT = null;
  let tokCount = 0;
  const payload = {
    message: text,
    session_id: sessId,
    system_prompt: document.getElementById('sysprompt').value,
    max_new_tokens: parseInt(document.getElementById('p-maxt').value, 10),
    temperature: parseFloat(document.getElementById('p-temp').value),
    top_k: parseInt(document.getElementById('p-topk').value, 10),
  };

  // Processes one complete stream line. Returns true once the '[DONE]'
  // terminator has been seen, so the caller can stop reading.
  const handleLine = (line) => {
    if (!line.startsWith('data:')) return false;
    const raw = line.slice(5).trim();
    if (raw === '[DONE]') return true;
    let chunk;
    try { chunk = JSON.parse(raw); } catch { return false; }
    // JSON such as `null` or a bare number parses fine but is not a chunk.
    if (!chunk || typeof chunk !== 'object') return false;
    const now = Date.now();
    if (chunk.type === 'token') {
      if (firstTokT === null) firstTokT = now;
      content += chunk.text ?? '';
      tokCount++;
      totalToks++;
      document.getElementById('s-totok').textContent = totalToks;
      document.getElementById('s-toks').textContent = tokCount + ' tok';
      document.getElementById('s-ttft').textContent = (firstTokT - t0) + ' ms';
      bubble.innerHTML = esc(content) + '<span class="cursor2"></span>';
      scrollBot();
    } else if (chunk.type === 'done') {
      bubble.innerHTML = esc(content);
      const ttft = firstTokT !== null ? (firstTokT - t0) : 0;
      // Coerce so a missing or string-typed field cannot break toFixed() below.
      const tps = Number(chunk.tps) || 0;
      const ms = Number(chunk.total_ms) || 0;
      meta.innerHTML = `<b>${tps}</b> tok/s · <b>TTFT</b> ${ttft}ms · <b>${tokCount}</b> tokens · <b>${ms.toFixed(0)}ms</b> total`;
      document.getElementById('s-tps').textContent = tps + ' tok/s';
      document.getElementById('s-lat').textContent = ms.toFixed(0) + ' ms';
      tpsHist.push(tps);
      if (tpsHist.length > 30) tpsHist.shift();
      if (tps > peakTps) peakTps = tps;
      const avg = (tpsHist.reduce((a, b) => a + b, 0) / tpsHist.length).toFixed(1);
      document.getElementById('s-avg').textContent = avg + ' tok/s';
      document.getElementById('s-peak').textContent = peakTps.toFixed(1) + ' tok/s';
      if (chunk.session_id) {
        // Best-effort refresh of the engine cache figure; failures are ignored.
        fetch(`${API}/chat/history?session_id=${encodeURIComponent(chunk.session_id)}`)
          .then(r => r.json())
          .then(d => {
            engCache = d.tokens_in_engine || 0;
            document.getElementById('s-engcache').textContent = engCache + ' tok';
          }).catch(() => {});
      }
      turnCount++;
      document.getElementById('s-turns').textContent = turnCount;
      drawSpark();
    } else if (chunk.type === 'error') {
      const msg = String(chunk.message ?? 'unknown error');
      bubble.innerHTML += `<br><br><span style="color:var(--red)">Error: ${esc(msg)}</span>`;
    }
    return false;
  };

  try {
    const resp = await fetch(`${API}/chat`, {
      method: 'POST',
      headers: {'Content-Type':'application/json'},
      body: JSON.stringify(payload),
    });
    if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
    if (!resp.body) throw new Error('Empty response body');
    const reader = resp.body.getReader();
    const decoder = new TextDecoder();
    let buf = '';
    let finished = false;
    while (!finished) {
      const {done, value} = await reader.read();
      if (done) break;
      buf += decoder.decode(value, {stream:true});
      let nl;
      while (!finished && (nl = buf.indexOf('\n')) !== -1) {
        const line = buf.slice(0, nl).trim();
        buf = buf.slice(nl + 1);
        finished = handleLine(line);
      }
    }
    if (finished) {
      // Terminator seen: release the connection instead of reading to EOF.
      reader.cancel().catch(() => {});
    } else {
      // Stream ended without a trailing newline: flush the last partial line.
      buf += decoder.decode();
      const tail = buf.trim();
      if (tail) handleLine(tail);
    }
  } catch (err) {
    if (tokCount === 0) {
      bubble.innerHTML = `<span style="color:var(--red)">Connection error: ${esc(String(err.message))}</span>`;
    } else {
      meta.innerHTML += ` <span style="color:var(--amber)">[Stream Interrupted]</span>`;
    }
  } finally {
    const cur = bubble.querySelector('.cursor2');
    if (cur) cur.remove();
    setBusy(false);
    scrollBot();
  }
}
/**
 * Redraws the throughput sparkline from tpsHist.
 * The canvas backing store is resized to the element's CSS size times the
 * device pixel ratio, then the history is plotted as a stroked line with a
 * gradient fill underneath. With fewer than two samples the canvas is only cleared.
 */
function drawSpark() {
  const canvas = document.getElementById('spark');
  const g = canvas.getContext('2d');
  const ratio = window.devicePixelRatio || 1;
  // Assigning width/height resets the context, so the scale never accumulates.
  canvas.width = canvas.clientWidth * ratio;
  canvas.height = canvas.clientHeight * ratio;
  g.scale(ratio, ratio);
  const { clientWidth: W, clientHeight: H } = canvas;
  g.clearRect(0, 0, W, H);

  const samples = tpsHist;
  if (samples.length < 2) return;

  const last = samples.length - 1;
  const ceiling = Math.max(...samples) * 1.15 || 1; // 15% headroom; 1 avoids divide-by-zero
  const dx = W / last;

  const fillGrad = g.createLinearGradient(0, 0, 0, H);
  fillGrad.addColorStop(0, 'rgba(232,160,48,.28)');
  fillGrad.addColorStop(1, 'rgba(232,160,48,.02)');

  g.beginPath();
  for (let i = 0; i <= last; i++) {
    const x = i * dx;
    const y = H - (samples[i] / ceiling) * (H - 4) - 2;
    if (i === 0) {
      g.moveTo(x, y);
    } else {
      g.lineTo(x, y);
    }
  }
  g.strokeStyle = '#e8a030';
  g.lineWidth = 1.5;
  g.stroke();

  // Close the same path down to the baseline and fill the area under the line.
  g.lineTo(last * dx, H);
  g.lineTo(0, H);
  g.closePath();
  g.fillStyle = fillGrad;
  g.fill();
}
/**
 * Resets the conversation: asks the server to drop the current session
 * (best-effort; failures are ignored), starts a fresh session id, zeroes the
 * local counters and restores the welcome panel and stat placeholders.
 *
 * The restored welcome panel uses the same copy and chips as the one shown
 * on initial page load, so the screen looks identical before and after a clear.
 * Does nothing while a reply is being generated.
 */
async function clearChat() {
  if (busy) return;
  await fetch(`${API}/chat/reset`, {
    method: 'POST', headers: {'Content-Type':'application/json'},
    body: JSON.stringify({session_id: sessId}),
  }).catch(() => {});
  sessId = crypto.randomUUID();
  turnCount = 0; totalToks = 0; tpsHist = []; peakTps = 0; engCache = 0;
  document.getElementById('msgs').innerHTML = `
<div class="welcome" id="welcome">
<div class="logo">KV</div>
<h2>KVInfer Studio</h2>
<p>152M · GPT-2 Decoder-Only · Custom C++ inference engine with AVX2 SIMD, OpenMP parallelism &amp; persistent session KV-cache.</p>
<div class="spec-chips">
<span class="chip">152M params</span>
<span class="chip">AVX2 SIMD</span>
<span class="chip">OpenMP</span>
<span class="chip">KV Cache</span>
<span class="chip">Streaming</span>
</div>
</div>`;
  ['s-turns', 's-totok'].forEach(id => document.getElementById(id).textContent = '0');
  ['s-tps', 's-ttft', 's-lat', 's-avg', 's-peak', 's-toks', 's-engcache'].forEach(
    id => document.getElementById(id).textContent = '—');
  drawSpark(); // history is empty now, so this just clears the canvas
}
/**
 * Fetches /metrics and shows system RAM usage (percent) and process RAM (MB)
 * in the sidebar. Any failure (network error, non-OK status, malformed body)
 * is ignored and leaves the previous value on screen.
 */
async function pollMetrics() {
  try {
    const resp = await fetch(`${API}/metrics`);
    if (!resp.ok) return;
    const stats = await resp.json();
    const ramCell = document.getElementById('s-ram');
    ramCell.textContent = `${stats.system_ram_used_pct.toFixed(0)}% (${stats.process_ram_mb} MB)`;
  } catch {}
}
// Load the metrics once right away, then refresh them every 5000 ms.
pollMetrics();
setInterval(pollMetrics, 5000);
// ─────────────────────────────────────────
// Benchmark Modal Functions
// ─────────────────────────────────────────
/** Shows the benchmark overlay by adding its "on" class. */
function openBench() {
  const overlay = document.getElementById('bov');
  overlay.classList.toggle('on', true);
}
/** Hides the benchmark overlay by removing its "on" class. */
function closeBench() {
  const overlay = document.getElementById('bov');
  overlay.classList.toggle('on', false);
}
/**
 * Runs the five built-in prompts sequentially against POST /chat and renders
 * a per-prompt table (tokens, TTFT, tok/s) plus an averages summary.
 *
 * Fixes over the previous version:
 *  - the run button is re-attached after the results table replaces the
 *    panel content, so "Run Benchmark Again" is actually visible and usable;
 *  - a non-OK HTTP response marks the row as Failed instead of a 0-token success;
 *  - averages are computed over successful runs only;
 *  - reading stops at the '[DONE]' terminator;
 *  - each throwaway session is reset afterwards (best-effort) so the server
 *    does not accumulate benchmark sessions.
 */
async function runBench() {
  const btn = document.getElementById('btnbench');
  const bcontent = document.getElementById('bcontent');
  btn.disabled = true;
  btn.textContent = "Running Benchmark...";
  const prompts = [
    "What is the capital of France?",
    "Write a short poem about coding.",
    "Explain quantum computing in simple terms.",
    "What are the benefits of exercise?",
    "Translate 'Hello world' to Spanish."
  ];
  bcontent.innerHTML = `
<table class="btbl" style="margin-top: 15px;">
<thead>
<tr>
<th>Prompt</th>
<th>Tokens</th>
<th>TTFT (ms)</th>
<th>TPS (tok/s)</th>
</tr>
</thead>
<tbody id="bench-tbody">
</tbody>
</table>
`;
  // Replacing innerHTML detached the button; put it back under the table.
  bcontent.appendChild(btn);
  const tbody = document.getElementById('bench-tbody');
  let totalTps = 0;
  let totalTtft = 0;
  let okRuns = 0;

  for (let i = 0; i < prompts.length; i++) {
    const tr = document.createElement('tr');
    tr.innerHTML = `<td>Prompt ${i+1}</td><td colspan="3" style="color:var(--amber)">Testing...</td>`;
    tbody.appendChild(tr);
    const benchSession = crypto.randomUUID();
    const t0 = Date.now();
    let firstTokT = null;
    let tokCount = 0;
    let finalTps = 0;

    // Handles one stream line; returns true when the terminator is reached.
    const consume = (line) => {
      if (!line.startsWith('data:')) return false;
      const raw = line.slice(5).trim();
      if (raw === '[DONE]') return true;
      let chunk;
      try { chunk = JSON.parse(raw); } catch { return false; }
      if (!chunk || typeof chunk !== 'object') return false;
      if (chunk.type === 'token') {
        if (firstTokT === null) firstTokT = Date.now();
        tokCount++;
      } else if (chunk.type === 'done') {
        finalTps = Number(chunk.tps) || 0;
      }
      return false;
    };

    try {
      const payload = {
        message: prompts[i],
        session_id: benchSession,
        system_prompt: "You are a helpful assistant.",
        max_new_tokens: 50,
        temperature: 0.1,
        top_k: 40
      };
      const resp = await fetch(`${API}/chat`, {
        method: 'POST',
        headers: {'Content-Type':'application/json'},
        body: JSON.stringify(payload),
      });
      if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
      if (!resp.body) throw new Error('Empty response body');
      const reader = resp.body.getReader();
      const decoder = new TextDecoder();
      let buf = '';
      let finished = false;
      while (!finished) {
        const {done, value} = await reader.read();
        if (done) break;
        buf += decoder.decode(value, {stream:true});
        let nl;
        while (!finished && (nl = buf.indexOf('\n')) !== -1) {
          const line = buf.slice(0, nl).trim();
          buf = buf.slice(nl + 1);
          finished = consume(line);
        }
      }
      if (finished) {
        reader.cancel().catch(() => {});
      } else {
        // Flush a final line that arrived without a trailing newline.
        buf += decoder.decode();
        const tail = buf.trim();
        if (tail) consume(tail);
      }
      const ttft = firstTokT !== null ? (firstTokT - t0) : 0;
      totalTps += finalTps;
      totalTtft += ttft;
      okRuns++;
      tr.innerHTML = `
<td>Prompt ${i+1}</td>
<td>${tokCount}</td>
<td class="${ttft < 800 ? 'good' : 'mid'}">${ttft}</td>
<td class="good">${finalTps}</td>
`;
    } catch (e) {
      tr.innerHTML = `<td>Prompt ${i+1}</td><td colspan="3" class="bad">Failed</td>`;
    } finally {
      // Best-effort cleanup of the one-off session, outside the timed section.
      // NOTE(review): assumes /chat/reset tolerates unknown session ids — confirm server-side.
      await fetch(`${API}/chat/reset`, {
        method: 'POST', headers: {'Content-Type':'application/json'},
        body: JSON.stringify({session_id: benchSession}),
      }).catch(() => {});
    }
  }

  // Average over successful runs only; show a dash when every run failed.
  const avgTps = okRuns ? (totalTps / okRuns).toFixed(1) : '—';
  const avgTtft = okRuns ? (totalTtft / okRuns).toFixed(0) + 'ms' : '—';
  const summaryHTML = `
<div class="bench-summary" style="margin-top: 15px;">
<div class="bench-stat">
<div class="bval">${avgTps}</div>
<div class="blbl">Avg Tok/s</div>
</div>
<div class="bench-stat">
<div class="bval">${avgTtft}</div>
<div class="blbl">Avg TTFT</div>
</div>
</div>
`;
  bcontent.insertAdjacentHTML('afterbegin', summaryHTML);
  btn.textContent = "▶ Run Benchmark Again";
  btn.disabled = false;
}
</script>
</body>
</html>