<!--
LLaMA.Cpp / index.html
NOT-OMEGA's picture
Update index.html
0916a28 verified
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>KVInfer · Llama 1B</title>
<!-- Google Fonts: the stylesheet comes from fonts.googleapis.com, the font files from
     fonts.gstatic.com (fetched in CORS mode, hence the crossorigin attribute). -->
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@300;400;500;700&family=Outfit:wght@300;500;700;900&display=swap" rel="stylesheet">
<style>
/* Reset: border-box sizing and zero margin/padding on every element and its ::before/::after. */
*,*::before,*::after{box-sizing:border-box;margin:0;padding:0}
/* Design tokens referenced by the rules below. */
:root{
/* Surfaces: --bg is the page background; --bg1 to --bg4 are progressively lighter shades used for panels and borders. */
--bg: #080b0a;
--bg1: #0f1512;
--bg2: #161e1b;
--bg3: #1e2a26;
--bg4: #263530;
/* Accent green plus two darker steps (--acc2 is the hover fill, --acc3 starts the progress-bar gradient). */
--acc: #00e5a0;
--acc2: #00c484;
--acc3: #009963;
/* The accent colour at 8% and 15% alpha. --dimb is not referenced elsewhere in this stylesheet. */
--dim: rgba(0,229,160,0.08);
--dimb: rgba(0,229,160,0.15);
/* Status colours used by the .r / .b / .y helpers and by error and warning text. */
--red: #ff5c5c;
--blue: #4db8ff;
--warn: #ffb347;
/* Text colours, brightest (--t0) to dimmest (--t3). */
--t0: #f0faf6;
--t1: #b8d4ca;
--t2: #6a9a8e;
--t3: #3a5a52;
/* Font stacks. */
--mono: 'JetBrains Mono', monospace;
--sans: 'Outfit', sans-serif;
/* Shared corner radius and sidebar width. */
--r: 6px;
--sb: 272px;
}
/* The page itself never scrolls (overflow:hidden); .sb-body, .bench-results and .msgs scroll internally. */
html,body{height:100%;background:var(--bg);color:var(--t0);font-family:var(--mono);font-size:13px;overflow:hidden}
/* ══ LAYOUT ══════════════════════════════════════════════ */
/* Two-column shell: the sidebar (.sb) sits next to the flexible chat column (.chat). */
.app{display:flex;height:100vh}
/* ══ SIDEBAR ═════════════════════════════════════════════ */
/* width and min-width are both pinned to --sb; the sidebar is a column flex container for logo, tabs and panels. */
.sb{width:var(--sb);min-width:var(--sb);background:var(--bg1);
border-right:1px solid var(--bg4);display:flex;flex-direction:column;overflow:hidden}
.sb-logo{padding:14px 16px;background:var(--bg);border-bottom:1px solid var(--bg4);flex-shrink:0}
.sb-logo-row{display:flex;align-items:center;gap:8px}
.logo-box{width:30px;height:30px;background:var(--acc);border-radius:5px;
display:flex;align-items:center;justify-content:center;flex-shrink:0}
.logo-box svg{fill:var(--bg)}
.sb-logo h2{font-family:var(--sans);font-size:17px;font-weight:900;letter-spacing:-0.04em}
/* <em> inside the logo is used for colour only, so the italic is removed. */
.sb-logo h2 em{color:var(--acc);font-style:normal}
.sb-logo p{font-size:9px;color:var(--t2);margin-top:3px;letter-spacing:0.1em;text-transform:uppercase}
/* Tabs */
.sb-tabs{display:flex;border-bottom:1px solid var(--bg4);flex-shrink:0}
/* Tab buttons: all borders are removed except a transparent 2px bottom border that .on colours in. */
.sb-tab{flex:1;padding:8px;text-align:center;font-size:9px;font-weight:700;
color:var(--t2);text-transform:uppercase;letter-spacing:0.1em;cursor:pointer;
border-bottom:2px solid transparent;transition:.15s;background:none;border-top:none;
border-left:none;border-right:none;font-family:var(--mono)}
/* .on is added to and removed from the tab buttons by switchTab(). */
.sb-tab.on{color:var(--acc);border-bottom-color:var(--acc);background:var(--dim)}
/* Scroll body */
/* flex:1 + overflow-y:auto makes this the scrolling region when its parent is a flex column. */
.sb-body{flex:1;overflow-y:auto;padding:10px;scrollbar-width:thin;scrollbar-color:var(--bg4) transparent}
/* Cards */
.card{background:var(--bg2);border:1px solid var(--bg3);border-radius:var(--r);
padding:10px 12px;margin-bottom:8px}
/* .ct is the small uppercase card title. */
.ct{font-size:9px;font-weight:700;color:var(--t2);text-transform:uppercase;
letter-spacing:0.1em;margin-bottom:8px;display:flex;align-items:center;gap:6px}
/* Stat rows */
/* .sr is one label/value row; .sl is the label, .sv the value (tabular digits so the width stays stable). */
.sr{display:flex;justify-content:space-between;align-items:center;padding:3px 0}
.sl{color:var(--t1);font-size:11px}.sv{font-size:12px;font-weight:700;color:var(--t0);font-variant-numeric:tabular-nums}
/* Colour helpers: green, blue, yellow (warning), red. */
.g{color:var(--acc)}.b{color:var(--blue)}.y{color:var(--warn)}.r{color:var(--red)}
/* Live dot */
/* setBusy() switches the dots between "dot off" (static) and "dot on" (pulsing). */
.dot{display:inline-block;width:6px;height:6px;border-radius:50%}
.dot.off{background:var(--t3)}.dot.on{background:var(--acc);animation:pulse 1.4s infinite}
@keyframes pulse{0%,100%{opacity:1;transform:scale(1)}50%{opacity:.3;transform:scale(.7)}}
/* Sparkline */
/* CSS size of the canvas; drawSpark() sizes the bitmap from clientWidth/clientHeight times devicePixelRatio. */
canvas#spark{width:100%;height:44px;display:block;border-radius:var(--r);background:var(--bg)}
/* Textarea */
.ta{width:100%;background:var(--bg);border:1px solid var(--bg4);border-radius:var(--r);
color:var(--t0);font-size:11px;padding:8px 10px;resize:vertical;min-height:54px;
font-family:var(--mono);line-height:1.6}
/* The default focus outline is replaced by an accent-coloured border. */
.ta:focus{outline:none;border-color:var(--acc)}
/* Sliders */
/* .pg is one parameter group: a label row (name on the left, live value on the right) above a range input. */
.pg{display:flex;flex-direction:column;gap:4px}
.pg label{font-size:9px;color:var(--t2);text-transform:uppercase;letter-spacing:0.08em;
display:flex;justify-content:space-between}
.pg label span{color:var(--acc);font-weight:700}
/* The native look is removed (prefixed and standard property) so the 2px track below shows in every engine. */
.pg input[type=range]{width:100%;-webkit-appearance:none;appearance:none;height:2px;
background:var(--bg4);border-radius:1px;outline:none}
/* Keyboard focus indicator replacing the outline removed above. */
.pg input[type=range]:focus-visible{outline:1px solid var(--acc);outline-offset:5px}
/* Thumb: WebKit/Blink and Gecko need separate rules; a selector list containing an unknown pseudo-element would be dropped as a whole. */
.pg input[type=range]::-webkit-slider-thumb{-webkit-appearance:none;width:11px;height:11px;
border-radius:50%;background:var(--acc);cursor:pointer;border:2px solid var(--bg1)}
.pg input[type=range]::-moz-range-thumb{width:11px;height:11px;
border-radius:50%;background:var(--acc);cursor:pointer;border:2px solid var(--bg1)}
/* Buttons */
/* .btn is the base; .bp = primary (accent fill), .bs = secondary (surface fill), .full = stretch to container width. */
.btn{display:inline-flex;align-items:center;gap:5px;padding:6px 12px;
border-radius:var(--r);border:1px solid var(--bg4);cursor:pointer;
font-size:10px;font-weight:700;font-family:var(--mono);
text-transform:uppercase;letter-spacing:0.05em;transition:.13s}
.bp{background:var(--acc);color:var(--bg);border-color:var(--acc)}
.bp:hover{background:var(--acc2)}.bs{background:var(--bg3);color:var(--t1)}
.bs:hover{background:var(--bg4)}.btn.full{width:100%;justify-content:center}
.btn:disabled{opacity:.3;cursor:not-allowed}
/* ══ BENCHMARK PANEL (sidebar tab) ═══════════════════════ */
/* Hidden by default. NOTE(review): no script in this file adds the `visible` class; switchTab() sets an inline display value instead. */
#bench-panel{display:none;flex-direction:column;height:100%;overflow:hidden}
#bench-panel.visible{display:flex}
/* The stats panel is a flex column so its .sb-body child (flex:1; overflow-y:auto) can scroll inside it. */
#stats-panel{display:flex;flex-direction:column;height:100%;overflow:hidden}
.bench-run-btn{margin:10px;flex-shrink:0}
.bench-results{flex:1;overflow-y:auto;padding:0 10px 10px;scrollbar-width:thin;scrollbar-color:var(--bg4) transparent}
/* Summary cards */
/* Two-column grid of .bstat tiles; .bv is the large value, .bl the caption. */
.bench-grid{display:grid;grid-template-columns:1fr 1fr;gap:6px;margin-bottom:10px}
.bstat{background:var(--bg2);border:1px solid var(--bg3);border-radius:var(--r);
padding:12px;text-align:center}
.bstat .bv{font-family:var(--sans);font-size:24px;font-weight:900;color:var(--acc);letter-spacing:-0.04em}
.bstat .bl{font-size:9px;color:var(--t2);text-transform:uppercase;letter-spacing:0.1em;margin-top:2px}
/* Per-prompt cards */
/* One .p-card per benchmark prompt; the fadeup keyframes are declared further down in this stylesheet. */
.p-card{background:var(--bg2);border:1px solid var(--bg3);border-radius:var(--r);
padding:9px 11px;margin-bottom:6px;animation:fadeup .2s ease}
/* Titles are kept on one line and truncated with an ellipsis. */
.p-card .p-title{font-size:10px;color:var(--t2);margin-bottom:6px;
white-space:nowrap;overflow:hidden;text-overflow:ellipsis}
/* .p-stat is a value (.pv) stacked over its key (.pk). */
.p-stats{display:flex;gap:10px}
.p-stat{display:flex;flex-direction:column;gap:2px}
.p-stat .pv{font-size:13px;font-weight:700;color:var(--acc);font-variant-numeric:tabular-nums}
.p-stat .pk{font-size:9px;color:var(--t2);text-transform:uppercase;letter-spacing:0.06em}
/* Progress track and bar; runBench() drives the bar through its inline width. */
.p-progress{height:2px;background:var(--bg4);border-radius:1px;margin-top:7px;overflow:hidden}
.p-bar{height:100%;background:linear-gradient(90deg,var(--acc3),var(--acc));
border-radius:1px;transition:width .6s ease;width:0%}
/* "Running…" line with a spinner: a ring whose top border is transparent, rotated continuously. */
.p-status{font-size:10px;color:var(--warn);display:flex;align-items:center;gap:5px}
.p-spin{width:8px;height:8px;border:1px solid var(--warn);border-top-color:transparent;
border-radius:50%;animation:spin .7s linear infinite;flex-shrink:0}
@keyframes spin{to{transform:rotate(360deg)}}
/* Placeholder shown in the results area before the first run. */
.bench-empty{text-align:center;padding:32px 16px;color:var(--t2);font-size:12px;line-height:1.8}
/* ══ CHAT MAIN ═══════════════════════════════════════════ */
/* Column layout: fixed header, scrolling message list (.msgs), fixed input bar. */
.chat{flex:1;display:flex;flex-direction:column;overflow:hidden}
.chat-hdr{height:50px;display:flex;align-items:center;padding:0 20px;
border-bottom:1px solid var(--bg4);background:var(--bg1);gap:8px;flex-shrink:0}
.badge{background:var(--dim);border:1px solid rgba(0,229,160,.25);border-radius:var(--r);
padding:3px 9px;font-size:10px;font-weight:700;color:var(--acc);font-family:var(--sans)}
.chat-hdr-title{font-family:var(--sans);font-size:13px;font-weight:600;color:var(--t1)}
/* .spc is a flexible spacer that pushes the status pill to the right edge of the header. */
.spc{flex:1}
.hpill{display:flex;align-items:center;gap:6px;font-size:10px;color:var(--t2);text-transform:uppercase;letter-spacing:.06em}
/* Messages */
.msgs{flex:1;overflow-y:auto;display:flex;flex-direction:column;
scrollbar-width:thin;scrollbar-color:var(--bg4) transparent}
/* Welcome screen */
/* flex:1 inside the .msgs column lets the welcome block fill and centre within the empty list. */
.welcome{flex:1;display:flex;flex-direction:column;align-items:center;
justify-content:center;gap:16px;text-align:center;padding:40px;animation:fadeup .4s ease}
.wlogo{font-family:var(--sans);font-size:60px;font-weight:900;color:var(--acc);
letter-spacing:-0.06em;line-height:1;text-shadow:0 0 60px rgba(0,229,160,.18)}
.welcome h2{font-family:var(--sans);font-size:22px;font-weight:700;
color:var(--t0);letter-spacing:-0.02em}
.welcome p{max-width:400px;line-height:1.8;font-size:12px;color:var(--t1)}
.chips{display:flex;gap:6px;flex-wrap:wrap;justify-content:center}
.chip{font-size:9px;padding:4px 10px;border:1px solid var(--bg4);border-radius:16px;
color:var(--t2);letter-spacing:.07em;text-transform:uppercase;background:var(--bg2)}
/* Message bubbles */
/* .mg is one message row: a 60px role column (.mg-role) and the body (.mg-body). */
.mg{display:flex;gap:0;padding:14px 20px;border-bottom:1px solid var(--bg3);
animation:fadeup .18s ease}
.mg:last-child{border-bottom:none}
/* Entrance animation shared by .mg, .welcome and .p-card. */
@keyframes fadeup{from{opacity:0;transform:translateY(6px)}to{opacity:1;transform:none}}
.mg-role{width:60px;flex-shrink:0;padding-top:2px}
.mlabel{font-size:9px;font-weight:700;letter-spacing:.1em;text-transform:uppercase}
.mg.user .mlabel{color:var(--acc)}.mg.asst .mlabel{color:var(--blue)}
/* min-width:0 lets the body shrink below its content width inside the flex row. */
.mg-body{flex:1;min-width:0}
/* pre-wrap preserves whitespace and newlines in message text. */
.bubble{font-size:13px;line-height:1.75;color:var(--t1);
white-space:pre-wrap;word-break:break-word;max-width:660px}
.mg.user .bubble{color:var(--t2)}
/* .cur is the blinking cursor shown after the text while a reply streams in. */
.cur{display:inline-block;width:7px;height:2px;background:var(--acc);
margin-left:3px;vertical-align:middle;animation:blink .6s infinite}
@keyframes blink{0%,49%{opacity:1}50%,100%{opacity:0}}
/* .bmeta is the per-reply statistics line under a bubble. */
.bmeta{font-size:10px;color:var(--t3);margin-top:5px;display:flex;flex-wrap:wrap;gap:8px}
.bmeta b{color:var(--acc)}
/* Input */
.inputbar{padding:12px 16px;border-top:1px solid var(--bg4);background:var(--bg1);flex-shrink:0}
.inputwrap{background:var(--bg2);border:1px solid var(--bg4);border-radius:var(--r);
display:flex;align-items:flex-end;padding:3px 3px 3px 12px;gap:4px;transition:.13s}
/* The wrapper's border is the focus indicator, because #inp removes its own outline. */
.inputwrap:focus-within{border-color:var(--acc)}
/* max-height matches the 110px cap used by the script's input listener. */
#inp{flex:1;background:none;border:none;outline:none;color:var(--t0);
font-size:13px;font-family:var(--mono);resize:none;line-height:1.6;
max-height:110px;padding:7px 0}
#inp::placeholder{color:var(--t3)}
.sbtn{background:var(--acc);border:none;cursor:pointer;width:32px;height:32px;
border-radius:var(--r);display:flex;align-items:center;justify-content:center;
flex-shrink:0;transition:.13s;align-self:flex-end;margin-bottom:3px}
.sbtn:hover{background:var(--acc2)}.sbtn:disabled{opacity:.3;cursor:not-allowed}
.sbtn svg{fill:var(--bg)}
.hint{margin-top:5px;font-size:10px;color:var(--t3);text-align:right;letter-spacing:.04em}
</style>
</head>
<body>
<div class="app">
<!-- ══ SIDEBAR ════════════════════════════════════════════ -->
<aside class="sb" aria-label="Statistics, benchmark and parameters">
  <div class="sb-logo">
    <div class="sb-logo-row">
      <div class="logo-box">
        <svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true"><path d="M12 2L2 7l10 5 10-5-10-5zM2 17l10 5 10-5M2 12l10 5 10-5"/></svg>
      </div>
      <div>
        <h2>KV<em>Infer</em></h2>
        <p>1B · Llama 3.2 · RoPE · GQA · SwiGLU</p>
      </div>
    </div>
  </div>
  <!-- Tabs: switchTab() toggles the "on" class and shows the matching *-panel below -->
  <div class="sb-tabs">
    <button type="button" class="sb-tab on" id="tab-stats" onclick="switchTab('stats')">Stats</button>
    <button type="button" class="sb-tab" id="tab-bench" onclick="switchTab('bench')">Benchmark</button>
    <button type="button" class="sb-tab" id="tab-params" onclick="switchTab('params')">Params</button>
  </div>
  <!-- ── Stats Tab ── (every id="s-…" value is written by the script; "—" means no data yet) -->
  <div id="stats-panel">
    <div class="sb-body">
      <div class="card">
        <div class="ct"><span class="dot off" id="dot-s"></span>Live</div>
        <div class="sr"><span class="sl">Throughput</span><span class="sv g" id="s-tps">—</span></div>
        <div class="sr"><span class="sl">TTFT</span><span class="sv b" id="s-ttft">—</span></div>
        <div class="sr"><span class="sl">Tokens out</span><span class="sv" id="s-toks">—</span></div>
        <div class="sr"><span class="sl">Latency</span><span class="sv y" id="s-lat">—</span></div>
      </div>
      <div class="card">
        <div class="ct">Throughput History</div>
        <canvas id="spark" role="img" aria-label="Throughput history sparkline"></canvas>
        <div style="margin-top:6px">
          <div class="sr"><span class="sl">Session avg</span><span class="sv g" id="s-avg">—</span></div>
          <div class="sr"><span class="sl">Peak</span><span class="sv g" id="s-peak">—</span></div>
        </div>
      </div>
      <div class="card">
        <div class="ct">Session</div>
        <div class="sr"><span class="sl">Turns</span><span class="sv" id="s-turns">0</span></div>
        <div class="sr"><span class="sl">Total tokens</span><span class="sv" id="s-totok">0</span></div>
        <div class="sr"><span class="sl">KV cache</span><span class="sv b" id="s-kv">—</span></div>
        <div class="sr"><span class="sl">Server RAM</span><span class="sv" id="s-ram">—</span></div>
        <div style="display:flex;gap:6px;margin-top:8px">
          <button type="button" class="btn bs full" onclick="clearChat()">↺ Reset</button>
        </div>
      </div>
      <div class="card">
        <div class="ct">Engine</div>
        <div class="sr"><span class="sl">Engines ready</span><span class="sv g" id="s-eng">—</span></div>
        <div class="sr"><span class="sl">Active sessions</span><span class="sv" id="s-sess">—</span></div>
        <div class="sr"><span class="sl">Uptime</span><span class="sv" id="s-uptime">—</span></div>
      </div>
    </div>
  </div>
  <!-- ── Benchmark Tab ── (runBench() replaces the contents of #bench-results) -->
  <div id="bench-panel">
    <button type="button" class="btn bp bench-run-btn" id="btnbench" onclick="runBench()">▶ Run Benchmark</button>
    <div class="bench-results" id="bench-results">
      <div class="bench-empty" id="bench-empty">
        Run benchmark to see<br>performance metrics
      </div>
    </div>
  </div>
  <!-- ── Params Tab ── (the inline display:none is what switchTab() overrides, so it stays inline) -->
  <div id="params-panel" style="display:none;flex:1;overflow-y:auto">
    <div class="sb-body">
      <div class="card">
        <div class="ct">System Prompt</div>
        <textarea class="ta" id="sysprompt" rows="4" aria-label="System prompt">You are a helpful, concise, and friendly AI assistant.</textarea>
      </div>
      <div class="card">
        <div class="ct">Generation</div>
        <div style="display:flex;flex-direction:column;gap:10px;margin-top:2px">
          <div class="pg">
            <label for="p-temp">Temperature <span id="v-temp">0.70</span></label>
            <input type="range" id="p-temp" min="0.1" max="2.0" step="0.05" value="0.7"
                   oninput="document.getElementById('v-temp').textContent=parseFloat(this.value).toFixed(2)">
          </div>
          <div class="pg">
            <label for="p-topk">Top-K <span id="v-topk">40</span></label>
            <input type="range" id="p-topk" min="1" max="200" step="1" value="40"
                   oninput="document.getElementById('v-topk').textContent=this.value">
          </div>
          <div class="pg">
            <label for="p-maxt">Max tokens <span id="v-maxt">256</span></label>
            <input type="range" id="p-maxt" min="32" max="500" step="8" value="256"
                   oninput="document.getElementById('v-maxt').textContent=this.value">
          </div>
        </div>
      </div>
    </div>
  </div>
</aside>
<!-- ══ CHAT MAIN ══════════════════════════════════════════ -->
<main class="chat">
  <header class="chat-hdr">
    <div class="badge">Llama 3.2 · 1B</div>
    <span class="chat-hdr-title">KVInfer Studio</span>
    <div class="spc"></div>
    <!-- setBusy() switches the dot class and the status text between Idle and Generating… -->
    <div class="hpill">
      <span class="dot off" id="dot-h"></span>
      <span id="hstatus" role="status">Idle</span>
    </div>
  </header>
  <!-- Message list; the welcome block is removed when the first message is added -->
  <div class="msgs" id="msgs">
    <div class="welcome" id="welcome">
      <div class="wlogo">1B</div>
      <h2>KVInfer Studio</h2>
      <p>Fine-tuned Llama 3.2 1B running on a hand-written C++ inference engine — AVX2 SIMD, OpenMP, RoPE, GQA, SwiGLU, persistent KV-cache.</p>
      <div class="chips">
        <span class="chip">1B params</span>
        <span class="chip">RoPE</span>
        <span class="chip">GQA 8 heads</span>
        <span class="chip">SwiGLU</span>
        <span class="chip">AVX2 SIMD</span>
        <span class="chip">KV Cache</span>
      </div>
    </div>
  </div>
  <div class="inputbar">
    <div class="inputwrap">
      <textarea id="inp" rows="1" placeholder="Send a message…" aria-label="Message" onkeydown="handleKey(event)"></textarea>
      <button type="button" class="sbtn" id="sbtn" aria-label="Send message" onclick="send()">
        <svg width="14" height="14" viewBox="0 0 24 24" aria-hidden="true"><path d="M2 21l21-9L2 3v7l15 2-15 2v7z"/></svg>
      </button>
    </div>
    <div class="hint">Enter ↡ to send · Shift+Enter for newline</div>
  </div>
</main>
</div><!-- .app -->
<script>
// ══ State ════════════════════════════════════════════════
// Prefix prepended to every endpoint path (/chat, /metrics, …); the empty string yields same-origin relative URLs.
const API = '';
// Session identifier sent with each /chat request; clearChat() replaces it with a fresh one.
// NOTE(review): crypto.randomUUID is only exposed in secure contexts (HTTPS or localhost). Confirm the page
// is never served over plain HTTP from another host, otherwise this line throws and the rest of the script never runs.
let sessId = crypto.randomUUID();
// busy: true while send() is streaming a reply; turns / totalToks: counters shown in the Session card.
let busy = false, turns = 0, totalToks = 0;
// tpsHist: recent tok/s values plotted by drawSpark() (send() caps it at 30 entries); peakTps: highest value seen.
let tpsHist = [], peakTps = 0;
// ══ Tab Switching ════════════════════════════════════════
/**
 * Show one sidebar panel and mark its tab button active; hide the other two.
 *
 * Each panel must be shown with the display value its layout is written for:
 * #stats-panel and #bench-panel are flex columns in the stylesheet (their
 * scrolling child relies on `flex:1`), while #params-panel scrolls by itself as
 * a block. Showing the stats panel as `block` made its list clip instead of scroll.
 *
 * @param {string} name  'stats', 'bench' or 'params'; any other value hides all panels.
 */
function switchTab(name) {
  const shownAs = { stats: 'flex', bench: 'flex', params: 'block' };
  Object.keys(shownAs).forEach(key => {
    const active = key === name;
    document.getElementById(key + '-panel').style.display = active ? shownAs[key] : 'none';
    document.getElementById('tab-' + key).classList.toggle('on', active);
  });
}
// ══ Input resize ═════════════════════════════════════════
// Message box. On every edit it is collapsed and then grown to fit its content,
// capped at 110px (the same value as the CSS max-height on #inp).
const inp = document.getElementById('inp');
inp.addEventListener('input', function growToFit() {
  const css = inp.style;
  css.height = 'auto'; // collapse first so scrollHeight reflects the current content
  const wanted = inp.scrollHeight;
  css.height = (wanted < 110 ? wanted : 110) + 'px';
});
/**
 * keydown handler for the message box: plain Enter sends, Shift+Enter inserts a newline.
 *
 * Enter presses that belong to an IME composition (used to confirm a candidate
 * when typing e.g. Japanese or Chinese) are ignored so a half-composed message is not sent.
 *
 * @param {KeyboardEvent} e
 */
function handleKey(e) {
  if (e.key !== 'Enter' || e.shiftKey) return;
  // keyCode 229 is what some browsers report for key events during composition.
  if (e.isComposing || e.keyCode === 229) return;
  e.preventDefault();
  send();
}
// ══ UI helpers ═══════════════════════════════════════════
/**
 * Record the busy flag and mirror it in the UI: send button enabled/disabled,
 * both status dots pulsing or static, header text "Generating…" or "Idle".
 * @param {boolean} v  true while a reply is being generated.
 */
function setBusy(v) {
  busy = v;
  const dotClass = v ? 'dot on' : 'dot off';
  const statusText = v ? 'Generating…' : 'Idle';
  document.getElementById('sbtn').disabled = v;
  for (const dotId of ['dot-s', 'dot-h']) {
    document.getElementById(dotId).className = dotClass;
  }
  document.getElementById('hstatus').textContent = statusText;
}
/** Scroll the message list (#msgs) to its bottom edge. */
function scrollBot() {
  const list = document.getElementById('msgs');
  list.scrollTop = list.scrollHeight;
}
/** Remove the welcome block (#welcome) from the DOM if it is still present. */
function hideWelcome() {
  const banner = document.getElementById('welcome');
  if (!banner) return;
  banner.remove();
}
/**
 * Escape a value for insertion into HTML via innerHTML.
 *
 * `&`, `<`, `>` and both quote characters become entities and every newline
 * becomes `<br>`. Non-string input is stringified; null/undefined yield ''
 * (so a server event with a missing field cannot throw here).
 *
 * @param {*} s  Text to escape.
 * @returns {string} HTML-safe markup.
 */
function esc(s) {
  const entity = {
    '&': '&amp;', '<': '&lt;', '>': '&gt;',
    '"': '&quot;', "'": '&#39;', '\n': '<br>',
  };
  const text = s == null ? '' : String(s);
  // Single pass, so an "&" produced by one replacement is never escaped again.
  return text.replace(/[&<>"'\n]/g, ch => entity[ch]);
}
// ══ Messages ═════════════════════════════════════════════
/**
 * Append a "You" message row containing the escaped text to #msgs and scroll to it.
 * Removes the welcome block first.
 * @param {string} text  Raw user input.
 */
function addUserMsg(text) {
  hideWelcome();
  const row = document.createElement('div');
  row.className = 'mg user';
  row.innerHTML =
    '<div class="mg-role"><div class="mlabel">You</div></div>\n' +
    '<div class="mg-body"><div class="bubble">' + esc(text) + '</div></div>';
  document.getElementById('msgs').appendChild(row);
  scrollBot();
}
/**
 * Append an empty "Model" message row to #msgs and return its two fill-in targets.
 *
 * The elements are looked up inside the new row rather than through
 * document-wide ids derived from Date.now(), which could repeat if two slots
 * were created within the same millisecond.
 *
 * @returns {{bubble: HTMLElement, meta: HTMLElement}}
 *   bubble — reply text container (initially just the blinking cursor);
 *   meta   — the statistics line below it (initially empty).
 */
function createAsstSlot() {
  const row = document.createElement('div');
  row.className = 'mg asst';
  row.innerHTML =
    '<div class="mg-role"><div class="mlabel">Model</div></div>' +
    '<div class="mg-body">' +
      '<div class="bubble"><span class="cur"></span></div>' +
      '<div class="bmeta"></div>' +
    '</div>';
  document.getElementById('msgs').appendChild(row);
  scrollBot();
  return { bubble: row.querySelector('.bubble'), meta: row.querySelector('.bmeta') };
}
// ══ Send ═════════════════════════════════════════════════
/**
 * Send the text in the message box to POST {API}/chat and stream the reply into a new bubble.
 *
 * The response is read as newline-delimited `data: <json>` lines. Recognised events:
 *   {type:'token', text}                        — appended to the bubble, counters updated
 *   {type:'done', tps, total_ms, session_id?}   — finalises the bubble and the stats cards
 *   {type:'error', message}                     — shown in red inside the bubble
 * `data: [DONE]` and lines that are not valid JSON are ignored.
 *
 * Does nothing while another request is in flight (`busy`) or when the box is empty.
 * Failures are rendered in the bubble; the function itself does not throw.
 */
async function send() {
  if (busy) return;
  const text = inp.value.trim();
  if (!text) return;
  inp.value = '';
  inp.style.height = 'auto';
  addUserMsg(text);
  setBusy(true);

  const { bubble, meta } = createAsstSlot();
  const byId = id => document.getElementById(id);
  const t0 = Date.now();
  let content = '', firstTok = null, tokCount = 0;

  const payload = {
    message: text,
    session_id: sessId,
    system_prompt: byId('sysprompt').value,
    max_new_tokens: parseInt(byId('p-maxt').value, 10),
    temperature: parseFloat(byId('p-temp').value),
    top_k: parseInt(byId('p-topk').value, 10),
  };

  // Handle one line of the event stream.
  const handleLine = rawLine => {
    const line = rawLine.trim();
    if (!line.startsWith('data:')) return;
    const raw = line.slice(5).trim();
    if (raw === '[DONE]') return;
    let chunk;
    try { chunk = JSON.parse(raw); } catch { return; }
    if (!chunk || typeof chunk !== 'object') return;

    if (chunk.type === 'token') {
      if (firstTok === null) {
        firstTok = Date.now();
        // Time-to-first-token is fixed once the first token arrives.
        byId('s-ttft').textContent = (firstTok - t0) + ' ms';
      }
      content += chunk.text == null ? '' : String(chunk.text);
      tokCount++; totalToks++;
      byId('s-totok').textContent = totalToks;
      byId('s-toks').textContent = tokCount + ' tok';
      bubble.innerHTML = esc(content) + '<span class="cur"></span>';
      scrollBot();
    } else if (chunk.type === 'done') {
      bubble.innerHTML = esc(content);
      const ttft = firstTok ? (firstTok - t0) : 0;
      // Coerce server-supplied figures: a missing or non-numeric field must neither
      // throw (toFixed) nor reach innerHTML as arbitrary text.
      const tps = Number(chunk.tps) || 0;
      const ms = Number(chunk.total_ms) || 0;
      meta.innerHTML = `<b>${tps}</b>&nbsp;tok/s · TTFT&nbsp;<b>${ttft}ms</b> · <b>${tokCount}</b>&nbsp;tokens · <b>${ms.toFixed(0)}ms</b>`;
      byId('s-tps').textContent = tps + ' tok/s';
      byId('s-lat').textContent = ms.toFixed(0) + ' ms';
      tpsHist.push(tps);
      if (tpsHist.length > 30) tpsHist.shift();
      if (tps > peakTps) peakTps = tps;
      const avg = (tpsHist.reduce((a, b) => a + b, 0) / tpsHist.length).toFixed(1);
      byId('s-avg').textContent = avg + ' tok/s';
      byId('s-peak').textContent = peakTps.toFixed(1) + ' tok/s';
      if (chunk.session_id) {
        // Best-effort refresh of the KV-cache readout; failures are deliberately ignored.
        fetch(`${API}/chat/history?session_id=${encodeURIComponent(chunk.session_id)}`)
          .then(r => r.json())
          .then(d => { byId('s-kv').textContent = (d.tokens_in_engine || 0) + ' tok'; })
          .catch(() => {});
      }
      turns++;
      byId('s-turns').textContent = turns;
      drawSpark();
    } else if (chunk.type === 'error') {
      const msg = chunk.message == null ? 'Unknown error' : String(chunk.message);
      bubble.innerHTML += `<br><span style="color:var(--red)">⚠ ${esc(msg)}</span>`;
    }
  };

  try {
    const resp = await fetch(`${API}/chat`, {
      method: 'POST', headers: {'Content-Type':'application/json'},
      body: JSON.stringify(payload),
    });
    if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
    if (!resp.body) throw new Error('Empty response body');
    const reader = resp.body.getReader(), decoder = new TextDecoder();
    let buf = '';
    for (;;) {
      const { done, value } = await reader.read();
      buf += done ? decoder.decode() : decoder.decode(value, { stream: true });
      const lines = buf.split('\n');
      // While streaming, the last piece may be an incomplete line; keep it for the next
      // read. At end of stream, process it too so a final unterminated line is not lost.
      buf = done ? '' : lines.pop();
      lines.forEach(handleLine);
      if (done) break;
    }
  } catch (err) {
    if (tokCount === 0) bubble.innerHTML = `<span style="color:var(--red)">Connection error: ${esc(err.message)}</span>`;
    else meta.innerHTML += `<span style="color:var(--warn)"> [interrupted]</span>`;
  } finally {
    const c = bubble.querySelector('.cur');
    if (c) c.remove();
    setBusy(false);
    scrollBot();
  }
}
// ══ Sparkline ════════════════════════════════════════════
/**
 * Redraw the throughput sparkline on canvas#spark from `tpsHist`.
 *
 * The bitmap is resized to the element's CSS size times devicePixelRatio on
 * every call and then cleared. Nothing is plotted with fewer than two samples.
 * The line is stroked first; the same path is then closed along the bottom edge
 * and filled with a vertical gradient.
 */
function drawSpark() {
  const canvas = document.getElementById('spark');
  const g = canvas.getContext('2d');
  const ratio = window.devicePixelRatio || 1;
  canvas.width = canvas.clientWidth * ratio;
  canvas.height = canvas.clientHeight * ratio;
  g.scale(ratio, ratio);

  const width = canvas.clientWidth;
  const height = canvas.clientHeight;
  g.clearRect(0, 0, width, height);

  const samples = tpsHist;
  if (samples.length < 2) return;

  // 15% headroom above the largest sample; falls back to 1 when that product is 0.
  const top = Math.max(...samples) * 1.15 || 1;
  const dx = width / (samples.length - 1);

  const fillGrad = g.createLinearGradient(0, 0, 0, height);
  fillGrad.addColorStop(0, 'rgba(0,229,160,.25)');
  fillGrad.addColorStop(1, 'rgba(0,229,160,.02)');

  g.beginPath();
  for (let i = 0; i < samples.length; i++) {
    const px = i * dx;
    const py = height - (samples[i] / top) * (height - 4) - 2;
    if (i === 0) g.moveTo(px, py);
    else g.lineTo(px, py);
  }
  g.strokeStyle = '#00e5a0';
  g.lineWidth = 1.5;
  g.stroke();

  g.lineTo((samples.length - 1) * dx, height);
  g.lineTo(0, height);
  g.closePath();
  g.fillStyle = fillGrad;
  g.fill();
}
// ══ Clear ════════════════════════════════════════════════
/**
 * Reset the conversation: ask the server to drop the current session, start a
 * new session id, zero the local counters and restore the welcome screen.
 *
 * Does nothing while a reply is being generated. The server call is best-effort:
 * a failed request does not prevent the local reset.
 */
async function clearChat() {
  if (busy) return;
  await fetch(`${API}/chat/reset`, {
    method: 'POST', headers: {'Content-Type':'application/json'},
    body: JSON.stringify({ session_id: sessId }),
  }).catch(() => {});

  sessId = crypto.randomUUID();
  turns = 0; totalToks = 0; tpsHist = []; peakTps = 0;

  document.getElementById('msgs').innerHTML = `
<div class="welcome" id="welcome">
<div class="wlogo">1B</div>
<h2>KVInfer Studio</h2>
<p>Fine-tuned Llama 3.2 1B — C++ AVX2 + RoPE + GQA + SwiGLU + KV Cache.</p>
<div class="chips">
<span class="chip">1B params</span><span class="chip">RoPE</span>
<span class="chip">GQA</span><span class="chip">SwiGLU</span><span class="chip">KV Cache</span>
</div>
</div>`;

  const setText = (ids, value) => ids.forEach(id => { document.getElementById(id).textContent = value; });
  setText(['s-turns', 's-totok'], '0');
  // Em dash = "no data yet", the same placeholder the page starts with.
  setText(['s-tps', 's-ttft', 's-lat', 's-avg', 's-peak', 's-toks', 's-kv'], '—');
  drawSpark(); // history is empty now, so this just clears the canvas
}
// ══ Poll metrics ═════════════════════════════════════════
/**
 * Fetch {API}/metrics and refresh the Server RAM / Engine readouts.
 *
 * Polling is best-effort: network errors and non-2xx responses leave the
 * current values untouched. Each field is checked on its own, so one missing
 * or malformed field no longer prevents the remaining readouts from updating.
 */
async function pollMetrics() {
  const show = (id, text) => { document.getElementById(id).textContent = text; };
  const isNum = v => typeof v === 'number' && Number.isFinite(v);
  const orDash = v => (v == null ? '—' : v);
  try {
    const r = await fetch(`${API}/metrics`);
    if (!r.ok) return;
    const d = await r.json();
    if (!d || typeof d !== 'object') return;

    const pct = isNum(d.system_ram_used_pct) ? d.system_ram_used_pct.toFixed(0) + '%' : '—';
    const mb = d.process_ram_mb == null ? '?' : d.process_ram_mb;
    show('s-ram', pct + ' (' + mb + 'MB)');
    show('s-eng', (d.engines_ready == null ? '?' : d.engines_ready) + ' / ' + (d.n_engines || '?'));
    show('s-sess', orDash(d.active_sessions));
    show('s-uptime', isNum(d.uptime_s) ? fmtUptime(d.uptime_s) : '—');
  } catch {
    // Deliberately ignored: the next poll will try again.
  }
}
/**
 * Format a duration in seconds as "42s", "3m 5s" or "2h 7m".
 *
 * Fractional seconds are truncated (previously a value such as 75.3 rendered as
 * "1m 15.299999999999997s"). Missing, non-numeric or negative input yields "—".
 *
 * @param {number} s  Duration in seconds.
 * @returns {string}
 */
function fmtUptime(s) {
  const total = s == null ? NaN : Math.floor(Number(s));
  if (!Number.isFinite(total) || total < 0) return '—';
  if (total < 60) return total + 's';
  if (total < 3600) return Math.floor(total / 60) + 'm ' + (total % 60) + 's';
  return Math.floor(total / 3600) + 'h ' + Math.floor((total % 3600) / 60) + 'm';
}
// Fetch the metrics once at load, then again every 4000 ms for the lifetime of the page.
pollMetrics(); setInterval(pollMetrics, 4000);
// ══ BENCHMARK ════════════════════════════════════════════
// Fixed prompt set used by runBench(). `label` is the card title shown in the
// results list; `text` is sent as the message of one /chat request per entry.
const BENCH_PROMPTS = [
{ label: "Capital question", text: "What is the capital of Japan?" },
{ label: "Simple math", text: "What is 17 multiplied by 13?" },
{ label: "Short poem", text: "Write a 4-line poem about stars." },
{ label: "Explain concept", text: "Explain gravity in 2 sentences." },
{ label: "Code snippet", text: "Write a Python function to reverse a string." },
];
// Guard flag: true while runBench() is in progress, so a second click is ignored.
let benchRunning = false;
/**
 * Run every prompt in BENCH_PROMPTS sequentially against POST {API}/chat (each in
 * its own throw-away session) and render per-prompt results plus an average summary.
 *
 * Does nothing if a benchmark or a chat request is already running. While it
 * runs, the chat is marked busy so a message cannot be sent concurrently and
 * distort the measurements. A prompt counts as successful only when the server
 * answers with a 2xx status and the stream contains a `done` event; anything
 * else is shown as failed and excluded from the averages.
 */
async function runBench() {
  if (benchRunning || busy) return;
  benchRunning = true;
  switchTab('bench');

  // Token budget per prompt; also the scale for the live progress bar.
  const MAX_NEW_TOKENS = 80;
  const btn = document.getElementById('btnbench');
  const results = document.getElementById('bench-results');
  btn.disabled = true;
  btn.textContent = 'Running…';
  setBusy(true);

  try {
    // Clears the previous run (or the "Run benchmark…" placeholder).
    results.innerHTML = '';
    BENCH_PROMPTS.forEach((p, i) => {
      const card = document.createElement('div');
      card.className = 'p-card';
      card.id = 'bcard-' + i;
      card.innerHTML = `
<div class="p-title">${esc(p.label)}</div>
<div class="p-status" id="bst-${i}"><span class="p-spin"></span> Running…</div>
<div class="p-progress"><div class="p-bar" id="bbar-${i}"></div></div>`;
      results.appendChild(card);
    });

    let sumTps = 0, sumTtft = 0, ok = 0;
    for (let i = 0; i < BENCH_PROMPTS.length; i++) {
      const p = BENCH_PROMPTS[i];
      const bar = document.getElementById('bbar-' + i);
      const st = document.getElementById('bst-' + i);
      const t0 = Date.now();
      let firstTok = null, tokCount = 0, finalTps = 0, finalMs = 0, gotDone = false;

      // Handle one `data: <json>` line of the event stream.
      const handleLine = rawLine => {
        const line = rawLine.trim();
        if (!line.startsWith('data:')) return;
        const raw = line.slice(5).trim();
        if (raw === '[DONE]') return;
        let chunk;
        try { chunk = JSON.parse(raw); } catch { return; }
        if (!chunk || typeof chunk !== 'object') return;
        if (chunk.type === 'token') {
          if (firstTok === null) firstTok = Date.now();
          tokCount++;
          // Update progress bar live
          bar.style.width = Math.min(100, (tokCount / MAX_NEW_TOKENS) * 100) + '%';
        } else if (chunk.type === 'done') {
          finalTps = Number(chunk.tps) || 0;
          finalMs = Number(chunk.total_ms) || 0;
          gotDone = true;
        }
      };

      try {
        const resp = await fetch(`${API}/chat`, {
          method: 'POST', headers: {'Content-Type':'application/json'},
          body: JSON.stringify({
            message: p.text, session_id: crypto.randomUUID(),
            system_prompt: 'You are a helpful assistant.',
            max_new_tokens: MAX_NEW_TOKENS, temperature: 0.1, top_k: 40,
          }),
        });
        if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
        if (!resp.body) throw new Error('Empty response body');
        const reader = resp.body.getReader(), decoder = new TextDecoder();
        let buf = '';
        for (;;) {
          const { done, value } = await reader.read();
          buf += done ? decoder.decode() : decoder.decode(value, { stream: true });
          const lines = buf.split('\n');
          // Keep a trailing partial line for the next read; flush it at end of stream.
          buf = done ? '' : lines.pop();
          lines.forEach(handleLine);
          if (done) break;
        }
        if (!gotDone) throw new Error('Stream ended without a done event');

        const ttft = firstTok ? (firstTok - t0) : 0;
        sumTps += finalTps; sumTtft += ttft; ok++;
        bar.style.width = '100%';
        st.innerHTML = `
<div class="p-stats">
<div class="p-stat"><div class="pv">${finalTps}</div><div class="pk">tok/s</div></div>
<div class="p-stat"><div class="pv">${ttft}</div><div class="pk">ttft ms</div></div>
<div class="p-stat"><div class="pv">${tokCount}</div><div class="pk">tokens</div></div>
<div class="p-stat"><div class="pv">${finalMs.toFixed(0)}</div><div class="pk">total ms</div></div>
</div>`;
      } catch (e) {
        st.innerHTML = `<span style="color:var(--red)">✗ Failed</span>`;
        bar.style.background = 'var(--red)';
        bar.style.width = '100%';
      }
    }

    // Summary at top
    if (ok > 0) {
      const avgTps = (sumTps / ok).toFixed(1);
      const avgTtft = (sumTtft / ok).toFixed(0);
      const summary = document.createElement('div');
      summary.innerHTML = `
<div class="bench-grid" style="margin-bottom:10px">
<div class="bstat"><div class="bv">${avgTps}</div><div class="bl">Avg tok/s</div></div>
<div class="bstat"><div class="bv">${avgTtft}ms</div><div class="bl">Avg TTFT</div></div>
</div>`;
      results.insertBefore(summary, results.firstChild);
    }
  } finally {
    // Always release the UI, even if something unexpected threw above.
    btn.textContent = '↺ Run Again';
    btn.disabled = false;
    setBusy(false);
    benchRunning = false;
  }
}
</script>
</body>
</html>