q / q-chat.html
Humuhumu33's picture
Upload folder using huggingface_hub
3365e13 verified
Raw
History Blame Contribute Delete
9 kB
<!doctype html><html><head><meta charset=utf8><meta name=viewport content="width=device-width,initial-scale=1">
<title>Q · local chat</title>
<style>
:root{--bg:#0b0e14;--panel:#141922;--ink:#e6e9ef;--dim:#8a94a6;--q:#7c5cff;--u:#1f6feb;--line:#1e2531}
*{box-sizing:border-box}html,body{height:100%}
body{margin:0;font:15px/1.55 -apple-system,Segoe UI,Roboto,monospace;background:var(--bg);color:var(--ink);display:flex;flex-direction:column}
header{padding:10px 16px;border-bottom:1px solid var(--line);display:flex;align-items:center;gap:10px;flex:0 0 auto}
header b{font-weight:600}header .s{color:var(--dim);font-size:12px}
#log{flex:1;overflow:auto;padding:16px;display:flex;flex-direction:column;gap:12px}
.msg{max-width:82%;padding:9px 13px;border-radius:12px;white-space:pre-wrap;word-wrap:break-word}
.u{align-self:flex-end;background:var(--u)}.a{align-self:flex-start;background:var(--panel);border:1px solid #232b3a}
.a.think{color:var(--dim);font-style:italic}
footer{padding:12px 16px;border-top:1px solid var(--line);display:flex;gap:8px;flex:0 0 auto}
#in{flex:1;background:var(--panel);border:1px solid #232b3a;color:var(--ink);border-radius:10px;padding:10px 12px;font:inherit;resize:none;max-height:140px}
button{background:var(--q);color:#fff;border:0;border-radius:10px;padding:0 18px;font:inherit;cursor:pointer}button:disabled{opacity:.4;cursor:default}
.stat{color:var(--dim);font-size:11px;margin-top:3px}
</style></head><body>
<header><b>Q</b> <span class=s id=st>booting…</span></header>
<div id=log></div>
<footer><textarea id=in rows=1 placeholder="Message Q…" disabled></textarea><button id=send disabled>Send</button></footer>
<script type=module>
import { ready, loadModel, MODELS, defaultModelIndex } from "./core/loader.js";
import { createEngine } from "./core/engine.js";
import { selfPersona } from "./core/q-self.mjs"; // ONE grounded self-knowledge, shared with the messenger + voice
/** Shorthand for `document.querySelector`. */
const $ = (selector) => document.querySelector(selector);

// Handles to the page elements the script drives: the message log, the
// composer textarea, the Send button and the status line in the header.
const log = $("#log");
const input = $("#in");
const send = $("#send");
const st = $("#st");

/**
 * Append a chat bubble to the log and scroll the log to the bottom.
 * @param {string} cls - class name(s) placed after the base "msg" class.
 * @param {string} [text=""] - initial text, assigned via textContent (never parsed as HTML).
 * @returns {HTMLDivElement} the element that was appended.
 */
const bubble = (cls, text = "") => {
  const el = document.createElement("div");
  el.className = `msg ${cls}`;
  el.textContent = text;
  log.appendChild(el);
  log.scrollTop = log.scrollHeight;
  return el;
};
const params = new URLSearchParams(location.search);
const pick = params.get("m");
// Model selection. Without ?m= the loader's default model is used. With ?m=<pattern> the first model
// whose name matches the pattern (case-insensitive regular expression) wins, falling back to MODELS[0]
// when nothing matches. The pattern comes straight from the URL, so it may not be a valid regex
// (e.g. "c++" or "qwen("): in that case it is treated as a plain case-insensitive substring instead of
// letting the SyntaxError escape — this code runs before the start-up try/catch, so an uncaught throw
// here would leave the page stuck on "booting…" with no explanation.
let m = (() => {
  if (!pick) return MODELS[defaultModelIndex()];
  let matches;
  try {
    const re = new RegExp(pick, "i"); // compiled once rather than once per model
    matches = (name) => re.test(name);
  } catch {
    const needle = pick.toLowerCase();
    matches = (name) => String(name).toLowerCase().includes(needle);
  }
  return MODELS.find((x) => matches(x.name)) || MODELS[0];
})();
// STREAM FROM ANYWHERE: ?hf=<org/repo> (a HuggingFace κ-object) or ?kappa=<absolute-url> loads the weights from
// that host's CDN instead of the local mount. The κ-object is content-addressed + pinned, so the host is an
// UNTRUSTED CDN — every block is re-derived (Law L5); a bad byte is rejected. Blocks cache locally after first
// load (0-network on return). Uses BitNet's engine flags (the HOLOGRAMTECH model is BitNet-2B).
{
  const hf = params.get("hf");
  const kappa = params.get("kappa");
  if (hf || kappa) {
    // ?kappa= takes precedence over ?hf= when both are present; trailing slashes are dropped from the base.
    const base = (kappa || `https://huggingface.co/${hf}/resolve/main`).replace(/\/+$/, "");
    // Remote loads reuse the BitNet entry's settings; if MODELS has no BitNet family, keep the current pick.
    const bit = MODELS.find((x) => (x.fam || "").toLowerCase() === "bitnet") || m;
    // The label names where the weights actually come from: the ?kappa= host when that is the base,
    // otherwise the HF repo. A relative or malformed ?kappa= must not throw here — this block runs
    // outside the start-up try/catch — so it is resolved against the page URL and, failing that,
    // shown verbatim.
    let via = hf;
    if (kappa) {
      try {
        via = new URL(base, location.href).host || base;
      } catch {
        via = base;
      }
    }
    m = { ...bit, kappaUrl: base, name: `${bit.name} · via ${via}` };
  }
}
// GROUND the model as on-device Q (a base/instruct model has NO self-knowledge — without this it confabulates
// a generic "I run on OpenAI/AWS cloud servers" identity, which is false). Injected as the SYSTEM turn.
// frameSystem() builds the system block for the model's own chat template (matches core/engine frameTurn's per-family markers).
// PERSONA is Q's LIVE grounded self-knowledge (q-self), computed at call time so it names the real resident
// model + κ — without it a base model confabulates a cloud/OpenAI/AWS identity.
/**
 * Build the system-turn text that precedes the first user turn.
 *
 * The persona is requested from selfPersona() on every call, using the currently selected
 * model `m` and the current `engine`, and is then wrapped in the system markers of the first
 * matching family flag on `m` (llama3, then qwen, then olmo). With no flag set, the persona is
 * returned as plain leading context followed by a blank line.
 *
 * @returns {string} the framed system block.
 */
function frameSystem() {
  const persona = selfPersona({ model: m, engine });
  // Checked in order; the first truthy flag on the model decides the frame.
  const frames = [
    ["llama3", (p) => `<|start_header_id|>system<|end_header_id|>\n\n${p}<|eot_id|>`],
    ["qwen", (p) => `<|im_start|>system\n${p}<|im_end|>\n`],
    ["olmo", (p) => `<|system|>\n${p}\n`],
  ];
  const hit = frames.find(([flag]) => m[flag]);
  if (hit) return hit[1](persona);
  return `${persona}\n\n`; // word/plain frame: prepend as leading context
}
// Session state shared by the functions in this script.
let engine = null; // inference engine; assigned once createEngine() resolves
let convIds = []; // token ids of the conversation so far (replaced after each completed turn)
let busy = false; // true while a generation is in flight
let armed = false; // true once the model is loaded and the engine exists
let pending = null; // message typed before the model was ready, waiting to be sent

// Let the user type immediately instead of waiting for the model: the first message is queued
// and sent automatically the instant Q is ready.
send.disabled = false;
input.disabled = false;
input.placeholder = "Message Q… (model loading — will send the moment it's ready)";
input.focus();
/**
 * Run one chat turn: show the user's message, stream the model's reply into a new bubble and,
 * on success, extend the conversation.
 *
 * The composer is locked for the duration and always unlocked again afterwards, whatever was
 * thrown. Failures are rendered into the reply bubble (prefixed with "⚠ ") rather than rethrown,
 * and leave `convIds` untouched.
 *
 * @param {string} text - the user's message.
 * @param {boolean} [skipUser] - truthy when the user bubble is already on screen.
 * @returns {Promise<void>}
 */
async function generate(text, skipUser) {
  busy = true;
  input.disabled = send.disabled = true;
  if (!skipUser) bubble("u", text);
  const reply = bubble("a think", "…");
  let first = true;
  const stat = document.createElement("div");
  stat.className = "stat";
  try {
    let framed = engine.frameTurn(text, convIds.length > 0);
    if (convIds.length === 0) framed = frameSystem() + framed; // first turn → lead with the on-device Q persona
    let turnIds = engine.tokenize(framed);
    if (m.bos && engine.bosId != null && convIds.length === 0) turnIds = [engine.bosId, ...turnIds];
    const res = await engine.generate(convIds.concat(turnIds), {
      maxNew: m.cap || 256,
      onToken: ({ text: t, stats }) => {
        // The first non-empty chunk swaps the "thinking" placeholder for real output.
        if (first && t) {
          reply.classList.remove("think");
          reply.textContent = "";
          first = false;
        }
        reply.textContent = t;
        log.scrollTop = log.scrollHeight;
        if (stats) {
          stat.textContent = `${stats.tokps ? stats.tokps.toFixed(0) + " tok/s" : ""}${stats.ttft ? " · TTFT " + Math.round(stats.ttft) + "ms" : ""}`;
        }
      },
    });
    // Nothing was streamed: fall back to the final text, or say so explicitly.
    if (first) {
      reply.classList.remove("think");
      reply.textContent = res.text || "(no output)";
    }
    convIds = res.ids;
    reply.after(stat);
  } catch (e) {
    reply.classList.remove("think");
    // Not every thrown value is an Error: a bare string or null must still produce a readable
    // message instead of "undefined" or a second exception from inside this handler.
    reply.textContent = "⚠ " + (e?.message ?? String(e));
  } finally {
    // Always hand the composer back, otherwise a failure here would lock the chat for good.
    busy = false;
    input.disabled = send.disabled = false;
    input.focus();
  }
}
// Q reaches out first — a real, model-GENERATED opening (not a canned line), so it feels alive.
// This also warms the GPU pipelines. It is standalone (not added to convIds) so the real chat starts fresh.
/**
 * Have Q speak first with a model-generated greeting.
 *
 * The greeting is generated from a standalone prompt and its token ids are never stored in
 * `convIds`, so the real conversation starts from an empty history. If generation throws,
 * streams nothing, or yields fewer than four non-blank characters, a fixed fallback line is
 * shown instead. The composer is locked while the greeting is being produced.
 *
 * @returns {Promise<void>}
 */
async function proactiveGreeting() {
  busy = true;
  input.disabled = send.disabled = true;

  const greeting = bubble("a think", "…");
  const stat = document.createElement("div");
  stat.className = "stat";
  let awaitingFirstChunk = true;

  const FALLBACK = "Hey — I'm Q, running entirely on your device, no server, my weights verified by re-derivation. What can I help you with?";
  const showFallback = () => {
    greeting.classList.remove("think");
    greeting.textContent = FALLBACK;
  };

  try {
    const P = "This is the very first thing you say to the person who just opened you. You are Q — a private AI running entirely on their device with no server, your weights verified by re-derivation. Greet them warmly in one or two sentences and invite them to ask you anything.";
    const promptIds = engine.tokenize(engine.frameTurn(P, false));
    const needsBos = m.bos && engine.bosId != null;
    const ids = needsBos ? [engine.bosId, ...promptIds] : promptIds;

    await engine.generate(ids, {
      maxNew: 64,
      onToken: ({ text: t, stats }) => {
        // First non-empty chunk replaces the "thinking" placeholder.
        if (awaitingFirstChunk && t) {
          greeting.classList.remove("think");
          greeting.textContent = "";
          awaitingFirstChunk = false;
        }
        greeting.textContent = t;
        log.scrollTop = log.scrollHeight;
        if (stats && stats.tokps) stat.textContent = `${stats.tokps.toFixed(0)} tok/s`;
      },
    });

    const tooShort = awaitingFirstChunk || greeting.textContent.trim().length < 4;
    if (tooShort) {
      showFallback();
    } else {
      greeting.after(stat);
    }
  } catch (e) {
    // Best effort by design: any failure simply shows the canned greeting.
    showFallback();
  }

  busy = false;
  input.disabled = send.disabled = false;
  input.focus();
}
/**
 * Handle a send request from the button or the Enter key.
 *
 * Blank input and sends during an in-flight generation are ignored. Before the model is ready
 * exactly one message is queued in `pending` and shown together with a "starting" placeholder
 * (marked with data-pending so start-up can remove it). While a message is already queued,
 * further sends are ignored and the draft stays in the box — previously each extra send
 * overwrote `pending`, so earlier messages were displayed but never answered, and every send
 * stacked another placeholder bubble.
 */
function onSend() {
  const text = input.value.trim();
  if (!text || busy) return;
  if (!armed && pending != null) return; // one message is already queued; keep this draft

  input.value = "";
  input.style.height = "auto";

  if (!armed) {
    pending = text;
    bubble("u", text);
    const waiting = bubble("a think", "…starting the model, one moment…");
    waiting.dataset.pending = "1";
    return;
  }
  generate(text);
}
// Composer wiring: the button sends, plain Enter sends, Shift+Enter keeps its default
// behaviour, and the textarea grows with its content up to 140px.
send.onclick = onSend;

input.onkeydown = (event) => {
  const isPlainEnter = event.key === "Enter" && !event.shiftKey;
  if (!isPlainEnter) return;
  event.preventDefault();
  onSend();
};

input.oninput = () => {
  // Collapse first so scrollHeight reflects the content rather than the previous height.
  input.style.height = "auto";
  const capped = Math.min(140, input.scrollHeight);
  input.style.height = `${capped}px`;
};
// Start-up: check for WebGPU, load the selected model, create the engine, then either answer the
// message the user queued while loading or let Q greet first. Any failure is reported in the
// status line and as a chat bubble.
try {
  if (!navigator.gpu) throw new Error("This browser has no WebGPU. Use Chrome/Edge.");
  st.textContent = `loading ${m.name} (${m.size})…`;
  const loaded = await loadModel(m, {
    onStatus: (s) => { if (s) st.textContent = `${m.name}: ${s}`; },
    // Percentage is shown as 0 while the total is still unknown (falsy).
    onProgress: (d, t, w) => { st.textContent = `${m.name}: ${w} ${t ? Math.round(100 * d / t) : 0}%`; },
  });
  if (!loaded || !loaded.gpu) throw new Error("model load failed");
  engine = await createEngine(m, loaded);
  armed = true;
  st.textContent = `${m.name} · ${m.size} · resident on your GPU · ready`;
  input.placeholder = "Message Q…";
  if (pending) {
    // You jumped in first → answer that. Remove every "starting the model" placeholder, not
    // only the most recent one, so none is left orphaned in the log.
    log.querySelectorAll(".a[data-pending]").forEach((el) => el.remove());
    const queued = pending;
    pending = null;
    await generate(queued, true); // awaited so the promise is not left floating
  } else {
    await proactiveGreeting(); // otherwise Q reaches out the moment it's ready (also warms the GPU)
  }
} catch (e) {
  // The model is not starting any more, so the "…starting the model, one moment…" placeholder
  // would be misleading if it stayed on screen next to the error.
  log.querySelectorAll(".a[data-pending]").forEach((el) => el.remove());
  const reason = e?.message ?? String(e); // tolerate non-Error throws
  st.textContent = "⚠ " + reason;
  bubble("a", "Could not start: " + reason);
}
</script></body></html>