Spaces:
Running
Running
| <html><head><meta charset=utf8><meta name=viewport content="width=device-width,initial-scale=1"> | |
| <title>Q · local chat</title> | |
| <style> | |
| :root{--bg:#0b0e14;--panel:#141922;--ink:#e6e9ef;--dim:#8a94a6;--q:#7c5cff;--u:#1f6feb;--line:#1e2531} | |
| *{box-sizing:border-box}html,body{height:100%} | |
| body{margin:0;font:15px/1.55 -apple-system,Segoe UI,Roboto,monospace;background:var(--bg);color:var(--ink);display:flex;flex-direction:column} | |
| header{padding:10px 16px;border-bottom:1px solid var(--line);display:flex;align-items:center;gap:10px;flex:0 0 auto} | |
| header b{font-weight:600}header .s{color:var(--dim);font-size:12px} | |
| #log{flex:1;overflow:auto;padding:16px;display:flex;flex-direction:column;gap:12px} | |
| .msg{max-width:82%;padding:9px 13px;border-radius:12px;white-space:pre-wrap;word-wrap:break-word} | |
| .u{align-self:flex-end;background:var(--u)}.a{align-self:flex-start;background:var(--panel);border:1px solid #232b3a} | |
| .a.think{color:var(--dim);font-style:italic} | |
| footer{padding:12px 16px;border-top:1px solid var(--line);display:flex;gap:8px;flex:0 0 auto} | |
| #in{flex:1;background:var(--panel);border:1px solid #232b3a;color:var(--ink);border-radius:10px;padding:10px 12px;font:inherit;resize:none;max-height:140px} | |
| button{background:var(--q);color:#fff;border:0;border-radius:10px;padding:0 18px;font:inherit;cursor:pointer}button:disabled{opacity:.4;cursor:default} | |
| .stat{color:var(--dim);font-size:11px;margin-top:3px} | |
| </style></head><body> | |
| <header><b>Q</b> <span class=s id=st>booting…</span></header> | |
| <div id=log></div> | |
| <footer><textarea id=in rows=1 placeholder="Message Q…" disabled></textarea><button id=send disabled>Send</button></footer> | |
| <script type=module> | |
| import { ready, loadModel, MODELS, defaultModelIndex } from "./core/loader.js"; | |
| import { createEngine } from "./core/engine.js"; | |
| import { selfPersona } from "./core/q-self.mjs"; // ONE grounded self-knowledge, shared with the messenger + voice | |
/** Shorthand for `document.querySelector`. */
const $ = (selector) => document.querySelector(selector);

// The elements this script drives: transcript, composer textarea, send button, header status line.
const log = $("#log");
const input = $("#in");
const send = $("#send");
const st = $("#st");

/**
 * Append one chat bubble to the transcript and scroll the transcript to the bottom.
 *
 * @param {string} cls - class name(s) added after the base "msg" class (e.g. "u", "a", "a think").
 * @param {string} [text=""] - initial content; assigned through `textContent`, so it is never parsed as HTML.
 * @returns {HTMLDivElement} the bubble that was appended, so callers can keep updating it.
 */
const bubble = (cls, text = "") => {
  const node = document.createElement("div");
  node.className = `msg ${cls}`;
  node.textContent = text;
  log.appendChild(node);
  log.scrollTop = log.scrollHeight;
  return node;
};
// Query-string options for this page. `?m=<pattern>` selects a model by name.
const params = new URLSearchParams(location.search);
const pick = params.get("m");

/**
 * Find the first model whose name matches `pattern`.
 *
 * The pattern is tried as a case-insensitive regular expression first. It comes straight from the
 * address bar, so it may not be a valid regex (e.g. `?m=(` or `?m=c++`); in that case it is matched
 * as a plain case-insensitive substring instead of letting the SyntaxError abort the whole page.
 *
 * @param {Array<{name: string}>} models - candidate models, searched in order.
 * @param {string} pattern - user-supplied regex source or plain text.
 * @returns {object|undefined} the first matching model, or undefined when nothing matches.
 */
function findModelByName(models, pattern) {
  let matches;
  try {
    const re = new RegExp(pattern, "i"); // compiled once, not once per model (no g/y flag → stateless)
    matches = (name) => re.test(name);
  } catch (err) {
    if (!(err instanceof SyntaxError)) throw err;
    const needle = pattern.toLowerCase();
    matches = (name) => String(name).toLowerCase().includes(needle);
  }
  return models.find((x) => matches(x.name));
}

// The resident model: the one named by ?m= (falling back to the first entry when nothing matches),
// otherwise the loader's default. Declared with `let` because it is reassigned later in the file.
let m = pick ? (findModelByName(MODELS, pick) || MODELS[0]) : MODELS[defaultModelIndex()];
// STREAM FROM ANYWHERE: ?hf=<org/repo> (a HuggingFace κ-object) or ?kappa=<absolute-url> loads the weights from
// that host's CDN instead of the local mount. The κ-object is content-addressed + pinned, so the host is an
// UNTRUSTED CDN — every block is re-derived (Law L5); a bad byte is rejected. Blocks cache locally after first
// load (0-network on return). Uses BitNet's engine flags (the HOLOGRAMTECH model is BitNet-2B).
// When both parameters are present, ?kappa wins.
{
  const hf = params.get("hf");
  const kappa = params.get("kappa");
  if (hf || kappa) {
    // Strip trailing slashes so later path joins do not produce "//".
    const base = (kappa || `https://huggingface.co/${hf}/resolve/main`).replace(/\/+$/, "");
    // Reuse the BitNet entry's settings; if the catalogue has none, keep the currently selected model's.
    const bit = MODELS.find((x) => (x.fam || "").toLowerCase() === "bitnet") || m;
    // The label must name the source the weights are ACTUALLY fetched from. Since ?kappa wins over ?hf
    // for `base`, it must win for the label too — otherwise the header claims "via <hf repo>" while
    // streaming from an arbitrary host.
    let source = hf;
    if (kappa) {
      try {
        // Resolving against the page URL keeps a non-absolute ?kappa from throwing here, which would
        // abort the module before the boot try/catch can report anything.
        source = new URL(base, location.href).host || base;
      } catch {
        source = base;
      }
    }
    m = { ...bit, kappaUrl: base, name: `${bit.name} · via ${source}` };
  }
}
// GROUND the model as on-device Q. Per the author's notes, a base/instruct model has no self-knowledge and
// otherwise confabulates a generic "I run on OpenAI/AWS cloud servers" identity, which is false here.
// The persona text comes from q-self's selfPersona() and is computed at call time from the current
// model + engine, so it reflects whatever is resident when the first turn is sent.
/**
 * Build the system block that leads the first turn, wrapped in the chat-template markers of the
 * current model's family (intended to match the per-family markers used by core/engine's frameTurn).
 *
 * @returns {string} the persona framed for `m`; for families without a dedicated template the
 *   persona is simply prepended as plain leading context followed by a blank line.
 */
function frameSystem() {
  const persona = selfPersona({ model: m, engine });

  let framed;
  if (m.llama3) {
    framed = `<|start_header_id|>system<|end_header_id|>\n\n${persona}<|eot_id|>`;
  } else if (m.qwen) {
    framed = `<|im_start|>system\n${persona}<|im_end|>\n`;
  } else if (m.olmo) {
    framed = `<|system|>\n${persona}\n`;
  } else {
    // word/plain frame: no template markers, just leading context
    framed = persona + "\n\n";
  }
  return framed;
}
// Mutable page state shared by the functions below.
let engine = null;   // inference engine; assigned once the model has loaded
let convIds = [];    // token ids of the conversation so far (replaced after each completed turn)
let busy = false;    // true while a generation is in flight
let armed = false;   // true once the engine is ready to accept turns
let pending = null;  // text the user sent before the engine was ready, or null

// Let the user type immediately instead of waiting for the model: a message sent early is queued
// and answered as soon as Q is ready.
send.disabled = false;
input.disabled = false;
input.placeholder = "Message Q… (model loading — will send the moment it's ready)";
input.focus();
/**
 * Run one chat turn: show the user's message, stream the model's reply into a new bubble, and
 * extend the conversation history with the ids the engine returns.
 *
 * Never rejects: any failure is rendered into the reply bubble as "⚠ <reason>", and the composer
 * is always re-enabled afterwards.
 *
 * @param {string} text - the user's message.
 * @param {boolean} [skipUser] - when truthy, do not add a user bubble (the caller already showed it).
 * @returns {Promise<void>}
 */
async function generate(text, skipUser) {
  busy = true;
  input.disabled = send.disabled = true;
  if (!skipUser) bubble("u", text);
  const a = bubble("a think", "…"); // placeholder until the first token arrives
  let first = true;
  const stat = document.createElement("div");
  stat.className = "stat";
  try {
    const isFirstTurn = convIds.length === 0;
    let framed = engine.frameTurn(text, !isFirstTurn);
    if (isFirstTurn) framed = frameSystem() + framed; // first turn → lead with the on-device Q persona
    let turnIds = engine.tokenize(framed);
    if (m.bos && engine.bosId != null && isFirstTurn) turnIds = [engine.bosId, ...turnIds];
    const res = await engine.generate(convIds.concat(turnIds), {
      maxNew: m.cap || 256,
      onToken: ({ text: t, stats }) => {
        if (first && t) { a.classList.remove("think"); a.textContent = ""; first = false; }
        a.textContent = t; // each callback's text replaces the bubble's content
        log.scrollTop = log.scrollHeight;
        if (stats) stat.textContent = `${stats.tokps ? stats.tokps.toFixed(0) + " tok/s" : ""}${stats.ttft ? " · TTFT " + Math.round(stats.ttft) + "ms" : ""}`;
      },
    });
    // Nothing was streamed → fall back to the final text (or an explicit marker).
    if (first) { a.classList.remove("think"); a.textContent = res.text || "(no output)"; }
    convIds = res.ids;
    a.after(stat);
  } catch (e) {
    a.classList.remove("think");
    // Not everything thrown is an Error (strings, null, error-like objects): never print "undefined",
    // and never let the handler itself throw.
    a.textContent = "⚠ " + (e?.message ?? String(e));
  } finally {
    // Always release the composer, whatever happened above — otherwise the chat locks up for good.
    busy = false;
    input.disabled = send.disabled = false;
    input.focus();
  }
}
// Q reaches out first — a real, model-GENERATED opening (not a canned line), so it feels alive.
// This also warms the GPU pipelines. It is standalone (not added to convIds) so the real chat starts fresh.
/**
 * Generate and display Q's opening message.
 *
 * Best-effort: if generation fails, or produces fewer than 4 non-blank characters, a canned
 * greeting is shown instead. Failures are logged to the console so a broken engine is not
 * hidden behind the canned line. Never rejects, and always re-enables the composer.
 *
 * @returns {Promise<void>}
 */
async function proactiveGreeting() {
  busy = true;
  input.disabled = send.disabled = true;
  const a = bubble("a think", "…");
  let first = true;
  const stat = document.createElement("div");
  stat.className = "stat";
  const FALLBACK = "Hey — I'm Q, running entirely on your device, no server, my weights verified by re-derivation. What can I help you with?";
  const showFallback = () => { a.classList.remove("think"); a.textContent = FALLBACK; };
  try {
    const P = "This is the very first thing you say to the person who just opened you. You are Q — a private AI running entirely on their device with no server, your weights verified by re-derivation. Greet them warmly in one or two sentences and invite them to ask you anything.";
    let ids = engine.tokenize(engine.frameTurn(P, false));
    if (m.bos && engine.bosId != null) ids = [engine.bosId, ...ids];
    await engine.generate(ids, {
      maxNew: 64,
      onToken: ({ text: t, stats }) => {
        if (first && t) { a.classList.remove("think"); a.textContent = ""; first = false; }
        a.textContent = t;
        log.scrollTop = log.scrollHeight;
        if (stats && stats.tokps) stat.textContent = `${stats.tokps.toFixed(0)} tok/s`;
      },
    });
    // Nothing usable came out → canned line; otherwise attach the throughput stat under the bubble.
    if (first || a.textContent.trim().length < 4) showFallback();
    else a.after(stat);
  } catch (e) {
    // The user still gets a greeting, but the real failure must stay visible to whoever debugs this.
    console.warn("Q greeting generation failed; showing the canned greeting instead.", e);
    showFallback();
  } finally {
    busy = false;
    input.disabled = send.disabled = false;
    input.focus();
  }
}
/**
 * Handle a send request from the composer (button click or Enter).
 *
 * Does nothing when the composer is blank or a generation is already running. Once the engine is
 * armed the message is generated immediately. Before that, the message is shown and queued in
 * `pending`; further messages sent while still waiting are appended to the queued text (one per
 * line) rather than overwriting it, and only one "starting the model" placeholder is ever shown.
 */
function onSend() {
  const text = input.value.trim();
  if (!text || busy) return;
  input.value = "";
  input.style.height = "auto"; // collapse the auto-grown textarea back to one row

  if (armed) {
    generate(text); // not awaited on purpose: generate() reports its own failures in the transcript
    return;
  }

  // Model not ready yet: echo the message now, answer it once the engine is armed.
  bubble("u", text);
  if (pending) {
    // Already waiting — keep the earlier text instead of silently dropping it.
    pending = `${pending}\n${text}`;
    return;
  }
  pending = text;
  const waiting = bubble("a think", "…starting the model, one moment…");
  waiting.dataset.pending = "1"; // marker used to find and remove this placeholder once the model is ready
}
// Composer wiring.
// Clicking the button sends.
send.onclick = onSend;

// Enter sends; Shift+Enter falls through to the textarea's default (a newline).
input.onkeydown = (event) => {
  if (event.key !== "Enter" || event.shiftKey) return;
  event.preventDefault();
  onSend();
};

// Auto-grow the textarea with its content, capped at 140px.
input.oninput = () => {
  input.style.height = "auto"; // reset first so scrollHeight reflects the content, not the old height
  const grown = Math.min(140, input.scrollHeight);
  input.style.height = `${grown}px`;
};
// Boot sequence: check for WebGPU, load the model, build the engine, then either answer the
// message the user queued while waiting or let Q open the conversation.
try {
  if (!navigator.gpu) throw new Error("This browser has no WebGPU. Use Chrome/Edge.");
  st.textContent = `loading ${m.name} (${m.size})…`;
  const loaded = await loadModel(m, {
    onStatus: (s) => { if (s) st.textContent = `${m.name}: ${s}`; },
    // d of t done for stage w; t may be 0/unknown, in which case show 0% rather than NaN.
    onProgress: (d, t, w) => { st.textContent = `${m.name}: ${w} ${t ? Math.round(100 * d / t) : 0}%`; },
  });
  if (!loaded || !loaded.gpu) throw new Error("model load failed");
  engine = await createEngine(m, loaded);
  armed = true;
  st.textContent = `${m.name} · ${m.size} · resident on your GPU · ready`;
  input.placeholder = "Message Q…";
  if (pending) {
    // you jumped in first → drop the "starting the model" placeholder and answer that
    const w = [...log.querySelectorAll(".a")].reverse().find((x) => x.dataset.pending);
    if (w) w.remove();
    const p = pending;
    pending = null;
    generate(p, true); // not awaited on purpose: generate() reports its own failures in the transcript
  } else {
    await proactiveGreeting(); // otherwise Q reaches out the moment it's ready (also warms the GPU)
  }
} catch (e) {
  // Not everything thrown is an Error; never show "undefined" as the reason.
  const reason = e?.message ?? String(e);
  if (!armed) {
    // Startup failed, so nothing can ever answer: remove the "starting the model" placeholders,
    // discard the queued text, and lock the composer instead of silently accepting more messages.
    for (const w of log.querySelectorAll(".a")) {
      if (w.dataset.pending) w.remove();
    }
    pending = null;
    input.disabled = send.disabled = true;
    input.placeholder = "Q could not start";
  }
  st.textContent = "⚠ " + reason;
  bubble("a", "Could not start: " + reason);
}
| </script></body></html> | |