Spaces:

HOLOGRAMTECH
/

q

Running

App Files Files Community

q / q-chat.html

Humuhumu33

Upload folder using huggingface_hub

3365e13 verified about 17 hours ago

Raw

History Blame Contribute Delete

9 kB

	<!doctype html><html><head><meta charset=utf8><meta name=viewport content="width=device-width,initial-scale=1">
	<title>Q · local chat</title>
	<style>
	:root{--bg:#0b0e14;--panel:#141922;--ink:#e6e9ef;--dim:#8a94a6;--q:#7c5cff;--u:#1f6feb;--line:#1e2531}
	*{box-sizing:border-box}html,body{height:100%}
	body{margin:0;font:15px/1.55 -apple-system,Segoe UI,Roboto,monospace;background:var(--bg);color:var(--ink);display:flex;flex-direction:column}
	header{padding:10px 16px;border-bottom:1px solid var(--line);display:flex;align-items:center;gap:10px;flex:0 0 auto}
	header b{font-weight:600}header .s{color:var(--dim);font-size:12px}
	#log{flex:1;overflow:auto;padding:16px;display:flex;flex-direction:column;gap:12px}
	.msg{max-width:82%;padding:9px 13px;border-radius:12px;white-space:pre-wrap;word-wrap:break-word}
	.u{align-self:flex-end;background:var(--u)}.a{align-self:flex-start;background:var(--panel);border:1px solid #232b3a}
	.a.think{color:var(--dim);font-style:italic}
	footer{padding:12px 16px;border-top:1px solid var(--line);display:flex;gap:8px;flex:0 0 auto}
	#in{flex:1;background:var(--panel);border:1px solid #232b3a;color:var(--ink);border-radius:10px;padding:10px 12px;font:inherit;resize:none;max-height:140px}
	button{background:var(--q);color:#fff;border:0;border-radius:10px;padding:0 18px;font:inherit;cursor:pointer}button:disabled{opacity:.4;cursor:default}
	.stat{color:var(--dim);font-size:11px;margin-top:3px}
	</style></head><body>
	<header><b>Q</b> <span class=s id=st>booting…</span></header>
	<div id=log></div>
	<footer><textarea id=in rows=1 placeholder="Message Q…" disabled></textarea><button id=send disabled>Send</button></footer>
	<script type=module>
	import { ready, loadModel, MODELS, defaultModelIndex } from "./core/loader.js";
	import { createEngine } from "./core/engine.js";
	import { selfPersona } from "./core/q-self.mjs"; // ONE grounded self-knowledge, shared with the messenger + voice
	const $ = (s) => document.querySelector(s);
	const log = $("#log"), input = $("#in"), send = $("#send"), st = $("#st");
	const bubble = (cls, text = "") => { const d = document.createElement("div"); d.className = "msg " + cls; d.textContent = text; log.appendChild(d); log.scrollTop = log.scrollHeight; return d; };
	const params = new URLSearchParams(location.search);
	const pick = params.get("m");
	let m = pick ? (MODELS.find((x) => new RegExp(pick, "i").test(x.name)) \|\| MODELS[0]) : MODELS[defaultModelIndex()];
	// STREAM FROM ANYWHERE: ?hf=<org/repo> (a HuggingFace κ-object) or ?kappa=<absolute-url> loads the weights from
	// that host's CDN instead of the local mount. The κ-object is content-addressed + pinned, so the host is an
	// UNTRUSTED CDN — every block is re-derived (Law L5); a bad byte is rejected. Blocks cache locally after first
	// load (0-network on return). Uses BitNet's engine flags (the HOLOGRAMTECH model is BitNet-2B).
	{
	const hf = params.get("hf"), kappa = params.get("kappa");
	if (hf \|\| kappa) {
	const base = (kappa \|\| `https://huggingface.co/${hf}/resolve/main`).replace(/\/+$/, "");
	const bit = MODELS.find((x) => (x.fam \|\| "").toLowerCase() === "bitnet") \|\| m;
	m = { ...bit, kappaUrl: base, name: bit.name + " · via " + (hf \|\| new URL(base).host) };
	}
	}

	// GROUND the model as on-device Q (a base/instruct model has NO self-knowledge — without this it confabulates
	// a generic "I run on OpenAI/AWS cloud servers" identity, which is false). Injected as the SYSTEM turn.
	// the system block for the model's own chat template (matches core/engine frameTurn's per-family markers).
	// PERSONA is Q's LIVE grounded self-knowledge (q-self), computed at call time so it names the real resident
	// model + κ — without it a base model confabulates a cloud/OpenAI/AWS identity.
	function frameSystem() {
	const PERSONA = selfPersona({ model: m, engine });
	if (m.llama3) return `<\|start_header_id\|>system<\|end_header_id\|>\n\n${PERSONA}<\|eot_id\|>`;
	if (m.qwen) return `<\|im_start\|>system\n${PERSONA}<\|im_end\|>\n`;
	if (m.olmo) return `<\|system\|>\n${PERSONA}\n`;
	return PERSONA + "\n\n"; // word/plain frame: prepend as leading context
	}

	let engine = null, convIds = [], busy = false, armed = false, pending = null;
	// type immediately — don't wait for the model. The first message is queued and auto-sent the instant Q is ready.
	input.disabled = send.disabled = false; input.placeholder = "Message Q… (model loading — will send the moment it's ready)"; input.focus();

	async function generate(text, skipUser) {
	busy = true; input.disabled = send.disabled = true;
	if (!skipUser) bubble("u", text);
	const a = bubble("a think", "…"); let first = true;
	const stat = document.createElement("div"); stat.className = "stat";
	const t0 = performance.now();
	try {
	let framed = engine.frameTurn(text, convIds.length > 0);
	if (convIds.length === 0) framed = frameSystem() + framed; // first turn → lead with the on-device Q persona
	let turnIds = engine.tokenize(framed);
	if (m.bos && engine.bosId != null && convIds.length === 0) turnIds = [engine.bosId, ...turnIds];
	const res = await engine.generate(convIds.concat(turnIds), { maxNew: m.cap \|\| 256, onToken: ({ text: t, stats }) => {
	if (first && t) { a.classList.remove("think"); a.textContent = ""; first = false; }
	a.textContent = t; log.scrollTop = log.scrollHeight;
	if (stats) stat.textContent = `${stats.tokps ? stats.tokps.toFixed(0) + " tok/s" : ""}${stats.ttft ? " · TTFT " + Math.round(stats.ttft) + "ms" : ""}`;
	} });
	if (first) { a.classList.remove("think"); a.textContent = res.text \|\| "(no output)"; }
	convIds = res.ids; a.after(stat);
	} catch (e) { a.classList.remove("think"); a.textContent = "⚠ " + e.message; }
	busy = false; input.disabled = send.disabled = false; input.focus();
	}
	// Q reaches out first — a real, model-GENERATED opening (not a canned line), so it feels alive.
	// This also warms the GPU pipelines. It is standalone (not added to convIds) so the real chat starts fresh.
	async function proactiveGreeting() {
	busy = true; input.disabled = send.disabled = true;
	const a = bubble("a think", "…"); let first = true;
	const stat = document.createElement("div"); stat.className = "stat";
	const FALLBACK = "Hey — I'm Q, running entirely on your device, no server, my weights verified by re-derivation. What can I help you with?";
	try {
	const P = "This is the very first thing you say to the person who just opened you. You are Q — a private AI running entirely on their device with no server, your weights verified by re-derivation. Greet them warmly in one or two sentences and invite them to ask you anything.";
	let ids = engine.tokenize(engine.frameTurn(P, false));
	if (m.bos && engine.bosId != null) ids = [engine.bosId, ...ids];
	await engine.generate(ids, { maxNew: 64, onToken: ({ text: t, stats }) => { if (first && t) { a.classList.remove("think"); a.textContent = ""; first = false; } a.textContent = t; log.scrollTop = log.scrollHeight; if (stats && stats.tokps) stat.textContent = `${stats.tokps.toFixed(0)} tok/s`; } });
	if (first \|\| a.textContent.trim().length < 4) { a.classList.remove("think"); a.textContent = FALLBACK; } else a.after(stat);
	} catch (e) { a.classList.remove("think"); a.textContent = FALLBACK; }
	busy = false; input.disabled = send.disabled = false; input.focus();
	}
	function onSend() {
	const text = input.value.trim(); if (!text \|\| busy) return;
	input.value = ""; input.style.height = "auto";
	if (!armed) { pending = text; bubble("u", text); const w = bubble("a think", "…starting the model, one moment…"); w.dataset.pending = "1"; return; }
	generate(text);
	}
	send.onclick = onSend;
	input.onkeydown = (e) => { if (e.key === "Enter" && !e.shiftKey) { e.preventDefault(); onSend(); } };
	input.oninput = () => { input.style.height = "auto"; input.style.height = Math.min(140, input.scrollHeight) + "px"; };

	try {
	if (!navigator.gpu) throw new Error("This browser has no WebGPU. Use Chrome/Edge.");
	st.textContent = `loading ${m.name} (${m.size})…`;
	const loaded = await loadModel(m, { onStatus: (s) => { if (s) st.textContent = `${m.name}: ${s}`; }, onProgress: (d, t, w) => { st.textContent = `${m.name}: ${w} ${t ? Math.round(100 * d / t) : 0}%`; } });
	if (!loaded \|\| !loaded.gpu) throw new Error("model load failed");
	engine = await createEngine(m, loaded);
	armed = true;
	st.textContent = `${m.name} · ${m.size} · resident on your GPU · ready`;
	input.placeholder = "Message Q…";
	if (pending) { const w = [...log.querySelectorAll(".a")].reverse().find((x) => x.dataset.pending); if (w) w.remove(); const p = pending; pending = null; generate(p, true); } // you jumped in first → answer that
	else await proactiveGreeting(); // otherwise Q reaches out the moment it's ready (also warms the GPU)
	} catch (e) { st.textContent = "⚠ " + e.message; bubble("a", "Could not start: " + e.message); }
	</script></body></html>