<!--
mamba-webgpu / grandma.html
LJTSG's picture
Upload grandma.html with huggingface_hub
cb3e6cf verified
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Grandma Goodwin — Browser-Native Mamba + TTT</title>
<style>
* { box-sizing: border-box; margin: 0; padding: 0; }
body { font-family: Georgia, serif; background: #1a1410; color: #d4c5a9; height: 100vh; display: flex; flex-direction: column; }
header { background: #2a1f15; padding: 12px 20px; border-bottom: 1px solid #3d2e1f; text-align: center; }
header h1 { font-size: 18px; color: #e8c87a; font-weight: normal; letter-spacing: 1px; }
header .sub { font-size: 11px; color: #8a7a5a; margin-top: 2px; }
#status { font-size: 12px; color: #8a7a5a; padding: 6px 20px; background: #1f1810; text-align: center; }
#chat { flex: 1; overflow-y: auto; padding: 20px; display: flex; flex-direction: column; gap: 12px; }
.msg { max-width: 80%; padding: 10px 14px; border-radius: 8px; line-height: 1.5; font-size: 15px; }
.msg.user { align-self: flex-end; background: #2a3a2a; color: #b8d4b8; border-radius: 8px 8px 2px 8px; }
.msg.grandma { align-self: flex-start; background: #2a1f15; color: #d4c5a9; border-radius: 8px 8px 8px 2px; border: 1px solid #3d2e1f; }
.msg.system { align-self: center; font-size: 12px; color: #6a5a3a; font-style: italic; }
#input-row { display: flex; gap: 8px; padding: 12px 20px; background: #1f1810; border-top: 1px solid #3d2e1f; }
#input { flex: 1; background: #2a1f15; border: 1px solid #3d2e1f; color: #d4c5a9; border-radius: 6px; padding: 10px 14px; font-family: Georgia, serif; font-size: 15px; outline: none; }
#input:focus { border-color: #e8c87a; }
#send { background: #3d2e1f; color: #e8c87a; border: none; border-radius: 6px; padding: 10px 18px; cursor: pointer; font-family: Georgia, serif; font-size: 14px; }
#send:disabled { opacity: 0.4; cursor: wait; }
#send:hover:not(:disabled) { background: #4d3e2f; }
</style>
</head>
<body>
<header>
<h1>Grandma Goodwin</h1>
<div class="sub">Falcon-Mamba 7B + TTT Substrate — Browser-Native SSM</div>
</header>
<div id="status">loading...</div>
<div id="chat"></div>
<div id="input-row">
<input id="input" placeholder="Talk to Grandma..." disabled autocomplete="off" />
<button id="send" disabled onclick="sendMessage()">Send</button>
</div>
<script type="module">
import { MambaRuntime } from './mamba_runtime.js';
// Page elements the script reads and updates.
const chat = document.getElementById('chat');
const input = document.getElementById('input');
const status = document.getElementById('status');
const sendBtn = document.getElementById('send');

// Side length of the square projection matrix W that boot() builds with eye(DIM).
const DIM = 384;

// Persona text; buildSystem() uses it as the start of every system prompt.
const IDENTITY = `You are Grandma Goodwin, a warm wise grandmother. You call people sugar and darling. Comfort first, stories over lectures. Keep responses to 2-3 sentences.`;

// Runtime state, filled in by boot().
let mamba = null; // MambaRuntime instance
let embed = null; // async text -> vector function; stays null if the embedder fails to load
let FACTS = []; // substrate facts, each carrying a Float32Array `vec`
let W = null; // 384x384 projection matrix
/**
 * Append one message bubble to the chat log and scroll to the bottom.
 *
 * @param {string} text - Message body; assigned via textContent, so it is
 *   inserted as plain text rather than parsed as HTML.
 * @param {string} cls - Modifier class placed after `msg` (the page's
 *   stylesheet defines `user`, `grandma` and `system`).
 */
function addMsg(text, cls) {
  const bubble = Object.assign(document.createElement('div'), {
    className: 'msg ' + cls,
    textContent: text,
  });
  chat.appendChild(bubble);
  // Keep the newest message in view.
  chat.scrollTop = chat.scrollHeight;
}
// ── Math helpers ──
/**
 * Inner product of two numeric vectors.
 *
 * Walks the indices of `a` and reads `b` at the same positions, so the
 * length of `a` decides how many terms are summed.
 *
 * @param {ArrayLike<number>} a
 * @param {ArrayLike<number>} b
 * @returns {number} Sum of a[i] * b[i] over the indices of `a`.
 */
function dot(a, b) {
  let total = 0;
  let idx = 0;
  while (idx < a.length) {
    total += a[idx] * b[idx];
    idx += 1;
  }
  return total;
}
/**
 * Multiply a row-major matrix by a vector.
 *
 * @param {ArrayLike<ArrayLike<number>>} M - Matrix given as a list of rows.
 * @param {ArrayLike<number>} v - Vector; its length sets how many columns
 *   of each row are used.
 * @returns {Float32Array} One entry per row of `M`.
 */
function matvec(M, v) {
  return Float32Array.from(M, (row) => {
    let acc = 0;
    for (let col = 0; col < v.length; col += 1) {
      acc += row[col] * v[col];
    }
    return acc;
  });
}
/**
 * Temperature-scaled softmax.
 *
 * The largest score is subtracted from every score before exponentiating,
 * so the biggest exponent is always 0.
 *
 * @param {ArrayLike<number>} s - Raw scores.
 * @param {number} [temp=0.1] - Divisor applied to the shifted scores;
 *   smaller values sharpen the distribution.
 * @returns {number[]} New plain array of weights, same length as `s`.
 */
function softmax(s, temp = 0.1) {
  const count = s.length;

  let peak = -Infinity;
  for (let i = 0; i < count; i += 1) {
    if (s[i] > peak) peak = s[i];
  }

  const weights = Array.from({ length: count }, (_, i) => Math.exp((s[i] - peak) / temp));

  let total = 0;
  for (const w of weights) {
    total += w;
  }

  return weights.map((w) => w / total);
}
/**
 * Build an n-by-n identity matrix.
 *
 * @param {number} n - Side length.
 * @returns {Float32Array[]} Array of `n` rows, each a Float32Array of
 *   length `n` with a single 1 on the diagonal.
 */
function eye(n) {
  const rows = [];
  let diag = 0;
  while (diag < n) {
    const row = new Float32Array(n); // typed arrays start zero-filled
    row[diag] = 1;
    rows.push(row);
    diag += 1;
  }
  return rows;
}
// ── TTT Retrieval ──
/**
 * Pick the stored facts that best match a piece of text.
 *
 * The text is embedded, passed through W when W is set, scored against every
 * fact vector with a dot product, and the scores are run through softmax()
 * before ranking.
 *
 * @param {string} text - Text to embed and match against the facts.
 * @param {number} [k=6] - Maximum number of facts to return.
 * @returns {Promise<object[]>} Up to `k` entries of FACTS, best match first;
 *   an empty array when no embedder is loaded or there are no facts.
 */
async function topMemory(text, k = 6) {
  const hasEmbedder = Boolean(embed);
  if (!hasEmbedder || FACTS.length === 0) {
    return [];
  }

  const rawQuery = await embed(text);
  let query = rawQuery;
  if (W) {
    query = matvec(W, rawQuery);
  }

  const weights = softmax(FACTS.map((fact) => dot(fact.vec, query)));

  // Rank fact indices by descending weight.
  const ranked = weights.map((_, idx) => idx);
  ranked.sort((left, right) => weights[right] - weights[left]);

  return ranked.slice(0, k).map((idx) => FACTS[idx]);
}
/**
 * Compose the system prompt for one turn.
 *
 * @param {Array<{value: *}>} memories - Retrieved facts; only `value` is used.
 * @returns {string} IDENTITY alone when there are no memories; otherwise
 *   IDENTITY followed by a bulleted list with one line per memory value.
 */
function buildSystem(memories) {
  const hasMemories = memories.length !== 0;
  if (!hasMemories) {
    return IDENTITY;
  }
  const bullets = memories.map(({ value }) => `- ${value}`);
  const recalled = bullets.join('\n');
  return `${IDENTITY}\n\nMemories relevant to this moment:\n${recalled}`;
}
// ── Boot ──
/**
 * Start-up sequence for the page.
 *
 * Steps, in order: fetch the fact substrate and store it in FACTS, set W to
 * an identity matrix, try to load the MiniLM embedder, initialise the Mamba
 * runtime and its weights, then enable the input controls.
 *
 * An embedder failure is logged and tolerated: `embed` stays null, and
 * topMemory() then returns no memories. Every other failure rejects the
 * returned promise.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the substrate request returns a non-2xx status or the
 *   parsed file has no `facts` array.
 */
async function boot() {
  // 1. Load substrate
  status.textContent = 'loading substrate (212 facts)...';
  const subResp = await fetch('./grandma-substrate.json');
  // fetch() also resolves for HTTP error statuses; without this check a 404
  // page would only surface later as an opaque JSON parse error.
  if (!subResp.ok) {
    throw new Error(`substrate request failed: HTTP ${subResp.status} ${subResp.statusText}`);
  }
  const substrate = await subResp.json();
  if (!substrate || !Array.isArray(substrate.facts)) {
    throw new Error('substrate file has no "facts" array');
  }
  FACTS = substrate.facts.map((f, i) => ({
    id: 'b' + i,
    key: f.key,
    value: f.value,
    vec: Float32Array.from(f.vec),
    sal: 1.0,
    base: true,
  }));
  console.log(`[ttt] loaded ${FACTS.length} substrate facts`);

  // 2. Initialize W matrix (identity — no pre-training yet)
  W = eye(DIM);
  console.log(`[ttt] W matrix: ${DIM}x${DIM} identity`);

  // 3. Load MiniLM embedder
  status.textContent = 'loading MiniLM embedder...';
  try {
    const mod = await import('./vendor/transformers/transformers.min.js');
    const env = mod.env;
    // Resolve the model and the ONNX wasm files from paths relative to this page.
    env.allowRemoteModels = false;
    env.localModelPath = './models/';
    env.backends.onnx.wasm.wasmPaths = './vendor/transformers/';
    const ext = await mod.pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
    embed = async (t) => {
      const o = await ext(t, { pooling: 'mean', normalize: true });
      return Array.from(o.data);
    };
    console.log('[ttt] MiniLM embedder ready');
  } catch (e) {
    // Best effort: chat still works without retrieval, so only log.
    console.error('[ttt] embedder failed:', e);
  }

  // 4. Init Mamba
  status.textContent = 'initializing WebGPU...';
  mamba = new MambaRuntime();
  await mamba.init();
  status.textContent = 'loading Falcon-Mamba 7B weights (~60s)...';
  await mamba.loadWeights('./weights');

  status.textContent = 'ready — the fire is lit';
  input.disabled = false;
  sendBtn.disabled = false;
  input.focus();
  addMsg('settles into the chair by the fire', 'system');
}
/**
 * Handle one chat turn: echo the user's text, retrieve memories, generate a
 * reply with the Mamba runtime, and show it.
 *
 * Assigned to `window` because the Send button's inline onclick and the
 * Enter-key handler both call it by its global name.
 *
 * The controls are disabled for the duration of the turn and are always
 * re-enabled afterwards. If retrieval or generation throws, the error is
 * logged and shown in the status bar instead of leaving the UI locked.
 *
 * @returns {Promise<void>}
 */
window.sendMessage = async function() {
  const text = input.value.trim();
  // Ignore empty input and re-entrant calls while a turn is in flight.
  if (!text || sendBtn.disabled) return;
  input.value = '';
  sendBtn.disabled = true;
  input.disabled = true;
  addMsg(text, 'user');
  try {
    // Retrieve relevant memories
    status.textContent = 'remembering...';
    const memories = await topMemory(text, 6);
    if (memories.length > 0) {
      console.log('[ttt] retrieved:', memories.map(m => m.key).join(' | '));
    }
    // Build system prompt with memories
    const system = buildSystem(memories);
    status.textContent = 'grandma is thinking...';
    const t0 = performance.now();
    const reply = await mamba.generate(text, 150, 0.8, null, system);
    const elapsed = ((performance.now() - t0) / 1000).toFixed(1);
    // Strip the end-of-turn marker from the generated text before display.
    addMsg(reply.replace(/<\|im_end\|>/g, '').trim(), 'grandma');
    status.textContent = `${elapsed}s — ${memories.length} memories recalled`;
  } catch (err) {
    console.error(err);
    status.textContent = 'error: ' + (err && err.message ? err.message : String(err));
  } finally {
    // Runs on success and failure alike so the user can always try again.
    sendBtn.disabled = false;
    input.disabled = false;
    input.focus();
  }
};
// Enter submits the message; Shift+Enter is left alone.
// Enter presses that belong to an IME composition (e.g. confirming a
// Japanese/Chinese candidate) are ignored so a half-composed message is not
// sent. keyCode 229 covers browsers that report composition keydowns that way.
input.addEventListener('keydown', (e) => {
  if (e.isComposing || e.keyCode === 229) return;
  if (e.key !== 'Enter' || e.shiftKey) return;
  e.preventDefault();
  sendMessage();
});
// Start the app; a failed boot is reported in the status bar and the console.
boot().catch((err) => {
  status.textContent = 'error: ' + err.message;
  console.error(err);
});
</script>
</body>
</html>