<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Mamba WebGPU — First Browser-Native SSM Inference</title>
<style>
body { font-family: monospace; background: #0d1117; color: #c9d1d9; padding: 24px; max-width: 900px; margin: 0 auto; }
h1 { color: #58a6ff; font-size: 20px; }
.card { background: #161b22; border: 1px solid #30363d; border-radius: 8px; padding: 16px; margin: 12px 0; }
.label { color: #8b949e; font-size: 12px; text-transform: uppercase; letter-spacing: 1px; }
.value { color: #c9d1d9; font-size: 14px; margin-top: 4px; }
.green { color: #3fb950; } .red { color: #f85149; } .amber { color: #d29922; }
#log { font-size: 12px; background: #010409; border: 1px solid #30363d; border-radius: 6px; padding: 10px; max-height: 400px; overflow-y: auto; white-space: pre-wrap; }
button { background: #238636; color: white; border: none; border-radius: 6px; padding: 8px 16px; cursor: pointer; font-weight: bold; margin: 4px; }
button:disabled { opacity: 0.5; cursor: wait; }
input { background: #161b22; border: 1px solid #30363d; color: #c9d1d9; border-radius: 6px; padding: 8px 12px; width: 60%; }
</style>
</head>
<body>
<h1>🐍 Mamba WebGPU — Falcon-Mamba 7B in Browser</h1>
<p>First browser-native Mamba/SSM inference engine. Pure WebGPU compute shaders — no MLC, no TVM.</p>
<div class="card">
<div class="label">Status</div>
<div class="value" id="status"><span class="amber">⬤</span> not initialized</div>
</div>
<div class="card">
<div class="label">WebGPU Info</div>
<div class="value" id="gpu-info">checking...</div>
</div>
<div class="card">
<button id="btn-init" onclick="doInit()">1. Initialize WebGPU + Compile Shaders</button>
<button id="btn-load" onclick="doLoad()" disabled>2. Load Weights</button>
<button id="btn-gen" onclick="doGenerate()" disabled>3. Generate</button>
</div>
<div class="card">
<div class="label">Prompt</div>
<input id="prompt" value="Hello, I am Grandma Goodwin and" />
</div>
<div class="card">
<div class="label">Log</div>
<div id="log"></div>
</div>
<script type="module">
import { MambaRuntime } from './mamba_runtime.js';
// Runtime instance shared by the button handlers; stays null until doInit() constructs a MambaRuntime.
let mamba = null;
// Log panel element that l() appends timestamped lines to.
const log = document.getElementById('log');
// Status card element whose contents the button handlers replace as the workflow progresses.
const status = document.getElementById('status');
/**
 * Append one timestamped line to the on-page log panel and keep the panel
 * scrolled to its newest entry.
 *
 * @param {string} msg - Text to record; written via textContent, so it is never parsed as HTML.
 */
function l(msg) {
  // toISOString() yields "YYYY-MM-DDTHH:MM:SS.sssZ"; characters 11..18 are the UTC HH:MM:SS part.
  const isoNow = new Date().toISOString();
  const clock = isoNow.substring(11, 19);
  const entry = `[${clock}] ${msg}\n`;
  log.textContent += entry;
  // Pin the scroll position to the bottom so the latest entry stays visible.
  log.scrollTop = log.scrollHeight;
}
// Probe WebGPU support once at page load and summarize the adapter in the "WebGPU Info" card.
(async () => {
  const BYTES_PER_MB = 1024 ** 2;
  const BYTES_PER_GB = 1024 ** 3;

  const panel = document.getElementById('gpu-info');

  // No navigator.gpu at all: the browser does not expose WebGPU.
  if (!navigator.gpu) {
    panel.innerHTML = '<span class="red">WebGPU not supported</span>';
    return;
  }

  // The API exists but no adapter was returned.
  const adapter = await navigator.gpu.requestAdapter();
  if (!adapter) {
    panel.innerHTML = '<span class="red">No adapter found</span>';
    return;
  }

  // adapter.info may be missing; fall back to an empty object so the fields read as undefined.
  const { vendor, architecture } = adapter.info || {};
  const { maxBufferSize, maxStorageBufferBindingSize } = adapter.limits;

  const bufferGb = (maxBufferSize / BYTES_PER_GB).toFixed(2);
  const storageMb = (maxStorageBufferBindingSize / BYTES_PER_MB).toFixed(0);

  // `||` (not `??`) on purpose: empty-string fields are shown as '?' as well.
  panel.innerHTML = [
    `<span class="green">✓</span> ${vendor || '?'} ${architecture || '?'}`,
    `maxBufferSize: ${bufferGb} GB`,
    `maxStorageBuffer: ${storageMb} MB`,
  ].join(' | ');
})();
/**
 * Step 1 button handler: construct the MambaRuntime and await its init().
 *
 * The init button is disabled while this runs. On success the "Load Weights"
 * button is enabled. On failure the error is logged and shown in the status
 * card, and the init button is re-enabled so the user can retry (matching how
 * doLoad treats its own button).
 *
 * Assigned to `window` because the buttons use inline `onclick` attributes,
 * which cannot see bindings scoped to this module script.
 *
 * @returns {Promise<void>}
 */
window.doInit = async function() {
  const initBtn = document.getElementById('btn-init');
  try {
    initBtn.disabled = true;
    l('Initializing WebGPU device...');
    mamba = new MambaRuntime();
    await mamba.init();
    l('✓ Device ready, all 12 shaders compiled');
    status.innerHTML = '<span class="green">⬤</span> shaders compiled — ready to load weights';
    document.getElementById('btn-load').disabled = false;
  } catch (e) {
    // Non-Error throwables have no .message; fall back to their string form.
    const reason = (e && e.message) || String(e);
    l('✗ Init failed: ' + reason);
    // Insert the message as a text node: error text may contain '<' or '>'
    // (e.g. type names in shader diagnostics) and must not be parsed as HTML.
    status.innerHTML = '<span class="red">⬤</span> ';
    status.append(reason);
    // Allow another attempt instead of leaving the page stuck.
    initBtn.disabled = false;
  }
};
/**
 * Step 2 button handler: ask the runtime to load weights from './weights'.
 *
 * The load button is disabled while this runs. On success the "Generate"
 * button is enabled. On failure the error is logged and shown in the status
 * card, and the load button is re-enabled so the user can retry.
 *
 * Assigned to `window` because the buttons use inline `onclick` attributes,
 * which cannot see bindings scoped to this module script.
 *
 * @returns {Promise<void>}
 */
window.doLoad = async function() {
  const loadBtn = document.getElementById('btn-load');
  try {
    loadBtn.disabled = true;
    l('Loading Falcon-Mamba 7B weights...');
    l('(Point ./weights/ to your safetensors directory)');
    status.innerHTML = '<span class="amber">⬤</span> loading weights...';
    await mamba.loadWeights('./weights');
    l('✓ Weights loaded, SSM state allocated');
    status.innerHTML = '<span class="green">⬤</span> model loaded — ready to generate';
    document.getElementById('btn-gen').disabled = false;
  } catch (e) {
    // Non-Error throwables have no .message; fall back to their string form.
    const reason = (e && e.message) || String(e);
    l('✗ Load failed: ' + reason);
    // Insert the message as a text node so any '<' or '>' in the error text
    // is displayed literally instead of being parsed as HTML.
    status.innerHTML = '<span class="red">⬤</span> ';
    status.append(reason);
    loadBtn.disabled = false;
  }
};
/**
 * Step 3 button handler: run generation for the text in the prompt box and
 * log the prompt followed by the returned result, prefixed with the elapsed time.
 *
 * The generate button is disabled for the duration of the call and re-enabled
 * afterwards, whether generation succeeded or failed.
 *
 * @returns {Promise<void>}
 */
window.doGenerate = async function() {
  const genButton = document.getElementById('btn-gen');
  // Guard against re-entry while a generation is already in flight.
  if (genButton.disabled) {
    return;
  }
  genButton.disabled = true;

  const promptText = document.getElementById('prompt').value;
  l(`Generating: "${promptText}"`);
  status.innerHTML = '<span class="amber">⬤</span> encoding prompt...';

  // Progress callback: switch the status line when step 0 is reported.
  // NOTE(review): presumably invoked once per generated token — confirm in mamba_runtime.js.
  const onToken = (_token, step) => {
    if (step !== 0) {
      return;
    }
    status.innerHTML = '<span class="amber">⬤</span> generating tokens...';
  };

  const startedAt = performance.now();
  try {
    // NOTE(review): 100 and 0.75 look like a token limit and a sampling temperature — confirm.
    const completion = await mamba.generate(promptText, 100, 0.75, onToken);
    const elapsedMs = performance.now() - startedAt;
    const seconds = (elapsedMs / 1000).toFixed(1);
    l(`[${seconds}s] ${promptText}${completion}`);
    status.innerHTML = '<span class="green">⬤</span> done';
  } catch (err) {
    l('ERROR: ' + err.message);
    status.innerHTML = '<span class="red">⬤</span> error';
  }

  genButton.disabled = false;
};
</script>
</body>
</html>