| <!DOCTYPE html> |
| <html lang="en"> |
<head>
  <meta charset="UTF-8">
  <!-- Without a viewport declaration, mobile browsers lay the page out at
       desktop width and scale it down, which makes the monospace UI unreadable. -->
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Mamba WebGPU — First Browser-Native SSM Inference</title>
  <style>
    /* Page shell: dark theme, centred single column. */
    body {
      font-family: monospace;
      background: #0d1117;
      color: #c9d1d9;
      padding: 24px;
      max-width: 900px;
      margin: 0 auto;
    }

    h1 { color: #58a6ff; font-size: 20px; }

    /* Each UI section (status, GPU info, controls, prompt, log) is a card. */
    .card {
      background: #161b22;
      border: 1px solid #30363d;
      border-radius: 8px;
      padding: 16px;
      margin: 12px 0;
    }

    /* Small upper-case caption at the top of a card. */
    .label {
      color: #8b949e;
      font-size: 12px;
      text-transform: uppercase;
      letter-spacing: 1px;
    }

    .value { color: #c9d1d9; font-size: 14px; margin-top: 4px; }

    /* Colours for the status dot and the WebGPU info marker. */
    .green { color: #3fb950; }
    .red { color: #f85149; }
    .amber { color: #d29922; }

    /* Scrolling log pane; pre-wrap keeps the newline that ends each log entry. */
    #log {
      font-size: 12px;
      background: #010409;
      border: 1px solid #30363d;
      border-radius: 6px;
      padding: 10px;
      max-height: 400px;
      overflow-y: auto;
      white-space: pre-wrap;
    }

    button {
      background: #238636;
      color: white;
      border: none;
      border-radius: 6px;
      padding: 8px 16px;
      cursor: pointer;
      font-weight: bold;
      margin: 4px;
    }

    /* Buttons are disabled while their step is unavailable or in progress. */
    button:disabled { opacity: 0.5; cursor: wait; }

    input {
      background: #161b22;
      border: 1px solid #30363d;
      color: #c9d1d9;
      border-radius: 6px;
      padding: 8px 12px;
      width: 60%;
    }
  </style>
</head>
| <body> |
<h1>🐍 Mamba WebGPU — Falcon-Mamba 7B in the Browser</h1>
<p>The first browser-native Mamba/SSM inference engine, built on pure WebGPU compute shaders — no MLC, no TVM.</p>
|
|
<div class="card">
  <div class="label">Status</div>
  <!-- The page script rewrites this element as each step starts, succeeds or
       fails. role="status" makes it a polite live region so assistive
       technology announces those changes instead of leaving them visual-only. -->
  <div class="value" id="status" role="status"><span class="amber">⬤</span> not initialized</div>
</div>
|
|
<div class="card">
  <div class="label">WebGPU Info</div>
  <!-- "checking..." is replaced asynchronously once the adapter probe in the
       page script finishes; aria-live lets screen readers hear the result. -->
  <div class="value" id="gpu-info" aria-live="polite">checking...</div>
</div>
|
|
<div class="card">
  <!-- Three-step workflow. Steps 2 and 3 start disabled and are enabled by the
       page script once the previous step succeeds. The handlers are the
       window.doInit / window.doLoad / window.doGenerate functions published by
       the module script. type="button" is explicit so these never act as
       submit buttons if the markup is later wrapped in a form. -->
  <button type="button" id="btn-init" onclick="doInit()">1. Initialize WebGPU + Compile Shaders</button>
  <button type="button" id="btn-load" onclick="doLoad()" disabled>2. Load Weights</button>
  <button type="button" id="btn-gen" onclick="doGenerate()" disabled>3. Generate</button>
</div>
|
|
<div class="card">
  <!-- The caption is a real <label> tied to the input so the field has an
       accessible name and clicking the caption focuses it. The wrapper div
       keeps the original block layout and .label styling. -->
  <div class="label"><label for="prompt">Prompt</label></div>
  <input id="prompt" type="text" value="Hello, I am Grandma Goodwin and">
</div>
|
|
<div class="card">
  <div class="label">Log</div>
  <!-- Starts empty; the page script appends one timestamped line per event
       and keeps the pane scrolled to the newest entry. -->
  <div id="log"></div>
</div>
|
|
| <script type="module"> |
| import { MambaRuntime } from './mamba_runtime.js'; |
| |
// Runtime instance shared by the button handlers; null until doInit() constructs a MambaRuntime.
let mamba = null;

// Page elements every handler writes to: the scrolling log pane and the status line.
const [log, status] = ['log', 'status'].map((id) => document.getElementById(id));
| |
/**
 * Append one timestamped line to the on-page log and scroll it into view.
 *
 * The timestamp is the HH:MM:SS portion of the current time's ISO-8601 string,
 * so it is in UTC rather than local time.
 *
 * @param {string} msg Text to log; a trailing newline is added automatically.
 */
function l(msg) {
  const ts = new Date().toISOString().slice(11, 19);
  // Append a new text node instead of `textContent +=`, which re-reads and
  // rewrites the entire log on every call (quadratic in log length) and
  // replaces the existing text node, dropping any selection inside it.
  log.append(`[${ts}] ${msg}\n`);
  log.scrollTop = log.scrollHeight;
}
| |
| |
// Runs once at load: probes WebGPU support and summarises the adapter in the
// "WebGPU Info" card. Independent of the MambaRuntime created by doInit().
(async () => {
  const info = document.getElementById('gpu-info');

  // Build a coloured <span>. Text is set via textContent so adapter-supplied
  // or error-supplied strings are never parsed as markup.
  const badge = (cls, text) => {
    const span = document.createElement('span');
    span.className = cls;
    span.textContent = text;
    return span;
  };

  try {
    if (!navigator.gpu) {
      info.replaceChildren(badge('red', 'WebGPU not supported'));
      return;
    }
    const adapter = await navigator.gpu.requestAdapter();
    if (!adapter) {
      info.replaceChildren(badge('red', 'No adapter found'));
      return;
    }
    // adapter.info may be missing on some implementations; fall back to '?'.
    const ai = adapter.info || {};
    const lim = adapter.limits;
    info.replaceChildren(
      badge('green', '✓'),
      ` ${ai.vendor || '?'} ${ai.architecture || '?'} | ` +
        `maxBufferSize: ${(lim.maxBufferSize/1024/1024/1024).toFixed(2)} GB | ` +
        `maxStorageBuffer: ${(lim.maxStorageBufferBindingSize/1024/1024).toFixed(0)} MB`
    );
  } catch (e) {
    // Previously a rejected requestAdapter() became an unhandled rejection and
    // the card stayed on "checking..." forever.
    info.replaceChildren(badge('red', 'WebGPU probe failed: ' + ((e && e.message) || String(e))));
  }
})();
| |
/**
 * Button handler for step 1: create the MambaRuntime and await its init().
 *
 * On success the "Load Weights" button is enabled and the init button stays
 * disabled. On failure the error is logged and shown in the status line, and
 * the init button is re-enabled so the user can retry (matching doLoad).
 *
 * @returns {Promise<void>} Resolves when the attempt finishes; never rejects.
 */
window.doInit = async function() {
  const initButton = document.getElementById('btn-init');
  try {
    initButton.disabled = true;
    l('Initializing WebGPU device...');
    mamba = new MambaRuntime();
    await mamba.init();
    l('✓ Device ready, all 12 shaders compiled');
    status.innerHTML = '<span class="green">⬤</span> shaders compiled — ready to load weights';
    document.getElementById('btn-load').disabled = false;
  } catch (e) {
    // Rejections are not guaranteed to be Error objects.
    const reason = (e && e.message) || String(e);
    l('✗ Init failed: ' + reason);
    // Show the message as text, not HTML: error text may contain characters
    // such as "<" that innerHTML would parse as markup.
    const dot = document.createElement('span');
    dot.className = 'red';
    dot.textContent = '⬤';
    status.replaceChildren(dot, ' ' + reason);
    initButton.disabled = false;
  }
};
| |
/**
 * Button handler for step 2: ask the runtime to load weights from './weights'.
 *
 * On success the "Generate" button is enabled and the load button stays
 * disabled. On failure the error is logged and shown in the status line, and
 * the load button is re-enabled for a retry.
 *
 * @returns {Promise<void>} Resolves when the attempt finishes; never rejects.
 */
window.doLoad = async function() {
  const loadButton = document.getElementById('btn-load');
  try {
    loadButton.disabled = true;
    l('Loading Falcon-Mamba 7B weights...');
    l('(Point ./weights/ to your safetensors directory)');
    status.innerHTML = '<span class="amber">⬤</span> loading weights...';
    await mamba.loadWeights('./weights');
    l('✓ Weights loaded, SSM state allocated');
    status.innerHTML = '<span class="green">⬤</span> model loaded — ready to generate';
    document.getElementById('btn-gen').disabled = false;
  } catch (e) {
    // Rejections are not guaranteed to be Error objects.
    const reason = (e && e.message) || String(e);
    l('✗ Load failed: ' + reason);
    // Show the message as text, not HTML: error text may contain characters
    // such as "<" that innerHTML would parse as markup.
    const dot = document.createElement('span');
    dot.className = 'red';
    dot.textContent = '⬤';
    status.replaceChildren(dot, ' ' + reason);
    loadButton.disabled = false;
  }
};
| |
/**
 * Button handler for step 3: run generation on the text in the prompt box.
 *
 * Calls mamba.generate(prompt, 100, 0.75, callback), then logs the elapsed
 * wall-clock seconds followed by the prompt and the returned text. The
 * generate button is disabled for the duration and re-enabled afterwards,
 * whether generation succeeded or failed.
 *
 * @returns {Promise<void>} Resolves when generation finishes or fails.
 */
window.doGenerate = async function() {
  const genButton = document.getElementById('btn-gen');
  // Ignore calls made while a generation is already running.
  if (genButton.disabled) {
    return;
  }
  genButton.disabled = true;

  const promptText = document.getElementById('prompt').value;
  l(`Generating: "${promptText}"`);
  status.innerHTML = '<span class="amber">⬤</span> encoding prompt...';

  // Invoked by the runtime with (token, step); only step 0 changes the status line.
  const onToken = (_token, step) => {
    if (step !== 0) return;
    status.innerHTML = '<span class="amber">⬤</span> generating tokens...';
  };

  const startedAt = performance.now();
  try {
    const completion = await mamba.generate(promptText, 100, 0.75, onToken);
    const seconds = ((performance.now() - startedAt) / 1000).toFixed(1);
    l(`[${seconds}s] ${promptText}${completion}`);
    status.innerHTML = '<span class="green">⬤</span> done';
  } catch (err) {
    l('ERROR: ' + err.message);
    status.innerHTML = '<span class="red">⬤</span> error';
  }
  genButton.disabled = false;
};
| </script> |
| </body> |
| </html> |
|
|