File size: 5,205 Bytes
ed8ac31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e9a2d5b
ed8ac31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Mamba WebGPU — First Browser-Native SSM Inference</title>
<style>
body { font-family: monospace; background: #0d1117; color: #c9d1d9; padding: 24px; max-width: 900px; margin: 0 auto; }
h1 { color: #58a6ff; font-size: 20px; }
.card { background: #161b22; border: 1px solid #30363d; border-radius: 8px; padding: 16px; margin: 12px 0; }
.label { color: #8b949e; font-size: 12px; text-transform: uppercase; letter-spacing: 1px; }
.value { color: #c9d1d9; font-size: 14px; margin-top: 4px; }
.green { color: #3fb950; } .red { color: #f85149; } .amber { color: #d29922; }
#log { font-size: 12px; background: #010409; border: 1px solid #30363d; border-radius: 6px; padding: 10px; max-height: 400px; overflow-y: auto; white-space: pre-wrap; }
button { background: #238636; color: white; border: none; border-radius: 6px; padding: 8px 16px; cursor: pointer; font-weight: bold; margin: 4px; }
button:disabled { opacity: 0.5; cursor: wait; }
input { background: #161b22; border: 1px solid #30363d; color: #c9d1d9; border-radius: 6px; padding: 8px 12px; width: 60%; }
</style>
</head>
<body>
<h1>🐍 Mamba WebGPU — Falcon-Mamba 7B in Browser</h1>
<p>First browser-native Mamba/SSM inference engine. Pure WebGPU compute shaders — no MLC, no TVM.</p>

<div class="card">
  <div class="label">Status</div>
  <div class="value" id="status"><span class="amber">⬤</span> not initialized</div>
</div>

<div class="card">
  <div class="label">WebGPU Info</div>
  <div class="value" id="gpu-info">checking...</div>
</div>

<div class="card">
  <button id="btn-init" onclick="doInit()">1. Initialize WebGPU + Compile Shaders</button>
  <button id="btn-load" onclick="doLoad()" disabled>2. Load Weights</button>
  <button id="btn-gen" onclick="doGenerate()" disabled>3. Generate</button>
</div>

<div class="card">
  <div class="label">Prompt</div>
  <input id="prompt" value="Hello, I am Grandma Goodwin and" />
</div>

<div class="card">
  <div class="label">Log</div>
  <div id="log"></div>
</div>

<script type="module">
import { MambaRuntime } from './mamba_runtime.js';

// Shared page state used by the handlers in this script.
// `mamba` holds the MambaRuntime instance; it stays null until doInit() creates one.
let mamba = null;
// The #log element that l() appends timestamped lines to.
const log = document.getElementById('log');
// The #status element that the handlers rewrite to report progress and errors.
const status = document.getElementById('status');

/**
 * Append one timestamped line to the on-page log and scroll the log so the
 * newest entry is visible.
 *
 * @param {*} msg - Text to record; it is interpolated into the log line.
 */
function l(msg) {
  // Characters 11..18 of an ISO-8601 timestamp are the HH:MM:SS part (UTC).
  const isoNow = new Date().toISOString();
  const clock = isoNow.substring(11, 19);
  const entry = `[${clock}] ${msg}\n`;
  log.textContent = log.textContent + entry;
  // Pin the scroll position to the bottom after the new line is added.
  log.scrollTop = log.scrollHeight;
}

// Probe WebGPU support once on page load and summarize the adapter in the
// "WebGPU Info" card. Any failure is shown in the card instead of being left
// as an unhandled promise rejection with the card stuck on "checking...".
(async () => {
  const info = document.getElementById('gpu-info');

  // Render a red failure message; textContent keeps the message literal even
  // if it contains markup characters.
  const showError = (text) => {
    const span = document.createElement('span');
    span.className = 'red';
    span.textContent = text;
    info.replaceChildren(span);
  };

  if (!navigator.gpu) {
    showError('WebGPU not supported');
    return;
  }

  try {
    const adapter = await navigator.gpu.requestAdapter();
    if (!adapter) {
      showError('No adapter found');
      return;
    }
    // adapter.info may be missing on some implementations; fall back to an
    // empty object so the summary prints '?' placeholders instead of throwing.
    const ai = adapter.info || {};
    const lim = adapter.limits;
    info.innerHTML = `<span class="green">✓</span> ${ai.vendor || '?'} ${ai.architecture || '?'} | ` +
      `maxBufferSize: ${(lim.maxBufferSize/1024/1024/1024).toFixed(2)} GB | ` +
      `maxStorageBuffer: ${(lim.maxStorageBufferBindingSize/1024/1024).toFixed(0)} MB`;
  } catch (e) {
    const reason = e instanceof Error ? e.message : String(e);
    showError('WebGPU adapter query failed: ' + reason);
  }
})();

/**
 * Button handler for step 1: create a MambaRuntime, run its init(), and
 * enable the "Load Weights" button on success.
 *
 * On failure the error is logged and shown in the status card, and the init
 * button is re-enabled so the user can retry without reloading the page.
 */
window.doInit = async function() {
  const initButton = document.getElementById('btn-init');
  try {
    initButton.disabled = true;
    l('Initializing WebGPU device...');
    // Publish the runtime only after init() succeeds, so a failed attempt
    // never leaves a half-initialized instance in `mamba`.
    const runtime = new MambaRuntime();
    await runtime.init();
    mamba = runtime;
    l('✓ Device ready, all 12 shaders compiled');
    status.innerHTML = '<span class="green">⬤</span> shaders compiled — ready to load weights';
    document.getElementById('btn-load').disabled = false;
  } catch (e) {
    const reason = e instanceof Error ? e.message : String(e);
    l('✗ Init failed: ' + reason);
    // Build the status from DOM nodes rather than innerHTML: the error text
    // may contain '<' or '&' and must be displayed literally.
    const dot = document.createElement('span');
    dot.className = 'red';
    dot.textContent = '⬤';
    status.replaceChildren(dot, ' ' + reason);
    // Allow a retry, matching how doLoad re-enables its button on failure.
    initButton.disabled = false;
  }
};

/**
 * Button handler for step 2: ask the runtime to load weights from './weights'
 * and enable the "Generate" button on success.
 *
 * On failure the error is logged and shown in the status card, and the load
 * button is re-enabled so the user can retry.
 */
window.doLoad = async function() {
  const loadButton = document.getElementById('btn-load');
  try {
    loadButton.disabled = true;
    l('Loading Falcon-Mamba 7B weights...');
    l('(Point ./weights/ to your safetensors directory)');
    status.innerHTML = '<span class="amber">⬤</span> loading weights...';
    await mamba.loadWeights('./weights');
    l('✓ Weights loaded, SSM state allocated');
    status.innerHTML = '<span class="green">⬤</span> model loaded — ready to generate';
    document.getElementById('btn-gen').disabled = false;
  } catch (e) {
    const reason = e instanceof Error ? e.message : String(e);
    l('✗ Load failed: ' + reason);
    // Build the status from DOM nodes rather than innerHTML: load errors often
    // contain markup characters (e.g. a parse error quoting an HTML 404 page)
    // and must be displayed literally.
    const dot = document.createElement('span');
    dot.className = 'red';
    dot.textContent = '⬤';
    status.replaceChildren(dot, ' ' + reason);
    loadButton.disabled = false;
  }
};

/**
 * Button handler for step 3: read the prompt field, run mamba.generate() on
 * it, and log the prompt plus the returned text with the elapsed time.
 *
 * The generate button is disabled for the duration of the call and re-enabled
 * afterwards, whether generation succeeded or failed.
 */
window.doGenerate = async function() {
  const genButton = document.getElementById('btn-gen');
  // A disabled button means a run is already in progress; ignore the call.
  if (genButton.disabled) {
    return;
  }
  genButton.disabled = true;

  const promptText = document.getElementById('prompt').value;
  l(`Generating: "${promptText}"`);
  status.innerHTML = '<span class="amber">⬤</span> encoding prompt...';

  // Switch the status line when the callback reports step 0.
  const onToken = (_token, step) => {
    if (step === 0) {
      status.innerHTML = '<span class="amber">⬤</span> generating tokens...';
    }
  };

  const startedAt = performance.now();
  try {
    // NOTE(review): 100 and 0.75 are presumably max tokens and temperature —
    // confirm against MambaRuntime.generate.
    const completion = await mamba.generate(promptText, 100, 0.75, onToken);
    const seconds = ((performance.now() - startedAt) / 1000).toFixed(1);
    l(`[${seconds}s] ${promptText}${completion}`);
    status.innerHTML = '<span class="green">⬤</span> done';
  } catch (err) {
    l('ERROR: ' + err.message);
    status.innerHTML = '<span class="red">⬤</span> error';
  }
  genButton.disabled = false;
};
</script>
</body>
</html>