<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Gemma 4 26B A4B — Browser WebGPU via wllama</title>
<style>
body { font-family: monospace; background: #0d1117; color: #c9d1d9; padding: 24px; max-width: 900px; margin: 0 auto; }
h1 { color: #58a6ff; font-size: 20px; }
.card { background: #161b22; border: 1px solid #30363d; border-radius: 8px; padding: 16px; margin: 12px 0; }
.label { color: #8b949e; font-size: 12px; text-transform: uppercase; letter-spacing: 1px; }
.value { color: #c9d1d9; font-size: 14px; margin-top: 4px; }
.green { color: #3fb950; } .red { color: #f85149; } .amber { color: #d29922; }
#log { font-size: 12px; background: #010409; border: 1px solid #30363d; border-radius: 6px; padding: 10px; max-height: 400px; overflow-y: auto; white-space: pre-wrap; }
button { background: #238636; color: white; border: none; border-radius: 6px; padding: 8px 16px; cursor: pointer; font-weight: bold; margin: 4px; }
button:disabled { opacity: 0.5; cursor: wait; }
input { background: #161b22; border: 1px solid #30363d; color: #c9d1d9; border-radius: 6px; padding: 8px 12px; width: 60%; }
#output { background: #161b22; border: 1px solid #30363d; border-radius: 6px; padding: 12px; min-height: 60px; white-space: pre-wrap; font-size: 14px; margin-top: 8px; }
</style>
</head>
<body>
<h1>Gemma 4 26B A4B — Browser WebGPU</h1>
<p>Gemma-4-26B-A4B-it (MoE, 3.8B active) running in browser via wllama + WebGPU. GGUF loaded from local server.</p>
<div class="card">
<div class="label">Status</div>
<div class="value" id="status"><span class="amber">*</span> not initialized</div>
</div>
<div class="card">
<button id="btn-load" onclick="doLoad()">1. Load Model (WebGPU)</button>
<button id="btn-gen" onclick="doGenerate()" disabled>2. Generate</button>
</div>
<div class="card">
<div class="label">Prompt</div>
<input id="prompt" value="Hello, I am a helpful assistant and" />
</div>
<div class="card">
<div class="label">Output</div>
<div id="output"></div>
</div>
<div class="card">
<div class="label">Log</div>
<div id="log"></div>
</div>
<script type="module">
import { Wllama } from './node_modules/@wllama/wllama/esm/index.js';
// Page elements this script writes to: the scrolling log pane, the status
// line, and the generation output box.
const log = document.getElementById('log');
const status = document.getElementById('status');
const output = document.getElementById('output');
// Wllama instance shared by doLoad() and doGenerate(); stays null until
// doLoad() constructs it.
let wllama = null;
/**
 * Append one timestamped line to the on-page log and keep the newest entry
 * scrolled into view.
 *
 * @param {*} msg - Text to record; it is interpolated into the log line.
 */
function l(msg) {
  // Characters 11..18 of the ISO-8601 string are the HH:MM:SS part (UTC).
  const isoNow = new Date().toISOString();
  const clock = isoNow.substring(11, 19);
  const entry = `[${clock}] ${msg}\n`;
  log.textContent = log.textContent + entry;
  // Pin the viewport to the bottom so the line just added is visible.
  log.scrollTop = log.scrollHeight;
}
/**
 * Click handler for "1. Load Model": creates the Wllama instance and loads the
 * split GGUF model from this page's origin, reporting progress in the status
 * line and the log. On success the Generate button is enabled; on failure the
 * error is shown, the failed instance is released, and the Load button is
 * re-enabled so the user can retry.
 *
 * @returns {Promise<void>} Resolves once loading has succeeded or failed;
 *   errors are reported in the UI rather than rethrown.
 */
window.doLoad = async function() {
  const loadButton = document.getElementById('btn-load');
  try {
    loadButton.disabled = true;
    l('Initializing wllama...');
    status.innerHTML = '<span class="amber">*</span> initializing...';
    const CONFIG_PATHS = {
      default: './node_modules/@wllama/wllama/esm/wasm/wllama.wasm',
    };
    wllama = new Wllama(CONFIG_PATHS, {
      parallelDownloads: 5,
      logger: {
        debug: (msg) => console.log('[wllama]', msg),
        log: (msg) => { console.log('[wllama]', msg); l(msg); },
        warn: (msg) => { console.warn('[wllama]', msg); l('WARN: ' + msg); },
        error: (msg) => { console.error('[wllama]', msg); l('ERROR: ' + msg); },
      },
    });
    l('Loading Gemma 26B A4B (Q5_K_XL, ~20GB in 512MB splits)...');
    l('This will take several minutes on first load.');
    status.innerHTML = '<span class="amber">*</span> loading model...';
    // Load from local server (split GGUF files)
    // wllama auto-detects split pattern from the first file name
    const firstSplit = window.location.origin + '/model/gemma-26b-00001-of-00062.gguf';
    // The progress callback fires far more often than the percentage changes,
    // so remember what was last shown/logged and only touch the UI on change.
    let lastShownPct = -1;
    let lastLoggedBucket = -1;
    await wllama.loadModelFromUrl(firstSplit, {
      n_gpu_layers: 99, // GPU — patched GLU shader fixes aliasing
      n_ctx: 512, // minimal context to reduce CPU memory
      n_batch: 64,
      useCache: false, // don't cache 20GB in browser storage
      progressCallback: ({ loaded, total }) => {
        // Without a positive total the ratio is NaN/Infinity; skip the tick.
        if (!(total > 0)) return;
        const pct = Math.round((loaded / total) * 100);
        if (pct === lastShownPct) return;
        lastShownPct = pct;
        // Log once per 5% step instead of on every tick that rounds to it.
        const bucket = Math.floor(pct / 5);
        if (bucket > lastLoggedBucket) {
          lastLoggedBucket = bucket;
          l(`Downloading... ${pct}% (${(loaded/1024/1024/1024).toFixed(1)}/${(total/1024/1024/1024).toFixed(1)} GB)`);
        }
        status.innerHTML = `<span class="amber">*</span> downloading ${pct}%...`;
      },
    });
    l('Model loaded!');
    status.innerHTML = '<span class="green">*</span> model ready';
    document.getElementById('btn-gen').disabled = false;
  } catch (e) {
    // Rejections are not guaranteed to be Error objects.
    const message = e?.message ?? String(e);
    l('ERROR: ' + message);
    console.error(e);
    // Insert the message as a text node so it is never parsed as HTML.
    status.innerHTML = '<span class="red">*</span> ';
    status.append(message);
    // Best-effort release of the failed instance so a retry does not stack a
    // second one on top of it.
    if (wllama) {
      try {
        await wllama.exit();
      } catch (cleanupError) {
        console.warn('[wllama] cleanup after failed load did not complete', cleanupError);
      }
      wllama = null;
    }
    loadButton.disabled = false;
  }
};
/**
 * Click handler for "2. Generate": sends the prompt field's text as a single
 * user message to the loaded model and shows the reply. If the reply carries
 * only `reasoning_content`, that is shown instead of the (empty) content.
 * Elapsed time and tokens/second (when reported) are written to the log.
 *
 * @returns {Promise<void>} Resolves once generation has finished or failed;
 *   errors are reported in the UI rather than rethrown.
 */
window.doGenerate = async function() {
  // The button is disabled until a model is loaded, but this handler is also
  // reachable as window.doGenerate, so fail with a clear message.
  if (!wllama) {
    l('ERROR: model is not loaded');
    status.innerHTML = '<span class="red">*</span> error';
    return;
  }
  const genButton = document.getElementById('btn-gen');
  const prompt = document.getElementById('prompt').value;
  genButton.disabled = true;
  output.textContent = '';
  l('Generating: "' + prompt + '"');
  status.innerHTML = '<span class="amber">*</span> generating...';
  const t0 = performance.now();
  try {
    const result = await wllama.createChatCompletion({
      messages: [{ role: 'user', content: prompt }],
      max_tokens: 500,
      temperature: 0.7,
      top_k: 40,
      top_p: 0.9,
    });
    const elapsed = ((performance.now() - t0) / 1000).toFixed(1);
    console.log('[gemma] raw result:', JSON.stringify(result, null, 2));
    const msg = result?.choices?.[0]?.message;
    const text = msg?.content || '';
    const thinking = msg?.reasoning_content || '';
    const tps = result?.timings?.predicted_per_second?.toFixed(1) || '?';
    if (thinking && !text) {
      // Only reasoning came back: show it rather than an empty box.
      output.textContent = thinking;
      l(`[thinking only, ${tps} tok/s] ` + thinking.slice(0, 200));
    } else {
      output.textContent = text || '(empty)';
      if (thinking) l('[thinking] ' + thinking.slice(0, 100));
      l(`[${tps} tok/s] ` + (text || '(empty)').slice(0, 200));
    }
    l(`Done in ${elapsed}s`);
    status.innerHTML = `<span class="green">*</span> done (${elapsed}s)`;
  } catch (e) {
    // Rejections are not guaranteed to be Error objects.
    const message = e?.message ?? String(e);
    l('ERROR: ' + message);
    console.error(e);
    status.innerHTML = '<span class="red">*</span> error';
  } finally {
    // Re-enable the button on every exit path so the user can try again.
    genButton.disabled = false;
  }
};
</script>
</body>
</html>