Upload index.html with huggingface_hub
Browse files- index.html +156 -0
index.html
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<title>Gemma 4 26B A4B — Browser WebGPU via wllama</title>
|
| 6 |
+
<style>
|
| 7 |
+
body { font-family: monospace; background: #0d1117; color: #c9d1d9; padding: 24px; max-width: 900px; margin: 0 auto; }
|
| 8 |
+
h1 { color: #58a6ff; font-size: 20px; }
|
| 9 |
+
.card { background: #161b22; border: 1px solid #30363d; border-radius: 8px; padding: 16px; margin: 12px 0; }
|
| 10 |
+
.label { color: #8b949e; font-size: 12px; text-transform: uppercase; letter-spacing: 1px; }
|
| 11 |
+
.value { color: #c9d1d9; font-size: 14px; margin-top: 4px; }
|
| 12 |
+
.green { color: #3fb950; } .red { color: #f85149; } .amber { color: #d29922; }
|
| 13 |
+
#log { font-size: 12px; background: #010409; border: 1px solid #30363d; border-radius: 6px; padding: 10px; max-height: 400px; overflow-y: auto; white-space: pre-wrap; }
|
| 14 |
+
button { background: #238636; color: white; border: none; border-radius: 6px; padding: 8px 16px; cursor: pointer; font-weight: bold; margin: 4px; }
|
| 15 |
+
button:disabled { opacity: 0.5; cursor: wait; }
|
| 16 |
+
input { background: #161b22; border: 1px solid #30363d; color: #c9d1d9; border-radius: 6px; padding: 8px 12px; width: 60%; }
|
| 17 |
+
#output { background: #161b22; border: 1px solid #30363d; border-radius: 6px; padding: 12px; min-height: 60px; white-space: pre-wrap; font-size: 14px; margin-top: 8px; }
|
| 18 |
+
</style>
|
| 19 |
+
</head>
|
| 20 |
+
<body>
|
| 21 |
+
<h1>Gemma 4 26B A4B — Browser WebGPU</h1>
|
| 22 |
+
<p>Gemma-4-26B-A4B-it (MoE, 3.8B active) running in browser via wllama + WebGPU. GGUF loaded from local server.</p>
|
| 23 |
+
|
| 24 |
+
<div class="card">
|
| 25 |
+
<div class="label">Status</div>
|
| 26 |
+
<div class="value" id="status"><span class="amber">*</span> not initialized</div>
|
| 27 |
+
</div>
|
| 28 |
+
|
| 29 |
+
<div class="card">
|
| 30 |
+
<button id="btn-load" onclick="doLoad()">1. Load Model (WebGPU)</button>
|
| 31 |
+
<button id="btn-gen" onclick="doGenerate()" disabled>2. Generate</button>
|
| 32 |
+
</div>
|
| 33 |
+
|
| 34 |
+
<div class="card">
|
| 35 |
+
<div class="label">Prompt</div>
|
| 36 |
+
<input id="prompt" value="Hello, I am a helpful assistant and" />
|
| 37 |
+
</div>
|
| 38 |
+
|
| 39 |
+
<div class="card">
|
| 40 |
+
<div class="label">Output</div>
|
| 41 |
+
<div id="output"></div>
|
| 42 |
+
</div>
|
| 43 |
+
|
| 44 |
+
<div class="card">
|
| 45 |
+
<div class="label">Log</div>
|
| 46 |
+
<div id="log"></div>
|
| 47 |
+
</div>
|
| 48 |
+
|
| 49 |
+
<script type="module">
|
| 50 |
+
import { Wllama } from './node_modules/@wllama/wllama/esm/index.js';
|
| 51 |
+
|
| 52 |
+
// Handles to the three page regions the script writes to.
const log = document.querySelector('#log');
const status = document.querySelector('#status');
const output = document.querySelector('#output');

// Shared Wllama instance; stays null until doLoad() constructs it.
let wllama = null;
|
| 56 |
+
|
| 57 |
+
/**
 * Appends one timestamped line to the on-page log panel and keeps the panel
 * scrolled to the newest entry.
 *
 * @param {*} msg - Value to log; it is interpolated into a template string.
 */
function l(msg) {
  // Characters 11..18 of the ISO-8601 string are the HH:MM:SS part (UTC).
  const ts = new Date().toISOString().slice(11, 19);
  // Append a new text node instead of `textContent +=`, which would re-read
  // and rewrite the whole accumulated log on every call.
  log.append(`[${ts}] ${msg}\n`);
  log.scrollTop = log.scrollHeight;
}
|
| 62 |
+
|
| 63 |
+
/**
 * Click handler for the "Load Model" button.
 *
 * Constructs a Wllama instance, loads the split GGUF model served under
 * `/model/` on this page's origin, and enables the "Generate" button on
 * success. Progress and failures are written to the log panel and the status
 * line. On failure the load button is re-enabled so the user can retry.
 *
 * @returns {Promise<void>} Resolves once loading has succeeded or failed;
 *   errors are caught and reported here rather than rethrown.
 */
window.doLoad = async function() {
  const loadButton = document.getElementById('btn-load');
  try {
    loadButton.disabled = true;
    l('Initializing wllama...');
    status.innerHTML = '<span class="amber">*</span> initializing...';

    const CONFIG_PATHS = {
      default: './node_modules/@wllama/wllama/esm/wasm/wllama.wasm',
    };

    wllama = new Wllama(CONFIG_PATHS, {
      parallelDownloads: 5,
      logger: {
        // Debug output goes to the console only; the other levels are also
        // mirrored into the on-page log.
        debug: (msg) => console.log('[wllama]', msg),
        log: (msg) => { console.log('[wllama]', msg); l(msg); },
        warn: (msg) => { console.warn('[wllama]', msg); l('WARN: ' + msg); },
        error: (msg) => { console.error('[wllama]', msg); l('ERROR: ' + msg); },
      },
    });

    l('Loading Gemma 26B A4B (Q5_K_XL, ~20GB in 512MB splits)...');
    l('This will take several minutes on first load.');
    status.innerHTML = '<span class="amber">*</span> loading model...';

    // Load from local server (split GGUF files)
    // wllama auto-detects split pattern from the first file name
    const firstSplit = window.location.origin + '/model/gemma-26b-00001-of-00062.gguf';

    // Index of the last 5% bucket written to the log. The callback can fire
    // many times per percent, so log only when a new bucket is reached.
    let lastLoggedBucket = -1;

    await wllama.loadModelFromUrl(firstSplit, {
      n_gpu_layers: 99,   // GPU — patched GLU shader fixes aliasing
      n_ctx: 512,         // minimal context to reduce CPU memory
      n_batch: 64,
      useCache: false,    // don't cache 20GB in browser storage
      progressCallback: ({ loaded, total }) => {
        // Guard against a zero/unknown total, which would render "NaN%".
        const pct = total > 0 ? Math.round((loaded / total) * 100) : 0;
        const bucket = Math.floor(pct / 5);
        if (bucket > lastLoggedBucket) {
          lastLoggedBucket = bucket;
          l(`Downloading... ${pct}% (${(loaded/1024/1024/1024).toFixed(1)}/${(total/1024/1024/1024).toFixed(1)} GB)`);
        }
        status.innerHTML = `<span class="amber">*</span> downloading ${pct}%...`;
      },
    });

    l('Model loaded!');
    status.innerHTML = '<span class="green">*</span> model ready';
    document.getElementById('btn-gen').disabled = false;
  } catch (e) {
    // A rejection is not guaranteed to be an Error instance.
    const reason = e?.message ?? String(e);
    l('ERROR: ' + reason);
    console.error(e);
    // Insert the message as text, not markup, so any HTML-significant
    // characters in it are displayed literally instead of being parsed.
    status.innerHTML = '<span class="red">*</span> ';
    status.append(reason);
    loadButton.disabled = false;
  }
};
|
| 113 |
+
|
| 114 |
+
/**
 * Click handler for the "Generate" button.
 *
 * Sends the prompt field's text as a single user message to
 * `wllama.createChatCompletion`, then shows the reply in the output panel and
 * logs a truncated copy together with the reported tokens-per-second and the
 * elapsed wall-clock time. If the reply carries only `reasoning_content`,
 * that text is shown instead of the (empty) answer.
 *
 * @returns {Promise<void>} Resolves when generation has finished or failed;
 *   errors are caught and reported here rather than rethrown.
 */
window.doGenerate = async function() {
  const prompt = document.getElementById('prompt').value;
  const genButton = document.getElementById('btn-gen');
  genButton.disabled = true;
  output.textContent = '';
  l('Generating: "' + prompt + '"');
  status.innerHTML = '<span class="amber">*</span> generating...';

  const t0 = performance.now();
  try {
    // NOTE(review): doLoad() requests n_ctx: 512 while this asks for up to
    // 500 new tokens — confirm prompt + completion fit in the context window.
    const result = await wllama.createChatCompletion({
      messages: [{ role: 'user', content: prompt }],
      max_tokens: 500,
      temperature: 0.7,
      top_k: 40,
      top_p: 0.9,
    });

    const elapsed = ((performance.now() - t0) / 1000).toFixed(1);
    console.log('[gemma] raw result:', JSON.stringify(result, null, 2));
    const msg = result?.choices?.[0]?.message;
    const text = msg?.content || '';
    const thinking = msg?.reasoning_content || '';
    // Falls back to '?' when the result carries no timing information.
    const tps = result?.timings?.predicted_per_second?.toFixed(1) || '?';
    if (thinking && !text) {
      output.textContent = thinking;
      l(`[thinking only, ${tps} tok/s] ` + thinking.slice(0, 200));
    } else {
      output.textContent = text || '(empty)';
      if (thinking) l('[thinking] ' + thinking.slice(0, 100));
      l(`[${tps} tok/s] ` + (text || '(empty)').slice(0, 200));
    }
    l(`Done in ${elapsed}s`);
    status.innerHTML = `<span class="green">*</span> done (${elapsed}s)`;
  } catch (e) {
    // A rejection is not guaranteed to be an Error instance.
    const reason = e?.message ?? String(e);
    l('ERROR: ' + reason);
    console.error(e);
    status.innerHTML = '<span class="red">*</span> error';
  } finally {
    // Always give the button back, whichever path was taken above.
    genButton.disabled = false;
  }
};
|
| 154 |
+
</script>
|
| 155 |
+
</body>
|
| 156 |
+
</html>
|