LJTSG committed on
Commit
6a41fd5
·
verified ·
1 Parent(s): 8a927ce

Upload index.html with huggingface_hub

Browse files
Files changed (1) hide show
  1. index.html +156 -0
index.html ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <title>Gemma 4 26B A4B — Browser WebGPU via wllama</title>
6
+ <style>
7
+ body { font-family: monospace; background: #0d1117; color: #c9d1d9; padding: 24px; max-width: 900px; margin: 0 auto; }
8
+ h1 { color: #58a6ff; font-size: 20px; }
9
+ .card { background: #161b22; border: 1px solid #30363d; border-radius: 8px; padding: 16px; margin: 12px 0; }
10
+ .label { color: #8b949e; font-size: 12px; text-transform: uppercase; letter-spacing: 1px; }
11
+ .value { color: #c9d1d9; font-size: 14px; margin-top: 4px; }
12
+ .green { color: #3fb950; } .red { color: #f85149; } .amber { color: #d29922; }
13
+ #log { font-size: 12px; background: #010409; border: 1px solid #30363d; border-radius: 6px; padding: 10px; max-height: 400px; overflow-y: auto; white-space: pre-wrap; }
14
+ button { background: #238636; color: white; border: none; border-radius: 6px; padding: 8px 16px; cursor: pointer; font-weight: bold; margin: 4px; }
15
+ button:disabled { opacity: 0.5; cursor: wait; }
16
+ input { background: #161b22; border: 1px solid #30363d; color: #c9d1d9; border-radius: 6px; padding: 8px 12px; width: 60%; }
17
+ #output { background: #161b22; border: 1px solid #30363d; border-radius: 6px; padding: 12px; min-height: 60px; white-space: pre-wrap; font-size: 14px; margin-top: 8px; }
18
+ </style>
19
+ </head>
20
+ <body>
21
+ <h1>Gemma 4 26B A4B — Browser WebGPU</h1>
22
+ <p>Gemma-4-26B-A4B-it (MoE, 3.8B active) running in browser via wllama + WebGPU. GGUF loaded from local server.</p>
23
+
24
+ <div class="card">
25
+ <div class="label">Status</div>
26
+ <div class="value" id="status"><span class="amber">*</span> not initialized</div>
27
+ </div>
28
+
29
+ <div class="card">
30
+ <button id="btn-load" onclick="doLoad()">1. Load Model (WebGPU)</button>
31
+ <button id="btn-gen" onclick="doGenerate()" disabled>2. Generate</button>
32
+ </div>
33
+
34
+ <div class="card">
35
+ <div class="label">Prompt</div>
36
+ <input id="prompt" value="Hello, I am a helpful assistant and" />
37
+ </div>
38
+
39
+ <div class="card">
40
+ <div class="label">Output</div>
41
+ <div id="output"></div>
42
+ </div>
43
+
44
+ <div class="card">
45
+ <div class="label">Log</div>
46
+ <div id="log"></div>
47
+ </div>
48
+
49
+ <script type="module">
50
+ import { Wllama } from './node_modules/@wllama/wllama/esm/index.js';
51
+
52
// Handles to the three page regions the script writes to: the scrolling log
// pane, the status line, and the generation output box.
const [log, status, output] = ['log', 'status', 'output'].map((id) => document.getElementById(id));

// The Wllama instance; stays null until doLoad() constructs one.
let wllama = null;
56
+
57
/**
 * Append one timestamped line to the on-page log and keep it scrolled to the
 * newest entry.
 *
 * @param {*} msg - Value to log; it is interpolated into the line as text.
 */
function l(msg) {
  // Characters 11..18 of the ISO string are the HH:MM:SS part (UTC).
  const stamp = new Date().toISOString().substring(11, 19);
  log.textContent = `${log.textContent}[${stamp}] ${msg}\n`;
  // Pin the scroll position to the bottom so the latest line is visible.
  log.scrollTop = log.scrollHeight;
}
62
+
63
/**
 * Click handler for the "Load Model" button.
 *
 * Creates a Wllama instance, loads the split GGUF model from this page's own
 * origin, and enables the "Generate" button on success. The load button is
 * disabled while loading and re-enabled on failure so the user can retry.
 *
 * Errors are not rethrown: they are written to the log, the console and the
 * status line.
 *
 * @returns {Promise<void>}
 */
window.doLoad = async function() {
  const loadButton = document.getElementById('btn-load');
  try {
    loadButton.disabled = true;
    l('Initializing wllama...');
    status.innerHTML = '<span class="amber">*</span> initializing...';

    const CONFIG_PATHS = {
      default: './node_modules/@wllama/wllama/esm/wasm/wllama.wasm',
    };

    wllama = new Wllama(CONFIG_PATHS, {
      parallelDownloads: 5,
      logger: {
        debug: (msg) => console.log('[wllama]', msg),
        log: (msg) => { console.log('[wllama]', msg); l(msg); },
        warn: (msg) => { console.warn('[wllama]', msg); l('WARN: ' + msg); },
        error: (msg) => { console.error('[wllama]', msg); l('ERROR: ' + msg); },
      },
    });

    l('Loading Gemma 26B A4B (Q5_K_XL, ~20GB in 512MB splits)...');
    l('This will take several minutes on first load.');
    status.innerHTML = '<span class="amber">*</span> loading model...';

    // Load from local server (split GGUF files)
    // wllama auto-detects split pattern from the first file name
    const firstSplit = window.location.origin + '/model/gemma-26b-00001-of-00062.gguf';

    // Last percentage written to the log. The callback fires many times while
    // the rounded percentage is unchanged, so without this every multiple of 5
    // would be logged over and over.
    let lastLoggedPct = -1;

    await wllama.loadModelFromUrl(firstSplit, {
      n_gpu_layers: 99, // GPU — patched GLU shader fixes aliasing
      n_ctx: 512, // minimal context to reduce CPU memory
      n_batch: 64,
      useCache: false, // don't cache 20GB in browser storage
      progressCallback: ({ loaded, total }) => {
        // Without a positive total the ratio is NaN/Infinity; skip the update
        // rather than display "NaN%".
        if (!(total > 0)) return;
        const pct = Math.round((loaded / total) * 100);
        if (pct % 5 === 0 && pct !== lastLoggedPct) {
          lastLoggedPct = pct;
          l(`Downloading... ${pct}% (${(loaded/1024/1024/1024).toFixed(1)}/${(total/1024/1024/1024).toFixed(1)} GB)`);
        }
        status.innerHTML = `<span class="amber">*</span> downloading ${pct}%...`;
      },
    });

    l('Model loaded!');
    status.innerHTML = '<span class="green">*</span> model ready';
    document.getElementById('btn-gen').disabled = false;
  } catch (e) {
    // Rejections are not guaranteed to be Error objects.
    const message = e?.message ?? String(e);
    l('ERROR: ' + message);
    console.error(e);
    // Only the static marker goes through innerHTML; the message is appended
    // as a text node so any markup inside it is shown literally, not parsed.
    status.innerHTML = '<span class="red">*</span> ';
    status.append(message);
    loadButton.disabled = false;

    // Drop the failed instance before a retry constructs a new one, and make a
    // best-effort attempt to release whatever it already allocated.
    const failed = wllama;
    wllama = null;
    try {
      await failed?.exit();
    } catch (exitErr) {
      console.warn('[wllama] cleanup after failed load did not complete:', exitErr);
    }
  }
};
113
+
114
/**
 * Click handler for the "Generate" button.
 *
 * Sends the prompt box contents as a single user message to
 * wllama.createChatCompletion, then shows the reply (or, when the reply has
 * only reasoning text, that reasoning) in the output box. Elapsed time and the
 * reported tokens/second are written to the log.
 *
 * Errors are logged and reflected in the status line, not rethrown; the
 * button is re-enabled in either case.
 *
 * @returns {Promise<void>}
 */
window.doGenerate = async function() {
  const { value: prompt } = document.getElementById('prompt');
  const generateButton = document.getElementById('btn-gen');
  generateButton.disabled = true;
  output.textContent = '';
  l(`Generating: "${prompt}"`);
  status.innerHTML = '<span class="amber">*</span> generating...';

  const startedAt = performance.now();
  try {
    // NOTE(review): max_tokens (500) is close to the n_ctx (512) used when the
    // model is loaded — confirm how wllama behaves once prompt + output exceed it.
    const request = {
      messages: [{ role: 'user', content: prompt }],
      max_tokens: 500,
      temperature: 0.7,
      top_k: 40,
      top_p: 0.9,
    };
    const result = await wllama.createChatCompletion(request);

    const elapsed = ((performance.now() - startedAt) / 1000).toFixed(1);
    console.log('[gemma] raw result:', JSON.stringify(result, null, 2));

    const reply = result?.choices?.[0]?.message;
    const text = reply?.content || '';
    const thinking = reply?.reasoning_content || '';
    const tps = result?.timings?.predicted_per_second?.toFixed(1) || '?';

    const thinkingOnly = Boolean(thinking) && !text;
    if (thinkingOnly) {
      // No answer text came back: show the reasoning instead.
      output.textContent = thinking;
      l(`[thinking only, ${tps} tok/s] ${thinking.slice(0, 200)}`);
    } else {
      const shown = text || '(empty)';
      output.textContent = shown;
      if (thinking) {
        l(`[thinking] ${thinking.slice(0, 100)}`);
      }
      l(`[${tps} tok/s] ${shown.slice(0, 200)}`);
    }

    l(`Done in ${elapsed}s`);
    status.innerHTML = `<span class="green">*</span> done (${elapsed}s)`;
  } catch (e) {
    l(`ERROR: ${e.message}`);
    console.error(e);
    status.innerHTML = '<span class="red">*</span> error';
  }
  // Reached on success and on handled failure alike, since the catch above
  // does not rethrow.
  generateButton.disabled = false;
};
154
+ </script>
155
+ </body>
156
+ </html>