| <!DOCTYPE html> |
| <html lang="en"> |
| <head> |
|   <meta charset="UTF-8"> |
|   <!-- Without a viewport declaration mobile browsers lay the page out at desktop width and shrink it. --> |
|   <meta name="viewport" content="width=device-width, initial-scale=1"> |
|   <title>SmolLM2-360M — WebGPU</title> |
|   <style> |
|     /* Page shell: dark theme, single centered column capped at 800px. */ |
|     body { font-family: -apple-system, sans-serif; background: #0a0e14; color: #c9d1d9; max-width: 800px; margin: 0 auto; padding: 20px; } |
|     h1 { color: #58a6ff; font-size: 20px; } |
|     /* Panel that holds the Load button and the status line. */ |
|     .card { background: #161b22; border: 1px solid #30363d; border-radius: 8px; padding: 16px; margin: 12px 0; } |
|     button { background: #238636; color: white; border: none; border-radius: 6px; padding: 10px 20px; cursor: pointer; font-weight: bold; font-size: 14px; margin: 4px; } |
|     /* A disabled button must not advertise itself as clickable. */ |
|     button:disabled { opacity: 0.4; cursor: not-allowed; } |
|     #status { color: #e8c87a; font-size: 13px; margin: 8px 0; } |
|     /* Scrollable transcript; the script appends .msg elements here. */ |
|     #chat { background: #0d1117; border: 1px solid #30363d; border-radius: 8px; padding: 16px; margin: 12px 0; min-height: 300px; max-height: 500px; overflow-y: auto; } |
|     /* pre-wrap keeps the line breaks of the message text. */ |
|     .msg { margin: 8px 0; padding: 8px 12px; border-radius: 6px; white-space: pre-wrap; line-height: 1.5; } |
|     .user { background: #1f3a5f; color: #e0e8f0; } |
|     .assistant { background: #1a2332; color: #c9d1d9; } |
|     /* Prompt box and Send button share one row; the textarea takes the remaining width. */ |
|     #input-row { display: flex; gap: 8px; margin-top: 8px; } |
|     #input { flex: 1; background: #0d1117; color: #c9d1d9; border: 1px solid #30363d; border-radius: 6px; padding: 10px; font-size: 14px; resize: none; } |
|     .info { color: #8b949e; font-size: 12px; } |
|   </style> |
| </head> |
| <body> |
| <h1>SmolLM2-360M on WebGPU</h1> |
| <p>Hugging Face's tiny but capable 360M-parameter model, quantized to Q8_0 (369 MB). Loads in seconds.</p> |
|
|
| <!-- Model loader. The button calls window.doLoad from the module script; #status receives progress and result text. --> |
| <div class="card"> |
|   <button id="btn-load" type="button" onclick="doLoad()">Load Model (369 MB)</button> |
|   <!-- role="status" is a polite live region, so text written here by the script is announced to screen readers. --> |
|   <div id="status" role="status">Click Load to start</div> |
| </div> |
|
|
| <!-- Conversation transcript. role="log" announces messages as the script appends them. --> |
| <div id="chat" role="log" aria-label="Conversation"></div> |
| <!-- Prompt entry. Both controls start disabled; the script enables them once the model has loaded. --> |
| <div id="input-row"> |
|   <!-- A placeholder is not an accessible name, so the textarea is named explicitly. --> |
|   <textarea id="input" rows="2" placeholder="Ask something..." aria-label="Message" disabled></textarea> |
|   <button id="btn-send" type="button" onclick="doSend()" disabled>Send</button> |
| </div> |
| <p class="info">SmolLM2-360M-Instruct via wllama WebGPU. Built for AMD Strix Halo unified memory.</p> |
|
|
| <script type="module"> |
| import { Wllama } from './node_modules/@wllama/wllama/esm/index.js'; |
| |
| let wllama = null; |
| const statusEl = document.getElementById('status'); |
| const chatEl = document.getElementById('chat'); |
| const inputEl = document.getElementById('input'); |
| let history = []; |
| |
| function addMsg(role, text) { |
| const div = document.createElement('div'); |
| div.className = `msg ${role}`; |
| div.textContent = text || ''; |
| chatEl.appendChild(div); |
| chatEl.scrollTop = chatEl.scrollHeight; |
| return div; |
| } |
| |
| window.doLoad = async function() { |
| document.getElementById('btn-load').disabled = true; |
| statusEl.textContent = 'Loading SmolLM2-360M...'; |
| |
| wllama = new Wllama( |
| { default: './node_modules/@wllama/wllama/esm/wasm/wllama.wasm' }, |
| { parallelDownloads: 3, logger: { |
| debug: () => {}, |
| log: m => { statusEl.textContent = m; }, |
| warn: m => console.warn(m), |
| error: m => console.error(m), |
| }} |
| ); |
| |
| await wllama.loadModelFromUrl( |
| window.location.origin + '/model/SmolLM2-360M-Instruct-Q8_0.gguf', |
| { |
| n_gpu_layers: 99, |
| n_ctx: 2048, |
| n_batch: 64, |
| useCache: true, |
| progressCallback: ({ loaded, total }) => { |
| const p = Math.round(loaded / total * 100); |
| if (p % 10 === 0) statusEl.textContent = `Downloading... ${p}%`; |
| }, |
| } |
| ); |
| |
| statusEl.textContent = 'Ready — SmolLM2-360M on WebGPU'; |
| inputEl.disabled = false; |
| document.getElementById('btn-send').disabled = false; |
| inputEl.focus(); |
| }; |
| |
| function buildPrompt() { |
| let prompt = '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n'; |
| for (const msg of history) { |
| prompt += `<|im_start|>${msg.role}\n${msg.content}<|im_end|>\n`; |
| } |
| prompt += '<|im_start|>assistant\n'; |
| return prompt; |
| } |
| |
| window.doSend = async function() { |
| const text = inputEl.value.trim(); |
| if (!text || !wllama) return; |
| inputEl.value = ''; |
| inputEl.disabled = true; |
| document.getElementById('btn-send').disabled = true; |
| |
| history.push({ role: 'user', content: text }); |
| addMsg('user', text); |
| |
| const prompt = buildPrompt(); |
| const genStart = performance.now(); |
| |
| statusEl.textContent = 'Generating...'; |
| |
| const result = await wllama.createCompletion({ |
| prompt, |
| max_tokens: 512, |
| temperature: 0.7, |
| top_k: 40, |
| repeat_penalty: 1.1, |
| stop: ['<|im_end|>', '<|im_start|>'], |
| }); |
| |
| const rawText = result?.choices?.[0]?.text || result?.text || ''; |
| const cleanText = rawText.replace('<|im_end|>', '').trim(); |
| |
| addMsg('assistant', cleanText); |
| history.push({ role: 'assistant', content: cleanText }); |
| |
| const elapsed = (performance.now() - genStart) / 1000; |
| statusEl.textContent = `Done — ${elapsed.toFixed(1)}s`; |
| |
| inputEl.disabled = false; |
| document.getElementById('btn-send').disabled = false; |
| inputEl.focus(); |
| }; |
| |
| inputEl.addEventListener('keydown', (e) => { |
| if (e.key === 'Enter' && !e.shiftKey) { e.preventDefault(); doSend(); } |
| }); |
| </script> |
| </body> |
| </html> |
|
|