SmolLM2-360M-webgpu / index.html
LJTSG's picture
Add index.html
fd6abf8 verified
Raw
History Blame Contribute Delete
4.82 kB
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <!-- Without a viewport declaration, mobile browsers lay the page out at desktop width and scale it down. -->
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>SmolLM2-360M — WebGPU</title>
  <style>
    /* Page shell: dark theme, single centred column. */
    body { font-family: -apple-system, sans-serif; background: #0a0e14; color: #c9d1d9; max-width: 800px; margin: 0 auto; padding: 20px; }
    h1 { color: #58a6ff; font-size: 20px; }
    .card { background: #161b22; border: 1px solid #30363d; border-radius: 8px; padding: 16px; margin: 12px 0; }
    button { background: #238636; color: white; border: none; border-radius: 6px; padding: 10px 20px; cursor: pointer; font-weight: bold; font-size: 14px; margin: 4px; }
    button:disabled { opacity: 0.4; }
    #status { color: #e8c87a; font-size: 13px; margin: 8px 0; }
    /* Chat transcript: fixed height range, scrolls once messages overflow. */
    #chat { background: #0d1117; border: 1px solid #30363d; border-radius: 8px; padding: 16px; margin: 12px 0; min-height: 300px; max-height: 500px; overflow-y: auto; }
    /* pre-wrap keeps the line breaks contained in message text. */
    .msg { margin: 8px 0; padding: 8px 12px; border-radius: 6px; white-space: pre-wrap; line-height: 1.5; }
    .user { background: #1f3a5f; color: #e0e8f0; }
    .assistant { background: #1a2332; color: #c9d1d9; }
    #input-row { display: flex; gap: 8px; margin-top: 8px; }
    #input { flex: 1; background: #0d1117; color: #c9d1d9; border: 1px solid #30363d; border-radius: 6px; padding: 10px; font-size: 14px; resize: none; }
    .info { color: #8b949e; font-size: 12px; }
  </style>
</head>
<body>
<h1>SmolLM2-360M on WebGPU</h1>
<p>HuggingFace's tiny but capable 360M parameter model. Q8_0 (369 MB). Loads in seconds.</p>
<div class="card">
  <button id="btn-load" type="button" onclick="doLoad()">Load Model (369 MB)</button>
  <!-- role="status" lets assistive technology announce the progress text the script writes here. -->
  <div id="status" role="status">Click Load to start</div>
</div>
<!-- role="log" announces chat messages as the script appends them. -->
<div id="chat" role="log" aria-label="Conversation"></div>
<div id="input-row">
  <!-- A placeholder is not an accessible name, so the textarea is labelled explicitly. -->
  <textarea id="input" rows="2" placeholder="Ask something..." aria-label="Message" disabled></textarea>
  <button id="btn-send" type="button" onclick="doSend()" disabled>Send</button>
</div>
<p class="info">SmolLM2-360M-Instruct via wllama WebGPU. Built for AMD Strix Halo unified memory.</p>
<script type="module">
import { Wllama } from './node_modules/@wllama/wllama/esm/index.js';
// Wllama instance; stays null until doLoad() constructs one, and doSend() returns early while it is null.
let wllama = null;
// Cached handles to the status line, the chat transcript container and the prompt textarea.
const statusEl = document.getElementById('status');
const chatEl = document.getElementById('chat');
const inputEl = document.getElementById('input');
// Conversation so far as { role, content } objects in the order they were pushed; buildPrompt() serialises it.
let history = [];
/**
 * Append one message bubble to the chat transcript and scroll it into view.
 *
 * @param {string} role  Class name added after "msg" (callers in this file pass 'user' or 'assistant').
 * @param {string} [text]  Bubble text; any falsy value produces an empty bubble.
 * @returns {HTMLElement} The element that was appended.
 */
function addMsg(role, text) {
  const bubble = document.createElement('div');
  Object.assign(bubble, {
    className: `msg ${role}`,
    // textContent (not innerHTML) so the text is never parsed as markup.
    textContent: text ? text : '',
  });
  chatEl.appendChild(bubble);
  // Keep the newest message visible by pinning the scroll position to the bottom.
  chatEl.scrollTop = chatEl.scrollHeight;
  return bubble;
}
/**
 * Create the Wllama runtime, load the GGUF model and unlock the chat controls.
 *
 * The Load button is disabled for the duration of the call. On success the
 * module-level `wllama` is set and the input and Send button are enabled. On
 * failure the error is logged and shown in the status line, and the Load
 * button is re-enabled so the user can retry.
 *
 * @returns {Promise<void>} Resolves once loading has succeeded or failed; load errors are not rethrown.
 */
window.doLoad = async function() {
  const loadBtn = document.getElementById('btn-load');
  loadBtn.disabled = true;
  statusEl.textContent = 'Loading SmolLM2-360M...';
  try {
    // Build into a local first so `wllama` only ever refers to an instance whose model has loaded.
    const engine = new Wllama(
      { default: './node_modules/@wllama/wllama/esm/wasm/wllama.wasm' },
      { parallelDownloads: 3, logger: {
        debug: () => {},
        // Library log lines are shown in the status line.
        log: m => { statusEl.textContent = m; },
        warn: m => console.warn(m),
        error: m => console.error(m),
      }}
    );
    await engine.loadModelFromUrl(
      window.location.origin + '/model/SmolLM2-360M-Instruct-Q8_0.gguf',
      {
        n_gpu_layers: 99,
        n_ctx: 2048,
        n_batch: 64,
        useCache: true,
        progressCallback: ({ loaded, total }) => {
          // A missing or zero total would make the percentage NaN or Infinity; skip that update.
          if (!(total > 0)) return;
          const p = Math.round(loaded / total * 100);
          // Only refresh the text on multiples of 10%.
          if (p % 10 === 0) statusEl.textContent = `Downloading... ${p}%`;
        },
      }
    );
    wllama = engine;
  } catch (err) {
    console.error(err);
    statusEl.textContent = `Failed to load model: ${err?.message ?? err}`;
    // Without this the button would stay disabled and the page would need a reload to retry.
    loadBtn.disabled = false;
    return;
  }
  statusEl.textContent = 'Ready — SmolLM2-360M on WebGPU';
  inputEl.disabled = false;
  document.getElementById('btn-send').disabled = false;
  inputEl.focus();
};
/**
 * Serialise the conversation into a single prompt string.
 *
 * Output layout: a fixed system turn, then every entry of `history` in order,
 * then an opened assistant turn for the model to complete. Each turn is
 * wrapped in `<|im_start|>` / `<|im_end|>` markers.
 *
 * @returns {string} The prompt text.
 */
function buildPrompt() {
  const turns = history.map(
    ({ role, content }) => `<|im_start|>${role}\n${content}<|im_end|>\n`
  );
  return [
    '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n',
    ...turns,
    // Left open on purpose: generation continues from here.
    '<|im_start|>assistant\n',
  ].join('');
}
/**
 * Send the textarea contents as a user turn and render the model's reply.
 *
 * Does nothing when the trimmed input is empty or no model is loaded. The
 * input and Send button are disabled while generating and are always
 * re-enabled afterwards, including when generation fails.
 *
 * @returns {Promise<void>} Resolves when the reply is rendered or the failure is reported; errors are not rethrown.
 */
window.doSend = async function() {
  const text = inputEl.value.trim();
  if (!text || !wllama) return;
  const sendBtn = document.getElementById('btn-send');
  inputEl.value = '';
  inputEl.disabled = true;
  sendBtn.disabled = true;
  // Remember where this exchange starts so a failed attempt can be rolled back.
  const turnStart = history.length;
  history.push({ role: 'user', content: text });
  addMsg('user', text);
  const prompt = buildPrompt();
  const genStart = performance.now();
  statusEl.textContent = 'Generating...';
  try {
    const result = await wllama.createCompletion({
      prompt,
      max_tokens: 512,
      temperature: 0.7,
      top_k: 40,
      repeat_penalty: 1.1,
      stop: ['<|im_end|>', '<|im_start|>'],
    });
    // NOTE(review): upstream wllama documents createCompletion as resolving to a plain string;
    // accept that shape as well as the object shapes — confirm against the bundled version.
    const rawText = typeof result === 'string'
      ? result
      : (result?.choices?.[0]?.text || result?.text || '');
    // Strip every end-of-turn marker, not just the first one.
    const cleanText = rawText.split('<|im_end|>').join('').trim();
    addMsg('assistant', cleanText);
    history.push({ role: 'assistant', content: cleanText });
    const elapsed = (performance.now() - genStart) / 1000;
    statusEl.textContent = `Done — ${elapsed.toFixed(1)}s`;
  } catch (err) {
    console.error(err);
    // Drop the unanswered turn so the next prompt does not contain two consecutive user turns.
    history.length = turnStart;
    statusEl.textContent = `Generation failed: ${err?.message ?? err}`;
  } finally {
    // Always unlock the controls; otherwise one failed request would freeze the chat.
    inputEl.disabled = false;
    sendBtn.disabled = false;
    inputEl.focus();
  }
};
// Enter sends the message; Shift+Enter falls through to the default behaviour.
inputEl.addEventListener('keydown', (e) => {
  // While an IME composition is active, Enter confirms the candidate text and must not submit.
  if (e.isComposing) return;
  if (e.key === 'Enter' && !e.shiftKey) { e.preventDefault(); doSend(); }
});
</script>
</body>
</html>