Add index.html

fd6abf8 verified about 1 month ago

4.82 kB

	<!DOCTYPE html>
	<html lang="en">
	<head>
	<meta charset="UTF-8">
	<title>SmolLM2-360M — WebGPU</title>
	<style>
	body { font-family: -apple-system, sans-serif; background: #0a0e14; color: #c9d1d9; max-width: 800px; margin: 0 auto; padding: 20px; }
	h1 { color: #58a6ff; font-size: 20px; }
	.card { background: #161b22; border: 1px solid #30363d; border-radius: 8px; padding: 16px; margin: 12px 0; }
	button { background: #238636; color: white; border: none; border-radius: 6px; padding: 10px 20px; cursor: pointer; font-weight: bold; font-size: 14px; margin: 4px; }
	button:disabled { opacity: 0.4; }
	#status { color: #e8c87a; font-size: 13px; margin: 8px 0; }
	#chat { background: #0d1117; border: 1px solid #30363d; border-radius: 8px; padding: 16px; margin: 12px 0; min-height: 300px; max-height: 500px; overflow-y: auto; }
	.msg { margin: 8px 0; padding: 8px 12px; border-radius: 6px; white-space: pre-wrap; line-height: 1.5; }
	.user { background: #1f3a5f; color: #e0e8f0; }
	.assistant { background: #1a2332; color: #c9d1d9; }
	#input-row { display: flex; gap: 8px; margin-top: 8px; }
	#input { flex: 1; background: #0d1117; color: #c9d1d9; border: 1px solid #30363d; border-radius: 6px; padding: 10px; font-size: 14px; resize: none; }
	.info { color: #8b949e; font-size: 12px; }
	</style>
	</head>
	<body>
	<h1>SmolLM2-360M on WebGPU</h1>
	<p>HuggingFace's tiny but capable 360M parameter model. Q8_0 (369 MB). Loads in seconds.</p>

	<div class="card">
	<button id="btn-load" onclick="doLoad()">Load Model (369 MB)</button>
	<div id="status">Click Load to start</div>
	</div>

	<div id="chat"></div>
	<div id="input-row">
	<textarea id="input" rows="2" placeholder="Ask something..." disabled></textarea>
	<button id="btn-send" onclick="doSend()" disabled>Send</button>
	</div>
	<p class="info">SmolLM2-360M-Instruct via wllama WebGPU. Built for AMD Strix Halo unified memory.</p>

	<script type="module">
	import { Wllama } from './node_modules/@wllama/wllama/esm/index.js';

	let wllama = null;
	const statusEl = document.getElementById('status');
	const chatEl = document.getElementById('chat');
	const inputEl = document.getElementById('input');
	let history = [];

	function addMsg(role, text) {
	const div = document.createElement('div');
	div.className = `msg ${role}`;
	div.textContent = text \|\| '';
	chatEl.appendChild(div);
	chatEl.scrollTop = chatEl.scrollHeight;
	return div;
	}

	window.doLoad = async function() {
	document.getElementById('btn-load').disabled = true;
	statusEl.textContent = 'Loading SmolLM2-360M...';

	wllama = new Wllama(
	{ default: './node_modules/@wllama/wllama/esm/wasm/wllama.wasm' },
	{ parallelDownloads: 3, logger: {
	debug: () => {},
	log: m => { statusEl.textContent = m; },
	warn: m => console.warn(m),
	error: m => console.error(m),
	}}
	);

	await wllama.loadModelFromUrl(
	window.location.origin + '/model/SmolLM2-360M-Instruct-Q8_0.gguf',
	{
	n_gpu_layers: 99,
	n_ctx: 2048,
	n_batch: 64,
	useCache: true,
	progressCallback: ({ loaded, total }) => {
	const p = Math.round(loaded / total * 100);
	if (p % 10 === 0) statusEl.textContent = `Downloading... ${p}%`;
	},
	}
	);

	statusEl.textContent = 'Ready — SmolLM2-360M on WebGPU';
	inputEl.disabled = false;
	document.getElementById('btn-send').disabled = false;
	inputEl.focus();
	};

	function buildPrompt() {
	let prompt = '<\|im_start\|>system\nYou are a helpful assistant.<\|im_end\|>\n';
	for (const msg of history) {
	prompt += `<\|im_start\|>${msg.role}\n${msg.content}<\|im_end\|>\n`;
	}
	prompt += '<\|im_start\|>assistant\n';
	return prompt;
	}

	window.doSend = async function() {
	const text = inputEl.value.trim();
	if (!text \|\| !wllama) return;
	inputEl.value = '';
	inputEl.disabled = true;
	document.getElementById('btn-send').disabled = true;

	history.push({ role: 'user', content: text });
	addMsg('user', text);

	const prompt = buildPrompt();
	const genStart = performance.now();

	statusEl.textContent = 'Generating...';

	const result = await wllama.createCompletion({
	prompt,
	max_tokens: 512,
	temperature: 0.7,
	top_k: 40,
	repeat_penalty: 1.1,
	stop: ['<\|im_end\|>', '<\|im_start\|>'],
	});

	const rawText = result?.choices?.[0]?.text \|\| result?.text \|\| '';
	const cleanText = rawText.replace('<\|im_end\|>', '').trim();

	addMsg('assistant', cleanText);
	history.push({ role: 'assistant', content: cleanText });

	const elapsed = (performance.now() - genStart) / 1000;
	statusEl.textContent = `Done — ${elapsed.toFixed(1)}s`;

	inputEl.disabled = false;
	document.getElementById('btn-send').disabled = false;
	inputEl.focus();
	};

	inputEl.addEventListener('keydown', (e) => {
	if (e.key === 'Enter' && !e.shiftKey) { e.preventDefault(); doSend(); }
	});
	</script>
	</body>
	</html>