<!--
LJTSG's picture
Add index.html
bf04578 verified
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<!-- Without a viewport declaration mobile browsers lay the page out at desktop width and shrink it. -->
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Phi-4-mini-reasoning — WebGPU</title>
<style>
/* ---- Page shell ---- */
body {
  font-family: -apple-system, sans-serif;
  background: #0a0e14;
  color: #c9d1d9;
  max-width: 800px;
  margin: 0 auto;
  padding: 20px;
}

h1 {
  color: #58a6ff;
  font-size: 20px;
}

/* ---- Loader card and buttons ---- */
.card {
  background: #161b22;
  border: 1px solid #30363d;
  border-radius: 8px;
  padding: 16px;
  margin: 12px 0;
}

button {
  background: #238636;
  color: white;
  border: none;
  border-radius: 6px;
  padding: 10px 20px;
  cursor: pointer;
  font-weight: bold;
  font-size: 14px;
  margin: 4px;
}

button:disabled {
  opacity: 0.4;
}

#status {
  color: #e8c87a;
  font-size: 13px;
  margin: 8px 0;
}

/* ---- Chat transcript: scrolls once it exceeds max-height ---- */
#chat {
  background: #0d1117;
  border: 1px solid #30363d;
  border-radius: 8px;
  padding: 16px;
  margin: 12px 0;
  min-height: 300px;
  max-height: 500px;
  overflow-y: auto;
}

/* pre-wrap keeps the line breaks of the message text */
.msg {
  margin: 8px 0;
  padding: 8px 12px;
  border-radius: 6px;
  white-space: pre-wrap;
  line-height: 1.5;
}

.user {
  background: #1f3a5f;
  color: #e0e8f0;
}

.assistant {
  background: #1a2332;
  color: #c9d1d9;
}

/* ---- Prompt row: textarea stretches, Send button keeps its natural width ---- */
#input-row {
  display: flex;
  gap: 8px;
  margin-top: 8px;
}

#input {
  flex: 1;
  background: #0d1117;
  color: #c9d1d9;
  border: 1px solid #30363d;
  border-radius: 6px;
  padding: 10px;
  font-size: 14px;
  resize: none;
}

.info {
  color: #8b949e;
  font-size: 12px;
}
</style>
</head>
<body>
<h1>Phi-4-mini-reasoning on WebGPU</h1>
<p>Microsoft's math reasoning model (3.8B params). 2.4 GB Q4_K_M. Chain-of-thought reasoning in browser.</p>
<!-- Model loader. #status is a status live region so text written into it by the script is announced. -->
<div class="card">
<button type="button" id="btn-load" onclick="doLoad()">Load Model (2.4 GB)</button>
<div id="status" role="status">Click Load to start</div>
</div>
<!-- Chat transcript; the script appends one .msg element per turn. role="log" announces appended messages. -->
<div id="chat" role="log" aria-label="Conversation"></div>
<!-- Prompt row. The placeholder is not an accessible name, so the textarea carries an aria-label. -->
<div id="input-row">
<textarea id="input" rows="2" placeholder="Ask a math or reasoning question..." aria-label="Your question" disabled></textarea>
<button type="button" id="btn-send" onclick="doSend()" disabled>Send</button>
</div>
<p class="info">Phi-4-mini-reasoning via wllama WebGPU. Trained on DeepSeek-R1 CoT distillation. First reasoning-variant WebGPU package. Built for AMD Strix Halo.</p>
<script type="module">
import { Wllama } from './node_modules/@wllama/wllama/esm/index.js';
// Wllama runtime instance; stays null until doLoad() creates it. doSend() refuses to run while it is null.
let wllama = null;
// Cached references to the status line, the chat transcript container and the prompt textarea.
const statusEl = document.getElementById('status');
const chatEl = document.getElementById('chat');
const inputEl = document.getElementById('input');
// Conversation so far as { role, content } entries: doSend() appends to it, buildPrompt() reads it.
// Declared with `let` inside a module script, so it is module-scoped and distinct from window.history.
let history = [];
/**
 * Append one message bubble to the chat transcript and scroll to the bottom.
 *
 * @param {string} role  CSS modifier added next to "msg" (the callers in this
 *                       file pass 'user' or 'assistant').
 * @param {string} [text] Message body; any falsy value renders an empty bubble.
 * @returns {HTMLDivElement} The element that was appended.
 */
function addMsg(role, text) {
  const bubble = document.createElement('div');
  bubble.className = 'msg ' + role;
  // Assigned via textContent, so the message is never parsed as markup.
  bubble.textContent = text ? text : '';
  chatEl.appendChild(bubble);
  // Keep the newest message in view.
  chatEl.scrollTop = chatEl.scrollHeight;
  return bubble;
}
/**
 * Create the Wllama runtime, load the Phi-4-mini-reasoning GGUF file and,
 * on success, enable the prompt textarea and Send button.
 *
 * Exposed on `window` because the Load button invokes it from an inline
 * `onclick` attribute.
 *
 * On failure the error is logged, shown in the status line, and the Load
 * button is re-enabled so the user can retry; `wllama` is left untouched so
 * doSend() keeps refusing to run.
 *
 * @returns {Promise<void>}
 */
window.doLoad = async function() {
  const loadBtn = document.getElementById('btn-load');
  loadBtn.disabled = true;
  statusEl.textContent = 'Loading Phi-4-mini-reasoning...';

  // Last 10%-step written to the status line; -1 means "nothing shown yet".
  let lastStep = -1;

  try {
    const instance = new Wllama(
      { default: './node_modules/@wllama/wllama/esm/wasm/wllama.wasm' },
      { parallelDownloads: 3, logger: {
        debug: () => {},
        log: m => statusEl.textContent = m,
        warn: m => console.warn(m),
        error: m => console.error(m),
      }}
    );
    await instance.loadModelFromUrl(
      window.location.origin + '/model/Phi-4-mini-reasoning-Q4_K_M.gguf',
      {
        n_gpu_layers: 99,
        n_ctx: 4096,
        n_batch: 64,
        useCache: true,
        progressCallback: ({ loaded, total }) => {
          // Unknown or zero total would make the ratio NaN/Infinity.
          if (!(total > 0)) return;
          const p = Math.round(loaded / total * 100);
          // Report in 10% steps, but never skip a step just because no
          // callback happened to land exactly on a multiple of 10.
          const step = Math.floor(p / 10) * 10;
          if (step !== lastStep) {
            lastStep = step;
            statusEl.textContent = `Downloading... ${step}%`;
          }
        },
      }
    );
    // Publish the instance only once the model is actually loaded.
    wllama = instance;
  } catch (err) {
    console.error(err);
    statusEl.textContent = `Failed to load model: ${err?.message || err}`;
    loadBtn.disabled = false;
    return;
  }

  statusEl.textContent = 'Ready — Phi-4-mini-reasoning on WebGPU';
  inputEl.disabled = false;
  document.getElementById('btn-send').disabled = false;
  inputEl.focus();
};
/**
 * Serialise the conversation into a single prompt string.
 *
 * Layout: a fixed system turn, then one `<|role|>content<|end|>` segment per
 * entry of `history` in order, then an open `<|assistant|>` tag for the model
 * to continue from.
 *
 * @returns {string} The full prompt text.
 */
function buildPrompt() {
  const systemTurn = '<|system|>You are Phi, an AI math and reasoning expert developed by Microsoft. Think step by step and show your reasoning.<|end|>';
  const turns = history.map((entry) => `<|${entry.role}|>${entry.content}<|end|>`);
  return systemTurn + turns.join('') + '<|assistant|>';
}
/**
 * Send the textarea contents as a user turn, run a completion and append the
 * model's reply to the transcript.
 *
 * Exposed on `window` because the Send button invokes it from an inline
 * `onclick` attribute; the keydown listener on the textarea calls it too.
 *
 * Does nothing when the trimmed input is empty or the model is not loaded.
 * If generation fails, the user turn is rolled back (removed from `history`
 * and from the transcript, text restored to the textarea) so a retry does not
 * produce two consecutive user turns in the prompt. The input controls are
 * re-enabled in every case.
 *
 * @returns {Promise<void>}
 */
window.doSend = async function() {
  const text = inputEl.value.trim();
  if (!text || !wllama) return;

  const sendBtn = document.getElementById('btn-send');
  inputEl.value = '';
  inputEl.disabled = true;
  sendBtn.disabled = true;

  const userTurn = { role: 'user', content: text };
  history.push(userTurn);
  const userBubble = addMsg('user', text);

  const genStart = performance.now();
  statusEl.textContent = 'Reasoning...';

  try {
    const prompt = buildPrompt();
    const result = await wllama.createCompletion({
      prompt,
      max_tokens: 1024,
      temperature: 0.6,
      top_k: 40,
      repeat_penalty: 1.1,
      stop: ['<|end|>', '<|endoftext|>', '<|user|>'],
    });
    // Accept a plain string as well as the { choices: [{ text }] } / { text } shapes.
    const rawText = typeof result === 'string'
      ? result
      : (result?.choices?.[0]?.text || result?.text || '');
    // Strip any end-of-turn markers that made it into the returned text.
    const cleanText = rawText.replace(/<\|end\|>/g, '').replace(/<\|endoftext\|>/g, '').trim();
    addMsg('assistant', cleanText);
    history.push({ role: 'assistant', content: cleanText });
    const elapsed = (performance.now() - genStart) / 1000;
    statusEl.textContent = `Done — ${elapsed.toFixed(1)}s`;
  } catch (err) {
    console.error(err);
    // Roll back the unanswered user turn so the next prompt stays well-formed.
    if (history[history.length - 1] === userTurn) {
      history.pop();
      userBubble.remove();
      inputEl.value = text;
    }
    statusEl.textContent = `Generation failed: ${err?.message || err}`;
  } finally {
    // Always hand control back to the user, even after an error.
    inputEl.disabled = false;
    sendBtn.disabled = false;
    inputEl.focus();
  }
};
// Plain Enter submits the prompt; Shift+Enter is left alone so it inserts a newline.
// Enter pressed while an IME composition is active only confirms the composition
// and must not submit, hence the isComposing guard.
inputEl.addEventListener('keydown', (e) => {
  if (e.isComposing) return;
  if (e.key === 'Enter' && !e.shiftKey) {
    e.preventDefault();
    doSend();
  }
});
</script>
</body>
</html>