<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <!-- Without a viewport declaration mobile browsers lay the page out at desktop width and scale it down. -->
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Phi-4-mini-reasoning — WebGPU</title>
  <style>
    /* Page shell: dark theme, single centred column. */
    body { font-family: -apple-system, sans-serif; background: #0a0e14; color: #c9d1d9; max-width: 800px; margin: 0 auto; padding: 20px; }
    h1 { color: #58a6ff; font-size: 20px; }
    .card { background: #161b22; border: 1px solid #30363d; border-radius: 8px; padding: 16px; margin: 12px 0; }

    /* Buttons; disabled ones are dimmed and no longer advertise themselves as clickable. */
    button { background: #238636; color: white; border: none; border-radius: 6px; padding: 10px 20px; cursor: pointer; font-weight: bold; font-size: 14px; margin: 4px; }
    button:disabled { opacity: 0.4; cursor: not-allowed; }

    /* Status line under the Load button. */
    #status { color: #e8c87a; font-size: 13px; margin: 8px 0; }

    /* Scrollable transcript and its message bubbles (.user / .assistant are set by the script). */
    #chat { background: #0d1117; border: 1px solid #30363d; border-radius: 8px; padding: 16px; margin: 12px 0; min-height: 300px; max-height: 500px; overflow-y: auto; }
    .msg { margin: 8px 0; padding: 8px 12px; border-radius: 6px; white-space: pre-wrap; line-height: 1.5; }
    .user { background: #1f3a5f; color: #e0e8f0; }
    .assistant { background: #1a2332; color: #c9d1d9; }

    /* Question box and Send button share one row. */
    #input-row { display: flex; gap: 8px; margin-top: 8px; }
    #input { flex: 1; background: #0d1117; color: #c9d1d9; border: 1px solid #30363d; border-radius: 6px; padding: 10px; font-size: 14px; resize: none; }
    .info { color: #8b949e; font-size: 12px; }
  </style>
</head>
<body>
<h1>Phi-4-mini-reasoning on WebGPU</h1>
<p>Microsoft's math reasoning model (3.8B params). 2.4 GB Q4_K_M. Chain-of-thought reasoning in browser.</p>

<div class="card">
  <!-- doLoad and doSend are attached to window by the module script further down this page. -->
  <button id="btn-load" type="button" onclick="doLoad()">Load Model (2.4 GB)</button>
  <!-- role="status" lets assistive technology announce load progress and errors written here. -->
  <div id="status" role="status">Click Load to start</div>
</div>

<!-- role="log" makes newly appended chat messages announce politely, in order. -->
<div id="chat" role="log" aria-label="Conversation"></div>
<div id="input-row">
  <!-- The placeholder disappears on input, so the control also carries an accessible name. -->
  <textarea id="input" rows="2" placeholder="Ask a math or reasoning question..." aria-label="Your question" disabled></textarea>
  <button id="btn-send" type="button" onclick="doSend()" disabled>Send</button>
</div>
<p class="info">Phi-4-mini-reasoning via wllama WebGPU. Trained on DeepSeek-R1 CoT distillation. First reasoning-variant WebGPU package. Built for AMD Strix Halo.</p>

<script type="module">
import { Wllama } from './node_modules/@wllama/wllama/esm/index.js';

// Wllama engine instance; null until doLoad() assigns it.
let wllama = null;

// DOM handles looked up once and reused by the functions below.
const statusEl = document.getElementById('status');
const chatEl = document.getElementById('chat');
const inputEl = document.getElementById('input');

// Conversation so far as { role, content } entries, oldest first.
// Only ever mutated in place (push), so the binding itself is constant.
const history = [];

/**
 * Append one message bubble to the transcript and scroll the transcript to its end.
 *
 * @param {string} role - CSS modifier class for the bubble; callers in this file pass 'user' or 'assistant'.
 * @param {string} [text] - Message text. Assigned via textContent, so any markup in it is shown literally, not parsed.
 * @param {HTMLElement} [container=chatEl] - Element that receives the bubble.
 * @returns {HTMLDivElement} The newly created bubble element.
 */
function addMsg(role, text, container = chatEl) {
  const bubble = document.createElement('div');
  bubble.className = `msg ${role}`;
  // `??` rather than `||`: only a missing text (null/undefined) collapses to an empty bubble.
  bubble.textContent = text ?? '';
  container.appendChild(bubble);
  // Keep the newest message visible.
  container.scrollTop = container.scrollHeight;
  return bubble;
}

/**
 * Create the wllama engine and load the GGUF model, then unlock the chat UI.
 * Published on `window` because the Load button calls it from an inline onclick attribute.
 *
 * On success the question box and Send button are enabled and focused.
 * On failure the error is written to the status line and the Load button is
 * re-enabled so the user can retry; the module-level `wllama` stays null.
 *
 * @returns {Promise<void>}
 */
window.doLoad = async function() {
  const loadBtn = document.getElementById('btn-load');
  loadBtn.disabled = true;
  statusEl.textContent = 'Loading Phi-4-mini-reasoning...';

  // Last whole percentage written to the status line; avoids redundant repaints.
  let lastPct = -1;

  try {
    const engine = new Wllama(
      { default: './node_modules/@wllama/wllama/esm/wasm/wllama.wasm' },
      {
        parallelDownloads: 3,
        logger: {
          debug: () => {},
          // Engine log lines are mirrored into the status line.
          log: (m) => { statusEl.textContent = m; },
          warn: (m) => console.warn(m),
          error: (m) => console.error(m),
        },
      }
    );

    await engine.loadModelFromUrl(
      window.location.origin + '/model/Phi-4-mini-reasoning-Q4_K_M.gguf',
      {
        n_gpu_layers: 99,
        n_ctx: 4096,
        n_batch: 64,
        useCache: true,
        progressCallback: ({ loaded, total }) => {
          // No usable total means no meaningful percentage to show.
          if (!(total > 0)) return;
          const pct = Math.min(100, Math.floor(loaded / total * 100));
          if (pct !== lastPct) {
            lastPct = pct;
            statusEl.textContent = `Downloading... ${pct}%`;
          }
        },
      }
    );

    // Publish the engine only once the model is actually loaded, so doSend()
    // can never see a half-initialised instance.
    wllama = engine;
  } catch (err) {
    console.error(err);
    wllama = null;
    statusEl.textContent = `Load failed: ${err?.message ?? err}`;
    // Let the user retry instead of leaving the page permanently stuck.
    loadBtn.disabled = false;
    return;
  }

  statusEl.textContent = 'Ready — Phi-4-mini-reasoning on WebGPU';
  inputEl.disabled = false;
  document.getElementById('btn-send').disabled = false;
  inputEl.focus();
};

/**
 * Render a conversation into the `<|role|>content<|end|>` prompt layout used by
 * this page, prefixed by the system turn and ending with an open assistant turn.
 *
 * @param {Array<{role: string, content: string}>} [messages=history] - Turns to render, oldest first.
 * @param {string} [systemPrompt] - Text of the leading system turn.
 * @returns {string} The complete prompt string.
 */
function buildPrompt(
  messages = history,
  systemPrompt = 'You are Phi, an AI math and reasoning expert developed by Microsoft. Think step by step and show your reasoning.'
) {
  const MARKERS = /<\|(?:system|user|assistant|end|endoftext)\|>/g;

  // Template markers inside message text (typed by the user or echoed by the
  // model) would be spliced in verbatim and could be read as real turn
  // boundaries, so they are removed. Repeats until stable because deleting one
  // marker can splice the surrounding text into another.
  const stripMarkers = (value) => {
    let current = String(value ?? '');
    let previous;
    do {
      previous = current;
      current = current.replace(MARKERS, '');
    } while (current !== previous);
    return current;
  };

  const turns = messages.map(
    ({ role, content }) => `<|${role}|>${stripMarkers(content)}<|end|>`
  );
  return `<|system|>${systemPrompt}<|end|>${turns.join('')}<|assistant|>`;
}

/**
 * Send the text in the question box to the model and show the reply.
 * Published on `window` because the Send button calls it from an inline onclick attribute.
 *
 * Does nothing when the box is empty or no model is loaded. While generation
 * runs the box and Send button are disabled; they are always re-enabled
 * afterwards, whether generation succeeded or failed. On failure the unanswered
 * user turn is removed from `history` and the text is put back in the box.
 *
 * @returns {Promise<void>}
 */
window.doSend = async function() {
  const text = inputEl.value.trim();
  if (!text || !wllama) return;

  const sendBtn = document.getElementById('btn-send');
  inputEl.value = '';
  inputEl.disabled = true;
  sendBtn.disabled = true;

  const userTurn = { role: 'user', content: text };
  history.push(userTurn);
  addMsg('user', text);

  const prompt = buildPrompt();
  const genStart = performance.now();
  statusEl.textContent = 'Reasoning...';

  try {
    // NOTE(review): the request-object form and option names are taken from the
    // original code; confirm them against the wllama build bundled with this page.
    const result = await wllama.createCompletion({
      prompt,
      max_tokens: 1024,
      temperature: 0.6,
      top_k: 40,
      repeat_penalty: 1.1,
      stop: ['<|end|>', '<|endoftext|>', '<|user|>'],
    });

    // Accept a bare string as well as the object shapes the original handled,
    // so a string result is not silently turned into an empty reply.
    const rawText = typeof result === 'string'
      ? result
      : (result?.choices?.[0]?.text || result?.text || '');
    const cleanText = rawText.replace(/<\|end\|>/g, '').replace(/<\|endoftext\|>/g, '').trim();

    addMsg('assistant', cleanText);
    history.push({ role: 'assistant', content: cleanText });

    const elapsed = (performance.now() - genStart) / 1000;
    statusEl.textContent = `Done — ${elapsed.toFixed(1)}s`;
  } catch (err) {
    console.error(err);
    // Drop the unanswered user turn so the next prompt does not contain two
    // consecutive user turns, and give the text back for an easy retry.
    if (history[history.length - 1] === userTurn) history.pop();
    inputEl.value = text;
    statusEl.textContent = `Generation failed: ${err?.message ?? err}`;
  } finally {
    // Always unlock the UI; without this a single failure would freeze the chat.
    inputEl.disabled = false;
    sendBtn.disabled = false;
    inputEl.focus();
  }
};

// Enter sends the question; Shift+Enter keeps the textarea's default newline.
inputEl.addEventListener('keydown', (e) => {
  // During IME composition Enter confirms the candidate text and must not send.
  // keyCode 229 covers browsers that fire the keydown without setting isComposing.
  if (e.isComposing || e.keyCode === 229) return;
  if (e.key === 'Enter' && !e.shiftKey) {
    e.preventDefault();
    doSend();
  }
});
</script>
</body>
</html>