<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Phi-4-mini-reasoning — WebGPU</title>
<style>
body { font-family: -apple-system, sans-serif; background: #0a0e14; color: #c9d1d9; max-width: 800px; margin: 0 auto; padding: 20px; }
h1 { color: #58a6ff; font-size: 20px; }
.card { background: #161b22; border: 1px solid #30363d; border-radius: 8px; padding: 16px; margin: 12px 0; }
button { background: #238636; color: white; border: none; border-radius: 6px; padding: 10px 20px; cursor: pointer; font-weight: bold; font-size: 14px; margin: 4px; }
button:disabled { opacity: 0.4; }
#status { color: #e8c87a; font-size: 13px; margin: 8px 0; }
#chat { background: #0d1117; border: 1px solid #30363d; border-radius: 8px; padding: 16px; margin: 12px 0; min-height: 300px; max-height: 500px; overflow-y: auto; }
.msg { margin: 8px 0; padding: 8px 12px; border-radius: 6px; white-space: pre-wrap; line-height: 1.5; }
.user { background: #1f3a5f; color: #e0e8f0; }
.assistant { background: #1a2332; color: #c9d1d9; }
#input-row { display: flex; gap: 8px; margin-top: 8px; }
#input { flex: 1; background: #0d1117; color: #c9d1d9; border: 1px solid #30363d; border-radius: 6px; padding: 10px; font-size: 14px; resize: none; }
.info { color: #8b949e; font-size: 12px; }
</style>
</head>
<body>
<h1>Phi-4-mini-reasoning on WebGPU</h1>
<p>Microsoft's math reasoning model (3.8B params). 2.4 GB Q4_K_M. Chain-of-thought reasoning in browser.</p>
<div class="card">
<button id="btn-load" onclick="doLoad()">Load Model (2.4 GB)</button>
<div id="status">Click Load to start</div>
</div>
<div id="chat"></div>
<div id="input-row">
<textarea id="input" rows="2" placeholder="Ask a math or reasoning question..." disabled></textarea>
<button id="btn-send" onclick="doSend()" disabled>Send</button>
</div>
<p class="info">Phi-4-mini-reasoning via wllama WebGPU. Trained on DeepSeek-R1 CoT distillation. First reasoning-variant WebGPU package. Built for AMD Strix Halo.</p>
<script type="module">
import { Wllama } from './node_modules/@wllama/wllama/esm/index.js';
// Wllama instance shared by the handlers below; stays null until doLoad() creates it.
let wllama = null;
// Cached references to the status line, the chat transcript and the message input.
const statusEl = document.getElementById('status');
const chatEl = document.getElementById('chat');
const inputEl = document.getElementById('input');
// Conversation turns as { role, content } objects in the order they were pushed;
// buildPrompt() serialises this list into the prompt text.
let history = [];
/**
 * Append one message bubble to the chat transcript and scroll to the bottom.
 *
 * @param {string} role - Added as a CSS class next to `msg` (the page styles
 *   `user` and `assistant`).
 * @param {string} [text] - Bubble text; any falsy value renders as an empty bubble.
 * @returns {HTMLDivElement} The newly appended element.
 */
function addMsg(role, text) {
  const bubble = Object.assign(document.createElement('div'), {
    className: `msg ${role}`,
    // Assigned via textContent, so the text is never parsed as HTML.
    textContent: text || '',
  });
  chatEl.appendChild(bubble);
  // Keep the newest message in view.
  chatEl.scrollTop = chatEl.scrollHeight;
  return bubble;
}
/**
 * Create the Wllama instance, load the model, then unlock the chat input.
 *
 * Assigned to `window` because the Load button calls it from an inline
 * `onclick` attribute, which cannot see module-scoped bindings.
 *
 * The Load button is disabled while loading. If anything fails, the error is
 * logged and shown in the status line, `wllama` is reset to null, and the
 * button is re-enabled so the user can retry.
 *
 * @returns {Promise<void>} Resolves once loading has succeeded or failed;
 *   failures are reported in the UI rather than by rejecting.
 */
window.doLoad = async function() {
  const loadBtn = document.getElementById('btn-load');
  loadBtn.disabled = true;
  statusEl.textContent = 'Loading Phi-4-mini-reasoning...';

  // Last 10% step written to the status line; -1 means nothing shown yet.
  let lastStep = -1;

  try {
    wllama = new Wllama(
      { default: './node_modules/@wllama/wllama/esm/wasm/wllama.wasm' },
      {
        parallelDownloads: 3,
        logger: {
          debug: () => {},
          // Library log lines double as loading status for the user.
          log: (m) => { statusEl.textContent = m; },
          warn: (m) => console.warn(m),
          error: (m) => console.error(m),
        },
      }
    );
    await wllama.loadModelFromUrl(
      window.location.origin + '/model/Phi-4-mini-reasoning-Q4_K_M.gguf',
      {
        n_gpu_layers: 99,
        n_ctx: 4096,
        n_batch: 64,
        useCache: true,
        progressCallback: ({ loaded, total }) => {
          // Without a positive total the ratio would be NaN/Infinity.
          if (!(total > 0)) return;
          // Report in 10% steps, but key on "step changed" rather than
          // "exactly a multiple of ten" so sparse events still update the UI.
          const step = Math.floor((loaded / total) * 10) * 10;
          if (step === lastStep) return;
          lastStep = step;
          statusEl.textContent = `Downloading... ${step}%`;
        },
      }
    );
  } catch (err) {
    console.error(err);
    // Drop the half-initialised instance so doSend()'s `!wllama` guard holds.
    wllama = null;
    statusEl.textContent = `Failed to load model: ${err?.message ?? err}`;
    loadBtn.disabled = false;
    return;
  }

  statusEl.textContent = 'Ready — Phi-4-mini-reasoning on WebGPU';
  inputEl.disabled = false;
  document.getElementById('btn-send').disabled = false;
  inputEl.focus();
};
/**
 * Serialise the conversation into a single prompt string.
 *
 * Layout: a fixed system turn, then every entry of `history` rendered as
 * `<|role|>content<|end|>`, then an opening `<|assistant|>` tag that the
 * model is expected to continue.
 *
 * @returns {string} The full prompt text.
 */
function buildPrompt() {
  const systemTurn = '<|system|>You are Phi, an AI math and reasoning expert developed by Microsoft. Think step by step and show your reasoning.<|end|>';
  const turns = history.map(({ role, content }) => `<|${role}|>${content}<|end|>`);
  return [systemTurn, ...turns, '<|assistant|>'].join('');
}
/**
 * Send the text in the input box as a user turn and render the model's reply.
 *
 * Assigned to `window` because the Send button calls it from an inline
 * `onclick` attribute. Does nothing when the input is blank or no model
 * instance exists yet.
 *
 * The input and Send button are disabled while generating and are always
 * re-enabled afterwards, even on failure. On failure the turn is rolled back:
 * it is removed from `history`, its bubble is removed from the transcript and
 * the text is put back in the input, so the user can retry without leaving
 * two consecutive user turns in the prompt.
 *
 * @returns {Promise<void>} Resolves when the turn has completed or failed;
 *   failures are reported in the status line rather than by rejecting.
 */
window.doSend = async function() {
  const text = inputEl.value.trim();
  if (!text || !wllama) return;

  const sendBtn = document.getElementById('btn-send');
  inputEl.value = '';
  inputEl.disabled = true;
  sendBtn.disabled = true;

  // Remember where this turn starts so a failure can undo it.
  const turnStart = history.length;
  history.push({ role: 'user', content: text });
  const userBubble = addMsg('user', text);

  const genStart = performance.now();
  statusEl.textContent = 'Reasoning...';

  try {
    const result = await wllama.createCompletion({
      prompt: buildPrompt(),
      max_tokens: 1024,
      temperature: 0.6,
      top_k: 40,
      repeat_penalty: 1.1,
      stop: ['<|end|>', '<|endoftext|>', '<|user|>'],
    });
    // Accept a plain-string result as well as the { choices: [{ text }] } and
    // { text } shapes. NOTE(review): the exact return shape of this
    // createCompletion build is not visible from here — confirm.
    const rawText = typeof result === 'string'
      ? result
      : (result?.choices?.[0]?.text || result?.text || '');
    // Strip end-of-turn markers that slipped into the output.
    const cleanText = rawText.replace(/<\|end\|>/g, '').replace(/<\|endoftext\|>/g, '').trim();
    addMsg('assistant', cleanText);
    history.push({ role: 'assistant', content: cleanText });
    const elapsed = (performance.now() - genStart) / 1000;
    statusEl.textContent = `Done — ${elapsed.toFixed(1)}s`;
  } catch (err) {
    console.error(err);
    // Undo the turn so history and transcript stay consistent.
    history.splice(turnStart);
    userBubble.remove();
    inputEl.value = text;
    statusEl.textContent = `Generation failed: ${err?.message ?? err}`;
  } finally {
    inputEl.disabled = false;
    sendBtn.disabled = false;
    inputEl.focus();
  }
};
// Enter submits the message; Shift+Enter keeps the default newline behaviour.
inputEl.addEventListener('keydown', (e) => {
  // An Enter that confirms an IME composition (e.g. CJK input) must not submit.
  if (e.isComposing || e.key !== 'Enter' || e.shiftKey) return;
  e.preventDefault();
  // doSend() is async; attach a handler so a rejection is never left floating.
  doSend().catch((err) => console.error(err));
});
</script>
</body>
</html>