<!--
  NurseLex / webgpu_prototype.html
  feat(webgpu): add actionable UI error message for enabling chrome webgpu flags
  (commit 4039d9d)
-->
<div style="border: 1px solid #ddd; padding: 20px; border-radius: 8px; background: #f9fafb;">
<h3 style="margin-top: 0;">Local AI Inference Test (WebGPU)</h3>
<p id="wg-status" style="color: #6366f1; font-weight: bold;">[Status] Waiting to initialize...</p>
<div style="margin-bottom: 10px;">
<strong>Context (Simulation):</strong><br />
<textarea id="wg-context"
style="width: 100%; height: 60px; padding: 8px;">Mental Health Act 1983, Section 5(4): A nurse has the power to detain a voluntary patient who is already receiving treatment for a mental disorder, for up to 6 hours.</textarea>
</div>
<div style="margin-bottom: 10px;">
<strong>Question:</strong><br />
<input type="text" id="wg-question" style="width: 100%; padding: 8px;"
value="What power does a nurse have under Section 5(4)?" />
</div>
<button id="wg-btn"
style="background: #4f46e5; color: white; padding: 10px 15px; border: none; border-radius: 4px; cursor: pointer;"
disabled>Load Model & Ask</button>
<div style="margin-top: 15px;">
<strong>Output:</strong>
<pre id="wg-output"
style="white-space: pre-wrap; background: #1e293b; color: #a5b4fc; padding: 15px; border-radius: 6px; min-height: 100px;">(Output will appear here)</pre>
</div>
</div>
<script type="module">
// Import transformers.js (v3 is required for WebGPU support for most modern models).
// NOTE: `env` was previously imported here but never used anywhere in this module.
import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.8.1';

const statusEl = document.getElementById('wg-status');
const contextEl = document.getElementById('wg-context');
const questionEl = document.getElementById('wg-question');
const btnEl = document.getElementById('wg-btn');
const outputEl = document.getElementById('wg-output');

// Lazily-created text-generation pipeline; stays null until the first
// successful load, which doubles as the "is loaded" flag.
let generator = null;

btnEl.disabled = false;
statusEl.textContent = '[Status] Ready to load model (Requires WebGPU capable browser like Chrome)';

btnEl.addEventListener('click', async () => {
  // First click: download and initialize the model on the GPU.
  if (generator === null) {
    statusEl.textContent = '[Status] Downloading LFM2.5-1.2B-Thinking (~800MB). This may take a few minutes...';
    btnEl.disabled = true;
    try {
      generator = await pipeline('text-generation', 'LiquidAI/LFM2.5-1.2B-Thinking-ONNX', {
        device: 'webgpu',
        dtype: 'q4' // Use int4 quantization to save memory
      });
      btnEl.textContent = "Generate Answer";
      statusEl.textContent = '[Status] Model loaded successfully!';
      btnEl.disabled = false;
    } catch (err) {
      // A thrown value may not be an Error (no .message) — coerce defensively.
      const msg = String(err?.message ?? err);
      // Match case-insensitively: browsers surface "WebGPU", "GPU adapter",
      // "webgpu" etc. depending on vendor/version.
      if (/gpu adapter|webgpu/i.test(msg)) {
        statusEl.innerHTML = '<span style="color: red;">[Status] WebGPU disabled by browser. To fix this, open a new tab and go to <b>chrome://flags/#enable-unsafe-webgpu</b>, set it to "Enabled", and restart your browser.</span>';
      } else {
        statusEl.textContent = '[Status] Error loading model: ' + msg;
      }
      console.error(err);
      btnEl.disabled = false;
      return;
    }
  }

  // Model is ready — run generation.
  outputEl.textContent = 'Thinking...';
  btnEl.disabled = true;
  statusEl.textContent = '[Status] Generating text via WebGPU...';
  try {
    // Basic prompt format. LFM Thinking models usually use <think> tags.
    const prompt = `Context: ${contextEl.value}\n\nQuestion: ${questionEl.value}\n\nAnswer:`;
    const out = await generator(prompt, {
      max_new_tokens: 256,
      do_sample: false
    });
    const raw = out[0].generated_text;
    // transformers.js text-generation returns prompt + completion by default;
    // show only the newly generated completion when the prompt is echoed back.
    const answer = raw.startsWith(prompt) ? raw.slice(prompt.length).trim() : raw;
    outputEl.textContent = answer;
    statusEl.textContent = '[Status] Generation complete! (~ ' + answer.length + ' chars)';
  } catch (err) {
    outputEl.textContent = 'Generation Error: ' + String(err?.message ?? err);
    statusEl.textContent = '[Status] Error during generation.';
  } finally {
    // Re-enable the button on both success and failure paths.
    btnEl.disabled = false;
  }
});
</script>