NurseCitizenDeveloper committed on
Commit
45255a4
·
1 Parent(s): ca709ec

feat: add experimental WebGPU Liquid AI LFM prototype

Browse files
Files changed (2) hide show
  1. app.py +9 -1
  2. webgpu_prototype.html +87 -0
app.py CHANGED
@@ -502,7 +502,15 @@ with gr.Blocks(theme=THEME, css=CSS, title="NurseLex — UK Law for All Nurses")
502
 
503
  scenario_btn.click(scenario_search, [scenario_input], scenario_output)
504
 
505
- # --- Tab 4: Browse Legislation ---
 
 
 
 
 
 
 
 
506
  with gr.TabItem("📚 Browse Legislation", id="browse"):
507
  gr.Markdown(f"Browse **219,678** health & social care Acts and Statutory Instruments from the i.AI Lex dataset.")
508
 
 
502
 
503
  scenario_btn.click(scenario_search, [scenario_input], scenario_output)
504
 
505
+ # --- Tab 4: WebGPU Test ---
506
+ with gr.TabItem("🚀 On-Device WebGPU Test", id="webgpu"):
507
+ gr.Markdown("**Experimental:** Test running Liquid AI's LFM2.5-1.2B-Thinking model 100% locally in your browser via WebGPU. *Note: This requires Chrome/Edge and will download ~900MB to your device's cache.*")
508
+
509
+ with open(os.path.join(os.path.dirname(__file__), "webgpu_prototype.html"), "r", encoding="utf-8") as f:
510
+ webgpu_html = f.read()
511
+ gr.HTML(webgpu_html)
512
+
513
+ # --- Tab 5: Browse Legislation ---
514
  with gr.TabItem("📚 Browse Legislation", id="browse"):
515
  gr.Markdown(f"Browse **219,678** health & social care Acts and Statutory Instruments from the i.AI Lex dataset.")
516
 
webgpu_prototype.html ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Demo widget markup: status line, editable legal-context textarea, question
+      input, a single load/generate button, and a <pre> output pane. The wg-*
+      ids are bound by the module script that follows in this file. -->
+ <div style="border: 1px solid #ddd; padding: 20px; border-radius: 8px; background: #f9fafb;">
2
+ <h3 style="margin-top: 0;">Local AI Inference Test (WebGPU)</h3>
3
+ <p id="wg-status" style="color: #6366f1; font-weight: bold;">[Status] Waiting to initialize...</p>
4
+
5
+ <div style="margin-bottom: 10px;">
6
+ <strong>Context (Simulation):</strong><br />
7
+ <textarea id="wg-context"
8
+ style="width: 100%; height: 60px; padding: 8px;">Mental Health Act 1983, Section 5(4): A nurse has the power to detain a voluntary patient who is already receiving treatment for a mental disorder, for up to 6 hours.</textarea>
9
+ </div>
10
+
11
+ <div style="margin-bottom: 10px;">
12
+ <strong>Question:</strong><br />
13
+ <input type="text" id="wg-question" style="width: 100%; padding: 8px;"
14
+ value="What power does a nurse have under Section 5(4)?" />
15
+ </div>
16
+
17
+ <!-- Starts disabled; the script enables it once the page JS has initialized. -->
+ <button id="wg-btn"
18
+ style="background: #4f46e5; color: white; padding: 10px 15px; border: none; border-radius: 4px; cursor: pointer;"
19
+ disabled>Load Model & Ask</button>
20
+
21
+ <div style="margin-top: 15px;">
22
+ <strong>Output:</strong>
23
+ <pre id="wg-output"
24
+ style="white-space: pre-wrap; background: #1e293b; color: #a5b4fc; padding: 15px; border-radius: 6px; min-height: 100px;">(Output will appear here)</pre>
25
+ </div>
26
+ </div>
27
+
28
+ <script type="module">
29
+ // Import transformers.js (v3 is required for WebGPU support for most modern models)
30
+ import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.2.0';
31
+
32
+ const statusEl = document.getElementById('wg-status');
33
+ const contextEl = document.getElementById('wg-context');
34
+ const questionEl = document.getElementById('wg-question');
35
+ const btnEl = document.getElementById('wg-btn');
36
+ const outputEl = document.getElementById('wg-output');
37
+
38
+ let generator = null;
39
+ let isLoaded = false;
40
+
41
+ btnEl.disabled = false;
42
+ statusEl.textContent = '[Status] Ready to load model (Requires WebGPU capable browser like Chrome)';
43
+
44
+ btnEl.addEventListener('click', async () => {
45
+ if (!isLoaded) {
46
+ statusEl.textContent = '[Status] Downloading LFM2.5-1.2B-Thinking (~800MB). This may take a few minutes...';
47
+ btnEl.disabled = true;
48
+ try {
49
+ generator = await pipeline('text-generation', 'LiquidAI/LFM2.5-1.2B-Thinking-ONNX', {
50
+ device: 'webgpu',
51
+ dtype: 'q4' // Use int4 quantization to save memory
52
+ });
53
+ isLoaded = true;
54
+ btnEl.textContent = "Generate Answer";
55
+ statusEl.textContent = '[Status] Model loaded successfully!';
56
+ btnEl.disabled = false;
57
+ } catch (err) {
58
+ statusEl.textContent = '[Status] Error loading model: ' + err.message;
59
+ console.error(err);
60
+ return;
61
+ }
62
+ }
63
+
64
+ // Now generate
65
+ outputEl.textContent = 'Thinking...';
66
+ btnEl.disabled = true;
67
+ statusEl.textContent = '[Status] Generating text via WebGPU...';
68
+
69
+ try {
70
+ // Basic prompt format. LFM Thinking models usually use <think> tags.
71
+ const prompt = `Context: ${contextEl.value}\n\nQuestion: ${questionEl.value}\n\nAnswer:`;
72
+
73
+ const out = await generator(prompt, {
74
+ max_new_tokens: 256,
75
+ do_sample: false
76
+ });
77
+
78
+ outputEl.textContent = out[0].generated_text;
79
+ statusEl.textContent = '[Status] Generation complete! (~ ' + out[0].generated_text.length + ' chars)';
80
+ } catch (err) {
81
+ outputEl.textContent = 'Generation Error: ' + err.message;
82
+ statusEl.textContent = '[Status] Error during generation.';
83
+ }
84
+
85
+ btnEl.disabled = false;
86
+ });
87
+ </script>