NurseCitizenDeveloper committed on
Commit
45255a4
·
1 Parent(s): ca709ec

feat: add experimental WebGPU Liquid AI LFM prototype

Browse files
Files changed (2) hide show
  1. app.py +9 -1
  2. webgpu_prototype.html +87 -0
app.py CHANGED
@@ -502,7 +502,15 @@ with gr.Blocks(theme=THEME, css=CSS, title="NurseLex — UK Law for All Nurses")
502
 
503
  scenario_btn.click(scenario_search, [scenario_input], scenario_output)
504
 
505
- # --- Tab 4: Browse Legislation ---
 
 
 
 
 
 
 
 
506
  with gr.TabItem("📚 Browse Legislation", id="browse"):
507
  gr.Markdown(f"Browse **219,678** health & social care Acts and Statutory Instruments from the i.AI Lex dataset.")
508
 
 
502
 
503
  scenario_btn.click(scenario_search, [scenario_input], scenario_output)
504
 
505
+ # --- Tab 4: WebGPU Test ---
506
+ with gr.TabItem("🚀 On-Device WebGPU Test", id="webgpu"):
507
+ gr.Markdown("**Experimental:** Test running Liquid AI's LFM2.5-1.2B-Thinking model 100% locally in your browser via WebGPU. *Note: This requires Chrome/Edge and will download ~900MB to your device's cache.*")
508
+
509
+ with open(os.path.join(os.path.dirname(__file__), "webgpu_prototype.html"), "r", encoding="utf-8") as f:
510
+ webgpu_html = f.read()
511
+ gr.HTML(webgpu_html)
512
+
513
+ # --- Tab 5: Browse Legislation ---
514
  with gr.TabItem("📚 Browse Legislation", id="browse"):
515
  gr.Markdown(f"Browse **219,678** health & social care Acts and Statutory Instruments from the i.AI Lex dataset.")
516
 
webgpu_prototype.html ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Demo widget markup: status line, editable legal-context textarea, question
+      input, a single load/generate button, and a <pre> output pane. The wg-*
+      ids are bound by the module script that follows in this file. -->
+ <div style="border: 1px solid #ddd; padding: 20px; border-radius: 8px; background: #f9fafb;">
2
+ <h3 style="margin-top: 0;">Local AI Inference Test (WebGPU)</h3>
3
+ <p id="wg-status" style="color: #6366f1; font-weight: bold;">[Status] Waiting to initialize...</p>
4
+
5
+ <div style="margin-bottom: 10px;">
6
+ <strong>Context (Simulation):</strong><br />
7
+ <textarea id="wg-context"
8
+ style="width: 100%; height: 60px; padding: 8px;">Mental Health Act 1983, Section 5(4): A nurse has the power to detain a voluntary patient who is already receiving treatment for a mental disorder, for up to 6 hours.</textarea>
9
+ </div>
10
+
11
+ <div style="margin-bottom: 10px;">
12
+ <strong>Question:</strong><br />
13
+ <input type="text" id="wg-question" style="width: 100%; padding: 8px;"
14
+ value="What power does a nurse have under Section 5(4)?" />
15
+ </div>
16
+
17
+ <!-- Starts disabled; the script enables it once the page JS has initialized. -->
+ <button id="wg-btn"
18
+ style="background: #4f46e5; color: white; padding: 10px 15px; border: none; border-radius: 4px; cursor: pointer;"
19
+ disabled>Load Model & Ask</button>
20
+
21
+ <div style="margin-top: 15px;">
22
+ <strong>Output:</strong>
23
+ <pre id="wg-output"
24
+ style="white-space: pre-wrap; background: #1e293b; color: #a5b4fc; padding: 15px; border-radius: 6px; min-height: 100px;">(Output will appear here)</pre>
25
+ </div>
26
+ </div>
27
+
28
+ <script type="module">
29
+ // Import transformers.js (v3 is required for WebGPU support for most modern models)
30
+ import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.2.0';
31
+
32
+ const statusEl = document.getElementById('wg-status');
33
+ const contextEl = document.getElementById('wg-context');
34
+ const questionEl = document.getElementById('wg-question');
35
+ const btnEl = document.getElementById('wg-btn');
36
+ const outputEl = document.getElementById('wg-output');
37
+
38
+ let generator = null;
39
+ let isLoaded = false;
40
+
41
+ btnEl.disabled = false;
42
+ statusEl.textContent = '[Status] Ready to load model (Requires WebGPU capable browser like Chrome)';
43
+
44
+ btnEl.addEventListener('click', async () => {
45
+ if (!isLoaded) {
46
+ statusEl.textContent = '[Status] Downloading LFM2.5-1.2B-Thinking (~800MB). This may take a few minutes...';
47
+ btnEl.disabled = true;
48
+ try {
49
+ generator = await pipeline('text-generation', 'LiquidAI/LFM2.5-1.2B-Thinking-ONNX', {
50
+ device: 'webgpu',
51
+ dtype: 'q4' // Use int4 quantization to save memory
52
+ });
53
+ isLoaded = true;
54
+ btnEl.textContent = "Generate Answer";
55
+ statusEl.textContent = '[Status] Model loaded successfully!';
56
+ btnEl.disabled = false;
57
+ } catch (err) {
58
+ statusEl.textContent = '[Status] Error loading model: ' + err.message;
59
+ console.error(err);
60
+ return;
61
+ }
62
+ }
63
+
64
+ // Now generate
65
+ outputEl.textContent = 'Thinking...';
66
+ btnEl.disabled = true;
67
+ statusEl.textContent = '[Status] Generating text via WebGPU...';
68
+
69
+ try {
70
+ // Basic prompt format. LFM Thinking models usually use <think> tags.
71
+ const prompt = `Context: ${contextEl.value}\n\nQuestion: ${questionEl.value}\n\nAnswer:`;
72
+
73
+ const out = await generator(prompt, {
74
+ max_new_tokens: 256,
75
+ do_sample: false
76
+ });
77
+
78
+ outputEl.textContent = out[0].generated_text;
79
+ statusEl.textContent = '[Status] Generation complete! (~ ' + out[0].generated_text.length + ' chars)';
80
+ } catch (err) {
81
+ outputEl.textContent = 'Generation Error: ' + err.message;
82
+ statusEl.textContent = '[Status] Error during generation.';
83
+ }
84
+
85
+ btnEl.disabled = false;
86
+ });
87
+ </script>