File size: 4,275 Bytes
45255a4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d314abb
45255a4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4039d9d
 
 
 
 
45255a4
4039d9d
45255a4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
<!-- Self-contained WebGPU inference demo widget. All elements use the "wg-" id
     prefix and are driven entirely by the module script that follows. -->
<div style="border: 1px solid #ddd; padding: 20px; border-radius: 8px; background: #f9fafb;">
    <h3 style="margin-top: 0;">Local AI Inference Test (WebGPU)</h3>
    <!-- Status line: rewritten by the script during load/generate/error states. -->
    <p id="wg-status" style="color: #6366f1; font-weight: bold;">[Status] Waiting to initialize...</p>

    <!-- Editable context passage fed verbatim into the model prompt. -->
    <div style="margin-bottom: 10px;">
        <strong>Context (Simulation):</strong><br />
        <textarea id="wg-context"
            style="width: 100%; height: 60px; padding: 8px;">Mental Health Act 1983, Section 5(4): A nurse has the power to detain a voluntary patient who is already receiving treatment for a mental disorder, for up to 6 hours.</textarea>
    </div>

    <!-- Editable question appended to the context in the prompt. -->
    <div style="margin-bottom: 10px;">
        <strong>Question:</strong><br />
        <input type="text" id="wg-question" style="width: 100%; padding: 8px;"
            value="What power does a nurse have under Section 5(4)?" />
    </div>

    <!-- Single button doubles as "load model" (first click) and "generate"
         (subsequent clicks); starts disabled until the script enables it. -->
    <button id="wg-btn"
        style="background: #4f46e5; color: white; padding: 10px 15px; border: none; border-radius: 4px; cursor: pointer;"
        disabled>Load Model & Ask</button>

    <!-- Model output area; textContent is replaced after each generation. -->
    <div style="margin-top: 15px;">
        <strong>Output:</strong>
        <pre id="wg-output"
            style="white-space: pre-wrap; background: #1e293b; color: #a5b4fc; padding: 15px; border-radius: 6px; min-height: 100px;">(Output will appear here)</pre>
    </div>
</div>

<script type="module">
    // Import transformers.js (v3 is required for WebGPU support for most modern models).
    // NOTE: the previously-imported `env` was never used anywhere in this module.
    import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.8.1';

    // Cached references to the widget's DOM nodes (ids defined in the markup above).
    const statusEl = document.getElementById('wg-status');
    const contextEl = document.getElementById('wg-context');
    const questionEl = document.getElementById('wg-question');
    const btnEl = document.getElementById('wg-btn');
    const outputEl = document.getElementById('wg-output');

    let generator = null;   // text-generation pipeline, created lazily on first click
    let isLoaded = false;   // true once the model download/initialization succeeded

    // Rejection values from WebGPU/pipeline init are not guaranteed to be Error
    // instances, so never assume `.message` exists — coerce safely instead.
    const errorText = (err) => String(err?.message ?? err);

    btnEl.disabled = false;
    statusEl.textContent = '[Status] Ready to load model (Requires WebGPU capable browser like Chrome)';

    btnEl.addEventListener('click', async () => {
        // First click: download and initialize the model. Later clicks skip this.
        if (!isLoaded) {
            statusEl.textContent = '[Status] Downloading LFM2.5-1.2B-Thinking (~800MB). This may take a few minutes...';
            btnEl.disabled = true;
            try {
                generator = await pipeline('text-generation', 'LiquidAI/LFM2.5-1.2B-Thinking-ONNX', {
                    device: 'webgpu',
                    dtype: 'q4' // Use int4 quantization to save memory
                });
                isLoaded = true;
                btnEl.textContent = "Generate Answer";
                statusEl.textContent = '[Status] Model loaded successfully!';
                btnEl.disabled = false;
            } catch (err) {
                // Case-insensitive match: adapter errors typically spell it "WebGPU".
                const msg = errorText(err).toLowerCase();
                if (msg.includes('gpu adapter') || msg.includes('webgpu')) {
                    statusEl.innerHTML = '<span style="color: red;">[Status] WebGPU disabled by browser. To fix this, open a new tab and go to <b>chrome://flags/#enable-unsafe-webgpu</b>, set it to "Enabled", and restart your browser.</span>';
                } else {
                    statusEl.textContent = '[Status] Error loading model: ' + errorText(err);
                }
                console.error(err);
                btnEl.disabled = false;
                return;
            }
        }

        // Now generate.
        outputEl.textContent = 'Thinking...';
        btnEl.disabled = true;
        statusEl.textContent = '[Status] Generating text via WebGPU...';

        try {
            // Basic prompt format. LFM Thinking models usually use <think> tags.
            const prompt = `Context: ${contextEl.value}\n\nQuestion: ${questionEl.value}\n\nAnswer:`;

            const out = await generator(prompt, {
                max_new_tokens: 256,
                do_sample: false
            });

            // The text-generation pipeline echoes the prompt at the start of
            // `generated_text`; strip it so the UI shows only the continuation.
            const fullText = out[0].generated_text;
            const answer = typeof fullText === 'string' && fullText.startsWith(prompt)
                ? fullText.slice(prompt.length).trim()
                : fullText;

            outputEl.textContent = answer;
            statusEl.textContent = '[Status] Generation complete! (~ ' + answer.length + ' chars)';
        } catch (err) {
            outputEl.textContent = 'Generation Error: ' + errorText(err);
            statusEl.textContent = '[Status] Error during generation.';
        } finally {
            // Re-enable the button whether generation succeeded or failed.
            btnEl.disabled = false;
        }
    });
</script>