Bobber committed on
Commit
5477c5d
·
1 Parent(s): a9fb427

add back test-webgpu.html to dist

Browse files
Files changed (1) hide show
  1. dist/test-webgpu.html +153 -0
dist/test-webgpu.html ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <title>ONNX WebGPU Test</title>
5
+ <script type="module">
6
+ import { pipeline, TextStreamer } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@4.0.0-next.8/dist/transformers.min.js';
7
+
8
/**
 * Writes one diagnostic line to the browser console and to the on-page
 * `#log` element.
 *
 * The line is appended as a new text node rather than via
 * `textContent += ...`, which would read back and rewrite the whole
 * accumulated log on every call.
 *
 * @param {*} msg - Value to log; it is stringified for the on-page log.
 */
const log = (msg) => {
  console.log(msg);
  document.getElementById('log').append(`${msg}\n`);
};
12
+
13
/**
 * Model repository ids selectable from the page buttons, keyed by the
 * string each button passes to `runTest`.
 *
 * Frozen so that no code can accidentally add or retarget an entry of this
 * shared lookup table at runtime.
 */
const MODELS = Object.freeze({
  reference: 'onnx-community/NVIDIA-Nemotron-3-Nano-4B-BF16-ONNX',
  finetuned: 'bobber/lex-interviewer-nemotron-4b-grpo-v12',
});
17
+
18
// Literal markers searched for in the streamed text.
const THINK_OPEN = '<think>';
const THINK_CLOSE = '</think>';
const END_OF_TURN = '<|im_end|>';

/**
 * Splits streamed text chunks into visible content and reasoning text.
 *
 * Empty chunks and chunks that are exactly `<|im_end|>` are skipped. When
 * `enableThinking` is true, `<think>` is prepended to the first non-skipped
 * chunk. Text between `<think>` and `</think>` goes to `reasoning`;
 * everything else goes to `content`.
 *
 * NOTE(review): each chunk is scanned on its own, so a tag split across two
 * chunks is not recognised. This is left unchanged because the routine is
 * meant to simulate an existing parser; confirm against that parser.
 *
 * @param {string[]} chunks - Streamed text pieces, in arrival order.
 * @param {boolean} enableThinking - Whether to prepend the opening tag.
 * @returns {{content: string, reasoning: string, inThink: boolean}}
 *   The two accumulated texts, plus whether the input ended inside an
 *   unclosed `<think>` section.
 */
function splitThinkSegments(chunks, enableThinking) {
  let isFirst = true;
  let inThink = false;
  let reasoning = '';
  let content = '';

  for (const chunk of chunks) {
    if (!chunk || chunk === END_OF_TURN) continue;

    let buf = chunk;
    if (isFirst) {
      if (enableThinking) buf = THINK_OPEN + buf;
      isFirst = false;
    }

    while (buf.length > 0) {
      const tag = inThink ? THINK_CLOSE : THINK_OPEN;
      const at = buf.indexOf(tag);
      // No tag in the remainder: all of it belongs to the current section.
      const piece = at === -1 ? buf : buf.slice(0, at);
      if (inThink) {
        reasoning += piece;
      } else {
        content += piece;
      }
      if (at === -1) break;
      buf = buf.slice(at + tag.length);
      inThink = !inThink;
    }
  }

  return { content, reasoning, inThink };
}

/**
 * Runs one generation with the given `enable_thinking` setting, collects the
 * streamed chunks, and logs diagnostics about think tags and parser output.
 *
 * @param {Function} gen - Text-generation pipeline returned by `pipeline()`.
 * @param {boolean} enableThinking - Value forwarded as `enable_thinking`.
 * @returns {Promise<void>} Settles when generation and logging are done;
 *   rejects if `gen` rejects.
 */
async function runThinkingCase(gen, enableThinking) {
  log(`\n=== enable_thinking: ${enableThinking} ===`);

  const allChunks = [];
  const streamer = new TextStreamer(gen.tokenizer, {
    skip_prompt: true,
    // Special tokens are kept so the checks below can see <|im_end|>.
    skip_special_tokens: false,
    callback_function: (output) => {
      allChunks.push(output);
    },
  });

  const messages = [
    { role: 'system', content: 'You are an AI interviewer. Ask one question at a time.' },
    { role: 'user', content: "I think neural networks are simple." },
  ];

  log('Generating...');
  await gen(messages, {
    max_new_tokens: 512,
    do_sample: false,
    // NOTE(review): hard-coded token ids, presumably the model's
    // end-of-sequence ids; confirm against the tokenizer config.
    eos_token_id: [2, 11],
    streamer,
    tokenizer_encode_kwargs: { enable_thinking: enableThinking },
  });

  const fullText = allChunks.join('');
  log(`Total chunks: ${allChunks.length}`);
  log(`Total chars: ${fullText.length}`);
  log(`Contains </think>: ${fullText.includes(THINK_CLOSE)}`);
  log(`Contains <|im_end|>: ${fullText.includes(END_OF_TURN)}`);

  log(`First 3 chunks: ${allChunks.slice(0, 3).map((c) => JSON.stringify(c)).join(', ')}`);
  log(`Last 3 chunks: ${allChunks.slice(-3).map((c) => JSON.stringify(c)).join(', ')}`);

  const closeAt = fullText.indexOf(THINK_CLOSE);
  if (closeAt !== -1) {
    const afterThink = fullText
      .slice(closeAt + THINK_CLOSE.length)
      .replace(/<\|im_end\|>/g, '')
      .trim();
    log(`Content after </think>: ${JSON.stringify(afterThink.slice(0, 200))}`);
  } else {
    log(`❌ No </think> found!`);
    log(`Full output (last 300): ${JSON.stringify(fullText.slice(-300))}`);
  }

  // Simulate the parser on the collected chunks.
  const { content, reasoning, inThink } = splitThinkSegments(allChunks, enableThinking);
  log(`Parser result: content=${JSON.stringify(content.trim().slice(0, 200))}`);
  log(`Parser result: reasoning_length=${reasoning.length}`);
  log(`Parser result: still_in_think=${inThink}`);
  log(`Would show "No response": ${!content.trim()}`);
}

/**
 * Entry point wired to the page buttons. Loads the selected model on WebGPU
 * and runs the think-tag diagnostics with `enable_thinking` on and off.
 *
 * Assigned on `globalThis`, which is `window` in a browser, so the inline
 * `onclick="runTest(...)"` handlers keep working.
 *
 * Failures are reported through `log` and the `#status` element rather than
 * thrown: unknown model key, missing WebGPU or adapter, pipeline load error,
 * and generation error.
 *
 * @param {string} modelKey - Key into `MODELS` ('reference' or 'finetuned').
 * @returns {Promise<void>}
 */
globalThis.runTest = async (modelKey) => {
  const statusEl = document.getElementById('status');
  document.getElementById('log').textContent = '';

  const modelId = MODELS[modelKey];
  // Only string entries count, so inherited keys such as 'constructor' are
  // rejected along with missing ones.
  if (typeof modelId !== 'string') {
    log(`❌ Unknown model key: ${modelKey}`);
    return;
  }
  log(`Testing: ${modelId}`);
  log(`Device: webgpu`);

  // Check WebGPU
  if (!navigator.gpu) { log('❌ No WebGPU!'); return; }
  const adapter = await navigator.gpu.requestAdapter();
  log(`GPU: ${adapter ? (adapter.info?.description || adapter.name || 'adapter found') : 'no adapter'}`);
  if (!adapter) {
    // requestAdapter() can resolve to null; stop before the large download.
    log('❌ No WebGPU adapter available!');
    return;
  }

  log('Loading pipeline (this downloads ~2.5GB)...');
  statusEl.textContent = 'Downloading model...';

  let gen;
  try {
    gen = await pipeline('text-generation', modelId, {
      dtype: 'q4',
      device: 'webgpu',
      progress_callback: (p) => {
        if (p.status !== 'progress') return;
        const pct = Math.round((p.loaded / p.total) * 100);
        // A missing or zero total would otherwise render as "NaN%".
        if (!Number.isFinite(pct)) return;
        statusEl.textContent = `Downloading: ${pct}%`;
      },
    });
  } catch (e) {
    log(`❌ Pipeline error: ${e.message}`);
    statusEl.textContent = 'Model load failed';
    return;
  }
  statusEl.textContent = 'Model loaded!';
  log('Model loaded ✓');

  try {
    // Run once with thinking enabled and once with it disabled.
    for (const enableThinking of [true, false]) {
      await runThinkingCase(gen, enableThinking);
    }
    log('\n✅ Test complete!');
    statusEl.textContent = 'Test complete!';
  } catch (e) {
    log(`❌ Generation error: ${e.message}`);
    statusEl.textContent = 'Generation failed';
  } finally {
    // Release the model when the pipeline exposes dispose(), so repeated
    // runs do not keep several models resident at once.
    try {
      await gen.dispose?.();
    } catch (e) {
      log(`⚠️ Dispose error: ${e.message}`);
    }
  }
};
143
+ </script>
144
+ </head>
145
+ <body style="font-family: monospace; padding: 20px; background: #1a1a1a; color: #eee;">
146
+ <h2>ONNX WebGPU Think-Tag Test</h2>
147
+ <p id="status">Ready</p>
148
+ <button onclick="runTest('reference')" style="padding: 10px 20px; margin: 5px;">Test Reference Model</button>
149
+ <button onclick="runTest('finetuned')" style="padding: 10px 20px; margin: 5px;">Test Fine-tuned Model</button>
150
+ <hr>
151
+ <pre id="log" style="white-space: pre-wrap; max-height: 80vh; overflow-y: auto;"></pre>
152
+ </body>
153
+ </html>