lex-interviewer-chat

Sleeping

App Files Files Community

Bobber committed on Apr 1

Commit

5477c5d

1 Parent(s): a9fb427

add back test-webgpu.html to dist

Browse files

Files changed (1) hide show

dist/test-webgpu.html +153 -0

dist/test-webgpu.html ADDED Viewed

	@@ -0,0 +1,153 @@

+<!DOCTYPE html>
+<html>
+<head>
+  <title>ONNX WebGPU Test</title>
+  <script type="module">
+    import { pipeline, TextStreamer } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@4.0.0-next.8/dist/transformers.min.js';
+    const log = (msg) => {
+      console.log(msg);
+      document.getElementById('log').textContent += msg + '\n';
+    };
+    // Model ids selectable by key. runTest(modelKey) looks the id up here and
+    // passes it to pipeline(); the two page buttons use the keys 'reference'
+    // and 'finetuned'. Presumably these are Hugging Face Hub repo ids — confirm.
+    const MODELS = {
+      reference: 'onnx-community/NVIDIA-Nemotron-3-Nano-4B-BF16-ONNX',
+      finetuned: 'bobber/lex-interviewer-nemotron-4b-grpo-v12',
+    };
+    window.runTest = async (modelKey) => {
+      const modelId = MODELS[modelKey];
+      document.getElementById('log').textContent = '';
+      log(`Testing: ${modelId}`);
+      log(`Device: webgpu`);
+      // Check WebGPU
+      if (!navigator.gpu) { log('❌ No WebGPU!'); return; }
+      const adapter = await navigator.gpu.requestAdapter();
+      log(`GPU: ${adapter ? (adapter.info?.description || adapter.name || 'adapter found') : 'no adapter'}`);
+      log('Loading pipeline (this downloads ~2.5GB)...');
+      const statusEl = document.getElementById('status');
+      statusEl.textContent = 'Downloading model...';
+      let gen;
+      try {
+        gen = await pipeline('text-generation', modelId, {
+          dtype: 'q4',
+          device: 'webgpu',
+          progress_callback: (p) => {
+            if (p.status === 'progress') {
+              const pct = Math.round((p.loaded / p.total) * 100);
+              statusEl.textContent = `Downloading: ${pct}%`;
+            }
+          }
+        });
+      } catch(e) {
+        log(`❌ Pipeline error: ${e.message}`);
+        return;
+      }
+      statusEl.textContent = 'Model loaded!';
+      log('Model loaded ✓');
+      // Test with thinking enabled
+      for (const enableThinking of [true, false]) {
+        log(`\n=== enable_thinking: ${enableThinking} ===`);
+        const allChunks = [];
+        const streamer = new TextStreamer(gen.tokenizer, {
+          skip_prompt: true,
+          skip_special_tokens: false,
+          callback_function: (output) => {
+            allChunks.push(output);
+          },
+        });
+        const messages = [
+          { role: 'system', content: 'You are an AI interviewer. Ask one question at a time.' },
+          { role: 'user', content: "I think neural networks are simple." },
+        ];
+        log('Generating...');
+        await gen(messages, {
+          max_new_tokens: 512,
+          do_sample: false,
+          eos_token_id: [2, 11],
+          streamer,
+          tokenizer_encode_kwargs: { enable_thinking: enableThinking },
+        });
+        const fullText = allChunks.join('');
+        log(`Total chunks: ${allChunks.length}`);
+        log(`Total chars: ${fullText.length}`);
+        log(`Contains </think>: ${fullText.includes('</think>')}`);
+        log(`Contains <|im_end|>: ${fullText.includes('<|im_end|>')}`);
+        log(`First 3 chunks: ${allChunks.slice(0, 3).map(c => JSON.stringify(c)).join(', ')}`);
+        log(`Last 3 chunks: ${allChunks.slice(-3).map(c => JSON.stringify(c)).join(', ')}`);
+        if (fullText.includes('</think>')) {
+          const afterThink = fullText.slice(fullText.indexOf('</think>') + 8)
+            .replace(/<\|im_end\|>/g, '').trim();
+          log(`Content after </think>: ${JSON.stringify(afterThink.slice(0, 200))}`);
+        } else {
+          log(`❌ No </think> found!`);
+          log(`Full output (last 300): ${JSON.stringify(fullText.slice(-300))}`);
+        }
+        // Simulate the parser
+        let isFirst = true;
+        let inThink = false;
+        let reasoning = '';
+        let content = '';
+        let buf = '';
+        for (const chunk of allChunks) {
+          if (!chunk || chunk === '<|im_end|>') continue;
+          let text = chunk;
+          if (isFirst && enableThinking) { text = '<think>' + text; isFirst = false; }
+          else if (isFirst) { isFirst = false; }
+          buf += text;
+          while (buf.length > 0) {
+            if (inThink) {
+              const ci = buf.indexOf('</think>');
+              if (ci !== -1) {
+                reasoning += buf.slice(0, ci);
+                buf = buf.slice(ci + 8);
+                inThink = false;
+                continue;
+              }
+              reasoning += buf;
+              buf = '';
+              break;
+            }
+            const oi = buf.indexOf('<think>');
+            if (oi !== -1) {
+              content += buf.slice(0, oi);
+              buf = buf.slice(oi + 7);
+              inThink = true;
+              continue;
+            }
+            content += buf;
+            buf = '';
+            break;
+          }
+        }
+        log(`Parser result: content=${JSON.stringify(content.trim().slice(0, 200))}`);
+        log(`Parser result: reasoning_length=${reasoning.length}`);
+        log(`Parser result: still_in_think=${inThink}`);
+        log(`Would show "No response": ${!content.trim()}`);
+      }
+      log('\n✅ Test complete!');
+      statusEl.textContent = 'Test complete!';
+    };
+  </script>
+</head>
+<body style="font-family: monospace; padding: 20px; background: #1a1a1a; color: #eee;">
+  <h2>ONNX WebGPU Think-Tag Test</h2>
+  <p id="status">Ready</p>
+  <button onclick="runTest('reference')" style="padding: 10px 20px; margin: 5px;">Test Reference Model</button>
+  <button onclick="runTest('finetuned')" style="padding: 10px 20px; margin: 5px;">Test Fine-tuned Model</button>
+  <hr>
+  <pre id="log" style="white-space: pre-wrap; max-height: 80vh; overflow-y: auto;"></pre>
+</body>
+</html>