Spaces:
Sleeping
Sleeping
<html>
<head>
<title>ONNX WebGPU Test</title>
<script type="module">
import { pipeline, TextStreamer } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@4.0.0-next.8/dist/transformers.min.js';
/**
 * Reports a diagnostic message in two places: the browser console and the
 * on-page element with id "log", where it is appended as a new line.
 *
 * @param {*} msg - Message to report; concatenated with a trailing newline.
 */
const log = (msg) => {
  console.log(msg);
  const logElement = document.getElementById('log');
  const line = msg + '\n';
  logElement.textContent = logElement.textContent + line;
};
/**
 * Model repositories that can be tested, keyed by the short name passed to
 * `runTest` (the page's buttons pass 'reference' and 'finetuned').
 *
 * Frozen so that a test run cannot accidentally add, remove or repoint an
 * entry of this shared lookup table.
 */
const MODELS = Object.freeze({
  reference: 'onnx-community/NVIDIA-Nemotron-3-Nano-4B-BF16-ONNX',
  finetuned: 'bobber/lex-interviewer-nemotron-4b-grpo-v12',
});
const THINK_OPEN_TAG = '<think>';
const THINK_CLOSE_TAG = '</think>';
const IM_END_TOKEN = '<|im_end|>';

/**
 * Returns a printable description of anything that was thrown.
 * Thrown values are not guaranteed to be Error instances, in which case
 * `.message` would be undefined.
 *
 * @param {*} err - The caught value.
 * @returns {string} `err.message` when present, otherwise `String(err)`.
 */
function describeError(err) {
  return err?.message ?? String(err);
}

/**
 * Replays streamed chunks through the think-tag splitting logic and reports
 * what ends up as visible content versus reasoning.
 *
 * Empty chunks and chunks that are exactly `<|im_end|>` are ignored. When
 * `enableThinking` is true, `<think>` is prepended to the first chunk that is
 * not ignored. Text between `<think>` and `</think>` is reasoning; everything
 * else is content.
 *
 * NOTE(review): each chunk is consumed completely before the next one is
 * read, so a tag that is split across two chunks is not recognised. This is
 * kept as-is because the routine is meant to simulate another parser;
 * confirm that parser behaves the same way.
 *
 * @param {string[]} chunks - Streamed text pieces, in arrival order.
 * @param {boolean} enableThinking - Whether to inject the opening tag.
 * @returns {{content: string, reasoning: string, inThink: boolean}}
 *   Accumulated text and whether a think section was still open at the end.
 */
function simulateThinkParser(chunks, enableThinking) {
  let content = '';
  let reasoning = '';
  let inThink = false;
  let isFirst = true;

  for (const chunk of chunks) {
    if (!chunk || chunk === IM_END_TOKEN) continue;

    let rest = isFirst && enableThinking ? THINK_OPEN_TAG + chunk : chunk;
    isFirst = false;

    while (rest.length > 0) {
      // Inside a think section we look for its end, otherwise for its start.
      const tag = inThink ? THINK_CLOSE_TAG : THINK_OPEN_TAG;
      const tagIndex = rest.indexOf(tag);
      const piece = tagIndex === -1 ? rest : rest.slice(0, tagIndex);

      if (inThink) {
        reasoning += piece;
      } else {
        content += piece;
      }

      if (tagIndex === -1) break;
      rest = rest.slice(tagIndex + tag.length);
      inThink = !inThink;
    }
  }

  return { content, reasoning, inThink };
}

/**
 * Loads the selected model as a WebGPU text-generation pipeline, generates a
 * reply to a fixed two-message chat once with `enable_thinking` true and once
 * with it false, and logs diagnostics about the streamed chunks and about how
 * the think-tag parser simulation splits them.
 *
 * Exposed on the global object so the page's inline `onclick` handlers can
 * call it (`globalThis` is the window in a browser page).
 *
 * Failures (unknown key, no WebGPU, no adapter, load error, generation error)
 * are written to the log and end the run; nothing is thrown to the caller.
 *
 * @param {string} modelKey - Key into `MODELS`.
 * @returns {Promise<void>}
 */
globalThis.runTest = async (modelKey) => {
  const modelId = MODELS[modelKey];
  document.getElementById('log').textContent = '';
  const statusEl = document.getElementById('status');

  // Guard against keys that do not map to a model id string.
  if (typeof modelId !== 'string') {
    log(`❌ Unknown model key: ${String(modelKey)}`);
    return;
  }

  log(`Testing: ${modelId}`);
  log(`Device: webgpu`);

  // Check WebGPU
  if (!navigator.gpu) { log('❌ No WebGPU!'); return; }
  const adapter = await navigator.gpu.requestAdapter();
  log(`GPU: ${adapter ? (adapter.info?.description || adapter.name || 'adapter found') : 'no adapter'}`);
  if (!adapter) {
    // Stop before the large download: without an adapter the run cannot use WebGPU.
    log('❌ No WebGPU adapter available!');
    return;
  }

  log('Loading pipeline (this downloads ~2.5GB)...');
  statusEl.textContent = 'Downloading model...';

  let gen;
  try {
    gen = await pipeline('text-generation', modelId, {
      dtype: 'q4',
      device: 'webgpu',
      progress_callback: (p) => {
        // Skip events without a usable total; they would render NaN% / Infinity%.
        if (p.status === 'progress' && p.total > 0) {
          const pct = Math.round((p.loaded / p.total) * 100);
          statusEl.textContent = `Downloading: ${pct}%`;
        }
      },
    });
  } catch (e) {
    log(`❌ Pipeline error: ${describeError(e)}`);
    statusEl.textContent = 'Model load failed';
    return;
  }
  statusEl.textContent = 'Model loaded!';
  log('Model loaded ✓');

  try {
    // Run once with thinking enabled, then once with it disabled.
    for (const enableThinking of [true, false]) {
      log(`\n=== enable_thinking: ${enableThinking} ===`);

      const allChunks = [];
      const streamer = new TextStreamer(gen.tokenizer, {
        skip_prompt: true,
        skip_special_tokens: false,
        callback_function: (output) => {
          allChunks.push(output);
        },
      });
      const messages = [
        { role: 'system', content: 'You are an AI interviewer. Ask one question at a time.' },
        { role: 'user', content: "I think neural networks are simple." },
      ];

      log('Generating...');
      try {
        await gen(messages, {
          max_new_tokens: 512,
          do_sample: false,
          // NOTE(review): presumably the stop-token ids for this tokenizer — confirm.
          eos_token_id: [2, 11],
          streamer,
          // NOTE(review): presumably forwarded to the chat template — confirm
          // against the transformers.js version imported by this page.
          tokenizer_encode_kwargs: { enable_thinking: enableThinking },
        });
      } catch (e) {
        log(`❌ Generation error: ${describeError(e)}`);
        statusEl.textContent = 'Generation failed';
        return;
      }

      const fullText = allChunks.join('');
      const hasCloseTag = fullText.includes(THINK_CLOSE_TAG);
      log(`Total chunks: ${allChunks.length}`);
      log(`Total chars: ${fullText.length}`);
      log(`Contains </think>: ${hasCloseTag}`);
      log(`Contains <|im_end|>: ${fullText.includes(IM_END_TOKEN)}`);
      log(`First 3 chunks: ${allChunks.slice(0, 3).map(c => JSON.stringify(c)).join(', ')}`);
      log(`Last 3 chunks: ${allChunks.slice(-3).map(c => JSON.stringify(c)).join(', ')}`);

      if (hasCloseTag) {
        const afterThink = fullText
          .slice(fullText.indexOf(THINK_CLOSE_TAG) + THINK_CLOSE_TAG.length)
          .replace(/<\|im_end\|>/g, '')
          .trim();
        log(`Content after </think>: ${JSON.stringify(afterThink.slice(0, 200))}`);
      } else {
        log(`❌ No </think> found!`);
        log(`Full output (last 300): ${JSON.stringify(fullText.slice(-300))}`);
      }

      // Simulate the parser
      const { content, reasoning, inThink } = simulateThinkParser(allChunks, enableThinking);
      log(`Parser result: content=${JSON.stringify(content.trim().slice(0, 200))}`);
      log(`Parser result: reasoning_length=${reasoning.length}`);
      log(`Parser result: still_in_think=${inThink}`);
      log(`Would show "No response": ${!content.trim()}`);
    }

    log('\n✅ Test complete!');
    statusEl.textContent = 'Test complete!';
  } finally {
    // Release the model so repeated runs do not keep several models alive.
    // The optional call tolerates a pipeline object without dispose().
    try {
      await gen.dispose?.();
    } catch (e) {
      log(`⚠️ Failed to release model: ${describeError(e)}`);
    }
  }
};
</script>
</head>
<body style="font-family: monospace; padding: 20px; background: #1a1a1a; color: #eee;">
<h2>ONNX WebGPU Think-Tag Test</h2>
<p id="status">Ready</p>
<button onclick="runTest('reference')" style="padding: 10px 20px; margin: 5px;">Test Reference Model</button>
<button onclick="runTest('finetuned')" style="padding: 10px 20px; margin: 5px;">Test Fine-tuned Model</button>
<hr>
<pre id="log" style="white-space: pre-wrap; max-height: 80vh; overflow-y: auto;"></pre>
</body>
</html>