Spaces:
Sleeping
Sleeping
Bobber committed on
Commit ·
5477c5d
1
Parent(s): a9fb427
add back test-webgpu.html to dist
Browse files
- dist/test-webgpu.html +153 -0
dist/test-webgpu.html
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html>
|
| 3 |
+
<head>
|
| 4 |
+
<title>ONNX WebGPU Test</title>
|
| 5 |
+
<script type="module">
|
| 6 |
+
import { pipeline, TextStreamer } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@4.0.0-next.8/dist/transformers.min.js';
|
| 7 |
+
|
| 8 |
+
/**
 * Write a message to the browser console and append it, followed by a
 * newline, to the text of the page element whose id is "log".
 *
 * @param {*} msg - Value to report; it is string-concatenated when appended to the page.
 */
const log = (msg) => {
  console.log(msg);

  // Keep console logging usable even when the page has no #log element,
  // instead of throwing a TypeError on `null.textContent`.
  const logEl = document.getElementById('log');
  if (logEl) {
    logEl.textContent += msg + '\n';
  }
};
|
| 12 |
+
|
| 13 |
+
// Maps the keys passed to runTest() (see the page's buttons) to model ids.
// Frozen because the table is only ever read.
const MODELS = Object.freeze({
  reference: 'onnx-community/NVIDIA-Nemotron-3-Nano-4B-BF16-ONNX',
  finetuned: 'bobber/lex-interviewer-nemotron-4b-grpo-v12',
});
|
| 17 |
+
|
| 18 |
+
// Literal markers searched for in the streamed model output.
const THINK_OPEN = '<think>';
const THINK_CLOSE = '</think>';
const END_OF_TURN = '<|im_end|>';

/**
 * Replay streamed text chunks through the think-tag parser being simulated.
 *
 * Text between `<think>` and `</think>` is accumulated as reasoning; all other
 * text is accumulated as content. Empty chunks and chunks equal to
 * `<|im_end|>` are skipped. When `enableThinking` is true, `<think>` is
 * prepended to the first non-skipped chunk.
 *
 * Each chunk is scanned on its own, so a tag split across two chunks is not
 * recognised. That is kept on purpose: this code is labelled a simulation,
 * presumably of the application's parser (verify against the app), so it
 * must reproduce that parser's behaviour rather than improve on it.
 *
 * @param {string[]} chunks - Streamed text pieces in arrival order.
 * @param {boolean} enableThinking - Whether the output is treated as starting inside a think block.
 * @returns {{content: string, reasoning: string, inThink: boolean}} Accumulated
 *   text per channel, and whether the stream ended inside an unclosed think block.
 */
const simulateThinkParser = (chunks, enableThinking) => {
  let isFirst = true;
  let inThink = false;
  let reasoning = '';
  let content = '';

  for (const chunk of chunks) {
    if (!chunk || chunk === END_OF_TURN) continue;

    let buf = chunk;
    if (isFirst) {
      if (enableThinking) buf = THINK_OPEN + buf;
      isFirst = false;
    }

    while (buf.length > 0) {
      // Inside a think block we look for its end; outside, for the next start.
      const tag = inThink ? THINK_CLOSE : THINK_OPEN;
      const tagIdx = buf.indexOf(tag);
      const text = tagIdx === -1 ? buf : buf.slice(0, tagIdx);
      if (inThink) {
        reasoning += text;
      } else {
        content += text;
      }
      if (tagIdx === -1) break;
      buf = buf.slice(tagIdx + tag.length);
      inThink = !inThink;
    }
  }

  return { content, reasoning, inThink };
};

/**
 * Load the selected model on WebGPU, generate one reply with
 * `enable_thinking` on and once with it off, and log diagnostics about the
 * think tags in the streamed output.
 *
 * Exposed on the global object (`globalThis` is `window` in a browser) so the
 * page's inline `onclick="runTest(...)"` handlers can reach it from outside
 * the module.
 *
 * @param {string} modelKey - Key into MODELS ('reference' or 'finetuned').
 * @returns {Promise<void>} Resolves when the run finishes or is aborted;
 *   load and generation failures are logged, not thrown.
 */
globalThis.runTest = async (modelKey) => {
  document.getElementById('log').textContent = '';

  const modelId = MODELS[modelKey];
  if (typeof modelId !== 'string') {
    // Don't hand `undefined` to pipeline(); fail with a readable message.
    log(`❌ Unknown model key: ${modelKey}`);
    return;
  }
  log(`Testing: ${modelId}`);
  log(`Device: webgpu`);

  // Check WebGPU
  if (!navigator.gpu) { log('❌ No WebGPU!'); return; }
  const adapter = await navigator.gpu.requestAdapter();
  log(`GPU: ${adapter ? (adapter.info?.description || adapter.name || 'adapter found') : 'no adapter'}`);
  if (!adapter) {
    // No usable adapter: stop before starting a multi-gigabyte download.
    log('❌ No WebGPU adapter available!');
    return;
  }

  log('Loading pipeline (this downloads ~2.5GB)...');
  const statusEl = document.getElementById('status');
  statusEl.textContent = 'Downloading model...';

  let gen;
  try {
    gen = await pipeline('text-generation', modelId, {
      dtype: 'q4',
      device: 'webgpu',
      progress_callback: (p) => {
        if (p.status !== 'progress') return;
        // Skip events without a usable total so the status never shows "NaN%".
        if (p.total > 0) {
          const pct = Math.round((p.loaded / p.total) * 100);
          statusEl.textContent = `Downloading: ${pct}%`;
        }
      },
    });
  } catch (e) {
    log(`❌ Pipeline error: ${e?.message ?? e}`);
    statusEl.textContent = 'Model load failed';
    return;
  }
  statusEl.textContent = 'Model loaded!';
  log('Model loaded ✓');

  try {
    // Run once with thinking enabled, then once with it disabled.
    for (const enableThinking of [true, false]) {
      log(`\n=== enable_thinking: ${enableThinking} ===`);

      const allChunks = [];
      const streamer = new TextStreamer(gen.tokenizer, {
        skip_prompt: true,
        // Keep special tokens so the <|im_end|> check below can see them.
        skip_special_tokens: false,
        callback_function: (output) => {
          allChunks.push(output);
        },
      });

      const messages = [
        { role: 'system', content: 'You are an AI interviewer. Ask one question at a time.' },
        { role: 'user', content: "I think neural networks are simple." },
      ];

      log('Generating...');
      await gen(messages, {
        max_new_tokens: 512,
        do_sample: false,
        // NOTE(review): hard-coded stop-token ids, presumably specific to these models — confirm.
        eos_token_id: [2, 11],
        streamer,
        tokenizer_encode_kwargs: { enable_thinking: enableThinking },
      });

      const fullText = allChunks.join('');
      const closeIdx = fullText.indexOf(THINK_CLOSE);
      log(`Total chunks: ${allChunks.length}`);
      log(`Total chars: ${fullText.length}`);
      log(`Contains </think>: ${closeIdx !== -1}`);
      log(`Contains <|im_end|>: ${fullText.includes(END_OF_TURN)}`);

      log(`First 3 chunks: ${allChunks.slice(0, 3).map(c => JSON.stringify(c)).join(', ')}`);
      log(`Last 3 chunks: ${allChunks.slice(-3).map(c => JSON.stringify(c)).join(', ')}`);

      if (closeIdx !== -1) {
        const afterThink = fullText.slice(closeIdx + THINK_CLOSE.length)
          .replace(/<\|im_end\|>/g, '').trim();
        log(`Content after </think>: ${JSON.stringify(afterThink.slice(0, 200))}`);
      } else {
        log(`❌ No </think> found!`);
        log(`Full output (last 300): ${JSON.stringify(fullText.slice(-300))}`);
      }

      // Simulate the parser
      const { content, reasoning, inThink } = simulateThinkParser(allChunks, enableThinking);
      log(`Parser result: content=${JSON.stringify(content.trim().slice(0, 200))}`);
      log(`Parser result: reasoning_length=${reasoning.length}`);
      log(`Parser result: still_in_think=${inThink}`);
      log(`Would show "No response": ${!content.trim()}`);
    }

    log('\n✅ Test complete!');
    statusEl.textContent = 'Test complete!';
  } catch (e) {
    // Surface generation failures on the page instead of leaving a stale
    // "Model loaded!" status and an unhandled promise rejection.
    log(`❌ Generation error: ${e?.message ?? e}`);
    statusEl.textContent = 'Generation failed';
  } finally {
    // Release the model so repeated clicks don't pile up loaded copies.
    await gen.dispose();
  }
};
|
| 143 |
+
</script>
|
| 144 |
+
</head>
|
| 145 |
+
<body style="font-family: monospace; padding: 20px; background: #1a1a1a; color: #eee;">
|
| 146 |
+
<h2>ONNX WebGPU Think-Tag Test</h2>
|
| 147 |
+
<p id="status">Ready</p>
|
| 148 |
+
<button onclick="runTest('reference')" style="padding: 10px 20px; margin: 5px;">Test Reference Model</button>
|
| 149 |
+
<button onclick="runTest('finetuned')" style="padding: 10px 20px; margin: 5px;">Test Fine-tuned Model</button>
|
| 150 |
+
<hr>
|
| 151 |
+
<pre id="log" style="white-space: pre-wrap; max-height: 80vh; overflow-y: auto;"></pre>
|
| 152 |
+
</body>
|
| 153 |
+
</html>
|