lex-interviewer-chat / dist /test-webgpu.html
Bobber
add back test-webgpu.html to dist
5477c5d
<!DOCTYPE html>
<html>
<head>
<title>ONNX WebGPU Test</title>
<script type="module">
import { pipeline, TextStreamer } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@4.0.0-next.8/dist/transformers.min.js';
/**
 * Emits one diagnostic line to two sinks: the browser console and the
 * on-page element with id "log" (a newline is appended on the page only).
 *
 * @param {*} msg - Value to report; it is concatenated with '\n' for the page.
 */
function log(msg) {
  console.log(msg);
  const pageLog = document.getElementById('log');
  pageLog.textContent = pageLog.textContent + (msg + '\n');
}
// Lookup table from the short key handed to runTest() (see the page buttons)
// to the model id string that is passed on to pipeline().
const MODELS = {
  // Baseline model used for comparison.
  reference: 'onnx-community/NVIDIA-Nemotron-3-Nano-4B-BF16-ONNX',

  // Fine-tuned interviewer model.
  finetuned: 'bobber/lex-interviewer-nemotron-4b-grpo-v12',
};
// Marker strings searched for in the generated text.
const THINK_OPEN = '<think>';
const THINK_CLOSE = '</think>';
const END_OF_TURN = '<|im_end|>';

/**
 * Replays streamed chunks through a think-tag splitter and reports where
 * each piece of text ends up.
 *
 * Each chunk is scanned on its own: text inside <think>...</think> is
 * accumulated as reasoning, everything else as content. Nothing is carried
 * over between chunks, so a tag that is split across two chunks is NOT
 * recognised. That is intentional here: this routine reproduces a parser's
 * behaviour for diagnosis rather than trying to be a better parser.
 * NOTE(review): presumably this mirrors the chat app's streaming parser;
 * keep the two in sync if that parser changes.
 *
 * @param {string[]} chunks - Streamed text pieces in arrival order.
 * @param {boolean} enableThinking - When true, '<think>' is prepended to the
 *   first non-skipped chunk, so parsing starts inside a think section.
 * @returns {{content: string, reasoning: string, inThink: boolean}}
 *   Accumulated visible content, accumulated reasoning, and whether the
 *   parser was still inside a think section after the last chunk.
 */
function simulateThinkParser(chunks, enableThinking) {
  let content = '';
  let reasoning = '';
  let inThink = false;
  let isFirst = true;

  for (const chunk of chunks) {
    // Empty chunks and a bare end-of-turn marker carry no text to classify.
    if (!chunk || chunk === END_OF_TURN) continue;

    let rest = chunk;
    if (isFirst) {
      if (enableThinking) rest = THINK_OPEN + rest;
      isFirst = false;
    }

    while (rest.length > 0) {
      // Look for whichever tag would flip the current state.
      const tag = inThink ? THINK_CLOSE : THINK_OPEN;
      const at = rest.indexOf(tag);
      const piece = at === -1 ? rest : rest.slice(0, at);
      if (inThink) {
        reasoning += piece;
      } else {
        content += piece;
      }
      if (at === -1) break;
      rest = rest.slice(at + tag.length);
      inThink = !inThink;
    }
  }

  return { content, reasoning, inThink };
}

/**
 * Loads the selected model on WebGPU, generates one reply with
 * enable_thinking set to true and then false, and logs diagnostics about
 * think tags in the streamed output plus the result of the simulated parser.
 *
 * Progress and results are written through log() and to the element with id
 * "status". The function returns early (after logging the reason) when the
 * model key is unknown, WebGPU or an adapter is unavailable, the pipeline
 * fails to load, or generation throws.
 *
 * @param {string} modelKey - Key into MODELS ('reference' or 'finetuned').
 * @returns {Promise<void>}
 */
window.runTest = async (modelKey) => {
  document.getElementById('log').textContent = '';
  const statusEl = document.getElementById('status');

  // Reject unknown keys up front instead of handing `undefined` to pipeline().
  if (!Object.hasOwn(MODELS, modelKey)) {
    log(`❌ Unknown model key: ${JSON.stringify(modelKey)}`);
    return;
  }
  const modelId = MODELS[modelKey];

  log(`Testing: ${modelId}`);
  log(`Device: webgpu`);

  // Check WebGPU
  if (!navigator.gpu) { log('❌ No WebGPU!'); return; }
  const adapter = await navigator.gpu.requestAdapter();
  if (!adapter) {
    // Without an adapter the WebGPU backend cannot run; stop before the
    // large model download starts.
    log('GPU: no adapter');
    log('❌ No WebGPU adapter available!');
    return;
  }
  // `||` (not `??`) on purpose: an empty description should fall through.
  log(`GPU: ${adapter.info?.description || adapter.name || 'adapter found'}`);

  log('Loading pipeline (this downloads ~2.5GB)...');
  statusEl.textContent = 'Downloading model...';

  let gen;
  try {
    gen = await pipeline('text-generation', modelId, {
      dtype: 'q4',
      device: 'webgpu',
      progress_callback: (p) => {
        // Skip events without a usable total so the status never shows "NaN%".
        if (p.status !== 'progress' || !(p.total > 0)) return;
        const pct = Math.round((p.loaded / p.total) * 100);
        statusEl.textContent = `Downloading: ${pct}%`;
      },
    });
  } catch (e) {
    log(`❌ Pipeline error: ${e?.message ?? e}`);
    statusEl.textContent = 'Model load failed';
    return;
  }
  statusEl.textContent = 'Model loaded!';
  log('Model loaded ✓');

  // Run once with thinking enabled, then once with it disabled.
  for (const enableThinking of [true, false]) {
    log(`\n=== enable_thinking: ${enableThinking} ===`);

    const allChunks = [];
    const streamer = new TextStreamer(gen.tokenizer, {
      skip_prompt: true,
      // Special tokens are kept so the tag checks below can see them.
      skip_special_tokens: false,
      callback_function: (output) => {
        allChunks.push(output);
      },
    });

    const messages = [
      { role: 'system', content: 'You are an AI interviewer. Ask one question at a time.' },
      { role: 'user', content: "I think neural networks are simple." },
    ];

    log('Generating...');
    try {
      await gen(messages, {
        max_new_tokens: 512,
        do_sample: false,
        // NOTE(review): hard-coded token ids; confirm they match the tokenizer.
        eos_token_id: [2, 11],
        streamer,
        // NOTE(review): presumably forwarded to the chat template; confirm
        // against the Transformers.js version pinned in the import.
        tokenizer_encode_kwargs: { enable_thinking: enableThinking },
      });
    } catch (e) {
      // Surface generation failures on the page instead of leaving an
      // unhandled rejection and a stale "Model loaded!" status.
      log(`❌ Generation error: ${e?.message ?? e}`);
      statusEl.textContent = 'Generation failed';
      return;
    }

    const fullText = allChunks.join('');
    log(`Total chunks: ${allChunks.length}`);
    log(`Total chars: ${fullText.length}`);
    log(`Contains </think>: ${fullText.includes(THINK_CLOSE)}`);
    log(`Contains <|im_end|>: ${fullText.includes(END_OF_TURN)}`);
    log(`First 3 chunks: ${allChunks.slice(0, 3).map((c) => JSON.stringify(c)).join(', ')}`);
    log(`Last 3 chunks: ${allChunks.slice(-3).map((c) => JSON.stringify(c)).join(', ')}`);

    const closeAt = fullText.indexOf(THINK_CLOSE);
    if (closeAt !== -1) {
      const afterThink = fullText
        .slice(closeAt + THINK_CLOSE.length)
        .replaceAll(END_OF_TURN, '')
        .trim();
      log(`Content after </think>: ${JSON.stringify(afterThink.slice(0, 200))}`);
    } else {
      log(`❌ No </think> found!`);
      log(`Full output (last 300): ${JSON.stringify(fullText.slice(-300))}`);
    }

    // Simulate the parser
    const { content, reasoning, inThink } = simulateThinkParser(allChunks, enableThinking);
    log(`Parser result: content=${JSON.stringify(content.trim().slice(0, 200))}`);
    log(`Parser result: reasoning_length=${reasoning.length}`);
    log(`Parser result: still_in_think=${inThink}`);
    log(`Would show "No response": ${!content.trim()}`);
  }

  log('\n✅ Test complete!');
  statusEl.textContent = 'Test complete!';
};
</script>
</head>
<body style="font-family: monospace; padding: 20px; background: #1a1a1a; color: #eee;">
<h2>ONNX WebGPU Think-Tag Test</h2>
<p id="status">Ready</p>
<button onclick="runTest('reference')" style="padding: 10px 20px; margin: 5px;">Test Reference Model</button>
<button onclick="runTest('finetuned')" style="padding: 10px 20px; margin: 5px;">Test Fine-tuned Model</button>
<hr>
<pre id="log" style="white-space: pre-wrap; max-height: 80vh; overflow-y: auto;"></pre>
</body>
</html>