lex-interviewer-chat

Sleeping

lex-interviewer-chat / dist /test-webgpu.html

Bobber

add back test-webgpu.html to dist

5477c5d about 2 months ago

5.51 kB

	<!DOCTYPE html>
	<html>
	<head>
	<title>ONNX WebGPU Test</title>
	<script type="module">
	import { pipeline, TextStreamer } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@4.0.0-next.8/dist/transformers.min.js';

	const log = (msg) => {
	console.log(msg);
	document.getElementById('log').textContent += msg + '\n';
	};

	const MODELS = {
	reference: 'onnx-community/NVIDIA-Nemotron-3-Nano-4B-BF16-ONNX',
	finetuned: 'bobber/lex-interviewer-nemotron-4b-grpo-v12',
	};

	window.runTest = async (modelKey) => {
	const modelId = MODELS[modelKey];
	document.getElementById('log').textContent = '';
	log(`Testing: ${modelId}`);
	log(`Device: webgpu`);

	// Check WebGPU
	if (!navigator.gpu) { log('❌ No WebGPU!'); return; }
	const adapter = await navigator.gpu.requestAdapter();
	log(`GPU: ${adapter ? (adapter.info?.description \|\| adapter.name \|\| 'adapter found') : 'no adapter'}`);

	log('Loading pipeline (this downloads ~2.5GB)...');
	const statusEl = document.getElementById('status');
	statusEl.textContent = 'Downloading model...';

	let gen;
	try {
	gen = await pipeline('text-generation', modelId, {
	dtype: 'q4',
	device: 'webgpu',
	progress_callback: (p) => {
	if (p.status === 'progress') {
	const pct = Math.round((p.loaded / p.total) * 100);
	statusEl.textContent = `Downloading: ${pct}%`;
	}
	}
	});
	} catch(e) {
	log(`❌ Pipeline error: ${e.message}`);
	return;
	}
	statusEl.textContent = 'Model loaded!';
	log('Model loaded ✓');

	// Test with thinking enabled
	for (const enableThinking of [true, false]) {
	log(`\n=== enable_thinking: ${enableThinking} ===`);

	const allChunks = [];
	const streamer = new TextStreamer(gen.tokenizer, {
	skip_prompt: true,
	skip_special_tokens: false,
	callback_function: (output) => {
	allChunks.push(output);
	},
	});

	const messages = [
	{ role: 'system', content: 'You are an AI interviewer. Ask one question at a time.' },
	{ role: 'user', content: "I think neural networks are simple." },
	];

	log('Generating...');
	await gen(messages, {
	max_new_tokens: 512,
	do_sample: false,
	eos_token_id: [2, 11],
	streamer,
	tokenizer_encode_kwargs: { enable_thinking: enableThinking },
	});

	const fullText = allChunks.join('');
	log(`Total chunks: ${allChunks.length}`);
	log(`Total chars: ${fullText.length}`);
	log(`Contains </think>: ${fullText.includes('</think>')}`);
	log(`Contains <\|im_end\|>: ${fullText.includes('<\|im_end\|>')}`);

	log(`First 3 chunks: ${allChunks.slice(0, 3).map(c => JSON.stringify(c)).join(', ')}`);
	log(`Last 3 chunks: ${allChunks.slice(-3).map(c => JSON.stringify(c)).join(', ')}`);

	if (fullText.includes('</think>')) {
	const afterThink = fullText.slice(fullText.indexOf('</think>') + 8)
	.replace(/<\\|im_end\\|>/g, '').trim();
	log(`Content after </think>: ${JSON.stringify(afterThink.slice(0, 200))}`);
	} else {
	log(`❌ No </think> found!`);
	log(`Full output (last 300): ${JSON.stringify(fullText.slice(-300))}`);
	}

	// Simulate the parser
	let isFirst = true;
	let inThink = false;
	let reasoning = '';
	let content = '';
	let buf = '';
	for (const chunk of allChunks) {
	if (!chunk \|\| chunk === '<\|im_end\|>') continue;
	let text = chunk;
	if (isFirst && enableThinking) { text = '<think>' + text; isFirst = false; }
	else if (isFirst) { isFirst = false; }
	buf += text;
	while (buf.length > 0) {
	if (inThink) {
	const ci = buf.indexOf('</think>');
	if (ci !== -1) {
	reasoning += buf.slice(0, ci);
	buf = buf.slice(ci + 8);
	inThink = false;
	continue;
	}
	reasoning += buf;
	buf = '';
	break;
	}
	const oi = buf.indexOf('<think>');
	if (oi !== -1) {
	content += buf.slice(0, oi);
	buf = buf.slice(oi + 7);
	inThink = true;
	continue;
	}
	content += buf;
	buf = '';
	break;
	}
	}
	log(`Parser result: content=${JSON.stringify(content.trim().slice(0, 200))}`);
	log(`Parser result: reasoning_length=${reasoning.length}`);
	log(`Parser result: still_in_think=${inThink}`);
	log(`Would show "No response": ${!content.trim()}`);
	}

	log('\n✅ Test complete!');
	statusEl.textContent = 'Test complete!';
	};
	</script>
	</head>
	<body style="font-family: monospace; padding: 20px; background: #1a1a1a; color: #eee;">
	<h2>ONNX WebGPU Think-Tag Test</h2>
	<p id="status">Ready</p>
	<button onclick="runTest('reference')" style="padding: 10px 20px; margin: 5px;">Test Reference Model</button>
	<button onclick="runTest('finetuned')" style="padding: 10px 20px; margin: 5px;">Test Fine-tuned Model</button>
	<hr>
	<pre id="log" style="white-space: pre-wrap; max-height: 80vh; overflow-y: auto;"></pre>
	</body>
	</html>