| #!/usr/bin/env node |
| |
| |
| |
| |
| |
| |
| |
|
|
| import http from 'http'; |
| import https from 'https'; |
| import { writeFileSync, mkdirSync } from 'fs'; |
| import { dirname, join } from 'path'; |
| import { fileURLToPath } from 'url'; |
|
|
// ES modules have no built-in __dirname, so derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));

// Reports go into a `logs` directory that is a sibling of this script's folder.
const LOG_DIR = join(dirname(__dirname), 'logs');

// Create the directory up front; `recursive` makes this a no-op when it already exists.
mkdirSync(LOG_DIR, { recursive: true });
|
|
// Command-line arguments that follow the node binary and the script path.
const args = process.argv.slice(2);

/**
 * Read the value of a command-line option.
 *
 * Both `--name value` and `--name=value` are accepted. The first occurrence
 * of the option wins. The fallback is returned when the option is absent,
 * has an empty value, or is immediately followed by another `--flag`
 * (so a flag that is missing its value never swallows the next flag).
 *
 * @param {string} name - Option name without the leading dashes.
 * @param {*} fb - Fallback returned when no usable value is found.
 * @param {string[]} [argv=args] - Argument list to search; defaults to this process's arguments.
 * @returns {*} The option value as a string, or `fb`.
 */
function getArg(name, fb, argv = args) {
  const flag = `--${name}`;
  const inlinePrefix = `${flag}=`;

  for (let i = 0; i < argv.length; i++) {
    const token = argv[i];

    if (token.startsWith(inlinePrefix)) {
      // `--name=` with nothing after the equals sign counts as "not provided".
      return token.slice(inlinePrefix.length) || fb;
    }

    if (token === flag) {
      const next = argv[i + 1];
      return next && !next.startsWith('--') ? next : fb;
    }
  }

  return fb;
}
|
|
// Values used when the corresponding command-line flag is not supplied.
const DEFAULT_BASE_URL = 'http://localhost:8996';
// NOTE(review): a key is hard-coded here as the default — presumably a local
// test credential; confirm it is not a real secret before sharing this file.
const DEFAULT_API_KEY = 'sk-yebainb666sblzhsqjcnmb----12312312';

// Server root that requests are sent to (`--base-url`).
const BASE = getArg('base-url', DEFAULT_BASE_URL);
// Bearer token sent with every request (`--api-key`).
const KEY = getArg('api-key', DEFAULT_API_KEY);
|
|
// Model identifiers to exercise, in run order. Each entry is passed as the
// `model` argument when the test suite is run for it.
const MODELS = [
  'gemini-2.5-flash',
  'gemini-3.0-flash',
  'gpt-4o',
  'gpt-5',
  'claude-4.5-sonnet',
  'claude-sonnet-4.6',
  'claude-opus-4.6',
  'glm-5',
  'grok-3',
  'kimi-k2.5',
  'swe-1.5',
];
|
|
/**
 * Prompts sent to every model, each with a checker for the reply.
 *
 * `check(reply, model)` returns `{ pass, bad }`, where `bad` is a short
 * failure reason, or `null` when the check passes.
 */
const TESTS = [
  {
    // The reply must name the requested model's family (the part of the model
    // id before the first '-') and must not mention Cascade/Codeium/Windsurf.
    // NOTE(review): presumably those names belong to the product serving the
    // models and should never leak into replies — confirm.
    name: 'identity',
    prompt: 'What model are you? Who developed you? Answer in exactly one sentence.',
    check: (r, model) => {
      const reply = r.toLowerCase();
      const leaked = ['cascade', 'codeium', 'windsurf'].some((word) => reply.includes(word));
      const family = model.split('-')[0].toLowerCase();
      const hasModel = reply.includes(family);

      let bad = null;
      if (leaked) bad = 'says Cascade/Codeium/Windsurf';
      else if (!hasModel) bad = 'missing model name';

      return { pass: !leaked && hasModel, bad };
    },
  },
  {
    name: 'knowledge',
    prompt: 'What is the capital of France? Answer in one word.',
    check: (r) => {
      const pass = r.toLowerCase().includes('paris');
      return { pass, bad: pass ? null : 'wrong answer' };
    },
  },
  {
    name: 'math',
    prompt: 'What is 17 * 23? Answer with just the number.',
    check: (r) => {
      // Require 391 as a standalone number so replies such as "1391" or
      // "3910" are not accepted as correct.
      const pass = /(?<!\d)391(?!\d)/.test(r);
      return { pass, bad: pass ? null : 'wrong math' };
    },
  },
  {
    name: 'coding',
    prompt: 'Write a Python function that returns the sum of a list. Output ONLY the function, no explanation.',
    check: (r) => {
      const pass = r.includes('def ') && r.includes('sum');
      // Report a reason on failure so the run log says what was missing.
      return { pass, bad: pass ? null : 'no Python sum function in reply' };
    },
  },
];
|
|
/**
 * Send one non-streaming chat-completion request and resolve with a summary
 * of the response.
 *
 * POSTs `{ model, messages: [{ role: 'user', content: prompt }], stream: false }`
 * to `/v1/chat/completions` under BASE, authenticated with `Bearer KEY`.
 *
 * @param {string} model - Model identifier placed in the request body.
 * @param {string} prompt - Text of the single user message.
 * @returns {Promise<{status: number, content: string, error: string, retryAfter: number, retryHeader: (string|undefined)}>}
 *   `content` is the first choice's message content ('' if missing), `error`
 *   is `error.message` from the body ('' if missing), `retryAfter` is
 *   `error.retry_after_ms` (0 if missing) and `retryHeader` is the raw
 *   `Retry-After` response header.
 *   Rejects on a network/stream error, when the body cannot be parsed as the
 *   expected JSON, or with `Error('timeout')` if nothing settles within 60 s.
 */
function chat(model, prompt) {
  const TIMEOUT_MS = 60000;

  return new Promise((resolve, reject) => {
    const url = new URL('/v1/chat/completions', BASE);
    const transport = url.protocol === 'https:' ? https : http;
    const body = JSON.stringify({ model, messages: [{ role: 'user', content: prompt }], stream: false });

    let timer;
    // Settle the promise and cancel the watchdog timer, so a finished request
    // does not keep the event loop (and the process) alive for the remainder
    // of the timeout window.
    const finish = (settle, value) => {
      clearTimeout(timer);
      settle(value);
    };

    const req = transport.request(url, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${KEY}` },
    }, (res) => {
      const chunks = [];
      res.on('data', (chunk) => chunks.push(chunk));
      // A response stream that fails midway never emits 'end'; reject right away.
      res.on('error', (err) => finish(reject, err));
      res.on('end', () => {
        try {
          const d = JSON.parse(Buffer.concat(chunks).toString());
          finish(resolve, {
            status: res.statusCode,
            content: d.choices?.[0]?.message?.content || '',
            error: d.error?.message || '',
            retryAfter: d.error?.retry_after_ms || 0,
            retryHeader: res.headers['retry-after'],
          });
        } catch (e) {
          // Include the HTTP status: an unparsable body is usually an error page.
          finish(reject, new Error(`invalid response body (status ${res.statusCode}): ${e.message}`, { cause: e }));
        }
      });
    });

    req.on('error', (err) => finish(reject, err));

    timer = setTimeout(() => {
      req.destroy();
      reject(new Error('timeout'));
    }, TIMEOUT_MS);

    req.write(body);
    req.end();
  });
}
|
|
/**
 * Pause for a number of milliseconds.
 *
 * @param {number} ms - Delay before the returned promise resolves.
 * @returns {Promise<void>} Resolves (with no value) once the delay has elapsed.
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
|
|
// Upper bound on requests made for a single test before giving up.
const MAX_TEST_ATTEMPTS = 5;

// Substrings of an error message that are treated as throttling even when the
// HTTP status is not 429. '限制' is Chinese for "limit".
// NOTE(review): this literal was garbled in the file and has been restored
// from its byte pattern — confirm it matches the server's actual message.
const RATE_LIMIT_MARKERS = ['限制', 'rate limit'];

// True when a chat() response indicates the request was throttled.
function isRateLimited(r) {
  const message = String(r.error ?? '');
  return r.status === 429 || RATE_LIMIT_MARKERS.some((marker) => message.includes(marker));
}

// Seconds to wait before retrying: the Retry-After header when it is a
// positive integer, otherwise the body's retry_after_ms, otherwise 60 s.
function retryDelaySeconds(r) {
  const fromHeader = Number.parseInt(r.retryHeader ?? '', 10);
  if (Number.isFinite(fromHeader) && fromHeader > 0) return fromHeader;
  return Math.ceil((r.retryAfter || 60000) / 1000);
}

// Run one test against one model, retrying on throttling and thrown errors.
// Always returns exactly one result record, even when every attempt fails.
async function runTestWithRetries(model, test) {
  let lastFailure = 'no attempts made';

  for (let attempt = 1; attempt <= MAX_TEST_ATTEMPTS; attempt++) {
    const isLastAttempt = attempt === MAX_TEST_ATTEMPTS;
    try {
      const r = await chat(model, test.prompt);

      if (isRateLimited(r)) {
        lastFailure = 'rate limited (retries exhausted)';
        // No point waiting out the limit if there is no attempt left to use it.
        if (isLastAttempt) break;
        const waitSec = retryDelaySeconds(r);
        console.log(` ⏳ ${model}/${test.name}: rate limited, waiting ${waitSec}s...`);
        // Extra 2 s of slack on top of the advertised wait.
        await sleep(waitSec * 1000 + 2000);
        continue;
      }

      if (r.status !== 200 || !r.content) {
        return { test: test.name, pass: false, reason: r.error || `status=${r.status} empty`, content: '' };
      }

      const check = test.check(r.content, model);
      // Only a 150-character excerpt of the reply is kept in the report.
      return { test: test.name, pass: check.pass, reason: check.bad, content: r.content.slice(0, 150) };
    } catch (e) {
      lastFailure = e.message;
      if (!isLastAttempt) {
        console.log(` ⚠ ${model}/${test.name}: ${e.message}, retry ${attempt}/${MAX_TEST_ATTEMPTS}`);
        await sleep(3000);
      }
    }
  }

  return { test: test.name, pass: false, reason: lastFailure, content: '' };
}

/**
 * Run every entry of TESTS against one model, sequentially.
 *
 * @param {string} model - Model identifier passed to chat() and to each checker.
 * @returns {Promise<Array<{test: string, pass: boolean, reason: (string|null), content: string}>>}
 *   One record per test, in TESTS order. A test that is still throttled or
 *   still throwing after all attempts is recorded as failed with the last
 *   failure as its reason.
 */
async function testModel(model) {
  const results = [];
  for (const test of TESTS) {
    results.push(await runTestWithRetries(model, test));
  }
  return results;
}
|
|
/**
 * Run the whole suite: test every model in MODELS one after another, print
 * per-model results and a summary to stdout, and write the full report as
 * pretty-printed JSON into LOG_DIR.
 *
 * @returns {Promise<void>} Resolves once the report file has been written.
 */
async function main() {
  console.log(`\n Model Identity & Quality Test`);
  console.log(` Base: ${BASE} Models: ${MODELS.length}\n`);

  const report = [];

  for (const model of MODELS) {
    console.log(` ▸ ${model}`);
    const results = await testModel(model);
    const passed = results.filter((r) => r.pass).length;
    const total = results.length;

    // A model with no recorded results must not be shown as fully passing
    // (0 === 0), so "all passed" additionally requires at least one result.
    let icon = '✗';
    if (total > 0 && passed === total) icon = '✓';
    else if (passed > 0) icon = '△';
    console.log(` ${icon} ${passed}/${total} passed`);

    for (const r of results) {
      if (!r.pass) console.log(` ✗ ${r.test}: ${r.reason || 'failed'}`);
    }
    report.push({ model, passed, total, results });
  }

  console.log(`\n ── Summary ──`);
  let totalPass = 0;
  let totalTests = 0;
  for (const r of report) {
    const icon = r.total > 0 && r.passed === r.total ? '✓' : '✗';
    console.log(` ${icon} ${r.model.padEnd(22)} ${r.passed}/${r.total}`);
    totalPass += r.passed;
    totalTests += r.total;
  }
  // Avoid printing "NaN%" when nothing was run at all.
  const percent = totalTests > 0 ? Math.round(totalPass / totalTests * 100) : 0;
  console.log(`\n Total: ${totalPass}/${totalTests} (${percent}%)\n`);

  // ':' and '.' in the ISO timestamp are replaced so the name is valid on
  // file systems that reject those characters.
  const stamp = new Date().toISOString().replace(/[:.]/g, '-');
  const logFile = join(LOG_DIR, `identity-test-${stamp}.json`);
  writeFileSync(logFile, JSON.stringify(report, null, 2));
  console.log(` Report: ${logFile}\n`);
}
|
|
// Script entry point: run the suite, and on any unhandled failure print the
// error message and exit with a non-zero status.
main().catch((err) => {
  console.error('Fatal:', err.message);
  process.exit(1);
});
|
|