W
File size: 5,977 Bytes
2b64d42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#!/usr/bin/env node
/**
 * Model identity & quality test — verifies that every model responds with
 * the correct identity (not "Cascade") and passes basic knowledge checks.
 * Waits out rate limits automatically.
 *
 * Usage: node scripts/model-identity-test.js [--base-url http://...] [--api-key sk-...]
 */

import http from 'http';
import https from 'https';
import { writeFileSync, mkdirSync } from 'fs';
import { dirname, join } from 'path';
import { fileURLToPath } from 'url';

const __dirname = dirname(fileURLToPath(import.meta.url));
const LOG_DIR = join(__dirname, '..', 'logs');
mkdirSync(LOG_DIR, { recursive: true });

// Command-line arguments without the node binary and script path.
const args = process.argv.slice(2);

/**
 * Read the value that follows `--<name>` on the command line.
 *
 * @param {string} name - Flag name without the leading dashes.
 * @param {*} fb - Fallback returned when the flag is absent or is not
 *   followed by a non-empty value.
 * @returns {*} The argument after the flag, or `fb`.
 */
function getArg(name, fb) {
  const flagIndex = args.indexOf(`--${name}`);
  if (flagIndex === -1) {
    return fb;
  }
  const value = args[flagIndex + 1];
  return value ? value : fb;
}

// Server base URL; override with --base-url.
const BASE = getArg('base-url', 'http://localhost:8996');

// Bearer token sent with every request; override with --api-key.
// NOTE(review): the fallback is a credential-looking literal committed to
// source — confirm it is not a live key.
const KEY = getArg('api-key', 'sk-yebainb666sblzhsqjcnmb----12312312');

// Model ids exercised by the run, in execution order.
const MODELS = [
  'gemini-2.5-flash',
  'gemini-3.0-flash',
  'gpt-4o',
  'gpt-5',
  'claude-4.5-sonnet',
  'claude-sonnet-4.6',
  'claude-opus-4.6',
  'glm-5',
  'grok-3',
  'kimi-k2.5',
  'swe-1.5',
];

/**
 * Prompt/validator pairs that are run against every model.
 *
 * Each entry has a `name`, the `prompt` sent to the model, and a
 * `check(reply, model)` function returning `{ pass, bad }`, where `bad` is a
 * short failure reason, or `null` when the reply passes.
 */
const TESTS = [
  {
    name: 'identity',
    prompt: 'What model are you? Who developed you? Answer in exactly one sentence.',
    check: (r, model) => {
      const low = r.toLowerCase();
      // A reply that mentions Cascade, Codeium or Windsurf counts as a wrong identity.
      const bad = ['cascade', 'codeium', 'windsurf'].some((word) => low.includes(word));
      // Only the family prefix (text before the first '-') is required,
      // e.g. "gpt" for "gpt-4o".
      const hasModel = low.includes(model.split('-')[0].toLowerCase());
      let reason = null;
      if (bad) {
        reason = 'says Cascade/Codeium/Windsurf';
      } else if (!hasModel) {
        reason = 'missing model name';
      }
      return { pass: !bad && hasModel, bad: reason };
    },
  },
  {
    name: 'knowledge',
    prompt: 'What is the capital of France? Answer in one word.',
    check: (r) => {
      const pass = r.toLowerCase().includes('paris');
      return { pass, bad: pass ? null : 'wrong answer' };
    },
  },
  {
    name: 'math',
    prompt: 'What is 17 * 23? Answer with just the number.',
    check: (r) => {
      const pass = r.includes('391');
      return { pass, bad: pass ? null : 'wrong math' };
    },
  },
  {
    name: 'coding',
    prompt: 'Write a Python function that returns the sum of a list. Output ONLY the function, no explanation.',
    check: (r) => {
      // Loose heuristic: the reply must contain a `def` and mention `sum`.
      const pass = r.includes('def ') && r.includes('sum');
      // Report a reason on failure, like the other checks do.
      return { pass, bad: pass ? null : 'missing function definition' };
    },
  },
];

/**
 * Send one non-streaming chat-completion request to the server at BASE.
 *
 * @param {string} model - Model id placed in the request body.
 * @param {string} prompt - Text sent as a single user message.
 * @returns {Promise<{status: number, content: *, error: *, retryAfter: *, retryHeader: (string|undefined)}>}
 *   Resolves for any response whose body parses as JSON, whatever the HTTP
 *   status. `content` is the first choice's message content (or ''), `error`
 *   is `error.message` from the body (or ''), `retryAfter` is
 *   `error.retry_after_ms` (or 0), and `retryHeader` is the raw `Retry-After`
 *   response header. Rejects on transport errors, on a body that is not
 *   valid JSON, or with `Error('timeout')` after 60 seconds.
 */
function chat(model, prompt) {
  return new Promise((resolve, reject) => {
    // The path is absolute, so any path component of BASE is discarded.
    const url = new URL('/v1/chat/completions', BASE);
    const mod = url.protocol === 'https:' ? https : http;
    const body = JSON.stringify({ model, messages: [{ role: 'user', content: prompt }], stream: false });

    // Every outcome goes through `settle` so the deadline timer is always
    // cleared; a pending timer would keep the process alive for up to 60 s
    // after the request has already finished.
    let timer;
    const settle = (fn, value) => {
      clearTimeout(timer);
      fn(value);
    };

    const req = mod.request(url, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${KEY}` },
    }, (res) => {
      const chunks = [];
      res.on('data', (c) => chunks.push(c));
      // A response stream that fails mid-body would otherwise be unhandled.
      res.on('error', (err) => settle(reject, err));
      res.on('end', () => {
        try {
          const d = JSON.parse(Buffer.concat(chunks).toString());
          const content = d.choices?.[0]?.message?.content || '';
          const error = d.error?.message || '';
          const retryAfter = d.error?.retry_after_ms || 0;
          settle(resolve, { status: res.statusCode, content, error, retryAfter, retryHeader: res.headers['retry-after'] });
        } catch (e) {
          settle(reject, e);
        }
      });
    });
    req.on('error', (err) => settle(reject, err));

    // Overall deadline for the whole exchange. Reject first so 'timeout' is
    // the reported reason rather than the error raised by destroy().
    timer = setTimeout(() => {
      reject(new Error('timeout'));
      req.destroy();
    }, 60000);

    req.write(body);
    req.end();
  });
}

/**
 * Pause for a given time.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Resolves once the delay has elapsed.
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}

/**
 * Run every entry of TESTS against one model and collect the outcomes.
 *
 * Each test gets up to 5 attempts. Rate-limited responses are waited out and
 * retried; thrown errors (transport failure, timeout, bad JSON) are retried
 * after 3 seconds. Exactly one result is recorded per test, including when
 * every attempt was rate limited.
 *
 * @param {string} model - Model id to query.
 * @returns {Promise<Array<{test: string, pass: boolean, reason: (string|null), content: string}>>}
 *   One entry per test; `content` is the reply truncated to 150 characters
 *   ('' on failure to get a reply).
 */
async function testModel(model) {
  const MAX_ATTEMPTS = 5;
  const results = [];
  for (const test of TESTS) {
    let attempt = 0;
    while (attempt < MAX_ATTEMPTS) {
      attempt++;
      try {
        const r = await chat(model, test.prompt);
        const errText = String(r.error ?? '');
        // Rate limiting is signalled by HTTP 429 or by an error message in
        // Chinese ('限制') or English; the English match ignores case.
        const rateLimited = r.status === 429
          || errText.includes('限制')
          || errText.toLowerCase().includes('rate limit');
        if (rateLimited) {
          if (attempt >= MAX_ATTEMPTS) {
            // Out of attempts: record the failure instead of silently
            // dropping the test from the report.
            results.push({ test: test.name, pass: false, reason: `rate limited after ${MAX_ATTEMPTS} attempts`, content: '' });
            break;
          }
          // Prefer the Retry-After header (seconds), then the body's
          // retry_after_ms, then a 60 s default.
          const waitSec = Number.parseInt(r.retryHeader || '0', 10) || Math.ceil((r.retryAfter || 60000) / 1000);
          console.log(`    ⏳ ${model}/${test.name}: rate limited, waiting ${waitSec}s...`);
          // Two extra seconds of slack on top of the advertised wait.
          await sleep(waitSec * 1000 + 2000);
          continue;
        }
        if (r.status !== 200 || !r.content) {
          results.push({ test: test.name, pass: false, reason: r.error || `status=${r.status} empty`, content: '' });
          break;
        }
        const check = test.check(r.content, model);
        results.push({ test: test.name, pass: check.pass, reason: check.bad, content: r.content.slice(0, 150) });
        break;
      } catch (e) {
        if (attempt >= MAX_ATTEMPTS) {
          results.push({ test: test.name, pass: false, reason: e.message, content: '' });
        } else {
          console.log(`    ⚠ ${model}/${test.name}: ${e.message}, retry ${attempt}/${MAX_ATTEMPTS}`);
          await sleep(3000);
        }
      }
    }
  }
  return results;
}

/**
 * Entry point: test every model in MODELS one after another, print
 * per-model results and an overall summary, then write the full report as
 * JSON into LOG_DIR.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log(`\n  Model Identity & Quality Test`);
  console.log(`  Base: ${BASE}  Models: ${MODELS.length}\n`);

  const report = [];

  for (const model of MODELS) {
    console.log(`  ▸ ${model}`);
    const results = await testModel(model);
    const passed = results.filter((r) => r.pass).length;
    const total = results.length;
    // A model with no recorded results must not be shown as passing.
    let icon = '✗';
    if (total > 0 && passed === total) {
      icon = '✓';
    } else if (passed > 0) {
      icon = '△';
    }
    console.log(`    ${icon} ${passed}/${total} passed`);
    for (const r of results) {
      if (!r.pass) console.log(`      ✗ ${r.test}: ${r.reason || 'failed'}`);
    }
    report.push({ model, passed, total, results });
  }

  console.log(`\n  ── Summary ──`);
  let totalPass = 0;
  let totalTests = 0;
  for (const r of report) {
    const icon = r.total > 0 && r.passed === r.total ? '✓' : '✗';
    console.log(`  ${icon} ${r.model.padEnd(22)} ${r.passed}/${r.total}`);
    totalPass += r.passed;
    totalTests += r.total;
  }
  // Guard the division so an empty run prints 0% instead of NaN%.
  const percent = totalTests > 0 ? Math.round(totalPass / totalTests * 100) : 0;
  console.log(`\n  Total: ${totalPass}/${totalTests} (${percent}%)\n`);

  // ':' and '.' are replaced so the ISO timestamp is a safe file name.
  const logFile = join(LOG_DIR, `identity-test-${new Date().toISOString().replace(/[:.]/g, '-')}.json`);
  writeFileSync(logFile, JSON.stringify(report, null, 2));
  console.log(`  Report: ${logFile}\n`);
}

// Start the run; any uncaught failure is reported and ends the process with exit code 1.
main().catch((err) => {
  console.error('Fatal:', err.message);
  process.exit(1);
});