File size: 8,236 Bytes
097fb32 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 | /**
* e2e-thinking-truncation.mjs
*
* 实际请求测试:thinking 截断场景
*
* 测试场景:
* 1. 请求 thinking 模式,验证 thinking block 正确返回,不泄漏到正文
* 2. 带工具 + thinking,验证 thinking 剥离后工具调用续写正常触发
* 3. 带工具 + thinking,验证 200-char 修复(thinking 剥离后正文短但工具续写仍触发)
*/
import http from 'http';
// Running tally of scenario outcomes; mutated by the test() helper, read by the final summary.
let failed = 0;
let passed = 0;

// Server under test: BASE_URL from the environment, or the local default when it is unset or empty.
const BASE = process.env.BASE_URL || 'http://localhost:3010';

// Parsed once so the request helper can pick out hostname and port.
const url = new URL(BASE);
/**
 * POST `body` to `/v1/messages` on the configured server and collect the
 * JSON payload of every SSE `data: ` line in the response.
 *
 * Only the `http` module is used, so the URL scheme is not consulted; when the
 * base URL carries no explicit port the request goes to port 3010.
 *
 * @param {string} name - Scenario label. Not sent to the server and not used by the request.
 * @param {object} body - Request payload; serialized with JSON.stringify.
 * @param {number} [timeoutMs=120000] - Overall deadline in milliseconds.
 * @returns {Promise<object[]>} Parsed events in arrival order. `data: ` lines whose
 *   payload is not valid JSON are skipped.
 * @throws {Error} The promise rejects on timeout, on a socket/stream error, or
 *   when the server answers with a non-2xx status.
 */
function runAnthropicTest(name, body, timeoutMs = 120000) {
  return new Promise((resolve, reject) => {
    const data = JSON.stringify(body);
    const timer = setTimeout(() => {
      reject(new Error(`超时 ${timeoutMs}ms`));
      // Also tear down the socket: an abandoned in-flight request can keep the process alive.
      req.destroy();
    }, timeoutMs);
    const req = http.request({
      hostname: url.hostname, port: url.port || 3010, path: '/v1/messages', method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': 'test',
        'anthropic-version': '2023-06-01',
        'Content-Length': Buffer.byteLength(data),
      },
    }, (res) => {
      // Decode at the stream level so a multi-byte character that straddles two
      // network chunks is reassembled instead of turning into replacement characters.
      res.setEncoding('utf8');
      const isSuccess = res.statusCode >= 200 && res.statusCode < 300;
      const events = [];
      let buf = '';
      let errorBody = '';

      // Parse one complete line; anything that is not a JSON `data: ` line is ignored.
      const collect = (line) => {
        if (!line.startsWith('data: ')) return;
        try { events.push(JSON.parse(line.slice(6).trim())); } catch { /* non-JSON payload: skip */ }
      };

      res.on('data', (chunk) => {
        if (!isSuccess) {
          // Keep just enough of the error body to make the failure message useful.
          if (errorBody.length < 500) errorBody += chunk;
          return;
        }
        buf += chunk;
        const lines = buf.split('\n');
        // The last piece may be an incomplete line; hold it back until more data arrives.
        buf = lines.pop();
        for (const line of lines) collect(line);
      });
      res.on('end', () => {
        clearTimeout(timer);
        if (!isSuccess) {
          // Without this, an error response would look like a stream with zero events
          // and assertions of the form "text must not contain X" would pass vacuously.
          reject(new Error(`HTTP ${res.statusCode}: ${errorBody.slice(0, 500)}`));
          return;
        }
        // The stream may end without a trailing newline; flush the held-back line.
        collect(buf);
        resolve(events);
      });
      res.on('error', (err) => { clearTimeout(timer); reject(err); });
    });
    req.on('error', (err) => { clearTimeout(timer); reject(err); });
    req.write(data);
    req.end();
  });
}
/**
 * Reduce a list of parsed stream events to the three values the scenarios inspect.
 *
 * @param {object[]} events - Parsed event objects, in arrival order.
 * @returns {{thinkingContent: string, textContent: string, stopReason: string}}
 *   `thinkingContent` / `textContent` are the concatenated `thinking_delta` /
 *   `text_delta` payloads; `stopReason` is the `stop_reason` of the last
 *   `message_delta` seen, or '' when it is missing.
 */
function parseEvents(events) {
  const summary = { thinkingContent: '', textContent: '', stopReason: '' };

  for (const event of events) {
    switch (event.type) {
      case 'content_block_delta': {
        const delta = event.delta;
        if (delta?.type === 'thinking_delta') {
          summary.thinkingContent += delta.thinking || '';
        } else if (delta?.type === 'text_delta') {
          summary.textContent += delta.text || '';
        }
        break;
      }
      case 'message_delta':
        // Each message_delta overwrites the previous value, so the last one wins.
        summary.stopReason = event.delta?.stop_reason || '';
        break;
      default:
        // Other event types carry nothing these scenarios look at.
        break;
    }
  }

  return summary;
}
/**
 * Run one scenario, print its outcome and update the module-level counters.
 *
 * A scenario passes when `fn` completes without throwing; anything thrown or
 * rejected counts as a failure and is reported here rather than propagated.
 *
 * @param {string} name - Title printed next to the pass/fail marker.
 * @param {Function} fn - Scenario body; its result is awaited.
 * @returns {Promise<void>}
 */
async function test(name, fn) {
  let succeeded = false;
  let failure;

  try {
    await fn();
    succeeded = true;
  } catch (caught) {
    failure = caught;
  }

  if (succeeded) {
    console.log(` ✅ ${name}`);
    passed += 1;
    return;
  }

  console.error(` ❌ ${name}`);
  console.error(` ${failure.message}`);
  failed += 1;
}
/**
 * Minimal assertion helper: throw when `cond` is falsy.
 *
 * @param {*} cond - Value expected to be truthy.
 * @param {string} [msg] - Error message; 'Assertion failed' is used when omitted or empty.
 * @throws {Error} When `cond` is falsy.
 */
function assert(cond, msg) {
  if (cond) {
    return;
  }
  throw new Error(msg || 'Assertion failed');
}
// Tool definitions sent with the tool-calling scenario. Both tools take only
// string parameters and every parameter is required, so each schema is
// generated from the tool's list of field names.
const TOOLS = [
  { name: 'Write', description: 'Write a file', fields: ['file_path', 'content'] },
  { name: 'Read', description: 'Read a file', fields: ['file_path'] },
].map(({ name, description, fields }) => ({
  name,
  description,
  input_schema: {
    type: 'object',
    properties: Object.fromEntries(fields.map((field) => [field, { type: 'string' }])),
    required: [...fields],
  },
}));
// Startup banner: suite title, the server being exercised, and a reminder that
// the scenarios need a model that actually supports thinking mode.
const bannerLines = [
  '\n📦 E2E: thinking 截断场景测试\n',
  ` 服务地址: ${BASE}`,
  ` 注意:以下测试需要模型实际支持 thinking 模式\n`,
];
for (const bannerLine of bannerLines) {
  console.log(bannerLine);
}
// ==================== Test 1: basic thinking-mode check ====================
// Sends a trivial question with thinking enabled and checks that:
//   - reasoning arrives as thinking deltas,
//   - the visible text contains no <thinking> markup,
//   - the visible text is non-empty,
//   - the first thinking delta precedes the first text delta (the ordering the
//     scenario title promises).
await test('thinking 模式:thinking block 出现在正文之前,不泄漏到 text', async () => {
  const events = await runAnthropicTest('thinking-basic', {
    model: 'claude-sonnet-4-6-thinking',
    max_tokens: 16000,
    thinking: { type: 'enabled', budget_tokens: 10000 },
    messages: [{
      role: 'user',
      content: '简单回答:1+1等于几?',
    }],
    stream: true,
  });
  const { thinkingContent, textContent } = parseEvents(events);

  // A thinking block must be present.
  assert(thinkingContent.length > 0, `期望有 thinking block,实际为空`);

  // Thinking markup must not show up in the visible text.
  assert(
    !textContent.includes('<thinking>'),
    `正文不应包含 <thinking> 标签,实际正文: ${textContent.substring(0, 200)}`,
  );
  assert(
    !textContent.includes('</thinking>'),
    `正文不应包含 </thinking> 标签`,
  );

  // The visible text must carry real content.
  assert(textContent.trim().length > 0, `正文应有内容,实际为空`);

  // Ordering: the first non-blank thinking delta must come before the first
  // non-blank text delta. Blank deltas are ignored so stray whitespace cannot
  // decide the outcome.
  const firstDeltaIndex = (kind, field) => events.findIndex(
    (e) => e.type === 'content_block_delta'
      && e.delta?.type === kind
      && typeof e.delta[field] === 'string'
      && e.delta[field].trim() !== '',
  );
  const firstThinkingAt = firstDeltaIndex('thinking_delta', 'thinking');
  const firstTextAt = firstDeltaIndex('text_delta', 'text');
  assert(
    firstThinkingAt < firstTextAt,
    `thinking block 应出现在正文之前,实际首个 thinking_delta 位于事件 #${firstThinkingAt},首个 text_delta 位于事件 #${firstTextAt}`,
  );

  console.log(` thinking: ${thinkingContent.length} chars, text: ${textContent.length} chars`);
});
// ==================== Test 2: no thinking markup in the text when thinking is not requested ====================
// Sends a request WITHOUT the `thinking` parameter and checks that the visible
// text is non-empty and free of <thinking> / </thinking> tags.
await test('非 thinking 模式:即使模型输出 <thinking> 也不泄漏到正文', async () => {
  // NOTE(review): an earlier comment here said a plain model name plus a system
  // prompt would be used to coax the model into emitting thinking tags, but the
  // request below uses the '-thinking' model name and sends no system prompt.
  // Confirm which setup is intended.
  const events = await runAnthropicTest('thinking-leak', {
    model: 'claude-sonnet-4-6-thinking',
    max_tokens: 8000,
    // The `thinking` parameter is deliberately omitted.
    messages: [{
      role: 'user',
      content: '请用中文简短回答:什么是递归?',
    }],
    stream: true,
  });
  const { textContent } = parseEvents(events);

  assert(
    !textContent.includes('<thinking>'),
    `正文不应包含 <thinking> 开标签,实际: ${textContent.substring(0, 300)}`,
  );
  assert(
    !textContent.includes('</thinking>'),
    `正文不应包含 </thinking> 闭标签`,
  );

  // Without this check the scenario passes vacuously whenever the response
  // carries no text at all (for example an empty or failed stream), because an
  // empty string trivially contains no tags.
  assert(textContent.trim().length > 0, `正文应有内容,实际为空`);

  console.log(` text: ${textContent.length} chars, preview: ${textContent.substring(0, 80).replace(/\n/g, '\\n')}`);
});
// ==================== Test 3: tools + thinking, tool call returned in full ====================
// Asks for a Write tool call with thinking enabled and checks that no thinking
// markup reaches the visible text and that tool-call arguments form complete,
// parseable JSON. If the model answers without calling a tool, the scenario
// only requires a non-empty text answer.
await test('thinking + 工具调用:工具参数完整,thinking 不泄漏', async () => {
  const events = await runAnthropicTest('thinking-tools', {
    model: 'claude-sonnet-4-6-thinking',
    max_tokens: 16000,
    thinking: { type: 'enabled', budget_tokens: 8000 },
    tools: TOOLS,
    messages: [{
      role: 'user',
      content: '请用 Write 工具写一个包含 50 行注释的 Python hello world 文件到 /tmp/hello.py',
    }],
    stream: true,
  });
  const { thinkingContent, textContent } = parseEvents(events);

  // Collect the tool-call events.
  const toolStarts = events.filter(e => e.type === 'content_block_start' && e.content_block?.type === 'tool_use');
  const toolInputDeltas = events.filter(e => e.type === 'content_block_delta' && e.delta?.type === 'input_json_delta');

  assert(
    !textContent.includes('<thinking>') && !textContent.includes('</thinking>'),
    `正文不应包含 thinking 标签,实际: ${textContent.substring(0, 200)}`,
  );

  if (toolStarts.length > 0) {
    // Reassemble the arguments of each tool_use block separately, matching JSON
    // fragments to their block by the event `index`. Concatenating every
    // fragment in the stream would glue the arguments of two tool calls into
    // `{...}{...}` and report a bogus parse failure. If the events carry no
    // `index`, all fragments match and the result equals plain concatenation.
    const toolCalls = toolStarts.map((start) => {
      const toolInputRaw = toolInputDeltas
        .filter(e => e.index === start.index)
        .map(e => e.delta.partial_json || '')
        .join('');
      try {
        return { name: start.content_block?.name, input: JSON.parse(toolInputRaw) };
      } catch (e) {
        throw new Error(`工具调用参数 JSON 解析失败: ${e.message}\n原始: ${toolInputRaw.substring(0, 200)}`);
      }
    });

    // Field checks apply to the first tool call, which is also the one logged below.
    const [firstCall] = toolCalls;
    assert(typeof firstCall.input?.file_path === 'string', '工具参数应包含 file_path');
    assert(typeof firstCall.input?.content === 'string', '工具参数应包含 content');
    console.log(` thinking: ${thinkingContent.length} chars, tool: ${firstCall.name}, content: ${firstCall.input.content?.length} chars`);
  } else {
    // No tool call: there must at least be a text answer.
    assert(textContent.trim().length > 0, '无工具调用时正文不应为空');
    console.log(` thinking: ${thinkingContent.length} chars, text: ${textContent.length} chars (无工具调用)`);
  }
});
// ==================== Summary ====================
// Print the pass/fail tally, then exit with status 1 if any scenario failed.
const hasFailures = failed > 0;
console.log(`\n结果:${passed} 通过,${failed} 失败\n`);
if (hasFailures) {
  process.exit(1);
}
|