/**
 * message-convert.js - message format conversion
 *
 * Converts OpenAI / Anthropic `messages` arrays into the single text string
 * accepted by chataibot.pro.
 *
 * chataibot.pro limits a single message to 2500 characters.
 * Strategy for over-long text:
 *   1. Compress first: truncate tool results -> drop middle turns -> shrink system
 *   2. Still too long -> split into several messages (context prefill + final)
 */

import config from './config.js';
import { serializeTools, serializeToolsAnthropic } from './tool-prompt.js';

/**
 * Per-message character limit used for chataibot.pro.
 * The real limit is 2500; 100 characters are kept as a safety margin.
 */
const MAX_TEXT_LEN = 2400;

/**
 * Matches one `[Tool Result: <id>]` block: the header line plus its content
 * (capture group 1), up to the next role label, the next tool result header,
 * a tool_calls fence, or the end of the text.
 *
 * The boundary accepts `[Tool Result:` as well as `[Tool Result]` so that
 * adjacent tool results (joined by a single newline in the Anthropic path)
 * are treated as separate blocks instead of being merged into one match.
 *
 * Only used with String.prototype.replace, which resets `lastIndex` for
 * global regexes, so sharing the instance is safe.
 */
const TOOL_RESULT_BLOCK_RE =
  /\[Tool Result: [^\]]*\]\n([\s\S]*?)(?=\n\[(?:User|Assistant|System)\]|\n\[Tool Result[:\]]|\n```tool_calls|$)/g;

/**
 * Extract plain text from a `content` field.
 * OpenAI / Anthropic content may be a string or an array of multimodal parts;
 * only parts with `type === 'text'` contribute, joined by newlines.
 *
 * @param {*} content - string, array of parts, or anything else
 * @returns {string} extracted text ('' for null/undefined/other falsy values)
 */
function extractText(content) {
  if (typeof content === 'string') return content;
  if (Array.isArray(content)) {
    return content
      // `?.` tolerates null/undefined entries instead of throwing.
      .filter((part) => part?.type === 'text')
      .map((part) => part.text)
      .join('\n');
  }
  return String(content || '');
}

/**
 * Shrink every `[Tool Result: ...]` block to at most `maxLen` characters
 * (header included). Keeps the head and the tail of the content and puts an
 * ellipsis marker in between.
 *
 * @param {string} text - full conversation text
 * @param {number} [maxLen=600] - maximum length of one tool result block
 * @returns {string} text with oversized tool result blocks shortened
 */
function truncateToolResults(text, maxLen = 600) {
  return text.replace(TOOL_RESULT_BLOCK_RE, (match, content) => {
    if (match.length <= maxLen) return match;

    const header = match.substring(0, match.indexOf('\n') + 1);
    // 30 characters are reserved for the truncation marker.
    const keep = maxLen - header.length - 30;

    let replacement;
    if (keep <= 0) {
      replacement = header + '(content truncated)';
    } else {
      const half = Math.floor(keep / 2);
      replacement =
        header +
        content.substring(0, half) +
        '\n...(truncated)...\n' +
        content.substring(content.length - half);
    }

    // With a long header and short content the placeholder can be longer than
    // the block it replaces; never let "compression" grow the text.
    return replacement.length < match.length ? replacement : match;
  });
}

/**
 * Split text into `[System]` / `[User]` / `[Assistant]` segments.
 * Each segment runs from its role label up to the next label (or the end of
 * the text), with trailing whitespace removed. Text before the first label is
 * not part of any segment.
 *
 * @param {string} text
 * @returns {{role: string, start: number, text: string}[]}
 */
function parseSegments(text) {
  const markers = [...text.matchAll(/\[(System|User|Assistant)\] /g)];
  return markers.map((marker, i) => {
    const end = i + 1 < markers.length ? markers[i + 1].index : text.length;
    return {
      role: marker[1],
      start: marker.index,
      text: text.substring(marker.index, end).trimEnd(),
    };
  });
}

/**
 * Compress text so that it fits into a single message (<= MAX_TEXT_LEN).
 *
 * Strategy (pressure increases step by step):
 *   1. Truncate tool results to 600 / 300 / 100 / 40 characters
 *   2. Replace every tool result with a one-line placeholder
 *   3. Drop middle turns (keep system + last user + last segment)
 *   4. Truncate the system prompt to make room for the last message
 *   5. Hard truncation, keeping the tail of the original text
 *
 * @param {string} text
 * @returns {string} text of at most MAX_TEXT_LEN characters; because of the
 *   final hard truncation this function always returns a string that fits.
 */
function compressToSingle(text) {
  if (text.length <= MAX_TEXT_LEN) return text;
  const originalLen = text.length;

  // Step 1: progressively truncate tool result content.
  let result = text;
  for (const maxToolLen of [600, 300, 100, 40]) {
    result = truncateToolResults(result, maxToolLen);
    if (result.length <= MAX_TEXT_LEN) {
      console.log(`[Convert] 压缩 ${originalLen} → ${result.length} (截断 tool results)`);
      return result;
    }
  }

  // Step 2: replace every tool result body with a one-line marker.
  const omitted = '(omitted)';
  result = result.replace(TOOL_RESULT_BLOCK_RE, (match, content) => {
    // Content already shorter than the marker: replacing would not help.
    if (content.length <= omitted.length) return match;
    return match.substring(0, match.indexOf('\n')) + '\n' + omitted;
  });
  if (result.length <= MAX_TEXT_LEN) {
    console.log(`[Convert] 压缩 ${originalLen} → ${result.length} (清除 tool results)`);
    return result;
  }

  const segments = parseSegments(result);
  const systemSeg = segments[0]?.role === 'System' ? segments[0] : null;
  const lastUserIdx = segments.map((seg) => seg.role).lastIndexOf('User');

  // Step 3: keep system + last user + last segment (if different).
  // Pointless with two segments or fewer: nothing would be dropped.
  if (segments.length > 2) {
    const mustKeep = new Set();
    if (systemSeg) mustKeep.add(0);
    if (lastUserIdx >= 0) mustKeep.add(lastUserIdx);
    mustKeep.add(segments.length - 1);

    const rebuilt = segments
      .filter((_, i) => mustKeep.has(i))
      .map((seg) => seg.text)
      .join('\n\n');
    if (rebuilt.length <= MAX_TEXT_LEN) {
      console.log(`[Convert] 压缩 ${originalLen} → ${rebuilt.length} (删除中间轮次)`);
      return rebuilt;
    }
  }

  // Step 4: truncate the system prompt to leave room for the last message.
  // Also reached when there are only two segments (e.g. System + User).
  // `lastMsgIdx > 0` makes sure the system segment is not paired with itself.
  const lastMsgIdx = lastUserIdx >= 0 ? lastUserIdx : segments.length - 1;
  if (systemSeg && lastMsgIdx > 0) {
    const lastMsg = segments[lastMsgIdx].text;
    const budgetForSystem = MAX_TEXT_LEN - lastMsg.length - 20;
    if (budgetForSystem > 200) {
      const truncated = systemSeg.text.substring(0, budgetForSystem) + '\n\n' + lastMsg;
      console.log(`[Convert] 压缩 ${originalLen} → ${truncated.length} (截断 system)`);
      return truncated;
    }
    // No meaningful room for the system prompt: keep only the last message.
    if (lastMsg.length <= MAX_TEXT_LEN) {
      console.log(`[Convert] 压缩 ${originalLen} → ${lastMsg.length} (仅 user 消息)`);
      return lastMsg;
    }
  }

  // Step 5: hard truncation (keep the tail, the latest content matters most).
  const truncated = text.substring(text.length - MAX_TEXT_LEN);
  console.log(`[Convert] 压缩 ${originalLen} → ${MAX_TEXT_LEN} (硬截断末尾)`);
  return truncated;
}

/**
 * Main entry: turn text into the array of messages to send to chataibot.
 *
 * Compression into a single message is preferred (no extra cost). The
 * multi-message split below is a fallback; with the current compressToSingle,
 * which always returns a string that fits, it is not expected to run.
 *
 * @param {string} text - fully converted conversation text
 * @returns {string[]} messages, length >= 1, each <= MAX_TEXT_LEN
 */
export function splitToChunks(text) {
  if (!text) return [''];

  // Try to compress into one message (the path taken in practice).
  const single = compressToSingle(text);
  if (single && single.length <= MAX_TEXT_LEN) return [single];

  // Fallback: still too long after compression -> split into several messages.
  const originalLen = text.length;

  // Truncate tool results to 300 characters first (less volume, context kept).
  const compressed = truncateToolResults(text, 300);
  const segments = parseSegments(compressed);
  if (segments.length === 0) return hardSplit(compressed);

  // Greedily pack segments into chunks.
  const chunks = [];
  let currentChunk = '';
  for (const seg of segments) {
    if (seg.text.length > MAX_TEXT_LEN) {
      // A single oversized segment: flush what we have, then split it by size.
      if (currentChunk) {
        chunks.push(currentChunk.trimEnd());
        currentChunk = '';
      }
      chunks.push(...hardSplit(seg.text));
      continue;
    }

    const candidate = currentChunk ? currentChunk + '\n\n' + seg.text : seg.text;
    if (candidate.length <= MAX_TEXT_LEN) {
      currentChunk = candidate;
    } else {
      if (currentChunk) chunks.push(currentChunk.trimEnd());
      currentChunk = seg.text;
    }
  }
  if (currentChunk) chunks.push(currentChunk.trimEnd());

  console.log(`[Convert] 多消息拆分 ${originalLen} → ${chunks.length} 条 [${chunks.map(c => c.length).join(', ')}]`);
  return chunks;
}

/**
 * Split text by character count, preferring to break at a newline.
 *
 * @param {string} text
 * @returns {string[]} pieces of at most MAX_TEXT_LEN characters
 */
function hardSplit(text) {
  const chunks = [];
  let pos = 0;
  while (pos < text.length) {
    if (text.length - pos <= MAX_TEXT_LEN) {
      chunks.push(text.substring(pos));
      break;
    }
    let cut = pos + MAX_TEXT_LEN;
    // Break at the last newline, unless that would leave a chunk of 500
    // characters or fewer.
    const lastNL = text.lastIndexOf('\n', cut);
    if (lastNL > pos + 500) cut = lastNL;
    chunks.push(text.substring(pos, cut));
    pos = cut;
    // Skip the newline(s) at the cut so the next chunk does not start blank.
    while (pos < text.length && text[pos] === '\n') pos++;
  }
  return chunks;
}

/**
 * Render one tool call as a fenced `tool_calls` block.
 * The name is JSON-escaped so quotes or backslashes cannot break the JSON.
 *
 * @param {*} name - tool / function name
 * @param {string} argsJson - arguments, already serialized as JSON text
 * @returns {string}
 */
function formatToolCall(name, argsJson) {
  return `\`\`\`tool_calls\n[{"name": ${JSON.stringify(String(name))}, "arguments": ${argsJson}}]\n\`\`\``;
}

/**
 * Render the system text plus conversation turns as the final text.
 * - Exactly one turn with role 'user': plain text, no role labels.
 * - Otherwise: `[System]` / `[User]` / `[Assistant]` labelled paragraphs
 *   (every role other than 'assistant' is labelled `[User]`).
 *
 * @param {string} fullSystem - system prompt including the tool prompt
 * @param {Array<{role: string, content: *}>} turns
 * @returns {string}
 */
function renderTranscript(fullSystem, turns) {
  // Single turn: only one user message.
  if (turns.length === 1 && turns[0].role === 'user') {
    const userText = extractText(turns[0].content);
    return fullSystem ? `${fullSystem}\n\n${userText}` : userText;
  }

  // Multi turn: text with role labels.
  let text = '';
  if (fullSystem) text += `[System] ${fullSystem}\n\n`;
  for (const turn of turns) {
    const label = turn.role === 'assistant' ? 'Assistant' : 'User';
    text += `[${label}] ${extractText(turn.content)}\n\n`;
  }
  return text.trim();
}

/**
 * OpenAI messages -> chataibot text
 *
 * @param {Array} messages - [{ role: 'system'|'developer'|'user'|'assistant'|'tool', content }]
 * @param {Array} [tools] - OpenAI tools array
 * @param {*} [toolChoice] - tool_choice parameter
 * @returns {string}
 */
export function openaiToText(messages, tools, toolChoice) {
  if (!messages || !messages.length) return '';

  // 'developer' is the newer OpenAI name for system-level instructions.
  const isSystem = (m) => m.role === 'system' || m.role === 'developer';
  const system = messages
    .filter(isSystem)
    .map((m) => extractText(m.content))
    .join('\n');
  const conversation = messages.filter((m) => !isSystem(m));

  // Tool definitions are appended to the end of the system prompt.
  const toolPrompt = serializeTools(tools, toolChoice);
  const fullSystem = system + toolPrompt;

  const processedConversation = conversation.map((msg) => {
    if (msg.role === 'tool') {
      // Tool output, rendered as a user-side text block.
      const toolName = msg.name || msg.tool_call_id || 'unknown';
      return {
        role: 'user',
        content: `[Tool Result: ${toolName}]\n${extractText(msg.content)}`,
      };
    }

    if (msg.role === 'assistant' && Array.isArray(msg.tool_calls)) {
      // Tool calls issued by the assistant, rendered as text.
      let callText = extractText(msg.content) || '';
      for (const tc of msg.tool_calls) {
        const fn = tc?.function || {};
        // OpenAI sends arguments as a JSON string; serialize anything else
        // rather than interpolating "[object Object]".
        const argsJson =
          typeof fn.arguments === 'string'
            ? fn.arguments || '{}'
            : JSON.stringify(fn.arguments ?? {});
        callText += '\n' + formatToolCall(fn.name, argsJson);
      }
      return { role: 'assistant', content: callText.trim() };
    }

    return msg;
  });

  return renderTranscript(fullSystem, processedConversation);
}

/**
 * Anthropic messages -> chataibot text
 *
 * @param {string|Array|undefined} system - system prompt (separate field in Anthropic)
 * @param {Array} messages - [{ role: 'user'|'assistant', content }]
 * @param {Array} [tools] - Anthropic tools array
 * @param {*} [toolChoice] - tool_choice parameter
 * @returns {string}
 */
export function anthropicToText(system, messages, tools, toolChoice) {
  if (!messages || !messages.length) return extractText(system) || '';

  // Tool definitions are appended to the end of the system prompt.
  const toolPrompt = serializeToolsAnthropic(tools, toolChoice);
  const systemText = extractText(system) || '';
  const fullSystem = systemText + toolPrompt;

  // Text of a tool_result block; a missing content field yields '' instead of
  // the literal string "undefined".
  const toolResultText = (content) => {
    if (content == null) return '';
    if (typeof content === 'string' || Array.isArray(content)) return extractText(content);
    return JSON.stringify(content);
  };

  // Flatten text / tool_use / tool_result blocks of array content into text.
  // Other block types are dropped.
  const processedMessages = messages.map((msg) => {
    if (!Array.isArray(msg.content)) return msg;

    const parts = [];
    for (const block of msg.content) {
      if (block?.type === 'text') {
        parts.push(block.text);
      } else if (block?.type === 'tool_use') {
        parts.push(formatToolCall(block.name, JSON.stringify(block.input || {})));
      } else if (block?.type === 'tool_result') {
        parts.push(`[Tool Result: ${block.tool_use_id || 'unknown'}]\n${toolResultText(block.content)}`);
      }
    }
    return { role: msg.role, content: parts.join('\n') };
  });

  return renderTranscript(fullSystem, processedMessages);
}

/**
 * Resolve the requested model name through `config.modelMapping`.
 * Order: exact match -> match without an 8-digit date suffix -> name as given.
 *
 * @param {string} [requestModel]
 * @returns {string} mapped model name; 'gpt-4o' when no model was requested
 */
export function resolveModel(requestModel) {
  if (!requestModel) return 'gpt-4o';

  const mapping = config.modelMapping;
  // Own-property lookup so names such as "constructor" or "toString" do not
  // resolve to members inherited from Object.prototype.
  const lookup = (key) =>
    Object.prototype.hasOwnProperty.call(mapping, key) ? mapping[key] : undefined;

  // Exact match.
  const exact = lookup(requestModel);
  if (exact) return exact;

  // Try without the version suffix (claude-3-sonnet-20240229 -> claude-3-sonnet).
  const base = lookup(requestModel.replace(/-\d{8}$/, ''));
  if (base) return base;

  // Pass the name through unchanged and let chataibot decide.
  return requestModel;
}

/**
 * Determine the vendor of a model from its name prefix.
 *
 * @param {string} model
 * @returns {string} 'OpenAI', 'Anthropic', 'Google' or '其他'
 */
function getOwner(model) {
  const openaiPrefixes = ['gpt-', 'o1', 'o3', 'o4'];
  if (openaiPrefixes.some((prefix) => model.startsWith(prefix))) return 'OpenAI';
  if (model.startsWith('claude-')) return 'Anthropic';
  if (model.startsWith('gemini-')) return 'Google';
  return '其他';
}

/**
 * List the available models: the distinct target names in
 * `config.modelMapping`, in first-seen order.
 *
 * @returns {{id: string, object: string, created: number, owned_by: string}[]}
 */
export function getModelList() {
  // A Set removes duplicates and keeps insertion order.
  const targets = new Set(Object.values(config.modelMapping));
  return [...targets].map((target) => ({
    id: target,
    object: 'model',
    created: 1700000000,
    owned_by: getOwner(target),
  }));
}