// ikun2 / message-convert.js
// bingn's picture
// Upload 19 files
// e3e4fcf verified
/**
* message-convert.js - 消息格式转换
*
* 将 OpenAI / Anthropic 的 messages 数组转换为
* chataibot.pro 接受的单一 text 字符串
*
* chataibot.pro 限制单条消息 2500 字符。
* 超长文本处理策略:
* 1. 优先压缩: 截断 tool results → 删除中间轮次 → 压缩 system
* 2. 仍超限 → 拆分为多条消息 (context prefill + final)
*/
import config from './config.js';
import { serializeTools, serializeToolsAnthropic } from './tool-prompt.js';
/**
 * Per-message character cap applied to text sent to chataibot.pro.
 * The service's actual limit is 2500; 100 characters are kept as headroom.
 */
const MAX_TEXT_LEN = 2400;
/**
 * Extract the plain text from a message `content` field.
 *
 * OpenAI / Anthropic `content` may be a plain string or a multimodal array of
 * typed parts. For arrays, only parts whose `type` is `'text'` contribute and
 * their `text` values are joined with newlines.
 *
 * @param {*} content - A string, an array of content parts, or any other value.
 * @returns {string} The extracted text; `''` for null/undefined/other falsy input.
 */
function extractText(content) {
  if (typeof content === 'string') return content;
  if (Array.isArray(content)) {
    // `?.` skips null/undefined entries instead of throwing on `part.type`.
    const textParts = content.filter((part) => part?.type === 'text');
    return textParts.map((part) => part.text).join('\n');
  }
  return String(content || '');
}
/**
 * Shorten every `[Tool Result: ...]` block in `text` to roughly `maxLen`
 * characters.
 *
 * A block runs from its header line up to (not including) the next line that
 * starts with a `[User]` / `[Assistant]` / `[System]` tag, another
 * `[Tool Result...]` header, a "```tool_calls" fence, or the end of the text.
 * Blocks already within `maxLen` are left untouched. Longer blocks keep the
 * header plus the first and last halves of the remaining budget, joined by a
 * truncation marker. When the header alone uses up the budget, the content is
 * replaced by a short placeholder.
 *
 * @param {string} text - Text that may contain tool result blocks.
 * @param {number} [maxLen=600] - Target maximum length of one block.
 * @returns {string} The text with oversized tool results shortened.
 */
function truncateToolResults(text, maxLen = 600) {
  // The lookahead accepts both "[Tool Result]" and "[Tool Result: name]" so
  // that back-to-back results are handled one by one instead of as one blob.
  const toolResultBlock =
    /\[Tool Result: [^\]]*\]\n([\s\S]*?)(?=\n\[(?:User|Assistant|System|Tool Result(?:: [^\]]*)?)\]|\n```tool_calls|$)/g;

  return text.replace(toolResultBlock, (match, content) => {
    if (match.length <= maxLen) return match;

    const header = match.substring(0, match.indexOf('\n') + 1);
    // 30 characters are reserved for the truncation marker and slack.
    const keep = maxLen - header.length - 30;
    if (keep <= 0) return `${header}(content truncated)`;

    const half = Math.floor(keep / 2);
    const head = content.substring(0, half);
    const tail = content.substring(content.length - half);
    return `${header}${head}\n...(truncated)...\n${tail}`;
  });
}
/**
 * Split role-tagged text into `[System]` / `[User]` / `[Assistant]` segments.
 *
 * A segment starts at a role tag located at the beginning of a line and
 * extends to the next such tag (or the end of the text). Tags that appear in
 * the middle of a line are treated as ordinary content, so quoting a tag
 * inside a message does not split it. Any text before the first tag is not
 * part of any segment.
 *
 * @param {string} text - Role-tagged conversation text.
 * @returns {Array<{role: string, start: number, text: string}>} Segments in
 *   order of appearance; `text` includes the tag and has trailing whitespace
 *   removed. Empty when no tag is found.
 */
function parseSegments(text) {
  const roleTag = /^\[(System|User|Assistant)\] /gm;

  const segments = [];
  for (const match of text.matchAll(roleTag)) {
    segments.push({ role: match[1], start: match.index, text: '' });
  }

  // Each segment ends where the next one starts; the last runs to the end.
  segments.forEach((segment, i) => {
    const end = i + 1 < segments.length ? segments[i + 1].start : text.length;
    segment.text = text.substring(segment.start, end).trimEnd();
  });
  return segments;
}
/**
 * Compress `text` so that it fits into a single message of at most
 * MAX_TEXT_LEN characters.
 *
 * Strategy (each step is tried only if the previous one was not enough):
 *   1. Truncate tool results to 600 / 300 / 100 / 40 characters.
 *   2. Replace every tool result body with a one-line `(omitted)` marker.
 *   3. Drop middle turns: keep the system segment, the last user segment and
 *      the final segment (only attempted when there are more than 2 segments).
 *   4. Truncate the system prompt to make room for the last message, or keep
 *      only the last message when there is no useful room for the system.
 *   5. Hard-truncate, keeping the tail of the original text.
 *
 * Because of step 5 this function always returns a string whose length is
 * at most MAX_TEXT_LEN; it never returns `null`.
 * NOTE(review): that makes a caller-side "could not compress" fallback
 * unreachable for non-empty input — confirm this is intended.
 *
 * @param {string} text - Full converted conversation text.
 * @returns {string} Text of length <= MAX_TEXT_LEN.
 */
function compressToSingle(text) {
  if (text.length <= MAX_TEXT_LEN) return text;

  const originalLen = text.length;
  const logCompression = (newLen, how) => {
    console.log(`[Convert] 压缩 ${originalLen} → ${newLen} (${how})`);
  };

  // Step 1: progressively tighter truncation of tool result bodies.
  let result = text;
  for (const maxToolLen of [600, 300, 100, 40]) {
    result = truncateToolResults(result, maxToolLen);
    if (result.length <= MAX_TEXT_LEN) {
      logCompression(result.length, '截断 tool results');
      return result;
    }
  }

  // Step 2: keep only each tool result header followed by "(omitted)".
  // The block ends at the next role tag / tool_calls fence, so several tool
  // results with no role tag between them collapse into the first header.
  result = result.replace(
    /\[Tool Result: [^\]]*\]\n[\s\S]*?(?=\n\[(?:User|Assistant|System)\]|\n```tool_calls|$)/g,
    (match) => {
      const nl = match.indexOf('\n');
      return `${match.substring(0, nl > 0 ? nl : match.length)}\n(omitted)`;
    },
  );
  if (result.length <= MAX_TEXT_LEN) {
    logCompression(result.length, '清除 tool results');
    return result;
  }

  const segments = parseSegments(result);
  const systemSeg = segments[0]?.role === 'System' ? segments[0] : null;
  let lastUserIdx = -1;
  for (let i = segments.length - 1; i >= 0; i--) {
    if (segments[i].role === 'User') {
      lastUserIdx = i;
      break;
    }
  }

  // Step 3: with more than two segments there are middle turns to drop.
  if (segments.length > 2) {
    const mustKeep = new Set([segments.length - 1]);
    if (systemSeg) mustKeep.add(0);
    if (lastUserIdx >= 0) mustKeep.add(lastUserIdx);

    const rebuilt = segments
      .filter((_, i) => mustKeep.has(i))
      .map((segment) => segment.text)
      .join('\n\n');
    if (rebuilt.length <= MAX_TEXT_LEN) {
      logCompression(rebuilt.length, '删除中间轮次');
      return rebuilt;
    }
  }

  // Step 4: shrink the system prompt to leave room for the last message.
  // Runs for any segment count (including <= 2), provided the "last message"
  // is not the system segment itself.
  const lastSeg = segments[lastUserIdx >= 0 ? lastUserIdx : segments.length - 1];
  if (systemSeg && lastSeg !== systemSeg) {
    const lastMsg = lastSeg.text;
    // 20 characters of slack cover the "\n\n" separator.
    const budgetForSystem = MAX_TEXT_LEN - lastMsg.length - 20;
    if (budgetForSystem > 200) {
      const truncated = `${systemSeg.text.substring(0, budgetForSystem)}\n\n${lastMsg}`;
      logCompression(truncated.length, '截断 system');
      return truncated;
    }
    // Not enough room for a meaningful system prompt: keep the message only.
    if (lastMsg.length <= MAX_TEXT_LEN) {
      logCompression(lastMsg.length, '仅 user 消息');
      return lastMsg;
    }
  }

  // Step 5: hard truncation; the tail is kept because the most recent
  // content matters most.
  const truncated = text.substring(text.length - MAX_TEXT_LEN);
  logCompression(MAX_TEXT_LEN, '硬截断末尾');
  return truncated;
}
/**
 * Main entry point: turn converted text into the array of messages to send
 * to chataibot.
 *
 * The text is first compressed into a single message (no extra cost). Only if
 * that does not yield a result within MAX_TEXT_LEN is the text split into
 * several messages, which consumes additional quota: tool results are
 * truncated to 300 characters, the text is parsed into role segments, and the
 * segments are packed greedily into chunks. Segments longer than MAX_TEXT_LEN
 * are hard-split on their own.
 *
 * NOTE(review): compressToSingle in this file ends with a hard truncation, so
 * the multi-message path looks unreachable for non-empty input — confirm
 * whether that is intended.
 *
 * @param {string} text - Full converted text.
 * @returns {string[]} Messages to send; `['']` for empty input.
 */
export function splitToChunks(text) {
  if (!text) return [''];

  // Common case: everything fits into one message after compression.
  const single = compressToSingle(text);
  if (single && single.length <= MAX_TEXT_LEN) return [single];

  // Fallback: split into several messages.
  const originalLen = text.length;
  // Trim tool results to 300 characters: smaller total, context preserved.
  const compressed = truncateToolResults(text, 300);
  const segments = parseSegments(compressed);
  if (segments.length === 0) return hardSplit(compressed);

  const chunks = [];
  let currentChunk = '';
  const flush = () => {
    if (currentChunk) {
      chunks.push(currentChunk.trimEnd());
      currentChunk = '';
    }
  };

  // Greedy packing: append segments while the chunk stays within the limit.
  for (const seg of segments) {
    if (seg.text.length > MAX_TEXT_LEN) {
      flush();
      chunks.push(...hardSplit(seg.text));
      continue;
    }
    const candidate = currentChunk ? `${currentChunk}\n\n${seg.text}` : seg.text;
    if (candidate.length <= MAX_TEXT_LEN) {
      currentChunk = candidate;
    } else {
      flush();
      currentChunk = seg.text;
    }
  }
  flush();

  const sizes = chunks.map((chunk) => chunk.length).join(', ');
  console.log(`[Convert] 多消息拆分 ${originalLen} → ${chunks.length} 条 [${sizes}]`);
  return chunks;
}
/**
 * Split text into chunks of at most `maxLen` characters, preferring to break
 * at a newline.
 *
 * A newline is used as the cut point only when it leaves a reasonably sized
 * chunk (more than `min(500, maxLen / 4)` characters; 500 for the default
 * limit). Otherwise the cut is made at the limit, moved back by one code unit
 * if it would separate the two halves of a UTF-16 surrogate pair. Newlines at
 * the start of the following chunk are skipped.
 *
 * @param {string} text - Text to split.
 * @param {number} [maxLen=MAX_TEXT_LEN] - Maximum chunk length (positive integer).
 * @returns {string[]} Chunks in order; empty array for empty text.
 * @throws {RangeError} If `maxLen` is not a positive integer.
 */
function hardSplit(text, maxLen = MAX_TEXT_LEN) {
  if (!Number.isInteger(maxLen) || maxLen < 1) {
    throw new RangeError(`hardSplit: maxLen must be a positive integer, got ${maxLen}`);
  }

  const isHighSurrogate = (code) => code >= 0xd800 && code <= 0xdbff;
  const isLowSurrogate = (code) => code >= 0xdc00 && code <= 0xdfff;
  // Minimum chunk size for which a newline break is preferred.
  const minChunk = Math.min(500, Math.floor(maxLen / 4));

  const chunks = [];
  let pos = 0;
  while (pos < text.length) {
    if (text.length - pos <= maxLen) {
      chunks.push(text.substring(pos));
      break;
    }

    let cut = pos + maxLen;
    const lastNL = text.lastIndexOf('\n', cut);
    if (lastNL > pos + minChunk) {
      cut = lastNL;
    } else if (
      cut - 1 > pos &&
      isHighSurrogate(text.charCodeAt(cut - 1)) &&
      isLowSurrogate(text.charCodeAt(cut))
    ) {
      // Do not cut an astral character (e.g. an emoji) in half.
      cut -= 1;
    }

    chunks.push(text.substring(pos, cut));
    pos = cut;
    while (pos < text.length && text[pos] === '\n') pos++;
  }
  return chunks;
}
/**
 * Convert OpenAI-style messages into the single text string sent to chataibot.
 *
 * - All `system` messages are joined with newlines, and the serialized tool
 *   definitions are appended to them.
 * - `tool` messages become user messages of the form
 *   `[Tool Result: <name>]\n<content>`.
 * - Assistant messages carrying `tool_calls` get one "```tool_calls" fenced
 *   block per call appended to their text.
 * - A conversation consisting of exactly one user message is returned without
 *   role tags (`<system>\n\n<user>`); otherwise every message is prefixed
 *   with `[System]`, `[User]` or `[Assistant]`. Any role other than
 *   `assistant` is labelled `[User]`.
 *
 * @param {Array} messages - [{ role: 'system'|'user'|'assistant'|'tool', content }]
 * @param {Array} [tools] - OpenAI tools array.
 * @param {*} [toolChoice] - The request's tool_choice value.
 * @returns {string} Converted text; `''` when there are no messages.
 */
export function openaiToText(messages, tools, toolChoice) {
  if (!messages || !messages.length) return '';

  const system = messages
    .filter((m) => m.role === 'system')
    .map((m) => extractText(m.content))
    .join('\n');
  const conversation = messages.filter((m) => m.role !== 'system');

  // Tool definitions are injected at the end of the system prompt.
  const toolPrompt = serializeTools(tools, toolChoice);

  // `function.arguments` is normally a JSON string; serialize objects
  // explicitly so they do not end up as "[object Object]".
  const renderArguments = (args) => {
    if (typeof args === 'string') return args || '{}';
    return args ? JSON.stringify(args) : '{}';
  };

  const processedConversation = conversation.map((msg) => {
    if (msg.role === 'tool') {
      // Tool output is presented to the model as a user-side text block.
      const toolName = msg.name || msg.tool_call_id || 'unknown';
      return {
        role: 'user',
        content: `[Tool Result: ${toolName}]\n${extractText(msg.content)}`,
      };
    }
    if (msg.role === 'assistant' && Array.isArray(msg.tool_calls)) {
      // Tool calls issued by the assistant are rendered as text.
      let callText = extractText(msg.content) || '';
      for (const tc of msg.tool_calls) {
        const fn = tc?.function || {};
        // JSON.stringify escapes quotes/backslashes in the name.
        const name = JSON.stringify(String(fn.name));
        const args = renderArguments(fn.arguments);
        callText += `\n\`\`\`tool_calls\n[{"name": ${name}, "arguments": ${args}}]\n\`\`\``;
      }
      return { role: 'assistant', content: callText.trim() };
    }
    return msg;
  });

  const fullSystem = system + toolPrompt;

  // Single turn: just one user message, no role tags needed.
  if (processedConversation.length === 1 && processedConversation[0].role === 'user') {
    const userText = extractText(processedConversation[0].content);
    return fullSystem ? `${fullSystem}\n\n${userText}` : userText;
  }

  // Multi-turn: role-tagged transcript.
  let text = '';
  if (fullSystem) text += `[System] ${fullSystem}\n\n`;
  for (const msg of processedConversation) {
    const role = msg.role === 'assistant' ? 'Assistant' : 'User';
    text += `[${role}] ${extractText(msg.content)}\n\n`;
  }
  return text.trim();
}
/**
 * Convert Anthropic-style messages into the single text string sent to
 * chataibot.
 *
 * - The system prompt (a separate field in the Anthropic API) gets the
 *   serialized tool definitions appended.
 * - Array content is flattened block by block: `text` blocks contribute their
 *   text, `tool_use` blocks become a "```tool_calls" fenced block, and
 *   `tool_result` blocks become `[Tool Result: <tool_use_id>]\n<content>`.
 *   Other block types are ignored. Blocks are joined with newlines.
 * - A conversation consisting of exactly one user message is returned without
 *   role tags; otherwise every message is prefixed with `[System]`, `[User]`
 *   or `[Assistant]`.
 *
 * @param {string|Array|undefined} system - System prompt.
 * @param {Array} messages - [{ role: 'user'|'assistant', content }]
 * @param {Array} [tools] - Anthropic tools array.
 * @param {*} [toolChoice] - The request's tool_choice value.
 * @returns {string} Converted text; just the system text when there are no messages.
 */
export function anthropicToText(system, messages, tools, toolChoice) {
  if (!messages || !messages.length) return extractText(system) || '';

  // Tool definitions are injected at the end of the system prompt.
  const toolPrompt = serializeToolsAnthropic(tools, toolChoice);
  const systemText = extractText(system) || '';
  const fullSystem = systemText + toolPrompt;

  // A tool_result's content may be a string, an array of blocks, or absent.
  const renderToolResult = (content) => {
    if (typeof content === 'string') return content;
    if (Array.isArray(content)) return extractText(content);
    // Missing content must not show up as the literal text "undefined"/"null".
    if (content == null) return '';
    return JSON.stringify(content);
  };

  // Flatten tool_use / tool_result blocks inside array content into text.
  const processedMessages = messages.map((msg) => {
    if (!Array.isArray(msg.content)) return msg;

    const parts = [];
    for (const block of msg.content) {
      if (!block) continue;
      if (block.type === 'text') {
        parts.push(block.text);
      } else if (block.type === 'tool_use') {
        // JSON.stringify escapes quotes/backslashes in the name.
        const name = JSON.stringify(String(block.name));
        const args = JSON.stringify(block.input || {});
        parts.push(`\`\`\`tool_calls\n[{"name": ${name}, "arguments": ${args}}]\n\`\`\``);
      } else if (block.type === 'tool_result') {
        const toolId = block.tool_use_id || 'unknown';
        parts.push(`[Tool Result: ${toolId}]\n${renderToolResult(block.content)}`);
      }
    }
    return { role: msg.role, content: parts.join('\n') };
  });

  // Single turn: just one user message, no role tags needed.
  if (processedMessages.length === 1 && processedMessages[0].role === 'user') {
    const userText = extractText(processedMessages[0].content);
    return fullSystem ? `${fullSystem}\n\n${userText}` : userText;
  }

  // Multi-turn: role-tagged transcript.
  let text = '';
  if (fullSystem) text += `[System] ${fullSystem}\n\n`;
  for (const msg of processedMessages) {
    const role = msg.role === 'assistant' ? 'Assistant' : 'User';
    text += `[${role}] ${extractText(msg.content)}\n\n`;
  }
  return text.trim();
}
/**
 * Resolve the model name from a request to the name passed on to chataibot.
 *
 * Lookup order in `config.modelMapping`:
 *   1. exact match;
 *   2. match after removing a trailing 8-digit date suffix
 *      (e.g. claude-3-sonnet-20240229 -> claude-3-sonnet);
 *   3. otherwise the requested name is returned unchanged so the upstream
 *      service can decide.
 *
 * Only the mapping's own keys are consulted, so names such as `constructor`
 * or `toString` never resolve to inherited Object members.
 *
 * @param {string} [requestModel] - Model name from the incoming request.
 * @returns {string} Mapped model name; `'gpt-4o'` when the name is missing,
 *   empty, or not a string.
 */
export function resolveModel(requestModel) {
  if (typeof requestModel !== 'string' || !requestModel) return 'gpt-4o';

  const mapping = config.modelMapping;
  const lookup = (name) =>
    (Object.prototype.hasOwnProperty.call(mapping, name) ? mapping[name] : undefined);

  // Strip a version/date suffix such as "-20240229" for the second attempt.
  const base = requestModel.replace(/-\d{8}$/, '');
  return lookup(requestModel) || lookup(base) || requestModel;
}
/**
 * Determine the vendor label for a model name from its prefix
 * (grouping intended to match the official site).
 *
 * @param {string} model - Model name.
 * @returns {string} 'OpenAI', 'Anthropic', 'Google', or '其他' when no prefix matches.
 */
function getOwner(model) {
  const vendorsByPrefix = [
    [['gpt-', 'o1', 'o3', 'o4'], 'OpenAI'],
    [['claude-'], 'Anthropic'],
    [['gemini-'], 'Google'],
  ];
  for (const [prefixes, vendor] of vendorsByPrefix) {
    for (const prefix of prefixes) {
      if (model.startsWith(prefix)) return vendor;
    }
  }
  return '其他';
}
/**
 * Build the list of available models: the distinct target names found in
 * `config.modelMapping`, in order of first appearance, each wrapped in a
 * model descriptor object.
 *
 * @returns {Array<{id: string, object: string, created: number, owned_by: string}>}
 */
export function getModelList() {
  // A Set keeps insertion order, so duplicates collapse onto their first occurrence.
  const uniqueTargets = [...new Set(Object.values(config.modelMapping))];
  return uniqueTargets.map((target) => ({
    id: target,
    object: 'model',
    created: 1700000000,
    owned_by: getOwner(target),
  }));
}