Spaces:

bingn
/

ikun2

Runtime error

App Files Files Community

ikun2 / message-convert.js

bingn's picture

Upload 19 files

e3e4fcf verified 23 days ago

history blame contribute delete

14 kB

	/**
	* message-convert.js - 消息格式转换
	*
	* 将 OpenAI / Anthropic 的 messages 数组转换为
	* chataibot.pro 接受的单一 text 字符串
	*
	* chataibot.pro 限制单条消息 2500 字符。
	* 超长文本处理策略:
	* 1. 优先压缩: 截断 tool results → 删除中间轮次 → 压缩 system
	* 2. 仍超限 → 拆分为多条消息 (context prefill + final)
	*/

	import config from './config.js';
	import { serializeTools, serializeToolsAnthropic } from './tool-prompt.js';

	/**
	 * Per-message character cap applied to text sent to chataibot.pro.
	 * The service's actual limit is 2500; 100 characters are kept as headroom.
	 */
	const MAX_TEXT_LEN = 2400;

	/**
	 * Extract plain text from a message `content` field.
	 *
	 * OpenAI / Anthropic content may be a plain string or a multimodal array of
	 * typed parts. For arrays, only parts whose `type` is 'text' contribute and
	 * their `text` values are joined with newlines.
	 *
	 * @param {*} content - string, array of content parts, or any other value
	 * @returns {string} the extracted text; '' for null/undefined/other falsy input
	 */
	function extractText(content) {
		if (typeof content === 'string') return content;
		if (Array.isArray(content)) {
			// Optional chaining lets null/undefined entries be skipped instead of throwing.
			return content
				.filter((part) => part?.type === 'text')
				.map((part) => part.text)
				.join('\n');
		}
		// Any other value is stringified; falsy values collapse to the empty string.
		return String(content || '');
	}

	/**
	 * Shorten every `[Tool Result: name]` block in `text` to roughly `maxLen`
	 * characters (header included).
	 *
	 * A block starts at its `[Tool Result: ...]` header line and runs until the
	 * next `[User]` / `[Assistant]` / `[System]` marker, the next
	 * `[Tool Result: ` header, a ```tool_calls fence, or the end of the text.
	 * Blocks that already fit are left untouched. Otherwise the head and tail of
	 * the content are kept in equal halves with a truncation marker in between;
	 * when the header alone exhausts the budget, the content is replaced by a
	 * short placeholder.
	 *
	 * @param {string} text - conversation text containing tool result blocks
	 * @param {number} [maxLen=600] - target maximum length of one block
	 * @returns {string} text with oversized tool result blocks shortened
	 */
	function truncateToolResults(text, maxLen = 600) {
		// `[^\]]*` spans the whole tool name and `[\s\S]*?` lazily spans the content
		// (newlines included) up to the first boundary accepted by the lookahead.
		const toolResultBlock = /\[Tool Result: [^\]]*\]\n([\s\S]*?)(?=\n\[(?:User|Assistant|System)\]|\n\[Tool Result: |\n```tool_calls|$)/g;

		return text.replace(toolResultBlock, (match, content) => {
			if (match.length <= maxLen) return match;

			const header = match.substring(0, match.indexOf('\n') + 1);
			// 30 characters are reserved for the truncation marker and some slack.
			const keep = maxLen - header.length - 30;
			if (keep <= 0) return header + '(content truncated)';

			const half = Math.floor(keep / 2);
			const head = content.substring(0, half);
			const tail = content.substring(content.length - half);
			return header + head + '\n...(truncated)...\n' + tail;
		});
	}

	/**
	 * Split role-tagged conversation text into segments.
	 *
	 * A segment begins at each `[System] `, `[User] ` or `[Assistant] ` marker
	 * and extends to the next marker (or the end of the text). Text that
	 * precedes the first marker does not belong to any segment.
	 *
	 * @param {string} text - text containing role markers
	 * @returns {Array<{role: string, start: number, text: string}>} segments in
	 *   document order; `text` includes the marker itself and has trailing
	 *   whitespace removed; `start` is the marker's index in `text`
	 */
	function parseSegments(text) {
		// Alternation between the three role names (a real `|`, not a literal pipe).
		const markers = [...text.matchAll(/\[(System|User|Assistant)\] /g)];

		return markers.map((marker, i) => {
			const end = i + 1 < markers.length ? markers[i + 1].index : text.length;
			return {
				role: marker[1],
				start: marker.index,
				text: text.substring(marker.index, end).trimEnd(),
			};
		});
	}

	/**
	 * Compress conversation text so it fits in one message of at most
	 * MAX_TEXT_LEN characters.
	 *
	 * Escalating strategy; the first step whose output fits is returned:
	 *   1. Truncate tool results to 600 / 300 / 100 / 40 characters.
	 *   2. Replace each tool result body with a one-line "(omitted)" marker.
	 *   3. Drop middle turns, keeping the system segment, the last user segment
	 *      and the final segment (only when there are more than two segments).
	 *   4. Truncate the system segment to make room for the last message, or
	 *      keep only the last message.
	 *   5. Hard-truncate, keeping the tail of the original text.
	 *
	 * Every path returns a string no longer than MAX_TEXT_LEN; this function
	 * never returns null.
	 *
	 * @param {string} text - full converted conversation text
	 * @returns {string} text of length <= MAX_TEXT_LEN
	 */
	function compressToSingle(text) {
		if (text.length <= MAX_TEXT_LEN) return text;

		const originalLen = text.length;

		// Step 1: progressively tighter tool result truncation.
		let result = text;
		for (const maxToolLen of [600, 300, 100, 40]) {
			result = truncateToolResults(result, maxToolLen);
			if (result.length <= MAX_TEXT_LEN) {
				console.log(`[Convert] 压缩 ${originalLen} → ${result.length} (截断 tool results)`);
				return result;
			}
		}

		// Step 2: keep only the header line of each tool result.
		result = result.replace(
			/\[Tool Result: [^\]]*\]\n[\s\S]*?(?=\n\[(?:User|Assistant|System)\]|\n```tool_calls|$)/g,
			(match) => {
				const nl = match.indexOf('\n');
				return match.substring(0, nl > 0 ? nl : match.length) + '\n(omitted)';
			},
		);
		if (result.length <= MAX_TEXT_LEN) {
			console.log(`[Convert] 压缩 ${originalLen} → ${result.length} (清除 tool results)`);
			return result;
		}

		const segments = parseSegments(result);
		const systemSeg = segments[0]?.role === 'System' ? segments[0] : null;
		let lastUserIdx = -1;
		for (let i = segments.length - 1; i >= 0; i--) {
			if (segments[i].role === 'User') { lastUserIdx = i; break; }
		}

		// Step 3: with more than two segments, drop the middle turns.
		if (segments.length > 2) {
			const mustKeep = new Set();
			if (systemSeg) mustKeep.add(0);
			if (lastUserIdx >= 0) mustKeep.add(lastUserIdx);
			mustKeep.add(segments.length - 1);

			const rebuilt = segments
				.filter((_, i) => mustKeep.has(i))
				.map((s) => s.text)
				.join('\n\n');
			if (rebuilt.length <= MAX_TEXT_LEN) {
				console.log(`[Convert] 压缩 ${originalLen} → ${rebuilt.length} (删除中间轮次)`);
				return rebuilt;
			}
		}

		// Step 4: also reached when there are too few segments for step 3.
		// Requires a system segment and a distinct last message to pair it with.
		const lastSeg = lastUserIdx >= 0 ? segments[lastUserIdx] : segments[segments.length - 1];
		if (systemSeg && lastSeg !== systemSeg) {
			const lastMsg = lastSeg.text;
			const budgetForSystem = MAX_TEXT_LEN - lastMsg.length - 20;
			if (budgetForSystem > 200) {
				const truncated = systemSeg.text.substring(0, budgetForSystem) + '\n\n' + lastMsg;
				console.log(`[Convert] 压缩 ${originalLen} → ${truncated.length} (截断 system)`);
				return truncated;
			}
			// No useful room for the system prompt: send the last message alone.
			if (lastMsg.length <= MAX_TEXT_LEN) {
				console.log(`[Convert] 压缩 ${originalLen} → ${lastMsg.length} (仅 user 消息)`);
				return lastMsg;
			}
		}

		// Step 5: hard truncation keeps the tail, where the latest content is.
		const truncated = text.substring(text.length - MAX_TEXT_LEN);
		console.log(`[Convert] 压缩 ${originalLen} → ${MAX_TEXT_LEN} (硬截断末尾)`);
		return truncated;
	}

	/**
	 * Entry point: turn converted text into the list of messages to send.
	 *
	 * The text is first passed to compressToSingle; if that yields a string
	 * within MAX_TEXT_LEN it is returned as the only message. Otherwise tool
	 * results are truncated to 300 characters and role segments are greedily
	 * packed into chunks, with oversized segments handed to hardSplit.
	 *
	 * @param {string} text - full converted text
	 * @returns {string[]} at least one message; [''] for empty input
	 */
	export function splitToChunks(text) {
		if (!text) return [''];

		// Common path: a single compressed message.
		const single = compressToSingle(text);
		if (single && single.length <= MAX_TEXT_LEN) return [single];

		// Fallback path: split into several messages.
		const originalLen = text.length;
		const compressed = truncateToolResults(text, 300);
		const segments = parseSegments(compressed);
		if (segments.length === 0) return hardSplit(compressed);

		const chunks = [];
		let pending = '';
		// Emit the chunk under construction (if any) and start a new one.
		const flush = () => {
			if (pending) chunks.push(pending.trimEnd());
			pending = '';
		};

		segments.forEach(({ text: body }) => {
			if (body.length > MAX_TEXT_LEN) {
				// A segment that cannot fit anywhere gets split on its own.
				flush();
				chunks.push(...hardSplit(body));
				return;
			}
			const joined = pending ? pending + '\n\n' + body : body;
			if (joined.length <= MAX_TEXT_LEN) {
				pending = joined;
				return;
			}
			flush();
			pending = body;
		});
		flush();

		console.log(`[Convert] 多消息拆分 ${originalLen} → ${chunks.length} 条 [${chunks.map(c => c.length).join(', ')}]`);
		return chunks;
	}

	/**
	 * Cut text into pieces of at most MAX_TEXT_LEN characters, preferring to
	 * break at a newline.
	 *
	 * A newline is used as the break point only when it lies more than 500
	 * characters into the current piece; newlines at the start of the next
	 * piece are skipped.
	 *
	 * @param {string} text - text to split
	 * @returns {string[]} pieces in order; empty array for empty text
	 */
	function hardSplit(text) {
		const pieces = [];
		const total = text.length;
		let cursor = 0;

		while (cursor < total) {
			const remaining = total - cursor;
			if (remaining <= MAX_TEXT_LEN) {
				// Everything left fits in one final piece.
				pieces.push(text.slice(cursor));
				break;
			}

			const limit = cursor + MAX_TEXT_LEN;
			const newlineAt = text.lastIndexOf('\n', limit);
			const end = newlineAt > cursor + 500 ? newlineAt : limit;
			pieces.push(text.slice(cursor, end));

			// Resume after the cut, dropping any leading newlines.
			cursor = end;
			while (cursor < total && text.charAt(cursor) === '\n') {
				cursor += 1;
			}
		}

		return pieces;
	}

	/**
	 * Convert OpenAI-style messages into a single chataibot text string.
	 *
	 * System messages are merged (newline-joined) and the output of
	 * serializeTools(tools, toolChoice) is appended to them. `tool` messages
	 * become user turns of the form `[Tool Result: name]\n<content>`; assistant
	 * messages carrying `tool_calls` are rendered as ```tool_calls fenced JSON.
	 * A conversation consisting of exactly one user turn is emitted without
	 * role labels; otherwise each turn is prefixed with `[System]`, `[User]` or
	 * `[Assistant]`.
	 *
	 * @param {Array} messages - [{ role: 'system'|'user'|'assistant'|'tool', content }]
	 * @param {Array} [tools] - OpenAI tools array
	 * @param {*} [toolChoice] - tool_choice parameter
	 * @returns {string} converted text; '' when there are no messages
	 */
	export function openaiToText(messages, tools, toolChoice) {
		if (!messages || !messages.length) return '';

		const system = messages
			.filter((m) => m.role === 'system')
			.map((m) => extractText(m.content))
			.join('\n');
		const conversation = messages.filter((m) => m.role !== 'system');

		// Tool definitions are appended to the end of the system prompt.
		const toolPrompt = serializeTools(tools, toolChoice);

		const processedConversation = [];
		for (const msg of conversation) {
			if (msg.role === 'tool') {
				// Tool output is presented to the model as a user turn.
				const toolName = msg.name || msg.tool_call_id || 'unknown';
				processedConversation.push({
					role: 'user',
					content: `[Tool Result: ${toolName}]\n${extractText(msg.content)}`,
				});
			} else if (msg.role === 'assistant' && Array.isArray(msg.tool_calls)) {
				let callText = extractText(msg.content) || '';
				for (const tc of msg.tool_calls) {
					const fn = tc?.function || {};
					// `arguments` is normally a JSON string; serialize it when a client
					// sends an object so it does not render as "[object Object]".
					const args = typeof fn.arguments === 'string'
						? fn.arguments || '{}'
						: JSON.stringify(fn.arguments ?? {});
					// JSON.stringify keeps the name a valid, escaped JSON string.
					const name = JSON.stringify(String(fn.name ?? ''));
					callText += `\n\`\`\`tool_calls\n[{"name": ${name}, "arguments": ${args}}]\n\`\`\``;
				}
				processedConversation.push({ role: 'assistant', content: callText.trim() });
			} else {
				processedConversation.push(msg);
			}
		}

		const fullSystem = system + toolPrompt;

		// Single turn: exactly one user message, emitted without role labels.
		if (processedConversation.length === 1 && processedConversation[0].role === 'user') {
			const userText = extractText(processedConversation[0].content);
			return fullSystem ? `${fullSystem}\n\n${userText}` : userText;
		}

		// Multi turn: role-labelled transcript; any non-assistant role is shown as User.
		let text = '';
		if (fullSystem) text += `[System] ${fullSystem}\n\n`;

		for (const msg of processedConversation) {
			const role = msg.role === 'assistant' ? 'Assistant' : 'User';
			text += `[${role}] ${extractText(msg.content)}\n\n`;
		}

		return text.trim();
	}

	/**
	 * Convert Anthropic-style messages into a single chataibot text string.
	 *
	 * The system prompt (a separate field in the Anthropic API) has the output
	 * of serializeToolsAnthropic(tools, toolChoice) appended. Array content is
	 * flattened: `text` blocks are kept, `tool_use` blocks become ```tool_calls
	 * fenced JSON, `tool_result` blocks become `[Tool Result: id]\n<content>`;
	 * other block types are ignored. A conversation consisting of exactly one
	 * user message is emitted without role labels; otherwise each turn is
	 * prefixed with `[System]`, `[User]` or `[Assistant]`.
	 *
	 * @param {string|undefined} system - system prompt
	 * @param {Array} messages - [{ role: 'user'|'assistant', content }]
	 * @param {Array} [tools] - Anthropic tools array
	 * @param {*} [toolChoice] - tool_choice parameter
	 * @returns {string} converted text
	 */
	export function anthropicToText(system, messages, tools, toolChoice) {
		if (!messages || !messages.length) return extractText(system) || '';

		// Tool definitions are appended to the end of the system prompt.
		const toolPrompt = serializeToolsAnthropic(tools, toolChoice);
		const systemText = extractText(system) || '';
		const fullSystem = systemText + toolPrompt;

		// Render one content block as text; returns null for blocks to skip.
		const renderBlock = (block) => {
			switch (block?.type) {
				case 'text':
					return block.text;
				case 'tool_use':
					// JSON.stringify keeps the tool name a valid, escaped JSON string.
					return `\`\`\`tool_calls\n[{"name": ${JSON.stringify(String(block.name ?? ''))}, "arguments": ${JSON.stringify(block.input || {})}}]\n\`\`\``;
				case 'tool_result': {
					const raw = block.content;
					let resultContent;
					if (typeof raw === 'string') {
						resultContent = raw;
					} else if (Array.isArray(raw)) {
						resultContent = raw.filter((c) => c?.type === 'text').map((c) => c.text).join('\n');
					} else {
						// Missing content yields an empty body rather than "undefined"/"null".
						resultContent = raw == null ? '' : JSON.stringify(raw);
					}
					return `[Tool Result: ${block.tool_use_id || 'unknown'}]\n${resultContent}`;
				}
				default:
					return null;
			}
		};

		const processedMessages = messages.map((msg) => {
			if (!Array.isArray(msg.content)) return msg;
			const parts = msg.content.map(renderBlock).filter((part) => part !== null);
			return { role: msg.role, content: parts.join('\n') };
		});

		// Single turn: exactly one user message, emitted without role labels.
		if (processedMessages.length === 1 && processedMessages[0].role === 'user') {
			const userText = extractText(processedMessages[0].content);
			return fullSystem ? `${fullSystem}\n\n${userText}` : userText;
		}

		// Multi turn: role-labelled transcript; any non-assistant role is shown as User.
		let text = '';
		if (fullSystem) text += `[System] ${fullSystem}\n\n`;

		for (const msg of processedMessages) {
			const role = msg.role === 'assistant' ? 'Assistant' : 'User';
			text += `[${role}] ${extractText(msg.content)}\n\n`;
		}

		return text.trim();
	}

	/**
	 * Resolve the requested model name through config.modelMapping.
	 *
	 * Lookup order: exact name, then the name with a trailing `-YYYYMMDD`
	 * style suffix (a dash plus eight digits) removed, e.g.
	 * claude-3-sonnet-20240229 → claude-3-sonnet. Unmapped names are returned
	 * unchanged so the upstream service can decide what to do with them.
	 *
	 * @param {string} [requestModel] - model name from the incoming request
	 * @returns {string} mapped model name; 'gpt-4o' when no model was given
	 */
	export function resolveModel(requestModel) {
		if (!requestModel) return 'gpt-4o';

		const mapping = config.modelMapping;
		// Own-property lookup only: names such as "constructor" or "toString"
		// must not resolve to members inherited from Object.prototype.
		const lookup = (key) => (
			Object.prototype.hasOwnProperty.call(mapping, key) ? mapping[key] : undefined
		);

		const exact = lookup(requestModel);
		if (exact) return exact;

		const base = requestModel.replace(/-\d{8}$/, '');
		const viaBase = lookup(base);
		if (viaBase) return viaBase;

		return requestModel;
	}

	/**
	 * Determine the vendor label for a model name from its prefix.
	 *
	 * @param {string} model - model name
	 * @returns {string} 'OpenAI' for gpt- / o1 / o3 / o4 prefixes, 'Anthropic'
	 *   for claude-, 'Google' for gemini-, otherwise '其他'
	 */
	function getOwner(model) {
		const openAiPrefixes = ['gpt-', 'o1', 'o3', 'o4'];
		if (openAiPrefixes.some((prefix) => model.startsWith(prefix))) return 'OpenAI';
		if (model.startsWith('claude-')) return 'Anthropic';
		if (model.startsWith('gemini-')) return 'Google';
		return '其他';
	}

	/**
	 * Build the list of available models from the mapping targets in
	 * config.modelMapping, with duplicates removed (first occurrence wins).
	 *
	 * @returns {Array<{id: string, object: string, created: number, owned_by: string}>}
	 *   one entry per distinct target model name
	 */
	export function getModelList() {
		// A Set keeps insertion order, so each target appears once, in mapping order.
		const uniqueTargets = [...new Set(Object.values(config.modelMapping))];
		return uniqueTargets.map((id) => ({
			id,
			object: 'model',
			created: 1700000000,
			owned_by: getOwner(id),
		}));
	}