Spaces:

bardd
/

gravityyy-proxyyy

Running

App Files Files Community

gravityyy-proxyyy / src /format /thinking-utils.js

bardd's picture

Fix gemini-3.1-pro-high by routing to pro-low with thinkingLevel high.

4badc3b 11 days ago

History Blame Contribute Delete

25.3 kB

	/**
	* Thinking Block Utilities
	* Handles thinking block processing, validation, and filtering
	*/

	import { MIN_SIGNATURE_LENGTH } from '../constants.js';
	import { getCachedSignatureFamily } from './signature-cache.js';
	import { logger } from '../utils/logger.js';

	// ============================================================================
	// Gemini Thinking Budget Limits (Issue #289)
	// ============================================================================

	// Max thinking budget per Gemini model version
	// Gemini 2.5 Flash: max 24,576 (API error: "supported values are integers from 1 to 24576")
	const GEMINI_THINKING_BUDGET_LIMITS = {
	'2.5': 24576,
	};
	const GEMINI_DEFAULT_THINKING_BUDGET = 16000;
	const GEMINI_DEFAULT_THINKING_BUDGET_LIMIT = 128000;

	/**
	* Clamp thinking budget to the maximum supported by a Gemini model.
	* Different Gemini versions have different limits (e.g., 2.5 Flash max is 24,576).
	*
	* @param {string} modelName - The Gemini model name
	* @param {number\|undefined} budget - Requested thinking budget from the client
	* @returns {number} Clamped thinking budget
	*/
	export function clampGeminiThinkingBudget(modelName, budget) {
	const requestedBudget = budget \|\| GEMINI_DEFAULT_THINKING_BUDGET;
	const lower = (modelName \|\| '').toLowerCase();

	// Extract version like "2.5" or "3" from "gemini-2.5-flash-thinking"
	const versionMatch = lower.match(/gemini-(\d+(?:\.\d+)?)/);
	let maxBudget = GEMINI_DEFAULT_THINKING_BUDGET_LIMIT;

	if (versionMatch) {
	const version = versionMatch[1];
	// Check exact version (e.g., "2.5")
	if (GEMINI_THINKING_BUDGET_LIMITS[version]) {
	maxBudget = GEMINI_THINKING_BUDGET_LIMITS[version];
	} else {
	// Check major version (e.g., "2" for "2.5")
	const major = version.split('.')[0];
	if (GEMINI_THINKING_BUDGET_LIMITS[major]) {
	maxBudget = GEMINI_THINKING_BUDGET_LIMITS[major];
	}
	}
	}

	if (requestedBudget > maxBudget) {
	logger.debug(`[ThinkingUtils] Clamping Gemini thinking budget from ${requestedBudget} to ${maxBudget} for ${modelName}`);
	return maxBudget;
	}

	return requestedBudget;
	}

	// ============================================================================
	// Cache Control Cleaning (Issue #189)
	// ============================================================================

	/**
	* Remove cache_control fields from all content blocks in messages.
	* This is a critical fix for Issue #189 where Claude Code CLI sends cache_control
	* fields that the Cloud Code API rejects with "Extra inputs are not permitted".
	*
	* Inspired by Antigravity-Manager's clean_cache_control_from_messages() approach,
	* this function proactively strips cache_control from ALL block types at the
	* entry point of the conversion pipeline.
	*
	* @param {Array<Object>} messages - Array of messages in Anthropic format
	* @returns {Array<Object>} Messages with cache_control fields removed
	*/
	export function cleanCacheControl(messages) {
	if (!Array.isArray(messages)) return messages;

	let removedCount = 0;

	const cleaned = messages.map(message => {
	if (!message \|\| typeof message !== 'object') return message;

	// Handle string content (no cache_control possible)
	if (typeof message.content === 'string') return message;

	// Handle array content
	if (!Array.isArray(message.content)) return message;

	const cleanedContent = message.content.map(block => {
	if (!block \|\| typeof block !== 'object') return block;

	// Check if cache_control exists before destructuring
	if (block.cache_control === undefined) return block;

	// Create a shallow copy without cache_control
	const { cache_control, ...cleanBlock } = block;
	removedCount++;

	return cleanBlock;
	});

	return {
	...message,
	content: cleanedContent
	};
	});

	if (removedCount > 0) {
	logger.debug(`[ThinkingUtils] Removed cache_control from ${removedCount} block(s)`);
	}

	return cleaned;
	}

	/**
	* Check if a part is a thinking block
	* @param {Object} part - Content part to check
	* @returns {boolean} True if the part is a thinking block
	*/
	function isThinkingPart(part) {
	return part.type === 'thinking' \|\|
	part.type === 'redacted_thinking' \|\|
	part.thinking !== undefined \|\|
	part.thought === true;
	}

	/**
	* Check if a thinking part has a valid signature (>= MIN_SIGNATURE_LENGTH chars)
	*/
	function hasValidSignature(part) {
	const signature = part.thought === true ? part.thoughtSignature : part.signature;
	return typeof signature === 'string' && signature.length >= MIN_SIGNATURE_LENGTH;
	}

	/**
	* Check if conversation history contains Gemini-style messages.
	* Gemini puts thoughtSignature on tool_use blocks, Claude puts signature on thinking blocks.
	* @param {Array<Object>} messages - Array of messages
	* @returns {boolean} True if any tool_use has thoughtSignature (Gemini pattern)
	*/
	export function hasGeminiHistory(messages) {
	return messages.some(msg =>
	Array.isArray(msg.content) &&
	msg.content.some(block =>
	block.type === 'tool_use' && block.thoughtSignature !== undefined
	)
	);
	}

	/**
	* Check if conversation has unsigned thinking blocks that will be dropped.
	* These cause "Expected thinking but found text" errors.
	* @param {Array<Object>} messages - Array of messages
	* @returns {boolean} True if any assistant message has unsigned thinking blocks
	*/
	export function hasUnsignedThinkingBlocks(messages) {
	return messages.some(msg => {
	if (msg.role !== 'assistant' && msg.role !== 'model') return false;
	if (!Array.isArray(msg.content)) return false;
	return msg.content.some(block =>
	isThinkingPart(block) && !hasValidSignature(block)
	);
	});
	}

	/**
	* Sanitize a thinking part by keeping only allowed fields
	*/
	function sanitizeThinkingPart(part) {
	// Gemini-style thought blocks: { thought: true, text, thoughtSignature }
	if (part.thought === true) {
	const sanitized = { thought: true };
	if (part.text !== undefined) sanitized.text = part.text;
	if (part.thoughtSignature !== undefined) sanitized.thoughtSignature = part.thoughtSignature;
	return sanitized;
	}

	// Anthropic-style thinking blocks: { type: "thinking", thinking, signature }
	if (part.type === 'thinking' \|\| part.thinking !== undefined) {
	const sanitized = { type: 'thinking' };
	if (part.thinking !== undefined) sanitized.thinking = part.thinking;
	if (part.signature !== undefined) sanitized.signature = part.signature;
	return sanitized;
	}

	return part;
	}

	/**
	* Sanitize a thinking block by removing extra fields like cache_control.
	* Only keeps: type, thinking, signature (for thinking) or type, data (for redacted_thinking)
	*/
	function sanitizeAnthropicThinkingBlock(block) {
	if (!block) return block;

	if (block.type === 'thinking') {
	const sanitized = { type: 'thinking' };
	if (block.thinking !== undefined) sanitized.thinking = block.thinking;
	if (block.signature !== undefined) sanitized.signature = block.signature;
	return sanitized;
	}

	if (block.type === 'redacted_thinking') {
	const sanitized = { type: 'redacted_thinking' };
	if (block.data !== undefined) sanitized.data = block.data;
	return sanitized;
	}

	return block;
	}

	/**
	* Sanitize a text block by removing extra fields like cache_control.
	* Only keeps: type, text
	* @param {Object} block - Text block to sanitize
	* @returns {Object} Sanitized text block
	*/
	function sanitizeTextBlock(block) {
	if (!block \|\| block.type !== 'text') return block;

	const sanitized = { type: 'text' };
	if (block.text !== undefined) sanitized.text = block.text;
	return sanitized;
	}

	/**
	* Sanitize a tool_use block by removing extra fields like cache_control.
	* Only keeps: type, id, name, input, thoughtSignature (for Gemini)
	* @param {Object} block - Tool_use block to sanitize
	* @returns {Object} Sanitized tool_use block
	*/
	function sanitizeToolUseBlock(block) {
	if (!block \|\| block.type !== 'tool_use') return block;

	const sanitized = { type: 'tool_use' };
	if (block.id !== undefined) sanitized.id = block.id;
	if (block.name !== undefined) sanitized.name = block.name;
	if (block.input !== undefined) sanitized.input = block.input;
	// Preserve thoughtSignature for Gemini models
	if (block.thoughtSignature !== undefined) sanitized.thoughtSignature = block.thoughtSignature;
	return sanitized;
	}

	/**
	* Filter content array, keeping only thinking blocks with valid signatures.
	*/
	function filterContentArray(contentArray) {
	const filtered = [];

	for (const item of contentArray) {
	if (!item \|\| typeof item !== 'object') {
	filtered.push(item);
	continue;
	}

	if (!isThinkingPart(item)) {
	filtered.push(item);
	continue;
	}

	// Keep items with valid signatures
	if (hasValidSignature(item)) {
	filtered.push(sanitizeThinkingPart(item));
	continue;
	}

	// Drop unsigned thinking blocks
	logger.debug('[ThinkingUtils] Dropping unsigned thinking block');
	}

	return filtered;
	}

	/**
	* Filter unsigned thinking blocks from contents (Gemini format)
	*
	* @param {Array<{role: string, parts: Array}>} contents - Array of content objects in Gemini format
	* @returns {Array<{role: string, parts: Array}>} Filtered contents with unsigned thinking blocks removed
	*/
	export function filterUnsignedThinkingBlocks(contents) {
	return contents.map(content => {
	if (!content \|\| typeof content !== 'object') return content;

	if (Array.isArray(content.parts)) {
	return { ...content, parts: filterContentArray(content.parts) };
	}

	return content;
	});
	}

	/**
	* Remove trailing unsigned thinking blocks from assistant messages.
	* Claude/Gemini APIs require that assistant messages don't end with unsigned thinking blocks.
	*
	* @param {Array<Object>} content - Array of content blocks
	* @returns {Array<Object>} Content array with trailing unsigned thinking blocks removed
	*/
	export function removeTrailingThinkingBlocks(content) {
	if (!Array.isArray(content)) return content;
	if (content.length === 0) return content;

	// Work backwards from the end, removing thinking blocks
	let endIndex = content.length;
	for (let i = content.length - 1; i >= 0; i--) {
	const block = content[i];
	if (!block \|\| typeof block !== 'object') break;

	// Check if it's a thinking block (any format)
	const isThinking = isThinkingPart(block);

	if (isThinking) {
	// Check if it has a valid signature
	if (!hasValidSignature(block)) {
	endIndex = i;
	} else {
	break; // Stop at signed thinking block
	}
	} else {
	break; // Stop at first non-thinking block
	}
	}

	if (endIndex < content.length) {
	logger.debug('[ThinkingUtils] Removed', content.length - endIndex, 'trailing unsigned thinking blocks');
	return content.slice(0, endIndex);
	}

	return content;
	}

	/**
	* Filter thinking blocks: keep only those with valid signatures.
	* Blocks without signatures are dropped (API requires signatures).
	* Also sanitizes blocks to remove extra fields like cache_control.
	*
	* @param {Array<Object>} content - Array of content blocks
	* @returns {Array<Object>} Filtered content with only valid signed thinking blocks
	*/
	export function restoreThinkingSignatures(content) {
	if (!Array.isArray(content)) return content;

	const originalLength = content.length;
	const filtered = [];

	for (const block of content) {
	if (!block \|\| block.type !== 'thinking') {
	filtered.push(block);
	continue;
	}

	// Keep blocks with valid signatures (>= MIN_SIGNATURE_LENGTH chars), sanitized
	if (block.signature && block.signature.length >= MIN_SIGNATURE_LENGTH) {
	filtered.push(sanitizeAnthropicThinkingBlock(block));
	}
	// Unsigned thinking blocks are dropped
	}

	if (filtered.length < originalLength) {
	logger.debug(`[ThinkingUtils] Dropped ${originalLength - filtered.length} unsigned thinking block(s)`);
	}

	return filtered;
	}

	/**
	* Reorder content so that:
	* 1. Thinking blocks come first (required when thinking is enabled)
	* 2. Text blocks come in the middle (filtering out empty/useless ones)
	* 3. Tool_use blocks come at the end (required before tool_result)
	*
	* @param {Array<Object>} content - Array of content blocks
	* @returns {Array<Object>} Reordered content array
	*/
	export function reorderAssistantContent(content) {
	if (!Array.isArray(content)) return content;

	// Even for single-element arrays, we need to sanitize thinking blocks
	if (content.length === 1) {
	const block = content[0];
	if (block && (block.type === 'thinking' \|\| block.type === 'redacted_thinking')) {
	return [sanitizeAnthropicThinkingBlock(block)];
	}
	return content;
	}

	const thinkingBlocks = [];
	const textBlocks = [];
	const toolUseBlocks = [];
	let droppedEmptyBlocks = 0;

	for (const block of content) {
	if (!block) continue;

	if (block.type === 'thinking' \|\| block.type === 'redacted_thinking') {
	// Sanitize thinking blocks to remove cache_control and other extra fields
	thinkingBlocks.push(sanitizeAnthropicThinkingBlock(block));
	} else if (block.type === 'tool_use') {
	// Sanitize tool_use blocks to remove cache_control and other extra fields
	toolUseBlocks.push(sanitizeToolUseBlock(block));
	} else if (block.type === 'text') {
	// Only keep text blocks with meaningful content
	if (block.text && block.text.trim().length > 0) {
	// Sanitize text blocks to remove cache_control and other extra fields
	textBlocks.push(sanitizeTextBlock(block));
	} else {
	droppedEmptyBlocks++;
	}
	} else {
	// Other block types go in the text position
	textBlocks.push(block);
	}
	}

	if (droppedEmptyBlocks > 0) {
	logger.debug(`[ThinkingUtils] Dropped ${droppedEmptyBlocks} empty text block(s)`);
	}

	const reordered = [...thinkingBlocks, ...textBlocks, ...toolUseBlocks];

	// Log only if actual reordering happened (not just filtering)
	if (reordered.length === content.length) {
	const originalOrder = content.map(b => b?.type \|\| 'unknown').join(',');
	const newOrder = reordered.map(b => b?.type \|\| 'unknown').join(',');
	if (originalOrder !== newOrder) {
	logger.debug('[ThinkingUtils] Reordered assistant content');
	}
	}

	return reordered;
	}

	// ============================================================================
	// Thinking Recovery Functions
	// ============================================================================

	/**
	* Check if a message has any VALID (signed) thinking blocks.
	* Only counts thinking blocks that have valid signatures, not unsigned ones
	* that will be dropped later.
	*
	* @param {Object} message - Message to check
	* @returns {boolean} True if message has valid signed thinking blocks
	*/
	function messageHasValidThinking(message) {
	const content = message.content \|\| message.parts \|\| [];
	if (!Array.isArray(content)) return false;
	return content.some(block => {
	if (!isThinkingPart(block)) return false;
	// Check for valid signature (Anthropic style)
	if (block.signature && block.signature.length >= MIN_SIGNATURE_LENGTH) return true;
	// Check for thoughtSignature (Gemini style on functionCall)
	if (block.thoughtSignature && block.thoughtSignature.length >= MIN_SIGNATURE_LENGTH) return true;
	return false;
	});
	}

	/**
	* Check if a message has tool_use blocks
	* @param {Object} message - Message to check
	* @returns {boolean} True if message has tool_use blocks
	*/
	function messageHasToolUse(message) {
	const content = message.content \|\| message.parts \|\| [];
	if (!Array.isArray(content)) return false;
	return content.some(block =>
	block.type === 'tool_use' \|\| block.functionCall
	);
	}

	/**
	* Check if a message has tool_result blocks
	* @param {Object} message - Message to check
	* @returns {boolean} True if message has tool_result blocks
	*/
	function messageHasToolResult(message) {
	const content = message.content \|\| message.parts \|\| [];
	if (!Array.isArray(content)) return false;
	return content.some(block =>
	block.type === 'tool_result' \|\| block.functionResponse
	);
	}

	/**
	* Check if message is a plain user text message (not tool_result)
	* @param {Object} message - Message to check
	* @returns {boolean} True if message is plain user text
	*/
	function isPlainUserMessage(message) {
	if (message.role !== 'user') return false;
	const content = message.content \|\| message.parts \|\| [];
	if (!Array.isArray(content)) return typeof content === 'string';
	// Check if it has tool_result blocks
	return !content.some(block =>
	block.type === 'tool_result' \|\| block.functionResponse
	);
	}

	/**
	* Analyze conversation state to detect if we're in a corrupted state.
	* This includes:
	* 1. Tool loop: assistant has tool_use followed by tool_results (normal flow)
	* 2. Interrupted tool: assistant has tool_use followed by plain user message (interrupted)
	*
	* @param {Array<Object>} messages - Array of messages
	* @returns {Object} State object with inToolLoop, interruptedTool, turnHasThinking, etc.
	*/
	function analyzeConversationState(messages) {
	if (!Array.isArray(messages) \|\| messages.length === 0) {
	return { inToolLoop: false, interruptedTool: false, turnHasThinking: false, toolResultCount: 0 };
	}

	// Find the last assistant message
	let lastAssistantIdx = -1;
	for (let i = messages.length - 1; i >= 0; i--) {
	if (messages[i].role === 'assistant' \|\| messages[i].role === 'model') {
	lastAssistantIdx = i;
	break;
	}
	}

	if (lastAssistantIdx === -1) {
	return { inToolLoop: false, interruptedTool: false, turnHasThinking: false, toolResultCount: 0 };
	}

	const lastAssistant = messages[lastAssistantIdx];
	const hasToolUse = messageHasToolUse(lastAssistant);
	const hasThinking = messageHasValidThinking(lastAssistant);

	// Count trailing tool results after the assistant message
	let toolResultCount = 0;
	let hasPlainUserMessageAfter = false;
	for (let i = lastAssistantIdx + 1; i < messages.length; i++) {
	if (messageHasToolResult(messages[i])) {
	toolResultCount++;
	}
	if (isPlainUserMessage(messages[i])) {
	hasPlainUserMessageAfter = true;
	}
	}

	// We're in a tool loop if: assistant has tool_use AND there are tool_results after
	const inToolLoop = hasToolUse && toolResultCount > 0;

	// We have an interrupted tool if: assistant has tool_use, NO tool_results,
	// but there IS a plain user message after (user interrupted and sent new message)
	const interruptedTool = hasToolUse && toolResultCount === 0 && hasPlainUserMessageAfter;

	return {
	inToolLoop,
	interruptedTool,
	turnHasThinking: hasThinking,
	toolResultCount,
	lastAssistantIdx
	};
	}

	/**
	* Check if conversation needs thinking recovery.
	*
	* Recovery is only needed when:
	* 1. We're in a tool loop or have an interrupted tool, AND
	* 2. No valid thinking blocks exist in the current turn
	*
	* Cross-model signature compatibility is handled by stripInvalidThinkingBlocks
	* during recovery (not here).
	*
	* @param {Array<Object>} messages - Array of messages
	* @returns {boolean} True if thinking recovery is needed
	*/
	export function needsThinkingRecovery(messages) {
	const state = analyzeConversationState(messages);

	// Recovery is only needed in tool loops or interrupted tools
	if (!state.inToolLoop && !state.interruptedTool) return false;

	// Need recovery if no valid thinking blocks exist
	return !state.turnHasThinking;
	}

	/**
	* Strip invalid or incompatible thinking blocks from messages.
	* Used before injecting synthetic messages for recovery.
	* Keeps valid thinking blocks to preserve context from previous turns.
	*
	* @param {Array<Object>} messages - Array of messages
	* @param {string} targetFamily - Target model family ('claude' or 'gemini')
	* @returns {Array<Object>} Messages with invalid thinking blocks removed
	*/
	function stripInvalidThinkingBlocks(messages, targetFamily = null) {
	let strippedCount = 0;

	const result = messages.map(msg => {
	const content = msg.content \|\| msg.parts;
	if (!Array.isArray(content)) return msg;

	const filtered = content.filter(block => {
	// Keep non-thinking blocks
	if (!isThinkingPart(block)) return true;

	// Check generic validity (has signature of sufficient length)
	if (!hasValidSignature(block)) {
	strippedCount++;
	return false;
	}

	// Check family compatibility only for Gemini targets
	// Claude can validate its own signatures, so we don't drop for Claude
	if (targetFamily === 'gemini') {
	const signature = block.thought === true ? block.thoughtSignature : block.signature;
	const signatureFamily = getCachedSignatureFamily(signature);

	// For Gemini: drop unknown or mismatched signatures
	if (!signatureFamily \|\| signatureFamily !== targetFamily) {
	strippedCount++;
	return false;
	}
	}

	return true;
	});

	// Use '.' instead of '' because claude models reject empty text parts
	if (msg.content) {
	return { ...msg, content: filtered.length > 0 ? filtered : [{ type: 'text', text: '.' }] };
	} else if (msg.parts) {
	return { ...msg, parts: filtered.length > 0 ? filtered : [{ text: '.' }] };
	}
	return msg;
	});

	if (strippedCount > 0) {
	logger.debug(`[ThinkingUtils] Stripped ${strippedCount} invalid/incompatible thinking block(s)`);
	}

	return result;
	}

	/**
	* Close tool loop by injecting synthetic messages.
	* This allows the model to start a fresh turn when thinking is corrupted.
	*
	* When thinking blocks are stripped (no valid signatures) and we're in the
	* middle of a tool loop OR have an interrupted tool, the conversation is in
	* a corrupted state. This function injects synthetic messages to close the
	* loop and allow the model to continue.
	*
	* @param {Array<Object>} messages - Array of messages
	* @param {string} targetFamily - Target model family ('claude' or 'gemini')
	* @returns {Array<Object>} Modified messages with synthetic messages injected
	*/
	export function closeToolLoopForThinking(messages, targetFamily = null) {
	const state = analyzeConversationState(messages);

	// Handle neither tool loop nor interrupted tool
	if (!state.inToolLoop && !state.interruptedTool) return messages;

	// Strip only invalid/incompatible thinking blocks (keep valid ones)
	let modified = stripInvalidThinkingBlocks(messages, targetFamily);

	if (state.interruptedTool) {
	// For interrupted tools: just strip thinking and add a synthetic assistant message
	// to acknowledge the interruption before the user's new message

	// Find where to insert the synthetic message (before the plain user message)
	const insertIdx = state.lastAssistantIdx + 1;

	// Insert synthetic assistant message acknowledging interruption
	modified.splice(insertIdx, 0, {
	role: 'assistant',
	content: [{ type: 'text', text: '[Tool call was interrupted.]' }]
	});

	logger.debug('[ThinkingUtils] Applied thinking recovery for interrupted tool');
	} else if (state.inToolLoop) {
	// For tool loops: add synthetic messages to close the loop
	const syntheticText = state.toolResultCount === 1
	? '[Tool execution completed.]'
	: `[${state.toolResultCount} tool executions completed.]`;

	// Inject synthetic model message to complete the turn
	modified.push({
	role: 'assistant',
	content: [{ type: 'text', text: syntheticText }]
	});

	// Inject synthetic user message to start fresh
	modified.push({
	role: 'user',
	content: [{ type: 'text', text: '[Continue]' }]
	});

	logger.debug('[ThinkingUtils] Applied thinking recovery for tool loop');
	}

	return modified;
	}