Spaces:

bardd
/

gravityyy-proxyyy

Running

App Files Files Community

gravityyy-proxyyy / src /format /request-converter.js

bardd's picture

Fix gemini-3.1-pro-high by routing to pro-low with thinkingLevel high.

4badc3b 16 days ago

History Blame Contribute Delete

14.5 kB

	/**
	* Request Converter
	* Converts Anthropic Messages API requests to Google Generative AI format
	*/

	import {
	GEMINI_MAX_OUTPUT_TOKENS,
	getModelFamily,
	isThinkingModel
	} from '../constants.js';
	import { convertContentToParts, convertRole } from './content-converter.js';
	import { sanitizeSchema, cleanSchema } from './schema-sanitizer.js';
	import {
	restoreThinkingSignatures,
	removeTrailingThinkingBlocks,
	reorderAssistantContent,
	filterUnsignedThinkingBlocks,
	hasGeminiHistory,
	hasUnsignedThinkingBlocks,
	needsThinkingRecovery,
	closeToolLoopForThinking,
	cleanCacheControl,
	clampGeminiThinkingBudget
	} from './thinking-utils.js';
	import { logger } from '../utils/logger.js';
	import { getReasoningThinkingConfig, isGeminiProTierSku } from './reasoning-effort.js';

	/**
	 * Convert an Anthropic Messages API request into the request body sent to Cloud Code.
	 *
	 * The result uses the Google Generative AI shape (`contents`, `generationConfig`,
	 * optional `systemInstruction`, `tools`, `toolConfig`). The model family flags are
	 * forwarded to `convertContentToParts`; per the original note on this function,
	 * Claude models keep `tool_result` in Anthropic format (handled by that helper).
	 *
	 * Processing order:
	 *  1. strip `cache_control` from messages,
	 *  2. build the system instruction (plus an interleaved-thinking hint for Claude
	 *     thinking models that have tools),
	 *  3. apply thinking recovery where the helpers report it is needed,
	 *  4. convert each message to a `contents` entry,
	 *  5. fill `generationConfig` (sampling params, thinking config),
	 *  6. convert tools / tool_choice,
	 *  7. cap `maxOutputTokens` for Gemini.
	 *
	 * @param {Object} anthropicRequest - Anthropic format request
	 * @returns {Object} Request body for Cloud Code API
	 */
	export function convertAnthropicToGoogle(anthropicRequest) {
	    // [CRITICAL FIX] Pre-clean all cache_control fields from messages (Issue #189)
	    // Claude Code CLI sends cache_control on various content blocks, but Cloud Code API
	    // rejects them with "Extra inputs are not permitted". Clean them proactively here
	    // before any other processing, following the pattern from Antigravity-Manager.
	    const messages = cleanCacheControl(anthropicRequest.messages || []);

	    const {
	        system,
	        max_tokens,
	        temperature,
	        top_p,
	        top_k,
	        stop_sequences,
	        tools,
	        tool_choice,
	        thinking,
	        reasoning_effort
	    } = anthropicRequest;
	    const modelName = anthropicRequest.model || '';
	    const modelFamily = getModelFamily(modelName);
	    const isClaudeModel = modelFamily === 'claude';
	    const isGeminiModel = modelFamily === 'gemini';
	    const isThinking = isThinkingModel(modelName);
	    const hasTools = Boolean(tools && tools.length > 0);

	    const googleRequest = {
	        contents: [],
	        generationConfig: {}
	    };

	    // Handle system instruction (only emitted when at least one text part exists)
	    const systemParts = buildSystemParts(system);
	    if (systemParts.length > 0) {
	        googleRequest.systemInstruction = { parts: systemParts };
	    }

	    // Add interleaved thinking hint for Claude thinking models with tools
	    if (isClaudeModel && isThinking && hasTools) {
	        appendInterleavedThinkingHint(googleRequest);
	    }

	    // Apply thinking recovery for Gemini thinking models when needed
	    // Gemini needs recovery for tool loops/interrupted tools (stripped thinking)
	    let processedMessages = messages;

	    if (isGeminiModel && isThinking && needsThinkingRecovery(messages)) {
	        logger.debug('[RequestConverter] Applying thinking recovery for Gemini');
	        processedMessages = closeToolLoopForThinking(messages, 'gemini');
	    }

	    // For Claude: apply recovery for cross-model (Gemini→Claude) or unsigned thinking blocks
	    // Unsigned thinking blocks occur when Claude Code strips signatures it doesn't understand
	    const needsClaudeRecovery = hasGeminiHistory(messages) || hasUnsignedThinkingBlocks(messages);
	    if (isClaudeModel && isThinking && needsClaudeRecovery && needsThinkingRecovery(messages)) {
	        logger.debug('[RequestConverter] Applying thinking recovery for Claude');
	        processedMessages = closeToolLoopForThinking(messages, 'claude');
	    }

	    // Convert messages to contents, then filter unsigned thinking blocks
	    for (const msg of processedMessages) {
	        let msgContent = msg.content;

	        // For assistant messages, process thinking blocks and reorder content
	        if ((msg.role === 'assistant' || msg.role === 'model') && Array.isArray(msgContent)) {
	            // First, try to restore signatures for unsigned thinking blocks from cache
	            msgContent = restoreThinkingSignatures(msgContent);
	            // Remove trailing unsigned thinking blocks
	            msgContent = removeTrailingThinkingBlocks(msgContent);
	            // Reorder: thinking first, then text, then tool_use
	            msgContent = reorderAssistantContent(msgContent);
	        }

	        const parts = convertContentToParts(msgContent, isClaudeModel, isGeminiModel);

	        // SAFETY: Google API requires at least one part per content message
	        // This happens when all thinking blocks are filtered out (unsigned)
	        if (parts.length === 0) {
	            // Use '.' instead of '' because claude models reject empty text parts.
	            // A single period is invisible in practice but satisfies the API requirement.
	            logger.warn('[RequestConverter] WARNING: Empty parts array after filtering, adding placeholder');
	            parts.push({ text: '.' });
	        }

	        googleRequest.contents.push({
	            role: convertRole(msg.role),
	            parts
	        });
	    }

	    // Filter unsigned thinking blocks for Claude models
	    if (isClaudeModel) {
	        googleRequest.contents = filterUnsignedThinkingBlocks(googleRequest.contents);
	    }

	    // Generation config
	    const generationConfig = googleRequest.generationConfig;
	    if (max_tokens) {
	        generationConfig.maxOutputTokens = max_tokens;
	    }
	    if (temperature !== undefined) {
	        generationConfig.temperature = temperature;
	    }
	    if (top_p !== undefined) {
	        generationConfig.topP = top_p;
	    }
	    if (top_k !== undefined) {
	        generationConfig.topK = top_k;
	    }
	    if (stop_sequences && stop_sequences.length > 0) {
	        generationConfig.stopSequences = stop_sequences;
	    }

	    // Gemini 3.x performs better with temperature=1 when unset (ported from old Python proxy).
	    if (isGeminiModel && /gemini-3(?:\.\d+)?-/i.test(modelName) && temperature === undefined) {
	        generationConfig.temperature = 1;
	    }

	    // OpenAI reasoning_effort takes precedence when supplied by the adapter.
	    // Anthropic /v1/messages requests without reasoning_effort keep the existing behavior.
	    const reasoningConfig = reasoning_effort !== undefined
	        ? getReasoningThinkingConfig(modelName, reasoning_effort)
	        : null;

	    if (reasoningConfig) {
	        generationConfig.thinkingConfig = reasoningConfig.config;
	        // NOTE: the conversion is intentionally free of network side effects; the
	        // debug log line below is the only trace of which thinking config was applied.
	        const budget = reasoningConfig.config.thinkingBudget
	            ?? reasoningConfig.config.thinking_budget;
	        const detail = reasoningConfig.mode === 'level'
	            ? `thinkingLevel=${reasoningConfig.config.thinkingLevel}`
	            : `thinkingBudget=${budget}`;
	        logger.debug(`[RequestConverter] reasoning_effort=${reasoningConfig.effort} -> ${detail}`);

	        // Claude requires max_tokens > thinking budget; `budget > 0` is false when
	        // the config carries no budget (undefined), so level-mode configs are skipped.
	        if (isClaudeModel && budget > 0) {
	            const currentMaxTokens = generationConfig.maxOutputTokens;
	            if (currentMaxTokens && currentMaxTokens <= budget) {
	                const adjustedMaxTokens = budget + 8192;
	                logger.warn(`[RequestConverter] max_tokens (${currentMaxTokens}) <= thinking_budget (${budget}). Adjusting to ${adjustedMaxTokens}`);
	                generationConfig.maxOutputTokens = adjustedMaxTokens;
	            }
	        }
	    } else if (isThinking) {
	        if (isClaudeModel) {
	            // Cloud Code API requires thinking_budget to actually produce thinking blocks.
	            // Without it, include_thoughts alone is ignored and Claude falls back to
	            // <thinking> XML tags in text. Default to 32000 when not provided (e.g. adaptive mode).
	            const thinkingBudget = thinking?.budget_tokens || 32000;
	            const thinkingConfig = {
	                include_thoughts: true,
	                thinking_budget: thinkingBudget
	            };
	            logger.debug(`[RequestConverter] Claude thinking enabled with budget: ${thinkingBudget}${!thinking?.budget_tokens ? ' (default)' : ''}`);

	            // Validate max_tokens > thinking_budget as required by the API
	            const currentMaxTokens = generationConfig.maxOutputTokens;
	            if (currentMaxTokens && currentMaxTokens <= thinkingBudget) {
	                const adjustedMaxTokens = thinkingBudget + 8192;
	                if (thinking?.budget_tokens) {
	                    logger.warn(`[RequestConverter] max_tokens (${currentMaxTokens}) <= thinking_budget (${thinkingBudget}). Adjusting to ${adjustedMaxTokens} to satisfy API requirements`);
	                } else {
	                    logger.debug(`[RequestConverter] Adjusting max_tokens to ${adjustedMaxTokens} for default thinking budget`);
	                }
	                generationConfig.maxOutputTokens = adjustedMaxTokens;
	            }

	            generationConfig.thinkingConfig = thinkingConfig;
	        } else if (isGeminiModel && !isGeminiProTierSku(modelName)) {
	            // Preserve the existing Anthropic behavior when no OpenAI reasoning_effort exists.
	            const thinkingConfig = {
	                includeThoughts: true,
	                thinkingBudget: clampGeminiThinkingBudget(modelName, thinking?.budget_tokens)
	            };
	            logger.debug(`[RequestConverter] Gemini thinking enabled with budget: ${thinkingConfig.thinkingBudget}`);
	            generationConfig.thinkingConfig = thinkingConfig;
	        }
	    }

	    // Convert function tools and built-in Google Search tools.
	    if (hasTools) {
	        applyTools(googleRequest, tools, tool_choice, isClaudeModel);
	    }

	    // Cap max tokens for Gemini models
	    if (isGeminiModel && generationConfig.maxOutputTokens > GEMINI_MAX_OUTPUT_TOKENS) {
	        logger.debug(`[RequestConverter] Capping Gemini max_tokens from ${generationConfig.maxOutputTokens} to ${GEMINI_MAX_OUTPUT_TOKENS}`);
	        generationConfig.maxOutputTokens = GEMINI_MAX_OUTPUT_TOKENS;
	    }

	    return googleRequest;
	}

	/**
	 * Build the `systemInstruction.parts` list from an Anthropic `system` value.
	 *
	 * @param {string|Array<Object>|undefined} system - String prompt or array of content blocks
	 * @returns {Array<{text: string}>} Text parts; empty when `system` is falsy, has no
	 *   `type: 'text'` blocks, or is neither a string nor an array
	 */
	function buildSystemParts(system) {
	    if (!system) {
	        return [];
	    }
	    if (typeof system === 'string') {
	        return [{ text: system }];
	    }
	    if (Array.isArray(system)) {
	        // Only text blocks are carried over; null/undefined entries are skipped
	        // instead of throwing on `.type`.
	        return system
	            .filter(block => block?.type === 'text')
	            .map(block => ({ text: block.text }));
	    }
	    return [];
	}

	/**
	 * Add the interleaved-thinking hint to the request's system instruction.
	 * Creates the instruction when absent, appends to the last part when that part has
	 * non-empty text, and otherwise pushes the hint as a new part. Mutates `googleRequest`.
	 *
	 * @param {Object} googleRequest - Request being built
	 * @returns {void}
	 */
	function appendInterleavedThinkingHint(googleRequest) {
	    const hint = 'Interleaved thinking is enabled. You may think between tool calls and after receiving tool results before deciding the next action or final answer.';
	    const instruction = googleRequest.systemInstruction;
	    if (!instruction) {
	        googleRequest.systemInstruction = { parts: [{ text: hint }] };
	        return;
	    }

	    const lastPart = instruction.parts[instruction.parts.length - 1];
	    if (lastPart && lastPart.text) {
	        lastPart.text = `${lastPart.text}\n\n${hint}`;
	    } else {
	        instruction.parts.push({ text: hint });
	    }
	}

	/**
	 * Normalize a tool name for use as a function declaration name: every character
	 * outside `[a-zA-Z0-9_-]` becomes `_`, and the result is cut to 64 characters.
	 *
	 * @param {*} name - Raw tool name (coerced with `String`)
	 * @returns {string} Sanitized name
	 */
	function sanitizeFunctionName(name) {
	    return String(name).replace(/[^a-zA-Z0-9_-]/g, '_').slice(0, 64);
	}

	/**
	 * Translate Anthropic/OpenAI-style tool definitions and `tool_choice` into
	 * `googleRequest.tools` and `googleRequest.toolConfig`. Mutates `googleRequest`.
	 *
	 * Tools recognised as Google Search / Google Search Retrieval (by key, name or type)
	 * become built-in tool entries; everything else becomes a function declaration.
	 * `tool_choice.type === 'none'` removes `tools` entirely.
	 *
	 * @param {Object} googleRequest - Request being built
	 * @param {Array<Object>} tools - Non-empty list of tool definitions
	 * @param {Object|undefined} tool_choice - Anthropic tool_choice, if any
	 * @param {boolean} isClaudeModel - Selects 'VALIDATED' instead of 'AUTO' as the default mode
	 * @returns {void}
	 */
	function applyTools(googleRequest, tools, tool_choice, isClaudeModel) {
	    const functionDeclarations = [];
	    let hasGoogleSearch = false;
	    let hasGoogleSearchRetrieval = false;

	    tools.forEach((tool, idx) => {
	        const name = tool.name || tool.function?.name || tool.custom?.name || `tool-${idx}`;
	        // Compare names/types case-insensitively and without '_' / '-' separators
	        const normalizedName = String(name).toLowerCase().replace(/[_-]/g, '');
	        const normalizedType = String(tool.type || '').toLowerCase().replace(/[_-]/g, '');

	        if (tool.googleSearch !== undefined
	            || normalizedName === 'googlesearch'
	            || ['googlesearch', 'websearch', 'websearchpreview'].includes(normalizedType)) {
	            hasGoogleSearch = true;
	            return;
	        }
	        if (tool.googleSearchRetrieval !== undefined || normalizedName === 'googlesearchretrieval') {
	            hasGoogleSearchRetrieval = true;
	            return;
	        }

	        const description = tool.description || tool.function?.description || tool.custom?.description || '';
	        const schema = tool.input_schema
	            || tool.function?.input_schema
	            || tool.function?.parameters
	            || tool.custom?.input_schema
	            || tool.parameters
	            || { type: 'object' };

	        const parameters = cleanSchema(sanitizeSchema(schema));

	        functionDeclarations.push({
	            name: sanitizeFunctionName(name),
	            description,
	            parameters
	        });
	    });

	    googleRequest.tools = [];
	    if (functionDeclarations.length > 0) {
	        googleRequest.tools.push({ functionDeclarations });
	    }
	    if (hasGoogleSearch) {
	        googleRequest.tools.push({ googleSearch: {} });
	    }
	    if (hasGoogleSearchRetrieval) {
	        googleRequest.tools.push({ googleSearchRetrieval: {} });
	    }

	    if (tool_choice?.type === 'none') {
	        delete googleRequest.tools;
	        return;
	    }
	    if (googleRequest.tools.length === 0) {
	        return;
	    }

	    // toolConfig only applies to function declarations, not built-in search tools
	    if (functionDeclarations.length > 0) {
	        const functionCallingConfig = {};
	        if (tool_choice?.type === 'any') {
	            functionCallingConfig.mode = 'ANY';
	        } else if (tool_choice?.type === 'tool' && tool_choice.name) {
	            functionCallingConfig.mode = 'ANY';
	            // Must match the sanitized declaration name, otherwise a forced tool
	            // whose name needed sanitizing would reference a non-existent function.
	            functionCallingConfig.allowedFunctionNames = [sanitizeFunctionName(tool_choice.name)];
	        } else if (isClaudeModel) {
	            functionCallingConfig.mode = 'VALIDATED';
	        } else {
	            functionCallingConfig.mode = 'AUTO';
	        }

	        googleRequest.toolConfig = { functionCallingConfig };
	    }
	    logger.debug(`[RequestConverter] Tools: ${JSON.stringify(googleRequest.tools).substring(0, 300)}`);
	}