// Source: gravityyy-proxyyy/src/format/request-converter.js (14.5 kB)
// Last commit 4badc3b (bardd): Fix gemini-3.1-pro-high by routing to pro-low with thinkingLevel high.
/**
* Request Converter
* Converts Anthropic Messages API requests to Google Generative AI format
*/
import {
GEMINI_MAX_OUTPUT_TOKENS,
getModelFamily,
isThinkingModel
} from '../constants.js';
import { convertContentToParts, convertRole } from './content-converter.js';
import { sanitizeSchema, cleanSchema } from './schema-sanitizer.js';
import {
restoreThinkingSignatures,
removeTrailingThinkingBlocks,
reorderAssistantContent,
filterUnsignedThinkingBlocks,
hasGeminiHistory,
hasUnsignedThinkingBlocks,
needsThinkingRecovery,
closeToolLoopForThinking,
cleanCacheControl,
clampGeminiThinkingBudget
} from './thinking-utils.js';
import { logger } from '../utils/logger.js';
import { getReasoningThinkingConfig, isGeminiProTierSku } from './reasoning-effort.js';
/**
 * Convert Anthropic Messages API request to the format expected by Cloud Code
 *
 * Uses Google Generative AI format, but for Claude models:
 * - Keeps tool_result in Anthropic format (required by Claude API)
 *
 * Fields read from the request: `model`, `messages`, `system`, `max_tokens`,
 * `temperature`, `top_p`, `top_k`, `stop_sequences`, `tools`, `tool_choice`,
 * `thinking` and `reasoning_effort`.
 *
 * Processing stages, in order:
 * 1. Strip cache_control from the messages.
 * 2. Build `systemInstruction` from `system` (plus an interleaved-thinking hint
 *    for Claude thinking models that have tools).
 * 3. Apply thinking recovery when required, then convert each message into a
 *    `contents` entry (every entry is guaranteed at least one part).
 * 4. Fill `generationConfig` (sampling parameters and thinking configuration).
 * 5. Convert tools into `functionDeclarations` / Google Search tools and derive
 *    `toolConfig` from `tool_choice`.
 * 6. Cap `maxOutputTokens` for Gemini models.
 *
 * The function performs no network I/O; its only side effects are log calls.
 *
 * @param {Object} anthropicRequest - Anthropic format request
 * @returns {Object} Request body for Cloud Code API
 */
export function convertAnthropicToGoogle(anthropicRequest) {
    // [CRITICAL FIX] Pre-clean all cache_control fields from messages (Issue #189)
    // Claude Code CLI sends cache_control on various content blocks, but Cloud Code API
    // rejects them with "Extra inputs are not permitted". Clean them proactively here
    // before any other processing, following the pattern from Antigravity-Manager.
    const messages = cleanCacheControl(anthropicRequest.messages || []);

    const {
        system,
        max_tokens,
        temperature,
        top_p,
        top_k,
        stop_sequences,
        tools,
        tool_choice,
        thinking,
        reasoning_effort
    } = anthropicRequest;
    const modelName = anthropicRequest.model || '';
    const modelFamily = getModelFamily(modelName);
    const isClaudeModel = modelFamily === 'claude';
    const isGeminiModel = modelFamily === 'gemini';
    const isThinking = isThinkingModel(modelName);

    // Single source of truth for tool-name sanitization so that declared names and
    // names referenced from tool_choice can never drift apart.
    const sanitizeToolName = (rawName) => String(rawName).replace(/[^a-zA-Z0-9_-]/g, '_').slice(0, 64);

    const googleRequest = {
        contents: [],
        generationConfig: {}
    };

    // Handle system instruction
    if (system) {
        let systemParts = [];
        if (typeof system === 'string') {
            systemParts = [{ text: system }];
        } else if (Array.isArray(system)) {
            // Filter for text blocks as system prompts are usually text
            // Anthropic supports text blocks in system prompts
            systemParts = system
                .filter(block => block.type === 'text')
                .map(block => ({ text: block.text }));
        }

        if (systemParts.length > 0) {
            googleRequest.systemInstruction = {
                parts: systemParts
            };
        }
    }

    // Add interleaved thinking hint for Claude thinking models with tools
    if (isClaudeModel && isThinking && tools && tools.length > 0) {
        const hint = 'Interleaved thinking is enabled. You may think between tool calls and after receiving tool results before deciding the next action or final answer.';
        if (!googleRequest.systemInstruction) {
            googleRequest.systemInstruction = { parts: [{ text: hint }] };
        } else {
            // Append to the last text part when possible; the part objects were created
            // above, so mutating them does not touch the caller's request.
            const lastPart = googleRequest.systemInstruction.parts[googleRequest.systemInstruction.parts.length - 1];
            if (lastPart && lastPart.text) {
                lastPart.text = `${lastPart.text}\n\n${hint}`;
            } else {
                googleRequest.systemInstruction.parts.push({ text: hint });
            }
        }
    }

    // Apply thinking recovery for Gemini thinking models when needed
    // Gemini needs recovery for tool loops/interrupted tools (stripped thinking)
    let processedMessages = messages;
    if (isGeminiModel && isThinking && needsThinkingRecovery(messages)) {
        logger.debug('[RequestConverter] Applying thinking recovery for Gemini');
        processedMessages = closeToolLoopForThinking(messages, 'gemini');
    }

    // For Claude: apply recovery for cross-model (Gemini→Claude) or unsigned thinking blocks
    // Unsigned thinking blocks occur when Claude Code strips signatures it doesn't understand
    const needsClaudeRecovery = hasGeminiHistory(messages) || hasUnsignedThinkingBlocks(messages);
    if (isClaudeModel && isThinking && needsClaudeRecovery && needsThinkingRecovery(messages)) {
        logger.debug('[RequestConverter] Applying thinking recovery for Claude');
        processedMessages = closeToolLoopForThinking(messages, 'claude');
    }

    // Convert messages to contents, then filter unsigned thinking blocks
    for (const msg of processedMessages) {
        let msgContent = msg.content;

        // For assistant messages, process thinking blocks and reorder content
        if ((msg.role === 'assistant' || msg.role === 'model') && Array.isArray(msgContent)) {
            // First, try to restore signatures for unsigned thinking blocks from cache
            msgContent = restoreThinkingSignatures(msgContent);
            // Remove trailing unsigned thinking blocks
            msgContent = removeTrailingThinkingBlocks(msgContent);
            // Reorder: thinking first, then text, then tool_use
            msgContent = reorderAssistantContent(msgContent);
        }

        const parts = convertContentToParts(msgContent, isClaudeModel, isGeminiModel);

        // SAFETY: Google API requires at least one part per content message
        // This happens when all thinking blocks are filtered out (unsigned)
        if (parts.length === 0) {
            // Use '.' instead of '' because claude models reject empty text parts.
            // A single period is invisible in practice but satisfies the API requirement.
            logger.warn('[RequestConverter] WARNING: Empty parts array after filtering, adding placeholder');
            parts.push({ text: '.' });
        }

        googleRequest.contents.push({
            role: convertRole(msg.role),
            parts
        });
    }

    // Filter unsigned thinking blocks for Claude models
    if (isClaudeModel) {
        googleRequest.contents = filterUnsignedThinkingBlocks(googleRequest.contents);
    }

    // Generation config
    if (max_tokens) {
        googleRequest.generationConfig.maxOutputTokens = max_tokens;
    }
    if (temperature !== undefined) {
        googleRequest.generationConfig.temperature = temperature;
    }
    if (top_p !== undefined) {
        googleRequest.generationConfig.topP = top_p;
    }
    if (top_k !== undefined) {
        googleRequest.generationConfig.topK = top_k;
    }
    if (stop_sequences && stop_sequences.length > 0) {
        googleRequest.generationConfig.stopSequences = stop_sequences;
    }

    // Gemini 3.x performs better with temperature=1 when unset (ported from old Python proxy).
    if (isGeminiModel && /gemini-3(?:\.\d+)?-/i.test(modelName) && temperature === undefined) {
        googleRequest.generationConfig.temperature = 1;
    }

    // OpenAI reasoning_effort takes precedence when supplied by the adapter.
    // Anthropic /v1/messages requests without reasoning_effort keep the existing behavior.
    const reasoningConfig = reasoning_effort !== undefined
        ? getReasoningThinkingConfig(modelName, reasoning_effort)
        : null;

    if (reasoningConfig) {
        googleRequest.generationConfig.thinkingConfig = reasoningConfig.config;

        // The config may carry the budget under either the camelCase or snake_case key.
        const budget = reasoningConfig.config.thinkingBudget
            ?? reasoningConfig.config.thinking_budget;
        const detail = reasoningConfig.mode === 'level'
            ? `thinkingLevel=${reasoningConfig.config.thinkingLevel}`
            : `thinkingBudget=${budget}`;
        logger.debug(`[RequestConverter] reasoning_effort=${reasoningConfig.effort} -> ${detail}`);

        // Keep max_tokens strictly above the thinking budget for Claude
        // (an undefined budget fails the `> 0` comparison and skips this).
        if (isClaudeModel && budget > 0) {
            const currentMaxTokens = googleRequest.generationConfig.maxOutputTokens;
            if (currentMaxTokens && currentMaxTokens <= budget) {
                const adjustedMaxTokens = budget + 8192;
                logger.warn(`[RequestConverter] max_tokens (${currentMaxTokens}) <= thinking_budget (${budget}). Adjusting to ${adjustedMaxTokens}`);
                googleRequest.generationConfig.maxOutputTokens = adjustedMaxTokens;
            }
        }
    } else if (isThinking) {
        if (isClaudeModel) {
            // Claude thinking config
            const thinkingConfig = {
                include_thoughts: true
            };

            // Cloud Code API requires thinking_budget to actually produce thinking blocks.
            // Without it, include_thoughts alone is ignored and Claude falls back to
            // <thinking> XML tags in text. Default to 32000 when not provided (e.g. adaptive mode).
            const thinkingBudget = thinking?.budget_tokens || 32000;
            thinkingConfig.thinking_budget = thinkingBudget;
            logger.debug(`[RequestConverter] Claude thinking enabled with budget: ${thinkingBudget}${!thinking?.budget_tokens ? ' (default)' : ''}`);

            // Validate max_tokens > thinking_budget as required by the API
            const currentMaxTokens = googleRequest.generationConfig.maxOutputTokens;
            if (currentMaxTokens && currentMaxTokens <= thinkingBudget) {
                const adjustedMaxTokens = thinkingBudget + 8192;
                if (thinking?.budget_tokens) {
                    logger.warn(`[RequestConverter] max_tokens (${currentMaxTokens}) <= thinking_budget (${thinkingBudget}). Adjusting to ${adjustedMaxTokens} to satisfy API requirements`);
                } else {
                    logger.debug(`[RequestConverter] Adjusting max_tokens to ${adjustedMaxTokens} for default thinking budget`);
                }
                googleRequest.generationConfig.maxOutputTokens = adjustedMaxTokens;
            }

            googleRequest.generationConfig.thinkingConfig = thinkingConfig;
        } else if (isGeminiModel && !isGeminiProTierSku(modelName)) {
            // Preserve the existing Anthropic behavior when no OpenAI reasoning_effort exists.
            const thinkingConfig = {
                includeThoughts: true,
                thinkingBudget: clampGeminiThinkingBudget(modelName, thinking?.budget_tokens)
            };
            logger.debug(`[RequestConverter] Gemini thinking enabled with budget: ${thinkingConfig.thinkingBudget}`);
            googleRequest.generationConfig.thinkingConfig = thinkingConfig;
        }
    }

    // Convert function tools and built-in Google Search tools.
    if (tools && tools.length > 0) {
        const functionDeclarations = [];
        let hasGoogleSearch = false;
        let hasGoogleSearchRetrieval = false;

        tools.forEach((tool, idx) => {
            // Accept Anthropic (`name`), OpenAI (`function.name`) and `custom.name` shapes.
            const name = tool.name || tool.function?.name || tool.custom?.name || `tool-${idx}`;
            const normalizedName = String(name).toLowerCase().replace(/[_-]/g, '');
            const normalizedType = String(tool.type || '').toLowerCase().replace(/[_-]/g, '');

            if (tool.googleSearch !== undefined
                || normalizedName === 'googlesearch'
                || ['googlesearch', 'websearch', 'websearchpreview'].includes(normalizedType)) {
                hasGoogleSearch = true;
                return;
            }

            if (tool.googleSearchRetrieval !== undefined || normalizedName === 'googlesearchretrieval') {
                hasGoogleSearchRetrieval = true;
                return;
            }

            const description = tool.description || tool.function?.description || tool.custom?.description || '';
            const schema = tool.input_schema
                || tool.function?.input_schema
                || tool.function?.parameters
                || tool.custom?.input_schema
                || tool.parameters
                || { type: 'object' };

            let parameters = sanitizeSchema(schema);
            parameters = cleanSchema(parameters);

            functionDeclarations.push({
                name: sanitizeToolName(name),
                description,
                parameters
            });
        });

        googleRequest.tools = [];
        if (functionDeclarations.length > 0) {
            googleRequest.tools.push({ functionDeclarations });
        }
        if (hasGoogleSearch) {
            googleRequest.tools.push({ googleSearch: {} });
        }
        if (hasGoogleSearchRetrieval) {
            googleRequest.tools.push({ googleSearchRetrieval: {} });
        }

        if (tool_choice?.type === 'none') {
            delete googleRequest.tools;
        } else if (googleRequest.tools.length > 0) {
            if (functionDeclarations.length > 0) {
                const functionCallingConfig = {};
                if (tool_choice?.type === 'any') {
                    functionCallingConfig.mode = 'ANY';
                } else if (tool_choice?.type === 'tool' && tool_choice.name) {
                    functionCallingConfig.mode = 'ANY';
                    // Must match the sanitized name used in functionDeclarations above.
                    functionCallingConfig.allowedFunctionNames = [sanitizeToolName(tool_choice.name)];
                } else if (isClaudeModel) {
                    functionCallingConfig.mode = 'VALIDATED';
                } else {
                    functionCallingConfig.mode = 'AUTO';
                }
                googleRequest.toolConfig = { functionCallingConfig };
            }
            logger.debug(`[RequestConverter] Tools: ${JSON.stringify(googleRequest.tools).substring(0, 300)}`);
        }
    }

    // Cap max tokens for Gemini models
    if (isGeminiModel && googleRequest.generationConfig.maxOutputTokens > GEMINI_MAX_OUTPUT_TOKENS) {
        logger.debug(`[RequestConverter] Capping Gemini max_tokens from ${googleRequest.generationConfig.maxOutputTokens} to ${GEMINI_MAX_OUTPUT_TOKENS}`);
        googleRequest.generationConfig.maxOutputTokens = GEMINI_MAX_OUTPUT_TOKENS;
    }

    return googleRequest;
}