// Claude 请求转换 (Claude → Gemini v1internal) // 对应 transformClaudeRequestIn use super::models::*; use crate::proxy::mappers::signature_store::get_thought_signature; // Deprecated, kept for fallback use crate::proxy::mappers::tool_result_compressor; use crate::proxy::session_manager::SessionManager; use serde_json::{json, Value}; use std::collections::HashMap; // ===== Safety Settings Configuration ===== /// Safety threshold levels for Gemini API /// Can be configured via GEMINI_SAFETY_THRESHOLD environment variable #[derive(Debug, Clone, Copy, PartialEq)] pub enum SafetyThreshold { /// Disable all safety filters (default for proxy compatibility) Off, /// Block low probability and above BlockLowAndAbove, /// Block medium probability and above BlockMediumAndAbove, /// Only block high probability content BlockOnlyHigh, /// Don't block anything (BLOCK_NONE) BlockNone, } impl SafetyThreshold { /// Get threshold from environment variable or default to Off pub fn from_env() -> Self { match std::env::var("GEMINI_SAFETY_THRESHOLD").as_deref() { Ok("OFF") | Ok("off") => SafetyThreshold::Off, Ok("LOW") | Ok("low") => SafetyThreshold::BlockLowAndAbove, Ok("MEDIUM") | Ok("medium") => SafetyThreshold::BlockMediumAndAbove, Ok("HIGH") | Ok("high") => SafetyThreshold::BlockOnlyHigh, Ok("NONE") | Ok("none") => SafetyThreshold::BlockNone, _ => SafetyThreshold::Off, // Default: maintain current behavior } } /// Convert to Gemini API threshold string pub fn to_gemini_threshold(&self) -> &'static str { match self { SafetyThreshold::Off => "OFF", SafetyThreshold::BlockLowAndAbove => "BLOCK_LOW_AND_ABOVE", SafetyThreshold::BlockMediumAndAbove => "BLOCK_MEDIUM_AND_ABOVE", SafetyThreshold::BlockOnlyHigh => "BLOCK_ONLY_HIGH", SafetyThreshold::BlockNone => "BLOCK_NONE", } } } /// Build safety settings based on configuration fn build_safety_settings() -> Value { let threshold = SafetyThreshold::from_env(); let threshold_str = threshold.to_gemini_threshold(); json!([ { "category": 
"HARM_CATEGORY_HARASSMENT", "threshold": threshold_str }, { "category": "HARM_CATEGORY_HATE_SPEECH", "threshold": threshold_str }, { "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": threshold_str }, { "category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": threshold_str }, { "category": "HARM_CATEGORY_CIVIC_INTEGRITY", "threshold": threshold_str }, ]) } /// 清理消息中的 cache_control 字段 /// /// 这个函数会深度遍历所有消息内容块,移除 cache_control 字段。 /// 这是必要的,因为: /// 1. VS Code 等客户端会将历史消息(包含 cache_control)原封不动发回 /// 2. Anthropic API 不接受请求中包含 cache_control 字段 /// 3. 即使是转发到 Gemini,也应该清理以保持协议纯净性 /// /// [FIX #593] 增强版本:添加详细日志用于调试 MCP 工具兼容性问题 pub fn clean_cache_control_from_messages(messages: &mut [Message]) { tracing::info!( "[DEBUG-593] Starting cache_control cleanup for {} messages", messages.len() ); let mut total_cleaned = 0; for (idx, msg) in messages.iter_mut().enumerate() { if let MessageContent::Array(blocks) = &mut msg.content { for (block_idx, block) in blocks.iter_mut().enumerate() { match block { ContentBlock::Thinking { cache_control, .. } => { if cache_control.is_some() { tracing::info!( "[ISSUE-744] Found cache_control in Thinking block at message[{}].content[{}]: {:?}", idx, block_idx, cache_control ); *cache_control = None; total_cleaned += 1; } } ContentBlock::Image { cache_control, .. } => { if cache_control.is_some() { tracing::debug!( "[Cache-Control-Cleaner] Removed cache_control from Image block at message[{}].content[{}]", idx, block_idx ); *cache_control = None; total_cleaned += 1; } } ContentBlock::Document { cache_control, .. } => { if cache_control.is_some() { tracing::debug!( "[Cache-Control-Cleaner] Removed cache_control from Document block at message[{}].content[{}]", idx, block_idx ); *cache_control = None; total_cleaned += 1; } } ContentBlock::ToolUse { cache_control, .. 
} => { if cache_control.is_some() { tracing::debug!( "[Cache-Control-Cleaner] Removed cache_control from ToolUse block at message[{}].content[{}]", idx, block_idx ); *cache_control = None; total_cleaned += 1; } } _ => {} } } } } if total_cleaned > 0 { tracing::info!( "[DEBUG-593] Cache control cleanup complete: removed {} cache_control fields", total_cleaned ); } else { tracing::debug!("[DEBUG-593] No cache_control fields found"); } } /// [FIX #593] 递归深度清理 JSON 中的 cache_control 字段 /// /// 用于处理嵌套结构和非标准位置的 cache_control。 /// 这是最后一道防线,确保发送给 Antigravity 的请求中不包含任何 cache_control。 fn deep_clean_cache_control(value: &mut Value) { match value { Value::Object(map) => { if map.remove("cache_control").is_some() { tracing::debug!("[DEBUG-593] Removed cache_control from nested JSON object"); } for (_, v) in map.iter_mut() { deep_clean_cache_control(v); } } Value::Array(arr) => { for item in arr.iter_mut() { deep_clean_cache_control(item); } } _ => {} } } /// [FIX #564] Sort blocks in assistant messages to ensure thinking blocks are first /// /// When context compression (kilo) reorders message blocks, thinking blocks may appear /// after text blocks. Claude/Anthropic API requires thinking blocks to be first if /// any thinking blocks exist in the message. This function pre-sorts blocks to ensure /// thinking/redacted_thinking blocks always come before other block types. fn sort_thinking_blocks_first(messages: &mut [Message]) { for msg in messages.iter_mut() { if msg.role == "assistant" { if let MessageContent::Array(blocks) = &mut msg.content { // [FIX #709] Triple-stage partition: [Thinking, Text, ToolUse] // This ensures protocol compliance while maintaining logical order. 
let mut thinking_blocks: Vec = Vec::new(); let mut text_blocks: Vec = Vec::new(); let mut tool_blocks: Vec = Vec::new(); let mut other_blocks: Vec = Vec::new(); let original_len = blocks.len(); let mut needs_reorder = false; let mut saw_non_thinking = false; for (_i, block) in blocks.iter().enumerate() { match block { ContentBlock::Thinking { .. } | ContentBlock::RedactedThinking { .. } => { if saw_non_thinking { needs_reorder = true; } } ContentBlock::Text { .. } => { saw_non_thinking = true; } ContentBlock::ToolUse { .. } => { saw_non_thinking = true; // Check if tool is after text (this is normal, but we want a strict group order) } _ => saw_non_thinking = true, } } if needs_reorder || original_len > 1 { // For safety, we always perform the triple partition if there's more than one block. // This also handles empty text block filtering. for block in blocks.drain(..) { match &block { ContentBlock::Thinking { .. } | ContentBlock::RedactedThinking { .. } => { thinking_blocks.push(block); } ContentBlock::Text { text } => { // Filter out purely empty or structural text like "(no content)" if !text.trim().is_empty() && text != "(no content)" { text_blocks.push(block); } } ContentBlock::ToolUse { .. } => { tool_blocks.push(block); } _ => { other_blocks.push(block); } } } // Reconstruct in strict order: Thinking -> Text/Other -> Tool blocks.extend(thinking_blocks); blocks.extend(text_blocks); blocks.extend(other_blocks); blocks.extend(tool_blocks); if needs_reorder { tracing::warn!( "[FIX #709] Reordered assistant messages to [Thinking, Text, Tool] structure." 
); } } } } } } /// 合并 ClaudeRequest 中连续的同角色消息 /// /// 场景: 当从 Spec/Plan 模式切换回编码模式时,可能出现连续两条 "user" 消息 /// (一条是 ToolResult,一条是 )。 /// 这会违反角色交替规则,导致 400 报错。 pub fn merge_consecutive_messages(messages: &mut Vec) { if messages.len() <= 1 { return; } let mut merged: Vec = Vec::with_capacity(messages.len()); let old_messages = std::mem::take(messages); let mut messages_iter = old_messages.into_iter(); if let Some(mut current) = messages_iter.next() { for next in messages_iter { if current.role == next.role { // 合并内容 match (&mut current.content, next.content) { (MessageContent::Array(current_blocks), MessageContent::Array(next_blocks)) => { current_blocks.extend(next_blocks); } (MessageContent::Array(current_blocks), MessageContent::String(next_text)) => { current_blocks.push(ContentBlock::Text { text: next_text }); } (MessageContent::String(current_text), MessageContent::String(next_text)) => { *current_text = format!("{}\n\n{}", current_text, next_text); } (MessageContent::String(current_text), MessageContent::Array(next_blocks)) => { let mut new_blocks = vec![ContentBlock::Text { text: current_text.clone(), }]; new_blocks.extend(next_blocks); current.content = MessageContent::Array(new_blocks); } } } else { merged.push(current); current = next; } } merged.push(current); } *messages = merged; } /// 转换 Claude 请求为 Gemini v1internal 格式 /// [FIX #709] Reorder serialized Gemini parts to ensure thinking blocks are first fn reorder_gemini_parts(parts: &mut Vec) { if parts.len() <= 1 { return; } let mut thinking_parts = Vec::new(); let mut text_parts = Vec::new(); let mut tool_parts = Vec::new(); let mut other_parts = Vec::new(); for part in parts.drain(..) 
{ if part.get("thought").and_then(|t| t.as_bool()) == Some(true) { thinking_parts.push(part); } else if part.get("functionCall").is_some() { tool_parts.push(part); } else if let Some(text) = part.get("text").and_then(|t| t.as_str()) { // Filter empty text parts that might have been created during merging if !text.trim().is_empty() && text != "(no content)" { text_parts.push(part); } } else { other_parts.push(part); } } parts.extend(thinking_parts); parts.extend(text_parts); parts.extend(other_parts); parts.extend(tool_parts); } pub fn transform_claude_request_in( claude_req: &ClaudeRequest, project_id: &str, is_retry: bool, account_id: Option<&str>, _session_id: &str, token: Option<&crate::proxy::token_manager::ProxyToken>, // [NEW] 支持动态规格 ) -> Result { let message_count = claude_req.messages.len(); // [CRITICAL FIX] 预先清理所有消息中的 cache_control 字段 // 这解决了 VS Code 插件等客户端在多轮对话中将历史消息的 cache_control 字段 // 原封不动发回导致的 "Extra inputs are not permitted" 错误 let mut cleaned_req = claude_req.clone(); // [FIX #813] 合并连续的同角色消息 (Consecutive User Messages) // 确保请求符合 Anthropic 和 Gemini 的角色交替协议 merge_consecutive_messages(&mut cleaned_req.messages); clean_cache_control_from_messages(&mut cleaned_req.messages); // [FIX #564] Pre-sort thinking blocks to be first in assistant messages // This handles cases where context compression (kilo) incorrectly reorders blocks sort_thinking_blocks_first(&mut cleaned_req.messages); // [FIX #1747] If thinking is auto-enabled by model default (e.g. Opus) but no // ThinkingConfig was provided by the client, inject a default config with a budget // to prevent 'thinking requires a budget' errors from upstream APIs. 
if cleaned_req.thinking.is_none() && should_enable_thinking_by_default(&cleaned_req.model) { let default_budget = crate::proxy::model_specs::get_thinking_budget(&cleaned_req.model, token); tracing::info!( "[Thinking-Mode] Injecting default ThinkingConfig (budget={}) for model: {}", default_budget, cleaned_req.model ); cleaned_req.thinking = Some(ThinkingConfig { type_: "enabled".to_string(), budget_tokens: Some(default_budget as u32), effort: None, }); } let claude_req = &cleaned_req; // 后续使用清理后的请求 // [NEW] Generate session ID for signature tracking // This enables session-isolated signature storage, preventing cross-conversation pollution let session_id = SessionManager::extract_session_id(claude_req); tracing::debug!("[Claude-Request] Session ID: {}", session_id); // 检测是否有联网工具 (server tool or built-in tool) let has_web_search_tool = claude_req .tools .as_ref() .map(|tools| { tools.iter().any(|t| { t.is_web_search() || t.name.as_deref() == Some("google_search") || t.name.as_deref() == Some("builtin_web_search") || t.type_.as_deref() == Some("web_search_20250305") || t.type_.as_deref() == Some("builtin_web_search") }) }) .unwrap_or(false); // 用于存储 tool_use id -> name 映射 let mut tool_id_to_name: HashMap = HashMap::new(); // 检测是否有 mcp__ 开头的工具 let has_mcp_tools = claude_req .tools .as_ref() .map(|tools| { tools.iter().any(|t| { t.name .as_deref() .map(|n| n.starts_with("mcp__")) .unwrap_or(false) }) }) .unwrap_or(false); // [New] 预先构建工具名称到原始 Schema 的映射,用于后续参数类型修正 let mut tool_name_to_schema = HashMap::new(); if let Some(tools) = &claude_req.tools { for tool in tools { if let (Some(name), Some(schema)) = (&tool.name, &tool.input_schema) { tool_name_to_schema.insert(name.clone(), schema.clone()); } } } // 1. 
System Instruction (注入动态身份防护 & MCP XML 协议) let system_instruction = build_system_instruction(&claude_req.system, &claude_req.model, has_mcp_tools); // Map model name (Use standard mapping) // [IMPROVED] 提取 web search 模型为常量,便于维护 const WEB_SEARCH_FALLBACK_MODEL: &str = "gemini-2.5-flash"; let mapped_model = crate::proxy::common::model_mapping::map_claude_model_to_gemini(&claude_req.model); // 将 Claude 工具转为 Value 数组以便探测联网 let tools_val: Option> = claude_req.tools.as_ref().map(|list| { list.iter() .map(|t| serde_json::to_value(t).unwrap_or(json!({}))) .collect() }); // Resolve grounding config let config = crate::proxy::mappers::common_utils::resolve_request_config( &claude_req.model, &mapped_model, &tools_val, claude_req.size.as_deref(), // [NEW] Pass size parameter claude_req.quality.as_deref(), // [NEW] Pass quality parameter None, // [NEW] image_size None, // body ); // [CRITICAL FIX] Disable dummy thought injection for Vertex AI // [CRITICAL FIX] Disable dummy thought injection for Vertex AI // Vertex AI rejects thinking blocks without valid signatures // Even if thinking is enabled, we should NOT inject dummy blocks for historical messages let allow_dummy_thought = false; // Check if thinking is enabled in the request let thinking_type = claude_req.thinking.as_ref().map(|t| t.type_.as_str()); let mut is_thinking_enabled = thinking_type == Some("enabled") || thinking_type == Some("adaptive") || (thinking_type.is_none() && should_enable_thinking_by_default(&claude_req.model)); // [NEW FIX] Check if target model supports thinking // Only models with "-thinking" suffix or Claude models support thinking // Regular Gemini models (gemini-2.5-flash, gemini-2.5-pro) do NOT support thinking // [FIX #1557] Allow "pro" models (e.g. 
gemini-3-pro, gemini-2.0-pro) to be recognized as thinking capable let target_model_supports_thinking = mapped_model.contains("-thinking") || mapped_model.starts_with("claude-") || mapped_model.contains("gemini-2.0-pro") || mapped_model.contains("gemini-3-pro") || mapped_model.contains("gemini-3.1-pro") // [FIX #2167] gemini-3-flash / gemini-3.1-flash 支持 thinking,必须纳入识别范围 || mapped_model.contains("gemini-3-flash") || mapped_model.contains("gemini-3.1-flash"); if is_thinking_enabled && !target_model_supports_thinking { tracing::warn!( "[Thinking-Mode] Target model '{}' does not support thinking. Force disabling thinking mode.", mapped_model ); is_thinking_enabled = false; } // [REMOVED] 智能降级检查 (should_disable_thinking_due_to_history) // 原因: 该检查过于激进,会导致 Claude Code CLI 在历史记录不完美时永久禁用思考模式 (Issue #2006) // 现在的策略是依赖 thinking_utils.rs 中的 Recovery 机制来修复历史,而不是禁用思考。 // [FIX #295 & #298] If thinking enabled but no signature available, // disable thinking to prevent Gemini 3 Pro rejection if is_thinking_enabled { let global_sig = get_thought_signature(); // Check if there are any thinking blocks in message history let has_thinking_history = claude_req.messages.iter().any(|m| { if m.role == "assistant" { if let MessageContent::Array(blocks) = &m.content { return blocks .iter() .any(|b| matches!(b, ContentBlock::Thinking { .. })); } } false }); // Check if there are function calls in the request let has_function_calls = claude_req.messages.iter().any(|m| { if let MessageContent::Array(blocks) = &m.content { blocks .iter() .any(|b| matches!(b, ContentBlock::ToolUse { .. })) } else { false } }); // [FIX #298] For first-time thinking requests (no thinking history), // we use permissive mode and let upstream handle validation. // We only enforce strict signature checks when function calls are involved. let needs_signature_check = has_function_calls; if !has_thinking_history && is_thinking_enabled { tracing::info!( "[Thinking-Mode] First thinking request detected. 
Using permissive mode - \ signature validation will be handled by upstream API." ); } if needs_signature_check && !has_valid_signature_for_function_calls( &claude_req.messages, &global_sig, &session_id, ) { // [FIX #2167] Flash 模型无签名时使用哨兵值而不是禁用 thinking // 禁用 thinking 会导致模型失去思考能力,哨兵值可让 Gemini 跳过签名校验 let is_flash_model = mapped_model.contains("gemini-3-flash") || mapped_model.contains("gemini-3.1-flash"); if is_flash_model { tracing::info!( "[Thinking-Mode] [FIX #2167] No signature for flash model function calls. \ Will rely on sentinel injection in build_contents." ); // 保持 is_thinking_enabled = true,由 build_contents 内的哨兵处理覆盖 } else { tracing::warn!( "[Thinking-Mode] [FIX #295] No valid signature found for function calls. \ Disabling thinking to prevent Gemini 3 Pro rejection." ); is_thinking_enabled = false; } } } // 4. Generation Config & Thinking (Pass final is_thinking_enabled) let generation_config = build_generation_config( claude_req, &mapped_model, has_web_search_tool, is_thinking_enabled, token, // [NEW] 传递 token 用于动态限额 ); // 2. Contents (Messages) let contents = build_google_contents( &claude_req.messages, claude_req, &mut tool_id_to_name, &tool_name_to_schema, is_thinking_enabled, allow_dummy_thought, &mapped_model, &session_id, is_retry, )?; // 3. Tools let tools = build_tools(&claude_req.tools, has_web_search_tool, &mapped_model)?; // 5. 
Safety Settings (configurable via GEMINI_SAFETY_THRESHOLD env var) let safety_settings = build_safety_settings(); // Build inner request let mut inner_request = json!({ "contents": contents, "safetySettings": safety_settings, }); if let Some(sys_inst) = system_instruction { inner_request["systemInstruction"] = sys_inst; } if !generation_config.is_null() { println!("DEBUG: Assigning generation_config: {}", generation_config); inner_request["generationConfig"] = generation_config; } if let Some(tools_val) = tools { inner_request["tools"] = tools_val; // 显式设置工具配置模式为 VALIDATED inner_request["toolConfig"] = json!({ "functionCallingConfig": { "mode": "VALIDATED" } }); } // 深度清理 [undefined] 字符串 (Cherry Studio 等客户端常见注入) crate::proxy::mappers::common_utils::deep_clean_undefined(&mut inner_request, 0); if config.inject_google_search && !has_web_search_tool { crate::proxy::mappers::common_utils::inject_google_search_tool(&mut inner_request, Some(&mapped_model)); } // Inject imageConfig if present (for image generation models) if let Some(image_config) = config.image_config { if let Some(obj) = inner_request.as_object_mut() { // 1. Remove tools (image generation does not support tools) obj.remove("tools"); // 2. Remove systemInstruction (image generation does not support system prompts) obj.remove("systemInstruction"); // 3. Clean generationConfig (remove responseMimeType, responseModalities etc.) 
let gen_config = obj.entry("generationConfig").or_insert_with(|| json!({})); if let Some(gen_obj) = gen_config.as_object_mut() { // [RESOLVE #1694] Check image thinking mode let image_thinking_mode = crate::proxy::config::get_image_thinking_mode(); if image_thinking_mode == "disabled" { tracing::debug!( "[Claude-Request] Image thinking mode disabled: enforcing includeThoughts=false for {}", mapped_model ); gen_obj.insert( "thinkingConfig".to_string(), json!({ "includeThoughts": false }), ); } gen_obj.remove("responseMimeType"); gen_obj.remove("responseModalities"); gen_obj.insert("imageConfig".to_string(), image_config); } } } // [ADDED v4.1.24] 注入稳定 sessionId 对齐官方规范 if let Some(account_id) = account_id { inner_request["sessionId"] = json!(crate::proxy::common::session::derive_session_id(account_id)); } // 生成 requestId // [CHANGED v4.1.24] Structured requestId to match official format let request_id = format!("agent/antigravity/{}/{}", &session_id[..session_id.len().min(8)], message_count); // 构建最终请求体 let mut body = json!({ "project": project_id, "requestId": request_id, "request": inner_request, "model": config.final_model, "userAgent": "antigravity", // [CHANGED v4.1.24] Use "agent" for all non-image requests "requestType": if config.request_type == "image_gen" { "image_gen" } else { "agent" }, }); // 如果提供了 metadata.user_id,则复用为 sessionId if let Some(metadata) = &claude_req.metadata { if let Some(user_id) = &metadata.user_id { body["request"]["sessionId"] = json!(user_id); } } // [FIX #593] 最后一道防线: 递归深度清理所有 cache_control 字段 // 确保发送给 Antigravity 的请求中不包含任何 cache_control deep_clean_cache_control(&mut body); tracing::debug!("[DEBUG-593] Final deep clean complete, request ready to send"); Ok(body) } /// Check if thinking mode should be enabled by default for a given model /// /// Claude Code v2.0.67+ enables thinking by default for Opus 4.5 models. 
/// This function determines if the model should have thinking enabled /// when no explicit thinking configuration is provided. fn should_enable_thinking_by_default(model: &str) -> bool { let model_lower = model.to_lowercase(); // Enable thinking by default for Opus 4.5 and 4.6 variants if model_lower.contains("opus-4-5") || model_lower.contains("opus-4.5") || model_lower.contains("opus-4-6") || model_lower.contains("opus-4.6") { tracing::debug!( "[Thinking-Mode] Auto-enabling thinking for Opus model: {}", model ); return true; } // Also enable for explicit thinking model variants if model_lower.contains("-thinking") { return true; } // [FIX #1557] Enable thinking by default for Gemini Pro models (gemini-3-pro, gemini-2.0-pro) // These models prioritize reasoning but clients might not send thinking config for them // unless they have "-thinking" suffix (which they don't in Antigravity mapping) if model_lower.contains("gemini-2.0-pro") || model_lower.contains("gemini-3-pro") || model_lower.contains("gemini-3.1-pro") { tracing::debug!( "[Thinking-Mode] Auto-enabling thinking for Gemini Pro model: {}", model ); return true; } // [FEATURE] 为 gemini-3-flash / gemini-3.1-flash 自动开启 thinking // 让 Cherry Studio 等客户端即使未显式传 thinking.type 也能获取思维链内容 if model_lower.contains("gemini-3-flash") || model_lower.contains("gemini-3.1-flash") { tracing::debug!( "[Thinking-Mode] Auto-enabling thinking for Flash model: {}", model ); return true; } false } /// Minimum length for a valid thought_signature const MIN_SIGNATURE_LENGTH: usize = 50; /// [FIX #295] Check if we have any valid signature available for function calls /// This prevents Gemini 3 Pro from rejecting requests due to missing thought_signature /// /// [NEW FIX] Now also checks Session Cache to support retry scenarios fn has_valid_signature_for_function_calls( messages: &[Message], global_sig: &Option, session_id: &str, // NEW: Add session_id parameter ) -> bool { // 1. 
Check global store (deprecated but kept for compatibility) if let Some(sig) = global_sig { if sig.len() >= MIN_SIGNATURE_LENGTH { tracing::debug!( "[Signature-Check] Found valid signature in global store (len: {})", sig.len() ); return true; } } // 2. [NEW] Check Session Cache - this is critical for retry scenarios // When retrying, the signature may not be in messages but exists in Session Cache if let Some(sig) = crate::proxy::SignatureCache::global().get_session_signature(session_id) { if sig.len() >= MIN_SIGNATURE_LENGTH { tracing::info!( "[Signature-Check] Found valid signature in SESSION cache (session: {}, len: {})", session_id, sig.len() ); return true; } } // 3. Check if any message has a thinking block with valid signature for msg in messages.iter().rev() { if msg.role == "assistant" { if let MessageContent::Array(blocks) = &msg.content { for block in blocks { if let ContentBlock::Thinking { signature: Some(sig), .. } = block { if sig.len() >= MIN_SIGNATURE_LENGTH { tracing::debug!( "[Signature-Check] Found valid signature in message history (len: {})", sig.len() ); return true; } } } } } } tracing::warn!( "[Signature-Check] No valid signature found (session: {}, checked: global store, session cache, message history)", session_id ); false } /// 构建 System Instruction (支持动态身份映射与 Prompt 隔离) fn build_system_instruction( system: &Option, _model_name: &str, has_mcp_tools: bool, ) -> Option { let mut parts = Vec::new(); // [NEW] Antigravity 身份指令 (原始简化版) let antigravity_identity = "You are Antigravity, a powerful agentic AI coding assistant designed by the Google Deepmind team working on Advanced Agentic Coding.\n\ You are pair programming with a USER to solve their coding task. 
The task may require creating a new codebase, modifying or debugging an existing codebase, or simply answering a question.\n\ **Absolute paths only**\n\ **Proactiveness**"; // [HYBRID] 检查用户是否已提供 Antigravity 身份 let mut user_has_antigravity = false; if let Some(sys) = system { match sys { SystemPrompt::String(text) => { if text.contains("You are Antigravity") { user_has_antigravity = true; } } SystemPrompt::Array(blocks) => { for block in blocks { if block.block_type == "text" && block.text.contains("You are Antigravity") { user_has_antigravity = true; break; } } } } } // 如果用户没有提供 Antigravity 身份,则注入 if !user_has_antigravity { parts.push(json!({"text": antigravity_identity})); } // [NEW] 注入全局系统提示词 (紧跟 Antigravity 身份之后) let global_prompt_config = crate::proxy::config::get_global_system_prompt(); if global_prompt_config.enabled && !global_prompt_config.content.trim().is_empty() { parts.push(json!({"text": global_prompt_config.content})); } // 添加用户的系统提示词 if let Some(sys) = system { match sys { SystemPrompt::String(text) => { // [MODIFIED] No longer filter "You are an interactive CLI tool" // We pass everything through to ensure Flash/Lite models get full instructions parts.push(json!({"text": text})); } SystemPrompt::Array(blocks) => { for block in blocks { if block.block_type == "text" { // [MODIFIED] No longer filter "You are an interactive CLI tool" parts.push(json!({"text": block.text})); } } } } } // [NEW] MCP XML Bridge: 如果存在 mcp__ 开头的工具,注入专用的调用协议 // 这能有效规避部分 MCP 链路在标准的 tool_use 协议下解析不稳的问题 if has_mcp_tools { let mcp_xml_prompt = "\n\ ==== MCP XML 工具调用协议 (Workaround) ====\n\ 当你需要调用名称以 `mcp__` 开头的 MCP 工具时:\n\ 1) 优先尝试 XML 格式调用:输出 `{\"arg\":\"value\"}`。\n\ 2) 必须直接输出 XML 块,无需 markdown 包装,内容为 JSON 格式的入参。\n\ 3) 这种方式具有更高的连通性和容错性,适用于大型结果返回场景。\n\ ==========================================="; parts.push(json!({"text": mcp_xml_prompt})); } // 如果用户没有提供任何系统提示词,添加结束标记 if !user_has_antigravity { parts.push(json!({"text": "\n--- [SYSTEM_PROMPT_END] ---"})); } Some(json!({ "role": 
"user", "parts": parts })) } /// 构建 Contents (Messages) fn build_contents( content: &MessageContent, is_assistant: bool, _claude_req: &ClaudeRequest, is_thinking_enabled: bool, session_id: &str, allow_dummy_thought: bool, is_retry: bool, tool_id_to_name: &mut HashMap, tool_name_to_schema: &HashMap, mapped_model: &str, last_thought_signature: &mut Option, pending_tool_use_ids: &mut Vec, last_user_task_text_normalized: &mut Option, previous_was_tool_result: &mut bool, _existing_tool_result_ids: &std::collections::HashSet, ) -> Result, String> { let mut parts = Vec::new(); // Track tool results in the current turn to identify missing ones let mut current_turn_tool_result_ids = std::collections::HashSet::new(); // Track if we have already seen non-thinking content in this message. // Anthropic/Gemini protocol: Thinking blocks MUST come first. let mut saw_non_thinking = false; match content { MessageContent::String(text) => { if text != "(no content)" { let trimmed = text.trim(); if !trimmed.is_empty() { parts.push(json!({"text": trimmed})); } } } MessageContent::Array(blocks) => { for item in blocks { match item { ContentBlock::Text { text } => { if text != "(no content)" && !text.trim().is_empty() { // [NEW] 任务去重逻辑: 如果当前是 User 消息,且紧跟在 ToolResult 之后, // 检查该文本是否与上一轮任务描述完全一致。 if !is_assistant && *previous_was_tool_result { if let Some(last_task) = last_user_task_text_normalized { let current_normalized = text.replace(|c: char| c.is_whitespace(), ""); if !current_normalized.is_empty() && current_normalized == *last_task { tracing::info!("[Claude-Request] Dropping duplicated task text echo (len: {})", text.len()); continue; } } } parts.push(json!({"text": text})); saw_non_thinking = true; // 记录最近一次 User 任务文本用于后续比对 if !is_assistant { *last_user_task_text_normalized = Some(text.replace(|c: char| c.is_whitespace(), "")); } *previous_was_tool_result = false; } } ContentBlock::Thinking { thinking, signature, .. 
} => { tracing::debug!( "[DEBUG-TRANSFORM] Processing thinking block. Sig: {:?}", signature ); // [HOTFIX] Gemini Protocol Enforcement: Thinking block MUST be the first block. // If we already have content (like Text), we must downgrade this thinking block to Text. if saw_non_thinking || !parts.is_empty() { tracing::warn!("[Claude-Request] Thinking block found at non-zero index (prev parts: {}). Downgrading to Text.", parts.len()); if !thinking.trim().is_empty() { parts.push(json!({ "text": thinking.trim() })); saw_non_thinking = true; } continue; } // [FIX] If thinking is disabled (smart downgrade), convert ALL thinking blocks to text // to avoid "thinking is disabled but message contains thinking" error if !is_thinking_enabled { tracing::warn!("[Claude-Request] Thinking disabled. Downgrading thinking block to text."); if !thinking.trim().is_empty() { parts.push(json!({ "text": thinking.trim() })); saw_non_thinking = true; } continue; } // [FIX] Empty thinking blocks cause "Field required" errors. // We downgrade them to Text to avoid structural errors and signature mismatch. if thinking.is_empty() { tracing::warn!("[Claude-Request] Empty thinking block detected. Downgrading to Text."); parts.push(json!({ "text": "..." 
})); continue; } // [FIX #752] Strict signature validation // Only use signatures that are cached and compatible with the target model if let Some(sig) = signature { // Check signature length first - if it's too short, it's definitely invalid if sig.len() < MIN_SIGNATURE_LENGTH { tracing::warn!( "[Thinking-Signature] Signature too short (len: {} < {}), downgrading to text.", sig.len(), MIN_SIGNATURE_LENGTH ); parts.push(json!({"text": thinking})); saw_non_thinking = true; continue; } let cached_family = crate::proxy::SignatureCache::global().get_signature_family(sig); match cached_family { Some(family) => { // Check compatibility // [NEW] If is_retry is true, force incompatibility to strip historical signatures // which likely caused the previous 400 error. let compatible = !is_retry && is_model_compatible(&family, mapped_model); if !compatible { tracing::warn!( "[Thinking-Signature] {} signature (Family: {}, Target: {}). Downgrading to text.", if is_retry { "Stripping historical" } else { "Incompatible" }, family, mapped_model ); parts.push(json!({"text": thinking})); saw_non_thinking = true; continue; } // Compatible and not a retry: use signature *last_thought_signature = Some(sig.clone()); let mut part = json!({ "text": thinking, "thought": true, "thoughtSignature": sig }); crate::proxy::common::json_schema::clean_json_schema(&mut part); parts.push(part); } None => { // For JSON tool calling compatibility, if signature is long enough but unknown, // we should trust it rather than downgrade to text if sig.len() >= MIN_SIGNATURE_LENGTH { tracing::debug!( "[Thinking-Signature] Unknown signature origin but valid length (len: {}), using as-is for JSON tool calling.", sig.len() ); *last_thought_signature = Some(sig.clone()); let mut part = json!({ "text": thinking, "thought": true, "thoughtSignature": sig }); crate::proxy::common::json_schema::clean_json_schema( &mut part, ); parts.push(part); } else { // Unknown and too short: downgrade to text for safety 
tracing::warn!( "[Thinking-Signature] Unknown signature origin and too short (len: {}). Downgrading to text for safety.", sig.len() ); parts.push(json!({"text": thinking})); saw_non_thinking = true; continue; } } } } else { // No signature: downgrade to text tracing::warn!( "[Thinking-Signature] No signature provided. Downgrading to text." ); parts.push(json!({"text": thinking})); saw_non_thinking = true; } } ContentBlock::RedactedThinking { data } => { // [FIX] 将 RedactedThinking 作为普通文本处理,保留上下文 tracing::debug!("[Claude-Request] Degrade RedactedThinking to text"); parts.push(json!({ "text": format!("[Redacted Thinking: {}]", data) })); saw_non_thinking = true; continue; } ContentBlock::Image { source, .. } => { if source.source_type == "base64" { parts.push(json!({ "inlineData": { "mimeType": source.media_type, "data": source.data } })); saw_non_thinking = true; } } ContentBlock::Document { source, .. } => { if source.source_type == "base64" { parts.push(json!({ "inlineData": { "mimeType": source.media_type, "data": source.data } })); saw_non_thinking = true; } } ContentBlock::ToolUse { id, name, input, signature, .. 
} => { let mut final_input = input.clone(); // [New] 利用通用引擎修正参数类型 (替代以前硬编码的 shell 工具修复逻辑) if let Some(original_schema) = tool_name_to_schema.get(name) { crate::proxy::common::json_schema::fix_tool_call_args( &mut final_input, original_schema, ); } let mut part = json!({ "functionCall": { "name": name, "args": final_input, "id": id } }); saw_non_thinking = true; // Track pending tool use if is_assistant { pending_tool_use_ids.push(id.clone()); } // 存储 id -> name 映射 tool_id_to_name.insert(id.clone(), name.clone()); // Signature resolution logic // Priority: Client -> Context -> Session Cache -> Tool Cache -> Global Store (deprecated) // [CRITICAL FIX] Do NOT use skip_thought_signature_validator for Vertex AI // Vertex AI rejects this sentinel value, so we only add thoughtSignature if we have a real one let final_sig = signature.as_ref() .or(last_thought_signature.as_ref()) .cloned() .or_else(|| { // [NEW v3.3.17] Try session-based signature cache first (Layer 3) // This provides conversation-level isolation crate::proxy::SignatureCache::global().get_session_signature(session_id) .map(|s| { tracing::info!( "[Claude-Request] Recovered signature from SESSION cache (session: {}, len: {})", session_id, s.len() ); s }) }) .or_else(|| { // Try tool-specific signature cache (Layer 1) crate::proxy::SignatureCache::global().get_tool_signature(id) .map(|s| { tracing::info!("[Claude-Request] Recovered signature from TOOL cache for tool_id: {}", id); s }) }) .or_else(|| { // [DEPRECATED] Global store fallback - kept for backward compatibility let global_sig = get_thought_signature(); if global_sig.is_some() { tracing::warn!( "[Claude-Request] Using deprecated GLOBAL thought_signature fallback (length: {}). 
\ This indicates session cache miss.", global_sig.as_ref().unwrap().len() ); } global_sig }); // [FIX #752] Validate signature before using // Only add thoughtSignature if we have a valid and compatible one if let Some(sig) = final_sig { // [NEW] If this is a retry, do NOT backfill signatures to avoid issues. if is_retry && signature.is_none() { tracing::warn!("[Tool-Signature] Skipping signature backfill for tool_use: {} during retry.", id); } else { // Check signature length first - if it's too short, it's definitely invalid if sig.len() < MIN_SIGNATURE_LENGTH { tracing::warn!( "[Tool-Signature] Signature too short for tool_use: {} (len: {} < {}), skipping.", id, sig.len(), MIN_SIGNATURE_LENGTH ); } else { // Check signature compatibility (optional for tool_use) let cached_family = crate::proxy::SignatureCache::global() .get_signature_family(&sig); let should_use_sig = match cached_family { Some(family) => { // For tool_use, check compatibility if is_model_compatible(&family, mapped_model) { true } else { tracing::warn!( "[Tool-Signature] Incompatible signature for tool_use: {} (Family: {}, Target: {})", id, family, mapped_model ); false } } None => { // For JSON tool calling compatibility, if signature is long enough but unknown, // we should trust it rather than drop it if sig.len() >= MIN_SIGNATURE_LENGTH { tracing::debug!( "[Tool-Signature] Unknown signature origin but valid length (len: {}) for tool_use: {}, using as-is for JSON tool calling.", sig.len(), id ); true } else { // Unknown and too short: only use in non-thinking mode if is_thinking_enabled { tracing::warn!( "[Tool-Signature] Unknown signature origin and too short for tool_use: {} (len: {}). 
Dropping in thinking mode.", id, sig.len() ); false } else { // In non-thinking mode, allow unknown signatures true } } } }; if should_use_sig { part["thoughtSignature"] = json!(sig); } } } } else { // [NEW] Handle missing signature for Gemini thinking models // Use skip_thought_signature_validator as a sentinel value let is_google_cloud = mapped_model.starts_with("projects/"); if is_thinking_enabled && !is_google_cloud { tracing::debug!("[Tool-Signature] Adding GEMINI_SKIP_SIGNATURE for tool_use: {}", id); part["thoughtSignature"] = json!("skip_thought_signature_validator"); } } parts.push(part); } ContentBlock::ToolResult { tool_use_id, content, is_error, .. } => { // Mark this tool ID as resolved in this turn current_turn_tool_result_ids.insert(tool_use_id.clone()); // 优先使用之前记录的 name,否则用 tool_use_id let func_name = tool_id_to_name .get(tool_use_id) .cloned() .unwrap_or_else(|| tool_use_id.clone()); // [FIX #593] 工具输出压缩: 处理超大工具输出 // 使用智能压缩策略(浏览器快照、大文件提示等) let mut compacted_content = content.clone(); if let Some(blocks) = compacted_content.as_array_mut() { tool_result_compressor::sanitize_tool_result_blocks(blocks); } // Smart Truncation: No longer stripping images from Tool Results // Tool results should pass transparency. If images are present, map them to inlineData. 
let mut extra_parts = Vec::new(); let mut merged_content = match &compacted_content { serde_json::Value::String(s) => s.clone(), serde_json::Value::Array(arr) => { let mut texts = Vec::new(); for block in arr { if let Some(text) = block.get("text").and_then(|v| v.as_str()) { texts.push(text.to_string()); } else if block.get("source").is_some() { if block.get("type").and_then(|v| v.as_str()) == Some("image") { let source = block.get("source").unwrap(); if let (Some(media_type), Some(data)) = ( source.get("media_type").and_then(|v| v.as_str()), source.get("data").and_then(|v| v.as_str()) ) { extra_parts.push(json!({ "inlineData": { "mimeType": media_type, "data": data } })); } } } } texts.join("\n") } _ => content.to_string(), }; // Smart Truncation: max chars limit const MAX_TOOL_RESULT_CHARS: usize = 200_000; if merged_content.len() > MAX_TOOL_RESULT_CHARS { tracing::warn!( "Truncating tool result from {} chars to {}", merged_content.len(), MAX_TOOL_RESULT_CHARS ); let mut truncated = merged_content .chars() .take(MAX_TOOL_RESULT_CHARS) .collect::(); truncated.push_str("\n...[truncated output]"); merged_content = truncated; } // [优化] 如果结果为空,注入显式确认信号,防止模型幻觉 if merged_content.trim().is_empty() { if is_error.unwrap_or(false) { merged_content = "Tool execution failed with no output.".to_string(); } else { merged_content = "Command executed successfully.".to_string(); } } let mut part = json!({ "functionResponse": { "name": func_name, "response": {"result": merged_content}, "id": tool_use_id } }); // [FIX] Tool Result 也需要回填签名(如果上下文中有) if let Some(sig) = last_thought_signature.as_ref() { part["thoughtSignature"] = json!(sig); } parts.push(part); // 追加图片 parts for extra in extra_parts { parts.push(extra); } // 标记状态,用于下一条 User 消息的去重判断 *previous_was_tool_result = true; } // ContentBlock::RedactedThinking handled above at line 583 ContentBlock::ServerToolUse { .. } | ContentBlock::WebSearchToolResult { .. 
} => { // 搜索结果 block 不应由客户端发回给上游 (已由 tool_result 替代) continue; } } } } } // If this is a User message, check if we need to inject missing tool results if !is_assistant && !pending_tool_use_ids.is_empty() { let missing_ids: Vec<_> = pending_tool_use_ids .iter() .filter(|id| !current_turn_tool_result_ids.contains(*id)) .cloned() .collect(); if !missing_ids.is_empty() { tracing::warn!("[Elastic-Recovery] Injecting {} missing tool results into User message (IDs: {:?})", missing_ids.len(), missing_ids); for id in missing_ids.iter().rev() { // Insert in reverse order to maintain order at index 0? No, just insert at 0. let name = tool_id_to_name.get(id).cloned().unwrap_or(id.clone()); let synthetic_part = json!({ "functionResponse": { "name": name, "response": { "result": "Tool execution interrupted. No result provided." }, "id": id } }); // Prepend to ensure they are present before any text parts.insert(0, synthetic_part); } } // All pending IDs are now handled (either present or injected) pending_tool_use_ids.clear(); } // Fix for "Thinking enabled, assistant message must start with thinking block" 400 error // [Optimization] Apply this to ALL assistant messages in history, not just the last one. // Vertex AI requires every assistant message to start with a thinking block when thinking is enabled. 
if allow_dummy_thought && is_assistant && is_thinking_enabled { let has_thought_part = parts.iter().any(|p| { p.get("thought").and_then(|v| v.as_bool()).unwrap_or(false) || p.get("thoughtSignature").is_some() || p.get("thought").and_then(|v| v.as_str()).is_some() // 某些情况下可能是 text + thought: true 的组合 }); if !has_thought_part { // Prepend a dummy thinking block to satisfy Gemini v1internal requirements parts.insert( 0, json!({ "text": "Thinking...", "thought": true }), ); tracing::debug!( "Injected dummy thought block for historical assistant message at index {}", parts.len() ); } else { // [Crucial Check] 即使有 thought 块,也必须保证它位于 parts 的首位 (Index 0) // 且必须包含 thought: true 标记 let first_is_thought = parts.get(0).map_or(false, |p| { (p.get("thought").is_some() || p.get("thoughtSignature").is_some()) && p.get("text").is_some() // 对于 v1internal,通常 text + thought: true 才是合规的思维块 }); if !first_is_thought { // 如果首项不符合思维块特征,强制补入一个 parts.insert( 0, json!({ "text": "...", "thought": true }), ); tracing::debug!("First part of model message at {} is not a valid thought block. 
Prepending dummy.", parts.len()); } else { // 确保首项包含了 thought: true (防止只有 signature 的情况) if let Some(p0) = parts.get_mut(0) { if p0.get("thought").is_none() { p0.as_object_mut() .map(|obj| obj.insert("thought".to_string(), json!(true))); } } } } } Ok(parts) } /// 构建 Contents (Messages) fn build_google_content( msg: &Message, claude_req: &ClaudeRequest, is_thinking_enabled: bool, session_id: &str, allow_dummy_thought: bool, is_retry: bool, tool_id_to_name: &mut HashMap, tool_name_to_schema: &HashMap, mapped_model: &str, last_thought_signature: &mut Option, pending_tool_use_ids: &mut Vec, last_user_task_text_normalized: &mut Option, previous_was_tool_result: &mut bool, existing_tool_result_ids: &std::collections::HashSet, ) -> Result { let role = if msg.role == "assistant" { "model" } else { &msg.role }; // Proactive Tool Chain Repair: // If we are about to process an Assistant message, but we still have pending tool_use_ids, // it means the previous turn was interrupted or the user ignored the tool. // We MUST inject a synthetic User message with error results to close the loop. if role == "model" && !pending_tool_use_ids.is_empty() { tracing::warn!("[Elastic-Recovery] Detected interrupted tool chain (Assistant -> Assistant). Injecting synthetic User message for IDs: {:?}", pending_tool_use_ids); let synthetic_parts: Vec = pending_tool_use_ids .iter() .filter(|id| !existing_tool_result_ids.contains(*id)) // [FIX #632] Only inject if ID is truly missing .map(|id| { let name = tool_id_to_name.get(id).cloned().unwrap_or(id.clone()); json!({ "functionResponse": { "name": name, "response": { "result": "Tool execution interrupted. No result provided." 
}, "id": id } }) }) .collect(); if !synthetic_parts.is_empty() { return Ok(json!({ "role": "user", "parts": synthetic_parts })); } // Clear pending IDs as we have handled them pending_tool_use_ids.clear(); } let parts = build_contents( &msg.content, msg.role == "assistant", claude_req, is_thinking_enabled, session_id, allow_dummy_thought, is_retry, tool_id_to_name, tool_name_to_schema, mapped_model, last_thought_signature, pending_tool_use_ids, last_user_task_text_normalized, previous_was_tool_result, existing_tool_result_ids, )?; if parts.is_empty() { return Ok(json!(null)); // Indicate no content to add } Ok(json!({ "role": role, "parts": parts })) } /// 构建 Contents (Messages) fn build_google_contents( messages: &[Message], claude_req: &ClaudeRequest, tool_id_to_name: &mut HashMap, tool_name_to_schema: &HashMap, is_thinking_enabled: bool, allow_dummy_thought: bool, mapped_model: &str, session_id: &str, // [NEW v3.3.17] Session ID for signature caching is_retry: bool, ) -> Result { let mut contents = Vec::new(); let mut last_thought_signature: Option = None; let mut _accumulated_usage: Option = None; // Track pending tool_use IDs for recovery let mut pending_tool_use_ids: Vec = Vec::new(); // [NEW] 用于识别并过滤 Claude Code 重复回显的任务指令 let mut last_user_task_text_normalized: Option = None; let mut previous_was_tool_result = false; let _msg_count = messages.len(); // [FIX #632] Pre-scan all messages to identify all tool_result IDs that ALREADY exist in the conversation. // This prevents Elastic-Recovery from injecting duplicate results if they are present later in the chain. let mut existing_tool_result_ids = std::collections::HashSet::new(); for msg in messages { if let MessageContent::Array(blocks) = &msg.content { for block in blocks { if let ContentBlock::ToolResult { tool_use_id, .. 
} = block { existing_tool_result_ids.insert(tool_use_id.clone()); } } } } for (_i, msg) in messages.iter().enumerate() { let google_content = build_google_content( msg, claude_req, is_thinking_enabled, session_id, allow_dummy_thought, is_retry, tool_id_to_name, tool_name_to_schema, mapped_model, &mut last_thought_signature, &mut pending_tool_use_ids, &mut last_user_task_text_normalized, &mut previous_was_tool_result, &existing_tool_result_ids, )?; if !google_content.is_null() { contents.push(google_content); } } // [Removed] ensure_last_assistant_has_thinking // Corrupted signature issues proved we cannot fake thinking blocks. // Instead we rely on should_disable_thinking_due_to_history to prevent this state. // [FIX P3-3] Strict Role Alternation (Message Merging) // Merge adjacent messages with the same role to satisfy Gemini's strict alternation rule let mut merged_contents = merge_adjacent_roles(contents); // [FIX P3-4] Deep "Un-thinking" Cleanup // If thinking is disabled (e.g. smart downgrade), recursively remove any stray 'thought'/'thoughtSignature' // This is critical because converting Thinking->Text isn't enough; metadata must be gone. 
if !is_thinking_enabled { for msg in &mut merged_contents { clean_thinking_fields_recursive(msg); } } Ok(json!(merged_contents)) } /// Merge adjacent messages with the same role fn merge_adjacent_roles(mut contents: Vec) -> Vec { if contents.is_empty() { return contents; } let mut merged = Vec::new(); let mut current_msg = contents.remove(0); for msg in contents { let current_role = current_msg["role"].as_str().unwrap_or_default(); let next_role = msg["role"].as_str().unwrap_or_default(); if current_role == next_role { // Merge parts if let Some(current_parts) = current_msg.get_mut("parts").and_then(|p| p.as_array_mut()) { if let Some(next_parts) = msg.get("parts").and_then(|p| p.as_array()) { current_parts.extend(next_parts.clone()); // [FIX #709] Core Fix: After merging parts from adjacent messages, // we must RE-SORT them to ensure any thinking blocks from the // second message are moved to the very front of the combined array. reorder_gemini_parts(current_parts); } } } else { merged.push(current_msg); current_msg = msg; } } merged.push(current_msg); merged } /// 构建 Tools fn build_tools( tools: &Option>, has_web_search: bool, mapped_model: &str, ) -> Result, String> { if let Some(tools_list) = tools { let mut function_declarations: Vec = Vec::new(); let mut has_google_search = has_web_search; for tool in tools_list { // 1. Detect server tools / built-in tools like web_search if tool.is_web_search() { has_google_search = true; continue; } if let Some(t_type) = &tool.type_ { if t_type == "web_search_20250305" { has_google_search = true; continue; } } // 2. Detect by name if let Some(name) = &tool.name { if name == "web_search" || name == "google_search" || name == "builtin_web_search" { has_google_search = true; continue; } // 3. 
Client tools require input_schema let mut input_schema = tool.input_schema.clone().unwrap_or(json!({ "type": "object", "properties": {} })); crate::proxy::common::json_schema::clean_json_schema(&mut input_schema); function_declarations.push(json!({ "name": name, "description": tool.description, "parameters": input_schema })); } } let mut tool_list = Vec::new(); // [优化] Gemini 2.0+ 及 3.0 系列模型通常支持混合工具调用 (Function Calling + Google Search) // 只有针对老旧模型或特定受限环境才需要互斥。 let model_lower = mapped_model.to_lowercase(); let supports_mixed_tools = model_lower.contains("gemini-2.0") || model_lower.contains("gemini-2.5") || model_lower.contains("gemini-3"); if !function_declarations.is_empty() { let mut func_obj = serde_json::Map::new(); func_obj.insert( "functionDeclarations".to_string(), json!(function_declarations), ); tool_list.push(json!(func_obj)); if has_google_search { if supports_mixed_tools { tracing::info!( "[Claude-Request] Enabling MIXED tool calling for {}: Function Calling + Google Search.", mapped_model ); let mut search_obj = serde_json::Map::new(); search_obj.insert("googleSearch".to_string(), json!({})); tool_list.push(json!(search_obj)); } else { tracing::info!( "[Claude-Request] Skipping googleSearch injection for {} due to existing function declarations. 
\ Older Gemini models may not support mixed tool types.", mapped_model ); } } } else if has_google_search { let mut search_obj = serde_json::Map::new(); search_obj.insert("googleSearch".to_string(), json!({})); tool_list.push(json!(search_obj)); } if !tool_list.is_empty() { return Ok(Some(json!(tool_list))); } } Ok(None) } /// 构建 Generation Config fn build_generation_config( claude_req: &ClaudeRequest, mapped_model: &str, _has_web_search: bool, is_thinking_enabled: bool, token: Option<&crate::proxy::token_manager::ProxyToken>, // [NEW] ) -> Value { let mut config = json!({}); // Thinking 配置 if is_thinking_enabled { let mut thinking_config = json!({"includeThoughts": true}); let user_thinking_type = claude_req.thinking.as_ref().map(|t| t.type_.as_str()); let user_is_adaptive = user_thinking_type == Some("adaptive"); let budget_tokens = claude_req .thinking .as_ref() .and_then(|t| t.budget_tokens) .unwrap_or_else(|| crate::proxy::model_specs::get_thinking_budget(mapped_model, token) as u32); let thinking_budget_cap = crate::proxy::model_specs::get_thinking_budget(mapped_model, token); let tb_config = crate::proxy::config::get_thinking_budget_config(); let budget = match tb_config.mode { crate::proxy::config::ThinkingBudgetMode::Passthrough => budget_tokens as u64, crate::proxy::config::ThinkingBudgetMode::Custom => { let mut custom_value = tb_config.custom_value as u64; // [FIX #1602] 针对 Gemini 系列模型,在自定义模式下也强制执行动态限额 let model_lower = mapped_model.to_lowercase(); let is_gemini_limited = (model_lower.contains("gemini") && !model_lower.contains("-image")) || model_lower.contains("flash") || model_lower.ends_with("-thinking"); if is_gemini_limited && custom_value > thinking_budget_cap { tracing::warn!( "[Claude-Request] Custom mode: capping thinking_budget from {} to {} for Gemini model {}", custom_value, thinking_budget_cap, mapped_model ); custom_value = thinking_budget_cap; } custom_value } crate::proxy::config::ThinkingBudgetMode::Auto => { // [FIX #1592] Use mapped 
model for robust detection, same as OpenAI protocol let model_lower = mapped_model.to_lowercase(); let is_gemini_limited = (model_lower.contains("gemini") && !model_lower.contains("-image")) || model_lower.contains("flash") || model_lower.ends_with("-thinking"); if is_gemini_limited && budget_tokens as u64 > thinking_budget_cap { tracing::info!( "[Claude-Request] Auto mode: capping thinking_budget from {} to {} for Gemini model {}", budget_tokens, thinking_budget_cap, mapped_model ); thinking_budget_cap } else { budget_tokens as u64 } } crate::proxy::config::ThinkingBudgetMode::Adaptive => budget_tokens as u64, // Adaptive 模式透传原始预算(但不作为限制),用于后续逻辑判断 }; let global_mode_is_adaptive = matches!(tb_config.mode, crate::proxy::config::ThinkingBudgetMode::Adaptive); // 只要用户指定 adaptive 或者全局配置为 adaptive,且是支持的思维模型,就启用自适应 let should_use_adaptive = (user_is_adaptive || global_mode_is_adaptive) && (mapped_model.to_lowercase().contains("claude") || mapped_model.to_lowercase().contains("gemini-3")); let effort = claude_req.output_config.as_ref().and_then(|c| c.effort.as_ref()) .or_else(|| claude_req.thinking.as_ref().and_then(|t| t.effort.as_ref())); if should_use_adaptive { // [FIX #2208] thinkingLevel is ONLY supported by Claude models via Vertex AI native protocol. // Gemini models (including gemini-3.x) use v1internal which only accepts thinkingBudget. // Previous code incorrectly used contains("gemini-3") as the condition, causing 400 INVALID_ARGUMENT // for gemini-3.1-pro-high / gemini-3.1-pro-low in adaptive mode. 
let lower_mapped = mapped_model.to_lowercase(); if lower_mapped.contains("claude") { // Claude 系列走 Vertex AI 原生协议,支持 thinkingLevel 分级参数 let mapped_level = match effort.map(|e| e.to_lowercase()).as_deref() { Some("low") => "low", Some("medium") => "medium", Some("high") | Some("max") => "high", _ => "high", }; tracing::debug!("[Claude-Request] Mapping adaptive mode to thinkingLevel: {} for Claude model", mapped_level); thinking_config["thinkingLevel"] = json!(mapped_level); // Claude using thinkingLevel must NOT have thinkingBudget to avoid conflict thinking_config.as_object_mut().unwrap().remove("thinkingBudget"); } else { // Gemini 系列(含 gemini-3.x)走 v1internal 协议,只接受 thinkingBudget,不支持 thinkingLevel // [FIX #2007] Cherry Studio / Claude Protocol 400 Error Fix // Gemini 1.5/2.0 models via Vertex AI often reject thinkingBudget: -1 (Adaptive) with 400 Invalid Argument // especially when maxOutputTokens is high. // We align with OpenAI mapper behavior: use 24576 as safe adaptive budget. tracing::debug!("[Claude-Request] Mapping adaptive mode to safe budget (24576) for Gemini model (thinkingLevel not supported)"); thinking_config["thinkingBudget"] = json!(24576); } // 针对自适应模式,如果没有显式设置,确保 maxOutputTokens 给足空间 // OpenAI mapper uses 57344 (24576 + 32768), we normally use 64k limit. 
if config.get("maxOutputTokens").is_none() { config["maxOutputTokens"] = json!(64000); } } else { // [FIX #2007] Opus 4.6 Thinking Alignment (OpenAI Protocol Recipe) // Explicitly set fixed budget for Opus 4.6 to match successful OpenAI pattern if mapped_model.to_lowercase().contains("claude-opus-4-6-thinking") { tracing::debug!("[Opus-Alignment] Enforcing fixed thinkingBudget 24576 for Opus 4.6"); thinking_config["thinkingBudget"] = json!(24576); } else { thinking_config["thinkingBudget"] = json!(budget); } } config["thinkingConfig"] = thinking_config; } // 其他参数 if let Some(temp) = claude_req.temperature { config["temperature"] = json!(temp); } if let Some(top_p) = claude_req.top_p { config["topP"] = json!(top_p); } else { config["topP"] = json!(1.0); // [CHANGED v4.1.24] Default topP=1.0 to match official client } if let Some(top_k) = claude_req.top_k { config["topK"] = json!(top_k); } else { config["topK"] = json!(40); // [ADDED v4.1.24] Default topK=40 to match official client } // web_search 强制 candidateCount=1 /*if has_web_search { config["candidateCount"] = json!(1); }*/ // max_tokens 映射为 maxOutputTokens // [FIX] 不再默认设置 81920,防止非思维模型 (如 claude-sonnet-4-6) 报 400 Invalid Argument let mut final_max_tokens: Option = claude_req.max_tokens.map(|t| t as i64); // [NEW] 确保 maxOutputTokens 大于 thinkingBudget (API 强约束) // [NEW] 确保 maxOutputTokens 大于 thinkingBudget (API 强约束) let model_lower = mapped_model.to_lowercase(); // 重新计算 should_use_adaptive (因为上面定义的作用域仅在其 if 块内有效,或者我们可以假设在这里也需要同样的逻辑) // 但为了简洁和解耦,我们这里重新从 config 读取 let tb_config_chk = crate::proxy::config::get_thinking_budget_config(); let global_adaptive = matches!(tb_config_chk.mode, crate::proxy::config::ThinkingBudgetMode::Adaptive); let req_adaptive = claude_req.thinking.as_ref().map(|t| t.type_ == "adaptive").unwrap_or(false); let is_adaptive_effective = (req_adaptive || global_adaptive) && model_lower.contains("claude"); // [FIX] Lower default overhead to keep total under 65536 let final_overhead = if 
is_adaptive_effective { 64000 } else { 32768 }; // [FIX #2007] Opus 4.6 Thinking Alignment // OpenAI logs show maxOutputTokens = 57344 (24576 + 32768) if model_lower.contains("claude-opus-4-6-thinking") && is_thinking_enabled { final_max_tokens = Some(57344); tracing::debug!("[Opus-Alignment] Enforcing maxOutputTokens 57344 for Opus 4.6"); } if let Some(thinking_config) = config.get("thinkingConfig") { if let Some(budget) = thinking_config .get("thinkingBudget") .and_then(|t| t.as_u64()) { let current = final_max_tokens.unwrap_or(0); if current <= budget as i64 { // [FIX #1675] 针对图像模型使用更小的增量 (2048) let overhead = if mapped_model.contains("-image") { 2048 } else { 8192 }; let boosted = (budget + overhead).min(65536); // [FIX] Never exceed hard limit final_max_tokens = Some(boosted as i64); tracing::info!( "[Generation-Config] Bumping maxOutputTokens to {} due to thinking budget of {}", boosted, budget ); } } else if is_adaptive_effective { // [FIX] Adaptive mode (no budget set in thinkingConfig), apply default maxOutputTokens if final_max_tokens.is_none() { final_max_tokens = Some(final_overhead as i64); } } } else { // No thinkingConfig if final_max_tokens.is_none() && is_adaptive_effective { final_max_tokens = Some(final_overhead as i64); } } if let Some(val) = final_max_tokens { // [FIX] Cap maxOutputTokens to 65536 to avoid INVALID_ARGUMENT (Cherry Studio sends 128000) // Gemini models typically support max 8192 or 65536 output tokens. 128k is usually invalid. let safe_limit = 65536; if val > safe_limit { tracing::warn!( "[Generation-Config] Capping maxOutputTokens from {} to {} to prevent 400 Invalid Argument", val, safe_limit ); config["maxOutputTokens"] = json!(safe_limit); } else { config["maxOutputTokens"] = json!(val); } } // [优化] 设置全局停止序列,防止模型幻觉出对话标记 // [FIX #2007] Opus 4.6 Thinking Alignment // Successful OpenAI logs show NO stop sequences were sent for Opus 4.6 Thinking. 
/// Reports whether a signature cached for model `cached` may be reused with model `target`.
///
/// Both names are compared case-insensitively. They are compatible when they are equal, or when
/// both contain the same family marker from the list below, e.g.
/// `"gemini-1.5-pro"` vs `"gemini-1.5-pro-002"` -> compatible,
/// `"gemini-1.5-pro"` vs `"gemini-2.0-flash"` -> incompatible.
/// Anything else is treated as incompatible.
fn is_model_compatible(cached: &str, target: &str) -> bool {
    // Family markers. Claude families are grouped per minor version; Gemini families are kept
    // strict, so pro and flash of the same version are NOT interchangeable.
    const SIGNATURE_FAMILIES: [&str; 6] = [
        "claude-3-5",
        "claude-3-7",
        "gemini-1.5-pro",
        "gemini-1.5-flash",
        "gemini-2.0-flash",
        "gemini-2.0-pro",
    ];

    let cached_lc = cached.to_lowercase();
    let target_lc = target.to_lowercase();

    cached_lc == target_lc
        || SIGNATURE_FAMILIES
            .iter()
            .any(|&family| cached_lc.contains(family) && target_lc.contains(family))
}
} = &blocks[0] { assert!( cache_control.is_none(), "Deserialization should result in None for null cache_control" ); } } // Now test serialization let serialized = serde_json::to_value(&req).unwrap(); println!("Serialized: {}", serialized); assert!(serialized["messages"][0]["content"][0] .get("cache_control") .is_none()); } #[test] fn test_simple_request() { let req = ClaudeRequest { model: "claude-sonnet-4-6".to_string(), messages: vec![Message { role: "user".to_string(), content: MessageContent::String("Hello".to_string()), }], system: None, tools: None, stream: false, max_tokens: None, temperature: None, top_p: None, top_k: None, thinking: None, metadata: None, output_config: None, size: None, quality: None, }; let result = transform_claude_request_in(&req, "test-project", false, None, "test_session", None); assert!(result.is_ok()); let body = result.unwrap(); assert_eq!(body["project"], "test-project"); assert!(body["requestId"].as_str().unwrap().starts_with("agent/")); } #[test] fn test_clean_json_schema() { let mut schema = json!({ "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", "additionalProperties": false, "properties": { "location": { "type": "string", "description": "The city and state, e.g. 
San Francisco, CA", "minLength": 1, "exclusiveMinimum": 0 }, "unit": { "type": ["string", "null"], "enum": ["celsius", "fahrenheit"], "default": "celsius" }, "date": { "type": "string", "format": "date" } }, "required": ["location"] }); clean_json_schema(&mut schema); // Check removed fields assert!(schema.get("$schema").is_none()); assert!(schema.get("additionalProperties").is_none()); assert!(schema["properties"]["location"].get("minLength").is_none()); assert!(schema["properties"]["unit"].get("default").is_none()); assert!(schema["properties"]["date"].get("format").is_none()); // Check union type handling ["string", "null"] -> "string" assert_eq!(schema["properties"]["unit"]["type"], "string"); // Check types are lowercased assert_eq!(schema["type"], "object"); assert_eq!(schema["properties"]["location"]["type"], "string"); assert_eq!(schema["properties"]["date"]["type"], "string"); } #[test] fn test_complex_tool_result() { let req = ClaudeRequest { model: "claude-3-5-sonnet-20241022".to_string(), messages: vec![ Message { role: "user".to_string(), content: MessageContent::String("Run command".to_string()), }, Message { role: "assistant".to_string(), content: MessageContent::Array(vec![ContentBlock::ToolUse { id: "call_1".to_string(), name: "run_command".to_string(), input: json!({"command": "ls"}), signature: None, cache_control: None, }]), }, Message { role: "user".to_string(), content: MessageContent::Array(vec![ContentBlock::ToolResult { tool_use_id: "call_1".to_string(), content: json!([ {"type": "text", "text": "file1.txt\n"}, {"type": "text", "text": "file2.txt"} ]), is_error: Some(false), }]), }, ], system: None, tools: None, stream: false, max_tokens: None, temperature: None, top_p: None, top_k: None, thinking: None, metadata: None, output_config: None, size: None, quality: None, }; let result = transform_claude_request_in(&req, "test-project", false, None, "test_session", None); assert!(result.is_ok()); let body = result.unwrap(); let contents = 
body["request"]["contents"].as_array().unwrap(); // Check the tool result message (last message) let tool_resp_msg = &contents[2]; let parts = tool_resp_msg["parts"].as_array().unwrap(); let func_resp = &parts[0]["functionResponse"]; assert_eq!(func_resp["name"], "run_command"); assert_eq!(func_resp["id"], "call_1"); // Verify merged content let resp_text = func_resp["response"]["result"].as_str().unwrap(); assert!(resp_text.contains("file1.txt")); assert!(resp_text.contains("file2.txt")); assert!(resp_text.contains("\n")); } #[test] fn test_cache_control_cleanup() { // 模拟 VS Code 插件发送的包含 cache_control 的历史消息 let req = ClaudeRequest { model: "claude-sonnet-4-6".to_string(), messages: vec![ Message { role: "user".to_string(), content: MessageContent::String("Hello".to_string()), }, Message { role: "assistant".to_string(), content: MessageContent::Array(vec![ ContentBlock::Thinking { thinking: "Let me think...".to_string(), signature: Some("sig123".to_string()), cache_control: Some(json!({"type": "ephemeral"})), // 这个应该被清理 }, ContentBlock::Text { text: "Here is my response".to_string(), }, ]), }, Message { role: "user".to_string(), content: MessageContent::Array(vec![ContentBlock::Image { source: ImageSource { source_type: "base64".to_string(), media_type: "image/png".to_string(), data: "iVBORw0KGgo=".to_string(), }, cache_control: Some(json!({"type": "ephemeral"})), // 这个也应该被清理 }]), }, ], system: None, tools: None, stream: false, max_tokens: None, temperature: None, top_p: None, top_k: None, thinking: None, metadata: None, output_config: None, size: None, quality: None, }; let result = transform_claude_request_in(&req, "test-project", false, None, "test_session", None); assert!(result.is_ok()); // 验证请求成功转换 let body = result.unwrap(); assert_eq!(body["project"], "test-project"); // 注意: cache_control 的清理发生在内部,我们无法直接从 JSON 输出验证 // 但如果没有清理,后续发送到 Anthropic API 时会报错 // 这个测试主要确保清理逻辑不会导致转换失败 } #[test] fn test_thinking_mode_auto_disable_on_tool_use_history() { // [场景] 
历史消息中有一个工具调用链,且 Assistant 消息没有 Thinking 块 // 期望: 系统自动降级,禁用 Thinking 模式,以避免 400 错误 let req = ClaudeRequest { model: "claude-sonnet-4-6".to_string(), messages: vec![ Message { role: "user".to_string(), content: MessageContent::String("Check files".to_string()), }, // Assistant 使用工具,但在非 Thinking 模式下 Message { role: "assistant".to_string(), content: MessageContent::Array(vec![ ContentBlock::Text { text: "Checking...".to_string(), }, ContentBlock::ToolUse { id: "tool_1".to_string(), name: "list_files".to_string(), input: json!({}), cache_control: None, signature: None, }, ]), }, // 用户返回工具结果 Message { role: "user".to_string(), content: MessageContent::Array(vec![ContentBlock::ToolResult { tool_use_id: "tool_1".to_string(), content: serde_json::Value::String("file1.txt\nfile2.txt".to_string()), is_error: Some(false), // cache_control: None, // removed }]), }, ], system: None, tools: Some(vec![Tool { name: Some("list_files".to_string()), description: Some("List files".to_string()), input_schema: Some(json!({"type": "object"})), type_: None, // cache_control: None, // removed }]), stream: false, max_tokens: None, temperature: None, top_p: None, top_k: None, thinking: Some(ThinkingConfig { type_: "enabled".to_string(), budget_tokens: Some(1024), effort: None, }), metadata: None, output_config: None, size: None, quality: None, }; let result = transform_claude_request_in(&req, "test-project", false, None, "test_session", None); assert!(result.is_ok()); let body = result.unwrap(); let request = &body["request"]; // 验证: generationConfig 中不应包含 thinkingConfig (因为被降级了) // 即使请求中明确启用了 thinking if let Some(gen_config) = request.get("generationConfig") { assert!( gen_config.get("thinkingConfig").is_none(), "thinkingConfig should be removed due to downgrade" ); } // 验证: 依然能生成有效的请求体 assert!(request.get("contents").is_some()); } #[test] fn test_thinking_block_not_prepend_when_disabled() { // 验证当 thinking 未启用时,不会补全 thinking 块 let req = ClaudeRequest { model: 
"claude-sonnet-4-6".to_string(), messages: vec![ Message { role: "user".to_string(), content: MessageContent::String("Hello".to_string()), }, Message { role: "assistant".to_string(), content: MessageContent::Array(vec![ContentBlock::Text { text: "Response".to_string(), }]), }, ], system: None, tools: None, stream: false, max_tokens: None, temperature: None, top_p: None, top_k: None, thinking: None, // 未启用 thinking metadata: None, output_config: None, size: None, quality: None, }; let result = transform_claude_request_in(&req, "test-project", false, None, "test_session", None); assert!(result.is_ok()); let body = result.unwrap(); let contents = body["request"]["contents"].as_array().unwrap(); let last_model_msg = contents .iter() .rev() .find(|c| c["role"] == "model") .unwrap(); let parts = last_model_msg["parts"].as_array().unwrap(); // 验证没有补全 thinking 块 assert_eq!(parts.len(), 1, "Should only have the original text block"); assert_eq!(parts[0]["text"], "Response"); } #[test] fn test_thinking_block_empty_content_fix() { // [场景] 客户端发送了一个内容为空的 thinking 块 // 期望: 自动填充 "..." 
let req = ClaudeRequest { model: "claude-sonnet-4-6".to_string(), messages: vec![Message { role: "assistant".to_string(), content: MessageContent::Array(vec![ ContentBlock::Thinking { thinking: "".to_string(), // 空内容 signature: Some("sig".to_string()), cache_control: None, }, ContentBlock::Text { text: "Hi".to_string(), }, ]), }], system: None, tools: None, stream: false, max_tokens: None, temperature: None, top_p: None, top_k: None, thinking: Some(ThinkingConfig { type_: "enabled".to_string(), budget_tokens: Some(1024), effort: None, }), metadata: None, output_config: None, size: None, quality: None, }; let result = transform_claude_request_in(&req, "test-project", false, None, "test_session", None); assert!(result.is_ok(), "Transformation failed"); let body = result.unwrap(); let contents = body["request"]["contents"].as_array().unwrap(); let parts = contents[0]["parts"].as_array().unwrap(); // 验证 thinking 块 assert_eq!( parts[0]["text"], "...", "Empty thinking should be filled with ..." 
); assert!( parts[0].get("thought").is_none(), "Empty thinking should be downgraded to text" ); } #[test] fn test_redacted_thinking_degradation() { // [场景] 客户端包含 RedactedThinking // 期望: 降级为普通文本,不带 thought: true let req = ClaudeRequest { model: "claude-sonnet-4-6".to_string(), messages: vec![Message { role: "assistant".to_string(), content: MessageContent::Array(vec![ ContentBlock::RedactedThinking { data: "some data".to_string(), }, ContentBlock::Text { text: "Hi".to_string(), }, ]), }], system: None, tools: None, stream: false, max_tokens: None, temperature: None, top_p: None, top_k: None, thinking: None, metadata: None, output_config: None, size: None, quality: None, }; let result = transform_claude_request_in(&req, "test-project", false, None, "test_session", None); assert!(result.is_ok()); let body = result.unwrap(); let parts = body["request"]["contents"][0]["parts"].as_array().unwrap(); // 验证 RedactedThinking -> Text let text = parts[0]["text"].as_str().unwrap(); assert!(text.contains("[Redacted Thinking: some data]")); assert!( parts[0].get("thought").is_none(), "Redacted thinking should NOT have thought: true" ); } // ================================================================================== // [FIX #564] Test: Thinking blocks are sorted to be first after context compression // ================================================================================== #[test] fn test_thinking_blocks_sorted_first_after_compression() { // Simulate kilo context compression reordering: text BEFORE thinking let mut messages = vec![Message { role: "assistant".to_string(), content: MessageContent::Array(vec![ // Wrong order: Text before Thinking (simulates kilo compression) ContentBlock::Text { text: "Some regular text".to_string(), }, ContentBlock::Thinking { thinking: "My thinking process".to_string(), signature: Some( "valid_signature_1234567890_abcdefghij_klmnopqrstuvwxyz_test".to_string(), ), cache_control: None, }, ContentBlock::Text { text: "More 
text".to_string(), }, ]), }]; // Apply the fix sort_thinking_blocks_first(&mut messages); // Verify thinking is now first if let MessageContent::Array(blocks) = &messages[0].content { assert_eq!(blocks.len(), 3, "Should still have 3 blocks"); assert!( matches!(blocks[0], ContentBlock::Thinking { .. }), "Thinking should be first" ); assert!( matches!(blocks[1], ContentBlock::Text { .. }), "Text should be second" ); assert!( matches!(blocks[2], ContentBlock::Text { .. }), "Text should be third" ); // Verify content preserved if let ContentBlock::Thinking { thinking, .. } = &blocks[0] { assert_eq!(thinking, "My thinking process"); } } else { panic!("Expected Array content"); } } #[test] fn test_thinking_blocks_no_reorder_when_already_first() { // Correct order: Thinking already first - should not trigger reorder let mut messages = vec![Message { role: "assistant".to_string(), content: MessageContent::Array(vec![ ContentBlock::Thinking { thinking: "My thinking".to_string(), signature: Some("sig123".to_string()), cache_control: None, }, ContentBlock::Text { text: "Some text".to_string(), }, ]), }]; // Apply the fix (should be no-op) sort_thinking_blocks_first(&mut messages); // Verify order unchanged if let MessageContent::Array(blocks) = &messages[0].content { assert!( matches!(blocks[0], ContentBlock::Thinking { .. }), "Thinking should still be first" ); assert!( matches!(blocks[1], ContentBlock::Text { .. 
}), "Text should still be second" ); } } #[test] fn test_merge_consecutive_messages() { let mut messages = vec![ Message { role: "user".to_string(), content: MessageContent::String("Hello".to_string()), }, Message { role: "user".to_string(), content: MessageContent::Array(vec![ContentBlock::Text { text: "World".to_string(), }]), }, Message { role: "assistant".to_string(), content: MessageContent::String("Hi".to_string()), }, Message { role: "user".to_string(), content: MessageContent::Array(vec![ContentBlock::ToolResult { tool_use_id: "test_id".to_string(), content: serde_json::json!("result"), is_error: None, }]), }, Message { role: "user".to_string(), content: MessageContent::Array(vec![ContentBlock::Text { text: "System Reminder".to_string(), }]), }, ]; merge_consecutive_messages(&mut messages); assert_eq!(messages.len(), 3); assert_eq!(messages[0].role, "user"); if let MessageContent::Array(blocks) = &messages[0].content { assert_eq!(blocks.len(), 2); match &blocks[0] { ContentBlock::Text { text } => assert_eq!(text, "Hello"), _ => panic!("Expected text block"), } match &blocks[1] { ContentBlock::Text { text } => assert_eq!(text, "World"), _ => panic!("Expected text block"), } } else { panic!("Expected array content at index 0"); } assert_eq!(messages[1].role, "assistant"); assert_eq!(messages[2].role, "user"); if let MessageContent::Array(blocks) = &messages[2].content { assert_eq!(blocks.len(), 2); match &blocks[0] { ContentBlock::ToolResult { tool_use_id, .. 
} => assert_eq!(tool_use_id, "test_id"), _ => panic!("Expected tool_result block"), } match &blocks[1] { ContentBlock::Text { text } => assert_eq!(text, "System Reminder"), _ => panic!("Expected text block"), } } else { panic!("Expected array content at index 2"); } } #[test] fn test_default_max_tokens() { let req = ClaudeRequest { model: "claude-3-opus".to_string(), messages: vec![Message { role: "user".to_string(), content: MessageContent::String("Hello".to_string()), }], system: None, tools: None, stream: false, max_tokens: None, temperature: None, top_p: None, top_k: None, thinking: None, metadata: None, output_config: None, size: None, quality: None, }; let result = transform_claude_request_in(&req, "test-v", false, None, "test_session", None).unwrap(); // [FIX] Since we removed the default 81920, maxOutputTokens should NOT be present // when max_tokens is None and thinking is disabled let gen_config = &result["request"]["generationConfig"]; assert!( gen_config.get("maxOutputTokens").is_none(), "maxOutputTokens should not be set when max_tokens is None" ); } #[test] fn test_claude_flash_thinking_budget_capping() { // Use full path or ensure import of ThinkingConfig // transform_claude_request and models are needed. // Assuming models are available via super imports, but let's be explicit if needed. 
// Setup request with high budget let req = ClaudeRequest { model: "gemini-2.0-flash-thinking-exp".to_string(), // Contains "flash" messages: vec![], thinking: Some(ThinkingConfig { type_: "enabled".to_string(), budget_tokens: Some(32000), effort: None, }), max_tokens: None, temperature: None, top_p: None, top_k: None, // Added missing field stream: false, system: None, tools: None, metadata: None, output_config: None, size: None, quality: None, }; let result = transform_claude_request_in(&req, "proj", false, None, "test_session", None).unwrap(); let budget = result["request"]["generationConfig"]["thinkingConfig"]["thinkingBudget"] .as_u64() .unwrap(); assert_eq!(budget, 24576); // capped by model_specs.get_thinking_budget("gemini-2.0-flash-thinking-exp") // Setup request for Pro thinking model (mock name for testing) let req_pro = ClaudeRequest { model: "gemini-2.0-pro-thinking-exp".to_string(), // Contains "thinking" but not "flash" messages: vec![], thinking: Some(ThinkingConfig { type_: "enabled".to_string(), budget_tokens: Some(32000), effort: None, }), max_tokens: None, temperature: None, top_p: None, top_k: None, // Added missing field stream: false, system: None, tools: None, metadata: None, output_config: None, size: None, quality: None, }; // Should cap let result_pro = transform_claude_request_in(&req_pro, "proj", false, None, "test_session", None).unwrap(); assert_eq!(result_pro["request"]["generationConfig"]["thinkingConfig"]["thinkingBudget"], 24576); } #[test] fn test_gemini_pro_thinking_support() { // Setup request for Gemini Pro (no -thinking suffix) let req = ClaudeRequest { model: "gemini-3-pro-preview".to_string(), messages: vec![Message { role: "user".to_string(), content: MessageContent::String("Hello".to_string()), }], thinking: Some(ThinkingConfig { type_: "enabled".to_string(), budget_tokens: Some(16000), effort: None, }), max_tokens: None, temperature: None, top_p: None, top_k: None, stream: false, system: None, tools: None, metadata: 
None, output_config: None, size: None, quality: None, }; // Transform let result = transform_claude_request_in(&req, "proj", false, None, "test_session", None).unwrap(); let gen_config = &result["request"]["generationConfig"]; // thinkingConfig should be present (not forced disabled) assert!( gen_config.get("thinkingConfig").is_some(), "thinkingConfig should be preserved for gemini-3-pro" ); let budget = gen_config["thinkingConfig"]["thinkingBudget"] .as_u64() .unwrap(); // [FIX #1592] Since it's < 24576, it should be kept as 16000 assert_eq!(budget, 16000); } #[test] fn test_gemini_pro_default_thinking() { // Setup request for Gemini Pro WITHOUT thinking config let req = ClaudeRequest { model: "gemini-3-pro-preview".to_string(), messages: vec![Message { role: "user".to_string(), content: MessageContent::String("Hello".to_string()), }], thinking: None, // No thinking config provided by client max_tokens: None, temperature: None, top_p: None, top_k: None, stream: false, system: None, tools: None, metadata: None, output_config: None, size: None, quality: None, }; // Transform let result = transform_claude_request_in(&req, "proj", false, None, "test_session", None).unwrap(); let gen_config = &result["request"]["generationConfig"]; // thinkingConfig SHOULD be injected because of default-on logic assert!( gen_config.get("thinkingConfig").is_some(), "thinkingConfig should be auto-enabled for gemini-3-pro" ); } #[test] fn test_claude_image_thinking_mode_disabled() { // 1. Force image thinking mode to "disabled" crate::proxy::config::update_image_thinking_mode(Some("disabled".to_string())); // 2. 
Setup Claude request for an image model (mapped to gemini-3-pro-image) let req = ClaudeRequest { model: "gemini-3-pro-image".to_string(), // Explicitly use recognized image model messages: vec![Message { role: "user".to_string(), content: MessageContent::String("Draw a cat".to_string()), }], thinking: None, max_tokens: None, temperature: None, top_p: None, top_k: None, stream: false, system: None, tools: None, metadata: None, output_config: None, size: Some("1024x1024".to_string()), quality: Some("hd".to_string()), }; // 3. Transform request let result = transform_claude_request_in(&req, "test-proj", false, None, "test_session", None).unwrap(); // 4. Verify thinkingConfig has includeThoughts: false let gen_config = result["request"]["generationConfig"].as_object().expect("Should have generationConfig"); let thinking_config = gen_config.get("thinkingConfig").and_then(|t| t.as_object()).expect("Should have thinkingConfig (explicitly disabled)"); assert_eq!(thinking_config["includeThoughts"], false); // 5. Reset global mode crate::proxy::config::update_image_thinking_mode(Some("enabled".to_string())); } #[test] fn test_claude_adaptive_global_config() { // Set global config to Adaptive + High effort let config = ThinkingBudgetConfig { mode: crate::proxy::config::ThinkingBudgetMode::Adaptive, custom_value: 0, effort: Some("high".to_string()), }; crate::proxy::config::update_thinking_budget_config(config); let req = ClaudeRequest { model: "claude-3-7-sonnet-thinking".to_string(), // thinking capable messages: vec![Message { role: "user".to_string(), content: MessageContent::String("test".to_string()), }], thinking: None, // No client thinking config stream: false, // ... 
minimal fields max_tokens: None, temperature: None, top_p: None, top_k: None, system: None, tools: None, metadata: None, output_config: None, size: None, quality: None, }; // Transform let result = transform_claude_request_in(&req, "test-proj", false, None, "test_session", None).unwrap(); let gen_config = result["request"]["generationConfig"].as_object().unwrap(); let thinking_config = gen_config["thinkingConfig"].as_object().unwrap(); // Check injection assert_eq!(thinking_config["includeThoughts"], true); assert_eq!(thinking_config["thinkingBudget"], -1); assert!(thinking_config.get("thinkingType").is_none()); assert!(thinking_config.get("effort").is_none()); // Check maxOutputTokens default for adaptive let max_output_tokens = gen_config["maxOutputTokens"].as_i64().unwrap(); assert_eq!(max_output_tokens, 131072); // Reset global config crate::proxy::config::update_thinking_budget_config(ThinkingBudgetConfig::default()); } #[test] fn test_mixed_tools_injection_for_gemini_2_0() { // [场景] 使用 Gemini 2.0 模型,同时提供自定义工具和启用全网搜索 // 期望: 转换后的请求应同时包含 googleSearch 和 functionDeclarations let req = ClaudeRequest { model: "claude-sonnet-4-6".to_string(), // 映射到 gemini-2.0-flash-exp messages: vec![Message { role: "user".to_string(), content: MessageContent::String("Help me search and use tools".to_string()), }], system: None, tools: Some(vec![Tool { type_: None, name: Some("get_weather".to_string()), description: Some("Get weather".to_string()), input_schema: Some(serde_json::json!({ "type": "object", "properties": { "location": {"type": "string"} } })), }]), stream: false, max_tokens: None, temperature: None, top_p: None, top_k: None, thinking: None, metadata: None, output_config: None, size: None, quality: None, }; // 模拟映射到 Gemini 2.0 let mapped_model = "gemini-2.0-flash-exp"; // 这里我们直接测试 build_tools 函数 (它是 pub(crate) 且在同模块下) let result = build_tools(&req.tools, true, mapped_model); assert!(result.is_ok()); let tools_val = result.unwrap().expect("Should have tools"); let 
tools_arr = tools_val.as_array().expect("Tools should be an array"); let has_google_search = tools_arr.iter().any(|t| t.get("googleSearch").is_some()); let has_functions = tools_arr.iter().any(|t| t.get("functionDeclarations").is_some()); assert!(has_google_search, "Gemini 2.0 should support mixed Google Search"); assert!(has_functions, "Gemini 2.0 should support mixed function declarations"); } #[test] fn test_no_mixed_tools_for_older_gemini() { // [场景] 使用 Gemini 1.5 模型,同时提供自定义工具和启用全网搜索 // 期望: 转换后的请求应只包含 functionDeclarations,googleSearch 被跳过以避免 400 let req = ClaudeRequest { model: "claude-sonnet-4-6".to_string(), messages: vec![Message { role: "user".to_string(), content: MessageContent::String("Help me search and use tools".to_string()), }], system: None, tools: Some(vec![Tool { type_: None, name: Some("get_weather".to_string()), description: Some("Get weather".to_string()), input_schema: Some(serde_json::json!({ "type": "object", "properties": { "location": {"type": "string"} } })), }]), stream: false, max_tokens: None, temperature: None, top_p: None, top_k: None, thinking: None, metadata: None, output_config: None, size: None, quality: None, }; // 模拟映射到 Gemini 1.5 let mapped_model = "gemini-1.5-flash-002"; // 测试 build_tools 函数 let result = build_tools(&req.tools, true, mapped_model); assert!(result.is_ok()); let tools_val = result.unwrap().expect("Should have tools"); let tools_arr = tools_val.as_array().expect("Tools should be an array"); let has_google_search = tools_arr.iter().any(|t| t.get("googleSearch").is_some()); let has_functions = tools_arr.iter().any(|t| t.get("functionDeclarations").is_some()); assert!(!has_google_search, "Older Gemini models should NOT have mixed tools"); assert!(has_functions); } }