Spaces:
Paused
feat(provider): add detailed transaction logging for gemini cli provider
To improve the debuggability of the custom Gemini CLI provider, this commit introduces a comprehensive file-based logging system that captures the full lifecycle of each API call. It also includes several fixes to improve provider stability and compatibility.
Key changes include:
- A new `_GeminiCliFileLogger` class that creates a unique directory per request in `logs/gemini_cli_logs/`. It logs the request payload, raw SSE stream, final reassembled response, and any errors encountered during the transaction.
- A custom `_stream_to_completion_response` implementation to correctly reassemble streaming chunks into a final completion object for logging and non-streaming responses.
- A new `_transform_tool_schemas` function to sanitize tool schemas for the Gemini CLI endpoint, removing unsupported properties like `strict` and `additionalProperties`.
- A fix in `RotatingClient` to ensure the loop correctly continues to the next credential after an inner-loop failure.
- Added logging in the main proxy app to capture reasoning parameters from incoming requests for better upstream visibility.
|
@@ -66,7 +66,7 @@ from proxy_app.batch_manager import EmbeddingBatcher
|
|
| 66 |
from proxy_app.detailed_logger import DetailedLogger
|
| 67 |
|
| 68 |
# --- Logging Configuration ---
|
| 69 |
-
LOG_DIR = Path(__file__).resolve().parent.parent / "logs"
|
| 70 |
LOG_DIR.mkdir(exist_ok=True)
|
| 71 |
|
| 72 |
# Configure a file handler for INFO-level logs and higher
|
|
@@ -489,6 +489,25 @@ async def chat_completions(
|
|
| 489 |
OpenAI-compatible endpoint powered by the RotatingClient.
|
| 490 |
Handles both streaming and non-streaming responses and logs them.
|
| 491 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 492 |
logger = DetailedLogger() if ENABLE_REQUEST_LOGGING else None
|
| 493 |
try:
|
| 494 |
request_data = await request.json()
|
|
|
|
| 66 |
from proxy_app.detailed_logger import DetailedLogger
|
| 67 |
|
| 68 |
# --- Logging Configuration ---
|
| 69 |
+
LOG_DIR = Path(__file__).resolve().parent.parent.parent / "logs"
|
| 70 |
LOG_DIR.mkdir(exist_ok=True)
|
| 71 |
|
| 72 |
# Configure a file handler for INFO-level logs and higher
|
|
|
|
| 489 |
OpenAI-compatible endpoint powered by the RotatingClient.
|
| 490 |
Handles both streaming and non-streaming responses and logs them.
|
| 491 |
"""
|
| 492 |
+
if ENABLE_REQUEST_LOGGING:
|
| 493 |
+
# Preserve and re-use the request body so downstream code can still call request.json()
|
| 494 |
+
raw_body = await request.body()
|
| 495 |
+
try:
|
| 496 |
+
parsed_body = json.loads(raw_body.decode("utf-8")) if raw_body else {}
|
| 497 |
+
except Exception:
|
| 498 |
+
parsed_body = {}
|
| 499 |
+
# Reattach the raw body for later reads
|
| 500 |
+
request._body = raw_body
|
| 501 |
+
|
| 502 |
+
# Extract the fields we want to log (supporting possible nesting in generationConfig)
|
| 503 |
+
model = parsed_body.get("model")
|
| 504 |
+
generation_cfg = parsed_body.get("generationConfig", {}) or parsed_body.get("generation_config", {}) or {}
|
| 505 |
+
reasoning_effort = parsed_body.get("reasoning_effort") or generation_cfg.get("reasoning_effort")
|
| 506 |
+
custom_reasoning_budget = parsed_body.get("custom_reasoning_budget") or generation_cfg.get("custom_reasoning_budget", False)
|
| 507 |
+
|
| 508 |
+
logging.getLogger("rotator_library").info(
|
| 509 |
+
f"Handling reasoning parameters: model={model}, reasoning_effort={reasoning_effort}, custom_reasoning_budget={custom_reasoning_budget}"
|
| 510 |
+
)
|
| 511 |
logger = DetailedLogger() if ENABLE_REQUEST_LOGGING else None
|
| 512 |
try:
|
| 513 |
request_data = await request.json()
|
|
@@ -8,11 +8,6 @@ import logging
|
|
| 8 |
|
| 9 |
from .provider_urls import get_provider_endpoint
|
| 10 |
|
| 11 |
-
LOGS_DIR = Path(__file__).resolve().parent.parent.parent / "logs"
|
| 12 |
-
|
| 13 |
-
# Create directories if they don't exist
|
| 14 |
-
LOGS_DIR.mkdir(exist_ok=True)
|
| 15 |
-
|
| 16 |
def log_request_to_console(url: str, headers: dict, client_info: tuple, request_data: dict):
|
| 17 |
"""
|
| 18 |
Logs a concise, single-line summary of an incoming request to the console.
|
|
|
|
| 8 |
|
| 9 |
from .provider_urls import get_provider_endpoint
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
def log_request_to_console(url: str, headers: dict, client_info: tuple, request_data: dict):
|
| 12 |
"""
|
| 13 |
Logs a concise, single-line summary of an incoming request to the console.
|
|
@@ -710,6 +710,10 @@ class RotatingClient:
|
|
| 710 |
raise last_exception
|
| 711 |
await self.usage_manager.record_failure(current_cred, model, classified_error)
|
| 712 |
break
|
|
|
|
|
|
|
|
|
|
|
|
|
| 713 |
|
| 714 |
else: # This is the standard API Key / litellm-handled provider logic
|
| 715 |
is_oauth = provider in self.oauth_providers
|
|
@@ -736,6 +740,12 @@ class RotatingClient:
|
|
| 736 |
|
| 737 |
litellm_kwargs = sanitize_request_payload(litellm_kwargs, model)
|
| 738 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 739 |
for attempt in range(self.max_retries):
|
| 740 |
try:
|
| 741 |
lib_logger.info(f"Attempting stream with credential ...{current_cred[-6:]} (Attempt {attempt + 1}/{self.max_retries})")
|
|
@@ -749,6 +759,7 @@ class RotatingClient:
|
|
| 749 |
else:
|
| 750 |
lib_logger.warning(f"Pre-request callback failed but abort_on_callback_error is False. Proceeding with request. Error: {e}")
|
| 751 |
|
|
|
|
| 752 |
response = await litellm.acompletion(
|
| 753 |
**litellm_kwargs,
|
| 754 |
logger_fn=self._litellm_logger_callback
|
|
|
|
| 710 |
raise last_exception
|
| 711 |
await self.usage_manager.record_failure(current_cred, model, classified_error)
|
| 712 |
break
|
| 713 |
+
|
| 714 |
+
# If the inner loop breaks, it means the key failed and we need to rotate.
|
| 715 |
+
# Continue to the next iteration of the outer while loop to pick a new key.
|
| 716 |
+
continue
|
| 717 |
|
| 718 |
else: # This is the standard API Key / litellm-handled provider logic
|
| 719 |
is_oauth = provider in self.oauth_providers
|
|
|
|
| 740 |
|
| 741 |
litellm_kwargs = sanitize_request_payload(litellm_kwargs, model)
|
| 742 |
|
| 743 |
+
# If the provider is 'qwen_code', set the custom provider to 'qwen'
|
| 744 |
+
# and strip the prefix from the model name for LiteLLM.
|
| 745 |
+
if provider == "qwen_code":
|
| 746 |
+
litellm_kwargs["custom_llm_provider"] = "qwen"
|
| 747 |
+
litellm_kwargs["model"] = model.split('/', 1)[1]
|
| 748 |
+
|
| 749 |
for attempt in range(self.max_retries):
|
| 750 |
try:
|
| 751 |
lib_logger.info(f"Attempting stream with credential ...{current_cred[-6:]} (Attempt {attempt + 1}/{self.max_retries})")
|
|
|
|
| 759 |
else:
|
| 760 |
lib_logger.warning(f"Pre-request callback failed but abort_on_callback_error is False. Proceeding with request. Error: {e}")
|
| 761 |
|
| 762 |
+
lib_logger.info(f"DEBUG: litellm.acompletion kwargs: {litellm_kwargs}")
|
| 763 |
response = await litellm.acompletion(
|
| 764 |
**litellm_kwargs,
|
| 765 |
logger_fn=self._litellm_logger_callback
|
|
@@ -10,11 +10,68 @@ from .provider_interface import ProviderInterface
|
|
| 10 |
from .gemini_auth_base import GeminiAuthBase
|
| 11 |
import litellm
|
| 12 |
from litellm.exceptions import RateLimitError
|
|
|
|
| 13 |
import os
|
| 14 |
from pathlib import Path
|
|
|
|
|
|
|
| 15 |
|
| 16 |
lib_logger = logging.getLogger('rotator_library')
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
CODE_ASSIST_ENDPOINT = "https://cloudcode-pa.googleapis.com/v1internal"
|
| 19 |
|
| 20 |
# [NEW] Hardcoded model list based on Kilo example
|
|
@@ -277,6 +334,161 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
|
|
| 277 |
|
| 278 |
yield openai_chunk
|
| 279 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 280 |
async def acompletion(self, client: httpx.AsyncClient, **kwargs) -> Union[litellm.ModelResponse, AsyncGenerator[litellm.ModelResponse, None]]:
|
| 281 |
model = kwargs["model"]
|
| 282 |
credential_path = kwargs.pop("credential_identifier")
|
|
@@ -293,6 +505,9 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
|
|
| 293 |
|
| 294 |
# Handle :thinking suffix from Kilo example
|
| 295 |
model_name = model.split('/')[-1].replace(':thinking', '')
|
|
|
|
|
|
|
|
|
|
| 296 |
|
| 297 |
gen_config = {
|
| 298 |
"maxOutputTokens": kwargs.get("max_tokens", 64000), # Increased default
|
|
@@ -325,14 +540,14 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
|
|
| 325 |
request_payload["request"]["systemInstruction"] = system_instruction
|
| 326 |
|
| 327 |
if "tools" in kwargs and kwargs["tools"]:
|
| 328 |
-
function_declarations = [
|
| 329 |
-
tool["function"] for tool in kwargs["tools"] if tool.get("type") == "function"
|
| 330 |
-
]
|
| 331 |
if function_declarations:
|
| 332 |
request_payload["request"]["tools"] = [{"functionDeclarations": function_declarations}]
|
| 333 |
|
| 334 |
-
# Log the final payload for debugging
|
| 335 |
lib_logger.debug(f"Gemini CLI Request Payload: {json.dumps(request_payload, indent=2)}")
|
|
|
|
|
|
|
| 336 |
url = f"{CODE_ASSIST_ENDPOINT}:streamGenerateContent"
|
| 337 |
|
| 338 |
async def stream_handler():
|
|
@@ -342,19 +557,42 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
|
|
| 342 |
"X-Goog-Api-Client": "gl-node/22.17.0",
|
| 343 |
"Client-Metadata": "ideType=IDE_UNSPECIFIED,platform=PLATFORM_UNSPECIFIED,pluginType=GEMINI",
|
| 344 |
})
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 358 |
|
| 359 |
try:
|
| 360 |
response_gen = await do_call()
|
|
@@ -378,7 +616,7 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
|
|
| 378 |
else:
|
| 379 |
# Accumulate stream for non-streaming response
|
| 380 |
chunks = [chunk async for chunk in response_gen]
|
| 381 |
-
return
|
| 382 |
|
| 383 |
# Use the shared GeminiAuthBase for auth logic
|
| 384 |
# get_models is not applicable for this custom provider
|
|
|
|
| 10 |
from .gemini_auth_base import GeminiAuthBase
|
| 11 |
import litellm
|
| 12 |
from litellm.exceptions import RateLimitError
|
| 13 |
+
from litellm.llms.vertex_ai.common_utils import _build_vertex_schema
|
| 14 |
import os
|
| 15 |
from pathlib import Path
|
| 16 |
+
import uuid
|
| 17 |
+
from datetime import datetime
|
| 18 |
|
| 19 |
lib_logger = logging.getLogger('rotator_library')
|
| 20 |
|
| 21 |
+
LOGS_DIR = Path(__file__).resolve().parent.parent.parent.parent / "logs"
|
| 22 |
+
GEMINI_CLI_LOGS_DIR = LOGS_DIR / "gemini_cli_logs"
|
| 23 |
+
|
| 24 |
+
class _GeminiCliFileLogger:
|
| 25 |
+
"""A simple file logger for a single Gemini CLI transaction."""
|
| 26 |
+
def __init__(self, model_name: str):
|
| 27 |
+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
|
| 28 |
+
request_id = str(uuid.uuid4())
|
| 29 |
+
# Sanitize model name for directory
|
| 30 |
+
safe_model_name = model_name.replace('/', '_').replace(':', '_')
|
| 31 |
+
self.log_dir = GEMINI_CLI_LOGS_DIR / f"{timestamp}_{safe_model_name}_{request_id}"
|
| 32 |
+
try:
|
| 33 |
+
self.log_dir.mkdir(parents=True, exist_ok=True)
|
| 34 |
+
self.enabled = True
|
| 35 |
+
except Exception as e:
|
| 36 |
+
lib_logger.error(f"Failed to create Gemini CLI log directory: {e}")
|
| 37 |
+
self.enabled = False
|
| 38 |
+
|
| 39 |
+
def log_request(self, payload: Dict[str, Any]):
|
| 40 |
+
"""Logs the request payload sent to Gemini."""
|
| 41 |
+
if not self.enabled: return
|
| 42 |
+
try:
|
| 43 |
+
with open(self.log_dir / "request_payload.json", "w", encoding="utf-8") as f:
|
| 44 |
+
json.dump(payload, f, indent=2, ensure_ascii=False)
|
| 45 |
+
except Exception as e:
|
| 46 |
+
lib_logger.error(f"_GeminiCliFileLogger: Failed to write request: {e}")
|
| 47 |
+
|
| 48 |
+
def log_response_chunk(self, chunk: str):
|
| 49 |
+
"""Logs a raw chunk from the Gemini response stream."""
|
| 50 |
+
if not self.enabled: return
|
| 51 |
+
try:
|
| 52 |
+
with open(self.log_dir / "response_stream.log", "a", encoding="utf-8") as f:
|
| 53 |
+
f.write(chunk + "\n")
|
| 54 |
+
except Exception as e:
|
| 55 |
+
lib_logger.error(f"_GeminiCliFileLogger: Failed to write response chunk: {e}")
|
| 56 |
+
|
| 57 |
+
def log_error(self, error_message: str):
|
| 58 |
+
"""Logs an error message."""
|
| 59 |
+
if not self.enabled: return
|
| 60 |
+
try:
|
| 61 |
+
with open(self.log_dir / "error.log", "a", encoding="utf-8") as f:
|
| 62 |
+
f.write(f"[{datetime.utcnow().isoformat()}] {error_message}\n")
|
| 63 |
+
except Exception as e:
|
| 64 |
+
lib_logger.error(f"_GeminiCliFileLogger: Failed to write error: {e}")
|
| 65 |
+
|
| 66 |
+
def log_final_response(self, response_data: Dict[str, Any]):
|
| 67 |
+
"""Logs the final, reassembled response."""
|
| 68 |
+
if not self.enabled: return
|
| 69 |
+
try:
|
| 70 |
+
with open(self.log_dir / "final_response.json", "w", encoding="utf-8") as f:
|
| 71 |
+
json.dump(response_data, f, indent=2, ensure_ascii=False)
|
| 72 |
+
except Exception as e:
|
| 73 |
+
lib_logger.error(f"_GeminiCliFileLogger: Failed to write final response: {e}")
|
| 74 |
+
|
| 75 |
CODE_ASSIST_ENDPOINT = "https://cloudcode-pa.googleapis.com/v1internal"
|
| 76 |
|
| 77 |
# [NEW] Hardcoded model list based on Kilo example
|
|
|
|
| 334 |
|
| 335 |
yield openai_chunk
|
| 336 |
|
| 337 |
+
def _stream_to_completion_response(self, chunks: List[litellm.ModelResponse]) -> litellm.ModelResponse:
    """
    Manually reassembles streaming chunks into a complete response.
    This replaces the non-existent litellm.utils.stream_to_completion_response function.

    Args:
        chunks: The streaming chunks, in arrival order.

    Returns:
        A litellm.ModelResponse shaped like a non-streaming "chat.completion"
        object: content / reasoning_content / tool_calls / function_call deltas
        concatenated, plus the last finish_reason and usage block observed.

    Raises:
        ValueError: If ``chunks`` is empty.
    """
    if not chunks:
        raise ValueError("No chunks provided for reassembly")

    # Initialize the final response structure.
    final_message = {"role": "assistant"}
    aggregated_tool_calls = {}
    usage_data = None
    finish_reason = None

    # The first chunk supplies response metadata (id / created / model).
    first_chunk = chunks[0]

    # Process each chunk to aggregate content.
    for chunk in chunks:
        if not hasattr(chunk, 'choices') or not chunk.choices:
            continue

        choice = chunk.choices[0]
        delta = choice.get("delta", {})

        # Aggregate plain text content.
        if "content" in delta and delta["content"] is not None:
            if "content" not in final_message:
                final_message["content"] = ""
            final_message["content"] += delta["content"]

        # Aggregate reasoning content.
        if "reasoning_content" in delta and delta["reasoning_content"] is not None:
            if "reasoning_content" not in final_message:
                final_message["reasoning_content"] = ""
            final_message["reasoning_content"] += delta["reasoning_content"]

        # Aggregate tool calls, keyed by stream index so partial name/argument
        # fragments belonging to the same call are concatenated together.
        if "tool_calls" in delta and delta["tool_calls"]:
            for tc_chunk in delta["tool_calls"]:
                index = tc_chunk["index"]
                if index not in aggregated_tool_calls:
                    aggregated_tool_calls[index] = {"function": {"name": "", "arguments": ""}}
                if "id" in tc_chunk:
                    aggregated_tool_calls[index]["id"] = tc_chunk["id"]
                if "function" in tc_chunk:
                    if "name" in tc_chunk["function"] and tc_chunk["function"]["name"] is not None:
                        aggregated_tool_calls[index]["function"]["name"] += tc_chunk["function"]["name"]
                    if "arguments" in tc_chunk["function"] and tc_chunk["function"]["arguments"] is not None:
                        aggregated_tool_calls[index]["function"]["arguments"] += tc_chunk["function"]["arguments"]

        # Aggregate function calls (legacy format).
        if "function_call" in delta and delta["function_call"] is not None:
            if "function_call" not in final_message:
                final_message["function_call"] = {"name": "", "arguments": ""}
            if "name" in delta["function_call"] and delta["function_call"]["name"] is not None:
                final_message["function_call"]["name"] += delta["function_call"]["name"]
            if "arguments" in delta["function_call"] and delta["function_call"]["arguments"] is not None:
                final_message["function_call"]["arguments"] += delta["function_call"]["arguments"]

        # Keep the finish reason from the last chunk that has one.
        if choice.get("finish_reason"):
            finish_reason = choice["finish_reason"]

    # Take usage data from the last chunk that carries it.
    for chunk in reversed(chunks):
        if hasattr(chunk, 'usage') and chunk.usage:
            usage_data = chunk.usage
            break

    # Add tool calls to the final message, if any.
    # FIX: emit in ascending stream-index order. Relying on dict insertion
    # order would misorder calls if a later index happened to arrive first.
    if aggregated_tool_calls:
        final_message["tool_calls"] = [aggregated_tool_calls[i] for i in sorted(aggregated_tool_calls)]

    # Ensure standard fields are present for consistent logging.
    for field in ["content", "tool_calls", "function_call"]:
        if field not in final_message:
            final_message[field] = None

    # Construct the final response.
    final_choice = {
        "index": 0,
        "message": final_message,
        "finish_reason": finish_reason
    }

    # Create the final ModelResponse.
    final_response_data = {
        "id": first_chunk.id,
        "object": "chat.completion",
        "created": first_chunk.created,
        "model": first_chunk.model,
        "choices": [final_choice],
        "usage": usage_data
    }

    return litellm.ModelResponse(**final_response_data)
|
| 434 |
+
|
| 435 |
+
def _gemini_cli_transform_schema(self, schema: Dict[str, Any]) -> Dict[str, Any]:
|
| 436 |
+
"""
|
| 437 |
+
Recursively transforms a JSON schema to be compatible with the Gemini CLI endpoint.
|
| 438 |
+
- Converts `type: ["type", "null"]` to `type: "type", nullable: true`
|
| 439 |
+
- Removes unsupported properties like `strict` and `additionalProperties`.
|
| 440 |
+
"""
|
| 441 |
+
if not isinstance(schema, dict):
|
| 442 |
+
return schema
|
| 443 |
+
|
| 444 |
+
# Handle nullable types
|
| 445 |
+
if 'type' in schema and isinstance(schema['type'], list):
|
| 446 |
+
types = schema['type']
|
| 447 |
+
if 'null' in types:
|
| 448 |
+
schema['nullable'] = True
|
| 449 |
+
remaining_types = [t for t in types if t != 'null']
|
| 450 |
+
if len(remaining_types) == 1:
|
| 451 |
+
schema['type'] = remaining_types[0]
|
| 452 |
+
elif len(remaining_types) > 1:
|
| 453 |
+
schema['type'] = remaining_types # Let's see if Gemini supports this
|
| 454 |
+
else:
|
| 455 |
+
del schema['type']
|
| 456 |
+
|
| 457 |
+
# Recurse into properties
|
| 458 |
+
if 'properties' in schema and isinstance(schema['properties'], dict):
|
| 459 |
+
for prop_schema in schema['properties'].values():
|
| 460 |
+
self._gemini_cli_transform_schema(prop_schema)
|
| 461 |
+
|
| 462 |
+
# Recurse into items (for arrays)
|
| 463 |
+
if 'items' in schema and isinstance(schema['items'], dict):
|
| 464 |
+
self._gemini_cli_transform_schema(schema['items'])
|
| 465 |
+
|
| 466 |
+
# Clean up unsupported properties
|
| 467 |
+
schema.pop("strict", None)
|
| 468 |
+
schema.pop("additionalProperties", None)
|
| 469 |
+
|
| 470 |
+
return schema
|
| 471 |
+
|
| 472 |
+
def _transform_tool_schemas(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Transforms a list of OpenAI-style tool schemas into the format required by the Gemini CLI API.
    This uses a custom schema transformer instead of litellm's generic one.
    """
    declarations: List[Dict[str, Any]] = []
    for entry in tools:
        # Only function-type tools with a function body are forwarded.
        if entry.get("type") != "function" or "function" not in entry:
            continue

        # Deep-copy via a JSON round-trip so the caller's tool dict is untouched.
        fn = json.loads(json.dumps(entry["function"]))

        # The Gemini CLI API does not support the 'strict' property.
        fn.pop("strict", None)

        params = fn.get("parameters")
        if isinstance(params, dict):
            fn["parameters"] = self._gemini_cli_transform_schema(params)

        declarations.append(fn)

    return declarations
|
| 491 |
+
|
| 492 |
async def acompletion(self, client: httpx.AsyncClient, **kwargs) -> Union[litellm.ModelResponse, AsyncGenerator[litellm.ModelResponse, None]]:
|
| 493 |
model = kwargs["model"]
|
| 494 |
credential_path = kwargs.pop("credential_identifier")
|
|
|
|
| 505 |
|
| 506 |
# Handle :thinking suffix from Kilo example
|
| 507 |
model_name = model.split('/')[-1].replace(':thinking', '')
|
| 508 |
+
|
| 509 |
+
# [NEW] Create a dedicated file logger for this request
|
| 510 |
+
file_logger = _GeminiCliFileLogger(model_name=model_name)
|
| 511 |
|
| 512 |
gen_config = {
|
| 513 |
"maxOutputTokens": kwargs.get("max_tokens", 64000), # Increased default
|
|
|
|
| 540 |
request_payload["request"]["systemInstruction"] = system_instruction
|
| 541 |
|
| 542 |
if "tools" in kwargs and kwargs["tools"]:
|
| 543 |
+
function_declarations = self._transform_tool_schemas(kwargs["tools"])
|
|
|
|
|
|
|
| 544 |
if function_declarations:
|
| 545 |
request_payload["request"]["tools"] = [{"functionDeclarations": function_declarations}]
|
| 546 |
|
| 547 |
+
# Log the final payload for debugging and to the dedicated file
|
| 548 |
lib_logger.debug(f"Gemini CLI Request Payload: {json.dumps(request_payload, indent=2)}")
|
| 549 |
+
file_logger.log_request(request_payload)
|
| 550 |
+
|
| 551 |
url = f"{CODE_ASSIST_ENDPOINT}:streamGenerateContent"
|
| 552 |
|
| 553 |
async def stream_handler():
|
|
|
|
| 557 |
"X-Goog-Api-Client": "gl-node/22.17.0",
|
| 558 |
"Client-Metadata": "ideType=IDE_UNSPECIFIED,platform=PLATFORM_UNSPECIFIED,pluginType=GEMINI",
|
| 559 |
})
|
| 560 |
+
try:
|
| 561 |
+
async with client.stream("POST", url, headers=final_headers, json=request_payload, params={"alt": "sse"}, timeout=600) as response:
|
| 562 |
+
if response.status_code != 200:
|
| 563 |
+
error_body = await response.aread()
|
| 564 |
+
error_text = error_body.decode('utf-8', errors='ignore')
|
| 565 |
+
file_logger.log_error(f"HTTP Error: {response.status_code}\n{error_text}")
|
| 566 |
+
response.raise_for_status()
|
| 567 |
+
|
| 568 |
+
async for line in response.aiter_lines():
|
| 569 |
+
file_logger.log_response_chunk(line)
|
| 570 |
+
if line.startswith('data: '):
|
| 571 |
+
data_str = line[6:]
|
| 572 |
+
if data_str == "[DONE]": break
|
| 573 |
+
try:
|
| 574 |
+
chunk = json.loads(data_str)
|
| 575 |
+
for openai_chunk in self._convert_chunk_to_openai(chunk, model):
|
| 576 |
+
yield litellm.ModelResponse(**openai_chunk)
|
| 577 |
+
except json.JSONDecodeError:
|
| 578 |
+
lib_logger.warning(f"Could not decode JSON from Gemini CLI: {line}")
|
| 579 |
+
except Exception as e:
|
| 580 |
+
file_logger.log_error(f"Stream handler exception: {str(e)}")
|
| 581 |
+
raise
|
| 582 |
+
|
| 583 |
+
async def logging_stream_wrapper():
|
| 584 |
+
"""Wraps the stream to log the final reassembled response."""
|
| 585 |
+
openai_chunks = []
|
| 586 |
+
try:
|
| 587 |
+
async for chunk in stream_handler():
|
| 588 |
+
openai_chunks.append(chunk)
|
| 589 |
+
yield chunk
|
| 590 |
+
finally:
|
| 591 |
+
if openai_chunks:
|
| 592 |
+
final_response = self._stream_to_completion_response(openai_chunks)
|
| 593 |
+
file_logger.log_final_response(final_response.dict())
|
| 594 |
+
|
| 595 |
+
return logging_stream_wrapper()
|
| 596 |
|
| 597 |
try:
|
| 598 |
response_gen = await do_call()
|
|
|
|
| 616 |
else:
|
| 617 |
# Accumulate stream for non-streaming response
|
| 618 |
chunks = [chunk async for chunk in response_gen]
|
| 619 |
+
return self._stream_to_completion_response(chunks)
|
| 620 |
|
| 621 |
# Use the shared GeminiAuthBase for auth logic
|
| 622 |
# get_models is not applicable for this custom provider
|