Mirrowel committed on
Commit
ad45c64
·
1 Parent(s): 55a5dbb

feat(provider): add detailed transaction logging for gemini cli provider

Browse files

To improve the debuggability of the custom Gemini CLI provider, this commit introduces a comprehensive file-based logging system that captures the full lifecycle of each API call. It also includes several fixes to improve provider stability and compatibility.

Key changes include:
- A new `_GeminiCliFileLogger` class that creates a unique directory per request in `logs/gemini_cli_logs/`. It logs the request payload, raw SSE stream, final reassembled response, and any errors encountered during the transaction.
- A custom `_stream_to_completion_response` implementation to correctly reassemble streaming chunks into a final completion object for logging and non-streaming responses.
- A new `_transform_tool_schemas` function to sanitize tool schemas for the Gemini CLI endpoint, removing unsupported properties like `strict` and `additionalProperties`.
- A fix in `RotatingClient` to ensure the loop correctly continues to the next credential after an inner-loop failure.
- Added logging in the main proxy app to capture reasoning parameters from incoming requests for better upstream visibility.

src/proxy_app/main.py CHANGED
@@ -66,7 +66,7 @@ from proxy_app.batch_manager import EmbeddingBatcher
66
  from proxy_app.detailed_logger import DetailedLogger
67
 
68
  # --- Logging Configuration ---
69
- LOG_DIR = Path(__file__).resolve().parent.parent / "logs"
70
  LOG_DIR.mkdir(exist_ok=True)
71
 
72
  # Configure a file handler for INFO-level logs and higher
@@ -489,6 +489,25 @@ async def chat_completions(
489
  OpenAI-compatible endpoint powered by the RotatingClient.
490
  Handles both streaming and non-streaming responses and logs them.
491
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
492
  logger = DetailedLogger() if ENABLE_REQUEST_LOGGING else None
493
  try:
494
  request_data = await request.json()
 
66
  from proxy_app.detailed_logger import DetailedLogger
67
 
68
  # --- Logging Configuration ---
69
+ LOG_DIR = Path(__file__).resolve().parent.parent.parent / "logs"
70
  LOG_DIR.mkdir(exist_ok=True)
71
 
72
  # Configure a file handler for INFO-level logs and higher
 
489
  OpenAI-compatible endpoint powered by the RotatingClient.
490
  Handles both streaming and non-streaming responses and logs them.
491
  """
492
+ if ENABLE_REQUEST_LOGGING:
493
+ # Preserve and re-use the request body so downstream code can still call request.json()
494
+ raw_body = await request.body()
495
+ try:
496
+ parsed_body = json.loads(raw_body.decode("utf-8")) if raw_body else {}
497
+ except Exception:
498
+ parsed_body = {}
499
+ # Reattach the raw body for later reads
500
+ request._body = raw_body
501
+
502
+ # Extract the fields we want to log (supporting possible nesting in generationConfig)
503
+ model = parsed_body.get("model")
504
+ generation_cfg = parsed_body.get("generationConfig", {}) or parsed_body.get("generation_config", {}) or {}
505
+ reasoning_effort = parsed_body.get("reasoning_effort") or generation_cfg.get("reasoning_effort")
506
+ custom_reasoning_budget = parsed_body.get("custom_reasoning_budget") or generation_cfg.get("custom_reasoning_budget", False)
507
+
508
+ logging.getLogger("rotator_library").info(
509
+ f"Handling reasoning parameters: model={model}, reasoning_effort={reasoning_effort}, custom_reasoning_budget={custom_reasoning_budget}"
510
+ )
511
  logger = DetailedLogger() if ENABLE_REQUEST_LOGGING else None
512
  try:
513
  request_data = await request.json()
src/proxy_app/request_logger.py CHANGED
@@ -8,11 +8,6 @@ import logging
8
 
9
  from .provider_urls import get_provider_endpoint
10
 
11
- LOGS_DIR = Path(__file__).resolve().parent.parent.parent / "logs"
12
-
13
- # Create directories if they don't exist
14
- LOGS_DIR.mkdir(exist_ok=True)
15
-
16
  def log_request_to_console(url: str, headers: dict, client_info: tuple, request_data: dict):
17
  """
18
  Logs a concise, single-line summary of an incoming request to the console.
 
8
 
9
  from .provider_urls import get_provider_endpoint
10
 
 
 
 
 
 
11
  def log_request_to_console(url: str, headers: dict, client_info: tuple, request_data: dict):
12
  """
13
  Logs a concise, single-line summary of an incoming request to the console.
src/rotator_library/client.py CHANGED
@@ -710,6 +710,10 @@ class RotatingClient:
710
  raise last_exception
711
  await self.usage_manager.record_failure(current_cred, model, classified_error)
712
  break
 
 
 
 
713
 
714
  else: # This is the standard API Key / litellm-handled provider logic
715
  is_oauth = provider in self.oauth_providers
@@ -736,6 +740,12 @@ class RotatingClient:
736
 
737
  litellm_kwargs = sanitize_request_payload(litellm_kwargs, model)
738
 
 
 
 
 
 
 
739
  for attempt in range(self.max_retries):
740
  try:
741
  lib_logger.info(f"Attempting stream with credential ...{current_cred[-6:]} (Attempt {attempt + 1}/{self.max_retries})")
@@ -749,6 +759,7 @@ class RotatingClient:
749
  else:
750
  lib_logger.warning(f"Pre-request callback failed but abort_on_callback_error is False. Proceeding with request. Error: {e}")
751
 
 
752
  response = await litellm.acompletion(
753
  **litellm_kwargs,
754
  logger_fn=self._litellm_logger_callback
 
710
  raise last_exception
711
  await self.usage_manager.record_failure(current_cred, model, classified_error)
712
  break
713
+
714
+ # If the inner loop breaks, it means the key failed and we need to rotate.
715
+ # Continue to the next iteration of the outer while loop to pick a new key.
716
+ continue
717
 
718
  else: # This is the standard API Key / litellm-handled provider logic
719
  is_oauth = provider in self.oauth_providers
 
740
 
741
  litellm_kwargs = sanitize_request_payload(litellm_kwargs, model)
742
 
743
+ # If the provider is 'qwen_code', set the custom provider to 'qwen'
744
+ # and strip the prefix from the model name for LiteLLM.
745
+ if provider == "qwen_code":
746
+ litellm_kwargs["custom_llm_provider"] = "qwen"
747
+ litellm_kwargs["model"] = model.split('/', 1)[1]
748
+
749
  for attempt in range(self.max_retries):
750
  try:
751
  lib_logger.info(f"Attempting stream with credential ...{current_cred[-6:]} (Attempt {attempt + 1}/{self.max_retries})")
 
759
  else:
760
  lib_logger.warning(f"Pre-request callback failed but abort_on_callback_error is False. Proceeding with request. Error: {e}")
761
 
762
+ lib_logger.info(f"DEBUG: litellm.acompletion kwargs: {litellm_kwargs}")
763
  response = await litellm.acompletion(
764
  **litellm_kwargs,
765
  logger_fn=self._litellm_logger_callback
src/rotator_library/providers/gemini_cli_provider.py CHANGED
@@ -10,11 +10,68 @@ from .provider_interface import ProviderInterface
10
  from .gemini_auth_base import GeminiAuthBase
11
  import litellm
12
  from litellm.exceptions import RateLimitError
 
13
  import os
14
  from pathlib import Path
 
 
15
 
16
  lib_logger = logging.getLogger('rotator_library')
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  CODE_ASSIST_ENDPOINT = "https://cloudcode-pa.googleapis.com/v1internal"
19
 
20
  # [NEW] Hardcoded model list based on Kilo example
@@ -277,6 +334,161 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
277
 
278
  yield openai_chunk
279
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
  async def acompletion(self, client: httpx.AsyncClient, **kwargs) -> Union[litellm.ModelResponse, AsyncGenerator[litellm.ModelResponse, None]]:
281
  model = kwargs["model"]
282
  credential_path = kwargs.pop("credential_identifier")
@@ -293,6 +505,9 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
293
 
294
  # Handle :thinking suffix from Kilo example
295
  model_name = model.split('/')[-1].replace(':thinking', '')
 
 
 
296
 
297
  gen_config = {
298
  "maxOutputTokens": kwargs.get("max_tokens", 64000), # Increased default
@@ -325,14 +540,14 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
325
  request_payload["request"]["systemInstruction"] = system_instruction
326
 
327
  if "tools" in kwargs and kwargs["tools"]:
328
- function_declarations = [
329
- tool["function"] for tool in kwargs["tools"] if tool.get("type") == "function"
330
- ]
331
  if function_declarations:
332
  request_payload["request"]["tools"] = [{"functionDeclarations": function_declarations}]
333
 
334
- # Log the final payload for debugging, as requested
335
  lib_logger.debug(f"Gemini CLI Request Payload: {json.dumps(request_payload, indent=2)}")
 
 
336
  url = f"{CODE_ASSIST_ENDPOINT}:streamGenerateContent"
337
 
338
  async def stream_handler():
@@ -342,19 +557,42 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
342
  "X-Goog-Api-Client": "gl-node/22.17.0",
343
  "Client-Metadata": "ideType=IDE_UNSPECIFIED,platform=PLATFORM_UNSPECIFIED,pluginType=GEMINI",
344
  })
345
- async with client.stream("POST", url, headers=final_headers, json=request_payload, params={"alt": "sse"}, timeout=600) as response:
346
- response.raise_for_status()
347
- async for line in response.aiter_lines():
348
- if line.startswith('data: '):
349
- data_str = line[6:]
350
- if data_str == "[DONE]": break
351
- try:
352
- chunk = json.loads(data_str)
353
- for openai_chunk in self._convert_chunk_to_openai(chunk, model):
354
- yield litellm.ModelResponse(**openai_chunk)
355
- except json.JSONDecodeError:
356
- lib_logger.warning(f"Could not decode JSON from Gemini CLI: {line}")
357
- return stream_handler()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
358
 
359
  try:
360
  response_gen = await do_call()
@@ -378,7 +616,7 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
378
  else:
379
  # Accumulate stream for non-streaming response
380
  chunks = [chunk async for chunk in response_gen]
381
- return litellm.utils.stream_to_completion_response(chunks)
382
 
383
  # Use the shared GeminiAuthBase for auth logic
384
  # get_models is not applicable for this custom provider
 
10
  from .gemini_auth_base import GeminiAuthBase
11
  import litellm
12
  from litellm.exceptions import RateLimitError
13
+ from litellm.llms.vertex_ai.common_utils import _build_vertex_schema
14
  import os
15
  from pathlib import Path
16
+ import uuid
17
+ from datetime import datetime
18
 
19
  lib_logger = logging.getLogger('rotator_library')
20
 
21
LOGS_DIR = Path(__file__).resolve().parent.parent.parent.parent / "logs"
GEMINI_CLI_LOGS_DIR = LOGS_DIR / "gemini_cli_logs"

class _GeminiCliFileLogger:
    """Per-transaction file logger for the Gemini CLI provider.

    Each instance owns a unique directory under logs/gemini_cli_logs/ named
    <timestamp>_<sanitized model>_<uuid> and writes the request payload, raw
    SSE stream lines, errors, and the final reassembled response into it.
    All writes are best-effort: failures are reported via lib_logger and
    never raised to the caller.
    """
    def __init__(self, model_name: str):
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
        # '/' and ':' would break the directory name; replace them.
        safe_model_name = model_name.replace('/', '_').replace(':', '_')
        self.log_dir = GEMINI_CLI_LOGS_DIR / f"{stamp}_{safe_model_name}_{uuid.uuid4()}"
        try:
            self.log_dir.mkdir(parents=True, exist_ok=True)
        except Exception as e:
            lib_logger.error(f"Failed to create Gemini CLI log directory: {e}")
            self.enabled = False
        else:
            # Logging is only enabled when the directory could be created.
            self.enabled = True

    def _dump_json(self, filename: str, data: Dict[str, Any], label: str):
        # Shared best-effort JSON writer used by log_request/log_final_response.
        if not self.enabled:
            return
        try:
            with open(self.log_dir / filename, "w", encoding="utf-8") as f:
                json.dump(data, f, indent=2, ensure_ascii=False)
        except Exception as e:
            lib_logger.error(f"_GeminiCliFileLogger: Failed to write {label}: {e}")

    def log_request(self, payload: Dict[str, Any]):
        """Logs the request payload sent to Gemini."""
        self._dump_json("request_payload.json", payload, "request")

    def log_response_chunk(self, chunk: str):
        """Appends one raw line from the Gemini response stream."""
        if not self.enabled:
            return
        try:
            with open(self.log_dir / "response_stream.log", "a", encoding="utf-8") as f:
                f.write(chunk + "\n")
        except Exception as e:
            lib_logger.error(f"_GeminiCliFileLogger: Failed to write response chunk: {e}")

    def log_error(self, error_message: str):
        """Appends a UTC-timestamped error message to error.log."""
        if not self.enabled:
            return
        try:
            with open(self.log_dir / "error.log", "a", encoding="utf-8") as f:
                f.write(f"[{datetime.utcnow().isoformat()}] {error_message}\n")
        except Exception as e:
            lib_logger.error(f"_GeminiCliFileLogger: Failed to write error: {e}")

    def log_final_response(self, response_data: Dict[str, Any]):
        """Logs the final, reassembled response."""
        self._dump_json("final_response.json", response_data, "final response")
75
  CODE_ASSIST_ENDPOINT = "https://cloudcode-pa.googleapis.com/v1internal"
76
 
77
  # [NEW] Hardcoded model list based on Kilo example
 
334
 
335
  yield openai_chunk
336
 
337
def _stream_to_completion_response(self, chunks: List[litellm.ModelResponse]) -> litellm.ModelResponse:
    """
    Manually reassembles streaming chunks into a complete response.
    This replaces the non-existent litellm.utils.stream_to_completion_response function.

    Only the first choice of each chunk is considered (choices[0]); multi-choice
    streams would lose data here. Content, reasoning_content, tool_calls and
    legacy function_call deltas are concatenated in stream order.

    Args:
        chunks: Streaming ModelResponse objects, in arrival order.

    Returns:
        A single non-streaming ModelResponse ("chat.completion") whose id,
        created and model are taken from the first chunk.

    Raises:
        ValueError: If chunks is empty.
    """
    if not chunks:
        raise ValueError("No chunks provided for reassembly")

    # Initialize the final response structure
    final_message: Dict[str, Any] = {"role": "assistant"}
    aggregated_tool_calls: Dict[int, Dict[str, Any]] = {}
    usage_data = None
    finish_reason = None

    # Get the first chunk for basic response metadata (id / created / model)
    first_chunk = chunks[0]

    # Process each chunk to aggregate content
    for chunk in chunks:
        if not hasattr(chunk, 'choices') or not chunk.choices:
            continue

        choice = chunk.choices[0]
        # NOTE(review): assumes litellm choice/delta objects support
        # dict-style access (.get / "key" in obj) — confirm against the
        # installed litellm version.
        delta = choice.get("delta", {})

        # Aggregate content
        if "content" in delta and delta["content"] is not None:
            if "content" not in final_message:
                final_message["content"] = ""
            final_message["content"] += delta["content"]

        # Aggregate reasoning content
        if "reasoning_content" in delta and delta["reasoning_content"] is not None:
            if "reasoning_content" not in final_message:
                final_message["reasoning_content"] = ""
            final_message["reasoning_content"] += delta["reasoning_content"]

        # Aggregate tool calls, keyed by the provider-assigned index so that
        # fragments of the same call are concatenated together.
        if "tool_calls" in delta and delta["tool_calls"]:
            for tc_chunk in delta["tool_calls"]:
                index = tc_chunk["index"]
                if index not in aggregated_tool_calls:
                    aggregated_tool_calls[index] = {"function": {"name": "", "arguments": ""}}
                if "id" in tc_chunk:
                    aggregated_tool_calls[index]["id"] = tc_chunk["id"]
                if "function" in tc_chunk:
                    if "name" in tc_chunk["function"] and tc_chunk["function"]["name"] is not None:
                        aggregated_tool_calls[index]["function"]["name"] += tc_chunk["function"]["name"]
                    if "arguments" in tc_chunk["function"] and tc_chunk["function"]["arguments"] is not None:
                        aggregated_tool_calls[index]["function"]["arguments"] += tc_chunk["function"]["arguments"]

        # Aggregate function calls (legacy format)
        if "function_call" in delta and delta["function_call"] is not None:
            if "function_call" not in final_message:
                final_message["function_call"] = {"name": "", "arguments": ""}
            if "name" in delta["function_call"] and delta["function_call"]["name"] is not None:
                final_message["function_call"]["name"] += delta["function_call"]["name"]
            if "arguments" in delta["function_call"] and delta["function_call"]["arguments"] is not None:
                final_message["function_call"]["arguments"] += delta["function_call"]["arguments"]

        # Get finish reason from the last chunk that has it
        if choice.get("finish_reason"):
            finish_reason = choice["finish_reason"]

    # Handle usage data from the last chunk that has it (usage usually
    # arrives only on the final chunk, so scan in reverse).
    for chunk in reversed(chunks):
        if hasattr(chunk, 'usage') and chunk.usage:
            usage_data = chunk.usage
            break

    # Add tool calls to final message if any
    if aggregated_tool_calls:
        final_message["tool_calls"] = list(aggregated_tool_calls.values())

    # Ensure standard fields are present for consistent logging
    for field in ["content", "tool_calls", "function_call"]:
        if field not in final_message:
            final_message[field] = None

    # Construct the final response
    final_choice = {
        "index": 0,
        "message": final_message,
        "finish_reason": finish_reason
    }

    # Create the final ModelResponse
    final_response_data = {
        "id": first_chunk.id,
        "object": "chat.completion",
        "created": first_chunk.created,
        "model": first_chunk.model,
        "choices": [final_choice],
        "usage": usage_data
    }

    return litellm.ModelResponse(**final_response_data)
+
435
+ def _gemini_cli_transform_schema(self, schema: Dict[str, Any]) -> Dict[str, Any]:
436
+ """
437
+ Recursively transforms a JSON schema to be compatible with the Gemini CLI endpoint.
438
+ - Converts `type: ["type", "null"]` to `type: "type", nullable: true`
439
+ - Removes unsupported properties like `strict` and `additionalProperties`.
440
+ """
441
+ if not isinstance(schema, dict):
442
+ return schema
443
+
444
+ # Handle nullable types
445
+ if 'type' in schema and isinstance(schema['type'], list):
446
+ types = schema['type']
447
+ if 'null' in types:
448
+ schema['nullable'] = True
449
+ remaining_types = [t for t in types if t != 'null']
450
+ if len(remaining_types) == 1:
451
+ schema['type'] = remaining_types[0]
452
+ elif len(remaining_types) > 1:
453
+ schema['type'] = remaining_types # Let's see if Gemini supports this
454
+ else:
455
+ del schema['type']
456
+
457
+ # Recurse into properties
458
+ if 'properties' in schema and isinstance(schema['properties'], dict):
459
+ for prop_schema in schema['properties'].values():
460
+ self._gemini_cli_transform_schema(prop_schema)
461
+
462
+ # Recurse into items (for arrays)
463
+ if 'items' in schema and isinstance(schema['items'], dict):
464
+ self._gemini_cli_transform_schema(schema['items'])
465
+
466
+ # Clean up unsupported properties
467
+ schema.pop("strict", None)
468
+ schema.pop("additionalProperties", None)
469
+
470
+ return schema
471
+
472
+ def _transform_tool_schemas(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
473
+ """
474
+ Transforms a list of OpenAI-style tool schemas into the format required by the Gemini CLI API.
475
+ This uses a custom schema transformer instead of litellm's generic one.
476
+ """
477
+ transformed_declarations = []
478
+ for tool in tools:
479
+ if tool.get("type") == "function" and "function" in tool:
480
+ new_function = json.loads(json.dumps(tool["function"]))
481
+
482
+ # The Gemini CLI API does not support the 'strict' property.
483
+ new_function.pop("strict", None)
484
+
485
+ if "parameters" in new_function and isinstance(new_function["parameters"], dict):
486
+ new_function["parameters"] = self._gemini_cli_transform_schema(new_function["parameters"])
487
+
488
+ transformed_declarations.append(new_function)
489
+
490
+ return transformed_declarations
491
+
492
  async def acompletion(self, client: httpx.AsyncClient, **kwargs) -> Union[litellm.ModelResponse, AsyncGenerator[litellm.ModelResponse, None]]:
493
  model = kwargs["model"]
494
  credential_path = kwargs.pop("credential_identifier")
 
505
 
506
  # Handle :thinking suffix from Kilo example
507
  model_name = model.split('/')[-1].replace(':thinking', '')
508
+
509
+ # [NEW] Create a dedicated file logger for this request
510
+ file_logger = _GeminiCliFileLogger(model_name=model_name)
511
 
512
  gen_config = {
513
  "maxOutputTokens": kwargs.get("max_tokens", 64000), # Increased default
 
540
  request_payload["request"]["systemInstruction"] = system_instruction
541
 
542
  if "tools" in kwargs and kwargs["tools"]:
543
+ function_declarations = self._transform_tool_schemas(kwargs["tools"])
 
 
544
  if function_declarations:
545
  request_payload["request"]["tools"] = [{"functionDeclarations": function_declarations}]
546
 
547
+ # Log the final payload for debugging and to the dedicated file
548
  lib_logger.debug(f"Gemini CLI Request Payload: {json.dumps(request_payload, indent=2)}")
549
+ file_logger.log_request(request_payload)
550
+
551
  url = f"{CODE_ASSIST_ENDPOINT}:streamGenerateContent"
552
 
553
  async def stream_handler():
 
557
  "X-Goog-Api-Client": "gl-node/22.17.0",
558
  "Client-Metadata": "ideType=IDE_UNSPECIFIED,platform=PLATFORM_UNSPECIFIED,pluginType=GEMINI",
559
  })
560
+ try:
561
+ async with client.stream("POST", url, headers=final_headers, json=request_payload, params={"alt": "sse"}, timeout=600) as response:
562
+ if response.status_code != 200:
563
+ error_body = await response.aread()
564
+ error_text = error_body.decode('utf-8', errors='ignore')
565
+ file_logger.log_error(f"HTTP Error: {response.status_code}\n{error_text}")
566
+ response.raise_for_status()
567
+
568
+ async for line in response.aiter_lines():
569
+ file_logger.log_response_chunk(line)
570
+ if line.startswith('data: '):
571
+ data_str = line[6:]
572
+ if data_str == "[DONE]": break
573
+ try:
574
+ chunk = json.loads(data_str)
575
+ for openai_chunk in self._convert_chunk_to_openai(chunk, model):
576
+ yield litellm.ModelResponse(**openai_chunk)
577
+ except json.JSONDecodeError:
578
+ lib_logger.warning(f"Could not decode JSON from Gemini CLI: {line}")
579
+ except Exception as e:
580
+ file_logger.log_error(f"Stream handler exception: {str(e)}")
581
+ raise
582
+
583
+ async def logging_stream_wrapper():
584
+ """Wraps the stream to log the final reassembled response."""
585
+ openai_chunks = []
586
+ try:
587
+ async for chunk in stream_handler():
588
+ openai_chunks.append(chunk)
589
+ yield chunk
590
+ finally:
591
+ if openai_chunks:
592
+ final_response = self._stream_to_completion_response(openai_chunks)
593
+ file_logger.log_final_response(final_response.dict())
594
+
595
+ return logging_stream_wrapper()
596
 
597
  try:
598
  response_gen = await do_call()
 
616
  else:
617
  # Accumulate stream for non-streaming response
618
  chunks = [chunk async for chunk in response_gen]
619
+ return self._stream_to_completion_response(chunks)
620
 
621
  # Use the shared GeminiAuthBase for auth logic
622
  # get_models is not applicable for this custom provider