Mirrowel committed on
Commit
62e7cf3
·
1 Parent(s): 7830a78

One huge ass bugfix I can't even list here. It's a mess I'll fix later

Browse files
src/proxy_app/main.py CHANGED
@@ -608,6 +608,9 @@ async def streaming_response_wrapper(
608
  # --- Final Response Construction ---
609
  if aggregated_tool_calls:
610
  final_message["tool_calls"] = list(aggregated_tool_calls.values())
 
 
 
611
 
612
  # Ensure standard fields are present for consistent logging
613
  for field in ["content", "tool_calls", "function_call"]:
 
608
  # --- Final Response Construction ---
609
  if aggregated_tool_calls:
610
  final_message["tool_calls"] = list(aggregated_tool_calls.values())
611
+ # CRITICAL FIX: Override finish_reason when tool_calls exist
612
+ # This ensures OpenCode and other agentic systems continue the conversation loop
613
+ finish_reason = "tool_calls"
614
 
615
  # Ensure standard fields are present for consistent logging
616
  for field in ["content", "tool_calls", "function_call"]:
src/rotator_library/client.py CHANGED
@@ -495,11 +495,19 @@ class RotatingClient:
495
  """
496
  A hybrid wrapper for streaming that buffers fragmented JSON, handles client disconnections gracefully,
497
  and distinguishes between content and streamed errors.
 
 
 
 
 
 
498
  """
499
  last_usage = None
500
  stream_completed = False
501
  stream_iterator = stream.__aiter__()
502
  json_buffer = ""
 
 
503
 
504
  try:
505
  while True:
@@ -507,26 +515,64 @@ class RotatingClient:
507
  lib_logger.info(
508
  f"Client disconnected. Aborting stream for credential ...{key[-6:]}."
509
  )
510
- # Do not yield [DONE] because the client is gone.
511
- # The 'finally' block will handle key release.
512
  break
513
 
514
  try:
515
  chunk = await stream_iterator.__anext__()
516
  if json_buffer:
517
- # If we are about to discard a buffer, it means data was likely lost.
518
- # Log this as a warning to make it visible.
519
  lib_logger.warning(
520
  f"Discarding incomplete JSON buffer from previous chunk: {json_buffer}"
521
  )
522
  json_buffer = ""
523
 
524
- yield f"data: {json.dumps(chunk.dict())}\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
525
 
526
  if hasattr(chunk, "usage") and chunk.usage:
527
- last_usage = (
528
- chunk.usage
529
- ) # Overwrite with the latest (cumulative)
530
 
531
  except StopAsyncIteration:
532
  stream_completed = True
 
495
  """
496
  A hybrid wrapper for streaming that buffers fragmented JSON, handles client disconnections gracefully,
497
  and distinguishes between content and streamed errors.
498
+
499
+ FINISH_REASON HANDLING:
500
+ Providers just translate chunks - this wrapper handles ALL finish_reason logic:
501
+ 1. Strip finish_reason from intermediate chunks (litellm defaults to "stop")
502
+ 2. Track accumulated_finish_reason with priority: tool_calls > length/content_filter > stop
503
+ 3. Only emit finish_reason on final chunk (detected by usage.completion_tokens > 0)
504
  """
505
  last_usage = None
506
  stream_completed = False
507
  stream_iterator = stream.__aiter__()
508
  json_buffer = ""
509
+ accumulated_finish_reason = None # Track strongest finish_reason across chunks
510
+ has_tool_calls = False # Track if ANY tool calls were seen in stream
511
 
512
  try:
513
  while True:
 
515
  lib_logger.info(
516
  f"Client disconnected. Aborting stream for credential ...{key[-6:]}."
517
  )
 
 
518
  break
519
 
520
  try:
521
  chunk = await stream_iterator.__anext__()
522
  if json_buffer:
 
 
523
  lib_logger.warning(
524
  f"Discarding incomplete JSON buffer from previous chunk: {json_buffer}"
525
  )
526
  json_buffer = ""
527
 
528
+ # Convert chunk to dict, handling both litellm.ModelResponse and raw dicts
529
+ if hasattr(chunk, "dict"):
530
+ chunk_dict = chunk.dict()
531
+ elif hasattr(chunk, "model_dump"):
532
+ chunk_dict = chunk.model_dump()
533
+ else:
534
+ chunk_dict = chunk
535
+
536
+ # === FINISH_REASON LOGIC ===
537
+ # Providers send raw chunks without finish_reason logic.
538
+ # This wrapper determines finish_reason based on accumulated state.
539
+ if "choices" in chunk_dict and chunk_dict["choices"]:
540
+ choice = chunk_dict["choices"][0]
541
+ delta = choice.get("delta", {})
542
+ usage = chunk_dict.get("usage", {})
543
+
544
+ # Track tool_calls across ALL chunks - if we ever see one, finish_reason must be tool_calls
545
+ if delta.get("tool_calls"):
546
+ has_tool_calls = True
547
+ accumulated_finish_reason = "tool_calls"
548
+
549
+ # Detect final chunk: has usage with completion_tokens > 0
550
+ has_completion_tokens = (
551
+ usage and
552
+ isinstance(usage, dict) and
553
+ usage.get("completion_tokens", 0) > 0
554
+ )
555
+
556
+ if has_completion_tokens:
557
+ # FINAL CHUNK: Determine correct finish_reason
558
+ if has_tool_calls:
559
+ # Tool calls always win
560
+ choice["finish_reason"] = "tool_calls"
561
+ elif accumulated_finish_reason:
562
+ # Use accumulated reason (length, content_filter, etc.)
563
+ choice["finish_reason"] = accumulated_finish_reason
564
+ else:
565
+ # Default to stop
566
+ choice["finish_reason"] = "stop"
567
+ else:
568
+ # INTERMEDIATE CHUNK: Never emit finish_reason
569
+ # (litellm.ModelResponse defaults to "stop" which is wrong)
570
+ choice["finish_reason"] = None
571
+
572
+ yield f"data: {json.dumps(chunk_dict)}\n\n"
573
 
574
  if hasattr(chunk, "usage") and chunk.usage:
575
+ last_usage = chunk.usage
 
 
576
 
577
  except StopAsyncIteration:
578
  stream_completed = True
src/rotator_library/providers/antigravity_provider.py CHANGED
@@ -16,7 +16,6 @@ Key Features:
16
 
17
  from __future__ import annotations
18
 
19
- import asyncio
20
  import copy
21
  import hashlib
22
  import json
@@ -58,7 +57,7 @@ AVAILABLE_MODELS = [
58
  #"gemini-2.5-pro",
59
  #"gemini-2.5-flash",
60
  #"gemini-2.5-flash-lite",
61
- "gemini-3-pro-preview",
62
  #"gemini-3-pro-image-preview",
63
  #"gemini-2.5-computer-use-preview-10-2025",
64
  "claude-sonnet-4-5", # Internally mapped to -thinking variant when reasoning_effort is provided
@@ -71,12 +70,13 @@ DEFAULT_MAX_OUTPUT_TOKENS = 16384
71
  MODEL_ALIAS_MAP = {
72
  "rev19-uic3-1p": "gemini-2.5-computer-use-preview-10-2025",
73
  "gemini-3-pro-image": "gemini-3-pro-image-preview",
 
74
  "gemini-3-pro-high": "gemini-3-pro-preview",
75
  }
76
  MODEL_ALIAS_REVERSE = {v: k for k, v in MODEL_ALIAS_MAP.items()}
77
 
78
  # Models to exclude from dynamic discovery
79
- EXCLUDED_MODELS = {"chat_20706", "chat_23310", "gemini-2.5-flash-thinking", "gemini-3-pro-low", "gemini-2.5-pro"}
80
 
81
  # Gemini finish reason mapping
82
  FINISH_REASON_MAP = {
@@ -101,15 +101,28 @@ You MUST follow these rules strictly:
101
 
102
  1. DO NOT use your internal training data to guess tool parameters
103
  2. ONLY use the exact parameter structure defined in the tool schema
104
- 3. If a tool takes a 'files' parameter, it is ALWAYS an array of objects with specific properties, NEVER a simple array of strings
105
- 4. If a tool edits code, it takes structured JSON objects with specific fields, NEVER raw diff strings or plain text
106
- 5. Parameter names in schemas are EXACT - do not substitute with similar names from your training (e.g., use 'follow_up' not 'suggested_answers')
107
- 6. Array parameters have specific item types - check the schema's 'items' field for the exact structure
108
- 7. When you see "STRICT PARAMETERS" in a tool description, those type definitions override any assumptions
109
 
110
  If you are unsure about a tool's parameters, YOU MUST read the schema definition carefully. Your training data about common tool names like 'read_file' or 'apply_diff' does NOT apply here.
111
  """
112
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
  # =============================================================================
115
  # HELPER FUNCTIONS
@@ -169,8 +182,9 @@ def _recursively_parse_json_strings(obj: Any) -> Any:
169
  Antigravity sometimes returns tool arguments with JSON-stringified values:
170
  {"files": "[{...}]"} instead of {"files": [{...}]}.
171
 
172
- Additionally handles malformed double-encoded JSON where Antigravity
173
- returns strings like '[{...}]}' (extra trailing '}').
 
174
  """
175
  if isinstance(obj, dict):
176
  return {k: _recursively_parse_json_strings(v) for k, v in obj.items()}
@@ -178,6 +192,23 @@ def _recursively_parse_json_strings(obj: Any) -> Any:
178
  return [_recursively_parse_json_strings(item) for item in obj]
179
  elif isinstance(obj, str):
180
  stripped = obj.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  # Check if it looks like JSON (starts with { or [)
182
  if stripped and stripped[0] in ('{', '['):
183
  # Try standard parsing first
@@ -215,7 +246,7 @@ def _recursively_parse_json_strings(obj: Any) -> Any:
215
  cleaned = stripped[:last_brace+1]
216
  parsed = json.loads(cleaned)
217
  lib_logger.warning(
218
- f"Auto-corrected malformed JSON string: "
219
  f"truncated {len(stripped) - len(cleaned)} extra chars"
220
  )
221
  return _recursively_parse_json_strings(parsed)
@@ -369,6 +400,7 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
369
  self._enable_signature_cache = _env_bool("ANTIGRAVITY_ENABLE_SIGNATURE_CACHE", True)
370
  self._enable_dynamic_models = _env_bool("ANTIGRAVITY_ENABLE_DYNAMIC_MODELS", False)
371
  self._enable_gemini3_tool_fix = _env_bool("ANTIGRAVITY_GEMINI3_TOOL_FIX", True)
 
372
 
373
  # Gemini 3 tool fix configuration
374
  self._gemini3_tool_prefix = os.getenv("ANTIGRAVITY_GEMINI3_TOOL_PREFIX", "gemini3_")
@@ -381,6 +413,16 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
381
  DEFAULT_GEMINI3_SYSTEM_INSTRUCTION
382
  )
383
 
 
 
 
 
 
 
 
 
 
 
384
  # Log configuration
385
  self._log_config()
386
 
@@ -389,7 +431,7 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
389
  lib_logger.debug(
390
  f"Antigravity config: signatures_in_client={self._preserve_signatures_in_client}, "
391
  f"cache={self._enable_signature_cache}, dynamic_models={self._enable_dynamic_models}, "
392
- f"gemini3_fix={self._enable_gemini3_tool_fix}"
393
  )
394
 
395
  # =========================================================================
@@ -558,7 +600,10 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
558
  if msg.get("role") == "assistant" and msg.get("tool_calls"):
559
  for tc in msg["tool_calls"]:
560
  if tc.get("type") == "function":
561
- tool_id_to_name[tc["id"]] = tc["function"]["name"]
 
 
 
562
 
563
  # Convert each message
564
  for msg in messages:
@@ -654,6 +699,11 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
654
  tool_id = tc.get("id", "")
655
  func_name = tc["function"]["name"]
656
 
 
 
 
 
 
657
  # Add prefix for Gemini 3
658
  if self._is_gemini_3(model) and self._enable_gemini3_tool_fix:
659
  func_name = f"{self._gemini3_tool_prefix}{func_name}"
@@ -728,6 +778,15 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
728
  func_name = tool_id_to_name.get(tool_id, "unknown_function")
729
  content = msg.get("content", "{}")
730
 
 
 
 
 
 
 
 
 
 
731
  # Add prefix for Gemini 3
732
  if self._is_gemini_3(model) and self._enable_gemini3_tool_fix:
733
  func_name = f"{self._gemini3_tool_prefix}{func_name}"
@@ -758,10 +817,12 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
758
 
759
  Converts linear format (call, response, call, response)
760
  to grouped format (model with calls, user with all responses).
 
 
761
  """
762
  new_contents = []
763
- pending_groups = []
764
- collected_responses = []
765
 
766
  for content in contents:
767
  role = content.get("role")
@@ -770,15 +831,33 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
770
  response_parts = [p for p in parts if "functionResponse" in p]
771
 
772
  if response_parts:
773
- collected_responses.extend(response_parts)
 
 
 
 
 
 
 
 
 
 
 
774
 
775
- # Try to satisfy pending groups
776
  for i in range(len(pending_groups) - 1, -1, -1):
777
  group = pending_groups[i]
778
- if len(collected_responses) >= group["count"]:
779
- group_responses = collected_responses[:group["count"]]
780
- collected_responses = collected_responses[group["count"]:]
 
 
 
781
  new_contents.append({"parts": group_responses, "role": "user"})
 
 
 
 
782
  pending_groups.pop(i)
783
  break
784
  continue
@@ -787,16 +866,32 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
787
  func_calls = [p for p in parts if "functionCall" in p]
788
  new_contents.append(content)
789
  if func_calls:
790
- pending_groups.append({"count": len(func_calls)})
 
 
 
 
791
  else:
792
  new_contents.append(content)
793
 
794
- # Handle remaining groups
795
  for group in pending_groups:
796
- if len(collected_responses) >= group["count"]:
797
- group_responses = collected_responses[:group["count"]]
798
- collected_responses = collected_responses[group["count"]:]
 
799
  new_contents.append({"parts": group_responses, "role": "user"})
 
 
 
 
 
 
 
 
 
 
 
800
 
801
  return new_contents
802
 
@@ -823,12 +918,16 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
823
 
824
  def _inject_signature_into_descriptions(
825
  self,
826
- tools: List[Dict[str, Any]]
 
827
  ) -> List[Dict[str, Any]]:
828
- """Inject parameter signatures into tool descriptions for Gemini 3."""
829
  if not tools:
830
  return tools
831
 
 
 
 
832
  modified = copy.deepcopy(tools)
833
  for tool in modified:
834
  for func_decl in tool.get("functionDeclarations", []):
@@ -854,7 +953,7 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
854
  )
855
 
856
  if param_list:
857
- sig_str = self._gemini3_description_prompt.replace(
858
  "{params}", ", ".join(param_list)
859
  )
860
  func_decl["description"] = func_decl.get("description", "") + sig_str
@@ -892,6 +991,42 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
892
  return name[len(self._gemini3_tool_prefix):]
893
  return name
894
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
895
  # =========================================================================
896
  # REQUEST TRANSFORMATION
897
  # =========================================================================
@@ -936,7 +1071,8 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
936
  gemini_payload: Dict[str, Any],
937
  model: str,
938
  max_tokens: Optional[int] = None,
939
- reasoning_effort: Optional[str] = None
 
940
  ) -> Dict[str, Any]:
941
  """
942
  Transform Gemini CLI payload to complete Antigravity format.
@@ -954,6 +1090,16 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
954
  if internal_model == "claude-sonnet-4-5" and not internal_model.endswith("-thinking"):
955
  internal_model = "claude-sonnet-4-5-thinking"
956
 
 
 
 
 
 
 
 
 
 
 
957
  # Wrap in Antigravity envelope
958
  antigravity_payload = {
959
  "project": _generate_project_id(),
@@ -983,10 +1129,15 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
983
 
984
  antigravity_payload["request"]["generationConfig"] = gen_config
985
 
986
- # Set toolConfig mode
987
- tool_config = antigravity_payload["request"].setdefault("toolConfig", {})
988
- func_config = tool_config.setdefault("functionCallingConfig", {})
989
- func_config["mode"] = "VALIDATED"
 
 
 
 
 
990
 
991
  # Handle Gemini 3 thinking logic
992
  if not internal_model.startswith("gemini-3-"):
@@ -1053,7 +1204,8 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
1053
  reasoning_content = ""
1054
  tool_calls = []
1055
  first_sig_seen = False
1056
- tool_idx = 0
 
1057
 
1058
  for part in content_parts:
1059
  has_func = "functionCall" in part
@@ -1099,23 +1251,29 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
1099
  if tool_calls:
1100
  delta["tool_calls"] = tool_calls
1101
  delta["role"] = "assistant"
 
 
 
1102
  elif text_content or reasoning_content:
1103
  delta["role"] = "assistant"
1104
 
1105
- # Handle finish reason
1106
- finish_reason = self._map_finish_reason(candidate.get("finishReason"), bool(tool_calls))
1107
- if finish_reason and accumulator is not None:
 
 
1108
  accumulator["is_complete"] = True
1109
 
1110
- # Build usage
1111
- usage = self._build_usage(chunk.get("usageMetadata", {}))
 
1112
 
1113
  response = {
1114
  "id": chunk.get("responseId", f"chatcmpl-{uuid.uuid4().hex[:24]}"),
1115
  "object": "chat.completion.chunk",
1116
  "created": int(time.time()),
1117
  "model": model,
1118
- "choices": [{"index": 0, "delta": delta, "finish_reason": finish_reason}]
1119
  }
1120
 
1121
  if usage:
@@ -1188,12 +1346,13 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
1188
  finish_reason = self._map_finish_reason(candidate.get("finishReason"), bool(tool_calls))
1189
  usage = self._build_usage(response.get("usageMetadata", {}))
1190
 
 
1191
  result = {
1192
  "id": response.get("responseId", f"chatcmpl-{uuid.uuid4().hex[:24]}"),
1193
  "object": "chat.completion",
1194
  "created": int(time.time()),
1195
  "model": model,
1196
- "choices": [{"index": 0, "message": message, "finish_reason": finish_reason}]
1197
  }
1198
 
1199
  if usage:
@@ -1212,6 +1371,8 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
1212
  func_call = part["functionCall"]
1213
  tool_id = func_call.get("id") or f"call_{uuid.uuid4().hex[:24]}"
1214
 
 
 
1215
  tool_name = func_call.get("name", "")
1216
  if self._is_gemini_3(model) and self._enable_gemini3_tool_fix:
1217
  tool_name = self._strip_gemini3_prefix(tool_name)
@@ -1383,6 +1544,7 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
1383
  stream = kwargs.get("stream", False)
1384
  credential_path = kwargs.pop("credential_identifier", kwargs.get("api_key", ""))
1385
  tools = kwargs.get("tools")
 
1386
  reasoning_effort = kwargs.get("reasoning_effort")
1387
  top_p = kwargs.get("top_p")
1388
  max_tokens = kwargs.get("max_tokens")
@@ -1402,9 +1564,12 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
1402
  if system_instruction:
1403
  gemini_payload["system_instruction"] = system_instruction
1404
 
1405
- # Inject Gemini 3 system instruction
1406
- if self._is_gemini_3(model) and self._enable_gemini3_tool_fix and tools:
1407
- self._inject_gemini3_system_instruction(gemini_payload)
 
 
 
1408
 
1409
  # Add generation config
1410
  gen_config = {}
@@ -1423,13 +1588,23 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
1423
  if gemini_tools:
1424
  gemini_payload["tools"] = gemini_tools
1425
 
1426
- # Apply Gemini 3 tool transformations
1427
  if self._is_gemini_3(model) and self._enable_gemini3_tool_fix:
 
1428
  gemini_payload["tools"] = self._apply_gemini3_namespace(gemini_payload["tools"])
1429
- gemini_payload["tools"] = self._inject_signature_into_descriptions(gemini_payload["tools"])
 
 
 
 
 
 
 
 
 
1430
 
1431
  # Transform to Antigravity format
1432
- payload = self._transform_to_antigravity_format(gemini_payload, model, max_tokens, reasoning_effort)
1433
  file_logger.log_request(payload)
1434
 
1435
  # Make API call
@@ -1467,12 +1642,12 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
1467
  return await self._handle_non_streaming(client, url, headers, payload, model, file_logger)
1468
  raise
1469
 
1470
- def _inject_gemini3_system_instruction(self, payload: Dict[str, Any]) -> None:
1471
- """Inject Gemini 3 system instruction for tool fix."""
1472
- if not self._gemini3_system_instruction:
1473
  return
1474
 
1475
- instruction_part = {"text": self._gemini3_system_instruction}
1476
 
1477
  if "system_instruction" in payload:
1478
  existing = payload["system_instruction"]
@@ -1518,13 +1693,15 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
1518
  file_logger: Optional[AntigravityFileLogger] = None
1519
  ) -> AsyncGenerator[litellm.ModelResponse, None]:
1520
  """Handle streaming completion."""
 
1521
  accumulator = {
1522
  "reasoning_content": "",
1523
  "thought_signature": "",
1524
  "text_content": "",
1525
  "tool_calls": [],
1526
- "is_complete": False
1527
- } if self._is_claude(model) and self._enable_signature_cache else None
 
1528
 
1529
  async with client.stream("POST", url, headers=headers, json=payload, timeout=120.0) as response:
1530
  if response.status_code >= 400:
@@ -1556,8 +1733,23 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
1556
  file_logger.log_error(f"Parse error: {data_str[:100]}")
1557
  continue
1558
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1559
  # Cache Claude thinking after stream completes
1560
- if accumulator and accumulator.get("reasoning_content"):
1561
  self._cache_thinking(
1562
  accumulator["reasoning_content"],
1563
  accumulator["thought_signature"],
 
16
 
17
  from __future__ import annotations
18
 
 
19
  import copy
20
  import hashlib
21
  import json
 
57
  #"gemini-2.5-pro",
58
  #"gemini-2.5-flash",
59
  #"gemini-2.5-flash-lite",
60
+ "gemini-3-pro-preview", # Internally mapped to -low/-high variant based on thinkingLevel
61
  #"gemini-3-pro-image-preview",
62
  #"gemini-2.5-computer-use-preview-10-2025",
63
  "claude-sonnet-4-5", # Internally mapped to -thinking variant when reasoning_effort is provided
 
70
  MODEL_ALIAS_MAP = {
71
  "rev19-uic3-1p": "gemini-2.5-computer-use-preview-10-2025",
72
  "gemini-3-pro-image": "gemini-3-pro-image-preview",
73
+ "gemini-3-pro-low": "gemini-3-pro-preview",
74
  "gemini-3-pro-high": "gemini-3-pro-preview",
75
  }
76
  MODEL_ALIAS_REVERSE = {v: k for k, v in MODEL_ALIAS_MAP.items()}
77
 
78
  # Models to exclude from dynamic discovery
79
+ EXCLUDED_MODELS = {"chat_20706", "chat_23310", "gemini-2.5-flash-thinking", "gemini-2.5-pro"}
80
 
81
  # Gemini finish reason mapping
82
  FINISH_REASON_MAP = {
 
101
 
102
  1. DO NOT use your internal training data to guess tool parameters
103
  2. ONLY use the exact parameter structure defined in the tool schema
104
+ 3. Parameter names in schemas are EXACT - do not substitute with similar names from your training (e.g., use 'follow_up' not 'suggested_answers')
105
+ 4. Array parameters have specific item types - check the schema's 'items' field for the exact structure
106
+ 5. When you see "STRICT PARAMETERS" in a tool description, those type definitions override any assumptions
 
 
107
 
108
  If you are unsure about a tool's parameters, YOU MUST read the schema definition carefully. Your training data about common tool names like 'read_file' or 'apply_diff' does NOT apply here.
109
  """
110
 
111
+ # Claude tool fix system instruction (prevents hallucination)
112
+ DEFAULT_CLAUDE_SYSTEM_INSTRUCTION = """CRITICAL TOOL USAGE INSTRUCTIONS:
113
+ You are operating in a custom environment where tool definitions differ from your training data.
114
+ You MUST follow these rules strictly:
115
+
116
+ 1. DO NOT use your internal training data to guess tool parameters
117
+ 2. ONLY use the exact parameter structure defined in the tool schema
118
+ 3. Parameter names in schemas are EXACT - do not substitute with similar names from your training (e.g., use 'follow_up' not 'suggested_answers')
119
+ 4. Array parameters have specific item types - check the schema's 'items' field for the exact structure
120
+ 5. When you see "STRICT PARAMETERS" in a tool description, those type definitions override any assumptions
121
+ 6. Tool use in agentic workflows is REQUIRED - you must call tools with the exact parameters specified in the schema
122
+
123
+ If you are unsure about a tool's parameters, YOU MUST read the schema definition carefully.
124
+ """
125
+
126
 
127
  # =============================================================================
128
  # HELPER FUNCTIONS
 
182
  Antigravity sometimes returns tool arguments with JSON-stringified values:
183
  {"files": "[{...}]"} instead of {"files": [{...}]}.
184
 
185
+ Additionally handles:
186
+ - Malformed double-encoded JSON (extra trailing '}' or ']')
187
+ - Escaped string content (\n, \t, \", etc.)
188
  """
189
  if isinstance(obj, dict):
190
  return {k: _recursively_parse_json_strings(v) for k, v in obj.items()}
 
192
  return [_recursively_parse_json_strings(item) for item in obj]
193
  elif isinstance(obj, str):
194
  stripped = obj.strip()
195
+
196
+ # Check if string contains common escape sequences that need unescaping
197
+ # This handles cases where diff content or other text has literal \n instead of newlines
198
+ if '\\n' in obj or '\\t' in obj or '\\"' in obj or '\\\\' in obj:
199
+ try:
200
+ # Use json.loads with quotes to properly unescape the string
201
+ # This converts \n -> newline, \t -> tab, \" -> quote, etc.
202
+ unescaped = json.loads(f'"{obj}"')
203
+ lib_logger.debug(
204
+ f"[Antigravity] Unescaped string content: "
205
+ f"{len(obj) - len(unescaped)} chars changed"
206
+ )
207
+ return unescaped
208
+ except (json.JSONDecodeError, ValueError):
209
+ # If unescaping fails, continue with original processing
210
+ pass
211
+
212
  # Check if it looks like JSON (starts with { or [)
213
  if stripped and stripped[0] in ('{', '['):
214
  # Try standard parsing first
 
246
  cleaned = stripped[:last_brace+1]
247
  parsed = json.loads(cleaned)
248
  lib_logger.warning(
249
+ f"[Antigravity] Auto-corrected malformed JSON string: "
250
  f"truncated {len(stripped) - len(cleaned)} extra chars"
251
  )
252
  return _recursively_parse_json_strings(parsed)
 
400
  self._enable_signature_cache = _env_bool("ANTIGRAVITY_ENABLE_SIGNATURE_CACHE", True)
401
  self._enable_dynamic_models = _env_bool("ANTIGRAVITY_ENABLE_DYNAMIC_MODELS", False)
402
  self._enable_gemini3_tool_fix = _env_bool("ANTIGRAVITY_GEMINI3_TOOL_FIX", True)
403
+ self._enable_claude_tool_fix = _env_bool("ANTIGRAVITY_CLAUDE_TOOL_FIX", True)
404
 
405
  # Gemini 3 tool fix configuration
406
  self._gemini3_tool_prefix = os.getenv("ANTIGRAVITY_GEMINI3_TOOL_PREFIX", "gemini3_")
 
413
  DEFAULT_GEMINI3_SYSTEM_INSTRUCTION
414
  )
415
 
416
+ # Claude tool fix configuration (separate from Gemini 3)
417
+ self._claude_description_prompt = os.getenv(
418
+ "ANTIGRAVITY_CLAUDE_DESCRIPTION_PROMPT",
419
+ "\n\nSTRICT PARAMETERS: {params}."
420
+ )
421
+ self._claude_system_instruction = os.getenv(
422
+ "ANTIGRAVITY_CLAUDE_SYSTEM_INSTRUCTION",
423
+ DEFAULT_CLAUDE_SYSTEM_INSTRUCTION
424
+ )
425
+
426
  # Log configuration
427
  self._log_config()
428
 
 
431
  lib_logger.debug(
432
  f"Antigravity config: signatures_in_client={self._preserve_signatures_in_client}, "
433
  f"cache={self._enable_signature_cache}, dynamic_models={self._enable_dynamic_models}, "
434
+ f"gemini3_fix={self._enable_gemini3_tool_fix}, claude_fix={self._enable_claude_tool_fix}"
435
  )
436
 
437
  # =========================================================================
 
600
  if msg.get("role") == "assistant" and msg.get("tool_calls"):
601
  for tc in msg["tool_calls"]:
602
  if tc.get("type") == "function":
603
+ tc_id = tc["id"]
604
+ tc_name = tc["function"]["name"]
605
+ tool_id_to_name[tc_id] = tc_name
606
+ #lib_logger.debug(f"[ID Mapping] Registered tool_call: id={tc_id}, name={tc_name}")
607
 
608
  # Convert each message
609
  for msg in messages:
 
699
  tool_id = tc.get("id", "")
700
  func_name = tc["function"]["name"]
701
 
702
+ #lib_logger.debug(
703
+ # f"[ID Transform] Converting assistant tool_call to functionCall: "
704
+ # f"id={tool_id}, name={func_name}"
705
+ #)
706
+
707
  # Add prefix for Gemini 3
708
  if self._is_gemini_3(model) and self._enable_gemini3_tool_fix:
709
  func_name = f"{self._gemini3_tool_prefix}{func_name}"
 
778
  func_name = tool_id_to_name.get(tool_id, "unknown_function")
779
  content = msg.get("content", "{}")
780
 
781
+ # Log ID lookup
782
+ if tool_id not in tool_id_to_name:
783
+ lib_logger.warning(
784
+ f"[ID Mismatch] Tool response has ID '{tool_id}' which was not found in tool_id_to_name map. "
785
+ f"Available IDs: {list(tool_id_to_name.keys())}"
786
+ )
787
+ #else:
788
+ #lib_logger.debug(f"[ID Mapping] Tool response matched: id={tool_id}, name={func_name}")
789
+
790
  # Add prefix for Gemini 3
791
  if self._is_gemini_3(model) and self._enable_gemini3_tool_fix:
792
  func_name = f"{self._gemini3_tool_prefix}{func_name}"
 
817
 
818
  Converts linear format (call, response, call, response)
819
  to grouped format (model with calls, user with all responses).
820
+
821
+ IMPORTANT: Preserves ID-based pairing to prevent mismatches.
822
  """
823
  new_contents = []
824
+ pending_groups = [] # List of {"ids": [id1, id2, ...], "call_indices": [...]}
825
+ collected_responses = {} # Dict mapping ID -> response_part
826
 
827
  for content in contents:
828
  role = content.get("role")
 
831
  response_parts = [p for p in parts if "functionResponse" in p]
832
 
833
  if response_parts:
834
+ # Collect responses by ID (ignore duplicates - keep first occurrence)
835
+ for resp in response_parts:
836
+ resp_id = resp.get("functionResponse", {}).get("id", "")
837
+ if resp_id:
838
+ if resp_id in collected_responses:
839
+ lib_logger.warning(
840
+ f"[Grouping] Duplicate response ID detected: {resp_id}. "
841
+ f"Ignoring duplicate - this may indicate malformed conversation history."
842
+ )
843
+ continue
844
+ #lib_logger.debug(f"[Grouping] Collected response for ID: {resp_id}")
845
+ collected_responses[resp_id] = resp
846
 
847
+ # Try to satisfy pending groups (newest first)
848
  for i in range(len(pending_groups) - 1, -1, -1):
849
  group = pending_groups[i]
850
+ group_ids = group["ids"]
851
+
852
+ # Check if we have ALL responses for this group
853
+ if all(gid in collected_responses for gid in group_ids):
854
+ # Extract responses in the same order as the function calls
855
+ group_responses = [collected_responses.pop(gid) for gid in group_ids]
856
  new_contents.append({"parts": group_responses, "role": "user"})
857
+ #lib_logger.debug(
858
+ # f"[Grouping] Satisfied group with {len(group_responses)} responses: "
859
+ # f"ids={group_ids}"
860
+ #)
861
  pending_groups.pop(i)
862
  break
863
  continue
 
866
  func_calls = [p for p in parts if "functionCall" in p]
867
  new_contents.append(content)
868
  if func_calls:
869
+ call_ids = [fc.get("functionCall", {}).get("id", "") for fc in func_calls]
870
+ call_ids = [cid for cid in call_ids if cid] # Filter empty IDs
871
+ if call_ids:
872
+ lib_logger.debug(f"[Grouping] Created pending group expecting {len(call_ids)} responses: ids={call_ids}")
873
+ pending_groups.append({"ids": call_ids, "call_indices": list(range(len(func_calls)))})
874
  else:
875
  new_contents.append(content)
876
 
877
+ # Handle remaining groups (shouldn't happen in well-formed conversations)
878
  for group in pending_groups:
879
+ group_ids = group["ids"]
880
+ available_ids = [gid for gid in group_ids if gid in collected_responses]
881
+ if available_ids:
882
+ group_responses = [collected_responses.pop(gid) for gid in available_ids]
883
  new_contents.append({"parts": group_responses, "role": "user"})
884
+ lib_logger.warning(
885
+ f"[Grouping] Partial group satisfaction: expected {len(group_ids)}, "
886
+ f"got {len(available_ids)} responses"
887
+ )
888
+
889
+ # Warn about unmatched responses
890
+ if collected_responses:
891
+ lib_logger.warning(
892
+ f"[Grouping] {len(collected_responses)} unmatched responses remaining: "
893
+ f"ids={list(collected_responses.keys())}"
894
+ )
895
 
896
  return new_contents
897
 
 
918
 
919
  def _inject_signature_into_descriptions(
920
  self,
921
+ tools: List[Dict[str, Any]],
922
+ description_prompt: Optional[str] = None
923
  ) -> List[Dict[str, Any]]:
924
+ """Inject parameter signatures into tool descriptions for Gemini 3 & Claude."""
925
  if not tools:
926
  return tools
927
 
928
+ # Use provided prompt or default to Gemini 3 prompt
929
+ prompt_template = description_prompt or self._gemini3_description_prompt
930
+
931
  modified = copy.deepcopy(tools)
932
  for tool in modified:
933
  for func_decl in tool.get("functionDeclarations", []):
 
953
  )
954
 
955
  if param_list:
956
+ sig_str = prompt_template.replace(
957
  "{params}", ", ".join(param_list)
958
  )
959
  func_decl["description"] = func_decl.get("description", "") + sig_str
 
991
  return name[len(self._gemini3_tool_prefix):]
992
  return name
993
 
994
+ def _translate_tool_choice(self, tool_choice: Union[str, Dict[str, Any]], model: str = "") -> Optional[Dict[str, Any]]:
995
+ """
996
+ Translates OpenAI's `tool_choice` to Gemini's `toolConfig`.
997
+ Handles Gemini 3 namespace prefixes for specific tool selection.
998
+ """
999
+ if not tool_choice:
1000
+ return None
1001
+
1002
+ config = {}
1003
+ mode = "AUTO" # Default to auto
1004
+ is_gemini_3 = self._is_gemini_3(model)
1005
+
1006
+ if isinstance(tool_choice, str):
1007
+ if tool_choice == "auto":
1008
+ mode = "AUTO"
1009
+ elif tool_choice == "none":
1010
+ mode = "NONE"
1011
+ elif tool_choice == "required":
1012
+ mode = "ANY"
1013
+ elif isinstance(tool_choice, dict) and tool_choice.get("type") == "function":
1014
+ function_name = tool_choice.get("function", {}).get("name")
1015
+ if function_name:
1016
+ # Add Gemini 3 prefix if needed
1017
+ if is_gemini_3 and self._enable_gemini3_tool_fix:
1018
+ function_name = f"{self._gemini3_tool_prefix}{function_name}"
1019
+
1020
+ mode = "ANY" # Force a call, but only to this function
1021
+ config["functionCallingConfig"] = {
1022
+ "mode": mode,
1023
+ "allowedFunctionNames": [function_name]
1024
+ }
1025
+ return config
1026
+
1027
+ config["functionCallingConfig"] = {"mode": mode}
1028
+ return config
1029
+
1030
  # =========================================================================
1031
  # REQUEST TRANSFORMATION
1032
  # =========================================================================
 
1071
  gemini_payload: Dict[str, Any],
1072
  model: str,
1073
  max_tokens: Optional[int] = None,
1074
+ reasoning_effort: Optional[str] = None,
1075
+ tool_choice: Optional[Union[str, Dict[str, Any]]] = None
1076
  ) -> Dict[str, Any]:
1077
  """
1078
  Transform Gemini CLI payload to complete Antigravity format.
 
1090
  if internal_model == "claude-sonnet-4-5" and not internal_model.endswith("-thinking"):
1091
  internal_model = "claude-sonnet-4-5-thinking"
1092
 
1093
+ # Map gemini-3-pro-preview to -low/-high variant based on thinking config
1094
+ if model == "gemini-3-pro-preview" or internal_model == "gemini-3-pro-preview":
1095
+ # Check thinking config to determine variant
1096
+ thinking_config = gemini_payload.get("generationConfig", {}).get("thinkingConfig", {})
1097
+ thinking_level = thinking_config.get("thinkingLevel", "high")
1098
+ if thinking_level == "low":
1099
+ internal_model = "gemini-3-pro-low"
1100
+ else:
1101
+ internal_model = "gemini-3-pro-high"
1102
+
1103
  # Wrap in Antigravity envelope
1104
  antigravity_payload = {
1105
  "project": _generate_project_id(),
 
1129
 
1130
  antigravity_payload["request"]["generationConfig"] = gen_config
1131
 
1132
+ # Set toolConfig based on tool_choice parameter
1133
+ tool_config_result = self._translate_tool_choice(tool_choice, model)
1134
+ if tool_config_result:
1135
+ antigravity_payload["request"]["toolConfig"] = tool_config_result
1136
+ else:
1137
+ # Default to AUTO if no tool_choice specified
1138
+ tool_config = antigravity_payload["request"].setdefault("toolConfig", {})
1139
+ func_config = tool_config.setdefault("functionCallingConfig", {})
1140
+ func_config["mode"] = "AUTO"
1141
 
1142
  # Handle Gemini 3 thinking logic
1143
  if not internal_model.startswith("gemini-3-"):
 
1204
  reasoning_content = ""
1205
  tool_calls = []
1206
  first_sig_seen = False
1207
+ # Use accumulator's tool_idx if available, otherwise use local counter
1208
+ tool_idx = accumulator.get("tool_idx", 0) if accumulator else 0
1209
 
1210
  for part in content_parts:
1211
  has_func = "functionCall" in part
 
1251
  if tool_calls:
1252
  delta["tool_calls"] = tool_calls
1253
  delta["role"] = "assistant"
1254
+ # Update tool_idx for next chunk
1255
+ if accumulator is not None:
1256
+ accumulator["tool_idx"] = tool_idx
1257
  elif text_content or reasoning_content:
1258
  delta["role"] = "assistant"
1259
 
1260
+ # Build usage if present
1261
+ usage = self._build_usage(chunk.get("usageMetadata", {}))
1262
+
1263
+ # Mark completion when we see usageMetadata
1264
+ if chunk.get("usageMetadata") and accumulator is not None:
1265
  accumulator["is_complete"] = True
1266
 
1267
+ # Build choice - just translate, don't include finish_reason
1268
+ # Client will handle finish_reason logic
1269
+ choice = {"index": 0, "delta": delta}
1270
 
1271
  response = {
1272
  "id": chunk.get("responseId", f"chatcmpl-{uuid.uuid4().hex[:24]}"),
1273
  "object": "chat.completion.chunk",
1274
  "created": int(time.time()),
1275
  "model": model,
1276
+ "choices": [choice]
1277
  }
1278
 
1279
  if usage:
 
1346
  finish_reason = self._map_finish_reason(candidate.get("finishReason"), bool(tool_calls))
1347
  usage = self._build_usage(response.get("usageMetadata", {}))
1348
 
1349
+ # For non-streaming, always include finish_reason (should always be present)
1350
  result = {
1351
  "id": response.get("responseId", f"chatcmpl-{uuid.uuid4().hex[:24]}"),
1352
  "object": "chat.completion",
1353
  "created": int(time.time()),
1354
  "model": model,
1355
+ "choices": [{"index": 0, "message": message, "finish_reason": finish_reason or "stop"}]
1356
  }
1357
 
1358
  if usage:
 
1371
  func_call = part["functionCall"]
1372
  tool_id = func_call.get("id") or f"call_{uuid.uuid4().hex[:24]}"
1373
 
1374
+ #lib_logger.debug(f"[ID Extraction] Extracting tool call: id={tool_id}, raw_id={func_call.get('id')}")
1375
+
1376
  tool_name = func_call.get("name", "")
1377
  if self._is_gemini_3(model) and self._enable_gemini3_tool_fix:
1378
  tool_name = self._strip_gemini3_prefix(tool_name)
 
1544
  stream = kwargs.get("stream", False)
1545
  credential_path = kwargs.pop("credential_identifier", kwargs.get("api_key", ""))
1546
  tools = kwargs.get("tools")
1547
+ tool_choice = kwargs.get("tool_choice")
1548
  reasoning_effort = kwargs.get("reasoning_effort")
1549
  top_p = kwargs.get("top_p")
1550
  max_tokens = kwargs.get("max_tokens")
 
1564
  if system_instruction:
1565
  gemini_payload["system_instruction"] = system_instruction
1566
 
1567
+ # Inject tool usage hardening system instructions
1568
+ if tools:
1569
+ if self._is_gemini_3(model) and self._enable_gemini3_tool_fix:
1570
+ self._inject_tool_hardening_instruction(gemini_payload, self._gemini3_system_instruction)
1571
+ elif self._is_claude(model) and self._enable_claude_tool_fix:
1572
+ self._inject_tool_hardening_instruction(gemini_payload, self._claude_system_instruction)
1573
 
1574
  # Add generation config
1575
  gen_config = {}
 
1588
  if gemini_tools:
1589
  gemini_payload["tools"] = gemini_tools
1590
 
1591
+ # Apply tool transformations
1592
  if self._is_gemini_3(model) and self._enable_gemini3_tool_fix:
1593
+ # Gemini 3: namespace prefix + parameter signatures
1594
  gemini_payload["tools"] = self._apply_gemini3_namespace(gemini_payload["tools"])
1595
+ gemini_payload["tools"] = self._inject_signature_into_descriptions(
1596
+ gemini_payload["tools"],
1597
+ self._gemini3_description_prompt
1598
+ )
1599
+ elif self._is_claude(model) and self._enable_claude_tool_fix:
1600
+ # Claude: parameter signatures only (no namespace prefix)
1601
+ gemini_payload["tools"] = self._inject_signature_into_descriptions(
1602
+ gemini_payload["tools"],
1603
+ self._claude_description_prompt
1604
+ )
1605
 
1606
  # Transform to Antigravity format
1607
+ payload = self._transform_to_antigravity_format(gemini_payload, model, max_tokens, reasoning_effort, tool_choice)
1608
  file_logger.log_request(payload)
1609
 
1610
  # Make API call
 
1642
  return await self._handle_non_streaming(client, url, headers, payload, model, file_logger)
1643
  raise
1644
 
1645
+ def _inject_tool_hardening_instruction(self, payload: Dict[str, Any], instruction_text: str) -> None:
1646
+ """Inject tool usage hardening system instruction for Gemini 3 & Claude."""
1647
+ if not instruction_text:
1648
  return
1649
 
1650
+ instruction_part = {"text": instruction_text}
1651
 
1652
  if "system_instruction" in payload:
1653
  existing = payload["system_instruction"]
 
1693
  file_logger: Optional[AntigravityFileLogger] = None
1694
  ) -> AsyncGenerator[litellm.ModelResponse, None]:
1695
  """Handle streaming completion."""
1696
+ # Accumulator tracks state across chunks for caching and tool indexing
1697
  accumulator = {
1698
  "reasoning_content": "",
1699
  "thought_signature": "",
1700
  "text_content": "",
1701
  "tool_calls": [],
1702
+ "tool_idx": 0, # Track tool call index across chunks
1703
+ "is_complete": False # Track if we received usageMetadata
1704
+ }
1705
 
1706
  async with client.stream("POST", url, headers=headers, json=payload, timeout=120.0) as response:
1707
  if response.status_code >= 400:
 
1733
  file_logger.log_error(f"Parse error: {data_str[:100]}")
1734
  continue
1735
 
1736
+ # If stream ended without usageMetadata chunk, emit a final chunk with finish_reason
1737
+ # Emit final chunk if stream ended without usageMetadata
1738
+ # Client will determine the correct finish_reason based on accumulated state
1739
+ if not accumulator.get("is_complete"):
1740
+ final_chunk = {
1741
+ "id": f"chatcmpl-{uuid.uuid4().hex[:24]}",
1742
+ "object": "chat.completion.chunk",
1743
+ "created": int(time.time()),
1744
+ "model": model,
1745
+ "choices": [{"index": 0, "delta": {}, "finish_reason": None}],
1746
+ # Include minimal usage to signal this is the final chunk
1747
+ "usage": {"prompt_tokens": 0, "completion_tokens": 1, "total_tokens": 1}
1748
+ }
1749
+ yield litellm.ModelResponse(**final_chunk)
1750
+
1751
  # Cache Claude thinking after stream completes
1752
+ if self._is_claude(model) and self._enable_signature_cache and accumulator.get("reasoning_content"):
1753
  self._cache_thinking(
1754
  accumulator["reasoning_content"],
1755
  accumulator["thought_signature"],
src/rotator_library/providers/gemini_cli_provider.py CHANGED
@@ -870,7 +870,6 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
870
 
871
  for part in parts:
872
  delta = {}
873
- finish_reason = None
874
 
875
  has_func = 'functionCall' in part
876
  has_text = 'text' in part
@@ -892,8 +891,11 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
892
  # Use provided ID or generate unique one with nanosecond precision
893
  tool_call_id = function_call.get('id') or f"call_{function_name}_{int(time.time() * 1_000_000_000)}"
894
 
 
 
 
895
  tool_call = {
896
- "index": 0,
897
  "id": tool_call_id,
898
  "type": "function",
899
  "function": {
@@ -915,6 +917,10 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
915
  tool_call["thought_signature"] = sig
916
 
917
  delta['tool_calls'] = [tool_call]
 
 
 
 
918
 
919
  elif has_text:
920
  # Use an explicit check for the 'thought' flag, as its type can be inconsistent
@@ -926,14 +932,16 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
926
  if not delta:
927
  continue
928
 
929
- raw_finish_reason = candidate.get('finishReason')
930
- if raw_finish_reason:
931
- finish_reason = FINISH_REASON_MAP.get(raw_finish_reason, 'stop')
932
- # Use tool_calls if we have function calls
933
- if delta.get('tool_calls'):
934
- finish_reason = 'tool_calls'
 
935
 
936
- choice = {"index": 0, "delta": delta, "finish_reason": finish_reason}
 
937
 
938
  openai_chunk = {
939
  "choices": [choice], "model": model_id, "object": "chat.completion.chunk",
@@ -1020,9 +1028,8 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
1020
  if "arguments" in delta["function_call"] and delta["function_call"]["arguments"] is not None:
1021
  final_message["function_call"]["arguments"] += delta["function_call"]["arguments"]
1022
 
1023
- # Get finish reason from the last chunk that has it
1024
- if choice.get("finish_reason"):
1025
- finish_reason = choice["finish_reason"]
1026
 
1027
  # Handle usage data from the last chunk that has it
1028
  for chunk in reversed(chunks):
@@ -1039,6 +1046,13 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
1039
  if field not in final_message:
1040
  final_message[field] = None
1041
 
 
 
 
 
 
 
 
1042
  # Construct the final response
1043
  final_choice = {
1044
  "index": 0,
@@ -1343,6 +1357,9 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
1343
  url = f"{CODE_ASSIST_ENDPOINT}:streamGenerateContent"
1344
 
1345
  async def stream_handler():
 
 
 
1346
  final_headers = auth_header.copy()
1347
  final_headers.update({
1348
  "User-Agent": "google-api-nodejs-client/9.15.1",
@@ -1362,10 +1379,24 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
1362
  if data_str == "[DONE]": break
1363
  try:
1364
  chunk = json.loads(data_str)
1365
- for openai_chunk in self._convert_chunk_to_openai(chunk, model):
1366
  yield litellm.ModelResponse(**openai_chunk)
1367
  except json.JSONDecodeError:
1368
  lib_logger.warning(f"Could not decode JSON from Gemini CLI: {line}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1369
 
1370
  except httpx.HTTPStatusError as e:
1371
  error_body = None
 
870
 
871
  for part in parts:
872
  delta = {}
 
873
 
874
  has_func = 'functionCall' in part
875
  has_text = 'text' in part
 
891
  # Use provided ID or generate unique one with nanosecond precision
892
  tool_call_id = function_call.get('id') or f"call_{function_name}_{int(time.time() * 1_000_000_000)}"
893
 
894
+ # Get current tool index from accumulator (default 0) and increment
895
+ current_tool_idx = accumulator.get('tool_idx', 0) if accumulator else 0
896
+
897
  tool_call = {
898
+ "index": current_tool_idx,
899
  "id": tool_call_id,
900
  "type": "function",
901
  "function": {
 
917
  tool_call["thought_signature"] = sig
918
 
919
  delta['tool_calls'] = [tool_call]
920
+ # Mark that we've sent tool calls and increment tool_idx
921
+ if accumulator is not None:
922
+ accumulator['has_tool_calls'] = True
923
+ accumulator['tool_idx'] = current_tool_idx + 1
924
 
925
  elif has_text:
926
  # Use an explicit check for the 'thought' flag, as its type can be inconsistent
 
932
  if not delta:
933
  continue
934
 
935
+ # Mark that we have tool calls for accumulator tracking
936
+ # finish_reason determination is handled by the client
937
+
938
+ # Mark stream complete if we have usageMetadata
939
+ is_final_chunk = 'usageMetadata' in response_data
940
+ if is_final_chunk and accumulator is not None:
941
+ accumulator['is_complete'] = True
942
 
943
+ # Build choice - don't include finish_reason, let client handle it
944
+ choice = {"index": 0, "delta": delta}
945
 
946
  openai_chunk = {
947
  "choices": [choice], "model": model_id, "object": "chat.completion.chunk",
 
1028
  if "arguments" in delta["function_call"] and delta["function_call"]["arguments"] is not None:
1029
  final_message["function_call"]["arguments"] += delta["function_call"]["arguments"]
1030
 
1031
+ # Note: chunks don't include finish_reason (client handles it)
1032
+ # This is kept for compatibility but shouldn't trigger
 
1033
 
1034
  # Handle usage data from the last chunk that has it
1035
  for chunk in reversed(chunks):
 
1046
  if field not in final_message:
1047
  final_message[field] = None
1048
 
1049
+ # Determine finish_reason based on content (same logic as client.py)
1050
+ # tool_calls wins, otherwise stop
1051
+ if aggregated_tool_calls:
1052
+ finish_reason = "tool_calls"
1053
+ else:
1054
+ finish_reason = "stop"
1055
+
1056
  # Construct the final response
1057
  final_choice = {
1058
  "index": 0,
 
1357
  url = f"{CODE_ASSIST_ENDPOINT}:streamGenerateContent"
1358
 
1359
  async def stream_handler():
1360
+ # Track state across chunks for tool indexing
1361
+ accumulator = {"has_tool_calls": False, "tool_idx": 0, "is_complete": False}
1362
+
1363
  final_headers = auth_header.copy()
1364
  final_headers.update({
1365
  "User-Agent": "google-api-nodejs-client/9.15.1",
 
1379
  if data_str == "[DONE]": break
1380
  try:
1381
  chunk = json.loads(data_str)
1382
+ for openai_chunk in self._convert_chunk_to_openai(chunk, model, accumulator):
1383
  yield litellm.ModelResponse(**openai_chunk)
1384
  except json.JSONDecodeError:
1385
  lib_logger.warning(f"Could not decode JSON from Gemini CLI: {line}")
1386
+
1387
+ # Emit final chunk if stream ended without usageMetadata
1388
+ # Client will determine the correct finish_reason
1389
+ if not accumulator.get("is_complete"):
1390
+ final_chunk = {
1391
+ "id": f"chatcmpl-geminicli-{time.time()}",
1392
+ "object": "chat.completion.chunk",
1393
+ "created": int(time.time()),
1394
+ "model": model,
1395
+ "choices": [{"index": 0, "delta": {}, "finish_reason": None}],
1396
+ # Include minimal usage to signal this is the final chunk
1397
+ "usage": {"prompt_tokens": 0, "completion_tokens": 1, "total_tokens": 1}
1398
+ }
1399
+ yield litellm.ModelResponse(**final_chunk)
1400
 
1401
  except httpx.HTTPStatusError as e:
1402
  error_body = None