Mirrowel commited on
Commit
d4593e5
Β·
1 Parent(s): 62e7cf3

fix(gemini): πŸ› consolidate parallel tool responses and improve rate limit handling

Browse files

This commit addresses multiple issues with Gemini API providers related to parallel function calling and rate limit error handling:

**Tool Response Consolidation:**
- Parallel function responses are now consolidated into a single user message as required by Gemini API specification
- Previously, consecutive tool responses were sent as separate messages, causing API errors
- Implemented pending tool parts accumulation pattern in both GeminiCliProvider and AntigravityProvider
- Tool responses are flushed when a non-tool message is encountered or at the end of message processing

**Thought Signature Handling:**
- Fixed parallel function call signature behavior to match Gemini 3 API requirements
- Only the first parallel function call in a message receives a thoughtSignature field
- Subsequent parallel calls no longer include thoughtSignature to prevent API validation errors
- Removed `first_sig_seen` tracking flags since signatures are now stored per tool call

**Rate Limit Error Handling:**
- Added `extract_retry_after_from_body()` function to parse retry-after times from various API error formats
- Improved Gemini CLI rate limit error messages with extracted retry-after information
- Enhanced error logging to capture and display response bodies before raising HTTPStatusError
- Reduced log noise by using debug level for rate limit rotation events instead of info/warning
- Better error context propagation for 429 responses

**Code Quality:**
- Removed unused `first_sig_seen` tracking variables
- Improved inline documentation explaining Gemini API parallel function call requirements
- Consistent role mapping (tool -> user) across message transformation logic

src/rotator_library/error_handler.py CHANGED
@@ -17,6 +17,42 @@ from litellm.exceptions import (
17
  )
18
 
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  class NoAvailableKeysError(Exception):
21
  """Raised when no API keys are available for a request after waiting."""
22
 
@@ -106,6 +142,8 @@ def get_retry_after(error: Exception) -> Optional[int]:
106
  r"wait for\s*(\d+)\s*seconds?",
107
  r'"retryDelay":\s*"(\d+)s"',
108
  r"x-ratelimit-reset:?\s*(\d+)",
 
 
109
  ]
110
 
111
  for pattern in patterns:
 
17
  )
18
 
19
 
20
+ def extract_retry_after_from_body(error_body: Optional[str]) -> Optional[int]:
21
+ """
22
+ Extract the retry-after time from an API error response body.
23
+
24
+ Handles various error formats including:
25
+ - Gemini CLI: "Your quota will reset after 39s."
26
+ - Generic: "quota will reset after 120s", "retry after 60s"
27
+
28
+ Args:
29
+ error_body: The raw error response body
30
+
31
+ Returns:
32
+ The retry time in seconds, or None if not found
33
+ """
34
+ if not error_body:
35
+ return None
36
+
37
+ # Pattern to match various "reset after Xs" or "retry after Xs" formats
38
+ patterns = [
39
+ r"quota will reset after\s*(\d+)s",
40
+ r"reset after\s*(\d+)s",
41
+ r"retry after\s*(\d+)s",
42
+ r"try again in\s*(\d+)\s*seconds?",
43
+ ]
44
+
45
+ for pattern in patterns:
46
+ match = re.search(pattern, error_body, re.IGNORECASE)
47
+ if match:
48
+ try:
49
+ return int(match.group(1))
50
+ except (ValueError, IndexError):
51
+ continue
52
+
53
+ return None
54
+
55
+
56
  class NoAvailableKeysError(Exception):
57
  """Raised when no API keys are available for a request after waiting."""
58
 
 
142
  r"wait for\s*(\d+)\s*seconds?",
143
  r'"retryDelay":\s*"(\d+)s"',
144
  r"x-ratelimit-reset:?\s*(\d+)",
145
+ r"quota will reset after\s*(\d+)s", # Gemini CLI rate limit format
146
+ r"reset after\s*(\d+)s", # Generic reset after format
147
  ]
148
 
149
  for pattern in patterns:
src/rotator_library/providers/antigravity_provider.py CHANGED
@@ -605,23 +605,38 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
605
  tool_id_to_name[tc_id] = tc_name
606
  #lib_logger.debug(f"[ID Mapping] Registered tool_call: id={tc_id}, name={tc_name}")
607
 
608
- # Convert each message
 
 
 
609
  for msg in messages:
610
  role = msg.get("role")
611
  content = msg.get("content")
612
  parts = []
613
 
 
 
 
 
 
614
  if role == "user":
615
  parts = self._transform_user_message(content)
616
  elif role == "assistant":
617
  parts = self._transform_assistant_message(msg, model, tool_id_to_name)
618
  elif role == "tool":
619
- parts = self._transform_tool_message(msg, model, tool_id_to_name)
 
 
 
620
 
621
  if parts:
622
- gemini_role = "model" if role == "assistant" else "user" if role == "tool" else "user"
623
  gemini_contents.append({"role": gemini_role, "parts": parts})
624
 
 
 
 
 
625
  return system_instruction, gemini_contents
626
 
627
  def _parse_content_parts(
@@ -687,6 +702,9 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
687
  parts.append({"text": content})
688
 
689
  # Add tool calls
 
 
 
690
  for tc in tool_calls:
691
  if tc.get("type") != "function":
692
  continue
@@ -717,6 +735,8 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
717
  }
718
 
719
  # Add thoughtSignature for Gemini 3
 
 
720
  if self._is_gemini_3(model):
721
  sig = tc.get("thought_signature")
722
  if not sig and tool_id and self._enable_signature_cache:
@@ -724,9 +744,13 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
724
 
725
  if sig:
726
  func_part["thoughtSignature"] = sig
727
- else:
 
728
  func_part["thoughtSignature"] = "skip_thought_signature_validator"
729
- lib_logger.warning(f"Missing thoughtSignature for {tool_id}, using bypass")
 
 
 
730
 
731
  parts.append(func_part)
732
 
@@ -1146,13 +1170,20 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
1146
  del thinking_config["thinkingLevel"]
1147
  thinking_config["thinkingBudget"] = -1
1148
 
1149
- # Add thoughtSignature to function calls for Gemini 3
 
1150
  if internal_model.startswith("gemini-3-"):
1151
  for content in antigravity_payload["request"].get("contents", []):
1152
  if content.get("role") == "model":
 
1153
  for part in content.get("parts", []):
1154
- if "functionCall" in part and "thoughtSignature" not in part:
1155
- part["thoughtSignature"] = "skip_thought_signature_validator"
 
 
 
 
 
1156
 
1157
  # Claude-specific tool schema transformation
1158
  if internal_model.startswith("claude-sonnet-"):
@@ -1203,7 +1234,6 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
1203
  text_content = ""
1204
  reasoning_content = ""
1205
  tool_calls = []
1206
- first_sig_seen = False
1207
  # Use accumulator's tool_idx if available, otherwise use local counter
1208
  tool_idx = accumulator.get("tool_idx", 0) if accumulator else 0
1209
 
@@ -1235,8 +1265,8 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
1235
  if has_func:
1236
  tool_call = self._extract_tool_call(part, model, tool_idx, accumulator)
1237
 
1238
- if has_sig and not first_sig_seen:
1239
- first_sig_seen = True
1240
  self._handle_tool_signature(tool_call, part["thoughtSignature"])
1241
 
1242
  tool_calls.append(tool_call)
@@ -1298,7 +1328,6 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
1298
  reasoning_content = ""
1299
  tool_calls = []
1300
  thought_sig = ""
1301
- first_sig_seen = False
1302
 
1303
  for part in content_parts:
1304
  has_func = "functionCall" in part
@@ -1321,8 +1350,8 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
1321
  if has_func:
1322
  tool_call = self._extract_tool_call(part, model, len(tool_calls))
1323
 
1324
- if has_sig and not first_sig_seen:
1325
- first_sig_seen = True
1326
  self._handle_tool_signature(tool_call, part["thoughtSignature"])
1327
 
1328
  tool_calls.append(tool_call)
 
605
  tool_id_to_name[tc_id] = tc_name
606
  #lib_logger.debug(f"[ID Mapping] Registered tool_call: id={tc_id}, name={tc_name}")
607
 
608
+ # Convert each message, consolidating consecutive tool responses
609
+ # Per Gemini docs: parallel function responses must be in a single user message
610
+ pending_tool_parts = []
611
+
612
  for msg in messages:
613
  role = msg.get("role")
614
  content = msg.get("content")
615
  parts = []
616
 
617
+ # Flush pending tool parts before non-tool message
618
+ if pending_tool_parts and role != "tool":
619
+ gemini_contents.append({"role": "user", "parts": pending_tool_parts})
620
+ pending_tool_parts = []
621
+
622
  if role == "user":
623
  parts = self._transform_user_message(content)
624
  elif role == "assistant":
625
  parts = self._transform_assistant_message(msg, model, tool_id_to_name)
626
  elif role == "tool":
627
+ tool_parts = self._transform_tool_message(msg, model, tool_id_to_name)
628
+ # Accumulate tool responses instead of adding individually
629
+ pending_tool_parts.extend(tool_parts)
630
+ continue
631
 
632
  if parts:
633
+ gemini_role = "model" if role == "assistant" else "user"
634
  gemini_contents.append({"role": gemini_role, "parts": parts})
635
 
636
+ # Flush any remaining tool parts
637
+ if pending_tool_parts:
638
+ gemini_contents.append({"role": "user", "parts": pending_tool_parts})
639
+
640
  return system_instruction, gemini_contents
641
 
642
  def _parse_content_parts(
 
702
  parts.append({"text": content})
703
 
704
  # Add tool calls
705
+ # Track if we've seen the first function call in this message
706
+ # Per Gemini docs: Only the FIRST parallel function call gets a signature
707
+ first_func_in_msg = True
708
  for tc in tool_calls:
709
  if tc.get("type") != "function":
710
  continue
 
735
  }
736
 
737
  # Add thoughtSignature for Gemini 3
738
+ # Per Gemini docs: Only the FIRST parallel function call gets a signature.
739
+ # Subsequent parallel calls should NOT have a thoughtSignature field.
740
  if self._is_gemini_3(model):
741
  sig = tc.get("thought_signature")
742
  if not sig and tool_id and self._enable_signature_cache:
 
744
 
745
  if sig:
746
  func_part["thoughtSignature"] = sig
747
+ elif first_func_in_msg:
748
+ # Only add bypass to the first function call if no sig available
749
  func_part["thoughtSignature"] = "skip_thought_signature_validator"
750
+ lib_logger.warning(f"Missing thoughtSignature for first func call {tool_id}, using bypass")
751
+ # Subsequent parallel calls: no signature field at all
752
+
753
+ first_func_in_msg = False
754
 
755
  parts.append(func_part)
756
 
 
1170
  del thinking_config["thinkingLevel"]
1171
  thinking_config["thinkingBudget"] = -1
1172
 
1173
+ # Ensure first function call in each model message has a thoughtSignature for Gemini 3
1174
+ # Per Gemini docs: Only the FIRST parallel function call gets a signature
1175
  if internal_model.startswith("gemini-3-"):
1176
  for content in antigravity_payload["request"].get("contents", []):
1177
  if content.get("role") == "model":
1178
+ first_func_seen = False
1179
  for part in content.get("parts", []):
1180
+ if "functionCall" in part:
1181
+ if not first_func_seen:
1182
+ # First function call in this message - needs a signature
1183
+ if "thoughtSignature" not in part:
1184
+ part["thoughtSignature"] = "skip_thought_signature_validator"
1185
+ first_func_seen = True
1186
+ # Subsequent parallel calls: leave as-is (no signature)
1187
 
1188
  # Claude-specific tool schema transformation
1189
  if internal_model.startswith("claude-sonnet-"):
 
1234
  text_content = ""
1235
  reasoning_content = ""
1236
  tool_calls = []
 
1237
  # Use accumulator's tool_idx if available, otherwise use local counter
1238
  tool_idx = accumulator.get("tool_idx", 0) if accumulator else 0
1239
 
 
1265
  if has_func:
1266
  tool_call = self._extract_tool_call(part, model, tool_idx, accumulator)
1267
 
1268
+ # Store signature for each tool call (needed for parallel tool calls)
1269
+ if has_sig:
1270
  self._handle_tool_signature(tool_call, part["thoughtSignature"])
1271
 
1272
  tool_calls.append(tool_call)
 
1328
  reasoning_content = ""
1329
  tool_calls = []
1330
  thought_sig = ""
 
1331
 
1332
  for part in content_parts:
1333
  has_func = "functionCall" in part
 
1350
  if has_func:
1351
  tool_call = self._extract_tool_call(part, model, len(tool_calls))
1352
 
1353
+ # Store signature for each tool call (needed for parallel tool calls)
1354
+ if has_sig:
1355
  self._handle_tool_signature(tool_call, part["thoughtSignature"])
1356
 
1357
  tool_calls.append(tool_call)
src/rotator_library/providers/gemini_cli_provider.py CHANGED
@@ -13,6 +13,7 @@ from .provider_cache import ProviderCache
13
  from ..model_definitions import ModelDefinitions
14
  import litellm
15
  from litellm.exceptions import RateLimitError
 
16
  import os
17
  from pathlib import Path
18
  import uuid
@@ -125,6 +126,7 @@ def _env_int(key: str, default: int) -> int:
125
  """Get integer from environment variable."""
126
  return int(os.getenv(key, str(default)))
127
 
 
128
  class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
129
  skip_cost_calculation = True
130
 
@@ -684,11 +686,21 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
684
  if tool_call.get("type") == "function":
685
  tool_call_id_to_name[tool_call["id"]] = tool_call["function"]["name"]
686
 
 
 
 
 
 
687
  for msg in messages:
688
  role = msg.get("role")
689
  content = msg.get("content")
690
  parts = []
691
- gemini_role = "model" if role == "assistant" else "tool" if role == "tool" else "user"
 
 
 
 
 
692
 
693
  if role == "user":
694
  if isinstance(content, str):
@@ -725,6 +737,9 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
725
  if isinstance(content, str):
726
  parts.append({"text": content})
727
  if msg.get("tool_calls"):
 
 
 
728
  for tool_call in msg["tool_calls"]:
729
  if tool_call.get("type") == "function":
730
  try:
@@ -748,6 +763,8 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
748
  }
749
 
750
  # Add thoughtSignature for Gemini 3
 
 
751
  if is_gemini_3:
752
  sig = tool_call.get("thought_signature")
753
  if not sig and tool_id and self._enable_signature_cache:
@@ -755,9 +772,13 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
755
 
756
  if sig:
757
  func_part["thoughtSignature"] = sig
758
- else:
 
759
  func_part["thoughtSignature"] = "skip_thought_signature_validator"
760
- lib_logger.warning(f"Missing thoughtSignature for {tool_id}, using bypass")
 
 
 
761
 
762
  parts.append(func_part)
763
 
@@ -771,17 +792,24 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
771
 
772
  # Wrap the tool response in a 'result' object
773
  response_content = {"result": content}
774
- parts.append({
 
775
  "functionResponse": {
776
  "name": function_name,
777
  "response": response_content,
778
  "id": tool_call_id
779
  }
780
  })
 
 
781
 
782
  if parts:
783
  gemini_contents.append({"role": gemini_role, "parts": parts})
784
 
 
 
 
 
785
  if not gemini_contents or gemini_contents[0]['role'] != 'user':
786
  gemini_contents.insert(0, {"role": "user", "parts": [{"text": ""}]})
787
 
@@ -866,7 +894,6 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
866
  candidate = candidates[0]
867
  parts = candidate.get('content', {}).get('parts', [])
868
  is_gemini_3 = self._is_gemini_3(model_id)
869
- first_sig_seen = False
870
 
871
  for part in parts:
872
  delta = {}
@@ -905,8 +932,8 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
905
  }
906
 
907
  # Handle thoughtSignature for Gemini 3
908
- if is_gemini_3 and has_sig and not first_sig_seen:
909
- first_sig_seen = True
910
  sig = part['thoughtSignature']
911
 
912
  if self._enable_signature_cache:
@@ -1369,6 +1396,15 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
1369
  })
1370
  try:
1371
  async with client.stream("POST", url, headers=final_headers, json=request_payload, params={"alt": "sse"}, timeout=600) as response:
 
 
 
 
 
 
 
 
 
1372
  # This will raise an HTTPStatusError for 4xx/5xx responses
1373
  response.raise_for_status()
1374
 
@@ -1405,16 +1441,24 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
1405
  error_body = e.response.text
1406
  except Exception:
1407
  pass
1408
- log_line = f"Stream handler HTTPStatusError: {str(e)}"
 
1409
  if error_body:
1410
- log_line = f"{log_line} | response_body={error_body}"
1411
- file_logger.log_error(log_line)
 
 
1412
  if e.response.status_code == 429:
1413
- # Pass the raw response object to the exception. Do not read the
1414
- # response body here as it will close the stream and cause a
1415
- # 'StreamClosed' error in the client's stream reader.
 
 
 
 
 
1416
  raise RateLimitError(
1417
- message=f"Gemini CLI rate limit exceeded: {e.request.url}",
1418
  llm_provider="gemini_cli",
1419
  model=model,
1420
  response=e.response
@@ -1451,7 +1495,8 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
1451
  for idx, attempt_model in enumerate(fallback_models):
1452
  is_fallback = idx > 0
1453
  if is_fallback:
1454
- lib_logger.info(f"Gemini CLI rate limited, retrying with fallback model: {attempt_model}")
 
1455
  elif has_fallbacks:
1456
  lib_logger.debug(f"Attempting primary model: {attempt_model} (with {len(fallback_models)-1} fallback(s) available)")
1457
  else:
@@ -1473,8 +1518,8 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
1473
  if idx + 1 < len(fallback_models):
1474
  lib_logger.debug(f"Rate limit hit on {attempt_model}, trying next fallback...")
1475
  continue
1476
- # If this was the last fallback option, raise the error
1477
- lib_logger.error(f"Rate limit hit on all fallback models (tried {len(fallback_models)} models)")
1478
  raise
1479
 
1480
  # Should not reach here, but raise last error if we do
 
13
  from ..model_definitions import ModelDefinitions
14
  import litellm
15
  from litellm.exceptions import RateLimitError
16
+ from ..error_handler import extract_retry_after_from_body
17
  import os
18
  from pathlib import Path
19
  import uuid
 
126
  """Get integer from environment variable."""
127
  return int(os.getenv(key, str(default)))
128
 
129
+
130
  class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
131
  skip_cost_calculation = True
132
 
 
686
  if tool_call.get("type") == "function":
687
  tool_call_id_to_name[tool_call["id"]] = tool_call["function"]["name"]
688
 
689
+ # Process messages and consolidate consecutive tool responses
690
+ # Per Gemini docs: parallel function responses must be in a single user message,
691
+ # not interleaved as separate messages
692
+ pending_tool_parts = [] # Accumulate tool responses
693
+
694
  for msg in messages:
695
  role = msg.get("role")
696
  content = msg.get("content")
697
  parts = []
698
+ gemini_role = "model" if role == "assistant" else "user" # tool -> user in Gemini
699
+
700
+ # If we have pending tool parts and hit a non-tool message, flush them first
701
+ if pending_tool_parts and role != "tool":
702
+ gemini_contents.append({"role": "user", "parts": pending_tool_parts})
703
+ pending_tool_parts = []
704
 
705
  if role == "user":
706
  if isinstance(content, str):
 
737
  if isinstance(content, str):
738
  parts.append({"text": content})
739
  if msg.get("tool_calls"):
740
+ # Track if we've seen the first function call in this message
741
+ # Per Gemini docs: Only the FIRST parallel function call gets a signature
742
+ first_func_in_msg = True
743
  for tool_call in msg["tool_calls"]:
744
  if tool_call.get("type") == "function":
745
  try:
 
763
  }
764
 
765
  # Add thoughtSignature for Gemini 3
766
+ # Per Gemini docs: Only the FIRST parallel function call gets a signature.
767
+ # Subsequent parallel calls should NOT have a thoughtSignature field.
768
  if is_gemini_3:
769
  sig = tool_call.get("thought_signature")
770
  if not sig and tool_id and self._enable_signature_cache:
 
772
 
773
  if sig:
774
  func_part["thoughtSignature"] = sig
775
+ elif first_func_in_msg:
776
+ # Only add bypass to the first function call if no sig available
777
  func_part["thoughtSignature"] = "skip_thought_signature_validator"
778
+ lib_logger.warning(f"Missing thoughtSignature for first func call {tool_id}, using bypass")
779
+ # Subsequent parallel calls: no signature field at all
780
+
781
+ first_func_in_msg = False
782
 
783
  parts.append(func_part)
784
 
 
792
 
793
  # Wrap the tool response in a 'result' object
794
  response_content = {"result": content}
795
+ # Accumulate tool responses - they'll be combined into one user message
796
+ pending_tool_parts.append({
797
  "functionResponse": {
798
  "name": function_name,
799
  "response": response_content,
800
  "id": tool_call_id
801
  }
802
  })
803
+ # Don't add parts here - tool responses are handled via pending_tool_parts
804
+ continue
805
 
806
  if parts:
807
  gemini_contents.append({"role": gemini_role, "parts": parts})
808
 
809
+ # Flush any remaining tool parts at end of messages
810
+ if pending_tool_parts:
811
+ gemini_contents.append({"role": "user", "parts": pending_tool_parts})
812
+
813
  if not gemini_contents or gemini_contents[0]['role'] != 'user':
814
  gemini_contents.insert(0, {"role": "user", "parts": [{"text": ""}]})
815
 
 
894
  candidate = candidates[0]
895
  parts = candidate.get('content', {}).get('parts', [])
896
  is_gemini_3 = self._is_gemini_3(model_id)
 
897
 
898
  for part in parts:
899
  delta = {}
 
932
  }
933
 
934
  # Handle thoughtSignature for Gemini 3
935
+ # Store signature for each tool call (needed for parallel tool calls)
936
+ if is_gemini_3 and has_sig:
937
  sig = part['thoughtSignature']
938
 
939
  if self._enable_signature_cache:
 
1396
  })
1397
  try:
1398
  async with client.stream("POST", url, headers=final_headers, json=request_payload, params={"alt": "sse"}, timeout=600) as response:
1399
+ # Read and log error body before raise_for_status for better debugging
1400
+ if response.status_code >= 400:
1401
+ try:
1402
+ error_body = await response.aread()
1403
+ lib_logger.error(f"Gemini CLI API error {response.status_code}: {error_body.decode()}")
1404
+ file_logger.log_error(f"API error {response.status_code}: {error_body.decode()}")
1405
+ except Exception:
1406
+ pass
1407
+
1408
  # This will raise an HTTPStatusError for 4xx/5xx responses
1409
  response.raise_for_status()
1410
 
 
1441
  error_body = e.response.text
1442
  except Exception:
1443
  pass
1444
+
1445
+ # Only log to file logger (for detailed logging)
1446
  if error_body:
1447
+ file_logger.log_error(f"HTTPStatusError {e.response.status_code}: {error_body}")
1448
+ else:
1449
+ file_logger.log_error(f"HTTPStatusError {e.response.status_code}: {str(e)}")
1450
+
1451
  if e.response.status_code == 429:
1452
+ # Extract retry-after time from the error body
1453
+ retry_after = extract_retry_after_from_body(error_body)
1454
+ retry_info = f" (retry after {retry_after}s)" if retry_after else ""
1455
+ error_msg = f"Gemini CLI rate limit exceeded{retry_info}"
1456
+ if error_body:
1457
+ error_msg = f"{error_msg} | {error_body}"
1458
+ # Only log at debug level - rotation happens silently
1459
+ lib_logger.debug(f"Gemini CLI 429 rate limit: retry_after={retry_after}s")
1460
  raise RateLimitError(
1461
+ message=error_msg,
1462
  llm_provider="gemini_cli",
1463
  model=model,
1464
  response=e.response
 
1495
  for idx, attempt_model in enumerate(fallback_models):
1496
  is_fallback = idx > 0
1497
  if is_fallback:
1498
+ # Silent rotation - only log at debug level
1499
+ lib_logger.debug(f"Rate limited on previous model, trying fallback: {attempt_model}")
1500
  elif has_fallbacks:
1501
  lib_logger.debug(f"Attempting primary model: {attempt_model} (with {len(fallback_models)-1} fallback(s) available)")
1502
  else:
 
1518
  if idx + 1 < len(fallback_models):
1519
  lib_logger.debug(f"Rate limit hit on {attempt_model}, trying next fallback...")
1520
  continue
1521
+ # If this was the last fallback option, log error and raise
1522
+ lib_logger.warning(f"Rate limit exhausted on all fallback models (tried {len(fallback_models)} models)")
1523
  raise
1524
 
1525
  # Should not reach here, but raise last error if we do