Spaces:

elmerzole
/

llm-api-proxy

Paused

App Files Files Community

Mirrowel committed on Dec 5, 2025

Commit

f03c448

2 Parent(s): 8c2f222 abdc406

Merge branch 'Antigravity'

Browse files

Files changed (3) hide show

src/rotator_library/client.py +13 -9
src/rotator_library/error_handler.py +160 -65
src/rotator_library/providers/antigravity_provider.py +8 -5

src/rotator_library/client.py CHANGED Viewed

@@ -620,8 +620,9 @@ class RotatingClient:
                     litellm.ServiceUnavailableError,
                     litellm.InternalServerError,
                     APIConnectionError,
                 ) as e:
-                    # This is a critical, typed error from litellm that signals a key failure.
                     # We do not try to parse it here. We wrap it and raise it immediately
                     # for the outer retry loop to handle.
                     lib_logger.warning(
@@ -1065,7 +1066,10 @@ class RotatingClient:
                             )
                             # Only trigger provider-wide cooldown for rate limits, not quota issues
-                            if classified_error.status_code == 429 and classified_error.error_type != "quota_exceeded":
                                 cooldown_duration = classified_error.retry_after or 60
                                 await self.cooldown_manager.start_cooldown(
                                     provider, cooldown_duration
@@ -1225,9 +1229,9 @@ class RotatingClient:
                             # Handle rate limits with cooldown (exclude quota_exceeded from provider-wide cooldown)
                             if (
-                                (classified_error.status_code == 429 and classified_error.error_type != "quota_exceeded")
-                                or classified_error.error_type == "rate_limit"
-                            ):
                                 cooldown_duration = classified_error.retry_after or 60
                                 await self.cooldown_manager.start_cooldown(
                                     provider, cooldown_duration
@@ -1494,7 +1498,7 @@ class RotatingClient:
                                 lib_logger.info(
                                     f"Attempting stream with credential {mask_credential(current_cred)} (Attempt {attempt + 1}/{self.max_retries})"
                                 )
                                 if pre_request_callback:
                                     try:
                                         await pre_request_callback(
@@ -1973,9 +1977,9 @@ class RotatingClient:
                             # Handle rate limits with cooldown (exclude quota_exceeded)
                             if (
-                                (classified_error.status_code == 429 and classified_error.error_type != "quota_exceeded")
-                                or classified_error.error_type == "rate_limit"
-                            ):
                                 cooldown_duration = classified_error.retry_after or 60
                                 await self.cooldown_manager.start_cooldown(
                                     provider, cooldown_duration

                     litellm.ServiceUnavailableError,
                     litellm.InternalServerError,
                     APIConnectionError,
+                    httpx.HTTPStatusError,
                 ) as e:
+                    # This is a critical, typed error from litellm or httpx that signals a key failure.
                     # We do not try to parse it here. We wrap it and raise it immediately
                     # for the outer retry loop to handle.
                     lib_logger.warning(
                             )
                             # Only trigger provider-wide cooldown for rate limits, not quota issues
+                            if (
+                                classified_error.status_code == 429
+                                and classified_error.error_type != "quota_exceeded"
+                            ):
                                 cooldown_duration = classified_error.retry_after or 60
                                 await self.cooldown_manager.start_cooldown(
                                     provider, cooldown_duration
                             # Handle rate limits with cooldown (exclude quota_exceeded from provider-wide cooldown)
                             if (
+                                classified_error.status_code == 429
+                                and classified_error.error_type != "quota_exceeded"
+                            ) or classified_error.error_type == "rate_limit":
                                 cooldown_duration = classified_error.retry_after or 60
                                 await self.cooldown_manager.start_cooldown(
                                     provider, cooldown_duration
                                 lib_logger.info(
                                     f"Attempting stream with credential {mask_credential(current_cred)} (Attempt {attempt + 1}/{self.max_retries})"
                                 )
                                 if pre_request_callback:
                                     try:
                                         await pre_request_callback(
                             # Handle rate limits with cooldown (exclude quota_exceeded)
                             if (
+                                classified_error.status_code == 429
+                                and classified_error.error_type != "quota_exceeded"
+                            ) or classified_error.error_type == "rate_limit":
                                 cooldown_duration = classified_error.retry_after or 60
                                 await self.cooldown_manager.start_cooldown(
                                     provider, cooldown_duration

src/rotator_library/error_handler.py CHANGED Viewed

@@ -18,12 +18,60 @@ from litellm.exceptions import (
 )
 def extract_retry_after_from_body(error_body: Optional[str]) -> Optional[int]:
     """
     Extract the retry-after time from an API error response body.
     Handles various error formats including:
     - Gemini CLI: "Your quota will reset after 39s."
     - Generic: "quota will reset after 120s", "retry after 60s"
     Args:
@@ -35,21 +83,21 @@ def extract_retry_after_from_body(error_body: Optional[str]) -> Optional[int]:
     if not error_body:
         return None
-    # Pattern to match various "reset after Xs" or "retry after Xs" formats
     patterns = [
-        r"quota will reset after\s*(\d+)s",
-        r"reset after\s*(\d+)s",
-        r"retry after\s*(\d+)s",
         r"try again in\s*(\d+)\s*seconds?",
     ]
     for pattern in patterns:
         match = re.search(pattern, error_body, re.IGNORECASE)
         if match:
-            try:
-                return int(match.group(1))
-            except (ValueError, IndexError):
-                continue
     return None
@@ -306,14 +354,91 @@ class ClassifiedError:
         return f"ClassifiedError(type={self.error_type}, status={self.status_code}, retry_after={self.retry_after}, original_exc={self.original_exception})"
 def get_retry_after(error: Exception) -> Optional[int]:
     """
     Extracts the 'retry-after' duration in seconds from an exception message.
     Handles both integer and string representations of the duration, as well as JSON bodies.
     Also checks HTTP response headers for httpx.HTTPStatusError instances.
     """
-    # 0. For httpx errors, check response headers first (most reliable)
     if isinstance(error, httpx.HTTPStatusError):
         headers = error.response.headers
         # Check standard Retry-After header (case-insensitive)
         retry_header = headers.get("retry-after") or headers.get("Retry-After")
@@ -339,81 +464,51 @@ def get_retry_after(error: Exception) -> Optional[int]:
             except (ValueError, TypeError):
                 pass
-    error_str = str(error).lower()
-    # 1. Try to parse JSON from the error string to find 'retryDelay'
-    try:
-        # It's common for the actual JSON to be embedded in the string representation
-        json_match = re.search(r"(\{.*\})", error_str, re.DOTALL)
-        if json_match:
-            error_json = json.loads(json_match.group(1))
-            retry_info = error_json.get("error", {}).get("details", [{}])[0]
-            if retry_info.get("@type") == "type.googleapis.com/google.rpc.RetryInfo":
-                delay_str = retry_info.get("retryDelay", {}).get("seconds")
-                if delay_str:
-                    return int(delay_str)
-                # Fallback for the other format
-                delay_str = retry_info.get("retryDelay")
-                if isinstance(delay_str, str) and delay_str.endswith("s"):
-                    return int(delay_str[:-1])
-    except (json.JSONDecodeError, IndexError, KeyError, TypeError):
-        pass  # If JSON parsing fails, proceed to regex and attribute checks
-    # 2. Common regex patterns for 'retry-after' (with duration format support)
     patterns = [
         r"retry[-_\s]after:?\s*(\d+)",  # Matches: retry-after, retry_after, retry after
         r"retry in\s*(\d+)\s*seconds?",
         r"wait for\s*(\d+)\s*seconds?",
-        r'"retryDelay":\s*"(\d+)s"',
         r"x-ratelimit-reset:?\s*(\d+)",
-        r"quota will reset after\s*(\d+)s",  # Gemini CLI rate limit format
-        r"reset after\s*(\d+)s",  # Generic reset after format
     ]
     for pattern in patterns:
-        match = re.search(pattern, error_str)
         if match:
             try:
-                return int(match.group(1))
             except (ValueError, IndexError):
                 continue
-    # 3. Handle duration formats like "60s", "2m", "1h"
-    duration_match = re.search(r"(\d+)\s*([smh])", error_str)
-    if duration_match:
-        try:
-            value = int(duration_match.group(1))
-            unit = duration_match.group(2)
-            if unit == "s":
-                return value
-            elif unit == "m":
-                return value * 60
-            elif unit == "h":
-                return value * 3600
-        except (ValueError, IndexError):
-            pass
-    # 4. Handle cases where the error object itself has the attribute
     if hasattr(error, "retry_after"):
         value = getattr(error, "retry_after")
         if isinstance(value, int):
             return value
         if isinstance(value, str):
-            # Try to parse string formats
-            if value.isdigit():
-                return int(value)
-            # Handle "60s", "2m" format in attribute
-            duration_match = re.search(r"(\d+)\s*([smh])", value.lower())
-            if duration_match:
-                val = int(duration_match.group(1))
-                unit = duration_match.group(2)
-                if unit == "s":
-                    return val
-                elif unit == "m":
-                    return val * 60
-                elif unit == "h":
-                    return val * 3600
     return None

 )
def _parse_duration_string(duration_str: str) -> Optional[int]:
    """
    Parse a duration string into whole seconds.

    Handles:
    - Compound durations: '156h14m36.752463453s', '2h30m', '45m30s'
    - Simple durations: '562476.752463453s', '3600s', '60m', '2h'
    - Plain seconds (no unit): '562476'

    Trailing text after the last recognised component is ignored, so a
    capture such as '39s.' still parses as 39. Fractional seconds are
    truncated. Sub-second units ('500ms', '250us', ...) are not treated as
    minutes/seconds; a duration made only of them yields None.

    Args:
        duration_str: Duration string to parse. Non-string values are
            converted with str() before parsing.

    Returns:
        Total seconds as integer, or None if nothing parseable (or a
        zero-length unit duration such as '0s') was found. This function
        never raises on malformed input.
    """
    if not duration_str:
        return None
    if not isinstance(duration_str, str):
        # Tolerate numeric values (e.g. taken straight from a JSON payload).
        duration_str = str(duration_str)

    remaining = duration_str.strip().lower()

    # Try parsing as plain number first (no units).
    # OverflowError covers 'inf' / '1e999'; ValueError covers 'nan' and text.
    try:
        return int(float(remaining))
    except (ValueError, OverflowError):
        pass

    total_seconds = 0

    # Parse hours component
    hour_match = re.match(r"(\d+)h", remaining)
    if hour_match:
        total_seconds += int(hour_match.group(1)) * 3600
        remaining = remaining[hour_match.end():]

    # Parse minutes component. The negative lookahead keeps a millisecond
    # value such as '500ms' from being misread as 500 minutes.
    min_match = re.match(r"(\d+)m(?!s)", remaining)
    if min_match:
        total_seconds += int(min_match.group(1)) * 60
        remaining = remaining[min_match.end():]

    # Parse seconds component (including decimals like 36.752463453s).
    # The pattern only admits a single decimal point, so float() cannot fail
    # on inputs like '1.2.3s' or '...s'.
    sec_match = re.match(r"(\d+(?:\.\d*)?|\.\d+)s", remaining)
    if sec_match:
        total_seconds += int(float(sec_match.group(1)))

    return total_seconds if total_seconds > 0 else None
 def extract_retry_after_from_body(error_body: Optional[str]) -> Optional[int]:
     """
     Extract the retry-after time from an API error response body.
     Handles various error formats including:
     - Gemini CLI: "Your quota will reset after 39s."
+    - Antigravity: "quota will reset after 156h14m36s"
     - Generic: "quota will reset after 120s", "retry after 60s"
     Args:
     if not error_body:
         return None
+    # Pattern to match various "reset after" formats - capture the full duration string
     patterns = [
+        r"quota will reset after\s*([\dhmso.]+)",  # Matches compound: 156h14m36s or 120s
+        r"reset after\s*([\dhmso.]+)",
+        r"retry after\s*([\dhmso.]+)",
         r"try again in\s*(\d+)\s*seconds?",
     ]
     for pattern in patterns:
         match = re.search(pattern, error_body, re.IGNORECASE)
         if match:
+            duration_str = match.group(1)
+            result = _parse_duration_string(duration_str)
+            if result is not None:
+                return result
     return None
         return f"ClassifiedError(type={self.error_type}, status={self.status_code}, retry_after={self.retry_after}, original_exc={self.original_exception})"
def _retry_seconds_from_detail(detail: dict) -> Optional[int]:
    """Return the retry delay in seconds carried by one `details` entry, if any."""
    detail_type = detail.get("@type", "")
    if not isinstance(detail_type, str):
        return None

    # Check RetryInfo for retryDelay (most authoritative)
    # Note: Case-sensitive key names as returned by API
    if "google.rpc.RetryInfo" in detail_type:
        retry_delay = detail.get("retryDelay")
        # Handle both {"seconds": "123"} format and "123.456s" string format
        if isinstance(retry_delay, dict):
            seconds = retry_delay.get("seconds")
            if seconds:
                return int(float(seconds))
        elif isinstance(retry_delay, str) and retry_delay:
            result = _parse_duration_string(retry_delay)
            if result is not None:
                return result

    # Check ErrorInfo metadata for quotaResetDelay (Antigravity-specific)
    if "google.rpc.ErrorInfo" in detail_type:
        metadata = detail.get("metadata")
        if isinstance(metadata, dict):
            # Try both camelCase and lowercase variants
            quota_reset_delay = metadata.get("quotaResetDelay") or metadata.get(
                "quotaresetdelay"
            )
            if isinstance(quota_reset_delay, str) and quota_reset_delay:
                result = _parse_duration_string(quota_reset_delay)
                if result is not None:
                    return result

    return None


def _extract_retry_from_json_body(json_text: str) -> Optional[int]:
    """
    Extract retry delay from a JSON error response body.

    Handles Antigravity/Google API error formats with details array containing:
    - RetryInfo with retryDelay: "562476.752463453s"
    - ErrorInfo metadata with quotaResetDelay: "156h14m36.752463453s"

    The JSON object may be embedded in surrounding text; the span from the
    first '{' to the last '}' is parsed. Payloads that do not have the
    expected shape (for example {"error": "some message"}, or a `details`
    value that is not a list) yield None instead of raising, because this
    runs while another error is already being handled.

    Args:
        json_text: JSON string (original case, not lowercased)

    Returns:
        Retry delay in seconds, or None if not found
    """
    if not json_text or not isinstance(json_text, str):
        return None

    # Find JSON object in the text
    json_match = re.search(r"(\{.*\})", json_text, re.DOTALL)
    if not json_match:
        return None

    try:
        error_json = json.loads(json_match.group(1))
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return None

    if not isinstance(error_json, dict):
        return None
    error_obj = error_json.get("error")
    if not isinstance(error_obj, dict):
        # e.g. {"error": "rate limited"} - no structured details to inspect
        return None
    details = error_obj.get("details")
    if not isinstance(details, list):
        return None

    # Iterate through ALL details items (not just index 0)
    for detail in details:
        if not isinstance(detail, dict):
            continue
        try:
            result = _retry_seconds_from_detail(detail)
        except (TypeError, ValueError, OverflowError):
            # Malformed delay value in this entry; keep looking at the rest.
            continue
        if result is not None:
            return result

    return None
 def get_retry_after(error: Exception) -> Optional[int]:
     """
     Extracts the 'retry-after' duration in seconds from an exception message.
     Handles both integer and string representations of the duration, as well as JSON bodies.
     Also checks HTTP response headers for httpx.HTTPStatusError instances.
+    Supports Antigravity/Google API error formats:
+    - RetryInfo with retryDelay: "562476.752463453s"
+    - ErrorInfo metadata with quotaResetDelay: "156h14m36.752463453s"
+    - Human-readable message: "quota will reset after 156h14m36s"
     """
+    # 0. For httpx errors, check response body and headers
     if isinstance(error, httpx.HTTPStatusError):
+        # First, try to parse the response body JSON (contains retryDelay/quotaResetDelay)
+        # This is where Antigravity puts the retry information
+        try:
+            response_text = error.response.text
+            if response_text:
+                result = _extract_retry_from_json_body(response_text)
+                if result is not None:
+                    return result
+        except Exception:
+            pass  # Response body may not be available
+        # Fallback to HTTP headers
         headers = error.response.headers
         # Check standard Retry-After header (case-insensitive)
         retry_header = headers.get("retry-after") or headers.get("Retry-After")
             except (ValueError, TypeError):
                 pass
+    # 1. Try to parse JSON from the error string representation
+    # Some exceptions embed JSON in their string representation
+    error_str = str(error)
+    result = _extract_retry_from_json_body(error_str)
+    if result is not None:
+        return result
+    # 2. Common regex patterns for 'retry-after' (with compound duration support)
+    # Use lowercase for pattern matching
+    error_str_lower = error_str.lower()
     patterns = [
         r"retry[-_\s]after:?\s*(\d+)",  # Matches: retry-after, retry_after, retry after
         r"retry in\s*(\d+)\s*seconds?",
         r"wait for\s*(\d+)\s*seconds?",
+        r'"retrydelay":\s*"([\d.]+)s?"',  # retryDelay in JSON (lowercased)
         r"x-ratelimit-reset:?\s*(\d+)",
+        # Compound duration patterns (Antigravity format)
+        r"quota will reset after\s*([\dhms.]+)",  # e.g., "156h14m36s" or "120s"
+        r"reset after\s*([\dhms.]+)",
+        r'"quotaresetdelay":\s*"([\dhms.]+)"',  # quotaResetDelay in JSON (lowercased)
     ]
     for pattern in patterns:
+        match = re.search(pattern, error_str_lower)
         if match:
+            duration_str = match.group(1)
+            # Try parsing as compound duration first
+            result = _parse_duration_string(duration_str)
+            if result is not None:
+                return result
+            # Fallback to simple integer
             try:
+                return int(duration_str)
             except (ValueError, IndexError):
                 continue
+    # 3. Handle cases where the error object itself has the attribute
     if hasattr(error, "retry_after"):
         value = getattr(error, "retry_after")
         if isinstance(value, int):
             return value
         if isinstance(value, str):
+            result = _parse_duration_string(value)
+            if result is not None:
+                return result
     return None

src/rotator_library/providers/antigravity_provider.py CHANGED Viewed

@@ -1919,15 +1919,18 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
         system_instruction = None
         gemini_contents = []
-        # Extract system prompt
-        if messages and messages[0].get("role") == "system":
             system_content = messages.pop(0).get("content", "")
             if system_content:
-                system_parts = self._parse_content_parts(
                     system_content, _strip_cache_control=True
                 )
-                if system_parts:
-                    system_instruction = {"role": "user", "parts": system_parts}
         # Build tool_call_id → name mapping
         tool_id_to_name = {}

         system_instruction = None
         gemini_contents = []
+        # Extract system prompts (handle multiple consecutive system messages)
+        system_parts = []
+        while messages and messages[0].get("role") == "system":
             system_content = messages.pop(0).get("content", "")
             if system_content:
+                new_parts = self._parse_content_parts(
                     system_content, _strip_cache_control=True
                 )
+                system_parts.extend(new_parts)
+        if system_parts:
+            system_instruction = {"role": "user", "parts": system_parts}
         # Build tool_call_id → name mapping
         tool_id_to_name = {}