Spaces:

Nomearod
/

agentbench

Sleeping

Nomearod Claude Opus 4.6 (1M context) committed on Mar 24

Commit

2a4cb78

1 Parent(s): f9202c2

fix: handle OpenAI rate limit / quota errors as 503 instead of 500

- ProviderRateLimitError for insufficient_quota and rate_limit errors
- Middleware catches it and returns 503 with clear error message
- Previously returned bare 500 Internal Server Error

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Files changed (2) hide show

agent_bench/core/provider.py +8 -0
agent_bench/serving/middleware.py +22 -2

agent_bench/core/provider.py CHANGED Viewed

@@ -21,6 +21,10 @@ class ProviderTimeoutError(Exception):
     """Raised when the LLM provider times out."""
 # --- Pure formatting functions (used by providers and tests directly) ---
@@ -187,6 +191,10 @@ class OpenAIProvider(LLMProvider):
             response = await self.client.chat.completions.create(**kwargs)
         except APITimeoutError as e:
             raise ProviderTimeoutError(f"OpenAI timed out: {e}") from e
         latency_ms = (time.perf_counter() - start) * 1000
         choice = response.choices[0]

     """Raised when the LLM provider times out."""
+class ProviderRateLimitError(Exception):
+    """Raised when the LLM provider returns a rate limit / quota error."""
 # --- Pure formatting functions (used by providers and tests directly) ---
             response = await self.client.chat.completions.create(**kwargs)
         except APITimeoutError as e:
             raise ProviderTimeoutError(f"OpenAI timed out: {e}") from e
+        except Exception as e:
+            if "insufficient_quota" in str(e) or "rate_limit" in str(e).lower():
+                raise ProviderRateLimitError(f"OpenAI rate limit / quota: {e}") from e
+            raise
         latency_ms = (time.perf_counter() - start) * 1000
         choice = response.choices[0]

agent_bench/serving/middleware.py CHANGED Viewed

@@ -11,7 +11,7 @@ from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoin
 from starlette.requests import Request
 from starlette.responses import JSONResponse, Response
-from agent_bench.core.provider import ProviderTimeoutError
 logger = structlog.get_logger()
@@ -81,7 +81,6 @@ class RequestMiddleware(BaseHTTPMiddleware):
                 latency_ms=round(latency_ms, 2),
                 request_id=request_id,
             )
-            # Record error in metrics if available
             metrics = getattr(request.app.state, "metrics", None)
             if metrics is not None:
                 metrics.record(latency_ms, error=True)
@@ -91,6 +90,27 @@ class RequestMiddleware(BaseHTTPMiddleware):
                 headers={"X-Request-ID": request_id},
             )
         except Exception:
             latency_ms = (time.perf_counter() - start) * 1000
             logger.exception(

 from starlette.requests import Request
 from starlette.responses import JSONResponse, Response
+from agent_bench.core.provider import ProviderRateLimitError, ProviderTimeoutError
 logger = structlog.get_logger()
                 latency_ms=round(latency_ms, 2),
                 request_id=request_id,
             )
             metrics = getattr(request.app.state, "metrics", None)
             if metrics is not None:
                 metrics.record(latency_ms, error=True)
                 headers={"X-Request-ID": request_id},
             )
+        except ProviderRateLimitError:
+            latency_ms = (time.perf_counter() - start) * 1000
+            logger.error(
+                "provider_rate_limit",
+                method=request.method,
+                path=str(request.url.path),
+                latency_ms=round(latency_ms, 2),
+                request_id=request_id,
+            )
+            metrics = getattr(request.app.state, "metrics", None)
+            if metrics is not None:
+                metrics.record(latency_ms, error=True)
+            return JSONResponse(
+                status_code=503,
+                content={
+                    "detail": "Provider rate limit or quota exceeded",
+                    "request_id": request_id,
+                },
+                headers={"X-Request-ID": request_id},
+            )
         except Exception:
             latency_ms = (time.perf_counter() - start) * 1000
             logger.exception(