Nomearod Claude Opus 4.6 (1M context) committed on
Commit
2a4cb78
·
1 Parent(s): f9202c2

fix: handle OpenAI rate limit / quota errors as 503 instead of 500

Browse files

- ProviderRateLimitError for insufficient_quota and rate_limit errors
- Middleware catches it and returns 503 with clear error message
- Previously returned bare 500 Internal Server Error

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

agent_bench/core/provider.py CHANGED
@@ -21,6 +21,10 @@ class ProviderTimeoutError(Exception):
21
  """Raised when the LLM provider times out."""
22
 
23
 
 
 
 
 
24
  # --- Pure formatting functions (used by providers and tests directly) ---
25
 
26
 
@@ -187,6 +191,10 @@ class OpenAIProvider(LLMProvider):
187
  response = await self.client.chat.completions.create(**kwargs)
188
  except APITimeoutError as e:
189
  raise ProviderTimeoutError(f"OpenAI timed out: {e}") from e
 
 
 
 
190
  latency_ms = (time.perf_counter() - start) * 1000
191
 
192
  choice = response.choices[0]
 
21
  """Raised when the LLM provider times out."""
22
 
23
 
24
+ class ProviderRateLimitError(Exception):
25
+ """Raised when the LLM provider returns a rate limit / quota error."""
26
+
27
+
28
  # --- Pure formatting functions (used by providers and tests directly) ---
29
 
30
 
 
191
  response = await self.client.chat.completions.create(**kwargs)
192
  except APITimeoutError as e:
193
  raise ProviderTimeoutError(f"OpenAI timed out: {e}") from e
194
+ except Exception as e:
195
+ if "insufficient_quota" in str(e) or "rate_limit" in str(e).lower():
196
+ raise ProviderRateLimitError(f"OpenAI rate limit / quota: {e}") from e
197
+ raise
198
  latency_ms = (time.perf_counter() - start) * 1000
199
 
200
  choice = response.choices[0]
agent_bench/serving/middleware.py CHANGED
@@ -11,7 +11,7 @@ from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoin
11
  from starlette.requests import Request
12
  from starlette.responses import JSONResponse, Response
13
 
14
- from agent_bench.core.provider import ProviderTimeoutError
15
 
16
  logger = structlog.get_logger()
17
 
@@ -81,7 +81,6 @@ class RequestMiddleware(BaseHTTPMiddleware):
81
  latency_ms=round(latency_ms, 2),
82
  request_id=request_id,
83
  )
84
- # Record error in metrics if available
85
  metrics = getattr(request.app.state, "metrics", None)
86
  if metrics is not None:
87
  metrics.record(latency_ms, error=True)
@@ -91,6 +90,27 @@ class RequestMiddleware(BaseHTTPMiddleware):
91
  headers={"X-Request-ID": request_id},
92
  )
93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  except Exception:
95
  latency_ms = (time.perf_counter() - start) * 1000
96
  logger.exception(
 
11
  from starlette.requests import Request
12
  from starlette.responses import JSONResponse, Response
13
 
14
+ from agent_bench.core.provider import ProviderRateLimitError, ProviderTimeoutError
15
 
16
  logger = structlog.get_logger()
17
 
 
81
  latency_ms=round(latency_ms, 2),
82
  request_id=request_id,
83
  )
 
84
  metrics = getattr(request.app.state, "metrics", None)
85
  if metrics is not None:
86
  metrics.record(latency_ms, error=True)
 
90
  headers={"X-Request-ID": request_id},
91
  )
92
 
93
+ except ProviderRateLimitError:
94
+ latency_ms = (time.perf_counter() - start) * 1000
95
+ logger.error(
96
+ "provider_rate_limit",
97
+ method=request.method,
98
+ path=str(request.url.path),
99
+ latency_ms=round(latency_ms, 2),
100
+ request_id=request_id,
101
+ )
102
+ metrics = getattr(request.app.state, "metrics", None)
103
+ if metrics is not None:
104
+ metrics.record(latency_ms, error=True)
105
+ return JSONResponse(
106
+ status_code=503,
107
+ content={
108
+ "detail": "Provider rate limit or quota exceeded",
109
+ "request_id": request_id,
110
+ },
111
+ headers={"X-Request-ID": request_id},
112
+ )
113
+
114
  except Exception:
115
  latency_ms = (time.perf_counter() - start) * 1000
116
  logger.exception(