XQ commited on
Commit
fa0cc5c
·
1 Parent(s): fdc3773

Fix 429 issue

Browse files
Files changed (2) hide show
  1. src/api/routes.py +28 -0
  2. src/ui/app.py +8 -0
src/api/routes.py CHANGED
@@ -265,7 +265,34 @@ async def query_stream(request: QueryRequest) -> StreamingResponse:
265
  """
266
  event_queue: queue.Queue = queue.Queue()
267
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
  def _run() -> None:
 
 
 
 
 
269
  try:
270
  for event in _query_router.route_stream(
271
  query=request.question, top_k=request.top_k
@@ -278,6 +305,7 @@ async def query_stream(request: QueryRequest) -> StreamingResponse:
278
  else:
279
  event_queue.put({"step": "error", "code": 500, "message": exc_str})
280
  finally:
 
281
  event_queue.put(None) # sentinel
282
 
283
  threading.Thread(target=_run, daemon=True).start()
 
265
  """
266
  event_queue: queue.Queue = queue.Queue()
267
 
268
+ class _RateLimitLogHandler(logging.Handler):
269
+ """Temporary handler that detects SDK-internal 429 retries via logs."""
270
+
271
+ _PATTERNS = ("429", "retrying request", "too many requests", "rate limit")
272
+
273
+ def emit(self, record: logging.LogRecord) -> None:
274
+ msg = record.getMessage().lower()
275
+ if any(p in msg for p in self._PATTERNS):
276
+ retry_sec = ""
277
+ # Extract wait time from "Retrying request … in 5.000000 seconds"
278
+ if "retrying" in msg and "seconds" in msg:
279
+ for part in msg.split():
280
+ try:
281
+ retry_sec = f" ({float(part):.0f}s)"
282
+ break
283
+ except ValueError:
284
+ continue
285
+ event_queue.put({
286
+ "step": "rate_limit",
287
+ "message": f"API rate limit — retrying{retry_sec}",
288
+ })
289
+
290
  def _run() -> None:
291
+ handler = _RateLimitLogHandler()
292
+ handler.setLevel(logging.INFO)
293
+ # Attach to root logger to catch openai/httpx/httpcore messages
294
+ root_logger = logging.getLogger()
295
+ root_logger.addHandler(handler)
296
  try:
297
  for event in _query_router.route_stream(
298
  query=request.question, top_k=request.top_k
 
305
  else:
306
  event_queue.put({"step": "error", "code": 500, "message": exc_str})
307
  finally:
308
+ root_logger.removeHandler(handler)
309
  event_queue.put(None) # sentinel
310
 
311
  threading.Thread(target=_run, daemon=True).start()
src/ui/app.py CHANGED
@@ -800,6 +800,14 @@ if search_clicked and question.strip():
800
  else "Answer generated"
801
  )
802
 
 
 
 
 
 
 
 
 
803
  elif _step == "done":
804
  data = _event.get("result", {})
805
  _status.update(label=t["status_done"], state="complete", expanded=False)
 
800
  else "Answer generated"
801
  )
802
 
803
+ elif _step == "rate_limit":
804
+ _rl_msg = _event.get("message", "")
805
+ st.warning(
806
+ f"⏳ {_rl_msg} — vent venligst ..."
807
+ if lang == "da"
808
+ else f"⏳ {_rl_msg} — please wait ..."
809
+ )
810
+
811
  elif _step == "done":
812
  data = _event.get("result", {})
813
  _status.update(label=t["status_done"], state="complete", expanded=False)