Spaces:
Running
Running
XQ commited on
Commit ·
fa0cc5c
1
Parent(s): fdc3773
Fix 429 issue
Browse files- src/api/routes.py +28 -0
- src/ui/app.py +8 -0
src/api/routes.py
CHANGED
|
@@ -265,7 +265,34 @@ async def query_stream(request: QueryRequest) -> StreamingResponse:
|
|
| 265 |
"""
|
| 266 |
event_queue: queue.Queue = queue.Queue()
|
| 267 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 268 |
def _run() -> None:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
try:
|
| 270 |
for event in _query_router.route_stream(
|
| 271 |
query=request.question, top_k=request.top_k
|
|
@@ -278,6 +305,7 @@ async def query_stream(request: QueryRequest) -> StreamingResponse:
|
|
| 278 |
else:
|
| 279 |
event_queue.put({"step": "error", "code": 500, "message": exc_str})
|
| 280 |
finally:
|
|
|
|
| 281 |
event_queue.put(None) # sentinel
|
| 282 |
|
| 283 |
threading.Thread(target=_run, daemon=True).start()
|
|
|
|
| 265 |
"""
|
| 266 |
event_queue: queue.Queue = queue.Queue()
|
| 267 |
|
| 268 |
+
class _RateLimitLogHandler(logging.Handler):
|
| 269 |
+
"""Temporary handler that detects SDK-internal 429 retries via logs."""
|
| 270 |
+
|
| 271 |
+
_PATTERNS = ("429", "retrying request", "too many requests", "rate limit")
|
| 272 |
+
|
| 273 |
+
def emit(self, record: logging.LogRecord) -> None:
|
| 274 |
+
msg = record.getMessage().lower()
|
| 275 |
+
if any(p in msg for p in self._PATTERNS):
|
| 276 |
+
retry_sec = ""
|
| 277 |
+
# Extract wait time from "Retrying request … in 5.000000 seconds"
|
| 278 |
+
if "retrying" in msg and "seconds" in msg:
|
| 279 |
+
for part in msg.split():
|
| 280 |
+
try:
|
| 281 |
+
retry_sec = f" ({float(part):.0f}s)"
|
| 282 |
+
break
|
| 283 |
+
except ValueError:
|
| 284 |
+
continue
|
| 285 |
+
event_queue.put({
|
| 286 |
+
"step": "rate_limit",
|
| 287 |
+
"message": f"API rate limit — retrying{retry_sec}",
|
| 288 |
+
})
|
| 289 |
+
|
| 290 |
def _run() -> None:
|
| 291 |
+
handler = _RateLimitLogHandler()
|
| 292 |
+
handler.setLevel(logging.INFO)
|
| 293 |
+
# Attach to root logger to catch openai/httpx/httpcore messages
|
| 294 |
+
root_logger = logging.getLogger()
|
| 295 |
+
root_logger.addHandler(handler)
|
| 296 |
try:
|
| 297 |
for event in _query_router.route_stream(
|
| 298 |
query=request.question, top_k=request.top_k
|
|
|
|
| 305 |
else:
|
| 306 |
event_queue.put({"step": "error", "code": 500, "message": exc_str})
|
| 307 |
finally:
|
| 308 |
+
root_logger.removeHandler(handler)
|
| 309 |
event_queue.put(None) # sentinel
|
| 310 |
|
| 311 |
threading.Thread(target=_run, daemon=True).start()
|
src/ui/app.py
CHANGED
|
@@ -800,6 +800,14 @@ if search_clicked and question.strip():
|
|
| 800 |
else "Answer generated"
|
| 801 |
)
|
| 802 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 803 |
elif _step == "done":
|
| 804 |
data = _event.get("result", {})
|
| 805 |
_status.update(label=t["status_done"], state="complete", expanded=False)
|
|
|
|
| 800 |
else "Answer generated"
|
| 801 |
)
|
| 802 |
|
| 803 |
+
elif _step == "rate_limit":
|
| 804 |
+
_rl_msg = _event.get("message", "")
|
| 805 |
+
st.warning(
|
| 806 |
+
f"⏳ {_rl_msg} — vent venligst ..."
|
| 807 |
+
if lang == "da"
|
| 808 |
+
else f"⏳ {_rl_msg} — please wait ..."
|
| 809 |
+
)
|
| 810 |
+
|
| 811 |
elif _step == "done":
|
| 812 |
data = _event.get("result", {})
|
| 813 |
_status.update(label=t["status_done"], state="complete", expanded=False)
|