Spaces:
Running
Running
fix(BACKEND-1): move rate limiter eviction AFTER current key pruning -- prevents losing rate-limit tracking on burst-after-idle patterns
9d85e1e | import os, sys, time, hashlib | |
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) | |
| from collections import defaultdict | |
| from fastapi import Request, HTTPException | |
| from starlette.middleware.base import BaseHTTPMiddleware | |
| from loguru import logger | |
# Per-route rate limits, keyed by URL path prefix.
# Each value is (max_requests, window_seconds). The "default" entry is the
# fallback used when a request path starts with none of the other prefixes.
LIMITS = {
    prefix: (max_requests, window_seconds)
    for prefix, max_requests, window_seconds in (
        ("/search", 100, 60),
        ("/profile", 100, 60),
        ("/risk", 100, 60),
        ("/graph", 50, 60),
        ("/investigate", 30, 60),
        ("/export", 10, 60),
        ("/translate", 10, 60),
        ("/health", 1000, 60),
        ("/admin", 5, 60),
        ("default", 200, 60),
    )
}
class SlidingWindowRateLimiter(BaseHTTPMiddleware):
    """Per-client sliding-window rate limiter held in process memory.

    Requests are bucketed by a truncated SHA-256 of the client address plus
    the first segment of the URL path. Each bucket stores the timestamps of
    the requests that were let through; a request is answered with HTTP 429
    when its bucket already holds the route's maximum number of timestamps
    inside the route's window (both taken from ``LIMITS``).
    """

    # Seconds between sweeps that drop idle buckets from ``_windows``.
    _EVICT_INTERVAL = 300

    def __init__(self, app):
        """Wrap ``app`` and start with empty rate-limit state."""
        super().__init__(app)
        # BUG-14 NOTE: in-memory state -- each worker starts fresh on restart.
        # Rate limits are per-worker, not cross-worker. Acceptable for single-process HF Spaces.
        # For multi-worker: replace with Redis-backed sliding window.
        self._windows: dict[str, list[float]] = defaultdict(list)
        # BUG-14 FIX: timestamp of the last eviction sweep; sweeping bounds
        # memory growth on a long-running server.
        self._evict_at = 0.0

    def _get_limit(self, path: str) -> tuple[int, int]:
        """Return ``(max_requests, window_seconds)`` for ``path``.

        The first ``LIMITS`` prefix that ``path`` starts with wins; the
        ``"default"`` entry is used when none matches.
        """
        for prefix, limit in LIMITS.items():
            if prefix != "default" and path.startswith(prefix):
                return limit
        return LIMITS["default"]

    def _get_ip(self, request: Request) -> str:
        """Return a 16-hex-character SHA-256 digest identifying the client.

        Uses the first entry of ``X-Forwarded-For`` when the header is
        present, otherwise the socket peer address, otherwise ``"unknown"``.
        """
        # NOTE(review): X-Forwarded-For is client-supplied unless a trusted
        # proxy overwrites it; if this app is reachable directly, a caller can
        # rotate the header value to dodge the limiter. Confirm the deployment.
        forwarded = request.headers.get("X-Forwarded-For", "")
        if forwarded:
            raw = forwarded.split(",")[0].strip()
        elif request.client:
            raw = request.client.host
        else:
            raw = "unknown"
        return hashlib.sha256(raw.encode()).hexdigest()[:16]

    def _evict_stale(self, now: float, keep: str) -> None:
        """Delete every bucket, other than ``keep``, that can no longer matter.

        A bucket is stale when it is empty or when its newest timestamp is at
        least as old as the longest window in ``LIMITS``. The newest entry is
        checked (not the oldest) because buckets are only pruned when their
        own client makes a request, so an old first entry does not mean the
        bucket is idle.
        """
        longest_window = max(win for _, win in LIMITS.values())
        stale_keys = [
            k
            for k, stamps in self._windows.items()
            if k != keep and (not stamps or now - stamps[-1] >= longest_window)
        ]
        for k in stale_keys:
            del self._windows[k]

    async def dispatch(self, request: Request, call_next):
        """Apply the rate limit to ``request``.

        Returns the downstream response when the request is within its limit,
        otherwise a 429 JSON response carrying a ``Retry-After`` header.
        """
        ip = self._get_ip(request)
        path = request.url.path
        max_req, window = self._get_limit(path)

        # First path segment ("/search/x" -> "search"); empty string when the
        # path contains no "/" at all, instead of raising IndexError.
        segments = path.split("/")
        bucket = segments[1] if len(segments) > 1 else ""
        key = f"{ip}:{bucket}"
        now = time.time()

        # BACKEND-1 FIX: prune the current bucket FIRST, THEN evict stale ones.
        timestamps = [t for t in self._windows[key] if now - t < window]
        self._windows[key] = timestamps

        # Sweep idle buckets periodically to prevent a memory leak. The
        # current bucket is excluded so ``timestamps`` stays the stored list.
        if now - self._evict_at > self._EVICT_INTERVAL:
            self._evict_stale(now, keep=key)
            self._evict_at = now

        if len(timestamps) >= max_req:
            oldest = timestamps[0] if timestamps else now
            # Never advertise "retry after 0 seconds": truncation would do so
            # during the final sub-second of the window.
            retry = max(1, int(window - (now - oldest)))
            logger.warning(f"[RateLimit] {ip} exceeded {max_req}/min on {path}")
            # Return the 429 directly. An HTTPException raised from middleware
            # is outside the app's exception handlers (they run further in),
            # so it would reach the client as a 500 instead of a 429.
            from starlette.responses import JSONResponse

            return JSONResponse(
                status_code=429,
                content={"detail": f"Rate limit exceeded. Retry after {retry} seconds."},
                headers={"Retry-After": str(retry)},
            )

        timestamps.append(now)
        return await call_next(request)