Tighten per-IP rate limits to keep daily spend per visitor under $1
Browse files

Replace the loose '30/minute' cap with a layered '5/minute;20/hour;30/day'
on both /query and /ui/query. Same key (remote IP) on all three windows;
slowapi enforces them independently and the most restrictive wins.
Worst-case spend per IP per day: 30 queries x ~$0.011 (max_tokens=2048
on claude-haiku-4-5) = ~$0.33. Comfortably under the $1 ceiling and
still leaves room for a legitimate evaluator to try a handful of queries
in a single sitting.
- api/main.py +2 -2
api/main.py
CHANGED
|
@@ -159,7 +159,7 @@ def health() -> JSONResponse:
|
|
| 159 |
|
| 160 |
|
| 161 |
@app.post("/query", response_model=QueryResponse)
|
| 162 |
-
@limiter.limit("30/minute")
|
| 163 |
def query(request: Request, body: QueryRequest) -> QueryResponse:
|
| 164 |
"""Run the RAG pipeline end-to-end. See module docstring for guarantees."""
|
| 165 |
qhash = hash_query(body.query)
|
|
@@ -302,7 +302,7 @@ def _corpus_stats() -> dict[str, Any]:
|
|
| 302 |
|
| 303 |
|
| 304 |
@app.post("/ui/query", response_class=HTMLResponse)
|
| 305 |
-
@limiter.limit("30/minute")
|
| 306 |
def ui_query(
|
| 307 |
request: Request,
|
| 308 |
query: str = Form(..., min_length=1, max_length=2000),
|
|
|
|
| 159 |
|
| 160 |
|
| 161 |
@app.post("/query", response_model=QueryResponse)
|
| 162 |
+
@limiter.limit("5/minute;20/hour;30/day")
|
| 163 |
def query(request: Request, body: QueryRequest) -> QueryResponse:
|
| 164 |
"""Run the RAG pipeline end-to-end. See module docstring for guarantees."""
|
| 165 |
qhash = hash_query(body.query)
|
|
|
|
| 302 |
|
| 303 |
|
| 304 |
@app.post("/ui/query", response_class=HTMLResponse)
|
| 305 |
+
@limiter.limit("5/minute;20/hour;30/day")
|
| 306 |
def ui_query(
|
| 307 |
request: Request,
|
| 308 |
query: str = Form(..., min_length=1, max_length=2000),
|