Update agent/server.py

agent/server.py (+52 -3)
@@ -11,11 +11,12 @@ import uuid
 import json
 import traceback
 from typing import Optional, Callable
-from collections import deque
+from collections import deque, OrderedDict
 import time
 import math
 
 from fastapi import FastAPI
+from fastapi import Response
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import FileResponse
 from fastapi.staticfiles import StaticFiles
@@ -40,6 +41,41 @@ from tools.constraints_tool import apply_constraints
 
 os.environ.setdefault("HF_HOME", "/home/user/.cache/huggingface")
 
+import hashlib
+
+# ---------------------------
+# Simple in-memory TTL + LRU cache for responses
+# ---------------------------
+_CACHE_MAX_ITEMS = int(os.getenv("RECO_CACHE_MAX_ITEMS", "500"))
+_CACHE_TTL_SECONDS = int(os.getenv("RECO_CACHE_TTL_SECONDS", str(24 * 3600)))
+
+_reco_cache: OrderedDict[str, tuple[float, dict]] = OrderedDict()
+
+def _normalize_query(q: str) -> str:
+    return " ".join((q or "").lower().split())
+
+def _cache_key(query: str, llm_model: str | None, verbose: bool, endpoint: str) -> str:
+    model = (llm_model or os.getenv("LLM_MODEL", "") or "default").strip().lower()
+    raw = f"ep={endpoint}|q={_normalize_query(query)}|m={model}|v={int(verbose)}"
+    return hashlib.sha256(raw.encode("utf-8")).hexdigest()
+
+def _cache_get(key: str):
+    item = _reco_cache.get(key)
+    if not item:
+        return None
+    expires_at, value = item
+    if time.time() > expires_at:
+        _reco_cache.pop(key, None)
+        return None
+    _reco_cache.move_to_end(key)  # LRU refresh
+    return value
+
+def _cache_set(key: str, value: dict):
+    _reco_cache[key] = (time.time() + _CACHE_TTL_SECONDS, value)
+    _reco_cache.move_to_end(key)
+    while len(_reco_cache) > _CACHE_MAX_ITEMS:
+        _reco_cache.popitem(last=False)
+
 class ChatRequest(BaseModel):
     query: str
     clarification_answer: Optional[str] = None
@@ -306,14 +342,27 @@ def chat(req: ChatRequest):
 
 
 @app.post("/recommend")
-def recommend(req: RecommendRequest):
+def recommend(req: RecommendRequest, response: Response):
     if not _allow_request():
         return {"error": "rate limit exceeded"}
-    final_results, summary, debug_payload = _run_pipeline(req.query, verbose=req.verbose, llm_model=req.llm_model)
+
+    key = _cache_key(req.query, req.llm_model, req.verbose, endpoint="/recommend")
+    cached = _cache_get(key)
+    if cached is not None:
+        response.headers["X-Cache"] = "HIT"
+        return cached
+
+    response.headers["X-Cache"] = "MISS"
+
+    final_results, summary, debug_payload = _run_pipeline(
+        req.query, verbose=req.verbose, llm_model=req.llm_model
+    )
     resp = {"recommended_assessments": final_results}
    if req.verbose:
        resp["debug"] = _sanitize_debug(debug_payload)
        resp["summary"] = summary
+
+    _cache_set(key, resp)
     return resp
 
 
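To sanity-check the change end to end, one can send the same request to /recommend twice and watch the X-Cache response header flip from MISS to HIT. The following is a minimal, hypothetical smoke test, not part of the change itself: it assumes the server is reachable at http://localhost:8000, uses only the query and verbose fields that the handler above actually reads, and the example query string is made up.

import json
import urllib.request

URL = "http://localhost:8000/recommend"  # assumed host/port; adjust to your deployment
payload = json.dumps({"query": "entry level sales role", "verbose": False}).encode("utf-8")

for attempt in (1, 2):
    req = urllib.request.Request(
        URL, data=payload, headers={"Content-Type": "application/json"}
    )
    with urllib.request.urlopen(req) as resp:
        body = json.load(resp)
        # First call should report MISS (pipeline ran); the second, identical
        # call should report HIT and be served from the in-memory cache.
        print(f"attempt {attempt}: X-Cache={resp.headers.get('X-Cache')}, "
              f"results={len(body.get('recommended_assessments', []))}")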
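A note on the design: OrderedDict gives O(1) lookups with LRU eviction via move_to_end() and popitem(last=False), and the TTL check happens lazily on read, so expired entries linger until they are read or pushed out by the size cap. Because recommend is a sync def handler, FastAPI serves it from a threadpool; the read-check-write sequences in _cache_get/_cache_set are not atomic across threads, which is generally an acceptable trade-off for a best-effort response cache.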