AgamP committed on
Commit
0a70294
·
verified ·
1 Parent(s): 30d16ab

Update agent/server.py

Browse files
Files changed (1) hide show
  1. agent/server.py +52 -3
agent/server.py CHANGED
@@ -11,11 +11,12 @@ import uuid
11
  import json
12
  import traceback
13
  from typing import Optional, Callable
14
- from collections import deque
15
  import time
16
  import math
17
 
18
  from fastapi import FastAPI
 
19
  from fastapi.middleware.cors import CORSMiddleware
20
  from fastapi.responses import FileResponse
21
  from fastapi.staticfiles import StaticFiles
@@ -40,6 +41,41 @@ from tools.constraints_tool import apply_constraints
40
 
41
  os.environ.setdefault("HF_HOME", "/home/user/.cache/huggingface")
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  class ChatRequest(BaseModel):
44
  query: str
45
  clarification_answer: Optional[str] = None
@@ -306,14 +342,27 @@ def chat(req: ChatRequest):
306
 
307
 
308
  @app.post("/recommend")
309
- def recommend(req: RecommendRequest):
310
  if not _allow_request():
311
  return {"error": "rate limit exceeded"}
312
- final_results, summary, debug_payload = _run_pipeline(req.query, verbose=req.verbose, llm_model=req.llm_model)
 
 
 
 
 
 
 
 
 
 
 
313
  resp = {"recommended_assessments": final_results}
314
  if req.verbose:
315
  resp["debug"] = _sanitize_debug(debug_payload)
316
  resp["summary"] = summary
 
 
317
  return resp
318
 
319
 
 
11
  import json
12
  import traceback
13
  from typing import Optional, Callable
14
+ from collections import deque, OrderedDict
15
  import time
16
  import math
17
 
18
  from fastapi import FastAPI
19
+ from fastapi import Response
20
  from fastapi.middleware.cors import CORSMiddleware
21
  from fastapi.responses import FileResponse
22
  from fastapi.staticfiles import StaticFiles
 
41
 
42
  os.environ.setdefault("HF_HOME", "/home/user/.cache/huggingface")
43
 
44
import hashlib

# ---------------------------
# Simple in-memory TTL + LRU cache for responses
# ---------------------------
# Upper bound on the number of cached responses
# (env RECO_CACHE_MAX_ITEMS, default 500).
_CACHE_MAX_ITEMS = int(os.getenv("RECO_CACHE_MAX_ITEMS", "500"))
# Lifetime of a cached response in seconds
# (env RECO_CACHE_TTL_SECONDS, default 24 hours).
_CACHE_TTL_SECONDS = int(os.getenv("RECO_CACHE_TTL_SECONDS", str(24 * 3600)))

# Maps cache key -> (expiry timestamp on the time.time() clock, response dict).
# Insertion order doubles as recency order: the first item is the least
# recently used one and is the first to be evicted.
_reco_cache: OrderedDict[str, tuple[float, dict]] = OrderedDict()
53
+
54
def _normalize_query(q: str) -> str:
    """Return *q* lower-cased with every whitespace run collapsed to one space.

    Leading and trailing whitespace is dropped. ``None`` or an empty string
    yields ``""``.
    """
    return " ".join((q or "").lower().split())
56
+
57
def _cache_key(query: str, llm_model: Optional[str], verbose: bool, endpoint: str) -> str:
    """Build the cache key for one request as a SHA-256 hex digest.

    The key covers the endpoint name, the normalized query text, the model
    name and the verbose flag, so requests differing in any of them are
    cached separately.

    Args:
        query: Raw user query; normalized with ``_normalize_query``.
        llm_model: Requested model name. When falsy, the ``LLM_MODEL``
            environment variable is used, then the literal ``"default"``.
        verbose: Whether the request asked for verbose output.
        endpoint: Name of the endpoint the response belongs to.

    Returns:
        A 64-character hexadecimal string.
    """
    # Optional[...] rather than ``str | None``: the union operator in an
    # annotation is evaluated at definition time and needs Python 3.10+.
    model = (llm_model or os.getenv("LLM_MODEL", "") or "default").strip().lower()
    raw = f"ep={endpoint}|q={_normalize_query(query)}|m={model}|v={int(verbose)}"
    return hashlib.sha256(raw.encode("utf-8")).hexdigest()
61
+
62
def _cache_get(key: str) -> Optional[dict]:
    """Return the cached response for *key*, or ``None`` on a miss.

    An entry whose expiry time has passed is removed and reported as a miss.
    A live entry is moved to the most-recently-used end of the cache.
    """
    item = _reco_cache.get(key)
    if item is None:
        return None
    expires_at, value = item
    if time.time() > expires_at:
        _reco_cache.pop(key, None)
        return None
    try:
        _reco_cache.move_to_end(key)  # LRU refresh
    except KeyError:
        # Another request evicted the entry between the lookup above and this
        # refresh (sync handlers may run concurrently in worker threads).
        # The value already read is still valid, so serve it.
        pass
    return value
72
+
73
def _cache_set(key: str, value: dict) -> None:
    """Store *value* under *key* and evict old entries beyond the size limit.

    The entry expires ``_CACHE_TTL_SECONDS`` seconds from now. It becomes the
    most recently used item; least recently used items are dropped until at
    most ``_CACHE_MAX_ITEMS`` remain.
    """
    _reco_cache[key] = (time.time() + _CACHE_TTL_SECONDS, value)
    # Re-assigning an existing key keeps its old position, so move it explicitly.
    _reco_cache.move_to_end(key)
    # The emptiness check keeps a negative limit from calling popitem() on an
    # empty dict, which would raise KeyError.
    while _reco_cache and len(_reco_cache) > _CACHE_MAX_ITEMS:
        _reco_cache.popitem(last=False)
78
+
79
  class ChatRequest(BaseModel):
80
  query: str
81
  clarification_answer: Optional[str] = None
 
342
 
343
 
344
  @app.post("/recommend")
345
# Handler for POST /recommend.
# Flow, as far as this diff shows:
#   1. If _allow_request() is false, return {"error": "rate limit exceeded"}
#      as an ordinary JSON body (no status code is set here).
#   2. Build a cache key from the query, llm_model and verbose flag.
#   3. On a cache hit, set the X-Cache response header to "HIT" and return
#      the stored dict.
#   4. Otherwise set X-Cache to "MISS", run _run_pipeline, assemble the
#      response dict, store it with _cache_set and return it.
# NOTE(review): indentation is lost in this diff rendering, so it cannot be
# told from here whether `resp["summary"] = summary` is inside
# `if req.verbose:` or runs unconditionally - confirm against the real file.
+ def recommend(req: RecommendRequest, response: Response):
346
  if not _allow_request():
347
  return {"error": "rate limit exceeded"}
348
+
349
+ key = _cache_key(req.query, req.llm_model, req.verbose, endpoint="/recommend")
350
+ cached = _cache_get(key)
351
+ if cached is not None:
352
+ response.headers["X-Cache"] = "HIT"
353
+ return cached
354
+
355
+ response.headers["X-Cache"] = "MISS"
356
+
357
+ final_results, summary, debug_payload = _run_pipeline(
358
+ req.query, verbose=req.verbose, llm_model=req.llm_model
359
+ )
360
  resp = {"recommended_assessments": final_results}
361
  if req.verbose:
362
  resp["debug"] = _sanitize_debug(debug_payload)
363
  resp["summary"] = summary
364
+
365
+ _cache_set(key, resp)
366
  return resp
367
 
368