ketannnn committed on
Commit
20ae104
·
1 Parent(s): f42877a

feat: enforce native preloading of neural model to prevent HTTP connection timeouts

Browse files
Files changed (1) hide show
  1. backend/main.py +12 -0
backend/main.py CHANGED
@@ -58,6 +58,18 @@ async def lifespan(app: FastAPI):
58
 
59
  app.state.qdrant = _qdrant_client
60
  app.state.qdrant_ready = _qdrant_ready
 
 
 
 
 
 
 
 
 
 
 
 
61
  yield
62
  _qdrant_client.close()
63
 
 
58
 
59
  app.state.qdrant = _qdrant_client
60
  app.state.qdrant_ready = _qdrant_ready
61
+
62
+ # -----------------------------------------------------
63
+ # CRITICAL: Pre-load the 2.3 GB Neural Cross-Encoder
64
+ # to entirely prevent HF Gateway 60-second 500 timeouts
65
+ # during user requests.
66
+ # -----------------------------------------------------
67
+ import asyncio
68
+ from src.ml.reranker import _get_reranker
69
+ logger.info(f"Preloading Neural Reranker `{settings.reranker_model}`. This may take ~60 seconds to cache...")
70
+ await asyncio.to_thread(_get_reranker)
71
+ logger.info("Neural Reranker fully loaded into memory!")
72
+
73
  yield
74
  _qdrant_client.close()
75