VcRlAgent committed on
Commit
e91b1d8
·
1 Parent(s): 1a4bc8e

trivial change

Browse files
Files changed (4) hide show
  1. app/main.py +1 -1
  2. docker-runpod/Dockerfile +13 -0
  3. handler.py +43 -0
  4. requirements.txt.runpod +8 -0
app/main.py CHANGED
@@ -29,7 +29,7 @@ app.include_router(ask_routes.router, prefix="/api", tags=["Query"])
29
  app.include_router(metrics_routes.router, prefix="/api", tags=["Metrics"])
30
  #app.include_router(debug_routes.router, prefix="/api", tags=["Debug"])
31
 
32
- logger.info("✅ Routers initialized:")
33
  for route in app.routes:
34
  logger.info(f" - {route.path}")
35
 
 
29
  app.include_router(metrics_routes.router, prefix="/api", tags=["Metrics"])
30
  #app.include_router(debug_routes.router, prefix="/api", tags=["Debug"])
31
 
32
+ logger.info("✅ Routers initialized ::")
33
  for route in app.routes:
34
  logger.info(f" - {route.path}")
35
 
docker-runpod/Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# RunPod serverless worker image: PyTorch 2.1 / Python 3.10 / CUDA 11.8 runtime base.
FROM runpod/pytorch:2.1.0-py3.10-cuda11.8.0-runtime

WORKDIR /app

# NOTE(review): this expects a `requirements.txt` in the build context, but the
# repo adds `requirements.txt.runpod` at the root and this Dockerfile lives in
# docker-runpod/ — confirm the build context and filename match at build time.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Pre-download model (faster cold starts)
# Baking the sentence-transformers weights into the image avoids a network
# download on every cold start.
RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')"

# Copy the application code after dependency install so code-only changes
# don't invalidate the pip layer cache.
COPY . .

# Start the RunPod serverless handler loop.
CMD ["python", "handler.py"]
handler.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import runpod
2
+ from sentence_transformers import SentenceTransformer
3
+ import faiss
4
+ import torch
5
+ import json
6
+
7
# Load model once (stays in memory between calls): these module-level
# singletons are populated lazily by load_models() on the first request and
# reused across warm invocations of the serverless worker.
model = None  # SentenceTransformer instance after load_models(); None until then
index = None  # FAISS index after load_models(); None until then
10
+
11
def load_models():
    """Lazily initialize the shared embedding model and FAISS index.

    Idempotent: the first call populates the module-level ``model`` and
    ``index`` singletons; every later call returns immediately, so warm
    worker invocations skip the expensive load.
    """
    global model, index
    if model is not None:
        return

    print("Loading model...")
    model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
    # Move onto the GPU when one is available, otherwise stay on CPU.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = model.to(device)

    # 384 is the embedding width of all-MiniLM-L6-v2; build an exact
    # L2-distance index over that dimensionality.
    index = faiss.IndexFlatL2(384)
    print("Models loaded!")
23
+
24
def handler(event):
    """RunPod serverless handler.

    Expects ``event["input"]`` to be a dict with an optional ``"query"``
    string. Returns a dict with the query's embedding as a plain Python
    list and a ``"status"`` flag.
    """
    load_models()

    # Fix: a job payload without an "input" key used to raise KeyError and
    # crash the request; treat a missing/None payload as empty instead.
    input_data = event.get("input") or {}
    query = input_data.get("query", "")

    # Your RAG logic
    embedding = model.encode([query])

    # FAISS search (add your logic)
    # distances, indices = index.search(embedding, k=5)

    return {
        "embedding": embedding[0].tolist(),
        "status": "success"
    }
41
+
42
# Entry point: register handler() with the RunPod serverless runtime, which
# then polls for jobs and invokes it with each job's event payload.
if __name__ == "__main__":
    runpod.serverless.start({"handler": handler})
requirements.txt.runpod ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ runpod
2
+ fastapi==0.109.0
3
+ sentence-transformers
4
+ faiss-cpu
5
+ torch
6
+ transformers
7
+ python-multipart==0.0.6 # if you accept file uploads
8
+