Spaces:
Sleeping
Sleeping
trivial change
Browse files- app/main.py +1 -1
- docker-runpod/Dockerfile +13 -0
- handler.py +43 -0
- requirements.txt.runpod +8 -0
app/main.py
CHANGED
|
@@ -29,7 +29,7 @@ app.include_router(ask_routes.router, prefix="/api", tags=["Query"])
|
|
| 29 |
app.include_router(metrics_routes.router, prefix="/api", tags=["Metrics"])
|
| 30 |
#app.include_router(debug_routes.router, prefix="/api", tags=["Debug"])
|
| 31 |
|
| 32 |
-
logger.info("✅ Routers initialized
|
| 33 |
for route in app.routes:
|
| 34 |
logger.info(f" - {route.path}")
|
| 35 |
|
|
|
|
| 29 |
app.include_router(metrics_routes.router, prefix="/api", tags=["Metrics"])
|
| 30 |
#app.include_router(debug_routes.router, prefix="/api", tags=["Debug"])
|
| 31 |
|
| 32 |
+
logger.info("✅ Routers initialized ::")
|
| 33 |
for route in app.routes:
|
| 34 |
logger.info(f" - {route.path}")
|
| 35 |
|
docker-runpod/Dockerfile
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Base image: RunPod's PyTorch build (Python 3.10, CUDA 11.8 runtime).
FROM runpod/pytorch:2.1.0-py3.10-cuda11.8.0-runtime

WORKDIR /app

# Install dependencies in their own layer so code-only changes don't
# re-run pip.
# NOTE(review): this commit adds requirements.txt.runpod, but the COPY below
# expects requirements.txt — confirm the build context provides that name.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Pre-download model (faster cold starts)
RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')"

# Copy application code after the dependency layer so edits stay cheap.
COPY . .

# Start the RunPod serverless worker.
CMD ["python", "handler.py"]
handler.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import runpod
from sentence_transformers import SentenceTransformer
import faiss
import torch
import json  # NOTE(review): unused in the visible code — confirm before removing

# Load model once (stays in memory between calls)
# Both globals start as None and are populated lazily by load_models() on the
# first request, so the serverless worker reuses them across invocations.
model = None  # SentenceTransformer instance after load_models()
index = None  # faiss.IndexFlatL2 (384-dim) after load_models(); starts empty
def load_models():
    """Lazily initialize the global embedding model and FAISS index.

    Idempotent: once ``model`` is set, subsequent calls return immediately,
    so the heavy download/initialization happens at most once per worker.
    """
    global model, index
    if model is not None:
        return

    print("Loading model...")
    target_device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2').to(target_device)

    # Initialize FAISS
    dimension = 384
    index = faiss.IndexFlatL2(dimension)
    print("Models loaded!")
def handler(event):
    """RunPod serverless handler.

    Expects ``event["input"]`` to be a dict with a ``"query"`` string and
    returns ``{"embedding": [...], "status": "success"}`` with the query's
    sentence embedding.

    Fix: the original did ``event["input"]`` unguarded, so a malformed event
    (missing ``input``, or a non-dict ``input``) raised KeyError /
    AttributeError and crashed the worker. We now validate the payload first
    — before the expensive ``load_models()`` call — and return a structured
    error response instead. Valid-input behavior is unchanged.
    """
    input_data = event.get("input")
    if not isinstance(input_data, dict):
        return {
            "error": "event['input'] must be a dict",
            "status": "error",
        }
    query = input_data.get("query", "")

    load_models()

    # Your RAG logic
    embedding = model.encode([query])

    # FAISS search (add your logic)
    # distances, indices = index.search(embedding, k=5)

    return {
        "embedding": embedding[0].tolist(),
        "status": "success"
    }
# Entry point: hand the handler to RunPod's serverless runtime, which then
# invokes handler(event) for each queued job.
if __name__ == "__main__":
    runpod.serverless.start({"handler": handler})
requirements.txt.runpod
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
runpod
|
| 2 |
+
fastapi==0.109.0
|
| 3 |
+
sentence-transformers
|
| 4 |
+
faiss-cpu
|
| 5 |
+
torch
|
| 6 |
+
transformers
|
| 7 |
+
python-multipart==0.0.6 # if you accept file uploads
|
| 8 |
+
|