Spaces:
Sleeping
Sleeping
File size: 1,081 Bytes
e91b1d8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
import runpod
from sentence_transformers import SentenceTransformer
import faiss
import torch
import json
# Load model once (stays in memory between calls)
model = None
index = None
def load_models():
global model, index
if model is None:
print("Loading model...")
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = model.to(device)
# Initialize FAISS
dimension = 384
index = faiss.IndexFlatL2(dimension)
print("Models loaded!")
def handler(event):
"""RunPod serverless handler"""
load_models()
input_data = event["input"]
query = input_data.get("query", "")
# Your RAG logic
embedding = model.encode([query])
# FAISS search (add your logic)
# distances, indices = index.search(embedding, k=5)
return {
"embedding": embedding[0].tolist(),
"status": "success"
}
if __name__ == "__main__":
runpod.serverless.start({"handler": handler}) |