NewtonBot / retriever.py
Kolyadual's picture
Upload folder using huggingface_hub
9250542 verified
import numpy as np
import json
from sentence_transformers import SentenceTransformer
# Sentence encoder shared by the whole module. It is instantiated once, at
# import time, and used by find_relevant_context() to embed incoming queries.
# NOTE(review): presumably this must be the same model that produced
# knowledge/embeddings.npy, otherwise the similarity scores are meaningless
# -- confirm against the indexing script.
model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
def load_knowledge(embeddings_path="knowledge/embeddings.npy",
                   chunks_path="knowledge/chunks.json"):
    """Load the knowledge base from disk.

    Args:
        embeddings_path: Path to a ``.npy`` file holding the chunk
            embeddings. Defaults to ``knowledge/embeddings.npy``, resolved
            against the current working directory.
        chunks_path: Path to a UTF-8 encoded JSON file holding the chunk
            records. Defaults to ``knowledge/chunks.json``, resolved against
            the current working directory.

    Returns:
        A ``(embeddings, chunks)`` tuple: the array returned by ``np.load``
        and the object decoded by ``json.load``.

    Raises:
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If the chunks file is not valid JSON.
    """
    embeddings = np.load(embeddings_path)
    with open(chunks_path, "r", encoding="utf-8") as f:
        chunks = json.load(f)
    return embeddings, chunks
def find_relevant_context(query, top_k=3, min_score=0.2):
    """Return the knowledge chunks that best match *query* as one string.

    The query is encoded with the module-level ``model`` and scored against
    every stored embedding with a dot product. The ``top_k`` highest-scoring
    chunks whose score is strictly greater than ``min_score`` are formatted
    as ``"[source] title: text"`` and joined with blank lines, best match
    first.

    NOTE(review): the score is a raw dot product, not a cosine similarity.
    The 0.2 default threshold presumably assumes normalized embeddings --
    confirm how knowledge/embeddings.npy was produced.

    Args:
        query: The user query text to search for.
        top_k: Maximum number of chunks to include. Values <= 0 yield an
            empty result.
        min_score: Exclusive lower bound a chunk's score must exceed to be
            included.

    Returns:
        The formatted context with surrounding whitespace stripped, or an
        empty string when nothing qualifies.
    """
    # Without this guard ``[-top_k:]`` with top_k == 0 becomes ``[-0:]``,
    # which selects *every* index instead of none; negative values produce
    # an equally meaningless slice.
    if top_k <= 0:
        return ""

    emb, chunks = load_knowledge()
    # An empty knowledge base has nothing to retrieve; bail out before the
    # dot product, which cannot handle a zero-length 1-D array.
    if len(emb) == 0 or not chunks:
        return ""

    query_emb = model.encode([query])
    scores = np.dot(emb, query_emb.T).flatten()

    # Indices ordered from highest to lowest score, truncated to top_k.
    best_idx = np.argsort(scores)[::-1][:top_k]

    parts = [
        f"[{chunks[i]['source']}] {chunks[i]['title']}: {chunks[i]['text']}"
        for i in best_idx
        if scores[i] > min_score
    ]
    return "\n\n".join(parts).strip()