File size: 764 Bytes
9250542 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
import numpy as np
import json
from sentence_transformers import SentenceTransformer
# Sentence-embedding model, constructed once when this module is imported and
# reused by find_relevant_context() to encode each incoming query.
# NOTE(review): the stored embeddings are presumably produced by this same
# model so that query and chunk vectors are comparable -- confirm against the
# script that builds knowledge/embeddings.npy.
model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
def load_knowledge(
    embeddings_path="knowledge/embeddings.npy",
    chunks_path="knowledge/chunks.json",
):
    """Load the precomputed chunk embeddings and their metadata from disk.

    Args:
        embeddings_path: Path of the ``.npy`` file holding the embedding
            array. Defaults to the location the module originally hard-coded.
        chunks_path: Path of the UTF-8 JSON file holding the chunk records.
            Defaults to the location the module originally hard-coded.

    Returns:
        A ``(embeddings, chunks)`` tuple: the array read by ``np.load`` and
        the object decoded by ``json.load``. ``find_relevant_context`` indexes
        ``chunks`` by position and reads the ``source``, ``title`` and
        ``text`` keys of each entry, so entry ``i`` is expected to describe
        row ``i`` of ``embeddings`` (not validated here).

    Raises:
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If the chunk file is not valid JSON.
    """
    embeddings = np.load(embeddings_path)
    # Explicit UTF-8 so non-ASCII chunk text decodes the same on every platform.
    with open(chunks_path, "r", encoding="utf-8") as f:
        chunks = json.load(f)
    return embeddings, chunks
def find_relevant_context(query, top_k=3, *, min_score=0.2):
    """Build a context string from the knowledge chunks most similar to *query*.

    The query is encoded with the module-level ``model`` and scored against
    every stored embedding by dot product. The ``top_k`` highest-scoring
    chunks whose score exceeds ``min_score`` are formatted as
    ``"[source] title: text"`` and joined by blank lines, best match first.

    Args:
        query: The text to search for.
        top_k: Maximum number of chunks to include. Values ``<= 0`` yield an
            empty string.
        min_score: Exclusive lower bound a chunk's score must exceed to be
            included. Defaults to the previously hard-coded ``0.2``.

    Returns:
        The formatted context with surrounding whitespace stripped, or ``""``
        when nothing qualifies.
    """
    # Guard first: with top_k == 0 the slice below would be [-0:], which
    # selects *every* chunk instead of none; negative values are just as wrong.
    if top_k <= 0:
        return ""

    emb, chunks = load_knowledge()
    if len(emb) == 0:
        # Nothing to search; also avoids a shape error on an empty store.
        return ""

    query_emb = model.encode([query])
    # One dot-product score per stored embedding.
    # NOTE(review): this equals cosine similarity only if both the stored and
    # the query embeddings are unit-normalised -- confirm how the .npy was built.
    scores = np.dot(emb, query_emb.T).flatten()

    # argsort is ascending: keep the last top_k indices, then reverse so the
    # best match comes first.
    best_idx = np.argsort(scores)[-top_k:][::-1]

    parts = [
        f"[{chunks[i]['source']}] {chunks[i]['title']}: {chunks[i]['text']}"
        for i in best_idx
        if scores[i] > min_score
    ]
    return "\n\n".join(parts).strip()
|