"""Retrieve knowledge-base chunks relevant to a query via embedding similarity."""

import json

import numpy as np
from sentence_transformers import SentenceTransformer

# Instantiated once at import time so every query reuses the same encoder.
model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')


def load_knowledge():
    """Load the stored chunk embeddings and chunk metadata from disk.

    Returns:
        A ``(embeddings, chunks)`` tuple: the array read from
        ``knowledge/embeddings.npy`` and the object parsed from
        ``knowledge/chunks.json``.

    Raises:
        FileNotFoundError: If either knowledge file is missing.
        json.JSONDecodeError: If ``chunks.json`` is not valid JSON.
    """
    embeddings = np.load("knowledge/embeddings.npy")
    with open("knowledge/chunks.json", "r", encoding="utf-8") as f:
        chunks = json.load(f)
    return embeddings, chunks


def _format_chunk(chunk):
    """Render one chunk record as ``[source] title: text``."""
    return f"[{chunk['source']}] {chunk['title']}: {chunk['text']}"


def find_relevant_context(query, top_k=3, *, min_score=0.2):
    """Return the best-matching knowledge chunks for *query* as one string.

    The query is encoded with the module-level ``model`` and scored against
    every stored embedding by dot product. The ``top_k`` highest-scoring
    chunks whose score is strictly greater than ``min_score`` are formatted
    as ``[source] title: text`` and joined by blank lines, best match first.

    Args:
        query: Text to search for.
        top_k: Maximum number of chunks to include. Values ``<= 0`` yield
            an empty string.
        min_score: Exclusive lower bound a chunk's score must exceed to be
            included.

    Returns:
        The formatted context, or ``""`` when nothing qualifies.
    """
    # Guard: ``[-0:]`` slices the *whole* array, so top_k=0 would otherwise
    # return every chunk above the threshold instead of none; negative values
    # would produce an arbitrary tail slice.
    if top_k <= 0:
        return ""

    # NOTE(review): the knowledge files are re-read on every call; consider
    # caching if they are static for the life of the process.
    embeddings, chunks = load_knowledge()
    query_emb = model.encode([query])

    # NOTE(review): these are raw dot products. They equal cosine similarity
    # only if both the stored embeddings and the query embedding are
    # L2-normalised -- confirm how embeddings.npy was produced.
    scores = np.dot(embeddings, query_emb.T).flatten()

    # argsort is ascending: take the last top_k indices, then reverse so the
    # highest score comes first.
    ranked = np.argsort(scores)[-top_k:][::-1]

    passages = [
        _format_chunk(chunks[i])
        for i in ranked
        if scores[i] > min_score
    ]
    return "\n\n".join(passages).strip()