| import numpy as np | |
| import json | |
| from sentence_transformers import SentenceTransformer | |
# Sentence-embedding model shared by the whole module. It is constructed once,
# at import time, and find_relevant_context uses it to encode incoming queries.
model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
def load_knowledge():
    """Read the stored embedding array and chunk records from disk.

    Both files are addressed with relative paths, so they are resolved
    against the current working directory of the process.

    Returns:
        A ``(embeddings, chunks)`` tuple: the array loaded from
        ``knowledge/embeddings.npy`` and the object decoded from
        ``knowledge/chunks.json``.
    """
    vectors = np.load("knowledge/embeddings.npy")
    with open("knowledge/chunks.json", "r", encoding="utf-8") as handle:
        records = json.loads(handle.read())
    return vectors, records
def find_relevant_context(query, top_k=3, min_score=0.2):
    """Return the best-scoring knowledge chunks for *query* as one string.

    The query is encoded with the module-level ``model`` and scored against
    every stored embedding using a dot product. The ``top_k`` highest-scoring
    chunks are considered, and those whose score exceeds ``min_score`` are
    formatted and joined.

    Args:
        query: Text to look up in the knowledge base.
        top_k: Maximum number of chunks to include. Zero or a negative
            value yields an empty string.
        min_score: Exclusive lower bound on the score; a chunk is kept only
            when its score is strictly greater than this value.

    Returns:
        The kept chunks, best first, each rendered as
        ``[source] title: text`` and separated by a blank line. An empty
        string when no chunk qualifies.
    """
    # Guard first: ``scores[-0:]`` is the *whole* array, so without this
    # check top_k=0 would return every chunk instead of none, and a negative
    # top_k would return nearly all of them.
    if top_k <= 0:
        return ""

    emb, chunks = load_knowledge()
    query_emb = model.encode([query])
    # NOTE(review): this is a raw dot product. It matches cosine similarity
    # only if both the stored and the query embeddings are unit-normalised;
    # confirm against how embeddings.npy was produced.
    scores = np.dot(emb, query_emb.T).flatten()
    # argsort is ascending: take the last top_k indices, then reverse so the
    # highest score comes first.
    best_idx = np.argsort(scores)[-top_k:][::-1]

    entries = [
        f"[{chunks[i]['source']}] {chunks[i]['title']}: {chunks[i]['text']}"
        for i in best_idx
        if scores[i] > min_score
    ]
    return "\n\n".join(entries).strip()