from typing import List, Dict

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
# Model identifier passed to SentenceTransformer when a SemanticIndex is built;
# documents and queries are both embedded with this model.
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
class SemanticIndex:
    """In-memory semantic search over a collection of documents.

    The ``"text"`` value of every document is embedded with the
    SentenceTransformer model named by ``MODEL_NAME`` and the vectors are
    stored in a flat FAISS L2 index. Queries are embedded with the same
    model and matched against that index.

    Attributes are set in ``__init__``:
        model: the loaded SentenceTransformer.
        meta: the documents, in index order (row ``i`` <-> ``meta[i]``).
        texts: the ``"text"`` value of each document.
        embeddings: float32 matrix with one row per document.
        index: the ``faiss.IndexFlatL2`` holding ``embeddings``.
    """

    def __init__(self, documents: List[Dict]):
        """Embed ``documents`` and build the index.

        Args:
            documents: Dicts that each contain a ``"text"`` key. The whole
                dict is what ``search`` hands back. May be empty.

        Raises:
            KeyError: If a document has no ``"text"`` key.
        """
        self.model = SentenceTransformer(MODEL_NAME)
        # Shallow copy: keeps row i of the index aligned with meta[i] even if
        # the caller later appends to or reorders its own list.
        self.meta = list(documents)
        self.texts = [d["text"] for d in self.meta]

        if self.texts:
            embeddings = self.model.encode(self.texts, show_progress_bar=False)
            # FAISS expects a C-contiguous float32 matrix.
            self.embeddings = np.ascontiguousarray(embeddings, dtype="float32")
        else:
            # Encoding zero texts does not yield a 2-D array, so the vector
            # width has to come from the model itself.
            dim = self.model.get_sentence_embedding_dimension()
            self.embeddings = np.empty((0, dim), dtype="float32")

        self.index = faiss.IndexFlatL2(self.embeddings.shape[1])
        if self.texts:
            self.index.add(self.embeddings)

    def search(self, query: str, k: int = 5) -> List[Dict]:
        """Return up to ``k`` documents for ``query``, in FAISS result order.

        Args:
            query: Free-text query, embedded with the index's model.
            k: Maximum number of documents to return.

        Returns:
            At most ``min(k, number of indexed documents)`` document dicts;
            an empty list when ``k <= 0`` or the index is empty.
        """
        # FAISS pads the label array with -1 when fewer than k vectors are
        # available; used as a list index, -1 would silently return the last
        # document. Clamp k up front and skip any negative label regardless.
        k = min(k, self.index.ntotal)
        if k <= 0:
            return []

        encoded = self.model.encode([query], show_progress_bar=False)
        q = np.asarray(encoded, dtype="float32")
        _, indices = self.index.search(q, k)
        return [self.meta[int(i)] for i in indices[0] if i >= 0]