| import hnswlib |
| import numpy as np |
| import os |
|
|
|
|
class SearchEngine:
    """Thin wrapper around an ``hnswlib.Index`` for nearest-neighbour search.

    The wrapper owns a single index, initialises it lazily the first time
    embeddings are added, and can persist / restore it from disk.
    """

    # Build-time and query-time HNSW parameters used by this wrapper.
    EF_CONSTRUCTION = 200
    M = 16
    # Query-time ``ef``. hnswlib does not store this value in the saved
    # index file, so it must be re-applied after every load.
    EF_SEARCH = 50

    def __init__(self, dim: int, max_elements: int, space="cosine"):
        """Create an (uninitialised) index.

        Args:
            dim: Dimensionality of the vectors, forwarded to ``hnswlib.Index``.
            max_elements: Capacity passed to ``init_index`` when the index
                is first initialised.
            space: Distance space name forwarded to ``hnswlib.Index``.
        """
        self.index = hnswlib.Index(space=space, dim=dim)
        self.max_elements = max_elements
        self.is_initialized = False
        self.space = space

    def init_index(self):
        """Allocate the index structure and set the query-time ``ef``."""
        self.index.init_index(
            max_elements=self.max_elements,
            ef_construction=self.EF_CONSTRUCTION,
            M=self.M,
        )
        self.index.set_ef(self.EF_SEARCH)
        self.is_initialized = True

    def add_embeddings(self, embeddings: np.ndarray):
        """Add ``embeddings`` to the index, initialising it first if needed.

        No explicit ids are passed, so label assignment is left to hnswlib.
        """
        if not self.is_initialized:
            self.init_index()
        self.index.add_items(embeddings)

    def save_index(self, path="models/embeddings_index.bin"):
        """Write the index to ``path``, creating parent directories as needed.

        Args:
            path: Destination file. May be a bare filename, in which case
                the file is written relative to the current directory.
        """
        directory = os.path.dirname(path)
        # ``os.makedirs("")`` raises FileNotFoundError, so only create the
        # parent when the path actually has a directory component.
        if directory:
            os.makedirs(directory, exist_ok=True)
        self.index.save_index(path)

    def load_index(self, path="models/embeddings_index.bin"):
        """Load a previously saved index from ``path`` and mark it ready.

        The query-time ``ef`` is re-applied after loading so that searches
        on a loaded index use the same setting as on a freshly built one.
        """
        self.index.load_index(path)
        self.index.set_ef(self.EF_SEARCH)
        self.is_initialized = True

    def search(self, query_vector, top_k=5):
        """Return the ``top_k`` nearest neighbours of ``query_vector``.

        Args:
            query_vector: Query passed straight to ``knn_query``.
            top_k: Number of neighbours requested.

        Returns:
            A ``(labels, distances)`` pair holding the first row of each
            array returned by ``knn_query`` (i.e. results for the first
            query only, if several were supplied).
        """
        labels, distances = self.index.knn_query(query_vector, k=top_k)
        return labels[0], distances[0]
|
|