import hnswlib import numpy as np import os class SearchEngine: def __init__(self, dim: int, max_elements: int, space="cosine"): self.index = hnswlib.Index(space=space, dim=dim) self.max_elements = max_elements self.is_initialized = False self.space = space def init_index(self): self.index.init_index(max_elements=self.max_elements, ef_construction=200, M=16) self.index.set_ef(50) self.is_initialized = True def add_embeddings(self, embeddings: np.ndarray): if not self.is_initialized: self.init_index() self.index.add_items(embeddings) def save_index(self, path="models/embeddings_index.bin"): os.makedirs(os.path.dirname(path), exist_ok=True) self.index.save_index(path) def load_index(self, path="models/embeddings_index.bin"): self.index.load_index(path) self.is_initialized = True def search(self, query_vector, top_k=5): labels, distances = self.index.knn_query(query_vector, k=top_k) # if self.space == "ip": # return labels[0][::-1], distances[0][::-1] return labels[0], distances[0]