| | import numpy as np |
| |
|
class VectorStoreService:
    """
    Simple in-memory vector store.
    Stores embeddings and corresponding documents, and ranks the documents
    against a query vector by cosine similarity.
    """

    def __init__(self, embeddings: list[list[float]], documents: list[str]):
        """
        Build the store from parallel sequences of embeddings and documents.
        :param embeddings: one embedding vector per document
        :param documents: documents, in the same order as ``embeddings``
        :raises ValueError: if the two sequences differ in length
        """
        # Raise explicitly instead of using ``assert``: assertions are removed
        # when Python runs with -O, which would let mismatched input through.
        if len(embeddings) != len(documents):
            raise ValueError("Embeddings and documents must match in length.")
        self.embeddings = np.array(embeddings, dtype=np.float32)
        self.documents = documents

    def search_with_scores(self, query_vec: list[float], k: int = 3) -> list[tuple[str, float]]:
        """
        Returns top-k documents along with cosine similarity scores.
        Zero-length vectors (query or document) get a similarity of 0.0
        instead of NaN. Ties are returned in insertion order.
        :param query_vec: single query vector
        :param k: number of top results; values <= 0 yield an empty list
        :return: list of tuples (document, similarity_score), best match first
        :raises ValueError: if query_vec does not match the embedding dimension
        """
        query = np.array(query_vec, dtype=np.float32)

        # An empty store has nothing to rank (and its array is 1-D, so the
        # per-row norm below would fail on it).
        if self.embeddings.size == 0:
            return []

        expected_shape = self.embeddings.shape[1:]
        if query.shape != expected_shape:
            raise ValueError(
                f"query_vec has shape {query.shape}, expected {expected_shape}."
            )

        # A negative k used as a slice bound would drop items from the end
        # rather than return nothing.
        if k <= 0:
            return []

        # Normalise the query; a zero vector stays zero so every score is 0.0.
        query_length = np.linalg.norm(query)
        unit_query = query / query_length if query_length > 0 else query

        # Normalise each document row; zero rows are divided by 1 and stay zero.
        row_lengths = np.linalg.norm(self.embeddings, axis=1, keepdims=True)
        safe_lengths = np.where(row_lengths > 0, row_lengths, 1.0)
        unit_docs = self.embeddings / safe_lengths

        # Cosine similarity is the dot product of unit vectors.
        similarities = unit_docs.dot(unit_query)

        # Stable sort on the negated scores: descending order, deterministic ties.
        top_indices = np.argsort(-similarities, kind="stable")[:k]

        return [(self.documents[i], float(similarities[i])) for i in top_indices]
| |
|