File size: 1,289 Bytes
a5d886c e6e9c5c a5d886c e6e9c5c | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 | import numpy as np
class VectorStoreService:
    """
    Simple in-memory vector store.

    Stores embeddings and their corresponding documents, and ranks the
    documents against a query vector by cosine similarity.
    """

    def __init__(self, embeddings: list[list[float]], documents: list[str]):
        """
        Build the store from parallel lists of embeddings and documents.

        :param embeddings: one embedding vector per document
        :param documents: documents, in the same order as ``embeddings``
        :raises ValueError: if the two inputs differ in length
        """
        # Raise explicitly rather than assert: asserts are stripped under
        # ``python -O``, which would let mismatched inputs through silently.
        if len(embeddings) != len(documents):
            raise ValueError("Embeddings and documents must match in length.")
        self.embeddings = np.array(embeddings, dtype=np.float32)
        self.documents = documents

    def search_with_scores(
        self, query_vec: list[float], k: int = 3
    ) -> list[tuple[str, float]]:
        """
        Returns top-k documents along with cosine similarity scores.

        Zero-length vectors (query or stored) have no direction, so their
        similarity is reported as 0.0 instead of NaN.

        :param query_vec: single query vector
        :param k: number of top results; values <= 0 yield an empty list and
            values larger than the store return every document
        :return: list of tuples (document, similarity_score), best match first
        """
        # A negative k would otherwise slice "all but the last |k|" results,
        # and an empty store has no second axis to take norms over.
        if k <= 0 or self.embeddings.shape[0] == 0:
            return []

        query = np.asarray(query_vec, dtype=np.float32)

        # Normalize the query; leave an all-zero query untouched so every
        # dot product below comes out as 0.0 rather than NaN.
        query_length = np.linalg.norm(query)
        query_unit = query / query_length if query_length > 0 else query

        # Normalize the stored vectors; substitute 1 for zero norms so the
        # division is a no-op for all-zero rows (their dot product stays 0).
        doc_lengths = np.linalg.norm(self.embeddings, axis=1, keepdims=True)
        doc_lengths[doc_lengths == 0] = 1.0
        doc_units = self.embeddings / doc_lengths

        # Cosine similarity of every document against the query
        similarities = doc_units.dot(query_unit)

        # Highest similarity first; a stable sort keeps ties in insertion order
        top_indices = np.argsort(-similarities, kind="stable")[:k]

        return [(self.documents[i], float(similarities[i])) for i in top_indices]
|