File size: 1,289 Bytes
a5d886c
 
 
e6e9c5c
 
 
 
 
 
 
 
a5d886c
 
e6e9c5c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import numpy as np

class VectorStoreService:
    """
    Simple in-memory vector store.

    Stores embeddings and corresponding documents, and ranks the documents
    against a query vector by cosine similarity.
    """

    def __init__(self, embeddings: list[list[float]], documents: list[str]):
        """
        Build the store from parallel sequences of embeddings and documents.

        :param embeddings: one embedding vector per document
        :param documents: documents, aligned by index with ``embeddings``
        :raises AssertionError: if the two sequences differ in length
        """
        # Raised explicitly instead of via ``assert`` so the check still runs
        # under ``python -O``; the exception type and message are unchanged.
        if len(embeddings) != len(documents):
            raise AssertionError("Embeddings and documents must match in length.")
        self.embeddings = np.array(embeddings, dtype=np.float32)
        self.documents = documents

    def search_with_scores(
        self, query_vec: list[float], k: int = 3
    ) -> list[tuple[str, float]]:
        """
        Returns top-k documents along with cosine similarity scores.

        Zero-length vectors (query or stored) have no direction, so their
        similarity is reported as 0.0 rather than NaN.

        :param query_vec: single query vector, same dimensionality as the
            stored embeddings
        :param k: number of top results; values <= 0 yield an empty list
        :return: list of tuples (document, similarity_score), best match first;
            at most ``k`` entries, empty if the store holds no documents
        """
        # Nothing to rank: a non-positive k, or an empty store (whose embedding
        # array is 1-D and would break the per-row norm below).
        if k <= 0 or self.embeddings.size == 0:
            return []

        query = np.asarray(query_vec, dtype=np.float32)

        # Normalize vectors for cosine similarity. A zero norm is replaced by 1
        # so the division is a no-op on the all-zero vector and its dot product
        # (hence its score) comes out as exactly 0.
        query_norm = np.linalg.norm(query)
        if query_norm == 0:
            query_norm = 1.0
        doc_norms = np.linalg.norm(self.embeddings, axis=1, keepdims=True)
        doc_norms[doc_norms == 0] = 1.0

        # Cosine similarity of every stored embedding against the query.
        similarities = (self.embeddings / doc_norms).dot(query / query_norm)

        # Indices of the k highest scores; the stable sort keeps tied
        # documents in insertion order so results are deterministic.
        top_indices = np.argsort(-similarities, kind="stable")[:k]

        # Return documents with similarity scores as plain Python floats.
        return [(self.documents[int(i)], float(similarities[i])) for i in top_indices]