Sandei committed on
Commit
e6e9c5c
·
1 Parent(s): fa8d3c4

vector service update

Browse files
Files changed (1) hide show
  1. service/vector_store_service.py +29 -6
service/vector_store_service.py CHANGED
@@ -1,11 +1,34 @@
1
  import numpy as np
2
 
class VectorStoreService:
    """
    Simple in-memory vector store.
    Ranks stored documents against a query embedding by dot product.
    """

    def __init__(self, embeddings, documents):
        """
        :param embeddings: one embedding vector per document
        :param documents: documents, in the same order as ``embeddings``
        """
        self.embeddings = np.array(embeddings)
        self.documents = documents

    def search(self, query_embedding, k=3):
        """
        Returns the top-k documents, highest dot-product score first.
        :param query_embedding: single query vector
        :param k: number of top results; values <= 0 yield no results
        :return: list of documents
        """
        # Guard first: with k == 0 the slice ``[-k:]`` is ``[-0:]``, i.e. the
        # whole array, which would return every document instead of none.
        if k <= 0 or len(self.documents) == 0:
            return []

        query = np.asarray(query_embedding)
        scores = query @ self.embeddings.T
        # argsort is ascending: keep the last k entries, then flip them so the
        # best match comes first.
        top_k = scores.argsort()[-k:][::-1]
        return [self.documents[i] for i in top_k]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import numpy as np
2
 
class VectorStoreService:
    """
    Simple in-memory vector store.
    Stores embeddings and corresponding documents, and ranks documents
    against a query vector by cosine similarity.
    """

    def __init__(self, embeddings: list[list[float]], documents: list[str]):
        """
        :param embeddings: one embedding vector per document
        :param documents: documents, in the same order as ``embeddings``
        :raises ValueError: if ``embeddings`` and ``documents`` differ in length
        """
        # Raise explicitly instead of asserting: assert statements are removed
        # when Python runs with -O, which would silently skip this check.
        if len(embeddings) != len(documents):
            raise ValueError("Embeddings and documents must match in length.")
        self.embeddings = np.array(embeddings, dtype=np.float32)
        self.documents = documents

    def search_with_scores(self, query_vec: list[float], k: int = 3) -> list[tuple[str, float]]:
        """
        Returns top-k documents along with cosine similarity scores.
        Zero-length vectors (query or document) get a similarity of 0.0
        rather than NaN.
        :param query_vec: single query vector
        :param k: number of top results; values <= 0 yield no results
        :return: list of tuples (document, similarity_score), best match first
        """
        # A negative k would otherwise turn ``[:k]`` into "all but the last
        # |k|" results; an empty store has no axis 1 to take norms over.
        if k <= 0 or len(self.documents) == 0:
            return []

        query = np.asarray(query_vec, dtype=np.float32)

        # Cosine similarity = dot(doc, query) / (|doc| * |query|)
        doc_lengths = np.linalg.norm(self.embeddings, axis=1)
        denominators = doc_lengths * np.linalg.norm(query)
        # A zero vector has a zero dot product with everything, so dividing by
        # 1 instead of 0 gives a similarity of 0.0 and avoids NaN scores.
        denominators[denominators == 0] = 1.0
        similarities = (self.embeddings @ query) / denominators

        # Get top-k indices; the stable sort keeps ties in insertion order
        top_indices = np.argsort(-similarities, kind="stable")[:k]

        # Return documents with similarity scores
        return [(self.documents[i], float(similarities[i])) for i in top_indices]