vector service update
Browse files
service/vector_store_service.py
CHANGED
|
@@ -1,11 +1,34 @@
|
|
| 1 |
import numpy as np
|
| 2 |
|
| 3 |
class VectorStoreService:
|
| 4 |
-
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
self.documents = documents
|
| 7 |
|
| 8 |
-
def
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import numpy as np
|
| 2 |
|
| 3 |
class VectorStoreService:
    """
    Simple in-memory vector store.

    Stores embeddings and their corresponding documents, and ranks the
    documents against a query vector by cosine similarity.
    """

    def __init__(self, embeddings: list[list[float]], documents: list[str]):
        """
        Build the store from parallel lists of embeddings and documents.

        :param embeddings: one embedding vector per document
        :param documents: documents, in the same order as ``embeddings``
        :raises ValueError: if the two lists differ in length
        """
        # Raise explicitly instead of using ``assert``: assertions are removed
        # when Python runs with -O, which would let mismatched inputs through.
        if len(embeddings) != len(documents):
            raise ValueError("Embeddings and documents must match in length.")
        self.embeddings = np.array(embeddings, dtype=np.float32)
        self.documents = documents

    def search_with_scores(
        self, query_vec: list[float], k: int = 3
    ) -> list[tuple[str, float]]:
        """
        Returns top-k documents along with cosine similarity scores.

        A zero-length query vector or stored embedding has no direction, so
        its similarity is reported as 0.0 rather than NaN.

        :param query_vec: single query vector
        :param k: number of top results; values <= 0 yield an empty list
        :return: list of tuples (document, similarity_score), best match first
        """
        # Nothing to rank: an empty store would otherwise fail on the axis=1
        # norm, and a negative k would slice "all but the last |k|" results.
        if k <= 0 or len(self.documents) == 0:
            return []

        query = np.asarray(query_vec, dtype=np.float32)

        # cosine(a, b) = a.b / (|a| * |b|)
        dot_products = self.embeddings.dot(query)
        denominators = np.linalg.norm(self.embeddings, axis=1) * np.linalg.norm(query)

        # Divide only where the denominator is non-zero; the remaining
        # positions keep the 0.0 they were initialised with.
        similarities = np.divide(
            dot_products,
            denominators,
            out=np.zeros_like(dot_products),
            where=denominators > 0,
        )

        # Stable sort keeps tied documents in insertion order.
        top_indices = np.argsort(-similarities, kind="stable")[:k]

        return [(self.documents[i], float(similarities[i])) for i in top_indices]
|