cryogenic22 committed on
Commit
5cbcca5
·
verified ·
1 Parent(s): 8665f8c

Create vector_store.py

Browse files
Files changed (1) hide show
  1. vector_store.py +39 -0
vector_store.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# src/core/vector_store.py
from typing import Dict, List

import faiss
import numpy as np
2
class FAISSVectorStore:
    """In-memory vector store backed by a flat FAISS inner-product index.

    Chunk texts and their metadata are kept in two parallel lists whose
    positions match the sequential ids FAISS assigns to added vectors, so a
    search hit with id ``i`` maps to ``documents[i]`` / ``metadatas[i]``.

    NOTE(review): inner product equals cosine similarity only when both the
    stored embeddings and the query embedding are L2-normalized. This class
    does not normalize anything -- confirm that the embedder does.
    """

    def __init__(self, embedding_dim: int = 384):  # GTE-small dimension
        """Create an empty store for vectors of size ``embedding_dim``."""
        # Flat (exhaustive) inner-product index; it needs no training step.
        self.index = faiss.IndexFlatIP(embedding_dim)
        self.documents = []
        self.metadatas = []

    def add_documents(
        self,
        chunks: List[str],
        embeddings: List[List[float]],
        metadatas: List[Dict],
    ):
        """Add text chunks with their embeddings and metadata to the store.

        Args:
            chunks: Chunk texts, one per embedding.
            embeddings: One vector per chunk, each of the index dimension.
            metadatas: One metadata dict per chunk.

        Raises:
            ValueError: If the three inputs do not have the same length.
        """
        count = len(chunks)
        if not (count == len(embeddings) == len(metadatas)):
            raise ValueError(
                "chunks, embeddings and metadatas must have the same length "
                f"(got {count}, {len(embeddings)}, {len(metadatas)})"
            )
        if count == 0:
            # Nothing to add; an empty list would also yield a 1-D array that
            # FAISS cannot interpret as an (n, d) matrix.
            return

        # FAISS operates on C-contiguous float32 matrices; plain Python floats
        # would otherwise become float64.
        vectors = np.ascontiguousarray(embeddings, dtype=np.float32)

        # Add to the index first: if FAISS rejects the vectors (for example a
        # dimension mismatch) the parallel lists stay aligned with the index.
        self.index.add(vectors)
        self.documents.extend(chunks)
        self.metadatas.extend(metadatas)

    def similarity_search(
        self, query: str, embedder: "DocumentEmbedder", k: int = 5
    ) -> List[Dict]:
        """Return up to ``k`` stored chunks most similar to ``query``.

        Args:
            query: Query text to embed and search for.
            embedder: Object exposing ``embedding_model.embed_query(text)``.
            k: Maximum number of results; non-positive values yield ``[]``.

        Returns:
            A list of dicts with keys ``"content"``, ``"metadata"`` and
            ``"score"`` (the inner product reported by FAISS), in the order
            FAISS returns them.
        """
        if k <= 0:
            return []

        # Embed the query and shape it as a single-row float32 matrix.
        query_embedding = embedder.embedding_model.embed_query(query)
        query_matrix = np.ascontiguousarray([query_embedding], dtype=np.float32)

        # Search in FAISS; both outputs have shape (1, k).
        scores, ids = self.index.search(query_matrix, k)

        results = []
        for score, idx in zip(scores[0], ids[0]):
            position = int(idx)
            if position < 0:  # FAISS pads with -1 when fewer than k hits exist
                continue
            results.append({
                "content": self.documents[position],
                "metadata": self.metadatas[position],
                "score": float(score),
            })
        return results