Spaces:
Sleeping
Sleeping
File size: 1,410 Bytes
# src/core/vector_store.py
class FAISSVectorStore:
    """In-memory vector store backed by a flat FAISS inner-product index.

    Chunk texts and their metadata are kept in two parallel lists whose
    positions match the order in which vectors were added to the index, so a
    row id returned by a FAISS search can be used directly as a list index.

    NOTE(review): scores are raw inner products. They equal cosine similarity
    only if both the stored embeddings and the query embedding are
    L2-normalized -- confirm that the embedder normalizes its output.
    """

    def __init__(self, embedding_dim: int = 384):  # GTE-small dimension
        """Create an empty store for vectors of size ``embedding_dim``."""
        self.index = faiss.IndexFlatIP(embedding_dim)  # Inner-product metric
        self.documents: List[str] = []
        self.metadatas: List[Dict] = []

    def add_documents(
        self,
        chunks: List[str],
        embeddings: List[List[float]],
        metadatas: List[Dict],
    ):
        """Add text chunks, their embeddings and metadata to the store.

        Args:
            chunks: Chunk texts, one per embedding.
            embeddings: One vector per chunk; each must have the dimension
                the index was created with.
            metadatas: One metadata dict per chunk.

        Raises:
            ValueError: If the three inputs differ in length, or if the
                embeddings do not form a 2-D matrix of the index dimension.
        """
        count = len(chunks)
        if not (count == len(embeddings) == len(metadatas)):
            raise ValueError(
                "chunks, embeddings and metadatas must have the same length "
                f"(got {count}, {len(embeddings)}, {len(metadatas)})"
            )
        if count == 0:
            # Nothing to add; an empty array would not be a valid 2-D matrix.
            return

        # FAISS expects a C-contiguous float32 matrix; a plain np.array() of
        # Python floats would be float64.
        vectors = np.ascontiguousarray(embeddings, dtype=np.float32)
        if vectors.ndim != 2 or vectors.shape[1] != self.index.d:
            raise ValueError(
                f"embeddings must have shape (n, {self.index.d}), "
                f"got {vectors.shape}"
            )

        # Add to the index first so the parallel lists are only extended
        # once the vectors are actually stored; this keeps row ids aligned
        # with list positions even if the add fails.
        self.index.add(vectors)
        self.documents.extend(chunks)
        self.metadatas.extend(metadatas)

    def similarity_search(
        self,
        query: str,
        embedder: "DocumentEmbedder",
        k: int = 5,
    ) -> List[Dict]:
        """Return up to ``k`` stored chunks ranked by inner-product score.

        Args:
            query: Query text to embed and search for.
            embedder: Object exposing ``embedding_model.embed_query(text)``,
                which returns the query vector.
            k: Maximum number of results; values <= 0 yield an empty list.

        Returns:
            A list of dicts with keys ``"content"``, ``"metadata"`` and
            ``"score"``, in the order returned by the index.
        """
        if k <= 0:
            return []
        total = self.index.ntotal
        if total == 0:
            return []

        # Embed query
        query_embedding = embedder.embedding_model.embed_query(query)
        query_matrix = np.ascontiguousarray([query_embedding], dtype=np.float32)

        # Search in FAISS; never ask for more neighbours than are stored.
        distances, indices = self.index.search(query_matrix, min(k, total))

        # Return results with metadata
        results = []
        for score, idx in zip(distances[0], indices[0]):
            if idx == -1:  # FAISS returns -1 for not found
                continue
            position = int(idx)
            results.append({
                "content": self.documents[position],
                "metadata": self.metadatas[position],
                "score": float(score),
            })
        return results