# src/core/vector_store.py
class FAISSVectorStore:
    """In-memory vector store backed by a flat FAISS inner-product index.

    Chunk texts and their metadata are kept in two parallel Python lists.
    A hit returned by the index is used directly as a position into those
    lists, so the lists and the index must always grow together.

    NOTE(review): inner product equals cosine similarity only for
    L2-normalised vectors. Nothing here normalises, so this presumably
    relies on the embedder emitting unit-length vectors -- confirm.
    """

    def __init__(self, embedding_dim: int = 384):  # GTE-small dimension
        # Inner product for cosine similarity
        self.index = faiss.IndexFlatIP(embedding_dim)
        self.documents: List[str] = []
        self.metadatas: List[Dict] = []

    def add_documents(
        self,
        chunks: List[str],
        embeddings: List[List[float]],
        metadatas: List[Dict],
    ):
        """Append chunks, their embeddings and metadata to the store.

        Args:
            chunks: Chunk texts.
            embeddings: One vector per chunk, each of the index dimension.
            metadatas: One metadata dict per chunk.

        Raises:
            ValueError: If the three inputs differ in length, or the
                embeddings do not form a 2-D matrix whose width matches
                the index dimension.
        """
        if not (len(chunks) == len(embeddings) == len(metadatas)):
            raise ValueError(
                "chunks, embeddings and metadatas must have the same length "
                f"(got {len(chunks)}, {len(embeddings)}, {len(metadatas)})"
            )
        if len(chunks) == 0:
            # An empty batch would become a 1-D array, which the index
            # cannot accept; there is nothing to add anyway.
            return

        # FAISS operates on contiguous float32 matrices; Python floats
        # would otherwise turn into float64.
        vectors = np.ascontiguousarray(embeddings, dtype=np.float32)
        if vectors.ndim != 2 or vectors.shape[1] != self.index.d:
            raise ValueError(
                f"embeddings must have shape (n, {self.index.d}), "
                f"got {vectors.shape}"
            )

        # Update the index first: if it raises, the parallel lists are left
        # untouched and stay aligned with the index contents.
        self.index.add(vectors)
        self.documents.extend(chunks)
        self.metadatas.extend(metadatas)

    def similarity_search(
        self, query: str, embedder: "DocumentEmbedder", k: int = 5
    ) -> List[Dict]:
        """Return up to ``k`` stored chunks most similar to ``query``.

        Args:
            query: Text to search for.
            embedder: Object whose ``embedding_model.embed_query(query)``
                returns the query vector.
            k: Maximum number of results.

        Returns:
            A list of dicts with keys ``"content"``, ``"metadata"`` and
            ``"score"`` (the value reported by the index, as a float), in
            the order the index returned them. Empty when ``k <= 0`` or
            nothing has been indexed yet.
        """
        if k <= 0 or self.index.ntotal == 0:
            return []

        # Embed query
        query_embedding = embedder.embedding_model.embed_query(query)
        query_matrix = np.ascontiguousarray(
            query_embedding, dtype=np.float32
        ).reshape(1, -1)

        # Search in FAISS
        distances, indices = self.index.search(query_matrix, k)

        # Return results with metadata
        results = []
        for score, idx in zip(distances[0], indices[0]):
            if idx < 0:  # FAISS returns -1 for not found
                continue
            position = int(idx)
            results.append({
                "content": self.documents[position],
                "metadata": self.metadatas[position],
                "score": float(score),
            })
        return results