Spaces:
Sleeping
Sleeping
| # src/core/vector_store.py | |
class FAISSVectorStore:
    """In-memory vector store backed by a flat FAISS inner-product index.

    Document texts and their metadata are kept in two parallel lists whose
    positions match the row ids FAISS assigns on ``add``. Vectors are
    L2-normalised before they are added or searched, so the inner-product
    score returned by the index is the cosine similarity.
    """

    def __init__(self, embedding_dim: int = 384):  # GTE-small dimension
        """Create an empty store for vectors of size ``embedding_dim``."""
        # Inner product over unit-length vectors equals cosine similarity.
        self.index = faiss.IndexFlatIP(embedding_dim)
        self.documents = []
        self.metadatas = []

    @staticmethod
    def _as_unit_rows(vectors):
        """Return ``vectors`` as a 2-D float32 array with unit-length rows."""
        # FAISS only accepts float32 input; plain Python floats would
        # otherwise become float64.
        matrix = np.ascontiguousarray(vectors, dtype=np.float32)
        if matrix.ndim == 1:
            matrix = matrix.reshape(1, -1)
        norms = np.linalg.norm(matrix, axis=1, keepdims=True)
        # Leave all-zero rows untouched instead of dividing by zero.
        norms[norms == 0.0] = 1.0
        return matrix / norms

    def add_documents(self, chunks: List[str], embeddings: List[List[float]], metadatas: List[Dict]):
        """Add text chunks with their embeddings and metadata to the store.

        Args:
            chunks: Text of each chunk.
            embeddings: One vector per chunk, each of the index dimension.
            metadatas: One metadata dict per chunk.

        Raises:
            ValueError: If the three inputs differ in length, or the
                embeddings do not form a matrix of the index dimension.
        """
        if not (len(chunks) == len(embeddings) == len(metadatas)):
            raise ValueError(
                "chunks, embeddings and metadatas must have the same length "
                f"(got {len(chunks)}, {len(embeddings)}, {len(metadatas)})"
            )
        if not chunks:
            return

        matrix = self._as_unit_rows(embeddings)
        if matrix.ndim != 2 or matrix.shape[1] != self.index.d:
            raise ValueError(
                f"embeddings must have shape (n, {self.index.d}), "
                f"got {matrix.shape}"
            )

        # Add to FAISS first: if that raises, the parallel lists are left
        # untouched and stay aligned with the index row ids.
        self.index.add(matrix)
        self.documents.extend(chunks)
        self.metadatas.extend(metadatas)

    def similarity_search(self, query: str, embedder: "DocumentEmbedder", k: int = 5) -> List[Dict]:
        """Return up to ``k`` stored chunks most similar to ``query``.

        Args:
            query: Text to search for.
            embedder: Object whose ``embedding_model.embed_query`` turns the
                query text into a vector.
            k: Maximum number of results.

        Returns:
            A list of dicts with keys ``"content"``, ``"metadata"`` and
            ``"score"``, in the order FAISS returns them. Empty when the
            store holds no documents or ``k`` is not positive.
        """
        if k <= 0 or not self.documents:
            return []

        # Embed query
        query_embedding = embedder.embedding_model.embed_query(query)
        query_matrix = self._as_unit_rows(query_embedding)

        # Search in FAISS; asking for more rows than are stored would only
        # yield -1 padding.
        top_k = min(k, self.index.ntotal)
        scores, indices = self.index.search(query_matrix, top_k)

        # Return results with metadata
        stored = len(self.documents)
        results = []
        for score, idx in zip(scores[0], indices[0]):
            position = int(idx)
            # FAISS returns -1 for not found; also skip any id that has no
            # matching entry in the parallel lists.
            if position < 0 or position >= stored:
                continue
            results.append({
                "content": self.documents[position],
                "metadata": self.metadatas[position],
                "score": float(score),
            })
        return results