Spaces:
Running
Running
| """ | |
| vector_store.py | |
| --------------- | |
| Creates, persists, and loads a FAISS vector index. | |
| Key behaviours | |
| -------------- | |
| * build() → embeds documents and stores FAISS index | |
| * load() → loads existing FAISS index from disk | |
| * search() → returns top-K (Document, normalized score) | |
| """ | |
| import logging | |
| from pathlib import Path | |
| from typing import List, Tuple | |
| from langchain.schema import Document | |
| from langchain_community.vectorstores import FAISS | |
| from app.config import VECTOR_DB_PATH | |
| from components.embedder import HuggingFaceEmbedder | |
| logger = logging.getLogger(__name__) | |
class VectorStore:
    """Wrapper around a LangChain FAISS index that builds, persists, loads and searches it.

    The index lives in memory in ``self._store`` and is written to the
    directory ``self.index_path`` after every mutation (``build`` and
    ``add_documents``).
    """

    def __init__(
        self,
        embedder: HuggingFaceEmbedder | None = None,
        index_path: str = VECTOR_DB_PATH,
    ) -> None:
        """Create a wrapper with no index loaded yet.

        Args:
            embedder: Embedding object handed to FAISS. A default
                ``HuggingFaceEmbedder()`` is created when omitted.
            index_path: Directory the index is saved to and loaded from.
        """
        # Compare against None explicitly so a supplied embedder is never
        # discarded just because it happens to evaluate as falsy.
        self.embedder = embedder if embedder is not None else HuggingFaceEmbedder()
        self.index_path = index_path
        # Stays None until build() or load() succeeds.
        self._store: FAISS | None = None

    # ─────────────────────────────────────────────
    # Build index
    # ─────────────────────────────────────────────
    def build(self, documents: List[Document]) -> None:
        """Embed ``documents`` into a new FAISS index and save it to disk.

        Any index previously held by this instance is replaced.

        Args:
            documents: Chunks to embed; must be non-empty.

        Raises:
            ValueError: If ``documents`` is empty.
        """
        if not documents:
            raise ValueError("Cannot build vector store from empty documents.")
        logger.info("Building FAISS index from %d chunks …", len(documents))
        self._store = FAISS.from_documents(documents, self.embedder)
        self._persist()
        logger.info("FAISS index saved to '%s'.", self.index_path)

    # ─────────────────────────────────────────────
    # Load index
    # ─────────────────────────────────────────────
    def load(self) -> bool:
        """Load a previously saved index from ``self.index_path``.

        Returns:
            True if an index was loaded, False if no complete index exists
            on disk (the in-memory store is left untouched in that case).
        """
        index_dir = Path(self.index_path)
        if not (index_dir / "index.faiss").is_file():
            logger.info("No existing FAISS index found at '%s'.", self.index_path)
            return False
        # LangChain's save_local() writes index.pkl (docstore + id mapping)
        # next to index.faiss, and load_local() needs both. Treat a
        # half-written directory as "no index" instead of crashing.
        if not (index_dir / "index.pkl").is_file():
            logger.warning(
                "Incomplete FAISS index at '%s': 'index.pkl' is missing.",
                self.index_path,
            )
            return False

        logger.info("Loading FAISS index from '%s' …", self.index_path)
        # SECURITY: index.pkl is unpickled here, which can execute arbitrary
        # code. Only point index_path at directories this application wrote
        # itself; never load an index obtained from an untrusted source.
        self._store = FAISS.load_local(
            self.index_path,
            self.embedder,
            allow_dangerous_deserialization=True,
        )
        logger.info("FAISS index loaded (%d vectors).", self._store.index.ntotal)
        return True

    # ─────────────────────────────────────────────
    # Search
    # ─────────────────────────────────────────────
    def search(
        self,
        query: str,
        k: int = 4,
    ) -> List[Tuple[Document, float]]:
        """Return the ``k`` stored chunks closest to ``query``.

        Args:
            query: Text to embed and look up.
            k: Maximum number of results; must be at least 1.

        Returns:
            List of ``(Document, relevance_score)`` pairs in the order the
            index returned them, where the score lies in (0, 1] and a
            higher value means a closer match.

        Raises:
            RuntimeError: If neither build() nor load() has succeeded yet.
            ValueError: If ``k`` is less than 1.
        """
        self._require_store()
        if k < 1:
            # Fail early with a clear message rather than deep inside the
            # vector-store backend.
            raise ValueError(f"k must be a positive integer, got {k!r}.")

        hits = self._store.similarity_search_with_score(query, k=k)
        return [(doc, self._to_relevance(distance)) for doc, distance in hits]

    @staticmethod
    def _to_relevance(distance: float) -> float:
        """Map a raw distance onto a relevance score in (0, 1].

        NOTE(review): assumes the index reports a distance where smaller
        means closer (LangChain's default Euclidean strategy) — confirm if
        the store is ever created with a different distance strategy.
        """
        # Floating-point rounding can yield a slightly negative distance for
        # near-identical vectors. Clamping keeps the score <= 1 and rules
        # out a division by zero at distance == -1.
        return 1.0 / (1.0 + max(float(distance), 0.0))

    # ─────────────────────────────────────────────
    # Add documents
    # ─────────────────────────────────────────────
    def add_documents(self, documents: List[Document]) -> None:
        """Embed ``documents`` into the existing index and save it again.

        An empty list is a no-op: nothing is embedded and nothing is written.

        Args:
            documents: Chunks to append to the index.

        Raises:
            RuntimeError: If neither build() nor load() has succeeded yet.
        """
        self._require_store()
        if not documents:
            # Avoid handing an empty batch to the index and rewriting an
            # unchanged index to disk.
            logger.info("No chunks to add; index unchanged.")
            return
        self._store.add_documents(documents)
        self._persist()
        logger.info("Added %d chunks to index.", len(documents))

    # ─────────────────────────────────────────────
    # Status
    # ─────────────────────────────────────────────
    def is_ready(self) -> bool:
        """Return True once an index has been built or loaded."""
        return self._store is not None

    # ─────────────────────────────────────────────
    # Internal helpers
    # ─────────────────────────────────────────────
    def _persist(self) -> None:
        """Write the in-memory index to ``self.index_path``, creating the directory."""
        # Guard so a misuse surfaces as the documented RuntimeError rather
        # than an AttributeError on None.
        self._require_store()
        Path(self.index_path).mkdir(parents=True, exist_ok=True)
        self._store.save_local(self.index_path)

    def _require_store(self) -> None:
        """Raise RuntimeError unless an index has been built or loaded."""
        if self._store is None:
            raise RuntimeError(
                "Vector store not ready. Call build() or load() first."
            )