Spaces:
Running
Running
| import os | |
| import faiss | |
| import numpy as np | |
| import pickle | |
| from typing import List, Tuple | |
| from src.config import Settings | |
class VectorStore:
    """FAISS-backed store pairing an inner-product index with its source texts.

    ``index`` is the FAISS index (``None`` until :meth:`build` or
    :meth:`load` succeeds). ``docs`` holds the texts; row ``i`` of the index
    corresponds to ``docs[i]``.
    """

    def __init__(self, settings: "Settings"):
        """Keep ``settings``; its ``index_path`` and ``docs_path`` are read by save/load."""
        self.settings = settings
        self.index = None
        self.docs: List[str] = []

    def build(self, texts: List[str], embedder) -> "VectorStore":
        """Embed ``texts`` and index the vectors in a fresh ``IndexFlatIP``.

        Args:
            texts: Documents to index.
            embedder: Callable taking a list of strings and returning one
                vector per string (anything convertible to a 2-D array).

        Returns:
            ``self``, to allow chaining.

        Raises:
            ValueError: If ``texts`` is empty or the embedder output is not a
                2-D array with one row per text.
        """
        # Copy so later mutation of the caller's list cannot desynchronise
        # the stored docs from the index rows.
        docs = list(texts)
        if not docs:
            raise ValueError("Cannot build an index from an empty list of texts.")

        # FAISS expects a C-contiguous float32 matrix.
        vectors = np.ascontiguousarray(embedder(docs), dtype="float32")
        if vectors.ndim != 2 or vectors.shape[0] != len(docs):
            raise ValueError(
                "Embedder must return a 2-D array with one row per text; "
                f"got shape {vectors.shape} for {len(docs)} texts."
            )

        index = faiss.IndexFlatIP(vectors.shape[1])
        index.add(vectors)

        # Commit state only once everything succeeded, so a failed build
        # leaves the previous docs/index untouched.
        self.docs = docs
        self.index = index
        return self

    def save(self):
        """Write the index to ``settings.index_path`` and the docs to ``settings.docs_path``.

        Raises:
            RuntimeError: If no index has been built or loaded yet.
        """
        if self.index is None:
            raise RuntimeError("Nothing to save. Call build() or load() first.")

        index_path = self.settings.index_path
        docs_path = self.settings.docs_path

        # Create missing parent directories so a first-time save does not fail.
        for path in (index_path, docs_path):
            parent = os.path.dirname(path)
            if parent:
                os.makedirs(parent, exist_ok=True)

        faiss.write_index(self.index, index_path)
        with open(docs_path, "wb") as f:
            pickle.dump(self.docs, f)

    def load(self) -> "VectorStore":
        """Read the index and docs back from the paths given in ``settings``.

        Returns:
            ``self``, to allow chaining.

        Raises:
            FileNotFoundError: If either file is missing.
        """
        index_path = self.settings.index_path
        docs_path = self.settings.docs_path
        if not (os.path.exists(index_path) and os.path.exists(docs_path)):
            raise FileNotFoundError("Index or docs not found. Run ingestion first.")

        index = faiss.read_index(index_path)
        # SECURITY: pickle.load can execute arbitrary code. Only load docs
        # files that this application wrote itself (e.g. via save()).
        with open(docs_path, "rb") as f:
            docs = pickle.load(f)

        self.index = index
        self.docs = docs
        return self

    def search(self, query: str, embedder, k: int = 5) -> List[Tuple[str, float]]:
        """Return up to ``k`` ``(document, score)`` pairs for ``query``.

        Scores are the inner products reported by the index; they equal
        cosine similarity only if the embedder yields unit-length vectors
        (not verified here).

        Args:
            query: Text to search for.
            embedder: Same kind of callable as used in :meth:`build`.
            k: Maximum number of hits; a non-positive value yields ``[]``.

        Raises:
            RuntimeError: If no index has been built or loaded yet.
        """
        if self.index is None:
            raise RuntimeError("Index is not available. Call build() or load() first.")
        if k <= 0:
            return []

        query_vec = np.ascontiguousarray(embedder([query]), dtype="float32")
        sims, ids = self.index.search(query_vec, k)

        # An id of -1 marks an unfilled result slot (fewer than k neighbours).
        return [
            (self.docs[int(idx)], float(score))
            for idx, score in zip(ids[0], sims[0])
            if idx != -1
        ]