kgbchatbot / src /retrieval /vectorstore.py
thomascerniglia's picture
Initial commit: add app and source
4abd84c
import os
import faiss
import numpy as np
import pickle
from typing import List, Tuple
from src.config import Settings
class VectorStore:
    """FAISS inner-product index paired with the texts it was built from.

    Row ``i`` of the index corresponds to ``self.docs[i]``. The on-disk
    locations are read from ``settings.index_path`` (FAISS index) and
    ``settings.docs_path`` (pickled list of texts).
    """

    def __init__(self, settings: "Settings"):
        """Create an empty store; call :meth:`build` or :meth:`load` before searching.

        Args:
            settings: Object exposing ``index_path`` and ``docs_path``.
        """
        self.settings = settings
        self.index = None  # set by build() / load()
        self.docs: List[str] = []

    def build(self, texts: List[str], embedder) -> "VectorStore":
        """Embed *texts* and build a fresh in-memory index over them.

        Args:
            texts: Documents to index.
            embedder: Callable taking a list of strings and returning one
                embedding row per string (a 2-D array-like).

        Returns:
            ``self``, to allow chaining (e.g. ``store.build(...).save()``).

        Raises:
            ValueError: If the embedder output is not 2-D.
        """
        # Copy so that later mutation of the caller's list cannot
        # desynchronise the stored docs from the index rows.
        self.docs = list(texts)
        # FAISS needs a C-contiguous float32 matrix.
        X = np.ascontiguousarray(embedder(self.docs), dtype="float32")
        if X.ndim != 2:
            raise ValueError(
                f"embedder must return a 2-D array of shape (n_texts, dim); got shape {X.shape}"
            )
        # Inner-product index; scores equal cosine similarity only if the
        # embedder returns unit-normalised vectors (not checked here).
        self.index = faiss.IndexFlatIP(X.shape[1])
        self.index.add(X)
        return self

    def save(self):
        """Write the index and the documents to the paths given in settings.

        Parent directories are created when missing.

        Raises:
            RuntimeError: If no index has been built or loaded yet.
        """
        if self.index is None:
            raise RuntimeError("Vector index is not initialised. Call build() or load() first.")
        for path in (self.settings.index_path, self.settings.docs_path):
            parent = os.path.dirname(path)
            if parent:  # empty string means "current directory"
                os.makedirs(parent, exist_ok=True)
        faiss.write_index(self.index, self.settings.index_path)
        with open(self.settings.docs_path, "wb") as f:
            pickle.dump(self.docs, f)

    def load(self) -> "VectorStore":
        """Read the index and the documents back from the paths in settings.

        Returns:
            ``self``, to allow chaining.

        Raises:
            FileNotFoundError: If either the index file or the docs file is missing.
        """
        index_path = self.settings.index_path
        docs_path = self.settings.docs_path
        if not (os.path.exists(index_path) and os.path.exists(docs_path)):
            raise FileNotFoundError("Index or docs not found. Run ingestion first.")
        self.index = faiss.read_index(index_path)
        # SECURITY: pickle.load executes arbitrary code embedded in the file.
        # Only load docs files produced by save() from a trusted location.
        with open(docs_path, "rb") as f:
            self.docs = pickle.load(f)
        return self

    def search(self, query: str, embedder, k: int = 5) -> List[Tuple[str, float]]:
        """Return up to *k* documents with the highest inner-product score.

        Args:
            query: Text to search for.
            embedder: Same kind of callable as used in :meth:`build`; it is
                called with ``[query]``.
            k: Maximum number of hits. Values larger than the index size are
                clamped; non-positive values yield an empty list.

        Returns:
            ``(document, score)`` pairs in the order the index returns them.

        Raises:
            RuntimeError: If no index has been built or loaded yet.
        """
        if self.index is None:
            raise RuntimeError("Vector index is not initialised. Call build() or load() first.")
        # Never ask for more neighbours than exist: FAISS would pad the
        # result with -1 ids that we would only throw away again.
        k = min(k, self.index.ntotal)
        if k <= 0:
            return []
        q = np.ascontiguousarray(embedder([query]), dtype="float32")
        if q.ndim == 1:
            # Tolerate an embedder that returns a single flat vector.
            q = q.reshape(1, -1)
        sims, ids = self.index.search(q, k)
        # -1 is the id FAISS uses for "no result" slots.
        return [
            (self.docs[int(idx)], float(score))
            for idx, score in zip(ids[0], sims[0])
            if idx != -1
        ]