deepdive-IR / vector /store.py
Ritabanm's picture
Upload 25 files
94b06be verified
# vector/store.py — NumPy cosine index (no FAISS)
from typing import List, Tuple
from pathlib import Path
import json, numpy as np
class FaissStore:
    """Brute-force cosine-similarity vector store backed by NumPy.

    Despite the name, no FAISS index is involved: vectors are kept in a
    single 2-D ``float32`` array and every search scans all rows. The name
    is kept so existing callers keep working.

    State is persisted in ``index_dir`` as two files: ``vecs.npy`` (the
    embedding matrix) and ``meta.json`` (one metadata entry per row, in the
    same order as the rows).
    """

    def __init__(self, dim: int, index_dir: str):
        """Open the store in *index_dir*, loading saved state if present.

        Args:
            dim: Embedding dimensionality, used to shape the empty matrix
                when no saved vectors exist.
            index_dir: Directory holding ``vecs.npy`` and ``meta.json``;
                created (with parents) if it does not exist.
        """
        self.dim = dim
        self.index_dir = Path(index_dir)
        self.index_dir.mkdir(parents=True, exist_ok=True)
        self.vec_path = self.index_dir / "vecs.npy"
        self.meta_path = self.index_dir / "meta.json"
        if self.vec_path.exists():
            self.vecs = np.load(self.vec_path)
        else:
            self.vecs = np.zeros((0, dim), dtype="float32")
        if self.meta_path.exists():
            # Explicit encoding so the file reads the same on every platform.
            self.meta = json.loads(self.meta_path.read_text(encoding="utf-8"))
        else:
            self.meta = []

    def add(self, embeddings: List[List[float]], metadatas: List[dict]) -> None:
        """Append embeddings and their metadata to the in-memory store.

        Args:
            embeddings: One vector per item. A single flat vector is treated
                as one row; an empty sequence is a no-op.
            metadatas: One metadata entry per embedding, in the same order.

        Raises:
            ValueError: If the number of embeddings differs from the number
                of metadata entries. Nothing is modified in that case.
        """
        arr = np.asarray(embeddings, dtype="float32")
        if arr.size == 0:
            # np.asarray([]) is shape (0,), which np.vstack cannot stack
            # onto an (n, dim) matrix; normalise it to zero rows.
            arr = arr.reshape(0, self.dim)
        elif arr.ndim == 1:
            # A single flat vector: make it one row regardless of whether
            # the store is currently empty.
            arr = arr.reshape(1, -1)
        if arr.shape[0] != len(metadatas):
            # Rows and metadata are matched by position; letting them drift
            # apart would make search() return wrong entries or IndexError.
            raise ValueError(
                f"got {arr.shape[0]} embeddings but {len(metadatas)} metadatas"
            )
        if arr.shape[0] == 0:
            return
        self.vecs = np.vstack([self.vecs, arr]) if self.vecs.size else arr
        self.meta.extend(metadatas)

    def save(self) -> None:
        """Write the vectors and metadata to ``index_dir``."""
        np.save(self.vec_path, self.vecs)
        self.meta_path.write_text(json.dumps(self.meta), encoding="utf-8")

    def search(self, query_emb: List[float], k: int = 6) -> List[Tuple[float, dict]]:
        """Return the *k* stored items most cosine-similar to *query_emb*.

        Args:
            query_emb: Query vector; a ``(1, dim)`` array is flattened.
                The caller's array is never modified.
            k: Maximum number of results. Values ``<= 0`` yield no results.

        Returns:
            ``(similarity, metadata)`` pairs sorted by descending similarity;
            fewer than *k* if the store holds fewer items.
        """
        if k <= 0 or len(self.meta) == 0:
            return []
        q = np.asarray(query_emb, dtype="float32").ravel()
        # Out-of-place division: np.asarray may hand back the caller's own
        # array, and `/=` would silently normalise it in place.
        q = q / (np.linalg.norm(q) + 1e-9)
        # The epsilon keeps all-zero vectors from dividing by zero.
        X = self.vecs / (np.linalg.norm(self.vecs, axis=1, keepdims=True) + 1e-9)
        sims = X @ q
        idx = np.argsort(-sims)[:k]
        return [(float(sims[i]), self.meta[i]) for i in idx]