File size: 2,025 Bytes
b2150c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import os 
import faiss
import pickle 

# On-disk locations of the persisted FAISS index and of the pickled metadata
# list. DATA_DIR is a relative path, so it resolves against the process's
# current working directory.
DATA_DIR = "data"
INDEX_PATH = os.path.join(DATA_DIR, "vector.index")
META_PATH = os.path.join(DATA_DIR, "vector_meta.pkl")

# Import-time side effect: create the data directory if it does not exist yet
# (exist_ok=True makes this a no-op when it is already there).
os.makedirs(DATA_DIR, exist_ok=True)

# Module-level slot for the shared VectorStore; stays None until
# get_vector_store() creates the instance on first use.
_vector_store = None

def get_vector_store(dim: int = 384):
    """Return the module-wide ``VectorStore``, building it on the first call.

    Args:
        dim: Dimensionality handed to ``VectorStore`` when the shared instance
            has to be created. Once an instance exists this argument is
            ignored and the cached instance is returned unchanged.

    Returns:
        The single ``VectorStore`` instance cached in ``_vector_store``.
    """
    global _vector_store
    # Fast path: the shared instance was already built by an earlier call.
    if _vector_store is not None:
        return _vector_store

    _vector_store = VectorStore(dim)
    return _vector_store

class VectorStore:
    """FAISS index paired with a parallel list of document metadata.

    Position ``i`` in the FAISS index corresponds to ``self.metadata[i]``.
    Both halves are written to ``INDEX_PATH`` / ``META_PATH`` after every
    successful ``add`` and are reloaded on construction when both files exist.
    """

    def __init__(self, dim: int):
        """Load the persisted store if present, otherwise start an empty one.

        Args:
            dim: Vector dimensionality used when a fresh ``IndexFlatL2`` has
                to be created. It is not applied to an index read from disk.
        """
        self.dim = dim
        self.index = None
        self.metadata = []

        self._load_or_create()

    def _load_or_create(self):
        """Populate ``self.index`` and ``self.metadata`` from disk or from scratch.

        The persisted state is used only when BOTH files exist; if either one
        is missing, a new empty ``IndexFlatL2(self.dim)`` and an empty
        metadata list are created instead.
        """
        if os.path.exists(INDEX_PATH) and os.path.exists(META_PATH):
            self.index = faiss.read_index(INDEX_PATH)
            # SECURITY: unpickling can execute arbitrary code. This is only
            # acceptable while META_PATH is produced exclusively by _persist()
            # and the data directory is not writable by untrusted parties.
            with open(META_PATH, "rb") as f:
                self.metadata = pickle.load(f)
        else:
            self.index = faiss.IndexFlatL2(self.dim)
            self.metadata = []

    def add(self, embeddings, docs):
        """Append vectors and their documents, then persist the store.

        Args:
            embeddings: Batch of vectors passed straight to ``index.add``;
                must support ``len()`` and have one entry per document.
            docs: Metadata objects, one per embedding, in the same order.

        Raises:
            ValueError: If ``embeddings`` and ``docs`` differ in length.
                Nothing is modified in that case.
            RuntimeError: If, after the insert, the index size and the
                metadata length disagree. The inconsistent state is NOT
                written to disk.
        """
        if len(embeddings) != len(docs):
            raise ValueError(
                f"Embedding count: ({len(embeddings)}) "
                f"!= doc count: ({len(docs)})"
            )

        # Index first: if FAISS rejects the batch, metadata is left untouched.
        self.index.add(embeddings)
        self.metadata.extend(docs)

        # Verify alignment BEFORE persisting so that a detected mismatch never
        # replaces the last good copy on disk.
        if self.index.ntotal != len(self.metadata):
            raise RuntimeError(
                f"FAISS Index ({self.index.ntotal}) "
                f"!= metadata: ({len(self.metadata)})"
            )

        self._persist()

    def search(self, query_embedding, k=5):
        """Return the metadata of the nearest stored vectors.

        Args:
            query_embedding: Query batch passed straight to ``index.search``;
                only the first row of the returned ids is used.
            k: Maximum number of results; capped at the number of stored
                vectors.

        Returns:
            A list of metadata entries in the order the index returned them.
            Empty when the index holds no vectors.
        """
        if self.index.ntotal == 0:
            return []

        k = min(k, self.index.ntotal)
        _, indices = self.index.search(query_embedding, k)
        # Skip ids outside the metadata range (this also drops negative
        # placeholder ids) instead of raising or wrapping around.
        return [
            self.metadata[idx]
            for idx in indices[0]
            if 0 <= idx < len(self.metadata)
        ]

    def _persist(self):
        """Write the index and metadata to disk via temp files.

        Both files are fully written under temporary names first and only
        then moved over the live paths with ``os.replace``. A failure while
        writing therefore leaves the previous files intact rather than a
        truncated index or an index/metadata pair that is out of step.
        """
        index_tmp = INDEX_PATH + ".tmp"
        meta_tmp = META_PATH + ".tmp"

        faiss.write_index(self.index, index_tmp)
        with open(meta_tmp, "wb") as f:
            pickle.dump(self.metadata, f)

        # Swap both files in only after both writes succeeded.
        os.replace(index_tmp, INDEX_PATH)
        os.replace(meta_tmp, META_PATH)