File size: 1,410 Bytes
5cbcca5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# src/core/vector_store.py
class FAISSVectorStore:
    """In-memory vector store backed by a flat FAISS inner-product index.

    Chunk texts and their metadata are kept in two Python lists that run
    parallel to the FAISS index: the vector stored at index position ``i``
    belongs to ``documents[i]`` and ``metadatas[i]``.

    Scores are raw inner products. They equal cosine similarity only when
    both the stored embeddings and the query embedding are L2-normalized;
    this class does not normalize anything itself.
    """

    def __init__(self, embedding_dim: int = 384):  # GTE-small dimension
        """Create an empty store.

        Args:
            embedding_dim: Length of every embedding vector the store will
                hold (the default is noted above as the GTE-small dimension).
        """
        self.index = faiss.IndexFlatIP(embedding_dim)  # Inner product metric
        self.documents = []
        self.metadatas = []

    def add_documents(self, chunks: List[str], embeddings: List[List[float]], metadatas: List[Dict]):
        """Append chunks, their embeddings and their metadata to the store.

        Args:
            chunks: Text of each chunk.
            embeddings: One embedding per chunk, each of the index dimension.
            metadatas: One metadata dict per chunk.

        Raises:
            ValueError: If the three inputs differ in length, or the
                embeddings are not a 2-D matrix of the index dimension.
        """
        if not (len(chunks) == len(embeddings) == len(metadatas)):
            raise ValueError(
                "chunks, embeddings and metadatas must have the same length "
                f"(got {len(chunks)}, {len(embeddings)}, {len(metadatas)})"
            )
        if not chunks:
            return  # Nothing to add; avoids handing FAISS an empty array.

        # FAISS works on C-contiguous float32 matrices; Python floats would
        # otherwise become float64.
        vectors = np.ascontiguousarray(embeddings, dtype=np.float32)
        if vectors.ndim != 2 or vectors.shape[1] != self.index.d:
            raise ValueError(
                f"embeddings must have shape (n, {self.index.d}), got {vectors.shape}"
            )

        # Add to the index first: if FAISS rejects the batch, the parallel
        # lists stay untouched and positions remain aligned.
        self.index.add(vectors)
        self.documents.extend(chunks)
        self.metadatas.extend(metadatas)

    def similarity_search(self, query: str, embedder: "DocumentEmbedder", k: int = 5) -> List[Dict]:
        """Return up to ``k`` stored chunks ranked by inner product with the query.

        Args:
            query: Text to search for.
            embedder: Object whose ``embedding_model.embed_query(query)``
                returns the query embedding.
            k: Maximum number of results.

        Returns:
            A list of dicts with keys ``"content"``, ``"metadata"`` and
            ``"score"`` (the inner product as a float), in the order FAISS
            returns them. Empty when ``k <= 0`` or the store holds no
            documents.

        Raises:
            ValueError: If the query embedding does not match the index
                dimension.
        """
        if k <= 0 or not self.documents:
            return []  # Nothing can match; skip the embedding call entirely.

        query_embedding = embedder.embedding_model.embed_query(query)
        query_vector = np.ascontiguousarray(query_embedding, dtype=np.float32).reshape(1, -1)
        if query_vector.shape[1] != self.index.d:
            raise ValueError(
                f"query embedding has dimension {query_vector.shape[1]}, "
                f"expected {self.index.d}"
            )

        # Asking for more neighbours than stored vectors only yields -1 padding.
        k = min(k, len(self.documents))
        scores, positions = self.index.search(query_vector, k)

        return [
            {
                "content": self.documents[position],
                "metadata": self.metadatas[position],
                "score": float(score),
            }
            for score, position in zip(scores[0], positions[0].tolist())
            if position >= 0  # FAISS marks missing neighbours with -1
        ]