File size: 2,059 Bytes
59cece6
 
 
 
 
 
 
 
d8211f5
 
 
 
 
 
 
59cece6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from typing import List, Tuple
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
from .types import MemoryEntry

class VectorIndex:
    """In-memory semantic index over MemoryEntry objects backed by FAISS.

    Memories are embedded with a SentenceTransformer model and stored in an
    exact inner-product FAISS index. Because embeddings are L2-normalized,
    inner-product scores are cosine similarities.
    """

    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        """Load the embedding model on CPU and create an empty index.

        Args:
            model_name: SentenceTransformer model identifier.
        """
        # Fix for HuggingFace Spaces: avoid meta-tensor errors by explicitly
        # loading the model onto CPU.
        self.model = SentenceTransformer(
            model_name,
            device='cpu',  # Explicit CPU device
            trust_remote_code=False  # Disable to avoid security issues
        )
        self.dim = self.model.get_sentence_embedding_dimension()
        # Exact (brute-force) inner-product index: search cost is O(N * dim),
        # which is fine at the scale this cache is used for.
        self.index = faiss.IndexFlatIP(self.dim)
        # memory_id per indexed vector, position-aligned with FAISS row ids.
        self.ids: List = []
        # Raw entries retained for downstream use (e.g. BM25 over texts).
        self._mem_cache: List[MemoryEntry] = []

    def _embed(self, texts: List[str]) -> np.ndarray:
        """Encode texts into L2-normalized float32 embeddings of shape (n, dim)."""
        emb = self.model.encode(texts, convert_to_numpy=True, normalize_embeddings=True)
        return emb.astype("float32")

    def add_or_update(self, memories: List[MemoryEntry]) -> None:
        """Incrementally append memories to the index.

        NOTE: despite the name, existing vectors are never replaced — an
        updated memory is appended as a new row, so the index may contain
        duplicate memory_ids. Retrieval is expected to deduplicate by ID.

        Args:
            memories: Entries to embed and index. An empty list is a no-op.
        """
        if not memories:
            return  # avoid an empty encode() batch; nothing to record either

        self._mem_cache.extend(memories)
        # Keep IDs position-aligned with FAISS rows for later lookup.
        self.ids.extend([m.memory_id for m in memories])

        texts = [f"{m.type.value}|{m.key}={m.value}" for m in memories]
        self.index.add(self._embed(texts))

    def search(self, query: str, top_k: int = 10) -> List[Tuple[str, float]]:
        """Return up to top_k * 5 (memory_id, score) candidates for `query`.

        Deliberately over-fetches (5x top_k, capped at index size) so a
        downstream reranker has variety; callers are responsible for final
        truncation and deduplication by memory_id.

        Args:
            query: Free-text query to embed and match.
            top_k: Target result count the caller ultimately wants.

        Returns:
            (memory_id, cosine-similarity) pairs, best first.
            # assumes memory_id is str — confirm against MemoryEntry
        """
        if not self.ids:
            return []

        q = self._embed([query])
        k_search = min(top_k * 5, len(self.ids))
        scores, idxs = self.index.search(q, k_search)

        # FAISS pads missing results with index -1; skip those rows.
        return [
            (self.ids[int(idx)], float(score))
            for score, idx in zip(scores[0], idxs[0])
            if idx >= 0
        ]