FerrellSyntheticIntelligence committed on
Commit
28ae43b
·
1 Parent(s): acc228c

feat: integrate production-grade FAISS MemoryEngine and services singleton

Browse files
Files changed (2) hide show
  1. memory_engine.py +42 -0
  2. services.py +13 -0
memory_engine.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import threading
4
+ from pathlib import Path
5
+ from typing import Any, Dict, List, Tuple
6
+ import faiss
7
+ import numpy as np
8
+
9
def _ensure_numpy(vec: np.ndarray, dim: int) -> np.ndarray:
    """Validate *vec* and return it as a C-contiguous float32 array.

    Args:
        vec: One-dimensional numpy array holding a single vector.
        dim: Required number of elements in *vec*.

    Returns:
        A C-contiguous ``float32`` array with the same values. When *vec*
        already has that dtype and layout it is returned without copying,
        so the result may share memory with the input.

    Raises:
        TypeError: If *vec* is not a ``numpy.ndarray``.
        ValueError: If *vec* is not 1-dimensional or its length is not *dim*.
    """
    if not isinstance(vec, np.ndarray):
        raise TypeError("Vector must be a numpy.ndarray")
    if vec.ndim != 1:
        raise ValueError("Vector must be 1-dimensional")
    if vec.shape[0] != dim:
        raise ValueError(f"Vector length {vec.shape[0]} does not match dim={dim}")
    # One call handles both the dtype cast and the layout fix, and copies
    # only when needed (astype() followed by ascontiguousarray() could copy
    # twice and always copied at least once).
    return np.ascontiguousarray(vec, dtype=np.float32)
14
+
15
+ class MemoryEngine:
16
+ def __init__(self, dim: int, index_factory: str = "Flat", metric: str = "l2"):
17
+ self.dim = dim
18
+ self._lock = threading.RLock()
19
+ self.metric = faiss.METRIC_L2 if metric == "l2" else faiss.METRIC_INNER_PRODUCT
20
+ self.index = faiss.index_factory(dim, index_factory, self.metric)
21
+ self._metadata: Dict[int, Dict[str, Any]] = {}
22
+
23
+ def add(self, vector: np.ndarray, meta: Dict[str, Any] | None = None) -> int:
24
+ vec = _ensure_numpy(vector, self.dim)
25
+ with self._lock:
26
+ self.index.add(np.expand_dims(vec, axis=0))
27
+ new_id = self.index.ntotal - 1
28
+ self._metadata[new_id] = meta or {}
29
+ return new_id
30
+
31
+ def query(self, vector: np.ndarray, k: int = 5) -> List[Tuple[int, float, Dict[str, Any]]]:
32
+ vec = _ensure_numpy(vector, self.dim)
33
+ with self._lock:
34
+ distances, ids = self.index.search(np.expand_dims(vec, axis=0), k)
35
+ return [(int(idx), float(dist), self._metadata.get(int(idx), {}))
36
+ for idx, dist in zip(ids[0], distances[0]) if idx != -1]
37
+
38
+ def save(self, folder: str):
39
+ path = Path(folder)
40
+ path.mkdir(parents=True, exist_ok=True)
41
+ faiss.write_index(self.index, str(path / "faiss.index"))
42
+ with (path / "metadata.json").open("w") as f: json.dump(self._metadata, f)
services.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from memory_engine import MemoryEngine
3
+
4
EMBED_DIM = 768
# Folder checked for a persisted engine on first access; the path is
# relative, so it resolves against the current working directory.
_MEMORY_STORE_DIR = "data/memory_store"
# Process-wide engine instance, created lazily by get_memory_engine().
_memory_engine = None


def get_memory_engine():
    """Return the shared MemoryEngine, creating it on first call.

    On first call a ``MemoryEngine`` with ``dim=EMBED_DIM`` is built and, if
    the ``data/memory_store`` directory exists, its ``load`` method is called
    with that folder (this requires ``MemoryEngine`` to provide ``load``).

    The instance is stored in the module global only after ``load`` has
    returned. If loading raises, nothing is cached and the next call retries
    instead of silently handing out an empty engine.

    Returns:
        The module-wide ``MemoryEngine`` instance.
    """
    global _memory_engine
    if _memory_engine is None:
        engine = MemoryEngine(dim=EMBED_DIM)
        if Path(_MEMORY_STORE_DIR).is_dir():
            engine.load(_MEMORY_STORE_DIR)
        _memory_engine = engine
    return _memory_engine