File size: 1,709 Bytes
f051f2e
c044be1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f051f2e
c044be1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
\
import os, json
from typing import List
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

class Retriever:
    """Semantic search over a prebuilt FAISS index of text chunks.

    A missing index or metadata file does not raise: the instance is created
    in a "not ready" state, ``ready()`` reports ``False`` and ``reason()``
    explains which paths were looked for.
    """

    def __init__(self, index_path: str, meta_path: str):
        """Load the index and its metadata, or record why that is impossible.

        Args:
            index_path: Path of the serialized FAISS index.
            meta_path: Path of the JSON metadata file. It must contain a
                ``"docs"`` list (entries carrying a ``"text"`` field) and a
                ``"model"`` name passed to ``SentenceTransformer``.
        """
        # Define every attribute up front so a not-ready instance can still
        # be inspected without raising AttributeError.
        self.index = None
        self.docs = []
        self.model_name = None
        self.embed = None

        if not (os.path.exists(index_path) and os.path.exists(meta_path)):
            self._ready = False
            self._err = f"Missing index or meta at {index_path} / {meta_path}"
            return

        self.index = faiss.read_index(index_path)
        # Context manager guarantees the metadata file handle is closed.
        with open(meta_path, "r", encoding="utf-8") as fh:
            meta = json.load(fh)
        self.docs = meta["docs"]
        self.model_name = meta["model"]
        self.embed = SentenceTransformer(self.model_name)
        self._ready = True
        self._err = None

    def ready(self) -> bool:
        """Return True when the index and metadata were loaded."""
        return self._ready

    def reason(self) -> str:
        """Return why the retriever is not ready, or "" when it is."""
        return self._err or ""

    def retrieve(self, query: str, k: int = 6) -> List[str]:
        """Return the text of up to ``k`` chunks nearest to ``query``.

        Args:
            query: Text to embed and search for.
            k: Maximum number of chunks to return.

        Returns:
            Chunk texts in the order the index reports them. Empty when the
            retriever is not ready or ``k`` is not positive.
        """
        # Nothing to search, or nothing requested: skip the embedding work
        # and avoid handing a non-positive k to the index.
        if not self._ready or k <= 0:
            return []
        q = self.embed.encode([query], convert_to_numpy=True, normalize_embeddings=True)
        _, ids = self.index.search(q.astype(np.float32), k)
        doc_count = len(self.docs)
        # The bounds check drops ids that do not map to a stored chunk
        # (FAISS pads with -1 when it finds fewer than k neighbours).
        return [self.docs[i]["text"] for i in ids[0] if 0 <= i < doc_count]

# Process-wide Retriever instance, created on the first init_retriever() call.
_retriever = None


def init_retriever(index_path="rag_store/index.faiss", meta_path="rag_store/meta.json"):
    """Return the shared Retriever, building it on the first call.

    The paths are only used when the shared instance does not exist yet;
    once one has been created (ready or not), later calls return it
    unchanged and their arguments are ignored.
    """
    global _retriever
    if _retriever is not None:
        return _retriever
    _retriever = Retriever(index_path, meta_path)
    return _retriever

def retrieve_context(query: str, k: int = 6) -> str:
    """Return the retrieved chunks for ``query`` as one delimited string.

    Uses the shared retriever built with its default paths. Chunks are
    joined with a ``---`` line between them; when the retriever is not
    ready a fixed placeholder message is returned instead.
    """
    retriever = init_retriever()
    if retriever.ready():
        passages = retriever.retrieve(query, k=k)
        return "\n---\n".join(passages)
    return "(No policy index found. Run build_policy_index.py to enable RAG.)"