Spaces:
Sleeping
Sleeping
File size: 1,709 Bytes
f051f2e c044be1 f051f2e c044be1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
\
import os, json
from typing import List
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
class Retriever:
    """Look up stored text chunks for a query via a FAISS index.

    The index is read from ``index_path`` and its metadata from ``meta_path``,
    a JSON file with a ``"docs"`` list (each entry carrying a ``"text"``
    field) and a ``"model"`` name that is handed to ``SentenceTransformer``.
    If either file is missing, the instance is still constructed but reports
    itself as not ready instead of raising.
    """

    def __init__(self, index_path: str, meta_path: str):
        """Load the index, metadata and embedding model if both files exist.

        Args:
            index_path: Path of the FAISS index file.
            meta_path: Path of the JSON metadata file.
        """
        # Give every attribute a value up front so a not-ready instance
        # never has missing attributes.
        self.index = None
        self.docs = []
        self.model_name = None
        self.embed = None
        self._ready = False
        self._err = None

        if not (os.path.exists(index_path) and os.path.exists(meta_path)):
            self._err = f"Missing index or meta at {index_path} / {meta_path}"
            return

        self.index = faiss.read_index(index_path)
        # Context manager so the metadata file handle is closed promptly.
        with open(meta_path, "r", encoding="utf-8") as fh:
            meta = json.load(fh)
        self.docs = meta["docs"]
        self.model_name = meta["model"]
        self.embed = SentenceTransformer(self.model_name)
        self._ready = True

    def ready(self) -> bool:
        """Return True when the index and metadata were loaded."""
        return self._ready

    def reason(self) -> str:
        """Return why the retriever is not ready, or "" if there is no error."""
        return self._err or ""

    def retrieve(self, query: str, k: int = 6) -> List[str]:
        """Return the text of up to ``k`` stored chunks matching ``query``.

        Args:
            query: Text to embed and search for.
            k: Maximum number of chunks to return.

        Returns:
            The ``"text"`` of each hit, in the order the index returned them.
            Empty when the retriever is not ready or ``k`` is not positive.
        """
        if not self._ready or k <= 0:
            return []
        query_vec = self.embed.encode(
            [query], convert_to_numpy=True, normalize_embeddings=True
        )
        _, hit_ids = self.index.search(query_vec.astype(np.float32), k)
        doc_count = len(self.docs)
        # Drop ids that do not address a stored doc (FAISS pads result
        # slots it could not fill with -1).
        return [
            self.docs[doc_id]["text"]
            for doc_id in hit_ids[0]
            if 0 <= doc_id < doc_count
        ]
# Process-wide Retriever instance, created on the first init_retriever() call.
_retriever = None


def init_retriever(index_path="rag_store/index.faiss", meta_path="rag_store/meta.json"):
    """Return the shared Retriever, building it on the first call.

    The paths are only used when the shared instance does not exist yet;
    later calls return the existing instance regardless of the arguments.
    """
    global _retriever
    if _retriever is not None:
        return _retriever
    _retriever = Retriever(index_path, meta_path)
    return _retriever
def retrieve_context(query: str, k: int = 6) -> str:
    """Return retrieved chunks for ``query`` joined into one string.

    Chunks are separated by a ``---`` line. When the shared retriever is not
    ready, a fixed placeholder message is returned instead.
    """
    retriever = init_retriever()
    if retriever.ready():
        passages = retriever.retrieve(query, k=k)
        return "\n---\n".join(passages)
    return "(No policy index found. Run build_policy_index.py to enable RAG.)"
|