Spaces:
Sleeping
Sleeping
Lincoln Gombedza
Remove heavy ML deps for HF Spaces — RAG degrades gracefully without guide
aa4b661 unverified | """ | |
| RAG knowledge base for A-EQUIP guide. | |
| Uses sentence-transformers + FAISS for semantic search when available. | |
| Gracefully degrades to keyword search or no-op if not installed. | |
| """ | |
| import os | |
| import re | |
| import numpy as np | |
| from pathlib import Path | |
| class PNAKnowledgeBase: | |
| """FAISS-backed semantic search over A-EQUIP model guide. | |
| Gracefully degrades to no-op if: | |
| - sentence-transformers / faiss-cpu not installed | |
| - Guide file not found | |
| """ | |
| def __init__(self, guide_path: str | None = None): | |
| self.chunks: list[str] = [] | |
| self.index = None | |
| if guide_path is None: | |
| here = Path(__file__).parent.parent | |
| candidates = [ | |
| here / "assets" / "aequip_guide.md", | |
| here / "Professional nurse advocate A-EQUIP model Guide.md", | |
| ] | |
| for c in candidates: | |
| if c.exists(): | |
| guide_path = str(c) | |
| break | |
| if guide_path and os.path.exists(guide_path): | |
| self._build_index(guide_path) | |
| def _build_index(self, path: str): | |
| try: | |
| from sentence_transformers import SentenceTransformer | |
| import faiss | |
| except ImportError: | |
| return # RAG deps not installed β degrade gracefully | |
| with open(path, encoding="utf-8") as f: | |
| content = f.read() | |
| raw = re.split(r"\n{2,}", content) | |
| self.chunks = [c.strip() for c in raw if len(c.strip()) > 60] | |
| if not self.chunks: | |
| return | |
| encoder = SentenceTransformer("all-MiniLM-L6-v2", device="cpu") | |
| embeddings = encoder.encode(self.chunks, show_progress_bar=False) | |
| dim = embeddings.shape[1] | |
| self.index = faiss.IndexFlatL2(dim) | |
| self.index.add(np.array(embeddings, dtype="float32")) | |
| self._encoder = encoder | |
| def search(self, query: str, top_k: int = 3) -> str: | |
| if self.index is None or not self.chunks: | |
| return "" | |
| qvec = self._encoder.encode([query]) | |
| _, indices = self.index.search(np.array(qvec, dtype="float32"), top_k) | |
| results = [self.chunks[i] for i in indices[0] if i != -1] | |
| return "\n\n---\n\n".join(results) | |