Spaces:
Running
on
Zero
Running
on
Zero
File size: 1,785 Bytes
dc563b1 5ee6662 dc563b1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
import os
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
import marko
class PNAKnowledgeBase:
    """Paragraph-level semantic search over a text guide.

    The guide file is split on blank lines into chunks, each chunk is
    embedded with the ``all-MiniLM-L6-v2`` sentence-transformer model, and
    the vectors are stored in a flat L2 FAISS index that ``search`` queries.

    Attributes are plain and public:
        guide_path: path of the guide file given to the constructor.
        encoder: the SentenceTransformer used for chunks and queries.
        chunks: list of chunk strings, in file order.
        index: the FAISS index, or ``None`` when nothing was indexed.
    """

    # Chunks whose stripped length does not exceed this are discarded
    # (headings, stray lines and similar fragments).
    MIN_CHUNK_CHARS = 50

    def __init__(self, guide_path: str) -> None:
        """Load the encoder and index the guide at *guide_path* if it exists.

        A missing guide is not an error: a warning is printed and the
        instance stays usable, with ``search`` returning an empty string.
        """
        self.guide_path = guide_path
        # Force CPU for embeddings to avoid ZeroGPU device mismatch
        self.encoder = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')
        self.chunks = []
        self.index = None

        # isfile (not exists) so that a directory path takes the warning
        # branch instead of failing later inside open().
        if os.path.isfile(guide_path):
            self._process_guide()
        else:
            print(f"Warning: Guide not found at {guide_path}")

    def _process_guide(self) -> None:
        """Read the guide, split it into chunks and build the FAISS index.

        If no chunk survives the length filter, ``self.index`` is left as
        ``None`` and a warning is printed rather than attempting to embed
        an empty list.
        """
        with open(self.guide_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # Simple chunking by paragraph/section
        # For nursing docs, we want to keep context together
        paragraphs = (part.strip() for part in content.split('\n\n'))
        self.chunks = [p for p in paragraphs if len(p) > self.MIN_CHUNK_CHARS]

        if not self.chunks:
            # With nothing to embed there is no embedding dimension to read,
            # so skip index creation entirely.
            self.index = None
            print(f"Warning: No usable chunks found in {self.guide_path}")
            return

        # Create FAISS index; the vectors are handed over as a contiguous
        # float32 matrix with one row per chunk.
        embeddings = np.ascontiguousarray(
            self.encoder.encode(self.chunks), dtype='float32'
        )
        self.index = faiss.IndexFlatL2(embeddings.shape[1])
        self.index.add(embeddings)
        print(f"Knowledge Base initialized with {len(self.chunks)} chunks.")

    def search(self, query: str, top_k: int = 3) -> str:
        """Return the chunks closest to *query*, joined into one string.

        Args:
            query: text to embed and look up.
            top_k: maximum number of chunks to return.

        Returns:
            The matching chunks in the order the index reports them,
            separated by ``"\\n\\n---\\n\\n"``. An empty string when no index
            was built, there are no chunks, or ``top_k`` is not positive.
        """
        if self.index is None or not self.chunks or top_k <= 0:
            return ""

        # Never ask for more neighbours than there are chunks.
        k = min(top_k, len(self.chunks))
        query_vector = np.ascontiguousarray(
            self.encoder.encode([query]), dtype='float32'
        )
        _distances, indices = self.index.search(query_vector, k)

        # Keep only ids that map to a stored chunk; this also drops the -1
        # placeholder ids that the original code filtered out.
        total = len(self.chunks)
        hits = [self.chunks[i] for i in indices[0] if 0 <= i < total]
        return "\n\n---\n\n".join(hits)
if __name__ == "__main__":
    # Manual smoke test: index the guide from the working directory and
    # print the best-matching chunks for one sample question.
    guide_file = "Professional nurse advocate A-EQUIP model Guide.md"
    sample_question = "What are the four functions of A-EQUIP?"
    knowledge_base = PNAKnowledgeBase(guide_file)
    answer = knowledge_base.search(sample_question)
    print(answer)
|