Rupeia_Customer / src /retrieval.py
ayush2917's picture
Update src/retrieval.py
532e893 verified
raw
history blame contribute delete
904 Bytes
from langchain_community.vectorstores import FAISS
from langchain.docstore.document import Document
from langchain_huggingface import HuggingFaceEmbeddings
import json
class RetrievalSystem:
    """Similarity-search retriever backed by a FAISS vector store.

    The store is built once, at construction time, from a JSON file of
    documents that are embedded with a HuggingFace embedding model.
    """

    def __init__(self, document_path: str, embedder_model: str) -> None:
        """Create the embedder and build the vector store.

        Args:
            document_path: Path to the JSON file holding the documents.
            embedder_model: Model name passed to ``HuggingFaceEmbeddings``.

        Raises:
            ValueError: If the JSON file contains no documents.
        """
        self.embedder = HuggingFaceEmbeddings(model_name=embedder_model)
        self.vectorstore = self._build_vectorstore(document_path)

    def _build_vectorstore(self, document_path: str) -> "FAISS":
        """Load the JSON documents and index them in a FAISS store.

        Each entry of the JSON file must provide the keys ``"content"``,
        ``"category"`` and ``"subkeyword"``; the content becomes the page
        text and the other two are kept as metadata.

        Args:
            document_path: Path to the JSON file holding the documents.

        Returns:
            The FAISS vector store built from the loaded documents.

        Raises:
            ValueError: If the JSON file contains no documents.
        """
        # JSON is UTF-8 by specification; do not depend on the locale's
        # default encoding, which breaks non-ASCII content on some platforms.
        with open(document_path, "r", encoding="utf-8") as f:
            docs_data = json.load(f)

        documents = [
            Document(
                page_content=doc["content"],
                metadata={
                    "category": doc["category"],
                    "subkeyword": doc["subkeyword"],
                },
            )
            for doc in docs_data
        ]

        # Fail early with a clear message instead of letting the index
        # construction fail obscurely on an empty embedding list.
        if not documents:
            raise ValueError(f"No documents found in {document_path!r}")

        return FAISS.from_documents(documents, embedding=self.embedder)

    def get_context(self, query: str, k: int = 2) -> str:
        """Return the text of the ``k`` most similar documents to ``query``.

        Args:
            query: Text to search the vector store with.
            k: Number of documents to retrieve.

        Returns:
            The page contents of the retrieved documents, joined by a
            single space, in the order returned by the similarity search.
        """
        docs = self.vectorstore.similarity_search(query, k=k)
        return " ".join(doc.page_content for doc in docs)