# watershed-project / chatbot.py
# Chatbot answering questions over district DPR documents
# (FAISS retrieval + HuggingFace embeddings + Gemini LLM).
import os
from typing import Optional

from langchain.chains import RetrievalQA
from langchain_community.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_huggingface import HuggingFaceEmbeddings
class WatershedChatbot:
    """Question-answering chatbot over district DPR documents.

    Loads a locally saved FAISS vector store of document chunks, embeds
    queries with a sentence-transformers model, and answers questions with
    a Gemini chat model through a LangChain RetrievalQA chain, optionally
    restricting retrieval to a single district.
    """

    def __init__(
        self,
        store_path: str = "dpr_vector_store_hf",
        embedding_model: str = "all-MiniLM-L6-v2",
        llm_model: str = "gemini-2.0-flash",
        temperature: float = 0.3,
    ):
        """Load the embedding model, FAISS index, and LLM.

        Args:
            store_path: Directory containing the saved FAISS index.
            embedding_model: HuggingFace sentence-embedding model name.
            llm_model: Google Generative AI chat model name.
            temperature: Sampling temperature for the LLM.
        """
        # HF model downloads need a writeable cache dir (e.g. on Spaces,
        # where the default cache location is read-only).
        os.environ["HF_HOME"] = "/tmp/hf_cache"
        self.embedding = HuggingFaceEmbeddings(model_name=embedding_model)
        # allow_dangerous_deserialization is required to unpickle a local
        # FAISS store; acceptable only because the index ships with the app.
        self.vectorstore = FAISS.load_local(
            store_path,
            embeddings=self.embedding,
            allow_dangerous_deserialization=True,
        )
        # NOTE(review): reaches into the private docstore._dict because
        # there is no public "iterate all documents" API — revisit if the
        # langchain internals change.
        self.all_districts = sorted(
            {
                doc.metadata.get("district", "")
                for doc in self.vectorstore.docstore._dict.values()
            }
        )
        self.llm = ChatGoogleGenerativeAI(model=llm_model, temperature=temperature)

    def get_districts(self) -> list:
        """Return the sorted district names present in the vector store."""
        return self.all_districts

    def answer_query(self, query: str, district: Optional[str] = None) -> dict:
        """Answer *query*, optionally restricted to one district.

        Args:
            query: The user's natural-language question.
            district: District name to filter retrieval by; None or "All"
                searches every document.

        Returns:
            Dict with "answer" (the LLM's response text) and "sources"
            (list of {"source_file", "chunk_index"} dicts describing the
            retrieved chunks).
        """
        retriever = self.vectorstore.as_retriever(
            search_kwargs={"filter": {"district": district}}
            if district and district != "All"
            else {}
        )
        qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm, retriever=retriever, return_source_documents=True
        )
        # .invoke replaces the deprecated Chain.__call__ API; RetrievalQA
        # expects its input under the "query" key.
        result = qa_chain.invoke({"query": query})
        return {
            "answer": result["result"],
            "sources": [
                {
                    "source_file": doc.metadata.get("source_file", "Unknown"),
                    "chunk_index": doc.metadata.get("chunk_index", "N/A"),
                }
                for doc in result["source_documents"]
            ],
        }