# watershed-project / chatbot.py
# Chatbot answering questions over district DPR documents
# (FAISS retrieval + HuggingFace embeddings + Gemini LLM).
import os
from typing import Optional

from langchain.chains import RetrievalQA
from langchain_community.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_huggingface import HuggingFaceEmbeddings
class WatershedChatbot:
    """Question-answering chatbot over district DPR documents.

    Loads a locally saved FAISS vector store of document chunks, embeds
    queries with a sentence-transformers model, and answers questions with
    a Gemini chat model through a LangChain RetrievalQA chain, optionally
    restricting retrieval to a single district.
    """

    def __init__(
        self,
        store_path: str = "dpr_vector_store_hf",
        embedding_model: str = "all-MiniLM-L6-v2",
        llm_model: str = "gemini-2.0-flash",
        temperature: float = 0.3,
    ):
        """Load the embedding model, FAISS index, and LLM.

        Args:
            store_path: Directory containing the saved FAISS index.
            embedding_model: HuggingFace sentence-embedding model name.
            llm_model: Google Generative AI chat model name.
            temperature: Sampling temperature for the LLM.
        """
        # HF model downloads need a writeable cache dir (e.g. on Spaces,
        # where the default cache location is read-only).
        os.environ["HF_HOME"] = "/tmp/hf_cache"
        self.embedding = HuggingFaceEmbeddings(model_name=embedding_model)
        # allow_dangerous_deserialization is required to unpickle a local
        # FAISS store; acceptable only because the index ships with the app.
        self.vectorstore = FAISS.load_local(
            store_path,
            embeddings=self.embedding,
            allow_dangerous_deserialization=True,
        )
        # NOTE(review): reaches into the private docstore._dict because
        # there is no public "iterate all documents" API — revisit if the
        # langchain internals change.
        self.all_districts = sorted(
            {
                doc.metadata.get("district", "")
                for doc in self.vectorstore.docstore._dict.values()
            }
        )
        self.llm = ChatGoogleGenerativeAI(model=llm_model, temperature=temperature)

    def get_districts(self) -> list:
        """Return the sorted district names present in the vector store."""
        return self.all_districts

    def answer_query(self, query: str, district: Optional[str] = None) -> dict:
        """Answer *query*, optionally restricted to one district.

        Args:
            query: The user's natural-language question.
            district: District name to filter retrieval by; None or "All"
                searches every document.

        Returns:
            Dict with "answer" (the LLM's response text) and "sources"
            (list of {"source_file", "chunk_index"} dicts describing the
            retrieved chunks).
        """
        retriever = self.vectorstore.as_retriever(
            search_kwargs={"filter": {"district": district}}
            if district and district != "All"
            else {}
        )
        qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm, retriever=retriever, return_source_documents=True
        )
        # .invoke replaces the deprecated Chain.__call__ API; RetrievalQA
        # expects its input under the "query" key.
        result = qa_chain.invoke({"query": query})
        return {
            "answer": result["result"],
            "sources": [
                {
                    "source_file": doc.metadata.get("source_file", "Unknown"),
                    "chunk_index": doc.metadata.get("chunk_index", "N/A"),
                }
                for doc in result["source_documents"]
            ],
        }