import os
from typing import Optional

from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains import RetrievalQA


class WatershedChatbot:
    """Retrieval-augmented question answering over a local FAISS index.

    On construction the chatbot loads the ``dpr_vector_store_hf`` FAISS
    index from disk, collects the distinct ``district`` metadata values of
    the stored documents, and creates a Gemini chat model used to answer
    queries.
    """

    def __init__(self):
        """Load the embedding model, the FAISS index, and the chat model."""
        # Point the Hugging Face cache at a writeable directory.
        # NOTE(review): this runs after the Hugging Face packages have been
        # imported at module level; if those libraries resolve HF_HOME at
        # import time this assignment may come too late -- confirm, and
        # consider setting it before the imports if so.
        os.environ['HF_HOME'] = '/tmp/hf_cache'

        # Embedding model passed to FAISS when loading the index.
        self.embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

        # SECURITY: allow_dangerous_deserialization=True makes load_local
        # unpickle the stored docstore. Only load index files that this
        # project produced itself; never point this at untrusted data.
        self.vectorstore = FAISS.load_local(
            "dpr_vector_store_hf",
            embeddings=self.embedding,
            allow_dangerous_deserialization=True,
        )

        # NOTE: ``_dict`` is a private attribute of the docstore; it is used
        # here to enumerate every stored document's metadata.
        districts = {
            doc.metadata.get("district")
            for doc in self.vectorstore.docstore._dict.values()
        }
        # Drop missing/blank districts so the list has no empty entry and
        # sorting cannot fail on a None mixed in with strings.
        self.all_districts = sorted(name for name in districts if name)

        self.llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.3)

    def get_districts(self):
        """Return the sorted district names found in the index metadata.

        A copy is returned so callers cannot mutate the cached list.
        """
        return list(self.all_districts)

    def answer_query(self, query: str, district: Optional[str] = None):
        """Answer ``query`` using documents retrieved from the vector store.

        Args:
            query: The user's question.
            district: When given and not equal to ``"All"``, retrieval is
                restricted to documents whose ``district`` metadata matches
                this value. ``None``, an empty string, or ``"All"`` disables
                the filter.

        Returns:
            A dict with two keys: ``"answer"`` (the chain's ``result``) and
            ``"sources"``, a list with one dict per retrieved document
            holding its ``source_file`` (default ``"Unknown"``) and
            ``chunk_index`` (default ``"N/A"``) metadata.
        """
        search_kwargs = {}
        if district and district != "All":
            search_kwargs["filter"] = {"district": district}
        retriever = self.vectorstore.as_retriever(search_kwargs=search_kwargs)

        qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            retriever=retriever,
            return_source_documents=True,
        )
        # Use invoke() rather than the deprecated direct chain call;
        # "query" is the input key RetrievalQA expects.
        result = qa_chain.invoke({"query": query})

        sources = [
            {
                "source_file": doc.metadata.get("source_file", "Unknown"),
                "chunk_index": doc.metadata.get("chunk_index", "N/A"),
            }
            for doc in result["source_documents"]
        ]
        return {"answer": result["result"], "sources": sources}