Spaces:
Sleeping
Sleeping
import os
from typing import Optional

from langchain.chains import RetrievalQA
from langchain_community.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_huggingface import HuggingFaceEmbeddings
class WatershedChatbot:
    """RAG chatbot over district watershed DPR documents.

    Loads a locally saved FAISS vector store built with HuggingFace
    sentence embeddings, and answers questions with a Gemini LLM through
    a RetrievalQA chain, optionally filtered to a single district.
    """

    def __init__(self):
        # HF model downloads need a writeable cache dir (the default home
        # is read-only on some hosts, e.g. HF Spaces).
        os.environ['HF_HOME'] = '/tmp/hf_cache'
        # Use valid HuggingFace model
        self.embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        # allow_dangerous_deserialization unpickles the saved index; this is
        # only acceptable because "dpr_vector_store_hf" is our own artifact —
        # never load an untrusted index this way.
        self.vectorstore = FAISS.load_local(
            "dpr_vector_store_hf",
            embeddings=self.embedding,
            allow_dangerous_deserialization=True
        )
        # NOTE(review): reaches into the private docstore._dict because FAISS
        # exposes no public "iterate all documents" API — may break across
        # langchain_community versions; confirm on upgrade.
        self.all_districts = sorted(
            {doc.metadata.get("district", "") for doc in self.vectorstore.docstore._dict.values()}
        )
        self.llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.3)

    def get_districts(self) -> list:
        """Return the sorted district names discovered in the vector store."""
        return self.all_districts

    def answer_query(self, query: str, district: Optional[str] = None) -> dict:
        """Answer ``query`` using retrieved document context.

        Args:
            query: Natural-language question to answer.
            district: If given and not ``"All"``, restrict retrieval to
                documents whose ``district`` metadata matches.

        Returns:
            Dict with ``"answer"`` (the LLM's text) and ``"sources"``, a list
            of ``{"source_file", "chunk_index"}`` dicts for each retrieved chunk.
        """
        retriever = self.vectorstore.as_retriever(
            search_kwargs={"filter": {"district": district}}
            if district and district != "All" else {}
        )
        qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm, retriever=retriever, return_source_documents=True
        )
        # .invoke() replaces the deprecated Chain.__call__ entry point.
        result = qa_chain.invoke({"query": query})
        return {
            "answer": result["result"],
            "sources": [
                {
                    "source_file": doc.metadata.get("source_file", "Unknown"),
                    "chunk_index": doc.metadata.get("chunk_index", "N/A")
                }
                for doc in result["source_documents"]
            ]
        }