Spaces:
Sleeping
Sleeping
File size: 1,634 Bytes
31e2c70 c2736eb 31e2c70 c2736eb 31e2c70 c2736eb 31e2c70 c2736eb 31e2c70 c2736eb 31e2c70 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
import os
from typing import Any, Dict, List, Optional

from langchain.chains import RetrievalQA
from langchain_community.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_huggingface import HuggingFaceEmbeddings
class WatershedChatbot:
    """Retrieval-augmented question answering over a local FAISS index.

    On construction the chatbot creates a Hugging Face embedding model, loads
    the FAISS index stored under ``dpr_vector_store_hf`` and creates a Gemini
    chat model. Queries may optionally be restricted to documents whose
    ``district`` metadata equals a given value.
    """

    # Writable directory used for downloaded Hugging Face artefacts.
    _HF_CACHE_DIR = "/tmp/hf_cache"
    _EMBEDDING_MODEL = "all-MiniLM-L6-v2"
    _VECTORSTORE_PATH = "dpr_vector_store_hf"
    _LLM_MODEL = "gemini-2.0-flash"
    # District value that means "do not filter by district".
    _NO_FILTER = "All"

    def __init__(self):
        """Load the embedding model, the FAISS index and the chat model."""
        os.environ["HF_HOME"] = self._HF_CACHE_DIR
        # HF_HOME may already have been read when the Hugging Face packages
        # were imported, so the cache directory is also passed explicitly.
        self.embedding = HuggingFaceEmbeddings(
            model_name=self._EMBEDDING_MODEL,
            cache_folder=self._HF_CACHE_DIR,
        )
        # NOTE(security): allow_dangerous_deserialization=True lets the loader
        # unpickle the stored index; only load index files from a trusted
        # source.
        self.vectorstore = FAISS.load_local(
            self._VECTORSTORE_PATH,
            embeddings=self.embedding,
            allow_dangerous_deserialization=True,
        )
        # Reads the docstore's private ``_dict`` mapping to enumerate every
        # stored document; this may break if the docstore implementation
        # changes.
        self.all_districts = self._collect_districts(
            self.vectorstore.docstore._dict.values()
        )
        # No API key is passed here, so credentials presumably come from the
        # environment -- confirm against the deployment configuration.
        self.llm = ChatGoogleGenerativeAI(model=self._LLM_MODEL, temperature=0.3)

    @staticmethod
    def _collect_districts(documents) -> List[str]:
        """Return the sorted, de-duplicated ``district`` values of *documents*.

        Documents with a missing, ``None`` or empty district are skipped so
        the result holds only real names and sorting never compares ``None``
        with a string.
        """
        found = {doc.metadata.get("district") for doc in documents}
        return sorted(name for name in found if name)

    @staticmethod
    def _search_kwargs(district: Optional[str]) -> Dict[str, Any]:
        """Build retriever ``search_kwargs`` for an optional district filter."""
        if not district or district == WatershedChatbot._NO_FILTER:
            return {}
        return {"filter": {"district": district}}

    @staticmethod
    def _format_sources(documents) -> List[Dict[str, Any]]:
        """Reduce retrieved documents to their source file and chunk index."""
        return [
            {
                "source_file": doc.metadata.get("source_file", "Unknown"),
                "chunk_index": doc.metadata.get("chunk_index", "N/A"),
            }
            for doc in documents
        ]

    def get_districts(self) -> List[str]:
        """Return the sorted district names found in the index at start-up."""
        return self.all_districts

    def answer_query(self, query: str, district: Optional[str] = None) -> Dict[str, Any]:
        """Answer *query* from the indexed documents.

        Args:
            query: The user's question.
            district: If given and not ``"All"``, only documents whose
                ``district`` metadata equals this value are retrieved.

        Returns:
            A dict with ``"answer"`` (the chain's result text) and
            ``"sources"`` (one ``{"source_file", "chunk_index"}`` dict per
            retrieved document).
        """
        retriever = self.vectorstore.as_retriever(
            search_kwargs=self._search_kwargs(district)
        )
        qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            retriever=retriever,
            return_source_documents=True,
        )
        # Calling the chain object directly is deprecated; invoke() is the
        # supported entry point and returns the same output mapping.
        result = qa_chain.invoke({"query": query})
        return {
            "answer": result["result"],
            "sources": self._format_sources(result["source_documents"]),
        }
|