|
|
import streamlit as st
|
|
|
from langchain.document_loaders import PyPDFLoader
|
|
|
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
|
from langchain.vectorstores import Chroma
|
|
|
from langchain.embeddings import HuggingFaceEmbeddings
|
|
|
from langchain.chains import RetrievalQA
|
|
|
from langchain_google_genai import ChatGoogleGenerativeAI
|
|
|
import tempfile
|
|
|
import os
|
|
|
from dotenv import load_dotenv
|
|
|
from pydantic import SecretStr
|
|
|
|
|
|
|
|
|
# Pull environment variables from a local .env file (if one exists) so the
# Google API key can be configured without exporting it in the shell.
load_dotenv()

# May be None when no key is configured; the LLM construction further down
# handles the missing-key case explicitly.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

# Local sentence-transformer model used to embed PDF chunks for retrieval.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
|
|
|
|
|
|
|
|
|
|
|
# Page header. NOTE(review): the leading "π" is a mojibake character from a
# lost emoji in the original source — confirm the intended icon before release.
st.title("π LangChain RAG Chatbot")
|
|
|
|
|
|
|
|
|
# Streamlit re-executes this script on every interaction, so seed each
# session-state slot only on the first run.
#   chat_history: list of {"user": question, "bot": answer} dicts
#   qa_chain:     RetrievalQA chain, built lazily after a PDF is uploaded
for _key, _default in (("chat_history", []), ("qa_chain", None)):
    if _key not in st.session_state:
        st.session_state[_key] = _default
|
|
|
|
|
|
|
|
|
|
|
|
# PDF upload widget; `pdf_file` is a Streamlit UploadedFile (file-like
# object) once the user picks a file, otherwise None.
st.subheader("Upload your PDF")

pdf_file = st.file_uploader("Upload", type="pdf")
|
|
|
|
|
|
# Build the retrieval pipeline once per uploaded document: persist the upload
# to a temp file (PyPDFLoader needs a filesystem path), split the pages into
# overlapping chunks, embed them into a local Chroma store, and cache a
# Gemini-backed RetrievalQA chain in session state so reruns skip re-indexing.
if pdf_file is not None and st.session_state.qa_chain is None:
    with st.spinner("π Processing document..."):

        # PyPDFLoader reads from a path, not a file-like object, so spill the
        # upload to disk first. delete=False keeps the file alive past the
        # `with` block; it is removed explicitly below.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
            tmp_file.write(pdf_file.read())
            tmp_path = tmp_file.name

        try:
            loader = PyPDFLoader(tmp_path)
            # load() yields one Document per page. The explicit splitter below
            # controls chunking; the original called load_and_split(), which
            # already splits with default settings, so the pages were being
            # split twice.
            documents = loader.load()
        finally:
            # The temp file was created with delete=False; remove it so
            # repeated uploads don't leak files in the temp directory.
            os.remove(tmp_path)

        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        chunks = splitter.split_documents(documents)

        # Embed chunks and persist the vector store locally so the index
        # survives process restarts.
        vectordb = Chroma.from_documents(
            chunks, embeddings, persist_directory="./chroma_db"
        )
        retriever = vectordb.as_retriever()

        # api_key stays None when the env var is unset; the client then falls
        # back to its own credential resolution.
        llm = ChatGoogleGenerativeAI(
            model="gemini-2.0-flash",
            api_key=SecretStr(GOOGLE_API_KEY) if GOOGLE_API_KEY else None,
        )
        qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

        st.session_state.qa_chain = qa_chain
        # The original success message was an unterminated string literal
        # broken across two lines (with a mojibake check-mark); rejoined into
        # one valid statement.
        st.success("Document processed and indexed!")
|
|
|
|
|
|
|
|
|
|
|
|
# Chat UI: rendered only once a QA chain exists. Streamlit reruns the whole
# script on every interaction, so answering must be guarded against reruns.
if st.session_state.qa_chain:
    st.subheader("π¬ Ask a question")

    question = st.text_input("You:", key="user_input")

    # text_input keeps its value across reruns, so without this guard every
    # unrelated rerun (e.g. clicking another widget) would call the LLM again
    # and append a duplicate transcript entry for the same question.
    already_answered = bool(
        st.session_state.chat_history
        and st.session_state.chat_history[-1]["user"] == question
    )
    if question and not already_answered:
        with st.spinner("π€ Generating answer..."):
            answer = st.session_state.qa_chain.run(question)
        st.session_state.chat_history.append({"user": question, "bot": answer})

    # Replay the full transcript, oldest first.
    for chat in st.session_state.chat_history:
        st.markdown(f"π§ **You:** {chat['user']}")
        st.markdown(f"π€ **Bot:** {chat['bot']}")

    # Clearing qa_chain forces the upload/indexing branch to run again on the
    # next uploaded PDF.
    if st.button("π Reset Chat"):
        st.session_state.chat_history = []
        st.session_state.qa_chain = None
        st.rerun()
else:
    st.info("π Please upload a PDF to begin.")
|
|
|
|