| | |
| | import streamlit as st |
| | from langchain.chains.history_aware_retriever import create_history_aware_retriever |
| | from langchain.chains.retrieval import create_retrieval_chain |
| | from langchain.chains.combine_documents import create_stuff_documents_chain |
| | from langchain_community.vectorstores import FAISS |
| | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder |
| | from langchain_groq import ChatGroq |
| | from langchain_huggingface import HuggingFaceEmbeddings |
| | from langchain_text_splitters import RecursiveCharacterTextSplitter |
| | from langchain_community.document_loaders import PyPDFLoader |
| | import os |
| | from dotenv import load_dotenv |
| | |
| | |
| | load_dotenv() |
| | os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN') |
| | os.environ['GROQ_API_KEY'] = os.getenv('GROQ_API_KEY') |
| | |
| | embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2") |
| | llm = ChatGroq(model="Gemma2-9b-It") |
| | st.set_page_config(page_title="PDFSense", page_icon="๐") |
| | |
| | st.title("๐ ๐๐๐
๐๐๐ง๐ฌ๐ : ๐๐๐
๐๐ฎ๐๐ฌ๐ญ๐ข๐จ๐ง ๐๐ง๐ฌ๐ฐ๐๐ซ๐ข๐ง๐ ๐๐ฌ๐ฌ๐ข๐ฌ๐ญ๐๐ง๐ญ ๐ฐ๐ข๐ญ๐ก ๐๐ก๐๐ญ ๐๐ข๐ฌ๐ญ๐จ๐ซ๐ฒ") |
| |
|
| | |
| | uploaded_files = st.file_uploader("Drop PDF files here", type="pdf", accept_multiple_files=True) |
| |
|
| | |
| | if "messages" not in st.session_state: |
| | st.session_state["messages"] = [ |
| | {"role": "assistant", "content": "Hi! I am PDFSense. Upload your PDF and ask me anything related to it."} |
| | ] |
| | st.text("If the application fails to read the PDFs, try refreshing the webpage.") |
| | |
| | if uploaded_files: |
| | documents = [] |
| | for uploaded_file in uploaded_files: |
| | temppdf = "./temp.pdf" |
| | with open(temppdf, "wb") as file: |
| | file.write(uploaded_file.getvalue()) |
| | docs = PyPDFLoader(temppdf).load() |
| | documents.extend(docs) |
| | os.remove("./temp.pdf") |
| |
|
| | |
| | text_splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=500) |
| | splits = text_splitter.split_documents(documents) |
| | faiss_index = FAISS.from_documents(splits, embeddings) |
| | retriever = faiss_index.as_retriever() |
| |
|
| | |
| | context_prompt = ChatPromptTemplate.from_messages([ |
| | ("system", "Refactor the question using chat history for context."), |
| | MessagesPlaceholder("chat_history"), |
| | ("human", "{input}") |
| | ]) |
| | history_aware_ret = create_history_aware_retriever(llm, retriever, context_prompt) |
| |
|
| | system_prompt = ( |
| | "You are PDFSense, a PDF reading assistant. Use the following context to answer the question: " |
| | "{context}. If unsure, respond with 'I don't know.'" |
| | ) |
| | prompt = ChatPromptTemplate.from_messages([ |
| | ("system", system_prompt), |
| | MessagesPlaceholder("chat_history"), |
| | ("human", "{input}") |
| | ]) |
| | qa_chain = create_stuff_documents_chain(llm, prompt) |
| | rag_chain = create_retrieval_chain(history_aware_ret, qa_chain) |
| |
|
| | |
| | for msg in st.session_state["messages"]: |
| | st.chat_message(msg["role"]).write(msg["content"]) |
| |
|
| | |
| | if user_input := st.chat_input(placeholder="Ask a question about your uploaded PDF..."): |
| | st.session_state["messages"].append({"role": "user", "content": user_input}) |
| | st.chat_message("user").write(user_input) |
| |
|
| | |
| | with st.chat_message("assistant"): |
| | chat_history = [{"role": msg["role"], "content": msg["content"]} for msg in st.session_state["messages"]] |
| | result = rag_chain.invoke({"input": user_input, "chat_history": chat_history}) |
| | |
| | |
| | answer = result.get("answer", "I don't know.") |
| | st.session_state["messages"].append({"role": "assistant", "content": answer}) |
| | st.write(answer) |
| | else: |
| | st.error("Enter PDFs.") |