"""Streamlit app: chat with an uploaded PDF through a Hugging Face Hub LLM.

The uploaded PDF is split into chunks, embedded into an in-memory FAISS index,
and queried with a LangChain ``RetrievalQA`` chain.  The index is cached in
``st.session_state`` and rebuilt only when the uploaded file's hash changes.
"""

import contextlib
import hashlib
import os
import tempfile
from typing import Optional

import dotenv
import streamlit as st
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.schema import AIMessage, HumanMessage, SystemMessage
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from streamlit_pdf_viewer import pdf_viewer

# Load variables from a local .env file (if any) before reading the token.
dotenv.load_dotenv()
token = os.getenv("Token")

# Single source of truth for the system message that seeds every conversation.
SYSTEM_PROMPT = "You are a helpful AI assistant. Reply in markdown format."


def init_page() -> None:
    """Set the page title and render the app header."""
    st.set_page_config(page_title="PDF Chatbot")
    st.subheader("💬 PDF Chat with multi LLMs")


def _reset_messages() -> None:
    """Replace the chat history with just the system message."""
    st.session_state.messages = [SystemMessage(content=SYSTEM_PROMPT)]


def init_messages() -> None:
    """Create the chat history in session state if it does not exist yet."""
    if "messages" not in st.session_state:
        _reset_messages()


def _build_vectorstore(pdf_bytes: bytes) -> Optional[FAISS]:
    """Build a FAISS index from the raw bytes of a PDF.

    Args:
        pdf_bytes: Content of the uploaded PDF file.

    Returns:
        The FAISS vector store, or ``None`` when the PDF yields no text chunks
        (e.g. an empty or image-only document).
    """
    tmp_path = None
    try:
        # PyPDFLoader takes a file path, so spill the upload to disk.  The file
        # is closed before loading so it can be reopened on every platform.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            tmp_path = tmp.name
            tmp.write(pdf_bytes)
        pages = PyPDFLoader(tmp_path).load()
    finally:
        # delete=False means nobody else removes the file; clean up ourselves
        # so repeated uploads do not accumulate PDFs in the temp directory.
        if tmp_path is not None:
            with contextlib.suppress(OSError):
                os.unlink(tmp_path)

    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents(pages)
    if not chunks:
        # Nothing to index; building a FAISS store from no documents fails.
        return None

    return FAISS.from_documents(chunks, HuggingFaceEmbeddings())


def _answer_question(llm: HuggingFaceHub, question: str) -> str:
    """Run *question* against the cached vector store and return the answer.

    Any failure is turned into an error string so it is shown in the chat
    instead of crashing the app.
    """
    try:
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=st.session_state.vectorstore.as_retriever(),
        )
        return qa_chain.run(question)
    except Exception as e:  # UI boundary: report the failure to the user.
        return f"An error occurred: {str(e)}"


def _render_history() -> None:
    """Render the user and assistant messages; the system message is hidden."""
    for message in st.session_state.get("messages", []):
        if isinstance(message, AIMessage):
            with st.chat_message("assistant", avatar="👽"):
                st.markdown(message.content)
        elif isinstance(message, HumanMessage):
            with st.chat_message("user", avatar="🙋‍♂️"):
                st.markdown(message.content)


def main() -> None:
    """Render the app: sidebar options, PDF indexing, and the chat interface."""
    init_page()
    init_messages()

    # Initialize session state variables
    if "vectorstore" not in st.session_state:
        st.session_state.vectorstore = None
    if "current_file_hash" not in st.session_state:
        st.session_state.current_file_hash = None

    pdf_bytes = None

    # Sidebar: LLM selection and PDF file uploader
    with st.sidebar:
        st.title("Options")
        selected_model = st.selectbox(
            "Select LLM",
            options=[
                "deepseek-ai/DeepSeek-V3",
                "Qwen/Qwen2.5-7B-Instruct",
                "meta-llama/Llama-3.1-8B-Instruct",
                "mistralai/Mistral-7B-Instruct-v0.3",
                "bigscience/bloom",
                "google/flan-t5-xxl",
            ],
            index=0,
            key="selected_model",
        )
        uploaded_file = st.file_uploader("Choose a PDF file", type=["pdf"])
        if uploaded_file:
            # Read the upload once and reuse the bytes for preview, hashing
            # and indexing.
            pdf_bytes = uploaded_file.getvalue()
            pdf_viewer(input=pdf_bytes, width=300)

    # Initialize LLM with API token from Spaces secrets
    llm = HuggingFaceHub(
        repo_id=selected_model,
        model_kwargs={"temperature": 0.5, "max_length": 500},
        huggingfacehub_api_token=token,
    )

    if not uploaded_file:
        st.write("Please upload a PDF file to start querying.")
        return

    # Compute file hash to check for changes
    file_hash = hashlib.md5(pdf_bytes).hexdigest()

    # Rebuild the index only for a new file or when no index is cached.
    if (
        st.session_state.current_file_hash != file_hash
        or st.session_state.vectorstore is None
    ):
        vectorstore = _build_vectorstore(pdf_bytes)
        if vectorstore is None:
            # Drop any index from a previous file so it cannot be queried
            # by mistake.
            st.session_state.vectorstore = None
            st.session_state.current_file_hash = None
            st.warning("No extractable text was found in this PDF.")
            return
        st.session_state.vectorstore = vectorstore
        st.session_state.current_file_hash = file_hash

    # Chat interface
    if user_input := st.chat_input("Input your question about the PDF:"):
        st.session_state.messages.append(HumanMessage(content=user_input))
        with st.spinner("Analyzing ..."):
            answer = _answer_question(llm, user_input)
        st.session_state.messages.append(AIMessage(content=answer))

    # Display chat messages
    _render_history()

    # Clear conversation button
    if st.button("🧹 Clear Conversation", key="clear_chat"):
        _reset_messages()
        st.rerun()


if __name__ == "__main__":
    main()