Spaces:
Sleeping
Sleeping
| import tempfile | |
| import hashlib | |
| import streamlit as st | |
| from langchain.llms import HuggingFaceHub | |
| from langchain.schema import SystemMessage, HumanMessage, AIMessage | |
| from langchain.document_loaders import PyPDFLoader | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.embeddings import HuggingFaceEmbeddings | |
| from langchain.vectorstores import FAISS | |
| from langchain.chains import RetrievalQA | |
| from streamlit_pdf_viewer import pdf_viewer | |
| import dotenv | |
import os

# Pull variables from a .env file into the process environment before the
# lookup below runs.
dotenv.load_dotenv()

# API token passed to the Hugging Face Hub client; None when the "Token"
# environment variable is not set.
token = os.environ.get("Token")
def init_page() -> None:
    """Set the browser page title and render the app's sub-header."""
    # NOTE(review): the leading characters of the heading look mis-encoded
    # (probably an emoji damaged in transit) — confirm against the original.
    page_title = "PDF Chatbot"
    heading = "π¬ PDF Chat with multi LLMs"

    st.set_page_config(page_title=page_title)
    st.subheader(heading)
def init_messages() -> None:
    """Seed the chat history with the system prompt on first run.

    Does nothing when ``st.session_state`` already holds a ``messages`` entry,
    so an existing conversation survives Streamlit reruns.
    """
    if "messages" in st.session_state:
        return

    system_prompt = SystemMessage(
        content="You are a helpful AI assistant. Reply in markdown format."
    )
    st.session_state.messages = [system_prompt]
def _build_vectorstore(pdf_bytes: bytes) -> "FAISS | None":
    """Split a PDF into chunks and index them in a FAISS vector store.

    Args:
        pdf_bytes: Raw content of the uploaded PDF.

    Returns:
        The populated vector store, or ``None`` when the PDF produced no
        chunks to index.
    """
    # PyPDFLoader is given a file path, so the upload is spilled to disk.
    # The handle is closed (and therefore flushed) before the loader reads
    # the file, and the file is removed afterwards so uploads do not pile up
    # in the temp directory.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(pdf_bytes)
        tmp_path = tmp.name
    try:
        pages = PyPDFLoader(tmp_path).load()
    finally:
        os.unlink(tmp_path)

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    texts = text_splitter.split_documents(pages)
    if not texts:
        # Nothing to index (e.g. an image-only or empty PDF).
        return None

    embeddings = HuggingFaceEmbeddings()
    return FAISS.from_documents(texts, embeddings)


def main() -> None:
    """Run the Streamlit PDF chat app.

    Renders the sidebar (model picker, PDF uploader and preview), indexes the
    uploaded PDF once per distinct file content, answers questions with a
    retrieval QA chain, and shows the conversation with a button to clear it.
    """
    init_page()
    init_messages()

    # Initialize session state variables
    if "vectorstore" not in st.session_state:
        st.session_state.vectorstore = None
    if "current_file_hash" not in st.session_state:
        st.session_state.current_file_hash = None

    # Sidebar: LLM selection and PDF file uploader
    with st.sidebar:
        st.title("Options")
        selected_model = st.selectbox(
            "Select LLM",
            options=[
                "deepseek-ai/DeepSeek-V3",
                "Qwen/Qwen2.5-7B-Instruct",
                "meta-llama/Llama-3.1-8B-Instruct",
                "mistralai/Mistral-7B-Instruct-v0.3",
                "bigscience/bloom",
                "google/flan-t5-xxl",
            ],
            index=0,
            key="selected_model",
        )
        uploaded_file = st.file_uploader("Choose a PDF file", type=["pdf"])
        # Read the upload once; the same bytes feed the preview, the change
        # detection hash and the indexer.
        pdf_bytes = uploaded_file.getvalue() if uploaded_file else None
        if uploaded_file:
            pdf_viewer(input=pdf_bytes, width=300)

    # Initialize LLM with the API token read at module level
    llm = HuggingFaceHub(
        repo_id=selected_model,
        model_kwargs={"temperature": 0.5, "max_length": 500},
        huggingfacehub_api_token=token,
    )

    if not uploaded_file:
        st.write("Please upload a PDF file to start querying.")
        return

    # Hash the content so the PDF is re-indexed only when it actually changes
    # (MD5 is used purely for change detection here, not for security).
    file_hash = hashlib.md5(pdf_bytes).hexdigest()
    if st.session_state.current_file_hash != file_hash or st.session_state.vectorstore is None:
        vectorstore = _build_vectorstore(pdf_bytes)
        if vectorstore is None:
            st.warning("No extractable text was found in this PDF.")
            return
        st.session_state.vectorstore = vectorstore
        st.session_state.current_file_hash = file_hash

    # Chat interface
    if user_input := st.chat_input("Input your question about the PDF:"):
        st.session_state.messages.append(HumanMessage(content=user_input))
        with st.spinner("Analyzing ..."):
            try:
                qa_chain = RetrievalQA.from_chain_type(
                    llm=llm,
                    chain_type="stuff",
                    retriever=st.session_state.vectorstore.as_retriever(),
                )
                answer = qa_chain.run(user_input)
            except Exception as e:
                # UI boundary: show the failure as the assistant's reply
                # instead of aborting the script run.
                answer = f"An error occurred: {e}"
        st.session_state.messages.append(AIMessage(content=answer))

    # Display chat messages (the system prompt is intentionally not rendered)
    # NOTE(review): the avatar and button-label literals below look mis-encoded
    # (probably emoji damaged in transit) — confirm against the original file.
    for message in st.session_state.get("messages", []):
        if isinstance(message, AIMessage):
            with st.chat_message("assistant", avatar="π½"):
                st.markdown(message.content)
        elif isinstance(message, HumanMessage):
            with st.chat_message("user", avatar="πββοΈ"):
                st.markdown(message.content)

    # Clear conversation button: reset the history to just the system prompt
    # and rerun so the cleared chat is rendered immediately.
    if st.button("π§Ή Clear Conversation", key="clear_chat"):
        st.session_state.messages = [
            SystemMessage(content="You are a helpful AI assistant. Reply in markdown format.")
        ]
        st.rerun()
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()