import streamlit as st
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain_community.llms import HuggingFaceHub
from langchain.memory import ConversationBufferMemory
import os
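
# NOTE: HuggingFaceHub reads the HUGGINGFACEHUB_API_TOKEN environment variable.
# A minimal sketch for local testing (the value below is a placeholder, not a
# real key); on Hugging Face Spaces, set the token as a repository secret instead:
# os.environ["HUGGINGFACEHUB_API_TOKEN"] = "hf_..."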

# App title and color theme
st.set_page_config(page_title="📄 PDF Q&A Agent", layout="centered", page_icon="📄")
st.markdown(
    """
    <div style="background-color:#E3E8FF;padding:10px;border-radius:10px">
        <h2 style="color:#3C3C88;text-align:center">📄 Student PDF Assistant</h2>
        <p style="color:#444;text-align:center">Ask questions from your uploaded PDF and generate Q&A for chapters!</p>
    </div>
    """,
    unsafe_allow_html=True,
)

# Upload PDF
uploaded_file = st.file_uploader("📄 Upload your PDF file", type=["pdf"])

if uploaded_file:
    # Save the PDF to disk so PyPDFLoader can read it
    with open("uploaded.pdf", "wb") as f:
        f.write(uploaded_file.read())
    st.success("✅ PDF uploaded successfully!")

    # Load the PDF and split it into overlapping chunks for retrieval
    loader = PyPDFLoader("uploaded.pdf")
    pages = loader.load_and_split()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=150)
    chunks = text_splitter.split_documents(pages)

    # Embed the chunks and index them in a FAISS vector store
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vectordb = FAISS.from_documents(chunks, embeddings)
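
    # Streamlit reruns this script top-to-bottom on every interaction, so the
    # index above is rebuilt after each question. A minimal caching sketch
    # (an assumption, not part of the original app) using st.cache_resource:
    #
    # @st.cache_resource
    # def build_vectordb(pdf_path: str):
    #     docs = PyPDFLoader(pdf_path).load_and_split()
    #     splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=150)
    #     emb = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    #     return FAISS.from_documents(splitter.split_documents(docs), emb)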

    # Load an open-source LLM from the Hugging Face Hub (Mistral or any lightweight LLM)
    repo_id = "mistralai/Mistral-7B-Instruct-v0.1"
    llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={"temperature": 0.5, "max_new_tokens": 500})

    # Conversation memory and retrieval chain
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm, retriever=vectordb.as_retriever(), memory=memory
    )
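
    # The .run(question) shortcut used below works because the chain exposes a
    # single output key ("answer"); an equivalent explicit call would be
    # qa_chain({"question": question})["answer"] (an alternative, not a fix).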

    # Chat interface
    st.markdown("---")
    st.markdown("💬 **Ask a question from the PDF:**")

    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []

    question = st.text_input("Type your question here...", key="user_input")
    if question:
        result = qa_chain.run(question)
        st.session_state.chat_history.append(("You", question))
        st.session_state.chat_history.append(("Bot", result))

    # Show chat history, newest exchange first
    for sender, msg in st.session_state.chat_history[::-1]:
        st.markdown(f"**{sender}:** {msg}")

    # Question generation button
    st.markdown("---")
    if st.button("📝 Generate Q&A from all chapters"):
        st.info("Generating questions and answers from the content...")
        questions = [
            "What is the main idea of this chapter?",
            "What are the key points discussed?",
            "Can you summarize this section?",
            "Are there any definitions or terms introduced?",
        ]
        for i, chunk in enumerate(chunks[:3]):  # Limit to first 3 chunks for demo
            st.markdown(f"**Chapter Section {i+1}:**")
            for q in questions:
                # Prepend the question to a 1000-character excerpt of the chunk
                answer = llm.invoke(q + "\n" + chunk.page_content[:1000])
                st.markdown(f"**Q:** {q}")
                st.markdown(f"**A:** {answer}")
            st.markdown("---")
| """ | |
| # Save both files to /mnt/data for user download or deployment | |
| with open("/mnt/data/requirements.txt", "w") as f: | |
| f.write(requirements_txt.strip()) | |
| with open("/mnt/data/app.py", "w") as f: | |
| f.write(app_py.strip()) | |
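
# A plausible requirements.txt for this Space, inferred from the imports above
# (the package list is an assumption; no versions are pinned in the original):
#
# streamlit
# langchain
# langchain-community
# pypdf
# sentence-transformers
# faiss-cpu
# huggingface_hub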