"""Streamlit app: upload a PDF and ask questions about its content."""

import os
import tempfile

import streamlit as st
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline
from transformers import pipeline

# ----------------------------
# APP CONFIG
# ----------------------------
st.set_page_config(page_title="📘 PDF Question Answering", layout="wide")
st.title("📘 PDF Question Answering App")
st.markdown("Upload a PDF and ask questions about its content.")

# ----------------------------
# GLOBAL VARIABLE
# ----------------------------
qa_chain = None


# ----------------------------
# FUNCTIONS
# ----------------------------
@st.cache_resource
def _load_embeddings():
    """Create the sentence-transformers embedding model once per server process."""
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )


@st.cache_resource
def _load_llm():
    """Create the FLAN-T5 text2text pipeline once per server process."""
    return HuggingFacePipeline(
        pipeline=pipeline(
            "text2text-generation",
            model="google/flan-t5-base",
            max_length=512,
            # Greedy decoding. The original passed temperature=0, which is not
            # a valid sampling temperature; do_sample=False states the intent.
            do_sample=False,
        )
    )


def load_pdf(pdf_file):
    """Load an uploaded PDF and split it into overlapping text chunks.

    Args:
        pdf_file: Object returned by ``st.file_uploader``; its raw bytes are
            read with ``getvalue()``.

    Returns:
        The list of chunked documents produced by the text splitter
        (chunk_size=800, chunk_overlap=100). May be empty if the loader
        yields no text.
    """
    # PyPDFLoader needs a filesystem path, but an upload only lives in memory
    # (its ``name`` is just the client-side filename), so spill it to disk.
    tmp = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
    try:
        with tmp:
            tmp.write(pdf_file.getvalue())
        # The handle is closed before loading so the path can be reopened on
        # platforms that forbid opening a file twice.
        documents = PyPDFLoader(tmp.name).load()
    finally:
        os.unlink(tmp.name)

    splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
    return splitter.split_documents(documents)


def build_vectorstore(docs):
    """Create a FAISS vector store from the given document chunks.

    Args:
        docs: Non-empty list of documents, as returned by ``load_pdf``.

    Returns:
        A FAISS vector store indexing ``docs`` with MiniLM embeddings.
    """
    return FAISS.from_documents(docs, _load_embeddings())


def build_qa_chain(vectorstore):
    """Build a RetrievalQA chain that answers with FLAN-T5.

    Args:
        vectorstore: Vector store whose retriever supplies context.

    Returns:
        A "stuff"-type RetrievalQA chain retrieving the top 3 chunks per query.
    """
    return RetrievalQA.from_chain_type(
        llm=_load_llm(),
        retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
        chain_type="stuff",
    )


# ----------------------------
# STREAMLIT UI
# ----------------------------
uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])

if uploaded_file:
    # Streamlit reruns this script on every widget interaction, so the chain is
    # kept in session state and rebuilt only when a different file is uploaded.
    file_key = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("qa_file_key") != file_key:
        with st.spinner("Processing PDF..."):
            docs = load_pdf(uploaded_file)
            # An empty chunk list cannot be indexed; report it below instead.
            chain = build_qa_chain(build_vectorstore(docs)) if docs else None
        st.session_state["qa_file_key"] = file_key
        st.session_state["qa_chain"] = chain

    qa_chain = st.session_state["qa_chain"]
    if qa_chain is not None:
        st.success("✅ PDF uploaded & processed. You can now ask questions!")
    else:
        st.error("No extractable text was found in this PDF.")

if qa_chain is not None:
    query = st.text_input("Ask a question about the PDF:")
    if query:
        with st.spinner("Searching..."):
            answer = qa_chain.run(query)
        st.subheader("📌 Answer:")
        st.write(answer)