PDF_QA_App / app.py
Alpha108's picture
Update app.py
ae52af0 verified
import streamlit as st
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline
from transformers import pipeline
# ----------------------------
# APP CONFIG
# ----------------------------
# Page-level configuration; Streamlit requires set_page_config to be the first
# Streamlit command executed in the script.
# The emoji below were previously mis-decoded (mojibake); restored to the book icon.
st.set_page_config(page_title="📘 PDF Question Answering", layout="wide")
st.title("📘 PDF Question Answering App")
st.markdown("Upload a PDF and ask questions about its content.")
# ----------------------------
# GLOBAL VARIABLE
# ----------------------------
# Holds the retrieval QA chain once a PDF has been processed; None until then.
# The UI section below checks this before showing the question box.
qa_chain = None
# ----------------------------
# FUNCTIONS
# ----------------------------
def load_pdf(pdf_file):
    """Load a PDF and split it into overlapping text chunks.

    Args:
        pdf_file: The uploaded PDF. Either an in-memory file-like object
            (exposing ``getvalue()`` or ``read()``, as Streamlit uploads do)
            or an object whose ``name`` attribute is a readable path on disk.

    Returns:
        The list of document chunks produced by
        ``RecursiveCharacterTextSplitter`` (800 characters, 100 overlap).
    """
    import os
    import tempfile

    splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)

    # An upload handed over by Streamlit lives in memory; its ``name`` is only
    # the client-side filename, not a path PyPDFLoader can open. Pull the raw
    # bytes so they can be spooled to a real file.
    if hasattr(pdf_file, "getvalue"):
        payload = pdf_file.getvalue()
    elif hasattr(pdf_file, "read"):
        payload = pdf_file.read()
    else:
        payload = None

    if payload is None:
        # No byte access: fall back to treating ``name`` as a path on disk.
        return splitter.split_documents(PyPDFLoader(pdf_file.name).load())

    # delete=False plus manual removal: the loader reopens the file by path,
    # which is not possible on every platform while the handle is still open.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        tmp.write(payload)
        tmp_path = tmp.name
    try:
        documents = PyPDFLoader(tmp_path).load()
    finally:
        os.remove(tmp_path)

    # Report the original filename as the source instead of the temp path.
    original_name = getattr(pdf_file, "name", None)
    if original_name:
        for document in documents:
            document.metadata["source"] = original_name

    return splitter.split_documents(documents)
def build_vectorstore(docs):
    """Embed the given document chunks and index them in a FAISS store.

    Args:
        docs: Document chunks to embed and index.

    Returns:
        A FAISS vector store built from ``docs`` using the
        ``sentence-transformers/all-MiniLM-L6-v2`` embedding model.
    """
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return FAISS.from_documents(docs, embedder)
def build_qa_chain(vectorstore):
    """Build a retrieval QA chain backed by a local FLAN-T5 pipeline.

    Args:
        vectorstore: Vector store to retrieve context from; its retriever is
            configured to return the top 3 matches per query.

    Returns:
        A ``RetrievalQA`` chain of type ``"stuff"`` that feeds the retrieved
        chunks to ``google/flan-t5-base``.
    """
    generator = pipeline(
        "text2text-generation",
        model="google/flan-t5-base",
        max_length=512,
        # Deterministic (non-sampling) decoding. This replaces temperature=0:
        # temperature only applies when sampling is enabled, and 0 is not a
        # valid sampling temperature.
        do_sample=False,
    )
    llm = HuggingFacePipeline(pipeline=generator)
    return RetrievalQA.from_chain_type(
        llm=llm,
        retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
        chain_type="stuff",
    )
# ----------------------------
# STREAMLIT UI
# ----------------------------
uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])

# Streamlit re-executes this script from the top on every widget interaction
# (including each submitted question). Keep the built chain in session state,
# keyed by the uploaded file, so the models and index are only rebuilt when a
# different PDF is provided.
qa_chain = None
if uploaded_file:
    file_key = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("qa_file_key") != file_key:
        with st.spinner("Processing PDF..."):
            docs = load_pdf(uploaded_file)
            if docs:
                vectorstore = build_vectorstore(docs)
                st.session_state["qa_chain"] = build_qa_chain(vectorstore)
            else:
                # Nothing to index (e.g. a scanned PDF with no text layer).
                st.session_state["qa_chain"] = None
        st.session_state["qa_file_key"] = file_key

    qa_chain = st.session_state.get("qa_chain")
    if qa_chain:
        st.success("✅ PDF uploaded & processed. You can now ask questions!")
    else:
        st.error("No extractable text was found in this PDF.")

if qa_chain:
    query = st.text_input("Ask a question about the PDF:")
    if query:
        with st.spinner("Searching..."):
            answer = qa_chain.run(query)
        st.subheader("📌 Answer:")
        st.write(answer)