PDF_QA_App / app.py
Alpha108's picture
Create app.py
c94a99e verified
raw
history blame
2.3 kB
import streamlit as st
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from transformers import pipeline
# ----------------------------
# APP CONFIG
# ----------------------------
# Page chrome. The emoji literals are written as real UTF-8 characters; the
# previous text was the same bytes mis-decoded through a single-byte codepage.
st.set_page_config(page_title="📘 PDF Question Answering", layout="wide")
st.title("📘 PDF Question Answering App")
st.markdown("Upload a PDF and ask questions about its content.")
# ----------------------------
# GLOBAL VARIABLES
# ----------------------------
# Holds the retrieval QA chain once a PDF has been processed; None until then.
qa_chain = None
# ----------------------------
# FUNCTIONS
# ----------------------------
def load_pdf(pdf_file):
    """Parse an uploaded PDF and split it into overlapping text chunks.

    Args:
        pdf_file: An uploaded-file object exposing ``name`` (the original
            filename) and ``getvalue()`` (the raw bytes), such as the object
            returned by ``st.file_uploader``.

    Returns:
        The list of chunk documents produced by the text splitter
        (chunk_size=800, chunk_overlap=100).
    """
    import os
    import tempfile

    # The upload lives in memory; ``pdf_file.name`` is only the client-side
    # filename, not a path that exists on this machine. PyPDFLoader needs a
    # real path, so spool the bytes to a temporary file first.
    # delete=False lets the loader reopen the file by name after we close it.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        tmp.write(pdf_file.getvalue())
        tmp_path = tmp.name

    try:
        documents = PyPDFLoader(tmp_path).load()
    finally:
        # Always remove the spooled copy, even if parsing fails.
        os.remove(tmp_path)

    # Report the user's filename rather than the throwaway temp path.
    for document in documents:
        document.metadata["source"] = pdf_file.name

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
    return text_splitter.split_documents(documents)
def build_vectorstore(docs):
    """Embed the given document chunks and index them with FAISS.

    Args:
        docs: Chunk documents to embed (as returned by ``load_pdf``).

    Returns:
        The FAISS vector store built from ``docs``.
    """
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return FAISS.from_documents(docs, embedder)
def build_qa_chain(vectorstore):
    """Create a RetrievalQA chain backed by a local flan-t5-base pipeline.

    Args:
        vectorstore: Vector store whose retriever supplies context; the top
            3 matches are requested per query.

    Returns:
        A ``RetrievalQA`` chain using the "stuff" chain type.
    """
    # NOTE(review): temperature=0 is passed without do_sample; confirm how the
    # installed transformers version treats it.
    generator = pipeline(
        "text2text-generation",
        model="google/flan-t5-base",
        max_length=512,
        temperature=0,
    )
    language_model = HuggingFacePipeline(pipeline=generator)
    top_k_retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
    return RetrievalQA.from_chain_type(
        llm=language_model,
        retriever=top_k_retriever,
        chain_type="stuff",
    )
# ----------------------------
# STREAMLIT UI
# ----------------------------
uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])

if uploaded_file:
    # Streamlit re-executes this whole script on every widget interaction
    # (including each typed question). Keep the built chain in session state,
    # keyed by the upload's name and size, so the PDF is parsed and the
    # models are loaded only when a different file is provided.
    file_key = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("qa_file_key") != file_key:
        with st.spinner("Processing PDF..."):
            docs = load_pdf(uploaded_file)
            # A PDF without extractable text yields no chunks, and building
            # an index from an empty list fails; record "no chain" instead.
            st.session_state["qa_chain"] = (
                build_qa_chain(build_vectorstore(docs)) if docs else None
            )
            st.session_state["qa_file_key"] = file_key

    qa_chain = st.session_state["qa_chain"]
    if qa_chain is not None:
        st.success("✅ PDF uploaded & processed. You can now ask questions!")
    else:
        st.error("No extractable text was found in this PDF.")

if qa_chain is not None:
    query = st.text_input("Ask a question about the PDF:")
    if query:
        with st.spinner("Searching..."):
            answer = qa_chain.run(query)
        st.markdown("### Answer:")
        st.write(answer)