Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain_community.embeddings import HuggingFaceEmbeddings | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain.chains import RetrievalQA | |
| from langchain.llms import HuggingFacePipeline | |
| from transformers import pipeline | |
| # ---------------------------- | |
| # APP CONFIG | |
| # ---------------------------- | |
# Configure the page (title and wide layout), then render the heading and
# a one-line usage hint.
st.set_page_config(
    layout="wide",
    page_title="π PDF Question Answering",
)
st.title("π PDF Question Answering App")
st.markdown("Upload a PDF and ask questions about its content.")
| # ---------------------------- | |
| # GLOBAL VARIABLE | |
| # ---------------------------- | |
# Module-level handle for the QA chain; it stays None until the UI section
# has processed an uploaded PDF and assigned a chain to it.
qa_chain = None
| # ---------------------------- | |
| # FUNCTIONS | |
| # ---------------------------- | |
def load_pdf(pdf_file):
    """Load an uploaded PDF and split its text into overlapping chunks.

    ``PyPDFLoader`` is given a filesystem path, but an uploaded file is an
    in-memory buffer whose ``name`` is only the client-side filename, not a
    path on this machine. The upload's bytes are therefore written to a
    temporary ``.pdf`` file, loaded from there, and the temporary file is
    removed again whether or not loading succeeds.

    Args:
        pdf_file: File-like upload exposing ``getvalue()`` (preferred, as it
            does not depend on the current read position) or ``read()``.

    Returns:
        The document chunks produced by ``RecursiveCharacterTextSplitter``
        with ``chunk_size=800`` and ``chunk_overlap=100``.
    """
    import os
    import tempfile

    if hasattr(pdf_file, "getvalue"):
        pdf_bytes = pdf_file.getvalue()
    else:
        pdf_bytes = pdf_file.read()

    # delete=False plus an explicit close (end of the ``with``) lets the
    # loader reopen the path on platforms that forbid opening a temporary
    # file a second time while it is still open.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        tmp.write(pdf_bytes)
        tmp_path = tmp.name

    try:
        documents = PyPDFLoader(tmp_path).load()
    finally:
        os.remove(tmp_path)

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=100,
    )
    return text_splitter.split_documents(documents)
def build_vectorstore(docs):
    """Create a FAISS vector store from document chunks.

    Args:
        docs: Non-empty sequence of document chunks to embed and index.

    Returns:
        The FAISS vector store built from ``docs`` using the
        ``sentence-transformers/all-MiniLM-L6-v2`` embedding model.

    Raises:
        ValueError: If ``docs`` is empty (for example, a PDF from which no
            text could be extracted).
    """
    # Fail early with a clear message, before the embedding model is loaded,
    # rather than trying to build an index from nothing.
    if not docs:
        raise ValueError(
            "No text chunks to index; the PDF may contain no extractable text."
        )

    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return FAISS.from_documents(docs, embeddings)
def build_qa_chain(vectorstore):
    """Build a retrieval QA chain backed by FLAN-T5.

    Args:
        vectorstore: Vector store exposing ``as_retriever``; the three
            best-matching chunks (``k=3``) are retrieved per question.

    Returns:
        A ``RetrievalQA`` chain of type ``"stuff"`` that answers questions
        with the ``google/flan-t5-base`` text2text-generation pipeline.
    """
    generator = pipeline(
        "text2text-generation",
        model="google/flan-t5-base",
        max_length=512,
        # Deterministic (greedy) decoding is requested explicitly.
        # ``temperature`` only applies when sampling is enabled, so the
        # previous ``temperature=0`` was not a valid way to ask for it.
        do_sample=False,
    )
    llm = HuggingFacePipeline(pipeline=generator)
    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
    return RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        chain_type="stuff",
    )
| # ---------------------------- | |
| # STREAMLIT UI | |
| # ---------------------------- | |
uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])

if uploaded_file:
    # Streamlit re-executes this script on every widget interaction,
    # including each question typed below. The chain is therefore kept in
    # session state and rebuilt only when a different file is uploaded,
    # instead of re-parsing the PDF and reloading the models every time.
    upload_key = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("qa_upload_key") != upload_key:
        # Drop any chain that belongs to a previously uploaded file.
        st.session_state.pop("qa_chain", None)
        st.session_state.pop("qa_upload_key", None)
        with st.spinner("Processing PDF..."):
            docs = load_pdf(uploaded_file)
            # An empty chunk list (e.g. an image-only PDF) cannot be
            # indexed; leave the chain unset and report it below.
            if docs:
                vectorstore = build_vectorstore(docs)
                st.session_state["qa_chain"] = build_qa_chain(vectorstore)
                st.session_state["qa_upload_key"] = upload_key

    qa_chain = st.session_state.get("qa_chain")
    if qa_chain:
        st.success("β PDF uploaded & processed. You can now ask questions!")
    else:
        st.error("No extractable text was found in this PDF.")

# Without an upload, qa_chain keeps its module-level default of None and
# the question box is not shown.
if qa_chain:
    query = st.text_input("Ask a question about the PDF:")
    if query:
        with st.spinner("Searching..."):
            answer = qa_chain.run(query)
        st.subheader("π Answer:")
        st.write(answer)