# Source: Hugging Face Space aaporosh/SmartPDF_Q_A (status: Sleeping). File size: 2,274 bytes.

import streamlit as st
import pdfplumber
from transformers import pipeline
import re

# Load models once per server process. Streamlit re-executes this script on
# every widget interaction, so bare module-level pipeline() calls would rebuild
# both models on each rerun; st.cache_resource keeps a single shared instance.
# show_spinner=False stops the loaders from rendering anything before
# st.set_page_config(), which Streamlit expects to be the first command.
@st.cache_resource(show_spinner=False)
def _load_qa_model():
    """Return the cached extractive question-answering pipeline.

    The "question-answering" task needs a checkpoint with a trained
    span-prediction head. google/flan-t5-large is a text-to-text checkpoint
    without one, so a SQuAD 2.0 fine-tuned model is used instead; the call
    signature ``qa_model(question=..., context=...)`` is unchanged.
    """
    return pipeline("question-answering", model="deepset/roberta-base-squad2")


@st.cache_resource(show_spinner=False)
def _load_summarizer():
    """Return the cached abstractive summarization pipeline."""
    return pipeline("summarization", model="facebook/bart-large-cnn")


qa_model = _load_qa_model()
summarizer = _load_summarizer()

# Page chrome: browser-tab title, wide layout, and the on-page heading.
st.set_page_config(layout="wide", page_title="Smart PDF Chatbot & Summarizer")
st.title("📄 Smart PDF Chatbot & Summarizer")

# Sidebar controls. The slider value (50-500, default 250) is later forwarded
# to the summarizer as its max_length argument.
with st.sidebar:
    st.header("⚙️ Settings")
    max_length = st.slider("Summary Length", min_value=50, max_value=500, value=250)

# PDF upload widget; the rest of the script only runs its PDF workflow when
# this value is truthy, and shows an upload prompt otherwise.
uploaded_file = st.file_uploader("Upload your PDF", type=["pdf"])

if uploaded_file:
    # Pull the text out of every page. Each page is extracted exactly once
    # (extraction is the expensive step), and pages that yield no text are
    # dropped before joining.
    text = ""
    try:
        with pdfplumber.open(uploaded_file) as pdf:
            page_texts = (page.extract_text() for page in pdf.pages)
            # The generator must be consumed while the PDF is still open.
            text = "\n".join(filter(None, page_texts))
    except Exception as e:
        # UI boundary: report an unreadable PDF the same way the tabs below
        # report model errors, instead of showing a raw traceback.
        st.error(f"Error: {e}")

    if not text.strip():
        st.error("Couldn't extract text from this PDF.")
    else:
        chat_tab, summary_tab, code_tab = st.tabs(
            ["💬 Chat with PDF", "📝 Summarize PDF", "💻 Extract Code"]
        )

        # Chat tab: answer a free-form question using the PDF text as context.
        with chat_tab:
            st.subheader("Ask Questions About Your PDF")
            question = st.text_input("Enter your question:")
            if st.button("Ask", key="qa") and question:
                try:
                    result = qa_model(question=question, context=text)
                    st.success(result['answer'])
                except Exception as e:
                    st.error(f"Error: {e}")

        # Summarization tab: summary length is capped by the sidebar slider.
        with summary_tab:
            st.subheader("PDF Summary")
            if st.button("Generate Summary", key="sum"):
                try:
                    # truncation=True clips input that exceeds the model's
                    # maximum sequence length instead of letting the call
                    # fail; for long PDFs only the leading portion of the
                    # text is therefore summarized.
                    summary = summarizer(
                        text,
                        max_length=max_length,
                        min_length=30,
                        do_sample=False,
                        truncation=True,
                    )
                    st.info(summary[0]['summary_text'])
                except Exception as e:
                    st.error(f"Error: {e}")

        # Code extraction tab: show every Markdown-style ``` fenced block.
        with code_tab:
            st.subheader("Extracted Programming Code")
            # Group 1 is the optional language tag right after the opening
            # fence, group 2 is the block body (non-greedy, may span lines).
            fenced_blocks = re.findall(r'```([a-zA-Z]*)([\s\S]*?)```', text)
            if fenced_blocks:
                for tag, code in fenced_blocks:
                    # Highlight with the fence's own language when present;
                    # untagged fences keep the previous "python" default.
                    st.code(code, language=tag.lower() or "python")
            else:
                st.warning("No code blocks found in this PDF.")
else:
    st.info("👆 Please upload a PDF to start.")