Spaces:
Sleeping
Sleeping
File size: 2,274 Bytes
f513b53 56d0815 d9893e1 56d0815 d9893e1 56d0815 d9893e1 56d0815 d9893e1 56d0815 d9893e1 56d0815 d9893e1 56d0815 d9893e1 56d0815 d9893e1 56d0815 d9893e1 56d0815 d9893e1 56d0815 d9893e1 56d0815 d9893e1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
import streamlit as st
import pdfplumber
from transformers import pipeline
import re
# Model checkpoints.
# The "question-answering" pipeline is extractive: it needs a checkpoint that
# was fine-tuned with a span-prediction head. A text-to-text checkpoint such as
# FLAN-T5 does not provide one, so an extractive SQuAD-style model is used.
QA_MODEL_NAME = "deepset/roberta-base-squad2"
SUMMARIZER_MODEL_NAME = "facebook/bart-large-cnn"

# Matches ```lang ... ``` fences: group 1 is the (possibly empty) language tag,
# group 2 is the fenced code.
CODE_FENCE_RE = re.compile(r"```([a-zA-Z]*)([\s\S]*?)```")


@st.cache_resource
def load_qa_model():
    """Build the question-answering pipeline once per server process.

    Streamlit re-executes the whole script on every widget interaction, so the
    pipeline is cached as a shared resource instead of being rebuilt each run.
    """
    return pipeline("question-answering", model=QA_MODEL_NAME, tokenizer=QA_MODEL_NAME)


@st.cache_resource
def load_summarizer():
    """Build the summarization pipeline once per server process."""
    return pipeline("summarization", model=SUMMARIZER_MODEL_NAME)


@st.cache_data(show_spinner=False)
def extract_pdf_text(pdf_bytes):
    """Return the text of every page of a PDF, joined with newlines.

    Args:
        pdf_bytes: Raw bytes of the uploaded PDF. Bytes (rather than the upload
            object) are used so the result can be cached across reruns.

    Returns:
        The concatenated page text; pages that yield no text are skipped, so
        the result is an empty string when nothing could be extracted.
    """
    import io  # local import: only this helper needs it

    with pdfplumber.open(io.BytesIO(pdf_bytes)) as pdf:
        # extract_text() is the expensive call, so run it once per page and
        # filter out empty/None results afterwards.
        page_texts = (page.extract_text() for page in pdf.pages)
        return "\n".join(filter(None, page_texts))


def render_chat_tab(text):
    """Render the question box and answer a question against ``text``."""
    st.subheader("Ask Questions About Your PDF")
    question = st.text_input("Enter your question:")
    if not st.button("Ask", key="qa"):
        return
    if not question.strip():
        st.warning("Please enter a question first.")
        return
    try:
        # The model is loaded lazily so the app starts without waiting for it.
        result = load_qa_model()(question=question, context=text)
        st.success(result["answer"])
    except Exception as e:  # UI boundary: report the failure instead of crashing
        st.error(f"Error: {e}")


def render_summary_tab(text, max_length):
    """Render the summary button and summarize ``text`` on demand.

    Args:
        text: Extracted PDF text.
        max_length: Upper bound on the generated summary length, as chosen in
            the sidebar.
    """
    st.subheader("PDF Summary")
    if not st.button("Generate Summary", key="sum"):
        return
    try:
        summary = load_summarizer()(
            text,
            max_length=max_length,
            min_length=30,
            do_sample=False,
            # Without truncation, documents longer than the model's input
            # window raise an error; with it, the leading part is summarized.
            truncation=True,
        )
        st.info(summary[0]["summary_text"])
    except Exception as e:  # UI boundary: report the failure instead of crashing
        st.error(f"Error: {e}")


def render_code_tab(text):
    """Show every triple-backtick fenced block found in ``text``."""
    st.subheader("Extracted Programming Code")
    code_blocks = CODE_FENCE_RE.findall(text)
    if not code_blocks:
        st.warning("No code blocks found in this PDF.")
        return
    for fence_lang, code in code_blocks:
        # Honour the fence's own language tag; fall back to Python when absent.
        st.code(code.strip("\n"), language=fence_lang.lower() or "python")


def main():
    """Render the app: page chrome, PDF upload, and the three feature tabs."""
    # Kept as the very first Streamlit call: older Streamlit releases reject
    # set_page_config() once any other element has been rendered.
    st.set_page_config(page_title="Smart PDF Chatbot & Summarizer", layout="wide")
    st.title("📄 Smart PDF Chatbot & Summarizer")

    # Sidebar settings
    st.sidebar.header("⚙️ Settings")
    max_length = st.sidebar.slider("Summary Length", 50, 500, 250)

    # Upload PDF
    uploaded_file = st.file_uploader("Upload your PDF", type=["pdf"])
    if not uploaded_file:
        st.info("📂 Please upload a PDF to start.")
        return

    try:
        text = extract_pdf_text(uploaded_file.getvalue())
    except Exception as e:  # malformed/encrypted PDFs should not crash the app
        st.error(f"Error: {e}")
        return

    if not text.strip():
        st.error("Couldn't extract text from this PDF.")
        return

    chat_tab, summary_tab, code_tab = st.tabs(
        ["💬 Chat with PDF", "📝 Summarize PDF", "💻 Extract Code"]
    )
    with chat_tab:
        render_chat_tab(text)
    with summary_tab:
        render_summary_tab(text, max_length)
    with code_tab:
        render_code_tab(text)


# Streamlit executes the script as the "__main__" module on every rerun.
if __name__ == "__main__":
    main()
|