File size: 2,274 Bytes
f513b53
56d0815
 
d9893e1
56d0815
d9893e1
 
 
56d0815
d9893e1
 
56d0815
d9893e1
 
 
56d0815
d9893e1
 
56d0815
d9893e1
 
 
56d0815
d9893e1
 
 
 
56d0815
d9893e1
 
 
 
 
56d0815
d9893e1
 
56d0815
d9893e1
56d0815
d9893e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56d0815
d9893e1
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import streamlit as st
import pdfplumber
from transformers import pipeline
import re

# Page configuration comes first: Streamlit documents set_page_config() as the
# command that should run before any other Streamlit call, and the cached
# loaders below may render a spinner while the models are being built.
st.set_page_config(page_title="Smart PDF Chatbot & Summarizer", layout="wide")
st.title("📄 Smart PDF Chatbot & Summarizer")


# Streamlit re-executes this script from top to bottom on every widget
# interaction, so bare module-level pipeline(...) calls would rebuild both
# models on each rerun. st.cache_resource keeps a single instance alive for
# the lifetime of the server process (requires a Streamlit release that
# provides st.cache_resource).
@st.cache_resource
def load_qa_model():
    """Build the extractive question-answering pipeline (cached across reruns).

    The "question-answering" task selects an answer span from the context and
    needs a checkpoint fine-tuned with a span-prediction head.
    google/flan-t5-large is a text-to-text checkpoint without such a head, so
    a SQuAD-style model is used instead. Call signature (question=, context=)
    and the result["answer"] key are unchanged for callers.
    """
    return pipeline("question-answering", model="deepset/roberta-base-squad2")


@st.cache_resource
def load_summarizer():
    """Build the BART summarization pipeline (cached across reruns)."""
    return pipeline("summarization", model="facebook/bart-large-cnn")


qa_model = load_qa_model()
summarizer = load_summarizer()

# Sidebar settings
st.sidebar.header("⚙️ Settings")
# Upper bound passed to the summarizer as max_length (range 50-500, default 250).
max_length = st.sidebar.slider("Summary Length", 50, 500, 250)

# Upload PDF
uploaded_file = st.file_uploader("Upload your PDF", type=["pdf"])

def _extract_pdf_text(pdf_file):
    """Return the text of every page in *pdf_file*, joined by newlines.

    Each page is extracted exactly once; pages for which pdfplumber returns
    no text (None or an empty string) are skipped.
    """
    with pdfplumber.open(pdf_file) as pdf:
        page_texts = (page.extract_text() for page in pdf.pages)
        # The join consumes the generator while the PDF is still open.
        return "\n".join(page_text for page_text in page_texts if page_text)


# Main flow. Relies on `uploaded_file`, `max_length`, `qa_model` and
# `summarizer` being defined earlier in this script.
if uploaded_file:
    try:
        text = _extract_pdf_text(uploaded_file)
    except Exception as e:
        # Corrupt or unreadable PDFs are reported the same way as the other
        # actions on this page instead of surfacing a raw traceback.
        st.error(f"Error: {e}")
        st.stop()

    if not text.strip():
        st.error("Couldn't extract text from this PDF.")
    else:
        chat_tab, summary_tab, code_tab = st.tabs(
            ["💬 Chat with PDF", "📝 Summarize PDF", "💻 Extract Code"]
        )

        # Chat tab
        with chat_tab:
            st.subheader("Ask Questions About Your PDF")
            question = st.text_input("Enter your question:")
            # Only query the model when the button was pressed AND a question exists.
            if st.button("Ask", key="qa") and question:
                try:
                    result = qa_model(question=question, context=text)
                    st.success(result['answer'])
                except Exception as e:
                    st.error(f"Error: {e}")

        # Summarization tab
        with summary_tab:
            st.subheader("PDF Summary")
            if st.button("Generate Summary", key="sum"):
                try:
                    # truncation=True clips the input to the model's maximum
                    # input length; without it, documents longer than that
                    # window make the model call fail. Only the leading part
                    # of a long PDF is therefore summarized.
                    summary = summarizer(
                        text,
                        max_length=max_length,
                        min_length=30,
                        do_sample=False,
                        truncation=True,
                    )
                    st.info(summary[0]['summary_text'])
                except Exception as e:
                    st.error(f"Error: {e}")

        # Code extraction tab
        with code_tab:
            st.subheader("Extracted Programming Code")
            # Matches Markdown-style fenced blocks: an optional alphabetic
            # language tag after the opening fence is skipped, and everything
            # up to the next fence is captured (non-greedy).
            code_blocks = re.findall(r'```[a-zA-Z]*([\s\S]*?)```', text)
            if code_blocks:
                for code in code_blocks:
                    st.code(code, language="python")
            else:
                st.warning("No code blocks found in this PDF.")
else:
    st.info("👆 Please upload a PDF to start.")