"""Streamlit app: upload a PDF, preview its text, summarize it, and tag themes.

Run with ``streamlit run <this file>``. The summary comes from a Hugging Face
summarization pipeline and the themes from a zero-shot classification
pipeline scored against a fixed list of candidate labels.
"""

import pdfplumber
import streamlit as st
from transformers import pipeline

# Number of characters of extracted text shown in the preview box.
PREVIEW_CHARS = 1500
# Character budgets for the text handed to each model. These are character
# counts, not token counts, so they only roughly bound the model input.
SUMMARY_INPUT_CHARS = 1024 * 2
CLASSIFIER_INPUT_CHARS = 1024
# A candidate label is reported as a theme only when its score exceeds this.
THEME_THRESHOLD = 0.3
CANDIDATE_LABELS = [
    "finance",
    "politics",
    "health",
    "technology",
    "education",
    "environment",
    "law",
    "science",
    "culture",
]


def _extract_text(pdf_file) -> str:
    """Return the text of every page in *pdf_file*, joined by newlines.

    Pages for which ``extract_text()`` returns a falsy value (``None`` or an
    empty string) are skipped. Each page is extracted exactly once.
    """
    with pdfplumber.open(pdf_file) as pdf:
        page_texts = (page.extract_text() for page in pdf.pages)
        return "\n".join(page_text for page_text in page_texts if page_text)


def _preview(text: str, limit: int = PREVIEW_CHARS) -> str:
    """Return *text* cut to *limit* characters, with "..." only if it was cut."""
    if len(text) <= limit:
        return text
    return text[:limit] + "..."


def _select_themes(result: dict, threshold: float = THEME_THRESHOLD) -> list:
    """Return the labels in *result* whose paired score is above *threshold*.

    *result* is the classifier output; its ``"labels"`` and ``"scores"``
    entries are read as parallel sequences.
    """
    return [
        label
        for label, score in zip(result["labels"], result["scores"])
        if score > threshold
    ]


# Set the title
st.set_page_config(page_title="PDF Summarizer & Theme Extractor")
st.title("📄 PDF Summary and Theme Explorer")


# Load Hugging Face models
@st.cache_resource
def load_models():
    """Build and return the ``(summarizer, classifier)`` pipelines.

    Wrapped in ``st.cache_resource`` so the pipelines are constructed once and
    reused instead of being rebuilt on every script rerun.
    """
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    classifier = pipeline(
        "zero-shot-classification", model="facebook/bart-large-mnli"
    )
    return summarizer, classifier


summarizer, classifier = load_models()

# PDF Upload
uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])

if uploaded_file:
    # Extract text from PDF
    text = _extract_text(uploaded_file)

    if not text.strip():
        st.warning("No readable text found in the PDF.")
    else:
        st.subheader("📚 Extracted Text (Preview)")
        st.text_area("Extracted Text", _preview(text), height=200)

        with st.spinner("Summarizing..."):
            # The slice is only a rough character budget; truncation=True asks
            # the pipeline's tokenizer to cut input that is still too long for
            # the model once tokenized.
            input_text = text[:SUMMARY_INPUT_CHARS]
            summary = summarizer(
                input_text,
                max_length=150,
                min_length=50,
                do_sample=False,
                truncation=True,
            )[0]["summary_text"]
        st.subheader("📝 Summary")
        st.write(summary)

        with st.spinner("Extracting key themes..."):
            result = classifier(text[:CLASSIFIER_INPUT_CHARS], CANDIDATE_LABELS)
            themes = _select_themes(result)
        st.subheader("🏷️ Key Themes")
        st.write(", ".join(themes) if themes else "No strong themes identified.")