Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from transformers import pipeline | |
| import pdfplumber | |
# Page setup: name the browser tab, then render the on-page heading.
_page_title = "PDF Summarizer & Theme Extractor"
st.set_page_config(page_title=_page_title)
st.title("π PDF Summary and Theme Explorer")
| # Load Hugging Face models | |
@st.cache_resource
def load_models():
    """Build the two Hugging Face pipelines used by the app.

    The function is wrapped in ``st.cache_resource`` so the pipelines are
    constructed once and reused across Streamlit script reruns instead of
    being rebuilt on every widget interaction.

    Returns:
        A ``(summarizer, classifier)`` tuple: a ``"summarization"`` pipeline
        backed by ``facebook/bart-large-cnn`` and a
        ``"zero-shot-classification"`` pipeline backed by
        ``facebook/bart-large-mnli``.
    """
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    classifier = pipeline(
        "zero-shot-classification", model="facebook/bart-large-mnli"
    )
    return summarizer, classifier
# Obtain both pipelines before any UI branch needs them.
summarizer, classifier = load_models()

# Character budgets and scoring settings used below.
PREVIEW_CHARS = 1500            # how much extracted text to show in the preview box
SUMMARY_INPUT_CHARS = 1024 * 2  # rough pre-trim of the summarizer input
THEME_INPUT_CHARS = 1024        # rough pre-trim of the classifier input
THEME_SCORE_THRESHOLD = 0.3     # labels must score strictly above this to be listed
CANDIDATE_LABELS = [
    "finance",
    "politics",
    "health",
    "technology",
    "education",
    "environment",
    "law",
    "science",
    "culture",
]

# PDF Upload
uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])

if uploaded_file:
    # Extract text from PDF. extract_text() is called exactly once per page
    # (not once to filter and again to collect); pages that yield no text
    # are skipped.
    with pdfplumber.open(uploaded_file) as pdf:
        page_texts = [page.extract_text() for page in pdf.pages]
    text = "\n".join(chunk for chunk in page_texts if chunk)

    if not text.strip():
        st.warning("No readable text found in the PDF.")
    else:
        st.subheader("π Extracted Text (Preview)")
        # Only show an ellipsis when the preview really cuts the text short.
        preview = text[:PREVIEW_CHARS]
        if len(text) > PREVIEW_CHARS:
            preview += "..."
        st.text_area("Extracted Text", preview, height=200)

        with st.spinner("Summarizing..."):
            # The character slice keeps the input small, but characters are
            # not tokens; truncation=True additionally asks the pipeline to
            # clip the tokenized input to what the model accepts.
            input_text = text[:SUMMARY_INPUT_CHARS]
            summary = summarizer(
                input_text,
                max_length=150,
                min_length=50,
                do_sample=False,
                truncation=True,
            )[0]["summary_text"]
        st.subheader("π Summary")
        st.write(summary)

        with st.spinner("Extracting key themes..."):
            result = classifier(text[:THEME_INPUT_CHARS], CANDIDATE_LABELS)
            # Keep only labels whose score clears the threshold.
            themes = [
                label
                for label, score in zip(result["labels"], result["scores"])
                if score > THEME_SCORE_THRESHOLD
            ]
        st.subheader("π·οΈ Key Themes")
        st.write(", ".join(themes) if themes else "No strong themes identified.")