"""Streamlit app that summarizes an uploaded PDF or plain-text document."""

import streamlit as st
from transformers import pipeline
from PyPDF2 import PdfReader

# Default cap on the number of input characters handed to the model; the
# limit exists to avoid issues with very long texts.
MAX_INPUT_CHARS = 1000


@st.cache_resource
def _load_summarizer():
    """Build the summarization pipeline once and reuse it across reruns.

    Streamlit re-executes the whole script on every widget interaction;
    caching the pipeline as a resource avoids rebuilding it each time.
    """
    return pipeline("summarization", model="facebook/bart-large-cnn")


# Initialize the summarization pipeline (cached, see _load_summarizer).
summarizer = _load_summarizer()


def summarize_text(text, max_length=130, min_length=30,
                   max_text_length=MAX_INPUT_CHARS):
    """Summarize the input text.

    Args:
        text: The text to summarize.
        max_length: Passed to the pipeline as ``max_length``.
        min_length: Passed to the pipeline as ``min_length``.
        max_text_length: Number of leading characters of ``text`` that are
            kept; anything beyond that is dropped before summarizing.

    Returns:
        The summary string, or a human-readable message when the input is
        empty, no summary is produced, or the pipeline raises.
    """
    if not text or not text.strip():
        return "Input text is empty. Please provide valid text."

    # Slicing is a no-op when the text is already short enough.
    text = text[:max_text_length]

    try:
        summary = summarizer(
            text,
            max_length=max_length,
            min_length=min_length,
            do_sample=False,
        )
        return summary[0]['summary_text'] if summary else "No summary available."
    except Exception as e:
        # UI boundary: report the failure as text instead of crashing the app.
        return f"An error occurred: {e}"


def extract_text_from_pdf(pdf_file):
    """Extract text from an uploaded PDF file.

    Args:
        pdf_file: A path or binary file-like object accepted by ``PdfReader``.

    Returns:
        The concatenated text of all pages; pages for which no text is
        extracted contribute an empty string.
    """
    reader = PdfReader(pdf_file)
    return "".join(page.extract_text() or "" for page in reader.pages)


def main():
    """Render the upload widget, show the document text and summarize it."""
    st.title("Document Summarization App")

    # File uploader for users to upload documents
    uploaded_file = st.file_uploader(
        "Upload a document (PDF or TXT)", type=["pdf", "txt"]
    )
    if uploaded_file is None:
        return

    if uploaded_file.type == "application/pdf":
        try:
            text = extract_text_from_pdf(uploaded_file)
        except Exception as e:
            # Malformed or encrypted PDFs should not take the whole app down.
            st.error(f"Could not read the PDF file: {e}")
            return
    elif uploaded_file.type == "text/plain":
        try:
            # getvalue() returns the full buffer regardless of the current
            # read position, unlike read().
            text = uploaded_file.getvalue().decode("utf-8")
        except UnicodeDecodeError:
            st.error(
                "Could not decode the text file. "
                "Please upload a UTF-8 encoded file."
            )
            return
    else:
        st.error("Unsupported file type. Please upload a PDF or TXT file.")
        return

    st.subheader("Original Text")
    st.write(text)

    if len(text) > MAX_INPUT_CHARS:
        # Make the truncation visible instead of silently dropping content.
        st.info(
            f"Only the first {MAX_INPUT_CHARS} characters will be summarized."
        )

    if st.button("Generate Summary"):
        with st.spinner("Summarizing..."):
            summary = summarize_text(text)
        st.subheader("Summary")
        st.write(summary)


if __name__ == "__main__":
    main()