Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from transformers import pipeline | |
| from PyPDF2 import PdfReader | |
@st.cache_resource
def load_summarizer():
    """Build the BART summarization pipeline once and reuse it across reruns.

    Streamlit re-executes the whole script on every widget interaction, so
    constructing the pipeline directly at module level would rebuild the
    model on each rerun. ``st.cache_resource`` keeps one shared instance.

    Returns:
        The ``transformers`` summarization pipeline for
        ``facebook/bart-large-cnn``.
    """
    return pipeline("summarization", model="facebook/bart-large-cnn")


# Module-level handle; the name is unchanged so existing references keep working.
summarizer = load_summarizer()
def summarize_text(text, max_length=130, min_length=30, max_text_length=1000):
    """Summarize the input text with the module-level ``summarizer`` pipeline.

    Args:
        text: Text to summarize. ``None`` and whitespace-only strings are
            treated as empty input.
        max_length: Upper bound on summary length, forwarded to the pipeline.
        min_length: Lower bound on summary length, forwarded to the pipeline.
        max_text_length: Maximum number of characters of ``text`` handed to
            the pipeline; anything beyond it is dropped.

    Returns:
        The summary string, or a human-readable message when the input is
        empty, the pipeline returns nothing, or the pipeline raises.
    """
    if text is None or not text.strip():
        return "Input text is empty. Please provide valid text."

    # Limit text length to avoid issues with very long texts.
    text = text[:max_text_length]

    try:
        # A character cap does not bound the token count, so also ask the
        # pipeline to truncate at the model's own input limit.
        summary = summarizer(
            text,
            max_length=max_length,
            min_length=min_length,
            do_sample=False,
            truncation=True,
        )
        return summary[0]['summary_text'] if summary else "No summary available."
    except Exception as e:
        # Deliberate best-effort: the caller displays whatever string comes
        # back, so failures are reported as text rather than raised.
        return f"An error occurred: {e}"
def extract_text_from_pdf(pdf_file):
    """Extract text from an uploaded PDF file.

    Args:
        pdf_file: The PDF source handed to ``PdfReader`` (the uploaded file
            object in this app).

    Returns:
        The text of every page that yielded any, joined with newlines so
        that words at page boundaries are not glued together. An empty
        string when no page produced text.
    """
    reader = PdfReader(pdf_file)
    # A page may yield no text (falsy result); such pages are skipped.
    page_texts = (page.extract_text() for page in reader.pages)
    return "\n".join(chunk for chunk in page_texts if chunk)
def main():
    """Render the Streamlit UI: upload a document, show its text, summarize it.

    Accepts a PDF or TXT upload, extracts its text, displays it, and on
    button press shows the result of ``summarize_text``. Unreadable uploads
    are reported in the page instead of raising.
    """
    st.title("Document Summarization App")

    # File uploader for users to upload documents
    uploaded_file = st.file_uploader("Upload a document (PDF or TXT)", type=["pdf", "txt"])
    if uploaded_file is None:
        return

    # Dispatch on the MIME type reported for the upload.
    if uploaded_file.type == "application/pdf":
        try:
            text = extract_text_from_pdf(uploaded_file)
        except Exception as exc:
            # UI boundary: a corrupt or unreadable PDF should produce a
            # message on the page, not a traceback.
            st.error(f"Could not read the PDF file: {exc}")
            return
    elif uploaded_file.type == "text/plain":
        try:
            text = uploaded_file.read().decode("utf-8")
        except UnicodeDecodeError:
            st.error("Could not decode the file. Please upload a UTF-8 encoded TXT file.")
            return
    else:
        st.error("Unsupported file type. Please upload a PDF or TXT file.")
        return

    if not text.strip():
        # Nothing to show or summarize (e.g. a PDF whose pages yielded no text).
        st.warning("No text could be extracted from the uploaded file.")
        return

    st.subheader("Original Text")
    st.write(text)

    if st.button("Generate Summary"):
        with st.spinner("Summarizing..."):
            summary = summarize_text(text)
        st.subheader("Summary")
        st.write(summary)
# Run the app only when this file is executed as the main script.
if __name__ == "__main__":
    main()