Spaces:

garvitcpp
/

Sum-it-up

Sleeping

App Files Files Community

garvitcpp committed on Dec 8, 2024

Commit

ce9b735

verified ·

1 Parent(s): 710be58

Upload 6 files

Browse files

Files changed (6) hide show

README.md +25 -13
app.py +124 -0
requirements.txt +8 -0
src/__init__.py +0 -0
src/document_processor.py +35 -0
src/summarizer.py +47 -0

README.md CHANGED Viewed

@@ -1,13 +1,25 @@
----
-title: Sum It Up
-emoji: 🦀
-colorFrom: green
-colorTo: purple
-sdk: streamlit
-sdk_version: 1.40.2
-app_file: app.py
-pinned: false
-license: apache-2.0
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+# News Summarizer
+## Overview
+An intelligent document summarization tool using Hugging Face's BART model and Streamlit.
+## Features
+- Direct text input summarization
+- Document upload support (txt, pdf, docx)
+- Configurable summary length
+- Uses a state-of-the-art NLP model
+## Installation
+1. Clone the repository
+2. Install dependencies: `pip install -r requirements.txt`
+3. Run the app: `streamlit run app.py`
+## Dependencies
+- Streamlit
+- Transformers
+- PyTorch
+- PyPDF2
+- python-docx
+- TextBlob
+## Model
+Uses Facebook's BART Large CNN model for summarization

app.py ADDED Viewed

	@@ -0,0 +1,124 @@

+import streamlit as st
+from src.document_processor import process_document
+from src.summarizer import TextSummarizer
+import logging
+from textblob import TextBlob  # Ensure this library is installed
+# Set up logging
+logging.basicConfig(level=logging.DEBUG)
+def _get_summarizer():
+    """Return a TextSummarizer that is built once and reused across reruns."""
+    # Streamlit re-executes this script on every widget interaction, so an
+    # uncached TextSummarizer() would rebuild the summarization pipeline each
+    # time the slider moves or a button is clicked.
+    @st.cache_resource
+    def _load_summarizer():
+        return TextSummarizer()
+    return _load_summarizer()
+def _classify_sentiment(polarity):
+    """Map a sentiment polarity score to the label shown in the UI."""
+    if polarity > 0:
+        return "Positive 😊"
+    elif polarity < 0:
+        return "Negative 😟"
+    else:
+        return "Neutral 😐"
+def _render_results(summarizer, text, summary_length):
+    """Write the summary and sentiment sections for ``text`` to the page.
+    Args:
+        summarizer: Object exposing ``generate_summary(text, max_length, min_length)``.
+        text (str): Text to summarize and analyze.
+        summary_length (int): Value passed as the summary's ``max_length``.
+    """
+    summary = summarizer.generate_summary(
+        text,
+        max_length=summary_length,
+        # The lower bound is tied to the slider: half the requested maximum.
+        min_length=summary_length // 2
+    )
+    st.subheader("Summary")
+    st.write(summary)
+    # Sentiment is computed on the full input text, not on the summary.
+    if text.strip():
+        sentiment = TextBlob(text).sentiment
+        sentiment_class = _classify_sentiment(sentiment.polarity)
+        st.subheader("Sentiment Analysis")
+        st.write(f"Sentiment: {sentiment_class}")
+        st.write(f"Polarity: {sentiment.polarity:.2f} (Range: -1 to 1)")
+        st.write(f"Subjectivity: {sentiment.subjectivity:.2f} (Range: 0 to 1)")
+    else:
+        st.warning("No valid text for sentiment analysis.")
+def main():
+    """Build the SumItUp page: a settings sidebar plus paste-text and upload tabs."""
+    # Streamlit app configuration
+    st.set_page_config(
+        page_title="SumItUp | Document Summarizer",
+        page_icon="✍️",
+        layout="wide"
+    )
+    st.title("✍️ SumItUp")
+    st.subheader("Intelligent Document Summarization Made Easy")
+    # Sidebar for configuration
+    st.sidebar.header("Summarization Settings")
+    summary_length = st.sidebar.slider(
+        "Summary Length",
+        min_value=100,
+        max_value=400,
+        value=250
+    )
+    # Tabs for different input methods
+    tab1, tab2 = st.tabs(["Paste Text", "Upload Document"])
+    # Shared, cached summarizer (see _get_summarizer)
+    summarizer = _get_summarizer()
+    # Tab 1: Direct Text Input
+    with tab1:
+        st.header("Direct Text Input")
+        text_input = st.text_area(
+            "Paste your text here:",
+            height=300,
+            help="Enter the text you want to summarize"
+        )
+        if st.button("Summarize Text", key="text_summarize"):
+            # strip() so whitespace-only input is treated as empty instead of
+            # being sent to the summarizer.
+            if text_input.strip():
+                with st.spinner('Generating summary and sentiment analysis...'):
+                    try:
+                        _render_results(summarizer, text_input, summary_length)
+                    except Exception as e:
+                        # Top-level UI boundary: keep the traceback in the
+                        # logs and show a short message on the page.
+                        logging.exception("Text summarization failed")
+                        st.error(f"Summarization failed: {e}")
+            else:
+                st.warning("Please enter some text to summarize.")
+    # Tab 2: Document Upload
+    with tab2:
+        st.header("Document Upload")
+        uploaded_file = st.file_uploader(
+            "Choose a file",
+            type=['txt', 'pdf', 'docx'],
+            help="Upload a text, PDF, or Word document"
+        )
+        if uploaded_file is not None:
+            if st.button("Summarize Document", key="doc_summarize"):
+                with st.spinner('Processing, summarizing, and analyzing sentiment...'):
+                    try:
+                        document_text = process_document(uploaded_file)
+                        _render_results(summarizer, document_text, summary_length)
+                    except Exception as e:
+                        logging.exception("Document processing failed")
+                        st.error(f"Error processing document: {e}")
+if __name__ == "__main__":
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+streamlit
+PyPDF2
+python-docx
+transformers
+torch
+sentencepiece
+accelerate
+textblob

src/__init__.py ADDED Viewed

File without changes

src/document_processor.py ADDED Viewed

	@@ -0,0 +1,35 @@

+import PyPDF2
+import docx
+def process_document(uploaded_file):
+    """
+    Extract plain text from an uploaded text, PDF, or Word document.
+    The format is chosen from the upload's MIME type; when that is missing or
+    not one of the three known types, the file-name extension is used instead.
+    Args:
+        uploaded_file: Streamlit uploaded file object. ``type`` and ``name``
+            are read if present; ``getvalue()`` is called for text files, and
+            the object itself is handed to the PDF/DOCX readers.
+    Returns:
+        str: Extracted text from the document
+    Raises:
+        ValueError: If the file type is unsupported or the content cannot be
+            decoded/parsed.
+    """
+    docx_mime = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
+    kinds_by_mime = {
+        'text/plain': 'txt',
+        'application/pdf': 'pdf',
+        docx_mime: 'docx',
+    }
+    mime_type = getattr(uploaded_file, "type", "") or ""
+    kind = kinds_by_mime.get(mime_type)
+    if kind is None:
+        # The reported MIME type can be empty or generic; fall back to the
+        # extension so an otherwise valid upload is not rejected.
+        file_name = (getattr(uploaded_file, "name", "") or "").lower()
+        for ext in ("txt", "pdf", "docx"):
+            if file_name.endswith("." + ext):
+                kind = ext
+                break
+    # Text file
+    if kind == 'txt':
+        try:
+            # utf-8-sig decodes plain UTF-8 and drops a leading BOM if present.
+            return uploaded_file.getvalue().decode("utf-8-sig")
+        except UnicodeDecodeError as e:
+            raise ValueError(f"Error processing text file: {e}") from e
+    # PDF file
+    if kind == 'pdf':
+        try:
+            pdf_reader = PyPDF2.PdfReader(uploaded_file)
+            # Guard against a page yielding no text so join() never sees None.
+            return " ".join(page.extract_text() or "" for page in pdf_reader.pages)
+        except Exception as e:
+            raise ValueError(f"Error processing PDF: {e}") from e
+    # Word document
+    if kind == 'docx':
+        try:
+            doc = docx.Document(uploaded_file)
+            return " ".join(para.text for para in doc.paragraphs)
+        except Exception as e:
+            raise ValueError(f"Error processing Word document: {e}") from e
+    raise ValueError("Unsupported file type")

src/summarizer.py ADDED Viewed

	@@ -0,0 +1,47 @@

+from transformers import pipeline
+import time
+class TextSummarizer:
+    """Thin wrapper around a Hugging Face ``summarization`` pipeline."""
+    def __init__(self, model_name="facebook/bart-large-cnn"):
+        """
+        Initialize summarization pipeline
+        Args:
+            model_name (str): Hugging Face model for summarization
+        Raises:
+            RuntimeError: If the pipeline cannot be created.
+        """
+        try:
+            self.summarizer = pipeline("summarization", model=model_name)
+        except Exception as e:
+            # Chain the cause so the original traceback is not lost.
+            raise RuntimeError(f"Failed to load summarization model: {e}") from e
+    def generate_summary(self, text, max_length=400, min_length=100):
+        """
+        Generate summary for given text
+        Args:
+            text (str): Input text to summarize
+            max_length (int): Maximum length of summary
+            min_length (int): Minimum length of summary; clamped so it never
+                exceeds ``max_length``
+        Returns:
+            str: Generated summary. Empty/blank input and pipeline failures
+                are reported as a message string rather than raised.
+        """
+        try:
+            # Validate input text
+            if not text or not text.strip():
+                return "No text provided for summarization."
+            # Ensure min_length does not exceed max_length
+            min_length = min(min_length, max_length)
+            # truncation=True cuts over-long input down to what the model
+            # accepts instead of failing inside the model; only the leading
+            # part of a very long document is summarized as a result.
+            result = self.summarizer(
+                text,
+                max_length=max_length,
+                min_length=min_length,
+                do_sample=False,
+                truncation=True
+            )
+            return result[0]['summary_text']
+        except Exception as e:
+            return f"Error during summarization: {e}"