garvitcpp committed on
Commit
ce9b735
·
verified ·
1 Parent(s): 710be58

Upload 6 files

Browse files
Files changed (6) hide show
  1. README.md +25 -13
  2. app.py +124 -0
  3. requirements.txt +8 -0
  4. src/__init__.py +0 -0
  5. src/document_processor.py +35 -0
  6. src/summarizer.py +47 -0
README.md CHANGED
@@ -1,13 +1,25 @@
1
- ---
2
- title: Sum It Up
3
- emoji: 🦀
4
- colorFrom: green
5
- colorTo: purple
6
- sdk: streamlit
7
- sdk_version: 1.40.2
8
- app_file: app.py
9
- pinned: false
10
- license: apache-2.0
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # News Summarizer
2
+
3
+ ## Overview
4
+ An intelligent document summarization tool built with Streamlit and the BART model, loaded through Hugging Face Transformers.
5
+
6
+ ## Features
7
+ - Direct text input summarization
8
+ - Document upload support (txt, pdf, docx)
9
+ - Configurable summary length
10
+ - Uses a state-of-the-art NLP model
11
+
12
+ ## Installation
13
+ 1. Clone the repository
14
+ 2. Install dependencies: `pip install -r requirements.txt`
15
+ 3. Run the app: `streamlit run app.py`
16
+
17
+ ## Dependencies
18
+ - Streamlit
19
+ - Transformers
20
+ - PyTorch
21
+ - PyPDF2
22
+ - python-docx
23
+
24
+ ## Model
25
+ Uses Facebook's BART Large CNN model for summarization
app.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from src.document_processor import process_document
3
+ from src.summarizer import TextSummarizer
4
+ import logging
5
+ from textblob import TextBlob # Ensure this library is installed
6
+
7
+ # Set up logging
8
+ logging.basicConfig(level=logging.DEBUG)
9
+
10
def _classify_sentiment(polarity):
    """Map a polarity score to the label shown in the UI.

    Args:
        polarity: Numeric polarity; the UI describes its range as -1 to 1.

    Returns:
        str: "Positive 😊" for values above zero, "Negative 😟" for values
        below zero, and "Neutral 😐" for exactly zero.
    """
    if polarity > 0:
        return "Positive 😊"
    if polarity < 0:
        return "Negative 😟"
    return "Neutral 😐"


@st.cache_resource
def _load_summarizer():
    """Build the TextSummarizer once and reuse it across Streamlit reruns.

    Streamlit re-executes the script on every widget interaction; without
    caching, the summarization model would be re-loaded on each button click.
    """
    return TextSummarizer()


def _render_results(summarizer, text, summary_length):
    """Summarize *text* and render the summary followed by its sentiment.

    Args:
        summarizer: Object exposing ``generate_summary(text, max_length, min_length)``.
        text: Non-blank text to summarize and analyze.
        summary_length: Value of the sidebar slider; used as ``max_length``,
            with half of it used as ``min_length``.
    """
    summary = summarizer.generate_summary(
        text,
        max_length=summary_length,
        min_length=summary_length // 2,  # keep the lower bound proportional
    )
    st.subheader("Summary")
    st.write(summary)

    # Sentiment is computed on the original text, not on the summary.
    sentiment = TextBlob(text).sentiment
    sentiment_class = _classify_sentiment(sentiment.polarity)
    st.subheader("Sentiment Analysis")
    st.write(f"Sentiment: {sentiment_class}")
    st.write(f"Polarity: {sentiment.polarity:.2f} (Range: -1 to 1)")
    st.write(f"Subjectivity: {sentiment.subjectivity:.2f} (Range: 0 to 1)")


def main():
    """Render the SumItUp Streamlit app.

    Provides two input tabs (pasted text and uploaded document); each one
    produces a summary and a sentiment report for the supplied text.
    """
    # Streamlit app configuration
    st.set_page_config(
        page_title="SumItUp | Document Summarizer",
        page_icon="✍️",  # Or another icon that represents summarization
        layout="wide",
    )

    st.title("✍️ SumItUp")
    st.subheader("Intelligent Document Summarization Made Easy")

    # Sidebar for configuration
    st.sidebar.header("Summarization Settings")
    summary_length = st.sidebar.slider(
        "Summary Length",
        min_value=100,
        max_value=400,
        value=250,
    )

    # Tabs for different input methods
    tab1, tab2 = st.tabs(["Paste Text", "Upload Document"])

    # Cached: the model is loaded once, not on every rerun.
    summarizer = _load_summarizer()

    # Tab 1: Direct Text Input
    with tab1:
        st.header("Direct Text Input")
        text_input = st.text_area(
            "Paste your text here:",
            height=300,
            help="Enter the text you want to summarize",
        )

        if st.button("Summarize Text", key="text_summarize"):
            # Whitespace-only input is treated the same as no input.
            if text_input.strip():
                with st.spinner('Generating summary and sentiment analysis...'):
                    try:
                        _render_results(summarizer, text_input, summary_length)
                    except Exception as e:
                        st.error(f"Summarization failed: {e}")
            else:
                st.warning("Please enter some text to summarize.")

    # Tab 2: Document Upload
    with tab2:
        st.header("Document Upload")
        uploaded_file = st.file_uploader(
            "Choose a file",
            type=['txt', 'pdf', 'docx'],
            help="Upload a text, PDF, or Word document",
        )

        if uploaded_file is not None:
            if st.button("Summarize Document", key="doc_summarize"):
                with st.spinner('Processing, summarizing, and analyzing sentiment...'):
                    try:
                        document_text = process_document(uploaded_file)

                        if document_text.strip():
                            _render_results(summarizer, document_text, summary_length)
                        else:
                            # Nothing to summarize, so skip the model call
                            # and say so directly.
                            st.warning("No text could be extracted from the document.")
                    except Exception as e:
                        st.error(f"Error processing document: {e}")


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ PyPDF2
3
+ python-docx
4
+ transformers
5
+ torch
6
+ sentencepiece
7
+ accelerate
8
+ textblob
src/__init__.py ADDED
File without changes
src/document_processor.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import PyPDF2
2
+ import docx
3
+
4
def process_document(uploaded_file):
    """
    Process different document types and extract text

    The document format is selected from the MIME type in
    ``uploaded_file.type``.

    Args:
        uploaded_file: Streamlit uploaded file object. Plain-text files are
            read through ``getvalue()``; PDF and Word files are passed
            directly to PyPDF2 and python-docx.

    Returns:
        str: Extracted text from the document. PDF pages and Word paragraphs
        are joined with single spaces.

    Raises:
        ValueError: If the MIME type is not supported, or if decoding or
            parsing the file fails (the original error is chained).
    """
    mime_type = uploaded_file.type

    # Text file
    if mime_type == 'text/plain':
        try:
            # utf-8-sig reads ordinary UTF-8 and also drops a leading BOM,
            # which would otherwise end up in the text.
            return uploaded_file.getvalue().decode("utf-8-sig")
        except UnicodeDecodeError as e:
            raise ValueError(f"Error processing text file: {e}") from e

    # PDF file
    if mime_type == 'application/pdf':
        try:
            pdf_reader = PyPDF2.PdfReader(uploaded_file)
            # Guard against pages that yield no text (e.g. a None result),
            # which would break the join.
            return " ".join(
                page.extract_text() or "" for page in pdf_reader.pages
            )
        except Exception as e:
            raise ValueError(f"Error processing PDF: {e}") from e

    # Word document
    if mime_type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
        try:
            doc = docx.Document(uploaded_file)
            return " ".join(para.text for para in doc.paragraphs)
        except Exception as e:
            raise ValueError(f"Error processing Word document: {e}") from e

    raise ValueError("Unsupported file type")
src/summarizer.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline
2
+ import time
3
class TextSummarizer:
    """Wrapper around a Hugging Face ``summarization`` pipeline."""

    def __init__(self, model_name="facebook/bart-large-cnn"):
        """
        Initialize summarization pipeline

        Args:
            model_name (str): Hugging Face model for summarization

        Raises:
            RuntimeError: If the pipeline cannot be created; the underlying
                error is chained as the cause.
        """
        try:
            self.summarizer = pipeline("summarization", model=model_name)
        except Exception as e:
            raise RuntimeError(f"Failed to load summarization model: {e}") from e

    def generate_summary(self, text, max_length=400, min_length=100):
        """
        Generate summary for given text

        Args:
            text (str): Input text to summarize
            max_length (int): Maximum length of summary
            min_length (int): Minimum length of summary; clamped so it never
                exceeds ``max_length``

        Returns:
            str: Generated summary. For empty or whitespace-only input a fixed
            "No text provided" message is returned, and if the pipeline fails
            an "Error during summarization: ..." message is returned instead
            of raising, so callers always receive a string.
        """
        try:
            # Validate input text
            if not text or not text.strip():
                return "No text provided for summarization."

            # Ensure min_length does not exceed max_length
            min_length = min(min_length, max_length)

            # Generate summary. truncation=True asks the pipeline to cut
            # over-long inputs down to the model's maximum input length
            # instead of failing inside the model.
            summary = self.summarizer(
                text,
                max_length=max_length,
                min_length=min_length,
                do_sample=False,
                truncation=True,
            )[0]['summary_text']

            return summary

        except Exception as e:
            # Kept as a returned message (not a raise) for backward
            # compatibility with callers that display the result directly.
            return f"Error during summarization: {e}"