# File size: 6,510 Bytes
# 7107674
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
"""
@file app.py
@description Main application entry point for the Text Summarizer. Handles Flask routes, 
integrates summarization algorithms (SpaCy, NLTK, Gensim, Sumy), and manages data processing 
for text and URL inputs.

@author Amey Thakur <https://github.com/Amey-Thakur>
@author Mega Satish <https://github.com/msatmod>
@created 2022-08-09
@repository https://github.com/Amey-Thakur/TEXT-SUMMARIZER
@license MIT
"""

from __future__ import unicode_literals
from flask import Flask,render_template,url_for,request

# Import the project's own and third-party summarization modules
from spacy_summarization import text_summarizer  # SpaCy-based summarization logic
from summa.summarizer import summarize       # Summa (Gensim TextRank fork) implementation
from nltk_summarization import nltk_summarizer   # NLTK frequency-based summarization
import time
import spacy

# Load the "en_core_web_sm" SpaCy English pipeline once at import time;
# readingTime() reuses this shared object to tokenize text.
nlp = spacy.load("en_core_web_sm")
# Flask application object on which the routes and error handler below are registered.
app = Flask(__name__)

# Web Scraping Pkg
from bs4 import BeautifulSoup
from urllib.request import urlopen


# Sumy Package Imports for LexRank Algorithm
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lex_rank import LexRankSummarizer

def sumy_summary(docx, sentences_count=3):
	"""
	Generates an extractive summary using the LexRank algorithm provided by Sumy.

	@param docx (str): The input text document to be summarized.
	@param sentences_count (int): How many top-ranked sentences to keep. Defaults to 3,
	the value that used to be hard-coded, so existing callers are unaffected.
	@return result (str): The selected sentences joined by single spaces.
	"""
	parser = PlaintextParser.from_string(docx, Tokenizer("english"))
	lex_summarizer = LexRankSummarizer()
	ranked_sentences = lex_summarizer(parser.document, sentences_count)
	# Sumy yields sentence objects; str() gives their text.
	return ' '.join(str(sentence) for sentence in ranked_sentences)


def readingTime(mytext):
	"""
	Estimates how long a text takes to read.

	@param mytext (str): The input text to analyze.
	@return estimatedTime (float): Number of SpaCy tokens in the text divided by 200.0,
	i.e. minutes at a rate of 200 tokens per minute.
	"""
	doc = nlp(mytext)  # Run the module-level SpaCy pipeline over the text
	token_count = len(doc)  # Length of a SpaCy Doc is its number of tokens
	return token_count / 200.0

# Fetch Text From Url
def get_text(url, timeout=10):
	"""
	Scrapes the paragraph text from a web page.

	@param url (str): The http:// or https:// URL of the target webpage.
	@param timeout (float): Seconds to wait on the connection before giving up (default 10).
	@return fetched_text (str): The text of every <p> element, joined by single spaces.
	@raises ValueError: If the URL does not use the http or https scheme.
	"""
	url = url.strip()
	# urlopen also understands file:// and ftp:// URLs. The URL is supplied through a
	# web form, so restrict it to web schemes to keep local files from being read.
	if not url.lower().startswith(("http://", "https://")):
		raise ValueError("Only http:// and https:// URLs are supported.")
	# The context manager closes the HTTP response even if parsing raises.
	with urlopen(url, timeout=timeout) as page:
		soup = BeautifulSoup(page, "html.parser") # Parse HTML content
	return ' '.join(p.text for p in soup.find_all('p')) # Extract text from <p> tags

@app.route('/')
def index():
	"""
	Landing page route.
	Renders the 'index.html' template without any context variables.
	"""
	landing_page = render_template('index.html')
	return landing_page


@app.route('/analyze',methods=['GET','POST'])
def analyze():
	"""
	Route to handle direct text input for summarization.
	On POST, summarizes the submitted 'rawtext' form field with the SpaCy-based
	summarizer and renders 'index.html' with the original text, the summary, both
	reading-time estimates and the elapsed processing time in seconds.
	On any other method, renders the empty 'index.html' form.
	"""
	# A GET request carries no form data. Without this guard the result variables
	# below would be unbound and the view would fail with an UnboundLocalError.
	if request.method != 'POST':
		return render_template('index.html')

	start = time.time()
	rawtext = request.form['rawtext']
	# Calculate metrics for original text
	final_reading_time = readingTime(rawtext)
	# Generate summary using SpaCy-based custom algorithm
	final_summary = text_summarizer(rawtext)
	summary_reading_time = readingTime(final_summary)
	final_time = time.time() - start
	return render_template(
		'index.html',
		ctext=rawtext,
		final_summary=final_summary,
		final_time=final_time,
		final_reading_time=final_reading_time,
		summary_reading_time=summary_reading_time,
	)

@app.route('/analyze_url',methods=['GET','POST'])
def analyze_url():
	"""
	Route to handle URL-based input for summarization.
	On POST, fetches the page named by the 'raw_url' form field, extracts its text,
	summarizes it with the SpaCy-based summarizer and renders 'index.html' with the
	fetched text, the summary, both reading-time estimates and the elapsed
	processing time in seconds.
	On any other method, renders the empty 'index.html' form.
	"""
	# A GET request carries no form data. Without this guard the result variables
	# below would be unbound and the view would fail with an UnboundLocalError.
	if request.method != 'POST':
		return render_template('index.html')

	start = time.time()
	raw_url = request.form['raw_url']
	rawtext = get_text(raw_url)
	final_reading_time = readingTime(rawtext)
	final_summary = text_summarizer(rawtext)
	summary_reading_time = readingTime(final_summary)
	final_time = time.time() - start
	return render_template(
		'index.html',
		ctext=rawtext,
		final_summary=final_summary,
		final_time=final_time,
		final_reading_time=final_reading_time,
		summary_reading_time=summary_reading_time,
	)



@app.route('/compare_summary')
def compare_summary():
	"""
	Comparison page route.
	Renders the 'compare_summary.html' template without any context variables.
	"""
	comparison_page = render_template('compare_summary.html')
	return comparison_page

def _summarize_or_fallback(summarizer, text):
	"""
	Runs one summarizer and measures its output, shielding the caller from failures.

	@param summarizer (callable): Function that takes the text and returns a summary string.
	@param text (str): The text to summarize.
	@return (tuple): (summary, reading_time). If the summarizer or the reading-time
	calculation raises, the pair is the user-facing error message and 0.
	"""
	try:
		summary = summarizer(text)
		return summary, readingTime(summary)
	except Exception:
		# Best-effort comparison: one failing algorithm must not hide the results of
		# the others, but the traceback is kept in the application log.
		app.logger.exception("Summarizer %s failed", getattr(summarizer, '__name__', summarizer))
		return "Error: Text too short or processing failed.", 0


@app.route('/comparer',methods=['GET','POST'])
def comparer():
	"""
	Comparative analysis route.
	On POST, runs the SpaCy, Summa (TextRank), NLTK and Sumy (LexRank) summarizers on
	the submitted 'rawtext' form field and renders 'compare_summary.html' with every
	summary, its reading time, the reading time of the original text and the total
	elapsed processing time in seconds.
	On any other method, renders the empty 'compare_summary.html' form.
	"""
	# A GET request carries no form data. Without this guard the result variables
	# below would be unbound and the view would fail with an UnboundLocalError.
	if request.method != 'POST':
		return render_template('compare_summary.html')

	start = time.time()
	rawtext = request.form['rawtext']
	final_reading_time = readingTime(rawtext)

	# 1. SpaCy Summarizer
	final_summary_spacy, summary_reading_time = _summarize_or_fallback(text_summarizer, rawtext)
	# 2. Gensim Summarizer (Summa)
	final_summary_gensim, summary_reading_time_gensim = _summarize_or_fallback(summarize, rawtext)
	# 3. NLTK Summarizer (Frequency Dist)
	final_summary_nltk, summary_reading_time_nltk = _summarize_or_fallback(nltk_summarizer, rawtext)
	# 4. Sumy Summarizer (LexRank)
	final_summary_sumy, summary_reading_time_sumy = _summarize_or_fallback(sumy_summary, rawtext)

	final_time = time.time() - start
	return render_template(
		'compare_summary.html',
		ctext=rawtext,
		final_summary_spacy=final_summary_spacy,
		final_summary_gensim=final_summary_gensim,
		final_summary_nltk=final_summary_nltk,
		final_time=final_time,
		final_reading_time=final_reading_time,
		summary_reading_time=summary_reading_time,
		summary_reading_time_gensim=summary_reading_time_gensim,
		final_summary_sumy=final_summary_sumy,
		summary_reading_time_sumy=summary_reading_time_sumy,
		summary_reading_time_nltk=summary_reading_time_nltk,
	)



@app.route('/about')
def about():
	"""
	Route for the '/about' URL.
	Renders the 'index.html' template without any context variables.
	NOTE(review): this is the same template the '/' route renders; confirm whether a
	dedicated about page was intended.
	"""
	about_page = render_template('index.html')
	return about_page

@app.errorhandler(404)
def page_not_found(e):
	"""
	Handler registered for HTTP 404 errors.

	@param e: The error passed in by Flask; it is not used.
	@return (tuple): The rendered '404.html' template and the status code 404.
	"""
	not_found_page = render_template('404.html')
	return not_found_page, 404

if __name__ == '__main__':
	import os

	# Debug mode enables the interactive Werkzeug debugger, which lets anyone who can
	# reach the server execute arbitrary code. Keep it off unless the developer opts
	# in explicitly, e.g. by running with FLASK_DEBUG=1.
	debug_flag = os.environ.get('FLASK_DEBUG', '')
	debug_enabled = bool(debug_flag) and debug_flag.lower() not in ('0', 'false', 'no')
	app.run(debug=debug_enabled)