Spaces:
Sleeping
Sleeping
Commit ·
7107674
0
Parent(s):
Text Summarizer
Browse files- .gitattributes +29 -0
- CITATION.cff +13 -0
- Dockerfile +34 -0
- LICENSE +21 -0
- README.md +275 -0
- SECURITY.md +41 -0
- Source Code/Procfile +2 -0
- Source Code/app.py +182 -0
- Source Code/download_nltk.py +13 -0
- Source Code/nltk.txt +3 -0
- Source Code/nltk_summarization.py +63 -0
- Source Code/pyvenv.cfg +8 -0
- Source Code/requirements.txt +15 -0
- Source Code/spacy_summarization.py +76 -0
- Source Code/spacy_summarizer.py +66 -0
- Source Code/static/css/custom.css +107 -0
- Source Code/static/css/materialize.css +0 -0
- Source Code/static/css/materialize.min.css +0 -0
- Source Code/static/js/init.js +101 -0
- Source Code/static/js/materialize.js +0 -0
- Source Code/static/js/materialize.min.js +0 -0
- Source Code/templates/404.html +132 -0
- Source Code/templates/compare_summary.html +236 -0
- Source Code/templates/index.html +197 -0
- codemeta.json +57 -0
- docs/SPECIFICATION.md +66 -0
- pa_setup.sh +26 -0
.gitattributes
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Standardized Git Attributes for Scholarly Archiving
|
| 2 |
+
|
| 3 |
+
# Handle line endings automatically for files detected as text
|
| 4 |
+
# and perform LF normalization on checkin.
|
| 5 |
+
* text=auto eol=lf
|
| 6 |
+
|
| 7 |
+
# Explicitly mark source files as text
|
| 8 |
+
*.py text eol=lf
|
| 9 |
+
*.html text eol=lf
|
| 10 |
+
*.css text eol=lf
|
| 11 |
+
*.js text eol=lf
|
| 12 |
+
*.json text eol=lf
|
| 13 |
+
*.md text eol=lf
|
| 14 |
+
*.txt text eol=lf
|
| 15 |
+
Procfile text eol=lf
|
| 16 |
+
|
| 17 |
+
# Mark binary files
|
| 18 |
+
*.pdf binary
|
| 19 |
+
*.pptx binary
|
| 20 |
+
*.jpg binary
|
| 21 |
+
*.png binary
|
| 22 |
+
*.gif binary
|
| 23 |
+
*.ico binary
|
| 24 |
+
|
| 25 |
+
# Academic/Scholarly classification
|
| 26 |
+
*.pdf linguist-documentation
|
| 27 |
+
*.pptx linguist-documentation
|
| 28 |
+
*.md linguist-documentation
|
| 29 |
+
*.txt linguist-documentation
|
CITATION.cff
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
cff-version: 1.2.0
|
| 2 |
+
message: "If you use this Computer Engineering project or its associated academic materials, please cite them as below."
|
| 3 |
+
authors:
|
| 4 |
+
- family-names: "Thakur"
|
| 5 |
+
given-names: "Amey"
|
| 6 |
+
orcid: "https://orcid.org/0000-0001-5644-1575"
|
| 7 |
+
- family-names: "Satish"
|
| 8 |
+
given-names: "Mega"
|
| 9 |
+
orcid: "https://orcid.org/0000-0002-1844-9557"
|
| 10 |
+
title: "TEXT-SUMMARIZER"
|
| 11 |
+
version: 1.0.0
|
| 12 |
+
date-released: 2022-08-09
|
| 13 |
+
url: "https://github.com/Amey-Thakur/TEXT-SUMMARIZER"
|
Dockerfile
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.10-slim
|
| 2 |
+
|
| 3 |
+
WORKDIR /app
|
| 4 |
+
|
| 5 |
+
# Install system dependencies
|
| 6 |
+
RUN apt-get update && apt-get install -y \
|
| 7 |
+
build-essential \
|
| 8 |
+
git \
|
| 9 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 10 |
+
|
| 11 |
+
# Copy requirements and install
|
| 12 |
+
COPY ["Source Code/requirements.txt", "requirements.txt"]
|
| 13 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 14 |
+
|
| 15 |
+
# Download NLTK data
|
| 16 |
+
COPY ["Source Code/download_nltk.py", "download_nltk.py"]
|
| 17 |
+
RUN python download_nltk.py
|
| 18 |
+
|
| 19 |
+
# Create a non-root user
|
| 20 |
+
RUN useradd -m -u 1000 user
|
| 21 |
+
USER user
|
| 22 |
+
ENV HOME=/home/user \
|
| 23 |
+
PATH=/home/user/.local/bin:$PATH
|
| 24 |
+
|
| 25 |
+
WORKDIR /home/user/app
|
| 26 |
+
|
| 27 |
+
# Copy application code with correct ownership
|
| 28 |
+
COPY --chown=user ["Source Code/", "."]
|
| 29 |
+
|
| 30 |
+
# Expose the correct port
|
| 31 |
+
EXPOSE 7860
|
| 32 |
+
|
| 33 |
+
# Run the app
|
| 34 |
+
CMD ["gunicorn", "-b", "0.0.0.0:7860", "app:app"]
|
LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2018-2022 Amey Thakur and Mega Satish
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
README.md
ADDED
|
@@ -0,0 +1,275 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Text Summarizer
|
| 3 |
+
emoji: 📝
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: indigo
|
| 6 |
+
sdk: docker
|
| 7 |
+
pinned: false
|
| 8 |
+
license: mit
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
<div align="center">
|
| 12 |
+
|
| 13 |
+
<a name="readme-top"></a>
|
| 14 |
+
# Text Summarizer
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
[](LICENSE)
|
| 18 |
+

|
| 19 |
+
[](https://github.com/Amey-Thakur/TEXT-SUMMARIZER)
|
| 20 |
+
[](https://github.com/Amey-Thakur/TEXT-SUMMARIZER)
|
| 21 |
+
|
| 22 |
+
A robust web application leveraging multiple NLP libraries (SpaCy, NLTK, Gensim, Sumy) to summarize textual content and URL sources, featuring a comparative analysis interface for evaluating summarization quality.
|
| 23 |
+
|
| 24 |
+
**[Source Code](Source%20Code/)** · **[Technical Specification](docs/SPECIFICATION.md)** · **[Video Demo](https://youtu.be/2drrqsSB1Bc)**
|
| 25 |
+
|
| 26 |
+
[](https://youtu.be/2drrqsSB1Bc)
|
| 27 |
+
|
| 28 |
+
</div>
|
| 29 |
+
|
| 30 |
+
---
|
| 31 |
+
|
| 32 |
+
<div align="center">
|
| 33 |
+
|
| 34 |
+
[Authors](#authors) · [Overview](#overview) · [Features](#features) · [Structure](#project-structure) · [Results](#results-gallery) · [Quick Start](#quick-start) · [Usage Guidelines](#usage-guidelines) · [License](#license) · [About](#about-this-repository) · [Acknowledgments](#acknowledgments)
|
| 35 |
+
|
| 36 |
+
</div>
|
| 37 |
+
|
| 38 |
+
---
|
| 39 |
+
|
| 40 |
+
<!-- AUTHORS -->
|
| 41 |
+
<div align="center">
|
| 42 |
+
|
| 43 |
+
## Authors
|
| 44 |
+
|
| 45 |
+
**Terna Engineering College | Computer Engineering | Batch of 2022**
|
| 46 |
+
|
| 47 |
+
| <a href="https://github.com/Amey-Thakur"><img src="https://github.com/Amey-Thakur.png" width="150" height="150" alt="Amey Thakur"></a><br>[**Amey Thakur**](https://github.com/Amey-Thakur)<br><br>[](https://orcid.org/0000-0001-5644-1575) | <a href="https://github.com/msatmod"><img src="https://raw.githubusercontent.com/Amey-Thakur/TEXT-SUMMARIZER/main/Mega/Mega.png" width="150" height="150" alt="Mega Satish"></a><br>[**Mega Satish**](https://github.com/msatmod)<br><br>[](https://orcid.org/0000-0002-1844-9557) |
|
| 48 |
+
| :---: | :---: |
|
| 49 |
+
|
| 50 |
+
</div>
|
| 51 |
+
|
| 52 |
+
> [!IMPORTANT]
|
| 53 |
+
> ### 🤝🏻 Special Acknowledgement
|
| 54 |
+
> *Special thanks to **[Mega Satish](https://github.com/msatmod)** for her meaningful contributions, guidance, and support that helped shape this work.*
|
| 55 |
+
|
| 56 |
+
---
|
| 57 |
+
|
| 58 |
+
<!-- OVERVIEW -->
|
| 59 |
+
## Overview
|
| 60 |
+
|
| 61 |
+
This project implements a versatile **Text Summarizer** capable of condensing large bodies of text or web content into concise summaries. It serves as a comparative platform for various Extractive Summarization techniques, including frequency-based methods (SpaCy, NLTK) and graph-based algorithms (TextRank via Gensim, LexRank via Sumy).
|
| 62 |
+
|
| 63 |
+
Developed as a mini-project for the **8th Semester** curriculum, this system addresses the need for efficient information retrieval by automating the extraction of key insights from documents. It features a Flask-based web interface that allows users to input raw text or URLs and visualize the comparative performance of different NLP models.
|
| 64 |
+
|
| 65 |
+
> [!NOTE]
|
| 66 |
+
> **Research Impact & Certification**
|
| 67 |
+
>
|
| 68 |
+
> This project was published as a research paper in the **International Journal for Research in Applied Science and Engineering Technology (IJRASET)** (Volume 10, Issue 1) and is also available as a preprint on **viXra**. The project received an official **Publication Certificate** for its research contribution to natural language processing.
|
| 69 |
+
>
|
| 70 |
+
> - [Preprint @viXra](https://vixra.org/abs/2202.0017)
|
| 71 |
+
> - [Published Paper @IJRASET](https://doi.org/10.22214/ijraset.2022.40066)
|
| 72 |
+
> - [Publication Certificate](https://github.com/Amey-Thakur/ACHIEVEMENTS/blob/main/Research%20Papers/Text%20Summarizer%20Using%20Julia/IJRASET40066%20-%20Text%20Summarizer%20Using%20Julia.pdf)
|
| 73 |
+
|
| 74 |
+
### Resources
|
| 75 |
+
|
| 76 |
+
| # | Resource | Description |
|
| 77 |
+
|---|---|---|
|
| 78 |
+
| 1 | [**Technical Report**](Mini-Project/TEXT%20SUMMARIZER.pdf) | Detailed project documentation |
|
| 79 |
+
| 2 | [**Project Presentation**](Mini-Project/TEXT%20SUMMARIZER.pptx) | Visual demonstration and slides |
|
| 80 |
+
| 3 | [**Technical Specification**](docs/SPECIFICATION.md) | Technical Architecture & Specification |
|
| 81 |
+
| 4 | [**Source Code**](Source%20Code/) | Complete source code and documentation |
|
| 82 |
+
| 5 | [**Research Article**](https://doi.org/10.22214/ijraset.2022.40066) | IJRASET Published Paper |
|
| 83 |
+
| 6 | [**Scholarly Preprint**](https://vixra.org/abs/2202.0017) | Formal research manuscript (viXra) |
|
| 84 |
+
| 7 | [**Project Demo**](https://youtu.be/2drrqsSB1Bc) | Real-time demonstration of features |
|
| 85 |
+
| 8 | [**NLP Laboratory**](https://github.com/Amey-Thakur/NATURAL-LANGUAGE-PROCESSING-AND-COMPUTATIONAL-LAB-II) | Academic repository for NLP |
|
| 86 |
+
|
| 87 |
+
> [!TIP]
|
| 88 |
+
> **Algorithm Selection for Optimal Results**
|
| 89 |
+
>
|
| 90 |
+
> For long-form documents, **Gensim's TextRank** provides superior coherence by leveraging graph-based sentence ranking. For shorter texts or news articles, **SpaCy's frequency-based** approach offers faster execution with comparable quality.
|
| 91 |
+
|
| 92 |
+
---
|
| 93 |
+
|
| 94 |
+
<!-- FEATURES -->
|
| 95 |
+
## Features
|
| 96 |
+
|
| 97 |
+
| Feature | Description |
|
| 98 |
+
|---------|-------------|
|
| 99 |
+
| **Multi-Algorithm Support** | Unified interface for SpaCy, NLTK, Gensim, and Sumy summarization engines. |
|
| 100 |
+
| **Comparative Analysis** | Side-by-side visualization of summaries with reading time reduction metrics. |
|
| 101 |
+
| **Web Scraping** | Integrated BeautifulSoup module to extract and process text directly from web links. |
|
| 102 |
+
| **Material UI** | Responsive frontend built with Materialize CSS for a clean, modern research aesthetic. |
|
| 103 |
+
| **Performance Metrics** | Real-time calculation of original vs. summarized reading times and execution speed. |
|
| 104 |
+
| **Scholarly Codebase** | Fully documented source code with strict academic formatting and inline citations. |
|
| 105 |
+
|
| 106 |
+
### Tech Stack
|
| 107 |
+
- **Backend**: Python 3.x, Flask
|
| 108 |
+
- **NLP Libraries**: SpaCy, NLTK, Gensim, Sumy
|
| 109 |
+
- **Frontend**: HTML5, Materialize CSS, jQuery
|
| 110 |
+
- **Utilities**: BeautifulSoup4, lxml
|
| 111 |
+
|
| 112 |
+
---
|
| 113 |
+
|
| 114 |
+
<!-- PROJECT STRUCTURE -->
|
| 115 |
+
## Project Structure
|
| 116 |
+
|
| 117 |
+
```python
|
| 118 |
+
TEXT-SUMMARIZER/
|
| 119 |
+
│
|
| 120 |
+
├── docs/ # Formal Documentation
|
| 121 |
+
│ └── SPECIFICATION.md # Technical Architecture & Specification
|
| 122 |
+
│
|
| 123 |
+
├── Mega/ # Archival Attribution Assets
|
| 124 |
+
│ ├── Filly.jpg # Project-related Content Asset
|
| 125 |
+
│ └── Mega.png # Author Profile Image (Mega Satish)
|
| 126 |
+
│
|
| 127 |
+
├── Mini-Project/ # Research & Academic Assets
|
| 128 |
+
│ ├── TEXT SUMMARIZER.pdf # Technical Project Report (PDF)
|
| 129 |
+
│ ├── TEXT SUMMARIZER.pptx # Project Presentation (PPTX)
|
| 130 |
+
│ └── Text Summarizer Using Julia/ # Related Research Materials
|
| 131 |
+
│
|
| 132 |
+
├── Source Code/ # Application Implementation
|
| 133 |
+
│ ├── static/ # Frontend Assets (CSS/JS)
|
| 134 |
+
│ ├── templates/ # HTML Jinja2 Templates
|
| 135 |
+
│ ├── app.py # Main Flask Application
|
| 136 |
+
│ ├── nltk_summarization.py # NLTK Logic Module
|
| 137 |
+
│ ├── spacy_summarization.py # SpaCy Logic Module
|
| 138 |
+
│ ├── spacy_summarizer.py # SpaCy Helper Module
|
| 139 |
+
│ ├── Procfile # Heroku Deployment Config
|
| 140 |
+
│ └── requirements.txt # Dependency Manifest
|
| 141 |
+
│
|
| 142 |
+
├── .gitattributes # Line-Ending, Binary & Linguist Attributes
|
| 143 |
+
├── .gitignore # Asset Exclusion Manifest
|
| 144 |
+
├── CITATION.cff # Scholarly Citation Metadata
|
| 145 |
+
├── codemeta.json # Software Metadata Manifest
|
| 146 |
+
├── LICENSE # MIT License Terms
|
| 147 |
+
├── README.md # Comprehensive Archival Entrance
|
| 148 |
+
└── SECURITY.md # Vulnerability Exposure Policy
|
| 149 |
+
```
|
| 150 |
+
|
| 151 |
+
---
|
| 152 |
+
|
| 153 |
+
<!-- RESULTS GALLERY -->
|
| 154 |
+
## Results Gallery
|
| 155 |
+
|
| 156 |
+
### Application Interface
|
| 157 |
+
The interface provides a clean, side-by-side comparison of summarization results along with reading time metrics.
|
| 158 |
+
|
| 159 |
+
<div align="center">
|
| 160 |
+
|
| 161 |
+

|
| 162 |
+
|
| 163 |
+
</div>
|
| 164 |
+
|
| 165 |
+
---
|
| 166 |
+
|
| 167 |
+
<!-- QUICK START -->
|
| 168 |
+
## Quick Start
|
| 169 |
+
|
| 170 |
+
### 1. Prerequisites
|
| 171 |
+
Ensure your environment meets the following requirements:
|
| 172 |
+
- **Python**: Version **3.6** or higher.
|
| 173 |
+
- **Packages**: Flask, SpaCy, NLTK, Gensim, Sumy.
|
| 174 |
+
- **NLP Models**: `en_core_web_sm` (SpaCy), `stopwords/punkt` (NLTK).
|
| 175 |
+
|
| 176 |
+
> [!WARNING]
|
| 177 |
+
> **Technical Dependencies & Environment**
|
| 178 |
+
>
|
| 179 |
+
> This system requires **Python 3.6+** and multiple NLP libraries (SpaCy, NLTK, Gensim, Sumy). For stable execution, it is recommended to run this in an isolated virtual environment and ensure all SpaCy language models are downloaded prior to execution.
|
| 180 |
+
|
| 181 |
+
### 2. Setup & Installation
|
| 182 |
+
1. **Clone the Repository**:
|
| 183 |
+
```bash
|
| 184 |
+
git clone https://github.com/Amey-Thakur/TEXT-SUMMARIZER.git
|
| 185 |
+
cd TEXT-SUMMARIZER/Source\ Code
|
| 186 |
+
```
|
| 187 |
+
2. **Install Dependencies**:
|
| 188 |
+
```bash
|
| 189 |
+
pip install -r requirements.txt
|
| 190 |
+
python -m spacy download en_core_web_sm
|
| 191 |
+
```
|
| 192 |
+
|
| 193 |
+
### 3. Launch Application
|
| 194 |
+
1. **Run the Flask Server**:
|
| 195 |
+
```bash
|
| 196 |
+
python app.py
|
| 197 |
+
```
|
| 198 |
+
2. **Access the Interface**:
|
| 199 |
+
- Open your browser and navigate to `http://127.0.0.1:5000/`.
|
| 200 |
+
|
| 201 |
+
---
|
| 202 |
+
|
| 203 |
+
<!-- USAGE GUIDELINES -->
|
| 204 |
+
## Usage Guidelines
|
| 205 |
+
|
| 206 |
+
This repository is openly shared to support learning and knowledge exchange across the academic community.
|
| 207 |
+
|
| 208 |
+
**For Students**
|
| 209 |
+
Use this project as a reference for implementing NLP pipelines, understanding Flask web architecture, and integrating multiple machine learning libraries into a single application.
|
| 210 |
+
|
| 211 |
+
**For Educators**
|
| 212 |
+
This project may serve as a practical example or supplementary teaching resource for **Natural Language Processing (`DLO8012`)** and **Computational Lab II (`CSL804`)** as part of the **8th Semester Computer Engineering** curriculum. Attribution is appreciated when utilizing content.
|
| 213 |
+
|
| 214 |
+
**For Researchers**
|
| 215 |
+
The comparative framework allows for the evaluation of different extractive summarization algorithms on custom datasets, providing a baseline for further research into abstractive methods.
|
| 216 |
+
|
| 217 |
+
---
|
| 218 |
+
|
| 219 |
+
<!-- LICENSE -->
|
| 220 |
+
## License
|
| 221 |
+
|
| 222 |
+
This repository and all linked academic content are made available under the **MIT License**. See the [LICENSE](LICENSE) file for complete terms.
|
| 223 |
+
|
| 224 |
+
> [!NOTE]
|
| 225 |
+
> **Summary**: You are free to use, copy, modify, and distribute this content for any purpose, even commercially, as long as the copyright notice and permission notice are included in all copies or substantial portions of the work.
|
| 226 |
+
|
| 227 |
+
Copyright © 2018-2022 Amey Thakur, Mega Satish
|
| 228 |
+
|
| 229 |
+
---
|
| 230 |
+
|
| 231 |
+
<!-- ABOUT -->
|
| 232 |
+
## About This Repository
|
| 233 |
+
|
| 234 |
+
**Created & Maintained by**: [Amey Thakur](https://github.com/Amey-Thakur) & [Mega Satish](https://github.com/msatmod)
|
| 235 |
+
**Academic Journey**: Bachelor of Engineering in Computer Engineering (2018-2022)
|
| 236 |
+
**Institution**: [Terna Engineering College](https://ternaengg.ac.in/), Navi Mumbai
|
| 237 |
+
**University**: [University of Mumbai](https://mu.ac.in/)
|
| 238 |
+
|
| 239 |
+
This project features the **Text Summarizer**, a utility developed as an **8th Semester Mini-Project**. It represents a culmination of studies in computational linguistics and software engineering, delivering a functional tool for automated text analysis.
|
| 240 |
+
|
| 241 |
+
**Connect**: [GitHub](https://github.com/Amey-Thakur) · [LinkedIn](https://www.linkedin.com/in/amey-thakur) · [ORCID](https://orcid.org/0000-0001-5644-1575)
|
| 242 |
+
|
| 243 |
+
### Acknowledgments
|
| 244 |
+
|
| 245 |
+
Grateful acknowledgment to [**Mega Satish**](https://github.com/msatmod) for her exceptional collaboration and scholarly partnership during the development of this project. Her intellectual contributions, technical insights, and dedicated commitment to software quality were fundamental in achieving the system's analytical and functional objectives. Learning alongside her was a transformative experience; her thoughtful approach to problem-solving and encouragement turned challenges into meaningful learning moments. This work reflects the growth and insights gained from our side-by-side academic journey. Thank you, Mega, for everything you shared and taught along the way.
|
| 246 |
+
|
| 247 |
+
Grateful acknowledgment to the faculty members of the **Department of Computer Engineering** at Terna Engineering College for their guidance and instruction in Natural Language Processing. Their expertise in computational linguistics and algorithmic design helped shape the technical foundation of this project.
|
| 248 |
+
|
| 249 |
+
Special thanks to the mentors and peers whose encouragement, discussions, and support contributed meaningfully to this learning experience.
|
| 250 |
+
|
| 251 |
+
---
|
| 252 |
+
|
| 253 |
+
<div align="center">
|
| 254 |
+
|
| 255 |
+
[↑ Back to Top](#readme-top)
|
| 256 |
+
|
| 257 |
+
[Authors](#authors) · [Overview](#overview) · [Features](#features) · [Structure](#project-structure) · [Results](#results-gallery) · [Quick Start](#quick-start) · [Usage Guidelines](#usage-guidelines) · [License](#license) · [About](#about-this-repository) · [Acknowledgments](#acknowledgments)
|
| 258 |
+
|
| 259 |
+
<br>
|
| 260 |
+
|
| 261 |
+
🔬 **[Natural Language Processing Laboratory](https://github.com/Amey-Thakur/NATURAL-LANGUAGE-PROCESSING-AND-COMPUTATIONAL-LAB-II)** · 📝 **[Text Summarizer](https://github.com/Amey-Thakur/TEXT-SUMMARIZER)**
|
| 262 |
+
|
| 263 |
+
---
|
| 264 |
+
|
| 265 |
+
#### Presented as part of the 8th Semester Mini-Project @ Terna Engineering College
|
| 266 |
+
|
| 267 |
+
---
|
| 268 |
+
|
| 269 |
+
### 🎓 [Computer Engineering Repository](https://github.com/Amey-Thakur/COMPUTER-ENGINEERING)
|
| 270 |
+
|
| 271 |
+
**Computer Engineering (B.E.) - University of Mumbai**
|
| 272 |
+
|
| 273 |
+
*Semester-wise curriculum, laboratories, projects, and academic notes.*
|
| 274 |
+
|
| 275 |
+
</div>
|
SECURITY.md
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Security Policy
|
| 2 |
+
|
| 3 |
+
## Maintenance Status
|
| 4 |
+
|
| 5 |
+
This repository is part of a curated Computer Engineering project collection and is maintained in a finalized and stable state. The project is preserved as a complete and authoritative record, with its scope and contents intentionally fixed to ensure long-term academic and professional reference.
|
| 6 |
+
|
| 7 |
+
## Supported Versions
|
| 8 |
+
|
| 9 |
+
As a finalized project, only the version listed below is authoritative:
|
| 10 |
+
|
| 11 |
+
| Version | Supported |
|
| 12 |
+
| ------- | --------- |
|
| 13 |
+
| 1.0.0 | Yes |
|
| 14 |
+
|
| 15 |
+
## Vulnerability Reporting Protocol
|
| 16 |
+
|
| 17 |
+
In accordance with established academic and professional standards for security disclosure, security-related observations associated with this project are documented through formal scholarly channels.
|
| 18 |
+
|
| 19 |
+
To document a security concern, communication is facilitated with the project curators:
|
| 20 |
+
- **Primary Curator**: [Amey Thakur](https://github.com/Amey-Thakur)
|
| 21 |
+
- **Collaborators**: [Mega Satish](https://github.com/msatmod)
|
| 22 |
+
- **Method**: Reports are submitted via the repository’s [GitHub Issues](https://github.com/Amey-Thakur/TEXT-SUMMARIZER/issues) interface to formally record security-related findings.
|
| 23 |
+
|
| 24 |
+
Submissions include:
|
| 25 |
+
1. A precise and technically accurate description of the identified issue.
|
| 26 |
+
2. Demonstrable steps or technical evidence sufficient to contextualize the finding.
|
| 27 |
+
3. An explanation of the issue’s relevance within the defined scope of the project.
|
| 28 |
+
|
| 29 |
+
## Implementation Context: Python, Flask & NLP Libraries
|
| 30 |
+
This project is implemented using Python, Flask, SpaCy, NLTK, Gensim, and Sumy, as defined by the project’s technical scope.
|
| 31 |
+
|
| 32 |
+
- **Execution Environment**: Execution occurs within standard Python 3.x runtime environments.
|
| 33 |
+
- **Scope Limitation**: This policy applies exclusively to the source code and academic materials contained within this repository and does not extend to the underlying runtime environments, library dependencies, or host-level security guarantees.
|
| 34 |
+
|
| 35 |
+
## Technical Integrity Statement
|
| 36 |
+
|
| 37 |
+
This repository is preserved as a fixed academic and engineering project. Security-related submissions are recorded for documentation and contextual reference and do not imply active monitoring, response obligations, or subsequent modification of the repository.
|
| 38 |
+
|
| 39 |
+
---
|
| 40 |
+
|
| 41 |
+
*This document defines the security posture of a finalized Computer Engineering project.*
|
Source Code/Procfile
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
web: gunicorn app:app
|
| 2 |
+
|
Source Code/app.py
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
@file app.py
|
| 3 |
+
@description Main application entry point for the Text Summarizer. Handles Flask routes,
|
| 4 |
+
integrates summarization algorithms (SpaCy, NLTK, Gensim, Sumy), and manages data processing
|
| 5 |
+
for text and URL inputs.
|
| 6 |
+
|
| 7 |
+
@author Amey Thakur <https://github.com/Amey-Thakur>
|
| 8 |
+
@author Mega Satish <https://github.com/msatmod>
|
| 9 |
+
@created 2022-08-09
|
| 10 |
+
@repository https://github.com/Amey-Thakur/TEXT-SUMMARIZER
|
| 11 |
+
@license MIT
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
from __future__ import unicode_literals
|
| 15 |
+
from flask import Flask,render_template,url_for,request
|
| 16 |
+
|
| 17 |
+
# Import project-local and third-party summarization modules
|
| 18 |
+
from spacy_summarization import text_summarizer # SpaCy-based summarization logic
|
| 19 |
+
from summa.summarizer import summarize # Summa (Gensim TextRank fork) implementation
|
| 20 |
+
from nltk_summarization import nltk_summarizer # NLTK frequency-based summarization
|
| 21 |
+
import time
|
| 22 |
+
import spacy
|
| 23 |
+
|
| 24 |
+
# Initialize SpaCy's English model for Natural Language Processing tasks
|
| 25 |
+
nlp = spacy.load("en_core_web_sm")
|
| 26 |
+
app = Flask(__name__)
|
| 27 |
+
|
| 28 |
+
# Web Scraping Pkg
|
| 29 |
+
from bs4 import BeautifulSoup
|
| 30 |
+
from urllib.request import urlopen
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
# Sumy Package Imports for LexRank Algorithm
|
| 34 |
+
from sumy.parsers.plaintext import PlaintextParser
|
| 35 |
+
from sumy.nlp.tokenizers import Tokenizer
|
| 36 |
+
from sumy.summarizers.lex_rank import LexRankSummarizer
|
| 37 |
+
|
| 38 |
+
def sumy_summary(docx):
    """
    Summarize a document with Sumy's LexRank summarizer.

    @param docx (str): The raw text to summarize.
    @return (str): The three sentences selected by LexRank, joined by single spaces.
    """
    document = PlaintextParser.from_string(docx, Tokenizer("english")).document
    selected_sentences = LexRankSummarizer()(document, 3)  # keep 3 sentences
    return ' '.join(map(str, selected_sentences))
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def readingTime(mytext):
    """
    Estimate how long a text takes to read.

    @param mytext (str): The text to measure.
    @return (float): Estimated minutes, computed as the number of tokens the
        SpaCy pipeline yields for the text divided by 200.0.
    """
    # Every token produced by the pipeline is counted once.
    token_count = sum(1 for _ in nlp(mytext))
    return token_count / 200.0
|
| 63 |
+
|
| 64 |
+
# Fetch Text From Url
|
| 65 |
+
def get_text(url):
    """
    Scrapes and processes textual content from a valid URL.

    Only ``http`` and ``https`` URLs are accepted. ``urlopen`` also understands
    other schemes (for example ``file://``), which would let a submitted URL
    read files from the machine running the app.

    @param url (str): The HTTP(S) URL of the target webpage.
    @return fetched_text (str): The text of every <p> element, joined by single spaces.
    @raises ValueError: If the URL scheme is not http or https.
    """
    from urllib.parse import urlparse  # local import keeps the block self-contained

    target = url.strip()
    if urlparse(target).scheme.lower() not in ("http", "https"):
        raise ValueError("Unsupported URL scheme: only http and https are allowed.")

    # NOTE(review): this does not stop requests to internal/private hosts;
    # add host filtering if the app is exposed to untrusted users.
    # The context manager closes the response even if parsing fails, and the
    # timeout stops a stalled server from blocking the request indefinitely.
    with urlopen(target, timeout=15) as page:
        soup = BeautifulSoup(page, "html.parser")  # Parse HTML content
    fetched_text = ' '.join(p.text for p in soup.find_all('p'))  # Extract text from <p> tags
    return fetched_text
|
| 76 |
+
|
| 77 |
+
@app.route('/')
def index():
    """Serve the landing page by rendering the ``index.html`` template."""
    landing_template = 'index.html'
    return render_template(landing_template)
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
@app.route('/analyze',methods=['GET','POST'])
def analyze():
    """
    Route to handle direct text input for summarization.

    POST: reads the ``rawtext`` form field, summarizes it with the SpaCy-based
    summarizer and renders ``index.html`` with the original text, the summary,
    both reading-time estimates and the elapsed processing time in seconds.
    GET: renders ``index.html`` without any results.
    """
    start = time.time()

    # A GET request carries no form data. Without this guard the function would
    # reach render_template with the result variables unassigned and fail with
    # UnboundLocalError.
    if request.method != 'POST':
        return render_template('index.html')

    rawtext = request.form['rawtext']
    # Calculate metrics for original text
    final_reading_time = readingTime(rawtext)
    # Generate summary using SpaCy-based custom algorithm
    final_summary = text_summarizer(rawtext)
    summary_reading_time = readingTime(final_summary)
    final_time = time.time() - start
    return render_template(
        'index.html',
        ctext=rawtext,
        final_summary=final_summary,
        final_time=final_time,
        final_reading_time=final_reading_time,
        summary_reading_time=summary_reading_time,
    )
|
| 99 |
+
|
| 100 |
+
@app.route('/analyze_url',methods=['GET','POST'])
def analyze_url():
    """
    Route to handle URL-based input for summarization.

    POST: reads the ``raw_url`` form field, fetches the page text with
    ``get_text``, summarizes it with the SpaCy-based summarizer and renders
    ``index.html`` with the fetched text, the summary, both reading-time
    estimates and the elapsed processing time in seconds. If the page cannot
    be fetched, ``index.html`` is rendered with an error message in place of
    the summary and zero reading times.
    GET: renders ``index.html`` without any results.
    """
    start = time.time()

    # A GET request carries no form data. Without this guard the function would
    # reach render_template with the result variables unassigned and fail with
    # UnboundLocalError.
    if request.method != 'POST':
        return render_template('index.html')

    raw_url = request.form['raw_url']
    try:
        rawtext = get_text(raw_url)
    except (ValueError, OSError):
        # urlopen raises ValueError for URLs it cannot interpret and URLError
        # (an OSError subclass) for network/HTTP failures. Report the failure
        # in the page, mirroring the "Error: ..." / 0 convention of comparer().
        return render_template(
            'index.html',
            ctext='',
            final_summary="Error: Could not fetch text from the given URL.",
            final_time=time.time() - start,
            final_reading_time=0,
            summary_reading_time=0,
        )

    final_reading_time = readingTime(rawtext)
    final_summary = text_summarizer(rawtext)
    summary_reading_time = readingTime(final_summary)
    final_time = time.time() - start
    return render_template(
        'index.html',
        ctext=rawtext,
        final_summary=final_summary,
        final_time=final_time,
        final_reading_time=final_reading_time,
        summary_reading_time=summary_reading_time,
    )
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
@app.route('/compare_summary')
def compare_summary():
    """Render the ``compare_summary.html`` template without any result data."""
    comparison_template = 'compare_summary.html'
    return render_template(comparison_template)
|
| 122 |
+
|
| 123 |
+
def _summarize_or_error(summarizer, text):
    """
    Run one summarizer and time-estimate its output.

    Returns ``(summary, reading_time)``. If the summarizer (or the reading-time
    calculation) raises, returns the user-facing error message and ``0`` so a
    single failing algorithm does not break the whole comparison page.
    """
    try:
        summary = summarizer(text)
        return summary, readingTime(summary)
    except Exception:
        return "Error: Text too short or processing failed.", 0


@app.route('/comparer', methods=['GET', 'POST'])
def comparer():
    """
    Comparative analysis route.

    Runs multiple summarization algorithms (SpaCy, Gensim, NLTK, Sumy) on the
    same input to allow side-by-side performance and quality comparison.

    POST: reads the ``rawtext`` form field and renders ``compare_summary.html``
    with each summary, its reading time, the original text's reading time and
    the total elapsed time.

    GET: renders the plain ``compare_summary.html`` page. The route accepts
    GET, but the handler used to fall through without returning anything,
    which Flask rejects as an invalid response.
    """
    if request.method != 'POST':
        return render_template('compare_summary.html')

    start = time.time()
    rawtext = request.form['rawtext']
    final_reading_time = readingTime(rawtext)

    # 1. SpaCy Summarizer
    final_summary_spacy, summary_reading_time = _summarize_or_error(text_summarizer, rawtext)
    # 2. Gensim Summarizer (Summa)
    final_summary_gensim, summary_reading_time_gensim = _summarize_or_error(summarize, rawtext)
    # 3. NLTK Summarizer (Frequency Dist)
    final_summary_nltk, summary_reading_time_nltk = _summarize_or_error(nltk_summarizer, rawtext)
    # 4. Sumy Summarizer (LexRank)
    final_summary_sumy, summary_reading_time_sumy = _summarize_or_error(sumy_summary, rawtext)

    final_time = time.time() - start
    return render_template(
        'compare_summary.html',
        ctext=rawtext,
        final_summary_spacy=final_summary_spacy,
        final_summary_gensim=final_summary_gensim,
        final_summary_nltk=final_summary_nltk,
        final_time=final_time,
        final_reading_time=final_reading_time,
        summary_reading_time=summary_reading_time,
        summary_reading_time_gensim=summary_reading_time_gensim,
        final_summary_sumy=final_summary_sumy,
        summary_reading_time_sumy=summary_reading_time_sumy,
        summary_reading_time_nltk=summary_reading_time_nltk,
    )
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
@app.route('/about')
def about():
    """Render ``index.html`` for the ``/about`` URL (same template as the root route)."""
    about_template = 'index.html'
    return render_template(about_template)
|
| 176 |
+
|
| 177 |
+
@app.errorhandler(404)
def page_not_found(e):
    """Render the ``404.html`` template and keep the 404 status code.

    ``e`` is the error passed in by Flask; it is not used.
    """
    not_found_status = 404
    return render_template('404.html'), not_found_status
|
| 180 |
+
|
| 181 |
+
if __name__ == '__main__':
    # Entry point for running the app directly with the built-in server.
    # Debug mode enables the interactive debugger, which allows arbitrary code
    # execution from the browser, so it is opt-in through FLASK_DEBUG instead
    # of being hard-coded on.
    import os

    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    app.run(debug=debug_enabled)
|
Source Code/download_nltk.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import nltk
|
| 2 |
+
|
| 3 |
+
def download_nltk_data(packages=('wordnet', 'stopwords', 'punkt')):
    """
    Download the NLTK data packages the application needs.

    ``nltk.download`` reports a failed download by returning ``False`` rather
    than raising, so each result is checked; previously the success message
    was printed even when a package could not be fetched.

    @param packages (iterable of str): NLTK package identifiers to download.
    @return ok (bool): True if every package downloaded, False otherwise.
    """
    failed = []
    for package in packages:
        try:
            if not nltk.download(package):
                failed.append(package)
        except Exception as e:
            # Best-effort: report the problem and carry on with the remaining packages.
            print(f"Error downloading NLTK data: {e}")
            failed.append(package)

    if failed:
        print(f"Error downloading NLTK data: could not download {', '.join(failed)}")
        return False

    print("NLTK data downloaded successfully.")
    return True
|
| 11 |
+
|
| 12 |
+
if __name__ == "__main__":
|
| 13 |
+
download_nltk_data()
|
Source Code/nltk.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
wordnet
|
| 2 |
+
stopwords
|
| 3 |
+
punkt
|
Source Code/nltk_summarization.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
@file nltk_summarization.py
|
| 3 |
+
@description Implementation of text summarization logic using the NLTK library.
|
| 4 |
+
Provides functions to calculate sentence scores based on word frequency.
|
| 5 |
+
|
| 6 |
+
@author Amey Thakur <https://github.com/Amey-Thakur>
|
| 7 |
+
@author Mega Satish <https://github.com/msatmod>
|
| 8 |
+
@created 2022-08-09
|
| 9 |
+
@repository https://github.com/Amey-Thakur/TEXT-SUMMARIZER
|
| 10 |
+
@license MIT
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
import nltk
|
| 14 |
+
from nltk.corpus import stopwords
|
| 15 |
+
from nltk.tokenize import word_tokenize, sent_tokenize
|
| 16 |
+
import heapq
|
| 17 |
+
|
| 18 |
+
def nltk_summarizer(raw_text, num_sentences=7, max_sentence_words=30):
    """
    Generates an extractive text summary using NLTK based on word frequency distribution.

    @param raw_text (str): The original text content to be summarized.
    @param num_sentences (int): Maximum number of sentences to keep (default 7).
    @param max_sentence_words (int): Sentences with this many or more
        space-separated words are not scored (default 30).
    @return summary (str): The top-ranked sentences joined by single spaces,
        highest score first.
    @raises ValueError: If the text contains no scorable words.
    """
    stop_words = set(stopwords.words("english"))

    # 1. Calculate Word Frequencies
    # Tokens are lower-cased before counting: the stop-word list is lower-case
    # and step 3 looks words up in lower case, so counting the original casing
    # made capitalised words unscorable. Pure punctuation is skipped so that
    # commas and full stops do not dominate the frequency table.
    word_frequencies = {}
    for token in nltk.word_tokenize(raw_text):
        word = token.lower()
        if word in stop_words or not any(ch.isalnum() for ch in word):
            continue
        word_frequencies[word] = word_frequencies.get(word, 0) + 1

    if not word_frequencies:
        # Same exception type max() raised on an empty table, with a clearer message.
        raise ValueError("Text contains no words that can be scored.")

    # 2. Normalize Frequencies
    # Scale word frequencies by dividing by the maximum frequency to get weighted scores
    maximum_frequency = max(word_frequencies.values())
    for word in word_frequencies:
        word_frequencies[word] /= maximum_frequency

    # 3. Calculate Sentence Scores
    # Score sentences by summing the weighted frequencies of their constituent words
    sentence_scores = {}
    for sent in nltk.sent_tokenize(raw_text):
        # Restrict to shorter sentences to avoid excessive length
        if len(sent.split(' ')) >= max_sentence_words:
            continue
        for word in nltk.word_tokenize(sent.lower()):
            weight = word_frequencies.get(word)
            if weight is not None:
                sentence_scores[sent] = sentence_scores.get(sent, 0) + weight

    # 4. Generate Summary
    # Select the sentences with the highest cumulative scores
    summary_sentences = heapq.nlargest(num_sentences, sentence_scores, key=sentence_scores.get)
    return ' '.join(summary_sentences)
|
Source Code/pyvenv.cfg
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
home = /Library/Frameworks/Python.framework/Versions/3.8
|
| 2 |
+
implementation = CPython
|
| 3 |
+
version_info = 3.8.8.final.0
|
| 4 |
+
virtualenv = 20.7.0
|
| 5 |
+
include-system-site-packages = false
|
| 6 |
+
base-prefix = /Library/Frameworks/Python.framework/Versions/3.8
|
| 7 |
+
base-exec-prefix = /Library/Frameworks/Python.framework/Versions/3.8
|
| 8 |
+
base-executable = /Library/Frameworks/Python.framework/Versions/3.8/bin/python3.8
|
Source Code/requirements.txt
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Flask==3.0.0
|
| 2 |
+
beautifulsoup4==4.12.3
|
| 3 |
+
spacy==3.7.4
|
| 4 |
+
gensim==4.3.2
|
| 5 |
+
summa==1.2.0
|
| 6 |
+
nltk==3.8.1
|
| 7 |
+
sumy==0.11.0
|
| 8 |
+
gunicorn==21.2.0
|
| 9 |
+
requests==2.31.0
|
| 10 |
+
lxml==5.1.0
|
| 11 |
+
# Pin Scipy/Numpy to avoid Gensim ImportError: cannot import name 'triu' from 'scipy.linalg'
|
| 12 |
+
scipy==1.12.0
|
| 13 |
+
numpy<2.0.0
|
| 14 |
+
# Direct URL for SpaCy model
|
| 15 |
+
https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl
|
Source Code/spacy_summarization.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# NLP Pkgs
|
| 2 |
+
"""
|
| 3 |
+
@file spacy_summarization.py
|
| 4 |
+
@description Implementation of text summarization logic using the SpaCy library.
|
| 5 |
+
Utilizes tokenization, stop-word removal, and sentence weighting for summary generation.
|
| 6 |
+
|
| 7 |
+
@author Amey Thakur <https://github.com/Amey-Thakur>
|
| 8 |
+
@author Mega Satish <https://github.com/msatmod>
|
| 9 |
+
@created 2022-08-09
|
| 10 |
+
@repository https://github.com/Amey-Thakur/TEXT-SUMMARIZER
|
| 11 |
+
@license MIT
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import spacy
|
| 15 |
+
nlp = spacy.load("en_core_web_sm")
|
| 16 |
+
# Pkgs for Normalizing Text
|
| 17 |
+
from spacy.lang.en.stop_words import STOP_WORDS
|
| 18 |
+
from string import punctuation
|
| 19 |
+
# Import Heapq for Finding the Top N Sentences
|
| 20 |
+
from heapq import nlargest
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def text_summarizer(raw_docx, num_sentences=7, max_sentence_words=30):
    """
    Generates an extractive summary using SpaCy NLP pipeline.
    Scores each word by its frequency relative to the most frequent word and
    ranks sentences by the sum of their word scores.

    @param raw_docx (str): The raw input text string to be summarized.
    @param num_sentences (int): Maximum number of sentences to keep (default 7).
    @param max_sentence_words (int): Sentences with this many or more
        space-separated words are not scored (default 30).
    @return summary (str): The top-ranked sentences joined by single spaces,
        highest score first.
    @raises ValueError: If the text contains no scorable words.
    """
    docx = nlp(raw_docx)  # Process text through SpaCy pipeline

    # 1. Build Word Frequency Distribution
    # Words are lower-cased before counting: STOP_WORDS is lower-case and step 4
    # looks tokens up in lower case, so counting the original casing made
    # capitalised words unscorable. Punctuation and whitespace tokens are skipped.
    word_frequencies = {}
    for token in docx:
        if token.is_punct or token.is_space:
            continue
        word = token.text.lower()
        if word in STOP_WORDS:
            continue
        word_frequencies[word] = word_frequencies.get(word, 0) + 1

    if not word_frequencies:
        # Same exception type max() raised on an empty table, with a clearer message.
        raise ValueError("Text contains no words that can be scored.")

    # 2. Normalize Word Frequencies
    # Scale frequencies to (0, 1] to determine relative word importance
    maximum_frequency = max(word_frequencies.values())
    for word in word_frequencies:
        word_frequencies[word] /= maximum_frequency

    # 3/4. Sentence Tokenization and Scoring
    # Aggregate weighted word scores to determine overall sentence significance
    sentence_scores = {}
    for sent in docx.sents:
        # Filter out long sentences to maintain summary conciseness
        if len(sent.text.split(' ')) >= max_sentence_words:
            continue
        for token in sent:
            weight = word_frequencies.get(token.text.lower())
            if weight is not None:
                sentence_scores[sent] = sentence_scores.get(sent, 0) + weight

    # 5. Extract Top Sentences
    summarized_sentences = nlargest(num_sentences, sentence_scores, key=sentence_scores.get)
    return ' '.join(sent.text for sent in summarized_sentences)
|
| 76 |
+
|
Source Code/spacy_summarizer.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# NLP Pkgs
|
| 2 |
+
"""
|
| 3 |
+
@file spacy_summarizer.py
|
| 4 |
+
@description Auxiliary module for SpaCy-based text summarization.
|
| 5 |
+
Contains logic for text processing and sentence ranking.
|
| 6 |
+
|
| 7 |
+
@author Amey Thakur <https://github.com/Amey-Thakur>
|
| 8 |
+
@author Mega Satish <https://github.com/msatmod>
|
| 9 |
+
@created 2022-08-09
|
| 10 |
+
@repository https://github.com/Amey-Thakur/TEXT-SUMMARIZER
|
| 11 |
+
@license MIT
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import spacy
# spaCy 3 no longer accepts the 'en' shortcut; load the installed English
# pipeline by its full package name (the one pinned in requirements.txt).
nlp = spacy.load("en_core_web_sm")
|
| 16 |
+
# Pkgs for Normalizing Text
|
| 17 |
+
from spacy.lang.en.stop_words import STOP_WORDS
|
| 18 |
+
from string import punctuation
|
| 19 |
+
# Import Heapq for Finding the Top N Sentences
|
| 20 |
+
from heapq import nlargest
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def text_summarizer(raw_docx, num_sentences=7, max_sentence_words=30):
    """
    Summarize text with SpaCy, print the original and the summary with their
    lengths, and return the summary.

    @param raw_docx (str): The raw input text string to be summarized.
    @param num_sentences (int): Maximum number of sentences to keep (default 7).
    @param max_sentence_words (int): Sentences with this many or more
        space-separated words are not scored (default 30).
    @return summary (str): The top-ranked sentences joined by single spaces.
        Previously the function only printed the summary and returned None.
    @raises ValueError: If the text contains no scorable words.
    """
    docx = nlp(raw_docx)

    # Build Word Frequency. Words are lower-cased before counting because the
    # stop-word set is lower-case and sentence scoring looks tokens up in lower
    # case; counting the original casing made capitalised words unscorable.
    word_frequencies = {}
    for token in docx:
        word = token.text.lower()
        if word in STOP_WORDS:
            continue
        word_frequencies[word] = word_frequencies.get(word, 0) + 1

    if not word_frequencies:
        # Same exception type max() raised on an empty table, with a clearer message.
        raise ValueError("Text contains no words that can be scored.")

    # Normalize each count by the highest count
    maximum_frequency = max(word_frequencies.values())
    for word in word_frequencies:
        word_frequencies[word] /= maximum_frequency

    # Sentence Scores: sum of the word weights, skipping long sentences
    sentence_scores = {}
    for sent in docx.sents:
        if len(sent.text.split(' ')) >= max_sentence_words:
            continue
        for token in sent:
            weight = word_frequencies.get(token.text.lower())
            if weight is not None:
                sentence_scores[sent] = sentence_scores.get(sent, 0) + weight

    summarized_sentences = nlargest(num_sentences, sentence_scores, key=sentence_scores.get)
    summary = ' '.join(sent.text for sent in summarized_sentences)

    print("Original Document\n")
    print(raw_docx)
    print("Total Length:", len(raw_docx))
    print('\n\nSummarized Document\n')
    print(summary)
    print("Total Length:", len(summary))
    return summary
|
| 66 |
+
|
Source Code/static/css/custom.css
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* @file custom.css
|
| 3 |
+
* @description Custom cascading style sheet overriding framework defaults
|
| 4 |
+
* to align with the application's aesthetic themes.
|
| 5 |
+
*
|
| 6 |
+
* @author Amey Thakur <https://github.com/Amey-Thakur>
|
| 7 |
+
* @author Mega Satish <https://github.com/msatmod>
|
| 8 |
+
* @created 2022-08-09
|
| 9 |
+
* @repository https://github.com/Amey-Thakur/TEXT-SUMMARIZER
|
| 10 |
+
* @license MIT
|
| 11 |
+
*/
|
| 12 |
+
|
| 13 |
+
/* Custom Stylesheet */
|
| 14 |
+
/**
|
| 15 |
+
* Use this file to override Materialize files so you can update
|
| 16 |
+
* the core Materialize files in the future
|
| 17 |
+
*
|
| 18 |
+
* Made By MaterializeCSS.com
|
| 19 |
+
*/
|
| 20 |
+
|
| 21 |
+
nav ul a,
|
| 22 |
+
nav .brand-logo {
|
| 23 |
+
color: rgb(255, 255, 255);
|
| 24 |
+
}
|
| 25 |
+
|
| 26 |
+
p {
|
| 27 |
+
line-height: 2rem;
|
| 28 |
+
}
|
| 29 |
+
|
| 30 |
+
.sidenav-trigger {
|
| 31 |
+
color: #00ffff;
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
.parallax-container {
|
| 35 |
+
min-height: 380px;
|
| 36 |
+
line-height: 0;
|
| 37 |
+
height: auto;
|
| 38 |
+
color: rgba(255, 255, 255, .9);
|
| 39 |
+
}
|
| 40 |
+
|
| 41 |
+
.parallax-container .section {
|
| 42 |
+
width: 100%;
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
@media only screen and (max-width : 992px) {
|
| 46 |
+
.parallax-container .section {
|
| 47 |
+
position: absolute;
|
| 48 |
+
top: 40%;
|
| 49 |
+
}
|
| 50 |
+
|
| 51 |
+
#index-banner .section {
|
| 52 |
+
top: 10%;
|
| 53 |
+
}
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
@media only screen and (max-width : 600px) {
|
| 57 |
+
#index-banner .section {
|
| 58 |
+
top: 0;
|
| 59 |
+
}
|
| 60 |
+
}
|
| 61 |
+
|
| 62 |
+
.icon-block {
|
| 63 |
+
padding: 0 15px;
|
| 64 |
+
}
|
| 65 |
+
|
| 66 |
+
.icon-block .material-icons {
|
| 67 |
+
font-size: inherit;
|
| 68 |
+
}
|
| 69 |
+
|
| 70 |
+
footer.page-footer {
|
| 71 |
+
margin: 0;
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
/* Security & Anti-Inspection Styles */
|
| 75 |
+
body {
|
| 76 |
+
user-select: none;
|
| 77 |
+
-webkit-user-select: none;
|
| 78 |
+
-moz-user-select: none;
|
| 79 |
+
-ms-user-select: none;
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
img {
|
| 83 |
+
-webkit-user-drag: none;
|
| 84 |
+
user-drag: none;
|
| 85 |
+
pointer-events: none;
|
| 86 |
+
}
|
| 87 |
+
|
| 88 |
+
.security-icon {
|
| 89 |
+
animation: securityPulse 2s infinite;
|
| 90 |
+
}
|
| 91 |
+
|
| 92 |
+
@keyframes securityPulse {
|
| 93 |
+
0% {
|
| 94 |
+
transform: scale(1);
|
| 95 |
+
opacity: 1;
|
| 96 |
+
}
|
| 97 |
+
|
| 98 |
+
50% {
|
| 99 |
+
transform: scale(1.1);
|
| 100 |
+
opacity: 0.8;
|
| 101 |
+
}
|
| 102 |
+
|
| 103 |
+
100% {
|
| 104 |
+
transform: scale(1);
|
| 105 |
+
opacity: 1;
|
| 106 |
+
}
|
| 107 |
+
}
|
Source Code/static/css/materialize.css
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Source Code/static/css/materialize.min.css
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Source Code/static/js/init.js
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* @file init.js
|
| 3 |
+
* @description Frontend initialization script for Materialize CSS components
|
| 4 |
+
* and interactive DOM elements.
|
| 5 |
+
*
|
| 6 |
+
* @author Amey Thakur <https://github.com/Amey-Thakur>
|
| 7 |
+
* @author Mega Satish <https://github.com/msatmod>
|
| 8 |
+
* @created 2022-08-09
|
| 9 |
+
* @repository https://github.com/Amey-Thakur/TEXT-SUMMARIZER
|
| 10 |
+
* @license MIT
|
| 11 |
+
*/
|
| 12 |
+
|
| 13 |
+
(function ($) {
    // Hero slider settings: no navigation dots, fixed 500px height,
    // 500ms transitions and 6000ms per slide.
    const heroSliderOptions = {
        indicators: false,
        height: 500,
        transition: 500,
        interval: 6000
    };

    // Wire up every Materialize component used by the pages.
    function initMaterializeComponents() {
        $('.sidenav').sidenav();                                       // responsive side navigation
        $('.parallax').parallax();                                     // parallax banners
        $('.tabs').tabs();                                             // tab navigation
        $('.carousel.carousel-slider').carousel({ fullWidth: true });  // feature carousel
        $('.slider').slider(heroSliderOptions);                        // hero slider
    }

    // Run once the document is ready.
    $(initMaterializeComponents);
})(jQuery); // jQuery is passed in so `$` is safe to use inside
|
| 41 |
+
|
| 42 |
+
// ----------------------------------------------------------------------------
|
| 43 |
+
// Security & Anti-Inspection Protocols
|
| 44 |
+
// ----------------------------------------------------------------------------
|
| 45 |
+
|
| 46 |
+
document.addEventListener('DOMContentLoaded', () => {
    const securityOverlay = document.getElementById('securityOverlay');
    const dismissBtn = document.getElementById('dismissSecurity');

    // Show or hide the overlay and toggle the pulse animation on its first <div>.
    // Does nothing on pages that have no overlay element.
    function setOverlayVisible(visible) {
        if (!securityOverlay) return;
        securityOverlay.style.display = visible ? 'flex' : 'none';
        const icon = securityOverlay.querySelector('div');
        if (icon) icon.classList.toggle('security-icon', visible);
    }

    // Show Security Overlay
    function showSecurityAlert() {
        setOverlayVisible(true);
    }

    // Disable Right Click
    document.addEventListener('contextmenu', (e) => {
        e.preventDefault();
        showSecurityAlert();
    });

    // Disable Keyboard Shortcuts for Inspection
    document.addEventListener('keydown', (e) => {
        // Some browser-generated keydown events (e.g. form autofill) carry no
        // `key`; calling toUpperCase() on them used to throw.
        const key = typeof e.key === 'string' ? e.key.toUpperCase() : '';

        const isInspectionShortcut =
            key === 'F12' ||                                              // F12
            (e.ctrlKey && e.shiftKey && ['I', 'J', 'C'].includes(key)) || // Ctrl+Shift+I/J/C (DevTools)
            (e.ctrlKey && key === 'U');                                   // Ctrl+U (View Source)

        if (isInspectionShortcut) {
            e.preventDefault();
            showSecurityAlert();
        }
    });

    // Dismiss Security Overlay (also resets the animation)
    if (dismissBtn) {
        dismissBtn.addEventListener('click', () => setOverlayVisible(false));
    }

    // Console Easter Egg (Self-XSS Warning)
    console.log("%cStop!", "color: red; font-size: 50px; font-weight: bold; text-shadow: 2px 2px 0px black;");
    console.log("%cThis is a browser feature intended for developers. If someone told you to copy-paste something here to enable a feature or 'hack' someone's account, it is a scam and will give them access to your account.", "color: white; font-size: 18px; font-weight: bold;");
    console.log("%cSee https://en.wikipedia.org/wiki/Self-XSS for more information.", "color: cyan; font-size: 16px; font-weight: bold; text-decoration: underline;");
});
|
| 100 |
+
|
| 101 |
+
|
Source Code/static/js/materialize.js
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Source Code/static/js/materialize.min.js
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Source Code/templates/404.html
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
|
| 4 |
+
<head>
|
| 5 |
+
<meta charset="UTF-8">
|
| 6 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 7 |
+
<title>Text Summarizer by Amey & Mega</title>
|
| 8 |
+
<link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet">
|
| 9 |
+
{# Resolve through url_for so the stylesheet still loads when the 404 is served for a deeply nested URL #}
<link href="{{ url_for('static', filename='css/materialize.css') }}" type="text/css" rel="stylesheet" />
|
| 10 |
+
<style>
|
| 11 |
+
body {
|
| 12 |
+
background-color: #0d0d0d;
|
| 13 |
+
color: #ffffff;
|
| 14 |
+
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
|
| 15 |
+
display: flex;
|
| 16 |
+
flex-direction: column;
|
| 17 |
+
align-items: center;
|
| 18 |
+
justify-content: center;
|
| 19 |
+
height: 100vh;
|
| 20 |
+
margin: 0;
|
| 21 |
+
overflow: hidden;
|
| 22 |
+
text-align: center;
|
| 23 |
+
}
|
| 24 |
+
|
| 25 |
+
.glitch-container {
|
| 26 |
+
position: relative;
|
| 27 |
+
margin-bottom: 20px;
|
| 28 |
+
}
|
| 29 |
+
|
| 30 |
+
.error-code {
|
| 31 |
+
font-size: 120px;
|
| 32 |
+
font-weight: 900;
|
| 33 |
+
letter-spacing: 10px;
|
| 34 |
+
color: #ffffff;
|
| 35 |
+
text-shadow: 2px 2px 0px #ff0000, -2px -2px 0px #00ff00;
|
| 36 |
+
animation: glitch 1s infinite;
|
| 37 |
+
position: relative;
|
| 38 |
+
z-index: 2;
|
| 39 |
+
line-height: 1;
|
| 40 |
+
margin: 0;
|
| 41 |
+
}
|
| 42 |
+
|
| 43 |
+
@keyframes glitch {
|
| 44 |
+
0% {
|
| 45 |
+
transform: translate(0, 0);
|
| 46 |
+
}
|
| 47 |
+
|
| 48 |
+
20% {
|
| 49 |
+
transform: translate(-2px, 2px);
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
40% {
|
| 53 |
+
transform: translate(-2px, -2px);
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
60% {
|
| 57 |
+
transform: translate(2px, 2px);
|
| 58 |
+
}
|
| 59 |
+
|
| 60 |
+
80% {
|
| 61 |
+
transform: translate(2px, -2px);
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
100% {
|
| 65 |
+
transform: translate(0, 0);
|
| 66 |
+
}
|
| 67 |
+
}
|
| 68 |
+
|
| 69 |
+
.error-message {
|
| 70 |
+
font-size: 24px;
|
| 71 |
+
margin-bottom: 40px;
|
| 72 |
+
color: #888;
|
| 73 |
+
text-transform: uppercase;
|
| 74 |
+
letter-spacing: 2px;
|
| 75 |
+
position: relative;
|
| 76 |
+
z-index: 3;
|
| 77 |
+
margin-top: 10px;
|
| 78 |
+
}
|
| 79 |
+
|
| 80 |
+
.home-btn {
|
| 81 |
+
padding: 15px 40px;
|
| 82 |
+
font-size: 16px;
|
| 83 |
+
background: transparent;
|
| 84 |
+
color: white;
|
| 85 |
+
border: 2px solid white;
|
| 86 |
+
border-radius: 0;
|
| 87 |
+
cursor: pointer;
|
| 88 |
+
transition: all 0.3s ease;
|
| 89 |
+
text-decoration: none;
|
| 90 |
+
text-transform: uppercase;
|
| 91 |
+
letter-spacing: 1px;
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
.home-btn:hover {
|
| 95 |
+
background: white;
|
| 96 |
+
color: black;
|
| 97 |
+
}
|
| 98 |
+
|
| 99 |
+
.footer {
|
| 100 |
+
position: fixed;
|
| 101 |
+
bottom: 0;
|
| 102 |
+
width: 100%;
|
| 103 |
+
background-color: black;
|
| 104 |
+
color: white;
|
| 105 |
+
padding: 15px;
|
| 106 |
+
text-align: center;
|
| 107 |
+
font-size: 15px;
|
| 108 |
+
border-top: 1px solid #333;
|
| 109 |
+
}
|
| 110 |
+
</style>
|
| 111 |
+
</head>
|
| 112 |
+
|
| 113 |
+
<body>
|
| 114 |
+
|
| 115 |
+
<div class="glitch-container">
|
| 116 |
+
<div class="error-code">404</div>
|
| 117 |
+
</div>
|
| 118 |
+
<div class="error-message">Summary Not Found</div>
|
| 119 |
+
|
| 120 |
+
<a href="{{ url_for('index') }}" class="home-btn">Return to Text Summarizer</a>
|
| 121 |
+
|
| 122 |
+
<div class="footer">
|
| 123 |
+
<i>"The goal of summarization is to extract meaning, but sometimes the page itself is missing."</i>
|
| 124 |
+
<br>
|
| 125 |
+
<small>© 2022 <a href="https://github.com/Amey-Thakur" style="color: white; text-decoration: none;">Amey
|
| 126 |
+
Thakur</a> & <a href="https://github.com/msatmod" style="color: white; text-decoration: none;">Mega
|
| 127 |
+
Satish</a></small>
|
| 128 |
+
</div>
|
| 129 |
+
|
| 130 |
+
</body>
|
| 131 |
+
|
| 132 |
+
</html>
|
Source Code/templates/compare_summary.html
ADDED
|
@@ -0,0 +1,236 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!--
|
| 2 |
+
@file compare_summary.html
|
| 3 |
+
@description Comparison interface allowing users to evaluate summaries generated
|
| 4 |
+
by distinct algorithms (SpaCy, NLTK, Gensim, Sumy) side-by-side.
|
| 5 |
+
|
| 6 |
+
@author Amey Thakur <https://github.com/Amey-Thakur>
|
| 7 |
+
@author Mega Satish <https://github.com/msatmod>
|
| 8 |
+
@created 2022-08-09
|
| 9 |
+
@repository https://github.com/Amey-Thakur/TEXT-SUMMARIZER
|
| 10 |
+
@license MIT
|
| 11 |
+
-->
|
| 12 |
+
|
| 13 |
+
<!DOCTYPE html>
|
| 14 |
+
<html lang="en">
|
| 15 |
+
|
| 16 |
+
<head>
|
| 17 |
+
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
| 18 |
+
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
| 19 |
+
<meta name="description"
|
| 20 |
+
content="Compare text summarization algorithms (SpaCy, NLTK, Gensim, Sumy) side-by-side. Analyze performance and output quality.">
|
| 21 |
+
<meta name="keywords"
|
| 22 |
+
content="Text Summarizer Comparison, Model Comparison, NLP Algorithms, TextRank, LexRank, SpaCy vs NLTK, Machine Learning Evaluation, Amey Thakur, Mega Satish">
|
| 23 |
+
<meta name="author" content="Amey Thakur & Mega Satish">
|
| 24 |
+
<title>TEXT SUMMARIZER</title>
|
| 25 |
+
|
| 26 |
+
<!-- Typography and Framework Styles -->
|
| 27 |
+
<link href="https://fonts.googleapis.com/css2?family=Play&display=swap" rel="stylesheet">
|
| 28 |
+
<link href="static/css/materialize.css" type="text/css" rel="stylesheet" media="screen,projection" />
|
| 29 |
+
<link href="static/css/style.css" type="text/css" rel="stylesheet" media="screen,projection" />
|
| 30 |
+
|
| 31 |
+
<!-- Icon Libraries -->
|
| 32 |
+
<link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.5.0/css/all.css"
|
| 33 |
+
integrity="sha384-B4dIYHKNBt8Bc12p+WXckhzcICo0wtJAoU8YZTY5qE0Id1GSseTk6S+L3BlXeVIU" crossorigin="anonymous">
|
| 34 |
+
<link rel="stylesheet" href="https://unicons.iconscout.com/release/v4.0.0/css/unicons.css" />
|
| 35 |
+
</head>
|
| 36 |
+
|
| 37 |
+
<body>
|
| 38 |
+
<!-- Security Protection -->
|
| 39 |
+
<div id="securityOverlay"
|
| 40 |
+
style="display: none; position: fixed; top: 0; left: 0; width: 100%; height: 100%; background-color: rgba(0,0,0,0.95); z-index: 9999; justify-content: center; align-items: center; flex-direction: column;">
|
| 41 |
+
<div style="color: #ff3333; font-size: 60px; margin-bottom: 20px;">🛡️</div>
|
| 42 |
+
<h1
|
| 43 |
+
style="color: #ffffff; font-family: 'Segoe UI'; text-transform: uppercase; letter-spacing: 3px; font-size: 24px; margin-bottom: 15px;">
|
| 44 |
+
Unauthorized Action Detected</h1>
|
| 45 |
+
<p
|
| 46 |
+
style="color: #888; font-family: 'Segoe UI'; margin-bottom: 30px; text-align: center; max-width: 500px; line-height: 1.6;">
|
| 47 |
+
This project is protected by academic integrity protocols.<br>
|
| 48 |
+
Direct access to source code or developer tools is restricted.
|
| 49 |
+
</p>
|
| 50 |
+
<div style="color: #444; font-size: 12px; margin-bottom: 30px; letter-spacing: 2px;">
|
| 51 |
+
AMEY THAKUR · MEGA SATISH
|
| 52 |
+
</div>
|
| 53 |
+
<button id="dismissSecurity"
|
| 54 |
+
style="background: transparent; border: 1px solid #ff3333; color: #ff3333; padding: 10px 30px; cursor: pointer; transition: all 0.3s;">
|
| 55 |
+
DISMISS
|
| 56 |
+
</button>
|
| 57 |
+
</div>
|
| 58 |
+
<!-- Global Page Stylings for Theme Consistency -->
|
| 59 |
+
<style>
|
| 60 |
+
body {
|
| 61 |
+
background-color: rgb(0, 0, 0);
|
| 62 |
+
font-family: Play;
|
| 63 |
+
}
|
| 64 |
+
|
| 65 |
+
.flex-parent {
|
| 66 |
+
display: flex;
|
| 67 |
+
}
|
| 68 |
+
|
| 69 |
+
.jc-center {
|
| 70 |
+
justify-content: center;
|
| 71 |
+
}
|
| 72 |
+
|
| 73 |
+
footer {
|
| 74 |
+
text-align: center;
|
| 75 |
+
text-shadow: black;
|
| 76 |
+
padding: 3px;
|
| 77 |
+
color: whitesmoke;
|
| 78 |
+
}
|
| 79 |
+
</style>
|
| 80 |
+
|
| 81 |
+
<!-- Navigation Bar: Fixed positioning for persistent user access -->
|
| 82 |
+
<div class="navbar-fixed">
|
| 83 |
+
<nav class="grey darken-4" role="navigation">
|
| 84 |
+
<div class="nav-wrapper container">
|
| 85 |
+
<a id="logo-container" href="#" class="brand-logo">TEXT SUMMARIZER</a>
|
| 86 |
+
<!-- Desktop Navigation Menu -->
|
| 87 |
+
<ul class="right hide-on-med-and-down">
|
| 88 |
+
<li><a href="{{url_for('index')}}">HOME</a></li>
|
| 89 |
+
<li><a href="{{url_for('compare_summary')}}">COMPARE</a></li>
|
| 90 |
+
<li><a href="#aboutapp">ABOUT US</a></li>
|
| 91 |
+
</ul>
|
| 92 |
+
<a href="#" data-target="nav-mobile" class="sidenav-trigger">MENU</a>
|
| 93 |
+
</div>
|
| 94 |
+
</nav>
|
| 95 |
+
</div>
|
| 96 |
+
|
| 97 |
+
<ul id="nav-mobile" class="sidenav">
|
| 98 |
+
<li><a href="{{url_for('index')}}">HOME</a></li>
|
| 99 |
+
<li><a href="{{url_for('compare_summary')}}">COMPARE</a></li>
|
| 100 |
+
<li><a href="#aboutapp">ABOUT US</a></li>
|
| 101 |
+
</ul>
|
| 102 |
+
|
| 103 |
+
<!-- Comparison Functionality Section: Multi-algorithm input -->
|
| 104 |
+
<div class="container">
|
| 105 |
+
<div class="section">
|
| 106 |
+
<div class="row">
|
| 107 |
+
<div class="input-field col s12 m10">
|
| 108 |
+
<div class="icon-block">
|
| 109 |
+
<h3 align="center" style="color: rgb(255, 255, 255);">COMPARE</h3>
|
| 110 |
+
<form method="POST" action="/comparer">
|
| 111 |
+
<textarea name="rawtext" cols="10" rows="10" style="color: #ffffff;" class="form-control" required="true"
|
| 112 |
+
placeholder="Enter your text here"></textarea>
|
| 113 |
+
<br />
|
| 114 |
+
<div class="flex-parent jc-center">
|
| 115 |
+
<button type="reset" class="btn btn-small waves-effect waves-light red lighten-1">CLEAR</button>
|
| 116 |
+
|
| 117 |
+
<button type="submit"
|
| 118 |
+
class="btn btn-small waves-effect waves-light light-blue lighten-1">SUMMARIZE</button>
|
| 119 |
+
</div>
|
| 120 |
+
</form>
|
| 121 |
+
</div>
|
| 122 |
+
</div>
|
| 123 |
+
</div>
|
| 124 |
+
</div>
|
| 125 |
+
</div>
|
| 126 |
+
|
| 127 |
+
<!-- Input Text Display: Original raw text with metrics -->
|
| 128 |
+
<section class="section section-solutions-about #fafafa grey lighten-5">
|
| 129 |
+
<div class="container">
|
| 130 |
+
<div class="row">
|
| 131 |
+
<div class="col s12 m8">
|
| 132 |
+
<div class="icon-block">
|
| 133 |
+
<h4 align="center" style="color: rgb(0, 0, 0);">INPUT TEXT</h4>
|
| 134 |
+
<p style="color: rgb(0, 0, 0);">READING TIME: <span style="color:#1100ff;">{{ final_reading_time }} MINUTES
|
| 135 |
+
</span></p>
|
| 136 |
+
<p class="light" style="color: #000000;">{{ctext}}</p>
|
| 137 |
+
<div class="alert alert-info" role="alert">
|
| 138 |
+
<p style="color: rgb(0, 0, 0);">TIME ELAPSED: <span style="color:#ff0000;">{{ final_time }} MINUTES
|
| 139 |
+
</span></p>
|
| 140 |
+
</div>
|
| 141 |
+
</div>
|
| 142 |
+
</div>
|
| 143 |
+
</div>
|
| 144 |
+
</div>
|
| 145 |
+
</section>
|
| 146 |
+
|
| 147 |
+
<!-- Results Tabs: Side-by-side comparison of different algorithms -->
|
| 148 |
+
<section class="section section-solutions-about #fafafa grey lighten-5">
|
| 149 |
+
<div class="container">
|
| 150 |
+
<div class="row">
|
| 151 |
+
<div class="row">
|
| 152 |
+
<div class="col s12">
|
| 153 |
+
<ul class="tabs">
|
| 154 |
+
<li class="tab col s3">
|
| 155 |
+
<a href="#tab1" class="black-text"><b>GENSIM SUMMARIZER</b></a>
|
| 156 |
+
</li>
|
| 157 |
+
<li class="tab col s3">
|
| 158 |
+
<a href="#tab2" class="black-text"><b>NATURAL LANGUAGE TOOLKIT</b></a>
|
| 159 |
+
</li>
|
| 160 |
+
<li class="tab col s3">
|
| 161 |
+
<a href="#tab3" class="black-text"><b>SPACY SUMMARIZER</b></a>
|
| 162 |
+
</li>
|
| 163 |
+
<li class="tab col s3">
|
| 164 |
+
<a href="#tab4" class="black-text"><b>SUMY LEXRANK</b></a>
|
| 165 |
+
</li>
|
| 166 |
+
</ul>
|
| 167 |
+
</div>
|
| 168 |
+
<!-- Gensim Result Block -->
|
| 169 |
+
<div id="tab1" class="col s12">
|
| 170 |
+
<h5>Gensim</h5>
|
| 171 |
+
<div class="">
|
| 172 |
+
<p>READING TIME: <span style="color:#0091EA;">{{ summary_reading_time_gensim }} MINUTES </span></p>
|
| 173 |
+
</div>
|
| 174 |
+
<div class="grey darken-3">
|
| 175 |
+
<p style="padding:5px; font-size:14px; color: #ffffff;">{{ final_summary_gensim }}</p>
|
| 176 |
+
</div>
|
| 177 |
+
</div>
|
| 178 |
+
<!-- NLTK Result Block -->
|
| 179 |
+
<div id="tab2" class="col s12">
|
| 180 |
+
<h5>NLTK</h5>
|
| 181 |
+
<div class="">
|
| 182 |
+
<p>READING TIME: <span style="color:#0091EA;">{{ summary_reading_time_nltk }} MINUTES </span></p>
|
| 183 |
+
</div>
|
| 184 |
+
<div class="grey darken-3">
|
| 185 |
+
<p style="padding:5px; font-size:14px; color: #ffffff;">{{ final_summary_nltk }}</p>
|
| 186 |
+
</div>
|
| 187 |
+
</div>
|
| 188 |
+
<!-- SpaCy Result Block -->
|
| 189 |
+
<div id="tab3" class="col s12">
|
| 190 |
+
<h5>SpaCy</h5>
|
| 191 |
+
<div class="">
|
| 192 |
+
<p>READING TIME: <span style="color:#0091EA;">{{ summary_reading_time }} MINUTES </span></p>
|
| 193 |
+
</div>
|
| 194 |
+
<div class="grey darken-3">
|
| 195 |
+
<p style="padding:5px; font-size:14px; color: #ffffff;">{{ final_summary_spacy }}</p>
|
| 196 |
+
</div>
|
| 197 |
+
</div>
|
| 198 |
+
<!-- Sumy Result Block -->
|
| 199 |
+
<div id="tab4" class="col s12">
|
| 200 |
+
<h5>Sumy</h5>
|
| 201 |
+
<div class="">
|
| 202 |
+
<p>READING TIME: <span style="color:#0091EA;">{{ summary_reading_time_sumy}} MINUTES </span></p>
|
| 203 |
+
</div>
|
| 204 |
+
<div class="grey darken-3">
|
| 205 |
+
<p style="padding:5px; font-size:14px; color: #ffffff;">{{ final_summary_sumy }}</p>
|
| 206 |
+
</div>
|
| 207 |
+
</div>
|
| 208 |
+
</div>
|
| 209 |
+
</div>
|
| 210 |
+
</div>
|
| 211 |
+
</section>
|
| 212 |
+
|
| 213 |
+
<!-- Footer and Author Attribution -->
|
| 214 |
+
<footer class="page-footer grey darken-4">
|
| 215 |
+
<div class="container" id="aboutapp">
|
| 216 |
+
<div class="row">
|
| 217 |
+
<h5 class="white-text" align="center">ABOUT US</h5>
|
| 218 |
+
<p class="grey-text text-lighten-4" align="center">👷 Project Authors: <a href="https://github.com/Amey-Thakur"
|
| 219 |
+
style="color: rgb(255, 0, 0);" target="_blank">Amey Thakur</a> & <a href="https://github.com/msatmod"
|
| 220 |
+
style="color: rgb(0, 0, 255);" target="_blank">Mega Satish</a></p>
|
| 221 |
+
<p class="grey-text text-lighten-4" align="center"><a href="mailto:ameythakur20@gmail.com"
|
| 222 |
+
style="color: greenyellow;" target="_blank">🔗 Queries? Get in touch </a><br></p>
|
| 223 |
+
<p class="grey-text text-lighten-4" align="center"><a
|
| 224 |
+
href="https://github.com/Amey-Thakur/TEXT-SUMMARIZER/pulls" style="color: greenyellow;" target="_blank">🔗
|
| 225 |
+
Interested in collaborating? Open a pull request </a></p>
|
| 226 |
+
</div>
|
| 227 |
+
</div>
|
| 228 |
+
</footer>
|
| 229 |
+
|
| 230 |
+
<script src="https://code.jquery.com/jquery-2.1.1.min.js"></script>
|
| 231 |
+
<script src="static/js/materialize.js"></script>
|
| 232 |
+
<script src="static/js/init.js"></script>
|
| 233 |
+
|
| 234 |
+
</body>
|
| 235 |
+
|
| 236 |
+
</html>
|
Source Code/templates/index.html
ADDED
|
@@ -0,0 +1,197 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!--
|
| 2 |
+
@file index.html
|
| 3 |
+
@description Main user interface structure for the Text Summarizer application.
|
| 4 |
+
Defines the layout for raw-text input, URL input, and results display.
|
| 5 |
+
|
| 6 |
+
@author Amey Thakur <https://github.com/Amey-Thakur>
|
| 7 |
+
@author Mega Satish <https://github.com/msatmod>
|
| 8 |
+
@created 2022-08-09
|
| 9 |
+
@repository https://github.com/Amey-Thakur/TEXT-SUMMARIZER
|
| 10 |
+
@license MIT
|
| 11 |
+
-->
|
| 12 |
+
|
| 13 |
+
<!DOCTYPE html>
|
| 14 |
+
<html lang="en">
|
| 15 |
+
|
| 16 |
+
<head>
|
| 17 |
+
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
| 18 |
+
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
| 19 |
+
<title>TEXT SUMMARIZER</title>
|
| 20 |
+
|
| 21 |
+
<!-- Typography and Framework Styles -->
|
| 22 |
+
<link href="https://fonts.googleapis.com/css2?family=Play&display=swap" rel="stylesheet">
|
| 23 |
+
<link href="static/css/materialize.css" type="text/css" rel="stylesheet" media="screen,projection" />
|
| 24 |
+
<link href="static/css/style.css" type="text/css" rel="stylesheet" media="screen,projection" />
|
| 25 |
+
|
| 26 |
+
<!-- Icon Libraries -->
|
| 27 |
+
<link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.5.0/css/all.css"
|
| 28 |
+
integrity="sha384-B4dIYHKNBt8Bc12p+WXckhzcICo0wtJAoU8YZTY5qE0Id1GSseTk6S+L3BlXeVIU" crossorigin="anonymous">
|
| 29 |
+
<link rel="stylesheet" href="https://unicons.iconscout.com/release/v4.0.0/css/unicons.css" />
|
| 30 |
+
</head>
|
| 31 |
+
|
| 32 |
+
<body>
|
| 33 |
+
<!-- Security Protection -->
|
| 34 |
+
<div id="securityOverlay"
|
| 35 |
+
style="display: none; position: fixed; top: 0; left: 0; width: 100%; height: 100%; background-color: rgba(0,0,0,0.95); z-index: 9999; justify-content: center; align-items: center; flex-direction: column;">
|
| 36 |
+
<div style="color: #ff3333; font-size: 60px; margin-bottom: 20px;">🛡️</div>
|
| 37 |
+
<h1
|
| 38 |
+
style="color: #ffffff; font-family: 'Segoe UI'; text-transform: uppercase; letter-spacing: 3px; font-size: 24px; margin-bottom: 15px;">
|
| 39 |
+
Unauthorized Action Detected</h1>
|
| 40 |
+
<p
|
| 41 |
+
style="color: #888; font-family: 'Segoe UI'; margin-bottom: 30px; text-align: center; max-width: 500px; line-height: 1.6;">
|
| 42 |
+
This project is protected by academic integrity protocols.<br>
|
| 43 |
+
Direct access to source code or developer tools is restricted.
|
| 44 |
+
</p>
|
| 45 |
+
<div style="color: #444; font-size: 12px; margin-bottom: 30px; letter-spacing: 2px;">
|
| 46 |
+
AMEY THAKUR · MEGA SATISH
|
| 47 |
+
</div>
|
| 48 |
+
<button id="dismissSecurity"
|
| 49 |
+
style="background: transparent; border: 1px solid #ff3333; color: #ff3333; padding: 10px 30px; cursor: pointer; transition: all 0.3s;">
|
| 50 |
+
DISMISS
|
| 51 |
+
</button>
|
| 52 |
+
</div>
|
| 53 |
+
<!-- Global Page Stylings for Theme Consistency -->
|
| 54 |
+
<style>
|
| 55 |
+
body {
|
| 56 |
+
background-color: rgb(0, 0, 0);
|
| 57 |
+
font-family: Play;
|
| 58 |
+
}
|
| 59 |
+
|
| 60 |
+
.flex-parent {
|
| 61 |
+
display: flex;
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
.jc-center {
|
| 65 |
+
justify-content: center;
|
| 66 |
+
}
|
| 67 |
+
|
| 68 |
+
footer {
|
| 69 |
+
text-align: center;
|
| 70 |
+
text-shadow: black;
|
| 71 |
+
padding: 3px;
|
| 72 |
+
color: whitesmoke;
|
| 73 |
+
}
|
| 74 |
+
</style>
|
| 75 |
+
|
| 76 |
+
<!-- Navigation Bar: Fixed positioning for persistent user access -->
|
| 77 |
+
<div class="navbar-fixed">
|
| 78 |
+
<nav class="grey darken-4" role="navigation">
|
| 79 |
+
<div class="nav-wrapper container">
|
| 80 |
+
<a id="logo-container" href="#" class="brand-logo">TEXT SUMMARIZER</a>
|
| 81 |
+
<!-- Desktop Navigation Menu -->
|
| 82 |
+
<ul class="right hide-on-med-and-down">
|
| 83 |
+
<li><a href="{{url_for('index')}}">HOME</a></li>
|
| 84 |
+
<li><a href="{{url_for('compare_summary')}}">COMPARE</a></li>
|
| 85 |
+
<li><a href="#aboutapp">ABOUT US</a></li>
|
| 86 |
+
</ul>
|
| 87 |
+
<a href="#" data-target="nav-mobile" class="sidenav-trigger">MENU</a>
|
| 88 |
+
</div>
|
| 89 |
+
</nav>
|
| 90 |
+
</div>
|
| 91 |
+
|
| 92 |
+
<ul id="nav-mobile" class="sidenav">
|
| 93 |
+
<li><a href="{{url_for('index')}}">HOME</a></li>
|
| 94 |
+
<li><a href="{{url_for('compare_summary')}}">COMPARE</a></li>
|
| 95 |
+
<li><a href="#aboutapp">ABOUT US</a></li>
|
| 96 |
+
</ul>
|
| 97 |
+
|
| 98 |
+
<!-- Main Input Section: URL and Raw Text Interfaces -->
|
| 99 |
+
<div class="container">
|
| 100 |
+
<div class="section">
|
| 101 |
+
<!-- URL Input Block -->
|
| 102 |
+
<div class="row">
|
| 103 |
+
<div class="input-field col s12 m10">
|
| 104 |
+
<div class="icon-block">
|
| 105 |
+
<h4 align="center" style="color: rgb(255, 255, 255);">LINK</h4>
|
| 106 |
+
<form method="POST" action="/analyze_url">
|
| 107 |
+
<input type="text" name="raw_url" style="color: #ffffff;" placeholder="Paste your link here"
|
| 108 |
+
required="true">
|
| 109 |
+
<div class="flex-parent jc-center">
|
| 110 |
+
<button type="reset" class="btn btn-small waves-effect waves-light red lighten-1">CLEAR</button>
|
| 111 |
+
|
| 112 |
+
<button type="submit"
|
| 113 |
+
class="btn btn-small waves-effect waves-light light-blue lighten-1">SUMMARIZE</button>
|
| 114 |
+
</div>
|
| 115 |
+
</form>
|
| 116 |
+
</div>
|
| 117 |
+
</div>
|
| 118 |
+
</div>
|
| 119 |
+
<!-- Raw Text Input Block -->
|
| 120 |
+
<div class="row">
|
| 121 |
+
<div class="input-field col s12 m10">
|
| 122 |
+
<div class="icon-block">
|
| 123 |
+
<h4 align="center" style="color: rgb(255, 255, 255);">TEXT</h4>
|
| 124 |
+
<form method="POST" action="/analyze">
|
| 125 |
+
<textarea name="rawtext" cols="10" rows="10" style="color: #ffffff;" class="form-control" required="true"
|
| 126 |
+
placeholder="Enter your text here"></textarea>
|
| 127 |
+
<br />
|
| 128 |
+
<div class="flex-parent jc-center">
|
| 129 |
+
<button type="reset" class="btn btn-small waves-effect waves-light red lighten-1">CLEAR</button>
|
| 130 |
+
|
| 131 |
+
<button type="submit"
|
| 132 |
+
class="btn btn-small waves-effect waves-light light-blue lighten-1">SUMMARIZE</button>
|
| 133 |
+
</div>
|
| 134 |
+
</form>
|
| 135 |
+
</div>
|
| 136 |
+
</div>
|
| 137 |
+
</div>
|
| 138 |
+
</div>
|
| 139 |
+
</div>
|
| 140 |
+
|
| 141 |
+
<!-- Results Display Section: Split View for Summary and Metrics -->
|
| 142 |
+
<section class="section section-solutions-about #fafafa grey lighten-5">
|
| 143 |
+
<div class="container white-text">
|
| 144 |
+
<div class="row">
|
| 145 |
+
<!-- Generated Summary Output -->
|
| 146 |
+
<div class="col s12 m6 grey darken-3">
|
| 147 |
+
<div class="icon-block">
|
| 148 |
+
<h4 align="center" style="color: rgb(255, 255, 255);">TEXT SUMMARY</h4>
|
| 149 |
+
<div class="" role="alert">
|
| 150 |
+
<p style="color: rgb(255, 255, 255);">READING TIME: <span style="color:#1100ff;">{{ summary_reading_time
|
| 151 |
+
}} MINUTES </span></p>
|
| 152 |
+
</div>
|
| 153 |
+
<p class="light">{{ final_summary }}</p>
|
| 154 |
+
</div>
|
| 155 |
+
</div>
|
| 156 |
+
<!-- Original Text and Metrics -->
|
| 157 |
+
<div class="col s12 m6">
|
| 158 |
+
<div class="icon-block">
|
| 159 |
+
<h4 align="center" style="color: rgb(0, 0, 0);">INPUT TEXT</h4>
|
| 160 |
+
<p style="color: rgb(0, 0, 0);">READING TIME: <span style="color:#1100ff;">{{ final_reading_time }} MINUTES
|
| 161 |
+
</span></p>
|
| 162 |
+
<p class="light" style="color: #000000;">{{ctext}}</p>
|
| 163 |
+
<div class="alert alert-info" role="alert">
|
| 164 |
+
<p style="color: rgb(0, 0, 0);">TIME ELAPSED: <span style="color:#ff0000;">{{ final_time }} MINUTES
|
| 165 |
+
</span></p>
|
| 166 |
+
<br />
|
| 167 |
+
</div>
|
| 168 |
+
</div>
|
| 169 |
+
</div>
|
| 170 |
+
</div>
|
| 171 |
+
</div>
|
| 172 |
+
</section>
|
| 173 |
+
|
| 174 |
+
<!-- Footer and Author Attribution -->
|
| 175 |
+
<footer class="page-footer grey darken-4">
|
| 176 |
+
<div class="container" id="aboutapp">
|
| 177 |
+
<div class="row">
|
| 178 |
+
<h5 class="white-text" align="center">ABOUT US</h5>
|
| 179 |
+
<p class="grey-text text-lighten-4" align="center">👷 Project Authors: <a href="https://github.com/Amey-Thakur"
|
| 180 |
+
style="color: rgb(255, 0, 0);" target="_blank">Amey Thakur</a> & <a href="https://github.com/msatmod"
|
| 181 |
+
style="color: rgb(0, 0, 255);" target="_blank">Mega Satish</a></p>
|
| 182 |
+
<p class="grey-text text-lighten-4" align="center"><a href="mailto:ameythakur20@gmail.com"
|
| 183 |
+
style="color: greenyellow;" target="_blank">🔗 Queries? Get in touch </a><br></p>
|
| 184 |
+
<p class="grey-text text-lighten-4" align="center"><a
|
| 185 |
+
href="https://github.com/Amey-Thakur/TEXT-SUMMARIZER/pulls" style="color: greenyellow;" target="_blank">🔗
|
| 186 |
+
Interested in collaborating? Open a pull request </a></p>
|
| 187 |
+
</div>
|
| 188 |
+
</div>
|
| 189 |
+
</footer>
|
| 190 |
+
|
| 191 |
+
<script src="https://code.jquery.com/jquery-2.1.1.min.js"></script>
|
| 192 |
+
<script src="static/js/materialize.js"></script>
|
| 193 |
+
<script src="static/js/init.js"></script>
|
| 194 |
+
|
| 195 |
+
</body>
|
| 196 |
+
|
| 197 |
+
</html>
|
codemeta.json
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"@context": "https://doi.org/10.5063/schema/codemeta-2.0",
|
| 3 |
+
"@type": "SoftwareSourceCode",
|
| 4 |
+
"name": "Text Summarizer",
|
| 5 |
+
"description": "A robust web application leveraging multiple NLP algorithms (SpaCy, NLTK, Gensim, Sumy) to summarize textual content and URL sources, featuring a comparative analysis interface for evaluating summarization quality.",
|
| 6 |
+
"identifier": "TEXT-SUMMARIZER",
|
| 7 |
+
"license": "https://spdx.org/licenses/MIT.html",
|
| 8 |
+
"programmingLanguage": [
|
| 9 |
+
"Python"
|
| 10 |
+
],
|
| 11 |
+
"author": [
|
| 12 |
+
{
|
| 13 |
+
"@type": "Person",
|
| 14 |
+
"givenName": "Amey",
|
| 15 |
+
"familyName": "Thakur",
|
| 16 |
+
"id": "https://orcid.org/0000-0001-5644-1575",
|
| 17 |
+
"affiliation": {
|
| 18 |
+
"@type": "Organization",
|
| 19 |
+
"name": "Terna Engineering College"
|
| 20 |
+
}
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"@type": "Person",
|
| 24 |
+
"givenName": "Mega",
|
| 25 |
+
"familyName": "Satish",
|
| 26 |
+
"id": "https://orcid.org/0000-0002-1844-9557",
|
| 27 |
+
"affiliation": {
|
| 28 |
+
"@type": "Organization",
|
| 29 |
+
"name": "Terna Engineering College"
|
| 30 |
+
}
|
| 31 |
+
}
|
| 32 |
+
],
|
| 33 |
+
"dateReleased": "2022-08-09",
|
| 34 |
+
"codeRepository": "https://github.com/Amey-Thakur/TEXT-SUMMARIZER",
|
| 35 |
+
"developmentStatus": "complete",
|
| 36 |
+
"runtimePlatform": [
|
| 37 |
+
"Python 3.6+",
|
| 38 |
+
"Flask",
|
| 39 |
+
"SpaCy",
|
| 40 |
+
"NLTK",
|
| 41 |
+
"Gensim",
|
| 42 |
+
"Sumy"
|
| 43 |
+
],
|
| 44 |
+
"applicationCategory": "Natural Language Processing",
|
| 45 |
+
"keywords": [
|
| 46 |
+
"Text Summarization",
|
| 47 |
+
"NLP",
|
| 48 |
+
"Flask",
|
| 49 |
+
"SpaCy",
|
| 50 |
+
"NLTK",
|
| 51 |
+
"Gensim",
|
| 52 |
+
"Sumy",
|
| 53 |
+
"TextRank",
|
| 54 |
+
"LexRank",
|
| 55 |
+
"Computer Engineering"
|
| 56 |
+
]
|
| 57 |
+
}
|
docs/SPECIFICATION.md
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Technical Specification: Text Summarizer
|
| 2 |
+
|
| 3 |
+
## Architectural Overview
|
| 4 |
+
|
| 5 |
+
**Text Summarizer** is a comprehensive Natural Language Processing (NLP) application designed to condense extensive textual content into concise, meaningful summaries. Unlike single-method tools, this architecture integrates multiple advanced algorithms—SpaCy, NLTK, Gensim, and Sumy—within a unified web interface, allowing researchers and developers to compare extractive summarization techniques side-by-side in real-time.
|
| 6 |
+
|
| 7 |
+
### Processing Pipeline Diagram
|
| 8 |
+
|
| 9 |
+
```mermaid
|
| 10 |
+
graph TD
|
| 11 |
+
User((End User)) -->|"Target Link / Raw Text"| WebApp["Frontend Layer (HTML5/Jinja2)"]
|
| 12 |
+
WebApp -->|"HTTP POST Request"| Server["Backend Server (Flask/Python)"]
|
| 13 |
+
|
| 14 |
+
subgraph Core_Logic ["Summarization Engines"]
|
| 15 |
+
Server -->|"Input Routing"| Router{Algorithm Selector}
|
| 16 |
+
Router -->|"NLP Pipeline"| SpaCy["SpaCy (Frequency Scoring)"]
|
| 17 |
+
Router -->|"TextRank"| Gensim["Gensim (Graph-Based)"]
|
| 18 |
+
Router -->|"LexRank"| Sumy["Sumy (LSA/LexRank)"]
|
| 19 |
+
Router -->|"Statistical"| NLTK["NLTK (Word Freq)"]
|
| 20 |
+
end
|
| 21 |
+
|
| 22 |
+
SpaCy -->|"Summary Text"| Aggregator["Results Aggregator"]
|
| 23 |
+
Gensim -->|"Summary Text"| Aggregator
|
| 24 |
+
Sumy -->|"Summary Text"| Aggregator
|
| 25 |
+
NLTK -->|"Summary Text"| Aggregator
|
| 26 |
+
|
| 27 |
+
Aggregator -->|"Render Template"| WebApp
|
| 28 |
+
WebApp -->|"Visualize Metrics"| User
|
| 29 |
+
```
|
| 30 |
+
|
| 31 |
+
---
|
| 32 |
+
|
| 33 |
+
## Technical Implementations
|
| 34 |
+
|
| 35 |
+
### 1. NLP & Logic Engines: SpaCy, NLTK, Gensim, Sumy
|
| 36 |
+
The core functionality relies on a diverse set of industrial-strength NLP libraries to perform extractive summarization.
|
| 37 |
+
- **SpaCy**: Utilizes a custom frequency-based pipeline (`spacy_summarizer.py`) to tokenize text, remove stopwords, and score sentences based on word significance.
|
| 38 |
+
- **NLTK**: Implements statistical analysis (`nltk_summarization.py`) to calculate word frequencies and rank sentences without heavy dependency overhead.
|
| 39 |
+
- **Gensim**: Deploys the **TextRank** algorithm, a graph-based approach similar to PageRank, for unsupervised summary generation.
|
| 40 |
+
- **Sumy**: Provides access to **LexRank**, utilizing eigenvector centrality to identify the most representative sentences in a document.
|
| 41 |
+
|
| 42 |
+
### 2. Backend Orchestration: Flask
|
| 43 |
+
The server-side micro-framework manages application routing and data processing.
|
| 44 |
+
- **Route Management**: **Flask** handles HTTP requests for distinct endpoints (`/analyze`, `/analyze_url`, `/comparer`), creating a seamless flow between input and output.
|
| 45 |
+
- **Web Scraping**: Integrates **BeautifulSoup** to extract and sanitize textual content from user-provided URLs before processing.
|
| 46 |
+
- **Performance Metrics**: Custom functions (`app.py`) calculate reading time savings and execution duration, providing quantitative feedback on the summarization efficiency.
|
| 47 |
+
|
| 48 |
+
### 3. Presentation Layer: Materialize CSS & jQuery
|
| 49 |
+
The frontend provides a responsive and intuitive research interface.
|
| 50 |
+
- **UI/UX Design**: Built on **Materialize CSS**, utilizing modern components (cards, tabs, side-navs) for a clean, Material Design aesthetic.
|
| 51 |
+
- **Interactive Elements**: **jQuery** powers dynamic components like result tabs and sliders, ensuring a responsive user experience across devices.
|
| 52 |
+
- **Comparison View**: A dedicated layout (`compare_summary.html`) renders outputs from all algorithms simultaneously for direct qualitative evaluation.
|
| 53 |
+
|
| 54 |
+
---
|
| 55 |
+
|
| 56 |
+
## Technical Prerequisites
|
| 57 |
+
|
| 58 |
+
- **Runtime**: Python 3.x
|
| 59 |
+
- **Web Framework**: Flask 1.x+
|
| 60 |
+
- **NLP Core**: SpaCy (`en_core_web_sm`), NLTK, Gensim, Sumy
|
| 61 |
+
- **Utilities**: BeautifulSoup4, lxml
|
| 62 |
+
- **Frontend**: Materialize CSS, jQuery
|
| 63 |
+
|
| 64 |
+
---
|
| 65 |
+
|
| 66 |
+
*Technical Specification | Computer Engineering Project | Version 1.0*
|
pa_setup.sh
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
set -e
|
| 3 |
+
|
| 4 |
+
echo "--------------------------------------------------"
|
| 5 |
+
echo " TEXT SUMMARIZER - PythonAnywhere Setup "
|
| 6 |
+
echo "--------------------------------------------------"
|
| 7 |
+
|
| 8 |
+
# 1. Create a Virtual Environment
|
| 9 |
+
echo "[1/4] Creating Virtual Environment..."
|
| 10 |
+
mkvirtualenv --python=/usr/bin/python3.10 myenv_${RANDOM} || { echo "Note: If mkvirtualenv failed, you might already be in an env. Proceeding..."; }
|
| 11 |
+
|
| 12 |
+
# 2. Install Dependencies (Optimized for space)
|
| 13 |
+
echo "[2/4] Installing libraries..."
|
| 14 |
+
rm -rf ~/.cache/pip # Clear cache to free up space
|
| 15 |
+
workon $(lsvirtualenv -b | head -n 1) # Activate the first env
|
| 16 |
+
pip install --no-cache-dir -r Source\ Code/requirements.txt
|
| 17 |
+
|
| 18 |
+
# 3. Download Data
|
| 19 |
+
echo "[3/4] Downloading NLP Data (NLTK & SpaCy)..."
|
| 20 |
+
python Source\ Code/download_nltk.py
|
| 21 |
+
python -m spacy download en_core_web_sm
|
| 22 |
+
|
| 23 |
+
echo "--------------------------------------------------"
|
| 24 |
+
echo " SETUP COMPLETE! "
|
| 25 |
+
echo "--------------------------------------------------"
|
| 26 |
+
echo "Now go to the 'Web' tab and configure your WSGI file."
|