Gagan0141 committed on
Commit
acba296
·
verified ·
1 Parent(s): 75d4851

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +85 -0
  2. static/styles.css +159 -0
  3. templates/index.html +105 -0
app.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, render_template, jsonify
2
+ from nltk.tokenize import word_tokenize, sent_tokenize
3
+ from urllib.request import urlopen
4
+ from bs4 import BeautifulSoup
5
+ import nltk
6
+ from difflib import SequenceMatcher
7
+ from sklearn.feature_extraction.text import TfidfVectorizer
8
+ from sklearn.cluster import KMeans
9
+
10
# Fetch the Punkt sentence-tokenizer data that sent_tokenize() relies on.
# Recent NLTK releases load the "punkt_tab" resource instead of the legacy
# pickled "punkt" models, so both are requested; a resource that is unknown to
# the installed NLTK version (or an offline machine) just makes download()
# return False instead of raising. quiet=True keeps start-up logs clean.
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)

app = Flask(__name__)
13
+
14
def simple_summary(text, max_sentences=3):
    """Return the leading sentences of *text* as a crude extractive summary.

    Args:
        text: The text to summarise.
        max_sentences: How many sentences from the start of *text* to keep.

    Returns:
        The first ``max_sentences`` sentences joined by single spaces. When
        the tokenizer finds no sentences at all, the first 200 characters of
        *text* followed by ``"..."`` are returned instead.
    """
    sentences = sent_tokenize(text)
    if not sentences:
        # Nothing to pick from: fall back to a plain character-based excerpt.
        return text[:200] + "..."
    leading = sentences[:max_sentences]
    return " ".join(leading)
17
+
18
@app.route("/")
def home():
    """Serve the landing page that contains the site-selection form."""
    landing_page = render_template("index.html")
    return landing_page
21
+
22
def _fetch_article_text(url, timeout=10):
    """Download *url* and return the text content of the page.

    Only ``http``/``https`` URLs are accepted: ``urlopen`` also understands
    other schemes such as ``file:``, and the URL comes straight from the
    submitted form, so anything else is refused.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait on the network before giving up.

    Raises:
        ValueError: If *url* is not an http(s) URL.
        Exception: Whatever ``urlopen`` or ``BeautifulSoup`` raise on failure.
    """
    if not url.lower().startswith(("http://", "https://")):
        raise ValueError("only http(s) URLs are supported")
    # The context manager closes the HTTP response even if parsing fails.
    with urlopen(url, timeout=timeout) as page:
        soup = BeautifulSoup(page, "html.parser")
    return soup.get_text(separator=" ")


def _sentences_without_match(sents_a, sents_b, threshold=0.8):
    """Return the sentences of *sents_a* that have no close match in *sents_b*.

    A sentence counts as matched when its ``SequenceMatcher`` ratio against at
    least one sentence of *sents_b* reaches *threshold*. If *sents_b* is empty
    every sentence of *sents_a* is returned.
    """
    return [
        sent
        for sent in sents_a
        if all(
            SequenceMatcher(None, sent, other).ratio() < threshold
            for other in sents_b
        )
    ]


def _cluster_urls(urls, texts, max_clusters=3):
    """Group *urls* by K-means clusters of the TF-IDF vectors of *texts*.

    Returns a dict mapping the integer cluster label to the list of URLs in
    that cluster. The dict is empty when there is nothing to cluster or when
    the texts cannot be vectorised.
    """
    clusters = {}
    if not texts:
        return clusters
    try:
        matrix = TfidfVectorizer(stop_words='english').fit_transform(texts)
        n_clusters = min(max_clusters, len(texts))
        model = KMeans(n_clusters=n_clusters, random_state=42).fit(matrix)
    except ValueError:
        # Raised e.g. for an empty vocabulary (pages with no usable words);
        # report "no clusters" instead of failing the whole request.
        return clusters
    for url, label in zip(urls, model.labels_):
        clusters.setdefault(int(label), []).append(url)
    return clusters


@app.route("/process_urls", methods=["POST"])
def process_urls():
    """Fetch the selected sites and return text, comparisons, clusters, summaries.

    Reads the ``sites`` values of the submitted form (the ``mode`` field sent
    by the page is currently not used) and responds with JSON containing:

    * ``articles``    - URL -> page text, or ``"Error fetching: ..."`` when the
      download failed.
    * ``comparisons`` - one entry per pair of successfully fetched sites with
      the sentences of ``site1`` that have no close match in ``site2``.
    * ``clusters``    - cluster label -> URLs of successfully fetched sites.
    * ``summaries``   - URL -> short summary of the ``articles`` entry.

    Sites that could not be fetched keep their error message in ``articles``
    and ``summaries`` but are left out of comparisons and clustering, because
    an error message is not article content.
    """
    articles = {}
    fetched = []  # URLs downloaded successfully, in submission order
    for url in request.form.getlist("sites"):
        if url in articles:
            continue  # the same site submitted twice: fetch it only once
        try:
            articles[url] = _fetch_article_text(url)
        except Exception as e:  # best effort: one bad site must not fail the request
            articles[url] = f"Error fetching: {str(e)}"
        else:
            fetched.append(url)

    # -----------------------------
    # Compare articles side-by-side
    # -----------------------------
    # Tokenize every article once instead of once per pair.
    sentences = {url: sent_tokenize(articles[url]) for url in fetched}
    comparison_results = []
    for i, site1 in enumerate(fetched):
        for site2 in fetched[i + 1:]:
            comparison_results.append({
                "site1": site1,
                "site2": site2,
                "diff_sentences_site1": _sentences_without_match(
                    sentences[site1], sentences[site2]
                ),
            })

    # -----------------------------
    # Cluster articles by topic
    # -----------------------------
    clusters = _cluster_urls(fetched, [articles[url] for url in fetched])

    # -----------------------------
    # Summarize each article using NLTK simple summary
    # -----------------------------
    summaries = {
        url: simple_summary(text, max_sentences=3)
        for url, text in articles.items()
    }

    return jsonify({
        "articles": articles,
        "comparisons": comparison_results,
        "clusters": clusters,
        "summaries": summaries,
    })
83
+
84
if __name__ == "__main__":
    import os

    # The interactive Werkzeug debugger allows arbitrary code execution for
    # anyone who can reach the server, so debug mode is opt-in: set
    # FLASK_DEBUG=1 (or true/yes/on) in the environment for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {
        "1", "true", "yes", "on",
    }
    app.run(debug=debug_enabled)
static/styles.css ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/* Theme tokens shared by every rule below. */
:root {
  --bg: #111;
  --text: #fff;
  --accent: cyan;
  --card: rgba(15,15,15,0.6);
}

body {
  font-family: Arial, sans-serif;
  background: var(--bg);
  color: var(--text);
  padding: 30px;
}

.container {
  max-width: 900px;
  margin: auto;
  background: #222;
  padding: 25px;
  border-radius: 12px;
  box-shadow: 0 0 25px rgba(0,255,255,0.1);
}

/* ========== HOLOGRAPHIC MULTI-SELECT CARDS ========== */
.site-card-container {
  margin-top: 25px;
}

.section-title {
  /* Used on a <label>, which is inline by default; without block display the
     centering and bottom margin below have no effect. */
  display: block;
  font-size: 22px;
  margin-bottom: 20px;
  text-align: center;
  font-weight: 600;
}

.site-cards {
  display: flex;
  justify-content: center;
  flex-wrap: wrap;
  gap: 22px;
}

/* Hide the native checkbox visually but keep it focusable, so the cards can
   still be reached and toggled from the keyboard (display: none would remove
   the control from the tab order). */
.site-cards input[type="checkbox"] {
  position: absolute;
  width: 1px;
  height: 1px;
  opacity: 0;
  pointer-events: none;
}

/* Show keyboard focus on the card that stands in for the hidden checkbox. */
.site-cards input[type="checkbox"]:focus-visible + .holo-card {
  outline: 2px solid var(--accent);
  outline-offset: 4px;
}

.holo-card {
  width: 150px;
  padding: 22px 12px;
  border-radius: 18px;
  text-align: center;
  cursor: pointer;
  background: var(--card);
  -webkit-backdrop-filter: blur(12px); /* Safari */
  backdrop-filter: blur(12px);
  border: 2px solid rgba(0,255,255,0.25);
  box-shadow: 0 0 20px rgba(0,255,255,0.15), inset 0 0 15px rgba(0,255,255,0.05);
  position: relative;
  overflow: hidden;
  transition: transform 0.25s ease, box-shadow 0.3s ease, border 0.3s ease;
}

.holo-card:hover {
  transform: translateY(-8px) scale(1.04);
  animation: rainbow-border 2.5s infinite linear;
}

@keyframes rainbow-border {
  0% { border-color: #00ffff; }
  25% { border-color: #ff00ff; }
  50% { border-color: #ffcc00; }
  75% { border-color: #00ff66; }
  100% { border-color: #00ffff; }
}

/* Diagonal light band that sweeps across the card. */
.holo-card::before {
  content: "";
  position: absolute;
  top: -30%;
  left: -60%;
  width: 250%;
  height: 250%;
  background: linear-gradient(120deg, transparent 20%, rgba(255,255,255,0.12) 50%, transparent 80%);
  transform: rotate(20deg);
  animation: shimmer 3s infinite;
  pointer-events: none;
}

@keyframes shimmer {
  0% { transform: translateX(-200%) rotate(20deg); }
  100% { transform: translateX(200%) rotate(20deg); }
}

.holo-card::after {
  content: "";
  position: absolute;
  inset: 0;
  border-radius: 18px;
  box-shadow: 0 0 20px rgba(0,255,255,0.15), 0 0 30px rgba(0,255,255,0.05), inset 0 0 20px rgba(0,255,255,0.05);
  z-index: -1;
}

.holo-card .icon {
  font-size: 34px;
  margin-bottom: 12px;
  animation: pulse-glow 2s infinite alternate;
}

@keyframes pulse-glow {
  from { text-shadow: 0 0 4px var(--accent); }
  to { text-shadow: 0 0 12px var(--accent); }
}

.holo-card .site-name {
  color: var(--text);
  font-size: 16px;
  font-weight: 600;
  text-shadow: 0 0 8px rgba(0,255,255,0.4);
}

/* Selected state: the card directly after a checked checkbox. */
.site-cards input:checked + .holo-card {
  background: rgba(0,40,40,0.85);
  border: 2px solid var(--accent);
  box-shadow: 0 0 35px rgba(0,255,255,0.75), inset 0 0 30px rgba(0,255,255,0.45), 0 0 15px rgba(0,255,255,0.35);
  transform: translateY(-10px) scale(1.06);
}

.holo-card:active {
  transform: scale(0.97);
}

/* ========== RESULT CARDS ========== */
.result-container {
  margin-top: 30px;
}

.result-card {
  background: rgba(0,0,0,0.7);
  border-radius: 12px;
  padding: 15px;
  margin-bottom: 18px;
  box-shadow: 0 0 12px rgba(0,255,255,0.3);
  border: 1px solid rgba(0,255,255,0.2);
  transition: transform 0.25s ease, box-shadow 0.25s ease;
}

.result-card:hover {
  transform: translateY(-5px);
  box-shadow: 0 0 25px rgba(0,255,255,0.6);
}

.result-card h3 {
  color: var(--accent);
  margin-bottom: 10px;
}

.result-card ul {
  margin: 0;
  padding-left: 18px;
}

/* Respect the user's reduced-motion preference: no looping animations or
   animated transitions. */
@media (prefers-reduced-motion: reduce) {
  .holo-card,
  .holo-card:hover,
  .holo-card::before,
  .holo-card .icon,
  .result-card {
    animation: none;
    transition: none;
  }
}
templates/index.html ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <title>Ultra News Tokenizer</title>
6
+ <link rel="stylesheet" href="{{ url_for('static', filename='styles.css') }}">
7
+ </head>
8
+ <body>
9
+
10
+ <div class="container">
11
+ <h1>Ultra News Tokenizer</h1>
12
+
13
+ <form id="urlForm" method="POST">
14
+ <div class="site-card-container">
15
+ <label class="section-title">Select News Sites</label>
16
+
17
+ <div class="site-cards">
18
+ <input type="checkbox" name="sites" id="indianexpress" value="https://indianexpress.com/">
19
+ <label for="indianexpress" class="site-card holo-card">
20
+ <div class="icon">📰</div>
21
+ <div class="site-name">Indian Express</div>
22
+ </label>
23
+
24
+ <input type="checkbox" name="sites" id="ndtv" value="https://www.ndtv.com/">
25
+ <label for="ndtv" class="site-card holo-card">
26
+ <div class="icon">🔵</div>
27
+ <div class="site-name">NDTV</div>
28
+ </label>
29
+
30
+ <input type="checkbox" name="sites" id="toi" value="https://timesofindia.indiatimes.com/">
31
+ <label for="toi" class="site-card holo-card">
32
+ <div class="icon">🟣</div>
33
+ <div class="site-name">Times of India</div>
34
+ </label>
35
+
36
+ <input type="checkbox" id="selectall">
37
+ <label for="selectall" class="site-card holo-card special">
38
+ <div class="icon">⭐</div>
39
+ <div class="site-name">Select All</div>
40
+ </label>
41
+ </div>
42
+ </div>
43
+
44
+ <label>Select Mode:</label>
45
+ <select name="mode">
46
+ <option value="tokenize">Tokenize Text</option>
47
+ <option value="raw_text">Raw Text</option>
48
+ </select>
49
+
50
+ <button type="submit">Submit</button>
51
+ </form>
52
+
53
+ <div id="output" class="result-container"></div>
54
+ </div>
55
+
56
+ <script>
57
// Mirror the state of the "Select All" card onto every individual site checkbox.
document.getElementById("selectall").addEventListener("change", (event) => {
  const shouldCheck = event.currentTarget.checked;
  const boxes = document.querySelectorAll('.site-cards input[type="checkbox"]');
  for (const box of boxes) {
    if (box.id === "selectall") {
      continue; // the toggle itself already holds the new state
    }
    box.checked = shouldCheck;
  }
});
63
+
64
const form = document.getElementById("urlForm");

/**
 * Escape a value so it can be interpolated into an HTML string.
 * The server returns text scraped from third-party pages, which must never be
 * interpreted as markup when it is written through innerHTML.
 */
function escapeHtml(value) {
  return String(value)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}

// Submit the form via fetch and render summaries, comparisons and clusters
// from the JSON response into #output.
form.addEventListener("submit", async function (e) {
  e.preventDefault();
  const output = document.getElementById("output");

  let data;
  try {
    const response = await fetch("/process_urls", {
      method: "POST",
      body: new FormData(form)
    });
    if (!response.ok) {
      throw new Error(`Server responded with status ${response.status}`);
    }
    data = await response.json();
  } catch (err) {
    // Network failure, non-2xx status or invalid JSON: tell the user instead
    // of leaving an unhandled promise rejection and a stale result area.
    output.textContent = `Request failed: ${err.message}`;
    return;
  }

  let html = "";

  html += "<h2>Article Summaries</h2>";
  for (const url in data.summaries) {
    html += `<div class="result-card">
      <h3>${escapeHtml(url)}</h3>
      <p>${escapeHtml(data.summaries[url])}</p>
    </div>`;
  }

  html += "<h2>Comparisons (Changed Sentences)</h2>";
  data.comparisons.forEach(c => {
    html += `<div class="result-card">
      <h3>${escapeHtml(c.site1)} VS ${escapeHtml(c.site2)}</h3>
      <ul>`;
    c.diff_sentences_site1.forEach(s => {
      html += `<li>${escapeHtml(s)}</li>`;
    });
    html += `</ul></div>`;
  });

  html += "<h2>Clusters</h2>";
  for (const label in data.clusters) {
    const members = data.clusters[label].map(u => escapeHtml(u)).join(", ");
    html += `<div class="result-card"><strong>Cluster ${escapeHtml(label)}:</strong> ${members}</div>`;
  }

  output.innerHTML = html;
});
102
+ </script>
103
+
104
+ </body>
105
+ </html>