Spaces:
Sleeping
Sleeping
Delete templates/index.html
Browse files- templates/index.html +0 -230
templates/index.html
DELETED
|
@@ -1,230 +0,0 @@
|
|
| 1 |
-
<!DOCTYPE html>
|
| 2 |
-
<html>
|
| 3 |
-
<head>
|
| 4 |
-
<meta charset="utf-8" />
|
| 5 |
-
<title>Web Scraper + Tokenizer (Flask)</title>
|
| 6 |
-
<link rel="stylesheet" href="/static/styles.css">
|
| 7 |
-
</head>
|
| 8 |
-
<body>
|
| 9 |
-
<div class="topbar">
|
| 10 |
-
<h1>Web Tokenizer &amp; News Comparator</h1>
|
| 11 |
-
<button id="darkToggle" class="toggle">Dark Mode</button>
|
| 12 |
-
</div>
|
| 13 |
-
|
| 14 |
-
<div class="main">
|
| 15 |
-
<div class="controls card">
|
| 16 |
-
<label><strong>Select website to add (preset):</strong></label>
|
| 17 |
-
<div class="row">
|
| 18 |
-
<select id="siteList">
|
| 19 |
-
<option value="">-- Choose a website --</option>
|
| 20 |
-
{% for name, link in sites.items() %}
|
| 21 |
-
<option value="{{ link }}">{{ name }}</option>
|
| 22 |
-
{% endfor %}
|
| 23 |
-
</select>
|
| 24 |
-
<button onclick="addSite()">Add</button>
|
| 25 |
-
</div>
|
| 26 |
-
|
| 27 |
-
<label style="margin-top:12px;"><strong>Enter multiple URLs (one per line):</strong></label>
|
| 28 |
-
<textarea id="urls" placeholder="https://example.com/page1&#10;https://example.com/page2"></textarea>
|
| 29 |
-
|
| 30 |
-
<label style="margin-top:12px;"><strong>Select Mode:</strong></label>
|
| 31 |
-
<select id="mode">
|
| 32 |
-
<option value="tokenize">Tokenize Page Text</option>
|
| 33 |
-
<option value="H1">Tokenize H1</option>
|
| 34 |
-
<option value="H2">Tokenize H2</option>
|
| 35 |
-
<option value="H3">Tokenize H3</option>
|
| 36 |
-
<option value="H4">Tokenize H4</option>
|
| 37 |
-
<option value="H5">Tokenize H5</option>
|
| 38 |
-
<option value="H6">Tokenize H6</option>
|
| 39 |
-
<option value="raw_text">Fetch Raw Text Only</option>
|
| 40 |
-
</select>
|
| 41 |
-
|
| 42 |
-
<button id="submitBtn" onclick="process()">Submit</button>
|
| 43 |
-
</div>
|
| 44 |
-
|
| 45 |
-
<div id="outputArea" class="card output">
|
| 46 |
-
<div id="output-container"></div>
|
| 47 |
-
</div>
|
| 48 |
-
</div>
|
| 49 |
-
|
| 50 |
-
<!-- Comparison panel -->
|
| 51 |
-
<div id="compare-section" class="card" style="display:none; margin-top:18px;">
|
| 52 |
-
<h2>Side-by-Side Comparison (changed sentences)</h2>
|
| 53 |
-
<div style="display:flex; gap:12px;">
|
| 54 |
-
<div style="flex:1;">
|
| 55 |
-
<h3 id="compare-left-title"></h3>
|
| 56 |
-
<div id="compare-left" class="compare-box"></div>
|
| 57 |
-
</div>
|
| 58 |
-
<div style="flex:1;">
|
| 59 |
-
<h3 id="compare-right-title"></h3>
|
| 60 |
-
<div id="compare-right" class="compare-box"></div>
|
| 61 |
-
</div>
|
| 62 |
-
</div>
|
| 63 |
-
</div>
|
| 64 |
-
|
| 65 |
-
<script>
|
| 66 |
-
// Articles from the most recent /process_urls response; replaced on every
// submit, hence `let`.
let articlesGlobal = [];

// Articles currently picked for side-by-side comparison; reset on every
// submit, hence `let`.
let selectedForCompare = [];
|
| 68 |
-
|
| 69 |
-
// Add site dropdown -> textarea
|
| 70 |
-
/**
 * Append the site chosen in the #siteList dropdown to the #urls textarea,
 * one URL per line.
 *
 * Does nothing when the selected option has an empty value (the
 * "-- Choose a website --" placeholder).
 */
function addSite() {
  const site = document.getElementById("siteList").value;
  if (!site) return;

  const ta = document.getElementById("urls");
  // Trim BEFORE testing for emptiness: a whitespace-only textarea is truthy
  // and would otherwise yield a blank first line in front of the URL.
  const existing = ta.value.trim();
  ta.value = existing ? `${existing}\n${site}` : site;
}
|
| 76 |
-
|
| 77 |
-
// Submit and call backend
|
| 78 |
-
/**
 * Submit handler: read the URL list and mode from the form, POST them as
 * JSON to /process_urls, store the returned articles in `articlesGlobal`
 * and render them via `renderCards`.
 *
 * Any previous comparison is hidden and the comparison selection is reset
 * before the request is made. Network failures, non-2xx responses and
 * unparsable bodies are reported in the output area instead of leaving the
 * "Processing..." placeholder on screen.
 *
 * @returns {Promise<void>}
 */
async function process() {
  const output = document.getElementById("output-container");
  output.innerHTML = "<p>Processing...</p>";
  document.getElementById("compare-section").style.display = "none";
  selectedForCompare = [];

  const urlLines = document
    .getElementById("urls")
    .value.split("\n")
    .map((s) => s.trim())
    .filter(Boolean);
  const mode = document.getElementById("mode").value;

  if (urlLines.length === 0) {
    output.innerHTML = "<p class='notice'>Please enter at least one URL.</p>";
    return;
  }

  let data;
  try {
    const res = await fetch("/process_urls", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ urls: urlLines, mode }),
    });
    // fetch() only rejects on network errors; HTTP error statuses must be
    // checked explicitly.
    if (!res.ok) {
      throw new Error(`Server responded with HTTP ${res.status}`);
    }
    data = await res.json();
  } catch (err) {
    console.error("process_urls request failed", err);
    articlesGlobal = [];
    output.innerHTML = "<p class='notice'>Request failed. Please try again.</p>";
    return;
  }

  // A literal `null` JSON body is treated like an empty result.
  const payload = data || {};
  articlesGlobal = payload.articles || [];
  renderCards(payload);
}
|
| 100 |
-
|
| 101 |
-
// Render cards, clusters, duplicates
|
| 102 |
-
/**
 * Render a /process_urls response into #output-container.
 *
 * Articles are grouped by their `cluster` property (falsy values fall into
 * cluster 0); each group gets a "Topic Cluster N" heading followed by one
 * card per article (built by `makeArticleCard`). If `data.duplicate_groups`
 * is non-empty, a "Duplicate Groups" section listing each group's URLs is
 * appended. When there are no articles, only a notice is shown.
 *
 * @param {{articles?: Array<Object>, duplicate_groups?: Array<Array<string>>}} data
 */
function renderCards(data) {
  const container = document.getElementById("output-container");
  container.innerHTML = "";

  const articles = data.articles || [];
  if (articles.length === 0) {
    container.innerHTML = "<p class='notice'>No articles found / all failed to fetch.</p>";
    return;
  }

  // Null-prototype map: a cluster id such as "__proto__" must not resolve to
  // an inherited property. Integer-like keys still enumerate in ascending
  // order, exactly as with a plain object.
  const clusters = Object.create(null);
  articles.forEach((a) => {
    const c = a.cluster || 0;
    if (!clusters[c]) clusters[c] = [];
    clusters[c].push(a);
  });

  Object.keys(clusters).forEach((clusterId) => {
    const hdr = document.createElement("h2");
    hdr.innerText = "Topic Cluster " + clusterId;
    hdr.className = "cluster-title";
    container.appendChild(hdr);

    clusters[clusterId].forEach((article) => {
      container.appendChild(makeArticleCard(article));
    });
  });

  if (data.duplicate_groups && data.duplicate_groups.length) {
    const dupTitle = document.createElement("h2");
    dupTitle.innerText = "Duplicate Groups";
    dupTitle.className = "dup-title";
    container.appendChild(dupTitle);

    data.duplicate_groups.forEach((grp, idx) => {
      const gcard = document.createElement("div");
      gcard.className = "dup-card";

      const label = document.createElement("strong");
      label.textContent = `Group ${idx + 1}:`;
      gcard.appendChild(label);
      gcard.appendChild(document.createElement("br"));

      // Build the links as DOM nodes rather than an HTML string so that a
      // URL containing quotes or markup cannot inject elements/attributes.
      grp.forEach((u) => {
        const row = document.createElement("div");
        const link = document.createElement("a");
        // Only http(s) URLs become clickable; anything else (e.g.
        // "javascript:") is shown as inert text.
        if (/^https?:\/\//i.test(u)) link.href = u;
        link.target = "_blank";
        link.rel = "noopener noreferrer";
        link.textContent = u;
        row.appendChild(link);
        gcard.appendChild(row);
      });

      container.appendChild(gcard);
    });
  }
}
|
| 145 |
-
|
| 146 |
-
/**
 * Build the DOM card for one article.
 *
 * The card contains, in order: a title (host, falling back to the URL), a
 * link to the article, its summary, the extracted text, word/sentence
 * counts, and a "Select for Comparison" button that calls
 * `selectForCompare(article)` when clicked.
 *
 * All article-supplied strings are inserted as text, never as HTML, so
 * markup inside a URL or summary cannot be interpreted by the browser.
 *
 * @param {Object} article - reads `host`, `url`, `summary`, `text`, `words`
 *   and `sentences`.
 * @returns {HTMLDivElement} the detached card element.
 */
function makeArticleCard(article) {
  const card = document.createElement("div");
  card.className = "article-card";

  const title = document.createElement("div");
  title.className = "article-title";
  title.innerText = article.host || article.url;
  card.appendChild(title);

  const url = article.url || "";
  const urlLine = document.createElement("div");
  urlLine.className = "article-url";
  const link = document.createElement("a");
  // Only http(s) URLs become clickable; anything else (e.g. "javascript:")
  // is displayed as inert text.
  if (/^https?:\/\//i.test(url)) link.href = url;
  link.target = "_blank";
  link.rel = "noopener noreferrer";
  link.textContent = url;
  urlLine.appendChild(link);
  card.appendChild(urlLine);

  const summary = document.createElement("div");
  summary.className = "article-summary";
  const summaryLabel = document.createElement("strong");
  summaryLabel.textContent = "Summary:";
  const summaryBody = document.createElement("div");
  summaryBody.textContent = article.summary || "";
  summary.appendChild(summaryLabel);
  summary.appendChild(summaryBody);
  card.appendChild(summary);

  const textBox = document.createElement("div");
  textBox.className = "article-text";
  textBox.innerText = article.text || "— No text extracted —";
  card.appendChild(textBox);

  const stats = document.createElement("div");
  stats.className = "article-stats";
  stats.innerText = `Words: ${article.words?.length || 0} • Sentences: ${article.sentences?.length || 0}`;
  card.appendChild(stats);

  const cmpBtn = document.createElement("button");
  cmpBtn.innerText = "Select for Comparison";
  cmpBtn.className = "small-btn";
  cmpBtn.onclick = () => selectForCompare(article);
  card.appendChild(cmpBtn);

  return card;
}
|
| 184 |
-
|
| 185 |
-
/**
 * Add an article to the comparison selection and, once two articles are
 * selected, show their comparison.
 *
 * The selection holds at most two articles; picking a third drops the
 * oldest. Picking an article that is already selected does not add it a
 * second time, so an article is never compared with itself.
 *
 * @param {Object} article - article object as passed to the card's button.
 */
function selectForCompare(article) {
  if (!selectedForCompare.includes(article)) {
    selectedForCompare.push(article);
    // Keep only the two most recent picks.
    if (selectedForCompare.length > 2) selectedForCompare.shift();
  }

  if (selectedForCompare.length !== 2) {
    alert("Select one more article to compare (2 total).");
    return;
  }

  // showComparison is async; attach a handler so a failed request surfaces
  // to the user instead of becoming an unhandled promise rejection.
  showComparison(selectedForCompare[0], selectedForCompare[1]).catch((err) => {
    console.error("comparison failed", err);
    alert("Comparison failed. Please try again.");
  });
}
|
| 195 |
-
|
| 196 |
-
/**
 * Request a sentence-level comparison of two articles from /compare_texts
 * and display it in the side-by-side comparison panel.
 *
 * The panel titles are each article's host (falling back to its URL). The
 * response's `left` / `right` fields fill the two boxes; an empty field is
 * replaced by a "No changed sentences" note. If the request fails (network
 * error, non-2xx status, unparsable body) both boxes show a failure note.
 * In every case the panel is made visible and scrolled into view.
 *
 * @param {Object} a1 - left article; reads `text`, `host`, `url`.
 * @param {Object} a2 - right article; reads `text`, `host`, `url`.
 * @returns {Promise<void>}
 */
async function showComparison(a1, a2) {
  const section = document.getElementById("compare-section");
  const leftBox = document.getElementById("compare-left");
  const rightBox = document.getElementById("compare-right");

  document.getElementById("compare-left-title").innerText = a1.host || a1.url;
  document.getElementById("compare-right-title").innerText = a2.host || a2.url;

  try {
    const resp = await fetch("/compare_texts", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ text1: a1.text || "", text2: a2.text || "" }),
    });
    // fetch() resolves on HTTP error statuses, so check explicitly.
    if (!resp.ok) {
      throw new Error(`Server responded with HTTP ${resp.status}`);
    }
    const data = (await resp.json()) || {};

    // NOTE(review): left/right are inserted as HTML, presumably because the
    // backend returns highlight markup — confirm the server escapes the
    // article text it embeds in that markup.
    leftBox.innerHTML = data.left || "<em>No changed sentences</em>";
    rightBox.innerHTML = data.right || "<em>No changed sentences</em>";
  } catch (err) {
    console.error("compare_texts request failed", err);
    const failure = "<em>Comparison failed. Please try again.</em>";
    leftBox.innerHTML = failure;
    rightBox.innerHTML = failure;
  }

  section.style.display = "block";
  section.scrollIntoView({ behavior: "smooth" });
}
|
| 215 |
-
|
| 216 |
-
// small escape
|
| 217 |
-
/**
 * Escape a string for safe insertion into HTML text or a quoted attribute.
 *
 * Replaces `&`, `<`, `>`, `"` and `'` with their character references.
 * `&` is handled first so the references produced by the later
 * replacements are not themselves re-escaped.
 *
 * @param {*} str - value to escape; falsy values yield "".
 * @returns {string} the escaped text.
 */
function escapeHtml(str) {
  if (!str) return "";
  return String(str)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}
|
| 221 |
-
|
| 222 |
-
// dark mode toggle
|
| 223 |
-
// Dark mode: each click flips the "dark" class on <body> and relabels the
// button with the mode the NEXT click will switch to.
const darkBtn = document.getElementById("darkToggle");
darkBtn.addEventListener("click", () => {
  // classList.toggle() returns true when the class is present afterwards.
  const isDark = document.body.classList.toggle("dark");
  if (isDark) {
    darkBtn.innerText = "Light Mode";
  } else {
    darkBtn.innerText = "Dark Mode";
  }
});
|
| 228 |
-
</script>
|
| 229 |
-
</body>
|
| 230 |
-
</html>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|