Spaces:
Running
Running
Create index.html
Browse files- index.html +704 -0
index.html
ADDED
|
@@ -0,0 +1,704 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<!--
|
| 3 |
+
File: micro-apps/text-tools/url-formatter-cleaner-with-titles-v2.html
|
| 4 |
+
Purpose: Extract only regular (non-image) HTTP/HTTPS links from large pasted text,
|
| 5 |
+
and (optionally) fetch page titles to emit Markdown links WITHOUT bullets
|
| 6 |
+
and WITHOUT any leading "Title:" prefixes.
|
| 7 |
+
|
| 8 |
+
Notes:
|
| 9 |
+
- Single-file Micro-App (no build step, no local assets).
|
| 10 |
+
- In-line comments explain each section in plain language.
|
| 11 |
+
- Title fetching is best-effort: many sites block cross-origin reads (CORS).
|
| 12 |
+
• We first try a normal fetch.
|
| 13 |
+
• If that fails and the "Use proxy" option is ON, we fall back to https://r.jina.ai/<URL>
|
| 14 |
+
which returns a readability-friendly text version that often contains the page title.
|
| 15 |
+
• We sanitize titles to strip "Title:" and leading headings/bullets.
|
| 16 |
+
• If everything fails, we fall back to the URL's hostname as the title.
|
| 17 |
+
-->
|
| 18 |
+
<html lang="en">
|
| 19 |
+
<head>
|
| 20 |
+
<meta charset="UTF-8" />
|
| 21 |
+
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
| 22 |
+
<title>URL Cleaner + Title Fetcher — Markdown Links (No Bullets)</title>
|
| 23 |
+
<style>
|
| 24 |
+
/* =============== Theme tokens (easy color tweaking) =============== */
|
| 25 |
+
:root{
|
| 26 |
+
--bg:#0f1115; /* page background (dark) */
|
| 27 |
+
--panel:#161a23; /* cards/panels */
|
| 28 |
+
--panel-2:#0d1118; /* headers / subpanels */
|
| 29 |
+
--text:#e5e7eb; /* main text */
|
| 30 |
+
--muted:#9aa3b2; /* secondary text */
|
| 31 |
+
--accent:#5b9cff; /* primary accent */
|
| 32 |
+
--accent-2:#2f6fe6; /* accent hover */
|
| 33 |
+
--border:#222838; /* subtle borders */
|
| 34 |
+
--good:#22c55e; /* success green */
|
| 35 |
+
--warn:#f59e0b; /* warning amber */
|
| 36 |
+
--bad:#ef4444; /* danger red */
|
| 37 |
+
--radius-lg:16px; /* big rounded corners */
|
| 38 |
+
--radius:12px; /* normal rounded corners */
|
| 39 |
+
--pad:14px; /* base padding */
|
| 40 |
+
--shadow:0 18px 50px rgba(0,0,0,.35);
|
| 41 |
+
--mono: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace;
|
| 42 |
+
--sans: Inter, ui-sans-serif, system-ui, Segoe UI, Roboto, Helvetica, Arial;
|
| 43 |
+
}
|
| 44 |
+
/* Optional light theme (toggle button) */
|
| 45 |
+
[data-theme="light"]{
|
| 46 |
+
--bg:#f6f7fb;
|
| 47 |
+
--panel:#ffffff;
|
| 48 |
+
--panel-2:#f2f5fb;
|
| 49 |
+
--text:#0f172a;
|
| 50 |
+
--muted:#5b6474;
|
| 51 |
+
--accent:#2563eb;
|
| 52 |
+
--accent-2:#1e40af;
|
| 53 |
+
--border:#e6e8ee;
|
| 54 |
+
--shadow:0 18px 50px rgba(0,0,0,.08);
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
+
/* =============== Page reset and layout shell =============== */
|
| 58 |
+
*{box-sizing:border-box}
|
| 59 |
+
html,body{height:100%}
|
| 60 |
+
body{
|
| 61 |
+
margin:0; background:var(--bg); color:var(--text);
|
| 62 |
+
font-family:var(--sans); -webkit-font-smoothing:antialiased; -moz-osx-font-smoothing:grayscale;
|
| 63 |
+
}
|
| 64 |
+
|
| 65 |
+
header{
|
| 66 |
+
position:sticky; top:0; z-index:10;
|
| 67 |
+
display:flex; align-items:center; gap:10px;
|
| 68 |
+
padding:var(--pad);
|
| 69 |
+
background:linear-gradient(180deg, rgba(255,255,255,.04), rgba(255,255,255,0)) , var(--bg);
|
| 70 |
+
border-bottom:1px solid var(--border);
|
| 71 |
+
-webkit-backdrop-filter: blur(6px);
|
| 72 |
+
backdrop-filter: blur(6px);
|
| 73 |
+
}
|
| 74 |
+
.brand{
|
| 75 |
+
display:flex; align-items:center; gap:10px;
|
| 76 |
+
background:var(--panel); border:1px solid var(--border);
|
| 77 |
+
padding:8px 12px; border-radius:10px; box-shadow:var(--shadow); font-weight:700;
|
| 78 |
+
}
|
| 79 |
+
.brand svg{color:var(--accent)}
|
| 80 |
+
.btn{
|
| 81 |
+
appearance:none; border:1px solid var(--border);
|
| 82 |
+
background:var(--panel); color:var(--text);
|
| 83 |
+
border-radius:10px; padding:10px 12px; cursor:pointer;
|
| 84 |
+
transition: transform .12s ease, border-color .12s ease, background .12s ease;
|
| 85 |
+
}
|
| 86 |
+
.btn:hover{transform:translateY(-1px); border-color:var(--accent)}
|
| 87 |
+
.btn.primary{background:var(--accent); border-color:var(--accent); color:#fff}
|
| 88 |
+
.btn.primary:hover{background:var(--accent-2); border-color:var(--accent-2)}
|
| 89 |
+
.btn[disabled]{opacity:.6; cursor:not-allowed; transform:none}
|
| 90 |
+
.pill{
|
| 91 |
+
display:inline-flex; align-items:center; gap:6px; font-size:12px; color:var(--muted);
|
| 92 |
+
border:1px solid var(--border); background:var(--panel); padding:6px 10px; border-radius:999px;
|
| 93 |
+
}
|
| 94 |
+
.kbd{
|
| 95 |
+
font-family:var(--mono); font-size:12px; padding:2px 6px;
|
| 96 |
+
border:1px solid var(--border); border-bottom-width:3px; border-radius:6px; background:var(--panel-2);
|
| 97 |
+
}
|
| 98 |
+
|
| 99 |
+
main{
|
| 100 |
+
padding:var(--pad);
|
| 101 |
+
display:grid; gap:var(--pad);
|
| 102 |
+
grid-template-columns: 1fr 1fr; /* two columns: input | output */
|
| 103 |
+
}
|
| 104 |
+
@media (max-width: 1024px){ main{grid-template-columns:1fr} }
|
| 105 |
+
|
| 106 |
+
.card{
|
| 107 |
+
background:var(--panel); border:1px solid var(--border);
|
| 108 |
+
border-radius:var(--radius-lg); box-shadow:var(--shadow); overflow:hidden;
|
| 109 |
+
display:grid; grid-template-rows:auto 1fr auto;
|
| 110 |
+
}
|
| 111 |
+
.card-header{
|
| 112 |
+
background:var(--panel-2); border-bottom:1px solid var(--border);
|
| 113 |
+
padding:var(--pad); display:flex; align-items:center; justify-content:space-between; gap:10px;
|
| 114 |
+
}
|
| 115 |
+
.card-body{ padding:var(--pad); overflow:auto }
|
| 116 |
+
.card-footer{ padding:var(--pad); border-top:1px solid var(--border); background:var(--panel-2); display:flex; gap:8px; flex-wrap:wrap }
|
| 117 |
+
|
| 118 |
+
h1,h2,h3{margin:0}
|
| 119 |
+
.muted{color:var(--muted)}
|
| 120 |
+
.row{display:flex; gap:8px; align-items:center}
|
| 121 |
+
.spacer{flex:1}
|
| 122 |
+
.hidden{display:none}
|
| 123 |
+
|
| 124 |
+
textarea{
|
| 125 |
+
width:100%; min-height:320px; resize:vertical;
|
| 126 |
+
border-radius:12px; border:1px solid var(--border);
|
| 127 |
+
background:var(--panel-2); color:var(--text); padding:12px;
|
| 128 |
+
font-family:var(--mono); font-size:14px; line-height:1.45;
|
| 129 |
+
}
|
| 130 |
+
.stats{ display:flex; gap:12px; flex-wrap:wrap; font-size:13px; color:var(--muted) }
|
| 131 |
+
.stats b{color:var(--text)}
|
| 132 |
+
.options{ display:flex; gap:14px; flex-wrap:wrap; align-items:center; font-size:14px; color:var(--muted) }
|
| 133 |
+
label.switch{
|
| 134 |
+
display:inline-flex; gap:8px; align-items:center; cursor:pointer;
|
| 135 |
+
background:var(--panel-2); padding:6px 10px; border:1px solid var(--border); border-radius:10px;
|
| 136 |
+
}
|
| 137 |
+
input[type="checkbox"]{ accent-color: var(--accent) }
|
| 138 |
+
|
| 139 |
+
/* Small toast notification (copy / status messages) */
|
| 140 |
+
.toast{
|
| 141 |
+
position:fixed; bottom:16px; right:16px; z-index:50;
|
| 142 |
+
background:var(--panel); border:1px solid var(--border); border-left:6px solid var(--good);
|
| 143 |
+
padding:10px 12px; border-radius:10px; box-shadow:var(--shadow);
|
| 144 |
+
opacity:0; transform:translateY(8px); transition:opacity .18s ease, transform .18s ease;
|
| 145 |
+
max-width: 60ch; white-space: pre-wrap;
|
| 146 |
+
}
|
| 147 |
+
.toast.show{opacity:1; transform:translateY(0)}
|
| 148 |
+
.spinner{ inline-size:14px; block-size:14px; border:2px solid var(--border); border-top-color:var(--accent); border-radius:50%; animation:spin 1s linear infinite }
|
| 149 |
+
@keyframes spin{ to{ transform:rotate(360deg) } }
|
| 150 |
+
</style>
|
| 151 |
+
</head>
|
| 152 |
+
<body>
|
| 153 |
+
<!-- ========= Top bar with title and quick actions ========= -->
|
| 154 |
+
<header>
|
| 155 |
+
<div class="brand" role="img" aria-label="URL Cleaner">
|
| 156 |
+
<!-- tiny logo dot -->
|
| 157 |
+
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" aria-hidden="true">
|
| 158 |
+
<circle cx="12" cy="12" r="8" fill="currentColor"></circle>
|
| 159 |
+
</svg>
|
| 160 |
+
URL Cleaner
|
| 161 |
+
</div>
|
| 162 |
+
<span class="pill">Paste → Clean URLs → Markdown with titles (no bullets)</span>
|
| 163 |
+
<div class="spacer"></div>
|
| 164 |
+
<button id="themeToggle" class="btn" title="Toggle theme (dark/light)">Toggle Theme</button>
|
| 165 |
+
</header>
|
| 166 |
+
|
| 167 |
+
<!-- ========= Main two-panel layout: Input | Output ========= -->
|
| 168 |
+
<main id="app" data-theme="dark">
|
| 169 |
+
<!-- ========== Left: Input panel ========== -->
|
| 170 |
+
<section class="card">
|
| 171 |
+
<div class="card-header">
|
| 172 |
+
<h3>Input Text</h3>
|
| 173 |
+
<div class="row">
|
| 174 |
+
<button id="btnPaste" class="btn" title="Paste from clipboard">Paste</button>
|
| 175 |
+
<span class="pill"><span class="kbd">Ctrl</span>/<span class="kbd">Cmd</span> + <span class="kbd">Enter</span> clean · <span class="kbd">Shift</span> + <span class="kbd">Enter</span> titles</span>
|
| 176 |
+
</div>
|
| 177 |
+
</div>
|
| 178 |
+
<div class="card-body">
|
| 179 |
+
<!-- Where you paste long/messy text with URLs -->
|
| 180 |
+
<textarea id="inputArea" placeholder="Paste any text here (descriptions, HTML/Markdown, etc.)"></textarea>
|
| 181 |
+
</div>
|
| 182 |
+
<div class="card-footer">
|
| 183 |
+
<!-- Simple options that tweak cleaning behavior -->
|
| 184 |
+
<div class="options">
|
| 185 |
+
<label class="switch" title="Remove UTM/fbclid/tracking params from kept links">
|
| 186 |
+
<input type="checkbox" id="optStripTracking" checked />
|
| 187 |
+
<span>Strip tracking params</span>
|
| 188 |
+
</label>
|
| 189 |
+
<label class="switch" title="Remove exact-duplicate links (case-insensitive)">
|
| 190 |
+
<input type="checkbox" id="optDedupe" checked />
|
| 191 |
+
<span>De-duplicate</span>
|
| 192 |
+
</label>
|
| 193 |
+
<label class="switch" title="Only keep http/https (ignore other schemes)">
|
| 194 |
+
<input type="checkbox" id="optHttpOnly" checked />
|
| 195 |
+
<span>HTTP(S) only</span>
|
| 196 |
+
</label>
|
| 197 |
+
<label class="switch" title="Fallback to a CORS-friendly proxy when fetching titles (best compatibility)">
|
| 198 |
+
<input type="checkbox" id="optProxy" checked />
|
| 199 |
+
<span>Use proxy for titles</span>
|
| 200 |
+
</label>
|
| 201 |
+
</div>
|
| 202 |
+
<div class="spacer"></div>
|
| 203 |
+
<button id="btnClean" class="btn primary" title="Extract non-image links (Ctrl/Cmd+Enter)">Clean Links</button>
|
| 204 |
+
</div>
|
| 205 |
+
</section>
|
| 206 |
+
|
| 207 |
+
<!-- ========== Right: Output panel ========== -->
|
| 208 |
+
<section class="card">
|
| 209 |
+
<div class="card-header">
|
| 210 |
+
<h3>Output</h3>
|
| 211 |
+
<div class="row">
|
| 212 |
+
<span id="counts" class="pill" aria-live="polite">0 kept · 0 removed</span>
|
| 213 |
+
<span id="loading" class="pill hidden"><span class="spinner" aria-hidden="true"></span> <span id="loadingText">Fetching…</span></span>
|
| 214 |
+
<button id="btnTitles" class="btn" title="Fetch titles and output Markdown (no bullets)">Titles → Markdown</button>
|
| 215 |
+
<button id="btnAddSpacing" class="btn" title="Insert a blank line between each link in the output">Add spacing</button>
|
| 216 |
+
<button id="btnRemoveSpacing" class="btn" title="Remove extra blank lines between links">Remove spacing</button>
|
| 217 |
+
<button id="btnSortDomain" class="btn" title="Sort by domain (group similar domains)">Sort by domain</button>
|
| 218 |
+
<button id="btnCopy" class="btn" title="Copy all to clipboard">Copy All</button>
|
| 219 |
+
<button id="btnDownload" class="btn" title="Download as .txt">Download</button>
|
| 220 |
+
</div>
|
| 221 |
+
</div>
|
| 222 |
+
<div class="card-body">
|
| 223 |
+
<!-- Read-only output (either URLs or Markdown list) -->
|
| 224 |
+
<textarea id="outputArea" placeholder="Your cleaned list (or Markdown) will appear here"></textarea>
|
| 225 |
+
</div>
|
| 226 |
+
<div class="card-footer">
|
| 227 |
+
<div class="stats" id="stats">
|
| 228 |
+
<div><b>Found:</b> <span id="statFound">0</span> links</div>
|
| 229 |
+
<div><b>Removed images:</b> <span id="statImg">0</span></div>
|
| 230 |
+
<div><b>Removed dupes:</b> <span id="statDupes">0</span></div>
|
| 231 |
+
<div><b>Kept:</b> <span id="statKept">0</span></div>
|
| 232 |
+
<div><b>Titles resolved:</b> <span id="statTitles">0</span></div>
|
| 233 |
+
</div>
|
| 234 |
+
</div>
|
| 235 |
+
</section>
|
| 236 |
+
</main>
|
| 237 |
+
|
| 238 |
+
<!-- Small toast element for feedback (copy / errors / summaries) -->
|
| 239 |
+
<div id="toast" class="toast" role="status" aria-live="polite"></div>
|
| 240 |
+
|
| 241 |
+
<script>
|
| 242 |
+
// ===========================
|
| 243 |
+
// Helper shortcuts (tiny DOM utilities)
|
| 244 |
+
// ===========================
|
| 245 |
+
const qs = (s, el=document) => el.querySelector(s); // select one element by CSS selector
|
| 246 |
+
const qsa = (s, el=document) => [...el.querySelectorAll(s)]; // select many elements
|
| 247 |
+
const on = (el, ev, fn, opts) => el.addEventListener(ev, fn, opts); // attach event listener
|
| 248 |
+
|
| 249 |
+
// ===========================
|
| 250 |
+
// URL detection and cleaning utilities
|
| 251 |
+
// ===========================
|
| 252 |
+
|
| 253 |
+
// Find raw http/https URLs in text (keeps anything until whitespace or obvious break)
|
| 254 |
+
// Layman's terms: look for "http://..." or "https://..." and grab characters until a space or quote.
|
| 255 |
+
// Matches raw http/https URLs in free text. The match runs until whitespace or a
// character that almost always terminates a URL in prose/markup: < > " ' ` ) ] }
// NOTE: "+" is deliberately NOT a terminator — it is a legal URL character
// (e.g. "?q=a+b"), and excluding it silently truncated such links.
const URL_REGEX = /https?:\/\/[^\s<>"'`)\]}]+/gi;

// File extensions that identify a direct link to an image (tested against the URL path).
const IMAGE_EXT = /\.(?:png|jpe?g|gif|bmp|webp|tiff?|svg|avif|heic|heif)$/i;

// Query parameter names whose value may name an image format (e.g. "?format=png").
const IMAGE_HINT_PARAMS = ['format','fm','image','img','ext'];

// Query parameters removed from kept links when "Strip tracking params" is enabled.
const TRACKING_PARAMS = [
  'utm_source','utm_medium','utm_campaign','utm_term','utm_content',
  'utm_name','utm_id','utm_creative_format','utm_creative_tactic',
  'gclid','fbclid','mc_cid','mc_eid','igshid','msclkid','vero_conv','vero_id'
];
|
| 269 |
+
|
| 270 |
+
// Trim trailing punctuation (like ")" or ",") and angle brackets around URLs
|
| 271 |
+
/**
 * Remove wrapping angle brackets and trailing sentence punctuation from a URL.
 *
 * Punctuation is removed ONE character at a time so that a closing parenthesis
 * that balances an opening one inside the URL (e.g. ".../Foo_(bar).") survives.
 * Previously the whole trailing run was removed at once, which also deleted a
 * balanced ")" whenever it was followed by other punctuation.
 *
 * @param {string} url - Raw URL candidate extracted from text.
 * @returns {string} The URL without surrounding "<>" or trailing punctuation.
 */
function trimTrailingPunctuation(url){
  let out = url.replace(/^<+|>+$/g, '');
  const TRAIL = /[),.;:'"\]}>]$/;
  while (TRAIL.test(out)) {
    if (out.endsWith(')')) {
      const open = (out.match(/\(/g) || []).length;
      const close = (out.match(/\)/g) || []).length;
      // A ")" that is matched by an earlier "(" belongs to the URL itself.
      if (close <= open) break;
    }
    out = out.slice(0, -1);
  }
  return out;
}
|
| 284 |
+
|
| 285 |
+
// Decide if a URL points directly to an image file
|
| 286 |
+
/**
 * Report whether a link should be treated as pointing straight at an image.
 *
 * True for any data:/blob: URI, for http(s) URLs whose path ends in an image
 * extension, and for http(s) URLs where one of the hint query parameters
 * contains an image format name. Unparseable or non-http(s) input is false.
 *
 * @param {string} href - Candidate URL.
 * @returns {boolean}
 */
function isDirectImageURL(href){
  if (/^(data|blob):/i.test(href)) return true;

  let parsed;
  try { parsed = new URL(href); } catch { return false; }

  if (!/^https?:$/i.test(parsed.protocol)) return false;
  if (IMAGE_EXT.test(parsed.pathname)) return true;

  const FORMAT_NAME = /(?:png|jpe?g|gif|bmp|webp|tiff?|svg|avif|heic|heif)/;
  return IMAGE_HINT_PARAMS.some((name) => {
    const value = (parsed.searchParams.get(name) || '').toLowerCase();
    return FORMAT_NAME.test(value);
  });
}
|
| 297 |
+
|
| 298 |
+
// Optionally remove common tracking parameters from a URL
|
| 299 |
+
/**
 * Drop every query parameter listed in TRACKING_PARAMS from a URL.
 *
 * @param {string} href - URL to clean.
 * @returns {string} The re-serialized URL without a dangling "?" or "#";
 *   the input is returned untouched when it cannot be parsed as a URL.
 */
function stripTrackingParams(href){
  let parsed;
  try { parsed = new URL(href); } catch { return href; }

  TRACKING_PARAMS.forEach((name) => { parsed.searchParams.delete(name); });

  const serialized = parsed.toString();
  return serialized.replace(/\?$/, '').replace(/#$/, '');
}
|
| 304 |
+
|
| 305 |
+
// ===========================
|
| 306 |
+
// Title parsing + normalization helpers
|
| 307 |
+
// ===========================
|
| 308 |
+
|
| 309 |
+
// Decode HTML entities, e.g. "&amp;" becomes "&"
|
| 310 |
+
/**
 * Decode HTML character references by letting the browser parse the string
 * as the content of a detached <textarea> and reading back its value.
 *
 * @param {string} str - Text that may contain HTML entities.
 * @returns {string} The decoded text.
 */
function decodeEntities(str){
  const scratch = document.createElement('textarea');
  scratch.innerHTML = str;
  return scratch.value;
}
|
| 311 |
+
|
| 312 |
+
// Normalize a title: trim, collapse whitespace, remove common prefixes like "Title: " or leading hashes/bullets
|
| 313 |
+
/**
 * Normalize a page title for use as Markdown link text.
 *
 * Steps: decode HTML entities, collapse all whitespace (including newlines) to
 * single spaces, then repeatedly strip leading Markdown markers (#, >, -) and a
 * leading "Title:" label until neither remains, and finally cap the length.
 *
 * The strip is repeated because the two prefixes can appear in either order
 * (e.g. "# Title: Foo"); a single fixed-order pass left "Title:" behind.
 *
 * @param {*} s - Raw title; falsy values yield ''.
 * @returns {string} Cleaned title, at most 160 characters, with no trailing space.
 */
function normalizeTitle(s){
  if (!s) return '';
  let out = decodeEntities(String(s)).replace(/\s+/g, ' ').trim();
  let prev;
  do {
    prev = out;
    out = out
      .replace(/^[#>\-\s]+/, '')        // leading Markdown headings/bullets/quotes
      .replace(/^title\s*:\s*/i, '');   // leading "Title: " (case-insensitive)
  } while (out !== prev);
  // Truncation can end on a space; trim it so output never carries trailing whitespace.
  return out.slice(0, 160).trimEnd();
}
|
| 321 |
+
|
| 322 |
+
// Escape [] and backslashes in Markdown link text to avoid breaking syntax
|
| 323 |
+
/**
 * Backslash-escape the characters that would break Markdown link text:
 * "[", "]" and the backslash itself.
 *
 * @param {*} s - Value to escape (coerced to string).
 * @returns {string}
 */
function markdownEscapeText(s){
  const text = String(s);
  // "$&" re-inserts the matched character after the added backslash.
  return text.replace(/[\\\[\]]/g, '\\$&');
}
|
| 326 |
+
|
| 327 |
+
// Basic regex escape
|
| 328 |
+
/**
 * Escape regular-expression metacharacters so a string can be embedded
 * literally inside a RegExp source.
 *
 * @param {*} s - Value to escape (coerced to string).
 * @returns {string}
 */
function rxEscape(s){
  const META = /[.*+?^${}()|[\]\\]/g;
  return String(s).replace(META, (ch) => '\\' + ch);
}
|
| 329 |
+
|
| 330 |
+
// Extract a site core name from hostname (drop subdomains/TLDs where possible)
|
| 331 |
+
/**
 * Guess the "brand" label of a hostname.
 *
 * A single leading www./m./amp./news. prefix is removed. With one or two labels
 * left, the first label is returned; otherwise the longest label wins (the
 * earliest one on ties). This is a heuristic, not a public-suffix lookup.
 *
 * @param {string} [hostname]
 * @returns {string}
 */
function hostCore(hostname){
  const bare = (hostname || '').replace(/^www\.|^m\.|^amp\.|^news\./i, '');
  const labels = bare.split('.');
  if (labels.length <= 2) return labels[0] || bare;

  let longest = labels[0];
  for (const label of labels) {
    if (label.length > longest.length) longest = label;
  }
  return longest;
}
|
| 338 |
+
|
| 339 |
+
// Strip trailing/leading site/brand suffixes like " - The Verge" or "| NYTimes"
|
| 340 |
+
/**
 * Remove site/brand decorations such as " - The Verge" or "NYTimes | " from a title.
 *
 * Brand candidates are the declared site name, the hostname's core label and the
 * hostname without "www."; each is normalized and has one common TLD removed.
 * A candidate is stripped only when a separator (- – — | : • ») sits between it
 * and the rest of the title, and stripping repeats until the title stops changing.
 *
 * @param {string} title
 * @param {string} [hostname]
 * @param {string} [siteName]
 * @returns {string}
 */
function stripSiteSuffix(title, hostname, siteName){
  let out = String(title || '');
  if (!out) return '';

  const TLD = /\.(com|net|org|io|dev|app|news|co|uk|de|jp|fr|es|it|nl|ru|br|au)$/i;
  const rawNames = [siteName, hostCore(hostname), hostname && hostname.replace(/^www\./i,'')];
  const names = [];
  for (const raw of rawNames) {
    if (!raw) continue;
    const name = normalizeTitle(String(raw)).replace(TLD, '');
    if (name) names.push(rxEscape(name));
  }
  if (!names.length) return out;

  const SEP = '\\s*(?:[-–—|:•»]\\s*)';
  const alternatives = names.join('|');
  const suffixRe = new RegExp(`${SEP}(?:${alternatives})\\s*$`, 'i');
  const prefixRe = new RegExp(`^(?:${alternatives})${SEP}`, 'i');

  // Peel decorations until a pass changes nothing.
  for (;;) {
    const peeled = out.replace(suffixRe, '').replace(prefixRe, '');
    if (peeled === out) break;
    out = peeled;
  }
  return out.trim();
}
|
| 359 |
+
|
| 360 |
+
// Derive a human-ish title from the URL slug as a last resort
|
| 361 |
+
/**
 * Build a readable title from the last path segment of a URL.
 *
 * A trailing .html/.htm/.php/.asp/.aspx extension is removed, the slug is
 * percent-decoded and "-", "_" and "+" become spaces. When the result is purely
 * numeric or shorter than three characters, the hostname's core label is used.
 *
 * @param {string} href
 * @returns {string} A title, or '' when the URL cannot be parsed or decoded.
 */
function slugTitleFromURL(href){
  try {
    const parsed = new URL(href);
    const segments = parsed.pathname.split('/').filter(Boolean);
    // Empty segments were filtered out, so the last entry (if any) is never blank.
    const lastSegment = segments.length ? segments[segments.length - 1] : '';
    const withoutExt = lastSegment.replace(/\.(html?|php|aspx?)$/i, '');
    const words = decodeURIComponent(withoutExt).replace(/[\-_+]+/g, ' ');
    const unusable = /^\d+$/.test(words) || words.length < 3;
    return unusable ? hostCore(parsed.hostname) : normalizeTitle(words);
  } catch {
    return '';
  }
}
|
| 373 |
+
|
| 374 |
+
// Pull a reasonable title from HTML markup using DOMParser with multiple sources
|
| 375 |
+
/**
 * Extract the best available title from an HTML document string.
 *
 * Sources are tried in order: Open Graph title, Twitter/meta title, the first
 * <h1>-style headline, then <title>. The first non-empty one has site branding
 * removed (using og:site_name and the hostname) and is normalized again.
 *
 * @param {string} html - Markup to parse.
 * @param {string} hostname - Host the markup came from (used for brand stripping).
 * @returns {string} The title, or '' when nothing usable is found.
 */
function parseTitleFromHTML(html, hostname){
  if (!html) return '';

  let doc = null;
  try { doc = new DOMParser().parseFromString(html, 'text/html'); } catch { doc = null; }
  if (!doc) return '';

  // Normalized "content" attribute or text of the first element matching the selector.
  const textOf = (selector) => {
    const node = doc.querySelector(selector);
    if (!node) return '';
    return normalizeTitle(node.getAttribute('content') || node.textContent || '');
  };

  const siteName = textOf('meta[property="og:site_name"], meta[name="og:site_name"]');
  const ranked = [
    textOf('meta[property="og:title"], meta[name="og:title"]'),
    textOf('meta[name="twitter:title"], meta[property="twitter:title"], meta[name="title"], meta[property="title"]'),
    textOf('h1, h1[itemprop="headline"], [data-test-id="post-title"]'),
    normalizeTitle(doc.title || ''),
  ];
  const firstHit = ranked.find(Boolean) || '';

  return normalizeTitle(stripSiteSuffix(firstHit, hostname, siteName));
}
|
| 395 |
+
|
| 396 |
+
// Pull a decent title from a plaintext article (proxy output). Skip obvious boilerplate.
|
| 397 |
+
/**
 * Pick a title from plain text (e.g. the proxy's readable output).
 *
 * Returns the first trimmed line that is 3–180 characters long and does not look
 * like boilerplate (cookie/consent/login/404 notices and similar), normalized.
 *
 * @param {string} text
 * @returns {string} The title, or '' when no line qualifies.
 */
function parseTitleFromPlain(text){
  if (!text) return '';

  const BOILERPLATE = /cookie|consent|subscribe|sign\s*in|log\s*in|adblock|advert|privacy|terms|404|not\s*found|redirecting/i;
  let chosen = '';
  for (const rawLine of text.split(/\r?\n/)) {
    const line = rawLine.trim();
    if (line.length >= 3 && line.length <= 180 && !BOILERPLATE.test(line)) {
      chosen = line;
      break;
    }
  }
  return normalizeTitle(chosen);
}
|
| 406 |
+
|
| 407 |
+
// ===========================
|
| 408 |
+
// Core cleaning logic
|
| 409 |
+
// ===========================
|
| 410 |
+
|
| 411 |
+
/**
 * Extract and clean the non-image links found in free text.
 *
 * Pipeline: match URLs, trim trailing punctuation, optionally keep only http(s),
 * drop direct image links, optionally strip tracking parameters, and optionally
 * remove duplicates (compared case-insensitively, first occurrence kept).
 *
 * @param {string} raw - Pasted text.
 * @param {{httpOnly?: boolean, stripTracking?: boolean, dedupe?: boolean}} opts
 * @returns {{kept: string[], found: number, removedImages: number, removedDupes: number}}
 */
function cleanLinksFromText(raw, opts){
  if (!raw) return { kept: [], found: 0, removedImages: 0, removedDupes: 0 };

  const matches = raw.match(URL_REGEX) || [];
  let candidates = matches.map((m) => trimTrailingPunctuation(m));
  if (opts.httpOnly) candidates = candidates.filter((h) => /^https?:\/\//i.test(h));

  const nonImages = candidates.filter((h) => !isDirectImageURL(h));
  const removedImages = candidates.length - nonImages.length;

  let cleaned = nonImages.map((h) => (opts.stripTracking ? stripTrackingParams(h) : h));

  let removedDupes = 0;
  if (opts.dedupe) {
    const seen = new Set();
    const unique = cleaned.filter((h) => {
      const key = h.toLowerCase();
      if (seen.has(key)) return false;
      seen.add(key);
      return true;
    });
    removedDupes = cleaned.length - unique.length;
    cleaned = unique;
  }

  return { kept: cleaned, found: matches.length, removedImages, removedDupes };
}
|
| 432 |
+
|
| 433 |
+
// ===========================
|
| 434 |
+
// Title fetching with concurrency + proxy fallback
|
| 435 |
+
// ===========================
|
| 436 |
+
|
| 437 |
+
// Promise with timeout so one slow site doesn't block everything
|
| 438 |
+
/**
 * Race a promise against a timer so one slow operation cannot stall the batch.
 *
 * The timer is cleared as soon as the race settles; previously every call left
 * a live timer behind for the full `ms`, even after the work had finished.
 * Note: this only stops waiting — it does not cancel the underlying operation.
 *
 * @param {Promise<T>} promise - Work to wait for.
 * @param {number} ms - Maximum time to wait, in milliseconds.
 * @returns {Promise<T>} Settles like `promise`, or rejects with Error('timeout').
 * @template T
 */
function withTimeout(promise, ms){
  let timer;
  const timeout = new Promise((_, reject) => {
    timer = setTimeout(() => reject(new Error('timeout')), ms);
  });
  return Promise.race([promise, timeout]).finally(() => clearTimeout(timer));
}
|
| 441 |
+
|
| 442 |
+
// Try to fetch a page's title. Strategy: direct fetch → proxy → fallback to hostname
|
| 443 |
+
// Simple in-session title cache
|
| 444 |
+
// In-memory URL → title cache, mirrored to sessionStorage under "titleCacheV1".
const titleCache = new Map();

// Load any entries persisted earlier in this browser session (best-effort:
// missing storage or corrupt JSON is ignored).
(function hydrateCache(){
  try {
    const stored = sessionStorage.getItem('titleCacheV1');
    if (!stored) return;
    Object.entries(JSON.parse(stored)).forEach(([url, title]) => {
      titleCache.set(url, title);
    });
  } catch {}
})();

/** Look up a cached title; returns undefined when the key is absent. */
function cacheGet(key){
  return titleCache.get(key);
}

/** Store a title and persist the whole cache (storage errors are ignored). */
function cacheSet(key, value){
  try {
    titleCache.set(key, value);
    const snapshot = Object.fromEntries(titleCache);
    sessionStorage.setItem('titleCacheV1', JSON.stringify(snapshot));
  } catch {}
}
|
| 452 |
+
|
| 453 |
+
/**
 * Resolve a human-readable title for a URL (best-effort).
 *
 * Strategy: session cache → direct CORS fetch + HTML parse → optional
 * r.jina.ai proxy fetch + text/HTML parse → slug/hostname fallback.
 *
 * Only titles actually parsed from fetched content are cached. Guessed
 * fallbacks (slug or hostname) are NOT cached: caching them meant one blocked
 * or failed request pinned the guess for the whole session, so a later run
 * (for example after enabling the proxy) could never fetch the real title.
 *
 * @param {string} url - Absolute URL; an invalid URL makes the promise reject.
 * @param {{useProxy?: boolean, timeoutMs?: number}} [options]
 * @returns {Promise<{url: string, title: string, source: 'cache'|'direct'|'proxy'|'fallback'}>}
 */
async function fetchTitleForURL(url, { useProxy=true, timeoutMs=10000 }={}){
  const u = new URL(url);
  const fallbackHost = u.hostname.replace(/^www\./, '');

  const cached = cacheGet(url);
  if (cached) return { url, title: cached, source: 'cache' };

  // 1) Direct fetch — frequently blocked by CORS, in which case fetch() rejects.
  try {
    const res = await withTimeout(fetch(url, { mode:'cors', redirect:'follow', credentials:'omit' }), timeoutMs);
    if (res && res.ok) {
      const ct = res.headers.get('content-type') || '';
      // Parse HTML-ish responses; a missing content-type is given the benefit of the doubt.
      if (/text\/html|application\/(xhtml\+xml|html)/i.test(ct) || !ct) {
        const html = await res.text();
        const t = parseTitleFromHTML(html, u.hostname);
        if (t) { cacheSet(url, t); return { url, title: t, source: 'direct' }; }
      }
    }
  } catch (_) { /* best-effort: fall through to the proxy */ }

  // 2) Proxy fetch — returns a readable text rendering of the page.
  if (useProxy) {
    try {
      const res2 = await withTimeout(fetch('https://r.jina.ai/' + url, { redirect:'follow' }), timeoutMs);
      if (res2 && res2.ok) {
        const text = await res2.text();
        // Try plain-text heuristics first, then HTML parsing in case markup survived.
        const parsed = parseTitleFromPlain(text) || parseTitleFromHTML(text, u.hostname);
        if (parsed) { cacheSet(url, parsed); return { url, title: parsed, source: 'proxy' }; }
        // The proxy answered but nothing usable was found: return a guess, uncached.
        const guess = slugTitleFromURL(url) || fallbackHost;
        if (guess) return { url, title: guess, source: 'proxy' };
      }
    } catch (_) { /* best-effort: fall through to the fallback */ }
  }

  // 3) Last resort: derive a title from the URL itself (not cached, so it can be retried).
  const last = slugTitleFromURL(url) || fallbackHost;
  return { url, title: last, source: 'fallback' };
}
|
| 490 |
+
|
| 491 |
+
// Map with limited concurrency
|
| 492 |
+
/**
 * Run `mapper` over `items` with at most `limit` calls in flight at once.
 *
 * Results keep input order and use the Promise.allSettled shape, so a failing
 * item never rejects the whole batch.
 *
 * Fixes over the previous hand-rolled scheduler:
 *  - an empty `items` array (or `limit <= 0`) no longer leaves the returned
 *    promise pending forever; an empty input resolves to [] and the limit is
 *    clamped to at least 1;
 *  - a mapper that throws synchronously is recorded as 'rejected' instead of
 *    escaping the scheduler.
 *
 * @param {Array<T>} items
 * @param {number} limit - Maximum number of concurrent mapper calls.
 * @param {(item: T, index: number) => (R | Promise<R>)} mapper
 * @returns {Promise<Array<{status:'fulfilled', value:R} | {status:'rejected', reason:any}>>}
 * @template T, R
 */
async function mapWithConcurrency(items, limit, mapper){
  const results = new Array(items.length);
  const maxWorkers = Math.max(1, Math.floor(Number(limit)) || 1);
  const workerCount = Math.min(items.length, maxWorkers);
  let next = 0;

  // Each worker repeatedly claims the next unprocessed index until none remain.
  async function worker(){
    while (next < items.length) {
      const idx = next++;
      try {
        results[idx] = { status:'fulfilled', value: await mapper(items[idx], idx) };
      } catch (reason) {
        results[idx] = { status:'rejected', reason };
      }
    }
  }

  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
|
| 507 |
+
|
| 508 |
+
// ===========================
|
| 509 |
+
// UI wiring
|
| 510 |
+
// ===========================
|
| 511 |
+
const app = qs('#app');
|
| 512 |
+
const inputArea = qs('#inputArea');
|
| 513 |
+
const outputArea = qs('#outputArea');
|
| 514 |
+
const btnClean = qs('#btnClean');
|
| 515 |
+
const btnTitles = qs('#btnTitles');
|
| 516 |
+
const btnCopy = qs('#btnCopy');
|
| 517 |
+
const btnDownload= qs('#btnDownload');
|
| 518 |
+
const btnPaste = qs('#btnPaste');
|
| 519 |
+
const btnAddSpacing = qs('#btnAddSpacing');
|
| 520 |
+
const btnRemoveSpacing = qs('#btnRemoveSpacing');
|
| 521 |
+
const btnSortDomain = qs('#btnSortDomain');
|
| 522 |
+
const themeToggle= qs('#themeToggle');
|
| 523 |
+
|
| 524 |
+
const optStrip = qs('#optStripTracking');
|
| 525 |
+
const optDedupe = qs('#optDedupe');
|
| 526 |
+
const optHttp = qs('#optHttpOnly');
|
| 527 |
+
const optProxy = qs('#optProxy');
|
| 528 |
+
|
| 529 |
+
const counts = qs('#counts');
|
| 530 |
+
const statFound = qs('#statFound');
|
| 531 |
+
const statImg = qs('#statImg');
|
| 532 |
+
const statDupes = qs('#statDupes');
|
| 533 |
+
const statKept = qs('#statKept');
|
| 534 |
+
const statTitles = qs('#statTitles');
|
| 535 |
+
const toastEl = qs('#toast');
|
| 536 |
+
const loadingEl = qs('#loading');
|
| 537 |
+
const loadingTxt = qs('#loadingText');
|
| 538 |
+
|
| 539 |
+
let currentKept = [];
|
| 540 |
+
|
| 541 |
+
// Handle of the pending auto-hide timer, kept so a newer toast can cancel it.
let toastHideTimer = null;

/**
 * Show a short-lived message in the toast element.
 *
 * The toast is revealed by adding the `show` class and hidden again 1600 ms
 * later. Any hide timer left over from a previous call is cancelled first;
 * otherwise the older timer would fire mid-way through and hide the newer
 * message early.
 *
 * @param {string} [msg='Copied!'] Text to display.
 */
function showToast(msg = 'Copied!') {
  toastEl.textContent = msg;
  toastEl.classList.add('show');

  clearTimeout(toastHideTimer);
  toastHideTimer = setTimeout(() => {
    toastHideTimer = null;
    toastEl.classList.remove('show');
  }, 1600);
}
|
| 544 |
+
|
| 545 |
+
// Theme switch: flip the app root's data-theme attribute between dark and light.
on(themeToggle, 'click', () => {
  const isDark = app.getAttribute('data-theme') === 'dark';
  app.setAttribute('data-theme', isDark ? 'light' : 'dark');
});
|
| 549 |
+
|
| 550 |
+
// Paste button: pull text from the system clipboard into the input area.
// An empty clipboard leaves the current input in place.
on(btnPaste, 'click', async () => {
  try {
    const clipboardText = await navigator.clipboard.readText();
    inputArea.value = clipboardText || inputArea.value;
    inputArea.focus();
  } catch {
    // Any failure in the steps above ends up here and is reported as a
    // blocked clipboard read.
    showToast('Clipboard read blocked by browser');
  }
});
|
| 554 |
+
|
| 555 |
+
// Clean button: run the link cleaner on the current input.
on(btnClean, 'click', () => runClean());

// Keyboard shortcuts:
//   Ctrl/Cmd + Enter          -> clean
//   Ctrl/Cmd + Shift + Enter  -> fetch titles and print Markdown (no bullets)
on(document, 'keydown', (event) => {
  const hasModifier = event.ctrlKey || event.metaKey;
  if (!hasModifier || event.key !== 'Enter') return;

  event.preventDefault();
  if (event.shiftKey) {
    runTitlesToMarkdown();
  } else {
    runClean();
  }
});
|
| 563 |
+
|
| 564 |
+
// Copy button: write the output to the clipboard; if that fails, select the
// output text so the user can copy it manually.
on(btnCopy, 'click', async () => {
  const payload = outputArea.value || '';
  try {
    await navigator.clipboard.writeText(payload);
    showToast('Output copied!');
  } catch {
    outputArea.focus();
    outputArea.select();
    showToast('Press Ctrl+C to copy');
  }
});
|
| 569 |
+
|
| 570 |
+
// Download button: save the current output as a plain-text file.
//
// The temporary anchor is attached to the document before it is clicked
// (a detached anchor's click is ignored by some browsers), and the object URL
// is revoked on a later task rather than synchronously after click(), because
// revoking immediately can cancel a download the browser has not started yet.
on(btnDownload, 'click', () => {
  const REVOKE_DELAY_MS = 1000;

  const blob = new Blob([outputArea.value || ''], { type: 'text/plain' });
  const url = URL.createObjectURL(blob);

  const a = document.createElement('a');
  a.href = url;
  a.download = 'links-or-markdown.txt';
  a.hidden = true;
  document.body.appendChild(a);

  try {
    a.click();
  } finally {
    // Always clean up, even if click() throws.
    a.remove();
    setTimeout(() => URL.revokeObjectURL(url), REVOKE_DELAY_MS);
  }
});
|
| 576 |
+
|
| 577 |
+
// Titles button: fetch page titles and print the links as Markdown.
on(btnTitles, 'click', () => {
  return runTitlesToMarkdown();
});
|
| 578 |
+
|
| 579 |
+
// Add-spacing button: rewrite the output so that exactly one empty line
// separates each non-empty line.
on(btnAddSpacing, 'click', () => {
  const nonBlankRows = (outputArea.value || '')
    .split(/\r?\n/)
    .filter((row) => row.trim() !== '');

  // Whitespace-only output yields no rows, so one check covers both the
  // "empty" and the "nothing but blank lines" cases.
  if (nonBlankRows.length === 0) {
    showToast('No output to space');
    return;
  }

  outputArea.value = nonBlankRows.join('\n\n');
  showToast('Added blank lines between links');
});
|
| 588 |
+
|
| 589 |
+
// Remove-spacing button: drop every blank line so the output is compacted to
// one non-empty line after another.
on(btnRemoveSpacing, 'click', () => {
  const nonBlankRows = (outputArea.value || '')
    .split(/\r?\n/)
    .filter((row) => row.trim() !== '');

  // Whitespace-only output yields no rows, so one check covers both the
  // "empty" and the "nothing but blank lines" cases.
  if (nonBlankRows.length === 0) {
    showToast('No output to trim');
    return;
  }

  outputArea.value = nonBlankRows.join('\n');
  showToast('Removed extra blank lines');
});
|
| 598 |
+
|
| 599 |
+
// Sort-by-domain button: order the output lines by hostname (a leading
// "www." is ignored, comparison is case-insensitive) so links from the same
// site end up together. Lines without a recognisable URL go last; ties are
// broken by comparing the full line text.
on(btnSortDomain, 'click', () => {
  const rows = (outputArea.value || '')
    .split(/\r?\n/)
    .filter((row) => row.trim() !== '');

  if (rows.length === 0) {
    showToast('No output to sort');
    return;
  }

  const MARKDOWN_LINK = /\[[^\]]*\]\((https?:[^\s)]+)\)/i;
  const BARE_URL = /https?:\/\/[^\s<>"'`)+\]}]+/i;

  // Prefer the target of a Markdown link; otherwise take the first bare URL.
  const findUrl = (row) => {
    const mdMatch = row.match(MARKDOWN_LINK);
    if (mdMatch) return mdMatch[1];
    const bareMatch = row.match(BARE_URL);
    return bareMatch ? bareMatch[0] : '';
  };

  // Normalised hostname used as the sort key; '' when the row has no
  // parseable URL.
  const hostKeyOf = (row) => {
    try {
      const { hostname } = new URL(findUrl(row));
      return (hostname || '').replace(/^www\./i, '').toLowerCase();
    } catch {
      return '';
    }
  };

  const keyed = rows.map((line) => ({ line, hostKey: hostKeyOf(line) }));

  keyed.sort((left, right) => {
    if (left.hostKey && right.hostKey) {
      return left.hostKey.localeCompare(right.hostKey) || left.line.localeCompare(right.line);
    }
    // From here on at least one side has no host key.
    if (left.hostKey) return -1;
    if (right.hostKey) return 1;
    return left.line.localeCompare(right.line);
  });

  outputArea.value = keyed.map(({ line }) => line).join('\n');
  showToast('Sorted by domain');
});
|
| 637 |
+
|
| 638 |
+
/**
 * Show or hide the loading indicator and lock the action buttons while busy.
 *
 * @param {*} isOn Truthy to show the indicator and disable the buttons,
 *   falsy to hide it and re-enable them.
 * @param {string} [text] Status text; falls back to 'Fetching…' when empty.
 */
function setLoading(isOn, text) {
  const busy = Boolean(isOn);

  loadingEl.classList.toggle('hidden', !busy);
  loadingTxt.textContent = text || 'Fetching…';

  const lockableButtons = [
    btnTitles,
    btnClean,
    btnCopy,
    btnDownload,
    btnAddSpacing,
    btnSortDomain,
    btnRemoveSpacing,
  ];
  for (const button of lockableButtons) {
    button.disabled = busy;
  }
}
|
| 644 |
+
|
| 645 |
+
/**
 * Run the link cleaner over the input text with the currently ticked options
 * and refresh the UI: output area, per-stat counters, summary line, and the
 * cached `currentKept` list (stored as a copy of the cleaner's result).
 */
function runClean() {
  const options = {
    stripTracking: Boolean(optStrip.checked),
    dedupe: Boolean(optDedupe.checked),
    httpOnly: Boolean(optHttp.checked),
  };
  const result = cleanLinksFromText(inputArea.value || '', options);
  const keptCount = result.kept.length;
  const removedCount = result.removedImages + result.removedDupes;

  currentKept = result.kept.slice();
  outputArea.value = result.kept.join('\n');

  statFound.textContent = String(result.found);
  statImg.textContent = String(result.removedImages);
  statDupes.textContent = String(result.removedDupes);
  statKept.textContent = String(keptCount);
  // A fresh clean invalidates any previously resolved titles.
  statTitles.textContent = '0';

  counts.textContent = `${keptCount} kept · ${removedCount} removed`;
}
|
| 658 |
+
|
| 659 |
+
/**
 * Fetch a title for every kept URL, then print the list as Markdown links,
 * one `[Title](URL)` per line (no bullets).
 *
 * Runs a clean pass first when nothing has been kept yet. Fetches go through
 * `mapWithConcurrency`, whose results are read here as
 * `{ status, value }` records. A link without a usable title falls back to a
 * slug derived from the URL, then to the hostname, then to the raw URL text.
 *
 * The loading state is always cleared, even when something throws part-way,
 * so the buttons can never stay disabled. Errors other than per-URL fetch
 * failures still propagate to the caller as a rejected promise.
 *
 * @returns {Promise<void>}
 */
async function runTitlesToMarkdown() {
  if (!currentKept.length) runClean();
  const urls = currentKept.slice();
  if (!urls.length) { showToast('Nothing to process — paste text and Clean first'); return; }

  const CONCURRENCY = 6; // a small pool keeps things fast without overloading
  let resolved = 0;
  let titleHits = 0;

  // Last-resort link text. `new URL` throws on malformed input, in which case
  // the raw URL string is used instead of aborting the whole run.
  const hostLabel = (url) => {
    try {
      return new URL(url).hostname.replace(/^www\./, '') || url;
    } catch {
      return url;
    }
  };

  setLoading(true, `Fetching titles (0/${urls.length})`);
  try {
    statTitles.textContent = '0';

    const results = await mapWithConcurrency(urls, CONCURRENCY, async (u) => {
      try {
        const r = await fetchTitleForURL(u, { useProxy: !!optProxy.checked, timeoutMs: 10000 });
        if (r && r.title) titleHits++;
        return r;
      } finally {
        // Count failed fetches too, so the progress text reaches N/N.
        resolved++;
        setLoading(true, `Fetching titles (${resolved}/${urls.length})`);
      }
    });

    // Build Markdown lines without bullets
    const lines = results.map((r, i) => {
      let url = urls[i];
      let title = '';
      if (r && r.status === 'fulfilled' && r.value) {
        url = r.value.url || url;
        title = r.value.title || '';
      }
      let safeTitle = normalizeTitle(title);
      if (!safeTitle) safeTitle = slugTitleFromURL(url) || hostLabel(url);
      return `[${markdownEscapeText(safeTitle)}](${url})`;
    });

    outputArea.value = lines.join('\n');
    statTitles.textContent = String(titleHits);
    counts.textContent = `${urls.length} links → Markdown (no bullets, titles resolved: ${titleHits})`;
  } finally {
    setLoading(false);
  }

  const proxyNote = optProxy.checked ? '\n(Proxy ON for better compatibility)' : '';
  showToast(`Markdown ready for ${urls.length} link(s). Titles resolved: ${titleHits}${proxyNote}`);
}
|
| 699 |
+
|
| 700 |
+
// On load, clean straight away when the input area already holds text
// (for example, content restored by the browser after a refresh).
if ((qs('#inputArea').value || '').trim() !== '') {
  runClean();
}
|
| 702 |
+
</script>
|
| 703 |
+
</body>
|
| 704 |
+
</html>
|