Galaxydude2 committed on
Commit
e124cde
·
verified ·
1 Parent(s): c09f119

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +163 -139
app.py CHANGED
@@ -1,3 +1,10 @@
 
 
 
 
 
 
 
1
  import os
2
  import threading
3
  import time
@@ -9,281 +16,298 @@ from pathlib import Path
9
  import gradio as gr
10
 
11
  # ────────────────────────────────────────────────
12
- # Globale Konfiguration & Schutzmaßnahmen
13
  # ────────────────────────────────────────────────
14
 
15
  SESSION = requests.Session()
16
  SESSION.headers.update({
17
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
18
- "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 "
19
- "Coppermine-Original-Downloader/1.0 (Security-Research; Contact:security@example.com)",
 
 
 
 
20
  "Accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
21
  "Accept-Language": "de-DE,de;q=0.9,en-US;q=0.8,en;q=0.7",
22
- "Referer": "https://www.google.com/" # Kann je nach Gallery angepasst werden
23
  })
24
 
25
  REQUEST_TIMEOUT = 12
26
- DOWNLOAD_DELAY = 0.35 # Sekunden zwischen Downloads → Anti-DoS
27
- MAX_THREADS_DEFAULT = 3
28
- MAX_PAGES_TO_SCAN = 300 # Sicherheitsnetz gegen Endlosschleifen
29
 
30
  # ────────────────────────────────────────────────
31
  # Hilfsfunktionen
32
  # ────────────────────────────────────────────────
33
 
34
- def correct_image_url(thumb_url: str) -> str:
35
- """
36
- Typische Coppermine-Transformation:
37
- thumb_ → original
38
- normal_ → original (manchmal)
39
- Manchmal sitzt die volle Auflösung einfach im selben Ordner ohne Prefix.
40
- """
41
- path = Path(thumb_url)
42
  filename = path.name
43
 
44
- # Entferne gängige Vorsilben
45
- for prefix in ["thumb_", "normal_", "medium_", "small_"]:
46
  if filename.startswith(prefix):
47
- filename = filename[len(prefix):]
48
- break
49
 
50
- # Manche Installationen nutzen andere Muster hier ggf. erweitern
51
- return str(path.with_name(filename))
 
 
 
 
 
52
 
53
 
54
  def download_image(img_url: str, folder: str, progress_queue: queue.Queue) -> bool:
55
- file_name = img_url.split("/")[-1].split("?")[0].split("#")[0]
56
- if not file_name.lower().endswith((".jpg", ".jpeg", ".png", ".gif", ".webp")):
 
 
 
57
  return False
58
 
59
- file_path = os.path.join(folder, file_name)
60
 
61
- if os.path.exists(file_path):
62
- progress_queue.put(("skip", f"Exists → {file_name}"))
63
  return False
64
 
65
  try:
66
- r = SESSION.get(img_url, timeout=REQUEST_TIMEOUT, stream=True)
67
- if r.status_code != 200:
 
 
 
 
 
68
  return False
69
 
70
- content_type = r.headers.get("Content-Type", "").lower()
71
- if "image" not in content_type and "octet-stream" not in content_type:
72
  return False
73
 
74
- with open(file_path, "wb") as f:
 
 
 
 
75
  for chunk in r.iter_content(chunk_size=8192):
76
  if chunk:
77
  f.write(chunk)
78
 
79
- progress_queue.put(("success", f"OK {file_name}"))
80
  return True
81
 
82
- except (requests.RequestException, OSError) as e:
83
- progress_queue.put(("error", f"Fail {file_name} → {str(e)}"))
84
  return False
85
 
86
 
87
- def scrape_page(page_url: str) -> list[str]:
88
- """Sucht nach Bild-URLs auf einer Album-Seite (?page=XY)"""
89
  try:
90
  r = SESSION.get(page_url, timeout=REQUEST_TIMEOUT)
91
  r.raise_for_status()
92
 
93
  soup = BeautifulSoup(r.text, "html.parser")
94
- images = set()
95
 
96
- # 1. Normale <img src=...> Tags (meist Thumbnails)
97
  for img in soup.find_all("img"):
98
- src = img.get("src")
99
- if src and any(src.lower().endswith(ext) for ext in [".jpg", ".jpeg", ".png"]):
100
  full = urljoin(page_url, src)
101
- orig = correct_image_url(full)
102
- images.add(orig)
103
 
104
- # 2. Manchmal sind die Original-Links in <a href="..."> um das Thumbnail herum
105
  for a in soup.find_all("a", href=True):
106
- href = a.get("href")
107
- if href and any(href.lower().endswith(ext) for ext in [".jpg", ".jpeg", ".png", ".gif"]):
108
  full = urljoin(page_url, href)
109
- images.add(full) # Hier meist schon Original
 
 
 
 
 
 
110
 
111
- return list(images)
112
 
113
  except Exception as e:
114
  print(f"Scrape-Fehler {page_url}: {e}")
115
  return []
116
 
117
 
118
- def worker(album_base_url: str, folder: str, stop_event: threading.Event,
119
  progress_queue: queue.Queue, thread_id: int):
120
  page = 1
121
- downloaded = 0
122
-
123
- while not stop_event.is_set():
124
- if page > MAX_PAGES_TO_SCAN:
125
- progress_queue.put(("warn", f"Thread {thread_id} → Max-Seiten-Limit erreicht"))
126
- break
127
 
128
- page_url = f"{album_base_url.rstrip('/')}?page={page}"
129
- progress_queue.put(("info", f"Thread {thread_id} scannt Seite {page}"))
 
130
 
131
- image_urls = scrape_page(page_url)
132
  if not image_urls:
133
- progress_queue.put(("info", f"Thread {thread_id} → Keine Bilder mehr (Seite {page})"))
134
  break
135
 
136
  for url in image_urls:
137
  if stop_event.is_set():
138
  break
139
  if download_image(url, folder, progress_queue):
140
- downloaded += 1
141
  time.sleep(DOWNLOAD_DELAY)
142
 
143
  page += 1
144
 
145
- progress_queue.put(("done", f"Thread {thread_id} beendet – {downloaded} Bilder"))
146
 
147
 
148
- def start_scraper(album_url: str, download_folder: str, num_threads: int):
149
  if not album_url.strip():
150
- yield gr.update(value="❌ Album-URL fehlt"), "", 0, gr.update(value="Fehler")
151
  return
152
 
153
- if not download_folder.strip():
154
- download_folder = "coppermine_downloads"
155
-
156
- Path(download_folder).mkdir(parents=True, exist_ok=True)
157
 
158
  stop_event = threading.Event()
159
  progress_queue = queue.Queue()
160
 
161
- # Status-Update-Thread
162
- def progress_updater():
163
- total_downloaded = 0
164
- log_lines = []
165
 
166
  while True:
167
  try:
168
- typ, msg = progress_queue.get(timeout=1.5)
169
  if typ == "success":
170
- total_downloaded += 1
171
  if typ in ("info", "success", "skip", "error", "warn", "done"):
172
- log_lines.append(msg)
173
- log_lines = log_lines[-25:] # letzten 25 Zeilen behalten
 
 
 
 
174
 
175
  yield (
176
- f"**Download läuft …** ({total_downloaded} Bilder)",
177
- "\n".join(log_lines),
178
- total_downloaded,
179
- gr.update(value="Stop Download", interactive=True)
180
  )
181
 
 
 
 
182
  except queue.Empty:
183
- if not any(t.is_alive() for t in threads):
184
  break
185
 
186
- yield (
187
- "Fertig oder gestoppt!",
188
- "\n".join(log_lines) + "\n\n→ Download abgeschlossen.",
189
- total_downloaded,
190
- gr.update(value="Start Download", interactive=True)
191
- )
192
 
193
- # Worker-Threads starten
194
- threads = []
195
- for i in range(1, num_threads + 1):
 
196
  t = threading.Thread(
197
  target=worker,
198
- args=(album_url, download_folder, stop_event, progress_queue, i),
199
  daemon=True
200
  )
201
- threads.append(t)
202
  t.start()
203
 
204
- # Gradio Live-Update starten
205
- yield from progress_updater()
206
 
207
 
208
- def stop_scraper():
209
- # Wird über globales Event gesteuert – hier nur UI-Feedback
210
- return (
211
- "Stop-Signal gesendet … warte auf Threads",
212
- gr.update(value="Stop gesendet – bitte warten", interactive=False)
213
- )
214
 
215
 
216
  # ────────────────────────────────────────────────
217
- # Gradio Interface
218
  # ────────────────────────────────────────────────
219
 
220
  css = """
221
- .gradio-container { max-width: 880px; margin: auto; }
222
- .status { font-weight: bold; color: #2e7d32; }
223
- .log { font-family: 'Consolas', monospace; background: #111; color: #0f0; padding: 12px; border-radius: 6px; white-space: pre-wrap; max-height: 320px; overflow-y: auto; }
 
 
 
224
  """
225
 
226
- with gr.Blocks(title="Coppermine Original-Downloader", css=css) as demo:
227
  gr.Markdown("""
228
- # Coppermine → Originalbilder Downloader
229
- **Sicherheitsforscher / Bug-Bounty Edition** – nur für legale Forschungszwecke!
 
230
  """)
231
 
232
  with gr.Row():
233
  url_input = gr.Textbox(
234
- label="Album-URL (z. B. https://example.com/index.php?album=42)",
235
- placeholder="https://...",
236
- value="https://example.com/gallery/index.php?album=1",
237
- scale=4
238
  )
239
  folder_input = gr.Textbox(
240
- label="Download-Ordner",
241
- value="coppermine_downloads",
242
- scale=2
243
  )
244
 
245
- with gr.Row():
246
- threads_input = gr.Slider(
247
- 1, 12, value=MAX_THREADS_DEFAULT, step=1,
248
- label="Anzahl paralleler Threads (Vorsicht vor DoS!)"
249
- )
250
 
251
- status_output = gr.Markdown("**Bereit …**")
252
- log_output = gr.Textbox(label="Live-Log", lines=12, max_lines=30, interactive=False, elem_classes=["log"])
253
- count_output = gr.Number(label="Heruntergeladene Bilder", value=0)
254
 
255
  with gr.Row():
256
- start_btn = gr.Button("Start Download", variant="primary", scale=1)
257
- stop_btn = gr.Button("Stop Download", variant="stop", interactive=False, scale=1)
258
 
259
  gr.Markdown("""
260
- **Hinweise & Warnungen**
261
- Viele Coppermine-Instanzen erkennen aggressive Scraper Rate-Limiting eingebaut
262
- • **Rechtlich**: Nur mit Erlaubnis des Betreibers nutzen!
263
- • **Bug Bounty**: Wenn du dabei Schwachstellen findest (z. B. fehlende Auth, IDOR, offene Ordner), melde sie verantwortungsvoll.
 
264
  """)
265
 
266
- # ─── Events ───────────────────────────────────────
267
 
268
  start_btn.click(
269
- start_scraper,
270
- inputs=[url_input, folder_input, threads_input],
271
- outputs=[status_output, log_output, count_output, stop_btn],
272
- _js=None # Live-Update durch Generator
273
  )
274
 
275
  stop_btn.click(
276
- stop_scraper,
277
- outputs=[status_output, stop_btn]
278
  )
279
 
280
- # Globales Stop-Event muss von außen erreichbar sein – hier dummy
281
- # In echt würde man eine globale Variable oder Klasse verwenden
282
 
283
  if __name__ == "__main__":
284
- demo.queue(max_size=5).launch(
285
  server_name="0.0.0.0",
286
- server_port=7865,
287
  share=False,
288
  debug=False
289
- )
 
1
+ # app.py
2
+ """
3
+ Coppermine Originalbild-Downloader – Gradio Edition
4
+ Sicherheitsforschung / Bug-Bounty Tool
5
+ Nur für legale Tests mit Erlaubnis des Betreibers nutzen!
6
+ """
7
+
8
  import os
9
  import threading
10
  import time
 
16
  import gradio as gr
17
 
18
  # ────────────────────────────────────────────────
19
+ # Globale Konfiguration anpassbar
20
  # ────────────────────────────────────────────────
21
 
22
# Shared HTTP session: one connection pool + one header set for all requests.
SESSION = requests.Session()
SESSION.headers.update({
    # Browser-like UA with an explicit research identifier appended so gallery
    # operators can attribute the traffic; adjust per engagement if required.
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/128.0.0.0 Safari/537.36 "
        "Coppermine-Research-Downloader/1.1 "
        "(Security-Research; responsible-disclosure)"
    ),
    "Accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
    "Accept-Language": "de-DE,de;q=0.9,en-US;q=0.8,en;q=0.7",
    # Some installations refuse requests without a plausible referer.
    "Referer": "https://www.google.com/"
})

# Per-request timeout in seconds for all HTTP calls made via SESSION.
REQUEST_TIMEOUT = 12
DOWNLOAD_DELAY = 0.40        # pause between downloads — anti rate-limit / anti-DoS
MAX_THREADS_DEFAULT = 4      # default worker-thread count shown in the UI slider
MAX_PAGES_TO_SCAN = 400      # hard cap on album pages — guards against endless loops / DoS
40
 
41
  # ────────────────────────────────────────────────
42
  # Hilfsfunktionen
43
  # ────────────────────────────────────────────────
44
 
45
def correct_image_url(url: str) -> str:
    """Strip common Coppermine thumbnail prefixes from an image URL.

    Drops any query string / fragment, then removes a known size prefix
    ("thumb_", "normal_", ...) from the file name so the URL points at the
    presumed full-resolution original.

    Fix: the previous implementation round-tripped the URL through
    ``pathlib.Path``, which collapses the ``//`` after the scheme
    (``https://`` -> ``https:/``) and yields an invalid URL. Plain string
    handling keeps the URL intact.
    """
    clean = url.split("?")[0].split("#")[0]
    base, sep, filename = clean.rpartition("/")
    for prefix in ("thumb_", "normal_", "medium_", "small_", "preview_", "mini_"):
        if filename.startswith(prefix):
            filename = filename[len(prefix):]
            break  # Coppermine applies at most one size prefix
    # No prefix found -> presumably already the original.
    return base + sep + filename
58
+
59
+
60
def is_likely_image_url(url: str) -> bool:
    """Heuristic: does the URL end in a common raster-image extension?"""
    # str.endswith accepts a tuple, so one call covers every extension.
    return url.lower().endswith((".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp"))
63
 
64
 
65
def download_image(img_url: str, folder: str, progress_queue: queue.Queue) -> bool:
    """Download a single image into *folder*.

    Performs a HEAD request first to skip non-images and small files
    (< ~20 KB, usually thumbnails or error pages) before streaming the
    full download. Progress is reported on *progress_queue* as
    ``(type, message)`` tuples.

    Returns True only when a new file was actually written to disk.
    """
    if not is_likely_image_url(img_url):
        return False

    # Derive the local file name from the URL path, ignoring query/fragment.
    filename = img_url.split("/")[-1].split("?")[0].split("#")[0]
    if not filename:
        return False

    filepath = os.path.join(folder, filename)

    if os.path.exists(filepath):
        # Fix: the old message was a bare placeholder without the file name.
        progress_queue.put(("skip", f"bereits vorhanden → {filename}"))
        return False

    try:
        # HEAD request first — saves bandwidth on large files.
        head = SESSION.head(img_url, timeout=6, allow_redirects=True)
        if head.status_code != 200:
            return False

        ct = head.headers.get("Content-Type", "").lower()
        if "image" not in ct and "octet-stream" not in ct:
            return False

        # int() may raise ValueError on a malformed header; caught below.
        size = int(head.headers.get("Content-Length", 0))
        if size < 20_000:  # < ~20 KB is usually a thumbnail or an error page
            return False

        # Now the actual download, streamed in chunks to bound memory use.
        r = SESSION.get(img_url, timeout=REQUEST_TIMEOUT, stream=True)
        r.raise_for_status()

        with open(filepath, "wb") as f:
            for chunk in r.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)

        # Fix: include the file name in the progress messages (old code
        # emitted a literal "(unknown)" placeholder).
        progress_queue.put(("success", f"{filename} ({size // 1024:,} KB)"))
        return True

    except (requests.RequestException, OSError, ValueError) as e:
        # Narrowed from bare Exception: only network / file / header errors.
        progress_queue.put(("error", f"× {filename} → {e}"))
        return False
108
 
109
 
110
def scrape_album_page(page_url: str) -> list[str]:
    """Collect candidate image URLs from one album page (?page=...).

    Inspects <img> tags (including lazy-load attributes), anchors that
    link directly to image files, and fancybox/lightbox anchors. Returns
    a de-duplicated list; on any error the error is printed and an empty
    list is returned.
    """
    try:
        resp = SESSION.get(page_url, timeout=REQUEST_TIMEOUT)
        resp.raise_for_status()

        soup = BeautifulSoup(resp.text, "html.parser")
        found = set()

        # <img> tags usually carry thumbnails — map them to the original.
        for img_tag in soup.find_all("img"):
            source = (
                img_tag.get("src")
                or img_tag.get("data-src")
                or img_tag.get("data-lazy-src")
            )
            if source and is_likely_image_url(source):
                found.add(correct_image_url(urljoin(page_url, source)))

        # Anchors pointing straight at image files are usually originals.
        for anchor in soup.find_all("a", href=True):
            link = anchor["href"]
            if is_likely_image_url(link):
                found.add(urljoin(page_url, link))

        # Fancybox / lightbox anchors, common on newer themes.
        for node in soup.find_all(attrs={"data-fancybox": True, "href": True}):
            link = node.get("href")
            if link and is_likely_image_url(link):
                found.add(urljoin(page_url, link))

        return list(found)

    except Exception as e:
        print(f"Scrape-Fehler {page_url}: {e}")
        return []
144
 
145
 
146
def worker(album_url: str, folder: str, stop_event: threading.Event,
           progress_queue: queue.Queue, thread_id: int):
    """Scan album pages sequentially and download every image found.

    Terminates when *stop_event* is set, when a page yields no images,
    or after MAX_PAGES_TO_SCAN pages. All progress is reported via
    *progress_queue* as ``(type, message)`` tuples.
    """
    page = 1
    count = 0

    while not stop_event.is_set() and page <= MAX_PAGES_TO_SCAN:
        # Fix: Coppermine album URLs usually already contain a query string
        # (e.g. index.php?album=1); blindly appending "?page=N" produced a
        # URL with two '?'. Use '&' when a query string is already present.
        base = album_url.rstrip('/')
        joiner = '&' if '?' in base else '?'
        page_url = f"{base}{joiner}page={page}"
        progress_queue.put(("info", f"Thread {thread_id} → Seite {page}"))

        image_urls = scrape_album_page(page_url)
        if not image_urls:
            progress_queue.put(("info", f"Thread {thread_id} → Ende erreicht (Seite {page})"))
            break

        for url in image_urls:
            if stop_event.is_set():
                break
            if download_image(url, folder, progress_queue):
                count += 1
            time.sleep(DOWNLOAD_DELAY)  # throttle — be polite to the server

        page += 1

    progress_queue.put(("done", f"Thread {thread_id} beendet – {count} Bilder"))
170
 
171
 
172
def start_download(album_url: str, folder: str, threads: int):
    """Gradio generator handler: launch worker threads, stream progress.

    Yields ``(status_markdown, log_text, total_count, stop_button_update)``
    tuples until every worker thread has finished or the stop event fires.
    """
    if not album_url.strip():
        yield "❌ Album-URL fehlt", "", 0, gr.update(interactive=False)
        return

    folder = folder.strip() or "downloads_coppermine"
    Path(folder).mkdir(parents=True, exist_ok=True)

    # Fix: stop_event must live at module scope — stop_download() looks it
    # up via globals(), so a function-local event made the Stop button a
    # permanent no-op.
    global stop_event, threads_list
    stop_event = threading.Event()
    progress_queue = queue.Queue()

    def progress_loop():
        # Drain the progress queue and yield UI updates until all worker
        # threads have exited.
        total = 0
        lines = []

        while True:
            try:
                typ, msg = progress_queue.get(timeout=1.2)
                if typ == "success":
                    total += 1
                if typ in ("info", "success", "skip", "error", "warn", "done"):
                    lines.append(msg)
                    lines = lines[-30:]  # keep the visible log bounded

                status = f"**Download läuft** – {total} Bilder"
                if typ == "done" and total > 0:
                    status = f"**Fertig** – {total} Bilder heruntergeladen"

                yield (
                    status,
                    "\n".join(lines),
                    total,
                    gr.update(value="Stoppen", interactive=not stop_event.is_set())
                )

                if typ == "done" and all(not t.is_alive() for t in threads_list):
                    break

            except queue.Empty:
                # No message for a while — exit once every worker is done.
                if all(not t.is_alive() for t in threads_list):
                    break

        final_log = "\n".join(lines) + "\n\n→ Download abgeschlossen oder gestoppt."
        yield "Download beendet", final_log, total, gr.update(value="Start", interactive=True)

    # Start the worker threads. Fix: coerce the slider value to int —
    # Gradio may deliver it as a float, which would break range().
    threads_list = []
    for i in range(1, max(1, int(threads)) + 1):
        t = threading.Thread(
            target=worker,
            args=(album_url, folder, stop_event, progress_queue, i),
            daemon=True
        )
        threads_list.append(t)
        t.start()

    # Hand control to the live-update loop (Gradio consumes the generator).
    yield from progress_loop()
 
230
 
231
 
232
def stop_download():
    """Signal the running download (if any) to stop; return UI feedback."""
    # The event is published at module scope by start_download().
    ev = globals().get("stop_event")
    if ev is None:
        return "Kein Download läuft", gr.update()
    ev.set()
    return (
        "Stop-Signal gesendet … Threads werden beendet",
        gr.update(value="Stop gesendet", interactive=False),
    )
 
237
 
238
 
239
  # ────────────────────────────────────────────────
240
+ # Gradio Interface
241
  # ────────────────────────────────────────────────
242
 
243
# Shared stylesheet for the Blocks UI — the log box is styled like a terminal.
css = """
.gradio-container { max-width: 960px; margin: auto; font-family: system-ui, sans-serif; }
.logbox { font-family: 'Consolas', 'Courier New', monospace !important;
background: #0d1117; color: #c9d1d9; padding: 14px;
border-radius: 8px; white-space: pre-wrap; overflow-y: auto;
max-height: 380px; line-height: 1.45; }
.status { font-weight: 600; }
"""

# UI layout: inputs (URL, folder, thread slider) → live status outputs
# (status markdown, log textbox, counter) → start/stop buttons.
with gr.Blocks(css=css, title="Coppermine Original Downloader – Research Edition") as demo:
    gr.Markdown("""
    # Coppermine Originalbild-Downloader
    **Sicherheitsforschung / Bug-Bounty Tool** – 2025/2026 Edition
    Nur mit ausdrücklicher Erlaubnis des Website-Betreibers nutzen!
    """)

    with gr.Row():
        url_input = gr.Textbox(
            label="Album Basis-URL",
            placeholder="https://example.com/gallery/index.php?album=123",
            value="https://example.com/index.php?album=1",
            scale=5
        )
        folder_input = gr.Textbox(
            label="Zielordner",
            value="coppermine_originals",
            scale=3
        )

    threads_slider = gr.Slider(
        1, 12, value=MAX_THREADS_DEFAULT, step=1,
        label="Anzahl paralleler Threads (Vorsicht vor Rate-Limits / IP-Bans)"
    )

    # Live outputs updated by the start_download generator.
    status_md = gr.Markdown("**Bereit …**", elem_classes=["status"])
    log_box = gr.Textbox(label="Live-Log", lines=14, max_lines=40, interactive=False, elem_classes=["logbox"])
    count_num = gr.Number(label="Heruntergeladene Bilder", value=0, interactive=False)

    with gr.Row():
        start_btn = gr.Button("Download starten", variant="primary")
        stop_btn = gr.Button("Download stoppen", variant="stop", interactive=False)

    gr.Markdown("""
    **Wichtige Hinweise**
    **Rechtlich**: Massen-Downloads können gegen AGB / Strafgesetze verstoßen
    • **Technisch**: Viele Coppermine-Instanzen haben schwachen Schutz → IDOR, Directory Listing, offene Alben häufig
    • **Bug Bounty**: Finde Schwachstellen? Responsible Disclosure!
    • **Tipp**: Teste zuerst mit HEAD-Requests & niedriger Thread-Anzahl
    """)

    # ─── Events ─────────────────────────────────────────────

    # start_download is a generator, so this click streams live updates
    # into the four output components until the workers finish.
    start_btn.click(
        start_download,
        inputs=[url_input, folder_input, threads_slider],
        outputs=[status_md, log_box, count_num, stop_btn]
    )

    stop_btn.click(
        stop_download,
        outputs=[status_md, stop_btn]
    )
305
 
 
 
306
 
307
if __name__ == "__main__":
    # queue() is required for the generator-based live updates;
    # max_size bounds concurrent UI sessions.
    # NOTE(review): server_name="0.0.0.0" exposes the UI on all network
    # interfaces — confirm that is intended for this deployment.
    demo.queue(max_size=8).launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        debug=False
    )