wuhp committed on
Commit
e1ab87a
·
verified ·
1 Parent(s): 7fc8384

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -24
app.py CHANGED
@@ -46,11 +46,9 @@ def parse_torrent(raw: bytes) -> Dict:
46
 
47
  # files
48
  files = []
49
- total_len = 0
50
  if b"files" in info:
51
  for f in info[b"files"]:
52
  length = int(f.get(b"length", 0))
53
- total_len += length
54
  parts = []
55
  for pe in f.get(b"path", []):
56
  parts.append((pe.decode("utf-8", "replace")) if isinstance(pe,(bytes,bytearray)) else str(pe))
@@ -58,7 +56,6 @@ def parse_torrent(raw: bytes) -> Dict:
58
  files.append({"path": rel, "length": length})
59
  else:
60
  length = int(info.get(b"length", 0))
61
- total_len = length
62
  rel = name or "(unnamed)"
63
  files.append({"path": rel, "length": length})
64
 
@@ -87,18 +84,47 @@ def join_url(base: str, *segs: str) -> str:
87
  parts.append(enc)
88
  return "/".join(parts)
89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  def supports_range_and_size(url: str, timeout: int = 30) -> Tuple[bool, Optional[int]]:
91
- r = requests.head(url, timeout=timeout, allow_redirects=True)
92
- if r.status_code >= 400:
 
 
 
 
 
 
93
  r = requests.get(url, stream=True, timeout=timeout, allow_redirects=True)
94
  r.raise_for_status()
95
  size = int(r.headers.get("Content-Length","0") or 0)
96
- accept_ranges = r.headers.get("Accept-Ranges","")
97
  try: r.close()
98
  except: pass
99
- return ("bytes" in accept_ranges.lower() or size>0, size if size>0 else None)
100
- size = int(r.headers.get("Content-Length","0") or 0)
101
- return (("bytes" in r.headers.get("Accept-Ranges","").lower()) or size>0, size if size>0 else None)
102
 
103
  def download_with_resume(url: str, dest_path: pathlib.Path, timeout: int = 120):
104
  dest_path.parent.mkdir(parents=True, exist_ok=True)
@@ -142,6 +168,40 @@ def preview_path(path_str: str, max_bytes: int = 250_000) -> Tuple[str, Optional
142
  except Exception as e:
143
  return f"Error previewing file: {type(e).__name__}: {e}", None
144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  # -------------------------
146
  # The single action
147
  # -------------------------
@@ -153,20 +213,24 @@ def run_pipeline(torrent_url: str):
153
  # Parse torrent
154
  raw = fetch_bytes(torrent_url.strip())
155
  meta = parse_torrent(raw)
156
- if not meta["web_seeds"]:
157
- raise gr.Error("This .torrent does not advertise HTTP web seeds (BEP-19 url-list). "
158
- "On Spaces we cannot use BitTorrent; please use a .torrent with web seeds "
159
- "or host the .7z via HTTPS and download it directly.")
 
160
 
161
  infohash = meta["infohash"]
162
  root_name = meta["name"]
163
- seed = meta["web_seeds"][0] # keep it simple: first seed
164
 
165
  # We expect .7z payloads
166
  sevenz_files = [f for f in meta["files"] if f["path"].lower().endswith(".7z")]
167
  if not sevenz_files:
168
  raise gr.Error("No .7z files listed in the torrent.")
169
 
 
 
 
 
170
  # Work dirs
171
  base_dir = pathlib.Path("/mnt/data/work") / infohash
172
  dl_dir = base_dir / "downloads"
@@ -177,15 +241,22 @@ def run_pipeline(torrent_url: str):
177
  logs = []
178
  saved_archives = []
179
 
180
- # Download each .7z over HTTP using <seed>/<root_name>/<relative path>
181
  for f in sevenz_files:
182
  rel = f["path"]
183
- url = join_url(seed, root_name, rel)
 
 
 
 
 
 
 
184
  dest = dl_dir / rel
185
- logs.append(f"Downloading: {url}")
186
- download_with_resume(url, dest)
187
  if not dest.exists():
188
- raise gr.Error(f"Download failed: {url}")
189
  logs.append(f"Saved: {dest} ({human_bytes(dest.stat().st_size)})")
190
  saved_archives.append(str(dest))
191
 
@@ -203,7 +274,6 @@ def run_pipeline(torrent_url: str):
203
  else:
204
  logs.append(f"Extracted files: {len(extracted)}")
205
 
206
- # For the UI: return a short log, a list of files, and default selection
207
  log_md = "### Run log\n" + "\n".join(f"- {l}" for l in logs)
208
  return log_md, extracted, (extracted[0] if extracted else "")
209
 
@@ -219,12 +289,13 @@ with gr.Blocks(title="Torrent → 7z → View (HTTP only)") as demo:
219
  gr.Markdown(
220
  """
221
  # Torrent → 7z → View (HTTP only)
222
- Paste a **.torrent URL** that includes **HTTP web seeds**.
223
- The app will download the `.7z` file(s), extract them, and let you preview text/csv/json files.
 
224
  """
225
  )
226
 
227
- url_in = gr.Textbox(label=".torrent URL", placeholder="https://.../something.torrent")
228
  go_btn = gr.Button("Download, Extract & List")
229
  log_out = gr.Markdown()
230
  files_dd = gr.Dropdown(label="Extracted files", choices=[], interactive=True)
@@ -247,5 +318,5 @@ if __name__ == "__main__":
247
  demo.launch(
248
  server_name="0.0.0.0",
249
  server_port=int(os.environ.get("PORT", 7860)),
250
- allowed_paths=["/mnt/data"] # allow returning files from work dir if needed later
251
  )
 
46
 
47
  # files
48
  files = []
 
49
  if b"files" in info:
50
  for f in info[b"files"]:
51
  length = int(f.get(b"length", 0))
 
52
  parts = []
53
  for pe in f.get(b"path", []):
54
  parts.append((pe.decode("utf-8", "replace")) if isinstance(pe,(bytes,bytearray)) else str(pe))
 
56
  files.append({"path": rel, "length": length})
57
  else:
58
  length = int(info.get(b"length", 0))
 
59
  rel = name or "(unnamed)"
60
  files.append({"path": rel, "length": length})
61
 
 
84
  parts.append(enc)
85
  return "/".join(parts)
86
 
87
def _head_or_peek(url: str, timeout: int = 20) -> Tuple[bool, Optional[int]]:
    """Cheaply probe *url* and report ``(reachable, size_in_bytes_or_None)``.

    A HEAD request is tried first; some servers reject HEAD entirely, so
    any failure falls back to a streaming GET that reads at most one small
    chunk before closing the connection. All errors are swallowed on
    purpose — this is a best-effort existence probe, not a download.
    """
    def _content_length(r) -> Optional[int]:
        # Content-Length may be absent or non-numeric (e.g. chunked encoding).
        cl = r.headers.get("Content-Length")
        return int(cl) if cl and cl.isdigit() else None

    # 1) HEAD: cheapest probe when the server supports it.
    try:
        r = requests.head(url, timeout=timeout, allow_redirects=True)
        try:
            if r.status_code < 400:
                return True, _content_length(r)
        finally:
            r.close()
    except Exception:
        pass

    # 2) Fallback: streaming GET; pull one tiny chunk to confirm the body
    #    is actually readable. The finally-close fixes the original's leak
    #    of the response object when status_code >= 400.
    try:
        r = requests.get(url, stream=True, timeout=timeout, allow_redirects=True)
        try:
            if r.status_code < 400:
                try:
                    next(r.iter_content(chunk_size=1024))
                except Exception:
                    pass  # an empty body still counts as reachable
                return True, _content_length(r)
        finally:
            r.close()
    except Exception:
        pass
    return False, None
110
+
111
def supports_range_and_size(url: str, timeout: int = 30) -> Tuple[bool, Optional[int]]:
    """Return ``(http_fetchable, size_or_None)`` for *url*.

    "Fetchable" means the server either advertises byte-range support
    (``Accept-Ranges: bytes``) or reports a positive Content-Length.
    HEAD is tried first; servers that reject HEAD get a streaming GET
    whose body is never read.
    """
    def _summarize(r) -> Tuple[bool, Optional[int]]:
        # Shared header inspection for both HEAD and GET responses.
        size = int(r.headers.get("Content-Length", "0") or 0)
        ranged = "bytes" in r.headers.get("Accept-Ranges", "").lower()
        return (ranged or size > 0, size if size > 0 else None)

    try:
        r = requests.head(url, timeout=timeout, allow_redirects=True)
        try:
            if r.status_code < 400:
                return _summarize(r)
        finally:
            r.close()  # fix: the original never closed the HEAD response
    except Exception:
        pass

    # Fallback for servers that reject HEAD: streaming GET, headers only.
    try:
        r = requests.get(url, stream=True, timeout=timeout, allow_redirects=True)
        try:
            r.raise_for_status()
            return _summarize(r)
        finally:
            r.close()
    except Exception:
        return False, None
128
 
129
  def download_with_resume(url: str, dest_path: pathlib.Path, timeout: int = 120):
130
  dest_path.parent.mkdir(parents=True, exist_ok=True)
 
168
  except Exception as e:
169
  return f"Error previewing file: {type(e).__name__}: {e}", None
170
 
171
+ # ---------- NEW: base inference when no web seeds ----------
172
+
173
def infer_bases_from_torrent_url(torrent_url: str) -> List[str]:
    """Guess the HTTP folder hosting a torrent's payload from its URL.

    For ``https://data.ddosecrets.com/Collection/Collection.torrent``
    this returns ``["https://data.ddosecrets.com/Collection"]`` — the
    payload is commonly published in the same folder as the .torrent.

    Returns ``[]`` when there is nothing to strip. Fix over the original:
    a bare-host URL such as ``https://example.com`` used to slip past the
    ``"/" in u`` check via the scheme's ``//`` and produce the degenerate
    base ``"https:/"``; it now returns ``[]``.
    """
    from urllib.parse import urlsplit  # local import: module header not touched

    u = torrent_url.strip()
    if "/" not in u:
        return []
    parts = urlsplit(u)
    if parts.scheme and parts.netloc and not parts.path:
        return []  # absolute URL with no path segment — no folder to infer
    return [u.rsplit("/", 1)[0]]
185
+
186
def resolve_download_url(bases: List[str], root_name: str, rel_path: str) -> Optional[str]:
    """Find a live HTTP URL for *rel_path* under any of *bases*.

    For each base, both common layouts are tried, in order:
        base/root_name/rel_path   (torrent folder mirrored as-is)
        base/rel_path             (files hosted flat next to the torrent)
    Returns the first candidate that responds to a probe, or ``None``.

    Fixes over the original: the dead ``tried`` accumulator is removed,
    and duplicate candidates (possible when *root_name* is empty) are
    probed only once.
    """
    candidates: List[str] = []
    for base in bases:
        for cand in (join_url(base, root_name, rel_path), join_url(base, rel_path)):
            if cand not in candidates:  # de-duplicate to avoid probing twice
                candidates.append(cand)
    for cand in candidates:
        ok, _ = _head_or_peek(cand)
        if ok:
            return cand
    return None
204
+
205
  # -------------------------
206
  # The single action
207
  # -------------------------
 
213
  # Parse torrent
214
  raw = fetch_bytes(torrent_url.strip())
215
  meta = parse_torrent(raw)
216
+
217
+ # seed list: web seeds if present, else infer from torrent URL folder (DDoSecrets-friendly)
218
+ seeds = list(meta["web_seeds"])
219
+ if not seeds:
220
+ seeds = infer_bases_from_torrent_url(torrent_url)
221
 
222
  infohash = meta["infohash"]
223
  root_name = meta["name"]
 
224
 
225
  # We expect .7z payloads
226
  sevenz_files = [f for f in meta["files"] if f["path"].lower().endswith(".7z")]
227
  if not sevenz_files:
228
  raise gr.Error("No .7z files listed in the torrent.")
229
 
230
+ if not seeds:
231
+ raise gr.Error("No HTTP source found. Tried to infer base from the .torrent URL but failed. "
232
+ "If this is DDoSecrets, host likely at the same folder as the torrent.")
233
+
234
  # Work dirs
235
  base_dir = pathlib.Path("/mnt/data/work") / infohash
236
  dl_dir = base_dir / "downloads"
 
241
  logs = []
242
  saved_archives = []
243
 
244
+ # Download each .7z over HTTP
245
  for f in sevenz_files:
246
  rel = f["path"]
247
+ # resolve against any seed/base
248
+ final_url = None
249
+ for seed in seeds:
250
+ final_url = resolve_download_url([seed], root_name, rel)
251
+ if final_url:
252
+ break
253
+ if not final_url:
254
+ raise gr.Error(f"Could not resolve an HTTP URL for {rel} from bases {seeds}.")
255
  dest = dl_dir / rel
256
+ logs.append(f"Downloading: {final_url}")
257
+ download_with_resume(final_url, dest)
258
  if not dest.exists():
259
+ raise gr.Error(f"Download failed: {final_url}")
260
  logs.append(f"Saved: {dest} ({human_bytes(dest.stat().st_size)})")
261
  saved_archives.append(str(dest))
262
 
 
274
  else:
275
  logs.append(f"Extracted files: {len(extracted)}")
276
 
 
277
  log_md = "### Run log\n" + "\n".join(f"- {l}" for l in logs)
278
  return log_md, extracted, (extracted[0] if extracted else "")
279
 
 
289
  gr.Markdown(
290
  """
291
  # Torrent → 7z → View (HTTP only)
292
+ Paste a **.torrent URL**.
293
+ If it has web seeds, great. If not, we'll auto-guess the HTTPS folder from the URL (works for DDoSecrets layouts).
294
+ The app downloads `.7z` file(s), extracts them, and lets you preview text/csv/json.
295
  """
296
  )
297
 
298
+ url_in = gr.Textbox(label=".torrent URL", placeholder="https://data.ddosecrets.com/Collection/Collection.torrent")
299
  go_btn = gr.Button("Download, Extract & List")
300
  log_out = gr.Markdown()
301
  files_dd = gr.Dropdown(label="Extracted files", choices=[], interactive=True)
 
318
  demo.launch(
319
  server_name="0.0.0.0",
320
  server_port=int(os.environ.get("PORT", 7860)),
321
+ allowed_paths=["/mnt/data"]
322
  )