wuhp commited on
Commit
4b94fe6
·
verified ·
1 Parent(s): 29f8cf0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -6
app.py CHANGED
@@ -8,6 +8,7 @@ import requests
8
  import gradio as gr
9
  import bencodepy
10
  import py7zr
 
11
 
12
  # =========================
13
  # Helpers
@@ -247,6 +248,41 @@ def test_7z_integrity(archive_path: str) -> bool:
247
  except Exception:
248
  return False
249
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
  # =========================
251
  # Pipeline
252
  # =========================
@@ -312,16 +348,24 @@ def run_pipeline(torrent_url: str):
312
  logs.append(f"CRC test failed for {dest.name}, retrying download fresh…")
313
  download_file_exact(final_url, dest, expected_size, max_attempts=2)
314
  if not test_7z_integrity(str(dest)):
315
- raise gr.Error(f"Archive still fails CRC after re-download: {dest.name}")
316
-
317
  saved_archives.append(str(dest))
318
 
319
- # Extract all .7z archives (after passing CRC test)
320
  for apath in saved_archives:
321
  logs.append(f"Extracting: {apath}")
322
- with py7zr.SevenZipFile(apath, mode="r") as z:
323
- z.extract(path=str(ex_dir))
324
- logs.append(f"Extracted to: {ex_dir}")
 
 
 
 
 
 
 
 
 
325
 
326
  # List extracted files
327
  extracted = list_files_recursive(ex_dir)
 
8
  import gradio as gr
9
  import bencodepy
10
  import py7zr
11
+ from py7zr.exceptions import CrcError # for granular handling
12
 
13
  # =========================
14
  # Helpers
 
248
  except Exception:
249
  return False
250
 
251
+ def safe_extract_7z(archive_path: str, dest_dir: str) -> Tuple[int, List[str]]:
252
+ """
253
+ Extract an archive. If a CRC error occurs, fall back to per-member extraction,
254
+ skipping only the bad members. Returns (#extracted, skipped_list).
255
+ """
256
+ extracted_count = 0
257
+ skipped: List[str] = []
258
+ dest = pathlib.Path(dest_dir)
259
+ dest.mkdir(parents=True, exist_ok=True)
260
+
261
+ # First try normal extraction (fast path).
262
+ try:
263
+ with py7zr.SevenZipFile(archive_path, mode="r") as z:
264
+ z.extract(path=str(dest))
265
+ # We don't know exact count from here; return -1 to mean "unknown but success"
266
+ return -1, skipped
267
+ except CrcError:
268
+ # Fall back to per-member extraction, skipping corrupted ones.
269
+ pass
270
+
271
+ # Per-member pass
272
+ with py7zr.SevenZipFile(archive_path, mode="r") as z:
273
+ members = [info.filename for info in z.list() if not info.is_directory]
274
+ for name in members:
275
+ try:
276
+ # Extract only this member; py7zr streams it to disk
277
+ z.extract(targets=[name], path=str(dest))
278
+ extracted_count += 1
279
+ except CrcError:
280
+ skipped.append(name)
281
+ except Exception:
282
+ skipped.append(name)
283
+
284
+ return extracted_count, skipped
285
+
286
  # =========================
287
  # Pipeline
288
  # =========================
 
348
  logs.append(f"CRC test failed for {dest.name}, retrying download fresh…")
349
  download_file_exact(final_url, dest, expected_size, max_attempts=2)
350
  if not test_7z_integrity(str(dest)):
351
+ logs.append(f"Archive still reports CRC problems: {dest.name}. Will try per-file extraction and skip corrupt members.")
 
352
  saved_archives.append(str(dest))
353
 
354
+ # Extract all .7z archives (with resilient per-member fallback)
355
  for apath in saved_archives:
356
  logs.append(f"Extracting: {apath}")
357
+ count, skipped = safe_extract_7z(apath, str(ex_dir))
358
+ if count == -1:
359
+ logs.append(f"Extracted OK {ex_dir}")
360
+ else:
361
+ logs.append(f"Extracted {count} members to {ex_dir}")
362
+ if skipped:
363
+ logs.append(f"Skipped {len(skipped)} corrupted member(s):")
364
+ # show up to a few to keep log readable
365
+ show = skipped[:10]
366
+ logs += [f" - {s}" for s in show]
367
+ if len(skipped) > 10:
368
+ logs.append(f" … and {len(skipped) - 10} more")
369
 
370
  # List extracted files
371
  extracted = list_files_recursive(ex_dir)