Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,6 +8,7 @@ import requests
|
|
| 8 |
import gradio as gr
|
| 9 |
import bencodepy
|
| 10 |
import py7zr
|
|
|
|
| 11 |
|
| 12 |
# =========================
|
| 13 |
# Helpers
|
|
@@ -247,6 +248,41 @@ def test_7z_integrity(archive_path: str) -> bool:
|
|
| 247 |
except Exception:
|
| 248 |
return False
|
| 249 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 250 |
# =========================
|
| 251 |
# Pipeline
|
| 252 |
# =========================
|
|
@@ -312,16 +348,24 @@ def run_pipeline(torrent_url: str):
|
|
| 312 |
logs.append(f"CRC test failed for {dest.name}, retrying download fresh…")
|
| 313 |
download_file_exact(final_url, dest, expected_size, max_attempts=2)
|
| 314 |
if not test_7z_integrity(str(dest)):
|
| 315 |
-
|
| 316 |
-
|
| 317 |
saved_archives.append(str(dest))
|
| 318 |
|
| 319 |
-
# Extract all .7z archives (with resilient per-member fallback)
|
| 320 |
for apath in saved_archives:
|
| 321 |
logs.append(f"Extracting: {apath}")
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 325 |
|
| 326 |
# List extracted files
|
| 327 |
extracted = list_files_recursive(ex_dir)
|
|
|
|
| 8 |
import gradio as gr
|
| 9 |
import bencodepy
|
| 10 |
import py7zr
|
| 11 |
+
from py7zr.exceptions import CrcError # for granular handling
|
| 12 |
|
| 13 |
# =========================
|
| 14 |
# Helpers
|
|
|
|
| 248 |
except Exception:
|
| 249 |
return False
|
| 250 |
|
| 251 |
+
def safe_extract_7z(archive_path: str, dest_dir: str) -> Tuple[int, List[str]]:
    """Extract a .7z archive, tolerating corrupted members.

    First attempts a normal full extraction (fast path). If that raises a
    CRC error, falls back to extracting members one at a time, skipping only
    the members that fail.

    Args:
        archive_path: Path to the .7z archive on disk.
        dest_dir: Directory to extract into (created if missing).

    Returns:
        (count, skipped) where ``count`` is the number of members extracted
        in the per-member fallback, or ``-1`` meaning "full extraction
        succeeded, exact count unknown"; ``skipped`` lists the member names
        that could not be extracted.
    """
    skipped: List[str] = []
    dest = pathlib.Path(dest_dir)
    dest.mkdir(parents=True, exist_ok=True)

    # Fast path: extract everything in one pass.
    try:
        with py7zr.SevenZipFile(archive_path, mode="r") as z:
            z.extract(path=str(dest))
        # Exact member count isn't known here; -1 signals "all OK".
        return -1, skipped
    except CrcError:
        # Archive has at least one bad member; fall through to the
        # per-member pass and salvage what we can.
        pass

    # Collect the member names up front with a dedicated handle.
    with py7zr.SevenZipFile(archive_path, mode="r") as z:
        members = [info.filename for info in z.list() if not info.is_directory]

    # Per-member pass. The archive is reopened for each member: py7zr
    # requires reset() between successive extract() calls on one handle,
    # and after a CrcError the handle's internal worker state is not
    # trustworthy — a fresh handle per member sidesteps both problems.
    # CrcError is an Exception subclass, so one handler covers both the
    # expected corruption case and any other per-member failure.
    extracted_count = 0
    for name in members:
        try:
            with py7zr.SevenZipFile(archive_path, mode="r") as z:
                z.extract(targets=[name], path=str(dest))
            extracted_count += 1
        except Exception:
            skipped.append(name)

    return extracted_count, skipped
|
| 285 |
+
|
| 286 |
# =========================
|
| 287 |
# Pipeline
|
| 288 |
# =========================
|
|
|
|
| 348 |
logs.append(f"CRC test failed for {dest.name}, retrying download fresh…")
|
| 349 |
download_file_exact(final_url, dest, expected_size, max_attempts=2)
|
| 350 |
if not test_7z_integrity(str(dest)):
|
| 351 |
+
logs.append(f"Archive still reports CRC problems: {dest.name}. Will try per-file extraction and skip corrupt members.")
|
|
|
|
| 352 |
saved_archives.append(str(dest))
|
| 353 |
|
| 354 |
+
# Extract all .7z archives (with resilient per-member fallback)
|
| 355 |
for apath in saved_archives:
|
| 356 |
logs.append(f"Extracting: {apath}")
|
| 357 |
+
count, skipped = safe_extract_7z(apath, str(ex_dir))
|
| 358 |
+
if count == -1:
|
| 359 |
+
logs.append(f"Extracted OK → {ex_dir}")
|
| 360 |
+
else:
|
| 361 |
+
logs.append(f"Extracted {count} members to {ex_dir}")
|
| 362 |
+
if skipped:
|
| 363 |
+
logs.append(f"Skipped {len(skipped)} corrupted member(s):")
|
| 364 |
+
# show up to a few to keep log readable
|
| 365 |
+
show = skipped[:10]
|
| 366 |
+
logs += [f" - {s}" for s in show]
|
| 367 |
+
if len(skipped) > 10:
|
| 368 |
+
logs.append(f" … and {len(skipped) - 10} more")
|
| 369 |
|
| 370 |
# List extracted files
|
| 371 |
extracted = list_files_recursive(ex_dir)
|