fromozu committed on
Commit
a971ac1
·
verified ·
1 Parent(s): 94907aa

Clean: remove Playwright code, keep scoring fix and fallback threshold

Browse files
Files changed (1) hide show
  1. hf_backend/fetcher.py +1 -53
hf_backend/fetcher.py CHANGED
@@ -286,59 +286,7 @@ def _download_from_src_a(
286
  except Exception:
287
  continue
288
 
289
- with sync_playwright() as p:
290
- browser = p.chromium.launch(headless=True)
291
- context = browser.new_context(
292
- user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
293
- accept_downloads=True,
294
- )
295
- page = context.new_page()
296
-
297
- try:
298
- page.goto(url, timeout=60_000, wait_until="domcontentloaded")
299
-
300
- # Wait for the countdown timer to finish and a download link to appear
301
- for elapsed in range(wait_seconds):
302
- time.sleep(1)
303
-
304
- # Check if page navigated away (redirect to download)
305
- current = page.url
306
- if current != url and "slow_download" not in current and "fast_download" not in current:
307
- # Direct redirect — fetch via requests using cookies from the browser
308
- break
309
-
310
- # Check for download links that appeared after countdown
311
- try:
312
- links = page.query_selector_all("a[href]")
313
- for link in links:
314
- href = link.get_attribute("href") or ""
315
- text = (link.text_content() or "").strip().lower()
316
- if ("get.php" in href or href.endswith(".epub") or
317
- ("download" in text and href and href != "#")):
318
- # Try to capture download
319
- try:
320
- with page.expect_download(timeout=5_000) as dl_info:
321
- link.click()
322
- dl = dl_info.value
323
- dl_path = dl.path()
324
- if dl_path:
325
- with open(dl_path, "rb") as f:
326
- return f.read()
327
- except Exception:
328
- # click didn't trigger download, try fetching URL directly
329
- abs_href = href if href.startswith("http") else f"https://annas-archive.gl{href}"
330
- resp = context.request.get(abs_href, timeout=120_000)
331
- if resp.status == 200 and len(resp.body()) > 1000:
332
- return resp.body()
333
- except Exception:
334
- pass
335
-
336
- except Exception:
337
- pass
338
- finally:
339
- browser.close()
340
-
341
- return None
342
 
343
 
344
  def _download_from_src_b(
 
286
  except Exception:
287
  continue
288
 
289
+ raise FetchError("所有下载方式均失败(Libgen Anna's Archive)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
 
291
 
292
  def _download_from_src_b(