ebook-executor / hf_backend /test_capture_nav.py
fromozu's picture
Upload hf_backend/test_capture_nav.py with huggingface_hub
c8e4410 verified
#!/usr/bin/env python3
"""
捕获导航事件,获取最终的EPUB URL
"""
from playwright.sync_api import sync_playwright
import time
# Hex digest interpolated into the slow-download URL below.
# Presumably the MD5 of the target book file -- TODO confirm.
MD5 = "d94c20d1364af9b484949659398c4062"
# Page that capture_navigation() opens.
# NOTE(review): the meaning of the trailing "/0/3" path segments is not
# evident from this file -- confirm against the site's URL scheme.
SLOW_URL = f"https://annas-archive.gl/slow_download/{MD5}/0/3"
def capture_navigation():
    """Open SLOW_URL in Chromium and try to capture the final EPUB URL.

    The page is loaded in a headed browser, frame navigations and downloads
    are logged, and the page URL is polled once per second for up to 60
    seconds.

    Returns:
        The first match of, in order of preference:
        * the URL reported by a ``download`` event,
        * the page URL once it looks like an ``.epub`` link,
        * the page URL recorded after ``goto`` raised (best-effort fallback),
        or ``None`` when nothing was captured.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urlsplit

    print(f"目标: {SLOW_URL}\n")
    # "download" is tracked separately so the goto() fallback below can never
    # clobber a URL that the download handler already captured.
    result = {"url": None, "download": None}

    def looks_like_epub(url):
        """Return True if the URL, or its path component, ends in .epub."""
        lowered = url.lower()
        # Plain suffix check first (original behavior); the path check also
        # accepts links that carry a query string or fragment.
        return lowered.endswith(".epub") or urlsplit(lowered).path.endswith(".epub")

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=False)
        try:
            context = browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
                viewport={"width": 1920, "height": 1080},
            )
            page = context.new_page()

            # Log every frame navigation. "framenavigated" is the Page event
            # for this; there is no "navigation" event to subscribe to.
            def on_navigation(frame):
                url = frame.url if frame else page.url
                print(f" [导航事件] {url}")

            page.on("framenavigated", on_navigation)

            # A download event carries the real file URL.
            def on_download(download):
                print(f" [下载事件] {download.url}")
                result["download"] = download.url
                result["url"] = download.url

            page.on("download", on_download)

            print("访问slow_download...")
            try:
                page.goto(SLOW_URL, timeout=120000, wait_until="domcontentloaded")
            except Exception as e:
                print(f"goto异常: {e}")
                # goto() also raises when the navigation turns into a
                # download; keep the download URL in that case and only fall
                # back to the page URL when nothing better was captured.
                if result["download"] is None:
                    result["url"] = page.url

            print(f"初始URL: {page.url}")

            # Poll for up to 60 seconds.
            print("\n等待60秒...")
            for i in range(60):
                # Stop as soon as a download was seen, or if the page is gone
                # (otherwise every remaining wait would fail immediately).
                if result["download"] is not None or page.is_closed():
                    break
                try:
                    # wait_for_timeout() keeps Playwright's event dispatch
                    # running; time.sleep() would block it, leaving page.url
                    # stale and the handlers above silent.
                    page.wait_for_timeout(1000)
                    current_url = page.url
                    if looks_like_epub(current_url):
                        print(f"\n在第{i+1}秒发现EPUB URL: {current_url}")
                        result["url"] = current_url
                        break
                    if (i + 1) % 10 == 0:
                        print(f" {i+1}秒... 当前URL: {current_url[:60]}...")
                except Exception as e:
                    # The page may be in the middle of a navigation.
                    print(f" {i+1}秒... (页面可能正在导航: {str(e)[:30]})")
                    continue

            print(f"\n最终URL: {page.url}")
        finally:
            # Always release the browser, even if something above raised.
            browser.close()

    return result["url"]
if __name__ == "__main__":
    # Script entry point: print a banner, run the capture, report the outcome.
    divider = "=" * 60
    print(divider, "捕获导航事件", divider, "", sep="\n")

    url = capture_navigation()

    print("\n" + divider)
    # A falsy result (None or empty string) means no URL was captured.
    print(f"成功获取到: {url}" if url else "未能获取到EPUB链接")
    print(divider)