fromozu committed on
Commit
c8e4410
·
verified ·
1 Parent(s): 8d87bea

Upload hf_backend/test_capture_nav.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. hf_backend/test_capture_nav.py +88 -0
hf_backend/test_capture_nav.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ 捕获导航事件,获取最终的EPUB URL
4
+ """
5
+
6
+ from playwright.sync_api import sync_playwright
7
+ import time
8
+
9
# MD5 identifier embedded in the slow-download URL below
# (presumably the hash of the target book -- confirm).
MD5 = "d94c20d1364af9b484949659398c4062"

# Slow-download page that capture_navigation() opens.
SLOW_URL = "https://annas-archive.gl/slow_download/{}/0/3".format(MD5)
11
+
12
def _is_epub_url(url):
    """Return True when *url* points at an ``.epub`` resource.

    The whole URL is checked first (the original behaviour); the path
    component is checked as well so that a trailing query string or
    fragment (``book.epub?token=...``) does not hide the extension.
    """
    from urllib.parse import urlsplit

    if not url:
        return False
    lowered = url.lower()
    return lowered.endswith(".epub") or urlsplit(lowered).path.endswith(".epub")


def capture_navigation():
    """Open ``SLOW_URL`` in Chromium and try to capture the final EPUB URL.

    Frame navigations are logged, download events are recorded, and the
    page URL is polled once per second for up to 60 seconds.

    Returns:
        The URL of the first download event or the first page URL that
        looks like an EPUB, or ``None`` when neither was observed.
    """
    # Function-scope import: the file only imports ``sync_playwright`` at
    # module level, and the Playwright error type is needed here.
    from playwright.sync_api import Error as PlaywrightError

    print(f"目标: {SLOW_URL}\n")

    result = {"url": None, "error": None}

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=False)
        try:
            context = browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
                viewport={"width": 1920, "height": 1080},
            )
            page = context.new_page()

            # Log every frame navigation. ("framenavigated" is the event
            # Page emits; there is no "navigation" event to subscribe to.)
            def on_navigation(frame):
                url = frame.url if frame else page.url
                print(f" [导航事件] {url}")

            page.on("framenavigated", on_navigation)

            # Record the URL of any download the page triggers.
            def on_download(download):
                print(f" [下载事件] {download.url}")
                result["url"] = download.url

            page.on("download", on_download)

            print("访问slow_download...")
            try:
                page.goto(SLOW_URL, timeout=120000, wait_until="domcontentloaded")
            except PlaywrightError as e:
                print(f"goto异常: {e}")
                result["error"] = str(e)
                # goto() can fail because the navigation turned into a
                # download. Never overwrite a URL the download handler
                # already captured, and never report a non-EPUB page URL
                # as a success.
                if result["url"] is None and _is_epub_url(page.url):
                    result["url"] = page.url

            print(f"初始URL: {page.url}")

            # Poll for up to 60 seconds.
            print("\n等待60秒...")
            for second in range(1, 61):
                if result["url"] is not None or page.is_closed():
                    break

                try:
                    # wait_for_timeout() keeps Playwright's event loop
                    # running; time.sleep() would block it, so page.url
                    # and the event handlers would never be updated.
                    page.wait_for_timeout(1000)
                    current_url = page.url
                except PlaywrightError as e:
                    # The page may be in the middle of a navigation.
                    print(f" {second}秒... (页面可能正在导航: {str(e)[:30]})")
                    continue

                if result["url"] is not None:
                    # The download handler fired during the wait.
                    break

                if _is_epub_url(current_url):
                    print(f"\n在第{second}秒发现EPUB URL: {current_url}")
                    result["url"] = current_url
                    break

                if second % 10 == 0:
                    print(f" {second}秒... 当前URL: {current_url[:60]}...")

            print(f"\n最终URL: {page.url}")
        finally:
            # Always release the browser, even when something above raised.
            browser.close()

    return result["url"]
75
+
76
if __name__ == "__main__":
    # Frame the run with a 60-character rule so the outcome is easy to spot.
    rule = "=" * 60
    print(rule)
    print("捕获导航事件")
    print(rule)
    print()

    epub_url = capture_navigation()

    # Report either the captured URL or the failure, then close the frame.
    print("\n" + rule)
    print(f"成功获取到: {epub_url}" if epub_url else "未能获取到EPUB链接")
    print(rule)