fromozu committed on
Commit
0fc00c7
·
verified ·
1 Parent(s): 2dbbed6

Upload hf_backend/test_real_browser_v2.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. hf_backend/test_real_browser_v2.py +98 -0
hf_backend/test_real_browser_v2.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ 使用完全真实的浏览器(headless=False),捕获导航事件。
4
+ """
5
+
6
+ from playwright.sync_api import sync_playwright
7
+ import time
8
+
9
# MD5 identifier of the target record; it is embedded in the URL below.
MD5 = "d94c20d1364af9b484949659398c4062"
# Slow-download page for that record, the page this script navigates to.
SLOW_URL = f"https://annas-archive.gl/slow_download/{MD5}/0/3"
11
+
12
def _is_epub_url(url):
    """Return True when the path component of *url* ends in ``.epub``.

    The query string and fragment are ignored, so a link such as
    ``https://host/book.epub?token=abc`` is still recognised.
    """
    from urllib.parse import urlsplit

    return urlsplit(url).path.lower().endswith(".epub")


def use_real_browser():
    """Open a headed Chromium window and try to capture an EPUB link.

    The function visits the site home page, then ``SLOW_URL``, and polls the
    page for up to 30 seconds. It stops early once the page URL points at an
    ``.epub`` file or a ``download`` event has been observed.

    Returns:
        The URL reported by a ``download`` event if one fired; otherwise the
        page URL if it pointed at an ``.epub`` file; otherwise ``None``.
    """
    print(f"目标: {SLOW_URL}")
    print("将打开一个真实的Chrome浏览器窗口\n")

    page_epub_url = None
    # Mutable holder so the event callback can hand its value back out.
    captured = {"download": None}

    with sync_playwright() as p:
        browser = p.chromium.launch(
            headless=False,  # show the real browser window
        )
        try:
            context = browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
                viewport={"width": 1920, "height": 1080},
                locale="zh-CN",
            )
            page = context.new_page()

            # Record the URL of any download the page triggers.
            def on_download(download):
                print(f"检测到下载: {download.url}")
                captured["download"] = download.url

            # Log navigations. Playwright has no "navigation" page event, so
            # top-level navigations are reported from "framenavigated" by
            # checking for the main frame (the one without a parent).
            def on_frame_navigated(frame):
                if frame.parent_frame is None:
                    print(f"导航到: {page.url}")
                if page.url != SLOW_URL:
                    print(f"框架导航: {frame.url}")

            page.on("download", on_download)
            page.on("framenavigated", on_frame_navigated)

            print("步骤1: 访问主页...")
            page.goto("https://annas-archive.gl", timeout=60000, wait_until="domcontentloaded")
            print(f"   标题: {page.title()}")
            # wait_for_timeout keeps Playwright's event dispatch running;
            # time.sleep would block it and leave handlers unfired.
            page.wait_for_timeout(2000)

            print("\n步骤2: 访问slow_download...")
            page.goto(SLOW_URL, timeout=120000, wait_until="domcontentloaded")
            print(f"   初始标题: {page.title()}")

            print("\n步骤3: 等待30秒让导航完成...")
            for i in range(30):
                page.wait_for_timeout(1000)

                # A captured download already determines the result.
                if captured["download"]:
                    break

                try:
                    url = page.url

                    if _is_epub_url(url):
                        print(f"\n*** 在第{i+1}秒检测到EPUB URL: {url} ***")
                        page_epub_url = url
                        break

                    if (i + 1) % 5 == 0:
                        try:
                            title = page.title()
                            print(f"   {i+1}秒... URL: {url[:60]}... 标题: {title}")
                        except Exception:
                            # The title can be unavailable mid-navigation.
                            print(f"   {i+1}秒... URL: {url[:60]}...")

                except Exception:
                    # An error here most likely means a navigation is in flight.
                    print(f"   {i+1}秒... (页面正在导航)")
                    continue

            print(f"\n最终URL: {page.url}")

            # Report the download link if one was captured.
            if captured["download"]:
                print(f"下载链接: {captured['download']}")
        finally:
            # Close the browser even when a navigation step raised.
            browser.close()

    # A captured download takes precedence over the page URL.
    return captured["download"] or page_epub_url
86
+
87
if __name__ == "__main__":
    # Script entry point: print a banner, run the capture, report the outcome.
    banner = "=" * 60
    print(banner)
    print("Anna's Archive 真实浏览器测试 (捕获导航)")
    print(banner)
    print()

    epub_link = use_real_browser()

    if not epub_link:
        print("\n未能获取到EPUB链接")
    else:
        print(f"\n成功! EPUB链接: {epub_link}")