fromozu committed on
Commit
2dbbed6
·
verified ·
1 Parent(s): e9d6d40

Upload hf_backend/test_real_browser.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. hf_backend/test_real_browser.py +78 -0
hf_backend/test_real_browser.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ 使用完全真实的浏览器(headless=False),不隐藏任何东西。
4
+ DDoS-Guard看到的就是一个真实的Chrome浏览器。
5
+ """
6
+
7
+ from playwright.sync_api import sync_playwright
8
+ import time
9
+
10
# Hex digest that selects the target record; it is interpolated into SLOW_URL.
MD5 = "d94c20d1364af9b484949659398c4062"
# Slow-download page for that record.
# NOTE(review): the meaning of the trailing "/0/3" path segments is not
# visible in this file -- confirm before changing them.
SLOW_URL = "https://annas-archive.gl/slow_download/{}/0/3".format(MD5)
12
+
13
def _is_epub_url(url):
    """Return True when *url* refers to an ``.epub`` file.

    Matches URLs whose full text ends in ``.epub`` as well as URLs whose
    path component does, so a trailing query string or fragment does not
    hide the file extension.
    """
    from urllib.parse import urlsplit

    lowered = url.lower()
    return lowered.endswith('.epub') or urlsplit(lowered).path.endswith('.epub')


def use_real_browser():
    """Open SLOW_URL in a headed Chromium window and watch for an EPUB URL.

    The function visits the site home page, navigates to ``SLOW_URL`` and
    then polls the page once per second for 30 seconds, printing progress
    along the way.

    Returns:
        The page URL as soon as it looks like an ``.epub`` link; otherwise
        the final page URL if it differs from ``SLOW_URL``; otherwise
        ``None``.

    Raises:
        Whatever Playwright raises on navigation failure or timeout. The
        browser is always closed before the exception propagates.
    """
    print(f"目标: {SLOW_URL}")
    print("注意: 将打开一个真实的Chrome浏览器窗口")

    with sync_playwright() as p:
        # Launch a visible (non-headless) browser window.
        browser = p.chromium.launch(
            headless=False,
            args=[
                # Original author's note: "this one gives it away".
                '--disable-blink-features=AutomationControlled',
            ],
        )
        try:
            # Plain browser context with a fixed UA, viewport and locale.
            context = browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
                viewport={"width": 1920, "height": 1080},
                locale="zh-CN",
            )
            page = context.new_page()

            print("步骤1: 访问主页...")
            page.goto("https://annas-archive.gl", timeout=60000, wait_until="domcontentloaded")
            print(f" 标题: {page.title()}")
            # wait_for_timeout (not time.sleep) keeps Playwright's event
            # processing running, so page state does not go stale.
            page.wait_for_timeout(3000)

            print("\n步骤2: 访问slow_download...")
            page.goto(SLOW_URL, timeout=120000, wait_until="domcontentloaded")
            print(f" 初始标题: {page.title()}")

            print("\n步骤3: 等待30秒...")
            for i in range(30):
                page.wait_for_timeout(1000)
                url = page.url
                title = page.title()

                if _is_epub_url(url):
                    print(f"\n*** 在第{i+1}秒检测到EPUB: {url} ***")
                    return url

                if title != "DDoS-Guard":
                    print(f"\n在第{i+1}秒: 标题={title}")

                # Periodic progress line every 10 seconds.
                if (i + 1) % 10 == 0:
                    print(f" {i+1}秒... URL: {url[:60]}...")

            # Capture the final URL while the page is still open.
            final_url = page.url
            print(f"\n最终URL: {final_url}")
            print(f"最终标题: {page.title()}")
            return final_url if final_url != SLOW_URL else None
        finally:
            # Runs on every exit path, including exceptions and early return.
            browser.close()
66
+
67
if __name__ == "__main__":
    # Script entry point: print a banner, run the browser check, report the result.
    banner = "=" * 60
    print(banner)
    print("Anna's Archive 真实浏览器测试")
    print(banner)
    print()

    found_url = use_real_browser()

    # A falsy result (None) means no usable link was observed.
    message = f"\n结果: {found_url}" if found_url else "\n未能获取到EPUB链接"
    print(message)