fromozu committed on
Commit
ebce8a1
·
verified ·
1 Parent(s): c974285

Upload hf_backend/test_show_external.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. hf_backend/test_show_external.py +94 -0
hf_backend/test_show_external.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ 查找 "show external downloads" 选项并尝试通过其他方式获取文件
4
+ """
5
+
6
+ from playwright.sync_api import sync_playwright
7
+ import time
8
+ import re
9
+
10
# MD5 digest (32 hex characters) identifying the record this script inspects.
MD5 = "d94c20d1364af9b484949659398c4062"
# Record page URL built from MD5; this is the page the browser opens first.
MD5_URL = f"https://annas-archive.gl/md5/{MD5}"
12
+
13
# Substrings that mark an href as one of the external mirrors of interest.
# NOTE(review): plain substring matching is loose ('tor' also matches e.g.
# "store" or "history"); kept as-is to preserve the original filter.
_EXTERNAL_KEYWORDS = ('zlib', 'tor', 'onion', 'libgen')


def _report_candidate_elements(page):
    """Print every button/a/span/div whose text mentions 'external' or 'show'.

    Purely diagnostic: nothing is returned. Any error raised while walking
    the elements is printed and ends the scan (best-effort by design).
    """
    try:
        for el in page.query_selector_all("button, a, span, div"):
            # Read the text once and reuse it for both the test and the print.
            label = el.inner_text().strip()
            lowered = label.lower()
            if 'external' in lowered or 'show' in lowered:
                tag = el.evaluate('el => el.tagName')
                print(f" 发现元素: '{label}' - tag: {tag}")
    except Exception as e:
        print(f" 查找错误: {e}")


def _click_show_external(page):
    """Click the 'show external' text if present, then print matching hrefs.

    After the click, every ``a[href]`` whose href contains one of
    ``_EXTERNAL_KEYWORDS`` is printed. Errors are printed, not raised.
    """
    try:
        link = page.locator("text=show external").first
        # A Locator object is always truthy, so its truth value says nothing
        # about whether the element exists; count() queries the page.
        if link.count() == 0:
            print(" 未找到 'show external' 链接")
            return
        print("\n找到 'show external' 链接!")
        link.click()
        page.wait_for_timeout(2000)

        print("点击后查找下载链接...")
        for anchor in page.query_selector_all("a[href]"):
            href = anchor.get_attribute("href")
            if href and any(k in href.lower() for k in _EXTERNAL_KEYWORDS):
                print(f" 外部链接: {href}")
    except Exception as e:
        print(f"点击错误: {e}")


def _probe_tor_url(page):
    """Try to open the .onion URL for MD5 and print the page title.

    Expected to fail when the browser has no Tor proxy configured; the
    failure is reported (truncated to 50 characters) rather than raised.
    """
    print("\n尝试访问 Z-Library Tor URL...")
    tor_url = f"http://bookszlibb74ugqojhzhg2a63w5i2atv5bqarulgczawnbmsb6s6qead.onion/md5/{MD5}"
    try:
        page.goto(tor_url, timeout=15000)
        print(f" Tor URL 标题: {page.title()}")
    except Exception as e:
        print(f" Tor URL 访问失败: {str(e)[:50]}")


def _probe_clearnet():
    """Issue one GET to the clearnet host and print the HTTP status code."""
    print("\n尝试 Z-Library clearnet...")
    # Local import, as in the original: requests is only needed for this probe.
    import requests
    try:
        resp = requests.get(
            "https://z-lib.org",
            headers={"User-Agent": "Mozilla/5.0"},
            timeout=10,
        )
        print(f" Z-Library 状态: {resp.status_code}")
    except Exception as e:
        print(f" Z-Library 访问失败: {str(e)[:50]}")


def find_external_downloads():
    """Look for the external-download options on the MD5 record page.

    Opens ``MD5_URL`` in headless Chromium and runs four best-effort probes
    in order: list elements mentioning 'external'/'show', click the
    'show external' text and print matching links, try the .onion URL, and
    ping the clearnet host with ``requests``. All results are printed;
    nothing is returned.

    Raises:
        Whatever ``page.goto`` raises if the initial page load fails (for
        example a timeout). The browser is still closed in that case.
    """
    print(f"目标: {MD5_URL}\n")

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            context = browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
            )
            page = context.new_page()

            print("访问 md5 页面...")
            page.goto(MD5_URL, timeout=60000, wait_until="domcontentloaded")
            # Fixed pause so late-rendered content has a chance to appear.
            page.wait_for_timeout(3000)

            print("\n查找 'show external downloads' 链接...")

            # Method 1: scan element text.
            _report_candidate_elements(page)
            # Method 2: click the "show external" text directly.
            _click_show_external(page)
            # Method 3: open the Tor URL directly.
            _probe_tor_url(page)
            # Method 4: reach the clearnet site with requests.
            _probe_clearnet()
        finally:
            # Close the browser even when a step above raised.
            browser.close()
89
+
90
if __name__ == "__main__":
    # Frame the title between two 60-character rules, then run the probe.
    rule = "=" * 60
    print(rule)
    print("查找外部下载选项")
    print(rule)
    find_external_downloads()