fromozu committed on
Commit
1479093
·
verified ·
1 Parent(s): f50cc3f

Upload hf_backend/test_zlibrary_v2.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. hf_backend/test_zlibrary_v2.py +107 -0
hf_backend/test_zlibrary_v2.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ 测试 Z-Library 不同的访问方式
4
+ """
5
+
6
+ from playwright.sync_api import sync_playwright
7
+ import requests
8
+ import time
9
+ import re
10
+
11
def test_zlibrary_domains():
    """Probe candidate Z-Library mirror domains with a plain HTTP search request.

    For every domain in the hard-coded list, issue ``GET /search`` with a fixed
    query and print the HTTP status. A 200 response is reported as invalid when
    its body contains "not found" (case-insensitive) or "404" appears within
    the first 500 characters; otherwise it is reported as valid together with
    the body length. ``.onion`` entries are skipped because they are not
    reachable without a Tor proxy.

    Nothing is returned; all results go to stdout. Any error raised while
    requesting a domain is printed (truncated to 50 characters) and the loop
    continues with the next domain.
    """
    domains = [
        "https://z-library.sk",
        "https://zlibrary.to",
        "https://zlibrary.is",
        "https://singlelogin.re",
        "https://bookszlibb74ugqojhzhg2a63w5i2atv5bqarulgczawnbmsb6s6qead.onion",  # Tor
    ]

    query = "Capitalism A Global History"
    print("测试不同的 Z-Library 域名...\n")

    for domain in domains:
        print(f"测试: {domain}")

        # Checked outside the try block: the membership test cannot raise, and
        # skipping here means no request is ever attempted for Tor addresses.
        if ".onion" in domain:
            print(" [跳过] Tor 需要特殊配置")
            continue

        try:
            # Let requests build the query string so the spaces in the query
            # are encoded instead of being interpolated raw into the URL.
            resp = requests.get(
                f"{domain}/search",
                params={"q": query, "languages": 1},
                timeout=10,
                headers={"User-Agent": "Mozilla/5.0"},
            )
            print(f" 状态: {resp.status_code}")
            if resp.status_code == 200:
                # Heuristic: some mirrors answer 200 with an error page.
                if "not found" in resp.text.lower() or "404" in resp.text[:500]:
                    print(" [无效] 页面不存在")
                else:
                    print(f" [有效] 内容长度: {len(resp.text)}")
        except Exception as e:
            # Deliberately broad: this is a best-effort probe and one dead
            # mirror must not stop the remaining domains from being checked.
            print(f" 错误: {str(e)[:50]}")
        print()
43
+
44
def test_with_playwright():
    """Probe Z-Library search pages through a headless Chromium browser.

    For each (domain, label) pair, open the search URL in a fresh browser
    context with a desktop Chrome user agent, wait two seconds after
    ``domcontentloaded``, then print the page title, the number of anchors
    whose ``href`` contains ``/book/`` or ``/b/``, and the first five such
    links (href truncated to 60 characters, link text to 50).

    Nothing is returned; all results go to stdout. Errors raised while loading
    or inspecting a page are printed and the next domain is tried. The browser
    context and the browser itself are closed even when an error occurs.
    """
    print("\n用 Playwright 测试 Z-Library...")

    domains = [
        ("https://z-library.sk", "Z-Library SK"),
        ("https://singlelogin.re", "SingleLogin"),
    ]

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            for domain, name in domains:
                print(f"\n测试 {name}: {domain}")
                context = browser.new_context(
                    user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
                )
                try:
                    page = context.new_page()
                    page.goto(
                        f"{domain}/search?q=Capitalism+A+Global+History&languages=1",
                        timeout=15000,
                        wait_until="domcontentloaded",
                    )
                    # Fixed pause after DOMContentLoaded before inspecting the page.
                    page.wait_for_timeout(2000)
                    print(f" 标题: {page.title()}")

                    # Look for links that point at book pages.
                    links = page.query_selector_all("a[href*='/book/'], a[href*='/b/']")
                    print(f" 找到书籍链接: {len(links)}")
                    for link in links[:5]:
                        # get_attribute may return None; fall back to an empty
                        # string so the slice below cannot raise.
                        href = link.get_attribute("href") or ""
                        text = link.inner_text().strip()[:50]
                        print(f" {href[:60]} - {text}")

                except Exception as e:
                    # Best-effort probe: report the failure and move on to the
                    # next domain instead of aborting the whole run.
                    print(f" 错误: {e}")
                finally:
                    # Release the context on every path, including failures.
                    context.close()
        finally:
            browser.close()
83
+
84
def test_zlib_api():
    """Request a few candidate Z-Library API endpoints and print the outcome.

    For each endpoint the HTTP status is printed; on a 200 response the first
    200 characters of the body are printed as well. Any error raised by the
    request is printed and the next endpoint is tried. Returns nothing.
    """
    print("\n\n测试 Z-Library API...")

    browser_like_headers = {"User-Agent": "Mozilla/5.0"}

    # Endpoints the original author lists as public Z-Library API endpoints.
    endpoints = (
        "https://z-library.sk/suggest_json.php?term=capitalism",
        "https://api.zlibrary.org/v1/book/search",
    )

    for endpoint in endpoints:
        print(f"\n测试: {endpoint}")
        try:
            reply = requests.get(endpoint, timeout=10, headers=browser_like_headers)
            status = reply.status_code
            print(f" 状态: {status}")
            if status != 200:
                # Only successful responses get a body preview.
                continue
            print(f" 内容: {reply.text[:200]}")
        except Exception as err:
            print(f" 错误: {err}")
103
+
104
if __name__ == "__main__":
    # Run the probes in order: plain HTTP requests, headless browser, API endpoints.
    for probe in (test_zlibrary_domains, test_with_playwright, test_zlib_api):
        probe()