Spaces:
Paused
Paused
Upload hf_backend/test_zlibrary_v2.py with huggingface_hub
Browse files- hf_backend/test_zlibrary_v2.py +107 -0
hf_backend/test_zlibrary_v2.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
测试 Z-Library 不同的访问方式
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from playwright.sync_api import sync_playwright
|
| 7 |
+
import requests
|
| 8 |
+
import time
|
| 9 |
+
import re
|
| 10 |
+
|
| 11 |
+
def test_zlibrary_domains():
    """Probe several Z-Library mirror domains with a plain HTTP search request.

    For every domain a GET request is sent to ``/search`` and the HTTP status
    is printed. A 200 response is reported as invalid when its body looks like
    a "not found" page, and as valid (together with the body length)
    otherwise. The ``.onion`` entry is skipped because Tor needs special
    configuration.

    Returns:
        None. All results are written to stdout.
    """
    domains = [
        "https://z-library.sk",
        "https://zlibrary.to",
        "https://zlibrary.is",
        "https://singlelogin.re",
        "https://bookszlibb74ugqojhzhg2a63w5i2atv5bqarulgczawnbmsb6s6qead.onion",  # Tor
    ]

    query = "Capitalism A Global History"
    print("测试不同的 Z-Library 域名...\n")

    for domain in domains:
        print(f"测试: {domain}")

        # Nothing here can raise, so the skip lives outside the try block.
        if ".onion" in domain:
            print(" [跳过] Tor 需要特殊配置")
            continue

        try:
            # Let requests build and percent-encode the query string instead
            # of interpolating the raw search text into the URL by hand.
            resp = requests.get(
                f"{domain}/search",
                params={"q": query, "languages": 1},
                timeout=10,
                headers={"User-Agent": "Mozilla/5.0"},
            )
            print(f" 状态: {resp.status_code}")
            if resp.status_code == 200:
                # Heuristic: some mirrors answer 200 with an error page.
                if "not found" in resp.text.lower() or "404" in resp.text[:500]:
                    print(" [无效] 页面不存在")
                else:
                    print(f" [有效] 内容长度: {len(resp.text)}")
        except Exception as e:
            # Deliberately broad: one unreachable mirror must not abort the
            # remaining probes. The message is truncated to keep output short.
            print(f" 错误: {str(e)[:50]}")
        print()
|
| 43 |
+
|
| 44 |
+
def test_with_playwright():
    """Probe Z-Library search pages with a headless Chromium browser.

    For each configured domain a fresh browser context is opened with a
    desktop Chrome user agent, the search page is loaded, and the page title
    plus up to five book links (``href`` and link text) are printed. Errors
    for one domain are printed and do not stop the remaining domains.

    Returns:
        None. All results are written to stdout.
    """
    print("\n用 Playwright 测试 Z-Library...")

    domains = [
        ("https://z-library.sk", "Z-Library SK"),
        ("https://singlelogin.re", "SingleLogin"),
    ]

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            for domain, name in domains:
                print(f"\n测试 {name}: {domain}")
                context = browser.new_context(
                    user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
                )

                try:
                    # Page creation is inside the guarded region so a failure
                    # here is reported like any other per-domain error.
                    page = context.new_page()
                    page.goto(f"{domain}/search?q=Capitalism+A+Global+History&languages=1",
                              timeout=15000, wait_until="domcontentloaded")
                    # Short fixed pause after DOMContentLoaded before reading
                    # the page (presumably to let scripts render results).
                    page.wait_for_timeout(2000)
                    print(f" 标题: {page.title()}")

                    # Look for book links
                    links = page.query_selector_all("a[href*='/book/'], a[href*='/b/']")
                    print(f" 找到书籍链接: {len(links)}")
                    for link in links[:5]:
                        href = link.get_attribute("href")
                        text = link.inner_text().strip()[:50]
                        print(f" {href[:60]} - {text}")

                except Exception as e:
                    print(f" 错误: {e}")
                finally:
                    # Always release the context, even on interruption.
                    context.close()
        finally:
            # Always shut the browser down, whatever happened in the loop.
            browser.close()
|
| 83 |
+
|
| 84 |
+
def test_zlib_api():
    """Request candidate Z-Library API endpoints and print the outcome.

    Each endpoint is fetched with a 10-second timeout. The HTTP status is
    always printed; for a 200 response the first 200 characters of the body
    are printed as well. Any exception is printed and the loop moves on.

    Returns:
        None. All results are written to stdout.
    """
    print("\n\n测试 Z-Library API...")

    # Z-Library has some public API endpoints
    endpoints = (
        "https://z-library.sk/suggest_json.php?term=capitalism",
        "https://api.zlibrary.org/v1/book/search",
    )
    ua_headers = {"User-Agent": "Mozilla/5.0"}

    for endpoint in endpoints:
        print(f"\n测试: {endpoint}")
        try:
            response = requests.get(endpoint, timeout=10, headers=ua_headers)
            print(f" 状态: {response.status_code}")
            if response.status_code != 200:
                continue
            print(f" 内容: {response.text[:200]}")
        except Exception as err:
            print(f" 错误: {err}")
|
| 103 |
+
|
| 104 |
+
if __name__ == "__main__":
    # Run the probes in order: plain HTTP, headless browser, API endpoints.
    for probe in (test_zlibrary_domains, test_with_playwright, test_zlib_api):
        probe()
|