Spaces:
Paused
Paused
Upload hf_backend/test_show_external.py with huggingface_hub
Browse files
hf_backend/test_show_external.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
查找 "show external downloads" 选项并尝试通过其他方式获取文件
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from playwright.sync_api import sync_playwright
|
| 7 |
+
import time
|
| 8 |
+
import re
|
| 9 |
+
|
| 10 |
+
MD5 = "d94c20d1364af9b484949659398c4062"
|
| 11 |
+
MD5_URL = f"https://annas-archive.gl/md5/{MD5}"
|
| 12 |
+
|
| 13 |
+
def find_external_downloads():
    """Probe the MD5 page for external download options and print the findings.

    Runs four independent, best-effort probes and reports each outcome on
    stdout. A failure inside one probe is printed and does not stop the
    following probes:

    1. List every ``button``/``a``/``span``/``div`` element whose text
       contains "external" or "show".
    2. Click the first element matching the text "show external" (when one
       exists) and print every ``href`` containing ``zlib``, ``tor``,
       ``onion`` or ``libgen``.
    3. Navigate the same page to the hard-coded ``.onion`` URL for ``MD5``.
    4. Send a plain GET to ``https://z-lib.org`` with ``requests`` and print
       the HTTP status code.

    Takes no arguments and returns ``None``. Reads the module-level
    constants ``MD5`` and ``MD5_URL``.
    """
    print(f"目标: {MD5_URL}\n")

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        # try/finally so the browser is closed even when page setup or the
        # initial navigation raises.
        try:
            context = browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
            )
            page = context.new_page()

            print("访问 md5 页面...")
            page.goto(MD5_URL, timeout=60000, wait_until="domcontentloaded")
            # Fixed 3 s pause after DOMContentLoaded (presumably to let
            # client-side scripts finish rendering).
            page.wait_for_timeout(3000)

            # Look for the "show external downloads" button or link.
            print("\n查找 'show external downloads' 链接...")

            # Probe 1: scan candidate elements by their visible text.
            try:
                for el in page.query_selector_all("button, a, span, div"):
                    raw_text = el.inner_text().strip()
                    text = raw_text.lower()
                    if 'external' in text or 'show' in text:
                        print(f" 发现元素: '{raw_text}' - tag: {el.evaluate('el => el.tagName')}")
            except Exception as e:
                print(f" 查找错误: {e}")

            # Probe 2: click the "show external" text directly.
            try:
                link = page.locator("text=show external").first
                # A Locator object is always truthy, so test the match count
                # instead; otherwise a missing link is reported as found and
                # click() blocks until its timeout expires.
                if link.count() > 0:
                    print("\n找到 'show external' 链接!")
                    link.click()
                    page.wait_for_timeout(2000)

                    # List the download options visible after the click.
                    print("点击后查找下载链接...")
                    for anchor in page.query_selector_all("a[href]"):
                        href = anchor.get_attribute("href")
                        # NOTE(review): plain substring match; 'tor' also
                        # matches unrelated hrefs that merely contain "tor".
                        if href and any(k in href.lower() for k in ['zlib', 'tor', 'onion', 'libgen']):
                            print(f" 外部链接: {href}")
                else:
                    print("\n未找到 'show external' 链接")
            except Exception as e:
                print(f"点击错误: {e}")

            # Probe 3: open the Tor (.onion) URL directly.
            print("\n尝试访问 Z-Library Tor URL...")
            tor_url = f"http://bookszlibb74ugqojhzhg2a63w5i2atv5bqarulgczawnbmsb6s6qead.onion/md5/{MD5}"
            try:
                # Expected to fail when no Tor proxy is configured.
                page.goto(tor_url, timeout=15000)
                print(f" Tor URL 标题: {page.title()}")
            except Exception as e:
                # Only the first 50 characters of the error are shown.
                print(f" Tor URL 访问失败: {str(e)[:50]}")

            # Probe 4: reach the clearnet site with requests.
            print("\n尝试 Z-Library clearnet...")
            # Imported here, as in the original, so a missing `requests`
            # package only affects this last probe.
            import requests
            try:
                resp = requests.get(
                    "https://z-lib.org",
                    headers={"User-Agent": "Mozilla/5.0"},
                    timeout=10
                )
                print(f" Z-Library 状态: {resp.status_code}")
            except Exception as e:
                print(f" Z-Library 访问失败: {str(e)[:50]}")
        finally:
            browser.close()
|
| 89 |
+
|
| 90 |
+
if __name__ == "__main__":
    # Print a banner framed by two 60-character rules, then run the probes.
    rule = "=" * 60
    for banner_line in (rule, "查找外部下载选项", rule):
        print(banner_line)
    find_external_downloads()
|