Spaces:
Paused
Paused
| #!/usr/bin/env python3 | |
| """ | |
| 测试 Z-Library 搜索功能,验证能否获取 EPUB 直链 | |
| """ | |
| from playwright.sync_api import sync_playwright | |
| import time | |
| import re | |
| from urllib.parse import quote | |
| import requests | |
def _is_epub_url(url):
    """Return True when the path component of *url* ends with ``.epub``.

    Only the path is inspected, so a query string or fragment that follows
    the file name (``.../book.epub?token=...``) does not hide the extension.
    """
    # Local import: the file's top-level imports only bring in ``quote``.
    from urllib.parse import urlparse

    return urlparse(url).path.lower().endswith(".epub")


def _find_download_url(context, base_url, query):
    """Search for *query* and return the first book's ``/dl/`` URL, or None.

    Opens the search page in a new tab, takes the first
    ``/book/<id>/<slug>.html`` link found in its HTML, opens that detail page
    and returns the first ``/dl/<id>`` link found there. Returns None when
    either page yields no match. The search tab is left open; the detail tab
    is closed before returning.
    """
    # safe="" also escapes "/", which quote() leaves alone by default and
    # which would otherwise split the query into extra path segments.
    encoded_query = quote(query, safe="")
    search_url = f"{base_url}/s/{encoded_query}"

    search_page = context.new_page()
    search_page.goto(search_url, timeout=60000, wait_until="domcontentloaded")
    # Fixed pause before reading the page; presumably gives client-side
    # rendering time to finish -- TODO confirm whether a selector wait fits.
    search_page.wait_for_timeout(5000)
    print(f"页面标题: {search_page.title()}")

    # Each match is an (id, slug) tuple taken from a book detail link.
    book_matches = re.findall(
        r'/book/([a-zA-Z0-9]+)/([^"]+\.html)', search_page.content()
    )
    print(f"找到书籍数量: {len(book_matches)}")
    if not book_matches:
        return None

    book_id, book_slug = book_matches[0]
    book_url = f"{base_url}/book/{book_id}/{book_slug}"
    print(f"\n访问第一本书详情页: {book_url}")

    detail_page = context.new_page()
    try:
        detail_page.goto(book_url, timeout=60000, wait_until="domcontentloaded")
        detail_page.wait_for_timeout(3000)
        dl_ids = re.findall(r'/dl/([a-zA-Z0-9]+)', detail_page.content())
    finally:
        # Close the tab even when navigation fails.
        detail_page.close()

    if not dl_ids:
        return None
    return f"{base_url}/dl/{dl_ids[0]}"


def _probe_download_url(context, dl_url, max_seconds=30):
    """Open *dl_url* in a new tab and report whether it ends at an EPUB file.

    Polls the tab title once per second, for at most *max_seconds* seconds,
    until it no longer looks like the Cloudflare check page, then prints the
    final URL and a success line when that URL points at an ``.epub`` file.
    """
    print(f"下载页 URL: {dl_url}")
    print("\n用 Playwright 访问下载页...")

    dl_page = context.new_page()
    try:
        # NOTE(review): if the server answers with a file attachment, goto()
        # may raise instead of navigating -- confirm against the live site.
        dl_page.goto(dl_url, timeout=60000, wait_until="domcontentloaded")

        # Wait for the Cloudflare check to pass.
        for elapsed in range(1, max_seconds + 1):
            # wait_for_timeout keeps Playwright's event processing running;
            # time.sleep() would block it and can leave page state stale.
            dl_page.wait_for_timeout(1000)
            title = dl_page.title()
            print(f" {elapsed}秒: {title}")
            if 'Checking' not in title and 'browser' not in title.lower():
                break

        final_url = dl_page.url
        print(f"\n最终 URL: {final_url}")
        if _is_epub_url(final_url):
            print("[成功] 获取到 EPUB 直链!")
    finally:
        dl_page.close()


def test_zlibrary_search(query):
    """Manually test a Z-Library search and check for a direct EPUB link.

    Launches a headed Chromium browser, searches for *query*, follows the
    first result to its download URL and prints progress along the way.
    Waits for the user to press Enter before closing the browser.

    Args:
        query: Search text; it is URL-encoded before being placed in the
            search path.

    Returns:
        None. All results are reported on stdout.
    """
    base_url = "https://z-library.sk"
    print(f"搜索: {query}\n")

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=False)
        try:
            context = browser.new_context(
                user_agent=(
                    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                    "AppleWebKit/537.36 (KHTML, like Gecko) "
                    "Chrome/123.0.0.0 Safari/537.36"
                ),
            )
            dl_url = _find_download_url(context, base_url, query)
            if dl_url is not None:
                _probe_download_url(context, dl_url)
            # Keep the browser window open for inspection until confirmed.
            input("按回车键关闭浏览器...")
        finally:
            # Always release the browser, including when a navigation raises.
            browser.close()
# Script entry point: run one sample search when executed directly.
if __name__ == "__main__":
    sample_query = "Capitalism: A Global History"
    test_zlibrary_search(sample_query)