Spaces:

fromozu
/

ebook-executor

Paused

App Files Files Community

fromozu committed on May 4

Commit

158ec06

verified ·

1 Parent(s): 9bb2166

Upload hf_backend/test_zlibrary_full.py with huggingface_hub

Browse files

Files changed (1) hide show

hf_backend/test_zlibrary_full.py +90 -0

hf_backend/test_zlibrary_full.py ADDED Viewed

	@@ -0,0 +1,90 @@

+#!/usr/bin/env python3
+"""
+测试 Z-Library 搜索功能，获取书籍列表和下载页 URL
+"""
+from playwright.sync_api import sync_playwright
+import time
+import re
+from urllib.parse import quote
+def test_zlibrary_search(query):
+    """测试 Z-Library 搜索"""
+    print(f"搜索: {query}\n")
+    with sync_playwright() as p:
+        browser = p.chromium.launch(headless=False)
+        context = browser.new_context(
+            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
+        )
+        page = context.new_page()
+        # 访问搜索页
+        search_url = f"https://z-library.sk/s/{quote(query)}"
+        print(f"搜索 URL: {search_url}")
+        page.goto(search_url, timeout=60000, wait_until="domcontentloaded")
+        page.wait_for_timeout(5000)
+        print(f"页面标题: {page.title()}")
+        # 获取页面内容
+        content = page.content()
+        # 查找书籍详情页链接
+        # 格式: /book/{id}/{title}.html
+        book_pattern = r'/book/([a-zA-Z0-9]+)/([^"]+\.html)'
+        book_matches = re.findall(book_pattern, content)
+        print(f"\n找到书籍数量: {len(book_matches)}")
+        books = []
+        for book_id, book_title in book_matches[:10]:
+            title = book_title.replace('.html', '').replace('-', ' ')
+            full_url = f"https://z-library.sk/book/{book_id}/{book_title}"
+            books.append((title, full_url))
+            print(f"  - {title[:50]}")
+            print(f"    URL: {full_url}")
+        # 查找下载链接
+        # 格式: /dl/{id}
+        dl_pattern = r'/dl/([a-zA-Z0-9]+)'
+        dl_matches = re.findall(dl_pattern, content)
+        unique_dl = list(set(dl_matches))
+        print(f"\n找到下载链接: {len(unique_dl)}")
+        for dl_id in unique_dl[:5]:
+            print(f"  https://z-library.sk/dl/{dl_id}")
+        # 点击第一本书，获取下载页面 URL
+        if book_matches:
+            first_book_id, first_book_title = book_matches[0]
+            book_url = f"https://z-library.sk/book/{first_book_id}/{first_book_title}"
+            print(f"\n访问第一本书详情页: {book_url}")
+            page2 = context.new_page()
+            page2.goto(book_url, timeout=60000, wait_until="domcontentloaded")
+            page2.wait_for_timeout(3000)
+            # 获取下载链接
+            detail_content = page2.content()
+            # 查找 dl/ 链接
+            dl_in_detail = re.findall(r'/dl/([a-zA-Z0-9]+)', detail_content)
+            if dl_in_detail:
+                dl_id = dl_in_detail[0]
+                dl_url = f"https://z-library.sk/dl/{dl_id}"
+                print(f"下载页 URL: {dl_url}")
+            # 查找 EPUB 文件的直接下载 URL
+            epub_pattern = r'"(https?://[^"]*\.epub[^"]*)"'
+            epub_matches = re.findall(epub_pattern, detail_content)
+            if epub_matches:
+                print(f"EPUB 直链: {epub_matches[0]}")
+            page2.close()
+        input("按回车键关闭浏览器...")
+        browser.close()
+if __name__ == "__main__":
+    test_zlibrary_search("Capitalism: A Global History")