Spaces:

fromozu
/

ebook-executor

Paused

File size: 6,537 Bytes

fcaef18

#!/usr/bin/env python3
"""
Test: Use same browser context for search + slow_download to preserve cookies.
Also try checking if we can extract final URL from search results directly.
"""

from playwright.sync_api import sync_playwright
import time
import re

# MD5 hex digest of the target file; used to build the download URLs probed by this script.
MD5 = "d94c20d1364af9b484949659398c4062"
# slow_download page for that file.
# NOTE(review): the meaning of the trailing "/0/3" path segments is not shown here — confirm against the site.
SLOW_URL = f"https://annas-archive.gl/slow_download/{MD5}/0/3"

def test_same_context_cookies():
    """Visit the search page, then poll slow_download in the same browser context.

    The search page is loaded first so that any cookies it sets are already
    in the context when ``SLOW_URL`` is requested. The page is then polled
    once per second, for up to 45 seconds, waiting for its URL to turn into
    an EPUB link.

    Returns:
        The page URL as soon as its path ends in ``.epub`` (case-insensitive,
        query string and fragment ignored), or ``None`` if that never happens
        or the slow_download navigation fails.
    """
    from urllib.parse import urlsplit

    print("[测试] 在同一浏览器上下文中访问搜索页和slow_download...")

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            context = browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
            )
            page = context.new_page()

            # Step 1: visit the search page to collect cookies. A failure here
            # is only reported; the slow_download attempt still runs.
            print("   1. 访问搜索页面获取cookies...")
            try:
                page.goto("https://annas-archive.gl/search?q=Capitalism+A+Global+History", timeout=60000, wait_until="networkidle")
                print(f"   搜索页标题: {page.title()}")
                cookies = context.cookies()
                print(f"   获取到 {len(cookies)} 个 cookies")
                for c in cookies:
                    print(f"      {c['name']}: {c['value'][:30]}...")
            except Exception as e:
                print(f"   搜索页错误: {e}")

            # Step 2: request slow_download from the same context.
            print("\n   2. 在同一上下文中访问slow_download...")
            try:
                response = page.goto(SLOW_URL, timeout=120000, wait_until="domcontentloaded")
                print(f"   状态码: {response.status if response else 'None'}")

                for i in range(45):
                    # Use Playwright's own timer rather than time.sleep():
                    # sleeping the thread stalls Playwright's event handling,
                    # so page state could go stale while we wait.
                    page.wait_for_timeout(1000)
                    url = page.url
                    title = page.title()

                    # Compare the path only, so "file.epub?token=..." matches.
                    if urlsplit(url).path.lower().endswith('.epub'):
                        print(f"   成功! 在第 {i+1} 秒 URL 变成 EPUB")
                        return url

                    if title != "DDoS-Guard":
                        print(f"   {i+1}秒: 标题={title}, URL={url[:60]}...")

                    if (i + 1) % 15 == 0:
                        print(f"   {i+1}秒...")

            except Exception as e:
                print(f"   slow_download 错误: {e}")
        finally:
            # Single cleanup point for every exit path (success, timeout, error).
            browser.close()
    return None

def test_extract_from_search_md5():
    """Probe alternative download endpoints built directly from the MD5.

    Each candidate URL is opened in its own freshly launched headless
    Chromium. The HTTP status and the URL the page ends up on are printed;
    navigation errors are printed truncated to 50 characters. There is a
    one-second pause after each candidate.

    Returns:
        Always ``None``; the findings are only printed.
    """
    print("\n[测试] 分析MD5哈希看是否能直接构造最终URL...")

    # The MD5 digest is 32 hex characters (128 bits); the idea being explored
    # is that the site's file identifiers might be derived from it.
    print(f"   MD5: {MD5}")
    print(f"   可能的文件标识符: {MD5}")

    # Endpoint patterns to try.
    candidates = (
        f"https://annas-archive.gl/fast_download/{MD5}/0/3",
        f"https://annas-archive.gl/download/{MD5}",
        f"https://annas-archive.gl/get/{MD5}",
    )

    for candidate in candidates:
        print(f"\n   测试: {candidate}")
        with sync_playwright() as pw:
            chromium = pw.chromium.launch(headless=True)
            tab = chromium.new_page()
            try:
                resp = tab.goto(candidate, timeout=15000, wait_until="domcontentloaded")
                if resp:
                    status = resp.status
                else:
                    status = 'None'
                print(f"   状态码: {status}")
                print(f"   最终URL: {tab.url}")
            except Exception as err:
                message = str(err)
                print(f"   错误: {message[:50]}")
            chromium.close()
        time.sleep(1)

    return None

def test_page_source_for_redirect():
    """Load the slow_download page and report redirect hints found in its HTML.

    The page source is scanned, case-insensitively, for meta-refresh tags,
    ``window.location`` / ``location.href`` assignments and ``setTimeout``
    calls, plus any absolute URLs containing ``.epub``. Every source line
    that mentions a redirect- or download-related keyword is then printed,
    truncated to 100 characters.

    Returns:
        Always ``None``; the findings are only printed.
    """
    print("\n[测试] 检查slow_download页面源码中的重定向机制...")

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            context = browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
            )
            page = context.new_page()

            try:
                response = page.goto(SLOW_URL, timeout=30000, wait_until="domcontentloaded")
                print(f"   状态码: {response.status if response else 'None'}")

                # Get page content; lowercase it once for all substring checks.
                content = page.content()
                lowered = content.lower()
                print(f"   页面内容长度: {len(content)} 字符")

                # Look for meta refresh, javascript redirects, etc.
                if 'meta' in lowered:
                    meta_refresh = re.findall(r'<meta[^>]*refresh[^>]*content=["\']([^"\']*)["\']', content, re.IGNORECASE)
                    if meta_refresh:
                        print(f"   Meta Refresh: {meta_refresh}")

                if 'window.location' in lowered:
                    print("   发现 window.location 重定向")

                if 'location.href' in lowered:
                    print("   发现 location.href 重定向")

                # The needle must be lowercase too: searching lowercased text
                # for 'setTimeout' (capital T) could never match.
                if 'settimeout' in lowered:
                    print("   发现 setTimeout 定时器")

                # Look for any URLs in the page
                urls = re.findall(r'https?://[^\s"\'<>]+\.epub[^\s"\'<>]*', content, re.IGNORECASE)
                if urls:
                    print(f"   找到EPUB URLs: {urls}")

                # Print relevant parts of the page
                print("\n   页面内容片段:")
                keywords = ('refresh', 'location', 'timeout', 'redirect', 'epub', 'download')
                for line in content.split('\n'):
                    lowered_line = line.lower()
                    if any(keyword in lowered_line for keyword in keywords):
                        print(f"      {line.strip()[:100]}")

            except Exception as e:
                print(f"   错误: {e}")
        finally:
            # Close the browser on every exit path.
            browser.close()

    return None

def main():
    """Run the slow_download diagnostics in order and print a report.

    The same-context cookie test runs first. If it yields an EPUB URL, that
    URL is printed and returned at once. Otherwise the alternative-endpoint
    probe and the page-source inspection run, followed by a closing banner.

    Returns:
        The EPUB URL from the first test when it succeeds, else ``None``.
    """
    rule = "=" * 60

    print(rule)
    print("Anna's Archive slow_download 深度测试")
    print(rule)

    print("\n目标 MD5:", MD5)
    print("目标 URL:", SLOW_URL)

    # Test 1: search page and slow_download share one browser context.
    epub_url = test_same_context_cookies()
    if not epub_url:
        # Test 2: try endpoints constructed from the MD5.
        test_extract_from_search_md5()

        # Test 3: inspect the slow_download page source.
        test_page_source_for_redirect()

        print("\n" + rule)
        print("所有测试完成")
        print(rule)
        return None

    print(f"\n*** 成功! 链接: {epub_url} ***")
    return epub_url

# Script entry point: run the diagnostics only when this file is executed directly.
if __name__ == "__main__":
    main()