File size: 6,537 Bytes
fcaef18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
#!/usr/bin/env python3
"""
Test: Use same browser context for search + slow_download to preserve cookies.
Also try checking if we can extract final URL from search results directly.
"""

from playwright.sync_api import sync_playwright
import time
import re

# MD5 hex digest identifying the target file; interpolated into every URL this script probes.
MD5 = "d94c20d1364af9b484949659398c4062"
# slow_download URL for that MD5. NOTE(review): the meaning of the trailing "/0/3"
# path segments is not shown in this file — confirm against the site before changing.
SLOW_URL = f"https://annas-archive.gl/slow_download/{MD5}/0/3"

def test_same_context_cookies():
    """Load the search page, then ``SLOW_URL``, inside one browser context.

    The search page is visited first so that any cookies it sets are present
    in the context when the slow_download page is requested.  After the
    second navigation the page is polled once per second, up to 45 times,
    for its URL to end in ``.epub``.

    Returns:
        The page URL (str) as soon as it ends with ``.epub``
        (case-insensitive), otherwise ``None`` — either because the URL never
        changed within the polling window or because navigation/polling
        raised and the error was printed.
    """
    print("[测试] 在同一浏览器上下文中访问搜索页和slow_download...")

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            context = browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
            )
            page = context.new_page()

            # Step 1: visit the search page so the context collects its cookies.
            # A failure here is reported but does not stop step 2.
            print("   1. 访问搜索页面获取cookies...")
            try:
                page.goto("https://annas-archive.gl/search?q=Capitalism+A+Global+History", timeout=60000, wait_until="networkidle")
                print(f"   搜索页标题: {page.title()}")
                cookies = context.cookies()
                print(f"   获取到 {len(cookies)} 个 cookies")
                for c in cookies:
                    print(f"      {c['name']}: {c['value'][:30]}...")
            except Exception as e:
                print(f"   搜索页错误: {e}")

            # Step 2: request slow_download from the same context and poll.
            print("\n   2. 在同一上下文中访问slow_download...")
            try:
                response = page.goto(SLOW_URL, timeout=120000, wait_until="domcontentloaded")
                print(f"   状态码: {response.status if response else 'None'}")

                for second in range(1, 46):
                    # Use Playwright's own wait instead of time.sleep(): a plain
                    # sleep blocks Playwright's internal event processing, so
                    # page.url could still hold the pre-redirect value when it
                    # is read right after the pause.
                    page.wait_for_timeout(1000)
                    url = page.url
                    title = page.title()

                    if url.lower().endswith('.epub'):
                        print(f"   成功! 在第 {second} 秒 URL 变成 EPUB")
                        return url

                    # Stay quiet while the anti-DDoS interstitial is showing.
                    if title != "DDoS-Guard":
                        print(f"   {second}秒: 标题={title}, URL={url[:60]}...")

                    # Heartbeat every 15 seconds so a long wait is visible.
                    if second % 15 == 0:
                        print(f"   {second}秒...")

            except Exception as e:
                print(f"   slow_download 错误: {e}")
        finally:
            # Single cleanup point for the success, timeout and error paths.
            browser.close()
    return None

def test_extract_from_search_md5():
    """Probe alternative download URL patterns built from ``MD5``.

    For each candidate URL a fresh page is opened, the URL is requested with
    a 15 s timeout, and the HTTP status plus the final page URL are printed.
    Errors are printed (truncated to 50 characters) and do not stop the
    remaining probes.  Nothing is extracted or returned; the output is for
    manual inspection only.

    Returns:
        Always ``None``.
    """
    print("\n[测试] 分析MD5哈希看是否能直接构造最终URL...")

    # The MD5 is 32 hex characters = 128 bits; the site's file IDs might be
    # based on it, so check which other endpoints respond to it.
    print(f"   MD5: {MD5}")
    print(f"   可能的文件标识符: {MD5}")

    # Alternative URL patterns to try.
    test_urls = [
        f"https://annas-archive.gl/fast_download/{MD5}/0/3",
        f"https://annas-archive.gl/download/{MD5}",
        f"https://annas-archive.gl/get/{MD5}",
    ]

    # Start Playwright and Chromium once rather than once per URL.
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            for url in test_urls:
                print(f"\n   测试: {url}")
                # browser.new_page() opens the page in its own new context, so
                # probes still do not share cookies with each other.
                page = browser.new_page()
                try:
                    response = page.goto(url, timeout=15000, wait_until="domcontentloaded")
                    print(f"   状态码: {response.status if response else 'None'}")
                    print(f"   最终URL: {page.url}")
                except Exception as e:
                    print(f"   错误: {str(e)[:50]}")
                finally:
                    page.close()
                # Pause between probes; no page is open here, so a plain
                # sleep is sufficient.
                time.sleep(1)
        finally:
            browser.close()

    return None

def test_page_source_for_redirect():
    """Fetch ``SLOW_URL`` and print hints about how the page redirects.

    The page HTML is scanned for meta-refresh tags, ``window.location`` /
    ``location.href`` assignments, ``setTimeout`` timers and absolute
    ``.epub`` URLs.  Every source line mentioning a redirect-related keyword
    is then printed, truncated to 100 characters.  Navigation or scanning
    errors are printed rather than raised.

    Returns:
        Always ``None``; results are only printed.
    """
    print("\n[测试] 检查slow_download页面源码中的重定向机制...")

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            context = browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
            )
            page = context.new_page()

            try:
                response = page.goto(SLOW_URL, timeout=30000, wait_until="domcontentloaded")
                print(f"   状态码: {response.status if response else 'None'}")

                content = page.content()
                print(f"   页面内容长度: {len(content)} 字符")

                # Lower-case once; every substring probe below is
                # case-insensitive, so each needle must be lower-case too.
                lowered = content.lower()

                if 'meta' in lowered:
                    meta_refresh = re.findall(r'<meta[^>]*refresh[^>]*content=["\']([^"\']*)["\']', content, re.IGNORECASE)
                    if meta_refresh:
                        print(f"   Meta Refresh: {meta_refresh}")

                if 'window.location' in lowered:
                    print("   发现 window.location 重定向")

                if 'location.href' in lowered:
                    print("   发现 location.href 重定向")

                # The needle must be 'settimeout': the mixed-case 'setTimeout'
                # can never occur in lower-cased text, so the check never fired.
                if 'settimeout' in lowered:
                    print("   发现 setTimeout 定时器")

                # Any absolute URL containing ".epub" embedded in the page.
                urls = re.findall(r'https?://[^\s"\'<>]+\.epub[^\s"\'<>]*', content, re.IGNORECASE)
                if urls:
                    print(f"   找到EPUB URLs: {urls}")

                # Dump every line that mentions a redirect-related keyword.
                print("\n   页面内容片段:")
                keywords = ('refresh', 'location', 'timeout', 'redirect', 'epub', 'download')
                for line in content.split('\n'):
                    line_lower = line.lower()
                    if any(keyword in line_lower for keyword in keywords):
                        print(f"      {line.strip()[:100]}")

            except Exception as e:
                print(f"   错误: {e}")
        finally:
            # Close the browser on every path, including unexpected failures.
            browser.close()

    return None

def main():
    """Run the slow_download experiments in order and print a report.

    The same-context cookie test runs first.  If it yields a URL, that URL is
    announced and returned immediately; otherwise the URL-pattern probe and
    the page-source inspection run, a closing banner is printed, and the
    function returns ``None``.
    """
    banner = "=" * 60
    print(banner)
    print("Anna's Archive slow_download 深度测试")
    print(banner)

    print("\n目标 MD5:", MD5)
    print("目标 URL:", SLOW_URL)

    # Test 1: search page and slow_download in one browser context.
    epub_url = test_same_context_cookies()
    if not epub_url:
        # Test 2: alternative endpoints built from the MD5.
        test_extract_from_search_md5()

        # Test 3: inspect the slow_download page source.
        test_page_source_for_redirect()

        print("\n" + banner)
        print("所有测试完成")
        print(banner)
        return None

    print(f"\n*** 成功! 链接: {epub_url} ***")
    return epub_url

# Run the experiments only when executed as a script, not when imported.
if __name__ == "__main__":
    main()