#!/usr/bin/env python3
"""
Test: Use same browser context for search + slow_download to preserve cookies.
Also try checking if we can extract final URL from search results directly.
"""

import re
import time

from playwright.sync_api import sync_playwright

MD5 = "d94c20d1364af9b484949659398c4062"
SLOW_URL = f"https://annas-archive.gl/slow_download/{MD5}/0/3"

# Desktop Chrome user agent shared by every browser context that sets one.
_USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
)

# Captures the ``content`` attribute of a ``<meta ... refresh ...>`` tag.
_META_REFRESH_RE = re.compile(
    r'<meta[^>]*refresh[^>]*content=["\']([^"\']*)["\']',
    re.IGNORECASE,
)

# Absolute http(s) URLs that contain ``.epub``.
_EPUB_URL_RE = re.compile(r'https?://[^\s"\'<>]+\.epub[^\s"\'<>]*', re.IGNORECASE)

# Lower-case substrings that mark a source line as worth printing.
_INTERESTING_KEYWORDS = ('refresh', 'location', 'timeout', 'redirect', 'epub', 'download')


def test_same_context_cookies():
    """Visit the search page, then poll SLOW_URL in the same browser context.

    The search page is loaded first so that any cookies it sets are present
    when SLOW_URL is requested. After navigating to SLOW_URL the page URL is
    polled once per second for up to 45 seconds.

    Returns:
        The page URL as soon as it ends with ``.epub`` (case-insensitive),
        otherwise ``None``.
    """
    print("[测试] 在同一浏览器上下文中访问搜索页和slow_download...")

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            context = browser.new_context(user_agent=_USER_AGENT)
            page = context.new_page()

            # First, visit the search page to get cookies.
            print(" 1. 访问搜索页面获取cookies...")
            try:
                page.goto(
                    "https://annas-archive.gl/search?q=Capitalism+A+Global+History",
                    timeout=60000,
                    wait_until="networkidle",
                )
                print(f" 搜索页标题: {page.title()}")
                cookies = context.cookies()
                print(f" 获取到 {len(cookies)} 个 cookies")
                for c in cookies:
                    print(f" {c['name']}: {c['value'][:30]}...")
            except Exception as e:
                # Best effort: the slow_download attempt below still runs.
                print(f" 搜索页错误: {e}")

            # Now try slow_download in the same context.
            print("\n 2. 在同一上下文中访问slow_download...")
            try:
                response = page.goto(SLOW_URL, timeout=120000, wait_until="domcontentloaded")
                print(f" 状态码: {response.status if response else 'None'}")

                for i in range(45):
                    time.sleep(1)
                    url = page.url
                    title = page.title()

                    if url.lower().endswith('.epub'):
                        print(f" 成功! 在第 {i+1} 秒 URL 变成 EPUB")
                        return url

                    # Only report once the page title is no longer the
                    # "DDoS-Guard" placeholder.
                    if title != "DDoS-Guard":
                        print(f" {i+1}秒: 标题={title}, URL={url[:60]}...")

                    if (i + 1) % 15 == 0:
                        print(f" {i+1}秒...")
            except Exception as e:
                print(f" slow_download 错误: {e}")
        finally:
            # Runs on every exit path, including the early ``return url``.
            browser.close()

    return None


def test_extract_from_search_md5():
    """
    Check if there's a way to get the final URL directly from the MD5.
    Maybe Anna's Archive has an API or we can construct the URL.

    Each candidate URL is opened in a freshly launched browser, and the
    response status and final page URL are printed. Always returns ``None``.
    """
    print("\n[测试] 分析MD5哈希看是否能直接构造最终URL...")

    # The MD5 is 32 hex characters = 128 bits.
    # Anna's Archive file IDs might be based on this.
    print(f" MD5: {MD5}")
    print(f" 可能的文件标识符: {MD5}")

    # Try some alternative URL patterns.
    test_urls = [
        f"https://annas-archive.gl/fast_download/{MD5}/0/3",
        f"https://annas-archive.gl/download/{MD5}",
        f"https://annas-archive.gl/get/{MD5}",
    ]

    for url in test_urls:
        print(f"\n 测试: {url}")
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page()
                response = page.goto(url, timeout=15000, wait_until="domcontentloaded")
                print(f" 状态码: {response.status if response else 'None'}")
                print(f" 最终URL: {page.url}")
            except Exception as e:
                print(f" 错误: {str(e)[:50]}")
            finally:
                browser.close()
        # Short pause between candidate URLs.
        time.sleep(1)

    return None


def _report_redirect_hints(content):
    """Print the redirect-related markers found in the HTML string *content*."""
    # Lower-case once; every needle below must therefore be lower-case too.
    lowered = content.lower()

    if 'meta' in lowered:
        meta_refresh = _META_REFRESH_RE.findall(content)
        if meta_refresh:
            print(f" Meta Refresh: {meta_refresh}")

    if 'window.location' in lowered:
        print(" 发现 window.location 重定向")

    if 'location.href' in lowered:
        print(" 发现 location.href 重定向")

    # The needle is lower-case on purpose: 'setTimeout' can never occur in a
    # lowercased string, so a mixed-case needle would never match.
    if 'settimeout' in lowered:
        print(" 发现 setTimeout 定时器")

    # Look for any EPUB URLs in the page.
    urls = _EPUB_URL_RE.findall(content)
    if urls:
        print(f" 找到EPUB URLs: {urls}")

    # Print the source lines that mention a redirect-related keyword.
    print("\n 页面内容片段:")
    for line in content.split('\n'):
        lowered_line = line.lower()
        if any(keyword in lowered_line for keyword in _INTERESTING_KEYWORDS):
            print(f" {line.strip()[:100]}")


def test_page_source_for_redirect():
    """Check the actual page source of slow_download to understand redirect mechanism.

    Loads SLOW_URL, prints the response status and content length, then
    prints the redirect markers found in the page source. Always returns
    ``None``.
    """
    print("\n[测试] 检查slow_download页面源码中的重定向机制...")

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            context = browser.new_context(user_agent=_USER_AGENT)
            page = context.new_page()

            try:
                response = page.goto(SLOW_URL, timeout=30000, wait_until="domcontentloaded")
                print(f" 状态码: {response.status if response else 'None'}")

                content = page.content()
                print(f" 页面内容长度: {len(content)} 字符")

                _report_redirect_hints(content)
            except Exception as e:
                print(f" 错误: {e}")
        finally:
            browser.close()

    return None


def main():
    """Run the three experiments in order, stopping early on the first success.

    Returns:
        The EPUB URL found by ``test_same_context_cookies``, or ``None`` when
        that test finds nothing and the remaining diagnostic tests are run.
    """
    print("=" * 60)
    print("Anna's Archive slow_download 深度测试")
    print("=" * 60)
    print("\n目标 MD5:", MD5)
    print("目标 URL:", SLOW_URL)

    # Test 1: Same context cookies
    result1 = test_same_context_cookies()
    if result1:
        print(f"\n*** 成功! 链接: {result1} ***")
        return result1

    # Test 2: Extract from MD5
    test_extract_from_search_md5()

    # Test 3: Check page source
    test_page_source_for_redirect()

    print("\n" + "=" * 60)
    print("所有测试完成")
    print("=" * 60)


if __name__ == "__main__":
    main()