fromozu committed on
Commit
eac3ab6
·
verified ·
1 Parent(s): c8e4410

Upload hf_backend/test_curl_cookies.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. hf_backend/test_curl_cookies.py +67 -0
hf_backend/test_curl_cookies.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python3
"""
Use curl to access slow_download, saving and reusing cookies.
"""
5
+
6
+ import subprocess
7
+ import time
8
+ import os
9
+
10
# MD5 hex digest that is embedded in the slow_download URL below.
MD5 = "d94c20d1364af9b484949659398c4062"
# URL requested by test_curl_approach(); the trailing "/0/3" path segments are
# hard-coded (NOTE(review): their meaning is not visible in this file).
SLOW_URL = f"https://annas-archive.gl/slow_download/{MD5}/0/3"
# Cookie-jar path handed to curl; it is relative, so it resolves against the
# current working directory.
COOKIE_FILE = "cookies.txt"
13
+
14
def curl_get(url, cookies=None, follow_redirect=True):
    """Fetch *url* with the ``curl`` command-line tool.

    Args:
        url: Address to request.
        cookies: Optional path of a cookie-jar file. When given, curl reads
            cookies from it (``-b``) and writes the updated jar back to the
            same file (``-c``).
        follow_redirect: Follow HTTP redirects (``-L``) when true; when
            false, the first response is returned as-is.

    Returns:
        A ``(stdout, stderr, returncode)`` tuple taken from the finished
        curl process. ``stdout`` holds the response body as text.

    Raises:
        subprocess.TimeoutExpired: If curl runs for more than 120 seconds.
        FileNotFoundError: If the ``curl`` executable cannot be found.
    """
    # -s hides the progress meter; -S keeps error messages on stderr so that
    # a failing request can still be diagnosed by the caller.
    cmd = ["curl", "-s", "-S"]

    # Honour the parameter instead of always following redirects.
    if follow_redirect:
        cmd.append("-L")

    if cookies:
        cmd.extend(["-b", cookies, "-c", cookies])

    cmd.extend([
        "-A", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
        "-H", "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "-H", "Accept-Language: zh-CN,zh;q=0.9,en;q=0.8",
        url,
    ])

    # Decode explicitly as UTF-8 with replacement so a response that does not
    # match the locale encoding cannot raise UnicodeDecodeError.
    # NOTE(review): assumes the pages are UTF-8 - confirm.
    result = subprocess.run(
        cmd,
        capture_output=True,
        text=True,
        encoding="utf-8",
        errors="replace",
        timeout=120,
    )
    return result.stdout, result.stderr, result.returncode
30
+
31
def test_curl_approach():
    """Request the slow_download page through curl, reusing a cookie jar.

    Step 1 requests the site home page with the cookie jar enabled, so curl
    stores whatever cookies the server sets in ``COOKIE_FILE``. Step 2
    requests ``SLOW_URL`` with the same jar and prints a short report: the
    response length, its first 500 characters, and whether the body mentions
    "ddos" or contains a link to an ``.epub`` file.

    Returns:
        None. All results are printed to standard output.
    """
    import re  # local import: the module header does not import re

    print(f"目标: {SLOW_URL}\n")

    # Visit the home page first to collect cookies.
    print("步骤1: 访问主页获取cookies...")
    # Report the jar state before curl runs, so the message stays truthful.
    if os.path.exists(COOKIE_FILE):
        print(f"Cookies文件已存在: {COOKIE_FILE}")
    else:
        print(f"创建cookies文件: {COOKIE_FILE}")
    # Pass the jar here: with cookies=None curl would never write the file
    # that step 2 reads back.
    _, stderr, code = curl_get("https://annas-archive.gl", cookies=COOKIE_FILE)
    if code != 0:
        # Not fatal: step 2 can still be attempted without fresh cookies.
        print(f"警告: 访问主页失败 (curl退出码 {code}): {stderr.strip()}")

    print("\n步骤2: 用cookies访问slow_download...")
    stdout, stderr, code = curl_get(SLOW_URL, cookies=COOKIE_FILE)
    if code != 0:
        # Nothing useful to inspect when curl itself failed.
        print(f"请求失败 (curl退出码 {code}): {stderr.strip()}")
        return

    # len() of a decoded string counts characters, not bytes.
    print(f"响应长度: {len(stdout)} 字符")
    print(f"前500字符:\n{stdout[:500]}")

    # Classify the response by case-insensitive keyword search.
    body_lower = stdout.lower()
    if "ddos" in body_lower:
        print("\n被DDoS-Guard阻止")
    elif ".epub" in body_lower:
        print("\n可能包含EPUB!")
        # IGNORECASE keeps the link search consistent with the check above.
        epub_links = re.findall(
            r'href=["\']([^"\']*\.epub[^"\']*)["\']',
            stdout,
            re.IGNORECASE,
        )
        if epub_links:
            print(f"找到EPUB链接: {epub_links[0]}")
    else:
        print("\n内容不包含.epub或ddos")
62
+
63
if __name__ == "__main__":
    # Print a framed title, then run the curl-based check.
    separator = "=" * 60
    for banner_line in (separator, "Curl方式访问Anna's Archive", separator):
        print(banner_line)
    test_curl_approach()