lazyfling committed on
Commit
4390011
·
verified ·
1 Parent(s): c0ae781

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +109 -46
app.py CHANGED
@@ -1,53 +1,116 @@
1
- from flask import Flask, request, jsonify, make_response
2
- import requests
3
- import logging
 
 
4
 
5
# Configure the root logger to emit records at INFO level and above.
logging.basicConfig(level=logging.INFO)

# Flask application object; the @app.route decorators in this module
# register their view functions on it.
app = Flask(__name__)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
def proxy_request(full_url):
    """Forward the current Flask request to ``full_url`` and stream the reply.

    The incoming request's method, body, query string and headers are
    replayed against ``full_url``.

    Args:
        full_url: Absolute URL of the upstream resource.

    Returns:
        A ``(body_chunks, status_code, headers)`` triple. ``body_chunks`` is
        an iterator of byte strings. On a transport failure the triple
        describes a JSON error document with status 500, so callers can
        always unpack three values.
    """
    # Local import: this block is the only user of json in the module.
    import json

    # Host must describe the upstream server, not this proxy. Accept-Encoding
    # is left to requests so upstream only uses encodings it can decode.
    headers = {
        key: value
        for key, value in request.headers.items()
        if key.lower() not in ('host', 'accept-encoding')
    }
    headers['User-Agent'] = request.headers.get(
        'User-Agent', 'Mozilla/5.0 (compatible; SpacesProxy/1.0)')

    try:
        response = requests.request(
            method=request.method,
            url=full_url,
            headers=headers,
            data=request.get_data(),
            params=request.args,
            timeout=60,
            stream=True,  # stream so large responses (e.g. HTML pages) are not buffered whole
        )
    except requests.exceptions.RequestException as e:
        logging.error(f"代理请求错误: {e}")
        error_body = json.dumps({"error": str(e)}).encode('utf-8')
        # Same shape as the success path: iterator of chunks, status, headers.
        return iter([error_body]), 500, {'Content-Type': 'application/json'}

    # Hand back an iterator so the caller can stream the body chunk by chunk.
    return response.iter_content(chunk_size=1024), response.status_code, response.headers
 
 
 
 
26
 
27
# Upstream headers that describe the upstream connection or the encoded
# body rather than the stream this proxy sends, so they are not copied.
_HOP_OR_BODY_HEADERS = frozenset({
    'content-encoding', 'content-length', 'transfer-encoding',
    'connection', 'host',
})


@app.route('/proxy/<path:url>')
def handle_proxy(url):
    """Stream the resource named by the ``/proxy/<url>`` path to the client.

    Args:
        url: Target taken from the request path. ``https://`` is prepended
            when it does not already start with ``http://`` or ``https://``.

    Returns:
        A streaming response carrying the upstream status code, body and
        headers (minus the filtered ones), or whatever ``proxy_request``
        returned if that was not a three-element result.
    """
    # Test for a real scheme prefix; a bare 'http' prefix would also match
    # host names such as "httpbin.org".
    if url.lower().startswith(('http://', 'https://')):
        full_url = url
    else:
        full_url = f"https://{url}"

    result = proxy_request(full_url)
    if len(result) != 3:
        # A non-triple is an already-built (response, status) pair for a
        # failed upstream call; return it as is instead of crashing on unpack.
        return result
    content_iter, status_code, headers = result

    # Build the response from the chunk iterator so no Content-Length is
    # attached; the upstream length may be absent or refer to compressed data.
    resp = app.response_class(content_iter, status=status_code)
    for key, value in headers.items():
        if key.lower() not in _HOP_OR_BODY_HEADERS:
            # Content-Type is copied verbatim here, charset included.
            resp.headers[key] = value

    return resp
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
if __name__ == '__main__':
    import os

    # The server listens on every interface, and Flask's debug mode exposes an
    # interactive debugger that can execute code. Debug is therefore opt-in
    # through the FLASK_DEBUG environment variable instead of always on.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run(host='0.0.0.0', port=7860, debug=debug_enabled)
 
1
+ from flask import Flask, request, jsonify, make_response, render_template_string, url_for
2
+ import requests
3
+ import logging
4
+ import re
5
+ from urllib.parse import urljoin, urlparse
6
 
7
# Configure the root logger to emit records at INFO level and above.
logging.basicConfig(level=logging.INFO)
# Flask application object; the @app.route decorators in this module
# register their view functions on it.
app = Flask(__name__)
9
 
10
# Landing page rendered by index() and by proxy() when no URL is supplied.
# It is a single GET form whose "url" text field is submitted to /proxy.
# The user-facing text is Chinese and is part of the runtime output.
HTML_TEMPLATE = '''
<!DOCTYPE html>
<html>
<head>
<title>简易网页代理</title>
<style>
body { font-family: Arial, sans-serif; margin: 0; padding: 20px; }
.container { max-width: 800px; margin: 0 auto; }
.form-group { margin-bottom: 15px; }
input[type="text"] { width: 80%; padding: 8px; }
button { padding: 8px 15px; background: #4285f4; color: white; border: none; cursor: pointer; }
.note { margin-top: 20px; font-size: 0.9em; color: #666; }
</style>
</head>
<body>
<div class="container">
<h1>网页代理</h1>
<form action="/proxy" method="get">
<div class="form-group">
<input type="text" name="url" placeholder="输入完整URL (例如: https://github.com)" required>
<button type="submit">访问</button>
</div>
</form>
<div class="note">
<p>使用方法: 输入完整URL,包括https://前缀</p>
</div>
</div>
</body>
</html>
'''
40
 
41
# Matches a quoted src/href attribute. Group 2 captures the opening quote so
# the value must be closed by the same quote character.
_LINK_ATTR_RE = re.compile(r'(src|href)=(["\'])([^"\']+)\2', re.IGNORECASE)


def rewrite_html(content, original_url):
    """Rewrite ``src``/``href`` attributes so linked resources load via /proxy.

    Each link is resolved against ``original_url`` (the page's own URL, so
    path-relative links keep their directory), percent-encoded, and replaced
    with ``/proxy?url=<encoded absolute URL>``. Fragment-only links and links
    that do not resolve to http(s), such as ``mailto:``, ``javascript:`` and
    ``data:``, are left untouched.

    Args:
        content: Raw HTML as bytes, expected to be UTF-8.
        original_url: Absolute URL the HTML was fetched from.

    Returns:
        The rewritten HTML encoded as UTF-8 bytes. If anything goes wrong,
        for example the bytes are not valid UTF-8, the error is logged and
        ``content`` is returned unchanged.
    """
    # Imported locally so the function does not rely on the module's import
    # block also providing quote.
    from urllib.parse import quote

    def _to_proxy(match):
        attr, raw_value = match.group(1), match.group(3)
        # Only &amp; is decoded. Generic entity decoding would corrupt
        # unescaped query strings such as "?a=1&region=us".
        target = raw_value.replace('&amp;', '&').strip()
        if not target or target.startswith('#'):
            # In-page anchors must keep working without a round trip.
            return match.group(0)

        absolute = urljoin(original_url, target)
        if not absolute.lower().startswith(('http://', 'https://')):
            # The proxy can only fetch http(s) resources.
            return match.group(0)

        # Encode everything so '?', '&' and '#' in the target stay inside
        # the single "url" query parameter.
        encoded = quote(absolute, safe='')
        return f'{attr}="/proxy?url={encoded}"'

    try:
        content_str = content.decode('utf-8')
        return _LINK_ATTR_RE.sub(_to_proxy, content_str).encode('utf-8')
    except Exception as e:
        # Best effort: serving the page unmodified beats failing the request.
        logging.error(f"重写HTML出错: {e}")
        return content
62
 
63
@app.route('/')
def index():
    """Serve the landing page that contains the URL entry form."""
    landing_page = render_template_string(HTML_TEMPLATE)
    return landing_page
66
+
67
# Client headers that are not forwarded upstream. Host, Cookie and Referer
# belong to this proxy's origin. Accept-Encoding is left to requests so
# upstream only picks an encoding requests can decode.
_SKIPPED_REQUEST_HEADERS = frozenset({
    'host', 'cookie', 'referer', 'accept-encoding',
})

# Upstream headers that are not copied to the client. They describe the
# upstream connection or the encoded body, not the decoded and possibly
# rewritten bytes sent from here.
_SKIPPED_RESPONSE_HEADERS = frozenset({
    'content-encoding', 'content-length', 'transfer-encoding',
    'connection', 'host', 'access-control-allow-origin',
})


@app.route('/proxy')
def proxy():
    """Fetch the page named by the ``url`` query parameter and relay it.

    With no ``url`` the landing form is rendered. Otherwise the target is
    fetched with GET, following redirects. HTML bodies have their links
    rewritten to go back through this endpoint, and the upstream status and
    headers (minus the filtered ones) are copied onto the reply.

    Returns:
        The relayed response, the landing page when ``url`` is empty, or a
        JSON ``{"error", "url"}`` document with status 500 on failure.

    NOTE(review): any http(s) URL is fetched, including internal addresses.
    Consider restricting targets before exposing this publicly.
    """
    url = request.args.get('url', '').strip()
    if not url:
        return render_template_string(HTML_TEMPLATE)

    # Test for a real scheme prefix; a bare 'http' prefix would also match
    # host names such as "httpbin.org" and skip adding the scheme.
    if not url.lower().startswith(('http://', 'https://')):
        url = f"https://{url}"

    try:
        headers = {
            key: value
            for key, value in request.headers.items()
            if key.lower() not in _SKIPPED_REQUEST_HEADERS
        }
        headers['User-Agent'] = request.headers.get(
            'User-Agent', 'Mozilla/5.0 (compatible; SpacesProxy/1.0)')

        response = requests.get(
            url=url,
            headers=headers,
            timeout=30,
            allow_redirects=True,
        )

        content_type = response.headers.get('Content-Type', '')
        content = response.content

        # Rewrite links in HTML only. Resolve them against the final URL,
        # because redirects may have moved the page.
        if 'text/html' in content_type.lower():
            content = rewrite_html(content, response.url)

        resp = make_response(content)
        resp.status_code = response.status_code

        # Content-Type is copied verbatim here, charset included. It is not
        # overridden afterwards, so a non-UTF-8 page is not relabelled.
        for key, value in response.headers.items():
            if key.lower() not in _SKIPPED_RESPONSE_HEADERS:
                resp.headers[key] = value

        return resp

    except Exception as e:
        # Request boundary: report any failure as JSON, not a stack trace.
        logging.error(f"代理请求错误: {e}")
        return jsonify({"error": str(e), "url": url}), 500
114
 
115
if __name__ == '__main__':
    import os

    # The server listens on every interface, and Flask's debug mode exposes an
    # interactive debugger that can execute code. Debug is therefore opt-in
    # through the FLASK_DEBUG environment variable instead of always on.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run(host='0.0.0.0', port=7860, debug=debug_enabled)