lazyfling committed on
Commit
32c9c25
·
verified ·
1 Parent(s): 4496a6a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +178 -218
app.py CHANGED
@@ -1,272 +1,232 @@
1
- from flask import Flask, request, jsonify, make_response, render_template_string
2
  import requests
3
- import logging
4
  import re
5
- from urllib.parse import urljoin, urlparse, quote
6
- import base64
7
 
8
  logging.basicConfig(level=logging.INFO)
9
  app = Flask(__name__)
10
 
11
- HTML_TEMPLATE = '''
 
12
  <!DOCTYPE html>
13
  <html>
14
  <head>
15
- <title>网页代理</title>
16
  <style>
17
- body { font-family: Arial, sans-serif; margin: 0; padding: 20px; }
18
- .container { max-width: 800px; margin: 0 auto; }
19
- .form-group { margin-bottom: 15px; }
20
- input[type="text"] { width: 80%; padding: 8px; }
21
- button { padding: 8px 15px; background: #4285f4; color: white; border: none; cursor: pointer; }
22
- .note { margin-top: 20px; font-size: 0.9em; color: #666; }
23
  </style>
24
  </head>
25
  <body>
26
  <div class="container">
27
- <h1>高级网页代理</h1>
28
- <form action="/proxy" method="get">
29
- <div class="form-group">
30
- <input type="text" name="url" placeholder="输入完整URL (例如: https://github.com)" required>
31
- <button type="submit">访问</button>
32
- </div>
33
  </form>
34
  <div class="note">
35
- <p>使用说明: 输入任何网站的完整URLhttps://),即可通过代理访问</p>
36
  </div>
37
  </div>
 
 
 
 
 
 
 
 
 
38
  </body>
39
  </html>
40
- '''
41
 
42
- def create_proxy_url(original_url, base_url):
43
- """创建代理URL"""
44
- if not original_url:
45
- return ""
46
-
47
- # 处理相对URL
48
- if not original_url.startswith('http'):
49
- original_url = urljoin(base_url, original_url)
50
-
51
- return f"/proxy?url={quote(original_url)}"
52
 
53
- def rewrite_html(content, base_url):
54
- """全面重写HTML内容中的资源引用"""
55
  try:
56
- content_str = content.decode('utf-8', errors='replace')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
- # 1. 替换基本的src和href属性
59
- patterns = [
60
- # 标准src和href属性
61
- (r'(src|href)=["\'](.*?)["\']',
62
- lambda m: f'{m.group(1)}="{create_proxy_url(m.group(2), base_url)}"'),
63
-
64
- # data-src属性 (延迟加载)
65
- (r'data-src=["\'](.*?)["\']',
66
- lambda m: f'data-src="{create_proxy_url(m.group(1), base_url)}"'),
67
-
68
- # srcset属性 (响应式图片)
69
- (r'srcset=["\'](.*?)["\']',
70
- lambda m: f'srcset="{" ".join([" ".join([create_proxy_url(p.strip().split(" ")[0], base_url)] + p.strip().split(" ")[1:]) for p in m.group(1).split(",")])}'),
71
-
72
- # CSS中的url()
73
- (r'url\([\'"]?(.*?)[\'"]?\)',
74
- lambda m: f'url({create_proxy_url(m.group(1), base_url)})'),
75
-
76
- # action属性 (表单)
77
- (r'action=["\'](.*?)["\']',
78
- lambda m: f'action="{create_proxy_url(m.group(1), base_url)}"'),
79
-
80
- # meta刷新
81
- (r'<meta[^>]*?http-equiv=["\'](refresh)["\'][^>]*?content=["\'](.*?)["\'][^>]*?>',
82
- lambda m: re.sub(r'url=(.*)', lambda u: f'url={create_proxy_url(u.group(1), base_url)}', m.group(0))),
83
- ]
84
 
85
- for pattern, replacement in patterns:
86
- content_str = re.sub(pattern, replacement, content_str)
87
 
88
- # 2. 注入代理处理脚本到HTML
89
- proxy_script = '''
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  <script>
91
- // 重写XHR和Fetch请求,使其通过代理
92
  (function() {
93
- // 保存原始方法
94
- const originalXhrOpen = XMLHttpRequest.prototype.open;
95
- const originalFetch = window.fetch;
 
 
 
 
96
 
97
  // 重写XHR
 
98
  XMLHttpRequest.prototype.open = function(method, url, async, user, password) {
99
- // 转换URL到代理URL
100
- if (url && !url.startsWith('/proxy') && !url.startsWith('data:')) {
101
- const absoluteUrl = new URL(url, window.location).href;
102
- url = '/proxy?url=' + encodeURIComponent(absoluteUrl);
103
  }
104
- return originalXhrOpen.call(this, method, url, async, user, password);
105
  };
106
 
107
  // 重写Fetch
 
108
  window.fetch = function(resource, init) {
109
- if (typeof resource === 'string' && !resource.startsWith('/proxy') && !resource.startsWith('data:')) {
110
- const absoluteUrl = new URL(resource, window.location).href;
111
- resource = '/proxy?url=' + encodeURIComponent(absoluteUrl);
112
  }
113
- return originalFetch.call(this, resource, init);
114
  };
115
 
116
- // 监听所有DOM变化重写新添加元素的URL
117
- const observer = new MutationObserver(function(mutations) {
118
- mutations.forEach(function(mutation) {
119
- if (mutation.type === 'childList') {
120
- mutation.addedNodes.forEach(function(node) {
121
- if (node.nodeType === 1) { // 元素节点
122
- rewireElement(node);
123
- const elements = node.querySelectorAll('*');
124
- elements.forEach(rewireElement);
125
- }
126
- });
127
- }
128
- });
129
- });
130
-
131
- function rewireElement(el) {
132
- // 处理src和href属性
133
- ['src', 'href', 'action', 'data-src'].forEach(attr => {
134
- if (el.hasAttribute && el.hasAttribute(attr)) {
135
- let val = el.getAttribute(attr);
136
- if (val && !val.startsWith('/proxy') && !val.startsWith('data:') && !val.startsWith('javascript:')) {
137
- const absoluteUrl = new URL(val, window.location).href;
138
- el.setAttribute(attr, '/proxy?url=' + encodeURIComponent(absoluteUrl));
139
- }
140
- }
141
- });
142
-
143
- // 处理样式表中的url()
144
- if (el.style && el.style.cssText) {
145
- el.style.cssText = el.style.cssText.replace(/url\(['"]?(.*?)['"]?\)/g, function(match, url) {
146
- if (!url.startsWith('/proxy') && !url.startsWith('data:')) {
147
- const absoluteUrl = new URL(url, window.location).href;
148
- return `url('/proxy?url=${encodeURIComponent(absoluteUrl)}')`;
149
- }
150
- return match;
151
- });
152
  }
153
- }
154
-
155
- // 启动观察器
156
- observer.observe(document, { childList: true, subtree: true });
157
-
158
- // 初始处理所有元素
159
- document.addEventListener('DOMContentLoaded', function() {
160
- document.querySelectorAll('*').forEach(rewireElement);
161
- });
162
  })();
163
  </script>
164
- '''
165
 
166
- # 在head标签结束前入脚本
167
- if '<head>' in content_str:
168
- content_str = content_str.replace('</head>', f'{proxy_script}</head>')
169
  else:
170
- content_str = f'{proxy_script}\n{content_str}'
171
-
172
- # 3. 添加base标签,帮助处理相对URL
173
- base_tag = f'<base href="{base_url}">'
174
- if '<head>' in content_str:
175
- content_str = content_str.replace('<head>', f'<head>{base_tag}')
176
-
177
- return content_str.encode('utf-8', errors='replace')
178
- except Exception as e:
179
- logging.error(f"重写HTML出错: {e}")
180
- return content
181
-
182
- def rewrite_css(content, base_url):
183
- """重写CSS中的URL引用"""
184
- try:
185
- content_str = content.decode('utf-8', errors='replace')
186
- # 处理CSS中的url()引用
187
- content_str = re.sub(
188
- r'url\([\'"]?(.*?)[\'"]?\)',
189
- lambda m: f'url({create_proxy_url(m.group(1), base_url)})',
190
- content_str
191
- )
192
  return content_str.encode('utf-8', errors='replace')
193
  except Exception as e:
194
- logging.error(f"CSS出错: {e}")
195
  return content
196
 
197
- @app.route('/')
198
- def index():
199
- return render_template_string(HTML_TEMPLATE)
200
-
201
- @app.route('/proxy')
202
- def proxy():
203
  url = request.args.get('url', '')
204
  if not url:
205
- return render_template_string(HTML_TEMPLATE)
206
 
207
- if not url.startswith('http'):
208
- url = f"https://{url}"
209
-
210
- try:
211
- logging.info(f"代理请求: {url}")
212
-
213
- # 构建请求头
214
- headers = {
215
- 'User-Agent': request.headers.get('User-Agent', 'Mozilla/5.0 (compatible; SpacesProxy/1.0)'),
216
- 'Accept': request.headers.get('Accept', '*/*'),
217
- 'Accept-Language': request.headers.get('Accept-Language', 'en-US,en;q=0.9'),
218
- 'Accept-Encoding': 'identity', # 禁用压缩以简化处理
219
- 'Connection': 'keep-alive',
220
- 'Referer': url # 使用目标URL作为Referer
221
- }
222
-
223
- # 发送请求获取响应
224
- response = requests.get(
225
- url=url,
226
- headers=headers,
227
- timeout=30,
228
- allow_redirects=True,
229
- verify=False # 禁用SSL验证
230
- )
231
-
232
- # 获取内容类型
233
- content_type = response.headers.get('Content-Type', '').lower()
234
- content = response.content
235
-
236
- # 根据内容类型进行不同处理
237
- if 'text/html' in content_type:
238
- content = rewrite_html(content, url)
239
- elif 'text/css' in content_type:
240
- content = rewrite_css(content, url)
241
- # 其他类型保持原样
242
-
243
- # 创建响应
244
- resp = make_response(content)
245
- resp.status_code = response.status_code
246
-
247
- # 设置响应头
248
- for key, value in response.headers.items():
249
- if key.lower() not in ['content-encoding', 'content-length', 'transfer-encoding',
250
- 'connection', 'host', 'access-control-allow-origin',
251
- 'content-security-policy', 'x-frame-options']:
252
- resp.headers[key] = value
253
-
254
- # 设置正确的内容类型
255
- if 'Content-Type' in response.headers:
256
- resp.headers['Content-Type'] = response.headers['Content-Type']
257
-
258
- # 添加允许跨域的头
259
- resp.headers['Access-Control-Allow-Origin'] = '*'
260
-
261
- return resp
262
-
263
- except Exception as e:
264
- logging.error(f"代理请求错误: {e}")
265
- return jsonify({
266
- "error": str(e),
267
- "url": url,
268
- "提示": "请检查URL是否正确,或者该网站是否可以正常访问"
269
- }), 500
270
 
271
  if __name__ == '__main__':
272
  app.run(host='0.0.0.0', port=7860, debug=True)
 
1
+ from flask import Flask, request, Response
2
  import requests
 
3
  import re
4
+ import urllib.parse
5
+ import logging
6
 
7
  logging.basicConfig(level=logging.INFO)
8
  app = Flask(__name__)
9
 
10
+ # 首页HTML
11
+ HOME_PAGE = """
12
  <!DOCTYPE html>
13
  <html>
14
  <head>
15
+ <title>简易Web代理</title>
16
  <style>
17
+ body { font-family: Arial, sans-serif; text-align: center; margin-top: 50px; }
18
+ .container { max-width: 600px; margin: 0 auto; }
19
+ input[type="text"] { width: 70%; padding: 10px; margin: 10px 0; }
20
+ button { padding: 10px 20px; background: #4285f4; color: white; border: none; cursor: pointer; }
21
+ .note { margin-top: 20px; color: #666; font-size: 0.9em; }
 
22
  </style>
23
  </head>
24
  <body>
25
  <div class="container">
26
+ <h1>Web代理</h1>
27
+ <form id="proxyForm">
28
+ <input type="text" id="urlInput" placeholder="输入要访问的URL (例如: https://github.com)" required>
29
+ <button type="submit">访问</button>
 
 
30
  </form>
31
  <div class="note">
32
+ <p>输入完整URLhttps://前缀</p>
33
  </div>
34
  </div>
35
+ <script>
36
+ document.getElementById('proxyForm').addEventListener('submit', function(e) {
37
+ e.preventDefault();
38
+ const url = document.getElementById('urlInput').value;
39
+ if(url) {
40
+ window.location.href = '/proxy/' + encodeURIComponent(btoa(url));
41
+ }
42
+ });
43
+ </script>
44
  </body>
45
  </html>
46
+ """
47
 
48
+ @app.route('/')
49
+ def index():
50
+ return HOME_PAGE
 
 
 
 
 
 
 
51
 
52
+ @app.route('/proxy/<path:encoded_url>')
53
+ def proxy_path(encoded_url):
54
  try:
55
+ # Base64解码URL
56
+ url = urllib.parse.unquote(encoded_url)
57
+ url = str(base64_decode(url))
58
+ return proxy_request(url)
59
+ except Exception as e:
60
+ logging.error(f"代理路径出错: {e}")
61
+ return f"代理错误: {str(e)}", 500
62
+
63
+ def base64_decode(encoded):
64
+ # 添加填充以确保正确解码
65
+ padding = 4 - (len(encoded) % 4)
66
+ if padding < 4:
67
+ encoded += "=" * padding
68
+ try:
69
+ return bytes.decode(base64.b64decode(encoded))
70
+ except:
71
+ try:
72
+ # 尝试URL安全的Base64解码
73
+ return bytes.decode(base64.urlsafe_b64decode(encoded))
74
+ except:
75
+ # 如果解码失败,返回原始值
76
+ return encoded
77
+
78
+ def proxy_request(url):
79
+ try:
80
+ # 如果URL不是完整的HTTP URL,添加协议
81
+ if not url.startswith('http'):
82
+ url = 'https://' + url
83
 
84
+ logging.info(f"代理请求: {url}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
+ # 创建会话以便保留cookies等状态
87
+ session = requests.Session()
88
 
89
+ # 构建请求
90
+ headers = {
91
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
92
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
93
+ 'Accept-Language': 'en-US,en;q=0.5',
94
+ 'Accept-Encoding': 'identity', # 禁用压缩
95
+ 'Connection': 'keep-alive',
96
+ 'Upgrade-Insecure-Requests': '1',
97
+ }
98
+
99
+ # 发送请求
100
+ resp = session.get(url, headers=headers, stream=True, verify=False, timeout=30)
101
+
102
+ # 获取响应内容
103
+ content = resp.content
104
+ content_type = resp.headers.get('Content-Type', 'text/html')
105
+
106
+ # 如果是HTML内容,修改所有链接
107
+ if 'text/html' in content_type:
108
+ content = rewrite_links(content, url)
109
+
110
+ # 创建响应对象
111
+ response = Response(content)
112
+
113
+ # 复制原始响应的头信息
114
+ for key, value in resp.headers.items():
115
+ if key.lower() not in ['content-encoding', 'content-length', 'transfer-encoding',
116
+ 'connection', 'content-security-policy']:
117
+ response.headers[key] = value
118
+
119
+ return response
120
+
121
+ except Exception as e:
122
+ logging.error(f"代理请求失败: {e}")
123
+ return f"访问 {url} 失败: {str(e)}", 500
124
+
125
+ def rewrite_links(content, base_url):
126
+ try:
127
+ # 解码内容
128
+ content_str = content.decode('utf-8', errors='replace')
129
+
130
+ # 解析基础URL信息
131
+ parsed_base = urllib.parse.urlparse(base_url)
132
+ base_domain = f"{parsed_base.scheme}://{parsed_base.netloc}"
133
+
134
+ # 替换所有链接
135
+ def encode_url(url):
136
+ # 将相对URL转为绝对URL
137
+ if url.startswith('/'):
138
+ absolute_url = base_domain + url
139
+ elif not url.startswith('http') and not url.startswith('data:') and not url.startswith('#'):
140
+ absolute_url = urllib.parse.urljoin(base_url, url)
141
+ else:
142
+ absolute_url = url
143
+
144
+ # 将URL编码为Base64并添加代理前缀
145
+ if absolute_url.startswith('http'):
146
+ encoded = base64.b64encode(absolute_url.encode()).decode()
147
+ return f"/proxy/{urllib.parse.quote(encoded)}"
148
+ return url
149
+
150
+ # 1. 替换href属性
151
+ content_str = re.sub(r'href=["\'](.*?)["\']',
152
+ lambda m: f'href="{encode_url(m.group(1))}"', content_str)
153
+
154
+ # 2. 替换src属性
155
+ content_str = re.sub(r'src=["\'](.*?)["\']',
156
+ lambda m: f'src="{encode_url(m.group(1))}"', content_str)
157
+
158
+ # 3. 替换action属性(表单)
159
+ content_str = re.sub(r'action=["\'](.*?)["\']',
160
+ lambda m: f'action="{encode_url(m.group(1))}"', content_str)
161
+
162
+ # 4. 替换CSS中的URL
163
+ content_str = re.sub(r'url\([\'"]?(.*?)[\'"]?\)',
164
+ lambda m: f'url({encode_url(m.group(1))})', content_str)
165
+
166
+ # 5. 注入处理JavaScript的脚本
167
+ script = """
168
  <script>
 
169
  (function() {
170
+ // 创建Base64编码函数
171
+ function encodeURL(url) {
172
+ if(url && url.startsWith('http')) {
173
+ return '/proxy/' + encodeURIComponent(btoa(url));
174
+ }
175
+ return url;
176
+ }
177
 
178
  // 重写XHR
179
+ const originalOpen = XMLHttpRequest.prototype.open;
180
  XMLHttpRequest.prototype.open = function(method, url, async, user, password) {
181
+ if(url && url.startsWith('http')) {
182
+ arguments[1] = encodeURL(url);
 
 
183
  }
184
+ return originalOpen.apply(this, arguments);
185
  };
186
 
187
  // 重写Fetch
188
+ const originalFetch = window.fetch;
189
  window.fetch = function(resource, init) {
190
+ if(typeof resource === 'string' && resource.startsWith('http')) {
191
+ arguments[0] = encodeURL(resource);
 
192
  }
193
+ return originalFetch.apply(this, arguments);
194
  };
195
 
196
+ // 监听所有点击事件处理链接
197
+ document.addEventListener('click', function(e) {
198
+ const target = e.target.closest('a');
199
+ if(target && target.href && target.href.startsWith('http') &&
200
+ !target.href.includes('/proxy/')) {
201
+ e.preventDefault();
202
+ location.href = encodeURL(target.href);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
  }
204
+ }, true);
 
 
 
 
 
 
 
 
205
  })();
206
  </script>
207
+ """
208
 
209
+ # 在body结束前入脚本
210
+ if '</body>' in content_str:
211
+ content_str = content_str.replace('</body>', script + '</body>')
212
  else:
213
+ content_str += script
214
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
  return content_str.encode('utf-8', errors='replace')
216
  except Exception as e:
217
+ logging.error(f"���链接出错: {e}")
218
  return content
219
 
220
+ import base64
221
+ @app.route('/proxy', methods=['GET'])
222
+ def proxy_get():
 
 
 
223
  url = request.args.get('url', '')
224
  if not url:
225
+ return HOME_PAGE
226
 
227
+ # 对URL进行Base64编码并重定向
228
+ encoded_url = base64.b64encode(url.encode()).decode()
229
+ return proxy_request(url)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
 
231
  if __name__ == '__main__':
232
  app.run(host='0.0.0.0', port=7860, debug=True)