sehsapneb committed on
Commit
2b3f02b
·
verified ·
1 Parent(s): d941691

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +222 -0
app.py ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Request, HTTPException
2
+ from fastapi.responses import HTMLResponse, Response
3
+ from urllib.parse import urlparse, urljoin, quote
4
+ import httpx
5
+ from bs4 import BeautifulSoup
6
+
7
+ app = FastAPI()
8
+
9
+ # ---- 简单首页(输入网址的页面) ----
10
+
11
+ INDEX_HTML = """
12
+ <!DOCTYPE html>
13
+ <html lang="zh-CN">
14
+ <head>
15
+ <meta charset="UTF-8" />
16
+ <title>简单反向代理</title>
17
+ <meta name="viewport" content="width=device-width, initial-scale=1" />
18
+ <style>
19
+ body { font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
20
+ background: #0f172a; color: #e5e7eb; display: flex; align-items: center;
21
+ justify-content: center; min-height: 100vh; margin: 0; }
22
+ .card { background: #020617; border-radius: 16px; padding: 32px; max-width: 600px;
23
+ width: 100%; box-shadow: 0 24px 60px rgba(0,0,0,0.6); border: 1px solid #1e293b;}
24
+ h1 { margin-top: 0; font-size: 24px; margin-bottom: 8px;}
25
+ p { margin-top: 0; margin-bottom: 16px; color: #9ca3af; font-size: 14px;}
26
+ form { display: flex; gap: 8px; }
27
+ input[type="url"] {
28
+ flex: 1; padding: 10px 12px; border-radius: 999px; border: 1px solid #334155;
29
+ background: #020617; color: #e5e7eb; font-size: 14px; outline: none;
30
+ }
31
+ input[type="url"]::placeholder { color: #6b7280; }
32
+ button {
33
+ padding: 10px 18px; border-radius: 999px; border: none; cursor: pointer;
34
+ font-size: 14px; font-weight: 600; background: #22c55e; color: #022c22;
35
+ }
36
+ button:hover { filter: brightness(1.05); }
37
+ small { color: #6b7280; font-size: 12px; display: block; margin-top: 12px;}
38
+ </style>
39
+ </head>
40
+ <body>
41
+ <main class="card">
42
+ <h1>🔁 简单反向代理</h1>
43
+ <p>输入一个要访问的 URL,例如 <code>https://example.com</code>。</p>
44
+ <form action="/proxy" method="get">
45
+ <input
46
+ type="url"
47
+ name="url"
48
+ placeholder="https://example.com"
49
+ required
50
+ />
51
+ <button type="submit">Go</button>
52
+ </form>
53
+ <small>仅支持公开的 http/https 网址,部分复杂 / 需要登录的网站可能无法正常工作。</small>
54
+ </main>
55
+ </body>
56
+ </html>
57
+ """
58
+
59
+
60
@app.get("/", response_class=HTMLResponse)
async def index():
    """Serve the landing page (the module-level ``INDEX_HTML`` markup)."""
    landing_page = INDEX_HTML
    return landing_page
63
+
64
+
65
+ # ---- 工具函数:校验和规范 URL ----
66
+
67
def _looks_like_host_port(raw: str) -> bool:
    """Return True when *raw* reads as ``host.name:port[/...]`` with no scheme."""
    host, colon, rest = raw.partition(":")
    # Require a dotted host so that opaque URIs such as "tel:12345" are not
    # mistaken for a host/port pair.
    if not colon or "." not in host or any(ch in host for ch in "/?#@"):
        return False
    port = rest
    for delimiter in "/?#":
        port = port.split(delimiter, 1)[0]
    return port.isascii() and port.isdigit()


def normalize_url(raw: str) -> str:
    """Return *raw* trimmed, with ``http://`` prepended when it has no scheme.

    Handles three scheme-less spellings:

    * ``example.com/path``      -> ``http://example.com/path``
    * ``//example.com/path``    -> ``http://example.com/path``
    * ``example.com:8080/path`` -> ``http://example.com:8080/path``
      (``urlparse`` on newer Python versions reports ``example.com`` as the
      scheme for this form, so it needs an explicit check).

    Inputs that already carry a scheme are returned unchanged apart from the
    surrounding whitespace being stripped.

    Raises:
        ValueError: if *raw* is empty after stripping, or if ``urlparse``
            rejects it as malformed.
    """
    raw = raw.strip()
    if not raw:
        raise ValueError("空 URL")

    # Protocol-relative form: only the scheme is missing, the "//" is present.
    if raw.startswith("//"):
        return "http:" + raw

    if not urlparse(raw).scheme or _looks_like_host_port(raw):
        return "http://" + raw
    return raw
76
+
77
+
78
def is_allowed_url(url: str) -> bool:
    """Return True only for http/https URLs whose host is not obviously internal.

    The check is purely syntactic and looks at the host written in *url*:

    * the scheme must be ``http`` or ``https`` and a host must be present;
    * ``localhost`` and any ``*.localhost`` name are rejected;
    * IP literals (IPv4 or IPv6, including IPv4-mapped IPv6) are accepted only
      when ``ipaddress`` classifies them as globally routable and not
      multicast, which rejects loopback, private, link-local, unspecified and
      reserved ranges;
    * non-canonical numeric hosts (``2130706433``, ``0x7f.0.0.1``, ...) are
      rejected because resolvers may interpret them as IPv4 addresses.

    No DNS lookup is performed, so a public-looking name that resolves to an
    internal address is not detected here.

    Malformed URLs return False instead of raising.
    """
    import ipaddress  # function-scope import keeps this block self-contained

    try:
        parsed = urlparse(url)
        # A trailing dot ("localhost.") denotes the same host.
        host = (parsed.hostname or "").lower().rstrip(".")
    except ValueError:
        # urlparse rejects malformed authorities such as an unbalanced "[".
        return False

    if parsed.scheme not in ("http", "https") or not host:
        return False

    if host == "localhost" or host.endswith(".localhost"):
        return False

    try:
        ip = ipaddress.ip_address(host)
    except ValueError:
        # Not a canonical IP literal.  A host containing ":" or whose last
        # label is numeric / hex-prefixed can still be read as an address by
        # lenient resolvers, so refuse it rather than guess.
        last_label = host.rsplit(".", 1)[-1]
        if ":" in host or last_label.isdigit() or last_label.startswith("0x"):
            return False
        return True

    # Judge "::ffff:a.b.c.d" by the embedded IPv4 address.
    mapped = getattr(ip, "ipv4_mapped", None)
    if mapped is not None:
        ip = mapped

    return ip.is_global and not ip.is_multicast
99
+
100
+
101
# Lowercase names of connection-scoped (hop-by-hop) headers.  They describe a
# single transport connection, so the proxy drops them in both directions
# instead of forwarding them.
HOP_BY_HOP_HEADERS = {
    "connection",
    "keep-alive",
    "proxy-authenticate",
    "proxy-authorization",
    "te",
    # "trailer" is the actual header field name; "trailers" is the spelling
    # that appears in RFC 2616's hop-by-hop list and is kept so existing
    # membership checks keep working.
    "trailer",
    "trailers",
    "transfer-encoding",
    "upgrade",
}
111
+
112
+
113
+ # ---- 重写 HTML 里的链接,使之继续走 /proxy ----
114
+
115
def rewrite_html(html: str, base_url: str) -> str:
    """Rewrite URLs in *html* so that navigation keeps going through ``/proxy``.

    For each supported tag/attribute pair the attribute value is resolved to
    an absolute URL and replaced by ``/proxy?url=<percent-encoded absolute>``.

    Args:
        html: The upstream document markup.
        base_url: URL the document was fetched from; relative references are
            resolved against it, or against the document's ``<base href>``
            when one is present.

    Returns:
        The serialized document with rewritten attributes.

    Values left untouched:
        * empty values and pure fragment links (``#...``);
        * anything that does not resolve to http/https (``javascript:``,
          ``data:``, ``mailto:``, ``tel:``, ...), since routing those through
          the proxy would break them;
        * values that ``urljoin`` rejects as malformed.
    """
    soup = BeautifulSoup(html, "html.parser")

    # Honour <base href> for resolution, then strip it: if it stayed in the
    # document the browser would resolve the rewritten "/proxy?url=..." links
    # against the upstream origin instead of this proxy.
    base_tags = soup.find_all("base", href=True)
    if base_tags:
        try:
            base_url = urljoin(base_url, base_tags[0]["href"].strip())
        except ValueError:
            pass  # malformed <base href>: keep resolving against base_url
        for base_tag in base_tags:
            del base_tag["href"]

    # Tags and the attribute on each that carries a URL.
    tag_attr_pairs = (
        ("a", "href"),
        ("link", "href"),
        ("img", "src"),
        ("script", "src"),
        ("iframe", "src"),
        ("source", "src"),
        ("video", "src"),
        ("form", "action"),
    )

    for tag, attr in tag_attr_pairs:
        for node in soup.find_all(tag):
            value = node.get(attr)
            if not value:
                continue

            value = value.strip()
            if not value or value.startswith("#"):
                continue

            # Relative reference -> absolute URL.
            try:
                absolute = urljoin(base_url, value)
                scheme = urlparse(absolute).scheme
            except ValueError:
                continue

            # Only http/https targets can be fetched by the proxy.
            if scheme not in ("http", "https"):
                continue

            node[attr] = f"/proxy?url={quote(absolute, safe='')}"

    return str(soup)
148
+
149
+
150
+ # ---- 反向代理核心:/proxy ----
151
+
152
@app.api_route(
    "/proxy",
    methods=["GET", "POST", "PUT", "PATCH", "DELETE", "HEAD", "OPTIONS"],
)
async def proxy(request: Request, url: str):
    """Forward the incoming request to *url* and relay the upstream response.

    Steps:
        1. Normalize and validate the target URL.
        2. Copy the client's request headers, minus hop-by-hop headers and the
           ones httpx must set itself.
        3. Send the request upstream without following redirects.
        4. Copy the upstream response headers; on a 3xx response the
           ``Location`` header is rewritten to point back at ``/proxy`` so the
           next hop is validated again by this handler.
        5. HTML bodies get their links rewritten; everything else is passed
           through unchanged.

    Args:
        request: The incoming request (method, headers and body are forwarded).
        url: Target URL, taken from the ``url`` query parameter.

    Raises:
        HTTPException: 400 for an empty, malformed or disallowed URL;
            504 when the upstream request times out;
            502 for any other upstream transport failure.
    """
    # 1. Normalize & validate the target.
    try:
        target_url = normalize_url(url)
        allowed = is_allowed_url(target_url)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail="无效 URL") from exc

    if not allowed:
        raise HTTPException(
            status_code=400,
            detail="仅支持公网 http/https 地址,且不允许访问内网 / 本地地址。",
        )

    # 2. Build the outgoing headers.
    #    - host / content-length: httpx derives them from the target and body.
    #    - accept-encoding: httpx advertises only encodings it can decode; the
    #      body below is relayed decoded (content-encoding is stripped), so
    #      forwarding the browser's list could produce an undecodable payload.
    # NOTE(review): Cookie / Authorization headers sent to this proxy are
    # forwarded to whatever target is requested -- confirm this is intended.
    skipped_request_headers = HOP_BY_HOP_HEADERS | {
        "host",
        "content-length",
        "accept-encoding",
    }
    outgoing_headers = {
        k: v
        for k, v in request.headers.items()
        if k.lower() not in skipped_request_headers
    }

    # Request body (GET/HEAD are sent without one).
    body = await request.body()

    # 3. Forward with httpx.  Redirects are NOT followed here: following them
    #    server-side would let an upstream bounce the proxy to an address that
    #    is_allowed_url() would have rejected.
    try:
        async with httpx.AsyncClient(follow_redirects=False, timeout=20.0) as client:
            upstream_resp = await client.request(
                request.method,
                target_url,
                content=body if request.method not in ("GET", "HEAD") else None,
                headers=outgoing_headers,
            )
    except httpx.TimeoutException as exc:
        raise HTTPException(status_code=504, detail="上游请求超时") from exc
    except (httpx.HTTPError, httpx.InvalidURL) as exc:
        raise HTTPException(status_code=502, detail="请求目标网站失败") from exc

    content_type = upstream_resp.headers.get("content-type", "")
    is_redirect = 300 <= upstream_resp.status_code < 400

    # 4. Relay response headers (drop hop-by-hop and body-framing headers,
    #    which no longer match the decoded / rewritten body).
    response_headers = {}
    for k, v in upstream_resp.headers.items():
        lk = k.lower()
        if lk in HOP_BY_HOP_HEADERS:
            continue
        if lk in ("content-length", "content-encoding"):
            # Recomputed by the framework for the body actually sent.
            continue
        if lk == "location" and is_redirect:
            # Send the client back through /proxy for the redirect target.
            try:
                absolute = urljoin(str(upstream_resp.url), v)
            except ValueError:
                absolute = v
            v = f"/proxy?url={quote(absolute, safe='')}"
        response_headers[k] = v

    # 5. HTML: rewrite embedded links so they keep going through /proxy.
    if "text/html" in content_type:
        rewritten_html = rewrite_html(
            upstream_resp.text,
            base_url=str(upstream_resp.url),
        )
        return HTMLResponse(
            content=rewritten_html,
            status_code=upstream_resp.status_code,
            headers=response_headers,
        )

    # 6. Anything else (CSS / JS / images, ...) is passed through as-is.
    return Response(
        content=upstream_resp.content,
        status_code=upstream_resp.status_code,
        headers=response_headers,
        media_type=content_type or None,
    )