"""Simple FastAPI reverse proxy: "/" serves a URL entry form, "/proxy" fetches the requested page."""
import ipaddress
import socket
from urllib.parse import urlparse, urljoin, quote

import httpx
from bs4 import BeautifulSoup
from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import HTMLResponse, Response
# FastAPI application instance; the route decorators in this module register handlers on it.
app = FastAPI()
# ==========================
# Home page: URL input form
# ==========================
# Static HTML document returned by the "/" route. Its form submits the
# "url" field via GET to /proxy.
INDEX_HTML = """
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8" />
<title>简单反向代理</title>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<style>
body { font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
background: #0f172a; color: #e5e7eb; display: flex; align-items: center;
justify-content: center; min-height: 100vh; margin: 0; }
.card { background: #020617; border-radius: 16px; padding: 32px; max-width: 600px;
width: 100%; box-shadow: 0 24px 60px rgba(0,0,0,0.6); border: 1px solid #1e293b;}
h1 { margin-top: 0; font-size: 24px; margin-bottom: 8px;}
p { margin-top: 0; margin-bottom: 16px; color: #9ca3af; font-size: 14px;}
form { display: flex; gap: 8px; }
input[type="url"] {
flex: 1; padding: 10px 12px; border-radius: 999px; border: 1px solid #334155;
background: #020617; color: #e5e7eb; font-size: 14px; outline: none;
}
input[type="url"]::placeholder { color: #6b7280; }
button {
padding: 10px 18px; border-radius: 999px; border: none; cursor: pointer;
font-size: 14px; font-weight: 600; background: #22c55e; color: #022c22;
}
button:hover { filter: brightness(1.05); }
small { color: #6b7280; font-size: 12px; display: block; margin-top: 12px;}
</style>
</head>
<body>
<main class="card">
<h1>🔁 简单反向代理</h1>
<p>输入一个要访问的 URL,例如 <code>https://example.com</code>。</p>
<form action="/proxy" method="get">
<input
type="url"
name="url"
placeholder="https://example.com"
required
/>
<button type="submit">Go</button>
</form>
<small>仅支持公开的 http/https 网址,部分复杂 / 需要登录的网站可能无法正常工作。</small>
</main>
</body>
</html>
"""
@app.get("/", response_class=HTMLResponse)
async def index():
    """Serve the landing page that holds the URL entry form."""
    landing_page = INDEX_HTML
    return landing_page
# ==========================
# Utility functions & constants
# ==========================
def _looks_like_host_port(raw: str) -> bool:
    """Return True when *raw* starts with ``host:<digits>`` rather than ``scheme:``."""
    host, colon, rest = raw.partition(":")
    if not colon or not host or any(ch in host for ch in "/?#"):
        return False
    # Isolate what would be the port: everything up to the path/query/fragment.
    port = rest
    for delimiter in "/?#":
        port = port.split(delimiter, 1)[0]
    return port.isascii() and port.isdigit()


def normalize_url(raw: str) -> str:
    """Return *raw* with ``http://`` prepended when it carries no scheme.

    ``urlparse`` reads ``example.com:8080`` as scheme ``example.com``, so
    ``host:port`` input is detected separately and also gets the default
    scheme. Protocol-relative input (``//host/path``) becomes
    ``http://host/path``.

    Args:
        raw: URL text as supplied by the caller; surrounding whitespace is
            ignored.

    Returns:
        The stripped URL, guaranteed to start with a scheme.

    Raises:
        ValueError: If *raw* is empty after stripping, or if ``urlparse``
            rejects it.
    """
    raw = raw.strip()
    if not raw:
        raise ValueError("空 URL")
    if raw.startswith("//"):
        # Protocol-relative URL: only the scheme name is missing.
        return "http:" + raw
    if not urlparse(raw).scheme or _looks_like_host_port(raw):
        return "http://" + raw
    return raw
def _parse_ip_literal(host: str):
    """Return an ``ipaddress`` object when *host* is an IP literal, else ``None``."""
    candidate = host.split("%", 1)[0]  # drop an IPv6 zone id such as "%eth0"
    try:
        return ipaddress.ip_address(candidate)
    except ValueError:
        pass
    try:
        # inet_aton also understands legacy IPv4 spellings ("127.1",
        # "0x7f000001", "2130706433") that ipaddress rejects.
        return ipaddress.IPv4Address(socket.inet_aton(candidate))
    except (OSError, ValueError):
        return None


def _is_public_ip(ip) -> bool:
    """Return True only for globally routable unicast addresses."""
    mapped = getattr(ip, "ipv4_mapped", None)
    if mapped is not None:
        # Judge "::ffff:a.b.c.d" by the IPv4 address it embeds.
        ip = mapped
    return ip.is_global and not (
        ip.is_private
        or ip.is_loopback
        or ip.is_link_local
        or ip.is_multicast
        or ip.is_reserved
        or ip.is_unspecified
    )


def is_allowed_url(url: str) -> bool:
    """Return True when *url* is an http/https URL whose host is not local/private.

    Rejected: non-http(s) schemes, URLs without a host, ``localhost`` and
    ``*.localhost`` names, and IP literals (IPv4, IPv6, IPv4-mapped IPv6 and
    legacy numeric IPv4 forms) that are loopback, private, link-local,
    multicast, reserved or unspecified.

    Host names are NOT resolved here, so a public-looking name that resolves
    to an internal address is not detected by this check.

    Args:
        url: Absolute URL to validate.

    Returns:
        ``True`` if the URL may be fetched, ``False`` otherwise.
    """
    try:
        parsed = urlparse(url)
        host = (parsed.hostname or "").lower()
    except ValueError:
        # urlparse raises on malformed bracketed hosts, e.g. "http://[::1".
        return False
    if parsed.scheme not in ("http", "https"):
        return False
    host = host.rstrip(".")  # "localhost." is the same host as "localhost"
    if not host:
        return False
    if host == "localhost" or host.endswith(".localhost"):
        return False
    ip = _parse_ip_literal(host)
    if ip is None:
        # An ordinary DNS name; nothing more can be decided without resolving it.
        return True
    return _is_public_ip(ip)
# Connection-level headers that apply to a single hop and must not be
# forwarded by a proxy. Names are lower-case because callers compare against
# lower-cased header names.
HOP_BY_HOP_HEADERS = {
    "connection",
    "keep-alive",
    "proxy-authenticate",
    "proxy-authorization",
    "te",
    "trailer",  # the actual header field name
    "trailers",  # legacy spelling, kept for backward compatibility
    "transfer-encoding",
    "upgrade",
}
# Upstream response headers dropped before the response is returned to the
# browser (framing / content-security restrictions set by the upstream site).
BLOCKED_HEADERS = {
    "x-frame-options",
    "content-security-policy",
}
def rewrite_html(html: str, base_url: str) -> str:
    """Rewrite links in *html* so that navigation keeps going through ``/proxy``.

    For each supported tag/attribute pair the attribute value is resolved
    against *base_url* and replaced by ``/proxy?url=<percent-encoded URL>``.
    Values left untouched: empty values, fragment-only links (``#...``),
    values whose resolved scheme is not http/https (``javascript:``,
    ``data:``, ``mailto:``, ``tel:``, ``blob:`` ...), and values that cannot
    be parsed as a URL.

    Args:
        html: HTML document text.
        base_url: URL the document was fetched from; used to resolve
            relative links.

    Returns:
        The serialised document with rewritten link attributes.
    """
    soup = BeautifulSoup(html, "html.parser")
    tag_attr_pairs = (
        ("a", "href"),
        ("link", "href"),
        ("img", "src"),
        ("script", "src"),
        ("iframe", "src"),
        ("source", "src"),
        ("video", "src"),
        ("form", "action"),
    )
    for tag, attr in tag_attr_pairs:
        for node in soup.find_all(tag):
            value = node.get(attr)
            if not value:
                continue
            target = value.strip()
            if not target or target.startswith("#"):
                continue
            try:
                absolute = urljoin(base_url, target)
                scheme = urlparse(absolute).scheme.lower()
            except ValueError:
                # Malformed URL (e.g. unbalanced "[" in the host): leave as-is.
                continue
            if scheme not in ("http", "https"):
                # The proxy endpoint only accepts http/https targets, so
                # routing data:/mailto:/javascript: URLs through it would
                # only break them.
                continue
            node[attr] = f"/proxy?url={quote(absolute, safe='')}"
    return str(soup)
# ==========================
# Reverse-proxy main logic
# ==========================
# MIME types (besides image/video/audio/font) that are passed through without
# a forced download.
_PASSTHROUGH_MIME_TYPES = {
    "text/css",
    "application/javascript",
    "text/javascript",
    "application/x-javascript",
}


def _is_passthrough_type(mime: str) -> bool:
    """Return True for media types returned unchanged, without a download prompt."""
    return (
        mime.startswith(("image/", "video/", "audio/"))
        or "font" in mime
        or mime in _PASSTHROUGH_MIME_TYPES
    )


@app.api_route(
    "/proxy",
    methods=["GET", "POST", "PUT", "PATCH", "DELETE", "HEAD", "OPTIONS"],
)
async def proxy(request: Request, url: str):
    """Fetch *url* from the upstream server and relay the response.

    Steps:
      1. Normalise and validate the target URL.
      2. Copy the incoming request headers, minus hop-by-hop headers,
         ``Host``, ``Accept-Encoding`` and ``Content-Length``.
      3. Send the request upstream without following redirects. A
         ``Location`` header is rewritten to point back at ``/proxy`` so
         every redirect hop is validated again by this handler.
      4. Copy the upstream response headers, keeping repeated headers such as
         ``Set-Cookie`` as separate entries.
      5. HTML is link-rewritten and returned as UTF-8. Images, media, fonts,
         CSS and JS are returned unchanged. Anything else is returned with
         ``Content-Disposition: attachment`` unless upstream already set it.

    Args:
        request: Incoming request; its method, headers and body are forwarded.
        url: Target URL taken from the ``url`` query parameter.

    Returns:
        A ``Response`` carrying the upstream status code.

    Raises:
        HTTPException: 400 for an invalid or disallowed URL, 504 when the
            upstream request times out, 502 for other upstream request
            failures.
    """
    # 1. Normalise and validate the URL.
    try:
        target_url = normalize_url(url)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail="无效 URL") from exc
    if not is_allowed_url(target_url):
        raise HTTPException(
            status_code=400,
            detail="仅支持公网 http/https 地址,且不允许访问内网 / 本地地址。",
        )

    # 2. Build the upstream request headers. Content-Length is dropped because
    #    httpx derives it from the body that is actually sent.
    skipped_request_headers = HOP_BY_HOP_HEADERS | {
        "host",
        "accept-encoding",
        "content-length",
    }
    outgoing_headers = {
        name: value
        for name, value in request.headers.items()
        if name.lower() not in skipped_request_headers
    }
    # Ask upstream not to compress, which avoids content-encoding mismatches.
    outgoing_headers["Accept-Encoding"] = "identity"
    body = await request.body()
    sends_body = request.method not in ("GET", "HEAD")

    # 3. Forward the request. Redirects are NOT followed here: following them
    #    inside httpx would let upstream bounce the proxy to an address that
    #    never went through is_allowed_url().
    try:
        async with httpx.AsyncClient(
            follow_redirects=False,
            timeout=20.0,
        ) as client:
            upstream_resp = await client.request(
                request.method,
                target_url,
                content=body if sends_body else None,
                headers=outgoing_headers,
            )
    except httpx.InvalidURL as exc:
        raise HTTPException(status_code=400, detail="无效 URL") from exc
    except httpx.TimeoutException as exc:
        raise HTTPException(status_code=504, detail="上游服务器响应超时。") from exc
    except httpx.HTTPError as exc:
        raise HTTPException(status_code=502, detail="无法从上游服务器获取响应。") from exc

    raw_content_type = upstream_resp.headers.get("content-type") or ""
    # Bare media type without parameters, e.g. "text/css; charset=utf-8" -> "text/css".
    mime = raw_content_type.split(";", 1)[0].strip().lower()
    is_html = mime in ("text/html", "application/xhtml+xml")

    # 4. Collect the response headers as (name, value) pairs so duplicates survive.
    skipped_response_headers = (
        HOP_BY_HOP_HEADERS
        | BLOCKED_HEADERS
        # Length and encoding are recomputed for the body we send.
        | {"content-length", "content-encoding"}
    )
    if is_html:
        # The rewritten page is re-encoded as UTF-8, so the upstream
        # Content-Type (and its charset) no longer describes the body.
        skipped_response_headers = skipped_response_headers | {"content-type"}
    response_headers = []
    for name, value in upstream_resp.headers.multi_items():
        lowered = name.lower()
        if lowered in skipped_response_headers:
            continue
        if lowered == "location":
            absolute = urljoin(target_url, value)
            value = f"/proxy?url={quote(absolute, safe='')}"
        response_headers.append((name, value))

    if is_html:
        # 5. HTML: rewrite links and serve the page (httpx decodes by charset).
        rewritten_html = rewrite_html(
            upstream_resp.text, base_url=str(upstream_resp.url)
        )
        response = HTMLResponse(
            content=rewritten_html,
            status_code=upstream_resp.status_code,
        )
    else:
        # 6. Images / video / audio / fonts / CSS / JS pass through unchanged.
        # 7. Everything else is forced to download instead of being rendered
        #    as garbled text in the browser window.
        has_disposition = any(
            name.lower() == "content-disposition" for name, _ in response_headers
        )
        if not _is_passthrough_type(mime) and not has_disposition:
            response_headers.append(
                ("Content-Disposition", "attachment; filename=downloaded.bin")
            )
        response = Response(
            content=upstream_resp.content,
            status_code=upstream_resp.status_code,
            # The upstream Content-Type header is appended below; only supply
            # a default when upstream sent none.
            media_type=None if raw_content_type else "application/octet-stream",
        )

    for name, value in response_headers:
        response.headers.append(name, value)
    return response
|