mi / app.py
sehsapneb's picture
Update app.py
c4c13a1 verified
from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import HTMLResponse, Response
from urllib.parse import urlparse, urljoin, quote
import httpx
from bs4 import BeautifulSoup
# ASGI application object; the route decorators in this file register handlers on it.
app = FastAPI()
# ==========================
# Home page: form for entering the target URL
# ==========================
# Static landing page markup; its form submits the entered URL to /proxy
# as the `url` query parameter (GET).
INDEX_HTML = """
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8" />
<title>简单反向代理</title>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<style>
body { font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
background: #0f172a; color: #e5e7eb; display: flex; align-items: center;
justify-content: center; min-height: 100vh; margin: 0; }
.card { background: #020617; border-radius: 16px; padding: 32px; max-width: 600px;
width: 100%; box-shadow: 0 24px 60px rgba(0,0,0,0.6); border: 1px solid #1e293b;}
h1 { margin-top: 0; font-size: 24px; margin-bottom: 8px;}
p { margin-top: 0; margin-bottom: 16px; color: #9ca3af; font-size: 14px;}
form { display: flex; gap: 8px; }
input[type="url"] {
flex: 1; padding: 10px 12px; border-radius: 999px; border: 1px solid #334155;
background: #020617; color: #e5e7eb; font-size: 14px; outline: none;
}
input[type="url"]::placeholder { color: #6b7280; }
button {
padding: 10px 18px; border-radius: 999px; border: none; cursor: pointer;
font-size: 14px; font-weight: 600; background: #22c55e; color: #022c22;
}
button:hover { filter: brightness(1.05); }
small { color: #6b7280; font-size: 12px; display: block; margin-top: 12px;}
</style>
</head>
<body>
<main class="card">
<h1>🔁 简单反向代理</h1>
<p>输入一个要访问的 URL,例如 <code>https://example.com</code>。</p>
<form action="/proxy" method="get">
<input
type="url"
name="url"
placeholder="https://example.com"
required
/>
<button type="submit">Go</button>
</form>
<small>仅支持公开的 http/https 网址,部分复杂 / 需要登录的网站可能无法正常工作。</small>
</main>
</body>
</html>
"""
@app.get("/", response_class=HTMLResponse)
async def index():
    """Serve the landing page that contains the URL entry form."""
    return INDEX_HTML
# ==========================
# Utility functions & constants
# ==========================
def normalize_url(raw: str) -> str:
    """Return *raw* stripped, with ``http://`` prepended when it has no scheme.

    Scheme-less ``host:port`` input (``example.com:8080/path``) is also
    treated as missing a scheme: ``urlparse`` would otherwise read the host
    name as the scheme and the URL would be rejected later on.

    Args:
        raw: The URL text as typed by the user.

    Returns:
        The stripped URL, guaranteed to start with some scheme.

    Raises:
        ValueError: If *raw* is empty or contains only whitespace.
    """
    raw = raw.strip()
    if not raw:
        raise ValueError("空 URL")

    parsed = urlparse(raw)
    # "example.com:8080/x" parses as scheme="example.com", path="8080/x".
    # A dotted "scheme" followed by an all-digit first path segment is really
    # host:port; genuine schemes such as "mailto:" or "tel:" contain no dot
    # and are left alone so they are still rejected by the scheme check.
    first_segment = parsed.path.split("/", 1)[0]
    looks_like_host_port = (
        "." in parsed.scheme
        and not parsed.netloc
        and first_segment.isdigit()
    )
    if not parsed.scheme or looks_like_host_port:
        return "http://" + raw
    return raw
def is_allowed_url(url: str) -> bool:
    """Return True only for http/https URLs whose host is not local/internal.

    The host is checked textually; DNS is NOT resolved here, so a public
    host name that resolves to an internal address is not detected.

    Rejected:
      * any scheme other than http/https, malformed URLs, empty hosts;
      * ``localhost`` and ``*.localhost``;
      * IP literals (IPv4, IPv6, and shorthand IPv4 forms such as ``127.1``
        or ``2130706433``) that are not globally routable: loopback,
        private, link-local (e.g. 169.254.169.254), unspecified, reserved,
        multicast;
      * host names that merely start with a private-range prefix
        (``10.``, ``192.168.``, ``172.16.``-``172.31.``).

    Args:
        url: Absolute URL to validate.

    Returns:
        True if the URL may be fetched by the proxy, False otherwise.
    """
    # Function-scope imports keep this block self-contained.
    import ipaddress
    import socket

    try:
        parsed = urlparse(url)
    except ValueError:
        # e.g. unbalanced brackets in an IPv6 literal
        return False
    if parsed.scheme not in ("http", "https"):
        return False

    # Drop the fully-qualified trailing dot and any IPv6 "%zone" suffix so
    # "localhost." or "fe80::1%eth0" cannot slip past the checks below.
    host = (parsed.hostname or "").lower().rstrip(".").split("%", 1)[0]
    if not host:
        return False
    if host == "localhost" or host.endswith(".localhost"):
        return False

    ip = None
    try:
        ip = ipaddress.ip_address(host)
    except ValueError:
        # Resolvers also accept legacy IPv4 spellings ("127.1", decimal,
        # hex); inet_aton understands those, ipaddress does not.
        try:
            ip = ipaddress.ip_address(socket.inet_aton(host))
        except (OSError, ValueError):
            ip = None
    if ip is not None:
        return ip.is_global and not ip.is_multicast

    # Kept from the original filter: reject names that begin with a
    # private-range prefix. This also blocks wildcard-DNS names that embed a
    # private address, e.g. "10.0.0.1.<wildcard-domain>".
    private_prefixes = ("10.", "192.168.") + tuple(
        f"172.{second_octet}." for second_octet in range(16, 32)
    )
    if host.startswith(private_prefixes):
        return False
    return True
# Lower-case names of hop-by-hop headers. The proxy handler skips these when
# copying headers in either direction (client -> upstream, upstream -> client).
HOP_BY_HOP_HEADERS = {
    "connection", "keep-alive",
    "proxy-authenticate", "proxy-authorization",
    "te", "trailers",
    "transfer-encoding", "upgrade",
}
# Lower-case names of upstream response headers that the proxy handler drops
# before replying (presumably because these policies would stop the rewritten
# page from rendering when served from the proxy's origin).
BLOCKED_HEADERS = {"x-frame-options", "content-security-policy"}
def rewrite_html(html: str, base_url: str) -> str:
    """Rewrite links in *html* so that navigation keeps going through /proxy.

    For each supported tag/attribute pair the value is resolved against
    *base_url* and replaced by ``/proxy?url=<percent-encoded absolute URL>``.

    Values are left untouched when they are:
      * empty or whitespace only;
      * pure fragment links (``#section``);
      * not http/https after resolution (``javascript:``, ``data:``,
        ``mailto:``, ``tel:``, ``blob:`` ...), since the proxy endpoint
        cannot fetch those and rewriting would break inline images and
        mail/phone links;
      * malformed to the point that URL parsing raises.

    Args:
        html: Decoded HTML document text.
        base_url: Final URL the document was fetched from; used to resolve
            relative references.

    Returns:
        The serialized document with rewritten attributes.
    """
    soup = BeautifulSoup(html, "html.parser")

    tag_attr_pairs = (
        ("a", "href"),
        ("link", "href"),
        ("img", "src"),
        ("script", "src"),
        ("iframe", "src"),
        ("source", "src"),
        ("video", "src"),
        ("form", "action"),
    )

    for tag, attr in tag_attr_pairs:
        for node in soup.find_all(tag):
            value = node.get(attr)
            if not value:
                continue
            # Authors often pad attribute values; resolve the trimmed form.
            value = value.strip()
            if not value or value.startswith("#"):
                continue
            try:
                absolute = urljoin(base_url, value)
                scheme = urlparse(absolute).scheme
            except ValueError:
                # One malformed link must not fail the whole page.
                continue
            if scheme not in ("http", "https"):
                continue
            node[attr] = f"/proxy?url={quote(absolute, safe='')}"

    return str(soup)
# ==========================
# Reverse-proxy main logic
# ==========================
def _build_upstream_headers(incoming):
    """Return the client request headers that may be forwarded upstream."""
    # content-length is dropped so httpx computes it from the body actually
    # sent (the body is discarded for GET/HEAD).
    skipped = HOP_BY_HOP_HEADERS | {"host", "accept-encoding", "content-length"}
    headers = {k: v for k, v in incoming.items() if k.lower() not in skipped}
    # Ask upstream not to compress, which avoids content-encoding mismatches.
    headers["Accept-Encoding"] = "identity"
    return headers


def _build_response_headers(upstream_headers):
    """Return the upstream response headers that may be relayed to the client."""
    # Length/encoding are recomputed for the relayed body; content-type is
    # decided per branch by the caller.
    skipped = (
        HOP_BY_HOP_HEADERS
        | BLOCKED_HEADERS
        | {"content-length", "content-encoding", "content-type"}
    )
    return {k: v for k, v in upstream_headers.items() if k.lower() not in skipped}


@app.api_route(
    "/proxy",
    methods=["GET", "POST", "PUT", "PATCH", "DELETE", "HEAD", "OPTIONS"],
)
async def proxy(request: Request, url: str):
    """Fetch *url* on behalf of the client and relay the response.

    HTML responses have their links rewritten to stay inside the proxy;
    images, media, fonts, CSS and JavaScript are passed through unchanged;
    everything else is returned as a download attachment.

    Args:
        request: Incoming client request (method, headers and body are
            forwarded upstream).
        url: Target URL taken from the ``url`` query parameter.

    Raises:
        HTTPException: 400 for an invalid or disallowed URL (including a
            redirect hop to a disallowed URL), 504 when the upstream request
            times out, 502 for any other upstream request failure.
    """
    # 1. Normalize and validate the target URL.
    try:
        target_url = normalize_url(url)
    except ValueError:
        raise HTTPException(status_code=400, detail="无效 URL")

    not_allowed_detail = "仅支持公网 http/https 地址,且不允许访问内网 / 本地地址。"
    if not is_allowed_url(target_url):
        raise HTTPException(status_code=400, detail=not_allowed_detail)

    async def reject_disallowed_hop(upstream_request):
        # httpx runs request hooks for every request it sends, including
        # each redirect hop, so a redirect cannot bypass the URL filter.
        if not is_allowed_url(str(upstream_request.url)):
            raise HTTPException(status_code=400, detail=not_allowed_detail)

    # 2. Build the upstream request headers and body.
    outgoing_headers = _build_upstream_headers(request.headers)
    body = await request.body()

    # 3. Forward the request.
    try:
        async with httpx.AsyncClient(
            follow_redirects=True,
            timeout=20.0,
            event_hooks={"request": [reject_disallowed_hop]},
        ) as client:
            upstream_resp = await client.request(
                request.method,
                target_url,
                content=body if request.method not in ("GET", "HEAD") else None,
                headers=outgoing_headers,
            )
    except httpx.TimeoutException:
        raise HTTPException(status_code=504, detail="请求上游站点超时。")
    except (httpx.HTTPError, httpx.InvalidURL):
        raise HTTPException(status_code=502, detail="请求上游站点失败。")

    raw_content_type = upstream_resp.headers.get("content-type") or ""
    content_type = raw_content_type.lower()
    # Media type without parameters, e.g. "text/css; charset=utf-8" -> "text/css".
    media_type = content_type.split(";", 1)[0].strip()

    # 4. Build the response headers (content-type is added per branch).
    response_headers = _build_response_headers(upstream_resp.headers)

    # 5. HTML: rewrite links and return the page.
    if "text/html" in content_type or "application/xhtml+xml" in content_type:
        html_text = upstream_resp.text  # decoded by httpx using the upstream charset
        rewritten_html = rewrite_html(html_text, base_url=str(upstream_resp.url))
        # The body is re-encoded as UTF-8 by HTMLResponse, so the upstream
        # content-type (which may declare another charset) is not forwarded;
        # HTMLResponse emits "text/html; charset=utf-8" itself.
        return HTMLResponse(
            content=rewritten_html,
            status_code=upstream_resp.status_code,
            headers=response_headers,
        )

    # Bytes are relayed untouched from here on, so keep the upstream
    # content-type header verbatim.
    if raw_content_type:
        response_headers["content-type"] = raw_content_type

    # 6. Images / video / audio / fonts / CSS / JS: pass through as-is.
    is_passthrough = (
        media_type.startswith(("image/", "video/", "audio/"))
        or "font" in media_type
        or media_type
        in (
            "text/css",
            "application/javascript",
            "text/javascript",
            "application/x-javascript",
        )
    )
    if is_passthrough:
        return Response(
            content=upstream_resp.content,
            status_code=upstream_resp.status_code,
            headers=response_headers,
        )

    # 7. Anything else (octet-stream, zip, other binary): force a download
    #    so the browser does not render raw bytes in the window.
    if not any(k.lower() == "content-disposition" for k in response_headers):
        response_headers["Content-Disposition"] = "attachment; filename=downloaded.bin"
    return Response(
        content=upstream_resp.content,
        status_code=upstream_resp.status_code,
        headers=response_headers,
        # Only used when upstream sent no content-type header at all.
        media_type=None if raw_content_type else "application/octet-stream",
    )