import ipaddress
from urllib.parse import urlparse, urljoin, quote

import httpx
from bs4 import BeautifulSoup
from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import HTMLResponse, Response

# ASGI application instance; the route decorators in this file register
# their handlers on it.
app = FastAPI()
# ==========================
# Home page: URL input page
# ==========================
# Body returned for GET "/" by index().
# NOTE(review): the literal below contains only plain text in this view — no
# HTML tags or form markup are visible; confirm whether markup was lost.
INDEX_HTML = """
简单反向代理
🔁 简单反向代理
输入一个要访问的 URL,例如 https://example.com。
仅支持公开的 http/https 网址,部分复杂 / 需要登录的网站可能无法正常工作。
"""
@app.get("/", response_class=HTMLResponse)
async def index():
    """Serve the landing page by returning the static ``INDEX_HTML`` text."""
    landing_page = INDEX_HTML
    return landing_page
# ==========================
# Utility functions & constants
# ==========================
def normalize_url(raw: str) -> str:
    """Return *raw* as an absolute URL, defaulting the scheme to ``http``.

    Surrounding whitespace is stripped. Input that already carries a scheme
    is returned unchanged; scheme-less input (``example.com/x``), bare
    ``host:port`` input (``example.com:8080``) and protocol-relative input
    (``//example.com/x``) all get ``http`` prepended.

    Args:
        raw: The URL text as typed by the user.

    Returns:
        The normalized URL string.

    Raises:
        ValueError: If *raw* is empty or whitespace only, or if
            ``urlparse`` rejects it as malformed.
    """
    candidate = raw.strip()
    if not candidate:
        raise ValueError("空 URL")

    # Protocol-relative form: the authority is already there, only the scheme
    # is missing. Prepending "http://" would yield "http:////host".
    if candidate.startswith("//"):
        return "http:" + candidate

    parsed = urlparse(candidate)
    if not parsed.scheme:
        return "http://" + candidate

    # Python 3.9+ parses "example.com:8080/x" as scheme="example.com",
    # path="8080/x". A "scheme" with no authority, followed by an all-digit
    # first path segment, is really host:port and still needs a scheme.
    if not parsed.netloc and parsed.path.split("/", 1)[0].isdigit():
        return "http://" + candidate

    return candidate
def is_allowed_url(url: str) -> bool:
    """Return True if *url* is an http/https URL aimed at a public host.

    IP-literal hosts are parsed with ``ipaddress`` and accepted only when
    they are globally routable unicast addresses, so loopback, RFC 1918,
    link-local (e.g. 169.254.169.254), unspecified (0.0.0.0), multicast and
    their IPv6 / IPv4-mapped equivalents are all rejected. Host *names* are
    accepted unless they are ``localhost`` (or a subdomain of it) or look
    like a legacy numeric IPv4 spelling.

    NOTE: no DNS lookup is performed, so a public-looking name that resolves
    to an internal address is not detected here.

    Args:
        url: Absolute URL to check.

    Returns:
        True when the URL may be fetched by the proxy, False otherwise.
    """
    try:
        parsed = urlparse(url)
        host = (parsed.hostname or "").lower().rstrip(".")
    except ValueError:
        # Malformed authority (e.g. an unbalanced IPv6 bracket).
        return False

    if parsed.scheme not in ("http", "https"):
        return False
    if not host:
        return False
    if host == "localhost" or host.endswith(".localhost"):
        return False

    try:
        ip = ipaddress.ip_address(host)
    except ValueError:
        # Not a strict IP literal. A colon can only appear in an IPv6
        # literal, so an unparseable host containing one is invalid.
        if ":" in host:
            return False
        # Legacy IPv4 spellings ("2130706433", "127.1", "0x7f000001") are
        # still turned into addresses by many resolvers; a real DNS name
        # never ends in an all-numeric or hex label.
        last_label = host.rsplit(".", 1)[-1]
        if last_label.isdigit() or last_label.startswith("0x"):
            return False
        return True

    # Judge ::ffff:a.b.c.d by the IPv4 address it wraps.
    mapped = getattr(ip, "ipv4_mapped", None)
    if mapped is not None:
        ip = mapped
    return ip.is_global and not ip.is_multicast
# Lowercase names of hop-by-hop headers. They describe a single transport
# connection, so proxy() drops them in both directions instead of relaying.
# The real header is "Trailer"; "Trailers" is a typo from RFC 2616 §13.5.1
# (erratum 4522) and is kept only so existing behaviour is not narrowed.
HOP_BY_HOP_HEADERS = {
    "connection",
    "keep-alive",
    "proxy-authenticate",
    "proxy-authorization",
    "te",
    "trailer",
    "trailers",
    "transfer-encoding",
    "upgrade",
}
# Lowercase names of upstream response headers that proxy() strips before
# replying to the browser (presumably because framing / content-security
# rules written for the original origin would break the proxied page).
BLOCKED_HEADERS = {"content-security-policy", "x-frame-options"}
def rewrite_html(html: str, base_url: str) -> str:
    """Rewrite URL attributes in *html* so navigation stays on ``/proxy``.

    Every http/https target found in the handled attributes is made absolute
    and replaced with ``/proxy?url=<percent-encoded absolute URL>``.
    Fragment-only links and non-http schemes (``javascript:``, ``data:``,
    ``mailto:``, ``tel:``, ``blob:`` ...) are left untouched, because the
    proxy endpoint cannot fetch them.

    Args:
        html: Decoded HTML document.
        base_url: URL the document was fetched from; relative links are
            resolved against it (or against ``<base href>`` if present).

    Returns:
        The serialized, rewritten document.
    """
    soup = BeautifulSoup(html, "html.parser")

    # A <base href> changes how the browser resolves relative URLs. Use it
    # for resolution, then drop it: otherwise the rewritten "/proxy?..."
    # links would themselves be resolved against the original site.
    base_tag = soup.find("base", href=True)
    if base_tag is not None:
        try:
            base_url = urljoin(base_url, base_tag["href"].strip())
        except ValueError:
            pass  # malformed <base href>: keep resolving against the fetched URL
        del base_tag["href"]

    tag_attr_pairs = (
        ("a", "href"),
        ("link", "href"),
        ("img", "src"),
        ("script", "src"),
        ("iframe", "src"),
        ("source", "src"),
        ("video", "src"),
        ("video", "poster"),
        ("audio", "src"),
        ("track", "src"),
        ("embed", "src"),
        ("form", "action"),
    )
    for tag, attr in tag_attr_pairs:
        for node in soup.find_all(tag):
            value = (node.get(attr) or "").strip()
            if not value or value.startswith("#"):
                continue
            try:
                absolute = urljoin(base_url, value)
                scheme = urlparse(absolute).scheme
            except ValueError:
                # One malformed link must not fail the whole page.
                continue
            if scheme not in ("http", "https"):
                continue
            node[attr] = f"/proxy?url={quote(absolute, safe='')}"
    return str(soup)
# ==========================
# Reverse-proxy endpoint
# ==========================
_DISALLOWED_URL_DETAIL = "仅支持公网 http/https 地址,且不允许访问内网 / 本地地址。"

# Script / stylesheet media types that are relayed to the browser unchanged.
_PASSTHROUGH_MIME_TYPES = (
    "text/css",
    "application/javascript",
    "text/javascript",
    "application/x-javascript",
)


def _upstream_request_headers(request: Request) -> dict:
    """Copy the client's headers, minus those that must not be forwarded."""
    # Content-Length is dropped because the body may be discarded (GET/HEAD);
    # httpx computes the correct value from what is actually sent.
    skipped = HOP_BY_HOP_HEADERS | {"host", "accept-encoding", "content-length"}
    headers = {k: v for k, v in request.headers.items() if k.lower() not in skipped}
    # Ask upstream for an uncompressed body so it can be rewritten directly.
    headers["Accept-Encoding"] = "identity"
    return headers


def _downstream_response_headers(upstream_headers) -> dict:
    """Copy upstream response headers that are safe to relay to the browser."""
    # Length and encoding are recomputed for the body this proxy sends.
    skipped = HOP_BY_HOP_HEADERS | BLOCKED_HEADERS | {"content-length", "content-encoding"}
    return {k: v for k, v in upstream_headers.items() if k.lower() not in skipped}


@app.api_route(
    "/proxy",
    methods=["GET", "POST", "PUT", "PATCH", "DELETE", "HEAD", "OPTIONS"],
)
async def proxy(request: Request, url: str):
    """Fetch *url* on behalf of the client and relay the response.

    HTML responses have their links rewritten to stay on ``/proxy``. Media,
    fonts, CSS and JavaScript are relayed unchanged. Anything else is
    returned as an attachment download.

    Args:
        request: Incoming request; its method, headers and body are forwarded.
        url: Target URL taken from the ``url`` query parameter.

    Raises:
        HTTPException: 400 if the URL is invalid or not allowed (including
            any redirect hop), 504 if upstream times out, 502 on any other
            upstream transport error.
    """
    # 1. Normalize and validate the target URL.
    try:
        target_url = normalize_url(url)
    except ValueError:
        raise HTTPException(status_code=400, detail="无效 URL") from None
    if not is_allowed_url(target_url):
        raise HTTPException(status_code=400, detail=_DISALLOWED_URL_DETAIL)

    # 2. Build the upstream request.
    outgoing_headers = _upstream_request_headers(request)
    body = await request.body()

    async def reject_disallowed_hop(upstream_request: httpx.Request) -> None:
        # httpx runs request hooks before every hop, redirects included, so a
        # public URL cannot bounce the proxy onto an internal address.
        if not is_allowed_url(str(upstream_request.url)):
            raise HTTPException(status_code=400, detail=_DISALLOWED_URL_DETAIL)

    # 3. Forward the request; map transport failures to gateway errors
    #    instead of letting them surface as a 500.
    try:
        async with httpx.AsyncClient(
            follow_redirects=True,
            timeout=20.0,
            event_hooks={"request": [reject_disallowed_hop]},
        ) as client:
            upstream_resp = await client.request(
                request.method,
                target_url,
                content=body if request.method not in ("GET", "HEAD") else None,
                headers=outgoing_headers,
            )
    except httpx.TimeoutException as exc:
        raise HTTPException(status_code=504, detail="上游请求超时") from exc
    except (httpx.HTTPError, httpx.InvalidURL) as exc:
        raise HTTPException(status_code=502, detail="无法从上游获取响应") from exc

    content_type = (upstream_resp.headers.get("content-type") or "").lower()
    # Media type without parameters, so "text/css; charset=utf-8" still
    # matches "text/css".
    mime_type = content_type.split(";", 1)[0].strip()

    # 4. Headers to relay back to the browser.
    response_headers = _downstream_response_headers(upstream_resp.headers)

    # 5. HTML: rewrite links and serve the page inline.
    if mime_type in ("text/html", "application/xhtml+xml"):
        rewritten_html = rewrite_html(
            upstream_resp.text,  # decoded by httpx using the upstream charset
            base_url=str(upstream_resp.url),
        )
        # The body is re-encoded as UTF-8. The upstream Content-Type must go,
        # or it would take precedence over media_type and could advertise a
        # charset that no longer matches the bytes sent.
        html_headers = {
            k: v for k, v in response_headers.items() if k.lower() != "content-type"
        }
        return HTMLResponse(
            content=rewritten_html,
            status_code=upstream_resp.status_code,
            headers=html_headers,
            media_type="text/html; charset=utf-8",
        )

    # 6. Images / video / audio / fonts / CSS / JS: relay unchanged.
    if (
        mime_type.startswith(("image/", "video/", "audio/"))
        or "font" in mime_type
        or mime_type in _PASSTHROUGH_MIME_TYPES
    ):
        return Response(
            content=upstream_resp.content,
            status_code=upstream_resp.status_code,
            headers=response_headers,
            media_type=content_type or None,
        )

    # 7. Everything else (octet-stream, zip, other binary): force a download
    #    so raw bytes are not rendered in the browser window.
    if not any(k.lower() == "content-disposition" for k in response_headers):
        response_headers["Content-Disposition"] = "attachment; filename=downloaded.bin"
    return Response(
        content=upstream_resp.content,
        status_code=upstream_resp.status_code,
        headers=response_headers,
        media_type=content_type or "application/octet-stream",
    )