import ipaddress
import re
import socket
from typing import List, Optional, Tuple, Union
from urllib.parse import quote, urljoin, urlparse

import httpx
from bs4 import BeautifulSoup
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import HTMLResponse, Response

app = FastAPI()

# ==========================
# Landing page: URL entry form
# ==========================
# NOTE(review): the text below contains no HTML tags (no <form>/<input>), so
# the markup looks like it was stripped somewhere upstream of this file.
# It is kept byte-for-byte; restore the original markup if it is available.
INDEX_HTML = """ 简单反向代理

🔁 简单反向代理

输入一个要访问的 URL,例如 https://example.com

仅支持公开的 http/https 网址,部分复杂 / 需要登录的网站可能无法正常工作。
"""


@app.get("/", response_class=HTMLResponse)
async def index():
    """Serve the static landing page."""
    return INDEX_HTML


# ==========================
# Helpers & constants
# ==========================
_IPAddress = Union[ipaddress.IPv4Address, ipaddress.IPv6Address]

# Matches bare "host:port" input such as "example.com:8080/path".  urlparse()
# reports "example.com" as the *scheme* for such input, so it needs its own
# check before we decide whether a scheme is missing.
_HOST_PORT_RE = re.compile(r"^[^\s/?#:@\[\]]+:\d{1,5}(?:[/?#]|$)")

# Upper bound on upstream redirects followed for a single proxied request.
MAX_REDIRECTS = 10

_DISALLOWED_DETAIL = "仅支持公网 http/https 地址,且不允许访问内网 / 本地地址。"


def normalize_url(raw: str) -> str:
    """Return *raw* with ``http://`` prepended when it carries no scheme.

    Handles three schemeless shapes: plain ``example.com/path``,
    ``example.com:8080`` (which ``urlparse`` would misread as scheme
    ``example.com``) and protocol-relative ``//example.com``.

    Raises:
        ValueError: if *raw* is empty/whitespace, or ``urlparse`` rejects it.
    """
    raw = raw.strip()
    if not raw:
        raise ValueError("空 URL")
    if raw.startswith("//"):
        # Protocol-relative: only the scheme is missing, not the slashes.
        return "http:" + raw
    if not urlparse(raw).scheme or _HOST_PORT_RE.match(raw):
        return "http://" + raw
    return raw


def _parse_ip_literal(host: str) -> Optional[_IPAddress]:
    """Return the IP address *host* denotes, or ``None`` if it is a name.

    Besides canonical IPv4/IPv6 text this also recognises the legacy
    ``inet_aton`` spellings (``127.1``, ``2130706433``, ``0x7f.0.0.1``) that
    the system resolver accepts but ``ipaddress`` rejects.
    """
    candidate = host.split("%", 1)[0]  # drop an IPv6 zone id such as %eth0
    try:
        return ipaddress.ip_address(candidate)
    except ValueError:
        pass
    try:
        return ipaddress.IPv4Address(socket.inet_aton(candidate))
    except (OSError, ValueError, UnicodeError):
        return None


def _is_internal_address(address: _IPAddress) -> bool:
    """Return True when *address* is not a publicly routable unicast address."""
    mapped = getattr(address, "ipv4_mapped", None)
    if mapped is not None:
        # ::ffff:127.0.0.1 must be judged as 127.0.0.1.
        address = mapped
    return (
        not address.is_global
        or address.is_private
        or address.is_loopback
        or address.is_link_local
        or address.is_multicast
        or address.is_reserved
        or address.is_unspecified
    )


def is_allowed_url(url: str) -> bool:
    """Return True only for http/https URLs whose host is not local/internal.

    IP-literal hosts are classified with :mod:`ipaddress`, which covers
    loopback, RFC 1918, link-local (including 169.254.169.254), unspecified,
    multicast and reserved ranges for both IPv4 and IPv6.

    NOTE: host *names* are not resolved here, so a public DNS name that
    resolves to an internal address is still accepted by this check.
    """
    try:
        parsed = urlparse(url)
        _ = parsed.port  # accessing .port raises ValueError on a bad port
    except ValueError:
        return False

    if parsed.scheme not in ("http", "https"):
        return False

    # A trailing dot ("localhost.") names the same host; normalise it away.
    host = (parsed.hostname or "").lower().rstrip(".")
    if not host:
        return False
    if host == "localhost" or host.endswith(".localhost"):
        return False

    address = _parse_ip_literal(host)
    if address is None:
        # DNS names never contain ":"; anything that does is a malformed
        # IPv6 literal and is refused rather than guessed at.
        return ":" not in host
    return not _is_internal_address(address)


HOP_BY_HOP_HEADERS = {
    "connection",
    "keep-alive",
    "proxy-authenticate",
    "proxy-authorization",
    "te",
    "trailers",
    "transfer-encoding",
    "upgrade",
}

# Upstream headers that would stop the proxied page from rendering.
BLOCKED_HEADERS = {
    "x-frame-options",
    "content-security-policy",
}

# (tag, attribute) pairs whose URL is rewritten to go through /proxy.
_REWRITE_TARGETS = (
    ("a", "href"),
    ("link", "href"),
    ("img", "src"),
    ("script", "src"),
    ("iframe", "src"),
    ("source", "src"),
    ("video", "src"),
    ("form", "action"),
)

# Text MIME types passed through unchanged (compared without parameters).
_PASSTHROUGH_TEXT_TYPES = frozenset(
    {
        "text/css",
        "application/javascript",
        "text/javascript",
        "application/x-javascript",
    }
)


def rewrite_html(html: str, base_url: str) -> str:
    """Rewrite link-like attributes so navigation keeps going through /proxy.

    Each URL is resolved against *base_url* and replaced with
    ``/proxy?url=<percent-encoded absolute URL>``.  Only http/https targets
    are rewritten; fragment-only links and other schemes (``javascript:``,
    ``data:``, ``mailto:``, ``tel:``, ``blob:`` ...) are left untouched
    because the proxy endpoint cannot serve them.
    """
    soup = BeautifulSoup(html, "html.parser")

    for tag, attr in _REWRITE_TARGETS:
        for node in soup.find_all(tag):
            value = node.get(attr)
            if not value:
                continue
            value = value.strip()
            if not value or value.startswith("#"):
                continue
            try:
                absolute = urljoin(base_url, value)
                scheme = urlparse(absolute).scheme
            except ValueError:
                # One malformed link must not fail the whole page.
                continue
            if scheme not in ("http", "https"):
                continue
            node[attr] = f"/proxy?url={quote(absolute, safe='')}"

    return str(soup)


async def _send_with_checked_redirects(
    client: httpx.AsyncClient, upstream_request: httpx.Request
) -> httpx.Response:
    """Send *upstream_request*, following redirects one hop at a time.

    Every redirect target is validated with :func:`is_allowed_url` before it
    is requested, so an upstream cannot bounce the proxy to an internal host.

    Raises:
        HTTPException: 400 for a disallowed redirect target, 502 when more
            than ``MAX_REDIRECTS`` redirects are encountered.
    """
    for _ in range(MAX_REDIRECTS + 1):
        response = await client.send(upstream_request, follow_redirects=False)
        next_request = response.next_request
        if next_request is None:
            return response
        if not is_allowed_url(str(next_request.url)):
            raise HTTPException(status_code=400, detail=_DISALLOWED_DETAIL)
        upstream_request = next_request
    raise HTTPException(status_code=502, detail="重定向次数过多")


def _build_response(
    content,
    status_code: int,
    headers: List[Tuple[str, str]],
    media_type: Optional[str] = None,
    response_class=Response,
) -> Response:
    """Build a response and append *headers* one by one.

    Appending (rather than passing a dict) keeps repeated headers such as
    ``Set-Cookie`` as separate header lines.  Callers must not supply a
    ``Content-Type`` in *headers* when *media_type* is given.
    """
    response = response_class(
        content=content, status_code=status_code, media_type=media_type
    )
    for name, value in headers:
        response.headers.append(name, value)
    return response


# ==========================
# Reverse-proxy endpoint
# ==========================
@app.api_route(
    "/proxy",
    methods=["GET", "POST", "PUT", "PATCH", "DELETE", "HEAD", "OPTIONS"],
)
async def proxy(request: Request, url: str):
    """Fetch *url* on behalf of the client and relay the upstream response.

    HTML is link-rewritten and re-served as UTF-8; media, fonts, CSS and JS
    are passed through unchanged; everything else is sent as an attachment.

    Raises:
        HTTPException: 400 for an invalid or disallowed URL (including a
            disallowed redirect target), 502 when the upstream cannot be
            reached, 504 when it times out.
    """
    # 1. Normalise and validate the target URL.
    try:
        target_url = normalize_url(url)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail="无效 URL") from exc

    if not is_allowed_url(target_url):
        raise HTTPException(status_code=400, detail=_DISALLOWED_DETAIL)

    # 2. Build upstream request headers.  Content-Length is dropped so httpx
    #    derives it from the body we actually forward.
    skipped = HOP_BY_HOP_HEADERS | {"host", "accept-encoding", "content-length"}
    outgoing_headers = {
        name: value
        for name, value in request.headers.items()
        if name.lower() not in skipped
    }
    # Ask the upstream not to compress, to avoid encoding issues.
    outgoing_headers["Accept-Encoding"] = "identity"

    body = await request.body()

    # 3. Forward the request, validating every redirect hop ourselves.
    try:
        async with httpx.AsyncClient(
            follow_redirects=False,
            timeout=20.0,
        ) as client:
            upstream_request = client.build_request(
                request.method,
                target_url,
                content=body if request.method not in ("GET", "HEAD") else None,
                headers=outgoing_headers,
            )
            upstream_resp = await _send_with_checked_redirects(
                client, upstream_request
            )
    except httpx.InvalidURL as exc:
        raise HTTPException(status_code=400, detail="无效 URL") from exc
    except httpx.TimeoutException as exc:
        raise HTTPException(status_code=504, detail="上游请求超时") from exc
    except httpx.RequestError as exc:
        raise HTTPException(status_code=502, detail="无法连接上游服务器") from exc

    content_type = (upstream_resp.headers.get("content-type") or "").lower()
    # Bare MIME type without parameters such as "; charset=utf-8".
    mime = content_type.split(";", 1)[0].strip()

    # 4. Response headers to relay.  Length and encoding are recomputed for
    #    the body we send; multi_items() keeps repeated headers separate.
    dropped = (
        HOP_BY_HOP_HEADERS
        | BLOCKED_HEADERS
        | {"content-length", "content-encoding"}
    )
    forwarded = [
        (name, value)
        for name, value in upstream_resp.headers.multi_items()
        if name.lower() not in dropped
    ]
    forwarded_names = {name.lower() for name, _ in forwarded}

    # 5. HTML: rewrite links and serve the page.  The upstream Content-Type
    #    is dropped because the body is re-encoded as UTF-8; keeping the
    #    upstream charset would mislabel it.
    if mime in ("text/html", "application/xhtml+xml"):
        rewritten_html = rewrite_html(
            upstream_resp.text,  # httpx decodes using the declared charset
            base_url=str(upstream_resp.url),
        )
        return _build_response(
            rewritten_html,
            upstream_resp.status_code,
            [(n, v) for n, v in forwarded if n.lower() != "content-type"],
            media_type="text/html; charset=utf-8",
            response_class=HTMLResponse,
        )

    # 6. Images / video / audio / fonts / CSS / JS: pass through unchanged.
    if (
        mime.startswith(("image/", "video/", "audio/"))
        or "font" in mime
        or mime in _PASSTHROUGH_TEXT_TYPES
    ):
        return _build_response(
            upstream_resp.content, upstream_resp.status_code, forwarded
        )

    # 7. Anything else (octet-stream, zip, ...): force a download so the
    #    browser does not render raw bytes in the window.
    if "content-disposition" not in forwarded_names:
        forwarded.append(
            ("Content-Disposition", "attachment; filename=downloaded.bin")
        )
    if "content-type" not in forwarded_names:
        forwarded.append(("Content-Type", "application/octet-stream"))
    return _build_response(
        upstream_resp.content, upstream_resp.status_code, forwarded
    )