File size: 8,099 Bytes
2b3f02b
 
 
 
 
 
 
 
c56eb99
c9ed46a
c56eb99
2b3f02b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c56eb99
c9ed46a
c56eb99
2b3f02b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c4c13a1
2b3f02b
 
 
c56eb99
 
 
 
 
 
 
 
 
2b3f02b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c56eb99
 
 
 
2b3f02b
 
 
c4c13a1
2b3f02b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c56eb99
 
 
2b3f02b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c4c13a1
2b3f02b
 
 
 
 
c4c13a1
2b3f02b
 
 
c4c13a1
 
 
2b3f02b
 
c4c13a1
c56eb99
 
 
 
2b3f02b
 
 
 
 
 
 
c4c13a1
2b3f02b
c4c13a1
2b3f02b
 
 
 
 
c4c13a1
 
c56eb99
 
2b3f02b
 
 
c4c13a1
 
 
 
c56eb99
2b3f02b
 
 
 
c9ed46a
2b3f02b
 
c4c13a1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2b3f02b
 
 
 
c4c13a1
2b3f02b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
import ipaddress
from urllib.parse import urlparse, urljoin, quote

import httpx
from bs4 import BeautifulSoup
from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import HTMLResponse, Response

app = FastAPI()

# ==========================
# Landing page: the URL input form
# ==========================

# Static HTML served at "/". Self-contained (inline CSS, no assets) so the
# landing page itself never needs to go through the proxy.
INDEX_HTML = """
<!DOCTYPE html>
<html lang="zh-CN">
<head>
  <meta charset="UTF-8" />
  <title>简单反向代理</title>
  <meta name="viewport" content="width=device-width, initial-scale=1" />
  <style>
    body { font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
           background: #0f172a; color: #e5e7eb; display: flex; align-items: center;
           justify-content: center; min-height: 100vh; margin: 0; }
    .card { background: #020617; border-radius: 16px; padding: 32px; max-width: 600px;
            width: 100%; box-shadow: 0 24px 60px rgba(0,0,0,0.6); border: 1px solid #1e293b;}
    h1 { margin-top: 0; font-size: 24px; margin-bottom: 8px;}
    p { margin-top: 0; margin-bottom: 16px; color: #9ca3af; font-size: 14px;}
    form { display: flex; gap: 8px; }
    input[type="url"] {
      flex: 1; padding: 10px 12px; border-radius: 999px; border: 1px solid #334155;
      background: #020617; color: #e5e7eb; font-size: 14px; outline: none;
    }
    input[type="url"]::placeholder { color: #6b7280; }
    button {
      padding: 10px 18px; border-radius: 999px; border: none; cursor: pointer;
      font-size: 14px; font-weight: 600; background: #22c55e; color: #022c22;
    }
    button:hover { filter: brightness(1.05); }
    small { color: #6b7280; font-size: 12px; display: block; margin-top: 12px;}
  </style>
</head>
<body>
  <main class="card">
    <h1>🔁 简单反向代理</h1>
    <p>输入一个要访问的 URL,例如 <code>https://example.com</code>。</p>
    <form action="/proxy" method="get">
      <input
        type="url"
        name="url"
        placeholder="https://example.com"
        required
      />
      <button type="submit">Go</button>
    </form>
    <small>仅支持公开的 http/https 网址,部分复杂 / 需要登录的网站可能无法正常工作。</small>
  </main>
</body>
</html>
"""


@app.get("/", response_class=HTMLResponse)
async def index():
    """Serve the static landing page containing the URL input form."""
    return INDEX_HTML


# ==========================
# 工具函数 & 常量
# ==========================

def normalize_url(raw: str) -> str:
    """Strip whitespace and prepend ``http://`` when no scheme is present.

    Args:
        raw: the user-supplied URL text.

    Returns:
        A URL string that carries an explicit scheme.

    Raises:
        ValueError: if *raw* is empty or whitespace-only.
    """
    raw = raw.strip()
    if not raw:
        raise ValueError("空 URL")
    parsed = urlparse(raw)
    if not parsed.scheme:
        # A schemeless URL may be protocol-relative ("//host/path"); strip the
        # leading slashes so we don't build the malformed "http:////host",
        # which would otherwise slip past host-based validation downstream.
        raw = "http://" + raw.lstrip("/")
    return raw


def is_allowed_url(url: str) -> bool:
    """Best-effort SSRF guard: allow only public http/https targets.

    Rejects non-http(s) schemes, empty hosts, ``localhost`` (and
    ``*.localhost``), and any IP-literal host inside a loopback, private,
    link-local, reserved, multicast or unspecified range — IPv4 and IPv6.
    This closes the gaps in a plain string-prefix check (e.g. 127.0.0.2,
    0.0.0.0, 169.254.x.x, ``[::1]``).

    NOTE(review): hostnames that merely *resolve* to internal addresses
    (DNS rebinding) are not caught here; blocking those would require
    resolving the name and pinning the connection to the checked address.
    """
    parsed = urlparse(url)
    if parsed.scheme not in ("http", "https"):
        return False

    host = (parsed.hostname or "").lower()
    if not host:
        return False

    # RFC 6761 reserves "localhost" and every name under ".localhost".
    if host == "localhost" or host.endswith(".localhost"):
        return False

    try:
        ip = ipaddress.ip_address(host)
    except ValueError:
        # Not an IP literal; treat it as a public hostname.
        return True

    return not (
        ip.is_loopback
        or ip.is_private
        or ip.is_link_local
        or ip.is_reserved
        or ip.is_multicast
        or ip.is_unspecified
    )


# Connection-specific (hop-by-hop) headers that a proxy must not forward
# (RFC 7230 §6.1); they describe a single transport connection, not the
# end-to-end message. Compared lowercase against incoming header names.
HOP_BY_HOP_HEADERS = {
    "connection",
    "keep-alive",
    "proxy-authenticate",
    "proxy-authorization",
    "te",
    "trailers",
    "transfer-encoding",
    "upgrade",
}

# Upstream security headers stripped from proxied responses so the rewritten
# page can still render when served from our origin instead of the upstream's.
BLOCKED_HEADERS = {
    "x-frame-options",
    "content-security-policy",
}


def rewrite_html(html: str, base_url: str) -> str:
    """Rewrite link-bearing attributes so in-page navigation stays on /proxy.

    Each href/src/action value is resolved against *base_url* and replaced
    with ``/proxy?url=<url-encoded absolute URL>``. Fragment-only targets and
    ``javascript:``, ``data:``, ``mailto:``, ``tel:`` URIs are left untouched —
    they are not fetchable through the proxy, and rewriting them would break
    them.

    Args:
        html: the upstream HTML document.
        base_url: the final upstream URL, used to absolutize relative links.

    Returns:
        The serialized, rewritten HTML.
    """
    soup = BeautifulSoup(html, "html.parser")

    # (tag, attribute) pairs whose values reference other resources.
    tag_attr_pairs = [
        ("a", "href"),
        ("link", "href"),
        ("img", "src"),
        ("script", "src"),
        ("iframe", "src"),
        ("source", "src"),
        ("video", "src"),
        ("form", "action"),
    ]

    for tag, attr in tag_attr_pairs:
        for node in soup.find_all(tag):
            value = node.get(attr)
            if not value:
                continue

            low = value.strip().lower()
            # Non-fetchable targets: keep them as-is.
            if low.startswith(("#", "javascript:", "data:", "mailto:", "tel:")):
                continue

            absolute = urljoin(base_url, value)
            node[attr] = f"/proxy?url={quote(absolute, safe='')}"

    return str(soup)


# ==========================
# 反向代理主逻辑
# ==========================

# Redirect statuses we resolve manually so that every hop is re-validated.
_REDIRECT_STATUSES = {301, 302, 303, 307, 308}
_MAX_REDIRECTS = 10


def _filter_request_headers(headers) -> dict:
    """Copy client headers, dropping hop-by-hop, Host and Accept-Encoding."""
    out = {}
    for name, value in headers.items():
        lname = name.lower()
        if lname in HOP_BY_HOP_HEADERS or lname in ("host", "accept-encoding"):
            continue
        out[name] = value
    return out


def _filter_response_headers(headers) -> dict:
    """Copy upstream headers, dropping the ones this proxy manages itself."""
    out = {}
    for name, value in headers.items():
        lname = name.lower()
        if lname in HOP_BY_HOP_HEADERS or lname in BLOCKED_HEADERS:
            continue
        # Length, encoding and type are recomputed by our Response objects;
        # forwarding the upstream values would conflict with the re-encoded
        # body (e.g. the upstream Content-Type would override our
        # "text/html; charset=utf-8" on rewritten pages).
        if lname in ("content-length", "content-encoding", "content-type"):
            continue
        out[name] = value
    return out


async def _fetch_validated(client, method, url, body, headers):
    """Issue the upstream request, following redirects manually.

    httpx's automatic redirect following would let an upstream open-redirect
    bounce us to an internal address *after* the initial is_allowed_url
    check (SSRF). Here every Location target is re-validated before the next
    hop is fetched.

    Raises:
        HTTPException: 400 if a redirect targets a disallowed address,
            502 if the redirect chain exceeds _MAX_REDIRECTS.
    """
    for _ in range(_MAX_REDIRECTS + 1):
        resp = await client.request(
            method,
            url,
            content=body if method not in ("GET", "HEAD") else None,
            headers=headers,
        )
        if resp.status_code not in _REDIRECT_STATUSES:
            return resp

        location = resp.headers.get("location")
        if not location:
            # Redirect status without a target: nothing to follow.
            return resp

        url = urljoin(str(resp.url), location)
        if not is_allowed_url(url):
            raise HTTPException(
                status_code=400,
                detail="仅支持公网 http/https 地址,且不允许访问内网 / 本地地址。",
            )

        # Per RFC 9110: 303 always switches to GET; 301/302 conventionally
        # downgrade POST to GET. 307/308 preserve method and body.
        if resp.status_code == 303 or (
            resp.status_code in (301, 302) and method == "POST"
        ):
            method = "GET"
            body = b""

    raise HTTPException(status_code=502, detail="重定向次数过多")


@app.api_route(
    "/proxy",
    methods=["GET", "POST", "PUT", "PATCH", "DELETE", "HEAD", "OPTIONS"],
)
async def proxy(request: Request, url: str):
    """Forward the incoming request to *url* and relay the upstream response.

    HTML responses get their links rewritten to keep navigation on /proxy;
    media/CSS/JS pass through unchanged; anything else is returned as an
    attachment so raw binary is never rendered in the browser window.

    Raises:
        HTTPException: 400 for invalid/disallowed URLs, 502 for runaway
            redirect chains.
    """
    # 1. Normalize & validate the target URL.
    try:
        target_url = normalize_url(url)
    except ValueError:
        raise HTTPException(status_code=400, detail="无效 URL")

    if not is_allowed_url(target_url):
        raise HTTPException(
            status_code=400,
            detail="仅支持公网 http/https 地址,且不允许访问内网 / 本地地址。",
        )

    # 2. Build upstream request headers; force an uncompressed response so
    #    we can safely rewrite and re-measure the body.
    outgoing_headers = _filter_request_headers(request.headers)
    outgoing_headers["Accept-Encoding"] = "identity"

    body = await request.body()

    # 3. Forward the request; redirects are validated hop by hop.
    async with httpx.AsyncClient(
        follow_redirects=False,
        timeout=20.0,
    ) as client:
        upstream_resp = await _fetch_validated(
            client, request.method, target_url, body, outgoing_headers
        )

    content_type = (upstream_resp.headers.get("content-type") or "").lower()
    response_headers = _filter_response_headers(upstream_resp.headers)

    # 4. HTML: rewrite links, then serve the page re-encoded as UTF-8.
    if "text/html" in content_type or "application/xhtml+xml" in content_type:
        html_text = upstream_resp.text  # httpx decodes using the charset
        rewritten_html = rewrite_html(html_text, base_url=str(upstream_resp.url))

        return HTMLResponse(
            content=rewritten_html,
            status_code=upstream_resp.status_code,
            headers=response_headers,
            media_type="text/html; charset=utf-8",
        )

    # 5. Images / video / audio / fonts / CSS / JS: pass through verbatim.
    is_passthrough = (
        content_type.startswith(("image/", "video/", "audio/"))
        or "font" in content_type
        or content_type in (
            "text/css",
            "application/javascript",
            "text/javascript",
            "application/x-javascript",
        )
    )
    if is_passthrough:
        return Response(
            content=upstream_resp.content,
            status_code=upstream_resp.status_code,
            headers=response_headers,
            media_type=content_type or None,
        )

    # 6. Everything else (octet-stream, zip, …): force a download so the
    #    browser never tries to render raw bytes inline.
    if not any(k.lower() == "content-disposition" for k in response_headers):
        response_headers["Content-Disposition"] = "attachment; filename=downloaded.bin"

    return Response(
        content=upstream_resp.content,
        status_code=upstream_resp.status_code,
        headers=response_headers,
        media_type=content_type or "application/octet-stream",
    )