File size: 8,099 Bytes
2b3f02b
 
 
 
 
 
 
 
c56eb99
c9ed46a
c56eb99
2b3f02b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c56eb99
c9ed46a
c56eb99
2b3f02b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c4c13a1
2b3f02b
 
 
c56eb99
 
 
 
 
 
 
 
 
2b3f02b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c56eb99
 
 
 
2b3f02b
 
 
c4c13a1
2b3f02b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c56eb99
 
 
2b3f02b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c4c13a1
2b3f02b
 
 
 
 
c4c13a1
2b3f02b
 
 
c4c13a1
 
 
2b3f02b
 
c4c13a1
c56eb99
 
 
 
2b3f02b
 
 
 
 
 
 
c4c13a1
2b3f02b
c4c13a1
2b3f02b
 
 
 
 
c4c13a1
 
c56eb99
 
2b3f02b
 
 
c4c13a1
 
 
 
c56eb99
2b3f02b
 
 
 
c9ed46a
2b3f02b
 
c4c13a1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2b3f02b
 
 
 
c4c13a1
2b3f02b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
import ipaddress
from urllib.parse import urlparse, urljoin, quote

import httpx
from bs4 import BeautifulSoup
from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import HTMLResponse, Response

app = FastAPI()

# ==========================
# Landing page: the URL input form
# ==========================

# Static HTML served at "/". Self-contained (inline CSS, no assets) so the
# landing page itself never needs to go through the proxy.
INDEX_HTML = """
<!DOCTYPE html>
<html lang="zh-CN">
<head>
  <meta charset="UTF-8" />
  <title>简单反向代理</title>
  <meta name="viewport" content="width=device-width, initial-scale=1" />
  <style>
    body { font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
           background: #0f172a; color: #e5e7eb; display: flex; align-items: center;
           justify-content: center; min-height: 100vh; margin: 0; }
    .card { background: #020617; border-radius: 16px; padding: 32px; max-width: 600px;
            width: 100%; box-shadow: 0 24px 60px rgba(0,0,0,0.6); border: 1px solid #1e293b;}
    h1 { margin-top: 0; font-size: 24px; margin-bottom: 8px;}
    p { margin-top: 0; margin-bottom: 16px; color: #9ca3af; font-size: 14px;}
    form { display: flex; gap: 8px; }
    input[type="url"] {
      flex: 1; padding: 10px 12px; border-radius: 999px; border: 1px solid #334155;
      background: #020617; color: #e5e7eb; font-size: 14px; outline: none;
    }
    input[type="url"]::placeholder { color: #6b7280; }
    button {
      padding: 10px 18px; border-radius: 999px; border: none; cursor: pointer;
      font-size: 14px; font-weight: 600; background: #22c55e; color: #022c22;
    }
    button:hover { filter: brightness(1.05); }
    small { color: #6b7280; font-size: 12px; display: block; margin-top: 12px;}
  </style>
</head>
<body>
  <main class="card">
    <h1>🔁 简单反向代理</h1>
    <p>输入一个要访问的 URL,例如 <code>https://example.com</code>。</p>
    <form action="/proxy" method="get">
      <input
        type="url"
        name="url"
        placeholder="https://example.com"
        required
      />
      <button type="submit">Go</button>
    </form>
    <small>仅支持公开的 http/https 网址,部分复杂 / 需要登录的网站可能无法正常工作。</small>
  </main>
</body>
</html>
"""


@app.get("/", response_class=HTMLResponse)
async def index():
    """Serve the static landing page containing the URL input form."""
    return INDEX_HTML


# ==========================
# 工具函数 & 常量
# ==========================

def normalize_url(raw: str) -> str:
    """Strip whitespace and prepend ``http://`` when no scheme is present.

    Args:
        raw: the user-supplied URL text.

    Returns:
        A URL string that carries an explicit scheme.

    Raises:
        ValueError: if *raw* is empty or whitespace-only.
    """
    raw = raw.strip()
    if not raw:
        raise ValueError("空 URL")
    parsed = urlparse(raw)
    if not parsed.scheme:
        # A schemeless URL may be protocol-relative ("//host/path"); strip the
        # leading slashes so we don't build the malformed "http:////host",
        # which would otherwise slip past host-based validation downstream.
        raw = "http://" + raw.lstrip("/")
    return raw


def is_allowed_url(url: str) -> bool:
    """Best-effort SSRF guard: allow only public http/https targets.

    Rejects non-http(s) schemes, empty hosts, ``localhost`` (and
    ``*.localhost``), and any IP-literal host inside a loopback, private,
    link-local, reserved, multicast or unspecified range — IPv4 and IPv6.
    This closes the gaps in a plain string-prefix check (e.g. 127.0.0.2,
    0.0.0.0, 169.254.x.x, ``[::1]``).

    NOTE(review): hostnames that merely *resolve* to internal addresses
    (DNS rebinding) are not caught here; blocking those would require
    resolving the name and pinning the connection to the checked address.
    """
    parsed = urlparse(url)
    if parsed.scheme not in ("http", "https"):
        return False

    host = (parsed.hostname or "").lower()
    if not host:
        return False

    # RFC 6761 reserves "localhost" and every name under ".localhost".
    if host == "localhost" or host.endswith(".localhost"):
        return False

    try:
        ip = ipaddress.ip_address(host)
    except ValueError:
        # Not an IP literal; treat it as a public hostname.
        return True

    return not (
        ip.is_loopback
        or ip.is_private
        or ip.is_link_local
        or ip.is_reserved
        or ip.is_multicast
        or ip.is_unspecified
    )


# Connection-specific (hop-by-hop) headers that a proxy must not forward
# (RFC 7230 §6.1); they describe a single transport connection, not the
# end-to-end message. Compared lowercase against incoming header names.
HOP_BY_HOP_HEADERS = {
    "connection",
    "keep-alive",
    "proxy-authenticate",
    "proxy-authorization",
    "te",
    "trailers",
    "transfer-encoding",
    "upgrade",
}

# Upstream security headers stripped from proxied responses so the rewritten
# page can still render when served from our origin instead of the upstream's.
BLOCKED_HEADERS = {
    "x-frame-options",
    "content-security-policy",
}


def rewrite_html(html: str, base_url: str) -> str:
    """Rewrite link-bearing attributes so in-page navigation stays on /proxy.

    Each href/src/action value is resolved against *base_url* and replaced
    with ``/proxy?url=<url-encoded absolute URL>``. Fragment-only targets and
    ``javascript:``, ``data:``, ``mailto:``, ``tel:`` URIs are left untouched —
    they are not fetchable through the proxy, and rewriting them would break
    them.

    Args:
        html: the upstream HTML document.
        base_url: the final upstream URL, used to absolutize relative links.

    Returns:
        The serialized, rewritten HTML.
    """
    soup = BeautifulSoup(html, "html.parser")

    # (tag, attribute) pairs whose values reference other resources.
    tag_attr_pairs = [
        ("a", "href"),
        ("link", "href"),
        ("img", "src"),
        ("script", "src"),
        ("iframe", "src"),
        ("source", "src"),
        ("video", "src"),
        ("form", "action"),
    ]

    for tag, attr in tag_attr_pairs:
        for node in soup.find_all(tag):
            value = node.get(attr)
            if not value:
                continue

            low = value.strip().lower()
            # Non-fetchable targets: keep them as-is.
            if low.startswith(("#", "javascript:", "data:", "mailto:", "tel:")):
                continue

            absolute = urljoin(base_url, value)
            node[attr] = f"/proxy?url={quote(absolute, safe='')}"

    return str(soup)


# ==========================
# 反向代理主逻辑
# ==========================

# Redirect statuses we resolve manually so that every hop is re-validated.
_REDIRECT_STATUSES = {301, 302, 303, 307, 308}
_MAX_REDIRECTS = 10


def _filter_request_headers(headers) -> dict:
    """Copy client headers, dropping hop-by-hop, Host and Accept-Encoding."""
    out = {}
    for name, value in headers.items():
        lname = name.lower()
        if lname in HOP_BY_HOP_HEADERS or lname in ("host", "accept-encoding"):
            continue
        out[name] = value
    return out


def _filter_response_headers(headers) -> dict:
    """Copy upstream headers, dropping the ones this proxy manages itself."""
    out = {}
    for name, value in headers.items():
        lname = name.lower()
        if lname in HOP_BY_HOP_HEADERS or lname in BLOCKED_HEADERS:
            continue
        # Length, encoding and type are recomputed by our Response objects;
        # forwarding the upstream values would conflict with the re-encoded
        # body (e.g. the upstream Content-Type would override our
        # "text/html; charset=utf-8" on rewritten pages).
        if lname in ("content-length", "content-encoding", "content-type"):
            continue
        out[name] = value
    return out


async def _fetch_validated(client, method, url, body, headers):
    """Issue the upstream request, following redirects manually.

    httpx's automatic redirect following would let an upstream open-redirect
    bounce us to an internal address *after* the initial is_allowed_url
    check (SSRF). Here every Location target is re-validated before the next
    hop is fetched.

    Raises:
        HTTPException: 400 if a redirect targets a disallowed address,
            502 if the redirect chain exceeds _MAX_REDIRECTS.
    """
    for _ in range(_MAX_REDIRECTS + 1):
        resp = await client.request(
            method,
            url,
            content=body if method not in ("GET", "HEAD") else None,
            headers=headers,
        )
        if resp.status_code not in _REDIRECT_STATUSES:
            return resp

        location = resp.headers.get("location")
        if not location:
            # Redirect status without a target: nothing to follow.
            return resp

        url = urljoin(str(resp.url), location)
        if not is_allowed_url(url):
            raise HTTPException(
                status_code=400,
                detail="仅支持公网 http/https 地址,且不允许访问内网 / 本地地址。",
            )

        # Per RFC 9110: 303 always switches to GET; 301/302 conventionally
        # downgrade POST to GET. 307/308 preserve method and body.
        if resp.status_code == 303 or (
            resp.status_code in (301, 302) and method == "POST"
        ):
            method = "GET"
            body = b""

    raise HTTPException(status_code=502, detail="重定向次数过多")


@app.api_route(
    "/proxy",
    methods=["GET", "POST", "PUT", "PATCH", "DELETE", "HEAD", "OPTIONS"],
)
async def proxy(request: Request, url: str):
    """Forward the incoming request to *url* and relay the upstream response.

    HTML responses get their links rewritten to keep navigation on /proxy;
    media/CSS/JS pass through unchanged; anything else is returned as an
    attachment so raw binary is never rendered in the browser window.

    Raises:
        HTTPException: 400 for invalid/disallowed URLs, 502 for runaway
            redirect chains.
    """
    # 1. Normalize & validate the target URL.
    try:
        target_url = normalize_url(url)
    except ValueError:
        raise HTTPException(status_code=400, detail="无效 URL")

    if not is_allowed_url(target_url):
        raise HTTPException(
            status_code=400,
            detail="仅支持公网 http/https 地址,且不允许访问内网 / 本地地址。",
        )

    # 2. Build upstream request headers; force an uncompressed response so
    #    we can safely rewrite and re-measure the body.
    outgoing_headers = _filter_request_headers(request.headers)
    outgoing_headers["Accept-Encoding"] = "identity"

    body = await request.body()

    # 3. Forward the request; redirects are validated hop by hop.
    async with httpx.AsyncClient(
        follow_redirects=False,
        timeout=20.0,
    ) as client:
        upstream_resp = await _fetch_validated(
            client, request.method, target_url, body, outgoing_headers
        )

    content_type = (upstream_resp.headers.get("content-type") or "").lower()
    response_headers = _filter_response_headers(upstream_resp.headers)

    # 4. HTML: rewrite links, then serve the page re-encoded as UTF-8.
    if "text/html" in content_type or "application/xhtml+xml" in content_type:
        html_text = upstream_resp.text  # httpx decodes using the charset
        rewritten_html = rewrite_html(html_text, base_url=str(upstream_resp.url))

        return HTMLResponse(
            content=rewritten_html,
            status_code=upstream_resp.status_code,
            headers=response_headers,
            media_type="text/html; charset=utf-8",
        )

    # 5. Images / video / audio / fonts / CSS / JS: pass through verbatim.
    is_passthrough = (
        content_type.startswith(("image/", "video/", "audio/"))
        or "font" in content_type
        or content_type in (
            "text/css",
            "application/javascript",
            "text/javascript",
            "application/x-javascript",
        )
    )
    if is_passthrough:
        return Response(
            content=upstream_resp.content,
            status_code=upstream_resp.status_code,
            headers=response_headers,
            media_type=content_type or None,
        )

    # 6. Everything else (octet-stream, zip, …): force a download so the
    #    browser never tries to render raw bytes inline.
    if not any(k.lower() == "content-disposition" for k in response_headers):
        response_headers["Content-Disposition"] = "attachment; filename=downloaded.bin"

    return Response(
        content=upstream_resp.content,
        status_code=upstream_resp.status_code,
        headers=response_headers,
        media_type=content_type or "application/octet-stream",
    )