sehsapneb committed on
Commit
c4c13a1
·
verified ·
1 Parent(s): c9ed46a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -36
app.py CHANGED
@@ -85,10 +85,8 @@ def is_allowed_url(url: str) -> bool:
85
  if parsed.scheme not in ("http", "https"):
86
  return False
87
 
88
- host = parsed.hostname or ""
89
- host = host.lower()
90
 
91
- # 禁止访问明显本地 / 内网地址
92
  if host in ("localhost", "127.0.0.1"):
93
  return False
94
 
@@ -106,7 +104,6 @@ def is_allowed_url(url: str) -> bool:
106
  return True
107
 
108
 
109
- # hop-by-hop 头:代理两边不应转发
110
  HOP_BY_HOP_HEADERS = {
111
  "connection",
112
  "keep-alive",
@@ -118,7 +115,6 @@ HOP_BY_HOP_HEADERS = {
118
  "upgrade",
119
  }
120
 
121
- # 会导致被 iframe 拦截的头
122
  BLOCKED_HEADERS = {
123
  "x-frame-options",
124
  "content-security-policy",
@@ -126,10 +122,7 @@ BLOCKED_HEADERS = {
126
 
127
 
128
  def rewrite_html(html: str, base_url: str) -> str:
129
- """
130
- 重写 HTML 里的链接,使之继续走 /proxy。
131
- 处理 a/link/img/script/iframe/video/source/form 等常见标签。
132
- """
133
  soup = BeautifulSoup(html, "html.parser")
134
 
135
  tag_attr_pairs = [
@@ -150,14 +143,11 @@ def rewrite_html(html: str, base_url: str) -> str:
150
  continue
151
 
152
  low = value.lower().strip()
153
- # 锚点、javascript: 不改
154
  if low.startswith("#") or low.startswith("javascript:"):
155
  continue
156
 
157
- # 相对链接 -> 绝对 URL
158
  absolute = urljoin(base_url, value)
159
  proxied = f"/proxy?url={quote(absolute, safe='')}"
160
-
161
  node[attr] = proxied
162
 
163
  return str(soup)
@@ -184,20 +174,22 @@ async def proxy(request: Request, url: str):
184
  detail="仅支持公网 http/https 地址,且不允许访问内网 / 本地地址。",
185
  )
186
 
187
- # 2. 准备要转发的请求头(过滤掉 hop-by-hop 头部和 Host)
188
  outgoing_headers = {}
189
  for k, v in request.headers.items():
190
  lk = k.lower()
191
  if lk in HOP_BY_HOP_HEADERS:
192
  continue
193
- if lk == "host":
194
  continue
195
  outgoing_headers[k] = v
196
 
197
- # 请求体
 
 
198
  body = await request.body()
199
 
200
- # 3. 用 httpx 转发请求
201
  async with httpx.AsyncClient(
202
  follow_redirects=True,
203
  timeout=20.0,
@@ -209,35 +201,26 @@ async def proxy(request: Request, url: str):
209
  headers=outgoing_headers,
210
  )
211
 
212
- # httpx 默认会帮你解压 gzip/deflate,upstream_resp.content 一般是“解压后的”内容
213
- content_type = upstream_resp.headers.get("content-type", "") or ""
214
 
215
- # 4. 返回响应头(删掉 hop-by-hop、content-length、以及会阻止 iframe 的头)
216
  response_headers = {}
217
  for k, v in upstream_resp.headers.items():
218
  lk = k.lower()
219
  if lk in HOP_BY_HOP_HEADERS:
220
  continue
221
- if lk == "content-length":
 
222
  continue
223
  if lk in BLOCKED_HEADERS:
224
  continue
225
  response_headers[k] = v
226
 
227
- # 5. 如果是 HTML,就重写里面的链接
228
- if "text/html" in content_type.lower() or "application/xhtml+xml" in content_type.lower():
229
- # upstream_resp.text 会根据 charset 自动解码成字符串
230
- html_text = upstream_resp.text
231
- rewritten_html = rewrite_html(
232
- html_text,
233
- base_url=str(upstream_resp.url),
234
- )
235
 
236
- # 我们重新编码 HTML,所以不能再带 content-encoding
237
- response_headers.pop("content-encoding", None)
238
- response_headers.pop("Content-Encoding", None)
239
-
240
- # 统一成 utf-8 文本,避免浏览器乱猜导致部分中文变成问号 / 乱码
241
  return HTMLResponse(
242
  content=rewritten_html,
243
  status_code=upstream_resp.status_code,
@@ -245,11 +228,33 @@ async def proxy(request: Request, url: str):
245
  media_type="text/html; charset=utf-8",
246
  )
247
 
248
- # 6. 其他类型(CSS/JS/图片/二进制等)原样透传
249
- # 这里直接把 bytes 给浏览器,保持原来的 Content-Type / Content-Encoding
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
  return Response(
251
  content=upstream_resp.content,
252
  status_code=upstream_resp.status_code,
253
  headers=response_headers,
254
- media_type=content_type or None,
255
  )
 
85
  if parsed.scheme not in ("http", "https"):
86
  return False
87
 
88
+ host = (parsed.hostname or "").lower()
 
89
 
 
90
  if host in ("localhost", "127.0.0.1"):
91
  return False
92
 
 
104
  return True
105
 
106
 
 
107
  HOP_BY_HOP_HEADERS = {
108
  "connection",
109
  "keep-alive",
 
115
  "upgrade",
116
  }
117
 
 
118
  BLOCKED_HEADERS = {
119
  "x-frame-options",
120
  "content-security-policy",
 
122
 
123
 
124
  def rewrite_html(html: str, base_url: str) -> str:
125
+ """重写 HTML 中的链接,使站内跳转继续走 /proxy。"""
 
 
 
126
  soup = BeautifulSoup(html, "html.parser")
127
 
128
  tag_attr_pairs = [
 
143
  continue
144
 
145
  low = value.lower().strip()
 
146
  if low.startswith("#") or low.startswith("javascript:"):
147
  continue
148
 
 
149
  absolute = urljoin(base_url, value)
150
  proxied = f"/proxy?url={quote(absolute, safe='')}"
 
151
  node[attr] = proxied
152
 
153
  return str(soup)
 
174
  detail="仅支持公网 http/https 地址,且不允许访问内网 / 本地地址。",
175
  )
176
 
177
+ # 2. 构造上游请求头(去掉 hop-by-hop、host、accept-encoding)
178
  outgoing_headers = {}
179
  for k, v in request.headers.items():
180
  lk = k.lower()
181
  if lk in HOP_BY_HOP_HEADERS:
182
  continue
183
+ if lk in ("host", "accept-encoding"):
184
  continue
185
  outgoing_headers[k] = v
186
 
187
+ # 统一告诉上游:不要压缩(避免各种编码问题)
188
+ outgoing_headers["Accept-Encoding"] = "identity"
189
+
190
  body = await request.body()
191
 
192
+ # 3. 转发请求
193
  async with httpx.AsyncClient(
194
  follow_redirects=True,
195
  timeout=20.0,
 
201
  headers=outgoing_headers,
202
  )
203
 
204
+ content_type = (upstream_resp.headers.get("content-type") or "").lower()
 
205
 
206
+ # 4. 构造要返回的响应头
207
  response_headers = {}
208
  for k, v in upstream_resp.headers.items():
209
  lk = k.lower()
210
  if lk in HOP_BY_HOP_HEADERS:
211
  continue
212
+ if lk in ("content-length", "content-encoding"):
213
+ # 长度和压缩交给我们自己处理
214
  continue
215
  if lk in BLOCKED_HEADERS:
216
  continue
217
  response_headers[k] = v
218
 
219
+ # 5. HTML:重写链接 + 直接输出页面
220
+ if "text/html" in content_type or "application/xhtml+xml" in content_type:
221
+ html_text = upstream_resp.text  # httpx 按 charset 解码
222
+ rewritten_html = rewrite_html(html_text, base_url=str(upstream_resp.url))
 
 
 
 
223
 
 
 
 
 
 
224
  return HTMLResponse(
225
  content=rewritten_html,
226
  status_code=upstream_resp.status_code,
 
228
  media_type="text/html; charset=utf-8",
229
  )
230
 
231
+ # 6. 图片 / 视频 / 音频 / 字体 / CSS / JS:原样透传(浏览器不会把它们显示成一大堆字)
232
+ if (
233
+ content_type.startswith("image/")
234
+ or content_type.startswith("video/")
235
+ or content_type.startswith("audio/")
236
+ or "font" in content_type
237
+ or content_type in (
238
+ "text/css",
239
+ "application/javascript",
240
+ "text/javascript",
241
+ "application/x-javascript",
242
+ )
243
+ ):
244
+ return Response(
245
+ content=upstream_resp.content,
246
+ status_code=upstream_resp.status_code,
247
+ headers=response_headers,
248
+ media_type=content_type or None,
249
+ )
250
+
251
+ # 7. 其它(如 application/octet-stream、zip、二进制流):强制当附件下载,避免在窗口里看到乱码
252
+ if "content-disposition" not in {k.lower(): v for k, v in response_headers.items()}:
253
+ response_headers["Content-Disposition"] = "attachment; filename=downloaded.bin"
254
+
255
  return Response(
256
  content=upstream_resp.content,
257
  status_code=upstream_resp.status_code,
258
  headers=response_headers,
259
+ media_type=content_type or "application/octet-stream",
260
  )