Spaces:

sehsapneb
/

mi

Sleeping

App Files Files Community

sehsapneb committed on Nov 17, 2025

Commit

c56eb99

verified ·

1 Parent(s): 2b3f02b

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -18

app.py CHANGED Viewed

@@ -6,7 +6,9 @@ from bs4 import BeautifulSoup
 app = FastAPI()
-# ---- 简单首页（输入网址的页面） ----
 INDEX_HTML = """
 <!DOCTYPE html>
@@ -62,7 +64,9 @@ async def index():
     return INDEX_HTML
-# ---- 工具函数：校验和规范 URL ----
 def normalize_url(raw: str) -> str:
     """如果没有 scheme，自动加上 http://"""
@@ -87,17 +91,22 @@ def is_allowed_url(url: str) -> bool:
     # 禁止访问一些明显的本地 / 内网地址
     if host in ("localhost", "127.0.0.1"):
         return False
-    private_prefixes = ("10.", "192.168.", "172.16.", "172.17.", "172.18.",
-                        "172.19.", "172.20.", "172.21.", "172.22.",
-                        "172.23.", "172.24.", "172.25.", "172.26.",
-                        "172.27.", "172.28.", "172.29.", "172.30.",
-                        "172.31.")
     if any(host.startswith(p) for p in private_prefixes):
         return False
     return True
 HOP_BY_HOP_HEADERS = {
     "connection",
     "keep-alive",
@@ -109,13 +118,20 @@ HOP_BY_HOP_HEADERS = {
     "upgrade",
 }
-# ---- 重写 HTML 里的链接，使之继续走 /proxy ----
 def rewrite_html(html: str, base_url: str) -> str:
     soup = BeautifulSoup(html, "html.parser")
-    # 要处理的标签和对应属性
     tag_attr_pairs = [
         ("a", "href"),
         ("link", "href"),
@@ -133,12 +149,12 @@ def rewrite_html(html: str, base_url: str) -> str:
             if not value:
                 continue
-            # 锚点或 javascript: 等不处理
             low = value.lower().strip()
             if low.startswith("#") or low.startswith("javascript:"):
                 continue
-            # 相对地址 -> 绝对地址
             absolute = urljoin(base_url, value)
             proxied = f"/proxy?url={quote(absolute, safe='')}"
@@ -147,7 +163,9 @@ def rewrite_html(html: str, base_url: str) -> str:
     return str(soup)
-# ---- 反向代理核心：/proxy ----
 @app.api_route(
     "/proxy",
@@ -166,7 +184,7 @@ async def proxy(request: Request, url: str):
             detail="仅支持公网 http/https 地址，且不允许访问内网 / 本地地址。",
         )
-    # 2. 准备要转发的请求头（过滤掉 hop-by-hop 头部）
     outgoing_headers = {}
     for k, v in request.headers.items():
         lk = k.lower()
@@ -180,7 +198,10 @@ async def proxy(request: Request, url: str):
     body = await request.body()
     # 3. 用 httpx 转发请求
-    async with httpx.AsyncClient(follow_redirects=True, timeout=20.0) as client:
         upstream_resp = await client.request(
             request.method,
             target_url,
@@ -190,30 +211,39 @@ async def proxy(request: Request, url: str):
     content_type = upstream_resp.headers.get("content-type", "")
-    # 4. 回传响应头（过滤掉 hop-by-hop + 一些可能冲突的）
     response_headers = {}
     for k, v in upstream_resp.headers.items():
         lk = k.lower()
         if lk in HOP_BY_HOP_HEADERS:
             continue
-        if lk in ("content-length", "content-encoding"):
-            # 交给 FastAPI 重新计算
             continue
         response_headers[k] = v
     # 5. 如果是 HTML，就重写里面的链接
     if "text/html" in content_type:
         rewritten_html = rewrite_html(
             upstream_resp.text,
             base_url=str(upstream_resp.url),
         )
         return HTMLResponse(
             content=rewritten_html,
             status_code=upstream_resp.status_code,
             headers=response_headers,
         )
-    # 6. 其他类型（CSS/JS/图片等）原样透传
     return Response(
         content=upstream_resp.content,
         status_code=upstream_resp.status_code,

 app = FastAPI()
+# ==========================
+# 首页：输入网址的简单页面
+# ==========================
 INDEX_HTML = """
 <!DOCTYPE html>
     return INDEX_HTML
+# ==========================
+# 工具函数
+# ==========================
 def normalize_url(raw: str) -> str:
     """如果没有 scheme，自动加上 http://"""
     # 禁止访问一些明显的本地 / 内网地址
     if host in ("localhost", "127.0.0.1"):
         return False
+    private_prefixes = (
+        "10.",
+        "192.168.",
+        "172.16.", "172.17.", "172.18.", "172.19.",
+        "172.20.", "172.21.", "172.22.", "172.23.",
+        "172.24.", "172.25.", "172.26.", "172.27.",
+        "172.28.", "172.29.", "172.30.", "172.31.",
+    )
     if any(host.startswith(p) for p in private_prefixes):
         return False
     return True
+# hop-by-hop 头：代理两边不应转发
 HOP_BY_HOP_HEADERS = {
     "connection",
     "keep-alive",
     "upgrade",
 }
+# 会导致被 iframe 拦截的头，在 Hugging Face 的 iframe 环境里可以适当去掉
+BLOCKED_HEADERS = {
+    "x-frame-options",
+    "content-security-policy",
+}
 def rewrite_html(html: str, base_url: str) -> str:
+    """
+    重写 HTML 里的链接，使之继续走 /proxy。
+    处理 a/link/img/script/iframe/video/source/form 等常见标签。
+    """
     soup = BeautifulSoup(html, "html.parser")
     tag_attr_pairs = [
         ("a", "href"),
         ("link", "href"),
             if not value:
                 continue
             low = value.lower().strip()
+            # 锚点、javascript: 不改
             if low.startswith("#") or low.startswith("javascript:"):
                 continue
+            # 相对链接 -> 绝对 URL
             absolute = urljoin(base_url, value)
             proxied = f"/proxy?url={quote(absolute, safe='')}"
     return str(soup)
+# ==========================
+# 反向代理主逻辑
+# ==========================
 @app.api_route(
     "/proxy",
             detail="仅支持公网 http/https 地址，且不允许访问内网 / 本地地址。",
         )
+    # 2. 准备要转发的请求头（过滤掉 hop-by-hop 头部和 Host）
     outgoing_headers = {}
     for k, v in request.headers.items():
         lk = k.lower()
     body = await request.body()
     # 3. 用 httpx 转发请求
+    async with httpx.AsyncClient(
+        follow_redirects=True,
+        timeout=20.0,
+    ) as client:
         upstream_resp = await client.request(
             request.method,
             target_url,
     content_type = upstream_resp.headers.get("content-type", "")
+    # 4. 回传响应头（删掉 hop-by-hop、content-length、以及会阻止 iframe 的头）
     response_headers = {}
     for k, v in upstream_resp.headers.items():
         lk = k.lower()
         if lk in HOP_BY_HOP_HEADERS:
             continue
+        if lk == "content-length":
+            # 让 FastAPI 自己计算长度
+            continue
+        if lk in BLOCKED_HEADERS:
             continue
         response_headers[k] = v
     # 5. 如果是 HTML，就重写里面的链接
     if "text/html" in content_type:
+        # upstream_resp.text 会根据 charset 自动解码成字符串
         rewritten_html = rewrite_html(
             upstream_resp.text,
             base_url=str(upstream_resp.url),
         )
+        # 这里我们已经重新编码 HTML 了，所以不能再带 content-encoding
+        response_headers.pop("content-encoding", None)
+        response_headers.pop("Content-Encoding", None)
         return HTMLResponse(
             content=rewritten_html,
             status_code=upstream_resp.status_code,
             headers=response_headers,
         )
+    # 6. 其他类型（CSS/JS/图片/二进制等）原样透传
+    # 保留 Content-Encoding，这样浏览器才能正确解压 / 展示，避免乱码
     return Response(
         content=upstream_resp.content,
         status_code=upstream_resp.status_code,