dpv007 committed on
Commit
2651019
·
verified ·
1 Parent(s): 1bd5bd0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -22
app.py CHANGED
@@ -6,7 +6,6 @@ from urllib.parse import urljoin, quote
6
 
7
  app = FastAPI()
8
 
9
-
10
  HTML_INDEX = """
11
  <!doctype html>
12
  <html>
@@ -87,26 +86,38 @@ async def index():
87
  return HTML_INDEX
88
 
89
 
90
- async def fetch_url(url: str) -> httpx.Response:
91
  """
92
- Fetch target URL via httpx.
 
93
  """
94
- async with httpx.AsyncClient(follow_redirects=True, timeout=15) as client:
95
- # Basic headers to mimic a browser
96
- headers = {
97
- "User-Agent": (
98
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
99
- "AppleWebKit/537.36 (KHTML, like Gecko) "
100
- "Chrome/120.0 Safari/537.36"
101
- )
102
- }
 
 
 
 
 
 
 
 
 
 
103
  resp = await client.get(url, headers=headers)
104
  return resp
105
 
106
 
107
  def rewrite_html(html: str, base_url: str) -> str:
108
  """
109
- Rewrite links in HTML so sub-resources go through /proxy as well.
 
110
  """
111
  soup = BeautifulSoup(html, "html.parser")
112
 
@@ -116,20 +127,34 @@ def rewrite_html(html: str, base_url: str) -> str:
116
  original = tag.attrs.get(attr)
117
  if not original:
118
  return
119
- # Handle things like //cdn.example.com, /path, relative paths, etc.
120
  absolute = urljoin(base_url, original)
121
  tag.attrs[attr] = f"/proxy?url={quote(absolute, safe='')}"
122
 
123
- # rewrite common URL-carrying tags
124
- for tag in soup.find_all(["a", "img", "script", "link", "form", "iframe"]):
 
 
 
 
 
 
 
 
 
 
 
 
125
  if tag.name in ("a", "link"):
126
  proxify("href", tag)
127
- if tag.name in ("img", "script", "iframe"):
128
  proxify("src", tag)
129
  if tag.name == "form":
130
  proxify("action", tag)
 
 
 
131
 
132
- # Optionally, inject a small banner to indicate proxied content
133
  banner = soup.new_tag("div")
134
  banner.string = f"Proxied via HF Space — {base_url}"
135
  banner["style"] = (
@@ -147,9 +172,14 @@ def rewrite_html(html: str, base_url: str) -> str:
147
  async def proxy(url: str, request: Request):
148
  """
149
  Reverse-proxy endpoint: /proxy?url=https://example.com
 
 
 
 
 
150
  """
151
  try:
152
- upstream = await fetch_url(url)
153
  except Exception as e:
154
  return HTMLResponse(
155
  f"<h1>Error</h1><p>Could not fetch {url}</p><pre>{e}</pre>",
@@ -158,16 +188,16 @@ async def proxy(url: str, request: Request):
158
 
159
  content_type = upstream.headers.get("content-type", "")
160
 
161
- # HTML: rewrite links so that all further requests go via /proxy
162
  if "text/html" in content_type:
163
  rewritten = rewrite_html(upstream.text, base_url=url)
164
  return HTMLResponse(content=rewritten, status_code=upstream.status_code)
165
 
166
- # For non-HTML (images, JS, CSS, fonts...), just pass through
167
- # while stripping hop-by-hop headers.
168
  safe_headers = {}
169
  for k, v in upstream.headers.items():
170
  lk = k.lower()
 
171
  if lk in ("content-encoding", "transfer-encoding", "connection"):
172
  continue
173
  safe_headers[k] = v
 
6
 
7
  app = FastAPI()
8
 
 
9
  HTML_INDEX = """
10
  <!doctype html>
11
  <html>
 
86
  return HTML_INDEX
87
 
88
 
89
+ async def fetch_url(url: str, request: Request) -> httpx.Response:
90
  """
91
+ Fetch target URL via httpx, forwarding some useful headers
92
+ (like Range for video/audio).
93
  """
94
+ client_headers = request.headers
95
+
96
+ headers = {
97
+ "User-Agent": client_headers.get(
98
+ "user-agent",
99
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
100
+ "AppleWebKit/537.36 (KHTML, like Gecko) "
101
+ "Chrome/120.0 Safari/537.36",
102
+ ),
103
+ "Accept": client_headers.get("accept", "*/*"),
104
+ "Accept-Language": client_headers.get("accept-language", "en-US,en;q=0.9"),
105
+ }
106
+
107
+ # Forward Range header for video/audio seeking
108
+ range_header = client_headers.get("range")
109
+ if range_header:
110
+ headers["Range"] = range_header
111
+
112
+ async with httpx.AsyncClient(follow_redirects=True, timeout=30) as client:
113
  resp = await client.get(url, headers=headers)
114
  return resp
115
 
116
 
117
  def rewrite_html(html: str, base_url: str) -> str:
118
  """
119
+ Rewrite links in HTML so sub-resources (scripts, css, images, video, etc.)
120
+ go through /proxy as well.
121
  """
122
  soup = BeautifulSoup(html, "html.parser")
123
 
 
127
  original = tag.attrs.get(attr)
128
  if not original:
129
  return
 
130
  absolute = urljoin(base_url, original)
131
  tag.attrs[attr] = f"/proxy?url={quote(absolute, safe='')}"
132
 
133
+ # Tags that can contain URLs
134
+ for tag in soup.find_all(
135
+ [
136
+ "a",
137
+ "img",
138
+ "script",
139
+ "link",
140
+ "form",
141
+ "iframe",
142
+ "video",
143
+ "audio",
144
+ "source",
145
+ ]
146
+ ):
147
  if tag.name in ("a", "link"):
148
  proxify("href", tag)
149
+ if tag.name in ("img", "script", "iframe", "video", "audio", "source"):
150
  proxify("src", tag)
151
  if tag.name == "form":
152
  proxify("action", tag)
153
+ # video poster attribute (thumbnail)
154
+ if tag.name == "video":
155
+ proxify("poster", tag)
156
 
157
+ # Optional: add a small banner so you know it's proxied
158
  banner = soup.new_tag("div")
159
  banner.string = f"Proxied via HF Space — {base_url}"
160
  banner["style"] = (
 
172
  async def proxy(url: str, request: Request):
173
  """
174
  Reverse-proxy endpoint: /proxy?url=https://example.com
175
+ Supports:
176
+ - HTML (rewritten)
177
+ - Images
178
+ - JS / CSS
179
+ - Video / audio (with Range header forwarded)
180
  """
181
  try:
182
+ upstream = await fetch_url(url, request)
183
  except Exception as e:
184
  return HTMLResponse(
185
  f"<h1>Error</h1><p>Could not fetch {url}</p><pre>{e}</pre>",
 
188
 
189
  content_type = upstream.headers.get("content-type", "")
190
 
191
+ # HTML: rewrite links so further requests go via /proxy
192
  if "text/html" in content_type:
193
  rewritten = rewrite_html(upstream.text, base_url=url)
194
  return HTMLResponse(content=rewritten, status_code=upstream.status_code)
195
 
196
+ # Non-HTML (images, videos, audio, JS, CSS, fonts...): pass through
 
197
  safe_headers = {}
198
  for k, v in upstream.headers.items():
199
  lk = k.lower()
200
+ # Strip hop-by-hop and encoding headers (let FastAPI handle compression)
201
  if lk in ("content-encoding", "transfer-encoding", "connection"):
202
  continue
203
  safe_headers[k] = v