Update app.py
Browse files
app.py
CHANGED
|
@@ -6,7 +6,6 @@ from urllib.parse import urljoin, quote
|
|
| 6 |
|
| 7 |
app = FastAPI()
|
| 8 |
|
| 9 |
-
|
| 10 |
HTML_INDEX = """
|
| 11 |
<!doctype html>
|
| 12 |
<html>
|
|
@@ -87,26 +86,38 @@ async def index():
|
|
| 87 |
return HTML_INDEX
|
| 88 |
|
| 89 |
|
| 90 |
-
async def fetch_url(url: str) -> httpx.Response:
|
| 91 |
"""
|
| 92 |
-
Fetch target URL via httpx
|
|
|
|
| 93 |
"""
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
resp = await client.get(url, headers=headers)
|
| 104 |
return resp
|
| 105 |
|
| 106 |
|
| 107 |
def rewrite_html(html: str, base_url: str) -> str:
|
| 108 |
"""
|
| 109 |
-
Rewrite links in HTML so sub-resources
|
|
|
|
| 110 |
"""
|
| 111 |
soup = BeautifulSoup(html, "html.parser")
|
| 112 |
|
|
@@ -116,20 +127,34 @@ def rewrite_html(html: str, base_url: str) -> str:
|
|
| 116 |
original = tag.attrs.get(attr)
|
| 117 |
if not original:
|
| 118 |
return
|
| 119 |
-
# Handle things like //cdn.example.com, /path, relative paths, etc.
|
| 120 |
absolute = urljoin(base_url, original)
|
| 121 |
tag.attrs[attr] = f"/proxy?url={quote(absolute, safe='')}"
|
| 122 |
|
| 123 |
-
#
|
| 124 |
-
for tag in soup.find_all(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
if tag.name in ("a", "link"):
|
| 126 |
proxify("href", tag)
|
| 127 |
-
if tag.name in ("img", "script", "iframe"):
|
| 128 |
proxify("src", tag)
|
| 129 |
if tag.name == "form":
|
| 130 |
proxify("action", tag)
|
|
|
|
|
|
|
|
|
|
| 131 |
|
| 132 |
-
#
|
| 133 |
banner = soup.new_tag("div")
|
| 134 |
banner.string = f"Proxied via HF Space — {base_url}"
|
| 135 |
banner["style"] = (
|
|
@@ -147,9 +172,14 @@ def rewrite_html(html: str, base_url: str) -> str:
|
|
| 147 |
async def proxy(url: str, request: Request):
|
| 148 |
"""
|
| 149 |
Reverse-proxy endpoint: /proxy?url=https://example.com
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
"""
|
| 151 |
try:
|
| 152 |
-
upstream = await fetch_url(url)
|
| 153 |
except Exception as e:
|
| 154 |
return HTMLResponse(
|
| 155 |
f"<h1>Error</h1><p>Could not fetch {url}</p><pre>{e}</pre>",
|
|
@@ -158,16 +188,16 @@ async def proxy(url: str, request: Request):
|
|
| 158 |
|
| 159 |
content_type = upstream.headers.get("content-type", "")
|
| 160 |
|
| 161 |
-
# HTML: rewrite links so
|
| 162 |
if "text/html" in content_type:
|
| 163 |
rewritten = rewrite_html(upstream.text, base_url=url)
|
| 164 |
return HTMLResponse(content=rewritten, status_code=upstream.status_code)
|
| 165 |
|
| 166 |
-
#
|
| 167 |
-
# while stripping hop-by-hop headers.
|
| 168 |
safe_headers = {}
|
| 169 |
for k, v in upstream.headers.items():
|
| 170 |
lk = k.lower()
|
|
|
|
| 171 |
if lk in ("content-encoding", "transfer-encoding", "connection"):
|
| 172 |
continue
|
| 173 |
safe_headers[k] = v
|
|
|
|
| 6 |
|
| 7 |
app = FastAPI()
|
| 8 |
|
|
|
|
| 9 |
HTML_INDEX = """
|
| 10 |
<!doctype html>
|
| 11 |
<html>
|
|
|
|
| 86 |
return HTML_INDEX
|
| 87 |
|
| 88 |
|
| 89 |
+
async def fetch_url(url: str, request: Request) -> httpx.Response:
    """
    Download ``url`` with httpx on behalf of the incoming request.

    The client's User-Agent, Accept and Accept-Language headers are passed
    upstream (with browser-like fallbacks when the client sent none), and a
    Range header is forwarded when present so media players can seek.

    Redirects are followed and the request times out after 30 seconds.
    Any exception raised by httpx propagates to the caller.
    """
    incoming = request.headers

    fallback_agent = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0 Safari/537.36"
    )

    # (outgoing header name, value used when the client did not send it)
    forwarded_with_defaults = (
        ("User-Agent", fallback_agent),
        ("Accept", "*/*"),
        ("Accept-Language", "en-US,en;q=0.9"),
    )
    outgoing = {
        name: incoming.get(name.lower(), default)
        for name, default in forwarded_with_defaults
    }

    # Range has no sensible default: only forward it when the client asked
    # for a byte range (video/audio seeking).
    byte_range = incoming.get("range")
    if byte_range:
        outgoing["Range"] = byte_range

    async with httpx.AsyncClient(follow_redirects=True, timeout=30) as session:
        return await session.get(url, headers=outgoing)
|
| 115 |
|
| 116 |
|
| 117 |
def rewrite_html(html: str, base_url: str) -> str:
|
| 118 |
"""
|
| 119 |
+
Rewrite links in HTML so sub-resources (scripts, css, images, video, etc.)
|
| 120 |
+
go through /proxy as well.
|
| 121 |
"""
|
| 122 |
soup = BeautifulSoup(html, "html.parser")
|
| 123 |
|
|
|
|
| 127 |
original = tag.attrs.get(attr)
|
| 128 |
if not original:
|
| 129 |
return
|
|
|
|
| 130 |
absolute = urljoin(base_url, original)
|
| 131 |
tag.attrs[attr] = f"/proxy?url={quote(absolute, safe='')}"
|
| 132 |
|
| 133 |
+
# Tags that can contain URLs
|
| 134 |
+
for tag in soup.find_all(
|
| 135 |
+
[
|
| 136 |
+
"a",
|
| 137 |
+
"img",
|
| 138 |
+
"script",
|
| 139 |
+
"link",
|
| 140 |
+
"form",
|
| 141 |
+
"iframe",
|
| 142 |
+
"video",
|
| 143 |
+
"audio",
|
| 144 |
+
"source",
|
| 145 |
+
]
|
| 146 |
+
):
|
| 147 |
if tag.name in ("a", "link"):
|
| 148 |
proxify("href", tag)
|
| 149 |
+
if tag.name in ("img", "script", "iframe", "video", "audio", "source"):
|
| 150 |
proxify("src", tag)
|
| 151 |
if tag.name == "form":
|
| 152 |
proxify("action", tag)
|
| 153 |
+
# video poster attribute (thumbnail)
|
| 154 |
+
if tag.name == "video":
|
| 155 |
+
proxify("poster", tag)
|
| 156 |
|
| 157 |
+
# Optional: add a small banner so you know it's proxied
|
| 158 |
banner = soup.new_tag("div")
|
| 159 |
banner.string = f"Proxied via HF Space — {base_url}"
|
| 160 |
banner["style"] = (
|
|
|
|
| 172 |
async def proxy(url: str, request: Request):
|
| 173 |
"""
|
| 174 |
Reverse-proxy endpoint: /proxy?url=https://example.com
|
| 175 |
+
Supports:
|
| 176 |
+
- HTML (rewritten)
|
| 177 |
+
- Images
|
| 178 |
+
- JS / CSS
|
| 179 |
+
- Video / audio (with Range header forwarded)
|
| 180 |
"""
|
| 181 |
try:
|
| 182 |
+
upstream = await fetch_url(url, request)
|
| 183 |
except Exception as e:
|
| 184 |
return HTMLResponse(
|
| 185 |
f"<h1>Error</h1><p>Could not fetch {url}</p><pre>{e}</pre>",
|
|
|
|
| 188 |
|
| 189 |
content_type = upstream.headers.get("content-type", "")
|
| 190 |
|
| 191 |
+
# HTML: rewrite links so further requests go via /proxy
|
| 192 |
if "text/html" in content_type:
|
| 193 |
rewritten = rewrite_html(upstream.text, base_url=url)
|
| 194 |
return HTMLResponse(content=rewritten, status_code=upstream.status_code)
|
| 195 |
|
| 196 |
+
# Non-HTML (images, videos, audio, JS, CSS, fonts...): pass through
|
|
|
|
| 197 |
safe_headers = {}
|
| 198 |
for k, v in upstream.headers.items():
|
| 199 |
lk = k.lower()
|
| 200 |
+
# Strip hop-by-hop and encoding headers (httpx has already decoded the body, so the upstream Content-Encoding no longer applies)
|
| 201 |
if lk in ("content-encoding", "transfer-encoding", "connection"):
|
| 202 |
continue
|
| 203 |
safe_headers[k] = v
|