Spaces:
Paused
Paused
Update main.py
Browse files
main.py
CHANGED
|
@@ -5,11 +5,11 @@ import urllib.parse
|
|
| 5 |
|
| 6 |
app = FastAPI()
|
| 7 |
|
| 8 |
-
# JavaScript injected into proxied pages so pushState, fetch() and XMLHttpRequest go through the proxy.
|
| 9 |
INJECTED_JS = """
|
| 10 |
<script>
|
|
|
|
| 11 |
(function() {
|
| 12 |
-
// Intercept history.pushState so dynamic URL changes use our proxy.
|
| 13 |
const originalPushState = history.pushState;
|
| 14 |
history.pushState = function(state, title, url) {
|
| 15 |
if (url) {
|
|
@@ -19,7 +19,7 @@ INJECTED_JS = """
|
|
| 19 |
return originalPushState.call(history, state, title, url);
|
| 20 |
};
|
| 21 |
|
| 22 |
-
// Intercept fetch() requests
|
| 23 |
const originalFetch = window.fetch;
|
| 24 |
window.fetch = function(input, init) {
|
| 25 |
let url;
|
|
@@ -39,36 +39,52 @@ INJECTED_JS = """
|
|
| 39 |
return originalFetch(input, init);
|
| 40 |
};
|
| 41 |
|
| 42 |
-
// Intercept XMLHttpRequest
|
| 43 |
const originalOpen = XMLHttpRequest.prototype.open;
|
| 44 |
XMLHttpRequest.prototype.open = function(method, url) {
|
| 45 |
const proxiedUrl = '/?url=' + encodeURIComponent(url);
|
| 46 |
return originalOpen.apply(this, [method, proxiedUrl, true]);
|
| 47 |
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
})();
|
| 49 |
</script>
|
| 50 |
"""
|
| 51 |
|
| 52 |
async def fetch_and_rewrite(target_url: str) -> Response:
|
| 53 |
async with httpx.AsyncClient() as client:
|
| 54 |
-
# Fetch the target URL.
|
| 55 |
resp = await client.get(target_url)
|
| 56 |
content_type = resp.headers.get("Content-Type", "")
|
| 57 |
|
| 58 |
-
# For non-HTML responses, return the content directly without rewriting.
|
| 59 |
if "text/html" not in content_type:
|
| 60 |
return Response(content=resp.content, media_type=content_type, status_code=resp.status_code)
|
| 61 |
-
|
| 62 |
-
# Parse HTML content.
|
| 63 |
soup = BeautifulSoup(resp.text, "html.parser")
|
| 64 |
-
|
| 65 |
-
# --- Insert a <base> tag pointing at the target's scheme and host ---
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
parsed_target = urllib.parse.urlparse(target_url)
|
| 68 |
base_href = f"{parsed_target.scheme}://{parsed_target.netloc}"
|
| 69 |
if soup.head:
|
| 70 |
# Remove any existing <base> tags.
|
| 71 |
-
for base in soup.head.find_all(
|
| 72 |
base.decompose()
|
| 73 |
base_tag = soup.new_tag("base", href=base_href)
|
| 74 |
soup.head.insert(0, base_tag)
|
|
@@ -77,16 +93,16 @@ async def fetch_and_rewrite(target_url: str) -> Response:
|
|
| 77 |
base_tag = soup.new_tag("base", href=base_href)
|
| 78 |
head_tag.insert(0, base_tag)
|
| 79 |
soup.insert(0, head_tag)
|
| 80 |
-
|
| 81 |
# --- Inject JavaScript for Dynamic Routing ---
|
| 82 |
-
# The injected script intercepts history.pushState, fetch() and XMLHttpRequest.open().
|
| 83 |
if soup.body:
|
| 84 |
soup.body.insert(0, BeautifulSoup(INJECTED_JS, "html.parser"))
|
| 85 |
else:
|
| 86 |
soup.insert(0, BeautifulSoup(INJECTED_JS, "html.parser"))
|
| 87 |
-
|
| 88 |
# --- Rewrite Resource URLs ---
|
| 89 |
-
# Rewrite URL attributes on selected tags so they load through the proxy.
|
| 90 |
tags_attrs = {
|
| 91 |
"a": "href",
|
| 92 |
"img": "src",
|
|
@@ -98,39 +114,32 @@ async def fetch_and_rewrite(target_url: str) -> Response:
|
|
| 98 |
for element in soup.find_all(tag):
|
| 99 |
if element.has_attr(attr):
|
| 100 |
orig = element[attr]
|
| 101 |
-
# Skip javascript: and mailto: links.
|
| 102 |
-
if orig.startswith("javascript:") or orig.startswith("mailto:"):
|
| 103 |
continue
|
| 104 |
-
# Build an absolute URL using the target URL as base.
|
| 105 |
new_url = urllib.parse.urljoin(target_url, orig)
|
| 106 |
-
# Route it through the proxy.
|
| 107 |
element[attr] = "/?url=" + urllib.parse.quote(new_url)
|
| 108 |
-
|
| 109 |
return Response(content=str(soup), media_type="text/html", status_code=resp.status_code)
|
| 110 |
|
| 111 |
-
# Catch-all route to proxy requests, using the "url" query parameter or the "target_base" cookie.
|
| 112 |
@app.get("/{full_path:path}")
|
| 113 |
async def catch_all(full_path: str, request: Request):
|
| 114 |
query_params = dict(request.query_params)
|
| 115 |
-
|
| 116 |
-
# If a "url" query parameter is provided, this is the initial load.
|
| 117 |
if "url" in query_params:
|
| 118 |
target_url = query_params["url"]
|
| 119 |
else:
|
| 120 |
-
# Otherwise, try to rebuild the target URL using a stored cookie.
|
| 121 |
target_base = request.cookies.get("target_base")
|
| 122 |
if not target_base:
|
| 123 |
return Response("No target URL provided.", status_code=400)
|
| 124 |
-
qs = request.url.query
|
| 125 |
target_url = urllib.parse.urljoin(target_base, full_path)
|
| 126 |
if qs:
|
| 127 |
target_url += "?" + qs
|
| 128 |
-
|
| 129 |
response = await fetch_and_rewrite(target_url)
|
| 130 |
-
|
| 131 |
-
# Store the base URL (scheme + host) in a cookie for subsequent requests.
|
| 132 |
parsed_target = urllib.parse.urlparse(target_url)
|
| 133 |
base_url = f"{parsed_target.scheme}://{parsed_target.netloc}"
|
| 134 |
response.set_cookie("target_base", base_url)
|
| 135 |
-
|
| 136 |
return response
|
|
|
|
| 5 |
|
| 6 |
app = FastAPI()
|
| 7 |
|
| 8 |
+
# Injected JavaScript now also intercepts anchor clicks.
|
| 9 |
INJECTED_JS = """
|
| 10 |
<script>
|
| 11 |
+
// Intercept history.pushState so dynamic URL changes are routed through the proxy.
|
| 12 |
(function() {
|
|
|
|
| 13 |
const originalPushState = history.pushState;
|
| 14 |
history.pushState = function(state, title, url) {
|
| 15 |
if (url) {
|
|
|
|
| 19 |
return originalPushState.call(history, state, title, url);
|
| 20 |
};
|
| 21 |
|
| 22 |
+
// Intercept fetch() requests.
|
| 23 |
const originalFetch = window.fetch;
|
| 24 |
window.fetch = function(input, init) {
|
| 25 |
let url;
|
|
|
|
| 39 |
return originalFetch(input, init);
|
| 40 |
};
|
| 41 |
|
| 42 |
+
// Intercept XMLHttpRequest.open().
|
| 43 |
const originalOpen = XMLHttpRequest.prototype.open;
|
| 44 |
XMLHttpRequest.prototype.open = function(method, url) {
|
| 45 |
const proxiedUrl = '/?url=' + encodeURIComponent(url);
|
| 46 |
return originalOpen.apply(this, [method, proxiedUrl, true]);
|
| 47 |
};
|
| 48 |
+
|
| 49 |
+
// Intercept anchor clicks to keep navigation within the proxy.
|
| 50 |
+
document.addEventListener('click', function(event) {
|
| 51 |
+
const target = event.target.closest('a');
|
| 52 |
+
if (target && target.href) {
|
| 53 |
+
// Skip if already proxied or if special attributes exist.
|
| 54 |
+
if (target.getAttribute('data-no-proxy') || target.href.indexOf('/?url=') === 0) {
|
| 55 |
+
return;
|
| 56 |
+
}
|
| 57 |
+
event.preventDefault();
|
| 58 |
+
window.location.href = '/?url=' + encodeURIComponent(target.href);
|
| 59 |
+
}
|
| 60 |
+
});
|
| 61 |
})();
|
| 62 |
</script>
|
| 63 |
"""
|
| 64 |
|
| 65 |
async def fetch_and_rewrite(target_url: str) -> Response:
|
| 66 |
async with httpx.AsyncClient() as client:
|
|
|
|
| 67 |
resp = await client.get(target_url)
|
| 68 |
content_type = resp.headers.get("Content-Type", "")
|
| 69 |
|
| 70 |
+
# For non-HTML resources (CSS, JS, images, etc.), return the content directly.
|
| 71 |
if "text/html" not in content_type:
|
| 72 |
return Response(content=resp.content, media_type=content_type, status_code=resp.status_code)
|
| 73 |
+
|
| 74 |
+
# Parse the HTML content.
|
| 75 |
soup = BeautifulSoup(resp.text, "html.parser")
|
| 76 |
+
|
| 77 |
+
# Remove any Content Security Policy meta tags that might block our injected scripts.
|
| 78 |
+
for meta in soup.find_all("meta", attrs={"http-equiv": "Content-Security-Policy"}):
|
| 79 |
+
meta.decompose()
|
| 80 |
+
|
| 81 |
+
# --- Insert a <base> Tag ---
|
| 82 |
+
# This ensures that relative URLs in the HTML resolve against the target domain.
|
| 83 |
parsed_target = urllib.parse.urlparse(target_url)
|
| 84 |
base_href = f"{parsed_target.scheme}://{parsed_target.netloc}"
|
| 85 |
if soup.head:
|
| 86 |
# Remove any existing <base> tags.
|
| 87 |
+
for base in soup.head.find_all("base"):
|
| 88 |
base.decompose()
|
| 89 |
base_tag = soup.new_tag("base", href=base_href)
|
| 90 |
soup.head.insert(0, base_tag)
|
|
|
|
| 93 |
base_tag = soup.new_tag("base", href=base_href)
|
| 94 |
head_tag.insert(0, base_tag)
|
| 95 |
soup.insert(0, head_tag)
|
| 96 |
+
|
| 97 |
# --- Inject JavaScript for Dynamic Routing ---
|
| 98 |
+
# This script intercepts dynamic navigation and network calls.
|
| 99 |
if soup.body:
|
| 100 |
soup.body.insert(0, BeautifulSoup(INJECTED_JS, "html.parser"))
|
| 101 |
else:
|
| 102 |
soup.insert(0, BeautifulSoup(INJECTED_JS, "html.parser"))
|
| 103 |
+
|
| 104 |
# --- Rewrite Resource URLs ---
|
| 105 |
+
# Rewrite URLs in various tags so that they are loaded through the proxy.
|
| 106 |
tags_attrs = {
|
| 107 |
"a": "href",
|
| 108 |
"img": "src",
|
|
|
|
| 114 |
for element in soup.find_all(tag):
|
| 115 |
if element.has_attr(attr):
|
| 116 |
orig = element[attr]
|
| 117 |
+
# Skip if already proxied or if it’s a javascript/mailto link.
|
| 118 |
+
if orig.startswith("/?url=") or orig.startswith("javascript:") or orig.startswith("mailto:"):
|
| 119 |
continue
|
|
|
|
| 120 |
new_url = urllib.parse.urljoin(target_url, orig)
|
|
|
|
| 121 |
element[attr] = "/?url=" + urllib.parse.quote(new_url)
|
| 122 |
+
|
| 123 |
return Response(content=str(soup), media_type="text/html", status_code=resp.status_code)
|
| 124 |
|
| 125 |
+
# Catch-all route that uses a query parameter or cookie to rebuild target URLs.
|
| 126 |
@app.get("/{full_path:path}")
|
| 127 |
async def catch_all(full_path: str, request: Request):
|
| 128 |
query_params = dict(request.query_params)
|
|
|
|
|
|
|
| 129 |
if "url" in query_params:
|
| 130 |
target_url = query_params["url"]
|
| 131 |
else:
|
|
|
|
| 132 |
target_base = request.cookies.get("target_base")
|
| 133 |
if not target_base:
|
| 134 |
return Response("No target URL provided.", status_code=400)
|
| 135 |
+
qs = request.url.query
|
| 136 |
target_url = urllib.parse.urljoin(target_base, full_path)
|
| 137 |
if qs:
|
| 138 |
target_url += "?" + qs
|
| 139 |
+
|
| 140 |
response = await fetch_and_rewrite(target_url)
|
| 141 |
+
# Store the target’s base URL in a cookie for subsequent requests.
|
|
|
|
| 142 |
parsed_target = urllib.parse.urlparse(target_url)
|
| 143 |
base_url = f"{parsed_target.scheme}://{parsed_target.netloc}"
|
| 144 |
response.set_cookie("target_base", base_url)
|
|
|
|
| 145 |
return response
|