triflix committed on
Commit
a0e40dd
·
verified ·
1 Parent(s): f459e61

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +39 -30
main.py CHANGED
@@ -5,11 +5,11 @@ import urllib.parse
5
 
6
  app = FastAPI()
7
 
8
- # JavaScript injected into HTML to intercept dynamic navigation and AJAX calls.
9
  INJECTED_JS = """
10
  <script>
 
11
  (function() {
12
- // Intercept history.pushState so dynamic URL changes use our proxy.
13
  const originalPushState = history.pushState;
14
  history.pushState = function(state, title, url) {
15
  if (url) {
@@ -19,7 +19,7 @@ INJECTED_JS = """
19
  return originalPushState.call(history, state, title, url);
20
  };
21
 
22
- // Intercept fetch() requests to route them through our proxy.
23
  const originalFetch = window.fetch;
24
  window.fetch = function(input, init) {
25
  let url;
@@ -39,36 +39,52 @@ INJECTED_JS = """
39
  return originalFetch(input, init);
40
  };
41
 
42
- // Intercept XMLHttpRequest open() calls.
43
  const originalOpen = XMLHttpRequest.prototype.open;
44
  XMLHttpRequest.prototype.open = function(method, url) {
45
  const proxiedUrl = '/?url=' + encodeURIComponent(url);
46
  return originalOpen.apply(this, [method, proxiedUrl, true]);
47
  };
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  })();
49
  </script>
50
  """
51
 
52
  async def fetch_and_rewrite(target_url: str) -> Response:
53
  async with httpx.AsyncClient() as client:
54
- # Fetch the target URL.
55
  resp = await client.get(target_url)
56
  content_type = resp.headers.get("Content-Type", "")
57
 
58
- # If not HTML (CSS, JS, images, etc.), return content directly.
59
  if "text/html" not in content_type:
60
  return Response(content=resp.content, media_type=content_type, status_code=resp.status_code)
61
-
62
- # Parse HTML content.
63
  soup = BeautifulSoup(resp.text, "html.parser")
64
-
65
- # --- Inject a <base> Tag ---
66
- # This makes sure that all relative URLs in the HTML resolve correctly.
 
 
 
 
67
  parsed_target = urllib.parse.urlparse(target_url)
68
  base_href = f"{parsed_target.scheme}://{parsed_target.netloc}"
69
  if soup.head:
70
  # Remove any existing <base> tags.
71
- for base in soup.head.find_all('base'):
72
  base.decompose()
73
  base_tag = soup.new_tag("base", href=base_href)
74
  soup.head.insert(0, base_tag)
@@ -77,16 +93,16 @@ async def fetch_and_rewrite(target_url: str) -> Response:
77
  base_tag = soup.new_tag("base", href=base_href)
78
  head_tag.insert(0, base_tag)
79
  soup.insert(0, head_tag)
80
-
81
  # --- Inject JavaScript for Dynamic Routing ---
82
- # Place the JS in the <body> (or at the top if no body tag exists).
83
  if soup.body:
84
  soup.body.insert(0, BeautifulSoup(INJECTED_JS, "html.parser"))
85
  else:
86
  soup.insert(0, BeautifulSoup(INJECTED_JS, "html.parser"))
87
-
88
  # --- Rewrite Resource URLs ---
89
- # Update links, images, scripts, CSS links, and form actions to route via the proxy.
90
  tags_attrs = {
91
  "a": "href",
92
  "img": "src",
@@ -98,39 +114,32 @@ async def fetch_and_rewrite(target_url: str) -> Response:
98
  for element in soup.find_all(tag):
99
  if element.has_attr(attr):
100
  orig = element[attr]
101
- # Skip javascript: or mailto: links.
102
- if orig.startswith("javascript:") or orig.startswith("mailto:"):
103
  continue
104
- # Build an absolute URL using the target URL as base.
105
  new_url = urllib.parse.urljoin(target_url, orig)
106
- # Route it through the proxy.
107
  element[attr] = "/?url=" + urllib.parse.quote(new_url)
108
-
109
  return Response(content=str(soup), media_type="text/html", status_code=resp.status_code)
110
 
111
- # Catch-all route to handle any path.
112
  @app.get("/{full_path:path}")
113
  async def catch_all(full_path: str, request: Request):
114
  query_params = dict(request.query_params)
115
-
116
- # If a "url" query parameter is provided, this is the initial load.
117
  if "url" in query_params:
118
  target_url = query_params["url"]
119
  else:
120
- # Otherwise, try to rebuild the target URL using a stored cookie.
121
  target_base = request.cookies.get("target_base")
122
  if not target_base:
123
  return Response("No target URL provided.", status_code=400)
124
- qs = request.url.query # Preserve any query string.
125
  target_url = urllib.parse.urljoin(target_base, full_path)
126
  if qs:
127
  target_url += "?" + qs
128
-
129
  response = await fetch_and_rewrite(target_url)
130
-
131
- # Store the base URL (scheme + host) in a cookie for subsequent requests.
132
  parsed_target = urllib.parse.urlparse(target_url)
133
  base_url = f"{parsed_target.scheme}://{parsed_target.netloc}"
134
  response.set_cookie("target_base", base_url)
135
-
136
  return response
 
5
 
6
  app = FastAPI()
7
 
8
+ # Injected JavaScript now also intercepts anchor clicks.
9
  INJECTED_JS = """
10
  <script>
11
+ // Intercept history.pushState so dynamic URL changes are routed through the proxy.
12
  (function() {
 
13
  const originalPushState = history.pushState;
14
  history.pushState = function(state, title, url) {
15
  if (url) {
 
19
  return originalPushState.call(history, state, title, url);
20
  };
21
 
22
+ // Intercept fetch() requests.
23
  const originalFetch = window.fetch;
24
  window.fetch = function(input, init) {
25
  let url;
 
39
  return originalFetch(input, init);
40
  };
41
 
42
+ // Intercept XMLHttpRequest.open().
43
  const originalOpen = XMLHttpRequest.prototype.open;
44
  XMLHttpRequest.prototype.open = function(method, url) {
45
  const proxiedUrl = '/?url=' + encodeURIComponent(url);
46
  return originalOpen.apply(this, [method, proxiedUrl, true]);
47
  };
48
+
49
+ // Intercept anchor clicks to keep navigation within the proxy.
50
+ document.addEventListener('click', function(event) {
51
+ const target = event.target.closest('a');
52
+ if (target && target.href) {
53
+ // Skip if already proxied or if special attributes exist.
54
+ if (target.getAttribute('data-no-proxy') || target.href.indexOf('/?url=') === 0) {
55
+ return;
56
+ }
57
+ event.preventDefault();
58
+ window.location.href = '/?url=' + encodeURIComponent(target.href);
59
+ }
60
+ });
61
  })();
62
  </script>
63
  """
64
 
65
  async def fetch_and_rewrite(target_url: str) -> Response:
66
  async with httpx.AsyncClient() as client:
 
67
  resp = await client.get(target_url)
68
  content_type = resp.headers.get("Content-Type", "")
69
 
70
+ # For non-HTML resources (CSS, JS, images, etc.), return the content directly.
71
  if "text/html" not in content_type:
72
  return Response(content=resp.content, media_type=content_type, status_code=resp.status_code)
73
+
74
+ # Parse the HTML content.
75
  soup = BeautifulSoup(resp.text, "html.parser")
76
+
77
+ # Remove any Content Security Policy meta tags that might block our injected scripts.
78
+ for meta in soup.find_all("meta", attrs={"http-equiv": "Content-Security-Policy"}):
79
+ meta.decompose()
80
+
81
+ # --- Insert a <base> Tag ---
82
+ # This ensures that relative URLs in the HTML resolve against the target domain.
83
  parsed_target = urllib.parse.urlparse(target_url)
84
  base_href = f"{parsed_target.scheme}://{parsed_target.netloc}"
85
  if soup.head:
86
  # Remove any existing <base> tags.
87
+ for base in soup.head.find_all("base"):
88
  base.decompose()
89
  base_tag = soup.new_tag("base", href=base_href)
90
  soup.head.insert(0, base_tag)
 
93
  base_tag = soup.new_tag("base", href=base_href)
94
  head_tag.insert(0, base_tag)
95
  soup.insert(0, head_tag)
96
+
97
  # --- Inject JavaScript for Dynamic Routing ---
98
+ # This script intercepts dynamic navigation and network calls.
99
  if soup.body:
100
  soup.body.insert(0, BeautifulSoup(INJECTED_JS, "html.parser"))
101
  else:
102
  soup.insert(0, BeautifulSoup(INJECTED_JS, "html.parser"))
103
+
104
  # --- Rewrite Resource URLs ---
105
+ # Rewrite URLs in various tags so that they are loaded through the proxy.
106
  tags_attrs = {
107
  "a": "href",
108
  "img": "src",
 
114
  for element in soup.find_all(tag):
115
  if element.has_attr(attr):
116
  orig = element[attr]
117
+ # Skip if already proxied or if it’s a javascript/mailto link.
118
+ if orig.startswith("/?url=") or orig.startswith("javascript:") or orig.startswith("mailto:"):
119
  continue
 
120
  new_url = urllib.parse.urljoin(target_url, orig)
 
121
  element[attr] = "/?url=" + urllib.parse.quote(new_url)
122
+
123
  return Response(content=str(soup), media_type="text/html", status_code=resp.status_code)
124
 
125
+ # Catch-all route that uses a query parameter or cookie to rebuild target URLs.
126
  @app.get("/{full_path:path}")
127
  async def catch_all(full_path: str, request: Request):
128
  query_params = dict(request.query_params)
 
 
129
  if "url" in query_params:
130
  target_url = query_params["url"]
131
  else:
 
132
  target_base = request.cookies.get("target_base")
133
  if not target_base:
134
  return Response("No target URL provided.", status_code=400)
135
+ qs = request.url.query
136
  target_url = urllib.parse.urljoin(target_base, full_path)
137
  if qs:
138
  target_url += "?" + qs
139
+
140
  response = await fetch_and_rewrite(target_url)
141
+ # Store the target’s base URL in a cookie for subsequent requests.
 
142
  parsed_target = urllib.parse.urlparse(target_url)
143
  base_url = f"{parsed_target.scheme}://{parsed_target.netloc}"
144
  response.set_cookie("target_base", base_url)
 
145
  return response