Spaces:

triflix
/

testingproxy123

Paused

App Files Files Community

triflix committed on Mar 14, 2025

Commit

2100890

verified ·

1 Parent(s): a0e40dd

Update main.py

Browse files

Files changed (1) hide show

main.py +122 -40

main.py CHANGED Viewed

@@ -5,11 +5,15 @@ import urllib.parse
 app = FastAPI()
-# Injected JavaScript now also intercepts anchor clicks.
 INJECTED_JS = """
 <script>
-// Intercept history.pushState so dynamic URL changes are routed through the proxy.
 (function() {
     const originalPushState = history.pushState;
     history.pushState = function(state, title, url) {
         if (url) {
@@ -19,7 +23,7 @@ INJECTED_JS = """
         return originalPushState.call(history, state, title, url);
     };
-    // Intercept fetch() requests.
     const originalFetch = window.fetch;
     window.fetch = function(input, init) {
         let url;
@@ -46,11 +50,10 @@ INJECTED_JS = """
         return originalOpen.apply(this, [method, proxiedUrl, true]);
     };
-    // Intercept anchor clicks to keep navigation within the proxy.
     document.addEventListener('click', function(event) {
         const target = event.target.closest('a');
         if (target && target.href) {
-            // Skip if already proxied or if special attributes exist.
             if (target.getAttribute('data-no-proxy') || target.href.indexOf('/?url=') === 0) {
                 return;
             }
@@ -58,32 +61,88 @@ INJECTED_JS = """
             window.location.href = '/?url=' + encodeURIComponent(target.href);
         }
     });
 })();
 </script>
 """
 async def fetch_and_rewrite(target_url: str) -> Response:
     async with httpx.AsyncClient() as client:
         resp = await client.get(target_url)
-    content_type = resp.headers.get("Content-Type", "")
-    # For non-HTML resources (CSS, JS, images, etc.), return the content directly.
     if "text/html" not in content_type:
         return Response(content=resp.content, media_type=content_type, status_code=resp.status_code)
-    # Parse the HTML content.
     soup = BeautifulSoup(resp.text, "html.parser")
-    # Remove any Content Security Policy meta tags that might block our injected scripts.
     for meta in soup.find_all("meta", attrs={"http-equiv": "Content-Security-Policy"}):
         meta.decompose()
-    # --- Insert a <base> Tag ---
-    # This ensures that relative URLs in the HTML resolve against the target domain.
     parsed_target = urllib.parse.urlparse(target_url)
     base_href = f"{parsed_target.scheme}://{parsed_target.netloc}"
     if soup.head:
-        # Remove any existing <base> tags.
         for base in soup.head.find_all("base"):
             base.decompose()
         base_tag = soup.new_tag("base", href=base_href)
@@ -93,38 +152,52 @@ async def fetch_and_rewrite(target_url: str) -> Response:
         base_tag = soup.new_tag("base", href=base_href)
         head_tag.insert(0, base_tag)
         soup.insert(0, head_tag)
-    # --- Inject JavaScript for Dynamic Routing ---
-    # This script intercepts dynamic navigation and network calls.
     if soup.body:
         soup.body.insert(0, BeautifulSoup(INJECTED_JS, "html.parser"))
     else:
         soup.insert(0, BeautifulSoup(INJECTED_JS, "html.parser"))
-    # --- Rewrite Resource URLs ---
-    # Rewrite URLs in various tags so that they are loaded through the proxy.
-    tags_attrs = {
-        "a": "href",
-        "img": "src",
-        "script": "src",
-        "link": "href",
-        "form": "action"
-    }
-    for tag, attr in tags_attrs.items():
-        for element in soup.find_all(tag):
-            if element.has_attr(attr):
-                orig = element[attr]
-                # Skip if already proxied or if it’s a javascript/mailto link.
-                if orig.startswith("/?url=") or orig.startswith("javascript:") or orig.startswith("mailto:"):
-                    continue
-                new_url = urllib.parse.urljoin(target_url, orig)
-                element[attr] = "/?url=" + urllib.parse.quote(new_url)
     return Response(content=str(soup), media_type="text/html", status_code=resp.status_code)
-# Catch-all route that uses a query parameter or cookie to rebuild target URLs.
 @app.get("/{full_path:path}")
 async def catch_all(full_path: str, request: Request):
     query_params = dict(request.query_params)
     if "url" in query_params:
         target_url = query_params["url"]
@@ -138,8 +211,17 @@ async def catch_all(full_path: str, request: Request):
             target_url += "?" + qs
     response = await fetch_and_rewrite(target_url)
-    # Store the target’s base URL in a cookie for subsequent requests.
     parsed_target = urllib.parse.urlparse(target_url)
     base_url = f"{parsed_target.scheme}://{parsed_target.netloc}"
     response.set_cookie("target_base", base_url)
     return response

 app = FastAPI()
+# ---------------------------------------------------------------------------
+# Injected JavaScript for Dynamic Interception & Real-Time Updates
+# ---------------------------------------------------------------------------
+# This script intercepts client-side navigation (history, fetch, XHR, anchor clicks)
+# and uses MutationObserver to reprocess dynamically added DOM nodes.
 INJECTED_JS = """
 <script>
 (function() {
+    // Intercept history.pushState to route dynamic navigations.
     const originalPushState = history.pushState;
     history.pushState = function(state, title, url) {
         if (url) {
         return originalPushState.call(history, state, title, url);
     };
+    // Intercept fetch() calls.
     const originalFetch = window.fetch;
     window.fetch = function(input, init) {
         let url;
         return originalOpen.apply(this, [method, proxiedUrl, true]);
     };
+    // Intercept anchor clicks to ensure navigation goes through the proxy.
     document.addEventListener('click', function(event) {
         const target = event.target.closest('a');
         if (target && target.href) {
             if (target.getAttribute('data-no-proxy') || target.href.indexOf('/?url=') === 0) {
                 return;
             }
             window.location.href = '/?url=' + encodeURIComponent(target.href);
         }
     });
+    // Use MutationObserver to catch and rewrite dynamically added elements.
+    const observer = new MutationObserver(function(mutations) {
+        mutations.forEach(function(mutation) {
+            mutation.addedNodes.forEach(function(node) {
+                if (node.nodeType === Node.ELEMENT_NODE) {
+                    reProxyElement(node);
+                }
+            });
+        });
+    });
+    observer.observe(document.body, { childList: true, subtree: true });
+    // Rewrites URL attributes for an element and its children.
+    function reProxyElement(element) {
+        const urlAttrs = ['href', 'src', 'action', 'srcset'];
+        urlAttrs.forEach(function(attr) {
+            if (element.hasAttribute(attr)) {
+                const value = element.getAttribute(attr);
+                if (value && !value.startsWith('/?url=') &&
+                    !value.startsWith('javascript:') && !value.startsWith('mailto:')) {
+                    element.setAttribute(attr, '/?url=' + encodeURIComponent(value));
+                }
+            }
+        });
+        Array.from(element.children).forEach(child => reProxyElement(child));
+    }
 })();
 </script>
 """
+# ---------------------------------------------------------------------------
+# Helper Function: Rewrite URLs for All Relevant Attributes
+# ---------------------------------------------------------------------------
# Attributes whose values are URLs; ``srcset`` holds a comma-separated list.
_URL_ATTRS = ("href", "src", "action", "srcset")
# Schemes the proxy cannot fetch over HTTP; values using them are left alone.
_UNPROXIED_SCHEMES = ("javascript:", "mailto:", "data:", "blob:", "tel:", "about:")
# Characters that may separate or surround ``srcset`` candidates.
_SRCSET_SEPARATORS = " \t\n\r\f,"


def _proxy_one(raw_url, target_url, proxy_prefix):
    """Return *raw_url* routed through the proxy, or unchanged if it must not be.

    A value is left untouched when it is empty, already starts with
    *proxy_prefix*, or uses a scheme listed in ``_UNPROXIED_SCHEMES``
    (compared case-insensitively, since URL schemes are case-insensitive).
    """
    url = raw_url.strip()
    if (
        not url
        or url.startswith(proxy_prefix)
        or url.lower().startswith(_UNPROXIED_SCHEMES)
    ):
        return raw_url
    # Resolve relative references against the page they came from, then
    # percent-encode so the result is safe inside the proxy's query string.
    absolute = urllib.parse.urljoin(target_url, url)
    return proxy_prefix + urllib.parse.quote(absolute)


def _split_srcset(value):
    """Split a ``srcset`` value into ``(url, descriptor)`` pairs.

    The URL is the first whitespace-delimited token of each candidate, so
    commas *inside* a URL are preserved; a comma only ends a candidate when it
    trails the URL token or follows the descriptor.
    """
    candidates = []
    rest = value.strip(_SRCSET_SEPARATORS)
    while rest:
        pieces = rest.split(None, 1)
        if not pieces:
            break
        url = pieces[0]
        tail = pieces[1] if len(pieces) > 1 else ""
        if url.endswith(","):
            # "a.jpg, b.jpg 2x": the comma closes a descriptor-less candidate.
            candidates.append((url.rstrip(","), ""))
        else:
            descriptor, _, tail = tail.partition(",")
            candidates.append((url, descriptor.strip()))
        rest = tail.lstrip(_SRCSET_SEPARATORS)
    return candidates


def rewrite_urls(soup, target_url, proxy_prefix="/?url="):
    """Rewrite URL-bearing attributes so they load through the proxy.

    Every element returned by ``soup.find_all(True)`` is inspected for the
    attributes ``href``, ``src``, ``action`` and ``srcset``. Each URL is
    resolved against *target_url*, percent-encoded and prefixed with
    *proxy_prefix*. ``srcset`` is rewritten candidate by candidate, keeping
    each width/density descriptor.

    Values that are already proxied, or that use a scheme the proxy cannot
    fetch (``javascript:``, ``mailto:``, ``data:``, ``blob:``, ``tel:``,
    ``about:``), are left unchanged. Non-string attribute values are skipped.

    Args:
        soup: Parsed document exposing ``find_all``; modified in place.
        target_url: URL of the fetched page, used as the base for relative URLs.
        proxy_prefix: Prefix that routes a URL through the proxy.

    Returns:
        The same *soup* object, for call chaining.
    """
    for element in soup.find_all(True):
        for attr in _URL_ATTRS:
            value = element.attrs.get(attr)
            if not isinstance(value, str) or not value:
                continue
            if attr == "srcset":
                rewritten = ", ".join(
                    f"{_proxy_one(url, target_url, proxy_prefix)} {descriptor}".strip()
                    for url, descriptor in _split_srcset(value)
                )
            else:
                rewritten = _proxy_one(value, target_url, proxy_prefix)
            if rewritten != value:
                element[attr] = rewritten
    return soup
+# ---------------------------------------------------------------------------
+# Core Function: Fetch and Rewrite the Target HTML
+# ---------------------------------------------------------------------------
 async def fetch_and_rewrite(target_url: str) -> Response:
+    """
+    Asynchronously fetches the target URL, then parses and rewrites its HTML:
+    - Removes conflicting Content Security Policy meta tags.
+    - Inserts a <base> tag to resolve relative URLs.
+    - Injects JavaScript to intercept dynamic navigation and update DOM in real time.
+    - Rewrites URL attributes for a comprehensive set of tags.
+    """
     async with httpx.AsyncClient() as client:
         resp = await client.get(target_url)
+    content_type = resp.headers.get("Content-Type", "")
     if "text/html" not in content_type:
+        # For non-HTML content, return the response directly.
         return Response(content=resp.content, media_type=content_type, status_code=resp.status_code)
     soup = BeautifulSoup(resp.text, "html.parser")
+    # Remove Content Security Policy meta tags that might block our injected scripts.
     for meta in soup.find_all("meta", attrs={"http-equiv": "Content-Security-Policy"}):
         meta.decompose()
+    # Insert (or replace) a <base> tag for proper resolution of relative URLs.
     parsed_target = urllib.parse.urlparse(target_url)
     base_href = f"{parsed_target.scheme}://{parsed_target.netloc}"
     if soup.head:
         for base in soup.head.find_all("base"):
             base.decompose()
         base_tag = soup.new_tag("base", href=base_href)
         base_tag = soup.new_tag("base", href=base_href)
         head_tag.insert(0, base_tag)
         soup.insert(0, head_tag)
+    # Inject our dynamic JavaScript into the beginning of the <body>.
     if soup.body:
         soup.body.insert(0, BeautifulSoup(INJECTED_JS, "html.parser"))
     else:
         soup.insert(0, BeautifulSoup(INJECTED_JS, "html.parser"))
+    # Rewrite URLs for all relevant elements.
+    soup = rewrite_urls(soup, target_url)
     return Response(content=str(soup), media_type="text/html", status_code=resp.status_code)
+# ---------------------------------------------------------------------------
+# Simple Proxy Endpoint: /proxy_full
+# ---------------------------------------------------------------------------
@app.get("/proxy_full")
async def proxy_full(url: str):
    """Fetch *url* and return it with resource URLs routed through the proxy.

    Non-HTML responses are passed through unchanged. HTML responses are parsed
    and run through ``rewrite_urls``. In both cases the upstream status code is
    forwarded, so error pages from the target are not reported as 200.

    Raises:
        HTTPException: 400 when ``url`` is empty or whitespace-only; 502 when
            the upstream request fails at the transport level.
    """
    if not url or not url.strip():
        raise HTTPException(status_code=400, detail="Missing 'url' query parameter")
    try:
        async with httpx.AsyncClient() as client:
            resp = await client.get(url)
    except httpx.RequestError as exc:
        # Report an unreachable or unsupported target as a gateway error
        # instead of letting it surface as an unhandled 500.
        raise HTTPException(status_code=502, detail="Failed to fetch target URL") from exc
    content_type = resp.headers.get("Content-Type", "")
    if "text/html" not in content_type:
        return Response(resp.content, media_type=content_type, status_code=resp.status_code)
    soup = BeautifulSoup(resp.text, "html.parser")
    soup = rewrite_urls(soup, url)
    return Response(str(soup), media_type="text/html", status_code=resp.status_code)
+# ---------------------------------------------------------------------------
+# Catch-All Dynamic Proxy Endpoint
+# ---------------------------------------------------------------------------
 @app.get("/{full_path:path}")
 async def catch_all(full_path: str, request: Request):
+    """
+    Catch-all endpoint for dynamic proxying.
+    Determines the target URL via a query parameter or a stored cookie.
+    Processes the HTML to inject dynamic JavaScript and rewrite URLs.
+    """
     query_params = dict(request.query_params)
     if "url" in query_params:
         target_url = query_params["url"]
             target_url += "?" + qs
     response = await fetch_and_rewrite(target_url)
+    # Save the target's base URL in a cookie for subsequent requests.
     parsed_target = urllib.parse.urlparse(target_url)
     base_url = f"{parsed_target.scheme}://{parsed_target.netloc}"
     response.set_cookie("target_base", base_url)
     return response
+# ---------------------------------------------------------------------------
+# Run the Application on Port 7860
+# ---------------------------------------------------------------------------
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=7860)