Spaces:

triflix
/

testingproxy123

Paused

App Files Files Community

triflix committed on Mar 14, 2025

Commit

840dd01

verified ·

1 Parent(s): 033d521

Create main.py

Browse files

Files changed (1) hide show

main.py +98 -0

main.py ADDED Viewed

	@@ -0,0 +1,98 @@

+from fastapi import FastAPI, Request, HTTPException, Response
+import httpx
+from bs4 import BeautifulSoup
+import urllib.parse
+app = FastAPI()
+# JavaScript code that will be injected into proxied HTML pages.
+# It intercepts dynamic URL changes and client-side network requests.
+INJECTED_JS = """
+<script>
+// Intercept History API to rewrite URLs for proxying
+(function() {
+    const originalPushState = history.pushState;
+    history.pushState = function(state, title, url) {
+        if (url) {
+            // Rewrite the URL to use our proxy endpoint
+            const proxiedUrl = '/proxy_full?url=' + encodeURIComponent(url);
+            return originalPushState.call(history, state, title, proxiedUrl);
+        }
+        return originalPushState.call(history, state, title, url);
+    };
+    // Intercept fetch to ensure all dynamic requests go through the proxy
+    const originalFetch = window.fetch;
+    window.fetch = function(input, init) {
+        let url;
+        if (typeof input === 'string') {
+            url = input;
+        } else if (input && input.url) {
+            url = input.url;
+        } else {
+            return originalFetch(input, init);
+        }
+        const proxiedUrl = '/proxy_full?url=' + encodeURIComponent(url);
+        if (typeof input === 'object') {
+            input = new Request(proxiedUrl, input);
+        } else {
+            input = proxiedUrl;
+        }
+        return originalFetch(input, init);
+    };
+    // Intercept XMLHttpRequest open() method to proxy XHR requests
+    const originalOpen = XMLHttpRequest.prototype.open;
+    XMLHttpRequest.prototype.open = function(method, url) {
+        const proxiedUrl = '/proxy_full?url=' + encodeURIComponent(url);
+        return originalOpen.apply(this, [method, proxiedUrl, true]);
+    };
+})();
+</script>
+"""
+@app.get("/proxy_full")
+async def proxy_full(url: str):
+    if not url:
+        raise HTTPException(status_code=400, detail="Missing 'url' query parameter")
+    # Fetch the target URL using an async HTTP client.
+    async with httpx.AsyncClient() as client:
+        resp = await client.get(url)
+    content_type = resp.headers.get("Content-Type", "")
+    # For non-HTML content (images, CSS, JS, etc.), simply return the response.
+    if "text/html" not in content_type:
+        return Response(content=resp.content, media_type=content_type, status_code=resp.status_code)
+    # Parse HTML with BeautifulSoup
+    soup = BeautifulSoup(resp.text, "html.parser")
+    # Inject our JavaScript to intercept client-side navigation and network calls.
+    if soup.head:
+        soup.head.append(BeautifulSoup(INJECTED_JS, "html.parser"))
+    elif soup.body:
+        soup.body.insert(0, BeautifulSoup(INJECTED_JS, "html.parser"))
+    else:
+        soup.insert(0, BeautifulSoup(INJECTED_JS, "html.parser"))
+    # Rewrite URLs for key elements so that they go through the proxy.
+    tags_attrs = {
+        "a": "href",
+        "img": "src",
+        "script": "src",
+        "link": "href",
+        "form": "action"
+    }
+    for tag, attr in tags_attrs.items():
+        for element in soup.find_all(tag):
+            if element.has_attr(attr):
+                original = element[attr]
+                # Skip if already proxied
+                if original.startswith("/proxy_full?url="):
+                    continue
+                # Build an absolute URL and rewrite it to include our proxy
+                new_url = urllib.parse.urljoin(url, original)
+                element[attr] = "/proxy_full?url=" + urllib.parse.quote(new_url)
+    return Response(content=str(soup), media_type="text/html", status_code=resp.status_code)