triflix committed on
Commit
840dd01
·
verified ·
1 Parent(s): 033d521

Create main.py

Browse files
Files changed (1) hide show
  1. main.py +98 -0
main.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from fastapi import FastAPI, Request, HTTPException, Response
import httpx
from bs4 import BeautifulSoup
import urllib.parse

# NOTE(review): `Request` is imported but unused in the visible code — verify
# against the rest of the project before removing.

# Application instance for the HTML-rewriting proxy service.
app = FastAPI()
# JavaScript code that will be injected into proxied HTML pages.
# It intercepts dynamic URL changes (History API) and client-side network
# requests (fetch / XMLHttpRequest) so they keep routing through the proxy.
INJECTED_JS = """
<script>
(function() {
    // True when a URL already targets the proxy endpoint. Wrapping such a
    // URL a second time would double-encode it and hand the server an
    // unusable upstream URL, so all interceptors below skip these.
    function isProxied(url) {
        return typeof url === 'string' && url.indexOf('/proxy_full?url=') === 0;
    }

    // Intercept History API to rewrite URLs for proxying
    const originalPushState = history.pushState;
    history.pushState = function(state, title, url) {
        if (url && !isProxied(url)) {
            // Rewrite the URL to use our proxy endpoint
            const proxiedUrl = '/proxy_full?url=' + encodeURIComponent(url);
            return originalPushState.call(history, state, title, proxiedUrl);
        }
        return originalPushState.call(history, state, title, url);
    };

    // Intercept fetch to ensure all dynamic requests go through the proxy
    const originalFetch = window.fetch;
    window.fetch = function(input, init) {
        let url;
        if (typeof input === 'string') {
            url = input;
        } else if (input && input.url) {
            url = input.url;
        } else {
            return originalFetch(input, init);
        }
        if (isProxied(url)) {
            return originalFetch(input, init);
        }
        const proxiedUrl = '/proxy_full?url=' + encodeURIComponent(url);
        if (typeof input === 'object') {
            input = new Request(proxiedUrl, input);
        } else {
            input = proxiedUrl;
        }
        return originalFetch(input, init);
    };

    // Intercept XMLHttpRequest open() to proxy XHR requests. Forward the
    // remaining open() arguments (async flag, user, password) untouched
    // instead of forcing async=true and dropping credentials.
    const originalOpen = XMLHttpRequest.prototype.open;
    XMLHttpRequest.prototype.open = function(method, url) {
        const args = Array.prototype.slice.call(arguments);
        if (!isProxied(url)) {
            args[1] = '/proxy_full?url=' + encodeURIComponent(url);
        }
        return originalOpen.apply(this, args);
    };
})();
</script>
"""
@app.get("/proxy_full")
async def proxy_full(url: str):
    """Fetch *url*, rewrite its links to route back through this proxy,
    and inject JavaScript that keeps client-side navigation proxied.

    Non-HTML responses (images, CSS, JS, ...) are passed through
    unchanged. Raises HTTP 400 for a missing or non-http(s) URL and
    HTTP 502 when the upstream fetch fails.
    """
    if not url:
        raise HTTPException(status_code=400, detail="Missing 'url' query parameter")

    # Only plain web URLs may be proxied; rejecting other schemes
    # (file://, ftp://, javascript:, ...) limits the SSRF surface.
    # NOTE(review): this is a minimal check — a production proxy should
    # also block private/internal address ranges.
    if urllib.parse.urlsplit(url).scheme not in ("http", "https"):
        raise HTTPException(status_code=400, detail="Only http(s) URLs are supported")

    # Fetch the target URL using an async HTTP client. Follow redirects so
    # relative links are resolved against the *final* location, and bound
    # the request so a slow upstream cannot hang the worker indefinitely.
    try:
        async with httpx.AsyncClient(follow_redirects=True, timeout=15.0) as client:
            resp = await client.get(url)
    except httpx.HTTPError as exc:
        raise HTTPException(status_code=502, detail=f"Upstream fetch failed: {exc}") from exc

    content_type = resp.headers.get("Content-Type", "")
    # For non-HTML content (images, CSS, JS, etc.), simply return the response.
    if "text/html" not in content_type:
        return Response(content=resp.content, media_type=content_type, status_code=resp.status_code)

    # Parse HTML with BeautifulSoup
    soup = BeautifulSoup(resp.text, "html.parser")

    # Inject our JavaScript to intercept client-side navigation and network calls.
    injected = BeautifulSoup(INJECTED_JS, "html.parser")
    if soup.head:
        soup.head.append(injected)
    elif soup.body:
        soup.body.insert(0, injected)
    else:
        soup.insert(0, injected)

    # Rewrite URLs for key elements so that they go through the proxy.
    tags_attrs = {
        "a": "href",
        "img": "src",
        "script": "src",
        "link": "href",
        "form": "action",
    }
    for tag, attr in tags_attrs.items():
        for element in soup.find_all(tag):
            if not element.has_attr(attr):
                continue
            original = element[attr]
            # Skip links that are already proxied, in-page fragments, and
            # non-fetchable schemes that urljoin would otherwise mangle.
            if (original.startswith("/proxy_full?url=")
                    or original.startswith("#")
                    or original.startswith(("javascript:", "mailto:", "data:"))):
                continue
            # Build an absolute URL against the page's final (post-redirect)
            # location, then rewrite it to go through the proxy. safe="" makes
            # quote() percent-encode '/' as well, matching the client-side
            # encodeURIComponent behavior.
            new_url = urllib.parse.urljoin(str(resp.url), original)
            element[attr] = "/proxy_full?url=" + urllib.parse.quote(new_url, safe="")

    return Response(content=str(soup), media_type="text/html", status_code=resp.status_code)