triflix committed on
Commit
2100890
·
verified ·
1 Parent(s): a0e40dd

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +122 -40
main.py CHANGED
@@ -5,11 +5,15 @@ import urllib.parse
5
 
6
  app = FastAPI()
7
 
8
- # Injected JavaScript now also intercepts anchor clicks.
 
 
 
 
9
  INJECTED_JS = """
10
  <script>
11
- // Intercept history.pushState so dynamic URL changes are routed through the proxy.
12
  (function() {
 
13
  const originalPushState = history.pushState;
14
  history.pushState = function(state, title, url) {
15
  if (url) {
@@ -19,7 +23,7 @@ INJECTED_JS = """
19
  return originalPushState.call(history, state, title, url);
20
  };
21
 
22
- // Intercept fetch() requests.
23
  const originalFetch = window.fetch;
24
  window.fetch = function(input, init) {
25
  let url;
@@ -46,11 +50,10 @@ INJECTED_JS = """
46
  return originalOpen.apply(this, [method, proxiedUrl, true]);
47
  };
48
 
49
- // Intercept anchor clicks to keep navigation within the proxy.
50
  document.addEventListener('click', function(event) {
51
  const target = event.target.closest('a');
52
  if (target && target.href) {
53
- // Skip if already proxied or if special attributes exist.
54
  if (target.getAttribute('data-no-proxy') || target.href.indexOf('/?url=') === 0) {
55
  return;
56
  }
@@ -58,32 +61,88 @@ INJECTED_JS = """
58
  window.location.href = '/?url=' + encodeURIComponent(target.href);
59
  }
60
  });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  })();
62
  </script>
63
  """
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  async def fetch_and_rewrite(target_url: str) -> Response:
 
 
 
 
 
 
 
66
  async with httpx.AsyncClient() as client:
67
  resp = await client.get(target_url)
68
- content_type = resp.headers.get("Content-Type", "")
69
 
70
- # For non-HTML resources (CSS, JS, images, etc.), return the content directly.
71
  if "text/html" not in content_type:
 
72
  return Response(content=resp.content, media_type=content_type, status_code=resp.status_code)
73
-
74
- # Parse the HTML content.
75
  soup = BeautifulSoup(resp.text, "html.parser")
76
-
77
- # Remove any Content Security Policy meta tags that might block our injected scripts.
78
  for meta in soup.find_all("meta", attrs={"http-equiv": "Content-Security-Policy"}):
79
  meta.decompose()
80
-
81
- # --- Insert a <base> Tag ---
82
- # This ensures that relative URLs in the HTML resolve against the target domain.
83
  parsed_target = urllib.parse.urlparse(target_url)
84
  base_href = f"{parsed_target.scheme}://{parsed_target.netloc}"
85
  if soup.head:
86
- # Remove any existing <base> tags.
87
  for base in soup.head.find_all("base"):
88
  base.decompose()
89
  base_tag = soup.new_tag("base", href=base_href)
@@ -93,38 +152,52 @@ async def fetch_and_rewrite(target_url: str) -> Response:
93
  base_tag = soup.new_tag("base", href=base_href)
94
  head_tag.insert(0, base_tag)
95
  soup.insert(0, head_tag)
96
-
97
- # --- Inject JavaScript for Dynamic Routing ---
98
- # This script intercepts dynamic navigation and network calls.
99
  if soup.body:
100
  soup.body.insert(0, BeautifulSoup(INJECTED_JS, "html.parser"))
101
  else:
102
  soup.insert(0, BeautifulSoup(INJECTED_JS, "html.parser"))
103
-
104
- # --- Rewrite Resource URLs ---
105
- # Rewrite URLs in various tags so that they are loaded through the proxy.
106
- tags_attrs = {
107
- "a": "href",
108
- "img": "src",
109
- "script": "src",
110
- "link": "href",
111
- "form": "action"
112
- }
113
- for tag, attr in tags_attrs.items():
114
- for element in soup.find_all(tag):
115
- if element.has_attr(attr):
116
- orig = element[attr]
117
- # Skip if already proxied or if it’s a javascript/mailto link.
118
- if orig.startswith("/?url=") or orig.startswith("javascript:") or orig.startswith("mailto:"):
119
- continue
120
- new_url = urllib.parse.urljoin(target_url, orig)
121
- element[attr] = "/?url=" + urllib.parse.quote(new_url)
122
-
123
  return Response(content=str(soup), media_type="text/html", status_code=resp.status_code)
124
 
125
- # Catch-all route that uses a query parameter or cookie to rebuild target URLs.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  @app.get("/{full_path:path}")
127
  async def catch_all(full_path: str, request: Request):
 
 
 
 
 
128
  query_params = dict(request.query_params)
129
  if "url" in query_params:
130
  target_url = query_params["url"]
@@ -138,8 +211,17 @@ async def catch_all(full_path: str, request: Request):
138
  target_url += "?" + qs
139
 
140
  response = await fetch_and_rewrite(target_url)
141
- # Store the target’s base URL in a cookie for subsequent requests.
 
142
  parsed_target = urllib.parse.urlparse(target_url)
143
  base_url = f"{parsed_target.scheme}://{parsed_target.netloc}"
144
  response.set_cookie("target_base", base_url)
 
145
  return response
 
 
 
 
 
 
 
 
5
 
6
# FastAPI application instance; every route decorator below registers on it.
app = FastAPI()
7
 
8
+ # ---------------------------------------------------------------------------
9
+ # Injected JavaScript for Dynamic Interception & Real-Time Updates
10
+ # ---------------------------------------------------------------------------
11
+ # This script intercepts client-side navigation (history, fetch, XHR, anchor clicks)
12
+ # and uses MutationObserver to reprocess dynamically added DOM nodes.
13
  INJECTED_JS = """
14
  <script>
 
15
  (function() {
16
+ // Intercept history.pushState to route dynamic navigations.
17
  const originalPushState = history.pushState;
18
  history.pushState = function(state, title, url) {
19
  if (url) {
 
23
  return originalPushState.call(history, state, title, url);
24
  };
25
 
26
+ // Intercept fetch() calls.
27
  const originalFetch = window.fetch;
28
  window.fetch = function(input, init) {
29
  let url;
 
50
  return originalOpen.apply(this, [method, proxiedUrl, true]);
51
  };
52
 
53
+ // Intercept anchor clicks to ensure navigation goes through the proxy.
54
  document.addEventListener('click', function(event) {
55
  const target = event.target.closest('a');
56
  if (target && target.href) {
 
57
  if (target.getAttribute('data-no-proxy') || target.href.indexOf('/?url=') === 0) {
58
  return;
59
  }
 
61
  window.location.href = '/?url=' + encodeURIComponent(target.href);
62
  }
63
  });
64
+
65
+ // Use MutationObserver to catch and rewrite dynamically added elements.
66
+ const observer = new MutationObserver(function(mutations) {
67
+ mutations.forEach(function(mutation) {
68
+ mutation.addedNodes.forEach(function(node) {
69
+ if (node.nodeType === Node.ELEMENT_NODE) {
70
+ reProxyElement(node);
71
+ }
72
+ });
73
+ });
74
+ });
75
+ observer.observe(document.body, { childList: true, subtree: true });
76
+
77
+ // Rewrites URL attributes for an element and its children.
78
+ function reProxyElement(element) {
79
+ const urlAttrs = ['href', 'src', 'action', 'srcset'];
80
+ urlAttrs.forEach(function(attr) {
81
+ if (element.hasAttribute(attr)) {
82
+ const value = element.getAttribute(attr);
83
+ if (value && !value.startsWith('/?url=') &&
84
+ !value.startsWith('javascript:') && !value.startsWith('mailto:')) {
85
+ element.setAttribute(attr, '/?url=' + encodeURIComponent(value));
86
+ }
87
+ }
88
+ });
89
+ Array.from(element.children).forEach(child => reProxyElement(child));
90
+ }
91
  })();
92
  </script>
93
  """
94
 
95
# ---------------------------------------------------------------------------
# Helper Function: Rewrite URLs for All Relevant Attributes
# ---------------------------------------------------------------------------
def rewrite_urls(soup, target_url, proxy_prefix="/?url="):
    """
    Rewrite URL-bearing attributes (href, src, action, srcset) on every
    element of *soup* so the resources are fetched through the proxy.

    Relative URLs are resolved against *target_url* first.  Already-proxied
    values, non-navigable schemes (mailto:, javascript:, data:, tel:) and
    pure fragment links (``#...``) are left untouched.  ``srcset`` is a
    comma-separated list of "URL [descriptor]" candidates, so each candidate
    URL is rewritten individually while its descriptor is preserved.

    Returns the (mutated) soup for call-chaining convenience.
    """
    url_attrs = ("href", "src", "action", "srcset")
    skip_schemes = ("mailto:", "javascript:", "data:", "tel:")

    def _proxy(value):
        # Resolve against the page URL, then route through the proxy.
        absolute = urllib.parse.urljoin(target_url, value)
        return proxy_prefix + urllib.parse.quote(absolute)

    for element in soup.find_all(True):
        for attr in list(element.attrs):
            if attr not in url_attrs:
                continue
            orig_value = element.get(attr)
            # Skip empty values, already-proxied URLs, special schemes,
            # and same-page fragment links.
            if (not orig_value
                    or orig_value.startswith(proxy_prefix)
                    or orig_value.startswith(skip_schemes)
                    or orig_value.startswith("#")):
                continue
            if attr == "srcset":
                # Rewrite each candidate URL, keeping its width/density
                # descriptor (e.g. "img2x.png 2x") intact.
                candidates = []
                for candidate in orig_value.split(","):
                    candidate = candidate.strip()
                    if not candidate:
                        continue
                    pieces = candidate.split(None, 1)
                    pieces[0] = _proxy(pieces[0])
                    candidates.append(" ".join(pieces))
                element[attr] = ", ".join(candidates)
            else:
                element[attr] = _proxy(orig_value)
    return soup
116
+
117
# ---------------------------------------------------------------------------
# Core Function: Fetch and Rewrite the Target HTML
# ---------------------------------------------------------------------------
async def fetch_and_rewrite(target_url: str) -> Response:
    """
    Fetch *target_url* and return a proxied ``Response``.

    Non-HTML payloads are passed through unchanged.  HTML documents are
    rewritten before being returned:

      - Conflicting Content-Security-Policy ``<meta>`` tags are removed so
        the injected script is allowed to run.
      - A ``<base>`` tag pointing at the target origin is inserted so any
        URL the rewriter misses still resolves against the right host.
      - ``INJECTED_JS`` is placed at the top of ``<body>`` to intercept
        dynamic navigation (history/fetch/XHR/clicks) on the client.
      - Static URL attributes are rewritten via ``rewrite_urls`` so every
        resource loads through the proxy.

    Raises:
        HTTPException: 502 when the upstream request fails at the
            transport level (DNS, connect, read errors).
    """
    try:
        # follow_redirects keeps 3xx chains inside the proxy; without it the
        # browser would follow the upstream Location header directly and
        # navigation would escape the proxy.
        async with httpx.AsyncClient(follow_redirects=True) as client:
            resp = await client.get(target_url)
    except httpx.RequestError as exc:
        raise HTTPException(status_code=502, detail=f"Upstream request failed: {exc}") from exc

    content_type = resp.headers.get("Content-Type", "")
    if "text/html" not in content_type:
        # For non-HTML content (CSS, JS, images, ...), return it directly.
        return Response(content=resp.content, media_type=content_type, status_code=resp.status_code)

    soup = BeautifulSoup(resp.text, "html.parser")

    # Remove Content Security Policy meta tags that might block our injected scripts.
    for meta in soup.find_all("meta", attrs={"http-equiv": "Content-Security-Policy"}):
        meta.decompose()

    # Insert (or replace) a <base> tag for proper resolution of relative URLs.
    parsed_target = urllib.parse.urlparse(target_url)
    base_href = f"{parsed_target.scheme}://{parsed_target.netloc}"
    if soup.head:
        for base in soup.head.find_all("base"):
            base.decompose()
        base_tag = soup.new_tag("base", href=base_href)
        soup.head.insert(0, base_tag)
    else:
        # No <head> present: synthesize one so the <base> tag has a home.
        head_tag = soup.new_tag("head")
        base_tag = soup.new_tag("base", href=base_href)
        head_tag.insert(0, base_tag)
        soup.insert(0, head_tag)

    # Inject our dynamic JavaScript into the beginning of the <body>.
    if soup.body:
        soup.body.insert(0, BeautifulSoup(INJECTED_JS, "html.parser"))
    else:
        soup.insert(0, BeautifulSoup(INJECTED_JS, "html.parser"))

    # Rewrite URLs for all relevant elements.
    soup = rewrite_urls(soup, target_url)

    return Response(content=str(soup), media_type="text/html", status_code=resp.status_code)
166
 
167
# ---------------------------------------------------------------------------
# Simple Proxy Endpoint: /proxy_full
# ---------------------------------------------------------------------------
@app.get("/proxy_full")
async def proxy_full(url: str):
    """
    A simple proxy endpoint that fetches the given URL and rewrites
    its HTML so that all resource URLs route through the proxy.

    Unlike the catch-all route, this endpoint does not inject the dynamic
    interception script or set the target-base cookie; it only performs
    static URL rewriting.
    """
    if not url:
        # FastAPI rejects a missing required query param with 422; this
        # guards against an explicitly empty value (?url=).
        raise HTTPException(status_code=400, detail="Missing 'url' query parameter")

    async with httpx.AsyncClient() as client:
        resp = await client.get(url)

    content_type = resp.headers.get("Content-Type", "")
    if "text/html" not in content_type:
        # Non-HTML content is returned as-is.
        return Response(resp.content, media_type=content_type, status_code=resp.status_code)

    soup = BeautifulSoup(resp.text, "html.parser")
    soup = rewrite_urls(soup, url)

    # Propagate the upstream status code (previously this branch always
    # returned 200, masking upstream errors such as 404/500 pages).
    return Response(str(soup), media_type="text/html", status_code=resp.status_code)
190
+
191
+ # ---------------------------------------------------------------------------
192
+ # Catch-All Dynamic Proxy Endpoint
193
+ # ---------------------------------------------------------------------------
194
  @app.get("/{full_path:path}")
195
  async def catch_all(full_path: str, request: Request):
196
+ """
197
+ Catch-all endpoint for dynamic proxying.
198
+ Determines the target URL via a query parameter or a stored cookie.
199
+ Processes the HTML to inject dynamic JavaScript and rewrite URLs.
200
+ """
201
  query_params = dict(request.query_params)
202
  if "url" in query_params:
203
  target_url = query_params["url"]
 
211
  target_url += "?" + qs
212
 
213
  response = await fetch_and_rewrite(target_url)
214
+
215
+ # Save the target's base URL in a cookie for subsequent requests.
216
  parsed_target = urllib.parse.urlparse(target_url)
217
  base_url = f"{parsed_target.scheme}://{parsed_target.netloc}"
218
  response.set_cookie("target_base", base_url)
219
+
220
  return response
221
+
222
# ---------------------------------------------------------------------------
# Run the Application on Port 7860
# ---------------------------------------------------------------------------
# Script entry point: when executed directly, serve the app with uvicorn,
# bound to all interfaces on port 7860.  uvicorn is imported lazily so
# importing this module (e.g. by an external ASGI server) does not require it.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)