triflix committed on
Commit
db32f30
·
verified ·
1 Parent(s): b616e61

Create Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +46 -0
Dockerfile ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Request, HTTPException, Response
2
+ import httpx
3
+ from bs4 import BeautifulSoup
4
+ import urllib.parse
5
+
6
app = FastAPI()

# Schemes the proxy is willing to fetch or rewrite; everything else is refused
# (for the requested URL) or left untouched (for links inside a page).
_ALLOWED_SCHEMES = ("http", "https")

# Tag name -> attribute whose URL should be routed back through this proxy.
_REWRITE_ATTRS = {
    "a": "href",
    "img": "src",
    "script": "src",
    "link": "href",
}


def _has_allowed_scheme(candidate: str) -> bool:
    """Return True when *candidate* parses as an http or https URL."""
    try:
        scheme = urllib.parse.urlsplit(candidate).scheme
    except ValueError:
        # urlsplit rejects some malformed netlocs (e.g. unbalanced IPv6 brackets).
        return False
    return scheme.lower() in _ALLOWED_SCHEMES


def _rewrite_target(base: str, original: str) -> str:
    """Return *original* rewritten so the browser requests it via /proxy_full.

    Same-page fragments and references that cannot be fetched over HTTP(S)
    (mailto:, javascript:, data:, tel:, ...) are returned unchanged, because
    sending them through the proxy would only break them.
    """
    candidate = original.strip()
    if not candidate or candidate.startswith("#"):
        return original
    try:
        absolute = urllib.parse.urljoin(base, candidate)
    except ValueError:
        return original
    if not _has_allowed_scheme(absolute):
        return original
    return f"/proxy_full?url={urllib.parse.quote(absolute)}"


@app.get("/proxy_full")
async def proxy_full(url: str):
    """Fetch *url* and return it, rewriting links in HTML to go through this proxy.

    Non-HTML responses are passed through with the upstream body, content type
    and status code. For HTML responses the ``href``/``src`` attributes of
    ``a``, ``img``, ``script`` and ``link`` elements are rewritten to
    ``/proxy_full?url=<absolute-url>``.

    Raises:
        HTTPException: 400 if ``url`` is empty or is not an http/https URL;
            502 if the upstream request fails.
    """
    if not url:
        raise HTTPException(status_code=400, detail="Missing 'url' query parameter")
    if not _has_allowed_scheme(url):
        raise HTTPException(status_code=400, detail="Only http and https URLs can be proxied")

    # NOTE(security): the target host is not restricted, so this endpoint can be
    # used to reach internal or private addresses (SSRF). Add a host allow-list
    # or private-range check before exposing it to untrusted clients.
    try:
        # Follow redirects so the client receives the final document rather
        # than a bare 3xx response it cannot act on.
        async with httpx.AsyncClient(follow_redirects=True) as client:
            resp = await client.get(url)
    except (httpx.HTTPError, httpx.InvalidURL) as exc:
        raise HTTPException(status_code=502, detail="Failed to fetch upstream URL") from exc

    content_type = resp.headers.get("Content-Type", "")
    if "text/html" not in content_type.lower():
        # Not HTML: hand the payload back untouched. An empty upstream content
        # type is passed as None so no empty Content-Type header is emitted.
        return Response(
            resp.content,
            media_type=content_type or None,
            status_code=resp.status_code,
        )

    # Resolve relative links against the final URL, which differs from the
    # requested one whenever a redirect was followed.
    base_url = str(resp.url)
    soup = BeautifulSoup(resp.text, "html.parser")
    for tag, attr in _REWRITE_ATTRS.items():
        for element in soup.find_all(tag):
            if not element.has_attr(attr):
                continue
            original = element[attr]
            if isinstance(original, str):
                element[attr] = _rewrite_target(base_url, original)

    # Other URL-bearing attributes (form actions, srcset, inline CSS) are not rewritten.

    # Preserve the upstream status so error pages are not reported as 200.
    return Response(str(soup), media_type="text/html", status_code=resp.status_code)