File size: 1,688 Bytes
b616e61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
from fastapi import FastAPI, Request, HTTPException, Response
import httpx
from bs4 import BeautifulSoup
import urllib.parse

# FastAPI application instance; the @app.get route below registers its handler on it.
app = FastAPI()

# URL schemes this proxy is willing to fetch and to rewrite links for.
_PROXYABLE_SCHEMES = frozenset({"http", "https"})


def _proxied_link(base_url, original):
    """Return the proxy path for a link found in a page, or None to leave it as is.

    Args:
        base_url: URL of the page the link was found on; relative links are
            resolved against it.
        original: Raw attribute value (``href`` / ``src``) taken from the page.

    Returns:
        A ``/proxy_full?url=...`` path for links that resolve to http(s), or
        ``None`` for values that must not be rewritten (empty values, in-page
        ``#fragment`` anchors, ``javascript:``/``mailto:``/``data:`` and other
        non-http(s) targets, or values that cannot be parsed as a URL).
    """
    if not isinstance(original, str):
        return None
    target = original.strip()
    # Fragment-only links refer to the current document and already work
    # unchanged when the page is served from the proxy.
    if not target or target.startswith("#"):
        return None
    try:
        absolute = urllib.parse.urljoin(base_url, target)
        scheme = urllib.parse.urlsplit(absolute).scheme.lower()
    except ValueError:
        # Malformed link (e.g. unbalanced IPv6 brackets): keep it untouched
        # instead of failing the whole page.
        return None
    if scheme not in _PROXYABLE_SCHEMES:
        return None
    # Percent-encode so the target survives as a single query parameter.
    return f"/proxy_full?url={urllib.parse.quote(absolute)}"


@app.get("/proxy_full")
async def proxy_full(url: str):
    """Fetch *url* and return it, rewriting links in HTML to go through this proxy.

    Non-HTML responses are passed through with their upstream status code and
    content type. For HTML responses, the ``href`` of ``<a>``/``<link>`` and
    the ``src`` of ``<img>``/``<script>`` are rewritten to
    ``/proxy_full?url=<absolute target>``; the upstream status code is kept.

    Args:
        url: Absolute http(s) URL of the resource to fetch.

    Raises:
        HTTPException: 400 if ``url`` is empty, malformed, or not an absolute
            http(s) URL; 502 if the upstream request fails.
    """
    # Validate the URL parameter
    if not url:
        raise HTTPException(status_code=400, detail="Missing 'url' query parameter")

    try:
        parts = urllib.parse.urlsplit(url)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid 'url' query parameter") from None
    if parts.scheme.lower() not in _PROXYABLE_SCHEMES or not parts.netloc:
        raise HTTPException(
            status_code=400,
            detail="Only absolute http(s) URLs can be proxied",
        )
    # NOTE(security): this still allows requests to internal/private hosts.
    # If the service is exposed to untrusted users, add a host allow-list or
    # block private address ranges to prevent SSRF.

    try:
        async with httpx.AsyncClient() as client:
            # Get the main page HTML
            resp = await client.get(url)
    except (httpx.HTTPError, httpx.InvalidURL) as exc:
        # Upstream failure is a gateway problem, not an internal server error.
        raise HTTPException(
            status_code=502,
            detail="Failed to fetch the requested URL",
        ) from exc

    # Check if the response is HTML (media types are case-insensitive)
    content_type = resp.headers.get("Content-Type", "")
    if "text/html" not in content_type.lower():
        # If not HTML, return the content directly
        return Response(resp.content, media_type=content_type, status_code=resp.status_code)

    # Parse the HTML using BeautifulSoup
    soup = BeautifulSoup(resp.text, 'html.parser')
    # List of tags and the corresponding attribute to rewrite
    tags_attrs = {
        "a": "href",
        "img": "src",
        "script": "src",
        "link": "href",
    }
    # Rewrite URLs to route through this proxy
    for tag, attr in tags_attrs.items():
        for element in soup.find_all(tag):
            if not element.has_attr(attr):
                continue
            proxied_url = _proxied_link(url, element[attr])
            if proxied_url is not None:
                element[attr] = proxied_url

    # Optionally, you might want to adjust other parts of the page (like form actions)

    # Preserve the upstream status so error pages are not reported as 200.
    return Response(str(soup), media_type="text/html", status_code=resp.status_code)