import urllib.parse

import httpx
from bs4 import BeautifulSoup
from fastapi import FastAPI, HTTPException, Request, Response

app = FastAPI()

# Tags whose URL-bearing attribute is rewritten to route through this proxy.
_REWRITE_ATTRS = {
    "a": "href",
    "img": "src",
    "script": "src",
    "link": "href",
}

# Only these schemes can be fetched by the proxy; anything else
# (mailto:, javascript:, data:, tel:, ...) must be left untouched.
_PROXYABLE_SCHEMES = ("http", "https")


def _proxied_url(base_url: str, original: str) -> str:
    """Return *original* rewritten to go through ``/proxy_full``.

    The value is resolved against *base_url* first. It is returned unchanged
    when it cannot sensibly be proxied: empty values, same-page ``#fragment``
    links, non-http(s) schemes, or URLs that fail to parse.
    """
    candidate = original.strip()
    if not candidate or candidate.startswith("#"):
        # The browser resolves these against the current (already proxied) page.
        return original

    try:
        absolute = urllib.parse.urljoin(base_url, candidate)
        scheme = urllib.parse.urlsplit(absolute).scheme.lower()
    except ValueError:
        # Malformed URL in the page (e.g. a broken IPv6 literal): leave as-is.
        return original

    if scheme not in _PROXYABLE_SCHEMES:
        return original

    # Keep the fragment on the client side; it is never sent to a server and
    # would otherwise be percent-encoded into the upstream URL.
    target, fragment = urllib.parse.urldefrag(absolute)
    proxied = f"/proxy_full?url={urllib.parse.quote(target, safe='')}"
    return f"{proxied}#{fragment}" if fragment else proxied


@app.get("/proxy_full")
async def proxy_full(url: str):
    """Fetch *url* and return it, rewriting links in HTML to use this proxy.

    Non-HTML responses are passed through with the upstream content type and
    status code. For HTML responses, the ``href``/``src`` attributes of
    ``a``, ``img``, ``script`` and ``link`` elements are rewritten to
    ``/proxy_full?url=<absolute-url>``.

    Raises:
        HTTPException: 400 if ``url`` is missing or is not an absolute
            http(s) URL; 502 if the upstream request fails.
    """
    if not url:
        raise HTTPException(status_code=400, detail="Missing 'url' query parameter")

    try:
        parts = urllib.parse.urlsplit(url)
    except ValueError:
        parts = None
    if parts is None or parts.scheme.lower() not in _PROXYABLE_SCHEMES or not parts.netloc:
        raise HTTPException(status_code=400, detail="'url' must be an absolute http(s) URL")

    # NOTE(security): this endpoint fetches arbitrary caller-supplied hosts
    # (SSRF). Restrict reachable hosts/networks before exposing it publicly.
    try:
        # Follow redirects so the caller gets the final page rather than a
        # 3xx response whose Location header would be dropped.
        async with httpx.AsyncClient(follow_redirects=True) as client:
            resp = await client.get(url)
    except httpx.InvalidURL as exc:
        raise HTTPException(status_code=400, detail="Invalid 'url' query parameter") from exc
    except httpx.RequestError as exc:
        raise HTTPException(status_code=502, detail="Upstream request failed") from exc

    content_type = resp.headers.get("Content-Type", "")
    if "text/html" not in content_type.lower():
        # Not HTML: pass the payload through untouched.
        return Response(
            resp.content,
            media_type=content_type or None,
            status_code=resp.status_code,
        )

    soup = BeautifulSoup(resp.text, "html.parser")

    # Resolve relative links against the final URL (after any redirects).
    base_url = str(resp.url)
    for tag, attr in _REWRITE_ATTRS.items():
        for element in soup.find_all(tag):
            if element.has_attr(attr):
                element[attr] = _proxied_url(base_url, element[attr])

    # Optionally, other parts of the page (e.g. form actions, srcset) could
    # be rewritten here as well.
    return Response(str(soup), media_type="text/html", status_code=resp.status_code)