proxyick / main.py
triflix's picture
Create main.py
b616e61 verified
from fastapi import FastAPI, Request, HTTPException, Response
import httpx
from bs4 import BeautifulSoup
import urllib.parse
# Application instance; route handlers below register themselves on it
# through its decorator methods (e.g. ``@app.get``).
app = FastAPI()
# Only these URL schemes can be fetched by the proxy; anything else
# (javascript:, mailto:, data:, tel:, ...) must be left for the browser.
_PROXYABLE_SCHEMES = ("http", "https")

# Tags whose URL-bearing attribute is rewritten to route through the proxy.
_REWRITE_ATTRS = {
    "a": "href",
    "img": "src",
    "script": "src",
    "link": "href",
}


def _to_proxy_url(base_url: str, original: str) -> str:
    """Return *original* rewritten so that it is fetched via ``/proxy_full``.

    The reference is resolved against *base_url* and percent-encoded into
    the ``url`` query parameter. The value is returned unchanged when it
    should not be proxied: empty values, in-page ``#fragment`` links,
    references that cannot be parsed, and non-http(s) schemes.
    """
    candidate = original.strip()
    if not candidate or candidate.startswith("#"):
        return original

    try:
        absolute, fragment = urllib.parse.urldefrag(
            urllib.parse.urljoin(base_url, candidate)
        )
        scheme = urllib.parse.urlsplit(absolute).scheme
    except ValueError:
        # Malformed reference (e.g. a broken IPv6 literal): leave it alone
        # rather than failing the whole page.
        return original

    if scheme not in _PROXYABLE_SCHEMES:
        return original

    # safe="" encodes every reserved character so the target URL cannot be
    # confused with the proxy's own query string.
    proxied = f"/proxy_full?url={urllib.parse.quote(absolute, safe='')}"
    # Keep the fragment outside the encoded parameter so the browser still
    # scrolls to the anchor on the proxied page.
    return f"{proxied}#{fragment}" if fragment else proxied


@app.get("/proxy_full")
async def proxy_full(url: str):
    """Fetch *url* and return it, rewriting HTML links to pass through this proxy.

    Non-HTML responses are relayed unchanged with the upstream content type
    and status code. For HTML responses, the ``href``/``src`` attributes of
    ``a``, ``img``, ``script`` and ``link`` elements are rewritten to point
    back at ``/proxy_full``.

    Args:
        url: Absolute http(s) URL of the resource to fetch.

    Returns:
        A ``Response`` carrying the upstream status code and either the raw
        upstream body or the rewritten HTML document.

    Raises:
        HTTPException: 400 if *url* is empty, malformed, or not http(s);
            502 if the upstream request fails.
    """
    # NOTE(security): this endpoint fetches arbitrary caller-supplied URLs
    # (an SSRF vector). Restrict reachable hosts before exposing it publicly.
    if not url:
        raise HTTPException(status_code=400, detail="Missing 'url' query parameter")

    try:
        scheme = urllib.parse.urlsplit(url).scheme
    except ValueError as exc:
        raise HTTPException(status_code=400, detail="Malformed 'url' query parameter") from exc
    if scheme not in _PROXYABLE_SCHEMES:
        raise HTTPException(status_code=400, detail="Only http and https URLs can be proxied")

    try:
        # Follow redirects here: a relayed 3xx would reach the browser
        # without its Location header and be useless.
        async with httpx.AsyncClient(follow_redirects=True) as client:
            resp = await client.get(url)
    except httpx.InvalidURL as exc:
        raise HTTPException(status_code=400, detail="Malformed 'url' query parameter") from exc
    except httpx.HTTPError as exc:
        raise HTTPException(status_code=502, detail=f"Upstream request failed: {exc}") from exc

    content_type = resp.headers.get("Content-Type", "")
    if "text/html" not in content_type.lower():
        # Not HTML: pass the payload through untouched. An absent upstream
        # Content-Type becomes None so no empty header is emitted.
        return Response(
            resp.content,
            media_type=content_type or None,
            status_code=resp.status_code,
        )

    soup = BeautifulSoup(resp.text, "html.parser")

    # Resolve relative links against the final URL (after any redirects),
    # which is the document's real location.
    base_url = str(resp.url)
    for tag, attr in _REWRITE_ATTRS.items():
        for element in soup.find_all(tag):
            original = element.get(attr)
            if isinstance(original, str):
                element[attr] = _to_proxy_url(base_url, original)

    # Other URL-bearing attributes (form actions, srcset, inline CSS) are
    # not rewritten.
    return Response(str(soup), media_type="text/html", status_code=resp.status_code)