|
|
from fastapi import FastAPI, Request, HTTPException, Response |
|
|
import httpx |
|
|
from bs4 import BeautifulSoup |
|
|
import urllib.parse |
|
|
|
|
|
# Single FastAPI application instance; the proxy route below is registered on it.
app = FastAPI()
|
|
|
|
|
@app.get("/proxy_full")
async def proxy_full(url: str):
    """Fetch *url* and return it, rewriting HTML references to route back through this proxy.

    Non-HTML responses (images, CSS, JS, ...) are passed through unmodified with
    their upstream media type and status code. For HTML, every ``a[href]``,
    ``img[src]``, ``script[src]`` and ``link[href]`` is resolved to an absolute
    URL and re-pointed at ``/proxy_full?url=...``.

    Raises:
        HTTPException 400: empty ``url`` or a non-http(s) scheme.
        HTTPException 502: the upstream request failed (DNS, timeout, refused).
    """
    if not url:
        raise HTTPException(status_code=400, detail="Missing 'url' query parameter")

    # Only allow http(s) targets — rejects file://, ftp:// and scheme-less input.
    # NOTE(review): this does NOT prevent SSRF against internal hosts
    # (e.g. http://169.254.169.254); add a host allow/deny list before
    # exposing this endpoint publicly.
    scheme = urllib.parse.urlparse(url).scheme
    if scheme not in ("http", "https"):
        raise HTTPException(status_code=400, detail="Only http and https URLs are supported")

    try:
        # follow_redirects: httpx does not follow 3xx by default; without it a
        # redirect response would be relayed with its Location header dropped,
        # dead-ending navigation through the proxy.
        async with httpx.AsyncClient(follow_redirects=True) as client:
            resp = await client.get(url)
    except httpx.RequestError as exc:
        # Surface network failures as a clean 502 instead of a 500 traceback.
        raise HTTPException(status_code=502, detail=f"Upstream request failed: {exc}") from exc

    content_type = resp.headers.get("Content-Type", "")

    # Binary/non-HTML content is forwarded untouched.
    if "text/html" not in content_type:
        return Response(resp.content, media_type=content_type, status_code=resp.status_code)

    soup = BeautifulSoup(resp.text, 'html.parser')

    # Tag -> attribute pairs that carry URLs we re-route through the proxy.
    tags_attrs = {
        "a": "href",
        "img": "src",
        "script": "src",
        "link": "href",
    }

    for tag, attr in tags_attrs.items():
        for element in soup.find_all(tag):
            if element.has_attr(attr):
                original = element[attr]
                # Resolve relative references against the fetched page's URL,
                # then re-point them at this endpoint (quoted, so query strings
                # in the target survive as a single parameter value).
                new_url = urllib.parse.urljoin(url, original)
                element[attr] = f"/proxy_full?url={urllib.parse.quote(new_url)}"

    # Preserve the upstream status code so error pages (404/500) are not
    # misreported as 200 (the original hard-coded 200 here, unlike the
    # non-HTML branch above).
    return Response(str(soup), media_type="text/html", status_code=resp.status_code)
|
|
|