from flask import Flask, request, Response
import requests
from bs4 import BeautifulSoup
import os

app = Flask(__name__)


def create_proxy_url(original_url, base_proxy_url):
    """Creates a full proxy URL for a given original URL.

    Protocol-relative URLs (``//host/path``) are normalized to https before
    being percent-encoded into the ``?url=`` query parameter.
    """
    if original_url.startswith('//'):
        original_url = 'https://' + original_url[2:]
    return f"{base_proxy_url}?url={requests.utils.quote(original_url)}"


@app.route('/')
def proxy():
    """Fetch the page given in ``?url=`` and return it through this service.

    HTML responses get their links/resources rewritten to point back at the
    proxy; all other content types are streamed through unmodified.
    Returns a plain landing page when no ``url`` parameter is supplied,
    and a 500 with the error text when the upstream fetch fails.
    """
    target_url = request.args.get('url')
    if not target_url:
        # Simple landing page for the proxy itself
        return """ Proxy Service

Proxy Service is Active

Use this service by appending ?url= to the URL.

""", 200
    try:
        # NOTE(review): target_url is fully client-controlled, so this is an
        # open proxy (SSRF risk: internal hosts, cloud metadata endpoints).
        # Add an allow-list / scheme check before exposing this publicly.

        # Use a session to handle cookies properly; forward the caller's
        # User-Agent (with a desktop-Chrome fallback) so targets serve
        # their normal markup.
        session = requests.Session()
        session.headers.update({
            'User-Agent': request.headers.get('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36')
        })

        # Timeout added so a slow/hung upstream cannot block the worker forever.
        resp = session.get(target_url, stream=True, timeout=30)

        # Content type decides whether we rewrite (HTML) or stream (everything else).
        content_type = resp.headers.get('Content-Type', '').lower()

        # Headers we must not forward:
        #  - CSP / X-Frame-Options would prevent embedding the proxied page
        #  - content-encoding: requests already decompressed the body for us
        #  - content-length: wrong once the HTML body has been rewritten
        #  - transfer-encoding / connection: hop-by-hop, owned by the WSGI layer
        excluded_headers = [
            'content-security-policy', 'x-frame-options', 'content-encoding',
            'content-length', 'transfer-encoding', 'connection',
        ]
        headers = [(name, value) for (name, value) in resp.raw.headers.items()
                   if name.lower() not in excluded_headers]

        if 'text/html' in content_type:
            # Base URL of the Hugging Face Space for rewriting links.
            # SPACE_HOST is provided automatically by Hugging Face.
            proxy_base_url = f"https://{os.environ.get('SPACE_HOST')}/"

            soup = BeautifulSoup(resp.content, 'html.parser')

            # Insert a <base> tag so any URL we miss still resolves against
            # the target. Bug fix: documents without a <head> previously
            # crashed with AttributeError — synthesize one when possible.
            base_tag = soup.new_tag('base', href=target_url)
            if soup.head is not None:
                soup.head.insert(0, base_tag)
            elif soup.html is not None:
                head_tag = soup.new_tag('head')
                head_tag.insert(0, base_tag)
                soup.html.insert(0, head_tag)

            # Rewrite all links and resource URLs to go through the proxy.
            for tag, attr in [('a', 'href'), ('link', 'href'),
                              ('script', 'src'), ('img', 'src')]:
                for t in soup.find_all(tag, **{attr: True}):
                    # Make relative URLs absolute before proxying.
                    absolute_url = requests.compat.urljoin(target_url, t[attr])
                    t[attr] = create_proxy_url(absolute_url, proxy_base_url)

            return Response(str(soup), resp.status_code, headers)
        else:
            # For non-HTML content (images, CSS, JS), stream it directly.
            return Response(resp.iter_content(chunk_size=1024),
                            resp.status_code, headers)
    except requests.exceptions.RequestException as e:
        return f"Error fetching URL: {e}", 500


if __name__ == '__main__':
    app.run(host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))