# NOTE(review): the three lines below were Hugging Face Space page residue
# ("Spaces: / Paused / Paused") left over from extraction, not source code.
import os

import requests
from bs4 import BeautifulSoup
from flask import Flask, request, Response
# Flask application instance for the proxy service.
app = Flask(__name__)
def create_proxy_url(original_url, base_proxy_url):
    """Return a URL that fetches *original_url* through the proxy.

    Protocol-relative URLs (``//host/path``) are promoted to ``https://``
    first, then the whole target is percent-encoded into the ``url``
    query parameter of *base_proxy_url*.
    """
    # requests.utils.quote is just a re-export of urllib.parse.quote;
    # use the stdlib name directly so this helper has no third-party dependency.
    from urllib.parse import quote

    if original_url.startswith('//'):
        # Protocol-relative URL: assume https.
        original_url = 'https://' + original_url[2:]
    return f"{base_proxy_url}?url={quote(original_url)}"
@app.route('/')  # BUG FIX: the view was defined but never registered with Flask.
def proxy():
    """Fetch the URL given in ``?url=...`` and relay it to the client.

    HTML responses are parsed and every link/resource URL is rewritten to
    route back through this proxy; all other content types are streamed
    through unchanged. With no ``url`` parameter, a small landing page is
    returned.

    Returns:
        A Flask ``Response`` (or ``(body, status)`` tuple on the landing
        page and error paths).
    """
    target_url = request.args.get('url')
    if not target_url:
        # Simple landing page for the proxy itself
        return """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Proxy Service</title>
    <style>
        body { font-family: sans-serif; background-color: #121212; color: #e0e0e0; display: flex; justify-content: center; align-items: center; height: 100vh; margin: 0; }
        .container { text-align: center; padding: 2rem; background-color: #1e1e1e; border-radius: 8px; }
        code { background-color: #2d2d2d; padding: 0.2rem 0.4rem; border-radius: 4px; }
    </style>
</head>
<body>
    <div class="container">
        <h1>Proxy Service is Active</h1>
        <p>Use this service by appending <code>?url=<website_url></code> to the URL.</p>
    </div>
</body>
</html>
""", 200
    # NOTE(review): target_url is fully attacker-controlled, so this is an
    # open proxy and can reach internal hosts (SSRF). Add an allow-list or
    # scheme/host validation before exposing this publicly.
    try:
        # Use a session to handle cookies properly
        session = requests.Session()
        session.headers.update({
            'User-Agent': request.headers.get('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36')
        })
        # Timeout so a slow/hung upstream cannot block the worker forever.
        resp = session.get(target_url, stream=True, timeout=30)
        # Content type decides whether we rewrite (HTML) or stream verbatim.
        content_type = resp.headers.get('Content-Type', '').lower()
        # Strip headers that would block embedding (CSP / X-Frame-Options) or
        # that no longer describe the body we actually send: requests
        # transparently decompresses, and HTML is rewritten below, so the
        # upstream Content-Encoding/Content-Length/Transfer-Encoding are stale.
        excluded_headers = [
            'content-security-policy', 'x-frame-options', 'content-encoding',
            'content-length', 'transfer-encoding', 'connection',
        ]
        headers = [(name, value) for (name, value) in resp.raw.headers.items()
                   if name.lower() not in excluded_headers]
        if 'text/html' in content_type:
            # Base URL of the Hugging Face Space, used when rewriting links.
            # SPACE_HOST is provided automatically by Hugging Face.
            proxy_base_url = f"https://{os.environ.get('SPACE_HOST')}/"
            soup = BeautifulSoup(resp.content, 'html.parser')
            # <base> makes any URL we fail to rewrite resolve against the
            # target site instead of the proxy host.
            base_tag = soup.new_tag('base', href=target_url)
            if soup.head is None:
                # Some pages lack <head>; create one so insert() cannot crash.
                new_head = soup.new_tag('head')
                (soup.html or soup).insert(0, new_head)
            soup.head.insert(0, base_tag)
            # Rewrite all links and resource URLs to go through the proxy
            for tag, attr in [('a', 'href'), ('link', 'href'), ('script', 'src'), ('img', 'src')]:
                for t in soup.find_all(tag, **{attr: True}):
                    original_url = t[attr]
                    # Make relative URLs absolute before proxying
                    absolute_url = requests.compat.urljoin(target_url, original_url)
                    t[attr] = create_proxy_url(absolute_url, proxy_base_url)
            return Response(str(soup), resp.status_code, headers)
        else:
            # For non-HTML content (images, CSS, JS), stream it directly
            return Response(resp.iter_content(chunk_size=1024), resp.status_code, headers)
    except requests.exceptions.RequestException as e:
        return f"Error fetching URL: {e}", 500
if __name__ == '__main__':
    # Hugging Face Spaces supplies PORT (default 7860); bind all interfaces
    # so the container's mapped port is reachable.
    app.run(host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))