Spaces:
Paused
Paused
File size: 3,773 Bytes
a321cf9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
import os
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
from flask import Flask, request, Response
# Flask application instance; view functions are registered on it via @app.route.
app = Flask(__name__)
def create_proxy_url(original_url, base_proxy_url):
    """Return a URL that routes *original_url* through the proxy endpoint.

    Protocol-relative URLs (``//host/path``) are given an explicit
    ``https://`` scheme first. The target URL is then percent-encoded and
    appended to ``base_proxy_url`` as the ``url`` query parameter.

    Args:
        original_url: Absolute or protocol-relative URL of the target resource.
        base_proxy_url: URL of the proxy endpoint. If it already carries a
            query string, the parameter is appended with ``&`` instead of
            starting a second query with ``?``.

    Returns:
        The proxy URL as a string.
    """
    # Ensure the URL is absolute
    if original_url.startswith('//'):
        original_url = 'https://' + original_url[2:]
    separator = '&' if '?' in base_proxy_url else '?'
    # quote() keeps "/" readable but escapes "?", "&", "=" and "#", so the
    # target's own query string cannot leak into the proxy's parameters.
    return f"{base_proxy_url}{separator}url={quote(original_url)}"
# Upstream headers that are never forwarded to the client.  The first two
# would stop the proxied page from being embedded; the rest describe the
# upstream wire framing, which no longer matches the body we send because
# requests decodes it and HTML pages are rewritten.
_EXCLUDED_HEADERS = frozenset({
    'content-security-policy',
    'x-frame-options',
    'content-encoding',
    'content-length',
    'transfer-encoding',
    'connection',
    'keep-alive',
})

# (tag, attribute) pairs whose URLs are rewritten to go through the proxy.
_REWRITTEN_ATTRS = (('a', 'href'), ('link', 'href'), ('script', 'src'), ('img', 'src'))

# User-Agent sent upstream when the incoming request does not carry one.
_DEFAULT_USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
)

# (connect, read) timeouts in seconds so a stalled target cannot hold a
# worker forever; requests waits indefinitely when no timeout is given.
_UPSTREAM_TIMEOUT = (5, 30)

# Landing page shown when no ?url= parameter is supplied.  The placeholder is
# entity-escaped so the browser displays it instead of parsing it as a tag.
_LANDING_PAGE = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Proxy Service</title>
<style>
body { font-family: sans-serif; background-color: #121212; color: #e0e0e0; display: flex; justify-content: center; align-items: center; height: 100vh; margin: 0; }
.container { text-align: center; padding: 2rem; background-color: #1e1e1e; border-radius: 8px; }
code { background-color: #2d2d2d; padding: 0.2rem 0.4rem; border-radius: 4px; }
</style>
</head>
<body>
<div class="container">
<h1>Proxy Service is Active</h1>
<p>Use this service by appending <code>?url=&lt;website_url&gt;</code> to the URL.</p>
</div>
</body>
</html>
"""


def _rewrite_html(markup, page_url, proxy_base_url):
    """Return *markup* with its links and resources routed through the proxy.

    Args:
        markup: Raw HTML bytes (or text) of the fetched page.
        page_url: URL the page was actually served from; relative links are
            resolved against it.
        proxy_base_url: URL of the proxy endpoint used for rewritten links.

    Returns:
        The rewritten document serialised as a string.
    """
    soup = BeautifulSoup(markup, 'html.parser')

    for tag_name, attr in _REWRITTEN_ATTRS:
        for node in soup.find_all(tag_name, **{attr: True}):
            # Make relative URLs absolute before proxying
            absolute_url = requests.compat.urljoin(page_url, node[attr])
            # javascript:, mailto:, data: and similar links cannot be fetched
            # by the proxy, so they are left exactly as the page wrote them.
            if absolute_url.lower().startswith(('http://', 'https://')):
                node[attr] = create_proxy_url(absolute_url, proxy_base_url)

    # A <base> tag lets resources that are not rewritten above still resolve
    # against the original site.  html.parser does not synthesise a <head>,
    # so fall back to <html>, or skip the tag for bare fragments.
    container = soup.head if soup.head is not None else soup.html
    if container is not None:
        container.insert(0, soup.new_tag('base', href=page_url))

    return str(soup)


@app.route('/')
def proxy():
    """Fetch the page named by the ``url`` query parameter and relay it.

    Without a ``url`` parameter a small landing page is returned.  HTML
    responses have their ``a``/``link``/``script``/``img`` URLs rewritten to
    pass through this endpoint; every other content type is streamed to the
    client unchanged.  Headers that block embedding or describe the upstream
    framing are stripped.

    Returns:
        A Flask response: the landing page (200), the relayed upstream
        response with its original status code, or a plain-text error (500)
        when the upstream request fails.
    """
    # NOTE(review): this endpoint fetches any URL a caller supplies, including
    # internal addresses (SSRF).  Restrict the allowed hosts before exposing
    # it beyond a trusted audience.
    target_url = request.args.get('url')
    if not target_url:
        # Simple landing page for the proxy itself
        return _LANDING_PAGE, 200

    # Use a session to handle cookies properly
    session = requests.Session()
    session.headers.update({
        'User-Agent': request.headers.get('User-Agent', _DEFAULT_USER_AGENT),
    })

    streaming = False
    try:
        upstream = session.get(target_url, stream=True, timeout=_UPSTREAM_TIMEOUT)
        content_type = upstream.headers.get('Content-Type', '').lower()
        headers = [
            (name, value)
            for name, value in upstream.raw.headers.items()
            if name.lower() not in _EXCLUDED_HEADERS
        ]

        if 'text/html' in content_type:
            # SPACE_HOST is provided on Hugging Face Spaces; elsewhere fall
            # back to the host this request arrived on.
            space_host = os.environ.get('SPACE_HOST')
            proxy_base_url = f"https://{space_host}/" if space_host else request.host_url

            # upstream.url is the post-redirect location, which is the
            # correct base for the page's relative links.
            html = _rewrite_html(upstream.content, upstream.url, proxy_base_url)

            # The rewritten document is sent as UTF-8, so the upstream
            # charset declaration must not be forwarded.
            headers = [(name, value) for name, value in headers if name.lower() != 'content-type']
            return Response(html, upstream.status_code, headers,
                            content_type='text/html; charset=utf-8')

        # For non-HTML content (images, CSS, JS), stream it directly
        response = Response(upstream.iter_content(chunk_size=1024), upstream.status_code, headers)
        # The connection has to stay open while the body is streamed; it is
        # released once the server has finished sending the response.
        response.call_on_close(upstream.close)
        response.call_on_close(session.close)
        streaming = True
        return response
    except requests.exceptions.RequestException as e:
        # The message echoes the caller-supplied URL, so it is served as
        # plain text to keep it from being interpreted as HTML.
        return Response(f"Error fetching URL: {e}", 500, mimetype='text/plain')
    finally:
        if not streaming:
            session.close()
if __name__ == '__main__':
    # Listen on all interfaces; the PORT environment variable overrides the
    # default port 7860.
    app.run(host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))