File size: 3,773 Bytes
a321cf9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
from flask import Flask, request, Response
import requests
from bs4 import BeautifulSoup
import os

# Flask application instance; view functions are registered on it with @app.route.
app = Flask(__name__)

def create_proxy_url(original_url, base_proxy_url):
    """Build the proxy address that forwards to ``original_url``.

    A protocol-relative URL (one starting with ``//``) is first given an
    explicit ``https://`` scheme. The result is ``base_proxy_url`` followed
    by a ``?url=`` query parameter carrying the percent-encoded target.
    """
    is_protocol_relative = original_url.startswith('//')
    if is_protocol_relative:
        target = 'https://' + original_url[2:]
    else:
        target = original_url

    # Percent-encode so the target's own '?', '&' and '=' cannot be mistaken
    # for parts of the proxy's query string.
    encoded_target = requests.utils.quote(target)
    return "{}?url={}".format(base_proxy_url, encoded_target)

# URL schemes the proxy is willing to fetch and to rewrite links for.
_PROXYABLE_SCHEMES = ('http', 'https')

# (connect, read) timeout in seconds for upstream requests. Without a timeout
# requests waits indefinitely on an unresponsive host.
_UPSTREAM_TIMEOUT = (5, 30)

# Upstream headers that are not forwarded to the client:
#  - content-security-policy / x-frame-options: dropped to allow embedding.
#  - content-encoding / content-length: the body handed to the client has been
#    decoded by requests (and possibly rewritten), so both would be wrong.
#  - transfer-encoding / connection: hop-by-hop headers that describe the
#    upstream connection, not the one to our client.
_EXCLUDED_HEADERS = frozenset({
    'content-security-policy',
    'x-frame-options',
    'content-encoding',
    'content-length',
    'transfer-encoding',
    'connection',
})

# (tag, attribute) pairs whose URLs are rewritten to go through the proxy.
_REWRITE_TARGETS = (('a', 'href'), ('link', 'href'), ('script', 'src'), ('img', 'src'))


def _is_proxyable(url):
    """Return True if ``url`` parses as an absolute http(s) URL with a host."""
    try:
        parts = requests.compat.urlparse(url)
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. a broken IPv6 literal).
        return False
    return parts.scheme in _PROXYABLE_SCHEMES and bool(parts.netloc)


def _rewrite_html(markup, page_url, proxy_base_url):
    """Return ``markup`` with a <base> tag added and links routed via the proxy.

    ``page_url`` is the URL the markup was actually served from; relative
    links are resolved against it. Links whose resolved scheme is not
    http(s) (``javascript:``, ``data:``, ``mailto:`` ...) are left untouched
    because the proxy cannot fetch them.
    """
    soup = BeautifulSoup(markup, 'html.parser')

    for tag_name, attr in _REWRITE_TARGETS:
        for node in soup.find_all(tag_name, **{attr: True}):
            try:
                absolute_url = requests.compat.urljoin(page_url, node[attr])
            except ValueError:
                # Unparseable attribute value: leave it exactly as it was.
                continue
            if not _is_proxyable(absolute_url):
                continue
            node[attr] = create_proxy_url(absolute_url, proxy_base_url)

    # The <base> tag lets URLs that are not rewritten above (e.g. form actions)
    # still resolve relative to the original page.
    base_tag = soup.new_tag('base', href=page_url)
    if soup.head is not None:
        soup.head.insert(0, base_tag)
    else:
        # Fragments and minimal documents may have no <head> at all.
        soup.insert(0, base_tag)

    return str(soup)


@app.route('/')
def proxy():
    """Fetch the page named by the ``url`` query parameter and relay it.

    Without a ``url`` parameter a small landing page is returned. HTML
    responses are parsed and their ``a``/``link``/``script``/``img`` URLs are
    rewritten to point back at this proxy; any other content type is streamed
    through unchanged.

    Returns:
        A Flask response: the landing page (200), the relayed upstream
        response (upstream status code), a 400 for a non-http(s) target, or
        a 500 with a plain-text message when the upstream request fails.
    """
    target_url = request.args.get('url')
    if not target_url:
        # Simple landing page for the proxy itself
        return """
        <!DOCTYPE html>
        <html lang="en">
        <head>
            <meta charset="UTF-8">
            <meta name="viewport" content="width=device-width, initial-scale=1.0">
            <title>Proxy Service</title>
            <style>
                body { font-family: sans-serif; background-color: #121212; color: #e0e0e0; display: flex; justify-content: center; align-items: center; height: 100vh; margin: 0; }
                .container { text-align: center; padding: 2rem; background-color: #1e1e1e; border-radius: 8px; }
                code { background-color: #2d2d2d; padding: 0.2rem 0.4rem; border-radius: 4px; }
            </style>
        </head>
        <body>
            <div class="container">
                <h1>Proxy Service is Active</h1>
                <p>Use this service by appending <code>?url=&lt;website_url&gt;</code> to the URL.</p>
            </div>
        </body>
        </html>
        """, 200

    # NOTE(security): target_url is caller-controlled. Restricting the scheme
    # does not prevent requests to internal/private hosts (SSRF); add host
    # allow-listing before exposing this service to untrusted users.
    if not _is_proxyable(target_url):
        return Response(
            "Error fetching URL: only absolute http:// or https:// URLs are supported",
            400,
            mimetype='text/plain',
        )

    # Use a session to handle cookies properly
    session = requests.Session()
    resp = None
    handed_off = False  # True once the streaming response owns resp/session
    try:
        session.headers.update({
            'User-Agent': request.headers.get('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36')
        })

        # Make the request to the target URL
        resp = session.get(target_url, stream=True, timeout=_UPSTREAM_TIMEOUT)

        # Get the content type to check if it's HTML
        content_type = resp.headers.get('Content-Type', '').lower()

        headers = [
            (name, value)
            for name, value in resp.raw.headers.items()
            if name.lower() not in _EXCLUDED_HEADERS
        ]

        if 'text/html' in content_type:
            # SPACE_HOST is expected to be set by the hosting environment;
            # fall back to the host this request arrived on when it is absent.
            space_host = os.environ.get('SPACE_HOST')
            proxy_base_url = f"https://{space_host}/" if space_host else request.host_url

            # resp.url is the final URL after any redirects, which is what
            # relative links in the document are relative to.
            content = _rewrite_html(resp.content, resp.url, proxy_base_url)

            # The rewritten document is sent as a str, which is encoded as
            # UTF-8, so the upstream charset must not be forwarded.
            return Response(
                content,
                resp.status_code,
                headers,
                content_type='text/html; charset=utf-8',
            )

        # For non-HTML content (images, CSS, JS), stream it directly. The
        # upstream connection must stay open until the client has consumed
        # the body, so cleanup is deferred to the response's close hook.
        response = Response(resp.iter_content(chunk_size=1024), resp.status_code, headers)
        response.call_on_close(resp.close)
        response.call_on_close(session.close)
        handed_off = True
        return response

    except requests.exceptions.RequestException as e:
        # The message can echo the caller-supplied URL; send it as plain text
        # so it is never interpreted as HTML by the browser.
        return Response(f"Error fetching URL: {e}", 500, mimetype='text/plain')
    finally:
        if not handed_off:
            if resp is not None:
                resp.close()
            session.close()

if __name__ == '__main__':
    # Listen on every interface; the port comes from PORT, defaulting to 7860.
    listen_port = int(os.environ.get("PORT", 7860))
    app.run(host="0.0.0.0", port=listen_port)