dpv007 committed on
Commit
e33bfc3
·
verified ·
1 Parent(s): e221fe9

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +159 -99
main.py CHANGED
@@ -1,120 +1,180 @@
 
 
1
  import httpx
2
- from urllib.parse import urlparse, quote_plus
3
-
4
- from fastapi import FastAPI, Request, Query, Form
5
- from fastapi.responses import HTMLResponse, Response, RedirectResponse
6
- from fastapi.templating import Jinja2Templates
7
 
8
  app = FastAPI()
9
 
10
- templates = Jinja2Templates(directory="templates")
11
-
12
 
13
- def sanitize_url(url: str) -> str:
14
- url = url.strip()
15
- if not url:
16
- return ""
17
- # If it's just a word, treat it as search later
18
- if "://" not in url:
19
- return url
20
- p = urlparse(url)
21
- if p.scheme not in ("http", "https"):
22
- return ""
23
- if not p.netloc:
24
- return ""
25
- return p.geturl()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
 
28
  @app.get("/", response_class=HTMLResponse)
29
- async def home(request: Request):
30
- return templates.TemplateResponse("index.html", {"request": request})
31
 
32
 
33
- @app.post("/go", response_class=HTMLResponse)
34
- async def go(request: Request, q: str = Form(...)):
35
  """
36
- Handle form submission: treat q as either URL or search query.
37
  """
38
- cleaned = sanitize_url(q)
39
- if cleaned:
40
- # Looks like a URL
41
- return RedirectResponse(url=f"/proxy?url={quote_plus(cleaned)}", status_code=302)
42
- else:
43
- # Treat as search query
44
- return RedirectResponse(url=f"/search?q={quote_plus(q)}", status_code=302)
 
 
 
 
45
 
46
 
47
- @app.get("/search", response_class=HTMLResponse)
48
- async def search(request: Request, q: str = Query(...)):
49
  """
50
- Very simple meta-search using DuckDuckGo HTML.
51
- Note: This is a hacky example, not an official API.
52
  """
53
- if not q.strip():
54
- return RedirectResponse(url="/", status_code=302)
55
-
56
- # DuckDuckGo HTML interface
57
- target = f"https://duckduckgo.com/html/?q={quote_plus(q)}"
58
-
59
- async with httpx.AsyncClient(follow_redirects=True, timeout=30) as client:
60
- r = await client.get(target, headers={"User-Agent": "Mozilla/5.0"})
61
-
62
- # Rewrite links in the HTML so clicks go via /proxy
63
- html = r.text
64
- # Very naive replacement; proper rewrite would need HTML parsing
65
- html = html.replace('href="/', 'href="https://duckduckgo.com/')
66
- html = html.replace('href="http', 'href="/proxy?url=http')
67
-
68
- wrapper_html = f"""
69
- <html>
70
- <head>
71
- <title>Proxy Search - {q}</title>
72
- <style>
73
- body {{ font-family: sans-serif; margin: 1rem; }}
74
- form {{ margin-bottom: 1rem; }}
75
- input[type=text] {{ width: 70%; padding: 0.5rem; }}
76
- button {{ padding: 0.5rem 1rem; }}
77
- iframe {{ width: 100%; height: 80vh; border: 1px solid #ccc; }}
78
- </style>
79
- </head>
80
- <body>
81
- <form action="/go" method="post">
82
- <input type="text" name="q" value="{q}" placeholder="Search or enter URL" />
83
- <button type="submit">Go</button>
84
- </form>
85
- <hr/>
86
- {html}
87
- </body>
88
- </html>
89
- """
90
- return HTMLResponse(content=wrapper_html)
91
 
92
 
93
  @app.get("/proxy")
94
- async def proxy(url: str = Query(...)):
95
  """
96
- Simple GET proxy to fetch pages.
97
  """
98
- target = sanitize_url(url)
99
- if not target:
100
- return RedirectResponse(url="/", status_code=302)
101
-
102
- async with httpx.AsyncClient(follow_redirects=True, timeout=30) as client:
103
- try:
104
- r = await client.get(target, headers={"User-Agent": "Mozilla/5.0"})
105
- except httpx.RequestError as e:
106
- return HTMLResponse(
107
- f"<h1>Upstream error</h1><pre>{e}</pre>", status_code=502
108
- )
109
-
110
- content_type = r.headers.get("content-type", "text/html")
111
- # Very simple: just forward content. No rewriting of embedded links/CSS/JS.
112
- return Response(content=r.content, status_code=r.status_code, media_type=content_type)
113
-
114
-
115
- if __name__ == "__main__":
116
- import os
117
- import uvicorn
118
-
119
- port = int(os.getenv("PORT", "7860"))
120
- uvicorn.run("main:app", host="0.0.0.0", port=port)
 
 
 
 
 
 
 
 
1
import html
from urllib.parse import urljoin, quote

import httpx
from bs4 import BeautifulSoup
from fastapi import FastAPI, Request, Response
from fastapi.responses import HTMLResponse
 
 
 
6
 
7
# Application object picked up by uvicorn / the Space runtime.
app = FastAPI()
8
 
 
 
9
 
10
# Landing page served verbatim by the "/" route: a dark URL bar plus an
# <iframe> whose src is pointed at /proxy?url=... by the inline script.
HTML_INDEX = """
<!doctype html>
<html>
  <head>
    <meta charset="utf-8" />
    <title>HF Proxy Browser</title>
    <style>
      body { font-family: sans-serif; margin: 0; padding: 0; }
      #bar {
        padding: 10px;
        background: #111827;
        color: #e5e7eb;
        display: flex;
        gap: 8px;
        align-items: center;
      }
      input[type="text"] {
        flex: 1;
        padding: 6px 8px;
        border-radius: 4px;
        border: 1px solid #4b5563;
        background: #111827;
        color: #e5e7eb;
      }
      button {
        padding: 6px 12px;
        border-radius: 4px;
        border: none;
        cursor: pointer;
      }
      #go {
        background: #3b82f6;
        color: white;
      }
      #frame {
        width: 100%;
        height: calc(100vh - 48px);
        border: none;
      }
    </style>
  </head>
  <body>
    <div id="bar">
      <span>Proxy URL:</span>
      <input id="url" type="text" placeholder="https://example.com" />
      <button id="go">Go</button>
    </div>
    <iframe id="frame"></iframe>
    <script>
      const input = document.getElementById('url');
      const frame = document.getElementById('frame');
      const btn = document.getElementById('go');

      function load() {
        let url = input.value.trim();
        if (!url) return;
        if (!url.startsWith('http://') && !url.startsWith('https://')) {
          url = 'https://' + url;
        }
        frame.src = '/proxy?url=' + encodeURIComponent(url);
      }

      btn.addEventListener('click', load);
      input.addEventListener('keydown', e => {
        if (e.key === 'Enter') {
          e.preventDefault();
          load();
        }
      });
    </script>
  </body>
</html>
"""
83
 
84
 
85
@app.get("/", response_class=HTMLResponse)
async def index():
    """Serve the static landing page (URL bar + iframe)."""
    return HTML_INDEX
88
 
89
 
90
async def fetch_url(url: str) -> httpx.Response:
    """
    GET *url* and return the httpx response.

    Follows redirects with a 15 s timeout and sends a desktop-browser
    User-Agent so upstream sites serve their normal pages. A fresh
    AsyncClient is created (and closed) per call.
    """
    # Minimal browser-like headers; some sites block default HTTP clients.
    browser_headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/120.0 Safari/537.36"
        )
    }
    async with httpx.AsyncClient(follow_redirects=True, timeout=15) as client:
        return await client.get(url, headers=browser_headers)
105
 
106
 
107
def rewrite_html(html: str, base_url: str) -> str:
    """
    Rewrite URL-carrying attributes in *html* so navigation and
    sub-resources are fetched through the /proxy endpoint.

    Relative references (``/path``, ``//cdn.example.com``, ``img.png``)
    are resolved against *base_url* before being proxied. A small fixed
    banner is appended to <body> to mark the page as proxied.

    Fixes over the naive version:
    - fragment-only links ("#section") are left untouched; proxying them
      would turn an in-page scroll into a full page reload;
    - non-http(s) schemes (mailto:, javascript:, tel:, data:) are left
      untouched — they cannot be fetched through the proxy.
    """
    soup = BeautifulSoup(html, "html.parser")

    def proxify(attr: str, tag):
        original = tag.attrs.get(attr)
        if not original:
            return
        # Keep in-page anchors working without a round-trip.
        if original.startswith("#"):
            return
        # Handle //cdn.example.com, /path, relative paths, etc.
        absolute = urljoin(base_url, original)
        # Only http(s) targets are proxyable; leave other schemes intact.
        if not absolute.startswith(("http://", "https://")):
            return
        tag.attrs[attr] = f"/proxy?url={quote(absolute, safe='')}"

    # Rewrite the common URL-carrying tags.
    for tag in soup.find_all(["a", "img", "script", "link", "form", "iframe"]):
        if tag.name in ("a", "link"):
            proxify("href", tag)
        if tag.name in ("img", "script", "iframe"):
            proxify("src", tag)
        if tag.name == "form":
            proxify("action", tag)

    # Inject a small fixed banner so users can tell the page is proxied.
    banner = soup.new_tag("div")
    banner.string = f"Proxied via HF Space — {base_url}"
    banner["style"] = (
        "position:fixed;bottom:0;left:0;right:0;"
        "background:#111827;color:#e5e7eb;"
        "font-size:12px;padding:4px 8px;z-index:9999;"
    )
    if soup.body:
        soup.body.append(banner)

    return str(soup)
 
 
 
 
 
144
 
145
 
146
@app.get("/proxy")
async def proxy(url: str, request: Request):
    """
    Reverse-proxy endpoint: ``/proxy?url=https://example.com``.

    HTML responses get their links rewritten via rewrite_html() so that
    follow-up navigation stays inside the proxy; other content types are
    passed through with hop-by-hop headers removed.

    *request* is currently unused but kept for interface stability.
    """
    try:
        upstream = await fetch_url(url)
    except Exception as e:  # boundary handler: surface any fetch failure as 502
        # html.escape() on the user-supplied URL and the exception text
        # prevents reflected XSS through the error page.
        return HTMLResponse(
            f"<h1>Error</h1><p>Could not fetch {html.escape(url)}</p>"
            f"<pre>{html.escape(str(e))}</pre>",
            status_code=502,
        )

    content_type = upstream.headers.get("content-type", "")

    # HTML: rewrite links so that all further requests go via /proxy.
    if "text/html" in content_type:
        rewritten = rewrite_html(upstream.text, base_url=url)
        return HTMLResponse(content=rewritten, status_code=upstream.status_code)

    # Non-HTML (images, JS, CSS, fonts, ...): pass through the body.
    # Drop hop-by-hop headers AND content-length/content-encoding:
    # httpx has already decompressed upstream.content, so the original
    # length/encoding no longer describe the bytes we forward — keeping
    # Content-Length would truncate or corrupt the client's read.
    dropped = ("content-encoding", "transfer-encoding", "connection", "content-length")
    safe_headers = {
        k: v for k, v in upstream.headers.items() if k.lower() not in dropped
    }

    return Response(
        content=upstream.content,
        status_code=upstream.status_code,
        headers=safe_headers,
        media_type=content_type or None,
    )