dpv007 committed on
Commit
e33bfc3
·
verified ·
1 Parent(s): e221fe9

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +159 -99
main.py CHANGED
@@ -1,120 +1,180 @@
 
 
1
  import httpx
2
- from urllib.parse import urlparse, quote_plus
3
-
4
- from fastapi import FastAPI, Request, Query, Form
5
- from fastapi.responses import HTMLResponse, Response, RedirectResponse
6
- from fastapi.templating import Jinja2Templates
7
 
8
  app = FastAPI()
9
 
10
- templates = Jinja2Templates(directory="templates")
11
-
12
 
13
- def sanitize_url(url: str) -> str:
14
- url = url.strip()
15
- if not url:
16
- return ""
17
- # If it's just a word, treat it as search later
18
- if "://" not in url:
19
- return url
20
- p = urlparse(url)
21
- if p.scheme not in ("http", "https"):
22
- return ""
23
- if not p.netloc:
24
- return ""
25
- return p.geturl()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
 
28
  @app.get("/", response_class=HTMLResponse)
29
- async def home(request: Request):
30
- return templates.TemplateResponse("index.html", {"request": request})
31
 
32
 
33
- @app.post("/go", response_class=HTMLResponse)
34
- async def go(request: Request, q: str = Form(...)):
35
  """
36
- Handle form submission: treat q as either URL or search query.
37
  """
38
- cleaned = sanitize_url(q)
39
- if cleaned:
40
- # Looks like a URL
41
- return RedirectResponse(url=f"/proxy?url={quote_plus(cleaned)}", status_code=302)
42
- else:
43
- # Treat as search query
44
- return RedirectResponse(url=f"/search?q={quote_plus(q)}", status_code=302)
 
 
 
 
45
 
46
 
47
- @app.get("/search", response_class=HTMLResponse)
48
- async def search(request: Request, q: str = Query(...)):
49
  """
50
- Very simple meta-search using DuckDuckGo HTML.
51
- Note: This is a hacky example, not an official API.
52
  """
53
- if not q.strip():
54
- return RedirectResponse(url="/", status_code=302)
55
-
56
- # DuckDuckGo HTML interface
57
- target = f"https://duckduckgo.com/html/?q={quote_plus(q)}"
58
-
59
- async with httpx.AsyncClient(follow_redirects=True, timeout=30) as client:
60
- r = await client.get(target, headers={"User-Agent": "Mozilla/5.0"})
61
-
62
- # Rewrite links in the HTML so clicks go via /proxy
63
- html = r.text
64
- # Very naive replacement; proper rewrite would need HTML parsing
65
- html = html.replace('href="/', 'href="https://duckduckgo.com/')
66
- html = html.replace('href="http', 'href="/proxy?url=http')
67
-
68
- wrapper_html = f"""
69
- <html>
70
- <head>
71
- <title>Proxy Search - {q}</title>
72
- <style>
73
- body {{ font-family: sans-serif; margin: 1rem; }}
74
- form {{ margin-bottom: 1rem; }}
75
- input[type=text] {{ width: 70%; padding: 0.5rem; }}
76
- button {{ padding: 0.5rem 1rem; }}
77
- iframe {{ width: 100%; height: 80vh; border: 1px solid #ccc; }}
78
- </style>
79
- </head>
80
- <body>
81
- <form action="/go" method="post">
82
- <input type="text" name="q" value="{q}" placeholder="Search or enter URL" />
83
- <button type="submit">Go</button>
84
- </form>
85
- <hr/>
86
- {html}
87
- </body>
88
- </html>
89
- """
90
- return HTMLResponse(content=wrapper_html)
91
 
92
 
93
  @app.get("/proxy")
94
- async def proxy(url: str = Query(...)):
95
  """
96
- Simple GET proxy to fetch pages.
97
  """
98
- target = sanitize_url(url)
99
- if not target:
100
- return RedirectResponse(url="/", status_code=302)
101
-
102
- async with httpx.AsyncClient(follow_redirects=True, timeout=30) as client:
103
- try:
104
- r = await client.get(target, headers={"User-Agent": "Mozilla/5.0"})
105
- except httpx.RequestError as e:
106
- return HTMLResponse(
107
- f"<h1>Upstream error</h1><pre>{e}</pre>", status_code=502
108
- )
109
-
110
- content_type = r.headers.get("content-type", "text/html")
111
- # Very simple: just forward content. No rewriting of embedded links/CSS/JS.
112
- return Response(content=r.content, status_code=r.status_code, media_type=content_type)
113
-
114
-
115
- if __name__ == "__main__":
116
- import os
117
- import uvicorn
118
-
119
- port = int(os.getenv("PORT", "7860"))
120
- uvicorn.run("main:app", host="0.0.0.0", port=port)
 
 
 
 
 
 
 
 
1
import html
from urllib.parse import urljoin, quote

import httpx
from bs4 import BeautifulSoup
from fastapi import FastAPI, Request, Response
from fastapi.responses import HTMLResponse
 
 
 
6
 
7
# Application object picked up by uvicorn / the Space runtime.
app = FastAPI()
8
 
 
 
9
 
10
# Landing page served verbatim by the "/" route: a dark URL bar plus an
# <iframe> whose src is pointed at /proxy?url=... by the inline script.
HTML_INDEX = """
<!doctype html>
<html>
  <head>
    <meta charset="utf-8" />
    <title>HF Proxy Browser</title>
    <style>
      body { font-family: sans-serif; margin: 0; padding: 0; }
      #bar {
        padding: 10px;
        background: #111827;
        color: #e5e7eb;
        display: flex;
        gap: 8px;
        align-items: center;
      }
      input[type="text"] {
        flex: 1;
        padding: 6px 8px;
        border-radius: 4px;
        border: 1px solid #4b5563;
        background: #111827;
        color: #e5e7eb;
      }
      button {
        padding: 6px 12px;
        border-radius: 4px;
        border: none;
        cursor: pointer;
      }
      #go {
        background: #3b82f6;
        color: white;
      }
      #frame {
        width: 100%;
        height: calc(100vh - 48px);
        border: none;
      }
    </style>
  </head>
  <body>
    <div id="bar">
      <span>Proxy URL:</span>
      <input id="url" type="text" placeholder="https://example.com" />
      <button id="go">Go</button>
    </div>
    <iframe id="frame"></iframe>
    <script>
      const input = document.getElementById('url');
      const frame = document.getElementById('frame');
      const btn = document.getElementById('go');

      function load() {
        let url = input.value.trim();
        if (!url) return;
        if (!url.startsWith('http://') && !url.startsWith('https://')) {
          url = 'https://' + url;
        }
        frame.src = '/proxy?url=' + encodeURIComponent(url);
      }

      btn.addEventListener('click', load);
      input.addEventListener('keydown', e => {
        if (e.key === 'Enter') {
          e.preventDefault();
          load();
        }
      });
    </script>
  </body>
</html>
"""
83
 
84
 
85
@app.get("/", response_class=HTMLResponse)
async def index():
    """Serve the static landing page (URL bar + iframe)."""
    return HTML_INDEX
88
 
89
 
90
async def fetch_url(url: str) -> httpx.Response:
    """
    GET *url* and return the httpx response.

    Follows redirects with a 15 s timeout and sends a desktop-browser
    User-Agent so upstream sites serve their normal pages. A fresh
    AsyncClient is created (and closed) per call.
    """
    # Minimal browser-like headers; some sites block default HTTP clients.
    browser_headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/120.0 Safari/537.36"
        )
    }
    async with httpx.AsyncClient(follow_redirects=True, timeout=15) as client:
        return await client.get(url, headers=browser_headers)
105
 
106
 
107
def rewrite_html(html: str, base_url: str) -> str:
    """
    Rewrite URL-carrying attributes in *html* so navigation and
    sub-resources are fetched through the /proxy endpoint.

    Relative references (``/path``, ``//cdn.example.com``, ``img.png``)
    are resolved against *base_url* before being proxied. A small fixed
    banner is appended to <body> to mark the page as proxied.

    Fixes over the naive version:
    - fragment-only links ("#section") are left untouched; proxying them
      would turn an in-page scroll into a full page reload;
    - non-http(s) schemes (mailto:, javascript:, tel:, data:) are left
      untouched — they cannot be fetched through the proxy.
    """
    soup = BeautifulSoup(html, "html.parser")

    def proxify(attr: str, tag):
        original = tag.attrs.get(attr)
        if not original:
            return
        # Keep in-page anchors working without a round-trip.
        if original.startswith("#"):
            return
        # Handle //cdn.example.com, /path, relative paths, etc.
        absolute = urljoin(base_url, original)
        # Only http(s) targets are proxyable; leave other schemes intact.
        if not absolute.startswith(("http://", "https://")):
            return
        tag.attrs[attr] = f"/proxy?url={quote(absolute, safe='')}"

    # Rewrite the common URL-carrying tags.
    for tag in soup.find_all(["a", "img", "script", "link", "form", "iframe"]):
        if tag.name in ("a", "link"):
            proxify("href", tag)
        if tag.name in ("img", "script", "iframe"):
            proxify("src", tag)
        if tag.name == "form":
            proxify("action", tag)

    # Inject a small fixed banner so users can tell the page is proxied.
    banner = soup.new_tag("div")
    banner.string = f"Proxied via HF Space — {base_url}"
    banner["style"] = (
        "position:fixed;bottom:0;left:0;right:0;"
        "background:#111827;color:#e5e7eb;"
        "font-size:12px;padding:4px 8px;z-index:9999;"
    )
    if soup.body:
        soup.body.append(banner)

    return str(soup)
 
 
 
 
 
144
 
145
 
146
@app.get("/proxy")
async def proxy(url: str, request: Request):
    """
    Reverse-proxy endpoint: ``/proxy?url=https://example.com``.

    HTML responses get their links rewritten via rewrite_html() so that
    follow-up navigation stays inside the proxy; other content types are
    passed through with hop-by-hop headers removed.

    *request* is currently unused but kept for interface stability.
    """
    try:
        upstream = await fetch_url(url)
    except Exception as e:  # boundary handler: surface any fetch failure as 502
        # html.escape() on the user-supplied URL and the exception text
        # prevents reflected XSS through the error page.
        return HTMLResponse(
            f"<h1>Error</h1><p>Could not fetch {html.escape(url)}</p>"
            f"<pre>{html.escape(str(e))}</pre>",
            status_code=502,
        )

    content_type = upstream.headers.get("content-type", "")

    # HTML: rewrite links so that all further requests go via /proxy.
    if "text/html" in content_type:
        rewritten = rewrite_html(upstream.text, base_url=url)
        return HTMLResponse(content=rewritten, status_code=upstream.status_code)

    # Non-HTML (images, JS, CSS, fonts, ...): pass through the body.
    # Drop hop-by-hop headers AND content-length/content-encoding:
    # httpx has already decompressed upstream.content, so the original
    # length/encoding no longer describe the bytes we forward — keeping
    # Content-Length would truncate or corrupt the client's read.
    dropped = ("content-encoding", "transfer-encoding", "connection", "content-length")
    safe_headers = {
        k: v for k, v in upstream.headers.items() if k.lower() not in dropped
    }

    return Response(
        content=upstream.content,
        status_code=upstream.status_code,
        headers=safe_headers,
        media_type=content_type or None,
    )