import gradio as gr
import requests
from bs4 import BeautifulSoup
import urllib.parse

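# Server-side fetcher: requests downloads the page and BeautifulSoup extracts
# the title and anchor hrefs.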
def fetch_website(url):
    try:
        # Send a desktop browser User-Agent; some sites reject the default
        # python-requests one.
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0 Safari/537.36"
        }
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")
        title = soup.title.string.strip() if soup.title and soup.title.string else "No Title"

        # Resolve hrefs against the final response URL (after any redirects)
        # so page-relative links like "about.html" point to the right place.
        links = [
            urllib.parse.urljoin(response.url, a["href"])
            for a in soup.find_all("a", href=True)
        ]

        return {
            "title": title,
            "url": response.url,
            "status_code": response.status_code,
            "content_preview": response.text[:1000],
            "links": "\n".join(links[:20]),
        }
    except Exception as e:
        # If raise_for_status() tripped, report the real HTTP status; for
        # network or parsing failures there is none, so fall back to 0.
        status = getattr(getattr(e, "response", None), "status_code", 0)
        return {
            "title": "Error",
            "url": url,
            "status_code": status,
            "content_preview": str(e),
            "links": "",
        }

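# UI: one URL input, a fetch button, and four read-only fields for the
# title, status code, content preview, and extracted links.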
with gr.Blocks(theme="default") as demo:
    gr.Markdown(
        "# 🌐 Proxy Website Surfer\n"
        "Enter a URL to fetch its contents through this HuggingFace-powered proxy."
    )

    with gr.Row():
        url_input = gr.Textbox(label="Enter URL", placeholder="https://example.com")

    submit_btn = gr.Button("Fetch Site")

    with gr.Row():
        title_output = gr.Textbox(label="Page Title")
        status_output = gr.Number(label="HTTP Status Code")

    content_output = gr.Textbox(label="Content Preview (first 1000 characters)", lines=10)
    link_output = gr.Textbox(label="Extracted Links", lines=15)

    # Adapt fetch_website's result dict into the positional values Gradio
    # expects for the four output components.
    def wrapper(url):
        result = fetch_website(url)
        return (
            result["title"],
            result["status_code"],
            result["content_preview"],
            result["links"],
        )

    # Event listeners must be registered inside the Blocks context.
    submit_btn.click(
        fn=wrapper,
        inputs=url_input,
        outputs=[title_output, status_output, content_output, link_output],
    )


demo.launch()
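# Note: a bare launch() works on Hugging Face Spaces; running locally you can
# pass options such as share=True (temporary public link) or server_port=7860.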