dkescape committed on
Commit
7437948
·
verified ·
1 Parent(s): 9c4292d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -0
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ import urllib.parse
5
+
def fetch_website(url):
    """Fetch *url* and return a summary of the page as a plain dict.

    Parameters:
        url: The absolute URL to retrieve (untrusted user input — note this
             app will fetch whatever it is given; no SSRF/allow-list guard
             is applied here).

    Returns:
        A dict with keys "title", "url", "status_code", "content_preview"
        (first 1000 characters of the response body) and "links" (up to 20
        absolute URLs extracted from <a href> tags, newline-joined).
        On any failure a sentinel dict with title "Error", status_code 500
        and the exception text as the preview is returned instead of raising.
    """
    try:
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0 Safari/537.36"
        }
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')
        # soup.title.string can be None when the <title> tag contains nested
        # markup, so guard both cases to honor the "No Title" fallback.
        title = (soup.title.string if soup.title else None) or "No Title"

        # Resolve relative hrefs against the final page URL (response.url
        # reflects any redirects).  The previous code joined against the
        # site root via urljoin(url, '/'), which mis-resolved relative
        # links such as "page.html" on any non-root page.
        links = [
            urllib.parse.urljoin(response.url, a.get('href'))
            for a in soup.find_all('a', href=True)
        ]

        return {
            "title": title,
            "url": url,
            "status_code": response.status_code,
            "content_preview": response.text[:1000],  # First 1000 chars
            "links": "\n".join(links[:20])  # Show up to 20 links
        }
    except Exception as e:
        # Deliberate catch-all: the UI expects a dict, never an exception.
        # Covers network errors, invalid URLs, HTTP error statuses
        # (raise_for_status) and HTML parsing failures alike.
        return {
            "title": "Error",
            "url": "",
            "status_code": 500,
            "content_preview": str(e),
            "links": ""
        }
+
40
# UI definition: Gradio Blocks front-end wired to fetch_website.
# The top-level name must stay `demo` (Hugging Face Spaces looks for it).
with gr.Blocks(theme="default") as demo:
    gr.Markdown("# 🌐 Proxy Website Surfer\nEnter a URL to fetch its contents through this HuggingFace-powered proxy.")

    with gr.Row():
        url_box = gr.Textbox(label="Enter URL", placeholder="https://example.com")

    fetch_button = gr.Button("Fetch Site")

    with gr.Row():
        title_box = gr.Textbox(label="Page Title")
        status_box = gr.Number(label="HTTP Status Code")

    preview_box = gr.Textbox(label="Content Preview (first 1000 characters)", lines=10)
    links_box = gr.Textbox(label="Extracted Links", lines=15)

    def _on_fetch(url):
        # Adapt fetch_website's result dict into the positional tuple
        # Gradio expects — one value per output component, in order.
        # (The dict's "url" key is intentionally not displayed.)
        info = fetch_website(url)
        return (
            info["title"],
            info["status_code"],
            info["content_preview"],
            info["links"],
        )

    fetch_button.click(
        fn=_on_fetch,
        inputs=url_box,
        outputs=[title_box, status_box, preview_box, links_box],
    )

demo.launch()