bparekh99 committed on
Commit
75028e1
·
verified ·
1 Parent(s): 01f2776

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -25
app.py CHANGED
@@ -1,27 +1,55 @@
1
  import gradio as gr
2
  import requests
 
3
  from bs4 import BeautifulSoup
 
4
  from google import genai
5
 
6
  # -----------------------------
7
- # Helper: Fetch & clean webpage
8
  # -----------------------------
9
- def fetch_website_text(url):
10
- headers = {"User-Agent": "Mozilla/5.0"}
11
- response = requests.get(url, headers=headers, timeout=10)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  response.raise_for_status()
13
 
14
  soup = BeautifulSoup(response.text, "html.parser")
15
 
16
- # Remove scripts & styles
17
  for tag in soup(["script", "style", "noscript"]):
18
  tag.decompose()
19
 
20
- title = soup.title.string if soup.title else ""
21
  h1 = soup.find("h1").get_text(strip=True) if soup.find("h1") else ""
22
 
23
  body_text = " ".join(soup.stripped_strings)
24
- body_text = body_text[:8000] # keep token usage sane
25
 
26
  return f"""
27
  PAGE TITLE:
@@ -34,17 +62,41 @@ VISIBLE CONTENT:
34
  {body_text}
35
  """
36
 
 
37
  # -----------------------------
38
- # Gemini analysis
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  # -----------------------------
40
  def analyze_website(api_key, url, industry, goal):
41
  if not api_key:
42
  return "❌ Please enter your Gemini API key."
43
 
 
 
 
44
  try:
 
 
45
  client = genai.Client(api_key=api_key)
46
 
47
- website_text = fetch_website_text(url)
48
 
49
  prompt = f"""
50
  You are an AI consultant helping small businesses improve their websites.
@@ -85,53 +137,66 @@ Website content:
85
 
86
  response = client.models.generate_content(
87
  model="gemini-1.5-flash",
88
- contents=prompt
89
  )
90
 
91
  return response.text
92
 
93
  except Exception as e:
94
- return f"❌ Error: {str(e)}"
 
95
 
96
  # -----------------------------
97
  # Gradio UI
98
  # -----------------------------
99
  with gr.Blocks(title="AI Website Review Tool") as demo:
100
  gr.Markdown("## 🔍 AI Website Review Tool")
101
- gr.Markdown("Analyze a website and receive practical, business-focused recommendations.")
 
 
102
 
103
- with gr.Row():
104
- api_key = gr.Textbox(
105
- label="Gemini API Key",
106
- placeholder="Paste your Gemini API key here",
107
- type="password"
108
- )
109
 
110
  url = gr.Textbox(
111
  label="Website URL",
112
- placeholder="https://example.com"
113
  )
114
 
115
  industry = gr.Dropdown(
116
  label="Industry",
117
- choices=["General SMB", "Law Firm", "Hospitality", "Healthcare", "Real Estate"],
118
- value="General SMB"
 
 
 
 
 
 
119
  )
120
 
121
  goal = gr.Dropdown(
122
  label="Primary Website Goal",
123
- choices=["Generate leads", "Sell services", "Build credibility", "Educate visitors"],
124
- value="Generate leads"
 
 
 
 
 
125
  )
126
 
127
  analyze_btn = gr.Button("Analyze Website")
128
 
129
- output = gr.Markdown(label="Analysis Results")
130
 
131
  analyze_btn.click(
132
  fn=analyze_website,
133
  inputs=[api_key, url, industry, goal],
134
- outputs=output
135
  )
136
 
137
  demo.launch()
 
1
  import gradio as gr
2
  import requests
3
+ import socket
4
  from bs4 import BeautifulSoup
5
+ from urllib.parse import urlparse
6
  from google import genai
7
 
8
  # -----------------------------
9
+ # URL Normalization
10
  # -----------------------------
11
def normalize_url(url: str) -> str:
    """Return *url* with an explicit scheme, defaulting to HTTPS.

    Surrounding whitespace is removed first, since URLs pasted into a form
    often carry a stray space or newline.

    Args:
        url: A URL as typed by the user, with or without a scheme.

    Returns:
        The stripped URL unchanged when it already has the form
        ``scheme://host...``; ``https:`` + URL for protocol-relative input
        (``//host/path``); otherwise ``https://`` + URL.
    """
    url = url.strip()

    # Protocol-relative input already carries the double slash; adding
    # "https://" would yield "https:////host".
    if url.startswith("//"):
        return "https:" + url

    parsed = urlparse(url)
    # A scheme alone is not enough evidence: for "host:port" input urlparse
    # can report the host as the scheme. Requiring a network location as
    # well means only genuine "scheme://host" URLs are passed through.
    if parsed.scheme and parsed.netloc:
        return url
    return "https://" + url
16
+
17
+
18
+ # -----------------------------
19
+ # Fetch & Parse Website (Hardened)
20
+ # -----------------------------
21
+ def fetch_website_text(url: str) -> str:
22
+ socket.setdefaulttimeout(10)
23
+
24
+ headers = {
25
+ "User-Agent": (
26
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
27
+ "AppleWebKit/537.36 (KHTML, like Gecko) "
28
+ "Chrome/121.0 Safari/537.36"
29
+ ),
30
+ "Accept": "text/html,application/xhtml+xml",
31
+ "Accept-Language": "en-US,en;q=0.9",
32
+ }
33
+
34
+ response = requests.get(
35
+ url,
36
+ headers=headers,
37
+ timeout=10,
38
+ allow_redirects=True,
39
+ )
40
  response.raise_for_status()
41
 
42
  soup = BeautifulSoup(response.text, "html.parser")
43
 
44
+ # Remove noisy tags
45
  for tag in soup(["script", "style", "noscript"]):
46
  tag.decompose()
47
 
48
+ title = soup.title.string.strip() if soup.title else ""
49
  h1 = soup.find("h1").get_text(strip=True) if soup.find("h1") else ""
50
 
51
  body_text = " ".join(soup.stripped_strings)
52
+ body_text = body_text[:8000] # token safety
53
 
54
  return f"""
55
  PAGE TITLE:
 
62
  {body_text}
63
  """
64
 
65
+
66
  # -----------------------------
67
+ # Safe Wrapper (Never Crash)
68
+ # -----------------------------
69
def fetch_website_text_safe(url: str) -> str:
    """Fetch the page text for *url*, never raising.

    Delegates to ``fetch_website_text``. If that call raises any
    ``Exception``, a fallback notice containing the error message is
    returned instead, so the caller always receives a string.
    """
    try:
        page_text = fetch_website_text(url)
    except Exception as e:
        # Deliberately broad: this wrapper is the "never crash" boundary.
        page_text = f"""
⚠️ Unable to fully fetch website content.

Error:
{e!s}

Fallback:
Analyze based on URL structure, homepage intent, and general best practices.
"""
    return page_text
82
+
83
+
84
+ # -----------------------------
85
+ # Gemini Analysis
86
  # -----------------------------
87
  def analyze_website(api_key, url, industry, goal):
88
  if not api_key:
89
  return "❌ Please enter your Gemini API key."
90
 
91
+ if not url:
92
+ return "❌ Please enter a website URL."
93
+
94
  try:
95
+ url = normalize_url(url)
96
+
97
  client = genai.Client(api_key=api_key)
98
 
99
+ website_text = fetch_website_text_safe(url)
100
 
101
  prompt = f"""
102
  You are an AI consultant helping small businesses improve their websites.
 
137
 
138
  response = client.models.generate_content(
139
  model="gemini-1.5-flash",
140
+ contents=prompt,
141
  )
142
 
143
  return response.text
144
 
145
  except Exception as e:
146
+ return f"❌ Error during analysis: {str(e)}"
147
+
148
 
149
  # -----------------------------
150
  # Gradio UI
151
  # -----------------------------
152
  with gr.Blocks(title="AI Website Review Tool") as demo:
153
  gr.Markdown("## 🔍 AI Website Review Tool")
154
+ gr.Markdown(
155
+ "Analyze any website and receive practical, business-focused recommendations."
156
+ )
157
 
158
+ api_key = gr.Textbox(
159
+ label="Gemini API Key",
160
+ placeholder="Paste your Gemini API key here",
161
+ type="password",
162
+ )
 
163
 
164
  url = gr.Textbox(
165
  label="Website URL",
166
+ placeholder="https://example.com",
167
  )
168
 
169
  industry = gr.Dropdown(
170
  label="Industry",
171
+ choices=[
172
+ "General SMB",
173
+ "Law Firm",
174
+ "Hospitality",
175
+ "Healthcare",
176
+ "Real Estate",
177
+ ],
178
+ value="General SMB",
179
  )
180
 
181
  goal = gr.Dropdown(
182
  label="Primary Website Goal",
183
+ choices=[
184
+ "Generate leads",
185
+ "Sell services",
186
+ "Build credibility",
187
+ "Educate visitors",
188
+ ],
189
+ value="Generate leads",
190
  )
191
 
192
  analyze_btn = gr.Button("Analyze Website")
193
 
194
+ output = gr.Markdown()
195
 
196
  analyze_btn.click(
197
  fn=analyze_website,
198
  inputs=[api_key, url, industry, goal],
199
+ outputs=output,
200
  )
201
 
202
  demo.launch()