ahmedumeraziz commited on
Commit
0a1d60a
·
verified ·
1 Parent(s): 84c9052

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +165 -46
app.py CHANGED
@@ -2,71 +2,189 @@ import gradio as gr
2
  import requests
3
  from bs4 import BeautifulSoup
4
  from urllib.parse import urlparse
 
 
5
 
6
def seo_check(url):
    """Run a small 5-point SEO audit against *url*.

    Returns a 5-tuple of display strings:
    (score, passed count, warning count, failed count, detailed report).
    On a fetch failure the first element carries the error message and the
    remaining four are empty strings.
    """
    try:
        response = requests.get(url, timeout=10)
        soup = BeautifulSoup(response.text, 'html.parser')
    except requests.RequestException as e:  # network/HTTP errors only
        return f"❌ Error fetching the URL: {e}", "", "", "", ""

    # Each entry: (name, result, description, suggested fix)
    checks = []

    # SEO Test 1: Title tag
    title_tag = soup.title.string.strip() if soup.title and soup.title.string else ''
    if title_tag:
        checks.append(("Title Tag Present", "Passed", "A <title> tag exists.", "No action needed."))
    else:
        checks.append(("Title Tag Missing", "Failed", "The page is missing a <title> tag.", "Add a <title> tag to improve SEO."))

    # SEO Test 2: Meta description
    meta_desc = soup.find("meta", attrs={"name": "description"})
    if meta_desc and meta_desc.get("content", "").strip():
        checks.append(("Meta Description Present", "Passed", "A meta description is provided.", "No action needed."))
    else:
        checks.append(("Meta Description Missing", "Warning", "No meta description found.", "Add a concise meta description (150-160 characters)."))

    # SEO Test 3: H1 tag
    h1 = soup.find("h1")
    if h1 and h1.get_text(strip=True):
        checks.append(("H1 Tag Present", "Passed", "A single <h1> tag is present.", "No action needed."))
    else:
        checks.append(("H1 Tag Missing", "Failed", "No <h1> tag found.", "Include one descriptive <h1> tag per page."))

    # SEO Test 4: HTTPS usage
    parsed = urlparse(url)
    if parsed.scheme == "https":
        checks.append(("HTTPS Protocol", "Passed", "Site uses secure HTTPS connection.", "No action needed."))
    else:
        checks.append(("Not Using HTTPS", "Failed", "The site is not served over HTTPS.", "Enable SSL/TLS to secure the site and improve rankings."))

    # SEO Test 5: Robots.txt availability
    robots_url = f"{parsed.scheme}://{parsed.netloc}/robots.txt"
    try:
        robots_response = requests.get(robots_url, timeout=5)
        if robots_response.status_code == 200:
            checks.append(("robots.txt Found", "Passed", "robots.txt file exists.", "No action needed."))
        else:
            checks.append(("robots.txt Missing", "Warning", "robots.txt file not found.", "Add a robots.txt to control crawler access."))
    except requests.RequestException:  # was a bare except: — narrowed so Ctrl-C etc. still propagate
        checks.append(("robots.txt Fetch Failed", "Warning", "Could not access robots.txt.", "Ensure the file is publicly accessible."))

    # SEO Score calculation: percentage of checks that passed outright.
    total = len(checks)
    passed = sum(1 for _, result, _, _ in checks if result == "Passed")
    warnings = sum(1 for _, result, _, _ in checks if result == "Warning")
    failed = sum(1 for _, result, _, _ in checks if result == "Failed")
    score = int((passed / total) * 100)

    passed_section = "\n".join(f"✅ {name}\n✔️ {desc}" for name, result, desc, _ in checks if result == "Passed")
    warning_section = "\n".join(f"⚠️ {name}\n{desc}\n💡 Suggestion: {fix}" for name, result, desc, fix in checks if result == "Warning")
    failed_section = "\n".join(f"❌ {name}\n{desc}\n💡 Fix: {fix}" for name, result, desc, fix in checks if result == "Failed")

    return f"🔎 SEO Score: {score}/100", f"✅ Passed: {passed}", f"⚠️ Warnings: {warnings}", f"❌ Failed: {failed}", f"{passed_section}\n\n{warning_section}\n\n{failed_section}"
67
 
68
  # Gradio Interface
69
- seo_interface = gr.Interface(
70
  fn=seo_check,
71
  inputs=gr.Textbox(label="Enter Website URL"),
72
  outputs=[
@@ -74,11 +192,12 @@ seo_interface = gr.Interface(
74
  gr.Textbox(label="Passed Tests"),
75
  gr.Textbox(label="Warnings"),
76
  gr.Textbox(label="Failed Tests"),
77
- gr.Textbox(label="Detailed Report", lines=20)
 
78
  ],
79
- title="🔍 Website SEO Check App",
80
- description="Enter any website URL to perform an SEO audit and get detailed, actionable feedback."
81
  )
82
 
83
  if __name__ == "__main__":
84
- seo_interface.launch()
 
2
  import requests
3
  from bs4 import BeautifulSoup
4
  from urllib.parse import urlparse
5
+ import re
6
+ import matplotlib.pyplot as plt
7
 
8
def fetch_html(url):
    """Download *url* and return ``(html_text, "")``, or ``(None, error_msg)``.

    A browser-like User-Agent is sent because some sites reject the default
    python-requests UA. HTTP error statuses are converted to the error path
    via ``raise_for_status``.
    """
    try:
        resp = requests.get(
            url,
            timeout=10,
            headers={'User-Agent': 'Mozilla/5.0'},
        )
        resp.raise_for_status()
        return resp.text, ""
    except Exception as exc:
        return None, f"❌ Error fetching the URL: {exc}"
16
+
17
def seo_check(url):
    """Audit *url* across ~25 on-page SEO checks.

    Returns a 6-tuple for the Gradio outputs:
    (score string, passed count, warning count, failed count,
    detailed text report, matplotlib summary figure).
    On a fetch failure the first element is the error message, the middle
    elements are empty strings and the figure slot is None.
    """
    html, error = fetch_html(url)
    if error:
        return error, "", "", "", "", None

    soup = BeautifulSoup(html, 'html.parser')
    parsed = urlparse(url)

    # Each entry: (name, result, details, suggestion)
    checks = []

    def add_check(name, result, details, suggestion=""):
        # Strip once here so the report formatting stays clean.
        checks.append((name, result, details.strip(), suggestion.strip()))

    # --- Basic document tags -------------------------------------------
    title = soup.title.string.strip() if soup.title and soup.title.string else ""
    add_check("Title Tag", "Passed" if title else "Failed",
              f"Found: {title}" if title else "Title tag missing.",
              "" if title else "Add a <title> tag with relevant keywords.")

    meta_desc = soup.find("meta", attrs={"name": "description"})
    meta_text = meta_desc.get("content", "").strip() if meta_desc else ""
    add_check("Meta Description", "Passed" if meta_text else "Warning",
              f"Found: {meta_text}" if meta_text else "No meta description found.",
              "Add a concise meta description (150-160 characters)." if not meta_text else "")

    h1 = soup.find("h1")
    h1_text = h1.get_text(strip=True) if h1 else ""
    add_check("H1 Tag", "Passed" if h1_text else "Failed",
              f"Found: {h1_text}" if h1_text else "No <h1> tag found.",
              "Include one <h1> tag per page.")

    h2_tags = soup.find_all("h2")
    add_check("H2 Tags", "Passed" if h2_tags else "Warning",
              f"Found {len(h2_tags)} <h2> tags." if h2_tags else "No <h2> tags found.",
              "Use <h2> tags to structure subheadings.")

    canonical = soup.find("link", rel="canonical")
    add_check("Canonical Tag", "Passed" if canonical else "Warning",
              f"Found: {canonical['href']}" if canonical and canonical.has_attr('href') else "No canonical tag found.",
              "Add a canonical tag to prevent duplicate content issues.")

    add_check("HTTPS", "Passed" if parsed.scheme == "https" else "Failed",
              f"URL uses {'HTTPS' if parsed.scheme == 'https' else 'HTTP'}.",
              "Use HTTPS for secure connections.")

    # --- Crawling aids -------------------------------------------------
    robots_url = f"{parsed.scheme}://{parsed.netloc}/robots.txt"
    try:
        robots_response = requests.get(robots_url, timeout=5)
        add_check("robots.txt", "Passed" if robots_response.status_code == 200 else "Warning",
                  "robots.txt is accessible." if robots_response.status_code == 200 else "robots.txt not found.",
                  "Create a robots.txt file to manage crawler access.")
    except requests.RequestException:  # was a bare except: — narrowed
        add_check("robots.txt", "Warning", "robots.txt could not be fetched.", "Ensure it's accessible.")

    sitemap_url = f"{parsed.scheme}://{parsed.netloc}/sitemap.xml"
    try:
        sitemap_response = requests.get(sitemap_url, timeout=5)
        add_check("sitemap.xml", "Passed" if sitemap_response.status_code == 200 else "Warning",
                  "sitemap.xml is accessible." if sitemap_response.status_code == 200 else "sitemap.xml not found.",
                  "Add a sitemap.xml to help search engines index your pages.")
    except requests.RequestException:  # was a bare except: — narrowed
        add_check("sitemap.xml", "Warning", "Could not access sitemap.xml.", "Ensure it's publicly accessible.")

    # --- Head metadata -------------------------------------------------
    favicon = soup.find("link", rel=re.compile("icon", re.I))
    add_check("Favicon", "Passed" if favicon else "Warning",
              "Favicon found." if favicon else "No favicon detected.",
              "Add a favicon for branding and user experience.")

    charset = soup.find("meta", attrs={"charset": True})
    add_check("Charset", "Passed" if charset else "Warning",
              f"Found: {charset['charset']}" if charset else "No charset declared.",
              "Add a <meta charset='UTF-8'> to define character encoding.")

    lang_attr = soup.html.get("lang", "") if soup.html else ""
    add_check("Language Attribute", "Passed" if lang_attr else "Warning",
              f"Found: lang='{lang_attr}'" if lang_attr else "No lang attribute found in <html>.",
              "Set <html lang='en'> for proper language targeting.")

    viewport = soup.find("meta", attrs={"name": "viewport"})
    add_check("Mobile Viewport", "Passed" if viewport else "Warning",
              "Viewport tag present." if viewport else "No viewport meta tag found.",
              "Add <meta name='viewport' content='width=device-width, initial-scale=1.0'>.")

    # Plain substring scan of the raw HTML — catches both analytics.js and gtag.
    ga_code = "google-analytics.com" in html or "gtag(" in html
    add_check("Google Analytics", "Passed" if ga_code else "Warning",
              "Google Analytics script detected." if ga_code else "No GA script found.",
              "Install GA script to track visitors.")

    og_tags = soup.find("meta", property="og:title")
    add_check("Open Graph Tags", "Passed" if og_tags else "Warning",
              "OG tags found." if og_tags else "No OG tags present.",
              "Add Open Graph meta tags to enhance social sharing.")

    twitter_card = soup.find("meta", attrs={"name": "twitter:card"})
    add_check("Twitter Card", "Passed" if twitter_card else "Warning",
              "Twitter Card tag present." if twitter_card else "No Twitter Card meta tag found.",
              "Add Twitter Card tags to improve tweet previews.")

    # --- Content quality -----------------------------------------------
    images = soup.find_all("img")
    alt_missing = sum(1 for img in images if not img.get("alt"))
    add_check("Image ALT Texts", "Passed" if alt_missing == 0 else "Warning",
              f"{len(images)} images found, {alt_missing} missing alt text.",
              "Add descriptive alt attributes to all images.")

    inline_styles = bool(soup.find(style=True))
    add_check("Inline Styles", "Warning" if inline_styles else "Passed",
              "Inline styles detected." if inline_styles else "No inline styles found.",
              "Move inline styles to external CSS.")

    font_tags = soup.find_all("font")
    add_check("Deprecated <font> Tags", "Warning" if font_tags else "Passed",
              f"Found {len(font_tags)} <font> tags." if font_tags else "No deprecated tags.",
              "Avoid deprecated tags like <font>, use CSS instead.")

    strong_tags = soup.find_all("strong") + soup.find_all("em")
    add_check("Semantic Emphasis Tags", "Passed" if strong_tags else "Warning",
              f"Found {len(strong_tags)} <strong>/<em> tags." if strong_tags else "No emphasis tags.",
              "Use <strong> and <em> to highlight important content.")

    noindex = soup.find("meta", attrs={"name": "robots", "content": re.compile("noindex", re.I)})
    add_check("Noindex Tag", "Warning" if noindex else "Passed",
              "Page marked noindex." if noindex else "No noindex tag found.",
              "Remove noindex to allow search indexing (if intentional).")

    ext_scripts = soup.find_all("script", src=True)
    ext_styles = soup.find_all("link", rel="stylesheet")
    add_check("External JS/CSS", "Passed" if len(ext_scripts) + len(ext_styles) <= 10 else "Warning",
              f"Found {len(ext_scripts)} JS and {len(ext_styles)} CSS includes.",
              "Reduce number of external scripts/styles for better performance.")

    social_links = [a['href'] for a in soup.find_all('a', href=True) if any(x in a['href'] for x in ['facebook', 'twitter', 'linkedin'])]
    add_check("Social Media Links", "Passed" if social_links else "Warning",
              f"Found links: {', '.join(social_links)}" if social_links else "No social media links found.",
              "Add links to your social profiles to build trust.")

    page_size = len(html.encode('utf-8'))
    add_check("Page Size", "Passed" if page_size < 250000 else "Warning",
              f"Page size: {page_size / 1024:.2f} KB.",
              "Keep HTML under 250KB for faster load times.")

    # --- Outbound link health -----------------------------------------
    # FIX: the original built this list in a comprehension with no error
    # handling, so the first timeout/connection error crashed the entire
    # audit. Check each link individually and treat an unreachable link
    # (request exception) as broken, same as a >=400 status.
    broken_links = []
    for a in soup.find_all('a', href=True):
        href = a['href']
        if not href.startswith("http"):
            continue  # skip relative/mailto/javascript links, as before
        try:
            if requests.head(href, timeout=5).status_code >= 400:
                broken_links.append(href)
        except requests.RequestException:
            broken_links.append(href)
    add_check("Broken Links", "Passed" if not broken_links else "Failed",
              f"Broken links: {broken_links}" if broken_links else "No broken links found.",
              "Fix or remove broken links.")

    # --- Score calculation: percentage of checks that passed outright ---
    total = len(checks)
    passed = sum(1 for _, r, _, _ in checks if r == "Passed")
    warning = sum(1 for _, r, _, _ in checks if r == "Warning")
    failed = sum(1 for _, r, _, _ in checks if r == "Failed")
    score = int((passed / total) * 100)

    # Build the plain-text report, one stanza per check.
    report = ""
    for name, result, details, suggestion in checks:
        icon = {"Passed": "✅", "Warning": "⚠️", "Failed": "❌"}[result]
        report += f"{icon} {name} — {result}\n{details}\n"
        if suggestion:
            report += f"💡 {suggestion}\n"
        report += "\n"

    # --- Summary graph -------------------------------------------------
    fig, ax = plt.subplots()
    ax.bar(["Passed", "Warnings", "Failed"], [passed, warning, failed], color=["green", "orange", "red"])
    ax.set_title(f"SEO Test Summary (Score: {score}/100)")
    ax.set_ylabel("Number of Checks")
    fig.tight_layout()
    # FIX: deregister the figure from pyplot's global state so repeated
    # requests in this long-running app don't accumulate open figures.
    # The Figure object itself stays valid and is rendered by gr.Plot.
    plt.close(fig)

    return f" SEO Score: {score}/100", f"{passed}", f"{warning}", f"{failed}", report.strip(), fig
185
 
186
  # Gradio Interface
187
+ interface = gr.Interface(
188
  fn=seo_check,
189
  inputs=gr.Textbox(label="Enter Website URL"),
190
  outputs=[
 
192
  gr.Textbox(label="Passed Tests"),
193
  gr.Textbox(label="Warnings"),
194
  gr.Textbox(label="Failed Tests"),
195
+ gr.Textbox(label="Detailed Report", lines=30),
196
+ gr.Plot(label="SEO Score Graph")
197
  ],
198
+ title="🔍 Advanced SEO Checker",
199
+ description="Checks 25 SEO parameters and gives detailed recommendations. Built with ❤️ in Gradio."
200
  )
201
 
202
  if __name__ == "__main__":
203
+ interface.launch()