ahmedumeraziz committed on
Commit
29f72fe
·
verified ·
1 Parent(s): ca09566

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +130 -88
app.py CHANGED
"""SEO audit Gradio app.

Fetches a URL, runs 25 heuristic SEO checks against the page's HTML,
computes a percentage score, and renders the results — summary text,
a Plotly gauge, per-check details, and fix suggestions — in a Gradio UI.
"""
import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse, urljoin
import gradio as gr
import plotly.graph_objects as go


def seo_check(url):
    """Run the SEO audit for *url*.

    Returns a 4-tuple matching the Gradio outputs:
    (summary text, Plotly gauge figure or None, detailed results text,
    fix-suggestion text).
    """
    # Default to HTTPS when the user omits the scheme.
    if not url.startswith("http"):
        url = "https://" + url

    try:
        response = requests.get(url, timeout=15)
        response.raise_for_status()
        html = response.text
    except Exception as e:
        # FIX: this previously returned 5 values while the success path
        # (and the 4-output Gradio interface) expects exactly 4.
        return f"❌ Could not access URL: {e}", None, "", ""

    soup = BeautifulSoup(html, "html.parser")
    parsed = urlparse(url)
    base = f"{parsed.scheme}://{parsed.netloc}"

    results = []
    suggestions = []
    passed = 0
    failed = 0
    warnings = 0

    def check(condition, title, fix, warning=False):
        """Record one ✅/⚠️/❌ line; collect the fix text on any non-pass."""
        nonlocal passed, failed, warnings
        if condition:
            results.append(f"✅ {title}")
            passed += 1
        else:
            if warning:
                results.append(f"⚠️ {title}")
                warnings += 1
            else:
                results.append(f"❌ {title}")
                failed += 1
            suggestions.append(f"{title}: {fix}")

    # Title text, guarded: soup.title.string is None for an empty <title>,
    # which previously crashed checks 15 and 25 with AttributeError.
    title_text = (soup.title.string or "").strip() if soup.title else ""

    # 1. Title tag
    check(bool(title_text), "Title Tag Present", "Add a <title> tag to your HTML.")

    # 2. Meta description
    desc_tag = soup.find("meta", attrs={"name": "description"})
    check(bool(desc_tag and desc_tag.get("content", "").strip()),
          "Meta Description Present", "Add a meta description.")

    # 3. Canonical link
    canonical = soup.find("link", rel="canonical")
    check(bool(canonical), "Canonical Tag Present",
          "Add <link rel='canonical'> to avoid duplicate content.")

    # 4. H1 tag
    h1_tags = soup.find_all("h1")
    check(len(h1_tags) == 1, "Exactly One H1 Tag", "Use exactly one <h1> tag.")

    # 5. Mobile viewport
    viewport = soup.find("meta", attrs={"name": "viewport"})
    check(bool(viewport), "Mobile Viewport Meta Tag",
          "Add <meta name='viewport'> for mobile compatibility.")

    # 6. HTTPS
    check(url.startswith("https://"), "Uses HTTPS", "Secure your website with SSL.")

    # 7. robots.txt
    try:
        r1 = requests.get(urljoin(base, "/robots.txt"), timeout=10)
        check(r1.status_code == 200, "robots.txt Found", "Create a robots.txt file.")
    except Exception:
        check(False, "robots.txt Found", "Create a robots.txt file.")

    # 8. sitemap.xml
    try:
        r2 = requests.get(urljoin(base, "/sitemap.xml"), timeout=10)
        check(r2.status_code == 200, "sitemap.xml Found", "Add a sitemap.xml file.")
    except Exception:
        check(False, "sitemap.xml Found", "Add a sitemap.xml file.")

    # 9. Open Graph title
    og_title = soup.find("meta", property="og:title")
    check(bool(og_title), "Open Graph Title Present", "Add Open Graph meta tags.")

    # 10. Open Graph description
    og_desc = soup.find("meta", property="og:description")
    check(bool(og_desc), "Open Graph Description Present", "Add Open Graph meta tags.")

    # 11. Image alt text. FIX: the old condition (alt_missing < images) failed
    # pages with zero images and passed partially-tagged pages; intent per the
    # fix text is "every image has alt".
    images = soup.find_all("img")
    alt_missing = [img for img in images if not img.get("alt")]
    check(not alt_missing, "Image ALT Tags Used",
          "Add alt attributes to all images.")

    # 12./13. Internal vs. external links (host-substring heuristic).
    links = soup.find_all("a", href=True)
    host = parsed.netloc
    internal_links = [a for a in links if host in a["href"]]
    check(len(internal_links) >= 5, "Has Internal Links", "Add more internal links.")
    external_links = [a for a in links
                      if a["href"].startswith("http") and host not in a["href"]]
    check(len(external_links) >= 1, "Has External Links",
          "Add authoritative external links.")

    # 14. Favicon
    favicon = soup.find("link", rel=lambda x: x and "icon" in x.lower())
    check(bool(favicon), "Favicon Present", "Add a favicon link.")

    # 15./16. Keyword heuristics — the bare domain name is treated as the
    # primary keyword; soft checks, so warnings only.
    keyword = parsed.netloc.replace("www.", "").split(".")[0]
    check(keyword.lower() in title_text.lower(), "Keyword in Title",
          f"Add keyword '{keyword}' in title.", warning=True)
    check(bool(desc_tag) and keyword.lower() in desc_tag.get("content", "").lower(),
          "Keyword in Meta Description",
          f"Add keyword '{keyword}' in description.", warning=True)

    # 17. Content length
    text = soup.get_text()
    check(len(text.split()) >= 300, "Enough Text Content", "Add more meaningful content.")

    # 18. Inline CSS
    inline_styles = soup.find_all(style=True)
    check(len(inline_styles) < 5, "Minimal Inline CSS",
          "Avoid inline CSS styles.", warning=True)

    # 19. '#' anchors. FIX: a page with zero links no longer fails (0 < 0);
    # it still fails only when every link is a bare '#' anchor.
    check(not links or any(not a["href"].startswith("#") for a in links),
          "No Broken Anchor Links", "Avoid '#' as href in <a> tags.", warning=True)

    # 20. Flash
    check(not soup.find_all("object"), "No Flash Elements", "Avoid using Flash elements.")

    # 21. Charset
    check(bool(soup.find("meta", charset=True)), "Charset Declared",
          "Add <meta charset='UTF-8'>.")

    # 22. <html lang="...">
    check(bool(soup.html and soup.html.get("lang")), "HTML lang Attribute",
          "Add lang attribute in <html> tag.")

    # 23. Structured data
    check(bool(soup.find("script", type="application/ld+json")),
          "Structured Data Detected", "Add structured data using JSON-LD.")

    # 24. H2 subheadings
    check(len(soup.find_all("h2")) >= 1, "Has H2 Subheadings",
          "Use subheadings to organize content.")

    # 25. Title length. FIX: this check now always runs (it was skipped when
    # the title was missing), so passed+warnings+failed always totals 25 and
    # the score denominator below is honest.
    check(bool(title_text) and len(title_text) <= 70, "Title Length OK",
          "Keep title under 70 characters.", warning=True)

    # Scoring: percentage of hard passes out of all 25 checks.
    total_tests = 25
    score = int((passed / total_tests) * 100)

    # Gauge chart for the score.
    chart = go.Figure(go.Indicator(
        mode="gauge+number",
        value=score,
        title={'text': "SEO Score"},
        gauge={'axis': {'range': [0, 100]}, 'bar': {'color': "green"}},
    ))
    chart.update_layout(width=400, height=300)

    summary = (f"✅ Passed: {passed}/{total_tests}\n"
               f"⚠️ Warnings: {warnings}/{total_tests}\n"
               f"❌ Failed: {failed}/{total_tests}")

    return summary, chart, "\n".join(results), "\n".join(suggestions)


# Gradio interface — four outputs, matching seo_check's 4-tuple.
gr.Interface(
    fn=seo_check,
    inputs=gr.Textbox(label="Enter Website URL"),
    outputs=[
        gr.Textbox(label="Summary"),
        gr.Plot(label="SEO Score Gauge"),
        gr.Textbox(label="Detailed Test Results", lines=25),
        gr.Textbox(label="Fix Suggestions", lines=25),
    ],
    title="Advanced SEO Audit Tool",
    description="Performs 25+ SEO checks like SEOSiteCheckup and gives clear solutions.",
).launch()