Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,177 +1,84 @@
|
|
|
|
|
| 1 |
import requests
|
| 2 |
from bs4 import BeautifulSoup
|
| 3 |
-
from urllib.parse import urlparse
|
| 4 |
-
import gradio as gr
|
| 5 |
-
import plotly.graph_objects as go
|
| 6 |
|
| 7 |
def seo_check(url):
|
| 8 |
-
if not url.startswith("http"):
|
| 9 |
-
url = "https://" + url
|
| 10 |
-
|
| 11 |
try:
|
| 12 |
response = requests.get(url, timeout=10)
|
| 13 |
-
response.raise_for_status()
|
| 14 |
html = response.text
|
|
|
|
| 15 |
except Exception as e:
|
| 16 |
-
return f"❌
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
parsed = urlparse(url)
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
failed = 0
|
| 25 |
-
warnings = 0
|
| 26 |
-
|
| 27 |
-
def check(condition, title, fix, warning=False):
|
| 28 |
-
nonlocal passed, failed, warnings
|
| 29 |
-
if condition:
|
| 30 |
-
results.append(f"✅ {title}")
|
| 31 |
-
passed += 1
|
| 32 |
-
else:
|
| 33 |
-
if warning:
|
| 34 |
-
results.append(f"⚠️ {title}")
|
| 35 |
-
warnings += 1
|
| 36 |
-
else:
|
| 37 |
-
results.append(f"❌ {title}")
|
| 38 |
-
failed += 1
|
| 39 |
-
suggestions.append(f"{title}: {fix}")
|
| 40 |
-
|
| 41 |
-
suggestions = []
|
| 42 |
-
|
| 43 |
-
# 1. Title tag
|
| 44 |
-
check(soup.title and soup.title.string.strip(), "Title Tag Present", "Add a <title> tag to your HTML.")
|
| 45 |
-
|
| 46 |
-
# 2. Meta Description
|
| 47 |
-
desc_tag = soup.find("meta", attrs={"name": "description"})
|
| 48 |
-
check(desc_tag and desc_tag.get("content", "").strip(), "Meta Description Present", "Add a meta description.")
|
| 49 |
|
| 50 |
-
#
|
| 51 |
-
|
| 52 |
-
check(bool(canonical), "Canonical Tag Present", "Add <link rel='canonical'> to avoid duplicate content.")
|
| 53 |
-
|
| 54 |
-
# 4. H1 Tag
|
| 55 |
-
h1_tags = soup.find_all("h1")
|
| 56 |
-
check(len(h1_tags) == 1, "Exactly One H1 Tag", "Use exactly one <h1> tag.")
|
| 57 |
-
|
| 58 |
-
# 5. Mobile Meta Tag
|
| 59 |
-
viewport = soup.find("meta", attrs={"name": "viewport"})
|
| 60 |
-
check(bool(viewport), "Mobile Viewport Meta Tag", "Add <meta name='viewport'> for mobile compatibility.")
|
| 61 |
-
|
| 62 |
-
# 6. HTTPS
|
| 63 |
-
check(url.startswith("https://"), "Uses HTTPS", "Secure your website with SSL.")
|
| 64 |
-
|
| 65 |
-
# 7. robots.txt
|
| 66 |
try:
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
# 8. sitemap.xml
|
| 73 |
-
try:
|
| 74 |
-
r2 = requests.get(urljoin(base, "/sitemap.xml"))
|
| 75 |
-
check(r2.status_code == 200, "sitemap.xml Found", "Add a sitemap.xml file.")
|
| 76 |
except:
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
# 9. OG Title
|
| 80 |
-
og_title = soup.find("meta", property="og:title")
|
| 81 |
-
check(bool(og_title), "Open Graph Title Present", "Add Open Graph meta tags.")
|
| 82 |
|
| 83 |
-
#
|
| 84 |
-
|
| 85 |
-
|
|
|
|
|
|
|
|
|
|
| 86 |
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
check(len(alt_missing) < len(images), "Image ALT Tags Used", "Add alt attributes to all images.")
|
| 91 |
|
| 92 |
-
|
| 93 |
-
links = soup.find_all("a", href=True)
|
| 94 |
-
parsed_host = parsed.netloc
|
| 95 |
-
internal_links = [link for link in links if parsed_host in link['href']]
|
| 96 |
-
check(len(internal_links) >= 5, "Has Internal Links", "Add more internal links.")
|
| 97 |
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
check(len(external_links) >= 1, "Has External Links", "Add authoritative external links.")
|
| 101 |
-
|
| 102 |
-
# 14. Favicon
|
| 103 |
-
favicon = soup.find("link", rel=lambda x: x and "icon" in x.lower())
|
| 104 |
-
check(bool(favicon), "Favicon Present", "Add a favicon link.")
|
| 105 |
-
|
| 106 |
-
# 15. Keyword in Title
|
| 107 |
-
keyword = parsed.netloc.replace("www.", "").split(".")[0]
|
| 108 |
-
check(soup.title and keyword.lower() in soup.title.string.lower(), "Keyword in Title", f"Add keyword '{keyword}' in title.", warning=True)
|
| 109 |
-
|
| 110 |
-
# 16. Keyword in Description
|
| 111 |
-
check(desc_tag and keyword.lower() in desc_tag.get("content", "").lower(), "Keyword in Meta Description", f"Add keyword '{keyword}' in description.", warning=True)
|
| 112 |
-
|
| 113 |
-
# 17. Content Length
|
| 114 |
-
text = soup.get_text()
|
| 115 |
-
check(len(text.split()) >= 300, "Enough Text Content", "Add more meaningful content.")
|
| 116 |
-
|
| 117 |
-
# 18. No Inline CSS
|
| 118 |
-
inline_styles = soup.find_all(style=True)
|
| 119 |
-
check(len(inline_styles) < 5, "Minimal Inline CSS", "Avoid inline CSS styles.", warning=True)
|
| 120 |
-
|
| 121 |
-
# 19. No Broken Links (basic)
|
| 122 |
-
check(len([link for link in links if 'href' in link.attrs and link['href'].startswith("#")]) < len(links), "No Broken Anchor Links", "Avoid '#' as href in <a> tags.", warning=True)
|
| 123 |
-
|
| 124 |
-
# 20. No Flash
|
| 125 |
-
flash = soup.find_all("object")
|
| 126 |
-
check(len(flash) == 0, "No Flash Elements", "Avoid using Flash elements.")
|
| 127 |
-
|
| 128 |
-
# 21. Charset Set
|
| 129 |
-
charset = soup.find("meta", charset=True)
|
| 130 |
-
check(bool(charset), "Charset Declared", "Add <meta charset='UTF-8'>.")
|
| 131 |
-
|
| 132 |
-
# 22. HTML Language Set
|
| 133 |
-
check(soup.html and soup.html.get("lang"), "HTML lang Attribute", "Add lang attribute in <html> tag.")
|
| 134 |
-
|
| 135 |
-
# 23. Structured Data Present
|
| 136 |
-
ld_json = soup.find("script", type="application/ld+json")
|
| 137 |
-
check(bool(ld_json), "Structured Data Detected", "Add structured data using JSON-LD.")
|
| 138 |
-
|
| 139 |
-
# 24. H2 Tags
|
| 140 |
-
h2 = soup.find_all("h2")
|
| 141 |
-
check(len(h2) >= 1, "Has H2 Subheadings", "Use subheadings to organize content.")
|
| 142 |
-
|
| 143 |
-
# 25. Page Title Length OK
|
| 144 |
-
if soup.title:
|
| 145 |
-
check(len(soup.title.string) <= 70, "Title Length OK", "Keep title under 70 characters.", warning=True)
|
| 146 |
-
|
| 147 |
-
# Scoring
|
| 148 |
-
total_tests = 25
|
| 149 |
-
score = int((passed / total_tests) * 100)
|
| 150 |
-
|
| 151 |
-
# Chart
|
| 152 |
-
chart = go.Figure(go.Indicator(
|
| 153 |
-
mode="gauge+number",
|
| 154 |
-
value=score,
|
| 155 |
-
title={'text': "SEO Score"},
|
| 156 |
-
gauge={'axis': {'range': [0, 100]}, 'bar': {'color': "green"}}
|
| 157 |
-
))
|
| 158 |
-
chart.update_layout(width=400, height=300)
|
| 159 |
-
|
| 160 |
-
# Result strings
|
| 161 |
-
summary = f"✅ Passed: {passed}/{total_tests}\n⚠️ Warnings: {warnings}/{total_tests}\n❌ Failed: {failed}/{total_tests}"
|
| 162 |
-
|
| 163 |
-
return summary, chart, "\n".join(results), "\n".join(suggestions)
|
| 164 |
-
|
| 165 |
-
# Gradio interface
|
| 166 |
-
gr.Interface(
|
| 167 |
fn=seo_check,
|
| 168 |
inputs=gr.Textbox(label="Enter Website URL"),
|
| 169 |
outputs=[
|
| 170 |
-
gr.Textbox(label="
|
| 171 |
-
gr.
|
| 172 |
-
gr.Textbox(label="
|
| 173 |
-
gr.Textbox(label="
|
|
|
|
| 174 |
],
|
| 175 |
-
title="
|
| 176 |
-
description="
|
| 177 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
import requests
|
| 3 |
from bs4 import BeautifulSoup
|
| 4 |
+
from urllib.parse import urlparse
|
|
|
|
|
|
|
| 5 |
|
| 6 |
def seo_check(url):
    """Run a basic on-page SEO audit for ``url`` and format the results.

    Five checks are performed: title tag, meta description, H1 heading,
    HTTPS usage, and robots.txt availability. Each check is recorded as a
    ``(name, status, description, fix)`` tuple whose status is one of
    "Passed", "Warning", or "Failed".

    Args:
        url: Address of the page to audit. Surrounding whitespace is ignored
            and ``https://`` is assumed when no scheme is given.

    Returns:
        A 5-tuple of strings: the score line, the passed count, the warning
        count, the failed count, and the detailed report. If the page cannot
        be fetched, the first element carries the error message and the
        remaining four are empty strings.
    """
    url = (url or "").strip()
    if not url:
        return "❌ Error fetching the URL: no URL provided", "", "", "", ""
    # Bare hosts such as "example.com" would otherwise fail with MissingSchema.
    if not url.startswith(("http://", "https://")):
        url = "https://" + url

    try:
        response = requests.get(url, timeout=10)
        # Do not audit an error page (404/500) as if it were the real site.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
    except Exception as e:
        # UI boundary: any failure here is reported to the user, not raised.
        return f"❌ Error fetching the URL: {e}", "", "", "", ""

    checks = []

    # SEO Test 1: Title tag
    title_tag = soup.title.string.strip() if soup.title and soup.title.string else ''
    if title_tag:
        checks.append(("Title Tag Present", "Passed", "A <title> tag exists.", "No action needed."))
    else:
        checks.append(("Title Tag Missing", "Failed", "The page is missing a <title> tag.", "Add a <title> tag to improve SEO."))

    # SEO Test 2: Meta description
    meta_desc = soup.find("meta", attrs={"name": "description"})
    if meta_desc and meta_desc.get("content", "").strip():
        checks.append(("Meta Description Present", "Passed", "A meta description is provided.", "No action needed."))
    else:
        checks.append(("Meta Description Missing", "Warning", "No meta description found.", "Add a concise meta description (150-160 characters)."))

    # SEO Test 3: H1 tag
    # Count every non-empty <h1> so the "single <h1>" message is actually
    # verified and an empty first heading does not hide a valid later one.
    h1_count = sum(1 for tag in soup.find_all("h1") if tag.get_text(strip=True))
    if h1_count == 1:
        checks.append(("H1 Tag Present", "Passed", "A single <h1> tag is present.", "No action needed."))
    elif h1_count > 1:
        checks.append(("Multiple H1 Tags", "Warning", f"Found {h1_count} <h1> tags.", "Use exactly one descriptive <h1> tag per page."))
    else:
        checks.append(("H1 Tag Missing", "Failed", "No <h1> tag found.", "Include one descriptive <h1> tag per page."))

    # SEO Test 4: HTTPS usage
    # Judge the final URL after redirects, so an http:// input that is
    # redirected to https:// is not reported as insecure.
    parsed = urlparse(response.url)
    if parsed.scheme == "https":
        checks.append(("HTTPS Protocol", "Passed", "Site uses secure HTTPS connection.", "No action needed."))
    else:
        checks.append(("Not Using HTTPS", "Failed", "The site is not served over HTTPS.", "Enable SSL/TLS to secure the site and improve rankings."))

    # SEO Test 5: Robots.txt availability
    robots_url = f"{parsed.scheme}://{parsed.netloc}/robots.txt"
    try:
        robots_response = requests.get(robots_url, timeout=5)
    except requests.RequestException:
        # Only network/HTTP-layer failures are downgraded to a warning;
        # KeyboardInterrupt and programming errors still propagate.
        checks.append(("robots.txt Fetch Failed", "Warning", "Could not access robots.txt.", "Ensure the file is publicly accessible."))
    else:
        if robots_response.status_code == 200:
            checks.append(("robots.txt Found", "Passed", "robots.txt file exists.", "No action needed."))
        else:
            checks.append(("robots.txt Missing", "Warning", "robots.txt file not found.", "Add a robots.txt to control crawler access."))

    # SEO Score calculation
    total = len(checks)
    passed = sum(1 for _, result, _, _ in checks if result == "Passed")
    warnings = sum(1 for _, result, _, _ in checks if result == "Warning")
    failed = sum(1 for _, result, _, _ in checks if result == "Failed")
    # Integer arithmetic avoids float rounding before truncation.
    score = passed * 100 // total

    passed_section = "\n".join(f"✅ {name}\n✔️ {desc}" for name, result, desc, _ in checks if result == "Passed")
    warning_section = "\n".join(f"⚠️ {name}\n{desc}\n💡 Suggestion: {fix}" for name, result, desc, fix in checks if result == "Warning")
    failed_section = "\n".join(f"❌ {name}\n{desc}\n💡 Fix: {fix}" for name, result, desc, fix in checks if result == "Failed")

    # Skip empty sections so the report has no stray blank gaps.
    report = "\n\n".join(section for section in (passed_section, warning_section, failed_section) if section)

    return f"🔎 SEO Score: {score}/100", f"✅ Passed: {passed}", f"⚠️ Warnings: {warnings}", f"❌ Failed: {failed}", report
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
+
# Gradio Interface
# The five output boxes line up, in order, with the five strings returned
# by seo_check: score, passed count, warning count, failed count, report.
seo_interface = gr.Interface(
    fn=seo_check,
    inputs=gr.Textbox(label="Enter Website URL"),
    outputs=[
        # The four one-line summary boxes differ only by label.
        *(
            gr.Textbox(label=caption)
            for caption in ("SEO Score", "Passed Tests", "Warnings", "Failed Tests")
        ),
        # The detailed report gets a taller box.
        gr.Textbox(label="Detailed Report", lines=20),
    ],
    title="🔍 Website SEO Check App",
    description="Enter any website URL to perform an SEO audit and get detailed, actionable feedback.",
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    seo_interface.launch()
|