Update app.py
app.py CHANGED

[The removed side of this diff is truncated in the capture. What survives shows that the old version fetched the URL inside seo_check() itself, ran five numbered checks (among them "# SEO Test 2: Meta description", "# SEO Test 3: H1 tag", and "# SEO Test 5: Robots.txt availability"), scored them with the same passed/total formula, and returned a single formatted string through a simpler seo_interface.]
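One scoring detail worth noting before the diff: score = int((passed / total) * 100) counts only "Passed" checks, so a Warning drags the score down exactly as much as a Failed. With the roughly 25 checks below, each pass is worth about 4 points; a one-line illustration:

    >>> int((18 / 25) * 100)   # 18 passed, 4 warnings, 3 failed
    72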
@@ -2,71 +2,189 @@ import gradio as gr
 import requests
 from bs4 import BeautifulSoup
 from urllib.parse import urlparse
+import re
+import matplotlib.pyplot as plt

+def fetch_html(url):
     try:
+        headers = {'User-Agent': 'Mozilla/5.0'}
+        response = requests.get(url, timeout=10, headers=headers)
+        response.raise_for_status()
+        return response.text, ""
     except Exception as e:
-        return f"❌ Error fetching the URL: {e}"
+        return None, f"❌ Error fetching the URL: {e}"
+
+def seo_check(url):
+    html, error = fetch_html(url)
+    if error:
+        return error, "", "", "", "", None
+
+    soup = BeautifulSoup(html, 'html.parser')
+    parsed = urlparse(url)

     checks = []

+    def add_check(name, result, details, suggestion=""):
+        checks.append((name, result, details.strip(), suggestion.strip()))
+
+    # SEO Tests
+    title = soup.title.string.strip() if soup.title and soup.title.string else ""
+    add_check("Title Tag", "Passed" if title else "Failed",
+              f"Found: {title}" if title else "Title tag missing.",
+              "" if title else "Add a <title> tag with relevant keywords.")

-    # SEO Test 2: Meta description
     meta_desc = soup.find("meta", attrs={"name": "description"})
+    meta_text = meta_desc.get("content", "").strip() if meta_desc else ""
+    add_check("Meta Description", "Passed" if meta_text else "Warning",
+              f"Found: {meta_text}" if meta_text else "No meta description found.",
+              "Add a concise meta description (150-160 characters)." if not meta_text else "")

-    # SEO Test 3: H1 tag
     h1 = soup.find("h1")
+    h1_text = h1.get_text(strip=True) if h1 else ""
+    add_check("H1 Tag", "Passed" if h1_text else "Failed",
+              f"Found: {h1_text}" if h1_text else "No <h1> tag found.",
+              "Include one <h1> tag per page.")

+    h2_tags = soup.find_all("h2")
+    add_check("H2 Tags", "Passed" if h2_tags else "Warning",
+              f"Found {len(h2_tags)} <h2> tags." if h2_tags else "No <h2> tags found.",
+              "Use <h2> tags to structure subheadings.")
+
+    canonical = soup.find("link", rel="canonical")
+    add_check("Canonical Tag", "Passed" if canonical else "Warning",
+              f"Found: {canonical['href']}" if canonical and canonical.has_attr('href') else "No canonical tag found.",
+              "Add a canonical tag to prevent duplicate content issues.")
+
+    add_check("HTTPS", "Passed" if parsed.scheme == "https" else "Failed",
+              f"URL uses {'HTTPS' if parsed.scheme == 'https' else 'HTTP'}.",
+              "Use HTTPS for secure connections.")

-    # SEO Test 5: Robots.txt availability
     robots_url = f"{parsed.scheme}://{parsed.netloc}/robots.txt"
     try:
         robots_response = requests.get(robots_url, timeout=5)
+        add_check("robots.txt", "Passed" if robots_response.status_code == 200 else "Warning",
+                  "robots.txt is accessible." if robots_response.status_code == 200 else "robots.txt not found.",
+                  "Create a robots.txt file to manage crawler access.")
+    except:
+        add_check("robots.txt", "Warning", "robots.txt could not be fetched.", "Ensure it's accessible.")
+
+    sitemap_url = f"{parsed.scheme}://{parsed.netloc}/sitemap.xml"
+    try:
+        sitemap_response = requests.get(sitemap_url, timeout=5)
+        add_check("sitemap.xml", "Passed" if sitemap_response.status_code == 200 else "Warning",
+                  "sitemap.xml is accessible." if sitemap_response.status_code == 200 else "sitemap.xml not found.",
+                  "Add a sitemap.xml to help search engines index your pages.")
     except:
+        add_check("sitemap.xml", "Warning", "Could not access sitemap.xml.", "Ensure it's publicly accessible.")
+
+    favicon = soup.find("link", rel=re.compile("icon", re.I))
+    add_check("Favicon", "Passed" if favicon else "Warning",
+              "Favicon found." if favicon else "No favicon detected.",
+              "Add a favicon for branding and user experience.")
+
+    charset = soup.find("meta", attrs={"charset": True})
+    add_check("Charset", "Passed" if charset else "Warning",
+              f"Found: {charset['charset']}" if charset else "No charset declared.",
+              "Add a <meta charset='UTF-8'> to define character encoding.")
+
+    lang_attr = soup.html.get("lang", "") if soup.html else ""
+    add_check("Language Attribute", "Passed" if lang_attr else "Warning",
+              f"Found: lang='{lang_attr}'" if lang_attr else "No lang attribute found in <html>.",
+              "Set <html lang='en'> for proper language targeting.")

+    viewport = soup.find("meta", attrs={"name": "viewport"})
+    add_check("Mobile Viewport", "Passed" if viewport else "Warning",
+              "Viewport tag present." if viewport else "No viewport meta tag found.",
+              "Add <meta name='viewport' content='width=device-width, initial-scale=1.0'>.")
+
+    ga_code = "google-analytics.com" in html or "gtag(" in html
+    add_check("Google Analytics", "Passed" if ga_code else "Warning",
+              "Google Analytics script detected." if ga_code else "No GA script found.",
+              "Install GA script to track visitors.")
+
+    og_tags = soup.find("meta", property="og:title")
+    add_check("Open Graph Tags", "Passed" if og_tags else "Warning",
+              "OG tags found." if og_tags else "No OG tags present.",
+              "Add Open Graph meta tags to enhance social sharing.")
+
+    twitter_card = soup.find("meta", attrs={"name": "twitter:card"})
+    add_check("Twitter Card", "Passed" if twitter_card else "Warning",
+              "Twitter Card tag present." if twitter_card else "No Twitter Card meta tag found.",
+              "Add Twitter Card tags to improve tweet previews.")
+
+    images = soup.find_all("img")
+    alt_missing = sum(1 for img in images if not img.get("alt"))
+    add_check("Image ALT Texts", "Passed" if alt_missing == 0 else "Warning",
+              f"{len(images)} images found, {alt_missing} missing alt text.",
+              "Add descriptive alt attributes to all images.")
+
+    inline_styles = bool(soup.find(style=True))
+    add_check("Inline Styles", "Warning" if inline_styles else "Passed",
+              "Inline styles detected." if inline_styles else "No inline styles found.",
+              "Move inline styles to external CSS.")
+
+    font_tags = soup.find_all("font")
+    add_check("Deprecated <font> Tags", "Warning" if font_tags else "Passed",
+              f"Found {len(font_tags)} <font> tags." if font_tags else "No deprecated tags.",
+              "Avoid deprecated tags like <font>, use CSS instead.")
+
+    strong_tags = soup.find_all("strong") + soup.find_all("em")
+    add_check("Semantic Emphasis Tags", "Passed" if strong_tags else "Warning",
+              f"Found {len(strong_tags)} <strong>/<em> tags." if strong_tags else "No emphasis tags.",
+              "Use <strong> and <em> to highlight important content.")
+
+    noindex = soup.find("meta", attrs={"name": "robots", "content": re.compile("noindex", re.I)})
+    add_check("Noindex Tag", "Warning" if noindex else "Passed",
+              "Page marked noindex." if noindex else "No noindex tag found.",
+              "Remove noindex to allow search indexing (if intentional).")
+
+    ext_scripts = soup.find_all("script", src=True)
+    ext_styles = soup.find_all("link", rel="stylesheet")
+    add_check("External JS/CSS", "Passed" if len(ext_scripts) + len(ext_styles) <= 10 else "Warning",
+              f"Found {len(ext_scripts)} JS and {len(ext_styles)} CSS includes.",
+              "Reduce number of external scripts/styles for better performance.")
+
+    social_links = [a['href'] for a in soup.find_all('a', href=True) if any(x in a['href'] for x in ['facebook', 'twitter', 'linkedin'])]
+    add_check("Social Media Links", "Passed" if social_links else "Warning",
+              f"Found links: {', '.join(social_links)}" if social_links else "No social media links found.",
+              "Add links to your social profiles to build trust.")
+
+    page_size = len(html.encode('utf-8'))
+    add_check("Page Size", "Passed" if page_size < 250000 else "Warning",
+              f"Page size: {page_size / 1024:.2f} KB.",
+              "Keep HTML under 250KB for faster load times.")
+
+    def _link_broken(link):
+        try:  # an unreachable host would otherwise raise and abort the whole audit
+            return requests.head(link, timeout=5).status_code >= 400
+        except requests.RequestException:
+            return True
+    broken_links = [a['href'] for a in soup.find_all('a', href=True) if a['href'].startswith("http") and _link_broken(a['href'])]
+    add_check("Broken Links", "Passed" if not broken_links else "Failed",
+              f"Broken links: {broken_links}" if broken_links else "No broken links found.",
+              "Fix or remove broken links.")
+
+    # Score Calculation
     total = len(checks)
+    passed = sum(1 for _, r, _, _ in checks if r == "Passed")
+    warning = sum(1 for _, r, _, _ in checks if r == "Warning")
+    failed = sum(1 for _, r, _, _ in checks if r == "Failed")
     score = int((passed / total) * 100)

+    report = ""
+    for name, result, details, suggestion in checks:
+        icon = {"Passed": "✅", "Warning": "⚠️", "Failed": "❌"}[result]
+        report += f"{icon} {name} — {result}\n{details}\n"
+        if suggestion:
+            report += f"💡 {suggestion}\n"
+        report += "\n"
+
+    # Generate graph
+    fig, ax = plt.subplots()
+    ax.bar(["Passed", "Warnings", "Failed"], [passed, warning, failed], color=["green", "orange", "red"])
+    ax.set_title(f"SEO Test Summary (Score: {score}/100)")
+    ax.set_ylabel("Number of Checks")
+    fig.tight_layout()

+    return f"✅ SEO Score: {score}/100", f"{passed}", f"{warning}", f"{failed}", report.strip(), fig

 # Gradio Interface
+interface = gr.Interface(
     fn=seo_check,
     inputs=gr.Textbox(label="Enter Website URL"),
     outputs=[
@@ -74,11 +192,12 @@ seo_interface = gr.Interface(
         gr.Textbox(label="Passed Tests"),
         gr.Textbox(label="Warnings"),
         gr.Textbox(label="Failed Tests"),
+        gr.Textbox(label="Detailed Report", lines=30),
+        gr.Plot(label="SEO Score Graph")
     ],
+    title="🔍 Advanced SEO Checker",
+    description="Checks 25 SEO parameters and gives detailed recommendations. Built with ❤️ in Gradio."
 )

 if __name__ == "__main__":
+    interface.launch()
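For completeness, the imports above imply the Space's dependencies; a minimal requirements.txt sketch (package names only, version pins left open):

    gradio
    requests
    beautifulsoup4
    matplotlib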
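And a quick way to exercise seo_check() outside the Gradio UI; a minimal sketch that assumes the file above is saved as app.py on the import path and that the target site is reachable (the broken-link check issues a HEAD request per external link, so it can be slow):

    # smoke_test.py (hypothetical helper, not part of the Space)
    from app import seo_check

    score, passed, warnings, failed, report, fig = seo_check("https://example.com")
    print(score)                     # "✅ SEO Score: NN/100"
    print(passed, warnings, failed)  # counts, returned as strings
    print(report)                    # per-check details with 💡 suggestions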