Spaces:

ahmedumeraziz
/

website_seo_Checking

Sleeping

App Files Files Community

ahmedumeraziz committed on May 2, 2025

Commit

4975741

verified ·

1 Parent(s): 844d184

Update app.py

Browse files

Files changed (1) hide show

app.py +110 -34

app.py CHANGED Viewed

@@ -1,59 +1,135 @@
 import requests
 from bs4 import BeautifulSoup
 import gradio as gr
-def check_seo(url):
-    """Fetch ``url`` and report basic on-page SEO problems.
-
-    Returns a ``(report, fix_guide)`` pair of newline-joined strings: the
-    findings and the matching recommendations. If the page cannot be
-    fetched, the first element is an error message and the second is an
-    empty string.
-    """
     try:
         response = requests.get(url, timeout=10)
         response.raise_for_status()
     except Exception as e:
-        return f"❌ Error accessing URL: {str(e)}", ""
-    soup = BeautifulSoup(response.text, "html.parser")
-    report = []
-    fix_guide = []
-    # Title tag
-    # title is None when the page has no <title> element.
-    title = soup.title.string if soup.title else None
     if not title:
         report.append("❌ Missing <title> tag.")
-        fix_guide.append("Add a descriptive <title> tag with relevant keywords.")
-    # NOTE(review): the check accepts 10-70 characters while the advice
-    # below says 50-60 -- confirm which range is intended.
-    elif len(title) < 10 or len(title) > 70:
-        report.append("⚠️ <title> tag length is not optimal.")
-        fix_guide.append("Keep your title between 50-60 characters.")
-    # Meta description
-    meta_desc = soup.find("meta", attrs={"name": "description"})
-    # A tag with a missing or empty content attribute is treated as absent.
-    if not meta_desc or not meta_desc.get("content"):
         report.append("❌ Missing meta description.")
-        fix_guide.append("Add a <meta name='description'> with a summary of your page.")
-    # NOTE(review): the check accepts 50-160 characters while the advice
-    # below says 150-160 -- confirm which range is intended.
-    elif len(meta_desc["content"]) < 50 or len(meta_desc["content"]) > 160:
-        report.append("⚠️ Meta description length is not optimal.")
-        fix_guide.append("Keep meta descriptions between 150-160 characters.")
-    # Headings
-    # Both zero and multiple <h1> elements are reported.
     h1_tags = soup.find_all("h1")
     if len(h1_tags) != 1:
         report.append(f"⚠️ Found {len(h1_tags)} <h1> tags.")
-        fix_guide.append("Use exactly one <h1> tag to define the main heading.")
-    # Images with alt
     images = soup.find_all("img")
-    # Counts images whose alt attribute is missing or empty.
-    missing_alt = [img for img in images if not img.get("alt")]
-    if missing_alt:
-        report.append(f"⚠️ {len(missing_alt)} image(s) missing alt attribute.")
-        fix_guide.append("Add descriptive alt text to all images.")
-    # Mobile-friendliness & Page speed (basic hints, real check would require APIs)
-    report.append("ℹ️ For mobile-friendliness and performance, use tools like Google PageSpeed Insights.")
-    fix_guide.append("Use https://pagespeed.web.dev/ for full performance and mobile checks.")
-    return "\n".join(report), "\n".join(fix_guide)
 gr.Interface(
-    # One URL textbox in; check_seo's two return values fill the two output
-    # textboxes. launch() is called as soon as this module is executed.
-    fn=check_seo,
     inputs=gr.Textbox(label="Enter Website URL"),
-    outputs=[gr.Textbox(label="SEO Report"), gr.Textbox(label="Fix Suggestions")],
-    title="Website SEO Check",
-    description="Checks basic SEO elements of a given URL and provides recommendations."
 ).launch()

 import requests
 from bs4 import BeautifulSoup
+from urllib.parse import urlparse, urljoin
 import gradio as gr
+def seo_check(url):
+    """Fetch ``url`` and run a set of basic on-page SEO checks.
+
+    Checks the title, meta description, canonical link, <h1> count,
+    viewport meta tag, HTTPS usage, robots.txt / sitemap.xml availability,
+    Open Graph title, image alt text, internal/external link counts and the
+    density of a keyword derived from the host name.
+
+    Args:
+        url: Page address. ``https://`` is prepended when no http(s)
+            scheme is given.
+
+    Returns:
+        A ``(report, suggestions)`` pair of newline-joined strings. If the
+        page cannot be fetched, ``report`` is an error message and
+        ``suggestions`` is an empty string.
+    """
+    report = []
+    suggestions = []
+    # Ensure HTTPS
+    # Test for a real scheme prefix: a bare startswith("http") would treat
+    # hosts such as "httpbin.org" as if they already carried a scheme.
+    url = url.strip()
+    if not url.startswith(("http://", "https://")):
+        url = "https://" + url
     try:
         response = requests.get(url, timeout=10)
         response.raise_for_status()
+        html = response.text
     except Exception as e:
+        return f"❌ Error accessing URL: {e}", ""
+    soup = BeautifulSoup(html, "html.parser")
+    # Title Tag
+    # get_text() rather than .string: .string is None for an empty <title>
+    # or one containing nested markup, and None.strip() would raise.
+    title = soup.title.get_text(strip=True) if soup.title else ""
     if not title:
         report.append("❌ Missing <title> tag.")
+        suggestions.append("Add a <title> tag that describes your page in 50–60 characters.")
+    elif len(title) > 70:
+        report.append("⚠️ Title is too long.")
+        suggestions.append("Keep title under 70 characters.")
+    # Meta Description
+    desc_tag = soup.find("meta", attrs={"name": "description"})
+    desc = desc_tag["content"].strip() if desc_tag and desc_tag.get("content") else ""
+    if not desc:
         report.append("❌ Missing meta description.")
+        suggestions.append("Add a <meta name='description'> summarizing the page.")
+    elif len(desc) > 160:
+        report.append("⚠️ Meta description is too long.")
+        suggestions.append("Keep meta descriptions under 160 characters.")
+    # Canonical Tag
+    canonical = soup.find("link", rel="canonical")
+    if not canonical:
+        report.append("❌ Missing canonical link.")
+        suggestions.append("Add a <link rel='canonical'> to avoid duplicate content.")
+    # H1 Tag
     h1_tags = soup.find_all("h1")
     if len(h1_tags) != 1:
         report.append(f"⚠️ Found {len(h1_tags)} <h1> tags.")
+        suggestions.append("Use exactly one <h1> tag for SEO clarity.")
+    # Mobile viewport
+    viewport = soup.find("meta", attrs={"name": "viewport"})
+    if not viewport:
+        report.append("⚠️ No viewport meta tag.")
+        suggestions.append("Add a viewport meta tag for mobile responsiveness.")
+    # HTTPS check
+    if not url.startswith("https://"):
+        report.append("⚠️ URL is not secure (no HTTPS).")
+        suggestions.append("Install SSL and redirect HTTP to HTTPS.")
+    # Robots.txt and sitemap.xml
+    parsed = urlparse(url)
+    base = f"{parsed.scheme}://{parsed.netloc}"
+    robots_url = urljoin(base, "/robots.txt")
+    sitemap_url = urljoin(base, "/sitemap.xml")
+    # Both probes get a timeout so an unresponsive host cannot hang the
+    # request, and only network errors are caught (a bare except would also
+    # swallow KeyboardInterrupt / SystemExit).
+    try:
+        r1 = requests.get(robots_url, timeout=10)
+        if r1.status_code != 200:
+            report.append("❌ robots.txt not found.")
+            suggestions.append("Create a robots.txt to guide search bots.")
+    except requests.RequestException:
+        report.append("❌ Could not access robots.txt.")
+    try:
+        r2 = requests.get(sitemap_url, timeout=10)
+        if r2.status_code != 200:
+            report.append("❌ sitemap.xml not found.")
+            suggestions.append("Add sitemap.xml for better crawling.")
+    except requests.RequestException:
+        report.append("❌ Could not access sitemap.xml.")
+    # Open Graph Tags
+    og_title = soup.find("meta", property="og:title")
+    if not og_title:
+        report.append("⚠️ Missing Open Graph (og:title).")
+        suggestions.append("Add OG tags to improve sharing on social media.")
+    # Image alt text
     images = soup.find_all("img")
+    alt_missing = [img for img in images if not img.get("alt")]
+    if alt_missing:
+        report.append(f"⚠️ {len(alt_missing)} images missing alt text.")
+        suggestions.append("Add descriptive alt attributes to all images.")
+    # Internal and external links
+    links = soup.find_all("a", href=True)
+    internal = 0
+    external = 0
+    page_host = parsed.netloc.lower()
+    for link in links:
+        # Resolve each href against the page URL so relative links count as
+        # internal, and compare hosts exactly instead of by substring.
+        try:
+            target = urlparse(urljoin(url, link["href"]))
+        except ValueError:
+            # Malformed href (e.g. a broken IPv6 literal); not classifiable.
+            continue
+        if target.scheme not in ("http", "https"):
+            # mailto:, tel:, javascript: and similar are not page links.
+            continue
+        if target.netloc.lower() == page_host:
+            internal += 1
+        else:
+            external += 1
+    report.append(f"ℹ️ Internal Links: {internal} | External Links: {external}")
+    suggestions.append("Ensure most important links are internal. Check broken links.")
+    # Keyword density (basic)
+    body_text = soup.get_text().lower()
+    words = body_text.split()
+    word_count = len(words)
+    # hostname is lower-cased and excludes any port, so the keyword can
+    # actually match the lower-cased page words.
+    host = parsed.hostname or ""
+    if host.startswith("www."):
+        host = host[len("www."):]
+    keyword = host.split(".")[0]
+    keyword_freq = words.count(keyword)
+    density = (keyword_freq / word_count) * 100 if word_count else 0
+    report.append(f"ℹ️ Keyword '{keyword}' appears {keyword_freq} times ({density:.2f}% density)")
+    if density < 0.5:
+        suggestions.append("Consider using your main keyword more often (target 1–2%).")
+    return "\n".join(report), "\n".join(suggestions)
+# Gradio UI: one URL textbox in, two 15-line textboxes out, filled with the
+# (report, suggestions) pair returned by seo_check. launch() is called as
+# soon as this module is executed.
 gr.Interface(
+    fn=seo_check,
     inputs=gr.Textbox(label="Enter Website URL"),
+    outputs=[
+        gr.Textbox(label="SEO Report", lines=15),
+        gr.Textbox(label="Suggestions & Fixes", lines=15)
+    ],
+    title="SEO Website Checker",
+    description="Analyze your website's SEO like Sitechecker.pro & SEOSiteCheckup, with clear solutions!"
 ).launch()