mcp-github / src /mcp-seo /server.py
mishrabp's picture
Upload folder using huggingface_hub
f29cac7 verified
"""
SEO & ADA Compliance MCP Server
"""
import sys
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse
from mcp.server.fastmcp import FastMCP
from typing import List, Dict, Any, Set
from core.mcp_telemetry import log_usage, log_trace, log_metric
import uuid
import time
# Initialize the FastMCP server. Binding host 0.0.0.0 exposes the SSE app on
# all interfaces (needed when running inside a container).
mcp = FastMCP("SEO & ADA Audit", host="0.0.0.0")
@mcp.tool()
def analyze_seo(url: str) -> Dict[str, Any]:
    """
    Perform a basic SEO audit of a webpage.

    Checks title, meta description, H1 tags, image alt attributes, and
    internal/external link counts.

    Args:
        url: Fully-qualified URL of the page to audit.

    Returns:
        A dict of audit results, or {"error": <message>} on failure.
    """
    start_time = time.time()
    trace_id = str(uuid.uuid4())
    span_id = str(uuid.uuid4())
    log_usage("mcp-seo", "analyze_seo")
    try:
        response = requests.get(url, timeout=10)
        soup = BeautifulSoup(response.content, 'html.parser')
        result = {
            "url": url,
            "status_code": response.status_code,
            # soup.title.string can itself be None for an empty <title>.
            "title": soup.title.string if soup.title else None,
            "meta_description": None,
            "h1_count": len(soup.find_all('h1')),
            "images_missing_alt": 0,
            "internal_links": 0,
            "external_links": 0
        }
        # Meta description: content attribute of <meta name="description">.
        meta_desc = soup.find('meta', attrs={'name': 'description'})
        if meta_desc:
            result["meta_description"] = meta_desc.get('content')
        # Count <img> tags with a missing or empty alt attribute.
        result["images_missing_alt"] = sum(
            1 for img in soup.find_all('img') if not img.get('alt')
        )
        # Classify links: relative paths or same-domain hrefs count as internal.
        domain = urlparse(url).netloc
        for link in soup.find_all('a', href=True):
            href = link['href']
            if href.startswith('/') or domain in href:
                result["internal_links"] += 1
            else:
                result["external_links"] += 1
        duration = (time.time() - start_time) * 1000
        log_trace("mcp-seo", trace_id, span_id, "analyze_seo", duration, "ok")
        log_metric("mcp-seo", "seo_links_found",
                   result["internal_links"] + result["external_links"],
                   {"url": url})
        return result
    except Exception as e:
        duration = (time.time() - start_time) * 1000
        log_trace("mcp-seo", trace_id, span_id, "analyze_seo", duration, "error")
        # BUG FIX: this previously returned a one-element list, breaking the
        # declared Dict[str, Any] contract and differing from analyze_ada.
        return {"error": str(e)}
@mcp.tool()
def analyze_ada(url: str) -> Dict[str, Any]:
    """
    Perform a basic ADA/WCAG accessibility check.

    Checks for missing image alt text, a lang attribute on <html>, and
    accessible labels on form inputs (via <label for=...>, a wrapping
    <label>, or aria-label).

    Args:
        url: Fully-qualified URL of the page to audit.

    Returns:
        A dict with "url", a rough 0-100 "compliance_score", and a list of
        "issues" strings, or {"error": <message>} on failure.
    """
    log_usage("mcp-seo", "analyze_ada")
    # Input types that never require a visible label (WCAG form guidance).
    non_labelable = {'hidden', 'submit', 'button', 'reset', 'image'}
    try:
        response = requests.get(url, timeout=10)
        soup = BeautifulSoup(response.content, 'html.parser')
        issues = []
        # 1. Images missing alt text.
        missing_alt = [img.get('src', 'unknown')
                       for img in soup.find_all('img') if not img.get('alt')]
        if missing_alt:
            issues.append(f"Found {len(missing_alt)} images missing alt text.")
        # 2. <html> lang attribute.
        html_tag = soup.find('html')
        if not html_tag or not html_tag.get('lang'):
            issues.append("Missing 'lang' attribute on <html> tag.")
        # 3. Form input labels.
        for inp in soup.find_all('input'):
            # BUG FIX: hidden/button-like inputs were previously flagged as
            # missing labels, wrongly deflating the compliance score.
            inp_type = (inp.get('type') or 'text').lower()
            if inp_type in non_labelable:
                continue
            inp_id = inp.get('id')
            label = soup.find('label', attrs={'for': inp_id}) if inp_id else None
            parent_label = inp.find_parent('label')
            aria_label = inp.get('aria-label')
            if not (label or parent_label or aria_label):
                issues.append(f"Input field (type={inp.get('type')}) missing label.")
        return {
            "url": url,
            # Rough heuristic: each issue costs 10 points, floored at 0.
            "compliance_score": max(0, 100 - (len(issues) * 10)),
            "issues": issues
        }
    except Exception as e:
        return {"error": str(e)}
@mcp.tool()
def generate_sitemap(url: str, max_depth: int = 1) -> List[str]:
    """
    Crawl the website breadth-first to generate a simple list of internal
    URLs (sitemap).

    Args:
        url: Starting URL; only links on the same domain are followed.
        max_depth: Maximum number of link-hops from the start page.

    Returns:
        Sorted list of discovered internal URLs, or a one-element
        ["Error: ..."] list on a top-level failure.
    """
    from collections import deque  # local: only this tool needs it
    log_usage("mcp-seo", "generate_sitemap")
    visited = set()
    # BUG FIX: track everything ever enqueued so the same URL cannot be
    # queued (and fetched) multiple times; the old code only checked
    # `visited`, so heavily cross-linked pages were enqueued repeatedly.
    queued = {url}
    to_visit = deque([(url, 0)])  # deque: O(1) popleft vs list.pop(0) O(n)
    domain = urlparse(url).netloc
    try:
        while to_visit:
            current_url, depth = to_visit.popleft()
            if current_url in visited or depth > max_depth:
                continue
            visited.add(current_url)
            try:
                response = requests.get(current_url, timeout=5)
                if response.status_code != 200:
                    continue
                soup = BeautifulSoup(response.content, 'html.parser')
                for link in soup.find_all('a', href=True):
                    # Resolve relative links; strip #fragments so the same
                    # page is not crawled once per anchor.
                    full_url = urljoin(current_url, link['href']).split('#', 1)[0]
                    parsed = urlparse(full_url)
                    # Only follow internal links. All internal URLs are kept
                    # regardless of content type, for simplicity.
                    if parsed.netloc == domain and full_url not in queued:
                        queued.add(full_url)
                        to_visit.append((full_url, depth + 1))
            except Exception:
                # Best-effort crawl: skip pages that fail to fetch or parse.
                continue
        return sorted(visited)
    except Exception as e:
        return [f"Error: {str(e)}"]
if __name__ == "__main__":
    # BUG FIX: removed a redundant function-local `import os` — os is already
    # imported at the top of the file.
    # Transport selection: SSE over HTTP (for hosted deployments) when
    # MCP_TRANSPORT=sse, otherwise the default stdio transport.
    if os.environ.get("MCP_TRANSPORT") == "sse":
        import uvicorn  # only needed for the HTTP/SSE transport
        port = int(os.environ.get("PORT", 7860))
        uvicorn.run(mcp.sse_app(), host="0.0.0.0", port=port)
    else:
        mcp.run()