| """ |
| Schema Generator |
| Automatically generates JSON-LD schema markup for websites |
| """ |
|
|
| import json |
| from typing import Dict, List |
| from datetime import datetime |
|
|
def generate_organization_schema(audit: Dict) -> Dict:
    """Generate an Organization schema from audit data.

    Scans the text of every page for e-mail addresses and phone numbers and
    the links of every page for well-known social networks, then builds a
    schema.org ``Organization`` object from what was found.

    Args:
        audit: Audit result. Reads ``org_name`` (default ``'Company'``),
            ``url`` (default ``''``) and ``pages``. Each page may provide
            ``text``, ``paragraphs`` and ``links`` (plain strings or dicts
            with an ``href`` key).

    Returns:
        The schema dict. ``email``, ``telephone``, ``sameAs`` and
        ``contactPoint`` are only present when matching data was found.
    """
    import re

    # Compiled once instead of on every page. The phone pattern uses a
    # non-capturing group: with a capturing group ``findall`` would return
    # only the optional country-code prefix instead of the whole number.
    email_re = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b')
    phone_re = re.compile(
        r'(?:\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}'
    )
    social_networks = ('facebook', 'twitter', 'instagram', 'linkedin', 'youtube')

    pages = audit.get('pages') or []
    org_name = audit.get('org_name', 'Company')
    url = audit.get('url', '')

    emails = []
    phones = []
    social_links = []

    for page in pages:
        paragraphs = page.get('paragraphs') or ''
        if isinstance(paragraphs, (list, tuple)):
            # Join the items so the regexes see the real text rather than
            # the repr() of a list.
            paragraphs = ' '.join(str(item) for item in paragraphs)
        text = f"{page.get('text') or ''} {paragraphs}"

        emails.extend(email_re.findall(text))
        phones.extend(phone_re.findall(text))

        for link in page.get('links') or []:
            if isinstance(link, dict):
                href = link.get('href') or ''
            else:
                href = str(link)
            lowered = href.lower()
            if any(network in lowered for network in social_networks):
                social_links.append(href)

    schema = {
        "@context": "https://schema.org",
        "@type": "Organization",
        "name": org_name,
        "url": url,
        # Strip a trailing slash so the logo URL has no double slash.
        "logo": f"{(url or '').rstrip('/')}/logo.png",
        "description": f"{org_name} - خدمات متميزة"
    }

    if emails:
        schema["email"] = emails[0]

    if phones:
        schema["telephone"] = phones[0]

    if social_links:
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # output is deterministic (a set would shuffle the links).
        schema["sameAs"] = list(dict.fromkeys(social_links))[:5]

    if emails or phones:
        contact_point = {
            "@type": "ContactPoint",
            "contactType": "customer service",
        }
        # Only emit the fields we actually have; null values are noise in
        # the serialized JSON-LD.
        if emails:
            contact_point["email"] = emails[0]
        if phones:
            contact_point["telephone"] = phones[0]
        schema["contactPoint"] = contact_point

    return schema
|
|
|
|
def generate_faq_schema(faqs: List[Dict]) -> Dict:
    """Build a FAQPage schema from a list of question/answer dicts.

    Each entry is read through the ``question``/``answer`` keys, with
    ``q``/``a`` as fallbacks when the long key is absent. Entries whose
    question or answer is empty are skipped.

    Returns:
        The FAQPage schema dict, or ``None`` when ``faqs`` is empty or no
        entry yields both a question and an answer.
    """
    if not faqs:
        return None

    entries = []
    for item in faqs:
        q_text = item.get('question', item.get('q', ''))
        a_text = item.get('answer', item.get('a', ''))
        if not (q_text and a_text):
            continue
        entries.append({
            "@type": "Question",
            "name": q_text,
            "acceptedAnswer": {
                "@type": "Answer",
                "text": a_text
            }
        })

    if not entries:
        return None

    return {
        "@context": "https://schema.org",
        "@type": "FAQPage",
        "mainEntity": entries
    }
|
|
|
|
def generate_breadcrumb_schema(url: str) -> Dict:
    """Generate a BreadcrumbList schema from the path of a URL.

    The first item is always the site root; every non-empty path segment
    adds one more item whose ``item`` is the cumulative URL up to that
    segment and whose ``name`` is the segment turned into a title
    (percent-decoded, ``-``/``_`` replaced by spaces, title-cased).

    Args:
        url: Absolute URL of the page (scheme and host required).

    Returns:
        The BreadcrumbList schema dict. When ``url`` has no scheme or host
        the ``itemListElement`` list is left empty, because no valid
        absolute item URLs can be built from it.
    """
    from urllib.parse import unquote, urlparse

    schema = {
        "@context": "https://schema.org",
        "@type": "BreadcrumbList",
        "itemListElement": []
    }

    parsed = urlparse(url or '')
    if not (parsed.scheme and parsed.netloc):
        # Without scheme and host the items would come out as "://...".
        return schema

    base_url = f"{parsed.scheme}://{parsed.netloc}"
    items = schema["itemListElement"]

    # Position 1 is always the home page.
    items.append({
        "@type": "ListItem",
        "position": 1,
        "name": "الرئيسية",
        "item": base_url
    })

    path_parts = [p for p in parsed.path.split('/') if p]
    current_url = base_url
    for position, part in enumerate(path_parts, start=2):
        current_url += f"/{part}"
        # Decode percent-escapes for the display name only (e.g. Arabic
        # slugs); the item URL keeps the original encoded segment.
        label = unquote(part).replace('-', ' ').replace('_', ' ').title()
        items.append({
            "@type": "ListItem",
            "position": position,
            "name": label,
            "item": current_url
        })

    return schema
|
|
|
|
def generate_website_schema(audit: Dict) -> Dict:
    """Build a WebSite schema that advertises a site-search action.

    Reads ``org_name`` (default ``'Company'``) and ``url`` (default ``''``)
    from ``audit``; the search URL template is the site URL followed by
    ``/search?q={search_term_string}``.
    """
    site_name = audit.get('org_name', 'Company')
    site_url = audit.get('url', '')

    search_entry_point = {
        "@type": "EntryPoint",
        "urlTemplate": f"{site_url}/search?q={{search_term_string}}"
    }
    search_action = {
        "@type": "SearchAction",
        "target": search_entry_point,
        "query-input": "required name=search_term_string"
    }

    return {
        "@context": "https://schema.org",
        "@type": "WebSite",
        "name": site_name,
        "url": site_url,
        "potentialAction": search_action
    }
|
|
|
|
def generate_article_schema(page: Dict, org_name: str) -> Dict:
    """Build an Article schema for a blog-style page.

    The publication date is the first ``YYYY-MM-DD`` pattern found in the
    page text, falling back to today's date; it is used for both
    ``datePublished`` and ``dateModified``. The organization is listed as
    both author and publisher. The first entry of ``page['images']`` (a
    string, or an object with a ``src`` key) becomes ``image`` when it is
    non-empty.
    """
    import re

    headline = page.get('title', '')
    page_url = page.get('url', '')
    body = page.get('text', '')

    found = re.search(r'(\d{4}-\d{2}-\d{2})', body)
    if found:
        published = found.group(1)
    else:
        published = datetime.now().strftime('%Y-%m-%d')

    article = {
        "@context": "https://schema.org",
        "@type": "Article",
        "headline": headline,
        "url": page_url,
        "datePublished": published,
        "dateModified": published,
        "author": {"@type": "Organization", "name": org_name},
        "publisher": {"@type": "Organization", "name": org_name}
    }

    picture_list = page.get('images', [])
    if picture_list:
        lead = picture_list[0]
        source = lead if isinstance(lead, str) else lead.get('src', '')
        if source:
            article["image"] = source

    return article
|
|
|
|
def generate_product_schema(page: Dict, org_name: str) -> Dict:
    """Generate a Product schema for a product page.

    The price is the first amount in the page text that is attached to a
    currency marker: ``$``, ``SAR`` or ``ريال`` before the number, or
    ``SAR``/``ريال`` after it. Thousands separators are removed from the
    amount. The currency follows the matched marker (``$`` gives USD,
    otherwise SAR).

    Args:
        page: Page data. Reads ``title``, ``url``, ``text`` and ``images``
            (strings or dicts with a ``src`` key).
        org_name: Name used for the offer's seller.

    Returns:
        The Product schema dict. When no price is found the offer keeps the
        placeholder price ``"0"`` and the currency is SAR if the text
        mentions ``SAR``/``ريال`` anywhere, else USD.
    """
    import re

    title = page.get('title', '')
    url = page.get('url', '')
    text = page.get('text') or ''

    # Either comma-grouped thousands or a plain integer, plus optional cents.
    amount = r'(?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d{1,2})?'
    price_match = re.search(
        rf'(?P<lead_sym>\$|SAR|ريال)\s*(?P<lead_amt>{amount})'
        rf'|(?P<trail_amt>{amount})\s*(?P<trail_sym>SAR|ريال)',
        text,
    )

    if price_match:
        symbol = price_match.group('lead_sym') or price_match.group('trail_sym')
        raw_amount = price_match.group('lead_amt') or price_match.group('trail_amt')
        price = raw_amount.replace(',', '')
        # Take the currency from the marker attached to the price, not from
        # an unrelated mention elsewhere on the page.
        currency = "USD" if symbol == '$' else "SAR"
    else:
        # Placeholder kept so the "offers" shape stays stable for callers.
        price = "0"
        currency = "SAR" if "ريال" in text or "SAR" in text else "USD"

    schema = {
        "@context": "https://schema.org",
        "@type": "Product",
        "name": title,
        "url": url,
        "description": text[:200] if text else title,
        "offers": {
            "@type": "Offer",
            "price": price,
            "priceCurrency": currency,
            "availability": "https://schema.org/InStock",
            "seller": {
                "@type": "Organization",
                "name": org_name
            }
        }
    }

    images = page.get('images') or []
    if images:
        first = images[0]
        if isinstance(first, str):
            first_image = first
        elif isinstance(first, dict):
            first_image = first.get('src', '')
        else:
            first_image = ''
        if first_image:
            schema["image"] = first_image

    return schema
|
|
|
|
def generate_local_business_schema(audit: Dict) -> Dict:
    """Build a LocalBusiness schema with a Saudi postal address.

    For each page, the first city from a fixed list of Saudi cities (in
    list order) that occurs in the page text is recorded. The locality is
    the city recorded for the earliest such page, or ``"الرياض"`` when no
    page mentions any listed city. The country is always ``"SA"``.
    """
    pages = audit.get('pages', [])
    business_name = audit.get('org_name', 'Company')
    site_url = audit.get('url', '')

    known_cities = ['الرياض', 'جدة', 'مكة', 'المدينة', 'الدمام', 'الخبر', 'تبوك', 'أبها']

    detected = []
    for page in pages:
        body = page.get('text', '')
        hit = next((name for name in known_cities if name in body), None)
        if hit is not None:
            detected.append(hit)

    if detected:
        locality = detected[0]
    else:
        locality = "الرياض"

    return {
        "@context": "https://schema.org",
        "@type": "LocalBusiness",
        "name": business_name,
        "url": site_url,
        "address": {
            "@type": "PostalAddress",
            "addressCountry": "SA",
            "addressLocality": locality
        }
    }
|
|
|
|
def generate_all_schemas(audit: Dict) -> List[Dict]:
    """Collect every site-wide schema that applies to the audited website.

    Always returns the Organization, WebSite and LocalBusiness schemas, in
    that order; a BreadcrumbList schema is appended when the audit carries
    a non-empty ``url``.
    """
    collected = [
        generate_organization_schema(audit),
        generate_website_schema(audit),
        generate_local_business_schema(audit),
    ]

    site_url = audit.get('url', '')
    if site_url:
        collected.append(generate_breadcrumb_schema(site_url))

    return collected
|
|
|
|
def format_schema_for_html(schemas: List[Dict]) -> str:
    """Format schemas as HTML ``<script type="application/ld+json">`` tags.

    Falsy entries (``None``, empty dicts) are skipped. Each remaining schema
    is serialized as indented JSON with non-ASCII characters kept as-is.

    Args:
        schemas: Schema dicts to render; may contain ``None`` entries.

    Returns:
        The script tags joined by a blank line, or ``''`` when there is
        nothing to render.
    """
    html_parts = []

    for schema in schemas or []:
        if not schema:
            continue
        json_str = json.dumps(schema, ensure_ascii=False, indent=2)
        # In JSON "<" can only occur inside string values. Emitting it as
        # the \u003c escape keeps the data identical after parsing while
        # making it impossible for a value containing "</script>" or
        # "<!--" to terminate or corrupt the surrounding script element.
        json_str = json_str.replace('<', '\\u003c')
        html_parts.append(
            f'<script type="application/ld+json">\n{json_str}\n</script>'
        )

    return '\n\n'.join(html_parts)
|
|
|
|
def get_schema_recommendations(audit: Dict) -> List[Dict]:
    """List recommended schemas for the audited site.

    - Organization: recommended (high priority) unless some page's ``html``
      already contains an Organization ``@type`` marker.
    - FAQPage: recommended (medium priority) when any page URL contains
      ``faq``.
    - BreadcrumbList: always recommended (medium priority).

    Each recommendation is a dict with ``type``, ``priority``, ``title``,
    ``description`` and ``code`` keys.
    """
    pages = audit.get('pages', [])
    suggestions = []

    # The two spellings of the marker that are looked for in page HTML.
    org_markers = ('"@type":"Organization"', '"@type": "Organization"')
    org_present = any(
        any(marker in markup for marker in org_markers)
        for markup in (page.get('html', '') for page in pages)
    )

    if not org_present:
        org_code = json.dumps(
            generate_organization_schema(audit), ensure_ascii=False, indent=2
        )
        suggestions.append({
            'type': 'organization',
            'priority': 'high',
            'title': 'أضف Organization Schema',
            'description': 'يساعد محركات البحث على فهم معلومات شركتك',
            'code': org_code
        })

    faq_page_found = False
    for page in pages:
        if 'faq' in page.get('url', '').lower():
            faq_page_found = True
            break

    if faq_page_found:
        suggestions.append({
            'type': 'faq',
            'priority': 'medium',
            'title': 'أضف FAQPage Schema',
            'description': 'يظهر الأسئلة الشائعة مباشرة في نتائج البحث',
            'code': 'استخدم generate_faq_schema() مع قائمة الأسئلة'
        })

    breadcrumb_code = json.dumps(
        generate_breadcrumb_schema(audit.get('url', '')),
        ensure_ascii=False,
        indent=2,
    )
    suggestions.append({
        'type': 'breadcrumb',
        'priority': 'medium',
        'title': 'أضف BreadcrumbList Schema',
        'description': 'يحسن التنقل في نتائج البحث',
        'code': breadcrumb_code
    })

    return suggestions
|
|