Spaces:

Alinabil1
/

last_edit

Sleeping

File size: 10,482 Bytes

a74b879

"""
Schema Generator
Automatically generates JSON-LD schema markup for websites
"""

import json
from typing import Dict, List
from datetime import datetime

def generate_organization_schema(audit: Dict) -> Dict:
    """Build an Organization JSON-LD object from audit data.

    Contact details are extracted heuristically from each page's ``text``
    and ``paragraphs``; social profiles are taken from each page's ``links``.

    Args:
        audit: Audit mapping. Reads ``org_name`` (default ``'Company'``),
            ``url`` (default ``''``) and ``pages`` (default empty). Each page
            may provide ``text``, ``paragraphs`` and ``links``; a link is
            either a dict with an ``href`` key or a value convertible to str.

    Returns:
        The schema dict. ``email``, ``telephone``, ``sameAs`` and
        ``contactPoint`` are present only when matching data was found.
        ``sameAs`` holds at most five distinct links in first-seen order.
    """
    import re

    email_re = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b')
    # The country-code prefix must be a NON-capturing group: with a capturing
    # group re.findall returns only that group (often an empty string)
    # instead of the whole phone number.
    phone_re = re.compile(r'(?:\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}')
    social_names = ('facebook', 'twitter', 'instagram', 'linkedin', 'youtube')

    pages = audit.get('pages') or []
    org_name = audit.get('org_name', 'Company')
    url = audit.get('url', '')

    emails = []
    phones = []
    social_links = []

    for page in pages:
        paragraphs = page.get('paragraphs') or ''
        if isinstance(paragraphs, (list, tuple)):
            # Join the items instead of scanning the list's repr: repr escapes
            # newlines as a literal "\n", which glues an "n" onto the next word.
            paragraphs = ' '.join(str(item) for item in paragraphs)
        text = f"{page.get('text') or ''} {paragraphs}"

        emails.extend(email_re.findall(text))
        phones.extend(phone_re.findall(text))

        for link in page.get('links') or []:
            href = link.get('href', '') if isinstance(link, dict) else link
            href = str(href or '')
            lowered = href.lower()
            if any(name in lowered for name in social_names):
                social_links.append(href)

    # Avoid a double slash in the logo URL when the site URL ends with "/".
    base_url = (url or '').rstrip('/')

    schema = {
        "@context": "https://schema.org",
        "@type": "Organization",
        "name": org_name,
        "url": url,
        "logo": f"{base_url}/logo.png",
        "description": f"{org_name} - خدمات متميزة"
    }

    if emails:
        schema["email"] = emails[0]

    if phones:
        schema["telephone"] = phones[0]

    if social_links:
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # output is deterministic (set ordering is not).
        schema["sameAs"] = list(dict.fromkeys(social_links))[:5]

    if emails or phones:
        contact_point = {
            "@type": "ContactPoint",
            "contactType": "customer service",
        }
        # Only emit the properties that were found; a None value would be
        # serialised as JSON null.
        if emails:
            contact_point["email"] = emails[0]
        if phones:
            contact_point["telephone"] = phones[0]
        schema["contactPoint"] = contact_point

    return schema


def generate_faq_schema(faqs: List[Dict]) -> Dict:
    """Build a FAQPage JSON-LD object from a list of question/answer dicts.

    Each entry is read through its ``question``/``answer`` keys, falling back
    to ``q``/``a`` when the long key is absent. Entries whose question or
    answer is falsy are skipped.

    Returns:
        The FAQPage schema, or ``None`` when ``faqs`` is empty or no entry
        has both a question and an answer.
    """
    if not faqs:
        return None

    entries = []
    for item in faqs:
        q_text = item.get('question', item.get('q', ''))
        a_text = item.get('answer', item.get('a', ''))
        if not (q_text and a_text):
            continue
        entries.append({
            "@type": "Question",
            "name": q_text,
            "acceptedAnswer": {
                "@type": "Answer",
                "text": a_text
            }
        })

    if not entries:
        return None

    return {
        "@context": "https://schema.org",
        "@type": "FAQPage",
        "mainEntity": entries
    }


def generate_breadcrumb_schema(url: str) -> Dict:
    """Build a BreadcrumbList JSON-LD object from a URL's path segments.

    Position 1 is always the home crumb pointing at ``scheme://netloc``.
    Every non-empty path segment adds one crumb whose ``item`` is the
    cumulative URL up to that segment and whose ``name`` is the segment made
    readable: percent-decoded, ``-``/``_`` turned into spaces, title-cased.

    Args:
        url: URL to derive the trail from. It is parsed with ``urlparse``;
            the query string and fragment are not used.

    Returns:
        The BreadcrumbList schema dict.
    """
    from urllib.parse import unquote, urlparse

    parsed = urlparse(url)
    origin = f"{parsed.scheme}://{parsed.netloc}"

    crumbs = [{
        "@type": "ListItem",
        "position": 1,
        "name": "الرئيسية",
        "item": origin
    }]

    current_url = origin
    segments = (part for part in parsed.path.split('/') if part)
    for position, segment in enumerate(segments, start=2):
        current_url += f"/{segment}"
        # Decode only the display name: non-ASCII slugs arrive percent-encoded
        # and would otherwise show up as "%D9%85..."; the item URL keeps the
        # original encoding.
        label = unquote(segment).replace('-', ' ').replace('_', ' ').title()
        crumbs.append({
            "@type": "ListItem",
            "position": position,
            "name": label,
            "item": current_url
        })

    return {
        "@context": "https://schema.org",
        "@type": "BreadcrumbList",
        "itemListElement": crumbs
    }


def generate_website_schema(audit: Dict) -> Dict:
    """Build a WebSite JSON-LD object carrying a SearchAction.

    Args:
        audit: Audit mapping. Reads ``org_name`` (default ``'Company'``) and
            ``url`` (default ``''``).

    Returns:
        The WebSite schema dict. The search template is always
        ``<url>/search?q={search_term_string}``; this function does not
        verify that the site actually serves that path.
    """
    org_name = audit.get('org_name', 'Company')
    url = audit.get('url', '')

    # Drop trailing slashes so "https://example.com/" does not produce
    # "https://example.com//search?...".
    base_url = (url or '').rstrip('/')

    return {
        "@context": "https://schema.org",
        "@type": "WebSite",
        "name": org_name,
        "url": url,
        "potentialAction": {
            "@type": "SearchAction",
            "target": {
                "@type": "EntryPoint",
                "urlTemplate": f"{base_url}/search?q={{search_term_string}}"
            },
            "query-input": "required name=search_term_string"
        }
    }


def generate_article_schema(page: Dict, org_name: str) -> Dict:
    """Build an Article JSON-LD object for a single page.

    The first ``YYYY-MM-DD`` shaped token in the page text is used for both
    ``datePublished`` and ``dateModified``; when the text has none, today's
    date is used. ``org_name`` is emitted as both author and publisher.

    Args:
        page: Page mapping. Reads ``title``, ``url``, ``text`` and ``images``
            (each image is a string or a dict with a ``src`` key).
        org_name: Organization name for the author/publisher entries.

    Returns:
        The Article schema dict; ``image`` is present only when the first
        image yields a non-empty source.
    """
    import re

    headline = page.get('title', '')
    page_url = page.get('url', '')
    body_text = page.get('text', '')

    # Use the first ISO-looking date in the text, else fall back to today.
    found = re.search(r'(\d{4}-\d{2}-\d{2})', body_text)
    if found:
        published = found.group(1)
    else:
        published = datetime.now().strftime('%Y-%m-%d')

    article = {
        "@context": "https://schema.org",
        "@type": "Article",
        "headline": headline,
        "url": page_url,
        "datePublished": published,
        "dateModified": published,
        "author": {
            "@type": "Organization",
            "name": org_name
        },
        "publisher": {
            "@type": "Organization",
            "name": org_name
        }
    }

    # Only the first image is considered.
    gallery = page.get('images', [])
    if gallery:
        lead = gallery[0]
        source = lead if isinstance(lead, str) else lead.get('src', '')
        if source:
            article["image"] = source

    return article


def generate_product_schema(page: Dict, org_name: str) -> Dict:
    """Build a Product JSON-LD object for a product page.

    The price is taken from the first ``<symbol> <amount>`` occurrence in the
    page text (symbols: ``$``, ``SAR``, ``ريال``); if there is none, the
    ``<amount> <SAR|ريال>`` form is tried. The currency follows the matched
    symbol. When no price is found the price is ``"0"`` and the currency is
    ``SAR`` if the text mentions ``SAR``/``ريال`` anywhere, else ``USD``.

    Args:
        page: Page mapping. Reads ``title``, ``url``, ``text`` and ``images``
            (each image is a string or a dict with a ``src`` key).
        org_name: Organization name emitted as the offer's seller.

    Returns:
        The Product schema dict; ``image`` is present only when the first
        image yields a non-empty source. Availability is always ``InStock``.
    """
    import re

    title = page.get('title', '')
    url = page.get('url', '')
    text = page.get('text') or ''

    # Accept any number of decimals so "12.5" is not truncated to "12".
    amount = r'\d+(?:\.\d+)?'
    match = re.search(rf'(?P<symbol>\$|SAR|ريال)\s*(?P<amount>{amount})', text)
    if match is None:
        # Amount-before-currency form, e.g. "150 ريال".
        match = re.search(rf'(?P<amount>{amount})\s*(?P<symbol>SAR|ريال)', text)

    if match:
        price = match.group('amount')
        # Tie the currency to the symbol next to the price rather than to any
        # mention elsewhere in the text.
        currency = "USD" if match.group('symbol') == '$' else "SAR"
    else:
        price = "0"
        currency = "SAR" if "ريال" in text or "SAR" in text else "USD"

    schema = {
        "@context": "https://schema.org",
        "@type": "Product",
        "name": title,
        "url": url,
        "description": text[:200] if text else title,
        "offers": {
            "@type": "Offer",
            "price": price,
            "priceCurrency": currency,
            "availability": "https://schema.org/InStock",
            "seller": {
                "@type": "Organization",
                "name": org_name
            }
        }
    }

    # Only the first image is considered.
    images = page.get('images') or []
    if images:
        first = images[0]
        first_image = first if isinstance(first, str) else first.get('src', '')
        if first_image:
            schema["image"] = first_image

    return schema


def generate_local_business_schema(audit: Dict) -> Dict:
    """Build a LocalBusiness JSON-LD object with a Saudi postal address.

    Each page's text is searched for a fixed list of Saudi city names; the
    locality is the first listed city found in the earliest page that
    mentions one. If no page mentions a listed city, ``"الرياض"`` is used.

    Args:
        audit: Audit mapping. Reads ``pages``, ``org_name`` (default
            ``'Company'``) and ``url`` (default ``''``).

    Returns:
        The LocalBusiness schema dict with ``addressCountry`` fixed to ``SA``.
    """
    known_cities = ['الرياض', 'جدة', 'مكة', 'المدينة', 'الدمام', 'الخبر', 'تبوك', 'أبها']

    # At most one city per page: the first name (in list order) it contains.
    detected = []
    for page in audit.get('pages', []):
        body = page.get('text', '')
        hit = next((name for name in known_cities if name in body), None)
        if hit is not None:
            detected.append(hit)

    if detected:
        locality = detected[0]
    else:
        locality = "الرياض"

    business_name = audit.get('org_name', 'Company')
    site_url = audit.get('url', '')

    return {
        "@context": "https://schema.org",
        "@type": "LocalBusiness",
        "name": business_name,
        "url": site_url,
        "address": {
            "@type": "PostalAddress",
            "addressCountry": "SA",
            "addressLocality": locality
        }
    }


def generate_all_schemas(audit: Dict) -> List[Dict]:
    """Collect the site-wide schemas for an audited website.

    Organization, WebSite and LocalBusiness schemas are always included, in
    that order. A BreadcrumbList for the audit's ``url`` is appended only
    when that URL is non-empty.
    """
    collected = [
        generate_organization_schema(audit),
        generate_website_schema(audit),
        generate_local_business_schema(audit),
    ]

    # The breadcrumb trail is derived from the site URL, so skip it without one.
    site_url = audit.get('url', '')
    if site_url:
        collected.append(generate_breadcrumb_schema(site_url))

    return collected


def format_schema_for_html(schemas: List[Dict]) -> str:
    """Render schemas as ``<script type="application/ld+json">`` HTML tags.

    Falsy entries (``None``, empty dicts) are skipped. Each remaining schema
    is serialised as indented JSON with non-ASCII characters kept as-is, and
    the resulting tags are separated by a blank line.

    Every ``<`` in the serialised JSON is written as the JSON escape
    ``\\u003c``. Schema values can come from crawled page content, and a
    literal ``</script>`` inside a string would otherwise close the script
    element early and let the remainder be parsed as HTML. The escape decodes
    back to ``<`` when the JSON is parsed, so the data is unchanged.

    Args:
        schemas: Schema dicts to render; may contain falsy placeholders.

    Returns:
        The concatenated script tags, or ``''`` when nothing is rendered.
    """
    html_parts = []

    for schema in schemas:
        if not schema:
            continue
        json_str = json.dumps(schema, ensure_ascii=False, indent=2)
        # "<" never occurs in JSON syntax itself, only inside string values,
        # so a blanket replacement is safe.
        json_str = json_str.replace('<', '\\u003c')
        html_parts.append(f'<script type="application/ld+json">\n{json_str}\n</script>')

    return '\n\n'.join(html_parts)


def _pages_declare_schema_type(pages: List[Dict], schema_type: str) -> bool:
    """Return True if any page's ``html`` contains ``"@type": "<schema_type>"``.

    Whitespace around the colon is ignored. This is a textual check on the
    raw HTML, not a JSON-LD parse.
    """
    import re

    pattern = re.compile(r'"@type"\s*:\s*"' + re.escape(schema_type) + r'"')
    return any(pattern.search(page.get('html') or '') for page in pages)


def get_schema_recommendations(audit: Dict) -> List[Dict]:
    """Recommend schemas that the audited pages do not already declare.

    Recommendations, in output order:

    * ``organization`` (high): no page declares an Organization type.
    * ``faq`` (medium): some page URL contains ``faq`` (case-insensitive)
      and no page declares a FAQPage type.
    * ``breadcrumb`` (medium): no page declares a BreadcrumbList type.

    Args:
        audit: Audit mapping. Reads ``pages`` (each page may provide ``html``
            and ``url``) and ``url``; it is also passed to the Organization
            schema generator to build the sample code.

    Returns:
        A list of dicts with ``type``, ``priority``, ``title``,
        ``description`` and ``code`` keys; empty when nothing is missing.
    """
    recommendations = []

    pages = audit.get('pages') or []

    if not _pages_declare_schema_type(pages, 'Organization'):
        recommendations.append({
            'type': 'organization',
            'priority': 'high',
            'title': 'أضف Organization Schema',
            'description': 'يساعد محركات البحث على فهم معلومات شركتك',
            'code': json.dumps(generate_organization_schema(audit), ensure_ascii=False, indent=2)
        })

    # Only suggest FAQPage when the site has a FAQ page that is not marked up yet.
    has_faq_page = any('faq' in (page.get('url') or '').lower() for page in pages)
    if has_faq_page and not _pages_declare_schema_type(pages, 'FAQPage'):
        recommendations.append({
            'type': 'faq',
            'priority': 'medium',
            'title': 'أضف FAQPage Schema',
            'description': 'يظهر الأسئلة الشائعة مباشرة في نتائج البحث',
            'code': 'استخدم generate_faq_schema() مع قائمة الأسئلة'
        })

    # Skip the breadcrumb suggestion when a BreadcrumbList is already present,
    # mirroring the Organization check above.
    if not _pages_declare_schema_type(pages, 'BreadcrumbList'):
        recommendations.append({
            'type': 'breadcrumb',
            'priority': 'medium',
            'title': 'أضف BreadcrumbList Schema',
            'description': 'يحسن التنقل في نتائج البحث',
            'code': json.dumps(generate_breadcrumb_schema(audit.get('url', '')), ensure_ascii=False, indent=2)
        })

    return recommendations