# Hugging Face Space: sudo-soldier / seo (status: Sleeping)
# File size: 4,192 Bytes

import re
from html import escape
from html.parser import HTMLParser
from urllib.parse import urljoin, urlparse

import requests
from gradio import Blocks, Button, Textbox, HTML

# Function to get meta tags from a URL
def get_meta_tags(url, timeout=10):
    """Fetch a page and collect its basic SEO signals.

    Args:
        url: Absolute ``http://`` or ``https://`` URL of the page to inspect.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        On success, a dict with the keys ``meta_tags``, ``favicon``,
        ``robots_txt``, ``sitemap_xml``, ``robots_txt_content`` and
        ``sitemap_xml_content``. On any failure, ``{"error": message}``.
        The function never raises and never returns ``None``.
    """
    try:
        url = (url or "").strip()
        if not url:
            return {"error": "No URL provided"}
        if urlparse(url).scheme not in ("http", "https"):
            return {"error": "URL must start with http:// or https://"}

        # A timeout keeps an unresponsive host from hanging the caller forever.
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            # Report the failure explicitly instead of falling through and
            # returning None, which callers cannot distinguish from a bug.
            return {
                "error": f"Request failed with status code {response.status_code}"
            }

        html = response.text
        data = {
            "meta_tags": parse_meta_tags(html),
            "favicon": extract_favicon(html, url),
            "robots_txt": False,
            "sitemap_xml": False,
            "robots_txt_content": "",
            "sitemap_xml_content": "",
        }

        # robots.txt and sitemap.xml are optional extras looked up at the
        # site root; failing to fetch them must not discard the page data.
        robots_text = _fetch_text_if_ok(urljoin(url, "/robots.txt"), timeout)
        if robots_text is not None:
            data["robots_txt"] = True
            data["robots_txt_content"] = robots_text

        sitemap_text = _fetch_text_if_ok(urljoin(url, "/sitemap.xml"), timeout)
        if sitemap_text is not None:
            data["sitemap_xml"] = True
            data["sitemap_xml_content"] = sitemap_text

        return data

    except Exception as e:
        # Boundary handler: callers expect an error dict, never an exception.
        print(f"An error occurred: {e}")
        return {"error": str(e)}


def _fetch_text_if_ok(url, timeout):
    """Return the body of ``url`` when it answers 200, otherwise ``None``."""
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        print(f"Could not fetch {url}: {e}")
        return None
    if response.status_code == 200:
        return response.text
    return None

# Function to parse meta tags from HTML
class _MetaTagCollector(HTMLParser):
    """Collect ``<meta>`` tags as ``identifier -> content`` pairs."""

    # Attributes that can identify a meta tag, in order of preference.
    _KEY_ATTRS = ("name", "property", "http-equiv", "itemprop")

    def __init__(self):
        super().__init__()
        self.meta_tags = {}

    def handle_starttag(self, tag, attrs):
        # HTMLParser lower-cases tag and attribute names, so <META NAME=...>
        # is handled too. Self-closing tags are routed here as well.
        if tag != "meta":
            return
        # Valueless attributes arrive as None; normalise them to "".
        attr_map = {name: value or "" for name, value in attrs}
        if "charset" in attr_map:
            self.meta_tags["charset"] = attr_map["charset"]
        key = next(
            (attr_map[attr] for attr in self._KEY_ATTRS if attr_map.get(attr)),
            None,
        )
        if key is not None:
            self.meta_tags[key] = attr_map.get("content", "")


def parse_meta_tags(html):
    """Extract the ``<meta>`` tags of an HTML document.

    Args:
        html: HTML source text.

    Returns:
        A dict mapping each meta tag's identifier (the value of its ``name``,
        ``property``, ``http-equiv`` or ``itemprop`` attribute) to its
        ``content`` value. ``<meta charset=...>`` is stored under the key
        ``"charset"``. When an identifier occurs more than once, the last
        occurrence wins. Meta tags without any identifier are skipped.
    """
    collector = _MetaTagCollector()
    collector.feed(html)
    collector.close()
    return collector.meta_tags

# Function to extract favicon URL from the HTML
class _FaviconFinder(HTMLParser):
    """Remember the ``href`` of the first ``<link>`` whose ``rel`` has ``icon``."""

    def __init__(self):
        super().__init__()
        self.href = None

    def handle_starttag(self, tag, attrs):
        if tag != "link" or self.href is not None:
            return
        # Valueless attributes arrive as None; normalise them to "".
        attr_map = {name: value or "" for name, value in attrs}
        # rel is a space-separated token list, e.g. "shortcut icon"; matching
        # on whole tokens keeps "apple-touch-icon" and similar out.
        rel_tokens = attr_map.get("rel", "").lower().split()
        href = attr_map.get("href", "").strip()
        if "icon" in rel_tokens and href:
            self.href = href


def extract_favicon(html, base_url):
    """Return the absolute favicon URL declared in ``html``.

    Args:
        html: HTML source text of the page.
        base_url: URL the page was loaded from; relative and
            protocol-relative hrefs are resolved against it.

    Returns:
        The absolute URL of the first ``<link rel="... icon ...">`` that has
        an ``href``, or ``None`` when the page declares no such link.
    """
    finder = _FaviconFinder()
    finder.feed(html)
    finder.close()
    if finder.href is None:
        return None
    # urljoin leaves absolute URLs untouched, so no scheme check is needed.
    return urljoin(base_url, finder.href)

# Function to format the result output
def format_output(result):
    """Render a lookup result as an HTML fragment.

    Args:
        result: Either ``{"error": message}`` or a dict with the keys
            ``meta_tags``, ``favicon``, ``robots_txt``,
            ``robots_txt_content``, ``sitemap_xml`` and
            ``sitemap_xml_content``. ``None`` or an empty dict is reported
            as an error rather than raising.

    Returns:
        An HTML string. All text taken from ``result`` is HTML-escaped
        because it originates from a remote site and must be shown as text,
        not interpreted as markup.
    """
    if not result:
        return "Error: No data could be retrieved"
    if "error" in result:
        return f"Error: {escape(str(result['error']))}"

    parts = ["<h3>Meta Tags</h3>"]
    parts.extend(
        f"<strong>{escape(str(key))}</strong>: {escape(str(value))}<br>"
        for key, value in result["meta_tags"].items()
    )

    if result["favicon"]:
        # quote=True also escapes quotes so the URL cannot break out of the
        # single-quoted src attribute.
        favicon_src = escape(str(result["favicon"]), quote=True)
        parts.append(
            f"<h3>Favicon</h3><img src='{favicon_src}' alt='Favicon' "
            "style='width:50px;height:50px;'><br>"
        )
    else:
        parts.append("<h3>Favicon</h3><p>Missing</p><br>")

    if result["robots_txt"]:
        parts.append("<h3>robots.txt</h3><p>Found</p><br>")
        parts.append(f"<pre>{escape(str(result['robots_txt_content']))}</pre><br>")
    else:
        parts.append("<h3>robots.txt</h3><p>Not found</p><br>")

    if result["sitemap_xml"]:
        parts.append("<h3>sitemap.xml</h3><p>Found</p><br>")
        # Escaping is what makes the XML visible; unescaped it would be
        # parsed as unknown tags and mostly disappear.
        parts.append(f"<pre>{escape(str(result['sitemap_xml_content']))}</pre><br>")
    else:
        parts.append("<h3>sitemap.xml</h3><p>Not found</p><br>")

    return "".join(parts)

# Gradio Interface
def get_meta_tags_ui():
    """Build the Gradio app: a URL textbox, a lookup button and an HTML pane.

    Returns:
        The assembled ``Blocks`` object; the caller is responsible for
        launching it.
    """

    def on_lookup(value):
        # Nothing typed: produce no output, exactly like an implicit None.
        if not value:
            return None
        return format_output(get_meta_tags(value))

    with Blocks() as app:
        # Where the user types the page address
        url_box = Textbox(label="Enter URL", placeholder="https://example.com")

        # Clicking this starts the lookup
        trigger = Button("Get Meta Tags from URL")

        # Rendered report goes here
        result_pane = HTML()

        # Wire the click to the handler: textbox in, HTML pane out
        trigger.click(fn=on_lookup, inputs=url_box, outputs=result_pane)

    return app

# Build and launch the Gradio app when this file is executed as a script
if __name__ == "__main__":
    get_meta_tags_ui().launch()