# seo / app.py
# sudo-soldier's picture
# Update app.py
# bb681cb verified
import re
from html import escape
from html.parser import HTMLParser
from urllib.parse import urljoin, urlparse

import requests
from gradio import Blocks, Button, Textbox, HTML
# Function to get meta tags from a URL
def get_meta_tags(url):
try:
# Send HTTP request to the URL
response = requests.get(url)
if response.status_code == 200:
data = {
"meta_tags": {},
"favicon": None,
"robots_txt": False,
"sitemap_xml": False,
"robots_txt_content": "",
"sitemap_xml_content": ""
}
# Extracting the meta tags from the HTML
html = response.text
data['meta_tags'] = parse_meta_tags(html)
# Find the favicon link (if exists)
favicon_url = extract_favicon(html, url)
data['favicon'] = favicon_url
# Check if robots.txt exists
robots_txt_url = urljoin(url, "/robots.txt")
robots_response = requests.get(robots_txt_url)
if robots_response.status_code == 200:
data['robots_txt'] = True
data['robots_txt_content'] = robots_response.text
# Check if sitemap.xml exists
sitemap_url = urljoin(url, "/sitemap.xml")
sitemap_response = requests.get(sitemap_url)
if sitemap_response.status_code == 200:
data['sitemap_xml'] = True
data['sitemap_xml_content'] = sitemap_response.text
return data
except Exception as e:
print(f"An error occurred: {e}")
return {"error": str(e)}
# Function to parse meta tags from HTML
def parse_meta_tags(html):
meta_tags = {}
# Use regex to find <meta> tags and extract attributes
matches = re.findall(r'<meta\s+([^\>]+)>', html)
for match in matches:
attrs = re.findall(r'(\w+)=["\']([^"\']+)["\']', match)
for attr in attrs:
meta_tags[attr[0]] = attr[1]
return meta_tags
# Function to extract favicon URL from the HTML
def extract_favicon(html, base_url):
# Look for the favicon in the HTML
match = re.search(r'<link\s+rel=["\']icon["\']\s+href=["\']([^"\']+)["\']', html)
if match:
favicon_url = match.group(1)
if not favicon_url.startswith('http'):
favicon_url = urljoin(base_url, favicon_url)
return favicon_url
return None
# Function to format the result output
def format_output(result):
if "error" in result:
return f"Error: {result['error']}"
output = "<h3>Meta Tags</h3>"
for key, value in result["meta_tags"].items():
output += f"<strong>{key}</strong>: {value}<br>"
if result['favicon']:
output += f"<h3>Favicon</h3><img src='{result['favicon']}' alt='Favicon' style='width:50px;height:50px;'><br>"
else:
output += "<h3>Favicon</h3><p>Missing</p><br>"
if result['robots_txt']:
output += "<h3>robots.txt</h3><p>Found</p><br>"
output += f"<pre>{result['robots_txt_content']}</pre><br>"
else:
output += "<h3>robots.txt</h3><p>Not found</p><br>"
if result['sitemap_xml']:
output += "<h3>sitemap.xml</h3><p>Found</p><br>"
output += f"<pre>{result['sitemap_xml_content']}</pre><br>"
else:
output += "<h3>sitemap.xml</h3><p>Not found</p><br>"
return output
# Gradio Interface
def get_meta_tags_ui():
with Blocks() as interface:
# Input element to enter the URL
url_input = Textbox(label="Enter URL", placeholder="https://example.com")
# Button to trigger the meta tags fetch
lookup_button = Button("Get Meta Tags from URL")
# HTML output area for the results
html_output = HTML()
# Action when the button is clicked
def update(value):
if value:
result = get_meta_tags(value)
return format_output(result)
# Link button click with the update function
lookup_button.click(fn=update, inputs=url_input, outputs=html_output)
return interface
# Run the interface
if __name__ == "__main__":
interface = get_meta_tags_ui()
interface.launch()