"""Gradio app that inspects a web page's SEO basics.

For a given URL it reports the page's meta tags, its favicon, and whether
``/robots.txt`` and ``/sitemap.xml`` exist on the same host.
"""

import logging
import re
from html import escape
from urllib.parse import urljoin, urlparse

import requests
from gradio import Blocks, Button, Textbox, HTML

logger = logging.getLogger(__name__)

# Seconds to wait for any single HTTP request; without a timeout a stalled
# server would block the UI callback indefinitely.
REQUEST_TIMEOUT = 10

# NOTE(review): the tag-matching patterns were damaged in the copy of this
# file under review (the literal "<meta"/"<link" prefixes were lost); they are
# reconstructed here -- confirm against the intended behavior.
_META_TAG_RE = re.compile(r'<meta\s+([^>]+)>', re.IGNORECASE)
_LINK_TAG_RE = re.compile(r'<link\s+([^>]+)>', re.IGNORECASE)

# name="value", name='value' or name=value. Quoted forms are matched
# separately so that an apostrophe inside double quotes (content="Bob's")
# does not end the value early.
_ATTR_RE = re.compile(
    r'''([\w:-]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]+))'''
)

# Attributes that identify a <meta> tag, in lookup order.
_META_KEY_ATTRS = ("name", "property", "http-equiv", "itemprop")


def _parse_attrs(tag_body):
    """Return the attributes of one tag as a ``{lowercased_name: value}`` dict."""
    attrs = {}
    for name, double_quoted, single_quoted, bare in _ATTR_RE.findall(tag_body):
        # The first occurrence of a duplicated attribute wins.
        attrs.setdefault(name.lower(), double_quoted or single_quoted or bare)
    return attrs


def _fetch_optional(url):
    """Fetch an optional resource; return ``(found, text)``.

    A network failure is logged and reported as "not found" so that a
    problem with an auxiliary file does not discard the main page's results.
    """
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        logger.warning("Could not fetch %s: %s", url, exc)
        return False, ""
    if response.status_code == 200:
        return True, response.text
    return False, ""


def get_meta_tags(url):
    """Collect meta tags, favicon, robots.txt and sitemap.xml info for *url*.

    Args:
        url: Page address. If it has no ``scheme://`` prefix, ``https://`` is
            assumed.

    Returns:
        On success, a dict with the keys ``meta_tags`` (dict), ``favicon``
        (absolute URL or ``None``), ``robots_txt`` / ``sitemap_xml`` (bool)
        and ``robots_txt_content`` / ``sitemap_xml_content`` (str).
        On failure, ``{"error": <message>}`` -- including when the page
        answers with a non-200 status.
    """
    url = url.strip()
    if "://" not in url:
        url = "https://" + url

    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        logger.warning("An error occurred: %s", exc)
        return {"error": str(exc)}

    if response.status_code != 200:
        # Always hand the formatter a dict; returning None here would make
        # it fail when it looks up keys.
        return {"error": f"Request failed with HTTP status {response.status_code}"}

    page = response.text
    robots_found, robots_text = _fetch_optional(urljoin(url, "/robots.txt"))
    sitemap_found, sitemap_text = _fetch_optional(urljoin(url, "/sitemap.xml"))

    return {
        "meta_tags": parse_meta_tags(page),
        "favicon": extract_favicon(page, url),
        "robots_txt": robots_found,
        "sitemap_xml": sitemap_found,
        "robots_txt_content": robots_text,
        "sitemap_xml_content": sitemap_text,
    }


def parse_meta_tags(html):
    """Extract ``<meta>`` tags from *html*.

    Returns:
        A dict mapping each tag's identifier (its ``name``, ``property``,
        ``http-equiv`` or ``itemprop`` value) to its ``content`` value, plus
        a ``"charset"`` entry when ``<meta charset=...>`` is present. Tags
        with neither are skipped.
    """
    meta_tags = {}
    for tag_body in _META_TAG_RE.findall(html):
        attrs = _parse_attrs(tag_body)
        if "charset" in attrs:
            meta_tags["charset"] = attrs["charset"]
        # Key by the tag's identifier: keying by attribute name would make
        # every tag overwrite the same "name"/"content" entries.
        key = next((attrs[k] for k in _META_KEY_ATTRS if attrs.get(k)), None)
        if key is not None and "content" in attrs:
            meta_tags[key] = attrs["content"]
    return meta_tags


def extract_favicon(html, base_url):
    """Return the absolute favicon URL declared in *html*, or ``None``.

    The first ``<link>`` whose ``rel`` contains the token ``icon`` (for
    example ``rel="icon"`` or ``rel="shortcut icon"``) is used; its ``href``
    is resolved against *base_url*.

    NOTE(review): the original body of this function was not recoverable
    from the file under review; this is a reconstruction -- confirm.
    """
    for tag_body in _LINK_TAG_RE.findall(html):
        attrs = _parse_attrs(tag_body)
        rel_tokens = attrs.get("rel", "").lower().split()
        if "icon" in rel_tokens and attrs.get("href"):
            return urljoin(base_url, attrs["href"])
    return None


def _presence_section(title, found, content):
    """Render a "Found"/"Not found" section, with *content* shown when found."""
    if not found:
        return f"<h3>{title}</h3><p>Not found</p>"
    return f"<h3>{title}</h3><p>Found</p><pre>{escape(content)}</pre>"


def format_output(result):
    """Render the dict produced by ``get_meta_tags`` as an HTML fragment.

    All values that originate from the remote site are HTML-escaped before
    being embedded.

    NOTE(review): the HTML markup was stripped from the file under review;
    the tags (and the "Meta Tags" heading and error line) are reconstructed
    around the surviving visible text -- confirm the intended markup.
    """
    if not result:
        return "<p>No data was returned.</p>"
    if "error" in result:
        return f"<p>Error: {escape(str(result['error']))}</p>"

    parts = ["<h3>Meta Tags</h3>"]
    parts.extend(
        f"<p><strong>{escape(str(key))}</strong>: {escape(str(value))}</p>"
        for key, value in result["meta_tags"].items()
    )

    if result["favicon"]:
        parts.append(
            f"<h3>Favicon</h3>"
            f"<p><img src=\"{escape(result['favicon'])}\" alt=\"Favicon\"></p>"
        )
    else:
        parts.append("<h3>Favicon</h3><p>Missing</p>")

    parts.append(
        _presence_section(
            "robots.txt", result["robots_txt"], result["robots_txt_content"]
        )
    )
    parts.append(
        _presence_section(
            "sitemap.xml", result["sitemap_xml"], result["sitemap_xml_content"]
        )
    )
    return "\n".join(parts)


def get_meta_tags_ui():
    """Build and return the Gradio ``Blocks`` interface for the lookup tool."""
    with Blocks() as interface:
        # Input element to enter the URL
        url_input = Textbox(label="Enter URL", placeholder="https://example.com")

        # Button to trigger the meta tags fetch
        lookup_button = Button("Get Meta Tags from URL")

        # HTML output area for the results
        html_output = HTML()

        def update(value):
            """Fetch and format the report for the URL typed by the user."""
            if not value or not value.strip():
                return ""
            return format_output(get_meta_tags(value))

        # Link button click with the update function
        lookup_button.click(fn=update, inputs=url_input, outputs=html_output)

    return interface


# Run the interface
if __name__ == "__main__":
    interface = get_meta_tags_ui()
    interface.launch()