"""Check every URL listed in an XML sitemap and report HTTP status codes.

A small Gradio app: the user supplies a sitemap URL, each ``<loc>`` entry is
fetched, the results are written to ``sitemap_responses.xlsx``, and the
workbook is returned as a base64 data-URI download link.
"""

import base64
import xml.etree.ElementTree as ET

import gradio as gr
import requests
from openpyxl import Workbook

# Namespaced tag used by the standard sitemap protocol for URL entries.
SITEMAP_LOC = '{http://www.sitemaps.org/schemas/sitemap/0.9}loc'

# Per-request timeout (seconds) so one slow URL cannot hang the whole run.
REQUEST_TIMEOUT = 10


def extract_links_from_xml(xml_content):
    """Parse sitemap XML (bytes or str) and return the list of <loc> URLs.

    Empty ``<loc/>`` elements (``.text is None``) are skipped instead of
    raising ``AttributeError`` as the original did.
    """
    root = ET.fromstring(xml_content)
    return [loc.text.strip() for loc in root.iter(SITEMAP_LOC) if loc.text]


def check_sitemap(sitemap_url):
    """Fetch *sitemap_url*, probe every listed URL, save results to Excel.

    Writes ``sitemap_responses.xlsx`` in the working directory with two
    columns (URL, Response).  A network error on an individual URL is
    recorded in the Response column instead of aborting the whole scan.
    """
    response = requests.get(sitemap_url, timeout=REQUEST_TIMEOUT)
    if response.status_code != 200:
        print(f"Error retrieving sitemap: {response.status_code}")
        return

    urls = extract_links_from_xml(response.content)

    workbook = Workbook()
    sheet = workbook.active
    sheet['A1'] = 'URL'
    sheet['B1'] = 'Response'

    # Data rows start at 2, below the header row.
    for row, url in enumerate(urls, start=2):
        try:
            status = requests.get(url, timeout=REQUEST_TIMEOUT).status_code
        except requests.RequestException as exc:
            # Record the failure rather than crashing mid-sitemap.
            status = f"error: {exc}"
        sheet.cell(row=row, column=1, value=url)
        sheet.cell(row=row, column=2, value=status)

    workbook.save('sitemap_responses.xlsx')
    print("Excel file generated successfully.")


def download_xml(url):
    """Run the sitemap check and return an HTML download link for the report.

    BUG FIX: the original returned the literal f-string ``f'Download XML'``
    with no placeholder — ``base64_data`` was computed but never embedded, so
    the user got plain text instead of a download link.  The anchor below
    carries the workbook as a data URI the browser can save.
    """
    check_sitemap(url)
    with open("sitemap_responses.xlsx", "rb") as f:
        data = f.read()
    base64_data = base64.b64encode(data).decode("utf-8")
    mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    return (
        f'<a href="data:{mime};base64,{base64_data}" '
        f'download="sitemap_responses.xlsx">Download XML</a>'
    )


# Gradio interface wiring.
iface = gr.Interface(
    download_xml,
    inputs="text",
    # NOTE(review): gr.outputs was removed in Gradio 4.x — use gr.HTML()
    # there; kept as-is to match the Gradio version this file targets.
    outputs=gr.outputs.HTML(),
    title="Sitemap Checker",
)

# Guarded so importing this module does not start a web server.
if __name__ == "__main__":
    iface.launch()