|
|
import requests |
|
|
import xml.etree.ElementTree as ET |
|
|
from openpyxl import Workbook |
|
|
import gradio as gr |
|
|
import base64 |
|
|
|
|
|
def extract_links_from_xml(xml_content):
    """Return the page URLs listed in a sitemap XML document.

    Args:
        xml_content: The sitemap XML, as ``str`` or ``bytes``.

    Returns:
        A list with the stripped text of every ``<loc>`` element in the
        sitemaps.org 0.9 namespace, in document order. Empty or
        whitespace-only ``<loc>`` elements are skipped.

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml_content`` is not
            well-formed XML.
    """
    root = ET.fromstring(xml_content)
    loc_tag = '{http://www.sitemaps.org/schemas/sitemap/0.9}loc'

    urls = []
    for loc in root.iter(loc_tag):
        # ``loc.text`` is None for an empty element such as ``<loc/>``;
        # calling ``.strip()`` on it directly would raise AttributeError.
        text = (loc.text or '').strip()
        if text:
            urls.append(text)

    return urls
|
|
|
|
|
|
|
|
def check_sitemap(sitemap_url, timeout=10):
    """Request every URL listed in a sitemap and save the results to Excel.

    The sitemap at ``sitemap_url`` is downloaded, each URL it lists is
    requested with GET, and a workbook with one row per URL (columns
    ``URL`` and ``Response``) is written to ``sitemap_responses.xlsx`` in
    the current working directory.

    Args:
        sitemap_url: Address of the sitemap XML document.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        None. Progress and errors are reported with ``print``. When the
        sitemap cannot be retrieved, no workbook is written.
    """
    try:
        response = requests.get(sitemap_url, timeout=timeout)
    except requests.RequestException as exc:
        # Connection failures, timeouts and malformed URLs are reported the
        # same way as a non-200 answer instead of propagating a traceback.
        print(f"Error retrieving sitemap: {exc}")
        return

    if response.status_code != 200:
        print(f"Error retrieving sitemap: {response.status_code}")
        return

    urls = extract_links_from_xml(response.content)

    workbook = Workbook()
    sheet = workbook.active

    # Header row; data rows start on row 2.
    sheet['A1'] = 'URL'
    sheet['B1'] = 'Response'

    for row, url in enumerate(urls, start=2):
        try:
            result = requests.get(url, timeout=timeout).status_code
        except requests.RequestException as exc:
            # One unreachable page must not discard the whole report, so the
            # failure is recorded in place of the status code.
            result = f"Error: {exc}"

        sheet.cell(row=row, column=1, value=url)
        sheet.cell(row=row, column=2, value=result)

    workbook.save('sitemap_responses.xlsx')
    print("Excel file generated successfully.")
|
|
|
|
|
|
|
|
def download_xml(url):
    """Check a sitemap and return an HTML download link for the report.

    Runs ``check_sitemap(url)`` and embeds the resulting
    ``sitemap_responses.xlsx`` workbook in a base64 ``data:`` link.

    Args:
        url: Address of the sitemap XML document to check.

    Returns:
        An HTML string: a download link for the Excel report, or a short
        error paragraph when no report was produced.
    """
    import os

    report_path = "sitemap_responses.xlsx"

    # Drop any report left by a previous run so that a failed check cannot
    # silently serve stale results for a different sitemap.
    try:
        os.remove(report_path)
    except FileNotFoundError:
        pass

    check_sitemap(url)

    # check_sitemap only prints on failure, so the absence of the file is
    # the signal that no report was generated.
    if not os.path.exists(report_path):
        return "<p>Could not generate the report: the sitemap could not be retrieved.</p>"

    with open(report_path, "rb") as f:
        data = f.read()

    base64_data = base64.b64encode(data).decode("utf-8")

    # The payload is an .xlsx workbook, so the link is labelled accordingly.
    return f'<a href="data:application/octet-stream;base64,{base64_data}" download="sitemap_responses.xlsx">Download Excel report</a>'
|
|
|
|
|
|
|
|
# Web UI: one text box for the sitemap URL, and an HTML output that renders
# the download link returned by download_xml. The "html" string shortcut is
# used instead of the deprecated gr.outputs.HTML() legacy namespace, which
# newer Gradio releases no longer provide.
iface = gr.Interface(download_xml, inputs="text", outputs="html", title="Sitemap Checker")

# Start the Gradio server.
iface.launch()