File size: 1,960 Bytes
8522793
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b6e77a8
8522793
 
b6e77a8
8522793
 
b6e77a8
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import requests
import xml.etree.ElementTree as ET
from openpyxl import Workbook
import gradio as gr
import base64

def extract_links_from_xml(xml_content):
    """Return the URLs listed in the ``<loc>`` elements of a sitemap document.

    Args:
        xml_content: The sitemap XML as ``str`` or ``bytes``.

    Returns:
        A list of URL strings in document order, with surrounding whitespace
        removed. ``<loc>`` elements that are empty or contain only whitespace
        are skipped.

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml_content`` is not
            well-formed XML.
    """
    root = ET.fromstring(xml_content)
    loc_tag = '{http://www.sitemaps.org/schemas/sitemap/0.9}loc'

    # ``loc.text`` is None for an empty element such as ``<loc/>``; fall back
    # to '' so one malformed entry does not abort the whole extraction.
    stripped = ((loc.text or '').strip() for loc in root.iter(loc_tag))
    return [url for url in stripped if url]


def check_sitemap(sitemap_url):
    """Fetch a sitemap, request every URL it lists, and save the results.

    On success the workbook ``sitemap_responses.xlsx`` is written to the
    current working directory with one row per URL: the URL in column A and
    either its HTTP status code or an ``Error: <ExceptionName>`` marker in
    column B. If the sitemap itself does not answer with HTTP 200, an error
    is printed and no workbook is written.

    Args:
        sitemap_url: URL of the sitemap XML document.

    Raises:
        requests.RequestException: If the sitemap itself cannot be requested.
        xml.etree.ElementTree.ParseError: If the sitemap is not well-formed
            XML.
    """
    # Without a timeout, a single unresponsive server blocks the run forever.
    timeout_seconds = 30

    response = requests.get(sitemap_url, timeout=timeout_seconds)
    if response.status_code != 200:
        print(f"Error retrieving sitemap: {response.status_code}")
        return

    urls = extract_links_from_xml(response.content)

    # Create a new Excel workbook and write the header row
    workbook = Workbook()
    sheet = workbook.active
    sheet['A1'] = 'URL'
    sheet['B1'] = 'Response'

    # Row 1 holds the headers, so results start at row 2
    for row, url in enumerate(urls, start=2):
        try:
            status = requests.get(url, timeout=timeout_seconds).status_code
        except requests.RequestException as exc:
            # Record the failure instead of losing the whole report because
            # one URL is unreachable or malformed.
            status = f"Error: {type(exc).__name__}"

        sheet.cell(row=row, column=1, value=url)
        sheet.cell(row=row, column=2, value=status)

    # Save the workbook as an Excel file
    workbook.save('sitemap_responses.xlsx')
    print("Excel file generated successfully.")


def download_xml(url):
    """Run the sitemap check and return an HTML download link for the report.

    Args:
        url: URL of the sitemap to check.

    Returns:
        An HTML string. When the report was generated it is an ``<a>``
        element embedding ``sitemap_responses.xlsx`` as a base64 ``data:``
        URI; otherwise it is a short message saying the report could not be
        produced.
    """
    import os  # local import keeps this function self-contained

    report_path = "sitemap_responses.xlsx"

    # Remove any report left over from an earlier run so that a failed check
    # can never hand back stale results for a different sitemap.
    try:
        os.remove(report_path)
    except FileNotFoundError:
        pass

    check_sitemap(url)

    # check_sitemap writes no file when the sitemap cannot be retrieved.
    if not os.path.exists(report_path):
        return "<p>Could not generate the report: the sitemap could not be retrieved.</p>"

    with open(report_path, "rb") as f:
        base64_data = base64.b64encode(f.read()).decode("utf-8")
    return f'<a href="data:application/octet-stream;base64,{base64_data}" download="sitemap_responses.xlsx">Download XML</a>'

# Gradio interface: one text input (the sitemap URL) and one HTML output
# (the markup returned by download_xml).
# The "html" shortcut replaces gr.outputs.HTML(): the gr.outputs namespace
# was deprecated in Gradio 3 and removed in Gradio 4, while the string
# shortcut is accepted by both.
iface = gr.Interface(download_xml, inputs="text", outputs="html", title="Sitemap Checker")
iface.launch()