Update app.py
Browse files
app.py
CHANGED
|
@@ -1,65 +1,8 @@
|
|
| 1 |
-
# import requests
|
| 2 |
-
# import xml.etree.ElementTree as ET
|
| 3 |
-
# from openpyxl import Workbook
|
| 4 |
-
#
|
| 5 |
-
# def extract_links_from_xml(xml_content):
|
| 6 |
-
# root = ET.fromstring(xml_content)
|
| 7 |
-
# urls = []
|
| 8 |
-
#
|
| 9 |
-
# # Find all <loc> elements and extract the text inside them
|
| 10 |
-
# for loc in root.iter('{http://www.sitemaps.org/schemas/sitemap/0.9}loc'):
|
| 11 |
-
# urls.append(loc.text.strip())
|
| 12 |
-
#
|
| 13 |
-
# return urls
|
| 14 |
-
#
|
| 15 |
-
#
|
| 16 |
-
# def check_sitemap(sitemap_url):
|
| 17 |
-
# # Send a GET request to the sitemap URL
|
| 18 |
-
# response = requests.get(sitemap_url)
|
| 19 |
-
#
|
| 20 |
-
# if response.status_code == 200:
|
| 21 |
-
# urls = extract_links_from_xml(response.content)
|
| 22 |
-
#
|
| 23 |
-
# # Create a new Excel workbook and select the active sheet
|
| 24 |
-
# workbook = Workbook()
|
| 25 |
-
# sheet = workbook.active
|
| 26 |
-
#
|
| 27 |
-
# # Write headers to the first row of the sheet
|
| 28 |
-
# sheet['A1'] = 'URL'
|
| 29 |
-
# sheet['B1'] = 'Response'
|
| 30 |
-
#
|
| 31 |
-
# row = 2 # Starting row to write URLs and responses
|
| 32 |
-
#
|
| 33 |
-
# for url in urls:
|
| 34 |
-
# # Send a GET request to the URL in the sitemap
|
| 35 |
-
# url_response = requests.get(url)
|
| 36 |
-
#
|
| 37 |
-
# # Write the URL and response status code to the next row of the sheet
|
| 38 |
-
# sheet.cell(row=row, column=1, value=url)
|
| 39 |
-
# sheet.cell(row=row, column=2, value=url_response.status_code)
|
| 40 |
-
#
|
| 41 |
-
# row += 1
|
| 42 |
-
#
|
| 43 |
-
# # Save the workbook as an Excel file
|
| 44 |
-
# workbook.save('sitemap_responses.xlsx')
|
| 45 |
-
# print("Excel file generated successfully.")
|
| 46 |
-
# else:
|
| 47 |
-
# print(f"Error retrieving sitemap: {response.status_code}")
|
| 48 |
-
#
|
| 49 |
-
#
|
| 50 |
-
# # Example usage
|
| 51 |
-
# sitemap_url = 'http://www.embedded-innovations.com/sitemap.xml'
|
| 52 |
-
# check_sitemap(sitemap_url)
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
sitemap_url = 'http://www.embedded-innovations.com/sitemap.xml'
|
| 56 |
-
|
| 57 |
import requests
|
| 58 |
import xml.etree.ElementTree as ET
|
| 59 |
from openpyxl import Workbook
|
| 60 |
import gradio as gr
|
| 61 |
import base64
|
| 62 |
-
import os
|
| 63 |
|
| 64 |
def extract_links_from_xml(xml_content):
|
| 65 |
root = ET.fromstring(xml_content)
|
|
@@ -108,13 +51,11 @@ def check_sitemap(sitemap_url):
|
|
| 108 |
|
| 109 |
def download_xml(url):
|
| 110 |
check_sitemap(url)
|
| 111 |
-
|
| 112 |
-
file_size = os.path.getsize(file_name) / 1024 # Size in KB
|
| 113 |
-
with open(file_name, "rb") as f:
|
| 114 |
data = f.read()
|
| 115 |
base64_data = base64.b64encode(data).decode("utf-8")
|
| 116 |
-
return f'<a href="data:application/octet-stream;base64,{base64_data}" download="
|
| 117 |
|
| 118 |
# Gradio interface
|
| 119 |
-
iface = gr.Interface(download_xml, inputs="text", outputs=
|
| 120 |
-
iface.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import requests
|
| 2 |
import xml.etree.ElementTree as ET
|
| 3 |
from openpyxl import Workbook
|
| 4 |
import gradio as gr
|
| 5 |
import base64
|
|
|
|
| 6 |
|
| 7 |
def extract_links_from_xml(xml_content):
|
| 8 |
root = ET.fromstring(xml_content)
|
|
|
|
| 51 |
|
| 52 |
def download_xml(url):
    """Build an HTML download link for the sitemap report of *url*.

    Calls ``check_sitemap`` with the given URL, then embeds the contents of
    ``sitemap_responses.xlsx`` in a base64 ``data:`` URI so the browser can
    download it straight from the rendered HTML.

    Args:
        url: Sitemap URL as typed into the Gradio text box.

    Returns:
        An HTML string: an ``<a>`` download link on success, or a short
        ``<p>`` message when no URL was given or no report file exists.
    """
    report_name = "sitemap_responses.xlsx"

    # A blank text box would otherwise be forwarded as a request URL.
    if not url or not url.strip():
        return "<p>Please enter a sitemap URL.</p>"

    # NOTE(review): check_sitemap is presumably what writes report_name;
    # its body is not visible from here -- confirm.
    check_sitemap(url.strip())

    try:
        with open(report_name, "rb") as f:
            data = f.read()
    except FileNotFoundError:
        # No report on disk: show a message instead of raising a traceback.
        return (
            "<p>No report was generated for this sitemap. "
            "Check the URL and try again.</p>"
        )

    base64_data = base64.b64encode(data).decode("utf-8")
    # NOTE(review): the link text says "XML" but the payload is an .xlsx
    # workbook -- consider renaming the label.
    return (
        f'<a href="data:application/octet-stream;base64,{base64_data}" '
        f'download="{report_name}">Download XML</a>'
    )
|
| 58 |
|
| 59 |
# Gradio interface
# One text box in (the sitemap URL), rendered HTML out (the string returned
# by download_xml). The "html" string shortcut replaces gr.outputs.HTML(),
# which belongs to the legacy gr.outputs namespace (deprecated in Gradio 3.x
# and removed in 4.x).
iface = gr.Interface(download_xml, inputs="text", outputs="html", title="Sitemap Checker")
iface.launch()
|