# Page header captured with this file: author "bonrix", commit message "Update app.py", revision 82e3594.
import gradio as gr
import requests
import xml.etree.ElementTree as ET
def collect_urls_from_sitemap(sitemap_content):
    """Extract the page URLs listed in a sitemap XML document.

    Every ``<url>`` element in the sitemaps.org 0.9 namespace is visited and
    the text of its direct ``<loc>`` child is collected.

    Args:
        sitemap_content: The sitemap XML, as ``str`` or ``bytes``.

    Returns:
        A list of URL strings in document order. Whitespace around each URL
        is stripped, and ``<url>`` entries whose ``<loc>`` is missing or
        empty are skipped, so the list never contains ``None`` or ``""``.

    Raises:
        xml.etree.ElementTree.ParseError: If the content is not well-formed
            XML.
    """
    namespace = "{http://www.sitemaps.org/schemas/sitemap/0.9}"
    root = ET.fromstring(sitemap_content)

    urls = []
    for url_element in root.findall(".//" + namespace + "url"):
        # findtext gives None when <loc> is absent and "" when it has no text.
        loc_text = url_element.findtext(namespace + "loc")
        # Pretty-printed sitemaps often wrap the URL in newlines/indentation;
        # strip it so identical URLs compare equal across sitemaps.
        loc_text = (loc_text or "").strip()
        if loc_text:
            urls.append(loc_text)
    return urls
def compare_sitemaps(old_urls, new_urls):
    """Work out which URLs were added and which were removed.

    Args:
        old_urls: URLs from the earlier sitemap.
        new_urls: URLs from the later sitemap.

    Returns:
        A tuple ``(added_urls, removed_urls)`` of lists without duplicates.
        ``added_urls`` holds URLs present only in ``new_urls``, in the order
        they first appear there; ``removed_urls`` holds URLs present only in
        ``old_urls``, in the order they first appear there. The ordering is
        therefore deterministic from run to run.
    """
    # dict.fromkeys de-duplicates while remembering first-seen order, and the
    # resulting dicts give O(1) membership tests.
    old_unique = dict.fromkeys(old_urls)
    new_unique = dict.fromkeys(new_urls)

    added_urls = [url for url in new_unique if url not in old_unique]
    removed_urls = [url for url in old_unique if url not in new_unique]
    return added_urls, removed_urls
def compare_sitemaps_ui(old_sitemap_url, new_sitemap_url):
    """Fetch two sitemaps and report the URLs added and removed between them.

    Args:
        old_sitemap_url: URL of the earlier sitemap.
        new_sitemap_url: URL of the later sitemap.

    Returns:
        A two-item list of strings ``[added, removed]``. On success each item
        is the matching URLs joined with newlines. On failure (request error,
        non-200 response, or malformed XML) both items carry the same
        human-readable error message, so both output boxes always receive
        plain text.
    """
    fetch_error = "Failed to fetch sitemaps."
    parse_error = "Failed to parse sitemaps."
    # Without an explicit timeout requests may wait on a stalled server forever.
    timeout_seconds = 30

    try:
        old_response = requests.get(old_sitemap_url, timeout=timeout_seconds)
        new_response = requests.get(new_sitemap_url, timeout=timeout_seconds)
    except requests.RequestException:
        # Malformed or empty URLs, DNS failures, refused connections, timeouts.
        return [fetch_error, fetch_error]

    if old_response.status_code != 200 or new_response.status_code != 200:
        return [fetch_error, fetch_error]

    try:
        old_urls = collect_urls_from_sitemap(old_response.content)
        new_urls = collect_urls_from_sitemap(new_response.content)
    except ET.ParseError:
        # A 200 response is not necessarily XML (e.g. an HTML error page).
        return [parse_error, parse_error]

    added_urls, removed_urls = compare_sitemaps(old_urls, new_urls)
    return ["\n".join(added_urls), "\n".join(removed_urls)]
# Output widgets, one per item of the list returned by compare_sitemaps_ui.
# NOTE(review): the gr.outputs namespace may be version-specific — confirm
# against the Gradio release this app is deployed with.
added_urls_box = gr.outputs.Textbox(label="Newly Added URLs")
removed_urls_box = gr.outputs.Textbox(label="Removed URLs")

# One pre-filled example row: [old sitemap URL, new sitemap URL].
example_rows = [
    ["http://www.bonrix.net/old-sitemap.xml", "http://www.bonrix.net/sitemap.xml"],
]

# Two free-text inputs feed compare_sitemaps_ui; its two results fill the boxes.
iface = gr.Interface(
    fn=compare_sitemaps_ui,
    inputs=["text", "text"],
    outputs=[added_urls_box, removed_urls_box],
    title="Sitemap Comparator",
    description="Enter the URLs of the old and new sitemaps to compare.",
    examples=example_rows,
)

# Start serving the interface as soon as the script is executed.
iface.launch()