Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import requests | |
| import xml.etree.ElementTree as ET | |
def collect_urls_from_sitemap(sitemap_content):
    """Extract the page URLs listed in a sitemap XML document.

    Args:
        sitemap_content: The sitemap XML, as ``str`` or ``bytes``.

    Returns:
        A list with the ``<loc>`` value of every ``<url>`` entry, in document
        order. Surrounding whitespace is stripped and entries whose ``<loc>``
        is missing or empty are skipped.

    Raises:
        xml.etree.ElementTree.ParseError: If the content is not well-formed
            XML.
    """
    namespace = "{http://www.sitemaps.org/schemas/sitemap/0.9}"
    root = ET.fromstring(sitemap_content)
    urls = []
    for url_element in root.findall(".//" + namespace + "url"):
        # findtext() gives None for a missing <loc> and "" for an empty one;
        # normalise both to "" so they are dropped instead of leaking None
        # into the result (which would break a later str.join).
        location = (url_element.findtext(namespace + "loc") or "").strip()
        if location:
            urls.append(location)
    return urls
def compare_sitemaps(old_urls, new_urls):
    """Report which URLs were added to, or removed from, a sitemap.

    Args:
        old_urls: Iterable of URLs from the previous sitemap.
        new_urls: Iterable of URLs from the current sitemap.

    Returns:
        A tuple ``(added_urls, removed_urls)`` of lists. ``added_urls`` holds
        the URLs present only in ``new_urls`` and ``removed_urls`` those
        present only in ``old_urls``. Each list is de-duplicated and keeps the
        order in which the URLs first appear in its source sitemap, so the
        output is stable from run to run.
    """
    old_seen = set(old_urls)
    new_seen = set(new_urls)
    # dict.fromkeys() de-duplicates while preserving first-seen order; a plain
    # set difference would return the URLs in arbitrary order.
    added_urls = [url for url in dict.fromkeys(new_urls) if url not in old_seen]
    removed_urls = [url for url in dict.fromkeys(old_urls) if url not in new_seen]
    return added_urls, removed_urls
def compare_sitemaps_ui(old_sitemap_url, new_sitemap_url):
    """Download two sitemaps and describe their difference for the UI.

    Args:
        old_sitemap_url: URL of the previous sitemap XML.
        new_sitemap_url: URL of the current sitemap XML.

    Returns:
        A two-item list of strings, one per output textbox: the newly added
        URLs and the removed URLs, each joined with newlines. If either
        sitemap cannot be downloaded or parsed, both items contain an error
        message instead.
    """
    fetch_error = "Failed to fetch sitemaps."
    try:
        # A timeout keeps an unresponsive host from hanging the request
        # forever.
        old_response = requests.get(old_sitemap_url, timeout=30)
        new_response = requests.get(new_sitemap_url, timeout=30)
    except requests.RequestException:
        # Covers malformed URLs, DNS/connection failures and timeouts.
        return [fetch_error, fetch_error]

    if old_response.status_code != 200 or new_response.status_code != 200:
        # Both outputs are textboxes, so the failure is reported as plain
        # strings rather than dicts.
        return [fetch_error, fetch_error]

    try:
        old_urls = collect_urls_from_sitemap(old_response.content)
        new_urls = collect_urls_from_sitemap(new_response.content)
    except ET.ParseError:
        parse_error = "Failed to parse sitemaps."
        return [parse_error, parse_error]

    added_urls, removed_urls = compare_sitemaps(old_urls, new_urls)
    # Skip falsy entries (e.g. None from an empty <loc>) so join cannot fail.
    added_urls_text = "\n".join(url for url in added_urls if url)
    removed_urls_text = "\n".join(url for url in removed_urls if url)
    return [added_urls_text, removed_urls_text]
# Gradio front end: two text inputs (old and new sitemap URLs) and two text
# outputs (added and removed URLs).
iface = gr.Interface(
    fn=compare_sitemaps_ui,
    inputs=["text", "text"],
    # Use the top-level component class; the legacy ``gr.outputs`` namespace
    # was deprecated in Gradio 3 and removed in Gradio 4.
    outputs=[
        gr.Textbox(label="Newly Added URLs"),
        gr.Textbox(label="Removed URLs"),
    ],
    title="Sitemap Comparator",
    description="Enter the URLs of the old and new sitemaps to compare.",
    examples=[["http://www.bonrix.net/old-sitemap.xml", "http://www.bonrix.net/sitemap.xml"]],
)
iface.launch()