bonrix committed on
Commit
742b02a
·
1 Parent(s): 1683ffe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -24
app.py CHANGED
@@ -1,36 +1,54 @@
1
- import pandas as pd
2
  import gradio as gr
 
 
3
 
4
def _rows_only_in(left, right):
    """Return the rows of ``left`` that have no identical row in ``right``.

    Rows are compared on the columns the two frames have in common.
    """
    shared = [column for column in left.columns if column in right.columns]
    if not shared:
        # Nothing to compare on, so no row of ``left`` can be matched.
        return left

    # Left anti-join. De-duplicating ``right`` guarantees each row of
    # ``left`` appears exactly once in the merged result.
    marked = left.merge(
        right[shared].drop_duplicates(),
        on=shared,
        how="left",
        indicator=True,
    )
    unmatched = marked[marked["_merge"] == "left_only"]
    return unmatched[list(left.columns)]


def compare_excel_files(new_file, old_file):
    """Write the row-level differences between two Excel workbooks to disk.

    Args:
        new_file: Uploaded file object whose ``name`` attribute is the path
            of the newer ``.xlsx`` workbook.
        old_file: Uploaded file object whose ``name`` attribute is the path
            of the older ``.xlsx`` workbook.

    Returns:
        A tuple ``(missing_path, added_path)`` of the two result workbooks:
        ``missing_data_in_old.xlsx`` holds rows of the new file that are
        absent from the old file, and ``added_data_in_new.xlsx`` holds rows
        of the old file that are absent from the new file.

    Raises:
        ValueError: If either file does not have an ``.xlsx`` extension
            (pandas may also raise it when a shared column has incompatible
            dtypes in the two workbooks).
    """
    # Compare the extension case-insensitively so "REPORT.XLSX" is accepted.
    names = (new_file.name, old_file.name)
    if not all(name.lower().endswith(".xlsx") for name in names):
        raise ValueError("Invalid file type. Please upload Excel files with .xlsx extension.")

    df_new = pd.read_excel(new_file.name)
    df_old = pd.read_excel(old_file.name)

    # Compare whole rows. Element-wise ``isin`` followed by ``dropna`` only
    # kept rows in which *every* cell differed from the row at the same
    # index, and silently discarded rows containing blank cells.
    missing_in_old = _rows_only_in(df_new, df_old)
    added_in_new = _rows_only_in(df_old, df_new)

    output_file_missing = 'missing_data_in_old.xlsx'
    output_file_added = 'added_data_in_new.xlsx'
    missing_in_old.to_excel(output_file_missing, index=False)
    added_in_new.to_excel(output_file_added, index=False)

    return output_file_missing, output_file_added
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
# Two file uploads in, two downloadable result workbooks out.
iface = gr.Interface(
    title="Excel File Comparison",
    description="Upload two Excel files: one with new data and the other with old data to find missing data in old and added data in new.",
    fn=compare_excel_files,
    inputs=[
        gr.inputs.File(label="New Excel File"),
        gr.inputs.File(label="Old Excel File"),
    ],
    outputs=[
        gr.outputs.File(label="Missing Data in Old"),
        gr.outputs.File(label="Added Data in New"),
    ],
)

# Start the web UI.
iface.launch()
 
 
1
  import gradio as gr
2
+ import requests
3
+ import xml.etree.ElementTree as ET
4
 
 
 
 
 
5
 
6
def collect_urls_from_sitemap(sitemap_content):
    """Extract the page URLs listed in a sitemap XML document.

    Args:
        sitemap_content: The sitemap XML as ``str`` or ``bytes``.

    Returns:
        A list with the text of the ``<loc>`` child of every ``<url>``
        element in the sitemaps.org 0.9 namespace, in document order.
        Surrounding whitespace is stripped and entries whose ``<loc>`` is
        missing or empty are skipped.

    Raises:
        xml.etree.ElementTree.ParseError: If the content is not
            well-formed XML.
    """
    namespace = "{http://www.sitemaps.org/schemas/sitemap/0.9}"
    root = ET.fromstring(sitemap_content)

    urls = []
    for url_element in root.findall(f".//{namespace}url"):
        # ``findtext`` yields None when <loc> is absent and "" when it is
        # empty; pretty-printed sitemaps also pad the URL with whitespace.
        location = (url_element.findtext(f"{namespace}loc") or "").strip()
        if location:
            urls.append(location)

    return urls
 
16
 
 
 
 
 
 
17
 
18
def compare_sitemaps(old_urls, new_urls):
    """Compute which URLs were added to and removed from a sitemap.

    Args:
        old_urls: Iterable of URLs from the old sitemap.
        new_urls: Iterable of URLs from the new sitemap.

    Returns:
        A tuple ``(added_urls, removed_urls)`` of lists. ``added_urls`` holds
        the URLs present only in ``new_urls`` and ``removed_urls`` those
        present only in ``old_urls``. Each list is de-duplicated and keeps
        the order in which the URLs first appear in its source sitemap, so
        the result is deterministic across runs.
    """
    # Materialise once so one-shot iterables (generators) can be read twice.
    old_urls = list(old_urls)
    new_urls = list(new_urls)
    old_seen = set(old_urls)
    new_seen = set(new_urls)

    # ``dict.fromkeys`` de-duplicates while preserving first-seen order.
    added_urls = [url for url in dict.fromkeys(new_urls) if url not in old_seen]
    removed_urls = [url for url in dict.fromkeys(old_urls) if url not in new_seen]
    return added_urls, removed_urls
22
+
23
+
24
def compare_sitemaps_ui(old_sitemap_url, new_sitemap_url):
    """Fetch two sitemaps and describe the URL differences as plain text.

    Args:
        old_sitemap_url: URL of the old sitemap XML.
        new_sitemap_url: URL of the new sitemap XML.

    Returns:
        A report listing newly added and removed URLs, or a short error
        message if a sitemap cannot be downloaded (network error or non-200
        status) or is not well-formed XML.
    """
    # Without a timeout an unresponsive server would hang the UI forever.
    request_timeout_seconds = 30
    try:
        old_response = requests.get(old_sitemap_url, timeout=request_timeout_seconds)
        new_response = requests.get(new_sitemap_url, timeout=request_timeout_seconds)
    except requests.RequestException:
        # Covers invalid/empty URLs, DNS and connection errors, and timeouts.
        return "Failed to fetch sitemaps."

    if old_response.status_code != 200 or new_response.status_code != 200:
        return "Failed to fetch sitemaps."

    try:
        old_urls = collect_urls_from_sitemap(old_response.content)
        new_urls = collect_urls_from_sitemap(new_response.content)
    except ET.ParseError:
        return "Failed to parse sitemaps."

    added_urls, removed_urls = compare_sitemaps(old_urls, new_urls)

    # Skip empty entries so a blank <loc> can never break the join.
    added_urls_text = "\n".join(url for url in added_urls if url)
    removed_urls_text = "\n".join(url for url in removed_urls if url)

    return f"Newly Added URLs:\n{added_urls_text}\n\nRemoved URLs:\n{removed_urls_text}"
43
+
44
 
45
# Two text boxes (old and new sitemap URL) in, one text report out.
iface = gr.Interface(
    title="Sitemap Comparator",
    description="Enter the URLs of the old and new sitemaps to compare.",
    fn=compare_sitemaps_ui,
    inputs=["text", "text"],
    outputs="text",
    examples=[
        ["https://example.com/old-sitemap.xml", "https://example.com/new-sitemap.xml"],
    ],
)

# Start the web UI.
iface.launch()