Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -37,6 +37,44 @@ websites = [
|
|
| 37 |
"https://www.ihg.com"
|
| 38 |
]
|
| 39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
def generate_transparency_links(query,history):
|
| 41 |
"""
|
| 42 |
Generate links to transparency sites for a given query (e.g., brand name or website).
|
|
@@ -85,6 +123,26 @@ def runchecks(message,history):
|
|
| 85 |
results.append([site,speed])
|
| 86 |
df_log = pd.DataFrame(results)
|
| 87 |
return str(df_log)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
else:
|
| 89 |
return(generate_transparency_links(message,history))
|
| 90 |
|
|
|
|
| 37 |
"https://www.ihg.com"
|
| 38 |
]
|
| 39 |
|
| 40 |
+
# Sites iterated by the "check bots" and "check sitemaps" chat commands.
# Each entry is a bare origin (scheme + host, no trailing slash) because the
# checkers append their own path to it.
BTsites = [
    "https://www.banyantree.com",
    "https://www.angsana.com",
    "https://www.cassia.com",
    "https://www.dhawa.com",
    "https://www.garrya.com",
    "https://www.hommhotels.com",
    "https://www.foliohotels.com",
    "https://www.groupbanyan.com",
    "https://veya.banyantree.com",
    "https://escape.banyantree.com",
]
|
| 52 |
+
|
| 53 |
+
def check_robots(url):
    """Probe a site's ``/robots.txt`` with a HEAD request.

    Args:
        url: Site origin, e.g. ``"https://www.example.com"``. A trailing
            slash is tolerated.

    Returns:
        ``'OK'`` when the response status is 200 or 401, ``'Not OK'`` for any
        other status, or ``None`` when the request itself fails (the error is
        printed).
    """
    # Send a browser-like User-Agent; the original built this dict but never
    # passed it to the request.
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"}
    # Target robots.txt itself rather than the site root, matching the
    # function name and the caller's "Checking robots.txt for ..." message.
    robots_url = url.rstrip("/") + "/robots.txt"
    try:
        # NOTE(review): requests.head does not follow redirects by default,
        # so a 301/302 answer is reported as 'Not OK' -- confirm that is wanted.
        response = requests.head(robots_url, headers=headers, timeout=10)
    except requests.exceptions.RequestException as e:
        print(f"Error checking {url}: {e}")
        return None
    # NOTE(review): 401 is treated as success, presumably because the
    # resource exists but is access-controlled -- confirm with the author.
    if response.status_code in (200, 401):
        return 'OK'
    return 'Not OK'
|
| 64 |
+
|
| 65 |
+
def check_sitemap(url):
    """Probe a site's ``/sitemap.xml`` with a HEAD request.

    Args:
        url: Site origin, e.g. ``"https://www.example.com"``. A trailing
            slash is tolerated.

    Returns:
        ``'OK'`` when the response status is 200 or 401, ``'Not OK'`` for any
        other status, or ``None`` when the request itself fails (the error is
        printed).
    """
    # Send a browser-like User-Agent; the original built this dict but never
    # passed it to the request.
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"}
    new_url = url.rstrip("/") + "/sitemap.xml"
    try:
        # Bug fix: the original computed new_url but requested the bare site
        # URL, so the sitemap was never actually checked.
        # NOTE(review): requests.head does not follow redirects by default,
        # so a sitemap served via 301/302 is reported as 'Not OK'.
        response = requests.head(new_url, headers=headers, timeout=10)
    except requests.exceptions.RequestException as e:
        print(f"Error checking {url}: {e}")
        return None
    # NOTE(review): 401 is treated as success, presumably because the
    # resource exists but is access-controlled -- confirm with the author.
    if response.status_code in (200, 401):
        return 'OK'
    return 'Not OK'
|
| 77 |
+
|
| 78 |
def generate_transparency_links(query,history):
|
| 79 |
"""
|
| 80 |
Generate links to transparency sites for a given query (e.g., brand name or website).
|
|
|
|
| 123 |
results.append([site,speed])
|
| 124 |
df_log = pd.DataFrame(results)
|
| 125 |
return str(df_log)
|
| 126 |
+
elseif message == 'check bots' or message == 'Check Bots':
|
| 127 |
+
for site in BTsites:
|
| 128 |
+
print(f"Checking robots.txt for {site}...")
|
| 129 |
+
botsCheck = check_robots(site)
|
| 130 |
+
time.sleep(5)
|
| 131 |
+
if botsCheck is not None:
|
| 132 |
+
print(f"Bots for {site}: {botsCheck} seconds")
|
| 133 |
+
results.append([site,botsCheck])
|
| 134 |
+
df_log = pd.DataFrame(results)
|
| 135 |
+
return str(df_log)
|
| 136 |
+
elseif message == 'check sitemaps' or message == 'Check Sitemaps':
|
| 137 |
+
for site in BTsites:
|
| 138 |
+
print(f"Checking sitemaps for {site}...")
|
| 139 |
+
mapsCheck = check_sitemap(site)
|
| 140 |
+
time.sleep(5)
|
| 141 |
+
if mapsCheck is not None:
|
| 142 |
+
print(f"Bots for {site}: {mapsCheck} seconds")
|
| 143 |
+
results.append([site,mapsCheck])
|
| 144 |
+
df_log = pd.DataFrame(results)
|
| 145 |
+
return str(df_log)
|
| 146 |
else:
|
| 147 |
return(generate_transparency_links(message,history))
|
| 148 |
|