Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import re
|
| 3 |
+
import time
|
| 4 |
+
import requests
|
| 5 |
+
import numpy as np
|
| 6 |
+
import pandas as pd
|
| 7 |
+
from bs4 import BeautifulSoup
|
| 8 |
+
from playwright.sync_api import sync_playwright
|
| 9 |
+
import gradio as gr
|
| 10 |
+
import folium
|
| 11 |
+
from folium.plugins import FloatImage
|
| 12 |
+
|
| 13 |
+
# Configuration
# Path of the CSV file used to cache the scraped bathing-site table.
file_name = 'bathing_sites.csv'
# Page that is scraped for the table of bathing sites.
url = 'https://eau.gouvernement.lu/fr/domaines-activite/eauxbaignade/sites-de-baignade.html'
|
| 16 |
+
|
| 17 |
+
# Data processing functions
|
| 18 |
+
def get_final_url(url):
    """Resolve *url* to the address the browser ends up on.

    A link that already carries both ``&X=`` and ``&Y=`` parameters is
    returned unchanged without starting a browser. Any other link is opened
    in headless Chromium and the page URL observed after a short wait is
    returned.

    Args:
        url: The link to resolve.

    Returns:
        The URL string, either unchanged or as reported by the browser page.

    Raises:
        Playwright errors (for example a navigation timeout after 5000 ms)
        propagate to the caller.
    """
    # Both coordinates must be present to skip the browser: a link that has
    # X but lacks Y still needs to be resolved.
    if '&X=' in url and '&Y=' in url:
        return url

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_page()
            page.set_extra_http_headers({"max-redirects": "9"})
            page.goto(url, timeout=5000)
            # Presumably lets client-side redirects settle before the
            # address is read -- TODO confirm 2 s is sufficient.
            page.wait_for_timeout(2000)
            return page.url
        finally:
            # Release the browser even when navigation raises.
            browser.close()
|
| 29 |
+
|
| 30 |
+
def extract_coordinates(url):
    """Pull the digits following ``X=`` and ``Y=`` out of *url*.

    Args:
        url: String to search.

    Returns:
        A two-element ``pandas.Series`` ``[x, y]``. Each element is the
        matched digit string, or ``None`` when that parameter is not found.
    """
    captured = []
    for axis in ('X', 'Y'):
        found = re.search(axis + r'=(\d+)', url)
        captured.append(found.group(1) if found else None)
    return pd.Series(captured)
|
| 38 |
+
|
| 39 |
+
def web_mercator_to_wgs84(x, y):
    """Convert spherical Web Mercator metres to degrees.

    Args:
        x: Easting in metres.
        y: Northing in metres.

    Returns:
        A ``(lat, lon)`` tuple in degrees. Note the order: latitude first.
    """
    earth_radius = 6378137  # Earth's radius in meters
    rad_to_deg = 180 / np.pi

    longitude = (x / earth_radius) * rad_to_deg
    # Inverse of the Mercator projection for the latitude.
    latitude_rad = 2 * np.arctan(np.exp(y / earth_radius)) - np.pi / 2
    latitude = rad_to_deg * latitude_rad
    return latitude, longitude
|
| 44 |
+
|
| 45 |
+
def file_download():
    """Scrape the bathing-site table, normalise it and cache it as CSV.

    Reads the first HTML table found at the module-level ``url``, attaches
    the image source and the geoportail link found in the page's table
    cells, resolves those links with ``get_final_url``, translates and cleans
    the text columns, converts the X/Y values found in the resolved links to
    latitude/longitude, and writes the result to ``file_name``.

    Returns:
        pandas.DataFrame with the columns ``Lake``, ``Sector``,
        ``Water Quality``, ``Swimming allowed``, ``Reason for ban``, ``lat``
        and ``long``.

    Raises:
        requests.RequestException: If the page cannot be fetched within the
            timeout or answers with an HTTP error status.
        ValueError: If the scraped images or links do not line up one-to-one
            with the table rows.
    """
    df = pd.read_html(url)[0]

    # A timeout keeps a stalled server from blocking the app indefinitely,
    # and an error page must not be parsed as if it were the site list.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    images = [cell.find("img")["src"] for cell in soup.select("td:has(img)")]
    hrefs = (cell.find("a")["href"] for cell in soup.select("td:has(a)"))
    links = [href for href in hrefs if 'geoportail' in href]
    if len(images) != len(df) or len(links) != len(df):
        raise ValueError(
            f"Scraped {len(images)} images and {len(links)} geoportail links "
            f"for {len(df)} table rows; the page layout may have changed."
        )

    df['images'] = images
    df['geoport'] = [get_final_url(link) for link in links]

    df.columns = ['Lake', 'Sector', 'Water Quality', 'Swimming allowed', 'Reason for ban', 'Traffic lights', 'URL coordinates']

    name_trim = ['Lac de la ', 'Lac de ', 'Etangs de ', 'Lac d\'']
    quality_dict = {'Excellente': 'Excellent', 'Bonne': 'Good', 'Suffisante': 'Adequate', 'Insuffisante': 'Inadequate'}
    # Unknown quality labels are kept as scraped rather than dropped.
    df['Water Quality'] = df['Water Quality'].map(quality_dict).fillna(df['Water Quality'])
    df['Lake'] = df['Lake'].str.replace('|'.join(name_trim), '', regex=True)
    # Keep only the part of the name before any parenthesised suffix.
    df['Lake'] = df['Lake'].str.split('(').str[0].str.strip()
    df['Sector'] = df['Sector'].astype(str).apply(lambda x: 'Designated Zone' if 'baignade' in x else x)
    # Cells holding the page's footnote text are treated like missing values;
    # after astype(str) a missing value is the string 'nan'.
    df['Reason for ban'] = df['Reason for ban'].astype(str).apply(lambda x: 'nan' if '* Les informations ' in x else x)
    df['Reason for ban'] = df['Reason for ban'].replace({'nan': 'No ban'})
    df['Swimming allowed'] = df['Swimming allowed'].astype('string')
    # The image source decides the flag: 'greng' -> Yes, 'roud' -> No.
    df.loc[df['Traffic lights'].str.contains('greng'), 'Swimming allowed'] = 'Yes'
    df.loc[df['Traffic lights'].str.contains('roud'), 'Swimming allowed'] = 'No'
    df = df.fillna('N/A')

    df[['X', 'Y']] = df['URL coordinates'].apply(extract_coordinates)
    # Unparseable coordinates become NaN instead of raising.
    df[['X', 'Y']] = df[['X', 'Y']].apply(pd.to_numeric, errors='coerce')
    df[['lat', 'long']] = df.apply(lambda row: web_mercator_to_wgs84(row['X'], row['Y']), axis=1, result_type='expand')
    df = df.drop(columns=['Traffic lights', 'URL coordinates', 'X', 'Y'])

    df.to_csv(file_name, index=False)
    return df
|
| 78 |
+
|
| 79 |
+
def load_data(force_refresh=False):
    """Return the bathing-site table, refreshing the CSV cache when needed.

    Args:
        force_refresh: When true, always re-download instead of using the
            cached CSV.

    Returns:
        The DataFrame produced by ``file_download()`` when a refresh is
        forced, the cache file is missing, or it is older than 3600 seconds;
        otherwise the DataFrame read back from ``file_name``.
    """
    cache_is_fresh = (
        os.path.exists(file_name)
        and (time.time() - os.path.getmtime(file_name)) <= 3600
    )
    if force_refresh or not cache_is_fresh:
        return file_download()

    # file_download() stores the literal placeholder 'N/A'. pandas' default
    # NA parsing would turn it (and strings such as 'nan') back into NaN,
    # making cached data differ from freshly downloaded data. Only empty
    # fields, which is how NaN coordinates are written, count as missing.
    return pd.read_csv(file_name, keep_default_na=False, na_values=[''])
|
| 83 |
+
|
| 84 |
+
def create_map(force_refresh=False):
    """Render the bathing sites as a folium map and return its HTML.

    Each site with usable coordinates becomes a circle marker that is green
    when swimming is allowed ('Yes'), red when it is not ('No') and gray
    otherwise. The marker popup lists the site's details.

    Args:
        force_refresh: Passed through to ``load_data``; when true the data
            is re-downloaded before the map is drawn.

    Returns:
        The HTML string produced by the map's ``_repr_html_()``.
    """
    from html import escape  # stdlib; local so the file's import block is untouched

    df = load_data(force_refresh)

    # folium rejects NaN locations, so sites whose coordinates could not be
    # extracted are left off the map instead of aborting the whole render.
    sites = df if df.empty else df.dropna(subset=['lat', 'long'])

    # Create base map with Luxembourg coordinates
    if sites.empty:
        m = folium.Map(location=[49.8153, 6.1296], zoom_start=9)
    else:
        m = folium.Map(location=[sites['lat'].mean(), sites['long'].mean()], zoom_start=9)

    marker_colors = {'Yes': 'green', 'No': 'red'}

    # Add markers
    for _, row in sites.iterrows():
        color = marker_colors.get(row['Swimming allowed'], 'gray')

        # The text fields come from a scraped web page, so they are escaped
        # before being embedded in the popup markup.
        popup_text = f"""
        <b>Lake:</b> {escape(str(row['Lake']))}<br>
        <b>Sector:</b> {escape(str(row['Sector']))}<br>
        <b>Latitude:</b> {row['lat']:.6f}<br>
        <b>Longitude:</b> {row['long']:.6f}<br>
        <b>Water Quality:</b> {escape(str(row['Water Quality']))}<br>
        <b>Swimming allowed:</b> {escape(str(row['Swimming allowed']))}<br>
        <b>Reason for ban:</b> {escape(str(row['Reason for ban']))}
        """

        folium.CircleMarker(
            location=[row['lat'], row['long']],
            radius=8,
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.7,
            popup=folium.Popup(popup_text, max_width=300),
        ).add_to(m)

    # Use OpenStreetMap tiles
    folium.TileLayer('openstreetmap').add_to(m)

    # Remove attribution completely
    # NOTE(review): OpenStreetMap's tile usage policy appears to require
    # visible attribution -- confirm that hiding it is acceptable.
    m.get_root().html.add_child(folium.Element("""
    <style>
        .leaflet-control-attribution {
            display: none !important;
        }
    </style>
    """))

    # Return HTML representation
    return m._repr_html_()
|
| 132 |
+
|
| 133 |
+
# Create Gradio interface: a title, a refresh button and an HTML pane that
# holds the rendered map.
with gr.Blocks(title="LuxSplash") as app:
    gr.Markdown("# 🏊♂️ LuxSplash")
    gr.Markdown("[Freedom Luxembourg](https://freeletz.lu)")

    with gr.Row():
        refresh_btn = gr.Button("Refresh Data", variant="primary")

    map_html = gr.HTML()

    # Page load draws the map from load_data(False), which may reuse the
    # cached CSV.
    app.load(lambda: create_map(False), None, map_html)

    # The button bypasses the cache and re-downloads the data.
    refresh_btn.click(lambda: create_map(True), None, map_html)

    gr.Markdown(
        "Data sourced from the official Luxembourg government website, the only authoritative source for bathing site information: "
        "[eau.gouvernement.lu](https://eau.gouvernement.lu/fr/domaines-activite/eauxbaignade/sites-de-baignade.html )"
    )


# Start the server only when this file is run as a script.
if __name__ == "__main__":
    app.launch()
|