Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import plotly.express as px | |
| import matplotlib.pyplot as plt | |
| import matplotlib.colors as mcolors | |
| import folium | |
| from streamlit_folium import st_folium | |
| from shapely.geometry import Point, box | |
| import geopandas as gpd | |
| import duckdb | |
| import rasterio | |
| from huggingface_hub import HfFileSystem | |
| import shapely.wkb | |
| from dotenv import load_dotenv | |
| import os | |
| from pathlib import Path | |
| # Import extracted utilities (shared with app_local.py) | |
| from utils import ( | |
| FLAIR_COLORS, CLASS_LABELS, process_mosaic | |
| ) | |
# --- Cloud Configuration ---
# Read environment variables from .env files: first the default lookup, then
# the sibling "DL_cologne_green" project folder when that file exists.
load_dotenv()
env_path = Path(__file__).parent.parent.joinpath("DL_cologne_green", ".env")
if env_path.exists():
    load_dotenv(env_path)

# Hugging Face credentials and dataset location (both overridable via env).
HF_TOKEN = os.environ.get("HF_TOKEN")
DATASET_ID = os.environ.get("DATASET_ID", "Rahul-fix/cologne-green-data")
BASE_URL = "hf://datasets/" + DATASET_ID

# storage_options handed to the pandas/geopandas readers; None without a token.
if HF_TOKEN:
    STORAGE_OPTS = {"token": HF_TOKEN}
else:
    STORAGE_OPTS = None
# --- DuckDB Connection ---
def get_db_connection():
    """Create an in-memory DuckDB connection prepared for remote spatial queries.

    The ``spatial`` and ``httpfs`` extensions are installed and loaded. When
    ``HF_TOKEN`` is set, an authenticated ``HfFileSystem`` is registered on
    the connection as well.

    Returns:
        The configured DuckDB connection.
    """
    connection = duckdb.connect(database=":memory:")
    connection.execute("INSTALL spatial; LOAD spatial;")
    connection.execute("INSTALL httpfs; LOAD httpfs;")
    if not HF_TOKEN:
        return connection
    connection.register_filesystem(HfFileSystem(token=HF_TOKEN))
    return connection


# Module-level connection used by the DuckDB-based loaders in this file.
con = get_db_connection()
# --- Cloud Data Loading Functions (mirror utils.py structure) ---
def safe_load_wkb(x):
    """Decode a WKB payload into a shapely geometry, returning None on failure.

    Args:
        x: WKB payload; it is converted with ``bytes(x)`` before parsing.

    Returns:
        The decoded geometry, or None when ``x`` cannot be converted or parsed.
    """
    try:
        return shapely.wkb.loads(bytes(x))
    except Exception:
        # Best-effort decode. Catching Exception instead of using a bare
        # ``except`` lets KeyboardInterrupt and SystemExit propagate.
        return None
def load_quarters_with_stats():
    """Load Veedel boundaries with stats - Cloud version of utils.load_quarters_with_stats.

    Left-joins the quarter boundaries with the extended statistics on ``name``
    (missing ``green_area_m2`` / ``ndvi_mean`` become 0), decodes the WKB
    geometries, and drops rows whose geometry cannot be decoded.

    Returns:
        A GeoDataFrame in EPSG:4326 with an added ``green_pct`` column. On any
        failure the error is shown via ``st.error`` and an empty GeoDataFrame
        is returned.
    """
    try:
        # One area column per land-cover class id: area_0 .. area_18.
        area_columns = ", ".join(f"s.area_{i}" for i in range(19))
        query = f"""
            SELECT
                v.name, ST_AsWKB(v.geometry) as geometry,
                COALESCE(s.green_area_m2, 0) as green_area_m2,
                COALESCE(s.ndvi_mean, 0) as ndvi_mean,
                v.Shape_Area,
                {area_columns}
            FROM '{BASE_URL}/data/boundaries/Stadtviertel.parquet' v
            LEFT JOIN '{BASE_URL}/data/stats/extended_stats.parquet' s ON v.name = s.name
        """
        df = con.execute(query).fetchdf()
        df['geometry'] = df['geometry'].apply(safe_load_wkb)
        df = df.dropna(subset=['geometry'])
        gdf = gpd.GeoDataFrame(df, geometry='geometry', crs="EPSG:4326")

        # CRS check: remote data may be in EPSG:25832. A minimum x above 180
        # cannot be a longitude, so relabel the coordinates and reproject.
        # set_crs(allow_override=True) is the supported way to replace an
        # existing CRS; assigning to ``gdf.crs`` directly is deprecated.
        if not gdf.empty and gdf.total_bounds[0] > 180:
            gdf = gdf.set_crs("EPSG:25832", allow_override=True)
            gdf = gdf.to_crs("EPSG:4326")

        # Calculate green_pct
        if 'green_area_m2' in gdf.columns and 'Shape_Area' in gdf.columns:
            gdf['green_pct'] = (gdf['green_area_m2'] / gdf['Shape_Area']) * 100
        else:
            gdf['green_pct'] = 0.0
        return gdf
    except Exception as e:
        st.error(f"Error loading quarters: {e}")
        return gpd.GeoDataFrame()
def load_boroughs():
    """Load borough boundaries - Cloud version of utils.load_boroughs.

    Reads ``STB_NAME`` (exposed as ``name``) and the geometry from the
    Stadtbezirke parquet file, decodes the WKB geometries, and drops rows
    whose geometry cannot be decoded.

    Returns:
        A GeoDataFrame in EPSG:4326. On any failure the error is shown via
        ``st.error`` and an empty GeoDataFrame is returned.
    """
    try:
        query = f"SELECT STB_NAME as name, ST_AsWKB(geometry) as geometry FROM '{BASE_URL}/data/boundaries/Stadtbezirke.parquet'"
        df = con.execute(query).fetchdf()
        df['geometry'] = df['geometry'].apply(safe_load_wkb)
        df = df.dropna(subset=['geometry'])
        gdf = gpd.GeoDataFrame(df, geometry='geometry', crs="EPSG:4326")

        # A minimum x above 180 cannot be a longitude: treat the coordinates
        # as EPSG:25832 and reproject. set_crs(allow_override=True) replaces
        # the deprecated direct assignment to ``gdf.crs``.
        if not gdf.empty and gdf.total_bounds[0] > 180:
            gdf = gdf.set_crs("EPSG:25832", allow_override=True)
            gdf = gdf.to_crs("EPSG:4326")
        return gdf
    except Exception as e:
        st.error(f"Error loading boroughs: {e}")
        return gpd.GeoDataFrame()
def get_tile_to_veedel_mapping():
    """Get tile-to-Veedel mapping - Cloud version of utils.get_tile_to_veedel_mapping.

    Builds one square per row of the tile metadata CSV (origin easting and
    northing, extended by 1000 in each direction, in EPSG:25832) and
    spatially joins the squares with the quarter boundaries.

    Returns:
        A dict mapping quarter ``name`` to the list of ``Kachelname`` values
        of the tiles intersecting it. On any failure a warning is shown and
        an empty dict is returned.
    """
    try:
        tiles_df = pd.read_csv(f"{BASE_URL}/data/metadata/cologne_tiles.csv", storage_options=STORAGE_OPTS)
        east = tiles_df['Koordinatenursprung_East']
        north = tiles_df['Koordinatenursprung_North']
        # Zip the two columns instead of iterating rows with iterrows().
        geometries = [box(e, n, e + 1000, n + 1000) for e, n in zip(east, north)]
        tiles_gdf = gpd.GeoDataFrame(tiles_df, geometry=geometries, crs="EPSG:25832")

        q_gdf = gpd.read_parquet(f"{BASE_URL}/data/boundaries/Stadtviertel.parquet", storage_options=STORAGE_OPTS)
        if q_gdf.crs != "EPSG:25832":
            q_gdf = q_gdf.to_crs("EPSG:25832")

        joined = gpd.sjoin(tiles_gdf, q_gdf, how="inner", predicate="intersects")
        return joined.groupby('name')['Kachelname'].apply(list).to_dict()
    except Exception as e:
        # Still best-effort (empty mapping), but report the cause instead of
        # failing silently so a missing raster overlay can be diagnosed.
        st.warning(f"Could not build tile-to-Veedel mapping: {e}")
        return {}
def list_available_tiles():
    """List available tiles from cloud (processed, web_optimized, or raw).

    Tile names are the file stems of ``*_mask.tif`` files (with ``_mask``
    removed) in the ``processed`` and ``web_optimized`` folders, plus the
    stems of ``*.jp2`` files in ``raw`` (kept for the satellite view).

    Returns:
        A sorted list of unique tile names, or an empty list when the remote
        listing fails.
    """
    try:
        fs = HfFileSystem(token=HF_TOKEN)
        data_root = f"datasets/{DATASET_ID}/data"
        tiles = set()

        # Mask rasters live in two folders with the same naming scheme.
        for folder in ("processed", "web_optimized"):
            for mask_path in fs.glob(f"{data_root}/{folder}/*_mask.tif"):
                tiles.add(Path(mask_path).stem.replace("_mask", ""))

        # Also include raw tiles (for satellite view)
        tiles.update(Path(raw_path).stem for raw_path in fs.glob(f"{data_root}/raw/*.jp2"))

        return sorted(tiles)
    except Exception:
        # Best-effort listing; Exception (not a bare except) keeps
        # KeyboardInterrupt and SystemExit propagating.
        return []
def get_mosaic_data_remote(tile_names, layer_type):
    """Load and mosaic tiles - Cloud version of utils.get_mosaic_data_local.

    For each tile the first readable candidate file is downloaded
    (``web_optimized`` first, then ``processed``, then the raw ``.jp2`` for
    the "Satellite" layer) and opened as an in-memory rasterio dataset. The
    opened datasets are passed to ``process_mosaic``.

    Args:
        tile_names: Iterable of tile names to load.
        layer_type: Layer label; "Land Cover" selects ``_mask`` files, "NDVI"
            selects ``_ndvi`` files, anything else uses no suffix.

    Returns:
        Whatever ``process_mosaic`` returns, or ``(None, None)`` when loading
        or processing raises.
    """
    fs = HfFileSystem(token=HF_TOKEN)

    # The suffix depends only on the layer, so compute it once.
    if "Land Cover" in layer_type:
        suffix = "_mask"
    elif "NDVI" in layer_type:
        suffix = "_ndvi"
    else:
        suffix = ""
    data_root = f"datasets/{DATASET_ID}/data"

    sources = []
    memfiles = []
    try:
        for tile in tile_names:
            paths = [
                f"{data_root}/web_optimized/{tile}{suffix}.tif",
                f"{data_root}/processed/{tile}{suffix}.tif",
            ]
            if layer_type == "Satellite":
                paths.append(f"{data_root}/raw/{tile}.jp2")

            found_bytes = None
            for p in paths:
                try:
                    with fs.open(p, "rb") as f:
                        found_bytes = f.read()
                    break
                except Exception:
                    # Candidate missing or unreadable: try the next one.
                    continue

            if found_bytes:
                memfile = rasterio.MemoryFile(found_bytes)
                # Register before open() so it is closed even if open() fails.
                memfiles.append(memfile)
                sources.append(memfile.open())

        # Use shared processing logic
        return process_mosaic(sources, layer_type)
    except Exception:
        return None, None
    finally:
        # Cleanup runs on success and on failure. Datasets are closed before
        # their backing MemoryFiles.
        for handle in (*sources, *memfiles):
            try:
                handle.close()
            except Exception:
                # A failing close must not mask the result or raise to the caller.
                pass
# 1. Page Configuration
st.set_page_config(
    page_title="GreenCologne (Cloud)",
    layout="wide",
)
st.title("🌿 GreenCologne (Cloud Dashboard)")

# --- Sidebar: Dataset Info ---
# Each entry is (expander title, markdown body); rendered in this order.
_SIDEBAR_SECTIONS = (
    (
        "📡 Data Sources",
        """
**Satellite Imagery**
[OpenNRW DOP10](https://www.bezreg-koeln.nrw.de/geobasis-nrw/produkte-und-dienste/luftbild-und-satellitenbildinformationen/aktuelle-luftbild-und-0) – 10cm resolution aerial photos (2022-2025)
**Administrative Boundaries**
[Offene Daten Köln](https://www.offenedaten-koeln.de/) – Stadtviertel & Stadtbezirke
**Coverage**
840 tiles covering Cologne's 86 Veedels
""",
    ),
    (
        "🤖 Model & Methodology",
        """
**Land Cover Classification**
[FLAIR-Hub](https://huggingface.co/IGNF/FLAIR-HUB_LC-A_IR_swinbase-upernet) – Deep learning semantic segmentation trained on French aerial imagery, adapted for German urban landscapes.
**19 Land Cover Classes**
Buildings, deciduous trees, herbaceous vegetation, water, impervious surfaces, agricultural land, and more.
**NDVI Calculation**
Normalized Difference Vegetation Index computed from NIR and Red bands:
`NDVI = (NIR - Red) / (NIR + Red)`
**Green Area Detection**
Classes 4 (Deciduous), 5 (Coniferous), 17 (Herbaceous), and 18 (Agricultural) are classified as green areas.
""",
    ),
    (
        "🙏 Acknowledgments",
        """
- [CorrelAid](https://correlaid.org/) – Data-for-good community
- [OpenNRW](https://www.opengeodata.nrw.de/) – Open geospatial data
- [IGNF/FLAIR-Hub](https://huggingface.co/IGNF/FLAIR-HUB_LC-A_IR_swinbase-upernet) – Segmentation model
- [Stadt Köln](https://www.stadt-koeln.de/) – Open administrative data
""",
    ),
)

with st.sidebar:
    st.header("ℹ️ About This Dataset")
    for _section_title, _section_body in _SIDEBAR_SECTIONS:
        with st.expander(_section_title, expanded=False):
            st.markdown(_section_body)

# Warn when no Hugging Face token is configured.
if not HF_TOKEN:
    st.warning("⚠️ HF_TOKEN missing. Set in .env or Streamlit Secrets.")
# 2. Data Loading
# NOTE(review): none of these loaders is cached in this file; presumably they
# run again on every script rerun - confirm whether caching is wanted.
gdf_quarters = load_quarters_with_stats()
gdf_boroughs = load_boroughs()
tile_mapping = get_tile_to_veedel_mapping()
available_tiles = list_available_tiles()

# 3. State Management
# Seed each session key only when it is not present yet.
_SESSION_DEFAULTS = (
    ('selected_veedel', "All"),
    ('map_center', [50.9375, 6.9603]),  # Cologne
    ('map_zoom', 11),
    ('map_click_counter', 0),
)
for _state_key, _state_default in _SESSION_DEFAULTS:
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
# --- Helper Functions ---
def update_zoom_for_veedel(veedel_name):
    """Point the stored map centre and zoom at the given selection.

    "All" resets to the fixed city-wide centre at zoom 10. Otherwise the
    centroid of the first quarter named ``veedel_name`` becomes the centre at
    zoom 14. Nothing changes when the quarters are unavailable or no quarter
    matches.

    Args:
        veedel_name: Quarter name, or "All" for the city-wide view.
    """
    if veedel_name == "All":
        st.session_state['map_center'] = [50.9375, 6.9603]
        st.session_state['map_zoom'] = 10
        return

    if gdf_quarters is None or gdf_quarters.empty:
        return

    hits = gdf_quarters[gdf_quarters['name'] == veedel_name]
    if hits.empty:
        return

    center = hits.geometry.centroid.iloc[0]
    # The session state stores the centre as [lat, lon], i.e. [y, x].
    st.session_state['map_center'] = [center.y, center.x]
    st.session_state['map_zoom'] = 14
def on_veedel_change():
    """Selectbox callback: store the widget's value as the selected Veedel and refocus the map."""
    chosen = st.session_state['selected_veedel_widget']
    st.session_state['selected_veedel'] = chosen
    update_zoom_for_veedel(chosen)
# --- Layout ---
col_map, col_details = st.columns([0.65, 0.35], gap="medium")

with col_details:
    st.markdown("### GreenCologne Analysis")
    tab_opts, tab_stats = st.tabs(["🛠️ Options", "📊 Statistics"])

    has_quarters = gdf_quarters is not None and not gdf_quarters.empty
    veedel_list = ["All"]
    if has_quarters:
        veedel_list += sorted(gdf_quarters['name'].unique().tolist())

    # --- Tab 1: Options ---
    with tab_opts:
        # Sync Widget: the selectbox value is driven only through its session
        # key. Passing ``index=`` as well would set the same widget twice and
        # makes Streamlit show its default-value/Session-State warning after
        # a map click changes the selection.
        current_veedel = st.session_state['selected_veedel']
        if current_veedel not in veedel_list:
            # Fall back to "All", which is always a valid option.
            current_veedel = "All"
        if st.session_state.get('selected_veedel_widget') != current_veedel:
            st.session_state['selected_veedel_widget'] = current_veedel

        selected_veedel = st.selectbox(
            "Select Quarter (Veedel/Stadtviertel):",
            veedel_list,
            key='selected_veedel_widget',
            on_change=on_veedel_change,
        )

        # Tile Logic - Only load tiles when a specific veedel is selected
        tiles_to_display = []
        if selected_veedel != "All":
            veedel_tiles = set(tile_mapping.get(selected_veedel, []))
            tiles_to_display = sorted(t for t in veedel_tiles if t in available_tiles)

        # Layer Selection
        layer_selection = st.radio(
            "Select Layer:",
            ["Satellite", "Land Cover", "NDVI"],
            index=2,
            horizontal=True,
        )

        # Legends
        st.markdown("#### Legends")
        st.markdown("**Veedel Health (Mean NDVI)**")
        st.markdown("""
<div style="background: linear-gradient(to right, #d73027, #ffffbf, #1a9850); height: 10px; width: 100%; border-radius: 5px;"></div>
<div style="display: flex; justify-content: space-between; font-size: 10px; margin-top: 2px;"><span>0.0 (Low)</span><span>0.3</span><span>0.6+ (High)</span></div>
<div style="font-size: 11px; color: #666; margin-bottom: 15px;">*Average vegetation index per district.</div>
""", unsafe_allow_html=True)

        if layer_selection == "Land Cover":
            st.markdown("**Land Cover Classes**")
            # Collect the entries and join once instead of += in a loop.
            legend_items = []
            for cls_id, label in CLASS_LABELS.items():
                if cls_id == 13:
                    # Class 13 is left out of the legend.
                    continue
                c = FLAIR_COLORS[cls_id]
                legend_items.append(
                    f"<div style='display: flex; align-items: center;'><div style='width: 12px; height: 12px; background: rgba({c[0]},{c[1]},{c[2]},{c[3]/255}); margin-right: 5px; border: 1px solid #ccc;'></div>{label}</div>"
                )
            legend_html = (
                "<div style='display: grid; grid-template-columns: 1fr 1fr; gap: 5px; font-size: 12px;'>"
                + "".join(legend_items)
                + "</div>"
            )
            st.markdown(legend_html, unsafe_allow_html=True)
        elif layer_selection == "NDVI":
            st.markdown("**Pixel Vegetation Index (NDVI)**")
            st.markdown("""
<div style="background: linear-gradient(to right, #d73027, #ffffbf, #1a9850); height: 10px; width: 100%; border-radius: 5px;"></div>
<div style="display: flex; justify-content: space-between; font-size: 10px; margin-top: 2px;"><span>-0.4 (Water)</span><span>0.3</span><span>1.0 (Dense)</span></div>
""", unsafe_allow_html=True)

    # --- Tab 2: Statistics ---
    with tab_stats:
        if has_quarters:
            title = "Cologne (All Veedels) Stats"
            area_m2 = 0
            total_area_m2 = 1
            class_data_source = None

            if selected_veedel != "All":
                row = gdf_quarters[gdf_quarters['name'] == selected_veedel]
                if not row.empty:
                    title = f"{selected_veedel} Stats"
                    area_m2 = row['green_area_m2'].values[0] if 'green_area_m2' in row else 0
                    total_area_m2 = row['Shape_Area'].values[0] if 'Shape_Area' in row else 1
                    class_data_source = row
                else:
                    st.warning(f"No stats for {selected_veedel}")
            else:
                # City-wide totals: sum every quarter.
                area_m2 = gdf_quarters['green_area_m2'].sum() if 'green_area_m2' in gdf_quarters else 0
                total_area_m2 = gdf_quarters['Shape_Area'].sum() if 'Shape_Area' in gdf_quarters else 1
                class_cols = [c for c in gdf_quarters.columns if str(c).startswith('area_')]
                if class_cols:
                    class_data_source = pd.DataFrame([{c: gdf_quarters[c].sum() for c in class_cols}])

            st.markdown(f"#### {title}")
            c1, c2 = st.columns(2)
            c1.metric("Green Area", f"{(area_m2/10000):.2f} ha")
            # Guard the ratio: a zero or missing total area would otherwise
            # render as inf% or nan%.
            has_total_area = bool(total_area_m2) and not pd.isna(total_area_m2)
            coverage_pct = (area_m2 / total_area_m2) * 100 if has_total_area else 0.0
            c2.metric("Green Coverage", f"{coverage_pct:.1f}%")
            st.divider()

            # Per-class area columns, empty when there is nothing to chart.
            class_cols = []
            if class_data_source is not None and not class_data_source.empty:
                class_cols = [c for c in class_data_source.columns if str(c).startswith('area_')]

            if class_cols:
                # One row per class: column name -> class id -> label / colour.
                class_data = class_data_source[class_cols].T.reset_index()
                class_data.columns = ['class_col', 'area_m2']
                class_data['class_id'] = pd.to_numeric(class_data['class_col'].str.replace('area_', '', regex=False), errors='coerce').fillna(0).astype(int)
                class_data['class_name'] = class_data['class_id'].map(CLASS_LABELS)
                class_data['color'] = class_data['class_id'].map(lambda x: f"rgba({FLAIR_COLORS[x][0]},{FLAIR_COLORS[x][1]},{FLAIR_COLORS[x][2]}, 1)")
                class_data = class_data.sort_values(by='area_m2', ascending=False)

                fig_pie = px.pie(
                    class_data,
                    names='class_name',
                    values='area_m2',
                    title="Land Cover Distribution",
                    color='class_name',
                    color_discrete_map=dict(zip(class_data['class_name'], class_data['color'])),
                    labels={'class_name': 'Land Cover', 'area_m2': 'Area'},
                )
                fig_pie.update_traces(
                    textposition='inside',
                    textinfo='percent',
                    hovertemplate='<b>%{label}</b><br>Area: %{value:,.0f} m² (%{percent})<extra></extra>',
                )
                st.plotly_chart(fig_pie, use_container_width=True)
            else:
                st.info("No detailed land cover data.")
# --- Map View ---
with col_map:
    # 1. Base Map
    base_map = folium.Map(
        location=st.session_state['map_center'],
        zoom_start=st.session_state['map_zoom'],
        tiles="CartoDB positron",
        crs='EPSG3857',
    )

    # 2. Results: Districts (dashed outlines, no fill)
    if gdf_boroughs is not None and not gdf_boroughs.empty:
        borough_layer = folium.GeoJson(
            gdf_boroughs,
            name="Districts",
            style_function=lambda x: {'fillColor': 'none', 'color': '#333333', 'weight': 2, 'dashArray': '5, 5', 'fillOpacity': 0.0},
            tooltip=folium.GeoJsonTooltip(fields=['name'], aliases=['Bezirk:']),
        )
        borough_layer.add_to(base_map)

    # 3. Quarters (Veedel), coloured by mean NDVI
    if gdf_quarters is not None and not gdf_quarters.empty:
        # Colour-scale range; fall back to 0 .. 0.6 when the column is absent or all NaN.
        if 'ndvi_mean' in gdf_quarters:
            min_ndvi = gdf_quarters['ndvi_mean'].min()
            max_ndvi = gdf_quarters['ndvi_mean'].max()
        else:
            min_ndvi = 0
            max_ndvi = 0.6
        if pd.isna(min_ndvi):
            min_ndvi = 0
        if pd.isna(max_ndvi):
            max_ndvi = 0.6

        def get_style(feature):
            """Return the folium style dict for one quarter feature."""
            props = feature['properties']
            is_sel = selected_veedel != "All" and props['name'] == selected_veedel
            ndvi = props.get('ndvi_mean')

            if is_sel:
                fill_color = '#ffff00'
            elif ndvi is None or pd.isna(ndvi):
                fill_color = 'gray'
            else:
                # Scale into [0, 1]; the epsilon avoids division by zero when min == max.
                scaled = (ndvi - min_ndvi) / (max_ndvi - min_ndvi + 1e-9)
                scaled = max(0, min(1, scaled))
                fill_color = mcolors.to_hex(plt.get_cmap('RdYlGn')(scaled))

            if is_sel:
                # Selected quarter: bold outline, transparent fill.
                return {'fillColor': fill_color, 'color': 'black', 'weight': 3, 'fillOpacity': 0.0}
            return {'fillColor': fill_color, 'color': '#666666', 'weight': 1, 'fillOpacity': 0.6}

        quarter_tooltip = folium.GeoJsonTooltip(
            fields=['name', 'green_area_m2', 'green_pct', 'ndvi_mean'],
            aliases=['Veedel:', 'Green Area (m²):', 'Green Coverage (%):', 'Mean NDVI:'],
            localize=True,
            fmt='.2f',
        )
        folium.GeoJson(
            gdf_quarters,
            name="Veedel (NDVI)",
            style_function=get_style,
            tooltip=quarter_tooltip,
        ).add_to(base_map)

    # 4. Tiles Grid (Mosaic)
    if tiles_to_display:
        with st.spinner(f"Loading {len(tiles_to_display)} tiles..."):
            mosaic_img, mosaic_bounds = get_mosaic_data_remote(tiles_to_display, layer_selection)
            if mosaic_img is not None and mosaic_bounds:
                overlay_opacity = 0.8 if "Land Cover" in layer_selection else 1.0
                folium.raster_layers.ImageOverlay(
                    image=mosaic_img,
                    bounds=mosaic_bounds,
                    opacity=overlay_opacity,
                    name=f"Mosaic - {layer_selection}",
                    control=False,
                ).add_to(base_map)

    folium.LayerControl().add_to(base_map)

    # 5. Click Logic (Hybrid)
    # The component key is built from selection, zoom and click counter.
    map_key = f"map_{st.session_state['selected_veedel']}_{st.session_state['map_zoom']}_{st.session_state['map_click_counter']}"
    map_output = st_folium(
        base_map,
        width=None,
        height=700,
        key=map_key,
        use_container_width=True,
        returned_objects=["last_object_clicked", "last_clicked"],
    )

    clicked_veedel = None
    if map_output:
        # A: Check Object Property
        clicked_object = map_output.get('last_object_clicked')
        if clicked_object:
            clicked_props = clicked_object.get('properties', {})
            if clicked_props and 'name' in clicked_props:
                clicked_veedel = clicked_props['name']

        # B: Spatial Query Fallback
        click_position = map_output.get('last_clicked')
        if not clicked_veedel and click_position:
            lat = click_position['lat']
            lng = click_position['lng']
            if lat and lng and gdf_quarters is not None:
                click_point = Point(lng, lat)
                containing = gdf_quarters[gdf_quarters.geometry.contains(click_point)]
                if not containing.empty:
                    clicked_veedel = containing['name'].iloc[0]

    # Apply a new, valid selection and rerun.
    selection_changed = (
        clicked_veedel
        and clicked_veedel in veedel_list
        and clicked_veedel != st.session_state['selected_veedel']
    )
    if selection_changed:
        st.session_state['selected_veedel'] = clicked_veedel
        update_zoom_for_veedel(clicked_veedel)
        st.session_state['map_click_counter'] += 1
        st.rerun()