Spaces:

GeoOrg13
/

flood-vulnerability

Sleeping

App Files Files Community

adema5051 committed on Dec 5, 2025

Commit

a359779

verified ·

1 Parent(s): 7a93f4c

Upload 10 files

Browse files

Files changed (10) hide show

.dockerignore +48 -0
Dockerfile +31 -0
README.md +28 -5
explainability.py +163 -0
gee_auth.py +56 -0
main.py +602 -0
requirements.txt +17 -0
runtime.txt +1 -0
spatial_queries.py +754 -0
vulnerability.py +507 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,48 @@

+__pycache__
+*.pyc
+*.pyo
+*.pyd
+.Python
+*.so
+*.egg
+*.egg-info
+dist
+build
+.git
+.gitignore
+.github
+README.md
+*.md
+.env
+.venv
+venv/
+ENV/
+# Logs and cache
+*.log
+logs/
+cache/
+*.pkl
+# IDE
+.vscode
+.idea
+*.swp
+*.swo
+*~
+# Testing
+.pytest_cache
+.coverage
+htmlcov/
+.tox/
+# OS
+.DS_Store
+Thumbs.db
+# Local credentials (use Cloud Run secrets instead)
+gee-service-account.json
+# Git
+.gitattributes

Dockerfile ADDED Viewed

	@@ -0,0 +1,31 @@

+FROM python:3.11-slim
+WORKDIR /app
+# Install system dependencies for geopandas + GDAL
+# (the apt package lists are deleted in the same layer so they do not stay in the image)
+RUN apt-get update && apt-get install -y \
+    gdal-bin \
+    libgdal-dev \
+    g++ \
+    && rm -rf /var/lib/apt/lists/*
+# Copy requirements first for better layer caching
+COPY requirements.txt .
+# Install Python dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy application code
+COPY . .
+# Create directories for logs (ephemeral but prevents errors)
+RUN mkdir -p logs
+# Cloud Run injects PORT environment variable
+# NOTE(review): Hugging Face Docker Spaces route traffic to `app_port` (7860 unless
+# overridden in the README front matter) - confirm the port matches if this image runs there.
+ENV PORT=8080
+# Expose port for documentation
+EXPOSE 8080
+# Single worker - your ONNX models are too large for multiple workers
+# Shell-form CMD so ${PORT} is expanded at start-up; `exec` makes uvicorn replace the shell.
+CMD exec uvicorn main:app --host 0.0.0.0 --port ${PORT} --workers 1 --timeout-keep-alive 300

README.md CHANGED Viewed

@@ -1,10 +1,33 @@
 ---
-title: Flood Vulnerability Api
-emoji: 🏃
-colorFrom: pink
-colorTo: gray
 sdk: docker
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Flood Vulnerability API
+emoji: 🌊
+colorFrom: blue
+colorTo: green
 sdk: docker
 pinned: false
 ---
+# Flood Vulnerability Assessment API
+Global, real-time flood risk analysis powered by:
+- Google Earth Engine (terrain)
+- OpenStreetMap (water proximity)
+- SHAP (explanations)
+- Multi-hazard modeling (fluvial, coastal, pluvial)
+## Features
+- Batch CSV upload
+- 95% CI + uncertainty
+- Multi-hazard detection (fluvial, coastal surge, and pluvial)
+## Try It
+1. Visit `/docs` for interactive API documentation
+2. Example coordinates:
+   - `29.17, -95.31` → **MODERATE**
+   - `27.7, 86.7` → **LOW**
+## Tech Stack
+- FastAPI + Hugging Face Spaces
+- GEE + OSM + Natural Earth
+- ONNX models for predictions
+- `@lru_cache` for 100x batch speed

explainability.py ADDED Viewed

	@@ -0,0 +1,163 @@

+# explainability.py
+import numpy as np
+import pandas as pd
+from sklearn.ensemble import RandomForestRegressor
+import shap
+import pickle
+import os
+class VulnerabilityExplainer:
+    """
+    SHAP-based explainer for flood vulnerability scores
+    """
+    def __init__(self, model_path='models/rf_explainer.pkl'):
+        self.model = None
+        self.explainer = None
+        self.model_path = model_path
+        self.feature_names = [
+            'proximity_score',
+            'tpi_score',
+            'slope_score',
+            'height_score',
+            'elevation'
+        ]
+    def train(self, training_data_path='training_data.csv'):
+        """
+        Train surrogate RF model on existing vulnerability assessments
+        """
+        print(f"Loading training data from {training_data_path}...")
+        df = pd.read_csv(training_data_path)
+        missing_cols = [col for col in self.feature_names if col not in df.columns]
+        if missing_cols:
+            raise ValueError(f"Missing columns in training data: {missing_cols}")
+        if 'vulnerability_index' not in df.columns:
+            raise ValueError("Training data must have 'vulnerability_index' column")
+        X = df[self.feature_names]
+        y = df['vulnerability_index']
+        print(f"Training Random Forest on {len(df)} samples...")
+        self.model = RandomForestRegressor(
+            n_estimators=100,
+            max_depth=10,
+            random_state=42,
+            n_jobs=-1
+        )
+        self.model.fit(X, y)
+        print("Creating SHAP explainer...")
+        self.explainer = shap.TreeExplainer(self.model)
+        os.makedirs(os.path.dirname(self.model_path), exist_ok=True)
+        with open(self.model_path, 'wb') as f:
+            pickle.dump({
+                'model': self.model,
+                'explainer': self.explainer,
+                'feature_names': self.feature_names
+            }, f)
+        r2_score = self.model.score(X, y)
+        print(f"✅ Model trained successfully!")
+        print(f"   R² score: {r2_score:.3f}")
+        print(f"   Saved to: {self.model_path}")
+    def load(self):
+        """Load trained model"""
+        if os.path.exists(self.model_path):
+            try:
+                with open(self.model_path, 'rb') as f:
+                    data = pickle.load(f)
+                    self.model = data['model']
+                    self.explainer = data['explainer']
+                    self.feature_names = data['feature_names']
+                print(f"✅ SHAP model loaded from {self.model_path}")
+                return True
+            except Exception as e:
+                print(f"⚠️ Failed to load SHAP model: {e}")
+                return False
+        else:
+            print(f"⚠️ SHAP model not found at {self.model_path}")
+            return False
+    def explain(self, features_dict):
+        """
+        Generate SHAP explanation for a single assessment
+        """
+        if not self.explainer:
+            if not self.load():
+                return None
+        try:
+            X = pd.DataFrame([features_dict])[self.feature_names]
+        except KeyError as e:
+            print(f"Missing feature in input: {e}")
+            return None
+        shap_values = self.explainer.shap_values(X)
+        if isinstance(shap_values, list):
+            shap_values = shap_values[0]
+        shap_values = np.array(shap_values).astype(float).flatten()
+        base_value = float(np.array(self.explainer.expected_value).mean())
+        contributions = list(zip(self.feature_names, shap_values))
+        contributions.sort(key=lambda x: abs(x[1]), reverse=True)
+        total_impact = sum(abs(v) for _, v in contributions)
+        explanations = []
+        for name, value in contributions:
+            value = float(value)
+            pct = (abs(value) / total_impact * 100) if total_impact > 0 else 0
+            direction = "increases" if value > 0 else "decreases"
+            explanations.append({
+                'factor': self._humanize_feature(name),
+                'contribution_pct': round(pct, 1),
+                'direction': direction,
+                'shap_value': round(value, 3)
+            })
+        return {
+            'base_vulnerability': round(base_value, 3),
+            'predicted_vulnerability': round(base_value + sum(shap_values), 3),
+            'explanations': explanations,
+            'top_risk_driver': explanations[0]['factor'] if explanations else None
+        }
+    def _humanize_feature(self, feature_name):
+        """Convert feature names to readable descriptions"""
+        labels = {
+            'proximity_score': 'Distance to water',
+            'tpi_score': 'Topographic position (valley vs. ridge)',
+            'slope_score': 'Terrain slope',
+            'height_score': 'Building height and basement',
+            'elevation': 'Elevation above sea level'
+        }
+        return labels.get(feature_name, feature_name)
+if __name__ == "__main__":
+    import sys
+    if len(sys.argv) > 1:
+        training_file = sys.argv[1]
+    else:
+        training_file = 'training_data.csv'
+    if not os.path.exists(training_file):
+        print(f"❌ Training data not found: {training_file}")
+        sys.exit(1)
+    explainer = VulnerabilityExplainer()
+    explainer.train(training_file)
+    print("\n✅ SHAP explainer ready!")

gee_auth.py ADDED Viewed

	@@ -0,0 +1,56 @@

+import ee
+import os
+import json
+def initialize_gee():
+    """Initialize Google Earth Engine with service account from env"""
+    try:
+        # Local development - JSON file
+        if os.path.exists('gee-service-account.json'):
+            SERVICE_ACCOUNT = 'gee-access@gee-research-project.iam.gserviceaccount.com'
+            credentials = ee.ServiceAccountCredentials(
+                SERVICE_ACCOUNT,
+                'gee-service-account.json'
+            )
+            ee.Initialize(credentials)
+            print("✅ GEE authenticated (local file)")
+            return True
+        # Cloud Run - full JSON key as secret
+        gee_key_json = os.getenv('GEE_SERVICE_ACCOUNT_KEY')
+        if gee_key_json:
+            # Parse JSON credentials
+            key_dict = json.loads(gee_key_json)
+            credentials = ee.ServiceAccountCredentials(
+                email=key_dict['client_email'],
+                key_data=gee_key_json
+            )
+            ee.Initialize(credentials)
+            print("✅ GEE authenticated (Cloud Run secret)")
+            return True
+        # Fallback to split-key format (Render compatibility)
+        service_account = os.getenv('GEE_SERVICE_ACCOUNT')
+        private_key_json = os.getenv('GEE_PRIVATE_KEY')
+        if service_account and private_key_json:
+            # Clean up private key formatting
+            if private_key_json.startswith('"'):
+                private_key = private_key_json.strip('"').replace('\\n', '\n')
+            else:
+                private_key = private_key_json.replace('\\n', '\n')
+            credentials = ee.ServiceAccountCredentials(
+                email=service_account,
+                key_data=private_key
+            )
+            ee.Initialize(credentials)
+            print("✅ GEE authenticated (split credentials)")
+            return True
+        raise ValueError("No GEE credentials found - set GEE_SERVICE_ACCOUNT_KEY or use local JSON file")
+    except Exception as e:
+        print(f"❌ GEE authentication failed: {e}")
+        return False

main.py ADDED Viewed

	@@ -0,0 +1,602 @@

+# main.py - FastAPI application for Flood Vulnerability Assessment
+from fastapi import FastAPI, File, UploadFile, HTTPException, Request
+from fastapi.responses import StreamingResponse, HTMLResponse
+from fastapi.templating import Jinja2Templates
+from pydantic import BaseModel, field_validator
+from typing import Optional, Dict
+import pandas as pd
+import io
+import asyncio
+from concurrent.futures import ThreadPoolExecutor
+from contextlib import asynccontextmanager
+from datetime import datetime
+from spatial_queries import get_terrain_metrics, distance_to_water
+from vulnerability import calculate_vulnerability_index
+from gee_auth import initialize_gee
+import os
+# Set DISABLE_HEIGHT=true (case-insensitive) to skip loading the height predictor.
+DISABLE_HEIGHT_PREDICTOR = os.environ.get("DISABLE_HEIGHT", "false").lower() == "true"
+# Global flags for model readiness
+# Both start False, are updated by load_heavy_models(), and are reported by /health.
+model_ready = False
+gee_ready = False
+# OSM rate limiting
+# Time the most recent water-distance lookup finished (None until the first one).
+_last_osm_request = None
+# Held for the duration of each lookup in throttled_distance_to_water().
+_osm_lock = asyncio.Lock()
+async def throttled_distance_to_water(lat, lon):
+    """
+    Throttle OSM requests
+    """
+    global _last_osm_request
+    async with _osm_lock:
+        if _last_osm_request:
+            elapsed = (datetime.now() - _last_osm_request).total_seconds()
+            if elapsed < 0.5:  # 2 req/sec max
+                await asyncio.sleep(0.5 - elapsed)
+        loop = asyncio.get_event_loop()
+        result = await loop.run_in_executor(None, distance_to_water, lat, lon)
+        _last_osm_request = datetime.now()
+        return result
+# Lifespan context manager - loads heavy models AFTER port binding
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    # Startup: Port binds first, models load in background
+    print("🚀 FastAPI server starting - port binding now")
+    asyncio.create_task(load_heavy_models())
+    yield
+    # Shutdown
+    print("🛑 Shutting down")
+async def load_heavy_models():
+    """Load heavy models asynchronously after server starts"""
+    global model_ready, gee_ready
+    try:
+        # Initialize GEE immediately (no delay needed)
+        print("📡 Initializing GEE...")
+        initialize_gee()
+        gee_ready = True
+        print("✅ GEE initialized")
+        # Load SHAP explainer
+        try:
+            from explainability import VulnerabilityExplainer
+            global explainer
+            explainer = VulnerabilityExplainer()
+            print("✅ SHAP model initialized")
+        except Exception as e:
+            print(f"⚠️ SHAP explainer not available: {e}")
+            explainer = None
+        # Load height predictor (334 MB model)
+        print("📦 Loading height predictor...")
+        if DISABLE_HEIGHT_PREDICTOR:
+            print("⚠️ Height predictor disabled for this deployment.")
+            model_ready = False
+        else:
+            try:
+                from height_predictor.inference import get_predictor
+                get_predictor()
+                model_ready = True
+                print("✅ Height predictor ready")
+            except Exception as e:
+                print(f"⚠️ Height predictor failed to load: {e}")
+                model_ready = False
+    except Exception as e:
+        print(f"❌ Model loading failed: {e}")
+# APP INITIALIZATION
+# The lifespan hook schedules model loading in the background at startup.
+app = FastAPI(
+    title="Flood Vulnerability Assessment API",
+    version="1.0",
+    lifespan=lifespan
+)
+# Frontend templates setup
+templates = Jinja2Templates(directory="templates")
+# Thread pool for batch processing
+# NOTE(review): the handlers visible here pass None (the loop's default executor)
+# to run_in_executor and never reference this pool - confirm it is still needed.
+executor = ThreadPoolExecutor(max_workers=10)
+# Initialize explainer as None (loaded during startup)
+explainer = None
+# DATA MODEL
+class SingleAssessment(BaseModel):
+    latitude: float
+    longitude: float
+    height: Optional[float] = 0.0
+    basement: Optional[float] = 0.0
+    @field_validator('latitude')
+    @classmethod
+    def check_lat(cls, v: float) -> float:
+        if not -90 <= v <= 90:
+            raise ValueError('Latitude must be between -90 and 90')
+        return v
+    @field_validator('longitude')
+    @classmethod
+    def check_lon(cls, v: float) -> float:
+        if not -180 <= v <= 180:
+            raise ValueError('Longitude must be between -180 and 180')
+        return v
+    @field_validator('basement')
+    @classmethod
+    def check_basement(cls, v: float) -> float:
+        if v > 0:
+            raise ValueError('Basement height must be 0 or negative (e.g., -1, -2, -3)')
+        return v
+# FRONTEND ROUTE
+@app.get("/", response_class=HTMLResponse)
+async def home(request: Request):
+    """Serve the main web interface"""
+    return templates.TemplateResponse("index.html", {"request": request})
+# API ROUTES
+@app.get("/api")
+async def root() -> Dict:
+    """API info endpoint"""
+    return {
+        "service": "Flood Vulnerability Assessment API",
+        "version": "1.0",
+        "endpoints": {
+            "POST /assess": "Assess single location",
+            "POST /assess_batch": "Assess batch from CSV file",
+            "GET /health": "Health check"
+        }
+    }
+@app.get("/health")
+async def health_check() -> Dict:
+    """Health check endpoint - responds immediately even if models still loading"""
+    return {
+        "status": "healthy",
+        "gee_initialized": gee_ready,
+        "height_predictor_ready": model_ready
+    }
+@app.post("/assess")
+async def assess_single(data: SingleAssessment) -> Dict:
+    """Assess flood vulnerability for a single location (non-blocking)."""
+    if not gee_ready:
+        raise HTTPException(
+            status_code=503,
+            detail="GEE still initializing, try again in 10 seconds"
+        )
+    loop = asyncio.get_event_loop()
+    try:
+        # Run terrain query in background thread
+        terrain = await loop.run_in_executor(
+            None,
+            get_terrain_metrics,
+            data.latitude,
+            data.longitude
+        )
+        # Throttled water distance query
+        water_dist = await throttled_distance_to_water(data.latitude, data.longitude)
+        # Calculate vulnerability after terrain + water distance retrieved
+        result = calculate_vulnerability_index(
+            lat=data.latitude,
+            lon=data.longitude,
+            height=data.height,
+            basement=data.basement,
+            terrain_metrics=terrain,
+            water_distance=water_dist
+        )
+        return {
+            "status": "success",
+            "input": data.dict(),
+            "assessment": result
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Assessment failed: {e}")
+async def process_single_row_async(row, use_predicted_height: bool = False):
+    """Process a single row from CSV with async throttling."""
+    try:
+        lat = row['latitude']
+        lon = row['longitude']
+        height = row.get('height', 0.0)
+        basement = row.get('basement', 0.0)
+        if use_predicted_height:
+            if not model_ready:
+                raise ValueError("Height predictor not ready yet")
+            try:
+                from height_predictor.inference import get_predictor
+                predictor = get_predictor()
+                pred = predictor.predict_from_coordinates(lat, lon)
+                if pred.get("status") == "success" and pred.get("predicted_height") is not None:
+                    height = float(pred["predicted_height"])
+            except Exception as e:
+                raise ValueError(f"Height prediction failed for ({lat}, {lon}): {e}")
+        # Run terrain in thread pool
+        loop = asyncio.get_event_loop()
+        terrain = await loop.run_in_executor(None, get_terrain_metrics, lat, lon)
+        # Throttled water distance
+        water_dist = await throttled_distance_to_water(lat, lon)
+        result = calculate_vulnerability_index(
+            lat=lat,
+            lon=lon,
+            height=height,
+            basement=basement,
+            terrain_metrics=terrain,
+            water_distance=water_dist
+        )
+        # CSV output - essential columns
+        return {
+            'latitude': lat,
+            'longitude': lon,
+            'height': height,
+            'basement': basement,
+            'vulnerability_index': result['vulnerability_index'],
+            'ci_lower_95': result['confidence_interval']['lower_bound_95'],
+            'ci_upper_95': result['confidence_interval']['upper_bound_95'],
+            'risk_level': result['risk_level'],
+            'confidence': result['uncertainty_analysis']['confidence'],
+            'confidence_interpretation': result['uncertainty_analysis']['interpretation'],
+            'elevation_m': result['elevation_m'],
+            'tpi_m': result['relative_elevation_m'],
+            'slope_degrees': result['slope_degrees'],
+            'distance_to_water_m': result['distance_to_water_m'],
+            'quality_flags': ','.join(result['uncertainty_analysis']['data_quality_flags']) if result['uncertainty_analysis']['data_quality_flags'] else ''
+        }
+    except Exception as e:
+        return {
+            'latitude': row.get('latitude'),
+            'longitude': row.get('longitude'),
+            'height': row.get('height', 0.0),
+            'basement': row.get('basement', 0.0),
+            'error': str(e),
+            'vulnerability_index': None,
+            'ci_lower_95': None,
+            'ci_upper_95': None,
+            'risk_level': None,
+            'confidence': None,
+            'confidence_interpretation': None,
+            'elevation_m': None,
+            'tpi_m': None,
+            'slope_degrees': None,
+            'distance_to_water_m': None,
+            'quality_flags': ''
+        }
+@app.post("/assess_batch")
+async def assess_batch(file: UploadFile = File(...), use_predicted_height: bool = False) -> StreamingResponse:
+    """Assess flood vulnerability for multiple locations from a CSV file."""
+    if not gee_ready:
+        raise HTTPException(
+            status_code=503,
+            detail="GEE still initializing, try again in 10 seconds"
+        )
+    if use_predicted_height and not model_ready:
+        raise HTTPException(
+            status_code=503,
+            detail="Height predictor still loading, try again in 30 seconds"
+        )
+    try:
+        contents = await file.read()
+        df = pd.read_csv(io.StringIO(contents.decode('utf-8')))
+        if 'latitude' not in df.columns or 'longitude' not in df.columns:
+            raise HTTPException(
+                status_code=400,
+                detail="CSV must contain 'latitude' and 'longitude' columns"
+            )
+        import numpy as np
+        df = df[(np.abs(df['latitude']) <= 90) & (np.abs(df['longitude']) <= 180)]
+        if len(df) == 0:
+            raise HTTPException(status_code=400, detail="No valid coordinates in CSV (lat -90..90, lon -180..180)")
+        # Set defaults for optional columns
+        if 'height' not in df.columns:
+            df['height'] = 0.0
+        if 'basement' not in df.columns:
+            df['basement'] = 0.0
+        # Process rows with async throttling
+        results = []
+        for _, row in df.iterrows():
+            result = await process_single_row_async(row, use_predicted_height)
+            results.append(result)
+        results_df = pd.DataFrame(results)
+        output = io.StringIO()
+        results_df.to_csv(output, index=False)
+        output.seek(0)
+        return StreamingResponse(
+            io.BytesIO(output.getvalue().encode('utf-8')),
+            media_type="text/csv",
+            headers={
+                "Content-Disposition": (
+                    "attachment; filename=vulnerability_results.csv; "
+                    "filename*=UTF-8''vulnerability_results.csv"
+                )
+            }
+        )
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Batch processing failed: {str(e)}")
+@app.post("/assess_batch_multihazard")
+async def assess_batch_multihazard(file: UploadFile = File(...)) -> StreamingResponse:
+    if not gee_ready:
+        raise HTTPException(
+            status_code=503,
+            detail="GEE still initializing, try again in 10 seconds"
+        )
+    try:
+        contents = await file.read()
+        df = pd.read_csv(io.StringIO(contents.decode('utf-8')))
+        if 'latitude' not in df.columns or 'longitude' not in df.columns:
+            raise HTTPException(
+                status_code=400,
+                detail="CSV must contain 'latitude' and 'longitude' columns"
+            )
+        results = []
+        for _, row in df.iterrows():
+            result = await process_single_row_multihazard_async(row)
+            results.append(result)
+        results_df = pd.DataFrame(results)
+        output = io.StringIO()
+        results_df.to_csv(output, index=False)
+        output.seek(0)
+        return StreamingResponse(
+            io.BytesIO(output.getvalue().encode('utf-8')),
+            media_type="text/csv",
+            headers={
+                "Content-Disposition": (
+                    "attachment; filename=multihazard_results.csv; "
+                    "filename*=UTF-8''multihazard_results.csv"
+                )
+            }
+        )
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Batch multihazard failed: {str(e)}")
+@app.post("/explain")
+async def explain_assessment(data: SingleAssessment) -> Dict:
+    """Assess vulnerability with SHAP explanation"""
+    if not gee_ready:
+        raise HTTPException(
+            status_code=503,
+            detail="GEE still initializing, try again in 10 seconds"
+        )
+    loop = asyncio.get_event_loop()
+    try:
+        # Run terrain in background thread
+        terrain = await loop.run_in_executor(
+            None,
+            get_terrain_metrics,
+            data.latitude,
+            data.longitude
+        )
+        # Throttled water distance
+        water_dist = await throttled_distance_to_water(data.latitude, data.longitude)
+        result = calculate_vulnerability_index(
+            lat=data.latitude,
+            lon=data.longitude,
+            height=data.height,
+            basement=data.basement,
+            terrain_metrics=terrain,
+            water_distance=water_dist
+        )
+        # Generate explanation if explainer available
+        explanation = None
+        if explainer:
+            try:
+                explanation = explainer.explain(result['components'])
+            except Exception as e:
+                print(f"SHAP explanation failed: {e}")
+        return {
+            "status": "success",
+            "input": data.dict(),
+            "assessment": result,
+            "explanation": explanation
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Assessment failed: {e}")
+async def process_single_row_multihazard_async(row):
+    """Process a single row with multi-hazard assessment."""
+    try:
+        from vulnerability import calculate_multi_hazard_vulnerability
+        lat = row['latitude']
+        lon = row['longitude']
+        height = row.get('height', 0.0)
+        basement = row.get('basement', 0.0)
+        loop = asyncio.get_event_loop()
+        terrain = await loop.run_in_executor(None, get_terrain_metrics, lat, lon)
+        water_dist = await throttled_distance_to_water(lat, lon)
+        result = calculate_multi_hazard_vulnerability(
+            lat=lat,
+            lon=lon,
+            height=height,
+            basement=basement,
+            terrain_metrics=terrain,
+            water_distance=water_dist
+        )
+        return {
+            'latitude': lat,
+            'longitude': lon,
+            'height': height,
+            'basement': basement,
+            'vulnerability_index': result['vulnerability_index'],
+            'ci_lower_95': result['confidence_interval']['lower_bound_95'],
+            'ci_upper_95': result['confidence_interval']['upper_bound_95'],
+            'risk_level': result['risk_level'],
+            'confidence': result['uncertainty_analysis']['confidence'],
+            'confidence_interpretation': result['uncertainty_analysis']['interpretation'],
+            'elevation_m': result['elevation_m'],
+            'tpi_m': result['relative_elevation_m'],
+            'slope_degrees': result['slope_degrees'],
+            'distance_to_water_m': result['distance_to_water_m'],
+            'dominant_hazard': result['dominant_hazard'],
+            'fluvial_risk': result['hazard_breakdown']['fluvial_riverine'],
+            'coastal_risk': result['hazard_breakdown']['coastal_surge'],
+            'pluvial_risk': result['hazard_breakdown']['pluvial_drainage'],
+            'combined_risk': result['hazard_breakdown']['combined_index'],
+            'quality_flags': ','.join(result['uncertainty_analysis']['data_quality_flags'])
+                     if result['uncertainty_analysis']['data_quality_flags'] else ''
+        }
+    except Exception as e:
+        return {
+            'latitude': row.get('latitude'),
+            'longitude': row.get('longitude'),
+            'error': str(e),
+            'vulnerability_index': None
+        }
+@app.post("/assess_multihazard")
+async def assess_multihazard(data: SingleAssessment) -> Dict:
+    """Multi-hazard assessment (fluvial + coastal + pluvial)"""
+    if not gee_ready:
+        raise HTTPException(
+            status_code=503,
+            detail="GEE still initializing, try again in 10 seconds"
+        )
+    loop = asyncio.get_event_loop()
+    try:
+        from vulnerability import calculate_multi_hazard_vulnerability
+        # Run terrain in background thread
+        terrain = await loop.run_in_executor(
+            None,
+            get_terrain_metrics,
+            data.latitude,
+            data.longitude
+        )
+        # Throttled water distance
+        water_dist = await throttled_distance_to_water(data.latitude, data.longitude)
+        result = calculate_multi_hazard_vulnerability(
+            lat=data.latitude,
+            lon=data.longitude,
+            height=data.height,
+            basement=data.basement,
+            terrain_metrics=terrain,
+            water_distance=water_dist
+        )
+        return {
+            "status": "success",
+            "input": data.dict(),
+            "assessment": result
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Assessment failed: {e}")
+class HeightRequest(BaseModel):
+    latitude: float
+    longitude: float
+    @field_validator("latitude")
+    @classmethod
+    def check_lat(cls, v: float) -> float:
+        if not -90 <= v <= 90:
+            raise ValueError("Latitude must be between -90 and 90")
+        return v
+    @field_validator("longitude")
+    @classmethod
+    def check_lon(cls, v: float) -> float:
+        if not -180 <= v <= 180:
+            raise ValueError("Longitude must be between -180 and 180")
+        return v
+@app.post("/predict_height")
+async def predict_height(data: HeightRequest) -> Dict:
+    if DISABLE_HEIGHT_PREDICTOR:
+        raise HTTPException(status_code=503,
+                        detail="Height predictor disabled on this deployment.")
+    if not model_ready:
+        raise HTTPException(
+            status_code=503,
+            detail="Height predictor still loading, try again later."
+        )
+    try:
+        from height_predictor.inference import get_predictor
+        predictor = get_predictor()
+        loop = asyncio.get_event_loop()
+        result = await loop.run_in_executor(
+            None,
+            predictor.predict_from_coordinates,
+            data.latitude,
+            data.longitude,
+        )
+        return result
+    except Exception as e:
+        raise HTTPException(
+            status_code=500,
+            detail=f"Height prediction failed: {str(e)}",
+        )
+# For local development
+if __name__ == "__main__":
+    import uvicorn
+    import os
+    port = int(os.environ.get("PORT", 8000))
+    uvicorn.run(app, host="0.0.0.0", port=port)

requirements.txt ADDED Viewed

	@@ -0,0 +1,17 @@

+fastapi==0.104.1
+uvicorn[standard]==0.24.0
+earthengine-api==0.1.384
+geopandas==1.0.1
+pandas==2.2.0
+numpy==1.26.4
+shapely==2.0.6
+pyproj==3.6.1
+fiona==1.10.1
+requests==2.31.0
+jinja2==3.1.2
+python-multipart==0.0.6
+scikit-learn==1.7.2
+shap==0.48.0
+pydantic==2.12.4
+pillow==12.0.0
+onnxruntime

runtime.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ python-3.11.9

spatial_queries.py ADDED Viewed

	@@ -0,0 +1,754 @@

+import ee
+import geopandas as gpd
+from shapely.geometry import Point
+import requests
+import numpy as np
+from functools import lru_cache
+import warnings
+import json
+from pyproj import CRS, Transformer
+import time
+from datetime import datetime
+# Initialize GEE
+from gee_auth import initialize_gee
+# Suppress shapely distance warnings
+# (only RuntimeWarning raised from the shapely.measurement module is silenced)
+warnings.filterwarnings("ignore", category=RuntimeWarning, module="shapely.measurement")
+# LAZY LOADING
+# Module-level caches, filled on first use by get_rivers() / get_lakes().
+_RIVERS = None
+_LAKES = None
+def get_rivers():
+    """Lazy load rivers dataset"""
+    global _RIVERS
+    if _RIVERS is None:
+        _RIVERS = gpd.read_file('data/natural_earth/ne_10m_rivers_lake_centerlines.shp')
+        _RIVERS = _RIVERS[_RIVERS.geometry.is_valid].copy()
+        print("✅ Rivers shapefile loaded")
+    return _RIVERS
def get_lakes():
    """Return the Natural Earth lake polygons, loading them on first call.

    The shapefile is read once, features with invalid geometries are dropped,
    and the filtered GeoDataFrame is stored in the module-level ``_LAKES``
    cache. The cache is assigned only after filtering succeeds, so a failure
    part-way through never leaves an unfiltered frame behind for later calls.
    """
    global _LAKES
    if _LAKES is None:
        lakes = gpd.read_file('data/natural_earth/ne_10m_lakes.shp')
        _LAKES = lakes[lakes.geometry.is_valid].copy()
        print("✅ Lakes shapefile loaded")
    return _LAKES
def get_terrain_metrics(lat, lon, buffer_m=500, force_dem=None):
    """
    Sample elevation, slope and TPI for a coordinate from an Earth Engine DEM.

    Args:
        lat, lon: Coordinate in decimal degrees.
        buffer_m: Radius of the neighbourhood used for the mean elevation;
            replaced by 100 when |lat| > 70.
        force_dem: Optional DEM name handed to ``_get_forced_dem``. When it is
            falsy the DEM is chosen by ``_select_best_dem``.

    Returns:
        dict with keys ``elevation``, ``slope``, ``tpi``, ``mean_elevation``
        (rounded to 2 decimals, or None) and ``dem_source``. Errors raised by
        the Earth Engine queries are printed and produce the same keys with
        None values.
    """
    initialize_gee()

    def _blank(source=None):
        # Result returned on every "no data" path.
        return {
            'elevation': None,
            'slope': None,
            'tpi': None,
            'mean_elevation': None,
            'dem_source': source,
        }

    def _rounded(value):
        return None if value is None else round(float(value), 2)

    if abs(lat) > 70:
        buffer_m = 100

    try:
        if abs(lat) > 85:
            print(f"Polar region {lat},{lon} - no terrain data")
            return _blank()

        point = ee.Geometry.Point([lon, lat])
        region = point.buffer(buffer_m)

        # Either the DEM requested for validation or the best one available.
        if force_dem:
            dem, dem_source = _get_forced_dem(lat, lon, force_dem)
            if dem is None:
                # Forced DEM not available at this location
                return _blank()
        else:
            dem, dem_source = _select_best_dem(lat, lon)
            if dem is None:
                print(f"All DEM sources failed for {lat},{lon}")
                return _blank()

        def _reduce(image, reducer, geometry, band):
            stats = image.reduceRegion(
                reducer=reducer,
                geometry=geometry,
                scale=30,
                maxPixels=1e9,
                bestEffort=True
            )
            return stats.get(band).getInfo()

        # Point elevation: mean over a small 15 m buffer.
        elevation = _reduce(dem, ee.Reducer.mean(), point.buffer(15), 'elevation')
        if elevation is None:
            print(f"GEE elevation failed for {lat},{lon} using {dem_source}")
            return _blank(dem_source)

        # Neighbourhood mean elevation; a failure here only disables TPI.
        try:
            mean_elevation = _reduce(dem, ee.Reducer.mean(), region, 'elevation')
        except Exception as me_err:
            print(f"GEE mean elev failed for {lat},{lon}: {me_err}")
            mean_elevation = None

        slope_img = ee.Terrain.slope(dem)

        def _slope_stat(kind):
            try:
                reducer = ee.Reducer.mean() if kind == 'mean' else ee.Reducer.max()
                return _reduce(slope_img, reducer, point.buffer(200), 'slope')
            except Exception as err:
                if "transform edge" not in str(err):
                    print(f"GEE slope {kind} failed for {lat},{lon}: {err}")
                return None

        slope_mean = _slope_stat('mean')
        slope_max = _slope_stat('max')

        # Use the maximum when it is at least 1.8x the mean, otherwise the
        # mean; fall back to whichever statistic is available.
        if slope_mean is None:
            slope = slope_max
        elif slope_max is None:
            slope = slope_mean
        else:
            slope = slope_max if slope_max >= slope_mean * 1.8 else slope_mean

        # TPI: point elevation minus neighbourhood mean elevation.
        tpi = None
        if mean_elevation is not None:
            try:
                tpi = float(elevation) - float(mean_elevation)
            except (ValueError, TypeError):
                tpi = None

        return {
            'elevation': _rounded(elevation),
            'slope': _rounded(slope),
            'tpi': _rounded(tpi),
            'mean_elevation': _rounded(mean_elevation),
            'dem_source': dem_source
        }
    except Exception as e:
        print(f"GEE error for {lat},{lon}: {e}")
        return _blank()
def _sample_dem_at_point(image, point, scale, band="elevation"):
    """Return the first pixel value of *band* at *point* (None when there is no value)."""
    return image.reduceRegion(
        reducer=ee.Reducer.first(),
        geometry=point,
        scale=scale,
        bestEffort=True,
        maxPixels=1e9
    ).get(band).getInfo()


def _select_best_dem(lat, lon):
    """
    Hierarchical DEM selection: prioritize highest-resolution DEM available.

    Regional high-resolution DEMs are probed first (only when the coordinate
    falls inside their bounding box), followed by the global 30 m products.
    A candidate is accepted as soon as sampling it at the point returns a
    value. Any error raised while probing a candidate is printed and the next
    candidate is tried.

    Returns:
        (image, source_name) where the image exposes an ``elevation`` band,
        or (None, None) when every candidate failed.
    """
    point = ee.Geometry.Point([lon, lat])

    # --- Regional high-resolution DEMs ---

    # 1. USGS 3DEP 10m (USA)
    if -130 < lon < -60 and 20 < lat < 55:
        try:
            usgs_10m = (
                ee.ImageCollection("USGS/3DEP/10m_collection")
                .filterBounds(point)
                .mosaic()
            )
            # Dynamically detect elevation band
            elev_band = usgs_10m.bandNames().getInfo()[0]
            usgs_10m = usgs_10m.select(elev_band).rename("elevation")
            usgs_10m = usgs_10m.reproject(crs="EPSG:4326", scale=10)
            if _sample_dem_at_point(usgs_10m, point, 10) is not None:
                print(f"Using USGS 3DEP 10m for {lat},{lon}")
                return usgs_10m, "USGS_3DEP_10m_collection"
        except Exception as e:
            print(f"USGS 3DEP DEM failed for {lat},{lon}: {e}")

    # 2. Netherlands AHN3 / AHN2 (0.5 m); AHN3 is preferred over AHN2
    if 50 < lat < 54 and 3 < lon < 8:
        try:
            # AHN3 (2014–2019)
            ahn3 = ee.ImageCollection("AHN/AHN3").select("DTM").mosaic()
            if _sample_dem_at_point(ahn3, point, 1, band="DTM") is not None:
                print(f"Using AHN3 0.5m DTM for {lat},{lon}")
                return ahn3.rename("elevation"), "AHN3_0.5m"
        except Exception as e:
            print(f"AHN3 DEM failed for {lat},{lon}: {e}")
        try:
            # AHN2 (2012)
            ahn2 = ee.Image("AHN/AHN2_05M_INT").select("elevation")
            if _sample_dem_at_point(ahn2, point, 1) is not None:
                print(f"Using AHN2 0.5m DTM for {lat},{lon}")
                return ahn2, "AHN2_0.5m"
        except Exception as e:
            print(f"AHN2 DEM failed for {lat},{lon}: {e}")

    # 3. UK Environment Agency Composite DTM/DSM (1m)
    if 49 < lat < 61 and -8 < lon < 3:
        try:
            ea = ee.Image("UK/EA/ENGLAND_1M_TERRAIN/2022")
            # Identify available elevation band
            bands = ea.bandNames().getInfo()
            elev_candidates = [b for b in bands if b.lower() in ["dtm", "elevation", "b1"]]
            if not elev_candidates:
                raise ValueError("No valid elevation band found")
            elev_band = elev_candidates[0]
            # Reproject to WGS84 before sampling
            ea_reproj = ea.select(elev_band).reproject(
                crs="EPSG:4326",
                scale=2
            )
            if _sample_dem_at_point(ea_reproj, point, 2, band=elev_band) is not None:
                print(f"Using UK EA DTM 1m for {lat},{lon}")
                return ea_reproj.rename("elevation"), "EA_UK_1m"
        except Exception as e:
            print(f"EA UK DEM failed for {lat},{lon}: {e}")

    # 4. Australia 5m DEM (LiDAR coastal & urban areas)
    if -45 < lat < -10 and 110 < lon < 155:
        try:
            # Mosaic all tiles that intersect the point
            aus = (
                ee.ImageCollection("AU/GA/AUSTRALIA_5M_DEM")
                .filterBounds(point)
                .mosaic()
                .select("elevation")
            )
            if _sample_dem_at_point(aus, point, 5) is not None:
                print(f"Using Australia 5m DEM for {lat},{lon}")
                return aus, "Australia_5m"
        except Exception as e:
            print(f"AU DEM failed for {lat},{lon}: {e}")

    # --- Global 30m DEMs ---

    # 5. NASADEM
    if -56 <= lat <= 60:
        try:
            nasadem = ee.Image("NASA/NASADEM_HGT/001").select("elevation")
            if _sample_dem_at_point(nasadem, point, 30) is not None:
                print(f"Using NASADEM for {lat},{lon}")
                return nasadem, "NASADEM"
        except Exception as e:
            print(f"NASADEM failed for {lat},{lon}: {e}")

    # 6. Copernicus GLO-30
    try:
        cop = ee.ImageCollection("COPERNICUS/DEM/GLO30").mosaic().select("DEM").rename("elevation")
        if _sample_dem_at_point(cop, point, 30) is not None:
            print(f"Using Copernicus GLO-30 for {lat},{lon}")
            return cop, "Copernicus_GLO30"
    except Exception as e:
        print(f"Copernicus GLO-30 failed for {lat},{lon}: {e}")

    # 7. ALOS World 3D-30m
    if abs(lat) <= 82:
        try:
            alos = ee.ImageCollection("JAXA/ALOS/AW3D30/V4_1").mosaic().select("AVE").rename("elevation")
            if _sample_dem_at_point(alos, point, 30) is not None:
                print(f"Using ALOS AW3D30 AVE for {lat},{lon}")
                return alos, 'ALOS_AW3D30_AVE'
        except Exception as e:
            print(f"ALOS AW3D30 failed for {lat},{lon}: {e}")

    # 8. SRTM fallback
    if -56 <= lat <= 60:
        try:
            srtm = ee.Image("USGS/SRTMGL1_003").select("elevation")
            if _sample_dem_at_point(srtm, point, 30) is not None:
                print(f"Using SRTM fallback for {lat},{lon}")
                return srtm, "SRTM_v3"
        except Exception as e:
            print(f"SRTM failed for {lat},{lon}: {e}")

    print(f"All DEM sources failed for {lat},{lon}")
    return None, None
+def _get_forced_dem(lat, lon, dem_name):
+    """
+    Force specific DEM retrieval for validation studies.
+    Returns None if DEM unavailable at location.
+    """
+    point = ee.Geometry.Point([lon, lat])
+    # Map DEM names to their retrieval logic
+    dem_map = {
+        'ALOS_AW3D30': lambda: (
+            ee.ImageCollection("JAXA/ALOS/AW3D30/V4_1").mosaic().select("AVE").rename("elevation"),
+            30
+        ),
+        'Copernicus_GLO30': lambda: (
+            ee.ImageCollection("COPERNICUS/DEM/GLO30").mosaic().select("DEM").rename("elevation"),
+            30
+        ),
+        'NASADEM': lambda: (
+            ee.Image("NASA/NASADEM_HGT/001").select("elevation"),
+            30
+        ),
+        'SRTM_v3': lambda: (
+            ee.Image("USGS/SRTMGL1_003").select("elevation"),
+            30
+        ),
+        'AHN3_0.5m': lambda: (
+            ee.ImageCollection("AHN/AHN3").select("DTM").mosaic().rename("elevation"),
+            1
+        ),
+        'AHN2_0.5m': lambda: (
+            ee.Image("AHN/AHN2_05M_INT").select("elevation"),
+            1
+        ),
+        'EA_UK_1m': lambda: (
+            ee.Image("UK/EA/ENGLAND_1M_TERRAIN/2022").select("dtm").reproject(crs="EPSG:4326", scale=2).rename("elevation"),
+            2
+        ),
+        'Australia_5m': lambda: (
+            ee.ImageCollection("AU/GA/AUSTRALIA_5M_DEM").filterBounds(point).mosaic().select("elevation"),
+            5
+        ),
+        'USGS_3DEP_10m_collection': lambda: (
+            ee.ImageCollection("USGS/3DEP/10m_collection").filterBounds(point).mosaic().select("elevation"),
+            10
+        )
+    }
+    if dem_name not in dem_map:
+        print(f"Unknown DEM name: {dem_name}")
+        return None, None
+    try:
+        dem, scale = dem_map[dem_name]()
+        # Test if data exists at this location
+        test = dem.reduceRegion(
+            ee.Reducer.first(),
+            point,
+            scale,
+            bestEffort=True
+        ).get("elevation").getInfo()
+        if test is not None:
+            print(f"Forced DEM {dem_name} available at {lat},{lon}")
+            return dem, dem_name
+        else:
+            print(f"Forced DEM {dem_name} has no data at {lat},{lon}")
+            return None, None
+    except Exception as e:
+        print(f"Failed to get forced DEM {dem_name} at {lat},{lon}: {e}")
+        return None, None
def is_significant_water_body(element):
    """
    Determine if water feature is significant for flood risk assessment.

    Args:
        element: An Overpass element dict. ``tags`` (dict) and ``geometry``
            (list of ``{'lat': ..., 'lon': ...}`` nodes) are read when present.

    Returns:
        True for rivers/streams/canals/drains, named ``natural=water``
        features, and unnamed ``natural=water`` features with more than 50
        nodes. False for fountains and ornamental water, for very small
        water bodies, and for everything else.
    """
    import math

    tags = element.get('tags', {})
    name = tags.get('name', '')

    # Filter by name - fountains
    lowered_name = name.lower() if name else ''
    if any(word in lowered_name for word in ('fuente', 'fountain', 'fonte')):
        return False

    # Filter by water type tag
    if tags.get('water', '') in ('fountain', 'reflecting_pool', 'pond', 'ornamental'):
        return False

    # Filter by amenity tag
    if tags.get('amenity') == 'fountain':
        return False

    # Waterways are always significant
    if tags.get('waterway') in ('river', 'stream', 'canal', 'drain'):
        return True

    # Everything below only applies to natural=water features.
    if tags.get('natural') != 'water':
        return False

    coords = element.get('geometry') or []
    if len(coords) >= 3:
        lons = [c['lon'] for c in coords]
        lats = [c['lat'] for c in coords]
        # Bounding-box area in m². A degree of longitude shrinks with
        # cos(latitude), so scale the east-west extent accordingly;
        # otherwise small ponds at high latitudes look several times larger
        # than they are and slip past the size filters.
        mid_lat = (max(lats) + min(lats)) / 2
        width = (max(lons) - min(lons)) * 111320 * math.cos(math.radians(mid_lat))
        height = (max(lats) - min(lats)) * 111320
        approx_area = width * height
        if approx_area < 500:
            return False
        if len(coords) < 10 and approx_area < 2000:
            return False

    # Natural water bodies with names (excluding fountains)
    if name:
        return True

    # Large unnamed water bodies
    return len(coords) > 50
def distance_to_water_osm(lat, lon, radius_m=5000, timeout=20, retry_count=2):
    """
    Query OpenStreetMap for nearby SIGNIFICANT water bodies with retry logic.

    Args:
        lat, lon: Coordinate in decimal degrees.
        radius_m: Overpass ``around`` search radius in metres.
        timeout: Used both as the Overpass server timeout and the HTTP timeout (s).
        retry_count: Number of request attempts for rate limits, HTTP errors,
            timeouts and unexpected exceptions.

    Returns:
        Distance in metres to the nearest significant water feature, or None
        when the coordinates are invalid, the request fails, or nothing
        significant is found. If the response holds only insignificant
        features and ``radius_m`` is below 10 km, the search is repeated once
        at 10 km.
    """
    import math

    from shapely.geometry import LineString, Polygon

    # Validate once, before building the query or touching the network.
    try:
        coords_ok = -90 <= lat <= 90 and -180 <= lon <= 180
    except TypeError:
        coords_ok = False
    if not coords_ok:
        print(f"Invalid coords for OSM: {lat},{lon}")
        return None

    overpass_url = "http://overpass-api.de/api/interpreter"
    query = f"""
    [out:json][timeout:{timeout}];
    (
    way["natural"="water"](around:{radius_m},{lat},{lon});
    way["waterway"="river"](around:{radius_m},{lat},{lon});
    way["waterway"="canal"](around:{radius_m},{lat},{lon});
    way["waterway"="stream"](around:{radius_m},{lat},{lon});
    relation["natural"="water"](around:{radius_m},{lat},{lon});
    way["natural"="bay"](around:{radius_m},{lat},{lon});
    );
    out geom;
    """

    # Local equirectangular projection centred on the query point, so that
    # shapely distances come out in metres. Multiplying a distance in degrees
    # by 111320 overstates east-west distances by 1/cos(lat).
    metres_per_degree = 111320
    cos_lat = math.cos(math.radians(lat))
    origin = Point(0.0, 0.0)

    def _to_local_metres(node):
        return (
            (node['lon'] - lon) * metres_per_degree * cos_lat,
            (node['lat'] - lat) * metres_per_degree,
        )

    extended_radius_m = 10000

    for attempt in range(retry_count):
        try:
            response = requests.post(overpass_url, data={'data': query}, timeout=timeout)
            if response.status_code == 429:
                print(f"OSM rate limited for {lat},{lon} - waiting {2 ** attempt}s")
                time.sleep(2 ** attempt)
                continue
            if response.status_code == 400:
                print(f"OSM 400 for {lat},{lon} - bad query")
                return None
            if response.status_code != 200:
                print(f"OSM HTTP {response.status_code} for {lat},{lon}")
                if attempt < retry_count - 1:
                    time.sleep(1)
                    continue
                return None
            if not response.text.strip():
                print(f"OSM empty response for {lat},{lon}")
                return None
            try:
                data = response.json()
            except (json.JSONDecodeError, ValueError) as je:
                print(f"OSM JSON decode failed for {lat},{lon}: {je}")
                return None
            if not data.get('elements'):
                print(f"OSM no elements found for {lat},{lon}")
                return None

            significant_features = [e for e in data['elements'] if is_significant_water_body(e)]
            # Widen the search once. The guard must compare against the
            # extended radius itself, otherwise the 10 km call would keep
            # re-issuing the same 10 km query without bound.
            if not significant_features and radius_m < extended_radius_m:
                print(f"Retrying {lat},{lon} with extended radius...")
                return distance_to_water_osm(
                    lat, lon, radius_m=extended_radius_m, timeout=timeout, retry_count=1
                )
            if not significant_features:
                print(f"OSM only ornamental features for {lat},{lon}")
                return None

            min_distance = float('inf')
            for element in significant_features:
                nodes = element.get('geometry')
                if not nodes or len(nodes) < 2:
                    continue
                try:
                    coords = [_to_local_metres(node) for node in nodes]
                    if element.get('tags', {}).get('waterway'):
                        water_geom = LineString(coords)
                    else:
                        # Treat as an area; fall back to a line when the
                        # nodes cannot form a ring.
                        try:
                            water_geom = Polygon(coords)
                        except Exception:
                            water_geom = LineString(coords)
                except Exception:
                    # Malformed element: skip it rather than fail the query.
                    continue
                if not water_geom.is_valid:
                    continue
                distance = origin.distance(water_geom)
                if not np.isnan(distance):
                    min_distance = min(min_distance, distance)

            result = min_distance if min_distance != float('inf') else None
            if result is not None:
                print(f"OSM success for {lat},{lon}: {result:.1f}m")
            return result
        except requests.exceptions.Timeout:
            print(f"OSM timeout for {lat},{lon} (attempt {attempt + 1}/{retry_count})")
            if attempt < retry_count - 1:
                time.sleep(1)
                continue
            return None
        except Exception as e:
            print(f"OSM exception for {lat},{lon}: {e}")
            if attempt < retry_count - 1:
                time.sleep(1)
                continue
            return None
    return None
def distance_to_water_static(lat, lon):
    """
    Fallback: calculate distance to Natural Earth water bodies.

    The point and the cached river/lake layers are projected into the UTM
    zone derived from ``lon`` and the minimum planar distance is taken.

    Returns:
        Distance to the nearest river or lake feature in the units of that
        UTM projection, or None when no distance could be computed or an
        error occurred (the error is printed).
    """
    try:
        # lon == 180 would otherwise give zone 61, which does not exist.
        utm_zone = max(1, min(60, int((lon + 180) / 6) + 1))
        hemisphere = 'north' if lat >= 0 else 'south'
        utm_crs = CRS.from_string(f"+proj=utm +zone={utm_zone} +{hemisphere} +datum=WGS84")
        transformer = Transformer.from_crs("EPSG:4326", utm_crs, always_xy=True)
        point_utm = Point(transformer.transform(lon, lat))

        def _nearest(layer):
            # Minimum non-NaN distance from the point to any feature in the layer.
            distances = layer.to_crs(utm_crs).geometry.distance(point_utm)
            distances = distances[distances.notna()]
            return distances.min() if len(distances) > 0 else np.inf

        # Use lazy-loaded datasets
        min_dist = min(_nearest(get_rivers()), _nearest(get_lakes()))
        result = min_dist if min_dist != np.inf else None
        if result is not None:
            print(f"Static fallback for {lat},{lon}: {result:.1f}m")
        else:
            print(f"Static fallback failed for {lat},{lon}")
        return result
    except Exception as p_err:
        print(f"Static distance error for {lat},{lon}: {p_err}")
        return None
def check_coastal(lat, lon, timeout=15):
    """
    Adaptive coastal detection: expands search radius until coastline is found.

    Overpass is queried for ``natural=coastline`` ways within 1 km, 2 km and
    then 5 km of the point; the first radius that yields a coastline wins.

    Args:
        lat, lon: Coordinate in decimal degrees.
        timeout: Used both as the Overpass server timeout and the HTTP timeout (s).

    Returns:
        (True, distance_m) with the distance in metres to the nearest
        coastline way, or (False, None) when none was found or every query
        failed.
    """
    import math

    from shapely.geometry import LineString

    overpass_url = "http://overpass-api.de/api/interpreter"

    # Local equirectangular projection centred on the query point, so that
    # shapely distances come out in metres. Multiplying a distance in degrees
    # by 111320 overstates east-west distances by 1/cos(lat).
    metres_per_degree = 111320
    cos_lat = math.cos(math.radians(lat))
    origin = Point(0.0, 0.0)

    # Sweep radii from 1 km to 5 km
    radii = [1000, 2000, 5000]
    print(f"[Coastal] Starting coastal search for {lat},{lon} ...")
    for r in radii:
        query = f"""
        [out:json][timeout:{timeout}];
        (
          way["natural"="coastline"](around:{r},{lat},{lon});
        );
        out geom;
        """
        try:
            response = requests.post(overpass_url, data={'data': query}, timeout=timeout)
            if response.status_code != 200:
                print(f"[Coastal] HTTP {response.status_code} at radius {r}")
                continue
            if not response.text.strip():
                continue
            try:
                data = response.json()
            except ValueError:
                # Body was not JSON (json.JSONDecodeError is a ValueError).
                continue
            if not data.get('elements'):
                print(f"[Coastal] No coastline found at {r} m")
                continue

            min_distance = float('inf')
            for element in data['elements']:
                nodes = element.get('geometry')
                if not nodes or len(nodes) < 2:
                    continue
                try:
                    coastline = LineString([
                        (
                            (node['lon'] - lon) * metres_per_degree * cos_lat,
                            (node['lat'] - lat) * metres_per_degree,
                        )
                        for node in nodes
                    ])
                except Exception:
                    # Malformed way: skip it, keep the other segments.
                    continue
                min_distance = min(min_distance, origin.distance(coastline))

            if min_distance != float('inf'):
                print(f"Coastal detected for {lat},{lon}: {min_distance:.1f}m (radius={r})")
                return True, min_distance
        except Exception as e:
            print(f"[Coastal] Error at radius {r}: {e}")
            continue

    # If nothing is found
    print(f"[Coastal] No coastline detected for {lat},{lon}. Continuing with OSM water search.")
    return False, None
class _WaterDistanceUnavailable(Exception):
    """Internal signal that no source produced a distance.

    ``functools.lru_cache`` does not store calls that raise, so signalling
    failure with an exception keeps failed lookups out of the cache.
    """


@lru_cache(maxsize=1000)
def _distance_to_water_lookup(lat, lon):
    """Resolve the water distance for already-rounded coordinates (cached)."""
    print(f"--- Water distance query for {lat},{lon} ---")

    # 1. Check coastal proximity
    try:
        is_coastal, coast_distance = check_coastal(lat, lon)
        if is_coastal and coast_distance is not None:
            print(f"Coastal detected for {lat},{lon}: {coast_distance:.1f} m")
            return coast_distance
    except Exception as e:
        print(f"Coastal check failed for {lat},{lon}: {e}")

    # 2. Try OSM query with retries
    for radius in [3000, 5000, 8000]:
        for attempt in range(3):
            try:
                print(f"OSM attempt {attempt + 1}/3 at radius {radius} m for {lat},{lon}")
                d = distance_to_water_osm(lat, lon, radius_m=radius)
                if d is not None:
                    print(f"OSM success for {lat},{lon}: {d:.1f} m (radius={radius})")
                    return d
            except Exception as e:
                print(f"OSM exception on attempt {attempt + 1} for {lat},{lon}: {e}")
                time.sleep(1.5)
        time.sleep(1.5)

    # 3. Static fallback
    try:
        d_static = distance_to_water_static(lat, lon)
        if d_static is not None:
            # The static distance is scaled by 0.7 before being returned.
            corrected = d_static * 0.7
            print(f"Static fallback for {lat},{lon}: raw={d_static:.1f} m, corrected={corrected:.1f} m")
            return corrected
        else:
            print(f"Static fallback failed for {lat},{lon}")
    except Exception as e:
        print(f"Static distance error for {lat},{lon}: {e}")

    print(f"All water distance queries failed for {lat},{lon}")
    raise _WaterDistanceUnavailable(f"{lat},{lon}")


def distance_to_water(lat, lon):
    """
    Combined water distance with caching for batch efficiency.

    Sources are tried in order: coastline check, OSM query (radii 3, 5 and
    8 km), then the Natural Earth static fallback.

    Coordinates are rounded to 6 decimals *before* the cache lookup, so
    inputs that round to the same coordinate share one cache entry. Only
    successful lookups are cached; a lookup where every source failed is
    retried on the next call.

    Returns:
        Distance in metres, or None when every source failed.
    """
    lat, lon = round(float(lat), 6), round(float(lon), 6)
    try:
        return _distance_to_water_lookup(lat, lon)
    except _WaterDistanceUnavailable:
        return None


# Keep the lru_cache helpers that the decorated function used to expose.
distance_to_water.cache_info = _distance_to_water_lookup.cache_info
distance_to_water.cache_clear = _distance_to_water_lookup.cache_clear

vulnerability.py ADDED Viewed

	@@ -0,0 +1,507 @@

+# vulnerability.py
+import numpy as np
def normalize_component(value, max_value, inverse=False):
    """
    Map ``|value| / max_value`` onto the 0-1 range.

    With ``inverse=False`` the score is ``1 - ratio`` floored at 0; with
    ``inverse=True`` the score is the ratio itself capped at 1. A missing
    value (None) yields the neutral score 0.5.
    """
    if value is None:
        return 0.5
    ratio = abs(value) / max_value
    if inverse:
        return min(1.0, ratio)
    return max(0.0, 1.0 - ratio)
def assess_flood_context(elevation, tpi, water_distance):
    """
    Classify the flood context of a site from its elevation band.

    Args:
        elevation: Site elevation (m). Must not be None.
        tpi: Topographic position index (point elevation minus the
            neighbourhood mean); negative values are depressions.
        water_distance: Distance to the nearest water (m), or None if unknown.

    Returns:
        (risk_level, context_factor) where risk_level is one of
        'very_low', 'low', 'moderate', 'high', 'very_high' and
        context_factor is a multiplier in the range 0.5-1.0.
    """
    def within(limit):
        # True only when a distance is known and is below the limit.
        return water_distance is not None and water_distance < limit

    # Context 1: Coastal (<10m)
    if elevation < 10:
        if within(500):
            return 'very_high', 1.0
        if within(2000):
            # Label is 'very_high' in both cases; only the factor varies.
            # (The non-depression case used to return 'very high' with a
            # space, which is not a valid risk level.)
            return 'very_high', (1.0 if tpi < -3 else 0.98)
        if within(5000):
            return ('high', 0.9) if tpi < -3 else ('moderate', 0.75)
        return 'moderate', (0.7 if tpi < -5 else 0.6)

    # Context 2: High plateau (>600m)
    if elevation > 600:
        if tpi < -15 and within(100):
            return 'moderate', 0.65
        if tpi < -10:
            return 'low', 0.55
        return 'low', 0.50

    # Context 3: Mountain (300–600m)
    if elevation > 300:
        if within(200) and tpi < -10:
            return 'moderate', 0.75
        if within(500):
            return 'low', 0.65
        return 'low', 0.55

    # Context 4: River valley (100–300m). The upper bound is inclusive so
    # that exactly 300 m is not classified as low inland.
    if elevation > 100:
        if within(300) and tpi < -5:
            return 'high', 1.0
        if within(500):
            return 'moderate', 0.85
        return 'moderate', 0.7

    # Context 5: Low inland (10–100m)
    if water_distance is None:
        return 'moderate', 0.7
    if water_distance < 200:
        if tpi < -8:
            return 'very_high', 1.0
        if tpi < -5:
            return 'high', 0.95
        return 'high', 0.85
    if water_distance < 500:
        return ('high', 0.85) if tpi < -5 else ('moderate', 0.75)
    if water_distance < 1000:
        return 'moderate', (0.70 if tpi < -5 else 0.65)
    if tpi < -8:
        return 'moderate', 0.65
    if tpi < -5:
        return 'low', 0.60
    return 'low', 0.55
def calculate_vulnerability_index(lat, lon, height, basement, terrain_metrics, water_distance):
    """
    Calculate flood vulnerability index with basement consideration.

    Four component scores (water proximity, TPI, slope, building height) are
    combined with fixed weights, multiplied by a basement multiplier when
    ``basement < 0`` and then by the context factor from
    ``assess_flood_context``.

    Args:
        lat, lon: Coordinate in decimal degrees (used for logging and the
            uncertainty analysis).
        height: Building height; added to ``abs(basement)`` to form the
            protection term.
        basement: Basement depth; negative values mean a basement is present.
        terrain_metrics: dict with optional ``elevation``, ``tpi`` and
            ``slope`` entries; missing or None values are treated as 0.
        water_distance: Distance to the nearest water (m), or None if unknown.

    Returns:
        dict with ``vulnerability_index``, ``confidence_interval``,
        ``risk_level``, the input metrics, ``uncertainty_analysis`` and the
        individual ``components`` scores.
    """
    elevation = terrain_metrics.get('elevation') or 0
    tpi = terrain_metrics.get('tpi') or 0
    slope = terrain_metrics.get('slope') or 0

    # GET FLOOD CONTEXT
    try:
        context_risk_level, context_factor = assess_flood_context(elevation, tpi, water_distance)
    except (TypeError, ValueError) as te:
        print(f"Context failed for {lat},{lon}: {te} - default moderate")
        context_risk_level, context_factor = 'moderate', 0.8

    # Apply elevation penalty for high-altitude locations
    if elevation > 500:
        elevation_factor = max(0.3, 1.0 - (elevation - 500) / 1000)
    else:
        elevation_factor = 1.0

    # Component 1: Proximity (with elevation adjustment)
    if water_distance is None:
        proximity_score = 0.5
    elif water_distance < 100:
        proximity_score = 1.0 * elevation_factor
    elif water_distance < 500:
        proximity_score = (0.9 - ((water_distance - 100) / 400) * 0.5) * elevation_factor
    elif water_distance < 2000:
        proximity_score = (0.4 - ((water_distance - 500) / 1500) * 0.3) * elevation_factor
    elif water_distance < 5000:
        proximity_score = max(0.0, 0.1 - ((water_distance - 2000) / 3000) * 0.1) * elevation_factor
    else:
        proximity_score = 0.0

    # Component 2: TPI (Topographic Position Index)
    # tpi is never None here (missing values were replaced by 0 above).
    if tpi < -5:
        tpi_score = min(1.0, 0.7 + abs(tpi + 5) / 30)
    elif tpi > 5:
        tpi_score = max(0.0, 0.3 - (tpi - 5) / 50)
    else:
        tpi_score = 0.5 - (tpi / 20)
    tpi_score = max(0.0, min(1.0, tpi_score))
    if elevation > 500:
        tpi_score = tpi_score * elevation_factor

    # Component 3: Slope
    if slope < 0.5:
        slope_score = 0.9
    elif slope < 2:
        slope_score = 0.8 - ((slope - 0.5) / 1.5) * 0.3
    elif slope < 6:
        slope_score = 0.5 - ((slope - 2) / 4) * 0.3
    else:
        slope_score = max(0.05, 0.2 - (slope - 6) / 20)

    # Component 4: Building protection factor
    net_protection = height + abs(basement)
    # Height protection calculation (without basement penalty)
    if net_protection <= 0:
        height_score = 0.9
    elif net_protection < 3:
        height_score = 0.8 - (net_protection / 3) * 0.3
    elif net_protection < 8:
        height_score = 0.5 - ((net_protection - 3) / 5) * 0.3
    else:
        height_score = max(0.1, 0.2 - ((net_protection - 8) / 15) * 0.15)
    height_score = max(0.0, min(1.0, height_score))

    # Increase weight for building characteristics when basement present
    if basement < 0:
        weights = {
            'proximity': 0.25,
            'tpi': 0.30,
            'slope': 0.15,
            'height': 0.30
        }
    else:
        weights = {
            'proximity': 0.30,
            'tpi': 0.35,
            'slope': 0.20,
            'height': 0.15
        }

    # Base vulnerability
    base_vulnerability = (
        weights['proximity'] * proximity_score +
        weights['tpi'] * tpi_score +
        weights['slope'] * slope_score +
        weights['height'] * height_score
    )

    # Basement as multiplier
    if basement < 0:
        basement_multiplier = 1.0 + (abs(basement) * 0.15)
        base_vulnerability = min(1.0, base_vulnerability * basement_multiplier)

    # Apply context adjustment
    vulnerability_index = base_vulnerability * context_factor

    # Risk level based on final vulnerability_index with threshold mapping
    if vulnerability_index >= 0.80:
        final_risk = 'very_high'
    elif vulnerability_index >= 0.65:
        final_risk = 'high'
    elif vulnerability_index >= 0.40:
        final_risk = 'moderate'
    elif vulnerability_index >= 0.20:
        final_risk = 'low'
    else:
        final_risk = 'very_low'

    # Keep context-based label if more severe; an unrecognised context label
    # is treated as 'moderate'.
    risk_levels_order = ['very_low', 'low', 'moderate', 'high', 'very_high']
    context_severity = risk_levels_order.index(context_risk_level) if context_risk_level in risk_levels_order else 2
    final_severity = risk_levels_order.index(final_risk)
    risk_level = risk_levels_order[max(context_severity, final_severity)]

    # Track component scores for SHAP
    components = {
        'proximity_score': proximity_score,
        'tpi_score': tpi_score,
        'slope_score': slope_score,
        'height_score': height_score,
        'elevation': elevation
    }

    # Calculate uncertainty
    uncertainty_analysis = calculate_uncertainty(
        terrain_metrics,
        water_distance,
        context_factor,
        lat,
        lon
    )

    # Calculate confidence interval
    confidence_interval = calculate_confidence_interval(
        vulnerability_index,
        uncertainty_analysis['uncertainty']
    )

    return {
        'vulnerability_index': round(vulnerability_index, 3),
        'confidence_interval': confidence_interval,
        'risk_level': risk_level,
        # Compare against None explicitly: a distance of 0.0 m is a valid
        # value and must not be reported as missing.
        'distance_to_water_m': round(water_distance, 1) if water_distance is not None else None,
        'elevation_m': elevation,
        'relative_elevation_m': round(tpi, 2),
        'slope_degrees': round(slope, 2),
        'uncertainty_analysis': uncertainty_analysis,
        'components': components
    }
def calculate_uncertainty(terrain_metrics, water_distance, context_factor, lat, lon):
    """
    Estimate the uncertainty of a vulnerability score, per input and overall.

    Every input is assigned a fixed uncertainty from a small lookup on its
    value (largest when the value is missing). The per-input values are then
    combined as a weighted root-sum-of-squares, damped by an empirical 0.7
    factor, and turned into a confidence of ``1 - uncertainty`` clipped to
    the 0-1 range.

    Args:
        terrain_metrics: Mapping read with ``.get()`` for 'elevation', 'tpi'
            and 'slope'. Any of the three may be absent or None.
        water_distance: Distance to the nearest water body, or None when it
            is unknown.
        context_factor: Numeric context multiplier; compared against 0.7
            and 0.95 to pick the context uncertainty.
        lat: Accepted so existing callers keep working; does not affect the
            result.
        lon: Accepted so existing callers keep working; does not affect the
            result.

    Returns:
        dict with keys:
            'confidence': overall confidence, rounded to 3 decimals.
            'uncertainty': combined uncertainty, rounded to 3 decimals.
            'components': per-input uncertainties, rounded to 3 decimals.
            'interpretation': text from interpret_confidence().
            'data_quality_flags': list from get_quality_flags().
            'dominant_error_sources': the three largest (name, value) pairs.
    """
    uncertainties = {}

    # 1. ELEVATION UNCERTAINTY
    # Low-lying sites get the larger value: a small elevation error matters
    # most where the ground is close to the water level.
    elevation = terrain_metrics.get('elevation')
    if elevation is None:
        uncertainties['elevation'] = 0.15
    elif elevation < 10:
        uncertainties['elevation'] = 0.08  # coastal - elevation matters a lot
    elif elevation < 100:
        uncertainties['elevation'] = 0.06  # low inland
    else:
        uncertainties['elevation'] = 0.03  # elevated - less critical

    # 2. TPI UNCERTAINTY
    tpi = terrain_metrics.get('tpi')
    if tpi is None:
        uncertainties['tpi'] = 0.12
    elif abs(tpi) < 2:
        uncertainties['tpi'] = 0.10  # near-flat, hard to classify
    elif abs(tpi) < 5:
        uncertainties['tpi'] = 0.06
    else:
        uncertainties['tpi'] = 0.04  # clear depression/ridge

    # 3. SLOPE UNCERTAINTY
    # The raw value is tested for None before any numeric comparison:
    # coercing a missing slope to 0 would make it indistinguishable from
    # genuinely flat terrain and hide the missing-data penalty.
    slope = terrain_metrics.get('slope')
    if slope is None:
        uncertainties['slope'] = 0.10
    elif slope < 2:
        uncertainties['slope'] = 0.08  # very flat = uncertain
    elif slope < 10:
        uncertainties['slope'] = 0.04
    else:
        uncertainties['slope'] = 0.03  # steep = clear signal

    # 4. WATER DISTANCE UNCERTAINTY
    if water_distance is None:
        uncertainties['water_proximity'] = 0.20
    elif water_distance < 50:
        uncertainties['water_proximity'] = 0.03
    elif water_distance < 500:
        uncertainties['water_proximity'] = 0.06
    elif water_distance < 2000:
        uncertainties['water_proximity'] = 0.10
    else:
        uncertainties['water_proximity'] = 0.15

    # 5. CONTEXT UNCERTAINTY
    if context_factor < 0.7:
        uncertainties['context'] = 0.04
    elif context_factor > 0.95:
        uncertainties['context'] = 0.06
    else:
        uncertainties['context'] = 0.03

    # 6. MODEL STRUCTURAL UNCERTAINTY
    uncertainties['model'] = 0.08

    # Weight by component importance in the vulnerability calculation.
    weights = {
        'elevation': 0.20,
        'tpi': 0.30,
        'slope': 0.15,
        'water_proximity': 0.25,
        'context': 0.05,
        'model': 0.05,
    }

    # Weighted root-sum-of-squares, then the empirical 0.7 damping factor.
    weighted_variance = sum(weights[k] * (v ** 2) for k, v in uncertainties.items())
    # float() turns the NumPy scalar into a plain Python float so the
    # returned values are ordinary numbers.
    total_uncertainty = float(np.sqrt(weighted_variance)) * 0.7
    confidence = max(0.0, min(1.0, 1.0 - total_uncertainty))

    # The three largest per-input uncertainties, biggest first.
    dominant_sources = sorted(
        uncertainties.items(), key=lambda item: item[1], reverse=True
    )[:3]

    return {
        'confidence': round(confidence, 3),
        'uncertainty': round(total_uncertainty, 3),
        'components': {k: round(v, 3) for k, v in uncertainties.items()},
        'interpretation': interpret_confidence(confidence),
        'data_quality_flags': get_quality_flags(terrain_metrics, water_distance),
        'dominant_error_sources': dominant_sources,
    }
def get_quality_flags(terrain_metrics, water_distance):
    """
    List the data-quality issues that apply to one assessment.

    Args:
        terrain_metrics: Mapping read with ``.get()`` for 'elevation', 'tpi'
            and 'slope'. Any of the three may be absent or None.
        water_distance: Distance to the nearest water body, or None when it
            is unknown.

    Returns:
        A list of flag strings, in a fixed order: missing-input flags first,
        then the water-distance flag, then the terrain-derived flags. The
        list is empty when nothing applies.
    """
    elevation = terrain_metrics.get('elevation')
    tpi = terrain_metrics.get('tpi')
    slope = terrain_metrics.get('slope')

    flags = []
    if elevation is None:
        flags.append('missing_elevation')
    if tpi is None:
        flags.append('missing_tpi')
    if slope is None:
        flags.append('missing_slope')

    if water_distance is None:
        flags.append('water_distance_unknown')
    elif water_distance > 5000:
        flags.append('far_from_water_search_limited')

    if slope is not None and slope > 20:
        flags.append('steep_terrain_dem_error_high')

    # Requires a real elevation reading: treating a missing elevation as 0
    # would label every data-poor site near water as a sea-level coastal
    # site, which the data does not support ('missing_elevation' already
    # reports the gap).
    if (
        elevation is not None
        and elevation < 1
        and water_distance is not None
        and water_distance < 100
    ):
        flags.append('coastal_surge_risk_not_modeled')

    return flags
def interpret_confidence(confidence):
    """
    Translate a numeric confidence into a short human-readable verdict.

    Args:
        confidence: Confidence value; compared against the 0.85 / 0.75 /
            0.65 / 0.50 lower bounds below.

    Returns:
        The message of the first band whose lower bound the value reaches,
        or the low-confidence message when it reaches none of them.
    """
    # Bands are ordered from strictest to loosest; the first match wins.
    bands = (
        (0.85, "High confidence - complete terrain data with low uncertainty"),
        (0.75, "Good confidence - reliable data sources available"),
        (0.65, "Moderate confidence - some data limitations present"),
        (0.50, "Fair confidence - significant data gaps or measurement uncertainty"),
    )
    for lower_bound, message in bands:
        if confidence >= lower_bound:
            return message
    return "Low confidence - substantial missing data, use with caution"
def calculate_confidence_interval(vulnerability_index, uncertainty):
    """
    Build a 95% interval around a vulnerability index.

    The half-width is 1.96 times the given uncertainty. The bounds are
    clipped to the 0-1 range; the reported margin is the unclipped
    half-width.

    Args:
        vulnerability_index: Point estimate of the index.
        uncertainty: Uncertainty used to size the interval.

    Returns:
        dict with 'point_estimate', 'lower_bound_95', 'upper_bound_95' and
        'margin_of_error', each rounded to 3 decimals.
    """
    half_width = 1.96 * uncertainty
    raw_low = vulnerability_index - half_width
    raw_high = vulnerability_index + half_width

    interval = {'point_estimate': round(vulnerability_index, 3)}
    # Keep the bounds inside the valid 0-1 index range.
    interval['lower_bound_95'] = round(max(0.0, raw_low), 3)
    interval['upper_bound_95'] = round(min(1.0, raw_high), 3)
    interval['margin_of_error'] = round(half_width, 3)
    return interval
def calculate_multi_hazard_vulnerability(lat, lon, height, basement, terrain_metrics, water_distance):
    """
    Extend the base vulnerability assessment with coastal and pluvial scores.

    The index returned by calculate_vulnerability_index() is used as the
    fluvial/riverine score. A coastal-surge score (from coast proximity and
    elevation) and a pluvial/drainage score (from TPI, slope and elevation)
    are computed here, and the three are blended with weights chosen by
    elevation band.

    Args:
        lat: Latitude, forwarded to the base assessment and check_coastal().
        lon: Longitude, forwarded to the base assessment and check_coastal().
        height: Forwarded unchanged to calculate_vulnerability_index().
        basement: Forwarded unchanged to calculate_vulnerability_index().
        terrain_metrics: Mapping read with ``.get()`` for 'elevation', 'tpi'
            and 'slope'. Missing or None values are treated as 0 here.
        water_distance: Forwarded unchanged to calculate_vulnerability_index().

    Returns:
        The base result dict with two extra keys:
            'hazard_breakdown': the three hazard scores and 'combined_index',
                each rounded to 3 decimals.
            'dominant_hazard': name of the hazard with the highest score.
    """
    # Base assessment doubles as the fluvial/riverine score.
    base_result = calculate_vulnerability_index(
        lat, lon, height, basement, terrain_metrics, water_distance
    )
    fluvial_risk = base_result['vulnerability_index']

    # Missing terrain values fall back to 0.
    elevation = terrain_metrics.get('elevation') or 0
    tpi = terrain_metrics.get('tpi') or 0
    slope = terrain_metrics.get('slope') or 0

    # --- Coastal surge risk ---
    from spatial_queries import check_coastal
    is_coastal, coast_distance = check_coastal(lat, lon)

    # Score kept once elevation is too high for the ramp below to matter.
    residual_surge = 0.15
    # Guard against a missing distance so the comparison cannot raise.
    near_coast = (
        is_coastal and coast_distance is not None and coast_distance < 5000
    )
    if not near_coast:
        coastal_risk = 0.0
    elif elevation < 2:
        coastal_risk = 0.99
    elif elevation < 10:
        # Linear decrease from 0.99, floored at the residual value so the
        # score never rises again when elevation crosses 10 (a lower floor
        # would rank a 9.9 m site as safer than a 10 m one).
        coastal_risk = max(residual_surge, 0.99 - ((elevation - 2) / 8) * 0.95)
    else:
        coastal_risk = residual_surge

    # --- Pluvial (drainage) risk ---
    # Depressions (negative TPI) collect water; the factor grows from 0.5
    # at TPI 0 to 1.0 at TPI -5 and below.
    if tpi < -5:
        topo_factor = 1.0
    elif tpi < 0:
        topo_factor = 0.5 + abs(tpi) / 5 * 0.5
    else:
        topo_factor = 0.5

    # Flatter ground gets the higher factor.
    if slope < 1:
        slope_factor = 0.85
    elif slope < 3:
        slope_factor = 0.65
    else:
        slope_factor = 0.3

    # Pluvial score is scaled down at higher elevations.
    if elevation > 800:
        elevation_decay = max(0.1, 1.0 - (elevation - 800) / 1000)
    elif elevation > 400:
        elevation_decay = max(0.5, 1.0 - (elevation - 400) / 800)
    else:
        elevation_decay = 1.0

    pluvial_risk = (topo_factor * 0.6 + slope_factor * 0.4) * elevation_decay

    # --- Combined index with elevation-dependent weights ---
    # NOTE(review): the band is picked from elevation alone, so a low-lying
    # site that is not near the coast still gives half its weight to a
    # coastal score of 0 - confirm this is intended.
    if elevation < 10:  # Coastal zone
        weights = {'fluvial': 0.3, 'coastal': 0.5, 'pluvial': 0.2}
    elif elevation < 100:  # Low inland
        weights = {'fluvial': 0.5, 'coastal': 0.1, 'pluvial': 0.4}
    else:  # Elevated
        weights = {'fluvial': 0.6, 'coastal': 0.0, 'pluvial': 0.4}

    combined = (
        fluvial_risk * weights['fluvial']
        + coastal_risk * weights['coastal']
        + pluvial_risk * weights['pluvial']
    )

    # Dominant hazard is the one with the highest unweighted score.
    hazards = {
        'fluvial_riverine': fluvial_risk,
        'coastal_surge': coastal_risk,
        'pluvial_drainage': pluvial_risk,
    }
    dominant = max(hazards, key=hazards.get)

    return {
        **base_result,
        'hazard_breakdown': {
            'fluvial_riverine': round(fluvial_risk, 3),
            'coastal_surge': round(coastal_risk, 3),
            'pluvial_drainage': round(pluvial_risk, 3),
            'combined_index': round(combined, 3),
        },
        'dominant_hazard': dominant,
    }