|
|
import numpy as np
|
|
|
import pandas as pd
|
|
|
import matplotlib.pyplot as plt
|
|
|
from shapely.geometry import Point, Polygon
|
|
|
import random
|
|
|
import datetime
|
|
|
import gradio as gr
|
|
|
import tempfile
|
|
|
import os
|
|
|
import requests
|
|
|
import json
|
|
|
from typing import List, Tuple, Optional, Dict, Any, Union
|
|
|
|
|
|
def _classify_osm_tags(tags: Dict[str, Any]) -> Optional[str]:
    """Map an OSM tag dict to one of this tool's zone-type labels.

    Returns None for elements that should be ignored. The check order
    matters: water-related tags win over landuse/leisure tags.
    """
    if tags.get('natural') == 'water':
        return 'Water'
    if tags.get('landuse') == 'reservoir':
        return 'Water'
    if 'water' in tags:
        return 'Water'
    if tags.get('waterway') in ('river', 'stream', 'canal'):
        return 'Water'
    if tags.get('leisure') == 'park':
        return 'Park'
    if tags.get('landuse') in ('forest', 'grass'):
        return 'Green space'
    if tags.get('natural') == 'wood':
        return 'Forest'
    if tags.get('landuse') in ('industrial', 'commercial'):
        return 'Industrial/Commercial'
    if 'highway' in tags:
        return 'Major road'
    return None


def fetch_osm_exclusion_zones(bounds: Tuple[float, float, float, float], exclusion_types: List[str]) -> Optional[Any]:
    """
    Fetch exclusion zones from OpenStreetMap using the Overpass API.

    Args:
        bounds: (min_lat, min_lon, max_lat, max_lon) bounding box
        exclusion_types: List of exclusion types to fetch; recognised values
            are "Water bodies", "Parks & green spaces", "Industrial areas"
            and "Major roads"

    Returns:
        GeoDataFrame (EPSG:4326) with a 'zone_type' column and polygon
        geometries, or None if nothing was requested, nothing was found,
        or the fetch/processing failed.
    """
    try:
        # Imported lazily so the rest of the app works without geopandas.
        import geopandas as gpd
        from shapely.geometry import Polygon, MultiPolygon, LineString

        overpass_url = "https://overpass-api.de/api/interpreter"

        # Overpass bounding boxes are "south,west,north,east".
        bbox = f"{bounds[0]},{bounds[1]},{bounds[2]},{bounds[3]}"

        queries = []

        if "Water bodies" in exclusion_types:
            queries.extend([
                f'way["natural"="water"]({bbox});',
                f'relation["natural"="water"]({bbox});',
                f'way["landuse"="reservoir"]({bbox});',
                f'way["water"="lake"]({bbox});',
                f'way["water"="pond"]({bbox});',
                # Linear waterways: buffered into polygons further below.
                f'way["waterway"="river"]({bbox});',
                f'way["waterway"="stream"]({bbox});',
                f'way["waterway"="canal"]({bbox});'
            ])

        if "Parks & green spaces" in exclusion_types:
            queries.extend([
                f'way["leisure"="park"]({bbox});',
                f'way["landuse"="forest"]({bbox});',
                f'way["landuse"="grass"]({bbox});',
                f'way["natural"="wood"]({bbox});'
            ])

        if "Industrial areas" in exclusion_types:
            queries.extend([
                f'way["landuse"="industrial"]({bbox});',
                f'way["landuse"="commercial"]({bbox});'
            ])

        if "Major roads" in exclusion_types:
            queries.extend([
                f'way["highway"~"motorway|trunk|primary"]({bbox});'
            ])

        if not queries:
            return None

        # chr(10) is "\n": backslashes are not allowed inside f-string
        # expressions before Python 3.12.
        overpass_query = f"""
        [out:json][timeout:25];
        (
        {chr(10).join(queries)}
        );
        out geom;
        """

        print(f"Fetching OSM data for exclusion zones: {exclusion_types}")

        # Explicit client-side timeout so a stalled Overpass server cannot
        # hang the app forever (the query itself is capped at 25 s server-side).
        response = requests.get(overpass_url, params={'data': overpass_query}, timeout=60)
        response.raise_for_status()

        data = response.json()

        if 'elements' not in data or not data['elements']:
            print("No exclusion zones found in the specified area")
            return None

        polygons = []
        zone_types = []

        for element in data['elements']:
            try:
                # Only ways with inline geometry (from "out geom;") are usable.
                if element['type'] == 'way' and 'geometry' in element:
                    tags = element.get('tags', {})

                    zone_type = _classify_osm_tags(tags)
                    if zone_type is None:
                        continue

                    # Shapely expects (x=lon, y=lat) coordinate order.
                    coords = [(node['lon'], node['lat']) for node in element['geometry']]

                    if 'waterway' in tags or 'highway' in tags:
                        # Linear features: buffer the centreline into a
                        # polygon so it can act as an exclusion area.
                        if len(coords) >= 2:
                            try:
                                line = LineString(coords)

                                # Half-widths in metres, converted to degrees
                                # (~111320 m per degree of latitude).
                                if 'waterway' in tags:
                                    if tags['waterway'] == 'river':
                                        buffer_size = 50 / 111320
                                    elif tags['waterway'] == 'canal':
                                        buffer_size = 30 / 111320
                                    else:
                                        buffer_size = 20 / 111320
                                else:
                                    buffer_size = 25 / 111320

                                polygon = line.buffer(buffer_size)
                                if polygon.is_valid and polygon.area > 0:
                                    polygons.append(polygon)
                                    zone_types.append(zone_type)
                            except Exception as e:
                                print(f"Error buffering linear feature: {str(e)}")
                                continue
                    else:
                        # Area features: build a polygon from the way's ring.
                        if len(coords) > 2:
                            # Close the ring if the way is not already closed.
                            if coords[0] != coords[-1]:
                                coords.append(coords[0])

                            if len(coords) >= 4:
                                try:
                                    polygon = Polygon(coords)
                                    if polygon.is_valid and polygon.area > 0:
                                        polygons.append(polygon)
                                        zone_types.append(zone_type)
                                except Exception as e:
                                    print(f"Error creating polygon: {str(e)}")
                                    continue

            except Exception as e:
                # Skip malformed elements but keep processing the rest.
                print(f"Error processing OSM element: {str(e)}")
                continue

        if not polygons:
            print("No valid polygons found in OSM data")
            return None

        gdf = gpd.GeoDataFrame(
            {'zone_type': zone_types},
            geometry=polygons,
            crs='EPSG:4326'
        )

        print(f"Successfully fetched {len(gdf)} exclusion zones from OpenStreetMap")
        print(f"Zone types found: {gdf['zone_type'].value_counts().to_dict()}")
        return gdf

    except ImportError:
        print("GeoPandas not available for OSM processing")
        return None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data from OpenStreetMap: {str(e)}")
        return None
    except Exception as e:
        print(f"Error processing OpenStreetMap data: {str(e)}")
        return None
|
|
|
|
|
|
def calculate_bounds_from_points(input_df: pd.DataFrame, buffer_km: float = 2.0) -> Tuple[float, float, float, float]:
    """Return a (min_lat, min_lon, max_lat, max_lon) box enclosing all points.

    The box is padded by roughly ``buffer_km`` kilometres on every side,
    using the ~111 km-per-degree-of-latitude approximation for both axes.
    """
    pad = buffer_km / 111.0

    lats = input_df['lat']
    lons = input_df['lon']

    return (
        lats.min() - pad,
        lons.min() - pad,
        lats.max() + pad,
        lons.max() + pad,
    )
|
|
|
|
|
|
class SpatialDiffuser:
    """
    Class for performing spatial diffusion - takes points with counts and diffuses them
    according to specified distributions within given radii, with optional exclusion zones.
    """

    def __init__(self):
        # Dispatch table mapping distribution names to generator methods.
        self.distribution_methods = {
            "uniform": self._uniform_distribution,
            "normal": self._normal_distribution,
            "exponential_decay": self._exponential_decay,
            "distance_weighted": self._distance_weighted
        }

    def diffuse_points(self,
                       input_data: pd.DataFrame,
                       distribution_type: str = "uniform",
                       global_radius: Optional[float] = None,
                       time_start: Optional[datetime.datetime] = None,
                       time_end: Optional[datetime.datetime] = None,
                       seed: Optional[int] = None,
                       exclusion_zones_gdf: Optional[Any] = None) -> pd.DataFrame:
        """
        Generate diffused points based on input coordinates and counts.

        Args:
            input_data: DataFrame with columns: lat, lon, count, radius (optional)
            distribution_type: Type of spatial distribution to use
            global_radius: Radius to use for all points if not specified individually (in meters)
            time_start: Start time for temporal distribution
            time_end: End time for temporal distribution
            seed: Random seed for reproducible results
            exclusion_zones_gdf: GeoDataFrame with polygons to exclude points from

        Returns:
            DataFrame with columns: lat, lon, source_id, timestamp (if temporal)

        Raises:
            ValueError: Unknown distribution type, or no radius available
                for a point (neither per-row nor global).
        """
        if seed is not None:
            # Seed both RNGs: the distribution methods mix numpy and random.
            np.random.seed(seed)
            random.seed(seed)

        if distribution_type not in self.distribution_methods:
            raise ValueError(f"Distribution type '{distribution_type}' not supported. Choose from: {list(self.distribution_methods.keys())}")

        # Timestamps are attached only when a full time range is given.
        temporal = time_start is not None and time_end is not None

        all_points = []

        for idx, row in input_data.iterrows():
            radius = row.get('radius', global_radius)
            # A missing 'radius' column, None, and NaN (pandas' missing value
            # in a partially-filled column) all fall back to the global radius.
            if radius is None or pd.isna(radius):
                radius = global_radius
            if radius is None:
                raise ValueError("Radius must be specified either globally or per point")

            count = int(row['count'])
            if count <= 0:
                # Nothing to generate for this source point.
                continue

            new_points = self._generate_points_with_exclusions(
                lat=row['lat'],
                lon=row['lon'],
                count=count,
                radius=radius,
                distribution_type=distribution_type,
                exclusion_zones_gdf=exclusion_zones_gdf
            )

            if temporal:
                timestamps = self._generate_timestamps(len(new_points), time_start, time_end)
                for point, ts in zip(new_points, timestamps):
                    all_points.append({
                        'lat': point[0],
                        'lon': point[1],
                        'source_id': idx,
                        'timestamp': ts
                    })
            else:
                for point in new_points:
                    all_points.append({
                        'lat': point[0],
                        'lon': point[1],
                        'source_id': idx
                    })

        # Passing columns explicitly keeps the schema stable even when no
        # points were generated (empty input or all counts <= 0).
        columns = ['lat', 'lon', 'source_id'] + (['timestamp'] if temporal else [])
        return pd.DataFrame(all_points, columns=columns)

    def _generate_points_with_exclusions(self, lat: float, lon: float, count: int, radius: float,
                                         distribution_type: str, exclusion_zones_gdf: Optional[Any] = None) -> List[Tuple[float, float]]:
        """Generate points while avoiding exclusion zones.

        Falls back to unconstrained generation when geopandas is missing or
        the exclusion data cannot be processed. May return fewer than
        ``count`` points when the exclusion zones are very restrictive.
        """
        if exclusion_zones_gdf is None or len(exclusion_zones_gdf) == 0:
            # No exclusions: delegate straight to the distribution method.
            return self.distribution_methods[distribution_type](lat, lon, count, radius)

        try:
            import geopandas as gpd
            from shapely.geometry import Point

            valid_points = []
            # Cap total work so very restrictive zones cannot loop forever.
            max_attempts = count * 10
            attempts = 0

            # Ensure the zones are in WGS84 so they are comparable with the
            # generated lat/lon points.
            if exclusion_zones_gdf.crs is None:
                exclusion_zones_gdf = exclusion_zones_gdf.set_crs('EPSG:4326')
            elif exclusion_zones_gdf.crs != 'EPSG:4326':
                exclusion_zones_gdf = exclusion_zones_gdf.to_crs('EPSG:4326')

            # Hoist the geometries out of the rejection loop; iterating the
            # GeoDataFrame row-by-row for every candidate point is needlessly
            # slow.
            exclusion_geoms = list(exclusion_zones_gdf.geometry)

            while len(valid_points) < count and attempts < max_attempts:
                # Oversample each batch since some candidates get rejected.
                batch_size = min(count * 2, max_attempts - attempts)
                candidate_points = self.distribution_methods[distribution_type](
                    lat, lon, batch_size, radius
                )

                for point in candidate_points:
                    if len(valid_points) >= count:
                        break

                    # Points are (lat, lon); shapely expects (x=lon, y=lat).
                    point_geom = Point(point[1], point[0])

                    if not any(point_geom.intersects(geom) for geom in exclusion_geoms):
                        valid_points.append(point)

                attempts += batch_size

            if len(valid_points) < count:
                print(f"Warning: Could only generate {len(valid_points)} valid points out of {count} requested for location ({lat}, {lon}). Exclusion zones may be too restrictive.")

            return valid_points

        except ImportError:
            print("GeoPandas not available for exclusion zone processing. Generating points without exclusions.")
            return self.distribution_methods[distribution_type](lat, lon, count, radius)
        except Exception as e:
            print(f"Error processing exclusion zones: {str(e)}. Generating points without exclusions.")
            return self.distribution_methods[distribution_type](lat, lon, count, radius)

    @staticmethod
    def _offset_point(lat: float, lon: float, x: float, y: float) -> Tuple[float, float]:
        """Convert metre offsets (x east, y north) to a new (lat, lon) pair.

        Uses the equirectangular approximation: ~111320 m per degree of
        latitude, scaled by cos(lat) for longitude.
        """
        new_lat = lat + y / 111320
        new_lon = lon + x / (111320 * np.cos(np.radians(lat)))
        return (new_lat, new_lon)

    def _uniform_distribution(self, lat: float, lon: float, count: int, radius: float) -> List[Tuple[float, float]]:
        """Generate points uniformly distributed within a circle."""
        points = []

        for _ in range(count):
            angle = random.uniform(0, 2 * np.pi)
            # sqrt keeps the area density uniform (r ~ sqrt(U) * R).
            r = radius * np.sqrt(random.uniform(0, 1))
            points.append(self._offset_point(lat, lon, r * np.cos(angle), r * np.sin(angle)))

        return points

    def _normal_distribution(self, lat: float, lon: float, count: int, radius: float) -> List[Tuple[float, float]]:
        """Generate points with normal distribution (more concentrated near center)."""
        points = []

        # With std = radius/3 about 99.7% of draws already land inside the
        # circle, so the rejection loop below rarely retries.
        std_dev = radius / 3

        for _ in range(count):
            while True:
                x = np.random.normal(0, std_dev)
                y = np.random.normal(0, std_dev)
                # Reject draws outside the circle to honour the hard radius.
                if np.sqrt(x**2 + y**2) <= radius:
                    break

            points.append(self._offset_point(lat, lon, x, y))

        return points

    def _exponential_decay(self, lat: float, lon: float, count: int, radius: float) -> List[Tuple[float, float]]:
        """Generate points with exponential decay from center."""
        points = []

        # Mean distance radius/3; draws beyond the radius are rejected.
        rate = 3.0 / radius

        for _ in range(count):
            angle = random.uniform(0, 2 * np.pi)

            while True:
                r = random.expovariate(rate)
                if r <= radius:
                    break

            points.append(self._offset_point(lat, lon, r * np.cos(angle), r * np.sin(angle)))

        return points

    def _distance_weighted(self, lat: float, lon: float, count: int, radius: float) -> List[Tuple[float, float]]:
        """
        Generate points with a custom distance-weighted distribution
        (more points at medium distances than at center or edge)
        """
        points = []

        for _ in range(count):
            angle = random.uniform(0, 2 * np.pi)

            # Beta(2, 2) peaks at 0.5, so r^2 (and hence area density)
            # concentrates at medium distances.
            r = np.sqrt(random.betavariate(2, 2)) * radius

            points.append(self._offset_point(lat, lon, r * np.cos(angle), r * np.sin(angle)))

        return points

    def _generate_timestamps(self, count: int, start_time: datetime.datetime, end_time: datetime.datetime) -> List[datetime.datetime]:
        """Generate ``count`` uniformly distributed timestamps, sorted ascending."""
        start_ts = start_time.timestamp()
        end_ts = end_time.timestamp()

        timestamps = [
            datetime.datetime.fromtimestamp(random.uniform(start_ts, end_ts))
            for _ in range(count)
        ]

        # Sorted so downstream consumers see a chronological sequence.
        timestamps.sort()

        return timestamps
|
|
|
|
|
|
def create_visualization(input_df, output_df, show_basemap=False, exclusion_zones_gdf=None):
    """Create visualization of input and diffused points.

    Args:
        input_df: DataFrame of source points with 'lat', 'lon', 'count' and
            optionally 'radius' (metres) columns.
        output_df: DataFrame of generated points with 'lat' and 'lon' columns.
        show_basemap: If True, reproject to Web Mercator and draw a contextily
            tile basemap underneath (requires internet plus contextily and
            geopandas; falls back to the plain lat/lon plot on any failure).
        exclusion_zones_gdf: Optional GeoDataFrame of exclusion polygons with
            a 'zone_type' column, drawn as filled areas.

    Returns:
        A matplotlib Figure.
    """
    fig, ax = plt.subplots(figsize=(12, 10))

    fig.patch.set_facecolor('white')
    ax.set_facecolor('#f8f9fa')

    # Fill colours per exclusion zone type; 'Other' is the fallback.
    exclusion_colors = {
        'Water': '#4FC3F7',
        'Park': '#66BB6A',
        'Green space': '#81C784',
        'Forest': '#4CAF50',
        'Industrial/Commercial': '#90A4AE',
        'Major road': '#FFD54F',
        'Other': '#FFAB91'
    }

    if show_basemap:
        try:
            # Optional dependencies, only needed for the basemap branch.
            import contextily as ctx
            import geopandas as gpd
            from shapely.geometry import Point

            # Wrap both point sets as GeoDataFrames in WGS84...
            input_gdf = gpd.GeoDataFrame(
                input_df,
                geometry=[Point(lon, lat) for lon, lat in zip(input_df['lon'], input_df['lat'])],
                crs='EPSG:4326'
            )
            output_gdf = gpd.GeoDataFrame(
                output_df,
                geometry=[Point(lon, lat) for lon, lat in zip(output_df['lon'], output_df['lat'])],
                crs='EPSG:4326'
            )

            # ...then reproject to Web Mercator to match the tile basemap.
            input_gdf_merc = input_gdf.to_crs('EPSG:3857')
            output_gdf_merc = output_gdf.to_crs('EPSG:3857')

            if exclusion_zones_gdf is not None and len(exclusion_zones_gdf) > 0:
                try:
                    exclusion_zones_merc = exclusion_zones_gdf.to_crs('EPSG:3857')

                    # One legend entry per zone type, not per polygon.
                    plotted_types = set()
                    for zone_type in exclusion_zones_merc['zone_type'].unique():
                        zone_subset = exclusion_zones_merc[exclusion_zones_merc['zone_type'] == zone_type]
                        color = exclusion_colors.get(zone_type, exclusion_colors['Other'])

                        label = zone_type if zone_type not in plotted_types else None
                        if label:
                            plotted_types.add(zone_type)

                        zone_subset.plot(ax=ax, color=color, alpha=0.6, edgecolor='white',
                                         linewidth=0.5, label=label)

                except Exception as e:
                    print(f"Error plotting exclusion zones: {str(e)}")

            input_x = input_gdf_merc.geometry.x
            input_y = input_gdf_merc.geometry.y
            output_x = output_gdf_merc.geometry.x
            output_y = output_gdf_merc.geometry.y

            # Generated points first so source markers draw on top of them.
            ax.scatter(output_x, output_y,
                       alpha=0.7, color='#FF9800', s=12, label=f'Generated Points (n={len(output_df)})',
                       edgecolors='white', linewidth=0.3)

            # Dashed circles showing each source point's diffusion radius.
            for idx, row in input_df.iterrows():
                radius = row.get('radius', None)

                if radius is not None:
                    center_point = gpd.GeoDataFrame(
                        [1], geometry=[Point(row['lon'], row['lat'])], crs='EPSG:4326'
                    ).to_crs('EPSG:3857')

                    center_x = center_point.geometry.x.iloc[0]
                    center_y = center_point.geometry.y.iloc[0]

                    # NOTE(review): radius is in metres but is used directly as
                    # Web Mercator units, which are stretched by ~1/cos(lat)
                    # away from the equator — the drawn circle is approximate.
                    circle = plt.Circle((center_x, center_y), radius,
                                        fill=False, color='#9C27B0', linestyle='--',
                                        alpha=0.5, linewidth=2)
                    ax.add_patch(circle)

            # Scale source marker sizes linearly between min_size and max_size
            # according to each point's count.
            min_size = 100
            max_size = 800
            if len(input_df) > 1:
                size_range = input_df['count'].max() - input_df['count'].min()
                if size_range > 0:
                    sizes = min_size + (input_df['count'] - input_df['count'].min()) / size_range * (max_size - min_size)
                else:
                    sizes = [min_size] * len(input_df)
            else:
                sizes = [max_size]

            ax.scatter(input_x, input_y,
                       s=sizes, c='#9C27B0', alpha=0.9,
                       edgecolors='white', linewidth=2,
                       label='Source Points (size = count)', zorder=5)

            # Annotate each source point with its count.
            for idx, row in input_df.iterrows():
                point_merc = gpd.GeoDataFrame(
                    [1], geometry=[Point(row['lon'], row['lat'])], crs='EPSG:4326'
                ).to_crs('EPSG:3857')

                x_merc = point_merc.geometry.x.iloc[0]
                y_merc = point_merc.geometry.y.iloc[0]

                ax.annotate(f'{int(row["count"])}',
                            (x_merc, y_merc),
                            xytext=(8, 8), textcoords='offset points',
                            fontsize=10, fontweight='bold', color='white',
                            bbox=dict(boxstyle='round,pad=0.3', facecolor='#9C27B0', alpha=0.8),
                            zorder=6)

            # Fetch and draw the tile basemap under everything plotted so far.
            try:
                ctx.add_basemap(ax, crs='EPSG:3857', source=ctx.providers.CartoDB.Positron, alpha=0.8)
                basemap_added = True
            except Exception as e:
                print(f"Could not add basemap: {str(e)}")
                # NOTE(review): basemap_added is never read after this point.
                basemap_added = False

            ax.set_xlabel('Easting (Web Mercator)', fontsize=12)
            ax.set_ylabel('Northing (Web Mercator)', fontsize=12)

            # Combined extents (in metres) used for the axis limits below.
            x_coords = list(input_x) + list(output_x)
            y_coords = list(input_y) + list(output_y)

        except ImportError:
            print("Contextily not available for basemap. Falling back to simple plot.")
            show_basemap = False
        except Exception as e:
            print(f"Error creating basemap: {str(e)}. Falling back to simple plot.")
            show_basemap = False

    if not show_basemap:
        # Plain lat/lon plot (also the fallback when the basemap failed).
        if exclusion_zones_gdf is not None and len(exclusion_zones_gdf) > 0:
            try:
                # Ensure the zones are in WGS84 to match the lat/lon axes.
                if exclusion_zones_gdf.crs != 'EPSG:4326':
                    exclusion_zones_gdf = exclusion_zones_gdf.to_crs('EPSG:4326')

                # One legend entry per zone type, not per polygon.
                plotted_types = set()
                for idx, zone in exclusion_zones_gdf.iterrows():
                    zone_type = zone.get('zone_type', 'Other')
                    color = exclusion_colors.get(zone_type, exclusion_colors['Other'])

                    label = zone_type if zone_type not in plotted_types else None
                    if label:
                        plotted_types.add(zone_type)

                    if zone.geometry.geom_type == 'Polygon':
                        x, y = zone.geometry.exterior.xy
                        ax.fill(x, y, color=color, alpha=0.6, edgecolor='white',
                                linewidth=0.5, label=label)
                    elif zone.geometry.geom_type == 'MultiPolygon':
                        for poly in zone.geometry.geoms:
                            x, y = poly.exterior.xy
                            ax.fill(x, y, color=color, alpha=0.6, edgecolor='white',
                                    linewidth=0.5, label=label)
                            # Only the first part carries the legend label.
                            label = None

            except Exception as e:
                print(f"Error plotting exclusion zones: {str(e)}")

        # Generated points first so source markers draw on top of them.
        ax.scatter(output_df['lon'], output_df['lat'],
                   alpha=0.7, color='#FF9800', s=12, label=f'Generated Points (n={len(output_df)})',
                   edgecolors='white', linewidth=0.3)

        # Dashed circles showing each source point's diffusion radius.
        for idx, row in input_df.iterrows():
            radius = row.get('radius', None)

            if radius is not None:
                # Convert the metre radius to degrees; latitude and longitude
                # scale differently, so the average of the two is used as an
                # approximation for the drawn circle.
                radius_deg_lat = radius / 111320
                radius_deg_lon = radius / (111320 * np.cos(np.radians(row['lat'])))

                radius_deg = (radius_deg_lat + radius_deg_lon) / 2

                circle = plt.Circle((row['lon'], row['lat']), radius_deg,
                                    fill=False, color='#9C27B0', linestyle='--',
                                    alpha=0.5, linewidth=2)
                ax.add_patch(circle)

        # Scale source marker sizes linearly between min_size and max_size
        # according to each point's count.
        min_size = 100
        max_size = 800
        if len(input_df) > 1:
            size_range = input_df['count'].max() - input_df['count'].min()
            if size_range > 0:
                sizes = min_size + (input_df['count'] - input_df['count'].min()) / size_range * (max_size - min_size)
            else:
                sizes = [min_size] * len(input_df)
        else:
            sizes = [max_size]

        ax.scatter(input_df['lon'], input_df['lat'],
                   s=sizes, c='#9C27B0', alpha=0.9,
                   edgecolors='white', linewidth=2,
                   label='Source Points (size = count)', zorder=5)

        # Annotate each source point with its count.
        for idx, row in input_df.iterrows():
            ax.annotate(f'{int(row["count"])}',
                        (row['lon'], row['lat']),
                        xytext=(8, 8), textcoords='offset points',
                        fontsize=10, fontweight='bold', color='white',
                        bbox=dict(boxstyle='round,pad=0.3', facecolor='#9C27B0', alpha=0.8),
                        zorder=6)

        ax.set_xlabel('Longitude', fontsize=12)
        ax.set_ylabel('Latitude', fontsize=12)

        # Combined extents (in degrees) used for the axis limits below.
        x_coords = list(input_df['lon']) + list(output_df['lon'])
        y_coords = list(input_df['lat']) + list(output_df['lat'])

    # Title reflects which options were active for this render.
    title = 'Spatial Diffusion Results'
    if show_basemap:
        title += ' (with Basemap)'
    if exclusion_zones_gdf is not None and len(exclusion_zones_gdf) > 0:
        title += ' - Exclusion Zones Applied'
    subtitle = 'Purple source points sized by count, orange generated points, dashed circles show diffusion radius'

    ax.set_title(f'{title}\n{subtitle}',
                 fontsize=14, fontweight='bold', pad=20)

    legend = ax.legend(loc='upper right', bbox_to_anchor=(1, 1),
                       frameon=True, fancybox=True, shadow=True)
    legend.get_frame().set_facecolor('white')
    legend.get_frame().set_alpha(0.9)

    # Lighter grid when a basemap sits underneath.
    grid_alpha = 0.2 if show_basemap else 0.3
    ax.grid(True, alpha=grid_alpha, linestyle='-', linewidth=0.5)

    ax.set_aspect('equal', 'box')

    # Pad the limits by 10% of the data extent so markers are not clipped;
    # fall back to a fixed margin when all points coincide on an axis.
    x_margin = (max(x_coords) - min(x_coords)) * 0.1
    y_margin = (max(y_coords) - min(y_coords)) * 0.1

    if x_margin == 0:
        x_margin = 1000 if show_basemap else 0.01
    if y_margin == 0:
        y_margin = 1000 if show_basemap else 0.01

    ax.set_xlim(min(x_coords) - x_margin, max(x_coords) + x_margin)
    ax.set_ylim(min(y_coords) - y_margin, max(y_coords) + y_margin)

    plt.tight_layout()

    return fig
|
|
|
|
|
|
def process_csv(file_obj, distribution_type, global_radius, show_basemap, auto_exclusions, exclusion_file, include_time, time_start, time_end, seed):
    """Process input CSV and generate diffused points.

    Gradio callback: validates all inputs, optionally loads or fetches
    exclusion zones, runs the spatial diffusion, writes the result CSV
    and builds the visualization.

    Args:
        file_obj: Uploaded CSV file; must contain 'lat', 'lon', 'count'.
        distribution_type: One of the SpatialDiffuser distribution names.
        global_radius: Radius in metres as free text; may be empty when the
            CSV carries its own 'radius' column.
        show_basemap: Whether to render a tile basemap under the plot.
        auto_exclusions: OSM exclusion categories to fetch automatically.
        exclusion_file: Optional uploaded vector file of exclusion polygons
            (takes precedence over auto_exclusions).
        include_time: Whether to also assign random timestamps.
        time_start: Range start as 'YYYY-MM-DD HH:MM:SS' text.
        time_end: Range end as 'YYYY-MM-DD HH:MM:SS' text.
        seed: Random seed as free text (optional).

    Returns:
        (matplotlib Figure, path to result CSV) on success, or
        (None, error message string) on failure.
    """
    try:
        df = pd.read_csv(file_obj.name)

        # The diffusion needs at least these three columns.
        required_cols = ['lat', 'lon', 'count']
        if not all(col in df.columns for col in required_cols):
            return None, f"Error: CSV must contain columns: {', '.join(required_cols)}"

        # Parse the optional global radius (arrives as free text).
        if global_radius and global_radius.strip():
            try:
                global_radius = float(global_radius)
            except ValueError:
                return None, "Error: Global radius must be a number"
        else:
            global_radius = None
            # Without a global radius every row must carry its own.
            if 'radius' not in df.columns:
                return None, "Error: Either provide a global radius or include a 'radius' column in the CSV"

        # Parse the optional random seed (arrives as free text).
        if seed and seed.strip():
            try:
                seed = int(seed)
            except ValueError:
                return None, "Error: Seed must be an integer"
        else:
            seed = None

        exclusion_zones_gdf = None

        # A custom uploaded file takes precedence over OSM auto-exclusions.
        if exclusion_file is not None:
            try:
                import geopandas as gpd

                file_extension = os.path.splitext(exclusion_file.name)[1].lower()

                # GeoPandas reads every supported format through the same
                # call; the extension check only gives a clearer error.
                if file_extension in ['.geojson', '.json', '.gpkg', '.shp']:
                    exclusion_zones_gdf = gpd.read_file(exclusion_file.name)
                else:
                    return None, f"Error: Unsupported exclusion zone file format: {file_extension}"

                # Assume WGS84 when the file does not declare a CRS.
                if exclusion_zones_gdf.crs is None:
                    exclusion_zones_gdf = exclusion_zones_gdf.set_crs('EPSG:4326')

                print(f"Loaded {len(exclusion_zones_gdf)} custom exclusion zones from {exclusion_file.name}")

            except ImportError:
                return None, "Error: GeoPandas required for exclusion zones processing"
            except Exception as e:
                return None, f"Error reading exclusion zones file: {str(e)}"

        elif auto_exclusions and len(auto_exclusions) > 0:
            try:
                # Fetch zones for a padded box around the input points.
                bounds = calculate_bounds_from_points(df)
                print(f"Fetching automatic exclusions for bounds: {bounds}")

                exclusion_zones_gdf = fetch_osm_exclusion_zones(bounds, auto_exclusions)

                if exclusion_zones_gdf is not None:
                    print(f"Fetched {len(exclusion_zones_gdf)} exclusion zones from OpenStreetMap")
                else:
                    print("No exclusion zones found in OpenStreetMap for this area")

            except Exception as e:
                # Auto-exclusions are best-effort: proceed without them.
                print(f"Warning: Could not fetch automatic exclusions: {str(e)}")
                exclusion_zones_gdf = None

        # Validate the optional time range.
        if include_time:
            if not time_start or not time_end:
                return None, "Error: If time distribution is enabled, both start and end times must be provided"
            try:
                time_start_dt = datetime.datetime.strptime(time_start, "%Y-%m-%d %H:%M:%S")
                time_end_dt = datetime.datetime.strptime(time_end, "%Y-%m-%d %H:%M:%S")
                if time_start_dt >= time_end_dt:
                    return None, "Error: End time must be after start time"
            except ValueError:
                return None, "Error: Invalid time format. Use YYYY-MM-DD HH:MM:SS"
        else:
            time_start_dt = None
            time_end_dt = None

        # Run the actual diffusion.
        diffuser = SpatialDiffuser()
        result_df = diffuser.diffuse_points(
            input_data=df,
            distribution_type=distribution_type,
            global_radius=global_radius,
            time_start=time_start_dt,
            time_end=time_end_dt,
            seed=seed,
            exclusion_zones_gdf=exclusion_zones_gdf
        )

        # Write the result into the system temp directory rather than the
        # current working directory, which may not be writable in deployment.
        temp_file = os.path.join(tempfile.gettempdir(), "diffused_points.csv")
        result_df.to_csv(temp_file, index=False)

        fig = create_visualization(df, result_df, show_basemap, exclusion_zones_gdf)

        return fig, temp_file

    except Exception as e:
        return None, f"Error: {str(e)}"
|
|
|
|
|
|
def create_diffusion_interface():
    """Create Gradio interface for the spatial diffusion tool.

    Returns:
        A gr.Blocks app wiring the CSV upload and option widgets to
        process_csv, with a plot and a downloadable CSV as outputs.
    """
    with gr.Blocks() as diffusion_interface:
        gr.Markdown("## 🗺️ Spatial Diffusion Tool")

        with gr.Row():
            # Left column: documentation and all input controls.
            with gr.Column(scale=1):
                gr.Markdown("""
                ### About This Tool
                Transform aggregated geographic points with counts into individual points using spatial diffusion methods.

                **Input CSV Format:**
                - `lat`: Latitude of source point
                - `lon`: Longitude of source point
                - `count`: Number of points to generate
                - `radius`: (Optional) Diffusion radius in meters

                **Distribution Types:**
                - **Uniform**: Equal probability throughout circle
                - **Normal**: Higher density near center
                - **Exponential Decay**: Density decreases from center
                - **Distance-Weighted**: More points at medium distances
                """)

                input_file = gr.File(label="Input CSV File", file_types=[".csv"])

                gr.Markdown("### 🎯 Distribution Options")
                with gr.Row():
                    distribution = gr.Dropdown(
                        choices=["uniform", "normal", "exponential_decay", "distance_weighted"],
                        value="uniform",
                        label="Distribution Type",
                        scale=2
                    )
                    seed = gr.Textbox(
                        label="Random Seed (optional)",
                        placeholder="e.g. 42",
                        scale=1
                    )

                # Used only when the CSV has no per-row 'radius' column.
                global_radius = gr.Textbox(
                    label="Global Radius (meters)",
                    placeholder="Only if radius column not in CSV"
                )

                with gr.Accordion("⏰ Temporal Distribution (Optional)", open=False):
                    include_time = gr.Checkbox(label="Enable Temporal Distribution", value=False)
                    # Grouped so both fields can be shown/hidden together.
                    with gr.Group() as time_group:
                        time_start = gr.Textbox(
                            label="Start Time",
                            placeholder="YYYY-MM-DD HH:MM:SS"
                        )
                        time_end = gr.Textbox(
                            label="End Time",
                            placeholder="YYYY-MM-DD HH:MM:SS"
                        )

                gr.Markdown("### 🗺️ Map & Exclusion Options")
                show_basemap = gr.Checkbox(
                    label="Show underlying map (requires internet)",
                    value=False
                )
                gr.Markdown("*Adds geographic context with street/satellite imagery*")

                # Choices must match the strings fetch_osm_exclusion_zones
                # checks for.
                auto_exclusions = gr.CheckboxGroup(
                    label="Auto-exclude from OpenStreetMap:",
                    choices=["Water bodies", "Parks & green spaces", "Industrial areas", "Major roads"],
                    value=[]
                )

                with gr.Accordion("🔧 Advanced: Custom Exclusion Zones", open=False):
                    exclusion_file = gr.File(
                        label="Upload custom shapefile (optional)",
                        file_types=[".geojson", ".json", ".gpkg", ".shp"]
                    )
                    gr.Markdown("*Overrides automatic exclusions if provided*")

                process_btn = gr.Button(
                    "🎯 Generate Diffused Points",
                    variant="primary",
                    size="lg"
                )

            # Right column: visualization output.
            with gr.Column(scale=2):
                plot_output = gr.Plot(
                    label="📍 Spatial Diffusion Visualization",
                    show_label=True
                )

        with gr.Row():
            with gr.Column(scale=2):
                file_output = gr.File(label="📥 Download Generated Points")
            with gr.Column(scale=1):
                gr.Markdown(
                    """
                    **Legend:**
                    🟣 Source points (sized by count)
                    🟠 Generated points
                    ⭕ Diffusion radius
                    🟦 Water bodies
                    🟢 Parks & green spaces
                    ⬜ Industrial areas
                    🟡 Major roads
                    """
                )

        # Main action: run the diffusion and update both outputs.
        process_btn.click(
            fn=process_csv,
            inputs=[input_file, distribution, global_radius, show_basemap, auto_exclusions, exclusion_file, include_time, time_start, time_end, seed],
            outputs=[plot_output, file_output]
        )

        # Show/hide the time fields when the temporal checkbox toggles.
        include_time.change(
            fn=lambda x: gr.update(visible=x),
            inputs=[include_time],
            outputs=[time_group]
        )

    return diffusion_interface
|
|
|
|
|
|
if __name__ == "__main__":
    # Build the Gradio UI and start the local web server.
    create_diffusion_interface().launch()