map / scripts /generate_heatmap_geopandas.py
atsuga's picture
Upload 40 files
6f10462 verified
#!/usr/bin/env python3
"""Generate a choropleth heatmap for Jawa Timur kabupaten/kota using GeoPandas.
This script:
- loads `data/geojson/jatim_kabkota.geojson` (expects a `properties.name` field),
- optionally merges a CSV of metrics (by `name`),
- creates a choropleth PNG saved to `app/static/img/heatmap_jatim.png`,
- writes an augmented GeoJSON to `app/static/geojson/jatim_kabkota_metric.geojson`.
Run: python scripts/generate_heatmap_geopandas.py [--metrics data/metrics.csv]
"""
from __future__ import annotations
import argparse
import os
import sys
import random
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
try:
import contextily as ctx
except Exception:
ctx = None
try:
import matplotlib.patheffects as pe
except Exception:
pe = None
def load_geodata(path: str) -> gpd.GeoDataFrame:
    """Read the vector file at *path* into a GeoDataFrame.

    When the file carries no CRS information, the frame is tagged as
    EPSG:4326 (WGS84), the usual coordinate system for GeoJSON.
    """
    frame = gpd.read_file(path)
    if frame.crs is not None:
        return frame
    # No CRS declared: label (not reproject) the coordinates as WGS84.
    return frame.set_crs(epsg=4326, allow_override=True)
def prepare_metric(gdf: gpd.GeoDataFrame, metrics_csv: str | None) -> gpd.GeoDataFrame:
    """Return a copy of *gdf* that has a float ``metric`` column.

    If *metrics_csv* names an existing file, it is left-joined onto the
    features using the ``name`` column. When the joined frame has no
    ``metric`` column, the first numeric column of the CSV is used instead.
    Without a usable CSV, a seeded pseudo-random demo metric in ``[5, 100)``
    is generated so repeated runs give the same values.

    Missing metric values are filled with 0 and the column is cast to float.

    Raises:
        ValueError: if the CSV has no ``name`` column, or no ``metric`` column
            and no numeric column to fall back on.
    """
    gdf = gdf.copy()
    if metrics_csv and os.path.exists(metrics_csv):
        dfm = pd.read_csv(metrics_csv)
        # The join key is assumed to be 'name' on both sides.
        if 'name' not in dfm.columns:
            raise ValueError('metrics CSV must contain a `name` column to join on')
        gdf = gdf.merge(dfm, on='name', how='left')
        if 'metric' not in gdf.columns:
            # The CSV may call its metric something else; take the first numeric column.
            numeric_cols = dfm.select_dtypes('number').columns.tolist()
            if not numeric_cols:
                raise ValueError('metrics CSV provided but no numeric column found to use as metric')
            gdf['metric'] = gdf[numeric_cols[0]]
    else:
        if metrics_csv:
            # A path was given but does not exist: say so instead of silently
            # substituting demo data for the user's real metrics.
            print(
                'Warning: metrics CSV not found at', metrics_csv,
                '- using random demo metric',
                file=sys.stderr,
            )
        # Use a locally seeded NumPy generator: seeding the stdlib `random`
        # module has no effect on NumPy draws, and a local generator leaves
        # global RNG state untouched.
        rng = np.random.default_rng(42)
        gdf['metric'] = rng.integers(5, 100, size=len(gdf))
    # Features without a matching CSV row get 0.
    gdf['metric'] = gdf['metric'].fillna(0).astype(float)
    return gdf
def _ensure_parent_dir(path: str) -> None:
    """Create the directory that will contain *path*, if the path names one."""
    parent = os.path.dirname(path)
    # A bare filename has an empty dirname, and os.makedirs('') raises.
    if parent:
        os.makedirs(parent, exist_ok=True)


def _short_label(row) -> str:
    """Return the map label for a feature row, or '' when it has no name."""
    for key in ('name', 'NAME'):
        value = row.get(key)
        # NaN is truthy, so test for it explicitly to avoid drawing "nan".
        if value is None or pd.isna(value) or not str(value):
            continue
        return str(value).replace('Kab. ', '').replace('Kota ', '')
    return ''


def plot_heatmap(gdf: gpd.GeoDataFrame, out_png: str, out_geojson: str, cmap: str = 'OrRd') -> None:
    """Draw a choropleth of ``gdf['metric']`` and write PNG and GeoJSON outputs.

    Args:
        gdf: features with a ``metric`` column and, optionally, a ``name`` or
            ``NAME`` column used for centroid labels.
        out_png: destination path of the rendered image.
        out_geojson: destination path of the GeoJSON copy of *gdf*.
        cmap: matplotlib colormap name for the choropleth.

    The basemap (only when contextily is importable) and the GeoJSON export
    are best-effort: a failure prints a warning to stderr instead of raising.
    """
    # Web Mercator (EPSG:3857) is the projection contextily draws the basemap
    # in. It is used for display only; it is not an equal-area projection.
    gdf_web = gdf.to_crs(epsg=3857)

    fig, ax = plt.subplots(1, 1, figsize=(12, 12))
    try:
        gdf_web.plot(
            column='metric',
            cmap=cmap,
            linewidth=0.5,
            edgecolor='white',
            ax=ax,
            legend=True,
            legend_kwds={'shrink': 0.6},
        )

        # Label each feature at its centroid.
        for _, row in gdf_web.iterrows():
            geom = row.get('geometry')
            # Missing or empty geometries have no centroid to label.
            if geom is None or geom.is_empty:
                continue
            label = _short_label(row)
            if not label:
                continue
            cent = geom.centroid
            txt = ax.text(
                cent.x, cent.y, label,
                fontsize=8, ha='center', va='center', color='white',
            )
            if pe is not None:
                # Black outline keeps white text readable on light fills.
                txt.set_path_effects([pe.Stroke(linewidth=2, foreground='black'), pe.Normal()])

        if ctx is not None:
            try:
                ctx.add_basemap(ax, source=ctx.providers.CartoDB.Positron)
            except Exception as exc:
                # The basemap is decorative; report the failure and continue.
                print('Warning: failed to add basemap:', exc, file=sys.stderr)

        ax.set_axis_off()
        ax.set_title('Heatmap: Jawa Timur - Kabupaten/Kota', fontsize=16)

        _ensure_parent_dir(out_png)
        fig.savefig(out_png, dpi=150, bbox_inches='tight')
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)

    # Export the un-projected frame so the GeoJSON keeps its original CRS.
    try:
        _ensure_parent_dir(out_geojson)
        gdf.to_file(out_geojson, driver='GeoJSON')
    except Exception as e:
        print('Warning: failed to write GeoJSON:', e, file=sys.stderr)
def main(argv: list[str] | None = None) -> None:
    """Command-line entry point: load features, attach a metric, render outputs.

    Args:
        argv: argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Exits with status 2 when the input GeoJSON does not exist.
    """
    parser = argparse.ArgumentParser(
        description='Generate a choropleth heatmap for Jawa Timur kabupaten/kota.'
    )
    parser.add_argument('--geojson', default=os.path.join('data', 'geojson', 'jatim_kabkota.geojson'))
    parser.add_argument('--metrics', default=None, help='optional CSV with columns `name` and a numeric metric')
    parser.add_argument('--out-png', default=os.path.join('app', 'static', 'img', 'heatmap_jatim.png'))
    parser.add_argument('--out-geojson', default=os.path.join('app', 'static', 'geojson', 'jatim_kabkota_metric.geojson'))
    parser.add_argument('--cmap', default='OrRd')
    args = parser.parse_args(argv)

    if not os.path.exists(args.geojson):
        print('GeoJSON not found at', args.geojson, file=sys.stderr)
        sys.exit(2)

    gdf = load_geodata(args.geojson)
    gdf = prepare_metric(gdf, args.metrics)

    # Make sure the output folders exist. A bare filename has an empty
    # dirname, and os.makedirs('') raises, so skip that case.
    for target in (args.out_geojson, args.out_png):
        parent = os.path.dirname(target)
        if parent:
            os.makedirs(parent, exist_ok=True)

    plot_heatmap(gdf, args.out_png, args.out_geojson, cmap=args.cmap)
    print('Heatmap image written to:', args.out_png)
    print('Augmented GeoJSON written to:', args.out_geojson)


if __name__ == '__main__':
    main()