#!/usr/bin/env python3 """Generate a choropleth heatmap for Jawa Timur kabupaten/kota using GeoPandas. This script: - loads `data/geojson/jatim_kabkota.geojson` (expects a `properties.name` field), - optionally merges a CSV of metrics (by `name`), - creates a choropleth PNG saved to `app/static/img/heatmap_jatim.png`, - writes an augmented GeoJSON to `app/static/geojson/jatim_kabkota_metric.geojson`. Run: python scripts/generate_heatmap_geopandas.py [--metrics data/metrics.csv] """ from __future__ import annotations import argparse import os import sys import random import geopandas as gpd import matplotlib.pyplot as plt import pandas as pd import numpy as np try: import contextily as ctx except Exception: ctx = None try: import matplotlib.patheffects as pe except Exception: pe = None def load_geodata(path: str) -> gpd.GeoDataFrame: gdf = gpd.read_file(path) if gdf.crs is None: # most GeoJSONs are in WGS84 gdf = gdf.set_crs(epsg=4326, allow_override=True) return gdf def prepare_metric(gdf: gpd.GeoDataFrame, metrics_csv: str | None) -> gpd.GeoDataFrame: gdf = gdf.copy() if metrics_csv and os.path.exists(metrics_csv): dfm = pd.read_csv(metrics_csv) # assume merge key is 'name' if 'name' not in dfm.columns: raise ValueError('metrics CSV must contain a `name` column to join on') gdf = gdf.merge(dfm, on='name', how='left') if 'metric' not in gdf.columns: # user may have different metric column; try to pick the first numeric numeric_cols = dfm.select_dtypes('number').columns.tolist() if numeric_cols: gdf['metric'] = gdf[numeric_cols[0]] else: raise ValueError('metrics CSV provided but no numeric column found to use as metric') else: # fallback: create a reproducible random metric for demonstration random.seed(42) gdf['metric'] = np.random.randint(5, 100, size=len(gdf)) # fill missing with 0 gdf['metric'] = gdf['metric'].fillna(0).astype(float) return gdf def plot_heatmap(gdf: gpd.GeoDataFrame, out_png: str, out_geojson: str, cmap: str = 'OrRd') -> None: # project to web mercator for basemap / correct area calculations gdf_web = gdf.to_crs(epsg=3857) fig, ax = plt.subplots(1, 1, figsize=(12, 12)) # plot choropleth gdf_web.plot( column='metric', cmap=cmap, linewidth=0.5, edgecolor='white', ax=ax, legend=True, legend_kwds={'shrink': 0.6}, ) # add labels at centroids for idx, row in gdf_web.iterrows(): try: cent = row['geometry'].centroid x, y = cent.x, cent.y name = row.get('name') or row.get('NAME') or '' short = str(name).replace('Kab. ', '').replace('Kota ', '') if short: txt = ax.text( x, y, short, fontsize=8, ha='center', va='center', color='white' ) if pe is not None: txt.set_path_effects([pe.Stroke(linewidth=2, foreground='black'), pe.Normal()]) except Exception: # some geometries may be empty; skip continue # add basemap if contextily is available if ctx is not None: try: ctx.add_basemap(ax, source=ctx.providers.CartoDB.Positron) except Exception: # fallback: ignore basemap if provider errors pass ax.set_axis_off() ax.set_title('Heatmap: Jawa Timur - Kabupaten/Kota', fontsize=16) # ensure output directory exists os.makedirs(os.path.dirname(out_png), exist_ok=True) fig.savefig(out_png, dpi=150, bbox_inches='tight') plt.close(fig) # write augmented geojson (keep original CRS WGS84 for web use) try: gdf.to_file(out_geojson, driver='GeoJSON') except Exception as e: print('Warning: failed to write GeoJSON:', e, file=sys.stderr) def main(argv=None): parser = argparse.ArgumentParser() parser.add_argument('--geojson', default=os.path.join('data', 'geojson', 'jatim_kabkota.geojson')) parser.add_argument('--metrics', default=None, help='optional CSV with columns `name` and a numeric metric') parser.add_argument('--out-png', default=os.path.join('app', 'static', 'img', 'heatmap_jatim.png')) parser.add_argument('--out-geojson', default=os.path.join('app', 'static', 'geojson', 'jatim_kabkota_metric.geojson')) parser.add_argument('--cmap', default='OrRd') args = parser.parse_args(argv) if not os.path.exists(args.geojson): print('GeoJSON not found at', args.geojson, file=sys.stderr) sys.exit(2) gdf = load_geodata(args.geojson) gdf = prepare_metric(gdf, args.metrics) # ensure static folders exist os.makedirs(os.path.dirname(args.out_geojson), exist_ok=True) os.makedirs(os.path.dirname(args.out_png), exist_ok=True) plot_heatmap(gdf, args.out_png, args.out_geojson, cmap=args.cmap) print('Heatmap image written to:', args.out_png) print('Augmented GeoJSON written to:', args.out_geojson) if __name__ == '__main__': main()