File size: 5,379 Bytes
6f10462
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
#!/usr/bin/env python3
"""Generate a choropleth heatmap for Jawa Timur kabupaten/kota using GeoPandas.



This script:

- loads `data/geojson/jatim_kabkota.geojson` (each feature is expected to carry a `name` property),
- optionally merges a CSV of metrics (joined on `name`); when no CSV is given, a random demonstration metric is generated instead,
- creates a choropleth PNG saved to `app/static/img/heatmap_jatim.png`,
- writes an augmented GeoJSON to `app/static/geojson/jatim_kabkota_metric.geojson`.

Run: python scripts/generate_heatmap_geopandas.py [--metrics data/metrics.csv]

"""
from __future__ import annotations

import argparse
import os
import sys
import random

import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

try:
    import contextily as ctx
except Exception:
    ctx = None

try:
    import matplotlib.patheffects as pe
except Exception:
    pe = None


def load_geodata(path: str) -> gpd.GeoDataFrame:
    """Read the vector file at *path* into a GeoDataFrame.

    If the file declares no coordinate reference system, the frame is
    tagged as EPSG:4326 (WGS84); no coordinates are transformed.
    """
    frame = gpd.read_file(path)
    if frame.crs is not None:
        return frame
    # No CRS declared: assume WGS84, which is what most GeoJSON files use.
    return frame.set_crs(epsg=4326, allow_override=True)


def prepare_metric(gdf: gpd.GeoDataFrame, metrics_csv: str | None) -> gpd.GeoDataFrame:
    """Return a copy of *gdf* that carries a float ``metric`` column.

    When *metrics_csv* points to an existing file, it is left-joined onto
    the frame by the ``name`` column. If the joined frame has no ``metric``
    column, the first numeric column of the CSV is used as the metric.

    When *metrics_csv* is ``None``/empty or the file does not exist, a
    deterministic pseudo-random integer metric in ``[5, 100)`` is generated
    for demonstration purposes (a warning is printed to stderr if a path was
    given but not found).

    Missing metric values are replaced with ``0`` and the column is cast to
    float.

    Raises:
        ValueError: if the CSV has no ``name`` column, or if no ``metric``
            column results from the join and the CSV has no numeric column.
    """
    gdf = gdf.copy()
    if metrics_csv and os.path.exists(metrics_csv):
        dfm = pd.read_csv(metrics_csv)
        # the join key is fixed to 'name'
        if 'name' not in dfm.columns:
            raise ValueError('metrics CSV must contain a `name` column to join on')
        gdf = gdf.merge(dfm, on='name', how='left')
        if 'metric' not in gdf.columns:
            # user may have a differently named metric column; pick the first numeric one
            numeric_cols = dfm.select_dtypes('number').columns.tolist()
            if not numeric_cols:
                raise ValueError('metrics CSV provided but no numeric column found to use as metric')
            gdf['metric'] = gdf[numeric_cols[0]]
    else:
        if metrics_csv:
            # A path was requested but is missing: say so instead of silently
            # substituting demo data.
            print(
                'Warning: metrics CSV not found at', metrics_csv,
                '- falling back to a random demo metric',
                file=sys.stderr,
            )
        # Seed a local NumPy generator: seeding the stdlib `random` module has
        # no effect on NumPy draws, and a local generator keeps the result
        # reproducible without touching global random state.
        rng = np.random.default_rng(42)
        gdf['metric'] = rng.integers(5, 100, size=len(gdf))
    # fill missing with 0
    gdf['metric'] = gdf['metric'].fillna(0).astype(float)
    return gdf


def plot_heatmap(gdf: gpd.GeoDataFrame, out_png: str, out_geojson: str, cmap: str = 'OrRd') -> None:
    """Render the ``metric`` column of *gdf* as a choropleth and export it.

    The frame is reprojected to EPSG:3857 for plotting, each feature is
    labelled at its centroid with its ``name`` (or ``NAME``) value, and a
    CartoDB Positron basemap is added when ``contextily`` is importable.
    The figure is saved to *out_png* at 150 dpi. *gdf* itself (in its
    original CRS) is then written to *out_geojson*; a failure of that write
    is reported on stderr but does not raise.

    Args:
        gdf: frame with a ``geometry`` column and a ``metric`` column.
        out_png: destination path of the PNG image.
        out_geojson: destination path of the GeoJSON file.
        cmap: matplotlib colormap name used for the choropleth.
    """
    # project to web mercator so the layer lines up with the web basemap tiles
    gdf_web = gdf.to_crs(epsg=3857)

    fig, ax = plt.subplots(1, 1, figsize=(12, 12))
    try:
        # plot choropleth
        gdf_web.plot(
            column='metric',
            cmap=cmap,
            linewidth=0.5,
            edgecolor='white',
            ax=ax,
            legend=True,
            legend_kwds={'shrink': 0.6},
        )

        # add labels at centroids
        for _, row in gdf_web.iterrows():
            geom = row['geometry']
            if geom is None or geom.is_empty:
                # nothing to anchor a label to
                continue

            # Pick the first usable name; NaN is truthy, so test for it
            # explicitly instead of relying on `or`.
            name = ''
            for key in ('name', 'NAME'):
                value = row.get(key)
                if value is not None and not pd.isna(value) and str(value):
                    name = value
                    break
            short = str(name).replace('Kab. ', '').replace('Kota ', '')
            if not short:
                continue

            try:
                cent = geom.centroid
                txt = ax.text(
                    cent.x, cent.y, short,
                    fontsize=8, ha='center', va='center', color='white'
                )
                if pe is not None:
                    txt.set_path_effects([pe.Stroke(linewidth=2, foreground='black'), pe.Normal()])
            except Exception as e:
                # labelling is best-effort: report and move on to the next feature
                print('Warning: could not label feature:', e, file=sys.stderr)

        # add basemap if contextily is available
        if ctx is not None:
            try:
                ctx.add_basemap(ax, source=ctx.providers.CartoDB.Positron)
            except Exception as e:
                # basemap is optional (tile provider or network may fail)
                print('Warning: basemap not added:', e, file=sys.stderr)

        ax.set_axis_off()
        ax.set_title('Heatmap: Jawa Timur - Kabupaten/Kota', fontsize=16)

        # ensure output directory exists; dirname is '' for a bare filename,
        # and os.makedirs('') would raise
        png_dir = os.path.dirname(out_png)
        if png_dir:
            os.makedirs(png_dir, exist_ok=True)
        fig.savefig(out_png, dpi=150, bbox_inches='tight')
    finally:
        # always release the figure, even if plotting or saving failed
        plt.close(fig)

    # write augmented geojson (original CRS of `gdf` is kept for web use)
    try:
        geojson_dir = os.path.dirname(out_geojson)
        if geojson_dir:
            os.makedirs(geojson_dir, exist_ok=True)
        gdf.to_file(out_geojson, driver='GeoJSON')
    except Exception as e:
        print('Warning: failed to write GeoJSON:', e, file=sys.stderr)

def main(argv=None):
    """Command-line entry point: load the GeoJSON, attach a metric, render outputs.

    Args:
        argv: argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Exits with status 2 (via ``sys.exit``) when the input GeoJSON does not
    exist.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--geojson', default=os.path.join('data', 'geojson', 'jatim_kabkota.geojson'))
    parser.add_argument('--metrics', default=None, help='optional CSV with columns `name` and a numeric metric')
    parser.add_argument('--out-png', default=os.path.join('app', 'static', 'img', 'heatmap_jatim.png'))
    parser.add_argument('--out-geojson', default=os.path.join('app', 'static', 'geojson', 'jatim_kabkota_metric.geojson'))
    parser.add_argument('--cmap', default='OrRd')
    args = parser.parse_args(argv)

    if not os.path.exists(args.geojson):
        print('GeoJSON not found at', args.geojson, file=sys.stderr)
        sys.exit(2)

    gdf = load_geodata(args.geojson)
    gdf = prepare_metric(gdf, args.metrics)

    # ensure output folders exist; a bare filename has an empty dirname,
    # which os.makedirs rejects, so skip it in that case
    for out_path in (args.out_geojson, args.out_png):
        out_dir = os.path.dirname(out_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    plot_heatmap(gdf, args.out_png, args.out_geojson, cmap=args.cmap)

    print('Heatmap image written to:', args.out_png)
    print('Augmented GeoJSON written to:', args.out_geojson)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()