File size: 6,305 Bytes
3dbff85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
"""Vectorize a uint8 prediction raster (binary mask or class index)
into an EPSG:4326 GeoJSON FeatureCollection so the frontend can paint
it on the MapLibre map.

The droplet's `/v1/prithvi-pluvial` and `/v1/terramind` routes return
their predictions as base64-encoded uint8 with a shape and (where
relevant) a class-label list. This module reconstructs the affine
transform from the chip's geographic bounds (which the HF Space
already knows) and walks `rasterio.features.shapes` to build polygons
directly in WGS84 coordinates: the transform is derived from the
EPSG:4326 bounds, so no separate reprojection step is performed.

Best-effort: any failure returns an empty FeatureCollection rather
than raising into the caller's path. The map layer is decorative —
the briefing is the deliverable.
"""
from __future__ import annotations

import base64
import logging

# Module logger; the vectorisation helpers report failures through it
# via `log.exception` instead of raising.
log = logging.getLogger("riprap.polygonize")

# Canonical empty GeoJSON FeatureCollection. This is a single shared,
# mutable object: treat it as read-only, because appending to its
# `features` list would be visible to everyone holding a reference.
EMPTY: dict = {"type": "FeatureCollection", "features": []}


def _decode_pred(pred_b64: str, pred_shape: list[int]):
    """Rebuild a prediction array from its base64 wire form.

    Undoes the droplet's `base64(pred.tobytes())` encoding: the payload
    is base64-decoded, viewed as uint8 and reshaped to `pred_shape`.

    Returns the uint8 numpy array, or None when any step fails (the
    failure is logged with its traceback rather than raised).
    """
    try:
        import numpy as np

        payload = base64.b64decode(pred_b64)
        flat = np.frombuffer(payload, dtype="uint8")
        return flat.reshape(pred_shape)
    except Exception:
        log.exception("polygonize: pred decode failed")
        return None


def polygonize_class_raster(
    pred_b64: str,
    pred_shape: list[int],
    class_labels: list[str] | None,
    bounds_4326: tuple[float, float, float, float],
    *,
    drop_classes: tuple[int, ...] = (0,),
    simplify_tolerance: float = 0.0,
) -> dict:
    """Vectorize a categorical prediction raster (one integer class per
    pixel) into a FeatureCollection with one Feature per connected
    polygon.

    Args:
        pred_b64: base64-encoded uint8 raster bytes.
        pred_shape: shape the decoded bytes are reshaped to; must be
            two-dimensional `(height, width)`.
        class_labels: optional label per class index. Indices without a
            label fall back to `class_<idx>`.
        bounds_4326: `(minlon, minlat, maxlon, maxlat)` of the chip; the
            raster is assumed to span those bounds at uniform pixel size.
        drop_classes: class indices to leave out of the output (default
            drops 0 = "Background" / "outside" / etc). Pass an empty
            tuple to keep every class, including 0.
        simplify_tolerance: when > 0, each polygon is simplified with
            this tolerance (topology-preserving) before being emitted.

    Returns:
        A GeoJSON FeatureCollection dict. Each feature carries
        `class_idx`, `class_label` and `fill_color` so the frontend can
        color by class. Any failure is logged and yields a fresh empty
        FeatureCollection instead of raising.
    """
    pred = _decode_pred(pred_b64, pred_shape)
    if pred is None:
        # Fresh object on purpose: handing out the shared module-level
        # EMPTY would let one caller's mutation leak into later results.
        return {"type": "FeatureCollection", "features": []}
    try:
        import numpy as np
        from rasterio.features import shapes
        from rasterio.transform import from_bounds
        from shapely.geometry import shape

        h, w = pred.shape
        minlon, minlat, maxlon, maxlat = bounds_4326
        # The chip is in EPSG:4326 for our use — Sentinel-2 chips are
        # natively in their UTM zone, but we can polygonize against the
        # WGS84 extent because the inference chip is a small bbox where
        # the pixel-grid → lat/lon mapping is locally affine (sub-pixel
        # error at NYC scale).
        transform = from_bounds(minlon, minlat, maxlon, maxlat, w, h)
        # Build the validity mask from `drop_classes` itself rather than
        # a hard-coded `pred > 0`, so class 0 is only excluded when the
        # caller asks for it and dropped classes are never polygonized.
        keep = ~np.isin(pred, list(drop_classes)) if drop_classes else None
        feats = []
        for geom, value in shapes(pred, mask=keep, transform=transform):
            v = int(value)
            label = (class_labels[v]
                     if class_labels and 0 <= v < len(class_labels)
                     else f"class_{v}")
            poly = shape(geom)
            if simplify_tolerance > 0:
                poly = poly.simplify(simplify_tolerance, preserve_topology=True)
            if poly.is_empty:
                continue
            feats.append({
                "type": "Feature",
                "geometry": poly.__geo_interface__,
                "properties": {
                    "class_idx": v,
                    "class_label": label,
                    "fill_color": _PALETTE.get(label.lower(), _DEFAULT_FILL),
                },
            })
        return {"type": "FeatureCollection", "features": feats}
    except Exception:
        log.exception("polygonize: class raster vectorisation failed")
        return {"type": "FeatureCollection", "features": []}


def polygonize_binary_mask(
    pred_b64: str,
    pred_shape: list[int],
    bounds_4326: tuple[float, float, float, float],
    *,
    label: str = "water",
    fill_color: str = "#4A90E2",
    simplify_tolerance: float = 0.0,
) -> dict:
    """Vectorize a binary prediction raster (e.g. Prithvi water mask;
    1 = water, 0 = not). Use this for prithvi_eo_live and the buildings
    LoRA.

    Args:
        pred_b64: base64-encoded uint8 raster bytes.
        pred_shape: shape the decoded bytes are reshaped to; must be
            two-dimensional `(height, width)`.
        bounds_4326: `(minlon, minlat, maxlon, maxlat)` of the chip; the
            raster is mapped onto those bounds at uniform pixel size.
        label: value written to every feature's `class_label`.
        fill_color: value written to every feature's `fill_color`.
        simplify_tolerance: when > 0, each polygon is simplified with
            this tolerance (topology-preserving) before being emitted.

    Returns:
        A GeoJSON FeatureCollection dict with one Feature per connected
        region of non-zero pixels. Any failure is logged and yields a
        fresh empty FeatureCollection instead of raising.
    """
    pred = _decode_pred(pred_b64, pred_shape)
    if pred is None:
        # Fresh object on purpose: handing out the shared module-level
        # EMPTY would let one caller's mutation leak into later results.
        return {"type": "FeatureCollection", "features": []}
    try:
        from rasterio.features import shapes
        from rasterio.transform import from_bounds
        from shapely.geometry import shape

        h, w = pred.shape
        minlon, minlat, maxlon, maxlat = bounds_4326
        # Affine pixel → lon/lat mapping derived from the chip bounds.
        transform = from_bounds(minlon, minlat, maxlon, maxlat, w, h)
        feats = []
        # Only non-zero pixels are vectorized; the pixel value itself is
        # not needed because every feature gets the same label/color.
        for geom, _value in shapes(pred, mask=pred > 0, transform=transform):
            poly = shape(geom)
            if simplify_tolerance > 0:
                poly = poly.simplify(simplify_tolerance, preserve_topology=True)
            if poly.is_empty:
                continue
            feats.append({
                "type": "Feature",
                "geometry": poly.__geo_interface__,
                "properties": {
                    "class_label": label,
                    "fill_color": fill_color,
                },
            })
        return {"type": "FeatureCollection", "features": feats}
    except Exception:
        log.exception("polygonize: binary mask vectorisation failed")
        return {"type": "FeatureCollection", "features": []}


# Fallback fill for any class label that has no palette entry.
_DEFAULT_FILL = "#A0A0A0"

# Default fill colours for the LULC + buildings layers, looked up by
# lowercase class label. The frontend may still override the colour
# through each feature's `fill_color` property.
_PALETTE = {
    # ESRI 2020 LULC schema (terramind v1 base generative)
    "water": "#1F77B4",
    "trees": "#2CA02C",
    "grass": "#7FBF53",
    "flooded vegetation": "#74C476",
    "crops": "#E1C75A",
    "scrub/shrub": "#A6BC44",
    "built": "#D62728",
    "bare ground": "#B07A4C",
    "snow/ice": "#E0E7EC",
    "clouds": "#CCCCCC",
    # NYC LoRA LULC schema
    "cropland": "#E1C75A",
    "bare": "#B07A4C",
    # Buildings LoRA
    "building": "#D62728",
    "background": _DEFAULT_FILL,
}