"""Build clean postcode GeoJSON by dissolving commune boundaries by postcode.

Downloads commune contours from geo.api.gouv.fr (per department), maps each
commune to a postcode, and dissolves geometries by postcode. This produces
non-overlapping postcode polygons.

Key design decision: each commune is assigned to ONE postcode only (the first
in sorted order) to prevent overlapping boundaries.

For Paris/Lyon/Marseille, arrondissement boundaries are used instead of the
meta-commune.
"""

import json
import time
import urllib.request
from pathlib import Path

import geopandas as gpd
import pandas as pd
from shapely.ops import unary_union

OUTPUT_PATH = Path(__file__).parent.parent / "data" / "aggregated" / "postcodes.geojson"

# All metropolitan + DOM department codes
DEPT_CODES = [
    "01", "02", "03", "04", "05", "06", "07", "08", "09", "10",
    "11", "12", "13", "14", "15", "16", "17", "18", "19", "21",
    "22", "23", "24", "25", "26", "27", "28", "29", "2A", "2B",
    "30", "31", "32", "33", "34", "35", "36", "37", "38", "39",
    "40", "41", "42", "43", "44", "45", "46", "47", "48", "49",
    "50", "51", "52", "53", "54", "55", "56", "57", "58", "59",
    "60", "61", "62", "63", "64", "65", "66", "67", "68", "69",
    "70", "71", "72", "73", "74", "75", "76", "77", "78", "79",
    "80", "81", "82", "83", "84", "85", "86", "87", "88", "89",
    "90", "91", "92", "93", "94", "95",
    "971", "972", "973", "974", "976",
]

# Paris/Lyon/Marseille meta-communes that need arrondissement-level treatment
ARRONDISSEMENT_CITIES = {
    "75056": "Paris",
    "69123": "Lyon",
    "13055": "Marseille",
}

# Pause between API requests, in seconds (be polite to the public API).
_REQUEST_DELAY_S = 0.3
# Simplification tolerance in degrees (~55 m of latitude).
_SIMPLIFY_TOLERANCE_DEG = 0.0005
# Decimal places kept on exported coordinates.
_COORD_DECIMALS = 5


def fetch_url(url: str, retries: int = 3) -> dict | None:
    """Fetch and decode JSON from ``url``, retrying with exponential backoff.

    Args:
        url: Address to request.
        retries: Maximum number of attempts. Waits of 1s, 2s, 4s, ... are
            inserted between consecutive attempts.

    Returns:
        The decoded JSON payload, or ``None`` when every attempt failed (a
        warning is printed in that case).
    """
    for attempt in range(retries):
        try:
            req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
            with urllib.request.urlopen(req, timeout=60) as resp:
                return json.loads(resp.read().decode())
        # Deliberately broad: network, HTTP and JSON decoding errors are all
        # treated as "try again"; the last failure is reported, not raised.
        except Exception as e:
            if attempt < retries - 1:
                wait = 2 ** attempt
                print(f" retry {attempt+1} in {wait}s...", end=" ", flush=True)
                time.sleep(wait)
            else:
                print(f" WARNING: Failed to fetch {url}: {e}")
    return None


def _features_of(data) -> list[dict]:
    """Return the ``features`` list of a FeatureCollection-like payload.

    Anything that is not a dict (``None``, a JSON array, ...) yields ``[]``
    instead of raising on ``.get``.
    """
    if isinstance(data, dict):
        return data.get("features", [])
    return []


def fetch_dept_communes(dept_code: str) -> list[dict]:
    """Fetch communes with contours for a department from geo.api.gouv.fr.

    Returns:
        A list of GeoJSON features; empty when the request failed or the
        payload carried no ``features``.
    """
    url = (
        f"https://geo.api.gouv.fr/departements/{dept_code}/communes"
        f"?fields=code,nom,codesPostaux,contour&format=geojson&geometry=contour"
    )
    return _features_of(fetch_url(url))


def fetch_arrondissements(city_code: str) -> list[dict]:
    """Fetch arrondissement boundaries for Paris/Lyon/Marseille.

    Queries the single-commune endpoint. The pipeline below uses
    ``fetch_arrondissements_list`` instead; this variant is kept for callers
    outside this module.

    Returns:
        The ``features`` of a FeatureCollection, a one-element list when the
        API answers with a single Feature, otherwise ``[]``.
    """
    url = (
        f"https://geo.api.gouv.fr/communes/{city_code}"
        f"?type=arrondissement-municipal"
        f"&fields=code,nom,codesPostaux,contour&format=geojson"
    )
    data = fetch_url(url)
    if not isinstance(data, dict):
        return []
    if "features" in data:
        return data["features"]
    # API might return a single object instead of FeatureCollection
    if data.get("type") == "Feature":
        return [data]
    return []


def fetch_arrondissements_list(city_code: str) -> list[dict]:
    """Fetch arrondissements for a city as a list of GeoJSON features.

    Uses the ``codeParent`` filter of the communes endpoint.

    Returns:
        A list of GeoJSON features; empty on failure.
    """
    url = (
        f"https://geo.api.gouv.fr/communes"
        f"?codeParent={city_code}&type=arrondissement-municipal"
        f"&fields=code,nom,codesPostaux,contour&format=geojson&geometry=contour"
    )
    return _features_of(fetch_url(url))


def _rows_from_features(features, skip_codes=frozenset()) -> list[dict]:
    """Turn GeoJSON features into ``{"codePostal", "geometry"}`` rows.

    Features without a geometry or without postcodes are dropped, as are those
    whose ``code`` is in ``skip_codes``. Each remaining feature is assigned to
    ONE postcode only (the first in sorted order) so that multi-postcode
    communes cannot produce overlapping polygons.
    """
    rows = []
    for feature in features:
        geom = feature.get("geometry")
        # "properties" may be present but null in GeoJSON.
        props = feature.get("properties") or {}
        postcodes = props.get("codesPostaux") or []
        if not geom or not postcodes:
            continue
        if props.get("code", "") in skip_codes:
            continue
        rows.append({"codePostal": sorted(postcodes)[0], "geometry": geom})
    return rows


def _round_coords(coords, ndigits=_COORD_DECIMALS):
    """Recursively round a GeoJSON coordinate array; empty arrays pass through."""
    if not coords:
        return coords
    if isinstance(coords[0], (int, float)):
        return [round(c, ndigits) for c in coords]
    return [_round_coords(c, ndigits) for c in coords]


def _round_geometry(geometry, ndigits=_COORD_DECIMALS) -> None:
    """Round coordinates of a GeoJSON geometry dict in place.

    Tolerates a null geometry and descends into GeometryCollection members,
    which carry ``geometries`` rather than ``coordinates``.
    """
    if not geometry:
        return
    if "coordinates" in geometry:
        geometry["coordinates"] = _round_coords(geometry["coordinates"], ndigits)
    for member in geometry.get("geometries") or ():
        _round_geometry(member, ndigits)


def _reduce_precision(path) -> dict:
    """Rewrite the GeoJSON at ``path`` compactly with rounded coordinates.

    Returns:
        The rewritten GeoJSON document.
    """
    with open(path, encoding="utf-8") as f:
        geojson = json.load(f)

    for feature in geojson["features"]:
        _round_geometry(feature.get("geometry"))

    with open(path, "w", encoding="utf-8") as f:
        json.dump(geojson, f, separators=(",", ":"))
    return geojson


def build_postcode_geojson():
    """Main pipeline: download communes, dissolve by postcode, export GeoJSON.

    Writes the result to ``OUTPUT_PATH`` (creating its directory if needed).
    Departments or cities for which nothing could be fetched are listed in a
    warning, since the output is then incomplete.

    Raises:
        RuntimeError: If no usable feature was downloaded at all.
    """
    print("=== Building clean postcode boundaries ===\n")

    # Step 1: Download all commune features with their postcodes
    all_rows = []  # {"codePostal": ..., "geometry": ...}
    missing = []  # departments / cities that yielded no features
    skip_codes = set(ARRONDISSEMENT_CITIES)

    for i, dept in enumerate(DEPT_CODES, start=1):
        print(f"[{i}/{len(DEPT_CODES)}] Fetching dept {dept}...", end=" ", flush=True)
        features = fetch_dept_communes(dept)
        print(f"{len(features)} communes")
        if not features:
            missing.append(f"dept {dept}")

        # Meta-communes (Paris/Lyon/Marseille) are skipped here and handled
        # via their arrondissements below.
        all_rows.extend(_rows_from_features(features, skip_codes))

        # Be polite to the API
        time.sleep(_REQUEST_DELAY_S)

    # Step 1b: Fetch arrondissements for Paris/Lyon/Marseille
    for city_code, city_name in ARRONDISSEMENT_CITIES.items():
        print(f"Fetching {city_name} arrondissements...", end=" ", flush=True)
        features = fetch_arrondissements_list(city_code)
        print(f"{len(features)} arrondissements")
        if not features:
            missing.append(city_name)

        # Each arrondissement typically maps to one postcode
        all_rows.extend(_rows_from_features(features))
        time.sleep(_REQUEST_DELAY_S)

    print(f"\nTotal communes/arrondissements fetched: {len(all_rows)}")
    print(f"Total rows: {len(all_rows)}")
    if missing:
        print(
            f"WARNING: nothing fetched for {', '.join(missing)}; "
            "output will be incomplete"
        )
    if not all_rows:
        raise RuntimeError("No commune features could be downloaded; nothing to export")

    # Step 2: Build GeoDataFrame
    print("\nBuilding GeoDataFrame...")
    gdf = gpd.GeoDataFrame.from_features(
        [
            {
                "type": "Feature",
                "geometry": row["geometry"],
                "properties": {"codePostal": row["codePostal"]},
            }
            for row in all_rows
        ],
        crs="EPSG:4326",
    )
    print(f" Shape: {gdf.shape}")
    print(f" Unique postcodes: {gdf['codePostal'].nunique()}")

    # Step 3: Dissolve by postcode (union geometries from different communes)
    print("\nDissolving by postcode...")
    dissolved = gdf.dissolve(by="codePostal", aggfunc="first").reset_index()
    print(f" Dissolved features: {len(dissolved)}")

    # Step 4: Simplify geometries for web (tolerance ~55m, smooth at zoom 12-13)
    # NOTE(review): each polygon is simplified on its own, so shared borders
    # may no longer coincide exactly afterwards - confirm this is acceptable.
    print("\nSimplifying geometries...")
    dissolved["geometry"] = dissolved["geometry"].simplify(
        tolerance=_SIMPLIFY_TOLERANCE_DEG, preserve_topology=True
    )

    # Step 5: Export
    print(f"\nExporting to {OUTPUT_PATH}...")
    # The target directory may not exist on a fresh checkout.
    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
    dissolved.to_file(OUTPUT_PATH, driver="GeoJSON")

    # Post-process: reduce coordinate precision
    print("Post-processing: reducing coordinate precision...")
    geojson = _reduce_precision(OUTPUT_PATH)

    final_size = OUTPUT_PATH.stat().st_size / (1024 * 1024)
    print(f"\nDone! Output: {OUTPUT_PATH}")
    print(f" Features: {len(geojson['features'])}")
    print(f" File size: {final_size:.1f} MB")


if __name__ == "__main__":
    build_postcode_geojson()