Spaces:
Sleeping
Sleeping
| """ | |
| Build clean postcode GeoJSON by dissolving commune boundaries by postcode. | |
| Downloads commune contours from geo.api.gouv.fr (per department), | |
| maps each commune to its postcode(s), and dissolves geometries by postcode. | |
| This produces non-overlapping postcode polygons. | |
| Key design decision: each commune is assigned to ONE postcode only (the first | |
| in sorted order) to prevent overlapping boundaries. For Paris/Lyon/Marseille, | |
| arrondissement boundaries are used instead of the meta-commune. | |
| """ | |
| import json | |
| import time | |
| import urllib.request | |
| import geopandas as gpd | |
| import pandas as pd | |
| from shapely.ops import unary_union | |
| from pathlib import Path | |
# Destination of the generated file, resolved relative to this script:
# <script dir>/../data/aggregated/postcodes.geojson
OUTPUT_PATH = Path(__file__).parent.parent.joinpath(
    "data", "aggregated", "postcodes.geojson"
)

# Department codes to download, in processing order: the two-digit
# metropolitan codes 01-95 (code 20 is absent; 2A and 2B sit in its place),
# followed by the five overseas (DOM) codes.
DEPT_CODES = [
    *(f"{n:02d}" for n in range(1, 20)),
    *(f"{n:02d}" for n in range(21, 30)),
    "2A", "2B",
    *(f"{n:02d}" for n in range(30, 96)),
    "971", "972", "973", "974", "976",
]

# Meta-communes (commune code -> city name) that are skipped in the
# per-department pass and replaced by their arrondissement boundaries.
ARRONDISSEMENT_CITIES = {
    "75056": "Paris",
    "69123": "Lyon",
    "13055": "Marseille",
}
| def fetch_url(url: str, retries: int = 3) -> dict | None: | |
| """Fetch JSON from a URL with retries.""" | |
| for attempt in range(retries): | |
| try: | |
| req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"}) | |
| with urllib.request.urlopen(req, timeout=60) as resp: | |
| return json.loads(resp.read().decode()) | |
| except Exception as e: | |
| if attempt < retries - 1: | |
| wait = 2 ** attempt | |
| print(f" retry {attempt+1} in {wait}s...", end=" ", flush=True) | |
| time.sleep(wait) | |
| else: | |
| print(f" WARNING: Failed to fetch {url}: {e}") | |
| return None | |
def fetch_dept_communes(dept_code: str) -> list[dict]:
    """Return the commune GeoJSON features of one department.

    Asks geo.api.gouv.fr for the communes of ``dept_code``, requesting each
    commune's code, name, postcodes and contour in GeoJSON format.

    Returns the ``features`` list of the response, or an empty list when the
    download failed or the response carries no ``features`` entry.
    """
    endpoint = f"https://geo.api.gouv.fr/departements/{dept_code}/communes"
    query = "?fields=code,nom,codesPostaux,contour&format=geojson&geometry=contour"
    payload = fetch_url(endpoint + query)
    if not payload:
        return []
    return payload.get("features", [])
def fetch_arrondissements(city_code: str) -> list[dict]:
    """Fetch arrondissement boundaries for one meta-commune (Paris/Lyon/Marseille).

    Queries the single-commune endpoint for ``city_code`` with
    ``type=arrondissement-municipal`` and normalises the answer to a list of
    GeoJSON features.

    Returns the ``features`` list when the response has one, a one-element
    list when the response is itself a single ``Feature``, and an empty list
    otherwise (including when the download failed).
    """
    base = f"https://geo.api.gouv.fr/communes/{city_code}"
    query = (
        "?type=arrondissement-municipal"
        "&fields=code,nom,codesPostaux,contour&format=geojson"
    )
    payload = fetch_url(base + query)
    if not payload:
        return []
    if "features" in payload:
        return payload["features"]
    # The API might answer with a bare Feature rather than a FeatureCollection.
    if payload.get("type") == "Feature":
        return [payload]
    return []
def fetch_arrondissements_list(city_code: str) -> list[dict]:
    """Return the arrondissements of a city as a list of GeoJSON features.

    Uses the commune listing endpoint filtered by ``codeParent`` and
    ``type=arrondissement-municipal``, requesting code, name, postcodes and
    contour for each arrondissement.

    Returns the ``features`` list of the response, or an empty list when the
    download failed or the response has no ``features`` key.
    """
    base = "https://geo.api.gouv.fr/communes"
    query = (
        f"?codeParent={city_code}&type=arrondissement-municipal"
        "&fields=code,nom,codesPostaux,contour&format=geojson&geometry=contour"
    )
    payload = fetch_url(base + query)
    has_features = bool(payload) and "features" in payload
    return payload["features"] if has_features else []
def _pick_postcode_rows(features, skip_codes=frozenset()):
    """Convert API features into ``{"codePostal", "geometry"}`` rows.

    Features with no geometry or no postcodes are dropped, as are features
    whose ``code`` property is in ``skip_codes``. Each remaining feature is
    assigned to exactly one postcode (the first in sorted order) so that no
    area ends up attributed to two postcodes.
    """
    rows = []
    for feature in features:
        geometry = feature.get("geometry")
        # `or` guards against explicit nulls in the API payload.
        properties = feature.get("properties") or {}
        postcodes = properties.get("codesPostaux") or []
        if not geometry or not postcodes:
            continue
        if properties.get("code", "") in skip_codes:
            continue
        rows.append({"codePostal": min(postcodes), "geometry": geometry})
    return rows


def _round_coords(coords, ndigits=5):
    """Recursively round every number in a nested GeoJSON coordinate array.

    Empty arrays are returned as empty lists instead of raising.
    """
    if isinstance(coords, (int, float)):
        return round(coords, ndigits)
    return [_round_coords(item, ndigits) for item in coords]


def _round_geometry(geometry, ndigits=5):
    """Round the coordinates of a GeoJSON geometry dict in place.

    Null geometries are ignored; the members of a ``GeometryCollection``
    (which has ``geometries`` instead of ``coordinates``) are handled
    recursively.
    """
    if not geometry:
        return
    if "coordinates" in geometry:
        geometry["coordinates"] = _round_coords(geometry["coordinates"], ndigits)
    for member in geometry.get("geometries") or []:
        _round_geometry(member, ndigits)


def build_postcode_geojson():
    """Main pipeline: download communes, dissolve by postcode, export GeoJSON.

    Steps:
      1. Download the communes of every department in ``DEPT_CODES``,
         skipping the meta-communes listed in ``ARRONDISSEMENT_CITIES``, then
         download the arrondissements of those cities instead.
      2. Build a GeoDataFrame (EPSG:4326) with one row per commune or
         arrondissement, keyed by its single assigned postcode.
      3. Dissolve the rows by postcode.
      4. Simplify the dissolved geometries for web display.
      5. Write the result to ``OUTPUT_PATH`` and rewrite it with coordinates
         rounded to 5 decimals and compact JSON separators.

    Progress is printed to stdout. Returns ``None``.

    Raises:
        RuntimeError: if no geometry at all could be downloaded, in which
            case nothing is written.
    """
    print("=== Building clean postcode boundaries ===\n")

    # Step 1: Download all commune features with their postcodes
    all_rows = []  # dicts with "codePostal" and "geometry"
    empty_sources = []  # departments / cities for which nothing came back
    skip_codes = set(ARRONDISSEMENT_CITIES)
    dept_count = len(DEPT_CODES)
    for position, dept in enumerate(DEPT_CODES, start=1):
        print(f"[{position}/{dept_count}] Fetching dept {dept}...", end=" ", flush=True)
        features = fetch_dept_communes(dept)
        print(f"{len(features)} communes")
        if not features:
            empty_sources.append(f"dept {dept}")
        # Meta-communes (Paris/Lyon/Marseille) are handled via arrondissements
        all_rows.extend(_pick_postcode_rows(features, skip_codes))
        # Be polite to the API
        time.sleep(0.3)

    # Step 1b: Fetch arrondissements for Paris/Lyon/Marseille
    for city_code, city_name in ARRONDISSEMENT_CITIES.items():
        print(f"Fetching {city_name} arrondissements...", end=" ", flush=True)
        features = fetch_arrondissements_list(city_code)
        print(f"{len(features)} arrondissements")
        if not features:
            empty_sources.append(city_name)
        all_rows.extend(_pick_postcode_rows(features))
        time.sleep(0.3)

    print(f"\nTotal communes/arrondissements fetched: {len(all_rows)}")
    print(f"Total rows: {len(all_rows)}")
    if empty_sources:
        # Failed downloads only show up as scrolling log lines otherwise;
        # summarise them so an incomplete output file is not mistaken for a
        # complete one.
        print(
            "WARNING: no features returned for: "
            + ", ".join(empty_sources)
            + " (output will be incomplete)"
        )
    if not all_rows:
        # Without rows the frame has no 'codePostal' column and the steps
        # below would fail with an opaque KeyError.
        raise RuntimeError(
            "No commune geometries could be downloaded; nothing to export."
        )

    # Step 2: Build GeoDataFrame
    print("\nBuilding GeoDataFrame...")
    feature_records = [
        {
            "type": "Feature",
            "geometry": row["geometry"],
            "properties": {"codePostal": row["codePostal"]},
        }
        for row in all_rows
    ]
    gdf = gpd.GeoDataFrame.from_features(feature_records, crs="EPSG:4326")
    print(f"  Shape: {gdf.shape}")
    print(f"  Unique postcodes: {gdf['codePostal'].nunique()}")

    # Step 3: Dissolve by postcode (union geometries from different communes)
    print("\nDissolving by postcode...")
    dissolved = gdf.dissolve(by="codePostal", aggfunc="first").reset_index()
    print(f"  Dissolved features: {len(dissolved)}")

    # Step 4: Simplify geometries for web (tolerance ~55m, smooth at zoom 12-13)
    # NOTE(review): simplification is applied to each polygon on its own, so
    # borders shared by neighbouring postcodes may no longer coincide exactly
    # - confirm this is acceptable for the intended map.
    print("\nSimplifying geometries...")
    dissolved["geometry"] = dissolved["geometry"].simplify(
        tolerance=0.0005, preserve_topology=True
    )

    # Step 5: Export
    print(f"\nExporting to {OUTPUT_PATH}...")
    # The data directory may not exist on a fresh checkout.
    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
    dissolved.to_file(OUTPUT_PATH, driver="GeoJSON")

    # Post-process: reduce coordinate precision
    print("Post-processing: reducing coordinate precision...")
    with open(OUTPUT_PATH, encoding="utf-8") as src:
        geojson = json.load(src)
    for feature in geojson["features"]:
        _round_geometry(feature.get("geometry"))
    with open(OUTPUT_PATH, "w", encoding="utf-8") as dst:
        json.dump(geojson, dst, separators=(",", ":"))

    final_size = OUTPUT_PATH.stat().st_size / (1024 * 1024)
    print(f"\nDone! Output: {OUTPUT_PATH}")
    print(f"  Features: {len(geojson['features'])}")
    print(f"  File size: {final_size:.1f} MB")
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    build_postcode_geojson()